From 49c0cda799f920beea5fc22ee3282c2433c1229f Mon Sep 17 00:00:00 2001 From: Gregory Guillermin Date: Wed, 18 Oct 2023 17:23:57 +0200 Subject: [PATCH 1/5] update dependencies - upgrade python3.11 - change to GetDefinitionByAlias --- .github/workflows/fastir.yml | 6 ++--- fastir/common/filesystem.py | 49 ++++++++++++++++++++++++++++++++++-- fastir_artifacts.py | 2 +- requirements-test.txt | 4 +-- requirements.txt | 20 +++++++-------- 5 files changed, 63 insertions(+), 18 deletions(-) diff --git a/.github/workflows/fastir.yml b/.github/workflows/fastir.yml index dd22762..c09895e 100644 --- a/.github/workflows/fastir.yml +++ b/.github/workflows/fastir.yml @@ -34,15 +34,15 @@ jobs: strategy: matrix: os: [windows-2019] - python-version: ['3.10'] + python-version: ['3.11'] arch: [x86, x64] include: - os: ubuntu-latest arch: x64 - python-version: '3.10' + python-version: '3.11' - os: macos-latest arch: x64 - python-version: '3.10' + python-version: '3.11' steps: - name: Use FastIR repository uses: actions/checkout@v3 diff --git a/fastir/common/filesystem.py b/fastir/common/filesystem.py index 87146df..fa3c028 100644 --- a/fastir/common/filesystem.py +++ b/fastir/common/filesystem.py @@ -5,6 +5,7 @@ import psutil import artifacts from artifacts.source_type import FileSourceType +from functools import lru_cache from fastir.common.logging import logger from fastir.common.collector import AbstractCollector @@ -130,7 +131,48 @@ def _follow_symlink(self, parent, path_object): # they are still collected return OSFileSystem('/').get_fullpath(path_object.path) + @lru_cache(maxsize=10000) def list_directory(self, path_object): + entries = [] + directory = path_object.obj + + if not isinstance(directory, pytsk3.Directory): + if not self.is_directory(path_object): + return + try: + directory = path_object.obj.as_directory() + except OSError as err: + logger.error(f"Error collecting '{str(path_object.path)}': {err}") + directory = list() + + for entry in directory: + if ( + not hasattr(entry, 'info') or + not hasattr(entry.info, 'name') or + not hasattr(entry.info.name, 'name') or + entry.info.name.name in [b'.', b'..'] or + not hasattr(entry.info, 'meta') or + not hasattr(entry.info.meta, 'size') or + not hasattr(entry.info.meta, 'type') or + not self.is_allocated(entry) + ): + continue + + name = entry.info.name.name.decode('utf-8', errors='replace') + filepath = os.path.join(path_object.path, name) + entry_path_object = PathObject(self, name, filepath, entry) + + if entry.info.meta.type == pytsk3.TSK_FS_META_TYPE_LNK: + symlink_object = self._follow_symlink(path_object, entry_path_object) + + if symlink_object: + entries.append(symlink_object) + else: + entries.append(entry_path_object) + + return entries + + def list_directory_old(self, path_object): if path_object.path in self._entries_cache: return self._entries_cache[path_object.path] else: @@ -145,8 +187,11 @@ def list_directory(self, path_object): if not isinstance(directory, pytsk3.Directory): if not self.is_directory(path_object): return - - directory = path_object.obj.as_directory() + try: + directory = path_object.obj.as_directory() + except OSError as err: + logger.error(f"Error collecting '{str(path_object.path)}': {err}") + directory = list() for entry in directory: if ( diff --git a/fastir_artifacts.py b/fastir_artifacts.py index 1ba8804..491f8a8 100644 --- a/fastir_artifacts.py +++ b/fastir_artifacts.py @@ -47,7 +47,7 @@ def resolve_artifact_groups(registry, artifact_names): resolved_names = set() for artifact in artifact_names: - definition = registry.GetDefinitionByName(artifact) + definition = registry.GetDefinitionByAlias(artifact) if definition: resolved_names.add(artifact) diff --git a/requirements-test.txt b/requirements-test.txt index a519a80..4220a11 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.1.0 -pytest-cov==3.0.0 +pytest==7.4.2 +pytest-cov==4.1.0 diff --git a/requirements.txt b/requirements.txt index fb17e0d..1feeb04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -git+https://github.com/ForensicArtifacts/artifacts.git@6b4753931aeb55f97a1838bdf582e4100ac2b3ee#egg=artifacts -ConfigArgParse==1.5.3 -pypiwin32==223 ; sys_platform == 'win32' -PyYAML==6.0 -pytsk3==20211111 -PyInstaller==4.10 -psutil==5.9.0 -jsonlines==3.0.0 -filetype==1.0.10 -pefile==2021.9.3 +artifacts==20230928 +ConfigArgParse==1.7 +pywin32==306; platform_system=='Windows' +PyYAML==6.0.1 +pytsk3==20231007 +PyInstaller==6.1.0 +psutil==5.9.6 +jsonlines==4.0.0 +filetype==1.2.0 +pefile==2023.2.7 From ffa0ae001c4989a3d0277476700079f2809fbf1a Mon Sep 17 00:00:00 2001 From: Gregory Guillermin Date: Wed, 18 Oct 2023 17:36:11 +0200 Subject: [PATCH 2/5] updating workflow to build on other branch & dependabot --- .github/workflows/dependabot.yml | 11 +++++++++++ .github/workflows/fastir.yml | 1 - Pipfile | 21 +++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/dependabot.yml create mode 100644 Pipfile diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml new file mode 100644 index 0000000..d426669 --- /dev/null +++ b/.github/workflows/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "monthly" \ No newline at end of file diff --git a/.github/workflows/fastir.yml b/.github/workflows/fastir.yml index c09895e..063518c 100644 --- a/.github/workflows/fastir.yml +++ b/.github/workflows/fastir.yml @@ -30,7 +30,6 @@ jobs: draft: false build: runs-on: ${{ matrix.os }} - needs: release strategy: matrix: os: [windows-2019] diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..7a5f4d6 --- /dev/null +++ b/Pipfile @@ -0,0 +1,21 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +artifacts = "==20230928" +configargparse = "==1.7" +pywin32 = {version= "==306", sys_platform = "== 'win32'"} +pyyaml = "==6.0.1" +pytsk3 = "==20231007" +pyinstaller = "==6.1.0" +psutil = "==5.9.6" +jsonlines = "==4.0.0" +filetype = "==1.2.0" +pefile = "==2023.2.7" + +[dev-packages] + +[requires] +python_version = "3.11" From cd7122b31e8e765e4f1e7eca215b95e70631fff9 Mon Sep 17 00:00:00 2001 From: Gregory Guillermin Date: Wed, 18 Oct 2023 17:52:49 +0200 Subject: [PATCH 3/5] fixing workflow and upload artefact archive --- .github/{workflows => }/dependabot.yml | 0 .github/workflows/fastir.yml | 8 +++++++- 2 files changed, 7 insertions(+), 1 deletion(-) rename .github/{workflows => }/dependabot.yml (100%) diff --git a/.github/workflows/dependabot.yml b/.github/dependabot.yml similarity index 100% rename from .github/workflows/dependabot.yml rename to .github/dependabot.yml diff --git a/.github/workflows/fastir.yml b/.github/workflows/fastir.yml index 063518c..ae12289 100644 --- a/.github/workflows/fastir.yml +++ b/.github/workflows/fastir.yml @@ -65,10 +65,16 @@ jobs: run: | python -m pytest --cov-report xml --cov fastir tests pyinstaller fastir_artifacts.spec - - name: run tests on build + - name: run tests & zip run: | python -m pytest integration_tests python -m zipfile -c FastIR-Artifacts-${{ runner.os }}-${{ matrix.arch }}.zip dist/fastir_artifacts + - name: Archive build results + uses: actions/upload-artifact@v3 + if: github.ref != 'refs/heads/master' && github.event_name == 'push' + with: + name: FastIR-Artifacts-${{ runner.os }}-${{ matrix.arch }} + path: FastIR-Artifacts-${{ runner.os }}-${{ matrix.arch }}.zip - name: Upload asset uses: AButler/upload-release-assets@v2.0 if: github.ref == 'refs/heads/master' && github.event_name == 'push' From d8c7f944e357700763152bdec54ad13ea5470a34 Mon Sep 17 00:00:00 2001 From: Gregory Guillermin Date: Wed, 18 Oct 2023 21:16:38 +0200 Subject: [PATCH 4/5] reverse lru to custom cache --- .github/workflows/fastir.yml | 2 +- fastir/common/filesystem.py | 42 ------------------------------------ 2 files changed, 1 insertion(+), 43 deletions(-) diff --git a/.github/workflows/fastir.yml b/.github/workflows/fastir.yml index ae12289..940d28c 100644 --- a/.github/workflows/fastir.yml +++ b/.github/workflows/fastir.yml @@ -71,7 +71,7 @@ jobs: python -m zipfile -c FastIR-Artifacts-${{ runner.os }}-${{ matrix.arch }}.zip dist/fastir_artifacts - name: Archive build results uses: actions/upload-artifact@v3 - if: github.ref != 'refs/heads/master' && github.event_name == 'push' + if: github.event_name == 'push' && !contains(github.ref, 'master') with: name: FastIR-Artifacts-${{ runner.os }}-${{ matrix.arch }} path: FastIR-Artifacts-${{ runner.os }}-${{ matrix.arch }}.zip diff --git a/fastir/common/filesystem.py b/fastir/common/filesystem.py index fa3c028..6f702f7 100644 --- a/fastir/common/filesystem.py +++ b/fastir/common/filesystem.py @@ -5,7 +5,6 @@ import psutil import artifacts from artifacts.source_type import FileSourceType -from functools import lru_cache from fastir.common.logging import logger from fastir.common.collector import AbstractCollector @@ -131,48 +130,7 @@ def _follow_symlink(self, parent, path_object): # they are still collected return OSFileSystem('/').get_fullpath(path_object.path) - @lru_cache(maxsize=10000) def list_directory(self, path_object): - entries = [] - directory = path_object.obj - - if not isinstance(directory, pytsk3.Directory): - if not self.is_directory(path_object): - return - try: - directory = path_object.obj.as_directory() - except OSError as err: - logger.error(f"Error collecting '{str(path_object.path)}': {err}") - directory = list() - - for entry in directory: - if ( - not hasattr(entry, 'info') or - not hasattr(entry.info, 'name') or - not hasattr(entry.info.name, 'name') or - entry.info.name.name in [b'.', b'..'] or - not hasattr(entry.info, 'meta') or - not hasattr(entry.info.meta, 'size') or - not hasattr(entry.info.meta, 'type') or - not self.is_allocated(entry) - ): - continue - - name = entry.info.name.name.decode('utf-8', errors='replace') - filepath = os.path.join(path_object.path, name) - entry_path_object = PathObject(self, name, filepath, entry) - - if entry.info.meta.type == pytsk3.TSK_FS_META_TYPE_LNK: - symlink_object = self._follow_symlink(path_object, entry_path_object) - - if symlink_object: - entries.append(symlink_object) - else: - entries.append(entry_path_object) - - return entries - - def list_directory_old(self, path_object): if path_object.path in self._entries_cache: return self._entries_cache[path_object.path] else: From 9af2e321413c792c643478f643ba34c87af146e3 Mon Sep 17 00:00:00 2001 From: Gregory Guillermin Date: Thu, 19 Oct 2023 09:24:29 +0200 Subject: [PATCH 5/5] fixing cache values --- fastir/common/filesystem.py | 11 ++++------- fastir/common/variables.py | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fastir/common/filesystem.py b/fastir/common/filesystem.py index 6f702f7..d7b134c 100644 --- a/fastir/common/filesystem.py +++ b/fastir/common/filesystem.py @@ -94,7 +94,6 @@ def __init__(self, manager, device, path): # Cache parsed entries for better performances self._entries_cache = {} - self._entries_cache_last = [] # Open drive img_info = pytsk3.Img_Info(self._device) @@ -135,9 +134,8 @@ def list_directory(self, path_object): return self._entries_cache[path_object.path] else: # Make sure we do not keep more than 10 000 entries in the cache - if len(self._entries_cache_last) >= 10000: - first = self._entries_cache_last.pop(0) - del self._entries_cache[first] + if len(self._entries_cache) >= 10000: + self._entries_cache.pop(next(iter(self._entries_cache))) entries = [] directory = path_object.obj @@ -147,9 +145,9 @@ def list_directory(self, path_object): return try: directory = path_object.obj.as_directory() - except OSError as err: + except Exception as err: logger.error(f"Error collecting '{str(path_object.path)}': {err}") - directory = list() + return for entry in directory: if ( @@ -177,7 +175,6 @@ def list_directory(self, path_object): entries.append(entry_path_object) self._entries_cache[path_object.path] = entries - self._entries_cache_last.append(entries) return entries diff --git a/fastir/common/variables.py b/fastir/common/variables.py index 92ca95f..b12a7ad 100644 --- a/fastir/common/variables.py +++ b/fastir/common/variables.py @@ -57,6 +57,6 @@ def substitute(self, value): if not values: logger.warning(f"Value '{value}' contains unsupported variables") - values.add(value) + # values.add(value) return values