diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7d308a781..517c342cc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
     # You are encouraged to use static refs such as tags, instead of branch name
     #
     # Running "pre-commit autoupdate" would automatically updates rev to latest tag
-    rev: 0.13.1+ibm.63.dss
+    rev: 0.13.1+ibm.64.dss
     hooks:
       - id: detect-secrets # pragma: whitelist secret
         # Add options for detect-secrets-hook binary. You can run `detect-secrets-hook --help` to list out all possible options.
diff --git a/.secrets.baseline b/.secrets.baseline
index 75312d946..30c896c1f 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "test_data/.*|tests/.*|^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2025-10-02T20:03:24Z",
+  "generated_at": "2025-11-24T17:44:35Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -200,7 +200,7 @@
         "hashed_secret": "f32a07369c6fd4eaacb1f5a8877824ef98204a1c",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 102,
+        "line_number": 105,
         "type": "Secret Keyword",
         "verified_result": null
       },
@@ -208,7 +208,7 @@
         "hashed_secret": "1af17e73721dbe0c40011b82ed4bb1a7dbe3ce29",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 105,
+        "line_number": 108,
         "type": "Secret Keyword",
         "verified_result": null
       }
@@ -242,7 +242,7 @@
       }
     ]
   },
-  "version": "0.13.1+ibm.63.dss",
+  "version": "0.13.1+ibm.64.dss",
   "word_list": {
     "file": null,
     "hash": null
diff --git a/detect_secrets/core/baseline.py b/detect_secrets/core/baseline.py
index 41692f266..b3de94c66 100644
--- a/detect_secrets/core/baseline.py
+++ b/detect_secrets/core/baseline.py
@@ -24,6 +24,7 @@ def initialize(
     output_raw=False,
     output_verified_false=False,
     suppress_unscannable_file_warnings=False,
+    diff_branch=None,
 ):
     """Scans the entire codebase for secrets, and returns a
     SecretsCollection object.
@@ -49,6 +50,10 @@ def initialize(
     :type suppress_unscannable_file_warnings boolean
     :param suppress_unscannable_file_warnings: whether or not to suppress unscannable file warnings
 
+    :type diff_branch: str|None
+    :param diff_branch: optional name of branch to check for
+        differences against in determining files to scan.
+
     :rtype: SecretsCollection
     """
     output = SecretsCollection(
@@ -68,6 +73,10 @@ def initialize(
                 files_to_scan.extend(
                     _get_files_recursively(element),
                 )
+            elif diff_branch is not None:
+                files_to_scan.extend(
+                    _get_git_tracked_diff_files(element, diff_branch),
+                )
             else:
                 files_to_scan.extend(
                     _get_git_tracked_files(element),
@@ -380,6 +389,60 @@ def _get_git_tracked_files(rootdir='.'):
     return output
 
 
+def _get_git_tracked_diff_files(rootdir='.', diff_branch=None):
+    """On incremental builds it is only necessary to scan the files that
+    have changed. This will allow a scan of files that have differences
+    from the named branch. The filter does not list files that are
+    deleted because it is impossible to scan them now.
+
+    :type rootdir: str
+    :param rootdir: root directory of where you want to list files from
+
+    :type diff_branch: str|None
+    :param diff_branch: name of branch to check differences from.
+        'test' would find files with differences between the current branch
+        and the local test branch.
+        'origin/main' would find files with differences between the current
+        branch and the remote main branch.
+        When None there is nothing to compare against and an empty
+        list is returned.
+
+    :rtype: list
+    :returns: filepaths to files with differences from the diff_branch
+        which git currently tracks (locally); empty when git exits
+        with an error
+    """
+    if diff_branch is None:
+        return []
+
+    try:
+        with open(os.devnull, 'w') as fnull:
+            git_files = subprocess.check_output(
+                [
+                    'git',
+                    'diff',
+                    '--name-only',
+                    # NUL-terminated, unquoted paths: a filename that
+                    # contains whitespace must stay in one piece.
+                    '-z',
+                    '--diff-filter=ACMRTUX',
+                    diff_branch,
+                    '--', rootdir,
+                ],
+                stderr=fnull,
+            )
+    except subprocess.CalledProcessError:
+        # NOTE(review): best-effort -- a mistyped branch name ends up
+        # here too, so nothing gets scanned and no error is reported.
+        return []
+
+    return [
+        filename
+        for filename in git_files.decode('utf-8').split('\0')
+        if filename
+    ]
+
+
 def _get_files_recursively(rootdir):
     """Sometimes, we want to use this tool with non-git repositories.
     This function allows us to do so.
diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py
index 5d532f3d0..852d530f7 100644
--- a/detect_secrets/core/usage.py
+++ b/detect_secrets/core/usage.py
@@ -205,7 +205,8 @@ def add_arguments(self):
         self._add_initialize_baseline_argument()\
             ._add_adhoc_scanning_argument()\
             ._add_output_raw_argument()\
-            ._add_suppress_unscannable_file_warnings()
+            ._add_suppress_unscannable_file_warnings()\
+            ._add_diff_branch()
 
         PluginOptions(self.parser).add_arguments()
 
@@ -289,6 +290,18 @@ def _add_suppress_unscannable_file_warnings(self):
         add_suppress_unscannable_file_warnings(self.parser)
         return self
 
+    def _add_diff_branch(self):
+        self.parser.add_argument(
+            '--diff-branch',
+            type=str,
+            help=(
+                'Scan only files that are tracked to git containing '
+                'differences from the named branch.'
+            ),
+            dest='diff_branch',
+        )
+        return self
+
 
 class AuditOptions:
     def __init__(self, subparser):
diff --git a/detect_secrets/main.py b/detect_secrets/main.py
index 7bbc8f8f6..8dbde7548 100644
--- a/detect_secrets/main.py
+++ b/detect_secrets/main.py
@@ -189,6 +189,7 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
         output_raw=args.output_raw,
         output_verified_false=args.output_verified_false,
         suppress_unscannable_file_warnings=args.suppress_unscannable_file_warnings,
+        diff_branch=args.diff_branch,
     ).format_for_baseline_output()
 
     if old_baseline:
@@ -206,7 +207,8 @@ def _get_existing_baseline(import_filename):
         try:
             return _read_from_file(import_filename[0])
         except FileNotFoundError as fnf_error:
-            if fnf_error.errno == 2:  # create new baseline if not existed
+            if fnf_error.errno == 2 or fnf_error.errno == 129:
+                # create new baseline if not existed, 129 is for z/OS
                 return None
             else:  # throw exception for other cases
                 print(
diff --git a/docs/cheat-sheet.md b/docs/cheat-sheet.md
index 178bac794..e46a9007c 100644
--- a/docs/cheat-sheet.md
+++ b/docs/cheat-sheet.md
@@ -76,6 +76,9 @@ detect-secrets scan file1 file2
 
 # Scan all files except for .gitignore
 detect-secrets scan --all-files
+
+# Scan only files that are tracked to git containing differences from the named branch
+detect-secrets scan --diff-branch diff_branch_name
 ```
 
 ### Ad-hoc scan on a single string
diff --git a/tests/main_test.py b/tests/main_test.py
index 0b0368b24..d7edab6dd 100644
--- a/tests/main_test.py
+++ b/tests/main_test.py
@@ -96,6 +96,7 @@ def test_scan_basic(self, mock_baseline_initialize):
             word_list_file=None,
             word_list_hash=None,
             suppress_unscannable_file_warnings=False,
+            diff_branch=None,
         )
 
     def test_scan_with_rootdir(self, mock_baseline_initialize):
@@ -113,6 +114,7 @@ def test_scan_with_rootdir(self, mock_baseline_initialize):
             word_list_file=None,
             word_list_hash=None,
             suppress_unscannable_file_warnings=False,
+            diff_branch=None,
         )
 
     def test_scan_with_exclude_args(self, mock_baseline_initialize):
@@ -132,6 +134,7 @@ def test_scan_with_exclude_args(self, mock_baseline_initialize):
             word_list_file=None,
             word_list_hash=None,
             suppress_unscannable_file_warnings=False,
+            diff_branch=None,
         )
 
     @pytest.mark.parametrize(
@@ -217,6 +220,25 @@ def test_scan_with_all_files_flag(self, mock_baseline_initialize):
             word_list_file=None,
             word_list_hash=None,
             suppress_unscannable_file_warnings=False,
+            diff_branch=None,
+        )
+
+    def test_scan_with_diff_branch(self, mock_baseline_initialize):
+        with mock_stdin():
+            assert main('scan --diff-branch some_branch_here'.split()) == 0
+
+        mock_baseline_initialize.assert_called_once_with(
+            plugins=Any(tuple),
+            exclude_files_regex=None,
+            exclude_lines_regex=None,
+            path='.',
+            should_scan_all_files=False,
+            output_raw=False,
+            output_verified_false=False,
+            word_list_file=None,
+            word_list_hash=None,
+            suppress_unscannable_file_warnings=False,
+            diff_branch='some_branch_here',
         )
 
     def test_reads_from_stdin(self, mock_merge_baseline):
@@ -274,6 +296,7 @@ def test_reads_non_existed_baseline_from_file(
             word_list_file=None,
             word_list_hash=None,
             suppress_unscannable_file_warnings=False,
+            diff_branch=None,
         )
         mock_merge_baseline.assert_not_called()
 