From c023fac2602be009b4b611364b2522302884f49c Mon Sep 17 00:00:00 2001 From: Mathew Payne <2772944+GeekMasher@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:13:05 +0000 Subject: [PATCH 1/4] feat: Add unittests --- .gitignore | 2 + filter_sarif.py | 139 +++++++++++++++++++++--------------------- tests/data/cpp.sarif | 47 ++++++++++++++ tests/test_filter.py | 26 ++++++++ tests/test_pattern.py | 33 ++++++++++ 5 files changed, 179 insertions(+), 68 deletions(-) create mode 100644 .gitignore create mode 100644 tests/data/cpp.sarif create mode 100644 tests/test_filter.py create mode 100644 tests/test_pattern.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1b05740 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ + +__pycache__/ diff --git a/filter_sarif.py b/filter_sarif.py index ac6ca5f..1055b2f 100644 --- a/filter_sarif.py +++ b/filter_sarif.py @@ -10,7 +10,7 @@ def fail(msg): sys.exit(-1) -def match_path_and_rule(path, rule, patterns): +def match_path_and_rule(path: str, rule: str, patterns: list[tuple[bool, str, str]]) -> bool: result = True for s, fp, rp in patterns: if match(rp, rule) and match(fp, path): @@ -18,21 +18,21 @@ def match_path_and_rule(path, rule, patterns): return result -def parse_pattern(line): - sepchar = ':' - escchar = '\\' - file_pattern = '' - rule_pattern = '' +def parse_pattern(line: str) -> tuple[bool, str, str]: + sepchar = ":" + escchar = "\\" + file_pattern = "" + rule_pattern = "" seen_separator = False sign = True # inclusion or exclusion pattern? uline = line if line: - if line[0] == '-': + if line[0] == "-": sign = False uline = line[1:] - elif line[0] == '+': + elif line[0] == "+": uline = line[1:] i = 0 @@ -41,12 +41,14 @@ def parse_pattern(line): i = i + 1 if c == sepchar: if seen_separator: - raise Exception('Invalid pattern: "' + line + '" Contains more than one separator!') + raise Exception( + 'Invalid pattern: "' + line + '" Contains more than one separator!' + ) seen_separator = True continue elif c == escchar: nextc = uline[i] if (i < len(uline)) else None - if nextc in ['+' , '-', escchar, sepchar]: + if nextc in ["+", "-", escchar, sepchar]: i = i + 1 c = nextc if seen_separator: @@ -55,93 +57,94 @@ def parse_pattern(line): file_pattern = file_pattern + c if not rule_pattern: - rule_pattern = '**' + rule_pattern = "**" return sign, file_pattern, rule_pattern -def filter_sarif(args): - if args.split_lines: - tmp = [] - for p in args.patterns: - tmp = tmp + re.split('\r?\n', p) - args.patterns = tmp - - args.patterns = [parse_pattern(p) for p in args.patterns if p] - - print('Given patterns:') - for s, fp, rp in args.patterns: - print( - 'files: {file_pattern} rules: {rule_pattern} ({sign})'.format( - file_pattern=fp, - rule_pattern=rp, - sign='positive' if s else 'negative' - ) - ) - - with open(args.input, 'r', encoding='utf-8') as f: - s = json.load(f) - - for run in s.get('runs', []): - if run.get('results', []): +def filter_sarif(sarif: dict, patterns: list) -> dict: + for run in sarif.get("runs", []): + if run.get("results", []): new_results = [] - for r in run['results']: - if r.get('locations', []): + for r in run["results"]: + if r.get("locations", []): new_locations = [] - for l in r['locations']: + for l in r["locations"]: # TODO: The uri field is optional. We might have to fetch the actual uri from "artifacts" via "index" # (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#-linking-results-to-artifacts) - uri = l.get('physicalLocation', {}).get('artifactLocation', {}).get('uri', None) + uri = ( + l.get("physicalLocation", {}) + .get("artifactLocation", {}) + .get("uri", None) + ) # TODO: The ruleId field is optional and potentially ambiguous. We might have to fetch the actual # ruleId from the rule metadata via the ruleIndex field. # (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#rule-metadata) - ruleId = r['ruleId'] - if uri is None or match_path_and_rule(uri, ruleId, args.patterns): + ruleId = r["ruleId"] + if uri is None or match_path_and_rule(uri, ruleId, patterns): new_locations.append(l) - r['locations'] = new_locations + r["locations"] = new_locations if new_locations: new_results.append(r) else: # locations array doesn't exist or is empty, so we can't match on anything # therefore, we include the result in the output new_results.append(r) - run['results'] = new_results + run["results"] = new_results - with open(args.output, 'w', encoding='utf-8') as f: - json.dump(s, f, indent=2) + return sarif def main(): - parser = argparse.ArgumentParser( - prog='filter-sarif' - ) + parser = argparse.ArgumentParser(prog="filter-sarif") + parser.add_argument("--input", help="Input SARIF file", required=True) + parser.add_argument("--output", help="Output SARIF file", required=True) parser.add_argument( - '--input', - help='Input SARIF file', - required=True - ) - parser.add_argument( - '--output', - help='Output SARIF file', - required=True - ) - parser.add_argument( - '--split-lines', + "--split-lines", default=False, - action='store_true', - help='Split given patterns on newlines.' - ) - parser.add_argument( - 'patterns', - help='Inclusion and exclusion patterns.', - nargs='+' + action="store_true", + help="Split given patterns on newlines.", ) + parser.add_argument("patterns", help="Inclusion and exclusion patterns.", nargs="+") def print_usage(args): print(parser.format_usage()) args = parser.parse_args() - filter_sarif(args) + + if args.split_lines: + tmp = [] + for p in args.patterns: + tmp = tmp + re.split("\r?\n", p) + args.patterns = tmp + + args.patterns = [parse_pattern(p) for p in args.patterns if p] + + print("Given patterns:") + for s, fp, rp in args.patterns: + print( + "files: {file_pattern} rules: {rule_pattern} ({sign})".format( + file_pattern=fp, rule_pattern=rp, sign="positive" if s else "negative" + ) + ) + + with open(args.input, "r", encoding="utf-8") as f: + s = json.load(f) + + pre_count = len(s.get("runs", [{}])[0].get("results", [])) + print(f"Input SARIF Results Count (before filtering): {pre_count}") + + sarif = filter_sarif(s, args.patterns) + + post_count = len(sarif.get("runs", [{}])[0].get("results", [])) + print(f"Output SARIF Results Count (after filtering): {post_count}") + + if post_count == pre_count: + print("!! No results were filtered. Check your filter patterns !!") + + with open(args.output, "w", encoding="utf-8") as f: + json.dump(sarif, f, indent=2) -main() +if __name__ == "__main__": + main() diff --git a/tests/data/cpp.sarif b/tests/data/cpp.sarif new file mode 100644 index 0000000..6f1ed04 --- /dev/null +++ b/tests/data/cpp.sarif @@ -0,0 +1,47 @@ +{ + "runs": [ + { + "results": [ + { + "ruleId": "cpp/stack-overflow", + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "io/kb.c", + "index": 0 + } + } + } + ] + }, + { + "ruleId": "cpp/stack-overflow", + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "src/app.cpp", + "index": 0 + } + } + } + ] + }, + { + "ruleId": "cpp/stack-overflow", + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "_codeql_build_dir/_deps/libcurl-src/lib/http.c", + "index": 0 + } + } + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/test_filter.py b/tests/test_filter.py new file mode 100644 index 0000000..9a0dd7a --- /dev/null +++ b/tests/test_filter.py @@ -0,0 +1,26 @@ +import json +import unittest +import filter_sarif + + +class TestFilter(unittest.TestCase): + def setUp(self) -> None: + with open("tests/data/cpp.sarif") as f: + self.sarif = json.load(f) + self.assertEqual(len(self.sarif["runs"][0]["results"]), 3) + return super().setUp() + + def test_exclude_all(self): + patterns = ["-**/*"] + parsed_patterns = [filter_sarif.parse_pattern(p) for p in patterns] + sarif = filter_sarif.filter_sarif(self.sarif, parsed_patterns) + # Excluding everything + self.assertEqual(len(sarif["runs"][0]["results"]), 0) + + def test_exclude_all_except_one(self): + # -**/* [exclude everything first], +src/** [include everything in src] + patterns = ["-**/*", "+src/**"] + parsed_patterns = [filter_sarif.parse_pattern(p) for p in patterns] + sarif = filter_sarif.filter_sarif(self.sarif, parsed_patterns) + # Only 1 + self.assertEqual(len(sarif["runs"][0]["results"]), 1) diff --git a/tests/test_pattern.py b/tests/test_pattern.py new file mode 100644 index 0000000..95fc34c --- /dev/null +++ b/tests/test_pattern.py @@ -0,0 +1,33 @@ +import json +import unittest +import filter_sarif + + +class TestPatternParsing(unittest.TestCase): + def test_parse_pattern_files(self): + # `-` doesn't set sign to True + self.assertEqual(filter_sarif.parse_pattern("-**/*"), (False, "**/*", "**")) + self.assertEqual(filter_sarif.parse_pattern("+src/**"), (True, "src/**", "**")) + # No rule but still has a `:` + self.assertEqual( + filter_sarif.parse_pattern("-**/*Test*.java:**"), + (False, "**/*Test*.java", "**"), + ) + + def test_underscores(self): + self.assertEqual( + filter_sarif.parse_pattern("-_codeql_build_dir/**"), + (False, "_codeql_build_dir/**", "**"), + ) + + def test_parse_pattern_rules(self): + self.assertEqual( + filter_sarif.parse_pattern("-**/*:rule"), (False, "**/*", "rule") + ) + self.assertEqual( + filter_sarif.parse_pattern("+src/**:rule"), (True, "src/**", "rule") + ) + + self.assertEqual( + filter_sarif.parse_pattern("+src/**:rule"), (True, "src/**", "rule") + ) From 5581a3c851217f922ccfd05d3a229b857b83045e Mon Sep 17 00:00:00 2001 From: Mathew Payne <2772944+GeekMasher@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:20:05 +0000 Subject: [PATCH 2/4] feat(ci): Add unit testing in Actions --- .github/workflows/action-test.yml | 95 ++++++++++++++++++------------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/.github/workflows/action-test.yml b/.github/workflows/action-test.yml index c392d4d..ff77ec0 100644 --- a/.github/workflows/action-test.yml +++ b/.github/workflows/action-test.yml @@ -3,19 +3,32 @@ on: workflow_dispatch: inputs: pattern1: - description: 'Include or exclude pattern' + description: "Include or exclude pattern" required: true pattern2: - description: 'Include or exclude pattern' + description: "Include or exclude pattern" required: false pattern3: - description: 'Include or exclude pattern' + description: "Include or exclude pattern" required: false pattern4: - description: 'Include or exclude pattern' + description: "Include or exclude pattern" required: false jobs: + tests: + name: Tests + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: "Run Unit Tests" + run: | + set -e + python -m unittest discover -s ./tests -p 'test_*.py' + analyze: name: Analyze runs-on: ubuntu-latest @@ -23,48 +36,48 @@ jobs: strategy: fail-fast: false matrix: - language: [ 'java' ] + language: ["java"] steps: - - name: Checkout repository - uses: actions/checkout@v4 + - name: Checkout repository + uses: actions/checkout@v4 - - name: Initialize CodeQL - uses: github/codeql-action/init@v3 - with: - languages: ${{ matrix.language }} + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} - - run: | - javatest/build + - run: | + javatest/build - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 - with: - upload: failure-only - output: sarif-results + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + upload: failure-only + output: sarif-results - - name: filter-sarif - uses: advanced-security/filter-sarif@main - with: - #patterns: | - # -**/*.java - # +**/*Exposed* - patterns: | - ${{ github.event.inputs.pattern1 }} - ${{ github.event.inputs.pattern2 }} - ${{ github.event.inputs.pattern3 }} - ${{ github.event.inputs.pattern4 }} - input: sarif-results/java.sarif - output: sarif-results/java.sarif + - name: filter-sarif + uses: advanced-security/filter-sarif@main + with: + #patterns: | + # -**/*.java + # +**/*Exposed* + patterns: | + ${{ github.event.inputs.pattern1 }} + ${{ github.event.inputs.pattern2 }} + ${{ github.event.inputs.pattern3 }} + ${{ github.event.inputs.pattern4 }} + input: sarif-results/java.sarif + output: sarif-results/java.sarif - - name: Upload SARIF - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: sarif-results/java.sarif + - name: Upload SARIF + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: sarif-results/java.sarif - - name: Upload loc as a Build Artifact - uses: actions/upload-artifact@v4 - with: - name: sarif-results - path: sarif-results - retention-days: 1 + - name: Upload loc as a Build Artifact + uses: actions/upload-artifact@v4 + with: + name: sarif-results + path: sarif-results + retention-days: 1 From 568aecf155ba483e85171d50b7cb8cfd2ec35c1a Mon Sep 17 00:00:00 2001 From: Mathew Payne <2772944+GeekMasher@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:21:59 +0000 Subject: [PATCH 3/4] fix(ci): Move unit tests to own Action --- .github/workflows/action-test.yml | 13 ------------- .github/workflows/python-tests.yml | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/python-tests.yml diff --git a/.github/workflows/action-test.yml b/.github/workflows/action-test.yml index ff77ec0..b4ddc2f 100644 --- a/.github/workflows/action-test.yml +++ b/.github/workflows/action-test.yml @@ -16,19 +16,6 @@ on: required: false jobs: - tests: - name: Tests - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: "Run Unit Tests" - run: | - set -e - python -m unittest discover -s ./tests -p 'test_*.py' - analyze: name: Analyze runs-on: ubuntu-latest diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml new file mode 100644 index 0000000..2644ca7 --- /dev/null +++ b/.github/workflows/python-tests.yml @@ -0,0 +1,20 @@ +name: "Testing" +on: + push: + branches: ["main"] + pull_request: + branches: ["main"] + +jobs: + tests: + name: Tests + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: "Run Unit Tests" + run: | + set -e + python -m unittest discover -s ./tests -p 'test_*.py' From b6d15bae98233fa1325f3338c56a0663934ba9e2 Mon Sep 17 00:00:00 2001 From: Mathew Payne <2772944+GeekMasher@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:23:17 +0000 Subject: [PATCH 4/4] fix(ci): Update PR trigger --- .github/workflows/python-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 2644ca7..1d6d901 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -2,8 +2,8 @@ name: "Testing" on: push: branches: ["main"] - pull_request: - branches: ["main"] + pull_request: + branches: ["main"] jobs: tests: