diff --git a/.github/workflows/staging-check.yml b/.github/workflows/staging-check.yml
new file mode 100644
index 0000000..15b7049
--- /dev/null
+++ b/.github/workflows/staging-check.yml
@@ -0,0 +1,34 @@
+# Checks, for every PR targeting `prod`, that the output generated from the
+# PR matches what staging serves, then comments on the PR with the changes the
+# PR would make to the production `generated_files.json`.
+name: Validate output against staging
+
+on:
+  pull_request:
+    branches:
+      - prod
+
+jobs:
+  validate_against_staging:
+    permissions:
+      # Listing any permission sets every unlisted one to "none", so spell out
+      # the read access that the checkout step relies on.
+      contents: read
+      # Needed to post the diff comment on the PR.
+      pull-requests: write
+
+    runs-on: ubuntu-latest
+
+    env:
+      PYTHONDEVMODE: 1
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Run deployment script
+        run: |
+          pip install lxml
+          mkdir tmp/
+          python tools/convert.py -a -d tmp ispdb/*
+
+      - name: Compare output against staging
+        run: |
+          pip install requests
+          python tools/compare_out_files.py -b https://autoconfig-stage.thunderbird.net/v1.1/ tmp/
+
+      - name: Calculate generated_files.json diff with prod
+        env:
+          # Hand the token over through the environment rather than
+          # interpolating it into the script text.
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          python tools/calculate_generated_files_diff.py -b https://autoconfig.thunderbird.net/v1.1 -t "$GITHUB_TOKEN" -r "${{ github.repository }}" -n "${{ github.event.pull_request.number }}" tmp/
diff --git a/.github/workflows/validate-config.yml b/.github/workflows/validate-config.yml
index 8ebb138..d41af32 100644
--- a/.github/workflows/validate-config.yml
+++ b/.github/workflows/validate-config.yml
@@ -6,15 +6,29 @@ jobs:
validate_config:
runs-on: ubuntu-latest
+ env:
+ PYTHONDEVMODE: 1
+
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v6
+
+ - name: BOM check
+ run: |
+ ! grep -rlI $'\xEF\xBB\xBF' ispdb
- - name: BOM check
- uses: arma-actions/bom-check@v1.1
- with:
- path: ispdb
+ - name: Validate file extensions
+ run: |
+ set -o pipefail
+ shopt -s extglob nullglob
+ files=( ispdb/!(*.xml) )
+ if (( ${#files[*]} )); then
+ for file in "${files[@]}"; do
+ printf '::error file=%s::File name "%s" does not end in .xml – Please rename!\n' "$file" "$file"
+ done
+ exit 1
+ fi
- - name: Validate file extensions
- run: |
- set -o pipefail
- ls -1 ispdb/ | grep -v '\.xml$' | awk '{print "::error file=ispdb/"$0"::File name \"ispdb/"$0"\" does not end in .xml – Please rename!"}' && exit 1 || true
+ - name: Validate XML content
+ run: |
+ pip install lxml
+ python tools/validate.py ispdb/*
diff --git a/README.md b/README.md
index 77cf705..707883c 100644
--- a/README.md
+++ b/README.md
@@ -6,3 +6,5 @@ source files for the Thunderbird ISP database.
For documentation regarding either topic, please refer to [this repository's
wiki](https://github.com/thunderbird/autoconfig/wiki).
+
+This ISPDB was created by Ben Bucksch.
diff --git a/ispdb/naver.com.xml b/ispdb/naver.com.xml
index bd11fcf..78a4fa9 100644
--- a/ispdb/naver.com.xml
+++ b/ispdb/naver.com.xml
@@ -25,6 +25,6 @@
%EMAILADDRESS%
password-encrypted
-
+
-
+
\ No newline at end of file
diff --git a/tools/calculate_generated_files_diff.py b/tools/calculate_generated_files_diff.py
new file mode 100644
index 0000000..204730f
--- /dev/null
+++ b/tools/calculate_generated_files_diff.py
@@ -0,0 +1,84 @@
+import argparse
+import difflib
+import os.path
+import requests
+
+# Name of the JSON file listing the generated files. The same name is used to
+# fetch the remote copy and to open the local one.
+GENERATED_FILES_NAME = "generated_files.json"
+
+# Body of the comment posted when the diff is not empty. `{deltas}` is filled
+# in with the unified diff text.
+# NOTE(review): the "Expand to view diff" line reads like the label of a
+# collapsible section, but no markup for one is present here - confirm the
+# template renders as intended.
+GITHUB_COMMENT_TEMPLATE_WITH_DIFF = """This PR will cause the following changes to the production `generated_files.json` file:
+
+
+
+Expand to view diff
+
+
+```diff
+{deltas}
+```
+
+
+"""
+
+# Body of the comment posted when the diff is empty.
+GITHUB_COMMENT_TEMPLATE_NO_DIFF = (
+ "This PR will not cause any change to the production `generated_files.json` file."
+)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-b", metavar="base_url", help="base URL serving ISPDB files")
+ parser.add_argument("-t", metavar="api_token", help="Github API token")
+ parser.add_argument("-r", metavar="repo", help="Github repository")
+ parser.add_argument("-n", metavar="number", help="The Github or issue number")
+ parser.add_argument(
+ "folder", help="the folder containing the local ISPDB files to compare"
+ )
+
+ args = parser.parse_args()
+
+ # Strip out any trailing slash in the base URL so we don't accidentally end
+ # up doubling it.
+ base_url: str = args.b.strip("/")
+
+ resp = requests.get(f"{base_url}/{GENERATED_FILES_NAME}")
+
+ # At the time of writing, all of the domains in ISPDB are made up of ASCII
+ # characters, but that might not stay true forever.
+ resp.encoding = "utf-8"
+
+ with open(os.path.join(args.folder, GENERATED_FILES_NAME), "r") as fp:
+ local_list = fp.readlines()
+
+ # We call the local version "staging" as a shortcut, because by this
+ # time we expect to have already validated that the
+ deltas = list(
+ difflib.unified_diff(
+ resp.text.splitlines(keepends=True),
+ local_list,
+ fromfile="current",
+ tofile="new",
+ )
+ )
+
+ comment = (
+ GITHUB_COMMENT_TEMPLATE_WITH_DIFF.format(deltas="".join(deltas))
+ if len(deltas) > 0
+ else GITHUB_COMMENT_TEMPLATE_NO_DIFF
+ )
+
+ # Create the comment via the Github API.
+ # See
+ resp = requests.post(
+ f"https://api.github.com/repos/{args.r}/issues/{args.n}/comments",
+ headers={
+ "Accept": "application/vnd.github+json",
+ "Authorization": f"Bearer {args.t}",
+ },
+ json={"body": comment},
+ )
+
+ print(f"Posted comment {resp.json()["html_url"]}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/compare_out_files.py b/tools/compare_out_files.py
new file mode 100644
index 0000000..4118c5e
--- /dev/null
+++ b/tools/compare_out_files.py
@@ -0,0 +1,134 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import argparse
+import difflib
+import json
+import os.path
+import sys
+import requests
+from typing import Dict, List
+
+GENERATED_FILES_NAME = "generated_files.json"
+
+
+def get_and_compare(file_name: str, base_url: str, local_folder: str) -> str:
+ """Reads a local file and compare it with its remote copy before returning
+ its content.
+
+ Returns:
+ The file's content as served by the remote server, decoded as UTF-8
+ text.
+
+ Raises:
+ RuntimeError if the local file's content doesn't match the remote copy.
+ """
+ resp = requests.get(f"{base_url}/{file_name}")
+
+ # The response might not include an content-type header, and there are some
+ # non-ASCII characters in our XML files (e.g. in display names), so we need
+ # to explicitly tell `resp` what its encoding is.
+ resp.encoding = "utf-8"
+
+ with open(os.path.join(local_folder, file_name), "r") as fp:
+ local_list = fp.readlines()
+
+ deltas = list(
+ difflib.unified_diff(
+ local_list,
+ resp.text.splitlines(keepends=True),
+ fromfile="local",
+ tofile="remote",
+ )
+ )
+
+ if len(deltas) > 0:
+ print(f"Diff deltas:\n\n{"".join(deltas)}", file=sys.stderr)
+ raise RuntimeError("local file list does not match staging copy")
+
+ return resp.text
+
+
+def get_file_list(base_url: str, local_folder: str) -> List[str]:
+ """Gets the list of files to compare.
+
+ Returns:
+ The list of file names as per the `generated_files.json` file.
+
+ Raises:
+ RuntimeError if the local `generated_files.json` file does not match the
+ remote copy.
+ """
+ file_list = get_and_compare(GENERATED_FILES_NAME, base_url, local_folder)
+ return json.loads(file_list)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-b", metavar="base_url", help="base URL serving ISPDB files")
+ parser.add_argument(
+ "folder", help="the folder containing the local ISPDB files to compare"
+ )
+
+ args = parser.parse_args()
+
+ # Strip out any trailing slash in the base URL so we don't accidentally end
+ # up doubling it.
+ base_url: str = args.b.strip("/")
+
+ print("Fetching and comparing file list")
+
+ listed_files = get_file_list(base_url, args.folder)
+
+ failed_files: Dict[str, Exception] = {}
+ for file in listed_files:
+ print(f"Fetching and comparing {file}")
+
+ try:
+ get_and_compare(file, base_url, args.folder)
+ except Exception as e:
+ print(f"Comparison failed for file {file}: {e}", file=sys.stderr)
+ failed_files[file] = e
+
+ if len(failed_files) > 0:
+ # Print the failed files, preceded by an empty line to separate them
+ # from the previous logs.
+ print("\nComparing the following file(s) has failed:", file=sys.stderr)
+
+ for file, exc in failed_files.items():
+ print(f"{file}: {exc}", file=sys.stderr)
+
+ # Check if we can find files that exist in the local directory but isn't
+ # listed in `generated_files.json`. We could also do this check in the other
+ # direction (i.e. check if a file in `generated_files.json` is missing from
+ # the local directory), but if a file from the list is missing then trying
+ # to open it earlier will have raised an exception and will already cause
+ # the script to fail.
+ local_files = os.listdir(args.folder)
+
+ # Make sure we don't try to find the JSON list file in itself.
+ local_files.remove(GENERATED_FILES_NAME)
+
+ unknown_files = []
+ for local_file in local_files:
+ if local_file not in listed_files:
+ unknown_files.append(local_file)
+
+ if len(unknown_files) > 0:
+ print("\nUnknown file(s) in local directory:", file=sys.stderr)
+
+ for file in unknown_files:
+ print(file, file=sys.stderr)
+ else:
+ print("No unknown files found")
+
+ # Fail the script if either a comparison has failed or we found an unknown
+ # file. We could fail earlier, but it's more helpful for troubleshooting if
+ # we have the script point out as many issues in one run as possible.
+ if len(failed_files) > 0 or len(unknown_files) > 0:
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/convert.py b/tools/convert.py
index 1f3b5f9..1491608 100644
--- a/tools/convert.py
+++ b/tools/convert.py
@@ -27,15 +27,16 @@
# ***** END LICENSE BLOCK *****
import argparse
-import codecs
import os.path
-import stat
import sys
+from typing import Dict, List
+import json
+
import lxml.etree as ET
def read_config(file, convertTime):
+ """Parse `file` as XML and return the parsed tree together with the later
+ of the file's modification time and `convertTime`."""
- return (ET.parse(file), max(os.stat(file).st_mtime, convertTime))
+ return (ET.parse(file), max(os.path.getmtime(file), convertTime))
def doc_to_bytestring(doc):
@@ -47,59 +48,102 @@ def print_config(doc):
def write_config(outData, time, filename=None):
+ """Write `outData` followed by a newline to `filename` in binary mode.
+
+ Nothing is written when `filename` already exists with a modification
+ time at or after `time`.
+ """
- if os.path.exists(filename) and os.stat(filename).st_mtime >= time:
+ if os.path.exists(filename) and os.path.getmtime(filename) >= time:
return
- print("Writing %s" % filename)
- file = codecs.open(filename, "wb")
- file.write(outData)
- file.write(b'\n')
- file.close()
+ print(f"Writing {filename}")
+ with open(filename, "wb") as file:
+ file.write(outData)
+ file.write(b"\n")
-def write_domains(doc, time, output_dir="."):
+def write_domains(doc, time, output_dir=".") -> List[str]:
+ """Write one copy of `doc` per `domain` element found in it.
+
+ The document is serialised once and handed to write_config() for each
+ domain, using the domain's text as the file name inside `output_dir`.
+
+ Returns:
+ The text of every `domain` element found.
+ """
outData = doc_to_bytestring(doc)
- for d in doc.findall("//domain"):
- write_config(outData, time, output_dir + "/" + d.text)
+
+ domains: List[str] = []
+
+ for d in doc.getroot().findall(".//domain"):
+ write_config(outData, time, os.path.join(output_dir, d.text))
+ domains.append(d.text)
+
+ return domains
def main():
# parse command line options
parser = argparse.ArgumentParser()
- parser.add_argument("-d", metavar="dir",
- help="output directory")
- parser.add_argument("-a", action="store_true",
- help="write configuration files for all domains")
- parser.add_argument("file", nargs="*",
- help="input file(s) to process, wildcards allowed")
- args = parser.parse_args(sys.argv[1:])
+ parser.add_argument("-d", metavar="dir", help="output directory")
+ parser.add_argument(
+ "-a", action="store_true", help="write configuration files for all domains"
+ )
+ parser.add_argument(
+ "file", nargs="*", help="input file(s) to process, wildcards allowed"
+ )
+ args = parser.parse_args()
# process arguments
- convertTime = os.stat(sys.argv[0]).st_mtime
- is_dir = stat.S_ISDIR
+ convertTime = os.path.getmtime(sys.argv[0])
- for f in args.file:
- if is_dir(os.stat(f).st_mode):
- continue
-
- if f == "README":
- continue
+ # Record the files that failed to be processed and the errors related to
+ # them.
+ failed_files: Dict[str, Exception] = {}
- doc, time = read_config(f, convertTime)
+ # Record the names of the files written so we can list them in a file later.
+ out_file_names: List[str] = []
- if args.a:
- if args.d:
- write_domains(doc, time, args.d)
+ for f in args.file:
+ try:
+ if os.path.isdir(f):
+ continue
+
+ if f == "README":
+ continue
+
+ print(f"Processing {f}")
+
+ doc, time = read_config(f, convertTime)
+
+ if args.a:
+ if args.d:
+ domains = write_domains(doc, time, args.d)
+ out_file_names.extend(domains)
+ else:
+ print("When you want to write domain files you")
+ print("should also specify an output directory")
+ print("using -d dir")
+ parser.print_usage()
+ sys.exit(2)
+ elif args.d:
+ outData = doc_to_bytestring(doc)
+ filename = os.path.basename(f)
+ write_config(outData, time, os.path.join(args.d, filename))
+ out_file_names.append(filename)
else:
- print("When you want to write domain files you")
- print("should also specify an output directory")
- print("using -d dir")
- parser.print_usage()
- exit(2)
- elif args.d:
- outData = doc_to_bytestring(doc)
- write_config(outData, time, args.d + "/" + os.path.basename(f))
- else:
- print_config(doc)
+ print_config(doc)
+ except Exception as e:
+ print(f"File {f} could not be processed: {e}")
+ failed_files[f] = e
+
+ if len(failed_files) > 0:
+ # Print the failed files, preceded by an empty line to separate them
+ # from the previous logs.
+ print()
+ print("Processing the following file(s) has failed:")
+
+ for file, exc in failed_files.items():
+ print(f"{file}: {exc}")
+
+ sys.exit(1)
+
+ # Sort the list for idempotency.
+ out_file_names.sort()
+
+ # Write the list of output files in a JSON file. This file will mostly be
+ # used in CI processes. We generate it with indentation so that other CI
+ # scripts can produce a more legible output.
+ out_file_list_path = os.path.join(args.d, "generated_files.json")
+ with open(out_file_list_path, "w") as fp:
+ print(f"Writing list of {len(out_file_names)} files")
+ fp.write(json.dumps(out_file_names, indent=2))
if __name__ == "__main__":
diff --git a/tools/validate.py b/tools/validate.py
new file mode 100644
index 0000000..5a04456
--- /dev/null
+++ b/tools/validate.py
@@ -0,0 +1,52 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This script checks all input files to validate that their content is valid
+# XML. It is meant to run in the CI for PRs against the autoconfig repository.
+
+import argparse
+import os
+import sys
+from typing import List
+
+from lxml import etree
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "file", nargs="*", help="input file(s) to process, wildcards allowed"
+ )
+ args = parser.parse_args()
+
+ # Defining `files` here isn't strictly necessary, but the extra typing
+ # (which we can't really get otherwise) helps with maintenance.
+ files: List[str] = args.file
+
+ # The exit code. Stays 0 unless we encounter a file that doesn't parse.
+ ret = 0
+
+ for f in files:
+ # Filter out directories an non-XML files
+ if os.path.isdir(f):
+ print(f"Ignoring directory {f}")
+ continue
+
+ if not f.endswith(".xml"):
+ print(f"Ignoring non-XML file {f}")
+ continue
+
+ # Try parsing the file. If this did not work, print the error and set
+ # the exit code to 1.
+ try:
+ etree.parse(f)
+ except Exception as e:
+ print(f"File {f} did not parse: {e}")
+ ret = 1
+
+ sys.exit(ret)
+
+
+if __name__ == "__main__":
+ main()