From 93f7af4fe58949dea722af2424a9d6aa9cb81e1d Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Wed, 11 Jun 2025 01:09:02 +0700 Subject: [PATCH 1/9] Add LogCountingHandler to Model exit(1) if there is an error in the log Signed-off-by: Arthit Suriyawongkul --- main.py | 6 ++++++ runparams.py | 10 ++++++---- spec_parser/model.py | 17 +++++++++++------ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/main.py b/main.py index 0f2798c..9ff9a82 100644 --- a/main.py +++ b/main.py @@ -11,3 +11,9 @@ m = Model(cfg.input_path) if not cfg.no_output: m.generate(cfg) + + if m.log_handler.num_errors() > 0: + print(f"Model errors: {m.log_handler.num_errors()}") + for msg in m.log_handler.error_records: + print(msg) + exit(1) diff --git a/runparams.py b/runparams.py index 9de72f1..6ceee3a 100644 --- a/runparams.py +++ b/runparams.py @@ -16,11 +16,11 @@ class RunParams(SimpleNamespace): def __init__(self, name): self._ts = datetime.now(timezone.utc) self.log = logging.getLogger(name) - lch = LogCountingHandler() - self.log.addHandler(lch) + self.log_handler = LogCountingHandler() + self.log.addHandler(self.log_handler) opt_force = self.process_args() self.check_requirements() - if logging.ERROR in self.log._cache: + if self.log_handler.num_errors() > 0: sys.exit(1) self.create_output_dirs(opt_force) @@ -156,9 +156,12 @@ class LogCountingHandler(logging.StreamHandler): def __init__(self): super().__init__() self.count = dict.fromkeys((logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL), 0) + self.error_records = [] def emit(self, record): self.count[record.levelno] += 1 + if record.levelno == logging.ERROR: + self.error_records.append(self.format(record)) super().emit(record) def num_critical(self): @@ -169,4 +172,3 @@ def num_errors(self): def num_warnings(self): return self.count[logging.WARNING] - diff --git a/spec_parser/model.py b/spec_parser/model.py index 43436e4..25c36d2 100644 --- a/spec_parser/model.py +++ b/spec_parser/model.py @@ -12,7 +12,7 @@ SpecFile, ) -logger = logging.getLogger(__name__) +from runparams import LogCountingHandler class Model: def __init__(self, inpath=None): @@ -24,16 +24,21 @@ def __init__(self, inpath=None): self.individuals = dict() self.datatypes = dict() + self.log = logging.getLogger(str(inpath)) + self.log_handler = LogCountingHandler() + self.log.addHandler(self.log_handler) + if inpath is not None: self.load(inpath) + def load(self, inpath): p = inpath for d in [d for d in p.iterdir() if d.is_dir() and d.name[0].isupper()]: nsp = p / d.name / f"{d.name}.md" if not nsp.is_file(): - logger.error(f"Missing top-level namespace file {nsp.name}") + self.log.error(f"Missing top-level namespace file {nsp.name}") continue ns = Namespace(nsp) @@ -79,7 +84,7 @@ def load(self, inpath): self.datatypes[k] = n ns.datatypes[k] = n - logger.info( + self.log.info( f"Loaded {len(self.namespaces)} namespaces, {len(self.classes)} classes, " f"{len(self.properties)} properties, {len(self.vocabularies)} vocabularies, " f"{len(self.individuals)} individuals, {len(self.datatypes)} datatypes", @@ -88,7 +93,7 @@ def load(self, inpath): def process_after_load(self): self.types = self.classes | self.vocabularies | self.datatypes - logger.info(f"Total {len(self.types)} types") + self.log.info(f"Total {len(self.types)} types") # add used_in information to properties for c in self.classes.values(): @@ -98,7 +103,7 @@ def process_after_load(self): proptype = self.properties[pname].metadata["Range"] ptype = pkv["type"] if proptype != ptype and (not p.startswith("/") or proptype.rpartition("/")[-1] != ptype.rpartition("/")[-1]): - logger.error(f"In class {c.fqname}, property {p} has type {ptype} but the range of {pname} is {proptype}") + self.log.error(f"In class {c.fqname}, property {p} has type {ptype} but the range of {pname} is {proptype}") self.properties[pname].used_in.append(c.fqname) # add class inheritance stack @@ -154,7 +159,7 @@ def _tsort_recursive(inh, cn, visited, stack): assert c.all_properties[shortname]["fullname"] == f"/{pns}/{shortname}" for k, v in pkv.items(): if c.all_properties[shortname][k] == v: - logger.warning(f"In class {c.fqname} property {p} has same {k} as the parent class") + self.log.warning(f"In class {c.fqname} property {p} has same {k} as the parent class") c.all_properties[shortname][k] = v def generate(self, cfg): From 6690e94398e413dd791311a8ec23506962ba430b Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Wed, 11 Jun 2025 02:07:29 +0700 Subject: [PATCH 2/9] Print errors from mdparsing.py Signed-off-by: Arthit Suriyawongkul --- main.py | 38 +++++++++++++++++++++++++++++++++----- spec_parser/mdparsing.py | 4 ++++ spec_parser/model.py | 18 +++++++++--------- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/main.py b/main.py index 9ff9a82..7239ded 100644 --- a/main.py +++ b/main.py @@ -2,7 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 -from runparams import RunParams +import logging + +from runparams import LogCountingHandler, RunParams from spec_parser import Model if __name__ == "__main__": @@ -12,8 +14,34 @@ if not cfg.no_output: m.generate(cfg) - if m.log_handler.num_errors() > 0: - print(f"Model errors: {m.log_handler.num_errors()}") - for msg in m.log_handler.error_records: - print(msg) + total_errors = 0 + last_idx = 0 + + def report_logger_errors(logger, label, start_idx=1): + handler = next( + (h for h in logger.handlers if isinstance(h, LogCountingHandler)), None + ) # get the LogCountingHandler + num_errors = 0 + idx = start_idx - 1 + if handler and handler.num_errors() > 0: + num_errors = handler.num_errors() + print(f"{label} errors: {num_errors}") + for idx, msg in enumerate(sorted(handler.error_records), start_idx): + logger.error(f"{idx:3}: {msg}") + return num_errors, idx + + errors, last_idx = report_logger_errors( + logging.getLogger("spec_parser.mdparsing"), "Markdown parsing", last_idx + 1 + ) + total_errors += errors + print() + + errors, _ = report_logger_errors( + logging.getLogger("spec_parser.model"), "Model", last_idx + 1 + ) + total_errors += errors + print() + + if total_errors > 0: + print(f"Total errors: {total_errors}") exit(1) diff --git a/spec_parser/mdparsing.py b/spec_parser/mdparsing.py index bfa1927..7dc2049 100644 --- a/spec_parser/mdparsing.py +++ b/spec_parser/mdparsing.py @@ -5,7 +5,11 @@ import logging import re +from runparams import LogCountingHandler + logger = logging.getLogger(__name__) +log_handler = LogCountingHandler() +logger.addHandler(log_handler) class SpecFile: RE_SPLIT_TO_SECTIONS = re.compile(r"\n(?=(?:\Z|# |## ))") diff --git a/spec_parser/model.py b/spec_parser/model.py index 25c36d2..767dd5f 100644 --- a/spec_parser/model.py +++ b/spec_parser/model.py @@ -14,6 +14,10 @@ from runparams import LogCountingHandler +logger = logging.getLogger(__name__) +log_handler = LogCountingHandler() +logger.addHandler(log_handler) + class Model: def __init__(self, inpath=None): self.name = None @@ -24,10 +28,6 @@ def __init__(self, inpath=None): self.individuals = dict() self.datatypes = dict() - self.log = logging.getLogger(str(inpath)) - self.log_handler = LogCountingHandler() - self.log.addHandler(self.log_handler) - if inpath is not None: self.load(inpath) @@ -38,7 +38,7 @@ def load(self, inpath): for d in [d for d in p.iterdir() if d.is_dir() and d.name[0].isupper()]: nsp = p / d.name / f"{d.name}.md" if not nsp.is_file(): - self.log.error(f"Missing top-level namespace file {nsp.name}") + logger.error(f"Missing top-level namespace file {nsp.name}") continue ns = Namespace(nsp) @@ -84,7 +84,7 @@ def load(self, inpath): self.datatypes[k] = n ns.datatypes[k] = n - self.log.info( + logger.info( f"Loaded {len(self.namespaces)} namespaces, {len(self.classes)} classes, " f"{len(self.properties)} properties, {len(self.vocabularies)} vocabularies, " f"{len(self.individuals)} individuals, {len(self.datatypes)} datatypes", @@ -93,7 +93,7 @@ def load(self, inpath): def process_after_load(self): self.types = self.classes | self.vocabularies | self.datatypes - self.log.info(f"Total {len(self.types)} types") + logger.info(f"Total {len(self.types)} types") # add used_in information to properties for c in self.classes.values(): @@ -103,7 +103,7 @@ def process_after_load(self): proptype = self.properties[pname].metadata["Range"] ptype = pkv["type"] if proptype != ptype and (not p.startswith("/") or proptype.rpartition("/")[-1] != ptype.rpartition("/")[-1]): - self.log.error(f"In class {c.fqname}, property {p} has type {ptype} but the range of {pname} is {proptype}") + logger.error(f"In class {c.fqname}, property {p} has type {ptype} but the range of {pname} is {proptype}") self.properties[pname].used_in.append(c.fqname) # add class inheritance stack @@ -159,7 +159,7 @@ def _tsort_recursive(inh, cn, visited, stack): assert c.all_properties[shortname]["fullname"] == f"/{pns}/{shortname}" for k, v in pkv.items(): if c.all_properties[shortname][k] == v: - self.log.warning(f"In class {c.fqname} property {p} has same {k} as the parent class") + logger.warning(f"In class {c.fqname} property {p} has same {k} as the parent class") c.all_properties[shortname][k] = v def generate(self, cfg): From 760d3e8353a73897ef15e6db670afb0cb46dd72e Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Wed, 11 Jun 2025 02:10:16 +0700 Subject: [PATCH 3/9] Revert changes for RunParams log handler Signed-off-by: Arthit Suriyawongkul --- runparams.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runparams.py b/runparams.py index 6ceee3a..4f5c4eb 100644 --- a/runparams.py +++ b/runparams.py @@ -16,11 +16,11 @@ class RunParams(SimpleNamespace): def __init__(self, name): self._ts = datetime.now(timezone.utc) self.log = logging.getLogger(name) - self.log_handler = LogCountingHandler() - self.log.addHandler(self.log_handler) + lch = LogCountingHandler() + self.log.addHandler(lch) opt_force = self.process_args() self.check_requirements() - if self.log_handler.num_errors() > 0: + if logging.ERROR in self.log._cache: sys.exit(1) self.create_output_dirs(opt_force) From 7181d246653f8139316e042fef1378ef11ffdf34 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Wed, 11 Jun 2025 02:11:35 +0700 Subject: [PATCH 4/9] Remove a blank line Signed-off-by: Arthit Suriyawongkul --- spec_parser/model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/spec_parser/model.py b/spec_parser/model.py index 767dd5f..ea54a17 100644 --- a/spec_parser/model.py +++ b/spec_parser/model.py @@ -31,7 +31,6 @@ def __init__(self, inpath=None): if inpath is not None: self.load(inpath) - def load(self, inpath): p = inpath From 149235ea8b0e6a9ffc5a4533ada43d79bbe7dd09 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Wed, 11 Jun 2025 02:14:30 +0700 Subject: [PATCH 5/9] Print a dividing blank line Signed-off-by: Arthit Suriyawongkul --- main.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 7239ded..a732075 100644 --- a/main.py +++ b/main.py @@ -25,7 +25,7 @@ def report_logger_errors(logger, label, start_idx=1): idx = start_idx - 1 if handler and handler.num_errors() > 0: num_errors = handler.num_errors() - print(f"{label} errors: {num_errors}") + print(f"\n{label} errors: {num_errors}") for idx, msg in enumerate(sorted(handler.error_records), start_idx): logger.error(f"{idx:3}: {msg}") return num_errors, idx @@ -34,14 +34,12 @@ def report_logger_errors(logger, label, start_idx=1): logging.getLogger("spec_parser.mdparsing"), "Markdown parsing", last_idx + 1 ) total_errors += errors - print() errors, _ = report_logger_errors( logging.getLogger("spec_parser.model"), "Model", last_idx + 1 ) total_errors += errors - print() if total_errors > 0: - print(f"Total errors: {total_errors}") + print(f"\nTotal errors: {total_errors}") exit(1) From 341d5085318676043d0f80582977dd447df23ee0 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Wed, 11 Jun 2025 05:13:48 +0700 Subject: [PATCH 6/9] Print from RDF logger Signed-off-by: Arthit Suriyawongkul --- main.py | 34 +++++++++++++--------------------- spec_parser/rdf.py | 4 ++++ 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/main.py b/main.py index a732075..9454cd1 100644 --- a/main.py +++ b/main.py @@ -14,31 +14,23 @@ if not cfg.no_output: m.generate(cfg) + loggers = [ + ("spec_parser.mdparsing", "Markdown parsing"), + ("spec_parser.model", "Model"), + ("spec_parser.rdf", "RDF generation"), + ] # Ordered by processing sequence + total_errors = 0 - last_idx = 0 - - def report_logger_errors(logger, label, start_idx=1): - handler = next( - (h for h in logger.handlers if isinstance(h, LogCountingHandler)), None - ) # get the LogCountingHandler - num_errors = 0 - idx = start_idx - 1 + + for logger_name, label in loggers: + logger = logging.getLogger(logger_name) + handler = next((h for h in logger.handlers if isinstance(h, LogCountingHandler)), None) if handler and handler.num_errors() > 0: num_errors = handler.num_errors() print(f"\n{label} errors: {num_errors}") - for idx, msg in enumerate(sorted(handler.error_records), start_idx): - logger.error(f"{idx:3}: {msg}") - return num_errors, idx - - errors, last_idx = report_logger_errors( - logging.getLogger("spec_parser.mdparsing"), "Markdown parsing", last_idx + 1 - ) - total_errors += errors - - errors, _ = report_logger_errors( - logging.getLogger("spec_parser.model"), "Model", last_idx + 1 - ) - total_errors += errors + for idx, msg in enumerate(sorted(handler.error_records), total_errors + 1): + print(f"{idx:3}: {msg}") + total_errors += num_errors if total_errors > 0: print(f"\nTotal errors: {total_errors}") diff --git a/spec_parser/rdf.py b/spec_parser/rdf.py index 6bbf1ff..5bc7b34 100644 --- a/spec_parser/rdf.py +++ b/spec_parser/rdf.py @@ -16,9 +16,13 @@ from rdflib.namespace import DCTERMS, OWL, RDF, RDFS, SH, SKOS, XSD from rdflib.tools.rdf2dot import rdf2dot +from runparams import LogCountingHandler + URI_BASE = "https://spdx.org/rdf/3.0.1/terms/" logger = logging.getLogger(__name__) +log_handler = LogCountingHandler() +logger.addHandler(log_handler) def gen_rdf(model, outpath, cfg): p = outpath From a646c3f88d05fb3ec90711f2f23c0aa04702da76 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Wed, 11 Jun 2025 20:11:34 +0700 Subject: [PATCH 7/9] Update comment Signed-off-by: Arthit Suriyawongkul --- main.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 9454cd1..aaf7daa 100644 --- a/main.py +++ b/main.py @@ -14,17 +14,21 @@ if not cfg.no_output: m.generate(cfg) + # Logger names and their labels for error reporting, + # ordered by processing sequence. loggers = [ ("spec_parser.mdparsing", "Markdown parsing"), ("spec_parser.model", "Model"), ("spec_parser.rdf", "RDF generation"), - ] # Ordered by processing sequence + ] total_errors = 0 for logger_name, label in loggers: logger = logging.getLogger(logger_name) - handler = next((h for h in logger.handlers if isinstance(h, LogCountingHandler)), None) + handler = next( + (h for h in logger.handlers if isinstance(h, LogCountingHandler)), None + ) # Get the first LogCountingHandler if handler and handler.num_errors() > 0: num_errors = handler.num_errors() print(f"\n{label} errors: {num_errors}") From 9fba086050d942525dff9bfbde88ed64c5c92693 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Thu, 12 Jun 2025 14:41:49 +0700 Subject: [PATCH 8/9] Keep records and emit at the end only Signed-off-by: Arthit Suriyawongkul --- main.py | 9 +++++---- runparams.py | 16 +++++++++++----- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index aaf7daa..8230d06 100644 --- a/main.py +++ b/main.py @@ -30,11 +30,12 @@ (h for h in logger.handlers if isinstance(h, LogCountingHandler)), None ) # Get the first LogCountingHandler if handler and handler.num_errors() > 0: - num_errors = handler.num_errors() - print(f"\n{label} errors: {num_errors}") - for idx, msg in enumerate(sorted(handler.error_records), total_errors + 1): + print(f"\n{label} errors: {handler.num_errors()}") + for idx, msg in enumerate( + sorted(handler.record[logging.ERROR]), total_errors + 1 + ): print(f"{idx:3}: {msg}") - total_errors += num_errors + total_errors += handler.num_errors() if total_errors > 0: print(f"\nTotal errors: {total_errors}") diff --git a/runparams.py b/runparams.py index 4f5c4eb..cc6bbf9 100644 --- a/runparams.py +++ b/runparams.py @@ -155,14 +155,20 @@ def create_output_dirs(self, force): class LogCountingHandler(logging.StreamHandler): def __init__(self): super().__init__() - self.count = dict.fromkeys((logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL), 0) - self.error_records = [] + __log_levels = ( + logging.DEBUG, + logging.INFO, + logging.WARNING, + logging.ERROR, + logging.CRITICAL, + ) + self.count = dict.fromkeys(__log_levels, 0) + self.record = {level: [] for level in __log_levels} def emit(self, record): self.count[record.levelno] += 1 - if record.levelno == logging.ERROR: - self.error_records.append(self.format(record)) - super().emit(record) + self.record[record.levelno].append(self.format(record)) + # super().emit(record) def num_critical(self): return self.count[logging.CRITICAL] From 49545e46ffc7d059d308fb75522d40ec8700af09 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Thu, 12 Jun 2025 14:49:59 +0700 Subject: [PATCH 9/9] Emit record Signed-off-by: Arthit Suriyawongkul --- runparams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runparams.py b/runparams.py index cc6bbf9..34ad958 100644 --- a/runparams.py +++ b/runparams.py @@ -168,7 +168,7 @@ def __init__(self): def emit(self, record): self.count[record.levelno] += 1 self.record[record.levelno].append(self.format(record)) - # super().emit(record) + super().emit(record) def num_critical(self): return self.count[logging.CRITICAL]