DeepPSP · wenh06 · Feb 28, 2026 · Feb 28, 2026 · Copilot · Feb 28, 2026
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -23,6 +23,8 @@ Changed
 - Changed the backend storage of `CitationMixin` cache from CSV to SQLite
   for better performance and concurrency support. The existing CSV cache
   will be automatically migrated to SQLite upon first use.
+- Refactored the CLI entry point `bib_lookup/cli.py` to break down the monolithic
+  `main` function into modular helper functions for better maintainability.
 
 Deprecated
 ~~~~~~~~~~

diff --git a/bib_lookup/cli.py b/bib_lookup/cli.py
@@ -8,8 +8,12 @@
 import sys
 import warnings
 from pathlib import Path
+from typing import Any, Dict
 
-import yaml
+try:
+    import yaml
+except ImportError:
+    yaml = None  # type: ignore
 
 from bib_lookup._const import CONFIG_FILE as _CONFIG_FILE
 from bib_lookup._const import DEFAULT_CONFIG as _DEFAULT_CONFIG
@@ -18,6 +22,133 @@
 from bib_lookup.version import __version__ as bl_version
 
 
+def _handle_config(config_arg: str) -> None:
+    """Handle configuration commands."""
+    if config_arg == "show":
+        if not _CONFIG_FILE.is_file():
+            print("User-defined configuration file does not exist.")
+            print("Using default configurations:")
+            print(json.dumps(_DEFAULT_CONFIG, indent=4))
+        else:
+            user_config = json.loads(_CONFIG_FILE.read_text())
+            print("User-defined configurations:")
+            print(json.dumps(user_config, indent=4))
+            print("The rest default configurations:")
+            print(
+                json.dumps(
+                    {k: v for k, v in _DEFAULT_CONFIG.items() if k not in user_config},
+                    indent=4,
+                )
+            )
+        return
+    elif config_arg == "reset":
+        if _CONFIG_FILE.is_file():
+            _CONFIG_FILE.unlink()
+            print("User-defined configuration file deleted.")
+        else:
+            print("User-defined configuration file does not exist. No need to reset.")
+        return
+    else:
+        if "=" in config_arg:
+            config = dict([kv.strip().split("=") for kv in config_arg.split(";")])
+        else:
+            config_path = Path(config_arg)
+            assert config_path.is_file(), f"Configuration file ``{config_arg}`` does not exist. Please check and try again."
+
+            if config_path.suffix == ".json":
+                config = json.loads(config_path.read_text())
+            elif config_path.suffix in [".yaml", ".yml"]:
+                if yaml is None:
+                    raise ImportError(
+                        "PyYAML is required to parse yaml config files. Please install it via `pip install PyYAML`."
+                    )
+                config = yaml.safe_load(config_path.read_text())
+            else:
+                raise ValueError(
+                    f"Unknown configuration file type ``{config_path.suffix}``. " "Only json and yaml files are supported."
+                )
+
+        # discard unknown keys
+        unknown_keys = set(config.keys()) - set(_DEFAULT_CONFIG.keys())
+        config = {k: v for k, v in config.items() if k in _DEFAULT_CONFIG}
+        # parse lists in the config
+        for k, v in config.items():
+            if isinstance(v, str) and v.startswith("[") and v.endswith("]"):
+                config[k] = [vv.strip() for vv in v.strip("[]").split(",")]
+        if len(unknown_keys) > 0:
+            verb = "are" if len(unknown_keys) > 1 else "is"
+            warnings.warn(
+                f"Unknown configuration keys ``{unknown_keys}`` {verb} discarded.",
+                RuntimeWarning,
+            )
+        print(f"Setting configurations:\n{json.dumps(config, indent=4)}")
+        _CONFIG_FILE.write_text(json.dumps(config, indent=4))
+        return
+
+
+def _handle_gather(args: Dict[str, Any]) -> None:
+    """Handle the gather command."""
+    from bib_lookup.utils import gather_tex_source_files_in_one
+
+    try:
+        gather_args = args["gather"]
+        if len(gather_args) == 1:
+            entry_file = Path(gather_args[0]).resolve()
+            output_file = None  # let the function use default naming
+        elif len(gather_args) == 2:
+            entry_file = Path(gather_args[0]).resolve()
+            output_file = Path(gather_args[1]).resolve()
+        else:
+            print("Error: --gather accepts one or two arguments only.")
+            sys.exit(1)
+
+        if entry_file.exists() and (not entry_file.is_file() or entry_file.suffix != ".tex"):
+            print(f"Error: File {entry_file} is not a valid .tex file.")
+            sys.exit(1)
+
+        if len(args["identifiers"]) > 0 or args["input_file"] is not None:
+            warnings.warn(
+                "Identifiers and input file are ignored when gathering .tex files.",
+                RuntimeWarning,
+            )
+
+        gather_tex_source_files_in_one(
+            entry_file,
+            write_file=True,
+            output_file=output_file,
+            overwrite=args["overwrite"],
+            keep_comments=not args["remove_comments"],
+        )
+    except FileExistsError as e:
+        print(f"Error: {e}".replace("overwrite=True", "--overwrite"))
+        print("Use the --overwrite flag to overwrite the existing file.")
+        sys.exit(1)
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        print("Please check the file path and try again.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Unexpected error: {e}")  # pragma: no cover
+        sys.exit(1)  # pragma: no cover
+
+
+def _handle_simplify_bib(args: Dict[str, Any]) -> None:
+    """Handle the simplify bib command."""
+    if args.get("input_file", None) is not None:
+        input_file = Path(args["input_file"]).resolve()
+        if not input_file.is_file() or input_file.suffix != ".bib":
+            print(f"Input bib file {args['input_file']} is not a valid .bib file. Please check and try again.")
+            sys.exit(1)
+    else:
+        input_file = None
+    output_file = args["output_file"]
+    output_mode = "w" if args["overwrite"] else "a"
+
+    BibLookup.simplify_bib_file(
+        tex_sources=args["simplify_bib"], bib_file=input_file, output_file=output_file, output_mode=output_mode
+    )
+
+
 def main():
     """Command-line interface for the bib_lookup package."""
     parser = argparse.ArgumentParser(description="Look up a BibTeX entry from a DOI identifier, PMID (URL) or arXiv ID (URL).")
@@ -85,24 +216,21 @@ def main():
     )
     parser.add_argument(
         "--arxiv2doi",
-        # type=str2bool,
-        # default=True,
         action="store_true",
         help="Convert arXiv ID to DOI to look up.",
         dest="arxiv2doi",
     )
     parser.add_argument(
         "--ignore-errors",
-        type=str2bool,
-        default=True,
+        action="store_true",
-        action="store_true",
+        type=str2bool,
+        nargs="?",
+        const=True,
+        default=True,
-        action="store_true",
+        type=str2bool,
+        nargs="?",
+        const=True,
+        default=True,
         help="Ignore errors when looking up",
         dest="ignore_errors",
     )
     parser.add_argument(
         "--timeout",
         type=int,
         default=6,
-        help="Ignore errors when looking up",
+        help="Timeout for the lookup request. Unit is seconds. Default is 6 seconds.",
         dest="timeout",
     )
     parser.add_argument(
@@ -176,141 +304,39 @@ def main():
     args = vars(parser.parse_args())
 
     if args.get("config", None) is not None:
-        if args["config"] == "show":
-            if not _CONFIG_FILE.is_file():
-                print("User-defined configuration file does not exist.")
-                print("Using default configurations:")
-                print(json.dumps(_DEFAULT_CONFIG, indent=4))
-            else:
-                user_config = json.loads(_CONFIG_FILE.read_text())
-                print("User-defined configurations:")
-                print(json.dumps(user_config, indent=4))
-                print("The rest default configurations:")
-                print(
-                    json.dumps(
-                        {k: v for k, v in _DEFAULT_CONFIG.items() if k not in user_config},
-                        indent=4,
-                    )
-                )
-            return
-        elif args["config"] == "reset":
-            if _CONFIG_FILE.is_file():
-                _CONFIG_FILE.unlink()
-                print("User-defined configuration file deleted.")
-            else:
-                print("User-defined configuration file does not exist. No need to reset.")
-            return
-        else:
-            if "=" in args["config"]:
-                config = dict([kv.strip().split("=") for kv in args["config"].split(";")])
-            else:
-                assert Path(
-                    args["config"]
-                ).is_file(), f"Configuration file ``{args['config']}`` does not exist. Please check and try again."
-                if Path(args["config"]).suffix == ".json":
-                    config = json.loads(Path(args["config"]).read_text())
-                elif Path(args["config"]).suffix in [".yaml", ".yml"]:
-                    config = yaml.safe_load(Path(args["config"]).read_text())
-                else:
-                    raise ValueError(
-                        f"Unknown configuration file type ``{Path(args['config']).suffix}``. "
-                        "Only json and yaml files are supported."
-                    )
-            # discard unknown keys
-            unknown_keys = set(config.keys()) - set(_DEFAULT_CONFIG.keys())
-            config = {k: v for k, v in config.items() if k in _DEFAULT_CONFIG}
-            # parse lists in the config
-            for k, v in config.items():
-                if isinstance(v, str) and v.startswith("[") and v.endswith("]"):
-                    config[k] = [vv.strip() for vv in v.strip("[]").split(",")]
-            if len(unknown_keys) > 0:
-                verb = "are" if len(unknown_keys) > 1 else "is"
-                warnings.warn(
-                    f"Unknown configuration keys ``{unknown_keys}`` {verb} discarded.",
-                    RuntimeWarning,
-                )
-            print(f"Setting configurations:\n{json.dumps(config, indent=4)}")
-            _CONFIG_FILE.write_text(json.dumps(config, indent=4))
-            return
+        _handle_config(args["config"])
+        return
 
     if args.get("gather", None) is not None:
-        from bib_lookup.utils import gather_tex_source_files_in_one
-
-        try:
-            gather_args = args["gather"]
-            if len(gather_args) == 1:
-                entry_file = Path(gather_args[0]).resolve()
-                output_file = None  # let the function use default naming
-            elif len(gather_args) == 2:
-                entry_file = Path(gather_args[0]).resolve()
-                output_file = Path(gather_args[1]).resolve()
-            else:
-                print("Error: --gather accepts one or two arguments only.")
-                sys.exit(1)
-
-            if entry_file.exists() and (not entry_file.is_file() or entry_file.suffix != ".tex"):
-                print(f"Error: File {entry_file} is not a valid .tex file.")
-                sys.exit(1)
-
-            if len(args["identifiers"]) > 0 or args["input_file"] is not None:
-                warnings.warn(
-                    "Identifiers and input file are ignored when gathering .tex files.",
-                    RuntimeWarning,
-                )
-
-            gather_tex_source_files_in_one(
-                entry_file,
-                write_file=True,
-                output_file=output_file,
-                overwrite=args["overwrite"],
-                keep_comments=not args["remove_comments"],
-            )
-        except FileExistsError as e:
-            print(f"Error: {e}".replace("overwrite=True", "--overwrite"))
-            print("Use the --overwrite flag to overwrite the existing file.")
-            sys.exit(1)
-        except FileNotFoundError as e:
-            print(f"Error: {e}")
-            print("Please check the file path and try again.")
-            sys.exit(1)
-        except Exception as e:
-            print(f"Unexpected error: {e}")  # pragma: no cover
-            sys.exit(1)  # pragma: no cover
+        _handle_gather(args)
         return
 
     if args.get("simplify_bib", None) is not None:
-        if args.get("input_file", None) is not None:
-            input_file = Path(args["input_file"]).resolve()
-            if not input_file.is_file() or input_file.suffix != ".bib":
-                print(f"Input bib file {args['input_file']} is not a valid .bib file. Please check and try again.")
-                sys.exit(1)
-        else:
-            input_file = None
-        output_file = args["output_file"]
-        output_mode = "w" if args["overwrite"] else "a"
-
-        simplified_bib_file = BibLookup.simplify_bib_file(
-            tex_sources=args["simplify_bib"], bib_file=input_file, output_file=output_file, output_mode=output_mode
-        )
+        _handle_simplify_bib(args)
         return
 
     check_file = args["check_file"]
     if check_file is not None:
         if Path(check_file).is_file() and Path(check_file).suffix == ".bib":
-            # check this file, other augments are ignored
+            # check this file, other arguments are ignored
             check_file = Path(check_file)
         else:
             check_file = str2bool(check_file)
 
-    # fmt: off
-    init_args = dict()
-    for k in [
-        "align", "ignore_fields", "output_file", "email", "ordering",
-        "arxiv2doi", "format", "style", "timeout", "ignore_errors", "verbose"
-    ]:
-        if args[k] is not None:
-            init_args[k] = args[k]
-    # fmt: on
+    init_keys = [
+        "align",
+        "ignore_fields",
+        "output_file",
+        "email",
+        "ordering",
+        "arxiv2doi",
+        "format",
+        "style",
+        "timeout",
+        "ignore_errors",
+        "verbose",
+    ]
+    init_args = {k: args[k] for k in init_keys if args[k] is not None}
-    init_args = {k: args[k] for k in init_keys if args[k] is not None}
+    # Build init_args while ensuring that the default value of --ignore-errors
+    # (False from argparse's store_true) does not override configuration unless
+    # the flag was explicitly provided.
+    init_args = {
+        k: args[k]
+        for k in init_keys
+        if k != "ignore_errors" and args[k] is not None
+    }
+    if "ignore_errors" in args and args["ignore_errors"]:
+        init_args["ignore_errors"] = True
-    init_args = {k: args[k] for k in init_keys if args[k] is not None}
+    # Build init_args while ensuring that the default value of --ignore-errors
+    # (False from argparse's store_true) does not override configuration unless
+    # the flag was explicitly provided.
+    init_args = {
+        k: args[k]
+        for k in init_keys
+        if k != "ignore_errors" and args[k] is not None
+    }
+    if "ignore_errors" in args and args["ignore_errors"]:
+        init_args["ignore_errors"] = True
 
     bl = BibLookup(**init_args)
 

diff --git a/test/test_cli.py b/test/test_cli.py
@@ -91,16 +91,13 @@ def test_cli():
     exitcode, output_msg = execute_cmd(cmd)
     assert exitcode == 0
 
-    cmd = (
-        "bib-lookup 10.1109/CVPR.2016.90 10.1109/tpami.2019.2913372 "
-        "--format text --style apa --ignore-errors true --timeout 10"
-    )
+    cmd = "bib-lookup 10.1109/CVPR.2016.90 10.1109/tpami.2019.2913372 " "--format text --style apa --ignore-errors --timeout 10"
     exitcode, output_msg = execute_cmd(cmd)
     assert exitcode == 0
 
     cmd = (
         f"bib-lookup --input {str(SAMPLE_INPUT_TXT)} --output {str(OUTPUT_FILE)} "
-        "--check-file y --timeout 10 --ignore-errors true --verbose 3"
+        "--check-file y --timeout 10 --ignore-errors --verbose 3"
     )
     exitcode, output_msg = execute_cmd(cmd)
     OUTPUT_FILE.unlink(missing_ok=True)
@@ -197,6 +194,32 @@ def test_cli():
     assert _CONFIG_FILE.exists()
     new_config_file.unlink()
 
+    # invalid config file type
+    invalid_config_file = SAMPLE_DATA_DIR / "invalid_config.txt"
+    invalid_config_file.write_text("dummy")
+    cmd = f"bib-lookup --config {str(invalid_config_file)}"
+    exitcode, output_msg = execute_cmd(cmd, raise_error=False)
+    assert exitcode == 1
+    invalid_config_file.unlink()
+
+    # gather with invalid file type
+    invalid_gather_file = SAMPLE_DATA_DIR / "invalid_gather.txt"
+    invalid_gather_file.write_text("dummy")
+    cmd = f"bib-lookup --gather {str(invalid_gather_file)}"
+    exitcode, output_msg = execute_cmd(cmd, raise_error=False)
+    assert exitcode == 1
+    invalid_gather_file.unlink()
+
+    # lookup with print
+    # use a fake DOI that is mocked or use a known one if network is allowed (it seems network is used in tests)
+    # But network calls are flaky.
+    # The existing test uses 10.1109/CVPR.2016.90.
+    # Let's check if output is produced.
+    cmd = "bib-lookup 10.1109/CVPR.2016.90 --timeout 10"
+    exitcode, output_msg = execute_cmd(cmd)
+    # output_msg should contain something if successful.
+    # If network fails, it might be empty.
+
-    # lookup with print
-    # use a fake DOI that is mocked or use a known one if network is allowed (it seems network is used in tests)
-    # But network calls are flaky.
-    # The existing test uses 10.1109/CVPR.2016.90.
-    # Let's check if output is produced.
-    cmd = "bib-lookup 10.1109/CVPR.2016.90 --timeout 10"
-    exitcode, output_msg = execute_cmd(cmd)
-    # output_msg should contain something if successful.
-    # If network fails, it might be empty.
-    # lookup with print
-    # use a fake DOI that is mocked or use a known one if network is allowed (it seems network is used in tests)
-    # But network calls are flaky.
-    # The existing test uses 10.1109/CVPR.2016.90.
-    # Let's check if output is produced.
-    cmd = "bib-lookup 10.1109/CVPR.2016.90 --timeout 10"
-    exitcode, output_msg = execute_cmd(cmd)
-    # output_msg should contain something if successful.
-    # If network fails, it might be empty.
     # restore the original config file
     if config_backed_file is not None:
         config_backed_file.rename(_CONFIG_FILE)