Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ Changed
- Changed the backend storage of `CitationMixin` cache from CSV to SQLite
for better performance and concurrency support. The existing CSV cache
will be automatically migrated to SQLite upon first use.
- Refactored the CLI entry point `bib_lookup/cli.py` to break down the monolithic
`main` function into modular helper functions for better maintainability.

Deprecated
~~~~~~~~~~
Expand Down
280 changes: 153 additions & 127 deletions bib_lookup/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@
import sys
import warnings
from pathlib import Path
from typing import Any, Dict

import yaml
try:
import yaml
except ImportError:
yaml = None # type: ignore
Comment on lines +13 to +16
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PyYAML is listed as a required dependency in requirements.txt, so the try/except ImportError guard around import yaml and the subsequent yaml is None check (line 61) are dead code — yaml will always be importable in any supported environment. This adds unnecessary complexity and could mislead readers into thinking YAML support is optional.

Copilot uses AI. Check for mistakes.

from bib_lookup._const import CONFIG_FILE as _CONFIG_FILE
from bib_lookup._const import DEFAULT_CONFIG as _DEFAULT_CONFIG
Expand All @@ -18,6 +22,133 @@
from bib_lookup.version import __version__ as bl_version


def _handle_config(config_arg: str) -> None:
"""Handle configuration commands."""
if config_arg == "show":
if not _CONFIG_FILE.is_file():
print("User-defined configuration file does not exist.")
print("Using default configurations:")
print(json.dumps(_DEFAULT_CONFIG, indent=4))
else:
user_config = json.loads(_CONFIG_FILE.read_text())
print("User-defined configurations:")
print(json.dumps(user_config, indent=4))
print("The rest default configurations:")
print(
json.dumps(
{k: v for k, v in _DEFAULT_CONFIG.items() if k not in user_config},
indent=4,
)
)
return
elif config_arg == "reset":
if _CONFIG_FILE.is_file():
_CONFIG_FILE.unlink()
print("User-defined configuration file deleted.")
else:
print("User-defined configuration file does not exist. No need to reset.")
return
else:
if "=" in config_arg:
config = dict([kv.strip().split("=") for kv in config_arg.split(";")])
else:
config_path = Path(config_arg)
assert config_path.is_file(), f"Configuration file ``{config_arg}`` does not exist. Please check and try again."

if config_path.suffix == ".json":
config = json.loads(config_path.read_text())
elif config_path.suffix in [".yaml", ".yml"]:
if yaml is None:
raise ImportError(
"PyYAML is required to parse yaml config files. Please install it via `pip install PyYAML`."
)
config = yaml.safe_load(config_path.read_text())
else:
raise ValueError(
f"Unknown configuration file type ``{config_path.suffix}``. " "Only json and yaml files are supported."
)
Comment on lines +56 to +69
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When an unsupported config file extension is provided, _handle_config raises a ValueError (or AssertionError for non-existent files) that propagates uncaught from main(). While this does result in exit code 1 (satisfying the test assertion), it prints a full Python traceback to the user rather than a clean error message. The _handle_gather function uses sys.exit(1) with friendly print(f"Error: ...") messages for analogous error conditions. For consistency, _handle_config should handle the invalid-extension and file-not-found cases with print() + sys.exit(1) instead of bare raise/assert.

Copilot uses AI. Check for mistakes.

# discard unknown keys
unknown_keys = set(config.keys()) - set(_DEFAULT_CONFIG.keys())
config = {k: v for k, v in config.items() if k in _DEFAULT_CONFIG}
# parse lists in the config
for k, v in config.items():
if isinstance(v, str) and v.startswith("[") and v.endswith("]"):
config[k] = [vv.strip() for vv in v.strip("[]").split(",")]
if len(unknown_keys) > 0:
verb = "are" if len(unknown_keys) > 1 else "is"
warnings.warn(
f"Unknown configuration keys ``{unknown_keys}`` {verb} discarded.",
RuntimeWarning,
)
print(f"Setting configurations:\n{json.dumps(config, indent=4)}")
_CONFIG_FILE.write_text(json.dumps(config, indent=4))
return


def _handle_gather(args: Dict[str, Any]) -> None:
"""Handle the gather command."""
from bib_lookup.utils import gather_tex_source_files_in_one

try:
gather_args = args["gather"]
if len(gather_args) == 1:
entry_file = Path(gather_args[0]).resolve()
output_file = None # let the function use default naming
elif len(gather_args) == 2:
entry_file = Path(gather_args[0]).resolve()
output_file = Path(gather_args[1]).resolve()
else:
print("Error: --gather accepts one or two arguments only.")
sys.exit(1)

if entry_file.exists() and (not entry_file.is_file() or entry_file.suffix != ".tex"):
print(f"Error: File {entry_file} is not a valid .tex file.")
sys.exit(1)

if len(args["identifiers"]) > 0 or args["input_file"] is not None:
warnings.warn(
"Identifiers and input file are ignored when gathering .tex files.",
RuntimeWarning,
)

gather_tex_source_files_in_one(
entry_file,
write_file=True,
output_file=output_file,
overwrite=args["overwrite"],
keep_comments=not args["remove_comments"],
)
except FileExistsError as e:
print(f"Error: {e}".replace("overwrite=True", "--overwrite"))
print("Use the --overwrite flag to overwrite the existing file.")
sys.exit(1)
except FileNotFoundError as e:
print(f"Error: {e}")
print("Please check the file path and try again.")
sys.exit(1)
except Exception as e:
print(f"Unexpected error: {e}") # pragma: no cover
sys.exit(1) # pragma: no cover


def _handle_simplify_bib(args: Dict[str, Any]) -> None:
"""Handle the simplify bib command."""
if args.get("input_file", None) is not None:
input_file = Path(args["input_file"]).resolve()
if not input_file.is_file() or input_file.suffix != ".bib":
print(f"Input bib file {args['input_file']} is not a valid .bib file. Please check and try again.")
sys.exit(1)
else:
input_file = None
output_file = args["output_file"]
output_mode = "w" if args["overwrite"] else "a"

BibLookup.simplify_bib_file(
tex_sources=args["simplify_bib"], bib_file=input_file, output_file=output_file, output_mode=output_mode
)


def main():
"""Command-line interface for the bib_lookup package."""
parser = argparse.ArgumentParser(description="Look up a BibTeX entry from a DOI identifier, PMID (URL) or arXiv ID (URL).")
Expand Down Expand Up @@ -85,24 +216,21 @@ def main():
)
parser.add_argument(
"--arxiv2doi",
# type=str2bool,
# default=True,
action="store_true",
help="Convert arXiv ID to DOI to look up.",
dest="arxiv2doi",
)
parser.add_argument(
"--ignore-errors",
type=str2bool,
default=True,
action="store_true",
Comment on lines 224 to +225

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Keep --ignore-errors accepting explicit boolean values

Changing --ignore-errors to action="store_true" breaks existing CLI invocations that pass a value (for example --ignore-errors true/false): with identifiers defined as positional nargs=*, the trailing token is now consumed as an identifier instead of the option value, causing an unintended extra lookup and making it impossible to explicitly set ignore_errors to false on the command line.

Useful? React with 👍 / 👎.

Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing --ignore-errors from type=str2bool, default=True to action="store_true" is a breaking change. The existing tests at lines 96 and 103 in test/test_cli.py call --ignore-errors true, where true will now be parsed as a positional identifier (something to look up) rather than the boolean value for the flag. With action="store_true", the flag takes no argument — providing true after it causes it to be consumed as an extra positional identifiers argument, resulting in the CLI attempting to look up "true" as a DOI/arXiv ID. You should update the existing test invocations to simply use --ignore-errors (without true) to match the new store_true semantics.

Suggested change
action="store_true",
type=str2bool,
nargs="?",
const=True,
default=True,

Copilot uses AI. Check for mistakes.
help="Ignore errors when looking up",
dest="ignore_errors",
)
parser.add_argument(
"--timeout",
type=int,
default=6,
help="Ignore errors when looking up",
help="Timeout for the lookup request. Unit is seconds. Default is 6 seconds.",
dest="timeout",
)
parser.add_argument(
Expand Down Expand Up @@ -176,141 +304,39 @@ def main():
args = vars(parser.parse_args())

if args.get("config", None) is not None:
if args["config"] == "show":
if not _CONFIG_FILE.is_file():
print("User-defined configuration file does not exist.")
print("Using default configurations:")
print(json.dumps(_DEFAULT_CONFIG, indent=4))
else:
user_config = json.loads(_CONFIG_FILE.read_text())
print("User-defined configurations:")
print(json.dumps(user_config, indent=4))
print("The rest default configurations:")
print(
json.dumps(
{k: v for k, v in _DEFAULT_CONFIG.items() if k not in user_config},
indent=4,
)
)
return
elif args["config"] == "reset":
if _CONFIG_FILE.is_file():
_CONFIG_FILE.unlink()
print("User-defined configuration file deleted.")
else:
print("User-defined configuration file does not exist. No need to reset.")
return
else:
if "=" in args["config"]:
config = dict([kv.strip().split("=") for kv in args["config"].split(";")])
else:
assert Path(
args["config"]
).is_file(), f"Configuration file ``{args['config']}`` does not exist. Please check and try again."
if Path(args["config"]).suffix == ".json":
config = json.loads(Path(args["config"]).read_text())
elif Path(args["config"]).suffix in [".yaml", ".yml"]:
config = yaml.safe_load(Path(args["config"]).read_text())
else:
raise ValueError(
f"Unknown configuration file type ``{Path(args['config']).suffix}``. "
"Only json and yaml files are supported."
)
# discard unknown keys
unknown_keys = set(config.keys()) - set(_DEFAULT_CONFIG.keys())
config = {k: v for k, v in config.items() if k in _DEFAULT_CONFIG}
# parse lists in the config
for k, v in config.items():
if isinstance(v, str) and v.startswith("[") and v.endswith("]"):
config[k] = [vv.strip() for vv in v.strip("[]").split(",")]
if len(unknown_keys) > 0:
verb = "are" if len(unknown_keys) > 1 else "is"
warnings.warn(
f"Unknown configuration keys ``{unknown_keys}`` {verb} discarded.",
RuntimeWarning,
)
print(f"Setting configurations:\n{json.dumps(config, indent=4)}")
_CONFIG_FILE.write_text(json.dumps(config, indent=4))
return
_handle_config(args["config"])
return

if args.get("gather", None) is not None:
from bib_lookup.utils import gather_tex_source_files_in_one

try:
gather_args = args["gather"]
if len(gather_args) == 1:
entry_file = Path(gather_args[0]).resolve()
output_file = None # let the function use default naming
elif len(gather_args) == 2:
entry_file = Path(gather_args[0]).resolve()
output_file = Path(gather_args[1]).resolve()
else:
print("Error: --gather accepts one or two arguments only.")
sys.exit(1)

if entry_file.exists() and (not entry_file.is_file() or entry_file.suffix != ".tex"):
print(f"Error: File {entry_file} is not a valid .tex file.")
sys.exit(1)

if len(args["identifiers"]) > 0 or args["input_file"] is not None:
warnings.warn(
"Identifiers and input file are ignored when gathering .tex files.",
RuntimeWarning,
)

gather_tex_source_files_in_one(
entry_file,
write_file=True,
output_file=output_file,
overwrite=args["overwrite"],
keep_comments=not args["remove_comments"],
)
except FileExistsError as e:
print(f"Error: {e}".replace("overwrite=True", "--overwrite"))
print("Use the --overwrite flag to overwrite the existing file.")
sys.exit(1)
except FileNotFoundError as e:
print(f"Error: {e}")
print("Please check the file path and try again.")
sys.exit(1)
except Exception as e:
print(f"Unexpected error: {e}") # pragma: no cover
sys.exit(1) # pragma: no cover
_handle_gather(args)
return

if args.get("simplify_bib", None) is not None:
if args.get("input_file", None) is not None:
input_file = Path(args["input_file"]).resolve()
if not input_file.is_file() or input_file.suffix != ".bib":
print(f"Input bib file {args['input_file']} is not a valid .bib file. Please check and try again.")
sys.exit(1)
else:
input_file = None
output_file = args["output_file"]
output_mode = "w" if args["overwrite"] else "a"

simplified_bib_file = BibLookup.simplify_bib_file(
tex_sources=args["simplify_bib"], bib_file=input_file, output_file=output_file, output_mode=output_mode
)
_handle_simplify_bib(args)
return

check_file = args["check_file"]
if check_file is not None:
if Path(check_file).is_file() and Path(check_file).suffix == ".bib":
# check this file, other augments are ignored
# check this file, other arguments are ignored
check_file = Path(check_file)
else:
check_file = str2bool(check_file)

# fmt: off
init_args = dict()
for k in [
"align", "ignore_fields", "output_file", "email", "ordering",
"arxiv2doi", "format", "style", "timeout", "ignore_errors", "verbose"
]:
if args[k] is not None:
init_args[k] = args[k]
# fmt: on
init_keys = [
"align",
"ignore_fields",
"output_file",
"email",
"ordering",
"arxiv2doi",
"format",
"style",
"timeout",
"ignore_errors",
"verbose",
]
init_args = {k: args[k] for k in init_keys if args[k] is not None}
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because --ignore-errors uses action="store_true", its default value is False (not None). The init_args dictionary is built with {k: args[k] for k in init_keys if args[k] is not None}, so ignore_errors=False will always be included — even when the user never specified the flag. This silently overrides any user config that has ignore_errors: true, making it impossible to enable ignore_errors via the config file when using the CLI. To fix this, use default=None with explicit action handling, or use a separate sentinel to distinguish "not provided" from False.

Suggested change
init_args = {k: args[k] for k in init_keys if args[k] is not None}
# Build init_args while ensuring that the default value of --ignore-errors
# (False from argparse's store_true) does not override configuration unless
# the flag was explicitly provided.
init_args = {
k: args[k]
for k in init_keys
if k != "ignore_errors" and args[k] is not None
}
if "ignore_errors" in args and args["ignore_errors"]:
init_args["ignore_errors"] = True

Copilot uses AI. Check for mistakes.

bl = BibLookup(**init_args)

Expand Down
33 changes: 28 additions & 5 deletions test/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,13 @@ def test_cli():
exitcode, output_msg = execute_cmd(cmd)
assert exitcode == 0

cmd = (
"bib-lookup 10.1109/CVPR.2016.90 10.1109/tpami.2019.2913372 "
"--format text --style apa --ignore-errors true --timeout 10"
)
cmd = "bib-lookup 10.1109/CVPR.2016.90 10.1109/tpami.2019.2913372 " "--format text --style apa --ignore-errors --timeout 10"
exitcode, output_msg = execute_cmd(cmd)
assert exitcode == 0

cmd = (
f"bib-lookup --input {str(SAMPLE_INPUT_TXT)} --output {str(OUTPUT_FILE)} "
"--check-file y --timeout 10 --ignore-errors true --verbose 3"
"--check-file y --timeout 10 --ignore-errors --verbose 3"
)
exitcode, output_msg = execute_cmd(cmd)
OUTPUT_FILE.unlink(missing_ok=True)
Expand Down Expand Up @@ -197,6 +194,32 @@ def test_cli():
assert _CONFIG_FILE.exists()
new_config_file.unlink()

# invalid config file type
invalid_config_file = SAMPLE_DATA_DIR / "invalid_config.txt"
invalid_config_file.write_text("dummy")
cmd = f"bib-lookup --config {str(invalid_config_file)}"
exitcode, output_msg = execute_cmd(cmd, raise_error=False)
assert exitcode == 1
invalid_config_file.unlink()

# gather with invalid file type
invalid_gather_file = SAMPLE_DATA_DIR / "invalid_gather.txt"
invalid_gather_file.write_text("dummy")
cmd = f"bib-lookup --gather {str(invalid_gather_file)}"
exitcode, output_msg = execute_cmd(cmd, raise_error=False)
assert exitcode == 1
invalid_gather_file.unlink()

# lookup with print
# use a fake DOI that is mocked or use a known one if network is allowed (it seems network is used in tests)
# But network calls are flaky.
# The existing test uses 10.1109/CVPR.2016.90.
# Let's check if output is produced.
cmd = "bib-lookup 10.1109/CVPR.2016.90 --timeout 10"
exitcode, output_msg = execute_cmd(cmd)
# output_msg should contain something if successful.
# If network fails, it might be empty.

Comment on lines +213 to +222
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new test block at lines 216-224 runs a network lookup command but has no assertions (no assert statement). The comment says "output_msg should contain something if successful" but there is no check. This means the test block provides no coverage guarantees — it will pass even if the command silently produces no output. Either add a meaningful assertion or remove this block if it duplicates coverage already provided by the earlier 10.1109/CVPR.2016.90 test at lines 94-99.

Suggested change
# lookup with print
# use a fake DOI that is mocked or use a known one if network is allowed (it seems network is used in tests)
# But network calls are flaky.
# The existing test uses 10.1109/CVPR.2016.90.
# Let's check if output is produced.
cmd = "bib-lookup 10.1109/CVPR.2016.90 --timeout 10"
exitcode, output_msg = execute_cmd(cmd)
# output_msg should contain something if successful.
# If network fails, it might be empty.

Copilot uses AI. Check for mistakes.
# restore the original config file
if config_backed_file is not None:
config_backed_file.rename(_CONFIG_FILE)
Expand Down
Loading