diff --git a/README.md b/README.md
index 7c83252a1..3c3ba01fd 100644
--- a/README.md
+++ b/README.md
@@ -362,7 +362,9 @@ from paperqa import Settings, ask
 
 answer_response = ask(
     "What is PaperQA2?",
-    settings=Settings(temperature=0.5, paper_directory="my_papers"),
+    settings=Settings(
+        temperature=0.5, agent={"index": {"paper_directory": "my_papers"}}
+    ),
 )
 ```
 
@@ -374,7 +376,9 @@ from paperqa import Settings, agent_query
 
 answer_response = await agent_query(
     query="What is PaperQA2?",
-    settings=Settings(temperature=0.5, paper_directory="my_papers"),
+    settings=Settings(
+        temperature=0.5, agent={"index": {"paper_directory": "my_papers"}}
+    ),
 )
 ```
 
@@ -469,7 +473,9 @@ from paperqa import Settings, ask
 answer_response = ask(
     "What is PaperQA2?",
     settings=Settings(
-        llm="gpt-4o-mini", summary_llm="gpt-4o-mini", paper_directory="my_papers"
+        llm="gpt-4o-mini",
+        summary_llm="gpt-4o-mini",
+        agent={"index": {"paper_directory": "my_papers"}},
     ),
 )
 ```
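Note: the dict-style `agent={"index": {...}}` used above also has an object-style equivalent. A minimal sketch, assuming `AgentSettings` and `IndexSettings` are importable from `paperqa.settings` (both names appear later in this diff):

```python
from paperqa import Settings
from paperqa.settings import AgentSettings, IndexSettings

# Equivalent to: Settings(temperature=0.5, agent={"index": {"paper_directory": "my_papers"}})
settings = Settings(
    temperature=0.5,
    agent=AgentSettings(index=IndexSettings(paper_directory="my_papers")),
)
```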
@@ -796,9 +800,9 @@ for ... in my_docs:
 Indexes will be placed in the [home directory][home dir] by default.
 This can be controlled via the `PQA_HOME` environment variable.
 
-Indexes are made by reading files in the `Settings.paper_directory`.
+Indexes are made by reading files in the `IndexSettings.paper_directory`.
 By default, we recursively read from subdirectories of the paper directory,
-unless disabled using `Settings.index_recursively`.
+unless disabled using `IndexSettings.recurse_subdirectories`.
 The paper directory is not modified in any way, it's just read from.
 
 [home dir]: https://docs.python.org/3/library/pathlib.html#pathlib.Path.home
@@ -824,7 +828,7 @@ which also works when called on `DocDetails`.
 ### Reusing Index
 
 The local search indexes are built based on a hash of the current `Settings` object.
-So make sure you properly specify the `paper_directory` to your `Settings` object.
+So make sure you properly specify the `paper_directory` in your `IndexSettings` object.
 In general, it's advisable to:
 
 1. Pre-build an index given a folder of papers (can take several minutes)
@@ -839,7 +843,7 @@ from paperqa.agents.search import get_directory_index
 
 
 async def amain(folder_of_papers: str | os.PathLike) -> None:
-    settings = Settings(paper_directory=folder_of_papers)
+    settings = Settings(agent={"index": {"paper_directory": folder_of_papers}})
 
     # 1. Build the index. Note an index name is autogenerated when unspecified
     built_index = await get_directory_index(settings=settings)
@@ -939,17 +943,13 @@ will return much faster than the first query and we'll be certain the authors ma
 | `answer.group_contexts_by_question` | `False` | Groups the final contexts by the underlying `gather_evidence` question in the final context prompt. |
 | `answer.evidence_relevance_score_cutoff` | `1` | Cutoff evidence relevance score to include in the answer context (inclusive) |
 | `answer.skip_evidence_citation_strip` | `False` | Skip removal of citations from the `gather_evidence` contexts |
-| `parsing.chunk_size` | `5000` | Characters per chunk (0 for no chunking). |
 | `parsing.page_size_limit` | `1,280,000` | Character limit per page. |
-| `parsing.pdfs_use_block_parsing` | `False` | Opt-in flag for block-based PDF parsing over text-based PDF parsing. |
 | `parsing.use_doc_details` | `True` | Whether to get metadata details for docs. |
-| `parsing.overlap` | `250` | Characters to overlap chunks. |
 | `parsing.reader_config` | `dict` | Optional keyword arguments for the document reader. |
 | `parsing.multimodal` | `True` | Control to parse both text and media from applicable documents, as well as potentially enriching them with text descriptions. |
 | `parsing.defer_embedding` | `False` | Whether to defer embedding until summarization. |
 | `parsing.parse_pdf` | `paperqa_pypdf.parse_pdf_to_pages` | Function to parse PDF files. |
 | `parsing.configure_pdf_parser` | No-op | Callable to configure the PDF parser within `parse_pdf`, useful for behaviors such as enabling logging. |
-| `parsing.chunking_algorithm` | `ChunkingOptions.SIMPLE_OVERLAP` | Algorithm for chunking. |
 | `parsing.doc_filters` | `None` | Optional filters for allowed documents. |
 | `parsing.use_human_readable_clinical_trials` | `False` | Parse clinical trial JSONs into readable text. |
 | `parsing.enrichment_llm` | `"gpt-4o-2024-11-20"` | LLM for media enrichment. |
@@ -976,7 +976,6 @@
 | `agent.return_paper_metadata` | `False` | Whether to include paper title/year in search tool results. |
 | `agent.search_count` | `8` | Search count. |
 | `agent.timeout` | `500.0` | Timeout on agent execution (seconds). |
-| `agent.should_pre_search` | `False` | Whether to run search tool before invoking agent. |
 | `agent.tool_names` | `None` | Optional override on tools to provide the agent. |
 | `agent.max_timesteps` | `None` | Optional upper limit on environment steps. |
 | `agent.index.name` | `None` | Optional name of the index. |
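Note: with `parsing.chunk_size`, `parsing.overlap`, and `parsing.chunking_algorithm` removed, chunking is configured solely through `parsing.reader_config`. A minimal sketch, mirroring the defaults this diff gives `reader_config`:

```python
from paperqa import Settings

# These values match reader_config's default_factory in this diff
settings = Settings(parsing={"reader_config": {"chunk_chars": 5000, "overlap": 250}})
```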
diff --git a/src/paperqa/__init__.py b/src/paperqa/__init__.py
index 28122c4b8..8eaf3f9c6 100644
--- a/src/paperqa/__init__.py
+++ b/src/paperqa/__init__.py
@@ -1,5 +1,3 @@
-import warnings
-
 from lmi import (
     EmbeddingModel,
     HybridEmbeddingModel,
@@ -21,17 +19,10 @@
     VectorStore,
 )
 from paperqa.settings import Settings, get_settings
-from paperqa.types import Answer, Context, Doc, DocDetails, Text
+from paperqa.types import Context, Doc, DocDetails, Text
 from paperqa.version import __version__
 
-# TODO: remove after refactoring all models to avoid using _* private vars
-warnings.filterwarnings(
-    "ignore", message="Valid config keys have changed in V2:", module="pydantic"
-)
-
-
 __all__ = [
-    "Answer",
     "Context",
     "Doc",
     "DocDetails",
diff --git a/src/paperqa/agents/search.py b/src/paperqa/agents/search.py
index 8626f4fb8..b562bc24f 100644
--- a/src/paperqa/agents/search.py
+++ b/src/paperqa/agents/search.py
@@ -9,7 +9,6 @@
 import pickle
 import re
 import sys
-import warnings
 import zlib
 from collections import Counter
 from collections.abc import AsyncIterator, Callable, Sequence
@@ -616,11 +615,8 @@ def progress_bar_update() -> None:
     return contextlib.nullcontext(), None
 
 
-async def get_directory_index(  # noqa: PLR0912
-    index_name: str | None = None,
-    sync_index_w_directory: bool = True,
-    settings: MaybeSettings = None,
-    build: bool = True,
+async def get_directory_index(
+    settings: MaybeSettings = None, build: bool = True
 ) -> SearchIndex:
     """
     Create a Tantivy index by reading from a directory of text files.
@@ -628,26 +624,13 @@
     This function only reads from the source directory, not edits or writes to it.
 
     Args:
-        index_name: Deprecated override on the name of the index. If unspecified,
-            the default behavior is to generate the name from the input settings.
-        sync_index_w_directory: Opt-out flag to sync the index (add or delete index
-            files) with the source paper directory.
         settings: Application settings.
-        build: Opt-out flag (default is True) to read the contents of the source paper
-            directory and if sync_index_w_directory is enabled also update the index.
+        build: Opt-out flag (default is True) to read the contents of the source paper
+            directory and, if the sync_with_paper_directory setting is enabled, also
+            update the index.
     """
     _settings = get_settings(settings)
     index_settings = _settings.agent.index
-    if index_name:
-        warnings.warn(
-            "The index_name argument has been moved to"
-            f" {type(_settings.agent.index).__name__},"
-            " this deprecation will conclude in version 6.",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-        index_settings.name = index_name
-        del index_name
 
     search_index = SearchIndex(
         fields=[*SearchIndex.REQUIRED_FIELDS, "title", "year"],
@@ -663,17 +645,6 @@
         )
         return search_index
 
-    if not sync_index_w_directory:
-        warnings.warn(
-            "The sync_index_w_directory argument has been moved to"
-            f" {type(_settings.agent.index).__name__},"
-            " this deprecation will conclude in version 6.",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-        index_settings.sync_with_paper_directory = sync_index_w_directory
-        del sync_index_w_directory
-
     paper_directory = anyio.Path(index_settings.paper_directory)
     manifest = await maybe_get_manifest(
         filename=await index_settings.finalize_manifest_file()
diff --git a/src/paperqa/agents/tools.py b/src/paperqa/agents/tools.py
index e676b73e4..505d14c3d 100644
--- a/src/paperqa/agents/tools.py
+++ b/src/paperqa/agents/tools.py
@@ -47,10 +47,10 @@ def default_status(state: "EnvironmentState") -> str:
 class EnvironmentState(BaseModel):
     """State here contains documents and answer being populated."""
 
-    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+    model_config = ConfigDict(extra="forbid")
 
     docs: Docs
-    session: PQASession = Field(..., alias="answer")
+    session: PQASession
     status_fn: Callable[[Self], str] | None = Field(
         default=None,
         description=(
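Note: a minimal usage sketch for the slimmed-down `get_directory_index`, where index naming and directory sync now come from `Settings.agent.index`; the `index_files` accessor is assumed from the surrounding `SearchIndex` API rather than shown in this diff:

```python
import asyncio

from paperqa import Settings
from paperqa.agents.search import get_directory_index


async def main() -> None:
    settings = Settings(agent={"index": {"paper_directory": "my_papers"}})
    # The index_name/sync_index_w_directory keyword arguments are gone;
    # the equivalent knobs live on settings.agent.index
    index = await get_directory_index(settings=settings)
    print(await index.index_files)  # assumed accessor


asyncio.run(main())
```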
diff --git a/src/paperqa/contrib/openreview_paper_helper.py b/src/paperqa/contrib/openreview_paper_helper.py
index 17770cff0..06fb65463 100644
--- a/src/paperqa/contrib/openreview_paper_helper.py
+++ b/src/paperqa/contrib/openreview_paper_helper.py
@@ -47,7 +47,7 @@ def __init__(
         password: str | None = None,
     ) -> None:
         self.settings = settings
-        Path(settings.paper_directory).mkdir(parents=True, exist_ok=True)
+        Path(settings.agent.index.paper_directory).mkdir(parents=True, exist_ok=True)
         if openreview is None:
             raise ImportError(
                 "openreview requires the 'openreview-py' extra. Please run: `pip"
@@ -122,7 +122,9 @@ async def _get_relevant_papers_chunk(self, question: str, chunk: str) -> list[An
 
     async def download_papers(self, submissions: list[Any]) -> None:
         """Download PDFs for given submissions."""
-        downloaded_papers = Path(self.settings.paper_directory).rglob("*.pdf")
+        downloaded_papers = Path(self.settings.agent.index.paper_directory).rglob(
+            "*.pdf"
+        )
         downloaded_ids = [p.stem for p in downloaded_papers]
         logger.info("Downloading PDFs for relevant papers.")
         for submission in submissions:
@@ -136,7 +138,7 @@ async def _download_pdf(self, submission: Any) -> bool:
         response = await client.get(pdf_link)
         if response.status_code == httpx.codes.OK.value:
             async with await anyio.open_file(
-                f"{self.settings.paper_directory}/{submission.id}.pdf", "wb"
+                f"{self.settings.agent.index.paper_directory}/{submission.id}.pdf", "wb"
             ) as f:
                 await f.write(response.content)
                 return True
@@ -151,7 +153,7 @@ async def aadd_docs(
     ) -> Docs:
         if docs is None:
             docs = Docs()
-        for doc_path in Path(self.settings.paper_directory).rglob("*.pdf"):
+        for doc_path in Path(self.settings.agent.index.paper_directory).rglob("*.pdf"):
             sub = subs.get(doc_path.stem) if subs is not None else None
             if sub:
                 await docs.aadd(
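Note: any downstream code that read `settings.paper_directory` follows the same relocation. A minimal sketch:

```python
from paperqa import Settings

settings = Settings()
# Was settings.paper_directory; the top-level field is removed later in this diff
papers_location = settings.agent.index.paper_directory
```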
diff --git a/src/paperqa/docs.py b/src/paperqa/docs.py
index f788f2714..81adae1b2 100644
--- a/src/paperqa/docs.py
+++ b/src/paperqa/docs.py
@@ -33,7 +33,6 @@
 from paperqa.types import Doc, DocDetails, DocKey, PQASession, Text
 from paperqa.utils import (
     citation_to_docname,
-    get_loop,
     maybe_is_html,
     maybe_is_pdf,
     maybe_is_text,
@@ -91,35 +90,6 @@ def _get_unique_name(self, docname: str) -> str:
             docname += suffix
         return docname
 
-    def add_file(
-        self,
-        file: BinaryIO,
-        citation: str | None = None,
-        docname: str | None = None,
-        dockey: DocKey | None = None,
-        settings: MaybeSettings = None,
-        llm_model: LLMModel | None = None,
-        embedding_model: EmbeddingModel | None = None,
-    ) -> str | None:
-        warnings.warn(
-            "The synchronous `add_file` method is being deprecated in favor of the"
-            " asynchronous `aadd_file` method, this deprecation will conclude in"
-            " version 6.",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-        return get_loop().run_until_complete(
-            self.aadd_file(
-                file,
-                citation=citation,
-                docname=docname,
-                dockey=dockey,
-                settings=settings,
-                llm_model=llm_model,
-                embedding_model=embedding_model,
-            )
-        )
-
     async def aadd_file(
         self,
         file: BinaryIO,
@@ -159,35 +129,6 @@
             **kwargs,
         )
 
-    def add_url(
-        self,
-        url: str,
-        citation: str | None = None,
-        docname: str | None = None,
-        dockey: DocKey | None = None,
-        settings: MaybeSettings = None,
-        llm_model: LLMModel | None = None,
-        embedding_model: EmbeddingModel | None = None,
-    ) -> str | None:
-        warnings.warn(
-            "The synchronous `add_url` method is being deprecated in favor of the"
-            " asynchronous `aadd_url` method, this deprecation will conclude in"
-            " version 6.",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-        return get_loop().run_until_complete(
-            self.aadd_url(
-                url,
-                citation=citation,
-                docname=docname,
-                dockey=dockey,
-                settings=settings,
-                llm_model=llm_model,
-                embedding_model=embedding_model,
-            )
-        )
-
     async def aadd_url(
         self,
         url: str,
@@ -212,43 +153,6 @@
             embedding_model=embedding_model,
         )
 
-    def add(
-        self,
-        path: str | os.PathLike,
-        citation: str | None = None,
-        docname: str | None = None,
-        dockey: DocKey | None = None,
-        title: str | None = None,
-        doi: str | None = None,
-        authors: list[str] | None = None,
-        settings: MaybeSettings = None,
-        llm_model: LLMModel | None = None,
-        embedding_model: EmbeddingModel | None = None,
-        **kwargs,
-    ) -> str | None:
-        warnings.warn(
-            "The synchronous `add` method is being deprecated in favor of the"
-            " asynchronous `aadd` method, this deprecation will conclude in"
-            " version 6.",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-        return get_loop().run_until_complete(
-            self.aadd(
-                path,
-                citation=citation,
-                docname=docname,
-                dockey=dockey,
-                title=title,
-                doi=doi,
-                authors=authors,
-                settings=settings,
-                llm_model=llm_model,
-                embedding_model=embedding_model,
-                **kwargs,
-            )
-        )
-
     async def aadd(  # noqa: PLR0912
         self,
         path: str | os.PathLike,
@@ -429,26 +333,6 @@
                 return doc.docname
         return None
 
-    def add_texts(
-        self,
-        texts: list[Text],
-        doc: Doc,
-        settings: MaybeSettings = None,
-        embedding_model: EmbeddingModel | None = None,
-    ) -> bool:
-        warnings.warn(
-            "The synchronous `add_texts` method is being deprecated in favor of the"
-            " asynchronous `aadd_texts` method, this deprecation will conclude in"
-            " version 6.",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-        return get_loop().run_until_complete(
-            self.aadd_texts(
-                texts, doc, settings=settings, embedding_model=embedding_model
-            )
-        )
-
     async def aadd_texts(
         self,
         texts: list[Text],
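Note: callers of the removed synchronous wrappers switch to the `a`-prefixed coroutines they delegated to. A minimal sketch (the PDF path is hypothetical):

```python
import asyncio

from paperqa import Docs


async def main() -> None:
    docs = Docs()
    # Was: docs.add(...) via get_loop().run_until_complete
    await docs.aadd("my_papers/example.pdf")  # hypothetical path


asyncio.run(main())
```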
- " This deprecation will conclude in version 6" - ), - category=DeprecationWarning, - stacklevel=2, - ) - if answer_config.evidence_retrieval: matches = await self.retrieve_texts( session.question, @@ -735,35 +574,6 @@ async def aget_evidence( session.contexts += [c for c, _ in results if c is not None and c.score > 0] return session - def query( - self, - query: PQASession | str, - settings: MaybeSettings = None, - callbacks: Sequence[Callable] | None = None, - llm_model: LLMModel | None = None, - summary_llm_model: LLMModel | None = None, - embedding_model: EmbeddingModel | None = None, - partitioning_fn: Callable[[Embeddable], int] | None = None, - ) -> PQASession: - warnings.warn( - "The synchronous `query` method is being deprecated in favor of the" - " asynchronous `aquery` method, this deprecation will conclude in" - " version 6.", - category=DeprecationWarning, - stacklevel=2, - ) - return get_loop().run_until_complete( - self.aquery( - query, - settings=settings, - callbacks=callbacks, - llm_model=llm_model, - summary_llm_model=summary_llm_model, - embedding_model=embedding_model, - partitioning_fn=partitioning_fn, - ) - ) - async def aquery( self, query: PQASession | str, diff --git a/src/paperqa/settings.py b/src/paperqa/settings.py index 2ade04db7..745989f71 100644 --- a/src/paperqa/settings.py +++ b/src/paperqa/settings.py @@ -29,7 +29,6 @@ field_validator, model_validator, ) -from pydantic.fields import FieldInfo from pydantic_settings import BaseSettings, CliSettingsSource, SettingsConfigDict import paperqa.configs @@ -91,10 +90,6 @@ class AnswerSettings(BaseModel): evidence_k: int = Field( default=10, description="Number of evidence pieces to retrieve." ) - evidence_detailed_citations: bool = Field( - default=True, - description="Whether to include detailed citations in summaries.", - ) evidence_retrieval: bool = Field( default=True, description="Whether to use retrieval instead of processing all docs.", @@ -158,19 +153,6 @@ class AnswerSettings(BaseModel): description="Whether to skip stripping citations from evidence.", ) - @model_validator(mode="after") - def _deprecated_field(self) -> Self: - # default is True, so we only warn if it's False - if not self.evidence_detailed_citations: - warnings.warn( - "The 'evidence_detailed_citations' field is deprecated and will be" - " removed in version 6. Adjust 'PromptSettings.context_inner' to remove" - " detailed citations.", - category=DeprecationWarning, - stacklevel=2, - ) - return self - class ChunkingOptions(StrEnum): SIMPLE_OVERLAP = "simple_overlap" @@ -217,10 +199,6 @@ class ParsingSettings(BaseModel): model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) - chunk_size: int = Field( - default=5000, - description="Number of characters per chunk. If 0, no chunking will be done.", - ) page_size_limit: int | None = Field( default=1_280_000, description=( @@ -229,19 +207,9 @@ class ParsingSettings(BaseModel): " (ignoring chars vs tokens difference)." ), ) - pdfs_use_block_parsing: bool = Field( - default=False, - description=( - "Opt-in flag to use block-based parsing for PDFs instead of" - " text-based parsing, which is known to be better for some PDFs." - ), - ) use_doc_details: bool = Field( default=True, description="Whether to try to get metadata details for a Doc." ) - overlap: int = Field( - default=250, description="Number of characters to overlap chunks." 
diff --git a/src/paperqa/settings.py b/src/paperqa/settings.py
index 2ade04db7..745989f71 100644
--- a/src/paperqa/settings.py
+++ b/src/paperqa/settings.py
@@ -29,7 +29,6 @@
     field_validator,
     model_validator,
 )
-from pydantic.fields import FieldInfo
 from pydantic_settings import BaseSettings, CliSettingsSource, SettingsConfigDict
 
 import paperqa.configs
@@ -91,10 +90,6 @@ class AnswerSettings(BaseModel):
     evidence_k: int = Field(
         default=10, description="Number of evidence pieces to retrieve."
     )
-    evidence_detailed_citations: bool = Field(
-        default=True,
-        description="Whether to include detailed citations in summaries.",
-    )
     evidence_retrieval: bool = Field(
         default=True,
         description="Whether to use retrieval instead of processing all docs.",
@@ -158,19 +153,6 @@ class AnswerSettings(BaseModel):
         description="Whether to skip stripping citations from evidence.",
     )
 
-    @model_validator(mode="after")
-    def _deprecated_field(self) -> Self:
-        # default is True, so we only warn if it's False
-        if not self.evidence_detailed_citations:
-            warnings.warn(
-                "The 'evidence_detailed_citations' field is deprecated and will be"
-                " removed in version 6. Adjust 'PromptSettings.context_inner' to remove"
-                " detailed citations.",
-                category=DeprecationWarning,
-                stacklevel=2,
-            )
-        return self
-
 
 class ChunkingOptions(StrEnum):
     SIMPLE_OVERLAP = "simple_overlap"
@@ -217,10 +199,6 @@ class ParsingSettings(BaseModel):
 
     model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
-    chunk_size: int = Field(
-        default=5000,
-        description="Number of characters per chunk. If 0, no chunking will be done.",
-    )
     page_size_limit: int | None = Field(
         default=1_280_000,
         description=(
@@ -229,19 +207,9 @@
             " (ignoring chars vs tokens difference)."
         ),
     )
-    pdfs_use_block_parsing: bool = Field(
-        default=False,
-        description=(
-            "Opt-in flag to use block-based parsing for PDFs instead of"
-            " text-based parsing, which is known to be better for some PDFs."
-        ),
-    )
     use_doc_details: bool = Field(
         default=True, description="Whether to try to get metadata details for a Doc."
     )
-    overlap: int = Field(
-        default=250, description="Number of characters to overlap chunks."
-    )
     reader_config: dict[str, Any] = Field(
         default_factory=lambda: {"chunk_chars": 5000, "overlap": 250},
         description="Optional keyword arguments for the document reader.",
@@ -294,10 +262,6 @@
         ),
         exclude=True,
     )
-    chunking_algorithm: ChunkingOptions = Field(
-        default=ChunkingOptions.SIMPLE_OVERLAP,
-        deprecated="This field is deprecated and will be removed in version 6.",
-    )
     doc_filters: Sequence[Mapping[str, Any]] | None = Field(
         default=None,
         description=(
@@ -347,50 +311,6 @@
         description="Prompt template for enriching media.",
     )
 
-    @model_validator(mode="after")
-    def _deprecated_field(self) -> Self:
-        default_reader_config = (
-            type(self).model_fields["reader_config"].default_factory()  # type: ignore[call-arg,misc]
-        )
-        if (
-            self.pdfs_use_block_parsing
-            != type(self).model_fields["pdfs_use_block_parsing"].default
-        ):
-            warnings.warn(
-                "The 'pdfs_use_block_parsing' field is deprecated"
-                " and will be removed in version 6."
-                " Use 'use_block_parsing' parameter in 'reader_config' instead.",
-                category=DeprecationWarning,
-                stacklevel=2,
-            )
-            if "use_block_parsing" not in self.reader_config:
-                self.reader_config["use_block_parsing"] = self.pdfs_use_block_parsing
-        if self.chunk_size != type(self).model_fields["chunk_size"].default:
-            warnings.warn(
-                "The 'chunk_size' field is deprecated"
-                " and will be removed in version 6."
-                " Use 'chunk_chars' parameter in 'reader_config' instead.",
-                category=DeprecationWarning,
-                stacklevel=2,
-            )
-            if "chunk_chars" not in self.reader_config or self.reader_config[
-                "chunk_chars"
-            ] == default_reader_config.get("chunk_chars"):
-                self.reader_config["chunk_chars"] = self.chunk_size
-        if self.overlap != type(self).model_fields["overlap"].default:
-            warnings.warn(
-                "The 'overlap' field is deprecated"
-                " and will be removed in version 6."
-                " Use 'overlap' parameter in 'reader_config' instead.",
-                category=DeprecationWarning,
-                stacklevel=2,
-            )
-            if "overlap" not in self.reader_config or self.reader_config[
-                "overlap"
-            ] == default_reader_config.get("overlap"):
-                self.reader_config["overlap"] = self.overlap
-        return self
-
     @property
     def should_parse_and_enrich_media(self) -> tuple[bool, bool]:
         """Get if the settings indicate to parse and also enrich media."""
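Note: per the deleted warning text, the removed `answer.evidence_detailed_citations` flag is replaced by editing the prompt directly. A minimal sketch, assuming `Settings` exposes `PromptSettings` as `settings.prompts`:

```python
from paperqa import Settings

settings = Settings()
# Dropping "\nFrom {citation}" removes detailed citations, mirroring the old
# evidence_detailed_citations=False shim deleted later in this diff
settings.prompts.context_inner = settings.prompts.context_inner.replace(
    "\nFrom {citation}", ""
)
```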
@@ -670,7 +590,6 @@ class AgentSettings(BaseModel):
         ),
     )
     search_count: int = 8
-    wipe_context_on_answer_failure: bool = True
     agent_evidence_n: int = Field(
         default=1,
         ge=1,
@@ -685,10 +604,6 @@
             " on agent execution."
         ),
     )
-    should_pre_search: bool = Field(
-        default=False,
-        description="If set to true, run the search tool before invoking agent.",
-    )
 
     tool_names: set[str] | Sequence[str] | None = Field(
         default=None,
@@ -705,12 +620,6 @@
         description="Optional upper limit on the number of environment steps.",
     )
 
-    index_concurrency: int = Field(
-        default=5,  # low default for folks without S2/Crossref keys
-        description="Number of concurrent filesystem reads for indexing.",
-        exclude=True,
-        frozen=True,
-    )
     index: IndexSettings = Field(default_factory=IndexSettings)
 
     rebuild_index: bool = Field(
@@ -750,38 +659,6 @@
         exclude=True,
     )
 
-    @model_validator(mode="after")
-    def _deprecated_field(self) -> Self:
-        for deprecated_field_name, new_name in (("index_concurrency", "concurrency"),):
-            value = getattr(self, deprecated_field_name)
-            if value != type(self).model_fields[deprecated_field_name].default:
-                warnings.warn(
-                    f"The {deprecated_field_name!r} field has been moved to"
-                    f" {IndexSettings.__name__}, located at Settings.agent.index,"
-                    " this deprecation will conclude in version 6.",
-                    category=DeprecationWarning,
-                    stacklevel=2,
-                )
-                setattr(self.index, new_name, value)  # Propagate to new location
-        return self
-
-    @field_validator("should_pre_search", "wipe_context_on_answer_failure")
-    @classmethod
-    def _deprecated_bool_fields(cls, value: bool, info) -> bool:
-        custom_message = ""
-        if info.field_name == "should_pre_search" and value:
-            custom_message = "dead code"
-        elif info.field_name == "wipe_context_on_answer_failure" and not value:
-            custom_message = "no longer used due to the reset tool"
-        if custom_message:
-            warnings.warn(
-                f"The {info.field_name!r} field is {custom_message},"
-                " and will be removed in version 6.",
-                category=DeprecationWarning,
-                stacklevel=2,
-            )
-        return value
-
 
 def make_default_litellm_model_list_settings(
     llm: str, temperature: float = 0.0
@@ -855,27 +732,6 @@ class Settings(BaseSettings):
     texts_index_mmr_lambda: float = Field(
         default=1.0, description="Lambda for MMR in text index."
     )
-    index_absolute_directory: bool = Field(
-        default=False,
-        description="Whether to use the absolute paper directory for the PQA index.",
-        exclude=True,
-        frozen=True,
-    )
-    index_directory: str | os.PathLike | None = Field(
-        default_factory=lambda: pqa_directory("indexes"),
-        description=(
-            "Directory to store the PQA generated search index, configuration, and"
-            " answer indexes."
-        ),
-        exclude=True,
-        frozen=True,
-    )
-    index_recursively: bool = Field(
-        default=True,
-        description="Whether to recurse into subdirectories when indexing sources.",
-        exclude=True,
-        frozen=True,
-    )
     verbosity: int = Field(
         default=0,
         description=(
@@ -883,25 +739,6 @@
             " logged."
         ),
     )
-    manifest_file: str | os.PathLike | None = Field(
-        default=None,
-        description=(
-            "Optional absolute path to a manifest CSV, or a relative path from the"
-            " paper_directory to a manifest CSV. A manifest CSV contains columns which"
-            " are attributes for a DocDetails object. Only 'file_location', 'doi', and"
-            " 'title' will be used when indexing, others are discarded."
-        ),
-        exclude=True,
-        frozen=True,
-    )
-    paper_directory: str | os.PathLike = Field(
-        default=pathlib.Path.cwd(),
-        description=(
-            "Local directory which contains the papers to be indexed and searched."
-        ),
-        exclude=True,
-        frozen=True,
-    )
     custom_context_serializer: AsyncContextSerializer | None = Field(
         default=None,
         description=(
@@ -911,28 +748,6 @@
         ),
         exclude=True,
     )
-    @model_validator(mode="after")
-    def _deprecated_field(self) -> Self:
-        for deprecated_field_name, new_name, is_factory in (
-            ("index_absolute_directory", "use_absolute_paper_directory", False),
-            ("index_directory", "index_directory", True),
-            ("index_recursively", "recurse_subdirectories", False),
-            ("manifest_file", "manifest_file", False),
-            ("paper_directory", "paper_directory", False),
-        ):
-            value = getattr(self, deprecated_field_name)
-            finfo: FieldInfo = type(self).model_fields[deprecated_field_name]
-            if value != (finfo.default_factory() if is_factory else finfo.default):  # type: ignore[call-arg,misc]
-                warnings.warn(
-                    f"The {deprecated_field_name!r} field has been moved to"
-                    f" {IndexSettings.__name__}, located at Settings.agent.index,"
-                    " this deprecation will conclude in version 6.",
-                    category=DeprecationWarning,
-                    stacklevel=2,
-                )
-                setattr(self.agent.index, new_name, value)  # Propagate to new location
-        return self
-
     @model_validator(mode="after")
     def _update_temperature(self) -> Self:
         """Ensures temperature is 1 if the LLM requires it.
@@ -981,7 +796,6 @@ def get_index_name(self) -> str:
             str(self.parsing.parse_pdf),  # Don't use __name__ as lambda wouldn't differ
             str(self.parsing.reader_config["chunk_chars"]),
             str(self.parsing.reader_config["overlap"]),
-            self.parsing.chunking_algorithm,
             str(self.parsing.multimodal),
         ]
         return f"pqa_index_{hexdigest('|'.join(segments))}"
@@ -1320,16 +1134,7 @@ async def context_serializer(
         if c.score >= answer_config.evidence_relevance_score_cutoff
     ]
 
-    # shim deprecated flag
-    # TODO: remove in v6
     context_inner_prompt = prompt_config.context_inner
-    if (
-        not answer_config.evidence_detailed_citations
-        and "\nFrom {citation}" in context_inner_prompt
-    ):
-        # Only keep "\nFrom {citation}" if we are showing detailed citations
-        context_inner_prompt = context_inner_prompt.replace("\nFrom {citation}", "")
-
     context_str_body = ""
     if answer_config.group_contexts_by_question:
         contexts_by_question: dict[str, list[Context]] = defaultdict(list)
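Note: the removed top-level `Settings` fields map onto `IndexSettings` exactly as the deleted `_deprecated_field` validators spelled out. A minimal sketch of the new spellings (values shown are the old defaults; `index_directory` keeps its name under the new location):

```python
from paperqa import Settings

settings = Settings(
    agent={
        "index": {
            "use_absolute_paper_directory": False,  # was Settings.index_absolute_directory
            "recurse_subdirectories": True,  # was Settings.index_recursively
            "manifest_file": None,  # was Settings.manifest_file
            "paper_directory": ".",  # was Settings.paper_directory (defaulted to cwd)
            "concurrency": 5,  # was AgentSettings.index_concurrency
        }
    }
)
```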
diff --git a/src/paperqa/types.py b/src/paperqa/types.py
index 8e95fe0b7..c9282f2d7 100644
--- a/src/paperqa/types.py
+++ b/src/paperqa/types.py
@@ -7,7 +7,6 @@
 import logging
 import os
 import re
-import warnings
 from collections.abc import Collection, Container, Hashable, Iterable, Mapping, Sequence
 from copy import deepcopy
 from datetime import UTC, datetime
@@ -472,18 +471,6 @@ def populate_formatted_answers_and_bib_from_raw_answer(
         self.references = bib
 
 
-# for backwards compatibility
-class Answer(PQASession):
-    def __init__(self, *args, **kwargs):
-        warnings.warn(
-            "The 'Answer' class is deprecated and will be removed in future versions."
-            " Use 'PQASession' instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        super().__init__(*args, **kwargs)
-
-
 class ChunkMetadata(BaseModel):
     """Metadata for chunking algorithm."""
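Note: with the backwards-compatibility alias gone, import the session type directly. A minimal sketch:

```python
from paperqa.types import PQASession

# Was: Answer(question=...), which subclassed PQASession purely for the old name
session = PQASession(question="What is PaperQA2?")
```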
- " Use 'PQASession' instead.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) - - class ChunkMetadata(BaseModel): """Metadata for chunking algorithm.""" diff --git a/tests/test_paperqa.py b/tests/test_paperqa.py index b31049dae..6db43b2d5 100644 --- a/tests/test_paperqa.py +++ b/tests/test_paperqa.py @@ -44,7 +44,6 @@ from pytest_subtests import SubTests from paperqa import ( - Answer, Doc, DocDetails, Docs, @@ -2183,39 +2182,12 @@ def test_get_index_name_uniqueness( ), "Expected index names to be clear they're associated with PaperQA" -@pytest.mark.asyncio -async def test_evidence_detailed_citations_shim(stub_data_dir: Path) -> None: - # TODO: delete this test in v6 - settings = Settings.from_name("fast") - # NOTE: this bypasses DeprecationWarning, as the warning is done on construction - settings.answer.evidence_detailed_citations = False - docs = Docs() - await docs.aadd( - stub_data_dir / "bates.txt", "WikiMedia Foundation, 2023, Accessed now" - ) - response = await docs.aquery("What country is Bates from?", settings=settings) - assert "WikiMedia Foundation, 2023, Accessed now" not in response.context - - def test_case_insensitive_matching(): assert strings_similarity("my test sentence", "My test sentence") == 1.0 assert strings_similarity("a b c d e", "a b c f") == 0.5 assert strings_similarity("A B c d e", "a b c f") == 0.5 -@pytest.mark.flaky( - reruns=3, # pytest-xdist can lead to >1 DeprecationWarning - only_rerun=["AssertionError"], -) -def test_answer_rename(recwarn) -> None: - # TODO: delete this test in v6 - answer = Answer(question="") - assert isinstance(answer, PQASession) - assert len(recwarn) == 1 - warning_msg = recwarn.pop(DeprecationWarning) - assert "'Answer' class is deprecated" in str(warning_msg.message) - - @pytest.mark.parametrize( "doi_journals", [ @@ -3055,33 +3027,6 @@ async def test_timeout_resilience() -> None: assert not llm_results -def test_reader_params_deprecation_warnings(recwarn: pytest.WarningsRecorder) -> None: - """Test that deprecated settings trigger warnings and are migrated to reader_config.""" - with pytest.warns(DeprecationWarning, match="chunk_size.*deprecated"): - settings1 = Settings(parsing=ParsingSettings(chunk_size=2000)) - assert settings1.parsing.reader_config["chunk_chars"] == 2000 - with pytest.warns(DeprecationWarning, match="overlap.*deprecated"): - settings2 = Settings(parsing=ParsingSettings(overlap=50)) - assert settings2.parsing.reader_config["overlap"] == 50 - with pytest.warns(DeprecationWarning, match="pdfs_use_block_parsing.*deprecated"): - settings3 = Settings(parsing=ParsingSettings(pdfs_use_block_parsing=True)) - assert settings3.parsing.reader_config["use_block_parsing"] - with pytest.warns(DeprecationWarning, match="chunk_size.*deprecated"): - settings4 = Settings( - parsing=ParsingSettings( - chunk_size=4000, reader_config={"chunk_chars": 2000} - ) - ) - assert ( - settings4.parsing.reader_config["chunk_chars"] == 2000 - ), "Expected reader_config to win out" - - _ = Settings(parsing=ParsingSettings()) - assert not [ - w for w in recwarn if issubclass(w.category, DeprecationWarning) - ], "Expected clean settings to have no warnings" - - @pytest.mark.asyncio async def test_reader_config_propagation(stub_data_dir: Path) -> None: settings = Settings(