From 19911085daca6cb0c98e0601c59743b9cee460fc Mon Sep 17 00:00:00 2001
From: Alejandro de la Vega <aleph4@gmail.com>
Date: Tue, 19 Aug 2025 16:39:08 -0500
Subject: [PATCH 1/2] Extract PMCID in both ways

---
 src/pubget/_utils.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/pubget/_utils.py b/src/pubget/_utils.py
index 30c2ae9..a1c88a0 100644
--- a/src/pubget/_utils.py
+++ b/src/pubget/_utils.py
@@ -132,9 +132,18 @@ def load_stylesheet(stylesheet_name: str) -> etree.XSLT:
 
 def get_pmcid(article: Union[etree.ElementTree, etree.Element]) -> int:
     """Extract the PubMedCentral ID from an XML article."""
-    return int(
-        article.find("front/article-meta/article-id[@pub-id-type='pmc']").text
+    # The ID may be tagged pub-id-type 'pmc' or 'pmcid'; the 'pmcid' form
+    # may carry a "PMC" prefix, which is stripped before conversion.
+    pmc = article.find("front/article-meta/article-id[@pub-id-type='pmc']")
+    pmcid = article.find(
+        "front/article-meta/article-id[@pub-id-type='pmcid']"
     )
+    # Compare with None: a childless lxml element is falsy even when found.
+    if pmc is not None:
+        return int(pmc.text)
+    if pmcid is not None:
+        return int(pmcid.text.replace("PMC", ""))
+    raise ValueError("No PMC ID found in the article XML.")
 
 
 def get_pmcid_from_article_dir(article_dir: Path) -> int:

From e0d365619174780cde53c79245684089dbb132ab Mon Sep 17 00:00:00 2001
From: Alejandro de la Vega <aleph4@gmail.com>
Date: Tue, 19 Aug 2025 17:00:38 -0500
Subject: [PATCH 2/2] Extract pmcid correctly in article

---
 src/pubget/_text.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pubget/_text.py b/src/pubget/_text.py
index 48efeb4..847544d 100644
--- a/src/pubget/_text.py
+++ b/src/pubget/_text.py
@@ -39,5 +39,5 @@ def extract(
         for part_name in self.fields:
             elem = transformed.find(part_name)
             result[part_name] = elem.text
-        result["pmcid"] = int(result["pmcid"])
+        result["pmcid"] = _utils.get_pmcid(article)
         return result