diff --git a/birdnames/converter.py b/birdnames/converter.py index 94d4cdd..b1422f9 100644 --- a/birdnames/converter.py +++ b/birdnames/converter.py @@ -3,11 +3,12 @@ """ import warnings -import pandas as pd -from pathlib import Path -from typing import Tuple, Union, List, Optional +from typing import List, Optional, Tuple, Union + import numpy as np -from .utils import fuzzy_match, TAXONOMIES, load_taxonomy, normalize_string +import pandas as pd + +from .utils import TAXONOMIES, fuzzy_match, load_taxonomy, normalize_string def _get_column_name(name_type: str, authority: str) -> str: @@ -83,10 +84,15 @@ def __init__( # create pd.Series for mapping from one name type to another source_taxonomy = load_taxonomy(from_authority, from_year) same_taxonomy = to_authority == from_authority and to_year == from_year + # if converting to the same type, create identity mapping + if from_col == to_col: + self.lookup = source_taxonomy[[from_col]].copy() + # Create a new column with different name temporarily for the identity mapping + self.lookup["_temp_col"] = self.lookup[from_col] # if within a taxonomy: simply index=from and values=to # if converting to scientific name, we don't need to cross taxonomies - if to_col == "scientific_name" or same_taxonomy: - self.lookup = source_taxonomy[[from_col, to_col]] + elif to_col == "scientific_name" or same_taxonomy: + self.lookup = source_taxonomy[[from_col, to_col]].copy() else: # dest_cols = [to_col] if to_col != "scientific_name" else [] dest_taxonomy = load_taxonomy(to_authority, to_year).set_index( @@ -97,7 +103,7 @@ def __init__( self.lookup = source_taxonomy.set_index("scientific_name")[ source_cols ].join(dest_taxonomy) - self.lookup = self.lookup.reset_index(drop=False)[[from_col, to_col]] + self.lookup = self.lookup.reset_index(drop=False)[[from_col, to_col]].copy() # if soft matching, apply normalization to source column if soft_matching: @@ -110,7 +116,9 @@ def __init__( self.lookup = self.lookup.drop_duplicates(subset=[from_col]) # convert to a pd.Series for fast lookup - self.lookup = self.lookup.set_index(from_col)[to_col] + # Use '_temp_col' if it exists (identity mapping case), otherwise use to_col + value_col = "_temp_col" if "_temp_col" in self.lookup.columns else to_col + self.lookup = self.lookup.set_index(from_col)[value_col] def _get_most_recent_year(self, authority: str) -> str: """Get the most recent year available for an authority. @@ -443,7 +451,7 @@ def common( authorities_with_common_name = set( TAXONOMIES[TAXONOMIES["common_name"] == True]["authority"].values ) - if not common_name_authority in authorities_with_common_name: + if common_name_authority not in authorities_with_common_name: raise ValueError( f"`common_name_authority` must be one of {authorities_with_common_name}. Got {common_name_authority}." ) diff --git a/tests/test_converter.py b/tests/test_converter.py index 5c5d7af..f824fd0 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -2,17 +2,18 @@ Comprehensive tests for the Converter class. """ -import pytest -import pandas as pd -import numpy as np -from pathlib import Path import sys +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest # Add src to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) -from birdnames import Converter import birdnames +from birdnames import Converter class TestConverter: @@ -395,6 +396,34 @@ def test_bbl_cross_authority_conversion(self): # Just ensure it doesn't crash and returns something reasonable assert result is None or isinstance(result, str) + def test_identity_conversion(self): + """Test converting from a type to the same type (identity mapping).""" + # Test scientific name to scientific name + converter = Converter( + from_type="scientific_name", + to_type="scientific_name", + from_authority="avilist", + to_authority="avilist", + ) + + result = converter.convert("Struthio camelus") + assert result == "Struthio camelus" + + # Test with list + result_list = converter.convert(["Struthio camelus", "Struthio molybdophanes"]) + assert result_list == ["Struthio camelus", "Struthio molybdophanes"] + + # Test common name to common name + converter_common = Converter( + from_type="common_name", + to_type="common_name", + from_authority="avilist", + to_authority="avilist", + ) + + result = converter_common.convert("Common Ostrich") + assert result == "Common Ostrich" + def test_determine_name_type(): """Test automatic detection of name type and authority.""" @@ -461,6 +490,15 @@ def test_scientific(): # test with ebird codes assert birdnames.scientific(["norcar"]) == ["Cardinalis cardinalis"] + # test with scientific names (issue fix: should return input as-is) + scientific_names = ["Struthio camelus", "Struthio molybdophanes"] + result = birdnames.scientific(scientific_names) + assert result == ["Struthio camelus", "Struthio molybdophanes"] + + # test with single scientific name + result = birdnames.scientific("Struthio camelus") + assert result == "Struthio camelus" + def test_common(): """Test the convenience function for common name conversion."""