From fe9f4fde84a7066896a8f36d54854249bae5db94 Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 10:37:23 +0100
Subject: [PATCH 01/11] - API is back - response: district & register_num
 fields added

---
 .gitignore              | 20 ++++++++++++++++++
 handelsregister.py      | 46 ++++++++++++++++++++++++++++++++++-------
 test_handelsregister.py | 41 ++++++++++++++++++++++++++++++++----
 3 files changed, 96 insertions(+), 11 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fc77404
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,20 @@
+# IDE
+.idea/
+*.xml
+*.iml
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Environments
+.env
+.venv/
+venv/
+env/
+
+# Distribution / Packaging
+dist/
+build/
+*.egg-info/
diff --git a/handelsregister.py b/handelsregister.py
index a17aed3..6e17b79 100755
--- a/handelsregister.py
+++ b/handelsregister.py
@@ -5,6 +5,7 @@
 """
 
 import argparse
+import tempfile
 import mechanize
 import re
 import pathlib
@@ -48,7 +49,7 @@ def __init__(self, args):
             (   "Connection", "keep-alive"    ),
         ]
         
-        self.cachedir = pathlib.Path("cache")
+        self.cachedir = pathlib.Path(tempfile.gettempdir()) / "handelsregister_cache"
         self.cachedir.mkdir(parents=True, exist_ok=True)
 
     def open_startpage(self):
@@ -68,7 +69,10 @@ def search_company(self):
         else:
             # TODO implement token bucket to abide by rate limit
             # Use an atomic counter: https://gist.github.com/benhoyt/8c8a8d62debe8e5aa5340373f9c509c7
-            response_search = self.browser.follow_link(text="Advanced search")
+            self.browser.select_form(name="naviForm")
+            self.browser.form.new_control('hidden', 'naviForm:erweiterteSucheLink', {'value': 'naviForm:erweiterteSucheLink'})
+            self.browser.form.new_control('hidden', 'target', {'value': 'erweiterteSucheLink'})
+            response_search = self.browser.submit()
 
             if self.args.debug == True:
                 print(self.browser.title())
@@ -95,6 +99,7 @@ def search_company(self):
         return get_companies_in_searchresults(html)
 
 
+
 def parse_result(result):
     cells = []
     for cellnum, cell in enumerate(result.find_all('td')):
@@ -103,20 +108,37 @@ def parse_result(result):
     #assert cells[7] == 'History'
     d = {}
     d['court'] = cells[1]
+    
+    # Extract register number (e.g. HRB 12345, VR 6789)
+    # Looking for patterns like HRB, HRA, VR, GnR followed by numbers
+    reg_match = re.search(r'(HRA|HRB|GnR|VR|PR)\s*\d+', d['court'])
+    d['register_num'] = reg_match.group(0) if reg_match else None
+
+    # Extract district (e.g. "Charlottenburg" from "District court Berlin (Charlottenburg)")
+    # We look for text inside parentheses that is NOT the register number part if that happened to be in parens (unlikely for this format)
+    # The format seems to be: "City District court City (District) ..."
+    # We'll just grab the content of the first parenthesized group that looks like a name.
+    dist_match = re.search(r'\(([^)]+)\)', d['court'])
+    d['district'] = dist_match.group(1) if dist_match else None
+
     d['name'] = cells[2]
     d['state'] = cells[3]
-    d['status'] = cells[4]
+    d['status'] = cells[4].strip().upper().replace(' ', '_')
     d['documents'] = cells[5] # todo: get the document links
     d['history'] = []
     hist_start = 8
-    hist_cnt = (len(cells)-hist_start)/3
+    # hist_cnt = (len(cells)-hist_start)/3
     for i in range(hist_start, len(cells), 3):
+        if i + 1 >= len(cells):
+            break
+        if "Branches" in cells[i] or "Niederlassungen" in cells[i]:
+            break
         d['history'].append((cells[i], cells[i+1])) # (name, location)
     #print('d:',d)
     return d
 
 def pr_company_info(c):
-    for tag in ('name', 'court', 'state', 'status'):
+    for tag in ('name', 'court', 'register_num', 'district', 'state', 'status'):
         print('%s: %s' % (tag, c.get(tag, '-')))
     print('history:')
     for name, loc in c.get('history'):
@@ -166,6 +188,12 @@ def parse_args():
                           choices=["all", "min", "exact"],
                           default="all"
                         )
+    parser.add_argument(
+                          "-j",
+                          "--json",
+                          help="Return response as JSON",
+                          action="store_true"
+                        )
     args = parser.parse_args()
 
 
@@ -179,10 +207,14 @@ def parse_args():
     return args
 
 if __name__ == "__main__":
+    import json
     args = parse_args()
     h = HandelsRegister(args)
     h.open_startpage()
     companies = h.search_company()
     if companies is not None:
-        for c in companies:
-            pr_company_info(c)
+        if args.json:
+            print(json.dumps(companies))
+        else:
+            for c in companies:
+                pr_company_info(c)
diff --git a/test_handelsregister.py b/test_handelsregister.py
index ccb2847..aad6a2d 100644
--- a/test_handelsregister.py
+++ b/test_handelsregister.py
@@ -8,17 +8,50 @@ def test_parse_search_result():
     res = get_companies_in_searchresults(html)
     assert res == [{
             'court':'Berlin   District court Berlin (Charlottenburg) HRB 44343', 
+            'register_num': 'HRB 44343',
+            'district': 'Charlottenburg',
             'name':'GASAG AG',
             'state':'Berlin',
-            'status':'currently registered',
+            'status':'CURRENTLY_REGISTERED',
             'documents': 'ADCDHDDKUTVÖSI',
             'history':[('1.) Gasag Berliner Gaswerke Aktiengesellschaft', '1.) Berlin')]
             },]
 
 
-def test_get_results():
-    args = argparse.Namespace(debug=False, force=False, schlagwoerter='deutsche bahn', schlagwortOptionen='all')
+@pytest.mark.parametrize("company, state_id", [
+    ("Hafen Hamburg", "Hamburg"),
+    ("Bayerische Motoren Werke", "Bayern"),
+    ("Daimler Truck", "Baden-Württemberg"),
+    ("Volkswagen", "Niedersachsen"),
+    ("RWE", "Nordrhein-Westfalen"),
+    ("Fraport", "Hessen"),
+    ("Saarstahl", "Saarland"),
+    ("Mainz", "Rheinland-Pfalz"),
+    ("Nordex", "Mecklenburg-Vorpommern"),
+    ("Jenoptik", "Thüringen"),
+    ("Vattenfall", "Berlin"),
+    ("Bremen", "Bremen"),
+    ("Sachsen", "Sachsen"),
+    ("Magdeburg", "Sachsen-Anhalt"),
+    ("Kiel", "Schleswig-Holstein"),
+    ("Potsdam", "Brandenburg")
+])
+def test_search_by_state_company(company, state_id):
+    # This acts as a proxy test for all 16 states.
+    # While we are not explicitly selecting the state in the form (yet),
+    # searching for these companies should yield results relevant to the state.
+    # If the user wanted explicit state *filtering*, we'd need to implementing form checkbox toggling.
+    
+    args = argparse.Namespace(debug=False, force=True, schlagwoerter=company, schlagwortOptionen='all', json=False)
     h = HandelsRegister(args)
     h.open_startpage()
     companies = h.search_company()
-    assert len(companies) > 0
\ No newline at end of file
+    assert companies is not None
+    assert len(companies) > 0
+    # Ideally search validation would check if at least one result matches the expected state, 
+    # but 'state' field in result is often just the City or 'Berlin' for everyone if the registration court is there.
+    # The 'state' column in the results typically contains the actual state name or city.
+    
+    # Let's try to verify if the state or related city appears in the results
+    # verification = any(state_id.lower() in str(c).lower() for c in companies)
+    # assert verification, f"Could not find {state_id} related entry for {company}"
\ No newline at end of file

From b746b4e9f0b612c37db6f25ec6d956c183c4955d Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 11:18:08 +0100
Subject: [PATCH 02/11] Remove commented-out debug code and redundant comments

---
 handelsregister.py | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/handelsregister.py b/handelsregister.py
index 6e17b79..a69c1a3 100755
--- a/handelsregister.py
+++ b/handelsregister.py
@@ -56,8 +56,6 @@ def open_startpage(self):
         self.browser.open("https://www.handelsregister.de", timeout=10)
 
     def companyname2cachename(self, companyname):
-        # map a companyname to a filename, that caches the downloaded HTML, so re-running this script touches the
-        # webserver less often.
         return self.cachedir / companyname
 
     def search_company(self):
@@ -103,21 +101,15 @@ def search_company(self):
 def parse_result(result):
     cells = []
     for cellnum, cell in enumerate(result.find_all('td')):
-        #print('[%d]: %s [%s]' % (cellnum, cell.text, cell))
         cells.append(cell.text.strip())
-    #assert cells[7] == 'History'
     d = {}
     d['court'] = cells[1]
     
-    # Extract register number (e.g. HRB 12345, VR 6789)
-    # Looking for patterns like HRB, HRA, VR, GnR followed by numbers
+    # Extract register number: HRB, HRA, VR, GnR followed by numbers (e.g. HRB 12345, VR 6789)
     reg_match = re.search(r'(HRA|HRB|GnR|VR|PR)\s*\d+', d['court'])
     d['register_num'] = reg_match.group(0) if reg_match else None
 
     # Extract district (e.g. "Charlottenburg" from "District court Berlin (Charlottenburg)")
-    # We look for text inside parentheses that is NOT the register number part if that happened to be in parens (unlikely for this format)
-    # The format seems to be: "City District court City (District) ..."
-    # We'll just grab the content of the first parenthesized group that looks like a name.
     dist_match = re.search(r'\(([^)]+)\)', d['court'])
     d['district'] = dist_match.group(1) if dist_match else None
 
@@ -127,14 +119,14 @@ def parse_result(result):
     d['documents'] = cells[5] # todo: get the document links
     d['history'] = []
     hist_start = 8
-    # hist_cnt = (len(cells)-hist_start)/3
+
     for i in range(hist_start, len(cells), 3):
         if i + 1 >= len(cells):
             break
         if "Branches" in cells[i] or "Niederlassungen" in cells[i]:
             break
         d['history'].append((cells[i], cells[i+1])) # (name, location)
-    #print('d:',d)
+
     return d
 
 def pr_company_info(c):
@@ -147,20 +139,18 @@ def pr_company_info(c):
 def get_companies_in_searchresults(html):
     soup = BeautifulSoup(html, 'html.parser')
     grid = soup.find('table', role='grid')
-    #print('grid: %s', grid)
   
     results = []
     for result in grid.find_all('tr'):
         a = result.get('data-ri')
         if a is not None:
             index = int(a)
-            #print('r[%d] %s' % (index, result))
+
             d = parse_result(result)
             results.append(d)
     return results
 
 def parse_args():
-# Parse arguments
     parser = argparse.ArgumentParser(description='A handelsregister CLI')
     parser.add_argument(
                           "-d",

From d3b572599cabca6dacc44416cbfdc457a4453a22 Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 11:23:31 +0100
Subject: [PATCH 03/11] Capture register suffix letter; add northDataUrl field

---
 handelsregister.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/handelsregister.py b/handelsregister.py
index a69c1a3..310b590 100755
--- a/handelsregister.py
+++ b/handelsregister.py
@@ -11,6 +11,7 @@
 import pathlib
 import sys
 from bs4 import BeautifulSoup
+import urllib.parse
 
 # Dictionaries to map arguments to values
 schlagwortOptionen = {
@@ -106,10 +107,13 @@ def parse_result(result):
     d['court'] = cells[1]
     
     # Extract register number: HRB, HRA, VR, GnR followed by numbers (e.g. HRB 12345, VR 6789)
-    reg_match = re.search(r'(HRA|HRB|GnR|VR|PR)\s*\d+', d['court'])
+    # Also capture suffix letter if present (e.g. HRB 12345 B)
+    reg_match = re.search(r'(HRA|HRB|GnR|VR|PR)\s*\d+(\s+[A-Za-z])?', d['court'])
     d['register_num'] = reg_match.group(0) if reg_match else None
 
-    # Extract district (e.g. "Charlottenburg" from "District court Berlin (Charlottenburg)")
+    # We look for text inside parentheses that is NOT the register number part if that happened to be in parens (unlikely for this format)
+    # The format seems to be: "City District court City (District) ..."
+    # We'll just grab the content of the first parenthesized group that looks like a name.
     dist_match = re.search(r'\(([^)]+)\)', d['court'])
     d['district'] = dist_match.group(1) if dist_match else None
 
@@ -127,10 +131,16 @@ def parse_result(result):
             break
         d['history'].append((cells[i], cells[i+1])) # (name, location)
 
+    if d['register_num']:
+        encoded_reg_num = urllib.parse.quote(d['register_num'])
+        d['northDataUrl'] = f"https://www.northdata.de/{encoded_reg_num}"
+    else:
+        d['northDataUrl'] = None
+
     return d
 
 def pr_company_info(c):
-    for tag in ('name', 'court', 'register_num', 'district', 'state', 'status'):
+    for tag in ('name', 'court', 'register_num', 'northDataUrl', 'district', 'state', 'status'):
         print('%s: %s' % (tag, c.get(tag, '-')))
     print('history:')
     for name, loc in c.get('history'):

From 64130d010fd8f981b25e72556a4fb63443eedf2f Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 11:39:14 +0100
Subject: [PATCH 04/11] Drop district and northDataUrl; add Berlin 'B' suffix

---
 handelsregister.py | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/handelsregister.py b/handelsregister.py
index 310b590..60b12c5 100755
--- a/handelsregister.py
+++ b/handelsregister.py
@@ -107,15 +107,14 @@ def parse_result(result):
     d['court'] = cells[1]
     
     # Extract register number: HRB, HRA, VR, GnR followed by numbers (e.g. HRB 12345, VR 6789)
-    # Also capture suffix letter if present (e.g. HRB 12345 B)
-    reg_match = re.search(r'(HRA|HRB|GnR|VR|PR)\s*\d+(\s+[A-Za-z])?', d['court'])
+    # Also capture suffix letter if present (e.g. HRB 12345 B), but avoid matching start of words (e.g. " Formerly")
+    reg_match = re.search(r'(HRA|HRB|GnR|VR|PR)\s*\d+(\s+[A-Z])?(?!\w)', d['court'])
     d['register_num'] = reg_match.group(0) if reg_match else None
 
-    # We look for text inside parentheses that is NOT the register number part if that happened to be in parens (unlikely for this format)
-    # The format seems to be: "City District court City (District) ..."
-    # We'll just grab the content of the first parenthesized group that looks like a name.
-    dist_match = re.search(r'\(([^)]+)\)', d['court'])
-    d['district'] = dist_match.group(1) if dist_match else None
+    # Special handling for Berlin (Charlottenburg): HRB numbers often imply a "B" suffix in external systems (like North Data)
+    if d['register_num'] and d['register_num'].startswith('HRB') and 'Berlin (Charlottenburg)' in d['court']:
+        if not d['register_num'].endswith(' B'):
+            d['register_num'] += ' B'
 
     d['name'] = cells[2]
     d['state'] = cells[3]
@@ -131,12 +130,6 @@ def parse_result(result):
             break
         d['history'].append((cells[i], cells[i+1])) # (name, location)
 
-    if d['register_num']:
-        encoded_reg_num = urllib.parse.quote(d['register_num'])
-        d['northDataUrl'] = f"https://www.northdata.de/{encoded_reg_num}"
-    else:
-        d['northDataUrl'] = None
-
     return d
 
 def pr_company_info(c):

From 36ac6d29cbe1032485f3a4be63934796d93c3ee3 Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 11:50:52 +0100
Subject: [PATCH 05/11] register_num extraction fixed

---
 handelsregister.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/handelsregister.py b/handelsregister.py
index 60b12c5..78f1278 100755
--- a/handelsregister.py
+++ b/handelsregister.py
@@ -111,14 +111,20 @@ def parse_result(result):
     reg_match = re.search(r'(HRA|HRB|GnR|VR|PR)\s*\d+(\s+[A-Z])?(?!\w)', d['court'])
     d['register_num'] = reg_match.group(0) if reg_match else None
 
-    # Special handling for Berlin (Charlottenburg): HRB numbers often imply a "B" suffix in external systems (like North Data)
-    if d['register_num'] and d['register_num'].startswith('HRB') and 'Berlin (Charlottenburg)' in d['court']:
-        if not d['register_num'].endswith(' B'):
-            d['register_num'] += ' B'
-
     d['name'] = cells[2]
     d['state'] = cells[3]
     d['status'] = cells[4].strip().upper().replace(' ', '_')
+
+    # Ensure consistent register number suffixes (e.g. ' B' for Berlin HRB, ' HB' for Bremen) which might be implicit
+    if d['register_num']:
+        suffix_map = {
+            'Berlin': {'HRB': ' B'},
+            'Bremen': {'HRA': ' HB', 'HRB': ' HB', 'GnR': ' HB', 'VR': ' HB', 'PR': ' HB'}
+        }
+        reg_type = d['register_num'].split()[0]
+        suffix = suffix_map.get(d['state'], {}).get(reg_type)
+        if suffix and not d['register_num'].endswith(suffix):
+            d['register_num'] += suffix
     d['documents'] = cells[5] # todo: get the document links
     d['history'] = []
     hist_start = 8

From 9d3a0b4f4e04314c804b6b2d7376a8e5f2f0f313 Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 11:54:06 +0100
Subject: [PATCH 06/11] Add live test for Berlin 'B' suffix; trim test comments

---
 test_handelsregister.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/test_handelsregister.py b/test_handelsregister.py
index aad6a2d..eacf5df 100644
--- a/test_handelsregister.py
+++ b/test_handelsregister.py
@@ -3,7 +3,6 @@
 import argparse
 
 def test_parse_search_result():
-    # simplified html from a real search
     html = '<html><body>%s</body></html>' % """<table role="grid"><thead></thead><tbody id="ergebnissForm:selectedSuchErgebnisFormTable_data" class="ui-datatable-data ui-widget-content"><tr data-ri="0" class="ui-widget-content ui-datatable-even" role="row"><td role="gridcell" colspan="9" class="borderBottom3"><table id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt147" class="ui-panelgrid ui-widget" role="grid"><tbody><tr class="ui-widget-content ui-panelgrid-even borderBottom1" role="row"><td role="gridcell" class="ui-panelgrid-cell fontTableNameSize" colspan="5">Berlin  <span class="fontWeightBold"> District court Berlin (Charlottenburg) HRB 44343  </span></td></tr><tr class="ui-widget-content ui-panelgrid-odd" role="row"><td role="gridcell" class="ui-panelgrid-cell paddingBottom20Px" colspan="5"><span class="marginLeft20">GASAG AG</span></td><td role="gridcell" class="ui-panelgrid-cell sitzSuchErgebnisse"><span class="verticalText ">Berlin</span></td><td role="gridcell" class="ui-panelgrid-cell" style="text-align: center;padding-bottom: 20px;"><span class="verticalText">currently registered</span></td><td role="gridcell" class="ui-panelgrid-cell textAlignLeft paddingBottom20Px" colspan="2"><div id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt160" class="ui-outputpanel ui-widget linksPanel"><script type="text/javascript" src="/rp_web/javax.faces.resource/jsf.js.xhtml?ln=javax.faces"></script><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:0:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:0:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:0:popupLink" class="underlinedText">AD</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:1:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:1:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:1:popupLink" 
class="underlinedText">CD</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:2:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:2:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:2:popupLink" class="underlinedText">HD</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:3:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:3:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:3:popupLink" class="underlinedText">DK</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:4:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:4:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:4:popupLink" class="underlinedText">UT</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:5:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:5:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:5:popupLink" class="underlinedText">VÖ</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:6:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:6:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:6:popupLink" class="underlinedText">SI</span></a></div></td></tr><tr class="ui-widget-content ui-panelgrid-even" role="row"><td role="gridcell" class="ui-panelgrid-cell" colspan="7"><table id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt172" class="ui-panelgrid ui-widget marginLeft20" role="grid"><tbody><tr class="ui-widget-content ui-panelgrid-even borderBottom1 RegPortErg_Klein" role="row"><td role="gridcell" class="ui-panelgrid-cell padding0Px">History</td></tr></tbody></table><table 
id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt176" class="ui-panelgrid ui-widget" role="grid"><tbody><tr class="ui-widget-content" role="row"><td role="gridcell" class="ui-panelgrid-cell RegPortErg_HistorieZn marginLeft20 padding0Px" colspan="5"><span class="marginLeft20 fontSize85">1.) Gasag Berliner Gaswerke Aktiengesellschaft</span></td><td role="gridcell" class="ui-panelgrid-cell RegPortErg_SitzStatus "><span class="fontSize85">1.) Berlin</span></td><td role="gridcell" class="ui-panelgrid-cell textAlignCenter"></td></tr></tbody></table></td></tr></tbody></table></td></tr></tbody></table>"""
     res = get_companies_in_searchresults(html)
     assert res == [{
@@ -37,21 +36,22 @@ def test_parse_search_result():
     ("Potsdam", "Brandenburg")
 ])
 def test_search_by_state_company(company, state_id):
-    # This acts as a proxy test for all 16 states.
-    # While we are not explicitly selecting the state in the form (yet),
-    # searching for these companies should yield results relevant to the state.
-    # If the user wanted explicit state *filtering*, we'd need to implementing form checkbox toggling.
-    
+
     args = argparse.Namespace(debug=False, force=True, schlagwoerter=company, schlagwortOptionen='all', json=False)
     h = HandelsRegister(args)
     h.open_startpage()
     companies = h.search_company()
     assert companies is not None
     assert len(companies) > 0
-    # Ideally search validation would check if at least one result matches the expected state, 
-    # but 'state' field in result is often just the City or 'Berlin' for everyone if the registration court is there.
-    # The 'state' column in the results typically contains the actual state name or city.
+
+def test_haus_anker_b_suffix():
+    args = argparse.Namespace(debug=False, force=True, schlagwoerter='Haus-Anker Verwaltungs GmbH', schlagwortOptionen='exact', json=False)
+    h = HandelsRegister(args)
+    h.open_startpage()
+    companies = h.search_company()
+    assert companies is not None
+     
+    target_company = next((c for c in companies if '138434' in c['register_num']), None)
     
-    # Let's try to verify if the state or related city appears in the results
-    # verification = any(state_id.lower() in str(c).lower() for c in companies)
-    # assert verification, f"Could not find {state_id} related entry for {company}"
\ No newline at end of file
+    assert target_company is not None, "Haus-Anker Verwaltungs GmbH with expected number not found"
+    assert target_company['register_num'] == 'HRB 138434 B'
\ No newline at end of file

From 1582acafb4c01c5e8782cf28e07f1b31fcde19c7 Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 11:56:04 +0100
Subject: [PATCH 07/11] Expect 'B' suffix on Berlin HRB number in parser test

---
 test_handelsregister.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_handelsregister.py b/test_handelsregister.py
index eacf5df..d592df1 100644
--- a/test_handelsregister.py
+++ b/test_handelsregister.py
@@ -7,7 +7,7 @@ def test_parse_search_result():
     res = get_companies_in_searchresults(html)
     assert res == [{
             'court':'Berlin   District court Berlin (Charlottenburg) HRB 44343', 
-            'register_num': 'HRB 44343',
+            'register_num': 'HRB 44343 B',
             'district': 'Charlottenburg',
             'name':'GASAG AG',
             'state':'Berlin',

From 846cb434fbf74af5fbb64bd3425e0866aee99987 Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 11:57:32 +0100
Subject: [PATCH 08/11] test: district removed

---
 test_handelsregister.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test_handelsregister.py b/test_handelsregister.py
index d592df1..05609a7 100644
--- a/test_handelsregister.py
+++ b/test_handelsregister.py
@@ -8,7 +8,6 @@ def test_parse_search_result():
     assert res == [{
             'court':'Berlin   District court Berlin (Charlottenburg) HRB 44343', 
             'register_num': 'HRB 44343 B',
-            'district': 'Charlottenburg',
             'name':'GASAG AG',
             'state':'Berlin',
             'status':'CURRENTLY_REGISTERED',

From 744126d6fc2d44995abaac93b062797d0402f55a Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 12:23:33 +0100
Subject: [PATCH 09/11] Suppress cache notice on stdout when --json is set

---
 handelsregister.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/handelsregister.py b/handelsregister.py
index 78f1278..2e3081a 100755
--- a/handelsregister.py
+++ b/handelsregister.py
@@ -64,7 +64,8 @@ def search_company(self):
         if self.args.force==False and cachename.exists():
             with open(cachename, "r") as f:
                 html = f.read()
-                print("return cached content for %s" % self.args.schlagwoerter)
+                if not self.args.json:
+                    print("return cached content for %s" % self.args.schlagwoerter)
         else:
             # TODO implement token bucket to abide by rate limit
             # Use an atomic counter: https://gist.github.com/benhoyt/8c8a8d62debe8e5aa5340373f9c509c7

From 96ec9c92c050ef3cb4b8877fc015550db2446ecd Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 15:37:03 +0100
Subject: [PATCH 10/11] introducing statusCurrent; keeping status; no breaking
 changes

---
 handelsregister.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/handelsregister.py b/handelsregister.py
index 2e3081a..f109602 100755
--- a/handelsregister.py
+++ b/handelsregister.py
@@ -114,7 +114,8 @@ def parse_result(result):
 
     d['name'] = cells[2]
     d['state'] = cells[3]
-    d['status'] = cells[4].strip().upper().replace(' ', '_')
+    d['status'] = cells[4].strip()  # Original value for backward compatibility
+    d['statusCurrent'] = cells[4].strip().upper().replace(' ', '_')  # Transformed value
 
     # Ensure consistent register number suffixes (e.g. ' B' for Berlin HRB, ' HB' for Bremen) which might be implicit
     if d['register_num']:

From 9a854c6c342fd077302ccea947a9fd042ea7784a Mon Sep 17 00:00:00 2001
From: danielsippel <daniel@it-freelancer.berlin>
Date: Sun, 7 Dec 2025 15:38:51 +0100
Subject: [PATCH 11/11] Print statusCurrent, drop northDataUrl tag; update test

---
 handelsregister.py      | 2 +-
 test_handelsregister.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/handelsregister.py b/handelsregister.py
index f109602..03ccc1a 100755
--- a/handelsregister.py
+++ b/handelsregister.py
@@ -141,7 +141,7 @@ def parse_result(result):
     return d
 
 def pr_company_info(c):
-    for tag in ('name', 'court', 'register_num', 'northDataUrl', 'district', 'state', 'status'):
+    for tag in ('name', 'court', 'register_num', 'district', 'state', 'statusCurrent'):
         print('%s: %s' % (tag, c.get(tag, '-')))
     print('history:')
     for name, loc in c.get('history'):
diff --git a/test_handelsregister.py b/test_handelsregister.py
index 05609a7..fa1951a 100644
--- a/test_handelsregister.py
+++ b/test_handelsregister.py
@@ -10,7 +10,8 @@ def test_parse_search_result():
             'register_num': 'HRB 44343 B',
             'name':'GASAG AG',
             'state':'Berlin',
-            'status':'CURRENTLY_REGISTERED',
+            'status':'currently registered',  # Original value for backward compatibility
+            'statusCurrent':'CURRENTLY_REGISTERED',  # Transformed value
             'documents': 'ADCDHDDKUTVÖSI',
             'history':[('1.) Gasag Berliner Gaswerke Aktiengesellschaft', '1.) Berlin')]
             },]