From 1dfbad8d7d08b8a6a1db17a25d29d9052bc64444 Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Thu, 28 Aug 2025 15:47:36 +0200
Subject: [PATCH 01/11] refactor: savestate

---
 .../FR/geo/latitude_l93/__init__.py           |  1 +
 .../geo/latitude_wgs_fr_metropole/__init__.py |  1 +
 .../FR/geo/longitude_l93/__init__.py          |  1 +
 .../longitude_wgs_fr_metropole/__init__.py    |  1 +
 .../FR/{other => temp}/date_fr/__init__.py    |  1 +
 csv_detective/detect_fields/__init__.py       |  3 +-
 .../geo/json_geojson/__init__.py              |  1 +
 .../geo/latitude_wgs/__init__.py              |  1 +
 .../geo/longitude_wgs/__init__.py             |  1 +
 .../temp/datetime_rfc822/__init__.py          |  1 +
 .../detect_fields/temp/year/__init__.py       |  1 +
 .../FR/{other => temp}/date_fr/__init__.py    |  0
 csv_detective/detect_labels/__init__.py       |  3 +-
 csv_detective/detection/formats.py            |  1 +
 csv_detective/load_tests.py                   | 22 +++++-
 csv_detective/parsing/columns.py              | 70 +++++++++++++------
 csv_detective/validate.py                     |  1 +
 tests/test_fields.py                          |  3 +-
 18 files changed, 83 insertions(+), 30 deletions(-)
 rename csv_detective/detect_fields/FR/{other => temp}/date_fr/__init__.py (94%)
 rename csv_detective/detect_labels/FR/{other => temp}/date_fr/__init__.py (100%)

diff --git a/csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py b/csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py
index c01fc58f..64e2c654 100644
--- a/csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py
+++ b/csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py
@@ -4,6 +4,7 @@
 from csv_detective.detect_fields.other.float import float_casting
 
 PROPORTION = 0.9
+PARENT = "float"
 
 _latitudel93 = LatitudeL93()
 
diff --git a/csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py b/csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py
index 333fa182..53153d87 100644
--- a/csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py
+++ b/csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py
@@ -1,6 +1,7 @@
 from csv_detective.detect_fields.other.float import _is as is_float
 
 PROPORTION = 0.9
+PARENT = "latitude_wgs"
 
 
 def _is(val):
diff --git a/csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py b/csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py
index 58cfeed1..dfa90fc7 100644
--- a/csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py
+++ b/csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py
@@ -4,6 +4,7 @@
 from csv_detective.detect_fields.other.float import float_casting
 
 PROPORTION = 0.9
+PARENT = "float"
 
 _longitudel93 = LongitudeL93()
 
diff --git a/csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py b/csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py
index 30036066..7fa7b60d 100644
--- a/csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py
+++ b/csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py
@@ -1,6 +1,7 @@
 from csv_detective.detect_fields.other.float import _is as is_float
 
 PROPORTION = 0.9
+PARENT = "longitude_wgs"
 
 
 def _is(val):
diff --git a/csv_detective/detect_fields/FR/other/date_fr/__init__.py b/csv_detective/detect_fields/FR/temp/date_fr/__init__.py
similarity index 94%
rename from csv_detective/detect_fields/FR/other/date_fr/__init__.py
rename to csv_detective/detect_fields/FR/temp/date_fr/__init__.py
index 1d234dda..89e8419c 100644
--- a/csv_detective/detect_fields/FR/other/date_fr/__init__.py
+++ b/csv_detective/detect_fields/FR/temp/date_fr/__init__.py
@@ -1,6 +1,7 @@
 import re
 
 PROPORTION = 1
+PARENT = "date"
 regex = (
     r"^\d{1,2}[ \-](janvier|fevrier|mars|avril|mai|juin|juillet|aout|septembre"
     r"|octobre|novembre|decembre)[ \-]\d{4}$"
diff --git a/csv_detective/detect_fields/__init__.py b/csv_detective/detect_fields/__init__.py
index c47c0019..fb6d06e5 100644
--- a/csv_detective/detect_fields/__init__.py
+++ b/csv_detective/detect_fields/__init__.py
@@ -21,7 +21,6 @@
     code_rna,
     code_waldec,
     csp_insee,
-    date_fr,
     insee_ape700,
     sexe,
     siren,
@@ -29,7 +28,7 @@
     tel_fr,
     uai,
 )
-from .FR.temp import jour_de_la_semaine, mois_de_annee
+from .FR.temp import date_fr, jour_de_la_semaine, mois_de_annee
 from .geo import (
     iso_country_code_alpha2,
     iso_country_code_alpha3,
diff --git a/csv_detective/detect_fields/geo/json_geojson/__init__.py b/csv_detective/detect_fields/geo/json_geojson/__init__.py
index 2f7a06bd..be0f80bb 100644
--- a/csv_detective/detect_fields/geo/json_geojson/__init__.py
+++ b/csv_detective/detect_fields/geo/json_geojson/__init__.py
@@ -1,6 +1,7 @@
 import json
 
 PROPORTION = 0.9
+PARENT = "json"
 
 
 def _is(val):
diff --git a/csv_detective/detect_fields/geo/latitude_wgs/__init__.py b/csv_detective/detect_fields/geo/latitude_wgs/__init__.py
index 4ae9ef19..ab374ad5 100644
--- a/csv_detective/detect_fields/geo/latitude_wgs/__init__.py
+++ b/csv_detective/detect_fields/geo/latitude_wgs/__init__.py
@@ -1,6 +1,7 @@
 from csv_detective.detect_fields.other.float import _is as is_float
 
 PROPORTION = 0.9
+PARENT = "float"
 
 
 def _is(val):
diff --git a/csv_detective/detect_fields/geo/longitude_wgs/__init__.py b/csv_detective/detect_fields/geo/longitude_wgs/__init__.py
index 83a5ea2a..33f3a496 100644
--- a/csv_detective/detect_fields/geo/longitude_wgs/__init__.py
+++ b/csv_detective/detect_fields/geo/longitude_wgs/__init__.py
@@ -1,6 +1,7 @@
 from csv_detective.detect_fields.other.float import _is as is_float
 
 PROPORTION = 0.9
+PARENT = "float"
 
 
 def _is(val):
diff --git a/csv_detective/detect_fields/temp/datetime_rfc822/__init__.py b/csv_detective/detect_fields/temp/datetime_rfc822/__init__.py
index ea2f6078..2a4b3584 100644
--- a/csv_detective/detect_fields/temp/datetime_rfc822/__init__.py
+++ b/csv_detective/detect_fields/temp/datetime_rfc822/__init__.py
@@ -1,6 +1,7 @@
 import re
 
 PROPORTION = 1
+PARENT = "datetime_aware"
 
 
 def _is(val):
diff --git a/csv_detective/detect_fields/temp/year/__init__.py b/csv_detective/detect_fields/temp/year/__init__.py
index 79a68e1f..2975b306 100644
--- a/csv_detective/detect_fields/temp/year/__init__.py
+++ b/csv_detective/detect_fields/temp/year/__init__.py
@@ -1,4 +1,5 @@
 PROPORTION = 1
+PARENT = "int"
 
 
 def _is(val):
diff --git a/csv_detective/detect_labels/FR/other/date_fr/__init__.py b/csv_detective/detect_labels/FR/temp/date_fr/__init__.py
similarity index 100%
rename from csv_detective/detect_labels/FR/other/date_fr/__init__.py
rename to csv_detective/detect_labels/FR/temp/date_fr/__init__.py
diff --git a/csv_detective/detect_labels/__init__.py b/csv_detective/detect_labels/__init__.py
index c78d34cb..f5ffea16 100644
--- a/csv_detective/detect_labels/__init__.py
+++ b/csv_detective/detect_labels/__init__.py
@@ -20,7 +20,6 @@
     code_rna,
     code_waldec,
     csp_insee,
-    date_fr,
     insee_ape700,
     sexe,
     siren,
@@ -28,7 +27,7 @@
     tel_fr,
     uai,
 )
-from .FR.temp import jour_de_la_semaine, mois_de_annee
+from .FR.temp import date_fr, jour_de_la_semaine, mois_de_annee
 from .geo import (
     iso_country_code_alpha2,
     iso_country_code_alpha3,
diff --git a/csv_detective/detection/formats.py b/csv_detective/detection/formats.py
index 51fb52f1..d5676651 100755
--- a/csv_detective/detection/formats.py
+++ b/csv_detective/detection/formats.py
@@ -73,6 +73,7 @@ def detect_formats(
     scores_table_fields = test_col(
         table, all_tests_fields, limited_output, skipna=skipna, verbose=verbose
     )
+    print(scores_table_fields)
     analysis["columns_fields"] = prepare_output_dict(scores_table_fields, limited_output)
 
     # Perform testing on labels
diff --git a/csv_detective/load_tests.py b/csv_detective/load_tests.py
index e1938ad0..a3f2382d 100755
--- a/csv_detective/load_tests.py
+++ b/csv_detective/load_tests.py
@@ -1,10 +1,11 @@
 import os
+from types import ModuleType
 from typing import Union
 
 from csv_detective import detect_fields, detect_labels  # noqa
 
 
-def get_all_packages(detect_type) -> list:
+def get_all_packages(detect_type) -> list[str]:
     root_dir = os.path.dirname(os.path.abspath(__file__)) + "/" + detect_type
     modules = []
     for dirpath, _, filenames in os.walk(root_dir):
@@ -20,7 +21,7 @@ def get_all_packages(detect_type) -> list:
 def return_all_tests(
     user_input_tests: Union[str, list],
     detect_type: str,
-) -> list:
+) -> list[ModuleType]:
     """
     returns all tests that have a method _is and are listed in the user_input_tests
     the function can select a sub_package from csv_detective
@@ -51,3 +52,20 @@ def return_all_tests(
     # to remove groups of tests
     all_tests = [test for test in all_tests if "_is" in dir(test)]
     return all_tests
+
+
+def build_test_priorities(tests: list[ModuleType]) -> tuple[dict[str, dict], dict[str, dict]]:
+    tests_dict = {
+        test.__name__.split(".")[-1]: {
+            "func": test._is,
+            "prop": test.PROPORTION,
+            "parent": getattr(test, "PARENT", None),
+        }
+        for test in tests
+    }
+    parents = {v["parent"] for v in tests_dict.values() if v["parent"] is not None}
+    specific_tests = {
+        k: v for k, v in tests_dict.items()
+        if k not in parents
+    }
+    return tests_dict, specific_tests
diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py
index b83bec25..09fdd335 100755
--- a/csv_detective/parsing/columns.py
+++ b/csv_detective/parsing/columns.py
@@ -1,9 +1,11 @@
+from collections import defaultdict
 import logging
 from time import time
 from typing import Callable
 
 import pandas as pd
 
+from csv_detective.load_tests import build_test_priorities
 from csv_detective.utils import display_logs_depending_process_time
 
 MAX_ROWS_ANALYSIS = int(1e4)
@@ -89,40 +91,64 @@ def test_col(
     if verbose:
         start = time()
         logging.info("Testing columns to get types")
-    test_funcs = {
-        test.__name__.split(".")[-1]: {
-            "func": test._is,
-            "prop": test.PROPORTION,
-        }
-        for test in all_tests
-    }
-    return_table = pd.DataFrame(columns=table.columns)
-    for idx, (key, value) in enumerate(test_funcs.items()):
+    test_funcs, specific_tests = build_test_priorities(all_tests)
+    results = defaultdict(dict)
+    nb_cols = len(table.columns)
+    for idx, column in enumerate(table.columns):
         if verbose:
-            start_type = time()
-            logging.info(f"\t- Starting with type '{key}'")
-        # improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
-        # => the following needs to change, "apply" means all columns are tested for one type at once
-        return_table.loc[key] = table.apply(
-            lambda serie: test_col_val(
-                serie,
-                value["func"],
-                value["prop"],
+            start_col = time()
+            logging.info(f"\t- Starting with column '{column}'")
+        tested = set()
+        # testing for the most specific formats first (we have early stops in test_col_val)
+        for test_name, test_attr in specific_tests.items():
+            results[column][test_name] = test_col_val(
+                table[column],
+                test_attr["func"],
+                test_attr["prop"],
                 skipna=skipna,
                 limited_output=limited_output,
                 verbose=verbose,
             )
-        )
+            print(f"{test_name}: {results[column][test_name]}")
+            tested.add(test_name)
+            # should we break if one of the specific tests is successful?
+        # performing less and less specific tests if specific ones fail
+        for test_name in [test for test in specific_tests if test not in tested]:
+            current_test = test_name
+            while test_funcs[current_test]["parent"] is not None:
+                if test_funcs[current_test]["parent"] in results[column]:
+                    print(f"already here {test_name}: {results[column][test_name]}")
+                    # already tested as a parent of a previous test
+                    break
+                if results[column][current_test] > 0:
+                    # if a child test is successful, we set the parent's score to the same value
+                    # this is not perfect: the column can be 50% child but 100% parent
+                    # we would have to perform the parent test to know exactly, but this saves much time
+                    results[column][test_funcs[current_test]["parent"]] = results[column][current_test]
+                    print(f"bypassed {test_name}: {results[column][test_name]}")
+                else:
+                    results[column][test_funcs[current_test]["parent"]] = test_col_val(
+                        table[column],
+                        test_attr["func"],
+                        test_attr["prop"],
+                        skipna=skipna,
+                        limited_output=limited_output,
+                        verbose=verbose,
+                    )
+                    print(f"processed {test_name}: {results[column][test_name]}")
+                    tested.add(current_test)
+                current_test = test_funcs[current_test]["parent"]
         if verbose:
             display_logs_depending_process_time(
-                f'\t> Done with type "{key}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(test_funcs)})',
-                time() - start_type,
+                f'\t> Done with column "{column}" in {round(time() - start_col, 3)}s'
+                f' ({idx + 1}/{nb_cols}), {len(tested)} tests performed',
+                time() - start_col,
             )
     if verbose:
         display_logs_depending_process_time(
             f"Done testing columns in {round(time() - start, 3)}s", time() - start
         )
-    return return_table
+    return pd.DataFrame(results)
 
 
 def test_label(table: pd.DataFrame, all_tests: list, limited_output: bool, verbose: bool = False):
diff --git a/csv_detective/validate.py b/csv_detective/validate.py
index 1648b65e..2a58c48a 100755
--- a/csv_detective/validate.py
+++ b/csv_detective/validate.py
@@ -13,6 +13,7 @@
     t.__name__.split(".")[-1]: {
         "func": t._is,
         "prop": t.PROPORTION,
+        "parent": getattr(t, "PARENT", None),
     }
     for t in return_all_tests("ALL", "detect_fields")
 }
diff --git a/tests/test_fields.py b/tests/test_fields.py
index c1397645..6e833912 100644
--- a/tests/test_fields.py
+++ b/tests/test_fields.py
@@ -29,7 +29,6 @@
     code_rna,
     code_waldec,
     csp_insee,
-    date_fr,
     insee_ape700,
     sexe,
     siren,
@@ -37,7 +36,7 @@
     tel_fr,
     uai,
 )
-from csv_detective.detect_fields.FR.temp import jour_de_la_semaine, mois_de_annee
+from csv_detective.detect_fields.FR.temp import date_fr, jour_de_la_semaine, mois_de_annee
 from csv_detective.detect_fields.geo import (
     iso_country_code_alpha2,
     iso_country_code_alpha3,

From 79a1e61f04b9dd840824fe297f2737d57b754eae Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Thu, 28 Aug 2025 17:03:12 +0200
Subject: [PATCH 02/11] fix: make it work

---
 csv_detective/parsing/columns.py | 30 ++++++++++++++----------------
 csv_detective/validate.py        | 11 ++---------
 2 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py
index 09fdd335..c2027ed2 100755
--- a/csv_detective/parsing/columns.py
+++ b/csv_detective/parsing/columns.py
@@ -109,35 +109,33 @@ def test_col(
                 limited_output=limited_output,
                 verbose=verbose,
             )
-            print(f"{test_name}: {results[column][test_name]}")
             tested.add(test_name)
             # should we break if one of the specific tests is successful?
         # performing less and less specific tests if specific ones fail
-        for test_name in [test for test in specific_tests if test not in tested]:
-            current_test = test_name
-            while test_funcs[current_test]["parent"] is not None:
-                if test_funcs[current_test]["parent"] in results[column]:
-                    print(f"already here {test_name}: {results[column][test_name]}")
-                    # already tested as a parent of a previous test
+        for test_name in specific_tests:
+            current = test_name
+            parent = test_funcs[current]["parent"]
+            while parent is not None:
+                if parent in results[column]:
+                    # already tested as a parent of a previous test, no need to get higher parents
                     break
-                if results[column][current_test] > 0:
+                if results[column][current] > 0:
                     # if a child test is successful, we set the parent's score to the same value
                     # this is not perfect: the column can be 50% child but 100% parent
                     # we would have to perform the parent test to know exactly, but this saves much time
-                    results[column][test_funcs[current_test]["parent"]] = results[column][current_test]
-                    print(f"bypassed {test_name}: {results[column][test_name]}")
+                    results[column][parent] = results[column][current]
                 else:
-                    results[column][test_funcs[current_test]["parent"]] = test_col_val(
+                    results[column][parent] = test_col_val(
                         table[column],
-                        test_attr["func"],
-                        test_attr["prop"],
+                        test_funcs[parent]["func"],
+                        test_funcs[parent]["prop"],
                         skipna=skipna,
                         limited_output=limited_output,
                         verbose=verbose,
                     )
-                    print(f"processed {test_name}: {results[column][test_name]}")
-                    tested.add(current_test)
-                current_test = test_funcs[current_test]["parent"]
+                    tested.add(parent)
+                current = parent
+                parent = test_funcs[current]["parent"]
         if verbose:
             display_logs_depending_process_time(
                 f'\t> Done with column "{column}" in {round(time() - start_col, 3)}s'
diff --git a/csv_detective/validate.py b/csv_detective/validate.py
index 2a58c48a..e1ecd6b7 100755
--- a/csv_detective/validate.py
+++ b/csv_detective/validate.py
@@ -3,20 +3,13 @@
 
 import pandas as pd
 
-from csv_detective.load_tests import return_all_tests
+from csv_detective.load_tests import build_test_priorities, return_all_tests
 from csv_detective.parsing.columns import test_col_val
 from csv_detective.parsing.load import load_file
 
 logging.basicConfig(level=logging.INFO)
 
-tests = {
-    t.__name__.split(".")[-1]: {
-        "func": t._is,
-        "prop": t.PROPORTION,
-        "parent": getattr(t, "PARENT", None),
-    }
-    for t in return_all_tests("ALL", "detect_fields")
-}
+tests, _ = build_test_priorities(return_all_tests("ALL", "detect_fields"))
 
 
 def validate(

From 6bcd63da95f3c089d64b17bed8cf12bdbcfbbcbc Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Thu, 28 Aug 2025 17:10:26 +0200
Subject: [PATCH 03/11] refactor: better name

---
 csv_detective/load_tests.py      | 2 +-
 csv_detective/parsing/columns.py | 4 ++--
 csv_detective/validate.py        | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/csv_detective/load_tests.py b/csv_detective/load_tests.py
index a3f2382d..9ddd2895 100755
--- a/csv_detective/load_tests.py
+++ b/csv_detective/load_tests.py
@@ -54,7 +54,7 @@ def return_all_tests(
     return all_tests
 
 
-def build_test_priorities(tests: list[ModuleType]) -> tuple[dict[str, dict], dict[str, dict]]:
+def build_tests_dicts(tests: list[ModuleType]) -> tuple[dict[str, dict], dict[str, dict]]:
     tests_dict = {
         test.__name__.split(".")[-1]: {
             "func": test._is,
diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py
index c2027ed2..23d64a75 100755
--- a/csv_detective/parsing/columns.py
+++ b/csv_detective/parsing/columns.py
@@ -5,7 +5,7 @@
 
 import pandas as pd
 
-from csv_detective.load_tests import build_test_priorities
+from csv_detective.load_tests import build_tests_dicts
 from csv_detective.utils import display_logs_depending_process_time
 
 MAX_ROWS_ANALYSIS = int(1e4)
@@ -91,7 +91,7 @@ def test_col(
     if verbose:
         start = time()
         logging.info("Testing columns to get types")
-    test_funcs, specific_tests = build_test_priorities(all_tests)
+    test_funcs, specific_tests = build_tests_dicts(all_tests)
     results = defaultdict(dict)
     nb_cols = len(table.columns)
     for idx, column in enumerate(table.columns):
diff --git a/csv_detective/validate.py b/csv_detective/validate.py
index e1ecd6b7..8dcdd0eb 100755
--- a/csv_detective/validate.py
+++ b/csv_detective/validate.py
@@ -3,13 +3,13 @@
 
 import pandas as pd
 
-from csv_detective.load_tests import build_test_priorities, return_all_tests
+from csv_detective.load_tests import build_tests_dicts, return_all_tests
 from csv_detective.parsing.columns import test_col_val
 from csv_detective.parsing.load import load_file
 
 logging.basicConfig(level=logging.INFO)
 
-tests, _ = build_test_priorities(return_all_tests("ALL", "detect_fields"))
+tests, _ = build_tests_dicts(return_all_tests("ALL", "detect_fields"))
 
 
 def validate(

From 66b2d45749a845b0cd6f54fc9d9a4de5a1ea1646 Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Thu, 28 Aug 2025 17:13:18 +0200
Subject: [PATCH 04/11] docs: update changelog

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5038efe6..6ce2b017 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,8 @@
 ## Current (in progress)
 
 - Better email detection [#151](https://github.com/datagouv/csv-detective/pull/151)
-- Sample can handle full NaN columns [#152](https://github.com/datagouv/csv-detective/pull/152)
+- Sample can handle full NaN columns [#154](https://github.com/datagouv/csv-detective/pull/154)
+- Add checks priorization to prevent testing all formats [#155](https://github.com/datagouv/csv-detective/pull/155)
 
 ## 0.9.2 (2025-08-26)
 

From b7f5c4f9ad3a51875a0c793e77f7a6db3ec72467 Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Thu, 28 Aug 2025 17:13:46 +0200
Subject: [PATCH 05/11] chore: lint

---
 csv_detective/load_tests.py      | 5 +----
 csv_detective/parsing/columns.py | 4 ++--
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/csv_detective/load_tests.py b/csv_detective/load_tests.py
index 9ddd2895..be1eb400 100755
--- a/csv_detective/load_tests.py
+++ b/csv_detective/load_tests.py
@@ -64,8 +64,5 @@ def build_tests_dicts(tests: list[ModuleType]) -> tuple[dict[str, dict], dict[st
         for test in tests
     }
     parents = {v["parent"] for v in tests_dict.values() if v["parent"] is not None}
-    specific_tests = {
-        k: v for k, v in tests_dict.items()
-        if k not in parents
-    }
+    specific_tests = {k: v for k, v in tests_dict.items() if k not in parents}
     return tests_dict, specific_tests
diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py
index 23d64a75..51d13f93 100755
--- a/csv_detective/parsing/columns.py
+++ b/csv_detective/parsing/columns.py
@@ -1,5 +1,5 @@
-from collections import defaultdict
 import logging
+from collections import defaultdict
 from time import time
 from typing import Callable
 
@@ -139,7 +139,7 @@ def test_col(
         if verbose:
             display_logs_depending_process_time(
                 f'\t> Done with column "{column}" in {round(time() - start_col, 3)}s'
-                f' ({idx + 1}/{nb_cols}), {len(tested)} tests performed',
+                f" ({idx + 1}/{nb_cols}), {len(tested)} tests performed",
                 time() - start_col,
             )
     if verbose:

From 0e3ac232c8889bb074fd5bf213b54fa567a1f897 Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Thu, 28 Aug 2025 17:32:33 +0200
Subject: [PATCH 06/11] fix: remove log

---
 csv_detective/detection/formats.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/csv_detective/detection/formats.py b/csv_detective/detection/formats.py
index d5676651..51fb52f1 100755
--- a/csv_detective/detection/formats.py
+++ b/csv_detective/detection/formats.py
@@ -73,7 +73,6 @@ def detect_formats(
     scores_table_fields = test_col(
         table, all_tests_fields, limited_output, skipna=skipna, verbose=verbose
     )
-    print(scores_table_fields)
     analysis["columns_fields"] = prepare_output_dict(scores_table_fields, limited_output)
 
     # Perform testing on labels

From a0cdcb278a460311180ab21683f947c15cc93d06 Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Fri, 29 Aug 2025 10:11:06 +0200
Subject: [PATCH 07/11] savestate

---
 CHANGELOG.md                     |  2 +-
 csv_detective/parsing/columns.py | 13 ++++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ce2b017..04cca5bd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@
 
 - Better email detection [#151](https://github.com/datagouv/csv-detective/pull/151)
 - Sample can handle full NaN columns [#154](https://github.com/datagouv/csv-detective/pull/154)
-- Add checks priorization to prevent testing all formats [#155](https://github.com/datagouv/csv-detective/pull/155)
+- Add tests prioritization to prevent testing all formats [#155](https://github.com/datagouv/csv-detective/pull/155)
 
 ## 0.9.2 (2025-08-26)
 
diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py
index 51d13f93..5d7fe803 100755
--- a/csv_detective/parsing/columns.py
+++ b/csv_detective/parsing/columns.py
@@ -112,10 +112,17 @@ def test_col(
             tested.add(test_name)
             # should we break if one of the specific tests is successful?
         # performing less and less specific tests if specific ones fail
-        for test_name in specific_tests:
+        # starting with highest scores to set the parents from there
+        for test_name in reversed([
+            test for test, _ in sorted(
+                (tup for tup in results[column].items()),
+                key=lambda tup: tup[1],
+            )
+        ]):
             current = test_name
             parent = test_funcs[current]["parent"]
             while parent is not None:
+                print(current, parent)
                 if parent in results[column]:
                     # already tested as a parent of a previous test, no need to get higher parents
                     break
@@ -123,6 +130,7 @@ def test_col(
                     # if a child test is successful, we set the parent's score to the same value
                     # this is not perfect: the column can be 50% child but 100% parent
                     # we would have to perform the parent test to know exactly, but this saves much time
+                    print(f"setting {parent} from {current}, score : {results[column][current]}")
                     results[column][parent] = results[column][current]
                 else:
                     results[column][parent] = test_col_val(
@@ -134,8 +142,7 @@ def test_col(
                         verbose=verbose,
                     )
                     tested.add(parent)
-                current = parent
-                parent = test_funcs[current]["parent"]
+                current, parent = parent, test_funcs[parent]["parent"]
         if verbose:
             display_logs_depending_process_time(
                 f'\t> Done with column "{column}" in {round(time() - start_col, 3)}s'

From 7d7db5ae40dd6df47c3eee7aacc55ba3de39a295 Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Fri, 29 Aug 2025 10:24:22 +0200
Subject: [PATCH 08/11] refactor: savestate

---
 csv_detective/parsing/columns.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py
index 5d7fe803..f4762da9 100755
--- a/csv_detective/parsing/columns.py
+++ b/csv_detective/parsing/columns.py
@@ -122,7 +122,6 @@ def test_col(
             current = test_name
             parent = test_funcs[current]["parent"]
             while parent is not None:
-                print(current, parent)
                 if parent in results[column]:
                     # already tested as a parent of a previous test, no need to get higher parents
                     break

From 7b167e48957b2ae52f212320d2830cd2df9b5972 Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Fri, 29 Aug 2025 10:55:13 +0200
Subject: [PATCH 09/11] fix: remove log

---
 csv_detective/parsing/columns.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py
index f4762da9..b4e4bc60 100755
--- a/csv_detective/parsing/columns.py
+++ b/csv_detective/parsing/columns.py
@@ -129,7 +129,6 @@ def test_col(
                     # if a child test is successful, we set the parent's score to the same value
                     # this is not perfect: the column can be 50% child but 100% parent
                     # we would have to perform the parent test to know exactly, but this saves much time
-                    print(f"setting {parent} from {current}, score : {results[column][current]}")
                     results[column][parent] = results[column][current]
                 else:
                     results[column][parent] = test_col_val(

From 551a3c8f5c17bffaac57e79664fadb4e967fea72 Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Fri, 29 Aug 2025 10:57:03 +0200
Subject: [PATCH 10/11] chore: lint

---
 csv_detective/parsing/columns.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py
index b4e4bc60..724c1213 100755
--- a/csv_detective/parsing/columns.py
+++ b/csv_detective/parsing/columns.py
@@ -113,12 +113,15 @@ def test_col(
             # should we break if one of the specific tests is successful?
         # performing less and less specific tests if specific ones fail
         # starting with highest scores to set the parents from there
-        for test_name in reversed([
-            test for test, _ in sorted(
-                (tup for tup in results[column].items()),
-                key=lambda tup: tup[1],
-            )
-        ]):
+        for test_name in reversed(
+            [
+                test
+                for test, _ in sorted(
+                    (tup for tup in results[column].items()),
+                    key=lambda tup: tup[1],
+                )
+            ]
+        ):
             current = test_name
             parent = test_funcs[current]["parent"]
             while parent is not None:

From e05e646393a9188fc8cd45093b14cd860258ccb4 Mon Sep 17 00:00:00 2001
From: Pierlou <pierlou.ramade@data.gouv.fr>
Date: Fri, 29 Aug 2025 11:22:49 +0200
Subject: [PATCH 11/11] refactor: better logs

---
 csv_detective/parsing/columns.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/csv_detective/parsing/columns.py b/csv_detective/parsing/columns.py
index 724c1213..c86b241c 100755
--- a/csv_detective/parsing/columns.py
+++ b/csv_detective/parsing/columns.py
@@ -97,7 +97,7 @@ def test_col(
     for idx, column in enumerate(table.columns):
         if verbose:
             start_col = time()
-            logging.info(f"\t- Starting with column '{column}'")
+            logging.info(f"\t- Starting with column '{column}' ({idx + 1}/{nb_cols})")
         tested = set()
         # testing for the most specific formats first (we have early stops in test_col_val)
         for test_name, test_attr in specific_tests.items():
@@ -147,7 +147,7 @@ def test_col(
         if verbose:
             display_logs_depending_process_time(
                 f'\t> Done with column "{column}" in {round(time() - start_col, 3)}s'
-                f" ({idx + 1}/{nb_cols}), {len(tested)} tests performed",
+                f", {len(tested)} tests performed",
                 time() - start_col,
             )
     if verbose: