From 18bb9fa2db3caf5b13eeaa9a4984a690bd7f23c3 Mon Sep 17 00:00:00 2001
From: Mariano Scasso <75589700+mscasso-scanoss@users.noreply.github.com>
Date: Wed, 15 Jan 2025 16:13:38 -0300
Subject: [PATCH 01/19] fix bug with scanning test. Add -T scanning parameter.
 Change default snippets scanning tolerance (#83)

Co-authored-by: core software devel <cs@scanoss.com>
---
 inc/match_list.h |  3 ++-
 src/debug.c      |  2 +-
 src/help.c       | 58 ++++++++++++++++++++++++------------------------
 src/main.c       |  5 ++++-
 src/match_list.c | 12 +++++++++-
 src/util.c       |  3 ++-
 6 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/inc/match_list.h b/inc/match_list.h
index 91b3b32..d9915e8 100644
--- a/inc/match_list.h
+++ b/inc/match_list.h
@@ -78,7 +78,7 @@
 #define SCAN_MAX_SNIPPETS_DEFAULT 	1
 #define SCAN_MAX_COMPONENTS_DEFAULT 3
 
-#define MATCH_LIST_TOLERANCE 98.5
+#define MATCH_LIST_TOLERANCE 97.5
 typedef struct match_data_t match_data_t; /* Forward declaration */
 
 /**
@@ -145,5 +145,6 @@ bool component_list_add(component_list_t * list, component_data_t * new_comp, bo
 void component_list_print(component_list_t * list, bool (*printer) (component_data_t * fpa), char * separator);
 void component_list_destroy(component_list_t *list);
 bool component_list_add_binary(component_list_t *list, component_data_t *new_comp, bool (*val)(component_data_t *a, component_data_t *b), bool remove_a);
+void match_list_tolerance_set(float in);
 
 #endif
diff --git a/src/debug.c b/src/debug.c
index c6ffea5..0b1e9f8 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -210,7 +210,7 @@ void scan_benchmark()
 			scan->hashes[i] = rand() % 256 + (rand() % 256) * 256 + (rand() % 256) * 256 * 256 + (rand() % 256) * 256 * 256 * 256;
 		}
 		scan->hash_count = total_hashes;
-
+		scan->total_lines = 10;
 		ldb_scan_snippets(scan);
 		scan_data_free(scan);
 	}
diff --git a/src/help.c b/src/help.c
index 8916f1a..8b15dcc 100644
--- a/src/help.c
+++ b/src/help.c
@@ -38,41 +38,42 @@
  */
 void help ()
 {
-	printf ("ScanOSS Engine v%s\n", SCANOSS_VERSION);
-	printf ("\n\
-This program performs an OSS inventory for the given TARGET comparing against the ScanOSS Knowledgebase.\n\
-Results are printed in STDOUT in JSON format\n\
+  printf ("ScanOSS Engine v%s\n", SCANOSS_VERSION);
+  printf ("\n\
+This program performs an OSS inventory scan of the specified TARGET by comparing it against the ScanOSS Knowledgebase.\n\
+Results are displayed in JSON format through STDOUT.\n\
 \n\
 Syntax: scanoss [parameters] [TARGET]\n\
 \n\
 Configuration:\n\
--w         Treats TARGET as a .wfp file regardless of the actual file extension.\n\
--H         High Precision Snippet Match mode, 'libhpsm.so' must be present in the system.\n\
--e         Expect matching extensions to equal the file extension being scanned (default: off).\n\
--M NUMBER  Looks for NUMBER of different components in a file (MAX 9).\n\
--s SBOM    Use assets specified in JSON SBOM (CycloneDX/SPDX2.2 JSON format) as input to identification.\n\
--b SBOM    Ignore matches to assets specified in JSON SBOM (CycloneDX/SPDX2.2 JSON format).\n\
--B SBOM    Same than \"-b\" but forcing snippet scan.\n\
--a SBOM    Displays attribution notices for provided SBOM.json.\n\
--c HINT    Provide a component HINT to influence scan results.\n\
--k KEY     Displays contents of file KEY from MZ sources archive.\n\
--l LICENSE Displays OSADL metadata for the provided SPDX license ID.\n\
+-w         Process TARGET as a .wfp file, regardless of its actual extension.\n\
+-H         Enable High Precision Snippet Match mode (requires 'libhpsm.so' in the system).\n\
+-e         Match only files with identical extensions as the scanned file (default: off).\n\
+-M NUMBER  Search for up to NUMBER different components in each file (maximum: 9).\n\
+-T NUMBER  Set snippet scanning tolerance percentage (default: 3.5).\n\
+-s SBOM    Include assets from a JSON SBOM file (CycloneDX/SPDX2.2 format) in identification.\n\
+-b SBOM    Exclude matches from assets listed in JSON SBOM file (CycloneDX/SPDX2.2 format).\n\
+-B SBOM    Same as \"-b\" but with forced snippet scanning.\n\
+-a SBOM    Show attribution notices for the provided SBOM.json file.\n\
+-c HINT    Add a component HINT to guide scan results.\n\
+-k KEY     Show contents of the specified KEY file from MZ sources archive.\n\
+-l LICENSE Display OSADL metadata for the given SPDX license ID.\n\
 \n\
 Options:\n\
--t  Tests engine performance.\n\
--v  Display version and exit.\n\
--n  Specify DB name (default: oss).\n\
--h  Display this help and exit.\n\
--d  Save debugging information to disk (/tmp).\n\
--q  Produces no JSON output. Only debugging info via STDERR.\n\
+-t  Run engine performance tests.\n\
+-v  Show version information and exit.\n\
+-n  Set database name (default: oss).\n\
+-h  Display this help information and exit.\n\
+-d  Store debugging information to disk (/tmp).\n\
+-q  Suppress JSON output (show only debugging info via STDERR).\n\
 \n\
-Enviroment variables:\n\
-SCANOSS_MATCHMAP_MAX: define the snippet scanning match map size, %d by default.\n\
-SCANOSS_API_URL: defines the API url, %s by default.\n\
+Environment variables:\n\
+SCANOSS_MATCHMAP_MAX: Set the snippet scanning match map size (default: %d).\n\
+SCANOSS_API_URL: Define the API endpoint URL (default: %s).\n\
 \n\
 Engine scanning flags:\n\
-The scanning engine can be configured by passing configuration flags with the -F parameter.\n\
-Alternatively, these value can be written in %s\n\
+Configure the scanning engine using flags with the -F parameter.\n\
+These settings can also be specified in %s\n\
 +-------+-------------------------------------------------------+\n\
 | Flag  | Setting                                               |\n\
 +-------+-------------------------------------------------------+\n\
@@ -83,7 +84,7 @@ Alternatively, these value can be written in %s\n\
 |   16  | Disable copyrights (default: enabled)                 |\n\
 |   32  | Disable vulnerabilities (default: enabled)            |\n\
 |   64  | Disable quality (default: enabled)                    |\n\
-|  128  | Disable cryptography (defalt: enabled)                |\n\
+|  128  | Disable cryptography (default: enabled)               |\n\
 |  256  | Disable best match only (default: enabled)            |\n\
 |  512  | Hide identified files (default: disabled)             |\n\
 | 1024  | Enable download_url (default: disabled)               |\n\
@@ -92,8 +93,7 @@ Alternatively, these value can be written in %s\n\
 | 8192  | Disable health layer (default: enabled)               |\n\
 | 16384 | Enable high accuracy, slower scan (default: disabled) |\n\
 +-------+-------------------------------------------------------+\n\
-Example: scanoss -F 12 DIRECTORY (scans DIRECTORY disabling license and dependency data)\n\
+Example: scanoss -F 12 DIRECTORY (scan DIRECTORY without license and dependency data)\n\
 \n\
 Copyright (C) 2018-2022 SCANOSS.COM\n", DEFAULT_MATCHMAP_FILES, API_URL, ENGINE_FLAGS_FILE);
-
 }
diff --git a/src/main.c b/src/main.c
index 9a488aa..337a466 100644
--- a/src/main.c
+++ b/src/main.c
@@ -291,7 +291,7 @@ int main(int argc, char **argv)
 	int option;
 	bool invalid_argument = false;
 	char * ldb_db_name = NULL;
-	while ((option = getopt(argc, argv, ":f:s:b:B:c:k:a:F:l:n:M:N:wtvhedqH")) != -1)
+	while ((option = getopt(argc, argv, ":T:s:b:B:c:k:a:F:l:n:M:N:wtvhedqH")) != -1)
 	{
 		/* Check valid alpha is entered */
 		if (optarg)
@@ -355,6 +355,9 @@ int main(int argc, char **argv)
 			case 'N':
 				scan_max_components = atol(optarg);
 				break;
+			case 'T':
+				match_list_tolerance_set(atof(optarg));
+				break;
 			case 'w':
 				force_wfp = true;
 				break;
diff --git a/src/match_list.c b/src/match_list.c
index ff51e68..276fa7d 100644
--- a/src/match_list.c
+++ b/src/match_list.c
@@ -9,6 +9,7 @@
 #include "component.h"
 
 int list_size = 0;
+static float match_list_tolerance = MATCH_LIST_TOLERANCE;
 
 void component_list_destroy(component_list_t *list)
 {
@@ -210,10 +211,19 @@ bool component_list_add_binary(component_list_t *list, component_data_t *new_com
     return false;
 }
 
+/* Store the acceptance threshold (100 - in); 'in' is a percentage clamped to [0, 99]. */
+void match_list_tolerance_set(float in)
+{
+    /* A negative value would lift the threshold above 100, which tolerance_eval() cannot reach for non-negative counts. */
+    in = (in < 0) ? 0 : (in > 99) ? 99 : in;
+    match_list_tolerance = 100.0 - in;
+    scanlog("setting match list tolerance to %.1f\n", match_list_tolerance);
+}
+
 bool tolerance_eval(int a, int b)
 {
     int relative_error = (abs(a - b) * 100) / ((a + b) / 2);
-    if (100 - relative_error >= MATCH_LIST_TOLERANCE)
+    if (100 - relative_error >= match_list_tolerance)
         return true;
     else
         return false;
diff --git a/src/util.c b/src/util.c
index 4a7f1b4..cfbcb99 100644
--- a/src/util.c
+++ b/src/util.c
@@ -372,7 +372,8 @@ bool path_is_third_party(const char* path)
         "local_packages",
         "managed",
         "3rd",
-        "thirdparty"
+        "thirdparty",
+		"LibResources"
     };
     
     // Número de patrones a verificar

From ca293fb43a65bc291883dea4b023561158f5ea75 Mon Sep 17 00:00:00 2001
From: Jeronimo Ortiz <166400360+ortizjeronimo@users.noreply.github.com>
Date: Mon, 20 Jan 2025 09:40:56 -0300
Subject: [PATCH 02/19] updated documentation theme

---
 docs/requirements-docs.txt | 2 +-
 docs/source/conf.py        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt
index a95ae18..483a4e9 100644
--- a/docs/requirements-docs.txt
+++ b/docs/requirements-docs.txt
@@ -1 +1 @@
-furo
+sphinx_rtd_theme
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 3b9402e..9fc9dc7 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -13,7 +13,7 @@
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
 
-extensions = []
+extensions = ['sphinx_rtd_theme']
 
 templates_path = ['_templates']
 exclude_patterns = []
@@ -23,6 +23,6 @@
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
-html_theme = 'furo'
+html_theme = 'sphinx_rtd_theme'
 html_logo = 'scanosslogo.jpg'
 html_static_path = ['_static']

From e41cf0b5ab81bc7f961aadaffeaf4a3669868729 Mon Sep 17 00:00:00 2001
From: Mariano Scasso <75589700+mscasso-scanoss@users.noreply.github.com>
Date: Mon, 10 Mar 2025 19:18:17 -0300
Subject: [PATCH 03/19] 5.4.10 (#85)

* fix memory bug processing versions

* change sources env-variables

* tune up scanning limits

* fix ranges assembling bug. Improve snippets and component selection

* change on report, file content url will be returned empty if the env-var wasn't defined
---
 inc/limits.h     |   4 +-
 inc/match.h      |   1 +
 inc/match_list.h |   3 +-
 inc/scan.h       |   2 +-
 inc/scanoss.h    |   3 +-
 src/help.c       |   6 +--
 src/match.c      |  40 ++++------------
 src/match_list.c |  16 ++++++-
 src/report.c     |  10 ++--
 src/scan.c       |   2 +-
 src/snippets.c   | 120 +++++++++++++++++++++++++----------------------
 src/util.c       |   4 +-
 src/versions.c   |   4 +-
 13 files changed, 106 insertions(+), 109 deletions(-)

diff --git a/inc/limits.h b/inc/limits.h
index b91dedd..69122a4 100644
--- a/inc/limits.h
+++ b/inc/limits.h
@@ -37,9 +37,9 @@
 
 /* Snippets */
 #define DEFAULT_MATCHMAP_FILES 10000     // Default number of files evaluated in snippet matching
-#define MAX_MATCHMAP_FILES (DEFAULT_MATCHMAP_FILES * 5)     // Max number of files evaluated in snippet matching to prevent performance issues
+#define MAX_MATCHMAP_FILES (DEFAULT_MATCHMAP_FILES * 10)     // Max number of files evaluated in snippet matching to prevent performance issues
+#define MIN_LINES_COVERAGE 0.8
 #define SKIP_SNIPPETS_IF_FILE_BIGGER (1024 * 1024 * 4)
-#define SKIP_SNIPPETS_IF_STARTS_WITH (const char*[3]) {"{", "<?xml", "<html"}
 #define MAX_SNIPPETS_SCANNED 2500
 
 /* Variables */
diff --git a/inc/match.h b/inc/match.h
index ae972e2..31278f3 100644
--- a/inc/match.h
+++ b/inc/match.h
@@ -13,6 +13,7 @@ typedef struct match_data_t
 	component_list_t component_list; /*Component list object */ 
 	match_t type; /*math type (none, snippet, file) */
     int hits; /*match hits number, more hits equal bigger snippet matching*/
+	int lines_matched; /*number of matched lines*/
 	char * line_ranges; /*input snippet line ranges */
 	char * oss_ranges; /* kb snippet line ranges */
 	char * matched_percent; /* matched percent */
diff --git a/inc/match_list.h b/inc/match_list.h
index d9915e8..6520724 100644
--- a/inc/match_list.h
+++ b/inc/match_list.h
@@ -78,7 +78,7 @@
 #define SCAN_MAX_SNIPPETS_DEFAULT 	1
 #define SCAN_MAX_COMPONENTS_DEFAULT 3
 
-#define MATCH_LIST_TOLERANCE 97.5
+#define MATCH_LIST_TOLERANCE 99.9
 typedef struct match_data_t match_data_t; /* Forward declaration */
 
 /**
@@ -145,6 +145,7 @@ bool component_list_add(component_list_t * list, component_data_t * new_comp, bo
 void component_list_print(component_list_t * list, bool (*printer) (component_data_t * fpa), char * separator);
 void component_list_destroy(component_list_t *list);
 bool component_list_add_binary(component_list_t *list, component_data_t *new_comp, bool (*val)(component_data_t *a, component_data_t *b), bool remove_a);
+bool match_list_eval(match_list_t *list, match_data_t * in,  bool (*eval)(match_data_t *fpa, match_data_t *fpb));
 void match_list_tolerance_set(float in);
 
 #endif
diff --git a/inc/scan.h b/inc/scan.h
index 51aaca3..e3e768d 100644
--- a/inc/scan.h
+++ b/inc/scan.h
@@ -52,7 +52,7 @@ typedef struct scan_data_t
 	match_t match_type; /* match_t (file, snippet, none), this is replicated in each match in the matches list */
 	matchmap_entry *matchmap; /*matchmap pointer, used in snippet scanning */
 	uint32_t matchmap_size; /*size of the match map */
-	int matchmap_rank_by_sector[255]; /* Indirection array pointing to the max hits from the matchmap classyfied by sector.*/
+	int matchmap_rank_by_sector[256]; /* Indirection array pointing to the max hits from the matchmap classyfied by sector.*/
 	uint8_t *match_ptr; // pointer to matching record in match_map
 	match_list_t * matches_list_array[MAX_MULTIPLE_COMPONENTS]; /* array of "match_list_t", each snippet with different "from line" will generate its own matches list */
 	int matches_list_array_index; /* elements in the matches list array*/
diff --git a/inc/scanoss.h b/inc/scanoss.h
index 318dfc3..841b545 100644
--- a/inc/scanoss.h
+++ b/inc/scanoss.h
@@ -40,12 +40,11 @@
 #define WFP_REC_LN 18
 
 /* Log files */
-#define SCANOSS_VERSION "5.4.9"
+#define SCANOSS_VERSION "5.4.10"
 #define SCAN_LOG "/tmp/scanoss_scan.log"
 #define MAP_DUMP "/tmp/scanoss_map.dump"
 #define SLOW_QUERY_LOG "/tmp/scanoss_slow_query.log"
 
-#define API_URL "https://api.osskb.org"
 #define DEFAULT_OSS_DB_NAME "oss"
 
 /* Engine configuration flags */
diff --git a/src/help.c b/src/help.c
index 8b15dcc..f36ebe5 100644
--- a/src/help.c
+++ b/src/help.c
@@ -50,7 +50,7 @@ Configuration:\n\
 -H         Enable High Precision Snippet Match mode (requires 'libhpsm.so' in the system).\n\
 -e         Match only files with identical extensions as the scanned file (default: off).\n\
 -M NUMBER  Search for up to NUMBER different components in each file (maximum: 9).\n\
--T NUMBER  Set snippet scanning tolerance percentage (default: 3.5).\n\
+-T NUMBER  Set snippet scanning tolerance percentage (default: 0.1).\n\
 -s SBOM    Include assets from a JSON SBOM file (CycloneDX/SPDX2.2 format) in identification.\n\
 -b SBOM    Exclude matches from assets listed in JSON SBOM file (CycloneDX/SPDX2.2 format).\n\
 -B SBOM    Same as \"-b\" but with forced snippet scanning.\n\
@@ -69,7 +69,7 @@ Options:\n\
 \n\
 Environment variables:\n\
 SCANOSS_MATCHMAP_MAX: Set the snippet scanning match map size (default: %d).\n\
-SCANOSS_API_URL: Define the API endpoint URL (default: %s).\n\
+SCANOSS_FILE_CONTENTS_URL: Define the API URL endpoint for sources. Source url wont be reported if it's not defined.\n\
 \n\
 Engine scanning flags:\n\
 Configure the scanning engine using flags with the -F parameter.\n\
@@ -95,5 +95,5 @@ These settings can also be specified in %s\n\
 +-------+-------------------------------------------------------+\n\
 Example: scanoss -F 12 DIRECTORY (scan DIRECTORY without license and dependency data)\n\
 \n\
-Copyright (C) 2018-2022 SCANOSS.COM\n", DEFAULT_MATCHMAP_FILES, API_URL, ENGINE_FLAGS_FILE);
+Copyright (C) 2018-2022 SCANOSS.COM\n", DEFAULT_MATCHMAP_FILES, ENGINE_FLAGS_FILE);
 }
diff --git a/src/match.c b/src/match.c
index 1b13bb1..56f9427 100644
--- a/src/match.c
+++ b/src/match.c
@@ -320,7 +320,7 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
 	if (!*a->release_date)
 		return true;
 		
-	if (!path_is_third_party(a->file) && path_is_third_party(b->file) && !(engine_flags & ENABLE_PATH_HINT))
+	if (!path_is_third_party(a->file) && path_is_third_party(b->file))
 	{
 		scanlog("Component rejected by third party filter\n");
 		return false;
@@ -331,7 +331,7 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
 	{	
 		if (purl_source_check(a) > purl_source_check(b))
 		{
-			scanlog("Component prefered by vsource\n");
+			scanlog("Component prefered by source\n");
 			return true;
 		}
 
@@ -471,39 +471,15 @@ bool load_matches(match_data_t *match)
 {
 	scanlog("Load matches\n");
 
-	/* Compile match ranges and fill up matched percent */
-	int hits = 100;
-	int matched_percent = 100;
 
-	/* Get matching line ranges (snippet match) */
-	if (match->type == MATCH_SNIPPET)
-	{
-		hits = compile_ranges(match);
-		scanlog("compile_ranges returns %d hits\n", hits);
-
-		if (hits < min_match_hits)
-		{
-			match->type = MATCH_NONE;
-			return false;
-		}
-		
-		float percent = (hits * 100) / match->scan_ower->total_lines;
-		if (hits)
-			matched_percent = floor(percent);
-		if (matched_percent > 99)
-			matched_percent = 99;
-		if (matched_percent < 1)
-			matched_percent = 1;
-
-		asprintf(&match->matched_percent, "%u%%", matched_percent);
-	}
-	else if (match->type == MATCH_BINARY)
+	
+	if (match->type == MATCH_BINARY)
 	{
 		asprintf(&match->line_ranges, "n/a");
 		asprintf(&match->oss_ranges, "n/a");
 		asprintf(&match->matched_percent, "%d functions matched", match->hits);
 	}
-	else
+	else if (match->type == MATCH_FILE)
 	{
 		asprintf(&match->line_ranges, "all");
 		asprintf(&match->oss_ranges, "all");
@@ -696,11 +672,13 @@ void match_select_best(scan_data_t *scan)
 				break;
 			}
 
-			if (!best_match_component->identified && match_component->identified)
+			if ((!best_match_component->identified && match_component->identified) ||
+				(strcmp(best_match_component->vendor,best_match_component->component) && !strcmp(match_component->vendor, match_component->component)) ||
+				(path_is_third_party(best_match_component->file) && !path_is_third_party(match_component->file)))
 			{
 				scanlog("Replacing best match for a prefered component\n");
 				scan->matches_list_array[i]->best_match = item->match;
-			}	
+			}
 		}
 	}
 
diff --git a/src/match_list.c b/src/match_list.c
index 276fa7d..9df35af 100644
--- a/src/match_list.c
+++ b/src/match_list.c
@@ -311,11 +311,11 @@ bool match_list_add(match_list_t *list, match_data_t *new_match, bool (*val)(mat
         }
         /* in autolimit mode the list doesnt have a fix size, it will accept all the matchest until a 75% of the fist element (the biggest) */
         //TODO: this part of the code should be in the function pointer or I need to re-evaluate the archtecture of this function */
-        if (list->autolimit && !tolerance_eval(list->headp.lh_first->match->hits, list->last_element->match->hits))
+        if (list->autolimit && !tolerance_eval(list->headp.lh_first->match->lines_matched, list->last_element->match->lines_matched))
         {    
             np = list->headp.lh_first;
             /*We have to find and remove the unwanted elements */
-            for (; np->entries.le_next != NULL && tolerance_eval(list->headp.lh_first->match->hits, np->entries.le_next->match->hits); np = np->entries.le_next)
+            for (; np->entries.le_next != NULL && tolerance_eval(list->headp.lh_first->match->lines_matched, np->entries.le_next->match->lines_matched); np = np->entries.le_next)
             {
 
             }
@@ -403,6 +403,18 @@ bool match_list_print(match_list_t *list, bool (*printer)(match_data_t *fpa), ch
     return true;
 }
 
+/* Return true when eval(entry, in) holds for one of the first list->items entries of the list. */
+bool match_list_eval(match_list_t *list, match_data_t * in,  bool (*eval)(match_data_t *fpa, match_data_t *fpb))
+{
+    struct entry *cursor = list->headp.lh_first;
+    for (int seen = 0; cursor != NULL && seen < list->items; seen++, cursor = cursor->entries.le_next)
+    {
+        if (eval(cursor->match, in))
+            return true;
+    }
+    return false;
+}
+
 void component_list_print(component_list_t *list, bool (*printer)(component_data_t *fpa), char *separator)
 {
     for (struct comp_entry *np = list->headp.lh_first; np != NULL; np = np->entries.le_next)
diff --git a/src/report.c b/src/report.c
index 99c058c..c650328 100644
--- a/src/report.c
+++ b/src/report.c
@@ -334,17 +334,19 @@ bool print_json_match(struct match_data_t * match)
 	printf(",\"source_hash\": \"%s\"", match->source_md5);
 
 	/* Output file_url (same as url when match type = url) */
-	char * file_url_enabled = getenv("SCANOSS_FILE_CONTENTS");
-	if (!file_url_enabled || strcmp(file_url_enabled, "false"))
+	char * file_contents_url = getenv("SCANOSS_FILE_CONTENTS_URL");
+	if (file_contents_url && *file_contents_url && strcmp(file_contents_url, "false"))
 	{
 		if (!match->component_list.headp.lh_first->component->url_match)
 		{
-			char *custom_url = getenv("SCANOSS_API_URL");
-			printf(",\"file_url\": \"%s/file_contents/%s\"", custom_url ? custom_url : API_URL, file_id);
+			printf(",\"file_url\": \"%s/%s\"", file_contents_url, file_id);
 		}
 		else
 			printf(",\"file_url\": \"%s\"", match->component_list.headp.lh_first->component->url);
 	}
+	else //return an empty string
+		printf(",\"file_url\": \" \"");
+
 
 	free(file_id);
 	
diff --git a/src/scan.c b/src/scan.c
index 608fdfd..8d586d0 100644
--- a/src/scan.c
+++ b/src/scan.c
@@ -59,7 +59,7 @@ scan_data_t * scan_data_init(char *target, int max_snippets, int max_components)
 	scan_data_t * scan = calloc(1, sizeof(*scan));
 	scan->file_path = strdup(target);
 	scan->file_size = malloc(32);
-	scan->hashes = malloc(MAX_FILE_SIZE);
+	scan->hashes = calloc(MAX_FILE_SIZE,1);
 	scan->lines  = malloc(MAX_FILE_SIZE);
 	scan->match_type = MATCH_NONE;
 
diff --git a/src/snippets.c b/src/snippets.c
index 6bb4fb2..1c3717c 100644
--- a/src/snippets.c
+++ b/src/snippets.c
@@ -40,6 +40,7 @@
 #include "match.h"
 #include "match_list.h"
 #include "stdlib.h"
+#include "snippets.h"
 int matchmap_max_files = DEFAULT_MATCHMAP_FILES;
 
 /**
@@ -91,6 +92,20 @@ static bool hit_test(match_data_t *a, match_data_t *b)
 	else
 		return false;
 }
+
+/* Tell whether two matches overlap: true when any [from, to] range of a
+ * intersects any [from, to] range of b (both bounds are inclusive). */
+bool ranges_intersection(match_data_t *a, match_data_t *b)
+{
+	bool overlap = false;
+	for (int ai = 0; !overlap && ai < a->matchmap_reg->ranges_number; ai++)
+	{
+		for (int bi = 0; !overlap && bi < b->matchmap_reg->ranges_number; bi++)
+			overlap = !(a->matchmap_reg->range[ai].from > b->matchmap_reg->range[bi].to ||
+						b->matchmap_reg->range[bi].from > a->matchmap_reg->range[ai].to);
+	}
+	return overlap;
+}
 /**
  * @brief Fill the matches list array based on the matchmap. The possible matches will be sorted by hits number.
  *
@@ -103,9 +118,8 @@ void biggest_snippet(scan_data_t *scan)
 	for (int i = 0; i < scan->max_snippets_to_process; i++)
 		scan->matches_list_array_indirection[i] = -1;
 
-	int snippet_tolerance = range_tolerance / scan->max_snippets_to_process + min_match_lines; /* Used to define bounds between two possible snippets */
 	/*Fill the matches list with the files from the matchmap */
-	for (int sector = 0; sector < 255; sector++)
+	for (int sector = 0; sector < 256; sector++)
 	{
 		int j = scan->matchmap_rank_by_sector[sector];
 		
@@ -115,48 +129,65 @@ void biggest_snippet(scan_data_t *scan)
 		if (scan->matchmap[j].hits >= min_match_hits) /* Only consider file with more than min_match_hits */
 		{
 			match_data_t *match_new = calloc(1, sizeof(match_data_t)); /* Create a match object */
-			memcpy(match_new->file_md5, scan->matchmap[j].md5, MD5_LEN);
+			memcpy(match_new->file_md5, scan->matchmap[j].md5, oss_file.key_ln);
 			match_new->hits = scan->matchmap[j].hits;
 			match_new->matchmap_reg = &scan->matchmap[j];
 			match_new->type = scan->match_type;
 			match_new->from = scan->matchmap[j].range->from;
 			strcpy(match_new->source_md5, scan->source_md5);
 			match_new->scan_ower = scan;
-			bool found = false;
 			int i = 0;
-			for (; i < scan->matches_list_array_index; i++) /*Check if there is already a list for this line ranges */
+
+			if (snippet_extension_discard(match_new))
 			{
-				if (scan->matches_list_array_indirection[i] > -1 &&
-					abs(scan->matches_list_array_indirection[i] - match_new->from) < snippet_tolerance)
-				{
-					found = true;
-					break;
-				}
+				match_data_free(match_new); 
+				continue;
+			}
+
+			int matched_lines = compile_ranges(match_new);
+			if (matched_lines < min_match_lines) {
+				match_data_free(match_new); 
+				continue;
 			}
 
-			if (!found) /*If there is no list for the snippet range we have to create a new one */
+			float percent = (matched_lines * 100) / match_new->scan_ower->total_lines;
+			int matched_percent = floor(percent);
+			if (matched_percent > 99)
+				matched_percent = 99;
+			if (matched_percent < 1)
+				matched_percent = 1;
+			asprintf(&match_new->matched_percent, "%u%%", matched_percent);
+			match_new->lines_matched = matched_lines;
+			//match_new->hits = hits;
+
+			do /*Check if there is already a list for this line ranges */
 			{
-				if (scan->matches_list_array_index < scan->max_snippets_to_process) /* Check for the list limit */
+				if (!scan->matches_list_array[scan->matches_list_array_index] && scan->matches_list_array_index < scan->max_snippets_to_process)
 				{
-					scan->matches_list_array_indirection[scan->matches_list_array_index] = match_new->from; /*update indirection*/
-					scan->matches_list_array[scan->matches_list_array_index] = match_list_init(true, 1);	/*create the list*/
-					i = scan->matches_list_array_index;														/* update index*/
+					scan->matches_list_array[scan->matches_list_array_index] = match_list_init(true, 1);	/*create the list if it doesnt exist*/
 					scan->matches_list_array_index++;
+					if(!match_list_add(scan->matches_list_array[i], match_new, hit_test, true))
+					{
+						match_data_free(match_new); 
+					}
+					break;
 				}
-				else
-					i = scan->max_snippets_to_process - 1; /*add in the last available list if there is no more space for new lists*/
-			}
-
-			if (snippet_extension_discard(match_new) || !match_list_add(scan->matches_list_array[i], match_new, hit_test, true)) /*Add the match in the selected list */
-			{
-				scanlog("Rejected match with %d hits\n", match_new->hits);
-				match_data_free(match_new); /* the the memory if the match was not accepted in the list */
-			}
+				if (match_list_eval(scan->matches_list_array[i], match_new, ranges_intersection) || i == scan->max_snippets_to_process -1)
+				{
+					if(!match_list_add(scan->matches_list_array[i], match_new, hit_test, true))
+					{
+						match_data_free(match_new); 
+					}
+					break;
+				}
+				i++;
+			} while(i < scan->matches_list_array_index); /*Check if there is already a list for this line ranges */
 		}
 	}
 	/*just for loging*/
 	if (debug_on)
 	{
+		scanlog("Match list array index: %d\n", scan->matches_list_array_index);
 		for (int i = 0; i < scan->matches_list_array_index; i++)
 		{
 			scanlog("Match list N %d, with %d matches. %d <= HITS <= %d \n", i, scan->matches_list_array[i]->items,
@@ -165,8 +196,8 @@ void biggest_snippet(scan_data_t *scan)
 			struct entry *item = NULL;
 			LIST_FOREACH(item, &scan->matches_list_array[i]->headp, entries)
 			{
-				char md5_hex[MD5_LEN * 2 + 1];
-				ldb_bin_to_hex(item->match->file_md5, MD5_LEN, md5_hex);
+				char md5_hex[oss_file.key_ln * 2 + 1];
+				ldb_bin_to_hex(item->match->file_md5, oss_file.key_ln, md5_hex);
 				scanlog("%s - %d\n", md5_hex, item->match->hits);
 			}
 		}
@@ -348,8 +379,11 @@ matchmap_range * ranges_join_overlapping(matchmap_range *ranges, int size)
 			{
 				if(out_ranges_index >= 0 && (ranges[i].from - tolerance <= out_ranges[out_ranges_index].to))
 				{
+					if (out_ranges[out_ranges_index].to > ranges[i].to)
+						continue;
+
 					out_ranges[out_ranges_index].to = ranges[i].to;
-					scanlog("join range %d with %d\n", i, out_ranges_index);
+					//scanlog("join range %d with %d: %d - %d\n", i, out_ranges_index, out_ranges[out_ranges_index].from, out_ranges[out_ranges_index].to);
 				}
 				else
 				{
@@ -397,35 +431,7 @@ uint32_t compile_ranges(match_data_t *match)
 		return 0;
 	}
 
-	uint16_t reported_hits = match->matchmap_reg->hits;
 	int hits = 0;
-	/* Revise hits and decrease if needed */
-	for (uint32_t i = 0; i < match->matchmap_reg->ranges_number; i++)
-	{
-		long from =  match->matchmap_reg->range[i].from; //uint16_read(match->matchmap_reg + MD5_LEN + 2 + i * 6);
-		long to = match->matchmap_reg->range[i].to; //uint16_read(match->matchmap_reg + MD5_LEN + 2 + i * 6 + 2);
-		long delta = to - from;
-
-		if (to < 1)
-			break;
-
-		/* Ranges to be ignored (under min_match_lines) should decrease hits counter */
-		if (delta < min_match_lines)
-		{
-			/* Single-line range decreases by 1, otherwise decrease by 2 (from and to) */
-			reported_hits -= ((delta == 0) ? 1 : 2);
-		}
-
-		/* Exit if hits is below two */
-		if (reported_hits < min_match_hits)
-		{
-			scanlog("Discarted ranges brings hits count to %u\n", reported_hits);
-			return 0;
-		}
-
-		scanlog("compile_ranges #%d = %ld to %ld - OSS from: %d\n", i, from, to, match->matchmap_reg->range[i].oss_line);
-	}
-	
 	/* Add tolerances and assemble line ranges */
 	ranges_sort(match->matchmap_reg->range, match->matchmap_reg->ranges_number);
 
@@ -800,7 +806,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
 			}
 			if (cat_limit > scan->max_matchmap_size)
 			{
-				if ((hashes_to_process < scan->hash_count / 10 || (float) lines_coverage / scan->hash_count < 0.6) && cat_limit < MAX_MATCHMAP_FILES)
+				if ((hashes_to_process < scan->hash_count / 10 || (float) lines_coverage / scan->hash_count < MIN_LINES_COVERAGE) && cat_limit < MAX_MATCHMAP_FILES)
 				{
 					scan->max_matchmap_size += map[map_indirection[i][j]].size;
 				}
diff --git a/src/util.c b/src/util.c
index cfbcb99..4151d66 100644
--- a/src/util.c
+++ b/src/util.c
@@ -330,10 +330,10 @@ void free_and_null(void * pr)
 
 bool path_is_third_party(const char* path) 
 {
-    // Array de patrones comunes
     const char* patterns[] = {
         "third_party",
         "3rdparty",
+		"site-packages",
         "vendor",
         "external",
         "dependencies",
@@ -376,10 +376,8 @@ bool path_is_third_party(const char* path)
 		"LibResources"
     };
     
-    // Número de patrones a verificar
     const int numPatterns = sizeof(patterns) / sizeof(patterns[0]);
     
-    // Verificar cada patrón
     for (int i = 0; i < numPatterns; i++) 
 	{
         if (strstr(path, patterns[i]) != NULL) 
diff --git a/src/versions.c b/src/versions.c
index 405beed..2639bf8 100644
--- a/src/versions.c
+++ b/src/versions.c
@@ -55,10 +55,10 @@ void normalise_version(char *version, char *component)
 		return;
 
 	char aux[MAX_FIELD_LN] = "\0";
+	int compt_len = strlen(component);
 	/* Remove leading component name from version */
-	if ((version && component) && stristart(version, component))
+	if ((version && component) && stristart(version, component) && strlen(version) > compt_len + 1)
 	{
-		int compt_len = strlen(component);
 		sprintf(aux, "%s",version + compt_len + 1);
 	}
 

From e39a4d9299cca189c38708c813d39ef13ead148a Mon Sep 17 00:00:00 2001
From: core software devel <cs@scanoss.com>
Date: Mon, 5 Aug 2024 12:16:40 +0000
Subject: [PATCH 04/19] major change on multiple snippet detection

---
 src/snippets.c | 49 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/src/snippets.c b/src/snippets.c
index 1c3717c..ae8a89b 100644
--- a/src/snippets.c
+++ b/src/snippets.c
@@ -144,20 +144,14 @@ void biggest_snippet(scan_data_t *scan)
 				continue;
 			}
 
-			int matched_lines = compile_ranges(match_new);
-			if (matched_lines < min_match_lines) {
-				match_data_free(match_new); 
-				continue;
-			}
-
-			float percent = (matched_lines * 100) / match_new->scan_ower->total_lines;
+			int hits = compile_ranges(match_new);
+			float percent = (hits * 100) / match_new->scan_ower->total_lines;
 			int matched_percent = floor(percent);
 			if (matched_percent > 99)
 				matched_percent = 99;
 			if (matched_percent < 1)
 				matched_percent = 1;
 			asprintf(&match_new->matched_percent, "%u%%", matched_percent);
-			match_new->lines_matched = matched_lines;
 			//match_new->hits = hits;
 
 			do /*Check if there is already a list for this line ranges */
@@ -372,7 +366,7 @@ matchmap_range * ranges_join_overlapping(matchmap_range *ranges, int size)
 		processed = 0;
 		out_ranges[0] = ranges[0];
 		memset(out_ranges, 0, sizeof(matchmap_range) * MATCHMAP_RANGES);
-		scanlog("Range tolerance: %d\n", tolerance);
+		//scanlog("Range tolerance: %d\n", tolerance);
 		for (int i = 0; i < size; i++)
 		{
 			if (ranges[i].from && ranges[i].to)
@@ -383,7 +377,7 @@ matchmap_range * ranges_join_overlapping(matchmap_range *ranges, int size)
 						continue;
 
 					out_ranges[out_ranges_index].to = ranges[i].to;
-					//scanlog("join range %d with %d: %d - %d\n", i, out_ranges_index, out_ranges[out_ranges_index].from, out_ranges[out_ranges_index].to);
+					//scanlog("join range %d with %d\n", i, out_ranges_index);
 				}
 				else
 				{
@@ -432,10 +426,37 @@ uint32_t compile_ranges(match_data_t *match)
 	}
 
 	int hits = 0;
+	/* Revise hits and decrease if needed */
+	for (uint32_t i = 0; i < match->matchmap_reg->ranges_number; i++)
+	{
+		long from =  match->matchmap_reg->range[i].from;
+		long to = match->matchmap_reg->range[i].to;
+		long delta = to - from;
+
+		if (to < 1)
+			break;
+
+		/* Ranges to be ignored (under min_match_lines) should decrease hits counter */
+		if (delta < min_match_lines)
+		{
+			/* Single-line range decreases by 1, otherwise decrease by 2 (from and to) */
+			reported_hits -= ((delta == 0) ? 1 : 2);
+		}
+
+		/* Exit if hits is below two */
+		if (reported_hits < min_match_hits)
+		{
+			scanlog("Discarted ranges brings hits count to %u\n", reported_hits);
+			return 0;
+		}
+
+		//scanlog("compile_ranges #%d = %ld to %ld - OSS from: %d\n", i, from, to, match->matchmap_reg->range[i].oss_line);
+	}
+	
 	/* Add tolerances and assemble line ranges */
 	ranges_sort(match->matchmap_reg->range, match->matchmap_reg->ranges_number);
 
-	if (debug_on)
+	/*if (debug_on)
 	{
 		scanlog("Accepted ranges (min lines range = %d):\n", min_match_lines);
 		for (uint32_t i = 0; i < match->matchmap_reg->ranges_number; i++)
@@ -444,7 +465,7 @@ uint32_t compile_ranges(match_data_t *match)
 				scanlog("	%d = %ld to %ld - OSS from: %d\n", i, match->matchmap_reg->range[i].from,match->matchmap_reg->range[i].to, 
 																match->matchmap_reg->range[i].oss_line);
 		}
-	}
+	}*/
 
 	matchmap_range *ranges = ranges_join_overlapping(match->matchmap_reg->range,  match->matchmap_reg->ranges_number);
 	
@@ -459,7 +480,7 @@ uint32_t compile_ranges(match_data_t *match)
 		}
 	}
 		
-	if (debug_on)
+	/*if (debug_on)
 	{
 		scanlog("Final ranges:\n");
 		for (uint32_t i = 0; i < MATCHMAP_RANGES; i++)
@@ -467,7 +488,7 @@ uint32_t compile_ranges(match_data_t *match)
 		if ( ranges[i].from && ranges[i].to)
 				scanlog("	%d = %ld to %ld - OSS from: %d\n", i, ranges[i].from, ranges[i].to, ranges[i].oss_line);
 		}
-	}
+	}*/
 	hits = ranges_assemble(ranges, line_ranges, oss_ranges);
 	match->line_ranges = strdup(line_ranges);
 	match->oss_ranges = strdup(oss_ranges);

From 4ed8fac3fa5eef289711d61b96d66622f068503a Mon Sep 17 00:00:00 2001
From: core software devel <cs@scanoss.com>
Date: Wed, 7 Aug 2024 11:15:14 +0000
Subject: [PATCH 05/19] force the engine to pick different components. Add
 component_list_update function

---
 inc/match_list.h |  9 ++++++++-
 src/match.c      | 51 +++++++++++++++++++++++++++++++++++++-----------
 src/match_list.c | 18 +++++++++++++++++
 3 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/inc/match_list.h b/inc/match_list.h
index 6520724..6773972 100644
--- a/inc/match_list.h
+++ b/inc/match_list.h
@@ -81,6 +81,13 @@
 #define MATCH_LIST_TOLERANCE 99.9
 typedef struct match_data_t match_data_t; /* Forward declaration */
 
+typedef enum
+{
+	LIST_ITEM_NOT_FOUND = 0,
+	LIST_ITEM_FOUND,
+	LIST_ITEM_UPDATE
+} list_update_t;
+
 /**
  * @brief Define a list of component_data_t
  * 
@@ -147,5 +154,5 @@ void component_list_destroy(component_list_t *list);
 bool component_list_add_binary(component_list_t *list, component_data_t *new_comp, bool (*val)(component_data_t *a, component_data_t *b), bool remove_a);
 bool match_list_eval(match_list_t *list, match_data_t * in,  bool (*eval)(match_data_t *fpa, match_data_t *fpb));
 void match_list_tolerance_set(float in);
-
+bool component_list_update(component_list_t *list, component_data_t * in, list_update_t (*eval)(component_data_t *fpa, component_data_t *fpb));
 #endif
diff --git a/src/match.c b/src/match.c
index 56f9427..a3ad3c8 100644
--- a/src/match.c
+++ b/src/match.c
@@ -381,6 +381,19 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
 	return false;
 }
 
+list_update_t component_update(component_data_t *a, component_data_t *b)
+{
+	if (a && b && a->purls[0] && b->purls[0] && a->release_date && b->release_date && !strcmp(a->purls[0], b->purls[0]))
+	{
+		if (strcmp(b->release_date, a->release_date) < 0)
+			return LIST_ITEM_UPDATE;
+		else
+			return LIST_ITEM_FOUND;
+	} 
+	else
+		return LIST_ITEM_NOT_FOUND;
+}
+
 bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id, char *path)
 {
 	component_data_t *new_comp = NULL;
@@ -391,18 +404,34 @@ bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id,
 		
 	fill_component_path(new_comp, path);
 	/* Create a new component and fill it from the url record */
-
-	new_comp->file_md5_ref = component_list->match_ref->file_md5;
-	/* If the component is valid add it to the component list */
-	/* The component list is a fixed size list, of size 3 by default, this means the list will keep the free oldest components*/
-	/* The oldest component will be the first in the list, if two components have the same age the purl date will untie */
-	new_comp->file_path_ref = component_list->match_ref->scan_ower->file_path;
-	new_comp->path_rank = PATH_LEVEL_COMP_INIT_VALUE;
-
-	scanlog("--- new comp: %s@%s %s %d---\n", new_comp->purls[0], new_comp->version, new_comp->release_date, new_comp->identified);
-	if (!component_list_add(component_list, new_comp, component_hint_date_comparation, true))
+	component_data_t *new_comp = calloc(1, sizeof(*new_comp));
+	bool result = fill_component(new_comp, url_id, path, (uint8_t *)url_rec);
+	if (result)
 	{
-		component_data_free(new_comp); /* Free if the componet was rejected */
+		new_comp->file_md5_ref = component_list->match_ref->file_md5;
+		/* If the component is valid add it to the component list */
+		/* The component list is a fixed size list, of size 3 by default, this means the list will keep the free oldest components*/
+		/* The oldest component will be the first in the list, if two components have the same age the purl date will untie */
+		new_comp->identified = IDENTIFIED_NONE;
+		asset_declared(new_comp);
+		new_comp->file_path_ref = component_list->match_ref->scan_ower->file_path;
+		new_comp->path_rank = PATH_LEVEL_COMP_INIT_VALUE;
+		if (!component_list_update(component_list, new_comp, component_update))
+		{
+			scanlog("--- new comp %s---\n", new_comp->component);
+			if (!component_list_add(component_list, new_comp, component_hint_date_comparation, true))
+			{
+				scanlog("component rejected: %s\n", new_comp->purls[0]);
+				component_data_free(new_comp); /* Free if the componet was rejected */
+			}
+			else
+				scanlog("component accepted: %s - pathrank: %d\n", new_comp->purls[0], new_comp->path_rank);
+		}
+		else if (debug_on)
+		{
+			scanlog("--- Componen already exist: %s---\n", new_comp->component);
+		}
+
 	}
 	else
 	{
diff --git a/src/match_list.c b/src/match_list.c
index 9df35af..09981e5 100644
--- a/src/match_list.c
+++ b/src/match_list.c
@@ -427,6 +427,24 @@ void component_list_print(component_list_t *list, bool (*printer)(component_data
     }
 }
 
+bool component_list_update(component_list_t *list, component_data_t * in, list_update_t (*eval)(component_data_t *fpa, component_data_t *fpb))
+{
+    for (struct comp_entry *np = list->headp.lh_first; np != NULL; np = np->entries.le_next)
+    {
+        list_update_t r = eval(np->component, in);
+        if (r == LIST_ITEM_UPDATE)
+        {
+            component_data_t * aux = np->component;
+            np->component = in;
+            component_data_free(aux);
+            return true;
+        }
+        else if (r == LIST_ITEM_FOUND)
+            return true;
+    }
+    return false;
+}
+
 void match_list_process(match_list_t *list, bool (*funct_p)(match_data_t *fpa))
 {
     for (struct entry *np = list->headp.lh_first; np != NULL; np = np->entries.le_next)

From 3164bcb08d38a5c31dda5e7eb6e5f2cbc2f0a39e Mon Sep 17 00:00:00 2001
From: core software devel <cs@scanoss.com>
Date: Wed, 7 Aug 2024 13:56:54 +0000
Subject: [PATCH 06/19] solve memory leaks

---
 src/license.c |  7 +++----
 src/match.c   |  9 ++++-----
 src/quality.c | 19 +++++++++----------
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/src/license.c b/src/license.c
index 93c5e5a..3281ce6 100644
--- a/src/license.c
+++ b/src/license.c
@@ -80,19 +80,18 @@ void normalize_license(char *license)
 	{
 		char def[MAX_ARGLN];
 		strcpy(def, license_normalization[i]);
-		char *token;
 
 		/* get the first token */
-		token = strtok(def, ",");
+		char * token = strtok(def, ",");
 
-		char *spdx = token;
+		//char *spdx = token;
 
 		/* walk through other tokens */
 		while (token != NULL)
 		{
 			if (stricmp(license, token))
 			{
-				strcpy(license, spdx);
+				strcpy(license, token);
 				return;
 			}
 			token = strtok(NULL, ",");
diff --git a/src/match.c b/src/match.c
index a3ad3c8..dd68605 100644
--- a/src/match.c
+++ b/src/match.c
@@ -388,7 +388,11 @@ list_update_t component_update(component_data_t *a, component_data_t *b)
 		if (strcmp(b->release_date, a->release_date) < 0)
 			return LIST_ITEM_UPDATE;
 		else
+		{
+			scanlog("--- Componen already exist: %s---\n", b->component);
+			component_data_free(b);
 			return LIST_ITEM_FOUND;
+		}
 	} 
 	else
 		return LIST_ITEM_NOT_FOUND;
@@ -427,11 +431,6 @@ bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id,
 			else
 				scanlog("component accepted: %s - pathrank: %d\n", new_comp->purls[0], new_comp->path_rank);
 		}
-		else if (debug_on)
-		{
-			scanlog("--- Componen already exist: %s---\n", new_comp->component);
-		}
-
 	}
 	else
 	{
diff --git a/src/quality.c b/src/quality.c
index bc9fcb6..5f9a8db 100644
--- a/src/quality.c
+++ b/src/quality.c
@@ -56,13 +56,13 @@ const char *quality_sources[] = {"best_practices"};
 bool print_quality_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	
-	match_data_t * match  = (match_data_t*) ptr;
-	char *CSV = (char*) data;
+	char ** out  = ptr;
+	char *csv = (char*) data;
 	char *source  = calloc(MAX_JSON_VALUE_LEN, 1);
 	char *quality = calloc(MAX_JSON_VALUE_LEN, 1);
 
-	extract_csv(source, CSV, 1, MAX_JSON_VALUE_LEN);
-	extract_csv(quality, CSV, 2, MAX_JSON_VALUE_LEN);
+	extract_csv(source, csv, 1, MAX_JSON_VALUE_LEN);
+	extract_csv(quality, csv, 2, MAX_JSON_VALUE_LEN);
 
 	int src = atoi(source);
 
@@ -85,7 +85,7 @@ bool print_quality_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *d
 			len += sprintf(result+len,"\"score\": \"%s\",", quality);
 		len += sprintf(result+len,"\"source\": \"%s\"", quality_sources[atoi(source)]);
 		len += sprintf(result+len,"}");
-		match->quality_text = strdup(result);
+		*out = strdup(result);
 	}
 
 
@@ -105,17 +105,16 @@ void print_quality(match_data_t * match)
 		return;
 	
 	char result[MAX_FIELD_LN] = "\0";
-	match->quality_text = NULL;
+	char * aux = NULL;
 
 	sprintf(result,"\"quality\": [");
 
 
-	ldb_fetch_recordset(NULL, oss_quality, match->file_md5, false, print_quality_item, match);	
+	ldb_fetch_recordset(NULL, oss_quality, match->file_md5, false, print_quality_item, &aux);	
 	
-	char * aux = NULL;
-	asprintf(&aux, "%s%s]", result, match->quality_text ? match->quality_text : "");
 	free(match->quality_text);	
-	match->quality_text = aux;
+	asprintf(&match->quality_text, "%s%s]", result, aux ? aux : "");
+	free(aux);
 
 }
 

From c96d28c58e6921a21a35a2a78329592b103b3398 Mon Sep 17 00:00:00 2001
From: core software devel <cs@scanoss.com>
Date: Thu, 22 Aug 2024 12:35:49 +0000
Subject: [PATCH 07/19] replace MD5_LEN by key_ln

---
 inc/util.h         |  5 -----
 src/attributions.c | 16 ++++++++--------
 src/binary_scan.c  | 20 ++++++++++----------
 src/component.c    | 12 ++++++------
 src/debug.c        |  4 ++--
 src/dependency.c   | 12 ++++++------
 src/file.c         |  8 ++++----
 src/hpsm.c         |  6 +++---
 src/license.c      |  2 +-
 src/match.c        | 16 ++++++++--------
 src/match_list.c   |  4 ++--
 src/mz.c           |  6 +++---
 src/query.c        |  4 ++--
 src/report.c       | 13 +++++--------
 src/scan.c         | 19 +++++++------------
 src/snippets.c     |  4 ++--
 src/url.c          |  6 +++---
 src/util.c         | 21 ++++++---------------
 18 files changed, 78 insertions(+), 100 deletions(-)

diff --git a/inc/util.h b/inc/util.h
index 7741e6b..5f1077e 100644
--- a/inc/util.h
+++ b/inc/util.h
@@ -26,11 +26,6 @@ char *datestamp(void);
 /* Prints a "created" JSON element with the current datestamp */
 void print_datestamp(void);
 
-//void file_md5(char *filepath, uint8_t *md5_result);
-
-/* Returns a string with a hex representation of md5 */
-char *md5_hex(uint8_t *md5);
-
 /* Removes chr from str */
 void remove_char(char *str, char chr);
 
diff --git a/src/attributions.c b/src/attributions.c
index e9a85b4..ac67673 100644
--- a/src/attributions.c
+++ b/src/attributions.c
@@ -54,10 +54,10 @@
 bool notices_handler(uint8_t *key, uint8_t *subkey, int subkey_ln, \
 uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
-	if (datalen != 2 * MD5_LEN) return false;
-	char hexkey[MD5_LEN * 2 + 1];
-	memcpy(hexkey, data, MD5_LEN * 2);
-	hexkey[MD5_LEN * 2] = 0;
+	if (datalen != 2 * oss_attribution.key_ln) return false;
+	char hexkey[oss_attribution.key_ln * 2 + 1];
+	memcpy(hexkey, data, oss_attribution.key_ln * 2);
+	hexkey[oss_attribution.key_ln * 2] = 0;
 
 	/* Print attribution notice header */
 	char *component = (char *) ptr;
@@ -86,11 +86,11 @@ uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	bool *valid = (bool *) ptr;
 
-	if (datalen != MD5_LEN) return false;
+	if (datalen != oss_attribution.key_ln) return false;
 
 	/* Convert key */
 	uint8_t attr_id[16];
-	ldb_hex_to_bin((char *) data, MD5_LEN * 2, attr_id);
+	ldb_hex_to_bin((char *) data, oss_attribution.key_ln * 2, attr_id);
 
 	/* Define mz_job values */
 	struct mz_job job;
@@ -100,7 +100,7 @@ uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 	job.mz_ln = 0;
 	job.id = NULL;
 	job.ln = 0;
-	job.md5[MD5_LEN] = 0;
+	job.md5[oss_attribution.key_ln] = 0;
 	job.key = NULL;
 
 	/* If file does not exist, exit with valid = false */
@@ -288,7 +288,7 @@ int attribution_notices(char * components)
 	char * licenses_json = notices_load_file();
 	/* Validate SBOM */
 	declared_components = get_components(components);
-	if (check_purl_attributions(oss_attribution, licenses_json) && !debug_on)
+	if (check_purl_attributions(oss_attribution, licenses_json))
 		/* Print attribution notices */
 		print_purl_attribution_notices(oss_attribution, licenses_json);
 
diff --git a/src/binary_scan.c b/src/binary_scan.c
index a7f7a88..9e72ed8 100644
--- a/src/binary_scan.c
+++ b/src/binary_scan.c
@@ -68,7 +68,7 @@ static bool add_purl_from_urlid(uint8_t *key, uint8_t *subkey, int subkey_ln, ui
 	if (iteration > MAX_URLS)
 		return true;
 	/* Ignore path lengths over the limit */
-	if (!datalen || datalen >= (MD5_LEN + MAX_FILE_PATH)) return false;
+	if (!datalen || datalen >= (oss_file.key_ln + MAX_FILE_PATH)) return false;
 
 	/* Decrypt data */
 	char * decrypted = decrypt_data(raw_data, datalen, oss_file, key, subkey);
@@ -77,8 +77,8 @@ static bool add_purl_from_urlid(uint8_t *key, uint8_t *subkey, int subkey_ln, ui
 	
 	component_list_t * component_list = (component_list_t*) ptr;
 	/* Copy data to memory */
-	uint8_t url_id[MD5_LEN];
-	memcpy(url_id, raw_data, MD5_LEN);
+	uint8_t url_id[oss_url.key_ln];
+	memcpy(url_id, raw_data, oss_url.key_ln);
 	char path[MAX_FILE_PATH+1];
 	strncpy(path, decrypted, MAX_FILE_PATH);
 
@@ -136,7 +136,7 @@ static bool get_all_file_ids(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8
 	{
 		if (iteration < max_files_to_process * 2)
 		{
-			memcpy(files[iteration].url_id, data, MD5_LEN);
+			memcpy(files[iteration].url_id, data, oss_url.key_ln);
 			return false;
 		} 
 		return true;
@@ -253,16 +253,16 @@ extern bool first_file;
 int binary_scan(char * input)
 {
 	/* Get file MD5 */
-	char * hexmd5 = strndup(input, MD5_LEN * 2);
+	char * hexmd5 = strndup(input, oss_file.key_ln * 2);
 	scanlog("Bin File md5 to be scanned: %s\n", hexmd5);
-	uint8_t bin_md5[MD5_LEN];
-	ldb_hex_to_bin(hexmd5, MD5_LEN * 2, bin_md5);
+	uint8_t bin_md5[oss_file.key_ln];
+	ldb_hex_to_bin(hexmd5, oss_file.key_ln * 2, bin_md5);
 	free(hexmd5);
 
-	uint8_t zero_md5[MD5_LEN] = {0xd4,0x1d,0x8c,0xd9,0x8f,0x00,0xb2,0x04,0xe9,0x80,0x09,0x98,0xec,0xf8,0x42,0x7e}; //empty string md5
+	/*uint8_t zero_md5[oss_file.key_ln] = {0xd4,0x1d,0x8c,0xd9,0x8f,0x00,0xb2,0x04,0xe9,0x80,0x09,0x98,0xec,0xf8,0x42,0x7e}; //empty string md5
 	
 	if (!memcmp(zero_md5,bin_md5, MD5_LEN)) //the md5 key of an empty string must be skipped.
-		return -1;
+		return -1;*/
 	
 	if (ldb_key_exists(oss_file, bin_md5))
 	{
@@ -272,7 +272,7 @@ int binary_scan(char * input)
 		char * target = strndup(file_name, target_len);
 		scan_data_t * scan =  scan_data_init(target, 1, 1);
 		free(target);
-		memcpy(scan->md5, bin_md5, MD5_LEN);
+		memcpy(scan->md5, bin_md5, oss_file.key_ln);
 		scan->match_type = MATCH_FILE;
 		compile_matches(scan);
 
diff --git a/src/component.c b/src/component.c
index 6861ec3..1e2734d 100644
--- a/src/component.c
+++ b/src/component.c
@@ -95,7 +95,7 @@ component_data_t *component_data_copy(component_data_t *in)
     out->latest_version = strdup(in->latest_version);
     out->license = strdup(in->license);
     out->url_match = in->url_match;
-    memcpy(out->url_md5, in->url_md5, MD5_LEN);
+    memcpy(out->url_md5, in->url_md5, oss_url.key_ln);
     if (in->main_url)
         out->main_url = strdup(in->main_url);
     out->url = strdup(in->url);
@@ -109,8 +109,8 @@ component_data_t *component_data_copy(component_data_t *in)
 
         if (in->purls_md5[i])
         {
-            out->purls_md5[i] = malloc(MD5_LEN);
-            memcpy(out->purls_md5[i], in->purls_md5[i], MD5_LEN);
+            out->purls_md5[i] = malloc(oss_purl.key_ln);
+            memcpy(out->purls_md5[i], in->purls_md5[i], oss_purl.key_ln);
         }
     }
 
@@ -236,7 +236,7 @@ bool fill_component(component_data_t *component, uint8_t *url_key, char *file_pa
 	/* Extract fields from file record */
 	if (url_key)
 	{
-		memcpy(component->url_md5, url_key, MD5_LEN);
+		memcpy(component->url_md5, url_key, oss_url.key_ln);
 		if (file_path)
 		{
 			fill_component_path(component, file_path);
@@ -301,14 +301,14 @@ bool component_date_comparation(component_data_t *a, component_data_t *b)
 
 	if (!a->purls_md5[0] && a->purls[0])
 	{
-		a->purls_md5[0] = malloc(MD5_LEN);
+		a->purls_md5[0] = malloc(oss_url.key_ln);
 		MD5((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
 		a->age = get_component_age(a->purls_md5[0]);
 	}
 
 	if (!b->purls_md5[0] && b->purls[0])
 	{
-		b->purls_md5[0] = malloc(MD5_LEN);
+		b->purls_md5[0] = malloc(oss_purl.key_ln);
 		MD5((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
 		b->age = get_component_age(b->purls_md5[0]);
 	}
diff --git a/src/debug.c b/src/debug.c
index 0b1e9f8..040a267 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -162,7 +162,7 @@ void map_dump(scan_data_t *scan)
 		
 		/* Print matching MD5 */
 		uint8_t *md5 = scan->matchmap[i].md5;
-		for (int j = 0; j < MD5_LEN; j++) fprintf(map, "%02x", md5[j]);
+		for (int j = 0; j < oss_file.key_ln; j++) fprintf(map, "%02x", md5[j]);
 
 		/* Print hits */
 		fprintf(map, " %04x ", scan->matchmap[i].hits);
@@ -198,7 +198,7 @@ void scan_benchmark()
 	{
 		scan_data_t * scan = scan_data_init("pseudo_file", 0, 0);
 		scan->preload = true;
-		memcpy(scan->md5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", MD5_LEN);
+		memset(scan->md5, 0, oss_file.key_ln);
 		strcpy(scan->file_size, "1024");
 
 		progress ("Scanning: ", f + 1, total_files, false);
diff --git a/src/dependency.c b/src/dependency.c
index e3dae5f..1334694 100644
--- a/src/dependency.c
+++ b/src/dependency.c
@@ -123,10 +123,10 @@ int print_dependencies(component_data_t * comp)
 	if (!records)
 		for (int i = 0; i < MAX_PURLS && comp->purls[i]; i++)
 		{
-			uint8_t md5[MD5_LEN];
-			purl_version_md5(md5, comp->purls[i], comp->version);
+			uint8_t hash[oss_purl.key_ln];
+			purl_version_md5(hash, comp->purls[i], comp->version);
 
-			records = ldb_fetch_recordset(NULL, oss_dependency, md5, false, print_dependencies_item, comp);
+			records = ldb_fetch_recordset(NULL, oss_dependency, hash, false, print_dependencies_item, comp);
 			if (records)
 			{
 				scanlog("Dependency matches (%d) reported for %s@%s\n", records, comp->purls[i],comp->version);
@@ -139,10 +139,10 @@ int print_dependencies(component_data_t * comp)
 	if (!records)
 		for (int i = 0; i < MAX_PURLS && comp->purls[i]; i++)
 		{
-			uint8_t md5[MD5_LEN];
-			purl_version_md5(md5, comp->purls[i], comp->latest_version);
+			uint8_t hash[oss_purl.key_ln];
+			purl_version_md5(hash, comp->purls[i], comp->latest_version);
 
-			records = ldb_fetch_recordset(NULL, oss_dependency, md5, false, print_dependencies_item, comp);
+			records = ldb_fetch_recordset(NULL, oss_dependency, hash, false, print_dependencies_item, comp);
 			if (records)
 			{
 				scanlog("Dependency matches (%d) reported for %s@%s\n", records, comp->purls[i],comp->latest_version);
diff --git a/src/file.c b/src/file.c
index 204915e..acd9338 100644
--- a/src/file.c
+++ b/src/file.c
@@ -196,7 +196,7 @@ bool collect_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *ra
 	if (iteration >= FETCH_MAX_FILES) return true;
 
 	/* Ignore path lengths over the limit */
-	if (!datalen || datalen >= (MD5_LEN + MAX_FILE_PATH)) return false;
+	if (!datalen || datalen >= (oss_file.key_ln + MAX_FILE_PATH)) return false;
 
 	/* Decrypt data */
 	char * decrypted = decrypt_data(raw_data, datalen, oss_file, key, subkey);
@@ -205,7 +205,7 @@ bool collect_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *ra
 	/* Copy data to memory */
 	file_recordset *files = ptr;
 
-	memcpy(files[iteration].url_id, raw_data, MD5_LEN);
+	memcpy(files[iteration].url_id, raw_data, oss_url.key_ln);
 	strncpy(files[iteration].path, decrypted, MAX_FILE_PATH);
 	free(decrypted);
 	
@@ -227,7 +227,7 @@ bool collect_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *ra
 bool count_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
 {
 	/* Ignore path lengths over the limit */
-	if (!datalen || datalen >= (MD5_LEN + MAX_FILE_PATH)) return false;
+	if (!datalen || datalen >= (oss_file.key_ln + MAX_FILE_PATH)) return false;
 
 	int * count = ptr;
 	*count = iteration;
@@ -273,7 +273,7 @@ bool get_first_file(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data,
 		return false;
 
 	*(char *)ptr = 0;
-	char *ext = file_extension((char *)file_data + MD5_LEN);
+	char *ext = file_extension((char *)file_data + oss_file.key_ln);
 	
 	if (ext)
 		strcpy((char *) ptr, ext);
diff --git a/src/hpsm.c b/src/hpsm.c
index c116774..aef202e 100644
--- a/src/hpsm.c
+++ b/src/hpsm.c
@@ -105,9 +105,9 @@ struct ranges hpsm_calc(uint8_t *file_md5)
         return r;
     }
     scanlog("Running HPSM\n");
-    char *file = md5_hex(file_md5);
-    struct ranges result = hpsm(hpsm_crc_lines, file);
-    free(file);
+    char file_hex[oss_file.key_ln * 2 + 1];
+    ldb_bin_to_hex(file_md5, oss_file.key_ln, file_hex);
+    struct ranges result = hpsm(hpsm_crc_lines, file_hex);
     return result;
 }
 
diff --git a/src/license.c b/src/license.c
index 3281ce6..e203cbf 100644
--- a/src/license.c
+++ b/src/license.c
@@ -372,7 +372,7 @@ void print_licenses(component_data_t *comp)
 	for (int i = 0; i < MAX_PURLS && comp->purls[i]; i++)
 	{
 		/* Calculate purl@version md5 */
-		uint8_t purlversion_md5[MD5_LEN];
+		uint8_t purlversion_md5[oss_purl.key_ln];
 		purl_version_md5(purlversion_md5, comp->purls[i], comp->version);
 
 		records = ldb_fetch_recordset(NULL, oss_license, purlversion_md5, false, print_licenses_item, comp);
diff --git a/src/match.c b/src/match.c
index dd68605..421018c 100644
--- a/src/match.c
+++ b/src/match.c
@@ -86,7 +86,7 @@ void match_data_free(match_data_t *data)
 match_data_t * match_data_copy(match_data_t * in)
 {
     match_data_t * out = calloc(1, sizeof(*out));
-    memcpy(out->file_md5,in->file_md5,MD5_LEN);
+    memcpy(out->file_md5,in->file_md5,oss_file.key_ln);
     out->hits = in->hits;
     out->type = in->type;
     out->line_ranges = strdup(in->line_ranges);
@@ -354,14 +354,14 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
 		
 		if (!a->purls_md5[0] && a->purls[0])
 		{
-			a->purls_md5[0] = malloc(MD5_LEN);
+			a->purls_md5[0] = malloc(oss_purl.key_ln);
 			MD5((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
 			a->age = get_component_age(a->purls_md5[0]);
 		}
 		
 		if (!b->purls_md5[0] && b->purls[0])
 		{
-			b->purls_md5[0] = malloc(MD5_LEN);
+			b->purls_md5[0] = malloc(oss_purl.key_ln);
 			MD5((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
 			b->age = get_component_age(b->purls_md5[0]);
 		}
@@ -464,7 +464,7 @@ bool component_from_file(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *
 		return true;
 
 	/* Ignore path lengths over the limit */
-	if (!datalen || datalen >= (MD5_LEN + MAX_FILE_PATH)) return false;
+	if (!datalen || datalen >= (oss_file.key_ln + MAX_FILE_PATH)) return false;
 
 	/* Decrypt data */
 	char * decrypted = decrypt_data(raw_data, datalen, oss_file, key, subkey);
@@ -474,12 +474,12 @@ bool component_from_file(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *
 	component_list_t * component_list = (component_list_t*) ptr;
 	/* Copy data to memory */
 
-	uint8_t url_id[MD5_LEN] = {0xd4,0x1d,0x8c,0xd9,0x8f,0x00,0xb2,0x04,0xe9,0x80,0x09,0x98,0xec,0xf8,0x42,0x7e}; //empty string md5
+	uint8_t url_id[oss_url.key_ln]; /*= {0xd4,0x1d,0x8c,0xd9,0x8f,0x00,0xb2,0x04,0xe9,0x80,0x09,0x98,0xec,0xf8,0x42,0x7e}; //empty string md5
 	
 	if (!memcmp(raw_data,url_id, MD5_LEN)) //the md5 key of an empty string must be skipped.
-		return false;
+		return false;*/
 
-	memcpy(url_id, raw_data, MD5_LEN);
+	memcpy(url_id, raw_data, oss_url.key_ln);
 	char path[MAX_FILE_PATH+1];
 	strncpy(path, decrypted, MAX_FILE_PATH);
 	//check the ignore list only if the match type is MATCH_SNIPPET. TODO: remove this after remine everything.
@@ -824,7 +824,7 @@ void compile_matches(scan_data_t *scan)
 		match_data_t *match_new = calloc(1, sizeof(match_data_t));
 		match_new->type = scan->match_type;
 		strcpy(match_new->source_md5, scan->source_md5);
-		memcpy(match_new->file_md5, scan->match_ptr, MD5_LEN);
+		memcpy(match_new->file_md5, scan->match_ptr, oss_file.key_ln);
 		match_new->scan_ower = scan;
 		if (!match_list_add(scan->matches_list_array[0], match_new, NULL, false))
 		{
diff --git a/src/match_list.c b/src/match_list.c
index 09981e5..89d6ff1 100644
--- a/src/match_list.c
+++ b/src/match_list.c
@@ -370,8 +370,8 @@ void match_list_debug(match_list_t *list)
     scanlog("Print list\n");
     for (struct entry *np = list->headp.lh_first; np != NULL; np = np->entries.le_next)
     {
-        char md5_hex[MD5_LEN * 2 + 1];
-        ldb_bin_to_hex(np->match->matchmap_reg->md5, MD5_LEN, md5_hex);
+        char md5_hex[oss_file.key_ln * 2 + 1];
+        ldb_bin_to_hex(np->match->matchmap_reg->md5, oss_file.key_ln, md5_hex);
         //   printf("Item: %d - hits: %d - md5: %s - file: %s - release_date: %s - ranges: %s - purl:%s\n",
         // i, np->match->hits, md5_hex, np->match->file, np->match->release_date, np->match->line_ranges, np->match->purls[0]);
         printf("\nItem: %d - hits: %d - md5: %s - release: %s \n", i, np->match->hits, md5_hex, np->match->component_list.headp.lh_first->component->release_date);
diff --git a/src/mz.c b/src/mz.c
index c3ab15a..b9969d2 100644
--- a/src/mz.c
+++ b/src/mz.c
@@ -47,7 +47,7 @@
 void mz_get_key(struct ldb_table kb, char *key)
 {
 	/* Calculate mz file path */
-	char mz_path[LDB_MAX_PATH + MD5_LEN] = "\0";
+	char mz_path[LDB_MAX_PATH + kb.key_ln];
 	char mz_file_id[5] = "\0\0\0\0\0";
 	struct mz_job job;
 	memcpy(mz_file_id, key, 4);
@@ -66,8 +66,8 @@ void mz_get_key(struct ldb_table kb, char *key)
 	scanlog("MZ path: %s \n", mz_path);
 
 	/* Save path and key on job */
-	job.key = calloc(MD5_LEN, 1);
-	ldb_hex_to_bin(key, MD5_LEN * 2, job.key);	
+	job.key = calloc(kb.key_ln, 1);
+	ldb_hex_to_bin(key, kb.key_ln * 2, job.key);	
 
 	/* Read source mz file into memory */
 	job.mz = file_read(mz_path, &job.mz_ln);
diff --git a/src/query.c b/src/query.c
index b85809b..84f048a 100644
--- a/src/query.c
+++ b/src/query.c
@@ -47,8 +47,8 @@
 char *get_filename(char *md5)
 {
 	/* Convert md5 to bin */
-	uint8_t md5bin[MD5_LEN];
-	ldb_hex_to_bin(md5, MD5_LEN * 2, md5bin);
+	uint8_t md5bin[oss_file.key_ln];
+	ldb_hex_to_bin(md5, oss_file.key_ln * 2, md5bin);
 
 	/* Init record */
 	uint8_t *record = calloc(LDB_MAX_REC_LN + 1, 1);
diff --git a/src/report.c b/src/report.c
index c650328..96620c1 100644
--- a/src/report.c
+++ b/src/report.c
@@ -203,7 +203,7 @@ bool print_json_component(component_data_t * component)
 	{
 		if (component->purls[i] && !component->purls_md5[i])
 		{
-			component->purls_md5[i] = malloc(MD5_LEN);
+			component->purls_md5[i] = malloc(oss_purl.key_ln);
 			MD5((uint8_t *)component->purls[i], strlen(component->purls[i]), component->purls_md5[i]);
 		}
 	}
@@ -245,7 +245,8 @@ bool print_json_component(component_data_t * component)
 	if (engine_flags & ENABLE_PATH_HINT)
 		printf("\"path_rank\": %d,", component->path_rank);
 
-	char *url_id = md5_hex(component->url_md5);
+	char url_id[oss_url.key_ln * 2 + 1];
+    ldb_bin_to_hex(component->url_md5, oss_url.key_ln, url_id);
 	printf("\"url_hash\": \"%s\"", url_id);
 	free(url_id);
 
@@ -315,7 +316,8 @@ bool print_json_match(struct match_data_t * match)
 		scanlog("Match with no components ignored: %s", match->source_md5);
 		return false;
 	}
-	char *file_id = md5_hex(match->file_md5);
+	char file_id[oss_file.key_ln * 2 +1];
+	ldb_bin_to_hex(match->file_md5, oss_file.key_ln, file_id);
 
 	if (engine_flags & DISABLE_BEST_MATCH)
 		printf("{");
@@ -344,11 +346,6 @@ bool print_json_match(struct match_data_t * match)
 		else
 			printf(",\"file_url\": \"%s\"", match->component_list.headp.lh_first->component->url);
 	}
-	else //return an empty string
-		printf(",\"file_url\": \" \"");
-
-
-	free(file_id);
 	
 	if (!(engine_flags & DISABLE_QUALITY))
 	{
diff --git a/src/scan.c b/src/scan.c
index 8d586d0..d1f18f6 100644
--- a/src/scan.c
+++ b/src/scan.c
@@ -19,7 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
-
 #include "debug.h"
 #include "file.h"
 #include "ignorelist.h"
@@ -195,7 +194,7 @@ int hash_scan(char *path, int scan_max_snippets, int scan_max_components)
 	scan->preload = true;
 
 		/* Get file MD5 */
-	ldb_hex_to_bin(scan->file_path, MD5_LEN * 2, scan->md5);
+	ldb_hex_to_bin(scan->file_path, oss_file.key_ln * 2, scan->md5);
 
 	/* Fake file length */
 	strcpy(scan->file_size, "999");
@@ -236,7 +235,6 @@ int wfp_scan(char * path, int scan_max_snippets, int scan_max_components)
 	/* Get wfp MD5 hash */
 	uint8_t tmp_md5[16];
 	get_file_md5(path, tmp_md5);
-	char *tmp_md5_hex = md5_hex(tmp_md5);
 
 	/* Read line by line */
 	while ((lineln = getline(&line, &len, fp)) != -1)
@@ -268,25 +266,25 @@ int wfp_scan(char * path, int scan_max_snippets, int scan_max_components)
 			const int tagln = 5; // len of 'file='
 
 			/* Get file MD5 */
-			char * hexmd5 = strndup(line + tagln, MD5_LEN * 2);
-			if (strlen(hexmd5) <  MD5_LEN * 2)
+			char * hexmd5 = strndup(line + tagln, oss_file.key_ln * 2);
+			if (strlen(hexmd5) <  oss_file.key_ln * 2)
 			{
 				scanlog("Incorrect md5 len in line %s. Skipping\n", line);
 				free(hexmd5);
 				continue;
 			}
 			
-			rec = (uint8_t*) strdup(line + tagln + (MD5_LEN * 2) + 1);
+			rec = (uint8_t*) strdup(line + tagln + (oss_file.key_ln * 2) + 1);
 			char * target = field_n(2, (char *)rec);
 			
 			/*Init a new scan object for the next file to be scanned */
 			scan = scan_data_init(target, scan_max_snippets, scan_max_components);
-			strcpy(scan->source_md5, tmp_md5_hex);
+			ldb_bin_to_hex(tmp_md5, oss_file.key_ln, scan->source_md5);
 			extract_csv(scan->file_size, (char *)rec, 1, LDB_MAX_REC_LN);
 			scan->preload = true;
 			free(rec);
 			scanlog("File md5 to be scanned: %s\n", hexmd5);
-			ldb_hex_to_bin(hexmd5, MD5_LEN * 2, scan->md5);
+			ldb_hex_to_bin(hexmd5, oss_file.key_ln * 2, scan->md5);
 			free(hexmd5);
 		}
 
@@ -331,7 +329,6 @@ int wfp_scan(char * path, int scan_max_snippets, int scan_max_components)
 	fclose(fp);
 	if (line) free(line);
 	
-	free(tmp_md5_hex);
 	return EXIT_SUCCESS;
 }
 
@@ -445,9 +442,7 @@ void ldb_scan(scan_data_t *scan)
 		get_file_md5(scan->file_path, scan->md5);
 
 	/* Scan full file */
-	char *tmp_md5_hex = md5_hex(scan->md5);
-	strcpy(scan->source_md5, tmp_md5_hex);
-	free(tmp_md5_hex);
+	ldb_bin_to_hex(scan->md5, oss_file.key_ln, scan->source_md5);
 
 	/* Look for full file match or url match in ldb */
 	scan->match_type = ldb_scan_file(scan);
diff --git a/src/snippets.c b/src/snippets.c
index ae8a89b..601c3d0 100644
--- a/src/snippets.c
+++ b/src/snippets.c
@@ -650,14 +650,14 @@ int add_file_to_matchmap(scan_data_t *scan, matchmap_entry_t *item, uint8_t *md5
 
 		found = scan->matchmap_size;
 		/* Write MD5 */
-		memcpy(scan->matchmap[found].md5, md5, MD5_LEN);
+		memcpy(scan->matchmap[found].md5, md5, oss_file.key_ln);
 		scan->matchmap[found].ranges_number = 0;	
 	}
 
 	/* Search for the right range */
 
 	uint32_t from = 0;
-	uint16_t oss_line = uint16_read(md5 + MD5_LEN);
+	uint16_t oss_line = uint16_read(md5 + oss_file.key_ln);
 	bool range_found = false;
 
 	for (uint32_t t = 0; t < scan->matchmap[found].ranges_number; t++)
diff --git a/src/url.c b/src/url.c
index 677f675..5a1fbe6 100644
--- a/src/url.c
+++ b/src/url.c
@@ -213,7 +213,7 @@ bool handle_purl_record(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *d
 			{
 				scanlog("Related PURL: %s\n", purl);
 				component->purls[i] = purl;
-				component->purls_md5[i] = malloc(MD5_LEN);
+				component->purls_md5[i] = malloc(oss_purl.key_ln);
 				MD5((uint8_t *)purl, strlen(purl), component->purls_md5[i]);
 				return false;
 			}
@@ -246,7 +246,7 @@ void fetch_related_purls(component_data_t *component)
 	/* add main purl md5 if it is not ready */
 	if (!component->purls_md5[0] && component->purls[0])
 	{
-		component->purls_md5[0] = malloc(MD5_LEN);
+		component->purls_md5[0] = malloc(oss_purl.key_ln);
 		MD5((uint8_t *)component->purls[0], strlen(component->purls[0]), component->purls_md5[0]);
 	}
 
@@ -308,7 +308,7 @@ void purl_release_date(char *purl, char *date)
 	if (!ldb_table_exists(oss_purl.db, oss_purl.table)) //skip purl if the table is not present
 		return; 
 
-	uint8_t purl_md5[MD5_LEN];
+	uint8_t purl_md5[oss_purl.key_ln];
 	MD5((uint8_t *)purl, strlen(purl), purl_md5);
 
 	ldb_fetch_recordset(NULL, oss_purl, purl_md5, false, get_purl_first_release, (void *) date);
diff --git a/src/util.c b/src/util.c
index 4151d66..eeef02f 100644
--- a/src/util.c
+++ b/src/util.c
@@ -143,9 +143,12 @@ void vendor_component_md5(char *component, char *vendor, uint8_t *out)
 	MD5((uint8_t *)pair, strlen(pair), out);
 
 	/* Log pair_md5 */
-	char hex[MD5_LEN * 2 + 1] = "\0";
-	ldb_bin_to_hex(out, MD5_LEN, hex);
-	scanlog("vendor/component: %s = %s\n", pair, hex);
+	if (debug_on)
+	{
+		char hex[oss_purl.key_ln * 2 + 1];
+		ldb_bin_to_hex(out, oss_purl.key_ln, hex);
+		scanlog("vendor/component: %s = %s\n", pair, hex);
+	}
 }
 
 /**
@@ -227,18 +230,6 @@ void print_datestamp()
 	free(stamp);
 }
 
-/**
- * @brief Returns a string with a hex representation of md5
- * @param md5 input md5
- * @return pointer to string
- */
-char *md5_hex(uint8_t *md5)
-{
-	char *out =  calloc(2 * MD5_LEN + 1, 1);
-	for (int i = 0; i < MD5_LEN; i++) sprintf(out + strlen(out), "%02x", md5[i]);
-	return out;
-}
-
 /**
  * @brief Returns the CRC32C for a string
  * @param str input string

From f5f4d6572fd4a40ffc5892dc58fb6774ac947e54 Mon Sep 17 00:00:00 2001
From: core software devel <cs@scanoss.com>
Date: Sun, 25 Aug 2024 21:30:21 +0000
Subject: [PATCH 08/19] update fetch_recordset function handlers to new
 definition

---
 inc/match.h         |  2 +-
 inc/url.h           |  4 ++--
 src/attributions.c  |  8 +++-----
 src/binary_scan.c   |  6 +++---
 src/copyright.c     |  4 ++--
 src/cryptography.c  |  4 ++--
 src/dependency.c    |  4 ++--
 src/file.c          |  4 ++--
 src/health.c        |  4 ++--
 src/license.c       |  4 ++--
 src/match.c         |  4 ++--
 src/quality.c       |  2 +-
 src/query.c         |  9 ++++-----
 src/snippets.c      |  2 +-
 src/url.c           | 18 +++++++++---------
 src/vulnerability.c |  4 ++--
 16 files changed, 40 insertions(+), 43 deletions(-)

diff --git a/inc/match.h b/inc/match.h
index 31278f3..d36c8cf 100644
--- a/inc/match.h
+++ b/inc/match.h
@@ -35,6 +35,6 @@ void output_matches_json(scan_data_t *scan);
 void compile_matches(scan_data_t *scan);
 match_list_t * match_select_m_best(scan_data_t * scan);
 match_list_t * match_select_m_component_best(scan_data_t * scan);
-bool component_from_file(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr);
+bool component_from_file(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr);
 
 #endif
diff --git a/inc/url.h b/inc/url.h
index 28ca4ac..abb62fe 100644
--- a/inc/url.h
+++ b/inc/url.h
@@ -4,7 +4,7 @@
 #include "scanoss.h"
 #include "match_list.h"
 
-bool handle_url_record(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr);
+bool handle_url_record(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr);
 
 /* Calculates a main project URL from the PURL */
 void fill_main_url(component_data_t *match);
@@ -13,7 +13,7 @@ void fill_main_url(component_data_t *match);
 void fetch_related_purls(component_data_t *component);
 
 /* Handler function for getting the oldest URL */
-bool get_oldest_url(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr);
+bool get_oldest_url(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr);
 
 bool get_purl_first_release(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr);
 
diff --git a/src/attributions.c b/src/attributions.c
index ac67673..d265574 100644
--- a/src/attributions.c
+++ b/src/attributions.c
@@ -51,13 +51,12 @@
  * @param ptr //TODO 
  * @return //TODO  
  */
-bool notices_handler(uint8_t *key, uint8_t *subkey, int subkey_ln, \
-uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool notices_handler(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	if (datalen != 2 * oss_attribution.key_ln) return false;
 	char hexkey[oss_attribution.key_ln * 2 + 1];
 	memcpy(hexkey, data, oss_attribution.key_ln * 2);
-	hexkey[oss_attribution.key_ln * 2] = 0;
+	hexkey[table->key_ln * 2] = 0;
 
 	/* Print attribution notice header */
 	char *component = (char *) ptr;
@@ -81,8 +80,7 @@ uint8_t *data, uint32_t datalen, int iteration, void *ptr)
  * @param ptr //TODO
  * @return return true or false if the atribution exist or not.
  */
-bool attribution_handler(uint8_t *key, uint8_t *subkey, int subkey_ln, \
-uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool attribution_handler(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	bool *valid = (bool *) ptr;
 
diff --git a/src/binary_scan.c b/src/binary_scan.c
index 9e72ed8..efc0b67 100644
--- a/src/binary_scan.c
+++ b/src/binary_scan.c
@@ -62,7 +62,7 @@ static bool sort_by_hits(component_data_t *a, component_data_t *b)
 
 #define MAX_URLS 100
 
-static bool add_purl_from_urlid(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
+static bool add_purl_from_urlid(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
 {
 
 	if (iteration > MAX_URLS)
@@ -71,7 +71,7 @@ static bool add_purl_from_urlid(uint8_t *key, uint8_t *subkey, int subkey_ln, ui
 	if (!datalen || datalen >= (oss_file.key_ln + MAX_FILE_PATH)) return false;
 
 	/* Decrypt data */
-	char * decrypted = decrypt_data(raw_data, datalen, oss_file, key, subkey);
+	char * decrypted = decrypt_data(raw_data, datalen, *table, key, subkey);
 	if (!decrypted)
 		return NULL;
 	
@@ -128,7 +128,7 @@ int max_files_to_process = 4;
  * @param ptr //TODO
  * @return //TODO
  */
-static bool get_all_file_ids(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+static bool get_all_file_ids(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	//component_list_t * comp_list = (component_list_t *) ptr;
 	file_recordset * files = (file_recordset *) ptr;
diff --git a/src/copyright.c b/src/copyright.c
index 74c1f5f..5139c3b 100644
--- a/src/copyright.c
+++ b/src/copyright.c
@@ -85,10 +85,10 @@ static void clean_copyright(char *out, char *copyright)
  * @param subkey //TODO
  * @return //TODO
  */
-static bool print_copyrights_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+static bool print_copyrights_item(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	component_data_t * comp = ptr;
-	char * CSV = decrypt_data(data, datalen, oss_copyright, key, subkey);
+	char * CSV = decrypt_data(data, datalen, *table, key, subkey);
 
 	char *source  = calloc(MAX_JSON_VALUE_LEN + 1, 1);
 	char *copyright = calloc(MAX_COPYRIGHT + 1, 1);
diff --git a/src/cryptography.c b/src/cryptography.c
index 29fa8b9..10e85fe 100644
--- a/src/cryptography.c
+++ b/src/cryptography.c
@@ -50,12 +50,12 @@
  * @param ptr //TODO 
  * @return //TODO  
  */
-bool print_crypto_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool print_crypto_item(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	match_data_t *match = ptr;
 
 	if (!datalen) return false;
-	char * CSV = decrypt_data(data, datalen, oss_cryptography, key, subkey);
+	char * CSV = decrypt_data(data, datalen, *table, key, subkey);
 
 	char *algorithm = calloc(MAX_JSON_VALUE_LEN, 1);
 	char *strength = calloc(MAX_JSON_VALUE_LEN, 1);
diff --git a/src/dependency.c b/src/dependency.c
index 1334694..534c4eb 100644
--- a/src/dependency.c
+++ b/src/dependency.c
@@ -54,9 +54,9 @@ const char *dependency_sources[] = {"component_declared"};
  * @param ptr //TODO
  * @return //TODO
  */
-bool print_dependencies_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool print_dependencies_item(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
-	char *CSV = decrypt_data(data, datalen, oss_dependency, key, subkey);
+	char *CSV = decrypt_data(data, datalen, *table, key, subkey);
 	component_data_t * comp = (component_data_t *) ptr;
 	char *source = calloc(MAX_JSON_VALUE_LEN, 1);
 	char *vendor = calloc(MAX_JSON_VALUE_LEN, 1);
diff --git a/src/file.c b/src/file.c
index acd9338..6c94e77 100644
--- a/src/file.c
+++ b/src/file.c
@@ -263,11 +263,11 @@ char *file_extension(char *path)
  * @param ptr //TODO
  * @return //TODO
  */
-bool get_first_file(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool get_first_file(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	if (!datalen) return false;
 
-	char * file_data = decrypt_data(data, datalen, oss_file, key, subkey);
+	char * file_data = decrypt_data(data, datalen, *table, key, subkey);
 	
 	if (!file_data || !*file_data) 
 		return false;
diff --git a/src/health.c b/src/health.c
index 393c97f..bede8df 100644
--- a/src/health.c
+++ b/src/health.c
@@ -43,11 +43,11 @@
  * @brief Prints information about statistics of a component comming from GitHub or gitee
  * 
  */
-bool print_health_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool print_health_item(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	component_data_t *match = ptr;
 
-	char * decrypted = decrypt_data(data, datalen, oss_purl, key, subkey);
+	char * decrypted = decrypt_data(data, datalen, *table, key, subkey);
 
 	/* Expect at least a date or a pkg:*/
 	if (strlen(decrypted) < 9) 
diff --git a/src/license.c b/src/license.c
index e203cbf..28d5bbe 100644
--- a/src/license.c
+++ b/src/license.c
@@ -277,14 +277,14 @@ bool get_first_license_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_
  * @param ptr //TODO
  * @return
  */
-bool print_licenses_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool print_licenses_item(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	component_data_t *comp = ptr;
 
 	if (!datalen)
 		return false;
 
-	char *CSV = decrypt_data(data, datalen, oss_license, key, subkey);
+	char *CSV = decrypt_data(data, datalen, *table, key, subkey);
 
 	if (!CSV)
 		return false;
diff --git a/src/match.c b/src/match.c
index 421018c..27246f2 100644
--- a/src/match.c
+++ b/src/match.c
@@ -465,9 +465,9 @@ bool component_from_file(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *
 
 	/* Ignore path lengths over the limit */
 	if (!datalen || datalen >= (oss_file.key_ln + MAX_FILE_PATH)) return false;
-
+	
 	/* Decrypt data */
-	char * decrypted = decrypt_data(raw_data, datalen, oss_file, key, subkey);
+	char * decrypted = decrypt_data(raw_data, datalen, *table, key, subkey);
 	if (!decrypted)
 		return false;
 	
diff --git a/src/quality.c b/src/quality.c
index 5f9a8db..ee93ac1 100644
--- a/src/quality.c
+++ b/src/quality.c
@@ -53,7 +53,7 @@ const char *quality_sources[] = {"best_practices"};
  * @param ptr //TODO
  * @return //TODO
  */
-bool print_quality_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool print_quality_item(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	
 	char ** out  = ptr;
diff --git a/src/query.c b/src/query.c
index 84f048a..5e9a428 100644
--- a/src/query.c
+++ b/src/query.c
@@ -80,9 +80,9 @@ char *get_filename(char *md5)
  * @param ptr //TODO
  * @return //TODO
  */
-bool ldb_get_first_url_not_ignored(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool ldb_get_first_url_not_ignored(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
-	char * decrypted = decrypt_data(data, datalen, oss_url, key, subkey);
+	char * decrypted = decrypt_data(data, datalen, *table, key, subkey);
 
 	char *record = (char *) ptr;
 
@@ -123,12 +123,11 @@ void get_url_record(uint8_t *md5, uint8_t *record)
  * @param ptr //TODO
  * @return //TODO
  */
-bool handle_get_component_age(uint8_t *key, uint8_t *subkey, int subkey_ln, \
-uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool handle_get_component_age(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	long *age = (long *) ptr;
 
-	char * decrypted = decrypt_data(data, datalen, oss_purl, key, subkey);
+	char * decrypted = decrypt_data(data, datalen, *table, key, subkey);
 
 	/* Expect at least a date*/
 	if (strlen(decrypted) < 9) 
diff --git a/src/snippets.c b/src/snippets.c
index 601c3d0..961a65a 100644
--- a/src/snippets.c
+++ b/src/snippets.c
@@ -211,7 +211,7 @@ void biggest_snippet(scan_data_t *scan)
  * @return //TODO
  */
 #define MATCHMAP_ITEM_SIZE (matchmap_max_files * 2)
-static bool get_all_file_ids(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+static bool get_all_file_ids(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	uint8_t *record = (uint8_t *)ptr;
 
diff --git a/src/url.c b/src/url.c
index 5a1fbe6..751ef3b 100644
--- a/src/url.c
+++ b/src/url.c
@@ -51,11 +51,11 @@
  * @param ptr //TODO
  * @return //TODO
  */
-bool handle_url_record(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
+bool handle_url_record(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
 {
 	if (!datalen && datalen >= MAX_PATH) return false;
 
-	char * data = decrypt_data(raw_data, datalen, oss_url, key, subkey);
+	char * data = decrypt_data(raw_data, datalen, *table, key, subkey);
 
 	if (!data)
 		return false;
@@ -65,7 +65,7 @@ bool handle_url_record(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *ra
 		free(data);
 		return false;
 	}
-
+	int subkey_ln = table->key_ln - LDB_KEY_LN;
 	component_list_t * component_list = (component_list_t*) ptr;
 	
 	component_data_t * new_comp = calloc(1, sizeof(*new_comp));
@@ -177,7 +177,7 @@ bool purl_type_matches(char *purl1, char *purl2)
  * Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
 **/
 
-bool handle_purl_record(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool handle_purl_record(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	component_data_t *component = (component_data_t *) ptr;
 
@@ -213,7 +213,7 @@ bool handle_purl_record(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *d
 			{
 				scanlog("Related PURL: %s\n", purl);
 				component->purls[i] = purl;
-				component->purls_md5[i] = malloc(oss_purl.key_ln);
+				component->purls_md5[i] = malloc(table->key_ln);
 				MD5((uint8_t *)purl, strlen(purl), component->purls_md5[i]);
 				return false;
 			}
@@ -273,11 +273,11 @@ void fetch_related_purls(component_data_t *component)
  * Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
 **/
 
-bool get_purl_first_release(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool get_purl_first_release(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	if (!datalen) return false;
 
-	char * purl = decrypt_data(data, datalen, oss_purl, key, subkey);
+	char * purl = decrypt_data(data, datalen, *table, key, subkey);
 	uint8_t *oldest = (uint8_t *) ptr;
 
 	if (!purl)
@@ -320,9 +320,9 @@ void purl_release_date(char *purl, char *date)
  * @brief Handler function for getting the oldest URL.
  * Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
 **/
-bool get_oldest_url(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+bool get_oldest_url(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
-	char * url = decrypt_data(data, datalen, oss_url, key, subkey);
+	char * url = decrypt_data(data, datalen, *table, key, subkey);
 	if (!url) 
 		return false;
 
diff --git a/src/vulnerability.c b/src/vulnerability.c
index 3ec53ed..02c87e5 100644
--- a/src/vulnerability.c
+++ b/src/vulnerability.c
@@ -141,14 +141,14 @@ static bool vulnerability_version_matches(char * version, char *introduced, char
  * @param ptr //TODO
  * @return //TODO
  */
-static bool print_vulnerability_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+static bool print_vulnerability_item(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	component_data_t *comp = ptr;
 
 	/* Set a limit to the amount of vulnerabilities returned */
 	if (comp->vulnerabilities > max_vulnerabilities) return true;
 
-	char * CSV = decrypt_data(data, datalen, oss_vulnerability, key, subkey);
+	char * CSV = decrypt_data(data, datalen, *table, key, subkey);
 
 	if (!CSV)
 		return false;

From 4c481bfa5792653e7542d215155a92fff3d01712 Mon Sep 17 00:00:00 2001
From: core software devel <cs@scanoss.com>
Date: Mon, 26 Aug 2024 21:51:40 +0000
Subject: [PATCH 09/19] add path table

---
 inc/file.h    |  1 -
 inc/scan.h    |  1 +
 inc/scanoss.h |  1 +
 src/file.c    | 35 -----------------------------------
 src/main.c    | 10 ++++++++++
 src/match.c   | 41 +++++++++++++++++++++++++++++++++--------
 src/scan.c    |  7 ++++++-
 7 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/inc/file.h b/inc/file.h
index 9b29814..0c9d35f 100644
--- a/inc/file.h
+++ b/inc/file.h
@@ -10,7 +10,6 @@ bool is_file(char *path);
 bool is_dir(char *path);
 
 void get_file_md5(char *filepath, uint8_t *md5_result);
-bool collect_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr);
 bool count_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr);
 char *get_file_extension(uint8_t *md5);
 
diff --git a/inc/scan.h b/inc/scan.h
index e3e768d..ce1a64b 100644
--- a/inc/scan.h
+++ b/inc/scan.h
@@ -67,6 +67,7 @@ typedef struct scan_data_t
 } scan_data_t;
 
 extern bool force_snippet_scan;
+extern bool path_table_present;
 
 scan_data_t * scan_data_init(char *target, int max_snippets, int max_components);
 void scan_data_free (scan_data_t * scan);
diff --git a/inc/scanoss.h b/inc/scanoss.h
index 841b545..66c3b3c 100644
--- a/inc/scanoss.h
+++ b/inc/scanoss.h
@@ -126,6 +126,7 @@ extern char * component_hint;
 /* DB tables */
 extern struct ldb_table oss_url;
 extern struct ldb_table oss_file;
+extern struct ldb_table oss_path;
 extern struct ldb_table oss_wfp;
 extern struct ldb_table oss_purl;
 extern struct ldb_table oss_copyright;
diff --git a/src/file.c b/src/file.c
index 6c94e77..499c606 100644
--- a/src/file.c
+++ b/src/file.c
@@ -178,41 +178,6 @@ int dir_count(char *path)
 	return count;
 }
 
-/**
- * @brief Collect all files function pointer. Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
- * @param key //TODO
- * @param subkey //TODO
- * @param subkey_ln //TODO
- * @param raw_data //TODO
- * @param datalen //TODO
- * @param iteration //TODO
- * @param ptr //TODO
- * @return //TODO
- */
-bool collect_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
-{
-
-	/* Leave if FETCH_MAX_FILES is reached */
-	if (iteration >= FETCH_MAX_FILES) return true;
-
-	/* Ignore path lengths over the limit */
-	if (!datalen || datalen >= (oss_file.key_ln + MAX_FILE_PATH)) return false;
-
-	/* Decrypt data */
-	char * decrypted = decrypt_data(raw_data, datalen, oss_file, key, subkey);
-	if (!decrypted)
-		return NULL;
-	/* Copy data to memory */
-	file_recordset *files = ptr;
-
-	memcpy(files[iteration].url_id, raw_data, oss_url.key_ln);
-	strncpy(files[iteration].path, decrypted, MAX_FILE_PATH);
-	free(decrypted);
-	
-	files[iteration].path_ln = dir_count(files[iteration].path);
-	return false;
-}
-
 /**
  * @brief Count all entries for a given md5. Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
  * @param key //TODO
diff --git a/src/main.c b/src/main.c
index 337a466..e2a2d5a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -49,6 +49,7 @@
 
 struct ldb_table oss_url;
 struct ldb_table oss_file;
+struct ldb_table oss_path;
 struct ldb_table oss_wfp;
 struct ldb_table oss_purl;
 struct ldb_table oss_copyright;
@@ -135,6 +136,15 @@ void initialize_ldb_tables(char *name)
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "file");
 	oss_file = ldb_read_cfg(dbtable);
 
+	ldb_hash_mode_select(oss_file.key_ln);
+
+	if (ldb_table_exists(oss_db_name, "path"))
+	{
+		path_table_present = true;
+		snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "path");
+		oss_path = ldb_read_cfg(dbtable);
+	}
+
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "wfp");
 	oss_wfp = ldb_read_cfg(dbtable);
 
diff --git a/src/match.c b/src/match.c
index 27246f2..24102e3 100644
--- a/src/match.c
+++ b/src/match.c
@@ -51,8 +51,8 @@
 #include "health.h"
 
 const char *matchtypes[] = {"none", "file", "snippet", "binary"}; /** describe the availables kinds of match */
-bool match_extensions = false;									  /** global match extension flag */
-
+bool match_extensions = false;								  /** global match extension flag */
+bool path_table_present = false;
 char *component_hint = NULL;
 
 /**
@@ -442,6 +442,24 @@ bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id,
 	return true;
 }
 
+/** @brief ldb_fetch_recordset handler. @param ptr char ** that receives the first non-empty decrypted record. @return true once a record was stored, false otherwise. */
+bool path_query_handler(struct ldb_table * table, uint8_t * key, uint8_t * subkey, uint8_t * data, uint32_t datalen, int record_number, void * ptr)
+{
+	char * decrypted = decrypt_data(data, datalen, *table, key, subkey);
+	bool usable = decrypted && *decrypted; /* evaluated before any free(), so the pointer is never inspected afterwards */
+	if (usable)
+		*(char **) ptr = decrypted; /* the buffer now belongs to whoever supplied ptr */
+	else
+		free(decrypted); /* NULL or empty string: release it here instead of leaking it (free(NULL) is a no-op) */
+	return usable;
+}
+static char * path_query(uint8_t * file_id)
+{
+	char * path = NULL; /* stays NULL unless path_query_handler stores a decrypted record in it */
+	ldb_fetch_recordset(NULL, oss_path, file_id, false, path_query_handler, (void *) &path); /* look file_id up in the oss_path table */
+	return path; /* NOTE(review): presumably a heap string produced by decrypt_data that the caller must free - confirm */
+}
+
 /**
  * @brief Load componentes for a match processing the file recordset list.
  * For each file in the recordset we will query for the oldest url in the url table.
@@ -462,24 +480,31 @@ bool component_from_file(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *
 	/*Return we high accuracy it is not enabled*/
 	if (iteration > iteration_max * 2 && !(engine_flags & ENABLE_HIGH_ACCURACY))
 		return true;
-
 	/* Ignore path lengths over the limit */
-	if (!datalen || datalen >= (oss_file.key_ln + MAX_FILE_PATH)) return false;
+	if (!datalen || datalen >= (table->key_ln + MAX_FILE_PATH)) return false;
+	char * decrypted = NULL;
+	if (path_table_present)
+	{
+		decrypted = path_query(&raw_data[table->key_ln]);
+	}
+	else
+	{
+		/* Decrypt data */
+		decrypted = decrypt_data(raw_data, datalen, *table, key, subkey);
+	}
 	
-	/* Decrypt data */
-	char * decrypted = decrypt_data(raw_data, datalen, *table, key, subkey);
 	if (!decrypted)
 		return false;
 	
 	component_list_t * component_list = (component_list_t*) ptr;
 	/* Copy data to memory */
 
-	uint8_t url_id[oss_url.key_ln]; /*= {0xd4,0x1d,0x8c,0xd9,0x8f,0x00,0xb2,0x04,0xe9,0x80,0x09,0x98,0xec,0xf8,0x42,0x7e}; //empty string md5
+	uint8_t url_id[table->key_ln]; /*= {0xd4,0x1d,0x8c,0xd9,0x8f,0x00,0xb2,0x04,0xe9,0x80,0x09,0x98,0xec,0xf8,0x42,0x7e}; //empty string md5
 	
 	if (!memcmp(raw_data,url_id, MD5_LEN)) //the md5 key of an empty string must be skipped.
 		return false;*/
 
-	memcpy(url_id, raw_data, oss_url.key_ln);
+	memcpy(url_id, raw_data, table->key_ln);
 	char path[MAX_FILE_PATH+1];
 	strncpy(path, decrypted, MAX_FILE_PATH);
 	//check the ignore list only if the match type is MATCH_SNIPPET. TODO: remove this after remine everything.
diff --git a/src/scan.c b/src/scan.c
index d1f18f6..017f679 100644
--- a/src/scan.c
+++ b/src/scan.c
@@ -114,7 +114,12 @@ static bool zero_bytes (uint8_t *md5)
  */
 static match_t ldb_scan_file(scan_data_t * scan) {
 			
-	scanlog("Checking entire file %s\n", scan->file_path);
+	if (debug_on)
+	{
+		char hex_hash[oss_file.key_ln * 2 +1];
+		ldb_bin_to_hex(scan->md5, oss_file.key_ln, hex_hash);
+		scanlog("Checking entire file %s - hash: %s\n", scan->file_path, hex_hash);
+	}
 	
 	if (zero_bytes(scan->md5)) return MATCH_NONE;
 	

From 042aeb373448f99a769d12692b2ee4615fadf72d Mon Sep 17 00:00:00 2001
From: core software devel <cs@scanoss.com>
Date: Mon, 9 Sep 2024 14:09:37 +0000
Subject: [PATCH 10/19] update hash calculation to the new format

---
 inc/decrypt.h       |  2 +-
 inc/scanoss.h       |  2 --
 inc/util.h          |  5 +----
 src/attributions.c  |  4 ++--
 src/component.c     |  4 ++--
 src/decrypt.c       |  4 ++--
 src/file.c          |  6 +++---
 src/main.c          | 25 +++++++++++++++++++++++--
 src/match.c         |  4 ++--
 src/mz.c            | 11 ++++++-----
 src/query.c         |  2 +-
 src/report.c        |  2 +-
 src/scan.c          |  2 +-
 src/snippets.c      | 28 ++++++++++++++++++++--------
 src/url.c           |  6 +++---
 src/util.c          | 29 ++---------------------------
 src/vulnerability.c |  2 +-
 17 files changed, 71 insertions(+), 67 deletions(-)

diff --git a/inc/decrypt.h b/inc/decrypt.h
index bceeb47..9fecbdf 100644
--- a/inc/decrypt.h
+++ b/inc/decrypt.h
@@ -4,7 +4,7 @@
 #include "scanoss.h"
 
 extern char * (*decrypt_data) (uint8_t *data, uint32_t size, struct ldb_table table, uint8_t *key, uint8_t *subkey);
-extern void  (*decrypt_mz) (uint8_t *data, uint32_t len);
+extern void  (*decrypt_mz) (int key_ln, uint8_t *data, uint32_t len);
 extern void (*encoder_version) (char * version);
 
 char * standalone_decrypt_data(uint8_t *data, uint32_t size,struct ldb_table table, uint8_t *key, uint8_t *subkey);
diff --git a/inc/scanoss.h b/inc/scanoss.h
index 66c3b3c..f85e5be 100644
--- a/inc/scanoss.h
+++ b/inc/scanoss.h
@@ -37,8 +37,6 @@
 #define SNIPPET_LINE_TOLERANCE 10
 
 #define WFP_LN 4
-#define WFP_REC_LN 18
-
 /* Log files */
 #define SCANOSS_VERSION "5.4.10"
 #define SCAN_LOG "/tmp/scanoss_scan.log"
diff --git a/inc/util.h b/inc/util.h
index 5f1077e..5d7c6c4 100644
--- a/inc/util.h
+++ b/inc/util.h
@@ -12,14 +12,11 @@ void uint32_reverse(uint8_t *data);
 void hex_to_bin(char *hex, uint32_t len, uint8_t *out);
 
 /* Compares two MD5 checksums */
-bool md5cmp(uint8_t *md51, uint8_t *md52);
+bool hashcmp(int hash_len, uint8_t *md51, uint8_t *md52);
 
 /* Trim str */
 void trim(char *str);
 
-/* Returns the pair md5 of "component/vendor" */
-void vendor_component_md5(char *component, char *vendor, uint8_t *out);
-
 /* Returns the current date stamp */
 char *datestamp(void);
 
diff --git a/src/attributions.c b/src/attributions.c
index d265574..425831a 100644
--- a/src/attributions.c
+++ b/src/attributions.c
@@ -220,7 +220,7 @@ bool check_purl_attributions(struct ldb_table oss_attributions, char * licenses_
 		{
 			/* Get purl md5 */
 			uint8_t md5[16];
-			MD5((uint8_t *)purl, strlen(purl), md5);
+			oss_attribution.hash_calc((uint8_t *)purl, strlen(purl), md5);
 			if (declared_components[i].license && licenses_json && 
 				license_search_on_licenses_json(declared_components[i].license, licenses_json))
 			{
@@ -269,7 +269,7 @@ void print_purl_attribution_notices(struct ldb_table oss_attributions, char * li
 		{
 			/* Get purl md5 */
 			uint8_t md5[16];
-			MD5((uint8_t *)purl, strlen(purl), md5);
+			oss_attribution.hash_calc((uint8_t *)purl, strlen(purl), md5);
 			print_notices(oss_attributions, md5, purl);
 		}
   	}
diff --git a/src/component.c b/src/component.c
index 1e2734d..8afda27 100644
--- a/src/component.c
+++ b/src/component.c
@@ -302,14 +302,14 @@ bool component_date_comparation(component_data_t *a, component_data_t *b)
 	if (!a->purls_md5[0] && a->purls[0])
 	{
 		a->purls_md5[0] = malloc(oss_url.key_ln);
-		MD5((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
+		oss_purl.hash_calc((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
 		a->age = get_component_age(a->purls_md5[0]);
 	}
 
 	if (!b->purls_md5[0] && b->purls[0])
 	{
 		b->purls_md5[0] = malloc(oss_purl.key_ln);
-		MD5((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
+		oss_purl.hash_calc((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
 		b->age = get_component_age(b->purls_md5[0]);
 	}
 
diff --git a/src/decrypt.c b/src/decrypt.c
index a91372d..985e812 100644
--- a/src/decrypt.c
+++ b/src/decrypt.c
@@ -36,7 +36,7 @@
 #include "decrypt.h"
 
 char * (*decrypt_data) (uint8_t *data, uint32_t size, struct ldb_table table, uint8_t *key, uint8_t *subkey);
-void  (*decrypt_mz) (uint8_t *data, uint32_t len);
+void  (*decrypt_mz) (int key_ln, uint8_t *data, uint32_t len);
 void (*encoder_version) (char * version);
 /**
  * @brief Decrypt data function pointer. Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
@@ -51,7 +51,7 @@ char * standalone_decrypt_data(uint8_t *data, uint32_t size, struct ldb_table ta
 	char * msg = NULL;
   
   if (!strcmp(table.table, "file"))
-    msg = strndup((char*) data + 16, size - 16);
+    msg = strndup((char*) data + table.key_ln, size - table.key_ln);
   else
     msg = strndup((char*) data, size);
   
diff --git a/src/file.c b/src/file.c
index 499c606..563db22 100644
--- a/src/file.c
+++ b/src/file.c
@@ -141,7 +141,7 @@ void get_file_md5(char *filepath, uint8_t *md5_result)
 
 	if (!in)
 	{
-		MD5(NULL, 0, md5_result);
+		oss_file.hash_calc(NULL, 0, md5_result);
 		return;
 	}
 
@@ -149,7 +149,7 @@ void get_file_md5(char *filepath, uint8_t *md5_result)
 	long filesize = ftell(in);
 	if (!filesize)
 	{
-		MD5(NULL, 0, md5_result);
+		oss_file.hash_calc(NULL, 0, md5_result);
 	}
 	else
 	{
@@ -160,7 +160,7 @@ void get_file_md5(char *filepath, uint8_t *md5_result)
 			fprintf(stderr, "Warning: cannot open file %s\n", filepath);
 
 		/* Calculate MD5sum */
-		MD5(buffer, filesize, md5_result);
+		oss_file.hash_calc(buffer, filesize, md5_result);
 		free(buffer);
 		fclose(in);
 	}
diff --git a/src/main.c b/src/main.c
index e2a2d5a..486ce6a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -108,10 +108,17 @@ bool lib_encoder_load()
 #endif
 }
 
+/* Pick a table's hash routine from its key length: 8 selects ldb_crc64, any other value md5_string */
+static hash_calc_t hash_function_select(int key_ln)
+{
+	if (key_ln != 8)
+		return md5_string;
+	return ldb_crc64;
+}
+
 /* Initialize tables for the DB name indicated (defaults to oss) */
 void initialize_ldb_tables(char *name)
 {
-	
 	char * ldb_ver = NULL;
 	ldb_version(&ldb_ver);
 	scanlog("ldb version: %s\n", ldb_ver);
@@ -132,51 +139,65 @@ void initialize_ldb_tables(char *name)
 	scanlog("Loading tables definitions\n");
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "url");
 	oss_url = ldb_read_cfg(dbtable);
+	oss_url.hash_calc = hash_function_select(oss_url.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "file");
 	oss_file = ldb_read_cfg(dbtable);
+	oss_file.hash_calc = hash_function_select(oss_file.key_ln);
 
-	ldb_hash_mode_select(oss_file.key_ln);
+	/* ldb_hash_mode_select(oss_file.key_ln) is disabled: each table now gets its own hash_calc via hash_function_select() */
 
 	if (ldb_table_exists(oss_db_name, "path"))
 	{
 		path_table_present = true;
 		snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "path");
 		oss_path = ldb_read_cfg(dbtable);
+		oss_path.hash_calc = hash_function_select(oss_path.key_ln);
 	}
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "wfp");
 	oss_wfp = ldb_read_cfg(dbtable);
+	oss_wfp.hash_calc = hash_function_select(oss_wfp.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "purl");
 	oss_purl = ldb_read_cfg(dbtable);
+	oss_purl.hash_calc = hash_function_select(oss_purl.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "copyright");
 	oss_copyright = ldb_read_cfg(dbtable);
+	oss_copyright.hash_calc = hash_function_select(oss_copyright.key_ln);
 	
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "quality");
 	oss_quality = ldb_read_cfg(dbtable);
+	oss_quality.hash_calc = hash_function_select(oss_quality.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "vulnerability");
 	oss_vulnerability = ldb_read_cfg(dbtable);
+	oss_vulnerability.hash_calc = hash_function_select(oss_vulnerability.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "dependency");
 	oss_dependency = ldb_read_cfg(dbtable);
+	oss_dependency.hash_calc = hash_function_select(oss_dependency.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "license");
 	oss_license = ldb_read_cfg(dbtable);
+	oss_license.hash_calc = hash_function_select(oss_license.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "attribution");
 	oss_attribution = ldb_read_cfg(dbtable);
+	oss_attribution.hash_calc = hash_function_select(oss_attribution.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "cryptography");
 	oss_cryptography = ldb_read_cfg(dbtable);
+	oss_cryptography.hash_calc = hash_function_select(oss_cryptography.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "sources");
 	oss_sources = ldb_read_cfg(dbtable);
+	oss_sources.hash_calc = hash_function_select(oss_sources.key_ln);
 
 	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "notices");
 	oss_notices = ldb_read_cfg(dbtable);
+	oss_notices.hash_calc = hash_function_select(oss_notices.key_ln);
 
 	kb_version_get();
 	osadl_load_file();
diff --git a/src/match.c b/src/match.c
index 24102e3..a3d6c65 100644
--- a/src/match.c
+++ b/src/match.c
@@ -355,14 +355,14 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
 		if (!a->purls_md5[0] && a->purls[0])
 		{
 			a->purls_md5[0] = malloc(oss_purl.key_ln);
-			MD5((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
+			oss_purl.hash_calc((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
 			a->age = get_component_age(a->purls_md5[0]);
 		}
 		
 		if (!b->purls_md5[0] && b->purls[0])
 		{
 			b->purls_md5[0] = malloc(oss_purl.key_ln);
-			MD5((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
+			oss_purl.hash_calc((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
 			b->age = get_component_age(b->purls_md5[0]);
 		}
 		
diff --git a/src/mz.c b/src/mz.c
index b9969d2..d4b2de1 100644
--- a/src/mz.c
+++ b/src/mz.c
@@ -50,6 +50,7 @@ void mz_get_key(struct ldb_table kb, char *key)
 	char mz_path[LDB_MAX_PATH + kb.key_ln];
 	char mz_file_id[5] = "\0\0\0\0\0";
 	struct mz_job job;
+	job.key_ln = kb.key_ln -2;
 	memcpy(mz_file_id, key, 4);
 	sprintf(mz_path, "%s/%s/%s/%s.mz", ldb_root, kb.db, kb.table,mz_file_id);
 
@@ -79,7 +80,7 @@ void mz_get_key(struct ldb_table kb, char *key)
 	{
 		/* Position pointers */
 		job.id = job.mz + ptr;
-		uint8_t *file_ln = job.id + MZ_MD5;
+		uint8_t *file_ln = job.id + job.key_ln;
 		job.zdata = file_ln + MZ_SIZE;
 
 		/* Get compressed data size */
@@ -88,19 +89,19 @@ void mz_get_key(struct ldb_table kb, char *key)
 		job.zdata_ln = tmpln;
 
 		/* Get total mz record length */
-		job.ln = MZ_MD5 + MZ_SIZE + job.zdata_ln;
+		job.ln = job.key_ln + MZ_SIZE + job.zdata_ln;
 
 		/* Pass job to handler */
-		if (!memcmp(job.id, job.key + 2, MZ_MD5))
+		if (!memcmp(job.id, job.key + 2, job.key_ln))
 		{
 			if (kb.definitions & LDB_TABLE_DEFINITION_ENCRYPTED)
 			{
-				decrypt_mz(job.id, job.zdata_ln);
+				decrypt_mz(kb.key_ln, job.id, job.zdata_ln);
 			}
 			/* Decompress */
 			MZ_DEFLATE(&job);
 
-			job.data[job.data_ln] = 0;
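+			/* NOTE(review): the terminator write job.data[job.data_ln] = 0 is disabled; printf("%s") below needs MZ_DEFLATE to NUL-terminate job.data - confirm */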
 			printf("%s", job.data);
 			return;
 		}
diff --git a/src/query.c b/src/query.c
index 5e9a428..8d42f8d 100644
--- a/src/query.c
+++ b/src/query.c
@@ -215,6 +215,6 @@ void purl_version_md5(uint8_t *out, char *purl, char *version)
 {
 	char purl_version[MAX_ARGLN] = "\0";
 	sprintf(purl_version, "%s@%s", purl, version);
-	MD5((uint8_t *)purl_version, strlen(purl_version), out);
+	oss_purl.hash_calc((uint8_t *)purl_version, strlen(purl_version), out);
 }
 
diff --git a/src/report.c b/src/report.c
index 96620c1..46ffd06 100644
--- a/src/report.c
+++ b/src/report.c
@@ -204,7 +204,7 @@ bool print_json_component(component_data_t * component)
 		if (component->purls[i] && !component->purls_md5[i])
 		{
 			component->purls_md5[i] = malloc(oss_purl.key_ln);
-			MD5((uint8_t *)component->purls[i], strlen(component->purls[i]), component->purls_md5[i]);
+			oss_purl.hash_calc((uint8_t *)component->purls[i], strlen(component->purls[i]), component->purls_md5[i]);
 		}
 	}
 		
diff --git a/src/scan.c b/src/scan.c
index 017f679..32633ea 100644
--- a/src/scan.c
+++ b/src/scan.c
@@ -260,7 +260,7 @@ int wfp_scan(char * path, int scan_max_snippets, int scan_max_components)
 		if (is_bin)
 			binary_scan(&line[4]);
 
-		/* Parse file information with format: file=MD5(32),file_size,file_path */
+		/* Parse file information with format: file=HASH(16/32),file_size,file_path */
 		if (is_file)
 		{
 			/* A scan data was fullfilled and is ready to be scanned */
diff --git a/src/snippets.c b/src/snippets.c
index 961a65a..d2ebe5b 100644
--- a/src/snippets.c
+++ b/src/snippets.c
@@ -226,7 +226,7 @@ static bool get_all_file_ids(struct ldb_table * table, uint8_t *key, uint8_t *su
 	{
 		uint32_t size = uint32_read(record);
 		/* End recordset fetch if MAX_QUERY_RESPONSE is reached */
-		if (size + datalen + 4 >= WFP_REC_LN * MATCHMAP_ITEM_SIZE)
+		if (size + datalen + 4 >= table->rec_ln * MATCHMAP_ITEM_SIZE)
 		{
 			return true;
 		}
@@ -615,7 +615,7 @@ int add_file_to_matchmap(scan_data_t *scan, matchmap_entry_t *item, uint8_t *md5
 			return -1;
 		}
 		
-		if (md5cmp(scan->matchmap[t].md5, md5))
+		if (hashcmp(oss_file.key_ln, scan->matchmap[t].md5, md5))
 		{
 			lastwfp = scan->matchmap[t].lastwfp;
 			found = t;
@@ -746,13 +746,13 @@ match_t ldb_scan_snippets(scan_data_t *scan)
 	for (long i = 0; i < scan->hash_count; i++)
 	{
 		/* Get all file IDs for given wfp */
-		map[i].md5_set = malloc(WFP_REC_LN * MATCHMAP_ITEM_SIZE);
+		map[i].md5_set = malloc(oss_wfp.rec_ln * MATCHMAP_ITEM_SIZE);
 		wfp_invert(scan->hashes[i], map[i].wfp);
-		//scanlog(" Add wfp %02x%02x%02x%02x to map\n",map[i].wfp[0], map[i].wfp[1],map[i].wfp[2],map[i].wfp[3]);
+		scanlog(" Add wfp %02x%02x%02x%02x to map\n",map[i].wfp[0], map[i].wfp[1],map[i].wfp[2],map[i].wfp[3]);
 		uint32_write(map[i].md5_set, 0);
 		map[i].line = scan->lines[i];
 		ldb_fetch_recordset(NULL, oss_wfp, map[i].wfp, false, get_all_file_ids, (void *)map[i].md5_set);
-		map[i].size = uint32_read(map[i].md5_set) / WFP_REC_LN;
+		map[i].size = uint32_read(map[i].md5_set) / oss_wfp.rec_ln;
 		//Initializate the lines indirection when a wfp from a line has at least one md5 linked
 		if (map[i].size)
 			map_lines_indirection[scan->lines[i]] = 0;
@@ -761,6 +761,18 @@ match_t ldb_scan_snippets(scan_data_t *scan)
 			map_max_size = map[i].size;
 		
 	}
+
+	/*for (long i = 0; i < scan->hash_count; i++)
+	{ 
+		printf("%02x%02x%02x%02x: ", map[i].wfp[0], map[i].wfp[1],map[i].wfp[2],map[i].wfp[3]);
+		for (int j=0; j < map[i].size; j++)
+		{
+			char hex[MD5_LEN_HEX] = "\0";
+			ldb_bin_to_hex(map[i].md5_set + 4 + j * oss_wfp.rec_ln,  oss_file.key_ln, hex);
+			printf(" %s", hex);
+		}
+		printf("\n");
+	}*/
 	/* Classify the WFPs in cathegories depending on popularity
 	Each cathegoy will contain a sub set of index refered to map rows*/
 	#define MAP_INDIRECTION_CAT_NUMBER 1000
@@ -890,7 +902,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
 				/* Add each item to the matchmap*/
 				for (int wfp_index = map_indexes[i]; wfp_index < map[i].size; wfp_index++)
 				{
-					int wfp_p = wfp_index * WFP_REC_LN;
+					int wfp_p = wfp_index * oss_wfp.rec_ln;
 					/*Stop when a new sector appers*/
 					if (md5s[wfp_p] != sector)
 					{
@@ -948,7 +960,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
 				/* Add each item to the matchmap*/
 				for (int wfp_index = map_indexes[i]; wfp_index < map[i].size; wfp_index++)
 				{
-					int wfp_p = wfp_index * WFP_REC_LN;
+					int wfp_p = wfp_index * oss_wfp.rec_ln;
 					int sector = md5s[wfp_p];
 					int sector_max = min_match_hits;
 
@@ -957,7 +969,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
 					else
 						sector_max = scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits;
 
-					if (md5cmp(&md5s[wfp_p], scan->matchmap[scan->matchmap_rank_by_sector[sector]].md5))
+					if (hashcmp(oss_file.key_ln, &md5s[wfp_p], scan->matchmap[scan->matchmap_rank_by_sector[sector]].md5))
 					{				 
 						add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], 0, &sector_max, &scan->matchmap_rank_by_sector[sector]);
 						md5_proceced++;
diff --git a/src/url.c b/src/url.c
index 751ef3b..2fe9467 100644
--- a/src/url.c
+++ b/src/url.c
@@ -214,7 +214,7 @@ bool handle_purl_record(struct ldb_table * table, uint8_t *key, uint8_t *subkey,
 				scanlog("Related PURL: %s\n", purl);
 				component->purls[i] = purl;
 				component->purls_md5[i] = malloc(table->key_ln);
-				MD5((uint8_t *)purl, strlen(purl), component->purls_md5[i]);
+				oss_purl.hash_calc((uint8_t *)purl, strlen(purl), component->purls_md5[i]);
 				return false;
 			}
 			/* Already exists, exit */
@@ -247,7 +247,7 @@ void fetch_related_purls(component_data_t *component)
 	if (!component->purls_md5[0] && component->purls[0])
 	{
 		component->purls_md5[0] = malloc(oss_purl.key_ln);
-		MD5((uint8_t *)component->purls[0], strlen(component->purls[0]), component->purls_md5[0]);
+		oss_purl.hash_calc((uint8_t *)component->purls[0], strlen(component->purls[0]), component->purls_md5[0]);
 	}
 
 	/* Fill purls */
@@ -309,7 +309,7 @@ void purl_release_date(char *purl, char *date)
 		return; 
 
 	uint8_t purl_md5[oss_purl.key_ln];
-	MD5((uint8_t *)purl, strlen(purl), purl_md5);
+	oss_purl.hash_calc((uint8_t *)purl, strlen(purl), purl_md5);
 
 	ldb_fetch_recordset(NULL, oss_purl, purl_md5, false, get_purl_first_release, (void *) date);
 }
diff --git a/src/util.c b/src/util.c
index eeef02f..be218e6 100644
--- a/src/util.c
+++ b/src/util.c
@@ -99,9 +99,9 @@ void uint32_reverse(uint8_t *data)
  * @param md52 md5 2
  * @return true for equal
  */
-bool md5cmp(uint8_t *md51, uint8_t *md52)
+bool hashcmp(int hash_len, uint8_t *md51, uint8_t *md52)
 {
-	for (int i = 0; i < 16; i++)
+	for (int i = 0; i < hash_len; i++) /* compares the first hash_len bytes; hash_len <= 0 compares nothing and yields true */
 		if (md51[i] != md52[i])
 			return false;
 	return true;
@@ -126,31 +126,6 @@ void trim(char *str)
     str[i + 1] = 0;
 }
 
-/**
- * @brief Returns the pair md5 of "component/vendor"
- * @param component component string
- * @param vendor vendor sting
- * @param out[out] pointer ot md5
- */
-void vendor_component_md5(char *component, char *vendor, uint8_t *out)
-{
-	char pair[1024] = "\0";
-	if (strlen(component) + strlen(vendor) + 2 >= 1024) return;
-
-	/* Calculate pair_md5 */
-	sprintf(pair, "%s/%s", component, vendor);
-	for (int i = 0; i < strlen(pair); i++) pair[i] = tolower(pair[i]);
-	MD5((uint8_t *)pair, strlen(pair), out);
-
-	/* Log pair_md5 */
-	if (debug_on)
-	{
-		char hex[oss_purl.key_ln * 2 + 1];
-		ldb_bin_to_hex(out, oss_purl.key_ln, hex);
-		scanlog("vendor/component: %s = %s\n", pair, hex);
-	}
-}
-
 /**
  * @brief  Removes chr from str
  * @param str input string
diff --git a/src/vulnerability.c b/src/vulnerability.c
index 02c87e5..defc37a 100644
--- a/src/vulnerability.c
+++ b/src/vulnerability.c
@@ -244,7 +244,7 @@ void version_md5(uint8_t *out, char *vendor, char *component, char *version)
 {
 	char triplet[MAX_ARGLN];
 	sprintf(triplet, "%s/%s/%s", vendor, component, version);
-	MD5((uint8_t *)triplet, strlen(triplet), out);
+	oss_vulnerability.hash_calc((uint8_t *)triplet, strlen(triplet), out);
 }
 
 /**

From 939f6f78bdcfc55cb5c0f63a438efa190c84a015 Mon Sep 17 00:00:00 2001
From: coresoftware dev <coredev@scanoss.com>
Date: Tue, 3 Dec 2024 11:44:13 +0100
Subject: [PATCH 11/19] update multi-snippet, add fetch_recordset local
 wrapper, update selection logic using vulnerabilities, solve memory leaks

---
 inc/component.h     |  1 +
 inc/match_list.h    |  4 +++-
 inc/scanoss.h       |  3 +++
 src/attributions.c  |  6 +++---
 src/binary_scan.c   |  7 ++++---
 src/component.c     |  4 ++--
 src/copyright.c     |  6 +++---
 src/cryptography.c  |  2 +-
 src/dependency.c    |  6 +++---
 src/file.c          |  2 +-
 src/health.c        |  2 +-
 src/license.c       |  8 ++++----
 src/match.c         | 49 +++++++++++++++++++++++++++++++++------------
 src/match_list.c    | 47 +++++++++++++++++++++++++++++++++++++++----
 src/quality.c       |  2 +-
 src/query.c         | 14 ++++++++++---
 src/report.c        | 13 ++----------
 src/snippets.c      | 20 +++++++++++++-----
 src/url.c           |  4 ++--
 src/vulnerability.c | 13 +++++++-----
 20 files changed, 147 insertions(+), 66 deletions(-)

diff --git a/inc/component.h b/inc/component.h
index 9af3519..5069983 100644
--- a/inc/component.h
+++ b/inc/component.h
@@ -55,5 +55,6 @@ bool component_date_comparation(component_data_t * a, component_data_t * b);
 component_data_t * component_data_copy(component_data_t * in);
 int asset_declared(component_data_t * comp);
 void component_item_free(component_item * comp_item);
+void component_purl_md5(component_data_t * component);
 void fill_component_path(component_data_t *component, char *file_path);
 #endif
\ No newline at end of file
diff --git a/inc/match_list.h b/inc/match_list.h
index 6773972..7d81be1 100644
--- a/inc/match_list.h
+++ b/inc/match_list.h
@@ -154,5 +154,7 @@ void component_list_destroy(component_list_t *list);
 bool component_list_add_binary(component_list_t *list, component_data_t *new_comp, bool (*val)(component_data_t *a, component_data_t *b), bool remove_a);
 bool match_list_eval(match_list_t *list, match_data_t * in,  bool (*eval)(match_data_t *fpa, match_data_t *fpb));
 void match_list_tolerance_set(float in);
-bool component_list_update(component_list_t *list, component_data_t * in, list_update_t (*eval)(component_data_t *fpa, component_data_t *fpb));
+list_update_t component_list_update(component_list_t *list, component_data_t * in, list_update_t (*eval)(component_data_t *fpa, component_data_t *fpb));
+void component_list_sort(struct comp_entry *np, bool (*val)(component_data_t *a, component_data_t *b));
+
 #endif
diff --git a/inc/scanoss.h b/inc/scanoss.h
index f85e5be..6008b40 100644
--- a/inc/scanoss.h
+++ b/inc/scanoss.h
@@ -155,4 +155,7 @@ bool ignored_asset_match(uint8_t *url_record);
 void ldb_get_first_record(struct ldb_table table, uint8_t* key, void *void_ptr);
 
 int binary_scan(char * bfp);
+
+uint32_t fetch_recordset(struct ldb_table table, uint8_t *key, ldb_record_handler_t handler, void *ptr);
+
 #endif
diff --git a/src/attributions.c b/src/attributions.c
index 425831a..773ce56 100644
--- a/src/attributions.c
+++ b/src/attributions.c
@@ -39,7 +39,7 @@
 #include "parse.h"
 #include "util.h"
 #include "mz.h"
-
+#include "query.h"
 /**
  * @brief Notices LDB function pointer. Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
  * @param key ldb key looking for
@@ -120,7 +120,7 @@ bool attribution_handler(struct ldb_table * table, uint8_t *key, uint8_t *subkey
 bool purl_notices_exist(struct ldb_table oss_attribution, uint8_t *key)
 {
 	bool validated = true;
-	ldb_fetch_recordset(NULL, oss_attribution, key, false, attribution_handler, &validated);
+	fetch_recordset(oss_attribution, key, attribution_handler, &validated);
 	return validated;
 }
 
@@ -134,7 +134,7 @@ bool purl_notices_exist(struct ldb_table oss_attribution, uint8_t *key)
 bool print_notices(struct ldb_table oss_attribution, uint8_t *key, char *component)
 {
 	bool validated = true;
-	ldb_fetch_recordset(NULL, oss_attribution, key, false, notices_handler, component);
+	fetch_recordset(oss_attribution, key, notices_handler, component);
 	return validated;
 }
 
diff --git a/src/binary_scan.c b/src/binary_scan.c
index efc0b67..54848c9 100644
--- a/src/binary_scan.c
+++ b/src/binary_scan.c
@@ -38,6 +38,7 @@
 #include "url.h"
 #include "decrypt.h"
 #include "report.h"
+#include "query.h"
 
 component_data_t comp_max_hits = {.hits=-1};
 static bool component_hits_comparation(component_data_t *a, component_data_t *b)
@@ -83,7 +84,7 @@ static bool add_purl_from_urlid(struct ldb_table * table, uint8_t *key, uint8_t
 	strncpy(path, decrypted, MAX_FILE_PATH);
 
 	uint8_t *url_rec = calloc(LDB_MAX_REC_LN, 1); /*Alloc memory for url records */
-	ldb_fetch_recordset(NULL, oss_url, url_id, false, get_oldest_url, (void *)url_rec);
+	fetch_recordset(oss_url, url_id, get_oldest_url, (void *)url_rec);
 
 	/* Create a new component and fill it from the url record */
 	component_data_t *new_comp = calloc(1, sizeof(*new_comp));
@@ -168,12 +169,12 @@ static void fhash_process(char * hash, component_list_t * comp_list)
 	ldb_hex_to_bin(hash, 32, fhash);
 	/* Get all file IDs for given wfp */
 	file_recordset *files = calloc(1001, sizeof(file_recordset));;
-	int records = ldb_fetch_recordset(NULL, oss_fhash, fhash, false, get_all_file_ids, (void *) files);
+	int records = fetch_recordset( oss_fhash, fhash, get_all_file_ids, (void *) files);
 	if (records < max_files_to_process)
 	{
 		for (int i = 0; i < records; i++)
 		{
-			ldb_fetch_recordset(NULL, oss_file, files[i].url_id, false, add_purl_from_urlid,(void *)comp_list);
+			fetch_recordset( oss_file, files[i].url_id, add_purl_from_urlid,(void *)comp_list);
 		}
 	}
 	free(files);
diff --git a/src/component.c b/src/component.c
index 8afda27..5c20c09 100644
--- a/src/component.c
+++ b/src/component.c
@@ -298,7 +298,7 @@ bool component_date_comparation(component_data_t *a, component_data_t *b)
 		return false;
 	if (!*a->release_date)
 		return true;
-
+	scanlog("%s - %s vs %s - %s\n", a->purls[0] ? a->purls[0] : "(null)", a->release_date, b->purls[0] ? b->purls[0] : "(null)", b->release_date); /* purls[0] may be NULL (tested below), so never hand it to %s directly */
 	if (!a->purls_md5[0] && a->purls[0])
 	{
 		a->purls_md5[0] = malloc(oss_url.key_ln);
@@ -348,7 +348,7 @@ void component_purl_md5(component_data_t * component)
 		if (component->purls[i] && !component->purls_md5[i])
 		{
 			component->purls_md5[i] = malloc(oss_purl.key_ln);
-			MD5((uint8_t *)component->purls[i], strlen(component->purls[i]), component->purls_md5[i]);
+			oss_purl.hash_calc((uint8_t *)component->purls[i], strlen(component->purls[i]), component->purls_md5[i]);
 		}
 	}
 }
\ No newline at end of file
diff --git a/src/copyright.c b/src/copyright.c
index 5139c3b..11f5499 100644
--- a/src/copyright.c
+++ b/src/copyright.c
@@ -145,17 +145,17 @@ void print_copyrights(component_data_t * comp)
 	
 	uint32_t records = 0;
 
-	records = ldb_fetch_recordset(NULL, oss_copyright, comp->file_md5_ref, false, print_copyrights_item, comp);
+	records = fetch_recordset( oss_copyright, comp->file_md5_ref, print_copyrights_item, comp);
 	scanlog("File md5 copyright records %d\n", records);
 	if (!records)
 	{
-		records = ldb_fetch_recordset(NULL, oss_copyright, comp->url_md5, false, print_copyrights_item, comp);
+		records = fetch_recordset( oss_copyright, comp->url_md5, print_copyrights_item, comp);
 		scanlog("URL md5 copyright records %d\n", records);
 
 	}
 	if (!records)
 		for (int i = 0; i < MAX_PURLS && comp->purls[i]; i++)
-			if (ldb_fetch_recordset(NULL, oss_copyright, comp->purls_md5[i], false, print_copyrights_item, comp)) break;
+			if (fetch_recordset( oss_copyright, comp->purls_md5[i], print_copyrights_item, comp)) break;
 
 	char * aux = NULL;
 	if (comp->copyright_text && *comp->copyright_text)
diff --git a/src/cryptography.c b/src/cryptography.c
index 10e85fe..1348049 100644
--- a/src/cryptography.c
+++ b/src/cryptography.c
@@ -108,7 +108,7 @@ void print_cryptography(match_data_t * match)
 	memset(crclist, 0, sizeof(crclist));
 	match->crclist = crclist;
 	
-	ldb_fetch_recordset(NULL, oss_cryptography, match->file_md5, false, print_crypto_item, match);
+	fetch_recordset(oss_cryptography, match->file_md5, print_crypto_item, match);
 	
 	char * aux = NULL;
 	asprintf(&aux, "%s%s]", result, (match->crytography_text && *match->crytography_text) ? match->crytography_text : "" );
diff --git a/src/dependency.c b/src/dependency.c
index 534c4eb..91e14b0 100644
--- a/src/dependency.c
+++ b/src/dependency.c
@@ -113,7 +113,7 @@ int print_dependencies(component_data_t * comp)
 	uint32_t records = 0;
 
 	/* Pull URL dependencies */
-	records = ldb_fetch_recordset(NULL, oss_dependency, comp->url_md5, false, print_dependencies_item, NULL);
+	records = fetch_recordset( oss_dependency, comp->url_md5, print_dependencies_item, NULL);
 	if (records)
 		scanlog("Dependency matches (%d) reported for url_hash\n", records);
 	else
@@ -126,7 +126,7 @@ int print_dependencies(component_data_t * comp)
 			uint8_t hash[oss_purl.key_ln];
 			purl_version_md5(hash, comp->purls[i], comp->version);
 
-			records = ldb_fetch_recordset(NULL, oss_dependency, hash, false, print_dependencies_item, comp);
+			records = fetch_recordset( oss_dependency, hash, print_dependencies_item, comp);
 			if (records)
 			{
 				scanlog("Dependency matches (%d) reported for %s@%s\n", records, comp->purls[i],comp->version);
@@ -142,7 +142,7 @@ int print_dependencies(component_data_t * comp)
 			uint8_t hash[oss_purl.key_ln];
 			purl_version_md5(hash, comp->purls[i], comp->latest_version);
 
-			records = ldb_fetch_recordset(NULL, oss_dependency, hash, false, print_dependencies_item, comp);
+			records = fetch_recordset( oss_dependency, hash, print_dependencies_item, comp);
 			if (records)
 			{
 				scanlog("Dependency matches (%d) reported for %s@%s\n", records, comp->purls[i],comp->latest_version);
diff --git a/src/file.c b/src/file.c
index 563db22..35a9e0d 100644
--- a/src/file.c
+++ b/src/file.c
@@ -257,6 +257,6 @@ char *get_file_extension(uint8_t *md5)
 	char *out = malloc(MAX_ARGLN + 1);
 	*out = 0;
 
-	ldb_fetch_recordset(NULL, oss_file, md5, false, get_first_file, out);
+	fetch_recordset(oss_file, md5, get_first_file, out);
 	return out;
 }
diff --git a/src/health.c b/src/health.c
index bede8df..b03f7ed 100644
--- a/src/health.c
+++ b/src/health.c
@@ -111,6 +111,6 @@ void print_health(component_data_t *component)
 {
 	if (!ldb_table_exists(oss_purl.db, oss_purl.table)) //skip crypto if the table is not present
 		return;
-	ldb_fetch_recordset(NULL, oss_purl, component->purls_md5[0], false, print_health_item, component);
+	fetch_recordset(oss_purl, component->purls_md5[0], print_health_item, component);
 }
 
diff --git a/src/license.c b/src/license.c
index 28d5bbe..5dae43e 100644
--- a/src/license.c
+++ b/src/license.c
@@ -363,10 +363,10 @@ void print_licenses(component_data_t *comp)
 
 	/* Look for component or file license */
 
-	records = ldb_fetch_recordset(NULL, oss_license, comp->file_md5_ref, false, print_licenses_item, comp);
+	records = fetch_recordset(oss_license, comp->file_md5_ref, print_licenses_item, comp);
 	scanlog("License for file_id license returns %d hits\n", records);
 
-	records = ldb_fetch_recordset(NULL, oss_license, comp->url_md5, false, print_licenses_item, comp);
+	records = fetch_recordset(oss_license, comp->url_md5, print_licenses_item, comp);
 	scanlog("License for url_id license returns %d hits\n", records);
 
 	for (int i = 0; i < MAX_PURLS && comp->purls[i]; i++)
@@ -375,13 +375,13 @@ void print_licenses(component_data_t *comp)
 		uint8_t purlversion_md5[oss_purl.key_ln];
 		purl_version_md5(purlversion_md5, comp->purls[i], comp->version);
 
-		records = ldb_fetch_recordset(NULL, oss_license, purlversion_md5, false, print_licenses_item, comp);
+		records = fetch_recordset(oss_license, purlversion_md5, print_licenses_item, comp);
 		scanlog("License for %s@%s license returns %d hits\n", comp->purls[i], comp->version, records);
 
 		if (records)
 			break;
 
-		records = ldb_fetch_recordset(NULL, oss_license, comp->purls_md5[i], false, print_licenses_item, comp);
+		records = fetch_recordset(oss_license, comp->purls_md5[i], print_licenses_item, comp);
 		scanlog("License for %s license returns %d hits\n", comp->purls[i], records);
 		
 		if (records)
diff --git a/src/match.c b/src/match.c
index a3d6c65..ca82a33 100644
--- a/src/match.c
+++ b/src/match.c
@@ -289,9 +289,14 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
 		if (result < 0)
 			return false;
 	}
+
+	/*if (strstr(a->file, "contrib") && !strstr(b->file, "contrib"))
+			return true;
+		if (!strstr(a->file, "contrib") && strstr(b->file, "contrib"))
+			return false;*/
 	
 	if ((engine_flags & ENABLE_PATH_HINT) && a->file_path_ref && b->file_path_ref)
-	{
+	{			
 		//evalute path rank for component a
 		evaluate_path_rank(a);
 		
@@ -389,7 +394,7 @@ list_update_t component_update(component_data_t *a, component_data_t *b)
 			return LIST_ITEM_UPDATE;
 		else
 		{
-			scanlog("--- Componen already exist: %s---\n", b->component);
+			scanlog("--- Componen already exist: %s--- %s vs %s\n", b->purls[0], b->release_date, a->release_date);
 			component_data_free(b);
 			return LIST_ITEM_FOUND;
 		}
@@ -402,11 +407,11 @@ bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id,
 {
 	component_data_t *new_comp = NULL;
 
-	ldb_fetch_recordset(NULL, oss_url, url_id, false, get_oldest_url, (void *)&new_comp);
-	if (!new_comp)
-		return false;
-		
-	fill_component_path(new_comp, path);
+	ldb_fetch_recordset(NULL, oss_url, url_id, false, get_oldest_url, (void *)url_rec);
+
+	/* Extract date from url_rec */
+	char date[MAX_ARGLN] = "0";
+	extract_csv(date, (char *)url_rec, 4, MAX_ARGLN);
 	/* Create a new component and fill it from the url record */
 	component_data_t *new_comp = calloc(1, sizeof(*new_comp));
 	bool result = fill_component(new_comp, url_id, path, (uint8_t *)url_rec);
@@ -420,16 +425,21 @@ bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id,
 		asset_declared(new_comp);
 		new_comp->file_path_ref = component_list->match_ref->scan_ower->file_path;
 		new_comp->path_rank = PATH_LEVEL_COMP_INIT_VALUE;
-		if (!component_list_update(component_list, new_comp, component_update))
+		list_update_t r = component_list_update(component_list, new_comp, component_update);
+		if (r == LIST_ITEM_NOT_FOUND)
 		{
 			scanlog("--- new comp %s---\n", new_comp->component);
 			if (!component_list_add(component_list, new_comp, component_hint_date_comparation, true))
 			{
-				scanlog("component rejected: %s\n", new_comp->purls[0]);
+				scanlog("component rejected: %s - %s\n", new_comp->purls[0], new_comp->release_date);
 				component_data_free(new_comp); /* Free if the componet was rejected */
 			}
 			else
-				scanlog("component accepted: %s - pathrank: %d\n", new_comp->purls[0], new_comp->path_rank);
+				scanlog("component accepted: %s - %s - pathrank: %d\n", new_comp->purls[0], new_comp->release_date, new_comp->path_rank);
+		}
+		else if (r == LIST_ITEM_UPDATE && component_list->headp.lh_first)
+		{
+			component_list_sort(component_list->headp.lh_first, component_hint_date_comparation);
 		}
 	}
 	else
@@ -456,7 +466,7 @@ bool path_query_handler(struct ldb_table * table, uint8_t * key, uint8_t * subke
 static char * path_query(uint8_t * file_id)
 {
 	char * path = NULL;
-	ldb_fetch_recordset(NULL, oss_path, file_id, false, path_query_handler, (void *) &path);
+	fetch_recordset(oss_path, file_id, path_query_handler, (void *) &path);
 	return path;
 }
 
@@ -542,11 +552,11 @@ bool load_matches(match_data_t *match)
 	uint32_t records = 0;
 
 	/*Query to url table looking for a url match, will add the components to component list */
-	records = ldb_fetch_recordset(NULL, oss_url, match->file_md5, false, handle_url_record, (void *)&match->component_list);
+	records = fetch_recordset(oss_url, match->file_md5, handle_url_record, (void *)&match->component_list);
 	scanlog("URL recordset contains %u records\n", records);
 
 	/*Collect all files from the files table matching with the match md5 being processed */
-	records = ldb_fetch_recordset(NULL, oss_file, match->file_md5, false, component_from_file,(void *)&match->component_list);
+	records = fetch_recordset(oss_file, match->file_md5, component_from_file,(void *)&match->component_list);
 	scanlog("Found %d file entries\n", records);
 
 	/* Final optimization based on the available information for a component */
@@ -557,6 +567,10 @@ bool load_matches(match_data_t *match)
 		struct comp_entry *item = NULL;
 		LIST_FOREACH(item, &match->component_list.headp, entries)
 		{
+				/*Check if there are some purl's md5 missing. We could do this earlier, but this is a performance optimization*/	
+			component_purl_md5(item->component);
+			
+			scanlog("Tiebreak\n");
 			if (!item->entries.le_next || !item->entries.le_next->component)
 				break;
 			
@@ -678,6 +692,15 @@ void match_select_best(scan_data_t *scan)
 		struct entry *item = NULL;
 		LIST_FOREACH(item, &scan->matches_list_array[i]->headp, entries)
 		{
+			if (debug_on) /* Dump every candidate component of this match before the best one is chosen */
+			{
+				struct comp_entry *comp = NULL;
+				int comp_n = 0; /* Position of the component inside the list, printed as the first log field */
+				LIST_FOREACH(comp, &item->match->component_list.headp, entries)
+				{
+					scanlog("<<<%d %s - %s>>>\n", comp_n++, comp->component->purls[0], comp->component->release_date);
+				}
+			}
 			if (find_oldest_match(scan->matches_list_array[i]->best_match, item->match))
 				scan->matches_list_array[i]->best_match = item->match;
 		}
diff --git a/src/match_list.c b/src/match_list.c
index 89d6ff1..60ef8ef 100644
--- a/src/match_list.c
+++ b/src/match_list.c
@@ -137,6 +137,11 @@ bool component_list_add(component_list_t *list, component_data_t *new_comp, bool
             
             if(list->last_element_aux)
             {
+                if (debug_on) 
+                {
+                    scanlog(">>> component %s-%s is replaced by %s-%s <<<\n", list->last_element->component->purls[0], list->last_element->component->release_date,
+                                                                                new_comp->purls[0], new_comp->release_date);
+                }
                 component_data_free(list->last_element->component);
                 LIST_REMOVE(list->last_element_aux->entries.le_next, entries);
                 free(list->last_element);
@@ -427,33 +432,67 @@ void component_list_print(component_list_t *list, bool (*printer)(component_data
     }
 }
 
-bool component_list_update(component_list_t *list, component_data_t * in, list_update_t (*eval)(component_data_t *fpa, component_data_t *fpb))
+static void component_switch(struct comp_entry * na, struct comp_entry * nb) /* Swap the component payloads of two list nodes; the list links are not touched */
+{
+    component_data_t * aux = na->component;
+    na->component = nb->component;
+    nb->component = aux;
+}
+void component_list_sort(struct comp_entry *np, bool (*val)(component_data_t *a, component_data_t *b)) /* One recursive pass over the list that starts at np (np must not be NULL) */
+{
+    struct comp_entry *next = np->entries.le_next;
+    if (!next) /* np is the last node: there is no pair to compare */
+        return;
+    if (next->entries.le_next)
+    {
+        component_list_sort(next, val); /* Handle the tail first, so pairs are compared from the end of the list towards np */
+    }    
+   if (val(np->component, next->component)) /* Swap the two payloads whenever val() returns true for the pair (np, next) */
+        component_switch(np, next);
+}
+
+
+list_update_t component_list_update(component_list_t *list, component_data_t * in, list_update_t (*eval)(component_data_t *fpa, component_data_t *fpb))
 {
     for (struct comp_entry *np = list->headp.lh_first; np != NULL; np = np->entries.le_next)
     {
         list_update_t r = eval(np->component, in);
         if (r == LIST_ITEM_UPDATE)
         {
+            scanlog("update component %s with release date %s by %s\n", np->component->purls[0], np->component->release_date, in->release_date);
             component_data_t * aux = np->component;
             np->component = in;
             component_data_free(aux);
-            return true;
+            return r;
         }
         else if (r == LIST_ITEM_FOUND)
-            return true;
+            return r;
     }
-    return false;
+    return LIST_ITEM_NOT_FOUND;
 }
 
 void match_list_process(match_list_t *list, bool (*funct_p)(match_data_t *fpa))
 {
     for (struct entry *np = list->headp.lh_first; np != NULL; np = np->entries.le_next)
     {
+        if (debug_on)
+        {
+            char md5_hex[MD5_LEN * 2 + 1];
+            ldb_bin_to_hex(np->match->file_md5, MD5_LEN, md5_hex);
+            scanlog("-------- looking matches for md5: %s --------\n", md5_hex);
+        }
         bool result = funct_p(np->match);
 
+        struct comp_entry *best = np->match->component_list.headp.lh_first; /* NULL when the handler left the component list empty */
+        if (debug_on && best && best->component) /* Only log when there is a component: debug output must never crash the scan */
+        {
+            scanlog("<<<Best match was: %s - %s>>>\n", best->component->purls[0], best->component->release_date);
+        }
+
         if (result)
             break;
     }
+
 }
 
 bool match_list_is_empty(match_list_t *list)
diff --git a/src/quality.c b/src/quality.c
index ee93ac1..e4281e8 100644
--- a/src/quality.c
+++ b/src/quality.c
@@ -110,7 +110,7 @@ void print_quality(match_data_t * match)
 	sprintf(result,"\"quality\": [");
 
 
-	ldb_fetch_recordset(NULL, oss_quality, match->file_md5, false, print_quality_item, &aux);	
+	fetch_recordset(oss_quality, match->file_md5, print_quality_item, &aux);	
 	
 	free(match->quality_text);	
 	asprintf(&match->quality_text, "%s%s]", result, aux ? aux : "");
diff --git a/src/query.c b/src/query.c
index 8d42f8d..e29fd24 100644
--- a/src/query.c
+++ b/src/query.c
@@ -28,7 +28,7 @@
   * //TODO Long description
   * @see https://github.com/scanoss/engine/blob/master/src/quality.c
   */
-
+#include "scanoss.h"
 #include "query.h"
 #include "parse.h"
 #include "util.h"
@@ -108,7 +108,7 @@ void get_url_record(uint8_t *md5, uint8_t *record)
 	*record = 0;
 
 	/* Fetch record */
-	ldb_fetch_recordset(NULL, oss_url, md5, false, ldb_get_first_url_not_ignored, (void *) record);
+	fetch_recordset(oss_url, md5, ldb_get_first_url_not_ignored, (void *) record);
 }
 
 /**
@@ -200,7 +200,7 @@ int get_component_age(uint8_t *md5)
 	long age = 0;
 
 	if (ldb_table_exists(oss_purl.db, oss_purl.table)) //skip purl if the table is not present
-		ldb_fetch_recordset(NULL, oss_purl, md5, false, handle_get_component_age, &age);
+		fetch_recordset(oss_purl, md5, handle_get_component_age, &age);
 
 	return age;
 }
@@ -218,3 +218,11 @@ void purl_version_md5(uint8_t *out, char *purl, char *version)
 	oss_purl.hash_calc((uint8_t *)purl_version, strlen(purl_version), out);
 }
 
+uint32_t fetch_recordset(struct ldb_table table, uint8_t *key, ldb_record_handler_t handler, void *ptr) /* Wrapper around ldb_fetch_recordset(): returns its result, or 0 when key is NULL */
+ {
+	if (!key) /* Nothing can be looked up without a key */
+		return 0;
+	ldb_sector_t sector = {.data = NULL, .size= 0, .id = *key}; /* Empty sector descriptor whose id is the first byte of the key */
+	return ldb_fetch_recordset(&sector, table, key, false, handler, ptr); /* handler and ptr are forwarded unchanged */
+}
+
diff --git a/src/report.c b/src/report.c
index 46ffd06..391e9e9 100644
--- a/src/report.c
+++ b/src/report.c
@@ -197,16 +197,6 @@ bool print_json_component(component_data_t * component)
 {
 	if (!component)
 		return true;
-
-	/*Check if there are some purl's md5 missing. We could do this earlier, but this is a performance optimization*/	
-	for (int i = 0; i < MAX_PURLS; i++)	
-	{
-		if (component->purls[i] && !component->purls_md5[i])
-		{
-			component->purls_md5[i] = malloc(oss_purl.key_ln);
-			oss_purl.hash_calc((uint8_t *)component->purls[i], strlen(component->purls[i]), component->purls_md5[i]);
-		}
-	}
 		
 	scanlog("print component\n");
 	if (engine_flags & DISABLE_BEST_MATCH)
@@ -298,7 +288,8 @@ bool print_json_component(component_data_t * component)
 
 	if (!(engine_flags & DISABLE_VULNERABILITIES))
 	{
-		print_vulnerabilities(component);
+		if (!component->vulnerabilities_text)
+			print_vulnerabilities(component);
 		if (component->vulnerabilities_text)
 			printf(",%s", json_remove_invalid_char(component->vulnerabilities_text));
 	}
diff --git a/src/snippets.c b/src/snippets.c
index d2ebe5b..870669e 100644
--- a/src/snippets.c
+++ b/src/snippets.c
@@ -145,6 +145,9 @@ void biggest_snippet(scan_data_t *scan)
 			}
 
 			int hits = compile_ranges(match_new);
+			if (hits < min_match_hits)
+				continue;
+
 			float percent = (hits * 100) / match_new->scan_ower->total_lines;
 			int matched_percent = floor(percent);
 			if (matched_percent > 99)
@@ -446,8 +449,8 @@ uint32_t compile_ranges(match_data_t *match)
 		/* Exit if hits is below two */
 		if (reported_hits < min_match_hits)
 		{
-			scanlog("Discarted ranges brings hits count to %u\n", reported_hits);
-			return 0;
+			scanlog("Discarted ranges brings hits count to %u (MIN MATCH HITS: %d)\n", reported_hits, min_match_hits);
+			return reported_hits;
 		}
 
 		//scanlog("compile_ranges #%d = %ld to %ld - OSS from: %d\n", i, from, to, match->matchmap_reg->range[i].oss_line);
@@ -711,7 +714,7 @@ int add_file_to_matchmap(scan_data_t *scan, matchmap_entry_t *item, uint8_t *md5
 
 	if (found == scan->matchmap_size)
 		scan->matchmap_size++;
-	return 0;
+	return found;
 }
 
 /**
@@ -751,7 +754,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
 		scanlog(" Add wfp %02x%02x%02x%02x to map\n",map[i].wfp[0], map[i].wfp[1],map[i].wfp[2],map[i].wfp[3]);
 		uint32_write(map[i].md5_set, 0);
 		map[i].line = scan->lines[i];
-		ldb_fetch_recordset(NULL, oss_wfp, map[i].wfp, false, get_all_file_ids, (void *)map[i].md5_set);
+		fetch_recordset(oss_wfp, map[i].wfp, get_all_file_ids, (void *)map[i].md5_set);
 		map[i].size = uint32_read(map[i].md5_set) / oss_wfp.rec_ln;
 		//Initializate the lines indirection when a wfp from a line has at least one md5 linked
 		if (map[i].size)
@@ -910,7 +913,14 @@ match_t ldb_scan_snippets(scan_data_t *scan)
 						break;
 					}
 
-					add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], last_sector_aux, &sector_max, &scan->matchmap_rank_by_sector[sector]);
+					int pos = add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], last_sector_aux, &sector_max, &scan->matchmap_rank_by_sector[sector]);
+					/*if (pos >= 0 && debug_on)
+					{
+						char key_hex[(MD5_LEN+2)*2 + 1];
+						ldb_bin_to_hex(&md5s[wfp_p], MD5_LEN+2, key_hex);
+						printf("%02x%02x%02x%02x,%s\n", map[i].wfp[0], map[i].wfp[1], map[i].wfp[2], map[i].wfp[3], key_hex);
+					}*/
+					
 				}
 			}	
 		}
diff --git a/src/url.c b/src/url.c
index 2fe9467..aa3a4eb 100644
--- a/src/url.c
+++ b/src/url.c
@@ -260,7 +260,7 @@ void fetch_related_purls(component_data_t *component)
 		uint32_t CRC = string_crc32c(purl_type);
 		add_CRC(component->crclist, CRC);
 		
-		int purls = ldb_fetch_recordset(NULL, oss_purl, component->purls_md5[i], false, handle_purl_record, component);
+		int purls = fetch_recordset( oss_purl, component->purls_md5[i], handle_purl_record, component);
 		if (purls)
 			scanlog("Finding related PURLs for %s returned %d matches\n", component->purls[i], purls);
 		else
@@ -311,7 +311,7 @@ void purl_release_date(char *purl, char *date)
 	uint8_t purl_md5[oss_purl.key_ln];
 	oss_purl.hash_calc((uint8_t *)purl, strlen(purl), purl_md5);
 
-	ldb_fetch_recordset(NULL, oss_purl, purl_md5, false, get_purl_first_release, (void *) date);
+	fetch_recordset( oss_purl, purl_md5, get_purl_first_release, (void *) date);
 }
 
 
diff --git a/src/vulnerability.c b/src/vulnerability.c
index defc37a..b7b3f16 100644
--- a/src/vulnerability.c
+++ b/src/vulnerability.c
@@ -252,9 +252,11 @@ void version_md5(uint8_t *out, char *vendor, char *component, char *version)
  * @param match match structure
  */
 int print_vulnerabilities(component_data_t *component)
+int print_vulnerabilities(component_data_t *component)
 {
 	if (!ldb_table_exists(oss_vulnerability.db, oss_vulnerability.table)) // skip purl if the table is not present
 		return 0;
+		return 0;
 	scanlog("Process vulnerabilities\n");
 	uint32_t crclist[CRC_LIST_LEN];
 	memset(crclist, 0, sizeof(crclist));
@@ -265,9 +267,10 @@ int print_vulnerabilities(component_data_t *component)
 	component->vulnerabilities = 0;
 	component->crclist = crclist;
 	int records = 0;
+	int records = 0;
 	/* Search for purl */
 	for (int i = 0; i < MAX_PURLS && component->purls[i]; i++)
-		records += ldb_fetch_recordset(NULL, oss_vulnerability, component->purls_md5[i], false, print_vulnerability_item, component);
+		records += fetch_recordset( oss_vulnerability, component->purls_md5[i], print_vulnerability_item, component);
 
 	/* Search for purl@version in NVD */
 
@@ -275,7 +278,7 @@ int print_vulnerabilities(component_data_t *component)
 	{
 		uint8_t md5[MD5_LEN];
 		purl_version_md5(md5, component->purls[i], comp.version);
-		records += ldb_fetch_recordset(NULL, oss_vulnerability, md5, false, print_vulnerability_item, component);
+		records += fetch_recordset( oss_vulnerability, md5, print_vulnerability_item, component);
 	}
 
 	/* Search for for purl@latest_version in NVD */
@@ -285,21 +288,21 @@ int print_vulnerabilities(component_data_t *component)
 		{
 			uint8_t md5[MD5_LEN];
 			purl_version_md5(md5, component->purls[i], comp.latest_version);
-			records += ldb_fetch_recordset(NULL, oss_vulnerability, md5, false, print_vulnerability_item, component);
+			records += fetch_recordset( oss_vulnerability, md5, print_vulnerability_item, component);
 		}
 	}
 
 	/* Search for vendor/component/version in NVD */
 	uint8_t md5[MD5_LEN];
 	version_md5(md5, component->vendor, component->component, comp.version);
-	records += ldb_fetch_recordset(NULL, oss_vulnerability, md5, false, print_vulnerability_item, component);
+	records += fetch_recordset( oss_vulnerability, md5, print_vulnerability_item, component);
 
 	/* Search for vendor/component/latest_version in NVD */
 	if (strcmp(comp.version, comp.latest_version))
 	{
 		uint8_t md5[MD5_LEN];
 		version_md5(md5, component->vendor, comp.component, comp.latest_version);
-		records += ldb_fetch_recordset(NULL, oss_vulnerability, md5, false, print_vulnerability_item, component);
+		records += fetch_recordset( oss_vulnerability, md5, print_vulnerability_item, component);
 	}
 
 	char * aux = NULL;

From 950dcd26e46d07fc3bdbe0032a2c86c9ea06d01b Mon Sep 17 00:00:00 2001
From: coresoftware dev <coredev@scanoss.com>
Date: Fri, 3 Jan 2025 00:03:22 +0100
Subject: [PATCH 12/19] remove match extensions flag

---
 inc/match_list.h |  2 +-
 inc/scanoss.h    |  1 -
 src/help.c       | 22 ++++++++++------------
 src/main.c       |  4 ----
 src/match.c      |  1 -
 5 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/inc/match_list.h b/inc/match_list.h
index 7d81be1..6fece78 100644
--- a/inc/match_list.h
+++ b/inc/match_list.h
@@ -78,7 +78,7 @@
 #define SCAN_MAX_SNIPPETS_DEFAULT 	1
 #define SCAN_MAX_COMPONENTS_DEFAULT 3
 
-#define MATCH_LIST_TOLERANCE 99.9
+#define MATCH_LIST_TOLERANCE 95
 typedef struct match_data_t match_data_t; /* Forward declaration */
 
 typedef enum
diff --git a/inc/scanoss.h b/inc/scanoss.h
index 6008b40..63646a4 100644
--- a/inc/scanoss.h
+++ b/inc/scanoss.h
@@ -114,7 +114,6 @@ typedef struct component_item
 
 extern long microseconds_start;
 extern int map_rec_len;
-extern bool match_extensions;
 
 /*component hint hold the last component matched/guessed */
 extern char * component_hint;
diff --git a/src/help.c b/src/help.c
index f36ebe5..15772c7 100644
--- a/src/help.c
+++ b/src/help.c
@@ -46,18 +46,16 @@ Results are displayed in JSON format through STDOUT.\n\
 Syntax: scanoss [parameters] [TARGET]\n\
 \n\
 Configuration:\n\
--w         Process TARGET as a .wfp file, regardless of its actual extension.\n\
--H         Enable High Precision Snippet Match mode (requires 'libhpsm.so' in the system).\n\
--e         Match only files with identical extensions as the scanned file (default: off).\n\
--M NUMBER  Search for up to NUMBER different components in each file (maximum: 9).\n\
--T NUMBER  Set snippet scanning tolerance percentage (default: 0.1).\n\
--s SBOM    Include assets from a JSON SBOM file (CycloneDX/SPDX2.2 format) in identification.\n\
--b SBOM    Exclude matches from assets listed in JSON SBOM file (CycloneDX/SPDX2.2 format).\n\
--B SBOM    Same as \"-b\" but with forced snippet scanning.\n\
--a SBOM    Show attribution notices for the provided SBOM.json file.\n\
--c HINT    Add a component HINT to guide scan results.\n\
--k KEY     Show contents of the specified KEY file from MZ sources archive.\n\
--l LICENSE Display OSADL metadata for the given SPDX license ID.\n\
+-w         Treats TARGET as a .wfp file regardless of the actual file extension.\n\
+-H         High Precision Snippet Match mode, 'libhpsm.so' must be present in the system.\n\
+-M NUMBER  Looks for NUMBER of different components in a file (MAX 9).\n\
+-s SBOM    Use assets specified in JSON SBOM (CycloneDX/SPDX2.2 JSON format) as input to identification.\n\
+-b SBOM    Ignore matches to assets specified in JSON SBOM (CycloneDX/SPDX2.2 JSON format).\n\
+-B SBOM    Same than \"-b\" but forcing snippet scan.\n\
+-a SBOM    Displays attribution notices for provided SBOM.json.\n\
+-c HINT    Provide a component HINT to influence scan results.\n\
+-k KEY     Displays contents of file KEY from MZ sources archive.\n\
+-l LICENSE Displays OSADL metadata for the provided SPDX license ID.\n\
 \n\
 Options:\n\
 -t  Run engine performance tests.\n\
diff --git a/src/main.c b/src/main.c
index 486ce6a..30b0f6b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -412,10 +412,6 @@ int main(int argc, char **argv)
 				exit(EXIT_SUCCESS);
 				break;
 
-			case 'e':
-				match_extensions = true;
-				break;
-
 			case 'q':
 				engine_flags = engine_flags_cmd_line;
 				debug_on = true;
diff --git a/src/match.c b/src/match.c
index ca82a33..a4456f3 100644
--- a/src/match.c
+++ b/src/match.c
@@ -51,7 +51,6 @@
 #include "health.h"
 
 const char *matchtypes[] = {"none", "file", "snippet", "binary"}; /** describe the availables kinds of match */
-bool match_extensions = false;								  /** global match extension flag */
 bool path_table_present = false;
 char *component_hint = NULL;
 

From 2967bd4bac2e17db7e75032eb4e913f51d3587e9 Mon Sep 17 00:00:00 2001
From: coresoftware dev <coredev@scanoss.com>
Date: Fri, 3 Jan 2025 02:42:08 +0100
Subject: [PATCH 13/19] solve errors after rebase from main

---
 src/match.c         | 57 +++++++++++++++++----------------------------
 src/report.c        |  1 -
 src/util.c          |  4 +---
 src/vulnerability.c |  4 +---
 4 files changed, 23 insertions(+), 43 deletions(-)

diff --git a/src/match.c b/src/match.c
index a4456f3..deb8fe2 100644
--- a/src/match.c
+++ b/src/match.c
@@ -406,46 +406,31 @@ bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id,
 {
 	component_data_t *new_comp = NULL;
 
-	ldb_fetch_recordset(NULL, oss_url, url_id, false, get_oldest_url, (void *)url_rec);
-
-	/* Extract date from url_rec */
-	char date[MAX_ARGLN] = "0";
-	extract_csv(date, (char *)url_rec, 4, MAX_ARGLN);
-	/* Create a new component and fill it from the url record */
-	component_data_t *new_comp = calloc(1, sizeof(*new_comp));
-	bool result = fill_component(new_comp, url_id, path, (uint8_t *)url_rec);
-	if (result)
+	fetch_recordset(oss_url, url_id, get_oldest_url, (void *)&new_comp);
+	if (!new_comp)
+		return false;
+	fill_component_path(new_comp, path);
+	new_comp->file_md5_ref = component_list->match_ref->file_md5;
+	/* If the component is valid add it to the component list */
+	/* The component list is a fixed size list, of size 3 by default, this means the list will keep the free oldest components*/
+	/* The oldest component will be the first in the list, if two components have the same age the purl date will untie */
+	new_comp->file_path_ref = component_list->match_ref->scan_ower->file_path;
+	new_comp->path_rank = PATH_LEVEL_COMP_INIT_VALUE;
+	list_update_t r = component_list_update(component_list, new_comp, component_update);
+	if (r == LIST_ITEM_NOT_FOUND)
 	{
-		new_comp->file_md5_ref = component_list->match_ref->file_md5;
-		/* If the component is valid add it to the component list */
-		/* The component list is a fixed size list, of size 3 by default, this means the list will keep the free oldest components*/
-		/* The oldest component will be the first in the list, if two components have the same age the purl date will untie */
-		new_comp->identified = IDENTIFIED_NONE;
-		asset_declared(new_comp);
-		new_comp->file_path_ref = component_list->match_ref->scan_ower->file_path;
-		new_comp->path_rank = PATH_LEVEL_COMP_INIT_VALUE;
-		list_update_t r = component_list_update(component_list, new_comp, component_update);
-		if (r == LIST_ITEM_NOT_FOUND)
+		scanlog("--- new comp %s---\n", new_comp->component);
+		if (!component_list_add(component_list, new_comp, component_hint_date_comparation, true))
 		{
-			scanlog("--- new comp %s---\n", new_comp->component);
-			if (!component_list_add(component_list, new_comp, component_hint_date_comparation, true))
-			{
-				scanlog("component rejected: %s - %s\n", new_comp->purls[0], new_comp->release_date);
-				component_data_free(new_comp); /* Free if the componet was rejected */
-			}
-			else
-				scanlog("component accepted: %s - %s - pathrank: %d\n", new_comp->purls[0], new_comp->release_date, new_comp->path_rank);
-		}
-		else if (r == LIST_ITEM_UPDATE && component_list->headp.lh_first)
-		{
-			component_list_sort(component_list->headp.lh_first, component_hint_date_comparation);
+			scanlog("component rejected: %s - %s\n", new_comp->purls[0], new_comp->release_date);
+			component_data_free(new_comp); /* Free if the componet was rejected */
 		}
+		else
+			scanlog("component accepted: %s - %s - pathrank: %d\n", new_comp->purls[0], new_comp->release_date, new_comp->path_rank);
 	}
-	else
+	else if (r == LIST_ITEM_UPDATE && component_list->headp.lh_first)
 	{
-		char hex_url[MD5_LEN * 2 + 1];
-		ldb_bin_to_hex(new_comp->url_md5, MD5_LEN, hex_url);
-		scanlog("component accepted: %s@%s - pathrank: %d - %s - %s\n", new_comp->purls[0], new_comp->version, new_comp->path_rank, new_comp->file, hex_url);
+		component_list_sort(component_list->headp.lh_first, component_hint_date_comparation);
 	}
 
 	return true;
@@ -481,7 +466,7 @@ static char * path_query(uint8_t * file_id)
  * @return false
  */
 
-bool component_from_file(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
+bool component_from_file(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
 {
 	/*Iterations must be doubled if high accuracy is enabled*/
 	int iteration_max = ((engine_flags & ENABLE_HIGH_ACCURACY) ? FETCH_MAX_FILES * 4 : FETCH_MAX_FILES);
diff --git a/src/report.c b/src/report.c
index 391e9e9..36409b0 100644
--- a/src/report.c
+++ b/src/report.c
@@ -238,7 +238,6 @@ bool print_json_component(component_data_t * component)
 	char url_id[oss_url.key_ln * 2 + 1];
     ldb_bin_to_hex(component->url_md5, oss_url.key_ln, url_id);
 	printf("\"url_hash\": \"%s\"", url_id);
-	free(url_id);
 
 	if (!(engine_flags & DISABLE_LICENSES))
 	{
diff --git a/src/util.c b/src/util.c
index be218e6..a162359 100644
--- a/src/util.c
+++ b/src/util.c
@@ -294,7 +294,7 @@ void free_and_null(void * pr)
     pr = NULL;
 }
 
-bool path_is_third_party(const char* path) 
+bool path_is_third_party(const char *path)
 {
     const char* patterns[] = {
         "third_party",
@@ -354,5 +354,3 @@ bool path_is_third_party(const char* path)
     
     return false;
 }
-
-
diff --git a/src/vulnerability.c b/src/vulnerability.c
index b7b3f16..b526ccd 100644
--- a/src/vulnerability.c
+++ b/src/vulnerability.c
@@ -252,11 +252,10 @@ void version_md5(uint8_t *out, char *vendor, char *component, char *version)
  * @param match match structure
  */
 int print_vulnerabilities(component_data_t *component)
-int print_vulnerabilities(component_data_t *component)
 {
 	if (!ldb_table_exists(oss_vulnerability.db, oss_vulnerability.table)) // skip purl if the table is not present
 		return 0;
-		return 0;
+
 	scanlog("Process vulnerabilities\n");
 	uint32_t crclist[CRC_LIST_LEN];
 	memset(crclist, 0, sizeof(crclist));
@@ -267,7 +266,6 @@ int print_vulnerabilities(component_data_t *component)
 	component->vulnerabilities = 0;
 	component->crclist = crclist;
 	int records = 0;
-	int records = 0;
 	/* Search for purl */
 	for (int i = 0; i < MAX_PURLS && component->purls[i]; i++)
 		records += fetch_recordset( oss_vulnerability, component->purls_md5[i], print_vulnerability_item, component);

From 119c49b52b71e5a9b8810849546a86197ac42103 Mon Sep 17 00:00:00 2001
From: coresoftware dev <coredev@scanoss.com>
Date: Wed, 7 May 2025 04:31:52 +0200
Subject: [PATCH 14/19] add build project structure functionality

---
 inc/scanoss.h   |  3 ++-
 src/component.c |  2 +-
 src/main.c      | 10 +++++++
 src/pivot.c     | 70 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 src/pivot.c

diff --git a/inc/scanoss.h b/inc/scanoss.h
index 63646a4..59ad37c 100644
--- a/inc/scanoss.h
+++ b/inc/scanoss.h
@@ -122,6 +122,7 @@ extern char * component_hint;
 
 /* DB tables */
 extern struct ldb_table oss_url;
+extern struct ldb_table oss_pivot;
 extern struct ldb_table oss_file;
 extern struct ldb_table oss_path;
 extern struct ldb_table oss_wfp;
@@ -156,5 +157,5 @@ void ldb_get_first_record(struct ldb_table table, uint8_t* key, void *void_ptr);
 int binary_scan(char * bfp);
 
 uint32_t fetch_recordset(struct ldb_table table, uint8_t *key, ldb_record_handler_t handler, void *ptr);
-
+void get_project_files(char * url_key);
 #endif
diff --git a/src/component.c b/src/component.c
index 5c20c09..ab57494 100644
--- a/src/component.c
+++ b/src/component.c
@@ -286,7 +286,7 @@ bool fill_component(component_data_t *component, uint8_t *url_key, char *file_pa
 	{
 		component->purls[0] = strdup(purl);
 		component->purls_md5[0] = malloc(MD5_LEN);
-		MD5((uint8_t *)component->purls[0], strlen(component->purls[0]), component->purls_md5[0]);
+		oss_purl.hash_calc( (unsigned char *) component->purls[0], strlen(component->purls[0]), component->purls_md5[0]);
 	}
 	component->age = -1;
 	return true;
diff --git a/src/main.c b/src/main.c
index 30b0f6b..5328ec9 100644
--- a/src/main.c
+++ b/src/main.c
@@ -48,6 +48,7 @@
 #include <dlfcn.h>
 
 struct ldb_table oss_url;
+struct ldb_table oss_pivot;
 struct ldb_table oss_file;
 struct ldb_table oss_path;
 struct ldb_table oss_wfp;
@@ -199,6 +200,10 @@ void initialize_ldb_tables(char *name)
 	oss_notices = ldb_read_cfg(dbtable);
 	oss_notices.hash_calc = hash_function_select(oss_notices.key_ln);
 
+	snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "pivot");
+	oss_pivot = ldb_read_cfg(dbtable);
+	oss_pivot.hash_calc = hash_function_select(oss_pivot.key_ln);
+
 	kb_version_get();
 	osadl_load_file();
 
@@ -401,6 +406,11 @@ int main(int argc, char **argv)
 				scan_benchmark();
 				exit(EXIT_SUCCESS);
 				break;
+			case 'p':
+				initialize_ldb_tables(ldb_db_name);
+				get_project_files(optarg);
+				exit(EXIT_SUCCESS);
+				break;
 
 			case 'v':
 				printf("scanoss-%s\n", SCANOSS_VERSION);
diff --git a/src/pivot.c b/src/pivot.c
new file mode 100644
index 0000000..be26b89
--- /dev/null
+++ b/src/pivot.c
@@ -0,0 +1,144 @@
+#include "scanoss.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "decrypt.h"
+
+/* Growable text buffer that accumulates the "file_key,path" report lines */
+struct out_buffer_s {
+	char * buffer;
+	size_t pos;  /* bytes in use, not counting the terminating NUL */
+	size_t size; /* bytes allocated for buffer */
+};
+
+/* Context of a path lookup: the url key being listed and the path found for it */
+struct get_path_s {
+	char * path;
+	uint8_t * url_key;
+};
+
+/**
+ * @brief Append a NUL terminated string to a growable buffer.
+ * @param out buffer to extend, its storage is doubled until the text fits
+ * @param text string to append
+ * @return true on success, false if the memory could not be allocated
+ */
+static bool out_buffer_append(struct out_buffer_s * out, const char * text)
+{
+	size_t len = strlen(text);
+	size_t needed = out->pos + len + 1;
+	if (needed > out->size)
+	{
+		size_t new_size = out->size ? out->size : 4096;
+		while (new_size < needed)
+			new_size *= 2;
+		/* Keep the old pointer until realloc succeeds, so nothing leaks on failure */
+		char * tmp = realloc(out->buffer, new_size);
+		if (!tmp)
+			return false;
+		out->buffer = tmp;
+		out->size = new_size;
+	}
+	memcpy(out->buffer + out->pos, text, len + 1);
+	out->pos += len;
+	return true;
+}
+
+/**
+ * @brief LDB handler: decrypt a path record and hand it over to the caller.
+ * @param ptr address of a char * that receives the decrypted path
+ * @return true once a path was stored, false if the record could not be decrypted
+ */
+bool get_path(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+{
+	char * path = decrypt_data(data, datalen, *table, key, subkey);
+	if (!path) {
+		return false;
+	}
+	char ** out = (char**) ptr;
+	*out = path;
+	return true;
+}
+
+/**
+ * @brief LDB handler: resolve the path of a file record that belongs to the wanted url.
+ * The record is read as the url key followed by the path key.
+ * @param ptr struct get_path_s holding the wanted url key, it receives the path
+ * @return true when the record belongs to the url, false to skip it
+ */
+bool get_file_path_hash(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+{
+	if (datalen < table->key_ln)
+		return false;
+	struct get_path_s * get_path_url = ptr;
+	//if the url key is not the same is not a useful match
+	if (memcmp(get_path_url->url_key, data, table->key_ln))
+		return false;
+
+	uint8_t * path_key = &data[table->key_ln];
+	char * path = NULL;
+	fetch_recordset(oss_path, path_key, get_path, (void *)&path);
+	free(get_path_url->path); /* release a path stored by an earlier record, if any */
+	get_path_url->path = path;
+	return true;
+}
+
+/**
+ * @brief LDB handler: report one file of the project as a "file_key,path" line.
+ * The pivot record is read as a file key and its path is looked up through the file table.
+ * @param ptr struct out_buffer_s where the line is appended
+ * @return always false
+ */
+bool get_project_hashes(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
+{
+	/* A record shorter than a key can be neither converted nor looked up */
+	if (datalen < table->key_ln)
+		return false;
+
+	uint8_t * file_key = data;
+	struct out_buffer_s * out = ptr;
+	struct get_path_s lookup = {.url_key = key, .path = NULL};
+	/* Sized from the table: a fixed 17 byte buffer overflows with keys longer than 8 bytes */
+	char key_hex[table->key_ln * 2 + 1];
+	ldb_bin_to_hex(file_key, table->key_ln, key_hex);
+
+	fetch_recordset(oss_file, file_key, get_file_path_hash, (void *)&lookup);
+	if (lookup.path)
+	{
+		char * line = NULL;
+		/* line is only valid, and only needs freeing, when asprintf succeeds */
+		if (asprintf(&line, "%s,%s\n", key_hex, lookup.path) >= 0)
+		{
+			if (!out_buffer_append(out, line))
+				fprintf(stderr, "Out of memory, project file list truncated\n");
+			free(line);
+		}
+	}
+
+	free(lookup.path);
+	return false;
+}
+
+/**
+ * @brief Print to STDOUT one "file_key,path" line for each file of a project (url).
+ * @param url_key_hex url key in hexadecimal, oss_pivot.key_ln * 2 digits are read from it
+ */
+void get_project_files(char * url_key_hex)
+{
+	int key_ln = oss_pivot.key_ln;
+	/* Reject a missing or short key instead of converting bytes past its end */
+	if (key_ln <= 0 || !url_key_hex || strlen(url_key_hex) < (size_t) key_ln * 2)
+	{
+		fprintf(stderr, "Invalid url key\n");
+		return;
+	}
+
+	uint8_t url_key[key_ln];
+	ldb_hex_to_bin(url_key_hex, key_ln * 2, url_key);
+
+	struct out_buffer_s out = {.buffer = NULL, .pos = 0, .size = 0};
+	fetch_recordset(oss_pivot, url_key, get_project_hashes, (void *)&out);
+	if (out.buffer)
+		printf("%s", out.buffer);
+	free(out.buffer);
+}
\ No newline at end of file

From fb2bec89deef1bdb78f487efd5e9bf67f6369922 Mon Sep 17 00:00:00 2001
From: coresoftware dev <coredev@scanoss.com>
Date: Wed, 7 May 2025 12:53:07 +0200
Subject: [PATCH 15/19] fix rebase issues

---
 inc/match_list.h |  2 +-
 src/report.c     |  3 +++
 src/snippets.c   | 53 +++++++++++-------------------------------------
 3 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/inc/match_list.h b/inc/match_list.h
index 6fece78..7d81be1 100644
--- a/inc/match_list.h
+++ b/inc/match_list.h
@@ -78,7 +78,7 @@
 #define SCAN_MAX_SNIPPETS_DEFAULT 	1
 #define SCAN_MAX_COMPONENTS_DEFAULT 3
 
-#define MATCH_LIST_TOLERANCE 95
+#define MATCH_LIST_TOLERANCE 99.9
 typedef struct match_data_t match_data_t; /* Forward declaration */
 
 typedef enum
diff --git a/src/report.c b/src/report.c
index 36409b0..292b609 100644
--- a/src/report.c
+++ b/src/report.c
@@ -336,6 +336,9 @@ bool print_json_match(struct match_data_t * match)
 		else
 			printf(",\"file_url\": \"%s\"", match->component_list.headp.lh_first->component->url);
 	}
+	else //emit a single-space placeholder for file_url
+		printf(",\"file_url\": \" \"");
+
 	
 	if (!(engine_flags & DISABLE_QUALITY))
 	{
diff --git a/src/snippets.c b/src/snippets.c
index 870669e..c5ca57a 100644
--- a/src/snippets.c
+++ b/src/snippets.c
@@ -144,17 +144,20 @@ void biggest_snippet(scan_data_t *scan)
 				continue;
 			}
 
-			int hits = compile_ranges(match_new);
-			if (hits < min_match_hits)
+			int matched_lines = compile_ranges(match_new);
+			if (matched_lines < min_match_lines) {
+				match_data_free(match_new); 
 				continue;
+			}
 
-			float percent = (hits * 100) / match_new->scan_ower->total_lines;
+			float percent = (matched_lines * 100) / match_new->scan_ower->total_lines;
 			int matched_percent = floor(percent);
 			if (matched_percent > 99)
 				matched_percent = 99;
 			if (matched_percent < 1)
 				matched_percent = 1;
 			asprintf(&match_new->matched_percent, "%u%%", matched_percent);
+			match_new->lines_matched = matched_lines;
 			//match_new->hits = hits;
 
 			do /*Check if there is already a list for this line ranges */
@@ -201,6 +204,7 @@ void biggest_snippet(scan_data_t *scan)
 	}
 }
 
+
 /**
  * @brief Handler function to collect all file ids.
  * Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
@@ -429,37 +433,10 @@ uint32_t compile_ranges(match_data_t *match)
 	}
 
 	int hits = 0;
-	/* Revise hits and decrease if needed */
-	for (uint32_t i = 0; i < match->matchmap_reg->ranges_number; i++)
-	{
-		long from =  match->matchmap_reg->range[i].from;
-		long to = match->matchmap_reg->range[i].to;
-		long delta = to - from;
-
-		if (to < 1)
-			break;
-
-		/* Ranges to be ignored (under min_match_lines) should decrease hits counter */
-		if (delta < min_match_lines)
-		{
-			/* Single-line range decreases by 1, otherwise decrease by 2 (from and to) */
-			reported_hits -= ((delta == 0) ? 1 : 2);
-		}
-
-		/* Exit if hits is below two */
-		if (reported_hits < min_match_hits)
-		{
-			scanlog("Discarted ranges brings hits count to %u (MIN MATCH HITS: %d)\n", reported_hits, min_match_hits);
-			return reported_hits;
-		}
-
-		//scanlog("compile_ranges #%d = %ld to %ld - OSS from: %d\n", i, from, to, match->matchmap_reg->range[i].oss_line);
-	}
-	
 	/* Add tolerances and assemble line ranges */
 	ranges_sort(match->matchmap_reg->range, match->matchmap_reg->ranges_number);
 
-	/*if (debug_on)
+	if (debug_on)
 	{
 		scanlog("Accepted ranges (min lines range = %d):\n", min_match_lines);
 		for (uint32_t i = 0; i < match->matchmap_reg->ranges_number; i++)
@@ -468,7 +445,7 @@ uint32_t compile_ranges(match_data_t *match)
 				scanlog("	%d = %ld to %ld - OSS from: %d\n", i, match->matchmap_reg->range[i].from,match->matchmap_reg->range[i].to, 
 																match->matchmap_reg->range[i].oss_line);
 		}
-	}*/
+	}
 
 	matchmap_range *ranges = ranges_join_overlapping(match->matchmap_reg->range,  match->matchmap_reg->ranges_number);
 	
@@ -483,7 +460,7 @@ uint32_t compile_ranges(match_data_t *match)
 		}
 	}
 		
-	/*if (debug_on)
+	if (debug_on)
 	{
 		scanlog("Final ranges:\n");
 		for (uint32_t i = 0; i < MATCHMAP_RANGES; i++)
@@ -491,7 +468,7 @@ uint32_t compile_ranges(match_data_t *match)
 		if ( ranges[i].from && ranges[i].to)
 				scanlog("	%d = %ld to %ld - OSS from: %d\n", i, ranges[i].from, ranges[i].to, ranges[i].oss_line);
 		}
-	}*/
+	}
 	hits = ranges_assemble(ranges, line_ranges, oss_ranges);
 	match->line_ranges = strdup(line_ranges);
 	match->oss_ranges = strdup(oss_ranges);
@@ -913,13 +890,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
 						break;
 					}
 
-					int pos = add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], last_sector_aux, &sector_max, &scan->matchmap_rank_by_sector[sector]);
-					/*if (pos >= 0 && debug_on)
-					{
-						char key_hex[(MD5_LEN+2)*2 + 1];
-						ldb_bin_to_hex(&md5s[wfp_p], MD5_LEN+2, key_hex);
-						printf("%02x%02x%02x%02x,%s\n", map[i].wfp[0], map[i].wfp[1], map[i].wfp[2], map[i].wfp[3], key_hex);
-					}*/
+					add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], last_sector_aux, &sector_max, &scan->matchmap_rank_by_sector[sector]);
 					
 				}
 			}	

From 034701759e595f026662624d612d5e20fb4557e1 Mon Sep 17 00:00:00 2001
From: coresoftware dev <coredev@scanoss.com>
Date: Wed, 2 Jul 2025 00:05:33 +0200
Subject: [PATCH 16/19] add p flag

---
 src/main.c     | 2 +-
 src/snippets.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/main.c b/src/main.c
index 5328ec9..37c4c6d 100644
--- a/src/main.c
+++ b/src/main.c
@@ -327,7 +327,7 @@ int main(int argc, char **argv)
 	int option;
 	bool invalid_argument = false;
 	char * ldb_db_name = NULL;
-	while ((option = getopt(argc, argv, ":T:s:b:B:c:k:a:F:l:n:M:N:wtvhedqH")) != -1)
+	while ((option = getopt(argc, argv, ":p:T:s:b:B:c:k:a:F:l:n:M:N:wtvhedqH")) != -1)
 	{
 		/* Check valid alpha is entered */
 		if (optarg)
diff --git a/src/snippets.c b/src/snippets.c
index c5ca57a..3109445 100644
--- a/src/snippets.c
+++ b/src/snippets.c
@@ -701,6 +701,8 @@ int add_file_to_matchmap(scan_data_t *scan, matchmap_entry_t *item, uint8_t *md5
  */
 match_t ldb_scan_snippets(scan_data_t *scan)
 {
+	if (!ldb_table_exists(oss_wfp.db, oss_wfp.table)) //skip snippet scanning if the wfp table is not present
+		return MATCH_NONE;
 
 	scanlog("ldb_scan_snippets\n");
 	if (!scan->hash_count)

From 41feee1a110fc3b930a0f85c99d2137687f1defe Mon Sep 17 00:00:00 2001
From: coresoftware dev <coredev@scanoss.com>
Date: Tue, 5 Aug 2025 13:51:06 +0200
Subject: [PATCH 17/19] accept more than one path for file md5 when rebuilding
 a project

---
 src/pivot.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/pivot.c b/src/pivot.c
index be26b89..39b1fbb 100644
--- a/src/pivot.c
+++ b/src/pivot.c
@@ -7,8 +7,9 @@ struct out_buffer_s {
 };
 
 struct get_path_s {
-	char * path;
+	char **paths;
 	uint8_t * url_key;
+	int paths_index;
 };
 
 bool get_path(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
@@ -34,7 +35,9 @@ bool get_file_path_hash(struct ldb_table * table, uint8_t *key, uint8_t *subkey,
 	uint8_t * path_key = &data[table->key_ln];
 	char * path = NULL;
 	fetch_recordset(oss_path, path_key, get_path, (void *)&path);
-	get_path_url->path = path;
+	get_path_url->paths = realloc(get_path_url->paths, (get_path_url->paths_index + 1) * sizeof(char*));
+	get_path_url->paths[get_path_url->paths_index] = path;
+	get_path_url->paths_index++;
 	return true;
 }
 
@@ -42,21 +45,22 @@ bool get_file_path_hash(struct ldb_table * table, uint8_t *key, uint8_t *subkey,
 bool get_project_hashes(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
 	uint8_t * file_key = data;
-	struct get_path_s get_path = {.url_key = key, .path = NULL};
+	struct get_path_s get_path = {.url_key = key, .paths = NULL, .paths_index = 0};
 	char key_hex[17];
 	ldb_bin_to_hex(file_key,table->key_ln,key_hex);
 
 	fetch_recordset(oss_file, file_key, get_file_path_hash, (void *)&get_path);
 	char * output = ptr;
 	char * line = NULL;
-	if (get_path.path)
+	for (int i = 0; i < get_path.paths_index; i++)
 	{
-		asprintf(&line, "%s,%s\n", key_hex, get_path.path);
+		asprintf(&line, "%s,%s\n", key_hex, get_path.paths[i]);
+		free(get_path.paths[i]);
 		strcat(output, line);
 		free(line);
 	}
 
-	free(get_path.path);
+	free(get_path.paths);
 	return false;
 }
 

From 2292e5a8f4f5f6223ec06e6cbb749f215389fe27 Mon Sep 17 00:00:00 2001
From: coresoftware dev <coredev@scanoss.com>
Date: Wed, 6 Aug 2025 02:57:31 +0200
Subject: [PATCH 18/19] support MD5 as url hash. Update help

---
 inc/file.h  |  2 +-
 inc/match.h |  1 -
 src/file.c  | 29 +++++++++++++++++++++++++++++
 src/help.c  |  1 +
 src/match.c | 18 ------------------
 src/pivot.c | 42 +++++++++++++++++++++++++-----------------
 6 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/inc/file.h b/inc/file.h
index 0c9d35f..0c06300 100644
--- a/inc/file.h
+++ b/inc/file.h
@@ -12,5 +12,5 @@ bool is_dir(char *path);
 void get_file_md5(char *filepath, uint8_t *md5_result);
 bool count_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr);
 char *get_file_extension(uint8_t *md5);
-
+char * path_query(uint8_t * file_id);
 #endif
diff --git a/inc/match.h b/inc/match.h
index d36c8cf..af920dc 100644
--- a/inc/match.h
+++ b/inc/match.h
@@ -36,5 +36,4 @@ void compile_matches(scan_data_t *scan);
 match_list_t * match_select_m_best(scan_data_t * scan);
 match_list_t * match_select_m_component_best(scan_data_t * scan);
 bool component_from_file(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr);
-
 #endif
diff --git a/src/file.c b/src/file.c
index 35a9e0d..7525237 100644
--- a/src/file.c
+++ b/src/file.c
@@ -260,3 +260,32 @@ char *get_file_extension(uint8_t *md5)
 	fetch_recordset(oss_file, md5, get_first_file, out);
 	return out;
 }
+
+static bool path_query_handler(struct ldb_table * table, uint8_t * key, uint8_t * subkey, uint8_t * data, uint32_t datalen, int record_number, void * ptr)
+{
+	char **path = ptr;
+	/* Decrypt data */
+	char * decrypted = decrypt_data(data, datalen, *table, key, subkey);
+	if (!decrypted || !*decrypted)
+		return false;
+	
+	*path = decrypted;
+	return true;
+}
+/**
+ * @brief Get the file path from the path table.
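+ * @param file_id key of the record to fetch from the path table
+ * @return string with the path, or NULL if the path table is not present or no path was retrieved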
+ */
+char * path_query(uint8_t * file_id)
+{
+	char * path = NULL;
+	if (!path_table_present)
+	{
+		scanlog("path_query: path table must be present to use this query\n");
+		return NULL;
+	}
+
+	fetch_recordset(oss_path, file_id, path_query_handler, (void *) &path);
+	return path;
+}
\ No newline at end of file
diff --git a/src/help.c b/src/help.c
index 15772c7..712fc7c 100644
--- a/src/help.c
+++ b/src/help.c
@@ -56,6 +56,7 @@ Configuration:\n\
 -c HINT    Provide a component HINT to influence scan results.\n\
 -k KEY     Displays contents of file KEY from MZ sources archive.\n\
 -l LICENSE Displays OSADL metadata for the provided SPDX license ID.\n\
+-p URL_HASH Returns a list with the md5 and path for each project file (pivot table is required).\n\
 \n\
 Options:\n\
 -t  Run engine performance tests.\n\
diff --git a/src/match.c b/src/match.c
index deb8fe2..b2de677 100644
--- a/src/match.c
+++ b/src/match.c
@@ -436,24 +436,6 @@ bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id,
 	return true;
 }
 
-bool path_query_handler(struct ldb_table * table, uint8_t * key, uint8_t * subkey, uint8_t * data, uint32_t datalen, int record_number, void * ptr)
-{
-	char **path = ptr;
-	/* Decrypt data */
-	char * decrypted = decrypt_data(data, datalen, *table, key, subkey);
-	if (!decrypted || !*decrypted)
-		return false;
-	
-	*path = decrypted;
-	return true;
-}
-static char * path_query(uint8_t * file_id)
-{
-	char * path = NULL;
-	fetch_recordset(oss_path, file_id, path_query_handler, (void *) &path);
-	return path;
-}
-
 /**
  * @brief Load componentes for a match processing the file recordset list.
  * For each file in the recordset we will query for the oldest url in the url table.
diff --git a/src/pivot.c b/src/pivot.c
index 39b1fbb..ecc2584 100644
--- a/src/pivot.c
+++ b/src/pivot.c
@@ -1,6 +1,8 @@
 #include "scanoss.h"
 #include <stdio.h>
 #include "decrypt.h"
+#include "debug.h"
+#include "file.h"
 struct out_buffer_s {
 	char * buffer;
 	int pos;
@@ -12,16 +14,6 @@ struct get_path_s {
 	int paths_index;
 };
 
-bool get_path(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
-{
-	char * path = decrypt_data(data, datalen, *table, key, subkey);
-	if (!path) {
-		return false;
-	}
-	char ** out = (char**) ptr;
-	*out = path;
-	return true;
-}
 
 bool get_file_path_hash(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
 {
@@ -32,11 +24,21 @@ bool get_file_path_hash(struct ldb_table * table, uint8_t *key, uint8_t *subkey,
 	if (memcmp(get_path_url->url_key, data, table->key_ln))
 		return false;
 
-	uint8_t * path_key = &data[table->key_ln];
-	char * path = NULL;
-	fetch_recordset(oss_path, path_key, get_path, (void *)&path);
+	char * decrypted = NULL;
+	
+	if (path_table_present)
+	{
+		decrypted = path_query(&data[table->key_ln]);
+	}
+	else
+	{
+		/* Decrypt data */
+		decrypted = decrypt_data(data, datalen, *table, key, subkey);
+	}
+	
+	
 	get_path_url->paths = realloc(get_path_url->paths, (get_path_url->paths_index + 1) * sizeof(char*));
-	get_path_url->paths[get_path_url->paths_index] = path;
+	get_path_url->paths[get_path_url->paths_index] = decrypted;
 	get_path_url->paths_index++;
 	return true;
 }
@@ -46,7 +48,7 @@ bool get_project_hashes(struct ldb_table * table, uint8_t *key, uint8_t *subkey,
 {
 	uint8_t * file_key = data;
 	struct get_path_s get_path = {.url_key = key, .paths = NULL, .paths_index = 0};
-	char key_hex[17];
+	char key_hex[oss_url.key_ln*2+1];
 	ldb_bin_to_hex(file_key,table->key_ln,key_hex);
 
 	fetch_recordset(oss_file, file_key, get_file_path_hash, (void *)&get_path);
@@ -66,8 +68,14 @@ bool get_project_hashes(struct ldb_table * table, uint8_t *key, uint8_t *subkey,
 
 void get_project_files(char * url_key_hex)
 {
-	uint8_t url_key[8];
-	ldb_hex_to_bin(url_key_hex, 16, url_key);
+	uint8_t url_key[oss_url.key_ln];
+	scanlog("Reconstructing project structure for url %s\n",url_key_hex);
+	if (!ldb_table_exists(oss_pivot.db, oss_pivot.table))
+	{
+		printf("the pivot table must be present to use this functionality\n");
+		exit(EXIT_FAILURE);
+	}
+	ldb_hex_to_bin(url_key_hex, oss_url.key_ln*2, url_key);
 	char * out = calloc(1,1024*1024*500);
 	fetch_recordset(oss_pivot, url_key, get_project_hashes, (void *)out);
 	printf("%s", out);

From c9ea2323246bdf77d621acc200c10397a47bbeff Mon Sep 17 00:00:00 2001
From: coresoftware dev <coredev@scanoss.com>
Date: Wed, 6 Aug 2025 04:03:35 +0200
Subject: [PATCH 19/19] remove duplicated code

---
 src/match.c | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/src/match.c b/src/match.c
index f0b30ee..83baefa 100644
--- a/src/match.c
+++ b/src/match.c
@@ -436,24 +436,6 @@ bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id,
 	return true;
 }
 
-bool path_query_handler(struct ldb_table * table, uint8_t * key, uint8_t * subkey, uint8_t * data, uint32_t datalen, int record_number, void * ptr)
-{
-	char **path = ptr;
-	/* Decrypt data */
-	char * decrypted = decrypt_data(data, datalen, *table, key, subkey);
-	if (!decrypted || !*decrypted)
-		return false;
-	
-	*path = decrypted;
-	return true;
-}
-static char * path_query(uint8_t * file_id)
-{
-	char * path = NULL;
-	fetch_recordset(oss_path, file_id, path_query_handler, (void *) &path);
-	return path;
-}
-
 /**
  * @brief Load componentes for a match processing the file recordset list.
  * For each file in the recordset we will query for the oldest url in the url table.