diff --git a/inc/component.h b/inc/component.h index 7e9718a..3a3c122 100644 --- a/inc/component.h +++ b/inc/component.h @@ -4,7 +4,15 @@ #include "scanoss.h" #define COMPONENT_DEFAULT_RANK 999 //default rank for components without rank information +#define COMPONENT_RANK_SELECTION_MAX 8 //max rank to be considered in component selection + extern int component_rank_max; + +// Third-party confidence thresholds for path_is_third_party() +#define TP_THRESHOLD_HIGH 12 // 0-11: high confidence third-party (node_modules, vendor, etc.) +#define TP_THRESHOLD_MED 27 // 12-26: medium confidence (external, dependencies, etc.) + // 27-31: medium-low confidence (dist, contrib, etc.) + // 32+: not third-party /** * @brief Component object definition. * @@ -50,6 +58,7 @@ typedef struct component_data_t int url_stats[5]; /* url stats: quantity of file */ int health_stats[3]; /* health stats: forks, watchers, contributors */ int rank; /* purl ranking - optional*/ + int path_depth; /* depth of the matched file path*/ } component_data_t; component_data_t * component_init(void); diff --git a/inc/scanoss.h b/inc/scanoss.h index 6b8bb8d..1dfbf46 100644 --- a/inc/scanoss.h +++ b/inc/scanoss.h @@ -39,7 +39,7 @@ #define WFP_LN 4 #define WFP_REC_LN 18 -#define SCANOSS_VERSION "5.4.16" +#define SCANOSS_VERSION "5.4.17" /* Log files */ #define SCAN_LOG "/tmp/scanoss_scan.log" diff --git a/inc/util.h b/inc/util.h index a271ed3..6445c0e 100644 --- a/inc/util.h +++ b/inc/util.h @@ -1,9 +1,10 @@ #ifndef __UTIL_H #define __UTIL_H - + #include #include #include +#include "component.h" /* Reverse an uint32 number */ void uint32_reverse(uint8_t *data); @@ -62,6 +63,12 @@ char * str_cat_realloc(char **a, char * b); void free_and_null(void ** pr); -bool path_is_third_party(const char* path); +int path_is_third_party(const char* path); + +/* Counts the number of '/' characters in a path string */ +int path_depth(char* path); + +/* Detects binary file type and validates if PURL matches file extension */ +bool binary_file_to_purl(component_data_t *comp); #endif diff --git a/inc/versions.h b/inc/versions.h index cded782..ad19116 100644 --- a/inc/versions.h +++ b/inc/versions.h @@ -9,7 +9,7 @@ typedef struct release_version char date[MAX_FIELD_LN]; } release_version; -void normalise_version(char *version, char *component); +char* normalise_version(const char* input_string, char* result); void add_versions(component_data_t *component, file_recordset *files, uint32_t records); void get_purl_version(release_version *release, char *purl, uint8_t *file_id); char * version_cleanup(char * version, char * component); diff --git a/src/component.c b/src/component.c index 1a274c8..40b4eeb 100644 --- a/src/component.c +++ b/src/component.c @@ -298,6 +298,8 @@ bool fill_component(component_data_t *component, uint8_t *url_key, char *file_pa } else component->rank = COMPONENT_DEFAULT_RANK; + + component->path_depth = path_depth(component->file); return true; } diff --git a/src/license.c b/src/license.c index e202594..0457a3a 100644 --- a/src/license.c +++ b/src/license.c @@ -54,12 +54,38 @@ const char *license_sources[] = {"component_declared", "file_spdx_tag", "file_header", "license_file", "scancode", "scancode-file", "osselot"}; bool full_license_report = false; -struct licenses_s + +struct license_list { - char **license_by_type; - uint32_t * crclist; + char **licenses; + int count; }; +bool license_add_to_list(struct license_list * ptr, char * license) +{ + if (!ptr || !license || strlen(license) < 2) + return false; + ptr->licenses = realloc(ptr->licenses, sizeof(char *) * (ptr->count + 1)); + if (!ptr->licenses) + return false; + ptr->licenses[ptr->count] = strdup(license); + ptr->count++; + return true; +} + +void license_free_list(struct license_list * ptr) +{ + if (!ptr || !ptr->licenses) + return; + for (int i = 0; i < ptr->count; i++) + { + free(ptr->licenses[i]); + } + free(ptr->licenses); + ptr->licenses = NULL; + ptr->count = 0; +} + /** * @brief Remove invalid characters from a license name * @param license license string @@ -249,12 +275,12 @@ static char *split_in_json_array(uint32_t *crclist, char *buffer, char *license, return buffer; } -void license_to_json(uint32_t *crclist, char *buffer, char *license, int src, bool *first_record) +char * license_to_json(uint32_t *crclist, char *buffer, char *license, int src, bool *first_record) { if (!strchr(license, '/')) - json_from_license(crclist, buffer, license, src, first_record); + return json_from_license(crclist, buffer, license, src, first_record); else - split_in_json_array(crclist, buffer, license, src, first_record); + return split_in_json_array(crclist, buffer, license, src, first_record); } /** @@ -292,7 +318,7 @@ bool get_first_license_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_ */ bool print_licenses_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr) { - struct licenses_s * license_results = ptr; + struct license_list * licenses = ptr; if (!datalen) return false; @@ -312,16 +338,10 @@ bool print_licenses_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t * int src = atoi(source); scanlog("Fetched license %s\n", license); - char result[MAX_FIELD_LN * 10] = "\0"; - int len = 0; if (strlen(license) > 2 && (src < (sizeof(license_sources) / sizeof(license_sources[0])))) - { - bool first_record = !(license_results->license_by_type[src] && *license_results->license_by_type[src]); - license_to_json(license_results->crclist, result + len, license, src, &first_record); - str_cat_realloc(&license_results->license_by_type[src], result); + license_add_to_list(&licenses[src], license); - } free(source); free(license); @@ -347,22 +367,18 @@ void print_licenses(component_data_t *comp) /* CRC list (used to avoid duplicates) */ uint32_t crclist[CRC_LIST_LEN]; memset(crclist, 0, sizeof(crclist)); - comp->crclist = crclist; uint32_t records = 0; comp->license_text = NULL; int license_types = sizeof(license_sources) / sizeof(license_sources[0]); - struct licenses_s license_result = { .crclist = crclist, .license_by_type = calloc(license_types, sizeof(char *)) }; + struct license_list licenses_by_type[license_types]; + memset(licenses_by_type, 0, sizeof(licenses_by_type)); /* Print URL license */ if (comp->license && strlen(comp->license) > 2) { - bool first_record = true; - license_result.license_by_type[0] = calloc(MAX_FIELD_LN * 10, 1); - license_to_json(crclist, license_result.license_by_type[0], comp->license, 0, &first_record); + license_add_to_list(&licenses_by_type[0], comp->license); scanlog("License present in URL table"); - /* Add license to CRC list (to avoid duplicates) */ - add_CRC(license_result.crclist, string_crc32c(comp->license)); } else { @@ -370,10 +386,10 @@ void print_licenses(component_data_t *comp) } /* Look for component or file license */ - records = ldb_fetch_recordset(NULL, oss_license, comp->url_md5, false, print_licenses_item, &license_result); + records = ldb_fetch_recordset(NULL, oss_license, comp->url_md5, false, print_licenses_item, &licenses_by_type); scanlog("License for url_id license returns %d hits\n", records); - records = ldb_fetch_recordset(NULL, oss_license, comp->file_md5_ref, false, print_licenses_item, &license_result); + records = ldb_fetch_recordset(NULL, oss_license, comp->file_md5_ref, false, print_licenses_item, &licenses_by_type); scanlog("License for file_id license returns %d hits\n", records); for (int i = 0; i < MAX_PURLS && comp->purls[i]; i++) { @@ -381,13 +397,13 @@ void print_licenses(component_data_t *comp) uint8_t purlversion_md5[MD5_LEN]; purl_version_md5(purlversion_md5, comp->purls[i], comp->version); - records = ldb_fetch_recordset(NULL, oss_license, purlversion_md5, false, print_licenses_item, &license_result); + records = ldb_fetch_recordset(NULL, oss_license, purlversion_md5, false, print_licenses_item, &licenses_by_type); scanlog("License for %s@%s license returns %d hits\n", comp->purls[i], comp->version, records); if (records) break; - records = ldb_fetch_recordset(NULL, oss_license, comp->purls_md5[i], false, print_licenses_item, &license_result); + records = ldb_fetch_recordset(NULL, oss_license, comp->purls_md5[i], false, print_licenses_item, &licenses_by_type); scanlog("License for %s license returns %d hits\n", comp->purls[i], records); if (records) @@ -396,35 +412,33 @@ void print_licenses(component_data_t *comp) /* Open licenses structure */ char * result = calloc(MAX_FIELD_LN * 100, 1); + char * buffer = result; int len = 0; len += sprintf(result + len, "\"licenses\": ["); + buffer = result + len; bool first = true; - if (comp->license_text) - { - len += sprintf(result + len, "%s", comp->license_text); - free(comp->license_text); - first = false; - } - for (int i = 0; i < license_types; i++) { - if (license_result.license_by_type[i] && *license_result.license_by_type[i]) + if (licenses_by_type[i].count > 0) { - if (!first) + if (i > 3 && !full_license_report) + break; + for (int j = 0; j < licenses_by_type[i].count; j++) { - if (i > 3 && !full_license_report) - break; - len += sprintf(result + len, ","); + buffer = license_to_json(crclist, buffer, licenses_by_type[i].licenses[j], i, &first); } - first = false; - len += sprintf(result + len, "%s", license_result.license_by_type[i]); - free(license_result.license_by_type[i]); } } - asprintf(&comp->license_text, "%s]", result); - free(license_result.license_by_type); - free(result); + len = buffer - result; + len += sprintf(result + len, "]"); + comp->license_text = result; + + /* Free all license lists */ + for (int i = 0; i < license_types; i++) + { + license_free_list(&licenses_by_type[i]); + } } diff --git a/src/match.c b/src/match.c index 2c28459..1b1a41d 100644 --- a/src/match.c +++ b/src/match.c @@ -320,28 +320,59 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_ return false; if (!*a->release_date) return true; - - if (!path_is_third_party(a->file) && path_is_third_party(b->file)) + + // Third-party path evaluation + int tp_a = path_is_third_party(a->file); + int tp_b = path_is_third_party(b->file); + + if (tp_a > tp_b) { - scanlog("Component rejected by third party filter\n"); + scanlog("Component rejected by third party path filter (%s=%d > %s=%d)\n", a->purls[0], tp_a, b->purls[0], tp_b); return false; } - - //lower rank selection logic - if (b->rank < COMPONENT_DEFAULT_RANK) + else if (tp_a < tp_b) + { + scanlog("Component accepted by third party path filter (%s=%d < %s=%d)\n", a->purls[0], tp_a, b->purls[0], tp_b); + return true; + } + //when the url ranking is enabled + if (b->rank < COMPONENT_DEFAULT_RANK || a->rank < COMPONENT_DEFAULT_RANK) { - if (b->rank < a->rank) + //shorter path lenght are prefered + if (b->rank < a->rank &&b->path_depth < a->path_depth/2) + return true; + else if (b->rank > a->rank && a->path_depth < b->path_depth/2) + return false; + + bool good_purl_a = binary_file_to_purl(a); + bool good_purl_b = binary_file_to_purl(b); + if (good_purl_b && !good_purl_a) { - scanlog("%s wins %s by rank %d/%d\n", b->purls[0], a->purls[0], b->rank, a->rank); + scanlog("Component %s prefered over %s by binary purl match\n", b->purls[0], a->purls[0]); return true; } - else if (b->rank > a->rank) + else if (good_purl_a && !good_purl_b) { - scanlog("%s rejected by rank %d\n", b->purls[0], b->rank); + scanlog("Component %s rejected by binary purl match\n", b->purls[0]); return false; } - } + //lower rank selection logic + if (b->rank < COMPONENT_RANK_SELECTION_MAX && b->path_depth <= a->path_depth) + { + scanlog("path lenght: %s - %d vs %s - %d\n", b->file, b->path_depth, a->file, a->path_depth); + if (b->rank < a->rank) + { + scanlog("%s wins %s by rank %d/%d\n", b->purls[0], a->purls[0], b->rank, a->rank); + return true; + } + else if (b->rank > a->rank) + { + scanlog("%s rejected by rank %d\n", b->purls[0], b->rank); + return false; + } + } + } /*if the relese date is the same untie with the component age (purl)*/ if (!strcmp(b->release_date, a->release_date)) { @@ -404,6 +435,7 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_ /*select the oldest release date */ if (strcmp(b->release_date, a->release_date) < 0) { + scanlog("Component %s prefered over %s by release date\n", b->purls[0], a->purls[0]); return true; } @@ -680,12 +712,14 @@ void match_select_best(scan_data_t *scan) continue; component_data_t * match_component = match->component_list.headp.lh_first->component; - scanlog("%s\n",match_component->purls[0]); - match_data_t * best_match = scan->matches_list_array[i]->best_match; component_data_t * best_match_component = best_match->component_list.headp.lh_first->component; - if (path_is_third_party(match_component->file)) + scanlog("Current purl %s - current best %s\n",match_component->purls[0], best_match_component->purls[0]); + if (match_component == best_match_component) + continue; + + if (path_is_third_party(match_component->file) < path_is_third_party(best_match_component->file) || !strcmp(match_component->release_date, "9999-99-99")) continue; scanlog("%s - %s - %d - %d VS %s - %s - %d - %d\n", @@ -693,10 +727,17 @@ void match_select_best(scan_data_t *scan) best_match_component->release_date, scan->matches_list_array[i]->best_match->hits,best_match_component->rank, match_component->purls[0], match_component->release_date, item->match->hits, match_component->rank); + + if (best_match_component->identified < match_component->identified) + { + scanlog("Replacing best match for an identified component\n"); + scan->matches_list_array[i]->best_match = item->match; + continue; + } //If the best match is not good or is not identified be prefer the candidate. if ((!best_match_component->identified && match_component->identified) || - (path_is_third_party(best_match_component->file))) + (path_is_third_party(best_match_component->file) < path_is_third_party(match_component->file))) { scanlog("Replacing best match for a prefered component\n"); scan->matches_list_array[i]->best_match = item->match; diff --git a/src/scan.c b/src/scan.c index b2b858a..0123ab0 100644 --- a/src/scan.c +++ b/src/scan.c @@ -291,7 +291,17 @@ int wfp_scan(char * path, int scan_max_snippets, int scan_max_components) rec = (uint8_t*) strdup(line + tagln + (MD5_LEN * 2) + 1); char * target = field_n(2, (char *)rec); - + + /* Validate that the WFP file has the correct format */ + if (!target) + { + fprintf(stderr, "Error: Malformed WFP file. Missing target field in line: %s\n", line); + fprintf(stderr, "Expected format: file=,,\n"); + free(rec); + free(hexmd5); + exit(EXIT_FAILURE); + } + /*Init a new scan object for the next file to be scanned */ scan = scan_data_init(target, scan_max_snippets, scan_max_components); strcpy(scan->source_md5, tmp_md5_hex); diff --git a/src/url.c b/src/url.c index f881cdd..027cd88 100644 --- a/src/url.c +++ b/src/url.c @@ -363,7 +363,7 @@ bool get_oldest_url(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, else if(comp->identified == comp_oldest->identified) { - if (comp->rank < comp_oldest->rank) //lowest rank is 1. + if (comp->rank < comp_oldest->rank && comp->path_depth <= comp_oldest->path_depth) //lowest rank is 1. replace = true; else if ((!*comp_oldest->release_date && *comp->release_date) || (*comp->release_date && (strcmp(comp->release_date, comp_oldest->release_date) < 0))) diff --git a/src/util.c b/src/util.c index 6ed372d..820f216 100644 --- a/src/util.c +++ b/src/util.c @@ -31,6 +31,7 @@ #include #include +#include #include #include "util.h" @@ -331,67 +332,265 @@ void free_and_null(void ** pr) } } -bool path_is_third_party(const char* path) +int path_is_third_party(const char* path) { const char* patterns[] = { - "third_party", - "ThirdParty", - "3rdparty", - "site-packages", - "vendor", - "external", - "dependencies", - "ext", - "contrib", - "externals", - "third-party", - "subprojects", - "node_modules", - "components", - "deps", - "modules", - "nuget", - "imported", - "foreign", - "extern", - "bundle", - "pip_packages", - "bower_components", - "jspm_packages", - "site-packages", - "jars", - "assemblies", - "assets/vendor", - "published", - "packages.lock", - "pod", - "Pods", - "cargo_home", - "gems", - "composer/vendor", - "_vendor", - "go/pkg", - "vendors", - "extern", - "extlib", - "local_packages", - "managed", - "3rd", - "thirdparty", - "LibResources" + // Explicit third-party naming + "third_party", // Covers third_party, ThirdParty, third-party via strcasestr + "thirdparty", // Alternative spelling + "3rdparty", // Alternative naming + + // Package manager directories - very high confidence + "node_modules", // npm (JavaScript) + "site-packages", // pip (Python) + "vendor", // Composer (PHP), Go modules, Ruby bundler + "gems", // RubyGems + "cargo_home", // Cargo (Rust) + "go/pkg", // Go packages + "bower_components", // Bower (JavaScript) + "jspm_packages", // JSPM (JavaScript) + + // Build/dependency management directories + "external", // Maven, CMake external dependencies + "externals", // Alternative + "dependencies", // Generic dependency directories + "dep", // Short form + "packages", // NuGet, Generic (covers packages.lock) + + // Language-specific package directories + "pod", // CocoaPods (iOS) - covers pod, Pods + "composer", // PHP Composer + "nuget", // .NET NuGet + "jars", // Java JAR libraries + "assemblies", // .NET assemblies + + // Common third-party conventions + "extern", // External code convention + "extlib", // External libraries + "_vendor", // Vendor variant + "imported", // Imported code + "foreign", // Foreign code + + // Build output that may contain third-party + "dist", // Distribution builds + "release", // Release builds + "bundle", // Bundled dependencies + + // Contribution/extension directories + "contrib", // Contributed/third-party code + "plugin", // Plugins (often third-party) + + "lib", "components", "modules", "ext", + "test", "fixtures", "examples", + "files", "assets", "runtime", + "subprojects", "managed", "local_packages", "published", + "driver", "libresources", "offloading" }; - + const int numPatterns = sizeof(patterns) / sizeof(patterns[0]); - - for (int i = 0; i < numPatterns; i++) + + for (int i = 0; i < numPatterns; i++) { - if (strstr(path, patterns[i]) != NULL) + if (strcasestr(path, patterns[i]) != NULL) { - return true; + return i; } } - - return false; + + return numPatterns + 1; } +/** + * @brief Counts the number of '/' characters in a path string + * @param path input path string + * @return number of '/' characters found, or 0 if none + */ +int path_depth(char* path) +{ + if (!path) + return 0; + + int count = 0; + char *p = path; + + while (*p) + { + if (*p == '/') + count++; + p++; + } + if (count == 0) + scanlog("No '/' found in path: %s\n", path); + return count; +} + +/** + * @brief Detects if a file is binary based on extension and checks if the recommended PURL matches comp->purls[0] + * @param comp Component data containing file path and PURL information + * @return true if the recommended PURL matches comp->purls[0], false otherwise + */ +bool binary_file_to_purl(component_data_t *comp) +{ + char * path = comp->file; + if (!path) + path = comp->url; + if (!comp || !path || !comp->purls[0]) + return false; + + const char *recommended_purl = NULL; + + /* Find the last dot for extension */ + const char* ext = strrchr(path, '.'); + if (!ext) + { + /* No extension - treat as generic binary */ + recommended_purl = NULL; + } + else + { + /* Skip the dot */ + ext++; + + /* Map binary extensions to package manager PURLs */ + + /* Java ecosystem (Maven/Gradle) */ + if (strcasecmp(ext, "jar") == 0 || strcasecmp(ext, "war") == 0 || + strcasecmp(ext, "ear") == 0 || strcasecmp(ext, "aar") == 0) + recommended_purl = "pkg:maven/"; + + /* .NET ecosystem */ + else if (strcasecmp(ext, "dll") == 0 || strcasecmp(ext, "exe") == 0 || + strcasecmp(ext, "nupkg") == 0) + recommended_purl = "pkg:nuget/"; + + /* Python ecosystem */ + else if (strcasecmp(ext, "whl") == 0 || strcasecmp(ext, "egg") == 0 || + strcasecmp(ext, "pyz") == 0 || strcasecmp(ext, "pex") == 0) + recommended_purl = "pkg:pypi/"; + + /* Ruby ecosystem */ + else if (strcasecmp(ext, "gem") == 0) + recommended_purl = "pkg:gem/"; + + /* Rust ecosystem */ + else if (strcasecmp(ext, "rlib") == 0 || strcasecmp(ext, "rmeta") == 0) + recommended_purl = "pkg:cargo/"; + + /* Go ecosystem */ + else if (strcasecmp(ext, "a") == 0) + { + /* Check if path contains go-related patterns */ + if (strcasestr(path, "/go/") || strcasestr(path, "/golang/") || strcasestr(path, "GOPATH")) + recommended_purl = "pkg:golang/"; + /* Otherwise could be C/C++ static library */ + else + recommended_purl = "pkg:generic/"; + } + + /* Node.js/JavaScript ecosystem */ + else if (strcasecmp(ext, "node") == 0 || strcasecmp(ext, "napi") == 0) + recommended_purl = "pkg:npm/"; + + /* PHP ecosystem */ + else if (strcasecmp(ext, "phar") == 0) + recommended_purl = "pkg:composer/"; + + /* Debian/Ubuntu packages */ + else if (strcasecmp(ext, "deb") == 0) + recommended_purl = "pkg:deb/"; + + /* RPM-based (Red Hat, Fedora, CentOS) */ + else if (strcasecmp(ext, "rpm") == 0) + recommended_purl = "pkg:rpm/"; + + /* Alpine Linux / Android */ + else if (strcasecmp(ext, "apk") == 0) + { + /* Android packages use Maven coordinates */ + if (strcasestr(path, "android")) + recommended_purl = "pkg:maven/"; + else + recommended_purl = "pkg:apk/"; + } + + /* macOS/iOS */ + else if (strcasecmp(ext, "framework") == 0 || strcasecmp(ext, "dylib") == 0 || + strcasecmp(ext, "bundle") == 0) + recommended_purl = "pkg:cocoapods/"; + + /* Swift Package Manager */ + else if (strcasecmp(ext, "swiftmodule") == 0) + recommended_purl = "pkg:swift/"; + + /* Objective-C/C++ */ + else if (strcasecmp(ext, "o") == 0) + recommended_purl = "pkg:generic/"; + + /* Shared libraries */ + else if (strcasecmp(ext, "so") == 0) + recommended_purl = "pkg:generic/"; + + /* Windows shared libraries */ + else if (strcasecmp(ext, "pyd") == 0) + recommended_purl = "pkg:pypi/"; /* Python extension module */ + + /* Erlang/Elixir */ + else if (strcasecmp(ext, "beam") == 0 || strcasecmp(ext, "ez") == 0) + recommended_purl = "pkg:hex/"; + + /* Docker images */ + else if (strcasecmp(ext, "tar") == 0 || strcasecmp(ext, "tar.gz") == 0) + { + if (strcasestr(path, "docker") || strcasestr(path, "container")) + recommended_purl = "pkg:docker/"; + else + recommended_purl = "pkg:generic/"; + } + + /* OCI/Container images */ + else if (strcasecmp(ext, "oci") == 0) + recommended_purl = "pkg:oci/"; + + /* Generic binary extensions */ + else if (strcasecmp(ext, "bin") == 0 || strcasecmp(ext, "dat") == 0 || + strcasecmp(ext, "class") == 0) + recommended_purl = "pkg:generic/"; + + /* WebAssembly */ + else if (strcasecmp(ext, "wasm") == 0) + recommended_purl = "pkg:generic/"; + } + + /* If no recommended PURL was found, return false */ + if (!recommended_purl) + return false; + + /* Normalize the PURL for case-insensitive comparison */ + char *purl_lower = strdup(comp->purls[0]); + if (!purl_lower) + return false; + + for (char *p = purl_lower; *p; p++) + *p = tolower(*p); + + /* Create lowercase version of recommended PURL for comparison */ + char *recommended_lower = strdup(recommended_purl); + if (!recommended_lower) + { + free(purl_lower); + return false; + } + + for (char *p = recommended_lower; *p; p++) + *p = tolower(*p); + + /* Check if purl_lower starts with the recommended PURL namespace */ + bool result = (strncmp(purl_lower, recommended_lower, strlen(recommended_lower)) == 0); + + /* Free allocated memory */ + free(purl_lower); + free(recommended_lower); + + return result; +} diff --git a/src/versions.c b/src/versions.c index 54fa4a4..e6f4526 100644 --- a/src/versions.c +++ b/src/versions.c @@ -44,36 +44,6 @@ #include "decrypt.h" #include "versions.h" -/** - * @brief Normalize component version - * @param version version string to be processed - * @param component component string - */ -void normalise_version(char *version, char *component) -{ - if (!version) - return; - - char aux[MAX_FIELD_LN] = "\0"; - int compt_len = strlen(component); - /* Remove leading component name from version */ - if ((version && component) && stristart(version, component) && strlen(version) > compt_len + 1) - { - sprintf(aux, "%s",version + compt_len + 1); - } - - /* Remove unwanted leading characters from the version */ - if (version && (((*version == 'v' || *version =='r') && isdigit(version[1])) || !isalnum(*version))) - { - sprintf(aux, "%s",version + 1); - } - - /* Remove trailing ".orig" from version */ - char *orig = strstr(aux, ".orig"); - if (orig) *orig = 0; - if (*aux) - strcpy(version, aux); -} static char * purl_indirection_reference[FETCH_MAX_FILES]; static int purl_indirection_index = 0; @@ -136,3 +106,48 @@ void purl_latest_version_free() } purl_indirection_index = 0; } + +char* normalise_version(const char* input_string, char* result) { + if (input_string == NULL || result == NULL) { + if (result != NULL) result[0] = '\0'; + return result; + } + + // 1. Find first digit (strip non-digits from beginning) + const char* start = input_string; + while (*start && !isdigit(*start)) { + start++; + } + + // If no digits found, return empty string + if (*start == '\0') { + result[0] = '\0'; + return result; + } + + // 2. Find last digit (strip non-digits from end) + const char* end = input_string + strlen(input_string) - 1; + while (end > start && !isdigit(*end)) { + end--; + } + + // 3. Copy digits and replace non-digit sequences with a single dot + char* dest = result; + int in_non_digit_sequence = 0; + + for (const char* p = start; p <= end; p++) { + if (isdigit(*p)) { + *dest++ = *p; + in_non_digit_sequence = 0; + } else { + // Only add one dot per sequence of non-digits + if (!in_non_digit_sequence) { + *dest++ = '.'; + in_non_digit_sequence = 1; + } + } + } + + *dest = '\0'; + return result; +} \ No newline at end of file diff --git a/src/vulnerability.c b/src/vulnerability.c index a23592c..3765014 100644 --- a/src/vulnerability.c +++ b/src/vulnerability.c @@ -259,7 +259,6 @@ int print_vulnerabilities(component_data_t *component) uint32_t crclist[CRC_LIST_LEN]; memset(crclist, 0, sizeof(crclist)); - component_data_t comp = *component; component->vulnerabilities_text = NULL; component->vulnerabilities = 0; component->crclist = crclist; @@ -268,39 +267,48 @@ int print_vulnerabilities(component_data_t *component) for (int i = 0; i < MAX_PURLS && component->purls[i]; i++) records += ldb_fetch_recordset(NULL, oss_vulnerability, component->purls_md5[i], false, print_vulnerability_item, component); - /* Search for purl@version in NVD */ + char * version_normalized = calloc(strlen(component->version) + 1, 1); + normalise_version(component->version, version_normalized); + + char * version_normalized_latest = calloc(strlen(component->latest_version) + 1, 1); + normalise_version(component->latest_version, version_normalized_latest); + /* Search for purl@version in NVD */ for (int i = 0; i < MAX_PURLS && component->purls[i]; i++) { uint8_t md5[MD5_LEN]; - purl_version_md5(md5, component->purls[i], comp.version); + purl_version_md5(md5, component->purls[i], version_normalized); records += ldb_fetch_recordset(NULL, oss_vulnerability, md5, false, print_vulnerability_item, component); } /* Search for for purl@latest_version in NVD */ - if (strcmp(comp.version, comp.latest_version)) + if (strcmp(version_normalized, version_normalized_latest)) { for (int i = 0; i < MAX_PURLS && component->purls[i]; i++) { uint8_t md5[MD5_LEN]; - purl_version_md5(md5, component->purls[i], comp.latest_version); + + purl_version_md5(md5, component->purls[i], version_normalized_latest); records += ldb_fetch_recordset(NULL, oss_vulnerability, md5, false, print_vulnerability_item, component); } } /* Search for vendor/component/version in NVD */ uint8_t md5[MD5_LEN]; - version_md5(md5, component->vendor, component->component, comp.version); + version_md5(md5, component->vendor, component->component, version_normalized); records += ldb_fetch_recordset(NULL, oss_vulnerability, md5, false, print_vulnerability_item, component); /* Search for vendor/component/latest_version in NVD */ - if (strcmp(comp.version, comp.latest_version)) + if (strcmp(version_normalized, version_normalized_latest)) { uint8_t md5[MD5_LEN]; - version_md5(md5, component->vendor, comp.component, comp.latest_version); + version_md5(md5, component->vendor, component->component, version_normalized_latest); records += ldb_fetch_recordset(NULL, oss_vulnerability, md5, false, print_vulnerability_item, component); } + free(version_normalized); + free(version_normalized_latest); + char * aux = NULL; asprintf(&aux, "\"vulnerabilities\": [%s]", component->vulnerabilities_text ? component->vulnerabilities_text : ""); free(component->vulnerabilities_text);