Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions inc/component.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@
#include "scanoss.h"

#define COMPONENT_DEFAULT_RANK 999 //default rank for components without rank information
#define COMPONENT_RANK_SELECTION_MAX 8 //max rank to be considered in component selection

extern int component_rank_max;

// Third-party confidence thresholds for path_is_third_party()
#define TP_THRESHOLD_HIGH 12 // 0-11: high confidence third-party (node_modules, vendor, etc.)
#define TP_THRESHOLD_MED 27 // 12-26: medium confidence (external, dependencies, etc.)
// 27-31: medium-low confidence (dist, contrib, etc.)
// 32+: not third-party
/**
* @brief Component object definition.
*
Expand Down Expand Up @@ -50,6 +58,7 @@ typedef struct component_data_t
int url_stats[5]; /* url stats: quantity of file */
int health_stats[3]; /* health stats: forks, watchers, contributors */
int rank; /* purl ranking - optional*/
int path_depth; /* depth of the matched file path*/
} component_data_t;

component_data_t * component_init(void);
Expand Down
2 changes: 1 addition & 1 deletion inc/scanoss.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#define WFP_LN 4
#define WFP_REC_LN 18

#define SCANOSS_VERSION "5.4.16"
#define SCANOSS_VERSION "5.4.17"

/* Log files */
#define SCAN_LOG "/tmp/scanoss_scan.log"
Expand Down
11 changes: 9 additions & 2 deletions inc/util.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#ifndef __UTIL_H
#define __UTIL_H

#include <stdint.h>
#include <stdbool.h>
#include <scanoss.h>
#include "component.h"

/* Reverse an uint32 number */
void uint32_reverse(uint8_t *data);
Expand Down Expand Up @@ -62,6 +63,12 @@ char * str_cat_realloc(char **a, char * b);

void free_and_null(void ** pr);

bool path_is_third_party(const char* path);
int path_is_third_party(const char* path);

/* Counts the number of '/' characters in a path string */
int path_depth(char* path);

/* Detects binary file type and validates if PURL matches file extension */
bool binary_file_to_purl(component_data_t *comp);

#endif
2 changes: 1 addition & 1 deletion inc/versions.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ typedef struct release_version
char date[MAX_FIELD_LN];
} release_version;

void normalise_version(char *version, char *component);
char* normalise_version(const char* input_string, char* result);
void add_versions(component_data_t *component, file_recordset *files, uint32_t records);
void get_purl_version(release_version *release, char *purl, uint8_t *file_id);
char * version_cleanup(char * version, char * component);
Expand Down
2 changes: 2 additions & 0 deletions src/component.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ bool fill_component(component_data_t *component, uint8_t *url_key, char *file_pa
}
else
component->rank = COMPONENT_DEFAULT_RANK;

component->path_depth = path_depth(component->file);

return true;
}
Expand Down
100 changes: 57 additions & 43 deletions src/license.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,38 @@
const char *license_sources[] = {"component_declared", "file_spdx_tag", "file_header", "license_file", "scancode", "scancode-file", "osselot"};
bool full_license_report = false;

struct licenses_s

struct license_list
{
char **license_by_type;
uint32_t * crclist;
char **licenses;
int count;
};

/**
 * @brief Append a private copy of a license string to a license list.
 *
 * The string is duplicated, so the caller keeps ownership of @p license.
 * The copies stored in the list are released by license_free_list().
 * On any failure the list is left exactly as it was before the call.
 *
 * @param ptr list to append to
 * @param license license string; strings shorter than 2 characters are rejected
 * @return true if the license was appended, false on invalid arguments
 *         or allocation failure
 */
bool license_add_to_list(struct license_list * ptr, char * license)
{
	if (!ptr || !license || strlen(license) < 2)
		return false;

	/* Duplicate first: if this fails nothing in the list has been touched. */
	char *copy = strdup(license);
	if (!copy)
		return false;

	/* Grow through a temporary so a failed realloc neither leaks the
	 * existing array nor leaves count > 0 with a NULL array pointer. */
	char **grown = realloc(ptr->licenses, sizeof(char *) * (ptr->count + 1));
	if (!grown)
	{
		free(copy);
		return false;
	}

	ptr->licenses = grown;
	ptr->licenses[ptr->count] = copy;
	ptr->count++;
	return true;
}

/**
 * @brief Release every string held by a license list and reset it to empty.
 *
 * Does nothing when @p ptr is NULL or the list has no backing array.
 * After the call the array pointer is NULL and the count is 0.
 *
 * @param ptr list to release
 */
void license_free_list(struct license_list * ptr)
{
	if (ptr == NULL || ptr->licenses == NULL)
		return;

	int idx = 0;
	while (idx < ptr->count)
	{
		free(ptr->licenses[idx]);
		idx++;
	}

	free(ptr->licenses);
	ptr->count = 0;
	ptr->licenses = NULL;
}

/**
* @brief Remove invalid characters from a license name
* @param license license string
Expand Down Expand Up @@ -249,12 +275,12 @@ static char *split_in_json_array(uint32_t *crclist, char *buffer, char *license,
return buffer;
}

void license_to_json(uint32_t *crclist, char *buffer, char *license, int src, bool *first_record)
char * license_to_json(uint32_t *crclist, char *buffer, char *license, int src, bool *first_record)
{
if (!strchr(license, '/'))
json_from_license(crclist, buffer, license, src, first_record);
return json_from_license(crclist, buffer, license, src, first_record);
else
split_in_json_array(crclist, buffer, license, src, first_record);
return split_in_json_array(crclist, buffer, license, src, first_record);
}

/**
Expand Down Expand Up @@ -292,7 +318,7 @@ bool get_first_license_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_
*/
bool print_licenses_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
{
struct licenses_s * license_results = ptr;
struct license_list * licenses = ptr;

if (!datalen)
return false;
Expand All @@ -312,16 +338,10 @@ bool print_licenses_item(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *
int src = atoi(source);

scanlog("Fetched license %s\n", license);
char result[MAX_FIELD_LN * 10] = "\0";
int len = 0;

if (strlen(license) > 2 && (src < (sizeof(license_sources) / sizeof(license_sources[0]))))
{
bool first_record = !(license_results->license_by_type[src] && *license_results->license_by_type[src]);
license_to_json(license_results->crclist, result + len, license, src, &first_record);
str_cat_realloc(&license_results->license_by_type[src], result);
license_add_to_list(&licenses[src], license);

}
free(source);
free(license);

Expand All @@ -347,47 +367,43 @@ void print_licenses(component_data_t *comp)
/* CRC list (used to avoid duplicates) */
uint32_t crclist[CRC_LIST_LEN];
memset(crclist, 0, sizeof(crclist));
comp->crclist = crclist;
uint32_t records = 0;
comp->license_text = NULL;

int license_types = sizeof(license_sources) / sizeof(license_sources[0]);
struct licenses_s license_result = { .crclist = crclist, .license_by_type = calloc(license_types, sizeof(char *)) };
struct license_list licenses_by_type[license_types];
memset(licenses_by_type, 0, sizeof(licenses_by_type));

/* Print URL license */
if (comp->license && strlen(comp->license) > 2)
{
bool first_record = true;
license_result.license_by_type[0] = calloc(MAX_FIELD_LN * 10, 1);
license_to_json(crclist, license_result.license_by_type[0], comp->license, 0, &first_record);
license_add_to_list(&licenses_by_type[0], comp->license);
scanlog("License present in URL table");
/* Add license to CRC list (to avoid duplicates) */
add_CRC(license_result.crclist, string_crc32c(comp->license));
}
else
{
scanlog("License NOT present in URL table\n");
}

/* Look for component or file license */
records = ldb_fetch_recordset(NULL, oss_license, comp->url_md5, false, print_licenses_item, &license_result);
records = ldb_fetch_recordset(NULL, oss_license, comp->url_md5, false, print_licenses_item, &licenses_by_type);
scanlog("License for url_id license returns %d hits\n", records);

records = ldb_fetch_recordset(NULL, oss_license, comp->file_md5_ref, false, print_licenses_item, &license_result);
records = ldb_fetch_recordset(NULL, oss_license, comp->file_md5_ref, false, print_licenses_item, &licenses_by_type);
scanlog("License for file_id license returns %d hits\n", records);
for (int i = 0; i < MAX_PURLS && comp->purls[i]; i++)
{
/* Calculate purl@version md5 */
uint8_t purlversion_md5[MD5_LEN];
purl_version_md5(purlversion_md5, comp->purls[i], comp->version);

records = ldb_fetch_recordset(NULL, oss_license, purlversion_md5, false, print_licenses_item, &license_result);
records = ldb_fetch_recordset(NULL, oss_license, purlversion_md5, false, print_licenses_item, &licenses_by_type);
scanlog("License for %s@%s license returns %d hits\n", comp->purls[i], comp->version, records);

if (records)
break;

records = ldb_fetch_recordset(NULL, oss_license, comp->purls_md5[i], false, print_licenses_item, &license_result);
records = ldb_fetch_recordset(NULL, oss_license, comp->purls_md5[i], false, print_licenses_item, &licenses_by_type);
scanlog("License for %s license returns %d hits\n", comp->purls[i], records);

if (records)
Expand All @@ -396,35 +412,33 @@ void print_licenses(component_data_t *comp)

/* Open licenses structure */
char * result = calloc(MAX_FIELD_LN * 100, 1);
char * buffer = result;
int len = 0;

len += sprintf(result + len, "\"licenses\": [");
buffer = result + len;
bool first = true;

if (comp->license_text)
{
len += sprintf(result + len, "%s", comp->license_text);
free(comp->license_text);
first = false;
}

for (int i = 0; i < license_types; i++)
{
if (license_result.license_by_type[i] && *license_result.license_by_type[i])
if (licenses_by_type[i].count > 0)
{
if (!first)
if (i > 3 && !full_license_report)
break;
for (int j = 0; j < licenses_by_type[i].count; j++)
{
if (i > 3 && !full_license_report)
break;
len += sprintf(result + len, ",");
buffer = license_to_json(crclist, buffer, licenses_by_type[i].licenses[j], i, &first);
}
first = false;
len += sprintf(result + len, "%s", license_result.license_by_type[i]);
free(license_result.license_by_type[i]);
}
}

asprintf(&comp->license_text, "%s]", result);
free(license_result.license_by_type);
free(result);
len = buffer - result;
len += sprintf(result + len, "]");
comp->license_text = result;

/* Free all license lists */
for (int i = 0; i < license_types; i++)
{
license_free_list(&licenses_by_type[i]);
}
}
71 changes: 56 additions & 15 deletions src/match.c
Original file line number Diff line number Diff line change
Expand Up @@ -320,28 +320,59 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
return false;
if (!*a->release_date)
return true;

if (!path_is_third_party(a->file) && path_is_third_party(b->file))

// Third-party path evaluation
int tp_a = path_is_third_party(a->file);
int tp_b = path_is_third_party(b->file);

if (tp_a > tp_b)
{
scanlog("Component rejected by third party filter\n");
scanlog("Component rejected by third party path filter (%s=%d > %s=%d)\n", a->purls[0], tp_a, b->purls[0], tp_b);
return false;
}

//lower rank selection logic
if (b->rank < COMPONENT_DEFAULT_RANK)
else if (tp_a < tp_b)
{
scanlog("Component accepted by third party path filter (%s=%d < %s=%d)\n", a->purls[0], tp_a, b->purls[0], tp_b);
return true;
}
//when the url ranking is enabled
if (b->rank < COMPONENT_DEFAULT_RANK || a->rank < COMPONENT_DEFAULT_RANK)
{
if (b->rank < a->rank)
//shorter path lengths are preferred
if (b->rank < a->rank &&b->path_depth < a->path_depth/2)
return true;
else if (b->rank > a->rank && a->path_depth < b->path_depth/2)
return false;

bool good_purl_a = binary_file_to_purl(a);
bool good_purl_b = binary_file_to_purl(b);
if (good_purl_b && !good_purl_a)
{
scanlog("%s wins %s by rank %d/%d\n", b->purls[0], a->purls[0], b->rank, a->rank);
scanlog("Component %s prefered over %s by binary purl match\n", b->purls[0], a->purls[0]);
return true;
}
else if (b->rank > a->rank)
else if (good_purl_a && !good_purl_b)
{
scanlog("%s rejected by rank %d\n", b->purls[0], b->rank);
scanlog("Component %s rejected by binary purl match\n", b->purls[0]);
return false;
}
}

//lower rank selection logic
if (b->rank < COMPONENT_RANK_SELECTION_MAX && b->path_depth <= a->path_depth)
{
scanlog("path lenght: %s - %d vs %s - %d\n", b->file, b->path_depth, a->file, a->path_depth);
if (b->rank < a->rank)
{
scanlog("%s wins %s by rank %d/%d\n", b->purls[0], a->purls[0], b->rank, a->rank);
return true;
}
else if (b->rank > a->rank)
{
scanlog("%s rejected by rank %d\n", b->purls[0], b->rank);
return false;
}
}
}
/* if the release date is the same, break the tie using the component age (purl) */
if (!strcmp(b->release_date, a->release_date))
{
Expand Down Expand Up @@ -404,6 +435,7 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
/*select the oldest release date */
if (strcmp(b->release_date, a->release_date) < 0)
{
scanlog("Component %s prefered over %s by release date\n", b->purls[0], a->purls[0]);
return true;
}

Expand Down Expand Up @@ -680,23 +712,32 @@ void match_select_best(scan_data_t *scan)
continue;
component_data_t * match_component = match->component_list.headp.lh_first->component;

scanlog("%s\n",match_component->purls[0]);

match_data_t * best_match = scan->matches_list_array[i]->best_match;
component_data_t * best_match_component = best_match->component_list.headp.lh_first->component;

if (path_is_third_party(match_component->file))
scanlog("Current purl %s - current best %s\n",match_component->purls[0], best_match_component->purls[0]);
if (match_component == best_match_component)
continue;

if (path_is_third_party(match_component->file) < path_is_third_party(best_match_component->file) || !strcmp(match_component->release_date, "9999-99-99"))
continue;

scanlog("%s - %s - %d - %d VS %s - %s - %d - %d\n",
best_match_component->purls[0],
best_match_component->release_date,
scan->matches_list_array[i]->best_match->hits,best_match_component->rank,
match_component->purls[0], match_component->release_date, item->match->hits, match_component->rank);

if (best_match_component->identified < match_component->identified)
{
scanlog("Replacing best match for an identified component\n");
scan->matches_list_array[i]->best_match = item->match;
continue;
}

//If the best match is not good or is not identified, we prefer the candidate.
if ((!best_match_component->identified && match_component->identified) ||
(path_is_third_party(best_match_component->file)))
(path_is_third_party(best_match_component->file) < path_is_third_party(match_component->file)))
{
scanlog("Replacing best match for a prefered component\n");
scan->matches_list_array[i]->best_match = item->match;
Expand Down
Loading
Loading