From aebdf685f6cff14d2cf0a0b462195846481f7f28 Mon Sep 17 00:00:00 2001 From: davidcoscor <107520285+davidcoscor@users.noreply.github.com> Date: Wed, 21 Aug 2024 20:51:11 +0100 Subject: [PATCH] Update get_entities.sh to identify entities containing dashes --- merpy/MER/get_entities.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/merpy/MER/get_entities.sh b/merpy/MER/get_entities.sh index 0c248cb..51a5359 100755 --- a/merpy/MER/get_entities.sh +++ b/merpy/MER/get_entities.sh @@ -191,7 +191,7 @@ get_entities_source () { if [ -e "$source"_links.tsv ]; then while read -r line; do local label=$(cut -d$'\t' -f3- <<< "$line") - local text=$(sed "s/[^[:alnum:][:space:]()]/./g" <<< "$label") # Replace special characters + local text=$(sed "s/[^[:alnum:][:space:]()-]/./g" <<< "$label") # Replace special characters text=$(sed -e 's/[[:space:]()@]\+/ /g' <<< "$text") # Remove multiple whitespace text=$(sed -e 's/\.$//' -e 's/\. / /g' <<< "$text") # Remove full stops text=$(tr '[:upper:]' '[:lower:]' <<< "$text") # Make text lowercase