@library.filter
def compare(string, string_1, threshold=0.5):
    """Returns True when two strings are different enough to not be redundant.

    Similarity is measured at the character level with
    difflib.SequenceMatcher: https://docs.python.org/3/library/difflib.html
    This is a textual comparison only; it does not detect strings that are
    worded differently but mean the same thing.

    Args:
        string: First string to compare. None is treated as an empty string.
        string_1: Second string to compare. None is treated as an empty
            string.
        threshold: Similarity ratio, between 0.0 and 1.0, at or above which
            the two strings are considered redundant. Defaults to 0.5.

    Returns:
        False if the similarity ratio of `string` and `string_1` is greater
        than or equal to `threshold`, otherwise True. Two empty strings have
        a ratio of 1.0, so they return False with the default threshold.
    """

    # SequenceMatcher raises TypeError on None, which a template can pass in
    # when a field is missing; compare such values as empty strings instead.
    first = '' if string is None else string
    second = '' if string_1 is None else string_1

    similarity_ratio = SequenceMatcher(None, first, second).ratio()

    return similarity_ratio < threshold