From 645163b285e125e02af5028ac5280a3a7e1a3d51 Mon Sep 17 00:00:00 2001 From: Artem Date: Wed, 28 Feb 2024 23:29:47 +0300 Subject: [PATCH 1/2] update gitignore --- .gitignore | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 074e251..fc93236 100644 --- a/.gitignore +++ b/.gitignore @@ -19,4 +19,9 @@ app/mirror_bot.py /config .DS_store .history -/.venv/ \ No newline at end of file +/.venv/ +/bin +/lib +/lib64 +/share +pyvenv.cfg \ No newline at end of file From 649dea3503bf089bdc42dd6d81917e524d95df5d Mon Sep 17 00:00:00 2001 From: Artem Date: Thu, 29 Feb 2024 00:39:27 +0300 Subject: [PATCH 2/2] change message processing pipeline && fix bug with spam detection --- src/utils/message_processing.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/utils/message_processing.py b/src/utils/message_processing.py index 6b105cc..511a301 100644 --- a/src/utils/message_processing.py +++ b/src/utils/message_processing.py @@ -76,25 +76,30 @@ async def classify_message( return msg_features # Classifying the message - msg_features["model_name"] = "GptSpamClassifier" + msg_features['model_name'] = "RuleBasedClassifier" + msg_features['score'], msg_features['reasons'] = rule_based_classifier.predict(X) + + msg_features['reasons'] = "Причины:\n" + msg_features['reasons'] + msg_features['label'] = 1 if msg_features['score'] >= THRESHOLD_RULE_BASED else 0 + + if msg_features['label'] == 1: + return msg_features + + # The second check using GPT if rule based model is marked as not spam + gpt_msg_features = {"label": None, "reasons": None, "model_name": "None", + "score": 0.0, "time_spent": 0.0, "prompt_name": "None", + "prompt_tokens": 0, 'completion_tokens': 0 + } + gpt_msg_features["model_name"] = "GptSpamClassifier" response = await gpt_classifier.predict(X) response = response[0] logger.info(response) keys = ['label', 'reasons', 'prompt_tokens', 'completion_tokens', 'time_spent', 
'prompt_name'] for key, value in zip(keys, response.values()): - msg_features[key] = value - - # If there was an Error with OpenAI (timeout, unexpected response or different error), rule_based model will be used - if msg_features['label'] is None: - msg_features['model_name'] = "RuleBasedClassifier" - msg_features['score'], msg_features['reasons'] = rule_based_classifier.predict(X) - - msg_features['reasons'] = "Причины:\n" + msg_features['reasons'] - msg_features['label'] = 1 if score >= THRESHOLD_RULE_BASED else 0 - - return msg_features - + gpt_msg_features[key] = value + # If the OpenAI call failed (timeout, unexpected response, or another error), the rule-based model's predictions are returned instead + return msg_features if gpt_msg_features['label'] is None else gpt_msg_features async def send_spam_alert(