From 645163b285e125e02af5028ac5280a3a7e1a3d51 Mon Sep 17 00:00:00 2001
From: Artem <amarskiywork@gmail.com>
Date: Wed, 28 Feb 2024 23:29:47 +0300
Subject: [PATCH 1/2] update gitignore

---
 .gitignore | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 074e251..fc93236 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,4 +19,9 @@ app/mirror_bot.py
 /config
 .DS_store
 .history
-/.venv/
\ No newline at end of file
+/.venv/
+/bin
+/lib
+/lib64
+/share
+pyvenv.cfg
\ No newline at end of file

From 649dea3503bf089bdc42dd6d81917e524d95df5d Mon Sep 17 00:00:00 2001
From: Artem <amarskiywork@gmail.com>
Date: Thu, 29 Feb 2024 00:39:27 +0300
Subject: [PATCH 2/2] change message processing pipeline and fix bug with spam
 detection

---
 src/utils/message_processing.py | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/utils/message_processing.py b/src/utils/message_processing.py
index 6b105cc..511a301 100644
--- a/src/utils/message_processing.py
+++ b/src/utils/message_processing.py
@@ -76,25 +76,30 @@ async def classify_message(
         return msg_features
 
     # Classifying the message
-    msg_features["model_name"] = "GptSpamClassifier"
+    msg_features['model_name'] = "RuleBasedClassifier"
+    msg_features['score'], msg_features['reasons'] = rule_based_classifier.predict(X)
+
+    msg_features['reasons'] = "Причины:\n" + msg_features['reasons']
+    msg_features['label'] = 1 if msg_features['score'] >= THRESHOLD_RULE_BASED else 0
+
+    if msg_features['label'] == 1:
+        return msg_features
+
+    # Second check with GPT, run only when the rule-based model did not flag the message as spam
+    gpt_msg_features = {"label": None, "reasons": None, "model_name": "None",
+                    "score": 0.0, "time_spent": 0.0, "prompt_name": "None",
+                    "prompt_tokens": 0, 'completion_tokens': 0
+                    }
+    gpt_msg_features["model_name"] = "GptSpamClassifier"
     response = await gpt_classifier.predict(X)
     response = response[0]
     logger.info(response)
     keys = ['label', 'reasons', 'prompt_tokens', 'completion_tokens', 'time_spent', 'prompt_name']
     for key, value in zip(keys, response.values()):
-        msg_features[key] = value
-
-    # If there was an Error with OpenAI (timeout, unexpected response or different error), rule_based model will be used
-    if msg_features['label'] is None:
-        msg_features['model_name'] = "RuleBasedClassifier"
-        msg_features['score'], msg_features['reasons'] = rule_based_classifier.predict(X)
-
-        msg_features['reasons'] = "Причины:\n" + msg_features['reasons']
-        msg_features['label'] = 1 if score >= THRESHOLD_RULE_BASED else 0
-
-    return msg_features
-
+        gpt_msg_features[key] = value
 
+    # If the OpenAI call failed (timeout, unexpected response, or another error), fall back to the rule-based model's prediction
+    return msg_features if gpt_msg_features['label'] is None else gpt_msg_features
 
 
 async def send_spam_alert(