Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,9 @@ app/mirror_bot.py
/config
.DS_store
.history
/.venv/
/.venv/
/bin
/lib
/lib64
/share
pyvenv.cfg
31 changes: 18 additions & 13 deletions src/utils/message_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,25 +76,30 @@ async def classify_message(
return msg_features

# Classifying the message
msg_features["model_name"] = "GptSpamClassifier"
msg_features['model_name'] = "RuleBasedClassifier"
msg_features['score'], msg_features['reasons'] = rule_based_classifier.predict(X)

msg_features['reasons'] = "Причины:\n" + msg_features['reasons']
msg_features['label'] = 1 if msg_features['score'] >= THRESHOLD_RULE_BASED else 0

if msg_features['label'] == 1:
return msg_features

# The second check using GPT if rule based model is marked as not spam
gpt_msg_features = {"label": None, "reasons": None, "model_name": "None",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Кажется, необязательно заводить ещё один словарь с такими же ключами —

можно переиспользовать уже существующий `msg_features`.

"score": 0.0, "time_spent": 0.0, "prompt_name": "None",
"prompt_tokens": 0, 'completion_tokens': 0
}
gpt_msg_features["model_name"] = "GptSpamClassifier"
response = await gpt_classifier.predict(X)
response = response[0]
logger.info(response)
keys = ['label', 'reasons', 'prompt_tokens', 'completion_tokens', 'time_spent', 'prompt_name']
for key, value in zip(keys, response.values()):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

В `response` лежит следующее:
{
"label": None,
"reasons": "Input is missing required columns.",
"prompt_tokens": 0,
"completion_tokens": 0,
"time_spent": 0,
"prompt": None,
}
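В целом можно обойтись без таких странных итераций через `zip` и просто написать `for key, value in response.items()` (только учесть, что в ответе ключ называется `prompt`, а в коде ожидается `prompt_name`).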

msg_features[key] = value

# If there was an Error with OpenAI (timeout, unexpected response or different error), rule_based model will be used
if msg_features['label'] is None:
msg_features['model_name'] = "RuleBasedClassifier"
msg_features['score'], msg_features['reasons'] = rule_based_classifier.predict(X)

msg_features['reasons'] = "Причины:\n" + msg_features['reasons']
msg_features['label'] = 1 if score >= THRESHOLD_RULE_BASED else 0

return msg_features

gpt_msg_features[key] = value

# If there was an Error with OpenAI (timeout, unexpected response or different error), rule_based model predictions will be used
return msg_features if gpt_msg_features['label'] is None else gpt_msg_features


async def send_spam_alert(
Expand Down