# finishedProject/app.py -- post-patch module, reformatted from the collapsed diff.
"""Simple SMS spam filter GUI (Dear PyGui) backed by a keyword heuristic."""

from dearpygui.core import *
from dearpygui.simple import *
import re
import string
import datetime
from collections import Counter

# Words that mark a message as spam when they appear as whole tokens.
SPAM_KEYWORDS = frozenset(
    ["free", "winner", "win", "cash", "prize", "buy", "urgent"]
)

# Compiled once. re.escape keeps ']', '\', '^' and '-' literal inside the
# character class instead of relying on how they happen to be ordered.
_PUNCTUATION_RE = re.compile(f"[{re.escape(string.punctuation)}]")
_DIGITS_RE = re.compile(r"\d+")

# Global list to keep prediction history
predictions = []


# --- Text Processing Utilities ---
def pre_process(text):
    """Return *text* lower-cased with punctuation and digits removed.

    Runs of whitespace are collapsed to a single space and leading/trailing
    whitespace is dropped.
    """
    text = _PUNCTUATION_RE.sub("", text.lower())
    text = _DIGITS_RE.sub("", text)
    return " ".join(text.split())


def categorize_words(text):
    """Split *text* on whitespace and return the list of tokens."""
    return text.split()


def count_words(text):
    """Return the number of whitespace-separated tokens in *text*."""
    return len(text.split())


def extract_unique_words(text):
    """Return the distinct tokens of *text* as a list (order unspecified)."""
    return list(set(text.split()))


def calculate_spam_score(text):
    """Return the percentage of tokens in *text* that are spam keywords.

    The result is rounded to two decimals; empty text scores 0.
    """
    words = text.split()
    if not words:
        return 0
    matches = sum(1 for word in words if word in SPAM_KEYWORDS)
    return round(matches / len(words) * 100, 2)


def get_most_frequent_words(text, n=5):
    """Return up to *n* ``(word, count)`` pairs, most frequent first.

    Ties keep first-seen order, as a stable sort on the count does.
    """
    return Counter(text.split()).most_common()[:n]


# --- Prediction Logic ---
def predict(text):
    """Classify pre-processed *text* and return ``(label, rgb_colour)``.

    A message is "Spam" when at least one whole token is a spam keyword.
    Matching is done per token, not per substring, so "window" or "winter"
    no longer trigger on "win" and the verdict agrees with
    ``calculate_spam_score``.
    """
    if not SPAM_KEYWORDS.isdisjoint(text.split()):
        return ("Spam", [255, 0, 0])
    return ("Not Spam", [0, 128, 0])


# --- Reporting Functions ---
def get_message_summary(text):
    """Return a dict of word statistics for pre-processed *text*."""
    return {
        "word_count": count_words(text),
        "unique_count": len(extract_unique_words(text)),
        "spam_score": calculate_spam_score(text),
        "frequent_words": get_most_frequent_words(text),
    }


def display_word_info(text):
    """Add text widgets showing the summary of *text* to the current parent."""
    summary = get_message_summary(text)
    add_spacing(count=8)
    add_text(f"Word Count: {summary['word_count']}", color=[100, 100, 255])
    add_spacing(count=4)
    add_text(f"Unique Words: {summary['unique_count']}", color=[100, 255, 100])
    add_spacing(count=4)
    add_text(f"Spam Score: {summary['spam_score']}%", color=[255, 165, 0])
    add_spacing(count=4)
    add_text("Most Frequent Words:", color=[0, 191, 255])
    for word, count in summary['frequent_words']:
        add_text(f"- {word}: {count} times", bullet=True)


# --- Button Callback Functions ---
# The callbacks accept the same (sender, data) pair the "Check" callback is
# written for; the defaults keep plain no-argument calls working.
def clear_input(sender=None, data=None):
    """Empty the "Input" text field."""
    set_value("Input", "")
    log_info("Input cleared.")


def save_input_to_file(sender=None, data=None):
    """Append the current input, prefixed with a timestamp, to saved_input.txt."""
    input_value = get_value("Input")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Explicit encoding: the message is free-form user text.
    with open("saved_input.txt", "a", encoding="utf-8") as f:
        f.write(f"[{timestamp}] {input_value}\n")
    log_info("Input saved to file.")


def export_summary(sender=None, data=None):
    """Append a summary report of the current input to summary_report.txt."""
    input_value = get_value("Input")
    summary = get_message_summary(pre_process(input_value))
    with open("summary_report.txt", "a", encoding="utf-8") as f:
        f.write("Summary Report\n")
        f.write("==============\n")
        f.write(f"Word Count: {summary['word_count']}\n")
        f.write(f"Unique Words: {summary['unique_count']}\n")
        f.write(f"Spam Score: {summary['spam_score']}%\n")
        f.write("Most Frequent Words:\n")
        for word, count in summary['frequent_words']:
            f.write(f"- {word}: {count} times\n")
        f.write("\n")
    log_info("Summary exported to file.")


def check_spam_callback(sender, data):
    """Classify the current input and show the verdict plus word statistics."""
    input_value = get_value("Input")
    processed_input = pre_process(input_value)
    prediction, color = predict(processed_input)
    predictions.append(prediction)

    # Clear any previous prediction display
    # NOTE(review): the widget is addressed by its text ("Spam"/"Not Spam"),
    # so two identical verdicts in a row refer to the same name, and the
    # word-info widgets from earlier clicks are never hidden -- confirm how
    # Dear PyGui treats repeated widget names before relying on this.
    if len(predictions) > 1:
        hide_item(predictions[-2])

    with window("Simple SMS Spam Filter"):
        add_spacing(count=12)
        add_separator()
        add_spacing(count=12)
        add_text(prediction, color=color)
        display_word_info(processed_input)


# --- GUI Setup ---
set_main_window_size(540, 720)
set_global_font_scale(1.25)
set_theme("Gold")
set_style_window_padding(30, 30)

with window("Simple SMS Spam Filter", width=520, height=677):
    set_window_pos("Simple SMS Spam Filter", 0, 0)
    add_drawing("logo", width=520, height=290)
    add_separator()
    add_spacing(count=12)
    add_text("Please enter an SMS message of your choice to check if it's spam or not", color=[232, 163, 33])
    add_spacing(count=12)
    add_input_text("Input", width=415, default_value="type message here!")
    add_spacing(count=12)
    add_button("Check", callback=check_spam_callback)
    add_spacing(count=6)
    add_button("Clear Input", callback=clear_input)
    add_spacing(count=6)
    add_button("Save Input", callback=save_input_to_file)
    add_spacing(count=6)
    add_button("Export Summary", callback=export_summary)

draw_image("logo", "logo_spamFilter.png", [0, 240])

start_dearpygui()
print("Bye Bye, GUI")
# finishedProject/functions.py -- post-patch module, reformatted from the collapsed diff.
"""SMS Spam Filter GUI - Enhanced Version.

Loads the labelled SMS corpus, builds per-class word lists and serves a
Dear PyGui front end that classifies a typed message by word frequency.
"""

# Imports
from dearpygui.core import *
from dearpygui.simple import *
import random
import pandas as pd
import string
import nltk
import datetime
from collections import Counter

nltk.download('punkt')
nltk.download('stopwords')

# Load and preprocess data
data = pd.read_csv('SMSSpamCollection.txt', sep='\t', header=None, names=["label", "sms"])

# Read the stop-word list once; asking nltk for it per token repeats the
# same lookup for every word of every message.
_STOP_WORDS = frozenset(nltk.corpus.stopwords.words('english'))


# Pre-processing function
def pre_process(sms):
    """Return the lower-cased, punctuation-free, non-stop-word tokens of *sms*."""
    remove_punct = "".join(char.lower() for char in sms if char not in string.punctuation)
    tokens = nltk.tokenize.word_tokenize(remove_punct)
    return [word for word in tokens if word not in _STOP_WORDS]


data['processed'] = data['sms'].apply(pre_process)

# Categorize words
spam_words = []
ham_words = []
for sms in data['processed'][data['label'] == 'spam']:
    spam_words.extend(sms)
for sms in data['processed'][data['label'] == 'ham']:
    ham_words.extend(sms)

# Occurrence tables built once, so each lookup is a dict access instead of a
# scan over the whole word list. Missing words count as 0, like list.count.
_spam_counts = Counter(spam_words)
_ham_counts = Counter(ham_words)


# Predict function with confidence
def predict(user_input):
    """Classify a token list and return ``(message, rgb_colour)``.

    Each token votes with the number of times it occurs in the spam and ham
    word lists; the larger total wins and equal totals give the 50% answer.
    """
    spam_counter = sum(_spam_counts[word] for word in user_input)
    ham_counter = sum(_ham_counts[word] for word in user_input)
    red = [220, 50, 50]
    green = [100, 220, 50]

    if ham_counter > spam_counter:
        certainty = round((ham_counter / (ham_counter + spam_counter)) * 100, 2)
        return f"message is not spam, with {certainty}% certainty", green
    if spam_counter > ham_counter:
        certainty = round((spam_counter / (ham_counter + spam_counter)) * 100, 2)
        return f"message is spam, with {certainty}% certainty", red
    return "message could be spam, with 50% certainty", [255, 255, 255]


# Analytics functions
def count_words(text):
    """Return the number of tokens in the token list *text*."""
    return len(text)


def extract_unique_words(text):
    """Return the distinct tokens of *text* as a list (order unspecified)."""
    return list(set(text))


def calculate_spam_score(text):
    """Return the percentage of tokens in *text* that occur in the spam word list.

    The result is rounded to two decimals; an empty token list scores 0.
    """
    if not text:
        return 0
    spam_matches = sum(1 for word in text if word in _spam_counts)
    return round(spam_matches / len(text) * 100, 2)


def get_most_frequent_words(text, n=5):
    """Return the *n* most common ``(word, count)`` pairs of *text*."""
    freq = Counter(text)
    return freq.most_common(n)


def get_message_summary(text):
    """Return a dict of word statistics for the token list *text*."""
    return {
        "word_count": count_words(text),
        "unique_count": len(extract_unique_words(text)),
        "spam_score": calculate_spam_score(text),
        "frequent_words": get_most_frequent_words(text),
    }


def display_word_info(text):
    """Add text widgets showing the summary of *text* to the current parent."""
    summary = get_message_summary(text)
    add_spacing(count=8)
    add_text(f"Word Count: {summary['word_count']}", color=[100, 100, 255])
    add_spacing(count=4)
    add_text(f"Unique Words: {summary['unique_count']}", color=[100, 255, 100])
    add_spacing(count=4)
    add_text(f"Spam Score: {summary['spam_score']}%", color=[255, 165, 0])
    add_spacing(count=4)
    add_text("Most Frequent Words:", color=[0, 191, 255])
    for word, count in summary['frequent_words']:
        add_text(f"- {word}: {count} times", bullet=True)


# Additional functionalities
def display_spam_ham_ratios():
    """Add a line with the number of spam and ham messages in the dataset."""
    total_spam = len(data[data['label'] == 'spam'])
    total_ham = len(data[data['label'] == 'ham'])
    ratio_text = f"Total Messages - Spam: {total_spam}, Ham: {total_ham}"
    add_spacing(count=6)
    add_text(ratio_text, color=[128, 0, 128])


def show_random_example():
    """Add a randomly sampled dataset message and its label to the current parent."""
    example = data.sample(1).iloc[0]
    msg_type = example['label'].capitalize()
    add_spacing(count=6)
    add_text(f"Random Example ({msg_type}):", color=[0, 100, 200])
    add_text(example['sms'], wrap=450)


# Utility functions
# The callbacks accept the same (sender, data) pair the "Check" callback is
# written for; the defaults keep plain no-argument calls working.
def clear_input(sender=None, data=None):
    """Empty the "Input" text field."""
    set_value("Input", "")
    log_info("Input cleared.")


def save_input_to_file(sender=None, data=None):
    """Append the current input, prefixed with a timestamp, to saved_input.txt."""
    input_value = get_value("Input")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Explicit encoding: the message is free-form user text.
    with open("saved_input.txt", "a", encoding="utf-8") as f:
        f.write(f"[{timestamp}] {input_value}\n")
    log_info("Input saved to file.")


def export_summary(sender=None, data=None):
    """Append a summary report of the current input to summary_report.txt."""
    input_value = get_value("Input")
    processed_input = pre_process(input_value)
    summary = get_message_summary(processed_input)
    with open("summary_report.txt", "a", encoding="utf-8") as f:
        f.write("Summary Report\n")
        f.write("==============\n")
        f.write(f"Word Count: {summary['word_count']}\n")
        f.write(f"Unique Words: {summary['unique_count']}\n")
        f.write(f"Spam Score: {summary['spam_score']}%\n")
        f.write("Most Frequent Words:\n")
        for word, count in summary['frequent_words']:
            f.write(f"- {word}: {count} times\n")
        f.write("\n")
    log_info("Summary exported to file.")


# Main check button callback
def check_spam_callback(sender, data):
    """Classify the current input and append the verdict, stats and dataset info.

    The ``data`` parameter shadows the module-level DataFrame only inside
    this function; the helpers called below still read the global one.
    """
    # NOTE(review): every click adds another set of widgets, several with the
    # same text as on the previous click -- confirm how Dear PyGui treats
    # repeated widget names.
    input_value = get_value("Input")
    processed_input = pre_process(input_value)
    prediction, color = predict(processed_input)
    with window("Simple SMS Spam Filter"):
        add_spacing(count=12)
        add_separator()
        add_spacing(count=12)
        add_text(prediction, color=color)
        display_word_info(processed_input)
        display_spam_ham_ratios()
        show_random_example()


# GUI setup
set_main_window_size(540, 720)
set_global_font_scale(1.25)
set_theme("Gold")
set_style_window_padding(30, 30)

with window("Simple SMS Spam Filter", width=520, height=677):
    set_window_pos("Simple SMS Spam Filter", 0, 0)
    add_drawing("logo", width=520, height=290)
    add_separator()
    add_spacing(count=12)
    add_text("Please enter an SMS message of your choice to check if it's spam or not", color=[232, 163, 33])
    add_spacing(count=12)
    add_input_text("Input", width=415, default_value="type message here!")
    add_spacing(count=12)
    add_button("Check", callback=check_spam_callback)
    add_spacing(count=6)
    add_button("Clear Input", callback=clear_input)
    add_spacing(count=6)
    add_button("Save Input", callback=save_input_to_file)
    add_spacing(count=6)
    add_button("Export Summary", callback=export_summary)

draw_image("logo", "logo_spamFilter.png", [0, 240])

start_dearpygui()
print("Bye Bye, GUI")

# --- what follows belongs to starterFiles/functions.py ---
# starterFiles/functions.py -- post-patch module, reformatted from the collapsed diff.
"""Improved and Modular SMS Spam Filter GUI.

Loads the labelled SMS corpus, derives spam/ham word lists and runs a
Dear PyGui window that classifies a typed message by word frequency.
"""

# Imports
from dearpygui.core import *
from dearpygui.simple import *
import pandas as pd
import string
import nltk
import datetime
import os
from collections import Counter

# NLTK Data Download
nltk.download('punkt')
nltk.download('stopwords')

# Constants
DATA_FILE = 'SMSSpamCollection.txt'
LOGO_IMAGE = 'logo_spamFilter.png'

# Load dataset
if not os.path.exists(DATA_FILE):
    raise FileNotFoundError(f"Required file '{DATA_FILE}' not found.")

data = pd.read_csv(DATA_FILE, sep='\t', header=None, names=['label', 'sms'])

# Preprocessing
stop_words = set(nltk.corpus.stopwords.words('english'))


def pre_process(text):
    """Return the lower-cased, punctuation-free, non-stop-word tokens of *text*."""
    text = ''.join(char.lower() for char in text if char not in string.punctuation)
    tokens = nltk.word_tokenize(text)
    return [word for word in tokens if word not in stop_words]


data['processed'] = data['sms'].apply(pre_process)


# Word Categorization
def categorize_words():
    """Return ``(spam_words, ham_words)``: every token of each class, with repeats."""
    spam_words = [word for sms in data['processed'][data['label'] == 'spam'] for word in sms]
    ham_words = [word for sms in data['processed'][data['label'] == 'ham'] for word in sms]
    return spam_words, ham_words


spam_words, ham_words = categorize_words()

# Occurrence tables built once, so each lookup is a dict access instead of a
# scan over the whole word list. Missing words count as 0, like list.count.
_spam_counts = Counter(spam_words)
_ham_counts = Counter(ham_words)

# Prediction
COLOR_SPAM = [220, 50, 50]
COLOR_HAM = [100, 220, 50]
COLOR_UNKNOWN = [255, 255, 255]


def predict(input_words):
    """Classify a token list and return ``(message, rgb_colour)``.

    Each token votes with its number of occurrences in the spam and ham word
    lists. Equal totals -- including the case where no token is known --
    give the undecided 50% answer rather than a "not spam" verdict.
    """
    spam_score = sum(_spam_counts[word] for word in input_words)
    ham_score = sum(_ham_counts[word] for word in input_words)

    if spam_score == ham_score:
        return "message could be spam, with 50% certainty", COLOR_UNKNOWN

    # The scores differ, so the total is strictly positive here.
    total = spam_score + ham_score
    certainty = round((max(spam_score, ham_score) / total) * 100, 2)
    if spam_score > ham_score:
        return f"message is spam, with {certainty}% certainty", COLOR_SPAM
    return f"message is not spam, with {certainty}% certainty", COLOR_HAM


# Summary & Analysis
def get_summary(words):
    """Return a dict of statistics for the token list *words*.

    ``spam_score`` is the percentage of tokens that occur in the spam word
    list, rounded to two decimals (0 for an empty list).
    """
    if words:
        spam_hits = sum(1 for w in words if w in _spam_counts)
        spam_score = round(100 * spam_hits / len(words), 2)
    else:
        spam_score = 0
    return {
        "word_count": len(words),
        "unique_words": len(set(words)),
        "spam_score": spam_score,
        "frequent_words": Counter(words).most_common(5),
    }


def show_summary(words):
    """Add text widgets showing the summary of *words* to the current parent."""
    summary = get_summary(words)
    # Keyword form, matching every other add_spacing call in this module.
    add_spacing(count=4)
    add_text(f"Word Count: {summary['word_count']}", color=[100, 100, 255])
    add_text(f"Unique Words: {summary['unique_words']}", color=[100, 255, 100])
    add_text(f"Spam Score: {summary['spam_score']}%", color=[255, 165, 0])
    add_text("Most Frequent Words:", color=[0, 191, 255])
    for word, freq in summary['frequent_words']:
        add_text(f"- {word}: {freq} times", bullet=True)


# Input/Output Utilities
# The callbacks accept the same (sender, data) pair the "Check" callback is
# written for; the defaults keep plain no-argument calls working.
def clear_input(sender=None, data=None):
    """Empty the "Input" text field."""
    set_value("Input", "")


def save_input(sender=None, data=None):
    """Append the current input, prefixed with the current time, to saved_input.txt."""
    # Explicit encoding: the message is free-form user text.
    with open("saved_input.txt", "a", encoding="utf-8") as f:
        f.write(f"[{datetime.datetime.now()}] {get_value('Input')}\n")


def export_summary(sender=None, data=None):
    """Append a summary report of the current input to summary_report.txt."""
    words = pre_process(get_value("Input"))
    summary = get_summary(words)
    with open("summary_report.txt", "a", encoding="utf-8") as f:
        f.write("Summary Report\n====================\n")
        f.write(f"Word Count: {summary['word_count']}\n")
        f.write(f"Unique Words: {summary['unique_words']}\n")
        f.write(f"Spam Score: {summary['spam_score']}%\n")
        f.write("Most Frequent Words:\n")
        for word, count in summary['frequent_words']:
            f.write(f"- {word}: {count} times\n")
        f.write("\n")


def _percent(part, whole):
    """Return *part* as a percentage of *whole*, rounded to 2 decimals (0 if empty)."""
    return round(100 * part / whole, 2) if whole else 0


def show_dataset_stats(sender=None, data=None):
    """Open a window with message counts and vocabulary size of the dataset."""
    # The ``data`` parameter would hide the module-level DataFrame, so fetch
    # the frame explicitly.
    frame = globals()["data"]
    # NOTE(review): the window name is the same on every click -- confirm how
    # Dear PyGui treats a second window with an existing name.
    with window("Dataset Summary", width=480, height=250):
        total = len(frame)
        spam_total = len(frame[frame['label'] == 'spam'])
        ham_total = len(frame[frame['label'] == 'ham'])
        add_text("Dataset Stats", color=[255, 255, 0])
        add_text(f"Total Messages: {total}")
        add_text(f"Spam: {spam_total} ({_percent(spam_total, total)}%)")
        add_text(f"Ham: {ham_total} ({_percent(ham_total, total)}%)")
        add_text(f"Unique Words: {len(_spam_counts.keys() | _ham_counts.keys())}")


def show_help(sender=None, data=None):
    """Open a window with short usage instructions."""
    with window("Help", width=460, height=220):
        add_text("How to Use the SMS Spam Filter", color=[255, 255, 102])
        add_text("- Enter an SMS message in the input box.")
        add_text("- Click 'Check' to analyze.")
        add_text("- Use buttons below to clear, save, or export.")
        add_text("- View dataset insights or get help anytime.")


# Callback
def check_message_callback(sender, data):
    """Classify the current input and show the verdict with its summary."""
    message = get_value("Input")
    words = pre_process(message)
    result, color = predict(words)
    # NOTE(review): the result window uses the same name on every click --
    # confirm how Dear PyGui treats a second window with an existing name.
    with window("SMS Spam Filter Result"):
        add_text(result, color=color)
        show_summary(words)


# GUI Setup
set_main_window_size(560, 770)
set_theme("Gold")
set_global_font_scale(1.25)

with window("SMS Spam Filter", width=540, height=750):
    set_window_pos("SMS Spam Filter", 0, 0)
    add_drawing("Logo", width=520, height=240)
    add_text("Enter an SMS message to check if it's spam.", color=[232, 163, 33])
    add_input_text("Input", width=480, default_value="Type message here!")
    add_button("Check", callback=check_message_callback)
    add_spacing(count=4)
    add_button("Clear Input", callback=clear_input)
    add_button("Save Input", callback=save_input)
    add_button("Export Summary", callback=export_summary)
    add_button("Show Dataset Stats", callback=show_dataset_stats)
    add_button("Help", callback=show_help)

# NOTE(review): the image is placed at y=240 while the drawing is 240 high --
# verify the logo is still visible.
draw_image("Logo", LOGO_IMAGE, [0, 240])

start_dearpygui()
print("GUI Closed")