From 2d3a29bd0d8ec1cdb51a4bfb401a82fd7fd79b1f Mon Sep 17 00:00:00 2001 From: Shelby Potts Date: Wed, 23 Apr 2025 15:30:51 -0500 Subject: [PATCH 1/4] gutting all the match stuff, making match analytics into a class --- .env_example | 3 +- app/analytics/MatchAnalytics.py | 60 ++++++--- app/pages/MatchPage.py | 161 +------------------------ tests/analytics/test_MatchAnalytics.py | 152 +++++++++++++++++++++++ 4 files changed, 200 insertions(+), 176 deletions(-) create mode 100644 tests/analytics/test_MatchAnalytics.py diff --git a/.env_example b/.env_example index 5f5aa38..ae95262 100644 --- a/.env_example +++ b/.env_example @@ -1,5 +1,6 @@ -HOST=0.0.0.0 +HOST='0.0.0.0' PORT=8050 USER_FILE_PATH='file/path/user.json' +MATCH_FILE_PATH='file/path/matches.json' ASSETS_PATH='app/assets/' GEOLITE_DB_PATH='data/GeoLite2-City.mmdb' \ No newline at end of file diff --git a/app/analytics/MatchAnalytics.py b/app/analytics/MatchAnalytics.py index 71fd691..0770e54 100644 --- a/app/analytics/MatchAnalytics.py +++ b/app/analytics/MatchAnalytics.py @@ -1,12 +1,52 @@ import pandas as pd import re -import json +import json, os + +class MatchAnalytics: + def __init__(self): + self.match_file_path = os.environ.get("MATCH_FILE_PATH") + + if self.match_file_path is None: + raise Exception("MATCH_FILE_PATH environment varviable is not set.") + + if '.json' not in self.match_file_path: + raise Exception("The match file needs to be a JSON file.") + + with open(self.match_file_path, 'r') as file: + match_data = json.load(file) + self.match_data = match_data + + def get_match_data(self): + all_matches = [] + for entry in self.match_data: + matches = entry.get("match", []) + all_matches.extend(matches) + return all_matches + + def get_block_data(self): + all_blocks = [] + for entry in self.match_data: + blocks = entry.get("block", []) + all_blocks.extend(blocks) + return all_blocks + + def get_likes_data(self): + all_likes = [] + for entry in self.match_data: + likes = entry.get("like", []) + all_likes.extend(likes) + return all_likes + + def get_chat_data(self): + # TODO: this is actually getting each message, do we want that? + all_chats = [] + for entry in self.match_data: + chats = entry.get("chats", []) + all_chats.extend(chats) + return all_chats def prepare_uploaded_match_data(file_path="../data/app_uploaded_files/matches.json"): - __validate_upload_file_type(file_path) - __validate_match_file_upload(file_path) - with open(file_path, 'r') as file: # match upload data is a list of dictionaries match_upload_data = json.load(file) @@ -119,14 +159,4 @@ def __build_comments_list(events): if like_event.get('comment') is not None: likes_w_comments.append(like_event.get('comment')) - return likes_w_comments - - -def __validate_upload_file_type(file_path): - if not file_path.endswith('.json'): - raise ValueError("Invalid file type. Please upload a JSON file.") - - -def __validate_match_file_upload(file_path): - if 'match' not in file_path: - raise ValueError("Invalid file name. Please upload a file with 'match' in the file name.") + return likes_w_comments \ No newline at end of file diff --git a/app/pages/MatchPage.py b/app/pages/MatchPage.py index 83c0fc3..f142c49 100644 --- a/app/pages/MatchPage.py +++ b/app/pages/MatchPage.py @@ -1,168 +1,9 @@ from dash import html import dash_mantine_components as dmc -from dash import dcc, dash_table, Input, Output, callback -import plotly.express as px -from dash.exceptions import PreventUpdate -import analytics.MatchAnalytics as ma - - -global normalized_events - - -def serve_layout(): - return html.Div([ - # TODO: need to remove the button, but it has dependencies with the charts so will leave it until I can redo the charts - html.Button('Reload Graphs', id='refresh-page', style={"fontSize": 16, 'font-family': "Open Sans, verdana, arial, sans-serif"}), - dmc.Space(h=20), +layout = html.Div([ dmc.Text("Match Analytics", align="center", style={"fontSize": 28}, weight=500), dmc.Text("This section reveals patterns in the user's matching behavior, preferences, and key factors that influence successful connections with potential matches."), dmc.Space(h=20), - - # funnel graph showing breakdown of interactions - dmc.Text("Interaction Funnel", size="xl", align="left", weight=500), - dmc.Text("This funnel represents the funnel of your interactions with people on Hinge. The outermost layer " - "represents the total number of people you interacted with. Then it shows the number of outgoing likes " - "you sent, matches received, and conversations started from those matches.", align="left"), - html.Div([ - dcc.Graph(id='live-update-graph'), - ]), - - # side by side pie charts drilling into specifics of outgoing likes - dmc.Text("Outgoing Likes You've Sent", size="xl", align="left", weight=500), - dmc.Text("This is a deep dive into your outgoing likes. The pie chart on the left shows a breakdown of the rare" - " cases where Hinge shows you a users you have already sent an outgoing like to vs the users you liked" - " once. The pie chart on the right shows how many outgoing likes you sent where you left a comment on the" - " other person's profile.", align="left"), - html.Div(className='row', children=[ - html.Div(className='six columns', children=[ - dcc.Graph(id="live-update-double-likes-graph", style={'width': '50%', 'display': 'inline-block'}), - dcc.Graph(id="live-update-commented-likes-graph", style={'width': '50%', 'display': 'inline-block'}) - ]), - ]), - - # table showing like comments - dmc.Text("What You're Commenting When You Like Someone's Content", size="xl", align="left", weight=500), - html.Div([ - dash_table.DataTable(id='datatable-interactivity'), - html.Div(id='datatable-interactivity-container'), - ]), - - # line chart showing activity type frequencies by day - dmc.Text("Frequency of Action Types by Day", size="xl", align="left", weight=500), - dmc.Text("This line graph displays the counts of each action type (likes, matches, chats, and blocks aka unmatches)" - " per day over the duration of time you have been on Hinge. The legend on the right lists each of the" - " different action types, and you can select/ unselect different types to look at particular ones.", - align="left"), - dcc.Graph("live-update-action_types-graph"), - - # pie chart showing percentage of interactions with a phone number share - dmc.Text("How Many People Did You Give Your Number To?", size="xl", align="left", weight=500), - dmc.Text("This is the ratio of people you shared your phone number with out of the total number of people you " - "had chats with. This operates on the assumption you gave your phone number in a standard format, " - "ex: XXX-XXX-XXXX, XXXXXXXXXX, or (XXX)XXX-XXXX.", - align="left"), - dcc.Graph("live-update-number_shares-graph"), - - # histogram showing the number of outgoing messages in each chat - dmc.Text("Outgoing Messages Sent per Chat", size="xl", align="left", weight=500), - dmc.Text("This histogram shows the number of outgoing messages you sent in each chat.", - align="left"), - dcc.Graph("live-update-messages-per-chat-graph"), ]) - - -@callback( - Output('live-update-graph', 'figure'), - [Input('refresh-page', 'n_clicks')] -) -def update_graph_live(data): - __check_for_live_update_data(data) - __setup_global_norm_events() - return px.funnel(ma.total_counts(normalized_events), x=ma.total_counts(normalized_events)["count"], - y=ma.total_counts(normalized_events)["action_type"], - labels={'y': 'interaction count'}) - - -@callback( - Output('live-update-double-likes-graph', 'figure'), - [Input('refresh-page', 'n_clicks')] -) -def update_double_likes_pie(data): - __check_for_live_update_data(data) - __setup_global_norm_events() - return px.pie(ma.analyze_double_likes(normalized_events), values="Count", names="Like Frequency", - title="Number of Outgoing Likes per Person") - - -@callback( - Output('live-update-commented-likes-graph', 'figure'), - [Input('refresh-page', 'n_clicks')] -) -def update_commented_likes_pie(data): - __check_for_live_update_data(data) - __setup_global_norm_events() - return px.pie(ma.like_comment_ratios(normalized_events), values="Count", names="Likes With/ Without Comments", - title="Outgoing Likes with Comments") - - -@callback( - Output('live-update-action_types-graph', 'figure'), - [Input('refresh-page', 'n_clicks')] -) -def update_action_types_graph(data): - __check_for_live_update_data(data) - __setup_global_norm_events() - return px.line(ma.activity_by_date(normalized_events), - x=ma.activity_by_date(normalized_events)['activity_date'], - y=ma.activity_by_date(normalized_events)['count'], - color=ma.activity_by_date(normalized_events)['type'], - labels={'x': 'activity_date', 'y': 'count'}) - - -@callback( - Output('live-update-number_shares-graph', 'figure'), - [Input('refresh-page', 'n_clicks')] -) -def update_number_shares_graph(data): - __check_for_live_update_data(data) - __setup_global_norm_events() - return px.pie(ma.phone_number_shares(normalized_events), values="Count", names="Message Outcomes") - - -@callback( - Output('live-update-messages-per-chat-graph', 'figure'), - [Input('refresh-page', 'n_clicks')] -) -def update_messages_per_chat_graph(data): - __check_for_live_update_data(data) - __setup_global_norm_events() - return px.histogram(ma.date_count_distribution(normalized_events), x='outgoing_messages', nbins=50).update_layout(bargap=0.2) - - -@callback( - Output('datatable-interactivity-container', 'children'), - [Input('refresh-page', 'n_clicks')] -) -def update_comment_table(data): - __check_for_live_update_data(data) - __setup_global_norm_events() - commented_outgoing_likes_data = ma.commented_outgoing_likes(normalized_events).to_dict('records') - return [ - dash_table.DataTable(data=commented_outgoing_likes_data, page_size=10, - style_cell={'textAlign': 'left'}) - ] - - -layout = serve_layout() - - -def __setup_global_norm_events(file_path="../data/app_uploaded_files/matches.json"): - global normalized_events - normalized_events = ma.prepare_uploaded_match_data(file_path) - - -def __check_for_live_update_data(data): - if data is None: - raise PreventUpdate diff --git a/tests/analytics/test_MatchAnalytics.py b/tests/analytics/test_MatchAnalytics.py new file mode 100644 index 0000000..1669bbf --- /dev/null +++ b/tests/analytics/test_MatchAnalytics.py @@ -0,0 +1,152 @@ +import pytest, os, json +from unittest.mock import mock_open, patch + +from app.analytics.MatchAnalytics import MatchAnalytics + +######################################################################################### +# test values +######################################################################################### +MATCH_FILE_PATH = "fake/file/path/matches.json" +FIRST_MATCH_TIMESTAMP = '2005-04-23 14:53:01' +FIRST_BLOCK_TIMESTAMP = '2005-04-23 16:32:53' +FIRST_LIKE_TIMESTAMP = "2012-11-04 03:24:14" +FIRST_CHAT_MESSAGE = "Hey there!" +MATCH_DATA = ''' +[ + { + "match": [ + { + "timestamp": "2005-04-23 14:53:01" + } + ], + "chats": [ + { + "body": "Hey there!", + "timestamp": "2005-04-23 14:53:22" + } + ], + "block": [ + { + "block_type": "remove", + "timestamp": "2005-04-23 16:32:53" + } + ] + }, + { + "match": [ + { + "timestamp": "2006-08-06 23:08:31" + } + ], + "chats": [ + { + "body": "What's up?", + "timestamp": "2006-08-06 23:11:04" + } + ], + "block": [ + { + "block_type": "remove", + "timestamp": "2006-09-15 16:32:49" + } + ] + }, + { + "match": [ + { + "timestamp": "2013-05-06 23:09:16" + } + ], + "chats": [ + { + "body": "Hi!", + "timestamp": "2013-05-06 23:09:52" + }, + { + "body": "Here's another message", + "timestamp": "2013-05-09 02:41:05" + }, + { + "body": "And another message!", + "timestamp": "2013-05-10 12:27:21" + }, + { + "body": "And one last message", + "timestamp": "2013-05-10 12:27:00" + } + ], + "block": [ + { + "block_type": "remove", + "timestamp": "2013-06-15 16:32:45" + } + ], + "like": [ + { + "timestamp": "2012-11-04 03:24:14", + "like": [ + { + "timestamp": "2012-11-04 03:24:14" + } + ] + } + ] + } +] +''' +######################################################################################### +# pytest fixtures +######################################################################################### +@pytest.fixture +def match_analytics(monkeypatch): + monkeypatch.setenv("MATCH_FILE_PATH", MATCH_FILE_PATH) + + with patch("builtins.open", mock_open(read_data=MATCH_DATA)) as mock_file, \ + patch("json.load", return_value=json.loads(MATCH_DATA)) as mock_json_load: + + match_analytics = MatchAnalytics() + return match_analytics + +######################################################################################### +# unit tests +######################################################################################### +def test_exists(match_analytics): + assert match_analytics is not None + +def test_match_file_path_not_set(): + if "MATCH_FILE_PATH" in os.environ: + del os.environ["MATCH_FILE_PATH"] + + with pytest.raises(Exception, match="MATCH_FILE_PATH environment varviable is not set."): + MatchAnalytics() + +def test_match_file_not_json(): + os.environ["MATCH_FILE_PATH"] = "invalid_file.txt" + + with pytest.raises(Exception, match="The match file needs to be a JSON file."): + MatchAnalytics() + +def test_loads_match_data(match_analytics): + assert isinstance(match_analytics.match_data, list) + assert len(match_analytics.match_data) == 3 # 3 test matches + +def test_get_matches(match_analytics): + matches = match_analytics.get_match_data() + assert len(matches) == 3 + assert matches[0].get("timestamp") == FIRST_MATCH_TIMESTAMP + +def test_get_blocks(match_analytics): + blocks = match_analytics.get_block_data() + assert len(blocks) == 3 + assert blocks[0].get("timestamp") == FIRST_BLOCK_TIMESTAMP + assert blocks[0].get("block_type") == "remove" + +def test_get_likes(match_analytics): + likes = match_analytics.get_likes_data() + assert len(likes) == 1 + assert likes[0].get("timestamp") == FIRST_LIKE_TIMESTAMP + +def test_get_chats(match_analytics): + chats = match_analytics.get_chat_data() + assert len(chats) == 6 + assert chats[0].get("body") == FIRST_CHAT_MESSAGE From 1c9e2b7ce42623d32b9b3e573fe176fac7ca3d24 Mon Sep 17 00:00:00 2001 From: Shelby Potts Date: Wed, 23 Apr 2025 20:18:11 -0500 Subject: [PATCH 2/4] added boxplot for message counts --- app/analytics/MatchAnalytics.py | 132 ++++--------------------- app/pages/MatchPage.py | 42 +++++++- tests/analytics/test_MatchAnalytics.py | 36 ++++--- 3 files changed, 80 insertions(+), 130 deletions(-) diff --git a/app/analytics/MatchAnalytics.py b/app/analytics/MatchAnalytics.py index 0770e54..22fc13b 100644 --- a/app/analytics/MatchAnalytics.py +++ b/app/analytics/MatchAnalytics.py @@ -1,6 +1,7 @@ import pandas as pd import re import json, os +from datetime import datetime, timedelta class MatchAnalytics: def __init__(self): @@ -38,125 +39,26 @@ def get_likes_data(self): return all_likes def get_chat_data(self): - # TODO: this is actually getting each message, do we want that? all_chats = [] for entry in self.match_data: chats = entry.get("chats", []) all_chats.extend(chats) return all_chats + def get_message_count_last_12_months(self): + now = datetime.now() + one_year_ago = now - timedelta(days=365) -def prepare_uploaded_match_data(file_path="../data/app_uploaded_files/matches.json"): - with open(file_path, 'r') as file: - # match upload data is a list of dictionaries - match_upload_data = json.load(file) - - events = [] - for interaction, all_actions in enumerate(match_upload_data): - # action type is like, match, chats, blocks, overarching "action" - for action_type, actions in all_actions.items(): - # action is the metadata assoc. one event of the action type - for action in actions: - action["interaction_id"] = interaction - events.append(action) - - return pd.DataFrame(events).sort_values("timestamp") - - -def date_count_distribution(events): - chat_events = events[events["type"] == "chats"] - chats_per_interaction = chat_events.groupby('interaction_id').size() - - # convert the Series to a DataFrame with specified column names, have to reset the index - interaction_counts = chats_per_interaction.to_frame().reset_index() - interaction_counts.columns = ['interaction_id', 'outgoing_messages'] - return interaction_counts - - -def activity_by_date(events): - events['activity_date'] = pd.DatetimeIndex(events["timestamp"]).date - - event_type_counts_by_date = events.groupby(['activity_date', 'type']).size() - - # creating a DataFrame from the Series, have to reset the index - action_type_freq_per_day = pd.DataFrame(event_type_counts_by_date).reset_index() - action_type_freq_per_day.columns = ['activity_date', 'type', 'count'] - - return action_type_freq_per_day - - -def analyze_double_likes(events): - like_events = events[events["type"] == "like"] - multi_like_event_count = len(like_events.groupby('interaction_id').filter(lambda x: len(x) > 1)) - single_like_event_count = len(like_events) - multi_like_event_count - - single_vs_double_likes = pd.DataFrame( - [['Single Like', single_like_event_count], ['Multiple Likes', multi_like_event_count]], - columns=["Like Frequency", "Count"]) - - return single_vs_double_likes - - -def total_counts(events): - distinct_interaction_count = len(pd.unique(events['interaction_id'])) - like_event_count = len(events[events['type'] == "like"]) - match_event_count = len(events[events['type'] == "match"]) - - chat_events = events[events['type'] == "chats"] - chat_event_count = len(chat_events.interaction_id.unique()) - - totals = pd.DataFrame( - [['Distinct Interactions', distinct_interaction_count], ['Outgoing Likes', like_event_count], - ['Matches', match_event_count], - ['Chats', chat_event_count]], - columns=["action_type", "count"]) - return totals - - -def commented_outgoing_likes(events): - likes_w_comments = __build_comments_list(events) - - return pd.DataFrame(likes_w_comments, columns=["Comments"]) - - -def like_comment_ratios(events): - likes_w_comments = __build_comments_list(events) - likes_wo_comment = len(events) - len(likes_w_comments) - - likes_w_wo_comments = pd.DataFrame( - [['Likes with Comments', len(likes_w_comments)], ['Likes without Comments', likes_wo_comment]], - columns=["Likes With/ Without Comments", "Count"]) - return likes_w_wo_comments - - -def phone_number_shares(events): - chats_w_messages = events.where(events["type"] == "chats") - chats_w_messages = chats_w_messages[chats_w_messages['body'].notna()] - total_messages_w_chats = len(chats_w_messages) - - message_bodies = chats_w_messages['body'] - - phone_number_shared = [] - for message in message_bodies: - # finds common phone number formats in the message: XXX-XXX-XXXX, XXX.XXX.XXXX, (XXX) XXX-XXXX - message_containing_number = re.findall(r"\(?\d{3}\)?[-.\s]\d{3}[-.\s]\d{4}", message) - - if len(message_containing_number) >= 1: - phone_number_shared.append(message_containing_number) - - phone_number_share_ratios = pd.DataFrame([['Gave Phone Number', len(phone_number_shared)], - ['Did Not Give Phone Number', total_messages_w_chats]], - columns=["Message Outcomes", "Count"]) - return phone_number_share_ratios - -def __build_comments_list(events): - likes_w_comments = [] - like_events = events["like"].dropna() - for value in like_events: - # likes are an array with a single element (most of the time) - # TODO: handle multiple likes, it's rare but possible - like_event = value[0] - if like_event.get('comment') is not None: - likes_w_comments.append(like_event.get('comment')) - - return likes_w_comments \ No newline at end of file + msg_counts_per_month = [] + for entry in self.match_data: + match = entry.get("match", []) + chats = entry.get("chats", []) + if match: + match_time = datetime.fromisoformat(match[0]["timestamp"]) + if match_time >= one_year_ago: + month = match_time.strftime("%Y-%m") + msg_counts_per_month.append({ + "month": month, + "message_count": len(chats) + }) + return msg_counts_per_month diff --git a/app/pages/MatchPage.py b/app/pages/MatchPage.py index f142c49..c3d11ad 100644 --- a/app/pages/MatchPage.py +++ b/app/pages/MatchPage.py @@ -1,9 +1,49 @@ -from dash import html +from dash import html, dcc +import pandas as pd import dash_mantine_components as dmc +import plotly.express as px + +from analytics.MatchAnalytics import MatchAnalytics + +def message_counts_boxplot(): + message_counts = MatchAnalytics().get_message_count_last_12_months() + + df = pd.DataFrame(message_counts) + # change the month to a date so we can sort it and then convert it back to a string + df["month"] = pd.to_datetime(df["month"]) + df = df.sort_values("month") + df["month"] = df["month"].dt.strftime("%Y-%m") + + fig = px.box( + df, + x="month", + y="message_count", + height=600, # increase height + # title="Message Counts per Match by Month (Last 12 Months)", + labels={"month": "Month", "message_count": "Number of Messages"}, + points="all" # show individual data points too + ) + fig.update_layout(xaxis_tickangle=-45) + + return dmc.Card( + children=[ + dmc.Space(h=10), + dmc.Text("Message Count Variability by Month (Last 12 Months)", weight=700, size="xl"), + dmc.Space(h=10), + dmc.Text("This box plot shows how the number of messages exchanged per match varies across each month over the past year. Each box represents the distribution of message counts for matches that had at least one message in that month. The plot highlights patterns in user engagement, such as which months tend to have higher or lower activity, and reveals any outliers — matches with unusually high or low message counts. This can be useful for identifying seasonal trends or behavioral shifts in how users interact over time.", size="md"), + dmc.Space(h=10), + dcc.Graph(figure=fig) + ], + # withBorder=True, + shadow="sm", + radius="md", + style={"height": "750px"}, + ) layout = html.Div([ dmc.Text("Match Analytics", align="center", style={"fontSize": 28}, weight=500), dmc.Text("This section reveals patterns in the user's matching behavior, preferences, and key factors that influence successful connections with potential matches."), dmc.Space(h=20), + message_counts_boxplot() ]) diff --git a/tests/analytics/test_MatchAnalytics.py b/tests/analytics/test_MatchAnalytics.py index 1669bbf..eedd143 100644 --- a/tests/analytics/test_MatchAnalytics.py +++ b/tests/analytics/test_MatchAnalytics.py @@ -16,77 +16,77 @@ { "match": [ { - "timestamp": "2005-04-23 14:53:01" + "timestamp": "2025-04-23 14:53:01" } ], "chats": [ { "body": "Hey there!", - "timestamp": "2005-04-23 14:53:22" + "timestamp": "2025-04-23 14:53:22" } ], "block": [ { "block_type": "remove", - "timestamp": "2005-04-23 16:32:53" + "timestamp": "2025-04-23 16:32:53" } ] }, { "match": [ { - "timestamp": "2006-08-06 23:08:31" + "timestamp": "2025-03-06 23:08:31" } ], "chats": [ { "body": "What's up?", - "timestamp": "2006-08-06 23:11:04" + "timestamp": "2025-03-06 23:11:04" } ], "block": [ { "block_type": "remove", - "timestamp": "2006-09-15 16:32:49" + "timestamp": "2025-03-15 16:32:49" } ] }, { "match": [ { - "timestamp": "2013-05-06 23:09:16" + "timestamp": "2025-04-06 23:09:16" } ], "chats": [ { "body": "Hi!", - "timestamp": "2013-05-06 23:09:52" + "timestamp": "2025-04-06 23:09:52" }, { "body": "Here's another message", - "timestamp": "2013-05-09 02:41:05" + "timestamp": "2025-04-09 02:41:05" }, { "body": "And another message!", - "timestamp": "2013-05-10 12:27:21" + "timestamp": "2025-04-10 12:27:21" }, { "body": "And one last message", - "timestamp": "2013-05-10 12:27:00" + "timestamp": "2025-04-10 12:27:00" } ], "block": [ { "block_type": "remove", - "timestamp": "2013-06-15 16:32:45" + "timestamp": "2025-04-15 16:32:45" } ], "like": [ { - "timestamp": "2012-11-04 03:24:14", + "timestamp": "2025-03-04 03:24:14", "like": [ { - "timestamp": "2012-11-04 03:24:14" + "timestamp": "2025-03-04 03:24:14" } ] } @@ -150,3 +150,11 @@ def test_get_chats(match_analytics): chats = match_analytics.get_chat_data() assert len(chats) == 6 assert chats[0].get("body") == FIRST_CHAT_MESSAGE + +def test_get_message_count_last_12_months(match_analytics): + message_counts = match_analytics.get_message_count_last_12_months() + print(message_counts) + assert message_counts is not None + assert len(message_counts) == 3 + assert message_counts[2].get("month") == "2025-04" + assert message_counts[2].get("message_count") == 4 \ No newline at end of file From d22522770fa851e2b2e8ba6b39aae31a8fdb837c Mon Sep 17 00:00:00 2001 From: Shelby Potts Date: Wed, 23 Apr 2025 21:10:01 -0500 Subject: [PATCH 3/4] adds histograms and scatter plots --- app/analytics/MatchAnalytics.py | 59 ++++++++++++++++++++- app/pages/MatchPage.py | 92 +++++++++++++++++++++++++++++++-- 2 files changed, 145 insertions(+), 6 deletions(-) diff --git a/app/analytics/MatchAnalytics.py b/app/analytics/MatchAnalytics.py index 22fc13b..6d6a165 100644 --- a/app/analytics/MatchAnalytics.py +++ b/app/analytics/MatchAnalytics.py @@ -1,5 +1,3 @@ -import pandas as pd -import re import json, os from datetime import datetime, timedelta @@ -62,3 +60,60 @@ def get_message_count_last_12_months(self): "message_count": len(chats) }) return msg_counts_per_month + + def get_response_latency(self): + latency_data = [] + for entry in self.match_data: + match = entry.get("match", []) + chats = entry.get("chats", []) + + if match and chats: + match_time = datetime.fromisoformat(match[0]["timestamp"]) + first_message_time = datetime.fromisoformat(chats[0]["timestamp"]) + latency = (first_message_time - match_time).total_seconds() / (3600 * 24) + + latency_data.append({ + "match_time": match_time, + "first_message_time": first_message_time, + "latency_days": latency + }) + return latency_data + + def get_match_durations(self): + durations = [] + for entry in self.match_data: + match = entry.get("match", []) + block = entry.get("block", []) + + if match and block: + match_time = datetime.fromisoformat(match[0]["timestamp"]) + block_time = datetime.fromisoformat(block[0]["timestamp"]) + duration_days = (block_time - match_time).days + + durations.append({ + "match_time": match_time, + "block_time": block_time, + "duration_days": duration_days + }) + return durations + + def get_match_removal_v_count_scatter_data(self): + records = [] + + for entry in self.match_data: + match_time = entry.get("match", [{}])[0].get("timestamp") + block_time = entry.get("block", [{}])[0].get("timestamp") + chats = entry.get("chats", []) + + if match_time and block_time: + match_dt = datetime.fromisoformat(match_time) + block_dt = datetime.fromisoformat(block_time) + delta_days = (block_dt - match_dt).days + message_count = len(chats) + + records.append({ + "message_count": message_count, + "duration_days": delta_days + }) + + return records \ No newline at end of file diff --git a/app/pages/MatchPage.py b/app/pages/MatchPage.py index c3d11ad..16d5c13 100644 --- a/app/pages/MatchPage.py +++ b/app/pages/MatchPage.py @@ -5,8 +5,10 @@ from analytics.MatchAnalytics import MatchAnalytics +match_analytics = MatchAnalytics() + def message_counts_boxplot(): - message_counts = MatchAnalytics().get_message_count_last_12_months() + message_counts = match_analytics.get_message_count_last_12_months() df = pd.DataFrame(message_counts) # change the month to a date so we can sort it and then convert it back to a string @@ -19,7 +21,6 @@ def message_counts_boxplot(): x="month", y="message_count", height=600, # increase height - # title="Message Counts per Match by Month (Last 12 Months)", labels={"month": "Month", "message_count": "Number of Messages"}, points="all" # show individual data points too ) @@ -34,16 +35,99 @@ def message_counts_boxplot(): dmc.Space(h=10), dcc.Graph(figure=fig) ], - # withBorder=True, shadow="sm", radius="md", style={"height": "750px"}, ) +def response_latency_hist(): + latency_data = match_analytics.get_response_latency() + fig = px.histogram( + latency_data, + x="latency_days", + nbins=20, + labels={"latency_days": "Latency (days)"} + ) + return dmc.Card( + children=[ + dmc.Space(h=10), + dmc.Text("Response Latency between Match and First Message Sent", weight=700, size="xl"), + dmc.Space(h=10), + dmc.Text("This graph visualizes the response latency, or the time delay between when a match occurs and when the first message is sent." \ + "Shorter latencies may indicate higher levels of engagement or interest, while longer delays could suggest hesitation, lower enthusiasm, or forgotten matches.", size="md"), + dmc.Space(h=10), + dcc.Graph(figure=fig) + ], + shadow="sm", + radius="md", + style={"height": "520px"}, + ) + +def match_duration_hist(): + durations = match_analytics.get_match_durations() + + fig = px.histogram( + durations, + x="duration_days", + labels={"duration_days": "Duration (days)"} + ) + return dmc.Card( + children=[ + dmc.Space(h=10), + dmc.Text("Duration of Time Between Match and Remove", weight=700, size="xl"), + dmc.Space(h=10), + dmc.Text("This histogram visualizes the duration of a connection and when it was removed or blocked." \ + "Short durations might reflect mismatched expectations, ghosting, or immediate disinterest, while longer " \ + "durations may point to sustained conversations or lingering connections that eventually tapered off. ", size="md"), + dmc.Space(h=10), + dcc.Graph(figure=fig) + ], + shadow="sm", + radius="md", + style={"height": "520px"}, + ) + +def match_removal_count_scatter(): + match_rm_counts = pd.DataFrame(match_analytics.get_match_removal_v_count_scatter_data()) + + fig = px.scatter( + match_rm_counts, + x="message_count", + y="duration_days", + labels={ + "message_count": "Messages Exchanged", + "duration_days": "Days Between Match and Removal" + }, + opacity=0.7 + ) + fig.update_traces(marker=dict(size=10)) + + return dmc.Card( + children=[ + dmc.Space(h=10), + dmc.Text("Match Duration vs. Message Count", weight=700, size="xl"), + dmc.Space(h=10), + dmc.Text("This scatter plot explores the relationship between the number of messages exchanged in a match and the time until the match was removed or blocked. " \ + "Clusters near the bottom-left corner indicate 'early exits' — matches that were short-lived and involved little to no conversation, often pointing to ghosting or " \ + "instant disengagement. Conversely, matches in the top-right show more sustained interactions before ending.", size="md"), + dmc.Space(h=10), + dcc.Graph(figure=fig) + ], + shadow="sm", + radius="md", + style={"height": "600px"}, + ) + layout = html.Div([ dmc.Text("Match Analytics", align="center", style={"fontSize": 28}, weight=500), dmc.Text("This section reveals patterns in the user's matching behavior, preferences, and key factors that influence successful connections with potential matches."), dmc.Space(h=20), - message_counts_boxplot() + message_counts_boxplot(), + dmc.Space(h=20), + response_latency_hist(), + dmc.Space(h=20), + match_duration_hist(), + dmc.Space(h=20), + match_removal_count_scatter() ]) From bf4bb14cf10a42c54a01f128fa0b362d6705a87b Mon Sep 17 00:00:00 2001 From: Shelby Potts Date: Thu, 24 Apr 2025 09:42:23 -0500 Subject: [PATCH 4/4] test coverage for new hist helper methods --- app/analytics/MatchAnalytics.py | 2 +- app/{app.py => main.py} | 0 app/pages/MatchPage.py | 2 +- tests/analytics/test_MatchAnalytics.py | 34 ++++++++++++++++++++++---- 4 files changed, 31 insertions(+), 7 deletions(-) rename app/{app.py => main.py} (100%) diff --git a/app/analytics/MatchAnalytics.py b/app/analytics/MatchAnalytics.py index 6d6a165..ff9565a 100644 --- a/app/analytics/MatchAnalytics.py +++ b/app/analytics/MatchAnalytics.py @@ -97,7 +97,7 @@ def get_match_durations(self): }) return durations - def get_match_removal_v_count_scatter_data(self): + def get_match_rm_counts(self): records = [] for entry in self.match_data: diff --git a/app/app.py b/app/main.py similarity index 100% rename from app/app.py rename to app/main.py diff --git a/app/pages/MatchPage.py b/app/pages/MatchPage.py index 16d5c13..1919533 100644 --- a/app/pages/MatchPage.py +++ b/app/pages/MatchPage.py @@ -88,7 +88,7 @@ def match_duration_hist(): ) def match_removal_count_scatter(): - match_rm_counts = pd.DataFrame(match_analytics.get_match_removal_v_count_scatter_data()) + match_rm_counts = pd.DataFrame(match_analytics.get_match_rm_counts()) fig = px.scatter( match_rm_counts, diff --git a/tests/analytics/test_MatchAnalytics.py b/tests/analytics/test_MatchAnalytics.py index eedd143..fac9e41 100644 --- a/tests/analytics/test_MatchAnalytics.py +++ b/tests/analytics/test_MatchAnalytics.py @@ -1,5 +1,6 @@ import pytest, os, json from unittest.mock import mock_open, patch +from datetime import datetime from app.analytics.MatchAnalytics import MatchAnalytics @@ -7,9 +8,10 @@ # test values ######################################################################################### MATCH_FILE_PATH = "fake/file/path/matches.json" -FIRST_MATCH_TIMESTAMP = '2005-04-23 14:53:01' -FIRST_BLOCK_TIMESTAMP = '2005-04-23 16:32:53' -FIRST_LIKE_TIMESTAMP = "2012-11-04 03:24:14" +FIRST_MATCH_TIMESTAMP = '2025-04-23 14:53:01' +FIRST_CHAT_TIMESTAMP = "2025-04-23 14:53:22" +FIRST_BLOCK_TIMESTAMP = '2025-04-23 16:32:53' +FIRST_LIKE_TIMESTAMP = "2025-03-04 03:24:14" FIRST_CHAT_MESSAGE = "Hey there!" MATCH_DATA = ''' [ @@ -153,8 +155,30 @@ def test_get_chats(match_analytics): def test_get_message_count_last_12_months(match_analytics): message_counts = match_analytics.get_message_count_last_12_months() - print(message_counts) + # print(message_counts) assert message_counts is not None assert len(message_counts) == 3 assert message_counts[2].get("month") == "2025-04" - assert message_counts[2].get("message_count") == 4 \ No newline at end of file + assert message_counts[2].get("message_count") == 4 + +def test_get_response_latency(match_analytics): + latency_data = match_analytics.get_response_latency() + # print(latency_data) + assert latency_data[0].get("latency_days") == 0.00024305555555555555 + assert latency_data[0].get("match_time") == datetime.fromisoformat(FIRST_MATCH_TIMESTAMP) + assert latency_data[0].get("first_message_time") == datetime.fromisoformat(FIRST_CHAT_TIMESTAMP) + +def test_get_match_durations(match_analytics): + match_durations = match_analytics.get_match_durations() + # print(match_durations) + + assert len(match_durations) == 3 + assert isinstance(match_durations, list) + assert match_durations[2].get("duration_days") == 8 + +def test_get_match_rm_counts(match_analytics): + match_rm_counts = match_analytics.get_match_rm_counts() + + # print(match_rm_counts) + assert match_rm_counts[2].get("message_count") == 4 + assert match_rm_counts[2].get("duration_days") == 8 \ No newline at end of file