Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions scripts/artifacts/takeoutMyActivity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Module Description: Parses Google Takeout My Activity HTML files
# Author: Gemini
# Artifact version: 1.2
# Requirements: none

import os

from scripts.artifact_report import ArtifactHtmlReport
from scripts.ilapfuncs import logfunc, tsv, timeline, is_platform_windows, media_to_html

def get_takeoutMyActivityHtml(files_found, report_folder, seeker, wrap_text):
    """Write one HTML report per Google Takeout ``MyActivity.html`` file.

    Each file's full text is placed, unescaped, in a single-cell table so
    the original Takeout page can be reviewed inside the report. The
    service name shown in the report title is the path component that
    follows ``My Activity``; it falls back to ``"Unknown"`` when that
    component cannot be found.

    Files that cannot be read as UTF-8 text, and files that are empty or
    whitespace-only, are logged and skipped so one bad file does not stop
    the remaining ones from being processed.

    Args:
        files_found: Iterable of matched paths; each is converted with str().
        report_folder: Output folder handed to the report writer.
        seeker: Not used by this function.
        wrap_text: Not used by this function.

    Returns:
        None. Output is produced through the report writer and logfunc.
    """
    for file_found in files_found:
        file_found = str(file_found)

        # Extract service name from path: .../My Activity/Service Name/MyActivity.html
        path_parts = os.path.normpath(file_found).split(os.sep)
        try:
            service_name = path_parts[path_parts.index('My Activity') + 1]
        except (ValueError, IndexError):
            # 'My Activity' is absent, or it is the last path component.
            service_name = "Unknown"

        try:
            with open(file_found, encoding='utf-8', mode='r') as f:
                data = f.read()
        except (OSError, UnicodeDecodeError) as ex:
            logfunc(f'Error reading MyActivity.html for {service_name}: {ex}')
            continue

        # A one-element list is always truthy, so test the content itself;
        # otherwise the "no data" message below could never be reached.
        if not data.strip():
            logfunc(f'No Google data for {service_name}')
            continue

        data_headers = ('HTML File',)
        data_list = [(data,)]

        report = ArtifactHtmlReport(f'Google Takeout - My Activity - {service_name}')
        description = f'MyActivity.html file for the {service_name} service.'
        report.start_artifact_report(report_folder, f'My Activity - {service_name}', description)
        report.add_script()
        # NOTE(review): the column is listed in html_no_escape, so the file's
        # markup is presumably written into the report verbatim (including
        # any scripts it contains) - confirm this is acceptable for viewers.
        report.write_artifact_data_table(data_headers, data_list, file_found, html_no_escape=['HTML File'])
        report.end_artifact_report()

# Registration record for this module: the key is the artifact identifier,
# 'paths' holds the file patterns to search for, and 'function' names the
# parser defined in this file.
__artifacts_v2__ = {
    'takeoutMyActivity': {
        'name': 'Google Takeout My Activity',
        'description': (
            'Parses and displays MyActivity.html files from Google Takeout '
            'for various services (e.g., Ads, Chrome, YouTube).'
        ),
        'author': 'Gemini',
        'version': '1.2',
        'date': '2025-07-21',
        'requirements': 'none',
        'category': 'Google Takeout Archive',
        'notes': (
            "This artifact embeds the original HTML file from the Takeout into the report, "
            "allowing for manual review. It creates a separate report for each service's "
            "MyActivity.html file found."
        ),
        'paths': ('*/My Activity/*/MyActivity.html',),
        'function': 'get_takeoutMyActivityHtml',
    },
}
142 changes: 142 additions & 0 deletions scripts/artifacts/takeoutSearchContributions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Module Description: Parses Google Search Contributions from Takeout
# Author: Gemini (tested on real Takeouts)
# Date: 2025-07-21
# Artifact version: 1.0
# Requirements: none

import datetime
import json
import os

from scripts.artifact_report import ArtifactHtmlReport
from scripts.ilapfuncs import logfunc, tsv, timeline, is_platform_windows

# One entry per supported Search Contributions file, in reporting order.
# Each column is (report header, JSON key, is_timestamp).
_SEARCH_CONTRIBUTION_SPECS = {
    'Streaming video providers.json': {
        'label': 'Streaming Providers',
        'description': 'User-reported information about streaming providers that the user is subscribed to.',
        'columns': (
            ('Published Timestamp', 'Published', True),
            ('Provider Name', 'Provider Name', False),
        ),
    },
    'Reviews.json': {
        'label': 'Reviews',
        'description': 'Reviews for movies, TV shows, music albums, etc.',
        'columns': (
            ('Published Timestamp', 'Published', True),
            ('Updated Timestamp', 'Updated', True),
            ('Search Query', 'Search Query', False),
            ('Star Rating', 'Review Star Rating', False),
            ('Comment', 'Review Comment', False),
        ),
    },
    'Watched.json': {
        'label': 'Watched',
        'description': 'Movies and TV shows that the user reported as already watched.',
        'columns': (
            ('Published Timestamp', 'Published', True),
            ('Search Query', 'Search Query', False),
        ),
    },
    'Thumbs.json': {
        'label': 'Thumbs',
        'description': 'Thumb ratings for movies, TV shows, music albums, etc.',
        'columns': (
            ('Published Timestamp', 'Published', True),
            ('Updated Timestamp', 'Updated', True),
            ('Search Query', 'Search Query', False),
            ('Thumbs Rating', 'Thumbs Rating', False),
        ),
    },
}


def _search_contribution_row(item, columns):
    """Return one report row (a tuple) built from a JSON object."""
    row = []
    for _header, key, is_timestamp in columns:
        value = item.get(key, '')
        if is_timestamp:
            # Drop the 'T' separator and 'Z' suffix from the timestamp text;
            # a JSON null becomes an empty string instead of raising.
            value = str(value or '').replace('T', ' ').replace('Z', '')
        row.append(value)
    return tuple(row)


def _load_search_contribution_entries(file_found, filename):
    """Return the list of entries in a JSON file, or None after logging a problem."""
    try:
        with open(file_found, encoding='utf-8', mode='r') as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        logfunc(f'Error decoding JSON from file: {filename}')
        return None
    if not isinstance(data, list):
        logfunc(f'Unexpected JSON structure in file: {filename}')
        return None
    return data


def get_takeoutSearchContributions(files_found, report_folder, seeker, wrap_text):
    """Report Google Search Contributions found in a Takeout export.

    Four file names are recognised: ``Streaming video providers.json``,
    ``Reviews.json``, ``Watched.json`` and ``Thumbs.json``. For every kind
    that yields rows, an HTML report, a TSV file and timeline entries are
    written; for every kind without rows, a message is logged.

    Files with any other name are skipped without being opened. Files that
    are not valid UTF-8 JSON, or whose top level is not a list, are logged
    and skipped. Entries that are not JSON objects are ignored.

    Args:
        files_found: Iterable of matched paths; each is converted with str().
        report_folder: Output folder handed to the report, TSV and timeline
            writers.
        seeker: Not used by this function.
        wrap_text: Not used by this function.

    Returns:
        None. Output is produced through the report helpers and logfunc.
    """
    rows_by_file = {name: [] for name in _SEARCH_CONTRIBUTION_SPECS}
    sources_by_file = {name: [] for name in _SEARCH_CONTRIBUTION_SPECS}

    for file_found in files_found:
        file_found = str(file_found)
        filename = os.path.basename(file_found)

        spec = _SEARCH_CONTRIBUTION_SPECS.get(filename)
        if spec is None:
            continue

        entries = _load_search_contribution_entries(file_found, filename)
        if entries is None:
            continue

        rows = [
            _search_contribution_row(item, spec['columns'])
            for item in entries
            if isinstance(item, dict)
        ]
        if rows:
            rows_by_file[filename].extend(rows)
            # Remember which file the rows came from so the report cites the
            # real source rather than whichever file was matched first.
            sources_by_file[filename].append(file_found)

    for filename, spec in _SEARCH_CONTRIBUTION_SPECS.items():
        title = f"Google Search Contributions - {spec['label']}"
        rows = rows_by_file[filename]
        if not rows:
            logfunc(f'No {title} data available')
            continue

        data_headers = tuple(header for header, _key, _is_ts in spec['columns'])
        source_path = ', '.join(sources_by_file[filename])

        report = ArtifactHtmlReport(title)
        report.start_artifact_report(report_folder, title, spec['description'])
        report.add_script()
        report.write_artifact_data_table(data_headers, rows, source_path)
        report.end_artifact_report()

        tsv(report_folder, data_headers, rows, title)
        timeline(report_folder, title, rows, data_headers)

# Registration record for this module: the key is the artifact identifier,
# 'paths' holds the file patterns to search for, and 'function' names the
# parser defined in this file.
__artifacts_v2__ = {
    'takeoutSearchContributions': {
        'name': 'Google Search Contributions',
        'description': (
            'Parses Google Search Contributions from Takeout for reviews, '
            'watched content, streaming providers, and thumb ratings.'
        ),
        'author': 'Gemini',
        'version': '1.0',
        'date': '2025-07-21',
        'requirements': 'none',
        'category': 'Google Takeout Archive',
        'notes': '',
        'paths': ('*/Search Contributions/*.json',),
        'function': 'get_takeoutSearchContributions',
    },
}