From bfa87980ed0cc73f2a72cbc2dfc051d50699dc6c Mon Sep 17 00:00:00 2001
From: Samuel Miserendino
Date: Mon, 31 Mar 2025 22:06:39 -0700
Subject: [PATCH 1/2] add utility to download videos

---
 utils/download_videos.py | 89 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 utils/download_videos.py

diff --git a/utils/download_videos.py b/utils/download_videos.py
new file mode 100644
index 00000000..88afad0e
--- /dev/null
+++ b/utils/download_videos.py
@@ -0,0 +1,89 @@
+"""Download the .mp4/.mov videos linked from an Expensify/App GitHub issue."""
+import os
+import re
+import sys
+
+import requests
+
+# Seconds to wait on connect/read before giving up; without a timeout
+# requests can block indefinitely on a stalled connection.
+REQUEST_TIMEOUT = 30
+
+
+def fetch_issue(issue_number):
+    """Fetch an Expensify/App issue from the GitHub REST API.
+
+    Returns the decoded JSON object, or None (after printing the error)
+    when the request fails or the response is not valid JSON.
+    """
+    url = f"https://api.github.com/repos/Expensify/App/issues/{issue_number}"
+    headers = {
+        "Accept": "application/vnd.github.v3+json",
+        "User-Agent": "ExpensifyVideoDownloader",
+    }
+    try:
+        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
+        response.raise_for_status()
+        return response.json()
+    except (requests.RequestException, ValueError) as e:
+        print(f"Error fetching issue {issue_number}: {e}")
+        return None
+
+
+def download_issue_videos(issue):
+    """Download every .mp4/.mov URL found in the issue body.
+
+    Files are written to a per-issue folder under issue_videos/. A failed
+    download is reported and skipped so the remaining videos are still tried.
+    """
+    # The GitHub API returns "body": null for an empty description, so a
+    # .get() default alone would still hand None to re.findall.
+    title = issue.get('title') or ''
+    body = issue.get('body') or ''
+
+    print("Issue Title:", title)
+    print("Issue Body:", body)
+
+    video_urls = re.findall(r'(https?://[^\s]+?\.(?:mp4|mov))', body)
+    if not video_urls:
+        print("No .mp4 or .mov files found in the issue body.")
+        return
+
+    issue_id = str(issue.get('number') or issue.get('id', 'unknown'))
+    destination_dir = os.path.join("issue_videos", issue_id)
+    os.makedirs(destination_dir, exist_ok=True)
+
+    # dict.fromkeys drops repeated links while keeping first-seen order.
+    for url in dict.fromkeys(video_urls):
+        video_name = os.path.basename(url.split('?')[0])
+        destination_path = os.path.join(destination_dir, video_name)
+        print(f"Downloading {url} to {destination_path} ...")
+
+        try:
+            # The context manager releases the streamed connection even
+            # when the transfer fails part-way through.
+            with requests.get(
+                url, stream=True, timeout=REQUEST_TIMEOUT
+            ) as video_response:
+                video_response.raise_for_status()
+                with open(destination_path, 'wb') as out_file:
+                    for chunk in video_response.iter_content(chunk_size=8192):
+                        if chunk:
+                            out_file.write(chunk)
+            print(f"Downloaded {video_name} successfully.")
+        except (requests.RequestException, OSError) as e:
+            print(f"Error downloading {url}: {e}")
+
+
+def fetch_and_download_issue_videos(issue_number):
+    """Fetch the issue and, if that succeeded, download its videos."""
+    issue = fetch_issue(issue_number)
+    if issue is not None:
+        download_issue_videos(issue)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print("Usage: python utils/download_videos.py ISSUE_NUMBER")
+        sys.exit(1)
+    fetch_and_download_issue_videos(sys.argv[1])

From a4b2e5cdfe592d7280bd7cb7c7146f8f4f879df6 Mon Sep 17 00:00:00 2001
From: Samuel Miserendino
Date: Mon, 31 Mar 2025 22:08:22 -0700
Subject: [PATCH 2/2] update readme

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 197cdaac..08765c62 100644
--- a/README.md
+++ b/README.md
@@ -183,3 +183,9 @@ For a complete example of a ComputerInterface implementation, you can refer to t
       url={https://arxiv.org/abs/2502.12115},
 }
 ```
+
+## Utilities
+
+We include the following utilities to facilitate future research:
+
+- `utils/download_videos.py` downloads the `.mp4`/`.mov` videos linked from an Expensify GitHub issue, for use with models that support video input. Run it as `python utils/download_videos.py ISSUE_NUMBER`.