Use info.json to get metadata of downloaded files

NotJoeMartinez · NotJoeMartinez · commit 58b538b7eeab · 2024-04-09T15:29:05.000-05:00
diff --git a/yt_fts/db_utils.py b/yt_fts/db_utils.py
@@ -80,14 +80,19 @@ def add_channel_info(channel_id, channel_name, channel_url):
 
 def add_video(channel_id, video_id,  video_title, video_url):
     
-    db = Database(get_db_path())
-
-    db["Videos"].insert({
-        "video_id": video_id,
-        "video_title": video_title,
-        "video_url": video_url,
-        "channel_id": channel_id
-    })
+        conn = sqlite3.connect(get_db_path())
+        cur = conn.cursor()
+        existing_video = cur.execute("SELECT * FROM Videos WHERE video_id = ?", 
+                                     (video_id,)).fetchone()
+
+        if existing_video is None:
+            cur.execute("INSERT INTO Videos (video_id, video_title, video_url, channel_id) VALUES (?, ?, ?, ?)",
+                        (video_id, video_title, video_url, channel_id))
+            conn.commit()
+
+        else:
+            print(f"{video_id} Video already exists in the database.")
+        conn.close()
 
 
 def add_subtitle(video_id, start_time, text):
diff --git a/yt_fts/download.py b/yt_fts/download.py
@@ -182,7 +182,7 @@ def get_vtt(tmp_dir, video_url, language):
     ])
 
 
-def vtt_to_db(channel_id, dir_path, s):
+def vtt_to_db(dir_path):
     """
     Iterates through all vtt files in the temp_dir, passes them to 
     the vtt parsing function, then inserts the data into the database.
@@ -205,9 +205,18 @@ def vtt_to_db(channel_id, dir_path, s):
 
     for vtt in track(file_paths, description="Adding subtitles to database..."):
         base_name = os.path.basename(vtt)
+        
         vid_id = base_name.split('.')[0]
         vid_url = f"https://youtu.be/{vid_id}"
-        vid_title = get_vid_title(os.path.join(os.path.dirname(vtt), f'{vid_id}.info.json'))
+
+        vid_json_path = os.path.join(os.path.dirname(vtt), f'{vid_id}.info.json')
+
+        with open(vid_json_path, "r") as f:
+            vid_json = json.load(f)
+
+        vid_title =  vid_json['title']
+        channel_id = vid_json['channel_id']
+
         add_video(channel_id, vid_id, vid_title, vid_url)
 
         vtt_json = parse_vtt(vtt)
@@ -224,14 +233,6 @@ def vtt_to_db(channel_id, dir_path, s):
     con.close()
 
 
-def get_vid_title(info_json_path):
-    """
-    Retrieves video title from the info json file.
-    """
-    with open(info_json_path) as f:
-        return json.load(f)['title']
-
-
 def validate_channel_url(channel_url):
     """
     valid patterns
@@ -297,7 +298,7 @@ def download_channel(channel_id, channel_name, language, number_of_jobs, s):
 
         download_vtts(number_of_jobs, list_of_videos_urls, language, tmp_dir)
         add_channel_info(channel_id, channel_name, channel_url)
-        vtt_to_db(channel_id, tmp_dir, s)
+        vtt_to_db(tmp_dir)
     return True
 
 
@@ -321,15 +322,13 @@ def download_playlist(playlist_url, s, language=None, number_of_jobs=None):
             add_channel_info(channel_id, channel_name, channel_url)
         
 
-    channel_ids = list(set(video["channel_id"] for video in playlist_data))
     video_ids = list(set(video["video_id"] for video in playlist_data))
 
+
     with tempfile.TemporaryDirectory() as tmp_dir:
         console.print(f"[green][bold]Downloading [red]{len(playlist_data)}[/red] vtt files[/bold][/green]\n")
         download_vtts(number_of_jobs, video_ids, language, tmp_dir)
-
-        for channel_id in channel_ids: 
-            vtt_to_db(channel_id, tmp_dir, s)
+        vtt_to_db(tmp_dir)
             
 
 def get_channel_id_from_input(channel_input):
diff --git a/yt_fts/update.py b/yt_fts/update.py
@@ -36,6 +36,6 @@ def update_channel(channel_id, channel_name, language, number_of_jobs, s):
             print(f"{len(fresh_videos)} videos on \"{channel_name}\" do not have subtitles")
             exit()
 
-        vtt_to_db(channel_id, tmp_dir, s)
+        vtt_to_db(tmp_dir)
 
         print(f"Added {len(vtt_to_parse)} new videos from \"{channel_name}\" to the database")