Skip to content

Commit 58b538b

Browse files
Use info.json to get metadata of downloaded files
1 parent c7a8f62 commit 58b538b

File tree

3 files changed

+28
-24
lines changed

3 files changed

+28
-24
lines changed

yt_fts/db_utils.py

+13-8
Original file line number | Diff line number | Diff line change
@@ -80,14 +80,19 @@ def add_channel_info(channel_id, channel_name, channel_url):
8080

8181
def add_video(channel_id, video_id, video_title, video_url):
8282

83-
db = Database(get_db_path())
84-
85-
db["Videos"].insert({
86-
"video_id": video_id,
87-
"video_title": video_title,
88-
"video_url": video_url,
89-
"channel_id": channel_id
90-
})
83+
conn = sqlite3.connect(get_db_path())
84+
cur = conn.cursor()
85+
existing_video = cur.execute("SELECT * FROM Videos WHERE video_id = ?",
86+
(video_id,)).fetchone()
87+
88+
if existing_video is None:
89+
cur.execute("INSERT INTO Videos (video_id, video_title, video_url, channel_id) VALUES (?, ?, ?, ?)",
90+
(video_id, video_title, video_url, channel_id))
91+
conn.commit()
92+
93+
else:
94+
print(f"{video_id} Video already exists in the database.")
95+
conn.close()
9196

9297

9398
def add_subtitle(video_id, start_time, text):

yt_fts/download.py

+14-15
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,7 @@ def get_vtt(tmp_dir, video_url, language):
182182
])
183183

184184

185-
def vtt_to_db(channel_id, dir_path, s):
185+
def vtt_to_db(dir_path):
186186
"""
187187
Iterates through all vtt files in the temp_dir, passes them to
188188
the vtt parsing function, then inserts the data into the database.
@@ -205,9 +205,18 @@ def vtt_to_db(channel_id, dir_path, s):
205205

206206
for vtt in track(file_paths, description="Adding subtitles to database..."):
207207
base_name = os.path.basename(vtt)
208+
208209
vid_id = base_name.split('.')[0]
209210
vid_url = f"https://youtu.be/{vid_id}"
210-
vid_title = get_vid_title(os.path.join(os.path.dirname(vtt), f'{vid_id}.info.json'))
211+
212+
vid_json_path = os.path.join(os.path.dirname(vtt), f'{vid_id}.info.json')
213+
214+
with open(vid_json_path, "r") as f:
215+
vid_json = json.load(f)
216+
217+
vid_title = vid_json['title']
218+
channel_id = vid_json['channel_id']
219+
211220
add_video(channel_id, vid_id, vid_title, vid_url)
212221

213222
vtt_json = parse_vtt(vtt)
@@ -224,14 +233,6 @@ def vtt_to_db(channel_id, dir_path, s):
224233
con.close()
225234

226235

227-
def get_vid_title(info_json_path):
228-
"""
229-
Retrieves video title from the info json file.
230-
"""
231-
with open(info_json_path) as f:
232-
return json.load(f)['title']
233-
234-
235236
def validate_channel_url(channel_url):
236237
"""
237238
valid patterns
@@ -297,7 +298,7 @@ def download_channel(channel_id, channel_name, language, number_of_jobs, s):
297298

298299
download_vtts(number_of_jobs, list_of_videos_urls, language, tmp_dir)
299300
add_channel_info(channel_id, channel_name, channel_url)
300-
vtt_to_db(channel_id, tmp_dir, s)
301+
vtt_to_db(tmp_dir)
301302
return True
302303

303304

@@ -321,15 +322,13 @@ def download_playlist(playlist_url, s, language=None, number_of_jobs=None):
321322
add_channel_info(channel_id, channel_name, channel_url)
322323

323324

324-
channel_ids = list(set(video["channel_id"] for video in playlist_data))
325325
video_ids = list(set(video["video_id"] for video in playlist_data))
326326

327+
327328
with tempfile.TemporaryDirectory() as tmp_dir:
328329
console.print(f"[green][bold]Downloading [red]{len(playlist_data)}[/red] vtt files[/bold][/green]\n")
329330
download_vtts(number_of_jobs, video_ids, language, tmp_dir)
330-
331-
for channel_id in channel_ids:
332-
vtt_to_db(channel_id, tmp_dir, s)
331+
vtt_to_db(tmp_dir)
333332

334333

335334
def get_channel_id_from_input(channel_input):

yt_fts/update.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,6 @@ def update_channel(channel_id, channel_name, language, number_of_jobs, s):
3636
print(f"{len(fresh_videos)} videos on \"{channel_name}\" do not have subtitles")
3737
exit()
3838

39-
vtt_to_db(channel_id, tmp_dir, s)
39+
vtt_to_db(tmp_dir)
4040

4141
print(f"Added {len(vtt_to_parse)} new videos from \"{channel_name}\" to the database")

0 commit comments

Comments
 (0)