@@ -182,7 +182,7 @@ def get_vtt(tmp_dir, video_url, language):
182
182
])
183
183
184
184
185
- def vtt_to_db (channel_id , dir_path , s ):
185
+ def vtt_to_db (dir_path ):
186
186
"""
187
187
Iterates through all vtt files in the temp_dir, passes them to
188
188
the vtt parsing function, then inserts the data into the database.
@@ -205,9 +205,18 @@ def vtt_to_db(channel_id, dir_path, s):
205
205
206
206
for vtt in track (file_paths , description = "Adding subtitles to database..." ):
207
207
base_name = os .path .basename (vtt )
208
+
208
209
vid_id = base_name .split ('.' )[0 ]
209
210
vid_url = f"https://youtu.be/{ vid_id } "
210
- vid_title = get_vid_title (os .path .join (os .path .dirname (vtt ), f'{ vid_id } .info.json' ))
211
+
212
+ vid_json_path = os .path .join (os .path .dirname (vtt ), f'{ vid_id } .info.json' )
213
+
214
+ with open (vid_json_path , "r" ) as f :
215
+ vid_json = json .load (f )
216
+
217
+ vid_title = vid_json ['title' ]
218
+ channel_id = vid_json ['channel_id' ]
219
+
211
220
add_video (channel_id , vid_id , vid_title , vid_url )
212
221
213
222
vtt_json = parse_vtt (vtt )
@@ -224,14 +233,6 @@ def vtt_to_db(channel_id, dir_path, s):
224
233
con .close ()
225
234
226
235
227
- def get_vid_title (info_json_path ):
228
- """
229
- Retrieves video title from the info json file.
230
- """
231
- with open (info_json_path ) as f :
232
- return json .load (f )['title' ]
233
-
234
-
235
236
def validate_channel_url (channel_url ):
236
237
"""
237
238
valid patterns
@@ -297,7 +298,7 @@ def download_channel(channel_id, channel_name, language, number_of_jobs, s):
297
298
298
299
download_vtts (number_of_jobs , list_of_videos_urls , language , tmp_dir )
299
300
add_channel_info (channel_id , channel_name , channel_url )
300
- vtt_to_db (channel_id , tmp_dir , s )
301
+ vtt_to_db (tmp_dir )
301
302
return True
302
303
303
304
@@ -321,15 +322,13 @@ def download_playlist(playlist_url, s, language=None, number_of_jobs=None):
321
322
add_channel_info (channel_id , channel_name , channel_url )
322
323
323
324
324
- channel_ids = list (set (video ["channel_id" ] for video in playlist_data ))
325
325
video_ids = list (set (video ["video_id" ] for video in playlist_data ))
326
326
327
+
327
328
with tempfile .TemporaryDirectory () as tmp_dir :
328
329
console .print (f"[green][bold]Downloading [red]{ len (playlist_data )} [/red] vtt files[/bold][/green]\n " )
329
330
download_vtts (number_of_jobs , video_ids , language , tmp_dir )
330
-
331
- for channel_id in channel_ids :
332
- vtt_to_db (channel_id , tmp_dir , s )
331
+ vtt_to_db (tmp_dir )
333
332
334
333
335
334
def get_channel_id_from_input (channel_input ):
0 commit comments