@@ -186,3 +186,99 @@ def get_summary_by_video_language(video_id, language, model=MODEL_TO_USE):
186186async def get_summary_by_video_language_async (video_id , language , model = MODEL_TO_USE ):
187187 loop = asyncio .get_event_loop ()
188188 return await loop .run_in_executor (None , get_summary_by_video_language , video_id , language , model )
189+
190+ def insert_transcript (transcript_properties ):
191+ """
192+ Inserts a transcript record into the transcripts table.
193+ """
194+ try :
195+ video_id = transcript_properties .get ('video_id' )
196+ video_title = transcript_properties .get ('video_title' )
197+ channel_name = transcript_properties .get ('channel_name' )
198+ channel_id = transcript_properties .get ('channel_id' )
199+ duration = transcript_properties .get ('duration' )
200+ video_url = transcript_properties .get ('video_url' )
201+ user_id = transcript_properties .get ('user_id' )
202+ language_code = transcript_properties .get ('language_code' )
203+ normalized_language_code = transcript_properties .get ('normalized_language_code' )
204+ is_generated = transcript_properties .get ('is_generated' , True )
205+ text = transcript_properties .get ('text' , '' )
206+ filename = transcript_properties .get ('filename' , '' )
207+ base_filename = transcript_properties .get ('base_filename' , '' )
208+ type = transcript_properties .get ('type' , 'transcript' )
209+ summary = transcript_properties .get ('summary' , '' )
210+ word_count = transcript_properties .get ('word_count' , 0 )
211+ tokens_used = transcript_properties .get ('tokens_used' , 0 )
212+ estimated_cost = transcript_properties .get ('estimated_cost' , 0.0 )
213+ model = transcript_properties .get ('model' , MODEL_TO_USE )
214+
215+ with db_cursor () as cursor :
216+ query = """
217+ INSERT INTO transcripts (
218+ video_id, video_title, channel_name, channel_id, duration, video_url,
219+ user_id, language_code, normalized_language_code,
220+ is_generated, text, filename, base_filename, type, summary, word_count,
221+ tokens_used, estimated_cost, model
222+ ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
223+ """
224+ cursor .execute (query , (
225+ video_id , video_title , channel_name , channel_id , duration , video_url ,
226+ user_id , language_code , normalized_language_code ,
227+ is_generated , text , filename , base_filename , type , summary , word_count ,
228+ tokens_used , estimated_cost , model
229+ ))
230+ logger .info (f"Inserted transcript for video_id={ video_id } , user_id={ user_id } , language_code={ language_code } " )
231+ except Exception as e :
232+ logger .error (f"Failed to insert transcript: { e } " )
233+ raise
234+
235+ async def insert_transcript_async (transcript_properties ):
236+ loop = asyncio .get_event_loop ()
237+ await loop .run_in_executor (
238+ None ,
239+ insert_transcript ,transcript_properties
240+ )
241+
242+ def get_existing_transcripts (video_id , normalized_language_code = None ):
243+ """
244+ Retrieves all transcripts from the transcripts table for a given video_id and optional language_code.
245+ Returns a list of transcript dicts (can be empty if none found).
246+ """
247+ try :
248+ with db_cursor () as cursor :
249+ if normalized_language_code :
250+ query = """
251+ SELECT video_id, video_title, channel_name, channel_id, duration, video_url,
252+ user_id, language_code, normalized_language_code,
253+ is_generated, text, filename, base_filename, type, summary, word_count,
254+ tokens_used, estimated_cost, model
255+ FROM transcripts
256+ WHERE video_id = %s AND (normalized_language_code = %s OR normalized_language_code = 'en' OR normalized_language_code = 'ru')
257+ ORDER BY id DESC
258+ """
259+ cursor .execute (query , (video_id , normalized_language_code ))
260+ else :
261+ query = """
262+ SELECT video_id, video_title, channel_name, channel_id, duration, video_url,
263+ user_id, language_code, normalized_language_code,
264+ is_generated, text, filename, base_filename, type, summary, word_count,
265+ tokens_used, estimated_cost, model
266+ FROM transcripts
267+ WHERE video_id = %s
268+ ORDER BY id DESC
269+ """
270+ cursor .execute (query , (video_id ,))
271+
272+ rows = cursor .fetchall ()
273+ keys = ["video_id" , "video_title" , "channel_name" , "channel_id" , "duration" , "video_url" ,
274+ "user_id" , "language_code" , "normalized_language_code" ,
275+ "is_generated" , "text" , "filename" , "base_filename" , "type" , "summary" , "word_count" ,
276+ "tokens_used" , "estimated_cost" , "model" ]
277+ return [dict (zip (keys , row )) for row in rows ]
278+ except Exception as e :
279+ logger .error (f"Failed to fetch transcripts for video_id={ video_id } , language_code={ normalized_language_code } : { e } " )
280+ return []
281+
282+ async def get_existing_transcripts_async (video_id , normalized_language_code = None ):
283+ loop = asyncio .get_event_loop ()
284+ return await loop .run_in_executor (None , get_existing_transcripts , video_id , normalized_language_code )
0 commit comments