@@ -333,7 +333,19 @@ async def handle_youtube_link(update: Update, context: CallbackContext):
333333
334334 if transcripts :
335335 context .user_data ['video_id' ] = video_id # Store video_id for later use
336- context .user_data ['transcripts' ] = transcripts # Store transcript_request_id for later use
336+
337+ transcripts_in_context = []
338+ transcripts_in_context = context .user_data .get ('transcripts' , [])
339+ logger .info (f"Current transcripts in user context: { len (transcripts_in_context )} " )
340+
341+ if transcripts_in_context and len (transcripts_in_context ) > 0 and len (transcripts_in_context ) < 10 :
342+ logger .info (f"Appending new transcripts to user context. Current count: { len (transcripts_in_context )} " )
343+ transcripts_in_context .extend (transcripts )
344+ logger .info (f"New transcripts count in user context: { len (transcripts_in_context )} " ) # Extend the existing list with new transcripts
345+ context .user_data ['transcripts' ] = transcripts_in_context
346+ else :
347+ logger .info (f"Clearing transcripts in user context and storing from scratch. New count: { len (transcripts )} " )
348+ context .user_data ['transcripts' ] = transcripts # Store the transcripts list for later use
337349
338350 try :
339351 for transcript in transcripts :
@@ -488,8 +500,30 @@ async def handle_summarization_button(update: Update, context: CallbackContext):
488500 video_id , language , transcript_request_id = query .data .split ('&' )[1 :4 ]
489501 logger .info (f"The following language selected for summary: { language } " )
490502
491- base_filename = context .user_data .get ('base_filename' )
492- transcripts = context .user_data .get ('transcripts' )
503+ # base_filename = context.user_data.get('base_filename')
504+ base_filename = 'unknown'
505+ transcripts = context .user_data .get ('transcripts' , [])
506+
507+
508+ logger .info (f"Summary requested for video id: { video_id } ." )
509+ logger .info (f"Total number of transcripts found in user context: { len (transcripts )} " )
510+
511+ if transcripts and len (transcripts ) > 0 :
512+ logger .info (f"The first available trascript video id: { transcripts [0 ].get ('video_id' , 'unknown' )} and filename { transcripts [0 ].get ('filename' , 'unknown' )} " )
513+
514+ transcript = None
515+
516+ for t in transcripts :
517+ if t .get ('video_id' ) == video_id and t .get ('type' ) == 'transcript' :
518+ logger .info (f"Found matching transcript for video { video_id } in user context." )
519+ transcript = t
520+ base_filename = t .get ('base_filename' )
521+ break
522+
523+ if not transcript :
524+ logger .info (f"No matching transcript found for video { video_id } in user context." )
525+ transcripts = None
526+
493527
494528 model = MODEL_TO_USE # Use the model specified in the config
495529
@@ -520,17 +554,17 @@ async def handle_summarization_button(update: Update, context: CallbackContext):
520554 }
521555 )
522556
523- if not base_filename :
524- logger .error ("base_filename not found in user_data." )
525- await query .edit_message_text ("⚠️ Failed to generate summary. Missing file information." )
526- return
557+ # if not base_filename:
558+ # logger.error("base_filename not found in user_data.")
559+ # await query.edit_message_text("⚠️ Failed to generate summary. Missing file information.")
560+ # return
527561
528562 await query .edit_message_text ("🧠 Working on summary..." )
529563
530564 summary = await get_summary_by_video_language_async (video_id = video_id , language = language , model = model )
531565
532566 if summary :
533- logger .info (f"Summary already exists for video { video_id } in language { language } and model { model } ." )
567+ logger .info (f"Summary already exists in DB for video { video_id } in language { language } and model { model } ." )
534568
535569 track_event (
536570 user_id = user .id ,
@@ -567,15 +601,14 @@ async def handle_summarization_button(update: Update, context: CallbackContext):
567601 return
568602
569603 elif transcripts :
570- logger .info (f"No existing summary found for video { video_id } in language { language } and model { model } . Proceeding to generate a new summary." )
571- logger .info (f"Proceeding with transcript from user context: { transcripts [0 ].get ('filename' , 'unknown' )} " )
572-
573- transcript = transcripts [0 ]
604+ logger .info (f"No existing summary found in DB for video { video_id } in language { language } and model { model } . Proceeding to generate a new summary." )
605+
574606
575607 if transcript :
608+ logger .info (f"Proceeding with summary for transcript from user context with video id: { transcript .get ('video_id' )} ." )
576609 original_language = transcript .get ('normalized_language_code' , 'en' )
577610 text = transcript .get ('text' )
578- logger .info (f"Transcript found in user contextfor original language: { original_language } " )
611+ logger .info (f"Transcript found in user context for original language: { original_language } " )
579612 # Handle summarization request
580613 logger .info (f"Starting summarization request from original '{ original_language } ' to target '{ language } '" )
581614 logger .info (f"Text length to summarize is { len (text .split ())} words." )
@@ -609,8 +642,11 @@ async def handle_summarization_button(update: Update, context: CallbackContext):
609642 except Exception as e :
610643 logger .error (f"Failed to produce summary from data in user context: { e } " )
611644
645+ else :
646+ logger .error (f"No transcript found in user context for video { video_id } . Failed to generate summary." )
647+
612648 else :
613- logger .info (f"No transcripts found in user context. Proceeding to read from file." )
649+ logger .info (f"No transcripts found in user context for { video_id } . Proceeding to read from file on disk ." )
614650
615651 try :
616652 # Ensure transcript_request_id is an integer
@@ -620,8 +656,18 @@ async def handle_summarization_button(update: Update, context: CallbackContext):
620656 transcript_folder = "transcripts"
621657
622658 try :
659+
660+ logger .info ("Fetching video details to get the base_filename and determine original language." )
623661 # Get video details
624662 video_details = get_video_details (video_id )
663+ # Include channel name in the file name (truncated to 60 characters)
664+ channel_name = video_details ['snippet' ]['channelTitle' ][:60 ]
665+ # Include video title in the file name (truncated to 140 characters)
666+ video_title = video_details ['snippet' ]['title' ][:140 ]
667+ # Sanitize the base filename
668+ base_filename = sanitize_filename (f"{ channel_name } _{ video_title } " )
669+
670+
625671 original_language = normalize_language_code (video_details ['snippet' ]["defaultAudioLanguage" ])
626672 if original_language :
627673 summary_properties ['language' ] = original_language
@@ -640,16 +686,16 @@ async def handle_summarization_button(update: Update, context: CallbackContext):
640686 transcript_filename = f"{ transcript_folder } /{ base_filename } _transcript_{ original_language } .txt"
641687
642688 # If no transcript found in user context, read from file
643- logger .info (f"Looking for transcript file: { transcript_filename } " )
689+ logger .info (f"Looking for transcript file on disk : { transcript_filename } " )
644690 if not os .path .exists (transcript_filename ):
645- logger .error (f"Transcript file not found: { transcript_filename } " )
646- raise FileNotFoundError (f"Transcript file not found: { transcript_filename } " )
691+ logger .error (f"Transcript file not found on disk : { transcript_filename } " )
692+ raise FileNotFoundError (f"Transcript file not found on disk : { transcript_filename } " )
647693
648694 with open (transcript_filename , 'r' , encoding = 'utf-8' ) as f :
649695 transcript = f .read ()
650696
651697 # Handle summarization request
652- logger .info (f"Starting summarization request from original '{ original_language } ' to target '{ language } '" )
698+ logger .info (f"Starting summarization request with file on disk from original '{ original_language } ' to target '{ language } '" )
653699 try :
654700 summary , tokens_used , estimated_cost , word_count , model = await handle_summarization_request (
655701 text = transcript ,
@@ -680,7 +726,7 @@ async def handle_summarization_button(update: Update, context: CallbackContext):
680726
681727 except ValueError as e :
682728 logger .error (f"Invalid transcript_request_id: { transcript_request_id } . Error: { e } " )
683- await query .edit_message_text ("⚠️ Failed to generate summary. Invalid request ID ." )
729+ await query .edit_message_text ("⚠️ Failed to generate summary. Please try again ." )
684730 except FileNotFoundError as e :
685731 logger .error (f"Transcript file not found: { e } " )
686732 await query .edit_message_text ("⚠️ Failed to generate summary. Transcript file not found." )
@@ -774,7 +820,7 @@ async def handle_summarization_button(update: Update, context: CallbackContext):
774820 }
775821 )
776822
777- await query .edit_message_text ("⚠️ Failed to generate summary. Please try again later." )
823+ msg = await query .edit_message_text ("⚠️ Failed to generate summary. Please try again later." )
778824 return
779825
780826
0 commit comments