@@ -55,54 +55,60 @@ def get_all_transcripts(video_id):
5555
5656 if len (PROXY_PASSWORD ) > 0 and len (PROXY_USERNAME ) > 0 :
5757 logger .info (f"Using proxy configuration. You can fall back to default by removing PROXY_USERNAME and PROXY_PASSWORD from config." )
58- ytt_api = YouTubeTranscriptApi (
58+ ytt_api_proxied = YouTubeTranscriptApi (
5959 proxy_config = WebshareProxyConfig (
6060 proxy_username = PROXY_USERNAME ,
6161 proxy_password = PROXY_PASSWORD ,
6262 )
6363 )
64- else :
65- logger .info (f"Using default YouTubeTranscriptApi without proxy." )
66- ytt_api = YouTubeTranscriptApi ()
64+
65+ logger .info (f"Using default YouTubeTranscriptApi without proxy." )
66+ ytt_api = YouTubeTranscriptApi ()
6767
6868 try :
6969 # logger.info(f"Available methods for YouTubeTranscriptApi: {dir(YouTubeTranscriptApi)}")
7070 transcript_list = ytt_api .list (video_id )
71- # logger.info(f"Available methods for TranscriptList: {dir(transcript_list)}")
72- # print("Available transcripts:\n")
73- # print(transcript_list)
74- logger .info (f"Transcripts seem to have been received from YouTube.\n " )
75- if not transcript_list :
76- logger .info (f"Or maybe not...\n " )
71+ if transcript_list :
72+ logger .info (f"Transcript list is not empty. Fetched transcripts list for video ID: { video_id } " )
7773 else :
78- logger .info (f"Transcript list is not empty. Successfully fetched transcripts list for video ID: { video_id } " )
79- return transcript_list
80-
81- # except (TranscriptsDisabled, NoTranscriptFound):
82- # logger.warning(f"No transcripts for video {video_id}")
83- # return None
84- # except TooManyRequests:
85- # logger.error("YouTube rate limit hit. Consider adding a delay or proxy.")
86- # # Optionally: sleep and retry
87- # return None
74+ logger .info (f"Transcript list is empty. No transcripts found without proxy for video ID: { video_id } " )
8875 except Exception as e :
89- logger .error (f"Unexpected error for video { video_id } : { e } " )
90- return None
76+ logger .error (f"Failed to fetch transcript list without proxy for video { video_id } : { e } " )
77+ transcript_list = None
78+
79+ if not transcript_list and ytt_api_proxied :
80+ try :
81+ logger .info (f"Attempting to fetch transcripts with proxy configuration." )
82+ transcript_list = ytt_api_proxied .list (video_id )
83+ # logger.info(f"Available methods for TranscriptList: {dir(transcript_list)}")
84+ # print("Available transcripts:\n")
85+ # print(transcript_list)
86+ if not transcript_list :
87+ logger .info (f"Transcript list is empty. No transcripts found with proxy for video ID: { video_id } " )
88+ else :
89+ logger .info (f"Transcript list is not empty. Fetched transcripts list with proxy for video ID: { video_id } " )
90+
91+ except Exception as e :
92+ logger .error (f"Unexpected error for video { video_id } : { e } " )
93+ return None
94+
95+ return transcript_list
96+
9197
9298# Save transcripts and translate if necessary
9399async def save_transcripts (transcript_list , base_filename , transcript_properties = None ):
94100
95101 if len (PROXY_PASSWORD ) > 0 and len (PROXY_USERNAME ) > 0 :
96102 logger .info (f"Using proxy configuration. You can fall back to default by removing PROXY_USERNAME and PROXY_PASSWORD from config." )
97- ytt_api = YouTubeTranscriptApi (
103+ ytt_api_proxied = YouTubeTranscriptApi (
98104 proxy_config = WebshareProxyConfig (
99105 proxy_username = PROXY_USERNAME ,
100106 proxy_password = PROXY_PASSWORD ,
101107 )
102108 )
103- else :
104- logger .info (f"Using default YouTubeTranscriptApi without proxy." )
105- ytt_api = YouTubeTranscriptApi ()
109+
110+ logger .info (f"Creating default YouTubeTranscriptApi without proxy." )
111+ ytt_api = YouTubeTranscriptApi ()
106112
107113 formatter = TextFormatter ()
108114 os .makedirs ("transcripts" , exist_ok = True ) # Create the transcripts directory if it doesn't exist
@@ -130,21 +136,31 @@ async def save_transcripts(transcript_list, base_filename, transcript_properties
130136 # is_generated = transcript.is_generated
131137
132138 logger .info (f"Fetching transcript for language: { language } ({ normalized_language_code } )" )
139+ logger .info (f"Attempting to fetch without proxy" )
133140
134- # Retry logic for fetching transcript
135- retry_attempts = 3
136- for attempt in range (1 , retry_attempts + 1 ):
137- try :
138- transcript_data = ytt_api .fetch (transcript_properties .get ('video_id' ), languages = [language_code ]) # ytt_api.fetch(transcript.video_id)
139- logger .info (f"Transcript data from fetch attempt: { str (transcript_data )[:500 ]} " )
140- break
141- except Exception as e :
142- logger .warning (f"Fetch failed for { language } (attempt { attempt } ): { e } " )
143- if attempt < retry_attempts :
144- await asyncio .sleep (1.5 )
145- else :
146- logger .error (f"Giving up on { language } after { retry_attempts } attempts." )
147- transcript_data = None
141+ try :
142+ transcript_data = ytt_api .fetch (transcript_properties .get ('video_id' ), languages = [language_code ])
143+ logger .info (f"Transcript data from fetch attempt: { str (transcript_data )[:500 ]} " )
144+ except Exception as e :
145+ logger .warning (f"Fetch without proxy failed for { language } : { e } " )
146+ transcript_data = None
147+
148+ if not transcript_data and ytt_api_proxied :
149+ await asyncio .sleep (1.5 )
150+ # Retry logic for fetching transcript
151+ retry_attempts = 3
152+ for attempt in range (1 , retry_attempts + 1 ):
153+ try :
154+ transcript_data = ytt_api_proxied .fetch (transcript_properties .get ('video_id' ), languages = [language_code ]) # ytt_api.fetch(transcript.video_id)
155+ logger .info (f"Transcript data from fetch attempt: { str (transcript_data )[:500 ]} " )
156+ break
157+ except Exception as e :
158+ logger .warning (f"Fetch failed for { language } (attempt { attempt } ): { e } " )
159+ if attempt < retry_attempts :
160+ await asyncio .sleep (2 )
161+ else :
162+ logger .error (f"Giving up on { language } after { retry_attempts } attempts." )
163+ transcript_data = None
148164
149165 if not transcript_data :
150166 continue
0 commit comments