|
7 | 7 | import os |
8 | 8 | import logging |
9 | 9 | from translate import translate_text |
10 | | -from config import MODEL_TO_USE |
| 10 | +from config import MODEL_TO_USE, SUPADATA_API_KEY |
| 11 | +from supadata import Supadata, SupadataError |
11 | 12 | from config import PROXY_USERNAME |
12 | 13 | from config import PROXY_PASSWORD |
13 | 14 | from model_params import get_model_params |
@@ -153,11 +154,11 @@ async def save_transcripts(transcript_list, base_filename, transcript_properties |
153 | 154 | # continue |
154 | 155 |
|
155 | 156 | logger.info(f"Fetching transcript for language: {language} ({normalized_language_code})") |
156 | | - logger.info(f"[Get Single Transcript] Attempting to fetch with(out) proxy") |
| 157 | + logger.info(f"[Get Single Transcript] Attempting to fetch without proxy") |
157 | 158 |
|
158 | 159 | try: |
159 | 160 | # transcript_data = transcript.fetch() |
160 | | - transcript_data = ytt_api_proxied.fetch(transcript_properties.get('video_id'), languages=[language_code]) |
| 161 | + transcript_data = ytt_api.fetch(transcript_properties.get('video_id'), languages=[language_code]) |
161 | 162 | logger.info(f"Transcript data from fetch attempt: {str(transcript_data)[:500]}") |
162 | 163 | except Exception as e: |
163 | 164 | logger.warning(f"Fetch without proxy failed for {language}: {e}") |
@@ -185,7 +186,23 @@ async def save_transcripts(transcript_list, base_filename, transcript_properties |
185 | 186 | transcript_data = None |
186 | 187 |
|
187 | 188 | if not transcript_data: |
188 | | - continue |
| 189 | + logger.error(f"Failed to fetch transcript for {language} with proxy after all attempts.") |
| 190 | + logger.info(f"[Supadata] Trying to fetch the transcript with Supadata for {language}.") |
| 191 | + |
| 192 | + try: |
| 193 | + # Initialize the client |
| 194 | + supadata = Supadata(api_key=SUPADATA_API_KEY) |
| 195 | + # Get YouTube transcript in the requested language (language_code) |
| 196 | + transcript_data = supadata.youtube.transcript(video_id=transcript_properties.get('video_id'), lang=language_code) |
| 197 | + if transcript_data: |
| 198 | + logger.info(f"[Supadata] Got transcript for {language_code} from Supadata: {str(transcript_data)[:500]}") |
| 199 | + transcript_data = transcript_data.content |
| 200 | + else: |
| 201 | + logger.error(f"[Supadata] No transcript data found for {language_code}.") |
| 202 | + continue |
| 203 | + except SupadataError as e: |
| 204 | + logger.error(f"[Supadata] Failed to fetch transcript from Supadata for {language_code}: {e}") |
| 205 | + continue |
189 | 206 |
|
190 | 207 | try: |
191 | 208 | formatted_transcript = formatter.format_transcript(transcript_data) |
|
0 commit comments