Skip to content

Commit 2062d03

Browse files
committed
Add option to fetch without proxy first
1 parent ce37cb5 commit 2062d03

1 file changed

Lines changed: 56 additions & 40 deletions

File tree

youtubevideotranscriptbot/transcript.py

Lines changed: 56 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -55,54 +55,60 @@ def get_all_transcripts(video_id):
5555

5656
if len(PROXY_PASSWORD) > 0 and len(PROXY_USERNAME) > 0:
5757
logger.info(f"Using proxy configuration. You can fall back to default by removing PROXY_USERNAME and PROXY_PASSWORD from config.")
58-
ytt_api = YouTubeTranscriptApi(
58+
ytt_api_proxied = YouTubeTranscriptApi(
5959
proxy_config=WebshareProxyConfig(
6060
proxy_username=PROXY_USERNAME,
6161
proxy_password=PROXY_PASSWORD,
6262
)
6363
)
64-
else:
65-
logger.info(f"Using default YouTubeTranscriptApi without proxy.")
66-
ytt_api = YouTubeTranscriptApi()
64+
65+
logger.info(f"Using default YouTubeTranscriptApi without proxy.")
66+
ytt_api = YouTubeTranscriptApi()
6767

6868
try:
6969
# logger.info(f"Available methods for YouTubeTranscriptApi: {dir(YouTubeTranscriptApi)}")
7070
transcript_list = ytt_api.list(video_id)
71-
# logger.info(f"Available methods for TranscriptList: {dir(transcript_list)}")
72-
# print("Available transcripts:\n")
73-
# print(transcript_list)
74-
logger.info(f"Transcripts seem to have been received from YouTube.\n")
75-
if not transcript_list:
76-
logger.info(f"Or maybe not...\n")
71+
if transcript_list:
72+
logger.info(f"Transcript list is not empty. Fetched transcripts list for video ID: {video_id}")
7773
else:
78-
logger.info(f"Transcript list is not empty. Successfully fetched transcripts list for video ID: {video_id}")
79-
return transcript_list
80-
81-
# except (TranscriptsDisabled, NoTranscriptFound):
82-
# logger.warning(f"No transcripts for video {video_id}")
83-
# return None
84-
# except TooManyRequests:
85-
# logger.error("YouTube rate limit hit. Consider adding a delay or proxy.")
86-
# # Optionally: sleep and retry
87-
# return None
74+
logger.info(f"Transcript list is empty. No transcripts found without proxy for video ID: {video_id}")
8875
except Exception as e:
89-
logger.error(f"Unexpected error for video {video_id}: {e}")
90-
return None
76+
logger.error(f"Failed to fetch transcript list without proxy for video {video_id}: {e}")
77+
transcript_list = None
78+
79+
if not transcript_list and ytt_api_proxied:
80+
try:
81+
logger.info(f"Attempting to fetch transcripts with proxy configuration.")
82+
transcript_list = ytt_api_proxied.list(video_id)
83+
# logger.info(f"Available methods for TranscriptList: {dir(transcript_list)}")
84+
# print("Available transcripts:\n")
85+
# print(transcript_list)
86+
if not transcript_list:
87+
logger.info(f"Transcript list is empty. No transcripts found with proxy for video ID: {video_id}")
88+
else:
89+
logger.info(f"Transcript list is not empty. Fetched transcripts list with proxy for video ID: {video_id}")
90+
91+
except Exception as e:
92+
logger.error(f"Unexpected error for video {video_id}: {e}")
93+
return None
94+
95+
return transcript_list
96+
9197

9298
# Save transcripts and translate if necessary
9399
async def save_transcripts(transcript_list, base_filename, transcript_properties=None):
94100

95101
if len(PROXY_PASSWORD) > 0 and len(PROXY_USERNAME) > 0:
96102
logger.info(f"Using proxy configuration. You can fall back to default by removing PROXY_USERNAME and PROXY_PASSWORD from config.")
97-
ytt_api = YouTubeTranscriptApi(
103+
ytt_api_proxied = YouTubeTranscriptApi(
98104
proxy_config=WebshareProxyConfig(
99105
proxy_username=PROXY_USERNAME,
100106
proxy_password=PROXY_PASSWORD,
101107
)
102108
)
103-
else:
104-
logger.info(f"Using default YouTubeTranscriptApi without proxy.")
105-
ytt_api = YouTubeTranscriptApi()
109+
110+
logger.info(f"Creating default YouTubeTranscriptApi without proxy.")
111+
ytt_api = YouTubeTranscriptApi()
106112

107113
formatter = TextFormatter()
108114
os.makedirs("transcripts", exist_ok=True) # Create the transcripts directory if it doesn't exist
@@ -130,21 +136,31 @@ async def save_transcripts(transcript_list, base_filename, transcript_properties
130136
# is_generated = transcript.is_generated
131137

132138
logger.info(f"Fetching transcript for language: {language} ({normalized_language_code})")
139+
logger.info(f"Attempting to fetch without proxy")
133140

134-
# Retry logic for fetching transcript
135-
retry_attempts = 3
136-
for attempt in range(1, retry_attempts + 1):
137-
try:
138-
transcript_data = ytt_api.fetch(transcript_properties.get('video_id'), languages=[language_code]) # ytt_api.fetch(transcript.video_id)
139-
logger.info(f"Transcript data from fetch attempt: {str(transcript_data)[:500]}")
140-
break
141-
except Exception as e:
142-
logger.warning(f"Fetch failed for {language} (attempt {attempt}): {e}")
143-
if attempt < retry_attempts:
144-
await asyncio.sleep(1.5)
145-
else:
146-
logger.error(f"Giving up on {language} after {retry_attempts} attempts.")
147-
transcript_data = None
141+
try:
142+
transcript_data = ytt_api.fetch(transcript_properties.get('video_id'), languages=[language_code])
143+
logger.info(f"Transcript data from fetch attempt: {str(transcript_data)[:500]}")
144+
except Exception as e:
145+
logger.warning(f"Fetch without proxy failed for {language}: {e}")
146+
transcript_data = None
147+
148+
if not transcript_data and ytt_api_proxied:
149+
await asyncio.sleep(1.5)
150+
# Retry logic for fetching transcript
151+
retry_attempts = 3
152+
for attempt in range(1, retry_attempts + 1):
153+
try:
154+
transcript_data = ytt_api_proxied.fetch(transcript_properties.get('video_id'), languages=[language_code]) # ytt_api.fetch(transcript.video_id)
155+
logger.info(f"Transcript data from fetch attempt: {str(transcript_data)[:500]}")
156+
break
157+
except Exception as e:
158+
logger.warning(f"Fetch failed for {language} (attempt {attempt}): {e}")
159+
if attempt < retry_attempts:
160+
await asyncio.sleep(2)
161+
else:
162+
logger.error(f"Giving up on {language} after {retry_attempts} attempts.")
163+
transcript_data = None
148164

149165
if not transcript_data:
150166
continue

0 commit comments

Comments
 (0)