Skip to content

Commit 4f02feb

Browse files
committed
Update module and add Supadata fallback option
1 parent 049f913 commit 4f02feb

5 files changed

Lines changed: 29 additions & 7 deletions

File tree

youtubevideotranscriptbot/Dockerfile_template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# This is Dockerfile
22
# Use an official Python runtime as a parent image
3-
FROM python:3.9-slim
3+
FROM python:3.10-slim
44

55
# Set the working directory in the container
66
WORKDIR /app
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
python-telegram-bot==20.3
22
google-api-python-client==2.104.0
3-
youtube-transcript-api==1.0.3 # 1.0.3 # 0.6.2 #1.0.3 # 0.6.1
3+
youtube-transcript-api==1.1.0 # 1.0.3 # 0.6.2 #1.0.3 # 0.6.1
44
deep-translator==1.11.4
55
mysql-connector-python==8.1.0
66
isodate==0.6.1
@@ -9,3 +9,5 @@ tiktoken==0.9.0
99
deepseek==1.0.0
1010
aiofiles>=0.4.0
1111
amplitude-analytics==1.1.5
12+
supadata==1.2.1
13+

youtubevideotranscriptbot/run_bot.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ fi
1414
# Build the Docker image
1515
echo "Building the Docker image..."
1616
docker build -t ${IMAGE_TAG} .
17+
# Use the line below if you want to build without cache
18+
# docker build --no-cache -t ${IMAGE_TAG} .
1719

1820
# Run the container with the volume and auto-restart
1921
echo "Starting the container..."

youtubevideotranscriptbot/transcript.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import os
88
import logging
99
from translate import translate_text
10-
from config import MODEL_TO_USE
10+
from config import MODEL_TO_USE, SUPADATA_API_KEY
11+
from supadata import Supadata, SupadataError
1112
from config import PROXY_USERNAME
1213
from config import PROXY_PASSWORD
1314
from model_params import get_model_params
@@ -153,11 +154,11 @@ async def save_transcripts(transcript_list, base_filename, transcript_properties
153154
# continue
154155

155156
logger.info(f"Fetching transcript for language: {language} ({normalized_language_code})")
156-
logger.info(f"[Get Single Transcript] Attempting to fetch with(out) proxy")
157+
logger.info(f"[Get Single Transcript] Attempting to fetch without proxy")
157158

158159
try:
159160
# transcript_data = transcript.fetch()
160-
transcript_data = ytt_api_proxied.fetch(transcript_properties.get('video_id'), languages=[language_code])
161+
transcript_data = ytt_api.fetch(transcript_properties.get('video_id'), languages=[language_code])
161162
logger.info(f"Transcript data from fetch attempt: {str(transcript_data)[:500]}")
162163
except Exception as e:
163164
logger.warning(f"Fetch without proxy failed for {language}: {e}")
@@ -185,7 +186,23 @@ async def save_transcripts(transcript_list, base_filename, transcript_properties
185186
transcript_data = None
186187

187188
if not transcript_data:
188-
continue
189+
logger.error(f"Failed to fetch transcript for {language} with proxy after all attempts.")
190+
logger.info(f"[Supadata] Trying to fetch the transcript with Supadata for {language}.")
191+
192+
try:
193+
# Initialize the client
194+
supadata = Supadata(api_key=SUPADATA_API_KEY)
195+
# Get the YouTube transcript in the language currently being processed (language_code)
196+
transcript_data = supadata.youtube.transcript(video_id=transcript_properties.get('video_id'), lang=language_code)
197+
if transcript_data:
198+
logger.info(f"[Supadata] Got transcript for {language_code} from Supadata: {str(transcript_data)[:500]}")
199+
transcript_data = transcript_data.content
200+
else:
201+
logger.error(f"[Supadata] No transcript data found for {language_code}.")
202+
continue
203+
except SupadataError as e:
204+
logger.error(f"[Supadata] Failed to fetch transcript from Supadata for {language_code}: {e}")
205+
continue
189206

190207
try:
191208
formatted_transcript = formatter.format_transcript(transcript_data)

youtubevideotranscriptbot/youtube_api.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,5 @@ def get_channel_subscribers(channel_id):
4343
response = request.execute()
4444
if response['items']:
4545
return response['items'][0]['statistics']['subscriberCount']
46-
return 'N/A'
46+
return 'N/A'
47+

0 commit comments

Comments
 (0)