Skip to content

Commit 049f913

Browse files
committed
Minor corrections
1 parent 69967d0 commit 049f913

4 files changed

Lines changed: 73 additions & 11 deletions

File tree

youtubevideotranscriptbot/model_params.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
import logging
44
from config import OPENAI_API_KEY
55
from config import DEEPSEEK_API_KEY
6+
from config import DEEPSEEK_R1_API_KEY
67
from config import XAI_API_KEY
78
from config import ANTHROPIC_API_KEY
8-
from config import MODEL_TO_USE # Import the model selection
9+
from config import MODEL_TO_USE
10+
from config import LLAMA_API_KEY
911

1012

1113
# Set up OpenAI
@@ -47,6 +49,18 @@ def get_model_params(model=MODEL_TO_USE):
4749
"cost_per_100k_tokens_input": 0.027,
4850
"cost_per_100k_tokens_output": 0.11
4951
}
52+
elif "r1" in model.lower():
53+
logger.info(f"Using DeepSeek {model}.")
54+
# This is the R1 model, which is free to use through OpenRouter
55+
return {
56+
"tokens_per_chunk": 150000,
57+
"max_chunks_allowed": 5,
58+
"max_tokens": 1024,
59+
"model": model or "deepseek-chat",
60+
"client": openai.OpenAI(api_key=DEEPSEEK_R1_API_KEY, base_url="https://openrouter.ai/api/v1"),
61+
"cost_per_100k_tokens_input": 0.0,
62+
"cost_per_100k_tokens_output": 0.0
63+
}
5064
elif "grok" in model.lower():
5165
logger.info(f"Using xAI Grok {model}.")
5266
return {
@@ -69,6 +83,30 @@ def get_model_params(model=MODEL_TO_USE):
6983
"cost_per_100k_tokens_input": 0.25,
7084
"cost_per_100k_tokens_output": 1.5
7185
}
86+
elif "llama" in model.lower():
87+
logger.info(f"Using Meta Llama {model}.")
88+
# This is a free model, through OpenRouter
89+
return {
90+
"tokens_per_chunk": 110000,
91+
"max_chunks_allowed": 5,
92+
"max_tokens": 1024,
93+
"model": model or "meta-llama/llama-3.3-8b-instruct:free",
94+
"client": openai.OpenAI(api_key=LLAMA_API_KEY, base_url="https://openrouter.ai/api/v1"),
95+
"cost_per_100k_tokens_input": 0.0,
96+
"cost_per_100k_tokens_output": 0.0
97+
}
98+
elif "gemini" in model.lower():
99+
logger.info(f"Using Google Gemini {model}.")
100+
# This is a free model, through OpenRouter
101+
return {
102+
"tokens_per_chunk": 900000,
103+
"max_chunks_allowed": 5,
104+
"max_tokens": 1024,
105+
"model": model or "google/gemini-2.0-flash-exp:free",
106+
"client": openai.OpenAI(api_key=DEEPSEEK_R1_API_KEY, base_url="https://openrouter.ai/api/v1"),
107+
"cost_per_100k_tokens_input": 0.0,
108+
"cost_per_100k_tokens_output": 0.0
109+
}
72110
else:
73111
logger.error("Invalid model selection in config. Please select 'gpt' for OpenAI or 'deepseek' for DeepSeek.")
74112
logger.warning("Falling back to DeepSeek model as default.")

youtubevideotranscriptbot/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
python-telegram-bot==20.3
22
google-api-python-client==2.104.0
3-
youtube-transcript-api==1.0.3 # 0.6.2 #1.0.3 # 0.6.1
3+
youtube-transcript-api==1.0.3 # 1.0.3 # 0.6.2 #1.0.3 # 0.6.1
44
deep-translator==1.11.4
55
mysql-connector-python==8.1.0
66
isodate==0.6.1

youtubevideotranscriptbot/telegram_bot.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from config import TELEGRAM_TOKEN, OPENAI_API_KEY, MODEL_TO_USE, ENVIRONMENT, AMPLITUDE_API_KEY
99
from database import store_user, store_video, store_transcript_request, get_db_connection, store_summarization_request, store_user_async, store_video_async, store_transcript_request_async, store_summarization_request_async, get_summary_by_video_language_async, get_existing_transcripts_async, insert_transcript_async
1010
from youtube_api import extract_video_id, get_video_details, get_channel_subscribers
11-
from transcript import get_all_transcripts, save_transcripts, normalize_language_code
11+
from transcript import get_all_transcripts, save_transcripts, normalize_language_code, test_proxy
1212
from summarize import handle_summarization_request
1313
from duration import format_duration
1414
from analytics import track_event
@@ -37,10 +37,22 @@ async def error_handler(update, context):
3737
logger.error(f"Exception while handling an update: {context.error}")
3838
if update and update.effective_user:
3939
user_id = update.effective_user.id
40-
logger.error(f"User {user_id} may have blocked the bot.")
40+
logger.error(f"User {user_id} may have not received the message. User might have blocked the bot.")
4141
else:
4242
logger.error("Exception occurred, but no user info available.")
4343

44+
45+
async def proxy_command(update: Update, context: CallbackContext):
    """Handle the /proxy command: run the proxy self-test and reply with its result.

    The reply contains whatever ``test_proxy()`` returned (the parsed JSON body
    of the IP-echo request), or a failure marker when the test returned nothing
    or raised.

    Args:
        update: Incoming Telegram update that carried the /proxy command.
        context: Handler context supplied by python-telegram-bot (unused here).
    """
    # Local import so this handler does not depend on a module-level import.
    import asyncio

    # effective_message/effective_user also cover edited messages, for which
    # update.message is None and the original attribute access would raise.
    message = update.effective_message
    user = update.effective_user
    user_id = user.id if user else "unknown"
    logger.info(f"User {user_id} issued the /proxy command.")

    # NOTE(review): this command reports the server's outbound IP to whoever
    # sends it — consider restricting it to admin users.
    try:
        # test_proxy() performs a blocking HTTP request (up to a 10 s timeout);
        # run it in the default executor so the bot's event loop keeps serving
        # other updates in the meantime.
        loop = asyncio.get_running_loop()
        proxy = await loop.run_in_executor(None, test_proxy)
    except Exception as e:
        logger.error(f"Proxy test raised an unexpected error: {e}")
        proxy = None

    if not proxy:
        logger.error("Proxy test failed. No proxy available.")
        proxy = "❌ Undefined. Proxy test might have failed."

    if message is None:
        # Nothing to reply to (e.g. an update type without a message).
        logger.warning("No message available to reply to for the /proxy command.")
        return

    await message.reply_text(f"The proxy test returned server IP as: {proxy}.")
54+
55+
4456
# Start command
4557
async def start(update: Update, context: CallbackContext):
4658
logger.info(f"User {update.message.from_user.id} issued the /start command.")
@@ -748,6 +760,7 @@ def main():
748760
# Add handlers
749761
application.add_handler(CommandHandler("start", start))
750762
application.add_handler(CommandHandler("help", help_command))
763+
application.add_handler(CommandHandler("proxy", proxy_command))
751764
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_youtube_link))
752765
application.add_handler(CallbackQueryHandler(handle_summarization_button))
753766
application.add_error_handler(error_handler)

youtubevideotranscriptbot/transcript.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import aiofiles
1616
import asyncio
1717
import requests
18+
import traceback
1819

1920
logger = logging.getLogger(__name__)
2021

@@ -38,11 +39,10 @@ def test_proxy():
3839
try:
3940
response = requests.get("https://httpbin.org/ip", proxies=proxy_dict, timeout=10)
4041
logger.info(f"Proxy working! Your IP: {response.json()}")
42+
return response.json()
4143
except Exception as e:
4244
logger.info(f"Proxy failed: {e}")
4345

44-
return
45-
4646

4747
# Get all available transcripts
4848
def get_all_transcripts(video_id):
@@ -62,7 +62,7 @@ def get_all_transcripts(video_id):
6262
)
6363
)
6464

65-
logger.info(f"Using default YouTubeTranscriptApi without proxy.")
65+
logger.info(f"[Get Transcript List] Using default YouTubeTranscriptApi without proxy.")
6666
ytt_api = YouTubeTranscriptApi()
6767

6868
try:
@@ -78,7 +78,7 @@ def get_all_transcripts(video_id):
7878

7979
if not transcript_list and ytt_api_proxied:
8080
try:
81-
logger.info(f"Attempting to fetch transcripts with proxy configuration.")
81+
logger.info(f"[Get Transcript List] Attempting to fetch transcripts with proxy configuration.")
8282
transcript_list = ytt_api_proxied.list(video_id)
8383
# logger.info(f"Available methods for TranscriptList: {dir(transcript_list)}")
8484
# print("Available transcripts:\n")
@@ -145,19 +145,27 @@ async def save_transcripts(transcript_list, base_filename, transcript_properties
145145
# language = transcript.language
146146
# is_generated = transcript.is_generated
147147

148+
original_audio_language = transcript_properties.get('normalized_language_code', '')
149+
transcript_data = None
150+
151+
# if original_audio_language != normalized_language_code or normalized_language_code != 'en':
152+
# logger.info(f"Transcript retrieval for language {normalized_language_code} skipped since nor original audio or 'en'. Original audio is: {original_audio_language}.")
153+
# continue
154+
148155
logger.info(f"Fetching transcript for language: {language} ({normalized_language_code})")
149-
logger.info(f"Attempting to fetch without proxy")
156+
logger.info(f"[Get Single Transcript] Attempting to fetch with(out) proxy")
150157

151158
try:
152-
transcript_data = ytt_api.fetch(transcript_properties.get('video_id'), languages=[language_code])
159+
# transcript_data = transcript.fetch()
160+
transcript_data = ytt_api_proxied.fetch(transcript_properties.get('video_id'), languages=[language_code])
153161
logger.info(f"Transcript data from fetch attempt: {str(transcript_data)[:500]}")
154162
except Exception as e:
155163
logger.warning(f"Fetch without proxy failed for {language}: {e}")
156164
transcript_data = None
157165

158166
if not transcript_data and ytt_api_proxied:
159167
await asyncio.sleep(1.5)
160-
logger.info(f"Attempting to fetch with proxy")
168+
logger.info(f"[Get Single Transcript] Attempting to fetch with proxy")
161169
# Retry logic for fetching transcript
162170
retry_attempts = 3
163171
for attempt in range(1, retry_attempts + 1):
@@ -167,6 +175,9 @@ async def save_transcripts(transcript_list, base_filename, transcript_properties
167175
break
168176
except Exception as e:
169177
logger.warning(f"Fetch failed for {language} (attempt {attempt}): {e}")
178+
# logger.warning("Proxy failed error type:", type(e).__name__)
179+
# logger.warning("Full traceback:\n")
180+
# logger.warning(traceback.print_exc())
170181
if attempt < retry_attempts:
171182
await asyncio.sleep(2)
172183
else:

0 commit comments

Comments
 (0)