Skip to content

Commit ce37cb5

Browse files
committed
Add transcript caching and refactor
1 parent 4045a92 commit ce37cb5

7 files changed

Lines changed: 572 additions & 397 deletions

File tree

youtubevideotranscriptbot/database.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,3 +186,99 @@ def get_summary_by_video_language(video_id, language, model=MODEL_TO_USE):
186186
async def get_summary_by_video_language_async(video_id, language, model=MODEL_TO_USE):
    """
    Async wrapper around get_summary_by_video_language().

    Runs the blocking DB lookup in the default executor so the event loop
    is never blocked. Returns whatever the sync function returns.
    """
    # get_running_loop() is the recommended call inside a coroutine;
    # get_event_loop() is deprecated for this use since Python 3.10.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, get_summary_by_video_language, video_id, language, model)
189+
190+
def insert_transcript(transcript_properties):
    """
    Insert one transcript record into the transcripts table.

    Args:
        transcript_properties (dict): Column values keyed by column name.
            Missing keys fall back to defaults: None for identity/metadata
            fields, '' for text fields, 0/0.0 for numeric fields, True for
            is_generated, and MODEL_TO_USE for model.

    Raises:
        Exception: re-raised after logging if the INSERT fails.
    """
    # (column, default) pairs in the exact order of the INSERT column list
    # below. Driving both the value extraction and the execute() tuple from
    # this single spec avoids column/value drift — and avoids the original's
    # local variable named `type`, which shadowed the builtin.
    column_spec = (
        ('video_id', None), ('video_title', None), ('channel_name', None),
        ('channel_id', None), ('duration', None), ('video_url', None),
        ('user_id', None), ('language_code', None),
        ('normalized_language_code', None), ('is_generated', True),
        ('text', ''), ('filename', ''), ('base_filename', ''),
        ('type', 'transcript'), ('summary', ''), ('word_count', 0),
        ('tokens_used', 0), ('estimated_cost', 0.0), ('model', MODEL_TO_USE),
    )
    try:
        values = tuple(transcript_properties.get(column, default)
                       for column, default in column_spec)
        with db_cursor() as cursor:
            query = """
                INSERT INTO transcripts (
                    video_id, video_title, channel_name, channel_id, duration, video_url,
                    user_id, language_code, normalized_language_code,
                    is_generated, text, filename, base_filename, type, summary, word_count,
                    tokens_used, estimated_cost, model
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            cursor.execute(query, values)
            logger.info(f"Inserted transcript for video_id={transcript_properties.get('video_id')}, user_id={transcript_properties.get('user_id')}, language_code={transcript_properties.get('language_code')}")
    except Exception as e:
        logger.error(f"Failed to insert transcript: {e}")
        raise
234+
235+
async def insert_transcript_async(transcript_properties):
    """
    Async wrapper around insert_transcript().

    Runs the blocking DB insert in the default executor so the event loop
    is never blocked. Propagates any exception raised by the sync insert.
    """
    # get_running_loop() is the recommended call inside a coroutine;
    # get_event_loop() is deprecated for this use since Python 3.10.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, insert_transcript, transcript_properties)
241+
242+
def get_existing_transcripts(video_id, normalized_language_code=None):
    """
    Fetch all transcript rows for a video, newest first.

    When normalized_language_code is given, rows matching that code — or the
    'en'/'ru' fallback languages — are returned; otherwise every row for the
    video is returned. Each row is converted to a dict keyed by column name.
    Returns [] when nothing matches or on any database error (best-effort:
    the error is logged, not raised).
    """
    # Column names in the same order as the SELECT list below; used to turn
    # each fetched tuple into a dict.
    column_names = (
        "video_id", "video_title", "channel_name", "channel_id", "duration", "video_url",
        "user_id", "language_code", "normalized_language_code",
        "is_generated", "text", "filename", "base_filename", "type", "summary", "word_count",
        "tokens_used", "estimated_cost", "model",
    )
    try:
        with db_cursor() as cursor:
            if normalized_language_code:
                cursor.execute(
                    """
                SELECT video_id, video_title, channel_name, channel_id, duration, video_url,
                       user_id, language_code, normalized_language_code,
                       is_generated, text, filename, base_filename, type, summary, word_count,
                       tokens_used, estimated_cost, model
                FROM transcripts
                WHERE video_id = %s AND (normalized_language_code = %s OR normalized_language_code = 'en' OR normalized_language_code = 'ru')
                ORDER BY id DESC
                """,
                    (video_id, normalized_language_code),
                )
            else:
                cursor.execute(
                    """
                SELECT video_id, video_title, channel_name, channel_id, duration, video_url,
                       user_id, language_code, normalized_language_code,
                       is_generated, text, filename, base_filename, type, summary, word_count,
                       tokens_used, estimated_cost, model
                FROM transcripts
                WHERE video_id = %s
                ORDER BY id DESC
                """,
                    (video_id,),
                )
            fetched = cursor.fetchall()
        return [dict(zip(column_names, record)) for record in fetched]
    except Exception as e:
        logger.error(f"Failed to fetch transcripts for video_id={video_id}, language_code={normalized_language_code}: {e}")
        return []
281+
282+
async def get_existing_transcripts_async(video_id, normalized_language_code=None):
    """
    Async wrapper around get_existing_transcripts().

    Runs the blocking DB query in the default executor and returns its list
    of transcript dicts (possibly empty).
    """
    # get_running_loop() is the recommended call inside a coroutine;
    # get_event_loop() is deprecated for this use since Python 3.10.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, get_existing_transcripts, video_id, normalized_language_code)

youtubevideotranscriptbot/model_params.py

Lines changed: 17 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ def get_model_params(model=MODEL_TO_USE):
2525
2626
Returns:
2727
dict: A dictionary containing the model parameters.
28+
29+
Cost is indicated in USD per 100K tokens.
2830
"""
2931
if "gpt" in model.lower():
3032
logger.info(f"Using OpenAI {model} model.")
@@ -33,7 +35,9 @@ def get_model_params(model=MODEL_TO_USE):
3335
"max_chunks_allowed": 4,
3436
"max_tokens": 1024,
3537
"model": model or "gpt-4o-mini",
36-
"client": openai.OpenAI(api_key=OPENAI_API_KEY, base_url="https://api.openai.com/v1")
38+
"client": openai.OpenAI(api_key=OPENAI_API_KEY, base_url="https://api.openai.com/v1"),
39+
"cost_per_100k_tokens_input": 0.25,
40+
"cost_per_100k_tokens_output": 1 # Example cost, adjust as needed
3741
}
3842
elif "deepseek" in model.lower():
3943
logger.info(f"Using DeepSeek {model}.")
@@ -42,7 +46,9 @@ def get_model_params(model=MODEL_TO_USE):
4246
"max_chunks_allowed": 5,
4347
"max_tokens": 1024,
4448
"model": model or "deepseek-chat",
45-
"client": openai.OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
49+
"client": openai.OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com"),
50+
"cost_per_100k_tokens_input": 0.027,
51+
"cost_per_100k_tokens_output": 0.11
4652
}
4753
elif "grok" in model.lower():
4854
logger.info(f"Using xAI Grok {model}.")
@@ -51,7 +57,9 @@ def get_model_params(model=MODEL_TO_USE):
5157
"max_chunks_allowed": 5,
5258
"max_tokens": 1024,
5359
"model": model or "grok-3-mini",
54-
"client": openai.OpenAI(api_key=XAI_API_KEY, base_url="https://api.x.ai/v1")
60+
"client": openai.OpenAI(api_key=XAI_API_KEY, base_url="https://api.x.ai/v1"),
61+
"cost_per_100k_tokens_input": 0.3,
62+
"cost_per_100k_tokens_output": 1.5
5563
}
5664
elif "claude" in model.lower():
5765
logger.info(f"Using Anthropic's Claude {model}.")
@@ -60,7 +68,9 @@ def get_model_params(model=MODEL_TO_USE):
6068
"max_chunks_allowed": 3,
6169
"max_tokens": 1024,
6270
"model": model or "claude-3-haiku-20240307",
63-
"client": openai.OpenAI(api_key=ANTHROPIC_API_KEY, base_url="https://api.anthropic.com/v1/")
71+
"client": openai.OpenAI(api_key=ANTHROPIC_API_KEY, base_url="https://api.anthropic.com/v1/"),
72+
"cost_per_100k_tokens_input": 0.25,
73+
"cost_per_100k_tokens_output": 1.5
6474
}
6575
else:
6676
logger.error("Invalid model selection in config. Please select 'gpt' for OpenAI or 'deepseek' for DeepSeek.")
@@ -70,35 +80,8 @@ def get_model_params(model=MODEL_TO_USE):
7080
"max_chunks_allowed": 5,
7181
"max_tokens": 1024,
7282
"model": model or "deepseek-chat",
73-
"client": openai.OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
83+
"client": openai.OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com"),
84+
"cost_per_100k_tokens_input": 0.3,
85+
"cost_per_100k_tokens_output": 1.5
7486
}
7587

76-
77-
# if MODEL_TO_USE:
78-
# if "gpt" in MODEL_TO_USE.lower():
79-
# model_to_use = 1 # OpenAI model
80-
# elif "deepseek" in MODEL_TO_USE.lower():
81-
# model_to_use = 2
82-
# else:
83-
# logger.error("Invalid model selection in config. Please select 'gpt' for OpenAI or 'deepseek' for DeepSeek.")
84-
# logger.warning("Falling back to DeepSeek model as default.")
85-
# model_to_use = 2 # 1 for OpenAI, 2 for DeepSeek
86-
# raise ValueError("Invalid model selection in config. Please select 'gpt' for OpenAI or 'deepseek' for DeepSeek.")
87-
88-
# if model_to_use == 1:
89-
# tokens_per_chunk = 100000
90-
# max_chunks_allowed = 4
91-
# max_tokens = 1024
92-
# model = "gpt-4o-mini"
93-
# client = openai.OpenAI(api_key=OPENAI_API_KEY, base_url="https://api.openai.com/v1")
94-
# logger.info(f"Using OpenAI {model} model for summarization.")
95-
# elif model_to_use == 2:
96-
# tokens_per_chunk = 64000
97-
# max_chunks_allowed = 5
98-
# max_tokens = 1024
99-
# model = "deepseek-chat"
100-
# # for DeepSeek backward compatibility, you can still use `https://api.deepseek.com/v1` as `base_url`.
101-
# client = openai.OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
102-
# logger.info(f"Using DeepSeek {model} for summarization.")
103-
# else:
104-
# logger.error("Invalid model selection. Please select 1 for OpenAI or 2 for DeepSeek.")

0 commit comments

Comments
 (0)