From e92acb4e42709d6674ab88aec1ddef54204d3140 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 20:58:43 +0100 Subject: [PATCH 01/27] Improve logging --- src/main.py | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/main.py b/src/main.py index a9d5ae2..4f8ffab 100644 --- a/src/main.py +++ b/src/main.py @@ -516,7 +516,7 @@ async def respond_with_llm_message(update): return # Initialize the Gemini model - debug("Initializing Gemini model: gemini-2.5-flash") + debug("Initializing Gemini model: %s", GEMINI_MODEL) plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences." model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction) @@ -527,7 +527,7 @@ async def respond_with_llm_message(update): debug("Modified safe prompt: %s", safe_prompt) # Generate response using Gemini with both safety settings and safe prompting - debug("Sending request to Gemini API") + debug("Sending request to Gemini API with model: %s", GEMINI_MODEL) safety_settings = { genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE, genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE, @@ -546,7 +546,7 @@ async def respond_with_llm_message(update): ), safety_settings=safety_settings, ) - # debug("Successfully received response from Gemini API") + debug("Successfully received response from Gemini API") # Handle response with safety filter checks if hasattr(response, 'candidates') and response.candidates: @@ -600,20 +600,28 @@ async def respond_with_llm_message(update): await update.message.reply_text(bot_response) - except (ValueError, RuntimeError) as e: - error("Error in Gemini API request: 
%s", e) - await update.message.reply_text( - "Вибачте, я не можу згенерувати відповідь." - if language == "uk" - else "Sorry, I encountered an error while processing your request." - ) except Exception as e: # pylint: disable=broad-except - error("Unexpected error in Gemini API request: %s", e) - await update.message.reply_text( - "Вибачте, я не можу згенерувати відповідь." - if language == "uk" - else "Sorry, I encountered an unexpected error while processing your request." - ) + import traceback + error_msg = str(e) + error("Error in Gemini API request: %s (Type: %s)", error_msg, type(e).__name__) + error("Full traceback: %s", traceback.format_exc()) + + # Check for rate limit (429) error + if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower(): + error("Rate limit exceeded (429) - Too many requests to Gemini API") + bot_response = ( + "Вибачте, перевищено ліміт запитів до AI. Спробуйте пізніше." + if language == "uk" + else "Sorry, AI request limit exceeded. Please try again later." + ) + else: + bot_response = ( + "Вибачте, я не можу згенерувати відповідь." + if language == "uk" + else "Sorry, I encountered an error while processing your request." 
+ ) + + await update.message.reply_text(bot_response) def main(): From b57cc37a5707fd94a9915143e6de1de3e741f95a Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 21:01:02 +0100 Subject: [PATCH 02/27] Fix linter --- src/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index 4f8ffab..d489296 100644 --- a/src/main.py +++ b/src/main.py @@ -602,10 +602,11 @@ async def respond_with_llm_message(update): except Exception as e: # pylint: disable=broad-except import traceback + error_msg = str(e) error("Error in Gemini API request: %s (Type: %s)", error_msg, type(e).__name__) error("Full traceback: %s", traceback.format_exc()) - + # Check for rate limit (429) error if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower(): error("Rate limit exceeded (429) - Too many requests to Gemini API") @@ -620,7 +621,7 @@ async def respond_with_llm_message(update): if language == "uk" else "Sorry, I encountered an error while processing your request." 
) - + await update.message.reply_text(bot_response) From 33aae20252322ad9e98c40693f17be51f5380a9d Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 21:35:55 +0100 Subject: [PATCH 03/27] Add rate limit check --- src/main.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/main.py b/src/main.py index d489296..132bd50 100644 --- a/src/main.py +++ b/src/main.py @@ -5,8 +5,10 @@ import json import asyncio import re +import time import google.generativeai as genai from functools import lru_cache +from collections import defaultdict from dotenv import load_dotenv from telegram import Update, InputMediaPhoto, InputMediaVideo from telegram.error import TimedOut, NetworkError, TelegramError @@ -44,6 +46,10 @@ if GEMINI_API_KEY: genai.configure(api_key=GEMINI_API_KEY) +# Rate limiting for Gemini API (5 requests per minute) +gemini_rate_limit = defaultdict(list) # {user_id: [timestamp1, timestamp2, ...]} +GEMINI_RPM_LIMIT = 4 # Set to 4 to be safe (limit is 5) + # Cache responses from JSON file @lru_cache(maxsize=1) @@ -483,6 +489,25 @@ async def respond_with_llm_message(update): await update.message.reply_text("Sorry, AI service is not configured.") return + # Rate limiting check + user_id = update.effective_user.id + current_time = time.time() + # Clean old timestamps (older than 60 seconds) + gemini_rate_limit[user_id] = [t for t in gemini_rate_limit[user_id] if current_time - t < 60] + + if len(gemini_rate_limit[user_id]) >= GEMINI_RPM_LIMIT: + debug("Rate limit hit for user %s", user_id) + bot_response = ( + "Вибачте, забагато запитів. Почекайте хвилину." + if language == "uk" + else "Sorry, too many requests. Please wait a minute." 
+ ) + await update.message.reply_text(bot_response) + return + + # Add current request timestamp + gemini_rate_limit[user_id].append(current_time) + try: # Check if user is asking for image generation and modify prompt image_keywords = [ From 538a090285e643dcdd92f0a47168f83768227b67 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 22:15:45 +0100 Subject: [PATCH 04/27] Implement Grok API support --- src/main.py | 238 ++++++++++++++++++++++++++++++------------- src/requirements.txt | 1 + 2 files changed, 166 insertions(+), 73 deletions(-) diff --git a/src/main.py b/src/main.py index 132bd50..337d830 100644 --- a/src/main.py +++ b/src/main.py @@ -7,6 +7,7 @@ import re import time import google.generativeai as genai +from openai import AsyncOpenAI from functools import lru_cache from collections import defaultdict from dotenv import load_dotenv @@ -37,8 +38,11 @@ # Reply with user data for Healthcheck send_user_info_with_healthcheck = os.getenv("SEND_USER_INFO_WITH_HEALTHCHECK", "False").lower() == "true" USE_LLM = os.getenv("USE_LLM", "False").lower() == "true" +LLM_PROVIDER = os.getenv("LLM_PROVIDER", "gemini").lower() # gemini or grok GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") -GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash") +GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-flash-latest") +GROK_API_KEY = os.getenv("GROK_API_KEY") +GROK_MODEL = os.getenv("GROK_MODEL", "grok-4-latest") TELEGRAM_WRITE_TIMEOUT = 8000 TELEGRAM_READ_TIMEOUT = 8000 @@ -46,9 +50,19 @@ if GEMINI_API_KEY: genai.configure(api_key=GEMINI_API_KEY) -# Rate limiting for Gemini API (5 requests per minute) -gemini_rate_limit = defaultdict(list) # {user_id: [timestamp1, timestamp2, ...]} -GEMINI_RPM_LIMIT = 4 # Set to 4 to be safe (limit is 5) +# Configure Grok API +grok_client = None +if GROK_API_KEY: + grok_client = AsyncOpenAI( + api_key=GROK_API_KEY, + base_url="https://api.x.ai/v1" + ) + +# Rate limiting for LLM APIs 
+llm_rate_limit = defaultdict(list) # {user_id: [timestamp1, timestamp2, ...]} +llm_daily_limit = defaultdict(int) # {user_id: count} +LLM_RPM_LIMIT = 50 # Set to 4 to be safe (limit is 5 for Gemini) +LLM_RPD_LIMIT = 500 # Daily limit: 18 to be safe (limit is 20 for Gemini) # Cache responses from JSON file @@ -206,7 +220,7 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE): # bot_mentioned = is_bot_mentioned(message_text) debug("Bot mentioned check: %s for message: %s", bot_mentioned, message_text) debug("USE_LLM setting: %s", USE_LLM) - debug("GEMINI_API_KEY configured: %s", bool(GEMINI_API_KEY)) + debug("LLM_PROVIDER: %s", LLM_PROVIDER) if bot_mentioned: if USE_LLM: @@ -476,7 +490,7 @@ async def send_pic(update: Update, pic) -> None: async def respond_with_llm_message(update): - """Handle LLM responses when bot is mentioned using Google Gemini API.""" + """Handle LLM responses when bot is mentioned using Gemini or Grok API.""" debug("LLM response function called") message_text = update.message.text # Remove bot mention and any punctuation after it @@ -484,18 +498,21 @@ async def respond_with_llm_message(update): debug("Original message: %s", message_text) debug("Processed prompt: %s", prompt) - if not GEMINI_API_KEY: - # debug("GEMINI_API_KEY not configured") - await update.message.reply_text("Sorry, AI service is not configured.") + # Check if API is configured + if LLM_PROVIDER == "grok" and not GROK_API_KEY: + await update.message.reply_text("Sorry, Grok AI service is not configured.") + return + elif LLM_PROVIDER == "gemini" and not GEMINI_API_KEY: + await update.message.reply_text("Sorry, Gemini AI service is not configured.") return # Rate limiting check user_id = update.effective_user.id current_time = time.time() # Clean old timestamps (older than 60 seconds) - gemini_rate_limit[user_id] = [t for t in gemini_rate_limit[user_id] if current_time - t < 60] + llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if 
current_time - t < 60] - if len(gemini_rate_limit[user_id]) >= GEMINI_RPM_LIMIT: + if len(llm_rate_limit[user_id]) >= LLM_RPM_LIMIT: debug("Rate limit hit for user %s", user_id) bot_response = ( "Вибачте, забагато запитів. Почекайте хвилину." @@ -504,9 +521,21 @@ async def respond_with_llm_message(update): ) await update.message.reply_text(bot_response) return + + # Check daily limit + if llm_daily_limit[user_id] >= LLM_RPD_LIMIT: + debug("Daily limit hit for user %s", user_id) + bot_response = ( + "Вибачте, денний ліміт запитів вичерпано. Спробуйте завтра." + if language == "uk" + else "Sorry, daily request limit reached. Try again tomorrow." + ) + await update.message.reply_text(bot_response) + return # Add current request timestamp - gemini_rate_limit[user_id].append(current_time) + llm_rate_limit[user_id].append(current_time) + llm_daily_limit[user_id] += 1 try: # Check if user is asking for image generation and modify prompt @@ -540,19 +569,87 @@ async def respond_with_llm_message(update): await update.message.reply_text(bot_response) return - # Initialize the Gemini model - debug("Initializing Gemini model: %s", GEMINI_MODEL) - plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences." - - model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction) - - # Try different approach - rephrase any potentially problematic prompts + # Prepare prompt debug("Original prompt: %s", prompt) safe_prompt = f"Відповідай українською мовою як дружній асистент. 
Питання користувача: {prompt}" debug("Modified safe prompt: %s", safe_prompt) - # Generate response using Gemini with both safety settings and safe prompting - debug("Sending request to Gemini API with model: %s", GEMINI_MODEL) + # Call appropriate LLM provider + if LLM_PROVIDER == "grok": + debug("Using Grok API with model: %s", GROK_MODEL) + bot_response = await call_grok_api(safe_prompt, update) + else: + debug("Using Gemini API with model: %s", GEMINI_MODEL) + bot_response = await call_gemini_api(safe_prompt, prompt, update) + + await update.message.reply_text(bot_response) + + except Exception as e: # pylint: disable=broad-except + import traceback + + error_msg = str(e) + error("Error in LLM API request: %s (Type: %s)", error_msg, type(e).__name__) + error("Full traceback: %s", traceback.format_exc()) + + # Check for rate limit (429) error + if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower(): + error("Rate limit exceeded (429) - Too many requests to LLM API") + bot_response = ( + "Вибачте, перевищено ліміт запитів до AI. Спробуйте пізніше." + if language == "uk" + else "Sorry, AI request limit exceeded. Please try again later." + ) + else: + bot_response = ( + "Вибачте, я не можу згенерувати відповідь." + if language == "uk" + else "Sorry, I encountered an error while processing your request." 
+ ) + + await update.message.reply_text(bot_response) + + +async def call_grok_api(safe_prompt: str, update) -> str: + """Call Grok API and return response.""" + try: + max_retries = 2 + retry_delay = 60 + + for attempt in range(max_retries): + try: + response = await grok_client.chat.completions.create( + model=GROK_MODEL, + messages=[{"role": "user", "content": safe_prompt}], + max_tokens=1024, + temperature=0.7 + ) + return response.choices[0].message.content.strip() + except Exception as retry_error: + error_msg = str(retry_error) + if ("429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()) and attempt < max_retries - 1: + debug("Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", retry_delay, attempt + 1, max_retries) + wait_msg = ( + f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..." + if language == "uk" + else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..." + ) + await update.message.reply_text(wait_msg) + await asyncio.sleep(retry_delay) + else: + raise + except Exception: + return ( + "Вибачте, я не можу згенерувати відповідь." + if language == "uk" + else "Sorry, I couldn't generate a response." + ) + + +async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: + """Call Gemini API and return response.""" + try: + plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences." 
+ model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction) safety_settings = { genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE, genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE, @@ -560,20 +657,38 @@ async def respond_with_llm_message(update): genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE, } contents = [{'role': 'user', 'parts': [safe_prompt]}] - response = await asyncio.to_thread( - model.generate_content, - contents, # Pass the simplified list here - generation_config=genai.types.GenerationConfig( - temperature=0.7, - top_p=0.9, - top_k=30, - max_output_tokens=1024, - ), - safety_settings=safety_settings, - ) - debug("Successfully received response from Gemini API") - - # Handle response with safety filter checks + + max_retries = 2 + retry_delay = 60 + + for attempt in range(max_retries): + try: + response = await asyncio.to_thread( + model.generate_content, + contents, + generation_config=genai.types.GenerationConfig( + temperature=0.7, + top_p=0.9, + top_k=30, + max_output_tokens=1024, + ), + safety_settings=safety_settings, + ) + debug("Successfully received response from Gemini API") + break + except Exception as retry_error: + error_msg = str(retry_error) + if ("429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()) and attempt < max_retries - 1: + debug("Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", retry_delay, attempt + 1, max_retries) + wait_msg = ( + f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..." + if language == "uk" + else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..." 
+ ) + await update.message.reply_text(wait_msg) + await asyncio.sleep(retry_delay) + else: + raise if hasattr(response, 'candidates') and response.candidates: candidate = response.candidates[0] debug("Response candidate finish_reason: %s", getattr(candidate, 'finish_reason', 'None')) @@ -581,7 +696,6 @@ async def respond_with_llm_message(update): if hasattr(candidate, 'finish_reason') and candidate.finish_reason == 2: debug("Safety filter triggered - finish_reason: 2, trying simpler approach") - # Try a much simpler, generic response for blocked content try: simple_response = await asyncio.to_thread( model.generate_content, @@ -589,65 +703,43 @@ async def respond_with_llm_message(update): safety_settings=safety_settings, ) if simple_response.text: - bot_response = f"Ось загальна інформація: {simple_response.text.strip()}" + return f"Ось загальна інформація: {simple_response.text.strip()}" else: - bot_response = ( + return ( "Вибачте, не можу надати детальну відповідь на це питання." if language == "uk" else "Sorry, I can't provide a detailed answer to this question." ) except: # --- IGNORE --- # pylint: disable=bare-except - bot_response = ( + return ( "Вибачте, не можу надати детальну відповідь на це питання." if language == "uk" else "Sorry, I can't provide a detailed answer to this question." ) elif response.text: - # Remove Markdown formatting from response + # Remove Markdown formatting bot_response = response.text.strip() - # Remove common Markdown syntax - bot_response = re.sub(r'\*+', '', bot_response) # Bold text - bot_response = bot_response.replace('*', '') # Italic text - bot_response = bot_response.replace('`', '') # Code blocks - bot_response = bot_response.replace('#', '') # Headers + bot_response = re.sub(r'\*+', '', bot_response) + bot_response = bot_response.replace('*', '').replace('`', '').replace('#', '') + return bot_response else: - bot_response = ( + return ( "Вибачте, я не можу згенерувати відповідь." 
if language == "uk" else "Sorry, I couldn't generate a response." ) else: - bot_response = ( + return ( "Вибачте, я не можу згенерувати відповідь." if language == "uk" else "Sorry, I couldn't generate a response." ) - - await update.message.reply_text(bot_response) - - except Exception as e: # pylint: disable=broad-except - import traceback - - error_msg = str(e) - error("Error in Gemini API request: %s (Type: %s)", error_msg, type(e).__name__) - error("Full traceback: %s", traceback.format_exc()) - - # Check for rate limit (429) error - if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower(): - error("Rate limit exceeded (429) - Too many requests to Gemini API") - bot_response = ( - "Вибачте, перевищено ліміт запитів до AI. Спробуйте пізніше." - if language == "uk" - else "Sorry, AI request limit exceeded. Please try again later." - ) - else: - bot_response = ( - "Вибачте, я не можу згенерувати відповідь." - if language == "uk" - else "Sorry, I encountered an error while processing your request." - ) - - await update.message.reply_text(bot_response) + except Exception: + return ( + "Вибачте, я не можу згенерувати відповідь." + if language == "uk" + else "Sorry, I couldn't generate a response." 
+ ) def main(): diff --git a/src/requirements.txt b/src/requirements.txt index f630e1a..2700381 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -4,3 +4,4 @@ yt-dlp==2026.2.21 gallery-dl==1.31.6 aiohttp==3.13.3 google-generativeai>=0.8.6 +openai>=1.0.0 From 0806b074a71dd3ee04ce4c9eee4602367c40c1c5 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 22:29:37 +0100 Subject: [PATCH 05/27] Parametrize user limits --- src/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index 337d830..f5dc70d 100644 --- a/src/main.py +++ b/src/main.py @@ -38,7 +38,7 @@ # Reply with user data for Healthcheck send_user_info_with_healthcheck = os.getenv("SEND_USER_INFO_WITH_HEALTHCHECK", "False").lower() == "true" USE_LLM = os.getenv("USE_LLM", "False").lower() == "true" -LLM_PROVIDER = os.getenv("LLM_PROVIDER", "gemini").lower() # gemini or grok +LLM_PROVIDER = os.getenv("LLM_PROVIDER", "grok").lower() # gemini or grok GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-flash-latest") GROK_API_KEY = os.getenv("GROK_API_KEY") @@ -61,8 +61,8 @@ # Rate limiting for LLM APIs llm_rate_limit = defaultdict(list) # {user_id: [timestamp1, timestamp2, ...]} llm_daily_limit = defaultdict(int) # {user_id: count} -LLM_RPM_LIMIT = 50 # Set to 4 to be safe (limit is 5 for Gemini) -LLM_RPD_LIMIT = 500 # Daily limit: 18 to be safe (limit is 20 for Gemini) +LLM_RPM_LIMIT = int(os.getenv("LLM_RPM_LIMIT", "50")) # Requests per minute per user +LLM_RPD_LIMIT = int(os.getenv("LLM_RPD_LIMIT", "500")) # Requests per day per user # Cache responses from JSON file From e91f722e67c010729f3fc3daabeb41e53d5278f2 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 22:31:13 +0100 Subject: [PATCH 06/27] Fix linter --- src/main.py | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 
insertions(+), 13 deletions(-) diff --git a/src/main.py b/src/main.py index f5dc70d..ef70e60 100644 --- a/src/main.py +++ b/src/main.py @@ -53,10 +53,7 @@ # Configure Grok API grok_client = None if GROK_API_KEY: - grok_client = AsyncOpenAI( - api_key=GROK_API_KEY, - base_url="https://api.x.ai/v1" - ) + grok_client = AsyncOpenAI(api_key=GROK_API_KEY, base_url="https://api.x.ai/v1") # Rate limiting for LLM APIs llm_rate_limit = defaultdict(list) # {user_id: [timestamp1, timestamp2, ...]} @@ -521,7 +518,7 @@ async def respond_with_llm_message(update): ) await update.message.reply_text(bot_response) return - + # Check daily limit if llm_daily_limit[user_id] >= LLM_RPD_LIMIT: debug("Daily limit hit for user %s", user_id) @@ -614,20 +611,27 @@ async def call_grok_api(safe_prompt: str, update) -> str: try: max_retries = 2 retry_delay = 60 - + for attempt in range(max_retries): try: response = await grok_client.chat.completions.create( model=GROK_MODEL, messages=[{"role": "user", "content": safe_prompt}], max_tokens=1024, - temperature=0.7 + temperature=0.7, ) return response.choices[0].message.content.strip() except Exception as retry_error: error_msg = str(retry_error) - if ("429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()) and attempt < max_retries - 1: - debug("Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", retry_delay, attempt + 1, max_retries) + if ( + "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower() + ) and attempt < max_retries - 1: + debug( + "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", + retry_delay, + attempt + 1, + max_retries, + ) wait_msg = ( f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..." 
if language == "uk" @@ -657,10 +661,10 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE, } contents = [{'role': 'user', 'parts': [safe_prompt]}] - + max_retries = 2 retry_delay = 60 - + for attempt in range(max_retries): try: response = await asyncio.to_thread( @@ -678,8 +682,15 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: break except Exception as retry_error: error_msg = str(retry_error) - if ("429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()) and attempt < max_retries - 1: - debug("Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", retry_delay, attempt + 1, max_retries) + if ( + "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower() + ) and attempt < max_retries - 1: + debug( + "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", + retry_delay, + attempt + 1, + max_retries, + ) wait_msg = ( f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..." 
if language == "uk" From 6db35fb43660834d24c95da51afa708e6a3ca40b Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 22:35:23 +0100 Subject: [PATCH 07/27] Add disable=broad-exception-caught for pylint --- src/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main.py b/src/main.py index ef70e60..9ed8aba 100644 --- a/src/main.py +++ b/src/main.py @@ -621,7 +621,7 @@ async def call_grok_api(safe_prompt: str, update) -> str: temperature=0.7, ) return response.choices[0].message.content.strip() - except Exception as retry_error: + except Exception as retry_error: # pylint: disable=broad-exception-caught error_msg = str(retry_error) if ( "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower() @@ -641,7 +641,7 @@ async def call_grok_api(safe_prompt: str, update) -> str: await asyncio.sleep(retry_delay) else: raise - except Exception: + except Exception: # pylint: disable=broad-exception-caught return ( "Вибачте, я не можу згенерувати відповідь." if language == "uk" @@ -680,7 +680,7 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: ) debug("Successfully received response from Gemini API") break - except Exception as retry_error: + except Exception as retry_error: # pylint: disable=broad-exception-caught error_msg = str(retry_error) if ( "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower() @@ -745,7 +745,7 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: if language == "uk" else "Sorry, I couldn't generate a response." ) - except Exception: + except Exception: # pylint: disable=broad-exception-caught return ( "Вибачте, я не можу згенерувати відповідь." 
if language == "uk" From 1e61c42fb2f6c3ea0bb15d0d976b4e54a55f02a2 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 22:52:55 +0100 Subject: [PATCH 08/27] Implement conversation context --- src/main.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index 9ed8aba..ae0e8e3 100644 --- a/src/main.py +++ b/src/main.py @@ -38,6 +38,7 @@ # Reply with user data for Healthcheck send_user_info_with_healthcheck = os.getenv("SEND_USER_INFO_WITH_HEALTHCHECK", "False").lower() == "true" USE_LLM = os.getenv("USE_LLM", "False").lower() == "true" +USE_CONVERSATION_CONTEXT = os.getenv("USE_CONVERSATION_CONTEXT", "True").lower() == "true" LLM_PROVIDER = os.getenv("LLM_PROVIDER", "grok").lower() # gemini or grok GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-flash-latest") @@ -61,6 +62,10 @@ LLM_RPM_LIMIT = int(os.getenv("LLM_RPM_LIMIT", "50")) # Requests per minute per user LLM_RPD_LIMIT = int(os.getenv("LLM_RPD_LIMIT", "500")) # Requests per day per user +# Conversation context storage: {user_id: [(user_msg, bot_response), ...]} +conversation_context = defaultdict(list) +MAX_CONTEXT_MESSAGES = int(os.getenv("MAX_CONTEXT_MESSAGES", "3")) # Keep last N exchanges + # Cache responses from JSON file @lru_cache(maxsize=1) @@ -566,10 +571,26 @@ async def respond_with_llm_message(update): await update.message.reply_text(bot_response) return - # Prepare prompt + # Prepare prompt with context debug("Original prompt: %s", prompt) - safe_prompt = f"Відповідай українською мовою як дружній асистент. 
Питання користувача: {prompt}" - debug("Modified safe prompt: %s", safe_prompt) + debug("Original prompt: %s", prompt) + + # Build context from previous messages if enabled + user_id = update.effective_user.id + if USE_CONVERSATION_CONTEXT: + context_messages = ( + conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] if conversation_context[user_id] else [] + ) + else: + context_messages = [] + + # Create prompt with context if available + if context_messages: + context_str = "\n".join([f"Користувач: {msg}\nАсистент: {resp}" for msg, resp in context_messages]) + safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\nВідповідай українською мовою як дружній асистент." + else: + safe_prompt = f"Відповідай українською мовою як дружній асистент. Питання користувача: {prompt}" + + debug("Modified safe prompt with context: %s", safe_prompt[:200]) # Call appropriate LLM provider if LLM_PROVIDER == "grok": @@ -579,6 +600,13 @@ async def respond_with_llm_message(update): debug("Using Gemini API with model: %s", GEMINI_MODEL) bot_response = await call_gemini_api(safe_prompt, prompt, update) + # Store conversation in context if enabled + if USE_CONVERSATION_CONTEXT: + conversation_context[user_id].append((prompt, bot_response)) + # Keep only last MAX_CONTEXT_MESSAGES + if len(conversation_context[user_id]) > MAX_CONTEXT_MESSAGES: + conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] + await update.message.reply_text(bot_response) except Exception as e: # pylint: disable=broad-except From 8c192004dae86fe4ef5936cd87ded5ee3cf8a0b4 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 22:56:55 +0100 Subject: [PATCH 09/27] Add Ukrainian translation to some log messages --- src/main.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index ae0e8e3..4c69998 100644 --- a/src/main.py +++ b/src/main.py @@ -502,10 +502,20 @@ async def 
respond_with_llm_message(update): # Check if API is configured if LLM_PROVIDER == "grok" and not GROK_API_KEY: - await update.message.reply_text("Sorry, Grok AI service is not configured.") + bot_response = ( + "Вибачте, Grok AI сервіс не налаштовано." + if language == "uk" + else "Sorry, Grok AI service is not configured." + ) + await update.message.reply_text(bot_response) return elif LLM_PROVIDER == "gemini" and not GEMINI_API_KEY: - await update.message.reply_text("Sorry, Gemini AI service is not configured.") + bot_response = ( + "Вибачте, Gemini AI сервіс не налаштовано." + if language == "uk" + else "Sorry, Gemini AI service is not configured." + ) + await update.message.reply_text(bot_response) return # Rate limiting check From ab9188e08d697bbf0d3e44f7ba51d50c6ffd1a97 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 22:59:56 +0100 Subject: [PATCH 10/27] Modify system prompt to LLM --- src/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index 4c69998..c760c10 100644 --- a/src/main.py +++ b/src/main.py @@ -596,9 +596,9 @@ async def respond_with_llm_message(update): # Create prompt with context if available if context_messages: context_str = "\n".join([f"Користувач: {msg}\nАсистент: {resp}" for msg, resp in context_messages]) - safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\nВідповідай українською мовою як дружній асистент." + safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\nВідповідай українською мовою як дружній асистент. Не вітайся і не прощайся." else: - safe_prompt = f"Відповідай українською мовою як дружній асистент. Питання користувача: {prompt}" + safe_prompt = f"Відповідай українською мовою як дружній асистент. Не вітайся і не прощайся. 
Питання користувача: {prompt}" debug("Modified safe prompt with context: %s", safe_prompt[:200]) From 606d45ada57af4d16544a04044adbc9666da43bb Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Wed, 4 Mar 2026 23:12:17 +0100 Subject: [PATCH 11/27] Implement MAX_CONTEXT_CHARS --- src/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index c760c10..a9d5500 100644 --- a/src/main.py +++ b/src/main.py @@ -65,6 +65,7 @@ # Conversation context storage: {user_id: [(user_msg, bot_response), ...]} conversation_context = defaultdict(list) MAX_CONTEXT_MESSAGES = int(os.getenv("MAX_CONTEXT_MESSAGES", "3")) # Keep last N exchanges +MAX_CONTEXT_CHARS = int(os.getenv("MAX_CONTEXT_CHARS", "500")) # Max chars per message in context # Cache responses from JSON file @@ -612,7 +613,9 @@ async def respond_with_llm_message(update): # Store conversation in context if enabled if USE_CONVERSATION_CONTEXT: - conversation_context[user_id].append((prompt, bot_response)) + truncated_prompt = prompt[:MAX_CONTEXT_CHARS] + truncated_response = bot_response[:MAX_CONTEXT_CHARS] + conversation_context[user_id].append((truncated_prompt, truncated_response)) # Keep only last MAX_CONTEXT_MESSAGES if len(conversation_context[user_id]) > MAX_CONTEXT_MESSAGES: conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] From 98e47dc90fade2bf1bc517a18d9d803e22183635 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:00:02 +0100 Subject: [PATCH 12/27] Fix LLM rate limiting and error handling issues - Move traceback import to module level to avoid inline imports - Add provider validation with ALLOWED_PROVIDERS set - Replace bare except with except Exception and add logging - Implement daily limit reset using date tracking - Fix quota consumption: only increment counters after successful API calls - Add tentative timestamp 
mechanism that rolls back on failures --- PR_MESSAGE.md | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.py | 38 +++++++++++++++++++++---- 2 files changed, 109 insertions(+), 6 deletions(-) create mode 100644 PR_MESSAGE.md diff --git a/PR_MESSAGE.md b/PR_MESSAGE.md new file mode 100644 index 0000000..f137f75 --- /dev/null +++ b/PR_MESSAGE.md @@ -0,0 +1,77 @@ +# Add Grok API support and improve LLM integration + +## Summary +This PR adds Grok API as an alternative LLM provider alongside Gemini, implements conversation context tracking, adds configurable rate limiting, improves error handling, and optimizes token usage through context truncation. + +## Changes + +### 1. Grok API Integration +- Add Grok API support using OpenAI-compatible client +- Add environment variables: `LLM_PROVIDER`, `GROK_API_KEY`, `GROK_MODEL` +- Unified LLM approach: renamed `gemini_*` variables to `llm_*` for provider-agnostic naming +- Add `openai>=1.0.0` dependency to requirements.txt + +### 2. Conversation Context +- Implement conversation history tracking per user +- Add `USE_CONVERSATION_CONTEXT` flag (default: True) +- Add `MAX_CONTEXT_MESSAGES` to control number of exchanges stored (default: 3) +- Add `MAX_CONTEXT_CHARS` to limit token usage by truncating stored messages (default: 500 chars) +- Context is included in prompts to maintain conversation flow +- **Token optimization**: Reduces context size by ~75% (from ~6000 to ~1500 chars for 3 exchanges) + +### 3. Rate Limiting +- Implement per-user rate limiting for LLM APIs +- Add `LLM_RPM_LIMIT` (requests per minute, default: 50) +- Add `LLM_RPD_LIMIT` (requests per day, default: 500) +- Automatic cleanup of old timestamps +- User-friendly rate limit messages in Ukrainian and English + +### 4. 
Error Handling & Logging +- Add proper handling for 429 (Too Many Requests) errors from LLM APIs +- Add detailed error logging with full traceback for debugging +- Add retry logic with 60-second delay for rate limit errors (max 2 attempts) +- Distinguish between rate limit errors and other API failures in user messages +- Log exception type along with error message +- Add error logging when API key is not configured but `USE_LLM=True` + +### 5. Code Quality +- Fix Black formatting issues (line breaks, spacing) +- Add `# pylint: disable=broad-exception-caught` comments for retry logic +- Add Ukrainian translations for all new error messages + +### 6. Configuration +- Add missing `USE_LLM` variable to `.env.example` +- Add all new LLM-related variables to `.env.example` +- Add configuration check messages in both Ukrainian and English + +## Environment Variables Added +```ini +USE_LLM=False # Enable LLM responses +LLM_PROVIDER=grok # grok or gemini +GROK_API_KEY=your_grok_api_key +GROK_MODEL=grok-4-latest +USE_CONVERSATION_CONTEXT=True # Enable conversation history +MAX_CONTEXT_MESSAGES=3 # Number of exchanges to remember +MAX_CONTEXT_CHARS=500 # Max chars per message in context (token optimization) +LLM_RPM_LIMIT=50 # Requests per minute per user +LLM_RPD_LIMIT=500 # Requests per day per user +``` + +## Benefits +- **Flexibility**: Choose between Grok (480 RPM, 4M TPM) and Gemini (5 RPM, 20 RPD) +- **Better UX**: Conversation context makes bot responses more relevant +- **Cost optimization**: Context truncation saves ~75% of tokens +- **Reliability**: Automatic retry on rate limits with user feedback +- **Debuggability**: Full error logging makes issues easy to diagnose +- **Protection**: Rate limiting prevents API quota exhaustion + +## Testing +1. Set `LOG_LEVEL=DEBUG` to see detailed API logs and error traces +2. Test with `USE_LLM=True` and both `LLM_PROVIDER=grok` and `LLM_PROVIDER=gemini` +3. Test conversation context by asking follow-up questions +4. 
Test rate limiting by making multiple rapid requests + +## Related Issues +- Fixes issue where bot returns generic error without logging actual API errors +- Fixes missing conversation context causing bot to "forget" previous messages +- Fixes token waste from storing full LLM responses in context diff --git a/src/main.py b/src/main.py index a9d5500..bad53a4 100644 --- a/src/main.py +++ b/src/main.py @@ -6,6 +6,8 @@ import asyncio import re import time +import traceback +from datetime import datetime import google.generativeai as genai from openai import AsyncOpenAI from functools import lru_cache @@ -58,7 +60,7 @@ # Rate limiting for LLM APIs llm_rate_limit = defaultdict(list) # {user_id: [timestamp1, timestamp2, ...]} -llm_daily_limit = defaultdict(int) # {user_id: count} +llm_daily_limit = defaultdict(lambda: {"count": 0, "date": ""}) # {user_id: {count, date}} LLM_RPM_LIMIT = int(os.getenv("LLM_RPM_LIMIT", "50")) # Requests per minute per user LLM_RPD_LIMIT = int(os.getenv("LLM_RPD_LIMIT", "500")) # Requests per day per user @@ -67,6 +69,9 @@ MAX_CONTEXT_MESSAGES = int(os.getenv("MAX_CONTEXT_MESSAGES", "3")) # Keep last N exchanges MAX_CONTEXT_CHARS = int(os.getenv("MAX_CONTEXT_CHARS", "500")) # Max chars per message in context +# Allowed LLM providers +ALLOWED_PROVIDERS = {"grok", "gemini"} + # Cache responses from JSON file @lru_cache(maxsize=1) @@ -501,6 +506,16 @@ async def respond_with_llm_message(update): debug("Original message: %s", message_text) debug("Processed prompt: %s", prompt) + # Validate LLM provider + if LLM_PROVIDER not in ALLOWED_PROVIDERS: + bot_response = ( + f"Вибачте, провайдер '{LLM_PROVIDER}' не підтримується. Доступні: {', '.join(ALLOWED_PROVIDERS)}" + if language == "uk" + else f"Sorry, provider '{LLM_PROVIDER}' is not supported. 
Available: {', '.join(ALLOWED_PROVIDERS)}" + ) + await update.message.reply_text(bot_response) + return + # Check if API is configured if LLM_PROVIDER == "grok" and not GROK_API_KEY: bot_response = ( @@ -536,7 +551,11 @@ async def respond_with_llm_message(update): return # Check daily limit - if llm_daily_limit[user_id] >= LLM_RPD_LIMIT: + today = datetime.now().strftime("%Y-%m-%d") + if llm_daily_limit[user_id]["date"] != today: + llm_daily_limit[user_id] = {"count": 0, "date": today} + + if llm_daily_limit[user_id]["count"] >= LLM_RPD_LIMIT: debug("Daily limit hit for user %s", user_id) bot_response = ( "Вибачте, денний ліміт запитів вичерпано. Спробуйте завтра." @@ -546,9 +565,8 @@ async def respond_with_llm_message(update): await update.message.reply_text(bot_response) return - # Add current request timestamp + # Tentatively add current request timestamp (will be removed on failure) llm_rate_limit[user_id].append(current_time) - llm_daily_limit[user_id] += 1 try: # Check if user is asking for image generation and modify prompt @@ -580,6 +598,8 @@ async def respond_with_llm_message(update): bot_response = "Sorry, I can't generate images, but I can describe in detail what you're asking for! For example, I can tell you about a car: its color, shape, design features, etc. What specifically interests you?" 
await update.message.reply_text(bot_response) + # Remove tentative timestamp since no API call was made + llm_rate_limit[user_id].pop() return # Prepare prompt with context @@ -611,6 +631,9 @@ async def respond_with_llm_message(update): debug("Using Gemini API with model: %s", GEMINI_MODEL) bot_response = await call_gemini_api(safe_prompt, prompt, update) + # Increment daily limit only after successful API call + llm_daily_limit[user_id]["count"] += 1 + # Store conversation in context if enabled if USE_CONVERSATION_CONTEXT: truncated_prompt = prompt[:MAX_CONTEXT_CHARS] @@ -623,7 +646,9 @@ async def respond_with_llm_message(update): await update.message.reply_text(bot_response) except Exception as e: # pylint: disable=broad-except - import traceback + # Remove tentative timestamp on failure + if llm_rate_limit[user_id] and llm_rate_limit[user_id][-1] == current_time: + llm_rate_limit[user_id].pop() error_msg = str(e) error("Error in LLM API request: %s (Type: %s)", error_msg, type(e).__name__) @@ -762,7 +787,8 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: if language == "uk" else "Sorry, I can't provide a detailed answer to this question." ) - except: # --- IGNORE --- # pylint: disable=bare-except + except Exception: # pylint: disable=broad-exception-caught + error("Fallback response generation failed") return ( "Вибачте, не можу надати детальну відповідь на це питання." 
if language == "uk" From 5c6c98bb3490401750102e30b7dd2749b023d96a Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:02:03 +0100 Subject: [PATCH 13/27] Remove PR_MESSAGE.md --- PR_MESSAGE.md | 77 --------------------------------------------------- 1 file changed, 77 deletions(-) delete mode 100644 PR_MESSAGE.md diff --git a/PR_MESSAGE.md b/PR_MESSAGE.md deleted file mode 100644 index f137f75..0000000 --- a/PR_MESSAGE.md +++ /dev/null @@ -1,77 +0,0 @@ -# Add Grok API support and improve LLM integration - -## Summary -This PR adds Grok API as an alternative LLM provider alongside Gemini, implements conversation context tracking, adds configurable rate limiting, improves error handling, and optimizes token usage through context truncation. - -## Changes - -### 1. Grok API Integration -- Add Grok API support using OpenAI-compatible client -- Add environment variables: `LLM_PROVIDER`, `GROK_API_KEY`, `GROK_MODEL` -- Unified LLM approach: renamed `gemini_*` variables to `llm_*` for provider-agnostic naming -- Add `openai>=1.0.0` dependency to requirements.txt - -### 2. Conversation Context -- Implement conversation history tracking per user -- Add `USE_CONVERSATION_CONTEXT` flag (default: True) -- Add `MAX_CONTEXT_MESSAGES` to control number of exchanges stored (default: 3) -- Add `MAX_CONTEXT_CHARS` to limit token usage by truncating stored messages (default: 500 chars) -- Context is included in prompts to maintain conversation flow -- **Token optimization**: Reduces context size by ~75% (from ~6000 to ~1500 chars for 3 exchanges) - -### 3. Rate Limiting -- Implement per-user rate limiting for LLM APIs -- Add `LLM_RPM_LIMIT` (requests per minute, default: 50) -- Add `LLM_RPD_LIMIT` (requests per day, default: 500) -- Automatic cleanup of old timestamps -- User-friendly rate limit messages in Ukrainian and English - -### 4. 
Error Handling & Logging -- Add proper handling for 429 (Too Many Requests) errors from LLM APIs -- Add detailed error logging with full traceback for debugging -- Add retry logic with 60-second delay for rate limit errors (max 2 attempts) -- Distinguish between rate limit errors and other API failures in user messages -- Log exception type along with error message -- Add error logging when API key is not configured but `USE_LLM=True` - -### 5. Code Quality -- Fix Black formatting issues (line breaks, spacing) -- Add `# pylint: disable=broad-exception-caught` comments for retry logic -- Add Ukrainian translations for all new error messages - -### 6. Configuration -- Add missing `USE_LLM` variable to `.env.example` -- Add all new LLM-related variables to `.env.example` -- Add configuration check messages in both Ukrainian and English - -## Environment Variables Added -```ini -USE_LLM=False # Enable LLM responses -LLM_PROVIDER=grok # grok or gemini -GROK_API_KEY=your_grok_api_key -GROK_MODEL=grok-4-latest -USE_CONVERSATION_CONTEXT=True # Enable conversation history -MAX_CONTEXT_MESSAGES=3 # Number of exchanges to remember -MAX_CONTEXT_CHARS=500 # Max chars per message in context (token optimization) -LLM_RPM_LIMIT=50 # Requests per minute per user -LLM_RPD_LIMIT=500 # Requests per day per user -``` - -## Benefits -- **Flexibility**: Choose between Grok (480 RPM, 4M TPM) and Gemini (5 RPM, 20 RPD) -- **Better UX**: Conversation context makes bot responses more relevant -- **Cost optimization**: Context truncation saves ~75% of tokens -- **Reliability**: Automatic retry on rate limits with user feedback -- **Debuggability**: Full error logging makes issues easy to diagnose -- **Protection**: Rate limiting prevents API quota exhaustion - -## Testing -1. Set `LOG_LEVEL=DEBUG` to see detailed API logs and error traces -2. Test with `USE_LLM=True` and both `LLM_PROVIDER=grok` and `LLM_PROVIDER=gemini` -3. Test conversation context by asking follow-up questions -4. 
Test rate limiting by making multiple rapid requests - -## Related Issues -- Fixes issue where bot returns generic error without logging actual API errors -- Fixes missing conversation context causing bot to "forget" previous messages -- Fixes token waste from storing full LLM responses in context From 725c3e15253ede264b45c9ceda9e23c97e1cf354 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:17:40 +0100 Subject: [PATCH 14/27] Fix LLM API helpers to propagate exceptions and add plain text instruction to Grok - Remove duplicate user_id assignment - Make call_grok_api and call_gemini_api raise exceptions instead of returning error strings - Add plain text system instruction to Grok API for consistent formatting - Ensure quota counters only increment after successful API responses - Add periodic cleanup task to prevent unbounded memory growth of user data --- src/main.py | 284 +++++++++++++++++++++++++++------------------------- 1 file changed, 146 insertions(+), 138 deletions(-) diff --git a/src/main.py b/src/main.py index bad53a4..c73eed8 100644 --- a/src/main.py +++ b/src/main.py @@ -69,6 +69,11 @@ MAX_CONTEXT_MESSAGES = int(os.getenv("MAX_CONTEXT_MESSAGES", "3")) # Keep last N exchanges MAX_CONTEXT_CHARS = int(os.getenv("MAX_CONTEXT_CHARS", "500")) # Max chars per message in context +# User activity tracking for cleanup +user_last_seen = defaultdict(float) # {user_id: timestamp} +USER_CLEANUP_TTL_DAYS = int(os.getenv("USER_CLEANUP_TTL_DAYS", "3")) # Days before user data expires +USER_CLEANUP_INTERVAL_HOURS = int(os.getenv("USER_CLEANUP_INTERVAL_HOURS", "24")) # Cleanup interval + # Allowed LLM providers ALLOWED_PROVIDERS = {"grok", "gemini"} @@ -537,6 +542,10 @@ async def respond_with_llm_message(update): # Rate limiting check user_id = update.effective_user.id current_time = time.time() + + # Update last seen timestamp + user_last_seen[user_id] = current_time + # Clean old timestamps (older than 60 
seconds) llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if current_time - t < 60] @@ -606,7 +615,6 @@ async def respond_with_llm_message(update): debug("Original prompt: %s", prompt) # Build context from previous messages if enabled - user_id = update.effective_user.id if USE_CONVERSATION_CONTEXT: context_messages = ( conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] if conversation_context[user_id] else [] @@ -673,151 +681,147 @@ async def respond_with_llm_message(update): async def call_grok_api(safe_prompt: str, update) -> str: - """Call Grok API and return response.""" - try: - max_retries = 2 - retry_delay = 60 - - for attempt in range(max_retries): - try: - response = await grok_client.chat.completions.create( - model=GROK_MODEL, - messages=[{"role": "user", "content": safe_prompt}], - max_tokens=1024, - temperature=0.7, + """Call Grok API and return response. Raises exception on failure.""" + plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences." 
+ max_retries = 2 + retry_delay = 60 + + for attempt in range(max_retries): + try: + response = await grok_client.chat.completions.create( + model=GROK_MODEL, + messages=[ + {"role": "system", "content": plain_text_instruction}, + {"role": "user", "content": safe_prompt}, + ], + max_tokens=1024, + temperature=0.7, + ) + return response.choices[0].message.content.strip() + except Exception as retry_error: # pylint: disable=broad-exception-caught + error_msg = str(retry_error) + if ( + "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower() + ) and attempt < max_retries - 1: + debug( + "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", + retry_delay, + attempt + 1, + max_retries, ) - return response.choices[0].message.content.strip() - except Exception as retry_error: # pylint: disable=broad-exception-caught - error_msg = str(retry_error) - if ( - "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower() - ) and attempt < max_retries - 1: - debug( - "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", - retry_delay, - attempt + 1, - max_retries, - ) - wait_msg = ( - f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..." - if language == "uk" - else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..." - ) - await update.message.reply_text(wait_msg) - await asyncio.sleep(retry_delay) - else: - raise - except Exception: # pylint: disable=broad-exception-caught - return ( - "Вибачте, я не можу згенерувати відповідь." - if language == "uk" - else "Sorry, I couldn't generate a response." - ) + wait_msg = ( + f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..." + if language == "uk" + else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..." 
+ ) + await update.message.reply_text(wait_msg) + await asyncio.sleep(retry_delay) + else: + raise async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: - """Call Gemini API and return response.""" - try: - plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences." - model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction) - safety_settings = { - genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE, - genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE, - genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE, - genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE, - } - contents = [{'role': 'user', 'parts': [safe_prompt]}] - - max_retries = 2 - retry_delay = 60 - - for attempt in range(max_retries): - try: - response = await asyncio.to_thread( - model.generate_content, - contents, - generation_config=genai.types.GenerationConfig( - temperature=0.7, - top_p=0.9, - top_k=30, - max_output_tokens=1024, - ), - safety_settings=safety_settings, + """Call Gemini API and return response. Raises exception on failure.""" + plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences." 
+ model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction) + safety_settings = { + genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE, + genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE, + genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE, + genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE, + } + contents = [{'role': 'user', 'parts': [safe_prompt]}] + + max_retries = 2 + retry_delay = 60 + + for attempt in range(max_retries): + try: + response = await asyncio.to_thread( + model.generate_content, + contents, + generation_config=genai.types.GenerationConfig( + temperature=0.7, + top_p=0.9, + top_k=30, + max_output_tokens=1024, + ), + safety_settings=safety_settings, + ) + debug("Successfully received response from Gemini API") + break + except Exception as retry_error: # pylint: disable=broad-exception-caught + error_msg = str(retry_error) + if ( + "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower() + ) and attempt < max_retries - 1: + debug( + "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", + retry_delay, + attempt + 1, + max_retries, ) - debug("Successfully received response from Gemini API") - break - except Exception as retry_error: # pylint: disable=broad-exception-caught - error_msg = str(retry_error) - if ( - "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower() - ) and attempt < max_retries - 1: - debug( - "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", - retry_delay, - attempt + 1, - max_retries, - ) - wait_msg = ( - f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..." - if language == "uk" - else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..." 
- ) - await update.message.reply_text(wait_msg) - await asyncio.sleep(retry_delay) - else: - raise - if hasattr(response, 'candidates') and response.candidates: - candidate = response.candidates[0] - debug("Response candidate finish_reason: %s", getattr(candidate, 'finish_reason', 'None')) - debug("Response candidate safety_ratings: %s", getattr(candidate, 'safety_ratings', 'None')) - - if hasattr(candidate, 'finish_reason') and candidate.finish_reason == 2: - debug("Safety filter triggered - finish_reason: 2, trying simpler approach") - try: - simple_response = await asyncio.to_thread( - model.generate_content, - "Відповідь українською мовою: дай загальну інформацію про: " + prompt, - safety_settings=safety_settings, - ) - if simple_response.text: - return f"Ось загальна інформація: {simple_response.text.strip()}" - else: - return ( - "Вибачте, не можу надати детальну відповідь на це питання." - if language == "uk" - else "Sorry, I can't provide a detailed answer to this question." - ) - except Exception: # pylint: disable=broad-exception-caught - error("Fallback response generation failed") - return ( - "Вибачте, не можу надати детальну відповідь на це питання." - if language == "uk" - else "Sorry, I can't provide a detailed answer to this question." - ) - elif response.text: - # Remove Markdown formatting - bot_response = response.text.strip() - bot_response = re.sub(r'\*+', '', bot_response) - bot_response = bot_response.replace('*', '').replace('`', '').replace('#', '') - return bot_response - else: - return ( - "Вибачте, я не можу згенерувати відповідь." + wait_msg = ( + f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..." if language == "uk" - else "Sorry, I couldn't generate a response." + else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..." ) - else: - return ( - "Вибачте, я не можу згенерувати відповідь." - if language == "uk" - else "Sorry, I couldn't generate a response." 
+ await update.message.reply_text(wait_msg) + await asyncio.sleep(retry_delay) + else: + raise + if hasattr(response, 'candidates') and response.candidates: + candidate = response.candidates[0] + debug("Response candidate finish_reason: %s", getattr(candidate, 'finish_reason', 'None')) + debug("Response candidate safety_ratings: %s", getattr(candidate, 'safety_ratings', 'None')) + + if hasattr(candidate, 'finish_reason') and candidate.finish_reason == 2: + debug("Safety filter triggered - finish_reason: 2, trying simpler approach") + simple_response = await asyncio.to_thread( + model.generate_content, + "Відповідь українською мовою: дай загальну інформацію про: " + prompt, + safety_settings=safety_settings, ) - except Exception: # pylint: disable=broad-exception-caught - return ( - "Вибачте, я не можу згенерувати відповідь." - if language == "uk" - else "Sorry, I couldn't generate a response." - ) + if simple_response.text: + return f"Ось загальна інформація: {simple_response.text.strip()}" + else: + raise Exception("Вибачте, не можу надати детальну відповідь на це питання.") + elif response.text: + # Remove Markdown formatting + bot_response = response.text.strip() + bot_response = re.sub(r'\*+', '', bot_response) + bot_response = bot_response.replace('*', '').replace('`', '').replace('#', '') + return bot_response + else: + raise Exception("Вибачте, я не можу згенерувати відповідь.") + else: + raise Exception("Вибачте, я не можу згенерувати відповідь.") + + +async def cleanup_stale_users(): + """Remove inactive users from memory to prevent unbounded growth.""" + while True: + await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600) + current_time = time.time() + ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400 + + stale_users = [ + user_id for user_id, last_seen in user_last_seen.items() + if current_time - last_seen > ttl_seconds + ] + + for user_id in stale_users: + if user_id in conversation_context: + del conversation_context[user_id] + if user_id in 
llm_rate_limit: + del llm_rate_limit[user_id] + if user_id in llm_daily_limit: + del llm_daily_limit[user_id] + if user_id in user_last_seen: + del user_last_seen[user_id] + + if stale_users: + info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS) def main(): @@ -851,6 +855,10 @@ def main(): application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message)) # This handler will receive every error which happens in your bot application.add_error_handler(error_handler) + + # Start cleanup task + asyncio.create_task(cleanup_stale_users()) + info("Bot started. Ctrl+C to stop") application.run_polling() From c9865ba02c690ea09974dbf6a69e0f09149ea665 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:19:40 +0100 Subject: [PATCH 15/27] Fix linter --- src/main.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/main.py b/src/main.py index c73eed8..646d672 100644 --- a/src/main.py +++ b/src/main.py @@ -542,10 +542,10 @@ async def respond_with_llm_message(update): # Rate limiting check user_id = update.effective_user.id current_time = time.time() - + # Update last seen timestamp user_last_seen[user_id] = current_time - + # Clean old timestamps (older than 60 seconds) llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if current_time - t < 60] @@ -804,12 +804,11 @@ async def cleanup_stale_users(): await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600) current_time = time.time() ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400 - + stale_users = [ - user_id for user_id, last_seen in user_last_seen.items() - if current_time - last_seen > ttl_seconds + user_id for user_id, last_seen in user_last_seen.items() if current_time - last_seen > ttl_seconds ] - + for user_id in stale_users: if user_id in conversation_context: del conversation_context[user_id] @@ -819,7 +818,7 @@ async def cleanup_stale_users(): del 
llm_daily_limit[user_id] if user_id in user_last_seen: del user_last_seen[user_id] - + if stale_users: info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS) @@ -855,10 +854,10 @@ def main(): application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message)) # This handler will receive every error which happens in your bot application.add_error_handler(error_handler) - + # Start cleanup task asyncio.create_task(cleanup_stale_users()) - + info("Bot started. Ctrl+C to stop") application.run_polling() From c3b6179fbdcf5ffba766f4bcc9ea0afe2425f481 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:21:13 +0100 Subject: [PATCH 16/27] Add disable=broad-exception-caught for pyling --- src/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index 646d672..f25cf25 100644 --- a/src/main.py +++ b/src/main.py @@ -785,7 +785,7 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: if simple_response.text: return f"Ось загальна інформація: {simple_response.text.strip()}" else: - raise Exception("Вибачте, не можу надати детальну відповідь на це питання.") + raise Exception("Вибачте, не можу надати детальну відповідь на це питання.") # pylint: disable=broad-exception-raised elif response.text: # Remove Markdown formatting bot_response = response.text.strip() @@ -793,9 +793,9 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: bot_response = bot_response.replace('*', '').replace('`', '').replace('#', '') return bot_response else: - raise Exception("Вибачте, я не можу згенерувати відповідь.") + raise Exception("Вибачте, я не можу згенерувати відповідь.") # pylint: disable=broad-exception-raised else: - raise Exception("Вибачте, я не можу згенерувати відповідь.") + raise Exception("Вибачте, я не можу згенерувати відповідь.") # pylint: disable=broad-exception-raised 
async def cleanup_stale_users(): From 28bb1c7a7e19e0537f8c36f2e2d38a09459464ab Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:23:31 +0100 Subject: [PATCH 17/27] Fix linter --- src/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index f25cf25..843c057 100644 --- a/src/main.py +++ b/src/main.py @@ -785,7 +785,9 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: if simple_response.text: return f"Ось загальна інформація: {simple_response.text.strip()}" else: - raise Exception("Вибачте, не можу надати детальну відповідь на це питання.") # pylint: disable=broad-exception-raised + raise Exception( + "Вибачте, не можу надати детальну відповідь на це питання." + ) # pylint: disable=broad-exception-raised elif response.text: # Remove Markdown formatting bot_response = response.text.strip() From a8dd712c009ef9b2a30cdf4dd45976ba4b9401dd Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:26:47 +0100 Subject: [PATCH 18/27] Fix cleanup task initialization --- src/main.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/main.py b/src/main.py index 843c057..8d760a7 100644 --- a/src/main.py +++ b/src/main.py @@ -785,9 +785,9 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: if simple_response.text: return f"Ось загальна інформація: {simple_response.text.strip()}" else: - raise Exception( + raise Exception( # pylint: disable=broad-exception-raised "Вибачте, не можу надати детальну відповідь на це питання." 
- ) # pylint: disable=broad-exception-raised + ) elif response.text: # Remove Markdown formatting bot_response = response.text.strip() @@ -857,8 +857,11 @@ def main(): # This handler will receive every error which happens in your bot application.add_error_handler(error_handler) - # Start cleanup task - asyncio.create_task(cleanup_stale_users()) + # Start cleanup task after event loop is running + async def post_init(app): + asyncio.create_task(cleanup_stale_users()) + + application.post_init = post_init info("Bot started. Ctrl+C to stop") application.run_polling() From 3be82ccca77489be4159dfc5e20a8db464c2ebb3 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:37:30 +0100 Subject: [PATCH 19/27] feat: Add SQLite persistence for user data across restarts --- .gitignore | 5 +++ Dockerfile | 3 ++ README.md | 10 +++++- docker-compose.yml | 5 +++ src/db_storage.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++ src/main.py | 34 +++++++++++++++--- 6 files changed, 140 insertions(+), 6 deletions(-) create mode 100644 src/db_storage.py diff --git a/.gitignore b/.gitignore index cad404f..5816b9c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,11 @@ # instagram_cookies.txt should not be tracked by git because it has cookies instagram_cookies.txt +# SQLite database +src/data/ +*.db +*.db-journal + # Byte-compiled / optimized / compiled Python files __pycache__/ *.py[cod] diff --git a/Dockerfile b/Dockerfile index 16493e3..bc4049f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,9 @@ COPY src /bot WORKDIR /bot +# Create data directory for SQLite database +RUN mkdir -p /bot/data + # https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops ENV PYTHONUNBUFFERED=1 diff --git a/README.md b/README.md index ca3a9eb..f430c61 100644 --- a/README.md +++ b/README.md @@ -27,13 +27,21 @@ docker build . 
-t downloader-bot:latest ``` docker run -d --name downloader-bot --restart always --env-file .env downloader-bot:latest ``` +To persist user data (conversation history, rate limits) between restarts, add a volume: +``` +docker run -d --name downloader-bot --restart always --env-file .env -v bot-data:/bot/data downloader-bot:latest +``` or use a built image from **Docker hub** ``` docker run -d --name downloader-bot --restart always --env-file .env ovchynnikov/load-bot-linux:latest ``` +With persistent data: +``` +docker run -d --name downloader-bot --restart always --env-file .env -v bot-data:/bot/data ovchynnikov/load-bot-linux:latest +``` or if you use instagram cookies ``` -docker run -d --name downloader-bot --restart always --env-file .env -v /absolute/path/to/instagram_cookies.txt:/bot/instagram_cookies.txt ovchynnikov/load-bot-linux:latest +docker run -d --name downloader-bot --restart always --env-file .env -v bot-data:/bot/data -v /absolute/path/to/instagram_cookies.txt:/bot/instagram_cookies.txt ovchynnikov/load-bot-linux:latest ``` or if you want use GPU power of intel chip and set USE_GPU_COMPRESSING=True variable ``` diff --git a/docker-compose.yml b/docker-compose.yml index d1270b4..a1de621 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,8 +10,13 @@ services: restart: unless-stopped volumes: - ./src:/app:cached # Use bind mount for development + - bot-data:/bot/data # Persistent storage for SQLite database deploy: resources: limits: cpus: '1' memory: 512M + +volumes: + bot-data: + driver: local diff --git a/src/db_storage.py b/src/db_storage.py new file mode 100644 index 0000000..b5ee242 --- /dev/null +++ b/src/db_storage.py @@ -0,0 +1,89 @@ +"""SQLite storage for bot user data persistence.""" + +import sqlite3 +import json +import os +from logger import debug, error + + +class BotStorage: + """Handles persistent storage of user data in SQLite.""" + + def __init__(self, db_path="data/bot.db"): + """Initialize database connection and 
create tables.""" + os.makedirs(os.path.dirname(db_path), exist_ok=True) + self.db_path = db_path + self.conn = sqlite3.connect(db_path, check_same_thread=False) + self._create_tables() + debug("Database initialized at %s", db_path) + + def _create_tables(self): + """Create tables if they don't exist.""" + cursor = self.conn.cursor() + cursor.execute(""" + CREATE TABLE IF NOT EXISTS user_data ( + user_id INTEGER PRIMARY KEY, + conversation_context TEXT, + rate_limit_timestamps TEXT, + daily_count INTEGER DEFAULT 0, + daily_date TEXT, + last_seen REAL + ) + """) + self.conn.commit() + + def load_user_data(self, user_id): + """Load user data from database.""" + cursor = self.conn.cursor() + cursor.execute("SELECT * FROM user_data WHERE user_id = ?", (user_id,)) + row = cursor.fetchone() + if row: + return { + "conversation_context": json.loads(row[1]) if row[1] else [], + "rate_limit_timestamps": json.loads(row[2]) if row[2] else [], + "daily_count": row[3], + "daily_date": row[4], + "last_seen": row[5], + } + return None + + def save_user_data(self, user_id, conversation_context, rate_limit_timestamps, daily_count, daily_date, last_seen): + """Save user data to database.""" + cursor = self.conn.cursor() + cursor.execute( + """ + INSERT OR REPLACE INTO user_data + (user_id, conversation_context, rate_limit_timestamps, daily_count, daily_date, last_seen) + VALUES (?, ?, ?, ?, ?, ?) 
+ """, + ( + user_id, + json.dumps(conversation_context), + json.dumps(rate_limit_timestamps), + daily_count, + daily_date, + last_seen, + ), + ) + self.conn.commit() + + def delete_user_data(self, user_id): + """Delete user data from database.""" + cursor = self.conn.cursor() + cursor.execute("DELETE FROM user_data WHERE user_id = ?", (user_id,)) + self.conn.commit() + + def get_stale_users(self, ttl_seconds): + """Get list of user IDs that haven't been seen within TTL.""" + import time + + current_time = time.time() + cursor = self.conn.cursor() + cursor.execute( + "SELECT user_id FROM user_data WHERE last_seen < ?", (current_time - ttl_seconds,) + ) + return [row[0] for row in cursor.fetchall()] + + def close(self): + """Close database connection.""" + self.conn.close() diff --git a/src/main.py b/src/main.py index 8d760a7..1b42644 100644 --- a/src/main.py +++ b/src/main.py @@ -21,6 +21,7 @@ from general_error_handler import error_handler from permissions import inform_user_not_allowed, is_user_or_chat_not_allowed, supported_sites from cleanup import cleanup +from db_storage import BotStorage from video_utils import ( compress_video, download_media, @@ -77,6 +78,9 @@ # Allowed LLM providers ALLOWED_PROVIDERS = {"grok", "gemini"} +# Initialize database storage +db_storage = BotStorage() + # Cache responses from JSON file @lru_cache(maxsize=1) @@ -546,6 +550,15 @@ async def respond_with_llm_message(update): # Update last seen timestamp user_last_seen[user_id] = current_time + # Load user data from database on first access + if user_id not in llm_daily_limit: + user_data = db_storage.load_user_data(user_id) + if user_data: + conversation_context[user_id] = user_data["conversation_context"] + llm_rate_limit[user_id] = user_data["rate_limit_timestamps"] + llm_daily_limit[user_id] = {"count": user_data["daily_count"], "date": user_data["daily_date"]} + user_last_seen[user_id] = user_data["last_seen"] + # Clean old timestamps (older than 60 seconds) 
llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if current_time - t < 60] @@ -651,6 +664,16 @@ async def respond_with_llm_message(update): if len(conversation_context[user_id]) > MAX_CONTEXT_MESSAGES: conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] + # Save user data to database + db_storage.save_user_data( + user_id, + conversation_context[user_id], + llm_rate_limit[user_id], + llm_daily_limit[user_id]["count"], + llm_daily_limit[user_id]["date"], + user_last_seen[user_id], + ) + await update.message.reply_text(bot_response) except Exception as e: # pylint: disable=broad-except @@ -801,17 +824,16 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: async def cleanup_stale_users(): - """Remove inactive users from memory to prevent unbounded growth.""" + """Remove inactive users from memory and database to prevent unbounded growth.""" while True: await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600) - current_time = time.time() ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400 - stale_users = [ - user_id for user_id, last_seen in user_last_seen.items() if current_time - last_seen > ttl_seconds - ] + # Get stale users from database + stale_users = db_storage.get_stale_users(ttl_seconds) for user_id in stale_users: + # Remove from memory if user_id in conversation_context: del conversation_context[user_id] if user_id in llm_rate_limit: @@ -820,6 +842,8 @@ async def cleanup_stale_users(): del llm_daily_limit[user_id] if user_id in user_last_seen: del user_last_seen[user_id] + # Remove from database + db_storage.delete_user_data(user_id) if stale_users: info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS) From 47b25a6eb17769846a0e33697c0e696c13504b0b Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:38:49 +0100 Subject: [PATCH 20/27] Fix linter --- src/db_storage.py | 4 +--- 1 file changed, 1 
insertion(+), 3 deletions(-) diff --git a/src/db_storage.py b/src/db_storage.py index b5ee242..0bd6f88 100644 --- a/src/db_storage.py +++ b/src/db_storage.py @@ -79,9 +79,7 @@ def get_stale_users(self, ttl_seconds): current_time = time.time() cursor = self.conn.cursor() - cursor.execute( - "SELECT user_id FROM user_data WHERE last_seen < ?", (current_time - ttl_seconds,) - ) + cursor.execute("SELECT user_id FROM user_data WHERE last_seen < ?", (current_time - ttl_seconds,)) return [row[0] for row in cursor.fetchall()] def close(self): From 1de8512f1e477ee75643bdd647394b7f88a2a128 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:40:45 +0100 Subject: [PATCH 21/27] Fix linter --- src/db_storage.py | 2 +- src/main.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/db_storage.py b/src/db_storage.py index 0bd6f88..65b5df7 100644 --- a/src/db_storage.py +++ b/src/db_storage.py @@ -3,7 +3,7 @@ import sqlite3 import json import os -from logger import debug, error +from logger import debug class BotStorage: diff --git a/src/main.py b/src/main.py index 1b42644..e35ede7 100644 --- a/src/main.py +++ b/src/main.py @@ -882,7 +882,7 @@ def main(): application.add_error_handler(error_handler) # Start cleanup task after event loop is running - async def post_init(app): + async def post_init(app): # pylint: disable=unused-argument asyncio.create_task(cleanup_stale_users()) application.post_init = post_init From dbdf3aab17621f693bdcc928962c4241098f3930 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:52:32 +0100 Subject: [PATCH 22/27] debug: Add detailed logging for SQLite database operations --- DOKKU_STORAGE.md | 104 +++++++++++++++++++++++++++++++++++++++++++++++ src/main.py | 9 ++++ 2 files changed, 113 insertions(+) create mode 100644 DOKKU_STORAGE.md diff --git a/DOKKU_STORAGE.md b/DOKKU_STORAGE.md new file mode 100644 index 
0000000..6c28c87 --- /dev/null +++ b/DOKKU_STORAGE.md @@ -0,0 +1,104 @@ +# Dokku Deployment with Persistent Storage + +## Problem +SQLite database is stored in `/bot/data/bot.db` inside the container, but without persistent storage it gets deleted on every deployment/restart. + +## Solution +Create a persistent storage mount in Dokku to preserve the database between deployments. + +## Setup Commands + +```bash +# 1. Create persistent storage directory on host +dokku storage:ensure-directory insta-bot + +# 2. Mount the storage to container's /bot/data directory +dokku storage:mount insta-bot /var/lib/dokku/data/storage/insta-bot:/bot/data + +# 3. Verify the mount +dokku storage:report insta-bot + +# 4. Rebuild and restart the app +dokku ps:rebuild insta-bot +``` + +## Verify It Works + +After deployment, check the logs: +```bash +dokku logs insta-bot -t +``` + +You should see: +``` +Database initialized at data/bot.db +``` + +Then test by: +1. Send: `ботяра, привіт` +2. Send: `ботяра, який мій попередній запит?` +3. Bot should remember the conversation + +After restart: +```bash +dokku ps:restart insta-bot +``` + +The conversation context should persist. 
+ +## Check Database File + +```bash +# SSH into the container +dokku enter insta-bot web + +# Check if database exists +ls -lh /bot/data/ +cat /bot/data/bot.db # Should show binary data + +# Exit container +exit +``` + +## Troubleshooting + +### Database not persisting +```bash +# Check if mount exists +dokku storage:report insta-bot + +# Should show: +# Storage mount: /var/lib/dokku/data/storage/insta-bot:/bot/data +``` + +### Permission issues +```bash +# Fix permissions on host +sudo chown -R dokku:dokku /var/lib/dokku/data/storage/insta-bot +sudo chmod -R 755 /var/lib/dokku/data/storage/insta-bot +``` + +### Check logs for database operations +```bash +# Enable DEBUG logging +dokku config:set insta-bot LOG_LEVEL=DEBUG + +# Watch logs +dokku logs insta-bot -t +``` + +Look for: +- `Loading user data from database for user_id: XXX` +- `Found user data in database: context=X messages` +- `Saving user data to database: user_id=XXX` + +## Backup Database + +```bash +# Backup +sudo cp /var/lib/dokku/data/storage/insta-bot/bot.db /var/lib/dokku/data/storage/insta-bot/bot.db.backup + +# Restore +sudo cp /var/lib/dokku/data/storage/insta-bot/bot.db.backup /var/lib/dokku/data/storage/insta-bot/bot.db +dokku ps:restart insta-bot +``` diff --git a/src/main.py b/src/main.py index e35ede7..c23a546 100644 --- a/src/main.py +++ b/src/main.py @@ -552,12 +552,18 @@ async def respond_with_llm_message(update): # Load user data from database on first access if user_id not in llm_daily_limit: + debug("Loading user data from database for user_id: %s", user_id) user_data = db_storage.load_user_data(user_id) if user_data: + debug("Found user data in database: context=%d messages, rate_limit=%d timestamps, daily=%d/%s", + len(user_data["conversation_context"]), len(user_data["rate_limit_timestamps"]), + user_data["daily_count"], user_data["daily_date"]) conversation_context[user_id] = user_data["conversation_context"] llm_rate_limit[user_id] = user_data["rate_limit_timestamps"] 
llm_daily_limit[user_id] = {"count": user_data["daily_count"], "date": user_data["daily_date"]} user_last_seen[user_id] = user_data["last_seen"] + else: + debug("No existing data found in database for user_id: %s", user_id) # Clean old timestamps (older than 60 seconds) llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if current_time - t < 60] @@ -665,6 +671,9 @@ async def respond_with_llm_message(update): conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] # Save user data to database + debug("Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s", + user_id, len(conversation_context[user_id]), + llm_daily_limit[user_id]["count"], llm_daily_limit[user_id]["date"]) db_storage.save_user_data( user_id, conversation_context[user_id], From deababe5f9e72a81edb27f550e4ccc7a5394a197 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:54:13 +0100 Subject: [PATCH 23/27] Fix linter --- src/main.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/main.py b/src/main.py index c23a546..bdfb4f2 100644 --- a/src/main.py +++ b/src/main.py @@ -555,9 +555,13 @@ async def respond_with_llm_message(update): debug("Loading user data from database for user_id: %s", user_id) user_data = db_storage.load_user_data(user_id) if user_data: - debug("Found user data in database: context=%d messages, rate_limit=%d timestamps, daily=%d/%s", - len(user_data["conversation_context"]), len(user_data["rate_limit_timestamps"]), - user_data["daily_count"], user_data["daily_date"]) + debug( + "Found user data in database: context=%d messages, rate_limit=%d timestamps, daily=%d/%s", + len(user_data["conversation_context"]), + len(user_data["rate_limit_timestamps"]), + user_data["daily_count"], + user_data["daily_date"], + ) conversation_context[user_id] = user_data["conversation_context"] llm_rate_limit[user_id] = 
user_data["rate_limit_timestamps"] llm_daily_limit[user_id] = {"count": user_data["daily_count"], "date": user_data["daily_date"]} @@ -671,9 +675,13 @@ async def respond_with_llm_message(update): conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] # Save user data to database - debug("Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s", - user_id, len(conversation_context[user_id]), - llm_daily_limit[user_id]["count"], llm_daily_limit[user_id]["date"]) + debug( + "Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s", + user_id, + len(conversation_context[user_id]), + llm_daily_limit[user_id]["count"], + llm_daily_limit[user_id]["date"], + ) db_storage.save_user_data( user_id, conversation_context[user_id], From 1696d2d00e228476714fdca5540908e028eeff91 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 18:59:42 +0100 Subject: [PATCH 24/27] Update dependencies --- DOKKU_STORAGE.md | 104 ------------------------------------------- src/requirements.txt | 12 ++--- 2 files changed, 6 insertions(+), 110 deletions(-) delete mode 100644 DOKKU_STORAGE.md diff --git a/DOKKU_STORAGE.md b/DOKKU_STORAGE.md deleted file mode 100644 index 6c28c87..0000000 --- a/DOKKU_STORAGE.md +++ /dev/null @@ -1,104 +0,0 @@ -# Dokku Deployment with Persistent Storage - -## Problem -SQLite database is stored in `/bot/data/bot.db` inside the container, but without persistent storage it gets deleted on every deployment/restart. - -## Solution -Create a persistent storage mount in Dokku to preserve the database between deployments. - -## Setup Commands - -```bash -# 1. Create persistent storage directory on host -dokku storage:ensure-directory insta-bot - -# 2. Mount the storage to container's /bot/data directory -dokku storage:mount insta-bot /var/lib/dokku/data/storage/insta-bot:/bot/data - -# 3. Verify the mount -dokku storage:report insta-bot - -# 4. 
Rebuild and restart the app -dokku ps:rebuild insta-bot -``` - -## Verify It Works - -After deployment, check the logs: -```bash -dokku logs insta-bot -t -``` - -You should see: -``` -Database initialized at data/bot.db -``` - -Then test by: -1. Send: `ботяра, привіт` -2. Send: `ботяра, який мій попередній запит?` -3. Bot should remember the conversation - -After restart: -```bash -dokku ps:restart insta-bot -``` - -The conversation context should persist. - -## Check Database File - -```bash -# SSH into the container -dokku enter insta-bot web - -# Check if database exists -ls -lh /bot/data/ -cat /bot/data/bot.db # Should show binary data - -# Exit container -exit -``` - -## Troubleshooting - -### Database not persisting -```bash -# Check if mount exists -dokku storage:report insta-bot - -# Should show: -# Storage mount: /var/lib/dokku/data/storage/insta-bot:/bot/data -``` - -### Permission issues -```bash -# Fix permissions on host -sudo chown -R dokku:dokku /var/lib/dokku/data/storage/insta-bot -sudo chmod -R 755 /var/lib/dokku/data/storage/insta-bot -``` - -### Check logs for database operations -```bash -# Enable DEBUG logging -dokku config:set insta-bot LOG_LEVEL=DEBUG - -# Watch logs -dokku logs insta-bot -t -``` - -Look for: -- `Loading user data from database for user_id: XXX` -- `Found user data in database: context=X messages` -- `Saving user data to database: user_id=XXX` - -## Backup Database - -```bash -# Backup -sudo cp /var/lib/dokku/data/storage/insta-bot/bot.db /var/lib/dokku/data/storage/insta-bot/bot.db.backup - -# Restore -sudo cp /var/lib/dokku/data/storage/insta-bot/bot.db.backup /var/lib/dokku/data/storage/insta-bot/bot.db -dokku ps:restart insta-bot -``` diff --git a/src/requirements.txt b/src/requirements.txt index 2700381..244b45a 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,7 +1,7 @@ -python-telegram-bot[ext]==22.6 -python-dotenv==1.2.1 -yt-dlp==2026.2.21 -gallery-dl==1.31.6 -aiohttp==3.13.3 
+python-telegram-bot[ext]>=22.6 +python-dotenv>=1.2.2 +yt-dlp>=2026.3.3 +gallery-dl>=1.31.7 +aiohttp>=3.13.3 google-generativeai>=0.8.6 -openai>=1.0.0 +openai>=2.24.0 From 5e0b958ccdb66ea9878e683fbca9b1c2e10773d6 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 19:22:26 +0100 Subject: [PATCH 25/27] fix: Apply code review improvements - Add DB index on last_seen for efficient cleanup - Make all DB calls async with asyncio.to_thread - Fix stale timestamp handling from DB - Add dynamic localization for LLM prompts (uk/en) - Add proper cleanup task cancellation on shutdown --- src/db_storage.py | 1 + src/main.py | 55 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/db_storage.py b/src/db_storage.py index 65b5df7..4f4dc7a 100644 --- a/src/db_storage.py +++ b/src/db_storage.py @@ -30,6 +30,7 @@ def _create_tables(self): last_seen REAL ) """) + cursor.execute("CREATE INDEX IF NOT EXISTS idx_user_data_last_seen ON user_data(last_seen)") self.conn.commit() def load_user_data(self, user_id): diff --git a/src/main.py b/src/main.py index bdfb4f2..1c26131 100644 --- a/src/main.py +++ b/src/main.py @@ -81,6 +81,9 @@ # Initialize database storage db_storage = BotStorage() +# Cleanup task reference +cleanup_task = None + # Cache responses from JSON file @lru_cache(maxsize=1) @@ -553,7 +556,7 @@ async def respond_with_llm_message(update): # Load user data from database on first access if user_id not in llm_daily_limit: debug("Loading user data from database for user_id: %s", user_id) - user_data = db_storage.load_user_data(user_id) + user_data = await asyncio.to_thread(db_storage.load_user_data, user_id) if user_data: debug( "Found user data in database: context=%d messages, rate_limit=%d timestamps, daily=%d/%s", @@ -565,7 +568,9 @@ async def respond_with_llm_message(update): conversation_context[user_id] = user_data["conversation_context"] 
llm_rate_limit[user_id] = user_data["rate_limit_timestamps"] llm_daily_limit[user_id] = {"count": user_data["daily_count"], "date": user_data["daily_date"]} - user_last_seen[user_id] = user_data["last_seen"] + # Only update last_seen if DB value is newer + if user_id not in user_last_seen or user_data["last_seen"] > user_last_seen[user_id]: + user_last_seen[user_id] = user_data["last_seen"] else: debug("No existing data found in database for user_id: %s", user_id) @@ -646,11 +651,28 @@ async def respond_with_llm_message(update): context_messages = [] # Create prompt with context if available + if language == "uk": + user_label = "Користувач" + assistant_label = "Асистент" + instruction = "Відповідай українською мовою як дружній асистент. Не вітайся і не прощайся." + else: + user_label = "User" + assistant_label = "Assistant" + instruction = "Answer in English as a friendly assistant. Don't greet or say goodbye." + if context_messages: - context_str = "\n".join([f"Користувач: {msg}\nАсистент: {resp}" for msg, resp in context_messages]) - safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\nВідповідай українською мовою як дружній асистент. Не вітайся і не прощайся." + context_str = "\n".join( + [f"{user_label}: {msg}\n{assistant_label}: {resp}" for msg, resp in context_messages] + ) + if language == "uk": + safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\n{instruction}" + else: + safe_prompt = f"Previous conversation:\n{context_str}\n\nCurrent user question: {prompt}\n\n{instruction}" else: - safe_prompt = f"Відповідай українською мовою як дружній асистент. Не вітайся і не прощайся. 
Питання користувача: {prompt}" + if language == "uk": + safe_prompt = f"{instruction} Питання користувача: {prompt}" + else: + safe_prompt = f"{instruction} User question: {prompt}" debug("Modified safe prompt with context: %s", safe_prompt[:200]) @@ -682,7 +704,8 @@ async def respond_with_llm_message(update): llm_daily_limit[user_id]["count"], llm_daily_limit[user_id]["date"], ) - db_storage.save_user_data( + await asyncio.to_thread( + db_storage.save_user_data, user_id, conversation_context[user_id], llm_rate_limit[user_id], @@ -847,7 +870,7 @@ async def cleanup_stale_users(): ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400 # Get stale users from database - stale_users = db_storage.get_stale_users(ttl_seconds) + stale_users = await asyncio.to_thread(db_storage.get_stale_users, ttl_seconds) for user_id in stale_users: # Remove from memory @@ -860,7 +883,7 @@ async def cleanup_stale_users(): if user_id in user_last_seen: del user_last_seen[user_id] # Remove from database - db_storage.delete_user_data(user_id) + await asyncio.to_thread(db_storage.delete_user_data, user_id) if stale_users: info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS) @@ -892,6 +915,8 @@ def main(): Returns: None """ + global cleanup_task # pylint: disable=global-statement + bot_token = os.getenv("BOT_TOKEN") application = Application.builder().token(bot_token).build() application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message)) @@ -900,9 +925,21 @@ def main(): # Start cleanup task after event loop is running async def post_init(app): # pylint: disable=unused-argument - asyncio.create_task(cleanup_stale_users()) + global cleanup_task # pylint: disable=global-statement + cleanup_task = asyncio.create_task(cleanup_stale_users()) + + # Cancel cleanup task on shutdown + async def post_shutdown(app): # pylint: disable=unused-argument + global cleanup_task # pylint: disable=global-statement + if cleanup_task is not None: + 
cleanup_task.cancel() + try: + await cleanup_task + except asyncio.CancelledError: + pass application.post_init = post_init + application.post_shutdown = post_shutdown info("Bot started. Ctrl+C to stop") application.run_polling() From 8e893475a46d6d213606f1ae362f0adcaa7c9554 Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 19:32:05 +0100 Subject: [PATCH 26/27] fix: Apply critical code review fixes - Close DB connection on shutdown - Initialize response variable before retry loop - Localize Gemini fallback instruction - Move time import to module level in db_storage --- src/db_storage.py | 3 +-- src/main.py | 43 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/db_storage.py b/src/db_storage.py index 4f4dc7a..2ecfdb4 100644 --- a/src/db_storage.py +++ b/src/db_storage.py @@ -3,6 +3,7 @@ import sqlite3 import json import os +import time from logger import debug @@ -76,8 +77,6 @@ def delete_user_data(self, user_id): def get_stale_users(self, ttl_seconds): """Get list of user IDs that haven't been seen within TTL.""" - import time - current_time = time.time() cursor = self.conn.cursor() cursor.execute("SELECT user_id FROM user_data WHERE last_seen < ?", (current_time - ttl_seconds,)) diff --git a/src/main.py b/src/main.py index 1c26131..0d7e26e 100644 --- a/src/main.py +++ b/src/main.py @@ -665,9 +665,13 @@ async def respond_with_llm_message(update): [f"{user_label}: {msg}\n{assistant_label}: {resp}" for msg, resp in context_messages] ) if language == "uk": - safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\n{instruction}" + safe_prompt = ( + f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\n{instruction}" + ) else: - safe_prompt = f"Previous conversation:\n{context_str}\n\nCurrent user question: {prompt}\n\n{instruction}" + safe_prompt = ( + f"Previous 
conversation:\n{context_str}\n\nCurrent user question: {prompt}\n\n{instruction}" + ) else: if language == "uk": safe_prompt = f"{instruction} Питання користувача: {prompt}" @@ -797,6 +801,7 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: max_retries = 2 retry_delay = 60 + response = None for attempt in range(max_retries): try: @@ -833,6 +838,17 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: await asyncio.sleep(retry_delay) else: raise + + # Check if response was set after retries + if response is None: + fail_msg = ( + "Вибачте, не вдалося отримати відповідь. Спробуйте пізніше." + if language == "uk" + else "Sorry, failed to get a response. Please try again later." + ) + await update.message.reply_text(fail_msg) + raise Exception("Failed to get response after retries") # pylint: disable=broad-exception-raised + if hasattr(response, 'candidates') and response.candidates: candidate = response.candidates[0] debug("Response candidate finish_reason: %s", getattr(candidate, 'finish_reason', 'None')) @@ -840,17 +856,26 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: if hasattr(candidate, 'finish_reason') and candidate.finish_reason == 2: debug("Safety filter triggered - finish_reason: 2, trying simpler approach") + fallback_instruction = ( + "Відповідь українською мовою: дай загальну інформацію про: " + if language == "uk" + else "Answer in English: give general information about: " + ) simple_response = await asyncio.to_thread( model.generate_content, - "Відповідь українською мовою: дай загальну інформацію про: " + prompt, + fallback_instruction + prompt, safety_settings=safety_settings, ) if simple_response.text: - return f"Ось загальна інформація: {simple_response.text.strip()}" + prefix = "Ось загальна інформація: " if language == "uk" else "Here's general information: " + return f"{prefix}{simple_response.text.strip()}" else: - raise Exception( # pylint: 
disable=broad-exception-raised + error_msg = ( "Вибачте, не можу надати детальну відповідь на це питання." + if language == "uk" + else "Sorry, I can't provide a detailed answer to this question." ) + raise Exception(error_msg) # pylint: disable=broad-exception-raised elif response.text: # Remove Markdown formatting bot_response = response.text.strip() @@ -928,7 +953,7 @@ async def post_init(app): # pylint: disable=unused-argument global cleanup_task # pylint: disable=global-statement cleanup_task = asyncio.create_task(cleanup_stale_users()) - # Cancel cleanup task on shutdown + # Cancel cleanup task and close DB on shutdown async def post_shutdown(app): # pylint: disable=unused-argument global cleanup_task # pylint: disable=global-statement if cleanup_task is not None: @@ -937,6 +962,12 @@ async def post_shutdown(app): # pylint: disable=unused-argument await cleanup_task except asyncio.CancelledError: pass + # Close database connection + try: + db_storage.close() + debug("Database connection closed") + except Exception as e: # pylint: disable=broad-except + error("Error closing database: %s", e) application.post_init = post_init application.post_shutdown = post_shutdown From 9f15bb886b4709fef826ffd44e85075ddc6cd53e Mon Sep 17 00:00:00 2001 From: avelytchko <919635+avelytchko@users.noreply.github.com> Date: Thu, 5 Mar 2026 19:44:13 +0100 Subject: [PATCH 27/27] fix: Apply final code review improvements - Make DB persistence best-effort (send reply first) - Remove duplicate error message from call_gemini_api - Protect cleanup loop from crashes with exception handling --- src/main.py | 99 +++++++++++++++++++++++++++-------------------------- 1 file changed, 51 insertions(+), 48 deletions(-) diff --git a/src/main.py b/src/main.py index 0d7e26e..d3d070e 100644 --- a/src/main.py +++ b/src/main.py @@ -700,26 +700,33 @@ async def respond_with_llm_message(update): if len(conversation_context[user_id]) > MAX_CONTEXT_MESSAGES: conversation_context[user_id] = 
conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] - # Save user data to database - debug( - "Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s", - user_id, - len(conversation_context[user_id]), - llm_daily_limit[user_id]["count"], - llm_daily_limit[user_id]["date"], - ) - await asyncio.to_thread( - db_storage.save_user_data, - user_id, - conversation_context[user_id], - llm_rate_limit[user_id], - llm_daily_limit[user_id]["count"], - llm_daily_limit[user_id]["date"], - user_last_seen[user_id], - ) - + # Send reply first, then save to DB (best-effort persistence) await update.message.reply_text(bot_response) + # Save user data to database (best-effort, don't fail on DB errors) + async def save_to_db(): + try: + debug( + "Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s", + user_id, + len(conversation_context[user_id]), + llm_daily_limit[user_id]["count"], + llm_daily_limit[user_id]["date"], + ) + await asyncio.to_thread( + db_storage.save_user_data, + user_id, + conversation_context[user_id], + llm_rate_limit[user_id], + llm_daily_limit[user_id]["count"], + llm_daily_limit[user_id]["date"], + user_last_seen[user_id], + ) + except Exception as db_error: # pylint: disable=broad-except + error("Failed to save user data to database: %s", db_error) + + asyncio.create_task(save_to_db()) + except Exception as e: # pylint: disable=broad-except # Remove tentative timestamp on failure if llm_rate_limit[user_id] and llm_rate_limit[user_id][-1] == current_time: @@ -841,12 +848,6 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: # Check if response was set after retries if response is None: - fail_msg = ( - "Вибачте, не вдалося отримати відповідь. Спробуйте пізніше." - if language == "uk" - else "Sorry, failed to get a response. Please try again later." 
- ) - await update.message.reply_text(fail_msg) raise Exception("Failed to get response after retries") # pylint: disable=broad-exception-raised if hasattr(response, 'candidates') and response.candidates: @@ -891,27 +892,32 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str: async def cleanup_stale_users(): """Remove inactive users from memory and database to prevent unbounded growth.""" while True: - await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600) - ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400 - - # Get stale users from database - stale_users = await asyncio.to_thread(db_storage.get_stale_users, ttl_seconds) - - for user_id in stale_users: - # Remove from memory - if user_id in conversation_context: - del conversation_context[user_id] - if user_id in llm_rate_limit: - del llm_rate_limit[user_id] - if user_id in llm_daily_limit: - del llm_daily_limit[user_id] - if user_id in user_last_seen: - del user_last_seen[user_id] - # Remove from database - await asyncio.to_thread(db_storage.delete_user_data, user_id) - - if stale_users: - info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS) + try: + await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600) + ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400 + + # Get stale users from database + stale_users = await asyncio.to_thread(db_storage.get_stale_users, ttl_seconds) + + for user_id in stale_users: + # Remove from memory + if user_id in conversation_context: + del conversation_context[user_id] + if user_id in llm_rate_limit: + del llm_rate_limit[user_id] + if user_id in llm_daily_limit: + del llm_daily_limit[user_id] + if user_id in user_last_seen: + del user_last_seen[user_id] + # Remove from database + await asyncio.to_thread(db_storage.delete_user_data, user_id) + + if stale_users: + info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS) + except Exception as cleanup_error: # pylint: disable=broad-except + 
error("Error in cleanup_stale_users: %s", cleanup_error) + error("Full traceback: %s", traceback.format_exc()) + await asyncio.sleep(60) # Wait before retrying def main(): @@ -940,8 +946,6 @@ def main(): Returns: None """ - global cleanup_task # pylint: disable=global-statement - bot_token = os.getenv("BOT_TOKEN") application = Application.builder().token(bot_token).build() application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message)) @@ -955,7 +959,6 @@ async def post_init(app): # pylint: disable=unused-argument # Cancel cleanup task and close DB on shutdown async def post_shutdown(app): # pylint: disable=unused-argument - global cleanup_task # pylint: disable=global-statement if cleanup_task is not None: cleanup_task.cancel() try: