From e92acb4e42709d6674ab88aec1ddef54204d3140 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 20:58:43 +0100
Subject: [PATCH 01/27] Improve logging

---
 src/main.py | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/src/main.py b/src/main.py
index a9d5ae2..4f8ffab 100644
--- a/src/main.py
+++ b/src/main.py
@@ -516,7 +516,7 @@ async def respond_with_llm_message(update):
             return
 
         # Initialize the Gemini model
-        debug("Initializing Gemini model: gemini-2.5-flash")
+        debug("Initializing Gemini model: %s", GEMINI_MODEL)
         plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences."
 
         model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction)
@@ -527,7 +527,7 @@ async def respond_with_llm_message(update):
         debug("Modified safe prompt: %s", safe_prompt)
 
         # Generate response using Gemini with both safety settings and safe prompting
-        debug("Sending request to Gemini API")
+        debug("Sending request to Gemini API with model: %s", GEMINI_MODEL)
         safety_settings = {
             genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
             genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
@@ -546,7 +546,7 @@ async def respond_with_llm_message(update):
             ),
             safety_settings=safety_settings,
         )
-        # debug("Successfully received response from Gemini API")
+        debug("Successfully received response from Gemini API")
 
         # Handle response with safety filter checks
         if hasattr(response, 'candidates') and response.candidates:
@@ -600,20 +600,28 @@ async def respond_with_llm_message(update):
 
         await update.message.reply_text(bot_response)
 
-    except (ValueError, RuntimeError) as e:
-        error("Error in Gemini API request: %s", e)
-        await update.message.reply_text(
-            "Вибачте, я не можу згенерувати відповідь."
-            if language == "uk"
-            else "Sorry, I encountered an error while processing your request."
-        )
     except Exception as e:  # pylint: disable=broad-except
-        error("Unexpected error in Gemini API request: %s", e)
-        await update.message.reply_text(
-            "Вибачте, я не можу згенерувати відповідь."
-            if language == "uk"
-            else "Sorry, I encountered an unexpected error while processing your request."
-        )
+        import traceback
+        error_msg = str(e)
+        error("Error in Gemini API request: %s (Type: %s)", error_msg, type(e).__name__)
+        error("Full traceback: %s", traceback.format_exc())
+        
+        # Check for rate limit (429) error
+        if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower():
+            error("Rate limit exceeded (429) - Too many requests to Gemini API")
+            bot_response = (
+                "Вибачте, перевищено ліміт запитів до AI. Спробуйте пізніше."
+                if language == "uk"
+                else "Sorry, AI request limit exceeded. Please try again later."
+            )
+        else:
+            bot_response = (
+                "Вибачте, я не можу згенерувати відповідь."
+                if language == "uk"
+                else "Sorry, I encountered an error while processing your request."
+            )
+        
+        await update.message.reply_text(bot_response)
 
 
 def main():

From b57cc37a5707fd94a9915143e6de1de3e741f95a Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 21:01:02 +0100
Subject: [PATCH 02/27] Fix linter

---
 src/main.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/main.py b/src/main.py
index 4f8ffab..d489296 100644
--- a/src/main.py
+++ b/src/main.py
@@ -602,10 +602,11 @@ async def respond_with_llm_message(update):
 
     except Exception as e:  # pylint: disable=broad-except
         import traceback
+
         error_msg = str(e)
         error("Error in Gemini API request: %s (Type: %s)", error_msg, type(e).__name__)
         error("Full traceback: %s", traceback.format_exc())
-        
+
         # Check for rate limit (429) error
         if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower():
             error("Rate limit exceeded (429) - Too many requests to Gemini API")
@@ -620,7 +621,7 @@ async def respond_with_llm_message(update):
                 if language == "uk"
                 else "Sorry, I encountered an error while processing your request."
             )
-        
+
         await update.message.reply_text(bot_response)
 
 

From 33aae20252322ad9e98c40693f17be51f5380a9d Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 21:35:55 +0100
Subject: [PATCH 03/27] Add rate limit check

---
 src/main.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/main.py b/src/main.py
index d489296..132bd50 100644
--- a/src/main.py
+++ b/src/main.py
@@ -5,8 +5,10 @@
 import json
 import asyncio
 import re
+import time
 import google.generativeai as genai
 from functools import lru_cache
+from collections import defaultdict
 from dotenv import load_dotenv
 from telegram import Update, InputMediaPhoto, InputMediaVideo
 from telegram.error import TimedOut, NetworkError, TelegramError
@@ -44,6 +46,10 @@
 if GEMINI_API_KEY:
     genai.configure(api_key=GEMINI_API_KEY)
 
+# Rate limiting for Gemini API (5 requests per minute)
+gemini_rate_limit = defaultdict(list)  # {user_id: [timestamp1, timestamp2, ...]}
+GEMINI_RPM_LIMIT = 4  # Set to 4 to be safe (limit is 5)
+
 
 # Cache responses from JSON file
 @lru_cache(maxsize=1)
@@ -483,6 +489,25 @@ async def respond_with_llm_message(update):
         await update.message.reply_text("Sorry, AI service is not configured.")
         return
 
+    # Rate limiting check
+    user_id = update.effective_user.id
+    current_time = time.time()
+    # Clean old timestamps (older than 60 seconds)
+    gemini_rate_limit[user_id] = [t for t in gemini_rate_limit[user_id] if current_time - t < 60]
+
+    if len(gemini_rate_limit[user_id]) >= GEMINI_RPM_LIMIT:
+        debug("Rate limit hit for user %s", user_id)
+        bot_response = (
+            "Вибачте, забагато запитів. Почекайте хвилину."
+            if language == "uk"
+            else "Sorry, too many requests. Please wait a minute."
+        )
+        await update.message.reply_text(bot_response)
+        return
+
+    # Add current request timestamp
+    gemini_rate_limit[user_id].append(current_time)
+
     try:
         # Check if user is asking for image generation and modify prompt
         image_keywords = [

From 538a090285e643dcdd92f0a47168f83768227b67 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 22:15:45 +0100
Subject: [PATCH 04/27] Implement Grok API support

---
 src/main.py          | 238 ++++++++++++++++++++++++++++++-------------
 src/requirements.txt |   1 +
 2 files changed, 166 insertions(+), 73 deletions(-)

diff --git a/src/main.py b/src/main.py
index 132bd50..337d830 100644
--- a/src/main.py
+++ b/src/main.py
@@ -7,6 +7,7 @@
 import re
 import time
 import google.generativeai as genai
+from openai import AsyncOpenAI
 from functools import lru_cache
 from collections import defaultdict
 from dotenv import load_dotenv
@@ -37,8 +38,11 @@
 # Reply with user data for Healthcheck
 send_user_info_with_healthcheck = os.getenv("SEND_USER_INFO_WITH_HEALTHCHECK", "False").lower() == "true"
 USE_LLM = os.getenv("USE_LLM", "False").lower() == "true"
+LLM_PROVIDER = os.getenv("LLM_PROVIDER", "gemini").lower()  # gemini or grok
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
-GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
+GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-flash-latest")
+GROK_API_KEY = os.getenv("GROK_API_KEY")
+GROK_MODEL = os.getenv("GROK_MODEL", "grok-4-latest")
 TELEGRAM_WRITE_TIMEOUT = 8000
 TELEGRAM_READ_TIMEOUT = 8000
 
@@ -46,9 +50,19 @@
 if GEMINI_API_KEY:
     genai.configure(api_key=GEMINI_API_KEY)
 
-# Rate limiting for Gemini API (5 requests per minute)
-gemini_rate_limit = defaultdict(list)  # {user_id: [timestamp1, timestamp2, ...]}
-GEMINI_RPM_LIMIT = 4  # Set to 4 to be safe (limit is 5)
+# Configure Grok API
+grok_client = None
+if GROK_API_KEY:
+    grok_client = AsyncOpenAI(
+        api_key=GROK_API_KEY,
+        base_url="https://api.x.ai/v1"
+    )
+
+# Rate limiting for LLM APIs
+llm_rate_limit = defaultdict(list)  # {user_id: [timestamp1, timestamp2, ...]}
+llm_daily_limit = defaultdict(int)  # {user_id: count}
+LLM_RPM_LIMIT = 50  # Set to 4 to be safe (limit is 5 for Gemini)
+LLM_RPD_LIMIT = 500  # Daily limit: 18 to be safe (limit is 20 for Gemini)
 
 
 # Cache responses from JSON file
@@ -206,7 +220,7 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):  #
     bot_mentioned = is_bot_mentioned(message_text)
     debug("Bot mentioned check: %s for message: %s", bot_mentioned, message_text)
     debug("USE_LLM setting: %s", USE_LLM)
-    debug("GEMINI_API_KEY configured: %s", bool(GEMINI_API_KEY))
+    debug("LLM_PROVIDER: %s", LLM_PROVIDER)
 
     if bot_mentioned:
         if USE_LLM:
@@ -476,7 +490,7 @@ async def send_pic(update: Update, pic) -> None:
 
 
 async def respond_with_llm_message(update):
-    """Handle LLM responses when bot is mentioned using Google Gemini API."""
+    """Handle LLM responses when bot is mentioned using Gemini or Grok API."""
     debug("LLM response function called")
     message_text = update.message.text
     # Remove bot mention and any punctuation after it
@@ -484,18 +498,21 @@ async def respond_with_llm_message(update):
     debug("Original message: %s", message_text)
     debug("Processed prompt: %s", prompt)
 
-    if not GEMINI_API_KEY:
-        # debug("GEMINI_API_KEY not configured")
-        await update.message.reply_text("Sorry, AI service is not configured.")
+    # Check if API is configured
+    if LLM_PROVIDER == "grok" and not GROK_API_KEY:
+        await update.message.reply_text("Sorry, Grok AI service is not configured.")
+        return
+    elif LLM_PROVIDER == "gemini" and not GEMINI_API_KEY:
+        await update.message.reply_text("Sorry, Gemini AI service is not configured.")
         return
 
     # Rate limiting check
     user_id = update.effective_user.id
     current_time = time.time()
     # Clean old timestamps (older than 60 seconds)
-    gemini_rate_limit[user_id] = [t for t in gemini_rate_limit[user_id] if current_time - t < 60]
+    llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if current_time - t < 60]
 
-    if len(gemini_rate_limit[user_id]) >= GEMINI_RPM_LIMIT:
+    if len(llm_rate_limit[user_id]) >= LLM_RPM_LIMIT:
         debug("Rate limit hit for user %s", user_id)
         bot_response = (
             "Вибачте, забагато запитів. Почекайте хвилину."
@@ -504,9 +521,21 @@ async def respond_with_llm_message(update):
         )
         await update.message.reply_text(bot_response)
         return
+    
+    # Check daily limit
+    if llm_daily_limit[user_id] >= LLM_RPD_LIMIT:
+        debug("Daily limit hit for user %s", user_id)
+        bot_response = (
+            "Вибачте, денний ліміт запитів вичерпано. Спробуйте завтра."
+            if language == "uk"
+            else "Sorry, daily request limit reached. Try again tomorrow."
+        )
+        await update.message.reply_text(bot_response)
+        return
 
     # Add current request timestamp
-    gemini_rate_limit[user_id].append(current_time)
+    llm_rate_limit[user_id].append(current_time)
+    llm_daily_limit[user_id] += 1
 
     try:
         # Check if user is asking for image generation and modify prompt
@@ -540,19 +569,87 @@ async def respond_with_llm_message(update):
             await update.message.reply_text(bot_response)
             return
 
-        # Initialize the Gemini model
-        debug("Initializing Gemini model: %s", GEMINI_MODEL)
-        plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences."
-
-        model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction)
-
-        # Try different approach - rephrase any potentially problematic prompts
+        # Prepare prompt
         debug("Original prompt: %s", prompt)
         safe_prompt = f"Відповідай українською мовою як дружній асистент. Питання користувача: {prompt}"
         debug("Modified safe prompt: %s", safe_prompt)
 
-        # Generate response using Gemini with both safety settings and safe prompting
-        debug("Sending request to Gemini API with model: %s", GEMINI_MODEL)
+        # Call appropriate LLM provider
+        if LLM_PROVIDER == "grok":
+            debug("Using Grok API with model: %s", GROK_MODEL)
+            bot_response = await call_grok_api(safe_prompt, update)
+        else:
+            debug("Using Gemini API with model: %s", GEMINI_MODEL)
+            bot_response = await call_gemini_api(safe_prompt, prompt, update)
+
+        await update.message.reply_text(bot_response)
+
+    except Exception as e:  # pylint: disable=broad-except
+        import traceback
+
+        error_msg = str(e)
+        error("Error in LLM API request: %s (Type: %s)", error_msg, type(e).__name__)
+        error("Full traceback: %s", traceback.format_exc())
+
+        # Check for rate limit (429) error
+        if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower():
+            error("Rate limit exceeded (429) - Too many requests to LLM API")
+            bot_response = (
+                "Вибачте, перевищено ліміт запитів до AI. Спробуйте пізніше."
+                if language == "uk"
+                else "Sorry, AI request limit exceeded. Please try again later."
+            )
+        else:
+            bot_response = (
+                "Вибачте, я не можу згенерувати відповідь."
+                if language == "uk"
+                else "Sorry, I encountered an error while processing your request."
+            )
+
+        await update.message.reply_text(bot_response)
+
+
+async def call_grok_api(safe_prompt: str, update) -> str:
+    """Call Grok API and return response."""
+    try:
+        max_retries = 2
+        retry_delay = 60
+        
+        for attempt in range(max_retries):
+            try:
+                response = await grok_client.chat.completions.create(
+                    model=GROK_MODEL,
+                    messages=[{"role": "user", "content": safe_prompt}],
+                    max_tokens=1024,
+                    temperature=0.7
+                )
+                return response.choices[0].message.content.strip()
+            except Exception as retry_error:
+                error_msg = str(retry_error)
+                if ("429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()) and attempt < max_retries - 1:
+                    debug("Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", retry_delay, attempt + 1, max_retries)
+                    wait_msg = (
+                        f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..."
+                        if language == "uk"
+                        else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..."
+                    )
+                    await update.message.reply_text(wait_msg)
+                    await asyncio.sleep(retry_delay)
+                else:
+                    raise
+    except Exception:
+        return (
+            "Вибачте, я не можу згенерувати відповідь."
+            if language == "uk"
+            else "Sorry, I couldn't generate a response."
+        )
+
+
+async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
+    """Call Gemini API and return response."""
+    try:
+        plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences."
+        model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction)
         safety_settings = {
             genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
             genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
@@ -560,20 +657,38 @@ async def respond_with_llm_message(update):
             genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
         }
         contents = [{'role': 'user', 'parts': [safe_prompt]}]
-        response = await asyncio.to_thread(
-            model.generate_content,
-            contents,  # Pass the simplified list here
-            generation_config=genai.types.GenerationConfig(
-                temperature=0.7,
-                top_p=0.9,
-                top_k=30,
-                max_output_tokens=1024,
-            ),
-            safety_settings=safety_settings,
-        )
-        debug("Successfully received response from Gemini API")
-
-        # Handle response with safety filter checks
+        
+        max_retries = 2
+        retry_delay = 60
+        
+        for attempt in range(max_retries):
+            try:
+                response = await asyncio.to_thread(
+                    model.generate_content,
+                    contents,
+                    generation_config=genai.types.GenerationConfig(
+                        temperature=0.7,
+                        top_p=0.9,
+                        top_k=30,
+                        max_output_tokens=1024,
+                    ),
+                    safety_settings=safety_settings,
+                )
+                debug("Successfully received response from Gemini API")
+                break
+            except Exception as retry_error:
+                error_msg = str(retry_error)
+                if ("429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()) and attempt < max_retries - 1:
+                    debug("Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", retry_delay, attempt + 1, max_retries)
+                    wait_msg = (
+                        f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..."
+                        if language == "uk"
+                        else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..."
+                    )
+                    await update.message.reply_text(wait_msg)
+                    await asyncio.sleep(retry_delay)
+                else:
+                    raise
         if hasattr(response, 'candidates') and response.candidates:
             candidate = response.candidates[0]
             debug("Response candidate finish_reason: %s", getattr(candidate, 'finish_reason', 'None'))
@@ -581,7 +696,6 @@ async def respond_with_llm_message(update):
 
             if hasattr(candidate, 'finish_reason') and candidate.finish_reason == 2:
                 debug("Safety filter triggered - finish_reason: 2, trying simpler approach")
-                # Try a much simpler, generic response for blocked content
                 try:
                     simple_response = await asyncio.to_thread(
                         model.generate_content,
@@ -589,65 +703,43 @@ async def respond_with_llm_message(update):
                         safety_settings=safety_settings,
                     )
                     if simple_response.text:
-                        bot_response = f"Ось загальна інформація: {simple_response.text.strip()}"
+                        return f"Ось загальна інформація: {simple_response.text.strip()}"
                     else:
-                        bot_response = (
+                        return (
                             "Вибачте, не можу надати детальну відповідь на це питання."
                             if language == "uk"
                             else "Sorry, I can't provide a detailed answer to this question."
                         )
                 except:  # --- IGNORE --- # pylint: disable=bare-except
-                    bot_response = (
+                    return (
                         "Вибачте, не можу надати детальну відповідь на це питання."
                         if language == "uk"
                         else "Sorry, I can't provide a detailed answer to this question."
                     )
             elif response.text:
-                # Remove Markdown formatting from response
+                # Remove Markdown formatting
                 bot_response = response.text.strip()
-                # Remove common Markdown syntax
-                bot_response = re.sub(r'\*+', '', bot_response)  # Bold text
-                bot_response = bot_response.replace('*', '')  # Italic text
-                bot_response = bot_response.replace('`', '')  # Code blocks
-                bot_response = bot_response.replace('#', '')  # Headers
+                bot_response = re.sub(r'\*+', '', bot_response)
+                bot_response = bot_response.replace('*', '').replace('`', '').replace('#', '')
+                return bot_response
             else:
-                bot_response = (
+                return (
                     "Вибачте, я не можу згенерувати відповідь."
                     if language == "uk"
                     else "Sorry, I couldn't generate a response."
                 )
         else:
-            bot_response = (
+            return (
                 "Вибачте, я не можу згенерувати відповідь."
                 if language == "uk"
                 else "Sorry, I couldn't generate a response."
             )
-
-        await update.message.reply_text(bot_response)
-
-    except Exception as e:  # pylint: disable=broad-except
-        import traceback
-
-        error_msg = str(e)
-        error("Error in Gemini API request: %s (Type: %s)", error_msg, type(e).__name__)
-        error("Full traceback: %s", traceback.format_exc())
-
-        # Check for rate limit (429) error
-        if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower():
-            error("Rate limit exceeded (429) - Too many requests to Gemini API")
-            bot_response = (
-                "Вибачте, перевищено ліміт запитів до AI. Спробуйте пізніше."
-                if language == "uk"
-                else "Sorry, AI request limit exceeded. Please try again later."
-            )
-        else:
-            bot_response = (
-                "Вибачте, я не можу згенерувати відповідь."
-                if language == "uk"
-                else "Sorry, I encountered an error while processing your request."
-            )
-
-        await update.message.reply_text(bot_response)
+    except Exception:
+        return (
+            "Вибачте, я не можу згенерувати відповідь."
+            if language == "uk"
+            else "Sorry, I couldn't generate a response."
+        )
 
 
 def main():
diff --git a/src/requirements.txt b/src/requirements.txt
index f630e1a..2700381 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -4,3 +4,4 @@ yt-dlp==2026.2.21
 gallery-dl==1.31.6
 aiohttp==3.13.3
 google-generativeai>=0.8.6
+openai>=1.0.0

From 0806b074a71dd3ee04ce4c9eee4602367c40c1c5 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 22:29:37 +0100
Subject: [PATCH 05/27] Parametrize user limits

---
 src/main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/main.py b/src/main.py
index 337d830..f5dc70d 100644
--- a/src/main.py
+++ b/src/main.py
@@ -38,7 +38,7 @@
 # Reply with user data for Healthcheck
 send_user_info_with_healthcheck = os.getenv("SEND_USER_INFO_WITH_HEALTHCHECK", "False").lower() == "true"
 USE_LLM = os.getenv("USE_LLM", "False").lower() == "true"
-LLM_PROVIDER = os.getenv("LLM_PROVIDER", "gemini").lower()  # gemini or grok
+LLM_PROVIDER = os.getenv("LLM_PROVIDER", "grok").lower()  # gemini or grok
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-flash-latest")
 GROK_API_KEY = os.getenv("GROK_API_KEY")
@@ -61,8 +61,8 @@
 # Rate limiting for LLM APIs
 llm_rate_limit = defaultdict(list)  # {user_id: [timestamp1, timestamp2, ...]}
 llm_daily_limit = defaultdict(int)  # {user_id: count}
-LLM_RPM_LIMIT = 50  # Set to 4 to be safe (limit is 5 for Gemini)
-LLM_RPD_LIMIT = 500  # Daily limit: 18 to be safe (limit is 20 for Gemini)
+LLM_RPM_LIMIT = int(os.getenv("LLM_RPM_LIMIT", "50"))  # Requests per minute per user
+LLM_RPD_LIMIT = int(os.getenv("LLM_RPD_LIMIT", "500"))  # Requests per day per user
 
 
 # Cache responses from JSON file

From e91f722e67c010729f3fc3daabeb41e53d5278f2 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 22:31:13 +0100
Subject: [PATCH 06/27] Fix linter

---
 src/main.py | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/src/main.py b/src/main.py
index f5dc70d..ef70e60 100644
--- a/src/main.py
+++ b/src/main.py
@@ -53,10 +53,7 @@
 # Configure Grok API
 grok_client = None
 if GROK_API_KEY:
-    grok_client = AsyncOpenAI(
-        api_key=GROK_API_KEY,
-        base_url="https://api.x.ai/v1"
-    )
+    grok_client = AsyncOpenAI(api_key=GROK_API_KEY, base_url="https://api.x.ai/v1")
 
 # Rate limiting for LLM APIs
 llm_rate_limit = defaultdict(list)  # {user_id: [timestamp1, timestamp2, ...]}
@@ -521,7 +518,7 @@ async def respond_with_llm_message(update):
         )
         await update.message.reply_text(bot_response)
         return
-    
+
     # Check daily limit
     if llm_daily_limit[user_id] >= LLM_RPD_LIMIT:
         debug("Daily limit hit for user %s", user_id)
@@ -614,20 +611,27 @@ async def call_grok_api(safe_prompt: str, update) -> str:
     try:
         max_retries = 2
         retry_delay = 60
-        
+
         for attempt in range(max_retries):
             try:
                 response = await grok_client.chat.completions.create(
                     model=GROK_MODEL,
                     messages=[{"role": "user", "content": safe_prompt}],
                     max_tokens=1024,
-                    temperature=0.7
+                    temperature=0.7,
                 )
                 return response.choices[0].message.content.strip()
             except Exception as retry_error:
                 error_msg = str(retry_error)
-                if ("429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()) and attempt < max_retries - 1:
-                    debug("Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", retry_delay, attempt + 1, max_retries)
+                if (
+                    "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()
+                ) and attempt < max_retries - 1:
+                    debug(
+                        "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)",
+                        retry_delay,
+                        attempt + 1,
+                        max_retries,
+                    )
                     wait_msg = (
                         f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..."
                         if language == "uk"
@@ -657,10 +661,10 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
             genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
         }
         contents = [{'role': 'user', 'parts': [safe_prompt]}]
-        
+
         max_retries = 2
         retry_delay = 60
-        
+
         for attempt in range(max_retries):
             try:
                 response = await asyncio.to_thread(
@@ -678,8 +682,15 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
                 break
             except Exception as retry_error:
                 error_msg = str(retry_error)
-                if ("429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()) and attempt < max_retries - 1:
-                    debug("Rate limit hit, waiting %s seconds before retry (attempt %s/%s)", retry_delay, attempt + 1, max_retries)
+                if (
+                    "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()
+                ) and attempt < max_retries - 1:
+                    debug(
+                        "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)",
+                        retry_delay,
+                        attempt + 1,
+                        max_retries,
+                    )
                     wait_msg = (
                         f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..."
                         if language == "uk"

From 6db35fb43660834d24c95da51afa708e6a3ca40b Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 22:35:23 +0100
Subject: [PATCH 07/27] Add disable=broad-exception-caught for pylint

---
 src/main.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/main.py b/src/main.py
index ef70e60..9ed8aba 100644
--- a/src/main.py
+++ b/src/main.py
@@ -621,7 +621,7 @@ async def call_grok_api(safe_prompt: str, update) -> str:
                     temperature=0.7,
                 )
                 return response.choices[0].message.content.strip()
-            except Exception as retry_error:
+            except Exception as retry_error:  # pylint: disable=broad-exception-caught
                 error_msg = str(retry_error)
                 if (
                     "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()
@@ -641,7 +641,7 @@ async def call_grok_api(safe_prompt: str, update) -> str:
                     await asyncio.sleep(retry_delay)
                 else:
                     raise
-    except Exception:
+    except Exception:  # pylint: disable=broad-exception-caught
         return (
             "Вибачте, я не можу згенерувати відповідь."
             if language == "uk"
@@ -680,7 +680,7 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
                 )
                 debug("Successfully received response from Gemini API")
                 break
-            except Exception as retry_error:
+            except Exception as retry_error:  # pylint: disable=broad-exception-caught
                 error_msg = str(retry_error)
                 if (
                     "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()
@@ -745,7 +745,7 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
                 if language == "uk"
                 else "Sorry, I couldn't generate a response."
             )
-    except Exception:
+    except Exception:  # pylint: disable=broad-exception-caught
         return (
             "Вибачте, я не можу згенерувати відповідь."
             if language == "uk"

From 1e61c42fb2f6c3ea0bb15d0d976b4e54a55f02a2 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 22:52:55 +0100
Subject: [PATCH 08/27] Implement conversation context

---
 src/main.py | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/src/main.py b/src/main.py
index 9ed8aba..ae0e8e3 100644
--- a/src/main.py
+++ b/src/main.py
@@ -38,6 +38,7 @@
 # Reply with user data for Healthcheck
 send_user_info_with_healthcheck = os.getenv("SEND_USER_INFO_WITH_HEALTHCHECK", "False").lower() == "true"
 USE_LLM = os.getenv("USE_LLM", "False").lower() == "true"
+USE_CONVERSATION_CONTEXT = os.getenv("USE_CONVERSATION_CONTEXT", "True").lower() == "true"
 LLM_PROVIDER = os.getenv("LLM_PROVIDER", "grok").lower()  # gemini or grok
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-flash-latest")
@@ -61,6 +62,10 @@
 LLM_RPM_LIMIT = int(os.getenv("LLM_RPM_LIMIT", "50"))  # Requests per minute per user
 LLM_RPD_LIMIT = int(os.getenv("LLM_RPD_LIMIT", "500"))  # Requests per day per user
 
+# Conversation context storage: {user_id: [(user_msg, bot_response), ...]}
+conversation_context = defaultdict(list)
+MAX_CONTEXT_MESSAGES = int(os.getenv("MAX_CONTEXT_MESSAGES", "3"))  # Keep last N exchanges
+
 
 # Cache responses from JSON file
 @lru_cache(maxsize=1)
@@ -566,10 +571,26 @@ async def respond_with_llm_message(update):
             await update.message.reply_text(bot_response)
             return
 
-        # Prepare prompt
+        # Prepare prompt with context
         debug("Original prompt: %s", prompt)
-        safe_prompt = f"Відповідай українською мовою як дружній асистент. Питання користувача: {prompt}"
-        debug("Modified safe prompt: %s", safe_prompt)
+
+        # Build context from previous messages if enabled
+        user_id = update.effective_user.id
+        if USE_CONVERSATION_CONTEXT:
+            context_messages = (
+                conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] if conversation_context[user_id] else []
+            )
+        else:
+            context_messages = []
+
+        # Create prompt with context if available
+        if context_messages:
+            context_str = "\n".join([f"Користувач: {msg}\nАсистент: {resp}" for msg, resp in context_messages])
+            safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\nВідповідай українською мовою як дружній асистент."
+        else:
+            safe_prompt = f"Відповідай українською мовою як дружній асистент. Питання користувача: {prompt}"
+
+        debug("Modified safe prompt with context: %s", safe_prompt[:200])
 
         # Call appropriate LLM provider
         if LLM_PROVIDER == "grok":
@@ -579,6 +600,13 @@ async def respond_with_llm_message(update):
             debug("Using Gemini API with model: %s", GEMINI_MODEL)
             bot_response = await call_gemini_api(safe_prompt, prompt, update)
 
+        # Store conversation in context if enabled
+        if USE_CONVERSATION_CONTEXT:
+            conversation_context[user_id].append((prompt, bot_response))
+            # Keep only last MAX_CONTEXT_MESSAGES
+            if len(conversation_context[user_id]) > MAX_CONTEXT_MESSAGES:
+                conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:]
+
         await update.message.reply_text(bot_response)
 
     except Exception as e:  # pylint: disable=broad-except

From 8c192004dae86fe4ef5936cd87ded5ee3cf8a0b4 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 22:56:55 +0100
Subject: [PATCH 09/27] Add Ukrainian translation to some log messages

---
 src/main.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/main.py b/src/main.py
index ae0e8e3..4c69998 100644
--- a/src/main.py
+++ b/src/main.py
@@ -502,10 +502,20 @@ async def respond_with_llm_message(update):
 
     # Check if API is configured
     if LLM_PROVIDER == "grok" and not GROK_API_KEY:
-        await update.message.reply_text("Sorry, Grok AI service is not configured.")
+        bot_response = (
+            "Вибачте, Grok AI сервіс не налаштовано."
+            if language == "uk"
+            else "Sorry, Grok AI service is not configured."
+        )
+        await update.message.reply_text(bot_response)
         return
     elif LLM_PROVIDER == "gemini" and not GEMINI_API_KEY:
-        await update.message.reply_text("Sorry, Gemini AI service is not configured.")
+        bot_response = (
+            "Вибачте, Gemini AI сервіс не налаштовано."
+            if language == "uk"
+            else "Sorry, Gemini AI service is not configured."
+        )
+        await update.message.reply_text(bot_response)
         return
 
     # Rate limiting check

From ab9188e08d697bbf0d3e44f7ba51d50c6ffd1a97 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 22:59:56 +0100
Subject: [PATCH 10/27] Modify system prompt to LLM

---
 src/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main.py b/src/main.py
index 4c69998..c760c10 100644
--- a/src/main.py
+++ b/src/main.py
@@ -596,9 +596,9 @@ async def respond_with_llm_message(update):
         # Create prompt with context if available
         if context_messages:
             context_str = "\n".join([f"Користувач: {msg}\nАсистент: {resp}" for msg, resp in context_messages])
-            safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\nВідповідай українською мовою як дружній асистент."
+            safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\nВідповідай українською мовою як дружній асистент. Не вітайся і не прощайся."
         else:
-            safe_prompt = f"Відповідай українською мовою як дружній асистент. Питання користувача: {prompt}"
+            safe_prompt = f"Відповідай українською мовою як дружній асистент. Не вітайся і не прощайся. Питання користувача: {prompt}"
 
         debug("Modified safe prompt with context: %s", safe_prompt[:200])
 

From 606d45ada57af4d16544a04044adbc9666da43bb Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Wed, 4 Mar 2026 23:12:17 +0100
Subject: [PATCH 11/27] Implement MAX_CONTEXT_CHARS

---
 src/main.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/main.py b/src/main.py
index c760c10..a9d5500 100644
--- a/src/main.py
+++ b/src/main.py
@@ -65,6 +65,7 @@
 # Conversation context storage: {user_id: [(user_msg, bot_response), ...]}
 conversation_context = defaultdict(list)
 MAX_CONTEXT_MESSAGES = int(os.getenv("MAX_CONTEXT_MESSAGES", "3"))  # Keep last N exchanges
+MAX_CONTEXT_CHARS = int(os.getenv("MAX_CONTEXT_CHARS", "500"))  # Max chars per message in context
 
 
 # Cache responses from JSON file
@@ -612,7 +613,9 @@ async def respond_with_llm_message(update):
 
         # Store conversation in context if enabled
         if USE_CONVERSATION_CONTEXT:
-            conversation_context[user_id].append((prompt, bot_response))
+            truncated_prompt = prompt[:MAX_CONTEXT_CHARS]
+            truncated_response = bot_response[:MAX_CONTEXT_CHARS]
+            conversation_context[user_id].append((truncated_prompt, truncated_response))
             # Keep only last MAX_CONTEXT_MESSAGES
             if len(conversation_context[user_id]) > MAX_CONTEXT_MESSAGES:
                 conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:]

From 98e47dc90fade2bf1bc517a18d9d803e22183635 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:00:02 +0100
Subject: [PATCH 12/27] Fix LLM rate limiting and error handling issues

- Move traceback import to module level to avoid inline imports
- Add provider validation with ALLOWED_PROVIDERS set
- Replace bare except with except Exception and add logging
- Implement daily limit reset using date tracking
- Fix quota consumption: only increment counters after successful API calls
- Add tentative timestamp mechanism that rolls back on failures
---
 PR_MESSAGE.md | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.py   | 38 +++++++++++++++++++++----
 2 files changed, 109 insertions(+), 6 deletions(-)
 create mode 100644 PR_MESSAGE.md

diff --git a/PR_MESSAGE.md b/PR_MESSAGE.md
new file mode 100644
index 0000000..f137f75
--- /dev/null
+++ b/PR_MESSAGE.md
@@ -0,0 +1,77 @@
+# Add Grok API support and improve LLM integration
+
+## Summary
+This PR adds Grok API as an alternative LLM provider alongside Gemini, implements conversation context tracking, adds configurable rate limiting, improves error handling, and optimizes token usage through context truncation.
+
+## Changes
+
+### 1. Grok API Integration
+- Add Grok API support using OpenAI-compatible client
+- Add environment variables: `LLM_PROVIDER`, `GROK_API_KEY`, `GROK_MODEL`
+- Unified LLM approach: renamed `gemini_*` variables to `llm_*` for provider-agnostic naming
+- Add `openai>=1.0.0` dependency to requirements.txt
+
+### 2. Conversation Context
+- Implement conversation history tracking per user
+- Add `USE_CONVERSATION_CONTEXT` flag (default: True)
+- Add `MAX_CONTEXT_MESSAGES` to control number of exchanges stored (default: 3)
+- Add `MAX_CONTEXT_CHARS` to limit token usage by truncating stored messages (default: 500 chars)
+- Context is included in prompts to maintain conversation flow
+- **Token optimization**: Reduces context size by ~75% (from ~6000 to ~1500 chars for 3 exchanges)
+
+### 3. Rate Limiting
+- Implement per-user rate limiting for LLM APIs
+- Add `LLM_RPM_LIMIT` (requests per minute, default: 50)
+- Add `LLM_RPD_LIMIT` (requests per day, default: 500)
+- Automatic cleanup of old timestamps
+- User-friendly rate limit messages in Ukrainian and English
+
+### 4. Error Handling & Logging
+- Add proper handling for 429 (Too Many Requests) errors from LLM APIs
+- Add detailed error logging with full traceback for debugging
+- Add retry logic with 60-second delay for rate limit errors (max 2 attempts)
+- Distinguish between rate limit errors and other API failures in user messages
+- Log exception type along with error message
+- Add error logging when API key is not configured but `USE_LLM=True`
+
+### 5. Code Quality
+- Fix Black formatting issues (line breaks, spacing)
+- Add `# pylint: disable=broad-exception-caught` comments for retry logic
+- Add Ukrainian translations for all new error messages
+
+### 6. Configuration
+- Add missing `USE_LLM` variable to `.env.example`
+- Add all new LLM-related variables to `.env.example`
+- Add configuration check messages in both Ukrainian and English
+
+## Environment Variables Added
+```ini
+USE_LLM=False                          # Enable LLM responses
+LLM_PROVIDER=grok                      # grok or gemini
+GROK_API_KEY=your_grok_api_key
+GROK_MODEL=grok-4-latest
+USE_CONVERSATION_CONTEXT=True          # Enable conversation history
+MAX_CONTEXT_MESSAGES=3                 # Number of exchanges to remember
+MAX_CONTEXT_CHARS=500                  # Max chars per message in context (token optimization)
+LLM_RPM_LIMIT=50                       # Requests per minute per user
+LLM_RPD_LIMIT=500                      # Requests per day per user
+```
+
+## Benefits
+- **Flexibility**: Choose between Grok (480 RPM, 4M TPM) and Gemini (5 RPM, 20 RPD)
+- **Better UX**: Conversation context makes bot responses more relevant
+- **Cost optimization**: Context truncation saves ~75% of tokens
+- **Reliability**: Automatic retry on rate limits with user feedback
+- **Debuggability**: Full error logging makes issues easy to diagnose
+- **Protection**: Rate limiting prevents API quota exhaustion
+
+## Testing
+1. Set `LOG_LEVEL=DEBUG` to see detailed API logs and error traces
+2. Test with `USE_LLM=True` and both `LLM_PROVIDER=grok` and `LLM_PROVIDER=gemini`
+3. Test conversation context by asking follow-up questions
+4. Test rate limiting by making multiple rapid requests
+
+## Related Issues
+- Fixes issue where bot returns generic error without logging actual API errors
+- Fixes missing conversation context causing bot to "forget" previous messages
+- Fixes token waste from storing full LLM responses in context
diff --git a/src/main.py b/src/main.py
index a9d5500..bad53a4 100644
--- a/src/main.py
+++ b/src/main.py
@@ -6,6 +6,8 @@
 import asyncio
 import re
 import time
+import traceback
+from datetime import datetime
 import google.generativeai as genai
 from openai import AsyncOpenAI
 from functools import lru_cache
@@ -58,7 +60,7 @@
 
 # Rate limiting for LLM APIs
 llm_rate_limit = defaultdict(list)  # {user_id: [timestamp1, timestamp2, ...]}
-llm_daily_limit = defaultdict(int)  # {user_id: count}
+llm_daily_limit = defaultdict(lambda: {"count": 0, "date": ""})  # {user_id: {count, date}}
 LLM_RPM_LIMIT = int(os.getenv("LLM_RPM_LIMIT", "50"))  # Requests per minute per user
 LLM_RPD_LIMIT = int(os.getenv("LLM_RPD_LIMIT", "500"))  # Requests per day per user
 
@@ -67,6 +69,9 @@
 MAX_CONTEXT_MESSAGES = int(os.getenv("MAX_CONTEXT_MESSAGES", "3"))  # Keep last N exchanges
 MAX_CONTEXT_CHARS = int(os.getenv("MAX_CONTEXT_CHARS", "500"))  # Max chars per message in context
 
+# Allowed LLM providers
+ALLOWED_PROVIDERS = {"grok", "gemini"}
+
 
 # Cache responses from JSON file
 @lru_cache(maxsize=1)
@@ -501,6 +506,16 @@ async def respond_with_llm_message(update):
     debug("Original message: %s", message_text)
     debug("Processed prompt: %s", prompt)
 
+    # Validate LLM provider
+    if LLM_PROVIDER not in ALLOWED_PROVIDERS:
+        bot_response = (
+            f"Вибачте, провайдер '{LLM_PROVIDER}' не підтримується. Доступні: {', '.join(ALLOWED_PROVIDERS)}"
+            if language == "uk"
+            else f"Sorry, provider '{LLM_PROVIDER}' is not supported. Available: {', '.join(ALLOWED_PROVIDERS)}"
+        )
+        await update.message.reply_text(bot_response)
+        return
+
     # Check if API is configured
     if LLM_PROVIDER == "grok" and not GROK_API_KEY:
         bot_response = (
@@ -536,7 +551,11 @@ async def respond_with_llm_message(update):
         return
 
     # Check daily limit
-    if llm_daily_limit[user_id] >= LLM_RPD_LIMIT:
+    today = datetime.now().strftime("%Y-%m-%d")
+    if llm_daily_limit[user_id]["date"] != today:
+        llm_daily_limit[user_id] = {"count": 0, "date": today}
+
+    if llm_daily_limit[user_id]["count"] >= LLM_RPD_LIMIT:
         debug("Daily limit hit for user %s", user_id)
         bot_response = (
             "Вибачте, денний ліміт запитів вичерпано. Спробуйте завтра."
@@ -546,9 +565,8 @@ async def respond_with_llm_message(update):
         await update.message.reply_text(bot_response)
         return
 
-    # Add current request timestamp
+    # Tentatively add current request timestamp (will be removed on failure)
     llm_rate_limit[user_id].append(current_time)
-    llm_daily_limit[user_id] += 1
 
     try:
         # Check if user is asking for image generation and modify prompt
@@ -580,6 +598,8 @@ async def respond_with_llm_message(update):
                 bot_response = "Sorry, I can't generate images, but I can describe in detail what you're asking for! For example, I can tell you about a car: its color, shape, design features, etc. What specifically interests you?"
 
             await update.message.reply_text(bot_response)
+            # Remove tentative timestamp since no API call was made
+            llm_rate_limit[user_id].pop()
             return
 
         # Prepare prompt with context
@@ -611,6 +631,9 @@ async def respond_with_llm_message(update):
             debug("Using Gemini API with model: %s", GEMINI_MODEL)
             bot_response = await call_gemini_api(safe_prompt, prompt, update)
 
+        # Increment daily limit only after successful API call
+        llm_daily_limit[user_id]["count"] += 1
+
         # Store conversation in context if enabled
         if USE_CONVERSATION_CONTEXT:
             truncated_prompt = prompt[:MAX_CONTEXT_CHARS]
@@ -623,7 +646,9 @@ async def respond_with_llm_message(update):
         await update.message.reply_text(bot_response)
 
     except Exception as e:  # pylint: disable=broad-except
-        import traceback
+        # Remove tentative timestamp on failure
+        if llm_rate_limit[user_id] and llm_rate_limit[user_id][-1] == current_time:
+            llm_rate_limit[user_id].pop()
 
         error_msg = str(e)
         error("Error in LLM API request: %s (Type: %s)", error_msg, type(e).__name__)
@@ -762,7 +787,8 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
                             if language == "uk"
                             else "Sorry, I can't provide a detailed answer to this question."
                         )
-                except:  # --- IGNORE --- # pylint: disable=bare-except
+                except Exception:  # pylint: disable=broad-exception-caught
+                    error("Fallback response generation failed")
                     return (
                         "Вибачте, не можу надати детальну відповідь на це питання."
                         if language == "uk"

From 5c6c98bb3490401750102e30b7dd2749b023d96a Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:02:03 +0100
Subject: [PATCH 13/27] Remove PR_MESSAGE.md

---
 PR_MESSAGE.md | 77 ---------------------------------------------------
 1 file changed, 77 deletions(-)
 delete mode 100644 PR_MESSAGE.md

diff --git a/PR_MESSAGE.md b/PR_MESSAGE.md
deleted file mode 100644
index f137f75..0000000
--- a/PR_MESSAGE.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# Add Grok API support and improve LLM integration
-
-## Summary
-This PR adds Grok API as an alternative LLM provider alongside Gemini, implements conversation context tracking, adds configurable rate limiting, improves error handling, and optimizes token usage through context truncation.
-
-## Changes
-
-### 1. Grok API Integration
-- Add Grok API support using OpenAI-compatible client
-- Add environment variables: `LLM_PROVIDER`, `GROK_API_KEY`, `GROK_MODEL`
-- Unified LLM approach: renamed `gemini_*` variables to `llm_*` for provider-agnostic naming
-- Add `openai>=1.0.0` dependency to requirements.txt
-
-### 2. Conversation Context
-- Implement conversation history tracking per user
-- Add `USE_CONVERSATION_CONTEXT` flag (default: True)
-- Add `MAX_CONTEXT_MESSAGES` to control number of exchanges stored (default: 3)
-- Add `MAX_CONTEXT_CHARS` to limit token usage by truncating stored messages (default: 500 chars)
-- Context is included in prompts to maintain conversation flow
-- **Token optimization**: Reduces context size by ~75% (from ~6000 to ~1500 chars for 3 exchanges)
-
-### 3. Rate Limiting
-- Implement per-user rate limiting for LLM APIs
-- Add `LLM_RPM_LIMIT` (requests per minute, default: 50)
-- Add `LLM_RPD_LIMIT` (requests per day, default: 500)
-- Automatic cleanup of old timestamps
-- User-friendly rate limit messages in Ukrainian and English
-
-### 4. Error Handling & Logging
-- Add proper handling for 429 (Too Many Requests) errors from LLM APIs
-- Add detailed error logging with full traceback for debugging
-- Add retry logic with 60-second delay for rate limit errors (max 2 attempts)
-- Distinguish between rate limit errors and other API failures in user messages
-- Log exception type along with error message
-- Add error logging when API key is not configured but `USE_LLM=True`
-
-### 5. Code Quality
-- Fix Black formatting issues (line breaks, spacing)
-- Add `# pylint: disable=broad-exception-caught` comments for retry logic
-- Add Ukrainian translations for all new error messages
-
-### 6. Configuration
-- Add missing `USE_LLM` variable to `.env.example`
-- Add all new LLM-related variables to `.env.example`
-- Add configuration check messages in both Ukrainian and English
-
-## Environment Variables Added
-```ini
-USE_LLM=False                          # Enable LLM responses
-LLM_PROVIDER=grok                      # grok or gemini
-GROK_API_KEY=your_grok_api_key
-GROK_MODEL=grok-4-latest
-USE_CONVERSATION_CONTEXT=True          # Enable conversation history
-MAX_CONTEXT_MESSAGES=3                 # Number of exchanges to remember
-MAX_CONTEXT_CHARS=500                  # Max chars per message in context (token optimization)
-LLM_RPM_LIMIT=50                       # Requests per minute per user
-LLM_RPD_LIMIT=500                      # Requests per day per user
-```
-
-## Benefits
-- **Flexibility**: Choose between Grok (480 RPM, 4M TPM) and Gemini (5 RPM, 20 RPD)
-- **Better UX**: Conversation context makes bot responses more relevant
-- **Cost optimization**: Context truncation saves ~75% of tokens
-- **Reliability**: Automatic retry on rate limits with user feedback
-- **Debuggability**: Full error logging makes issues easy to diagnose
-- **Protection**: Rate limiting prevents API quota exhaustion
-
-## Testing
-1. Set `LOG_LEVEL=DEBUG` to see detailed API logs and error traces
-2. Test with `USE_LLM=True` and both `LLM_PROVIDER=grok` and `LLM_PROVIDER=gemini`
-3. Test conversation context by asking follow-up questions
-4. Test rate limiting by making multiple rapid requests
-
-## Related Issues
-- Fixes issue where bot returns generic error without logging actual API errors
-- Fixes missing conversation context causing bot to "forget" previous messages
-- Fixes token waste from storing full LLM responses in context

From 725c3e15253ede264b45c9ceda9e23c97e1cf354 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:17:40 +0100
Subject: [PATCH 14/27] Fix LLM API helpers to propagate exceptions and add
 plain text instruction to Grok

- Remove duplicate user_id assignment
- Make call_grok_api and call_gemini_api raise exceptions instead of returning error strings
- Add plain text system instruction to Grok API for consistent formatting
- Ensure quota counters only increment after successful API responses
- Add periodic cleanup task to prevent unbounded memory growth of user data
---
 src/main.py | 284 +++++++++++++++++++++++++++-------------------------
 1 file changed, 146 insertions(+), 138 deletions(-)

diff --git a/src/main.py b/src/main.py
index bad53a4..c73eed8 100644
--- a/src/main.py
+++ b/src/main.py
@@ -69,6 +69,11 @@
 MAX_CONTEXT_MESSAGES = int(os.getenv("MAX_CONTEXT_MESSAGES", "3"))  # Keep last N exchanges
 MAX_CONTEXT_CHARS = int(os.getenv("MAX_CONTEXT_CHARS", "500"))  # Max chars per message in context
 
+# User activity tracking for cleanup
+user_last_seen = defaultdict(float)  # {user_id: timestamp}
+USER_CLEANUP_TTL_DAYS = int(os.getenv("USER_CLEANUP_TTL_DAYS", "3"))  # Days before user data expires
+USER_CLEANUP_INTERVAL_HOURS = int(os.getenv("USER_CLEANUP_INTERVAL_HOURS", "24"))  # Cleanup interval
+
 # Allowed LLM providers
 ALLOWED_PROVIDERS = {"grok", "gemini"}
 
@@ -537,6 +542,10 @@ async def respond_with_llm_message(update):
     # Rate limiting check
     user_id = update.effective_user.id
     current_time = time.time()
+    
+    # Update last seen timestamp
+    user_last_seen[user_id] = current_time
+    
     # Clean old timestamps (older than 60 seconds)
     llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if current_time - t < 60]
 
@@ -606,7 +615,6 @@ async def respond_with_llm_message(update):
         debug("Original prompt: %s", prompt)
 
         # Build context from previous messages if enabled
-        user_id = update.effective_user.id
         if USE_CONVERSATION_CONTEXT:
             context_messages = (
                 conversation_context[user_id][-MAX_CONTEXT_MESSAGES:] if conversation_context[user_id] else []
@@ -673,151 +681,147 @@ async def respond_with_llm_message(update):
 
 
 async def call_grok_api(safe_prompt: str, update) -> str:
-    """Call Grok API and return response."""
-    try:
-        max_retries = 2
-        retry_delay = 60
-
-        for attempt in range(max_retries):
-            try:
-                response = await grok_client.chat.completions.create(
-                    model=GROK_MODEL,
-                    messages=[{"role": "user", "content": safe_prompt}],
-                    max_tokens=1024,
-                    temperature=0.7,
+    """Call Grok API and return response. Raises exception on failure."""
+    plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences."
+    max_retries = 2
+    retry_delay = 60
+
+    for attempt in range(max_retries):
+        try:
+            response = await grok_client.chat.completions.create(
+                model=GROK_MODEL,
+                messages=[
+                    {"role": "system", "content": plain_text_instruction},
+                    {"role": "user", "content": safe_prompt},
+                ],
+                max_tokens=1024,
+                temperature=0.7,
+            )
+            return response.choices[0].message.content.strip()
+        except Exception as retry_error:  # pylint: disable=broad-exception-caught
+            error_msg = str(retry_error)
+            if (
+                "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()
+            ) and attempt < max_retries - 1:
+                debug(
+                    "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)",
+                    retry_delay,
+                    attempt + 1,
+                    max_retries,
                 )
-                return response.choices[0].message.content.strip()
-            except Exception as retry_error:  # pylint: disable=broad-exception-caught
-                error_msg = str(retry_error)
-                if (
-                    "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()
-                ) and attempt < max_retries - 1:
-                    debug(
-                        "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)",
-                        retry_delay,
-                        attempt + 1,
-                        max_retries,
-                    )
-                    wait_msg = (
-                        f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..."
-                        if language == "uk"
-                        else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..."
-                    )
-                    await update.message.reply_text(wait_msg)
-                    await asyncio.sleep(retry_delay)
-                else:
-                    raise
-    except Exception:  # pylint: disable=broad-exception-caught
-        return (
-            "Вибачте, я не можу згенерувати відповідь."
-            if language == "uk"
-            else "Sorry, I couldn't generate a response."
-        )
+                wait_msg = (
+                    f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..."
+                    if language == "uk"
+                    else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..."
+                )
+                await update.message.reply_text(wait_msg)
+                await asyncio.sleep(retry_delay)
+            else:
+                raise
 
 
 async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
-    """Call Gemini API and return response."""
-    try:
-        plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences."
-        model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction)
-        safety_settings = {
-            genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
-            genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
-            genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE,
-            genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
-        }
-        contents = [{'role': 'user', 'parts': [safe_prompt]}]
-
-        max_retries = 2
-        retry_delay = 60
-
-        for attempt in range(max_retries):
-            try:
-                response = await asyncio.to_thread(
-                    model.generate_content,
-                    contents,
-                    generation_config=genai.types.GenerationConfig(
-                        temperature=0.7,
-                        top_p=0.9,
-                        top_k=30,
-                        max_output_tokens=1024,
-                    ),
-                    safety_settings=safety_settings,
+    """Call Gemini API and return response. Raises exception on failure."""
+    plain_text_instruction = "Provide the entire response exclusively as plain text. Do not use any Markdown formatting (no **bold**, *italics*, # headers, or lists). The response must be text only. Provide concise, short answers. Aim for 1-3 sentences."
+    model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=plain_text_instruction)
+    safety_settings = {
+        genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
+        genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
+        genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE,
+        genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
+    }
+    contents = [{'role': 'user', 'parts': [safe_prompt]}]
+
+    max_retries = 2
+    retry_delay = 60
+
+    for attempt in range(max_retries):
+        try:
+            response = await asyncio.to_thread(
+                model.generate_content,
+                contents,
+                generation_config=genai.types.GenerationConfig(
+                    temperature=0.7,
+                    top_p=0.9,
+                    top_k=30,
+                    max_output_tokens=1024,
+                ),
+                safety_settings=safety_settings,
+            )
+            debug("Successfully received response from Gemini API")
+            break
+        except Exception as retry_error:  # pylint: disable=broad-exception-caught
+            error_msg = str(retry_error)
+            if (
+                "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()
+            ) and attempt < max_retries - 1:
+                debug(
+                    "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)",
+                    retry_delay,
+                    attempt + 1,
+                    max_retries,
                 )
-                debug("Successfully received response from Gemini API")
-                break
-            except Exception as retry_error:  # pylint: disable=broad-exception-caught
-                error_msg = str(retry_error)
-                if (
-                    "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower()
-                ) and attempt < max_retries - 1:
-                    debug(
-                        "Rate limit hit, waiting %s seconds before retry (attempt %s/%s)",
-                        retry_delay,
-                        attempt + 1,
-                        max_retries,
-                    )
-                    wait_msg = (
-                        f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..."
-                        if language == "uk"
-                        else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..."
-                    )
-                    await update.message.reply_text(wait_msg)
-                    await asyncio.sleep(retry_delay)
-                else:
-                    raise
-        if hasattr(response, 'candidates') and response.candidates:
-            candidate = response.candidates[0]
-            debug("Response candidate finish_reason: %s", getattr(candidate, 'finish_reason', 'None'))
-            debug("Response candidate safety_ratings: %s", getattr(candidate, 'safety_ratings', 'None'))
-
-            if hasattr(candidate, 'finish_reason') and candidate.finish_reason == 2:
-                debug("Safety filter triggered - finish_reason: 2, trying simpler approach")
-                try:
-                    simple_response = await asyncio.to_thread(
-                        model.generate_content,
-                        "Відповідь українською мовою: дай загальну інформацію про: " + prompt,
-                        safety_settings=safety_settings,
-                    )
-                    if simple_response.text:
-                        return f"Ось загальна інформація: {simple_response.text.strip()}"
-                    else:
-                        return (
-                            "Вибачте, не можу надати детальну відповідь на це питання."
-                            if language == "uk"
-                            else "Sorry, I can't provide a detailed answer to this question."
-                        )
-                except Exception:  # pylint: disable=broad-exception-caught
-                    error("Fallback response generation failed")
-                    return (
-                        "Вибачте, не можу надати детальну відповідь на це питання."
-                        if language == "uk"
-                        else "Sorry, I can't provide a detailed answer to this question."
-                    )
-            elif response.text:
-                # Remove Markdown formatting
-                bot_response = response.text.strip()
-                bot_response = re.sub(r'\*+', '', bot_response)
-                bot_response = bot_response.replace('*', '').replace('`', '').replace('#', '')
-                return bot_response
-            else:
-                return (
-                    "Вибачте, я не можу згенерувати відповідь."
+                wait_msg = (
+                    f"Перевищено ліміт запитів. Зачекайте {retry_delay} секунд, я спробую ще раз..."
                     if language == "uk"
-                    else "Sorry, I couldn't generate a response."
+                    else f"Rate limit exceeded. Waiting {retry_delay} seconds before retrying..."
                 )
-        else:
-            return (
-                "Вибачте, я не можу згенерувати відповідь."
-                if language == "uk"
-                else "Sorry, I couldn't generate a response."
+                await update.message.reply_text(wait_msg)
+                await asyncio.sleep(retry_delay)
+            else:
+                raise
+    if hasattr(response, 'candidates') and response.candidates:
+        candidate = response.candidates[0]
+        debug("Response candidate finish_reason: %s", getattr(candidate, 'finish_reason', 'None'))
+        debug("Response candidate safety_ratings: %s", getattr(candidate, 'safety_ratings', 'None'))
+
+        if hasattr(candidate, 'finish_reason') and candidate.finish_reason == 2:
+            debug("Safety filter triggered - finish_reason: 2, trying simpler approach")
+            simple_response = await asyncio.to_thread(
+                model.generate_content,
+                "Відповідь українською мовою: дай загальну інформацію про: " + prompt,
+                safety_settings=safety_settings,
             )
-    except Exception:  # pylint: disable=broad-exception-caught
-        return (
-            "Вибачте, я не можу згенерувати відповідь."
-            if language == "uk"
-            else "Sorry, I couldn't generate a response."
-        )
+            if simple_response.text:
+                return f"Ось загальна інформація: {simple_response.text.strip()}"
+            else:
+                raise Exception("Вибачте, не можу надати детальну відповідь на це питання.")
+        elif response.text:
+            # Remove Markdown formatting
+            bot_response = response.text.strip()
+            bot_response = re.sub(r'\*+', '', bot_response)
+            bot_response = bot_response.replace('*', '').replace('`', '').replace('#', '')
+            return bot_response
+        else:
+            raise Exception("Вибачте, я не можу згенерувати відповідь.")
+    else:
+        raise Exception("Вибачте, я не можу згенерувати відповідь.")
+
+
+async def cleanup_stale_users():
+    """Remove inactive users from memory to prevent unbounded growth."""
+    while True:
+        await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600)
+        current_time = time.time()
+        ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400
+        
+        stale_users = [
+            user_id for user_id, last_seen in user_last_seen.items()
+            if current_time - last_seen > ttl_seconds
+        ]
+        
+        for user_id in stale_users:
+            if user_id in conversation_context:
+                del conversation_context[user_id]
+            if user_id in llm_rate_limit:
+                del llm_rate_limit[user_id]
+            if user_id in llm_daily_limit:
+                del llm_daily_limit[user_id]
+            if user_id in user_last_seen:
+                del user_last_seen[user_id]
+        
+        if stale_users:
+            info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS)
 
 
 def main():
@@ -851,6 +855,10 @@ def main():
     application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
     # This handler will receive every error which happens in your bot
     application.add_error_handler(error_handler)
+    
+    # Start cleanup task
+    asyncio.create_task(cleanup_stale_users())
+    
     info("Bot started. Ctrl+C to stop")
     application.run_polling()
 

From c9865ba02c690ea09974dbf6a69e0f09149ea665 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:19:40 +0100
Subject: [PATCH 15/27] Fix linter

---
 src/main.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/main.py b/src/main.py
index c73eed8..646d672 100644
--- a/src/main.py
+++ b/src/main.py
@@ -542,10 +542,10 @@ async def respond_with_llm_message(update):
     # Rate limiting check
     user_id = update.effective_user.id
     current_time = time.time()
-    
+
     # Update last seen timestamp
     user_last_seen[user_id] = current_time
-    
+
     # Clean old timestamps (older than 60 seconds)
     llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if current_time - t < 60]
 
@@ -804,12 +804,11 @@ async def cleanup_stale_users():
         await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600)
         current_time = time.time()
         ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400
-        
+
         stale_users = [
-            user_id for user_id, last_seen in user_last_seen.items()
-            if current_time - last_seen > ttl_seconds
+            user_id for user_id, last_seen in user_last_seen.items() if current_time - last_seen > ttl_seconds
         ]
-        
+
         for user_id in stale_users:
             if user_id in conversation_context:
                 del conversation_context[user_id]
@@ -819,7 +818,7 @@ async def cleanup_stale_users():
                 del llm_daily_limit[user_id]
             if user_id in user_last_seen:
                 del user_last_seen[user_id]
-        
+
         if stale_users:
             info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS)
 
@@ -855,10 +854,10 @@ def main():
     application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
     # This handler will receive every error which happens in your bot
     application.add_error_handler(error_handler)
-    
+
     # Start cleanup task
     asyncio.create_task(cleanup_stale_users())
-    
+
     info("Bot started. Ctrl+C to stop")
     application.run_polling()
 

From c3b6179fbdcf5ffba766f4bcc9ea0afe2425f481 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:21:13 +0100
Subject: [PATCH 16/27] Add disable=broad-exception-raised for pylint

---
 src/main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/main.py b/src/main.py
index 646d672..f25cf25 100644
--- a/src/main.py
+++ b/src/main.py
@@ -785,7 +785,7 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
             if simple_response.text:
                 return f"Ось загальна інформація: {simple_response.text.strip()}"
             else:
-                raise Exception("Вибачте, не можу надати детальну відповідь на це питання.")
+                raise Exception("Вибачте, не можу надати детальну відповідь на це питання.")  # pylint: disable=broad-exception-raised
         elif response.text:
             # Remove Markdown formatting
             bot_response = response.text.strip()
@@ -793,9 +793,9 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
             bot_response = bot_response.replace('*', '').replace('`', '').replace('#', '')
             return bot_response
         else:
-            raise Exception("Вибачте, я не можу згенерувати відповідь.")
+            raise Exception("Вибачте, я не можу згенерувати відповідь.")  # pylint: disable=broad-exception-raised
     else:
-        raise Exception("Вибачте, я не можу згенерувати відповідь.")
+        raise Exception("Вибачте, я не можу згенерувати відповідь.")  # pylint: disable=broad-exception-raised
 
 
 async def cleanup_stale_users():

From 28bb1c7a7e19e0537f8c36f2e2d38a09459464ab Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:23:31 +0100
Subject: [PATCH 17/27] Fix linter

---
 src/main.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/main.py b/src/main.py
index f25cf25..843c057 100644
--- a/src/main.py
+++ b/src/main.py
@@ -785,7 +785,9 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
             if simple_response.text:
                 return f"Ось загальна інформація: {simple_response.text.strip()}"
             else:
-                raise Exception("Вибачте, не можу надати детальну відповідь на це питання.")  # pylint: disable=broad-exception-raised
+                raise Exception(
+                    "Вибачте, не можу надати детальну відповідь на це питання."
+                )  # pylint: disable=broad-exception-raised
         elif response.text:
             # Remove Markdown formatting
             bot_response = response.text.strip()

From a8dd712c009ef9b2a30cdf4dd45976ba4b9401dd Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:26:47 +0100
Subject: [PATCH 18/27] Fix cleanup task initialization

---
 src/main.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/main.py b/src/main.py
index 843c057..8d760a7 100644
--- a/src/main.py
+++ b/src/main.py
@@ -785,9 +785,9 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
             if simple_response.text:
                 return f"Ось загальна інформація: {simple_response.text.strip()}"
             else:
-                raise Exception(
+                raise Exception(  # pylint: disable=broad-exception-raised
                     "Вибачте, не можу надати детальну відповідь на це питання."
-                )  # pylint: disable=broad-exception-raised
+                )
         elif response.text:
             # Remove Markdown formatting
             bot_response = response.text.strip()
@@ -857,8 +857,11 @@ def main():
     # This handler will receive every error which happens in your bot
     application.add_error_handler(error_handler)
 
-    # Start cleanup task
-    asyncio.create_task(cleanup_stale_users())
+    # Start cleanup task after event loop is running
+    async def post_init(app):
+        asyncio.create_task(cleanup_stale_users())
+
+    application.post_init = post_init
 
     info("Bot started. Ctrl+C to stop")
     application.run_polling()

From 3be82ccca77489be4159dfc5e20a8db464c2ebb3 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:37:30 +0100
Subject: [PATCH 19/27] feat: Add SQLite persistence for user data across
 restarts

---
 .gitignore         |  5 +++
 Dockerfile         |  3 ++
 README.md          | 10 +++++-
 docker-compose.yml |  5 +++
 src/db_storage.py  | 89 ++++++++++++++++++++++++++++++++++++++++++++++
 src/main.py        | 34 +++++++++++++++---
 6 files changed, 140 insertions(+), 6 deletions(-)
 create mode 100644 src/db_storage.py

diff --git a/.gitignore b/.gitignore
index cad404f..5816b9c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,11 @@
 # instagram_cookies.txt should not be tracked by git because it has cookies
 instagram_cookies.txt
 
+# SQLite database
+src/data/
+*.db
+*.db-journal
+
 # Byte-compiled / optimized / compiled Python files
 __pycache__/
 *.py[cod]
diff --git a/Dockerfile b/Dockerfile
index 16493e3..bc4049f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,6 +19,9 @@ COPY src /bot
 
 WORKDIR /bot
 
+# Create data directory for SQLite database
+RUN mkdir -p /bot/data
+
 # https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops
 ENV PYTHONUNBUFFERED=1
 
diff --git a/README.md b/README.md
index ca3a9eb..f430c61 100644
--- a/README.md
+++ b/README.md
@@ -27,13 +27,21 @@ docker build . -t downloader-bot:latest
 ```
 docker run -d --name downloader-bot --restart always --env-file .env downloader-bot:latest
 ```
+To persist user data (conversation history, rate limits) between restarts, add a volume:
+```
+docker run -d --name downloader-bot --restart always --env-file .env -v bot-data:/bot/data downloader-bot:latest
+```
 or use a built image from **Docker hub**
 ```
 docker run -d --name downloader-bot --restart always --env-file .env ovchynnikov/load-bot-linux:latest
 ```
+With persistent data:
+```
+docker run -d --name downloader-bot --restart always --env-file .env -v bot-data:/bot/data ovchynnikov/load-bot-linux:latest
+```
 or if you use instagram cookies
 ```
-docker run -d --name downloader-bot --restart always --env-file .env -v /absolute/path/to/instagram_cookies.txt:/bot/instagram_cookies.txt ovchynnikov/load-bot-linux:latest
+docker run -d --name downloader-bot --restart always --env-file .env -v bot-data:/bot/data -v /absolute/path/to/instagram_cookies.txt:/bot/instagram_cookies.txt ovchynnikov/load-bot-linux:latest
 ```
 or if you want use GPU power of intel chip and set USE_GPU_COMPRESSING=True variable
 ```
diff --git a/docker-compose.yml b/docker-compose.yml
index d1270b4..a1de621 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,8 +10,13 @@ services:
     restart: unless-stopped
     volumes:
       - ./src:/app:cached  # Use bind mount for development
+      - bot-data:/bot/data  # Persistent storage for SQLite database
     deploy:
       resources:
         limits:
           cpus: '1'
           memory: 512M
+
+volumes:
+  bot-data:
+    driver: local
diff --git a/src/db_storage.py b/src/db_storage.py
new file mode 100644
index 0000000..b5ee242
--- /dev/null
+++ b/src/db_storage.py
@@ -0,0 +1,89 @@
+"""SQLite storage for bot user data persistence."""
+
+import sqlite3
+import json
+import os
+from logger import debug, error
+
+
+class BotStorage:
+    """Handles persistent storage of user data in SQLite."""
+
+    def __init__(self, db_path="data/bot.db"):
+        """Initialize database connection and create tables."""
+        os.makedirs(os.path.dirname(db_path), exist_ok=True)
+        self.db_path = db_path
+        self.conn = sqlite3.connect(db_path, check_same_thread=False)
+        self._create_tables()
+        debug("Database initialized at %s", db_path)
+
+    def _create_tables(self):
+        """Create tables if they don't exist."""
+        cursor = self.conn.cursor()
+        cursor.execute("""
+            CREATE TABLE IF NOT EXISTS user_data (
+                user_id INTEGER PRIMARY KEY,
+                conversation_context TEXT,
+                rate_limit_timestamps TEXT,
+                daily_count INTEGER DEFAULT 0,
+                daily_date TEXT,
+                last_seen REAL
+            )
+        """)
+        self.conn.commit()
+
+    def load_user_data(self, user_id):
+        """Load user data from database."""
+        cursor = self.conn.cursor()
+        cursor.execute("SELECT * FROM user_data WHERE user_id = ?", (user_id,))
+        row = cursor.fetchone()
+        if row:
+            return {
+                "conversation_context": json.loads(row[1]) if row[1] else [],
+                "rate_limit_timestamps": json.loads(row[2]) if row[2] else [],
+                "daily_count": row[3],
+                "daily_date": row[4],
+                "last_seen": row[5],
+            }
+        return None
+
+    def save_user_data(self, user_id, conversation_context, rate_limit_timestamps, daily_count, daily_date, last_seen):
+        """Save user data to database."""
+        cursor = self.conn.cursor()
+        cursor.execute(
+            """
+            INSERT OR REPLACE INTO user_data 
+            (user_id, conversation_context, rate_limit_timestamps, daily_count, daily_date, last_seen)
+            VALUES (?, ?, ?, ?, ?, ?)
+            """,
+            (
+                user_id,
+                json.dumps(conversation_context),
+                json.dumps(rate_limit_timestamps),
+                daily_count,
+                daily_date,
+                last_seen,
+            ),
+        )
+        self.conn.commit()
+
+    def delete_user_data(self, user_id):
+        """Delete user data from database."""
+        cursor = self.conn.cursor()
+        cursor.execute("DELETE FROM user_data WHERE user_id = ?", (user_id,))
+        self.conn.commit()
+
+    def get_stale_users(self, ttl_seconds):
+        """Get list of user IDs that haven't been seen within TTL."""
+        import time
+
+        current_time = time.time()
+        cursor = self.conn.cursor()
+        cursor.execute(
+            "SELECT user_id FROM user_data WHERE last_seen < ?", (current_time - ttl_seconds,)
+        )
+        return [row[0] for row in cursor.fetchall()]
+
+    def close(self):
+        """Close database connection."""
+        self.conn.close()
diff --git a/src/main.py b/src/main.py
index 8d760a7..1b42644 100644
--- a/src/main.py
+++ b/src/main.py
@@ -21,6 +21,7 @@
 from general_error_handler import error_handler
 from permissions import inform_user_not_allowed, is_user_or_chat_not_allowed, supported_sites
 from cleanup import cleanup
+from db_storage import BotStorage
 from video_utils import (
     compress_video,
     download_media,
@@ -77,6 +78,9 @@
 # Allowed LLM providers
 ALLOWED_PROVIDERS = {"grok", "gemini"}
 
+# Initialize database storage
+db_storage = BotStorage()
+
 
 # Cache responses from JSON file
 @lru_cache(maxsize=1)
@@ -546,6 +550,15 @@ async def respond_with_llm_message(update):
     # Update last seen timestamp
     user_last_seen[user_id] = current_time
 
+    # Load user data from database on first access
+    if user_id not in llm_daily_limit:
+        user_data = db_storage.load_user_data(user_id)
+        if user_data:
+            conversation_context[user_id] = user_data["conversation_context"]
+            llm_rate_limit[user_id] = user_data["rate_limit_timestamps"]
+            llm_daily_limit[user_id] = {"count": user_data["daily_count"], "date": user_data["daily_date"]}
+            user_last_seen[user_id] = user_data["last_seen"]
+
     # Clean old timestamps (older than 60 seconds)
     llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if current_time - t < 60]
 
@@ -651,6 +664,16 @@ async def respond_with_llm_message(update):
             if len(conversation_context[user_id]) > MAX_CONTEXT_MESSAGES:
                 conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:]
 
+        # Save user data to database
+        db_storage.save_user_data(
+            user_id,
+            conversation_context[user_id],
+            llm_rate_limit[user_id],
+            llm_daily_limit[user_id]["count"],
+            llm_daily_limit[user_id]["date"],
+            user_last_seen[user_id],
+        )
+
         await update.message.reply_text(bot_response)
 
     except Exception as e:  # pylint: disable=broad-except
@@ -801,17 +824,16 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
 
 
 async def cleanup_stale_users():
-    """Remove inactive users from memory to prevent unbounded growth."""
+    """Remove inactive users from memory and database to prevent unbounded growth."""
     while True:
         await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600)
-        current_time = time.time()
         ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400
 
-        stale_users = [
-            user_id for user_id, last_seen in user_last_seen.items() if current_time - last_seen > ttl_seconds
-        ]
+        # Get stale users from database
+        stale_users = db_storage.get_stale_users(ttl_seconds)
 
         for user_id in stale_users:
+            # Remove from memory
             if user_id in conversation_context:
                 del conversation_context[user_id]
             if user_id in llm_rate_limit:
@@ -820,6 +842,8 @@ async def cleanup_stale_users():
                 del llm_daily_limit[user_id]
             if user_id in user_last_seen:
                 del user_last_seen[user_id]
+            # Remove from database
+            db_storage.delete_user_data(user_id)
 
         if stale_users:
             info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS)

From 47b25a6eb17769846a0e33697c0e696c13504b0b Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:38:49 +0100
Subject: [PATCH 20/27] Fix linter

---
 src/db_storage.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/db_storage.py b/src/db_storage.py
index b5ee242..0bd6f88 100644
--- a/src/db_storage.py
+++ b/src/db_storage.py
@@ -79,9 +79,7 @@ def get_stale_users(self, ttl_seconds):
 
         current_time = time.time()
         cursor = self.conn.cursor()
-        cursor.execute(
-            "SELECT user_id FROM user_data WHERE last_seen < ?", (current_time - ttl_seconds,)
-        )
+        cursor.execute("SELECT user_id FROM user_data WHERE last_seen < ?", (current_time - ttl_seconds,))
         return [row[0] for row in cursor.fetchall()]
 
     def close(self):

From 1de8512f1e477ee75643bdd647394b7f88a2a128 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:40:45 +0100
Subject: [PATCH 21/27] Fix linter

---
 src/db_storage.py | 2 +-
 src/main.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/db_storage.py b/src/db_storage.py
index 0bd6f88..65b5df7 100644
--- a/src/db_storage.py
+++ b/src/db_storage.py
@@ -3,7 +3,7 @@
 import sqlite3
 import json
 import os
-from logger import debug, error
+from logger import debug
 
 
 class BotStorage:
diff --git a/src/main.py b/src/main.py
index 1b42644..e35ede7 100644
--- a/src/main.py
+++ b/src/main.py
@@ -882,7 +882,7 @@ def main():
     application.add_error_handler(error_handler)
 
     # Start cleanup task after event loop is running
-    async def post_init(app):
+    async def post_init(app):  # pylint: disable=unused-argument
         asyncio.create_task(cleanup_stale_users())
 
     application.post_init = post_init

From dbdf3aab17621f693bdcc928962c4241098f3930 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:52:32 +0100
Subject: [PATCH 22/27] debug: Add detailed logging for SQLite database
 operations

---
 DOKKU_STORAGE.md | 104 +++++++++++++++++++++++++++++++++++++++++++++++
 src/main.py      |   9 ++++
 2 files changed, 113 insertions(+)
 create mode 100644 DOKKU_STORAGE.md

diff --git a/DOKKU_STORAGE.md b/DOKKU_STORAGE.md
new file mode 100644
index 0000000..6c28c87
--- /dev/null
+++ b/DOKKU_STORAGE.md
@@ -0,0 +1,104 @@
+# Dokku Deployment with Persistent Storage
+
+## Problem
+SQLite database is stored in `/bot/data/bot.db` inside the container, but without persistent storage it gets deleted on every deployment/restart.
+
+## Solution
+Create a persistent storage mount in Dokku to preserve the database between deployments.
+
+## Setup Commands
+
+```bash
+# 1. Create persistent storage directory on host
+dokku storage:ensure-directory insta-bot
+
+# 2. Mount the storage to container's /bot/data directory
+dokku storage:mount insta-bot /var/lib/dokku/data/storage/insta-bot:/bot/data
+
+# 3. Verify the mount
+dokku storage:report insta-bot
+
+# 4. Rebuild and restart the app
+dokku ps:rebuild insta-bot
+```
+
+## Verify It Works
+
+After deployment, check the logs:
+```bash
+dokku logs insta-bot -t
+```
+
+You should see:
+```
+Database initialized at data/bot.db
+```
+
+Then test by:
+1. Send: `ботяра, привіт`
+2. Send: `ботяра, який мій попередній запит?`
+3. Bot should remember the conversation
+
+After restart:
+```bash
+dokku ps:restart insta-bot
+```
+
+The conversation context should persist.
+
+## Check Database File
+
+```bash
+# SSH into the container
+dokku enter insta-bot web
+
+# Check if database exists
+ls -lh /bot/data/
+cat /bot/data/bot.db  # Should show binary data
+
+# Exit container
+exit
+```
+
+## Troubleshooting
+
+### Database not persisting
+```bash
+# Check if mount exists
+dokku storage:report insta-bot
+
+# Should show:
+# Storage mount:  /var/lib/dokku/data/storage/insta-bot:/bot/data
+```
+
+### Permission issues
+```bash
+# Fix permissions on host
+sudo chown -R dokku:dokku /var/lib/dokku/data/storage/insta-bot
+sudo chmod -R 755 /var/lib/dokku/data/storage/insta-bot
+```
+
+### Check logs for database operations
+```bash
+# Enable DEBUG logging
+dokku config:set insta-bot LOG_LEVEL=DEBUG
+
+# Watch logs
+dokku logs insta-bot -t
+```
+
+Look for:
+- `Loading user data from database for user_id: XXX`
+- `Found user data in database: context=X messages`
+- `Saving user data to database: user_id=XXX`
+
+## Backup Database
+
+```bash
+# Backup
+sudo cp /var/lib/dokku/data/storage/insta-bot/bot.db /var/lib/dokku/data/storage/insta-bot/bot.db.backup
+
+# Restore
+sudo cp /var/lib/dokku/data/storage/insta-bot/bot.db.backup /var/lib/dokku/data/storage/insta-bot/bot.db
+dokku ps:restart insta-bot
+```
diff --git a/src/main.py b/src/main.py
index e35ede7..c23a546 100644
--- a/src/main.py
+++ b/src/main.py
@@ -552,12 +552,18 @@ async def respond_with_llm_message(update):
 
     # Load user data from database on first access
     if user_id not in llm_daily_limit:
+        debug("Loading user data from database for user_id: %s", user_id)
         user_data = db_storage.load_user_data(user_id)
         if user_data:
+            debug("Found user data in database: context=%d messages, rate_limit=%d timestamps, daily=%d/%s",
+                  len(user_data["conversation_context"]), len(user_data["rate_limit_timestamps"]),
+                  user_data["daily_count"], user_data["daily_date"])
             conversation_context[user_id] = user_data["conversation_context"]
             llm_rate_limit[user_id] = user_data["rate_limit_timestamps"]
             llm_daily_limit[user_id] = {"count": user_data["daily_count"], "date": user_data["daily_date"]}
             user_last_seen[user_id] = user_data["last_seen"]
+        else:
+            debug("No existing data found in database for user_id: %s", user_id)
 
     # Clean old timestamps (older than 60 seconds)
     llm_rate_limit[user_id] = [t for t in llm_rate_limit[user_id] if current_time - t < 60]
@@ -665,6 +671,9 @@ async def respond_with_llm_message(update):
                 conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:]
 
         # Save user data to database
+        debug("Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s",
+              user_id, len(conversation_context[user_id]),
+              llm_daily_limit[user_id]["count"], llm_daily_limit[user_id]["date"])
         db_storage.save_user_data(
             user_id,
             conversation_context[user_id],

From deababe5f9e72a81edb27f550e4ccc7a5394a197 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 00:54:13 +0100
Subject: [PATCH 23/27] Fix linter

---
 src/main.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/main.py b/src/main.py
index c23a546..bdfb4f2 100644
--- a/src/main.py
+++ b/src/main.py
@@ -555,9 +555,13 @@ async def respond_with_llm_message(update):
         debug("Loading user data from database for user_id: %s", user_id)
         user_data = db_storage.load_user_data(user_id)
         if user_data:
-            debug("Found user data in database: context=%d messages, rate_limit=%d timestamps, daily=%d/%s",
-                  len(user_data["conversation_context"]), len(user_data["rate_limit_timestamps"]),
-                  user_data["daily_count"], user_data["daily_date"])
+            debug(
+                "Found user data in database: context=%d messages, rate_limit=%d timestamps, daily=%d/%s",
+                len(user_data["conversation_context"]),
+                len(user_data["rate_limit_timestamps"]),
+                user_data["daily_count"],
+                user_data["daily_date"],
+            )
             conversation_context[user_id] = user_data["conversation_context"]
             llm_rate_limit[user_id] = user_data["rate_limit_timestamps"]
             llm_daily_limit[user_id] = {"count": user_data["daily_count"], "date": user_data["daily_date"]}
@@ -671,9 +675,13 @@ async def respond_with_llm_message(update):
                 conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:]
 
         # Save user data to database
-        debug("Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s",
-              user_id, len(conversation_context[user_id]),
-              llm_daily_limit[user_id]["count"], llm_daily_limit[user_id]["date"])
+        debug(
+            "Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s",
+            user_id,
+            len(conversation_context[user_id]),
+            llm_daily_limit[user_id]["count"],
+            llm_daily_limit[user_id]["date"],
+        )
         db_storage.save_user_data(
             user_id,
             conversation_context[user_id],

From 1696d2d00e228476714fdca5540908e028eeff91 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 18:59:42 +0100
Subject: [PATCH 24/27] Update dependencies, relax version pins, remove DOKKU_STORAGE.md

---
 DOKKU_STORAGE.md     | 104 -------------------------------------------
 src/requirements.txt |  12 ++---
 2 files changed, 6 insertions(+), 110 deletions(-)
 delete mode 100644 DOKKU_STORAGE.md

diff --git a/DOKKU_STORAGE.md b/DOKKU_STORAGE.md
deleted file mode 100644
index 6c28c87..0000000
--- a/DOKKU_STORAGE.md
+++ /dev/null
@@ -1,104 +0,0 @@
-# Dokku Deployment with Persistent Storage
-
-## Problem
-SQLite database is stored in `/bot/data/bot.db` inside the container, but without persistent storage it gets deleted on every deployment/restart.
-
-## Solution
-Create a persistent storage mount in Dokku to preserve the database between deployments.
-
-## Setup Commands
-
-```bash
-# 1. Create persistent storage directory on host
-dokku storage:ensure-directory insta-bot
-
-# 2. Mount the storage to container's /bot/data directory
-dokku storage:mount insta-bot /var/lib/dokku/data/storage/insta-bot:/bot/data
-
-# 3. Verify the mount
-dokku storage:report insta-bot
-
-# 4. Rebuild and restart the app
-dokku ps:rebuild insta-bot
-```
-
-## Verify It Works
-
-After deployment, check the logs:
-```bash
-dokku logs insta-bot -t
-```
-
-You should see:
-```
-Database initialized at data/bot.db
-```
-
-Then test by:
-1. Send: `ботяра, привіт`
-2. Send: `ботяра, який мій попередній запит?`
-3. Bot should remember the conversation
-
-After restart:
-```bash
-dokku ps:restart insta-bot
-```
-
-The conversation context should persist.
-
-## Check Database File
-
-```bash
-# SSH into the container
-dokku enter insta-bot web
-
-# Check if database exists
-ls -lh /bot/data/
-cat /bot/data/bot.db  # Should show binary data
-
-# Exit container
-exit
-```
-
-## Troubleshooting
-
-### Database not persisting
-```bash
-# Check if mount exists
-dokku storage:report insta-bot
-
-# Should show:
-# Storage mount:  /var/lib/dokku/data/storage/insta-bot:/bot/data
-```
-
-### Permission issues
-```bash
-# Fix permissions on host
-sudo chown -R dokku:dokku /var/lib/dokku/data/storage/insta-bot
-sudo chmod -R 755 /var/lib/dokku/data/storage/insta-bot
-```
-
-### Check logs for database operations
-```bash
-# Enable DEBUG logging
-dokku config:set insta-bot LOG_LEVEL=DEBUG
-
-# Watch logs
-dokku logs insta-bot -t
-```
-
-Look for:
-- `Loading user data from database for user_id: XXX`
-- `Found user data in database: context=X messages`
-- `Saving user data to database: user_id=XXX`
-
-## Backup Database
-
-```bash
-# Backup
-sudo cp /var/lib/dokku/data/storage/insta-bot/bot.db /var/lib/dokku/data/storage/insta-bot/bot.db.backup
-
-# Restore
-sudo cp /var/lib/dokku/data/storage/insta-bot/bot.db.backup /var/lib/dokku/data/storage/insta-bot/bot.db
-dokku ps:restart insta-bot
-```
diff --git a/src/requirements.txt b/src/requirements.txt
index 2700381..244b45a 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -1,7 +1,7 @@
-python-telegram-bot[ext]==22.6
-python-dotenv==1.2.1
-yt-dlp==2026.2.21
-gallery-dl==1.31.6
-aiohttp==3.13.3
+python-telegram-bot[ext]>=22.6
+python-dotenv>=1.2.2
+yt-dlp>=2026.3.3
+gallery-dl>=1.31.7
+aiohttp>=3.13.3
 google-generativeai>=0.8.6
-openai>=1.0.0
+openai>=2.24.0

From 5e0b958ccdb66ea9878e683fbca9b1c2e10773d6 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 19:22:26 +0100
Subject: [PATCH 25/27] fix: Apply code review improvements

- Add DB index on last_seen for efficient cleanup
- Make all DB calls async with asyncio.to_thread
- Fix stale timestamp handling from DB
- Add dynamic localization for LLM prompts (uk/en)
- Add proper cleanup task cancellation on shutdown
---
 src/db_storage.py |  1 +
 src/main.py       | 55 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/src/db_storage.py b/src/db_storage.py
index 65b5df7..4f4dc7a 100644
--- a/src/db_storage.py
+++ b/src/db_storage.py
@@ -30,6 +30,7 @@ def _create_tables(self):
                 last_seen REAL
             )
         """)
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_user_data_last_seen ON user_data(last_seen)")
         self.conn.commit()
 
     def load_user_data(self, user_id):
diff --git a/src/main.py b/src/main.py
index bdfb4f2..1c26131 100644
--- a/src/main.py
+++ b/src/main.py
@@ -81,6 +81,9 @@
 # Initialize database storage
 db_storage = BotStorage()
 
+# Cleanup task reference
+cleanup_task = None
+
 
 # Cache responses from JSON file
 @lru_cache(maxsize=1)
@@ -553,7 +556,7 @@ async def respond_with_llm_message(update):
     # Load user data from database on first access
     if user_id not in llm_daily_limit:
         debug("Loading user data from database for user_id: %s", user_id)
-        user_data = db_storage.load_user_data(user_id)
+        user_data = await asyncio.to_thread(db_storage.load_user_data, user_id)
         if user_data:
             debug(
                 "Found user data in database: context=%d messages, rate_limit=%d timestamps, daily=%d/%s",
@@ -565,7 +568,9 @@ async def respond_with_llm_message(update):
             conversation_context[user_id] = user_data["conversation_context"]
             llm_rate_limit[user_id] = user_data["rate_limit_timestamps"]
             llm_daily_limit[user_id] = {"count": user_data["daily_count"], "date": user_data["daily_date"]}
-            user_last_seen[user_id] = user_data["last_seen"]
+            # Only update last_seen if DB value is newer
+            if user_id not in user_last_seen or user_data["last_seen"] > user_last_seen[user_id]:
+                user_last_seen[user_id] = user_data["last_seen"]
         else:
             debug("No existing data found in database for user_id: %s", user_id)
 
@@ -646,11 +651,28 @@ async def respond_with_llm_message(update):
             context_messages = []
 
         # Create prompt with context if available
+        if language == "uk":
+            user_label = "Користувач"
+            assistant_label = "Асистент"
+            instruction = "Відповідай українською мовою як дружній асистент. Не вітайся і не прощайся."
+        else:
+            user_label = "User"
+            assistant_label = "Assistant"
+            instruction = "Answer in English as a friendly assistant. Don't greet or say goodbye."
+
         if context_messages:
-            context_str = "\n".join([f"Користувач: {msg}\nАсистент: {resp}" for msg, resp in context_messages])
-            safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\nВідповідай українською мовою як дружній асистент. Не вітайся і не прощайся."
+            context_str = "\n".join(
+                [f"{user_label}: {msg}\n{assistant_label}: {resp}" for msg, resp in context_messages]
+            )
+            if language == "uk":
+                safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\n{instruction}"
+            else:
+                safe_prompt = f"Previous conversation:\n{context_str}\n\nCurrent user question: {prompt}\n\n{instruction}"
         else:
-            safe_prompt = f"Відповідай українською мовою як дружній асистент. Не вітайся і не прощайся. Питання користувача: {prompt}"
+            if language == "uk":
+                safe_prompt = f"{instruction} Питання користувача: {prompt}"
+            else:
+                safe_prompt = f"{instruction} User question: {prompt}"
 
         debug("Modified safe prompt with context: %s", safe_prompt[:200])
 
@@ -682,7 +704,8 @@ async def respond_with_llm_message(update):
             llm_daily_limit[user_id]["count"],
             llm_daily_limit[user_id]["date"],
         )
-        db_storage.save_user_data(
+        await asyncio.to_thread(
+            db_storage.save_user_data,
             user_id,
             conversation_context[user_id],
             llm_rate_limit[user_id],
@@ -847,7 +870,7 @@ async def cleanup_stale_users():
         ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400
 
         # Get stale users from database
-        stale_users = db_storage.get_stale_users(ttl_seconds)
+        stale_users = await asyncio.to_thread(db_storage.get_stale_users, ttl_seconds)
 
         for user_id in stale_users:
             # Remove from memory
@@ -860,7 +883,7 @@ async def cleanup_stale_users():
             if user_id in user_last_seen:
                 del user_last_seen[user_id]
             # Remove from database
-            db_storage.delete_user_data(user_id)
+            await asyncio.to_thread(db_storage.delete_user_data, user_id)
 
         if stale_users:
             info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS)
@@ -892,6 +915,8 @@ def main():
     Returns:
         None
     """
+    global cleanup_task  # pylint: disable=global-statement
+
     bot_token = os.getenv("BOT_TOKEN")
     application = Application.builder().token(bot_token).build()
     application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
@@ -900,9 +925,21 @@ def main():
 
     # Start cleanup task after event loop is running
     async def post_init(app):  # pylint: disable=unused-argument
-        asyncio.create_task(cleanup_stale_users())
+        global cleanup_task  # pylint: disable=global-statement
+        cleanup_task = asyncio.create_task(cleanup_stale_users())
+
+    # Cancel cleanup task on shutdown
+    async def post_shutdown(app):  # pylint: disable=unused-argument
+        global cleanup_task  # pylint: disable=global-statement
+        if cleanup_task is not None:
+            cleanup_task.cancel()
+            try:
+                await cleanup_task
+            except asyncio.CancelledError:
+                pass
 
     application.post_init = post_init
+    application.post_shutdown = post_shutdown
 
     info("Bot started. Ctrl+C to stop")
     application.run_polling()

From 8e893475a46d6d213606f1ae362f0adcaa7c9554 Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 19:32:05 +0100
Subject: [PATCH 26/27] fix: Apply critical code review fixes

- Close DB connection on shutdown
- Initialize response variable before retry loop
- Localize Gemini fallback instruction
- Move time import to module level in db_storage
---
 src/db_storage.py |  3 +--
 src/main.py       | 43 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/src/db_storage.py b/src/db_storage.py
index 4f4dc7a..2ecfdb4 100644
--- a/src/db_storage.py
+++ b/src/db_storage.py
@@ -3,6 +3,7 @@
 import sqlite3
 import json
 import os
+import time
 from logger import debug
 
 
@@ -76,8 +77,6 @@ def delete_user_data(self, user_id):
 
     def get_stale_users(self, ttl_seconds):
         """Get list of user IDs that haven't been seen within TTL."""
-        import time
-
         current_time = time.time()
         cursor = self.conn.cursor()
         cursor.execute("SELECT user_id FROM user_data WHERE last_seen < ?", (current_time - ttl_seconds,))
diff --git a/src/main.py b/src/main.py
index 1c26131..0d7e26e 100644
--- a/src/main.py
+++ b/src/main.py
@@ -665,9 +665,13 @@ async def respond_with_llm_message(update):
                 [f"{user_label}: {msg}\n{assistant_label}: {resp}" for msg, resp in context_messages]
             )
             if language == "uk":
-                safe_prompt = f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\n{instruction}"
+                safe_prompt = (
+                    f"Попередня розмова:\n{context_str}\n\nПоточне питання користувача: {prompt}\n\n{instruction}"
+                )
             else:
-                safe_prompt = f"Previous conversation:\n{context_str}\n\nCurrent user question: {prompt}\n\n{instruction}"
+                safe_prompt = (
+                    f"Previous conversation:\n{context_str}\n\nCurrent user question: {prompt}\n\n{instruction}"
+                )
         else:
             if language == "uk":
                 safe_prompt = f"{instruction} Питання користувача: {prompt}"
@@ -797,6 +801,7 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
 
     max_retries = 2
     retry_delay = 60
+    response = None
 
     for attempt in range(max_retries):
         try:
@@ -833,6 +838,17 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
                 await asyncio.sleep(retry_delay)
             else:
                 raise
+
+    # Check if response was set after retries
+    if response is None:
+        fail_msg = (
+            "Вибачте, не вдалося отримати відповідь. Спробуйте пізніше."
+            if language == "uk"
+            else "Sorry, failed to get a response. Please try again later."
+        )
+        await update.message.reply_text(fail_msg)
+        raise Exception("Failed to get response after retries")  # pylint: disable=broad-exception-raised
+
     if hasattr(response, 'candidates') and response.candidates:
         candidate = response.candidates[0]
         debug("Response candidate finish_reason: %s", getattr(candidate, 'finish_reason', 'None'))
@@ -840,17 +856,26 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
 
         if hasattr(candidate, 'finish_reason') and candidate.finish_reason == 2:
             debug("Safety filter triggered - finish_reason: 2, trying simpler approach")
+            fallback_instruction = (
+                "Відповідь українською мовою: дай загальну інформацію про: "
+                if language == "uk"
+                else "Answer in English: give general information about: "
+            )
             simple_response = await asyncio.to_thread(
                 model.generate_content,
-                "Відповідь українською мовою: дай загальну інформацію про: " + prompt,
+                fallback_instruction + prompt,
                 safety_settings=safety_settings,
             )
             if simple_response.text:
-                return f"Ось загальна інформація: {simple_response.text.strip()}"
+                prefix = "Ось загальна інформація: " if language == "uk" else "Here's general information: "
+                return f"{prefix}{simple_response.text.strip()}"
             else:
-                raise Exception(  # pylint: disable=broad-exception-raised
+                error_msg = (
                     "Вибачте, не можу надати детальну відповідь на це питання."
+                    if language == "uk"
+                    else "Sorry, I can't provide a detailed answer to this question."
                 )
+                raise Exception(error_msg)  # pylint: disable=broad-exception-raised
         elif response.text:
             # Remove Markdown formatting
             bot_response = response.text.strip()
@@ -928,7 +953,7 @@ async def post_init(app):  # pylint: disable=unused-argument
         global cleanup_task  # pylint: disable=global-statement
         cleanup_task = asyncio.create_task(cleanup_stale_users())
 
-    # Cancel cleanup task on shutdown
+    # Cancel cleanup task and close DB on shutdown
     async def post_shutdown(app):  # pylint: disable=unused-argument
         global cleanup_task  # pylint: disable=global-statement
         if cleanup_task is not None:
@@ -937,6 +962,12 @@ async def post_shutdown(app):  # pylint: disable=unused-argument
                 await cleanup_task
             except asyncio.CancelledError:
                 pass
+        # Close database connection
+        try:
+            db_storage.close()
+            debug("Database connection closed")
+        except Exception as e:  # pylint: disable=broad-except
+            error("Error closing database: %s", e)
 
     application.post_init = post_init
     application.post_shutdown = post_shutdown

From 9f15bb886b4709fef826ffd44e85075ddc6cd53e Mon Sep 17 00:00:00 2001
From: avelytchko <919635+avelytchko@users.noreply.github.com>
Date: Thu, 5 Mar 2026 19:44:13 +0100
Subject: [PATCH 27/27] fix: Apply final code review improvements

- Make DB persistence best-effort (send reply first)
- Remove duplicate error message from call_gemini_api
- Protect cleanup loop from crashes with exception handling
---
 src/main.py | 99 +++++++++++++++++++++++++++--------------------------
 1 file changed, 51 insertions(+), 48 deletions(-)

diff --git a/src/main.py b/src/main.py
index 0d7e26e..d3d070e 100644
--- a/src/main.py
+++ b/src/main.py
@@ -700,26 +700,33 @@ async def respond_with_llm_message(update):
             if len(conversation_context[user_id]) > MAX_CONTEXT_MESSAGES:
                 conversation_context[user_id] = conversation_context[user_id][-MAX_CONTEXT_MESSAGES:]
 
-        # Save user data to database
-        debug(
-            "Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s",
-            user_id,
-            len(conversation_context[user_id]),
-            llm_daily_limit[user_id]["count"],
-            llm_daily_limit[user_id]["date"],
-        )
-        await asyncio.to_thread(
-            db_storage.save_user_data,
-            user_id,
-            conversation_context[user_id],
-            llm_rate_limit[user_id],
-            llm_daily_limit[user_id]["count"],
-            llm_daily_limit[user_id]["date"],
-            user_last_seen[user_id],
-        )
-
+        # Send reply first, then save to DB (best-effort persistence)
         await update.message.reply_text(bot_response)
 
+        # Save user data to database (best-effort, don't fail on DB errors)
+        async def save_to_db():
+            try:
+                debug(
+                    "Saving user data to database: user_id=%s, context=%d messages, daily=%d/%s",
+                    user_id,
+                    len(conversation_context[user_id]),
+                    llm_daily_limit[user_id]["count"],
+                    llm_daily_limit[user_id]["date"],
+                )
+                await asyncio.to_thread(
+                    db_storage.save_user_data,
+                    user_id,
+                    conversation_context[user_id],
+                    llm_rate_limit[user_id],
+                    llm_daily_limit[user_id]["count"],
+                    llm_daily_limit[user_id]["date"],
+                    user_last_seen[user_id],
+                )
+            except Exception as db_error:  # pylint: disable=broad-except
+                error("Failed to save user data to database: %s", db_error)
+
+        asyncio.create_task(save_to_db())
+
     except Exception as e:  # pylint: disable=broad-except
         # Remove tentative timestamp on failure
         if llm_rate_limit[user_id] and llm_rate_limit[user_id][-1] == current_time:
@@ -841,12 +848,6 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
 
     # Check if response was set after retries
     if response is None:
-        fail_msg = (
-            "Вибачте, не вдалося отримати відповідь. Спробуйте пізніше."
-            if language == "uk"
-            else "Sorry, failed to get a response. Please try again later."
-        )
-        await update.message.reply_text(fail_msg)
         raise Exception("Failed to get response after retries")  # pylint: disable=broad-exception-raised
 
     if hasattr(response, 'candidates') and response.candidates:
@@ -891,27 +892,32 @@ async def call_gemini_api(safe_prompt: str, prompt: str, update) -> str:
 async def cleanup_stale_users():
     """Remove inactive users from memory and database to prevent unbounded growth."""
     while True:
-        await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600)
-        ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400
-
-        # Get stale users from database
-        stale_users = await asyncio.to_thread(db_storage.get_stale_users, ttl_seconds)
-
-        for user_id in stale_users:
-            # Remove from memory
-            if user_id in conversation_context:
-                del conversation_context[user_id]
-            if user_id in llm_rate_limit:
-                del llm_rate_limit[user_id]
-            if user_id in llm_daily_limit:
-                del llm_daily_limit[user_id]
-            if user_id in user_last_seen:
-                del user_last_seen[user_id]
-            # Remove from database
-            await asyncio.to_thread(db_storage.delete_user_data, user_id)
-
-        if stale_users:
-            info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS)
+        try:
+            await asyncio.sleep(USER_CLEANUP_INTERVAL_HOURS * 3600)
+            ttl_seconds = USER_CLEANUP_TTL_DAYS * 86400
+
+            # Get stale users from database
+            stale_users = await asyncio.to_thread(db_storage.get_stale_users, ttl_seconds)
+
+            for user_id in stale_users:
+                # Remove from memory
+                if user_id in conversation_context:
+                    del conversation_context[user_id]
+                if user_id in llm_rate_limit:
+                    del llm_rate_limit[user_id]
+                if user_id in llm_daily_limit:
+                    del llm_daily_limit[user_id]
+                if user_id in user_last_seen:
+                    del user_last_seen[user_id]
+                # Remove from database
+                await asyncio.to_thread(db_storage.delete_user_data, user_id)
+
+            if stale_users:
+                info("Cleaned up %d inactive users (TTL: %d days)", len(stale_users), USER_CLEANUP_TTL_DAYS)
+        except Exception as cleanup_error:  # pylint: disable=broad-except
+            error("Error in cleanup_stale_users: %s", cleanup_error)
+            error("Full traceback: %s", traceback.format_exc())
+            await asyncio.sleep(60)  # Wait before retrying
 
 
 def main():
@@ -940,8 +946,6 @@ def main():
     Returns:
         None
     """
-    global cleanup_task  # pylint: disable=global-statement
-
     bot_token = os.getenv("BOT_TOKEN")
     application = Application.builder().token(bot_token).build()
     application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
@@ -955,7 +959,6 @@ async def post_init(app):  # pylint: disable=unused-argument
 
     # Cancel cleanup task and close DB on shutdown
     async def post_shutdown(app):  # pylint: disable=unused-argument
-        global cleanup_task  # pylint: disable=global-statement
         if cleanup_task is not None:
             cleanup_task.cancel()
             try: