From 4fa761dbadd829a8ac26835fcac7ecda88c98d81 Mon Sep 17 00:00:00 2001
From: Saurabh Puri <purisaurabh2885@gmail.com>
Date: Thu, 13 Nov 2025 12:25:43 +0530
Subject: [PATCH 1/6] feat: add lingo bot

---
 Bot/app/api/meetings.py                   | 43 +++++++++-
 attendee/bots/tasks/sync_calendar_task.py | 98 ++++++++++++++++++++++-
 2 files changed, 137 insertions(+), 4 deletions(-)

diff --git a/Bot/app/api/meetings.py b/Bot/app/api/meetings.py
index 40747b0..5faabbd 100644
--- a/Bot/app/api/meetings.py
+++ b/Bot/app/api/meetings.py
@@ -14,6 +14,8 @@
 import json
 from app.core import config
 import os
+import re
+import html
 
 # Now you can access the values like this:
 CLIENT_ID = os.getenv("CLIENT_ID")
@@ -70,8 +72,47 @@ def get_meetings(body: ScheduleMeeting, token: str = Depends(OAUTH2_SCHEME)):
     events = events_result.get('items', [])
     scheduled_meetings = []
     meetings_map = {}
+    URL_RE = re.compile(r'https?://[^\s<>"\']+', re.IGNORECASE)
+
+    def extract_meeting_url_from_text(text: str):
+        if not text:
+            return None
+        # look for normal https links first
+        m = URL_RE.search(text)
+        if m:
+            return m.group(0).rstrip('>')
+        # fallback: scheme-less hosts like "zoom.us/j/..."
+        for pat in [r'(?:[\w.-]+\.)?zoom\.us/\S+', r'meet\.google\.com/\S+', r'teams\.microsoft\.com/\S+']:
+            m2 = re.search(pat, text, flags=re.IGNORECASE)
+            if m2:
+                candidate = m2.group(0)
+                if not candidate.lower().startswith('http'):
+                    candidate = 'https://' + candidate
+                return candidate
+        return None
+
     for event in events:
-        meeting_url = event.get('hangoutLink')
+        # Prefer structured conferenceData entryPoints (video) when available
+        meeting_url = None
+        conf = event.get('conferenceData')
+        if conf:
+            entry_points = conf.get('entryPoints', [])
+            for ep in entry_points:
+                if ep.get('entryPointType') == 'video' and ep.get('uri'):
+                    meeting_url = ep.get('uri')
+                    break
+        # fallbacks
+        if not meeting_url:
+            meeting_url = event.get('hangoutLink')
+        if not meeting_url:
+            # check location, description, summary for embedded links
+            for field in ('location', 'description', 'summary'):
+                val = event.get(field)
+                if val:
+                    val = html.unescape(val)
+                    meeting_url = extract_meeting_url_from_text(val)
+                    if meeting_url:
+                        break
 
         # check is bot request is aleady sent for the meeting
         json_str = redis_client.get(BOT_ADDED_IN_MEETING_KEY)
diff --git a/attendee/bots/tasks/sync_calendar_task.py b/attendee/bots/tasks/sync_calendar_task.py
index 85645c6..cc85ab2 100644
--- a/attendee/bots/tasks/sync_calendar_task.py
+++ b/attendee/bots/tasks/sync_calendar_task.py
@@ -7,6 +7,7 @@
 from zoneinfo import ZoneInfo
 
 import dateutil.parser
+import html
 import requests
 from celery import shared_task
 from django.db import transaction
@@ -21,16 +22,34 @@
 
 logger = logging.getLogger(__name__)
 
-URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+")
+URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE)
 
 
 def extract_meeting_url_from_text(text: str) -> Optional[str]:
     if not text:
         return None
+    # First pass: look for normal https:// links (case-insensitive)
     for m in URL_CANDIDATE.finditer(text):
-        url = m.group(0).rstrip(").,;]}>")
+        url = m.group(0).rstrip(").,;]}")
+        # strip trailing '>' that sometimes remains from markdown/angle-bracket wrapping
+        url = url.rstrip('>')
         if meeting_type_from_url(url):
             return url
+
+    # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme
+    # Try to find common meeting host patterns and prepend https:// when detected
+    scheme_less_patterns = [
+        r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
+        r"meet\.google\.com/[^\s<>\"']+",
+        r"teams\.microsoft\.com/[^\s<>\"']+",
+    ]
+    for pat in scheme_less_patterns:
+        for m in re.finditer(pat, text, flags=re.IGNORECASE):
+            candidate = m.group(0)
+            if not candidate.lower().startswith("http"):
+                candidate = "https://" + candidate
+            if meeting_type_from_url(candidate):
+                return candidate
     return None
 
 
@@ -457,13 +476,86 @@ def _remote_event_to_calendar_event_data(self, google_event: dict) -> dict:
 
         # Extract meeting URL if present
         meeting_url_from_conference_data = None
+        entry_points = []
         if "conferenceData" in google_event:
             entry_points = google_event["conferenceData"].get("entryPoints", [])
             for entry_point in entry_points:
                 if entry_point.get("entryPointType") == "video":
                     meeting_url_from_conference_data = entry_point.get("uri")
                     break
-        meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data) or extract_meeting_url_from_text(google_event.get("hangoutLink")) or extract_meeting_url_from_text(google_event.get("location")) or extract_meeting_url_from_text(google_event.get("description")) or extract_meeting_url_from_text(google_event.get("summary"))
+
+        # Normalize/unescape free-text fields before extraction
+        hangout_link = google_event.get("hangoutLink")
+        location_text = google_event.get("location")
+        description_text = google_event.get("description")
+        summary_text = google_event.get("summary")
+
+        if description_text:
+            description_text = html.unescape(description_text)
+        if location_text:
+            location_text = html.unescape(location_text)
+        if summary_text:
+            summary_text = html.unescape(summary_text)
+
+        meeting_url = None
+        meeting_url_source = None
+
+        # Check in order and record the source field for logging
+        if meeting_url_from_conference_data:
+            meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data)
+            if meeting_url:
+                meeting_url_source = "conferenceData.entryPoints"
+
+        if not meeting_url and hangout_link:
+            meeting_url = extract_meeting_url_from_text(hangout_link)
+            if meeting_url:
+                meeting_url_source = "hangoutLink"
+
+        if not meeting_url and location_text:
+            meeting_url = extract_meeting_url_from_text(location_text)
+            if meeting_url:
+                meeting_url_source = "location"
+
+        if not meeting_url and description_text:
+            meeting_url = extract_meeting_url_from_text(description_text)
+            if meeting_url:
+                meeting_url_source = "description"
+
+        if not meeting_url and summary_text:
+            meeting_url = extract_meeting_url_from_text(summary_text)
+            if meeting_url:
+                meeting_url_source = "summary"
+
+        # Loose fallback: try to detect scheme-less Zoom/Meet/Teams links inside text
+        if not meeting_url:
+            # Try entry points liberally (some providers set non-standard URIs)
+            if entry_points:
+                for ep in entry_points:
+                    uri = ep.get("uri")
+                    if uri:
+                        candidate = uri
+                        if not candidate.lower().startswith("http"):
+                            candidate = "https://" + candidate
+                        if meeting_type_from_url(candidate):
+                            meeting_url = extract_meeting_url_from_text(candidate) or candidate
+                            meeting_url_source = "conferenceData.entryPoints.loose"
+                            break
+
+            # Try scheme-less patterns in text fields
+            if not meeting_url:
+                loose_text = "\n".join(filter(None, [location_text, description_text, summary_text]))
+                if loose_text:
+                    # common patterns
+                    m = re.search(r"(?:[\w.-]+\.)?zoom\.us/\S+", loose_text, flags=re.IGNORECASE)
+                    if m:
+                        candidate = m.group(0)
+                        if not candidate.lower().startswith("http"):
+                            candidate = "https://" + candidate
+                        if meeting_type_from_url(candidate):
+                            meeting_url = candidate
+                            meeting_url_source = "loose_text_zoom"
+
+        logger.debug("Event %s: extracted meeting_url=%s source=%s", google_event.get("id"), meeting_url, meeting_url_source)
 
         # Extract attendees
         attendees = []

From 5ef69c4ec976dff18d71be0e04688647da4b2399 Mon Sep 17 00:00:00 2001
From: Saurabh Puri <purisaurabh2885@gmail.com>
Date: Thu, 13 Nov 2025 12:26:07 +0530
Subject: [PATCH 2/6] feat: move worker from 4 to 1

---
 Bot/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Bot/Dockerfile b/Bot/Dockerfile
index 1a48749..f9d6ce1 100644
--- a/Bot/Dockerfile
+++ b/Bot/Dockerfile
@@ -16,5 +16,5 @@ COPY . .
 EXPOSE 8001
 
 # Run FastAPI app
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "4"]
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "1"]
 

From 411ab24e5f8858d0f6b8b49301378972140ee62e Mon Sep 17 00:00:00 2001
From: Saurabh Puri <purisaurabh2885@gmail.com>
Date: Mon, 17 Nov 2025 11:57:55 +0530
Subject: [PATCH 3/6] feat: update workers value

---
 Bot/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Bot/Dockerfile b/Bot/Dockerfile
index f9d6ce1..1a48749 100644
--- a/Bot/Dockerfile
+++ b/Bot/Dockerfile
@@ -16,5 +16,5 @@ COPY . .
 EXPOSE 8001
 
 # Run FastAPI app
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "1"]
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "4"]
 

From 3f247b298e2672bad2c4f194e0533734538ca4e1 Mon Sep 17 00:00:00 2001
From: Saurabh Puri <purisaurabh2885@gmail.com>
Date: Mon, 17 Nov 2025 11:58:09 +0530
Subject: [PATCH 4/6] feat: remove duplication of code

---
 Bot/app/api/meetings.py | 79 +++++++++++++++++++++++++++++++----------
 1 file changed, 60 insertions(+), 19 deletions(-)

diff --git a/Bot/app/api/meetings.py b/Bot/app/api/meetings.py
index 5faabbd..d283226 100644
--- a/Bot/app/api/meetings.py
+++ b/Bot/app/api/meetings.py
@@ -72,24 +72,65 @@ def get_meetings(body: ScheduleMeeting, token: str = Depends(OAUTH2_SCHEME)):
     events = events_result.get('items', [])
     scheduled_meetings = []
     meetings_map = {}
-    URL_RE = re.compile(r'https?://[^\s<>"\']+', re.IGNORECASE)
-
-    def extract_meeting_url_from_text(text: str):
-        if not text:
-            return None
-        # look for normal https links first
-        m = URL_RE.search(text)
-        if m:
-            return m.group(0).rstrip('>')
-        # fallback: scheme-less hosts like "zoom.us/j/..."
-        for pat in [r'(?:[\w.-]+\.)?zoom\.us/\S+', r'meet\.google\.com/\S+', r'teams\.microsoft\.com/\S+']:
-            m2 = re.search(pat, text, flags=re.IGNORECASE)
-            if m2:
-                candidate = m2.group(0)
-                if not candidate.lower().startswith('http'):
-                    candidate = 'https://' + candidate
-                return candidate
-        return None
+    # Try to import the canonical extraction logic from the attendee package.
+    # In some deployment setups the `attendee` package may not be on sys.path
+    # (separate services). Fall back to a local implementation that mirrors
+    # the attendee behaviour and will attempt to use meeting_type_from_url if
+    # that helper is importable.
+    try:
+        from attendee.bots.tasks.sync_calendar_task import extract_meeting_url_from_text  # type: ignore
+    except Exception:
+        try:
+            # Try alternative import path used in the attendee codebase
+            from bots.tasks.sync_calendar_task import extract_meeting_url_from_text  # type: ignore
+        except Exception:
+            # Fallback implementation copied from attendee/bots/tasks/sync_calendar_task.py
+            from typing import Optional
+
+            URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE)
+
+            try:
+                # Prefer to reuse meeting_type_from_url when available
+                from bots.meeting_url_utils import meeting_type_from_url  # type: ignore
+            except Exception:
+                try:
+                    from attendee.bots.meeting_url_utils import meeting_type_from_url  # type: ignore
+                except Exception:
+                    # Last-resort: accept any url (best-effort)
+                    def meeting_type_from_url(url: str):
+                        return True
+
+            def extract_meeting_url_from_text(text: str) -> Optional[str]:
+                if not text:
+                    return None
+                # First pass: look for normal https:// links (case-insensitive)
+                for m in URL_CANDIDATE.finditer(text):
+                    url = m.group(0).rstrip(").,;]}")
+                    # strip trailing '>' that sometimes remains from markdown/angle-bracket wrapping
+                    url = url.rstrip('>')
+                    try:
+                        if meeting_type_from_url(url):
+                            return url
+                    except Exception:
+                        return url
+
+                # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme
+                scheme_less_patterns = [
+                    r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
+                    r"meet\.google\.com/[^\s<>\"']+",
+                    r"teams\.microsoft\.com/[^\s<>\"']+",
+                ]
+                for pat in scheme_less_patterns:
+                    for m in re.finditer(pat, text, flags=re.IGNORECASE):
+                        candidate = m.group(0)
+                        if not candidate.lower().startswith("http"):
+                            candidate = "https://" + candidate
+                        try:
+                            if meeting_type_from_url(candidate):
+                                return candidate
+                        except Exception:
+                            return candidate
+                return None
 
     for event in events:
         # Prefer structured conferenceData entryPoints (video) when available
@@ -114,7 +155,7 @@ def extract_meeting_url_from_text(text: str):
                     if meeting_url:
                         break
 
-        # check is bot request is aleady sent for the meeting
+        # check is bot request is already sent for the meeting
         json_str = redis_client.get(BOT_ADDED_IN_MEETING_KEY)
         if json_str:
             meetings_map = json.loads(json_str)

From 8a477129849ceea37294153d19c4b182646e449a Mon Sep 17 00:00:00 2001
From: Saurabh Puri <purisaurabh2885@gmail.com>
Date: Thu, 20 Nov 2025 12:58:00 +0530
Subject: [PATCH 5/6] feat: update docker compose file

---
 docker-compose.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index f8704b9..447fbe7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -155,7 +155,8 @@ services:
     #  - "8001:8000"
     command: "python bots/webpage_streamer/run_webpage_streamer.py"
     security_opt:
-      - seccomp=attendee/bots/web_bot_adapter/chrome_seccomp.json
+      # - seccomp=attendee/bots/web_bot_adapter/chrome_seccomp.json
+      - seccomp:unconfined
     depends_on:
       - postgres-attendee
       - redis

From d4d1c3cade9872087185f0566033f2e21ea4a3ef Mon Sep 17 00:00:00 2001
From: Saurabh Puri <purisaurabh2885@gmail.com>
Date: Thu, 20 Nov 2025 12:58:43 +0530
Subject: [PATCH 6/6] feat: remove duplicate and unnecessary code

---
 Bot/app/api/meetings.py                   | 20 +++++++--------
 Bot/app/core/config.py                    | 30 ++++++++---------------
 attendee/bots/meeting_url_utils.py        |  8 ++++++
 attendee/bots/tasks/sync_calendar_task.py |  9 ++-----
 4 files changed, 30 insertions(+), 37 deletions(-)

diff --git a/Bot/app/api/meetings.py b/Bot/app/api/meetings.py
index d283226..4894f4a 100644
--- a/Bot/app/api/meetings.py
+++ b/Bot/app/api/meetings.py
@@ -90,15 +90,20 @@ def get_meetings(body: ScheduleMeeting, token: str = Depends(OAUTH2_SCHEME)):
             URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE)
 
             try:
-                # Prefer to reuse meeting_type_from_url when available
-                from bots.meeting_url_utils import meeting_type_from_url  # type: ignore
+                # Prefer to reuse meeting_type_from_url and the canonical patterns when available
+                from bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS  # type: ignore
             except Exception:
                 try:
-                    from attendee.bots.meeting_url_utils import meeting_type_from_url  # type: ignore
+                    from attendee.bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS  # type: ignore
                 except Exception:
-                    # Last-resort: accept any url (best-effort)
+                    # Last-resort: accept any url (best-effort) and provide default patterns
                     def meeting_type_from_url(url: str):
                         return True
+                    SCHEME_LESS_PATTERNS = [
+                        r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
+                        r"meet\.google\.com/[^\s<>\"']+",
+                        r"teams\.microsoft\.com/[^\s<>\"']+",
+                    ]
 
             def extract_meeting_url_from_text(text: str) -> Optional[str]:
                 if not text:
@@ -115,12 +120,7 @@ def extract_meeting_url_from_text(text: str) -> Optional[str]:
                         return url
 
                 # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme
-                scheme_less_patterns = [
-                    r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
-                    r"meet\.google\.com/[^\s<>\"']+",
-                    r"teams\.microsoft\.com/[^\s<>\"']+",
-                ]
-                for pat in scheme_less_patterns:
+                for pat in SCHEME_LESS_PATTERNS:
                     for m in re.finditer(pat, text, flags=re.IGNORECASE):
                         candidate = m.group(0)
                         if not candidate.lower().startswith("http"):
diff --git a/Bot/app/core/config.py b/Bot/app/core/config.py
index 75ba517..5f3c554 100644
--- a/Bot/app/core/config.py
+++ b/Bot/app/core/config.py
@@ -74,38 +74,27 @@ def _set_user_id_in_redis(value, key='bot:user_id', ttl=REDIS_USER_ID_TTL):
         logger.exception('Error writing USER_ID to Redis')
         return False
 
-
 async def _fetch_user_id_async(db_url: str):
-    """Connect to Postgres and return a probable user id from the session table.
-
-    This is intentionally defensive: it looks for common column names and falls
-    back to the first column value if necessary. Returns empty string on errors.
-    """
     try:
         conn = await asyncpg.connect(dsn=db_url)
-        try:
-            # Prefer a non-expired session (expires_at in the future), most
-            # recently created first. If none found, fall back to the most
-            # recent session regardless of expiry.
-            val = await conn.fetchval(
-                "SELECT user_id FROM session LIMIT 1"
-            )
-            if val:
-                return str(val)
+        print("Connected successfully!")
 
+        try:
+            query = "SELECT user_id FROM session LIMIT 1"
+            val = await conn.fetchval(query)
             if val:
-                print("Fetched user_id from session table:", val)
                 return str(val)
-
-            logger.warning('session table returned no rows')
             return ''
+
         finally:
             await conn.close()
-    except Exception:
-        logger.exception('Error fetching USER_ID from DB')
+
+    except Exception as e:
+        logger.exception("Error fetching USER_ID from DB ")
         return ''
 
 
+
 def get_user_id():
     """Synchronous helper that returns the USER_ID, fetching it from DB if needed.
 
@@ -129,6 +118,7 @@ def get_user_id():
         pass
 
     db_url = DATABASE_URL or _build_db_url_from_env()
+    print( "Database URL:", db_url)
     if not db_url:
         logger.error('No DATABASE_URL or PG_* env vars set; cannot fetch USER_ID')
         _USER_ID_CACHE = ''
diff --git a/attendee/bots/meeting_url_utils.py b/attendee/bots/meeting_url_utils.py
index 3ef56ac..dc4a76c 100644
--- a/attendee/bots/meeting_url_utils.py
+++ b/attendee/bots/meeting_url_utils.py
@@ -11,6 +11,14 @@
 
 HTTP_URL_RE = re.compile(r"https?://[^\s<>\"']+")
 
+# Patterns used to detect scheme-less meeting URLs (e.g. "zoom.us/j/123456")
+# Keep these here as the canonical source so other modules can reuse them.
+SCHEME_LESS_PATTERNS = [
+    r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
+    r"meet\.google\.com/[^\s<>\"']+",
+    r"teams\.microsoft\.com/[^\s<>\"']+",
+]
+
 
 def contains_multiple_urls(url: str):
     if not url:
diff --git a/attendee/bots/tasks/sync_calendar_task.py b/attendee/bots/tasks/sync_calendar_task.py
index cc85ab2..25ad288 100644
--- a/attendee/bots/tasks/sync_calendar_task.py
+++ b/attendee/bots/tasks/sync_calendar_task.py
@@ -15,7 +15,7 @@
 
 from bots.bots_api_utils import delete_bot, patch_bot
 from bots.calendars_api_utils import remove_bots_from_calendar
-from bots.meeting_url_utils import meeting_type_from_url
+from bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS
 from bots.models import Bot, BotStates, Calendar, CalendarEvent, CalendarPlatform, CalendarStates, WebhookTriggerTypes
 from bots.webhook_payloads import calendar_webhook_payload
 from bots.webhook_utils import trigger_webhook
@@ -38,12 +38,7 @@ def extract_meeting_url_from_text(text: str) -> Optional[str]:
 
     # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme
     # Try to find common meeting host patterns and prepend https:// when detected
-    scheme_less_patterns = [
-        r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
-        r"meet\.google\.com/[^\s<>\"']+",
-        r"teams\.microsoft\.com/[^\s<>\"']+",
-    ]
-    for pat in scheme_less_patterns:
+    for pat in SCHEME_LESS_PATTERNS:
         for m in re.finditer(pat, text, flags=re.IGNORECASE):
             candidate = m.group(0)
             if not candidate.lower().startswith("http"):