joshsoftware · purisaurabh · Nov 13, 2025 · Nov 13, 2025 · Nov 17, 2025 · Nov 17, 2025
diff --git a/Bot/app/api/meetings.py b/Bot/app/api/meetings.py
@@ -14,6 +14,8 @@
 import json
 from app.core import config
 import os
+import re
+import html
 
 # Now you can access the values like this:
 CLIENT_ID = os.getenv("CLIENT_ID")
@@ -70,10 +72,90 @@ def get_meetings(body: ScheduleMeeting, token: str = Depends(OAUTH2_SCHEME)):
     events = events_result.get('items', [])
     scheduled_meetings = []
     meetings_map = {}
-    for event in events:
-        meeting_url = event.get('hangoutLink')
+    # Try to import the canonical extraction logic from the attendee package.
+    # In some deployment setups the `attendee` package may not be on sys.path
+    # (separate services). Fall back to a local implementation that mirrors
+    # the attendee behaviour and will attempt to use meeting_type_from_url if
+    # that helper is importable.
+    try:
+        from attendee.bots.tasks.sync_calendar_task import extract_meeting_url_from_text  # type: ignore
+    except Exception:
+        try:
+            # Try alternative import path used in the attendee codebase
+            from bots.tasks.sync_calendar_task import extract_meeting_url_from_text  # type: ignore
+        except Exception:
+            # Fallback implementation copied from attendee/bots/tasks/sync_calendar_task.py
+            from typing import Optional
+
+            URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE)
 
-        # check is bot request is aleady sent for the meeting
+            try:
+                # Prefer to reuse meeting_type_from_url and the canonical patterns when available
+                from bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS  # type: ignore
+            except Exception:
+                try:
+                    from attendee.bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS  # type: ignore
+                except Exception:
+                    # Last resort: treat every URL as a meeting URL (best effort) and use default scheme-less patterns
+                    def meeting_type_from_url(url: str):
+                        return True
+                    SCHEME_LESS_PATTERNS = [
+                        r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
+                        r"meet\.google\.com/[^\s<>\"']+",
+                        r"teams\.microsoft\.com/[^\s<>\"']+",
+                    ]
+
+            def extract_meeting_url_from_text(text: str) -> Optional[str]:
+                if not text:
+                    return None
+                # First pass: look for http:// or https:// links (scheme matched case-insensitively)
+                for m in URL_CANDIDATE.finditer(text):
+                    url = m.group(0).rstrip(").,;]}")
+                    # defensive strip of a trailing '>' from markdown/angle-bracket wrapping (URL_CANDIDATE already excludes '>')
+                    url = url.rstrip('>')
+                    try:
+                        if meeting_type_from_url(url):
+                            return url
+                    except Exception:
+                        return url
+
+                # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme
+                for pat in SCHEME_LESS_PATTERNS:
+                    for m in re.finditer(pat, text, flags=re.IGNORECASE):
+                        candidate = m.group(0)
+                        if not candidate.lower().startswith("http"):
+                            candidate = "https://" + candidate
+                        try:
+                            if meeting_type_from_url(candidate):
+                                return candidate
+                        except Exception:
+                            return candidate
+                return None
+
+    for event in events:
+        # Prefer structured conferenceData entryPoints (video) when available
+        meeting_url = None
+        conf = event.get('conferenceData')
+        if conf:
+            entry_points = conf.get('entryPoints', [])
+            for ep in entry_points:
+                if ep.get('entryPointType') == 'video' and ep.get('uri'):
+                    meeting_url = ep.get('uri')
+                    break
+        # Fallbacks: hangoutLink first, then the free-text fields
+        if not meeting_url:
+            meeting_url = event.get('hangoutLink')
+        if not meeting_url:
+            # check location, description, summary for embedded links
+            for field in ('location', 'description', 'summary'):
+                val = event.get(field)
+                if val:
+                    val = html.unescape(val)
+                    meeting_url = extract_meeting_url_from_text(val)
+                    if meeting_url:
+                        break
+
+        # check whether a bot request has already been sent for the meeting
         json_str = redis_client.get(BOT_ADDED_IN_MEETING_KEY)
         if json_str:
             meetings_map = json.loads(json_str)

diff --git a/Bot/app/core/config.py b/Bot/app/core/config.py
@@ -74,38 +74,27 @@ def _set_user_id_in_redis(value, key='bot:user_id', ttl=REDIS_USER_ID_TTL):
         logger.exception('Error writing USER_ID to Redis')
         return False
 
-
 async def _fetch_user_id_async(db_url: str):
-    """Connect to Postgres and return a probable user id from the session table.
-
-    This is intentionally defensive: it looks for common column names and falls
-    back to the first column value if necessary. Returns empty string on errors.
-    """
     try:
         conn = await asyncpg.connect(dsn=db_url)
-        try:
-            # Prefer a non-expired session (expires_at in the future), most
-            # recently created first. If none found, fall back to the most
-            # recent session regardless of expiry.
-            val = await conn.fetchval(
-                "SELECT user_id FROM session LIMIT 1"
-            )
-            if val:
-                return str(val)
+        print("Connected successfully!")
 
+        try:
+            query = "SELECT user_id FROM session LIMIT 1"
+            val = await conn.fetchval(query)
             if val:
-                print("Fetched user_id from session table:", val)
                 return str(val)
-
-            logger.warning('session table returned no rows')
             return ''
+
         finally:
             await conn.close()
-    except Exception:
-        logger.exception('Error fetching USER_ID from DB')
+
+    except Exception as e:
+        logger.exception("Error fetching USER_ID from DB ")
         return ''
 
 
+
 def get_user_id():
     """Synchronous helper that returns the USER_ID, fetching it from DB if needed.
 
@@ -129,6 +118,7 @@ def get_user_id():
         pass
 
     db_url = DATABASE_URL or _build_db_url_from_env()
+    print( "Database URL:", db_url)
     if not db_url:
         logger.error('No DATABASE_URL or PG_* env vars set; cannot fetch USER_ID')
         _USER_ID_CACHE = ''

diff --git a/attendee/bots/meeting_url_utils.py b/attendee/bots/meeting_url_utils.py
@@ -11,6 +11,14 @@
 
 HTTP_URL_RE = re.compile(r"https?://[^\s<>\"']+")
 
+# Patterns used to detect scheme-less meeting URLs (e.g. "zoom.us/j/123456")
+# Keep these here as the canonical source so other modules can reuse them.
+SCHEME_LESS_PATTERNS = [
+    r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
+    r"meet\.google\.com/[^\s<>\"']+",
+    r"teams\.microsoft\.com/[^\s<>\"']+",
+]
+
 
 def contains_multiple_urls(url: str):
     if not url:

diff --git a/attendee/bots/tasks/sync_calendar_task.py b/attendee/bots/tasks/sync_calendar_task.py
@@ -7,30 +7,44 @@
 from zoneinfo import ZoneInfo
 
 import dateutil.parser
+import html
 import requests
 from celery import shared_task
 from django.db import transaction
 from django.utils import timezone
 
 from bots.bots_api_utils import delete_bot, patch_bot
 from bots.calendars_api_utils import remove_bots_from_calendar
-from bots.meeting_url_utils import meeting_type_from_url
+from bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS
 from bots.models import Bot, BotStates, Calendar, CalendarEvent, CalendarPlatform, CalendarStates, WebhookTriggerTypes
 from bots.webhook_payloads import calendar_webhook_payload
 from bots.webhook_utils import trigger_webhook
 
 logger = logging.getLogger(__name__)
 
-URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+")
+URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE)
 
 
 def extract_meeting_url_from_text(text: str) -> Optional[str]:
     if not text:
         return None
+    # First pass: look for http:// or https:// links (scheme matched case-insensitively)
     for m in URL_CANDIDATE.finditer(text):
-        url = m.group(0).rstrip(").,;]}>")
+        url = m.group(0).rstrip(").,;]}")
+        # defensive strip of a trailing '>' from markdown/angle-bracket wrapping (URL_CANDIDATE already excludes '>')
+        url = url.rstrip('>')
         if meeting_type_from_url(url):
             return url
+
+    # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme
+    # Try to find common meeting host patterns and prepend https:// when detected
+    for pat in SCHEME_LESS_PATTERNS:
+        for m in re.finditer(pat, text, flags=re.IGNORECASE):
+            candidate = m.group(0)
+            if not candidate.lower().startswith("http"):
+                candidate = "https://" + candidate
+            if meeting_type_from_url(candidate):
+                return candidate
     return None
 
 
@@ -457,13 +471,86 @@ def _remote_event_to_calendar_event_data(self, google_event: dict) -> dict:
 
         # Extract meeting URL if present
         meeting_url_from_conference_data = None
+        entry_points = []
         if "conferenceData" in google_event:
             entry_points = google_event["conferenceData"].get("entryPoints", [])
             for entry_point in entry_points:
                 if entry_point.get("entryPointType") == "video":
                     meeting_url_from_conference_data = entry_point.get("uri")
                     break
-        meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data) or extract_meeting_url_from_text(google_event.get("hangoutLink")) or extract_meeting_url_from_text(google_event.get("location")) or extract_meeting_url_from_text(google_event.get("description")) or extract_meeting_url_from_text(google_event.get("summary"))
+
+        # Normalize/unescape free-text fields before extraction
+        hangout_link = google_event.get("hangoutLink")
+        location_text = google_event.get("location")
+        description_text = google_event.get("description")
+        summary_text = google_event.get("summary")
+
+        if description_text:
+            description_text = html.unescape(description_text)
+        if location_text:
+            location_text = html.unescape(location_text)
+        if summary_text:
+            summary_text = html.unescape(summary_text)
+
+        meeting_url = None
+        meeting_url_source = None
+
+        # Check in order and record the source field for logging
+        if meeting_url_from_conference_data:
+            meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data)
+            if meeting_url:
+                meeting_url_source = "conferenceData.entryPoints"
+
+        if not meeting_url and hangout_link:
+            meeting_url = extract_meeting_url_from_text(hangout_link)
+            if meeting_url:
+                meeting_url_source = "hangoutLink"
+
+        if not meeting_url and location_text:
+            meeting_url = extract_meeting_url_from_text(location_text)
+            if meeting_url:
+                meeting_url_source = "location"
+
+        if not meeting_url and description_text:
+            meeting_url = extract_meeting_url_from_text(description_text)
+            if meeting_url:
+                meeting_url_source = "description"
+
+        if not meeting_url and summary_text:
+            meeting_url = extract_meeting_url_from_text(summary_text)
+            if meeting_url:
+                meeting_url_source = "summary"
+
+        # Loose fallback: retry entry-point URIs of any type, then scheme-less Zoom links in the text fields
+        if not meeting_url:
+            # Try entry points liberally (some providers set non-standard URIs)
+            if entry_points:
+                for ep in entry_points:
+                    uri = ep.get("uri")
+                    if uri:
+                        candidate = uri
+                        if not candidate.lower().startswith("http"):
+                            candidate = "https://" + candidate
+                        if meeting_type_from_url(candidate):
+                            meeting_url = extract_meeting_url_from_text(candidate) or candidate
+                            meeting_url_source = "conferenceData.entryPoints.loose"
+                            break
+
+            # Try scheme-less patterns in text fields
+            if not meeting_url:
+                loose_text = "\n".join(filter(None, [location_text, description_text, summary_text]))
+                if loose_text:
+                    # only the Zoom host pattern is checked here
+                    m = re.search(r"(?:[\w.-]+\.)?zoom\.us/\S+", loose_text, flags=re.IGNORECASE)
+                    if m:
+                        candidate = m.group(0)
+                        if not candidate.lower().startswith("http"):
+                            candidate = "https://" + candidate
+                        if meeting_type_from_url(candidate):
+                            meeting_url = candidate
+                            meeting_url_source = "loose_text_zoom"
+
+        logger.debug("Event %s: extracted meeting_url=%s source=%s", google_event.get("id"), meeting_url, meeting_url_source)
 
         # Extract attendees
         attendees = []

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -155,7 +155,8 @@ services:
     #  - "8001:8000"
     command: "python bots/webpage_streamer/run_webpage_streamer.py"
     security_opt:
-      - seccomp=attendee/bots/web_bot_adapter/chrome_seccomp.json
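+      # - seccomp=attendee/bots/web_bot_adapter/chrome_seccomp.json  # NOTE(review): disabled; the unconfined setting below turns off syscall filtering - restore before production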
+      - seccomp:unconfined
     depends_on:
       - postgres-attendee
       - redis