From 4fa761dbadd829a8ac26835fcac7ecda88c98d81 Mon Sep 17 00:00:00 2001 From: Saurabh Puri Date: Thu, 13 Nov 2025 12:25:43 +0530 Subject: [PATCH 1/6] feat : add lingo bot --- Bot/app/api/meetings.py | 43 +++++++++- attendee/bots/tasks/sync_calendar_task.py | 98 ++++++++++++++++++++++- 2 files changed, 137 insertions(+), 4 deletions(-) diff --git a/Bot/app/api/meetings.py b/Bot/app/api/meetings.py index 40747b0..5faabbd 100644 --- a/Bot/app/api/meetings.py +++ b/Bot/app/api/meetings.py @@ -14,6 +14,8 @@ import json from app.core import config import os +import re +import html # Now you can access the values like this: CLIENT_ID = os.getenv("CLIENT_ID") @@ -70,8 +72,47 @@ def get_meetings(body: ScheduleMeeting, token: str = Depends(OAUTH2_SCHEME)): events = events_result.get('items', []) scheduled_meetings = [] meetings_map = {} + URL_RE = re.compile(r'https?://[^\s<>"\']+', re.IGNORECASE) + + def extract_meeting_url_from_text(text: str): + if not text: + return None + # look for normal https links first + m = URL_RE.search(text) + if m: + return m.group(0).rstrip('>') + # fallback: scheme-less hosts like "zoom.us/j/..." 
+ for pat in [r'(?:[\w.-]+\.)?zoom\.us/\S+', r'meet\.google\.com/\S+', r'teams\.microsoft\.com/\S+']: + m2 = re.search(pat, text, flags=re.IGNORECASE) + if m2: + candidate = m2.group(0) + if not candidate.lower().startswith('http'): + candidate = 'https://' + candidate + return candidate + return None + for event in events: - meeting_url = event.get('hangoutLink') + # Prefer structured conferenceData entryPoints (video) when available + meeting_url = None + conf = event.get('conferenceData') + if conf: + entry_points = conf.get('entryPoints', []) + for ep in entry_points: + if ep.get('entryPointType') == 'video' and ep.get('uri'): + meeting_url = ep.get('uri') + break + # fallbacks + if not meeting_url: + meeting_url = event.get('hangoutLink') + if not meeting_url: + # check location, description, summary for embedded links + for field in ('location', 'description', 'summary'): + val = event.get(field) + if val: + val = html.unescape(val) + meeting_url = extract_meeting_url_from_text(val) + if meeting_url: + break # check is bot request is aleady sent for the meeting json_str = redis_client.get(BOT_ADDED_IN_MEETING_KEY) diff --git a/attendee/bots/tasks/sync_calendar_task.py b/attendee/bots/tasks/sync_calendar_task.py index 85645c6..cc85ab2 100644 --- a/attendee/bots/tasks/sync_calendar_task.py +++ b/attendee/bots/tasks/sync_calendar_task.py @@ -7,6 +7,7 @@ from zoneinfo import ZoneInfo import dateutil.parser +import html import requests from celery import shared_task from django.db import transaction @@ -21,16 +22,34 @@ logger = logging.getLogger(__name__) -URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+") +URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE) def extract_meeting_url_from_text(text: str) -> Optional[str]: if not text: return None + # First pass: look for normal https:// links (case-insensitive) for m in URL_CANDIDATE.finditer(text): - url = m.group(0).rstrip(").,;]}>") + url = m.group(0).rstrip(").,;]}") + # strip trailing '>' that 
sometimes remains from markdown/angle-bracket wrapping + url = url.rstrip('>') if meeting_type_from_url(url): return url + + # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme + # Try to find common meeting host patterns and prepend https:// when detected + scheme_less_patterns = [ + r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+", + r"meet\.google\.com/[^\s<>\"']+", + r"teams\.microsoft\.com/[^\s<>\"']+", + ] + for pat in scheme_less_patterns: + for m in re.finditer(pat, text, flags=re.IGNORECASE): + candidate = m.group(0) + if not candidate.lower().startswith("http"): + candidate = "https://" + candidate + if meeting_type_from_url(candidate): + return candidate return None @@ -457,13 +476,86 @@ def _remote_event_to_calendar_event_data(self, google_event: dict) -> dict: # Extract meeting URL if present meeting_url_from_conference_data = None + entry_points = [] if "conferenceData" in google_event: entry_points = google_event["conferenceData"].get("entryPoints", []) for entry_point in entry_points: if entry_point.get("entryPointType") == "video": meeting_url_from_conference_data = entry_point.get("uri") break - meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data) or extract_meeting_url_from_text(google_event.get("hangoutLink")) or extract_meeting_url_from_text(google_event.get("location")) or extract_meeting_url_from_text(google_event.get("description")) or extract_meeting_url_from_text(google_event.get("summary")) + + # Normalize/unescape free-text fields before extraction + hangout_link = google_event.get("hangoutLink") + location_text = google_event.get("location") + description_text = google_event.get("description") + summary_text = google_event.get("summary") + + if description_text: + description_text = html.unescape(description_text) + if location_text: + location_text = html.unescape(location_text) + if summary_text: + summary_text = html.unescape(summary_text) + + meeting_url = None + meeting_url_source = None + 
+ # Check in order and record the source field for logging + if meeting_url_from_conference_data: + meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data) + if meeting_url: + meeting_url_source = "conferenceData.entryPoints" + + if not meeting_url and hangout_link: + meeting_url = extract_meeting_url_from_text(hangout_link) + if meeting_url: + meeting_url_source = "hangoutLink" + + if not meeting_url and location_text: + meeting_url = extract_meeting_url_from_text(location_text) + if meeting_url: + meeting_url_source = "location" + + if not meeting_url and description_text: + meeting_url = extract_meeting_url_from_text(description_text) + if meeting_url: + meeting_url_source = "description" + + if not meeting_url and summary_text: + meeting_url = extract_meeting_url_from_text(summary_text) + if meeting_url: + meeting_url_source = "summary" + + # Loose fallback: try to detect scheme-less Zoom/Meet/Teams links inside text + if not meeting_url: + # Try entry points liberally (some providers set non-standard URIs) + if entry_points: + for ep in entry_points: + uri = ep.get("uri") + if uri: + candidate = uri + if not candidate.lower().startswith("http"): + candidate = "https://" + candidate + if meeting_type_from_url(candidate): + meeting_url = extract_meeting_url_from_text(candidate) or candidate + meeting_url_source = "conferenceData.entryPoints.loose" + break + + # Try scheme-less patterns in text fields + if not meeting_url: + loose_text = "\n".join(filter(None, [location_text, description_text, summary_text])) + if loose_text: + # common patterns + m = re.search(r"(?:[\w.-]+\.)?zoom\.us/\S+", loose_text, flags=re.IGNORECASE) + if m: + candidate = m.group(0) + if not candidate.lower().startswith("http"): + candidate = "https://" + candidate + if meeting_type_from_url(candidate): + meeting_url = candidate + meeting_url_source = "loose_text_zoom" + + logger.debug("Event %s: extracted meeting_url=%s source=%s", google_event.get("id"), 
meeting_url, meeting_url_source) # Extract attendees attendees = [] From 5ef69c4ec976dff18d71be0e04688647da4b2399 Mon Sep 17 00:00:00 2001 From: Saurabh Puri Date: Thu, 13 Nov 2025 12:26:07 +0530 Subject: [PATCH 2/6] feat: move worker from 4 to 1 --- Bot/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Bot/Dockerfile b/Bot/Dockerfile index 1a48749..f9d6ce1 100644 --- a/Bot/Dockerfile +++ b/Bot/Dockerfile @@ -16,5 +16,5 @@ COPY . . EXPOSE 8001 # Run FastAPI app -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "4"] +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "1"] From 411ab24e5f8858d0f6b8b49301378972140ee62e Mon Sep 17 00:00:00 2001 From: Saurabh Puri Date: Mon, 17 Nov 2025 11:57:55 +0530 Subject: [PATCH 3/6] feat: update workers value --- Bot/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Bot/Dockerfile b/Bot/Dockerfile index f9d6ce1..1a48749 100644 --- a/Bot/Dockerfile +++ b/Bot/Dockerfile @@ -16,5 +16,5 @@ COPY . . 
EXPOSE 8001 # Run FastAPI app -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "1"] +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "4"] From 3f247b298e2672bad2c4f194e0533734538ca4e1 Mon Sep 17 00:00:00 2001 From: Saurabh Puri Date: Mon, 17 Nov 2025 11:58:09 +0530 Subject: [PATCH 4/6] feat: remove duplication of code --- Bot/app/api/meetings.py | 79 +++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 19 deletions(-) diff --git a/Bot/app/api/meetings.py b/Bot/app/api/meetings.py index 5faabbd..d283226 100644 --- a/Bot/app/api/meetings.py +++ b/Bot/app/api/meetings.py @@ -72,24 +72,65 @@ def get_meetings(body: ScheduleMeeting, token: str = Depends(OAUTH2_SCHEME)): events = events_result.get('items', []) scheduled_meetings = [] meetings_map = {} - URL_RE = re.compile(r'https?://[^\s<>"\']+', re.IGNORECASE) - - def extract_meeting_url_from_text(text: str): - if not text: - return None - # look for normal https links first - m = URL_RE.search(text) - if m: - return m.group(0).rstrip('>') - # fallback: scheme-less hosts like "zoom.us/j/..." - for pat in [r'(?:[\w.-]+\.)?zoom\.us/\S+', r'meet\.google\.com/\S+', r'teams\.microsoft\.com/\S+']: - m2 = re.search(pat, text, flags=re.IGNORECASE) - if m2: - candidate = m2.group(0) - if not candidate.lower().startswith('http'): - candidate = 'https://' + candidate - return candidate - return None + # Try to import the canonical extraction logic from the attendee package. + # In some deployment setups the `attendee` package may not be on sys.path + # (separate services). Fall back to a local implementation that mirrors + # the attendee behaviour and will attempt to use meeting_type_from_url if + # that helper is importable. 
+ try: + from attendee.bots.tasks.sync_calendar_task import extract_meeting_url_from_text # type: ignore + except Exception: + try: + # Try alternative import path used in the attendee codebase + from bots.tasks.sync_calendar_task import extract_meeting_url_from_text # type: ignore + except Exception: + # Fallback implementation copied from attendee/bots/tasks/sync_calendar_task.py + from typing import Optional + + URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE) + + try: + # Prefer to reuse meeting_type_from_url when available + from bots.meeting_url_utils import meeting_type_from_url # type: ignore + except Exception: + try: + from attendee.bots.meeting_url_utils import meeting_type_from_url # type: ignore + except Exception: + # Last-resort: accept any url (best-effort) + def meeting_type_from_url(url: str): + return True + + def extract_meeting_url_from_text(text: str) -> Optional[str]: + if not text: + return None + # First pass: look for normal https:// links (case-insensitive) + for m in URL_CANDIDATE.finditer(text): + url = m.group(0).rstrip(").,;]}") + # strip trailing '>' that sometimes remains from markdown/angle-bracket wrapping + url = url.rstrip('>') + try: + if meeting_type_from_url(url): + return url + except Exception: + return url + + # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme + scheme_less_patterns = [ + r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+", + r"meet\.google\.com/[^\s<>\"']+", + r"teams\.microsoft\.com/[^\s<>\"']+", + ] + for pat in scheme_less_patterns: + for m in re.finditer(pat, text, flags=re.IGNORECASE): + candidate = m.group(0) + if not candidate.lower().startswith("http"): + candidate = "https://" + candidate + try: + if meeting_type_from_url(candidate): + return candidate + except Exception: + return candidate + return None for event in events: # Prefer structured conferenceData entryPoints (video) when available @@ -114,7 +155,7 @@ def extract_meeting_url_from_text(text: str): if 
meeting_url: break - # check is bot request is aleady sent for the meeting + # check is bot request is already sent for the meeting json_str = redis_client.get(BOT_ADDED_IN_MEETING_KEY) if json_str: meetings_map = json.loads(json_str) From 8a477129849ceea37294153d19c4b182646e449a Mon Sep 17 00:00:00 2001 From: Saurabh Puri Date: Thu, 20 Nov 2025 12:58:00 +0530 Subject: [PATCH 5/6] feat: update docker compose file --- docker-compose.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index f8704b9..447fbe7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -155,7 +155,8 @@ services: # - "8001:8000" command: "python bots/webpage_streamer/run_webpage_streamer.py" security_opt: - - seccomp=attendee/bots/web_bot_adapter/chrome_seccomp.json + # - seccomp=attendee/bots/web_bot_adapter/chrome_seccomp.json + - seccomp:unconfined depends_on: - postgres-attendee - redis From d4d1c3cade9872087185f0566033f2e21ea4a3ef Mon Sep 17 00:00:00 2001 From: Saurabh Puri Date: Thu, 20 Nov 2025 12:58:43 +0530 Subject: [PATCH 6/6] feat: remove duplicate and unnecessary code --- Bot/app/api/meetings.py | 20 +++++++-------- Bot/app/core/config.py | 30 ++++++++--------------- attendee/bots/meeting_url_utils.py | 8 ++++++ attendee/bots/tasks/sync_calendar_task.py | 9 ++----- 4 files changed, 30 insertions(+), 37 deletions(-) diff --git a/Bot/app/api/meetings.py b/Bot/app/api/meetings.py index d283226..4894f4a 100644 --- a/Bot/app/api/meetings.py +++ b/Bot/app/api/meetings.py @@ -90,15 +90,20 @@ def get_meetings(body: ScheduleMeeting, token: str = Depends(OAUTH2_SCHEME)): URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE) try: - # Prefer to reuse meeting_type_from_url when available - from bots.meeting_url_utils import meeting_type_from_url # type: ignore + # Prefer to reuse meeting_type_from_url and the canonical patterns when available + from bots.meeting_url_utils import meeting_type_from_url, 
SCHEME_LESS_PATTERNS # type: ignore except Exception: try: - from attendee.bots.meeting_url_utils import meeting_type_from_url # type: ignore + from attendee.bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS # type: ignore except Exception: - # Last-resort: accept any url (best-effort) + # Last-resort: accept any url (best-effort) and provide default patterns def meeting_type_from_url(url: str): return True + SCHEME_LESS_PATTERNS = [ + r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+", + r"meet\.google\.com/[^\s<>\"']+", + r"teams\.microsoft\.com/[^\s<>\"']+", + ] def extract_meeting_url_from_text(text: str) -> Optional[str]: if not text: @@ -115,12 +120,7 @@ def extract_meeting_url_from_text(text: str) -> Optional[str]: return url # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme - scheme_less_patterns = [ - r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+", - r"meet\.google\.com/[^\s<>\"']+", - r"teams\.microsoft\.com/[^\s<>\"']+", - ] - for pat in scheme_less_patterns: + for pat in SCHEME_LESS_PATTERNS: for m in re.finditer(pat, text, flags=re.IGNORECASE): candidate = m.group(0) if not candidate.lower().startswith("http"): diff --git a/Bot/app/core/config.py b/Bot/app/core/config.py index 75ba517..5f3c554 100644 --- a/Bot/app/core/config.py +++ b/Bot/app/core/config.py @@ -74,38 +74,27 @@ def _set_user_id_in_redis(value, key='bot:user_id', ttl=REDIS_USER_ID_TTL): logger.exception('Error writing USER_ID to Redis') return False - async def _fetch_user_id_async(db_url: str): - """Connect to Postgres and return a probable user id from the session table. - - This is intentionally defensive: it looks for common column names and falls - back to the first column value if necessary. Returns empty string on errors. - """ try: conn = await asyncpg.connect(dsn=db_url) - try: - # Prefer a non-expired session (expires_at in the future), most - # recently created first. If none found, fall back to the most - # recent session regardless of expiry. 
- val = await conn.fetchval( - "SELECT user_id FROM session LIMIT 1" - ) - if val: - return str(val) + print("Connected successfully!") + try: + query = "SELECT user_id FROM session LIMIT 1" + val = await conn.fetchval(query) if val: - print("Fetched user_id from session table:", val) return str(val) - - logger.warning('session table returned no rows') return '' + finally: await conn.close() - except Exception: - logger.exception('Error fetching USER_ID from DB') + + except Exception as e: + logger.exception("Error fetching USER_ID from DB ") return '' + def get_user_id(): """Synchronous helper that returns the USER_ID, fetching it from DB if needed. @@ -129,6 +118,7 @@ def get_user_id(): pass db_url = DATABASE_URL or _build_db_url_from_env() + print( "Database URL:", db_url) if not db_url: logger.error('No DATABASE_URL or PG_* env vars set; cannot fetch USER_ID') _USER_ID_CACHE = '' diff --git a/attendee/bots/meeting_url_utils.py b/attendee/bots/meeting_url_utils.py index 3ef56ac..dc4a76c 100644 --- a/attendee/bots/meeting_url_utils.py +++ b/attendee/bots/meeting_url_utils.py @@ -11,6 +11,14 @@ HTTP_URL_RE = re.compile(r"https?://[^\s<>\"']+") +# Patterns used to detect scheme-less meeting URLs (e.g. "zoom.us/j/123456") +# Keep these here as the canonical source so other modules can reuse them. 
+SCHEME_LESS_PATTERNS = [ + r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+", + r"meet\.google\.com/[^\s<>\"']+", + r"teams\.microsoft\.com/[^\s<>\"']+", +] + def contains_multiple_urls(url: str): if not url: diff --git a/attendee/bots/tasks/sync_calendar_task.py b/attendee/bots/tasks/sync_calendar_task.py index cc85ab2..25ad288 100644 --- a/attendee/bots/tasks/sync_calendar_task.py +++ b/attendee/bots/tasks/sync_calendar_task.py @@ -15,7 +15,7 @@ from bots.bots_api_utils import delete_bot, patch_bot from bots.calendars_api_utils import remove_bots_from_calendar -from bots.meeting_url_utils import meeting_type_from_url +from bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS from bots.models import Bot, BotStates, Calendar, CalendarEvent, CalendarPlatform, CalendarStates, WebhookTriggerTypes from bots.webhook_payloads import calendar_webhook_payload from bots.webhook_utils import trigger_webhook @@ -38,12 +38,7 @@ def extract_meeting_url_from_text(text: str) -> Optional[str]: # Fallback: links without scheme (e.g., "zoom.us/j/12345") or mixed-case scheme # Try to find common meeting host patterns and prepend https:// when detected - scheme_less_patterns = [ - r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+", - r"meet\.google\.com/[^\s<>\"']+", - r"teams\.microsoft\.com/[^\s<>\"']+", - ] - for pat in scheme_less_patterns: + for pat in SCHEME_LESS_PATTERNS: for m in re.finditer(pat, text, flags=re.IGNORECASE): candidate = m.group(0) if not candidate.lower().startswith("http"):