Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 85 additions & 3 deletions Bot/app/api/meetings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import json
from app.core import config
import os
import re
import html

# Now you can access the values like this:
CLIENT_ID = os.getenv("CLIENT_ID")
Expand Down Expand Up @@ -70,10 +72,90 @@ def get_meetings(body: ScheduleMeeting, token: str = Depends(OAUTH2_SCHEME)):
events = events_result.get('items', [])
scheduled_meetings = []
meetings_map = {}
for event in events:
meeting_url = event.get('hangoutLink')
# Try to import the canonical extraction logic from the attendee package.
# In some deployment setups the `attendee` package may not be on sys.path
# (separate services). Fall back to a local implementation that mirrors
# the attendee behaviour and will attempt to use meeting_type_from_url if
# that helper is importable.
try:
from attendee.bots.tasks.sync_calendar_task import extract_meeting_url_from_text # type: ignore
except Exception:
try:
# Try alternative import path used in the attendee codebase
from bots.tasks.sync_calendar_task import extract_meeting_url_from_text # type: ignore
except Exception:
# Fallback implementation copied from attendee/bots/tasks/sync_calendar_task.py
from typing import Optional

URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE)

# check whether a bot request has already been sent for the meeting
try:
# Prefer to reuse meeting_type_from_url and the canonical patterns when available
from bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS # type: ignore
except Exception:
try:
from attendee.bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS # type: ignore
except Exception:
# Last-resort: the attendee validator is not importable, so recognise only
# the known meeting hosts (best-effort) and provide default patterns
def meeting_type_from_url(url: str):
    """Stand-in validator used when the attendee helper cannot be imported.

    Returns True only for http(s) URLs whose host is zoom.us (optionally
    with a subdomain), meet.google.com or teams.microsoft.com, i.e. the same
    hosts as the default scheme-less patterns. Accepting every URL would
    make the extractor return the first unrelated link found in an event's
    text as the meeting URL.

    Returns False for empty input or any other URL.
    """
    if not url:
        return False
    known_host = re.match(
        r"https?://(?:(?:[\w.-]+\.)?zoom\.us|meet\.google\.com|teams\.microsoft\.com)/",
        url,
        flags=re.IGNORECASE,
    )
    return known_host is not None
SCHEME_LESS_PATTERNS = [
r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
r"meet\.google\.com/[^\s<>\"']+",
r"teams\.microsoft\.com/[^\s<>\"']+",
]

def extract_meeting_url_from_text(text: str) -> Optional[str]:
    """Return the first meeting URL found in *text*, or ``None``.

    Two passes are made over the text:

    1. every ``http(s)://`` candidate matched by ``URL_CANDIDATE``;
    2. scheme-less host matches from ``SCHEME_LESS_PATTERNS`` (e.g.
       "zoom.us/j/12345"), to which ``https://`` is prepended.

    A candidate is returned when ``meeting_type_from_url`` gives a truthy
    result for it. If the validator itself raises, the candidate is returned
    unvalidated (deliberate best-effort behaviour of this fallback copy).
    """
    if not text:
        return None

    # Punctuation that commonly trails a link in prose, markdown or
    # angle-bracket wrapping; stripped in BOTH passes so a scheme-less link
    # at the end of a sentence does not keep its full stop.
    trailing = ").,;]}>"

    # First pass: look for normal http(s):// links (case-insensitive)
    for match in URL_CANDIDATE.finditer(text):
        url = match.group(0).rstrip(trailing)
        try:
            if meeting_type_from_url(url):
                return url
        except Exception:
            # Validator failure: keep the best-effort contract and hand the
            # candidate back rather than dropping the meeting.
            return url

    # Fallback: links without a scheme (e.g. "zoom.us/j/12345")
    for pattern in SCHEME_LESS_PATTERNS:
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            candidate = match.group(0).rstrip(trailing)
            # Test for a real scheme prefix; a bare "http" check would also
            # skip hosts that merely begin with those letters.
            if not candidate.lower().startswith(("http://", "https://")):
                candidate = "https://" + candidate
            try:
                if meeting_type_from_url(candidate):
                    return candidate
            except Exception:
                return candidate
    return None

for event in events:
# Prefer structured conferenceData entryPoints (video) when available
meeting_url = None
conf = event.get('conferenceData')
if conf:
entry_points = conf.get('entryPoints', [])
for ep in entry_points:
if ep.get('entryPointType') == 'video' and ep.get('uri'):
meeting_url = ep.get('uri')
break
# fallbacks
if not meeting_url:
meeting_url = event.get('hangoutLink')
if not meeting_url:
# check location, description, summary for embedded links
for field in ('location', 'description', 'summary'):
val = event.get(field)
if val:
val = html.unescape(val)
meeting_url = extract_meeting_url_from_text(val)
if meeting_url:
break

# check whether a bot request has already been sent for the meeting
json_str = redis_client.get(BOT_ADDED_IN_MEETING_KEY)
if json_str:
meetings_map = json.loads(json_str)
Expand Down
30 changes: 10 additions & 20 deletions Bot/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,38 +74,27 @@ def _set_user_id_in_redis(value, key='bot:user_id', ttl=REDIS_USER_ID_TTL):
logger.exception('Error writing USER_ID to Redis')
return False


async def _fetch_user_id_async(db_url: str):
    """Fetch one ``user_id`` from the Postgres ``session`` table.

    Connects with ``asyncpg`` using *db_url* and runs
    ``SELECT user_id FROM session LIMIT 1``. The query has no ORDER BY and no
    expiry filter, so which session row is returned is up to the database.

    Returns the value converted to ``str``. Returns an empty string when the
    query yields nothing (or a falsy value), or when connecting, querying or
    closing the connection raises; errors are logged, never propagated.
    """
    try:
        conn = await asyncpg.connect(dsn=db_url)
        logger.debug('Connected to Postgres')
        try:
            val = await conn.fetchval("SELECT user_id FROM session LIMIT 1")
        finally:
            # Always release the connection, including when the query fails.
            await conn.close()
    except Exception:
        logger.exception('Error fetching USER_ID from DB')
        return ''

    if val:
        # Log through the module logger instead of print(), and keep the id
        # itself out of the output.
        logger.debug('Fetched user_id from session table')
        return str(val)

    logger.warning('session table returned no rows')
    return ''



def get_user_id():
"""Synchronous helper that returns the USER_ID, fetching it from DB if needed.

Expand All @@ -129,6 +118,7 @@ def get_user_id():
pass

db_url = DATABASE_URL or _build_db_url_from_env()
print( "Database URL:", db_url)
if not db_url:
logger.error('No DATABASE_URL or PG_* env vars set; cannot fetch USER_ID')
_USER_ID_CACHE = ''
Expand Down
8 changes: 8 additions & 0 deletions attendee/bots/meeting_url_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@

# Matches an explicit http:// or https:// URL (case-sensitive), stopping at
# whitespace, angle brackets or quote characters.
HTTP_URL_RE = re.compile(r"https?://[^\s<>\"']+")

# Regex sources (plain strings, not compiled) used to detect scheme-less
# meeting URLs (e.g. "zoom.us/j/123456"): zoom.us with an optional subdomain,
# meet.google.com and teams.microsoft.com, each followed by a path.
# Keep these here as the canonical source so other modules can reuse them.
SCHEME_LESS_PATTERNS = [
r"(?:[\w.-]+\.)?zoom\.us/[^\s<>\"']+",
r"meet\.google\.com/[^\s<>\"']+",
r"teams\.microsoft\.com/[^\s<>\"']+",
]


def contains_multiple_urls(url: str):
if not url:
Expand Down
95 changes: 91 additions & 4 deletions attendee/bots/tasks/sync_calendar_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,44 @@
from zoneinfo import ZoneInfo

import dateutil.parser
import html
import requests
from celery import shared_task
from django.db import transaction
from django.utils import timezone

from bots.bots_api_utils import delete_bot, patch_bot
from bots.calendars_api_utils import remove_bots_from_calendar
from bots.meeting_url_utils import meeting_type_from_url
from bots.meeting_url_utils import meeting_type_from_url, SCHEME_LESS_PATTERNS
from bots.models import Bot, BotStates, Calendar, CalendarEvent, CalendarPlatform, CalendarStates, WebhookTriggerTypes
from bots.webhook_payloads import calendar_webhook_payload
from bots.webhook_utils import trigger_webhook

logger = logging.getLogger(__name__)

URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+")
URL_CANDIDATE = re.compile(r"https?://[^\s<>\"']+", re.IGNORECASE)


def extract_meeting_url_from_text(text: str) -> Optional[str]:
    """Return the first recognised meeting URL in *text*, or ``None``.

    Two passes are made over the text:

    1. every ``http(s)://`` candidate matched by ``URL_CANDIDATE``;
    2. scheme-less host matches from ``SCHEME_LESS_PATTERNS`` (e.g.
       "zoom.us/j/12345"), to which ``https://`` is prepended.

    A candidate is returned only when ``meeting_type_from_url`` gives a
    truthy result for it. Empty or ``None`` input returns ``None``.
    """
    if not text:
        return None

    # Punctuation that commonly trails a link in prose, markdown or
    # angle-bracket wrapping; stripped in BOTH passes so a scheme-less link
    # at the end of a sentence does not keep its full stop.
    trailing = ").,;]}>"

    # First pass: look for normal http(s):// links (case-insensitive)
    for match in URL_CANDIDATE.finditer(text):
        url = match.group(0).rstrip(trailing)
        if meeting_type_from_url(url):
            return url

    # Fallback: common meeting hosts written without a scheme
    for pattern in SCHEME_LESS_PATTERNS:
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            candidate = match.group(0).rstrip(trailing)
            # Test for a real scheme prefix; a bare "http" check would also
            # skip hosts that merely begin with those letters.
            if not candidate.lower().startswith(("http://", "https://")):
                candidate = "https://" + candidate
            if meeting_type_from_url(candidate):
                return candidate
    return None


Expand Down Expand Up @@ -457,13 +471,86 @@ def _remote_event_to_calendar_event_data(self, google_event: dict) -> dict:

# Extract meeting URL if present
meeting_url_from_conference_data = None
entry_points = []
if "conferenceData" in google_event:
entry_points = google_event["conferenceData"].get("entryPoints", [])
for entry_point in entry_points:
if entry_point.get("entryPointType") == "video":
meeting_url_from_conference_data = entry_point.get("uri")
break
meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data) or extract_meeting_url_from_text(google_event.get("hangoutLink")) or extract_meeting_url_from_text(google_event.get("location")) or extract_meeting_url_from_text(google_event.get("description")) or extract_meeting_url_from_text(google_event.get("summary"))

# Normalize/unescape free-text fields before extraction
hangout_link = google_event.get("hangoutLink")
location_text = google_event.get("location")
description_text = google_event.get("description")
summary_text = google_event.get("summary")

if description_text:
description_text = html.unescape(description_text)
if location_text:
location_text = html.unescape(location_text)
if summary_text:
summary_text = html.unescape(summary_text)

meeting_url = None
meeting_url_source = None

# Check in order and record the source field for logging
if meeting_url_from_conference_data:
meeting_url = extract_meeting_url_from_text(meeting_url_from_conference_data)
if meeting_url:
meeting_url_source = "conferenceData.entryPoints"

if not meeting_url and hangout_link:
meeting_url = extract_meeting_url_from_text(hangout_link)
if meeting_url:
meeting_url_source = "hangoutLink"

if not meeting_url and location_text:
meeting_url = extract_meeting_url_from_text(location_text)
if meeting_url:
meeting_url_source = "location"

if not meeting_url and description_text:
meeting_url = extract_meeting_url_from_text(description_text)
if meeting_url:
meeting_url_source = "description"

if not meeting_url and summary_text:
meeting_url = extract_meeting_url_from_text(summary_text)
if meeting_url:
meeting_url_source = "summary"

# Loose fallback: try to detect scheme-less Zoom/Meet/Teams links inside text
if not meeting_url:
# Try entry points liberally (some providers set non-standard URIs)
if entry_points:
for ep in entry_points:
uri = ep.get("uri")
if uri:
candidate = uri
if not candidate.lower().startswith("http"):
candidate = "https://" + candidate
if meeting_type_from_url(candidate):
meeting_url = extract_meeting_url_from_text(candidate) or candidate
meeting_url_source = "conferenceData.entryPoints.loose"
break

# Try scheme-less patterns in text fields
if not meeting_url:
loose_text = "\n".join(filter(None, [location_text, description_text, summary_text]))
if loose_text:
# common patterns
m = re.search(r"(?:[\w.-]+\.)?zoom\.us/\S+", loose_text, flags=re.IGNORECASE)
if m:
candidate = m.group(0)
if not candidate.lower().startswith("http"):
candidate = "https://" + candidate
if meeting_type_from_url(candidate):
meeting_url = candidate
meeting_url_source = "loose_text_zoom"

logger.debug("Event %s: extracted meeting_url=%s source=%s", google_event.get("id"), meeting_url, meeting_url_source)

# Extract attendees
attendees = []
Expand Down
3 changes: 2 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ services:
# - "8001:8000"
command: "python bots/webpage_streamer/run_webpage_streamer.py"
security_opt:
- seccomp=attendee/bots/web_bot_adapter/chrome_seccomp.json
# - seccomp=attendee/bots/web_bot_adapter/chrome_seccomp.json
- seccomp:unconfined
depends_on:
- postgres-attendee
- redis
Expand Down