AndreRobitaille · AndreRobitaille · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -17,6 +17,8 @@ WORKDIR /rails
 # Install base packages
+# python3 is installed because the `yt-dlp` release asset fetched below is a
+# Python zipapp rather than a standalone binary; without an interpreter the
+# command cannot start. curl uses -f so an HTTP error fails the build instead
+# of saving the error page as /usr/local/bin/yt-dlp.
 RUN apt-get update -qq && \
     apt-get install --no-install-recommends -y curl libjemalloc2 libvips postgresql-client poppler-utils tesseract-ocr && \
+    apt-get install --no-install-recommends -y python3 && \
+    curl -fsSL https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp && \
+    chmod +x /usr/local/bin/yt-dlp && \
     ln -s /usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2 /usr/local/lib/libjemalloc.so && \
     rm -rf /var/lib/apt/lists /var/cache/apt/archives
 

diff --git a/app/assets/stylesheets/application.css b/app/assets/stylesheets/application.css
@@ -1971,6 +1971,26 @@ th.sortable-header:hover {
 
 /* === Meeting Show — Structured Sections === */
 
+.transcript-banner {
+  display: flex;
+  align-items: center;
+  gap: var(--space-3);
+  padding: var(--space-3) var(--space-4);
+  margin-bottom: var(--space-6);
+  background: var(--color-accent-cool-bg);
+  border: 1px solid color-mix(in srgb, var(--color-accent-cool) 30%, var(--color-border));
+  border-left: 3px solid var(--color-accent-cool);
+  border-radius: var(--radius-md);
+  font-family: var(--font-body);
+  font-size: var(--font-size-sm);
+  color: var(--color-text);
+}
+
+.transcript-banner svg {
+  flex-shrink: 0;
+  color: var(--color-accent-cool);
+}
+
 .meeting-headline {
   font-size: var(--font-size-lg);
   line-height: 1.4;

diff --git a/app/controllers/meetings_controller.rb b/app/controllers/meetings_controller.rb
@@ -20,8 +20,9 @@ def show
       topic.topic_appearances.size > 1
     end
 
-    # Prefer minutes_recap over packet_analysis
+    # Prefer minutes_recap over transcript_recap over packet_analysis
     @summary = @meeting.meeting_summaries.find_by(summary_type: "minutes_recap") ||
+               @meeting.meeting_summaries.find_by(summary_type: "transcript_recap") ||
                @meeting.meeting_summaries.find_by(summary_type: "packet_analysis")
   end
 end
diff --git a/app/jobs/documents/download_transcript_job.rb b/app/jobs/documents/download_transcript_job.rb
@@ -0,0 +1,82 @@
+require "open3"
+
+module Documents
+  # Downloads the auto-generated English captions of a meeting's YouTube
+  # recording with yt-dlp and stores them as a "transcript" meeting document:
+  # the caption text goes into `extracted_text` and the raw SRT is attached as
+  # the document's file.
+  #
+  # Nothing is created when the URL does not match YOUTUBE_URL_PATTERN, when
+  # the meeting already has a transcript document, or when yt-dlp yields no
+  # usable caption text.
+  class DownloadTranscriptJob < ApplicationJob
+    queue_as :default
+
+    # Only canonical watch URLs are accepted; the URL is handed to yt-dlp as a
+    # command-line argument, so it is validated before use.
+    YOUTUBE_URL_PATTERN = %r{\Ahttps://www\.youtube\.com/watch\?v=[A-Za-z0-9_-]+\z}
+
+    # @param meeting_id [Integer] id of the Meeting the transcript belongs to
+    # @param video_url [String] YouTube watch URL of the meeting recording
+    # @return [void]
+    # @raise [ActiveRecord::RecordNotFound] when no Meeting has the given id
+    def perform(meeting_id, video_url)
+      # Regexp#match? returns false for nil, so a missing URL takes the same
+      # "invalid URL" path instead of raising NoMethodError.
+      unless YOUTUBE_URL_PATTERN.match?(video_url)
+        Rails.logger.error "DownloadTranscriptJob: invalid video URL: #{video_url}"
+        return
+      end
+
+      meeting = Meeting.find(meeting_id)
+
+      # Idempotency: skip if transcript already exists
+      return if meeting.meeting_documents.exists?(document_type: "transcript")
+
+      # download_captions logs the reason and returns nil on any failure.
+      srt_content, plain_text = download_captions(video_url)
+      return unless plain_text
+
+      document = meeting.meeting_documents.create!(
+        document_type: "transcript",
+        source_url: video_url,
+        extracted_text: plain_text,
+        text_quality: "auto_transcribed",
+        text_chars: plain_text.length,
+        fetched_at: Time.current
+      )
+
+      document.file.attach(
+        io: StringIO.new(srt_content),
+        filename: "transcript-#{meeting.starts_at.to_date}.srt",
+        content_type: "text/srt"
+      )
+
+      # Only (re)summarize when no minutes-based recap exists yet.
+      unless meeting.meeting_summaries.exists?(summary_type: "minutes_recap")
+        SummarizeMeetingJob.perform_later(meeting.id)
+      end
+    end
+
+    private
+
+    # Runs yt-dlp in a temporary directory to fetch the English auto-generated
+    # subtitles (no video download) and reads the resulting SRT file.
+    #
+    # @param video_url [String] already-validated YouTube watch URL
+    # @return [Array(String, String), nil] `[srt_content, plain_text]`, or nil
+    #   when yt-dlp fails, writes no .srt file, or the SRT has no caption text
+    def download_captions(video_url)
+      Dir.mktmpdir("transcript") do |tmpdir|
+        # Argument-array form: no shell is involved in running the command.
+        _stdout, stderr, status = Open3.capture3(
+          "yt-dlp",
+          "--write-auto-sub",
+          "--sub-lang", "en",
+          "--sub-format", "srt",
+          "--skip-download",
+          "-o", "#{tmpdir}/video",
+          video_url
+        )
+
+        unless status.success?
+          Rails.logger.error "yt-dlp failed for #{video_url}: #{stderr.strip}"
+          return nil
+        end
+
+        srt_files = Dir.glob("#{tmpdir}/*.srt")
+        if srt_files.empty?
+          Rails.logger.error "yt-dlp produced no SRT file for #{video_url}"
+          return nil
+        end
+
+        srt_content = File.read(srt_files.first)
+        plain_text = parse_srt(srt_content)
+
+        # An SRT without any caption text would otherwise be stored as an empty
+        # transcript, and the idempotency guard in #perform would then prevent
+        # every later attempt from replacing it.
+        if plain_text.empty?
+          Rails.logger.error "yt-dlp produced an empty transcript for #{video_url}"
+          return nil
+        end
+
+        [ srt_content, plain_text ]
+      end
+    end
+
+    # Reduces SRT markup to plain caption text: removes cue index lines and
+    # timestamp lines, collapses runs of blank lines and trims the result.
+    # Note that a caption line consisting only of digits is indistinguishable
+    # from a cue index and is removed as well.
+    #
+    # @param srt_content [String] raw SRT file content
+    # @return [String] caption text with cues separated by blank lines
+    def parse_srt(srt_content)
+      srt_content
+        .gsub(/\r\n?/, "\n") # normalize CRLF/CR so no stray "\r" survives in the text
+        .gsub(/^\d+\s*$/, "")
+        .gsub(/^\d{2}:\d{2}:\d{2},\d{3}\s*-->.*$/, "")
+        .gsub(/\n{3,}/, "\n\n")
+        .strip
+    end
+  end
+end
diff --git a/app/jobs/scrapers/discover_meetings_job.rb b/app/jobs/scrapers/discover_meetings_job.rb
@@ -21,6 +21,9 @@ def perform(since: nil)
 
         page = next_link.click
       end
+
+      # Check for YouTube transcripts for recent council meetings
+      Scrapers::DiscoverTranscriptsJob.perform_later
     end
 
     private

diff --git a/app/jobs/scrapers/discover_transcripts_job.rb b/app/jobs/scrapers/discover_transcripts_job.rb
@@ -0,0 +1,74 @@
+require "open3"
+
+module Scrapers
+  class DiscoverTranscriptsJob < ApplicationJob
+    queue_as :default
+
+    YOUTUBE_CHANNEL_URL = "https://www.youtube.com/@Two_Rivers_WI/streams"
+    TITLE_PATTERN = /(?:City Council (?:Meeting|Work Session)) for \w+, (.+)$/i
+    COUNCIL_BODY_NAMES = [ "City Council Meeting", "City Council Work Session" ].freeze
+    LOOKBACK_WINDOW = 48.hours
+
+    def perform
+      meetings = candidate_meetings
+      return if meetings.empty?
+
+      videos = fetch_video_list
+      return if videos.nil?
+
+      videos.each do |video_id, title|
+        match = TITLE_PATTERN.match(title)
+        next unless match
+
+        date_str = match[1].strip
+        parsed_date = parse_date(date_str)
+        next unless parsed_date
+
+        meeting = find_meeting(meetings, parsed_date)
+        next unless meeting
+
+        video_url = "https://www.youtube.com/watch?v=#{video_id}"
+        Documents::DownloadTranscriptJob.perform_later(meeting.id, video_url)
+      end
+    end
+
+    private
+
+    def candidate_meetings
+      Meeting
+        .where(body_name: COUNCIL_BODY_NAMES)
+        .where("starts_at >= ? AND starts_at <= ?", LOOKBACK_WINDOW.ago, Time.current)
+        .includes(:meeting_documents)
+        .reject { |m| m.meeting_documents.any? { |d| d.document_type == "transcript" } }
+    end
+
+    def fetch_video_list
+      stdout, stderr, status = Open3.capture3(
+        "yt-dlp", "--flat-playlist", "--print", "%(id)s | %(title)s",
+        YOUTUBE_CHANNEL_URL
+      )
+
+      unless status.success?
+        Rails.logger.error "DiscoverTranscriptsJob: yt-dlp failed — #{stderr.strip}"
+        return nil
+      end
+
+      stdout.lines.filter_map do |line|
+        id, title = line.strip.split(" | ", 2)
+        next unless id.present? && title.present?
+
+        [ id, title ]
+      end
+    end
+
+    def parse_date(date_str)
+      Date.parse(date_str)
+    rescue ArgumentError, TypeError
+      nil
+    end
+
+    def find_meeting(meetings, date)
+      meetings.find { |m| m.starts_at.to_date == date }
+    end
+  end
+end
diff --git a/app/jobs/summarize_meeting_job.rb b/app/jobs/summarize_meeting_job.rb
@@ -26,15 +26,36 @@ def generate_meeting_summary(meeting, ai_service, retrieval_service)
     formatted_context = retrieval_service.format_context(retrieved_chunks).split("\n\n")
     kb_context = ai_service.prepare_kb_context(formatted_context)
 
-    # Prefer minutes (authoritative) over packet
     minutes_doc = meeting.meeting_documents.find_by(document_type: "minutes_pdf")
+    transcript_doc = meeting.meeting_documents.find_by(document_type: "transcript")
+
+    # Priority 1: Minutes (authoritative), optionally supplemented by transcript
     if minutes_doc&.extracted_text.present?
-      json_str = ai_service.analyze_meeting_content(minutes_doc.extracted_text, kb_context, "minutes", source: meeting)
-      save_summary(meeting, "minutes_recap", json_str)
+      input_text = minutes_doc.extracted_text
+      source_type = "minutes"
+
+      if transcript_doc&.extracted_text.present?
+        input_text += "\n\n--- Additional context from meeting recording transcript ---\n\n" +
+          transcript_doc.extracted_text.truncate(15_000)
+        source_type = "minutes_with_transcript"
+      end
+
+      json_str = ai_service.analyze_meeting_content(input_text, kb_context, "minutes", source: meeting)
+      summary = save_summary(meeting, "minutes_recap", json_str, source_type: source_type)
+
+      # Clean up any old transcript-only summary now that minutes exist
+      meeting.meeting_summaries.where(summary_type: "transcript_recap").destroy_all
       return
     end
 
-    # Fall back to packet
+    # Priority 2: Transcript (when no minutes available)
+    if transcript_doc&.extracted_text.present?
+      json_str = ai_service.analyze_meeting_content(transcript_doc.extracted_text, kb_context, "transcript", source: meeting)
+      save_summary(meeting, "transcript_recap", json_str, source_type: "transcript")
+      return
+    end
+
+    # Priority 3: Fall back to packet
     packet_doc = meeting.meeting_documents.where("document_type LIKE ?", "%packet%").first
     if packet_doc
       doc_text = if packet_doc.extractions.any?
@@ -153,18 +174,21 @@ def build_retrieval_query(meeting)
     parts.join("\n")
   end
 
-  def save_summary(meeting, type, json_str)
+  def save_summary(meeting, type, json_str, source_type: nil)
     generation_data = begin
       JSON.parse(json_str)
     rescue JSON::ParserError => e
       Rails.logger.error "Failed to parse meeting summary JSON: #{e.message}"
       {}
     end
 
+    generation_data["source_type"] = source_type if source_type
+
     summary = meeting.meeting_summaries.find_or_initialize_by(summary_type: type)
     summary.generation_data = generation_data
     summary.content = nil
     summary.save!
+    summary
   end
 
   def save_topic_summary(meeting, topic, content, generation_data)

diff --git a/app/models/meeting.rb b/app/models/meeting.rb
@@ -22,6 +22,8 @@ def document_status
       :minutes
     elsif docs.any? { |d| d.document_type == "packet_pdf" }
       :packet
+    elsif docs.any? { |d| d.document_type == "transcript" }
+      :transcript
     elsif docs.any? { |d| d.document_type == "agenda_pdf" }
       :agenda
     else

diff --git a/app/views/meetings/show.html.erb b/app/views/meetings/show.html.erb
@@ -34,6 +34,17 @@
   </div>
 </div>
 
+<% if @summary&.generation_data&.dig("source_type") == "transcript" %>
+  <div class="transcript-banner">
+    <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+      <circle cx="12" cy="12" r="10"></circle>
+      <line x1="12" y1="8" x2="12" y2="12"></line>
+      <line x1="12" y1="16" x2="12.01" y2="16"></line>
+    </svg>
+    This summary is based on the meeting's video recording. It will be updated when official minutes are published.
+  </div>
+<% end %>
+
 <% gd = @summary&.generation_data.presence %>
 
 <% if gd %>
@@ -218,11 +229,15 @@
                     - Quality: <%= doc.text_quality.humanize %>
                   <% end %>
                 </span>
+              <% elsif doc.document_type == "transcript" %>
+                <span class="document-meta">Source: Video Recording</span>
               <% end %>
             </div>
             <div>
               <% if doc.file.attached? && doc.document_type.include?("pdf") %>
                 <%= link_to "Download PDF", rails_blob_path(doc.file, disposition: "attachment"), class: "btn btn--secondary btn--sm" %>
+              <% elsif doc.document_type == "transcript" && doc.source_url.present? %>
+                <%= link_to "Watch Recording", safe_external_url(doc.source_url), target: "_blank", rel: "noopener", class: "btn btn--secondary btn--sm" %>
               <% else %>
                 <%= link_to "View Original", safe_external_url(doc.source_url), target: "_blank", rel: "noopener", class: "btn btn--secondary btn--sm" %>
               <% end %>

diff --git a/config/brakeman.ignore b/config/brakeman.ignore
@@ -0,0 +1,10 @@
+{
+  "ignored_warnings": [
+    {
+      "fingerprint": "3e4893630b06b15e5d9d65ba5da1d33eb340b31840f53bcdef92240b27b28217",
+      "note": "Open3.capture3 with array arguments does not use a shell. URL is validated against YOUTUBE_URL_PATTERN before use."
+    }
+  ],
+  "updated": "2026-04-09",
+  "brakeman_version": "7.0.2"
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -21,6 +21,9 @@ def perform(since: nil) @@
             page = next_link.click
           end
+          # Check for YouTube transcripts for recent council meetings
+          Scrapers::DiscoverTranscriptsJob.perform_later
         end
         private
@@ Expand Down @@