Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ WORKDIR /rails
# Install base packages.
# yt-dlp is fetched as the platform-independent release asset, which is a Python
# zipapp and needs a python3 interpreter on PATH at runtime, so python3 is
# installed alongside it.
# NOTE(review): the base image is not visible here — if it already ships python3
# the extra package is a harmless no-op.
# curl uses --fail so an HTTP error aborts the build instead of saving an error
# page as /usr/local/bin/yt-dlp.
RUN apt-get update -qq && \
    apt-get install --no-install-recommends -y curl libjemalloc2 libvips postgresql-client poppler-utils python3 tesseract-ocr && \
    curl -fsSL https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp && \
    chmod +x /usr/local/bin/yt-dlp && \
    ln -s /usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2 /usr/local/lib/libjemalloc.so && \
    rm -rf /var/lib/apt/lists /var/cache/apt/archives

Expand Down
20 changes: 20 additions & 0 deletions app/assets/stylesheets/application.css
Original file line number Diff line number Diff line change
Expand Up @@ -1971,6 +1971,26 @@ th.sortable-header:hover {

/* === Meeting Show — Structured Sections === */

/* Informational banner on the meeting page: icon and text laid out in a row. */
.transcript-banner {
  /* Typography */
  font-family: var(--font-body);
  font-size: var(--font-size-sm);
  color: var(--color-text);

  /* Surface — border-left must follow the border shorthand to override it */
  background: var(--color-accent-cool-bg);
  border: 1px solid color-mix(in srgb, var(--color-accent-cool) 30%, var(--color-border));
  border-left: 3px solid var(--color-accent-cool);
  border-radius: var(--radius-md);

  /* Spacing */
  margin-bottom: var(--space-6);
  padding: var(--space-3) var(--space-4);

  /* Layout */
  display: flex;
  align-items: center;
  gap: var(--space-3);
}

/* Banner icon: tinted with the accent colour and never squeezed by the flex row. */
.transcript-banner svg {
  color: var(--color-accent-cool);
  flex-shrink: 0;
}

.meeting-headline {
font-size: var(--font-size-lg);
line-height: 1.4;
Expand Down
3 changes: 2 additions & 1 deletion app/controllers/meetings_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ def show
topic.topic_appearances.size > 1
end

# Prefer minutes_recap over packet_analysis
# Prefer minutes_recap over transcript_recap over packet_analysis
@summary = @meeting.meeting_summaries.find_by(summary_type: "minutes_recap") ||
@meeting.meeting_summaries.find_by(summary_type: "transcript_recap") ||
@meeting.meeting_summaries.find_by(summary_type: "packet_analysis")
end
end
82 changes: 82 additions & 0 deletions app/jobs/documents/download_transcript_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
require "open3"

module Documents
class DownloadTranscriptJob < ApplicationJob
queue_as :default

YOUTUBE_URL_PATTERN = %r{\Ahttps://www\.youtube\.com/watch\?v=[A-Za-z0-9_-]+\z}

# Downloads the captions for a meeting's video and stores them as a
# "transcript" meeting document, then queues summarization when the meeting
# has no minutes-based summary yet.
#
# @param meeting_id [Integer] id passed to Meeting.find
# @param video_url [String] must match YOUTUBE_URL_PATTERN; anything else is logged and ignored
# @return [void]
def perform(meeting_id, video_url)
  # to_s keeps a nil URL on the "invalid URL" path instead of raising NoMethodError.
  unless video_url.to_s.match?(YOUTUBE_URL_PATTERN)
    Rails.logger.error "DownloadTranscriptJob: invalid video URL: #{video_url}"
    return
  end

  meeting = Meeting.find(meeting_id)

  # Idempotency: skip if transcript already exists
  return if meeting.meeting_documents.exists?(document_type: "transcript")

  srt_content, plain_text = download_captions(video_url)
  # nil means the download failed; download_captions has already logged why.
  return if plain_text.nil?

  # Storing an empty transcript would satisfy the idempotency check above on
  # every later run, so a usable transcript could never be fetched afterwards.
  if plain_text.strip.empty?
    Rails.logger.error "DownloadTranscriptJob: empty transcript for #{video_url}"
    return
  end

  document = meeting.meeting_documents.create!(
    document_type: "transcript",
    source_url: video_url,
    extracted_text: plain_text,
    text_quality: "auto_transcribed",
    text_chars: plain_text.length,
    fetched_at: Time.current
  )

  # Keep the raw SRT (with timings) as the attached file.
  document.file.attach(
    io: StringIO.new(srt_content),
    filename: "transcript-#{meeting.starts_at.to_date}.srt",
    content_type: "text/srt"
  )

  # A minutes-based recap takes precedence, so only summarize when none exists.
  unless meeting.meeting_summaries.exists?(summary_type: "minutes_recap")
    SummarizeMeetingJob.perform_later(meeting.id)
  end
end

private

# Runs yt-dlp in a temporary directory to fetch the English auto-generated
# captions for the video, without downloading the video itself.
#
# @param video_url [String] URL handed to yt-dlp as its final argument
# @return [Array(String, String), nil] the raw SRT text and its parse_srt
#   result, or nil when yt-dlp fails or writes no .srt file (both are logged)
def download_captions(video_url)
  Dir.mktmpdir("transcript") do |workdir|
    # Array-form arguments: no shell is involved.
    command = [
      "yt-dlp",
      "--write-auto-sub",
      "--sub-lang", "en",
      "--sub-format", "srt",
      "--skip-download",
      "-o", "#{workdir}/video",
      video_url
    ]
    _out, err, result = Open3.capture3(*command)

    if !result.success?
      Rails.logger.error "yt-dlp failed for #{video_url}: #{err.strip}"
      nil
    elsif (srt_path = Dir.glob("#{workdir}/*.srt").first).nil?
      Rails.logger.error "yt-dlp produced no SRT file for #{video_url}"
      nil
    else
      raw_srt = File.read(srt_path)
      [ raw_srt, parse_srt(raw_srt) ]
    end
  end
end

# Converts SRT caption text into plain transcript text.
#
# Timing lines ("00:00:01,000 --> 00:00:02,000") and the numeric cue index
# directly above each of them are dropped. A digit-only line is removed only
# when the next line is a timing line, so spoken captions that happen to be
# purely numeric (e.g. "2024") are kept. CRLF line endings are normalised.
#
# @param srt_content [String] raw SRT file contents
# @return [String] caption text with cues separated by a blank line
def parse_srt(srt_content)
  timing = /\A\d{2}:\d{2}:\d{2},\d{3}\s*-->/
  lines = srt_content.lines(chomp: true)

  text_lines = lines.each_with_index.map do |line, idx|
    cue_index = line.match?(/\A\d+\s*\z/) && lines[idx + 1]&.match?(timing)
    # Dropped lines become blanks so cue boundaries survive as paragraph breaks.
    (cue_index || line.match?(timing)) ? "" : line
  end

  text_lines.join("\n").gsub(/\n{3,}/, "\n\n").strip
end
end
end
3 changes: 3 additions & 0 deletions app/jobs/scrapers/discover_meetings_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ def perform(since: nil)

page = next_link.click
end

# Check for YouTube transcripts for recent council meetings
Scrapers::DiscoverTranscriptsJob.perform_later
end

private
Expand Down
74 changes: 74 additions & 0 deletions app/jobs/scrapers/discover_transcripts_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
require "open3"

module Scrapers
class DiscoverTranscriptsJob < ApplicationJob
queue_as :default

YOUTUBE_CHANNEL_URL = "https://www.youtube.com/@Two_Rivers_WI/streams"
TITLE_PATTERN = /(?:City Council (?:Meeting|Work Session)) for \w+, (.+)$/i
COUNCIL_BODY_NAMES = [ "City Council Meeting", "City Council Work Session" ].freeze
LOOKBACK_WINDOW = 48.hours

# Matches recent council meetings that lack a transcript against the channel's
# video list and enqueues a transcript download for every match.
#
# A video matches a meeting when its title matches TITLE_PATTERN, the date in
# the title equals the meeting's start date, and the title names the meeting's
# body. The body check matters when a Work Session and a regular Meeting fall
# on the same day: matching on date alone would hand both videos to whichever
# meeting comes first.
#
# @return [void]
def perform
  meetings = candidate_meetings
  return if meetings.empty?

  videos = fetch_video_list
  return if videos.nil?

  videos.each do |video_id, title|
    match = TITLE_PATTERN.match(title)
    next unless match

    parsed_date = parse_date(match[1].strip)
    next unless parsed_date

    # Only consider meetings whose body name appears in the video title.
    title_key = title.downcase
    same_body = meetings.select { |m| title_key.include?(m.body_name.to_s.downcase) }

    meeting = find_meeting(same_body, parsed_date)
    next unless meeting

    video_url = "https://www.youtube.com/watch?v=#{video_id}"
    Documents::DownloadTranscriptJob.perform_later(meeting.id, video_url)
  end
end

private

# Council meetings that started within the lookback window (and not in the
# future) and do not yet have a "transcript" document.
#
# @return [Array<Meeting>]
def candidate_meetings
  window_start = LOOKBACK_WINDOW.ago
  window_end = Time.current

  recent = Meeting
    .where(body_name: COUNCIL_BODY_NAMES)
    .where("starts_at >= ? AND starts_at <= ?", window_start, window_end)
    .includes(:meeting_documents)

  # Filter in Ruby on the loaded documents.
  recent.reject do |meeting|
    meeting.meeting_documents.map(&:document_type).include?("transcript")
  end
end

# Asks yt-dlp for the id and title of every entry on the channel's streams
# page, without downloading anything.
#
# @return [Array<Array(String, String)>, nil] [id, title] pairs, or nil when
#   yt-dlp exits unsuccessfully (the stderr output is logged)
def fetch_video_list
  listing, err, result = Open3.capture3(
    "yt-dlp", "--flat-playlist", "--print", "%(id)s | %(title)s",
    YOUTUBE_CHANNEL_URL
  )

  if result.success?
    listing.each_line.filter_map do |row|
      # Limit of 2 keeps any " | " inside the title intact.
      video_id, video_title = row.strip.split(" | ", 2)
      [ video_id, video_title ] if video_id.present? && video_title.present?
    end
  else
    Rails.logger.error "DiscoverTranscriptsJob: yt-dlp failed — #{err.strip}"
    nil
  end
end

# Parses the date portion of a video title.
#
# @param date_str [String, nil] text such as "April 6, 2026"
# @return [Date, nil] the parsed date, or nil when Date.parse rejects the input
def parse_date(date_str)
  parsed = nil
  begin
    parsed = Date.parse(date_str)
  rescue TypeError, ArgumentError
    # Unparseable or non-string input: leave the result as nil.
  end
  parsed
end

# Returns the first meeting whose start time falls on the given date.
#
# @param meetings [Array<Meeting>] candidates, searched in order
# @param date [Date]
# @return [Meeting, nil] nil when no meeting starts on that date
def find_meeting(meetings, date)
  meetings.each do |candidate|
    return candidate if candidate.starts_at.to_date == date
  end
  nil
end
end
end
34 changes: 29 additions & 5 deletions app/jobs/summarize_meeting_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,36 @@ def generate_meeting_summary(meeting, ai_service, retrieval_service)
formatted_context = retrieval_service.format_context(retrieved_chunks).split("\n\n")
kb_context = ai_service.prepare_kb_context(formatted_context)

# Prefer minutes (authoritative) over packet
minutes_doc = meeting.meeting_documents.find_by(document_type: "minutes_pdf")
transcript_doc = meeting.meeting_documents.find_by(document_type: "transcript")

# Priority 1: Minutes (authoritative), optionally supplemented by transcript
if minutes_doc&.extracted_text.present?
json_str = ai_service.analyze_meeting_content(minutes_doc.extracted_text, kb_context, "minutes", source: meeting)
save_summary(meeting, "minutes_recap", json_str)
input_text = minutes_doc.extracted_text
source_type = "minutes"

if transcript_doc&.extracted_text.present?
input_text += "\n\n--- Additional context from meeting recording transcript ---\n\n" +
transcript_doc.extracted_text.truncate(15_000)
source_type = "minutes_with_transcript"
end

json_str = ai_service.analyze_meeting_content(input_text, kb_context, "minutes", source: meeting)
summary = save_summary(meeting, "minutes_recap", json_str, source_type: source_type)

# Clean up any old transcript-only summary now that minutes exist
meeting.meeting_summaries.where(summary_type: "transcript_recap").destroy_all
return
end

# Fall back to packet
# Priority 2: Transcript (when no minutes available)
if transcript_doc&.extracted_text.present?
json_str = ai_service.analyze_meeting_content(transcript_doc.extracted_text, kb_context, "transcript", source: meeting)
save_summary(meeting, "transcript_recap", json_str, source_type: "transcript")
return
end

# Priority 3: Fall back to packet
packet_doc = meeting.meeting_documents.where("document_type LIKE ?", "%packet%").first
if packet_doc
doc_text = if packet_doc.extractions.any?
Expand Down Expand Up @@ -153,18 +174,21 @@ def build_retrieval_query(meeting)
parts.join("\n")
end

def save_summary(meeting, type, json_str)
# Stores the AI output as the meeting's summary of the given type, creating
# the summary record or overwriting the existing one.
#
# Unparseable output is logged and stored as an empty hash. The same applies
# to valid JSON that is not an object (array, string, number) and to a nil
# json_str, which previously raised TypeError instead of being handled.
#
# @param meeting [Meeting]
# @param type [String] summary_type to find or initialize
# @param json_str [String, nil] raw JSON text of the generated summary
# @param source_type [String, nil] when given, recorded under the
#   "source_type" key of generation_data
# @return [MeetingSummary] the saved summary record
def save_summary(meeting, type, json_str, source_type: nil)
  parsed = begin
    JSON.parse(json_str)
  rescue JSON::ParserError, TypeError => e
    Rails.logger.error "Failed to parse meeting summary JSON: #{e.message}"
    {}
  end

  generation_data =
    if parsed.is_a?(Hash)
      parsed
    else
      Rails.logger.error "Meeting summary JSON is not an object (got #{parsed.class}); storing empty data"
      {}
    end

  generation_data["source_type"] = source_type if source_type

  summary = meeting.meeting_summaries.find_or_initialize_by(summary_type: type)
  summary.generation_data = generation_data
  summary.content = nil
  summary.save!
  summary
end

def save_topic_summary(meeting, topic, content, generation_data)
Expand Down
2 changes: 2 additions & 0 deletions app/models/meeting.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def document_status
:minutes
elsif docs.any? { |d| d.document_type == "packet_pdf" }
:packet
elsif docs.any? { |d| d.document_type == "transcript" }
:transcript
elsif docs.any? { |d| d.document_type == "agenda_pdf" }
:agenda
else
Expand Down
15 changes: 15 additions & 0 deletions app/views/meetings/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@
</div>
</div>

<% if @summary&.generation_data&.dig("source_type") == "transcript" %>
<div class="transcript-banner">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<circle cx="12" cy="12" r="10"></circle>
<line x1="12" y1="8" x2="12" y2="12"></line>
<line x1="12" y1="16" x2="12.01" y2="16"></line>
</svg>
This summary is based on the meeting's video recording. It will be updated when official minutes are published.
</div>
<% end %>

<% gd = @summary&.generation_data.presence %>

<% if gd %>
Expand Down Expand Up @@ -218,11 +229,15 @@
- Quality: <%= doc.text_quality.humanize %>
<% end %>
</span>
<% elsif doc.document_type == "transcript" %>
<span class="document-meta">Source: Video Recording</span>
<% end %>
</div>
<div>
<% if doc.file.attached? && doc.document_type.include?("pdf") %>
<%= link_to "Download PDF", rails_blob_path(doc.file, disposition: "attachment"), class: "btn btn--secondary btn--sm" %>
<% elsif doc.document_type == "transcript" && doc.source_url.present? %>
<%= link_to "Watch Recording", safe_external_url(doc.source_url), target: "_blank", rel: "noopener", class: "btn btn--secondary btn--sm" %>
<% else %>
<%= link_to "View Original", safe_external_url(doc.source_url), target: "_blank", rel: "noopener", class: "btn btn--secondary btn--sm" %>
<% end %>
Expand Down
10 changes: 10 additions & 0 deletions config/brakeman.ignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"ignored_warnings": [
{
"fingerprint": "3e4893630b06b15e5d9d65ba5da1d33eb340b31840f53bcdef92240b27b28217",
"note": "Open3.capture3 with array arguments does not use a shell. URL is validated against YOUTUBE_URL_PATTERN before use."
}
],
"updated": "2026-04-09",
"brakeman_version": "7.0.2"
}
Loading
Loading