diff --git a/e2e/README.md b/e2e/README.md deleted file mode 100644 index 7b9114b..0000000 --- a/e2e/README.md +++ /dev/null @@ -1,115 +0,0 @@ -# End-to-End (E2E) Acceptance Testing Strategy - -This document outlines the strategy for end-to-end (E2E) acceptance testing of the KillrVideo React frontend application. - -## Recommended Framework: Playwright - -For E2E testing, we recommend using [Playwright](https://playwright.dev/). - -### Why Playwright? - -* **Cross-browser:** It can test on Chromium (Chrome, Edge), Firefox, and WebKit (Safari). -* **Auto-waits:** Playwright waits for elements to be actionable before performing actions, which eliminates a major source of flakiness in E2E tests. -* **Powerful Tooling:** It includes tools like Codegen (to record tests), Playwright Inspector (to debug tests), and Trace Viewer (to see what happened during a test run). -* **Network Interception:** It can intercept and mock network requests, which is very useful for testing various API response scenarios. - -## Getting Started - -1. **Installation:** - ```bash - npm install @playwright/test --save-dev - npx playwright install - ``` - -2. **Configuration:** - Create a `playwright.config.ts` file in the root of the project. - - ```typescript - import { defineConfig, devices } from '@playwright/test'; - - export default defineConfig({ - testDir: './e2e', - /* Run tests in files in parallel */ - fullyParallel: true, - /* Fail the build on CI if you accidentally left test.only in the source code. */ - forbidOnly: !!process.env.CI, - /* Retry on CI only */ - retries: process.env.CI ? 2 : 0, - /* Opt out of parallel tests on CI. */ - workers: process.env.CI ? 1 : undefined, - /* Reporter to use. See https://playwright.dev/docs/test-reporters */ - reporter: 'html', - /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */ - use: { - /* Base URL to use in actions like `await page.goto('/')`. 
*/ - baseURL: 'http://localhost:5173', // Assuming Vite's default dev server port - - /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */ - trace: 'on-first-retry', - }, - - /* Configure projects for major browsers */ - projects: [ - { - name: 'chromium', - use: { ...devices['Desktop Chrome'] }, - }, - ], - - /* Run your local dev server before starting the tests */ - webServer: { - command: 'npm run dev', - url: 'http://localhost:5173', - reuseExistingServer: !process.env.CI, - }, - }); - ``` - -## Initial Test Cases - -Here are some initial test cases to create in the `e2e/` directory. - -### 1. `e2e/auth.spec.ts` - -* **User Registration:** - 1. Navigate to the registration page. - 2. Fill out the registration form with a unique email. - 3. Submit the form. - 4. Assert that the user is redirected to the login page or a "registration successful" page. - -* **User Login and Logout:** - 1. Create a user via API or use a pre-existing test user. - 2. Navigate to the login page. - 3. Fill in the login form. - 4. Submit the form. - 5. Assert that the user is redirected to the home page or their profile page. - 6. Assert that user-specific elements are visible (e.g., a "logout" button or their name). - 7. Click the logout button. - 8. Assert that the user is logged out and redirected to the home page. - -### 2. `e2e/video.spec.ts` - -* **Submit a Video (for authenticated users):** - 1. Log in as a user with "creator" role. - 2. Navigate to the "Submit Video" page. - 3. Fill in the YouTube URL. - 4. Submit the form. - 5. Assert that a confirmation message is shown and the user is redirected to the video page. - -* **View a Video:** - 1. Navigate to a video details page (either by ID or by clicking a video on the home page). - 2. Assert that the video title, description, and other details are visible. - 3. Assert that the video player is present. 
- -## Running the Tests - -```bash -npx playwright test -``` - -This command will run all the tests in the `e2e/` directory. - -To see the test report: -```bash -npx playwright show-report -``` \ No newline at end of file diff --git a/public/educational/extracted/comments_denormalization_tooltip.md b/public/educational/extracted/comments_denormalization_tooltip.md new file mode 100644 index 0000000..1a02af7 --- /dev/null +++ b/public/educational/extracted/comments_denormalization_tooltip.md @@ -0,0 +1,35 @@ +# Denormalization: Same Data, Different Partition Key + +Comments are stored in **two tables** — same data, organized for different queries. This is the core Cassandra data modeling pattern. + +## Two Tables, Two Query Patterns + +```cql +-- "Show comments on this video" (partition by video) +CREATE TABLE comments ( + videoid uuid, + commentid timeuuid, + comment text, userid uuid, + PRIMARY KEY (videoid, commentid) +) WITH CLUSTERING ORDER BY (commentid DESC); + +-- "Show all comments by this user" (partition by user) +CREATE TABLE comments_by_user ( + userid uuid, + commentid timeuuid, + comment text, videoid uuid, + PRIMARY KEY (userid, commentid) +) WITH CLUSTERING ORDER BY (commentid DESC); +``` + +**Why duplicate the data?** +- Cassandra has **no JOINs** — you can't query `comments` by `userid` efficiently +- Each table is optimized for exactly one access pattern +- Both use `commentid DESC` for newest-first ordering + +## The Trade-off + +- **Write cost**: Every comment is written twice (once to each table) +- **Read benefit**: Both queries are single-partition reads — the fastest operation Cassandra offers + +**In KillrVideo**: Posting a comment writes to both tables. The video page reads from `comments`, while a user's profile page reads from `comments_by_user`.
diff --git a/public/educational/extracted/latest_videos_timeseries_tooltip.md b/public/educational/extracted/latest_videos_timeseries_tooltip.md new file mode 100644 index 0000000..1883f82 --- /dev/null +++ b/public/educational/extracted/latest_videos_timeseries_tooltip.md @@ -0,0 +1,28 @@ +# Time-Series Bucketing for Latest Videos + +Cassandra models "latest videos" as a **time-series table** using date-based partitioning. Each day gets its own partition, and videos within that day are sorted by timestamp. + +## The Data Model + +```cql +CREATE TABLE latest_videos ( + day date, -- Partition key: one partition per day + added_date timestamp, -- Clustering column: sorts within the day + videoid uuid, + name text, + preview_image_location text, + userid uuid, + PRIMARY KEY (day, added_date, videoid) +) WITH CLUSTERING ORDER BY (added_date DESC); +``` + +**Why bucket by day?** +- **Bounded partitions**: Each partition holds one day's videos, preventing unbounded growth +- **Sorted within partition**: `added_date DESC` gives newest-first ordering for free +- **Efficient pagination**: Query today's bucket, then yesterday's, etc. + +## Why Not Just Use the `videos` Table? + +The `videos` table uses `videoid` as partition key — great for single-video lookups but terrible for "give me the latest N videos." A full table scan would be required. The `latest_videos` table is a **denormalized copy** optimized for this specific query pattern. + +**In KillrVideo**: When a video is submitted, it's written to both `videos` (for lookups) and `latest_videos` (for the feed). This dual-write is a classic Cassandra data modeling pattern. 
diff --git a/public/educational/extracted/login_credential_lookup_tooltip.md b/public/educational/extracted/login_credential_lookup_tooltip.md new file mode 100644 index 0000000..155f7dd --- /dev/null +++ b/public/educational/extracted/login_credential_lookup_tooltip.md @@ -0,0 +1,30 @@ +# Credential Lookup & Login Counters + +Login is a **partition key lookup** on the `user_credentials` table — Cassandra's fastest operation. + +## How Login Works in the DB + +```cql +-- Step 1: Find credentials by email (O(1) partition key lookup) +SELECT * FROM user_credentials WHERE email = 'user@example.com'; + +-- Step 2: On failure, increment the counter +UPDATE login_attempts +SET failed_attempts = failed_attempts + 1 +WHERE email = 'user@example.com'; +``` + +## Why a Separate Counter Table? + +Cassandra requires **counter columns in their own dedicated table** — you cannot mix counters with regular columns. That's why `login_attempts` exists separately from `user_credentials`: + +```cql +CREATE TABLE login_attempts ( + email text PRIMARY KEY, + failed_attempts counter -- Must be in a counter-only table +); +``` + +**Key constraint**: Counter tables can only contain the primary key columns and counter columns. No regular text, boolean, or timestamp columns allowed. + +**In KillrVideo**: Login performs an O(1) lookup by email, verifies the bcrypt hash in the application layer, and tracks failed attempts in a dedicated counter table for account lockout. diff --git a/public/educational/extracted/playback_stats_counters_tooltip.md b/public/educational/extracted/playback_stats_counters_tooltip.md new file mode 100644 index 0000000..c77446c --- /dev/null +++ b/public/educational/extracted/playback_stats_counters_tooltip.md @@ -0,0 +1,32 @@ +# Video Playback Stats: Multi-Counter Table + +Video statistics use a **dedicated counter table** with four independent counters — each atomically incremented across distributed nodes. 
+ +## The Data Model + +```cql +CREATE TABLE video_playback_stats ( + videoid uuid PRIMARY KEY, + views counter, -- Total view count + total_play_time counter, -- Total seconds watched + complete_views counter, -- Full watch-throughs + unique_viewers counter -- Approximate unique viewers +); + +-- Atomic increment (no client-side read-before-write): +UPDATE video_playback_stats +SET views = views + 1, + total_play_time = total_play_time + 245 +WHERE videoid = 550e8400-...; +``` + +## Why a Separate Table? + +Cassandra enforces a strict rule: **counter columns cannot coexist with regular columns**. That's why playback stats live in `video_playback_stats`, not in the `videos` table. + +**Key properties**: +- **Atomic**: Increments never conflict, even across nodes +- **No client-side read-before-write**: The application sends only the delta (`views = views + 1`) and never has to read the current value first; Cassandra applies the increment on the replicas +- **Eventually consistent**: Counter values converge across replicas + +**In KillrVideo**: Each video view atomically increments the `views` counter. The stats cards on the Creator Dashboard aggregate these counters across all of a creator's videos. diff --git a/public/educational/extracted/ratings_data_model_tooltip.md b/public/educational/extracted/ratings_data_model_tooltip.md new file mode 100644 index 0000000..b711b0f --- /dev/null +++ b/public/educational/extracted/ratings_data_model_tooltip.md @@ -0,0 +1,37 @@ +# Ratings Data Model: Counters + Individual Records + +Video ratings use **two tables** — one for fast aggregate counts, another for individual user ratings with upsert semantics.
+ +## The Two Tables + +```cql +-- Aggregate counters (fast average calculation) +CREATE TABLE video_ratings ( + videoid uuid PRIMARY KEY, + rating_counter counter, -- Number of ratings + rating_total counter -- Sum of all star values +); + +-- Individual ratings (one per user per video) +CREATE TABLE video_ratings_by_user ( + videoid uuid, + userid uuid, + rating int, + rating_date timestamp, + PRIMARY KEY (videoid, userid) +); +``` + +## Composite Primary Key & Upsert + +`video_ratings_by_user` uses a **composite primary key** `(videoid, userid)` — this means each user can only have one rating per video. Re-rating the same video **upserts** (overwrites) automatically: + +```cql +-- First rating or update — same CQL either way: +INSERT INTO video_ratings_by_user (videoid, userid, rating, rating_date) +VALUES (?, ?, 4, '2025-10-31T10:00:00Z'); +``` + +Cassandra's INSERT is always an upsert when the full primary key matches — no need for `INSERT ... ON CONFLICT` like SQL. + +**In KillrVideo**: Submitting a rating writes to both tables. The counter table gives instant averages; the per-user table prevents duplicate votes and lets users see their own rating. diff --git a/public/educational/extracted/registration_dual_table_tooltip.md b/public/educational/extracted/registration_dual_table_tooltip.md new file mode 100644 index 0000000..0121d55 --- /dev/null +++ b/public/educational/extracted/registration_dual_table_tooltip.md @@ -0,0 +1,35 @@ +# Dual-Table Writes for User Registration + +When you register, Cassandra writes to **two separate tables** — because Cassandra has no JOINs, each query pattern needs its own table. 
+ +## The Two Tables + +```cql +-- Table 1: Profile lookups by user ID +CREATE TABLE users ( + userid uuid PRIMARY KEY, + email text, + firstname text, + lastname text, + account_status text +); + +-- Table 2: Credential lookups by email +CREATE TABLE user_credentials ( + email text PRIMARY KEY, -- Partition key is email for login lookups + password text, + userid uuid, + account_locked boolean +); +``` + +**Why separate?** +- **Different partition keys**: `users` is keyed by `userid`, `user_credentials` by `email` +- **Security**: Credentials are isolated from frequently accessed profile data +- **Performance**: Login checks read only the small credentials row + +## No Multi-Table Transactions + +Cassandra doesn't support cross-table transactions. Both INSERTs happen independently — if one fails, you get an orphaned record. Production systems handle this with idempotent retries or background reconciliation. + +**In KillrVideo**: Registration writes to `users` + `user_credentials` in sequence, and an SAI index on `users.email` enables flexible lookups without yet another table. diff --git a/public/educational/extracted/trending_video_activity_tooltip.md b/public/educational/extracted/trending_video_activity_tooltip.md new file mode 100644 index 0000000..cadaa98 --- /dev/null +++ b/public/educational/extracted/trending_video_activity_tooltip.md @@ -0,0 +1,27 @@ +# Time-Series Data for Trending + +Trending videos are computed from the `video_activity` table — a time-series table that records every view, partitioned by day. + +## The Data Model + +```cql +CREATE TABLE video_activity ( + videoid uuid, + day date, -- Partition key: one partition per day + watch_time timeuuid, -- Clustering column: time-ordered + PRIMARY KEY (day, watch_time) +) WITH CLUSTERING ORDER BY (watch_time DESC); +``` + +**How trending works**: +1. Every video view writes a row to `video_activity` for that day +2. The trending query reads a day's partition and aggregates view counts per video +3.
Time period selection (24h, 7 days, 30 days) determines how many day-partitions to scan + +## Why This Design? + +- **Bounded partitions**: Each day's activity is in its own partition, preventing unbounded growth +- **Natural time windowing**: "Past 7 days" = read 7 partitions +- **TimeUUID clustering**: Preserves exact ordering within each day + +**In KillrVideo**: The trending page queries `video_activity` for the selected time window, counts views per video, and ranks them — all from time-bucketed partitions. diff --git a/public/educational/features/sai-indexes.md b/public/educational/features/sai-indexes.md deleted file mode 100644 index 5344597..0000000 --- a/public/educational/features/sai-indexes.md +++ /dev/null @@ -1,124 +0,0 @@ -# Storage-Attached Indexes (SAI) - -Storage-Attached Indexes are a revolutionary feature in Cassandra/Astra DB that eliminates the need for complex denormalized table structures. - -## What Problem Does SAI Solve? - -In Cassandra 3.x, if you wanted to query videos by tag, you needed a separate denormalized table like `videos_by_tag`. This approach required: - -- **Multiple writes** on every insert (videos table + videos_by_tag table) -- **Complex consistency** management across tables -- **More storage** for duplicated data -- **Application logic** to keep tables in sync - -With hundreds of videos and dozens of tags, this could mean managing multiple denormalized tables just to support different query patterns. - -## How SAI Works - -SAI attaches indexes directly to the storage layer, enabling efficient filtering without denormalization. You create a single index: - -```cql -CREATE INDEX videos_tags_idx ON videos(tags) -USING 'StorageAttachedIndex'; -``` - -Now you can query the primary table directly: - -```cql -SELECT * FROM videos -WHERE tags CONTAINS 'cassandra' -LIMIT 10; -``` - -**No separate table needed!** SAI handles the indexing automatically. 
- -## SAI in KillrVideo - -The KillrVideo `videos` table uses multiple SAI indexes for flexible querying: - -- **`videos_tags_idx`** - Find videos by tag (e.g., `WHERE tags CONTAINS 'tutorial'`) -- **`videos_name_idx`** - Text search on video titles -- **`videos_userid_idx`** - All videos by a specific user (replaces `user_videos` table) -- **`videos_added_date_idx`** - Query by date range (supports `latest_videos` queries) -- **`videos_category_idx`** - Filter by category (music, education, gaming, etc.) -- **`videos_content_rating_idx`** - Filter by rating (G, PG, PG-13, R) -- **`videos_language_idx`** - Filter by language -- **`videos_content_features_idx`** - **Vector similarity search** (COSINE distance) - -### Multiple Index Queries - -SAI allows combining multiple filters efficiently: - -```cql -SELECT * FROM videos -WHERE category = 'education' - AND content_rating = 'G' - AND tags CONTAINS 'cassandra' -LIMIT 20; -``` - -This query uses three SAI indexes simultaneously without requiring a denormalized table for this specific combination. - -## Migration Impact - -Migrating from Cassandra 3.x to Astra DB with SAI: - -**Before (3.x):** -- 1 primary table (`videos`) -- 5+ denormalized tables (`videos_by_tag`, `videos_by_user`, `videos_by_category`, etc.) -- Complex application code for multi-table writes - -**After (Astra with SAI):** -- 1 primary table (`videos`) -- 8 SAI indexes -- Simple application code - single table writes - -**Result:** 5x fewer tables, simpler architecture, easier maintenance. 
- -## Performance Characteristics - -- **Write performance**: Slightly higher latency due to index maintenance, but eliminates multi-table writes -- **Read performance**: Optimized for filtered queries, comparable to denormalized tables -- **Storage**: Indexes add overhead, but less than full denormalization -- **Flexibility**: Add new indexes without schema changes to application code - -## Learn More - -- [Astra DB SAI Documentation](https://docs.datastax.com/en/astra-serverless/docs/develop/dev-with-sai.html) -- [SAI Query Examples](https://docs.datastax.com/en/astra-serverless/docs/develop/dev-with-sai.html#_query_examples) -- [Migration Guide: 3.x to 5.0](../../../../../../../killrvideo-data/migrating/Migrating%20from%203.x%20to%205.0.md) - ---- - -## Tag Autocomplete {#autocomplete} - -The tag autocomplete feature uses SAI's collection indexing to provide instant suggestions as you type. - -### How It Works - -When you start typing a tag, the application queries: - -```cql -SELECT DISTINCT tag FROM tags -WHERE tag >= 'cas' - AND tag < 'cat' -LIMIT 10; -``` - -The `tags` table has an SAI index on the `tag` column, enabling fast prefix matching without scanning the entire table. - -### Why It's Fast - -- **SAI indexes are sorted** - Prefix queries are efficient (like a B-tree) -- **No full table scan** - Only relevant partitions are accessed -- **Low latency** - Typically < 10ms for autocomplete queries -- **Scalable** - Performance doesn't degrade with millions of tags - -### Alternative Approaches - -Before SAI, you might have used: -- **Client-side filtering** - Load all tags into memory (doesn't scale) -- **Separate search service** - Elasticsearch/Solr (adds complexity) -- **Prefix partitioning** - Denormalized table by first letter (limited flexibility) - -SAI provides the best of all worlds: fast, scalable, simple. 
diff --git a/public/educational/features/search.md b/public/educational/features/search.md deleted file mode 100644 index c2b65ac..0000000 --- a/public/educational/features/search.md +++ /dev/null @@ -1,267 +0,0 @@ -# Search in KillrVideo - -KillrVideo implements two complementary search approaches: **keyword search with SAI indexes** and **semantic search with vector embeddings**. - -## Search Architecture - -### 1. Keyword Search (SAI-based) - -Keyword search uses Storage-Attached Indexes to filter videos by exact matches on: -- **Video titles** (`videos_name_idx`) -- **Tags** (`videos_tags_idx`) -- **Categories** (`videos_category_idx`) -- **Languages** (`videos_language_idx`) - -#### How It Works - -```cql -SELECT * FROM videos -WHERE name LIKE '%cassandra%' - AND tags CONTAINS 'tutorial' - AND category = 'education' -LIMIT 20; -``` - -**Performance characteristics:** -- **Fast**: SAI indexes enable sub-10ms queries -- **Exact matches**: Finds videos with specific keywords -- **Combinable**: Multiple filters work together -- **Scalable**: Handles millions of videos efficiently - -**Best for:** -- Precise searches: "Find videos tagged 'cassandra'" -- Category browsing: "Show all music videos" -- Filtered queries: "Educational videos in Spanish" - -### 2. Semantic Search (Vector-based) - -Semantic search uses vector embeddings to find videos by meaning, not just keywords. - -#### How It Works - -1. **Convert query to vector** - ```json - { - "$vectorize": "explain how distributed databases work" - } - ``` - -2. **Find similar content** - ```cql - SELECT videoid, title, thumbnailUrl, - similarity_cosine(content_features, ?) AS score - FROM videos - WHERE content_features ANN OF ? - ORDER BY score DESC - LIMIT 20; - ``` - -3. 
**Return ranked results** - - Videos ranked by semantic similarity - - No exact keyword match required - - Understands synonyms and related concepts - -**Performance characteristics:** -- **Moderate speed**: 20-50ms typical query time -- **Approximate results**: ANN returns ~95% accurate matches -- **Context-aware**: Understands query intent -- **Smart**: Finds related content even with different wording - -**Best for:** -- Natural language queries: "how do NoSQL databases handle failures" -- Concept-based search: "distributed system tutorials" -- Discovery: "videos similar to this one" - -## Search Ranking {#ranking} - -Search results are ranked differently depending on the search type: - -### Keyword Search Ranking - -Keyword searches use **boolean matching** - results either match or don't: - -1. **Exact matches** ranked first -2. **Multiple criteria matches** ranked higher -3. **Recent videos** can be boosted (via `ORDER BY added_date DESC`) - -Example query with ranking: -```cql -SELECT * FROM videos -WHERE tags CONTAINS 'cassandra' -ORDER BY added_date DESC -LIMIT 20; -``` - -Results are: -- All tagged with 'cassandra' -- Sorted by newest first -- No relevance score - -### Semantic Search Ranking - -Semantic searches use **similarity scores** (0 to 1): - -- **0.9-1.0**: Nearly identical content -- **0.7-0.9**: Highly relevant, similar topic -- **0.5-0.7**: Related but different angle -- **0.3-0.5**: Loosely related -- **< 0.3**: Not very relevant - -Example query with scoring: -```cql -SELECT videoid, title, - similarity_cosine(content_features, ?) AS relevance -FROM videos -WHERE content_features ANN OF ? -ORDER BY relevance DESC -LIMIT 20; -``` - -Results include: -- **Relevance score** for each result -- **Ranked by similarity** to query -- **Top matches** most semantically similar - -## Hybrid Search (Future Enhancement) - -Combine keyword and semantic search for best results: - -1. 
**Stage 1: Keyword filter** - ```cql - WHERE category = 'education' - AND tags CONTAINS 'database' - ``` - -2. **Stage 2: Semantic ranking** - ```cql - ORDER BY similarity_cosine(content_features, ?) DESC - ``` - -Result: Videos matching keywords, ranked by semantic relevance. - -## Search Performance Optimization - -### SAI Index Configuration - -```cql -CREATE CUSTOM INDEX videos_name_idx -ON videos(name) -USING 'StorageAttachedIndex' -WITH OPTIONS = { - 'case_sensitive': 'false', - 'normalize': 'true', - 'ascii': 'true' -}; -``` - -**Options explained:** -- `case_sensitive: false` - "Cassandra" matches "cassandra" -- `normalize: true` - "café" matches "cafe" -- `ascii: true` - Converts accented characters to ASCII - -### Vector Index Configuration - -```cql -CREATE CUSTOM INDEX videos_features_idx -ON videos(content_features) -USING 'StorageAttachedIndex' -WITH OPTIONS = { - 'similarity_function': 'COSINE' -}; -``` - -**Similarity functions:** -- **COSINE** - Best for text embeddings (default) -- **EUCLIDEAN** - Best for spatial data -- **DOT_PRODUCT** - Best for pre-normalized vectors - -## Query Patterns - -### Pattern 1: Simple Tag Search - -```javascript -const { data } = useSearchVideos({ - query: 'cassandra', - page: 1, - pageSize: 20 -}); -``` - -Backend query: -```cql -SELECT * FROM videos -WHERE tags CONTAINS 'cassandra' -ORDER BY added_date DESC -LIMIT 20; -``` - -### Pattern 2: Multi-Criteria Filter - -```javascript -const { data } = useSearchVideos({ - query: 'tutorial', - category: 'education', - contentRating: 'G', - page: 1, - pageSize: 20 -}); -``` - -Backend query: -```cql -SELECT * FROM videos -WHERE tags CONTAINS 'tutorial' - AND category = 'education' - AND content_rating = 'G' -ORDER BY added_date DESC -LIMIT 20; -``` - -### Pattern 3: Semantic Search - -```javascript -const { data } = searchVideosBySemantic({ - query: 'how do distributed databases work', - limit: 20 -}); -``` - -Backend query: -```json -{ - "find": { - "sort": { - 
"$vectorize": "how do distributed databases work" - }, - "options": { - "limit": 20 - } - } -} -``` - -## Search Analytics - -KillrVideo tracks search performance metrics: - -- **Query latency**: P50, P95, P99 response times -- **Result quality**: Click-through rates on results -- **Search volume**: Queries per second -- **Failed searches**: Zero-result queries - -These metrics help optimize: -- Index configuration -- Query patterns -- Embedding model selection -- Caching strategies - -## Learn More - -- [SAI Indexes](./sai-indexes.md) -- [Vector Search](./vector-search.md) -- [Astra DB Search Documentation](https://docs.datastax.com/en/astra-db-serverless/docs/develop/dev-with-sai.html) -- [Vector Search Best Practices](https://docs.datastax.com/en/astra-db-serverless/databases/vector-search.html) - ---- - -**Pro Tip**: Start with keyword search for precise queries, then add semantic search for discovery and exploration. The combination provides the best user experience. diff --git a/public/educational/features/vector-search.md b/public/educational/features/vector-search.md deleted file mode 100644 index 28f2489..0000000 --- a/public/educational/features/vector-search.md +++ /dev/null @@ -1,228 +0,0 @@ -# Vector Search & AI-Powered Recommendations - -Vector search is a cutting-edge feature in Astra DB that enables AI-powered content recommendations using machine learning embeddings. - -## What is Vector Search? - -Vector search represents data as high-dimensional vectors (arrays of numbers) that capture semantic meaning. Similar content has similar vectors, enabling "find content like this" queries without exact keyword matching. 
- -### Example - -Traditional keyword search: "cassandra database tutorial" -- Matches: Videos with exact words "cassandra", "database", "tutorial" -- Misses: "NoSQL distributed data store guide" (same meaning, different words) - -Vector search with embeddings: -- Converts text to a 4096-dimension vector representing semantic meaning -- Finds videos with similar meaning regardless of exact words used -- Returns "NoSQL distributed data store guide" as highly relevant - -## Vector Types in KillrVideo - -KillrVideo uses several vector columns for different AI features: - -### 1. Video Content Features (`videos.content_features`) - -```cql -content_features vector -``` - -This vector represents the **semantic content** of the video, generated from: -- Video title -- Description -- Tags - -**How it's created:** -- Text is concatenated: `{title} {description} {tags}` -- Clipped to 512 tokens (NVIDIA embedding model limit) -- Automatically embedded using NVIDIA NV-Embed-QA model -- Stored as 4096-dimension vector - -**What it enables:** -- "Find videos similar to this one" -- Content-based recommendations -- Semantic search: "explain distributed databases" → finds relevant videos - -### 2. User Preference Vectors (`user_preferences.preference_vector`) - -```cql -preference_vector vector -``` - -Represents a user's viewing preferences based on: -- Watch history -- Liked videos -- Engagement patterns - -**What it enables:** -- Personalized "For You" feed -- User-to-user similarity -- Collaborative filtering recommendations - -### 3. Tag Vectors (`tags.tag_vector`) - -```cql -tag_vector vector -``` - -Represents semantic meaning of tags for discovering related topics. - -**What it enables:** -- "Related tags" suggestions -- Topic clustering -- Semantic tag search - -## Similarity Functions - -Astra DB provides three similarity functions for vector search: - -### 1. 
Cosine Similarity (Most Common) - -```cql -SELECT * FROM videos -ORDER BY content_features ANN OF [0.1, 0.5, ...] -LIMIT 10; -``` - -- Measures **angle** between vectors (direction, not magnitude) -- Range: -1 to 1 (higher is more similar) -- Best for: Text embeddings, normalized vectors -- Used in: Video recommendations, semantic search - -### 2. Euclidean Distance - -```cql -SELECT *, similarity_euclidean(content_features, [0.1, 0.5, ...]) AS sim -FROM videos -ORDER BY sim DESC -LIMIT 10; -``` - -- Measures **straight-line distance** between vectors -- Range: 0 to ∞ (lower is more similar) -- Best for: Spatial data, when magnitude matters -- Used in: Geographic proximity, feature matching - -### 3. Dot Product - -```cql -SELECT *, similarity_dot_product(content_features, [0.1, 0.5, ...]) AS sim -FROM videos -ORDER BY sim DESC -LIMIT 10; -``` - -- Measures **alignment and magnitude** of vectors -- Range: -∞ to ∞ (higher is more similar) -- Best for: Recommendation scores, weighted features -- Used in: Scoring algorithms, ranking systems - -## Vector Search in Action {#recommendations} - -When you view a video, KillrVideo shows "Related Videos" using vector similarity: - -1. **Get current video's feature vector** - ```cql - SELECT content_features FROM videos WHERE videoid = ? - ``` - -2. **Find similar videos** - ```cql - SELECT videoid, title, thumbnailUrl, - similarity_cosine(content_features, ?) AS similarity - FROM videos - WHERE content_features ANN OF ? - LIMIT 5; - ``` - -3. 
**Results ranked by similarity** - - Similarity score: 0.95 → Nearly identical content - - Similarity score: 0.75 → Related but different angle - - Similarity score: 0.50 → Loosely related - -## NVIDIA Embedding Integration - -KillrVideo uses the **NVIDIA NV-Embed-QA** model through Astra DB's `$vectorize` feature: - -### Automatic Vectorization - -```json -{ - "insertOne": { - "document": { - "name": "Cassandra Basics", - "description": "Learn distributed database fundamentals", - "$vectorize": "Cassandra Basics. Learn distributed database fundamentals. cassandra, database, tutorial" - } - } -} -``` - -The `$vectorize` field automatically: -1. Sends text to NVIDIA embedding API -2. Generates 4096-dimension vector -3. Stores in `content_features` column -4. Enables similarity queries - -### Token Limit (512 tokens) - -NVIDIA embeddings have a 512-token limit. KillrVideo enforces this with `clip_to_512_tokens()` function: - -```python -def clip_to_512_tokens(text: str) -> str: - """Clip text to ~512 tokens (rough: 1 token ≈ 4 chars)""" - max_chars = 512 * 4 # 2048 characters - return text[:max_chars] -``` - -**Why this matters:** -- Embedding models have input limits -- Exceeding limits causes API errors -- Clipping ensures reliable vectorization -- First 2048 characters usually contain key information - -## Performance Considerations - -### Approximate Nearest Neighbor (ANN) - -Vector searches use ANN algorithms, not exact matching: - -- **Trade-off**: Speed vs. 
accuracy -- **ANN**: Fast, returns ~95% accurate results -- **Exact**: Slow, returns 100% accurate results -- **KillrVideo choice**: ANN for real-time recommendations - -### Index Types - -```cql -CREATE CUSTOM INDEX videos_features_idx -ON videos(content_features) -USING 'StorageAttachedIndex'; -``` - -SAI indexes optimize vector searches with: -- **HNSW** (Hierarchical Navigable Small World) graph structure -- **Sub-linear search time**: O(log N) instead of O(N) -- **Memory efficient**: Not all vectors loaded at once - -## Use Cases Beyond KillrVideo - -Vector search enables many AI applications: - -- **Semantic search**: Find documents by meaning, not keywords -- **Image similarity**: Find visually similar images -- **Product recommendations**: "Customers who liked X also liked Y" -- **Anomaly detection**: Find unusual patterns in data -- **Question answering**: Match questions to relevant answers -- **Duplicate detection**: Find near-duplicate content - -## Learn More - -- [Astra DB Vector Search](https://docs.datastax.com/en/astra-db-serverless/databases/vector-search.html) -- [NVIDIA NV-Embed Model](https://build.nvidia.com/nvidia/nv-embed-qa) -- [Vector Search Best Practices](https://docs.datastax.com/en/astra-db-serverless/api-reference/client-libraries.html#vector-search) -- [KillrVideo Vector Implementation](../../../../../../../kv-be-python-fastapi-dataapi-table/docs/vector_search.md) - ---- - -**Fun Fact**: A 4096-dimension vector requires 16,384 bytes (16KB) of storage. KillrVideo's videos table with 10,000 videos would use ~160MB just for embeddings. SAI indexes make this efficient by using compressed storage and smart caching. 
diff --git a/public/educational/tooltips-manifest.json b/public/educational/tooltips-manifest.json index db2d4f9..dedcdf6 100644 --- a/public/educational/tooltips-manifest.json +++ b/public/educational/tooltips-manifest.json @@ -118,6 +118,97 @@ "component": "CommentsSection", "priority": "medium", "category": "database-concepts" + }, + { + "id": "latest-videos-timeseries", + "title": "Time-Series Bucketing for Latest Videos", + "contentFile": "extracted/latest_videos_timeseries_tooltip.md", + "explainerSource": { + "file": "docs/explainer/video_catalog/GET_videos_latest.md", + "lines": null, + "note": "Explainer pending - to be written" + }, + "component": "FeaturedVideos", + "priority": "high", + "category": "data-modeling" + }, + { + "id": "registration-dual-table", + "title": "Dual-Table Writes for Registration", + "contentFile": "extracted/registration_dual_table_tooltip.md", + "explainerSource": { + "file": "docs/explainer/account_management/POST_users_register.md", + "lines": [59, 69], + "sections": ["why-two-tables"] + }, + "component": "Auth", + "priority": "high", + "category": "data-modeling" + }, + { + "id": "login-credential-lookup", + "title": "Credential Lookup & Login Counters", + "contentFile": "extracted/login_credential_lookup_tooltip.md", + "explainerSource": { + "file": "docs/explainer/account_management/POST_users_login.md", + "lines": null, + "sections": ["credential-lookup"] + }, + "component": "Auth", + "priority": "medium", + "category": "data-modeling" + }, + { + "id": "comments-denormalization", + "title": "Denormalization: Same Data, Different Key", + "contentFile": "extracted/comments_denormalization_tooltip.md", + "explainerSource": { + "file": "docs/explainer/comments_ratings/POST_videos_comments.md", + "lines": null, + "note": "Explainer pending - to be written" + }, + "component": "CommentsSection", + "priority": "high", + "category": "data-modeling" + }, + { + "id": "trending-video-activity", + "title": "Time-Series Data for 
Trending", + "contentFile": "extracted/trending_video_activity_tooltip.md", + "explainerSource": { + "file": "docs/explainer/video_catalog/GET_videos_trending.md", + "lines": null, + "note": "Explainer pending - to be written" + }, + "component": "Trending", + "priority": "high", + "category": "data-modeling" + }, + { + "id": "playback-stats-counters", + "title": "Video Playback Stats: Multi-Counter Table", + "contentFile": "extracted/playback_stats_counters_tooltip.md", + "explainerSource": { + "file": "docs/explainer/video_catalog/POST_video_view.md", + "lines": null, + "note": "Explainer pending - to be written" + }, + "component": "Creator", + "priority": "medium", + "category": "counters" + }, + { + "id": "ratings-data-model", + "title": "Ratings: Counters + Individual Records", + "contentFile": "extracted/ratings_data_model_tooltip.md", + "explainerSource": { + "file": "docs/explainer/comments_ratings/POST_videos_ratings.md", + "lines": null, + "note": "Explainer pending - to be written" + }, + "component": "StarRating", + "priority": "medium", + "category": "data-modeling" } ] } diff --git a/src/App.css b/src/App.css deleted file mode 100644 index b9d355d..0000000 --- a/src/App.css +++ /dev/null @@ -1,42 +0,0 @@ -#root { - max-width: 1280px; - margin: 0 auto; - padding: 2rem; - text-align: center; -} - -.logo { - height: 6em; - padding: 1.5em; - will-change: filter; - transition: filter 300ms; -} -.logo:hover { - filter: drop-shadow(0 0 2em #646cffaa); -} -.logo.react:hover { - filter: drop-shadow(0 0 2em #61dafbaa); -} - -@keyframes logo-spin { - from { - transform: rotate(0deg); - } - to { - transform: rotate(360deg); - } -} - -@media (prefers-reduced-motion: no-preference) { - a:nth-of-type(2) .logo { - animation: logo-spin infinite 20s linear; - } -} - -.card { - padding: 2em; -} - -.read-the-docs { - color: #888; -} diff --git a/src/App.tsx b/src/App.tsx index 07c2192..3234d1a 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -7,7 +7,7 @@ import { 
BrowserRouter, Routes, Route } from "react-router-dom"; import { AuthProvider } from "@/hooks/useAuth"; import { WelcomeModal } from "@/components/educational/WelcomeModal"; import { ErrorBoundary } from "@/components/ErrorBoundary"; -import Index from "./pages/Index"; +import Home from "./pages/Home"; import Auth from "./pages/Auth"; import NotFound from "./pages/NotFound"; @@ -16,6 +16,7 @@ const Creator = lazy(() => import("./pages/Creator")); const Trending = lazy(() => import("./pages/Trending")); const Profile = lazy(() => import("./pages/Profile")); const SearchResults = lazy(() => import("./pages/SearchResults")); +const ExploreVideos = lazy(() => import("./pages/ExploreVideos")); const Moderation = lazy(() => import("./pages/Moderation")); const FlagDetail = lazy(() => import("./pages/FlagDetail")); const UserManagement = lazy(() => import("./pages/UserManagement")); @@ -45,11 +46,12 @@ const App = () => ( - } /> + } /> } /> } /> } /> } /> + } /> } /> } /> } /> diff --git a/src/components/comments/CommentsSection.tsx b/src/components/comments/CommentsSection.tsx index 859f13b..7f07472 100644 --- a/src/components/comments/CommentsSection.tsx +++ b/src/components/comments/CommentsSection.tsx @@ -62,11 +62,16 @@ const CommentsSection = ({ videoId }: CommentsSectionProps) => { return (
{/* Comments Section Header */} - -

- Comments -

-
+
+ +

+ Comments +

+
+ + Denormalized Tables + +
{/* Comment form */} {isAuthenticated && ( diff --git a/src/components/home/FeaturedVideos.tsx b/src/components/home/FeaturedVideos.tsx index 55ccdfe..754cb14 100644 --- a/src/components/home/FeaturedVideos.tsx +++ b/src/components/home/FeaturedVideos.tsx @@ -2,6 +2,7 @@ import { useMemo } from 'react'; import VideoCard from '@/components/video/VideoCard'; import { useLatestVideos, useUserNames } from '@/hooks/useApi'; import { VideoSummary } from '@/types/api'; +import { EducationalTooltip } from '@/components/educational/EducationalTooltip'; const PLACEHOLDER_THUMB = 'https://via.placeholder.com/400x225'; const EMPTY_TAGS: string[] = []; @@ -70,9 +71,11 @@ const FeaturedVideos = () => {
-

- Featured Videos -

+ +

+ Featured Videos +

+

Discover the latest and most popular content from our community of developers and creators

diff --git a/src/components/home/HeroSection.tsx b/src/components/home/HeroSection.tsx index 35032ae..8472fb3 100644 --- a/src/components/home/HeroSection.tsx +++ b/src/components/home/HeroSection.tsx @@ -1,4 +1,5 @@ +import { Link } from 'react-router-dom'; import { Button } from '@/components/ui/button'; import SearchBar from '@/components/search/SearchBar'; @@ -28,19 +29,23 @@ const HeroSection = () => {
- -
diff --git a/src/components/layout/Header.tsx b/src/components/layout/Header.tsx index ffa44a9..6f0eae4 100644 --- a/src/components/layout/Header.tsx +++ b/src/components/layout/Header.tsx @@ -45,9 +45,7 @@ const Header = () => {
-
- KV -
+ KillrVideo KillrVideo @@ -58,6 +56,9 @@ const Header = () => { Trending + + Explore + {isAuthenticated && ( Creator Studio diff --git a/src/components/ui/accordion.tsx b/src/components/ui/accordion.tsx deleted file mode 100644 index e6a723d..0000000 --- a/src/components/ui/accordion.tsx +++ /dev/null @@ -1,56 +0,0 @@ -import * as React from "react" -import * as AccordionPrimitive from "@radix-ui/react-accordion" -import { ChevronDown } from "lucide-react" - -import { cn } from "@/lib/utils" - -const Accordion = AccordionPrimitive.Root - -const AccordionItem = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => ( - -)) -AccordionItem.displayName = "AccordionItem" - -const AccordionTrigger = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, children, ...props }, ref) => ( - - svg]:rotate-180", - className - )} - {...props} - > - {children} - - - -)) -AccordionTrigger.displayName = AccordionPrimitive.Trigger.displayName - -const AccordionContent = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, children, ...props }, ref) => ( - -
{children}
-
-)) - -AccordionContent.displayName = AccordionPrimitive.Content.displayName - -export { Accordion, AccordionItem, AccordionTrigger, AccordionContent } diff --git a/src/components/ui/alert-dialog.tsx b/src/components/ui/alert-dialog.tsx deleted file mode 100644 index 8722561..0000000 --- a/src/components/ui/alert-dialog.tsx +++ /dev/null @@ -1,139 +0,0 @@ -import * as React from "react" -import * as AlertDialogPrimitive from "@radix-ui/react-alert-dialog" - -import { cn } from "@/lib/utils" -import { buttonVariants } from "@/components/ui/button" - -const AlertDialog = AlertDialogPrimitive.Root - -const AlertDialogTrigger = AlertDialogPrimitive.Trigger - -const AlertDialogPortal = AlertDialogPrimitive.Portal - -const AlertDialogOverlay = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => ( - -)) -AlertDialogOverlay.displayName = AlertDialogPrimitive.Overlay.displayName - -const AlertDialogContent = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => ( - - - - -)) -AlertDialogContent.displayName = AlertDialogPrimitive.Content.displayName - -const AlertDialogHeader = ({ - className, - ...props -}: React.HTMLAttributes) => ( -
-) -AlertDialogHeader.displayName = "AlertDialogHeader" - -const AlertDialogFooter = ({ - className, - ...props -}: React.HTMLAttributes) => ( -
-) -AlertDialogFooter.displayName = "AlertDialogFooter" - -const AlertDialogTitle = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => ( - -)) -AlertDialogTitle.displayName = AlertDialogPrimitive.Title.displayName - -const AlertDialogDescription = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => ( - -)) -AlertDialogDescription.displayName = - AlertDialogPrimitive.Description.displayName - -const AlertDialogAction = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => ( - -)) -AlertDialogAction.displayName = AlertDialogPrimitive.Action.displayName - -const AlertDialogCancel = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => ( - -)) -AlertDialogCancel.displayName = AlertDialogPrimitive.Cancel.displayName - -export { - AlertDialog, - AlertDialogPortal, - AlertDialogOverlay, - AlertDialogTrigger, - AlertDialogContent, - AlertDialogHeader, - AlertDialogFooter, - AlertDialogTitle, - AlertDialogDescription, - AlertDialogAction, - AlertDialogCancel, -} diff --git a/src/components/ui/alert.tsx b/src/components/ui/alert.tsx deleted file mode 100644 index 41fa7e0..0000000 --- a/src/components/ui/alert.tsx +++ /dev/null @@ -1,59 +0,0 @@ -import * as React from "react" -import { cva, type VariantProps } from "class-variance-authority" - -import { cn } from "@/lib/utils" - -const alertVariants = cva( - "relative w-full rounded-lg border p-4 [&>svg~*]:pl-7 [&>svg+div]:translate-y-[-3px] [&>svg]:absolute [&>svg]:left-4 [&>svg]:top-4 [&>svg]:text-foreground", - { - variants: { - variant: { - default: "bg-background text-foreground", - destructive: - "border-destructive/50 text-destructive dark:border-destructive [&>svg]:text-destructive", - }, - }, - defaultVariants: { - variant: "default", - }, - } -) - -const Alert = React.forwardRef< - HTMLDivElement, - 
React.HTMLAttributes & VariantProps ->(({ className, variant, ...props }, ref) => ( -
-)) -Alert.displayName = "Alert" - -const AlertTitle = React.forwardRef< - HTMLParagraphElement, - React.HTMLAttributes ->(({ className, ...props }, ref) => ( -
-)) -AlertTitle.displayName = "AlertTitle" - -const AlertDescription = React.forwardRef< - HTMLParagraphElement, - React.HTMLAttributes ->(({ className, ...props }, ref) => ( -
-)) -AlertDescription.displayName = "AlertDescription" - -export { Alert, AlertTitle, AlertDescription } diff --git a/src/components/ui/aspect-ratio.tsx b/src/components/ui/aspect-ratio.tsx deleted file mode 100644 index c4abbf3..0000000 --- a/src/components/ui/aspect-ratio.tsx +++ /dev/null @@ -1,5 +0,0 @@ -import * as AspectRatioPrimitive from "@radix-ui/react-aspect-ratio" - -const AspectRatio = AspectRatioPrimitive.Root - -export { AspectRatio } diff --git a/src/components/ui/breadcrumb.tsx b/src/components/ui/breadcrumb.tsx deleted file mode 100644 index 71a5c32..0000000 --- a/src/components/ui/breadcrumb.tsx +++ /dev/null @@ -1,115 +0,0 @@ -import * as React from "react" -import { Slot } from "@radix-ui/react-slot" -import { ChevronRight, MoreHorizontal } from "lucide-react" - -import { cn } from "@/lib/utils" - -const Breadcrumb = React.forwardRef< - HTMLElement, - React.ComponentPropsWithoutRef<"nav"> & { - separator?: React.ReactNode - } ->(({ ...props }, ref) =>