diff --git a/docs/guides/integration-advertiser-dataprovider-endpoints.md b/docs/guides/integration-advertiser-dataprovider-endpoints.md index c834ea151..5faeb8e18 100644 --- a/docs/guides/integration-advertiser-dataprovider-endpoints.md +++ b/docs/guides/integration-advertiser-dataprovider-endpoints.md @@ -8,6 +8,7 @@ displayed_sidebar: sidebarAdvertisers --- import Link from '@docusaurus/Link'; +import IntegrationExampleIdentityMap from '../snippets/_integration-example-identity-map.mdx'; # Advertiser/Data Provider Integration to HTTP Endpoints @@ -32,6 +33,10 @@ You'll need to set up these values, in the UID2 Portal on the [API Keys](../port It's very important that you keep these values secure. For details, see [Security of API Key and Client Secret](../getting-started/gs-credentials.md#security-of-api-key-and-client-secret). ::: +## Integration Example + + + ## High-Level Steps At a high level, the steps for advertisers and data providers integrating with UID2 are as follows: diff --git a/docs/guides/integration-advertiser-dataprovider-overview.md b/docs/guides/integration-advertiser-dataprovider-overview.md index ad77d0850..808fb8cf9 100644 --- a/docs/guides/integration-advertiser-dataprovider-overview.md +++ b/docs/guides/integration-advertiser-dataprovider-overview.md @@ -8,6 +8,7 @@ displayed_sidebar: sidebarAdvertisers --- import Link from '@docusaurus/Link'; +import IntegrationExampleIdentityMap from '../snippets/_integration-example-identity-map.mdx'; # Advertiser/Data Provider Integration Overview @@ -27,6 +28,10 @@ There are other ways that you can use UID2, outside these use cases. These are j | Send in conversions | Send raw UID2s as conversion information | Use conversion information for measurement (attribution) or for retargeting via API or pixels. | | Receive graph data | Receive raw UID2s from graph/data providers via API or pixels | Build graph data. 
| +## Integration Example + + + ## High-Level Steps At a high level, the steps for advertisers and data providers integrating with UID2 are as follows: @@ -191,7 +196,7 @@ For instructions for monitoring for salt bucket rotations, refer to one of the f - Python SDK: [Monitor Rotated Salt Buckets](../sdks/sdk-ref-python.md#monitor-rotated-salt-buckets). -- Snowflake: [Monitor for Salt Bucket Rotation and Regenerate Raw UID2s](integration-snowflake.md#monitor-for-salt-bucket-rotation-and-regenerate-raw-uid2s). +- Snowflake: [Monitor for Salt Bucket Rotation and Regenerate Raw UID2s](integration-snowflake-before-july-2025.md#monitor-for-salt-bucket-rotation-and-regenerate-raw-uid2s). - HTTP endpoints: [Monitor for Salt Bucket Rotations for Your Stored Raw UID2s (v2)](integration-advertiser-dataprovider-endpoints.md#monitor-for-salt-bucket-rotations-for-your-stored-raw-uid2s-v2). diff --git a/docs/ref-info/deprecation-schedule.md b/docs/ref-info/deprecation-schedule.md index a16cf7c2b..4b70b92ec 100644 --- a/docs/ref-info/deprecation-schedule.md +++ b/docs/ref-info/deprecation-schedule.md @@ -41,7 +41,7 @@ The latest ZIP file is available in the Assets section at the bottom of the link | Q3 2024 | [v5.38.104](https://github.com/IABTechLab/uid2-operator/releases/tag/v5.38.104) | 5.38.104 | September 12, 2024 | Mar 31, 2026 | | Q2 2024 | [v5.37.12](https://github.com/IABTechLab/uid2-operator/releases/tag/v5.37.12) | 5.37.12 | June 12, 2024 | Sep 30, 2025 | -For documentation, see [UID2 Private Operator for AWS Integration Guide](..\guides\operator-guide-aws-marketplace.md). +For documentation, see [UID2 Private Operator for AWS Integration Guide](../guides/operator-guide-aws-marketplace.md). ### Private Operator for GCP @@ -55,7 +55,7 @@ The latest ZIP file is linked in the GCP Download column in the following table. 
| Q3 2024 | [v5.38.104](https://github.com/IABTechLab/uid2-operator/releases/tag/v5.38.104) | [gcp-oidc-deployment-files-5.38.104.zip](https://github.com/IABTechLab/uid2-operator/releases/download/v5.38.104/gcp-oidc-deployment-files-5.38.104.zip) | September 12, 2024 | Mar 31, 2026 | | Q2 2024 | [v5.37.12](https://github.com/IABTechLab/uid2-operator/releases/tag/v5.37.12) | [gcp-oidc-deployment-files-5.37.12.zip](https://github.com/IABTechLab/uid2-operator/releases/download/v5.37.12/gcp-oidc-deployment-files-5.37.12.zip) | June 12, 2024 | Sep 30, 2025 | -For documentation, see [UID2 Private Operator for GCP Integration Guide](..\guides\operator-private-gcp-confidential-space.md). +For documentation, see [UID2 Private Operator for GCP Integration Guide](../guides/operator-private-gcp-confidential-space.md). ### Private Operator for Azure @@ -69,7 +69,7 @@ The latest ZIP file is linked in the Azure Download column in the following tabl | Q3 2024 | [v5.38.104](https://github.com/IABTechLab/uid2-operator/releases/tag/v5.38.104) | [azure-cc-deployment-files-5.38.104.zip](https://github.com/IABTechLab/uid2-operator/releases/download/v5.38.104/azure-cc-deployment-files-5.38.104.zip) | September 12, 2024 | Mar 31, 2026 | | Q2 2024 | [v5.37.12](https://github.com/IABTechLab/uid2-operator/releases/tag/v5.37.12) | [azure-cc-deployment-files-5.37.12.zip](https://github.com/IABTechLab/uid2-operator/releases/download/v5.37.12/azure-cc-deployment-files-5.37.12.zip) | June 12, 2024 | Sep 30, 2025 | -For documentation, see [UID2 Private Operator for Azure Integration Guide](..\guides\operator-guide-azure-enclave.md). +For documentation, see [UID2 Private Operator for Azure Integration Guide](../guides/operator-guide-azure-enclave.md). 
+ +import Link from '@docusaurus/Link'; + +For a complete demonstration of a working integration that includes all the recommended patterns, see the [UID2 Identity Map v3 Integration Example](https://github.com/IABTechLab/uid2docs/blob/main/static/examples/identity-map-integration-example). + +The sample uses the Python SDK, but the integration patterns are applicable to any SDK or direct API integration. + +For step-by-step setup instructions and to run the example, see the README.md file: [UID2 Integration Technical Sample](https://github.com/IABTechLab/uid2docs/blob/main/static/examples/identity-map-integration-example/README.md). diff --git a/i18n/ja/docusaurus-plugin-content-docs/current/guides/integration-advertiser-dataprovider-overview.md b/i18n/ja/docusaurus-plugin-content-docs/current/guides/integration-advertiser-dataprovider-overview.md index 7f77c2694..1ef519e56 100644 --- a/i18n/ja/docusaurus-plugin-content-docs/current/guides/integration-advertiser-dataprovider-overview.md +++ b/i18n/ja/docusaurus-plugin-content-docs/current/guides/integration-advertiser-dataprovider-overview.md @@ -53,7 +53,7 @@ import Link from '@docusaurus/Link'; | [2: Store Raw UID2s and Salt Bucket IDs](#2-store-raw-uid2s-and-salt-bucket-ids) | カスタム(適切な方法で)。 | | [3: Manipulate or Combine Raw UID2s](#3-manipulate-or-combine-raw-uid2s) | カスタム(適切な方法で)。 | | [4: Send Stored Raw UID2s to DSPs to Create Audiences or Conversions](#4-send-stored-raw-uid2s-to-dsps-to-create-audiences-or-conversions) | カスタム(適切な方法で)。 | -| [5: Monitor for Salt Bucket Rotations for Your Stored Raw UID2s](#5-monitor-for-salt-bucket-rotations-for-your-stored-raw-uid2s) | 以下のいずれかのオプションを使用してください: | +| [5: Monitor for Salt Bucket Rotations for Your Stored Raw UID2s](#5-monitor-for-salt-bucket-rotations-for-your-stored-raw-uid2s) | 以下のいずれかのオプションを使用してください: | | [6: Monitor for Opt-Out Status](#6-monitor-for-opt-out-status) | API コールを使用して、[POST /optout/status](../endpoints/post-optout-status.md) エンドポイントにアクセスします。 | ## 
Integration Diagram @@ -124,7 +124,7 @@ raw UID2 は、特定の時点におけるユーザーの識別子です。raw U - Python SDK: [Monitor Rotated Salt Buckets](../sdks/sdk-ref-python.md#monitor-rotated-salt-buckets). -- Snowflake: [Monitor for Salt Bucket Rotation and Regenerate Raw UID2s](integration-snowflake.md#monitor-for-salt-bucket-rotation-and-regenerate-raw-uid2s). +- Snowflake: [Monitor for Salt Bucket Rotation and Regenerate Raw UID2s](integration-snowflake-before-july-2025.md#monitor-for-salt-bucket-rotation-and-regenerate-raw-uid2s). - HTTP endpoints: [Monitor for Salt Bucket Rotations for Your Stored Raw UID2s](integration-advertiser-dataprovider-endpoints.md#5-monitor-for-salt-bucket-rotations-for-your-stored-raw-uid2s). diff --git a/static/examples/identity-map-integration-example/.env.example b/static/examples/identity-map-integration-example/.env.example new file mode 100644 index 000000000..6b6d36e0e --- /dev/null +++ b/static/examples/identity-map-integration-example/.env.example @@ -0,0 +1,3 @@ +UID2_BASE_URL=https://operator-integ.uidapi.com +UID2_API_KEY=your_api_key_here +UID2_SECRET_KEY=your_secret_key_here \ No newline at end of file diff --git a/static/examples/identity-map-integration-example/.gitignore b/static/examples/identity-map-integration-example/.gitignore new file mode 100644 index 000000000..24c6b4c24 --- /dev/null +++ b/static/examples/identity-map-integration-example/.gitignore @@ -0,0 +1,18 @@ +# Environment variables +.env + +# Python cache +__pycache__/ +*.pyc + +# Virtual environments (legacy and uv) +venv/ +.venv/ + +# uv lock file +uv.lock + +# Database files +*.db + +.idea \ No newline at end of file diff --git a/static/examples/identity-map-integration-example/.python-version b/static/examples/identity-map-integration-example/.python-version new file mode 100644 index 000000000..24ee5b1be --- /dev/null +++ b/static/examples/identity-map-integration-example/.python-version @@ -0,0 +1 @@ +3.13 diff --git 
a/static/examples/identity-map-integration-example/README.md b/static/examples/identity-map-integration-example/README.md new file mode 100644 index 000000000..5fcd4b7c2 --- /dev/null +++ b/static/examples/identity-map-integration-example/README.md @@ -0,0 +1,123 @@ +# UID2 Integration Technical Sample + +**Complete UID2 integration example demonstrating Identity Map v3 flow.** + +This sample shows a pattern for mapping email addresses and phone numbers to raw UID2s, handling optouts, managing raw UID2 refresh cycles, and performing a sample attribution analysis based on both current and previous UID2s. + +## Project Structure + +``` +identity-map-integration-example/ +├── src/ # Python source code +│ ├── complete_demo.py # End-to-end demo workflow +│ ├── map_identities.py # Core UID2 mapping logic +│ ├── attribution_analysis.py # Attribution analysis example +│ ├── config.py # Configuration loading +│ ├── database.py # Database schema and utilities +│ ├── uid_client_wrapper.py # UID2 client with retry logic +│ └── populate_*.py # Test data generation scripts +├── .env # UID2 credentials (create from .env.example) +├── pyproject.toml # Project configuration +└── README.md # This file +``` + +## Quick Start + +### 1. Install Dependencies +```bash +# Install uv (Python package manager) +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Install project dependencies +uv sync +``` + +### 2. Configure UID2 Credentials +```bash +cp .env.example .env +# Edit .env with your UID2 integration credentials +``` + +Required `.env` format: +``` +UID2_BASE_URL=https://operator-integ.uidapi.com +UID2_API_KEY=your_api_key_here +UID2_SECRET_KEY=your_secret_key_here +``` + +### 3. Run Complete Demo +```bash +# Full workflow: test data population → UID2 mapping → attribution analysis +uv run src/complete_demo.py +``` + +### 4.
Run Individual Components +```bash +# Generate test data only +uv run src/populate_test_uid_mappings.py + +# Run UID2 mapping only +uv run src/map_identities.py + +# Run attribution analysis only +uv run src/attribution_analysis.py +``` + +## Core UID2 Integration Patterns + +### Identity Mapping Workflow + +**Key Integration Points:** +1. **Batch Processing** (`src/map_identities.py:build_uid_input()`) - Process sequential batches of up to 5,000 emails and/or phone numbers per request +2. **Retry Logic** (`src/uid_client_wrapper.py:generate_identity_map_with_retry()`) - Exponential backoff for network resilience +3. **Response Handling** (`src/map_identities.py:map_batch()`) - Process mapped, opted-out, and invalid identifiers + +## Sample Database Schema + +**Core `uid_mapping` table:** +```sql +CREATE TABLE uid_mapping ( + uid_mapping_id INTEGER PRIMARY KEY, + dii TEXT NOT NULL, -- Email or phone (+E.164) + dii_type TEXT NOT NULL, -- 'email' or 'phone' + current_uid TEXT, -- Current raw UID2 + previous_uid TEXT, -- Previous raw UID2 (only available for 90 days after rotation, afterwards NULL) + refresh_from TIMESTAMP, -- When to refresh mapping + opt_out BOOLEAN DEFAULT FALSE -- The user has opted out, we shouldn't attempt to map this user again +); +``` + +**Key business logic queries:** +```sql +-- Records needing mapping (never mapped + refresh expired) +SELECT uid_mapping_id, dii, dii_type +FROM uid_mapping +WHERE opt_out = FALSE +AND (current_uid IS NULL OR refresh_from < datetime('now')); + +-- Attribution joins using both current and previous UID2s +SELECT * FROM impressions imp +JOIN uid_mapping um ON (imp.uid = um.current_uid OR imp.uid = um.previous_uid) +WHERE um.opt_out = FALSE; +``` + +## Script Reference + +| Script | Purpose | Key Integration Concepts | +|--------|---------|--------------------------------------------------| +| `src/populate_test_uid_mappings.py` | Creates 100k test records | Database schema, DII formatting | +|
`src/map_identities.py` | **Core UID2 mapping logic** | Batch processing, retry logic, response handling | +| `src/populate_test_conversions_impressions.py` | Attribution demo data | Raw UID2 usage in measurement | +| `src/attribution_analysis.py` | Attribution analysis | Cross-UID2 joins, measurement patterns | +| `src/complete_demo.py` | End-to-end workflow | Full integration validation | + +## Production Integration Checklist + +**Patterns for UID2 Integration:** + +✅ **Request Limits**: Maximum 5,000 emails and/or phone numbers per request +✅ **Sequential Processing**: No parallel requests to UID2 service +✅ **Retry Logic**: Exponential backoff for network failures +✅ **Optout Handling**: Permanently exclude opted-out users from future processing +✅ **Raw UID2 Refresh**: Re-map raw UID2s when they reach `refresh_from` timestamps +✅ **State Persistence**: Track mapping state diff --git a/static/examples/identity-map-integration-example/pyproject.toml b/static/examples/identity-map-integration-example/pyproject.toml new file mode 100644 index 000000000..bbd07d5ce --- /dev/null +++ b/static/examples/identity-map-integration-example/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "identity-map-tech-sample-2" +version = "0.1.0" +description = "UID2 Identity Map v3 technical sample demonstrating email/phone to UID2 mapping with proper optout handling" +requires-python = ">=3.13" +dependencies = [ + "python-dotenv>=1.0.0", + "uid2-client>=2.6.0", +] + +[dependency-groups] +dev = [ + "black>=23.0.0", +] diff --git a/static/examples/identity-map-integration-example/src/attribution_analysis.py b/static/examples/identity-map-integration-example/src/attribution_analysis.py new file mode 100644 index 000000000..1841da633 --- /dev/null +++ b/static/examples/identity-map-integration-example/src/attribution_analysis.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +""" +Simple demo of joining impression and conversion data via current and previous UIDs +""" +import sqlite3
def attribution_analysis(conn: sqlite3.Connection) -> None:
    """Print up to ten randomly chosen impression/conversion attribution rows.

    Impressions and conversions are joined through ``uid_mapping``. Each side
    matches when its ``uid`` equals either the mapping's ``current_uid`` or
    its ``previous_uid``, so events stored under the previous UID still join
    to the same mapping row. Rows whose mapping has ``opt_out`` set are
    excluded.

    Args:
        conn: Open SQLite connection to a database that contains the
            ``uid_mapping``, ``impressions`` and ``conversions`` tables.
    """
    cursor = conn.cursor()

    attribution_query = """
        SELECT
            imp.impression_id,
            conv.conversion_id,
            conv.conversion_value,
            imp.campaign_id,
            um.dii,
            um.current_uid
        FROM impressions imp
        JOIN uid_mapping um ON (imp.uid = um.current_uid OR imp.uid = um.previous_uid)
        JOIN conversions conv ON (conv.uid = um.current_uid OR conv.uid = um.previous_uid)
        WHERE um.opt_out = FALSE
        ORDER BY RANDOM()
        LIMIT 10
    """

    cursor.execute(attribution_query)
    results = cursor.fetchall()

    print("Sample Attribution Results:")
    print(
        f"{'Impression':<12} {'Conversion':<12} {'Value':<10} {'Campaign':<12} {'DII':<40} {'UID':<15}"
    )
    print("-" * 110)

    for row in results:
        imp_id, conv_id, value, campaign, dii, uid = row
        print(
            f"{imp_id:<12} {conv_id:<12} ${value:<9.2f} {campaign:<12} {dii:<40} {uid:<15}"
        )


def main() -> None:
    """Open the default database, run the analysis, and always close the connection.

    Any exception is reported on stdout together with its traceback rather
    than propagated.
    """
    # Bind the name up front so the cleanup below can test it directly
    # instead of introspecting locals() when get_connection() itself fails.
    conn = None
    try:
        conn = get_connection()
        attribution_analysis(conn)
    except Exception as e:
        print(f"Attribution analysis failed: {e}")
        traceback.print_exc()
    finally:
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    main()
def complete_demo():
    """Run the four demo stages in order against the ``uid_demo.db`` database.

    Stages: populate mapping test data, run the UID2 mapping, populate
    attribution test data, run the attribution analysis. Any exception is
    printed with its traceback and stops the remaining stages; the connection
    is closed in every case.
    """
    conn = get_connection("uid_demo.db")

    # Each stage is wrapped in a lambda so the module attribute is looked up
    # only when that stage actually runs.
    stages = (
        (
            "Step 1: Populating UID2 mapping test data...",
            lambda: populate_test_uid_mappings.populate_database(conn),
        ),
        (
            "Step 2: Running UID2 mapping...",
            lambda: map_identities.map_identities(conn),
        ),
        (
            "Step 3: Populating attribution test data...",
            lambda: populate_test_conversions_impressions.populate_attribution_data(conn),
        ),
        (
            "Step 4: Running attribution analysis...",
            lambda: attribution_analysis.attribution_analysis(conn),
        ),
    )

    try:
        for banner, run_stage in stages:
            print(banner)
            run_stage()

        print("Demo completed successfully!")

    except Exception as e:
        print(f"Failed with error: {e}")
        traceback.print_exc()

    finally:
        conn.close()


if __name__ == "__main__":
    complete_demo()
@dataclass
class IdentityToMap:
    """Identifying columns of one ``uid_mapping`` row."""

    uid_mapping_id: int  # primary key of the uid_mapping row
    dii: str  # value of the row's dii column
    dii_type: str  # value of the row's dii_type column ('email' or 'phone')


def get_connection(db_path: str = "uid_mapping.db") -> sqlite3.Connection:
    """Open the SQLite database at ``db_path`` and return the connection.

    The connection's ``row_factory`` is set to ``sqlite3.Row`` so result
    columns can be read by name as well as by position.
    """
    connection = sqlite3.connect(db_path)
    connection.row_factory = sqlite3.Row
    return connection


def create_uid_mapping_table(conn: sqlite3.Connection) -> None:
    """Create the ``uid_mapping`` table and its three indexes if missing.

    Every statement uses ``IF NOT EXISTS``, so calling this on an already
    initialised database changes nothing. The work is committed before
    returning.
    """
    ddl_statements = (
        """
        CREATE TABLE IF NOT EXISTS uid_mapping (
            uid_mapping_id INTEGER PRIMARY KEY AUTOINCREMENT,
            dii TEXT NOT NULL,
            dii_type TEXT NOT NULL CHECK (dii_type IN ('email', 'phone')),
            current_uid TEXT,
            previous_uid TEXT,
            refresh_from TIMESTAMP,
            opt_out BOOLEAN DEFAULT FALSE
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_mapping_query
        ON uid_mapping(opt_out, current_uid, refresh_from)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_uid_mapping_current_uid
        ON uid_mapping(current_uid)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_uid_mapping_previous_uid
        ON uid_mapping(previous_uid)
        """,
    )

    cursor = conn.cursor()
    for statement in ddl_statements:
        cursor.execute(statement)

    conn.commit()
def map_identities(conn: Connection) -> None:
    """Map every pending record, one batch after another.

    Loads the configuration, builds the UID2 client wrapper, fetches the
    records returned by ``get_records_to_map`` and splits them with
    ``batch_records``. Each batch is handed to ``map_batch`` in turn and the
    per-batch counts are accumulated into a summary line printed at the end.

    If a batch raises, the error and traceback are printed and the process
    exits with status 1; later batches are not attempted.

    Args:
        conn: Open SQLite connection holding the ``uid_mapping`` table.
    """
    config = load_config()
    uid_client = UIDClientWrapper(
        config.uid_base_url, config.uid_api_key, config.uid_secret_key
    )

    print("Fetching records that need mapping...")
    pending: list[IdentityToMap] = get_records_to_map(conn)
    batches: list[list[IdentityToMap]] = batch_records(pending)
    print(f"Processing {len(pending)} records in {len(batches)} batches...")

    totals = {"processed": 0, "mapped": 0, "opted_out": 0}

    for batch_num, batch in enumerate(batches, start=1):
        try:
            print(f"Processing batch {batch_num} of {len(batches)} ({len(batch)} records)...")
            processed, mapped, opted_out = map_batch(batch, batch_num, conn, uid_client)
            totals["processed"] += processed
            totals["mapped"] += mapped
            totals["opted_out"] += opted_out

        except Exception as e:
            print(f"Batch {batch_num} processing failed: {e}")
            traceback.print_exc()
            sys.exit(1)

    print(
        f"Mapping complete: processed={totals['processed']}, "
        f"mapped={totals['mapped']}, opted_out={totals['opted_out']}"
    )
def batch_records(
    all_records: list[IdentityToMap], batch_size: int = 5000
) -> list[list[IdentityToMap]]:
    """Split ``all_records`` into consecutive batches of at most ``batch_size``.

    Order is preserved and only the last batch may be shorter. An empty input
    yields an empty list.

    Args:
        all_records: Records to split.
        batch_size: Maximum number of records per batch. The default of 5000
            is the per-request limit this sample uses for identity mapping.

    Returns:
        The batches, in input order.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # A zero step makes range() fail with an unrelated message and a negative
    # step silently produces no batches, so reject both explicitly.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    return [
        all_records[start : start + batch_size]
        for start in range(0, len(all_records), batch_size)
    ]
def build_uid_input(records: list[IdentityToMap]) -> tuple[IdentityMapV3Input, list[str]]:
    """Assemble one ``IdentityMapV3Input`` from the given records.

    Emails are added with ``with_email`` and phone numbers with
    ``with_phone``. A DII that makes either call raise ``ValueError`` is
    collected instead of aborting the batch.

    Returns:
        A pair of the populated input object and the list of DII values that
        were rejected with ``ValueError``.

    Raises:
        Exception: If a record's ``dii_type`` is neither ``"email"`` nor
            ``"phone"``.
    """
    request = IdentityMapV3Input()
    rejected = []

    for rec in records:
        kind = rec.dii_type
        if kind != "email" and kind != "phone":
            raise Exception(f"Unknown dii type: {rec.dii_type}")

        try:
            if kind == "email":
                request.with_email(rec.dii)
            else:
                request.with_phone(rec.dii)
        except ValueError:
            # ValueError is raised by uid2_client when the DII is invalid
            rejected.append(rec.dii)

    return request, rejected
def update_optout_records(conn: sqlite3.Connection, optout_ids: list[int]) -> None:
    """Flag the given ``uid_mapping`` rows as opted out and wipe their UID data.

    For every id in ``optout_ids`` the row gets ``opt_out = TRUE`` and its
    ``current_uid``, ``previous_uid`` and ``refresh_from`` columns are set to
    NULL. The change is committed and a one-line summary is printed. An empty
    list is a no-op: nothing is executed, committed or printed.
    """
    if optout_ids:
        statement = """
        UPDATE uid_mapping
        SET opt_out = TRUE,
            current_uid = NULL,
            previous_uid = NULL,
            refresh_from = NULL
        WHERE uid_mapping_id = ?
        """
        # executemany expects one parameter tuple per statement execution.
        parameters = [(row_id,) for row_id in optout_ids]

        conn.cursor().executemany(statement, parameters)
        conn.commit()
        print(f"Marked {len(optout_ids)} records as opted out")
def populate_attribution_data(conn: sqlite3.Connection) -> None:
    """Rebuild the impressions and conversions demo tables from mapped UIDs.

    Ensures the attribution tables exist, samples up to 10,000 distinct
    ``current_uid`` values from non-opted-out ``uid_mapping`` rows, deletes
    any existing impressions and conversions, then inserts 1,000 random
    impressions and 500 random conversions that reference the sampled UIDs.
    If no mapped UID exists, a message is printed and nothing is deleted or
    inserted.

    Args:
        conn: Open SQLite connection holding the ``uid_mapping`` table.
    """
    cursor = conn.cursor()

    create_attribution_tables(conn)

    cursor.execute(
        """
        SELECT DISTINCT current_uid FROM uid_mapping
        WHERE current_uid IS NOT NULL AND opt_out = FALSE
        LIMIT 10000
        """
    )
    uid_rows = cursor.fetchall()

    if not uid_rows:
        print("No mapped UIDs found, skipping attribution data generation")
        return

    sample_uids = [row[0] for row in uid_rows]
    print(f"Using {len(sample_uids)} actual UIDs for attribution demo data")

    cursor.execute("DELETE FROM impressions")
    cursor.execute("DELETE FROM conversions")

    def random_recent_timestamp() -> str:
        """Return a random local timestamp from roughly the last 90 days as text."""
        now = datetime.now()
        days_ago = random.randint(0, 90)
        random_time = now - timedelta(days=days_ago, seconds=random.randint(0, 86399))
        # Serialise explicitly instead of binding the datetime object: the
        # implicit sqlite3 datetime adapter is deprecated since Python 3.12.
        # isoformat(sep=" ") yields the same text that adapter stored.
        return random_time.isoformat(sep=" ")

    print("Generating impressions...")
    impressions_data = [
        (
            random.choice(sample_uids),
            random_recent_timestamp(),
            f"camp_{random.randint(1, 5)}",
        )
        for _ in range(1000)
    ]

    print("Generating conversions...")
    conversions_data = [
        (
            random.choice(sample_uids),
            random_recent_timestamp(),
            round(random.uniform(10.0, 500.0), 2),
        )
        for _ in range(500)
    ]

    cursor.executemany(
        """
        INSERT INTO impressions (uid, timestamp, campaign_id)
        VALUES (?, ?, ?)
        """,
        impressions_data,
    )

    cursor.executemany(
        """
        INSERT INTO conversions (uid, timestamp, conversion_value)
        VALUES (?, ?, ?)
        """,
        conversions_data,
    )

    conn.commit()

    print(
        f"Generated {len(impressions_data):,} impressions and {len(conversions_data):,} conversions"
    )

    unique_impression_uids = len({uid for uid, _, _ in impressions_data})
    unique_conversion_uids = len({uid for uid, _, _ in conversions_data})

    print(
        f"Coverage: {unique_impression_uids} UIDs have impressions, {unique_conversion_uids} UIDs have conversions"
    )
int(total_records * 0.10) + refresh_needed_count = int(total_records * 0.20) + never_mapped_count = int(total_records * 0.20) + invalid_dii_count = int(total_records * 0.01) + already_mapped_count = ( + total_records - opted_out_count - refresh_needed_count - never_mapped_count - invalid_dii_count + ) + + print(f"Generating test data distribution:") + print(f" Opted out: {opted_out_count}") + print(f" Up for refresh: {refresh_needed_count}") + print(f" Never mapped: {never_mapped_count}") + print(f" Invalid DII: {invalid_dii_count}") + print(f" Already mapped: {already_mapped_count}") + + # 1. Opted out records (10%) + for _ in range(opted_out_count): + dii, dii_type = generate_dii() + + records.append((dii, dii_type, None, None, None, True)) + + # 2. Up for refresh records (20%) + for _ in range(refresh_needed_count): + dii, dii_type = generate_dii() + current_uid = generate_uid() + previous_uid = generate_uid() if random.random() < 0.5 else None + refresh_from = datetime.now() - timedelta(days=random.randint(1, 30)) + + records.append((dii, dii_type, current_uid, previous_uid, refresh_from, False)) + + # 3. Never mapped records (20%) + for _ in range(never_mapped_count): + dii, dii_type = generate_dii() + + records.append((dii, dii_type, None, None, None, False)) + + # 4. Invalid DII records (1%) + for _ in range(invalid_dii_count): + dii, dii_type = generate_invalid_dii() + + records.append((dii, dii_type, None, None, None, False)) + + # 5. 
def populate_database(conn: sqlite3.Connection) -> None:
    """Rebuild ``uid_mapping`` from scratch and load it with generated test rows.

    Drops the existing ``uid_mapping`` table, recreates it through
    ``create_uid_mapping_table``, bulk-inserts the rows produced by
    ``generate_test_records``, commits, and prints summary counts read back
    from the table.

    Args:
        conn: Open SQLite connection to the target database.

    Raises:
        Exception: Any error raised while rebuilding or populating the table
            is printed with its stack trace and then re-raised.
    """
    try:
        cursor = conn.cursor()

        print("Clearing existing database...")
        cursor.execute("DROP TABLE IF EXISTS uid_mapping")

        print("Recreating database schema...")
        create_uid_mapping_table(conn)

        print("Generating 100k test records...")
        records = generate_test_records()

        print("Inserting records into database...")
        cursor.executemany(
            """
            INSERT INTO uid_mapping (dii, dii_type, current_uid, previous_uid, refresh_from, opt_out)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            records,
        )

        conn.commit()

        cursor.execute("SELECT COUNT(*) FROM uid_mapping")
        total_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM uid_mapping WHERE opt_out = TRUE")
        opted_out_count = cursor.fetchone()[0]

        cursor.execute(
            "SELECT COUNT(*) FROM uid_mapping WHERE current_uid IS NULL AND refresh_from IS NULL AND opt_out = FALSE"
        )
        never_mapped_count = cursor.fetchone()[0]

        # SQLite's datetime('now') is UTC. The 'localtime' modifier makes the
        # comparison use local wall-clock time instead.
        # NOTE(review): assumes refresh_from was written from naive local-time
        # datetime.now() values by generate_test_records - confirm.
        cursor.execute(
            "SELECT COUNT(*) FROM uid_mapping WHERE current_uid IS NOT NULL AND refresh_from < datetime('now', 'localtime')"
        )
        refresh_needed_count = cursor.fetchone()[0]

        print("\nDatabase populated successfully!")
        print(f"Total records: {total_count}")
        print(f"Opted out: {opted_out_count}")
        print(f"Never mapped: {never_mapped_count}")
        print(f"Up for refresh: {refresh_needed_count}")

    except Exception as e:
        # Report here for context, then let the caller decide how to exit.
        print(f"Error populating database: {e}")
        print("Full stack trace:")
        traceback.print_exc()
        raise
class UIDClientWrapper:
    """Thin layer over ``IdentityMapV3Client`` that retries transient HTTP failures."""

    def __init__(self, base_url: str, api_key: str, secret_key: str) -> None:
        self.client = IdentityMapV3Client(base_url, api_key, secret_key)

    def identity_map(self, uid_input: IdentityMapV3Input) -> IdentityMapV3Response:
        """Call ``generate_identity_map`` on the client, retrying transient HTTP errors.

        An ``HTTPError`` whose status is 429, 500, 502, 503 or 504 is retried
        up to five times, sleeping 1, 2, 4, 8 and 16 seconds between attempts.
        Any other ``HTTPError``, any other exception, and the ``HTTPError``
        raised on the final attempt are printed and re-raised unchanged.
        """
        backoff_seconds = (1, 2, 4, 8, 16)  # exponential backoff schedule
        retry_budget = len(backoff_seconds)

        # Statuses worth retrying: rate limiting plus server-side failures.
        transient_codes = frozenset({429, 500, 502, 503, 504})

        attempt = 0
        while True:
            try:
                return self.client.generate_identity_map(uid_input)

            except urllib.error.HTTPError as http_error:
                status = http_error.code

                # Budget exhausted: give up regardless of the status code.
                if attempt == retry_budget:
                    print(
                        f"HTTP {status} error failed after {retry_budget} retries: {http_error}"
                    )
                    raise

                if status not in transient_codes:
                    print(f"Non-retryable HTTP {status} error: {http_error}")
                    raise

                pause = backoff_seconds[attempt]
                print(
                    f"HTTP {status} error on attempt {attempt + 1}, retrying in {pause}s: {http_error}"
                )
                time.sleep(pause)

            except Exception as e:
                print(f"Error: {e}")
                raise

            attempt += 1