diff --git a/README.md b/README.md new file mode 100644 index 0000000..6994896 --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +# Tapiro Store API Documentation + +Welcome to the Tapiro Store API! This documentation provides everything you need to integrate your store's systems with Tapiro to leverage AI-driven personalized advertising and insights. + +By integrating with Tapiro, you can: + +- Submit user interaction data (purchases, searches) for analysis. +- Retrieve processed user preferences to personalize user experiences (e.g., targeted ads, product recommendations). + +## Getting Started + +1. **Obtain an API Key:** Generate API keys from your Store Dashboard within the Tapiro web application. Keep these keys secure. +2. **Authentication:** All API requests must be authenticated using your API key. See the [Authentication Guide](./authentication.md). +3. **Understand Endpoints:** Familiarize yourself with the primary endpoints for stores: + - [Data Submission](./data_submission.md): Send user purchase and search data. + - [Preference Retrieval](./preference_retrieval.md): Get user preferences. +4. **Review the Taxonomy:** Accurate data categorization is crucial for effective personalization. Understand how to use the [Tapiro Taxonomy](./taxonomy.md). +5. **Error Handling:** Be prepared to handle potential API errors. Refer to the [Error Handling Guide](./error_handling.md). + +## Core Concepts + +- **API Key:** Your unique secret key authenticates your store's requests. It identifies your store to Tapiro. +- **User Identification:** Users are primarily identified by their **email address** when submitting data or retrieving preferences via the API. This email _must_ correspond to a registered user within the Tapiro ecosystem. +- **Consent:** Tapiro respects user privacy choices. Data submission will only be processed if the user has given `dataSharingConsent` within Tapiro _and_ has not explicitly opted out of sharing data with _your specific store_. 
Preference retrieval will fail (403 Forbidden) if consent is not granted for your store. +- **Taxonomy:** A hierarchical system for categorizing products and interests. Using correct category IDs or names from the taxonomy when submitting data is essential for the AI models. + +## API Base URL + +The base URL for the production API is: `https://api.tapiro.com/v1` +_(Note: Use the appropriate URL provided for development or staging environments.)_ + +## Need Help? + +If you encounter issues or have questions, please contact Tapiro Support at `tapirosupport@gmail.com`. diff --git a/api-service/api/openapi.yaml b/api-service/api/openapi.yaml index 791fa6b..eb0eaf2 100644 --- a/api-service/api/openapi.yaml +++ b/api-service/api/openapi.yaml @@ -21,6 +21,8 @@ tags: description: Health and uptime monitoring - name: Admin description: Administrative operations for managing taxonomy and keyword mappings + - name: Taxonomy + description: Operations related to the product/interest taxonomy servers: - url: https://virtserver.swaggerhub.com/CHAMATHDEWMINA25/TAPIRO/1.0.0 description: SwaggerHub API Auto Mocking @@ -390,6 +392,29 @@ paths: - oauth2: [user:write] x-swagger-router-controller: PreferenceManagement + /users/preferences/store-consent: + get: + tags: [Preference Management] + summary: Get user's store opt-in/out lists + description: Retrieves the lists of store IDs the user has explicitly opted into or opted out of sharing data with. + operationId: getStoreConsentLists + responses: + "200": + description: Successfully retrieved store consent lists. 
+ content: + application/json: + schema: + $ref: "#/components/schemas/StoreConsentList" + "401": + $ref: "#/components/responses/UnauthorizedError" + "404": + $ref: "#/components/responses/NotFoundError" + "500": + $ref: "#/components/responses/InternalServerError" + security: + - oauth2: [user:read] # Requires user read scope + x-swagger-router-controller: PreferenceManagement + /stores/api-keys: post: tags: [Store Management] @@ -561,6 +586,137 @@ paths: $ref: "#/components/responses/InternalServerError" x-swagger-router-controller: Authentication + /taxonomy/categories: + get: + tags: [Taxonomy] + summary: Get Taxonomy Categories + description: Retrieves the full taxonomy structure from the database. + operationId: getTaxonomyCategories + responses: + "200": + description: Successfully retrieved the taxonomy. + content: + application/json: + schema: + $ref: "#/components/schemas/Taxonomy" + "404": + description: Taxonomy data not found in the database. + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "500": + $ref: "#/components/responses/InternalServerError" + x-swagger-router-controller: Taxonomy + + /users/data/recent: + get: + tags: [User Management] + summary: Get Recent User Data Submissions + description: Retrieves a list of recent data submissions made about the authenticated user. 
+ operationId: getRecentUserData + parameters: + - name: limit + in: query + description: Maximum number of records to return + required: false + schema: + type: integer + default: 10 + - name: page + in: query + description: Page number for pagination + required: false + schema: + type: integer + default: 1 + responses: + "200": + description: Recent data submissions retrieved successfully + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/RecentUserDataEntry" + "401": + $ref: "#/components/responses/UnauthorizedError" + "500": + $ref: "#/components/responses/InternalServerError" + security: + - oauth2: [user:read] + x-swagger-router-controller: UserProfile + + /users/analytics/spending: + get: + tags: [User Management] + summary: Get User Spending Analytics + description: Retrieves aggregated spending data by category and month for the authenticated user. + operationId: getSpendingAnalytics + parameters: + - name: startDate + in: query + required: false + schema: + type: string + format: date + description: Filter results from this date onwards (YYYY-MM-DD). + - name: endDate + in: query + required: false + schema: + type: string + format: date + description: Filter results up to this date (YYYY-MM-DD). + responses: + "200": + description: Spending analytics retrieved successfully. + content: + application/json: + schema: + $ref: "#/components/schemas/MonthlySpendingAnalytics" + "401": + $ref: "#/components/responses/UnauthorizedError" + "404": + $ref: "#/components/responses/NotFoundError" + "500": + $ref: "#/components/responses/InternalServerError" + security: + - oauth2: [user:read] + x-swagger-router-controller: UserProfile + + /stores/lookup: + get: + tags: [Store Management] + summary: Lookup Store Details + description: Retrieves basic details (like name) for a list of store IDs. + operationId: lookupStores + parameters: + - name: ids + in: query + description: Comma-separated list of store IDs to lookup. 
+ required: true + schema: + type: string + responses: + "200": + description: Store details retrieved successfully. + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/StoreBasicInfo" + "400": + $ref: "#/components/responses/BadRequestError" + "401": + $ref: "#/components/responses/UnauthorizedError" + "500": + $ref: "#/components/responses/InternalServerError" + security: + - oauth2: [user:read] + x-swagger-router-controller: StoreProfile + components: schemas: AttributeDistribution: @@ -597,6 +753,27 @@ components: phone: type: string pattern: ^\+?[\d\s-]+$ + gender: + type: string + nullable: true + description: User gender identity (e.g., 'male', 'female', 'non-binary', 'prefer_not_to_say') + example: "female" + incomeBracket: + type: string + nullable: true + description: User income bracket category (e.g., '<25k', '25k-50k', '50k-100k', '100k-200k', '>200k', 'prefer_not_to_say') + example: "50k-100k" + country: + type: string + nullable: true + description: User country of residence (ISO 3166-1 alpha-2 code) + example: "US" + age: + type: integer + format: int32 + nullable: true + description: User age + example: 35 privacySettings: type: object properties: @@ -616,14 +793,6 @@ components: items: type: string description: List of store IDs user has opted out from - dataAccess: - type: object - properties: - allowedDomains: - type: array - items: - type: string - description: List of allowed domains for data access createdAt: type: string format: date-time @@ -693,6 +862,23 @@ components: dataSharingConsent: type: boolean description: User's consent for data sharing + gender: + type: string + nullable: true + description: User gender identity + incomeBracket: + type: string + nullable: true + description: User income bracket category + country: + type: string + nullable: true + description: User country of residence (ISO 3166-1 alpha-2 code) + age: + type: integer + format: int32 + nullable: true + description: User 
age StoreCreate: type: object @@ -749,14 +935,23 @@ components: items: type: string description: List of store IDs the user has opted out of sharing data with - dataAccess: - type: object - properties: - allowedDomains: - type: array - items: - type: string - description: List of domains allowed to access user data via API keys + gender: + type: string + nullable: true + description: User gender identity + incomeBracket: + type: string + nullable: true + description: User income bracket category + country: + type: string + nullable: true + description: User country of residence (ISO 3166-1 alpha-2 code) + age: + type: integer + format: int32 + nullable: true + description: User age ApiKey: type: object @@ -783,39 +978,54 @@ components: type: object required: - email - - dataType + - dataType # Make dataType required - entries properties: email: type: string - description: User's email address + format: email + description: User's email address (used as identifier for API key auth). Must match a registered Tapiro user. dataType: type: string enum: [purchase, search] - description: Type of data being submitted + description: Specifies the type of data contained in the 'entries' array. entries: type: array - description: Array of data entries + description: > + List of data entries. Each entry must conform to either the PurchaseEntry + or SearchEntry schema, matching the top-level 'dataType'. items: - oneOf: + oneOf: # Use oneOf to specify possible entry types - $ref: "#/components/schemas/PurchaseEntry" - $ref: "#/components/schemas/SearchEntry" + description: "An entry representing either a purchase event or a search event." + minItems: 1 # Require at least one entry metadata: type: object - description: Additional information about the collection event + description: Additional metadata about the collection event (e.g., source, device). 
properties: - userId: - type: string - description: Optional user ID if known source: type: string - description: Source of the data (web, mobile, pos, etc) + description: Source of the data (e.g., 'web', 'mobile_app', 'pos'). deviceType: type: string - description: Type of device used + description: Type of device used (e.g., 'desktop', 'mobile', 'tablet'). sessionId: type: string - description: Unique identifier for the user session + description: Identifier for the user's session. + example: + source: "web" + deviceType: "desktop" + sessionId: "abc-123-xyz-789" + example: # Example for a purchase submission + email: "user@example.com" + dataType: "purchase" + entries: + - $ref: "#/components/schemas/PurchaseEntry/example" + metadata: + source: "web" + deviceType: "desktop" + sessionId: "abc-123-xyz-789" PurchaseEntry: type: object @@ -826,62 +1036,81 @@ components: timestamp: type: string format: date-time + description: ISO 8601 timestamp of when the purchase occurred. items: type: array + description: List of items included in the purchase. items: $ref: "#/components/schemas/PurchaseItem" - totalAmount: + totalValue: type: number format: float + description: Optional total value of the purchase event. + example: + timestamp: "2024-05-15T14:30:00Z" + items: + - $ref: "#/components/schemas/PurchaseItem/example" # Reference the example above + - sku: "ABC-789" + name: "Running Shorts" + category: "201" # Clothing + price: 39.95 + quantity: 1 + attributes: + color: "black" + size: "M" + material: "polyester" + totalValue: 91.93 PurchaseItem: type: object required: - name - - category + - category # Making category required for better processing properties: sku: type: string + description: Stock Keeping Unit or unique product identifier. name: type: string + description: Name of the purchased item. 
category: type: string - description: Category ID (e.g., "101") or name (e.g., "smartphones") - quantity: - type: integer - default: 1 + description: > + Category ID or name matching the Tapiro taxonomy (e.g., "101" or "Smartphones"). + Providing the most specific category ID is recommended. price: type: number format: float + description: Price of a single unit of the item. + quantity: + type: integer + description: Number of units purchased. + default: 1 attributes: $ref: "#/components/schemas/ItemAttributes" + example: + sku: "XYZ-123" + name: "Men's Cotton T-Shirt" + category: "201" # Example: Clothing ID + price: 25.99 + quantity: 2 + attributes: + color: "navy" + size: "M" + material: "cotton" ItemAttributes: type: object - description: Category-specific attributes - properties: - price_range: - type: string - enum: [budget, mid_range, premium, luxury] - brand: - type: string - color: - type: string - material: - type: string - style: - type: string - room: - type: string - size: - type: string - feature: - type: string - season: - type: string - gender: - type: string - additionalProperties: false + description: > + Key-value pairs representing product attributes based on the taxonomy. + Keys should be attribute names (e.g., "color", "size", "brand") and + values should be the specific attribute value (e.g., "blue", "large", "Acme"). + additionalProperties: + type: string + example: + color: "blue" + size: "L" + material: "cotton" SearchEntry: type: object @@ -892,16 +1121,29 @@ components: timestamp: type: string format: date-time + description: ISO 8601 timestamp of when the search occurred. query: type: string + description: The search query string entered by the user. category: type: string + description: > + Optional category context provided during the search (e.g., user was browsing 'Electronics'). + Should match a Tapiro taxonomy ID or name. results: type: integer + description: Optional number of results returned for the search query. 
clicked: type: array + description: Optional list of product IDs or SKUs clicked from the search results. items: type: string + example: + timestamp: "2024-05-15T10:15:00Z" + query: "noise cancelling headphones" + category: "105" # Example: Audio ID + results: 25 + clicked: ["Bose-QC45", "Sony-WH1000XM5"] UserPreferences: type: object @@ -1042,6 +1284,20 @@ components: count: type: integer + StoreConsentList: + type: object + properties: + optInStores: + type: array + items: + type: string + description: List of store IDs the user has opted into. + optOutStores: + type: array + items: + type: string + description: List of store IDs the user has opted out of. + HealthStatus: type: object properties: @@ -1114,6 +1370,142 @@ components: description: Whether registration process is complete description: User metadata from Auth0 + TaxonomyAttribute: + type: object + description: Attribute within a taxonomy category + required: + - name + - values + properties: + name: + type: string + values: + type: array + items: + type: string + description: + type: string + nullable: true + + TaxonomyCategory: + type: object + description: Category within a taxonomy system + required: + - id + - name + properties: + id: + type: string + name: + type: string + parent_id: + type: string + nullable: true + description: + type: string + nullable: true + attributes: + type: array + items: + $ref: "#/components/schemas/TaxonomyAttribute" + default: [] + + Taxonomy: + type: object + description: Complete taxonomy definition with categories and version + required: + - categories + - version + properties: + _id: + type: string + readOnly: true + categories: + type: array + items: + $ref: "#/components/schemas/TaxonomyCategory" + version: + type: string + + RecentUserDataEntry: + type: object + properties: + _id: + type: string + description: The unique ID of the userData entry. + storeId: + type: string + description: The ID of the store that submitted the data. 
+ dataType: + type: string + enum: [purchase, search] + description: The type of data submitted. + timestamp: + type: string + format: date-time + description: When the data was submitted to Tapiro. + entryTimestamp: + type: string + format: date-time + description: The timestamp of the original event (e.g., purchase time). + details: + type: object + description: Simplified details (e.g., item count for purchase, query string for search) + + SpendingAnalytics: + type: object + description: > + Aggregated spending data per category over time. + The structure might vary based on implementation (e.g., object keyed by month/year, + or an array of objects each representing a time point). + additionalProperties: + type: object # Example: { "YYYY-MM": { "Category1": 100, "Category2": 50 } } + additionalProperties: + type: number + format: float + example: + "2025-01": { "Electronics": 1299.99, "Clothing": 150.5 } + "2025-02": { "Clothing": 100, "Home": 85 } + + StoreBasicInfo: + type: object + properties: + storeId: + type: string + description: The unique ID of the store. + name: + type: string + description: The name of the store. + required: + - storeId + - name + + MonthlySpendingItem: + type: object + properties: + month: + type: string + format: date + description: The month of the spending data (e.g., "2024-01"). + spending: + type: object + description: An object mapping category names to the total amount spent in that category for the month. + additionalProperties: + type: number + format: float + required: + - month + - spending + example: + month: "2024-01" + spending: { "Electronics": 1299.99, "Clothing": 150.50 } + + MonthlySpendingAnalytics: + type: array + description: An array of monthly spending breakdowns. 
+ items: + $ref: "#/components/schemas/MonthlySpendingItem" + responses: BadRequestError: description: Bad request - invalid input diff --git a/api-service/clients/AIService.js b/api-service/clients/AIService.js index fc78402..39c5eb6 100644 --- a/api-service/clients/AIService.js +++ b/api-service/clients/AIService.js @@ -9,48 +9,6 @@ const axiosInstance = axios.create(); const AI_SERVICE_URL = process.env.AI_SERVICE_URL; const AI_SERVICE_API_KEY = process.env.AI_SERVICE_API_KEY; -/** - * Update user preferences directly through the FastAPI service - * @param {string} auth0Id - User Auth0 ID - * @param {string} email - User email - * @param {Array} preferences - User preferences to update - * @returns {Promise} Updated preferences - */ -exports.updateUserPreferences = async function (auth0Id, email, preferences) { - try { - const response = await axiosInstance.post( - `${AI_SERVICE_URL}/preferences/update`, - { auth0Id, email, preferences }, - { - headers: { - 'X-API-Key': AI_SERVICE_API_KEY, - 'Content-Type': 'application/json', - }, - timeout: 10000, // Using longer timeout for preference operations - } - ); - - return response.data; - } catch (error) { - console.error('Failed to update preferences through AI service:', error?.response?.data || error); - - // More specific error handling - if (error.response) { - if (error.response.status === 401) { - throw new Error('AI service authentication failed'); - } else if (error.response.status === 404) { - throw new Error('User not found in AI service'); - } else if (error.response.status >= 500) { - throw new Error('AI service internal error'); - } - } else if (error.request) { - throw new Error('AI service connection failed'); - } - - throw error; - } -}; - /** * Process user data by sending it to AI service * @param {Object} userData - The user data to process diff --git a/api-service/clients/taxonomyService.js b/api-service/clients/taxonomyService.js deleted file mode 100644 index 8618f15..0000000 --- 
a/api-service/clients/taxonomyService.js +++ /dev/null @@ -1,36 +0,0 @@ -const axios = require('axios'); -const { getCache, setCache } = require('../utils/redisUtil'); -const { CACHE_TTL, CACHE_KEYS } = require('../utils/cacheConfig'); - -// Axios Instance -const axiosInstance = axios.create(); - -// Get environment variables - use config fallback for backward compatibility -const AI_SERVICE_URL = process.env.AI_SERVICE_URL; -const AI_SERVICE_API_KEY = process.env.AI_SERVICE_API_KEY; - -// Add these constants at the top of taxonomyService.js -const REQUEST_TIMEOUTS = { - DEFAULT: 5000, // Standard endpoints - SHORT: 2000, // Health checks - LONG: 10000, // Complex operations -}; - -/** - * Check taxonomy service health - * @returns {Promise} Health status - */ -exports.checkHealth = async function () { - try { - const response = await axiosInstance.get(`${AI_SERVICE_URL}/taxonomy/health`, { - headers: { - 'X-API-Key': AI_SERVICE_API_KEY, - }, - timeout: REQUEST_TIMEOUTS.SHORT, - }); - return { status: 'connected', details: response.data }; - } catch (error) { - console.error('Health check failed:', error?.response?.data || error); - return { status: 'disconnected', details: error.message }; - } -}; diff --git a/api-service/controllers/PreferenceManagement.js b/api-service/controllers/PreferenceManagement.js index 6841ec6..c9e21f9 100644 --- a/api-service/controllers/PreferenceManagement.js +++ b/api-service/controllers/PreferenceManagement.js @@ -39,4 +39,15 @@ module.exports.optOutFromStore = function optOutFromStore(req, res, next, storeI .catch((response) => { utils.writeJson(res, response); }); +}; + +// Add the new controller function +module.exports.getStoreConsentLists = function getStoreConsentLists(req, res, next) { + PreferenceManagement.getStoreConsentLists(req) + .then((response) => { + utils.writeJson(res, response); + }) + .catch((response) => { + utils.writeJson(res, response); + }); }; \ No newline at end of file diff --git 
a/api-service/controllers/StoreProfile.js b/api-service/controllers/StoreProfile.js index b75acfc..ef368da 100644 --- a/api-service/controllers/StoreProfile.js +++ b/api-service/controllers/StoreProfile.js @@ -29,4 +29,14 @@ module.exports.deleteStoreProfile = function deleteStoreProfile(req, res, next) .catch((response) => { utils.writeJson(res, response); }); +}; + +module.exports.lookupStores = function lookupStores(req, res, next, ids) { + StoreProfile.lookupStores(req, ids) + .then((response) => { + utils.writeJson(res, response); + }) + .catch((response) => { + utils.writeJson(res, response); + }); }; \ No newline at end of file diff --git a/api-service/controllers/Taxonomy.js b/api-service/controllers/Taxonomy.js new file mode 100644 index 0000000..75a60e3 --- /dev/null +++ b/api-service/controllers/Taxonomy.js @@ -0,0 +1,12 @@ +const utils = require('../utils/writer.js'); +const TaxonomyService = require('../service/TaxonomyService'); + +module.exports.getTaxonomyCategories = function getTaxonomyCategories (req, res, next) { + TaxonomyService.getTaxonomyCategories() + .then(function (response) { + utils.writeJson(res, response); + }) + .catch(function (response) { + utils.writeJson(res, response); + }); +}; \ No newline at end of file diff --git a/api-service/controllers/UserProfile.js b/api-service/controllers/UserProfile.js index b6868c5..0364e2e 100644 --- a/api-service/controllers/UserProfile.js +++ b/api-service/controllers/UserProfile.js @@ -29,4 +29,25 @@ module.exports.deleteUserProfile = function deleteUserProfile(req, res, next) { .catch((response) => { utils.writeJson(res, response); }); +}; + +module.exports.getRecentUserData = function getRecentUserData(req, res, next, limit, page) { + // Pass query parameters to the service function + UserProfile.getRecentUserData(req, limit, page) + .then((response) => { + utils.writeJson(res, response); + }) + .catch((response) => { + utils.writeJson(res, response); + }); +}; + 
+module.exports.getSpendingAnalytics = function getSpendingAnalytics(req, res, next) { + UserProfile.getSpendingAnalytics(req) + .then((response) => { + utils.writeJson(res, response); + }) + .catch((response) => { + utils.writeJson(res, response); + }); }; \ No newline at end of file diff --git a/api-service/service/AuthenticationService.js b/api-service/service/AuthenticationService.js index 94e5e36..e3daed0 100644 --- a/api-service/service/AuthenticationService.js +++ b/api-service/service/AuthenticationService.js @@ -13,7 +13,15 @@ const { CACHE_TTL, CACHE_KEYS } = require('../utils/cacheConfig'); exports.registerUser = async function (req, body) { try { const db = getDB(); - const { preferences, dataSharingConsent } = body; + // Destructure new demographic fields + const { + preferences, + dataSharingConsent, + gender, + incomeBracket, + country, + age, + } = body; // Get user data - use req.user if available (from middleware) or fetch it const userData = req.user || (await getUserData(req.headers.authorization?.split(' ')[1])); @@ -89,9 +97,13 @@ exports.registerUser = async function (req, body) { // Create user in database const user = { auth0Id: userData.sub, - username: userData.username, + username: userData.username || userData.nickname || userData.sub, email: userData.email, phone: userData.phone_number || null, + gender: gender || null, // Add new fields, defaulting to null if not provided + incomeBracket: incomeBracket || null, + country: country || null, + age: age || null, preferences: preferences || [], privacySettings: { dataSharingConsent, @@ -99,9 +111,6 @@ exports.registerUser = async function (req, body) { optInStores: [], optOutStores: [], }, - dataAccess: { - allowedDomains: [], - }, createdAt: new Date(), updatedAt: new Date(), }; @@ -117,6 +126,7 @@ exports.registerUser = async function (req, body) { }); // Also cache user preferences + // Note: Demographic data is NOT typically included in the preferences cache const cachePreferences = { 
userId: user._id.toString(), preferences: user.preferences || [], // Fixed: consistent naming diff --git a/api-service/service/PreferenceManagementService.js b/api-service/service/PreferenceManagementService.js index 34b165e..ed4998d 100644 --- a/api-service/service/PreferenceManagementService.js +++ b/api-service/service/PreferenceManagementService.js @@ -4,7 +4,9 @@ const { setCache, getCache, invalidateCache } = require('../utils/redisUtil'); const { getUserData } = require('../utils/authUtil'); const { CACHE_TTL, CACHE_KEYS } = require('../utils/cacheConfig'); const { ObjectId } = require('mongodb'); -const AIService = require('../clients/AIService'); +// Removed AIService require as it's no longer used here +// const AIService = require('../clients/AIService'); +const TaxonomyService = require('../service/TaxonomyService'); // Import TaxonomyService exports.getUserOwnPreferences = async function (req) { try { @@ -117,55 +119,87 @@ exports.optOutFromStore = async function (req, storeId) { */ exports.updateUserPreferences = async function (req, body) { try { - // Get user data - use req.user if available (from middleware) or fetch it const userData = req.user || (await getUserData(req.headers.authorization?.split(' ')[1])); - const db = getDB(); - // Find user in database using Auth0 ID const user = await db.collection('users').findOne({ auth0Id: userData.sub }); if (!user) { - return respondWithCode(404, { - code: 404, - message: 'User not found', - }); + return respondWithCode(404, { code: 404, message: 'User not found' }); } - // If preferences are provided, send to FastAPI for processing + let validatedPreferences = []; if (body.preferences) { + // --- Validation --- + if (!Array.isArray(body.preferences)) { + return respondWithCode(400, { code: 400, message: 'Preferences must be an array.' 
}); + } + + // Fetch taxonomy for validation + let taxonomyDoc; try { - // Call the AI service to process preferences - await AIService.updateUserPreferences( - userData.sub, - user.email, // Use email from the found user document - body.preferences - ); - } catch (error) { - console.error('Failed to process preferences through AI service:', error); - // Continue with the update, we'll use the raw preferences without validation for now + // Use the service function to get taxonomy (handles caching) + const taxonomyResponse = await TaxonomyService.getTaxonomyCategories(); + if (taxonomyResponse.code !== 200) { + throw new Error('Failed to fetch taxonomy for validation'); + } + taxonomyDoc = taxonomyResponse.payload; // Assuming payload contains the taxonomy doc + } catch (taxError) { + console.error("Taxonomy fetch error during preference update:", taxError); + return respondWithCode(500, { code: 500, message: 'Could not load taxonomy for validation.' }); + } + + const validCategoryIds = new Set(taxonomyDoc?.data?.categories?.map(cat => cat.id) || []); + + for (const pref of body.preferences) { + // Basic structure validation + if (typeof pref.category !== 'string' || typeof pref.score !== 'number' || pref.score < 0 || pref.score > 1) { + return respondWithCode(400, { code: 400, message: `Invalid preference item format or score range: ${JSON.stringify(pref)}` }); + } + // Attributes validation (if present, must be a non-null object) + if (pref.attributes !== undefined && (typeof pref.attributes !== 'object' || pref.attributes === null || Array.isArray(pref.attributes))) { + return respondWithCode(400, { code: 400, message: `Invalid 'attributes' format for category ${pref.category}. 
Must be an object.` }); + } + // Taxonomy validation + if (!validCategoryIds.has(pref.category)) { + return respondWithCode(400, { code: 400, message: `Invalid category ID in preferences: ${pref.category}` }); + } + validatedPreferences.push(pref); // Add valid preference } + // --- End Validation --- + + } else { + // If body.preferences is explicitly null or undefined, maybe clear preferences? + // Or return an error if preferences are required for update. + // Current behavior: If body.preferences is missing/null, validatedPreferences remains [] + // which will effectively clear preferences in the $set below. + // If you require preferences, add: + // return respondWithCode(400, { code: 400, message: 'Preferences array is required for update.' }); } - // Update preferences in the database + // Log the data being sent to the database for debugging + console.log('Attempting to update preferences with:', JSON.stringify(validatedPreferences, null, 2)); + + // Update preferences in the database using the validated list const updateResult = await db.collection('users').updateOne( { _id: user._id }, { $set: { - preferences: body.preferences || [], + preferences: validatedPreferences, // Use the validated array updatedAt: new Date(), }, }, ); - // Fetch the updated user data to get the latest timestamp + // Fetch the updated user data to get the latest timestamp and preferences + // No need to fetch again if we trust the update, but it confirms the write const updatedUser = await db.collection('users').findOne( { _id: user._id }, { projection: { preferences: 1, updatedAt: 1 } } ); - // Clear related caches - await invalidateCache(`${CACHE_KEYS.PREFERENCES}${userData.sub}`); + const userCacheKey = `${CACHE_KEYS.PREFERENCES}${userData.sub}`; + await invalidateCache(userCacheKey); // Clear store-specific preference caches as preferences changed if (user.privacySettings?.optInStores) { @@ -174,20 +208,25 @@ exports.updateUserPreferences = async function (req, body) { } } - 
// Return updated preferences object (without privacySettings) + // Return updated preferences object const preferencesResponse = { userId: user._id.toString(), - preferences: updatedUser.preferences || [], + preferences: updatedUser.preferences || [], // Use actual updated preferences updatedAt: updatedUser.updatedAt, // Use the actual updated timestamp }; - // Update the cache with the minimal response - const cacheKey = `${CACHE_KEYS.PREFERENCES}${userData.sub}`; - await setCache(cacheKey, JSON.stringify(preferencesResponse), { EX: CACHE_TTL.USER_DATA }); - + // Update the cache with the new minimal response + await setCache(userCacheKey, JSON.stringify(preferencesResponse), { EX: CACHE_TTL.USER_DATA }); return respondWithCode(200, preferencesResponse); + } catch (error) { + // Catch MongoDB validation errors specifically if needed + if (error.code === 121) { // MongoDB validation error code + // Log the full details for better debugging + console.error('Update user preferences failed MongoDB validation:', JSON.stringify(error.errInfo?.details, null, 2) || error.message); + return respondWithCode(400, { code: 400, message: 'Preferences failed database validation.', details: error.errInfo?.details }); // Keep details for client if needed + } console.error('Update user preferences failed:', error); return respondWithCode(500, { code: 500, message: 'Internal server error' }); } @@ -252,3 +291,42 @@ exports.optInToStore = async function (req, storeId) { return respondWithCode(500, { code: 500, message: 'Internal server error' }); } }; + +/** + * Get user's store opt-in/out lists + */ +exports.getStoreConsentLists = async function (req) { + try { + // Get user data - use req.user if available (from middleware) or fetch it + const userData = req.user || (await getUserData(req.headers.authorization?.split(' ')[1])); + + const db = getDB(); + + // Find user in database using Auth0 ID, projecting only necessary fields + const user = await db.collection('users').findOne( 
+ { auth0Id: userData.sub }, + { projection: { 'privacySettings.optInStores': 1, 'privacySettings.optOutStores': 1, _id: 0 } } // Only get opt-in/out lists + ); + + if (!user) { + return respondWithCode(404, { + code: 404, + message: 'User not found', + }); + } + + // Prepare the response object, defaulting to empty arrays if fields don't exist + const consentLists = { + optInStores: user.privacySettings?.optInStores || [], + optOutStores: user.privacySettings?.optOutStores || [], + }; + + // Note: Caching could be added here if needed, potentially using a specific key + // or relying on the USER_DATA cache invalidation from opt-in/out actions. + + return respondWithCode(200, consentLists); + } catch (error) { + console.error('Get store consent lists failed:', error); + return respondWithCode(500, { code: 500, message: 'Internal server error' }); + } +}; diff --git a/api-service/service/StoreProfileService.js b/api-service/service/StoreProfileService.js index f9a4e8f..ce5677b 100644 --- a/api-service/service/StoreProfileService.js +++ b/api-service/service/StoreProfileService.js @@ -4,6 +4,7 @@ const { respondWithCode } = require('../utils/writer'); const { getUserData } = require('../utils/authUtil'); const { CACHE_TTL, CACHE_KEYS } = require('../utils/cacheConfig'); const { deleteAuth0User } = require('../utils/auth0Util'); +const { ObjectId } = require('mongodb'); // Import ObjectId /** * Get Store Profile @@ -117,3 +118,51 @@ exports.deleteStoreProfile = async function (req) { return respondWithCode(500, { code: 500, message: 'Internal server error' }); } }; + +/** + * Lookup Store Details + * Retrieves basic details (like name) for a list of store IDs. 
+ */ +exports.lookupStores = async function (req, ids) { + try { + if (!ids) { + return respondWithCode(400, { code: 400, message: 'Missing required query parameter: ids' }); + } + + const storeIds = ids.split(','); + + // Optional: Validate if IDs are in ObjectId format if needed + // const validObjectIds = storeIds.filter(id => ObjectId.isValid(id)).map(id => new ObjectId(id)); + // if (validObjectIds.length !== storeIds.length) { + // return respondWithCode(400, { code: 400, message: 'One or more invalid store ID formats provided.' }); + // } + + const db = getDB(); + + const stores = await db.collection('stores') + .find({ _id: { $in: storeIds.map(id => new ObjectId(id)) } }) // Use ObjectId for lookup if IDs are ObjectIds + // If store IDs are stored as strings in optIn/optOut lists, use: + // .find({ _id: { $in: storeIds } }) + .project({ _id: 1, name: 1 }) // Project only ID and name + .toArray(); + + // Format the response to match StoreBasicInfo schema + const formattedStores = stores.map(store => ({ + storeId: store._id.toString(), // Convert ObjectId back to string + name: store.name + })); + + // Caching could be considered if lookups for the same set of IDs are common, + // but the cache key generation might be complex. + + return respondWithCode(200, formattedStores); + + } catch (error) { + console.error('Lookup stores failed:', error); + // Handle potential ObjectId format errors if validation is strict + if (error.message.includes('Argument passed in must be a single String')) { + return respondWithCode(400, { code: 400, message: 'Invalid store ID format provided.' 
}); + } + return respondWithCode(500, { code: 500, message: 'Internal server error' }); + } +}; diff --git a/api-service/service/TaxonomyService.js b/api-service/service/TaxonomyService.js new file mode 100644 index 0000000..6991488 --- /dev/null +++ b/api-service/service/TaxonomyService.js @@ -0,0 +1,48 @@ +const { getDB } = require('../utils/mongoUtil'); +const { respondWithCode } = require('../utils/writer'); +const { setCache, getCache } = require('../utils/redisUtil'); +const { CACHE_TTL, CACHE_KEYS } = require('../utils/cacheConfig'); + +/** + * Get Taxonomy Categories + * Retrieves the full taxonomy structure from the MongoDB 'taxonomy' collection. + */ +exports.getTaxonomyCategories = async function () { + // Use the correct cache key defined in cacheConfig.js + const cacheKey = CACHE_KEYS.TAXONOMY; // Changed from TAXONOMY_FULL + try { + // Check cache first + const cachedTaxonomy = await getCache(cacheKey); + if (cachedTaxonomy) { + console.log('Taxonomy retrieved from cache'); + // Parse and remove MongoDB _id before returning if it's not part of the defined schema response + const taxonomyData = JSON.parse(cachedTaxonomy); + // delete taxonomyData._id; // Optional: remove _id if not needed in response + return respondWithCode(200, taxonomyData); + } + + console.log('Fetching taxonomy from MongoDB'); + const db = getDB(); + // Assuming the taxonomy is stored as a single document in the 'taxonomy' collection. 
+ // Adjust the query if the structure is different (e.g., findOne({ _id: 'current_taxonomy' })) + const taxonomyDoc = await db.collection('taxonomy').findOne({ current: true }); // Find the current taxonomy + + if (!taxonomyDoc) { + return respondWithCode(404, { code: 404, message: 'Taxonomy data not found in database' }); + } + + // Cache the result - Use a longer TTL for taxonomy structure + // Store the raw document including _id in cache + // Use a specific TTL for taxonomy if defined, otherwise fallback or use a default + const taxonomyTTL = CACHE_TTL.TAXONOMY || CACHE_TTL.LONG || 3600 * 24; // Example: Use TAXONOMY TTL or fallback + await setCache(cacheKey, JSON.stringify(taxonomyDoc), { EX: taxonomyTTL }); + + // Remove MongoDB _id before returning if it's not part of the defined schema response + // delete taxonomyDoc._id; // Optional: remove _id if not needed in response + return respondWithCode(200, taxonomyDoc); + + } catch (error) { + console.error('Get taxonomy categories failed:', error); + return respondWithCode(500, { code: 500, message: 'Internal server error retrieving taxonomy' }); + } +}; \ No newline at end of file diff --git a/api-service/service/UserProfileService.js b/api-service/service/UserProfileService.js index 100219a..60189d9 100644 --- a/api-service/service/UserProfileService.js +++ b/api-service/service/UserProfileService.js @@ -3,8 +3,8 @@ const { setCache, getCache, invalidateCache } = require('../utils/redisUtil'); const { respondWithCode } = require('../utils/writer'); const { getUserData } = require('../utils/authUtil'); const { CACHE_TTL, CACHE_KEYS } = require('../utils/cacheConfig'); -// Import the new function -const { updateUserMetadata, updateUserPhone, updateAuth0Username, deleteAuth0User } = require('../utils/auth0Util'); +const {updateUserPhone, updateAuth0Username, deleteAuth0User } = require('../utils/auth0Util'); +const { ObjectId } = require('mongodb'); // Ensure ObjectId is imported /** * Get User Profile @@ 
-76,11 +76,8 @@ exports.updateUserProfile = async function (req, body) { if (body.username) { try { await updateAuth0Username(auth0UserId, body.username); - // Optionally: Update nickname in metadata as well if desired - // await updateUserMetadata(auth0UserId, { nickname: body.username }); + await updateUserMetadata(auth0UserId, { nickname: body.username }); } catch (auth0Error) { - // If Auth0 update fails (e.g., username exists in Auth0 connection), return an error - // You might want to check the specific error type from auth0Error console.error(`Auth0 username update failed for ${auth0UserId}:`, auth0Error); return respondWithCode(409, { // Use 409 Conflict or appropriate code code: 409, @@ -105,23 +102,70 @@ exports.updateUserProfile = async function (req, body) { const updateData = { updatedAt: new Date(), }; - // Update local DB username only if Auth0 update was successful (or not attempted) + let demographicsChanged = false; // Flag to track if demographics were updated + + // Update local DB username and phone if (body.username !== undefined) updateData.username = body.username; if (body.phone !== undefined) updateData.phone = body.phone; + // --- Update Demographic Data --- + // Use dot notation to set fields within the demographicData object + if (body.gender !== undefined) { + updateData['demographicData.gender'] = body.gender; + demographicsChanged = true; + } + if (body.incomeBracket !== undefined) { + updateData['demographicData.incomeBracket'] = body.incomeBracket; + demographicsChanged = true; + } + if (body.country !== undefined) { + updateData['demographicData.country'] = body.country; + demographicsChanged = true; + } + if (body.age !== undefined) { + // Ensure age is null or an integer + const ageValue = body.age === null ? 
null : parseInt(body.age); + if (ageValue === null || !isNaN(ageValue)) { + updateData['demographicData.age'] = ageValue; + demographicsChanged = true; + // If age is being set, clear the inferred age bracket + updateData['demographicData.inferredAgeBracket'] = null; + } else { + console.warn(`Invalid age value provided for user ${auth0UserId}: ${body.age}`); + // Optionally return a 400 error here + } + } + // --- End Update Demographic Data --- + + // Only update allowed privacy settings + let privacySettingsChanged = false; // Flag for privacy changes if (body.privacySettings !== undefined) { - updateData.privacySettings = {}; + // Use dot notation for nested privacy settings updates if (body.privacySettings.dataSharingConsent !== undefined) { - updateData.privacySettings.dataSharingConsent = body.privacySettings.dataSharingConsent; + updateData['privacySettings.dataSharingConsent'] = body.privacySettings.dataSharingConsent; + privacySettingsChanged = true; } if (body.privacySettings.anonymizeData !== undefined) { - updateData.privacySettings.anonymizeData = body.privacySettings.anonymizeData; + updateData['privacySettings.anonymizeData'] = body.privacySettings.anonymizeData; + privacySettingsChanged = true; } // DO NOT update optInStores or optOutStores here } - if (body.dataAccess !== undefined) updateData.dataAccess = body.dataAccess; + + // Check if there's anything to update (excluding updatedAt) + const updateKeys = Object.keys(updateData).filter(key => key !== 'updatedAt'); + if (updateKeys.length === 0) { + // Nothing changed + const currentUser = await db.collection('users').findOne( + { auth0Id: auth0UserId }, + { projection: { preferences: 0 } } + ); + return respondWithCode(200, currentUser || { message: "No changes detected." 
}); + } + + console.log(`Updating user ${auth0UserId} with data:`, updateData); const result = await db .collection('users') @@ -132,30 +176,39 @@ exports.updateUserProfile = async function (req, body) { ); if (!result) { - // This case might occur if the user was deleted between checks return respondWithCode(404, { code: 404, message: 'User not found during final update' }); } // --- Cache Invalidation & Update --- const cacheKey = `${CACHE_KEYS.USER_DATA}${auth0UserId}`; - await invalidateCache(cacheKey); + await invalidateCache(cacheKey); // Invalidate user data cache + + // Invalidate general preferences cache if demographics changed + if (demographicsChanged) { + await invalidateCache(`${CACHE_KEYS.PREFERENCES}${auth0UserId}`); + console.log(`Invalidated general preferences cache for ${auth0UserId} due to demographic update.`); + } - // Invalidate store preferences if privacy settings changed - if (updateData.privacySettings && result.privacySettings?.optInStores) { - const userObjectId = result._id; - for (const storeId of result.privacySettings.optInStores) { - await invalidateCache(`${CACHE_KEYS.STORE_PREFERENCES}${userObjectId}:${storeId}`); + // Invalidate store-specific preferences if demographics or relevant privacy settings changed + // Also invalidate if the optInStores list exists (safer to clear on any profile update) + const updatedUserDoc = result; // Use the returned document from findOneAndUpdate + if ((demographicsChanged || privacySettingsChanged) && updatedUserDoc.privacySettings?.optInStores) { + const userObjectId = updatedUserDoc._id; // Use the _id from the updated result + console.log(`Invalidating store preferences for user ${userObjectId} due to update.`); + for (const storeId of updatedUserDoc.privacySettings.optInStores) { + const storePrefCacheKey = `${CACHE_KEYS.STORE_PREFERENCES}${userObjectId}:${storeId}`; + await invalidateCache(storePrefCacheKey); + console.log(`Invalidated cache: ${storePrefCacheKey}`); } } // Update cache 
with the new data (without preferences) - // Note: This happens *after* invalidation, ensuring fresh data is set if needed immediately - await setCache(cacheKey, JSON.stringify(result), { EX: CACHE_TTL.USER_DATA }); + await setCache(cacheKey, JSON.stringify(updatedUserDoc), { EX: CACHE_TTL.USER_DATA }); - return respondWithCode(200, result); + return respondWithCode(200, updatedUserDoc); } catch (error) { - // Catch errors not handled specifically above console.error('Update profile failed:', error); + // Check for specific MongoDB errors if needed (e.g., validation errors) return respondWithCode(500, { code: 500, message: 'Internal server error during profile update' }); } }; @@ -214,3 +267,195 @@ exports.deleteUserProfile = async function (req) { return respondWithCode(500, { code: 500, message: 'Internal server error' }); } }; + +/** + * Get Recent User Data Submissions + * Retrieves a list of recent data submissions made about the authenticated user. + */ +exports.getRecentUserData = async function (req, limit = 10, page = 1) { + try { + const db = getDB(); + const userData = req.user || (await getUserData(req.headers.authorization?.split(' ')[1])); + + // Find user to get their internal _id + const user = await db.collection('users').findOne({ auth0Id: userData.sub }, { projection: { _id: 1 } }); + if (!user) { + return respondWithCode(404, { code: 404, message: 'User not found' }); + } + + const skip = (page - 1) * limit; + + // Query userData collection + const recentData = await db.collection('userData') + .find({ userId: user._id }) // Filter by the user's ObjectId + .sort({ timestamp: -1 }) // Sort by submission time descending + .skip(skip) + .limit(limit) + .project({ // Project only necessary fields for RecentUserDataEntry schema + _id: 1, + storeId: 1, + dataType: 1, + timestamp: 1, // Submission timestamp + entryTimestamp: '$entries.timestamp', // Assuming timestamp is within entries array + // Add simplified details if needed, e.g., item count or 
query string + // details: { $cond: { if: { $eq: ['$dataType', 'purchase'] }, then: { itemCount: { $size: '$entries.items' } }, else: '$entries.query' } } + }) + .toArray(); + + // Simple transformation if needed (e.g., flatten entryTimestamp if it's an array) + const formattedData = recentData.map(entry => ({ + ...entry, + // If entryTimestamp is an array due to projection, take the first element + entryTimestamp: Array.isArray(entry.entryTimestamp) ? entry.entryTimestamp[0] : entry.entryTimestamp, + // Add placeholder for details + details: {} + })); + + + // Caching could be added here if this data is frequently accessed + // const cacheKey = `${CACHE_KEYS.USER_RECENT_DATA}${user._id}:${page}:${limit}`; + // await setCache(cacheKey, JSON.stringify(formattedData), { EX: CACHE_TTL.SHORT }); // Example TTL + + return respondWithCode(200, formattedData); + + } catch (error) { + console.error('Get recent user data failed:', error); + return respondWithCode(500, { code: 500, message: 'Internal server error' }); + } +}; + +/** + * Get User Spending Analytics + * Retrieves aggregated spending data categorized by taxonomy for the authenticated user. 
+ */ +exports.getSpendingAnalytics = async function (req) { + try { + const db = getDB(); + const userData = req.user || (await getUserData(req.headers.authorization?.split(' ')[1])); + + // --- Date Range Handling --- + const { startDate, endDate } = req.query; + const dateMatch = {}; + if (startDate) { + try { + dateMatch['$gte'] = new Date(startDate); + } catch (e) { + console.warn('Invalid startDate format:', startDate); + } + } + if (endDate) { + try { + // Add 1 day to endDate to include the whole day + const end = new Date(endDate); + end.setDate(end.getDate() + 1); + dateMatch['$lt'] = end; + } catch (e) { + console.warn('Invalid endDate format:', endDate); + } + } + const hasDateFilter = Object.keys(dateMatch).length > 0; + // --- End Date Range Handling --- + + + // Find user to get their internal _id + const user = await db.collection('users').findOne({ auth0Id: userData.sub }, { projection: { _id: 1 } }); + if (!user) { + return respondWithCode(404, { code: 404, message: 'User not found' }); + } + + // Fetch the taxonomy once (remains the same) + const taxonomyDoc = await db.collection('taxonomy').findOne({ current: true }); + const categoryMap = (taxonomyDoc && taxonomyDoc.data && taxonomyDoc.data.categories) + ? taxonomyDoc.data.categories.reduce((map, cat) => { + map[cat.id] = cat.name; // Assuming category ID is used in items + map[cat.name] = cat.name; // Allow matching by name too, just in case + return map; + }, {}) + : {}; + + const pipeline = [ + // Match user and data type + { $match: { userId: user._id, dataType: 'purchase' } }, + // Unwind entries array + { $unwind: '$entries' }, + // --- Add Date Filtering Stage --- + ...(hasDateFilter ? 
[{ $match: { 'entries.timestamp': dateMatch } }] : []), + // Unwind items array + { $unwind: '$entries.items' }, + // --- Group by Month and Category --- + { + $group: { + _id: { + // Group by year-month and category + yearMonth: { $dateToString: { format: "%Y-%m", date: "$entries.timestamp" } }, + category: '$entries.items.category' // Use the category field from item + }, + // Calculate total spent for this category in this month + monthlyTotal: { + $sum: { + $cond: { + if: { $and: [ + { $isNumber: '$entries.items.price' }, + { $isNumber: '$entries.items.quantity' } + ]}, + then: { $multiply: ['$entries.items.price', '$entries.items.quantity'] }, + // Handle cases where quantity might be missing but price exists + else: { $cond: { if: { $isNumber: '$entries.items.price' }, then: '$entries.items.price', else: 0 } } + } + } + } + } + }, + // --- Group by Month to structure categories --- + { + $group: { + _id: '$_id.yearMonth', // Group by month string (e.g., "2025-01") + categories: { + $push: { // Create an array of category-spend pairs for the month + k: { $ifNull: [ { $toString: '$_id.category' }, "Unknown" ] }, // Category name (or ID as string) + v: '$monthlyTotal' + } + } + } + }, + // --- Convert categories array to object and sort --- + { + $project: { + _id: 0, // Exclude the default _id + month: '$_id', // Rename _id to month + spending: { $arrayToObject: '$categories' } // Convert [{k: "Cat1", v: 100}, ...] to { "Cat1": 100, ... 
} + } + }, + // Sort by month ascending + { $sort: { month: 1 } } + ]; + + const results = await db.collection('userData').aggregate(pipeline).toArray(); + + // --- Map category IDs/names to proper names from taxonomy --- + const spendingAnalytics = results.map(monthlyData => { + const mappedSpending = {}; + for (const categoryKey in monthlyData.spending) { + const categoryName = categoryMap[categoryKey] || categoryKey; // Use mapped name or original key + mappedSpending[categoryName] = monthlyData.spending[categoryKey]; + } + return { + month: monthlyData.month, + spending: mappedSpending + }; + }); + // --- End Mapping --- + + + // Caching could be added here, considering date range in the key + // const cacheKey = `${CACHE_KEYS.USER_SPENDING_ANALYTICS}${user._id}:${startDate || 'all'}:${endDate || 'all'}`; + // await setCache(cacheKey, JSON.stringify(spendingAnalytics), { EX: CACHE_TTL.MEDIUM }); + + // Return the array structure: [{ month: "YYYY-MM", spending: { "Category1": 100, ... } }, ...] 
+ return respondWithCode(200, spendingAnalytics); + + } catch (error) { + console.error('Get spending analytics failed:', error); + return respondWithCode(500, { code: 500, message: 'Internal server error' }); + } +}; diff --git a/api-service/utils/cacheConfig.js b/api-service/utils/cacheConfig.js index cd0fd6c..a07e3ff 100644 --- a/api-service/utils/cacheConfig.js +++ b/api-service/utils/cacheConfig.js @@ -22,6 +22,7 @@ const CACHE_KEYS = { ADMIN_TOKEN: 'auth0_management_token', // Auth0 management token PREFERENCES: 'preferences:', // User preferences STORE_PREFERENCES: 'prefs:', // Store preferences + TAXONOMY: 'taxonomy:current', // <-- Add this line AI_REQUEST: 'ai_request:', // AI service request cache }; diff --git a/api-service/utils/dbSchemas.js b/api-service/utils/dbSchemas.js index c4a8dab..108d7a2 100644 --- a/api-service/utils/dbSchemas.js +++ b/api-service/utils/dbSchemas.js @@ -3,7 +3,7 @@ */ // Schema version tracking -const SCHEMA_VERSION = '2.0.1'; +const SCHEMA_VERSION = '2.0.8'; // Incremented version const userSchema = { validator: { @@ -14,6 +14,7 @@ const userSchema = { schemaVersion: { bsonType: 'string', description: 'Schema version for tracking changes', + // Consider adding enum: [SCHEMA_VERSION] if strict enforcement is needed }, auth0Id: { bsonType: 'string', @@ -31,6 +32,58 @@ const userSchema = { bsonType: ['string', 'null'], description: 'Phone number', }, + // --- Start: Demographic Data Object --- + demographicData: { + bsonType: 'object', + description: 'User-provided and inferred demographic information', + properties: { + gender: { + bsonType: ['string', 'null'], + description: 'User gender identity', + enum: ['male', 'female', 'non-binary', 'prefer_not_to_say', null] + }, + incomeBracket: { + bsonType: ['string', 'null'], + description: 'User income bracket category', + enum: ['<25k', '25k-50k', '50k-100k', '100k-200k', '>200k', 'prefer_not_to_say', null] + }, + country: { + bsonType: ['string', 'null'], + description: 'User 
country of residence (e.g., ISO 3166-1 alpha-2 code)', + }, + age: { + bsonType: ['int', 'null'], + description: 'User age', + minimum: 0, + }, + // --- Inferred fields within demographicData --- + inferredHasKids: { + bsonType: ['bool', 'null'], + description: 'Inferred: Does the user likely have children? (null if unknown)', + }, + inferredRelationshipStatus: { + bsonType: ['string', 'null'], + description: 'Inferred: User relationship status (null if unknown)', + enum: ['single', 'relationship', 'married', null], + }, + inferredEmploymentStatus: { + bsonType: ['string', 'null'], + description: 'Inferred: User employment status (null if unknown)', + enum: ['employed', 'unemployed', 'student', null], + }, + inferredEducationLevel: { + bsonType: ['string', 'null'], + description: 'Inferred: User education level (null if unknown)', + enum: ['high_school', 'bachelors', 'masters', 'doctorate', null], + }, + inferredAgeBracket: { + bsonType: ['string', 'null'], + description: 'Inferred: User age bracket if age not provided (null if unknown)', + enum: ['18-24', '25-34', '35-44', '45-54', '55-64', '65+', null], + }, + } + }, + // --- End: Demographic Data Object --- preferences: { bsonType: 'array', description: 'User interests and preferences', @@ -40,12 +93,22 @@ const userSchema = { properties: { category: { bsonType: 'string' }, score: { - bsonType: 'double', + bsonType: ['double', 'int'], minimum: 0.0, maximum: 1.0, }, attributes: { bsonType: 'object', + // Attributes can have any key, and the value is another object + additionalProperties: { + bsonType: 'object', + // The inner object has attribute values as keys and scores as values + additionalProperties: { + bsonType: ['double', 'int'], + minimum: 0.0, + maximum: 1.0, + } + } }, }, }, @@ -56,14 +119,8 @@ const userSchema = { properties: { dataSharingConsent: { bsonType: 'bool' }, anonymizeData: { bsonType: 'bool' }, - optInStores: { bsonType: 'array' }, - optOutStores: { bsonType: 'array' }, - }, - }, - 
dataAccess: { - bsonType: 'object', - properties: { - allowedDomains: { bsonType: 'array' }, + optInStores: { bsonType: 'array', items: { bsonType: 'string' } }, // Specify item type + optOutStores: { bsonType: 'array', items: { bsonType: 'string' } }, // Specify item type }, }, createdAt: { bsonType: 'date' }, @@ -71,8 +128,8 @@ const userSchema = { }, }, }, - validationLevel: 'moderate', - validationAction: 'error', + validationLevel: 'moderate', // Changed from 'strict' to 'moderate' during dev if needed + validationAction: 'warn', // Changed from 'error' to 'warn' during dev if needed }; // Store schema @@ -186,7 +243,7 @@ const userDataSchema = { properties: { name: { bsonType: 'string' }, category: { bsonType: 'string' }, - price: { bsonType: 'double' }, + price: { bsonType: ['double', 'int'] }, quantity: { bsonType: 'int' }, attributes: { bsonType: 'object', diff --git a/api-service/utils/taxonomyUtil.js b/api-service/utils/taxonomyUtil.js deleted file mode 100644 index 890039f..0000000 --- a/api-service/utils/taxonomyUtil.js +++ /dev/null @@ -1,140 +0,0 @@ -const taxonomyService = require('../clients/taxonomyService'); -const { respondWithCode } = require('./writer'); - -/** - * Validate category and attributes - * @param {string} categoryId - Category ID - * @param {Object} attributes - Attributes to validate - * @returns {Promise<{valid: boolean, response: Object|null}>} Validation result - */ -exports.validateCategoryAndAttributes = async function (categoryId, attributes) { - // Validate category exists - const isValidCategory = await taxonomyService - .getCategoryAttributes(categoryId) - .then((attrs) => !!attrs) - .catch(() => false); - - if (!isValidCategory) { - return { - valid: false, - response: respondWithCode(400, { - code: 400, - message: `Invalid category: ${categoryId}`, - }), - }; - } - - // Validate attributes if provided - if (attributes) { - const validationResult = await taxonomyService.validateAttributes(categoryId, attributes); - if 
(!validationResult.valid) { - return { - valid: false, - response: respondWithCode(400, { - code: 400, - message: validationResult.message || `Invalid attributes for category ${categoryId}`, - }), - }; - } - } - - return { valid: true, response: null }; -}; - -/** - * Validate multiple items at once - * @param {Array} items - Items with category and attributes - * @returns {Promise} Validation results with index keys - */ -exports.validateBatch = async function (items) { - try { - const productsToValidate = items.map((item) => ({ - category: item.category, - attributes: item.attributes || {}, - })); - - return await taxonomyService.validateBatch(productsToValidate); - } catch (error) { - console.error('Batch validation failed:', error); - return Object.fromEntries(items.map((_, index) => [index.toString(), { valid: false, message: 'Batch validation failed' }])); - } -}; - -/** - * Get price range for an amount - * @param {number} amount - Price amount - * @param {string} categoryId - Optional category ID - * @returns {Promise} Price range label - */ -exports.getPriceRange = async function (amount, categoryId = null) { - try { - const result = await taxonomyService.getPriceRangeForAmount(amount, categoryId); - return result.range || 'unknown'; - } catch (error) { - console.error(`Failed to get price range for ${amount}:`, error); - return 'unknown'; - } -}; - -/** - * Validate purchase entry - * @param {Object} entry - Purchase entry to validate - * @returns {Object|null} - Response object if invalid, null if valid - */ -exports.validatePurchaseEntry = async function (entry) { - if (!entry.timestamp || !entry.items || !Array.isArray(entry.items)) { - return { - code: 400, - message: 'Purchase entries require timestamp and items array', - }; - } - - // Validate each item has a name - for (const item of entry.items) { - if (!item.name) { - return { - code: 400, - message: 'Each purchase item requires a name', - }; - } - } - - return null; -}; - -/** - * Validate 
search entry - * @param {Object} entry - Search entry to validate - * @returns {Object|null} - Response object if invalid, null if valid - */ -exports.validateSearchEntry = async function (entry) { - if (!entry.timestamp || !entry.query) { - return { - code: 400, - message: 'Search entries require timestamp and query', - }; - } - - return null; -}; - -/** - * Validate category for search entry - * @param {string} categoryId - Category ID to validate - * @returns {Promise} - Response object if invalid, null if valid - */ -exports.validateSearchCategory = async function (categoryId) { - const isValidCategory = await taxonomyService - .getCategoryAttributes(categoryId) - .then((attributes) => !!attributes) - .catch(() => false); - - if (!isValidCategory) { - return { - code: 400, - message: `Invalid category: ${categoryId}`, - }; - } - - return null; -}; diff --git a/authentication.md b/authentication.md new file mode 100644 index 0000000..7176963 --- /dev/null +++ b/authentication.md @@ -0,0 +1,9 @@ +# Authentication Guide + +All requests to the Tapiro Store API must be authenticated using an API key. + +## Using Your API Key + +You must include your API key in the `X-API-Key` header for every request you make to store-specific endpoints. + +**Header Format:** diff --git a/compose.yml b/compose.yml index 6c2062c..eb25bb3 100644 --- a/compose.yml +++ b/compose.yml @@ -29,6 +29,7 @@ services: - AUTH0_M2M_CLIENT_SECRET=${AUTH0_M2M_CLIENT_SECRET} - AI_SERVICE_URL=http://ml-service:8000/api - AI_SERVICE_API_KEY=${AI_SERVICE_API_KEY} + - AUTH0_DOMAIN=${AUTH0_DOMAIN} ports: - "3000:3000" depends_on: diff --git a/data_submission.md b/data_submission.md new file mode 100644 index 0000000..1e37c69 --- /dev/null +++ b/data_submission.md @@ -0,0 +1,72 @@ +--- + +**3. `data_submission.md`** + +````markdown +# Data Submission Guide + +Stores can submit user interaction data, such as purchases and searches, to Tapiro for analysis and preference building. 
+ +## Endpoint + +`POST /users/data` + +## Purpose + +To send batches of user interaction data (purchases or searches) associated with a specific user email address. + +## Authentication + +Requires a valid API key in the `X-API-Key` header. See [Authentication Guide](./authentication.md). + +## Request Body + +The request body must be a JSON object conforming to the `UserData` schema: + +```json +{ + "email": "user@example.com", + "dataType": "purchase", + "entries": [ + { + "timestamp": "2024-05-15T14:30:00Z", + "items": [ + { + "sku": "XYZ-123", + "name": "Men's Cotton T-Shirt", + "category": "201", // Must match a category ID or name from the Taxonomy + "price": 25.99, + "quantity": 2, + "attributes": { + // <-- Optional: Key-value pairs based on Taxonomy for the category + "color": "navy", // Example: Value for the 'color' attribute + "size": "M", // Example: Value for the 'size' attribute + "material": "cotton" // Example: Value for the 'material' attribute + // Add other relevant attributes defined in the taxonomy for category "201" + } + }, + { + "sku": "ABC-789", + "name": "Running Shorts", + "category": "Clothing", // Can use name or ID + "price": 39.95, + "quantity": 1, + "attributes": { + "color": "black", + "size": "M", + "material": "polyester" + } + } + ], + "totalValue": 91.93 + } + // Add more PurchaseEntry objects if submitting multiple purchases in one batch + ], + "metadata": { + "source": "web", + "deviceType": "desktop", + "sessionId": "abc-123-xyz-789" + } +} +``` +```` diff --git a/ml-service/app/api/endpoints/preferences.py b/ml-service/app/api/endpoints/preferences.py index a5b2144..f156651 100644 --- a/ml-service/app/api/endpoints/preferences.py +++ b/ml-service/app/api/endpoints/preferences.py @@ -1,7 +1,7 @@ from fastapi import APIRouter, HTTPException, Depends, Body, BackgroundTasks from app.models.preferences import UserDataEntry, UserPreferences, UserPreference from app.db.mongodb import get_database -from 
app.services.preferenceProcessor import process_user_data, update_user_preferences +from app.services.preferenceProcessor import process_user_data from app.utils.preference_utils import mark_processing_failed from app.utils.redis_util import invalidate_cache, CACHE_KEYS from typing import List @@ -61,33 +61,4 @@ async def process_user_data_endpoint( raise HTTPException( status_code=500, detail=f"Error processing data: {str(e)}" - ) - -@router.post( - "/preferences/update", - response_model=UserPreferences, - description="Update user preferences directly", - summary="Update preferences" -) -async def update_preferences_endpoint( - preferences: List[UserPreference] = Body(...), - auth0_id: str = Body(...), - email: str = Body(...), - db=Depends(get_database) -): - """Update user preferences directly from the API service""" - logger.info(f"Updating preferences for user: {auth0_id}") - logger.info(f"Number of preference categories: {len(preferences)}") - - try: - # Call the processor function instead of handling processing here - result = await update_user_preferences(auth0_id, email, preferences, db) - return result - except HTTPException: - raise - except Exception as e: - logger.error(f"Error updating preferences: {str(e)}") - raise HTTPException( - status_code=500, - detail=f"Error updating preferences: {str(e)}" ) \ No newline at end of file diff --git a/ml-service/app/data/taxonomy.yaml b/ml-service/app/data/taxonomy.yaml index a19c7bb..c87ba70 100644 --- a/ml-service/app/data/taxonomy.yaml +++ b/ml-service/app/data/taxonomy.yaml @@ -1,4 +1,4 @@ -version: "1.0.0" +version: "1.0.5" categories: # Electronics Category Tree - id: "100" @@ -6,35 +6,38 @@ categories: description: "Electronic devices and accessories" attributes: - name: "brand" + description: "Manufacturer of the product" values: - [ - "Apple", - "Samsung", - "Sony", - "LG", - "Google", - "Microsoft", - "Dell", - "HP", - "Lenovo", - "Asus", - ] + - Apple + - Samsung + - Sony + - LG + - Google + - 
Microsoft + - Dell + - HP + - Lenovo + - Asus - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "General price tier" + values: + - budget + - mid_range + - premium + - luxury - name: "color" + description: "Available color options" values: - [ - "black", - "white", - "silver", - "gold", - "blue", - "red", - "green", - "purple", - "pink", - "orange", - ] + - black + - white + - silver + - gold + - blue + - red + - green + - purple + - pink + - orange - id: "101" name: "Smartphones" @@ -42,41 +45,72 @@ categories: description: "Mobile phones and smartphones" attributes: - name: "brand" + description: "Manufacturer" values: - [ - "Apple", - "Samsung", - "Google", - "OnePlus", - "Xiaomi", - "Huawei", - "Motorola", - "Nokia", - "Sony", - "Nothing", - ] + - Apple + - Samsung + - Google + - OnePlus + - Xiaomi + - Huawei + - Motorola + - Nokia + - Sony + - Nothing - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "Price segment" + values: + - budget + - mid_range + - premium + - luxury - name: "color" + description: "Color options" values: - [ - "black", - "white", - "silver", - "gold", - "blue", - "red", - "green", - "purple", - "pink", - "yellow", - ] + - black + - white + - silver + - gold + - blue + - red + - green + - purple + - pink + - yellow - name: "storage" - values: ["64GB", "128GB", "256GB", "512GB", "1TB"] + description: "Built-in storage capacity" + values: + - 64GB + - 128GB + - 256GB + - 512GB + - 1TB - name: "screen_size" - values: ["compact", "standard", "large"] + description: "Relative screen size" + values: + - compact + - standard + - large - name: "os" - values: ["iOS", "Android"] + description: "Operating system" + values: + - iOS + - Android + - name: "battery_capacity" + description: "Battery capacity in mAh" + values: + - "2000-3000" + - "3001-4000" + - "4001-5000" + - "5001+" + - name: "connectivity" + description: "Network connectivity standards" + values: + - 
4G + - 5G + - WiFi + - Bluetooth + - NFC - id: "102" name: "Laptops" @@ -84,42 +118,94 @@ categories: description: "Laptop computers and notebooks" attributes: - name: "brand" + description: "Manufacturer" values: - [ - "Apple", - "Dell", - "HP", - "Lenovo", - "Asus", - "Acer", - "Microsoft", - "Samsung", - "MSI", - "Razer", - ] + - Apple + - Dell + - HP + - Lenovo + - Asus + - Acer + - Microsoft + - Samsung + - MSI + - Razer - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "Price segment" + values: + - budget + - mid_range + - premium + - luxury - name: "usage_type" - values: ["everyday", "business", "gaming", "creative", "student"] + description: "Intended usage scenario" + values: + - everyday + - business + - gaming + - creative + - student + - professional - name: "screen_size" - values: ["13-inch", "14-inch", "15-inch", "16-inch", "17-inch"] + description: "Size of the display" + values: + - "13-inch" + - "14-inch" + - "15-inch" + - "16-inch" + - "17-inch" - name: "processor" + description: "CPU model" values: - [ - "Intel i3", - "Intel i5", - "Intel i7", - "Intel i9", - "AMD Ryzen 3", - "AMD Ryzen 5", - "AMD Ryzen 7", - "AMD Ryzen 9", - "Apple M1", - "Apple M2", - "Apple M3", - ] + - Intel i3 + - Intel i5 + - Intel i7 + - Intel i9 + - AMD Ryzen 3 + - AMD Ryzen 5 + - AMD Ryzen 7 + - AMD Ryzen 9 + - Apple M1 + - Apple M2 + - Apple M3 - name: "os" - values: ["Windows", "macOS", "Chrome OS", "Linux"] + description: "Operating system" + values: + - Windows + - macOS + - Chrome OS + - Linux + - name: "ram" + description: "Amount of system memory" + values: + - 8GB + - 16GB + - 32GB + - "64GB+" + - name: "storage_type" + description: "Type of primary storage drive" + values: + - SSD + - HDD + - name: "storage_size" + description: "Capacity of primary storage drive" + values: + - 128GB + - 256GB + - 512GB + - 1TB + - "2TB+" + - name: "gpu_type" + description: "Type of graphics processing unit" + values: + - integrated + - 
dedicated_nvidia + - dedicated_amd + - name: "touchscreen" + description: "Whether the laptop has a touchscreen" + values: + - "yes" + - "no" - id: "103" name: "Tablets" @@ -127,24 +213,45 @@ categories: description: "Tablet computers" attributes: - name: "brand" + description: "Manufacturer" values: - [ - "Apple", - "Samsung", - "Microsoft", - "Amazon", - "Lenovo", - "Huawei", - "Google", - ] + - Apple + - Samsung + - Microsoft + - Amazon + - Lenovo + - Huawei + - Google - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "Price tier" + values: + - budget + - mid_range + - premium + - luxury - name: "screen_size" - values: ["small", "medium", "large"] + description: "Relative display size" + values: + - small + - medium + - large - name: "os" - values: ["iOS", "Android", "Windows", "FireOS"] + description: "Operating system" + values: + - iOS + - Android + - Windows + - FireOS - name: "connectivity" - values: ["WiFi", "WiFi+Cellular"] + description: "Network connectivity" + values: + - WiFi + - WiFi+Cellular + - name: "pen_support" + description: "Stylus support" + values: + - "yes" + - "no" - id: "104" name: "Wearables" @@ -152,23 +259,43 @@ categories: description: "Smartwatches and fitness trackers" attributes: - name: "brand" + description: "Manufacturer" values: - [ - "Apple", - "Samsung", - "Fitbit", - "Garmin", - "Amazfit", - "Huawei", - "Fossil", - "Withings", - ] + - Apple + - Samsung + - Fitbit + - Garmin + - Amazfit + - Huawei + - Fossil + - Withings - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "Price tier" + values: + - budget + - mid_range + - premium + - luxury - name: "type" - values: ["smartwatch", "fitness_tracker", "hybrid", "sport"] + description: "Wearable category" + values: + - smartwatch + - fitness_tracker + - hybrid + - sport - name: "connectivity" - values: ["bluetooth", "bluetooth+cellular", "wifi+bluetooth"] + description: "Connectivity options" + 
values: + - bluetooth + - bluetooth+cellular + - wifi+bluetooth + - name: "water_resistance" + description: "Water resistance rating" + values: + - none + - water_resistant + - swimproof + - diveproof - id: "105" name: "Audio" @@ -176,33 +303,46 @@ categories: description: "Headphones, earbuds, and speakers" attributes: - name: "brand" + description: "Manufacturer" values: - [ - "Sony", - "Bose", - "Apple", - "Samsung", - "Sennheiser", - "JBL", - "Audio-Technica", - "Sonos", - "Beats", - "Jabra", - ] + - Sony + - Bose + - Apple + - Samsung + - Sennheiser + - JBL + - Audio-Technica + - Sonos + - Beats + - Jabra - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "Price tier" + values: + - budget + - mid_range + - premium + - luxury - name: "type" - values: ["headphones", "earbuds", "speakers", "soundbars"] + description: "Product form factor" + values: + - headphones + - earbuds + - speakers + - soundbars - name: "connectivity" - values: ["wireless", "wired", "bluetooth", "wifi"] + description: "Connection type" + values: + - wireless + - wired + - bluetooth + - wifi - name: "feature" + description: "Key feature" values: - [ - "noise_cancellation", - "water_resistant", - "smart_assistant", - "spatial_audio", - ] + - noise_cancellation + - water_resistant + - smart_assistant + - spatial_audio # Fashion Category Tree - id: "200" @@ -210,66 +350,123 @@ categories: description: "Clothing, shoes, and accessories" attributes: - name: "gender" - values: ["women", "men", "unisex", "kids"] + description: "Intended gender" + values: + - women + - men + - unisex + - kids - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "Price tier" + values: + - budget + - mid_range + - premium + - luxury - name: "season" - values: ["spring", "summer", "fall", "winter", "all_season"] + description: "Season suitability" + values: + - spring + - summer + - fall + - winter + - all_season - id: "201" name: 
"Clothing" parent_id: "200" - description: "Shirts, pants, dresses and other apparel" + description: "Shirts, pants, dresses, and other apparel" attributes: - name: "type" + description: "Clothing type" values: - [ - "shirts", - "t-shirts", - "pants", - "jeans", - "dresses", - "skirts", - "sweaters", - "jackets", - "coats", - "underwear", - "socks", - "activewear", - ] + - shirts + - t-shirts + - pants + - jeans + - dresses + - skirts + - sweaters + - jackets + - coats + - underwear + - socks + - activewear - name: "gender" - values: ["women", "men", "unisex", "kids"] + description: "Intended gender" + values: + - women + - men + - unisex + - kids - name: "size" - values: ["XS", "S", "M", "L", "XL", "XXL", "XXXL"] + description: "Available sizes" + values: + - XS + - S + - M + - L + - XL + - XXL + - XXXL + - kids_xs + - kids_s + - kids_m + - kids_l + - kids_xl + - toddler_2t + - toddler_3t + - toddler_4t + - infant_0-3m + - infant_3-6m + - infant_6-9m + - infant_9-12m + - infant_12-18m + - infant_18-24m - name: "material" + description: "Fabric material" values: - [ - "cotton", - "polyester", - "wool", - "silk", - "leather", - "denim", - "linen", - "synthetic", - ] + - cotton + - polyester + - wool + - silk + - leather + - denim + - linen + - synthetic - name: "color" + description: "Color options" values: - [ - "black", - "white", - "blue", - "red", - "green", - "yellow", - "purple", - "pink", - "gray", - "brown", - "multicolor", - ] + - black + - white + - blue + - red + - green + - yellow + - purple + - pink + - gray + - brown + - multicolor - name: "occasion" - values: ["casual", "formal", "business", "party", "sports", "lounge"] + description: "Suitable occasion" + values: + - casual + - formal + - business + - party + - sports + - lounge + - name: "style" + description: "Fashion style" + values: + - fast_fashion + - business_casual + - formal_wear + - comfort + - streetwear + - vintage + - sustainable - id: "202" name: "Shoes" @@ -277,28 +474,76 @@ 
categories: description: "Footwear of all types" attributes: - name: "type" + description: "Shoe style" values: - [ - "sneakers", - "boots", - "sandals", - "flats", - "heels", - "loafers", - "athletic", - "dress_shoes", - "slippers", - ] + - sneakers + - boots + - sandals + - flats + - heels + - loafers + - athletic + - dress_shoes + - slippers - name: "gender" - values: ["women", "men", "unisex", "kids"] + description: "Intended gender" + values: + - women + - men + - unisex + - kids - name: "size" - values: ["5", "6", "7", "8", "9", "10", "11", "12", "13", "14"] + description: "US shoe size (Adult & Kids)" + values: + - "5" + - "6" + - "7" + - "8" + - "9" + - "10" + - "11" + - "12" + - "13" + - "14" + - kids_1 + - kids_2 + - kids_3 + - kids_4 + - kids_5 + - kids_6 + - kids_7 + - kids_8 + - kids_9 + - kids_10 + - kids_11 + - kids_12 + - kids_13 - name: "material" - values: ["leather", "canvas", "synthetic", "suede", "mesh", "rubber"] + description: "Material type" + values: + - leather + - canvas + - synthetic + - suede + - mesh + - rubber - name: "color" - values: ["black", "white", "brown", "blue", "red", "multicolor"] + description: "Color options" + values: + - black + - white + - brown + - blue + - red + - multicolor - name: "occasion" - values: ["casual", "formal", "athletic", "outdoor", "special_occasion"] + description: "Suitable occasion" + values: + - casual + - formal + - athletic + - outdoor + - special_occasion # Home & Garden Category Tree - id: "300" @@ -306,22 +551,32 @@ categories: description: "Furniture, decor, and home improvement" attributes: - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "Price tier" + values: + - budget + - mid_range + - premium + - luxury - name: "room" + description: "Target room" values: - ["living_room", "bedroom", "kitchen", "bathroom", "office", "outdoor"] + - living_room + - bedroom + - kitchen + - bathroom + - office + - outdoor - name: "style" + description: "Design 
style" values: - [ - "modern", - "traditional", - "minimalist", - "rustic", - "industrial", - "scandinavian", - "bohemian", - "farmhouse", - ] + - modern + - traditional + - minimalist + - rustic + - industrial + - scandinavian + - bohemian + - farmhouse - id: "301" name: "Furniture" @@ -329,36 +584,51 @@ categories: description: "Tables, chairs, sofas, and other furniture" attributes: - name: "type" + description: "Furniture type" values: - [ - "sofa", - "chair", - "table", - "bed", - "desk", - "shelf", - "cabinet", - "dresser", - ] + - sofa + - chair + - table + - bed + - desk + - shelf + - cabinet + - dresser - name: "material" - values: ["wood", "metal", "glass", "plastic", "fabric", "leather"] + description: "Primary material" + values: + - wood + - metal + - glass + - plastic + - fabric + - leather - name: "color" + description: "Color options" values: - [ - "black", - "white", - "brown", - "gray", - "beige", - "blue", - "green", - "multicolor", - ] + - black + - white + - brown + - gray + - beige + - blue + - green + - multicolor - name: "room" + description: "Target room" values: - ["living_room", "bedroom", "kitchen", "bathroom", "office", "outdoor"] + - living_room + - bedroom + - kitchen + - bathroom + - office + - outdoor - name: "assembly_required" - values: ["yes", "no", "partial"] + description: "Assembly requirement" + values: + - "yes" + - "no" + - "partial" - id: "302" name: "Kitchen" @@ -366,45 +636,96 @@ categories: description: "Cookware, appliances, and kitchen accessories" attributes: - name: "type" + description: "Kitchen product type" values: - [ - "cookware", - "bakeware", - "utensils", - "small_appliance", - "large_appliance", - "cutlery", - "dinnerware", - "storage", - ] + - cookware + - bakeware + - utensils + - small_appliance + - large_appliance + - cutlery + - dinnerware + - storage - name: "material" + description: "Material composition" values: - [ - "stainless_steel", - "cast_iron", - "ceramic", - "glass", - "plastic", - 
"silicone", - "wood", - ] + - stainless_steel + - cast_iron + - ceramic + - glass + - plastic + - silicone + - wood - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "Price tier" + values: + - budget + - mid_range + - premium + - luxury - name: "brand" + description: "Manufacturer" values: - [ - "KitchenAid", - "Cuisinart", - "All-Clad", - "Le Creuset", - "Instant Pot", - "Ninja", - "OXO", - "Calphalon", - "Breville", - ] + - KitchenAid + - Cuisinart + - All-Clad + - Le Creuset + - Instant Pot + - Ninja + - OXO + - Calphalon + - Breville - name: "dishwasher_safe" - values: ["yes", "no"] + description: "Dishwasher compatibility" + values: + - "yes" + - "no" + + - id: "303" + name: "Gardening" + parent_id: "300" + description: "Gardening tools, plants, and supplies" + attributes: + - name: "type" + values: + - tools + - seeds + - plants + - soil + - fertilizer + - pots_planters + - pest_control + - name: "plant_type" + values: + - flower + - vegetable + - herb + - shrub + - tree + - indoor + + - id: "304" + name: "Tools & Home Improvement" + parent_id: "300" + description: "Hand tools, power tools, hardware, and improvement supplies" + attributes: + - name: "type" + values: + - hand_tool + - power_tool + - hardware + - plumbing + - electrical + - paint + - safety_gear + - name: "brand" + values: + - DeWalt + - Milwaukee + - Ryobi + - Craftsman + - Stanley + - Bosch # Beauty & Personal Care - id: "400" @@ -412,26 +733,42 @@ categories: description: "Makeup, skincare, and personal care products" attributes: - name: "price_range" - values: ["budget", "mid_range", "premium", "luxury"] + description: "Price tier" + values: + - budget + - mid_range + - premium + - luxury - name: "skin_type" - values: ["dry", "oily", "combination", "sensitive", "normal"] + description: "Suitable skin type" + values: + - dry + - oily + - combination + - sensitive + - normal - name: "concern" + description: "Skin concern" values: - ["acne", "aging", 
"dryness", "dullness", "redness", "sun_protection"] + - acne + - aging + - dryness + - dullness + - redness + - sun_protection - name: "brand" + description: "Manufacturer" values: - [ - "MAC", - "Fenty", - "Glossier", - "The Ordinary", - "Cetaphil", - "Neutrogena", - "L'Oreal", - "Estée Lauder", - "Olay", - "CeraVe", - ] + - MAC + - Fenty + - Glossier + - The Ordinary + - Cetaphil + - Neutrogena + - L'Oreal + - Estée Lauder + - Olay + - CeraVe - id: "401" name: "Skincare" @@ -439,43 +776,46 @@ categories: description: "Products for skin health and appearance" attributes: - name: "type" + description: "Product type" values: - [ - "cleanser", - "moisturizer", - "serum", - "mask", - "exfoliant", - "toner", - "sunscreen", - "eye_cream", - "treatment", - ] + - cleanser + - moisturizer + - serum + - mask + - exfoliant + - toner + - sunscreen + - eye_cream + - treatment - name: "skin_type" - values: ["dry", "oily", "combination", "sensitive", "normal"] + description: "Suitable skin type" + values: + - dry + - oily + - combination + - sensitive + - normal - name: "concern" + description: "Skin concern" values: - [ - "acne", - "aging", - "dryness", - "dullness", - "redness", - "sun_protection", - "dark_spots", - ] + - acne + - aging + - dryness + - dullness + - redness + - sun_protection + - dark_spots - name: "ingredient" + description: "Key ingredients" values: - [ - "hyaluronic_acid", - "retinol", - "vitamin_c", - "niacinamide", - "salicylic_acid", - "glycolic_acid", - "ceramides", - "peptides", - ] + - hyaluronic_acid + - retinol + - vitamin_c + - niacinamide + - salicylic_acid + - glycolic_acid + - ceramides + - peptides - id: "402" name: "Makeup" @@ -483,38 +823,55 @@ categories: description: "Cosmetics for face, eyes, and lips" attributes: - name: "type" + description: "Makeup type" values: - [ - "foundation", - "concealer", - "eyeshadow", - "mascara", - "eyeliner", - "lipstick", - "blush", - "powder", - "bronzer", - "highlighter", - ] + - foundation + - concealer + 
- eyeshadow + - mascara + - eyeliner + - lipstick + - blush + - powder + - bronzer + - highlighter - name: "finish" - values: ["matte", "dewy", "satin", "natural", "radiant", "shimmer"] + description: "Finish type" + values: + - matte + - dewy + - satin + - natural + - radiant + - shimmer - name: "coverage" - values: ["sheer", "light", "medium", "full"] + description: "Coverage level" + values: + - sheer + - light + - medium + - full - name: "color_family" + description: "Primary color family" values: - [ - "nude", - "pink", - "red", - "berry", - "coral", - "brown", - "plum", - "orange", - "neutral", - ] + - nude + - pink + - red + - berry + - coral + - brown + - plum + - orange + - neutral - name: "formulation" - values: ["liquid", "cream", "powder", "gel", "pencil", "stick"] + description: "Product formulation" + values: + - liquid + - cream + - powder + - gel + - pencil + - stick # Books, Movies & Music - id: "500" @@ -522,9 +879,18 @@ categories: description: "Books, movies, music, and digital entertainment" attributes: - name: "format" - values: ["physical", "digital", "streaming"] + description: "Content format" + values: + - physical + - digital + - streaming - name: "audience" - values: ["children", "young_adult", "adult", "all_ages"] + description: "Target audience" + values: + - children + - young_adult + - adult + - all_ages - id: "501" name: "Books" @@ -532,28 +898,50 @@ categories: description: "Printed and digital reading materials" attributes: - name: "format" - values: ["hardcover", "paperback", "ebook", "audiobook"] + description: "Book format" + values: + - hardcover + - paperback + - ebook + - audiobook - name: "genre" + description: "Literary genre" values: - [ - "fiction", - "non_fiction", - "science_fiction", - "fantasy", - "mystery", - "romance", - "biography", - "history", - "self_help", - "cooking", - "business", - ] + - fiction + - non_fiction + - science_fiction + - fantasy + - mystery + - romance + - biography + - history + - self_help + 
- cooking + - business + - science + - technology + - academic + - textbook + - travel + - art + - poetry - name: "audience" - values: ["children", "young_adult", "adult"] + description: "Target audience" + values: + - children + - young_adult + - adult - name: "bestseller" - values: ["yes", "no"] + description: "Bestseller status" + values: + - "yes" + - "no" - name: "release_timeframe" - values: ["new_release", "recent", "classic"] + description: "Release timeframe" + values: + - new_release + - recent + - classic - id: "502" name: "Movies" @@ -561,34 +949,484 @@ categories: description: "Films and video content" attributes: - name: "format" - values: ["dvd", "blu_ray", "4k_uhd", "digital", "streaming"] + description: "Media format" + values: + - dvd + - blu_ray + - 4k_uhd + - digital + - streaming - name: "genre" + description: "Film genre" values: - [ - "action", - "comedy", - "drama", - "horror", - "sci_fi", - "fantasy", - "romance", - "thriller", - "documentary", - "animation", - ] + - action + - comedy + - drama + - horror + - sci_fi + - fantasy + - romance + - thriller + - documentary + - animation - name: "rating" + description: "Content rating" values: - [ - "G", - "PG", - "PG-13", - "R", - "NC-17", - "TV-Y", - "TV-G", - "TV-PG", - "TV-14", - "TV-MA", - ] + - G + - PG + - "PG-13" + - R + - NC-17 + - TV-Y + - TV-G + - TV-PG + - TV-14 + - TV-MA - name: "release_timeframe" - values: ["new_release", "recent", "classic"] + description: "Release timeframe" + values: + - new_release + - recent + - classic + + - id: "503" + name: "Streaming Services" + parent_id: "500" + description: "Digital streaming subscriptions for video and music" + attributes: + - name: "service_type" + values: + - video + - music + - gaming + - name: "provider" + values: + - Netflix + - Hulu + - Disney+ + - Spotify + - Apple Music + - Xbox Game Pass + + - id: "504" + name: "Academic Journals" + parent_id: "500" + description: "Scholarly publications and research articles" + attributes: + 
- name: "field" + values: + - science + - medicine + - engineering + - humanities + - social_sciences + - name: "format" + values: + - digital_subscription + - print + - individual_article + + # Health & Wellness (Top Level) + - id: "600" + name: "Health & Wellness" + description: "Health, wellness, and medical products" + attributes: + - name: "price_range" + description: "Price tier" + values: + - budget + - mid_range + - premium + - name: "concern" + description: "Health concern addressed" + values: + - general_wellness + - pain_relief + - mobility + - sleep + - nutrition + - specific_condition + + # Health Sub-categories + - id: "601" + name: "Vitamins & Supplements" + parent_id: "600" + description: "Dietary supplements and vitamins" + attributes: + - name: "type" + values: + - multivitamin + - vitamin_c + - vitamin_d + - protein + - herbal + - mineral + - name: "form" + values: + - capsule + - tablet + - powder + - liquid + - gummy + + - id: "602" + name: "Personal Care" + parent_id: "600" + description: "Personal hygiene and care items (non-beauty focus)" + attributes: + - name: "type" + values: + - oral_care + - first_aid + - feminine_hygiene + - incontinence + - foot_care + + - id: "603" + name: "Medical Supplies" + parent_id: "600" + description: "Medical equipment and supplies" + attributes: + - name: "type" + values: + - monitoring_device + - mobility_aid + - first_aid_kit + - diagnostic_test + + - id: "604" + name: "Comfort Footwear" + parent_id: "600" + description: "Footwear designed for comfort and support" + attributes: + - name: "feature" + values: + - orthopedic + - arch_support + - wide_fit + - diabetic + - name: "gender" + values: + - women + - men + - unisex + - name: "size" + values: + - "7" + - "8" + - "9" + - "10" + - "11" + - "12" + - wide_7 + - wide_8 + + # Toys & Baby (Top Level) + - id: "700" + name: "Toys & Games" + description: "Toys, games, and collectibles for all ages" + attributes: + - name: "age_range" + values: + - "0-1" + - 
"1-3" + - "3-5" + - "5-7" + - "8-11" + - "12+" + - adult + - name: "type" + values: + - action_figure + - board_game + - puzzle + - educational + - outdoor + - plush + - building_blocks + - video_game_related + + # Toys & Baby Sub-categories + - id: "701" + name: "Baby Gear" + parent_id: "700" + description: "Strollers, car seats, feeding supplies, nursery items" + attributes: + - name: "type" + values: + - stroller + - car_seat + - high_chair + - baby_monitor + - crib + - feeding_bottle + - diapering + - name: "brand" + values: + - Graco + - Chicco + - Evenflo + - Fisher-Price + - Philips Avent + + - id: "702" + name: "Kids Clothing" + parent_id: "200" + description: "Clothing specifically for children and babies" + attributes: + - name: "age_group" + values: + - baby + - toddler + - kids + - tween + - name: "gender" + values: + - boy + - girl + - unisex + - name: "size" + values: + - infant_0-3m + - infant_3-6m + - toddler_2t + - kids_s + - kids_m + - kids_l + - name: "type" + values: + - onesie + - t-shirt + - pants + - dress + - outerwear + - sleepwear + + # Office & Business (Top Level) + - id: "800" + name: "Office Supplies" + description: "Stationery, office basics, and equipment" + attributes: + - name: "price_range" + values: + - budget + - standard + - premium + - name: "category" + values: + - writing + - paper + - organization + - desk_accessories + - office_electronics + + # Office & Business Sub-categories + - id: "801" + name: "Stationery" + parent_id: "800" + description: "Pens, pencils, notebooks, paper, and related items" + attributes: + - name: "type" + values: + - pen + - pencil + - notebook + - paper + - envelope + - marker + - highlighter + + - id: "802" + name: "Business Wear" + parent_id: "200" + description: "Formal and business-casual attire for professional settings" + attributes: + - name: "type" + values: + - suit + - blazer + - dress_shirt + - blouse + - trousers + - skirt + - dress_shoes + - name: "gender" + values: + - men + - women 
+ - name: "occasion" + values: + - business_formal + - business_casual + + # Entertainment & Hobbies (Top Level) + - id: "900" + name: "Gaming" + description: "Video games, consoles, and accessories" + attributes: + - name: "platform" + values: + - pc + - playstation + - xbox + - nintendo_switch + - mobile + - name: "genre" + values: + - action + - rpg + - strategy + - simulation + - sports + - puzzle + - indie + - name: "type" + values: + - console + - game + - accessory + - gaming_laptop + - gaming_pc_component + + # Travel (Top Level) + - id: "1000" + name: "Travel" + description: "Luggage, travel accessories, and booking services" + attributes: + - name: "type" + values: + - luggage + - travel_accessories + - booking + - travel_gear + - name: "travel_style" + values: + - business + - leisure + - budget + - luxury + - adventure + - family + + # Food & Grocery (Top Level) + - id: "1100" + name: "Grocery" + description: "Food and beverage items" + attributes: + - name: "category" + values: + - fresh_produce + - pantry_staples + - snacks + - beverages + - frozen_foods + - dairy + - meat_seafood + - name: "dietary_preference" + values: + - organic + - gluten_free + - vegan + - vegetarian + - keto + + # Food & Grocery Sub-categories + - id: "1101" + name: "Budget Food" + parent_id: "1100" + description: "Value-focused food items and bulk goods" + attributes: + - name: "type" + values: + - canned_goods + - pasta_rice + - frozen_value + - store_brand + + # Luxury & Gifting (Top Level) + - id: "1200" + name: "Jewelry & Watches" + description: "Fine and fashion jewelry, watches" + attributes: + - name: "type" + values: + - ring + - necklace + - bracelet + - earrings + - watch + - name: "material" + values: + - gold + - silver + - platinum + - diamond + - gemstone + - stainless_steel + - name: "price_range" + values: + - fashion + - mid_range + - fine + - luxury + - name: "gender" + values: + - women + - men + - unisex + + - id: "1300" + name: "Gifts" + description: "Gift 
sets, special occasion items, and experiences" + attributes: + - name: "occasion" + values: + - birthday + - anniversary + - holiday + - wedding + - thank_you + - corporate + - name: "recipient" + values: + - for_her + - for_him + - for_kids + - for_couple + - for_pet + - name: "type" + values: + - gift_basket + - experience + - personalized_item + - novelty_gift + + # Software (Top Level) + - id: "1400" + name: "Software" + description: "Applications, operating systems, and software tools" + attributes: + - name: "type" + values: + - productivity + - creative + - security + - utility + - operating_system + - business + - educational + - gaming + - name: "license" + values: + - subscription + - perpetual + - freeware + - open_source + - name: "platform" + values: + - windows + - macos + - linux + - web + - mobile_ios + - mobile_android diff --git a/ml-service/app/services/demographicInference.py b/ml-service/app/services/demographicInference.py new file mode 100644 index 0000000..cf80305 --- /dev/null +++ b/ml-service/app/services/demographicInference.py @@ -0,0 +1,326 @@ +import logging +from collections import defaultdict +from typing import List, Dict, Any, Optional, Tuple +from bson import ObjectId +from datetime import datetime # Import datetime +from app.utils.redis_util import invalidate_cache, CACHE_KEYS # Import cache utilities + +logger = logging.getLogger(__name__) + +# --- Keyword Definitions (Examples - Expand significantly) --- +KIDS_KEYWORDS = { + "baby", "toddler", "child", "kid", "infant", "diaper", "stroller", + "crib", "formula", "nursery", "maternity", "school supplies", "toy", + "lego", "barbie", "playstation", "nintendo", # Be careful with broad terms +} +RELATIONSHIP_KEYWORDS = { + "wedding", "engagement", "anniversary", "couple", "partner", "spouse", + "boyfriend", "girlfriend", "husband", "wife", "romantic", "valentine", +} +MARRIED_KEYWORDS = { + "wedding", "anniversary", "spouse", "husband", "wife", "married", +} +SINGLE_KEYWORDS = { + 
"single", "dating app", "matchmaking", +} + +# --- NEW Keyword Sets --- +EMPLOYMENT_KEYWORDS = { + "job search", "linkedin", "resume", "interview suit", "office supplies", + "business travel", "conference", "work laptop", "unemployment benefits", + "career fair", "networking event", +} +STUDENT_KEYWORDS = { + "student discount", "university", "college", "textbook", "dorm room", + "student loan", "internship", "campus", "study guide", "backpack", + "school supplies", # Overlap with KIDS_KEYWORDS, context matters +} +EDUCATION_KEYWORDS = { + "university", "college", "bachelor's degree", "master's degree", "phd", + "doctorate", "thesis", "dissertation", "academic journal", "textbook", + "research paper", "graduate school", +} +# Age bracket keywords are very unreliable, use with extreme caution or alternative methods +AGE_BRACKET_YOUNG_ADULT_KEYWORDS = { # Approx 18-24 + "college", "university", "first apartment", "internship", "study abroad", + "spring break", "starter job", +} +AGE_BRACKET_MID_CAREER_KEYWORDS = { # Approx 35-54 + "mortgage", "kids' college fund", "management training", "midlife crisis", # Joking, but maybe? 
+ "retirement planning", "executive", +} +AGE_BRACKET_SENIOR_KEYWORDS = { # Approx 65+ + "retirement", "pension", "senior discount", "medicare", "grandchild", + "assisted living", "downsizing home", +} +# --- End NEW Keyword Sets --- + + +# --- Helper Function to Extract Text --- +def _extract_text_from_entries(entries: List[Dict[str, Any]]) -> List[str]: + """Extracts relevant text (item names, search queries) from entries.""" + texts = [] + for entry in entries: + if entry.get("dataType") == "purchase": + texts.extend([item.get("name", "").lower() for item in entry.get("items", [])]) + elif entry.get("dataType") == "search": + texts.append(entry.get("query", "").lower()) + return [text for text in texts if text] # Filter out empty strings + +# --- Inference Functions --- + +async def infer_has_kids(entries: List[Dict[str, Any]]) -> Optional[bool]: + """Infer if user has kids based on purchase/search keywords.""" + kid_evidence_count = 0 + texts = _extract_text_from_entries(entries) + logger.debug(f"Inferring 'has_kids' from {len(texts)} text entries.") + for text in texts: + if any(keyword in text for keyword in KIDS_KEYWORDS): + kid_evidence_count += 1 + logger.debug(f"Kid keyword found: {text}") + + if kid_evidence_count >= 2: # Require multiple pieces of evidence + logger.debug(f"Inferring 'has_kids' = True (evidence count: {kid_evidence_count})") + return True + logger.debug(f"Inferring 'has_kids' = None (evidence count: {kid_evidence_count})") + return None # Not enough evidence + +async def infer_relationship_status(entries: List[Dict[str, Any]]) -> Optional[str]: + """Infer relationship status (single, relationship, married) based on keywords.""" + married_evidence = 0 + relationship_evidence = 0 + single_evidence = 0 # Less reliable + texts = _extract_text_from_entries(entries) + logger.debug(f"Inferring 'relationship_status' from {len(texts)} text entries.") + + for text in texts: + # Check married first for priority + if any(keyword in text for keyword 
in MARRIED_KEYWORDS): + married_evidence += 1 + logger.debug(f"Married keyword found: {text}") + elif any(keyword in text for keyword in RELATIONSHIP_KEYWORDS): + relationship_evidence += 1 + logger.debug(f"Relationship keyword found: {text}") + elif any(keyword in text for keyword in SINGLE_KEYWORDS): + single_evidence += 1 + logger.debug(f"Single keyword found: {text}") + + # Prioritize married > relationship > single based on evidence threshold + if married_evidence >= 1: # Lower threshold for specific events like wedding + logger.debug(f"Inferring 'relationship_status' = 'married' (evidence count: {married_evidence})") + return "married" + elif relationship_evidence >= 2: + logger.debug(f"Inferring 'relationship_status' = 'relationship' (evidence count: {relationship_evidence})") + return "relationship" + # elif single_evidence >= 1: # Be very cautious enabling this + # logger.debug(f"Inferring 'relationship_status' = 'single' (evidence count: {single_evidence})") + # return "single" + logger.debug("Inferring 'relationship_status' = None (insufficient evidence)") + return None # Not enough evidence + +# --- NEW Inference Functions --- + +async def infer_employment_status(entries: List[Dict[str, Any]]) -> Optional[str]: + """Infer employment status (employed, student, unemployed) based on keywords.""" + student_evidence = 0 + employment_evidence = 0 + # Inferring 'unemployed' directly from keywords is very difficult/unreliable + texts = _extract_text_from_entries(entries) + logger.debug(f"Inferring 'employment_status' from {len(texts)} text entries.") + + for text in texts: + # Check student first due to potential overlap (e.g., "school supplies") + if any(keyword in text for keyword in STUDENT_KEYWORDS): + student_evidence += 1 + logger.debug(f"Student keyword found: {text}") + elif any(keyword in text for keyword in EMPLOYMENT_KEYWORDS): + employment_evidence += 1 + logger.debug(f"Employment keyword found: {text}") + + # Prioritize student if strong evidence, 
otherwise employed + if student_evidence >= 2: + logger.debug(f"Inferring 'employment_status' = 'student' (evidence count: {student_evidence})") + return "student" + elif employment_evidence >= 2: + logger.debug(f"Inferring 'employment_status' = 'employed' (evidence count: {employment_evidence})") + return "employed" + # Add more sophisticated logic? Check for conflicting terms? + logger.debug("Inferring 'employment_status' = None (insufficient evidence)") + return None # Not enough evidence + +async def infer_education_level(entries: List[Dict[str, Any]]) -> Optional[str]: + """Infer education level (high_school, bachelors, masters, doctorate) - Very Speculative.""" + doctorate_evidence = 0 + masters_evidence = 0 + bachelors_evidence = 0 + texts = _extract_text_from_entries(entries) + logger.debug(f"Inferring 'education_level' from {len(texts)} text entries.") + + for text in texts: + # Check most specific first + if any(keyword in text for keyword in ["phd", "doctorate", "dissertation"]): + doctorate_evidence += 1 + logger.debug(f"Doctorate keyword found: {text}") + elif any(keyword in text for keyword in ["master's degree", "graduate school", "thesis"]): + masters_evidence += 1 + logger.debug(f"Masters keyword found: {text}") + elif any(keyword in text for keyword in ["bachelor's degree", "university", "college", "undergrad"]): + bachelors_evidence += 1 + logger.debug(f"Bachelors keyword found: {text}") + + # Prioritize highest level found with some evidence threshold + if doctorate_evidence >= 1: + logger.debug(f"Inferring 'education_level' = 'doctorate' (evidence count: {doctorate_evidence})") + return "doctorate" + elif masters_evidence >= 1: + logger.debug(f"Inferring 'education_level' = 'masters' (evidence count: {masters_evidence})") + return "masters" + elif bachelors_evidence >= 2: # Require slightly more for bachelors + logger.debug(f"Inferring 'education_level' = 'bachelors' (evidence count: {bachelors_evidence})") + return "bachelors" + # Inferring 
'high_school' is difficult, maybe default if other evidence is weak? + logger.debug("Inferring 'education_level' = None (insufficient evidence)") + return None # Very uncertain + +async def infer_age_bracket(entries: List[Dict[str, Any]]) -> Optional[str]: + """Infer age bracket based on keywords - EXTREMELY SPECULATIVE AND UNRELIABLE.""" + young_adult_evidence = 0 + mid_career_evidence = 0 + senior_evidence = 0 + texts = _extract_text_from_entries(entries) + logger.debug(f"Inferring 'age_bracket' from {len(texts)} text entries.") + + for text in texts: + if any(keyword in text for keyword in AGE_BRACKET_SENIOR_KEYWORDS): + senior_evidence += 1 + logger.debug(f"Senior age keyword found: {text}") + elif any(keyword in text for keyword in AGE_BRACKET_MID_CAREER_KEYWORDS): + mid_career_evidence += 1 + logger.debug(f"Mid-career age keyword found: {text}") + elif any(keyword in text for keyword in AGE_BRACKET_YOUNG_ADULT_KEYWORDS): + young_adult_evidence += 1 + logger.debug(f"Young adult age keyword found: {text}") + + # Simple thresholding - needs much refinement or a different approach + if senior_evidence >= 1: + logger.debug(f"Inferring 'age_bracket' = '65+' (evidence count: {senior_evidence})") + return "65+" + elif mid_career_evidence >= 2: + # Could try to differentiate 35-44 vs 45-54 based on keywords, but very hard + logger.debug(f"Inferring 'age_bracket' = '35-54' (evidence count: {mid_career_evidence})") + return "35-54" # Combine for now + elif young_adult_evidence >= 2: + logger.debug(f"Inferring 'age_bracket' = '18-24' (evidence count: {young_adult_evidence})") + return "18-24" + + logger.warning("Age bracket inference based on keywords is highly unreliable.") + logger.debug("Inferring 'age_bracket' = None (insufficient evidence)") + return None # Highly uncertain + +# --- Main Inference Runner --- + +async def run_inference_for_user(user_id: str, email: str, db, limit: int = 50) -> bool: + """ + Runs demographic inference based on recent user data and 
updates the user document if changes are found. + Returns True if the user document was updated, False otherwise. + """ + logger.info(f"Running demographic inference for user {user_id} ({email})") + updated = False + try: + user_object_id = ObjectId(user_id) + user = await db.users.find_one({"_id": user_object_id}) + if not user: + logger.error(f"Inference: User not found by ID {user_id}") + return False + + # Fetch recent userData entries for the user + recent_data = await db.userData.find( + {"userId": user_object_id} + ).sort("timestamp", -1).limit(limit).to_list(length=limit) + + if not recent_data: + logger.info(f"Inference: No recent data found for user {user_id}") + return False + else: + logger.info(f"Inference: Found {len(recent_data)} recent data entries for user {user_id}") + + + # --- Run inference functions --- + inferred_kids = await infer_has_kids(recent_data) + inferred_status = await infer_relationship_status(recent_data) + inferred_employment = await infer_employment_status(recent_data) + inferred_education = await infer_education_level(recent_data) # Very speculative + inferred_age_bracket = None + # Only infer age bracket if age is not already set in demographicData + current_demographics = user.get("demographicData", {}) + if current_demographics.get("age") is None: + logger.info(f"Inference: User {email} has no age set, attempting age bracket inference.") + inferred_age_bracket = await infer_age_bracket(recent_data) # Highly speculative + else: + logger.info(f"Inference: User {email} has age set ({current_demographics.get('age')}), skipping age bracket inference.") + + + # --- Prepare update payload --- + update_payload = {} + # Read current values from the nested demographicData object + current_kids = current_demographics.get("inferredHasKids") + current_status = current_demographics.get("inferredRelationshipStatus") + current_employment = current_demographics.get("inferredEmploymentStatus") + current_education = 
current_demographics.get("inferredEducationLevel") + current_age_bracket = current_demographics.get("inferredAgeBracket") + + # Use dot notation for updates within the nested object + if inferred_kids is not None and inferred_kids != current_kids: + update_payload["demographicData.inferredHasKids"] = inferred_kids + logger.info(f"Inference update for {email}: demographicData.inferredHasKids -> {inferred_kids} (was {current_kids})") + if inferred_status is not None and inferred_status != current_status: + update_payload["demographicData.inferredRelationshipStatus"] = inferred_status + logger.info(f"Inference update for {email}: demographicData.inferredRelationshipStatus -> {inferred_status} (was {current_status})") + if inferred_employment is not None and inferred_employment != current_employment: + update_payload["demographicData.inferredEmploymentStatus"] = inferred_employment + logger.info(f"Inference update for {email}: demographicData.inferredEmploymentStatus -> {inferred_employment} (was {current_employment})") + if inferred_education is not None and inferred_education != current_education: + update_payload["demographicData.inferredEducationLevel"] = inferred_education + logger.info(f"Inference update for {email}: demographicData.inferredEducationLevel -> {inferred_education} (was {current_education})") + if inferred_age_bracket is not None and inferred_age_bracket != current_age_bracket: + update_payload["demographicData.inferredAgeBracket"] = inferred_age_bracket + logger.info(f"Inference update for {email}: demographicData.inferredAgeBracket -> {inferred_age_bracket} (was {current_age_bracket})") + # --- End Prepare update payload --- + + # Update user document in DB if there are changes + if update_payload: + logger.info(f"Inference: Found updates for {email}: {update_payload.keys()}") + update_payload["updatedAt"] = datetime.now() # Update timestamp + result = await db.users.update_one( + {"_id": user_object_id}, + {"$set": update_payload} + ) + if 
result.modified_count > 0: + updated = True + logger.info(f"Inference: Successfully updated user document for {email}") + + # --- Invalidate Caches on Successful Update --- + auth0_id = user.get("auth0Id") + if auth0_id: + # Invalidate user data and general preferences + await invalidate_cache(f"{CACHE_KEYS['USER_DATA']}{auth0_id}") + await invalidate_cache(f"{CACHE_KEYS['PREFERENCES']}{auth0_id}") + logger.info(f"Inference: Invalidated USER_DATA and PREFERENCES cache for {auth0_id}") + + # Invalidate store-specific preferences for opt-in stores + if user.get("privacySettings", {}).get("optInStores"): + user_object_id_str = str(user_object_id) + for store_id in user["privacySettings"]["optInStores"]: + await invalidate_cache(f"{CACHE_KEYS['STORE_PREFERENCES']}{user_object_id_str}:{store_id}") + logger.info(f"Inference: Invalidated STORE_PREFERENCES caches for {auth0_id}") + # --- End Cache Invalidation --- + else: + logger.warning(f"Inference: Update payload generated but DB modify count was 0 for {email}. 
Payload: {update_payload}") + else: + logger.info(f"Inference: No demographic updates found for {email}") + + + except Exception as e: + logger.error(f"Error during demographic inference for user {user_id}: {str(e)}", exc_info=True) + + return updated diff --git a/ml-service/app/services/preferenceProcessor.py b/ml-service/app/services/preferenceProcessor.py index facc377..2300526 100644 --- a/ml-service/app/services/preferenceProcessor.py +++ b/ml-service/app/services/preferenceProcessor.py @@ -1,215 +1,536 @@ +import logging from app.models.preferences import UserDataEntry, UserPreferences, UserPreference from datetime import datetime from fastapi import HTTPException from bson import ObjectId -import logging from app.utils.redis_util import invalidate_cache, CACHE_KEYS -from typing import List, Dict, Any -from app.services.taxonomyService import get_taxonomy_service +from typing import List, Dict, Any, Optional +from app.services.taxonomyService import TaxonomyService, get_taxonomy_service # Updated import from collections import defaultdict +from app.services.demographicInference import run_inference_for_user # Import the inference runner +from sentence_transformers import util # Import sentence-transformers utility for similarity +import numpy as np # Import numpy logger = logging.getLogger(__name__) +# --- Configuration --- +ATTRIBUTE_SIMILARITY_THRESHOLD = 0.55 # Configurable threshold for matching attribute values + async def process_user_data(data: UserDataEntry, db) -> UserPreferences: """Process user data and update their preferences""" - + # Extract user info - user_id = data.metadata.get("userId") if data.metadata else None + user_id_from_meta = data.metadata.get("userId") if data.metadata else None email = data.email data_type = data.data_type entries = data.entries - - logger.info(f"Processing data for user {user_id or email}, type: {data_type}") - - # Fetch existing user preferences from MongoDB + + logger.info(f"Processing data for user email 
{email} (ID from meta: {user_id_from_meta}), type: {data_type}") + + # Fetch the full user document from MongoDB to get demographics user = None - if user_id and ObjectId.is_valid(user_id): - user = await db.users.find_one({"_id": ObjectId(user_id)}) - + if user_id_from_meta and ObjectId.is_valid(user_id_from_meta): + user = await db.users.find_one({"_id": ObjectId(user_id_from_meta)}) + if user and user.get("email") != email: + logger.warning(f"User ID {user_id_from_meta} provided in metadata maps to email {user.get('email')}, but processing request is for {email}. Proceeding with email lookup.") + user = None # Force email lookup if mismatch + if not user: # Fallback to find by email user = await db.users.find_one({"email": email}) if not user: - logger.error(f"User not found: {email}") + logger.error(f"User not found by email: {email}") + # Mark as failed before raising + await mark_processing_failed(db, email) # Assuming mark_processing_failed exists raise HTTPException(status_code=404, detail="User not found") - + + user_id = str(user["_id"]) # Use the confirmed user ID from DB + logger.info(f"Found user {email} with DB ID {user_id}") + + # Extract demographics from the nested 'demographicData' field + user_demographics_nested = user.get("demographicData", {}) + # Flatten the dictionary to pass to processing functions + user_demographics_flat = { + "gender": user_demographics_nested.get("gender"), + "incomeBracket": user_demographics_nested.get("incomeBracket"), + "country": user_demographics_nested.get("country"), + "age": user_demographics_nested.get("age"), + "inferredHasKids": user_demographics_nested.get("inferredHasKids"), + "inferredRelationshipStatus": user_demographics_nested.get("inferredRelationshipStatus"), + "inferredEmploymentStatus": user_demographics_nested.get("inferredEmploymentStatus"), + "inferredEducationLevel": user_demographics_nested.get("inferredEducationLevel"), + "inferredAgeBracket": 
user_demographics_nested.get("inferredAgeBracket"), + } + # Filter out None values if desired, but processing functions handle None + # user_demographics_flat = {k: v for k, v in user_demographics_flat.items() if v is not None} + + logger.info(f"Using demographics for user {email}: {user_demographics_flat}") + + # Get current preferences from the user object user_preferences = user.get("preferences", []) - - # Convert to dictionary for easier updates - preference_dict = {pref["category"]: pref for pref in user_preferences} - + + # Convert to dictionary for easier updates {category_id: preference_object} + preference_dict = {} + for pref in user_preferences: + if isinstance(pref, dict) and "category" in pref: + preference_dict[pref["category"]] = pref + else: + logger.warning(f"Skipping invalid preference item for user {email}: {pref}") + + # Get taxonomy service taxonomy = await get_taxonomy_service(db) - - # Process entries based on data type + + # Process entries based on data type, passing flattened demographics try: if data_type == "purchase": - await process_purchase_data(entries, preference_dict, taxonomy) + await process_purchase_data(entries, preference_dict, taxonomy, user_demographics_flat) elif data_type == "search": - await process_search_data(entries, preference_dict, taxonomy) + await process_search_data(entries, preference_dict, taxonomy, user_demographics_flat) else: logger.warning(f"Unknown data type: {data_type}") + # Optionally, still try embedding fallback for unknown types + await process_with_embeddings(entries, data_type, preference_dict, taxonomy, user_demographics_flat) + except Exception as e: - logger.error(f"Error processing {data_type} data: {str(e)}") + logger.error(f"Error processing {data_type} data for {email}: {str(e)}", exc_info=True) # Fall back to using embedding model for all data try: - await process_with_embeddings(entries, data_type, preference_dict, taxonomy) + logger.info(f"Attempting fallback embedding processing for 
{email} due to error.") + # Pass flattened demographics to fallback as well + await process_with_embeddings(entries, data_type, preference_dict, taxonomy, user_demographics_flat) except Exception as fallback_error: - logger.error(f"Fallback processing also failed: {str(fallback_error)}") + logger.error(f"Fallback processing also failed for {email}: {str(fallback_error)}", exc_info=True) + # Mark as failed before raising + await mark_processing_failed(db, email) # Assuming mark_processing_failed exists raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}") - + # Convert preference_dict back to list - updated_preferences = list(preference_dict.values()) - + updated_preferences_list = list(preference_dict.values()) + # Add normalization before database update - normalized_preferences = await normalize_categories(updated_preferences, taxonomy) - + # Ensure normalize_categories handles potential issues gracefully + try: + normalized_preferences = await normalize_categories(updated_preferences_list, taxonomy) + except Exception as norm_error: + logger.error(f"Error normalizing categories for {email}: {norm_error}", exc_info=True) + normalized_preferences = updated_preferences_list # Use unnormalized as fallback + # Update user preferences in database with normalized data + update_time = datetime.now() await db.users.update_one( {"_id": user["_id"]}, { "$set": { "preferences": normalized_preferences, - "updatedAt": datetime.now() + "updatedAt": update_time } } ) - + logger.info(f"Successfully updated preferences for user {email} in DB.") + # Update the userData collection's processedStatus to "processed" + # Find the specific document(s) related to this submission batch if possible, + # otherwise update the oldest pending one for the user. + # This assumes the AI service passes back an ID or we can match based on content/timestamp. + # For simplicity, updating the first pending entry found for the email. 
try: + # Ideally, match on a unique ID for the submission batch if available + # submission_id = data.metadata.get("submissionId") + # if submission_id: + # match_criteria = {"_id": ObjectId(submission_id)} + # else: + match_criteria = {"email": email, "processedStatus": "pending"} + result = await db.userData.update_one( - { - "email": email, - "processedStatus": "pending" - }, + match_criteria, {"$set": {"processedStatus": "processed"}} + # Consider adding sort if multiple pending exist and no ID is available ) - logger.info(f"Updated userData status to 'processed' for {email}, modified: {result.modified_count}") + if result.modified_count > 0: + logger.info(f"Updated userData status to 'processed' for {email}, modified: {result.modified_count}") + else: + logger.warning(f"Could not find pending userData entry for {email} to mark as processed.") except Exception as e: - logger.error(f"Failed to update userData status: {str(e)}") - - # Invalidate user preferences cache using auth0Id - if user.get("auth0Id"): - auth0_id = user["auth0Id"] - await invalidate_cache(f"{CACHE_KEYS['PREFERENCES']}{auth0_id}") - logger.info(f"Invalidated preferences cache for user {auth0_id}") - + logger.error(f"Failed to update userData status for {email}: {str(e)}") + + # --- Run Demographic Inference (After main processing) --- + inference_updated_user = False + try: + logger.info(f"Starting demographic inference for user {email} ({user_id})") + inference_updated_user = await run_inference_for_user(user_id, email, db) + if inference_updated_user: + logger.info(f"Demographic inference updated user document for {email}") + # Cache invalidation is handled within run_inference_for_user + else: + logger.info(f"Demographic inference did not result in updates for user {email}") + except Exception as inference_error: + logger.error(f"Demographic inference failed for user {email}: {inference_error}", exc_info=True) + # --- End Demographic Inference --- + + + # Invalidate user preferences cache 
using auth0Id (if not already done by inference) + # This ensures caches are cleared even if inference didn't run or update + auth0_id = user.get("auth0Id") + if auth0_id: + # Check if inference already invalidated caches for this user + if not inference_updated_user: + logger.info(f"Running post-processing cache invalidation for {auth0_id} as inference didn't update.") + await invalidate_cache(f"{CACHE_KEYS['PREFERENCES']}{auth0_id}") + logger.info(f"Invalidated PREFERENCES cache for user {auth0_id} (post-processing)") + + # Invalidate store-specific caches if opt-in stores exist + if user.get("privacySettings", {}).get("optInStores"): + for store_id in user["privacySettings"]["optInStores"]: + store_pref_key = f"{CACHE_KEYS['STORE_PREFERENCES']}{user_id}:{store_id}" + await invalidate_cache(store_pref_key) + logger.info(f"Invalidated STORE_PREFERENCES caches for user {auth0_id} (post-processing)") + else: + logger.info(f"Skipping post-processing cache invalidation as inference already handled it for {auth0_id}") + else: + logger.warning(f"Cannot invalidate caches for user {email} as auth0Id is missing.") + + # Return updated preferences in the expected format return UserPreferences( - user_id=str(user["_id"]), + user_id=user_id, preferences=[ UserPreference( - category=item["category"], + category=item["category"], score=item["score"], attributes=item.get("attributes") - ) for item in normalized_preferences + ) for item in normalized_preferences # Use normalized preferences ], - updated_at=datetime.now() + updated_at=update_time # Use the time of this update ) -async def process_purchase_data(entries, preference_dict, taxonomy): - """Process purchase data using rule-based system""" +# ... process_purchase_data, process_search_data, process_with_embeddings, normalize_categories ... 
+# (No changes needed inside these functions as they receive the flattened demographics dict) + +# --- Add helper for marking failed --- +async def mark_processing_failed(db, email: str): + """Marks the oldest pending userData entry for the email as failed.""" + try: + result = await db.userData.update_one( + {"email": email, "processedStatus": "pending"}, + {"$set": {"processedStatus": "failed"}}, + # sort={"timestamp": 1} # Optional: ensure oldest is marked if multiple exist + ) + if result.modified_count > 0: + logger.info(f"Marked a pending userData entry as 'failed' for {email}") + else: + logger.warning(f"Could not find pending userData entry for {email} to mark as failed.") + except Exception as e: + logger.error(f"Failed to mark userData as failed for {email}: {str(e)}") + +async def process_purchase_data(entries, preference_dict, taxonomy: TaxonomyService, demographics: Optional[Dict[str, Any]] = None): + """Process purchase data using rule-based system, considering demographics and buying patterns""" category_counts = defaultdict(int) attribute_counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int))) - - # Count purchases by category and attribute + category_price_totals = defaultdict(float) + category_item_counts = defaultdict(int) + + # --- Demographic Usage (remains the same) --- + demographics = demographics or {} + gender = demographics.get("gender") + age = demographics.get("age") + income = demographics.get("incomeBracket") + country = demographics.get("country") + # --- Add inferred data --- + has_kids = demographics.get("inferredHasKids") # Boolean or None + relationship_status = demographics.get("inferredRelationshipStatus") # String or None + # --- Add NEW inferred data --- + employment_status = demographics.get("inferredEmploymentStatus") # String or None + education_level = demographics.get("inferredEducationLevel") # String or None + age_bracket = demographics.get("inferredAgeBracket") # String or None + # --- End Refined 
Demographic Usage --- + + # Count purchases, attributes, and track prices for entry in entries: - if "items" not in entry: - continue - - for item in entry["items"]: - category = item.get("category") - if not category: + for item in entry.get("items", []): + category_input = item.get("category") # Can be ID or Name + item_name = item.get("name") + quantity = item.get("quantity", 1) + price = item.get("price") + provided_attributes = item.get("attributes") # Attributes from the store + + if not category_input or not item_name: + logger.warning(f"Skipping item due to missing category or name: {item}") continue - - # Increment category count - category_counts[category] += item.get("quantity", 1) - - # Process attributes - if "attributes" in item: - for attr_name, attr_value in item["attributes"].items(): - attribute_counts[category][attr_name][attr_value] += item.get("quantity", 1) - - # Update preference scores - total_items = sum(category_counts.values()) - if total_items > 0: - for category, count in category_counts.items(): - # Calculate category score (normalized) - score = min(count / (total_items * 0.5), 1.0) # Cap at 1.0 - - # Create or update preference - if category not in preference_dict: - preference_dict[category] = { - "category": category, - "score": score, + + # --- Resolve Category ID --- + category_id = None + if taxonomy.taxonomy: # Check if taxonomy is loaded + # Try direct ID lookup first + if category_input in taxonomy._id_to_name_map: + category_id = category_input + else: + # Try name lookup (case-insensitive) + category_id = taxonomy.get_category_id(category_input) + + if not category_id: + logger.warning(f"Could not resolve category '{category_input}' for item '{item_name}'. 
Skipping attribute processing for this item.") + # Decide if you still want to count the category score even if attributes can't be processed + # For now, we skip attribute processing but might still count category later if needed + continue # Skip attribute part if category is unresolved + # --- End Resolve Category ID --- + + + # --- Hybrid Attribute Logic --- + final_attributes = None + is_valid_provided = False + + # 1. Check if store provided valid attributes + if provided_attributes and isinstance(provided_attributes, dict): + try: + # Basic validation: Check if keys exist in taxonomy for this category + category_details = taxonomy.get_category_details(category_id) + if category_details and category_details.attributes: + valid_attr_names = {attr.name for attr in category_details.attributes} + # Check if all provided keys are valid attribute names for the category + is_valid_provided = all(key in valid_attr_names for key in provided_attributes.keys()) + if is_valid_provided: + final_attributes = provided_attributes + logger.debug(f"Using valid store-provided attributes for item: {item_name}") + else: + invalid_keys = [key for key in provided_attributes.keys() if key not in valid_attr_names] + logger.warning(f"Invalid attribute keys provided by store for item '{item_name}' in category '{category_id}': {invalid_keys}. Falling back to AI.") + else: + logger.warning(f"No attributes defined in taxonomy for category '{category_id}', cannot validate provided attributes for '{item_name}'. Falling back to AI.") + is_valid_provided = False # Cannot validate + + except Exception as val_err: + logger.warning(f"Error validating provided attributes for {item_name}: {val_err}. Falling back to AI.") + is_valid_provided = False + + # 2. 
Fallback to AI extraction if needed + if not final_attributes: + logger.debug(f"Attempting AI attribute extraction for item: {item_name}") + try: + # Call the AI extraction function using the embedding model + extracted_attributes = await extract_attributes_with_similarity( + item_name, category_id, taxonomy # Pass taxonomy service + ) + if extracted_attributes: + final_attributes = extracted_attributes + logger.info(f"Successfully extracted attributes via AI for '{item_name}': {final_attributes}") # Log success + else: + logger.debug(f"AI could not extract attributes for {item_name}") + except Exception as ai_err: + logger.error(f"AI attribute extraction failed for {item_name}: {ai_err}", exc_info=True) + final_attributes = None # Ensure it's None on failure + + # --- End Hybrid Attribute Logic --- + + # --- Update Category Counts (Moved here to ensure category_id is valid) --- + category_counts[category_id] += quantity + if price is not None: + category_price_totals[category_id] += price * quantity + category_item_counts[category_id] += quantity + # --- End Update Category Counts --- + + + # --- Attribute Scoring (Using final_attributes) --- + if final_attributes: + for attr_name, attr_value in final_attributes.items(): + # Ensure attr_value is a string (as expected from store or AI extraction) + if isinstance(attr_value, str): + value_str = attr_value.lower() # Normalize to lower case + attribute_counts[category_id][attr_name][value_str] += quantity + else: + logger.warning(f"Skipping attribute scoring for non-string value: {attr_name}={attr_value} in item '{item_name}'") + # --- End Attribute Scoring --- + + + # --- Update preference scores (Category level - remains the same) --- + total_items_overall = sum(category_counts.values()) + if total_items_overall > 0: + for category_id, count in category_counts.items(): + # --- Category Score Calculation (remains largely the same) --- + base_score = min(count / (total_items_overall * 0.5), 1.0) + boost_factor = 1.0 + 
category_name = taxonomy.get_category_name(category_id) + + # Apply demographic boosts (gender, age) + if gender == "female" and category_name in ["Fashion", "Beauty", "Skincare", "Makeup"]: + boost_factor *= 1.1 # Boost common female-associated categories + elif gender == "male" and category_name in ["Electronics", "Tools", "Laptops"]: + boost_factor *= 1.05 # Slightly boost common male-associated categories + if age: + # Boost tech/gaming for younger adults + if 18 <= age <= 30 and category_name in ["Smartphones", "Wearables", "Audio", "Gaming"]: + boost_factor *= 1.05 + # Boost health/home for older adults + elif age >= 50 and category_name in ["Health", "Home"]: + boost_factor *= 1.08 # Slightly higher boost for potential health needs + + # --- Apply inferred demographic boosts --- + if has_kids is True and category_name in ["Toys", "Baby", "Kids Clothing"]: # Add relevant categories + boost_factor *= 1.15 # Stronger boost for likely parents in child-related categories + logger.debug(f"Applying 'has_kids' boost to category {category_name}") + + if relationship_status == "married" and category_name in ["Home Goods", "Furniture", "Jewelry"]: # Example categories + boost_factor *= 1.05 # Slight boost for categories related to shared living/gifting + logger.debug(f"Applying 'married' boost to category {category_name}") + + # --- Apply NEW inferred boosts (Examples) --- + if employment_status == "student" and category_name in ["Laptops", "Books", "Stationery", "Budget Food"]: # Add relevant categories + boost_factor *= 1.1 # Boost student-related items + logger.debug(f"Applying 'student' boost to category {category_name}") + if employment_status == "employed" and category_name in ["Business Wear", "Office Supplies", "Travel"]: # Add relevant categories + boost_factor *= 1.05 # Slight boost for work-related items + logger.debug(f"Applying 'employed' boost to category {category_name}") + + if education_level in ["masters", "doctorate"] and category_name in ["Books", 
"Academic Journals", "Software"]: # Add relevant categories + boost_factor *= 1.08 # Speculative boost for academic/professional interests + logger.debug(f"Applying 'higher_education' boost to category {category_name}") + + # Use inferred age bracket ONLY if actual age is missing + effective_age_info = age if age is not None else age_bracket + if effective_age_info: + # Example using age bracket (less precise than actual age) + if effective_age_info == "18-24" and category_name in ["Fast Fashion", "Gaming", "Streaming Services"]: + boost_factor *= 1.05 # Boost categories popular with young adults + logger.debug(f"Applying '18-24' boost to category {category_name}") + elif effective_age_info == "65+" and category_name in ["Health", "Gardening", "Comfort Footwear"]: + boost_factor *= 1.1 # Boost categories relevant to seniors + logger.debug(f"Applying '65+' boost to category {category_name}") + # --- End NEW inferred boosts --- + + final_score = min(base_score * boost_factor, 1.0) + + # Update category score in preference_dict (using EMA) + if category_id not in preference_dict: + preference_dict[category_id] = { + "category": category_id, + "score": final_score, "attributes": {} } else: - # Use exponential moving average to blend new score with existing - alpha = 0.3 # Blend factor - old_score = preference_dict[category]["score"] - preference_dict[category]["score"] = alpha * score + (1 - alpha) * old_score - - # Process attributes - if category in attribute_counts: - for attr_name, attr_values in attribute_counts[category].items(): - # Get total for this attribute - attr_total = sum(attr_values.values()) - - # Create attribute distribution - if "attributes" not in preference_dict[category]: - preference_dict[category]["attributes"] = {} - - if attr_name not in preference_dict[category]["attributes"]: - preference_dict[category]["attributes"][attr_name] = {} - - # Calculate normalized values - for value, value_count in attr_values.items(): - normalized_score = 
value_count / attr_total - - # Use exponential moving average if attribute value exists - if value in preference_dict[category]["attributes"][attr_name]: - old_value = preference_dict[category]["attributes"][attr_name][value] - preference_dict[category]["attributes"][attr_name][value] = \ - alpha * normalized_score + (1 - alpha) * old_value - else: - preference_dict[category]["attributes"][attr_name][value] = normalized_score + alpha = 0.3 + old_score = preference_dict[category_id]["score"] + preference_dict[category_id]["score"] = alpha * final_score + (1 - alpha) * old_score + # --- End Category Score Calculation --- + + + # --- Update preference scores (Attribute level - Adjusted) --- + for category_id, attrs in attribute_counts.items(): + if category_id in preference_dict: # Ensure category exists + if "attributes" not in preference_dict[category_id] or preference_dict[category_id]["attributes"] is None: + preference_dict[category_id]["attributes"] = {} # Initialize if missing + + for attr_name, values in attrs.items(): + if attr_name not in preference_dict[category_id]["attributes"]: + preference_dict[category_id]["attributes"][attr_name] = {} # Initialize specific attribute dict + + total_attr_count = sum(values.values()) + if total_attr_count > 0: + current_attr_prefs = preference_dict[category_id]["attributes"][attr_name] + # Decay existing scores slightly + for val, score in current_attr_prefs.items(): + current_attr_prefs[val] = max(0.0, score * 0.9) # Decay factor -async def process_search_data(entries, preference_dict, taxonomy): - """Process search data using embedding model""" - # Dictionary to track category relevance from searches + # Add new scores based on counts + for value_str, count in values.items(): + new_score_contribution = (count / total_attr_count) * 0.5 # Contribution weight + current_score = current_attr_prefs.get(value_str, 0.0) + current_attr_prefs[value_str] = min(1.0, current_score + new_score_contribution) + + # Normalize scores 
within the attribute so they sum roughly to 1 (optional but good practice) + total_score = sum(current_attr_prefs.values()) + if total_score > 0: + for val in current_attr_prefs: + current_attr_prefs[val] /= total_score + # --- End Update preference scores (Attribute level) --- + + +async def process_search_data(entries, preference_dict, taxonomy, demographics: Optional[Dict[str, Any]] = None): + """Process search data using embedding model, considering demographics""" search_relevance = defaultdict(float) + # --- Example Demographic Usage --- + demographics = demographics or {} + gender = demographics.get("gender") + age = demographics.get("age") + # --- Add inferred data --- + has_kids = demographics.get("inferredHasKids") + relationship_status = demographics.get("inferredRelationshipStatus") + # --- Add NEW inferred data --- + employment_status = demographics.get("inferredEmploymentStatus") + education_level = demographics.get("inferredEducationLevel") + age_bracket = demographics.get("inferredAgeBracket") + # --- End Example --- + for entry in entries: query = entry.get("query") - if not query: - continue - - # If category is already provided - if entry.get("category"): - category = entry["category"] - # A direct category search is strong signal - search_relevance[category] += 1.0 - continue - - # Use embeddings to match query to category - try: - match_result = await taxonomy.match_category(query) - if match_result["threshold_met"]: - category = match_result["category"] - # Weight by confidence score - search_relevance[category] += match_result["score"] - except Exception as e: - logger.error(f"Error matching query '{query}': {str(e)}") - + provided_category = entry.get("category") # Category explicitly sent with search + matched_category = None + match_score = 0.0 + + if not query and not provided_category: + continue # Skip if no query and no category provided + + # Determine the category + if provided_category: + matched_category = provided_category + 
match_score = 1.0 # Assume full relevance if category is provided + elif query: + # Use embeddings to match query to category + try: + match_result = await taxonomy.match_category(query) + if match_result["threshold_met"]: + matched_category = match_result["category"] + match_score = match_result["score"] + except Exception as e: + logger.error(f"Error matching query '{query}': {str(e)}") + + # If a category was determined, calculate boosted relevance + if matched_category: + relevance_boost = 1.0 + category_name = taxonomy.get_category_name(matched_category) # Get name for logic + + # --- Apply Demographic Boost to Relevance --- + if gender == 'female' and category_name in ['Fashion', 'Beauty', 'Skincare']: + relevance_boost *= 1.1 + elif gender == 'male' and category_name in ['Electronics', 'Tools', 'Laptops']: + relevance_boost *= 1.05 + + if age and 18 <= age <= 30 and category_name in ["Smartphones", "Gaming"]: # Add Gaming if exists + relevance_boost *= 1.05 + + # --- Apply inferred boosts --- + if has_kids is True and category_name in ["Toys", "Baby", "Kids Clothing"]: + relevance_boost *= 1.15 + logger.debug(f"Applying 'has_kids' boost to search relevance for {category_name}") + + if relationship_status == "married" and category_name in ["Home Goods", "Furniture", "Jewelry"]: + relevance_boost *= 1.05 + logger.debug(f"Applying 'married' boost to search relevance for {category_name}") + + # --- Apply NEW inferred boosts (Examples) --- + if employment_status == "student" and category_name in ["Laptops", "Books", "Stationery"]: + relevance_boost *= 1.1 + if education_level in ["masters", "doctorate"] and category_name in ["Books", "Academic Journals"]: + relevance_boost *= 1.08 + + effective_age_info = age if age is not None else age_bracket + if effective_age_info: + if effective_age_info == "18-24" and category_name in ["Fast Fashion", "Gaming"]: + relevance_boost *= 1.05 + elif effective_age_info == "65+" and category_name in ["Health", "Gardening"]: + 
relevance_boost *= 1.1 + # --- End NEW inferred boosts --- + + # Add boosted score to search relevance dict + search_relevance[matched_category] += match_score * relevance_boost + # Normalize search relevance scores if search_relevance: - max_relevance = max(search_relevance.values()) + max_relevance = max(search_relevance.values()) if search_relevance else 0 # Handle empty dict if max_relevance > 0: # Update preferences for category, relevance in search_relevance.items(): - # Normalize to 0-1 range score = min(relevance / max_relevance, 1.0) if category not in preference_dict: @@ -219,15 +540,27 @@ async def process_search_data(entries, preference_dict, taxonomy): "attributes": {} } else: - # Use exponential moving average - alpha = 0.2 # Lower weight for searches vs purchases + alpha = 0.2 old_score = preference_dict[category]["score"] preference_dict[category]["score"] = alpha * score + (1 - alpha) * old_score -async def process_with_embeddings(entries, data_type, preference_dict, taxonomy): - """Fallback processing using embeddings for all data types""" +async def process_with_embeddings(entries, data_type, preference_dict, taxonomy, demographics: Optional[Dict[str, Any]] = None): + """Fallback processing using embeddings, potentially considering demographics""" logger.info("Using embedding fallback processing") + # --- Demographic Usage --- + demographics = demographics or {} + gender = demographics.get("gender") + age = demographics.get("age") + # --- Add inferred data --- + has_kids = demographics.get("inferredHasKids") + relationship_status = demographics.get("inferredRelationshipStatus") + # --- Add NEW inferred data --- + employment_status = demographics.get("inferredEmploymentStatus") + education_level = demographics.get("inferredEducationLevel") + age_bracket = demographics.get("inferredAgeBracket") + # --- End Demographic Usage --- + # For purchase data if data_type == "purchase": items = [] @@ -235,32 +568,65 @@ async def 
process_with_embeddings(entries, data_type, preference_dict, taxonomy) if "items" in entry: items.extend([item.get("name", "") for item in entry["items"]]) - # Process each item name for item_name in items: try: match_result = await taxonomy.match_category(item_name) if match_result["threshold_met"]: category = match_result["category"] score = match_result["score"] - + boost_factor = 1.0 + category_name = taxonomy.get_category_name(category) + + # --- Apply Demographic Boost (Similar to purchase logic) --- + if gender == "female" and category_name in ["Fashion", "Beauty"]: + boost_factor *= 1.1 + # Add other demographic boosts (age, etc.) here if desired + if age and age >= 50 and category_name == "Health": + boost_factor *= 1.08 + + # --- Apply inferred boosts --- + if has_kids is True and category_name in ["Toys", "Baby", "Kids Clothing"]: + boost_factor *= 1.15 + if relationship_status == "married" and category_name in ["Home Goods", "Furniture", "Jewelry"]: + boost_factor *= 1.05 + + # --- Apply NEW inferred boosts (Examples) --- + if employment_status == "student" and category_name in ["Laptops", "Books"]: + boost_factor *= 1.1 + if education_level in ["masters", "doctorate"] and category_name in ["Books"]: + boost_factor *= 1.08 + + effective_age_info = age if age is not None else age_bracket + if effective_age_info: + if effective_age_info == "18-24" and category_name in ["Gaming"]: + boost_factor *= 1.05 + elif effective_age_info == "65+" and category_name in ["Health"]: + boost_factor *= 1.1 + # --- End NEW inferred boosts --- + + final_score = min(score * boost_factor, 1.0) + # --- End Demographic Boost --- + + # Update preference dict (using final_score) if category not in preference_dict: preference_dict[category] = { "category": category, - "score": score, + "score": final_score, # Use boosted score "attributes": {} } else: - # Update using max - preference_dict[category]["score"] = max( - preference_dict[category]["score"], - score * 0.8 # Reduce 
confidence for embedding-based matches - ) + # Blend score (maybe use a lower weight for embedding matches?) + alpha_embed = 0.2 + old_score = preference_dict[category]["score"] + preference_dict[category]["score"] = alpha_embed * final_score + (1 - alpha_embed) * old_score + # Alternative: Just take the max? + # preference_dict[category]["score"] = max(old_score, final_score * 0.8) except Exception as e: - logger.error(f"Error processing item '{item_name}': {str(e)}") - - # For search data, same as regular processing + logger.error(f"Error processing item '{item_name}' via embedding: {str(e)}") + + # For search data, call the updated search processor (already passes demographics) elif data_type == "search": - await process_search_data(entries, preference_dict, taxonomy) + await process_search_data(entries, preference_dict, taxonomy, demographics) async def normalize_categories(preferences, taxonomy): """Ensure all categories use IDs instead of names""" @@ -268,62 +634,102 @@ async def normalize_categories(preferences, taxonomy): # Build name-to-id mapping name_to_id = {} + id_to_name = {} # Also build reverse mapping for safety check for cat in taxonomy.taxonomy.categories: name_to_id[cat.name.lower()] = cat.id + id_to_name[cat.id] = cat.name # Store ID to Name mapping for pref in preferences: - category = pref["category"] - # If category is a name rather than ID, convert it - if category.lower() in name_to_id: - pref["category"] = name_to_id[category.lower()] + category_key = pref["category"] + # Check if the key is a name and needs conversion + if isinstance(category_key, str) and category_key.lower() in name_to_id: + pref["category"] = name_to_id[category_key.lower()] + # Safety check: Ensure the final category key is a valid ID present in the taxonomy + elif category_key not in id_to_name: + logger.warning(f"Category '{category_key}' not found in taxonomy IDs during normalization. 
Skipping.") + continue # Skip this preference if the category ID is invalid + normalized.append(pref) return normalized -async def update_user_preferences(auth0_id: str, email: str, preferences: List[UserPreference], db) -> UserPreferences: - """Update user preferences directly""" - - logger.info(f"Processing preference update for user {auth0_id}") - - # Get taxonomy service for validation - taxonomy = await get_taxonomy_service(db) - - # Validate preferences against taxonomy +async def extract_attributes_with_similarity(item_name: str, category_id: str, taxonomy_service: TaxonomyService) -> Optional[Dict[str, str]]: + """ + Uses the semantic embedding model to extract attributes for an item by comparing + the item name to potential attribute values defined in the taxonomy. + Returns a dictionary like {"color": "blue", "size": "M"} or None. + """ + if not taxonomy_service.embedding_model: + logger.warning("AI Extraction: Embedding model not available in TaxonomyService.") + return None + + logger.debug(f"AI Extraction: Processing '{item_name}' in category '{category_id}'") + extracted = {} + + # 1. 
Get category details and expected attributes/values + category_details = taxonomy_service.get_category_details(category_id) + if not category_details or not category_details.attributes: + logger.debug(f"AI Extraction: No attributes defined in taxonomy for category {category_id}") + return None + + # Prepare list of attributes and their potential values for this category + attributes_to_check = [] + for attr in category_details.attributes: + if attr.values: # Only consider attributes with defined values + attributes_to_check.append({"name": attr.name, "values": attr.values}) + + if not attributes_to_check: + logger.debug(f"AI Extraction: No attributes with values defined for category {category_id}") + return None + + logger.debug(f"AI Extraction: Expected attributes for {category_id}: {[a['name'] for a in attributes_to_check]}") + try: - taxonomy.validate_preferences(preferences) - except ValueError as e: - logger.error(f"Preference validation failed: {str(e)}") - raise HTTPException(status_code=400, detail=str(e)) - - # Find the user in the database - user = await db.users.find_one({"auth0Id": auth0_id}) - if not user: - # Try finding by email as fallback - user = await db.users.find_one({"email": email}) - if not user: - logger.error(f"User not found: {email}") - raise HTTPException(status_code=404, detail="User not found") - - # Update user preferences - update_result = await db.users.update_one( - {"_id": user["_id"]}, - { - "$set": { - "preferences": [pref.dict() for pref in preferences], - "updatedAt": datetime.now() - } - } - ) - - if update_result.modified_count == 0: - logger.warning(f"No changes made to preferences for user {auth0_id}") - - # Invalidate cache - await invalidate_cache(f"{CACHE_KEYS['PREFERENCES']}{auth0_id}") - - # Return updated preferences - return UserPreferences( - user_id=str(user["_id"]), - preferences=preferences, - updated_at=datetime.now() - ) \ No newline at end of file + # 2. 
Generate embedding for the item name + item_embedding = taxonomy_service.embedding_model.encode(item_name.lower(), convert_to_tensor=True) + + # 3. Iterate through attributes and their values + for attribute_info in attributes_to_check: + attr_name = attribute_info["name"] + possible_values = attribute_info["values"] + + if not possible_values: + continue + + # Generate embeddings for all possible values of this attribute + value_embeddings = taxonomy_service.embedding_model.encode([v.lower() for v in possible_values], convert_to_tensor=True) + + # Calculate cosine similarities between item name and all values + # Use pytorch_cos_sim for efficiency + similarities = util.pytorch_cos_sim(item_embedding, value_embeddings)[0] # Get the first row (item vs all values) + + # Find the value with the highest similarity + best_match_idx = similarities.argmax().item() # Get index of max value + highest_similarity = similarities[best_match_idx].item() # Get the max similarity score + + logger.debug(f"AI Extraction: Attribute '{attr_name}', Best match: '{possible_values[best_match_idx]}', Score: {highest_similarity:.4f}") + + # 4. Check against threshold and store if match is strong enough + if highest_similarity >= ATTRIBUTE_SIMILARITY_THRESHOLD: + best_match_value = possible_values[best_match_idx] + # Simple conflict resolution: If we already extracted a value for this attribute, + # only overwrite if the new score is significantly higher (e.g., > 0.1 difference). + # A more complex approach could consider multiple high-scoring values. + if attr_name in extracted: + # We need the previous score to compare - this simple approach just takes the first good match. + # For improvement, store scores alongside values during iteration. + logger.debug(f"AI Extraction: Attribute '{attr_name}' already extracted ('{extracted[attr_name]}'). 
Keeping first match above threshold.") + else: + extracted[attr_name] = best_match_value + logger.debug(f"AI Extraction: Extracted '{attr_name}' = '{best_match_value}' (Score: {highest_similarity:.4f})") + + + except Exception as e: + logger.error(f"AI Extraction: Error during embedding/similarity calculation for '{item_name}': {e}", exc_info=True) + return None + + if not extracted: + logger.debug(f"AI Extraction: No attributes met threshold for '{item_name}'") + return None + + return extracted \ No newline at end of file diff --git a/ml-service/app/services/taxonomyService.py b/ml-service/app/services/taxonomyService.py index 9423003..3615e71 100644 --- a/ml-service/app/services/taxonomyService.py +++ b/ml-service/app/services/taxonomyService.py @@ -15,34 +15,55 @@ class TaxonomyService: def __init__(self, db=None): self.db = db - self.taxonomy = None + self.taxonomy: Optional[Taxonomy] = None # Add type hint self.embedding_model = None self.category_embeddings = {} - + # --- Add mappings for efficient lookups --- + self._id_to_name_map: Dict[str, str] = {} + self._name_to_id_map: Dict[str, str] = {} + # --- End Add mappings --- + async def initialize(self): """Initialize taxonomy from file and DB""" # Try loading from DB first - if self.db is not None: # Changed from 'if self.db:' + if self.db is not None: cached = await self.db.taxonomy.find_one({"current": True}) if cached: - self.taxonomy = Taxonomy(**cached["data"]) - logger.info(f"Loaded taxonomy from DB: {self.taxonomy.version}") - + try: + self.taxonomy = Taxonomy(**cached["data"]) + self._build_lookup_maps() # Build maps after loading + logger.info(f"Loaded taxonomy from DB: {self.taxonomy.version}") + except Exception as e: + logger.error(f"Failed to parse taxonomy from DB: {e}") + self.taxonomy = None # Ensure taxonomy is None if parsing fails + # If not in DB or load failed, use file if not self.taxonomy: - self._load_from_file() - - # Save to DB if available - if self.db is not None: + 
self._load_from_file() # This already calls _build_lookup_maps + + # Save to DB if available and loaded successfully + if self.db is not None and self.taxonomy: await self.db.taxonomy.update_one( {"current": True}, {"$set": {"data": self.taxonomy.dict(), "updated_at": datetime.now()}}, upsert=True ) - + # Initialize embedding model (try Redis cache first) - await self._initialize_embeddings() - + if self.taxonomy: # Only initialize embeddings if taxonomy loaded + await self._initialize_embeddings() + else: + logger.error("Taxonomy could not be loaded. Skipping embedding initialization.") + + + def _build_lookup_maps(self): + """Builds ID-to-Name and Name-to-ID maps from the loaded taxonomy.""" + if not self.taxonomy: + return + self._id_to_name_map = {cat.id: cat.name for cat in self.taxonomy.categories} + self._name_to_id_map = {cat.name.lower(): cat.id for cat in self.taxonomy.categories} # Use lower case for name lookup + logger.debug(f"Built taxonomy lookup maps: {len(self._id_to_name_map)} categories.") + def _load_from_file(self): """Load taxonomy from YAML file""" file_path = Path(__file__).parent.parent / "data" / "taxonomy.yaml" @@ -50,13 +71,19 @@ def _load_from_file(self): with open(file_path, 'r') as file: data = yaml.safe_load(file) self.taxonomy = Taxonomy(**data) + self._build_lookup_maps() # Build maps after loading logger.info(f"Loaded taxonomy from file: {self.taxonomy.version}") except Exception as e: - logger.error(f"Failed to load taxonomy: {str(e)}") - raise HTTPException(status_code=500, detail="Failed to load taxonomy") - + logger.error(f"Failed to load taxonomy from file: {str(e)}") + self.taxonomy = None # Ensure taxonomy is None on failure + # Don't raise HTTPException here, allow service to potentially continue without taxonomy if needed + # raise HTTPException(status_code=500, detail="Failed to load taxonomy") + async def _initialize_embeddings(self): """Initialize embedding model for search processing""" + if not self.taxonomy: # Guard 
against missing taxonomy + logger.warning("Cannot initialize embeddings: Taxonomy not loaded.") + return # Try to get embeddings from Redis cache first cache_key = f"{CACHE_KEYS['TAXONOMY_EMBEDDINGS']}all" cached_embeddings = await get_cache_json(cache_key) @@ -108,6 +135,33 @@ async def _initialize_embeddings(self): logger.error(f"Failed to initialize embeddings: {str(e)}") # Continue without embeddings, we'll use rule-based only + # --- Add get_category_name method --- + def get_category_name(self, category_id: str) -> Optional[str]: + """Get category name from its ID using the lookup map.""" + return self._id_to_name_map.get(category_id) + # --- End Add get_category_name method --- + + # --- Optional: Add get_category_id method --- + def get_category_id(self, category_name: str) -> Optional[str]: + """Get category ID from its name (case-insensitive) using the lookup map.""" + return self._name_to_id_map.get(category_name.lower()) + # --- End Optional: Add get_category_id method --- + + # +++ Add get_category_details method +++ + def get_category_details(self, category_id: str) -> Optional[TaxonomyCategory]: + """Get the full TaxonomyCategory object by its ID.""" + if not self.taxonomy: + logger.warning("Taxonomy not loaded, cannot get category details.") + return None + # Find the category in the list + for category in self.taxonomy.categories: + if category.id == category_id: + return category + logger.warning(f"Category ID '{category_id}' not found in taxonomy.") + return None + # +++ End Add get_category_details method +++ + + def validate_preferences(self, preferences): """Validate preference data against taxonomy""" if not self.taxonomy: @@ -148,7 +202,19 @@ async def match_category(self, query_text): return cached_result if not self.embedding_model or not self.category_embeddings: - raise ValueError("Embedding model not initialized") + # Check if taxonomy exists but embeddings failed + if not self.taxonomy: + logger.error("Cannot match category: Taxonomy 
not loaded.") + raise ValueError("Taxonomy not available for matching.") + else: + logger.warning(f"Cannot match category '{query_text}': Embeddings not initialized. Returning None.") + # Return a structure indicating failure or inability to match + return { + "category": None, + "score": 0.0, + "threshold_met": False, + "error": "Embeddings not initialized" + } # Generate embedding for query query_embedding = self.embedding_model.encode(query_text) @@ -183,7 +249,9 @@ async def match_category(self, query_text): async def get_taxonomy_service(db=None): """Get or create the taxonomy service singleton""" global _taxonomy_service - if _taxonomy_service is None: + if (_taxonomy_service is None): _taxonomy_service = TaxonomyService(db) await _taxonomy_service.initialize() + # Ensure the service is returned even if initialization had issues (e.g., file not found) + # Downstream code should handle potential lack of taxonomy data within the service object. return _taxonomy_service \ No newline at end of file diff --git a/preference_retrieval.md b/preference_retrieval.md new file mode 100644 index 0000000..d3a4adf --- /dev/null +++ b/preference_retrieval.md @@ -0,0 +1,83 @@ +--- + +**4. `preference_retrieval.md`** + +````markdown +# Preference Retrieval Guide + +Stores can retrieve processed user preferences from Tapiro to personalize experiences like targeted advertising or product recommendations. + +## Endpoint + +`GET /users/{userId}/preferences` + +## Purpose + +To retrieve the calculated interest preferences for a specific user, based on data submitted to Tapiro. + +## Authentication + +Requires a valid API key in the `X-API-Key` header. See [Authentication Guide](./authentication.md). + +## Path Parameter + +- `{userId}` (string, required): The **email address** of the user whose preferences you want to retrieve. + +**Example URL:** + +`/users/user@example.com/preferences` + +## Response + +- **`200 OK`**: Successfully retrieved user preferences. 
The response body will contain a `UserPreferences` object. + + ```json + { + "userId": "60d5ecb8b48f4a001f9e8f6a", // Tapiro's internal User ID + "preferences": [ + { + "category": "101", // Category ID from Taxonomy + "score": 0.85, + "attributes": { + "brand": { "Apple": 0.7, "Samsung": 0.3 }, + "color": { "black": 0.6, "blue": 0.4 } + } + }, + { + "category": "201", // Clothing + "score": 0.62, + "attributes": { + "material": { "cotton": 0.9, "polyester": 0.1 }, + "size": { "M": 0.7, "L": 0.3 } + } + } + // ... other preferences + ], + "updatedAt": "2024-05-20T10:00:00Z" + } + ``` + + **Fields:** + + - `userId` (string): Tapiro's internal unique identifier for the user. + - `preferences` (array): A list of `PreferenceItem` objects. + - `category` (string): The category ID from the [Tapiro Taxonomy](./taxonomy.md). + - `score` (number): A value between 0.0 and 1.0 indicating the user's interest level in this category. Higher is stronger. + - `attributes` (object, optional): A breakdown of preferences for specific attributes within the category (e.g., preferred brands, colors, sizes). The structure may vary. Values typically represent relative preference scores (0.0-1.0). + - `updatedAt` (string): ISO 8601 timestamp of when the preferences were last updated. + +- **`401 Unauthorized`**: Invalid or missing `X-API-Key`. +- **`403 Forbidden`**: Access denied. This occurs if: + - The user has _not_ provided `dataSharingConsent` in Tapiro. + - The user _has_ explicitly opted out of sharing data with _your specific store_. + **You should treat this response as "no preferences available" and avoid personalization based on Tapiro data for this user.** +- **`404 Not Found`**: The user specified by the email address (`{userId}`) does not exist in Tapiro. +- **`500 Internal Server Error`**: An unexpected error occurred on the server. + +## Important Considerations + +- **Consent is Key:** Always check the HTTP status code. 
A `403 Forbidden` response means you cannot use Tapiro preferences for that user due to their privacy settings. +- **User Identifier:** Remember to use the user's **email address** in the URL path (`{userId}`). +- **Caching:** Consider caching preference responses on your end for a reasonable duration (e.g., minutes to hours) to reduce API calls, but be mindful of the `updatedAt` timestamp if freshness is critical. Tapiro may also employ server-side caching. +- **Use Preferences:** Use the retrieved scores and attribute preferences to tailor advertising, recommendations, or other user experiences. +```` diff --git a/taxonomy.md b/taxonomy.md new file mode 100644 index 0000000..161f6a9 --- /dev/null +++ b/taxonomy.md @@ -0,0 +1,48 @@ +# Taxonomy Guide + +The Tapiro Taxonomy is a hierarchical classification system used to categorize products, services, and user interests. Accurate use of this taxonomy is **essential** for the effectiveness of Tapiro's AI models in generating meaningful user preferences. + +## Purpose + +- **Standardization:** Ensures that data submitted by different stores uses a consistent language for products and interests. +- **AI Training:** Provides structured input for the machine learning models that analyze user behavior and build preference profiles. +- **Preference Granularity:** Allows for preferences to be understood at different levels (e.g., general interest in "Electronics" vs. specific interest in "Smartphones" with a preference for "Apple" brand). + +## Structure + +The taxonomy consists of: + +- **Categories:** Broad groupings (e.g., "Electronics", "Fashion", "Home"). +- **Sub-categories:** More specific groupings within a parent category (e.g., "Smartphones" under "Electronics"). Categories have unique IDs (e.g., `"101"`) and names (e.g., `"Smartphones"`). +- **Attributes:** Characteristics relevant to a specific category (e.g., "brand", "color", "size" for "Smartphones"). Attributes have defined possible values. 
+ +**Example Snippet (Conceptual):** + +```yaml +version: "1.0.0" +categories: + - id: "100" + name: "Electronics" + # ... attributes for Electronics ... + - id: "101" + name: "Smartphones" + parent_id: "100" + attributes: + - name: "brand" + values: [Apple, Samsung, Google, ...] + - name: "color" + values: [black, white, blue, ...] + # ... other smartphone attributes ... + - id: "200" + name: "Fashion" + # ... attributes for Fashion ... + - id: "201" + name: "Clothing" + parent_id: "200" + attributes: + - name: "type" + values: [shirts, pants, dresses, ...] + - name: "material" + values: [cotton, polyester, wool, ...] + # ... other clothing attributes ... +``` diff --git a/web/package-lock.json b/web/package-lock.json index d16e59f..791b6ad 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -17,7 +17,8 @@ "react-dom": "^19.0.0", "react-hook-form": "^7.56.0", "react-icons": "^5.5.0", - "react-router": "^7.4.0" + "react-router": "^7.4.0", + "recharts": "^2.15.3" }, "devDependencies": { "@eslint/js": "^9.23.0", @@ -295,6 +296,18 @@ "@babel/core": "^7.0.0-0" } }, + "node_modules/@babel/runtime": { + "version": "7.27.0", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.27.0.tgz", + "integrity": "sha512-VtPOkrdPHZsKc/clNqyi9WUA8TINkZ4cGk63UUE3u4pmB2k+ZMQRDuIOagv8UVd6j7k0T3+RRIb7beKTebNbcw==", + "license": "MIT", + "dependencies": { + "regenerator-runtime": "^0.14.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/@babel/template": { "version": "7.27.0", "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.27.0.tgz", @@ -1787,6 +1800,69 @@ "integrity": "sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA==", "license": "MIT" }, + "node_modules/@types/d3-array": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/@types/d3-array/-/d3-array-3.2.1.tgz", + "integrity": 
"sha512-Y2Jn2idRrLzUfAKV2LyRImR+y4oa2AntrgID95SHJxuMUrkNXmanDSed71sRNZysveJVt1hLLemQZIady0FpEg==", + "license": "MIT" + }, + "node_modules/@types/d3-color": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/@types/d3-color/-/d3-color-3.1.3.tgz", + "integrity": "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==", + "license": "MIT" + }, + "node_modules/@types/d3-ease": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@types/d3-ease/-/d3-ease-3.0.2.tgz", + "integrity": "sha512-NcV1JjO5oDzoK26oMzbILE6HW7uVXOHLQvHshBUW4UMdZGfiY6v5BeQwh9a9tCzv+CeefZQHJt5SRgK154RtiA==", + "license": "MIT" + }, + "node_modules/@types/d3-interpolate": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/d3-interpolate/-/d3-interpolate-3.0.4.tgz", + "integrity": "sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA==", + "license": "MIT", + "dependencies": { + "@types/d3-color": "*" + } + }, + "node_modules/@types/d3-path": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@types/d3-path/-/d3-path-3.1.1.tgz", + "integrity": "sha512-VMZBYyQvbGmWyWVea0EHs/BwLgxc+MKi1zLDCONksozI4YJMcTt8ZEuIR4Sb1MMTE8MMW49v0IwI5+b7RmfWlg==", + "license": "MIT" + }, + "node_modules/@types/d3-scale": { + "version": "4.0.9", + "resolved": "https://registry.npmjs.org/@types/d3-scale/-/d3-scale-4.0.9.tgz", + "integrity": "sha512-dLmtwB8zkAeO/juAMfnV+sItKjlsw2lKdZVVy6LRr0cBmegxSABiLEpGVmSJJ8O08i4+sGR6qQtb6WtuwJdvVw==", + "license": "MIT", + "dependencies": { + "@types/d3-time": "*" + } + }, + "node_modules/@types/d3-shape": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/@types/d3-shape/-/d3-shape-3.1.7.tgz", + "integrity": "sha512-VLvUQ33C+3J+8p+Daf+nYSOsjB4GXp19/S/aGo60m9h1v6XaxjiT82lKVWJCfzhtuZ3yD7i/TPeC/fuKLLOSmg==", + "license": "MIT", + "dependencies": { + "@types/d3-path": "*" + } + }, + "node_modules/@types/d3-time": { + "version": 
"3.0.4", + "resolved": "https://registry.npmjs.org/@types/d3-time/-/d3-time-3.0.4.tgz", + "integrity": "sha512-yuzZug1nkAAaBlBBikKZTgzCeA+k1uy4ZFwWANOfKw5z5LRhV0gNA7gNkKm7HoK+HRN0wX3EkxGk0fpbWhmB7g==", + "license": "MIT" + }, + "node_modules/@types/d3-timer": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@types/d3-timer/-/d3-timer-3.0.2.tgz", + "integrity": "sha512-Ps3T8E8dZDam6fUyNiMkekK3XUsaUEik+idO9/YjPtfj2qruF8tFBXS7XhtE4iIXBLxhmLjP3SXpLhVf21I9Lw==", + "license": "MIT" + }, "node_modules/@types/estree": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.7.tgz", @@ -2732,6 +2808,15 @@ "node": ">=12" } }, + "node_modules/clsx": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", + "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -2845,9 +2930,129 @@ "version": "3.1.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "dev": true, "license": "MIT" }, + "node_modules/d3-array": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz", + "integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==", + "license": "ISC", + "dependencies": { + "internmap": "1 - 2" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-color": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz", + "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + 
}, + "node_modules/d3-ease": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-ease/-/d3-ease-3.0.1.tgz", + "integrity": "sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-format": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.0.tgz", + "integrity": "sha512-YyUI6AEuY/Wpt8KWLgZHsIU86atmikuoOmCfommt0LYHiQSPjvX2AcFc38PX0CBpr2RCyZhjex+NS/LPOv6YqA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-interpolate": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz", + "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", + "license": "ISC", + "dependencies": { + "d3-color": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-path": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz", + "integrity": "sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-scale": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz", + "integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==", + "license": "ISC", + "dependencies": { + "d3-array": "2.10.0 - 3", + "d3-format": "1 - 3", + "d3-interpolate": "1.2.0 - 3", + "d3-time": "2.1.1 - 3", + "d3-time-format": "2 - 4" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-shape": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/d3-shape/-/d3-shape-3.2.0.tgz", + "integrity": "sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==", + "license": "ISC", + 
"dependencies": { + "d3-path": "^3.1.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz", + "integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==", + "license": "ISC", + "dependencies": { + "d3-array": "2 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time-format": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz", + "integrity": "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==", + "license": "ISC", + "dependencies": { + "d3-time": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-timer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz", + "integrity": "sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, "node_modules/debounce": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/debounce/-/debounce-2.2.0.tgz", @@ -2877,6 +3082,12 @@ } } }, + "node_modules/decimal.js-light": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/decimal.js-light/-/decimal.js-light-2.5.1.tgz", + "integrity": "sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg==", + "license": "MIT" + }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -2926,6 +3137,16 @@ "node": ">=8" } }, + "node_modules/dom-helpers": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/dom-helpers/-/dom-helpers-5.2.1.tgz", + "integrity": "sha512-nRCa7CK3VTrM2NmGkIy4cbK7IZlgBE/PYMn55rrXefr5xXDP0LdtfPnblFDoVdcAfslJ7or6iqAUnx0CCGIWQA==", + "license": "MIT", + "dependencies": { + "@babel/runtime": 
"^7.8.7", + "csstype": "^3.0.2" + } + }, "node_modules/dotenv": { "version": "16.5.0", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz", @@ -3300,6 +3521,12 @@ "url": "https://github.com/eta-dev/eta?sponsor=1" } }, + "node_modules/eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", + "license": "MIT" + }, "node_modules/exsolve": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/exsolve/-/exsolve-1.0.4.tgz", @@ -3314,6 +3541,15 @@ "dev": true, "license": "MIT" }, + "node_modules/fast-equals": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/fast-equals/-/fast-equals-5.2.2.tgz", + "integrity": "sha512-V7/RktU11J3I36Nwq2JnZEM7tNm17eBJz+u25qdxBZeCKiX6BkVSZQjwWIr+IobgnZy+ag73tTZgZi7tr0LrBw==", + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, "node_modules/fast-glob": { "version": "3.3.3", "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", @@ -3753,6 +3989,15 @@ "node": ">=0.8.19" } }, + "node_modules/internmap": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz", + "integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -3814,7 +4059,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "dev": true, "license": "MIT" }, "node_modules/js-yaml": { @@ -4169,7 +4413,6 @@ "version": "4.17.21", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", 
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", - "dev": true, "license": "MIT" }, "node_modules/lodash.merge": { @@ -4179,6 +4422,18 @@ "dev": true, "license": "MIT" }, + "node_modules/loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", + "license": "MIT", + "dependencies": { + "js-tokens": "^3.0.0 || ^4.0.0" + }, + "bin": { + "loose-envify": "cli.js" + } + }, "node_modules/lru-cache": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", @@ -4469,6 +4724,15 @@ "node": ">= 6" } }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/ohash": { "version": "2.0.11", "resolved": "https://registry.npmjs.org/ohash/-/ohash-2.0.11.tgz", @@ -4744,6 +5008,23 @@ } } }, + "node_modules/prop-types": { + "version": "15.8.1", + "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", + "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==", + "license": "MIT", + "dependencies": { + "loose-envify": "^1.4.0", + "object-assign": "^4.1.1", + "react-is": "^16.13.1" + } + }, + "node_modules/prop-types/node_modules/react-is": { + "version": "16.13.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", + "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", + "license": "MIT" + }, "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": 
"https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", @@ -4837,6 +5118,12 @@ "react": "*" } }, + "node_modules/react-is": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", + "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", + "license": "MIT" + }, "node_modules/react-refresh": { "version": "0.14.2", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.14.2.tgz", @@ -4871,6 +5158,37 @@ } } }, + "node_modules/react-smooth": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/react-smooth/-/react-smooth-4.0.4.tgz", + "integrity": "sha512-gnGKTpYwqL0Iii09gHobNolvX4Kiq4PKx6eWBCYYix+8cdw+cGo3do906l1NBPKkSWx1DghC1dlWG9L2uGd61Q==", + "license": "MIT", + "dependencies": { + "fast-equals": "^5.0.1", + "prop-types": "^15.8.1", + "react-transition-group": "^4.4.5" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/react-transition-group": { + "version": "4.4.5", + "resolved": "https://registry.npmjs.org/react-transition-group/-/react-transition-group-4.4.5.tgz", + "integrity": "sha512-pZcd1MCJoiKiBR2NRxeCRg13uCXbydPnmB4EOeRrY7480qNWO8IIgQG6zlDkm6uRMsURXPuKq0GWtiM59a5Q6g==", + "license": "BSD-3-Clause", + "dependencies": { + "@babel/runtime": "^7.5.5", + "dom-helpers": "^5.0.1", + "loose-envify": "^1.4.0", + "prop-types": "^15.6.2" + }, + "peerDependencies": { + "react": ">=16.6.0", + "react-dom": ">=16.6.0" + } + }, "node_modules/readdirp": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz", @@ -4900,6 +5218,38 @@ "node": ">= 4" } }, + "node_modules/recharts": { + "version": "2.15.3", + "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.15.3.tgz", + "integrity": 
"sha512-EdOPzTwcFSuqtvkDoaM5ws/Km1+WTAO2eizL7rqiG0V2UVhTnz0m7J2i0CjVPUCdEkZImaWvXLbZDS2H5t6GFQ==", + "license": "MIT", + "dependencies": { + "clsx": "^2.0.0", + "eventemitter3": "^4.0.1", + "lodash": "^4.17.21", + "react-is": "^18.3.1", + "react-smooth": "^4.0.4", + "recharts-scale": "^0.4.4", + "tiny-invariant": "^1.3.1", + "victory-vendor": "^36.6.8" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "react": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/recharts-scale": { + "version": "0.4.5", + "resolved": "https://registry.npmjs.org/recharts-scale/-/recharts-scale-0.4.5.tgz", + "integrity": "sha512-kivNFO+0OcUNu7jQquLXAxz1FIwZj8nrj+YkOKc5694NbjCvcT6aSZiIzNzd2Kul4o4rTto8QVR9lMNtxD4G1w==", + "license": "MIT", + "dependencies": { + "decimal.js-light": "^2.4.1" + } + }, "node_modules/reftools": { "version": "1.1.9", "resolved": "https://registry.npmjs.org/reftools/-/reftools-1.1.9.tgz", @@ -4910,6 +5260,12 @@ "url": "https://github.com/Mermade/oas-kit?sponsor=1" } }, + "node_modules/regenerator-runtime": { + "version": "0.14.1", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz", + "integrity": "sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==", + "license": "MIT" + }, "node_modules/repeat-string": { "version": "1.6.1", "resolved": "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz", @@ -5473,6 +5829,28 @@ "punycode": "^2.1.0" } }, + "node_modules/victory-vendor": { + "version": "36.9.2", + "resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-36.9.2.tgz", + "integrity": "sha512-PnpQQMuxlwYdocC8fIJqVXvkeViHYzotI+NJrCuav0ZYFoq912ZHBk3mCeuj+5/VpodOjPe1z0Fk2ihgzlXqjQ==", + "license": "MIT AND ISC", + "dependencies": { + "@types/d3-array": "^3.0.3", + "@types/d3-ease": "^3.0.0", + "@types/d3-interpolate": "^3.0.1", + "@types/d3-scale": "^4.0.2", + 
"@types/d3-shape": "^3.1.0", + "@types/d3-time": "^3.0.0", + "@types/d3-timer": "^3.0.0", + "d3-array": "^3.1.6", + "d3-ease": "^3.0.1", + "d3-interpolate": "^3.0.1", + "d3-scale": "^4.0.2", + "d3-shape": "^3.1.0", + "d3-time": "^3.0.0", + "d3-timer": "^3.0.1" + } + }, "node_modules/vite": { "version": "6.2.6", "resolved": "https://registry.npmjs.org/vite/-/vite-6.2.6.tgz", diff --git a/web/package.json b/web/package.json index 78dfae6..dc0fef0 100644 --- a/web/package.json +++ b/web/package.json @@ -22,7 +22,8 @@ "react-dom": "^19.0.0", "react-hook-form": "^7.56.0", "react-icons": "^5.5.0", - "react-router": "^7.4.0" + "react-router": "^7.4.0", + "recharts": "^2.15.3" }, "devDependencies": { "@eslint/js": "^9.23.0", diff --git a/web/src/api/apiClient.ts b/web/src/api/apiClient.ts index 1ee2a6c..a6429ee 100644 --- a/web/src/api/apiClient.ts +++ b/web/src/api/apiClient.ts @@ -2,6 +2,7 @@ import { Users } from "./types/Users"; import { Stores } from "./types/Stores"; import { Health } from "./types/Health"; import { Ping } from "./types/Ping"; +import { Taxonomy } from "./types/Taxonomy"; // <-- Import Taxonomy client // Add useState import import { useEffect, useMemo, useState } from "react"; import { useAuth } from "../hooks/useAuth"; // ← use your context @@ -24,6 +25,9 @@ export function createApiClients() { }, }; } + // Return an empty object or undefined if no securityData is present + // to avoid potential issues with Axios/fetch expecting an object. 
+ return {}; }, }; @@ -33,6 +37,7 @@ export function createApiClients() { stores: new Stores(config), health: new Health(config), ping: new Ping(config), + taxonomy: new Taxonomy(config), // <-- Instantiate Taxonomy client }; } @@ -53,15 +58,16 @@ export function useApiClients() { try { const token = await getAccessToken(); // This now throws on error if (isMounted) { - Object.values(apiClients).forEach( - (c) => c.setSecurityData(token || null), // Should always have token here if no error + // Ensure all clients get the security data + Object.values(apiClients).forEach((c) => + c.setSecurityData(token || null), ); setClientsReady(true); // <-- Set clients as ready AFTER token is set } } catch { - // <-- Remove 'e' from here - // Error fetching token (already logged in getAccessToken) + // Error fetching token if (isMounted) { + // Ensure all clients have security data cleared on error Object.values(apiClients).forEach((c) => c.setSecurityData(null)); setClientsReady(false); // <-- Clients are not ready } @@ -69,6 +75,7 @@ export function useApiClients() { })(); } else { // If not authenticated or still loading, ensure clients are not ready and have no token + // Ensure all clients have security data cleared Object.values(apiClients).forEach((c) => c.setSecurityData(null)); setClientsReady(false); // <-- Clients are not ready } @@ -76,8 +83,6 @@ export function useApiClients() { return () => { isMounted = false; }; - // Add clientsReady to dependency array? No, causes infinite loop. - // The effect should run based on auth state changes. 
}, [isAuthenticated, authLoading, getAccessToken, apiClients]); // Return clients and the readiness state diff --git a/web/src/api/hooks/useAuthHooks.ts b/web/src/api/hooks/useAuthHooks.ts index 8e2c8a8..66da8f4 100644 --- a/web/src/api/hooks/useAuthHooks.ts +++ b/web/src/api/hooks/useAuthHooks.ts @@ -3,6 +3,7 @@ import { useAuth } from "../../hooks/useAuth"; import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; import { UserCreate, StoreCreate, User, Store } from "../types/data-contracts"; import { cacheSettings, cacheKeys } from "../utils/cache"; // Import cacheSettings +import { useNavigate } from "react-router"; export function useUserMetadata() { // Get clientsReady state along with apiClients @@ -22,21 +23,26 @@ export function useRegisterUser() { const { apiClients } = useApiClients(); const queryClient = useQueryClient(); const auth = useAuth(); + const navigate = useNavigate(); return useMutation({ mutationFn: (userData: UserCreate) => apiClients.users.registerUser(userData).then((res) => res.data), onSuccess: async () => { + // Invalidate metadata and refresh tokens first await queryClient.invalidateQueries({ queryKey: ["auth", "metadata"] }); await auth.refreshTokens(); + // Navigate to user dashboard after token refresh + navigate("/dashboard/user"); + + // Invalidate other queries after navigation is triggered await queryClient.invalidateQueries({ queryKey: cacheKeys.users.profile(), }); await queryClient.invalidateQueries({ queryKey: cacheKeys.users.preferences(), }); - // Add any other relevant query invalidations here }, }); } diff --git a/web/src/api/hooks/useStoreHooks.ts b/web/src/api/hooks/useStoreHooks.ts index d4fde6f..a92f26a 100644 --- a/web/src/api/hooks/useStoreHooks.ts +++ b/web/src/api/hooks/useStoreHooks.ts @@ -1,7 +1,11 @@ import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; import { useApiClients } from "../apiClient"; -import { cacheKeys, cacheSettings } from "../utils/cache"; 
-import { ApiKeyCreate, StoreUpdate } from "../types/data-contracts"; +import { cacheKeys, cacheSettings, CACHE_TIMES } from "../utils/cache"; // <-- Import CACHE_TIMES +import { + ApiKeyCreate, + StoreUpdate, + StoreBasicInfo, // <-- Import StoreBasicInfo +} from "../types/data-contracts"; import { useAuth } from "../../hooks/useAuth"; // Import useAuth export function useStoreProfile() { @@ -86,6 +90,35 @@ export function useApiKeyUsage(keyId: string) { }); } +// --- New Hook --- + +// Hook to lookup multiple stores by their IDs +export function useLookupStores(storeIds: string[]) { + const { apiClients, clientsReady } = useApiClients(); + const { isAuthenticated, isLoading: authLoading } = useAuth(); + + // Filter out empty IDs and join for the query key and API call + const validIds = storeIds.filter((id) => id); + const idsQueryParam = validIds.join(","); + + return useQuery({ + // Expect an array of StoreBasicInfo + // Include the sorted list of valid IDs in the query key + queryKey: cacheKeys.stores.lookup(validIds.sort()), + queryFn: () => + // Pass the comma-separated string of IDs to the API client method + apiClients.stores + .lookupStores({ ids: idsQueryParam }) + .then((res) => res.data), + // Only enable if there are valid IDs and the client is ready + enabled: + validIds.length > 0 && isAuthenticated && !authLoading && clientsReady, + // Cache settings can be specific or default + staleTime: CACHE_TIMES.LONG, // Store names don't change often + gcTime: CACHE_TIMES.LONG * 2, + }); +} + export function useDeleteStoreProfile() { const { apiClients } = useApiClients(); const queryClient = useQueryClient(); diff --git a/web/src/api/hooks/useSystemHooks.ts b/web/src/api/hooks/useSystemHooks.ts index c02a272..4a2257b 100644 --- a/web/src/api/hooks/useSystemHooks.ts +++ b/web/src/api/hooks/useSystemHooks.ts @@ -1,7 +1,7 @@ import { useQuery } from "@tanstack/react-query"; import { useApiClients } from "../apiClient"; import { cacheKeys, cacheSettings } from 
"../utils/cache"; -import { HealthStatus, PingStatus } from "../types/data-contracts"; +import { HealthStatus, PingStatus, Error } from "../types/data-contracts"; export function useHealthCheck() { // Destructure apiClients first, then get health from it diff --git a/web/src/api/hooks/useTaxonomyHooks.ts b/web/src/api/hooks/useTaxonomyHooks.ts new file mode 100644 index 0000000..f843cea --- /dev/null +++ b/web/src/api/hooks/useTaxonomyHooks.ts @@ -0,0 +1,32 @@ +import { useQuery } from "@tanstack/react-query"; +import { useApiClients } from "../apiClient"; +import { cacheKeys, cacheSettings } from "../utils/cache"; +import { Taxonomy, Error } from "../types/data-contracts"; + +// Define an interface for the actual API response structure +interface TaxonomyApiResponse { + _id: string; + version: string; + current: boolean; + data: Taxonomy; // The nested object matching the Taxonomy type + updated_at: string; +} + +export function useTaxonomy() { + const { apiClients, clientsReady } = useApiClients(); + + // The useQuery hook should return the inner 'Taxonomy' type + return useQuery({ + queryKey: cacheKeys.system.taxonomy(), + queryFn: async () => { + // Fetch the full response + const res = await apiClients.taxonomy.getTaxonomyCategories(); + // Explicitly cast the response data to the actual API structure + const responseData = res.data as unknown as TaxonomyApiResponse; + // Return the nested 'data' property which matches the 'Taxonomy' type + return responseData.data; + }, + enabled: clientsReady, // Only fetch when API client is ready + ...cacheSettings.taxonomy, // Use specific cache settings + }); +} diff --git a/web/src/api/hooks/useUserHooks.ts b/web/src/api/hooks/useUserHooks.ts index 22ee7fa..512f32d 100644 --- a/web/src/api/hooks/useUserHooks.ts +++ b/web/src/api/hooks/useUserHooks.ts @@ -5,6 +5,11 @@ import { UserPreferencesUpdate, UserUpdate, User, + RecentUserDataEntry, + // SpendingAnalytics, // <-- Remove old type if not used elsewhere + 
StoreConsentList, + MonthlySpendingAnalytics, // <-- Import new type + GetSpendingAnalyticsParams, // <-- Import params type } from "../types/data-contracts"; import { useAuth } from "../../hooks/useAuth"; // Import useAuth @@ -92,8 +97,10 @@ export function useOptInToStore() { if (context?.queryKey) { queryClient.invalidateQueries({ queryKey: context.queryKey }); } - // Also invalidate preferences cache as opt-in/out might affect derived data? - // queryClient.invalidateQueries({ queryKey: cacheKeys.users.preferences() }); + // Also invalidate the consent list cache + queryClient.invalidateQueries({ + queryKey: cacheKeys.users.storeConsent(), + }); }, }); } @@ -127,8 +134,10 @@ export function useOptOutFromStore() { if (context?.queryKey) { queryClient.invalidateQueries({ queryKey: context.queryKey }); } - // Also invalidate preferences cache as opt-in/out might affect derived data? - // queryClient.invalidateQueries({ queryKey: cacheKeys.users.preferences() }); + // Also invalidate the consent list cache + queryClient.invalidateQueries({ + queryKey: cacheKeys.users.storeConsent(), + }); }, }); } @@ -147,3 +156,64 @@ export function useDeleteUserProfile() { }, }); } + +// --- New Hooks --- + +export function useRecentUserData(limit: number = 10, page: number = 1) { + const { apiClients, clientsReady } = useApiClients(); + const { isAuthenticated, isLoading: authLoading } = useAuth(); + + return useQuery({ + // Expect an array + queryKey: cacheKeys.users.recentData(limit, page), + queryFn: () => + apiClients.users + .getRecentUserData({ limit, page }) + .then((res) => res.data), + enabled: isAuthenticated && !authLoading && clientsReady, + // Add specific cache settings if needed, otherwise defaults apply + // ...cacheSettings.recentData, // Example + // Replace keepPreviousData with placeholderData for TanStack Query v5+ + placeholderData: (previousData) => previousData, + }); +} + +export function useSpendingAnalytics( + params?: GetSpendingAnalyticsParams, // 
Accept optional params +) { + const { apiClients, clientsReady } = useApiClients(); + const { isAuthenticated, isLoading: authLoading } = useAuth(); + + // Destructure params for queryKey dependency, provide defaults + const { startDate, endDate } = params || {}; + + return useQuery({ + // <-- Use new response type + // Update queryKey to include dates for unique caching + queryKey: cacheKeys.users.spendingAnalytics(startDate, endDate), + queryFn: () => + // Pass params to the API call + apiClients.users + .getSpendingAnalytics({ startDate, endDate }) + .then((res) => res.data), + enabled: isAuthenticated && !authLoading && clientsReady, + // Add specific cache settings if needed + // ...cacheSettings.analytics, // Example + placeholderData: (previousData) => previousData, // Keep placeholderData for smoother transitions + }); +} + +export function useStoreConsentLists() { + const { apiClients, clientsReady } = useApiClients(); + const { isAuthenticated, isLoading: authLoading } = useAuth(); + + return useQuery({ + // Expect StoreConsentList type + queryKey: cacheKeys.users.storeConsent(), + queryFn: () => + apiClients.users.getStoreConsentLists().then((res) => res.data), + enabled: isAuthenticated && !authLoading && clientsReady, + // Add specific cache settings if needed + // ...cacheSettings.consent, // Example + }); +} diff --git a/web/src/api/types/Stores.ts b/web/src/api/types/Stores.ts index 834e777..ce70675 100644 --- a/web/src/api/types/Stores.ts +++ b/web/src/api/types/Stores.ts @@ -17,7 +17,9 @@ import { ApiKeyUsage, Error, GetApiKeyUsagePayload, + LookupStoresParams, Store, + StoreBasicInfo, StoreCreate, StoreUpdate, } from "./data-contracts"; @@ -205,4 +207,26 @@ export class Stores< format: "json", ...params, }); + /** + * @description Retrieves basic details (like name) for a list of store IDs. 
+ * + * @tags Store Management + * @name LookupStores + * @summary Lookup Store Details + * @request GET:/stores/lookup + * @secure + * @response `200` `(StoreBasicInfo)[]` Store details retrieved successfully. + * @response `400` `Error` + * @response `401` `Error` + * @response `500` `Error` + */ + lookupStores = (query: LookupStoresParams, params: RequestParams = {}) => + this.request({ + path: `/stores/lookup`, + method: "GET", + query: query, + secure: true, + format: "json", + ...params, + }); } diff --git a/web/src/api/types/Taxonomy.ts b/web/src/api/types/Taxonomy.ts new file mode 100644 index 0000000..c199b64 --- /dev/null +++ b/web/src/api/types/Taxonomy.ts @@ -0,0 +1,37 @@ +/* eslint-disable */ +/* tslint:disable */ +// @ts-nocheck +/* + * --------------------------------------------------------------- + * ## THIS FILE WAS GENERATED VIA SWAGGER-TYPESCRIPT-API ## + * ## ## + * ## AUTHOR: acacode ## + * ## SOURCE: https://github.com/acacode/swagger-typescript-api ## + * --------------------------------------------------------------- + */ + +import { Error, Taxonomy } from "./data-contracts"; +import { HttpClient, RequestParams } from "./http-client"; + +export class Taxonomy< + SecurityDataType = unknown, +> extends HttpClient { + /** + * @description Retrieves the full taxonomy structure from the database. + * + * @tags Taxonomy + * @name GetTaxonomyCategories + * @summary Get Taxonomy Categories + * @request GET:/taxonomy/categories + * @response `200` `Taxonomy` Successfully retrieved the taxonomy. + * @response `404` `Error` Taxonomy data not found in the database. 
+ * @response `500` `Error` + */ + getTaxonomyCategories = (params: RequestParams = {}) => + this.request({ + path: `/taxonomy/categories`, + method: "GET", + format: "json", + ...params, + }); +} diff --git a/web/src/api/types/Users.ts b/web/src/api/types/Users.ts index 48d1c4a..df15d2a 100644 --- a/web/src/api/types/Users.ts +++ b/web/src/api/types/Users.ts @@ -12,6 +12,11 @@ import { Error, + GetRecentUserDataParams, + GetSpendingAnalyticsParams, + MonthlySpendingAnalytics, + RecentUserDataEntry, + StoreConsentList, User, UserCreate, UserData, @@ -244,6 +249,27 @@ export class Users< secure: true, ...params, }); + /** + * @description Retrieves the lists of store IDs the user has explicitly opted into or opted out of sharing data with. + * + * @tags Preference Management + * @name GetStoreConsentLists + * @summary Get user's store opt-in/out lists + * @request GET:/users/preferences/store-consent + * @secure + * @response `200` `StoreConsentList` Successfully retrieved store consent lists. + * @response `401` `Error` + * @response `404` `Error` + * @response `500` `Error` + */ + getStoreConsentLists = (params: RequestParams = {}) => + this.request({ + path: `/users/preferences/store-consent`, + method: "GET", + secure: true, + format: "json", + ...params, + }); /** * @description Retrieve Auth0 metadata for the authenticated user * @@ -264,4 +290,53 @@ export class Users< format: "json", ...params, }); + /** + * @description Retrieves a list of recent data submissions made about the authenticated user. 
+ * + * @tags User Management + * @name GetRecentUserData + * @summary Get Recent User Data Submissions + * @request GET:/users/data/recent + * @secure + * @response `200` `(RecentUserDataEntry)[]` Recent data submissions retrieved successfully + * @response `401` `Error` + * @response `500` `Error` + */ + getRecentUserData = ( + query: GetRecentUserDataParams, + params: RequestParams = {}, + ) => + this.request({ + path: `/users/data/recent`, + method: "GET", + query: query, + secure: true, + format: "json", + ...params, + }); + /** + * @description Retrieves aggregated spending data by category and month for the authenticated user. + * + * @tags User Management + * @name GetSpendingAnalytics + * @summary Get User Spending Analytics + * @request GET:/users/analytics/spending + * @secure + * @response `200` `MonthlySpendingAnalytics` Spending analytics retrieved successfully. + * @response `401` `Error` + * @response `404` `Error` + * @response `500` `Error` + */ + getSpendingAnalytics = ( + query: GetSpendingAnalyticsParams, + params: RequestParams = {}, + ) => + this.request({ + path: `/users/analytics/spending`, + method: "GET", + query: query, + secure: true, + format: "json", + ...params, + }); } diff --git a/web/src/api/types/data-contracts.ts b/web/src/api/types/data-contracts.ts index 074fe75..4e1d9af 100644 --- a/web/src/api/types/data-contracts.ts +++ b/web/src/api/types/data-contracts.ts @@ -28,6 +28,27 @@ export interface User { username?: string; /** @pattern ^\+?[\d\s-]+$ */ phone?: string; + /** + * User gender identity (e.g., 'male', 'female', 'non-binary', 'prefer_not_to_say') + * @example "female" + */ + gender?: string | null; + /** + * User income bracket category (e.g., '<25k', '25k-50k', '50k-100k', '100k-200k', '>200k', 'prefer_not_to_say') + * @example "50k-100k" + */ + incomeBracket?: string | null; + /** + * User country of residence (ISO 3166-1 alpha-2 code) + * @example "US" + */ + country?: string | null; + /** + * User age + * @format 
int32 + * @example 35 + */ + age?: number | null; privacySettings: { /** @default false */ dataSharingConsent?: boolean; @@ -38,10 +59,6 @@ export interface User { /** List of store IDs user has opted out from */ optOutStores?: string[]; }; - dataAccess?: { - /** List of allowed domains for data access */ - allowedDomains?: string[]; - }; /** @format date-time */ createdAt?: string; /** @format date-time */ @@ -71,6 +88,17 @@ export interface UserCreate { preferences?: PreferenceItem[]; /** User's consent for data sharing */ dataSharingConsent: boolean; + /** User gender identity */ + gender?: string | null; + /** User income bracket category */ + incomeBracket?: string | null; + /** User country of residence (ISO 3166-1 alpha-2 code) */ + country?: string | null; + /** + * User age + * @format int32 + */ + age?: number | null; } export interface StoreCreate { @@ -96,9 +124,17 @@ export interface UserUpdate { optInStores?: string[]; optOutStores?: string[]; }; - dataAccess?: { - allowedDomains?: string[]; - }; + /** User gender identity */ + gender?: string | null; + /** User income bracket category */ + incomeBracket?: string | null; + /** User country of residence (ISO 3166-1 alpha-2 code) */ + country?: string | null; + /** + * User age + * @format int32 + */ + age?: number | null; } export interface ApiKey { @@ -112,67 +148,92 @@ export interface ApiKey { status?: "active" | "revoked"; } +/** @example {"email":"user@example.com","dataType":"purchase","entries":[{"$ref":"#/components/schemas/PurchaseEntry/example"}],"metadata":{"source":"web","deviceType":"desktop","sessionId":"abc-123-xyz-789"}} */ export interface UserData { - /** User's email address */ + /** + * User's email address (used as identifier for API key auth). Must match a registered Tapiro user. + * @format email + */ email: string; - /** Type of data being submitted */ + /** Specifies the type of data contained in the 'entries' array. 
*/ dataType: "purchase" | "search"; - /** Array of data entries */ + /** + * List of data entries. Each entry must conform to either the PurchaseEntry or SearchEntry schema, matching the top-level 'dataType'. + * @minItems 1 + */ entries: (PurchaseEntry | SearchEntry)[]; - /** Additional information about the collection event */ + /** + * Additional metadata about the collection event (e.g., source, device). + * @example {"source":"web","deviceType":"desktop","sessionId":"abc-123-xyz-789"} + */ metadata?: { - /** Optional user ID if known */ - userId?: string; - /** Source of the data (web, mobile, pos, etc) */ + /** Source of the data (e.g., 'web', 'mobile_app', 'pos'). */ source?: string; - /** Type of device used */ + /** Type of device used (e.g., 'desktop', 'mobile', 'tablet'). */ deviceType?: string; - /** Unique identifier for the user session */ + /** Identifier for the user's session. */ sessionId?: string; }; } +/** @example {"timestamp":"2024-05-15T14:30:00Z","items":[{"$ref":"#/components/schemas/PurchaseItem/example"},{"sku":"ABC-789","name":"Running Shorts","category":"201","price":39.95,"quantity":1,"attributes":{"color":"black","size":"M","material":"polyester"}}],"totalValue":91.93} */ export interface PurchaseEntry { - /** @format date-time */ + /** + * ISO 8601 timestamp of when the purchase occurred. + * @format date-time + */ timestamp: string; + /** List of items included in the purchase. */ items: PurchaseItem[]; - /** @format float */ - totalAmount?: number; + /** + * Optional total value of the purchase event. + * @format float + */ + totalValue?: number; } +/** @example {"sku":"XYZ-123","name":"Men's Cotton T-Shirt","category":"201","price":25.99,"quantity":2,"attributes":{"color":"navy","size":"M","material":"cotton"}} */ export interface PurchaseItem { + /** Stock Keeping Unit or unique product identifier. */ sku?: string; + /** Name of the purchased item. 
*/ name: string; - /** Category ID (e.g., "101") or name (e.g., "smartphones") */ + /** Category ID or name matching the Tapiro taxonomy (e.g., "101" or "Smartphones"). Providing the most specific category ID is recommended. */ category: string; - /** @default 1 */ - quantity?: number; - /** @format float */ + /** + * Price of a single unit of the item. + * @format float + */ price?: number; - /** Category-specific attributes */ + /** + * Number of units purchased. + * @default 1 + */ + quantity?: number; + /** Key-value pairs representing product attributes based on the taxonomy. Keys should be attribute names (e.g., "color", "size", "brand") and values should be the specific attribute value (e.g., "blue", "large", "Acme"). */ attributes?: ItemAttributes; } -/** Category-specific attributes */ -export interface ItemAttributes { - price_range?: "budget" | "mid_range" | "premium" | "luxury"; - brand?: string; - color?: string; - material?: string; - style?: string; - room?: string; - size?: string; - feature?: string; - season?: string; - gender?: string; -} +/** + * Key-value pairs representing product attributes based on the taxonomy. Keys should be attribute names (e.g., "color", "size", "brand") and values should be the specific attribute value (e.g., "blue", "large", "Acme"). + * @example {"color":"blue","size":"L","material":"cotton"} + */ +export type ItemAttributes = Record; +/** @example {"timestamp":"2024-05-15T10:15:00Z","query":"noise cancelling headphones","category":"105","results":25,"clicked":["Bose-QC45","Sony-WH1000XM5"]} */ export interface SearchEntry { - /** @format date-time */ + /** + * ISO 8601 timestamp of when the search occurred. + * @format date-time + */ timestamp: string; + /** The search query string entered by the user. */ query: string; + /** Optional category context provided during the search (e.g., user was browsing 'Electronics'). Should match a Tapiro taxonomy ID or name. 
*/ category?: string; + /** Optional number of results returned for the search query. */ results?: number; + /** Optional list of product IDs or SKUs clicked from the search results. */ clicked?: string[]; } @@ -263,6 +324,13 @@ export interface ApiKeyUsage { }[]; } +export interface StoreConsentList { + /** List of store IDs the user has opted into. */ + optInStores?: string[]; + /** List of store IDs the user has opted out of. */ + optOutStores?: string[]; +} + export interface HealthStatus { /** Overall health status of the Health */ status?: "healthy" | "degraded" | "unhealthy"; @@ -310,6 +378,78 @@ export interface UserMetadataResponse { }; } +/** Attribute within a taxonomy category */ +export interface TaxonomyAttribute { + name: string; + values: string[]; + description?: string | null; +} + +/** Category within a taxonomy system */ +export interface TaxonomyCategory { + id: string; + name: string; + parent_id?: string | null; + description?: string | null; + /** @default [] */ + attributes?: TaxonomyAttribute[]; +} + +/** Complete taxonomy definition with categories and version */ +export interface Taxonomy { + _id?: string; + categories: TaxonomyCategory[]; + version: string; +} + +export interface RecentUserDataEntry { + /** The unique ID of the userData entry. */ + _id?: string; + /** The ID of the store that submitted the data. */ + storeId?: string; + /** The type of data submitted. */ + dataType?: "purchase" | "search"; + /** + * When the data was submitted to Tapiro. + * @format date-time + */ + timestamp?: string; + /** + * The timestamp of the original event (e.g., purchase time). + * @format date-time + */ + entryTimestamp?: string; + /** Simplified details (e.g., item count for purchase, query string for search) */ + details?: object; +} + +/** + * Aggregated spending data per category over time. The structure might vary based on implementation (e.g., object keyed by month/year, or an array of objects each representing a time point). 
+ * @example {"2025-01":{"Electronics":1299.99,"Clothing":150.5},"2025-02":{"Clothing":100,"Home":85}} + */ +export type SpendingAnalytics = Record>; + +export interface StoreBasicInfo { + /** The unique ID of the store. */ + storeId: string; + /** The name of the store. */ + name: string; +} + +/** @example {"month":"2024-01","spending":{"Electronics":1299.99,"Clothing":150.5}} */ +export interface MonthlySpendingItem { + /** + * The month of the spending data (e.g., "2024-01"). + * @format date + */ + month: string; + /** An object mapping category names to the total amount spent in that category for the month. */ + spending: Record; +} + +/** An array of monthly spending breakdowns. */ +export type MonthlySpendingAnalytics = MonthlySpendingItem[]; + export interface GetApiKeyUsagePayload { /** * Optional start date for filtering usage data @@ -322,3 +462,34 @@ export interface GetApiKeyUsagePayload { */ endDate?: string; } + +export interface GetRecentUserDataParams { + /** + * Maximum number of records to return + * @default 10 + */ + limit?: number; + /** + * Page number for pagination + * @default 1 + */ + page?: number; +} + +export interface GetSpendingAnalyticsParams { + /** + * Filter results from this date onwards (YYYY-MM-DD). + * @format date + */ + startDate?: string; + /** + * Filter results up to this date (YYYY-MM-DD). + * @format date + */ + endDate?: string; +} + +export interface LookupStoresParams { + /** Comma-separated list of store IDs to lookup. 
*/ + ids: string; +} diff --git a/web/src/api/utils/cache.ts b/web/src/api/utils/cache.ts index c15dc28..7772a20 100644 --- a/web/src/api/utils/cache.ts +++ b/web/src/api/utils/cache.ts @@ -26,6 +26,18 @@ export const cacheKeys = { all: ["users"], profile: () => [...cacheKeys.users.all, "profile"], preferences: () => [...cacheKeys.users.all, "preferences"], + recentData: (limit: number, page: number) => [ + ...cacheKeys.users.all, + "recentData", + { limit, page }, + ], + // Update spendingAnalytics to accept optional dates + spendingAnalytics: (startDate?: string, endDate?: string) => [ + ...cacheKeys.users.all, + "spendingAnalytics", + { startDate: startDate ?? "all", endDate: endDate ?? "all" }, // Use 'all' if undefined + ], + storeConsent: () => [...cacheKeys.users.all, "storeConsent"], }, stores: { all: ["stores"], @@ -36,10 +48,12 @@ export const cacheKeys = { keyId, "usage", ], + lookup: (ids: string[]) => [...cacheKeys.stores.all, "lookup", ids], }, system: { health: () => ["system", "health"], ping: () => ["system", "ping"], + taxonomy: () => ["system", "taxonomy"], }, }; @@ -69,6 +83,11 @@ export const cacheSettings = { staleTime: CACHE_TIMES.SHORT, gcTime: CACHE_TIMES.SHORT * 2, }, + taxonomy: { + // <-- Add specific settings for taxonomy (cache longer) + staleTime: CACHE_TIMES.LONG, + gcTime: CACHE_TIMES.LONG * 2, + }, }; // Helper for optimistic updates diff --git a/web/src/components/auth/InterestFormModal.tsx b/web/src/components/auth/InterestFormModal.tsx new file mode 100644 index 0000000..3999a66 --- /dev/null +++ b/web/src/components/auth/InterestFormModal.tsx @@ -0,0 +1,365 @@ +import { useState, useEffect, useMemo } from "react"; // Removed ReactElement +import { + Button, + Card, + Modal, + ModalBody, + ModalFooter, + ModalHeader, + Spinner, +} from "flowbite-react"; +import { + HiChevronDown, + HiChevronUp, + HiOutlineDesktopComputer, + HiOutlineShoppingBag, + HiOutlineHome, + HiOutlineSparkles, + HiOutlineBookOpen, + HiOutlineHeart, + 
HiOutlinePuzzle, + HiOutlineBriefcase, + HiOutlineKey, + HiOutlineGlobeAlt, + HiOutlineShoppingCart, + HiOutlineGift, + HiOutlineCode, + HiQuestionMarkCircle, +} from "react-icons/hi"; +import { useTaxonomy } from "../../api/hooks/useTaxonomyHooks"; +import { useUpdateUserPreferences } from "../../api/hooks/useUserHooks"; +import { + PreferenceItem, + // Removed TaxonomyCategory +} from "../../api/types/data-contracts"; +import ErrorDisplay from "../common/ErrorDisplay"; // Assuming ErrorDisplay exists + +// --- Icon Mapping (Keep as is) --- +const categoryIcons: { [key: string]: React.ElementType } = { + Electronics: HiOutlineDesktopComputer, + Fashion: HiOutlineShoppingBag, + Home: HiOutlineHome, + Beauty: HiOutlineSparkles, + Media: HiOutlineBookOpen, + "Health & Wellness": HiOutlineHeart, + "Toys & Games": HiOutlinePuzzle, + "Office Supplies": HiOutlineBriefcase, + Gaming: HiOutlineKey, + Travel: HiOutlineGlobeAlt, + Grocery: HiOutlineShoppingCart, + "Jewelry & Watches": HiOutlineGift, + Gifts: HiOutlineGift, + Software: HiOutlineCode, +}; +const DefaultIcon = HiQuestionMarkCircle; +// --- End Icon Mapping --- + +interface InterestFormModalProps { + show: boolean; + onClose: () => void; +} + +// --- State for selected attribute values --- +interface SelectedAttributeValue { + categoryId: string; + attributeName: string; + value: string; +} + +export function InterestFormModal({ show, onClose }: InterestFormModalProps) { + const { + data: taxonomyData, + isLoading: isLoadingTaxonomy, + error: taxonomyError, + } = useTaxonomy(); + const updateUserPreferences = useUpdateUserPreferences(); + + // --- Updated State --- + const [selectedAttributeValues, setSelectedAttributeValues] = useState< + SelectedAttributeValue[] + >([]); + const [expandedCategoryIds, setExpandedCategoryIds] = useState([]); + // --- End Updated State --- + + // --- Top Level Categories (Keep as is) --- + const topLevelCategories = useMemo(() => { + return ( + taxonomyData?.categories + 
.filter((cat) => !cat.parent_id) + .sort((a, b) => a.name.localeCompare(b.name)) || [] + ); + }, [taxonomyData]); + + // --- Category Map Removed (was unused) --- + + // --- Handlers --- + const handleExpandCategory = (categoryId: string) => { + setExpandedCategoryIds((prev) => + prev.includes(categoryId) + ? prev.filter((id) => id !== categoryId) + : [...prev, categoryId], + ); + }; + + // --- New Handler for Attribute Value Selection --- + const handleSelectAttributeValue = ( + categoryId: string, + attributeName: string, + value: string, + ) => { + setSelectedAttributeValues((prev) => { + const existingIndex = prev.findIndex( + (item) => + item.categoryId === categoryId && + item.attributeName === attributeName && + item.value === value, + ); + if (existingIndex > -1) { + // Remove if already selected + return prev.filter((_, index) => index !== existingIndex); + } else { + // Add if not selected + return [...prev, { categoryId, attributeName, value }]; + } + }); + }; + // --- End New Handler --- + + // --- Updated handleSubmit --- + const handleSubmit = async () => { + const groupedPreferences = new Map(); + + selectedAttributeValues.forEach(({ categoryId, attributeName, value }) => { + // Initialize preference for the category if not present + if (!groupedPreferences.has(categoryId)) { + groupedPreferences.set(categoryId, { + category: categoryId, + score: 1.0, // Assign a base score for selecting the category + attributes: {}, // Initialize attributes object + }); + } + + const pref = groupedPreferences.get(categoryId)!; + + // Ensure attributes object exists (it should from the initialization above) + if (!pref.attributes) { + pref.attributes = {}; + } + + // --- Type Assertion for Dynamic Attribute Access --- + // Cast attributes to allow indexing by any string key + const attributesMap = pref.attributes as Record< + string, + Record + >; + // --- End Type Assertion --- + + // Ensure the specific attribute object (value map) exists + let attributeValueMap = 
attributesMap[attributeName]; // Use the casted map + if (!attributeValueMap) { + attributeValueMap = {}; + attributesMap[attributeName] = attributeValueMap; // Use the casted map + } + + // Assign score to the specific attribute value + attributeValueMap[value] = 1.0; // Assign score to the inner map + }); + + const preferencesPayload: PreferenceItem[] = Array.from( + groupedPreferences.values(), + ); + + if (preferencesPayload.length === 0) { + console.warn("No preferences selected."); + // Optionally show a message to the user or simply close + onClose(); // Close if nothing selected, or handle differently + return; + } + + try { + await updateUserPreferences.mutateAsync({ + preferences: preferencesPayload, + }); + onClose(); // Close modal on success + } catch (err) { + console.error("Failed to save preferences:", err); + // Optionally display an error message to the user + } + }; + // --- End Updated handleSubmit --- + + // --- useEffect for Error Handling (Keep as is) --- + useEffect(() => { + if (taxonomyError) { + console.error("Taxonomy failed to load, closing interest modal."); + onClose(); + } + }, [taxonomyError, onClose]); + + // --- Check if a specific attribute value is selected --- + const isAttributeValueSelected = ( + categoryId: string, + attributeName: string, + value: string, + ): boolean => { + return selectedAttributeValues.some( + (item) => + item.categoryId === categoryId && + item.attributeName === attributeName && + item.value === value, + ); + }; + // --- End Check --- + + return ( + +
+ Tell us what you're interested in +
+ + {isLoadingTaxonomy && ( +
+ +
+ )} + {taxonomyError && ( + + )} + {!isLoadingTaxonomy && + !taxonomyError && + taxonomyData && + taxonomyData.categories && ( +
+

+ Select topics to personalize your experience. Click a main topic + to refine your interests by selecting specific features. Choose + at least one feature. +

+
+ {/* --- Render Top Level Categories --- */} + {topLevelCategories.map((category) => { + const isExpanded = expandedCategoryIds.includes(category.id); + // Get attributes directly from the category object + const attributes = category.attributes || []; + const hasAttributes = attributes.length > 0; + const IconComponent = + categoryIcons[category.name] || DefaultIcon; + + return ( +
+ handleExpandCategory(category.id) + : undefined // No action if no attributes + } + className={`h-full transition-all duration-150 ${hasAttributes ? "cursor-pointer" : "cursor-default"} ${isExpanded ? "ring-2 ring-blue-500 dark:ring-blue-400" : hasAttributes ? "hover:bg-gray-50 dark:hover:bg-gray-600" : ""}`} + > + {/* Card Content (Icon, Title, Description) - Keep as is */} +
+
+ +
+ {category.name} +
+ {category.description && ( +

+ {category.description} +

+ )} +
+ {/* Chevron or Placeholder */} + {hasAttributes && ( +
+ {isExpanded ? ( + + ) : ( + + )} +
+ )} + {!hasAttributes && ( +
// Placeholder + )} +
+
+ + {/* --- Render Attributes and Values when Expanded --- */} + {isExpanded && hasAttributes && ( +
+ {attributes.map((attribute) => ( +
+
+ {attribute.description || attribute.name}{" "} + {/* Use description or name */} +
+
+ {(attribute.values || []).map((value) => { + const isSelected = isAttributeValueSelected( + category.id, + attribute.name, + value, + ); + return ( + + ); + })} +
+
+ ))} +
+ )} + {/* --- End Attribute Rendering --- */} +
+ ); + })} +
+
+ )} +
+ + + + +
+ ); +} diff --git a/web/src/components/auth/RegistrationCompletionModal.tsx b/web/src/components/auth/RegistrationCompletionModal.tsx index dfc552d..ecad6aa 100644 --- a/web/src/components/auth/RegistrationCompletionModal.tsx +++ b/web/src/components/auth/RegistrationCompletionModal.tsx @@ -17,6 +17,7 @@ export function RegistrationCompletionModal() { "user" | "store" | null >(null); const [error, setError] = useState(null); + const [isNavigating, setIsNavigating] = useState(false); const registerUserMutation = useRegisterUser(); const registerStoreMutation = useRegisterStore(); @@ -31,9 +32,13 @@ export function RegistrationCompletionModal() { const handleUserSubmit = async (userData: UserCreate) => { setError(null); + setIsNavigating(true); // Add this state to track navigation + try { await registerUserMutation.mutateAsync(userData); + // Navigation will happen via the mutation's onSuccess handler } catch (err) { + setIsNavigating(false); console.error("Failed to complete user registration:", err); const message = err instanceof Error diff --git a/web/src/components/auth/UserRegistrationForm.tsx b/web/src/components/auth/UserRegistrationForm.tsx index 09e8913..ad4951b 100644 --- a/web/src/components/auth/UserRegistrationForm.tsx +++ b/web/src/components/auth/UserRegistrationForm.tsx @@ -8,6 +8,8 @@ import { ModalBody, ModalFooter, Popover, // <-- Import Popover + Select, // <-- Import Select + TextInput, // <-- Import TextInput } from "flowbite-react"; import { UserCreate } from "../../api/types/data-contracts"; import LoadingSpinner from "../common/LoadingSpinner"; @@ -18,6 +20,25 @@ interface UserRegistrationFormProps { isLoading: boolean; } +// Define options for selects +const genderOptions = [ + { value: "", label: "Select Gender (Optional)" }, + { value: "male", label: "Male" }, + { value: "female", label: "Female" }, + { value: "non-binary", label: "Non-binary" }, + { value: "prefer_not_to_say", label: "Prefer not to say" }, +]; + +const incomeOptions = 
[ + { value: "", label: "Select Income Bracket (Optional)" }, + { value: "<25k", label: "< $25,000" }, + { value: "25k-50k", label: "$25,000 - $49,999" }, + { value: "50k-100k", label: "$50,000 - $99,999" }, + { value: "100k-200k", label: "$100,000 - $199,999" }, + { value: ">200k", label: "> $200,000" }, + { value: "prefer_not_to_say", label: "Prefer not to say" }, +]; + export function UserRegistrationForm({ onSubmit, isLoading, @@ -29,6 +50,12 @@ export function UserRegistrationForm({ // State to track if consent has been explicitly accepted via the modal const [consentAccepted, setConsentAccepted] = useState(false); + // Add state for demographic fields + const [gender, setGender] = useState(null); + const [incomeBracket, setIncomeBracket] = useState(null); + const [country, setCountry] = useState(null); + const [age, setAge] = useState(null); + const handleSubmit = (e: React.FormEvent) => { e.preventDefault(); // Ensure consent was actually accepted via the modal flow if required @@ -43,6 +70,11 @@ export function UserRegistrationForm({ // Use the state variable linked to the checkbox dataSharingConsent: dataSharingConsent, preferences: [], // We can leave this empty for now + // Add demographic data, ensuring null if empty string or invalid number + gender: gender || null, + incomeBracket: incomeBracket || null, + country: country || null, + age: age !== null && !isNaN(age) ? Number(age) : null, }; onSubmit(userData); @@ -77,6 +109,79 @@ export function UserRegistrationForm({ Complete User Registration + {/* Demographic Information Section */} +
+

+ Demographic Information (Optional) +

+ {/* Gender Select */} +
+ + +
+ {/* Income Bracket Select */} +
+ + +
+ {/* Country Input */} +
+ + + setCountry(e.target.value ? e.target.value.toUpperCase() : null) + } + maxLength={2} // ISO 3166-1 alpha-2 + className="mt-1" + /> +
+ {/* Age Input */} +
+ + { + const val = parseInt(e.target.value); + setAge(isNaN(val) ? null : val); + }} + min="0" + className="mt-1" + /> +
+
+ {/* Consent Section */}
{/* Conditionally wrap Checkbox/Label in Popover */} diff --git a/web/src/main.tsx b/web/src/main.tsx index d101181..aea723f 100644 --- a/web/src/main.tsx +++ b/web/src/main.tsx @@ -16,6 +16,9 @@ import PrivateRoute from "./components/auth/PrivateRoute"; import NotFoundPage from "./pages/static/NotFoundPage"; import UserProfilePage from "./pages/UserProfilePage"; import StoreProfilePage from "./pages/StoreProfilePage"; +import UserPreferencesPage from "./pages/UserPreferencesPage"; +import UserDataSharingPage from "./pages/UserDataSharingPage"; +import UserAnalyticsPage from "./pages/UserAnalyticsPage"; const router = createBrowserRouter([ { @@ -51,6 +54,19 @@ const router = createBrowserRouter([ path: "profile/user", element: , }, + // --- Add New User Dashboard Sub-routes --- + { + path: "profile/user/preferences", // Route for preferences + element: , + }, + { + path: "profile/user/sharing", // Route for data sharing + element: , + }, + { + path: "profile/user/analytics", // Route for analytics + element: , + }, ], }, // --- Protected Store Routes --- diff --git a/web/src/pages/UserAnalyticsPage.tsx b/web/src/pages/UserAnalyticsPage.tsx new file mode 100644 index 0000000..5d1265e --- /dev/null +++ b/web/src/pages/UserAnalyticsPage.tsx @@ -0,0 +1,21 @@ +import React from "react"; +import { Card } from "flowbite-react"; + +const UserAnalyticsPage: React.FC = () => { + return ( +
+

+ Your Data Insights +

+ +

+ View insights derived from the data you've shared, such as spending + habits and recent activity. (Implementation coming soon!) +

+ {/* Analytics charts and recent activity list will go here */} +
+
+ ); +}; + +export default UserAnalyticsPage; diff --git a/web/src/pages/UserDashboard.tsx b/web/src/pages/UserDashboard.tsx index 8975452..b5e8415 100644 --- a/web/src/pages/UserDashboard.tsx +++ b/web/src/pages/UserDashboard.tsx @@ -1,43 +1,548 @@ -import { Card } from "flowbite-react"; -import { useUserProfile } from "../api/hooks/useUserHooks"; +import { useMemo, useState, useEffect } from "react"; +import { Link } from "react-router"; // <-- Corrected import +import { + Card, + Alert, + List, + Timeline, + TimelineItem, + TimelinePoint, + TimelineContent, + TimelineTime, + TimelineTitle, + ListItem, + Datepicker, + Button, +} from "flowbite-react"; +import { + HiArrowRight, + HiClock, + HiInformationCircle, + HiOutlineNewspaper, + HiOutlineCurrencyDollar, + HiOutlineShare, + HiOutlineAdjustments, + HiCalendar, +} from "react-icons/hi"; +import { + ResponsiveContainer, + LineChart, + Line, + XAxis, + YAxis, + CartesianGrid, + Tooltip, + Legend, + BarChart, + Bar, + Cell, +} from "recharts"; + +import { + useUserProfile, + useRecentUserData, + useSpendingAnalytics, + useStoreConsentLists, + useUserPreferences, +} from "../api/hooks/useUserHooks"; +import { useLookupStores } from "../api/hooks/useStoreHooks"; import LoadingSpinner from "../components/common/LoadingSpinner"; import ErrorDisplay from "../components/common/ErrorDisplay"; +import { + RecentUserDataEntry, + StoreBasicInfo, + MonthlySpendingItem, +} from "../api/types/data-contracts"; +import { InterestFormModal } from "../components/auth/InterestFormModal"; + +// Helper function to format date (keep existing) +const formatDate = (dateString: string | Date | undefined) => { + if (!dateString) return "N/A"; + return new Date(dateString).toLocaleDateString("en-US", { + year: "numeric", + month: "short", + day: "numeric", + hour: "2-digit", + minute: "2-digit", + }); +}; + +// Define colors for the lines (can reuse or define new ones) +const LINE_COLORS = [ + "#0088FE", + "#00C49F", + "#FFBB28", + 
"#FF8042", + "#8884D8", + "#82CA9D", + "#FF5733", + "#C70039", + "#900C3F", + "#581845", +]; + +// Helper to format currency (keep existing) +const formatCurrency = (value: number) => { + return new Intl.NumberFormat("en-US", { + style: "currency", + currency: "USD", // Adjust currency as needed + minimumFractionDigits: 0, + maximumFractionDigits: 0, + }).format(value); +}; + +// Helper to format YYYY-MM date string for display +const formatMonth = (monthString: string) => { + try { + const [year, month] = monthString.split("-"); + const date = new Date(parseInt(year), parseInt(month) - 1); + return date.toLocaleDateString("en-US", { + year: "numeric", + month: "short", + }); + } catch { + return monthString; // Fallback + } +}; + +// Helper to format Date object to YYYY-MM-DD string +const formatDateToISO = (date: Date | null | undefined): string | undefined => { + if (!date) return undefined; + return date.toISOString().split("T")[0]; +}; export default function UserDashboard() { - const { data: profile, isLoading, error } = useUserProfile(); + // --- State for Date Range --- + const [startDate, setStartDate] = useState(null); + const [endDate, setEndDate] = useState(null); + + // --- Fetch Data (Hooks called unconditionally at the top) --- + const { + data: profile, + isLoading: profileLoading, + error: profileError, + } = useUserProfile(); + const { + data: recentActivity, + isLoading: activityLoading, + error: activityError, + } = useRecentUserData(3); // <-- Changed limit from 5 to 3 + const { + data: spendingData, + isLoading: spendingLoading, + error: spendingError, + } = useSpendingAnalytics({ + startDate: formatDateToISO(startDate), + endDate: formatDateToISO(endDate), + }); + const { + data: consentLists, + isLoading: consentLoading, + error: consentError, + } = useStoreConsentLists(); + const { + data: preferencesData, + isLoading: preferencesLoading, + error: preferencesError, + } = useUserPreferences(); + + // --- Prepare Derived Data (useMemo hooks 
called unconditionally) --- + const optInStoreIds = useMemo( + () => consentLists?.optInStores || [], + [consentLists], + ); + + const { + data: storeDetails, + isLoading: storesLoading, + error: storesError, + } = useLookupStores(optInStoreIds); + + const storeNameMap = useMemo(() => { + const map = new Map(); + storeDetails?.forEach((store: StoreBasicInfo) => { + map.set(store.storeId, store.name || `Store ID: ${store.storeId}`); + }); + return map; + }, [storeDetails]); + + // --- Data Transformation for Spending Line Chart --- + const { lineChartData, categories } = useMemo(() => { + if (!spendingData) return { lineChartData: [], categories: [] }; + + const allCategories = new Set(); + const dataMap = new Map>(); + + spendingData.forEach((monthlyItem: MonthlySpendingItem) => { + const monthData: Record = { + month: monthlyItem.month, + }; + Object.entries(monthlyItem.spending).forEach(([category, amount]) => { + allCategories.add(category); + monthData[category] = amount; + }); + dataMap.set(monthlyItem.month, monthData); + }); - if (isLoading) { - return ; + const processedData = spendingData.map((item: MonthlySpendingItem) => { + const monthEntry = dataMap.get(item.month) || { month: item.month }; + allCategories.forEach((cat) => { + if (!(cat in monthEntry)) { + monthEntry[cat] = 0; + } + }); + return monthEntry; + }); + + return { + lineChartData: processedData, + categories: Array.from(allCategories).sort(), + }; + }, [spendingData]); + + // Data for Preferences Bar Chart + const topPreferencesChartData = useMemo(() => { + if (!preferencesData?.preferences) return []; + return [...preferencesData.preferences] + .sort((a, b) => (b.score ?? 0) - (a.score ?? 0)) + .slice(0, 5) + .map((pref) => ({ + name: pref.category, + score: pref.score != null ? 
pref.score * 100 : 0, + })); + }, [preferencesData]); + + // --- Loading and Error States (Checked AFTER hooks) --- + const isLoading = + profileLoading || + activityLoading || + spendingLoading || + consentLoading || + preferencesLoading || + storesLoading; + + const combinedError = + profileError || + activityError || + spendingError || + consentError || + preferencesError || + storesError; + + const [showInterestForm, setShowInterestForm] = useState(false); + + useEffect(() => { + // If preferences loaded and are empty, show the form + if ( + !preferencesLoading && + preferencesData && + (!preferencesData.preferences || preferencesData.preferences.length === 0) + ) { + setShowInterestForm(true); + } + }, [preferencesData, preferencesLoading]); + + if (isLoading && !spendingData) { + return ; } - if (error) { + if (combinedError && !spendingData) { return ( ); } + // --- Render Dashboard --- return ( -
- {/* Add dark mode text color */} -

- User Dashboard -

- - {/* Add dark mode text color */} -

+ <> +

+

Welcome back, {profile?.username || "User"}! -

- {/* Add dark mode text color */} -

- Here you can manage your preferences, control data sharing with - stores, and view your usage analytics. (Content coming soon!) -

- {/* Add User Preference Management and Analytics sections later */} - -

+ + +
+ {/* --- Recent Activity Card --- */} + +
+

+ + Recent Activity +

+ {activityError ? ( + + Could not load recent activity. + + ) : !recentActivity || recentActivity.length === 0 ? ( +

+ No recent activity found. +

+ ) : ( +
+ + {recentActivity.map( + ( + activity: RecentUserDataEntry, // No change needed here, map will iterate over 3 items max + ) => ( + + + + + {formatDate(activity.timestamp)} + + + {activity.dataType === "purchase" + ? "Purchase" + : "Search"}{" "} + from{" "} + {activity.storeId + ? storeNameMap.get(activity.storeId) || + `Store ID: ${activity.storeId}` + : "Unknown Store"} + + {/* Add more details if needed */} + {/* Details about the activity... */} + + + ), + )} + +
+ )} +
+ + View All Activity + +
+ + {/* --- Spending Overview Card --- */} + +
+
+

+ + Spending Overview +

+
+ setStartDate(date)} + maxDate={endDate || undefined} + className="w-full" + placeholder="Start Date" + /> + setEndDate(date)} + minDate={startDate || undefined} + className="w-full" + placeholder="End Date" + /> + {(startDate || endDate) && ( + + )} +
+
+ + {spendingError ? ( + + Could not load spending data for the selected range. + + ) : spendingLoading ? ( +
+ +
+ ) : !lineChartData || lineChartData.length === 0 ? ( +

+ No spending data available + {startDate || endDate ? " for this period" : " yet"}. +

+ ) : ( +
+ + + + + + formatCurrency(value)} + labelFormatter={formatMonth} + /> + + {categories.map((category, index) => ( + + ))} + + +
+ )} +
+ + View Detailed Analytics + +
+ + {/* --- Data Sharing Card --- */} + +
+

+ + Data Sharing +

+ {consentError || storesError ? ( + + Could not load sharing status. + + ) : (consentLists?.optInStores?.length ?? 0) === 0 ? ( +

+ You are not currently sharing data with any stores. +

+ ) : ( + <> +

+ You are sharing data with{" "} + {consentLists?.optInStores?.length} store(s): +

+ + {consentLists?.optInStores?.slice(0, 5).map((storeId) => ( + + {storeNameMap.get(storeId) || `Store ID: ${storeId}`} + + ))} + {(consentLists?.optInStores?.length ?? 0) > 5 && ( + + ... and more + + )} + + + )} +
+ + Manage Sharing Settings + +
+ + {/* --- Preferences Summary Card --- */} + +
+

+ + Top Preferences +

+ {preferencesError ? ( + + Could not load preferences data. + + ) : preferencesLoading ? ( +
+ +
+ ) : !topPreferencesChartData || + topPreferencesChartData.length === 0 ? ( +

+ No preference data available yet. +

+ ) : ( +
+ + + + `${value}%`} + fontSize={12} + tick={{ fill: "currentColor" }} + /> + + `${value.toFixed(1)}%`} + cursor={{ fill: "rgba(156, 163, 175, 0.2)" }} + contentStyle={{ + backgroundColor: "rgba(31, 41, 55, 0.9)", + borderColor: "rgba(75, 85, 99, 0.5)", + borderRadius: "0.375rem", + }} + itemStyle={{ color: "#e5e7eb" }} + labelStyle={{ color: "#f9fafb", fontWeight: "bold" }} + /> + + + {topPreferencesChartData.map((_entry, index) => ( + + ))} + + + +
+ )} +
+ + Manage All Preferences + +
+
+
+ + setShowInterestForm(false)} + /> + ); } diff --git a/web/src/pages/UserDataSharingPage.tsx b/web/src/pages/UserDataSharingPage.tsx new file mode 100644 index 0000000..abab8c4 --- /dev/null +++ b/web/src/pages/UserDataSharingPage.tsx @@ -0,0 +1,22 @@ +import React from "react"; +import { Card } from "flowbite-react"; + +const UserDataSharingPage: React.FC = () => { + return ( +
+

+ Control Data Sharing +

+ +

+ Manage which stores you allow to access your preference data. You can + opt-in or opt-out from individual stores at any time. (Implementation + coming soon!) +

+ {/* Opt-in/Opt-out UI will go here */} +
+
+ ); +}; + +export default UserDataSharingPage; diff --git a/web/src/pages/UserPreferencesPage.tsx b/web/src/pages/UserPreferencesPage.tsx new file mode 100644 index 0000000..97e80d0 --- /dev/null +++ b/web/src/pages/UserPreferencesPage.tsx @@ -0,0 +1,21 @@ +import React from "react"; +import { Card } from "flowbite-react"; + +const UserPreferencesPage: React.FC = () => { + return ( +
+

+ Manage Your Preferences +

+ +

+ Here you can view and update your interest preferences. This helps us + show you more relevant content and ads. (Implementation coming soon!) +

+ {/* Preference selection UI will go here */} +
+
+ ); +}; + +export default UserPreferencesPage; diff --git a/web/src/pages/static/ApiDocsPage.tsx b/web/src/pages/static/ApiDocsPage.tsx index 380c575..20b08a0 100644 --- a/web/src/pages/static/ApiDocsPage.tsx +++ b/web/src/pages/static/ApiDocsPage.tsx @@ -41,7 +41,7 @@ export default function ApiDocsPage() {

{/* Add dark mode text color */}
- POST /v1/interactions + POST /interactions
{/* Add dark mode text color */}

@@ -51,9 +51,10 @@ export default function ApiDocsPage() { {/* Adjusted pre/code dark mode styles */}

           
-            {`POST /v1/interactions
+            {`POST /interactions
 Authorization: Bearer YOUR_API_KEY
 Content-Type: application/json
+X-API-Key: YOUR_STORE_API_KEY
 
 {
   "userId": "store-specific-user-id-123",
@@ -71,6 +72,29 @@ Content-Type: application/json
     "source": "web",
     "deviceType": "desktop"
   }
+}`}
+          
+        
+ {/* Add dark mode text color */} +
+ POST /users/data +
+ {/* Add dark mode text color */} +

+ Sends user data (e.g., email, purchase data) to Tapiro for analysis. +

+ {/* Adjusted pre/code dark mode styles */} +
+          
+            {`POST /users/data HTTP/1.1
+Host: api.tapiro.com
+Content-Type: application/json
+X-API-Key: YOUR_API_KEY
+
+{
+  "email": "user@example.com",
+  "dataType": "purchase",
+  "entries": [ ... ]
 }`}