diff --git a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/AzureSQLConnector.kt b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/AzureSQLConnector.kt index bbdff2d..ccb06cf 100644 --- a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/AzureSQLConnector.kt +++ b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/AzureSQLConnector.kt @@ -51,14 +51,21 @@ class AzureSQLConnector( } } - override fun fetchData(tableName: String, limit: Int?, whereClause: String?): List> { + override fun fetchData(tableName: String, limit: Int?, whereClause: String?, selectedAttributes: List?): List> { // Use square-bracket quoting for SQL Server identifier safety; strip brackets from input val sanitizedTable = tableName.replace("[", "").replace("]", "") + val selectPart = if (!selectedAttributes.isNullOrEmpty()) { + selectedAttributes.joinToString(", ") { col -> + "[${col.replace("[", "").replace("]", "")}]" + } + } else { + "*" + } val wherePart = if (!whereClause.isNullOrBlank()) " WHERE $whereClause" else "" val query = if (limit != null) { - "SELECT TOP $limit * FROM [$sanitizedTable]$wherePart" + "SELECT TOP $limit $selectPart FROM [$sanitizedTable]$wherePart" } else { - "SELECT * FROM [$sanitizedTable]$wherePart" + "SELECT $selectPart FROM [$sanitizedTable]$wherePart" } return getConnection().use { conn -> conn.prepareStatement(query).use { stmt -> diff --git a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/FileConnector.kt b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/FileConnector.kt index e071486..a1bf6fb 100644 --- a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/FileConnector.kt +++ b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/FileConnector.kt @@ -21,13 +21,21 @@ class FileConnector( } } - override fun fetchData(tableName: String, limit: Int?, whereClause: String?): List> { + override fun fetchData(tableName: String, limit: Int?, whereClause: String?, selectedAttributes: List?): List> { val rows = when { contentType.contains("csv", ignoreCase = true) || filename.endsWith(".csv") -> parseCsv() contentType.contains("json", ignoreCase = true) || filename.endsWith(".json") -> parseJson() else -> emptyList() } - return if (limit != null) rows.take(limit) else rows + val limited = if (limit != null) rows.take(limit) else rows + return if (!selectedAttributes.isNullOrEmpty()) { + val normalizedSelected = selectedAttributes.map { it.lowercase() }.toSet() + limited.map { row -> + row.filterKeys { it.lowercase() in normalizedSelected } + } + } else { + limited + } } override fun createTable(tableName: String, columns: List) { diff --git a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/MongoDBConnector.kt b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/MongoDBConnector.kt index 9038c8a..c5edbd5 100644 --- a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/MongoDBConnector.kt +++ b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/MongoDBConnector.kt @@ -50,13 +50,22 @@ open class MongoDBConnector( } } - override fun fetchData(tableName: String, limit: Int?, whereClause: String?): List> { + override fun fetchData(tableName: String, limit: Int?, whereClause: String?, selectedAttributes: List?): List> { // whereClause is expected to be a MongoDB query filter as JSON, e.g. {"age": {"$gt": 18}} return createMongoClient().use { client -> val collection = client.getDatabase(getDatabaseName()).getCollection(tableName) val filter = if (!whereClause.isNullOrBlank()) Document.parse(whereClause) else Document() - val cursor = if (limit != null) collection.find(filter).limit(limit) else collection.find(filter) - cursor.map { doc -> + var query = if (limit != null) collection.find(filter).limit(limit) else collection.find(filter) + if (!selectedAttributes.isNullOrEmpty()) { + val projection = Document().apply { + selectedAttributes.forEach { field -> put(field, 1) } + if (!selectedAttributes.contains("_id")) { + put("_id", 0) + } + } + query = query.projection(projection) + } + query.map { doc -> doc.entries.associate { it.key to it.value } }.toList() } diff --git a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/MySQLConnector.kt b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/MySQLConnector.kt index ac1d84f..dc79eca 100644 --- a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/MySQLConnector.kt +++ b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/MySQLConnector.kt @@ -48,10 +48,15 @@ class MySQLConnector( } } - override fun fetchData(tableName: String, limit: Int?, whereClause: String?): List> { + override fun fetchData(tableName: String, limit: Int?, whereClause: String?, selectedAttributes: List?): List> { getConnection().use { conn -> + val selectPart = if (!selectedAttributes.isNullOrEmpty()) { + selectedAttributes.joinToString(", ") { "`${it.replace("`", "``")}`" } + } else { + "*" + } val sql = buildString { - append("SELECT * FROM `$tableName`") + append("SELECT $selectPart FROM `$tableName`") if (whereClause != null) append(" WHERE $whereClause") if (limit != null) append(" LIMIT $limit") } diff --git a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/PostgreSQLConnector.kt b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/PostgreSQLConnector.kt index 9979aba..75476f6 100644 --- a/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/PostgreSQLConnector.kt +++ b/backend/src/main/kotlin/com/opendatamask/adapter/output/connector/PostgreSQLConnector.kt @@ -46,14 +46,19 @@ class PostgreSQLConnector( } } - override fun fetchData(tableName: String, limit: Int?, whereClause: String?): List> { + override fun fetchData(tableName: String, limit: Int?, whereClause: String?, selectedAttributes: List?): List> { // Use double-quoting for PostgreSQL identifier safety val quotedTable = "\"${tableName.replace("\"", "")}\"" + val selectPart = if (!selectedAttributes.isNullOrEmpty()) { + selectedAttributes.joinToString(", ") { "\"${it.replace("\"", "")}\"" } + } else { + "*" + } val wherePart = if (!whereClause.isNullOrBlank()) " WHERE $whereClause" else "" val query = if (limit != null) { - "SELECT * FROM $quotedTable$wherePart LIMIT $limit" + "SELECT $selectPart FROM $quotedTable$wherePart LIMIT $limit" } else { - "SELECT * FROM $quotedTable$wherePart" + "SELECT $selectPart FROM $quotedTable$wherePart" } return getConnection().use { conn -> conn.prepareStatement(query).use { stmt -> diff --git a/backend/src/main/kotlin/com/opendatamask/application/service/DestinationSchemaService.kt b/backend/src/main/kotlin/com/opendatamask/application/service/DestinationSchemaService.kt index f026733..aeb6261 100644 --- a/backend/src/main/kotlin/com/opendatamask/application/service/DestinationSchemaService.kt +++ b/backend/src/main/kotlin/com/opendatamask/application/service/DestinationSchemaService.kt @@ -77,11 +77,29 @@ class DestinationSchemaService { sourceType: ConnectionType, destConnector: DatabaseConnector, destType: ConnectionType, - tableName: String + tableName: String, + selectedAttributes: List = emptyList() ) { logger.info("Mirroring schema for table: $tableName ($sourceType -> $destType)") val sourceColumns = sourceConnector.listColumns(tableName) - val destColumns = sourceColumns.map { col -> + val filteredColumns = if (selectedAttributes.isEmpty()) { + sourceColumns + } else { + val sourceColumnNames = sourceColumns.map { it.name } + val sourceColumnNamesNormalized = sourceColumnNames.map { it.lowercase() }.toSet() + val normalizedSelected = selectedAttributes.map { it.lowercase() }.toSet() + val matched = sourceColumns.filter { it.name.lowercase() in normalizedSelected } + if (matched.isEmpty()) { + val unknownAttributes = selectedAttributes.filter { it.lowercase() !in sourceColumnNamesNormalized } + throw IllegalArgumentException( + "No selected attributes matched source columns for table '$tableName'. " + + "Unknown attributes: ${if (unknownAttributes.isEmpty()) selectedAttributes else unknownAttributes}. " + + "Available columns: $sourceColumnNames" + ) + } + matched + } + val destColumns = filteredColumns.map { col -> ColumnInfo( name = col.name, type = mapColumnType(col.type, sourceType, destType), diff --git a/backend/src/main/kotlin/com/opendatamask/application/service/JobService.kt b/backend/src/main/kotlin/com/opendatamask/application/service/JobService.kt index 8c9ab10..b93ee0a 100644 --- a/backend/src/main/kotlin/com/opendatamask/application/service/JobService.kt +++ b/backend/src/main/kotlin/com/opendatamask/application/service/JobService.kt @@ -180,10 +180,13 @@ class JobService( continue } addLog(job.id, "Mirroring schema for table: ${tableConfig.tableName}", LogLevel.INFO) + val selectedAttrs = tableConfig.selectedAttributes + ?.split(",")?.map { it.trim() }?.filter { it.isNotBlank() } ?: emptyList() destinationSchemaService.mirrorSchema( sourceConnector, sourceConn.type, destConnector, destConn.type, - tableConfig.tableName + tableConfig.tableName, + selectedAttrs ) processTable(job.id, tableConfig, sourceConnector, destConnector, subsetRows) } @@ -217,16 +220,19 @@ class JobService( ) { addLog(jobId, "Processing table: ${tableConfig.tableName} (mode: ${tableConfig.mode})", LogLevel.INFO) + val selectedAttrs = tableConfig.selectedAttributes + ?.split(",")?.map { it.trim() }?.filter { it.isNotBlank() }?.takeIf { it.isNotEmpty() } + when (tableConfig.mode) { TableMode.PASSTHROUGH -> { - val data = sourceConnector.fetchData(tableConfig.tableName, tableConfig.rowLimit?.toInt()) + val data = sourceConnector.fetchData(tableConfig.tableName, tableConfig.rowLimit?.toInt(), null, selectedAttrs) addLog(jobId, "Fetched ${data.size} rows from ${tableConfig.tableName}", LogLevel.INFO) val written = destConnector.writeData(tableConfig.tableName, data) addLog(jobId, "Wrote $written rows to destination ${tableConfig.tableName}", LogLevel.INFO) } TableMode.MASK -> { val generators = columnGeneratorRepository.findByTableConfigurationId(tableConfig.id) - val data = sourceConnector.fetchData(tableConfig.tableName, tableConfig.rowLimit?.toInt()) + val data = sourceConnector.fetchData(tableConfig.tableName, tableConfig.rowLimit?.toInt(), null, selectedAttrs) addLog( jobId, "Masking ${data.size} rows in ${tableConfig.tableName} with ${generators.size} generator(s)", @@ -249,7 +255,8 @@ class JobService( ?: sourceConnector.fetchData( tableConfig.tableName, tableConfig.rowLimit?.toInt(), - tableConfig.whereClause + tableConfig.whereClause, + selectedAttrs ) addLog(jobId, "Subsetting ${data.size} rows from ${tableConfig.tableName}", LogLevel.INFO) val written = destConnector.writeData(tableConfig.tableName, data) @@ -257,7 +264,7 @@ class JobService( } TableMode.UPSERT -> { val generators = columnGeneratorRepository.findByTableConfigurationId(tableConfig.id) - val data = sourceConnector.fetchData(tableConfig.tableName, tableConfig.rowLimit?.toInt(), tableConfig.whereClause) + val data = sourceConnector.fetchData(tableConfig.tableName, tableConfig.rowLimit?.toInt(), tableConfig.whereClause, selectedAttrs) addLog(jobId, "Upserting ${data.size} rows in ${tableConfig.tableName} with ${generators.size} generator(s)", LogLevel.INFO) val maskedData = if (generators.isNotEmpty()) data.map { row -> generatorService.applyGenerators(row, generators) } else data val written = destConnector.writeData(tableConfig.tableName, maskedData) diff --git a/backend/src/main/kotlin/com/opendatamask/application/service/TableConfigurationService.kt b/backend/src/main/kotlin/com/opendatamask/application/service/TableConfigurationService.kt index 100414f..347a0b6 100644 --- a/backend/src/main/kotlin/com/opendatamask/application/service/TableConfigurationService.kt +++ b/backend/src/main/kotlin/com/opendatamask/application/service/TableConfigurationService.kt @@ -26,6 +26,10 @@ class TableConfigurationService( rowLimit = request.rowLimit, whereClause = request.whereClause ) + config.selectedAttributes = request.selectedAttributes + ?.filter { it.isNotBlank() } + ?.takeIf { it.isNotEmpty() } + ?.joinToString(",") return tableConfigurationRepository.save(config).toResponse() } @@ -51,6 +55,10 @@ class TableConfigurationService( config.mode = request.mode config.rowLimit = request.rowLimit config.whereClause = request.whereClause + config.selectedAttributes = request.selectedAttributes + ?.filter { it.isNotBlank() } + ?.takeIf { it.isNotEmpty() } + ?.joinToString(",") return tableConfigurationRepository.save(config).toResponse() } @@ -132,6 +140,7 @@ class TableConfigurationService( mode = mode, rowLimit = rowLimit, whereClause = whereClause, + selectedAttributes = selectedAttributes?.split(",")?.map { it.trim() }?.filter { it.isNotBlank() }, createdAt = createdAt ) diff --git a/backend/src/main/kotlin/com/opendatamask/application/service/WorkspaceExportService.kt b/backend/src/main/kotlin/com/opendatamask/application/service/WorkspaceExportService.kt index 5ff1022..5a7c844 100644 --- a/backend/src/main/kotlin/com/opendatamask/application/service/WorkspaceExportService.kt +++ b/backend/src/main/kotlin/com/opendatamask/application/service/WorkspaceExportService.kt @@ -27,6 +27,7 @@ class WorkspaceExportService( mode = table.mode, rowLimit = table.rowLimit, whereClause = table.whereClause, + selectedAttributes = table.selectedAttributes?.split(",")?.map { it.trim() }?.filter { it.isNotBlank() }, columnGenerators = generators.map { gen -> ColumnGeneratorExportDto( columnName = gen.columnName, @@ -63,6 +64,10 @@ class WorkspaceExportService( table.mode = tableDto.mode table.rowLimit = tableDto.rowLimit table.whereClause = tableDto.whereClause + table.selectedAttributes = tableDto.selectedAttributes + ?.filter { it.isNotBlank() } + ?.takeIf { it.isNotEmpty() } + ?.joinToString(",") val savedTable = tableConfigurationRepository.save(table) val existingGenerators = columnGeneratorRepository.findByTableConfigurationId(savedTable.id) diff --git a/backend/src/main/kotlin/com/opendatamask/domain/model/TableConfiguration.kt b/backend/src/main/kotlin/com/opendatamask/domain/model/TableConfiguration.kt index 8da99c1..a7eda0a 100644 --- a/backend/src/main/kotlin/com/opendatamask/domain/model/TableConfiguration.kt +++ b/backend/src/main/kotlin/com/opendatamask/domain/model/TableConfiguration.kt @@ -33,6 +33,10 @@ class TableConfiguration( @Column(length = 4096) var whereClause: String? = null, + // Comma-separated list of column names to include in the extraction (null/empty = all columns). + @Column(length = 2048) + var selectedAttributes: String? = null, + @Column(nullable = false) var createdAt: LocalDateTime = LocalDateTime.now() ) { diff --git a/backend/src/main/kotlin/com/opendatamask/domain/port/input/dto/TableConfigurationDto.kt b/backend/src/main/kotlin/com/opendatamask/domain/port/input/dto/TableConfigurationDto.kt index 2bf71a7..8dae102 100644 --- a/backend/src/main/kotlin/com/opendatamask/domain/port/input/dto/TableConfigurationDto.kt +++ b/backend/src/main/kotlin/com/opendatamask/domain/port/input/dto/TableConfigurationDto.kt @@ -17,7 +17,8 @@ data class TableConfigurationRequest( val mode: TableMode, val rowLimit: Long? = null, - val whereClause: String? = null + val whereClause: String? = null, + val selectedAttributes: List? = null ) data class TableConfigurationResponse( @@ -28,6 +29,7 @@ data class TableConfigurationResponse( val mode: TableMode, val rowLimit: Long?, val whereClause: String?, + val selectedAttributes: List?, val createdAt: LocalDateTime ) diff --git a/backend/src/main/kotlin/com/opendatamask/domain/port/input/dto/WorkspaceConfigDto.kt b/backend/src/main/kotlin/com/opendatamask/domain/port/input/dto/WorkspaceConfigDto.kt index 1a58903..6bae1a9 100644 --- a/backend/src/main/kotlin/com/opendatamask/domain/port/input/dto/WorkspaceConfigDto.kt +++ b/backend/src/main/kotlin/com/opendatamask/domain/port/input/dto/WorkspaceConfigDto.kt @@ -16,6 +16,7 @@ data class TableConfigExportDto( val mode: TableMode, val rowLimit: Long?, val whereClause: String?, + val selectedAttributes: List? = null, val columnGenerators: List = emptyList() ) diff --git a/backend/src/main/kotlin/com/opendatamask/domain/port/output/DatabaseConnector.kt b/backend/src/main/kotlin/com/opendatamask/domain/port/output/DatabaseConnector.kt index bcf20da..29fa1dc 100644 --- a/backend/src/main/kotlin/com/opendatamask/domain/port/output/DatabaseConnector.kt +++ b/backend/src/main/kotlin/com/opendatamask/domain/port/output/DatabaseConnector.kt @@ -17,7 +17,7 @@ interface DatabaseConnector { fun testConnection(): Boolean fun listTables(): List fun listColumns(tableName: String): List - fun fetchData(tableName: String, limit: Int? = null, whereClause: String? = null): List> + fun fetchData(tableName: String, limit: Int? = null, whereClause: String? = null, selectedAttributes: List? = null): List> fun createTable(tableName: String, columns: List) fun truncateTable(tableName: String) fun writeData(tableName: String, rows: List>): Int diff --git a/backend/src/test/kotlin/com/opendatamask/adapter/input/rest/TableConfigurationControllerTest.kt b/backend/src/test/kotlin/com/opendatamask/adapter/input/rest/TableConfigurationControllerTest.kt index 338bbc7..8cf0a02 100644 --- a/backend/src/test/kotlin/com/opendatamask/adapter/input/rest/TableConfigurationControllerTest.kt +++ b/backend/src/test/kotlin/com/opendatamask/adapter/input/rest/TableConfigurationControllerTest.kt @@ -79,7 +79,7 @@ class TableConfigurationControllerTest { private fun makeConfigResponse(id: Long = 1L, workspaceId: Long = 1L) = TableConfigurationResponse( id = id, workspaceId = workspaceId, tableName = "users", schemaName = null, mode = TableMode.PASSTHROUGH, rowLimit = null, whereClause = null, - createdAt = LocalDateTime.now() + selectedAttributes = null, createdAt = LocalDateTime.now() ) private fun makeGeneratorResponse(id: Long = 1L, tableConfigId: Long = 1L) = ColumnGeneratorResponse( diff --git a/backend/src/test/kotlin/com/opendatamask/adapter/output/connector/PostgreSQLConnectorTest.kt b/backend/src/test/kotlin/com/opendatamask/adapter/output/connector/PostgreSQLConnectorTest.kt index 482ee2f..d78ca39 100644 --- a/backend/src/test/kotlin/com/opendatamask/adapter/output/connector/PostgreSQLConnectorTest.kt +++ b/backend/src/test/kotlin/com/opendatamask/adapter/output/connector/PostgreSQLConnectorTest.kt @@ -81,4 +81,37 @@ class PostgreSQLConnectorTest { val rows = connector.fetchData("test_users") assertTrue(rows.isEmpty()) } + + @Test + fun `fetchData with selectedAttributes returns only requested columns`() { + val rows = connector.fetchData("test_users", selectedAttributes = listOf("id", "name")) + assertEquals(2, rows.size) + // Each row should only contain id and name keys (case may vary in H2) + val firstRow = rows[0] + val keys = firstRow.keys.map { it.lowercase() }.toSet() + assertTrue(keys.contains("id") || keys.contains("ID")) + assertTrue(keys.contains("name") || keys.contains("NAME")) + // email column should not be present + assertFalse(keys.contains("email")) + } + + @Test + fun `fetchData with empty selectedAttributes returns all columns`() { + val rows = connector.fetchData("test_users", selectedAttributes = emptyList()) + assertEquals(2, rows.size) + val keys = rows[0].keys.map { it.lowercase() }.toSet() + assertTrue(keys.contains("id") || keys.contains("ID")) + assertTrue(keys.contains("name") || keys.contains("NAME")) + assertTrue(keys.contains("email") || keys.contains("EMAIL")) + } + + @Test + fun `fetchData with selectedAttributes and whereClause filters both rows and columns`() { + val rows = connector.fetchData("test_users", whereClause = "name = 'Alice'", selectedAttributes = listOf("id")) + assertEquals(1, rows.size) + val keys = rows[0].keys.map { it.lowercase() }.toSet() + // Only id should be returned + assertTrue(keys.size == 1) + assertTrue(keys.contains("id") || keys.contains("ID")) + } } diff --git a/backend/src/test/kotlin/com/opendatamask/application/service/DestinationSchemaServiceTest.kt b/backend/src/test/kotlin/com/opendatamask/application/service/DestinationSchemaServiceTest.kt index 0d276dd..b956193 100644 --- a/backend/src/test/kotlin/com/opendatamask/application/service/DestinationSchemaServiceTest.kt +++ b/backend/src/test/kotlin/com/opendatamask/application/service/DestinationSchemaServiceTest.kt @@ -235,4 +235,109 @@ class DestinationSchemaServiceTest { } } } + + // ── mirrorSchema with selectedAttributes ────────────────────────────── + + @Test + fun `mirrorSchema filters columns to selectedAttributes subset`() { + val sourceConnector = mock() + val destConnector = mock() + val columns = listOf( + ColumnInfo("id", "int4", false), + ColumnInfo("name", "text", true), + ColumnInfo("email", "text", true), + ColumnInfo("salary", "float8", true) + ) + whenever(sourceConnector.listColumns("users")).thenReturn(columns) + + service.mirrorSchema( + sourceConnector, ConnectionType.POSTGRESQL, + destConnector, ConnectionType.POSTGRESQL, + "users", + listOf("id", "name") + ) + + verify(destConnector).createTable( + eq("users"), + argThat { cols -> + cols.size == 2 && + cols.any { it.name == "id" } && + cols.any { it.name == "name" } && + cols.none { it.name == "email" } && + cols.none { it.name == "salary" } + } + ) + } + + @Test + fun `mirrorSchema uses all columns when selectedAttributes is empty`() { + val sourceConnector = mock() + val destConnector = mock() + val columns = listOf( + ColumnInfo("id", "int4", false), + ColumnInfo("name", "text", true) + ) + whenever(sourceConnector.listColumns("users")).thenReturn(columns) + + service.mirrorSchema( + sourceConnector, ConnectionType.POSTGRESQL, + destConnector, ConnectionType.POSTGRESQL, + "users", + emptyList() + ) + + verify(destConnector).createTable( + eq("users"), + argThat { cols -> cols.size == 2 } + ) + } + + @Test + fun `mirrorSchema column filtering is case-insensitive`() { + val sourceConnector = mock() + val destConnector = mock() + val columns = listOf( + ColumnInfo("Id", "int4", false), + ColumnInfo("NAME", "text", true), + ColumnInfo("email", "text", true) + ) + whenever(sourceConnector.listColumns("users")).thenReturn(columns) + + service.mirrorSchema( + sourceConnector, ConnectionType.POSTGRESQL, + destConnector, ConnectionType.POSTGRESQL, + "users", + listOf("id", "name") + ) + + verify(destConnector).createTable( + eq("users"), + argThat { cols -> + cols.size == 2 && + cols.none { it.name == "email" } + } + ) + } + + @Test + fun `mirrorSchema throws when selectedAttributes matches no source columns`() { + val sourceConnector = mock() + val destConnector = mock() + val columns = listOf( + ColumnInfo("id", "int4", false), + ColumnInfo("name", "text", true) + ) + whenever(sourceConnector.listColumns("users")).thenReturn(columns) + + val ex = assertThrows { + service.mirrorSchema( + sourceConnector, ConnectionType.POSTGRESQL, + destConnector, ConnectionType.POSTGRESQL, + "users", + listOf("nonexistent_col") + ) + } + assertTrue(ex.message!!.contains("No selected attributes matched")) + assertTrue(ex.message!!.contains("nonexistent_col")) + } } diff --git a/backend/src/test/kotlin/com/opendatamask/application/service/JobServiceTest.kt b/backend/src/test/kotlin/com/opendatamask/application/service/JobServiceTest.kt index a570b7f..1930bd3 100644 --- a/backend/src/test/kotlin/com/opendatamask/application/service/JobServiceTest.kt +++ b/backend/src/test/kotlin/com/opendatamask/application/service/JobServiceTest.kt @@ -244,7 +244,7 @@ class JobServiceTest { whenever(mockSrc.testConnection()).thenReturn(true) whenever(mockDst.testConnection()).thenReturn(true) whenever(tableConfigurationRepository.findByWorkspaceId(1L)).thenReturn(listOf(tableConfig)) - whenever(mockSrc.fetchData(eq("users"), anyOrNull(), anyOrNull())).thenReturn(rows) + whenever(mockSrc.fetchData(eq("users"), anyOrNull(), anyOrNull(), anyOrNull())).thenReturn(rows) whenever(mockDst.writeData(eq("users"), eq(rows))).thenReturn(1) jobService.runJob(1L) @@ -280,7 +280,7 @@ class JobServiceTest { whenever(mockDst.testConnection()).thenReturn(true) whenever(tableConfigurationRepository.findByWorkspaceId(1L)).thenReturn(listOf(tableConfig)) whenever(columnGeneratorRepository.findByTableConfigurationId(1L)).thenReturn(listOf(generator)) - whenever(mockSrc.fetchData(eq("users"), anyOrNull(), anyOrNull())).thenReturn(listOf(original)) + whenever(mockSrc.fetchData(eq("users"), anyOrNull(), anyOrNull(), anyOrNull())).thenReturn(listOf(original)) whenever(generatorService.applyGenerators(original, listOf(generator))).thenReturn(masked) whenever(mockDst.writeData("users", listOf(masked))).thenReturn(1) @@ -370,12 +370,12 @@ class JobServiceTest { whenever(mockSrc.testConnection()).thenReturn(true) whenever(mockDst.testConnection()).thenReturn(true) whenever(tableConfigurationRepository.findByWorkspaceId(1L)).thenReturn(listOf(tableConfig)) - whenever(mockSrc.fetchData(eq("users"), anyOrNull(), eq("age > 18"))).thenReturn(filtered) + whenever(mockSrc.fetchData(eq("users"), anyOrNull(), eq("age > 18"), anyOrNull())).thenReturn(filtered) whenever(mockDst.writeData("users", filtered)).thenReturn(1) jobService.runJob(1L) - verify(mockSrc).fetchData("users", null, "age > 18") + verify(mockSrc).fetchData("users", null, "age > 18", null) verify(mockDst).writeData("users", filtered) } diff --git a/backend/src/test/kotlin/com/opendatamask/application/service/SensitivityScanLogServiceTest.kt b/backend/src/test/kotlin/com/opendatamask/application/service/SensitivityScanLogServiceTest.kt index d7f83f1..65bba57 100644 --- a/backend/src/test/kotlin/com/opendatamask/application/service/SensitivityScanLogServiceTest.kt +++ b/backend/src/test/kotlin/com/opendatamask/application/service/SensitivityScanLogServiceTest.kt @@ -62,7 +62,7 @@ class SensitivityScanLogServiceTest { )).thenReturn(mockConnector) whenever(mockConnector.listTables()).thenReturn(listOf("users")) whenever(mockConnector.listColumns("users")).thenReturn(columns) - whenever(mockConnector.fetchData(eq("users"), eq(100), anyOrNull())).thenReturn(sampleRows) + whenever(mockConnector.fetchData(eq("users"), eq(100), anyOrNull(), anyOrNull())).thenReturn(sampleRows) whenever(columnSensitivityRepository.findByWorkspaceIdAndTableNameAndColumnName(any(), any(), any())) .thenReturn(null) whenever(columnSensitivityRepository.save(any())) @@ -118,7 +118,7 @@ class SensitivityScanLogServiceTest { )).thenReturn(mockConnector) whenever(mockConnector.listTables()).thenReturn(listOf("users")) whenever(mockConnector.listColumns("users")).thenReturn(columns) - whenever(mockConnector.fetchData(eq("users"), eq(100), anyOrNull())).thenReturn(sampleRows) + whenever(mockConnector.fetchData(eq("users"), eq(100), anyOrNull(), anyOrNull())).thenReturn(sampleRows) whenever(columnSensitivityRepository.findByWorkspaceIdAndTableNameAndColumnName(any(), any(), any())) .thenReturn(null) whenever(columnSensitivityRepository.save(any())) diff --git a/backend/src/test/kotlin/com/opendatamask/application/service/SensitivityScanServiceTest.kt b/backend/src/test/kotlin/com/opendatamask/application/service/SensitivityScanServiceTest.kt index d733e28..9135c5b 100644 --- a/backend/src/test/kotlin/com/opendatamask/application/service/SensitivityScanServiceTest.kt +++ b/backend/src/test/kotlin/com/opendatamask/application/service/SensitivityScanServiceTest.kt @@ -159,7 +159,7 @@ class SensitivityScanServiceTest { )).thenReturn(mockConnector) whenever(mockConnector.listTables()).thenReturn(listOf("users")) whenever(mockConnector.listColumns("users")).thenReturn(columns) - whenever(mockConnector.fetchData(eq("users"), eq(100), anyOrNull())).thenReturn(sampleRows) + whenever(mockConnector.fetchData(eq("users"), eq(100), anyOrNull(), anyOrNull())).thenReturn(sampleRows) whenever(columnSensitivityRepository.findByWorkspaceIdAndTableNameAndColumnName( any(), any(), any() )).thenReturn(null) diff --git a/backend/src/test/kotlin/com/opendatamask/application/service/SubsetExecutionServiceTest.kt b/backend/src/test/kotlin/com/opendatamask/application/service/SubsetExecutionServiceTest.kt index 33572bc..ca211b6 100644 --- a/backend/src/test/kotlin/com/opendatamask/application/service/SubsetExecutionServiceTest.kt +++ b/backend/src/test/kotlin/com/opendatamask/application/service/SubsetExecutionServiceTest.kt @@ -66,7 +66,7 @@ class SubsetExecutionServiceTest { whenever(subsetPlanService.buildExecutionPlan(1L)).thenReturn(plan) whenever(fkRepo.findByWorkspaceId(1L)).thenReturn(fks) - whenever(connector.fetchData(eq("customers"), anyOrNull(), anyOrNull())).thenReturn(customerRows) + whenever(connector.fetchData(eq("customers"), anyOrNull(), anyOrNull(), anyOrNull())).thenReturn(customerRows) val result = service.executeSubset(1L, connector) diff --git a/backend/src/test/kotlin/com/opendatamask/application/service/TableConfigurationServiceTest.kt b/backend/src/test/kotlin/com/opendatamask/application/service/TableConfigurationServiceTest.kt index 0576615..cbc6699 100644 --- a/backend/src/test/kotlin/com/opendatamask/application/service/TableConfigurationServiceTest.kt +++ b/backend/src/test/kotlin/com/opendatamask/application/service/TableConfigurationServiceTest.kt @@ -43,8 +43,9 @@ class TableConfigurationServiceTest { private fun makeConfigRequest( tableName: String = "users", - mode: TableMode = TableMode.PASSTHROUGH - ) = TableConfigurationRequest(tableName = tableName, mode = mode) + mode: TableMode = TableMode.PASSTHROUGH, + selectedAttributes: List? = null + ) = TableConfigurationRequest(tableName = tableName, mode = mode, selectedAttributes = selectedAttributes) // ── createTableConfiguration ─────────────────────────────────────────── @@ -79,6 +80,50 @@ class TableConfigurationServiceTest { assertEquals("active = true", response.whereClause) } + @Test + fun `createTableConfiguration persists selectedAttributes as comma-separated string`() { + val saved = makeTableConfig(id = 1L).also { + it.selectedAttributes = "id,name,email" + } + val request = TableConfigurationRequest( + tableName = "users", mode = TableMode.SUBSET, + selectedAttributes = listOf("id", "name", "email") + ) + val captor = argumentCaptor() + whenever(tableConfigurationRepository.save(captor.capture())).thenReturn(saved) + + val response = service.createTableConfiguration(10L, request) + + assertEquals("id,name,email", captor.firstValue.selectedAttributes) + assertEquals(listOf("id", "name", "email"), response.selectedAttributes) + } + + @Test + fun `createTableConfiguration normalizes empty selectedAttributes list to null`() { + val saved = makeTableConfig(id = 1L) + val request = TableConfigurationRequest( + tableName = "users", mode = TableMode.PASSTHROUGH, + selectedAttributes = emptyList() + ) + val captor = argumentCaptor() + whenever(tableConfigurationRepository.save(captor.capture())).thenReturn(saved) + + service.createTableConfiguration(10L, request) + + assertNull(captor.firstValue.selectedAttributes) + } + + @Test + fun `createTableConfiguration returns null selectedAttributes when entity has no value`() { + val saved = makeTableConfig(id = 1L) + val request = TableConfigurationRequest(tableName = "users", mode = TableMode.PASSTHROUGH) + whenever(tableConfigurationRepository.save(any())).thenReturn(saved) + + val response = service.createTableConfiguration(10L, request) + + assertNull(response.selectedAttributes) + } + // ── getTableConfiguration ────────────────────────────────────────────── @Test diff --git a/docs/user-guide.md b/docs/user-guide.md index 9e898a4..ccdbb1a 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -183,9 +183,51 @@ A **Workspace** is an isolated configuration scope. Each workspace has: | `MASK` | Replace column values with generated fake data | | `GENERATE` | Generate a completely new row set | | `PASSTHROUGH` | Copy data without modification | -| `SUBSET` | Copy a filtered/sampled subset of rows | +| `SUBSET` | Copy a filtered/sampled subset of rows using an optional WHERE clause | +| `UPSERT` | Fetch rows from source, optionally mask them, and write to destination | | `SKIP` | Exclude the table from processing | +### Subsetting & Attribute Filtering + +OpenDataMask supports two complementary mechanisms to control which data is extracted: + +#### Record Filtering (WHERE clause) + +Set a SQL `WHERE` clause on any table configuration to restrict which **rows** are extracted from the source. This predicate is pushed down to the source database for optimal performance. + +```json +{ + "tableName": "orders", + "mode": "SUBSET", + "whereClause": "created_at > '2023-01-01' AND tenant_id = 5" +} +``` + +> For MongoDB, the `whereClause` is interpreted as a JSON query filter, e.g. `{"status": "active"}`. + +#### Attribute Filtering (Column Selection) + +Set a `selectedAttributes` list on any table configuration to restrict which **columns** are extracted from the source. OpenDataMask builds a `SELECT col1, col2, ...` query (instead of `SELECT *`) so the filtering happens at the source for maximum performance. + +```json +{ + "tableName": "users", + "mode": "PASSTHROUGH", + "selectedAttributes": ["id", "name", "email"] +} +``` + +When `selectedAttributes` is set: +- Only the listed columns are fetched from the source database. +- The destination table schema is created with only those columns (no schema mismatch). +- Applies to all modes that read from the source: `PASSTHROUGH`, `MASK`, `SUBSET`, and `UPSERT`. + +Leave `selectedAttributes` empty or omit it to include all columns (the default behaviour). + +#### UI Column Picker + +In the **Tables** view, the table configuration modal includes a **Selected Columns** field. Enter a comma-separated list of column names (e.g. `id, name, email`) to enable column-level filtering. Leave the field blank to include all columns. + ### Generator Types OpenDataMask includes 60+ built-in generators. Key examples: diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts index 72f429d..29868dc 100644 --- a/frontend/src/types/index.ts +++ b/frontend/src/types/index.ts @@ -107,6 +107,7 @@ export interface TableConfiguration { tableName: string mode: TableMode whereClause?: string + selectedAttributes?: string[] createdAt: string updatedAt: string columnGenerators: ColumnGenerator[] @@ -118,6 +119,7 @@ export interface TableConfigurationRequest { tableName: string mode: TableMode whereClause?: string + selectedAttributes?: string[] } // ── Column Generator ────────────────────────────────────────────────────── @@ -334,6 +336,7 @@ export interface WorkspaceTableConfigExport { mode: TableMode rowLimit: number | null whereClause: string | null + selectedAttributes: string[] | null columnGenerators: WorkspaceColumnGeneratorExport[] } diff --git a/frontend/src/views/TablesView.vue b/frontend/src/views/TablesView.vue index 14bc243..fff0478 100644 --- a/frontend/src/views/TablesView.vue +++ b/frontend/src/views/TablesView.vue @@ -28,7 +28,8 @@ const tableForm = ref({ connectionId: 0, schemaName: 'public', tableName: '', - mode: TableMode.MASK + mode: TableMode.MASK, + selectedAttributes: [] }) const tableFormError = ref('') const savingTable = ref(false) @@ -77,7 +78,8 @@ function openCreateTable() { connectionId: connections.value[0]?.id ?? 0, schemaName: 'public', tableName: '', - mode: TableMode.MASK + mode: TableMode.MASK, + selectedAttributes: [] } tableFormError.value = '' showTableModal.value = true @@ -90,7 +92,8 @@ function openEditTable(t: TableConfiguration) { schemaName: t.schemaName, tableName: t.tableName, mode: t.mode, - whereClause: t.whereClause + whereClause: t.whereClause, + selectedAttributes: t.selectedAttributes ? [...t.selectedAttributes] : [] } tableFormError.value = '' showTableModal.value = true @@ -104,16 +107,22 @@ async function submitTableForm() { savingTable.value = true tableFormError.value = '' try { + // Normalise: omit the field when no attributes selected (empty list = select all) + const payload: TableConfigurationRequest = { + ...tableForm.value, + selectedAttributes: + tableForm.value.selectedAttributes?.length ? tableForm.value.selectedAttributes : undefined + } if (editingTable.value) { const updated = await tablesApi.updateTableConfig( workspaceId.value, editingTable.value.id, - tableForm.value + payload ) const idx = tables.value.findIndex((t) => t.id === updated.id) if (idx !== -1) tables.value[idx] = { ...tables.value[idx], ...updated } } else { - const created = await tablesApi.createTableConfig(workspaceId.value, tableForm.value) + const created = await tablesApi.createTableConfig(workspaceId.value, payload) tables.value.push({ ...created, columnGenerators: [] }) } showTableModal.value = false @@ -220,6 +229,17 @@ async function deleteColumn(t: TableConfiguration, col: ColumnGenerator) { } // ── Helpers ── +// Computed getter/setter for selected attributes as a comma-separated string in the form +const selectedAttributesText = computed({ + get: () => (tableForm.value.selectedAttributes ?? []).join(', '), + set: (val: string) => { + tableForm.value.selectedAttributes = val + .split(',') + .map((s) => s.trim()) + .filter((s) => s.length > 0) + } +}) + function modeBadgeClass(mode: TableMode) { const map: Record = { [TableMode.PASSTHROUGH]: 'badge-gray', @@ -286,6 +306,9 @@ function paramString(params?: Record) {
Connection: {{ connectionName(table.connectionId) }} | WHERE: {{ table.whereClause }} + + | Columns: {{ table.selectedAttributes.join(', ') }} +
@@ -383,6 +406,16 @@ function paramString(params?: Record) { placeholder="e.g. created_at > '2020-01-01'" /> +
+ + +

Comma-separated list of column names to extract. Leave blank to include all columns.

+