Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class CustomDataMappingService(
columnName = request.columnName,
action = request.action,
maskingStrategy = if (request.action == MappingAction.MASK) request.maskingStrategy else null,
fakeGeneratorType = if (request.action == MappingAction.MASK) request.fakeGeneratorType else null
fakeGeneratorType = if (request.action == MappingAction.MASK) request.fakeGeneratorType else null,
piiRuleParams = if (request.action == MappingAction.MASK) request.piiRuleParams else null
)
return customDataMappingRepository.save(mapping).toResponse()
}
Expand Down Expand Up @@ -64,6 +65,7 @@ class CustomDataMappingService(
mapping.action = request.action
mapping.maskingStrategy = if (request.action == MappingAction.MASK) request.maskingStrategy else null
mapping.fakeGeneratorType = if (request.action == MappingAction.MASK) request.fakeGeneratorType else null
mapping.piiRuleParams = if (request.action == MappingAction.MASK) request.piiRuleParams else null
return customDataMappingRepository.save(mapping).toResponse()
}

Expand All @@ -90,7 +92,8 @@ class CustomDataMappingService(
columnName = entry.columnName,
action = entry.action,
maskingStrategy = if (entry.action == MappingAction.MASK) entry.maskingStrategy else null,
fakeGeneratorType = if (entry.action == MappingAction.MASK) entry.fakeGeneratorType else null
fakeGeneratorType = if (entry.action == MappingAction.MASK) entry.fakeGeneratorType else null,
piiRuleParams = if (entry.action == MappingAction.MASK) entry.piiRuleParams else null
)
}
return mappings.map { customDataMappingRepository.save(it).toResponse() }
Expand Down Expand Up @@ -129,6 +132,7 @@ class CustomDataMappingService(
action = action,
maskingStrategy = maskingStrategy,
fakeGeneratorType = fakeGeneratorType,
piiRuleParams = piiRuleParams,
createdAt = createdAt,
updatedAt = updatedAt
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package com.opendatamask.application.service

import com.opendatamask.domain.model.HashRule
import com.opendatamask.domain.model.PIIMaskingRule
import com.opendatamask.domain.model.PartialMaskRule
import com.opendatamask.domain.model.PassThroughRule
import com.opendatamask.domain.model.RedactRule
import com.opendatamask.domain.port.output.RuleRegistryPort
import org.springframework.stereotype.Service
import java.util.concurrent.ConcurrentHashMap

// Holds all known PIIMaskingRule implementations.
// Built-in non-parameterized rules are registered at construction time.
// RegexRule is not registered as a default instance because it requires caller-supplied
// pattern and replacement parameters; PIIMaskingService constructs RegexRule instances
// directly from piiRuleParams. To override regex behavior globally, registerCustomRule()
// can be used with a concrete PIIMaskingRule that has ruleId "regex" or any custom ID.
// Call registerCustomRule() to inject additional business-specific rules at runtime.
@Service
class DefaultRuleRegistry : RuleRegistryPort {

private val registry = ConcurrentHashMap<String, PIIMaskingRule>()

init {
register(PassThroughRule())
register(RedactRule())
register(PartialMaskRule())
register(HashRule())
}
Comment on lines +24 to +29
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DefaultRuleRegistry does not register a regex rule ID, even though RegexRule is modeled as a built-in rule (and docs/UI may expect discoverability via getAllRuleIds()). Either include regex in the registry (which likely requires changing the registry to handle parameterized rules) or document/rename the registry so it’s clearly only for non-parameterized/custom rules.

Copilot uses AI. Check for mistakes.

private fun register(rule: PIIMaskingRule) {
registry[rule.ruleId] = rule
}

override fun getRule(ruleId: String): PIIMaskingRule? = registry[ruleId]

override fun getAllRuleIds(): Set<String> = registry.keys.toSet()

override fun registerCustomRule(rule: PIIMaskingRule) = register(rule)
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ class JobService(
private val destinationSchemaService: DestinationSchemaService,
private val postJobActionService: PostJobActionService,
private val schemaChangeService: SchemaChangeService,
private val webhookService: WebhookService
private val webhookService: WebhookService,
private val piiMaskingService: PIIMaskingService
) : JobUseCase {
@org.springframework.beans.factory.annotation.Autowired(required = false)
private var subsetExecutionService: SubsetExecutionService? = null
Expand Down Expand Up @@ -178,7 +179,7 @@ class JobService(
tableConfig.tableName,
selectedAttrs
)
processTable(job.id, tableConfig, sourceConnector, destConnector, subsetRows)
processTable(job.id, tableConfig, sourceConnector, destConnector, subsetRows, job.workspaceId, sourceConn.id)
}

updateJobStatus(job, JobStatus.COMPLETED)
Expand Down Expand Up @@ -206,7 +207,9 @@ class JobService(
tableConfig: TableConfiguration,
sourceConnector: DatabaseConnector,
destConnector: DatabaseConnector,
preComputedRows: Map<String, List<Map<String, Any?>>> = emptyMap()
preComputedRows: Map<String, List<Map<String, Any?>>> = emptyMap(),
workspaceId: Long? = null,
sourceConnectionId: Long? = null
) {
addLog(jobId, "Processing table: ${tableConfig.tableName} (mode: ${tableConfig.mode})", LogLevel.INFO)

Expand All @@ -217,7 +220,12 @@ class JobService(
TableMode.PASSTHROUGH -> {
val data = sourceConnector.fetchData(tableConfig.tableName, tableConfig.rowLimit?.toInt(), null, selectedAttrs)
addLog(jobId, "Fetched ${data.size} rows from ${tableConfig.tableName}", LogLevel.INFO)
val written = destConnector.writeData(tableConfig.tableName, data)
val transformed = if (workspaceId != null && sourceConnectionId != null) {
piiMaskingService.applyMappingsToRows(workspaceId, sourceConnectionId, tableConfig.tableName, data)
} else {
data
}
val written = destConnector.writeData(tableConfig.tableName, transformed)
addLog(jobId, "Wrote $written rows to destination ${tableConfig.tableName}", LogLevel.INFO)
}
TableMode.MASK -> {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package com.opendatamask.application.service

import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import com.fasterxml.jackson.module.kotlin.readValue
import com.opendatamask.domain.model.CustomDataMapping
import com.opendatamask.domain.model.HashRule
import com.opendatamask.domain.model.MappingAction
import com.opendatamask.domain.model.MaskingStrategy
import com.opendatamask.domain.model.PartialMaskRule
import com.opendatamask.domain.model.RegexRule
import com.opendatamask.domain.port.output.CustomDataMappingPort
import com.opendatamask.domain.port.output.RuleRegistryPort
import org.slf4j.LoggerFactory
import org.springframework.stereotype.Service

// Transforms source data rows by applying the PIIMaskingRule selected by each
// CustomDataMapping entry. Columns without a mapping pass through unchanged.
@Service
class PIIMaskingService(
private val ruleRegistry: RuleRegistryPort,
private val customDataMappingPort: CustomDataMappingPort
) {
private val logger = LoggerFactory.getLogger(PIIMaskingService::class.java)
private val mapper = jacksonObjectMapper()

// Load mappings once and apply to every row in a batch. Prefer this over
// the convenience overload (applyMappings with workspaceId/connectionId/tableName/row)
// to avoid N+1 persistence queries when processing multiple rows for the same table.
fun applyMappingsToRows(
workspaceId: Long,
connectionId: Long,
tableName: String,
rows: List<Map<String, Any?>>
): List<Map<String, Any?>> {
val mappings = loadMappings(workspaceId, connectionId, tableName)
if (mappings.isEmpty()) return rows
return rows.map { row -> applyMappings(mappings, row) }
}

// Load the per-column mapping index for a table; result can be reused across rows.
fun loadMappings(
workspaceId: Long,
connectionId: Long,
tableName: String
): Map<String, CustomDataMapping> =
customDataMappingPort
.findByWorkspaceIdAndConnectionIdAndTableName(workspaceId, connectionId, tableName)
.associateBy { it.columnName.lowercase() }

// Apply pre-fetched mappings to a single row. Column name matching is case-insensitive.
fun applyMappings(
mappings: Map<String, CustomDataMapping>,
row: Map<String, Any?>
): Map<String, Any?> {
if (mappings.isEmpty()) return row
return row.mapValues { (column, value) ->
val mapping = mappings[column.lowercase()] ?: return@mapValues value
when (mapping.action) {
MappingAction.MIGRATE_AS_IS -> value
MappingAction.MASK -> applyStrategy(mapping, value)
}
}
}

// Convenience overload that fetches mappings from persistence on each call.
// Use applyMappingsToRows() instead when processing multiple rows for the same table.
fun applyMappings(
workspaceId: Long,
connectionId: Long,
tableName: String,
row: Map<String, Any?>
): Map<String, Any?> =
applyMappings(loadMappings(workspaceId, connectionId, tableName), row)

private fun applyStrategy(mapping: CustomDataMapping, value: Any?): Any? {
val params = parseParams(mapping.piiRuleParams) ?: return value

// Allow a custom rule to be invoked by specifying its ruleId in params.
// This enables runtime-registered rules to be used from any mapping strategy slot.
val customRuleId = params["ruleId"]?.takeIf { it.isNotBlank() }
if (customRuleId != null) {
val customRule = ruleRegistry.getRule(customRuleId)
if (customRule != null) return customRule.mask(value)
logger.warn("PII rule '{}' not found in registry — passing value through unchanged", customRuleId)
return value
}

return when (mapping.maskingStrategy) {
MaskingStrategy.NULL -> null
MaskingStrategy.REDACT -> ruleRegistry.getRule("redact")?.mask(value) ?: "[REDACTED]"
MaskingStrategy.HASH -> {
val salt = params["salt"] ?: ""
HashRule(salt).mask(value)
}
MaskingStrategy.PARTIAL_MASK -> {
val keepFirst = params["keepFirst"]?.toIntOrNull() ?: 0
val keepLast = params["keepLast"]?.toIntOrNull() ?: 4
val maskChar = params["maskChar"]?.firstOrNull() ?: '*'
PartialMaskRule(keepFirst, keepLast, maskChar).mask(value)
}
MaskingStrategy.REGEX -> {
val pattern = params["pattern"]
val replacement = params["replacement"]
if (pattern.isNullOrBlank() || replacement == null) {
logger.warn(
"REGEX strategy for column '{}' is missing 'pattern' or 'replacement' in piiRuleParams — passing value through unchanged",
mapping.columnName
)
return value
}
runCatching { RegexRule(pattern, replacement).mask(value) }
.getOrElse { cause ->
logger.warn(
"REGEX strategy for column '{}' has invalid pattern '{}' — passing value through unchanged: {}",
mapping.columnName, pattern, cause.message
)
value
}
}
Comment on lines +101 to +119
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

REGEX defaults (pattern = ".*", replacement = "") mean that if piiRuleParams is missing/invalid, the value will be replaced by an empty string (effectively data loss). For safety, treat missing/invalid required params as a no-op (return the original value) or fail fast/log clearly instead of applying these destructive defaults.

Copilot uses AI. Check for mistakes.
// FAKE strategy is handled upstream by the GeneratorService; return value unchanged.
MaskingStrategy.FAKE -> value
null -> value
}
}

Comment on lines +88 to +125
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

applyStrategy() only consults RuleRegistryPort for REDACT; HASH/PARTIAL_MASK/REGEX are instantiated directly. This makes registerCustomRule() unusable for job-time mappings and contradicts the stated “registry pattern” approach. Consider using the registry for strategy dispatch (including custom rule IDs in piiRuleParams) or adjusting the design/docs so it’s clear which strategies are registry-driven.

Suggested change
return when (mapping.maskingStrategy) {
MaskingStrategy.NULL -> null
MaskingStrategy.REDACT -> ruleRegistry.getRule("redact")?.mask(value) ?: "[REDACTED]"
MaskingStrategy.HASH -> {
val salt = params["salt"] ?: ""
HashRule(salt).mask(value)
}
MaskingStrategy.PARTIAL_MASK -> {
val keepFirst = params["keepFirst"]?.toIntOrNull() ?: 0
val keepLast = params["keepLast"]?.toIntOrNull() ?: 4
val maskChar = params["maskChar"]?.firstOrNull() ?: '*'
PartialMaskRule(keepFirst, keepLast, maskChar).mask(value)
}
MaskingStrategy.REGEX -> {
val pattern = params["pattern"] ?: ".*"
val replacement = params["replacement"] ?: ""
RegexRule(pattern, replacement).mask(value)
}
// FAKE strategy is handled upstream by the GeneratorService; return value unchanged.
MaskingStrategy.FAKE -> value
null -> value
}
}
params["ruleId"]
?.takeIf { it.isNotBlank() }
?.let { ruleId ->
applyRegistryRule(ruleId, value)?.let { return it }
}
return when (mapping.maskingStrategy) {
MaskingStrategy.NULL -> null
MaskingStrategy.REDACT -> applyRegistryRule("redact", value) ?: "[REDACTED]"
MaskingStrategy.HASH -> applyRegistryRule("hash", value) ?: applyHashRule(params, value)
MaskingStrategy.PARTIAL_MASK -> applyRegistryRule("partial_mask", value) ?: applyPartialMaskRule(params, value)
MaskingStrategy.REGEX -> applyRegistryRule("regex", value) ?: applyRegexRule(params, value)
// FAKE strategy is handled upstream by the GeneratorService; return value unchanged.
MaskingStrategy.FAKE -> value
null -> value
}
}
private fun applyRegistryRule(ruleId: String, value: Any?): Any? =
ruleRegistry.getRule(ruleId)?.mask(value)
private fun applyHashRule(params: Map<String, String>, value: Any?): Any? {
val salt = params["salt"] ?: ""
return HashRule(salt).mask(value)
}
private fun applyPartialMaskRule(params: Map<String, String>, value: Any?): Any? {
val keepFirst = params["keepFirst"]?.toIntOrNull() ?: 0
val keepLast = params["keepLast"]?.toIntOrNull() ?: 4
val maskChar = params["maskChar"]?.firstOrNull() ?: '*'
return PartialMaskRule(keepFirst, keepLast, maskChar).mask(value)
}
private fun applyRegexRule(params: Map<String, String>, value: Any?): Any? {
val pattern = params["pattern"] ?: ".*"
val replacement = params["replacement"] ?: ""
return RegexRule(pattern, replacement).mask(value)
}

Copilot uses AI. Check for mistakes.
// Returns null (and logs a warning) when piiRuleParams is present but cannot be parsed as JSON.
// Returns an empty map when piiRuleParams is absent/blank (treated as "no extra config").
private fun parseParams(json: String?): Map<String, String>? {
if (json.isNullOrBlank()) return emptyMap()
return runCatching { mapper.readValue<Map<String, String>>(json) }
.getOrElse { cause ->
logger.warn("Failed to parse piiRuleParams JSON '{}': {} — passing value through unchanged", json, cause.message)
null
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ enum class MappingAction {
}

enum class MaskingStrategy {
FAKE, HASH, NULL
FAKE, HASH, NULL, REDACT, PARTIAL_MASK, REGEX
}

@Entity
Expand Down Expand Up @@ -50,6 +50,9 @@ class CustomDataMapping(
@Column
var fakeGeneratorType: GeneratorType? = null,

@Column(name = "pii_rule_params", columnDefinition = "TEXT")
var piiRuleParams: String? = null,

@Column(nullable = false)
var createdAt: LocalDateTime = LocalDateTime.now(),

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package com.opendatamask.domain.model

import java.security.MessageDigest

// Core interface every PII masking rule must implement.
// Each implementation must carry a stable, unique ruleId that identifies it
// in the registry and in JSON mapping configurations.
interface PIIMaskingRule {
val ruleId: String
fun mask(input: Any?): Any?
}

// Sealed base class for built-in rules so exhaustive when-expressions can be used
// when dispatch on rule type is needed in the application layer.
sealed class BuiltInPIIRule(override val ruleId: String) : PIIMaskingRule

// Passes the value through unchanged. Used as the default for unmapped columns.
class PassThroughRule : BuiltInPIIRule("pass_through") {
override fun mask(input: Any?): Any? = input
}

// Replaces every non-null value with the literal token [REDACTED].
class RedactRule : BuiltInPIIRule("redact") {
override fun mask(input: Any?): Any? = if (input == null) null else "[REDACTED]"
}

// Masks the middle characters of a string, preserving a configurable number of
// leading and trailing characters. Useful for partial credit-card or email masking.
// When the input is shorter than keepFirst + keepLast, the original value is returned unchanged.
// Negative values for keepFirst or keepLast are treated as zero.
class PartialMaskRule(
val keepFirst: Int = 0,
val keepLast: Int = 4,
val maskChar: Char = '*'
) : BuiltInPIIRule("partial_mask") {
override fun mask(input: Any?): Any? {
if (input == null) return null
val str = input.toString()
val safeKeepFirst = keepFirst.coerceAtLeast(0)
val safeKeepLast = keepLast.coerceAtLeast(0)
if (str.length <= safeKeepFirst + safeKeepLast) return str
val maskLen = str.length - safeKeepFirst - safeKeepLast
return str.take(safeKeepFirst) + maskChar.toString().repeat(maskLen) + str.takeLast(safeKeepLast)
}
}

// Produces a deterministic SHA-256 hex digest of the input, with an optional salt.
class HashRule(val salt: String = "") : BuiltInPIIRule("hash") {
override fun mask(input: Any?): Any? {
if (input == null) return null
val digest = MessageDigest.getInstance("SHA-256")
val bytes = digest.digest(("$salt${input}").toByteArray(Charsets.UTF_8))
return bytes.joinToString("") { "%02x".format(it) }
}
}

// Applies a user-supplied regular expression to the string representation of the value,
// replacing every match with the given replacement string.
// Throws IllegalArgumentException at construction time if pattern is not a valid regex.
class RegexRule(val pattern: String, val replacement: String) : BuiltInPIIRule("regex") {
private val compiledRegex: Regex = runCatching { Regex(pattern) }
.getOrElse { cause ->
throw IllegalArgumentException(
"Invalid regex pattern: \"$pattern\"",
cause
)
}

override fun mask(input: Any?): Any? {
if (input == null) return null
return compiledRegex.replace(input.toString(), replacement)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@ data class CustomDataMappingRequest(

val maskingStrategy: MaskingStrategy? = null,

val fakeGeneratorType: GeneratorType? = null
val fakeGeneratorType: GeneratorType? = null,

// JSON string carrying strategy-specific parameters.
// HashRule: {"salt":"..."}
// PartialMaskRule: {"keepFirst":"2","keepLast":"4","maskChar":"*"}
// RegexRule: {"pattern":"\\d","replacement":"#"}
val piiRuleParams: String? = null
)

data class BulkCustomDataMappingRequest(
Expand All @@ -46,7 +52,9 @@ data class BulkCustomDataMappingRequest(

val maskingStrategy: MaskingStrategy? = null,

val fakeGeneratorType: GeneratorType? = null
val fakeGeneratorType: GeneratorType? = null,

val piiRuleParams: String? = null
)
}

Expand All @@ -59,6 +67,7 @@ data class CustomDataMappingResponse(
val action: MappingAction,
val maskingStrategy: MaskingStrategy?,
val fakeGeneratorType: GeneratorType?,
val piiRuleParams: String?,
val createdAt: LocalDateTime,
val updatedAt: LocalDateTime
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.opendatamask.domain.port.output

import com.opendatamask.domain.model.PIIMaskingRule

// Driven port: a registry that maps rule IDs to PIIMaskingRule implementations.
// The default implementation is provided by DefaultRuleRegistry in the application layer.
// Custom rules can be registered at runtime via registerCustomRule().
interface RuleRegistryPort {
fun getRule(ruleId: String): PIIMaskingRule?
fun getAllRuleIds(): Set<String>
fun registerCustomRule(rule: PIIMaskingRule)
}
Loading