phodal · phodal · Dec 10, 2025 · Dec 9, 2025 · Dec 9, 2025 · Dec 9, 2025
diff --git a/mpp-core/build.gradle.kts b/mpp-core/build.gradle.kts
@@ -212,6 +212,9 @@ kotlin {
 
                 // JSQLParser for SQL validation and parsing
                 implementation("com.github.jsqlparser:jsqlparser:4.9")
+
+                // MyNLP for Chinese NLP tokenization
+                implementation("com.mayabot.mynlp:mynlp-all:4.0.0")
             }
         }
 

diff --git a/mpp-core/src/androidMain/kotlin/cc/unitmesh/agent/chatdb/NlpTokenizer.android.kt b/mpp-core/src/androidMain/kotlin/cc/unitmesh/agent/chatdb/NlpTokenizer.android.kt
@@ -0,0 +1,23 @@
+package cc.unitmesh.agent.chatdb
+
+/**
+ * Android `actual` for NlpTokenizer.
+ *
+ * MyNLP is treated as JVM-only (it may have compatibility issues on Android),
+ * so this object does no tokenization of its own and forwards every call to
+ * [FallbackNlpTokenizer].
+ *
+ * TODO: Consider using Android's BreakIterator or a lightweight NLP library for better tokenization.
+ */
+actual object NlpTokenizer {
+    /**
+     * Extracts keywords from a natural language query by delegating to
+     * [FallbackNlpTokenizer.extractKeywords].
+     *
+     * @param query The natural language query to tokenize
+     * @param stopWords Words that must be filtered out of the result
+     * @return The keywords produced by the fallback tokenizer
+     */
+    actual fun extractKeywords(query: String, stopWords: Set<String>): List<String> =
+        FallbackNlpTokenizer.extractKeywords(query, stopWords)
+}
+
diff --git a/mpp-core/src/androidMain/kotlin/cc/unitmesh/agent/subagent/SqlValidator.android.kt b/mpp-core/src/androidMain/kotlin/cc/unitmesh/agent/subagent/SqlValidator.android.kt
@@ -0,0 +1,125 @@
+package cc.unitmesh.agent.subagent
+
+import net.sf.jsqlparser.parser.CCJSqlParserUtil
+import net.sf.jsqlparser.statement.Statement
+import net.sf.jsqlparser.statement.delete.Delete
+import net.sf.jsqlparser.statement.update.Update
+import net.sf.jsqlparser.util.TablesNamesFinder
+
+/**
+ * Android implementation of SqlValidator using JSqlParser.
+ *
+ * This validator uses JSqlParser to validate SQL syntax.
+ * It can detect:
+ * - Syntax errors
+ * - Malformed SQL statements
+ * - Unsupported SQL constructs
+ * - Table names not in whitelist (schema validation)
+ *
+ * Every public method is total: parser failures are converted into an invalid
+ * [SqlValidationResult] (or an empty list) instead of being thrown.
+ */
+actual class SqlValidator actual constructor() : SqlValidatorInterface {
+
+    /**
+     * Checks that [sql] parses as a SQL statement.
+     *
+     * @param sql The SQL text to validate
+     * @return A valid result carrying advisory warnings, or an invalid result
+     *         carrying a single readable error message
+     */
+    actual override fun validate(sql: String): SqlValidationResult {
+        return try {
+            val statement = parseStatement(sql)
+            SqlValidationResult(
+                isValid = true,
+                errors = emptyList(),
+                warnings = collectWarnings(statement)
+            )
+        } catch (e: Exception) {
+            failure(e)
+        }
+    }
+
+    /**
+     * Checks that [sql] parses and only references tables from [allowedTables].
+     *
+     * Table names are compared case-insensitively. When a table outside the
+     * whitelist is used, the result is invalid and its error lists both the
+     * offending tables and the allowed ones; warnings are still reported.
+     *
+     * @param sql The SQL text to validate
+     * @param allowedTables Table names the statement is permitted to reference
+     * @return The validation result, invalid on parse errors or whitelist violations
+     */
+    actual override fun validateWithTableWhitelist(sql: String, allowedTables: Set<String>): SqlValidationResult {
+        return try {
+            val statement = parseStatement(sql)
+
+            // Extract table names from the SQL
+            val usedTables = TablesNamesFinder().getTableList(statement)
+
+            // Check if all used tables are in the whitelist (case-insensitive)
+            val allowedTablesLower = allowedTables.map { it.lowercase() }.toSet()
+            val invalidTables = usedTables.filter { tableName ->
+                tableName.lowercase() !in allowedTablesLower
+            }
+
+            val errors = if (invalidTables.isEmpty()) {
+                emptyList()
+            } else {
+                listOf(
+                    "Invalid table(s) used: ${invalidTables.joinToString(", ")}. " +
+                        "Available tables: ${allowedTables.joinToString(", ")}"
+                )
+            }
+
+            SqlValidationResult(
+                isValid = errors.isEmpty(),
+                errors = errors,
+                warnings = collectWarnings(statement)
+            )
+        } catch (e: Exception) {
+            failure(e)
+        }
+    }
+
+    /**
+     * Lists the table names referenced by [sql].
+     *
+     * @param sql The SQL text to inspect
+     * @return The referenced table names, or an empty list when [sql] cannot be parsed
+     */
+    actual override fun extractTableNames(sql: String): List<String> {
+        return try {
+            TablesNamesFinder().getTableList(parseStatement(sql))
+        } catch (e: Exception) {
+            emptyList()
+        }
+    }
+
+    /**
+     * Parses [sql] into a JSqlParser [Statement].
+     *
+     * Blank input, and input for which the parser yields no statement, is
+     * rejected with an explicit message so callers see "SQL statement is empty"
+     * instead of an opaque null-check failure.
+     *
+     * @throws IllegalArgumentException when there is no statement to parse
+     * @throws Exception whatever the parser raises for malformed SQL
+     */
+    private fun parseStatement(sql: String): Statement {
+        val statement: Statement? = if (sql.isBlank()) null else CCJSqlParserUtil.parse(sql)
+        return requireNotNull(statement) { "SQL statement is empty" }
+    }
+
+    /** Builds the invalid result reported for any exception raised while validating. */
+    private fun failure(e: Exception): SqlValidationResult = SqlValidationResult(
+        isValid = false,
+        errors = listOf(extractErrorMessage(e)),
+        warnings = emptyList()
+    )
+
+    /**
+     * Turns a parser exception into a short, readable message.
+     *
+     * - Messages containing `Encountered "<token>" at line L, column C` are
+     *   rewritten as a "Syntax error at line ..." sentence.
+     * - Otherwise, messages containing "Was expecting" are reduced to the
+     *   expected part, capped at 100 characters.
+     * - Any other message is capped at 200 characters.
+     *
+     * When a branch's pattern does not match, the raw message is returned as is.
+     */
+    private fun extractErrorMessage(e: Exception): String {
+        val message = e.message ?: "Unknown SQL parsing error"
+        return when {
+            message.contains("Encountered") -> {
+                val match = Regex("Encountered \"(.+?)\" at line (\\d+), column (\\d+)").find(message)
+                if (match != null) {
+                    val (token, line, column) = match.destructured
+                    "Syntax error at line $line, column $column: unexpected token '$token'"
+                } else {
+                    message
+                }
+            }
+            message.contains("Was expecting") -> {
+                val match = Regex("Was expecting.*?:\\s*(.+)").find(message)
+                if (match != null) {
+                    "Expected: ${match.groupValues[1].take(100)}"
+                } else {
+                    message
+                }
+            }
+            else -> message.take(200)
+        }
+    }
+
+    /**
+     * Collects advisory (non-fatal) warnings for a parsed [statement].
+     *
+     * The missing-WHERE check inspects the parsed statement itself rather than
+     * searching its text, so identifiers such as `updated_at` or `deleted`
+     * do not trigger it and a WHERE inside a sub-query does not hide it.
+     */
+    private fun collectWarnings(statement: Statement): List<String> {
+        val warnings = mutableListOf<String>()
+
+        // Text check against the SQL regenerated by the parser.
+        if (statement.toString().contains("SELECT *")) {
+            warnings.add("Consider specifying explicit columns instead of SELECT *")
+        }
+
+        val affectsAllRows = when (statement) {
+            is Update -> statement.where == null
+            is Delete -> statement.where == null
+            else -> false
+        }
+        if (affectsAllRows) {
+            warnings.add("UPDATE/DELETE without WHERE clause will affect all rows")
+        }
+
+        return warnings
+    }
+}
+
diff --git a/mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/AgentType.kt b/mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/AgentType.kt
@@ -7,6 +7,8 @@ package cc.unitmesh.agent
  * - LOCAL: Simple local chat mode without heavy tooling
  * - CODING: Local coding agent with full tool access (file system, shell, etc.)
  * - CODE_REVIEW: Dedicated code review agent with git integration
+ * - KNOWLEDGE: Document reader mode for AI-native document reading
+ * - CHAT_DB: Database chat mode for text-to-SQL interactions
  * - REMOTE: Remote agent connected to mpp-server
  */
 enum class AgentType {
@@ -30,6 +32,11 @@ enum class AgentType {
      */
     KNOWLEDGE,
 
+    /**
+     * Database chat mode - text-to-SQL agent for database queries
+     */
+    CHAT_DB,
+
     /**
      * Remote agent mode - connects to remote mpp-server for distributed execution
      */
@@ -40,6 +47,7 @@ enum class AgentType {
         CODING -> "Agentic"
         CODE_REVIEW -> "Review"
         KNOWLEDGE -> "Knowledge"
+        CHAT_DB -> "ChatDB"
         REMOTE -> "Remote"
     }
 
@@ -51,6 +59,7 @@ enum class AgentType {
                 "coding" -> CODING
                 "codereview" -> CODE_REVIEW
                 "documentreader", "documents" -> KNOWLEDGE
+                "chatdb", "database" -> CHAT_DB
                 else -> LOCAL_CHAT
             }
         }

diff --git a/mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/chatdb/ChatDBAgent.kt b/mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/chatdb/ChatDBAgent.kt
@@ -0,0 +1,172 @@
+package cc.unitmesh.agent.chatdb
+
+import cc.unitmesh.agent.config.McpToolConfigService
+import cc.unitmesh.agent.core.MainAgent
+import cc.unitmesh.agent.database.DatabaseConfig
+import cc.unitmesh.agent.database.DatabaseConnection
+import cc.unitmesh.agent.database.createDatabaseConnection
+import cc.unitmesh.agent.logging.getLogger
+import cc.unitmesh.agent.model.AgentDefinition
+import cc.unitmesh.agent.model.PromptConfig
+import cc.unitmesh.agent.model.RunConfig
+import cc.unitmesh.agent.orchestrator.ToolOrchestrator
+import cc.unitmesh.agent.policy.DefaultPolicyEngine
+import cc.unitmesh.agent.render.CodingAgentRenderer
+import cc.unitmesh.agent.render.DefaultCodingAgentRenderer
+import cc.unitmesh.agent.tool.shell.DefaultShellExecutor
+import cc.unitmesh.agent.tool.shell.ShellExecutor
+import cc.unitmesh.agent.tool.ToolResult
+import cc.unitmesh.agent.tool.filesystem.DefaultToolFileSystem
+import cc.unitmesh.agent.tool.filesystem.ToolFileSystem
+import cc.unitmesh.agent.tool.registry.ToolRegistry
+import cc.unitmesh.llm.KoogLLMService
+import cc.unitmesh.llm.ModelConfig
+
+/**
+ * ChatDB Agent - Text2SQL Agent for natural language database queries
+ *
+ * This agent converts natural language queries to SQL, executes them,
+ * and optionally generates visualizations of the results.
+ *
+ * Features:
+ * - Schema Linking: Keyword-based search to find relevant tables/columns
+ * - SQL Generation: LLM generates SQL from natural language
+ * - Revise Agent: Self-correction loop using JSqlParser for SQL validation
+ * - Query Execution: Execute validated SQL and return results
+ * - Visualization: Optional PlotDSL generation for data visualization
+ *
+ * Lifecycle: the database connection is created on the first [execute] call
+ * and released by [close]. Calling [execute] again after [close] creates a new
+ * connection instead of reusing the closed one.
+ *
+ * Based on GitHub Issue #508: https://github.com/phodal/auto-dev/issues/508
+ */
+class ChatDBAgent(
+    private val projectPath: String,
+    private val llmService: KoogLLMService,
+    private val databaseConfig: DatabaseConfig,
+    override val maxIterations: Int = 10,
+    private val renderer: CodingAgentRenderer = DefaultCodingAgentRenderer(),
+    private val fileSystem: ToolFileSystem? = null,
+    private val shellExecutor: ShellExecutor? = null,
+    private val mcpToolConfigService: McpToolConfigService,
+    private val enableLLMStreaming: Boolean = true
+) : MainAgent<ChatDBTask, ToolResult.AgentResult>(
+    AgentDefinition(
+        name = "ChatDBAgent",
+        displayName = "ChatDB Agent",
+        description = "Text2SQL Agent that converts natural language to SQL queries with schema linking and self-correction",
+        promptConfig = PromptConfig(
+            systemPrompt = SYSTEM_PROMPT
+        ),
+        modelConfig = ModelConfig.default(),
+        runConfig = RunConfig(maxTurns = 10, maxTimeMinutes = 5)
+    )
+) {
+    private val logger = getLogger("ChatDBAgent")
+
+    // Falls back to a file system rooted at the project path when none is injected.
+    private val actualFileSystem = fileSystem ?: DefaultToolFileSystem(projectPath = projectPath)
+
+    private val toolRegistry = ToolRegistry(
+        fileSystem = actualFileSystem,
+        shellExecutor = shellExecutor ?: DefaultShellExecutor(),
+        configService = mcpToolConfigService,
+        llmService = llmService
+    )
+
+    private val policyEngine = DefaultPolicyEngine()
+
+    private val toolOrchestrator = ToolOrchestrator(
+        registry = toolRegistry,
+        policyEngine = policyEngine,
+        renderer = renderer,
+        mcpConfigService = mcpToolConfigService
+    )
+
+    // Connection currently owned by this agent; null until first use and after close().
+    private var databaseConnection: DatabaseConnection? = null
+
+    // Held as an explicit Lazy (instead of `by lazy`) so close() can replace it:
+    // an executor built around a connection that has since been closed must not be reused.
+    private var executorHolder: Lazy<ChatDBAgentExecutor> = lazy { createExecutor() }
+
+    /**
+     * Builds the executor, opening the database connection if none is open yet
+     * and remembering it so [close] can release it.
+     */
+    private fun createExecutor(): ChatDBAgentExecutor {
+        val connection = databaseConnection ?: createDatabaseConnection(databaseConfig)
+        databaseConnection = connection
+
+        return ChatDBAgentExecutor(
+            projectPath = projectPath,
+            llmService = llmService,
+            toolOrchestrator = toolOrchestrator,
+            renderer = renderer,
+            databaseConnection = connection,
+            maxIterations = maxIterations,
+            enableLLMStreaming = enableLLMStreaming
+        )
+    }
+
+    /**
+     * Converts a raw parameter map into a [ChatDBTask].
+     *
+     * Recognized keys: `query` (required String), `additionalContext`
+     * (String, default empty), `maxRows` (Number, default 100) and
+     * `generateVisualization` (Boolean, default true). Values of an unexpected
+     * type are treated as absent.
+     *
+     * @throws IllegalArgumentException when `query` is missing or not a String
+     */
+    override fun validateInput(input: Map<String, Any>): ChatDBTask {
+        val query = input["query"] as? String
+            ?: throw IllegalArgumentException("Missing required parameter: query")
+
+        return ChatDBTask(
+            query = query,
+            additionalContext = input["additionalContext"] as? String ?: "",
+            maxRows = (input["maxRows"] as? Number)?.toInt() ?: 100,
+            generateVisualization = input["generateVisualization"] as? Boolean ?: true
+        )
+    }
+
+    /**
+     * Runs the text-to-SQL flow for [input] and wraps the outcome in a
+     * [ToolResult.AgentResult].
+     *
+     * The result metadata carries `generatedSql`, `rowCount`,
+     * `revisionAttempts` and `hasVisualization`, all as strings.
+     *
+     * @param input The validated task to execute
+     * @param onProgress Callback forwarded to the executor for progress messages
+     */
+    override suspend fun execute(
+        input: ChatDBTask,
+        onProgress: (String) -> Unit
+    ): ToolResult.AgentResult {
+        logger.info { "Starting ChatDB Agent for query: ${input.query}" }
+
+        val systemPrompt = buildSystemPrompt()
+        val result = executorHolder.value.execute(input, systemPrompt, onProgress)
+
+        return ToolResult.AgentResult(
+            success = result.success,
+            content = result.message,
+            metadata = mapOf(
+                "generatedSql" to (result.generatedSql ?: ""),
+                "rowCount" to (result.queryResult?.rowCount?.toString() ?: "0"),
+                "revisionAttempts" to result.revisionAttempts.toString(),
+                "hasVisualization" to (result.plotDslCode != null).toString()
+            )
+        )
+    }
+
+    /** Returns the system prompt sent to the LLM; currently the static [SYSTEM_PROMPT]. */
+    private fun buildSystemPrompt(): String {
+        return SYSTEM_PROMPT
+    }
+
+    /** Renders the agent result as plain text by returning its content. */
+    override fun formatOutput(output: ToolResult.AgentResult): String {
+        return output.content
+    }
+
+    override fun getParameterClass(): String = "ChatDBTask"
+
+    /**
+     * Close database connection when done.
+     *
+     * The connection reference and the cached executor are dropped before the
+     * connection is closed, so a failing close cannot leave a stale reference
+     * behind and a later [execute] builds a fresh executor and connection.
+     */
+    suspend fun close() {
+        val connection = databaseConnection
+        databaseConnection = null
+        executorHolder = lazy { createExecutor() }
+        connection?.close()
+    }
+
+    companion object {
+        /** System prompt instructing the LLM to answer with a single read-only SQL query. */
+        const val SYSTEM_PROMPT = """You are an expert SQL developer. Generate SQL queries from natural language.
+
+CRITICAL RULES - YOU MUST FOLLOW THESE:
+1. ONLY use table names provided in the schema - NEVER invent or guess table names
+2. ONLY use column names provided in the schema - NEVER invent or guess column names
+3. If a table or column doesn't exist in the schema, DO NOT use it
+4. Only generate SELECT queries (read-only operations)
+5. Always add LIMIT clause to prevent large result sets
+
+OUTPUT FORMAT:
+- Return ONLY the SQL query wrapped in ```sql code block
+- Do NOT include explanations, alternatives, or reasoning
+- Do NOT add comments outside the code block
+- Keep response concise - just the SQL
+
+Example response:
+```sql
+SELECT id, name FROM users WHERE status = 'active' LIMIT 100;
+```"""
+    }
+}
+