From c2f7bd0eb6a44db9de38d88996b9676353e8e182 Mon Sep 17 00:00:00 2001
From: keyliang
Date: Mon, 2 Mar 2026 22:52:40 +0800
Subject: [PATCH] Fix: Explicit UTF-8 encoding for cross-platform file reading

On Windows (especially Chinese/Asian locales) and some other systems,
the default file encoding is not UTF-8. This causes UnicodeDecodeError
when reading session files that contain non-ASCII characters.

This fix explicitly specifies encoding="utf-8" in all open() and
read_text() calls when reading JSON files.

Changes:
- Line 82: Add encoding="utf-8" to read_text() in _extract_project_path_from_sessions()
- Line 124: Add encoding="utf-8" to open() in _iter_jsonl()
- Line 266: Add encoding="utf-8" to read_text() in _load_kimi_work_dirs()
---
 dataclaw/parser.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dataclaw/parser.py b/dataclaw/parser.py
index 50cc39e..b7f8977 100644
--- a/dataclaw/parser.py
+++ b/dataclaw/parser.py
@@ -79,7 +79,7 @@ def _extract_project_path_from_sessions(project_hash: str) -> str | None:
         return None
     for session_file in sorted(chats_dir.glob("session-*.json"), reverse=True):
         try:
-            data = json.loads(session_file.read_text())
+            data = json.loads(session_file.read_text(encoding="utf-8"))
         except (json.JSONDecodeError, OSError):
             continue
         for msg in data.get("messages", []):
@@ -121,7 +121,7 @@ def _resolve_gemini_hash(project_hash: str) -> str:
 
 def _iter_jsonl(filepath: Path):
     """Yield parsed JSON objects from a JSONL file, skipping blank/malformed lines."""
-    with open(filepath) as f:
+    with open(filepath, encoding="utf-8") as f:
         for line in f:
             line = line.strip()
             if not line:
@@ -263,7 +263,7 @@ def _load_kimi_work_dirs() -> dict[str, str]:
     if not KIMI_CONFIG_PATH.exists():
         return {}
     try:
-        data = json.loads(KIMI_CONFIG_PATH.read_text())
+        data = json.loads(KIMI_CONFIG_PATH.read_text(encoding="utf-8"))
         work_dirs = data.get("work_dirs", [])
         return {
             entry.get("path", ""): entry.get("path", "")