7 changes: 7 additions & 0 deletions ch_backup/logic/table.py
@@ -235,6 +235,13 @@ def _load_create_statement_from_disk(table: Table) -> Optional[str]:
         return None
     try:
         return Path(table.metadata_path).read_text("utf-8")
+    except UnicodeDecodeError:
+        logging.warning(
+            'Table "{}"."{}": metadata contains non-UTF-8 bytes, using latin-1 fallback',
+            table.database,
+            table.name,
+        )
+        return Path(table.metadata_path).read_text("latin-1")
     except OSError as e:
         logging.debug(
             'Cannot load a create statement of the table "{}"."{}": {}',
9 changes: 5 additions & 4 deletions ch_backup/storage/loader.py
@@ -126,9 +126,7 @@ def upload_files_tarball(
         )
         return remote_path

-    def download_data(
-        self, remote_path, is_async=False, encryption=False, encoding="utf-8"
-    ):
+    def download_data(self, remote_path, is_async=False, encryption=False):
Contributor:

Let's just replace encoding="latin-1"
Contributor:

Can there be a problem (corrupted text)? We have

    data=query.encode("utf-8"),

so what if we read data as latin-1 but then send it to ClickHouse UTF-8-encoded?
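The corruption the reviewer asks about is easy to reproduce in isolation (a standalone stdlib-only sketch; `raw` is a stand-in for non-UTF-8 metadata bytes, not code from the PR):

```python
# Bytes that are valid cp1251 but NOT valid UTF-8.
raw = b"\xcf\xf0\xe8\xe2\xe5\xf2"

text = raw.decode("latin-1")       # the fallback read: latin-1 never fails
reencoded = text.encode("utf-8")   # what query.encode("utf-8") would then send

# The bytes sent over the wire differ from the bytes read from disk:
assert reencoded != raw
print(raw)        # b'\xcf\xf0\xe8\xe2\xe5\xf2'
print(reencoded)  # b'\xc3\x8f\xc3\xb0\xc3\xa8\xc3\xa2\xc3\xa5\xc3\xb2'
```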

Author (@dimbo4ka), Feb 3, 2026:

> Let's just replace encoding="latin-1"

This change breaks tests for valid UTF-8 data:
      ASSERT FAILED: 
      Expected: <{('test_db', 'table_ascii'): "CREATE TABLE test_db.table_ascii (`id` Int32, `name_ascii` String COMMENT 'ascii') ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192", ('test_db', 'table_chinese'): "CREATE TABLE test_db.table_chinese (`id` Int32, `name_试` String COMMENT '试') ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192", ('test_db', 'table_cyrillic'): "CREATE TABLE test_db.table_cyrillic (`id` Int32, `name_абвгд` String COMMENT 'абвгд') ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192", ('test_db', 'table_emoji'): "CREATE TABLE test_db.table_emoji (`id` Int32, `name_😈` String COMMENT '😈') ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192"}>
           but: was <{('test_db', 'table_ascii'): "CREATE TABLE test_db.table_ascii (`id` Int32, `name_ascii` String COMMENT 'ascii') ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192", ('test_db', 'table_chinese'): "CREATE TABLE test_db.table_chinese (`id` Int32, `name_è¯\x95` String COMMENT 'è¯\x95') ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192", ('test_db', 'table_cyrillic'): "CREATE TABLE test_db.table_cyrillic (`id` Int32, `name_абвгд` String COMMENT 'абвгд') ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192", ('test_db', 'table_emoji'): "CREATE TABLE test_db.table_emoji (`id` Int32, `name_ð\x9f\x98\x88` String COMMENT 'ð\x9f\x98\x88') ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192"}>

The issue is that bytes.decode() returns a str (a Unicode string), not bytes. When we decode UTF-8 bytes as latin-1, each byte becomes a separate character.
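The per-byte splitting described above can be seen directly (a standalone illustration matching the `table_chinese` failure):

```python
utf8_bytes = "试".encode("utf-8")        # b'\xe8\xaf\x95': one character, three bytes
mojibake = utf8_bytes.decode("latin-1")  # each byte becomes its own character

assert len(mojibake) == 3                # 'è', '¯', '\x95' — as in the failed assert
# latin-1 is lossless, so the original bytes are still recoverable:
assert mojibake.encode("latin-1") == utf8_bytes
```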

"""
Download file from storage and return its content.

Expand All @@ -139,7 +137,10 @@ def download_data(
data = self._ploader.download_data(
remote_path, is_async=is_async, encryption=encryption
)
return data.decode(encoding) if encoding else data
try:
return data.decode("utf-8")
except UnicodeDecodeError:
return data.decode("latin-1")

     def download_file(
         self,
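The except branch of the fallback above can never itself raise: latin-1 maps every byte value 0–255 to the code point of the same number, so any byte string decodes. A quick standalone check:

```python
# Every possible byte value decodes under latin-1, and the round trip is lossless.
all_bytes = bytes(range(256))
decoded = all_bytes.decode("latin-1")   # never raises

assert len(decoded) == 256
assert decoded.encode("latin-1") == all_bytes

# The same input is rejected by UTF-8, which is why the fallback is needed:
try:
    all_bytes.decode("utf-8")
except UnicodeDecodeError:
    print("UTF-8 rejects these bytes; latin-1 does not")
```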
63 changes: 63 additions & 0 deletions tests/integration/features/schema_encoding_compatibility.feature
@@ -0,0 +1,63 @@
Feature: Non-UTF-8 schema encoding support
Contributor:

Do these tests fail without the changes?

Author:

Yes.

    Background:
        Given default configuration
        And a working s3
        And a working zookeeper on zookeeper01
        And a working clickhouse on clickhouse01
        And a working clickhouse on clickhouse02

    Scenario: Backup and restore multiple tables with correct utf-8 encodings
        Given we have executed queries on clickhouse01
        """
        CREATE DATABASE test_db;

        CREATE TABLE test_db.table_ascii (
            id Int32,
            name_ascii String COMMENT 'ascii'
        ) ENGINE = MergeTree() ORDER BY id;

        CREATE TABLE test_db.table_emoji (
            id Int32,
            `name_😈` String COMMENT '😈'
        ) ENGINE = MergeTree() ORDER BY id;

        CREATE TABLE test_db.table_cyrillic (
            id Int32,
            `name_абвгд` String COMMENT 'абвгд'
        ) ENGINE = MergeTree() ORDER BY id;

        CREATE TABLE test_db.table_chinese (
            id Int32,
            `name_试` String COMMENT '试'
        ) ENGINE = MergeTree() ORDER BY id;

        INSERT INTO test_db.table_ascii VALUES (1, 'test1');
        INSERT INTO test_db.table_emoji VALUES (2, 'test2');
        INSERT INTO test_db.table_cyrillic VALUES (3, 'test3');
        INSERT INTO test_db.table_chinese VALUES (4, 'test4');
        """
        When we create clickhouse01 clickhouse backup
        Then we got the following backups on clickhouse01
          | num | state   | data_count | link_count |
          | 0   | created | 4          | 0          |
        When we restore clickhouse backup #0 to clickhouse02
        Then clickhouse02 has same schema as clickhouse01
        And we got same clickhouse data at clickhouse01 clickhouse02

    Scenario: Table with invalid utf-8 characters
        Given we have created non-UTF-8 test table on clickhouse01
        When we create clickhouse01 clickhouse backup
        Then we got the following backups on clickhouse01
          | num | state   | data_count | link_count |
          | 0   | created | 1          | 0          |
        When we restore clickhouse backup #0 to clickhouse02
        When we execute query on clickhouse02
        """
        EXISTS TABLE test_db.table_rus
        """
        Then we get response
        """
        1
        """

7 changes: 5 additions & 2 deletions tests/integration/modules/clickhouse.py
@@ -67,11 +67,14 @@ def ping(self) -> None:
"""
self._query("GET", url="ping")

def execute(self, query: str) -> None:
def execute(self, query: Union[str, bytes]) -> None:
"""
Execute arbitrary query.
"""
self._query("POST", query=query)
if isinstance(query, str):
self._query("POST", query=query)
return
self._query("POST", data=query)

     def get_response(self, query: str) -> str:
         """
23 changes: 23 additions & 0 deletions tests/integration/steps/clickhouse.py
@@ -349,3 +349,26 @@ def step_create_multiple_tables(context, table_count, node):
     for i in range(table_count):
         table_schema = schema_template.format(table_number=i)
         ch_client.execute(table_schema)
+
+
+@given("we have created non-UTF-8 test table on {node}")
Contributor:

Let's make a "we execute query on {node:w} with encoding {encoding:w}" step instead of this. It seems more versatile.

Author:

We can't parameterize the encoding in the step, since Gherkin doesn't support non-UTF-8 text.

Contributor (@aalexfvk), Feb 3, 2026:

How about escaping like this ("Привет" in cp1251)? Most likely the steps and/or the client will need to be refined:

    """
    CREATE TABLE test_db_01.table_rus (
        EventDate DateTime,
        CounterID UInt32,
        `\xcf\xf0\xe8\xe2\xe5\xf2` UInt32
    )
    ENGINE = MergeTree()
    PARTITION BY CounterID % 10
    ORDER BY (CounterID, EventDate)
    """

+def create_non_utf8_table(context, node):
+    """
+    Create table with invalid utf-8 for testing latin-1 fallback
+    """
+    ch_client = ClickhouseClient(context, node)
+    ch_client.execute("CREATE DATABASE IF NOT EXISTS test_db")
+
+    query = b"""
+    CREATE TABLE test_db.table_rus (
+        EventDate DateTime,
+        CounterID UInt32,
+        `\xcf\xf0\xe8\xe2\xe5\xf2` UInt32
+    )
+    ENGINE = MergeTree()
+    PARTITION BY CounterID % 10
+    ORDER BY (CounterID, EventDate)
+    """
+
+    ch_client.execute(query)
+    ch_client.execute("INSERT INTO test_db.table_rus VALUES (toDateTime('17.01.2006 10:03:00'), 2, 3)")