diff --git a/connector_oxigesti/__manifest__.py b/connector_oxigesti/__manifest__.py index e89e0c302..3942caabb 100644 --- a/connector_oxigesti/__manifest__.py +++ b/connector_oxigesti/__manifest__.py @@ -4,7 +4,7 @@ { "name": "Oxigesti-Odoo connector", - "version": "14.0.1.1.11", + "version": "14.0.1.1.13", "author": "NuoBiT Solutions, S.L.", "license": "AGPL-3", "category": "Connector", diff --git a/connector_oxigesti/components/adapter.py b/connector_oxigesti/components/adapter.py index 4ba11fa84..4eb68892d 100644 --- a/connector_oxigesti/components/adapter.py +++ b/connector_oxigesti/components/adapter.py @@ -64,6 +64,24 @@ def api_handle_errors(message=""): ) +@contextmanager +def mssql_connection_retryable(): + """Re-raise transient MSSQL connection errors as NetworkRetryableError. + + Without this, a MSSQL restart or network blip while an export job is + running raises ``pymssql.OperationalError`` / ``InterfaceError``, which + queue_job does not recognize as retryable — the job moves straight to + ``failed`` on its first attempt and the per-backend ``since_date`` + cursor has already advanced past it, making the missing record + invisible to subsequent cron cycles. Wrapping as + ``NetworkRetryableError`` lets the configured ``retry_pattern`` apply. 
+ """ + try: + yield + except (pymssql.OperationalError, pymssql.InterfaceError) as err: + raise NetworkRetryableError("MSSQL connection error: %s" % err) from err + + class CRUDAdapter(AbstractComponent): """External Records Adapter for Oxigesti""" @@ -157,14 +175,15 @@ def _exec_sql(self, sql, params, as_dict=False, commit=False): # Convert params params = self._convert_dict(params, to_backend=True) # Execute sql - conn = self.conn() - cr = conn.cursor(as_dict=as_dict) - cr.execute(sql, params) - res = cr.fetchall() - if commit: - conn.commit() - cr.close() - conn.close() + with mssql_connection_retryable(): + conn = self.conn() + cr = conn.cursor(as_dict=as_dict) + cr.execute(sql, params) + res = cr.fetchall() + if commit: + conn.commit() + cr.close() + conn.close() # Convert result if as_dict: for r in res: @@ -313,26 +332,27 @@ def write(self, _id, values_d): # pylint: disable=W8106 params[k9] = v params = self._convert_dict(params, to_backend=True) - conn = self.conn() - cr = conn.cursor() - cr.execute(sql, params) # pylint: disable=E8103 - count = cr.rowcount - if count == 0: - raise Exception( - _( - "Impossible to update external record with ID '%s': " - "Register not found on Backend" + with mssql_connection_retryable(): + conn = self.conn() + cr = conn.cursor() + cr.execute(sql, params) # pylint: disable=E8103 + count = cr.rowcount + if count == 0: + raise Exception( + _( + "Impossible to update external record with ID '%s': " + "Register not found on Backend" + ) + % (id_d,) ) - % (id_d,) - ) - elif count > 1: - conn.rollback() - raise pymssql.IntegrityError( - "Unexpected error: Returned more the one row with ID: %s" % (id_d,) - ) - conn.commit() - cr.close() - conn.close() + elif count > 1: + conn.rollback() + raise pymssql.IntegrityError( + "Unexpected error: Returned more the one row with ID: %s" % (id_d,) + ) + conn.commit() + cr.close() + conn.close() return count @@ -428,26 +448,28 @@ def delete(self, _id): params = dict(zip(self._id, _id)) 
params = self._convert_dict(params, to_backend=True) - conn = self.conn() - cr = conn.cursor() - cr.execute(sql, params) # pylint: disable=E8103 - count = cr.rowcount - if count == 0: - raise Exception( - _( - "Impossible to delete external record with ID '%s': " - "Register not found on Backend" + with mssql_connection_retryable(): + conn = self.conn() + cr = conn.cursor() + cr.execute(sql, params) # pylint: disable=E8103 + count = cr.rowcount + if count == 0: + raise Exception( + _( + "Impossible to delete external record with ID '%s': " + "Register not found on Backend" + ) + % (params,) ) - % (params,) - ) - elif count > 1: - conn.rollback() - raise pymssql.IntegrityError( - "Unexpected error: Returned more the one row with ID: %s" % (params,) - ) - conn.commit() - cr.close() - conn.close() + elif count > 1: + conn.rollback() + raise pymssql.IntegrityError( + "Unexpected error: Returned more the one row with ID: %s" + % (params,) + ) + conn.commit() + cr.close() + conn.close() return count diff --git a/connector_oxigesti/models/oxigesti_binding/common.py b/connector_oxigesti/models/oxigesti_binding/common.py index ec164a0ec..68042f5e8 100644 --- a/connector_oxigesti/models/oxigesti_binding/common.py +++ b/connector_oxigesti/models/oxigesti_binding/common.py @@ -9,6 +9,15 @@ from ...common.tools import idhash +# Max attempts for a job before it transitions to ``state='failed'``. +# Combined with ``retry_pattern`` on every queue.job.function of this +# connector (``{1: 10, 5: 30, 10: 60, 15: 300}``) this yields a retry +# window of ~33 minutes — enough to cover transient MSSQL restarts, +# VPN blips and short planned maintenance on the Oxigesti server +# without spamming it. OCA queue_job default is 5 which clipped the +# progression at the first bucket (10s × 4 ≈ 40s). 
+MAX_RETRIES_NETWORK = 20 + class OxigestiBinding(models.AbstractModel): _name = "oxigesti.binding" @@ -17,6 +26,26 @@ class OxigestiBinding(models.AbstractModel): active = fields.Boolean(default=True) + def with_delay( + self, + priority=None, + eta=None, + max_retries=None, + description=None, + channel=None, + identity_key=None, + ): + if max_retries is None: + max_retries = MAX_RETRIES_NETWORK + return super().with_delay( + priority=priority, + eta=eta, + max_retries=max_retries, + description=description, + channel=channel, + identity_key=identity_key, + ) + backend_id = fields.Many2one( comodel_name="oxigesti.backend", string="Oxigesti Backend", diff --git a/connector_oxigesti/tests/__init__.py b/connector_oxigesti/tests/__init__.py new file mode 100644 index 000000000..3d20c6a7d --- /dev/null +++ b/connector_oxigesti/tests/__init__.py @@ -0,0 +1,2 @@ +from . import test_mssql_retryable +from . import test_with_delay_max_retries diff --git a/connector_oxigesti/tests/test_mssql_retryable.py b/connector_oxigesti/tests/test_mssql_retryable.py new file mode 100644 index 000000000..653a4777f --- /dev/null +++ b/connector_oxigesti/tests/test_mssql_retryable.py @@ -0,0 +1,292 @@ +# Copyright 2026 NuoBiT Solutions SL - Eric Antones +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). + +import pymssql + +from odoo import _ +from odoo.exceptions import UserError, ValidationError +from odoo.tests.common import TransactionCase, tagged + +from odoo.addons.connector.exception import NetworkRetryableError + +from ..components.adapter import api_handle_errors, mssql_connection_retryable + + +@tagged("post_install", "-at_install") +class TestMssqlConnectionRetryable(TransactionCase): + """MSSQL connection errors must be retryable by queue_job. 
+ + Reproduces the incident #2 from task #2548: during an MSSQL outage the + adapter used to leak raw ``pymssql.OperationalError`` / ``InterfaceError`` + through the queue_job worker, which does not recognize them as + retryable, so every in-flight export job went straight to ``failed`` + on its first attempt and the per-backend ``since_date`` cursor had + already advanced past them. + """ + + def _make_unreachable_backend(self): + return self.env["oxigesti.backend"].create( + { + "name": "Test unreachable", + "server": "127.0.0.1", + "port": 1, + "database": "irrelevant", + "schema": "dbo", + "username": "irrelevant", + "password": "irrelevant", + "lang_id": self.env.ref("base.lang_en").id, + "tz": "UTC", + "chunk_size": 0, + } + ) + + # --- contract of the contextmanager itself ------------------------- + + def test_operational_error_is_wrapped_as_retryable(self): + with self.assertRaises(NetworkRetryableError): + with mssql_connection_retryable(): + raise pymssql.OperationalError( + "(6005) Adaptive Server is unavailable or does not exist" + ) + + def test_interface_error_is_wrapped_as_retryable(self): + with self.assertRaises(NetworkRetryableError): + with mssql_connection_retryable(): + raise pymssql.InterfaceError("Cannot connect to server") + + def test_integrity_error_is_not_wrapped(self): + """Data-integrity errors must stay as-is — they are not transient + and queue_job should not retry them.""" + with self.assertRaises(pymssql.IntegrityError): + with mssql_connection_retryable(): + raise pymssql.IntegrityError(2627, "PK violation") + + def test_internal_error_is_not_wrapped(self): + """Internal errors (programming/data contract) are not transient.""" + with self.assertRaises(pymssql.InternalError): + with mssql_connection_retryable(): + raise pymssql.InternalError("bad internal state") + + def test_programming_error_is_not_wrapped(self): + """Programming errors (bad SQL, missing columns, etc.) 
must not + be silently retried — they need fixing, not retrying.""" + with self.assertRaises(pymssql.ProgrammingError): + with mssql_connection_retryable(): + raise pymssql.ProgrammingError("invalid column name") + + def test_database_error_base_class_is_not_wrapped(self): + """The wrapper is deliberately narrow: it catches the concrete + transient classes (``OperationalError``, ``InterfaceError``) and + lets the ``DatabaseError`` base class flow through, so any future + data-integrity / data-truncation error raised as bare + ``DatabaseError`` is not accidentally converted to a retry.""" + with self.assertRaises(pymssql.DatabaseError): + with mssql_connection_retryable(): + raise pymssql.DatabaseError("ambiguous error") + + def test_unrelated_exception_passthrough(self): + """Non-pymssql errors flow through the wrapper untouched.""" + with self.assertRaises(ValueError): + with mssql_connection_retryable(): + raise ValueError("unrelated") + + def test_no_exception_no_side_effects(self): + """Happy path: the contextmanager must be transparent when no + exception is raised.""" + marker = [] + with mssql_connection_retryable(): + marker.append("ok") + self.assertEqual(marker, ["ok"]) + + def test_cause_chain_preserved(self): + """The original pymssql error must be preserved in ``__cause__`` + so the queue_job exc_info keeps the forensic trail.""" + try: + with mssql_connection_retryable(): + raise pymssql.OperationalError("(6005) SHUTDOWN is in progress") + except NetworkRetryableError as err: + self.assertIsInstance(err.__cause__, pymssql.OperationalError) + else: + self.fail("NetworkRetryableError was not raised") + + # --- production-observed error variants (task #2548, 2026-04-13) --- + + def test_error_variant_adaptive_server_unavailable(self): + """66 of the 120 failed jobs had this exact variant.""" + with self.assertRaises(NetworkRetryableError): + with mssql_connection_retryable(): + raise pymssql.OperationalError( + 2, + b"Adaptive Server is unavailable or 
does not exist " + b"(SV-BCN-SC-SQ-0S) / Connection refused (111)", + ) + + def test_error_variant_general_sql_server_error(self): + """50 of the 120 failed jobs had this exact variant.""" + with self.assertRaises(NetworkRetryableError): + with mssql_connection_retryable(): + raise pymssql.OperationalError( + 20003, + b"General SQL Server error: Check messages... " + b"Adaptive Server connection failed", + ) + + def test_error_variant_read_from_server_failed(self): + """Unexpected EOF during an in-flight read.""" + with self.assertRaises(NetworkRetryableError): + with mssql_connection_retryable(): + raise pymssql.OperationalError(20004, b"Read from the server failed") + + def test_error_variant_shutdown_in_progress(self): + """The MSSQL SHUTDOWN window (2026-04-13 06:30).""" + with self.assertRaises(NetworkRetryableError): + with mssql_connection_retryable(): + raise pymssql.OperationalError(6005, b"SHUTDOWN is in progress") + + # --- interactive path (api_handle_errors) regressions -------------- + + def test_operational_error_becomes_user_error_in_interactive_path(self): + """When the adapter is called from the interactive path the outer + ``api_handle_errors`` context must still surface a UserError + (its first clause catches ``NetworkRetryableError`` and translates).""" + with self.assertRaises(UserError): + with api_handle_errors("Connection failed"): + with mssql_connection_retryable(): + raise pymssql.OperationalError( + "(20003) General SQL Server error: connection failed" + ) + + def test_integrity_error_still_user_error_in_interactive_path(self): + """Non-transient integrity errors must still surface a UserError + on the interactive path (``api_handle_errors`` has a dedicated + clause for them).""" + with self.assertRaises(UserError): + with api_handle_errors("Operation failed"): + with mssql_connection_retryable(): + raise pymssql.IntegrityError(2627, "PK violation") + + # --- end-to-end: real adapter path against unreachable host -------- + + def 
test_unreachable_backend_get_version_raises_retryable(self): + """An adapter pointed at an unreachable MSSQL host must surface + ``NetworkRetryableError`` from ``get_version()`` — exercises + ``_exec_query`` → ``_exec_sql`` → ``pymssql.connect`` → wrap.""" + backend = self._make_unreachable_backend() + with backend.work_on("oxigesti.backend") as work: + component = work.component_by_name(name="oxigesti.adapter.test") + with self.assertRaises(NetworkRetryableError): + component.get_version() + + def test_unreachable_backend_check_connection_raises_user_error(self): + """``button_check_connection`` is an interactive path: an unreachable + host must surface a ``UserError`` (``api_handle_errors`` wraps our + ``NetworkRetryableError``).""" + backend = self._make_unreachable_backend() + with self.assertRaises(UserError): + backend._check_connection() + + def test_unreachable_backend_write_raises_retryable(self): + """``write`` starts by calling ``_exec_sql`` for the schema check, + which goes through the wrapper — this proves the write call-path + surfaces ``NetworkRetryableError`` instead of a raw pymssql + exception when the host is unreachable.""" + backend = self._make_unreachable_backend() + with backend.work_on("oxigesti.backend") as work: + component = work.component_by_name(name="oxigesti.adapter.test") + with self.assertRaises(NetworkRetryableError): + component.write([1], {"some_field": "x"}) + + def test_unreachable_backend_delete_raises_retryable(self): + """Same as above for ``delete``: the schema-check ``_exec_sql`` + call at the top of ``delete`` surfaces ``NetworkRetryableError`` + when the host is unreachable.""" + backend = self._make_unreachable_backend() + with backend.work_on("oxigesti.backend") as work: + component = work.component_by_name(name="oxigesti.adapter.test") + with self.assertRaises(NetworkRetryableError): + component.delete([1]) + + # --- guard on write/delete count mismatch paths -------------------- + # + # The wrapping in write() and 
delete() must not swallow the
+    # count-mismatch integrity checks. Both "count=0 => not found" and
+    # "count>1 => duplicate" paths raise exceptions inside the
+    # ``mssql_connection_retryable`` block; those exceptions are NOT
+    # ``OperationalError`` / ``InterfaceError`` and must propagate.
+
+    def test_count_zero_not_wrapped(self):
+        """A generic Exception raised inside the wrapper must propagate
+        as-is — covers the ``count == 0`` path in write/delete."""
+        with self.assertRaises(Exception) as ctx:
+            with mssql_connection_retryable():
+                raise Exception("Impossible to update external record")
+        # Must be the raw Exception, not NetworkRetryableError.
+        self.assertNotIsInstance(ctx.exception, NetworkRetryableError)
+
+    def test_count_above_one_integrity_error_not_wrapped(self):
+        """A duplicate-row IntegrityError raised inside the wrapper
+        must propagate as-is — covers the ``count > 1`` path in
+        write/delete."""
+        with self.assertRaises(pymssql.IntegrityError):
+            with mssql_connection_retryable():
+                raise pymssql.IntegrityError(
+                    "Unexpected error: Returned more the one row with ID: ..."
+                )
+
+    # --- guard on create()'s IntegrityError 2627 workaround -------------
+    #
+    # create() has a specific ``except pymssql.IntegrityError`` that converts
+    # error code 2627 (PK violation on varchar with trailing spaces) into a
+    # ``ValidationError`` with a user-friendly message. The wrapping is on
+    # ``_exec_sql`` (inside create), not on create itself, so this workaround
+    # must keep working unchanged.
+ + def test_integrity_error_2627_still_raises_validation_error(self): + """Replays the create() flow: IntegrityError(2627) from _exec_sql + must still be caught by create() and re-raised as ValidationError.""" + try: + try: + with mssql_connection_retryable(): + raise pymssql.IntegrityError(2627, "trailing-space PK") + except pymssql.IntegrityError as e: + if e.args[0] == 2627: + raise ValidationError(_("fake PK violation")) + raise + except ValidationError: + pass + else: + self.fail("ValidationError was not raised") + + def test_integrity_error_non_2627_still_reraises(self): + """A non-2627 IntegrityError inside _exec_sql must still reach + create()'s bare ``raise`` branch unchanged — not silently retried.""" + try: + try: + with mssql_connection_retryable(): + raise pymssql.IntegrityError(547, "FK violation") + except pymssql.IntegrityError as e: + if e.args[0] == 2627: + raise ValidationError(_("fake PK")) + raise + except pymssql.IntegrityError as err: + self.assertEqual(err.args[0], 547) + else: + self.fail("IntegrityError was not re-raised") + + # --- paranoia: wrapper must NOT re-raise from its own cleanup ------- + + def test_nested_wrapper_does_not_double_wrap(self): + """Nesting the wrapper (defensive in case helper code uses it + internally) must surface a single NetworkRetryableError, not a + re-wrapped one — contextmanager catches only pymssql classes.""" + try: + with mssql_connection_retryable(): + with mssql_connection_retryable(): + raise pymssql.OperationalError("transient") + except NetworkRetryableError as err: + # The inner wrapper raised NetworkRetryableError, the outer + # saw it, did NOT recognize it as pymssql.OperationalError + # (it's not one), and let it bubble — not a double wrap. 
+ self.assertNotIsInstance(err.__cause__, NetworkRetryableError) + else: + self.fail("NetworkRetryableError was not raised") diff --git a/connector_oxigesti/tests/test_with_delay_max_retries.py b/connector_oxigesti/tests/test_with_delay_max_retries.py new file mode 100644 index 000000000..b0ce0f6dc --- /dev/null +++ b/connector_oxigesti/tests/test_with_delay_max_retries.py @@ -0,0 +1,79 @@ +# Copyright 2026 NuoBiT Solutions SL - Eric Antones +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). + +from odoo.tests.common import TransactionCase, tagged + +from ..models.oxigesti_binding.common import MAX_RETRIES_NETWORK + + +@tagged("post_install", "-at_install") +class TestWithDelayMaxRetries(TransactionCase): + """The ``retry_pattern`` declared on every oxigesti ``queue.job.function`` + (``{1: 10, 5: 30, 10: 60, 15: 300}``) is designed for up to ~20 attempts. + OCA queue_job's ``DEFAULT_MAX_RETRIES`` is 5, which clipped every + oxigesti job at the first bucket (10s x 4 = ~40s window) and turned the + longer buckets (30s, 60s, 300s) into dead code. + + This test suite pins the override on ``OxigestiBinding.with_delay`` that + defaults ``max_retries`` to ``MAX_RETRIES_NETWORK`` (20) so the declared + pattern actually runs, giving a ~33-minute retry window per job — + enough to absorb transient MSSQL restarts, VPN blips and short planned + maintenance on the Oxigesti server. + """ + + def test_default_max_retries_matches_oxigesti_constant(self): + """Without an explicit ``max_retries=``, the oxigesti binding must + inject ``MAX_RETRIES_NETWORK`` (20) instead of the OCA default (5).""" + delayable = self.env["oxigesti.res.partner"].with_delay() + self.assertEqual(delayable.delayable.max_retries, MAX_RETRIES_NETWORK) + + def test_explicit_max_retries_is_respected(self): + """An explicit ``max_retries=N`` on the call site must win over the + oxigesti default — e.g. 
for one-shot diagnostic enqueues.""" + delayable = self.env["oxigesti.res.partner"].with_delay(max_retries=7) + self.assertEqual(delayable.delayable.max_retries, 7) + + def test_explicit_zero_max_retries_is_respected(self): + """``max_retries=0`` means infinite retries in queue_job — the + oxigesti default must not silently replace it with 20.""" + delayable = self.env["oxigesti.res.partner"].with_delay(max_retries=0) + self.assertEqual(delayable.delayable.max_retries, 0) + + def test_other_kwargs_forwarded(self): + """The override must pass ``priority``, ``eta``, ``description``, + ``channel`` and ``identity_key`` through to the parent ``with_delay`` + untouched.""" + delayable = self.env["oxigesti.res.partner"].with_delay( + priority=42, + description="probe", + channel="root.oxigesti", + identity_key="probe-identity", + ) + self.assertEqual(delayable.delayable.priority, 42) + self.assertEqual(delayable.delayable.description, "probe") + self.assertEqual(delayable.delayable.channel, "root.oxigesti") + self.assertEqual(delayable.delayable.identity_key, "probe-identity") + # And the injected default is still there alongside them. + self.assertEqual(delayable.delayable.max_retries, MAX_RETRIES_NETWORK) + + def test_default_applied_to_multiple_binding_models(self): + """The override lives on the abstract ``oxigesti.binding`` model, so + every concrete binding (partners, products, lots, sale orders, etc.) + must inherit the same default without per-model wiring.""" + for model in ( + "oxigesti.res.partner", + "oxigesti.product.product", + "oxigesti.stock.production.lot", + "oxigesti.sale.order", + ): + with self.subTest(model=model): + delayable = self.env[model].with_delay() + self.assertEqual(delayable.delayable.max_retries, MAX_RETRIES_NETWORK) + + def test_non_oxigesti_model_keeps_oca_default(self): + """The override must be scoped to oxigesti bindings — generic Odoo + models (e.g. 
``res.partner``) keep the OCA ``DEFAULT_MAX_RETRIES`` + behaviour of ``max_retries=None`` until ``with_delay`` wires it to + ``DEFAULT_MAX_RETRIES`` at Job construction time.""" + delayable = self.env["res.partner"].with_delay() + self.assertIsNone(delayable.delayable.max_retries)