diff --git a/ooniapi/common/src/common/alembic/versions/b860eb79750f_add_targets_name_and_inputs_extra_.py b/ooniapi/common/src/common/alembic/versions/b860eb79750f_add_targets_name_and_inputs_extra_.py new file mode 100644 index 000000000..831ff0640 --- /dev/null +++ b/ooniapi/common/src/common/alembic/versions/b860eb79750f_add_targets_name_and_inputs_extra_.py @@ -0,0 +1,36 @@ +"""Add targets_name and inputs_extra columns to oonirun_nettest + +Revision ID: b860eb79750f +Revises: 8e7ecea5c2f5 +Create Date: 2025-05-21 15:44:32.959349 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = "b860eb79750f" +down_revision: Union[str, None] = "8e7ecea5c2f5" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "oonirun_nettest", sa.Column("targets_name", sa.String(), nullable=True) + ) + op.add_column( + "oonirun_nettest", sa.Column("inputs_extra", sa.ARRAY(sa.JSON()), nullable=True) + ) + op.drop_column("oonirun_nettest", "backend_options") + + +def downgrade() -> None: + op.drop_column("oonirun_nettest", "targets_name") + op.drop_column("oonirun_nettest", "inputs_extra") + + op.add_column("oonirun_nettest", sa.Column("backend_options", sa.ARRAY(sa.JSON()), nullable=True)) \ No newline at end of file diff --git a/ooniapi/common/src/common/postgresql.py b/ooniapi/common/src/common/postgresql.py index c3359c46e..647d95933 100644 --- a/ooniapi/common/src/common/postgresql.py +++ b/ooniapi/common/src/common/postgresql.py @@ -9,4 +9,5 @@ class Base(DeclarativeBase): Dict[str, Any]: sa.JSON, List[str]: sa.JSON, Dict[str, str]: sa.JSON, + List[Dict[str, Any]] : sa.ARRAY(sa.JSON()) } diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/prio.py b/ooniapi/common/src/common/prio.py similarity index 97% rename from ooniapi/services/ooniprobe/src/ooniprobe/prio.py rename to 
ooniapi/common/src/common/prio.py index b84a9e42d..689395b0e 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/prio.py +++ b/ooniapi/common/src/common/prio.py @@ -1,3 +1,4 @@ + """ OONI Probe Services API - reactive URL prioritization @@ -24,8 +25,8 @@ from typing import List, Tuple import logging -from .common.clickhouse_utils import query_click -from .common.metrics import timer +from .clickhouse_utils import query_click +from .metrics import timer from clickhouse_driver import Client as Clickhouse import sqlalchemy as sa @@ -154,4 +155,4 @@ def generate_test_list( if debug: return out, entries, prio_rules - return out, (), () + return out, (), () \ No newline at end of file diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index 23a301825..90076e52d 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -20,7 +20,7 @@ from ...common.auth import create_jwt, decode_jwt, jwt from ...common.config import Settings from ...common.utils import setnocacheresponse -from ...prio import generate_test_list +from ...common.prio import generate_test_list router = APIRouter(prefix="/v1") diff --git a/ooniapi/services/ooniprobe/tests/test_prio.py b/ooniapi/services/ooniprobe/tests/test_prio.py index ba1a85b67..362e7066a 100644 --- a/ooniapi/services/ooniprobe/tests/test_prio.py +++ b/ooniapi/services/ooniprobe/tests/test_prio.py @@ -1,4 +1,4 @@ -from ooniprobe import prio +from ooniprobe.common import prio def test_prio(): diff --git a/ooniapi/services/oonirun/pyproject.toml b/ooniapi/services/oonirun/pyproject.toml index 0dc165c5d..958be2c39 100644 --- a/ooniapi/services/oonirun/pyproject.toml +++ b/ooniapi/services/oonirun/pyproject.toml @@ -58,7 +58,7 @@ packages = ["src/oonirun"] artifacts = ["BUILD_LABEL"] [tool.hatch.envs.default] -dependencies = ["pytest", 
"pytest-cov", "click", "black", "pytest-postgresql", "pytest-asyncio"] +dependencies = ["pytest", "pytest-cov", "click", "black", "pytest-postgresql", "pytest-asyncio", "pytest-docker"] path = ".venv/" [tool.hatch.envs.default.scripts] diff --git a/ooniapi/services/oonirun/src/oonirun/dependencies.py b/ooniapi/services/oonirun/src/oonirun/dependencies.py index 84a8c8a55..2ab98729c 100644 --- a/ooniapi/services/oonirun/src/oonirun/dependencies.py +++ b/ooniapi/services/oonirun/src/oonirun/dependencies.py @@ -1,16 +1,20 @@ -from functools import lru_cache from typing import Annotated from fastapi import Depends from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import sessionmaker, Session + +from clickhouse_driver import Client as Clickhouse from .common.config import Settings from .common.dependencies import get_settings -def get_postgresql_session(settings: Annotated[Settings, Depends(get_settings)]): +DependsSettings = Annotated[Settings, Depends(get_settings)] + + +def get_postgresql_session(settings: DependsSettings): engine = create_engine(settings.postgresql_url) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) @@ -19,3 +23,17 @@ def get_postgresql_session(settings: Annotated[Settings, Depends(get_settings)]) yield db finally: db.close() + + +DependsPostgresSession = Annotated[Session, Depends(get_postgresql_session)] + + +def get_clickhouse_session(settings: DependsSettings): + db = Clickhouse.from_url(settings.clickhouse_url) + try: + yield db + finally: + db.disconnect() + + +DependsClickhouseClient = Annotated[Clickhouse, Depends(get_clickhouse_session)] diff --git a/ooniapi/services/oonirun/src/oonirun/main.py b/ooniapi/services/oonirun/src/oonirun/main.py index 90ec41978..bddaa5737 100644 --- a/ooniapi/services/oonirun/src/oonirun/main.py +++ b/ooniapi/services/oonirun/src/oonirun/main.py @@ -11,12 +11,18 @@ from . 
import models from .routers import v2 -from .dependencies import get_postgresql_session +from .dependencies import ( + DependsPostgresSession, + DependsClickhouseClient, + DependsSettings, +) from .common.dependencies import get_settings from .common.version import get_build_label, get_pkg_version from .common.version import get_build_label, get_pkg_version from .common.metrics import mount_metrics +from .common.clickhouse_utils import query_click +log = logging.getLogger(__name__) pkg_name = "oonirun" @@ -63,10 +69,23 @@ class HealthStatus(BaseModel): @app.get("/health") async def health( - settings=Depends(get_settings), - db=Depends(get_postgresql_session), + settings: DependsSettings, + db: DependsPostgresSession, + clickhouse: DependsClickhouseClient, ): errors = [] + + try: + query = """ + SELECT COUNT() + FROM fastpath + WHERE measurement_start_time < NOW() AND measurement_start_time > NOW() - INTERVAL 3 HOUR + """ + query_click(db=clickhouse, query=query, query_params={}) + except Exception as e: + errors.append("clickhouse_error") + log.error(e) + try: db.query(models.OONIRunLink).limit(1).all() except Exception as exc: diff --git a/ooniapi/services/oonirun/src/oonirun/models.py b/ooniapi/services/oonirun/src/oonirun/models.py index efadc5fc0..abad33909 100644 --- a/ooniapi/services/oonirun/src/oonirun/models.py +++ b/ooniapi/services/oonirun/src/oonirun/models.py @@ -57,7 +57,8 @@ class OONIRunLinkNettest(Base): test_name: Mapped[str] = mapped_column() inputs: Mapped[List[str]] = mapped_column(nullable=True) options: Mapped[Dict[str, Any]] = mapped_column(nullable=True) - backend_options: Mapped[Dict[str, Any]] = mapped_column(nullable=True) is_background_run_enabled_default: Mapped[bool] = mapped_column(default=False) is_manual_run_enabled_default: Mapped[bool] = mapped_column(default=False) + targets_name: Mapped[str] = mapped_column(nullable=True) + inputs_extra: Mapped[List[Dict[str, Any]]] = mapped_column(nullable=True) diff --git 
a/ooniapi/services/oonirun/src/oonirun/routers/v2.py b/ooniapi/services/oonirun/src/oonirun/routers/v2.py index d823d2880..26438fa3e 100644 --- a/ooniapi/services/oonirun/src/oonirun/routers/v2.py +++ b/ooniapi/services/oonirun/src/oonirun/routers/v2.py @@ -5,15 +5,17 @@ """ from datetime import datetime, timedelta, timezone -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Any +from typing_extensions import Annotated, Self import logging import sqlalchemy as sa from sqlalchemy.orm import Session from fastapi import APIRouter, Depends, Query, HTTPException, Header, Path from pydantic import computed_field, Field -from pydantic.functional_validators import field_validator -from typing_extensions import Annotated +from pydantic.functional_validators import field_validator, model_validator + +from clickhouse_driver.client import Client as Clickhouse from .. import models @@ -22,7 +24,9 @@ from ..common.auth import ( get_account_id_or_none, ) -from ..dependencies import get_postgresql_session +from ..common.prio import generate_test_list +from ..dependencies import DependsPostgresSession, DependsClickhouseClient +from uuid import uuid4 log = logging.getLogger(__name__) @@ -34,15 +38,43 @@ def utcnow_seconds(): return datetime.now(timezone.utc).replace(microsecond=0) +NETWORK_TYPES = [ + "vpn", + "wifi", + "mobile", + "wired_ethernet", + "no_internet", + "bluetooth", + "usb", + "unknown", +] + + +class OonirunMeta(BaseModel): + run_type: str = Field(description="Run type", pattern="^(timed|manual)$") + is_charging: bool = Field(description="If the probe is charging") + probe_asn: str = Field(pattern=r"^(AS)?([0-9]{1,10})$") + probe_cc: str = Field(description="Country code. Ex: VE") + network_type: str = Field( + description="Ex: wifi", pattern=f"^({'|'.join(NETWORK_TYPES)})$" + ) + website_category_codes: List[str] = Field( + description="List of category codes that user has chosen to test (eg. 
NEWS,HUMR)", + default=[], + ) + + def probe_asn_int(self) -> int: + return int(self.probe_asn.replace("AS", "")) + + class OONIRunLinkNettest(BaseModel): test_name: str = Field( default="", title="name of the ooni nettest", min_length=2, max_length=100 ) - inputs: List[str] = Field( - default=[], title="list of input dictionaries for the nettest" + inputs: Optional[List[str]] = Field( + default=None, title="list of input dictionaries for the nettest" ) options: Dict = Field(default={}, title="options for the nettest") - backend_options: Dict = Field(default={}, title="options to send to the backend") is_background_run_enabled_default: bool = Field( default=False, title="if this test should be enabled by default for background runs", @@ -51,6 +83,40 @@ class OONIRunLinkNettest(BaseModel): default=False, title="if this test should be enabled by default for manual runs" ) + # TODO(luis): Add validation for expected variants of targets_name + targets_name: Optional[str] = Field( + default=None, + description="string used to specify during creation that the input list should be dynamically generated.", + ) + + inputs_extra: Optional[List[Dict[str, Any]]] = Field( + default=None, + description="provides a richer JSON array containing extra parameters for each input. 
If provided, the length of inputs_extra should match the length of inputs.", + ) + + @model_validator(mode="after") + def validate_inputs_extra(self) -> Self: + if self.inputs_extra is not None and ( + self.inputs is None or len(self.inputs) != len(self.inputs_extra) + ): + raise ValueError( + "When provided, inputs_extra should be the same length as inputs" + ) + return self + + def validate_no_inputs_and_targets_name(self): + """ + Check that you are not providing targets_name and inputs-inputs_extra in the same request + """ + if self.targets_name is not None and ( + self.inputs is not None or self.inputs_extra is not None + ): + raise ValueError( + "When targets_name is provided, you can't provide inputs or inputs_extra" + ) + + return self + class OONIRunLinkEngineDescriptor(BaseModel): revision: str = Field(title="revision of the nettest descriptor") @@ -117,6 +183,7 @@ def validate_intl(cls, v: Dict[str, str]): class OONIRunLink(OONIRunLinkBase): + oonirun_link_id: str date_created: datetime = Field( description="time when the ooni run link was created" @@ -151,9 +218,9 @@ class OONIRunLinkCreateEdit(OONIRunLinkBase): ) def create_oonirun_link( create_request: OONIRunLinkCreateEdit, + db: DependsPostgresSession, token=Depends(role_required(["admin", "user"])), - db=Depends(get_postgresql_session), -): +) -> OONIRunLink: """Create a new oonirun link or a new version for an existing one.""" log.debug("creating oonirun") account_id = token["account_id"] @@ -165,6 +232,12 @@ def create_oonirun_link( detail="email_address must match the email address of the user who created the oonirun link", ) + for nt in create_request.nettests: + try: + nt.validate_no_inputs_and_targets_name() + except ValueError as e: + raise HTTPException(status_code=422, detail={"error": str(e)}) + now = utcnow_seconds() revision = 1 @@ -189,7 +262,7 @@ def create_oonirun_link( test_name=nt.test_name, inputs=nt.inputs, options=nt.options, - backend_options=nt.backend_options, + 
targets_name=nt.targets_name, is_background_run_enabled_default=nt.is_background_run_enabled_default, is_manual_run_enabled_default=nt.is_manual_run_enabled_default, ) @@ -233,13 +306,19 @@ def create_oonirun_link( def edit_oonirun_link( oonirun_link_id: str, edit_request: OONIRunLinkCreateEdit, + db: DependsPostgresSession, token=Depends(role_required(["admin", "user"])), - db=Depends(get_postgresql_session), ): """Edit an existing OONI Run link""" log.debug(f"edit oonirun {oonirun_link_id}") account_id = token["account_id"] + for nt in edit_request.nettests: + try: + nt.validate_no_inputs_and_targets_name() + except ValueError as e: + raise HTTPException(status_code=422, detail={"error": str(e)}) + now = utcnow_seconds() q = db.query(models.OONIRunLink).filter( @@ -275,10 +354,10 @@ def edit_oonirun_link( assert nt.nettest_index == nettest_index, "inconsistent nettest index" latest_nettests.append( OONIRunLinkNettest( + targets_name=nt.targets_name, test_name=nt.test_name, inputs=nt.inputs, options=nt.options, - backend_options=nt.backend_options, is_background_run_enabled_default=nt.is_background_run_enabled_default, is_manual_run_enabled_default=nt.is_manual_run_enabled_default, ) @@ -288,13 +367,13 @@ def edit_oonirun_link( latest_revision += 1 for nettest_index, nt in enumerate(edit_request.nettests): new_nettest = models.OONIRunLinkNettest( + targets_name=nt.targets_name, revision=latest_revision, nettest_index=nettest_index, date_created=now, test_name=nt.test_name, inputs=nt.inputs, options=nt.options, - backend_options=nt.backend_options, is_background_run_enabled_default=nt.is_background_run_enabled_default, is_manual_run_enabled_default=nt.is_manual_run_enabled_default, oonirun_link=oonirun_link, @@ -335,27 +414,92 @@ def edit_oonirun_link( ) +def make_test_lists_from_targets_name( + targets_name: str, meta: OonirunMeta, clickhouse: Clickhouse +) -> Tuple[List[str], List[Dict[str, Any]]]: + if targets_name == "websites_list_prioritized": + return 
make_nettest_websites_list_prioritized(meta, clickhouse) + + raise ValueError("Unknown target name: " + targets_name) + + +def make_nettest_websites_list_prioritized( + meta: OonirunMeta, clickhouse: Clickhouse +) -> Tuple[List[str], List[Dict[str, Any]]]: + """Generates an inputs list using prio. + Returns: + Tuple[List[str], List[Dict[str, Any]]]: (Inputs, InputsExtra) + """ + + if meta.run_type == "manual": + url_limit = 9999 # same as prio.py + elif meta.is_charging: + url_limit = 100 + else: + url_limit = 20 + tests, _1, _2 = generate_test_list( + clickhouse, + meta.probe_cc, + meta.website_category_codes, + meta.probe_asn_int(), + url_limit, + False, + ) + + inputs = [] + inputs_extra = [] + for test in tests: + url = test["url"] + del test["url"] + inputs.append(url) + inputs_extra.append(test) + + return inputs, inputs_extra + + def get_nettests( - oonirun_link: models.OONIRunLink, revision: Optional[int] + oonirun_link: models.OONIRunLink, + revision: Optional[int], ) -> Tuple[List[OONIRunLinkNettest], datetime]: + """Computes a list of nettests related to the given oonirun link + + The `meta` parameter is required for the dynamic tests list calculation. If not provided, + it will skip it. 
+ + """ + date_created = oonirun_link.nettests[0].date_created nettests = [] for nt in oonirun_link.nettests: if revision and nt.revision != revision: continue date_created = nt.date_created + inputs, inputs_extra = nt.inputs, nt.inputs_extra + targets_name = nt.targets_name + nettests.append( OONIRunLinkNettest( + targets_name=targets_name, test_name=nt.test_name, - inputs=nt.inputs, + inputs=inputs, options=nt.options, - backend_options=nt.backend_options, + inputs_extra=inputs_extra, is_background_run_enabled_default=nt.is_background_run_enabled_default, is_manual_run_enabled_default=nt.is_manual_run_enabled_default, ) ) return nettests, date_created +def populate_dynamic_lists(nettest : OONIRunLinkNettest, meta : OonirunMeta, clickhouse : Clickhouse): + + if nettest.targets_name is None: + return + + inputs, inputs_extra = make_test_lists_from_targets_name( + nettest.targets_name, meta, clickhouse + ) + nettest.inputs = inputs + nettest.inputs_extra = inputs_extra def make_oonirun_link( db: Session, @@ -411,7 +555,7 @@ class OONIRunLinkRevisions(BaseModel): ) def get_oonirun_link_revisions( oonirun_link_id: str, - db=Depends(get_postgresql_session), + db: DependsPostgresSession, ): """ Obtain the list of revisions for a certain OONI Run link @@ -435,7 +579,10 @@ def get_oonirun_link_revisions( return OONIRunLinkRevisions(revisions=revisions) -@router.get( +USER_AGENT_PATTERN = r"^([a-zA-Z0-9\-\_]+),([a-zA-Z0-9\-\_\.]+),([a-zA-Z0-9\ ]+),([a-zA-Z0-9\-\_]+),([a-zA-Z0-9\-\_\.]+),([a-zA-Z0-9\-\_\.]+)$" + + +@router.post( "/v2/oonirun/links/{oonirun_link_id}/engine-descriptor/{revision_number}", tags=["oonirun"], response_model=OONIRunLinkEngineDescriptor, @@ -451,7 +598,20 @@ def get_oonirun_link_engine_descriptor( }, ), ], - db=Depends(get_postgresql_session), + db: DependsPostgresSession, + clickhouse: DependsClickhouseClient, + meta: OonirunMeta, + useragent: Annotated[ + Optional[str], + Header( + pattern=USER_AGENT_PATTERN, + error_message="Expected format: 
,,,,,", + description="Expected format: ,,,,,", + ), + ] = None, + x_ooni_credentials: Annotated[ + Optional[bytes], Header(description="base64 encoded OONI anonymous credentials") + ] = None, ): """Fetch an OONI Run link by specifying the revision number""" try: @@ -461,6 +621,17 @@ def get_oonirun_link_engine_descriptor( assert revision_number == "latest" revision = None + if useragent is not None: + ( + software_name, + software_version, + platform, + engine_name, + engine_version, + engine_version_full, + ) = useragent.split(",") + # TODO Log this metadata + q = db.query(models.OONIRunLink).filter( models.OONIRunLink.oonirun_link_id == oonirun_link_id ) @@ -475,7 +646,11 @@ def get_oonirun_link_engine_descriptor( revision = latest_revision assert isinstance(revision, int) + nettests, date_created = get_nettests(res, revision) + for nt in nettests: + populate_dynamic_lists(nt, meta, clickhouse) + return OONIRunLinkEngineDescriptor( nettests=nettests, date_created=date_created, @@ -499,8 +674,8 @@ def get_oonirun_link_revision( }, ), ], + db: DependsPostgresSession, authorization: str = Header("authorization"), - db=Depends(get_postgresql_session), settings=Depends(get_settings), ): """Fetch an OONI Run link by specifying the revision number""" @@ -528,8 +703,8 @@ def get_oonirun_link_revision( ) def get_latest_oonirun_link( oonirun_link_id: str, + db: DependsPostgresSession, authorization: str = Header("authorization"), - db=Depends(get_postgresql_session), settings=Depends(get_settings), ): """Fetch OONIRun descriptor by creation time or the newest one""" @@ -551,6 +726,7 @@ class OONIRunLinkList(BaseModel): @router.get("/v2/oonirun/links", tags=["oonirun"]) def list_oonirun_links( + db: DependsPostgresSession, is_mine: Annotated[ Optional[bool], Query(description="List only the my descriptors"), @@ -560,7 +736,6 @@ def list_oonirun_links( Query(description="List also expired descriptors"), ] = None, authorization: str = Header("authorization"), - 
db=Depends(get_postgresql_session), settings=Depends(get_settings), ) -> OONIRunLinkList: """List OONIRun descriptors""" @@ -579,7 +754,10 @@ def list_oonirun_links( assert ( row.nettests[-1].revision <= revision ), "nettests must be sorted by revision" + + # if revision is None, it will get all the nettests, including from old revisions nettests, _ = get_nettests(row, revision) + oonirun_link = OONIRunLink( oonirun_link_id=row.oonirun_link_id, name=row.name, diff --git a/ooniapi/services/oonirun/tests/conftest.py b/ooniapi/services/oonirun/tests/conftest.py index 4c6f716ee..eaaa17c9e 100644 --- a/ooniapi/services/oonirun/tests/conftest.py +++ b/ooniapi/services/oonirun/tests/conftest.py @@ -1,5 +1,9 @@ import pathlib +from pathlib import Path import pytest +import json +import random +from datetime import datetime, timedelta, UTC import time import jwt @@ -7,8 +11,12 @@ from fastapi.testclient import TestClient from oonirun.common.config import Settings +from oonirun.common.clickhouse_utils import insert_click from oonirun.common.dependencies import get_settings from oonirun.main import app +from clickhouse_driver import Client as ClickhouseClient + +THIS_DIR = Path(__file__).parent.resolve() def make_override_get_settings(**kw): @@ -40,7 +48,8 @@ def alembic_migration(postgresql): @pytest.fixture def client_with_bad_settings(): app.dependency_overrides[get_settings] = make_override_get_settings( - postgresql_url="postgresql://bad:bad@localhost/bad" + postgresql_url="postgresql://bad:bad@localhost/bad", + clickhouse_url="clickhouse://bad:bad@localhost/bad", ) client = TestClient(app) @@ -48,11 +57,12 @@ def client_with_bad_settings(): @pytest.fixture -def client(alembic_migration): +def client(alembic_migration, clickhouse_server): app.dependency_overrides[get_settings] = make_override_get_settings( postgresql_url=alembic_migration, jwt_encryption_key="super_secure", prometheus_metrics_password="super_secure", + clickhouse_url=clickhouse_server, ) client = 
TestClient(app) @@ -92,3 +102,95 @@ def client_with_admin_role(client): jwt_token = create_session_token("0" * 16, "admin") client.headers = {"Authorization": f"Bearer {jwt_token}"} yield client + + +def is_clickhouse_running(url): + try: + with ClickhouseClient.from_url(url) as client: + client.execute("SELECT 1") + return True + except Exception: + return False + + +@pytest.fixture(scope="session") +def clickhouse_server(docker_ip, docker_services): + port = docker_services.port_for("clickhouse", 9000) + # See password in docker compose + url = "clickhouse://test:test@{}:{}".format(docker_ip, port) + docker_services.wait_until_responsive( + timeout=30.0, pause=0.1, check=lambda: is_clickhouse_running(url) + ) + yield url + + +@pytest.fixture(scope="session") +def clickhouse_db(clickhouse_server): + yield ClickhouseClient.from_url(clickhouse_server) + + +@pytest.fixture(scope="module") +def fixtures_data_dir(): + yield Path(THIS_DIR, "fixtures/data") + + +@pytest.fixture(scope="module") +def url_priorities(clickhouse_db, fixtures_data_dir): + filename = "url_priorities_us.json" + file = Path(fixtures_data_dir, filename) + + with file.open("r") as f: + j = json.load(f) + + # 'sign' is created with default value 0, causing a db error. 
+ # use 1 to prevent it + for row in j: + row["sign"] = 1 + + query = "INSERT INTO url_priorities (sign, category_code, cc, domain, url, priority) VALUES" + insert_click(clickhouse_db, query, j) + yield + clickhouse_db.execute("TRUNCATE TABLE url_priorities") + + +def generate_random_date_last_7_days() -> datetime: + start = datetime.now(tz=UTC) - timedelta(days=7) + + # return a random date between 7 days ago and now + return start + timedelta( + seconds=random.randrange(3600 * 24, 3600 * 24 * 7 - 3600 * 24) + ) + + +@pytest.fixture(scope="module") +def measurements(clickhouse_db, fixtures_data_dir): + msmnts_dir = Path(fixtures_data_dir, "measurements.json") + with open(msmnts_dir, "r") as f: + measurements = json.load(f) + + for ms in measurements: + date = generate_random_date_last_7_days() + ms["measurement_start_time"] = date + ms["test_start_time"] = date + + query = "INSERT INTO fastpath VALUES" + insert_click(clickhouse_db, query, measurements) + + yield + clickhouse_db.execute("TRUNCATE TABLE url_priorities") + + +@pytest.fixture +def super_prioritized_website(clickhouse_db): + values = { + "category_code": "*", + "cc": "*", + "domain": "ooni.org", + "priority": 99999, + "url": "*", + "sign": 1, + } + query = "INSERT INTO url_priorities (sign, category_code, cc, domain, url, priority) VALUES" + insert_click(clickhouse_db, query, [values]) + yield + clickhouse_db.execute("DELETE FROM url_priorities WHERE domain='ooni.org'") diff --git a/ooniapi/services/oonirun/tests/docker-compose.yml b/ooniapi/services/oonirun/tests/docker-compose.yml new file mode 100644 index 000000000..7eb60f627 --- /dev/null +++ b/ooniapi/services/oonirun/tests/docker-compose.yml @@ -0,0 +1,12 @@ +version: '2' +services: + clickhouse: + image: "clickhouse/clickhouse-server" + ports: + - "9000" + volumes: + - ./fixtures:/fixtures + - ./fixtures/initdb:/docker-entrypoint-initdb.d/ + environment: + CLICKHOUSE_USER: test + CLICKHOUSE_PASSWORD: test diff --git 
a/ooniapi/services/oonirun/tests/fixtures/data/measurements.json b/ooniapi/services/oonirun/tests/fixtures/data/measurements.json new file mode 100644 index 000000000..32b405acb --- /dev/null +++ b/ooniapi/services/oonirun/tests/fixtures/data/measurements.json @@ -0,0 +1,182 @@ +[ + { + "measurement_uid": "20240815000001.959692_TH_webconnectivity_29f8e1b5b07606c7", + "report_id": "20240814T220910Z_webconnectivity_TH_45629_n1_FWtmywdrVDFlEAfy", + "input": "https://twitter.com/", + "probe_cc": "ES", + "probe_asn": 1234, + "test_name": "web_connectivity", + "test_start_time": "2024-08-14 22:09:10", + "measurement_start_time": "2024-08-15 00:00:01", + "filename": "", + "scores": "{\"blocking_general\": 0.0,\"blocking_global\": 0.0,\"blocking_country\": 0.0,\"blocking_isp\": 0.0,\"blocking_local\": 0.0}", + "platform": "linux", + "anomaly": "f", + "confirmed": "f", + "msm_failure": "f", + "domain": "twitter.com", + "software_name": "ooniprobe-cli", + "software_version": "3.20.0", + "control_failure": "", + "blocking_general": 0, + "is_ssl_expected": 0, + "page_len": 0, + "page_len_ratio": 0, + "server_cc": "", + "server_asn": 0, + "server_as_name": "", + "test_version": "0.4.3", + "test_runtime": 7.9002676, + "architecture": "amd64", + "engine_name": "ooniprobe-engine", + "engine_version": "3.20.0", + "blocking_type": "", + "test_helper_address": "https://3.th.ooni.org", + "test_helper_type": "https", + "ooni_run_link_id": null + }, + { + "measurement_uid": "20240815000001.959692_TH_webconnectivity_29f8e1b5b07606c7", + "report_id": "20240814T220910Z_webconnectivity_TH_45629_n1_FWtmywdrVDFlEAfy", + "input": "https://twitter.com/", + "probe_cc": "ES", + "probe_asn": 1234, + "test_name": "web_connectivity", + "test_start_time": "2024-08-14 22:09:10", + "measurement_start_time": "2024-08-15 00:00:01", + "filename": "", + "scores": "{\"blocking_general\": 0.0,\"blocking_global\": 0.0,\"blocking_country\": 0.0,\"blocking_isp\": 0.0,\"blocking_local\": 0.0}", + "platform": 
"linux", + "anomaly": "f", + "confirmed": "f", + "msm_failure": "f", + "domain": "twitter.com", + "software_name": "ooniprobe-cli", + "software_version": "3.20.0", + "control_failure": "", + "blocking_general": 0, + "is_ssl_expected": 0, + "page_len": 0, + "page_len_ratio": 0, + "server_cc": "", + "server_asn": 0, + "server_as_name": "", + "test_version": "0.4.3", + "test_runtime": 7.9002676, + "architecture": "amd64", + "engine_name": "ooniprobe-engine", + "engine_version": "3.20.0", + "blocking_type": "", + "test_helper_address": "https://3.th.ooni.org", + "test_helper_type": "https", + "ooni_run_link_id": null + }, + { + "measurement_uid": "20240815000001.959692_TH_webconnectivity_29f8e1b5b07606c7", + "report_id": "20240814T220910Z_webconnectivity_TH_45629_n1_FWtmywdrVDFlEAfy", + "input": "https://twitter.com/", + "probe_cc": "ES", + "probe_asn": 1234, + "test_name": "web_connectivity", + "test_start_time": "2024-08-14 22:09:10", + "measurement_start_time": "2024-08-15 00:00:01", + "filename": "", + "scores": "{\"blocking_general\": 0.0,\"blocking_global\": 0.0,\"blocking_country\": 0.0,\"blocking_isp\": 0.0,\"blocking_local\": 0.0}", + "platform": "linux", + "anomaly": "f", + "confirmed": "f", + "msm_failure": "f", + "domain": "twitter.com", + "software_name": "ooniprobe-cli", + "software_version": "3.20.0", + "control_failure": "", + "blocking_general": 0, + "is_ssl_expected": 0, + "page_len": 0, + "page_len_ratio": 0, + "server_cc": "", + "server_asn": 0, + "server_as_name": "", + "test_version": "0.4.3", + "test_runtime": 7.9002676, + "architecture": "amd64", + "engine_name": "ooniprobe-engine", + "engine_version": "3.20.0", + "blocking_type": "", + "test_helper_address": "https://3.th.ooni.org", + "test_helper_type": "https", + "ooni_run_link_id": null + }, + { + "measurement_uid": "20240815000001.959692_TH_webconnectivity_29f8e1b5b07606c7", + "report_id": "20240814T220910Z_webconnectivity_TH_45629_n1_FWtmywdrVDFlEAfy", + "input": 
"https://www.facebook.com/", + "probe_cc": "IT", + "probe_asn": 1234, + "test_name": "web_connectivity", + "test_start_time": "2024-08-14 22:09:10", + "measurement_start_time": "2024-08-15 00:00:01", + "filename": "", + "scores": "{\"blocking_general\": 0.0,\"blocking_global\": 0.0,\"blocking_country\": 0.0,\"blocking_isp\": 0.0,\"blocking_local\": 0.0}", + "platform": "linux", + "anomaly": "f", + "confirmed": "f", + "msm_failure": "f", + "domain": "www.facebook.com", + "software_name": "ooniprobe-cli", + "software_version": "3.20.0", + "control_failure": "", + "blocking_general": 0, + "is_ssl_expected": 0, + "page_len": 0, + "page_len_ratio": 0, + "server_cc": "", + "server_asn": 0, + "server_as_name": "", + "test_version": "0.4.3", + "test_runtime": 7.9002676, + "architecture": "amd64", + "engine_name": "ooniprobe-engine", + "engine_version": "3.20.0", + "blocking_type": "", + "test_helper_address": "https://3.th.ooni.org", + "test_helper_type": "https", + "ooni_run_link_id": null + }, + { + "measurement_uid": "20240815000001.959692_TH_webconnectivity_29f8e1b5b07606c7", + "report_id": "20240814T220910Z_webconnectivity_TH_45629_n1_FWtmywdrVDFlEAfy", + "input": "https://www.facebook.com/", + "probe_cc": "IT", + "probe_asn": 1234, + "test_name": "web_connectivity", + "test_start_time": "2024-08-14 22:09:10", + "measurement_start_time": "2024-08-15 00:00:01", + "filename": "", + "scores": "{\"blocking_general\": 0.0,\"blocking_global\": 0.0,\"blocking_country\": 0.0,\"blocking_isp\": 0.0,\"blocking_local\": 0.0}", + "platform": "linux", + "anomaly": "f", + "confirmed": "f", + "msm_failure": "f", + "domain": "www.facebook.com", + "software_name": "ooniprobe-cli", + "software_version": "3.20.0", + "control_failure": "", + "blocking_general": 0, + "is_ssl_expected": 0, + "page_len": 0, + "page_len_ratio": 0, + "server_cc": "", + "server_asn": 0, + "server_as_name": "", + "test_version": "0.4.3", + "test_runtime": 7.9002676, + "architecture": "amd64", + "engine_name": 
"ooniprobe-engine", + "engine_version": "3.20.0", + "blocking_type": "", + "test_helper_address": "https://3.th.ooni.org", + "test_helper_type": "https", + "ooni_run_link_id": null + } +] \ No newline at end of file diff --git a/ooniapi/services/oonirun/tests/fixtures/data/url_priorities_us.json b/ooniapi/services/oonirun/tests/fixtures/data/url_priorities_us.json new file mode 100644 index 000000000..3259dffa7 --- /dev/null +++ b/ooniapi/services/oonirun/tests/fixtures/data/url_priorities_us.json @@ -0,0 +1,338 @@ +[ + { + "category_code": "NEWS", + "cc": "*", + "domain": "*", + "priority": 100, + "url": "*" + }, + { + "category_code": "POLR", + "cc": "*", + "domain": "*", + "priority": 100, + "url": "*" + }, + { + "category_code": "HUMR", + "cc": "*", + "domain": "*", + "priority": 100, + "url": "*" + }, + { + "category_code": "LGBT", + "cc": "*", + "domain": "*", + "priority": 100, + "url": "*" + }, + { + "category_code": "ANON", + "cc": "*", + "domain": "*", + "priority": 100, + "url": "*" + }, + { + "category_code": "GRP", + "cc": "*", + "domain": "*", + "priority": 80, + "url": "*" + }, + { + "category_code": "COMT", + "cc": "*", + "domain": "*", + "priority": 80, + "url": "*" + }, + { + "category_code": "MMED", + "cc": "*", + "domain": "*", + "priority": 80, + "url": "*" + }, + { + "category_code": "SRCH", + "cc": "*", + "domain": "*", + "priority": 80, + "url": "*" + }, + { + "category_code": "PUBH", + "cc": "*", + "domain": "*", + "priority": 80, + "url": "*" + }, + { + "category_code": "REL", + "cc": "*", + "domain": "*", + "priority": 60, + "url": "*" + }, + { + "category_code": "XED", + "cc": "*", + "domain": "*", + "priority": 60, + "url": "*" + }, + { + "category_code": "HOST", + "cc": "*", + "domain": "*", + "priority": 60, + "url": "*" + }, + { + "category_code": "ENV", + "cc": "*", + "domain": "*", + "priority": 60, + "url": "*" + }, + { + "category_code": "FILE", + "cc": "*", + "domain": "*", + "priority": 40, + "url": "*" + }, + { + 
"category_code": "CULTR", + "cc": "*", + "domain": "*", + "priority": 40, + "url": "*" + }, + { + "category_code": "IGO", + "cc": "*", + "domain": "*", + "priority": 40, + "url": "*" + }, + { + "category_code": "GOVT", + "cc": "*", + "domain": "*", + "priority": 40, + "url": "*" + }, + { + "category_code": "DATE", + "cc": "*", + "domain": "*", + "priority": 30, + "url": "*" + }, + { + "category_code": "HATE", + "cc": "*", + "domain": "*", + "priority": 30, + "url": "*" + }, + { + "category_code": "MILX", + "cc": "*", + "domain": "*", + "priority": 30, + "url": "*" + }, + { + "category_code": "PROV", + "cc": "*", + "domain": "*", + "priority": 30, + "url": "*" + }, + { + "category_code": "PORN", + "cc": "*", + "domain": "*", + "priority": 30, + "url": "*" + }, + { + "category_code": "GMB", + "cc": "*", + "domain": "*", + "priority": 30, + "url": "*" + }, + { + "category_code": "ALDR", + "cc": "*", + "domain": "*", + "priority": 30, + "url": "*" + }, + { + "category_code": "GAME", + "cc": "*", + "domain": "*", + "priority": 20, + "url": "*" + }, + { + "category_code": "MISC", + "cc": "*", + "domain": "*", + "priority": 20, + "url": "*" + }, + { + "category_code": "HACK", + "cc": "*", + "domain": "*", + "priority": 20, + "url": "*" + }, + { + "category_code": "ECON", + "cc": "*", + "domain": "*", + "priority": 20, + "url": "*" + }, + { + "category_code": "COMM", + "cc": "*", + "domain": "*", + "priority": 20, + "url": "*" + }, + { + "category_code": "CTRL", + "cc": "*", + "domain": "*", + "priority": 20, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "www.facebook.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "twitter.com", + "priority": 201, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "www.instagram.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "www.whatsapp.com", + "priority": 200, + "url": "*" + }, + { + 
"category_code": "*", + "cc": "*", + "domain": "web.whatsapp.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "telegram.org", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "web.telegram.org", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "www.youtube.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "www.tiktok.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "www.viber.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "www.snapchat.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "www.reddit.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "vimeo.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "www.wechat.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "international.qq.com", + "priority": 200, + "url": "*" + }, + { + "category_code": "*", + "cc": "*", + "domain": "signal.org", + "priority": 200, + "url": "*" + }, + { + "category_code": "MISC", + "cc": "US", + "domain": "*", + "priority": -200, + "url": "*" + } +] diff --git a/ooniapi/services/oonirun/tests/fixtures/initdb/01-scheme.sql b/ooniapi/services/oonirun/tests/fixtures/initdb/01-scheme.sql new file mode 100644 index 000000000..b3081dd47 --- /dev/null +++ b/ooniapi/services/oonirun/tests/fixtures/initdb/01-scheme.sql @@ -0,0 +1,260 @@ +-- Create tables for Clickhouse integ tests + +-- Main tables + +CREATE TABLE default.fastpath +( + `measurement_uid` String, + `report_id` String, + `input` String, + `probe_cc` String, + `probe_asn` UInt32, + `test_name` String, + `test_start_time` DateTime, + `measurement_start_time` DateTime, + `filename` String, + 
`scores` String, + `platform` String, + `anomaly` String, + `confirmed` String, + `msm_failure` String, + `domain` String, + `software_name` String, + `software_version` String, + `control_failure` String, + `blocking_general` Float32, + `is_ssl_expected` Int8, + `page_len` Int32, + `page_len_ratio` Float32, + `server_cc` String, + `server_asn` Int8, + `server_as_name` String, + `update_time` DateTime64(3) MATERIALIZED now64(), + `test_version` String, + `test_runtime` Float32, + `architecture` String, + `engine_name` String, + `engine_version` String, + `blocking_type` String, + `test_helper_address` LowCardinality(String), + `test_helper_type` LowCardinality(String), + `ooni_run_link_id` Nullable(UInt64) +) +ENGINE = ReplacingMergeTree +ORDER BY (measurement_start_time, report_id, input) +SETTINGS index_granularity = 8192; + +CREATE TABLE default.jsonl +( + `report_id` String, + `input` String, + `s3path` String, + `linenum` Int32, + `measurement_uid` String +) +ENGINE = MergeTree +ORDER BY (report_id, input) +SETTINGS index_granularity = 8192; + +CREATE TABLE default.url_priorities ( + `sign` Int8, + `category_code` String, + `cc` String, + `domain` String, + `url` String, + `priority` Int32 +) +ENGINE = CollapsingMergeTree(sign) +ORDER BY (category_code, cc, domain, url, priority) +SETTINGS index_granularity = 1024; + +CREATE TABLE default.citizenlab +( + `domain` String, + `url` String, + `cc` FixedString(32), + `category_code` String +) +ENGINE = ReplacingMergeTree +ORDER BY (domain, url, cc, category_code) +SETTINGS index_granularity = 4; + +CREATE TABLE default.citizenlab_flip AS default.citizenlab; + +CREATE TABLE test_groups ( + `test_name` String, + `test_group` String +) +ENGINE = Join(ANY, LEFT, test_name); + + +-- Auth + +CREATE TABLE accounts +( + `account_id` FixedString(32), + `role` String +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY account_id; + +CREATE TABLE session_expunge +( + `account_id` FixedString(32), + `threshold` DateTime DEFAULT now() +) 
+ENGINE = EmbeddedRocksDB +PRIMARY KEY account_id; + +-- Materialized views + +CREATE MATERIALIZED VIEW default.counters_test_list +( + `day` DateTime, + `probe_cc` String, + `input` String, + `msmt_cnt` UInt64 +) +ENGINE = SummingMergeTree +PARTITION BY day +ORDER BY (probe_cc, input) +SETTINGS index_granularity = 8192 AS +SELECT + toDate(measurement_start_time) AS day, + probe_cc, + input, + count() AS msmt_cnt +FROM default.fastpath +INNER JOIN default.citizenlab ON fastpath.input = citizenlab.url +WHERE (measurement_start_time < now()) AND (measurement_start_time > (now() - toIntervalDay(8))) AND (test_name = 'web_connectivity') +GROUP BY + day, + probe_cc, + input; + +CREATE MATERIALIZED VIEW default.counters_asn_test_list +( + `week` DateTime, + `probe_cc` String, + `probe_asn` UInt32, + `input` String, + `msmt_cnt` UInt64 +) +ENGINE = SummingMergeTree +ORDER BY (probe_cc, probe_asn, input) +SETTINGS index_granularity = 8192 AS +SELECT + toStartOfWeek(measurement_start_time) AS week, + probe_cc, + probe_asn, + input, + count() AS msmt_cnt +FROM default.fastpath +INNER JOIN default.citizenlab ON fastpath.input = citizenlab.url +WHERE (measurement_start_time < now()) AND (measurement_start_time > (now() - toIntervalDay(8))) AND (test_name = 'web_connectivity') +GROUP BY + week, + probe_cc, + probe_asn, + input; + +CREATE TABLE msmt_feedback +( + `measurement_uid` String, + `account_id` String, + `status` String, + `update_time` DateTime64(3) MATERIALIZED now64() +) +ENGINE = ReplacingMergeTree +ORDER BY (measurement_uid, account_id) +SETTINGS index_granularity = 4; + +CREATE TABLE default.fingerprints_dns +( + `name` String, + `scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6), + `other_names` String, + `location_found` String, + `pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4), + `pattern` String, + `confidence_no_fp` UInt8, + `expected_countries` String, + `source` String, + `exp_url` String, + `notes` 
String +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY name; + +CREATE TABLE default.fingerprints_http +( + `name` String, + `scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6, 'injb' = 7, 'prov' = 8), + `other_names` String, + `location_found` String, + `pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4), + `pattern` String, + `confidence_no_fp` UInt8, + `expected_countries` String, + `source` String, + `exp_url` String, + `notes` String +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY name; + +CREATE TABLE asnmeta +( + `asn` UInt32, + `org_name` String, + `cc` String, + `changed` Date, + `aut_name` String, + `source` String +) +ENGINE = MergeTree +ORDER BY (asn, changed); + +CREATE TABLE IF NOT EXISTS default.incidents +( + `update_time` DateTime DEFAULT now(), + `create_time` DateTime DEFAULT now(), + `start_time` DateTime DEFAULT now(), + `end_time` Nullable(DateTime), + `creator_account_id` FixedString(32), + `reported_by` String, + `email_address` String, + `id` String, + `title` String, + `text` String, + `event_type` LowCardinality(String), + `published` UInt8, + `deleted` UInt8 DEFAULT 0, + `CCs` Array(FixedString(2)), + `ASNs` Array(UInt32), + `domains` Array(String), + `tags` Array(String), + `links` Array(String), + `test_names` Array(String), + `short_description` String, +) +ENGINE = ReplacingMergeTree(update_time) +ORDER BY (id) +SETTINGS index_granularity = 1; + +CREATE TABLE IF NOT EXISTS default.oonirun +( + `ooni_run_link_id` UInt64, + `descriptor_creation_time` DateTime64(3), + `translation_creation_time` DateTime64(3), + `creator_account_id` FixedString(32), + `archived` UInt8 DEFAULT 0, + `descriptor` String, + `author` String, + `name` String, + `short_description` String, + `icon` String +) +ENGINE = ReplacingMergeTree(translation_creation_time) +ORDER BY (ooni_run_link_id, descriptor_creation_time) +SETTINGS index_granularity = 1; diff --git 
a/ooniapi/services/oonirun/tests/fixtures/initdb/02-fixtures.sql b/ooniapi/services/oonirun/tests/fixtures/initdb/02-fixtures.sql new file mode 100644 index 000000000..daaab63ef --- /dev/null +++ b/ooniapi/services/oonirun/tests/fixtures/initdb/02-fixtures.sql @@ -0,0 +1,25 @@ + +-- Populate lookup tables + +INSERT INTO test_groups (test_name, test_group) VALUES ('bridge_reachability', 'circumvention'), ('meek_fronted_requests_test', 'circumvention'), ('psiphon', 'circumvention'), ('riseupvpn', 'circumvention'), ('tcp_connect', 'circumvention'), ('tor', 'circumvention'), ('torsf', 'circumvention'), ('vanilla_tor', 'circumvention'), ('dnscheck', 'experimental'), ('urlgetter', 'experimental'), ('facebook_messenger', 'im'), ('signal', 'im'), ('telegram', 'im'), ('whatsapp', 'im'), ('dns_consistency', 'legacy'), ('http_host', 'legacy'), ('http_requests', 'legacy'), ('multi_protocol_traceroute', 'legacy'), ('http_header_field_manipulation', 'middlebox'), ('http_invalid_request_line', 'middlebox'), ('dash', 'performance'), ('ndt', 'performance'), ('web_connectivity', 'websites') ; + +-- Create integ test data for Clickhouse + +INSERT INTO citizenlab VALUES ('www.ushmm.org', 'https://www.ushmm.org/', 'ZZ', 'CULTR'); +INSERT INTO citizenlab VALUES ('www.cabofrio.rj.gov.br', 'http://www.cabofrio.rj.gov.br/', 'BR', 'CULTR'); +INSERT INTO citizenlab VALUES ('ncac.org', 'http://ncac.org/', 'ZZ', 'NEWS'); +INSERT INTO citizenlab VALUES ('ncac.org', 'https://ncac.org/', 'ZZ', 'NEWS'); +INSERT INTO citizenlab VALUES ('www.facebook.com','http://www.facebook.com/saakashvilimikheil','ge','NEWS'); +INSERT INTO citizenlab VALUES ('www.facebook.com','http://www.facebook.com/somsakjeam/videos/1283095981743678/','th','POLR'); +INSERT INTO citizenlab VALUES ('www.facebook.com','https://www.facebook.com/','ZZ','GRP'); +INSERT INTO citizenlab VALUES ('facebook.com','http://facebook.com/','ua','GRP'); +INSERT INTO citizenlab VALUES ('facebook.com','https://facebook.com/watch','jo','GRP');
+INSERT INTO citizenlab VALUES ('twitter.com','http://twitter.com/ghonim','kw','POLR'); +INSERT INTO citizenlab VALUES ('twitter.com','http://twitter.com/ghonim','so','POLR'); +INSERT INTO citizenlab VALUES ('twitter.com','https://twitter.com/','ZZ','GRP'); +INSERT INTO citizenlab VALUES ('ooni.org','https://ooni.org/','ZZ','GRP'); + +-- get_measurement_meta integ tests +INSERT INTO jsonl (report_id, input, s3path, linenum) VALUES ('20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds2K3', 'https://www.backtrack-linux.org/', 'raw/20210709/00/MY/webconnectivity/2021070900_MY_webconnectivity.n0.2.jsonl.gz', 35) + + diff --git a/ooniapi/services/oonirun/tests/fixtures/initdb/init.sh b/ooniapi/services/oonirun/tests/fixtures/initdb/init.sh new file mode 100755 index 000000000..0ce1f3a4c --- /dev/null +++ b/ooniapi/services/oonirun/tests/fixtures/initdb/init.sh @@ -0,0 +1,4 @@ +#!/bin/sh +set -e + +# Add initialization code here. Example: fetch data, generate it dynamically \ No newline at end of file diff --git a/ooniapi/services/oonirun/tests/integ/test_dynamic_lists.py b/ooniapi/services/oonirun/tests/integ/test_dynamic_lists.py new file mode 100644 index 000000000..5ffabb8a8 --- /dev/null +++ b/ooniapi/services/oonirun/tests/integ/test_dynamic_lists.py @@ -0,0 +1,131 @@ +from copy import deepcopy +from oonirun.common.clickhouse_utils import insert_click +import pytest +from ..test_oonirun import SAMPLE_OONIRUN, SAMPLE_META + + +def postj(client, url, **kw): + response = client.post(url, json=kw) + assert response.status_code == 200 + return response.json() + + +def test_engine_descriptor_basic(client, client_with_user_role, url_priorities): + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Testing simple prioritizationpri" + z["nettests"][0]["targets_name"] = "websites_list_prioritized" + z["nettests"][0]["inputs"] = None + z["nettests"][0]["inputs_extra"] = None + z["nettests"] = z["nettests"][:1] + + # Create a link + j = postj(client_with_user_role, 
"/api/v2/oonirun/links", **z) + orlid = j["oonirun_link_id"] + + # Get link + r = client.post( + f"/api/v2/oonirun/links/{orlid}/engine-descriptor/latest", json=SAMPLE_META + ) + assert r.status_code == 200, r.json() + j = r.json() + + urls = j["nettests"][0]["inputs"] + assert len(urls) > 1, urls + + +def test_check_in_url_category_news(client, client_with_user_role, url_priorities): + """ + Test that you can filter by category codes + """ + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Categories filtering" + z["nettests"][0]["targets_name"] = "websites_list_prioritized" + z["nettests"][0]["inputs"] = None + z["nettests"][0]["inputs_extra"] = None + z["nettests"] = z["nettests"][:1] + + # Create a link + j = postj(client_with_user_role, "/api/v2/oonirun/links", **z) + orlid = j["oonirun_link_id"] + + # fetch the link + meta = deepcopy(SAMPLE_META) + meta["website_category_codes"] = ["NEWS"] + j = postj(client, f"/api/v2/oonirun/links/{orlid}/engine-descriptor/latest", **meta) + inputs = j["nettests"][0]["inputs"] + inputs_extra = j["nettests"][0]["inputs_extra"] + assert len(inputs), inputs + assert len(inputs) == len(inputs_extra) + for extra in inputs_extra: + assert extra["category_code"] == "NEWS" + + +def test_prioritization_with_measurements( + client, client_with_user_role, url_priorities, measurements +): + """ + Test priorization including measurements + """ + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Testing header parsing" + z["nettests"][0]["targets_name"] = "websites_list_prioritized" + z["nettests"][0]["inputs"] = None + z["nettests"][0]["inputs_extra"] = None + z["nettests"] = z["nettests"][:1] + + # Create a link + j = postj(client_with_user_role, "/api/v2/oonirun/links", **z) + orlid = j["oonirun_link_id"] + + # fetch the link + meta = deepcopy(SAMPLE_META) + # In ES we have more measurements for twitter, (see tests/fixtures/data/measurements.json) + # so twitter should NOT show up first + meta["probe_cc"] = "ES" + j = postj(client, 
f"/api/v2/oonirun/links/{orlid}/engine-descriptor/latest", **meta) + inputs = j["nettests"][0]["inputs"] + assert len(inputs), inputs + assert "twitter.com" not in inputs[0], "Twitter should not be the first one" + + # Twitter with a different asn can be first + meta["probe_cc"] = "ES" + meta["probe_asn"] = "AS9999" + j = postj(client, f"/api/v2/oonirun/links/{orlid}/engine-descriptor/latest", **meta) + inputs = j["nettests"][0]["inputs"] + assert len(inputs), inputs + assert "twitter.com" in inputs[0], "Twitter should be the first one" + + # Similarly, in IT twitter should be first, and facebook last + meta["probe_cc"] = "IT" + meta["probe_asn"] = "AS1234" + j = postj(client, f"/api/v2/oonirun/links/{orlid}/engine-descriptor/latest", **meta) + inputs = j["nettests"][0]["inputs"] + assert len(inputs), inputs + assert "twitter.com" in inputs[0], "Twitter should be the first one" + assert "facebook.com" in inputs[-1], "Facebook should be the last one" + + +def test_priorities_basic( + client, + client_with_user_role, + measurements, + url_priorities, + super_prioritized_website, +): + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Testing header parsing" + z["nettests"][0]["targets_name"] = "websites_list_prioritized" + z["nettests"][0]["inputs"] = None + z["nettests"][0]["inputs_extra"] = None + z["nettests"] = z["nettests"][:1] + + # Create a link + j = postj(client_with_user_role, "/api/v2/oonirun/links", **z) + orlid = j["oonirun_link_id"] + + meta = deepcopy(SAMPLE_META) + meta["probe_cc"] = "ES" + j = postj(client, f"/api/v2/oonirun/links/{orlid}/engine-descriptor/latest", **meta) + inputs = j["nettests"][0]["inputs"] + assert len(inputs), inputs + assert "ooni.org" in inputs[0], "Ooni should be the first one" diff --git a/ooniapi/services/oonirun/tests/test_database.py b/ooniapi/services/oonirun/tests/test_database.py index 79969c58d..d9a62786a 100644 --- a/ooniapi/services/oonirun/tests/test_database.py +++ b/ooniapi/services/oonirun/tests/test_database.py @@ 
-7,7 +7,6 @@ import sqlalchemy as sa from sqlalchemy.orm import sessionmaker from oonirun import models -from oonirun.dependencies import get_postgresql_session from sqlalchemy import create_engine SAMPLE_OONIRUN = { @@ -32,7 +31,6 @@ "options": { "HTTP3Enabled": True, }, - "backend_options": {}, "is_background_run_enabled_default": False, "is_manual_run_enabled_default": False, "test_name": "web_connectivity", @@ -40,7 +38,24 @@ { "inputs": [], "options": {}, - "backend_options": {}, + "is_background_run_enabled_default": False, + "is_manual_run_enabled_default": False, + "test_name": "dnscheck", + }, + { + "targets_name": "sample_target", + "options": {}, + "is_background_run_enabled_default": False, + "is_manual_run_enabled_default": False, + "test_name": "dnscheck", + }, + { + "inputs": [ + "https://example.com/", + "https://ooni.org/", + ], + "inputs_extra": [{"category_code": "HUMR"}, {}], + "options": {}, "is_background_run_enabled_default": False, "is_manual_run_enabled_default": False, "test_name": "dnscheck", @@ -127,13 +142,27 @@ def test_upgrade_to_head(postgresql): nettest_index=0, date_created=utcnow_seconds(), ), + models.OONIRunLinkNettest( + **nettests[2], + revision=1, + nettest_index=2, + date_created=utcnow_seconds(), + ), + models.OONIRunLinkNettest( + **nettests[3], + revision=1, + nettest_index=3, + date_created=utcnow_seconds(), + ), ] db.add(db_runlink) db.commit() new_row = db.query(models.OONIRunLink).first() assert new_row - assert new_row.nettests[0].revision == 3 + assert ( + new_row.nettests[0].revision == 3 + ), "First one to show up should have the latest revision" db.close() diff --git a/ooniapi/services/oonirun/tests/test_oonirun.py b/ooniapi/services/oonirun/tests/test_oonirun.py index 737fb4ccb..74c018503 100644 --- a/ooniapi/services/oonirun/tests/test_oonirun.py +++ b/ooniapi/services/oonirun/tests/test_oonirun.py @@ -6,13 +6,8 @@ from datetime import datetime, timedelta, timezone import time -from oonirun import models -from 
oonirun.routers.v2 import utcnow_seconds -import pytest +from oonirun.routers.v2 import utcnow_seconds, NETWORK_TYPES -import sqlalchemy as sa -from sqlalchemy.orm import sessionmaker -from sqlalchemy import create_engine SAMPLE_OONIRUN = { "name": "", @@ -33,18 +28,29 @@ "https://example.com/", "https://ooni.org/", ], + "targets_name": None, + "inputs_extra": None, "options": { "HTTP3Enabled": True, }, - "backend_options": {}, "is_background_run_enabled_default": False, "is_manual_run_enabled_default": False, "test_name": "web_connectivity", }, { "inputs": [], + "targets_name": None, + "inputs_extra": None, "options": {}, - "backend_options": {}, + "is_background_run_enabled_default": False, + "is_manual_run_enabled_default": False, + "test_name": "dnscheck", + }, + { + "inputs": None, + "targets_name": "websites_list_prioritized", + "inputs_extra": None, + "options":{}, "is_background_run_enabled_default": False, "is_manual_run_enabled_default": False, "test_name": "dnscheck", @@ -72,6 +78,15 @@ "expiration_date", ] +SAMPLE_META = { + "run_type": "timed", + "is_charging": True, + "probe_asn": "AS1234", + "probe_cc": "VE", + "network_type": "wifi", + "website_category_codes": [], +} + def test_get_version(client): r = client.get("/version") @@ -215,6 +230,7 @@ def test_oonirun_full_workflow(client, client_with_user_role, client_with_admin_ assert j["name"] == z["name"] assert j["name_intl"] == z["name_intl"] assert j["description"] == z["description"] + assert j["nettests"] == z["nettests"] date_created = datetime.strptime( j["date_created"], "%Y-%m-%dT%H:%M:%S.%fZ" @@ -402,7 +418,7 @@ def test_oonirun_full_workflow(client, client_with_user_role, client_with_admin_ r = client_with_user_role.get(f"/api/v2/oonirun/links/{oonirun_link_id}") assert r.status_code == 200, r.json() descs = r.json()["nettests"] - assert len(descs) == 2, r.json() + assert len(descs) == 3, r.json() ## List descriptors r = client_with_user_role.get(f"/api/v2/oonirun/links") @@ -560,16 
+576,25 @@ def test_oonirun_revisions(client, client_with_user_role): assert j["revisions"][0] == "3", "the latest one is 3" ## Fetch nettests for latest - r = client.get( - f"/api/v2/oonirun/links/{oonirun_link_id_one}/engine-descriptor/latest" + r = client.post( + f"/api/v2/oonirun/links/{oonirun_link_id_one}/engine-descriptor/latest", + json=SAMPLE_META, ) + assert r.status_code == 200, r.json() j_latest = r.json() assert j_latest["revision"] == "3", "revision is 3" + + # The engine-descriptor returns a list along with targets name on reading + lastest_nettests[2]["inputs"] = [] + lastest_nettests[2]["inputs_extra"] = [] assert j_latest["nettests"] == lastest_nettests, "nettests are the same" assert j_latest["date_created"] == latest_date_created, "date created matches" ## Should match latest - r = client.get(f"/api/v2/oonirun/links/{oonirun_link_id_one}/engine-descriptor/3") + r = client.post( + f"/api/v2/oonirun/links/{oonirun_link_id_one}/engine-descriptor/3", + json=SAMPLE_META, + ) assert j_latest == r.json() ## Fetch invalid revision number @@ -585,6 +610,288 @@ def test_oonirun_revisions(client, client_with_user_role): assert r.status_code == 404, r.json() ## Get not-existing engine descriptor - r = client.get(f"/api/v2/oonirun/links/404/engine-descriptor/latest") + r = client.post( + f"/api/v2/oonirun/links/404/engine-descriptor/latest", json=SAMPLE_META + ) j = r.json() assert r.status_code == 404, r.json() + + +def test_inputs_extra_length(client, client_with_user_role): + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "integ-test name in English" + nettests = z.pop("nettests") + nettests = nettests[:1] + nettests[0]["inputs_extra"] = [ + { + "provider": "riseupvpn", + } + ] + z["nettests"] = nettests + + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert ( + r.status_code == 422 + ), "Should fail when inputs_extra != None and len(inputs_extra) != len(inputs)" + + nettests[0]["inputs_extra"] = [ + { + "provider": "riseupvpn", + }, + { 
+ "provider": "riseupvpn", + }, + ] + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 200, "Appropiate inputs extra size, should pass" + + nettests[0].pop("inputs_extra") + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert ( + r.status_code == 200 + ), "No checks should be performed when inputs_extra is None" + + +def test_link_revision_args(client, client_with_user_role): + # Check args parsing for oonirun engine-descriptor + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Testing descriptor revision" + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 200, r.json() + j = r.json() + id = j["oonirun_link_id"] + + # Try with good arguments: every accepted run_type must be sent and yield 200 + gs = ["timed", "manual"] + for good in gs: + r = client.post( + f"/api/v2/oonirun/links/{id}/engine-descriptor/1", json={**SAMPLE_META, "run_type": good} + ) + assert r.status_code == 200, r.json() + + # Try with bad arguments + bm = deepcopy(SAMPLE_META) + bm["run_type"] = "bad" + r = client.post(f"/api/v2/oonirun/links/{id}/engine-descriptor/1", json=bm) + assert r.status_code == 422, r.json() + + +def test_inputs_and_targets_name(client_with_user_role): + """ + Test that you can't specify targets_name and inputs in the same request + """ + + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Testing no targets and inputs at the same time" + + # Only inputs = OK + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 200, r.json() + + # Only targets = OK + z["nettests"] = [ + { + "inputs": None, + "targets_name": "example_name", + "inputs_extra": None, + "options": { + "HTTP3Enabled": True, + }, + "is_background_run_enabled_default": False, + "is_manual_run_enabled_default": False, + "test_name": "web_connectivity", + }, + ] + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 200, r.json() + + # Both targets and input = error + z["nettests"] = [ + { + "inputs": [ +
"https://example.com/", + "https://ooni.org/", + ], + "targets_name": "example_name", + "inputs_extra": None, + "options": { + "HTTP3Enabled": True, + }, + "is_background_run_enabled_default": False, + "is_manual_run_enabled_default": False, + "test_name": "web_connectivity", + }, + ] + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 422, r.json() + + # Both targets and inputs_extra = error + z["nettests"] = [ + { + "targets_name": "example_name", + "inputs_extra": [{}, {}], + "options": { + "HTTP3Enabled": True, + }, + "is_background_run_enabled_default": False, + "is_manual_run_enabled_default": False, + "test_name": "web_connectivity", + }, + ] + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 422, r.json() + + # Targets with inputs = [] still an error + z["nettests"] = [ + { + "targets_name": "example_name", + "inputs_extra": [], + "inputs": [], + "options": { + "HTTP3Enabled": True, + }, + "is_background_run_enabled_default": False, + "is_manual_run_enabled_default": False, + "test_name": "web_connectivity", + }, + ] + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 422, r.json() + + +def test_creation_with_targets_name(client_with_user_role): + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Testing dynamic test lists calculation" + z["nettests"][0]["inputs"] = None + z["nettests"][0]["targets_name"] = "websites_list_prioritized" + z["nettests"] = z["nettests"][:1] + + # Create + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 200, r.json() + j = r.json() + + # Retrieve + r = client_with_user_role.get(f"/api/v2/oonirun/links/{j['oonirun_link_id']}") + assert r.status_code == 200, r.json() + j = r.json() + + # Does it have the targets name? 
+ assert ( + j["nettests"][0]["targets_name"] == "websites_list_prioritized" + ), "Missing targets_name" + + # now test that you can edit + z["nettests"][0]["targets_name"] = "new_value" + r = client_with_user_role.put( + f"/api/v2/oonirun/links/{j['oonirun_link_id']}", json=z + ) + assert r.status_code == 200, r.json() + + # Retrieve again + r = client_with_user_role.get(f"/api/v2/oonirun/links/{j['oonirun_link_id']}") + assert r.status_code == 200, r.json() + j = r.json() + + assert ( + j["nettests"][0]["targets_name"] == "new_value" + ), "Value of nettest should be changed by now" + + +def test_dynamic_test_lists_calculation(client_with_user_role): + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Testing dynamic test lists calculation" + z["nettests"][0]["inputs"] = None + z["nettests"][0]["targets_name"] = "websites_list_prioritized" + z["nettests"] = z["nettests"][:1] + + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 200, r.json() + j = r.json() + + r = client_with_user_role.post( + f"/api/v2/oonirun/links/{j['oonirun_link_id']}/engine-descriptor/latest", + json=SAMPLE_META, + ) + assert r.status_code == 200, r.json() + + j = r.json() + assert j["nettests"][0]["targets_name"] == "websites_list_prioritized" + + +def test_x_user_agent_header_parsing(client_with_user_role, client): + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Testing header parsing" + + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 200, r.json() + j = r.json() + + # Test with good headers + headers = { + "UserAgent": "ooniprobe-android-unattended,3.8.2,android,ooniprobe-engine,3.17.2,ooniprobe-engine_1.2.3" + } + + r = client_with_user_role.post( + f"/api/v2/oonirun/links/{j['oonirun_link_id']}/engine-descriptor/latest", + json=SAMPLE_META, + ) + r = client.post( + f"/api/v2/oonirun/links/{j['oonirun_link_id']}/engine-descriptor/latest", + headers=headers, + json=SAMPLE_META, + ) + assert r.status_code == 200, 
r.json() + + # Should be able to skip the header + r = client_with_user_role.post( + f"/api/v2/oonirun/links/{j['oonirun_link_id']}/engine-descriptor/latest", + json=SAMPLE_META, + ) + r = client.post( + f"/api/v2/oonirun/links/{j['oonirun_link_id']}/engine-descriptor/latest", + json=SAMPLE_META, + ) + assert r.status_code == 200, r.json() + + # Bad header + headers = { + "UserAgent": "ooniprobe-android-unattended,3.8.2,android,ooniprobe-engine,3.17.2" + } + r = client.post( + f"/api/v2/oonirun/links/{j['oonirun_link_id']}/engine-descriptor/latest", + headers=headers, + json=SAMPLE_META, + ) + assert r.status_code == 422, r.json() + + +def test_network_type_validation(client_with_user_role, client): + z = deepcopy(SAMPLE_OONIRUN) + z["name"] = "Testing dynamic test lists calculation" + z["nettests"][0]["inputs"] = None + z["nettests"][0]["targets_name"] = "websites_list_prioritized" + z["nettests"] = z["nettests"][:1] + + # Create + r = client_with_user_role.post("/api/v2/oonirun/links", json=z) + assert r.status_code == 200, r.json() + j = r.json() + + # try to compute dynamic list with each network type + meta = deepcopy(SAMPLE_META) + for nt in NETWORK_TYPES: + meta["network_type"] = nt + r = client_with_user_role.post( + f"/api/v2/oonirun/links/{j['oonirun_link_id']}/engine-descriptor/latest", + json=meta, + ) + assert r.status_code == 200, r.json() + + # try with a bad network type + meta["network_type"] = "bad" + r = client_with_user_role.post( + f"/api/v2/oonirun/links/{j['oonirun_link_id']}/engine-descriptor/latest", + json=meta, + ) + assert r.status_code == 422, r.json()