-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Labels
enhancement — New feature or request
Description
Data source
Website: https://vodostaji.voda.ba/#2031
Terms of use: https://vodostaji.voda.ba/#Impressum
Type of data portal
No formal API access, but direct endpoint downloads. No registration required; data is available up to one year back.
Country/Countries/Regions
Bosnia and Herzegovina
Variables
hourly discharge, water temperature and stage.
Station list
https://vodostaji.voda.ba/data/internet/layers/20/index.json
(But also implemented in the code below)
Code snippets
Metadata/list of stations:
import requests
import pandas as pd
import numpy as np
def get_vodostaji_metadata() -> pd.DataFrame:
    """
    Fetch the latest station snapshot (value + timestamp + metadata)
    from vodostaji.voda.ba.

    - Keeps ALL original columns
    - Renames only user-selected fields
    - Converts numeric columns and extracts the catchment area in km²

    Returns
    -------
    pd.DataFrame
        One row per station. An empty DataFrame is returned on any
        fetch/parse failure (best-effort behaviour, no exception raised).
    """
    url = "https://vodostaji.voda.ba/data/internet/layers/20/index.json"

    # --- Step 1: Fetch JSON ---
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        data = r.json()
        if not isinstance(data, list) or not data:
            print("No snapshot records returned.")
            return pd.DataFrame()
    except Exception as e:
        # Deliberately broad: callers get an empty frame instead of a crash.
        print(f"Failed to fetch snapshot data: {e}")
        return pd.DataFrame()

    # --- Step 2: Normalize JSON ---
    df_all = pd.json_normalize(data)

    # --- Step 3: Rename only selected fields ---
    rename_map = {
        # identifiers
        "metadata_station_no": "gauge_id",
        "metadata_station_name": "station_name",
        "metadata_river_name": "river",
        "metadata_catchment_name": "catchment",
        "metadata_station_latitude": "latitude",
        "metadata_station_longitude": "longitude",
        "metadata_CATCHMENT_SIZE": "area",
    }
    df_all = df_all.rename(columns=rename_map)

    # --- Step 4: Extract catchment area as a number ---
    # BUG FIX: this previously checked for "metadata_CATCHMENT_SIZE", which
    # no longer exists after the rename above, so the extraction never ran.
    # It must also happen BEFORE the generic numeric coercion below, which
    # would turn strings like "123 km²" into NaN.
    if "area" in df_all.columns:
        df_all["catchment_area_km2"] = (
            df_all["area"]
            .astype(str)  # tolerate mixed/numeric cells
            .str.replace("km²", "", regex=False)
            .str.strip()
            .pipe(pd.to_numeric, errors="coerce")
        )

    # --- Step 5: Type conversions ---
    numeric_cols = [
        "latitude",
        "longitude",
        "area",
        "metadata_station_carteasting",
        "metadata_station_cartnorthing",
        "metadata_station_local_x",
        "metadata_station_local_y",
    ]
    for col in numeric_cols:
        if col in df_all.columns:
            df_all[col] = pd.to_numeric(df_all[col], errors="coerce")

    # --- Step 6: Add static metadata ---
    df_all["country"] = "Bosnia and Herzegovina"
    df_all["source"] = "vodostaji.voda.ba"

    # --- Step 7: Basic cleanup ---
    return df_all.reset_index(drop=True)
Time-series:
import requests
import pandas as pd
import numpy as np
from typing import Optional
from io import BytesIO
def get_vodostaji_data(
    gauge_id: str,
    variable: str,
    frequency: str = "instantaneous",
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    station_groups=range(1, 11),
) -> pd.DataFrame:
    """
    Download hydrological time series data from vodostaji.voda.ba,
    automatically detecting the correct station group.

    Parameters
    ----------
    gauge_id : str
        Station number (e.g. '4411')
    variable : str
        'discharge', 'stage', or 'temperature'
    frequency : str
        'instantaneous' or 'daily' (daily = mean of instantaneous values)
    start_date, end_date : str, optional
        ISO dates ('YYYY-MM-DD'), inclusive bounds
    station_groups : iterable
        Station group IDs to try (default: 1..10)

    Returns
    -------
    pd.DataFrame
        Columns: ['time', '<variable>']; empty if no group served the file.
        The detected group / station id / variable are attached in ``.attrs``.

    Raises
    ------
    ValueError
        If `variable` or `frequency` is not one of the accepted values.
    """
    variable = variable.lower()
    frequency = frequency.lower()
    if variable not in ("discharge", "stage", "temperature"):
        raise ValueError("variable must be 'discharge', 'stage', or 'temperature'.")
    if frequency not in ("instantaneous", "daily"):
        raise ValueError("frequency must be 'instantaneous' or 'daily'.")

    # --- Variable mapping: URL path code + 1-year export filename ---
    var_map = {
        "discharge": {"code": "Q", "file": "Q_1Y.xlsx"},
        "stage": {"code": "H", "file": "H_1Y.xlsx"},
        "temperature": {"code": "WT", "file": "Tvode_1Y.xlsx"},
    }
    var_code = var_map[variable]["code"]
    filename = var_map[variable]["file"]

    # --- Try station groups until one works ---
    content = None
    used_group = None
    for group in station_groups:
        base_url = f"https://vodostaji.voda.ba/data/internet/stations/{group}"
        # BUG FIX: the URL previously ended with a literal "(unknown)"
        # placeholder; it must end with the per-variable export filename.
        url = f"{base_url}/{gauge_id}/{var_code}/{filename}"
        try:
            r = requests.get(url, timeout=20)
            # Size guard: error pages are small; a real xlsx export is not.
            if r.status_code == 200 and len(r.content) > 10_000:
                content = r.content
                used_group = group
                break
        except Exception:
            # Network hiccup on one group: keep probing the rest.
            continue
    if content is None:
        return pd.DataFrame(columns=["time", variable])

    # --- Read Excel (skip the 8-row metadata header) ---
    df = pd.read_excel(
        BytesIO(content),
        skiprows=8,
        names=["time", variable],
    )
    if df.empty:
        return pd.DataFrame(columns=["time", variable])

    # --- Parse types (source uses day-first timestamps) ---
    df["time"] = pd.to_datetime(df["time"], dayfirst=True, errors="coerce")
    df[variable] = pd.to_numeric(df[variable], errors="coerce")
    df = df.dropna(subset=["time", variable])

    # --- Date filtering (inclusive) ---
    if start_date:
        df = df[df["time"] >= pd.to_datetime(start_date)]
    if end_date:
        df = df[df["time"] <= pd.to_datetime(end_date)]

    # --- Aggregate if needed ---
    if frequency == "daily":
        df = (
            df.set_index("time")
            .resample("D")
            .mean()
            .dropna()
            .reset_index()
        )

    # --- Final cleanup ---
    df = df.drop_duplicates(subset="time", keep="first")
    df = df.sort_values("time").reset_index(drop=True)

    # Attach provenance metadata (does not survive most pandas ops).
    df.attrs["station_group"] = used_group
    df.attrs["station_id"] = gauge_id
    df.attrs["variable"] = variable
    return df
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
enhancement — New feature or request