diff --git a/CHANGELOG.md b/CHANGELOG.md index d530b0f..1340ce1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,20 @@ All notable changes to this project will be documented in this file. +## Unreleased - 2025-09-26 + +### Added +- New HTTP API server: `asninfo serve` subcommand with CORS enabled and background refresh. +- Lookup endpoints: `GET /lookup` and `POST /lookup` with optional `legacy=true` response. +- Health endpoint: `GET /health`. +- Environment variable `ASNINFO_MAX_ASNS` to cap ASNs per request (default 100). +- Request logging for HTTP server; requests to `/health` are excluded to reduce noise. + +### Changed +- Documentation: README expanded with commands, HTTP API usage, Docker examples, and environment variables. +- Clarified that CSV exports always use the simplified schema; `--simplified` flag implies reduced dataset (skips population, hegemony, and PeeringDB data). +- Docker README: added examples showing how to pass a local .env using `--env-file` or by bind-mounting to `/asninfo/.env`. 
+ ## v0.3.4 - 2025-09-10 * update `bgpkit-commons` to `v0.9.4` to support older Rust versions diff --git a/Cargo.lock b/Cargo.lock index dcc71af..0147236 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -108,12 +108,17 @@ dependencies = [ name = "asninfo" version = "0.3.4" dependencies = [ + "axum", "bgpkit-commons", + "chrono", "clap", "dotenvy", + "http 1.3.1", "oneio 0.19.0", "serde", "serde_json", + "tokio", + "tower-http", "tracing", "tracing-subscriber", ] @@ -214,6 +219,60 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "axum" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http 1.3.1", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" +dependencies = [ + "bytes", + "futures-core", + "http 1.3.1", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -773,6 +832,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + 
[[package]] name = "hyper" version = "1.7.0" @@ -787,6 +852,7 @@ dependencies = [ "http 1.3.1", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -1098,6 +1164,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "maybe-async" version = "0.2.10" @@ -1706,18 +1778,28 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.225" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6c24dee235d0da097043389623fb913daddf92c76e9f5a1db88607a0bcbd1d" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.225" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "659356f9a0cb1e529b24c01e43ad2bdf520ec4ceaf83047b83ddcc2251f96383" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.225" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "0ea936adf78b1f766949a4977b91d2f5595825bd6ec079aa9543ad2685fc4516" dependencies = [ "proc-macro2", "quote", @@ -1736,6 +1818,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2000,9 +2093,21 @@ dependencies = [ 
"pin-project-lite", "slab", "socket2", + "tokio-macros", "windows-sys 0.59.0", ] +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tokio-rustls" version = "0.26.2" @@ -2039,6 +2144,7 @@ dependencies = [ "tokio", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -2077,6 +2183,7 @@ version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", diff --git a/Cargo.toml b/Cargo.toml index ef0f3cc..792700d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,11 @@ oneio = { version = "0.19.0", default-features = false, features = ["https", "s3 tracing-subscriber = "0.3" tracing = "0.1" clap = { version = "4.5", features = ["derive"] } - +axum = { version = "0.8"} +tokio = { version = "1.39", features = ["macros", "rt-multi-thread", "time", "net"] } +chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "std"] } + dotenvy = "0.15.7" -serde = { version = "1.0.210", features = ["derive"] } \ No newline at end of file +serde = { version = "1.0.210", features = ["derive"] } +http = "1" +tower-http = { version = "0.6", features = ["cors"] } \ No newline at end of file diff --git a/README.md b/README.md index 58d9084..50e0208 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,12 @@ -# ASN Information Data Uploader +# ASN Information Tool (exporter and HTTP API) -Fetch the latest ASN information and export to JSON, JSONL, or CSV. Optionally upload to an S3-compatible target when -configured via environment variables. +Export up-to-date ASN information to JSON, JSONL, or CSV files, and optionally upload to an S3-compatible target. 
+You can also run a lightweight HTTP API server to perform ASN info lookups. + +- Export formats: JSON, JSONL, CSV (CSV uses a simplified schema) +- Optional upload to S3/R2 via environment variables (no CLI flag needed) +- HTTP API with GET/POST lookup endpoints and CORS enabled +- .env files supported via dotenv ## Install @@ -24,25 +29,225 @@ cargo install cargo-binstall cargo binstall asninfo ``` -## Usage +## Commands + +The CLI provides two subcommands: generate and serve. ```shell -Usage: asninfo [OPTIONS] [PATH] +asninfo generate [OPTIONS] [PATH] + +Options: + -s, --simplified Export simplified fields (implied for .csv) Arguments: - [PATH] Export data path [default: ./asninfo.jsonl] - Format is inferred from file extension: .json, .jsonl, or .csv + [PATH] Export data path (default: ./asninfo.jsonl) + Format is inferred from file extension: .json, .jsonl, or .csv +``` + +```shell +asninfo serve [OPTIONS] Options: - -s, --simplified Export simplified fields (implied for .csv) - -h, --help Print help - -V, --version Print version + -b, --bind Bind address (default: 0.0.0.0:8080) + --refresh-secs Background refresh interval in seconds (default: 21600) + --simplified Use simplified mode (skip heavy datasets) +``` + +### Examples + +- Export JSONL with full fields: + +```bash +asninfo generate ./asninfo.jsonl +# same as "asninfo generate" +``` + +- Export CSV (simplified schema is implied): + +```bash +asninfo generate ./asninfo.csv ``` -Notes +- Export simplified JSON (smaller payload): + +```bash +asninfo generate -s ./asninfo.json +``` + +- Upload automatically to S3/R2 by setting environment variables: + +```bash +export ASNINFO_UPLOAD_PATH="r2://my-bucket/asn/asninfo.jsonl" +export AWS_REGION="auto" # for Cloudflare R2 +export AWS_ENDPOINT="https://.r2.cloudflarestorage.com" +export AWS_ACCESS_KEY_ID=... +export AWS_SECRET_ACCESS_KEY=... 
+ +asninfo generate ./asninfo.jsonl +``` + +## HTTP API + +Start the server: + +```bash +asninfo serve --bind 0.0.0.0:8080 --refresh-secs 21600 +``` + +- Background updater refreshes the in-memory dataset every refresh-secs seconds (minimum 3600). +- CORS is enabled for all origins. +- Simplified mode reduces memory footprint by skipping heavy datasets (population, hegemony, PeeringDB). +- The maximum number of ASNs per request is limited by the environment variable ASNINFO_MAX_ASNS (default 100). + +### Endpoints + +- GET /health + - Returns status and metadata, including updatedAt timestamp. + +- GET /lookup?asns=AS1,AS2,...[&legacy=true] + - Query parameter asns is a comma-separated list of ASNs. + - Optional legacy=true to return a legacy array of objects instead of the structured response. -- Upload is automatic when ASNINFO_UPLOAD_PATH is set (no --upload flag). -- .env files are supported (dotenv). +- POST /lookup + - JSON body: { "asns": [number, ...] } + - Note: legacy=true is only supported on GET /lookup. + +### Responses + +Default response (full schema plus country_name): + +```json +[ + { + "asn": 13335, + "name": "CLOUDFLARENET", + "country": "US", + "country_name": "United States", + "as2org": { + "org_id": "CLOUD14-ARIN", + "org_name": "Cloudflare, Inc.", + "name": "CLOUDFLARENET", + "country": "US" + }, + "hegemony": { + "asn": 13335, + "ipv4": 0.0018, + "ipv6": 0.0084 + }, + "peeringdb": { + "asn": 13335, + "name": "Cloudflare", + "aka": "", + "name_long": "", + "website": "https://www.cloudflare.com", + "irr_as_set": "AS13335:AS-CLOUDFLARE" + }, + "population": { + "user_count": 10, + "sample_count": 127, + "percent_global": 0.0, + "percent_country": 0.02 + } + } +] +``` + +Note: When the server runs with --simplified, heavy datasets (population, hegemony, PeeringDB) are omitted and will be +null in responses. + +Legacy response (when legacy=true) returns an array of objects compatible with the previous consumer format. 
+ +### Example requests + +```bash +# GET +curl 'http://localhost:8080/lookup?asns=13335,15169' + +# POST +curl -X POST 'http://localhost:8080/lookup' \ + -H 'Content-Type: application/json' \ + -d '{"asns":[13335,15169]}' +``` + +```json +[ + { + "as2org": { + "country": "US", + "name": "CLOUDFLARENET", + "org_id": "CLOUD14-ARIN", + "org_name": "Cloudflare, Inc." + }, + "asn": 13335, + "country": "US", + "country_name": "United States", + "hegemony": { + "asn": 13335, + "ipv4": 0.0017993252336435785, + "ipv6": 0.008380104743151566 + }, + "name": "CLOUDFLARENET", + "peeringdb": { + "aka": "", + "asn": 13335, + "irr_as_set": "AS13335:AS-CLOUDFLARE", + "name": "Cloudflare", + "name_long": "", + "website": "https://www.cloudflare.com" + }, + "population": { + "percent_country": 0.02, + "percent_global": 0.0, + "sample_count": 127, + "user_count": 10 + } + }, + { + "as2org": { + "country": "US", + "name": "GOOGLE", + "org_id": "GOGL-ARIN", + "org_name": "Google LLC" + }, + "asn": 15169, + "country": "US", + "country_name": "United States", + "hegemony": { + "asn": 15169, + "ipv4": 0.0072255134909779304, + "ipv6": 0.002685539203529714 + }, + "name": "GOOGLE", + "peeringdb": { + "aka": "Google, YouTube (for Google Fiber see AS16591 record)", + "asn": 15169, + "irr_as_set": "RADB::AS-GOOGLE", + "name": "Google LLC", + "name_long": "", + "website": "https://about.google/intl/en/" + }, + "population": { + "percent_country": 0.01, + "percent_global": 0.0, + "sample_count": 740, + "user_count": 521 + } + } +] +``` + +## CSV simplified schema + +When exporting CSV (or using --simplified), the schema is: + +``` +asn,as_name,org_id,org_name,country_code,country_name,data_source +``` + +Notes: + +- country_name is looked up from country_code where available. +- data_source is reserved for future use. 
## Environment variables @@ -57,4 +262,42 @@ Required for S3/R2 upload (when ASNINFO_UPLOAD_PATH is set): Optional: - ASNINFO_HEARTBEAT_URL — HTTP/HTTPS URL to request after a successful upload (used as a heartbeat) -- PEERINGDB_API_KEY — used by dependencies to access PeeringDB API (avoids rate limits) \ No newline at end of file +- ASNINFO_MAX_ASNS — maximum ASNs per lookup request for the HTTP API (default: 100) +- PEERINGDB_API_KEY — used by dependencies to access PeeringDB API (avoids rate limits) + +.env files are supported and loaded automatically when present. + +## Docker + +A minimal container image can be built using the provided Dockerfile: + +```bash +docker build -t asninfo . + +# run generator, mounting a host directory for output +docker run --rm -v "$PWD:/out" asninfo generate /out/asninfo.jsonl + +# run HTTP server on port 8080 +docker run --rm -p 8080:8080 asninfo serve --bind 0.0.0.0:8080 +``` + +### Using a local env file (.env) + +The image supports environment variables for uploads and server limits. 
You can pass your local .env to the container +using Docker's `--env-file`: + +```bash +# pass variables from ./.env to the container environment +# works for both generate and serve + +docker run --rm \ + --env-file ./.env \ + -v "$PWD:/out" \ + asninfo generate /out/asninfo.jsonl + +# server example with env-file +docker run --rm \ + --env-file ./.env \ + -p 8080:8080 \ + asninfo serve --bind 0.0.0.0:8080 +``` \ No newline at end of file diff --git a/src/api.rs b/src/api.rs new file mode 100644 index 0000000..53d85d4 --- /dev/null +++ b/src/api.rs @@ -0,0 +1,297 @@ +use axum::{ + extract::{Query, Request as AxumRequest, State}, + http::{Method, StatusCode}, + middleware::{self, Next}, + response::Response, + routing::get, + Json, Router, +}; +use bgpkit_commons::asinfo::AsInfo; +use chrono::{SecondsFormat, Utc}; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; +use std::time::Duration; +use tokio::task::JoinHandle; +use tokio::time::sleep; +use tower_http::cors::{Any, CorsLayer}; +use tracing::{error, info}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AsInfoOut { + #[serde(flatten)] + pub inner: AsInfo, + #[serde(rename = "country_name")] + pub country_name: String, +} + +#[derive(Clone)] +pub struct AppState { + pub map: Arc<Mutex<HashMap<u32, AsInfoOut>>>, + pub updated_at: Arc<Mutex<String>>, + pub max_asns: usize, +} + +#[derive(Deserialize)] +struct LookupQuery { + asns: Option<String>, + legacy: Option<bool>, +} + +#[derive(Deserialize)] +struct LookupBody { + asns: Vec<u32>, +} + +pub fn build_router(state: AppState) -> Router { + let cors = CorsLayer::new() + .allow_origin(Any) + .allow_methods([Method::GET, Method::POST]) + .allow_headers(Any); + + Router::new() + .route("/lookup", get(get_lookup).post(post_lookup)) + .route("/health", get(health)) + .with_state(state) + // log all requests except /health + .layer(middleware::from_fn(log_requests)) + .layer(cors) +} + +// Middleware to log requests, skipping /health
+async fn log_requests(req: AxumRequest, next: Next) -> Response { + let path = req.uri().path().to_string(); + if path == "/health" { + return next.run(req).await; + } + let method = req.method().clone(); + let start = std::time::Instant::now(); + let response = next.run(req).await; + let status = response.status(); + let elapsed_ms = start.elapsed().as_millis(); + info!( + method = %method, + path = %path, + status = %status.as_u16(), + latency_ms = elapsed_ms, + "request" + ); + response +} + +pub fn load_asn_map_out(simplified: bool) -> Result<(HashMap<u32, AsInfoOut>, String), i32> { + let load_population = !simplified; + let load_hegemony = !simplified; + let load_peeringdb = !simplified; + + info!("loading asn info data ..."); + let mut commons = bgpkit_commons::BgpkitCommons::new(); + if let Err(e) = commons.load_asinfo(true, load_population, load_hegemony, load_peeringdb) { + error!("failed to load asn info data: {e}"); + return Err(1); + }; + if let Err(e) = commons.load_countries() { + error!("failed to load countries: {e}"); + return Err(2); + }; + let as_info_map = match commons.asinfo_all() { + Ok(map) => map, + Err(e) => { + error!("failed to get asinfo map: {e}"); + return Err(3); + } + }; + + // build enriched map with country_name + let mut out: HashMap<u32, AsInfoOut> = HashMap::with_capacity(as_info_map.len()); + for (asn, info) in as_info_map.iter() { + let country_name = commons + .country_by_code(&info.country) + .ok() + .flatten() + .map(|c| c.name) + .unwrap_or_default(); + out.insert( + *asn, + AsInfoOut { + inner: info.clone(), + country_name, + }, + ); + } + let updated_at = Utc::now().to_rfc3339_opts(SecondsFormat::Millis, true); + + Ok((out, updated_at)) +} + +const MINIMUM_UPDATER_INTERVAL_SECS: u64 = 3600; + +pub fn start_updater( + map: Arc<Mutex<HashMap<u32, AsInfoOut>>>, + updated_at: Arc<Mutex<String>>, + refresh_secs: u64, + simplified: bool, +) -> JoinHandle<()> { + tokio::spawn(async move { + let interval = Duration::from_secs(refresh_secs.max(MINIMUM_UPDATER_INTERVAL_SECS)); // minimum 1 hour + loop
{ + sleep(interval).await; + info!("background updater: refreshing ASN data ..."); + match load_asn_map_out(simplified) { + Ok((new_map, ts)) => { + // Update both map and updated_at within a single critical section + // to avoid exposing an inconsistent state between them. + let mut map_guard = map.lock().unwrap_or_else(|poisoned| { + error!("background updater: map mutex is poisoned, recovering"); + poisoned.into_inner() + }); + let mut ts_guard = updated_at.lock().unwrap_or_else(|poisoned| { + error!("background updater: updated_at mutex is poisoned, recovering"); + poisoned.into_inner() + }); + *map_guard = new_map; + *ts_guard = ts; + info!("background updater: ASN data updated"); + } + Err(e) => { + error!("background updater: refresh failed with code {e}"); + } + } + } + }) +} + +async fn health(State(state): State<AppState>) -> Json<Value> { + let updated_at = state + .updated_at + .lock() + .unwrap_or_else(|e| e.into_inner()) + .clone(); + Json(json!({ + "status": "ok", + "updatedAt": updated_at, + })) +} + +fn convert_to_legacy(list: Vec<AsInfoOut>) -> Vec<Value> { + let mut out = Vec::with_capacity(list.len()); + for o in list.into_iter() { + let asn = o.inner.asn; + let as_name = o.inner.name.clone(); + let country_code = o.inner.country.clone(); + let country_name = o.country_name.clone(); + let org_id = o + .inner + .as2org + .as_ref() + .map(|v| v.org_id.clone()) + .unwrap_or_default(); + let org_name = o + .inner + .as2org + .as_ref() + .map(|v| v.org_name.clone()) + .unwrap_or_default(); + out.push(json!({ + "asn": asn, + "as_name": as_name, + "org_id": org_id, + "org_name": org_name, + "country_code": country_code, + "country_name": country_name, + "data_source": "", + })); + } + out +} + +async fn get_lookup( + State(state): State<AppState>, + Query(q): Query<LookupQuery>, +) -> Result<Json<Value>, (StatusCode, Json<Value>)> { + let asns: Vec<u32> = q + .asns + .clone() + .unwrap_or_default() + .split(',') + .filter_map(|s| s.trim().parse::<u32>().ok()) + .collect(); + + if asns.is_empty() { + return Err(( +
StatusCode::BAD_REQUEST, + Json(json!({"error": "no valid ASNs provided in 'asns' query parameter"})), + )); + } + + if asns.len() > state.max_asns { + return Err(( + StatusCode::PAYLOAD_TOO_LARGE, + Json( + json!({"error": format!("payload too large, max ASNs per request is {}", state.max_asns)}), + ), + )); + } + + let map_guard = state.map.lock().map_err(|_| { + error!("get_lookup: map mutex is poisoned"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({"error": "internal server error"})), + ) + })?; + + let mut found = Vec::with_capacity(asns.len()); + for asn in asns { + if let Some(info) = map_guard.get(&asn) { + found.push(info.clone()); + } + } + + let use_legacy = q.legacy.unwrap_or(false); + let results = if use_legacy { + json!(convert_to_legacy(found)) + } else { + json!(found) + }; + + Ok(Json(results)) +} + +async fn post_lookup( + State(state): State<AppState>, + Json(body): Json<LookupBody>, +) -> Result<Json<Value>, (StatusCode, Json<Value>)> { + if body.asns.is_empty() { + return Err(( + StatusCode::BAD_REQUEST, + Json(json!({"error": "no ASNs provided in request body"})), + )); + } + if body.asns.len() > state.max_asns { + return Err(( + StatusCode::PAYLOAD_TOO_LARGE, + Json( + json!({"error": format!("payload too large, max ASNs per request is {}", state.max_asns)}), + ), + )); + } + + let map_guard = state.map.lock().map_err(|_| { + error!("post_lookup: map mutex is poisoned"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({"error": "internal server error"})), + ) + })?; + + let mut found = Vec::with_capacity(body.asns.len()); + for asn in body.asns { + if let Some(info) = map_guard.get(&asn) { + found.push(info.clone()); + } + } + + Ok(Json(json!(found))) +} diff --git a/src/main.rs b/src/main.rs index d92078f..ea882cb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,24 +15,49 @@ //! For Cloudflare R2 destination, `AWS_REGION` should be `auto`.
use bgpkit_commons::asinfo::AsInfo; -use clap::Parser; +use clap::{Parser, Subcommand}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::fmt::{Display, Formatter}; +use std::net::SocketAddr; use std::process::exit; +use std::sync::{Arc, Mutex}; use tracing::{error, info}; +mod api; +use crate::api::{build_router, load_asn_map_out, start_updater, AppState}; + #[derive(Parser)] #[clap(author, version, about, long_about = None)] #[clap(propagate_version = true)] struct Cli { - /// Export data path - #[clap(default_value = "./asninfo.jsonl")] - path: String, + #[clap(subcommand)] + command: Commands, +} - /// Simplified format - #[clap(short, long)] - simplified: bool, +#[derive(Subcommand, Debug, Clone)] +enum Commands { + /// Generate ASN info dump file (JSON/JSONL/CSV) and optionally upload + Generate { + /// Export data path; determines format by extension (json, jsonl, csv) + #[clap(default_value = "./asninfo.jsonl")] + path: String, + /// Simplified format (also implied when CSV) + #[clap(short, long)] + simplified: bool, + }, + /// Serve an HTTP API for ASN info lookup + Serve { + /// Bind address, e.g., 0.0.0.0:8080 + #[clap(short, long, default_value = "0.0.0.0:8080")] + bind: String, + /// Refresh interval in seconds for background updates, default 21600 (6 hours) + #[clap(long, default_value_t = 21600)] + refresh_secs: u64, + /// Use simplified mode (skip heavy datasets); default false + #[clap(long, default_value_t = false)] + simplified: bool, + }, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -89,24 +114,44 @@ impl Display for ExportFormat { } } -fn main() { +#[tokio::main] +async fn main() { tracing_subscriber::fmt().with_ansi(false).init(); + dotenvy::dotenv().ok(); + let cli = Cli::parse(); - dotenvy::dotenv().ok(); + match cli.command { + Commands::Generate { path, simplified } => { + if let Err(code) = generate_cmd(&path, simplified) { + exit(code); + } + } + Commands::Serve { + bind, + refresh_secs, + simplified, + 
} => { + if let Err(code) = serve_cmd(&bind, refresh_secs, simplified).await { + exit(code); + } + } + } +} - let format: ExportFormat = if cli.path.contains(".jsonl") { +fn generate_cmd(path: &str, simplified_flag: bool) -> Result<(), i32> { + let format: ExportFormat = if path.contains(".jsonl") { ExportFormat::JSONL - } else if cli.path.contains(".csv") { + } else if path.contains(".csv") { ExportFormat::CSV - } else if cli.path.contains(".json") { + } else if path.contains(".json") { ExportFormat::JSON } else { error!("unknown format. please choose from csv, json, jsonl format"); - exit(1); + return Err(1); }; - let simplified = cli.simplified || matches!(format, ExportFormat::CSV); + let simplified = simplified_flag || matches!(format, ExportFormat::CSV); let load_population = !simplified; let load_hegemony = !simplified; @@ -116,18 +161,24 @@ fn main() { let mut commons = bgpkit_commons::BgpkitCommons::new(); if let Err(e) = commons.load_asinfo(true, load_population, load_hegemony, load_peeringdb) { error!("failed to load asn info data: {e}"); - exit(1); + return Err(1); }; if let Err(e) = commons.load_countries() { error!("failed to load countries: {e}"); - exit(2); + return Err(2); }; let as_info_map = commons.asinfo_all().expect("failed to get asinfo map"); info!("export format: {}", &format); - info!("writing asn info data to '{}' ...", &cli.path); - let mut writer = oneio::get_writer(&cli.path).unwrap(); + info!("writing asn info data to '{}' ...", &path); + let mut writer = match oneio::get_writer(&path) { + Ok(w) => w, + Err(e) => { + error!("failed to open writer for path '{}': {}", path, e); + return Err(1); + } + }; let mut info_vec = as_info_map.values().collect::<Vec<_>>(); info_vec.sort_by(|a, b| a.asn.cmp(&b.asn)); @@ -148,10 +199,32 @@ fn main() { if matches!(format, ExportFormat::JSONL) { for as_info in values_vec { - writeln!(writer, "{}", serde_json::to_string(&as_info).unwrap()).unwrap(); + match serde_json::to_string(&as_info) { + Ok(s) => { +
if writeln!(writer, "{}", s).is_err() { + error!("failed to write to file"); + return Err(1); + } + } + Err(e) => { + error!("failed to serialize AS info: {}", e); + return Err(1); + } + } } } else { - writeln!(writer, "{}", serde_json::to_string(&values_vec).unwrap()).unwrap(); + match serde_json::to_string(&values_vec) { + Ok(s) => { + if writeln!(writer, "{}", s).is_err() { + error!("failed to write to file"); + return Err(1); + } + } + Err(e) => { + error!("failed to serialize AS info vector: {}", e); + return Err(1); + } + } } } ExportFormat::CSV => { @@ -182,29 +255,70 @@ fn main() { drop(writer); if let Ok(upload_path) = std::env::var("ASNINFO_UPLOAD_PATH") { - info!("uploading {} to {} ...", &cli.path, upload_path); + info!("uploading {} to {} ...", &path, upload_path); if oneio::s3_env_check().is_err() { error!("S3 environment variables not set, skipping upload"); - exit(3); + return Err(3); } else { let (bucket, key) = oneio::s3_url_parse(&upload_path).unwrap(); - match oneio::s3_upload(&bucket, &key, &cli.path) { + match oneio::s3_upload(&bucket, &key, &path) { Ok(_) => { // try to do send a success message to if let Ok(heartbeat_url) = dotenvy::var("ASNINFO_HEARTBEAT_URL") { info!("sending heartbeat to configured URL"); if let Err(e) = oneio::read_to_string(&heartbeat_url) { error!("failed to send heartbeat: {e}"); - exit(4); + return Err(4); } } } Err(e) => { error!("failed to upload to destination ({upload_path}): {e}"); - exit(5); + return Err(5); } } } } info!("asninfo download done"); + Ok(()) +} + +async fn serve_cmd(bind: &str, refresh_secs: u64, simplified: bool) -> Result<(), i32> { + let (initial_map, updated_at_str) = load_asn_map_out(simplified)?; + let map = Arc::new(Mutex::new(initial_map)); + let updated_at = Arc::new(Mutex::new(updated_at_str)); + + // config: max ASNs per request (default 100) + let max_asns: usize = dotenvy::var("ASNINFO_MAX_ASNS") + .ok() + .and_then(|s| s.parse::<usize>().ok()) + .unwrap_or(100); + + let state = AppState {
+ map: map.clone(), + updated_at: updated_at.clone(), + max_asns, + }; + + // start background updater + let _handle = start_updater(map.clone(), updated_at.clone(), refresh_secs, simplified); + + // build API router + let app = build_router(state); + + let addr: SocketAddr = bind.parse().map_err(|e| { + error!("invalid bind address {bind}: {e}"); + 6 + })?; + let listener = tokio::net::TcpListener::bind(addr).await.map_err(|e| { + error!("failed to bind {bind}: {e}"); + 6 + })?; + info!("serving on http://{}", addr); + axum::serve(listener, app).await.map_err(|e| { + error!("server error: {e}"); + 7 + })?; + + Ok(()) }