From eb4364f5dfc530b78ac2d3f602be057c83120865 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Wed, 17 Sep 2025 17:54:41 -0700 Subject: [PATCH 01/11] add HTTP API and background updates using Axum and Tokio - Introduced an `axum`-based HTTP API with `/lookup` endpoints for ASN information queries. - Added a `serve` command to start the HTTP server, including support for background data refresh. - Refactored commands to include both `generate` and `serve` subcommands. - Updated dependencies and locked versions in `Cargo.lock`. - Enhanced error handling with explicit exit codes. --- Cargo.lock | 113 +++++++++++++++++- Cargo.toml | 9 +- src/main.rs | 337 ++++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 429 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dcc71af..778882a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -108,12 +108,15 @@ dependencies = [ name = "asninfo" version = "0.3.4" dependencies = [ + "axum", "bgpkit-commons", + "chrono", "clap", "dotenvy", "oneio 0.19.0", "serde", "serde_json", + "tokio", "tracing", "tracing-subscriber", ] @@ -214,6 +217,60 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "axum" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http 1.3.1", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" +dependencies = [ + "bytes", + "futures-core", + "http 1.3.1", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -773,6 +830,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "hyper" version = "1.7.0" @@ -787,6 +850,7 @@ dependencies = [ "http 1.3.1", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -1098,6 +1162,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "maybe-async" version = "0.2.10" @@ -1706,18 +1776,28 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.225" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "fd6c24dee235d0da097043389623fb913daddf92c76e9f5a1db88607a0bcbd1d" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.225" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "659356f9a0cb1e529b24c01e43ad2bdf520ec4ceaf83047b83ddcc2251f96383" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.225" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "0ea936adf78b1f766949a4977b91d2f5595825bd6ec079aa9543ad2685fc4516" dependencies = [ "proc-macro2", "quote", @@ -1736,6 +1816,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2000,9 +2091,21 @@ dependencies = [ "pin-project-lite", "slab", "socket2", + "tokio-macros", "windows-sys 0.59.0", ] +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tokio-rustls" version = "0.26.2" @@ -2039,6 +2142,7 @@ dependencies = [ "tokio", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -2077,6 +2181,7 @@ version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", diff --git a/Cargo.toml b/Cargo.toml index ef0f3cc..a93ccec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,9 @@ oneio = { version = "0.19.0", default-features = false, features = ["https", "s3 tracing-subscriber = "0.3" tracing = "0.1" clap = { version = "4.5", features = ["derive"] } - -dotenvy = "0.15.7" -serde = { version = "1.0.210", features = ["derive"] } \ No newline at end of file +axum = { version = "0.8"} + tokio = { version = "1.39", features = ["macros", "rt-multi-thread", "time", "net"] } + chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "std"] } + + dotenvy = "0.15.7" + serde = { version = "1.0.210", features = ["derive"] } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index d92078f..92083b6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,25 +14,54 @@ //! //! For Cloudflare R2 destination, `AWS_REGION` should be `auto`. +use axum::{extract::Query, routing::get, Json, Router}; use bgpkit_commons::asinfo::AsInfo; -use clap::Parser; +use chrono::{SecondsFormat, Utc}; +use clap::{Parser, Subcommand}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; +use std::collections::HashMap; use std::fmt::{Display, Formatter}; +use std::net::SocketAddr; use std::process::exit; +use std::str::FromStr; +use std::sync::{Arc, Mutex}; +use std::time::Duration; +use tokio::task::JoinHandle; +use tokio::time::sleep; use tracing::{error, info}; #[derive(Parser)] #[clap(author, version, about, long_about = None)] #[clap(propagate_version = true)] struct Cli { - /// Export data path - #[clap(default_value = "./asninfo.jsonl")] - path: String, + #[clap(subcommand)] + command: Commands, +} - /// Simplified format - #[clap(short, long)] - simplified: bool, +#[derive(Subcommand, Debug, Clone)] +enum Commands { + /// Generate ASN info dump file (JSON/JSONL/CSV) and optionally upload + Generate { + /// Export data path; determines format by extension (json, jsonl, csv) + #[clap(default_value = "./asninfo.jsonl")] + path: String, + /// Simplified format (also implied when CSV) + #[clap(short, long)] + simplified: bool, + }, + /// Serve an HTTP API for ASN info lookup + Serve { + /// Bind address, e.g., 0.0.0.0:8080 + #[clap(short, long, default_value = "0.0.0.0:8080")] + bind: String, + /// Refresh interval in seconds for background updates + #[clap(long, default_value_t = 21600)] + refresh_secs: u64, + /// Use simplified mode (skip heavy datasets); default false + #[clap(long, default_value_t = false)] + simplified: bool, + }, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -46,6 +75,24 @@ pub struct AsInfoSimplified { pub data_source: String, } +#[derive(Debug, Clone, Serialize, Deserialize)] +struct AsInfoOut { + #[serde(flatten)] + inner: AsInfo, + #[serde(rename = "country_name")] + country_name: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct LookupResponse { + data: Vec, + count: usize, + #[serde(rename = "updatedAt")] + updated_at: String, + page: usize, + page_size: usize, +} + impl From<&AsInfo> for AsInfoSimplified { fn from(value: &AsInfo) -> Self { let (org_id, org_name) = match &value.as2org { @@ -89,24 +136,44 @@ impl Display for ExportFormat { } } -fn main() { +#[tokio::main] +async fn main() { tracing_subscriber::fmt().with_ansi(false).init(); + dotenvy::dotenv().ok(); + let cli = Cli::parse(); - dotenvy::dotenv().ok(); + match cli.command { + Commands::Generate { path, simplified } => { + if let Err(code) = generate_cmd(&path, simplified) { + exit(code); + } + } + Commands::Serve { + bind, + refresh_secs, + simplified, + } => { + if let Err(code) = serve_cmd(&bind, refresh_secs, simplified).await { + exit(code); + } + } + } +} - let format: ExportFormat = if cli.path.contains(".jsonl") { +fn generate_cmd(path: &str, simplified_flag: bool) -> Result<(), i32> { + let format: ExportFormat = if path.contains(".jsonl") { ExportFormat::JSONL - } else if cli.path.contains(".csv") { + } else if path.contains(".csv") { ExportFormat::CSV - } else if cli.path.contains(".json") { + } else if path.contains(".json") { ExportFormat::JSON } else { error!("unknown format. please choose from csv, json, jsonl format"); - exit(1); + return Err(1); }; - let simplified = cli.simplified || matches!(format, ExportFormat::CSV); + let simplified = simplified_flag || matches!(format, ExportFormat::CSV); let load_population = !simplified; let load_hegemony = !simplified; @@ -116,18 +183,18 @@ fn main() { let mut commons = bgpkit_commons::BgpkitCommons::new(); if let Err(e) = commons.load_asinfo(true, load_population, load_hegemony, load_peeringdb) { error!("failed to load asn info data: {e}"); - exit(1); + return Err(1); }; if let Err(e) = commons.load_countries() { error!("failed to load countries: {e}"); - exit(2); + return Err(2); }; let as_info_map = commons.asinfo_all().expect("failed to get asinfo map"); info!("export format: {}", &format); - info!("writing asn info data to '{}' ...", &cli.path); - let mut writer = oneio::get_writer(&cli.path).unwrap(); + info!("writing asn info data to '{}' ...", &path); + let mut writer = oneio::get_writer(&path).unwrap(); let mut info_vec = as_info_map.values().collect::>(); info_vec.sort_by(|a, b| a.asn.cmp(&b.asn)); @@ -182,29 +249,253 @@ fn main() { drop(writer); if let Ok(upload_path) = std::env::var("ASNINFO_UPLOAD_PATH") { - info!("uploading {} to {} ...", &cli.path, upload_path); + info!("uploading {} to {} ...", &path, upload_path); if oneio::s3_env_check().is_err() { error!("S3 environment variables not set, skipping upload"); - exit(3); + return Err(3); } else { let (bucket, key) = oneio::s3_url_parse(&upload_path).unwrap(); - match oneio::s3_upload(&bucket, &key, &cli.path) { + match oneio::s3_upload(&bucket, &key, &path) { Ok(_) => { // try to do send a success message to if let Ok(heartbeat_url) = dotenvy::var("ASNINFO_HEARTBEAT_URL") { info!("sending heartbeat to configured URL"); if let Err(e) = oneio::read_to_string(&heartbeat_url) { error!("failed to send heartbeat: {e}"); - exit(4); + return Err(4); } } } Err(e) => { error!("failed to upload to destination ({upload_path}): {e}"); - exit(5); + return Err(5); } } } } info!("asninfo download done"); + Ok(()) +} + +// ==================== Serve command implementation ==================== +#[derive(Clone)] +struct AppState { + map: Arc>>, + updated_at: Arc>, +} + +#[derive(Deserialize)] +struct LookupQuery { + asns: Option, + legacy: Option, +} + +#[derive(Deserialize)] +struct LookupBody { + asns: Vec, +} + +async fn serve_cmd(bind: &str, refresh_secs: u64, simplified: bool) -> Result<(), i32> { + let (initial_map, updated_at_str) = load_asn_map_out(simplified)?; + let map = Arc::new(Mutex::new(initial_map)); + let updated_at = Arc::new(Mutex::new(updated_at_str)); + let state = AppState { + map: map.clone(), + updated_at: updated_at.clone(), + }; + + // start background updater + let _handle = start_updater(map.clone(), updated_at.clone(), refresh_secs, simplified); + + // set up routes + let app = Router::new() + .route("/lookup", get(get_lookup).post(post_lookup)) + .with_state(state); + + let addr: SocketAddr = bind.parse().map_err(|e| { + error!("invalid bind address {bind}: {e}"); + 6 + })?; + let listener = tokio::net::TcpListener::bind(addr).await.map_err(|e| { + error!("failed to bind {bind}: {e}"); + 6 + })?; + info!("serving on http://{}", addr); + axum::serve(listener, app).await.map_err(|e| { + error!("server error: {e}"); + 7 + })?; + + Ok(()) +} + +fn load_asn_map_out(simplified: bool) -> Result<(HashMap, String), i32> { + let load_population = !simplified; + let load_hegemony = !simplified; + let load_peeringdb = !simplified; + + info!("loading asn info data ..."); + let mut commons = bgpkit_commons::BgpkitCommons::new(); + if let Err(e) = commons.load_asinfo(true, load_population, load_hegemony, load_peeringdb) { + error!("failed to load asn info data: {e}"); + return Err(1); + }; + if let Err(e) = commons.load_countries() { + error!("failed to load countries: {e}"); + return Err(2); + }; + let as_info_map = commons.asinfo_all().expect("failed to get asinfo map"); + + // build enriched map with country_name + let mut out: HashMap = HashMap::with_capacity(as_info_map.len()); + for (asn, info) in as_info_map.iter() { + let country_name = commons + .country_by_code(&info.country) + .ok() + .flatten() + .map(|c| c.name) + .unwrap_or_default(); + out.insert( + *asn, + AsInfoOut { + inner: info.clone(), + country_name, + }, + ); + } + let updated_at = Utc::now().to_rfc3339_opts(SecondsFormat::Millis, true); + + Ok((out, updated_at)) +} + +fn start_updater( + map: Arc>>, + updated_at: Arc>, + refresh_secs: u64, + simplified: bool, +) -> JoinHandle<()> { + tokio::spawn(async move { + let interval = Duration::from_secs(refresh_secs.max(60)); // minimum 60s + loop { + sleep(interval).await; + info!("background updater: refreshing ASN data ..."); + match load_asn_map_out(simplified) { + Ok((new_map, ts)) => { + { + let mut guard = map.lock().unwrap(); + *guard = new_map; + } + { + let mut ts_guard = updated_at.lock().unwrap(); + *ts_guard = ts; + } + info!("background updater: ASN data updated"); + } + Err(e) => { + error!("background updater: refresh failed with code {e}"); + } + } + } + }) +} + +use axum::extract::State; + +async fn get_lookup(State(state): State, Query(q): Query) -> Json { + let asns: Vec = q + .asns + .clone() + .unwrap_or_default() + .split(',') + .filter_map(|s| u32::from_str(s.trim()).ok()) + .collect(); + let legacy = q.legacy.unwrap_or(false); + let data_full = lookup(&state, &asns); + let updated_at = state.updated_at.lock().unwrap().clone(); + + let data_values: Vec = if legacy { + data_full + .into_iter() + .map(|o| { + let (org_id, org_name) = match &o.inner.as2org { + None => ("".to_string(), "".to_string()), + Some(v) => (v.org_id.clone(), v.org_name.clone()), + }; + json!({ + "asn": o.inner.asn, + "as_name": o.inner.name, + "org_id": org_id, + "org_name": org_name, + "country_code": o.inner.country, + "country_name": o.country_name, + "data_source": "", + }) + }) + .collect() + } else { + data_full.into_iter().map(|o| json!(o)).collect() + }; + + let resp = json!({ + "data": data_values, + "count": data_values.len(), + "updatedAt": updated_at, + "page": 0, + "page_size": asns.len(), + }); + Json(resp) +} + +async fn post_lookup( + State(state): State, + Query(q): Query, + Json(body): Json, +) -> Json { + let legacy = q.legacy.unwrap_or(false); + let asns = body.asns; + let data_full = lookup(&state, &asns); + let updated_at = state.updated_at.lock().unwrap().clone(); + + let data_values: Vec = if legacy { + data_full + .into_iter() + .map(|o| { + let (org_id, org_name) = match &o.inner.as2org { + None => ("".to_string(), "".to_string()), + Some(v) => (v.org_id.clone(), v.org_name.clone()), + }; + json!({ + "asn": o.inner.asn, + "as_name": o.inner.name, + "org_id": org_id, + "org_name": org_name, + "country_code": o.inner.country, + "country_name": o.country_name, + "data_source": "", + }) + }) + .collect() + } else { + data_full.into_iter().map(|o| json!(o)).collect() + }; + + let resp = json!({ + "data": data_values, + "count": data_values.len(), + "updatedAt": updated_at, + "page": 0, + "page_size": asns.len(), + }); + Json(resp) +} + +fn lookup(state: &AppState, asns: &[u32]) -> Vec { + let map = state.map.lock().unwrap(); + let mut res = Vec::with_capacity(asns.len()); + for asn in asns { + if let Some(info) = map.get(asn) { + res.push(info.clone()); + } + } + res } From 009c3c2629c3996e18b1746b203f018b0b26cc57 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Fri, 19 Sep 2025 12:07:01 -0700 Subject: [PATCH 02/11] refactor HTTP API: modularize into `api.rs` and enhance CORS support - Moved HTTP API logic to `src/api.rs` for better modularity and clarity. - Added CORS support with `tower-http` for improved cross-origin compatibility. - Decluttered `src/main.rs` by extracting reusable components. - Updated dependency list in `Cargo.toml` and `Cargo.lock`. --- Cargo.lock | 2 + Cargo.toml | 10 +- src/api.rs | 261 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 210 +++--------------------------------------- 4 files changed, 284 insertions(+), 199 deletions(-) create mode 100644 src/api.rs diff --git a/Cargo.lock b/Cargo.lock index 778882a..0147236 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,10 +113,12 @@ dependencies = [ "chrono", "clap", "dotenvy", + "http 1.3.1", "oneio 0.19.0", "serde", "serde_json", "tokio", + "tower-http", "tracing", "tracing-subscriber", ] diff --git a/Cargo.toml b/Cargo.toml index a93ccec..792700d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,8 +20,10 @@ tracing-subscriber = "0.3" tracing = "0.1" clap = { version = "4.5", features = ["derive"] } axum = { version = "0.8"} - tokio = { version = "1.39", features = ["macros", "rt-multi-thread", "time", "net"] } - chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "std"] } +tokio = { version = "1.39", features = ["macros", "rt-multi-thread", "time", "net"] } +chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "std"] } - dotenvy = "0.15.7" - serde = { version = "1.0.210", features = ["derive"] } \ No newline at end of file +dotenvy = "0.15.7" +serde = { version = "1.0.210", features = ["derive"] } +http = "1" +tower-http = { version = "0.6", features = ["cors"] } \ No newline at end of file diff --git a/src/api.rs b/src/api.rs new file mode 100644 index 0000000..008a037 --- /dev/null +++ b/src/api.rs @@ -0,0 +1,261 @@ +use axum::{ + extract::{Query, State}, + http::{Method, StatusCode}, + routing::get, + Json, Router, +}; +use bgpkit_commons::asinfo::AsInfo; +use chrono::{SecondsFormat, Utc}; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; +use std::time::Duration; +use tokio::task::JoinHandle; +use tokio::time::sleep; +use tower_http::cors::{Any, CorsLayer}; +use tracing::{error, info}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AsInfoOut { + #[serde(flatten)] + pub inner: AsInfo, + #[serde(rename = "country_name")] + pub country_name: String, +} + +#[derive(Clone)] +pub struct AppState { + pub map: Arc>>, + pub updated_at: Arc>, + pub max_asns: usize, +} + +#[derive(Deserialize)] +struct LookupQuery { + asns: Option, + legacy: Option, +} + +#[derive(Deserialize)] +struct LookupBody { + asns: Vec, +} + +pub fn build_router(state: AppState) -> Router { + let cors = CorsLayer::new() + .allow_origin(Any) + .allow_methods([Method::GET, Method::POST]) + .allow_headers(Any); + + Router::new() + .route("/lookup", get(get_lookup).post(post_lookup)) + .route("/health", get(health)) + .with_state(state) + .layer(cors) +} + +pub fn load_asn_map_out(simplified: bool) -> Result<(HashMap, String), i32> { + let load_population = !simplified; + let load_hegemony = !simplified; + let load_peeringdb = !simplified; + + info!("loading asn info data ..."); + let mut commons = bgpkit_commons::BgpkitCommons::new(); + if let Err(e) = commons.load_asinfo(true, load_population, load_hegemony, load_peeringdb) { + error!("failed to load asn info data: {e}"); + return Err(1); + }; + if let Err(e) = commons.load_countries() { + error!("failed to load countries: {e}"); + return Err(2); + }; + let as_info_map = commons.asinfo_all().expect("failed to get asinfo map"); + + // build enriched map with country_name + let mut out: HashMap = HashMap::with_capacity(as_info_map.len()); + for (asn, info) in as_info_map.iter() { + let country_name = commons + .country_by_code(&info.country) + .ok() + .flatten() + .map(|c| c.name) + .unwrap_or_default(); + out.insert( + *asn, + AsInfoOut { + inner: info.clone(), + country_name, + }, + ); + } + let updated_at = Utc::now().to_rfc3339_opts(SecondsFormat::Millis, true); + + Ok((out, updated_at)) +} + +pub fn start_updater( + map: Arc>>, + updated_at: Arc>, + refresh_secs: u64, + simplified: bool, +) -> JoinHandle<()> { + tokio::spawn(async move { + let interval = Duration::from_secs(refresh_secs.max(3600)); // minimum 1 hour + loop { + sleep(interval).await; + info!("background updater: refreshing ASN data ..."); + match load_asn_map_out(simplified) { + Ok((new_map, ts)) => { + { + let mut guard = map.lock().unwrap(); + *guard = new_map; + } + { + let mut ts_guard = updated_at.lock().unwrap(); + *ts_guard = ts; + } + info!("background updater: ASN data updated"); + } + Err(e) => { + error!("background updater: refresh failed with code {e}"); + } + } + } + }) +} + +async fn health(State(state): State) -> Json { + let updated_at = state.updated_at.lock().unwrap().clone(); + Json(json!({ + "status": "ok", + "updatedAt": updated_at, + })) +} + +fn convert_to_legacy(list: Vec) -> Vec { + let mut out = Vec::with_capacity(list.len()); + for o in list.into_iter() { + let asn = o.inner.asn; + let as_name = o.inner.name.clone(); + let country_code = o.inner.country.clone(); + let country_name = o.country_name.clone(); + let org_id = o + .inner + .as2org + .as_ref() + .map(|v| v.org_id.clone()) + .unwrap_or_default(); + let org_name = o + .inner + .as2org + .as_ref() + .map(|v| v.org_name.clone()) + .unwrap_or_default(); + out.push(json!({ + "asn": asn, + "as_name": as_name, + "org_id": org_id, + "org_name": org_name, + "country_code": country_code, + "country_name": country_name, + "data_source": "", + })); + } + out +} + +async fn get_lookup( + State(state): State, + Query(q): Query, +) -> Result, StatusCode> { + let asns: Vec = q + .asns + .clone() + .unwrap_or_default() + .split(',') + .filter_map(|s| s.trim().parse::().ok()) + .collect(); + + if asns.is_empty() { + error!("/lookup GET: empty or invalid 'asns' query param"); + return Err(StatusCode::BAD_REQUEST); + } + if asns.len() > state.max_asns { + error!( + "/lookup GET: too many ASNs: {} > {}", + asns.len(), + state.max_asns + ); + return Err(StatusCode::PAYLOAD_TOO_LARGE); + } + + let legacy = q.legacy.unwrap_or(false); + let data_full = lookup(&state, &asns); + let updated_at = state.updated_at.lock().unwrap().clone(); + + let data_values: Vec = if legacy { + convert_to_legacy(data_full) + } else { + data_full.into_iter().map(|o| json!(o)).collect() + }; + + let resp = json!({ + "data": data_values, + "count": data_values.len(), + "updatedAt": updated_at, + "page": 0, + "page_size": asns.len(), + }); + Ok(Json(resp)) +} + +async fn post_lookup( + State(state): State, + Query(q): Query, + Json(body): Json, +) -> Result, StatusCode> { + let legacy = q.legacy.unwrap_or(false); + let asns = body.asns; + if asns.is_empty() { + error!("/lookup POST: empty asns body"); + return Err(StatusCode::BAD_REQUEST); + } + if asns.len() > state.max_asns { + error!( + "/lookup POST: too many ASNs: {} > {}", + asns.len(), + state.max_asns + ); + return Err(StatusCode::PAYLOAD_TOO_LARGE); + } + + let data_full = lookup(&state, &asns); + let updated_at = state.updated_at.lock().unwrap().clone(); + + let data_values: Vec = if legacy { + convert_to_legacy(data_full) + } else { + data_full.into_iter().map(|o| json!(o)).collect() + }; + + let resp = json!({ + "data": data_values, + "count": data_values.len(), + "updatedAt": updated_at, + "page": 0, + "page_size": asns.len(), + }); + Ok(Json(resp)) +} + +fn lookup(state: &AppState, asns: &[u32]) -> Vec { + let map = state.map.lock().unwrap(); + let mut res = Vec::with_capacity(asns.len()); + for asn in asns { + if let Some(info) = map.get(asn) { + res.push(info.clone()); + } + } + res +} diff --git a/src/main.rs b/src/main.rs index 92083b6..647a80e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,23 +14,21 @@ //! //! For Cloudflare R2 destination, `AWS_REGION` should be `auto`. -use axum::{extract::Query, routing::get, Json, Router}; +use axum::serve; use bgpkit_commons::asinfo::AsInfo; -use chrono::{SecondsFormat, Utc}; use clap::{Parser, Subcommand}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; -use std::collections::HashMap; use std::fmt::{Display, Formatter}; use std::net::SocketAddr; use std::process::exit; use std::str::FromStr; use std::sync::{Arc, Mutex}; -use std::time::Duration; -use tokio::task::JoinHandle; -use tokio::time::sleep; use tracing::{error, info}; +mod api; +use crate::api::{build_router, load_asn_map_out, start_updater, AppState}; + #[derive(Parser)] #[clap(author, version, about, long_about = None)] #[clap(propagate_version = true)] @@ -55,7 +53,7 @@ enum Commands { /// Bind address, e.g., 0.0.0.0:8080 #[clap(short, long, default_value = "0.0.0.0:8080")] bind: String, - /// Refresh interval in seconds for background updates + /// Refresh interval in seconds for background updates, default 21600 (6 hours) #[clap(long, default_value_t = 21600)] refresh_secs: u64, /// Use simplified mode (skip heavy datasets); default false @@ -75,14 +73,6 @@ pub struct AsInfoSimplified { pub data_source: String, } -#[derive(Debug, Clone, Serialize, Deserialize)] -struct AsInfoOut { - #[serde(flatten)] - inner: AsInfo, - #[serde(rename = "country_name")] - country_name: String, -} - #[derive(Debug, Clone, Serialize, Deserialize)] struct LookupResponse { data: Vec, @@ -278,11 +268,6 @@ fn generate_cmd(path: &str, simplified_flag: bool) -> Result<(), i32> { } // ==================== Serve command implementation ==================== -#[derive(Clone)] -struct AppState { - map: Arc>>, - updated_at: Arc>, -} #[derive(Deserialize)] struct LookupQuery { @@ -299,18 +284,24 @@ async fn serve_cmd(bind: &str, refresh_secs: u64, simplified: bool) -> Result<() let (initial_map, updated_at_str) = load_asn_map_out(simplified)?; let map = Arc::new(Mutex::new(initial_map)); let updated_at = Arc::new(Mutex::new(updated_at_str)); + + // config: max ASNs per request (default 100) + let max_asns: usize = dotenvy::var("ASNINFO_MAX_ASNS") + .ok() + .and_then(|s| s.parse::().ok()) + .unwrap_or(100); + let state = AppState { map: map.clone(), updated_at: updated_at.clone(), + max_asns, }; // start background updater let _handle = start_updater(map.clone(), updated_at.clone(), refresh_secs, simplified); - // set up routes - let app = Router::new() - .route("/lookup", get(get_lookup).post(post_lookup)) - .with_state(state); + // build API router + let app = build_router(state); let addr: SocketAddr = bind.parse().map_err(|e| { error!("invalid bind address {bind}: {e}"); @@ -328,174 +319,3 @@ async fn serve_cmd(bind: &str, refresh_secs: u64, simplified: bool) -> Result<() Ok(()) } - -fn load_asn_map_out(simplified: bool) -> Result<(HashMap, String), i32> { - let load_population = !simplified; - let load_hegemony = !simplified; - let load_peeringdb = !simplified; - - info!("loading asn info data ..."); - let mut commons = bgpkit_commons::BgpkitCommons::new(); - if let Err(e) = commons.load_asinfo(true, load_population, load_hegemony, load_peeringdb) { - error!("failed to load asn info data: {e}"); - return Err(1); - }; - if let Err(e) = commons.load_countries() { - error!("failed to load countries: {e}"); - return Err(2); - }; - let as_info_map = commons.asinfo_all().expect("failed to get asinfo map"); - - // build enriched map with country_name - let mut out: HashMap = HashMap::with_capacity(as_info_map.len()); - for (asn, info) in as_info_map.iter() { - let country_name = commons - .country_by_code(&info.country) - .ok() - .flatten() - .map(|c| c.name) - .unwrap_or_default(); - out.insert( - *asn, - AsInfoOut { - inner: info.clone(), - country_name, - }, - ); - } - let updated_at = Utc::now().to_rfc3339_opts(SecondsFormat::Millis, true); - - Ok((out, updated_at)) -} - -fn start_updater( - map: Arc>>, - updated_at: Arc>, - refresh_secs: u64, - simplified: bool, -) -> JoinHandle<()> { - tokio::spawn(async move { - let interval = Duration::from_secs(refresh_secs.max(60)); // minimum 60s - loop { - sleep(interval).await; - info!("background updater: refreshing ASN data ..."); - match load_asn_map_out(simplified) { - Ok((new_map, ts)) => { - { - let mut guard = map.lock().unwrap(); - *guard = new_map; - } - { - let mut ts_guard = updated_at.lock().unwrap(); - *ts_guard = ts; - } - info!("background updater: ASN data updated"); - } - Err(e) => { - error!("background updater: refresh failed with code {e}"); - } - } - } - }) -} - -use axum::extract::State; - -async fn get_lookup(State(state): State, Query(q): Query) -> Json { - let asns: Vec = q - .asns - .clone() - .unwrap_or_default() - .split(',') - .filter_map(|s| u32::from_str(s.trim()).ok()) - .collect(); - let legacy = q.legacy.unwrap_or(false); - let data_full = lookup(&state, &asns); - let updated_at = state.updated_at.lock().unwrap().clone(); - - let data_values: Vec = if legacy { - data_full - .into_iter() - .map(|o| { - let (org_id, org_name) = match &o.inner.as2org { - None => ("".to_string(), "".to_string()), - Some(v) => (v.org_id.clone(), v.org_name.clone()), - }; - json!({ - "asn": o.inner.asn, - "as_name": o.inner.name, - "org_id": org_id, - "org_name": org_name, - "country_code": o.inner.country, - "country_name": o.country_name, - "data_source": "", - }) - }) - .collect() - } else { - data_full.into_iter().map(|o| json!(o)).collect() - }; - - let resp = json!({ - "data": data_values, - "count": data_values.len(), - "updatedAt": updated_at, - "page": 0, - "page_size": asns.len(), - }); - Json(resp) -} - -async fn post_lookup( - State(state): State, - Query(q): Query, - Json(body): Json, -) -> Json { - let legacy = q.legacy.unwrap_or(false); - let asns = body.asns; - let data_full = lookup(&state, &asns); - let updated_at = state.updated_at.lock().unwrap().clone(); - - let data_values: Vec = if legacy { - data_full - .into_iter() - .map(|o| { - let (org_id, org_name) = match &o.inner.as2org { - None => ("".to_string(), "".to_string()), - Some(v) => (v.org_id.clone(), v.org_name.clone()), - }; - json!({ - "asn": o.inner.asn, - "as_name": o.inner.name, - "org_id": org_id, - "org_name": org_name, - "country_code": o.inner.country, - "country_name": o.country_name, - "data_source": "", - }) - }) - .collect() - } else { - data_full.into_iter().map(|o| json!(o)).collect() - }; - - let resp = json!({ - "data": data_values, - "count": data_values.len(), - "updatedAt": updated_at, - "page": 0, - "page_size": asns.len(), - }); - Json(resp) -} - -fn lookup(state: &AppState, asns: &[u32]) -> Vec { - let map = state.map.lock().unwrap(); - let mut res = Vec::with_capacity(asns.len()); - for asn in asns { - if let Some(info) = map.get(asn) { - res.push(info.clone()); - } - } - res -} From ce6ad12fb4d57be9beb44f11980cf91f60d0a4a1 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Fri, 26 Sep 2025 14:47:13 -0700 Subject: [PATCH 03/11] log middleware: add request logging for all routes except `/health` - Introduced a middleware to log request details such as method, path, status, and latency. - Excluded `/health` route from logging for reduced noise in logs. --- src/api.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/api.rs b/src/api.rs index 008a037..2efab5c 100644 --- a/src/api.rs +++ b/src/api.rs @@ -1,6 +1,8 @@ use axum::{ - extract::{Query, State}, + extract::{Query, Request as AxumRequest, State}, http::{Method, StatusCode}, + middleware::{self, Next}, + response::Response, routing::get, Json, Router, }; @@ -52,9 +54,32 @@ pub fn build_router(state: AppState) -> Router { .route("/lookup", get(get_lookup).post(post_lookup)) .route("/health", get(health)) .with_state(state) + // log all requests except /health + .layer(middleware::from_fn(log_requests)) .layer(cors) } +// Middleware to log requests, skipping /health +async fn log_requests(req: AxumRequest, next: Next) -> Response { + let path = req.uri().path().to_string(); + if path == "/health" { + return next.run(req).await; + } + let method = req.method().clone(); + let start = std::time::Instant::now(); + let response = next.run(req).await; + let status = response.status(); + let elapsed_ms = start.elapsed().as_millis(); + info!( + method = %method, + path = %path, + status = %status.as_u16(), + latency_ms = elapsed_ms, + "request" + ); + response +} + pub fn load_asn_map_out(simplified: bool) -> Result<(HashMap, String), i32> { let load_population = !simplified; let load_hegemony = !simplified; From 908d20f1091ce686c8a81fcc213e8942cc646b40 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Fri, 26 Sep 2025 14:47:28 -0700 Subject: [PATCH 04/11] remove unused code related to `serve` command --- src/main.rs | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/main.rs b/src/main.rs index 647a80e..6f1640a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,6 @@ //! //! For Cloudflare R2 destination, `AWS_REGION` should be `auto`. -use axum::serve; use bgpkit_commons::asinfo::AsInfo; use clap::{Parser, Subcommand}; use serde::{Deserialize, Serialize}; @@ -22,7 +21,6 @@ use serde_json::{json, Value}; use std::fmt::{Display, Formatter}; use std::net::SocketAddr; use std::process::exit; -use std::str::FromStr; use std::sync::{Arc, Mutex}; use tracing::{error, info}; @@ -267,19 +265,6 @@ fn generate_cmd(path: &str, simplified_flag: bool) -> Result<(), i32> { Ok(()) } -// ==================== Serve command implementation ==================== - -#[derive(Deserialize)] -struct LookupQuery { - asns: Option, - legacy: Option, -} - -#[derive(Deserialize)] -struct LookupBody { - asns: Vec, -} - async fn serve_cmd(bind: &str, refresh_secs: u64, simplified: bool) -> Result<(), i32> { let (initial_map, updated_at_str) = load_asn_map_out(simplified)?; let map = Arc::new(Mutex::new(initial_map)); From c9a2096512338e6506ca9aa123fe818795dfba9c Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Fri, 26 Sep 2025 14:47:37 -0700 Subject: [PATCH 05/11] update CHANGELOG and README for HTTP API, `serve` improvements, and Docker examples - Documented new `serve` subcommand with HTTP API endpoints, request limits, and background refresh. - Expanded usage instructions and examples, including `.env` support and Docker setup. - Clarified CSV export schema and data simplifications. --- CHANGELOG.md | 14 +++ README.md | 255 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 255 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d530b0f..1340ce1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,20 @@ All notable changes to this project will be documented in this file. +## Unreleased - 2025-09-26 + +### Added +- New HTTP API server: `asninfo serve` subcommand with CORS enabled and background refresh. +- Lookup endpoints: `GET /lookup` and `POST /lookup` with optional `legacy=true` response. +- Health endpoint: `GET /health`. +- Environment variable `ASNINFO_MAX_ASNS` to cap ASNs per request (default 100). +- Request logging for HTTP server; requests to `/health` are excluded to reduce noise. + +### Changed +- Documentation: README expanded with commands, HTTP API usage, Docker examples, and environment variables. +- Clarified that CSV exports always use the simplified schema; `--simplified` flag implies reduced dataset (skips population, hegemony, and PeeringDB data). +- Docker README: added examples showing how to pass a local .env using `--env-file` or by bind-mounting to `/asninfo/.env`. + ## v0.3.4 - 2025-09-10 * update `bgpkit-commons` to `v0.9.4` to support older Rust versions diff --git a/README.md b/README.md index 58d9084..deaf209 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,12 @@ -# ASN Information Data Uploader +# ASN Information Tool (exporter and HTTP API) -Fetch the latest ASN information and export to JSON, JSONL, or CSV. Optionally upload to an S3-compatible target when -configured via environment variables. +Export up-to-date ASN information to JSON, JSONL, or CSV files, and optionally upload to an S3-compatible target. +You can also run a lightweight HTTP API server to perform ASN info lookups. + +- Export formats: JSON, JSONL, CSV (CSV uses a simplified schema) +- Optional upload to S3/R2 via environment variables (no CLI flag needed) +- HTTP API with GET/POST lookup endpoints and CORS enabled +- .env files supported via dotenv ## Install @@ -24,25 +29,209 @@ cargo install cargo-binstall cargo binstall asninfo ``` -## Usage +## Commands + +The CLI provides two subcommands: generate and serve. ```shell -Usage: asninfo [OPTIONS] [PATH] +asninfo generate [OPTIONS] [PATH] + +Options: + -s, --simplified Export simplified fields (implied for .csv) Arguments: - [PATH] Export data path [default: ./asninfo.jsonl] - Format is inferred from file extension: .json, .jsonl, or .csv + [PATH] Export data path (default: ./asninfo.jsonl) + Format is inferred from file extension: .json, .jsonl, or .csv +``` + +```shell +asninfo serve [OPTIONS] Options: - -s, --simplified Export simplified fields (implied for .csv) - -h, --help Print help - -V, --version Print version + -b, --bind Bind address (default: 0.0.0.0:8080) + --refresh-secs Background refresh interval in seconds (default: 21600) + --simplified Use simplified mode (skip heavy datasets) +``` + +### Examples + +- Export JSONL with full fields: + +```bash +asninfo generate ./asninfo.jsonl +# same as "asninfo generate" +``` + +- Export CSV (simplified schema is implied): + +```bash +asninfo generate ./asninfo.csv +``` + +- Export simplified JSON (smaller payload): + +```bash +asninfo generate -s ./asninfo.json +``` + +- Upload automatically to S3/R2 by setting environment variables: + +```bash +export ASNINFO_UPLOAD_PATH="r2://my-bucket/asn/asninfo.jsonl" +export AWS_REGION="auto" # for Cloudflare R2 +export AWS_ENDPOINT="https://.r2.cloudflarestorage.com" +export AWS_ACCESS_KEY_ID=... +export AWS_SECRET_ACCESS_KEY=... + +asninfo generate ./asninfo.jsonl +``` + +## HTTP API + +Start the server: + +```bash +asninfo serve --bind 0.0.0.0:8080 --refresh-secs 21600 +``` + +- Background updater refreshes the in-memory dataset every refresh-secs seconds (minimum 3600). +- CORS is enabled for all origins. +- Simplified mode reduces memory footprint by skipping heavy datasets (population, hegemony, PeeringDB). +- The maximum number of ASNs per request is limited by the environment variable ASNINFO_MAX_ASNS (default 100). + +### Endpoints + +- GET /health + - Returns status and metadata, including updatedAt timestamp. + +- GET /lookup?asns=AS1,AS2,...[&legacy=true] + - Query parameter asns is a comma-separated list of ASNs. + - Optional legacy=true to return a legacy array of objects instead of the structured response. + +- POST /lookup?legacy=true + - JSON body: { "asns": [number, ...] } + - Optional legacy=true query parameter as above. + +### Responses + +Default structured response: + +```json +{ + "data": [ + { + "asn": 13335, + "name": "CLOUDFLARENET", + "country": "US", + "country_name": "United States" + } + ], + "count": 1, + "updatedAt": "2025-01-01T00:00:00.000Z", + "page": 1, + "page_size": 100 +} +``` + +Legacy response (when legacy=true) returns an array of objects compatible with the previous consumer format. + +### Example requests + +```bash +# GET +curl 'http://localhost:8080/lookup?asns=13335,15169' + +# POST +curl -X POST 'http://localhost:8080/lookup' \ + -H 'Content-Type: application/json' \ + -d '{"asns":[13335,15169]}' +``` + +```json +{ + "count": 2, + "data": [ + { + "as2org": { + "country": "US", + "name": "CLOUDFLARENET", + "org_id": "CLOUD14-ARIN", + "org_name": "Cloudflare, Inc." + }, + "asn": 13335, + "country": "US", + "country_name": "United States", + "hegemony": { + "asn": 13335, + "ipv4": 0.0017993252336435785, + "ipv6": 0.008380104743151566 + }, + "name": "CLOUDFLARENET", + "peeringdb": { + "aka": "", + "asn": 13335, + "irr_as_set": "AS13335:AS-CLOUDFLARE", + "name": "Cloudflare", + "name_long": "", + "website": "https://www.cloudflare.com" + }, + "population": { + "percent_country": 0.02, + "percent_global": 0.0, + "sample_count": 127, + "user_count": 10 + } + }, + { + "as2org": { + "country": "US", + "name": "GOOGLE", + "org_id": "GOGL-ARIN", + "org_name": "Google LLC" + }, + "asn": 15169, + "country": "US", + "country_name": "United States", + "hegemony": { + "asn": 15169, + "ipv4": 0.0072255134909779304, + "ipv6": 0.002685539203529714 + }, + "name": "GOOGLE", + "peeringdb": { + "aka": "Google, YouTube (for Google Fiber see AS16591 record)", + "asn": 15169, + "irr_as_set": "RADB::AS-GOOGLE", + "name": "Google LLC", + "name_long": "", + "website": "https://about.google/intl/en/" + }, + "population": { + "percent_country": 0.01, + "percent_global": 0.0, + "sample_count": 740, + "user_count": 521 + } + } + ], + "page": 0, + "page_size": 2, + "updatedAt": "2025-09-26T21:25:10.902Z" +} +``` + +## CSV simplified schema + +When exporting CSV (or using --simplified), the schema is: + +``` +asn,as_name,org_id,org_name,country_code,country_name,data_source ``` -Notes +Notes: -- Upload is automatic when ASNINFO_UPLOAD_PATH is set (no --upload flag). -- .env files are supported (dotenv). +- country_name is looked up from country_code where available. +- data_source is reserved for future use. ## Environment variables @@ -57,4 +246,42 @@ Required for S3/R2 upload (when ASNINFO_UPLOAD_PATH is set): Optional: - ASNINFO_HEARTBEAT_URL — HTTP/HTTPS URL to request after a successful upload (used as a heartbeat) -- PEERINGDB_API_KEY — used by dependencies to access PeeringDB API (avoids rate limits) \ No newline at end of file +- ASNINFO_MAX_ASNS — maximum ASNs per lookup request for the HTTP API (default: 100) +- PEERINGDB_API_KEY — used by dependencies to access PeeringDB API (avoids rate limits) + +.env files are supported and loaded automatically when present. + +## Docker + +A minimal container image can be built using the provided Dockerfile: + +```bash +docker build -t asninfo . + +# run generator, mounting a host directory for output +docker run --rm -v "$PWD:/out" asninfo generate /out/asninfo.jsonl + +# run HTTP server on port 8080 +docker run --rm -p 8080:8080 asninfo serve --bind 0.0.0.0:8080 +``` + +### Using a local env file (.env) + +The image supports environment variables for uploads and server limits. You can pass your local .env to the container +using Docker's `--env-file`: + +```bash +# pass variables from ./.env to the container environment +# works for both generate and serve + +docker run --rm \ + --env-file ./.env \ + -v "$PWD:/out" \ + asninfo generate /out/asninfo.jsonl + +# server example with env-file +docker run --rm \ + --env-file ./.env \ + -p 8080:8080 \ + asninfo serve --bind 0.0.0.0:8080 +``` \ No newline at end of file From b710f2dcd2ac3a77fd2acea90bc22fa532bcb03b Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Wed, 1 Oct 2025 10:41:51 -0700 Subject: [PATCH 06/11] remove unused `LookupResponse` struct in `main.rs` --- src/main.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/main.rs b/src/main.rs index 6f1640a..8bcb3dc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -71,16 +71,6 @@ pub struct AsInfoSimplified { pub data_source: String, } -#[derive(Debug, Clone, Serialize, Deserialize)] -struct LookupResponse { - data: Vec, - count: usize, - #[serde(rename = "updatedAt")] - updated_at: String, - page: usize, - page_size: usize, -} - impl From<&AsInfo> for AsInfoSimplified { fn from(value: &AsInfo) -> Self { let (org_id, org_name) = match &value.as2org { From a8d7454a1d8bdbe37a03a1750439e16c1e512b00 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Wed, 1 Oct 2025 10:44:06 -0700 Subject: [PATCH 07/11] refactor `start_updater`: use constant and simplify locking - Introduced `MINIMUM_UPDATER_INTERVAL_SECS` constant for clarity. - Consolidated map and timestamp updates into a single critical section. --- src/api.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/api.rs b/src/api.rs index 2efab5c..bb770fe 100644 --- a/src/api.rs +++ b/src/api.rs @@ -119,6 +119,8 @@ pub fn load_asn_map_out(simplified: bool) -> Result<(HashMap, St Ok((out, updated_at)) } +const MINIMUM_UPDATER_INTERVAL_SECS: u64 = 3600; + pub fn start_updater( map: Arc>>, updated_at: Arc>, @@ -126,20 +128,18 @@ pub fn start_updater( simplified: bool, ) -> JoinHandle<()> { tokio::spawn(async move { - let interval = Duration::from_secs(refresh_secs.max(3600)); // minimum 1 hour + let interval = Duration::from_secs(refresh_secs.max(MINIMUM_UPDATER_INTERVAL_SECS)); // minimum 1 hour loop { sleep(interval).await; info!("background updater: refreshing ASN data ..."); match load_asn_map_out(simplified) { Ok((new_map, ts)) => { - { - let mut guard = map.lock().unwrap(); - *guard = new_map; - } - { - let mut ts_guard = updated_at.lock().unwrap(); - *ts_guard = ts; - } + // Update both map and updated_at within a single critical section + // to avoid exposing an inconsistent state between them. + let mut guard = map.lock().unwrap(); + let mut ts_guard = updated_at.lock().unwrap(); + *guard = new_map; + *ts_guard = ts; info!("background updater: ASN data updated"); } Err(e) => { From 7b535fbcf7dcfa802ab84418620be9c1265606b7 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Wed, 1 Oct 2025 10:51:50 -0700 Subject: [PATCH 08/11] improve error handling for mutex locks and file operations - Added error recovery for poisoned mutexes across API handlers. - Enhanced error handling for file writers and serialization failures. - Updated `/lookup` endpoints to return detailed error responses. --- src/api.rs | 151 +++++++++++++++++++++++++++++----------------------- src/main.rs | 34 ++++++++++-- 2 files changed, 115 insertions(+), 70 deletions(-) diff --git a/src/api.rs b/src/api.rs index bb770fe..2f02047 100644 --- a/src/api.rs +++ b/src/api.rs @@ -95,7 +95,13 @@ pub fn load_asn_map_out(simplified: bool) -> Result<(HashMap, St error!("failed to load countries: {e}"); return Err(2); }; - let as_info_map = commons.asinfo_all().expect("failed to get asinfo map"); + let as_info_map = match commons.asinfo_all() { + Ok(map) => map, + Err(e) => { + error!("failed to get asinfo map: {e}"); + return Err(3); + } + }; // build enriched map with country_name let mut out: HashMap = HashMap::with_capacity(as_info_map.len()); @@ -136,9 +142,21 @@ pub fn start_updater( Ok((new_map, ts)) => { // Update both map and updated_at within a single critical section // to avoid exposing an inconsistent state between them. - let mut guard = map.lock().unwrap(); - let mut ts_guard = updated_at.lock().unwrap(); - *guard = new_map; + let mut map_guard = match map.lock() { + Ok(guard) => guard, + Err(poisoned) => { + error!("background updater: map mutex is poisoned, recovering"); + poisoned.into_inner() + } + }; + let mut ts_guard = match updated_at.lock() { + Ok(guard) => guard, + Err(poisoned) => { + error!("background updater: updated_at mutex is poisoned, recovering"); + poisoned.into_inner() + } + }; + *map_guard = new_map; *ts_guard = ts; info!("background updater: ASN data updated"); } @@ -151,7 +169,11 @@ pub fn start_updater( } async fn health(State(state): State) -> Json { - let updated_at = state.updated_at.lock().unwrap().clone(); + let updated_at = state + .updated_at + .lock() + .unwrap_or_else(|e| e.into_inner()) + .clone(); Json(json!({ "status": "ok", "updatedAt": updated_at, @@ -193,7 +215,7 @@ fn convert_to_legacy(list: Vec) -> Vec { async fn get_lookup( State(state): State, Query(q): Query, -) -> Result, StatusCode> { +) -> Result, (StatusCode, Json)> { let asns: Vec = q .asns .clone() @@ -203,84 +225,79 @@ async fn get_lookup( .collect(); if asns.is_empty() { - error!("/lookup GET: empty or invalid 'asns' query param"); - return Err(StatusCode::BAD_REQUEST); + return Err(( + StatusCode::BAD_REQUEST, + Json(json!({"error": "no valid ASNs provided in 'asns' query parameter"})), + )); } + if asns.len() > state.max_asns { - error!( - "/lookup GET: too many ASNs: {} > {}", - asns.len(), - state.max_asns - ); - return Err(StatusCode::PAYLOAD_TOO_LARGE); + return Err(( + StatusCode::PAYLOAD_TOO_LARGE, + Json( + json!({"error": format!("payload too large, max ASNs per request is {}", state.max_asns)}), + ), + )); } - let legacy = q.legacy.unwrap_or(false); - let data_full = lookup(&state, &asns); - let updated_at = state.updated_at.lock().unwrap().clone(); + let map_guard = state.map.lock().map_err(|_| { + error!("get_lookup: map mutex is poisoned"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({"error": "internal server error"})), + ) + })?; - let data_values: Vec = if legacy { - convert_to_legacy(data_full) + let mut found = Vec::with_capacity(asns.len()); + for asn in asns { + if let Some(info) = map_guard.get(&asn) { + found.push(info.clone()); + } + } + + let use_legacy = q.legacy.unwrap_or(false); + let results = if use_legacy { + json!(convert_to_legacy(found)) } else { - data_full.into_iter().map(|o| json!(o)).collect() + json!(found) }; - let resp = json!({ - "data": data_values, - "count": data_values.len(), - "updatedAt": updated_at, - "page": 0, - "page_size": asns.len(), - }); - Ok(Json(resp)) + Ok(Json(results)) } async fn post_lookup( State(state): State, - Query(q): Query, Json(body): Json, -) -> Result, StatusCode> { - let legacy = q.legacy.unwrap_or(false); - let asns = body.asns; - if asns.is_empty() { - error!("/lookup POST: empty asns body"); - return Err(StatusCode::BAD_REQUEST); +) -> Result, (StatusCode, Json)> { + if body.asns.is_empty() { + return Err(( + StatusCode::BAD_REQUEST, + Json(json!({"error": "no ASNs provided in request body"})), + )); } - if asns.len() > state.max_asns { - error!( - "/lookup POST: too many ASNs: {} > {}", - asns.len(), - state.max_asns - ); - return Err(StatusCode::PAYLOAD_TOO_LARGE); + if body.asns.len() > state.max_asns { + return Err(( + StatusCode::PAYLOAD_TOO_LARGE, + Json( + json!({"error": format!("payload too large, max ASNs per request is {}", state.max_asns)}), + ), + )); } - let data_full = lookup(&state, &asns); - let updated_at = state.updated_at.lock().unwrap().clone(); - - let data_values: Vec = if legacy { - convert_to_legacy(data_full) - } else { - data_full.into_iter().map(|o| json!(o)).collect() - }; - - let resp = json!({ - "data": data_values, - "count": data_values.len(), - "updatedAt": updated_at, - "page": 0, - "page_size": asns.len(), - }); - Ok(Json(resp)) -} + let map_guard = state.map.lock().map_err(|_| { + error!("post_lookup: map mutex is poisoned"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({"error": "internal server error"})), + ) + })?; -fn lookup(state: &AppState, asns: &[u32]) -> Vec { - let map = state.map.lock().unwrap(); - let mut res = Vec::with_capacity(asns.len()); - for asn in asns { - if let Some(info) = map.get(asn) { - res.push(info.clone()); + let mut found = Vec::with_capacity(body.asns.len()); + for asn in body.asns { + if let Some(info) = map_guard.get(&asn) { + found.push(info.clone()); } } - res + + Ok(Json(json!(found))) } diff --git a/src/main.rs b/src/main.rs index 8bcb3dc..ea882cb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -172,7 +172,13 @@ fn generate_cmd(path: &str, simplified_flag: bool) -> Result<(), i32> { info!("export format: {}", &format); info!("writing asn info data to '{}' ...", &path); - let mut writer = oneio::get_writer(&path).unwrap(); + let mut writer = match oneio::get_writer(&path) { + Ok(w) => w, + Err(e) => { + error!("failed to open writer for path '{}': {}", path, e); + return Err(1); + } + }; let mut info_vec = as_info_map.values().collect::>(); info_vec.sort_by(|a, b| a.asn.cmp(&b.asn)); @@ -193,10 +199,32 @@ fn generate_cmd(path: &str, simplified_flag: bool) -> Result<(), i32> { }; if matches!(format, ExportFormat::JSONL) { for as_info in values_vec { - writeln!(writer, "{}", serde_json::to_string(&as_info).unwrap()).unwrap(); + match serde_json::to_string(&as_info) { + Ok(s) => { + if writeln!(writer, "{}", s).is_err() { + error!("failed to write to file"); + return Err(1); + } + } + Err(e) => { + error!("failed to serialize AS info: {}", e); + return Err(1); + } + } } } else { - writeln!(writer, "{}", serde_json::to_string(&values_vec).unwrap()).unwrap(); + match serde_json::to_string(&values_vec) { + Ok(s) => { + if writeln!(writer, "{}", s).is_err() { + error!("failed to write to file"); + return Err(1); + } + } + Err(e) => { + error!("failed to serialize AS info vector: {}", e); + return Err(1); + } + } } } ExportFormat::CSV => { From f2ae44daeb2189b365a1d691832683b7eccd22b5 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Wed, 1 Oct 2025 10:54:07 -0700 Subject: [PATCH 09/11] simplify mutex lock handling in background updater - Replaced `match` blocks with `unwrap_or_else` for brevity. - Preserved error recovery for poisoned mutexes. --- src/api.rs | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/api.rs b/src/api.rs index 2f02047..53d85d4 100644 --- a/src/api.rs +++ b/src/api.rs @@ -142,20 +142,14 @@ pub fn start_updater( Ok((new_map, ts)) => { // Update both map and updated_at within a single critical section // to avoid exposing an inconsistent state between them. - let mut map_guard = match map.lock() { - Ok(guard) => guard, - Err(poisoned) => { - error!("background updater: map mutex is poisoned, recovering"); - poisoned.into_inner() - } - }; - let mut ts_guard = match updated_at.lock() { - Ok(guard) => guard, - Err(poisoned) => { - error!("background updater: updated_at mutex is poisoned, recovering"); - poisoned.into_inner() - } - }; + let mut map_guard = map.lock().unwrap_or_else(|poisoned| { + error!("background updater: map mutex is poisoned, recovering"); + poisoned.into_inner() + }); + let mut ts_guard = updated_at.lock().unwrap_or_else(|poisoned| { + error!("background updater: updated_at mutex is poisoned, recovering"); + poisoned.into_inner() + }); *map_guard = new_map; *ts_guard = ts; info!("background updater: ASN data updated"); From ace8940cd5b2b3ac86335e411e72ad55d24bc7c6 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Wed, 1 Oct 2025 11:47:45 -0700 Subject: [PATCH 10/11] simplify JSON examples in README - Replaced verbose object structures with streamlined JSON arrays. - Improved clarity of example responses for both default and legacy formats. --- README.md | 144 +++++++++++++++++++++++++----------------------------- 1 file changed, 66 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index deaf209..e1cf72c 100644 --- a/README.md +++ b/README.md @@ -117,20 +117,14 @@ asninfo serve --bind 0.0.0.0:8080 --refresh-secs 21600 Default structured response: ```json -{ - "data": [ - { - "asn": 13335, - "name": "CLOUDFLARENET", - "country": "US", - "country_name": "United States" - } - ], - "count": 1, - "updatedAt": "2025-01-01T00:00:00.000Z", - "page": 1, - "page_size": 100 -} +[ + { + "asn": 13335, + "name": "CLOUDFLARENET", + "country": "US", + "country_name": "United States" + } +] ``` Legacy response (when legacy=true) returns an array of objects compatible with the previous consumer format. @@ -148,76 +142,70 @@ curl -X POST 'http://localhost:8080/lookup' \ ``` ```json -{ - "count": 2, - "data": [ - { - "as2org": { - "country": "US", - "name": "CLOUDFLARENET", - "org_id": "CLOUD14-ARIN", - "org_name": "Cloudflare, Inc." - }, - "asn": 13335, +[ + { + "as2org": { "country": "US", - "country_name": "United States", - "hegemony": { - "asn": 13335, - "ipv4": 0.0017993252336435785, - "ipv6": 0.008380104743151566 - }, "name": "CLOUDFLARENET", - "peeringdb": { - "aka": "", - "asn": 13335, - "irr_as_set": "AS13335:AS-CLOUDFLARE", - "name": "Cloudflare", - "name_long": "", - "website": "https://www.cloudflare.com" - }, - "population": { - "percent_country": 0.02, - "percent_global": 0.0, - "sample_count": 127, - "user_count": 10 - } + "org_id": "CLOUD14-ARIN", + "org_name": "Cloudflare, Inc." }, - { - "as2org": { - "country": "US", - "name": "GOOGLE", - "org_id": "GOGL-ARIN", - "org_name": "Google LLC" - }, - "asn": 15169, + "asn": 13335, + "country": "US", + "country_name": "United States", + "hegemony": { + "asn": 13335, + "ipv4": 0.0017993252336435785, + "ipv6": 0.008380104743151566 + }, + "name": "CLOUDFLARENET", + "peeringdb": { + "aka": "", + "asn": 13335, + "irr_as_set": "AS13335:AS-CLOUDFLARE", + "name": "Cloudflare", + "name_long": "", + "website": "https://www.cloudflare.com" + }, + "population": { + "percent_country": 0.02, + "percent_global": 0.0, + "sample_count": 127, + "user_count": 10 + } + }, + { + "as2org": { "country": "US", - "country_name": "United States", - "hegemony": { - "asn": 15169, - "ipv4": 0.0072255134909779304, - "ipv6": 0.002685539203529714 - }, "name": "GOOGLE", - "peeringdb": { - "aka": "Google, YouTube (for Google Fiber see AS16591 record)", - "asn": 15169, - "irr_as_set": "RADB::AS-GOOGLE", - "name": "Google LLC", - "name_long": "", - "website": "https://about.google/intl/en/" - }, - "population": { - "percent_country": 0.01, - "percent_global": 0.0, - "sample_count": 740, - "user_count": 521 - } + "org_id": "GOGL-ARIN", + "org_name": "Google LLC" + }, + "asn": 15169, + "country": "US", + "country_name": "United States", + "hegemony": { + "asn": 15169, + "ipv4": 0.0072255134909779304, + "ipv6": 0.002685539203529714 + }, + "name": "GOOGLE", + "peeringdb": { + "aka": "Google, YouTube (for Google Fiber see AS16591 record)", + "asn": 15169, + "irr_as_set": "RADB::AS-GOOGLE", + "name": "Google LLC", + "name_long": "", + "website": "https://about.google/intl/en/" + }, + "population": { + "percent_country": 0.01, + "percent_global": 0.0, + "sample_count": 740, + "user_count": 521 } - ], - "page": 0, - "page_size": 2, - "updatedAt": "2025-09-26T21:25:10.902Z" -} + } +] ``` ## CSV simplified schema From df863c0598ddfa0d17ea5242c6537c3c71565da3 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Wed, 1 Oct 2025 11:54:01 -0700 Subject: [PATCH 11/11] clarify `/lookup` endpoint usage and response structure in README - Updated `/lookup` endpoint details with accurate query parameter usage. - Expanded response schema to include new fields: `as2org`, `hegemony`, and `peeringdb`. - Documented omission of heavy datasets when `--simplified` flag is enabled. --- README.md | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e1cf72c..50e0208 100644 --- a/README.md +++ b/README.md @@ -108,13 +108,13 @@ asninfo serve --bind 0.0.0.0:8080 --refresh-secs 21600 - Query parameter asns is a comma-separated list of ASNs. - Optional legacy=true to return a legacy array of objects instead of the structured response. -- POST /lookup?legacy=true +- POST /lookup - JSON body: { "asns": [number, ...] } - - Optional legacy=true query parameter as above. + - Note: legacy=true is only supported on GET /lookup. ### Responses -Default structured response: +Default response (full schema plus country_name): ```json [ @@ -122,11 +122,39 @@ Default structured response: "asn": 13335, "name": "CLOUDFLARENET", "country": "US", - "country_name": "United States" + "country_name": "United States", + "as2org": { + "org_id": "CLOUD14-ARIN", + "org_name": "Cloudflare, Inc.", + "name": "CLOUDFLARENET", + "country": "US" + }, + "hegemony": { + "asn": 13335, + "ipv4": 0.0018, + "ipv6": 0.0084 + }, + "peeringdb": { + "asn": 13335, + "name": "Cloudflare", + "aka": "", + "name_long": "", + "website": "https://www.cloudflare.com", + "irr_as_set": "AS13335:AS-CLOUDFLARE" + }, + "population": { + "user_count": 10, + "sample_count": 127, + "percent_global": 0.0, + "percent_country": 0.02 + } } ] ``` +Note: When the server runs with --simplified, heavy datasets (population, hegemony, PeeringDB) are omitted and will be +null in responses. + Legacy response (when legacy=true) returns an array of objects compatible with the previous consumer format. ### Example requests