From 61f9d31d57cf4f4aefe68f73a3e766e4727ebb7e Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Mon, 9 Mar 2026 21:25:54 +0100 Subject: [PATCH 01/28] feat(support): add support service with WebSocket tunnels and session management Support service (port 8082) for remote support sessions via WebSocket tunnels. Includes tunnel-client for NS8 and NethSecurity, yamux multiplexer, web terminal, HTTP proxy, session lifecycle management, rate limiting, and graceful reconnection. --- services/support/.env.example | 29 + services/support/.render-build-trigger | 1 + services/support/Containerfile | 58 + services/support/Containerfile.tunnel-client | 32 + services/support/Makefile | 236 ++++ services/support/README.md | 169 +++ services/support/cmd/tunnel-client/main.go | 1122 +++++++++++++++++ .../support/configuration/configuration.go | 121 ++ services/support/database/database.go | 61 + services/support/go.mod | 51 + services/support/go.sum | 128 ++ services/support/helpers/sha256.go | 44 + services/support/logger/logger.go | 150 +++ services/support/main.go | 189 +++ services/support/methods/commands.go | 81 ++ services/support/methods/proxy.go | 309 +++++ services/support/methods/terminal.go | 227 ++++ services/support/methods/tunnel.go | 198 +++ services/support/middleware/auth.go | 470 +++++++ services/support/middleware/ratelimit.go | 122 ++ services/support/models/session.go | 28 + services/support/pkg/version/VERSION | 1 + services/support/pkg/version/version.go | 53 + services/support/queue/redis.go | 72 ++ services/support/response/response.go | 64 + services/support/response/response_test.go | 53 + services/support/session/cleaner.go | 84 ++ services/support/session/manager.go | 313 +++++ services/support/testutils/testutils.go | 41 + services/support/tunnel/manager.go | 506 ++++++++ services/support/tunnel/manager_test.go | 252 ++++ services/support/tunnel/protocol.go | 100 ++ services/support/tunnel/protocol_test.go | 113 ++ services/support/tunnel/stream.go 
| 95 ++ 34 files changed, 5573 insertions(+) create mode 100644 services/support/.env.example create mode 100644 services/support/.render-build-trigger create mode 100644 services/support/Containerfile create mode 100644 services/support/Containerfile.tunnel-client create mode 100644 services/support/Makefile create mode 100644 services/support/README.md create mode 100644 services/support/cmd/tunnel-client/main.go create mode 100644 services/support/configuration/configuration.go create mode 100644 services/support/database/database.go create mode 100644 services/support/go.mod create mode 100644 services/support/go.sum create mode 100644 services/support/helpers/sha256.go create mode 100644 services/support/logger/logger.go create mode 100644 services/support/main.go create mode 100644 services/support/methods/commands.go create mode 100644 services/support/methods/proxy.go create mode 100644 services/support/methods/terminal.go create mode 100644 services/support/methods/tunnel.go create mode 100644 services/support/middleware/auth.go create mode 100644 services/support/middleware/ratelimit.go create mode 100644 services/support/models/session.go create mode 100644 services/support/pkg/version/VERSION create mode 100644 services/support/pkg/version/version.go create mode 100644 services/support/queue/redis.go create mode 100644 services/support/response/response.go create mode 100644 services/support/response/response_test.go create mode 100644 services/support/session/cleaner.go create mode 100644 services/support/session/manager.go create mode 100644 services/support/testutils/testutils.go create mode 100644 services/support/tunnel/manager.go create mode 100644 services/support/tunnel/manager_test.go create mode 100644 services/support/tunnel/protocol.go create mode 100644 services/support/tunnel/protocol_test.go create mode 100644 services/support/tunnel/stream.go diff --git a/services/support/.env.example b/services/support/.env.example new file mode 100644 
index 00000000..1f8d8b41 --- /dev/null +++ b/services/support/.env.example @@ -0,0 +1,29 @@ +# Database +DATABASE_URL=postgresql://noc_user:noc_password@localhost:5432/noc?sslmode=disable + +# Redis +REDIS_URL=redis://localhost:6379 +REDIS_DB=2 + +# Server +LISTEN_ADDRESS=127.0.0.1:8082 + +# Logging +LOG_LEVEL=debug +LOG_FORMAT=console + +# Authentication +SYSTEM_AUTH_CACHE_TTL=24h +SYSTEM_SECRET_MIN_LENGTH=32 + +# Session defaults +SESSION_DEFAULT_DURATION=24h +SESSION_CLEANER_INTERVAL=5m + +# Tunnel +TUNNEL_GRACE_PERIOD=2m +MAX_TUNNELS=1000 +MAX_SESSIONS_PER_SYSTEM=5 + +# Internal authentication (shared secret with backend) +INTERNAL_SECRET=change-me-to-a-random-secret-min-32-chars diff --git a/services/support/.render-build-trigger b/services/support/.render-build-trigger new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/services/support/.render-build-trigger @@ -0,0 +1 @@ +1 diff --git a/services/support/Containerfile b/services/support/Containerfile new file mode 100644 index 00000000..4264491e --- /dev/null +++ b/services/support/Containerfile @@ -0,0 +1,58 @@ +# Build stage +FROM golang:1.24-alpine AS builder + +# Install git and ca-certificates (if needed for private repositories) +RUN apk add --no-cache git ca-certificates + +# Set working directory +WORKDIR /app + +# Copy build trigger file to force rebuilds when it changes +COPY .render-build-trigger /tmp/build-trigger + +# Copy go mod files first for better caching +COPY go.mod go.sum ./ + +# Download dependencies +RUN go mod download + +# Copy source code +COPY . . 
+
+# Build the application with version information
+ARG VERSION=dev
+ARG COMMIT=unknown
+ARG BUILD_TIME=unknown
+
+RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo \
+    -ldflags "-X github.com/nethesis/my/services/support/pkg/version.Version=${VERSION} \
+    -X github.com/nethesis/my/services/support/pkg/version.Commit=${COMMIT} \
+    -X github.com/nethesis/my/services/support/pkg/version.BuildTime=${BUILD_TIME}" \
+    -o support main.go
+
+# Final stage
+FROM alpine:3.21
+
+# Install ca-certificates for HTTPS requests
+RUN apk --no-cache add ca-certificates
+
+# Create non-root user
+RUN addgroup -g 1001 -S appgroup && \
+    adduser -u 1001 -S appuser -G appgroup
+
+WORKDIR /app
+
+# Copy the binary from builder stage
+COPY --from=builder /app/support .
+
+# Change ownership of the application directory
+RUN chown -R appuser:appgroup /app
+
+# Switch to non-root user
+USER appuser
+
+# Expose port 8082
+EXPOSE 8082
+
+# Command to run the application
+CMD ["./support"]
diff --git a/services/support/Containerfile.tunnel-client b/services/support/Containerfile.tunnel-client
new file mode 100644
index 00000000..65cf6fc1
--- /dev/null
+++ b/services/support/Containerfile.tunnel-client
@@ -0,0 +1,32 @@
+# Build stage
+FROM golang:1.24-alpine AS builder
+
+RUN apk add --no-cache git ca-certificates
+
+WORKDIR /app
+
+# Copy go mod files first for better caching
+COPY go.mod go.sum ./
+RUN go mod download
+
+# Copy source code
+COPY . .
+ +# Build the tunnel-client binary +ARG VERSION=dev +ARG COMMIT=unknown +ARG BUILD_TIME=unknown + +RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo \ + -ldflags "-s -w -X github.com/nethesis/my/services/support/pkg/version.Version=${VERSION} \ + -X github.com/nethesis/my/services/support/pkg/version.Commit=${COMMIT} \ + -X github.com/nethesis/my/services/support/pkg/version.BuildTime=${BUILD_TIME}" \ + -o tunnel-client ./cmd/tunnel-client/main.go + +# Final stage - minimal image with just the binary +FROM scratch + +COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ +COPY --from=builder /app/tunnel-client /tunnel-client + +ENTRYPOINT ["/tunnel-client"] diff --git a/services/support/Makefile b/services/support/Makefile new file mode 100644 index 00000000..633c80bb --- /dev/null +++ b/services/support/Makefile @@ -0,0 +1,236 @@ +# Variables +BINARY_NAME=support +BUILD_DIR=build +VERSION=$(shell git describe --tags --always --dirty 2>/dev/null || echo "dev") +COMMIT=$(shell git rev-parse --short HEAD 2>/dev/null || echo "unknown") +BUILD_TIME=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") + +# Container runtime detection (Docker with Podman fallback) +DOCKER_CMD=$(shell which docker 2>/dev/null || echo "") +PODMAN_CMD=$(shell which podman 2>/dev/null || echo "") + +# Choose container runtime +ifeq ($(DOCKER_CMD),) + ifeq ($(PODMAN_CMD),) + CONTAINER_CMD=echo "Error: Neither Docker nor Podman is installed" && exit 1 + CONTAINER_NAME=none + else + CONTAINER_CMD=$(PODMAN_CMD) + CONTAINER_NAME=podman + endif +else + CONTAINER_CMD=$(DOCKER_CMD) + CONTAINER_NAME=docker +endif + +# Go build flags +LDFLAGS=-ldflags "-X github.com/nethesis/my/services/support/pkg/version.Version=$(VERSION) \ + -X github.com/nethesis/my/services/support/pkg/version.Commit=$(COMMIT) \ + -X github.com/nethesis/my/services/support/pkg/version.BuildTime=$(BUILD_TIME)" + +# Default target +.PHONY: all +all: clean test build + +# Run the application +.PHONY: run +run: + @echo 
"Starting support development server..." + @go run main.go + +# Run the application with QA environment +.PHONY: run-qa +run-qa: + @echo "Starting support QA server..." + @ENV_FILE=.env.qa go run main.go + +# Build the binary +.PHONY: build +build: + @echo "Building $(BINARY_NAME)..." + @mkdir -p $(BUILD_DIR) + @CGO_ENABLED=0 go build $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME) main.go + @echo "Built $(BINARY_NAME) -> $(BUILD_DIR)/$(BINARY_NAME)" + +# Build for multiple platforms +.PHONY: build-all +build-all: clean + @echo "Building for multiple platforms..." + @mkdir -p $(BUILD_DIR) + @GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME)-linux-amd64 main.go + @echo "Built binaries for multiple platforms in $(BUILD_DIR)/" + +# Run tests +.PHONY: test +test: + @echo "Running tests..." + @go test -v ./... + +# Run tests with coverage +.PHONY: test-coverage +test-coverage: + @echo "Running tests with coverage..." + @go test -v -coverprofile=coverage.out -coverpkg=./... ./... + @go tool cover -html=coverage.out -o coverage.html + @echo "Coverage report generated: coverage.html" + +# Run linting +.PHONY: lint +lint: + @echo "Running linter..." + @golangci-lint run + +# Format code +.PHONY: fmt +fmt: + @echo "Formatting code..." + @gofmt -s -w . + +# Tidy dependencies +.PHONY: tidy +tidy: + @echo "Tidying dependencies..." + @go mod tidy + +# Clean build artifacts +.PHONY: clean +clean: + @echo "Cleaning build artifacts..." + @rm -rf $(BUILD_DIR) + @rm -f coverage.out coverage.html + +# Install the binary +.PHONY: install +install: build + @echo "Installing $(BINARY_NAME)..." + @go install $(LDFLAGS) . + +# Database management (uses shared database) +.PHONY: db-up +db-up: + @echo "Starting shared PostgreSQL container with $(CONTAINER_NAME)..." 
+ @if $(CONTAINER_CMD) ps --format "{{.Names}}" | grep -q "^my-postgres$$"; then \ + echo "PostgreSQL container already running"; \ + echo "DATABASE_URL=postgresql://noc_user:noc_password@localhost:5432/noc?sslmode=disable"; \ + else \ + $(CONTAINER_CMD) run -d --name my-postgres \ + -e POSTGRES_DB=noc \ + -e POSTGRES_USER=noc_user \ + -e POSTGRES_PASSWORD=noc_password \ + -p 5432:5432 \ + postgres:15-alpine; \ + echo "DATABASE_URL=postgresql://noc_user:noc_password@localhost:5432/noc?sslmode=disable"; \ + fi + +.PHONY: db-down +db-down: + @echo "Stopping shared PostgreSQL container with $(CONTAINER_NAME)..." + @$(CONTAINER_CMD) stop my-postgres || true + @$(CONTAINER_CMD) rm my-postgres || true + +.PHONY: db-reset +db-reset: db-down db-up + @echo "Waiting for database to be ready..." + @sleep 5 + +# Redis development commands +.PHONY: redis-up +redis-up: + @echo "Starting shared Redis container with $(CONTAINER_NAME)..." + @if $(CONTAINER_CMD) ps --format "{{.Names}}" | grep -q "^my-redis$$"; then \ + echo "Redis container already running"; \ + echo "REDIS_URL=redis://localhost:6379"; \ + else \ + $(CONTAINER_CMD) run -d --name my-redis -p 6379:6379 redis:7-alpine redis-server --save 60 1 --loglevel warning; \ + echo "REDIS_URL=redis://localhost:6379"; \ + fi + +.PHONY: redis-down +redis-down: + @echo "Stopping shared Redis container with $(CONTAINER_NAME)..." + @$(CONTAINER_CMD) stop my-redis || true + @$(CONTAINER_CMD) rm my-redis || true + +.PHONY: redis-flush +redis-flush: + @echo "Flushing Redis cache..." + @$(CONTAINER_CMD) exec my-redis redis-cli FLUSHALL + +.PHONY: redis-cli +redis-cli: + @echo "Connecting to Redis CLI..." + @$(CONTAINER_CMD) exec -it my-redis redis-cli + +# Development environment +.PHONY: dev-up +dev-up: + @echo "Starting full development environment..." 
+ @echo "" + @echo "=== PostgreSQL ===" + @$(MAKE) db-up + @echo "" + @echo "=== Redis ===" + @$(MAKE) redis-up + @echo "" + @echo "Full development environment started" + +.PHONY: dev-down +dev-down: db-down redis-down + @echo "Full development environment stopped" + +.PHONY: dev-setup +dev-setup: + @echo "Setting up development environment..." + @go mod download + @if [ ! -f .env ]; then \ + echo "Creating .env from .env.example..."; \ + cp .env.example .env; \ + else \ + echo ".env already exists, skipping copy"; \ + fi + @echo "Development environment ready!" + +# Pre-commit checks +.PHONY: pre-commit +pre-commit: fmt lint test + @echo "All pre-commit checks passed!" + +# Show help +.PHONY: help +help: + @echo "Support Service - Container runtime: $(CONTAINER_NAME)" + @echo "" + @echo "Available targets:" + @echo " build - Build the binary" + @echo " build-all - Build for multiple platforms" + @echo " clean - Clean build artifacts" + @echo " fmt - Format code" + @echo " help - Show this help" + @echo " install - Install the binary" + @echo " lint - Run linter" + @echo " pre-commit - Run all pre-commit checks" + @echo " run - Start development server" + @echo " run-qa - Start QA server (uses .env.qa)" + @echo " test - Run tests" + @echo " test-coverage - Run tests with coverage" + @echo " tidy - Tidy dependencies" + @echo "" + @echo "Database commands:" + @echo " db-up - Start PostgreSQL container" + @echo " db-down - Stop PostgreSQL container" + @echo " db-reset - Reset PostgreSQL container" + @echo "" + @echo "Redis commands:" + @echo " redis-up - Start Redis container" + @echo " redis-down - Stop Redis container" + @echo " redis-flush - Flush Redis cache" + @echo " redis-cli - Connect to Redis CLI" + @echo "" + @echo "Development environment:" + @echo " dev-setup - Setup development environment" + @echo " dev-up - Start full development environment (PostgreSQL + Redis)" + @echo " dev-down - Stop full development environment" + @echo "" + +# Default goal 
+.DEFAULT_GOAL := help diff --git a/services/support/README.md b/services/support/README.md new file mode 100644 index 00000000..2c3e3edd --- /dev/null +++ b/services/support/README.md @@ -0,0 +1,169 @@ +# Support - Remote Support Session Service + +WebSocket tunnel-based remote support service that enables operators to access remote systems through multiplexed yamux sessions. + +## Quick Start + +### Prerequisites +- Go 1.24+ +- PostgreSQL 15+ +- Redis 7+ +- Docker/Podman + +### Setup + +> **Note:** Support shares the same PostgreSQL and Redis containers with the backend. +> If you already started them with `cd backend && make dev-up`, you can skip `make dev-up` here. + +```bash +# Setup development environment +make dev-setup + +# Start PostgreSQL and Redis containers (skip if already running from backend) +make dev-up + +# Start the application (port 8082) +make run + +# Stop PostgreSQL and Redis when done +make dev-down +``` + +### Required Environment Variables +```bash +# Database +DATABASE_URL=postgresql://noc_user:noc_password@localhost:5432/noc?sslmode=disable + +# Redis +REDIS_URL=redis://localhost:6379 +REDIS_DB=2 + +# Internal authentication (shared secret with backend) +INTERNAL_SECRET=change-me-to-a-random-secret-min-32-chars +``` + +### Optional Environment Variables +```bash +LISTEN_ADDRESS=127.0.0.1:8082 +LOG_LEVEL=info +LOG_FORMAT=console +SYSTEM_AUTH_CACHE_TTL=24h +SYSTEM_SECRET_MIN_LENGTH=32 +SESSION_DEFAULT_DURATION=24h +SESSION_CLEANER_INTERVAL=5m +TUNNEL_GRACE_PERIOD=2m +MAX_TUNNELS=1000 +MAX_SESSIONS_PER_SYSTEM=5 +``` + +## Architecture + +### Tunnel Flow + +1. **System connects** via WebSocket with HTTP Basic Auth (same credentials as collect) +2. **yamux session** multiplexes streams over a single WebSocket connection +3. **Service manifest** is exchanged — the system advertises available services (e.g., cluster-admin, SSH) +4. **Operator requests** arrive as yamux streams with CONNECT headers routing to the target service +5. 
**Reverse proxy** forwards HTTP/WebSocket traffic through the tunnel to remote services + +### Session Lifecycle +- `pending` — Session created by backend, waiting for system to connect +- `active` — System connected, tunnel established +- `expired` — Session past `expires_at`, cleaned up by background cleaner +- `closed` — Session closed by operator or system disconnect + +### Inter-Service Communication +- **Backend → Support**: Redis pub/sub on channel `support:commands` (close sessions) +- **Backend → Support**: Internal HTTP endpoints with `X-Internal-Secret` header (proxy, terminal, services) +- **System → Support**: WebSocket with HTTP Basic Auth (tunnel establishment) + +## Development + +### Basic Commands +```bash +# Run tests +make test + +# Format code +make fmt + +# Run linter +make lint + +# Build +make build + +# Run server +make run + +# Run QA server (uses .env.qa) +make run-qa + +# Test coverage +make test-coverage +``` + +### PostgreSQL Commands +```bash +# Start PostgreSQL container +make db-up + +# Stop PostgreSQL container +make db-down + +# Reset database +make db-reset +``` + +### Redis Commands +```bash +# Start Redis container +make redis-up + +# Stop Redis container +make redis-down + +# Flush Redis cache +make redis-flush + +# Connect to Redis CLI +make redis-cli +``` + +## Project Structure + +``` +services/support/ +├── main.go # Server entry point +├── cmd/ +│ └── tunnel-client/ # Client binary deployed on remote systems +├── configuration/ # Environment configuration +├── database/ # PostgreSQL connection +├── helpers/ # SHA256 verification +├── logger/ # Structured logging (zerolog) +├── methods/ # HTTP/WebSocket handlers +│ ├── tunnel.go # WebSocket tunnel endpoint +│ ├── proxy.go # HTTP reverse proxy through tunnel +│ ├── terminal.go # Web terminal (WebSocket-to-SSH) +│ └── commands.go # Redis pub/sub command listener +├── middleware/ # Auth and rate limiting +│ ├── auth.go # HTTP Basic Auth (SHA256) + caching +│ └── ratelimit.go 
# Tunnel connection rate limiting +├── models/ # Data structures +├── queue/ # Redis client +├── response/ # HTTP response helpers +├── session/ # Session CRUD and background cleaner +├── tunnel/ # yamux tunnel manager and protocol +│ ├── manager.go # In-memory tunnel registry +│ ├── protocol.go # CONNECT header protocol +│ └── stream.go # WebSocket-to-net.Conn adapter +├── pkg/version/ # Build version info +└── .env.example # Environment variables template +``` + +## Related +- [openapi.yaml](../../backend/openapi.yaml) - API specification +- [Backend](../../backend/README.md) - API server +- [Collect](../../collect/README.md) - Inventory collection service +- [Proxy](../../proxy/README.md) - Nginx reverse proxy +- [Project Overview](../../README.md) - Main documentation diff --git a/services/support/cmd/tunnel-client/main.go b/services/support/cmd/tunnel-client/main.go new file mode 100644 index 00000000..99793bc2 --- /dev/null +++ b/services/support/cmd/tunnel-client/main.go @@ -0,0 +1,1122 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +// tunnel-client connects to the support service WebSocket tunnel, advertises +// available services via a manifest, and handles incoming CONNECT requests +// by proxying traffic to local targets. 
+// +// Usage: +// +// tunnel-client --url ws://support:8082/api/tunnel --key NETH-XXXX --secret my_xxx.yyy \ +// [--static-services cluster-admin=localhost:9090] \ +// [--redis-addr localhost:6379] +package main + +import ( + "bufio" + "context" + "crypto/tls" + "encoding/base64" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "log" + "net" + "net/http" + "net/url" + "os" + "os/exec" + "os/signal" + "path/filepath" + "regexp" + "sort" + "strings" + "syscall" + "time" + + flag "github.com/spf13/pflag" + + "github.com/creack/pty" + "github.com/gorilla/websocket" + "github.com/hashicorp/yamux" + "github.com/redis/go-redis/v9" + "gopkg.in/yaml.v3" +) + +// ServiceInfo matches the support service's tunnel.ServiceInfo +type ServiceInfo struct { + Target string `json:"target"` + Host string `json:"host"` + TLS bool `json:"tls"` + Label string `json:"label"` + Path string `json:"path,omitempty"` + PathPrefix string `json:"path_prefix,omitempty"` + ModuleID string `json:"module_id,omitempty"` + NodeID string `json:"node_id,omitempty"` +} + +// ServiceManifest is the JSON manifest sent to the support service +type ServiceManifest struct { + Version int `json:"version"` + Services map[string]ServiceInfo `json:"services"` +} + +// tunnelClientConfig is the YAML configuration file for the tunnel client +type tunnelClientConfig struct { + Exclude []string `yaml:"exclude"` +} + +// Defaults — all overridable via CLI flags or environment variables +const ( + defaultRedisAddr = "127.0.0.1:6379" + defaultReconnectDelay = 5 * time.Second + defaultMaxReconnect = 5 * time.Minute + defaultDiscoveryInterval = 5 * time.Minute + defaultShell = "/bin/bash" + defaultTermEnv = "TERM=xterm-256color" + defaultYamuxKeepAlive = 30 // seconds + defaultRemoteHTTPSPort = "443" + defaultNethSecUIPort = "443" + maxFrameSize = 1024 * 1024 // 1 MB + maxLineLength = 1024 + redisPingTimeout = 2 * time.Second + + // NethSecurity detection paths + nethSecUIPath = "/www-ns/index.html" + 
nethSecNginxConf = "/etc/nginx/conf.d/ns-ui.conf" + ns8NodeEnvFile = "/var/lib/nethserver/node/state/environment" +) + +// defaultExclude filters out backend API routes that are not useful for +// support operators. Only UI-facing services (cluster-admin, *-ui, *-wizard, +// *-reports-ui, *-amld, *_grafana, n8n*, nethsecurity-controller*) are kept. +var defaultExclude = []string{ + "*-cti-server-api", + "*-janus", + "*-middleware-*", + "*-provisioning", + "*-reports-api", + "*-server-api", + "*-server-websocket", + "*-tancredi", + "*_loki", + "*_prometheus", +} + +func main() { + var ( + urlFlag = flag.StringP("url", "u", envWithDefault("SUPPORT_URL", ""), "WebSocket tunnel URL (env: SUPPORT_URL)") + keyFlag = flag.StringP("key", "k", envWithDefault("SYSTEM_KEY", ""), "System key (env: SYSTEM_KEY)") + secretFlag = flag.StringP("secret", "s", envWithDefault("SYSTEM_SECRET", ""), "System secret (env: SYSTEM_SECRET)") + nodeIDFlag = flag.StringP("node-id", "n", envWithDefault("NODE_ID", ""), "Cluster node ID, auto-detected on NS8 (env: NODE_ID)") + redisAddr = flag.StringP("redis-addr", "r", envWithDefault("REDIS_ADDR", ""), "Redis address, auto-detected on NS8 (env: REDIS_ADDR)") + staticServices = flag.String("static-services", envWithDefault("STATIC_SERVICES", ""), "Static services name=host:port[:tls],… (env: STATIC_SERVICES)") + configFile = flag.StringP("config", "c", envWithDefault("TUNNEL_CONFIG", ""), "YAML config file for exclusions (env: TUNNEL_CONFIG)") + reconnectDelay = flag.Duration("reconnect-delay", parseDurationDefault(envWithDefault("RECONNECT_DELAY", ""), defaultReconnectDelay), "Base reconnect delay (env: RECONNECT_DELAY)") + maxReconnectDelay = flag.Duration("max-reconnect-delay", parseDurationDefault(envWithDefault("MAX_RECONNECT_DELAY", ""), defaultMaxReconnect), "Max reconnect delay (env: MAX_RECONNECT_DELAY)") + discoveryInterval = flag.Duration("discovery-interval", parseDurationDefault(envWithDefault("DISCOVERY_INTERVAL", ""), 
defaultDiscoveryInterval), "Service re-discovery interval (env: DISCOVERY_INTERVAL)") + tlsInsecure = flag.Bool("tls-insecure", envWithDefault("TLS_INSECURE", "") == "true", "Skip TLS verification (env: TLS_INSECURE)") + ) + flag.Parse() + + if *urlFlag == "" || *keyFlag == "" || *secretFlag == "" { + fmt.Fprintln(os.Stderr, "Usage: tunnel-client --url URL --key KEY --secret SECRET [options]") + fmt.Fprintln(os.Stderr, " Required: --url, --key, --secret (or SUPPORT_URL, SYSTEM_KEY, SYSTEM_SECRET env vars)") + os.Exit(1) + } + + // Auto-detect Redis on localhost if not explicitly specified + if *redisAddr == "" { + rdb := redis.NewClient(&redis.Options{Addr: defaultRedisAddr}) + ctx, cancel := context.WithTimeout(context.Background(), redisPingTimeout) + if err := rdb.Ping(ctx).Err(); err == nil { + log.Printf("Redis detected at %s, enabling NS8 auto-discovery", defaultRedisAddr) + *redisAddr = defaultRedisAddr + } else { + log.Printf("No Redis at %s, skipping NS8 auto-discovery (use -redis-addr to specify)", defaultRedisAddr) + } + cancel() + _ = rdb.Close() + } + + // Auto-detect node ID from NS8 environment if not explicitly specified + if *nodeIDFlag == "" && *redisAddr != "" { + if nid := readNodeID(); nid != "" { + log.Printf("Auto-detected node ID: %s", nid) + *nodeIDFlag = nid + } + } + + // Build exclusion list: start with defaults, add config file overrides + exclude := append([]string{}, defaultExclude...) + if *configFile != "" { + if tc, err := loadConfig(*configFile); err != nil { + log.Printf("Warning: cannot load config %s: %v", *configFile, err) + } else if len(tc.Exclude) > 0 { + exclude = append(exclude, tc.Exclude...) 
+ } + } + log.Printf("Excluding %d service patterns: %v", len(exclude), exclude) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Handle signals + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + go func() { + <-sigCh + log.Println("Shutting down...") + cancel() + }() + + config := &clientConfig{ + url: *urlFlag, + key: *keyFlag, + secret: *secretFlag, + nodeID: *nodeIDFlag, + redisAddr: *redisAddr, + staticServices: *staticServices, + configFile: *configFile, + exclude: exclude, + reconnectDelay: *reconnectDelay, + maxReconnectDelay: *maxReconnectDelay, + discoveryInterval: *discoveryInterval, + tlsInsecure: *tlsInsecure, + } + + runWithReconnect(ctx, config) +} + +type clientConfig struct { + url string + key string + secret string + nodeID string + redisAddr string + staticServices string + configFile string + reconnectDelay time.Duration + maxReconnectDelay time.Duration + discoveryInterval time.Duration + tlsInsecure bool + exclude []string // loaded from config file +} + +// closeCodeSessionClosed matches the server's CloseCodeSessionClosed. +// When the operator closes a session, the server sends this code +// to tell the client to exit without reconnecting. +const closeCodeSessionClosed = 4000 + +func runWithReconnect(ctx context.Context, cfg *clientConfig) { + delay := cfg.reconnectDelay + + for { + start := time.Now() + err := connect(ctx, cfg) + if ctx.Err() != nil { + return // context cancelled, clean shutdown + } + + // Check if the server sent a "session closed" close frame + if websocket.IsCloseError(err, closeCodeSessionClosed) { + log.Println("Session closed by operator. 
Exiting.") + os.Exit(0) + } + + log.Printf("Connection lost: %v", err) + + // Reset backoff if connection lasted longer than 60 seconds + if time.Since(start) > 60*time.Second { + delay = cfg.reconnectDelay + } + + log.Printf("Reconnecting in %v...", delay) + + select { + case <-ctx.Done(): + return + case <-time.After(delay): + } + + // Exponential backoff + delay = delay * 2 + if delay > cfg.maxReconnectDelay { + delay = cfg.maxReconnectDelay + } + } +} + +func connect(ctx context.Context, cfg *clientConfig) error { + // Build Basic Auth header + creds := base64.StdEncoding.EncodeToString([]byte(cfg.key + ":" + cfg.secret)) + header := http.Header{} + header.Set("Authorization", "Basic "+creds) + + // Append node_id query parameter for multi-node clusters + connectURL := cfg.url + if cfg.nodeID != "" { + parsed, err := url.Parse(connectURL) + if err != nil { + return fmt.Errorf("invalid URL: %w", err) + } + q := parsed.Query() + q.Set("node_id", cfg.nodeID) + parsed.RawQuery = q.Encode() + connectURL = parsed.String() + } + + log.Printf("Connecting to %s ...", connectURL) + + dialer := websocket.Dialer{ + HandshakeTimeout: 10 * time.Second, + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: cfg.tlsInsecure, //nolint:gosec // Configurable: disabled by default, enable for dev/self-signed certs + }, + } + wsConn, _, err := dialer.Dial(connectURL, header) + if err != nil { + return fmt.Errorf("websocket dial failed: %w", err) + } + log.Println("WebSocket connected") + + // Wrap as net.Conn + netConn := &wsNetConn{conn: wsConn} + + // Create yamux client session + yamuxCfg := yamux.DefaultConfig() + yamuxCfg.EnableKeepAlive = true + yamuxCfg.KeepAliveInterval = defaultYamuxKeepAlive + yamuxCfg.LogOutput = io.Discard + + session, err := yamux.Client(netConn, yamuxCfg) + if err != nil { + _ = wsConn.Close() + return fmt.Errorf("yamux client creation failed: %w", err) + } + log.Println("yamux session established") + + // Discover services + services := 
discoverServices(ctx, cfg) + + // Send initial manifest + if err := sendManifest(session, services); err != nil { + _ = session.Close() + return fmt.Errorf("failed to send manifest: %w", err) + } + + // Start periodic re-discovery + go func() { + ticker := time.NewTicker(cfg.discoveryInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-session.CloseChan(): + return + case <-ticker.C: + newServices := discoverServices(ctx, cfg) + if len(newServices) > 0 { + if err := sendManifest(session, newServices); err != nil { + log.Printf("Failed to send updated manifest: %v", err) + } else { + services = newServices + log.Printf("Manifest updated with %d services", len(services)) + } + } + } + } + }() + + // Close session when context is cancelled to unblock Accept() + go func() { + <-ctx.Done() + _ = session.Close() + }() + + // Accept incoming streams + for { + stream, err := session.Accept() + if err != nil { + if ctx.Err() != nil { + return nil + } + // If the underlying WebSocket received a close frame, return that error + // so the reconnect loop can inspect the close code + if netConn.closeErr != nil { + return netConn.closeErr + } + return fmt.Errorf("stream accept error: %w", err) + } + go handleStream(stream, services) + } +} + +func discoverServices(ctx context.Context, cfg *clientConfig) map[string]ServiceInfo { + services := make(map[string]ServiceInfo) + + // Parse static services + if cfg.staticServices != "" { + for _, entry := range strings.Split(cfg.staticServices, ",") { + entry = strings.TrimSpace(entry) + if entry == "" { + continue + } + parts := strings.SplitN(entry, "=", 2) + if len(parts) != 2 { + log.Printf("Invalid static service entry: %s", entry) + continue + } + name := parts[0] + if isExcluded(name, cfg.exclude) { + continue + } + target := parts[1] + + svc := ServiceInfo{Label: name} + + // Check for :tls suffix + if strings.HasSuffix(target, ":tls") { + svc.TLS = true + target = strings.TrimSuffix(target, 
":tls") + } + + // Check for host override: name=target:port:host=hostname + if idx := strings.Index(target, ":host="); idx != -1 { + svc.Host = target[idx+6:] + target = target[:idx] + } + + svc.Target = target + services[name] = svc + } + } + + // NS8 auto-discovery from Traefik config files + if cfg.redisAddr != "" { + discovered := discoverTraefikRoutes(ctx, cfg.redisAddr) + for name, svc := range discovered { + if isExcluded(name, cfg.exclude) { + continue + } + services[name] = svc + } + } + + // NethSecurity auto-discovery (OpenWrt-based, no Redis/Traefik) + if cfg.redisAddr == "" { + discovered := discoverNethSecurityServices() + for name, svc := range discovered { + if isExcluded(name, cfg.exclude) { + continue + } + services[name] = svc + } + } + + logDiscoveredServices(services) + + return services +} + +// logDiscoveredServices prints a structured summary grouped by node → module → service +func logDiscoveredServices(services map[string]ServiceInfo) { + type moduleGroup struct { + label string + services map[string]ServiceInfo + } + type nodeGroup struct { + modules map[string]*moduleGroup + ungrouped []string // service keys without moduleID + } + + nodes := make(map[string]*nodeGroup) // keyed by nodeID ("" for non-node services) + + for name, svc := range services { + nid := svc.NodeID + ng, ok := nodes[nid] + if !ok { + ng = &nodeGroup{modules: make(map[string]*moduleGroup)} + nodes[nid] = ng + } + + if svc.ModuleID == "" { + ng.ungrouped = append(ng.ungrouped, name) + continue + } + + mg, ok := ng.modules[svc.ModuleID] + if !ok { + mg = &moduleGroup{services: make(map[string]ServiceInfo)} + ng.modules[svc.ModuleID] = mg + } + if mg.label == "" && svc.Label != "" { + mg.label = svc.Label + } + mg.services[name] = svc + } + + log.Printf("Discovered %d services across %d node(s)", len(services), len(nodes)) + + // Sort node IDs (empty string = non-node services, printed last) + nodeIDs := make([]string, 0, len(nodes)) + for nid := range nodes { + 
nodeIDs = append(nodeIDs, nid) + } + sort.Strings(nodeIDs) + + for _, nid := range nodeIDs { + ng := nodes[nid] + + if nid != "" { + log.Printf(" Node %s:", nid) + } + + indent := " " + if nid != "" { + indent = " " + } + + // Print modules (sorted) + moduleIDs := make([]string, 0, len(ng.modules)) + for id := range ng.modules { + moduleIDs = append(moduleIDs, id) + } + sort.Strings(moduleIDs) + + for _, moduleID := range moduleIDs { + mg := ng.modules[moduleID] + if mg.label != "" { + log.Printf("%s%s (%s)", indent, moduleID, mg.label) + } else { + log.Printf("%s%s", indent, moduleID) + } + names := make([]string, 0, len(mg.services)) + for name := range mg.services { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + svc := mg.services[name] + route := svc.Host + if svc.Path != "" && svc.Path != "/" { + route += svc.Path + } + log.Printf("%s - %s -> %s", indent, name, route) + } + } + + // Print ungrouped services (static, cluster-admin) + sort.Strings(ng.ungrouped) + for _, name := range ng.ungrouped { + svc := services[name] + log.Printf("%s%s -> %s", indent, name, svc.Target) + } + } +} + +func loadConfig(path string) (*tunnelClientConfig, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var cfg tunnelClientConfig + if err := yaml.Unmarshal(data, &cfg); err != nil { + return nil, err + } + return &cfg, nil +} + +func isExcluded(name string, patterns []string) bool { + for _, pattern := range patterns { + if matched, _ := filepath.Match(pattern, name); matched { + return true + } + } + return false +} + +// apiCliRoute represents a single route returned by api-cli list-routes with expand_list +type apiCliRoute struct { + Instance string `json:"instance"` + Host string `json:"host"` + Path string `json:"path"` + URL string `json:"url"` + StripPrefix bool `json:"strip_prefix"` + SkipCertVerif bool `json:"skip_cert_verify"` +} + +// discoverTraefikRoutes uses api-cli to discover routes from 
ALL cluster nodes. +func discoverTraefikRoutes(ctx context.Context, redisAddr string) map[string]ServiceInfo { + services := make(map[string]ServiceInfo) + + rdb := redis.NewClient(&redis.Options{ + Addr: redisAddr, + }) + defer func() { _ = rdb.Close() }() + + // Discover all node IDs by scanning Redis keys + nodeIDs := discoverNodeIDs(ctx, rdb) + if len(nodeIDs) == 0 { + log.Println("Traefik discovery: no nodes found, skipping") + return services + } + + // Read local NODE_ID to distinguish local vs remote nodes + localNodeID := readNodeID() + log.Printf("Traefik discovery: found %d node(s): %v (local: %s)", len(nodeIDs), nodeIDs, localNodeID) + + // Build a map of remote node IPs from Redis VPN config + nodeIPs := make(map[string]string) + for _, nid := range nodeIDs { + if nid == localNodeID { + continue + } + ip, err := rdb.HGet(ctx, fmt.Sprintf("node/%s/vpn", nid), "ip_address").Result() + if err != nil { + log.Printf("Traefik discovery: cannot get IP for node %s: %v", nid, err) + continue + } + nodeIPs[nid] = ip + log.Printf("Traefik discovery: node %s -> %s", nid, ip) + } + + for _, nodeID := range nodeIDs { + nodeServices := discoverNodeRoutes(ctx, rdb, nodeID) + + // For remote nodes, rewrite targets to go through the node's Traefik (HTTPS). + // Traefik on the remote node handles TLS termination, Host-based routing, + // and PathPrefix stripping, so we clear PathPrefix to avoid double-stripping. + if nodeID != localNodeID { + remoteIP, ok := nodeIPs[nodeID] + if !ok { + log.Printf("Traefik discovery: skipping node %s (no IP)", nodeID) + continue + } + for name, svc := range nodeServices { + svc.Target = remoteIP + ":" + defaultRemoteHTTPSPort + svc.TLS = true + svc.PathPrefix = "" + nodeServices[name] = svc + } + } + + for name, svc := range nodeServices { + services[name] = svc + } + } + + return services +} + +// readNodeID reads NODE_ID from the NS8 node environment file. 
+func readNodeID() string { + f, err := os.Open(ns8NodeEnvFile) + if err != nil { + return "" + } + defer func() { _ = f.Close() }() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + if strings.HasPrefix(line, "NODE_ID=") { + return strings.TrimPrefix(line, "NODE_ID=") + } + } + return "" +} + +// discoverNodeIDs finds all NS8 node IDs by scanning Redis keys. +func discoverNodeIDs(ctx context.Context, rdb *redis.Client) []string { + var nodeIDs []string + var cursor uint64 + + for { + keys, nextCursor, err := rdb.Scan(ctx, cursor, "node/*/default_instance/traefik", 100).Result() + if err != nil { + log.Printf("Traefik discovery: Redis SCAN error: %v", err) + return nodeIDs + } + + for _, key := range keys { + // key format: node/{NODE_ID}/default_instance/traefik + parts := strings.Split(key, "/") + if len(parts) >= 2 { + nodeIDs = append(nodeIDs, parts[1]) + } + } + + cursor = nextCursor + if cursor == 0 { + break + } + } + + sort.Strings(nodeIDs) + return nodeIDs +} + +// discoverNodeRoutes uses api-cli to get all routes from a node's Traefik instance. 
+func discoverNodeRoutes(ctx context.Context, rdb *redis.Client, nodeID string) map[string]ServiceInfo { + services := make(map[string]ServiceInfo) + + // Get the traefik instance name from Redis + traefikInstance, err := rdb.Get(ctx, fmt.Sprintf("node/%s/default_instance/traefik", nodeID)).Result() + if err != nil { + log.Printf("Traefik discovery: cannot get traefik instance for node %s: %v", nodeID, err) + return services + } + + // Call api-cli to get all routes with details + cmd := exec.CommandContext(ctx, "api-cli", "run", + fmt.Sprintf("module/%s/list-routes", traefikInstance), + "--data", `{"expand_list": true}`) + output, err := cmd.Output() + if err != nil { + log.Printf("Traefik discovery: api-cli failed for %s (node %s): %v", traefikInstance, nodeID, err) + return services + } + + var routes []apiCliRoute + if err := json.Unmarshal(output, &routes); err != nil { + log.Printf("Traefik discovery: cannot parse api-cli output for %s: %v", traefikInstance, err) + return services + } + + for _, route := range routes { + serviceKey := route.Instance + + // Parse target from URL + parsed, err := url.Parse(route.URL) + if err != nil { + continue + } + target := parsed.Host + useTLS := parsed.Scheme == "https" + + // Determine PathPrefix (only if strip_prefix is true) + var pathPrefix string + if route.Path != "" && route.StripPrefix { + pathPrefix = route.Path + } + + // Extract module ID and look up its ui_name from Redis + moduleID := extractModuleID(serviceKey) + var moduleLabel string + if moduleID != "" { + uiName, err := rdb.Get(ctx, "module/"+moduleID+"/ui_name").Result() + if err == nil && uiName != "" { + moduleLabel = uiName + } + } + + services[serviceKey] = ServiceInfo{ + Target: target, + Host: route.Host, + TLS: useTLS, + Label: moduleLabel, + Path: route.Path, + PathPrefix: pathPrefix, + ModuleID: moduleID, + NodeID: nodeID, + } + } + + return services +} + +// extractModuleID extracts the module ID from a Traefik config filename. 
+// NS8 module IDs end with an instance number (e.g., "nethvoice103", "n8n2", +// "nethsecurity-controller4"). Route suffixes are separated by hyphen or +// underscore after the digits (e.g., "nethvoice103-ui", "metrics1_grafana"). +func extractModuleID(name string) string { + // Match everything up to and including trailing digits, followed by + // a separator (- or _) or end of string + re := regexp.MustCompile(`^(.+\d+)(?:[-_]|$)`) + m := re.FindStringSubmatch(name) + if len(m) > 1 { + return m[1] + } + return "" +} + +// discoverNethSecurityServices detects NethSecurity (OpenWrt-based firewall) +// by checking for its web UI files and registers the main HTTPS service. +// NethSecurity runs nginx with the UI on a configurable port: +// - Port from /etc/nginx/conf.d/ns-ui.conf (dedicated UI server block) +// - Port 443 (when 00ns.locations is active, UI is on the default server) +func discoverNethSecurityServices() map[string]ServiceInfo { + services := make(map[string]ServiceInfo) + + // Detect NethSecurity by checking for its UI directory + if _, err := os.Stat(nethSecUIPath); err != nil { + return services + } + + hostname, _ := os.Hostname() + if hostname == "" { + hostname = "NethSecurity" + } + + port := detectNethSecurityUIPort() + + log.Printf("NethSecurity detected (hostname: %s, UI port: %s), registering web UI service", hostname, port) + + services["nethsecurity-ui"] = ServiceInfo{ + Target: net.JoinHostPort("127.0.0.1", port), + Host: "127.0.0.1", + TLS: true, + Label: hostname, + Path: "/", + } + + return services +} + +// detectNethSecurityUIPort determines the HTTPS port serving the NethSecurity UI. +// It checks ns-ui.conf for a dedicated server block (e.g., port 9090), and +// falls back to 443 when the UI locations are on the default server. 
+func detectNethSecurityUIPort() string { + // Check for dedicated UI server block (ns-ui.conf) + data, err := os.ReadFile(nethSecNginxConf) + if err == nil { + // Parse "listen ssl" directive + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "listen") && strings.Contains(line, "ssl") && !strings.Contains(line, "[::]:") { + fields := strings.Fields(line) + if len(fields) >= 2 { + port := fields[1] + // Validate it looks like a port number + if _, err := fmt.Sscanf(port, "%d", new(int)); err == nil { + return port + } + } + } + } + } + + // Default: UI on the main server + return defaultNethSecUIPort +} + +func sendManifest(session *yamux.Session, services map[string]ServiceInfo) error { + stream, err := session.Open() + if err != nil { + return fmt.Errorf("failed to open control stream: %w", err) + } + defer func() { _ = stream.Close() }() + + manifest := ServiceManifest{ + Version: 1, + Services: services, + } + + if err := json.NewEncoder(stream).Encode(manifest); err != nil { + return fmt.Errorf("failed to encode manifest: %w", err) + } + + log.Printf("Manifest sent with %d services", len(services)) + return nil +} + +func handleStream(stream net.Conn, services map[string]ServiceInfo) { + defer func() { _ = stream.Close() }() + + // Read CONNECT header + serviceName, err := readConnectHeader(stream) + if err != nil { + log.Printf("Failed to read CONNECT header: %v", err) + return + } + + // Built-in terminal service: spawn a PTY instead of dialing TCP + if serviceName == "terminal" { + if err := writeConnectResponse(stream, nil); err != nil { + return + } + log.Println("CONNECT terminal -> PTY") + handleTerminal(stream) + return + } + + // Look up service + svc, ok := services[serviceName] + if !ok { + _ = writeConnectResponse(stream, fmt.Errorf("service not found: %s", serviceName)) + return + } + + // Connect to local target + var targetConn net.Conn + if svc.TLS { + targetConn, err = 
tls.Dial("tcp", svc.Target, &tls.Config{ + InsecureSkipVerify: true, //nolint:gosec // Local services use self-signed certs + }) + } else { + targetConn, err = net.DialTimeout("tcp", svc.Target, 10*time.Second) + } + if err != nil { + _ = writeConnectResponse(stream, fmt.Errorf("failed to connect to %s: %v", svc.Target, err)) + return + } + + // Send OK response + if err := writeConnectResponse(stream, nil); err != nil { + _ = targetConn.Close() + return + } + + log.Printf("CONNECT %s -> %s", serviceName, svc.Target) + + // Bidirectional copy + done := make(chan struct{}, 2) + + go func() { + _, _ = io.Copy(targetConn, stream) + done <- struct{}{} + }() + + go func() { + _, _ = io.Copy(stream, targetConn) + done <- struct{}{} + }() + + <-done + _ = targetConn.Close() +} + +// readConnectHeader reads "CONNECT \n" from the stream byte-by-byte +func readConnectHeader(r io.Reader) (string, error) { + line, err := readLine(r) + if err != nil { + return "", err + } + if !strings.HasPrefix(line, "CONNECT ") { + return "", fmt.Errorf("invalid CONNECT header: %q", line) + } + name := strings.TrimPrefix(line, "CONNECT ") + if name == "" { + return "", fmt.Errorf("empty service name") + } + return name, nil +} + +func writeConnectResponse(w io.Writer, err error) error { + if err == nil { + _, writeErr := fmt.Fprint(w, "OK\n") + return writeErr + } + _, writeErr := fmt.Fprintf(w, "ERROR %s\n", err.Error()) + return writeErr +} + +func readLine(r io.Reader) (string, error) { + var buf []byte + b := make([]byte, 1) + for { + n, err := r.Read(b) + if n > 0 { + if b[0] == '\n' { + return string(buf), nil + } + buf = append(buf, b[0]) + if len(buf) > maxLineLength { + return "", fmt.Errorf("line too long") + } + } + if err != nil { + if err == io.EOF && len(buf) > 0 { + return string(buf), nil + } + return "", err + } + } +} + +// wsNetConn wraps gorilla/websocket.Conn as net.Conn for yamux. +// It captures WebSocket close errors so the reconnect loop can inspect the close code. 
+type wsNetConn struct { + conn *websocket.Conn + reader io.Reader + closeErr error // stores the WebSocket close error if received +} + +func (w *wsNetConn) Read(b []byte) (int, error) { + for { + if w.reader == nil { + _, reader, err := w.conn.NextReader() + if err != nil { + w.closeErr = err + return 0, err + } + w.reader = reader + } + n, err := w.reader.Read(b) + if err == io.EOF { + w.reader = nil + if n > 0 { + return n, nil + } + continue + } + return n, err + } +} + +func (w *wsNetConn) Write(b []byte) (int, error) { + err := w.conn.WriteMessage(websocket.BinaryMessage, b) + if err != nil { + return 0, err + } + return len(b), nil +} + +func (w *wsNetConn) Close() error { return w.conn.Close() } +func (w *wsNetConn) LocalAddr() net.Addr { return w.conn.LocalAddr() } +func (w *wsNetConn) RemoteAddr() net.Addr { return w.conn.RemoteAddr() } +func (w *wsNetConn) SetDeadline(_ time.Time) error { return nil } +func (w *wsNetConn) SetReadDeadline(_ time.Time) error { return nil } +func (w *wsNetConn) SetWriteDeadline(_ time.Time) error { return nil } + +func envWithDefault(key, defaultValue string) string { + if v := os.Getenv(key); v != "" { + return v + } + return defaultValue +} + +func parseDurationDefault(s string, d time.Duration) time.Duration { + if s == "" { + return d + } + if v, err := time.ParseDuration(s); err == nil { + return v + } + return d +} + +// handleTerminal spawns a shell with a PTY and bridges it to the yamux stream +// using length-prefixed binary frames: +// - Type 0 (data): raw terminal bytes (bidirectional) +// - Type 1 (resize): JSON {"cols": N, "rows": N} (stream → PTY) +func handleTerminal(stream net.Conn) { + shell := os.Getenv("SHELL") + if shell == "" { + shell = defaultShell + } + + cmd := exec.Command(shell) + cmd.Env = append(os.Environ(), defaultTermEnv) + + ptmx, err := pty.Start(cmd) + if err != nil { + log.Printf("Failed to start PTY: %v", err) + return + } + defer func() { + _ = ptmx.Close() + _ = cmd.Process.Kill() + 
_, _ = cmd.Process.Wait() + }() + + done := make(chan struct{}, 2) + + // PTY → stream: read from PTY, send as type-0 length-prefixed frames + go func() { + defer func() { done <- struct{}{} }() + buf := make([]byte, 4096) + for { + n, readErr := ptmx.Read(buf) + if n > 0 { + frame := make([]byte, 1+n) + frame[0] = 0 // data frame + copy(frame[1:], buf[:n]) + if writeErr := writeFrame(stream, frame); writeErr != nil { + return + } + } + if readErr != nil { + return + } + } + }() + + // Stream → PTY: read length-prefixed frames, dispatch by type + go func() { + defer func() { done <- struct{}{} }() + for { + frame, readErr := readFrame(stream) + if readErr != nil { + return + } + if len(frame) < 1 { + continue + } + + frameType := frame[0] + payload := frame[1:] + + switch frameType { + case 0: // data → write to PTY + if _, writeErr := ptmx.Write(payload); writeErr != nil { + return + } + case 1: // resize → set PTY window size + var size struct { + Cols int `json:"cols"` + Rows int `json:"rows"` + } + if jsonErr := json.Unmarshal(payload, &size); jsonErr != nil { + continue + } + if size.Cols > 0 && size.Rows > 0 { + _ = pty.Setsize(ptmx, &pty.Winsize{ + Rows: uint16(size.Rows), + Cols: uint16(size.Cols), + }) + } + } + } + }() + + <-done +} + +// writeFrame writes a length-prefixed frame: [4 bytes big-endian length][payload] +func writeFrame(w io.Writer, data []byte) error { + header := make([]byte, 4) + binary.BigEndian.PutUint32(header, uint32(len(data))) + if _, err := w.Write(header); err != nil { + return err + } + _, err := w.Write(data) + return err +} + +// readFrame reads a length-prefixed frame: [4 bytes big-endian length][payload] +func readFrame(r io.Reader) ([]byte, error) { + header := make([]byte, 4) + if _, err := io.ReadFull(r, header); err != nil { + return nil, err + } + length := binary.BigEndian.Uint32(header) + if length > maxFrameSize { + return nil, fmt.Errorf("frame too large: %d", length) + } + data := make([]byte, length) + if _, err := 
io.ReadFull(r, data); err != nil { + return nil, err + } + return data, nil +} diff --git a/services/support/configuration/configuration.go b/services/support/configuration/configuration.go new file mode 100644 index 00000000..6c3a04f2 --- /dev/null +++ b/services/support/configuration/configuration.go @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package configuration + +import ( + "fmt" + "os" + "strconv" + "time" + + "github.com/nethesis/my/services/support/logger" +) + +// Configuration holds all service configuration +type Configuration struct { + ListenAddress string `json:"listen_address"` + + // Database configuration + DatabaseURL string `json:"database_url"` + + // Redis configuration + RedisURL string `json:"redis_url"` + RedisDB int `json:"redis_db"` + RedisPassword string `json:"redis_password"` + + // System authentication configuration + SystemAuthCacheTTL time.Duration `json:"system_auth_cache_ttl"` + SystemSecretMinLength int `json:"system_secret_min_length"` + + // Session configuration + SessionDefaultDuration time.Duration `json:"session_default_duration"` + SessionCleanerInterval time.Duration `json:"session_cleaner_interval"` + + // Tunnel configuration + TunnelGracePeriod time.Duration `json:"tunnel_grace_period"` + MaxTunnels int `json:"max_tunnels"` + MaxSessionsPerSystem int `json:"max_sessions_per_system"` + MaxStreamsPerTunnel int `json:"max_streams_per_tunnel"` + + // Terminal configuration + TerminalInactivityTimeout time.Duration `json:"terminal_inactivity_timeout"` + TerminalMaxFrameSize int `json:"terminal_max_frame_size"` +} + +// Config is the global configuration instance +var Config = Configuration{} + +// Init initializes configuration from environment variables +func Init() { + Config.ListenAddress = getStringWithDefault("LISTEN_ADDRESS", "127.0.0.1:8082") + + // Database 
configuration + if os.Getenv("DATABASE_URL") != "" { + Config.DatabaseURL = os.Getenv("DATABASE_URL") + } else { + logger.LogConfigLoad("env", "DATABASE_URL", false, fmt.Errorf("DATABASE_URL variable is empty")) + } + + // Redis configuration + Config.RedisURL = getStringWithDefault("REDIS_URL", "redis://localhost:6379") + Config.RedisDB = parseIntWithDefault("REDIS_DB", 2) + Config.RedisPassword = os.Getenv("REDIS_PASSWORD") + + // System authentication configuration + Config.SystemAuthCacheTTL = parseDurationWithDefault("SYSTEM_AUTH_CACHE_TTL", 24*time.Hour) + Config.SystemSecretMinLength = parseIntWithDefault("SYSTEM_SECRET_MIN_LENGTH", 32) + + // Session configuration + Config.SessionDefaultDuration = parseDurationWithDefault("SESSION_DEFAULT_DURATION", 24*time.Hour) + Config.SessionCleanerInterval = parseDurationWithDefault("SESSION_CLEANER_INTERVAL", 5*time.Minute) + + // Tunnel configuration + Config.TunnelGracePeriod = parseDurationWithDefault("TUNNEL_GRACE_PERIOD", 30*time.Second) + Config.MaxTunnels = parseIntWithDefault("MAX_TUNNELS", 1000) + Config.MaxSessionsPerSystem = parseIntWithDefault("MAX_SESSIONS_PER_SYSTEM", 5) + Config.MaxStreamsPerTunnel = parseIntWithDefault("MAX_STREAMS_PER_TUNNEL", 64) + + // Terminal configuration + Config.TerminalInactivityTimeout = parseDurationWithDefault("TERMINAL_INACTIVITY_TIMEOUT", 30*time.Minute) + Config.TerminalMaxFrameSize = parseIntWithDefault("TERMINAL_MAX_FRAME_SIZE", 65536) + + logger.LogConfigLoad("env", "configuration", true, nil) +} + +func parseDurationWithDefault(envVar string, defaultValue time.Duration) time.Duration { + envValue := os.Getenv(envVar) + if envValue == "" { + return defaultValue + } + if duration, err := time.ParseDuration(envValue); err == nil { + return duration + } + logger.LogConfigLoad("env", envVar, false, fmt.Errorf("invalid duration format, using default %v", defaultValue)) + return defaultValue +} + +func parseIntWithDefault(envVar string, defaultValue int) int { + envValue := 
os.Getenv(envVar) + if envValue == "" { + return defaultValue + } + if value, err := strconv.Atoi(envValue); err == nil { + return value + } + logger.LogConfigLoad("env", envVar, false, fmt.Errorf("invalid integer format, using default %d", defaultValue)) + return defaultValue +} + +func getStringWithDefault(envVar string, defaultValue string) string { + if envValue := os.Getenv(envVar); envValue != "" { + return envValue + } + return defaultValue +} diff --git a/services/support/database/database.go b/services/support/database/database.go new file mode 100644 index 00000000..3b72363a --- /dev/null +++ b/services/support/database/database.go @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package database + +import ( + "database/sql" + "fmt" + "os" + "time" + + _ "github.com/lib/pq" + + "github.com/nethesis/my/services/support/logger" +) + +// DB is the global database connection pool +var DB *sql.DB + +// Init initializes the database connection +func Init() error { + databaseURL := os.Getenv("DATABASE_URL") + if databaseURL == "" { + return fmt.Errorf("DATABASE_URL environment variable is not set") + } + + var err error + DB, err = sql.Open("postgres", databaseURL) + if err != nil { + return fmt.Errorf("failed to open database connection: %w", err) + } + + DB.SetMaxOpenConns(25) + DB.SetMaxIdleConns(10) + DB.SetConnMaxLifetime(15 * time.Minute) + DB.SetConnMaxIdleTime(1 * time.Minute) + + if err := DB.Ping(); err != nil { + return fmt.Errorf("failed to ping database: %w", err) + } + + logger.ComponentLogger("database").Info(). + Str("url", logger.SanitizeConnectionURL(databaseURL)). 
+ Msg("database connection initialized") + + return nil +} + +// Close closes the database connection +func Close() error { + if DB != nil { + return DB.Close() + } + return nil +} diff --git a/services/support/go.mod b/services/support/go.mod new file mode 100644 index 00000000..6554d70b --- /dev/null +++ b/services/support/go.mod @@ -0,0 +1,51 @@ +module github.com/nethesis/my/services/support + +go 1.24.0 + +toolchain go1.24.4 + +require ( + github.com/creack/pty v1.1.24 + github.com/gin-contrib/cors v1.7.6 + github.com/gin-contrib/gzip v1.2.3 + github.com/gin-gonic/gin v1.10.1 + github.com/gorilla/websocket v1.5.3 + github.com/hashicorp/yamux v0.1.2 + github.com/joho/godotenv v1.5.1 + github.com/lib/pq v1.10.9 + github.com/redis/go-redis/v9 v9.11.0 + github.com/rs/zerolog v1.34.0 + gopkg.in/yaml.v3 v3.0.1 +) + +require ( + github.com/bytedance/sonic v1.13.3 // indirect + github.com/bytedance/sonic/loader v0.2.4 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/cloudwego/base64x v0.1.5 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/gabriel-vasile/mimetype v1.4.9 // indirect + github.com/gin-contrib/sse v1.1.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.26.0 // indirect + github.com/goccy/go-json v0.10.5 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/leodido/go-urn v1.4.0 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/pelletier/go-toml/v2 v2.2.4 // indirect + github.com/spf13/pflag v1.0.10 // indirect + 
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.3.0 // indirect + golang.org/x/arch v0.18.0 // indirect + golang.org/x/crypto v0.45.0 // indirect + golang.org/x/net v0.47.0 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/text v0.31.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect +) diff --git a/services/support/go.sum b/services/support/go.sum new file mode 100644 index 00000000..ee04c47e --- /dev/null +++ b/services/support/go.sum @@ -0,0 +1,128 @@ +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= +github.com/bytedance/sonic v1.13.3 h1:MS8gmaH16Gtirygw7jV91pDCN33NyMrPbN7qiYhEsF0= +github.com/bytedance/sonic v1.13.3/go.mod h1:o68xyaF9u2gvVBuGHPlUVCy+ZfmNNO5ETf1+KgkJhz4= +github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= +github.com/bytedance/sonic/loader v0.2.4 h1:ZWCw4stuXUsn1/+zQDqeE7JKP+QO47tz7QCNan80NzY= +github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cloudwego/base64x v0.1.5 h1:XPciSp1xaq2VCSt6lF0phncD4koWyULpl5bUxbfCyP4= +github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= +github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.24 
h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= +github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/gabriel-vasile/mimetype v1.4.9 h1:5k+WDwEsD9eTLL8Tz3L0VnmVh9QxGjRmjBvAG7U/oYY= +github.com/gabriel-vasile/mimetype v1.4.9/go.mod h1:WnSQhFKJuBlRyLiKohA/2DtIlPFAbguNaG7QCHcyGok= +github.com/gin-contrib/cors v1.7.6 h1:3gQ8GMzs1Ylpf70y8bMw4fVpycXIeX1ZemuSQIsnQQY= +github.com/gin-contrib/cors v1.7.6/go.mod h1:Ulcl+xN4jel9t1Ry8vqph23a60FwH9xVLd+3ykmTjOk= +github.com/gin-contrib/gzip v1.2.3 h1:dAhT722RuEG330ce2agAs75z7yB+NKvX/ZM1r8w0u2U= +github.com/gin-contrib/gzip v1.2.3/go.mod h1:ad72i4Bzmaypk8M762gNXa2wkxxjbz0icRNnuLJ9a/c= +github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w= +github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM= +github.com/gin-gonic/gin v1.10.1 h1:T0ujvqyCSqRopADpgPgiTT63DUQVSfojyME59Ei63pQ= +github.com/gin-gonic/gin v1.10.1/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= 
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= +github.com/go-playground/validator/v10 v10.26.0 h1:SP05Nqhjcvz81uJaRfEV0YBSSSGMc/iMaVtFbr3Sw2k= +github.com/go-playground/validator/v10 v10.26.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= +github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/hashicorp/yamux v0.1.2 h1:XtB8kyFOyHXYVFnwT5C3+Bdo8gArse7j2AQ0DA0Uey8= +github.com/hashicorp/yamux v0.1.2/go.mod h1:C+zze2n6e/7wshOZep2A70/aQU6QBRWJO/G6FT1wIns= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= +github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/pretty 
v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= +github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/redis/go-redis/v9 v9.11.0 h1:E3S08Gl/nJNn5vkxd2i78wZxWAPNZgUNTp8WIJUAiIs= +github.com/redis/go-redis/v9 v9.11.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= +github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8= +github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= +github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= +github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY= +github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go/codec v1.3.0 
h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA= +github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4= +golang.org/x/arch v0.18.0 h1:WN9poc33zL4AzGxqf8VtpKUnGvMi8O9lhNyBMF/85qc= +golang.org/x/arch v0.18.0/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +nullprogram.com/x/optparse v1.0.0/go.mod 
h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= diff --git a/services/support/helpers/sha256.go b/services/support/helpers/sha256.go new file mode 100644 index 00000000..bfe7057e --- /dev/null +++ b/services/support/helpers/sha256.go @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package helpers + +import ( + "crypto/sha256" + "crypto/subtle" + "encoding/hex" + "fmt" + "strings" +) + +// VerifySystemSecretSHA256 verifies a system secret against a salted SHA256 hash. +// Expected format: hex_salt:hex_hash +func VerifySystemSecretSHA256(secret, encodedHash string) (bool, error) { + parts := strings.SplitN(encodedHash, ":", 2) + if len(parts) != 2 { + return false, fmt.Errorf("invalid sha256 hash format") + } + + salt, err := hex.DecodeString(parts[0]) + if err != nil { + return false, fmt.Errorf("failed to decode salt: %w", err) + } + + expectedHash, err := hex.DecodeString(parts[1]) + if err != nil { + return false, fmt.Errorf("failed to decode hash: %w", err) + } + + actualHash := sha256.Sum256(append(salt, []byte(secret)...)) + if subtle.ConstantTimeCompare(actualHash[:], expectedHash) == 1 { + return true, nil + } + + return false, nil +} diff --git a/services/support/logger/logger.go b/services/support/logger/logger.go new file mode 100644 index 00000000..fd68c5dd --- /dev/null +++ b/services/support/logger/logger.go @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package logger + +import ( + "io" + "os" + "regexp" + "strings" + "time" + + "github.com/gin-gonic/gin" + "github.com/rs/zerolog" +) + +// Logger is the global logger instance +var Logger zerolog.Logger + +// InitFromEnv initializes the logger from environment variables +func InitFromEnv(appName string) error { + level := os.Getenv("LOG_LEVEL") + format := os.Getenv("LOG_FORMAT") + + logLevel, err := zerolog.ParseLevel(strings.ToLower(level)) + if err != nil || level == "" { + logLevel = zerolog.InfoLevel + } + + var output io.Writer + if strings.ToLower(format) == "json" { + output = os.Stderr + } else { + output = zerolog.ConsoleWriter{ + Out: os.Stderr, + TimeFormat: time.RFC3339, + } + } + + Logger = zerolog.New(output).With(). + Timestamp(). + Str("app", appName). + Logger(). + Level(logLevel) + + zerolog.DefaultContextLogger = &Logger + + return nil +} + +// ComponentLogger returns a logger scoped to a component +func ComponentLogger(component string) *zerolog.Logger { + l := Logger.With().Str("component", component).Logger() + return &l +} + +// RequestLogger returns a logger enriched with request context +func RequestLogger(c *gin.Context, component string) *zerolog.Logger { + l := Logger.With(). + Str("component", component). + Str("method", c.Request.Method). + Str("path", c.Request.URL.Path). + Str("client_ip", c.ClientIP()). 
+ Logger() + return &l +} + +// Package-level convenience functions +func Trace() *zerolog.Event { return Logger.Trace() } +func Debug() *zerolog.Event { return Logger.Debug() } +func Info() *zerolog.Event { return Logger.Info() } +func Warn() *zerolog.Event { return Logger.Warn() } +func Error() *zerolog.Event { return Logger.Error() } +func Fatal() *zerolog.Event { return Logger.Fatal() } + +// SanitizeConnectionURL redacts credentials from connection URLs +func SanitizeConnectionURL(url string) string { + if url == "" { + return "" + } + re := regexp.MustCompile(`://([^:]+):([^@]+)@`) + return re.ReplaceAllString(url, "://$1:***@") +} + +// LogConfigLoad logs a configuration loading event +func LogConfigLoad(component, configType string, success bool, err error) { + logger := ComponentLogger(component) + + if success { + logger.Info(). + Str("operation", "config_load"). + Str("config_type", configType). + Msg("configuration loaded") + } else { + logger.Warn(). + Str("operation", "config_load"). + Str("config_type", configType). + Err(err). + Msg("configuration load issue") + } +} + +// LogServiceStart logs service startup information +func LogServiceStart(serviceName, version, listenAddress string) { + Logger.Info(). + Str("operation", "service_start"). + Str("service", serviceName). + Str("version", version). + Str("listen_address", listenAddress). + Msg("service starting") +} + +// GinLogger returns a gin middleware that logs requests using zerolog +func GinLogger() gin.HandlerFunc { + return func(c *gin.Context) { + start := time.Now() + c.Next() + latency := time.Since(start) + + event := Logger.Info() + if c.Writer.Status() >= 500 { + event = Logger.Error() + } else if c.Writer.Status() >= 400 { + event = Logger.Warn() + } + + event. + Str("method", c.Request.Method). + Str("path", c.Request.URL.Path). + Int("status", c.Writer.Status()). + Dur("latency", latency). + Str("client_ip", c.ClientIP()). 
+ Msg("request") + } +} + +// SecurityMiddleware returns a gin middleware that sets security headers +func SecurityMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + c.Header("X-Content-Type-Options", "nosniff") + c.Header("X-Frame-Options", "DENY") + c.Next() + } +} diff --git a/services/support/main.go b/services/support/main.go new file mode 100644 index 00000000..e0f72e85 --- /dev/null +++ b/services/support/main.go @@ -0,0 +1,189 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package main + +import ( + "context" + "net/http" + "os" + "os/signal" + "strings" + "syscall" + "time" + + "github.com/gin-contrib/cors" + "github.com/gin-contrib/gzip" + "github.com/gin-gonic/gin" + "github.com/joho/godotenv" + + "github.com/nethesis/my/services/support/configuration" + "github.com/nethesis/my/services/support/database" + "github.com/nethesis/my/services/support/logger" + "github.com/nethesis/my/services/support/methods" + "github.com/nethesis/my/services/support/middleware" + "github.com/nethesis/my/services/support/pkg/version" + "github.com/nethesis/my/services/support/queue" + "github.com/nethesis/my/services/support/response" + "github.com/nethesis/my/services/support/session" + "github.com/nethesis/my/services/support/tunnel" +) + +func main() { + envFile := os.Getenv("ENV_FILE") + if envFile == "" { + envFile = ".env" + } + err := godotenv.Load(envFile) + + loggerErr := logger.InitFromEnv("support") + if loggerErr != nil { + logger.Fatal().Err(loggerErr).Msg("Failed to initialize logger") + } + + if err == nil { + logger.Info().Str("component", "env").Str("operation", "config_load"). + Str("config_type", "environment").Str("env_file", envFile).Bool("success", true). + Msg("environment configuration loaded") + } else { + logger.Warn().Str("component", "env").Str("operation", "config_load"). 
+ Str("config_type", "environment").Str("env_file", envFile).Bool("success", false). + Err(err).Msg("environment configuration not loaded (using system environment)") + } + + configuration.Init() + + err = database.Init() + if err != nil { + logger.Fatal().Err(err).Msg("Failed to initialize database") + } + + err = queue.Init() + if err != nil { + logger.Fatal().Err(err).Msg("Failed to initialize Redis") + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Initialize tunnel manager + tunnelManager := tunnel.NewManager(configuration.Config.MaxTunnels, configuration.Config.MaxStreamsPerTunnel) + methods.TunnelManager = tunnelManager + + // Set grace period callback: close session when grace period expires without reconnection + tunnelManager.SetGraceCallback(func(systemID, sessionID string) { + if err := session.CloseSession(sessionID, "disconnect"); err != nil { + logger.Error().Err(err). + Str("system_id", systemID). + Str("session_id", sessionID). + Msg("failed to close session after grace period expired") + } + }) + + // Start session cleaner + go session.StartCleaner(ctx, func(expiredSessionIDs []string) { + for _, sessionID := range expiredSessionIDs { + tunnelManager.CloseBySessionID(sessionID) + } + }) + + // Start command listener for backend commands via Redis pub/sub + go methods.StartCommandListener(ctx) + + // #12: Start auth cache invalidation listener + go middleware.StartAuthCacheInvalidator(ctx) + + // Setup HTTP router (gin.New without default logger to avoid raw query params in logs) + router := gin.New() + router.Use(gin.Recovery()) + router.Use(logger.GinLogger()) + router.Use(logger.SecurityMiddleware()) + router.Use(gzip.Gzip(gzip.DefaultCompression, gzip.WithExcludedPathsRegexs([]string{"^/api/proxy", "^/api/terminal"}))) + + if gin.Mode() == gin.DebugMode { + corsConf := cors.DefaultConfig() + corsConf.AllowHeaders = []string{"Authorization", "Content-Type", "Accept"} + corsConf.AllowOrigins = 
[]string{"http://localhost:*", "https://localhost:*", "http://127.0.0.1:*", "https://127.0.0.1:*"} + corsConf.AllowOriginFunc = func(origin string) bool { + return strings.HasPrefix(origin, "http://localhost") || + strings.HasPrefix(origin, "https://localhost") || + strings.HasPrefix(origin, "http://127.0.0.1") || + strings.HasPrefix(origin, "https://127.0.0.1") + } + router.Use(cors.New(corsConf)) + } + + api := router.Group("/api") + + // Health endpoint (no sensitive data — tunnel details require authenticated admin endpoints) + api.GET("/health", func(c *gin.Context) { + c.JSON(http.StatusOK, response.OK("service healthy", gin.H{ + "service": "support", + "status": "healthy", + "version": version.Get(), + })) + }) + + // Tunnel endpoint (WebSocket, requires system Basic Auth, rate-limited per IP + per system_key) + api.GET("/tunnel", middleware.TunnelRateLimitMiddleware(), middleware.BasicAuthMiddleware(), middleware.SystemKeyRateLimitMiddleware(), methods.HandleTunnel) + + // Internal endpoints: require per-session token from backend (#3/#4) + internal := api.Group("/") + internal.Use(middleware.SessionTokenMiddleware()) + + internal.GET("/terminal/:session_id", methods.HandleTerminal) + internal.GET("/proxy/:session_id/services", methods.ListServices) + internal.Any("/proxy/:session_id/:service/*path", methods.HandleProxy) + + router.NoRoute(func(c *gin.Context) { + c.JSON(http.StatusNotFound, response.NotFound("api not found", nil)) + }) + + // Start HTTP server + srv := &http.Server{ + Addr: configuration.Config.ListenAddress, + Handler: router, + } + + go func() { + logger.LogServiceStart("support", version.Version, configuration.Config.ListenAddress) + if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { + logger.Fatal().Err(err).Msg("Failed to start server") + } + }() + + // Graceful shutdown + quit := make(chan os.Signal, 1) + signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) + <-quit + + logger.Info().Msg("Shutting down 
server...") + + // Close all tunnels + tunnelManager.CloseAll() + + cancel() + + ctx, cancel = context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := srv.Shutdown(ctx); err != nil { + logger.Fatal().Err(err).Msg("Server forced to shutdown") + } + + if err := queue.Close(); err != nil { + logger.Error().Err(err).Msg("Failed to close Redis connection") + } + + if err := database.Close(); err != nil { + logger.Error().Err(err).Msg("Failed to close database connection") + } + + logger.Info().Msg("Server exited") +} diff --git a/services/support/methods/commands.go b/services/support/methods/commands.go new file mode 100644 index 00000000..4400808c --- /dev/null +++ b/services/support/methods/commands.go @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package methods + +import ( + "context" + "encoding/json" + + "github.com/nethesis/my/services/support/logger" + "github.com/nethesis/my/services/support/queue" + "github.com/nethesis/my/services/support/session" +) + +// SupportCommand represents a command received via Redis pub/sub +type SupportCommand struct { + Action string `json:"action"` + SessionID string `json:"session_id"` +} + +// StartCommandListener listens for commands from the backend via Redis pub/sub +func StartCommandListener(ctx context.Context) { + log := logger.ComponentLogger("commands") + + pubsub := queue.Subscribe(ctx, "support:commands") + defer func() { _ = pubsub.Close() }() + + ch := pubsub.Channel() + log.Info().Msg("command listener started on support:commands channel") + + for { + select { + case <-ctx.Done(): + log.Info().Msg("command listener stopped") + return + case msg, ok := <-ch: + if !ok { + log.Warn().Msg("command channel closed") + return + } + + var cmd SupportCommand + if err := json.Unmarshal([]byte(msg.Payload), &cmd); err != nil { + 
log.Error().Err(err).Str("payload", msg.Payload).Msg("invalid command payload") + continue + } + + log.Info(). + Str("action", cmd.Action). + Str("session_id", cmd.SessionID). + Msg("command received") + + switch cmd.Action { + case "close": + handleCloseCommand(cmd.SessionID) + default: + log.Warn().Str("action", cmd.Action).Msg("unknown command action") + } + } + } +} + +func handleCloseCommand(sessionID string) { + log := logger.ComponentLogger("commands") + + // Close the tunnel + if TunnelManager.CloseBySessionID(sessionID) { + log.Info().Str("session_id", sessionID).Msg("tunnel closed by command") + } + + // Close the session in the database + if err := session.CloseSession(sessionID, "operator"); err != nil { + log.Error().Err(err).Str("session_id", sessionID).Msg("failed to close session") + } +} diff --git a/services/support/methods/proxy.go b/services/support/methods/proxy.go new file mode 100644 index 00000000..dcd63a19 --- /dev/null +++ b/services/support/methods/proxy.go @@ -0,0 +1,309 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
// HandleProxy proxies HTTP/WebSocket requests through the yamux tunnel
// Route: ANY /api/proxy/:session_id/:service/*path (internal, no auth)
//
// Flow: resolve the tunnel for the session, look the service up in the
// tunnel's manifest, strip any Traefik path prefix, then reverse-proxy the
// request over a fresh yamux stream (one stream per request, bounded by the
// tunnel's concurrent-stream limit).
func HandleProxy(c *gin.Context) {
	sessionID := c.Param("session_id")
	serviceName := c.Param("service")
	path := c.Param("path")
	if path == "" {
		path = "/"
	}

	log := logger.ComponentLogger("proxy")

	// Find tunnel by session ID
	t := TunnelManager.GetBySessionID(sessionID)
	if t == nil {
		c.JSON(http.StatusNotFound, response.NotFound("tunnel not found for session", nil))
		return
	}

	// Look up service in manifest
	svc, ok := t.GetService(serviceName)
	if !ok {
		c.JSON(http.StatusNotFound, response.NotFound("service not found in tunnel manifest", nil))
		return
	}

	// Strip Traefik PathPrefix from the request path.
	// In NS8, Traefik routes e.g. PathPrefix('/cluster-admin') to the backend
	// and strips the prefix. Our proxy bypasses Traefik, so we must strip it too.
	if svc.PathPrefix != "" && svc.PathPrefix != "/" {
		path = strings.TrimPrefix(path, svc.PathPrefix)
		if path == "" || path[0] != '/' {
			path = "/" + path
		}
	}

	// Build hostname rewrite map for all services in the tunnel.
	// This handles multi-hostname apps (e.g., NethVoice cti4/voice4)
	// where HTML/JS references hostnames of sibling services.
	proxyHost := c.GetHeader("X-Proxy-Host")
	hostRewrites := buildHostRewriteMap(t, proxyHost)
	needsRewrite := len(hostRewrites) > 0

	// #10: Check stream limit before opening a new stream
	if !t.AcquireStream() {
		c.JSON(http.StatusTooManyRequests, response.Error(http.StatusTooManyRequests, "too many concurrent streams on this tunnel", nil))
		return
	}
	// Released by the deferred closure on every exit path of ServeHTTP.
	streamAcquired := true
	defer func() {
		if streamAcquired {
			t.ReleaseStream()
		}
	}()

	// HTTP + WebSocket proxy via yamux stream
	// httputil.ReverseProxy handles 101 Switching Protocols (WebSocket upgrades) natively
	proxy := &httputil.ReverseProxy{
		Director: func(req *http.Request) {
			// Always use "http" scheme: TLS termination is handled by the
			// tunnel-client when it dials the target, not by this Transport.
			// Using "https" here would cause Go's Transport to attempt a TLS
			// handshake on the yamux stream, which is always plain TCP.
			req.URL.Scheme = "http"
			req.URL.Host = svc.Target
			req.URL.Path = path
			req.URL.RawQuery = c.Request.URL.RawQuery

			// Rewrite Host header if specified in manifest
			if svc.Host != "" {
				req.Host = svc.Host

				// Set Origin and Referer to the upstream hostname so apps
				// that validate these headers (e.g. FreePBX) accept the request.
				// The backend strips the original browser headers to avoid CORS
				// issues, so we reconstruct them here from the manifest.
				upstreamOrigin := "https://" + svc.Host
				req.Header.Set("Origin", upstreamOrigin)
				req.Header.Set("Referer", upstreamOrigin+path)
			}
		},
		ModifyResponse: func(resp *http.Response) error {
			log.Debug().
				Str("session_id", sessionID).
				Str("service", serviceName).
				Str("path", path).
				Int("upstream_status", resp.StatusCode).
				Str("upstream_content_type", resp.Header.Get("Content-Type")).
				Msg("upstream response received")

			// Replace upstream security headers with proxy-appropriate values
			// instead of stripping them entirely, to prevent clickjacking
			resp.Header.Del("X-Frame-Options")
			resp.Header.Set("Content-Security-Policy", "frame-ancestors 'self'")

			// Rewrite hardcoded hostnames in text responses so that JS API calls
			// go through the proxy instead of directly to the original host.
			// Rewrite failures are logged but never fail the response.
			if needsRewrite && isRewritableResponse(resp) {
				if err := rewriteResponseBodyMulti(resp, hostRewrites); err != nil {
					log.Warn().Err(err).Msg("failed to rewrite response body")
				}
			}
			return nil
		},
		Transport: &http.Transport{
			// Dial is replaced by opening a yamux stream and performing the
			// tunnel's CONNECT handshake; the network address args are unused.
			DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
				stream, err := t.Session.Open()
				if err != nil {
					return nil, err
				}
				// Write CONNECT header
				if err := tunnel.WriteConnectHeader(stream, serviceName); err != nil {
					_ = stream.Close()
					return nil, err
				}
				// Read response
				if err := tunnel.ReadConnectResponse(stream); err != nil {
					_ = stream.Close()
					return nil, err
				}
				return stream, nil
			},
			// Preserve upstream Content-Encoding as-is; without this, Go auto-decompresses
			// gzip but keeps the header, causing ERR_CONTENT_DECODING_FAILED in browsers
			DisableCompression:     true,
			MaxResponseHeaderBytes: 1 << 20, // 1 MB limit on response headers
			// NOTE(review): with the scheme forced to "http" in Director, this
			// TLS config appears unused by the Transport — confirm and drop?
			TLSClientConfig: &tls.Config{
				InsecureSkipVerify: true, //nolint:gosec // Local services behind tunnel use self-signed certs
			},
		},
	}

	log.Debug().
		Str("session_id", sessionID).
		Str("service", serviceName).
		Str("path", path).
		Str("target", svc.Target).
		Bool("tls", svc.TLS).
		Str("host", svc.Host).
		Msg("proxying HTTP request")

	proxy.ServeHTTP(c.Writer, c.Request)
}

// ListServices returns the service manifest for a tunnel
// Route: GET /api/proxy/:session_id/services
func ListServices(c *gin.Context) {
	sessionID := c.Param("session_id")

	t := TunnelManager.GetBySessionID(sessionID)
	if t == nil {
		c.JSON(http.StatusNotFound, response.NotFound("tunnel not found for session", nil))
		return
	}

	services := t.GetServices()
	c.JSON(http.StatusOK, response.OK("services retrieved successfully", gin.H{
		"services": services,
	}))
}

// isRewritableResponse returns true if the response Content-Type is HTML or JavaScript (#9).
// Restricts hostname rewriting to content types that actually contain navigable URLs,
// avoiding corruption of JSON APIs, XML data, or other text formats.
// (CSS is included too: stylesheets may reference absolute URLs.)
func isRewritableResponse(resp *http.Response) bool {
	ct := resp.Header.Get("Content-Type")
	return strings.Contains(ct, "text/html") ||
		strings.Contains(ct, "javascript") ||
		strings.Contains(ct, "text/css")
}

// buildHostRewriteMap creates a map of original hostname -> proxy hostname for all
// services in the tunnel. This enables multi-hostname rewriting: when proxying
// service A, references to service B's hostname are also rewritten to B's proxy URL.
// Returns nil when the proxy host header is absent, malformed, or no service
// needs rewriting, which disables rewriting entirely.
func buildHostRewriteMap(t *tunnel.Tunnel, currentProxyHost string) map[string]string {
	if currentProxyHost == "" {
		return nil
	}

	// Extract the domain pattern from the current proxy host.
	// Format: {service}--{session_short}.support.{domain}
	parts := strings.SplitN(currentProxyHost, ".support.", 2)
	if len(parts) != 2 {
		return nil
	}
	domain := parts[0]
	domainSuffix := parts[1]

	// Extract the session short ID from the subdomain
	subParts := strings.SplitN(domain, "--", 2)
	if len(subParts) != 2 {
		return nil
	}
	sessionShort := subParts[1]

	// Build rewrite map for all services with hostnames
	rewrites := make(map[string]string)
	services := t.GetServices()
	for svcName, svc := range services {
		if svc.Host == "" {
			continue
		}
		proxyHostname := fmt.Sprintf("%s--%s.support.%s", svcName, sessionShort, domainSuffix)
		if svc.Host != proxyHostname {
			rewrites[svc.Host] = proxyHostname
		}
	}

	if len(rewrites) == 0 {
		return nil
	}
	return rewrites
}
+ // Format: {service}--{session_short}.support.{domain} + parts := strings.SplitN(currentProxyHost, ".support.", 2) + if len(parts) != 2 { + return nil + } + domain := parts[0] + domainSuffix := parts[1] + + // Extract the session short ID from the subdomain + subParts := strings.SplitN(domain, "--", 2) + if len(subParts) != 2 { + return nil + } + sessionShort := subParts[1] + + // Build rewrite map for all services with hostnames + rewrites := make(map[string]string) + services := t.GetServices() + for svcName, svc := range services { + if svc.Host == "" { + continue + } + proxyHostname := fmt.Sprintf("%s--%s.support.%s", svcName, sessionShort, domainSuffix) + if svc.Host != proxyHostname { + rewrites[svc.Host] = proxyHostname + } + } + + if len(rewrites) == 0 { + return nil + } + return rewrites +} + +// rewriteResponseBodyMulti replaces all hostname occurrences in the response body +// using a map of original -> proxy hostnames. +func rewriteResponseBodyMulti(resp *http.Response, rewrites map[string]string) error { + if resp.Body == nil || len(rewrites) == 0 { + return nil + } + + var body []byte + var isGzipped bool + + limitedReader := io.LimitReader(resp.Body, maxRewriteBodySize+1) + + if resp.Header.Get("Content-Encoding") == "gzip" { + isGzipped = true + gr, err := gzip.NewReader(limitedReader) + if err != nil { + return err + } + body, err = io.ReadAll(gr) + _ = gr.Close() + if err != nil { + return err + } + } else { + var err error + body, err = io.ReadAll(limitedReader) + if err != nil { + return err + } + } + _ = resp.Body.Close() + + // Skip rewriting for oversized responses + if int64(len(body)) > maxRewriteBodySize { + resp.Body = io.NopCloser(bytes.NewReader(body)) + return nil + } + + // Replace all hostname mappings + for oldHost, newHost := range rewrites { + body = bytes.ReplaceAll(body, []byte(oldHost), []byte(newHost)) + } + + if isGzipped { + var buf bytes.Buffer + gw := gzip.NewWriter(&buf) + if _, err := gw.Write(body); err != nil { + 
return err + } + if err := gw.Close(); err != nil { + return err + } + body = buf.Bytes() + resp.Header.Set("Content-Encoding", "gzip") + } + + resp.Body = io.NopCloser(bytes.NewReader(body)) + resp.ContentLength = int64(len(body)) + resp.Header.Set("Content-Length", strconv.Itoa(len(body))) + return nil +} diff --git a/services/support/methods/terminal.go b/services/support/methods/terminal.go new file mode 100644 index 00000000..2749ac42 --- /dev/null +++ b/services/support/methods/terminal.go @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package methods + +import ( + "encoding/binary" + "fmt" + "io" + "net/http" + "sync" + "sync/atomic" + "time" + + "github.com/gin-gonic/gin" + "github.com/gorilla/websocket" + + "github.com/nethesis/my/services/support/configuration" + "github.com/nethesis/my/services/support/logger" + "github.com/nethesis/my/services/support/tunnel" +) + +// terminalUpgrader is a separate WebSocket upgrader for terminal connections. +// Unlike the tunnel upgrader, this rejects cross-origin requests since terminal +// sessions are initiated by browsers on the MY domain. +var terminalUpgrader = websocket.Upgrader{ + CheckOrigin: func(r *http.Request) bool { return true }, // Internal endpoint, backend proxies the request + ReadBufferSize: 4096, + WriteBufferSize: 4096, +} + +// Terminal frame types (used on WebSocket between browser and this service) +const ( + frameTypeData = 0 + frameTypeResize = 1 + frameTypeError = 3 +) + +// HandleTerminal handles WebSocket terminal connections. +// It bridges the browser WebSocket to a yamux stream using CONNECT "terminal". +// The tunnel-client spawns a PTY on the remote system — no SSH involved. 
// HandleTerminal handles WebSocket terminal connections.
// It bridges the browser WebSocket to a yamux stream using CONNECT "terminal".
// Route: GET /api/terminal/:session_id (internal, session token auth)
//
// Two pump goroutines copy data in each direction; a watchdog goroutine
// enforces the configured inactivity timeout. All three funnel into a single
// sync.Once cleanup that closes both endpoints, releases the tunnel stream
// slot, and logs a session summary.
func HandleTerminal(c *gin.Context) {
	sessionID := c.Param("session_id")
	log := logger.ComponentLogger("terminal")

	// Find tunnel by session ID
	t := TunnelManager.GetBySessionID(sessionID)
	if t == nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "tunnel not found for session"})
		return
	}

	// #10: Check stream limit — every early-return below must release this slot.
	if !t.AcquireStream() {
		c.JSON(http.StatusTooManyRequests, gin.H{"error": "too many concurrent streams on this tunnel"})
		return
	}

	// Upgrade to WebSocket (using dedicated terminal upgrader)
	wsConn, err := terminalUpgrader.Upgrade(c.Writer, c.Request, nil)
	if err != nil {
		t.ReleaseStream()
		log.Error().Err(err).Str("session_id", sessionID).Msg("websocket upgrade failed")
		return
	}

	// Open yamux stream and CONNECT to terminal service
	stream, err := t.Session.Open()
	if err != nil {
		t.ReleaseStream()
		log.Error().Err(err).Str("session_id", sessionID).Msg("failed to open yamux stream")
		sendErrorFrame(wsConn, "failed to open tunnel stream")
		_ = wsConn.Close()
		return
	}

	if err := tunnel.WriteConnectHeader(stream, "terminal"); err != nil {
		t.ReleaseStream()
		_ = stream.Close()
		log.Error().Err(err).Str("session_id", sessionID).Msg("failed to write CONNECT header")
		sendErrorFrame(wsConn, "tunnel connect failed")
		_ = wsConn.Close()
		return
	}

	if err := tunnel.ReadConnectResponse(stream); err != nil {
		t.ReleaseStream()
		_ = stream.Close()
		log.Error().Err(err).Str("session_id", sessionID).Msg("CONNECT rejected")
		sendErrorFrame(wsConn, "terminal not available: "+err.Error())
		_ = wsConn.Close()
		return
	}

	log.Info().Str("session_id", sessionID).Msg("terminal session started")

	// #7/#1: Track activity for inactivity timeout and audit logging
	var bytesIn, bytesOut atomic.Int64
	lastActivity := &atomic.Value{}
	lastActivity.Store(time.Now())
	startTime := time.Now()

	inactivityTimeout := configuration.Config.TerminalInactivityTimeout

	// Idempotent teardown shared by the pump goroutines and the watchdog.
	var once sync.Once
	done := make(chan struct{})
	cleanup := func() {
		once.Do(func() {
			close(done)
			_ = wsConn.Close()
			_ = stream.Close()
			t.ReleaseStream()

			// #1: Log terminal session summary
			duration := time.Since(startTime)
			log.Info().
				Str("session_id", sessionID).
				Dur("duration", duration).
				Int64("bytes_in", bytesIn.Load()).
				Int64("bytes_out", bytesOut.Load()).
				Msg("terminal session ended")
		})
	}

	// #7: Inactivity timeout watchdog (polls every 30s)
	go func() {
		ticker := time.NewTicker(30 * time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-done:
				return
			case <-ticker.C:
				last := lastActivity.Load().(time.Time)
				if time.Since(last) > inactivityTimeout {
					log.Warn().
						Str("session_id", sessionID).
						Dur("idle_time", time.Since(last)).
						Msg("terminal inactivity timeout, closing")
					sendErrorFrame(wsConn, "session closed due to inactivity")
					cleanup()
					return
				}
			}
		}
	}()

	// #13: Use configurable max frame size (default 64KB)
	maxFrameSize := configuration.Config.TerminalMaxFrameSize

	// WebSocket → stream: read WS binary messages, write as length-prefixed frames
	go func() {
		defer cleanup()
		for {
			_, msg, readErr := wsConn.ReadMessage()
			if readErr != nil {
				return
			}
			lastActivity.Store(time.Now())
			bytesIn.Add(int64(len(msg)))
			if writeErr := writeFrame(stream, msg); writeErr != nil {
				return
			}
		}
	}()

	// Stream → WebSocket: read length-prefixed frames, send as WS binary messages
	go func() {
		defer cleanup()
		for {
			frame, readErr := readFrameWithLimit(stream, maxFrameSize)
			if readErr != nil {
				return
			}
			lastActivity.Store(time.Now())
			bytesOut.Add(int64(len(frame)))
			if writeErr := wsConn.WriteMessage(websocket.BinaryMessage, frame); writeErr != nil {
				return
			}
		}
	}()

	// Block until any goroutine triggers cleanup.
	<-done
}

// sendErrorFrame sends a type 3 error frame to the WebSocket client.
// Best-effort: the write error is deliberately ignored since the connection
// is typically being torn down when this is called.
func sendErrorFrame(wsConn *websocket.Conn, msg string) {
	frame := make([]byte, 1+len(msg))
	frame[0] = frameTypeError
	copy(frame[1:], msg)
	_ = wsConn.WriteMessage(websocket.BinaryMessage, frame)
}

// writeFrame writes a length-prefixed frame: [4 bytes big-endian length][payload]
func writeFrame(w io.Writer, data []byte) error {
	header := make([]byte, 4)
	binary.BigEndian.PutUint32(header, uint32(len(data)))
	if _, err := w.Write(header); err != nil {
		return err
	}
	_, err := w.Write(data)
	return err
}

// readFrameWithLimit reads a length-prefixed frame with configurable max size (#13).
// Frames larger than maxSize are rejected with an error before allocating.
func readFrameWithLimit(r io.Reader, maxSize int) ([]byte, error) {
	header := make([]byte, 4)
	if _, err := io.ReadFull(r, header); err != nil {
		return nil, err
	}
	length := binary.BigEndian.Uint32(header)
	// NOTE(review): on a 32-bit platform int(length) can wrap negative for
	// lengths > MaxInt32 and bypass this check — consider comparing as int64.
	if int(length) > maxSize {
		return nil, fmt.Errorf("frame too large: %d (max %d)", length, maxSize)
	}
	data := make([]byte, length)
	if _, err := io.ReadFull(r, data); err != nil {
		return nil, err
	}
	return data, nil
}
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package methods + +import ( + "encoding/json" + "io" + "net/http" + "time" + + "github.com/gin-gonic/gin" + "github.com/gorilla/websocket" + "github.com/hashicorp/yamux" + + "github.com/nethesis/my/services/support/configuration" + "github.com/nethesis/my/services/support/logger" + "github.com/nethesis/my/services/support/session" + "github.com/nethesis/my/services/support/tunnel" +) + +var ( + // TunnelManager is the global tunnel manager instance + TunnelManager *tunnel.Manager + + upgrader = websocket.Upgrader{ + CheckOrigin: func(r *http.Request) bool { + // Only non-browser clients (no Origin header) connect to the tunnel. + // Reject browser-originated requests to prevent CSRF with cached credentials. + return r.Header.Get("Origin") == "" + }, + ReadBufferSize: 4096, + WriteBufferSize: 4096, + } +) + +// HandleTunnel handles WebSocket tunnel connections from systems +func HandleTunnel(c *gin.Context) { + systemID, exists := c.Get("system_id") + if !exists { + c.JSON(http.StatusUnauthorized, gin.H{"error": "system not authenticated"}) + return + } + + sysID := systemID.(string) + nodeID := c.Query("node_id") + log := logger.RequestLogger(c, "tunnel") + + // #8: Check for reconnect token when reusing an existing session during grace period + reconnectToken := c.Query("reconnect_token") + + // Create or reuse a session for this system+node + sess, err := session.GetActiveSession(sysID, nodeID) + if err != nil { + log.Error().Err(err).Str("system_id", sysID).Str("node_id", nodeID).Msg("failed to get session") + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to get session"}) + return + } + + if sess != nil && TunnelManager.HasGracePeriod(sysID, nodeID) { + // Session exists during grace period — validate reconnect token (#8) + if !session.ValidateReconnectToken(sess.ID, reconnectToken) { + log.Warn(). 
+ Str("system_id", sysID). + Str("node_id", nodeID). + Str("session_id", sess.ID). + Msg("reconnect token mismatch during grace period, closing old session") + // Close the old session to prevent orphaned active sessions + if err := session.CloseSession(sess.ID, "replaced"); err != nil { + log.Warn().Err(err).Str("session_id", sess.ID).Msg("failed to close replaced session") + } + sess = nil // force new session + } + } + + if sess == nil { + // Create a new session + sess, err = session.CreateSession(sysID, nodeID) + if err != nil { + log.Error().Err(err).Str("system_id", sysID).Str("node_id", nodeID).Msg("failed to create session") + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create session"}) + return + } + } + + // Upgrade to WebSocket + wsConn, err := upgrader.Upgrade(c.Writer, c.Request, nil) + if err != nil { + log.Error().Err(err).Str("system_id", sysID).Msg("websocket upgrade failed") + return + } + + log.Info(). + Str("system_id", sysID). + Str("node_id", nodeID). + Str("session_id", sess.ID). + Str("remote_addr", c.Request.RemoteAddr). 
+ Msg("websocket connection established") + + // Wrap WebSocket as net.Conn for yamux + wsNetConn := tunnel.NewWebSocketConn(wsConn) + + // Create yamux server session over the WebSocket connection + // #11: Explicit keepalive and write timeout configuration + yamuxConfig := yamux.DefaultConfig() + yamuxConfig.EnableKeepAlive = true + yamuxConfig.KeepAliveInterval = 15 // seconds (more aggressive than default 30) + yamuxConfig.ConnectionWriteTimeout = 10 * time.Second + yamuxConfig.LogOutput = io.Discard + + yamuxSession, err := yamux.Server(wsNetConn, yamuxConfig) + if err != nil { + log.Error().Err(err).Str("system_id", sysID).Msg("yamux session creation failed") + _ = wsConn.Close() + return + } + + // Activate the session in the database + if err := session.ActivateSession(sess.ID); err != nil { + log.Error().Err(err).Str("session_id", sess.ID).Msg("failed to activate session") + } + + // Register the tunnel + t, regErr := TunnelManager.Register(sysID, nodeID, sess.ID, yamuxSession, wsConn) + if regErr != nil { + log.Error().Err(regErr).Str("system_id", sysID).Str("node_id", nodeID).Msg("failed to register tunnel") + _ = yamuxSession.Close() + return + } + + // Accept the control stream (first stream from client with service manifest) + go acceptControlStream(t, sysID, sess.ID) + + // Handle the tunnel lifecycle in a goroutine + go handleTunnelLifecycle(t, sysID, nodeID, sess.ID) +} + +// acceptControlStream accepts the control stream from the tunnel client +// and reads the service manifest. It continues to listen for manifest updates. +func acceptControlStream(t *tunnel.Tunnel, systemID, sessionID string) { + log := logger.ComponentLogger("tunnel") + + for { + stream, err := t.Session.Accept() + if err != nil { + return // session closed + } + + // Decode manifest from the control stream + var manifest tunnel.ServiceManifest + decoder := json.NewDecoder(stream) + if err := decoder.Decode(&manifest); err != nil { + log.Warn().Err(err). 
+ Str("system_id", systemID). + Str("session_id", sessionID). + Msg("failed to decode service manifest from control stream") + _ = stream.Close() + continue + } + _ = stream.Close() + + if manifest.Services != nil { + t.SetServices(manifest.Services) + log.Info(). + Str("system_id", systemID). + Str("session_id", sessionID). + Int("service_count", len(manifest.Services)). + Msg("service manifest received") + } + } +} + +func handleTunnelLifecycle(t *tunnel.Tunnel, systemID, nodeID, sessionID string) { + log := logger.ComponentLogger("tunnel") + + // Wait for the yamux session to close (either side disconnects) + <-t.Session.CloseChan() + + log.Info(). + Str("system_id", systemID). + Str("node_id", nodeID). + Str("session_id", sessionID). + Msg("tunnel disconnected, starting grace period") + + // Unregister the tunnel (yamux session is dead) + TunnelManager.Unregister(systemID, nodeID) + + // Start a grace period instead of immediately closing the session. + // If the client reconnects before the grace period expires, + // GetActiveSession finds the still-active session and reuses it. + TunnelManager.StartGracePeriod(systemID, nodeID, sessionID, configuration.Config.TunnelGracePeriod) +} diff --git a/services/support/middleware/auth.go b/services/support/middleware/auth.go new file mode 100644 index 00000000..113b907a --- /dev/null +++ b/services/support/middleware/auth.go @@ -0,0 +1,470 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package middleware + +import ( + "context" + "crypto/sha256" + "crypto/subtle" + "encoding/base64" + "fmt" + "math/rand/v2" + "net/http" + "strings" + "sync" + "time" + + "github.com/gin-gonic/gin" + "github.com/redis/go-redis/v9" + + "github.com/nethesis/my/services/support/configuration" + "github.com/nethesis/my/services/support/database" + "github.com/nethesis/my/services/support/helpers" + "github.com/nethesis/my/services/support/logger" + "github.com/nethesis/my/services/support/queue" + "github.com/nethesis/my/services/support/response" +) + +// SessionTokenMiddleware validates the X-Session-Token header for +// internal endpoints. Each request is tied to a specific active session +// via the session_id URL parameter, eliminating the single shared secret. +func SessionTokenMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + // Extract session_id from URL (works for both /terminal/:session_id and /proxy/:session_id/...) + sessionID := c.Param("session_id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + c.Abort() + return + } + + provided := c.GetHeader("X-Session-Token") + if provided == "" { + logger.Warn(). + Str("client_ip", c.ClientIP()). + Str("path", c.Request.URL.Path). + Msg("missing X-Session-Token header") + c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "session token required", nil)) + c.Abort() + return + } + + // Look up the session token from the database + var storedToken string + err := database.DB.QueryRow( + `SELECT session_token FROM support_sessions WHERE id = $1 AND status IN ('pending', 'active')`, + sessionID, + ).Scan(&storedToken) + if err != nil { + logger.Warn(). + Str("client_ip", c.ClientIP()). + Str("session_id", sessionID). 
+ Msg("session not found or not active") + c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "invalid session", nil)) + c.Abort() + return + } + + // Constant-time comparison to prevent timing attacks + if subtle.ConstantTimeCompare([]byte(provided), []byte(storedToken)) != 1 { + logger.Warn(). + Str("client_ip", c.ClientIP()). + Str("session_id", sessionID). + Msg("invalid session token") + c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "forbidden", nil)) + c.Abort() + return + } + + c.Next() + } +} + +// jitteredTTL returns a TTL with ±25% random variation to prevent thundering herd. +func jitteredTTL(base time.Duration) time.Duration { + jitter := float64(base) * 0.25 + offset := rand.Float64()*2*jitter - jitter + return base + time.Duration(offset) +} + +var inProcessAuthCache sync.Map + +func init() { + // Sweep expired entries every 10 minutes to bound memory usage + go func() { + ticker := time.NewTicker(10 * time.Minute) + defer ticker.Stop() + for range ticker.C { + now := time.Now() + inProcessAuthCache.Range(func(key, value any) bool { + if entry, ok := value.(*authCacheEntry); ok && now.After(entry.expiresAt) { + inProcessAuthCache.Delete(key) + } + return true + }) + } + }() +} + +type authCacheEntry struct { + systemID string + valid bool + expiresAt time.Time +} + +// authCacheKey returns a consistent cache key for in-process and Redis caches +func authCacheKey(systemKey, systemSecret string) string { + hash := sha256.Sum256([]byte(systemSecret)) + return fmt.Sprintf("%s:%x", systemKey, hash) +} + +// checkInProcessCache checks the in-process auth cache. +// Returns (systemID, valid, found). 
+func checkInProcessCache(systemKey, systemSecret string) (string, bool, bool) { + key := authCacheKey(systemKey, systemSecret) + val, ok := inProcessAuthCache.Load(key) + if !ok { + return "", false, false + } + entry := val.(*authCacheEntry) + if time.Now().After(entry.expiresAt) { + inProcessAuthCache.Delete(key) + return "", false, false + } + return entry.systemID, entry.valid, true +} + +// setInProcessCache stores an auth result in the in-process cache +func setInProcessCache(systemKey, systemSecret, systemID string, valid bool) { + key := authCacheKey(systemKey, systemSecret) + var ttl time.Duration + if valid { + ttl = jitteredTTL(configuration.Config.SystemAuthCacheTTL) + } else { + ttl = 1 * time.Minute + } + inProcessAuthCache.Store(key, &authCacheEntry{ + systemID: systemID, + valid: valid, + expiresAt: time.Now().Add(ttl), + }) +} + +// InvalidateAuthCache removes cached credentials for a system key from both caches. +// Called when system secrets are regenerated via Redis pub/sub. +func InvalidateAuthCache(ctx context.Context, systemKey string) { + // Clear all in-process cache entries for this system key + inProcessAuthCache.Range(func(key, _ any) bool { + if k, ok := key.(string); ok && strings.HasPrefix(k, systemKey+":") { + inProcessAuthCache.Delete(key) + } + return true + }) + + // Clear Redis cache entries for this system key + rdb := queue.GetClient() + pattern := fmt.Sprintf("auth:system:%s:*", systemKey) + iter := rdb.Scan(ctx, 0, pattern, 100).Iterator() + for iter.Next(ctx) { + _ = rdb.Del(ctx, iter.Val()).Err() + } +} + +// StartAuthCacheInvalidator listens for cache invalidation events via Redis pub/sub. +// When a system secret is regenerated, the backend publishes the system_key to this channel. 
+func StartAuthCacheInvalidator(ctx context.Context) { + log := logger.ComponentLogger("auth_cache") + pubsub := queue.Subscribe(ctx, "support:auth:invalidate") + defer func() { _ = pubsub.Close() }() + + ch := pubsub.Channel() + log.Info().Msg("auth cache invalidator started on support:auth:invalidate channel") + + for { + select { + case <-ctx.Done(): + log.Info().Msg("auth cache invalidator stopped") + return + case msg, ok := <-ch: + if !ok { + return + } + systemKey := msg.Payload + if systemKey != "" { + InvalidateAuthCache(ctx, systemKey) + log.Info().Str("system_key", systemKey).Msg("auth cache invalidated") + } + } + } +} + +// BasicAuthMiddleware validates system credentials using HTTP Basic Auth. +// Also verifies the system has support_enabled = true (#2). +func BasicAuthMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + auth := c.GetHeader("Authorization") + if auth == "" { + logger.Warn(). + Str("client_ip", c.ClientIP()). + Str("path", c.Request.URL.Path). + Msg("missing Authorization header") + + c.Header("WWW-Authenticate", `Basic realm="System Authentication"`) + c.JSON(http.StatusUnauthorized, response.Unauthorized("authentication required", nil)) + c.Abort() + return + } + + const prefix = "Basic " + if !strings.HasPrefix(auth, prefix) { + logger.Warn(). + Str("client_ip", c.ClientIP()). + Str("path", c.Request.URL.Path). + Msg("invalid Authorization header format") + + c.Header("WWW-Authenticate", `Basic realm="System Authentication"`) + c.JSON(http.StatusUnauthorized, response.Unauthorized("invalid authentication format", nil)) + c.Abort() + return + } + + encoded := auth[len(prefix):] + decoded, err := base64.StdEncoding.DecodeString(encoded) + if err != nil { + logger.Warn(). + Err(err). + Str("client_ip", c.ClientIP()). + Str("path", c.Request.URL.Path). 
+ Msg("invalid base64 encoding in Authorization header") + + c.Header("WWW-Authenticate", `Basic realm="System Authentication"`) + c.JSON(http.StatusUnauthorized, response.Unauthorized("invalid authentication encoding", nil)) + c.Abort() + return + } + + credentials := string(decoded) + parts := strings.SplitN(credentials, ":", 2) + if len(parts) != 2 { + logger.Warn(). + Str("client_ip", c.ClientIP()). + Str("path", c.Request.URL.Path). + Msg("invalid credentials format") + + c.Header("WWW-Authenticate", `Basic realm="System Authentication"`) + c.JSON(http.StatusUnauthorized, response.Unauthorized("invalid credentials format", nil)) + c.Abort() + return + } + + systemKey := parts[0] + systemSecret := parts[1] + + systemID, valid := validateSystemCredentials(c, systemKey, systemSecret) + if !valid { + c.Header("WWW-Authenticate", `Basic realm="System Authentication"`) + c.JSON(http.StatusUnauthorized, response.Unauthorized("invalid system credentials", nil)) + c.Abort() + return + } + + // #2: Check support_enabled flag — system must opt-in explicitly + var supportEnabled bool + err = database.DB.QueryRow( + `SELECT support_enabled FROM systems WHERE id = $1 AND deleted_at IS NULL`, + systemID, + ).Scan(&supportEnabled) + if err != nil || !supportEnabled { + logger.Warn(). + Str("system_key", systemKey). + Str("system_id", systemID). + Bool("support_enabled", supportEnabled). + Msg("support not enabled for this system") + c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "support is not enabled for this system", nil)) + c.Abort() + return + } + + c.Set("system_id", systemID) + c.Set("system_key", systemKey) + c.Set("authenticated_system", true) + + logger.Debug(). + Str("system_key", systemKey). + Str("system_id", systemID). + Str("client_ip", c.ClientIP()). + Str("path", c.Request.URL.Path). 
+ Msg("system authenticated successfully") + + c.Next() + } +} + +// systemCredentialsRow holds the DB row for system credentials lookup +type systemCredentialsRow struct { + systemID string + secretPublic string + secretSHA256 string +} + +// validateSystemCredentials validates system credentials against database and cache +func validateSystemCredentials(c *gin.Context, systemKey, systemSecret string) (string, bool) { + parts := strings.Split(systemSecret, ".") + if len(parts) != 2 { + logger.Warn(). + Str("system_key", systemKey). + Msg("invalid system secret format: missing dot separator") + return "", false + } + + publicPart := strings.TrimPrefix(parts[0], "my_") + if publicPart == parts[0] { + logger.Warn(). + Str("system_key", systemKey). + Msg("invalid system secret format: missing 'my_' prefix") + return "", false + } + secretPart := parts[1] + + if len(secretPart) < configuration.Config.SystemSecretMinLength { + logger.Warn(). + Str("system_key", systemKey). + Int("secret_length", len(secretPart)). + Int("min_length", configuration.Config.SystemSecretMinLength). + Msg("system secret part too short") + return "", false + } + + // Check in-process cache first (fastest, no network) + if cachedID, valid, found := checkInProcessCache(systemKey, systemSecret); found { + if valid { + return cachedID, true + } + return "", false + } + + // Check Redis cache + if cachedID := checkCredentialsCache(c, systemKey, systemSecret); cachedID != nil { + if *cachedID != "" { + setInProcessCache(systemKey, systemSecret, *cachedID, true) + return *cachedID, true + } + } + + // Query database for system credentials + var creds systemCredentialsRow + query := ` + SELECT id, system_secret_public, system_secret_sha256 + FROM systems + WHERE system_key = $1 AND deleted_at IS NULL + ` + + err := database.DB.QueryRow(query, systemKey).Scan( + &creds.systemID, + &creds.secretPublic, + &creds.secretSHA256, + ) + + if err != nil { + logger.Warn(). + Err(err). 
+ Str("system_key", systemKey). + Msg("system credentials not found") + + cacheCredentialsResult(c, systemKey, systemSecret, "", false) + return "", false + } + + if creds.secretPublic != publicPart { + logger.Warn(). + Str("system_key", systemKey). + Str("system_id", creds.systemID). + Msg("public part of system secret does not match") + + cacheCredentialsResult(c, systemKey, systemSecret, "", false) + return "", false + } + + // Verify secret using SHA256 + valid, err := helpers.VerifySystemSecretSHA256(secretPart, creds.secretSHA256) + if err != nil { + logger.Warn(). + Err(err). + Str("system_key", systemKey). + Str("system_id", creds.systemID). + Msg("failed to verify SHA256 secret") + + cacheCredentialsResult(c, systemKey, systemSecret, "", false) + setInProcessCache(systemKey, systemSecret, "", false) + return "", false + } + if !valid { + logger.Warn(). + Str("system_key", systemKey). + Str("system_id", creds.systemID). + Msg("invalid system secret part") + + cacheCredentialsResult(c, systemKey, systemSecret, "", false) + setInProcessCache(systemKey, systemSecret, "", false) + return "", false + } + + // Cache positive result in both caches + cacheCredentialsResult(c, systemKey, systemSecret, creds.systemID, true) + setInProcessCache(systemKey, systemSecret, creds.systemID, true) + + return creds.systemID, true +} + +// checkCredentialsCache checks Redis cache for cached credentials +func checkCredentialsCache(c *gin.Context, systemKey, systemSecret string) *string { + hash := sha256.Sum256([]byte(systemSecret)) + cacheKey := fmt.Sprintf("auth:system:%s:%x", systemKey, hash) + + rdb := queue.GetClient() + result, err := rdb.Get(c.Request.Context(), cacheKey).Result() + if err == redis.Nil { + return nil + } + if err != nil { + logger.Warn().Err(err).Msg("redis cache error during auth check") + return nil + } + + if result == "invalid" { + empty := "" + return &empty + } + + return &result +} + +// cacheCredentialsResult caches the authentication result 
+func cacheCredentialsResult(c *gin.Context, systemKey, systemSecret, systemID string, valid bool) { + hash := sha256.Sum256([]byte(systemSecret)) + cacheKey := fmt.Sprintf("auth:system:%s:%x", systemKey, hash) + + var value string + var ttl time.Duration + + if valid { + value = systemID + ttl = jitteredTTL(configuration.Config.SystemAuthCacheTTL) + } else { + value = "invalid" + ttl = 1 * time.Minute + } + + rdb := queue.GetClient() + err := rdb.Set(c.Request.Context(), cacheKey, value, ttl).Err() + if err != nil { + logger.Warn().Err(err).Msg("failed to cache auth result") + } +} diff --git a/services/support/middleware/ratelimit.go b/services/support/middleware/ratelimit.go new file mode 100644 index 00000000..9a6d1cf4 --- /dev/null +++ b/services/support/middleware/ratelimit.go @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package middleware + +import ( + "net/http" + "sync" + "time" + + "github.com/gin-gonic/gin" + + "github.com/nethesis/my/services/support/logger" + "github.com/nethesis/my/services/support/response" +) + +type rateLimitEntry struct { + count int + resetAt time.Time +} + +type rateLimiter struct { + mu sync.Mutex + entries map[string]*rateLimitEntry + limit int + window time.Duration +} + +func newRateLimiter(limit int, window time.Duration) *rateLimiter { + rl := &rateLimiter{ + entries: make(map[string]*rateLimitEntry), + limit: limit, + window: window, + } + // Background cleanup of expired entries every window period + go func() { + ticker := time.NewTicker(window) + defer ticker.Stop() + for range ticker.C { + rl.mu.Lock() + now := time.Now() + for key, entry := range rl.entries { + if now.After(entry.resetAt) { + delete(rl.entries, key) + } + } + rl.mu.Unlock() + } + }() + return rl +} + +func (rl *rateLimiter) allow(key string) bool { + rl.mu.Lock() + defer rl.mu.Unlock() + + now := 
time.Now() + entry, exists := rl.entries[key] + if !exists || now.After(entry.resetAt) { + rl.entries[key] = &rateLimitEntry{ + count: 1, + resetAt: now.Add(rl.window), + } + return true + } + + entry.count++ + return entry.count <= rl.limit +} + +// tunnelIPRateLimiter limits tunnel connection attempts per IP +var tunnelIPRateLimiter = newRateLimiter(10, 1*time.Minute) + +// tunnelKeyRateLimiter limits tunnel connection attempts per system_key (#14) +var tunnelKeyRateLimiter = newRateLimiter(5, 1*time.Minute) + +// TunnelRateLimitMiddleware limits the rate of tunnel connection attempts +// per client IP (10/min) and per system_key (5/min, checked after auth). +func TunnelRateLimitMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + clientIP := c.ClientIP() + if !tunnelIPRateLimiter.allow(clientIP) { + logger.Warn(). + Str("client_ip", clientIP). + Str("path", c.Request.URL.Path). + Msg("tunnel IP rate limit exceeded") + c.JSON(http.StatusTooManyRequests, response.Error(http.StatusTooManyRequests, "too many connection attempts", nil)) + c.Abort() + return + } + c.Next() + } +} + +// SystemKeyRateLimitMiddleware checks the per-system_key rate limit. +// Runs after BasicAuthMiddleware so that system_key is available in the context. +func SystemKeyRateLimitMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + systemKey, exists := c.Get("system_key") + if !exists { + c.Next() + return + } + key := systemKey.(string) + if !tunnelKeyRateLimiter.allow(key) { + logger.Warn(). + Str("system_key", key). + Str("client_ip", c.ClientIP()). 
+ Msg("tunnel system_key rate limit exceeded") + c.JSON(http.StatusTooManyRequests, response.Error(http.StatusTooManyRequests, "too many connection attempts for this system", nil)) + c.Abort() + return + } + c.Next() + } +} diff --git a/services/support/models/session.go b/services/support/models/session.go new file mode 100644 index 00000000..a7fee08c --- /dev/null +++ b/services/support/models/session.go @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package models + +import "time" + +// SupportSession represents a support tunnel session +type SupportSession struct { + ID string `json:"id"` + SystemID string `json:"system_id"` + NodeID string `json:"node_id,omitempty"` + SessionToken string `json:"session_token,omitempty"` + ReconnectToken string `json:"reconnect_token,omitempty"` + StartedAt time.Time `json:"started_at"` + ExpiresAt time.Time `json:"expires_at"` + Status string `json:"status"` + ClosedAt *time.Time `json:"closed_at,omitempty"` + ClosedBy *string `json:"closed_by,omitempty"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} diff --git a/services/support/pkg/version/VERSION b/services/support/pkg/version/VERSION new file mode 100644 index 00000000..1d0ba9ea --- /dev/null +++ b/services/support/pkg/version/VERSION @@ -0,0 +1 @@ +0.4.0 diff --git a/services/support/pkg/version/version.go b/services/support/pkg/version/version.go new file mode 100644 index 00000000..fbfb7965 --- /dev/null +++ b/services/support/pkg/version/version.go @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package version + +import ( + _ "embed" + "fmt" + "runtime" + "strings" +) + +//go:embed VERSION +var versionRaw string + +// Build information. Populated at build-time via ldflags. +var ( + Version = strings.TrimSpace(versionRaw) + Commit = "unknown" + BuildTime = "unknown" +) + +// Info represents version information +type Info struct { + Version string `json:"version" yaml:"version"` + Commit string `json:"commit" yaml:"commit"` + BuildTime string `json:"build_time" yaml:"build_time"` + GoVersion string `json:"go_version" yaml:"go_version"` + Platform string `json:"platform" yaml:"platform"` +} + +// Get returns version information +func Get() Info { + return Info{ + Version: Version, + Commit: Commit, + BuildTime: BuildTime, + GoVersion: runtime.Version(), + Platform: fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH), + } +} + +// String returns a human-readable version string +func (i Info) String() string { + return fmt.Sprintf("%s (%s) built at %s with %s for %s", + i.Version, i.Commit, i.BuildTime, i.GoVersion, i.Platform) +} diff --git a/services/support/queue/redis.go b/services/support/queue/redis.go new file mode 100644 index 00000000..2f245743 --- /dev/null +++ b/services/support/queue/redis.go @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package queue + +import ( + "context" + "fmt" + "time" + + "github.com/redis/go-redis/v9" + + "github.com/nethesis/my/services/support/configuration" + "github.com/nethesis/my/services/support/logger" +) + +var client *redis.Client + +// Init initializes the Redis client +func Init() error { + opt, err := redis.ParseURL(configuration.Config.RedisURL) + if err != nil { + return fmt.Errorf("failed to parse Redis URL: %w", err) + } + + opt.DB = configuration.Config.RedisDB + opt.Password = configuration.Config.RedisPassword + opt.PoolSize = 20 + opt.MinIdleConns = 5 + opt.ConnMaxIdleTime = 5 * time.Minute + opt.ConnMaxLifetime = 30 * time.Minute + + client = redis.NewClient(opt) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := client.Ping(ctx).Err(); err != nil { + return fmt.Errorf("failed to connect to Redis: %w", err) + } + + logger.Info(). + Str("redis_url", logger.SanitizeConnectionURL(configuration.Config.RedisURL)). + Int("redis_db", opt.DB). + Msg("Redis client initialized") + + return nil +} + +// GetClient returns the Redis client instance +func GetClient() *redis.Client { + return client +} + +// Subscribe subscribes to a Redis pub/sub channel +func Subscribe(ctx context.Context, channel string) *redis.PubSub { + return client.Subscribe(ctx, channel) +} + +// Close closes the Redis connection +func Close() error { + if client != nil { + return client.Close() + } + return nil +} diff --git a/services/support/response/response.go b/services/support/response/response.go new file mode 100644 index 00000000..bd2583ea --- /dev/null +++ b/services/support/response/response.go @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package response + +import "net/http" + +// Response is the standard API response format +type Response struct { + Code int `json:"code"` + Message string `json:"message"` + Data interface{} `json:"data"` +} + +// Success creates a success response +func Success(code int, message string, data interface{}) Response { + return Response{Code: code, Message: message, Data: data} +} + +// Error creates an error response +func Error(code int, message string, data interface{}) Response { + return Response{Code: code, Message: message, Data: data} +} + +// OK creates a 200 response +func OK(message string, data interface{}) Response { + return Success(http.StatusOK, message, data) +} + +// Created creates a 201 response +func Created(message string, data interface{}) Response { + return Success(http.StatusCreated, message, data) +} + +// BadRequest creates a 400 response +func BadRequest(message string, data interface{}) Response { + return Error(http.StatusBadRequest, message, data) +} + +// Unauthorized creates a 401 response +func Unauthorized(message string, data interface{}) Response { + return Error(http.StatusUnauthorized, message, data) +} + +// Forbidden creates a 403 response +func Forbidden(message string, data interface{}) Response { + return Error(http.StatusForbidden, message, data) +} + +// NotFound creates a 404 response +func NotFound(message string, data interface{}) Response { + return Error(http.StatusNotFound, message, data) +} + +// InternalServerError creates a 500 response +func InternalServerError(message string, data interface{}) Response { + return Error(http.StatusInternalServerError, message, data) +} diff --git a/services/support/response/response_test.go b/services/support/response/response_test.go new file mode 100644 index 00000000..c3360498 --- /dev/null +++ b/services/support/response/response_test.go @@ 
-0,0 +1,53 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package response + +import ( + "net/http" + "testing" +) + +func TestOK(t *testing.T) { + r := OK("success", nil) + if r.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", r.Code) + } + if r.Message != "success" { + t.Fatalf("expected 'success', got %s", r.Message) + } +} + +func TestCreated(t *testing.T) { + r := Created("created", map[string]string{"id": "123"}) + if r.Code != http.StatusCreated { + t.Fatalf("expected 201, got %d", r.Code) + } +} + +func TestUnauthorized(t *testing.T) { + r := Unauthorized("denied", nil) + if r.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d", r.Code) + } +} + +func TestNotFound(t *testing.T) { + r := NotFound("not found", nil) + if r.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", r.Code) + } +} + +func TestInternalServerError(t *testing.T) { + r := InternalServerError("error", nil) + if r.Code != http.StatusInternalServerError { + t.Fatalf("expected 500, got %d", r.Code) + } +} diff --git a/services/support/session/cleaner.go b/services/support/session/cleaner.go new file mode 100644 index 00000000..0e1dfaa4 --- /dev/null +++ b/services/support/session/cleaner.go @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it
+ *
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ *
+ * author: Edoardo Spadoni
+ */
+
+package session
+
+import (
+	"context"
+	"time"
+
+	"github.com/nethesis/my/services/support/configuration"
+	"github.com/nethesis/my/services/support/database"
+	"github.com/nethesis/my/services/support/logger"
+)
+
+// CleanerCallback is called when sessions are expired so tunnels can be disconnected
+type CleanerCallback func(expiredSessionIDs []string)
+
+// StartCleaner runs the session cleanup goroutine. It ticks every
+// SessionCleanerInterval until ctx is cancelled; on each tick it collects
+// the IDs of expired-but-unmarked sessions, marks them expired in the DB,
+// and invokes callback with the collected IDs so tunnels can be torn down.
+func StartCleaner(ctx context.Context, callback CleanerCallback) {
+	log := logger.ComponentLogger("session_cleaner")
+	ticker := time.NewTicker(configuration.Config.SessionCleanerInterval)
+	defer ticker.Stop()
+
+	log.Info().
+		Dur("interval", configuration.Config.SessionCleanerInterval).
+		Msg("session cleaner started")
+
+	for {
+		select {
+		case <-ctx.Done():
+			log.Info().Msg("session cleaner stopped")
+			return
+		case <-ticker.C:
+			// Get sessions that have already expired (expires_at < NOW())
+			// but are not yet marked as such.
+			// NOTE(review): a session whose expiry falls between this SELECT
+			// and the UPDATE in ExpireSessions below is marked expired in the
+			// DB but its ID is never delivered to the callback (on the next
+			// tick it no longer matches 'pending'/'active'), so its tunnel is
+			// not disconnected here. A single `UPDATE ... RETURNING id` would
+			// close that window — confirm and consider consolidating.
+			expiredIDs, err := getExpiredSessionIDs()
+			if err != nil {
+				log.Error().Err(err).Msg("failed to get expired session IDs")
+				continue
+			}
+
+			// Expire sessions in DB
+			count, err := ExpireSessions()
+			if err != nil {
+				log.Error().Err(err).Msg("failed to expire sessions")
+				continue
+			}
+
+			if count > 0 {
+				log.Info().Int64("expired_count", count).Msg("sessions expired")
+				if callback != nil && len(expiredIDs) > 0 {
+					callback(expiredIDs)
+				}
+			}
+		}
+	}
+}
+
+// getExpiredSessionIDs returns IDs of sessions that are expired but not yet marked
+func getExpiredSessionIDs() ([]string, error) {
+	rows, err := database.DB.Query(
+		`SELECT id FROM support_sessions
+		 WHERE status IN ('pending', 'active') AND expires_at < NOW()`,
+	)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = rows.Close() }()
+
+	var ids []string
+	for rows.Next() {
+		var id string
+		if err := rows.Scan(&id); err != nil {
+			return nil, err
+		}
+		ids = append(ids, id)
+	}
+ return ids, rows.Err() +} diff --git a/services/support/session/manager.go b/services/support/session/manager.go new file mode 100644 index 00000000..16cb938b --- /dev/null +++ b/services/support/session/manager.go @@ -0,0 +1,313 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package session + +import ( + "crypto/rand" + "crypto/subtle" + "database/sql" + "encoding/hex" + "fmt" + "time" + + "github.com/nethesis/my/services/support/configuration" + "github.com/nethesis/my/services/support/database" + "github.com/nethesis/my/services/support/logger" + "github.com/nethesis/my/services/support/models" +) + +// GenerateToken creates a cryptographically secure session token +func GenerateToken() (string, error) { + bytes := make([]byte, 32) + if _, err := rand.Read(bytes); err != nil { + return "", fmt.Errorf("failed to generate token: %w", err) + } + return hex.EncodeToString(bytes), nil +} + +// CreateSession creates a new support session for a system. +// nodeID identifies the cluster node (empty for single-node systems). +// Enforces a maximum number of active sessions per system. +// Closes any existing active/pending sessions for the same system+node to prevent orphans. +func CreateSession(systemID, nodeID string) (*models.SupportSession, error) { + // Close any existing active/pending sessions for this system+node combination. + // This prevents orphaned sessions when a client reconnects without a valid reconnect token. 
+ var closeQuery string + var closeArgs []interface{} + if nodeID == "" { + closeQuery = `UPDATE support_sessions + SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', updated_at = NOW() + WHERE system_id = $1 AND node_id IS NULL AND status IN ('pending', 'active')` + closeArgs = []interface{}{systemID} + } else { + closeQuery = `UPDATE support_sessions + SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', updated_at = NOW() + WHERE system_id = $1 AND node_id = $2 AND status IN ('pending', 'active')` + closeArgs = []interface{}{systemID, nodeID} + } + result, err := database.DB.Exec(closeQuery, closeArgs...) + if err != nil { + logger.ComponentLogger("session").Warn().Err(err). + Str("system_id", systemID).Str("node_id", nodeID). + Msg("failed to close existing sessions before creating new one") + } else if rows, _ := result.RowsAffected(); rows > 0 { + logger.ComponentLogger("session").Info(). + Str("system_id", systemID).Str("node_id", nodeID). + Int64("closed_count", rows). 
+ Msg("closed orphaned sessions before creating new one") + } + + // Enforce per-system session limit + maxSessions := configuration.Config.MaxSessionsPerSystem + if maxSessions > 0 { + var activeCount int + err := database.DB.QueryRow( + `SELECT COUNT(*) FROM support_sessions + WHERE system_id = $1 AND status IN ('pending', 'active')`, + systemID, + ).Scan(&activeCount) + if err != nil { + return nil, fmt.Errorf("failed to check session count: %w", err) + } + if activeCount >= maxSessions { + return nil, fmt.Errorf("maximum active sessions per system reached (%d)", maxSessions) + } + } + + token, err := GenerateToken() + if err != nil { + return nil, err + } + + now := time.Now() + expiresAt := now.Add(configuration.Config.SessionDefaultDuration) + + reconnectToken, err := GenerateToken() + if err != nil { + return nil, err + } + + // Use NULL for empty node_id + var nodeIDParam interface{} + if nodeID != "" { + nodeIDParam = nodeID + } + + var session models.SupportSession + var scannedNodeID sql.NullString + err = database.DB.QueryRow( + `INSERT INTO support_sessions (system_id, node_id, session_token, reconnect_token, started_at, expires_at, status) + VALUES ($1, $2, $3, $4, $5, $6, 'pending') + RETURNING id, system_id, node_id, session_token, reconnect_token, started_at, expires_at, status, created_at, updated_at`, + systemID, nodeIDParam, token, reconnectToken, now, expiresAt, + ).Scan( + &session.ID, &session.SystemID, &scannedNodeID, &session.SessionToken, &session.ReconnectToken, + &session.StartedAt, &session.ExpiresAt, &session.Status, + &session.CreatedAt, &session.UpdatedAt, + ) + if err != nil { + return nil, fmt.Errorf("failed to create session: %w", err) + } + if scannedNodeID.Valid { + session.NodeID = scannedNodeID.String + } + + logger.ComponentLogger("session").Info(). + Str("session_id", session.ID). + Str("system_id", systemID). + Str("node_id", nodeID). 
+ Msg("session created") + + return &session, nil +} + +// ActivateSession marks a session as active (tunnel connected) +func ActivateSession(sessionID string) error { + _, err := database.DB.Exec( + `UPDATE support_sessions SET status = 'active', updated_at = NOW() + WHERE id = $1 AND status = 'pending'`, + sessionID, + ) + return err +} + +// GetActiveSession returns the active or pending session for a system+node combination. +// nodeID can be empty for single-node systems. +func GetActiveSession(systemID, nodeID string) (*models.SupportSession, error) { + var session models.SupportSession + var closedAt sql.NullTime + var closedBy sql.NullString + var reconnectToken sql.NullString + var scannedNodeID sql.NullString + + var query string + var args []interface{} + + if nodeID == "" { + query = `SELECT id, system_id, node_id, session_token, reconnect_token, started_at, expires_at, status, + closed_at, closed_by, created_at, updated_at + FROM support_sessions + WHERE system_id = $1 AND node_id IS NULL AND status IN ('pending', 'active') + ORDER BY created_at DESC LIMIT 1` + args = []interface{}{systemID} + } else { + query = `SELECT id, system_id, node_id, session_token, reconnect_token, started_at, expires_at, status, + closed_at, closed_by, created_at, updated_at + FROM support_sessions + WHERE system_id = $1 AND node_id = $2 AND status IN ('pending', 'active') + ORDER BY created_at DESC LIMIT 1` + args = []interface{}{systemID, nodeID} + } + + err := database.DB.QueryRow(query, args...).Scan( + &session.ID, &session.SystemID, &scannedNodeID, &session.SessionToken, &reconnectToken, + &session.StartedAt, &session.ExpiresAt, &session.Status, + &closedAt, &closedBy, &session.CreatedAt, &session.UpdatedAt, + ) + + if err == sql.ErrNoRows { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("failed to get session: %w", err) + } + + if scannedNodeID.Valid { + session.NodeID = scannedNodeID.String + } + if closedAt.Valid { + session.ClosedAt = 
&closedAt.Time + } + if closedBy.Valid { + session.ClosedBy = &closedBy.String + } + if reconnectToken.Valid { + session.ReconnectToken = reconnectToken.String + } + + return &session, nil +} + +// ValidateReconnectToken checks if a reconnect token matches the session +func ValidateReconnectToken(sessionID, token string) bool { + if token == "" { + return false + } + var storedToken sql.NullString + err := database.DB.QueryRow( + `SELECT reconnect_token FROM support_sessions WHERE id = $1 AND status IN ('pending', 'active')`, + sessionID, + ).Scan(&storedToken) + if err != nil || !storedToken.Valid { + return false + } + return subtle.ConstantTimeCompare([]byte(storedToken.String), []byte(token)) == 1 +} + +// GetSessionTokenByID returns the session_token for a session (for internal auth) +func GetSessionTokenByID(sessionID string) (string, error) { + var token string + err := database.DB.QueryRow( + `SELECT session_token FROM support_sessions WHERE id = $1 AND status IN ('pending', 'active')`, + sessionID, + ).Scan(&token) + if err != nil { + return "", fmt.Errorf("session not found or not active: %w", err) + } + return token, nil +} + +// GetSessionByID returns a session by its ID +func GetSessionByID(sessionID string) (*models.SupportSession, error) { + var session models.SupportSession + var closedAt sql.NullTime + var closedBy sql.NullString + var scannedNodeID sql.NullString + + err := database.DB.QueryRow( + `SELECT id, system_id, node_id, session_token, started_at, expires_at, status, + closed_at, closed_by, created_at, updated_at + FROM support_sessions + WHERE id = $1`, + sessionID, + ).Scan( + &session.ID, &session.SystemID, &scannedNodeID, &session.SessionToken, + &session.StartedAt, &session.ExpiresAt, &session.Status, + &closedAt, &closedBy, &session.CreatedAt, &session.UpdatedAt, + ) + + if err == sql.ErrNoRows { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("failed to get session: %w", err) + } + + if scannedNodeID.Valid { + 
session.NodeID = scannedNodeID.String + } + if closedAt.Valid { + session.ClosedAt = &closedAt.Time + } + if closedBy.Valid { + session.ClosedBy = &closedBy.String + } + + return &session, nil +} + +// CloseSession closes a support session +func CloseSession(sessionID, closedBy string) error { + result, err := database.DB.Exec( + `UPDATE support_sessions + SET status = 'closed', closed_at = NOW(), closed_by = $2, updated_at = NOW() + WHERE id = $1 AND status IN ('pending', 'active')`, + sessionID, closedBy, + ) + if err != nil { + return fmt.Errorf("failed to close session: %w", err) + } + + rows, _ := result.RowsAffected() + if rows == 0 { + return fmt.Errorf("session not found or already closed") + } + + logger.ComponentLogger("session").Info(). + Str("session_id", sessionID). + Str("closed_by", closedBy). + Msg("session closed") + + return nil +} + +// ExpireSessions marks expired sessions +func ExpireSessions() (int64, error) { + result, err := database.DB.Exec( + `UPDATE support_sessions + SET status = 'expired', closed_at = NOW(), closed_by = 'timeout', updated_at = NOW() + WHERE status IN ('pending', 'active') AND expires_at < NOW()`, + ) + if err != nil { + return 0, fmt.Errorf("failed to expire sessions: %w", err) + } + + rows, _ := result.RowsAffected() + return rows, nil +} + +// GetActiveSessions returns the count of active sessions +func GetActiveSessions() (int, error) { + var count int + err := database.DB.QueryRow( + `SELECT COUNT(*) FROM support_sessions WHERE status = 'active'`, + ).Scan(&count) + return count, err +} diff --git a/services/support/testutils/testutils.go b/services/support/testutils/testutils.go new file mode 100644 index 00000000..c7a0f384 --- /dev/null +++ b/services/support/testutils/testutils.go @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it
+ *
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ *
+ * author: Edoardo Spadoni
+ */
+
+package testutils
+
+import (
+	"encoding/base64"
+	"net/http"
+	"net/http/httptest"
+
+	"github.com/gin-gonic/gin"
+)
+
+// SetupTestRouter creates a test Gin router (test mode, no default middleware)
+func SetupTestRouter() *gin.Engine {
+	gin.SetMode(gin.TestMode)
+	return gin.New()
+}
+
+// MakeBasicAuthHeader creates a Basic Auth header value
+// ("Basic " + base64("username:password"))
+func MakeBasicAuthHeader(username, password string) string {
+	credentials := username + ":" + password
+	return "Basic " + base64.StdEncoding.EncodeToString([]byte(credentials))
+}
+
+// PerformRequest executes a test HTTP request (always with a nil body) against
+// the router and returns the recorded response.
+func PerformRequest(router *gin.Engine, method, path string, headers map[string]string) *httptest.ResponseRecorder {
+	// NOTE(review): the error from http.NewRequest is discarded; acceptable for
+	// tests using well-formed literal methods/paths, but a malformed input would
+	// yield a nil request and panic inside ServeHTTP.
+	req, _ := http.NewRequest(method, path, nil)
+	for key, value := range headers {
+		req.Header.Set(key, value)
+	}
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+	return w
+}
diff --git a/services/support/tunnel/manager.go b/services/support/tunnel/manager.go
new file mode 100644
index 00000000..28478a73
--- /dev/null
+++ b/services/support/tunnel/manager.go
@@ -0,0 +1,506 @@
+/*
+ * Copyright (C) 2026 Nethesis S.r.l.
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package tunnel + +import ( + "fmt" + "net" + "strings" + "sync" + "time" + + "github.com/gorilla/websocket" + "github.com/hashicorp/yamux" + + "github.com/nethesis/my/services/support/logger" +) + +// ServiceInfo describes a service available through the tunnel +type ServiceInfo struct { + Target string `json:"target"` + Host string `json:"host"` + TLS bool `json:"tls"` + Label string `json:"label"` + Path string `json:"path,omitempty"` // Traefik route path (for display) + PathPrefix string `json:"path_prefix,omitempty"` // Traefik PathPrefix to strip when proxying + ModuleID string `json:"module_id,omitempty"` // NS8 module ID for grouping (e.g., "nethvoice103") + NodeID string `json:"node_id,omitempty"` // NS8 node ID (e.g., "1", "2") +} + +// ServiceManifest is the JSON manifest sent by the tunnel client +type ServiceManifest struct { + Version int `json:"version"` + Services map[string]ServiceInfo `json:"services"` +} + +// CloseCodeSessionClosed is the WebSocket close code sent when an operator closes a session. +// Tunnel clients receiving this code should exit without reconnecting. +const CloseCodeSessionClosed = 4000 + +// Tunnel represents an active WebSocket tunnel connection +type Tunnel struct { + SystemID string + NodeID string + SessionID string + Session *yamux.Session + WsConn WsCloser // underlying WebSocket for sending close frames + ConnectedAt time.Time + done chan struct{} + services map[string]ServiceInfo + servicesMu sync.RWMutex + activeStreams int64 + activeStreamsMu sync.Mutex + maxStreams int +} + +// WsCloser allows sending a WebSocket close frame with a status code and reason +type WsCloser interface { + WriteControl(messageType int, data []byte, deadline time.Time) error +} + +// TunnelKey builds the map key for a tunnel. 
For multi-node clusters, +// each node has its own tunnel keyed by "systemID:nodeID". +// Single-node systems use just "systemID". +func TunnelKey(systemID, nodeID string) string { + if nodeID == "" { + return systemID + } + return systemID + ":" + nodeID +} + +// GraceExpiredCallback is called when a grace period expires without reconnection +type GraceExpiredCallback func(systemID, sessionID string) + +// graceTimer tracks a pending grace period for a disconnected tunnel +type graceTimer struct { + sessionID string + timer *time.Timer +} + +// Manager manages active tunnel connections +type Manager struct { + mu sync.RWMutex + tunnels map[string]*Tunnel // keyed by systemID + graceTimers map[string]*graceTimer // keyed by systemID + graceCallback GraceExpiredCallback + maxTunnels int + maxStreams int +} + +// NewManager creates a new tunnel manager with a maximum tunnel limit +func NewManager(maxTunnels, maxStreams int) *Manager { + return &Manager{ + tunnels: make(map[string]*Tunnel), + graceTimers: make(map[string]*graceTimer), + maxTunnels: maxTunnels, + maxStreams: maxStreams, + } +} + +// SetGraceCallback sets the callback for grace period expiration +func (m *Manager) SetGraceCallback(cb GraceExpiredCallback) { + m.mu.Lock() + defer m.mu.Unlock() + m.graceCallback = cb +} + +// Register adds a tunnel to the manager. Returns an error if the maximum tunnel limit is reached. +// nodeID identifies the cluster node; empty for single-node systems. +// wsConn is the underlying WebSocket connection used for sending close frames. 
+func (m *Manager) Register(systemID, nodeID, sessionID string, yamuxSession *yamux.Session, wsConn WsCloser) (*Tunnel, error) { + m.mu.Lock() + defer m.mu.Unlock() + + key := TunnelKey(systemID, nodeID) + + // Enforce maximum tunnel limit (existing tunnel for this key doesn't count as it gets replaced) + _, isReplacement := m.tunnels[key] + if !isReplacement && m.maxTunnels > 0 && len(m.tunnels) >= m.maxTunnels { + logger.ComponentLogger("tunnel_manager").Warn(). + Int("max_tunnels", m.maxTunnels). + Int("current_tunnels", len(m.tunnels)). + Str("system_id", systemID). + Str("node_id", nodeID). + Msg("maximum tunnel limit reached, rejecting connection") + return nil, fmt.Errorf("maximum tunnel limit reached (%d)", m.maxTunnels) + } + + // Cancel any pending grace period for this key + if gt, ok := m.graceTimers[key]; ok { + gt.timer.Stop() + delete(m.graceTimers, key) + logger.ComponentLogger("tunnel_manager").Info(). + Str("system_id", systemID). + Str("node_id", nodeID). + Str("session_id", gt.sessionID). + Msg("grace period cancelled: system reconnected") + } + + // Close existing tunnel for this key if any + if existing, ok := m.tunnels[key]; ok { + logger.ComponentLogger("tunnel_manager").Warn(). + Str("system_id", systemID). + Str("node_id", nodeID). + Str("old_session_id", existing.SessionID). + Str("new_session_id", sessionID). + Msg("replacing existing tunnel") + existing.Close() + } + + t := &Tunnel{ + SystemID: systemID, + NodeID: nodeID, + SessionID: sessionID, + Session: yamuxSession, + WsConn: wsConn, + ConnectedAt: time.Now(), + done: make(chan struct{}), + maxStreams: m.maxStreams, + } + if t.maxStreams == 0 { + t.maxStreams = 64 + } + + m.tunnels[key] = t + + logger.ComponentLogger("tunnel_manager").Info(). + Str("system_id", systemID). + Str("node_id", nodeID). + Str("session_id", sessionID). + Int("active_tunnels", len(m.tunnels)). 
+ Msg("tunnel registered") + + return t, nil +} + +// Unregister removes a tunnel from the manager +func (m *Manager) Unregister(systemID, nodeID string) { + m.mu.Lock() + defer m.mu.Unlock() + + key := TunnelKey(systemID, nodeID) + if t, ok := m.tunnels[key]; ok { + t.Close() + delete(m.tunnels, key) + + logger.ComponentLogger("tunnel_manager").Info(). + Str("system_id", systemID). + Str("node_id", nodeID). + Str("session_id", t.SessionID). + Msg("tunnel unregistered") + } +} + +// Get returns a tunnel by system ID and node ID +func (m *Manager) Get(systemID, nodeID string) *Tunnel { + m.mu.RLock() + defer m.mu.RUnlock() + return m.tunnels[TunnelKey(systemID, nodeID)] +} + +// GetBySessionID returns a tunnel by session ID +func (m *Manager) GetBySessionID(sessionID string) *Tunnel { + m.mu.RLock() + defer m.mu.RUnlock() + + for _, t := range m.tunnels { + if t.SessionID == sessionID { + return t + } + } + return nil +} + +// Count returns the number of active tunnels +func (m *Manager) Count() int { + m.mu.RLock() + defer m.mu.RUnlock() + return len(m.tunnels) +} + +// List returns info about all active tunnels +func (m *Manager) List() []TunnelInfo { + m.mu.RLock() + defer m.mu.RUnlock() + + infos := make([]TunnelInfo, 0, len(m.tunnels)) + for _, t := range m.tunnels { + infos = append(infos, TunnelInfo{ + SystemID: t.SystemID, + NodeID: t.NodeID, + SessionID: t.SessionID, + ConnectedAt: t.ConnectedAt, + }) + } + return infos +} + +// CloseBySessionID closes a tunnel by session ID, sending a graceful close +// frame so the tunnel-client knows not to reconnect. +func (m *Manager) CloseBySessionID(sessionID string) bool { + m.mu.Lock() + defer m.mu.Unlock() + + for key, t := range m.tunnels { + if t.SessionID == sessionID { + t.GracefulClose() + delete(m.tunnels, key) + logger.ComponentLogger("tunnel_manager").Info(). + Str("system_id", t.SystemID). + Str("session_id", sessionID). 
+ Msg("tunnel gracefully closed by session ID") + return true + } + } + return false +} + +// StartGracePeriod begins a grace period for a disconnected tunnel. +// If the system reconnects before the grace period expires, the timer is cancelled. +// If it expires, the callback is invoked to close the session. +func (m *Manager) StartGracePeriod(systemID, nodeID, sessionID string, duration time.Duration) { + m.mu.Lock() + defer m.mu.Unlock() + + key := TunnelKey(systemID, nodeID) + + // Cancel any existing grace timer + if gt, ok := m.graceTimers[key]; ok { + gt.timer.Stop() + delete(m.graceTimers, key) + } + + timer := time.AfterFunc(duration, func() { + m.mu.Lock() + cb := m.graceCallback + delete(m.graceTimers, key) + m.mu.Unlock() + + logger.ComponentLogger("tunnel_manager").Info(). + Str("system_id", systemID). + Str("node_id", nodeID). + Str("session_id", sessionID). + Msg("grace period expired: closing session") + + if cb != nil { + cb(systemID, sessionID) + } + }) + + m.graceTimers[key] = &graceTimer{ + sessionID: sessionID, + timer: timer, + } + + logger.ComponentLogger("tunnel_manager").Info(). + Str("system_id", systemID). + Str("node_id", nodeID). + Str("session_id", sessionID). + Dur("grace_period", duration). 
+		Msg("grace period started")
+}
+
+// HasGracePeriod returns true if there is an active grace period for this system+node
+func (m *Manager) HasGracePeriod(systemID, nodeID string) bool {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	_, ok := m.graceTimers[TunnelKey(systemID, nodeID)]
+	return ok
+}
+
+// CloseAll closes all active tunnels and cancels all grace timers
+func (m *Manager) CloseAll() {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	// NOTE(review): the loop variable here is the tunnel map key (systemID or
+	// "systemID:nodeID" for multi-node clusters), not a bare system ID —
+	// consider renaming to "key" for clarity.
+	for systemID, t := range m.tunnels {
+		t.Close()
+		delete(m.tunnels, systemID)
+	}
+
+	for systemID, gt := range m.graceTimers {
+		gt.timer.Stop()
+		delete(m.graceTimers, systemID)
+	}
+
+	logger.ComponentLogger("tunnel_manager").Info().Msg("all tunnels closed")
+}
+
+// Close closes the tunnel's yamux session and marks the tunnel done.
+// Idempotent for sequential callers.
+func (t *Tunnel) Close() {
+	// NOTE(review): this check-then-close is racy — two goroutines calling
+	// Close concurrently can both take the default branch and panic on the
+	// double close(t.done). All current call sites (Register, Unregister,
+	// CloseBySessionID, CloseAll) run under the manager mutex, which masks
+	// the race; a sync.Once field on Tunnel would make Close safe on its own.
+	select {
+	case <-t.done:
+		return // already closed
+	default:
+		close(t.done)
+	}
+
+	if t.Session != nil {
+		_ = t.Session.Close()
+	}
+}
+
+// GracefulClose sends a WebSocket close frame with CloseCodeSessionClosed
+// before closing the tunnel. This tells the tunnel-client to exit without reconnecting.
+func (t *Tunnel) GracefulClose() {
+	// NOTE(review): CloseBySessionID calls this while holding the manager
+	// mutex, so the 100ms sleep below stalls all other tunnel operations for
+	// that duration — confirm this is acceptable under load.
+	if t.WsConn != nil {
+		msg := websocket.FormatCloseMessage(CloseCodeSessionClosed, "session_closed")
+		_ = t.WsConn.WriteControl(websocket.CloseMessage, msg, time.Now().Add(5*time.Second))
+		// Give the client a moment to process the close frame
+		time.Sleep(100 * time.Millisecond)
+	}
+	t.Close()
+}
+
+// Done returns a channel that is closed when the tunnel is done
+func (t *Tunnel) Done() <-chan struct{} {
+	return t.done
+}
+
+// SetServices updates the services available through this tunnel.
+// Services with dangerous targets (cloud metadata, link-local) are rejected.
+func (t *Tunnel) SetServices(services map[string]ServiceInfo) { + t.servicesMu.Lock() + defer t.servicesMu.Unlock() + + validated := make(map[string]ServiceInfo, len(services)) + for name, svc := range services { + if err := validateServiceTarget(svc.Target); err != nil { + logger.ComponentLogger("tunnel_manager").Warn(). + Str("system_id", t.SystemID). + Str("service", name). + Str("target", svc.Target). + Err(err). + Msg("rejected service with dangerous target") + continue + } + validated[name] = svc + } + t.services = validated +} + +// dangerousHostnames contains cloud metadata and other dangerous hostnames +var dangerousHostnames = map[string]bool{ + "metadata.google.internal": true, + "metadata": true, + "metadata.azure.internal": true, + "instance-data": true, // Oracle Cloud + "metadata.platformequinix.com": true, +} + +// validateServiceTarget rejects targets pointing to dangerous addresses (#5) +func validateServiceTarget(target string) error { + if target == "" { + return fmt.Errorf("empty target") + } + + host, _, err := net.SplitHostPort(target) + if err != nil { + host = target + } + + // Block known dangerous hostnames + if dangerousHostnames[strings.ToLower(host)] { + return fmt.Errorf("cloud metadata hostname blocked: %s", host) + } + + ip := net.ParseIP(host) + if ip == nil { + return nil // regular hostname, allowed + } + + // Block unspecified address (0.0.0.0, ::) + if ip.IsUnspecified() { + return fmt.Errorf("unspecified address blocked: %s", ip) + } + + if ip.To4() != nil { + // Block link-local (169.254.0.0/16) — cloud metadata lives here + linkLocal := net.IPNet{IP: net.IPv4(169, 254, 0, 0), Mask: net.CIDRMask(16, 32)} + if linkLocal.Contains(ip) { + return fmt.Errorf("link-local/cloud metadata address blocked: %s", ip) + } + + // Block multicast (224.0.0.0/4) + multicast := net.IPNet{IP: net.IPv4(224, 0, 0, 0), Mask: net.CIDRMask(4, 32)} + if multicast.Contains(ip) { + return fmt.Errorf("multicast address blocked: %s", ip) + } + + // 
Block broadcast (255.255.255.255)
+		if ip.Equal(net.IPv4bcast) {
+			return fmt.Errorf("broadcast address blocked: %s", ip)
+		}
+	} else {
+		// Block IPv6 link-local (fe80::/10)
+		if ip.IsLinkLocalUnicast() {
+			return fmt.Errorf("IPv6 link-local address blocked: %s", ip)
+		}
+
+		// Block IPv6 loopback (::1)
+		// NOTE(review): IPv6 loopback is rejected here while IPv4 loopback
+		// (127.0.0.1 / "localhost", per the tests) is allowed by the IPv4
+		// branch — confirm this asymmetry is intentional, since tunnel
+		// targets on the remote host commonly bind to loopback.
+		if ip.IsLoopback() {
+			return fmt.Errorf("IPv6 loopback address blocked: %s", ip)
+		}
+
+		// Block IPv6 multicast (ff00::/8)
+		if ip.IsMulticast() {
+			return fmt.Errorf("IPv6 multicast address blocked: %s", ip)
+		}
+	}
+
+	return nil
+}
+
+// GetService returns the service info for a given service name
+func (t *Tunnel) GetService(name string) (ServiceInfo, bool) {
+	t.servicesMu.RLock()
+	defer t.servicesMu.RUnlock()
+	svc, ok := t.services[name]
+	return svc, ok
+}
+
+// GetServices returns all services available through this tunnel.
+// The returned map is a shallow copy, so callers may modify it freely.
+func (t *Tunnel) GetServices() map[string]ServiceInfo {
+	t.servicesMu.RLock()
+	defer t.servicesMu.RUnlock()
+	result := make(map[string]ServiceInfo, len(t.services))
+	for k, v := range t.services {
+		result[k] = v
+	}
+	return result
+}
+
+// AcquireStream increments the active stream count and returns true if within limits (#10).
+// The check and increment are performed atomically under activeStreamsMu.
+func (t *Tunnel) AcquireStream() bool {
+	t.activeStreamsMu.Lock()
+	defer t.activeStreamsMu.Unlock()
+	if t.maxStreams > 0 && int(t.activeStreams) >= t.maxStreams {
+		return false
+	}
+	t.activeStreams++
+	return true
+}
+
+// ReleaseStream decrements the active stream count.
+func (t *Tunnel) ReleaseStream() { + t.activeStreamsMu.Lock() + defer t.activeStreamsMu.Unlock() + if t.activeStreams > 0 { + t.activeStreams-- + } +} + +// TunnelInfo represents basic tunnel information +type TunnelInfo struct { + SystemID string `json:"system_id"` + NodeID string `json:"node_id,omitempty"` + SessionID string `json:"session_id"` + ConnectedAt time.Time `json:"connected_at"` +} diff --git a/services/support/tunnel/manager_test.go b/services/support/tunnel/manager_test.go new file mode 100644 index 00000000..5cf64872 --- /dev/null +++ b/services/support/tunnel/manager_test.go @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package tunnel + +import ( + "testing" +) + +func TestNewManager(t *testing.T) { + m := NewManager(0, 64) + if m == nil { + t.Fatal("expected non-nil manager") + } + if m.Count() != 0 { + t.Fatalf("expected 0 tunnels, got %d", m.Count()) + } +} + +func TestManagerRegisterUnregister(t *testing.T) { + m := NewManager(0, 64) + + // Register a tunnel with nil yamux session (just for registry test) + tun, err := m.Register("sys1", "", "sess1", nil, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if tun == nil { + t.Fatal("expected non-nil tunnel") + } + if m.Count() != 1 { + t.Fatalf("expected 1 tunnel, got %d", m.Count()) + } + + // Get by system ID (no node) + got := m.Get("sys1", "") + if got == nil { + t.Fatal("expected to find tunnel by system ID") + } + if got.SystemID != "sys1" { + t.Fatalf("expected system ID sys1, got %s", got.SystemID) + } + + // Get by session ID + got = m.GetBySessionID("sess1") + if got == nil { + t.Fatal("expected to find tunnel by session ID") + } + + // Unregister + m.Unregister("sys1", "") + if m.Count() != 0 { + t.Fatalf("expected 0 tunnels after unregister, got %d", m.Count()) + } +} + +func TestManagerCloseBySessionID(t 
*testing.T) { + m := NewManager(0, 64) + _, _ = m.Register("sys1", "", "sess1", nil, nil) + + closed := m.CloseBySessionID("sess1") + if !closed { + t.Fatal("expected tunnel to be closed") + } + if m.Count() != 0 { + t.Fatalf("expected 0 tunnels, got %d", m.Count()) + } + + // Closing non-existent session + closed = m.CloseBySessionID("nonexistent") + if closed { + t.Fatal("expected false for non-existent session") + } +} + +func TestManagerReplaceExisting(t *testing.T) { + m := NewManager(0, 64) + _, _ = m.Register("sys1", "", "sess1", nil, nil) + _, _ = m.Register("sys1", "", "sess2", nil, nil) + + if m.Count() != 1 { + t.Fatalf("expected 1 tunnel after replacement, got %d", m.Count()) + } + + got := m.Get("sys1", "") + if got.SessionID != "sess2" { + t.Fatalf("expected session sess2, got %s", got.SessionID) + } +} + +func TestManagerList(t *testing.T) { + m := NewManager(0, 64) + _, _ = m.Register("sys1", "", "sess1", nil, nil) + _, _ = m.Register("sys2", "", "sess2", nil, nil) + + list := m.List() + if len(list) != 2 { + t.Fatalf("expected 2 tunnels in list, got %d", len(list)) + } +} + +func TestManagerCloseAll(t *testing.T) { + m := NewManager(0, 64) + _, _ = m.Register("sys1", "", "sess1", nil, nil) + _, _ = m.Register("sys2", "", "sess2", nil, nil) + + m.CloseAll() + if m.Count() != 0 { + t.Fatalf("expected 0 tunnels after CloseAll, got %d", m.Count()) + } +} + +func TestManagerMaxTunnelsLimit(t *testing.T) { + m := NewManager(2, 64) + _, err := m.Register("sys1", "", "sess1", nil, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + _, err = m.Register("sys2", "", "sess2", nil, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Third tunnel should be rejected + _, err = m.Register("sys3", "", "sess3", nil, nil) + if err == nil { + t.Fatal("expected error when exceeding max tunnels") + } + + // Replacing existing system should still work + _, err = m.Register("sys1", "", "sess1b", nil, nil) + if err != nil { + 
t.Fatalf("replacement should work even at limit: %v", err) + } +} + +func TestManagerMultiNode(t *testing.T) { + m := NewManager(0, 64) + + // Register tunnels for the same system but different nodes + _, err := m.Register("sys1", "1", "sess-1", nil, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + _, err = m.Register("sys1", "2", "sess-2", nil, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + _, err = m.Register("sys1", "3", "sess-3", nil, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if m.Count() != 3 { + t.Fatalf("expected 3 tunnels for multi-node, got %d", m.Count()) + } + + // Each node is independently addressable + got := m.Get("sys1", "1") + if got == nil || got.SessionID != "sess-1" { + t.Fatal("expected to find tunnel for node 1") + } + got = m.Get("sys1", "2") + if got == nil || got.SessionID != "sess-2" { + t.Fatal("expected to find tunnel for node 2") + } + + // GetBySessionID still works + got = m.GetBySessionID("sess-3") + if got == nil || got.NodeID != "3" { + t.Fatal("expected to find tunnel by session ID with node 3") + } + + // Unregister one node + m.Unregister("sys1", "2") + if m.Count() != 2 { + t.Fatalf("expected 2 tunnels after unregister, got %d", m.Count()) + } + + // Replace a node tunnel + _, err = m.Register("sys1", "1", "sess-1b", nil, nil) + if err != nil { + t.Fatalf("unexpected error replacing node tunnel: %v", err) + } + got = m.Get("sys1", "1") + if got.SessionID != "sess-1b" { + t.Fatalf("expected sess-1b, got %s", got.SessionID) + } +} + +func TestTunnelKeyFunction(t *testing.T) { + if TunnelKey("sys1", "") != "sys1" { + t.Fatal("expected plain systemID for empty nodeID") + } + if TunnelKey("sys1", "2") != "sys1:2" { + t.Fatal("expected systemID:nodeID for non-empty nodeID") + } +} + +func TestTunnelStreamLimits(t *testing.T) { + m := NewManager(0, 2) // max 2 streams per tunnel + tun, _ := m.Register("sys1", "", "sess1", nil, nil) + + if !tun.AcquireStream() { + 
t.Fatal("expected first stream to be acquired") + } + if !tun.AcquireStream() { + t.Fatal("expected second stream to be acquired") + } + if tun.AcquireStream() { + t.Fatal("expected third stream to be rejected (limit 2)") + } + + tun.ReleaseStream() + if !tun.AcquireStream() { + t.Fatal("expected stream to be acquired after release") + } +} + +func TestValidateServiceTarget(t *testing.T) { + tests := []struct { + target string + wantErr bool + }{ + {"localhost:8080", false}, + {"10.0.0.1:443", false}, + {"192.168.1.1:80", false}, + {"169.254.169.254:80", true}, // AWS metadata + {"169.254.0.1:80", true}, // link-local + {"metadata.google.internal:80", true}, // GCP metadata + {"metadata.azure.internal:80", true}, // Azure metadata + {"instance-data:80", true}, // Oracle Cloud + {"metadata.platformequinix.com:80", true}, // Equinix + {"0.0.0.0:80", true}, // unspecified + {"224.0.0.1:80", true}, // multicast + {"255.255.255.255:80", true}, // broadcast + {"", true}, + } + + for _, tt := range tests { + err := validateServiceTarget(tt.target) + if (err != nil) != tt.wantErr { + t.Errorf("validateServiceTarget(%q) = %v, wantErr %v", tt.target, err, tt.wantErr) + } + } +} diff --git a/services/support/tunnel/protocol.go b/services/support/tunnel/protocol.go new file mode 100644 index 00000000..a34d83a2 --- /dev/null +++ b/services/support/tunnel/protocol.go @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package tunnel + +import ( + "fmt" + "io" + "strings" +) + +// WriteConnectHeader writes a CONNECT request header to the stream. +// Format: "CONNECT \n" +func WriteConnectHeader(w io.Writer, serviceName string) error { + _, err := fmt.Fprintf(w, "CONNECT %s\n", serviceName) + return err +} + +// ReadConnectHeader reads a CONNECT request header from the stream. +// Returns the service name requested. 
+func ReadConnectHeader(r io.Reader) (string, error) { + line, err := readLine(r) + if err != nil { + return "", fmt.Errorf("failed to read CONNECT header: %w", err) + } + + if !strings.HasPrefix(line, "CONNECT ") { + return "", fmt.Errorf("invalid CONNECT header: %q", line) + } + + serviceName := strings.TrimPrefix(line, "CONNECT ") + if serviceName == "" { + return "", fmt.Errorf("empty service name in CONNECT header") + } + + return serviceName, nil +} + +// WriteConnectResponse writes a CONNECT response to the stream. +// If err is nil, writes "OK\n"; otherwise writes "ERROR \n". +func WriteConnectResponse(w io.Writer, err error) error { + if err == nil { + _, writeErr := fmt.Fprint(w, "OK\n") + return writeErr + } + _, writeErr := fmt.Fprintf(w, "ERROR %s\n", err.Error()) + return writeErr +} + +// ReadConnectResponse reads a CONNECT response from the stream. +// Returns nil on "OK", or an error with the message on "ERROR". +func ReadConnectResponse(r io.Reader) error { + line, err := readLine(r) + if err != nil { + return fmt.Errorf("failed to read CONNECT response: %w", err) + } + + if line == "OK" { + return nil + } + + if strings.HasPrefix(line, "ERROR ") { + return fmt.Errorf("%s", strings.TrimPrefix(line, "ERROR ")) + } + + return fmt.Errorf("unexpected CONNECT response: %q", line) +} + +// readLine reads a single line from the reader byte-by-byte until '\n'. +// Returns the line without the trailing newline. 
+func readLine(r io.Reader) (string, error) { + var buf []byte + b := make([]byte, 1) + + for { + n, err := r.Read(b) + if n > 0 { + if b[0] == '\n' { + return string(buf), nil + } + buf = append(buf, b[0]) + // Prevent unbounded reads + if len(buf) > 1024 { + return "", fmt.Errorf("line too long") + } + } + if err != nil { + if err == io.EOF && len(buf) > 0 { + return string(buf), nil + } + return "", err + } + } +} diff --git a/services/support/tunnel/protocol_test.go b/services/support/tunnel/protocol_test.go new file mode 100644 index 00000000..742e4def --- /dev/null +++ b/services/support/tunnel/protocol_test.go @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package tunnel + +import ( + "bytes" + "fmt" + "testing" +) + +func TestWriteReadConnectHeader(t *testing.T) { + var buf bytes.Buffer + + err := WriteConnectHeader(&buf, "cluster-admin") + if err != nil { + t.Fatalf("WriteConnectHeader failed: %v", err) + } + + if buf.String() != "CONNECT cluster-admin\n" { + t.Fatalf("unexpected header: %q", buf.String()) + } + + serviceName, err := ReadConnectHeader(&buf) + if err != nil { + t.Fatalf("ReadConnectHeader failed: %v", err) + } + if serviceName != "cluster-admin" { + t.Fatalf("expected service name 'cluster-admin', got %q", serviceName) + } +} + +func TestReadConnectHeader_InvalidPrefix(t *testing.T) { + buf := bytes.NewBufferString("GET /foo\n") + _, err := ReadConnectHeader(buf) + if err == nil { + t.Fatal("expected error for invalid header") + } +} + +func TestReadConnectHeader_EmptyServiceName(t *testing.T) { + buf := bytes.NewBufferString("CONNECT \n") + _, err := ReadConnectHeader(buf) + if err == nil { + t.Fatal("expected error for empty service name") + } +} + +func TestWriteReadConnectResponse_OK(t *testing.T) { + var buf bytes.Buffer + + err := WriteConnectResponse(&buf, nil) + if err != nil { + 
t.Fatalf("WriteConnectResponse failed: %v", err) + } + + if buf.String() != "OK\n" { + t.Fatalf("unexpected response: %q", buf.String()) + } + + err = ReadConnectResponse(&buf) + if err != nil { + t.Fatalf("ReadConnectResponse returned error for OK: %v", err) + } +} + +func TestWriteReadConnectResponse_Error(t *testing.T) { + var buf bytes.Buffer + + err := WriteConnectResponse(&buf, fmt.Errorf("service not found")) + if err != nil { + t.Fatalf("WriteConnectResponse failed: %v", err) + } + + if buf.String() != "ERROR service not found\n" { + t.Fatalf("unexpected response: %q", buf.String()) + } + + err = ReadConnectResponse(&buf) + if err == nil { + t.Fatal("expected error from ReadConnectResponse") + } + if err.Error() != "service not found" { + t.Fatalf("unexpected error message: %v", err) + } +} + +func TestReadConnectResponse_UnexpectedResponse(t *testing.T) { + buf := bytes.NewBufferString("WHAT\n") + err := ReadConnectResponse(buf) + if err == nil { + t.Fatal("expected error for unexpected response") + } +} + +func TestReadLine_TooLong(t *testing.T) { + // Create a line longer than 1024 bytes without newline + longData := make([]byte, 2000) + for i := range longData { + longData[i] = 'a' + } + buf := bytes.NewBuffer(longData) + _, err := readLine(buf) + if err == nil { + t.Fatal("expected error for line too long") + } +} diff --git a/services/support/tunnel/stream.go b/services/support/tunnel/stream.go new file mode 100644 index 00000000..f6945849 --- /dev/null +++ b/services/support/tunnel/stream.go @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package tunnel + +import ( + "io" + "net" + "time" + + "github.com/gorilla/websocket" +) + +// WebSocketConn wraps a gorilla/websocket.Conn to implement net.Conn +// for use with yamux, which requires a net.Conn interface. 
+type WebSocketConn struct { + conn *websocket.Conn + reader io.Reader +} + +// NewWebSocketConn wraps a WebSocket connection as a net.Conn +func NewWebSocketConn(conn *websocket.Conn) *WebSocketConn { + return &WebSocketConn{conn: conn} +} + +// Read reads data from the WebSocket connection +func (wsc *WebSocketConn) Read(b []byte) (int, error) { + for { + if wsc.reader == nil { + _, reader, err := wsc.conn.NextReader() + if err != nil { + return 0, err + } + wsc.reader = reader + } + + n, err := wsc.reader.Read(b) + if err == io.EOF { + wsc.reader = nil + if n > 0 { + return n, nil + } + continue + } + return n, err + } +} + +// Write writes data to the WebSocket connection +func (wsc *WebSocketConn) Write(b []byte) (int, error) { + err := wsc.conn.WriteMessage(websocket.BinaryMessage, b) + if err != nil { + return 0, err + } + return len(b), nil +} + +// Close closes the underlying WebSocket connection +func (wsc *WebSocketConn) Close() error { + return wsc.conn.Close() +} + +// LocalAddr returns the local network address (not applicable for WebSocket) +func (wsc *WebSocketConn) LocalAddr() net.Addr { + return wsc.conn.LocalAddr() +} + +// RemoteAddr returns the remote network address +func (wsc *WebSocketConn) RemoteAddr() net.Addr { + return wsc.conn.RemoteAddr() +} + +// SetDeadline sets read and write deadlines on the underlying WebSocket connection +func (wsc *WebSocketConn) SetDeadline(t time.Time) error { + if err := wsc.conn.SetReadDeadline(t); err != nil { + return err + } + return wsc.conn.SetWriteDeadline(t) +} + +// SetReadDeadline sets the read deadline on the underlying WebSocket connection +func (wsc *WebSocketConn) SetReadDeadline(t time.Time) error { + return wsc.conn.SetReadDeadline(t) +} + +// SetWriteDeadline sets the write deadline on the underlying WebSocket connection +func (wsc *WebSocketConn) SetWriteDeadline(t time.Time) error { + return wsc.conn.SetWriteDeadline(t) +} From d6331f8eb5d430ae0cc918aa7c9af8d87806c009 Mon Sep 17 00:00:00 
2001 From: Edoardo Spadoni Date: Mon, 9 Mar 2026 21:27:21 +0100 Subject: [PATCH 02/28] feat(backend): add support session APIs, proxy, and security hardening Support session CRUD, WebSocket terminal with one-time tickets, subdomain proxy with body rewriting, access logging, RBAC with connect:systems permission, database migrations, and security hardening from penetration test findings. --- backend/.env.example | 14 + backend/cache/redis.go | 57 ++ backend/cache/redis_unit_test.go | 5 + backend/cache/terminal_tickets.go | 88 +++ backend/cmd/gentoken/main.go | 1 + backend/configuration/configuration.go | 15 + .../010_add_performance_indexes.sql | 4 + .../010_add_performance_indexes_rollback.sql | 1 + .../migrations/012_optimize_applications.sql | 2 + .../migrations/017_support_sessions.sql | 41 ++ .../017_support_sessions_rollback.sql | 6 + .../migrations/018_security_hardening.sql | 19 + .../018_security_hardening_rollback.sql | 9 + .../019_add_node_id_support_sessions.sql | 10 + ..._add_node_id_support_sessions_rollback.sql | 4 + backend/database/schema.sql | 64 ++ backend/entities/support.go | 578 +++++++++++++++ backend/go.mod | 1 + backend/go.sum | 2 + backend/jwt/jwt.go | 106 +++ backend/main.go | 57 +- backend/methods/support.go | 158 ++++ backend/methods/support_proxy.go | 670 +++++++++++++++++ backend/middleware/rbac.go | 3 +- backend/models/support.go | 70 ++ backend/openapi.yaml | 685 ++++++++++++++++++ backend/services/local/systems.go | 4 +- 27 files changed, 2666 insertions(+), 8 deletions(-) create mode 100644 backend/cache/terminal_tickets.go create mode 100644 backend/database/migrations/017_support_sessions.sql create mode 100644 backend/database/migrations/017_support_sessions_rollback.sql create mode 100644 backend/database/migrations/018_security_hardening.sql create mode 100644 backend/database/migrations/018_security_hardening_rollback.sql create mode 100644 backend/database/migrations/019_add_node_id_support_sessions.sql create mode 100644 
backend/database/migrations/019_add_node_id_support_sessions_rollback.sql create mode 100644 backend/entities/support.go create mode 100644 backend/methods/support.go create mode 100644 backend/methods/support_proxy.go create mode 100644 backend/models/support.go diff --git a/backend/.env.example b/backend/.env.example index 9e1c679e..22203b22 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -38,6 +38,20 @@ REDIS_URL=redis://localhost:6379 #SMTP_FROM_NAME=My Nethesis #SMTP_TLS=true +# =========================================== +# SUPPORT SERVICE CONFIGURATION +# =========================================== +# URL of the support service for proxying tunnel connections +#SUPPORT_SERVICE_URL=http://localhost:8082 + +# Domain for subdomain-based support proxy (e.g. "my.nethesis.it") +# When set, enables proxying via {service}--{session}.support.{domain} +#SUPPORT_PROXY_DOMAIN= + +# Shared secret for backend→support service internal communication +# Must match the INTERNAL_SECRET in the support service .env +#SUPPORT_INTERNAL_SECRET=change-me-to-a-random-secret-min-32-chars + # =========================================== # OPTIONAL CONFIGURATION # =========================================== diff --git a/backend/cache/redis.go b/backend/cache/redis.go index d58b6798..42a062c1 100644 --- a/backend/cache/redis.go +++ b/backend/cache/redis.go @@ -20,6 +20,7 @@ type RedisInterface interface { SetWithContext(ctx context.Context, key string, value interface{}, ttl time.Duration) error Get(key string, dest interface{}) error GetWithContext(ctx context.Context, key string, dest interface{}) error + GetDel(key string, dest interface{}) error Delete(key string) error DeleteWithContext(ctx context.Context, key string) error DeletePattern(pattern string) error @@ -255,6 +256,52 @@ func (r *RedisClient) GetWithContext(ctx context.Context, key string, dest inter return nil } +// GetDel atomically retrieves and deletes a key from Redis (GETDEL command, Redis 6.2+). 
+// Returns ErrCacheMiss if the key does not exist. +func (r *RedisClient) GetDel(key string, dest interface{}) error { + ctx, cancel := context.WithTimeout(context.Background(), r.defaultTimeout) + defer cancel() + + data, err := r.client.GetDel(ctx, key).Result() + if err != nil { + if err == redis.Nil { + log.Debug(). + Str("component", "redis"). + Str("operation", "getdel"). + Str("key", key). + Msg("Key not found in Redis") + return ErrCacheMiss + } + + log.Error(). + Str("component", "redis"). + Str("operation", "getdel"). + Str("key", key). + Err(err). + Msg("Failed to getdel value from Redis") + return fmt.Errorf("failed to getdel value from Redis: %w", err) + } + + err = json.Unmarshal([]byte(data), dest) + if err != nil { + log.Error(). + Str("component", "redis"). + Str("operation", "getdel"). + Str("key", key). + Err(err). + Msg("Failed to unmarshal value") + return fmt.Errorf("failed to unmarshal value: %w", err) + } + + log.Debug(). + Str("component", "redis"). + Str("operation", "getdel"). + Str("key", key). 
+ Msg("Value retrieved and deleted from Redis") + + return nil +} + // Delete removes a key from Redis func (r *RedisClient) Delete(key string) error { return r.DeleteWithContext(context.Background(), key) @@ -489,5 +536,15 @@ func CloseRedis() error { return nil } +// Publish publishes a message to a Redis pub/sub channel +func (r *RedisClient) Publish(channel string, message interface{}) error { + if r.client == nil { + return fmt.Errorf("redis client not initialized") + } + ctx, cancel := context.WithTimeout(context.Background(), r.defaultTimeout) + defer cancel() + return r.client.Publish(ctx, channel, message).Err() +} + // ErrCacheMiss is returned when a key is not found in cache var ErrCacheMiss = fmt.Errorf("cache miss") diff --git a/backend/cache/redis_unit_test.go b/backend/cache/redis_unit_test.go index c88547b7..84beb9a2 100644 --- a/backend/cache/redis_unit_test.go +++ b/backend/cache/redis_unit_test.go @@ -47,6 +47,11 @@ func (m *MockRedisClient) GetWithContext(ctx context.Context, key string, dest i return args.Error(0) } +func (m *MockRedisClient) GetDel(key string, dest interface{}) error { + args := m.Called(key, dest) + return args.Error(0) +} + func (m *MockRedisClient) Delete(key string) error { args := m.Called(key) return args.Error(0) diff --git a/backend/cache/terminal_tickets.go b/backend/cache/terminal_tickets.go new file mode 100644 index 00000000..852f3861 --- /dev/null +++ b/backend/cache/terminal_tickets.go @@ -0,0 +1,88 @@ +/* +Copyright (C) 2026 Nethesis S.r.l. +SPDX-License-Identifier: AGPL-3.0-or-later +*/ + +package cache + +import ( + "crypto/rand" + "encoding/hex" + "fmt" + "time" + + "github.com/rs/zerolog/log" +) + +const ( + terminalTicketPrefix = "terminal_ticket:" + terminalTicketTTL = 30 * time.Second +) + +// TerminalTicket represents a one-time ticket for WebSocket terminal authentication. +// The ticket is stored in Redis with a short TTL and consumed on first use. 
+type TerminalTicket struct { + SessionID string `json:"session_id"` + UserID string `json:"user_id"` + UserLogtoID string `json:"user_logto_id"` + Username string `json:"username"` + Name string `json:"name"` + OrgRole string `json:"org_role"` + OrganizationID string `json:"organization_id"` +} + +// GenerateTerminalTicket creates a one-time ticket for terminal WebSocket authentication. +// Returns the ticket string that the client uses as ?ticket= query parameter. +func GenerateTerminalTicket(ticket *TerminalTicket) (string, error) { + rc := GetRedisClient() + if rc == nil { + return "", fmt.Errorf("redis not available") + } + + // Generate a cryptographically random ticket ID + b := make([]byte, 32) + if _, err := rand.Read(b); err != nil { + return "", fmt.Errorf("failed to generate ticket: %w", err) + } + ticketID := hex.EncodeToString(b) + + key := terminalTicketPrefix + ticketID + if err := rc.Set(key, ticket, terminalTicketTTL); err != nil { + return "", fmt.Errorf("failed to store ticket: %w", err) + } + + log.Debug(). + Str("component", "terminal_ticket"). + Str("session_id", ticket.SessionID). + Str("user_id", ticket.UserID). + Msg("Terminal ticket generated") + + return ticketID, nil +} + +// ConsumeTerminalTicket atomically retrieves and deletes a one-time terminal ticket +// using Redis GETDEL to prevent race conditions (TOCTOU). +// Returns nil if the ticket does not exist or has expired. +func ConsumeTerminalTicket(ticketID string) (*TerminalTicket, error) { + rc := GetRedisClient() + if rc == nil { + return nil, fmt.Errorf("redis not available") + } + + key := terminalTicketPrefix + ticketID + var ticket TerminalTicket + if err := rc.GetDel(key, &ticket); err != nil { + if err == ErrCacheMiss { + return nil, nil + } + return nil, fmt.Errorf("failed to consume ticket: %w", err) + } + + log.Debug(). + Str("component", "terminal_ticket"). + Str("session_id", ticket.SessionID). + Str("user_id", ticket.UserID). 
+ Msg("Terminal ticket consumed") + + return &ticket, nil +} diff --git a/backend/cmd/gentoken/main.go b/backend/cmd/gentoken/main.go index b40fcb50..fab1726e 100644 --- a/backend/cmd/gentoken/main.go +++ b/backend/cmd/gentoken/main.go @@ -45,6 +45,7 @@ func main() { UserRoleIDs: []string{"super-admin-role-id"}, UserPermissions: []string{ "destroy:systems", "read:systems", "manage:systems", + "connect:systems", "impersonate:users", "read:users", "manage:users", "read:applications", "manage:applications", }, diff --git a/backend/configuration/configuration.go b/backend/configuration/configuration.go index da565c9d..2e2bad02 100644 --- a/backend/configuration/configuration.go +++ b/backend/configuration/configuration.go @@ -59,6 +59,11 @@ type Configuration struct { DefaultPageSize int `json:"default_page_size"` // System types configuration SystemTypes []string `json:"system_types"` + // Support service URL for proxying + SupportServiceURL string `json:"support_service_url"` + // Support proxy domain for subdomain-based proxying (e.g. 
"my.nethesis.it") + SupportProxyDomain string `json:"support_proxy_domain"` + // SMTP configuration for sending emails SMTPHost string `json:"smtp_host"` SMTPPort int `json:"smtp_port"` @@ -192,6 +197,16 @@ func Init() { Config.SystemTypes = []string{"ns8", "nsec"} } + // Support service URL + if os.Getenv("SUPPORT_SERVICE_URL") != "" { + Config.SupportServiceURL = os.Getenv("SUPPORT_SERVICE_URL") + } else { + Config.SupportServiceURL = "http://localhost:8082" + } + + // Support proxy domain (optional, enables subdomain-based proxy) + Config.SupportProxyDomain = os.Getenv("SUPPORT_PROXY_DOMAIN") + // SMTP configuration Config.SMTPHost = os.Getenv("SMTP_HOST") Config.SMTPPort = parseIntWithDefault("SMTP_PORT", 587) diff --git a/backend/database/migrations/010_add_performance_indexes.sql b/backend/database/migrations/010_add_performance_indexes.sql index 7540f7d1..86375967 100644 --- a/backend/database/migrations/010_add_performance_indexes.sql +++ b/backend/database/migrations/010_add_performance_indexes.sql @@ -1,6 +1,10 @@ -- Performance optimization: unified organizations view and indexes -- This view replaces the 3-way LEFT JOIN pattern used in applications queries +-- Drop any existing form (view or materialized view) before recreating +DROP MATERIALIZED VIEW IF EXISTS unified_organizations; +DROP VIEW IF EXISTS unified_organizations; + CREATE OR REPLACE VIEW unified_organizations AS SELECT logto_id, id::text AS db_id, name, 'distributor' AS org_type FROM distributors WHERE deleted_at IS NULL UNION ALL diff --git a/backend/database/migrations/010_add_performance_indexes_rollback.sql b/backend/database/migrations/010_add_performance_indexes_rollback.sql index 145399a9..f887685d 100644 --- a/backend/database/migrations/010_add_performance_indexes_rollback.sql +++ b/backend/database/migrations/010_add_performance_indexes_rollback.sql @@ -2,3 +2,4 @@ DROP INDEX IF EXISTS idx_customers_created_by; DROP INDEX IF EXISTS idx_resellers_created_by; DROP INDEX IF EXISTS 
idx_applications_cert_level; DROP VIEW IF EXISTS unified_organizations; +DROP MATERIALIZED VIEW IF EXISTS unified_organizations; diff --git a/backend/database/migrations/012_optimize_applications.sql b/backend/database/migrations/012_optimize_applications.sql index 54b02acd..4358286e 100644 --- a/backend/database/migrations/012_optimize_applications.sql +++ b/backend/database/migrations/012_optimize_applications.sql @@ -1,7 +1,9 @@ -- Optimize applications: materialized view + covering indexes -- 1. Convert unified_organizations from VIEW to MATERIALIZED VIEW +-- Handle both regular VIEW and MATERIALIZED VIEW cases DROP VIEW IF EXISTS unified_organizations; +DROP MATERIALIZED VIEW IF EXISTS unified_organizations; CREATE MATERIALIZED VIEW unified_organizations AS SELECT logto_id, id::text AS db_id, name, 'distributor' AS org_type FROM distributors WHERE deleted_at IS NULL UNION ALL diff --git a/backend/database/migrations/017_support_sessions.sql b/backend/database/migrations/017_support_sessions.sql new file mode 100644 index 00000000..7d6d32cc --- /dev/null +++ b/backend/database/migrations/017_support_sessions.sql @@ -0,0 +1,41 @@ +-- Migration 017: Support sessions and access logs +-- Description: Tables for WebSocket tunnel-based support sessions + +-- Support sessions track active tunnel connections from client systems +CREATE TABLE IF NOT EXISTS support_sessions ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + system_id VARCHAR(255) NOT NULL REFERENCES systems(id) ON DELETE CASCADE, + session_token VARCHAR(64) UNIQUE NOT NULL, + started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + expires_at TIMESTAMPTZ NOT NULL DEFAULT (NOW() + INTERVAL '24 hours'), + status VARCHAR(16) NOT NULL DEFAULT 'pending', + closed_at TIMESTAMPTZ, + closed_by VARCHAR(32), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT support_sessions_status_check CHECK (status IN ('pending', 'active', 'expired', 'closed')) +); + +CREATE 
INDEX IF NOT EXISTS idx_support_sessions_system_id ON support_sessions(system_id); +CREATE INDEX IF NOT EXISTS idx_support_sessions_status ON support_sessions(status); +CREATE INDEX IF NOT EXISTS idx_support_sessions_session_token ON support_sessions(session_token); +CREATE INDEX IF NOT EXISTS idx_support_sessions_expires_at ON support_sessions(expires_at); + +-- Access logs track operator interactions with support sessions +CREATE TABLE IF NOT EXISTS support_access_logs ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + session_id UUID NOT NULL REFERENCES support_sessions(id) ON DELETE CASCADE, + operator_id VARCHAR(255) NOT NULL, + operator_name VARCHAR(255), + access_type VARCHAR(16) NOT NULL DEFAULT 'view', + connected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + disconnected_at TIMESTAMPTZ, + metadata JSONB, + CONSTRAINT support_access_logs_access_type_check CHECK (access_type IN ('view', 'ssh', 'web_terminal', 'ui_proxy')) +); + +CREATE INDEX IF NOT EXISTS idx_support_access_logs_session_id ON support_access_logs(session_id); +CREATE INDEX IF NOT EXISTS idx_support_access_logs_operator_id ON support_access_logs(operator_id); + +-- Record migration +INSERT INTO schema_migrations (migration_number, description) VALUES (17, 'Support sessions and access logs'); diff --git a/backend/database/migrations/017_support_sessions_rollback.sql b/backend/database/migrations/017_support_sessions_rollback.sql new file mode 100644 index 00000000..0bcd78ad --- /dev/null +++ b/backend/database/migrations/017_support_sessions_rollback.sql @@ -0,0 +1,6 @@ +-- Rollback migration 017: Support sessions and access logs + +DROP TABLE IF EXISTS support_access_logs; +DROP TABLE IF EXISTS support_sessions; + +DELETE FROM schema_migrations WHERE migration_number = 17; diff --git a/backend/database/migrations/018_security_hardening.sql b/backend/database/migrations/018_security_hardening.sql new file mode 100644 index 00000000..bc588855 --- /dev/null +++ 
b/backend/database/migrations/018_security_hardening.sql @@ -0,0 +1,19 @@ +-- Migration 018: Security hardening for support service +-- Description: Adds support_enabled flag to systems and reconnect_token to sessions + +-- #2: Require explicit opt-in before a system can connect to the support tunnel +ALTER TABLE systems ADD COLUMN IF NOT EXISTS support_enabled BOOLEAN NOT NULL DEFAULT false; + +COMMENT ON COLUMN systems.support_enabled IS 'Explicit opt-in: system can connect to support tunnel only when true'; + +CREATE INDEX IF NOT EXISTS idx_systems_support_enabled ON systems(support_enabled) WHERE support_enabled = true AND deleted_at IS NULL; + +-- #8: Reconnect token to prevent session hijacking during grace period +ALTER TABLE support_sessions ADD COLUMN IF NOT EXISTS reconnect_token VARCHAR(64); + +COMMENT ON COLUMN support_sessions.reconnect_token IS 'Token required to reconnect to a session during grace period'; + +CREATE INDEX IF NOT EXISTS idx_support_sessions_reconnect_token ON support_sessions(reconnect_token) WHERE reconnect_token IS NOT NULL; + +-- Record migration +INSERT INTO schema_migrations (migration_number, description) VALUES (18, 'Security hardening for support service'); diff --git a/backend/database/migrations/018_security_hardening_rollback.sql b/backend/database/migrations/018_security_hardening_rollback.sql new file mode 100644 index 00000000..74ac25ac --- /dev/null +++ b/backend/database/migrations/018_security_hardening_rollback.sql @@ -0,0 +1,9 @@ +-- Rollback Migration 018: Security hardening for support service + +DROP INDEX IF EXISTS idx_support_sessions_reconnect_token; +ALTER TABLE support_sessions DROP COLUMN IF EXISTS reconnect_token; + +DROP INDEX IF EXISTS idx_systems_support_enabled; +ALTER TABLE systems DROP COLUMN IF EXISTS support_enabled; + +DELETE FROM schema_migrations WHERE migration_number = 18; diff --git a/backend/database/migrations/019_add_node_id_support_sessions.sql 
b/backend/database/migrations/019_add_node_id_support_sessions.sql new file mode 100644 index 00000000..52c46853 --- /dev/null +++ b/backend/database/migrations/019_add_node_id_support_sessions.sql @@ -0,0 +1,10 @@ +-- Migration 019: Add node_id to support_sessions for multi-node cluster support +-- Each node in an NS8 cluster connects its own tunnel, identified by node_id. +-- node_id is NULL for single-node (non-cluster) systems. + +ALTER TABLE support_sessions ADD COLUMN node_id VARCHAR(16); + +COMMENT ON COLUMN support_sessions.node_id IS 'NS8 cluster node ID (e.g., 1, 2, 3). NULL for single-node systems.'; + +-- Index for efficient lookups by (system_id, node_id) +CREATE INDEX idx_support_sessions_system_node ON support_sessions(system_id, node_id) WHERE status IN ('pending', 'active'); diff --git a/backend/database/migrations/019_add_node_id_support_sessions_rollback.sql b/backend/database/migrations/019_add_node_id_support_sessions_rollback.sql new file mode 100644 index 00000000..b79daa99 --- /dev/null +++ b/backend/database/migrations/019_add_node_id_support_sessions_rollback.sql @@ -0,0 +1,4 @@ +-- Rollback migration 019: Remove node_id from support_sessions + +DROP INDEX IF EXISTS idx_support_sessions_system_node; +ALTER TABLE support_sessions DROP COLUMN IF EXISTS node_id; diff --git a/backend/database/schema.sql b/backend/database/schema.sql index b1e4a8bc..0abd67dc 100644 --- a/backend/database/schema.sql +++ b/backend/database/schema.sql @@ -264,6 +264,9 @@ CREATE TABLE IF NOT EXISTS systems ( -- Inventory last_inventory_at TIMESTAMP WITH TIME ZONE, -- Last inventory received timestamp (NULL = never received) + -- Support + support_enabled BOOLEAN NOT NULL DEFAULT false, -- Explicit opt-in for support tunnel access + -- Soft delete deleted_at TIMESTAMP WITH TIME ZONE, -- NULL = active, non-NULL = soft deleted deleted_by_org_id VARCHAR(255) -- Organization that caused cascade soft-deletion @@ -281,6 +284,7 @@ COMMENT ON COLUMN 
systems.system_secret_public IS 'Public part of token (my_>'certification_level')::int IN (4, 5); + +-- ============================================================================= +-- SUPPORT SESSIONS TABLE +-- ============================================================================= +-- Tracks WebSocket tunnel-based support sessions from client systems + +CREATE TABLE IF NOT EXISTS support_sessions ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + system_id VARCHAR(255) NOT NULL REFERENCES systems(id) ON DELETE CASCADE, + node_id VARCHAR(16), + session_token VARCHAR(64) UNIQUE NOT NULL, + reconnect_token VARCHAR(64), + started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + expires_at TIMESTAMPTZ NOT NULL DEFAULT (NOW() + INTERVAL '24 hours'), + status VARCHAR(16) NOT NULL DEFAULT 'pending', + closed_at TIMESTAMPTZ, + closed_by VARCHAR(32), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT support_sessions_status_check CHECK (status IN ('pending', 'active', 'expired', 'closed')) +); + +COMMENT ON TABLE support_sessions IS 'WebSocket tunnel-based support sessions from client systems'; +COMMENT ON COLUMN support_sessions.node_id IS 'NS8 cluster node ID (e.g., 1, 2, 3). 
NULL for single-node systems.'; +COMMENT ON COLUMN support_sessions.session_token IS 'Unique token for tunnel authentication'; +COMMENT ON COLUMN support_sessions.reconnect_token IS 'Token required to reconnect to a session during grace period'; +COMMENT ON COLUMN support_sessions.status IS 'Session status: pending (no tunnel yet), active, expired, closed'; +COMMENT ON COLUMN support_sessions.closed_by IS 'Who closed the session: client, operator, timeout, system'; + +CREATE INDEX IF NOT EXISTS idx_support_sessions_system_id ON support_sessions(system_id); +CREATE INDEX IF NOT EXISTS idx_support_sessions_status ON support_sessions(status); +CREATE INDEX IF NOT EXISTS idx_support_sessions_session_token ON support_sessions(session_token); +CREATE INDEX IF NOT EXISTS idx_support_sessions_expires_at ON support_sessions(expires_at); +CREATE INDEX IF NOT EXISTS idx_support_sessions_reconnect_token ON support_sessions(reconnect_token) WHERE reconnect_token IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_support_sessions_system_node ON support_sessions(system_id, node_id) WHERE status IN ('pending', 'active'); + +-- ============================================================================= +-- SUPPORT ACCESS LOGS TABLE +-- ============================================================================= +-- Tracks operator interactions with support sessions + +CREATE TABLE IF NOT EXISTS support_access_logs ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + session_id UUID NOT NULL REFERENCES support_sessions(id) ON DELETE CASCADE, + operator_id VARCHAR(255) NOT NULL, + operator_name VARCHAR(255), + access_type VARCHAR(16) NOT NULL DEFAULT 'view', + connected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + disconnected_at TIMESTAMPTZ, + metadata JSONB, + CONSTRAINT support_access_logs_access_type_check CHECK (access_type IN ('view', 'ssh', 'web_terminal', 'ui_proxy')) +); + +COMMENT ON TABLE support_access_logs IS 'Operator interactions with support sessions'; +COMMENT ON COLUMN 
support_access_logs.operator_id IS 'Logto user ID of the operator'; +COMMENT ON COLUMN support_access_logs.access_type IS 'Type of access: view, ssh, web_terminal, ui_proxy'; + +CREATE INDEX IF NOT EXISTS idx_support_access_logs_session_id ON support_access_logs(session_id); +CREATE INDEX IF NOT EXISTS idx_support_access_logs_operator_id ON support_access_logs(operator_id); diff --git a/backend/entities/support.go b/backend/entities/support.go new file mode 100644 index 00000000..163bc5f3 --- /dev/null +++ b/backend/entities/support.go @@ -0,0 +1,578 @@ +/* +Copyright (C) 2026 Nethesis S.r.l. +SPDX-License-Identifier: AGPL-3.0-or-later +*/ + +package entities + +import ( + "database/sql" + "fmt" + "strings" + + "github.com/nethesis/my/backend/database" + "github.com/nethesis/my/backend/models" +) + +// SupportRepository handles support session database operations +type SupportRepository struct { + db *sql.DB +} + +// NewSupportRepository creates a new support repository +func NewSupportRepository() *SupportRepository { + return &SupportRepository{db: database.DB} +} + +// buildRBACFilter returns a WHERE condition and args for RBAC scope filtering. +// The condition filters systems by organization_id based on the user's org role. 
+func buildRBACFilter(userOrgRole, userOrgID string, argIdx int) (string, []interface{}, int) { + switch strings.ToLower(userOrgRole) { + case "owner": + return "", nil, argIdx + case "distributor": + condition := fmt.Sprintf(`s.organization_id IN ( + SELECT $%d + UNION + SELECT logto_id FROM resellers + WHERE custom_data->>'createdBy' = $%d AND deleted_at IS NULL + UNION + SELECT logto_id FROM customers + WHERE deleted_at IS NULL AND ( + custom_data->>'createdBy' = $%d OR + custom_data->>'createdBy' IN ( + SELECT logto_id FROM resellers + WHERE custom_data->>'createdBy' = $%d AND deleted_at IS NULL + ) + ) + )`, argIdx, argIdx, argIdx, argIdx) + return condition, []interface{}{userOrgID}, argIdx + 1 + case "reseller": + condition := fmt.Sprintf(`s.organization_id IN ( + SELECT $%d + UNION + SELECT logto_id FROM customers + WHERE custom_data->>'createdBy' = $%d AND deleted_at IS NULL + )`, argIdx, argIdx) + return condition, []interface{}{userOrgID}, argIdx + 1 + case "customer": + condition := fmt.Sprintf("s.organization_id = $%d", argIdx) + return condition, []interface{}{userOrgID}, argIdx + 1 + default: + // Unknown role: deny access + return "1=0", nil, argIdx + } +} + +// GetSystemSessions returns support sessions grouped by system, with server-side +// pagination based on distinct systems (not individual sessions). +func (r *SupportRepository) GetSystemSessions( + userOrgRole, userOrgID string, + page, pageSize int, + status, systemID string, + sortBy, sortDirection string, +) ([]models.SystemSessionGroup, int, error) { + conditions := []string{"1=1"} + args := []interface{}{} + argIdx := 1 + + // RBAC scope filter + rbacCondition, rbacArgs, newArgIdx := buildRBACFilter(userOrgRole, userOrgID, argIdx) + if rbacCondition != "" { + conditions = append(conditions, rbacCondition) + args = append(args, rbacArgs...) 
+ argIdx = newArgIdx + } + + // Optional status filter: show systems that have at least one session with this status + if status != "" { + conditions = append(conditions, fmt.Sprintf("ss.status = $%d", argIdx)) + args = append(args, status) + argIdx++ + } + if systemID != "" { + conditions = append(conditions, fmt.Sprintf("ss.system_id = $%d", argIdx)) + args = append(args, systemID) + argIdx++ + } + + whereClause := strings.Join(conditions, " AND ") + + // Count distinct systems + countQuery := fmt.Sprintf( + `SELECT COUNT(DISTINCT ss.system_id) + FROM support_sessions ss + JOIN systems s ON ss.system_id = s.id + WHERE %s`, whereClause) + + var totalCount int + err := r.db.QueryRow(countQuery, args...).Scan(&totalCount) + if err != nil { + return nil, 0, fmt.Errorf("failed to count system groups: %w", err) + } + + if totalCount == 0 { + return nil, 0, nil + } + + // Validate sort column (mapped to aggregate expressions) + allowedSortColumns := map[string]string{ + "started_at": "MIN(ss.started_at)", + "expires_at": "MAX(ss.expires_at)", + "created_at": "MIN(ss.created_at)", + "status": `CASE + WHEN bool_or(ss.status = 'active') THEN 0 + WHEN bool_or(ss.status = 'pending') THEN 1 + WHEN bool_or(ss.status = 'expired') THEN 2 + ELSE 3 + END`, + } + sortColumn, ok := allowedSortColumns[sortBy] + if !ok { + sortColumn = "MIN(ss.created_at)" + } + if sortDirection != "asc" && sortDirection != "desc" { + sortDirection = "desc" + } + + // Get paginated system groups with aggregate data + offset := (page - 1) * pageSize + groupQuery := fmt.Sprintf( + `SELECT + ss.system_id, + MIN(ss.started_at) AS started_at, + MAX(ss.expires_at) AS expires_at, + CASE + WHEN bool_or(ss.status = 'active') THEN 'active' + WHEN bool_or(ss.status = 'pending') THEN 'pending' + WHEN bool_or(ss.status = 'expired') THEN 'expired' + ELSE 'closed' + END AS best_status, + COUNT(*) AS session_count, + COUNT(DISTINCT ss.node_id) FILTER (WHERE ss.node_id IS NOT NULL) AS node_count, + s.name, s.type, 
s.system_key, s.organization_id, + COALESCE(uo.name, '') AS org_name, + COALESCE(uo.db_id, '') AS org_db_id, + COALESCE(uo.org_type, '') AS org_type + FROM support_sessions ss + JOIN systems s ON ss.system_id = s.id + LEFT JOIN unified_organizations uo ON s.organization_id = uo.logto_id + WHERE %s + GROUP BY ss.system_id, s.name, s.type, s.system_key, s.organization_id, uo.name, uo.db_id, uo.org_type + ORDER BY %s %s + LIMIT $%d OFFSET $%d`, + whereClause, sortColumn, sortDirection, argIdx, argIdx+1) + + args = append(args, pageSize, offset) + + rows, err := r.db.Query(groupQuery, args...) + if err != nil { + return nil, 0, fmt.Errorf("failed to query system groups: %w", err) + } + defer func() { _ = rows.Close() }() + + var groups []models.SystemSessionGroup + var systemIDs []string + systemMap := make(map[string]int) // system_id → index in groups + + for rows.Next() { + var g models.SystemSessionGroup + var systemType sql.NullString + var orgID, orgName, orgDBID, orgType string + + err := rows.Scan( + &g.SystemID, &g.StartedAt, &g.ExpiresAt, &g.Status, + &g.SessionCount, &g.NodeCount, + &g.SystemName, &systemType, &g.SystemKey, + &orgID, &orgName, &orgDBID, &orgType, + ) + if err != nil { + return nil, 0, fmt.Errorf("failed to scan system group: %w", err) + } + + if systemType.Valid { + g.SystemType = &systemType.String + } + g.Organization = &models.Organization{ + LogtoID: orgID, + ID: orgDBID, + Name: orgName, + Type: orgType, + } + g.Sessions = []models.SessionRef{} + + systemMap[g.SystemID] = len(groups) + systemIDs = append(systemIDs, g.SystemID) + groups = append(groups, g) + } + if err := rows.Err(); err != nil { + return nil, 0, fmt.Errorf("failed to iterate system groups: %w", err) + } + + if len(systemIDs) == 0 { + return groups, totalCount, nil + } + + // Fetch individual sessions for the returned systems + placeholders := make([]string, len(systemIDs)) + sessionArgs := make([]interface{}, len(systemIDs)) + for i, id := range systemIDs { + 
placeholders[i] = fmt.Sprintf("$%d", i+1) + sessionArgs[i] = id + } + sessionQuery := fmt.Sprintf( + `SELECT id, system_id, node_id, status, started_at, expires_at + FROM support_sessions + WHERE system_id IN (%s) AND status IN ('active', 'pending') + ORDER BY system_id, node_id NULLS FIRST, started_at DESC`, + strings.Join(placeholders, ",")) + + sessionRows, err := r.db.Query(sessionQuery, sessionArgs...) + if err != nil { + return nil, 0, fmt.Errorf("failed to query sessions for groups: %w", err) + } + defer func() { _ = sessionRows.Close() }() + + for sessionRows.Next() { + var ref models.SessionRef + var sysID string + var nodeID sql.NullString + + err := sessionRows.Scan(&ref.ID, &sysID, &nodeID, &ref.Status, &ref.StartedAt, &ref.ExpiresAt) + if err != nil { + return nil, 0, fmt.Errorf("failed to scan session ref: %w", err) + } + if nodeID.Valid { + ref.NodeID = &nodeID.String + } + + if idx, ok := systemMap[sysID]; ok { + groups[idx].Sessions = append(groups[idx].Sessions, ref) + } + } + + return groups, totalCount, sessionRows.Err() +} + +// GetSessions returns paginated support sessions filtered by RBAC scope +func (r *SupportRepository) GetSessions( + userOrgRole, userOrgID string, + page, pageSize int, + status, systemID string, + sortBy, sortDirection string, +) ([]models.SupportSession, int, error) { + conditions := []string{"1=1"} + args := []interface{}{} + argIdx := 1 + + // RBAC scope filter + rbacCondition, rbacArgs, newArgIdx := buildRBACFilter(userOrgRole, userOrgID, argIdx) + if rbacCondition != "" { + conditions = append(conditions, rbacCondition) + args = append(args, rbacArgs...) 
+ argIdx = newArgIdx + } + + // Optional filters + if status != "" { + conditions = append(conditions, fmt.Sprintf("ss.status = $%d", argIdx)) + args = append(args, status) + argIdx++ + } + if systemID != "" { + conditions = append(conditions, fmt.Sprintf("ss.system_id = $%d", argIdx)) + args = append(args, systemID) + argIdx++ + } + + whereClause := strings.Join(conditions, " AND ") + + // Validate sort column + allowedSortColumns := map[string]string{ + "started_at": "ss.started_at", + "expires_at": "ss.expires_at", + "status": "ss.status", + "created_at": "ss.created_at", + } + sortColumn, ok := allowedSortColumns[sortBy] + if !ok { + sortColumn = "ss.created_at" + } + if sortDirection != "asc" && sortDirection != "desc" { + sortDirection = "desc" + } + + // Count query + countQuery := fmt.Sprintf( + `SELECT COUNT(*) + FROM support_sessions ss + JOIN systems s ON ss.system_id = s.id + WHERE %s`, whereClause) + + var totalCount int + err := r.db.QueryRow(countQuery, args...).Scan(&totalCount) + if err != nil { + return nil, 0, fmt.Errorf("failed to count sessions: %w", err) + } + + // Data query + offset := (page - 1) * pageSize + dataQuery := fmt.Sprintf( + `SELECT ss.id, ss.system_id, ss.node_id, ss.session_token, ss.started_at, ss.expires_at, + ss.status, ss.closed_at, ss.closed_by, ss.created_at, ss.updated_at, + s.name, s.type, s.system_key, s.organization_id, + COALESCE(uo.name, '') AS org_name, + COALESCE(uo.db_id, '') AS org_db_id, + COALESCE(uo.org_type, '') AS org_type + FROM support_sessions ss + JOIN systems s ON ss.system_id = s.id + LEFT JOIN unified_organizations uo ON s.organization_id = uo.logto_id + WHERE %s + ORDER BY %s %s + LIMIT $%d OFFSET $%d`, + whereClause, sortColumn, sortDirection, argIdx, argIdx+1) + + args = append(args, pageSize, offset) + + rows, err := r.db.Query(dataQuery, args...) 
+ if err != nil { + return nil, 0, fmt.Errorf("failed to query sessions: %w", err) + } + defer func() { _ = rows.Close() }() + + var sessions []models.SupportSession + for rows.Next() { + session, err := scanSession(rows) + if err != nil { + return nil, 0, err + } + // Do not expose session_token in list + session.SessionToken = "" + sessions = append(sessions, session) + } + + return sessions, totalCount, rows.Err() +} + +// scannable is an interface for *sql.Row and *sql.Rows +type scannable interface { + Scan(dest ...interface{}) error +} + +// scanSession scans a session row into a SupportSession model +func scanSession(row scannable) (models.SupportSession, error) { + var session models.SupportSession + var nodeID sql.NullString + var closedAt sql.NullTime + var closedBy sql.NullString + var systemType sql.NullString + var orgID, orgName, orgDBID, orgType string + + err := row.Scan( + &session.ID, &session.SystemID, &nodeID, &session.SessionToken, + &session.StartedAt, &session.ExpiresAt, + &session.Status, &closedAt, &closedBy, + &session.CreatedAt, &session.UpdatedAt, + &session.SystemName, &systemType, &session.SystemKey, + &orgID, &orgName, &orgDBID, &orgType, + ) + if err != nil { + return session, fmt.Errorf("failed to scan session: %w", err) + } + + if nodeID.Valid { + session.NodeID = &nodeID.String + } + if closedAt.Valid { + session.ClosedAt = &closedAt.Time + } + if closedBy.Valid { + session.ClosedBy = &closedBy.String + } + if systemType.Valid { + session.SystemType = &systemType.String + } + + session.Organization = &models.Organization{ + LogtoID: orgID, + ID: orgDBID, + Name: orgName, + Type: orgType, + } + + return session, nil +} + +// GetSessionByID returns a single session with system info, filtered by RBAC scope +func (r *SupportRepository) GetSessionByID(sessionID, userOrgRole, userOrgID string) (*models.SupportSession, error) { + conditions := []string{"ss.id = $1"} + args := []interface{}{sessionID} + argIdx := 2 + + // RBAC scope 
filter + rbacCondition, rbacArgs, _ := buildRBACFilter(userOrgRole, userOrgID, argIdx) + if rbacCondition != "" { + conditions = append(conditions, rbacCondition) + args = append(args, rbacArgs...) + } + + query := fmt.Sprintf(`SELECT ss.id, ss.system_id, ss.node_id, ss.session_token, ss.started_at, ss.expires_at, + ss.status, ss.closed_at, ss.closed_by, ss.created_at, ss.updated_at, + s.name, s.type, s.system_key, s.organization_id, + COALESCE(uo.name, '') AS org_name, + COALESCE(uo.db_id, '') AS org_db_id, + COALESCE(uo.org_type, '') AS org_type + FROM support_sessions ss + JOIN systems s ON ss.system_id = s.id + LEFT JOIN unified_organizations uo ON s.organization_id = uo.logto_id + WHERE %s`, strings.Join(conditions, " AND ")) + + session, err := scanSession(r.db.QueryRow(query, args...)) + if err != nil { + if err.Error() == "failed to scan session: sql: no rows in result set" { + return nil, nil + } + return nil, fmt.Errorf("failed to get session: %w", err) + } + + // Do not expose session_token in API + session.SessionToken = "" + + return &session, nil +} + +// maxSessionDuration is the maximum total duration a session can have from its start time (30 days) +const maxSessionDuration = 30 * 24 // hours + +// ExtendSession extends the expiration of a session atomically. +// Rejects extensions that would push the total session duration beyond 30 days. 
+func (r *SupportRepository) ExtendSession(sessionID string, hours int) error { + result, err := r.db.Exec( + `UPDATE support_sessions + SET expires_at = expires_at + $2 * INTERVAL '1 hour', updated_at = NOW() + WHERE id = $1 AND status IN ('pending', 'active') + AND (expires_at + $2 * INTERVAL '1 hour') - started_at <= $3 * INTERVAL '1 hour'`, + sessionID, hours, maxSessionDuration, + ) + if err != nil { + return fmt.Errorf("failed to extend session: %w", err) + } + + rows, _ := result.RowsAffected() + if rows == 0 { + // Distinguish between "not found" and "would exceed max duration" + var exists bool + _ = r.db.QueryRow( + `SELECT EXISTS(SELECT 1 FROM support_sessions WHERE id = $1 AND status IN ('pending', 'active'))`, + sessionID, + ).Scan(&exists) + if exists { + return fmt.Errorf("extension would exceed maximum session duration of %d days", maxSessionDuration/24) + } + return fmt.Errorf("session not found or not extendable") + } + return nil +} + +// CloseSession force-closes a session +func (r *SupportRepository) CloseSession(sessionID string) error { + result, err := r.db.Exec( + `UPDATE support_sessions + SET status = 'closed', closed_at = NOW(), closed_by = 'operator', updated_at = NOW() + WHERE id = $1 AND status IN ('pending', 'active')`, + sessionID, + ) + if err != nil { + return fmt.Errorf("failed to close session: %w", err) + } + + rows, _ := result.RowsAffected() + if rows == 0 { + return fmt.Errorf("session not found or already closed") + } + return nil +} + +// InsertAccessLog inserts a new access log entry +func (r *SupportRepository) InsertAccessLog(sessionID, operatorID, operatorName, accessType, metadata string) error { + _, err := r.db.Exec( + `INSERT INTO support_access_logs (session_id, operator_id, operator_name, access_type, connected_at, metadata) + VALUES ($1, $2, $3, $4, NOW(), $5)`, + sessionID, operatorID, operatorName, accessType, metadata, + ) + if err != nil { + return fmt.Errorf("failed to insert access log: %w", err) + } + 
return nil +} + +// GetSessionTokenByID returns the session_token for internal service communication. +// Unlike GetSessionByID, this does NOT strip the token. +func (r *SupportRepository) GetSessionTokenByID(sessionID string) (string, error) { + var token string + err := r.db.QueryRow( + `SELECT session_token FROM support_sessions WHERE id = $1 AND status IN ('pending', 'active')`, + sessionID, + ).Scan(&token) + if err != nil { + return "", fmt.Errorf("session not found or not active: %w", err) + } + return token, nil +} + +// GetAccessLogs returns access logs for a session +func (r *SupportRepository) GetAccessLogs(sessionID string, page, pageSize int) ([]models.SupportAccessLog, int, error) { + var totalCount int + err := r.db.QueryRow( + `SELECT COUNT(*) FROM support_access_logs WHERE session_id = $1`, + sessionID, + ).Scan(&totalCount) + if err != nil { + return nil, 0, fmt.Errorf("failed to count access logs: %w", err) + } + + offset := (page - 1) * pageSize + rows, err := r.db.Query( + `SELECT id, session_id, operator_id, operator_name, access_type, + connected_at, disconnected_at, metadata + FROM support_access_logs + WHERE session_id = $1 + ORDER BY connected_at DESC + LIMIT $2 OFFSET $3`, + sessionID, pageSize, offset, + ) + if err != nil { + return nil, 0, fmt.Errorf("failed to query access logs: %w", err) + } + defer func() { _ = rows.Close() }() + + var logs []models.SupportAccessLog + for rows.Next() { + var log models.SupportAccessLog + var operatorName sql.NullString + var disconnectedAt sql.NullTime + var metadata sql.NullString + + err := rows.Scan( + &log.ID, &log.SessionID, &log.OperatorID, &operatorName, + &log.AccessType, &log.ConnectedAt, &disconnectedAt, &metadata, + ) + if err != nil { + return nil, 0, fmt.Errorf("failed to scan access log: %w", err) + } + + if operatorName.Valid { + log.OperatorName = &operatorName.String + } + if disconnectedAt.Valid { + log.DisconnectedAt = &disconnectedAt.Time + } + if metadata.Valid { + log.Metadata = 
&metadata.String + } + + logs = append(logs, log) + } + + return logs, totalCount, rows.Err() +} diff --git a/backend/go.mod b/backend/go.mod index 56293094..c2f4ae41 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -34,6 +34,7 @@ require ( github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/goccy/go-json v0.10.5 // indirect + github.com/gorilla/websocket v1.5.3 // indirect github.com/hhrutter/lzw v1.0.0 // indirect github.com/hhrutter/tiff v1.0.1 // indirect github.com/johnfercher/go-tree v1.0.5 // indirect diff --git a/backend/go.sum b/backend/go.sum index e14b2473..6afeddec 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -54,6 +54,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= +github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/hhrutter/lzw v1.0.0 h1:laL89Llp86W3rRs83LvKbwYRx6INE8gDn0XNb1oXtm0= github.com/hhrutter/lzw v1.0.0/go.mod h1:2HC6DJSn/n6iAZfgM3Pg+cP1KxeWc3ezG8bBqW5+WEo= github.com/hhrutter/tiff v1.0.1 h1:MIus8caHU5U6823gx7C6jrfoEvfSTGtEFRiM8/LOzC0= diff --git a/backend/jwt/jwt.go b/backend/jwt/jwt.go index d1c2e6a0..ae30ab86 100644 --- a/backend/jwt/jwt.go +++ b/backend/jwt/jwt.go @@ -51,6 +51,112 @@ type ImpersonationClaims struct { jwt.RegisteredClaims } +// ProxyTokenClaims represents the claims for support proxy tokens +type ProxyTokenClaims struct { + TokenType string `json:"token_type"` + SessionID string `json:"session_id"` + ServiceName string `json:"service_name"` + UserID string `json:"user_id"` + jwt.RegisteredClaims +} + +// 
GenerateProxyToken creates a short-lived JWT for subdomain-based support proxy access +func GenerateProxyToken(sessionID, serviceName, userID string) (string, error) { + expDuration := 8 * time.Hour + + claims := ProxyTokenClaims{ + TokenType: "proxy", + SessionID: sessionID, + ServiceName: serviceName, + UserID: userID, + RegisteredClaims: jwt.RegisteredClaims{ + Issuer: configuration.Config.JWTIssuer, + Subject: userID, + Audience: jwt.ClaimStrings{configuration.Config.LogtoAudience}, + ExpiresAt: jwt.NewNumericDate(time.Now().Add(expDuration)), + IssuedAt: jwt.NewNumericDate(time.Now()), + NotBefore: jwt.NewNumericDate(time.Now()), + }, + } + + token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims) + + tokenString, err := token.SignedString([]byte(configuration.Config.JWTSecret)) + if err != nil { + logger.ComponentLogger("jwt").Error(). + Err(err). + Str("operation", "proxy_token_sign_failed"). + Str("session_id", sessionID). + Str("service_name", serviceName). + Str("user_id", userID). + Msg("Failed to sign proxy token") + return "", fmt.Errorf("failed to sign proxy token: %w", err) + } + + logger.ComponentLogger("jwt").Info(). + Str("operation", "proxy_token_generated"). + Str("session_id", sessionID). + Str("service_name", serviceName). + Str("user_id", userID). + Time("expires_at", time.Now().Add(expDuration)). + Msg("Proxy token generated successfully") + + return tokenString, nil +} + +// ValidateProxyToken parses and validates a support proxy JWT token +func ValidateProxyToken(tokenString string) (*ProxyTokenClaims, error) { + token, err := jwt.ParseWithClaims(tokenString, &ProxyTokenClaims{}, func(token *jwt.Token) (interface{}, error) { + if _, ok := token.Method.(*jwt.SigningMethodHMAC); !ok { + return nil, fmt.Errorf("unexpected signing method: %v", token.Header["alg"]) + } + return []byte(configuration.Config.JWTSecret), nil + }) + + if err != nil { + logger.ComponentLogger("jwt").Warn(). + Err(err). 
+ Str("operation", "proxy_token_validation_failed"). + Str("error_type", "parse_failed"). + Msg("Failed to parse proxy token") + return nil, fmt.Errorf("failed to parse proxy token: %w", err) + } + + if claims, ok := token.Claims.(*ProxyTokenClaims); ok && token.Valid { + if claims.TokenType != "proxy" { + logger.ComponentLogger("jwt").Warn(). + Str("operation", "proxy_token_validation_failed"). + Str("error_type", "wrong_token_type"). + Str("token_type", claims.TokenType). + Msg("token is not a proxy token") + return nil, fmt.Errorf("token is not a proxy token") + } + if claims.SessionID == "" || claims.ServiceName == "" { + logger.ComponentLogger("jwt").Warn(). + Str("operation", "proxy_token_validation_failed"). + Str("error_type", "missing_claims"). + Msg("Proxy token missing required claims") + return nil, fmt.Errorf("proxy token missing required claims") + } + + logger.ComponentLogger("jwt").Debug(). + Str("operation", "proxy_token_validation_success"). + Str("session_id", claims.SessionID). + Str("service_name", claims.ServiceName). + Str("user_id", claims.UserID). + Msg("Proxy token validated successfully") + return claims, nil + } + + logger.ComponentLogger("jwt").Warn(). + Str("operation", "proxy_token_validation_failed"). + Str("error_type", "invalid_claims"). + Bool("token_valid", token.Valid). 
+ Msg("Invalid proxy token claims") + + return nil, fmt.Errorf("invalid proxy token claims") +} + // GenerateCustomToken creates a JWT token with user information and permissions func GenerateCustomToken(user models.User) (string, error) { // Parse expiration duration diff --git a/backend/main.go b/backend/main.go index dd8ad13d..9309070d 100644 --- a/backend/main.go +++ b/backend/main.go @@ -14,6 +14,7 @@ import ( "net/http" "os" "os/signal" + "strings" "syscall" "time" @@ -113,17 +114,35 @@ func main() { return ids } - // Init router - router := gin.Default() + // Init router (gin.New without default logger to avoid raw query params in logs) + router := gin.New() + router.Use(gin.Recovery()) - // Add request logging middleware + // Add request logging middleware (sanitizes sensitive query params) router.Use(logger.GinLogger()) // Add security monitoring middleware router.Use(logger.SecurityMiddleware()) - // Add compression - router.Use(gzip.Gzip(gzip.DefaultCompression)) + // Dev mode: rewrite subdomain requests to /support-proxy prefix + // In production, nginx handles this rewrite + // IMPORTANT: this must run BEFORE gzip middleware so that c.Abort() prevents + // the outer gzip wrapper from double-compressing proxied responses + if configuration.Config.SupportProxyDomain != "" { + router.Use(func(c *gin.Context) { + host := c.Request.Host + if strings.Contains(host, ".support.") && !strings.HasPrefix(c.Request.URL.Path, "/support-proxy") { + c.Request.URL.Path = "/support-proxy" + c.Request.URL.Path + router.HandleContext(c) + c.Abort() + return + } + c.Next() + }) + } + + // Add compression (exclude WebSocket terminal endpoint and support proxy) + router.Use(gzip.Gzip(gzip.DefaultCompression, gzip.WithExcludedPathsRegexs([]string{".*/terminal$", ".*/support-proxy/.*"}))) // CORS configuration in debug mode if gin.Mode() == gin.DebugMode { @@ -446,6 +465,26 @@ func main() { rebrandingGroup.GET("/:org_id/products/:product_id/:asset", 
methods.GetRebrandingAsset) } + // =========================================== + // SUPPORT SESSIONS - connect:systems permission required + // =========================================== + supportGroup := customAuthWithAudit.Group("/support-sessions", middleware.RequirePermission("connect:systems")) + { + supportGroup.GET("", methods.GetSupportSessions) + supportGroup.GET("/:id", methods.GetSupportSession) + supportGroup.PATCH("/:id/extend", methods.ExtendSupportSession) + supportGroup.DELETE("/:id", methods.CloseSupportSession) + supportGroup.GET("/:id/logs", methods.GetSupportSessionLogs) + supportGroup.GET("/:id/services", methods.GetSupportSessionServices) + supportGroup.POST("/:id/terminal-ticket", methods.GenerateTerminalTicket) + supportGroup.Any("/:id/proxy/:service/*path", methods.ProxySupportSession) + supportGroup.POST("/:id/proxy-token", methods.GenerateSupportProxyToken) + } + + // Terminal WebSocket endpoint - uses one-time ticket auth (not JWT) + // to avoid exposing the long-lived JWT in URLs and server logs + api.GET("/support-sessions/:id/terminal", methods.GetSupportSessionTerminal) + // =========================================== // METADATA - roles, organizations, third-party apps // =========================================== @@ -464,6 +503,14 @@ func main() { } + // =========================================== + // SUPPORT SUBDOMAIN PROXY (no JWT - uses proxy token auth) + // =========================================== + supportProxy := router.Group("/support-proxy") + { + supportProxy.Any("/*path", methods.SubdomainProxy) + } + // Handle missing endpoints router.NoRoute(func(c *gin.Context) { c.JSON(http.StatusNotFound, response.NotFound("api not found", nil)) diff --git a/backend/methods/support.go b/backend/methods/support.go new file mode 100644 index 00000000..f27a428b --- /dev/null +++ b/backend/methods/support.go @@ -0,0 +1,158 @@ +/* +Copyright (C) 2026 Nethesis S.r.l. 
+SPDX-License-Identifier: AGPL-3.0-or-later +*/ + +package methods + +import ( + "encoding/json" + "net/http" + + "github.com/gin-gonic/gin" + "github.com/gin-gonic/gin/binding" + + "github.com/nethesis/my/backend/cache" + "github.com/nethesis/my/backend/entities" + "github.com/nethesis/my/backend/helpers" + "github.com/nethesis/my/backend/logger" + "github.com/nethesis/my/backend/models" + "github.com/nethesis/my/backend/response" +) + +// GetSupportSessions handles GET /api/support-sessions +// Returns support sessions grouped by system with server-side pagination. +func GetSupportSessions(c *gin.Context) { + _, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + page, pageSize, sortBy, sortDirection := helpers.GetPaginationAndSortingFromQuery(c) + + status := c.Query("status") + systemID := c.Query("system_id") + + repo := entities.NewSupportRepository() + groups, totalCount, err := repo.GetSystemSessions( + userOrgRole, userOrgID, page, pageSize, status, systemID, sortBy, sortDirection, + ) + if err != nil { + logger.Error().Err(err).Msg("failed to retrieve support sessions") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to retrieve support sessions", nil)) + return + } + + c.JSON(http.StatusOK, response.OK("support sessions retrieved successfully", gin.H{ + "support_sessions": helpers.EnsureSlice(groups), + "pagination": helpers.BuildPaginationInfoWithSorting(page, pageSize, totalCount, sortBy, sortDirection), + })) +} + +// GetSupportSession handles GET /api/support-sessions/:id +func GetSupportSession(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + return + } + + _, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + + repo := entities.NewSupportRepository() + session, err := repo.GetSessionByID(sessionID, userOrgRole, userOrgID) + if err != nil { + logger.Error().Err(err).Str("session_id", 
sessionID).Msg("failed to get support session") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get support session", nil)) + return + } + if session == nil { + c.JSON(http.StatusNotFound, response.NotFound("support session not found", nil)) + return + } + + c.JSON(http.StatusOK, response.OK("support session retrieved successfully", session)) +} + +// ExtendSupportSession handles PATCH /api/support-sessions/:id/extend +func ExtendSupportSession(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + return + } + + var request models.ExtendSessionRequest + if err := c.ShouldBindBodyWith(&request, binding.JSON); err != nil { + c.JSON(http.StatusBadRequest, response.ValidationBadRequestMultiple(err)) + return + } + + repo := entities.NewSupportRepository() + if err := repo.ExtendSession(sessionID, request.Hours); err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to extend support session") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to extend support session", nil)) + return + } + + logger.LogBusinessOperation(c, "support", "extend", "session", sessionID, true, nil) + + c.JSON(http.StatusOK, response.OK("support session extended successfully", gin.H{ + "session_id": sessionID, + "extended_by_hours": request.Hours, + })) +} + +// CloseSupportSession handles DELETE /api/support-sessions/:id +func CloseSupportSession(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + return + } + + repo := entities.NewSupportRepository() + if err := repo.CloseSession(sessionID); err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to close support session") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to close support session", nil)) + 
return + } + + // Notify support service via Redis pub/sub to disconnect the tunnel + if redisClient := cache.GetRedisClient(); redisClient != nil { + cmd := map[string]string{ + "action": "close", + "session_id": sessionID, + } + payload, _ := json.Marshal(cmd) + if err := redisClient.Publish("support:commands", string(payload)); err != nil { + logger.Warn().Err(err).Str("session_id", sessionID).Msg("failed to publish close command to support service") + } + } + + logger.LogBusinessOperation(c, "support", "close", "session", sessionID, true, nil) + + c.JSON(http.StatusOK, response.OK("support session closed successfully", gin.H{ + "session_id": sessionID, + })) +} + +// GetSupportSessionLogs handles GET /api/support-sessions/:id/logs +func GetSupportSessionLogs(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + return + } + + page, pageSize, _, _ := helpers.GetPaginationAndSortingFromQuery(c) + + repo := entities.NewSupportRepository() + logs, totalCount, err := repo.GetAccessLogs(sessionID, page, pageSize) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to get access logs") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get access logs", nil)) + return + } + + c.JSON(http.StatusOK, response.OK("access logs retrieved successfully", gin.H{ + "access_logs": helpers.EnsureSlice(logs), + "pagination": helpers.BuildPaginationInfoWithSorting(page, pageSize, totalCount, "connected_at", "desc"), + })) +} diff --git a/backend/methods/support_proxy.go b/backend/methods/support_proxy.go new file mode 100644 index 00000000..dcc3c41d --- /dev/null +++ b/backend/methods/support_proxy.go @@ -0,0 +1,670 @@ +/* +Copyright (C) 2026 Nethesis S.r.l. 
+SPDX-License-Identifier: AGPL-3.0-or-later +*/ + +package methods + +import ( + "bufio" + "crypto/tls" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "net/http/httputil" + "net/url" + "strings" + "sync" + + "github.com/gin-gonic/gin" + + "github.com/nethesis/my/backend/cache" + "github.com/nethesis/my/backend/configuration" + "github.com/nethesis/my/backend/entities" + "github.com/nethesis/my/backend/helpers" + customjwt "github.com/nethesis/my/backend/jwt" + "github.com/nethesis/my/backend/logger" + "github.com/nethesis/my/backend/models" + "github.com/nethesis/my/backend/response" +) + +// internalTransport is a shared HTTP transport for internal service communication +var internalTransport = &http.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, //nolint:gosec // Internal service communication + }, +} + +// internalClient is a shared HTTP client for internal service communication +var internalClient = &http.Client{Transport: internalTransport} + +// sessionTokenTransport wraps an http.RoundTripper to inject the per-session +// X-Session-Token header and strip browser headers (Origin, Referer) that would +// trigger the support service's CORS middleware. (#3/#4) +type sessionTokenTransport struct { + inner http.RoundTripper + sessionToken string +} + +func (t *sessionTokenTransport) RoundTrip(req *http.Request) (*http.Response, error) { + if t.sessionToken != "" { + req.Header.Set("X-Session-Token", t.sessionToken) + } + // Remove browser headers that would trigger CORS on the support service + req.Header.Del("Origin") + req.Header.Del("Referer") + return t.inner.RoundTrip(req) +} + +// getActiveSession validates that a session exists, is accessible by the user, and is active. +// Returns the session or writes an error response and returns nil. 
+func getActiveSession(c *gin.Context, sessionID string) *models.SupportSession { + _, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + + repo := entities.NewSupportRepository() + session, err := repo.GetSessionByID(sessionID, userOrgRole, userOrgID) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to get support session") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get support session", nil)) + return nil + } + if session == nil { + c.JSON(http.StatusNotFound, response.NotFound("support session not found", nil)) + return nil + } + + if session.Status != "active" { + c.JSON(http.StatusBadRequest, response.BadRequest("support session is not active", nil)) + return nil + } + + return session +} + +// getSessionToken retrieves the session token for internal service authentication (#3/#4) +func getSessionToken(c *gin.Context, sessionID string) string { + repo := entities.NewSupportRepository() + token, err := repo.GetSessionTokenByID(sessionID) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to get session token") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get session token", nil)) + return "" + } + return token +} + +// logAccess inserts an access log entry for the current user +func logAccess(c *gin.Context, sessionID, accessType, metadata string) { + userID, _, _, _ := helpers.GetUserContextExtended(c) + userName := "" + if name, exists := c.Get("name"); exists { + userName, _ = name.(string) + } + // Wrap metadata as JSON object for the jsonb column + metaBytes, _ := json.Marshal(map[string]string{"service": metadata}) + jsonMetadata := string(metaBytes) + repo := entities.NewSupportRepository() + if err := repo.InsertAccessLog(sessionID, userID, userName, accessType, jsonMetadata); err != nil { + logger.Warn().Err(err).Str("session_id", sessionID).Msg("failed to insert access log") + } +} + +// 
GenerateTerminalTicket handles POST /api/support-sessions/:id/terminal-ticket +// Generates a one-time, short-lived ticket for WebSocket terminal authentication. +// The client exchanges its JWT (sent securely in the Authorization header) for a +// ticket that can be passed as a query parameter when opening the WebSocket. +func GenerateTerminalTicket(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + return + } + + if session := getActiveSession(c, sessionID); session == nil { + return + } + + userID, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + userLogtoID := "" + if v, ok := c.Get("user_logto_id"); ok { + if s, ok := v.(string); ok { + userLogtoID = s + } + } + username := "" + if v, ok := c.Get("username"); ok { + if s, ok := v.(string); ok { + username = s + } + } + userName := "" + if v, ok := c.Get("name"); ok { + if s, ok := v.(string); ok { + userName = s + } + } + + ticket := &cache.TerminalTicket{ + SessionID: sessionID, + UserID: userID, + UserLogtoID: userLogtoID, + Username: username, + Name: userName, + OrgRole: userOrgRole, + OrganizationID: userOrgID, + } + + ticketID, err := cache.GenerateTerminalTicket(ticket) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to generate terminal ticket") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to generate terminal ticket", nil)) + return + } + + c.JSON(http.StatusOK, response.OK("terminal ticket generated", gin.H{ + "ticket": ticketID, + })) +} + +// GetSupportSessionTerminal handles GET /api/support-sessions/:id/terminal (WebSocket) +// Authenticates using a one-time ticket (from ?ticket= query param) instead of a JWT, +// so the long-lived JWT is never exposed in URLs or server logs. 
+// Uses raw TCP hijacking to bridge the browser WebSocket to the support service, +// bypassing httputil.ReverseProxy which can conflict with Gin's response writer. +func GetSupportSessionTerminal(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + return + } + + // Validate one-time ticket + ticketID := c.Query("ticket") + if ticketID == "" { + c.JSON(http.StatusUnauthorized, response.Unauthorized("ticket required", nil)) + return + } + + ticket, err := cache.ConsumeTerminalTicket(ticketID) + if err != nil { + logger.Error().Err(err).Msg("failed to consume terminal ticket") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to validate ticket", nil)) + return + } + if ticket == nil { + c.JSON(http.StatusUnauthorized, response.Unauthorized("invalid or expired ticket", nil)) + return + } + + // Verify ticket is for this session + if ticket.SessionID != sessionID { + c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "ticket does not match session", nil)) + return + } + + // Verify session is active (using ticket's org context) + repo := entities.NewSupportRepository() + session, repoErr := repo.GetSessionByID(sessionID, ticket.OrgRole, ticket.OrganizationID) + if repoErr != nil { + logger.Error().Err(repoErr).Str("session_id", sessionID).Msg("failed to get support session") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get support session", nil)) + return + } + if session == nil { + c.JSON(http.StatusNotFound, response.NotFound("support session not found", nil)) + return + } + if session.Status != "active" { + c.JSON(http.StatusBadRequest, response.BadRequest("support session is not active", nil)) + return + } + + sessionToken := getSessionToken(c, sessionID) + if sessionToken == "" { + return + } + + // Log access using ticket's user context + userName := ticket.Name + metaBytes, _ := 
json.Marshal(map[string]string{"service": "terminal"}) + jsonMetadata := string(metaBytes) + if logErr := repo.InsertAccessLog(sessionID, ticket.UserID, userName, "web_terminal", jsonMetadata); logErr != nil { + logger.Warn().Err(logErr).Str("session_id", sessionID).Msg("failed to insert access log") + } + + targetURL := fmt.Sprintf("%s/api/terminal/%s", configuration.Config.SupportServiceURL, sessionID) + target, err := url.Parse(targetURL) + if err != nil { + c.JSON(http.StatusInternalServerError, response.InternalServerError("invalid proxy target", nil)) + return + } + + // Connect to the support service + upstreamConn, err := net.Dial("tcp", target.Host) + if err != nil { + logger.Error().Err(err).Str("target", target.Host).Msg("failed to connect to support service") + c.JSON(http.StatusBadGateway, response.Error(http.StatusBadGateway, "support service unavailable", nil)) + return + } + + // Build the upstream HTTP request with WebSocket upgrade headers + upReq, err := http.NewRequest(http.MethodGet, targetURL, nil) + if err != nil { + _ = upstreamConn.Close() + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to create upstream request", nil)) + return + } + // Copy WebSocket handshake headers from the browser request + for _, h := range []string{ + "Upgrade", "Connection", + "Sec-WebSocket-Key", "Sec-WebSocket-Version", + "Sec-WebSocket-Extensions", "Sec-WebSocket-Protocol", + } { + if v := c.GetHeader(h); v != "" { + upReq.Header.Set(h, v) + } + } + upReq.Host = target.Host + upReq.Header.Set("X-Session-Token", sessionToken) + + // Send the request to the support service + if writeErr := upReq.Write(upstreamConn); writeErr != nil { + _ = upstreamConn.Close() + logger.Error().Err(writeErr).Msg("failed to write upstream request") + c.JSON(http.StatusBadGateway, response.Error(http.StatusBadGateway, "support service unavailable", nil)) + return + } + + // Read the response from the support service + upBuf := 
bufio.NewReader(upstreamConn) + upResp, err := http.ReadResponse(upBuf, upReq) + if err != nil { + _ = upstreamConn.Close() + logger.Error().Err(err).Msg("failed to read upstream response") + c.JSON(http.StatusBadGateway, response.Error(http.StatusBadGateway, "support service unavailable", nil)) + return + } + + if upResp.StatusCode != http.StatusSwitchingProtocols { + // Forward the error response body to the client + defer func() { _ = upResp.Body.Close() }() + _ = upstreamConn.Close() + for key, values := range upResp.Header { + for _, value := range values { + c.Writer.Header().Add(key, value) + } + } + c.Writer.WriteHeader(upResp.StatusCode) + _, _ = io.Copy(c.Writer, upResp.Body) + return + } + + // Hijack the client connection from Gin + hijacker, ok := c.Writer.(http.Hijacker) + if !ok { + _ = upstreamConn.Close() + c.JSON(http.StatusInternalServerError, response.InternalServerError("websocket hijack not supported", nil)) + return + } + clientConn, clientBuf, err := hijacker.Hijack() + if err != nil { + _ = upstreamConn.Close() + logger.Error().Err(err).Msg("failed to hijack client connection") + return + } + + // Forward the 101 Switching Protocols response to the browser + if writeErr := upResp.Write(clientConn); writeErr != nil { + _ = clientConn.Close() + _ = upstreamConn.Close() + return + } + _ = clientBuf.Flush() + + // Bridge both connections bidirectionally + var once sync.Once + done := make(chan struct{}) + closeBoth := func() { + once.Do(func() { + close(done) + _ = clientConn.Close() + _ = upstreamConn.Close() + }) + } + + go func() { + defer closeBoth() + _, _ = io.Copy(upstreamConn, clientConn) + }() + go func() { + defer closeBoth() + // Drain any buffered data from the upstream reader first + if upBuf.Buffered() > 0 { + _, _ = io.CopyN(clientConn, upBuf, int64(upBuf.Buffered())) + } + _, _ = io.Copy(clientConn, upstreamConn) + }() + + <-done + logger.Info().Str("session_id", sessionID).Msg("terminal session ended") +} + +// 
GetSupportSessionServices handles GET /api/support-sessions/:id/services +func GetSupportSessionServices(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + return + } + + _, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + repo := entities.NewSupportRepository() + session, err := repo.GetSessionByID(sessionID, userOrgRole, userOrgID) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to get support session") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get support session", nil)) + return + } + if session == nil { + c.JSON(http.StatusNotFound, response.NotFound("support session not found", nil)) + return + } + + sessionToken := getSessionToken(c, sessionID) + if sessionToken == "" { + return + } + + targetURL := fmt.Sprintf("%s/api/proxy/%s/services", configuration.Config.SupportServiceURL, sessionID) + proxyGetWithTokenOrEmpty(c, targetURL, sessionToken) +} + +// ProxySupportSession handles ANY /api/support-sessions/:id/proxy/:service/*path +func ProxySupportSession(c *gin.Context) { + sessionID := c.Param("id") + serviceName := c.Param("service") + path := c.Param("path") + if path == "" { + path = "/" + } + + if sessionID == "" || serviceName == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id and service name required", nil)) + return + } + + if session := getActiveSession(c, sessionID); session == nil { + return + } + + sessionToken := getSessionToken(c, sessionID) + if sessionToken == "" { + return + } + + logAccess(c, sessionID, "ui_proxy", serviceName) + + targetURL := fmt.Sprintf("%s/api/proxy/%s/%s%s", configuration.Config.SupportServiceURL, sessionID, serviceName, path) + target, err := url.Parse(targetURL) + if err != nil { + c.JSON(http.StatusInternalServerError, response.InternalServerError("invalid proxy target", nil)) + return + } + + proxy := 
&httputil.ReverseProxy{ + Director: func(req *http.Request) { + req.URL = target + req.URL.RawQuery = c.Request.URL.RawQuery + req.Host = target.Host + req.Header.Del("Authorization") + }, + Transport: &sessionTokenTransport{inner: internalTransport, sessionToken: sessionToken}, + } + + proxy.ServeHTTP(c.Writer, c.Request) +} + +// GenerateSupportProxyToken handles POST /api/support-sessions/:id/proxy-token +func GenerateSupportProxyToken(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + return + } + + if configuration.Config.SupportProxyDomain == "" { + c.JSON(http.StatusServiceUnavailable, response.Error(http.StatusServiceUnavailable, "subdomain proxy is not configured", nil)) + return + } + + var req struct { + Service string `json:"service" binding:"required"` + } + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, response.BadRequest("service name required", nil)) + return + } + + if session := getActiveSession(c, sessionID); session == nil { + return + } + + userID, _, _, _ := helpers.GetUserContextExtended(c) + token, err := customjwt.GenerateProxyToken(sessionID, req.Service, userID) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to generate proxy token") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to generate proxy token", nil)) + return + } + + // Build subdomain URL: {service}--{session_id[:12]}.support.{domain} + shortID := sessionID + if len(shortID) > 12 { + shortID = shortID[:12] + } + subdomain := fmt.Sprintf("%s--%s.support.%s", req.Service, shortID, configuration.Config.SupportProxyDomain) + proxyURL := fmt.Sprintf("https://%s/", subdomain) + + logAccess(c, sessionID, "ui_proxy", req.Service) + + c.JSON(http.StatusOK, response.OK("proxy token generated", gin.H{ + "url": proxyURL, + "token": token, + })) +} + +// SubdomainProxy handles all requests 
on /support-proxy/*path for subdomain-based proxying +func SubdomainProxy(c *gin.Context) { + path := c.Param("path") + if path == "" { + path = "/" + } + + // Extract service name and session short ID from subdomain. + // Format: {service}--{session_short}.support.{domain} + forwardedHost := c.GetHeader("X-Forwarded-Host") + if forwardedHost == "" { + forwardedHost = c.Request.Host + } + hostOnly := forwardedHost + if h, _, splitErr := net.SplitHostPort(forwardedHost); splitErr == nil { + hostOnly = h + } + + var serviceName, sessionShort string + if parts := strings.SplitN(hostOnly, ".support.", 2); len(parts) == 2 { + if subParts := strings.SplitN(parts[0], "--", 2); len(subParts) == 2 { + serviceName = subParts[0] + sessionShort = subParts[1] + } + } + + if serviceName == "" || sessionShort == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("invalid support proxy subdomain", nil)) + return + } + + // Prefer query param token (fresh from the UI) over cookie (may be stale + // from a previous session — the cookie domain covers all support subdomains). 
+ tokenString := c.Query("token") + fromQueryParam := tokenString != "" + if tokenString == "" { + tokenString, _ = c.Cookie("support_proxy") + } + + if tokenString == "" { + c.JSON(http.StatusUnauthorized, response.Unauthorized("proxy token required", nil)) + return + } + + // Validate proxy token + claims, err := customjwt.ValidateProxyToken(tokenString) + if err != nil { + c.JSON(http.StatusUnauthorized, response.Unauthorized("invalid proxy token", nil)) + return + } + + // Validate that the token's service name matches the subdomain service + if claims.ServiceName != serviceName { + c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "proxy token is not valid for this service", nil)) + return + } + + // Validate that the token's session ID matches the subdomain short ID + if !strings.HasPrefix(claims.SessionID, sessionShort) { + c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "proxy token does not match this session", nil)) + return + } + + sessionID := claims.SessionID + + // If token came from query param, set cookie and redirect to same path without token + if fromQueryParam { + secureCookie := !strings.HasPrefix(configuration.Config.AppURL, "http://") + c.SetCookie("support_proxy", tokenString, 8*60*60, "/", hostOnly, secureCookie, true) + + redirectPath := path + q := c.Request.URL.Query() + q.Del("token") + if encoded := q.Encode(); encoded != "" { + redirectPath = redirectPath + "?" 
+ encoded + } + c.Redirect(http.StatusFound, redirectPath) + return + } + + // Verify session is still active + repo := entities.NewSupportRepository() + session, err := repo.GetSessionByID(sessionID, "owner", "") + if err != nil || session == nil { + c.JSON(http.StatusNotFound, response.NotFound("support session not found", nil)) + return + } + if session.Status != "active" { + c.JSON(http.StatusBadRequest, response.BadRequest("support session is not active", nil)) + return + } + + // Get session token for internal auth (#3/#4) + sessionToken, tokenErr := repo.GetSessionTokenByID(sessionID) + if tokenErr != nil { + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get session token", nil)) + return + } + + // Build target URL for support service + targetURL := fmt.Sprintf("%s/api/proxy/%s/%s%s", configuration.Config.SupportServiceURL, sessionID, serviceName, path) + target, parseErr := url.Parse(targetURL) + if parseErr != nil { + c.JSON(http.StatusInternalServerError, response.InternalServerError("invalid proxy target", nil)) + return + } + + // Pass the browser's proxy hostname so the support service can rewrite + // hardcoded hostnames in responses + proxyHost := c.GetHeader("X-Forwarded-Host") + if proxyHost == "" { + proxyHost = c.Request.Host + } + + proxy := &httputil.ReverseProxy{ + Director: func(req *http.Request) { + req.URL = target + req.URL.RawQuery = c.Request.URL.RawQuery + req.Host = target.Host + req.Header.Set("X-Proxy-Host", proxyHost) + filterSupportProxyCookie(req) + }, + ModifyResponse: func(resp *http.Response) error { + // Replace upstream security headers with proxy-appropriate values + resp.Header.Del("X-Frame-Options") + resp.Header.Set("Content-Security-Policy", "frame-ancestors 'self'") + + // Strip upstream CORS headers to avoid duplicates + resp.Header.Del("Access-Control-Allow-Origin") + resp.Header.Del("Access-Control-Allow-Credentials") + resp.Header.Del("Access-Control-Allow-Headers") + 
resp.Header.Del("Access-Control-Allow-Methods") + return nil + }, + Transport: &sessionTokenTransport{inner: &http.Transport{ + DisableCompression: true, + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, //nolint:gosec // Internal service communication + }, + }, sessionToken: sessionToken}, + } + + proxy.ServeHTTP(c.Writer, c.Request) +} + +// filterSupportProxyCookie removes the support_proxy cookie from the request +// while preserving all other cookies and headers (including Authorization) +func filterSupportProxyCookie(req *http.Request) { + cookies := req.Cookies() + req.Header.Del("Cookie") + for _, c := range cookies { + if c.Name != "support_proxy" { + req.AddCookie(c) + } + } +} + +// proxyGetWithTokenOrEmpty proxies a GET request to the support service. +// If the support service returns 404 (e.g., tunnel disconnected but session +// still marked active), it returns an empty services list instead of propagating +// the 404, avoiding noisy errors in the frontend during the cleanup window. 
+func proxyGetWithTokenOrEmpty(c *gin.Context, targetURL, sessionToken string) { + req, reqErr := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, targetURL, nil) + if reqErr != nil { + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to create proxy request", nil)) + return + } + req.Header.Set("X-Session-Token", sessionToken) + + resp, err := internalClient.Do(req) + if err != nil { + logger.Error().Err(err).Str("url", targetURL).Msg("failed to proxy request to support service") + c.JSON(http.StatusBadGateway, response.Error(http.StatusBadGateway, "support service unavailable", nil)) + return + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode == http.StatusNotFound { + c.JSON(http.StatusOK, response.OK("services retrieved successfully", gin.H{ + "services": []any{}, + })) + return + } + + for key, values := range resp.Header { + for _, value := range values { + c.Writer.Header().Add(key, value) + } + } + + c.Writer.WriteHeader(resp.StatusCode) + _, _ = io.Copy(c.Writer, resp.Body) +} diff --git a/backend/middleware/rbac.go b/backend/middleware/rbac.go index 5210d9ef..4e4a1357 100644 --- a/backend/middleware/rbac.go +++ b/backend/middleware/rbac.go @@ -11,6 +11,7 @@ package middleware import ( "net/http" + "strings" "github.com/gin-gonic/gin" "github.com/nethesis/my/backend/helpers" @@ -120,7 +121,7 @@ func RequireOrgRole(role string) gin.HandlerFunc { return } - if user.OrgRole != role { + if !strings.EqualFold(user.OrgRole, role) { logger.RequestLogger(c, "rbac").Warn(). Str("operation", "org_role_denied"). Str("required_org_role", role). diff --git a/backend/models/support.go b/backend/models/support.go new file mode 100644 index 00000000..ed6f1971 --- /dev/null +++ b/backend/models/support.go @@ -0,0 +1,70 @@ +/* +Copyright (C) 2026 Nethesis S.r.l. 
+SPDX-License-Identifier: AGPL-3.0-or-later +*/ + +package models + +import "time" + +// SupportSession represents a support tunnel session +type SupportSession struct { + ID string `json:"id"` + SystemID string `json:"system_id"` + NodeID *string `json:"node_id,omitempty"` + SessionToken string `json:"session_token,omitempty"` + StartedAt time.Time `json:"started_at"` + ExpiresAt time.Time `json:"expires_at"` + Status string `json:"status"` + ClosedAt *time.Time `json:"closed_at,omitempty"` + ClosedBy *string `json:"closed_by,omitempty"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + + // Joined system info (populated in list/detail queries) + SystemName string `json:"system_name,omitempty"` + SystemType *string `json:"system_type,omitempty"` + SystemKey string `json:"system_key,omitempty"` + Organization *Organization `json:"organization,omitempty"` +} + +// SupportAccessLog represents an operator's access to a support session +type SupportAccessLog struct { + ID string `json:"id"` + SessionID string `json:"session_id"` + OperatorID string `json:"operator_id"` + OperatorName *string `json:"operator_name,omitempty"` + AccessType string `json:"access_type"` + ConnectedAt time.Time `json:"connected_at"` + DisconnectedAt *time.Time `json:"disconnected_at,omitempty"` + Metadata *string `json:"metadata,omitempty"` +} + +// SystemSessionGroup represents a system with its aggregated support session info +type SystemSessionGroup struct { + SystemID string `json:"system_id"` + SystemName string `json:"system_name"` + SystemType *string `json:"system_type,omitempty"` + SystemKey string `json:"system_key"` + Organization *Organization `json:"organization,omitempty"` + StartedAt time.Time `json:"started_at"` + ExpiresAt time.Time `json:"expires_at"` + Status string `json:"status"` + SessionCount int `json:"session_count"` + NodeCount int `json:"node_count"` + Sessions []SessionRef `json:"sessions"` +} + +// SessionRef is a lightweight 
reference to an individual session within a group +type SessionRef struct { + ID string `json:"id"` + NodeID *string `json:"node_id,omitempty"` + Status string `json:"status"` + StartedAt time.Time `json:"started_at"` + ExpiresAt time.Time `json:"expires_at"` +} + +// ExtendSessionRequest represents a request to extend a session +type ExtendSessionRequest struct { + Hours int `json:"hours" binding:"required,min=1,max=168"` +} diff --git a/backend/openapi.yaml b/backend/openapi.yaml index 45efea9e..a2cdc8f2 100644 --- a/backend/openapi.yaml +++ b/backend/openapi.yaml @@ -77,6 +77,9 @@ tags: - name: Backend - Health description: Backend service health check + - name: Backend - Support Sessions + description: Backend support session management + - name: Collect - Health description: Collect service health and monitoring - name: Collect - Systems @@ -443,6 +446,147 @@ components: description: Sort direction example: "asc" + SupportSession: + type: object + properties: + id: + type: string + format: uuid + system_id: + type: string + node_id: + type: string + nullable: true + started_at: + type: string + format: date-time + expires_at: + type: string + format: date-time + status: + type: string + enum: [pending, active, expired, closed] + closed_at: + type: string + format: date-time + nullable: true + closed_by: + type: string + nullable: true + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + system_name: + type: string + system_type: + type: string + nullable: true + system_key: + type: string + organization: + type: object + properties: + id: + type: string + logto_id: + type: string + name: + type: string + type: + type: string + + SupportAccessLog: + type: object + properties: + id: + type: string + format: uuid + session_id: + type: string + format: uuid + operator_id: + type: string + operator_name: + type: string + nullable: true + access_type: + type: string + enum: [view, ssh, web_terminal, ui_proxy] + 
connected_at: + type: string + format: date-time + disconnected_at: + type: string + format: date-time + nullable: true + metadata: + type: string + nullable: true + description: JSON-encoded metadata string + + SystemSessionGroup: + type: object + properties: + system_id: + type: string + system_name: + type: string + system_type: + type: string + nullable: true + system_key: + type: string + organization: + type: object + properties: + id: + type: string + logto_id: + type: string + name: + type: string + type: + type: string + started_at: + type: string + format: date-time + expires_at: + type: string + format: date-time + status: + type: string + enum: [pending, active, expired, closed] + session_count: + type: integer + node_count: + type: integer + sessions: + type: array + items: + $ref: '#/components/schemas/SessionRef' + + SessionRef: + type: object + properties: + id: + type: string + format: uuid + node_id: + type: string + nullable: true + status: + type: string + enum: [pending, active, expired, closed] + started_at: + type: string + format: date-time + expires_at: + type: string + format: date-time + TokenExchangeRequest: type: object required: @@ -8676,3 +8820,544 @@ paths: format: binary '404': $ref: '#/components/responses/NotFound' + + /support-sessions: + get: + operationId: getSupportSessions + tags: + - Backend - Support Sessions + summary: List support sessions + description: Lists active support sessions filtered by RBAC organization hierarchy + security: + - BearerAuth: [] + parameters: + - name: page + in: query + schema: + type: integer + minimum: 1 + default: 1 + - name: page_size + in: query + schema: + type: integer + minimum: 1 + maximum: 100 + default: 20 + - name: status + in: query + description: Filter by session status (can be repeated) + schema: + type: string + enum: [pending, active, expired, closed] + - name: system_id + in: query + schema: + type: string + - name: sort_by + in: query + schema: + type: string + default: 
started_at + - name: sort_direction + in: query + schema: + type: string + enum: [asc, desc] + default: desc + responses: + '200': + description: Support sessions retrieved successfully + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + example: support sessions retrieved successfully + data: + type: object + properties: + support_sessions: + type: array + items: + $ref: '#/components/schemas/SystemSessionGroup' + pagination: + $ref: '#/components/schemas/Pagination' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + + /support-sessions/{id}: + get: + operationId: getSupportSession + tags: + - Backend - Support Sessions + summary: Get support session detail + description: Get a specific support session with system info + security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + format: uuid + responses: + '200': + description: Support session retrieved successfully + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + example: support session retrieved successfully + data: + $ref: '#/components/schemas/SupportSession' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + delete: + operationId: closeSupportSession + tags: + - Backend - Support Sessions + summary: Close a support session + description: Force-close a support session and disconnect the tunnel + security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + format: uuid + responses: + '200': + description: Support session closed successfully + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + 
example: support session closed successfully + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + + /support-sessions/{id}/extend: + patch: + operationId: extendSupportSession + tags: + - Backend - Support Sessions + summary: Extend a support session + description: Extends the expiration time of a support session + security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + format: uuid + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - hours + properties: + hours: + type: integer + minimum: 1 + maximum: 168 + description: Number of hours to extend the session + example: 24 + responses: + '200': + description: Support session extended successfully + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + example: support session extended successfully + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + + /support-sessions/{id}/logs: + get: + operationId: getSupportSessionLogs + tags: + - Backend - Support Sessions + summary: Get access logs for a support session + description: Returns the access logs for a specific support session + security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + format: uuid + - name: page + in: query + schema: + type: integer + minimum: 1 + default: 1 + - name: page_size + in: query + schema: + type: integer + minimum: 1 + maximum: 100 + default: 20 + responses: + '200': + description: Access logs retrieved successfully + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + 
type: string + example: access logs retrieved successfully + data: + type: object + properties: + access_logs: + type: array + items: + $ref: '#/components/schemas/SupportAccessLog' + pagination: + $ref: '#/components/schemas/Pagination' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + + /support-sessions/{id}/proxy-token: + post: + operationId: generateSupportProxyToken + tags: + - Backend - Support Sessions + summary: Generate a subdomain proxy token for a support session service + description: | + Generates a short-lived signed JWT (8h) that grants access to a specific service on a remote + system through the subdomain-based support proxy. The returned URL uses the format + `https://{service}--{session_short}.support.{domain}/` where the token is passed as a query + parameter on the first request, then stored as an HttpOnly cookie. + security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + description: Support session ID + schema: + type: string + format: uuid + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - service + properties: + service: + type: string + description: Name of the service to proxy (e.g. 
"nethvoice103-ui") + example: nethvoice103-ui + responses: + '200': + description: Proxy token generated + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + example: proxy token generated + data: + type: object + properties: + url: + type: string + description: Full subdomain URL for the proxied service + example: https://nethvoice103-ui--550e8400.support.my.nethesis.it/ + token: + type: string + description: Signed JWT proxy token (8h expiry) + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + '503': + description: Subdomain proxy not configured + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /support-sessions/{id}/services: + get: + operationId: getSupportSessionServices + tags: + - Backend - Support Sessions + summary: List available services on a support session + description: | + Returns the list of services discovered on the remote system connected via the support tunnel. + Services are auto-discovered from Traefik routes or the tunnel client's service manifest. 
+ security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + description: Support session ID + schema: + type: string + format: uuid + responses: + '200': + description: Services retrieved successfully + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + example: services retrieved successfully + data: + type: object + properties: + services: + type: object + additionalProperties: + type: object + properties: + target: + type: string + example: "127.0.0.1:443" + host: + type: string + example: "nethvoice.example.com" + tls: + type: boolean + label: + type: string + example: "NethVoice" + module_id: + type: string + example: "nethvoice103" + node_id: + type: string + example: "1" + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + + /support-sessions/{id}/proxy/{service}/{path}: + get: + operationId: proxySupportSession + tags: + - Backend - Support Sessions + summary: Proxy HTTP requests to a remote service + description: | + Proxies HTTP requests through the support tunnel to a service on the remote system. + Supports all HTTP methods (GET, POST, PUT, DELETE, PATCH, etc.). + Also handles WebSocket upgrade requests for services that require them. 
+ security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + description: Support session ID + schema: + type: string + format: uuid + - name: service + in: path + required: true + description: Service name (as returned by the services endpoint) + schema: + type: string + - name: path + in: path + required: true + description: Path to proxy to the remote service + schema: + type: string + responses: + '200': + description: Proxied response from the remote service + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + '502': + description: Support service unavailable + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /support-sessions/{id}/terminal-ticket: + post: + operationId: generateTerminalTicket + tags: + - Backend - Support Sessions + summary: Generate a one-time ticket for terminal WebSocket + description: | + Generates a one-time, short-lived ticket (30s TTL) for authenticating the + WebSocket terminal connection. The client exchanges its JWT (sent securely + in the Authorization header) for a ticket that can be passed as a query + parameter when opening the WebSocket, avoiding exposure of the long-lived + JWT in URLs and server logs. 
+ security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + description: Support session ID + schema: + type: string + format: uuid + responses: + '200': + description: Ticket generated + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + example: terminal ticket generated + data: + type: object + properties: + ticket: + type: string + description: One-time ticket string to use as ?ticket= query parameter + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + + /support-sessions/{id}/terminal: + get: + operationId: getSupportSessionTerminal + tags: + - Backend - Support Sessions + summary: Open SSH terminal via WebSocket + description: | + Upgrades to a WebSocket connection that bridges xterm.js in the browser to an SSH session + on the remote system via the yamux tunnel. Uses a binary framing protocol: + - Type 0 (data): raw terminal I/O bytes + - Type 1 (resize): JSON `{"cols": N, "rows": N}` + - Type 2 (auth): JSON `{"username": "...", "password": "...", "cols": N, "rows": N}` (first message from client) + - Type 3 (error): error string from server + + Authentication uses a one-time ticket obtained from POST /support-sessions/{id}/terminal-ticket. + The ticket is short-lived (30s) and single-use, preventing JWT exposure in URLs and logs. 
+ security: [] + parameters: + - name: id + in: path + required: true + description: Support session ID + schema: + type: string + format: uuid + - name: ticket + in: query + required: true + description: One-time ticket from POST /support-sessions/{id}/terminal-ticket + schema: + type: string + responses: + '200': + description: WebSocket upgrade successful (HTTP 101 Switching Protocols) + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' diff --git a/backend/services/local/systems.go b/backend/services/local/systems.go index 458e2112..d52523c0 100644 --- a/backend/services/local/systems.go +++ b/backend/services/local/systems.go @@ -699,11 +699,11 @@ func (s *LocalSystemsService) RegisterSystem(systemSecret string) (*models.Regis return nil, fmt.Errorf("invalid system secret") } - // Update registered_at timestamp + // Update registered_at timestamp and enable support now := time.Now() updateQuery := ` UPDATE systems - SET registered_at = $1 + SET registered_at = $1, support_enabled = true WHERE id = $2 ` From 75261c9c7fd0da0a89fc3fe7e593e3733a9ecd57 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Mon, 9 Mar 2026 21:27:33 +0100 Subject: [PATCH 03/28] feat(frontend): add support dashboard, terminal, and session UI Support sessions table with pagination and sorting, xterm.js web terminal with multi-tab support, service dropdown with multi-node grouping, connect:systems permission guard, and i18n translations. 
--- frontend/package-lock.json | 24 + frontend/package.json | 3 + frontend/src/components/shell/SideMenu.vue | 12 + .../support/SupportSessionsTable.vue | 422 ++++++++++++++ .../components/support/SupportTerminal.vue | 519 ++++++++++++++++++ frontend/src/i18n/en/translation.json | 39 ++ frontend/src/i18n/it/translation.json | 37 ++ frontend/src/lib/permissions.ts | 8 +- frontend/src/lib/support/support.ts | 344 ++++++++++++ frontend/src/main.ts | 1 + .../src/queries/support/supportSessions.ts | 68 +++ frontend/src/router/index.ts | 5 + frontend/src/views/SupportSessionsView.vue | 21 + frontend/vite.config.ts | 3 + 14 files changed, 1505 insertions(+), 1 deletion(-) create mode 100644 frontend/src/components/support/SupportSessionsTable.vue create mode 100644 frontend/src/components/support/SupportTerminal.vue create mode 100644 frontend/src/lib/support/support.ts create mode 100644 frontend/src/queries/support/supportSessions.ts create mode 100644 frontend/src/views/SupportSessionsView.vue diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 451a87a2..2d6ee6ab 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -20,6 +20,9 @@ "@pinia/colada": "^0.21.5", "@tailwindcss/vite": "^4.1.10", "@vueuse/core": "^13.4.0", + "@xterm/addon-fit": "^0.11.0", + "@xterm/addon-web-links": "^0.12.0", + "@xterm/xterm": "^6.0.0", "axios": "^1.11.0", "lodash": "^4.17.21", "pinia": "^3.0.1", @@ -3445,6 +3448,27 @@ "vue": "^3.5.0" } }, + "node_modules/@xterm/addon-fit": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@xterm/addon-fit/-/addon-fit-0.11.0.tgz", + "integrity": "sha512-jYcgT6xtVYhnhgxh3QgYDnnNMYTcf8ElbxxFzX0IZo+vabQqSPAjC3c1wJrKB5E19VwQei89QCiZZP86DCPF7g==", + "license": "MIT" + }, + "node_modules/@xterm/addon-web-links": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/@xterm/addon-web-links/-/addon-web-links-0.12.0.tgz", + "integrity": 
"sha512-4Smom3RPyVp7ZMYOYDoC/9eGJJJqYhnPLGGqJ6wOBfB8VxPViJNSKdgRYb8NpaM6YSelEKbA2SStD7lGyqaobw==", + "license": "MIT" + }, + "node_modules/@xterm/xterm": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-6.0.0.tgz", + "integrity": "sha512-TQwDdQGtwwDt+2cgKDLn0IRaSxYu1tSUjgKarSDkUM0ZNiSRXFpjxEsvc/Zgc5kq5omJ+V0a8/kIM2WD3sMOYg==", + "license": "MIT", + "workspaces": [ + "addons/*" + ] + }, "node_modules/acorn": { "version": "8.15.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index 0e92cf39..7590fd74 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -30,6 +30,9 @@ "@pinia/colada": "^0.21.5", "@tailwindcss/vite": "^4.1.10", "@vueuse/core": "^13.4.0", + "@xterm/addon-fit": "^0.11.0", + "@xterm/addon-web-links": "^0.12.0", + "@xterm/xterm": "^6.0.0", "axios": "^1.11.0", "lodash": "^4.17.21", "pinia": "^3.0.1", diff --git a/frontend/src/components/shell/SideMenu.vue b/frontend/src/components/shell/SideMenu.vue index c9910273..dbef4e52 100644 --- a/frontend/src/components/shell/SideMenu.vue +++ b/frontend/src/components/shell/SideMenu.vue @@ -21,6 +21,7 @@ import { faBuilding as fasBuilding, faUserGroup as fasUserGroup, faServer as fasServer, + faHeadset as fasHeadset, } from '@fortawesome/free-solid-svg-icons' import { faGridOne as fasGridOne } from '@nethesis/nethesis-solid-svg-icons' import { @@ -31,12 +32,14 @@ import { faUserGroup as falUserGroup, faServer as falServer, faGrid2 as falGrid2, + faHeadset as falHeadset, } from '@nethesis/nethesis-light-svg-icons' import { canReadApplications, canReadCustomers, canReadDistributors, canReadResellers, + canConnectSystems, canReadSystems, canReadUsers, } from '@/lib/permissions' @@ -117,6 +120,15 @@ const navigation = computed(() => { lightIcon: falUserGroup, }) } + + if (canConnectSystems()) { + menuItems.push({ + name: 'support.title', + to: 'support-sessions', + solidIcon: fasHeadset, + 
lightIcon: falHeadset, + }) + } return menuItems }) diff --git a/frontend/src/components/support/SupportSessionsTable.vue b/frontend/src/components/support/SupportSessionsTable.vue new file mode 100644 index 00000000..f89ec631 --- /dev/null +++ b/frontend/src/components/support/SupportSessionsTable.vue @@ -0,0 +1,422 @@ + + + + + diff --git a/frontend/src/components/support/SupportTerminal.vue b/frontend/src/components/support/SupportTerminal.vue new file mode 100644 index 00000000..ec3e38ef --- /dev/null +++ b/frontend/src/components/support/SupportTerminal.vue @@ -0,0 +1,519 @@ + + + + + + + diff --git a/frontend/src/i18n/en/translation.json b/frontend/src/i18n/en/translation.json index f8b0efae..184798ed 100644 --- a/frontend/src/i18n/en/translation.json +++ b/frontend/src/i18n/en/translation.json @@ -608,5 +608,44 @@ "delete_object_modal": { "type_to_confirm": "Type '{confirmationText}' to confirm", "confirmation_text_does_not_match": "Enter '{confirmationText}' to confirm" + }, + "support": { + "title": "Support Sessions", + "page_description": "Monitor and manage active remote support sessions for systems.", + "system": "System", + "type": "Type", + "organization": "Organization", + "started_at": "Started", + "expires_at": "Expires", + "status": "Status", + "status_pending": "Pending", + "status_active": "Active", + "status_expired": "Expired", + "status_closed": "Closed", + "extend": "Extend", + "close": "Close", + "filter_by_status": "Status:", + "no_sessions": "No support sessions", + "no_sessions_description": "There are no support sessions to display.", + "no_sessions_match": "No matching sessions", + "no_sessions_match_description": "No support sessions match the selected filters.", + "cannot_retrieve_sessions": "Cannot retrieve support sessions", + "open_service": "Open service", + "services": "Services", + "loading_services": "Loading services...", + "no_services": "No services available", + "cannot_load_services": "Cannot load services", + 
"cannot_generate_proxy_token": "Cannot generate proxy token", + "terminal": "Terminal", + "terminal_connected": "Connected", + "terminal_connecting": "Connecting...", + "terminal_retry": "Retry", + "terminal_connection_closed": "Connection closed", + "terminal_connection_error": "Connection error", + "terminal_new_tab": "New terminal tab", + "terminal_close_all": "Close all terminals", + "terminal_close_all_confirm": "This will close {count} active terminal session(s). Continue?", + "terminal_select_node": "Select node", + "terminal_select_node_description": "This system has multiple cluster nodes. Select which node to connect to." } } diff --git a/frontend/src/i18n/it/translation.json b/frontend/src/i18n/it/translation.json index b0915ca4..8bfd144b 100644 --- a/frontend/src/i18n/it/translation.json +++ b/frontend/src/i18n/it/translation.json @@ -608,5 +608,42 @@ "delete_object_modal": { "type_to_confirm": "Digita '{confirmationText}' per confermare", "confirmation_text_does_not_match": "Inserisci '{confirmationText}' per confermare" + }, + "support": { + "title": "Sessioni di Supporto", + "page_description": "Monitora e gestisci le sessioni di supporto remoto attive per i sistemi.", + "system": "Sistema", + "type": "Tipo", + "organization": "Organizzazione", + "started_at": "Inizio", + "expires_at": "Scadenza", + "status": "Stato", + "status_pending": "In attesa", + "status_active": "Attiva", + "status_expired": "Scaduta", + "status_closed": "Chiusa", + "extend": "Estendi", + "close": "Chiudi", + "filter_by_status": "Stato:", + "no_sessions": "Nessuna sessione di supporto", + "no_sessions_description": "Non ci sono sessioni di supporto da visualizzare.", + "no_sessions_match": "Nessuna sessione corrispondente", + "no_sessions_match_description": "Nessuna sessione di supporto corrisponde ai filtri selezionati.", + "cannot_retrieve_sessions": "Impossibile recuperare le sessioni di supporto", + "open_service": "Apri servizio", + "services": "Servizi", + 
"loading_services": "Caricamento servizi...", + "no_services": "Nessun servizio disponibile", + "cannot_load_services": "Impossibile caricare i servizi", + "cannot_generate_proxy_token": "Impossibile generare il token proxy", + "terminal": "Terminale", + "terminal_connected": "Connesso", + "terminal_connecting": "Connessione in corso...", + "terminal_retry": "Riprova", + "terminal_connection_closed": "Connessione chiusa", + "terminal_connection_error": "Errore di connessione", + "terminal_new_tab": "Nuovo tab terminale", + "terminal_close_all": "Chiudi tutti i terminali", + "terminal_close_all_confirm": "Verranno chiuse {count} sessioni terminale attive. Continuare?" } } diff --git a/frontend/src/lib/permissions.ts b/frontend/src/lib/permissions.ts index 4e2fe9ba..adc1087d 100644 --- a/frontend/src/lib/permissions.ts +++ b/frontend/src/lib/permissions.ts @@ -1,4 +1,4 @@ -// Copyright (C) 2025 Nethesis S.r.l. +// Copyright (C) 2026 Nethesis S.r.l. // SPDX-License-Identifier: GPL-3.0-or-later import { useLoginStore } from '@/stores/login' @@ -21,6 +21,7 @@ const DESTROY_RESELLERS = 'destroy:resellers' const DESTROY_CUSTOMERS = 'destroy:customers' const DESTROY_USERS = 'destroy:users' const DESTROY_SYSTEMS = 'destroy:systems' +const CONNECT_SYSTEMS = 'connect:systems' export const canReadDistributors = () => { const loginStore = useLoginStore() @@ -111,3 +112,8 @@ export const canDestroySystems = () => { const loginStore = useLoginStore() return loginStore.permissions.includes(DESTROY_SYSTEMS) } + +export const canConnectSystems = () => { + const loginStore = useLoginStore() + return loginStore.permissions.includes(CONNECT_SYSTEMS) +} diff --git a/frontend/src/lib/support/support.ts b/frontend/src/lib/support/support.ts new file mode 100644 index 00000000..bfa8d83e --- /dev/null +++ b/frontend/src/lib/support/support.ts @@ -0,0 +1,344 @@ +// Copyright (C) 2026 Nethesis S.r.l. 
+// SPDX-License-Identifier: GPL-3.0-or-later + +import axios from 'axios' +import { API_URL } from '../config' +import { useLoginStore } from '@/stores/login' +import { getQueryStringParams, type Pagination } from '../common' + +export const SUPPORT_SESSIONS_KEY = 'supportSessions' +export const SUPPORT_SESSIONS_TABLE_ID = 'supportSessionsTable' + +export type SupportSessionStatus = 'pending' | 'active' | 'expired' | 'closed' + +// SessionRef is a lightweight reference to an individual session within a system group +export interface SessionRef { + id: string + node_id: string | null + status: SupportSessionStatus + started_at: string + expires_at: string +} + +// SystemSessionGroup represents a system with its aggregated support session info +// (returned by the backend with server-side grouping and pagination) +export interface SystemSessionGroup { + system_id: string + system_name: string + system_type: string + system_key: string + organization: { + id: string + name: string + type: string + } + started_at: string + expires_at: string + status: SupportSessionStatus + session_count: number + node_count: number + sessions: SessionRef[] +} + +// SupportSession is the full session model (used for single-session detail) +export interface SupportSession { + id: string + system_id: string + node_id: string | null + session_token: string + started_at: string + expires_at: string + status: SupportSessionStatus + closed_at: string | null + closed_by: string | null + created_at: string + updated_at: string + system_name: string + system_type: string + system_key: string + organization: { + id: string + name: string + type: string + } +} + +export interface SupportAccessLog { + id: string + session_id: string + operator_id: string + operator_name: string + access_type: string + connected_at: string + disconnected_at: string | null + metadata: Record | null +} + +interface SupportSessionsResponse { + code: number + message: string + data: { + support_sessions: 
SystemSessionGroup[] + pagination: Pagination + } +} + +interface SupportSessionResponse { + code: number + message: string + data: SupportSession +} + +interface SupportAccessLogsResponse { + code: number + message: string + data: { + access_logs: SupportAccessLog[] + pagination: Pagination + } +} + +export const getSupportSessionsQueryStringParams = ( + pageNum: number, + pageSize: number, + statusFilter: SupportSessionStatus[], + sortBy: string | null, + sortDescending: boolean, +) => { + const searchParams = new URLSearchParams({ + page: pageNum.toString(), + page_size: pageSize.toString(), + sort_by: sortBy || '', + sort_direction: sortDescending ? 'desc' : 'asc', + }) + + statusFilter.forEach((status) => { + searchParams.append('status', status) + }) + + return searchParams.toString() +} + +export const getSupportSessions = ( + pageNum: number, + pageSize: number, + statusFilter: SupportSessionStatus[], + sortBy: string, + sortDescending: boolean, +) => { + const loginStore = useLoginStore() + const params = getSupportSessionsQueryStringParams( + pageNum, + pageSize, + statusFilter, + sortBy, + sortDescending, + ) + + return axios + .get(`${API_URL}/support-sessions?${params}`, { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }) + .then((res) => res.data.data) +} + +export const getSupportSession = (id: string) => { + const loginStore = useLoginStore() + + return axios + .get(`${API_URL}/support-sessions/${id}`, { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }) + .then((res) => res.data.data) +} + +export const extendSupportSession = (id: string, hours: number) => { + const loginStore = useLoginStore() + + return axios + .patch( + `${API_URL}/support-sessions/${id}/extend`, + { hours }, + { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }, + ) + .then((res) => res.data) +} + +export const closeSupportSession = (id: string) => { + const loginStore = useLoginStore() + + return axios + 
.delete(`${API_URL}/support-sessions/${id}`, { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }) + .then((res) => res.data) +} + +export interface SupportServiceItem { + name: string + label: string + host: string + path: string + pathPrefix: string + moduleId: string + nodeId: string +} + +export interface SupportServiceGroup { + moduleId: string + moduleLabel: string + nodeId: string + services: SupportServiceItem[] +} + +interface SupportServicesRawResponse { + code: number + message: string + data: { + services: Record< + string, + { + target: string + host: string + tls: boolean + label: string + path?: string + path_prefix?: string + module_id?: string + node_id?: string + } + > + } +} + +interface ProxyTokenResponse { + code: number + message: string + data: { + url: string + token: string + } +} + +export const getSupportSessionServices = (sessionId: string): Promise => { + const loginStore = useLoginStore() + + return axios + .get(`${API_URL}/support-sessions/${sessionId}/services`, { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }) + .then((res) => { + const servicesMap = res.data.data?.services || {} + + // Build flat list of service items + const items: SupportServiceItem[] = Object.entries(servicesMap).map(([name, svc]) => ({ + name, + label: svc.label || '', + host: svc.host || '', + path: svc.path || '', + pathPrefix: svc.path_prefix || '', + moduleId: svc.module_id || '', + nodeId: svc.node_id || '', + })) + + // Group by nodeId + moduleId + // Key: "nodeId:moduleId" for grouped, "nodeId:" for ungrouped + const groupMap = new Map() + const ungrouped: SupportServiceItem[] = [] + + for (const item of items) { + if (!item.moduleId) { + ungrouped.push(item) + continue + } + const key = `${item.nodeId}:${item.moduleId}` + let group = groupMap.get(key) + if (!group) { + group = { + moduleId: item.moduleId, + moduleLabel: item.label, + nodeId: item.nodeId, + services: [], + } + groupMap.set(key, group) + } + // Use the 
first non-empty label as the module label + if (!group.moduleLabel && item.label) { + group.moduleLabel = item.label + } + group.services.push(item) + } + + // Sort groups by nodeId then moduleId, services within groups by name + const groups = Array.from(groupMap.values()).sort((a, b) => { + const nodeCompare = a.nodeId.localeCompare(b.nodeId, undefined, { numeric: true }) + if (nodeCompare !== 0) return nodeCompare + return a.moduleId.localeCompare(b.moduleId) + }) + for (const g of groups) { + g.services.sort((a, b) => a.name.localeCompare(b.name)) + } + + // Add ungrouped services as individual groups + ungrouped.sort((a, b) => a.name.localeCompare(b.name)) + for (const item of ungrouped) { + groups.push({ + moduleId: '', + moduleLabel: '', + nodeId: item.nodeId, + services: [item], + }) + } + + return groups + }) +} + +export const generateSupportProxyToken = (sessionId: string, service: string) => { + const loginStore = useLoginStore() + + return axios + .post( + `${API_URL}/support-sessions/${sessionId}/proxy-token`, + { service }, + { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }, + ) + .then((res) => res.data.data) +} + +interface TerminalTicketResponse { + code: number + message: string + data: { + ticket: string + } +} + +export const getTerminalTicket = (sessionId: string): Promise => { + const loginStore = useLoginStore() + + return axios + .post( + `${API_URL}/support-sessions/${sessionId}/terminal-ticket`, + {}, + { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }, + ) + .then((res) => res.data.data.ticket) +} + +export const getSupportSessionLogs = (sessionId: string, pageNum: number, pageSize: number) => { + const loginStore = useLoginStore() + const params = getQueryStringParams(pageNum, pageSize, null, null, false) + + return axios + .get(`${API_URL}/support-sessions/${sessionId}/logs?${params}`, { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }) + .then((res) => res.data.data) +} diff 
--git a/frontend/src/main.ts b/frontend/src/main.ts index 6284bd82..c6710b8b 100644 --- a/frontend/src/main.ts +++ b/frontend/src/main.ts @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later import './assets/main.css' +import '@xterm/xterm/css/xterm.css' import { createApp } from 'vue' import { createPinia } from 'pinia' diff --git a/frontend/src/queries/support/supportSessions.ts b/frontend/src/queries/support/supportSessions.ts new file mode 100644 index 00000000..8a43ea2b --- /dev/null +++ b/frontend/src/queries/support/supportSessions.ts @@ -0,0 +1,68 @@ +// Copyright (C) 2026 Nethesis S.r.l. +// SPDX-License-Identifier: GPL-3.0-or-later + +import { canConnectSystems } from '@/lib/permissions' +import { DEFAULT_PAGE_SIZE, loadPageSizeFromStorage } from '@/lib/tablePageSize' +import { + getSupportSessions, + SUPPORT_SESSIONS_KEY, + SUPPORT_SESSIONS_TABLE_ID, +} from '@/lib/support/support' +import { useLoginStore } from '@/stores/login' +import { defineQuery, useQuery } from '@pinia/colada' +import { ref, watch } from 'vue' + +export const useSupportSessions = defineQuery(() => { + const loginStore = useLoginStore() + + const pageNum = ref(1) + const pageSize = ref(DEFAULT_PAGE_SIZE) + const sortBy = ref('started_at') + const sortDescending = ref(true) + + const { state, asyncStatus, refetch, ...rest } = useQuery({ + key: () => [ + SUPPORT_SESSIONS_KEY, + { + pageNum: pageNum.value, + pageSize: pageSize.value, + sortBy: sortBy.value, + sortDirection: sortDescending.value, + }, + ], + enabled: () => !!loginStore.jwtToken && canConnectSystems(), + query: () => + getSupportSessions(pageNum.value, pageSize.value, [], sortBy.value, sortDescending.value), + refetchOnWindowFocus: true, + }) + + // load table page size from storage + watch( + () => loginStore.userInfo?.email, + (email) => { + if (email) { + pageSize.value = loadPageSizeFromStorage(SUPPORT_SESSIONS_TABLE_ID) + } + }, + { immediate: true }, + ) + + // reset to first page when page size changes + 
watch( + () => pageSize.value, + () => { + pageNum.value = 1 + }, + ) + + return { + ...rest, + state, + asyncStatus, + refetch, + pageNum, + pageSize, + sortBy, + sortDescending, + } +}) diff --git a/frontend/src/router/index.ts b/frontend/src/router/index.ts index 62611369..c65f4dd7 100644 --- a/frontend/src/router/index.ts +++ b/frontend/src/router/index.ts @@ -90,6 +90,11 @@ const router = createRouter({ name: 'customer_detail', component: () => import('../views/CustomerDetailView.vue'), }, + { + path: '/support-sessions', + name: 'support_sessions', + component: () => import('../views/SupportSessionsView.vue'), + }, ], }) diff --git a/frontend/src/views/SupportSessionsView.vue b/frontend/src/views/SupportSessionsView.vue new file mode 100644 index 00000000..60c84591 --- /dev/null +++ b/frontend/src/views/SupportSessionsView.vue @@ -0,0 +1,21 @@ + + + + + diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index 7aa32c69..53a898ee 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -8,6 +8,9 @@ import tailwindcss from '@tailwindcss/vite' // https://vite.dev/config/ export default defineConfig({ plugins: [vue(), vueDevTools(), tailwindcss()], + server: { + allowedHosts: ['my.localtest.me'], + }, resolve: { alias: { '@': fileURLToPath(new URL('./src', import.meta.url)), From 93f94c1f8e4b1a15a1d1d3d73c09155dddcba8e6 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Mon, 9 Mar 2026 21:29:32 +0100 Subject: [PATCH 04/28] chore(infra): add support service to proxy, deploy, and CI/CD Add support service routing in nginx proxy, Render.com deployment config, CI pipeline with tunnel-client Docker image and rolling dev release, release workflow with tunnel-client binary and SBOM, connect:systems RBAC permission. 
--- .github/workflows/ci-main.yml | 62 +++++++++++- .github/workflows/pr-build-trigger.yml | 4 +- .github/workflows/pr-update-template.yml | 4 +- .github/workflows/release-production.yml | 121 +++++++++++++++++++++++ .gitignore | 3 + README.md | 25 +++-- deploy.sh | 8 +- docker-compose.yml | 79 +++++++++++++++ proxy/Makefile | 77 +++++++++++++++ proxy/README.md | 49 ++++++++- proxy/entrypoint.sh | 8 +- proxy/nginx-dev.conf | 92 +++++++++++++++++ proxy/nginx.conf | 76 +++++++++++++- proxy/nginx.conf.local | 52 ++++++++++ proxy/port-forward.py | 47 +++++++++ release.sh | 33 +++++-- render.yaml | 98 +++++++++++++++++- sync/configs/config.yml.example | 3 +- version.json | 3 +- 19 files changed, 805 insertions(+), 39 deletions(-) create mode 100644 proxy/Makefile create mode 100644 proxy/nginx-dev.conf create mode 100644 proxy/port-forward.py diff --git a/.github/workflows/ci-main.yml b/.github/workflows/ci-main.yml index c897f189..e82c3f3a 100644 --- a/.github/workflows/ci-main.yml +++ b/.github/workflows/ci-main.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - component: [backend, sync, collect] + component: [backend, sync, collect, services/support] defaults: run: @@ -142,6 +142,11 @@ jobs: context: proxy - component: mimir context: services/mimir + - component: support + context: services/support + - component: tunnel-client + context: services/support + dockerfile: services/support/Containerfile.tunnel-client steps: - uses: actions/checkout@v4 @@ -200,3 +205,58 @@ jobs: with: sarif_file: 'trivy-${{ matrix.component }}-results.sarif' category: 'trivy-${{ matrix.component }}' + + # =========================================================================== + # TUNNEL CLIENT BINARY - Rolling dev release (only on main push) + # =========================================================================== + tunnel-client-binary: + runs-on: ubuntu-latest + needs: [go-tests, frontend-tests] + if: github.event_name == 'push' && github.ref == 
'refs/heads/main' + permissions: + contents: write + + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.24' + cache: false + + - name: Build tunnel-client binary + working-directory: services/support + run: | + COMMIT="${{ github.sha }}" + BUILD_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + LDFLAGS="-s -w -X github.com/nethesis/my/services/support/pkg/version.Version=dev-${COMMIT::7} -X github.com/nethesis/my/services/support/pkg/version.Commit=${COMMIT} -X github.com/nethesis/my/services/support/pkg/version.BuildTime=${BUILD_TIME}" + + mkdir -p dist + GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -ldflags="${LDFLAGS}" -o dist/tunnel-client-linux-amd64 ./cmd/tunnel-client/main.go + + - name: Update dev release + uses: softprops/action-gh-release@v2 + with: + tag_name: dev + name: Development Build + body: | + Rolling development build from `main` branch. + Updated on every push to main. + + **Commit:** ${{ github.sha }} + + ## Download + + - `tunnel-client-linux-amd64` — tunnel client for NethSecurity (OpenWrt) + prerelease: true + make_latest: false + files: | + services/support/dist/tunnel-client-linux-amd64 + + - name: Delete old dev tag and recreate + run: | + git tag -d dev 2>/dev/null || true + git push origin :refs/tags/dev 2>/dev/null || true + git tag dev + git push origin dev diff --git a/.github/workflows/pr-build-trigger.yml b/.github/workflows/pr-build-trigger.yml index 71c85db0..8d77f38d 100644 --- a/.github/workflows/pr-build-trigger.yml +++ b/.github/workflows/pr-build-trigger.yml @@ -64,7 +64,7 @@ jobs: echo "Updating build triggers with timestamp: $TIMESTAMP" UPDATED_FILES=() - for component in backend collect frontend proxy services/mimir; do + for component in backend collect frontend proxy services/mimir services/support; do if [ -f "$component/.render-build-trigger" ]; then echo "Updating $component/.render-build-trigger" perl -i -pe "s/LAST_UPDATE=.*/LAST_UPDATE=$TIMESTAMP/" 
"$component/.render-build-trigger" @@ -108,7 +108,7 @@ jobs: echo "✅ Safety checks passed. Proceeding with commit to branch: $TARGET_BRANCH" - git add */.render-build-trigger services/mimir/.render-build-trigger + git add */.render-build-trigger services/mimir/.render-build-trigger services/support/.render-build-trigger git commit -m "chore: update build triggers for PR deployment Auto-updated .render-build-trigger files to ensure all services diff --git a/.github/workflows/pr-update-template.yml b/.github/workflows/pr-update-template.yml index f2220e44..19bf2d29 100644 --- a/.github/workflows/pr-update-template.yml +++ b/.github/workflows/pr-update-template.yml @@ -111,7 +111,9 @@ jobs: { name: 'Collect Build', jobKey: 'build (collect)' }, { name: 'Sync Build', jobKey: 'build (sync)' }, { name: 'Frontend Build', jobKey: 'build (frontend)' }, - { name: 'Proxy Build', jobKey: 'build (proxy)' } + { name: 'Proxy Build', jobKey: 'build (proxy)' }, + { name: 'Support Tests', jobKey: 'go-tests (services/support)' }, + { name: 'Support Build', jobKey: 'build (support)' } ]; for (const { name, jobKey } of badgeReplacements) { diff --git a/.github/workflows/release-production.yml b/.github/workflows/release-production.yml index 68051755..9b7e9111 100644 --- a/.github/workflows/release-production.yml +++ b/.github/workflows/release-production.yml @@ -62,6 +62,15 @@ jobs: exit 1 fi + - name: Check code formatting (support) + working-directory: services/support + run: | + if [ "$(gofmt -s -l . | wc -l)" -gt 0 ]; then + echo "Code is not formatted properly in services/support:" + gofmt -s -l . + exit 1 + fi + - name: Run backend linting uses: golangci/golangci-lint-action@v6 with: @@ -83,6 +92,13 @@ jobs: working-directory: collect args: --timeout=10m + - name: Run support linting + uses: golangci/golangci-lint-action@v6 + with: + version: latest + working-directory: services/support + args: --timeout=10m + - name: Run backend tests working-directory: backend run: go test ./... 
@@ -95,6 +111,10 @@ jobs: working-directory: collect run: go test ./... + - name: Run support tests + working-directory: services/support + run: go test ./... + - name: Set up Node.js uses: actions/setup-node@v4 with: @@ -196,6 +216,33 @@ jobs: tar -czf sync-linux-amd64.tar.gz sync-linux-amd64-release rm -rf sync-linux-amd64-release + # Support Release + - name: Build support binary + working-directory: services/support + run: | + mkdir -p dist + + VERSION="${{ steps.version.outputs.VERSION }}" + COMMIT="${{ github.sha }}" + BUILD_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + LDFLAGS="-s -w -X github.com/nethesis/my/services/support/pkg/version.Version=${VERSION} -X github.com/nethesis/my/services/support/pkg/version.Commit=${COMMIT} -X github.com/nethesis/my/services/support/pkg/version.BuildTime=${BUILD_TIME}" + + GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -ldflags="${LDFLAGS}" -o dist/support-linux-amd64 main.go + + cd dist + tar -czf support-linux-amd64.tar.gz support-linux-amd64 + + # Tunnel Client Release + - name: Build tunnel-client binary + working-directory: services/support + run: | + VERSION="${{ steps.version.outputs.VERSION }}" + COMMIT="${{ github.sha }}" + BUILD_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + LDFLAGS="-s -w -X github.com/nethesis/my/services/support/pkg/version.Version=${VERSION} -X github.com/nethesis/my/services/support/pkg/version.Commit=${COMMIT} -X github.com/nethesis/my/services/support/pkg/version.BuildTime=${BUILD_TIME}" + + GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -ldflags="${LDFLAGS}" -o dist/tunnel-client-linux-amd64 ./cmd/tunnel-client/main.go + # Docker Images - name: Extract backend metadata id: backend_meta @@ -395,6 +442,76 @@ jobs: format: cyclonedx-json output-file: mimir-sbom.cdx.json + - name: Extract support metadata + id: support_meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ github.repository }}/support + tags: | + type=semver,pattern=v{{version}},value=${{ 
steps.version.outputs.VERSION }} + type=raw,value=latest + + - name: Build and push support Docker image + uses: docker/build-push-action@v5 + with: + context: services/support + file: services/support/Containerfile + platforms: linux/amd64 + push: true + tags: ${{ steps.support_meta.outputs.tags }} + labels: | + ${{ steps.support_meta.outputs.labels }} + org.opencontainers.image.title=My Nethesis Support + org.opencontainers.image.description=WebSocket tunnel support service for My Nethesis + cache-from: type=gha,scope=support + cache-to: type=gha,mode=max,scope=support + build-args: | + VERSION=${{ steps.version.outputs.VERSION }} + COMMIT=${{ github.sha }} + BUILD_TIME=${{ steps.support_meta.outputs.labels['org.opencontainers.image.created'] }} + + - name: Generate support SBOM + uses: anchore/sbom-action@v0 + with: + image: ${{ env.REGISTRY }}/${{ steps.image_prefix.outputs.PREFIX }}/support:${{ steps.version.outputs.VERSION }} + format: cyclonedx-json + output-file: support-sbom.cdx.json + + - name: Extract tunnel-client metadata + id: tunnel_client_meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ github.repository }}/tunnel-client + tags: | + type=semver,pattern=v{{version}},value=${{ steps.version.outputs.VERSION }} + type=raw,value=latest + + - name: Build and push tunnel-client Docker image + uses: docker/build-push-action@v5 + with: + context: services/support + file: services/support/Containerfile.tunnel-client + platforms: linux/amd64 + push: true + tags: ${{ steps.tunnel_client_meta.outputs.tags }} + labels: | + ${{ steps.tunnel_client_meta.outputs.labels }} + org.opencontainers.image.title=My Nethesis Tunnel Client + org.opencontainers.image.description=WebSocket tunnel client for NS8 remote support + cache-from: type=gha,scope=tunnel-client + cache-to: type=gha,mode=max,scope=tunnel-client + build-args: | + VERSION=${{ steps.version.outputs.VERSION }} + COMMIT=${{ github.sha }} + BUILD_TIME=${{ 
steps.tunnel_client_meta.outputs.labels['org.opencontainers.image.created'] }} + + - name: Generate tunnel-client SBOM + uses: anchore/sbom-action@v0 + with: + image: ${{ env.REGISTRY }}/${{ steps.image_prefix.outputs.PREFIX }}/tunnel-client:${{ steps.version.outputs.VERSION }} + format: cyclonedx-json + output-file: tunnel-client-sbom.cdx.json + # Create GitHub Release - name: Create Release uses: softprops/action-gh-release@v2 @@ -404,9 +521,13 @@ jobs: backend/dist/*.tar.gz sync/dist/*.tar.gz collect/dist/*.tar.gz + services/support/dist/support-linux-amd64.tar.gz + services/support/dist/tunnel-client-linux-amd64 backend-sbom.cdx.json sync-sbom.cdx.json collect-sbom.cdx.json + support-sbom.cdx.json + tunnel-client-sbom.cdx.json frontend-sbom.cdx.json proxy-sbom.cdx.json mimir-sbom.cdx.json diff --git a/.gitignore b/.gitignore index c253b6de..1f5a1bc7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,9 @@ collect/collect collect/main collect/build/ collect/*.exe +services/support/build/ +services/support/tunnel-client +services/support/*.exe *.exe *.bin diff --git a/README.md b/README.md index f17775d2..129dc1e7 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,11 @@ Proxy: [![Proxy Build](https://img.shields.io/github/actions/workflow/status/NethServer/my/ci-main.yml?job=proxy-build&label=Build&style=for-the-badge)](https://github.com/NethServer/my/actions/workflows/ci-main.yml) +Support: + +[![Support Tests](https://img.shields.io/github/actions/workflow/status/NethServer/my/ci-main.yml?job=support-tests&label=Tests&style=for-the-badge)](https://github.com/NethServer/my/actions/workflows/ci-main.yml) +[![Support Build](https://img.shields.io/github/actions/workflow/status/NethServer/my/ci-main.yml?job=support-build&label=Build&style=for-the-badge)](https://github.com/NethServer/my/actions/workflows/ci-main.yml) + #### Release 
[![Release](https://img.shields.io/github/actions/workflow/status/NethServer/my/release-production.yml?style=for-the-badge&label=Release)](https://github.com/NethServer/my/actions/workflows/release-production.yml) @@ -53,8 +58,10 @@ Web application providing centralized authentication and management using Logto - **[frontend/](./frontend/)** - Vue.js application for UI - **[backend/](./backend/)** - Go REST API with Logto JWT authentication and RBAC - **[collect/](./collect/)** - Go REST API with Redis queues to handle inventories +- **[services/support/](./services/support/)** - Go service for WebSocket tunnel-based remote support sessions - **[sync/](./sync/)** - CLI tool for RBAC configuration synchronization -- **[proxy/](./proxy/)** - nginx configuration as load balancer +- **[proxy/](./proxy/)** - nginx reverse proxy routing to all services +- **[services/mimir/](./services/mimir/)** - Grafana Mimir metrics storage with S3 backend ## 🚀 Quick Start @@ -72,7 +79,7 @@ Web application providing centralized authentication and management using Logto Complete replica of the production environment with all services containerized: ```bash -# Start all services (PostgreSQL, Redis, Backend, Collect, Frontend, Proxy) +# Start all services (PostgreSQL, Redis, Mimir, Backend, Collect, Support, Frontend, Proxy) docker-compose up -d # OR podman-compose up -d @@ -115,7 +122,10 @@ cd sync && make dev-setup && make build # 5. Start the Collect service (port 8081) cd collect && make dev-setup && make run -# 6. Start the Frontend (port 5173) +# 6. Start the Support service (port 8082) +cd services/support && make dev-setup && make run + +# 7. Start the Frontend (port 5173) cd frontend && npm ci && npm run dev ``` @@ -123,8 +133,9 @@ cd frontend && npm ci && npm run dev 1. **RBAC Setup**: [sync/README.md](./sync/README.md) - Use `sync init` for complete Logto configuration 2. 
**Backend Development**: [backend/README.md](./backend/README.md) - API server setup and environment configuration 3. **Collect Development**: [collect/README.md](./collect/README.md) - Inventory service setup and environment configuration -4. **Frontend Development**: [frontend/README.md](./frontend/README.md) - Vue.js setup and environment configuration -5. **Production Deploy**: Use `./deploy.sh` for automated deployment +4. **Support Development**: [services/support/README.md](./services/support/README.md) - WebSocket tunnel support service +5. **Frontend Development**: [frontend/README.md](./frontend/README.md) - Vue.js setup and environment configuration +6. **Production Deploy**: Use `./deploy.sh` for automated deployment ## 🌐 Deployment Environments @@ -137,7 +148,7 @@ cd frontend && npm ci && npm run dev - **Trigger**: Manual deployment via `./deploy.sh` script - **Auto-Deploy**: Render automatically deploys when `render.yaml` is updated - **Manual Control**: Deploy only when explicitly triggered -- **Security**: Private services (Backend, Collect, Frontend) only accessible through Proxy +- **Security**: Private services (Backend, Collect, Support, Frontend) only accessible through Proxy ## 📝 Configuration @@ -146,6 +157,7 @@ See individual component documentation for setup: - **Frontend**: [frontend/README.md](./frontend/README.md) - Environment variables and setup for frontend - **Backend**: [backend/README.md](./backend/README.md) - Environment variables and setup for backend - **Collect**: [collect/README.md](./collect/README.md) - Environment variables and setup for collect +- **Support**: [services/support/README.md](./services/support/README.md) - Environment variables and setup for support - **sync CLI**: [sync/README.md](./sync/README.md) - Use `sync init` to generate all required variables - **proxy**: [proxy/README.md](./proxy/README.md) - nginx configuration and setup for load balancer @@ -178,6 +190,7 @@ Component-specific technical 
documentation: - **[backend](./backend/README.md)** - Server setup, environment variables, authorization architecture, and consent-based impersonation - **[backend OpenAPI](./backend/openapi.yaml)** - Complete API specification with authentication - **[collect](./collect/README.md)** - Server setup, environment variables and inventory structure +- **[support](./services/support/README.md)** - WebSocket tunnel service for remote support sessions - **[sync CLI](./sync/README.md)** - RBAC configuration and `sync init` setup - **[deploy script](./deploy.sh)** - Production deployment script for Render - **[proxy](./proxy/README.md)** - Production load balancer configuration with nginx diff --git a/deploy.sh b/deploy.sh index 6e03d173..a8883704 100755 --- a/deploy.sh +++ b/deploy.sh @@ -88,7 +88,7 @@ verify_docker_images() { info "Verifying Docker images exist for version $version..." - for image in backend collect frontend proxy; do + for image in backend collect frontend proxy mimir support; do local image_url="ghcr.io/nethserver/my/$image:$version" info "Checking: $image_url" @@ -123,6 +123,12 @@ update_render_yaml() { # Update proxy image tag sed -i.tmp 's|ghcr\.io/nethserver/my/proxy:v[0-9]*\.[0-9]*\.[0-9]*|ghcr.io/nethserver/my/proxy:'"$version"'|g' render.yaml + # Update mimir image tag + sed -i.tmp 's|ghcr\.io/nethserver/my/mimir:v[0-9]*\.[0-9]*\.[0-9]*|ghcr.io/nethserver/my/mimir:'"$version"'|g' render.yaml + + # Update support image tag + sed -i.tmp 's|ghcr\.io/nethserver/my/support:v[0-9]*\.[0-9]*\.[0-9]*|ghcr.io/nethserver/my/support:'"$version"'|g' render.yaml + # Remove sed backup file rm -f render.yaml.tmp diff --git a/docker-compose.yml b/docker-compose.yml index d4372486..329644e5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,8 @@ # - Main App: http://localhost:9090 # - Backend: http://localhost:18080/api/health # - Collect: http://localhost:18081/api/health +# - Support: http://localhost:18082/api/health +# - Mimir: 
http://localhost:19009 # - Frontend: http://localhost:13000 # # 💾 Databases: @@ -73,6 +75,36 @@ services: networks: - my-full-network + # ============================================================================= + # METRICS + # ============================================================================= + + # Mimir (Grafana Mimir - metrics storage) + mimir-full: + build: + context: ./services/mimir + dockerfile: Containerfile + container_name: ${COMPOSE_PROJECT_NAME:-my}-mimir-full + restart: unless-stopped + environment: + PORT: 9009 + MIMIR_S3_ENDPOINT: ${MIMIR_S3_ENDPOINT:-} + MIMIR_S3_ACCESS_KEY: ${MIMIR_S3_ACCESS_KEY:-} + MIMIR_S3_SECRET_KEY: ${MIMIR_S3_SECRET_KEY:-} + MIMIR_S3_BUCKET: ${MIMIR_S3_BUCKET:-} + MIMIR_S3_ALERTMANAGER_BUCKET: ${MIMIR_S3_ALERTMANAGER_BUCKET:-} + MIMIR_S3_RULER_BUCKET: ${MIMIR_S3_RULER_BUCKET:-} + ports: + - "19009:9009" + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9009/ready"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + networks: + - my-full-network + # ============================================================================= # BACKEND SERVICES # ============================================================================= @@ -96,6 +128,8 @@ services: DATABASE_URL: postgresql://noc_user:noc_password@postgres-full:5432/noc?sslmode=disable REDIS_URL: redis://redis-full:6379 REDIS_DB: 0 + # Support service URL + SUPPORT_SERVICE_URL: http://support-full:8080 # Server configuration for container environment LISTEN_ADDRESS: 0.0.0.0:8080 GIN_MODE: debug @@ -135,6 +169,8 @@ services: DATABASE_URL: postgresql://noc_user:noc_password@postgres-full:5432/noc?sslmode=disable REDIS_URL: redis://redis-full:6379 REDIS_DB: 1 + # Mimir URL for metrics forwarding + MIMIR_URL: http://mimir-full:9009 # Server configuration for container environment LISTEN_ADDRESS: 0.0.0.0:8080 GIN_MODE: debug @@ -142,6 +178,47 @@ services: LOG_FORMAT: console ports: - "18081:8080" # 
Different port to avoid conflicts with individual dev + depends_on: + postgres-full: + condition: service_healthy + redis-full: + condition: service_healthy + mimir-full: + condition: service_healthy + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/api/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + networks: + - my-full-network + + # Support Service + support-full: + build: + context: ./services/support + dockerfile: Containerfile + args: + VERSION: ${VERSION:-dev} + COMMIT: ${COMMIT:-unknown} + BUILD_TIME: ${BUILD_TIME:-unknown} + container_name: ${COMPOSE_PROJECT_NAME:-my}-support-full + restart: unless-stopped + env_file: + - ./services/support/.env + environment: + # Override database and redis URLs for full stack containers + DATABASE_URL: postgresql://noc_user:noc_password@postgres-full:5432/noc?sslmode=disable + REDIS_URL: redis://redis-full:6379 + REDIS_DB: 2 + # Server configuration for container environment + LISTEN_ADDRESS: 0.0.0.0:8080 + GIN_MODE: debug + LOG_LEVEL: debug + LOG_FORMAT: console + ports: + - "18082:8080" # Different port to avoid conflicts with individual dev depends_on: postgres-full: condition: service_healthy @@ -223,12 +300,14 @@ services: PORT: 8080 BACKEND_SERVICE_NAME: backend-full:8080 COLLECT_SERVICE_NAME: collect-full:8080 + SUPPORT_SERVICE_NAME: support-full:8080 FRONTEND_SERVICE_NAME: frontend-full:8080 ports: - "9090:8080" # Main entry point - browser-safe port depends_on: - backend-full - collect-full + - support-full - frontend-full healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/health"] diff --git a/proxy/Makefile b/proxy/Makefile new file mode 100644 index 00000000..3a4b6214 --- /dev/null +++ b/proxy/Makefile @@ -0,0 +1,77 @@ +CONTAINER_NAME = my-nginx +IMAGE = docker.io/library/nginx:alpine +INTERNAL_PORT = 8443 +CONF = $(shell pwd)/nginx-dev.conf +CERT = $(shell pwd)/my.localtest.me+1.pem +KEY = $(shell 
pwd)/my.localtest.me+1-key.pem +FWD_PID = /tmp/my-nginx-fwd.pid + +.PHONY: dev-setup dev-up dev-down dev-restart dev-logs dev-status + +## Generate TLS certificates with mkcert (one-time setup) +dev-setup: + @if [ -f $(CERT) ] && [ -f $(KEY) ]; then \ + echo "TLS certificates already exist"; \ + else \ + if ! command -v mkcert >/dev/null 2>&1; then \ + echo "Error: mkcert is not installed."; \ + echo "Install it with: brew install mkcert (macOS) or see https://github.com/FiloSottile/mkcert"; \ + exit 1; \ + fi; \ + echo "Installing local CA (if needed)..."; \ + mkcert -install 2>/dev/null || true; \ + echo "Generating TLS certificates..."; \ + cd $(shell pwd) && mkcert "my.localtest.me" "*.support.my.localtest.me"; \ + echo "Certificates generated: $(CERT) and $(KEY)"; \ + fi + +## Start nginx dev proxy on port 443 (requires sudo for port forwarding) +dev-up: dev-setup + @if podman ps --format '{{.Names}}' | grep -q '^$(CONTAINER_NAME)$$'; then \ + echo "$(CONTAINER_NAME) is already running"; \ + else \ + podman run -d --rm \ + --name $(CONTAINER_NAME) \ + -p $(INTERNAL_PORT):443 \ + -v $(CONF):/etc/nginx/nginx.conf:ro,z \ + -v $(CERT):/etc/nginx/certs/cert.pem:ro,z \ + -v $(KEY):/etc/nginx/certs/key.pem:ro,z \ + $(IMAGE) && \ + echo "$(CONTAINER_NAME) started on port $(INTERNAL_PORT)"; \ + fi + @if [ -f $(FWD_PID) ] && sudo kill -0 $$(cat $(FWD_PID)) 2>/dev/null; then \ + echo "port 443 -> $(INTERNAL_PORT) forwarding is already active"; \ + else \ + echo "Starting port 443 -> $(INTERNAL_PORT) forwarding (requires sudo)..."; \ + sudo python3 $(shell pwd)/port-forward.py $(INTERNAL_PORT) 443 & \ + echo $$! 
> $(FWD_PID); \ + sleep 0.5; \ + echo "port 443 -> $(INTERNAL_PORT) forwarding active"; \ + fi + +## Stop nginx dev proxy +dev-down: + @if [ -f $(FWD_PID) ]; then \ + sudo kill $$(cat $(FWD_PID)) 2>/dev/null; \ + rm -f $(FWD_PID); \ + echo "port 443 forwarding stopped"; \ + fi + @podman stop $(CONTAINER_NAME) 2>/dev/null && \ + echo "$(CONTAINER_NAME) stopped" || \ + echo "$(CONTAINER_NAME) is not running" + +## Restart nginx dev proxy +dev-restart: dev-down dev-up + +## Show nginx dev proxy logs +dev-logs: + @podman logs -f $(CONTAINER_NAME) + +## Show nginx dev proxy status +dev-status: + @podman ps --filter name=$(CONTAINER_NAME) --format 'table {{.Names}}\t{{.Status}}\t{{.Ports}}' + @if [ -f $(FWD_PID) ] && sudo kill -0 $$(cat $(FWD_PID)) 2>/dev/null; then \ + echo "port 443 -> $(INTERNAL_PORT) forwarding: active (pid: $$(cat $(FWD_PID)))"; \ + else \ + echo "port 443 -> $(INTERNAL_PORT) forwarding: inactive"; \ + fi diff --git a/proxy/README.md b/proxy/README.md index 17da6fcd..d2bad5c1 100644 --- a/proxy/README.md +++ b/proxy/README.md @@ -2,7 +2,7 @@ ## Overview -This nginx reverse proxy is the only public entry point for all My Nethesis services. Backend, collect, and frontend are private services (`pserv`) accessible only via this proxy on Render's internal network. +This nginx reverse proxy is the only public entry point for all My Nethesis services. Backend, collect, support, and frontend are private services (`pserv`) accessible only via this proxy on Render's internal network. 
- **Production**: `my.nethesis.it` - **QA**: `qa.my.nethesis.it` @@ -13,12 +13,14 @@ This nginx reverse proxy is the only public entry point for all My Nethesis serv my.nethesis.it (Production) ├── / → Frontend Service (private, HTTP :10000) ├── /backend/api/ → Backend Service (private, HTTP :10000) -└── /collect/api/ → Collect Service (private, HTTP :10000) +├── /collect/api/ → Collect Service (private, HTTP :10000) +└── /support/api/ → Support Service (private, HTTP :10000, WebSocket) qa.my.nethesis.it (QA) ├── / → Frontend Service (private, HTTP :10000) ├── /backend/api/ → Backend Service (private, HTTP :10000) -└── /collect/api/ → Collect Service (private, HTTP :10000) +├── /collect/api/ → Collect Service (private, HTTP :10000) +└── /support/api/ → Support Service (private, HTTP :10000, WebSocket) ``` All inter-service communication uses HTTP over Render's internal network. The proxy handles TLS termination for external clients. @@ -50,6 +52,38 @@ qa.my.nethesis.it CNAME my-proxy-qa.onrender.com - Add `qa.my.nethesis.it` - Wait for SSL certificate provisioning +## Local Development + +### Prerequisites +- [mkcert](https://github.com/FiloSottile/mkcert) (`brew install mkcert` on macOS) +- Docker/Podman + +### Setup + +```bash +# Generate TLS certificates (one-time, auto-runs on dev-up) +make dev-setup + +# Start nginx dev proxy on https://my.localtest.me +make dev-up + +# Stop proxy +make dev-down + +# View logs +make dev-logs + +# Check status +make dev-status +``` + +`make dev-setup` generates trusted TLS certificates for `my.localtest.me` and `*.support.my.localtest.me` using mkcert. It runs automatically as part of `make dev-up`, so a fresh clone only needs `make dev-up`. 
+ +The dev proxy routes: +- `https://my.localtest.me/api/` → backend (`:8080`) +- `https://my.localtest.me/` → frontend (`:5173`) +- `https://*.support.my.localtest.me/` → support subdomain proxy via backend (`:8080`) + ## Features ### Security Headers @@ -81,6 +115,7 @@ The entrypoint script extracts the DNS resolver from `/etc/resolv.conf` to resol Set automatically by Render: - `BACKEND_SERVICE_NAME` - Internal hostname of the backend service - `COLLECT_SERVICE_NAME` - Internal hostname of the collect service +- `SUPPORT_SERVICE_NAME` - Internal hostname of the support service - `FRONTEND_SERVICE_NAME` - Internal hostname of the frontend service - `RESOLVER` - DNS resolver extracted from `/etc/resolv.conf` @@ -92,11 +127,13 @@ Set automatically by Render: curl https://my.nethesis.it/health curl https://my.nethesis.it/backend/api/health curl https://my.nethesis.it/collect/api/health +curl https://my.nethesis.it/support/api/health # QA curl https://qa.my.nethesis.it/health curl https://qa.my.nethesis.it/backend/api/health curl https://qa.my.nethesis.it/collect/api/health +curl https://qa.my.nethesis.it/support/api/health ``` ### API Testing @@ -115,7 +152,7 @@ curl -X POST https://my.nethesis.it/collect/api/systems/inventory \ ## Security Notes -- Backend, collect, and frontend are private services, not accessible from the internet +- Backend, collect, support, and frontend are private services, not accessible from the internet - All inter-service communication uses HTTP over Render's internal network - TLS termination happens at the proxy level for external clients - Security headers added to all responses @@ -124,6 +161,8 @@ curl -X POST https://my.nethesis.it/collect/api/systems/inventory \ ## Performance Current configuration supports: - 1024 concurrent connections -- 30-second timeouts +- 30-second timeouts for backend/collect/frontend +- 24-hour timeouts for support (WebSocket tunnel connections) +- WebSocket upgrade support on `/support/api/` +- Gzip compression for text
content - HTTP/1.1 keep-alive connections diff --git a/proxy/entrypoint.sh b/proxy/entrypoint.sh index bc0c1ea8..51e1dd6d 100644 --- a/proxy/entrypoint.sh +++ b/proxy/entrypoint.sh @@ -6,6 +6,7 @@ echo "RENDER_SERVICE_NAME=$RENDER_SERVICE_NAME" echo "IS_PULL_REQUEST=$IS_PULL_REQUEST" echo "Original BACKEND_SERVICE_NAME=$BACKEND_SERVICE_NAME" echo "Original COLLECT_SERVICE_NAME=$COLLECT_SERVICE_NAME" +echo "Original SUPPORT_SERVICE_NAME=$SUPPORT_SERVICE_NAME" echo "Original FRONTEND_SERVICE_NAME=$FRONTEND_SERVICE_NAME" # Check if this is a PR preview environment @@ -19,10 +20,12 @@ if [ "$IS_PULL_REQUEST" = "true" ]; then # Apply PR suffix to all service names export BACKEND_SERVICE_NAME="${BACKEND_SERVICE_NAME}${PR_SUFFIX}" export COLLECT_SERVICE_NAME="${COLLECT_SERVICE_NAME}${PR_SUFFIX}" + export SUPPORT_SERVICE_NAME="${SUPPORT_SERVICE_NAME}${PR_SUFFIX}" export FRONTEND_SERVICE_NAME="${FRONTEND_SERVICE_NAME}${PR_SUFFIX}" - + echo "Adjusted BACKEND_SERVICE_NAME=$BACKEND_SERVICE_NAME" echo "Adjusted COLLECT_SERVICE_NAME=$COLLECT_SERVICE_NAME" + echo "Adjusted SUPPORT_SERVICE_NAME=$SUPPORT_SERVICE_NAME" echo "Adjusted FRONTEND_SERVICE_NAME=$FRONTEND_SERVICE_NAME" else echo '==> Not a PR preview, using original service names' @@ -37,6 +40,7 @@ SEARCH_DOMAIN=$(awk '/^search/ {print $2; exit}' /etc/resolv.conf) if [ -n "$SEARCH_DOMAIN" ]; then export BACKEND_SERVICE_NAME="${BACKEND_SERVICE_NAME}.${SEARCH_DOMAIN}" export COLLECT_SERVICE_NAME="${COLLECT_SERVICE_NAME}.${SEARCH_DOMAIN}" + export SUPPORT_SERVICE_NAME="${SUPPORT_SERVICE_NAME}.${SEARCH_DOMAIN}" export FRONTEND_SERVICE_NAME="${FRONTEND_SERVICE_NAME}.${SEARCH_DOMAIN}" fi @@ -44,7 +48,7 @@ echo "DNS resolver: $RESOLVER" echo "Search domain: ${SEARCH_DOMAIN:-none}" echo '==> Substituting nginx config...' 
-envsubst '$PORT $BACKEND_SERVICE_NAME $COLLECT_SERVICE_NAME $FRONTEND_SERVICE_NAME $RESOLVER' < /etc/nginx/nginx.conf > /tmp/nginx.conf +envsubst '$PORT $BACKEND_SERVICE_NAME $COLLECT_SERVICE_NAME $SUPPORT_SERVICE_NAME $FRONTEND_SERVICE_NAME $RESOLVER' < /etc/nginx/nginx.conf > /tmp/nginx.conf echo '==> Generated upstream URLs:' grep -E 'set.*upstream' /tmp/nginx.conf || true diff --git a/proxy/nginx-dev.conf b/proxy/nginx-dev.conf new file mode 100644 index 00000000..4cbf1c1d --- /dev/null +++ b/proxy/nginx-dev.conf @@ -0,0 +1,92 @@ +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Sanitize token query param from logs + map $request_uri $sanitized_request_uri { + "~^(?[^?]*\?)(?.*)token=[^&]*(?.*)$" "$prefix${before}token=[REDACTED]$after"; + default $request_uri; + } + + log_format main '$remote_addr [$time_local] "$request_method $sanitized_request_uri $server_protocol" ' + '$status $body_bytes_sent "$http_referer"'; + + access_log /dev/stdout main; + error_log /dev/stderr warn; + + # SSL certificates (mkcert) + ssl_certificate /etc/nginx/certs/cert.pem; + ssl_certificate_key /etc/nginx/certs/key.pem; + + # Support subdomain proxy — *.support.my.localtest.me + server { + listen 443 ssl; + server_name ~^.+\.support\..*; + + location / { + rewrite ^(.*)$ /support-proxy$1 break; + + proxy_pass http://host.containers.internal:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto https; + proxy_set_header X-Forwarded-Host $host; + + # WebSocket support + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Long timeouts for support sessions + proxy_connect_timeout 24h; + proxy_send_timeout 24h; + proxy_read_timeout 24h; + } + } + + # Main app — my.localtest.me + server { + listen 443 ssl default_server; + 
server_name _; + + # Backend API + location /api/ { + proxy_pass http://host.containers.internal:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto https; + proxy_set_header X-Forwarded-Host $host; + + # WebSocket support (terminal, etc.) + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Long timeouts for WebSocket terminal sessions + proxy_connect_timeout 30s; + proxy_send_timeout 24h; + proxy_read_timeout 24h; + } + + # Frontend (Vite dev server) + location / { + proxy_pass http://host.containers.internal:5173; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto https; + proxy_set_header X-Forwarded-Host $host; + + # WebSocket support (Vite HMR) + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + } +} diff --git a/proxy/nginx.conf b/proxy/nginx.conf index 0b900083..73cc7fc4 100644 --- a/proxy/nginx.conf +++ b/proxy/nginx.conf @@ -6,8 +6,14 @@ http { include /etc/nginx/mime.types; default_type application/octet-stream; + # Sanitize token query param from logs + map $request_uri $sanitized_request_uri { + "~^(?[^?]*\?)(?.*)token=[^&]*(?.*)$" "$prefix${before}token=[REDACTED]$after"; + default $request_uri; + } + # Logging - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + log_format main '$remote_addr - $remote_user [$time_local] "$request_method $sanitized_request_uri $server_protocol" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" "$http_x_forwarded_for"'; @@ -30,6 +36,34 @@ http { resolver ${RESOLVER} valid=30s; resolver_timeout 5s; + # Support subdomain proxy — *.support.{domain} + server { + listen ${PORT}; + server_name ~^.+\.support\..*; + + 
location / { + set $backend_upstream http://${BACKEND_SERVICE_NAME}:10000; + rewrite ^(.*)$ /support-proxy$1 break; + + proxy_pass $backend_upstream; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + + # WebSocket support + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Long timeouts for support sessions + proxy_connect_timeout 24h; + proxy_send_timeout 24h; + proxy_read_timeout 24h; + } + } + server { listen ${PORT}; server_name _; @@ -77,10 +111,44 @@ http { proxy_set_header X-Forwarded-Proto $scheme; proxy_set_header X-Forwarded-Host $host; - # Timeouts + # WebSocket support (terminal sessions) + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Long timeouts for WebSocket terminal sessions proxy_connect_timeout 30s; - proxy_send_timeout 30s; - proxy_read_timeout 30s; + proxy_send_timeout 24h; + proxy_read_timeout 24h; + } + + # Support service routes — only the tunnel endpoint is exposed (requires Basic Auth). + # All other support service endpoints (terminal, proxy, health) are internal-only + # and accessed by the backend via private networking. 
+ location = /support/api/tunnel { + set $support_upstream http://${SUPPORT_SERVICE_NAME}:10000; + rewrite ^/support/api/(.*)$ /api/$1 break; + proxy_pass $support_upstream; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + + # WebSocket support + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Long timeouts for tunnel connections + proxy_connect_timeout 24h; + proxy_send_timeout 24h; + proxy_read_timeout 24h; + } + + # Block all other support service endpoints + location /support/api/ { + return 403; } # Frontend routes - everything else diff --git a/proxy/nginx.conf.local b/proxy/nginx.conf.local index 39ec91f2..01548599 100644 --- a/proxy/nginx.conf.local +++ b/proxy/nginx.conf.local @@ -26,6 +26,33 @@ http { add_header X-XSS-Protection "1; mode=block" always; add_header Referrer-Policy "strict-origin-when-cross-origin" always; + # Support subdomain proxy — *.support.{domain} + server { + listen 8080; + server_name ~^.+\.support\..*; + + location / { + rewrite ^(.*)$ /support-proxy$1 break; + + proxy_pass http://backend-full:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + + # WebSocket support + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Long timeouts for support sessions + proxy_connect_timeout 24h; + proxy_send_timeout 24h; + proxy_read_timeout 24h; + } + } + server { listen 8080; server_name _; @@ -77,6 +104,31 @@ http { proxy_read_timeout 30s; } + # Support service routes - redirect without trailing slash + location = /support/api { + return 301 /support/api/; + } + + 
location /support/api/ { + rewrite ^/support/api/(.*)$ /api/$1 break; + proxy_pass http://support-full:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + + # WebSocket support + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Long timeouts for tunnel connections + proxy_connect_timeout 24h; + proxy_send_timeout 24h; + proxy_read_timeout 24h; + } + # Frontend routes - everything else location / { proxy_pass http://frontend-full:8080; diff --git a/proxy/port-forward.py b/proxy/port-forward.py new file mode 100644 index 00000000..f6c1eb18 --- /dev/null +++ b/proxy/port-forward.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +"""Forward TCP traffic from a listen port to a local target port.""" + +import socket +import sys +import threading + + +def forward(src, dst): + try: + while True: + data = src.recv(65536) + if not data: + break + dst.sendall(data) + except Exception: + pass + finally: + src.close() + dst.close() + + +def handle(client, target_port): + try: + upstream = socket.create_connection(("127.0.0.1", target_port)) + except Exception: + client.close() + return + threading.Thread(target=forward, args=(client, upstream), daemon=True).start() + forward(upstream, client) + + +def main(): + target_port = int(sys.argv[1]) if len(sys.argv) > 1 else 8443 + listen_port = int(sys.argv[2]) if len(sys.argv) > 2 else 443 + srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + srv.bind(("0.0.0.0", listen_port)) + srv.listen(128) + print(f"Forwarding port {listen_port} -> {target_port}") + while True: + client, _ = srv.accept() + threading.Thread(target=handle, args=(client, target_port), daemon=True).start() + + +if __name__ == "__main__": + main() diff --git 
a/release.sh b/release.sh index 99bd11c8..3b01da63 100755 --- a/release.sh +++ b/release.sh @@ -57,8 +57,8 @@ check_formatting() { info "Checking code formatting for $component..." - cd "$component" - if [[ "$component" =~ ^(backend|sync|collect)$ ]]; then + pushd "$component" > /dev/null + if [[ "$component" =~ ^(backend|sync|collect|services/support)$ ]]; then # Go formatting check local unformatted=$(gofmt -s -l . | wc -l) if [ "$unformatted" -gt 0 ]; then @@ -72,7 +72,7 @@ check_formatting() { elif [[ "$component" =~ ^(proxy)$ ]]; then success "Skipping code formatting for $component (no formatting configured)" fi - cd .. + popd > /dev/null success "Code formatting OK for $component" } @@ -82,8 +82,8 @@ run_linting() { info "Running linting for $component..." - cd "$component" - if [ "$component" = "backend" ] || [ "$component" = "collect" ]; then + pushd "$component" > /dev/null + if [ "$component" = "backend" ] || [ "$component" = "collect" ] || [ "$component" = "services/support" ]; then # Check if golangci-lint is available if command -v golangci-lint >/dev/null 2>&1; then if ! golangci-lint run; then @@ -108,7 +108,7 @@ run_linting() { error "Linting failed for $component" fi fi - cd .. + popd > /dev/null success "Linting passed for $component" } @@ -118,8 +118,8 @@ run_tests() { info "Running tests for $component..." - cd "$component" - if [ "$component" = "backend" ] || [ "$component" = "collect" ]; then + pushd "$component" > /dev/null + if [ "$component" = "backend" ] || [ "$component" = "collect" ] || [ "$component" = "services/support" ]; then if ! go test ./...; then error "Tests failed for $component" fi @@ -135,7 +135,7 @@ run_tests() { error "Tests failed for $component" fi fi - cd .. 
+ popd > /dev/null success "Tests passed for $component" } @@ -194,7 +194,8 @@ update_version_file() { .components.collect = $version | .components.frontend = $version | .components.proxy = $version | - .components."services/mimir" = $version + .components."services/mimir" = $version | + .components."services/support" = $version ' version.json > version.json.tmp && mv version.json.tmp version.json } @@ -235,6 +236,14 @@ update_component_versions() { else warning "services/mimir/VERSION not found" fi + + # Update services/support VERSION file + if [ -f "services/support/pkg/version/VERSION" ]; then + echo "$new_version" > "services/support/pkg/version/VERSION" + success "Updated services/support/pkg/version/VERSION" + else + warning "services/support/pkg/version/VERSION not found" + fi } # Update frontend package.json version @@ -340,16 +349,19 @@ main() { check_formatting "backend" check_formatting "sync" check_formatting "collect" + check_formatting "services/support" check_formatting "frontend" check_formatting "proxy" run_linting "backend" run_linting "sync" run_linting "collect" + run_linting "services/support" run_linting "frontend" run_linting "proxy" run_tests "backend" run_tests "sync" run_tests "collect" + run_tests "services/support" run_tests "frontend" run_tests "proxy" success "All quality checks passed!" 
@@ -394,6 +406,7 @@ main() { collect/pkg/version/VERSION sync/pkg/version/VERSION services/mimir/VERSION + services/support/pkg/version/VERSION frontend/package.json frontend/package-lock.json backend/openapi.yaml diff --git a/render.yaml b/render.yaml index 2d9dae2b..8f472ff6 100644 --- a/render.yaml +++ b/render.yaml @@ -41,7 +41,7 @@ services: runtime: image plan: starter image: - url: ghcr.io/nethserver/my/mimir:v0.3.0 + url: ghcr.io/nethserver/my/mimir:v0.4.0 envVars: - key: PORT value: 9009 @@ -65,7 +65,7 @@ services: runtime: image plan: free image: - url: ghcr.io/nethserver/my/backend:v0.3.0 + url: ghcr.io/nethserver/my/backend:v0.4.0 envVars: # Server Configuration - key: GIN_MODE @@ -106,6 +106,10 @@ services: - key: BACKEND_APP_SECRET sync: false + # Support service URL + - key: SUPPORT_SERVICE_URL + value: http://my-support-prod:10000 + # SMTP Configuration - key: SMTP_HOST sync: false @@ -128,7 +132,7 @@ services: runtime: image plan: free image: - url: ghcr.io/nethserver/my/collect:v0.3.0 + url: ghcr.io/nethserver/my/collect:v0.4.0 envVars: # Server Configuration - key: GIN_MODE @@ -159,13 +163,46 @@ services: - key: MIMIR_URL value: http://my-mimir-prod:9009 + # Production Support Service (Private Service) + - type: web + name: my-support-prod + runtime: image + plan: free + image: + url: ghcr.io/nethserver/my/support:v0.4.0 + envVars: + # Server Configuration + - key: GIN_MODE + value: release + - key: LISTEN_ADDRESS + value: 0.0.0.0:10000 + - key: LOG_LEVEL + value: info + - key: LOG_FORMAT + value: json + + # Redis Configuration + - key: REDIS_URL + fromService: + type: keyvalue + name: my-redis-prod + property: connectionString + - key: REDIS_DB + value: 2 + + # PostgreSQL Configuration + - key: DATABASE_URL + fromDatabase: + name: my-postgres-prod + property: connectionString + # Production Frontend (Private Service) - type: web name: my-frontend-prod runtime: image plan: free image: - url: ghcr.io/nethserver/my/frontend:v0.3.0 + url: 
ghcr.io/nethserver/my/frontend:v0.4.0 envVars: - key: VITE_PRODUCT_NAME value: "My Nethesis" @@ -186,7 +223,7 @@ services: runtime: image plan: free image: - url: ghcr.io/nethserver/my/proxy:v0.3.0 + url: ghcr.io/nethserver/my/proxy:v0.4.0 domains: - my.nethesis.it envVars: @@ -195,6 +232,8 @@ services: value: my-backend-prod - key: COLLECT_SERVICE_NAME value: my-collect-prod + - key: SUPPORT_SERVICE_NAME + value: my-support-prod - key: FRONTEND_SERVICE_NAME value: my-frontend-prod @@ -284,6 +323,13 @@ services: - key: BACKEND_APP_SECRET sync: false + # Support service URL + - key: SUPPORT_SERVICE_URL + fromService: + name: my-support-qa + type: pserv + envVarKey: RENDER_SERVICE_URL + # SMTP Configuration - key: SMTP_HOST sync: false @@ -345,6 +391,43 @@ services: branch: main pullRequestPreviewsEnabled: true # PR previews enabled + # QA Support Service (Private Service) + - type: pserv + name: my-support-qa + runtime: docker + plan: starter + rootDir: services/support + dockerfilePath: Containerfile + envVars: + # Server Configuration + - key: GIN_MODE + value: debug + - key: LISTEN_ADDRESS + value: 0.0.0.0:10000 + - key: LOG_LEVEL + value: debug + - key: LOG_FORMAT + value: json + + # Redis Configuration + - key: REDIS_URL + fromService: + type: keyvalue + name: my-redis-qa + property: connectionString + - key: REDIS_DB + value: 2 + + # PostgreSQL Configuration + - key: DATABASE_URL + fromDatabase: + name: my-postgres-qa + property: connectionString + + autoDeploy: true + branch: main + pullRequestPreviewsEnabled: true + # QA Frontend (Private Service) - type: pserv name: my-frontend-qa @@ -392,6 +475,11 @@ services: name: my-collect-qa type: pserv property: host + - key: SUPPORT_SERVICE_NAME + fromService: + name: my-support-qa + type: pserv + property: host - key: FRONTEND_SERVICE_NAME fromService: name: my-frontend-qa diff --git a/sync/configs/config.yml.example b/sync/configs/config.yml.example index df87939c..8370392d 100644 --- 
a/sync/configs/config.yml.example +++ b/sync/configs/config.yml.example @@ -25,6 +25,7 @@ user_roles: priority: 0 permissions: - id: "impersonate:users" + - id: "connect:systems" - id: "destroy:systems" - id: "read:systems" - id: "manage:systems" @@ -55,7 +56,7 @@ user_roles: resources: - name: "systems" - actions: ["read", "manage", "destroy"] + actions: ["read", "manage", "destroy", "connect"] - name: "users" actions: ["read", "manage", "impersonate"] diff --git a/version.json b/version.json index b7baa9fa..66a610ed 100644 --- a/version.json +++ b/version.json @@ -7,6 +7,7 @@ "collect": "0.4.0", "frontend": "0.4.0", "proxy": "0.4.0", - "services/mimir": "0.4.0" + "services/mimir": "0.4.0", + "services/support": "0.4.0" } } From 651712cbb63de76668a4a83f5214177db10b2ad8 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Tue, 10 Mar 2026 09:05:47 +0100 Subject: [PATCH 05/28] fix(ci): use separate component/path in go-tests matrix to avoid slash in artifact name --- .github/workflows/ci-main.yml | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-main.yml b/.github/workflows/ci-main.yml index e82c3f3a..1d094b3e 100644 --- a/.github/workflows/ci-main.yml +++ b/.github/workflows/ci-main.yml @@ -17,11 +17,19 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - component: [backend, sync, collect, services/support] + include: + - component: backend + path: backend + - component: sync + path: sync + - component: collect + path: collect + - component: support + path: services/support defaults: run: - working-directory: ${{ matrix.component }} + working-directory: ${{ matrix.path }} steps: - uses: actions/checkout@v4 @@ -38,7 +46,7 @@ jobs: path: | ~/.cache/go-build ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ matrix.component }}-${{ hashFiles(format('{0}/go.sum', matrix.component)) }} + key: ${{ runner.os }}-go-${{ matrix.component }}-${{ hashFiles(format('{0}/go.sum', matrix.path)) }} restore-keys: | ${{ runner.os 
}}-go-${{ matrix.component }}- @@ -56,7 +64,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: ${{ matrix.component }}-coverage - path: ${{ matrix.component }}/coverage.out + path: ${{ matrix.path }}/coverage.out retention-days: 30 - name: Run go vet @@ -74,7 +82,7 @@ jobs: uses: golangci/golangci-lint-action@v6 with: version: latest - working-directory: ${{ matrix.component }} + working-directory: ${{ matrix.path }} args: --timeout=10m # =========================================================================== From e71633a3427c1b784e9b51b0eabeb8ba05caea6c Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Tue, 10 Mar 2026 09:14:01 +0100 Subject: [PATCH 06/28] fix(proxy): add default_server to main server block to prevent support subdomain catch-all --- proxy/nginx.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proxy/nginx.conf b/proxy/nginx.conf index 73cc7fc4..37550a18 100644 --- a/proxy/nginx.conf +++ b/proxy/nginx.conf @@ -65,7 +65,7 @@ http { } server { - listen ${PORT}; + listen ${PORT} default_server; server_name _; # Health check endpoint for this proxy From 230d79cde71d12a281aeb0aab8282abdbef6981b Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Tue, 10 Mar 2026 09:36:12 +0100 Subject: [PATCH 07/28] fix(proxy): skip PR suffix and search domain for FQDN service names Allows manually created services (not from Blueprint) to be reached from PR preview environments by setting their env var to a full FQDN. 
--- proxy/entrypoint.sh | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/proxy/entrypoint.sh b/proxy/entrypoint.sh index 51e1dd6d..ad5fd316 100644 --- a/proxy/entrypoint.sh +++ b/proxy/entrypoint.sh @@ -17,11 +17,18 @@ if [ "$IS_PULL_REQUEST" = "true" ]; then PR_SUFFIX=$(echo "$RENDER_SERVICE_NAME" | sed 's/^my-proxy-qa//') echo "Extracted PR suffix: $PR_SUFFIX" - # Apply PR suffix to all service names - export BACKEND_SERVICE_NAME="${BACKEND_SERVICE_NAME}${PR_SUFFIX}" - export COLLECT_SERVICE_NAME="${COLLECT_SERVICE_NAME}${PR_SUFFIX}" - export SUPPORT_SERVICE_NAME="${SUPPORT_SERVICE_NAME}${PR_SUFFIX}" - export FRONTEND_SERVICE_NAME="${FRONTEND_SERVICE_NAME}${PR_SUFFIX}" + # Apply PR suffix to all Blueprint-managed service names. + # Services created manually (not from the Blueprint) do not get PR preview + # instances, so their names must not be suffixed. To skip a service, set + # its env var to an FQDN (containing a dot) — the suffix is only applied + # to short names. 
+ for VAR in BACKEND_SERVICE_NAME COLLECT_SERVICE_NAME SUPPORT_SERVICE_NAME FRONTEND_SERVICE_NAME; do + eval "VAL=\$$VAR" + case "$VAL" in + *.*) echo "Skipping PR suffix for $VAR (FQDN: $VAL)" ;; + *) eval "export $VAR=\"\${VAL}${PR_SUFFIX}\"" ;; + esac + done echo "Adjusted BACKEND_SERVICE_NAME=$BACKEND_SERVICE_NAME" echo "Adjusted COLLECT_SERVICE_NAME=$COLLECT_SERVICE_NAME" @@ -38,10 +45,13 @@ export RESOLVER="${RESOLVER:-8.8.8.8}" SEARCH_DOMAIN=$(awk '/^search/ {print $2; exit}' /etc/resolv.conf) if [ -n "$SEARCH_DOMAIN" ]; then - export BACKEND_SERVICE_NAME="${BACKEND_SERVICE_NAME}.${SEARCH_DOMAIN}" - export COLLECT_SERVICE_NAME="${COLLECT_SERVICE_NAME}.${SEARCH_DOMAIN}" - export SUPPORT_SERVICE_NAME="${SUPPORT_SERVICE_NAME}.${SEARCH_DOMAIN}" - export FRONTEND_SERVICE_NAME="${FRONTEND_SERVICE_NAME}.${SEARCH_DOMAIN}" + for VAR in BACKEND_SERVICE_NAME COLLECT_SERVICE_NAME SUPPORT_SERVICE_NAME FRONTEND_SERVICE_NAME; do + eval "VAL=\$$VAR" + case "$VAL" in + *.*) echo "Skipping search domain for $VAR (already FQDN: $VAL)" ;; + *) eval "export $VAR=\"\${VAL}.${SEARCH_DOMAIN}\"" ;; + esac + done fi echo "DNS resolver: $RESOLVER" From a967a0fc3b54564328d04f9127cd87ab828beaf0 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Tue, 10 Mar 2026 14:23:23 +0100 Subject: [PATCH 08/28] fix(support): security audit hardening across support service and backend Address 27 findings from security audit: prevent double-close panic with sync.Once, fix TOCTOU race in session creation with DB transaction, add gzip bomb protection, limit manifest size/rate, validate service names, use full session UUID in subdomain proxy, add org_role to proxy tokens, harden WebSocket origin checks, add session rate limiting, fix concurrent read/write safety, and multiple other hardening improvements. 
--- backend/jwt/jwt.go | 24 ++--- backend/methods/support_proxy.go | 66 +++++++++----- backend/openapi.yaml | 2 +- services/support/cmd/tunnel-client/main.go | 70 ++++++++++----- services/support/main.go | 2 +- services/support/methods/proxy.go | 7 +- services/support/methods/terminal.go | 4 +- services/support/methods/tunnel.go | 12 ++- services/support/middleware/auth.go | 3 + services/support/middleware/ratelimit.go | 24 +++++ services/support/session/manager.go | 100 ++++++++++++--------- services/support/tunnel/manager.go | 52 +++++++---- services/support/tunnel/protocol.go | 2 +- services/support/tunnel/stream.go | 17 +++- 14 files changed, 257 insertions(+), 128 deletions(-) diff --git a/backend/jwt/jwt.go b/backend/jwt/jwt.go index ae30ab86..9f2004c9 100644 --- a/backend/jwt/jwt.go +++ b/backend/jwt/jwt.go @@ -53,22 +53,26 @@ type ImpersonationClaims struct { // ProxyTokenClaims represents the claims for support proxy tokens type ProxyTokenClaims struct { - TokenType string `json:"token_type"` - SessionID string `json:"session_id"` - ServiceName string `json:"service_name"` - UserID string `json:"user_id"` + TokenType string `json:"token_type"` + SessionID string `json:"session_id"` + ServiceName string `json:"service_name"` + UserID string `json:"user_id"` + OrgRole string `json:"org_role"` + OrganizationID string `json:"organization_id"` jwt.RegisteredClaims } // GenerateProxyToken creates a short-lived JWT for subdomain-based support proxy access -func GenerateProxyToken(sessionID, serviceName, userID string) (string, error) { +func GenerateProxyToken(sessionID, serviceName, userID, orgRole, organizationID string) (string, error) { expDuration := 8 * time.Hour claims := ProxyTokenClaims{ - TokenType: "proxy", - SessionID: sessionID, - ServiceName: serviceName, - UserID: userID, + TokenType: "proxy", + SessionID: sessionID, + ServiceName: serviceName, + UserID: userID, + OrgRole: orgRole, + OrganizationID: organizationID, RegisteredClaims: 
jwt.RegisteredClaims{ Issuer: configuration.Config.JWTIssuer, Subject: userID, @@ -131,7 +135,7 @@ func ValidateProxyToken(tokenString string) (*ProxyTokenClaims, error) { Msg("token is not a proxy token") return nil, fmt.Errorf("token is not a proxy token") } - if claims.SessionID == "" || claims.ServiceName == "" { + if claims.SessionID == "" || claims.ServiceName == "" || claims.OrgRole == "" { logger.ComponentLogger("jwt").Warn(). Str("operation", "proxy_token_validation_failed"). Str("error_type", "missing_claims"). diff --git a/backend/methods/support_proxy.go b/backend/methods/support_proxy.go index dcc3c41d..82dd3d00 100644 --- a/backend/methods/support_proxy.go +++ b/backend/methods/support_proxy.go @@ -15,6 +15,7 @@ import ( "net/http" "net/http/httputil" "net/url" + "regexp" "strings" "sync" @@ -30,6 +31,9 @@ import ( "github.com/nethesis/my/backend/response" ) +// validServiceName validates service names against path traversal and injection attacks +var validServiceName = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`) + // internalTransport is a shared HTTP transport for internal service communication var internalTransport = &http.Transport{ TLSClientConfig: &tls.Config{ @@ -37,6 +41,14 @@ var internalTransport = &http.Transport{ }, } +// internalTransportNoCompression is a shared HTTP transport that preserves upstream encoding +var internalTransportNoCompression = &http.Transport{ + DisableCompression: true, + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, //nolint:gosec // Internal service communication + }, +} + // internalClient is a shared HTTP client for internal service communication var internalClient = &http.Client{Transport: internalTransport} @@ -395,6 +407,11 @@ func ProxySupportSession(c *gin.Context) { return } + if !validServiceName.MatchString(serviceName) { + c.JSON(http.StatusBadRequest, response.BadRequest("invalid service name", nil)) + return + } + if session := getActiveSession(c, sessionID); session == nil { return 
} @@ -451,20 +468,24 @@ func GenerateSupportProxyToken(c *gin.Context) { return } - userID, _, _, _ := helpers.GetUserContextExtended(c) - token, err := customjwt.GenerateProxyToken(sessionID, req.Service, userID) + userID, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + + if !validServiceName.MatchString(req.Service) { + c.JSON(http.StatusBadRequest, response.BadRequest("invalid service name", nil)) + return + } + + token, err := customjwt.GenerateProxyToken(sessionID, req.Service, userID, userOrgRole, userOrgID) if err != nil { logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to generate proxy token") c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to generate proxy token", nil)) return } - // Build subdomain URL: {service}--{session_id[:12]}.support.{domain} - shortID := sessionID - if len(shortID) > 12 { - shortID = shortID[:12] - } - subdomain := fmt.Sprintf("%s--%s.support.%s", req.Service, shortID, configuration.Config.SupportProxyDomain) + // Build subdomain URL: {service}--{session_slug}.support.{domain} + // Use full UUID without dashes for exact matching (32 hex chars) + sessionSlug := strings.ReplaceAll(sessionID, "-", "") + subdomain := fmt.Sprintf("%s--%s.support.%s", req.Service, sessionSlug, configuration.Config.SupportProxyDomain) proxyURL := fmt.Sprintf("https://%s/", subdomain) logAccess(c, sessionID, "ui_proxy", req.Service) @@ -493,19 +514,24 @@ func SubdomainProxy(c *gin.Context) { hostOnly = h } - var serviceName, sessionShort string + var serviceName, sessionSlug string if parts := strings.SplitN(hostOnly, ".support.", 2); len(parts) == 2 { if subParts := strings.SplitN(parts[0], "--", 2); len(subParts) == 2 { serviceName = subParts[0] - sessionShort = subParts[1] + sessionSlug = subParts[1] } } - if serviceName == "" || sessionShort == "" { + if serviceName == "" || sessionSlug == "" { c.JSON(http.StatusBadRequest, response.BadRequest("invalid support proxy subdomain", nil)) 
return } + if !validServiceName.MatchString(serviceName) { + c.JSON(http.StatusBadRequest, response.BadRequest("invalid service name in subdomain", nil)) + return + } + // Prefer query param token (fresh from the UI) over cookie (may be stale // from a previous session — the cookie domain covers all support subdomains). tokenString := c.Query("token") @@ -532,8 +558,9 @@ func SubdomainProxy(c *gin.Context) { return } - // Validate that the token's session ID matches the subdomain short ID - if !strings.HasPrefix(claims.SessionID, sessionShort) { + // Validate that the token's session ID (without dashes) matches the subdomain slug exactly + tokenSessionSlug := strings.ReplaceAll(claims.SessionID, "-", "") + if tokenSessionSlug != sessionSlug { c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "proxy token does not match this session", nil)) return } @@ -543,6 +570,7 @@ func SubdomainProxy(c *gin.Context) { // If token came from query param, set cookie and redirect to same path without token if fromQueryParam { secureCookie := !strings.HasPrefix(configuration.Config.AppURL, "http://") + c.SetSameSite(http.SameSiteStrictMode) c.SetCookie("support_proxy", tokenString, 8*60*60, "/", hostOnly, secureCookie, true) redirectPath := path @@ -551,13 +579,14 @@ func SubdomainProxy(c *gin.Context) { if encoded := q.Encode(); encoded != "" { redirectPath = redirectPath + "?" 
+ encoded } + c.Header("Referrer-Policy", "no-referrer") c.Redirect(http.StatusFound, redirectPath) return } - // Verify session is still active + // Verify session is still active using the token's org context repo := entities.NewSupportRepository() - session, err := repo.GetSessionByID(sessionID, "owner", "") + session, err := repo.GetSessionByID(sessionID, claims.OrgRole, claims.OrganizationID) if err != nil || session == nil { c.JSON(http.StatusNotFound, response.NotFound("support session not found", nil)) return @@ -609,12 +638,7 @@ func SubdomainProxy(c *gin.Context) { resp.Header.Del("Access-Control-Allow-Methods") return nil }, - Transport: &sessionTokenTransport{inner: &http.Transport{ - DisableCompression: true, - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, //nolint:gosec // Internal service communication - }, - }, sessionToken: sessionToken}, + Transport: &sessionTokenTransport{inner: internalTransportNoCompression, sessionToken: sessionToken}, } proxy.ServeHTTP(c.Writer, c.Request) diff --git a/backend/openapi.yaml b/backend/openapi.yaml index a2cdc8f2..cd88e0e9 100644 --- a/backend/openapi.yaml +++ b/backend/openapi.yaml @@ -9089,7 +9089,7 @@ paths: description: | Generates a short-lived signed JWT (8h) that grants access to a specific service on a remote system through the subdomain-based support proxy. The returned URL uses the format - `https://{service}--{session_short}.support.{domain}/` where the token is passed as a query + `https://{service}--{session_slug}.support.{domain}/` (where session_slug is the full UUID without dashes) where the token is passed as a query parameter on the first request, then stored as an HttpOnly cookie. 
security: - BearerAuth: [] diff --git a/services/support/cmd/tunnel-client/main.go b/services/support/cmd/tunnel-client/main.go index 99793bc2..65b068da 100644 --- a/services/support/cmd/tunnel-client/main.go +++ b/services/support/cmd/tunnel-client/main.go @@ -38,6 +38,7 @@ import ( "regexp" "sort" "strings" + "sync" "syscall" "time" @@ -352,8 +353,11 @@ func connect(ctx context.Context, cfg *clientConfig) error { } // If the underlying WebSocket received a close frame, return that error // so the reconnect loop can inspect the close code - if netConn.closeErr != nil { - return netConn.closeErr + netConn.mu.Lock() + closeErr := netConn.closeErr + netConn.mu.Unlock() + if closeErr != nil { + return closeErr } return fmt.Errorf("stream accept error: %w", err) } @@ -735,15 +739,15 @@ func discoverNodeRoutes(ctx context.Context, rdb *redis.Client, nodeID string) m return services } +// moduleIDRegex matches NS8 module IDs (compiled once at package level) +var moduleIDRegex = regexp.MustCompile(`^(.+\d+)(?:[-_]|$)`) + // extractModuleID extracts the module ID from a Traefik config filename. // NS8 module IDs end with an instance number (e.g., "nethvoice103", "n8n2", // "nethsecurity-controller4"). Route suffixes are separated by hyphen or // underscore after the digits (e.g., "nethvoice103-ui", "metrics1_grafana"). 
func extractModuleID(name string) string { - // Match everything up to and including trailing digits, followed by - // a separator (- or _) or end of string - re := regexp.MustCompile(`^(.+\d+)(?:[-_]|$)`) - m := re.FindStringSubmatch(name) + m := moduleIDRegex.FindStringSubmatch(name) if len(m) > 1 { return m[1] } @@ -879,21 +883,28 @@ func handleStream(stream net.Conn, services map[string]ServiceInfo) { log.Printf("CONNECT %s -> %s", serviceName, svc.Target) - // Bidirectional copy - done := make(chan struct{}, 2) + // Bidirectional copy with proper cleanup to prevent goroutine leaks + var once sync.Once + done := make(chan struct{}) + closeBoth := func() { + once.Do(func() { + close(done) + _ = targetConn.Close() + _ = stream.Close() + }) + } go func() { + defer closeBoth() _, _ = io.Copy(targetConn, stream) - done <- struct{}{} }() go func() { + defer closeBoth() _, _ = io.Copy(stream, targetConn) - done <- struct{}{} }() <-done - _ = targetConn.Close() } // readConnectHeader reads "CONNECT \n" from the stream byte-by-byte @@ -949,6 +960,7 @@ func readLine(r io.Reader) (string, error) { type wsNetConn struct { conn *websocket.Conn reader io.Reader + mu sync.Mutex closeErr error // stores the WebSocket close error if received } @@ -957,7 +969,9 @@ func (w *wsNetConn) Read(b []byte) (int, error) { if w.reader == nil { _, reader, err := w.conn.NextReader() if err != nil { + w.mu.Lock() w.closeErr = err + w.mu.Unlock() return 0, err } w.reader = reader @@ -982,12 +996,17 @@ func (w *wsNetConn) Write(b []byte) (int, error) { return len(b), nil } -func (w *wsNetConn) Close() error { return w.conn.Close() } -func (w *wsNetConn) LocalAddr() net.Addr { return w.conn.LocalAddr() } -func (w *wsNetConn) RemoteAddr() net.Addr { return w.conn.RemoteAddr() } -func (w *wsNetConn) SetDeadline(_ time.Time) error { return nil } -func (w *wsNetConn) SetReadDeadline(_ time.Time) error { return nil } -func (w *wsNetConn) SetWriteDeadline(_ time.Time) error { return nil } +func (w 
*wsNetConn) Close() error { return w.conn.Close() } +func (w *wsNetConn) LocalAddr() net.Addr { return w.conn.LocalAddr() } +func (w *wsNetConn) RemoteAddr() net.Addr { return w.conn.RemoteAddr() } +func (w *wsNetConn) SetDeadline(t time.Time) error { + if err := w.conn.SetReadDeadline(t); err != nil { + return err + } + return w.conn.SetWriteDeadline(t) +} +func (w *wsNetConn) SetReadDeadline(t time.Time) error { return w.conn.SetReadDeadline(t) } +func (w *wsNetConn) SetWriteDeadline(t time.Time) error { return w.conn.SetWriteDeadline(t) } func envWithDefault(key, defaultValue string) string { if v := os.Getenv(key); v != "" { @@ -1024,17 +1043,24 @@ func handleTerminal(stream net.Conn) { log.Printf("Failed to start PTY: %v", err) return } + var once sync.Once + done := make(chan struct{}) + closeAll := func() { + once.Do(func() { + close(done) + _ = ptmx.Close() + _ = stream.Close() + }) + } defer func() { - _ = ptmx.Close() + closeAll() _ = cmd.Process.Kill() _, _ = cmd.Process.Wait() }() - done := make(chan struct{}, 2) - // PTY → stream: read from PTY, send as type-0 length-prefixed frames go func() { - defer func() { done <- struct{}{} }() + defer closeAll() buf := make([]byte, 4096) for { n, readErr := ptmx.Read(buf) @@ -1054,7 +1080,7 @@ func handleTerminal(stream net.Conn) { // Stream → PTY: read length-prefixed frames, dispatch by type go func() { - defer func() { done <- struct{}{} }() + defer closeAll() for { frame, readErr := readFrame(stream) if readErr != nil { diff --git a/services/support/main.go b/services/support/main.go index e0f72e85..c70fd300 100644 --- a/services/support/main.go +++ b/services/support/main.go @@ -135,7 +135,7 @@ func main() { // Internal endpoints: require per-session token from backend (#3/#4) internal := api.Group("/") - internal.Use(middleware.SessionTokenMiddleware()) + internal.Use(middleware.SessionTokenMiddleware(), middleware.SessionRateLimitMiddleware()) internal.GET("/terminal/:session_id", methods.HandleTerminal) 
internal.GET("/proxy/:session_id/services", methods.ListServices) diff --git a/services/support/methods/proxy.go b/services/support/methods/proxy.go index dcd63a19..f9249168 100644 --- a/services/support/methods/proxy.go +++ b/services/support/methods/proxy.go @@ -200,8 +200,8 @@ func isRewritableResponse(resp *http.Response) bool { strings.Contains(ct, "text/css") } -// maxRewriteBodySize is the maximum response body size for hostname rewriting (50 MB). -const maxRewriteBodySize = 50 * 1024 * 1024 +// maxRewriteBodySize is the maximum response body size for hostname rewriting (5 MB). +const maxRewriteBodySize = 5 * 1024 * 1024 // buildHostRewriteMap creates a map of original hostname -> proxy hostname for all // services in the tunnel. This enables multi-hostname rewriting: when proxying @@ -264,7 +264,8 @@ func rewriteResponseBodyMulti(resp *http.Response, rewrites map[string]string) e if err != nil { return err } - body, err = io.ReadAll(gr) + // Limit decompressed size to prevent gzip bombs + body, err = io.ReadAll(io.LimitReader(gr, maxRewriteBodySize+1)) _ = gr.Close() if err != nil { return err diff --git a/services/support/methods/terminal.go b/services/support/methods/terminal.go index 2749ac42..cdaf65f9 100644 --- a/services/support/methods/terminal.go +++ b/services/support/methods/terminal.go @@ -30,7 +30,9 @@ import ( // Unlike the tunnel upgrader, this rejects cross-origin requests since terminal // sessions are initiated by browsers on the MY domain. 
var terminalUpgrader = websocket.Upgrader{ - CheckOrigin: func(r *http.Request) bool { return true }, // Internal endpoint, backend proxies the request + CheckOrigin: func(r *http.Request) bool { + return r.Header.Get("Origin") == "" // Internal endpoint: backend strips Origin before proxying + }, ReadBufferSize: 4096, WriteBufferSize: 4096, } diff --git a/services/support/methods/tunnel.go b/services/support/methods/tunnel.go index fbb6fa74..b083ba65 100644 --- a/services/support/methods/tunnel.go +++ b/services/support/methods/tunnel.go @@ -145,6 +145,7 @@ func HandleTunnel(c *gin.Context) { // and reads the service manifest. It continues to listen for manifest updates. func acceptControlStream(t *tunnel.Tunnel, systemID, sessionID string) { log := logger.ComponentLogger("tunnel") + var lastManifest time.Time for { stream, err := t.Session.Accept() @@ -152,9 +153,15 @@ func acceptControlStream(t *tunnel.Tunnel, systemID, sessionID string) { return // session closed } - // Decode manifest from the control stream + // Rate limit manifest updates (max 1 per 10 seconds, first always accepted) + if !lastManifest.IsZero() && time.Since(lastManifest) < 10*time.Second { + _ = stream.Close() + continue + } + + // Decode manifest with size limit to prevent memory exhaustion var manifest tunnel.ServiceManifest - decoder := json.NewDecoder(stream) + decoder := json.NewDecoder(io.LimitReader(stream, 1<<20)) // 1 MB max if err := decoder.Decode(&manifest); err != nil { log.Warn().Err(err). Str("system_id", systemID). @@ -167,6 +174,7 @@ func acceptControlStream(t *tunnel.Tunnel, systemID, sessionID string) { if manifest.Services != nil { t.SetServices(manifest.Services) + lastManifest = time.Now() log.Info(). Str("system_id", systemID). Str("session_id", sessionID). 
diff --git a/services/support/middleware/auth.go b/services/support/middleware/auth.go index 113b907a..2a757b20 100644 --- a/services/support/middleware/auth.go +++ b/services/support/middleware/auth.go @@ -175,6 +175,9 @@ func InvalidateAuthCache(ctx context.Context, systemKey string) { for iter.Next(ctx) { _ = rdb.Del(ctx, iter.Val()).Err() } + if err := iter.Err(); err != nil { + logger.Warn().Err(err).Str("system_key", systemKey).Msg("redis scan error during cache invalidation") + } } // StartAuthCacheInvalidator listens for cache invalidation events via Redis pub/sub. diff --git a/services/support/middleware/ratelimit.go b/services/support/middleware/ratelimit.go index 9a6d1cf4..6a2e2d4e 100644 --- a/services/support/middleware/ratelimit.go +++ b/services/support/middleware/ratelimit.go @@ -80,6 +80,9 @@ var tunnelIPRateLimiter = newRateLimiter(10, 1*time.Minute) // tunnelKeyRateLimiter limits tunnel connection attempts per system_key (#14) var tunnelKeyRateLimiter = newRateLimiter(5, 1*time.Minute) +// sessionRateLimiter limits requests per session ID on internal endpoints +var sessionRateLimiter = newRateLimiter(100, 1*time.Minute) + // TunnelRateLimitMiddleware limits the rate of tunnel connection attempts // per client IP (10/min) and per system_key (5/min, checked after auth). func TunnelRateLimitMiddleware() gin.HandlerFunc { @@ -98,6 +101,27 @@ func TunnelRateLimitMiddleware() gin.HandlerFunc { } } +// SessionRateLimitMiddleware limits the rate of requests per session ID on internal endpoints. +func SessionRateLimitMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + sessionID := c.Param("session_id") + if sessionID == "" { + c.Next() + return + } + if !sessionRateLimiter.allow(sessionID) { + logger.Warn(). + Str("session_id", sessionID). + Str("client_ip", c.ClientIP()). 
+ Msg("session rate limit exceeded") + c.JSON(http.StatusTooManyRequests, response.Error(http.StatusTooManyRequests, "too many requests for this session", nil)) + c.Abort() + return + } + c.Next() + } +} + // SystemKeyRateLimitMiddleware checks the per-system_key rate limit. // Runs after BasicAuthMiddleware so that system_key is available in the context. func SystemKeyRateLimitMiddleware() gin.HandlerFunc { diff --git a/services/support/session/manager.go b/services/support/session/manager.go index 16cb938b..ef2efaa1 100644 --- a/services/support/session/manager.go +++ b/services/support/session/manager.go @@ -34,43 +34,71 @@ func GenerateToken() (string, error) { // CreateSession creates a new support session for a system. // nodeID identifies the cluster node (empty for single-node systems). -// Enforces a maximum number of active sessions per system. +// Enforces a maximum number of active sessions per system atomically within a transaction. // Closes any existing active/pending sessions for the same system+node to prevent orphans. func CreateSession(systemID, nodeID string) (*models.SupportSession, error) { - // Close any existing active/pending sessions for this system+node combination. - // This prevents orphaned sessions when a client reconnects without a valid reconnect token. 
- var closeQuery string - var closeArgs []interface{} + log := logger.ComponentLogger("session") + + token, err := GenerateToken() + if err != nil { + return nil, err + } + + reconnectToken, err := GenerateToken() + if err != nil { + return nil, err + } + + now := time.Now() + expiresAt := now.Add(configuration.Config.SessionDefaultDuration) + maxSessions := configuration.Config.MaxSessionsPerSystem + + // Use NULL for empty node_id + var nodeIDParam interface{} + if nodeID != "" { + nodeIDParam = nodeID + } + + // Use a transaction to atomically close orphans, check limits, and insert + tx, err := database.DB.Begin() + if err != nil { + return nil, fmt.Errorf("failed to begin transaction: %w", err) + } + defer func() { _ = tx.Rollback() }() + + // Close any existing active/pending sessions for this system+node combination + var closeResult sql.Result if nodeID == "" { - closeQuery = `UPDATE support_sessions - SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', updated_at = NOW() - WHERE system_id = $1 AND node_id IS NULL AND status IN ('pending', 'active')` - closeArgs = []interface{}{systemID} + closeResult, err = tx.Exec( + `UPDATE support_sessions + SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', updated_at = NOW() + WHERE system_id = $1 AND node_id IS NULL AND status IN ('pending', 'active')`, + systemID, + ) } else { - closeQuery = `UPDATE support_sessions - SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', updated_at = NOW() - WHERE system_id = $1 AND node_id = $2 AND status IN ('pending', 'active')` - closeArgs = []interface{}{systemID, nodeID} + closeResult, err = tx.Exec( + `UPDATE support_sessions + SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', updated_at = NOW() + WHERE system_id = $1 AND node_id = $2 AND status IN ('pending', 'active')`, + systemID, nodeID, + ) } - result, err := database.DB.Exec(closeQuery, closeArgs...) 
if err != nil { - logger.ComponentLogger("session").Warn().Err(err). + log.Warn().Err(err). Str("system_id", systemID).Str("node_id", nodeID). Msg("failed to close existing sessions before creating new one") - } else if rows, _ := result.RowsAffected(); rows > 0 { - logger.ComponentLogger("session").Info(). + } else if rows, _ := closeResult.RowsAffected(); rows > 0 { + log.Info(). Str("system_id", systemID).Str("node_id", nodeID). Int64("closed_count", rows). Msg("closed orphaned sessions before creating new one") } - // Enforce per-system session limit - maxSessions := configuration.Config.MaxSessionsPerSystem + // Check session limit within the transaction (atomic with the close + insert) if maxSessions > 0 { var activeCount int - err := database.DB.QueryRow( - `SELECT COUNT(*) FROM support_sessions - WHERE system_id = $1 AND status IN ('pending', 'active')`, + err = tx.QueryRow( + `SELECT COUNT(*) FROM support_sessions WHERE system_id = $1 AND status IN ('pending', 'active')`, systemID, ).Scan(&activeCount) if err != nil { @@ -81,28 +109,9 @@ func CreateSession(systemID, nodeID string) (*models.SupportSession, error) { } } - token, err := GenerateToken() - if err != nil { - return nil, err - } - - now := time.Now() - expiresAt := now.Add(configuration.Config.SessionDefaultDuration) - - reconnectToken, err := GenerateToken() - if err != nil { - return nil, err - } - - // Use NULL for empty node_id - var nodeIDParam interface{} - if nodeID != "" { - nodeIDParam = nodeID - } - var session models.SupportSession var scannedNodeID sql.NullString - err = database.DB.QueryRow( + err = tx.QueryRow( `INSERT INTO support_sessions (system_id, node_id, session_token, reconnect_token, started_at, expires_at, status) VALUES ($1, $2, $3, $4, $5, $6, 'pending') RETURNING id, system_id, node_id, session_token, reconnect_token, started_at, expires_at, status, created_at, updated_at`, @@ -115,11 +124,16 @@ func CreateSession(systemID, nodeID string) (*models.SupportSession, error) 
{ if err != nil { return nil, fmt.Errorf("failed to create session: %w", err) } + + if err := tx.Commit(); err != nil { + return nil, fmt.Errorf("failed to commit session creation: %w", err) + } + if scannedNodeID.Valid { session.NodeID = scannedNodeID.String } - logger.ComponentLogger("session").Info(). + log.Info(). Str("session_id", session.ID). Str("system_id", systemID). Str("node_id", nodeID). diff --git a/services/support/tunnel/manager.go b/services/support/tunnel/manager.go index 28478a73..e6e0d7c9 100644 --- a/services/support/tunnel/manager.go +++ b/services/support/tunnel/manager.go @@ -53,6 +53,7 @@ type Tunnel struct { WsConn WsCloser // underlying WebSocket for sending close frames ConnectedAt time.Time done chan struct{} + closeOnce sync.Once services map[string]ServiceInfo servicesMu sync.RWMutex activeStreams int64 @@ -246,20 +247,26 @@ func (m *Manager) List() []TunnelInfo { // frame so the tunnel-client knows not to reconnect. func (m *Manager) CloseBySessionID(sessionID string) bool { m.mu.Lock() - defer m.mu.Unlock() - + var found *Tunnel for key, t := range m.tunnels { if t.SessionID == sessionID { - t.GracefulClose() + found = t delete(m.tunnels, key) - logger.ComponentLogger("tunnel_manager").Info(). - Str("system_id", t.SystemID). - Str("session_id", sessionID). - Msg("tunnel gracefully closed by session ID") - return true + break } } - return false + m.mu.Unlock() + + if found == nil { + return false + } + + found.GracefulClose() + logger.ComponentLogger("tunnel_manager").Info(). + Str("system_id", found.SystemID). + Str("session_id", sessionID). + Msg("tunnel gracefully closed by session ID") + return true } // StartGracePeriod begins a grace period for a disconnected tunnel. 
@@ -333,18 +340,14 @@ func (m *Manager) CloseAll() { logger.ComponentLogger("tunnel_manager").Info().Msg("all tunnels closed") } -// Close closes the tunnel's yamux session +// Close closes the tunnel's yamux session (concurrency-safe via sync.Once) func (t *Tunnel) Close() { - select { - case <-t.done: - return // already closed - default: + t.closeOnce.Do(func() { close(t.done) - } - - if t.Session != nil { - _ = t.Session.Close() - } + if t.Session != nil { + _ = t.Session.Close() + } + }) } // GracefulClose sends a WebSocket close frame with CloseCodeSessionClosed @@ -364,14 +367,25 @@ func (t *Tunnel) Done() <-chan struct{} { return t.done } +// maxServicesPerManifest caps the number of services a tunnel client can advertise +const maxServicesPerManifest = 500 + // SetServices updates the services available through this tunnel. // Services with dangerous targets (cloud metadata, link-local) are rejected. +// The manifest is capped at maxServicesPerManifest entries. func (t *Tunnel) SetServices(services map[string]ServiceInfo) { t.servicesMu.Lock() defer t.servicesMu.Unlock() validated := make(map[string]ServiceInfo, len(services)) for name, svc := range services { + if len(validated) >= maxServicesPerManifest { + logger.ComponentLogger("tunnel_manager").Warn(). + Str("system_id", t.SystemID). + Int("max", maxServicesPerManifest). + Msg("service manifest truncated at max limit") + break + } if err := validateServiceTarget(svc.Target); err != nil { logger.ComponentLogger("tunnel_manager").Warn(). Str("system_id", t.SystemID). 
diff --git a/services/support/tunnel/protocol.go b/services/support/tunnel/protocol.go index a34d83a2..4cc9718c 100644 --- a/services/support/tunnel/protocol.go +++ b/services/support/tunnel/protocol.go @@ -92,7 +92,7 @@ func readLine(r io.Reader) (string, error) { } if err != nil { if err == io.EOF && len(buf) > 0 { - return string(buf), nil + return "", fmt.Errorf("unexpected EOF: incomplete line") } return "", err } diff --git a/services/support/tunnel/stream.go b/services/support/tunnel/stream.go index f6945849..547955c8 100644 --- a/services/support/tunnel/stream.go +++ b/services/support/tunnel/stream.go @@ -12,6 +12,7 @@ package tunnel import ( "io" "net" + "sync" "time" "github.com/gorilla/websocket" @@ -20,8 +21,10 @@ import ( // WebSocketConn wraps a gorilla/websocket.Conn to implement net.Conn // for use with yamux, which requires a net.Conn interface. type WebSocketConn struct { - conn *websocket.Conn - reader io.Reader + conn *websocket.Conn + reader io.Reader + readMu sync.Mutex + writeMu sync.Mutex } // NewWebSocketConn wraps a WebSocket connection as a net.Conn @@ -29,8 +32,11 @@ func NewWebSocketConn(conn *websocket.Conn) *WebSocketConn { return &WebSocketConn{conn: conn} } -// Read reads data from the WebSocket connection +// Read reads data from the WebSocket connection (concurrency-safe) func (wsc *WebSocketConn) Read(b []byte) (int, error) { + wsc.readMu.Lock() + defer wsc.readMu.Unlock() + for { if wsc.reader == nil { _, reader, err := wsc.conn.NextReader() @@ -52,8 +58,11 @@ func (wsc *WebSocketConn) Read(b []byte) (int, error) { } } -// Write writes data to the WebSocket connection +// Write writes data to the WebSocket connection (concurrency-safe) func (wsc *WebSocketConn) Write(b []byte) (int, error) { + wsc.writeMu.Lock() + defer wsc.writeMu.Unlock() + err := wsc.conn.WriteMessage(websocket.BinaryMessage, b) if err != nil { return 0, err From a310292be46e3f6d0fd786214e37a6744fa36d5d Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: 
Tue, 10 Mar 2026 15:33:18 +0100 Subject: [PATCH 09/28] fix(support): security audit hardening across support service and backend Address 23 findings from penetration testing report on the support service: - SSRF/DNS rebinding prevention with IP validation and DNS resolution checks - Open redirect fix via protocol-relative URL sanitization - CORS restriction from AllowAllOrigins to localhost-only in debug mode - HSTS, CSP, X-Content-Type-Options security headers in nginx proxy - InternalSecret middleware for defense-in-depth inter-service auth - PTY environment variable sanitization to prevent credential leakage - Cookie rewriting to prevent cross-session domain leakage - Global memory budget (50MB) for gzip decompression (bomb mitigation) - CONNECT protocol newline injection prevention with service name validation - Container hardening with nginx-unprivileged and non-root users - Input validation for node_id and service names - Nginx server_name regex anchoring for multi-environment support - Rate limiter single-instance design documentation - Non-functional default secrets in .env.example files --- backend/.env.example | 6 ++- backend/configuration/configuration.go | 5 +++ backend/main.go | 9 +++- backend/methods/support_proxy.go | 14 ++++++- proxy/Containerfile | 20 +++++---- proxy/nginx.conf | 10 +++-- proxy/nginx.conf.local | 4 +- services/support/.env.example | 3 +- services/support/Containerfile.tunnel-client | 2 + services/support/cmd/tunnel-client/main.go | 34 ++++++++++++++- .../support/configuration/configuration.go | 6 +++ services/support/main.go | 4 +- services/support/methods/proxy.go | 41 +++++++++++++++++- services/support/methods/tunnel.go | 13 ++++++ services/support/middleware/auth.go | 24 +++++++++++ services/support/middleware/ratelimit.go | 6 +++ services/support/session/manager.go | 7 +++- services/support/tunnel/manager.go | 42 +++++++++++++++---- services/support/tunnel/protocol.go | 9 ++++ 19 files changed, 228 insertions(+), 31 
deletions(-) diff --git a/backend/.env.example b/backend/.env.example index 22203b22..0741ed45 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -17,7 +17,8 @@ BACKEND_APP_ID=your-management-api-app-id BACKEND_APP_SECRET=your-management-api-app-secret # Custom JWT for resilient offline operation -JWT_SECRET=your-super-secret-jwt-signing-key-min-32-chars +# IMPORTANT: Generate a strong secret with: openssl rand -hex 32 +JWT_SECRET=CHANGE_ME_GENERATE_WITH_openssl_rand_hex_32 # PostgreSQL connection string (shared 'noc' database) DATABASE_URL=postgresql://noc_user:noc_password@localhost:5432/noc?sslmode=disable @@ -50,7 +51,8 @@ REDIS_URL=redis://localhost:6379 # Shared secret for backend→support service internal communication # Must match the INTERNAL_SECRET in the support service .env -#SUPPORT_INTERNAL_SECRET=change-me-to-a-random-secret-min-32-chars +# IMPORTANT: Generate with: openssl rand -hex 32 +#SUPPORT_INTERNAL_SECRET=CHANGE_ME_GENERATE_WITH_openssl_rand_hex_32 # =========================================== # OPTIONAL CONFIGURATION diff --git a/backend/configuration/configuration.go b/backend/configuration/configuration.go index 2e2bad02..a069138b 100644 --- a/backend/configuration/configuration.go +++ b/backend/configuration/configuration.go @@ -63,6 +63,8 @@ type Configuration struct { SupportServiceURL string `json:"support_service_url"` // Support proxy domain for subdomain-based proxying (e.g. 
"my.nethesis.it") SupportProxyDomain string `json:"support_proxy_domain"` + // Shared secret for backend→support internal authentication (#4) + SupportInternalSecret string `json:"-"` // SMTP configuration for sending emails SMTPHost string `json:"smtp_host"` @@ -207,6 +209,9 @@ func Init() { // Support proxy domain (optional, enables subdomain-based proxy) Config.SupportProxyDomain = os.Getenv("SUPPORT_PROXY_DOMAIN") + // Shared secret for backend→support internal communication (#4) + Config.SupportInternalSecret = os.Getenv("SUPPORT_INTERNAL_SECRET") + // SMTP configuration Config.SMTPHost = os.Getenv("SMTP_HOST") Config.SMTPPort = parseIntWithDefault("SMTP_PORT", 587) diff --git a/backend/main.go b/backend/main.go index 9309070d..87a0a4e5 100644 --- a/backend/main.go +++ b/backend/main.go @@ -144,11 +144,16 @@ func main() { // Add compression (exclude WebSocket terminal endpoint and support proxy) router.Use(gzip.Gzip(gzip.DefaultCompression, gzip.WithExcludedPathsRegexs([]string{".*/terminal$", ".*/support-proxy/.*"}))) - // CORS configuration in debug mode + // CORS configuration in debug mode: restrict to local development origins (#1) if gin.Mode() == gin.DebugMode { corsConf := cors.DefaultConfig() corsConf.AllowHeaders = []string{"Authorization", "Content-Type", "Accept"} - corsConf.AllowAllOrigins = true + corsConf.AllowOriginFunc = func(origin string) bool { + return strings.HasPrefix(origin, "http://localhost") || + strings.HasPrefix(origin, "https://localhost") || + strings.HasPrefix(origin, "http://127.0.0.1") || + strings.HasPrefix(origin, "https://127.0.0.1") + } router.Use(cors.New(corsConf)) } diff --git a/backend/methods/support_proxy.go b/backend/methods/support_proxy.go index 82dd3d00..87fd91ef 100644 --- a/backend/methods/support_proxy.go +++ b/backend/methods/support_proxy.go @@ -64,6 +64,10 @@ func (t *sessionTokenTransport) RoundTrip(req *http.Request) (*http.Response, er if t.sessionToken != "" { req.Header.Set("X-Session-Token", 
t.sessionToken) } + // Send internal secret for defense-in-depth authentication (#4) + if configuration.Config.SupportInternalSecret != "" { + req.Header.Set("X-Internal-Secret", configuration.Config.SupportInternalSecret) + } // Remove browser headers that would trigger CORS on the support service req.Header.Del("Origin") req.Header.Del("Referer") @@ -280,6 +284,9 @@ func GetSupportSessionTerminal(c *gin.Context) { } upReq.Host = target.Host upReq.Header.Set("X-Session-Token", sessionToken) + if configuration.Config.SupportInternalSecret != "" { + upReq.Header.Set("X-Internal-Secret", configuration.Config.SupportInternalSecret) + } // Send the request to the support service if writeErr := upReq.Write(upstreamConn); writeErr != nil { @@ -573,7 +580,9 @@ func SubdomainProxy(c *gin.Context) { c.SetSameSite(http.SameSiteStrictMode) c.SetCookie("support_proxy", tokenString, 8*60*60, "/", hostOnly, secureCookie, true) - redirectPath := path + // Sanitize redirect path to prevent open redirect via protocol-relative URLs (#3). + // "//evil.com" is interpreted by browsers as a redirect to evil.com. 
+ redirectPath := "/" + strings.TrimLeft(path, "/") q := c.Request.URL.Query() q.Del("token") if encoded := q.Encode(); encoded != "" { @@ -667,6 +676,9 @@ func proxyGetWithTokenOrEmpty(c *gin.Context, targetURL, sessionToken string) { return } req.Header.Set("X-Session-Token", sessionToken) + if configuration.Config.SupportInternalSecret != "" { + req.Header.Set("X-Internal-Secret", configuration.Config.SupportInternalSecret) + } resp, err := internalClient.Do(req) if err != nil { diff --git a/proxy/Containerfile b/proxy/Containerfile index efc1bdc2..ee66176a 100644 --- a/proxy/Containerfile +++ b/proxy/Containerfile @@ -1,4 +1,7 @@ -FROM nginx:1.25-alpine +FROM nginxinc/nginx-unprivileged:1.25-alpine + +# Switch to root for setup operations +USER root # Install curl for health checks RUN apk add --no-cache curl @@ -9,8 +12,12 @@ COPY .render-build-trigger /tmp/build-trigger # Copy nginx configuration COPY nginx.conf /etc/nginx/nginx.conf -# Create log directory -RUN mkdir -p /var/log/nginx +# Create log directory with correct ownership +RUN mkdir -p /var/log/nginx && chown nginx:nginx /var/log/nginx + +# Copy and setup entrypoint script +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh # Expose default port (can be overridden by runtime environment) ARG PORT=80 @@ -21,9 +28,8 @@ EXPOSE ${PORT} HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ CMD curl -f http://localhost:${PORT}/health || exit 1 -# Copy and setup entrypoint script -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh +# Run as non-root user +USER nginx # Use entrypoint script -CMD ["/entrypoint.sh"] \ No newline at end of file +CMD ["/entrypoint.sh"] diff --git a/proxy/nginx.conf b/proxy/nginx.conf index 37550a18..3078ff09 100644 --- a/proxy/nginx.conf +++ b/proxy/nginx.conf @@ -6,9 +6,10 @@ http { include /etc/nginx/mime.types; default_type application/octet-stream; - # Sanitize token query param from logs + # Sanitize sensitive query params from 
logs (#21: ticket, token) map $request_uri $sanitized_request_uri { - "~^(?[^?]*\?)(?.*)token=[^&]*(?.*)$" "$prefix${before}token=[REDACTED]$after"; + "~^(?[^?]*\?)(?.*)token=[^&]*(?.*)$" "$prefix${before}token=[REDACTED]$after"; + "~^(?[^?]*\?)(?.*)ticket=[^&]*(?.*)$" "$prefix${before}ticket=[REDACTED]$after"; default $request_uri; } @@ -29,8 +30,9 @@ http { # Security headers add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; - add_header X-XSS-Protection "1; mode=block" always; add_header Referrer-Policy "strict-origin-when-cross-origin" always; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; connect-src 'self' wss:; img-src 'self' data:; frame-ancestors 'self';" always; # DNS resolver for dynamic upstream resolution (internal Render DNS) resolver ${RESOLVER} valid=30s; @@ -39,7 +41,7 @@ http { # Support subdomain proxy — *.support.{domain} server { listen ${PORT}; - server_name ~^.+\.support\..*; + server_name ~^.+\.support\.(qa\.)?my\.nethesis\.it$; location / { set $backend_upstream http://${BACKEND_SERVICE_NAME}:10000; diff --git a/proxy/nginx.conf.local b/proxy/nginx.conf.local index 01548599..8fda96d0 100644 --- a/proxy/nginx.conf.local +++ b/proxy/nginx.conf.local @@ -23,13 +23,13 @@ http { # Security headers add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; - add_header X-XSS-Protection "1; mode=block" always; add_header Referrer-Policy "strict-origin-when-cross-origin" always; + add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; connect-src 'self' wss:; img-src 'self' data:; frame-ancestors 'self';" always; # Support subdomain proxy — *.support.{domain} server { listen 8080; - server_name ~^.+\.support\..*; + server_name 
~^.+\.support\.my\.localtest\.me$; location / { rewrite ^(.*)$ /support-proxy$1 break; diff --git a/services/support/.env.example b/services/support/.env.example index 1f8d8b41..598b7c8f 100644 --- a/services/support/.env.example +++ b/services/support/.env.example @@ -26,4 +26,5 @@ MAX_TUNNELS=1000 MAX_SESSIONS_PER_SYSTEM=5 # Internal authentication (shared secret with backend) -INTERNAL_SECRET=change-me-to-a-random-secret-min-32-chars +# IMPORTANT: Generate with: openssl rand -hex 32 +INTERNAL_SECRET=CHANGE_ME_GENERATE_WITH_openssl_rand_hex_32 diff --git a/services/support/Containerfile.tunnel-client b/services/support/Containerfile.tunnel-client index 65cf6fc1..bacc8cd6 100644 --- a/services/support/Containerfile.tunnel-client +++ b/services/support/Containerfile.tunnel-client @@ -29,4 +29,6 @@ FROM scratch COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ COPY --from=builder /app/tunnel-client /tunnel-client +USER 65534 + ENTRYPOINT ["/tunnel-client"] diff --git a/services/support/cmd/tunnel-client/main.go b/services/support/cmd/tunnel-client/main.go index 65b068da..a4f61776 100644 --- a/services/support/cmd/tunnel-client/main.go +++ b/services/support/cmd/tunnel-client/main.go @@ -1008,6 +1008,38 @@ func (w *wsNetConn) SetDeadline(t time.Time) error { func (w *wsNetConn) SetReadDeadline(t time.Time) error { return w.conn.SetReadDeadline(t) } func (w *wsNetConn) SetWriteDeadline(t time.Time) error { return w.conn.SetWriteDeadline(t) } +// sensitiveEnvPrefixes lists environment variable prefixes that are stripped +// from the PTY shell to prevent operators from extracting credentials (#8). 
+var sensitiveEnvPrefixes = []string{ + "SYSTEM_KEY=", + "SYSTEM_SECRET=", + "SUPPORT_URL=", + "DATABASE_URL=", + "REDIS_ADDR=", + "REDIS_PASSWORD=", + "REDIS_URL=", + "INTERNAL_SECRET=", + "TUNNEL_CONFIG=", +} + +// sanitizeEnv filters out sensitive environment variables before spawning a shell +func sanitizeEnv(env []string) []string { + filtered := make([]string, 0, len(env)) + for _, e := range env { + sensitive := false + for _, prefix := range sensitiveEnvPrefixes { + if strings.HasPrefix(e, prefix) { + sensitive = true + break + } + } + if !sensitive { + filtered = append(filtered, e) + } + } + return filtered +} + func envWithDefault(key, defaultValue string) string { if v := os.Getenv(key); v != "" { return v @@ -1036,7 +1068,7 @@ func handleTerminal(stream net.Conn) { } cmd := exec.Command(shell) - cmd.Env = append(os.Environ(), defaultTermEnv) + cmd.Env = append(sanitizeEnv(os.Environ()), defaultTermEnv) ptmx, err := pty.Start(cmd) if err != nil { diff --git a/services/support/configuration/configuration.go b/services/support/configuration/configuration.go index 6c3a04f2..b9a3e872 100644 --- a/services/support/configuration/configuration.go +++ b/services/support/configuration/configuration.go @@ -47,6 +47,9 @@ type Configuration struct { // Terminal configuration TerminalInactivityTimeout time.Duration `json:"terminal_inactivity_timeout"` TerminalMaxFrameSize int `json:"terminal_max_frame_size"` + + // Internal authentication (shared secret with backend) + InternalSecret string `json:"-"` } // Config is the global configuration instance @@ -86,6 +89,9 @@ func Init() { Config.TerminalInactivityTimeout = parseDurationWithDefault("TERMINAL_INACTIVITY_TIMEOUT", 30*time.Minute) Config.TerminalMaxFrameSize = parseIntWithDefault("TERMINAL_MAX_FRAME_SIZE", 65536) + // Internal authentication + Config.InternalSecret = os.Getenv("INTERNAL_SECRET") + logger.LogConfigLoad("env", "configuration", true, nil) } diff --git a/services/support/main.go 
b/services/support/main.go index c70fd300..6dcdbb4c 100644 --- a/services/support/main.go +++ b/services/support/main.go @@ -133,9 +133,9 @@ func main() { // Tunnel endpoint (WebSocket, requires system Basic Auth, rate-limited per IP + per system_key) api.GET("/tunnel", middleware.TunnelRateLimitMiddleware(), middleware.BasicAuthMiddleware(), middleware.SystemKeyRateLimitMiddleware(), methods.HandleTunnel) - // Internal endpoints: require per-session token from backend (#3/#4) + // Internal endpoints: require internal secret + per-session token from backend (#3/#4) internal := api.Group("/") - internal.Use(middleware.SessionTokenMiddleware(), middleware.SessionRateLimitMiddleware()) + internal.Use(middleware.InternalSecretMiddleware(), middleware.SessionTokenMiddleware(), middleware.SessionRateLimitMiddleware()) internal.GET("/terminal/:session_id", methods.HandleTerminal) internal.GET("/proxy/:session_id/services", methods.ListServices) diff --git a/services/support/methods/proxy.go b/services/support/methods/proxy.go index f9249168..89bf1ece 100644 --- a/services/support/methods/proxy.go +++ b/services/support/methods/proxy.go @@ -21,6 +21,7 @@ import ( "net/http/httputil" "strconv" "strings" + "sync/atomic" "github.com/gin-gonic/gin" @@ -29,6 +30,13 @@ import ( "github.com/nethesis/my/services/support/tunnel" ) +// globalRewriteBytes tracks the total memory used by concurrent response rewrites (#14). +// Limits total decompressed memory to prevent coordinated gzip bomb attacks. +var globalRewriteBytes atomic.Int64 + +// maxGlobalRewriteBytes is the maximum total memory for concurrent response rewrites (50 MB). 
+const maxGlobalRewriteBytes int64 = 50 * 1024 * 1024 + // HandleProxy proxies HTTP/WebSocket requests through the yamux tunnel // Route: ANY /api/proxy/:session_id/:service/*path (internal, no auth) func HandleProxy(c *gin.Context) { @@ -124,6 +132,10 @@ func HandleProxy(c *gin.Context) { resp.Header.Del("X-Frame-Options") resp.Header.Set("Content-Security-Policy", "frame-ancestors 'self'") + // Rewrite upstream cookies to prevent cross-session leakage (#9): + // scope to exact proxy hostname, enforce Secure and SameSite=Strict + rewriteUpstreamCookies(resp) + // Rewrite hardcoded hostnames in text responses so that JS API calls // go through the proxy instead of directly to the original host. if needsRewrite && isRewritableResponse(resp) { @@ -246,6 +258,23 @@ func buildHostRewriteMap(t *tunnel.Tunnel, currentProxyHost string) map[string]s return rewrites } +// rewriteUpstreamCookies rewrites cookies from upstream services to prevent +// cross-session leakage (#9). Scopes cookies to the exact proxy hostname +// and enforces Secure + SameSite=Strict flags. +func rewriteUpstreamCookies(resp *http.Response) { + cookies := resp.Cookies() + if len(cookies) == 0 { + return + } + resp.Header.Del("Set-Cookie") + for _, cookie := range cookies { + cookie.Domain = "" + cookie.Secure = true + cookie.SameSite = http.SameSiteStrictMode + resp.Header.Add("Set-Cookie", cookie.String()) + } +} + // rewriteResponseBodyMulti replaces all hostname occurrences in the response body // using a map of original -> proxy hostnames. 
func rewriteResponseBodyMulti(resp *http.Response, rewrites map[string]string) error { @@ -253,6 +282,11 @@ func rewriteResponseBodyMulti(resp *http.Response, rewrites map[string]string) e return nil } + // Check global memory budget before decompressing (#14) + if globalRewriteBytes.Load() >= maxGlobalRewriteBytes { + return nil + } + var body []byte var isGzipped bool @@ -279,8 +313,13 @@ func rewriteResponseBodyMulti(resp *http.Response, rewrites map[string]string) e } _ = resp.Body.Close() + // Track memory usage for concurrent rewrite budget (#14) + bodySize := int64(len(body)) + globalRewriteBytes.Add(bodySize) + defer globalRewriteBytes.Add(-bodySize) + // Skip rewriting for oversized responses - if int64(len(body)) > maxRewriteBodySize { + if bodySize > maxRewriteBodySize { resp.Body = io.NopCloser(bytes.NewReader(body)) return nil } diff --git a/services/support/methods/tunnel.go b/services/support/methods/tunnel.go index b083ba65..e77096d2 100644 --- a/services/support/methods/tunnel.go +++ b/services/support/methods/tunnel.go @@ -13,6 +13,7 @@ import ( "encoding/json" "io" "net/http" + "regexp" "time" "github.com/gin-gonic/gin" @@ -25,6 +26,9 @@ import ( "github.com/nethesis/my/services/support/tunnel" ) +// nodeIDPattern validates node_id query parameter (numeric, max 10 digits) +var nodeIDPattern = regexp.MustCompile(`^[0-9]{1,10}$`) + var ( // TunnelManager is the global tunnel manager instance TunnelManager *tunnel.Manager @@ -50,6 +54,15 @@ func HandleTunnel(c *gin.Context) { sysID := systemID.(string) nodeID := c.Query("node_id") + + // Validate node_id format to prevent memory abuse from crafted values (#20) + if nodeID != "" { + if len(nodeID) > 10 || !nodeIDPattern.MatchString(nodeID) { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid node_id format"}) + return + } + } + log := logger.RequestLogger(c, "tunnel") // #8: Check for reconnect token when reusing an existing session during grace period diff --git 
a/services/support/middleware/auth.go b/services/support/middleware/auth.go index 2a757b20..44c9eda8 100644 --- a/services/support/middleware/auth.go +++ b/services/support/middleware/auth.go @@ -32,6 +32,30 @@ import ( "github.com/nethesis/my/services/support/response" ) +// InternalSecretMiddleware validates the X-Internal-Secret header (#4). +// Provides defense-in-depth: even if a session token leaks, the caller +// must also know the shared internal secret to access tunnel endpoints. +func InternalSecretMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + secret := configuration.Config.InternalSecret + if secret == "" { + c.Next() + return + } + provided := c.GetHeader("X-Internal-Secret") + if subtle.ConstantTimeCompare([]byte(provided), []byte(secret)) != 1 { + logger.Warn(). + Str("client_ip", c.ClientIP()). + Str("path", c.Request.URL.Path). + Msg("invalid or missing internal secret") + c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "forbidden", nil)) + c.Abort() + return + } + c.Next() + } +} + // SessionTokenMiddleware validates the X-Session-Token header for // internal endpoints. Each request is tied to a specific active session // via the session_id URL parameter, eliminating the single shared secret. diff --git a/services/support/middleware/ratelimit.go b/services/support/middleware/ratelimit.go index 6a2e2d4e..82cc1ea3 100644 --- a/services/support/middleware/ratelimit.go +++ b/services/support/middleware/ratelimit.go @@ -20,6 +20,12 @@ import ( "github.com/nethesis/my/services/support/response" ) +// NOTE: Rate limiters are in-process (not distributed). If the support service +// is scaled to multiple instances behind a load balancer, rate limits multiply +// by N instances. The support service is designed to run as a single instance +// (stateful tunnel management in memory), so this is acceptable. If horizontal +// scaling is needed, migrate to Redis-based rate limiting with INCR+EXPIRE. 
+ type rateLimitEntry struct { count int resetAt time.Time diff --git a/services/support/session/manager.go b/services/support/session/manager.go index ef2efaa1..ebbed664 100644 --- a/services/support/session/manager.go +++ b/services/support/session/manager.go @@ -23,7 +23,12 @@ import ( "github.com/nethesis/my/services/support/models" ) -// GenerateToken creates a cryptographically secure session token +// GenerateToken creates a cryptographically secure session token. +// Tokens are stored in plaintext in the database because they serve as a shared +// secret for inter-service communication (backend reads them to authenticate +// requests to the support service). The 256-bit entropy makes brute-forcing +// infeasible; the primary risk is a full database compromise, which would +// expose all session data regardless of token hashing. func GenerateToken() (string, error) { bytes := make([]byte, 32) if _, err := rand.Read(bytes); err != nil { diff --git a/services/support/tunnel/manager.go b/services/support/tunnel/manager.go index e6e0d7c9..0ccbf66a 100644 --- a/services/support/tunnel/manager.go +++ b/services/support/tunnel/manager.go @@ -12,6 +12,7 @@ package tunnel import ( "fmt" "net" + "regexp" "strings" "sync" "time" @@ -22,6 +23,10 @@ import ( "github.com/nethesis/my/services/support/logger" ) +// validHostname matches valid FQDN hostnames and IP addresses. +// Prevents hostname rewrite injection by rejecting special characters. +var validHostname = regexp.MustCompile(`^[a-zA-Z0-9]([a-zA-Z0-9._:-]*[a-zA-Z0-9])?$`) + // ServiceInfo describes a service available through the tunnel type ServiceInfo struct { Target string `json:"target"` @@ -395,6 +400,16 @@ func (t *Tunnel) SetServices(services map[string]ServiceInfo) { Msg("rejected service with dangerous target") continue } + // Validate Host field to prevent hostname rewrite injection (#7): + // a malicious Host value could hijack rewrite mappings for other services. 
+ if svc.Host != "" && !validHostname.MatchString(svc.Host) { + logger.ComponentLogger("tunnel_manager").Warn(). + Str("system_id", t.SystemID). + Str("service", name). + Str("host", svc.Host). + Msg("rejected service with invalid host value") + continue + } validated[name] = svc } t.services = validated @@ -409,7 +424,8 @@ var dangerousHostnames = map[string]bool{ "metadata.platformequinix.com": true, } -// validateServiceTarget rejects targets pointing to dangerous addresses (#5) +// validateServiceTarget rejects targets pointing to dangerous addresses. +// For non-IP hostnames, DNS is resolved to block DNS rebinding attacks (#2). func validateServiceTarget(target string) error { if target == "" { return fmt.Errorf("empty target") @@ -427,9 +443,26 @@ func validateServiceTarget(target string) error { ip := net.ParseIP(host) if ip == nil { - return nil // regular hostname, allowed + // Resolve DNS to prevent rebinding attacks: a hostname that resolves + // to a benign IP at registration time could later resolve to a + // dangerous IP (e.g., 169.254.169.254) when the operator connects. 
+ ips, lookupErr := net.LookupIP(host) + if lookupErr != nil { + return fmt.Errorf("DNS resolution failed for %s: %w", host, lookupErr) + } + for _, resolvedIP := range ips { + if err := validateIP(resolvedIP); err != nil { + return fmt.Errorf("hostname %s resolves to blocked address: %w", host, err) + } + } + return nil } + return validateIP(ip) +} + +// validateIP checks a single IP address against blocked ranges +func validateIP(ip net.IP) error { // Block unspecified address (0.0.0.0, ::) if ip.IsUnspecified() { return fmt.Errorf("unspecified address blocked: %s", ip) @@ -458,11 +491,6 @@ func validateServiceTarget(target string) error { return fmt.Errorf("IPv6 link-local address blocked: %s", ip) } - // Block IPv6 loopback (::1) - if ip.IsLoopback() { - return fmt.Errorf("IPv6 loopback address blocked: %s", ip) - } - // Block IPv6 multicast (ff00::/8) if ip.IsMulticast() { return fmt.Errorf("IPv6 multicast address blocked: %s", ip) diff --git a/services/support/tunnel/protocol.go b/services/support/tunnel/protocol.go index 4cc9718c..76d9a206 100644 --- a/services/support/tunnel/protocol.go +++ b/services/support/tunnel/protocol.go @@ -12,12 +12,21 @@ package tunnel import ( "fmt" "io" + "regexp" "strings" ) +// validServiceNamePattern validates service names against injection attacks. +// Rejects names with newlines or control characters that could desync the CONNECT protocol. +var validServiceNamePattern = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`) + // WriteConnectHeader writes a CONNECT request header to the stream. // Format: "CONNECT \n" +// Validates the service name to prevent newline injection (#17). 
func WriteConnectHeader(w io.Writer, serviceName string) error { + if !validServiceNamePattern.MatchString(serviceName) { + return fmt.Errorf("invalid service name: %q", serviceName) + } _, err := fmt.Fprintf(w, "CONNECT %s\n", serviceName) return err } From 90d58c6562e0f7235031b8efcb496ce239bd221c Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Tue, 10 Mar 2026 15:43:28 +0100 Subject: [PATCH 10/28] fix(proxy): configure pid and cache paths for nginx-unprivileged Add pid directive to /tmp/nginx.pid and create writable cache directories so nginx can run as non-root user without permission errors. --- proxy/Containerfile | 5 +++-- proxy/nginx.conf | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/proxy/Containerfile b/proxy/Containerfile index ee66176a..12a4bebf 100644 --- a/proxy/Containerfile +++ b/proxy/Containerfile @@ -12,8 +12,9 @@ COPY .render-build-trigger /tmp/build-trigger # Copy nginx configuration COPY nginx.conf /etc/nginx/nginx.conf -# Create log directory with correct ownership -RUN mkdir -p /var/log/nginx && chown nginx:nginx /var/log/nginx +# Create writable directories for nginx-unprivileged +RUN mkdir -p /var/log/nginx /var/cache/nginx/client_temp /var/cache/nginx/proxy_temp \ + && chown -R nginx:nginx /var/log/nginx /var/cache/nginx # Copy and setup entrypoint script COPY entrypoint.sh /entrypoint.sh diff --git a/proxy/nginx.conf b/proxy/nginx.conf index 3078ff09..830350a9 100644 --- a/proxy/nginx.conf +++ b/proxy/nginx.conf @@ -1,3 +1,5 @@ +pid /tmp/nginx.pid; + events { worker_connections 1024; } From 28bcc43a0af8d2bf73211aeffdd64dd55a454458 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Tue, 10 Mar 2026 15:48:56 +0100 Subject: [PATCH 11/28] fix(proxy): allow Logto domain in CSP connect-src directive Add https://*.nethesis.it to connect-src so the frontend can reach the Logto identity provider for OIDC flows. 
--- proxy/nginx.conf | 2 +- proxy/nginx.conf.local | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/proxy/nginx.conf b/proxy/nginx.conf index 830350a9..3c881d1e 100644 --- a/proxy/nginx.conf +++ b/proxy/nginx.conf @@ -34,7 +34,7 @@ http { add_header X-Content-Type-Options "nosniff" always; add_header Referrer-Policy "strict-origin-when-cross-origin" always; add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; - add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; connect-src 'self' wss:; img-src 'self' data:; frame-ancestors 'self';" always; + add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; connect-src 'self' wss: https://*.nethesis.it; img-src 'self' data:; frame-ancestors 'self';" always; # DNS resolver for dynamic upstream resolution (internal Render DNS) resolver ${RESOLVER} valid=30s; diff --git a/proxy/nginx.conf.local b/proxy/nginx.conf.local index 8fda96d0..261997ee 100644 --- a/proxy/nginx.conf.local +++ b/proxy/nginx.conf.local @@ -24,7 +24,7 @@ http { add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; add_header Referrer-Policy "strict-origin-when-cross-origin" always; - add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; connect-src 'self' wss:; img-src 'self' data:; frame-ancestors 'self';" always; + add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; connect-src 'self' wss: https://*.nethesis.it; img-src 'self' data:; frame-ancestors 'self';" always; # Support subdomain proxy — *.support.{domain} server { From 4ef9e59a497e4dd8cfd00c664fb9ec632d5465d8 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Wed, 11 Mar 2026 15:06:19 +0100 Subject: [PATCH 12/28] feat(backend): add support_session_id to system responses Embed the 
support session ID directly in system list and detail endpoints to avoid N+1 API calls when checking session status per system. --- backend/entities/local_systems.go | 20 ++++++++++++++++++-- backend/models/systems.go | 3 +++ backend/openapi.yaml | 5 +++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/backend/entities/local_systems.go b/backend/entities/local_systems.go index bea8c0d0..fefb07d8 100644 --- a/backend/entities/local_systems.go +++ b/backend/entities/local_systems.go @@ -46,7 +46,8 @@ func (r *LocalSystemRepository) GetByID(id string) (*models.System, error) { s.system_key, s.organization_id, s.custom_data, s.notes, s.created_at, s.updated_at, s.created_by, s.registered_at, s.suspended_at, s.suspended_by_org_id, h.last_heartbeat, s.last_inventory_at, COALESCE(uo.name, 'Owner') as organization_name, COALESCE(uo.org_type, 'owner') as organization_type, - COALESCE(uo.db_id, '') as organization_db_id + COALESCE(uo.db_id, '') as organization_db_id, + (SELECT ss.id FROM support_sessions ss WHERE ss.system_id = s.id AND ss.status IN ('pending', 'active') LIMIT 1) as support_session_id FROM systems s LEFT JOIN system_heartbeats h ON s.id = h.system_id LEFT JOIN unified_organizations uo ON s.organization_id = uo.logto_id @@ -60,12 +61,14 @@ func (r *LocalSystemRepository) GetByID(id string) (*models.System, error) { var registeredAt, suspendedAt, lastHeartbeat, lastInventory sql.NullTime var suspendedByOrgID sql.NullString var organizationName, organizationType, organizationDBID sql.NullString + var supportSessionID sql.NullString err := r.db.QueryRow(query, id).Scan( &system.ID, &system.Name, &system.Type, &system.Status, &fqdn, &ipv4Address, &ipv6Address, &version, &system.SystemKey, &system.Organization.LogtoID, &customDataJSON, &system.Notes, &system.CreatedAt, &system.UpdatedAt, &createdByJSON, ®isteredAt, &suspendedAt, &suspendedByOrgID, &lastHeartbeat, &lastInventory, &organizationName, &organizationType, &organizationDBID, + 
&supportSessionID, ) if err == sql.ErrNoRows { @@ -97,6 +100,11 @@ func (r *LocalSystemRepository) GetByID(id string) (*models.System, error) { system.SuspendedByOrgID = &suspendedByOrgID.String } + // Set support session ID if present + if supportSessionID.Valid { + system.SupportSessionID = &supportSessionID.String + } + // Parse custom_data JSON if len(customDataJSON) > 0 { if err := json.Unmarshal(customDataJSON, &system.CustomData); err != nil { @@ -341,7 +349,8 @@ func (r *LocalSystemRepository) ListByCreatedByOrganizations(allowedOrgIDs []str s.system_key, s.organization_id, s.custom_data, s.notes, s.created_at, s.updated_at, s.deleted_at, s.registered_at, s.suspended_at, s.suspended_by_org_id, s.created_by, s.last_inventory_at, COALESCE(uo.name, 'Owner') as organization_name, COALESCE(uo.org_type, 'owner') as organization_type, - COALESCE(uo.db_id, '') as organization_db_id + COALESCE(uo.db_id, '') as organization_db_id, + (SELECT ss.id FROM support_sessions ss WHERE ss.system_id = s.id AND ss.status IN ('pending', 'active') LIMIT 1) as support_session_id FROM systems s LEFT JOIN unified_organizations uo ON s.organization_id = uo.logto_id WHERE %s @@ -369,12 +378,14 @@ func (r *LocalSystemRepository) ListByCreatedByOrganizations(allowedOrgIDs []str var deletedAt, registeredAt, suspendedAt, lastInventory sql.NullTime var suspendedByOrgID sql.NullString var organizationName, organizationType, organizationDBID sql.NullString + var supportSessionID sql.NullString err := rows.Scan( &system.ID, &system.Name, &system.Type, &system.Status, &fqdn, &ipv4Address, &ipv6Address, &version, &system.SystemKey, &system.Organization.LogtoID, &customDataJSON, &system.Notes, &system.CreatedAt, &system.UpdatedAt, &deletedAt, ®isteredAt, &suspendedAt, &suspendedByOrgID, &createdByJSON, &lastInventory, &organizationName, &organizationType, &organizationDBID, + &supportSessionID, ) if err != nil { return nil, 0, fmt.Errorf("failed to scan system: %w", err) @@ -412,6 +423,11 @@ 
func (r *LocalSystemRepository) ListByCreatedByOrganizations(allowedOrgIDs []str system.LastInventory = &lastInventory.Time } + // Set support session ID if present + if supportSessionID.Valid { + system.SupportSessionID = &supportSessionID.String + } + // Parse custom_data JSON if len(customDataJSON) > 0 { if err := json.Unmarshal(customDataJSON, &system.CustomData); err != nil { diff --git a/backend/models/systems.go b/backend/models/systems.go index 716f1507..926ac985 100644 --- a/backend/models/systems.go +++ b/backend/models/systems.go @@ -51,6 +51,9 @@ type System struct { LastHeartbeat *time.Time `json:"last_heartbeat,omitempty"` // Last heartbeat timestamp LastInventory *time.Time `json:"last_inventory,omitempty"` // Last inventory timestamp (NULL = never received) + // Support session info (populated by query) + SupportSessionID *string `json:"support_session_id"` + // Rebranding info (populated by handler) RebrandingEnabled bool `json:"rebranding_enabled"` RebrandingOrgID *string `json:"rebranding_org_id,omitempty"` diff --git a/backend/openapi.yaml b/backend/openapi.yaml index cd88e0e9..a36ffe58 100644 --- a/backend/openapi.yaml +++ b/backend/openapi.yaml @@ -1481,6 +1481,11 @@ components: nullable: true description: Organization ID that caused cascade suspension (for targeted reactivation). example: "org_abc123" + support_session_id: + type: string + nullable: true + description: ID of the active or pending support session for this system, null if none. Use GET /api/support-sessions/:id to fetch session details. 
+ example: "a1b2c3d4-e5f6-7890-abcd-ef1234567890" rebranding_enabled: type: boolean description: Whether rebranding is active for this system (direct or inherited) From 5761822d740e7d0752fd456d5f443327219d3042 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Wed, 11 Mar 2026 16:24:48 +0100 Subject: [PATCH 13/28] feat(frontend): add support session popover to systems table Show a clickable headset icon next to system name when an active support session exists. The popover displays session status, dates, and connected operators with per-node terminal badges. Backend now tracks terminal disconnect times via access log lifecycle (insert returns ID, disconnect updates disconnected_at). --- backend/entities/support.go | 23 +- backend/methods/support_proxy.go | 10 +- .../systems/SupportSessionPopover.vue | 245 ++++++++++++++++++ .../src/components/systems/SystemsTable.vue | 7 +- frontend/src/i18n/en/translation.json | 5 + frontend/src/i18n/it/translation.json | 5 + frontend/src/lib/support/support.ts | 17 ++ frontend/src/lib/systems/systems.ts | 1 + 8 files changed, 305 insertions(+), 8 deletions(-) create mode 100644 frontend/src/components/systems/SupportSessionPopover.vue diff --git a/backend/entities/support.go b/backend/entities/support.go index 163bc5f3..13cc692b 100644 --- a/backend/entities/support.go +++ b/backend/entities/support.go @@ -493,15 +493,28 @@ func (r *SupportRepository) CloseSession(sessionID string) error { return nil } -// InsertAccessLog inserts a new access log entry -func (r *SupportRepository) InsertAccessLog(sessionID, operatorID, operatorName, accessType, metadata string) error { - _, err := r.db.Exec( +// InsertAccessLog inserts a new access log entry and returns its ID +func (r *SupportRepository) InsertAccessLog(sessionID, operatorID, operatorName, accessType, metadata string) (string, error) { + var logID string + err := r.db.QueryRow( `INSERT INTO support_access_logs (session_id, operator_id, operator_name, access_type, connected_at, 
metadata) - VALUES ($1, $2, $3, $4, NOW(), $5)`, + VALUES ($1, $2, $3, $4, NOW(), $5) RETURNING id`, sessionID, operatorID, operatorName, accessType, metadata, + ).Scan(&logID) + if err != nil { + return "", fmt.Errorf("failed to insert access log: %w", err) + } + return logID, nil +} + +// DisconnectAccessLog sets disconnected_at on an access log entry +func (r *SupportRepository) DisconnectAccessLog(logID string) error { + _, err := r.db.Exec( + `UPDATE support_access_logs SET disconnected_at = NOW() WHERE id = $1 AND disconnected_at IS NULL`, + logID, ) if err != nil { - return fmt.Errorf("failed to insert access log: %w", err) + return fmt.Errorf("failed to update access log disconnect: %w", err) } return nil } diff --git a/backend/methods/support_proxy.go b/backend/methods/support_proxy.go index 87fd91ef..07b2f7aa 100644 --- a/backend/methods/support_proxy.go +++ b/backend/methods/support_proxy.go @@ -122,7 +122,7 @@ func logAccess(c *gin.Context, sessionID, accessType, metadata string) { metaBytes, _ := json.Marshal(map[string]string{"service": metadata}) jsonMetadata := string(metaBytes) repo := entities.NewSupportRepository() - if err := repo.InsertAccessLog(sessionID, userID, userName, accessType, jsonMetadata); err != nil { + if _, err := repo.InsertAccessLog(sessionID, userID, userName, accessType, jsonMetadata); err != nil { logger.Warn().Err(err).Str("session_id", sessionID).Msg("failed to insert access log") } } @@ -246,7 +246,8 @@ func GetSupportSessionTerminal(c *gin.Context) { userName := ticket.Name metaBytes, _ := json.Marshal(map[string]string{"service": "terminal"}) jsonMetadata := string(metaBytes) - if logErr := repo.InsertAccessLog(sessionID, ticket.UserID, userName, "web_terminal", jsonMetadata); logErr != nil { + accessLogID, logErr := repo.InsertAccessLog(sessionID, ticket.UserID, userName, "web_terminal", jsonMetadata) + if logErr != nil { logger.Warn().Err(logErr).Str("session_id", sessionID).Msg("failed to insert access log") } @@ 
-367,6 +368,11 @@ func GetSupportSessionTerminal(c *gin.Context) { }() <-done + if accessLogID != "" { + if err := repo.DisconnectAccessLog(accessLogID); err != nil { + logger.Warn().Err(err).Str("session_id", sessionID).Msg("failed to update access log disconnect") + } + } logger.Info().Str("session_id", sessionID).Msg("terminal session ended") } diff --git a/frontend/src/components/systems/SupportSessionPopover.vue b/frontend/src/components/systems/SupportSessionPopover.vue new file mode 100644 index 00000000..d38cfe21 --- /dev/null +++ b/frontend/src/components/systems/SupportSessionPopover.vue @@ -0,0 +1,245 @@ + + + + + diff --git a/frontend/src/components/systems/SystemsTable.vue b/frontend/src/components/systems/SystemsTable.vue index 99a5d672..67e3086b 100644 --- a/frontend/src/components/systems/SystemsTable.vue +++ b/frontend/src/components/systems/SystemsTable.vue @@ -67,6 +67,7 @@ import SuspendSystemModal from './SuspendSystemModal.vue' import ReactivateSystemModal from './ReactivateSystemModal.vue' import DestroySystemModal from './DestroySystemModal.vue' import SystemStatusIcon from './SystemStatusIcon.vue' +import SupportSessionPopover from './SupportSessionPopover.vue' const { isShownCreateSystemDrawer = false } = defineProps<{ isShownCreateSystemDrawer: boolean @@ -553,7 +554,10 @@ function onCloseSecretRegeneratedModal() { -
+
+
diff --git a/frontend/src/i18n/en/translation.json b/frontend/src/i18n/en/translation.json index 184798ed..cec2809e 100644 --- a/frontend/src/i18n/en/translation.json +++ b/frontend/src/i18n/en/translation.json @@ -460,6 +460,11 @@ "status_offline": "Inactive", "status_unknown": "Inventory not received", "status_deleted": "Archived", + "support_session": "Support session", + "support_session_active": "Support session active", + "cannot_load_support_session": "Cannot load session details", + "node": "Node", + "connected_operators": "Connected operators", "reset_filters": "Reset filters", "copy_and_close": "Copy and close", "system_secret_copied": "System secret copied", diff --git a/frontend/src/i18n/it/translation.json b/frontend/src/i18n/it/translation.json index 8bfd144b..207cdcb4 100644 --- a/frontend/src/i18n/it/translation.json +++ b/frontend/src/i18n/it/translation.json @@ -460,6 +460,11 @@ "status_offline": "Inattivo", "status_unknown": "Inventario non ricevuto", "status_deleted": "Archiviato", + "support_session": "Sessione di supporto", + "support_session_active": "Sessione di supporto attiva", + "cannot_load_support_session": "Impossibile caricare i dettagli della sessione", + "node": "Nodo", + "connected_operators": "Operatori connessi", "reset_filters": "Reimposta filtri", "copy_and_close": "Copia e chiudi", "system_secret_copied": "Segreto del sistema copiato", diff --git a/frontend/src/lib/support/support.ts b/frontend/src/lib/support/support.ts index bfa8d83e..e5062932 100644 --- a/frontend/src/lib/support/support.ts +++ b/frontend/src/lib/support/support.ts @@ -142,6 +142,23 @@ export const getSupportSessions = ( .then((res) => res.data.data) } +export const getSystemActiveSessions = (systemId: string) => { + const loginStore = useLoginStore() + const params = new URLSearchParams({ + page: '1', + page_size: '100', + system_id: systemId, + status: 'active', + }) + params.append('status', 'pending') + + return axios + 
.get(`${API_URL}/support-sessions?${params}`, { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }) + .then((res) => res.data.data.support_sessions) +} + export const getSupportSession = (id: string) => { const loginStore = useLoginStore() diff --git a/frontend/src/lib/systems/systems.ts b/frontend/src/lib/systems/systems.ts index c5faa2e4..30c14093 100644 --- a/frontend/src/lib/systems/systems.ts +++ b/frontend/src/lib/systems/systems.ts @@ -45,6 +45,7 @@ export const SystemSchema = v.object({ system_key: v.optional(v.string()), system_secret: v.string(), suspended_at: v.optional(v.string()), + support_session_id: v.optional(v.nullable(v.string())), organization: v.object({ id: v.string(), name: v.string(), From 3e008194275ea5ae54e4ae2c1948bed9770ded2b Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Fri, 13 Mar 2026 18:51:34 +0100 Subject: [PATCH 14/28] refactor(support): split tunnel-client into packages and add configurable rate limits Refactor the tunnel-client from a single 1181-line main.go into organized internal packages (config, connection, discovery, models, stream, terminal). Rename traefik.go to nethserver.go with updated function names and log messages. Replace YAML config with EXCLUDE_PATTERNS env var / --exclude flag for service filtering. Improve api-cli error logging to include stderr output. Add configurable rate limiting via env vars (RATE_LIMIT_TUNNEL_PER_IP, RATE_LIMIT_TUNNEL_PER_KEY, RATE_LIMIT_SESSION_PER_ID, RATE_LIMIT_WINDOW) with session limit raised from 100 to 500 req/min. Add build-tunnel-client and run-tunnel-client Makefile targets. 
--- services/support/.env.example | 6 + services/support/Makefile | 25 +- services/support/README.md | 33 +- .../tunnel-client/internal/config/config.go | 81 ++ .../internal/connection/connection.go | 208 +++ .../internal/connection/wsconn.go | 72 ++ .../internal/discovery/discovery.go | 196 +++ .../internal/discovery/nethsecurity.go | 87 ++ .../internal/discovery/nethserver.go | 228 ++++ .../tunnel-client/internal/models/models.go | 38 + .../tunnel-client/internal/stream/handler.go | 149 +++ .../internal/terminal/terminal.go | 185 +++ services/support/cmd/tunnel-client/main.go | 1142 +---------------- .../support/configuration/configuration.go | 12 + services/support/go.mod | 4 +- services/support/main.go | 8 + services/support/middleware/ratelimit.go | 19 +- 17 files changed, 1374 insertions(+), 1119 deletions(-) create mode 100644 services/support/cmd/tunnel-client/internal/config/config.go create mode 100644 services/support/cmd/tunnel-client/internal/connection/connection.go create mode 100644 services/support/cmd/tunnel-client/internal/connection/wsconn.go create mode 100644 services/support/cmd/tunnel-client/internal/discovery/discovery.go create mode 100644 services/support/cmd/tunnel-client/internal/discovery/nethsecurity.go create mode 100644 services/support/cmd/tunnel-client/internal/discovery/nethserver.go create mode 100644 services/support/cmd/tunnel-client/internal/models/models.go create mode 100644 services/support/cmd/tunnel-client/internal/stream/handler.go create mode 100644 services/support/cmd/tunnel-client/internal/terminal/terminal.go diff --git a/services/support/.env.example b/services/support/.env.example index 598b7c8f..774291a0 100644 --- a/services/support/.env.example +++ b/services/support/.env.example @@ -25,6 +25,12 @@ TUNNEL_GRACE_PERIOD=2m MAX_TUNNELS=1000 MAX_SESSIONS_PER_SYSTEM=5 +# Rate limiting +# RATE_LIMIT_TUNNEL_PER_IP=10 +# RATE_LIMIT_TUNNEL_PER_KEY=5 +# RATE_LIMIT_SESSION_PER_ID=500 +# RATE_LIMIT_WINDOW=1m + # Internal 
authentication (shared secret with backend) # IMPORTANT: Generate with: openssl rand -hex 32 INTERNAL_SECRET=CHANGE_ME_GENERATE_WITH_openssl_rand_hex_32 diff --git a/services/support/Makefile b/services/support/Makefile index 633c80bb..0ae7c04d 100644 --- a/services/support/Makefile +++ b/services/support/Makefile @@ -38,6 +38,12 @@ run: @echo "Starting support development server..." @go run main.go +# Run tunnel-client locally +.PHONY: run-tunnel-client +run-tunnel-client: + @echo "Starting tunnel-client..." + @go run ./cmd/tunnel-client/ + # Run the application with QA environment .PHONY: run-qa run-qa: @@ -52,12 +58,21 @@ build: @CGO_ENABLED=0 go build $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME) main.go @echo "Built $(BINARY_NAME) -> $(BUILD_DIR)/$(BINARY_NAME)" +# Build tunnel-client for linux/amd64 +.PHONY: build-tunnel-client +build-tunnel-client: + @echo "Building tunnel-client (linux/amd64)..." + @mkdir -p $(BUILD_DIR) + @GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build $(LDFLAGS) -o $(BUILD_DIR)/tunnel-client-linux-amd64 ./cmd/tunnel-client/ + @echo "Built tunnel-client -> $(BUILD_DIR)/tunnel-client-linux-amd64" + # Build for multiple platforms .PHONY: build-all build-all: clean @echo "Building for multiple platforms..." 
@mkdir -p $(BUILD_DIR) @GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME)-linux-amd64 main.go + @GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build $(LDFLAGS) -o $(BUILD_DIR)/tunnel-client-linux-amd64 ./cmd/tunnel-client/ @echo "Built binaries for multiple platforms in $(BUILD_DIR)/" # Run tests @@ -201,16 +216,18 @@ help: @echo "Support Service - Container runtime: $(CONTAINER_NAME)" @echo "" @echo "Available targets:" - @echo " build - Build the binary" - @echo " build-all - Build for multiple platforms" + @echo " build - Build the binary" + @echo " build-tunnel-client - Build tunnel-client (linux/amd64)" + @echo " build-all - Build for multiple platforms" @echo " clean - Clean build artifacts" @echo " fmt - Format code" @echo " help - Show this help" @echo " install - Install the binary" @echo " lint - Run linter" @echo " pre-commit - Run all pre-commit checks" - @echo " run - Start development server" - @echo " run-qa - Start QA server (uses .env.qa)" + @echo " run - Start development server" + @echo " run-tunnel-client - Run tunnel-client locally" + @echo " run-qa - Start QA server (uses .env.qa)" @echo " test - Run tests" @echo " test-coverage - Run tests with coverage" @echo " tidy - Tidy dependencies" diff --git a/services/support/README.md b/services/support/README.md index 2c3e3edd..24494c0c 100644 --- a/services/support/README.md +++ b/services/support/README.md @@ -90,12 +90,21 @@ make fmt # Run linter make lint -# Build +# Build support service make build +# Build tunnel-client (linux/amd64) +make build-tunnel-client + +# Build all binaries (support + tunnel-client) +make build-all + # Run server make run +# Run tunnel-client locally +make run-tunnel-client + # Run QA server (uses .env.qa) make run-qa @@ -137,6 +146,14 @@ services/support/ ├── main.go # Server entry point ├── cmd/ │ └── tunnel-client/ # Client binary deployed on remote systems +│ ├── main.go # CLI entry point (flags, signal handling) +│ └── internal/ 
+│ ├── config/ # ClientConfig, env parsing, helpers +│ ├── connection/ # WebSocket + yamux connection, reconnect loop +│ ├── discovery/ # Service discovery (Traefik, NethSecurity, static) +│ ├── models/ # ServiceInfo, ServiceManifest, ApiCliRoute +│ ├── stream/ # CONNECT protocol stream handler +│ └── terminal/ # PTY spawning, binary frame protocol ├── configuration/ # Environment configuration ├── database/ # PostgreSQL connection ├── helpers/ # SHA256 verification @@ -161,6 +178,20 @@ services/support/ └── .env.example # Environment variables template ``` +### Tunnel Client Configuration + +All tunnel client settings are configured via environment variables or CLI flags. + +Service exclusion patterns filter out services that are not useful for support operators: + +```bash +# Via environment variable (comma-separated glob patterns) +EXCLUDE_PATTERNS="*-server-api,*-janus,*-middleware-*,*-provisioning,*-reports-api,*-cti-server-api,*-server-websocket,*-tancredi,*_loki,*_prometheus" + +# Via CLI flag +tunnel-client --exclude "*-server-api,*-janus,*-middleware-*" +``` + ## Related - [openapi.yaml](../../backend/openapi.yaml) - API specification - [Backend](../../backend/README.md) - API server diff --git a/services/support/cmd/tunnel-client/internal/config/config.go b/services/support/cmd/tunnel-client/internal/config/config.go new file mode 100644 index 00000000..1d553786 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/config/config.go @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
// Defaults -- all overridable via CLI flags or environment variables
const (
	DefaultRedisAddr         = "127.0.0.1:6379"
	DefaultReconnectDelay    = 5 * time.Second
	DefaultMaxReconnect      = 5 * time.Minute
	DefaultDiscoveryInterval = 5 * time.Minute
	// DefaultYamuxKeepAlive is assigned to yamux.Config.KeepAliveInterval,
	// which is a time.Duration. As a bare untyped constant (30) it silently
	// became 30 *nanoseconds* at the assignment site, causing a keep-alive
	// ping storm. The unit must be explicit.
	DefaultYamuxKeepAlive = 30 * time.Second
	RedisPingTimeout      = 2 * time.Second
)

// ClientConfig holds the runtime configuration for the tunnel client.
type ClientConfig struct {
	URL               string        // support-service WebSocket endpoint
	Key               string        // Basic Auth username (system key)
	Secret            string        // Basic Auth password (system secret)
	NodeID            string        // optional node ID for multi-node clusters
	RedisAddr         string        // NS8 Redis address ("" on NethSecurity)
	StaticServices    string        // comma-separated name=target[:tls][:host=h] entries
	Exclude           []string      // glob patterns of services to hide from operators
	ReconnectDelay    time.Duration // initial reconnect backoff
	MaxReconnectDelay time.Duration // upper bound for the exponential backoff
	DiscoveryInterval time.Duration // period between service re-discovery runs
	TLSInsecure       bool          // skip TLS verification (dev / self-signed certs)
}

// ParseExcludePatterns parses a comma-separated string of glob patterns.
// Entries are whitespace-trimmed and blank entries are dropped.
// Returns nil if the input is empty.
func ParseExcludePatterns(raw string) []string {
	if raw == "" {
		log.Println("No exclusion patterns configured (set EXCLUDE_PATTERNS or --exclude to filter services)")
		return nil
	}

	var patterns []string
	for _, p := range strings.Split(raw, ",") {
		p = strings.TrimSpace(p)
		if p != "" {
			patterns = append(patterns, p)
		}
	}
	log.Printf("Excluding %d service patterns: %v", len(patterns), patterns)
	return patterns
}

// EnvWithDefault returns the value of the environment variable named by key,
// or defaultValue if the variable is not set or empty.
func EnvWithDefault(key, defaultValue string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return defaultValue
}
+func ParseDurationDefault(s string, d time.Duration) time.Duration { + if s == "" { + return d + } + if v, err := time.ParseDuration(s); err == nil { + return v + } + return d +} diff --git a/services/support/cmd/tunnel-client/internal/connection/connection.go b/services/support/cmd/tunnel-client/internal/connection/connection.go new file mode 100644 index 00000000..85501c61 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/connection/connection.go @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package connection + +import ( + "context" + "crypto/tls" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "net/url" + "os" + "time" + + "github.com/gorilla/websocket" + "github.com/hashicorp/yamux" + + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/config" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/discovery" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/models" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/stream" +) + +// closeCodeSessionClosed matches the server's CloseCodeSessionClosed. +// When the operator closes a session, the server sends this code +// to tell the client to exit without reconnecting. +const closeCodeSessionClosed = 4000 + +// RunWithReconnect connects to the support service and reconnects on failure +// with exponential backoff. +func RunWithReconnect(ctx context.Context, cfg *config.ClientConfig) { + delay := cfg.ReconnectDelay + + for { + start := time.Now() + err := connect(ctx, cfg) + if ctx.Err() != nil { + return // context cancelled, clean shutdown + } + + // Check if the server sent a "session closed" close frame + if websocket.IsCloseError(err, closeCodeSessionClosed) { + log.Println("Session closed by operator. 
Exiting.") + os.Exit(0) + } + + log.Printf("Connection lost: %v", err) + + // Reset backoff if connection lasted longer than 60 seconds + if time.Since(start) > 60*time.Second { + delay = cfg.ReconnectDelay + } + + log.Printf("Reconnecting in %v...", delay) + + select { + case <-ctx.Done(): + return + case <-time.After(delay): + } + + // Exponential backoff + delay = delay * 2 + if delay > cfg.MaxReconnectDelay { + delay = cfg.MaxReconnectDelay + } + } +} + +func connect(ctx context.Context, cfg *config.ClientConfig) error { + // Build Basic Auth header + creds := base64.StdEncoding.EncodeToString([]byte(cfg.Key + ":" + cfg.Secret)) + header := http.Header{} + header.Set("Authorization", "Basic "+creds) + + // Append node_id query parameter for multi-node clusters + connectURL := cfg.URL + if cfg.NodeID != "" { + parsed, err := url.Parse(connectURL) + if err != nil { + return fmt.Errorf("invalid URL: %w", err) + } + q := parsed.Query() + q.Set("node_id", cfg.NodeID) + parsed.RawQuery = q.Encode() + connectURL = parsed.String() + } + + log.Printf("Connecting to %s ...", connectURL) + + dialer := websocket.Dialer{ + HandshakeTimeout: 10 * time.Second, + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: cfg.TLSInsecure, //nolint:gosec // Configurable: disabled by default, enable for dev/self-signed certs + }, + } + wsConn, _, err := dialer.Dial(connectURL, header) + if err != nil { + return fmt.Errorf("websocket dial failed: %w", err) + } + log.Println("WebSocket connected") + + // Wrap as net.Conn + netConn := &WsNetConn{conn: wsConn} + + // Create yamux client session + yamuxCfg := yamux.DefaultConfig() + yamuxCfg.EnableKeepAlive = true + yamuxCfg.KeepAliveInterval = config.DefaultYamuxKeepAlive + yamuxCfg.LogOutput = io.Discard + + session, err := yamux.Client(netConn, yamuxCfg) + if err != nil { + _ = wsConn.Close() + return fmt.Errorf("yamux client creation failed: %w", err) + } + log.Println("yamux session established") + + // Discover services + services := 
discovery.DiscoverServices(ctx, cfg) + + // Send initial manifest + if err := sendManifest(session, services); err != nil { + _ = session.Close() + return fmt.Errorf("failed to send manifest: %w", err) + } + + // Start periodic re-discovery + go func() { + ticker := time.NewTicker(cfg.DiscoveryInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-session.CloseChan(): + return + case <-ticker.C: + newServices := discovery.DiscoverServices(ctx, cfg) + if len(newServices) > 0 { + if err := sendManifest(session, newServices); err != nil { + log.Printf("Failed to send updated manifest: %v", err) + } else { + services = newServices + log.Printf("Manifest updated with %d services", len(services)) + } + } + } + } + }() + + // Close session when context is cancelled to unblock Accept() + go func() { + <-ctx.Done() + _ = session.Close() + }() + + // Accept incoming streams + for { + yamuxStream, err := session.Accept() + if err != nil { + if ctx.Err() != nil { + return nil + } + // If the underlying WebSocket received a close frame, return that error + // so the reconnect loop can inspect the close code + netConn.mu.Lock() + closeErr := netConn.closeErr + netConn.mu.Unlock() + if closeErr != nil { + return closeErr + } + return fmt.Errorf("stream accept error: %w", err) + } + go stream.HandleStream(yamuxStream, services) + } +} + +func sendManifest(session *yamux.Session, services map[string]models.ServiceInfo) error { + yamuxStream, err := session.Open() + if err != nil { + return fmt.Errorf("failed to open control stream: %w", err) + } + defer func() { _ = yamuxStream.Close() }() + + manifest := models.ServiceManifest{ + Version: 1, + Services: services, + } + + if err := json.NewEncoder(yamuxStream).Encode(manifest); err != nil { + return fmt.Errorf("failed to encode manifest: %w", err) + } + + log.Printf("Manifest sent with %d services", len(services)) + return nil +} diff --git 
a/services/support/cmd/tunnel-client/internal/connection/wsconn.go b/services/support/cmd/tunnel-client/internal/connection/wsconn.go new file mode 100644 index 00000000..2b1f496b --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/connection/wsconn.go @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package connection + +import ( + "io" + "net" + "sync" + "time" + + "github.com/gorilla/websocket" +) + +// WsNetConn wraps gorilla/websocket.Conn as net.Conn for yamux. +// It captures WebSocket close errors so the reconnect loop can inspect the close code. +type WsNetConn struct { + conn *websocket.Conn + reader io.Reader + mu sync.Mutex + closeErr error // stores the WebSocket close error if received +} + +func (w *WsNetConn) Read(b []byte) (int, error) { + for { + if w.reader == nil { + _, reader, err := w.conn.NextReader() + if err != nil { + w.mu.Lock() + w.closeErr = err + w.mu.Unlock() + return 0, err + } + w.reader = reader + } + n, err := w.reader.Read(b) + if err == io.EOF { + w.reader = nil + if n > 0 { + return n, nil + } + continue + } + return n, err + } +} + +func (w *WsNetConn) Write(b []byte) (int, error) { + err := w.conn.WriteMessage(websocket.BinaryMessage, b) + if err != nil { + return 0, err + } + return len(b), nil +} + +func (w *WsNetConn) Close() error { return w.conn.Close() } +func (w *WsNetConn) LocalAddr() net.Addr { return w.conn.LocalAddr() } +func (w *WsNetConn) RemoteAddr() net.Addr { return w.conn.RemoteAddr() } +func (w *WsNetConn) SetDeadline(t time.Time) error { + if err := w.conn.SetReadDeadline(t); err != nil { + return err + } + return w.conn.SetWriteDeadline(t) +} +func (w *WsNetConn) SetReadDeadline(t time.Time) error { return w.conn.SetReadDeadline(t) } +func (w *WsNetConn) SetWriteDeadline(t time.Time) error { return w.conn.SetWriteDeadline(t) } diff --git 
a/services/support/cmd/tunnel-client/internal/discovery/discovery.go b/services/support/cmd/tunnel-client/internal/discovery/discovery.go new file mode 100644 index 00000000..abdc9d63 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/discovery/discovery.go @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package discovery + +import ( + "context" + "log" + "path/filepath" + "sort" + "strings" + + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/config" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/models" +) + +// DiscoverServices discovers all available services from static configuration, +// Traefik routes (NS8), and NethSecurity detection. +func DiscoverServices(ctx context.Context, cfg *config.ClientConfig) map[string]models.ServiceInfo { + services := make(map[string]models.ServiceInfo) + + // Parse static services + if cfg.StaticServices != "" { + for _, entry := range strings.Split(cfg.StaticServices, ",") { + entry = strings.TrimSpace(entry) + if entry == "" { + continue + } + parts := strings.SplitN(entry, "=", 2) + if len(parts) != 2 { + log.Printf("Invalid static service entry: %s", entry) + continue + } + name := parts[0] + if IsExcluded(name, cfg.Exclude) { + continue + } + target := parts[1] + + svc := models.ServiceInfo{Label: name} + + // Check for :tls suffix + if strings.HasSuffix(target, ":tls") { + svc.TLS = true + target = strings.TrimSuffix(target, ":tls") + } + + // Check for host override: name=target:port:host=hostname + if idx := strings.Index(target, ":host="); idx != -1 { + svc.Host = target[idx+6:] + target = target[:idx] + } + + svc.Target = target + services[name] = svc + } + } + + // NethServer auto-discovery via api-cli and Redis + if cfg.RedisAddr != "" { + discovered := DiscoverNethServerServices(ctx, cfg.RedisAddr) + for name, 
svc := range discovered { + if IsExcluded(name, cfg.Exclude) { + continue + } + services[name] = svc + } + } + + // NethSecurity auto-discovery (OpenWrt-based, no Redis/Traefik) + if cfg.RedisAddr == "" { + discovered := DiscoverNethSecurityServices() + for name, svc := range discovered { + if IsExcluded(name, cfg.Exclude) { + continue + } + services[name] = svc + } + } + + LogDiscoveredServices(services) + + return services +} + +// LogDiscoveredServices prints a structured summary grouped by node -> module -> service +func LogDiscoveredServices(services map[string]models.ServiceInfo) { + type moduleGroup struct { + label string + services map[string]models.ServiceInfo + } + type nodeGroup struct { + modules map[string]*moduleGroup + ungrouped []string // service keys without moduleID + } + + nodes := make(map[string]*nodeGroup) // keyed by nodeID ("" for non-node services) + + for name, svc := range services { + nid := svc.NodeID + ng, ok := nodes[nid] + if !ok { + ng = &nodeGroup{modules: make(map[string]*moduleGroup)} + nodes[nid] = ng + } + + if svc.ModuleID == "" { + ng.ungrouped = append(ng.ungrouped, name) + continue + } + + mg, ok := ng.modules[svc.ModuleID] + if !ok { + mg = &moduleGroup{services: make(map[string]models.ServiceInfo)} + ng.modules[svc.ModuleID] = mg + } + if mg.label == "" && svc.Label != "" { + mg.label = svc.Label + } + mg.services[name] = svc + } + + log.Printf("Discovered %d services across %d node(s)", len(services), len(nodes)) + + // Sort node IDs (empty string = non-node services, printed last) + nodeIDs := make([]string, 0, len(nodes)) + for nid := range nodes { + nodeIDs = append(nodeIDs, nid) + } + sort.Strings(nodeIDs) + + for _, nid := range nodeIDs { + ng := nodes[nid] + + if nid != "" { + log.Printf(" Node %s:", nid) + } + + indent := " " + if nid != "" { + indent = " " + } + + // Print modules (sorted) + moduleIDs := make([]string, 0, len(ng.modules)) + for id := range ng.modules { + moduleIDs = append(moduleIDs, id) + } + 
sort.Strings(moduleIDs) + + for _, moduleID := range moduleIDs { + mg := ng.modules[moduleID] + if mg.label != "" { + log.Printf("%s%s (%s)", indent, moduleID, mg.label) + } else { + log.Printf("%s%s", indent, moduleID) + } + names := make([]string, 0, len(mg.services)) + for name := range mg.services { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + svc := mg.services[name] + route := svc.Host + if svc.Path != "" && svc.Path != "/" { + route += svc.Path + } + log.Printf("%s - %s -> %s", indent, name, route) + } + } + + // Print ungrouped services (static, cluster-admin) + sort.Strings(ng.ungrouped) + for _, name := range ng.ungrouped { + svc := services[name] + log.Printf("%s%s -> %s", indent, name, svc.Target) + } + } +} + +// IsExcluded checks if a service name matches any of the exclusion patterns. +func IsExcluded(name string, patterns []string) bool { + for _, pattern := range patterns { + if matched, _ := filepath.Match(pattern, name); matched { + return true + } + } + return false +} diff --git a/services/support/cmd/tunnel-client/internal/discovery/nethsecurity.go b/services/support/cmd/tunnel-client/internal/discovery/nethsecurity.go new file mode 100644 index 00000000..f23a5238 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/discovery/nethsecurity.go @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package discovery + +import ( + "fmt" + "log" + "net" + "os" + "strings" + + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/models" +) + +const ( + // NethSecurity detection paths + nethSecUIPath = "/www-ns/index.html" + nethSecNginxConf = "/etc/nginx/conf.d/ns-ui.conf" + defaultNethSecUIPort = "443" +) + +// DiscoverNethSecurityServices detects NethSecurity (OpenWrt-based firewall) +// by checking for its web UI files and registers the main HTTPS service. +// NethSecurity runs nginx with the UI on a configurable port: +// - Port from /etc/nginx/conf.d/ns-ui.conf (dedicated UI server block) +// - Port 443 (when 00ns.locations is active, UI is on the default server) +func DiscoverNethSecurityServices() map[string]models.ServiceInfo { + services := make(map[string]models.ServiceInfo) + + // Detect NethSecurity by checking for its UI directory + if _, err := os.Stat(nethSecUIPath); err != nil { + return services + } + + hostname, _ := os.Hostname() + if hostname == "" { + hostname = "NethSecurity" + } + + port := detectNethSecurityUIPort() + + log.Printf("NethSecurity detected (hostname: %s, UI port: %s), registering web UI service", hostname, port) + + services["nethsecurity-ui"] = models.ServiceInfo{ + Target: net.JoinHostPort("127.0.0.1", port), + Host: "127.0.0.1", + TLS: true, + Label: hostname, + Path: "/", + } + + return services +} + +// detectNethSecurityUIPort determines the HTTPS port serving the NethSecurity UI. +// It checks ns-ui.conf for a dedicated server block (e.g., port 9090), and +// falls back to 443 when the UI locations are on the default server. 
+func detectNethSecurityUIPort() string { + // Check for dedicated UI server block (ns-ui.conf) + data, err := os.ReadFile(nethSecNginxConf) + if err == nil { + // Parse "listen ssl" directive + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "listen") && strings.Contains(line, "ssl") && !strings.Contains(line, "[::]:") { + fields := strings.Fields(line) + if len(fields) >= 2 { + port := fields[1] + // Validate it looks like a port number + if _, err := fmt.Sscanf(port, "%d", new(int)); err == nil { + return port + } + } + } + } + } + + // Default: UI on the main server + return defaultNethSecUIPort +} diff --git a/services/support/cmd/tunnel-client/internal/discovery/nethserver.go b/services/support/cmd/tunnel-client/internal/discovery/nethserver.go new file mode 100644 index 00000000..62ae808d --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/discovery/nethserver.go @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package discovery + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "log" + "net/url" + "os" + "os/exec" + "regexp" + "sort" + "strings" + + "github.com/redis/go-redis/v9" + + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/models" +) + +const ( + defaultRemoteHTTPSPort = "443" + ns8NodeEnvFile = "/var/lib/nethserver/node/state/environment" +) + +// moduleIDRegex matches NS8 module IDs (compiled once at package level) +var moduleIDRegex = regexp.MustCompile(`^(.+\d+)(?:[-_]|$)`) + +// DiscoverNethServerServices uses api-cli to discover routes from ALL cluster nodes. 
+func DiscoverNethServerServices(ctx context.Context, redisAddr string) map[string]models.ServiceInfo { + services := make(map[string]models.ServiceInfo) + + rdb := redis.NewClient(&redis.Options{ + Addr: redisAddr, + }) + defer func() { _ = rdb.Close() }() + + // Discover all node IDs by scanning Redis keys + nodeIDs := discoverNodeIDs(ctx, rdb) + if len(nodeIDs) == 0 { + log.Println("NethServer discovery: no nodes found, skipping") + return services + } + + // Read local NODE_ID to distinguish local vs remote nodes + localNodeID := ReadNodeID() + log.Printf("NethServer discovery: found %d node(s): %v (local: %s)", len(nodeIDs), nodeIDs, localNodeID) + + // Build a map of remote node IPs from Redis VPN config + nodeIPs := make(map[string]string) + for _, nid := range nodeIDs { + if nid == localNodeID { + continue + } + ip, err := rdb.HGet(ctx, fmt.Sprintf("node/%s/vpn", nid), "ip_address").Result() + if err != nil { + log.Printf("NethServer discovery: cannot get IP for node %s: %v", nid, err) + continue + } + nodeIPs[nid] = ip + log.Printf("NethServer discovery: node %s -> %s", nid, ip) + } + + for _, nodeID := range nodeIDs { + nodeServices := discoverNodeRoutes(ctx, rdb, nodeID) + + // For remote nodes, rewrite targets to go through the node's Traefik (HTTPS). + // Traefik on the remote node handles TLS termination, Host-based routing, + // and PathPrefix stripping, so we clear PathPrefix to avoid double-stripping. + if nodeID != localNodeID { + remoteIP, ok := nodeIPs[nodeID] + if !ok { + log.Printf("NethServer discovery: skipping node %s (no IP)", nodeID) + continue + } + for name, svc := range nodeServices { + svc.Target = remoteIP + ":" + defaultRemoteHTTPSPort + svc.TLS = true + svc.PathPrefix = "" + nodeServices[name] = svc + } + } + + for name, svc := range nodeServices { + services[name] = svc + } + } + + return services +} + +// ReadNodeID reads NODE_ID from the NS8 node environment file. 
+func ReadNodeID() string { + f, err := os.Open(ns8NodeEnvFile) + if err != nil { + return "" + } + defer func() { _ = f.Close() }() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + if strings.HasPrefix(line, "NODE_ID=") { + return strings.TrimPrefix(line, "NODE_ID=") + } + } + return "" +} + +// discoverNodeIDs finds all NS8 node IDs by scanning Redis keys. +func discoverNodeIDs(ctx context.Context, rdb *redis.Client) []string { + var nodeIDs []string + var cursor uint64 + + for { + keys, nextCursor, err := rdb.Scan(ctx, cursor, "node/*/default_instance/traefik", 100).Result() + if err != nil { + log.Printf("NethServer discovery: Redis SCAN error: %v", err) + return nodeIDs + } + + for _, key := range keys { + // key format: node/{NODE_ID}/default_instance/traefik + parts := strings.Split(key, "/") + if len(parts) >= 2 { + nodeIDs = append(nodeIDs, parts[1]) + } + } + + cursor = nextCursor + if cursor == 0 { + break + } + } + + sort.Strings(nodeIDs) + return nodeIDs +} + +// discoverNodeRoutes uses api-cli to get all routes from a node's Traefik instance. 
+func discoverNodeRoutes(ctx context.Context, rdb *redis.Client, nodeID string) map[string]models.ServiceInfo { + services := make(map[string]models.ServiceInfo) + + // Get the traefik instance name from Redis + traefikInstance, err := rdb.Get(ctx, fmt.Sprintf("node/%s/default_instance/traefik", nodeID)).Result() + if err != nil { + log.Printf("NethServer discovery: cannot get traefik instance for node %s: %v", nodeID, err) + return services + } + + // Call api-cli to get all routes with details + cmd := exec.CommandContext(ctx, "api-cli", "run", + fmt.Sprintf("module/%s/list-routes", traefikInstance), + "--data", `{"expand_list": true}`) + output, err := cmd.CombinedOutput() + if err != nil { + log.Printf("NethServer discovery: api-cli failed for %s (node %s): %v: %s", traefikInstance, nodeID, err, strings.TrimSpace(string(output))) + return services + } + + var routes []models.ApiCliRoute + if err := json.Unmarshal(output, &routes); err != nil { + log.Printf("NethServer discovery: cannot parse api-cli output for %s: %v", traefikInstance, err) + return services + } + + for _, route := range routes { + serviceKey := route.Instance + + // Parse target from URL + parsed, err := url.Parse(route.URL) + if err != nil { + continue + } + target := parsed.Host + useTLS := parsed.Scheme == "https" + + // Determine PathPrefix (only if strip_prefix is true) + var pathPrefix string + if route.Path != "" && route.StripPrefix { + pathPrefix = route.Path + } + + // Extract module ID and look up its ui_name from Redis + moduleID := extractModuleID(serviceKey) + var moduleLabel string + if moduleID != "" { + uiName, err := rdb.Get(ctx, "module/"+moduleID+"/ui_name").Result() + if err == nil && uiName != "" { + moduleLabel = uiName + } + } + + services[serviceKey] = models.ServiceInfo{ + Target: target, + Host: route.Host, + TLS: useTLS, + Label: moduleLabel, + Path: route.Path, + PathPrefix: pathPrefix, + ModuleID: moduleID, + NodeID: nodeID, + } + } + + return services +} + +// 
extractModuleID extracts the module ID from a Traefik config filename. +// NS8 module IDs end with an instance number (e.g., "nethvoice103", "n8n2", +// "nethsecurity-controller4"). Route suffixes are separated by hyphen or +// underscore after the digits (e.g., "nethvoice103-ui", "metrics1_grafana"). +func extractModuleID(name string) string { + m := moduleIDRegex.FindStringSubmatch(name) + if len(m) > 1 { + return m[1] + } + return "" +} diff --git a/services/support/cmd/tunnel-client/internal/models/models.go b/services/support/cmd/tunnel-client/internal/models/models.go new file mode 100644 index 00000000..520447ea --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/models/models.go @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package models + +// ServiceInfo matches the support service's tunnel.ServiceInfo +type ServiceInfo struct { + Target string `json:"target"` + Host string `json:"host"` + TLS bool `json:"tls"` + Label string `json:"label"` + Path string `json:"path,omitempty"` + PathPrefix string `json:"path_prefix,omitempty"` + ModuleID string `json:"module_id,omitempty"` + NodeID string `json:"node_id,omitempty"` +} + +// ServiceManifest is the JSON manifest sent to the support service +type ServiceManifest struct { + Version int `json:"version"` + Services map[string]ServiceInfo `json:"services"` +} + +// ApiCliRoute represents a single route returned by api-cli list-routes with expand_list +type ApiCliRoute struct { + Instance string `json:"instance"` + Host string `json:"host"` + Path string `json:"path"` + URL string `json:"url"` + StripPrefix bool `json:"strip_prefix"` + SkipCertVerif bool `json:"skip_cert_verify"` +} diff --git a/services/support/cmd/tunnel-client/internal/stream/handler.go b/services/support/cmd/tunnel-client/internal/stream/handler.go new file mode 100644 index 
00000000..e4fa29b0 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/stream/handler.go @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package stream + +import ( + "crypto/tls" + "fmt" + "io" + "log" + "net" + "strings" + "sync" + "time" + + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/models" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/terminal" +) + +const maxLineLength = 1024 + +// HandleStream processes an incoming yamux stream by reading a CONNECT header, +// resolving the target service, and proxying traffic bidirectionally. +func HandleStream(stream net.Conn, services map[string]models.ServiceInfo) { + defer func() { _ = stream.Close() }() + + // Read CONNECT header + serviceName, err := readConnectHeader(stream) + if err != nil { + log.Printf("Failed to read CONNECT header: %v", err) + return + } + + // Built-in terminal service: spawn a PTY instead of dialing TCP + if serviceName == "terminal" { + if err := writeConnectResponse(stream, nil); err != nil { + return + } + log.Println("CONNECT terminal -> PTY") + terminal.HandleTerminal(stream) + return + } + + // Look up service + svc, ok := services[serviceName] + if !ok { + _ = writeConnectResponse(stream, fmt.Errorf("service not found: %s", serviceName)) + return + } + + // Connect to local target + var targetConn net.Conn + if svc.TLS { + targetConn, err = tls.Dial("tcp", svc.Target, &tls.Config{ + InsecureSkipVerify: true, //nolint:gosec // Local services use self-signed certs + }) + } else { + targetConn, err = net.DialTimeout("tcp", svc.Target, 10*time.Second) + } + if err != nil { + _ = writeConnectResponse(stream, fmt.Errorf("failed to connect to %s: %v", svc.Target, err)) + return + } + + // Send OK response + if err := writeConnectResponse(stream, nil); err != nil { + _ = 
targetConn.Close() + return + } + + log.Printf("CONNECT %s -> %s", serviceName, svc.Target) + + // Bidirectional copy with proper cleanup to prevent goroutine leaks + var once sync.Once + done := make(chan struct{}) + closeBoth := func() { + once.Do(func() { + close(done) + _ = targetConn.Close() + _ = stream.Close() + }) + } + + go func() { + defer closeBoth() + _, _ = io.Copy(targetConn, stream) + }() + + go func() { + defer closeBoth() + _, _ = io.Copy(stream, targetConn) + }() + + <-done +} + +// readConnectHeader reads "CONNECT \n" from the stream byte-by-byte +func readConnectHeader(r io.Reader) (string, error) { + line, err := readLine(r) + if err != nil { + return "", err + } + if !strings.HasPrefix(line, "CONNECT ") { + return "", fmt.Errorf("invalid CONNECT header: %q", line) + } + name := strings.TrimPrefix(line, "CONNECT ") + if name == "" { + return "", fmt.Errorf("empty service name") + } + return name, nil +} + +func writeConnectResponse(w io.Writer, err error) error { + if err == nil { + _, writeErr := fmt.Fprint(w, "OK\n") + return writeErr + } + _, writeErr := fmt.Fprintf(w, "ERROR %s\n", err.Error()) + return writeErr +} + +func readLine(r io.Reader) (string, error) { + var buf []byte + b := make([]byte, 1) + for { + n, err := r.Read(b) + if n > 0 { + if b[0] == '\n' { + return string(buf), nil + } + buf = append(buf, b[0]) + if len(buf) > maxLineLength { + return "", fmt.Errorf("line too long") + } + } + if err != nil { + if err == io.EOF && len(buf) > 0 { + return string(buf), nil + } + return "", err + } + } +} diff --git a/services/support/cmd/tunnel-client/internal/terminal/terminal.go b/services/support/cmd/tunnel-client/internal/terminal/terminal.go new file mode 100644 index 00000000..96ca63b3 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/terminal/terminal.go @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package terminal + +import ( + "encoding/binary" + "encoding/json" + "fmt" + "io" + "log" + "net" + "os" + "os/exec" + "strings" + "sync" + + "github.com/creack/pty" +) + +const ( + defaultShell = "/bin/bash" + defaultTermEnv = "TERM=xterm-256color" + maxFrameSize = 1024 * 1024 // 1 MB +) + +// sensitiveEnvPrefixes lists environment variable prefixes that are stripped +// from the PTY shell to prevent operators from extracting credentials (#8). +var sensitiveEnvPrefixes = []string{ + "SYSTEM_KEY=", + "SYSTEM_SECRET=", + "SUPPORT_URL=", + "DATABASE_URL=", + "REDIS_ADDR=", + "REDIS_PASSWORD=", + "REDIS_URL=", + "INTERNAL_SECRET=", + "TUNNEL_CONFIG=", +} + +// HandleTerminal spawns a shell with a PTY and bridges it to the yamux stream +// using length-prefixed binary frames: +// - Type 0 (data): raw terminal bytes (bidirectional) +// - Type 1 (resize): JSON {"cols": N, "rows": N} (stream -> PTY) +func HandleTerminal(stream net.Conn) { + shell := os.Getenv("SHELL") + if shell == "" { + shell = defaultShell + } + + cmd := exec.Command(shell) + cmd.Env = append(SanitizeEnv(os.Environ()), defaultTermEnv) + + ptmx, err := pty.Start(cmd) + if err != nil { + log.Printf("Failed to start PTY: %v", err) + return + } + var once sync.Once + done := make(chan struct{}) + closeAll := func() { + once.Do(func() { + close(done) + _ = ptmx.Close() + _ = stream.Close() + }) + } + defer func() { + closeAll() + _ = cmd.Process.Kill() + _, _ = cmd.Process.Wait() + }() + + // PTY -> stream: read from PTY, send as type-0 length-prefixed frames + go func() { + defer closeAll() + buf := make([]byte, 4096) + for { + n, readErr := ptmx.Read(buf) + if n > 0 { + frame := make([]byte, 1+n) + frame[0] = 0 // data frame + copy(frame[1:], buf[:n]) + if writeErr := WriteFrame(stream, frame); writeErr != nil { + return + } + } + if readErr != nil { + return + } + } + 
}() + + // Stream -> PTY: read length-prefixed frames, dispatch by type + go func() { + defer closeAll() + for { + frame, readErr := ReadFrame(stream) + if readErr != nil { + return + } + if len(frame) < 1 { + continue + } + + frameType := frame[0] + payload := frame[1:] + + switch frameType { + case 0: // data -> write to PTY + if _, writeErr := ptmx.Write(payload); writeErr != nil { + return + } + case 1: // resize -> set PTY window size + var size struct { + Cols int `json:"cols"` + Rows int `json:"rows"` + } + if jsonErr := json.Unmarshal(payload, &size); jsonErr != nil { + continue + } + if size.Cols > 0 && size.Rows > 0 { + _ = pty.Setsize(ptmx, &pty.Winsize{ + Rows: uint16(size.Rows), + Cols: uint16(size.Cols), + }) + } + } + } + }() + + <-done +} + +// WriteFrame writes a length-prefixed frame: [4 bytes big-endian length][payload] +func WriteFrame(w io.Writer, data []byte) error { + header := make([]byte, 4) + binary.BigEndian.PutUint32(header, uint32(len(data))) + if _, err := w.Write(header); err != nil { + return err + } + _, err := w.Write(data) + return err +} + +// ReadFrame reads a length-prefixed frame: [4 bytes big-endian length][payload] +func ReadFrame(r io.Reader) ([]byte, error) { + header := make([]byte, 4) + if _, err := io.ReadFull(r, header); err != nil { + return nil, err + } + length := binary.BigEndian.Uint32(header) + if length > maxFrameSize { + return nil, fmt.Errorf("frame too large: %d", length) + } + data := make([]byte, length) + if _, err := io.ReadFull(r, data); err != nil { + return nil, err + } + return data, nil +} + +// SanitizeEnv filters out sensitive environment variables before spawning a shell +func SanitizeEnv(env []string) []string { + filtered := make([]string, 0, len(env)) + for _, e := range env { + sensitive := false + for _, prefix := range sensitiveEnvPrefixes { + if strings.HasPrefix(e, prefix) { + sensitive = true + break + } + } + if !sensitive { + filtered = append(filtered, e) + } + } + return filtered +} 
diff --git a/services/support/cmd/tunnel-client/main.go b/services/support/cmd/tunnel-client/main.go index a4f61776..f946a334 100644 --- a/services/support/cmd/tunnel-client/main.go +++ b/services/support/cmd/tunnel-client/main.go @@ -19,111 +19,35 @@ package main import ( - "bufio" "context" - "crypto/tls" - "encoding/base64" - "encoding/binary" - "encoding/json" "fmt" - "io" "log" - "net" - "net/http" - "net/url" "os" - "os/exec" "os/signal" - "path/filepath" - "regexp" - "sort" - "strings" - "sync" "syscall" - "time" flag "github.com/spf13/pflag" - "github.com/creack/pty" - "github.com/gorilla/websocket" - "github.com/hashicorp/yamux" "github.com/redis/go-redis/v9" - "gopkg.in/yaml.v3" -) - -// ServiceInfo matches the support service's tunnel.ServiceInfo -type ServiceInfo struct { - Target string `json:"target"` - Host string `json:"host"` - TLS bool `json:"tls"` - Label string `json:"label"` - Path string `json:"path,omitempty"` - PathPrefix string `json:"path_prefix,omitempty"` - ModuleID string `json:"module_id,omitempty"` - NodeID string `json:"node_id,omitempty"` -} - -// ServiceManifest is the JSON manifest sent to the support service -type ServiceManifest struct { - Version int `json:"version"` - Services map[string]ServiceInfo `json:"services"` -} -// tunnelClientConfig is the YAML configuration file for the tunnel client -type tunnelClientConfig struct { - Exclude []string `yaml:"exclude"` -} - -// Defaults — all overridable via CLI flags or environment variables -const ( - defaultRedisAddr = "127.0.0.1:6379" - defaultReconnectDelay = 5 * time.Second - defaultMaxReconnect = 5 * time.Minute - defaultDiscoveryInterval = 5 * time.Minute - defaultShell = "/bin/bash" - defaultTermEnv = "TERM=xterm-256color" - defaultYamuxKeepAlive = 30 // seconds - defaultRemoteHTTPSPort = "443" - defaultNethSecUIPort = "443" - maxFrameSize = 1024 * 1024 // 1 MB - maxLineLength = 1024 - redisPingTimeout = 2 * time.Second - - // NethSecurity detection paths - nethSecUIPath = 
"/www-ns/index.html" - nethSecNginxConf = "/etc/nginx/conf.d/ns-ui.conf" - ns8NodeEnvFile = "/var/lib/nethserver/node/state/environment" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/config" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/connection" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/discovery" ) -// defaultExclude filters out backend API routes that are not useful for -// support operators. Only UI-facing services (cluster-admin, *-ui, *-wizard, -// *-reports-ui, *-amld, *_grafana, n8n*, nethsecurity-controller*) are kept. -var defaultExclude = []string{ - "*-cti-server-api", - "*-janus", - "*-middleware-*", - "*-provisioning", - "*-reports-api", - "*-server-api", - "*-server-websocket", - "*-tancredi", - "*_loki", - "*_prometheus", -} - func main() { var ( - urlFlag = flag.StringP("url", "u", envWithDefault("SUPPORT_URL", ""), "WebSocket tunnel URL (env: SUPPORT_URL)") - keyFlag = flag.StringP("key", "k", envWithDefault("SYSTEM_KEY", ""), "System key (env: SYSTEM_KEY)") - secretFlag = flag.StringP("secret", "s", envWithDefault("SYSTEM_SECRET", ""), "System secret (env: SYSTEM_SECRET)") - nodeIDFlag = flag.StringP("node-id", "n", envWithDefault("NODE_ID", ""), "Cluster node ID, auto-detected on NS8 (env: NODE_ID)") - redisAddr = flag.StringP("redis-addr", "r", envWithDefault("REDIS_ADDR", ""), "Redis address, auto-detected on NS8 (env: REDIS_ADDR)") - staticServices = flag.String("static-services", envWithDefault("STATIC_SERVICES", ""), "Static services name=host:port[:tls],… (env: STATIC_SERVICES)") - configFile = flag.StringP("config", "c", envWithDefault("TUNNEL_CONFIG", ""), "YAML config file for exclusions (env: TUNNEL_CONFIG)") - reconnectDelay = flag.Duration("reconnect-delay", parseDurationDefault(envWithDefault("RECONNECT_DELAY", ""), defaultReconnectDelay), "Base reconnect delay (env: RECONNECT_DELAY)") - maxReconnectDelay = flag.Duration("max-reconnect-delay", 
parseDurationDefault(envWithDefault("MAX_RECONNECT_DELAY", ""), defaultMaxReconnect), "Max reconnect delay (env: MAX_RECONNECT_DELAY)") - discoveryInterval = flag.Duration("discovery-interval", parseDurationDefault(envWithDefault("DISCOVERY_INTERVAL", ""), defaultDiscoveryInterval), "Service re-discovery interval (env: DISCOVERY_INTERVAL)") - tlsInsecure = flag.Bool("tls-insecure", envWithDefault("TLS_INSECURE", "") == "true", "Skip TLS verification (env: TLS_INSECURE)") + urlFlag = flag.StringP("url", "u", config.EnvWithDefault("SUPPORT_URL", ""), "WebSocket tunnel URL (env: SUPPORT_URL)") + keyFlag = flag.StringP("key", "k", config.EnvWithDefault("SYSTEM_KEY", ""), "System key (env: SYSTEM_KEY)") + secretFlag = flag.StringP("secret", "s", config.EnvWithDefault("SYSTEM_SECRET", ""), "System secret (env: SYSTEM_SECRET)") + nodeIDFlag = flag.StringP("node-id", "n", config.EnvWithDefault("NODE_ID", ""), "Cluster node ID, auto-detected on NS8 (env: NODE_ID)") + redisAddr = flag.StringP("redis-addr", "r", config.EnvWithDefault("REDIS_ADDR", ""), "Redis address, auto-detected on NS8 (env: REDIS_ADDR)") + staticServices = flag.String("static-services", config.EnvWithDefault("STATIC_SERVICES", ""), "Static services name=host:port[:tls],... 
(env: STATIC_SERVICES)") + excludePatterns = flag.String("exclude", config.EnvWithDefault("EXCLUDE_PATTERNS", ""), "Comma-separated glob patterns to exclude services (env: EXCLUDE_PATTERNS)") + reconnectDelay = flag.Duration("reconnect-delay", config.ParseDurationDefault(config.EnvWithDefault("RECONNECT_DELAY", ""), config.DefaultReconnectDelay), "Base reconnect delay (env: RECONNECT_DELAY)") + maxReconnectDelay = flag.Duration("max-reconnect-delay", config.ParseDurationDefault(config.EnvWithDefault("MAX_RECONNECT_DELAY", ""), config.DefaultMaxReconnect), "Max reconnect delay (env: MAX_RECONNECT_DELAY)") + discoveryInterval = flag.Duration("discovery-interval", config.ParseDurationDefault(config.EnvWithDefault("DISCOVERY_INTERVAL", ""), config.DefaultDiscoveryInterval), "Service re-discovery interval (env: DISCOVERY_INTERVAL)") + tlsInsecure = flag.Bool("tls-insecure", config.EnvWithDefault("TLS_INSECURE", "") == "true", "Skip TLS verification (env: TLS_INSECURE)") ) flag.Parse() @@ -135,13 +59,13 @@ func main() { // Auto-detect Redis on localhost if not explicitly specified if *redisAddr == "" { - rdb := redis.NewClient(&redis.Options{Addr: defaultRedisAddr}) - ctx, cancel := context.WithTimeout(context.Background(), redisPingTimeout) + rdb := redis.NewClient(&redis.Options{Addr: config.DefaultRedisAddr}) + ctx, cancel := context.WithTimeout(context.Background(), config.RedisPingTimeout) if err := rdb.Ping(ctx).Err(); err == nil { - log.Printf("Redis detected at %s, enabling NS8 auto-discovery", defaultRedisAddr) - *redisAddr = defaultRedisAddr + log.Printf("Redis detected at %s, enabling NS8 auto-discovery", config.DefaultRedisAddr) + *redisAddr = config.DefaultRedisAddr } else { - log.Printf("No Redis at %s, skipping NS8 auto-discovery (use -redis-addr to specify)", defaultRedisAddr) + log.Printf("No Redis at %s, skipping NS8 auto-discovery (use -redis-addr to specify)", config.DefaultRedisAddr) } cancel() _ = rdb.Close() @@ -149,22 +73,14 @@ func main() { // 
Auto-detect node ID from NS8 environment if not explicitly specified if *nodeIDFlag == "" && *redisAddr != "" { - if nid := readNodeID(); nid != "" { + if nid := discovery.ReadNodeID(); nid != "" { log.Printf("Auto-detected node ID: %s", nid) *nodeIDFlag = nid } } - // Build exclusion list: start with defaults, add config file overrides - exclude := append([]string{}, defaultExclude...) - if *configFile != "" { - if tc, err := loadConfig(*configFile); err != nil { - log.Printf("Warning: cannot load config %s: %v", *configFile, err) - } else if len(tc.Exclude) > 0 { - exclude = append(exclude, tc.Exclude...) - } - } - log.Printf("Excluding %d service patterns: %v", len(exclude), exclude) + // Build exclusion list (flag value already includes env fallback) + exclude := config.ParseExcludePatterns(*excludePatterns) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -178,1003 +94,19 @@ func main() { cancel() }() - config := &clientConfig{ - url: *urlFlag, - key: *keyFlag, - secret: *secretFlag, - nodeID: *nodeIDFlag, - redisAddr: *redisAddr, - staticServices: *staticServices, - configFile: *configFile, - exclude: exclude, - reconnectDelay: *reconnectDelay, - maxReconnectDelay: *maxReconnectDelay, - discoveryInterval: *discoveryInterval, - tlsInsecure: *tlsInsecure, - } - - runWithReconnect(ctx, config) -} - -type clientConfig struct { - url string - key string - secret string - nodeID string - redisAddr string - staticServices string - configFile string - reconnectDelay time.Duration - maxReconnectDelay time.Duration - discoveryInterval time.Duration - tlsInsecure bool - exclude []string // loaded from config file -} - -// closeCodeSessionClosed matches the server's CloseCodeSessionClosed. -// When the operator closes a session, the server sends this code -// to tell the client to exit without reconnecting. 
-const closeCodeSessionClosed = 4000 - -func runWithReconnect(ctx context.Context, cfg *clientConfig) { - delay := cfg.reconnectDelay - - for { - start := time.Now() - err := connect(ctx, cfg) - if ctx.Err() != nil { - return // context cancelled, clean shutdown - } - - // Check if the server sent a "session closed" close frame - if websocket.IsCloseError(err, closeCodeSessionClosed) { - log.Println("Session closed by operator. Exiting.") - os.Exit(0) - } - - log.Printf("Connection lost: %v", err) - - // Reset backoff if connection lasted longer than 60 seconds - if time.Since(start) > 60*time.Second { - delay = cfg.reconnectDelay - } - - log.Printf("Reconnecting in %v...", delay) - - select { - case <-ctx.Done(): - return - case <-time.After(delay): - } - - // Exponential backoff - delay = delay * 2 - if delay > cfg.maxReconnectDelay { - delay = cfg.maxReconnectDelay - } - } -} - -func connect(ctx context.Context, cfg *clientConfig) error { - // Build Basic Auth header - creds := base64.StdEncoding.EncodeToString([]byte(cfg.key + ":" + cfg.secret)) - header := http.Header{} - header.Set("Authorization", "Basic "+creds) - - // Append node_id query parameter for multi-node clusters - connectURL := cfg.url - if cfg.nodeID != "" { - parsed, err := url.Parse(connectURL) - if err != nil { - return fmt.Errorf("invalid URL: %w", err) - } - q := parsed.Query() - q.Set("node_id", cfg.nodeID) - parsed.RawQuery = q.Encode() - connectURL = parsed.String() - } - - log.Printf("Connecting to %s ...", connectURL) - - dialer := websocket.Dialer{ - HandshakeTimeout: 10 * time.Second, - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: cfg.tlsInsecure, //nolint:gosec // Configurable: disabled by default, enable for dev/self-signed certs - }, - } - wsConn, _, err := dialer.Dial(connectURL, header) - if err != nil { - return fmt.Errorf("websocket dial failed: %w", err) - } - log.Println("WebSocket connected") - - // Wrap as net.Conn - netConn := &wsNetConn{conn: wsConn} - - // Create 
yamux client session - yamuxCfg := yamux.DefaultConfig() - yamuxCfg.EnableKeepAlive = true - yamuxCfg.KeepAliveInterval = defaultYamuxKeepAlive - yamuxCfg.LogOutput = io.Discard - - session, err := yamux.Client(netConn, yamuxCfg) - if err != nil { - _ = wsConn.Close() - return fmt.Errorf("yamux client creation failed: %w", err) - } - log.Println("yamux session established") - - // Discover services - services := discoverServices(ctx, cfg) - - // Send initial manifest - if err := sendManifest(session, services); err != nil { - _ = session.Close() - return fmt.Errorf("failed to send manifest: %w", err) - } - - // Start periodic re-discovery - go func() { - ticker := time.NewTicker(cfg.discoveryInterval) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-session.CloseChan(): - return - case <-ticker.C: - newServices := discoverServices(ctx, cfg) - if len(newServices) > 0 { - if err := sendManifest(session, newServices); err != nil { - log.Printf("Failed to send updated manifest: %v", err) - } else { - services = newServices - log.Printf("Manifest updated with %d services", len(services)) - } - } - } - } - }() - - // Close session when context is cancelled to unblock Accept() - go func() { - <-ctx.Done() - _ = session.Close() - }() - - // Accept incoming streams - for { - stream, err := session.Accept() - if err != nil { - if ctx.Err() != nil { - return nil - } - // If the underlying WebSocket received a close frame, return that error - // so the reconnect loop can inspect the close code - netConn.mu.Lock() - closeErr := netConn.closeErr - netConn.mu.Unlock() - if closeErr != nil { - return closeErr - } - return fmt.Errorf("stream accept error: %w", err) - } - go handleStream(stream, services) - } -} - -func discoverServices(ctx context.Context, cfg *clientConfig) map[string]ServiceInfo { - services := make(map[string]ServiceInfo) - - // Parse static services - if cfg.staticServices != "" { - for _, entry := range 
strings.Split(cfg.staticServices, ",") { - entry = strings.TrimSpace(entry) - if entry == "" { - continue - } - parts := strings.SplitN(entry, "=", 2) - if len(parts) != 2 { - log.Printf("Invalid static service entry: %s", entry) - continue - } - name := parts[0] - if isExcluded(name, cfg.exclude) { - continue - } - target := parts[1] - - svc := ServiceInfo{Label: name} - - // Check for :tls suffix - if strings.HasSuffix(target, ":tls") { - svc.TLS = true - target = strings.TrimSuffix(target, ":tls") - } - - // Check for host override: name=target:port:host=hostname - if idx := strings.Index(target, ":host="); idx != -1 { - svc.Host = target[idx+6:] - target = target[:idx] - } - - svc.Target = target - services[name] = svc - } - } - - // NS8 auto-discovery from Traefik config files - if cfg.redisAddr != "" { - discovered := discoverTraefikRoutes(ctx, cfg.redisAddr) - for name, svc := range discovered { - if isExcluded(name, cfg.exclude) { - continue - } - services[name] = svc - } - } - - // NethSecurity auto-discovery (OpenWrt-based, no Redis/Traefik) - if cfg.redisAddr == "" { - discovered := discoverNethSecurityServices() - for name, svc := range discovered { - if isExcluded(name, cfg.exclude) { - continue - } - services[name] = svc - } - } - - logDiscoveredServices(services) - - return services -} - -// logDiscoveredServices prints a structured summary grouped by node → module → service -func logDiscoveredServices(services map[string]ServiceInfo) { - type moduleGroup struct { - label string - services map[string]ServiceInfo - } - type nodeGroup struct { - modules map[string]*moduleGroup - ungrouped []string // service keys without moduleID - } - - nodes := make(map[string]*nodeGroup) // keyed by nodeID ("" for non-node services) - - for name, svc := range services { - nid := svc.NodeID - ng, ok := nodes[nid] - if !ok { - ng = &nodeGroup{modules: make(map[string]*moduleGroup)} - nodes[nid] = ng - } - - if svc.ModuleID == "" { - ng.ungrouped = append(ng.ungrouped, 
name) - continue - } - - mg, ok := ng.modules[svc.ModuleID] - if !ok { - mg = &moduleGroup{services: make(map[string]ServiceInfo)} - ng.modules[svc.ModuleID] = mg - } - if mg.label == "" && svc.Label != "" { - mg.label = svc.Label - } - mg.services[name] = svc - } - - log.Printf("Discovered %d services across %d node(s)", len(services), len(nodes)) - - // Sort node IDs (empty string = non-node services, printed last) - nodeIDs := make([]string, 0, len(nodes)) - for nid := range nodes { - nodeIDs = append(nodeIDs, nid) - } - sort.Strings(nodeIDs) - - for _, nid := range nodeIDs { - ng := nodes[nid] - - if nid != "" { - log.Printf(" Node %s:", nid) - } - - indent := " " - if nid != "" { - indent = " " - } - - // Print modules (sorted) - moduleIDs := make([]string, 0, len(ng.modules)) - for id := range ng.modules { - moduleIDs = append(moduleIDs, id) - } - sort.Strings(moduleIDs) - - for _, moduleID := range moduleIDs { - mg := ng.modules[moduleID] - if mg.label != "" { - log.Printf("%s%s (%s)", indent, moduleID, mg.label) - } else { - log.Printf("%s%s", indent, moduleID) - } - names := make([]string, 0, len(mg.services)) - for name := range mg.services { - names = append(names, name) - } - sort.Strings(names) - for _, name := range names { - svc := mg.services[name] - route := svc.Host - if svc.Path != "" && svc.Path != "/" { - route += svc.Path - } - log.Printf("%s - %s -> %s", indent, name, route) - } - } - - // Print ungrouped services (static, cluster-admin) - sort.Strings(ng.ungrouped) - for _, name := range ng.ungrouped { - svc := services[name] - log.Printf("%s%s -> %s", indent, name, svc.Target) - } - } -} - -func loadConfig(path string) (*tunnelClientConfig, error) { - data, err := os.ReadFile(path) - if err != nil { - return nil, err - } - var cfg tunnelClientConfig - if err := yaml.Unmarshal(data, &cfg); err != nil { - return nil, err - } - return &cfg, nil -} - -func isExcluded(name string, patterns []string) bool { - for _, pattern := range patterns { - 
if matched, _ := filepath.Match(pattern, name); matched { - return true - } - } - return false -} - -// apiCliRoute represents a single route returned by api-cli list-routes with expand_list -type apiCliRoute struct { - Instance string `json:"instance"` - Host string `json:"host"` - Path string `json:"path"` - URL string `json:"url"` - StripPrefix bool `json:"strip_prefix"` - SkipCertVerif bool `json:"skip_cert_verify"` -} - -// discoverTraefikRoutes uses api-cli to discover routes from ALL cluster nodes. -func discoverTraefikRoutes(ctx context.Context, redisAddr string) map[string]ServiceInfo { - services := make(map[string]ServiceInfo) - - rdb := redis.NewClient(&redis.Options{ - Addr: redisAddr, - }) - defer func() { _ = rdb.Close() }() - - // Discover all node IDs by scanning Redis keys - nodeIDs := discoverNodeIDs(ctx, rdb) - if len(nodeIDs) == 0 { - log.Println("Traefik discovery: no nodes found, skipping") - return services - } - - // Read local NODE_ID to distinguish local vs remote nodes - localNodeID := readNodeID() - log.Printf("Traefik discovery: found %d node(s): %v (local: %s)", len(nodeIDs), nodeIDs, localNodeID) - - // Build a map of remote node IPs from Redis VPN config - nodeIPs := make(map[string]string) - for _, nid := range nodeIDs { - if nid == localNodeID { - continue - } - ip, err := rdb.HGet(ctx, fmt.Sprintf("node/%s/vpn", nid), "ip_address").Result() - if err != nil { - log.Printf("Traefik discovery: cannot get IP for node %s: %v", nid, err) - continue - } - nodeIPs[nid] = ip - log.Printf("Traefik discovery: node %s -> %s", nid, ip) - } - - for _, nodeID := range nodeIDs { - nodeServices := discoverNodeRoutes(ctx, rdb, nodeID) - - // For remote nodes, rewrite targets to go through the node's Traefik (HTTPS). - // Traefik on the remote node handles TLS termination, Host-based routing, - // and PathPrefix stripping, so we clear PathPrefix to avoid double-stripping. 
- if nodeID != localNodeID { - remoteIP, ok := nodeIPs[nodeID] - if !ok { - log.Printf("Traefik discovery: skipping node %s (no IP)", nodeID) - continue - } - for name, svc := range nodeServices { - svc.Target = remoteIP + ":" + defaultRemoteHTTPSPort - svc.TLS = true - svc.PathPrefix = "" - nodeServices[name] = svc - } - } - - for name, svc := range nodeServices { - services[name] = svc - } - } - - return services -} - -// readNodeID reads NODE_ID from the NS8 node environment file. -func readNodeID() string { - f, err := os.Open(ns8NodeEnvFile) - if err != nil { - return "" - } - defer func() { _ = f.Close() }() - - scanner := bufio.NewScanner(f) - for scanner.Scan() { - line := scanner.Text() - if strings.HasPrefix(line, "NODE_ID=") { - return strings.TrimPrefix(line, "NODE_ID=") - } - } - return "" -} - -// discoverNodeIDs finds all NS8 node IDs by scanning Redis keys. -func discoverNodeIDs(ctx context.Context, rdb *redis.Client) []string { - var nodeIDs []string - var cursor uint64 - - for { - keys, nextCursor, err := rdb.Scan(ctx, cursor, "node/*/default_instance/traefik", 100).Result() - if err != nil { - log.Printf("Traefik discovery: Redis SCAN error: %v", err) - return nodeIDs - } - - for _, key := range keys { - // key format: node/{NODE_ID}/default_instance/traefik - parts := strings.Split(key, "/") - if len(parts) >= 2 { - nodeIDs = append(nodeIDs, parts[1]) - } - } - - cursor = nextCursor - if cursor == 0 { - break - } - } - - sort.Strings(nodeIDs) - return nodeIDs -} - -// discoverNodeRoutes uses api-cli to get all routes from a node's Traefik instance. 
-func discoverNodeRoutes(ctx context.Context, rdb *redis.Client, nodeID string) map[string]ServiceInfo { - services := make(map[string]ServiceInfo) - - // Get the traefik instance name from Redis - traefikInstance, err := rdb.Get(ctx, fmt.Sprintf("node/%s/default_instance/traefik", nodeID)).Result() - if err != nil { - log.Printf("Traefik discovery: cannot get traefik instance for node %s: %v", nodeID, err) - return services - } - - // Call api-cli to get all routes with details - cmd := exec.CommandContext(ctx, "api-cli", "run", - fmt.Sprintf("module/%s/list-routes", traefikInstance), - "--data", `{"expand_list": true}`) - output, err := cmd.Output() - if err != nil { - log.Printf("Traefik discovery: api-cli failed for %s (node %s): %v", traefikInstance, nodeID, err) - return services - } - - var routes []apiCliRoute - if err := json.Unmarshal(output, &routes); err != nil { - log.Printf("Traefik discovery: cannot parse api-cli output for %s: %v", traefikInstance, err) - return services - } - - for _, route := range routes { - serviceKey := route.Instance - - // Parse target from URL - parsed, err := url.Parse(route.URL) - if err != nil { - continue - } - target := parsed.Host - useTLS := parsed.Scheme == "https" - - // Determine PathPrefix (only if strip_prefix is true) - var pathPrefix string - if route.Path != "" && route.StripPrefix { - pathPrefix = route.Path - } - - // Extract module ID and look up its ui_name from Redis - moduleID := extractModuleID(serviceKey) - var moduleLabel string - if moduleID != "" { - uiName, err := rdb.Get(ctx, "module/"+moduleID+"/ui_name").Result() - if err == nil && uiName != "" { - moduleLabel = uiName - } - } - - services[serviceKey] = ServiceInfo{ - Target: target, - Host: route.Host, - TLS: useTLS, - Label: moduleLabel, - Path: route.Path, - PathPrefix: pathPrefix, - ModuleID: moduleID, - NodeID: nodeID, - } - } - - return services -} - -// moduleIDRegex matches NS8 module IDs (compiled once at package level) -var 
moduleIDRegex = regexp.MustCompile(`^(.+\d+)(?:[-_]|$)`) - -// extractModuleID extracts the module ID from a Traefik config filename. -// NS8 module IDs end with an instance number (e.g., "nethvoice103", "n8n2", -// "nethsecurity-controller4"). Route suffixes are separated by hyphen or -// underscore after the digits (e.g., "nethvoice103-ui", "metrics1_grafana"). -func extractModuleID(name string) string { - m := moduleIDRegex.FindStringSubmatch(name) - if len(m) > 1 { - return m[1] - } - return "" -} - -// discoverNethSecurityServices detects NethSecurity (OpenWrt-based firewall) -// by checking for its web UI files and registers the main HTTPS service. -// NethSecurity runs nginx with the UI on a configurable port: -// - Port from /etc/nginx/conf.d/ns-ui.conf (dedicated UI server block) -// - Port 443 (when 00ns.locations is active, UI is on the default server) -func discoverNethSecurityServices() map[string]ServiceInfo { - services := make(map[string]ServiceInfo) - - // Detect NethSecurity by checking for its UI directory - if _, err := os.Stat(nethSecUIPath); err != nil { - return services - } - - hostname, _ := os.Hostname() - if hostname == "" { - hostname = "NethSecurity" - } - - port := detectNethSecurityUIPort() - - log.Printf("NethSecurity detected (hostname: %s, UI port: %s), registering web UI service", hostname, port) - - services["nethsecurity-ui"] = ServiceInfo{ - Target: net.JoinHostPort("127.0.0.1", port), - Host: "127.0.0.1", - TLS: true, - Label: hostname, - Path: "/", - } - - return services -} - -// detectNethSecurityUIPort determines the HTTPS port serving the NethSecurity UI. -// It checks ns-ui.conf for a dedicated server block (e.g., port 9090), and -// falls back to 443 when the UI locations are on the default server. 
-func detectNethSecurityUIPort() string { - // Check for dedicated UI server block (ns-ui.conf) - data, err := os.ReadFile(nethSecNginxConf) - if err == nil { - // Parse "listen ssl" directive - for _, line := range strings.Split(string(data), "\n") { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "listen") && strings.Contains(line, "ssl") && !strings.Contains(line, "[::]:") { - fields := strings.Fields(line) - if len(fields) >= 2 { - port := fields[1] - // Validate it looks like a port number - if _, err := fmt.Sscanf(port, "%d", new(int)); err == nil { - return port - } - } - } - } - } - - // Default: UI on the main server - return defaultNethSecUIPort -} - -func sendManifest(session *yamux.Session, services map[string]ServiceInfo) error { - stream, err := session.Open() - if err != nil { - return fmt.Errorf("failed to open control stream: %w", err) - } - defer func() { _ = stream.Close() }() - - manifest := ServiceManifest{ - Version: 1, - Services: services, - } - - if err := json.NewEncoder(stream).Encode(manifest); err != nil { - return fmt.Errorf("failed to encode manifest: %w", err) - } - - log.Printf("Manifest sent with %d services", len(services)) - return nil -} - -func handleStream(stream net.Conn, services map[string]ServiceInfo) { - defer func() { _ = stream.Close() }() - - // Read CONNECT header - serviceName, err := readConnectHeader(stream) - if err != nil { - log.Printf("Failed to read CONNECT header: %v", err) - return - } - - // Built-in terminal service: spawn a PTY instead of dialing TCP - if serviceName == "terminal" { - if err := writeConnectResponse(stream, nil); err != nil { - return - } - log.Println("CONNECT terminal -> PTY") - handleTerminal(stream) - return - } - - // Look up service - svc, ok := services[serviceName] - if !ok { - _ = writeConnectResponse(stream, fmt.Errorf("service not found: %s", serviceName)) - return - } - - // Connect to local target - var targetConn net.Conn - if svc.TLS { - targetConn, err = 
tls.Dial("tcp", svc.Target, &tls.Config{ - InsecureSkipVerify: true, //nolint:gosec // Local services use self-signed certs - }) - } else { - targetConn, err = net.DialTimeout("tcp", svc.Target, 10*time.Second) - } - if err != nil { - _ = writeConnectResponse(stream, fmt.Errorf("failed to connect to %s: %v", svc.Target, err)) - return - } - - // Send OK response - if err := writeConnectResponse(stream, nil); err != nil { - _ = targetConn.Close() - return - } - - log.Printf("CONNECT %s -> %s", serviceName, svc.Target) - - // Bidirectional copy with proper cleanup to prevent goroutine leaks - var once sync.Once - done := make(chan struct{}) - closeBoth := func() { - once.Do(func() { - close(done) - _ = targetConn.Close() - _ = stream.Close() - }) - } - - go func() { - defer closeBoth() - _, _ = io.Copy(targetConn, stream) - }() - - go func() { - defer closeBoth() - _, _ = io.Copy(stream, targetConn) - }() - - <-done -} - -// readConnectHeader reads "CONNECT \n" from the stream byte-by-byte -func readConnectHeader(r io.Reader) (string, error) { - line, err := readLine(r) - if err != nil { - return "", err - } - if !strings.HasPrefix(line, "CONNECT ") { - return "", fmt.Errorf("invalid CONNECT header: %q", line) - } - name := strings.TrimPrefix(line, "CONNECT ") - if name == "" { - return "", fmt.Errorf("empty service name") - } - return name, nil -} - -func writeConnectResponse(w io.Writer, err error) error { - if err == nil { - _, writeErr := fmt.Fprint(w, "OK\n") - return writeErr - } - _, writeErr := fmt.Fprintf(w, "ERROR %s\n", err.Error()) - return writeErr -} - -func readLine(r io.Reader) (string, error) { - var buf []byte - b := make([]byte, 1) - for { - n, err := r.Read(b) - if n > 0 { - if b[0] == '\n' { - return string(buf), nil - } - buf = append(buf, b[0]) - if len(buf) > maxLineLength { - return "", fmt.Errorf("line too long") - } - } - if err != nil { - if err == io.EOF && len(buf) > 0 { - return string(buf), nil - } - return "", err - } - } -} - -// 
wsNetConn wraps gorilla/websocket.Conn as net.Conn for yamux. -// It captures WebSocket close errors so the reconnect loop can inspect the close code. -type wsNetConn struct { - conn *websocket.Conn - reader io.Reader - mu sync.Mutex - closeErr error // stores the WebSocket close error if received -} - -func (w *wsNetConn) Read(b []byte) (int, error) { - for { - if w.reader == nil { - _, reader, err := w.conn.NextReader() - if err != nil { - w.mu.Lock() - w.closeErr = err - w.mu.Unlock() - return 0, err - } - w.reader = reader - } - n, err := w.reader.Read(b) - if err == io.EOF { - w.reader = nil - if n > 0 { - return n, nil - } - continue - } - return n, err - } -} - -func (w *wsNetConn) Write(b []byte) (int, error) { - err := w.conn.WriteMessage(websocket.BinaryMessage, b) - if err != nil { - return 0, err - } - return len(b), nil -} - -func (w *wsNetConn) Close() error { return w.conn.Close() } -func (w *wsNetConn) LocalAddr() net.Addr { return w.conn.LocalAddr() } -func (w *wsNetConn) RemoteAddr() net.Addr { return w.conn.RemoteAddr() } -func (w *wsNetConn) SetDeadline(t time.Time) error { - if err := w.conn.SetReadDeadline(t); err != nil { - return err - } - return w.conn.SetWriteDeadline(t) -} -func (w *wsNetConn) SetReadDeadline(t time.Time) error { return w.conn.SetReadDeadline(t) } -func (w *wsNetConn) SetWriteDeadline(t time.Time) error { return w.conn.SetWriteDeadline(t) } - -// sensitiveEnvPrefixes lists environment variable prefixes that are stripped -// from the PTY shell to prevent operators from extracting credentials (#8). 
-var sensitiveEnvPrefixes = []string{ - "SYSTEM_KEY=", - "SYSTEM_SECRET=", - "SUPPORT_URL=", - "DATABASE_URL=", - "REDIS_ADDR=", - "REDIS_PASSWORD=", - "REDIS_URL=", - "INTERNAL_SECRET=", - "TUNNEL_CONFIG=", -} - -// sanitizeEnv filters out sensitive environment variables before spawning a shell -func sanitizeEnv(env []string) []string { - filtered := make([]string, 0, len(env)) - for _, e := range env { - sensitive := false - for _, prefix := range sensitiveEnvPrefixes { - if strings.HasPrefix(e, prefix) { - sensitive = true - break - } - } - if !sensitive { - filtered = append(filtered, e) - } - } - return filtered -} - -func envWithDefault(key, defaultValue string) string { - if v := os.Getenv(key); v != "" { - return v - } - return defaultValue -} - -func parseDurationDefault(s string, d time.Duration) time.Duration { - if s == "" { - return d - } - if v, err := time.ParseDuration(s); err == nil { - return v - } - return d -} - -// handleTerminal spawns a shell with a PTY and bridges it to the yamux stream -// using length-prefixed binary frames: -// - Type 0 (data): raw terminal bytes (bidirectional) -// - Type 1 (resize): JSON {"cols": N, "rows": N} (stream → PTY) -func handleTerminal(stream net.Conn) { - shell := os.Getenv("SHELL") - if shell == "" { - shell = defaultShell - } - - cmd := exec.Command(shell) - cmd.Env = append(sanitizeEnv(os.Environ()), defaultTermEnv) - - ptmx, err := pty.Start(cmd) - if err != nil { - log.Printf("Failed to start PTY: %v", err) - return - } - var once sync.Once - done := make(chan struct{}) - closeAll := func() { - once.Do(func() { - close(done) - _ = ptmx.Close() - _ = stream.Close() - }) - } - defer func() { - closeAll() - _ = cmd.Process.Kill() - _, _ = cmd.Process.Wait() - }() - - // PTY → stream: read from PTY, send as type-0 length-prefixed frames - go func() { - defer closeAll() - buf := make([]byte, 4096) - for { - n, readErr := ptmx.Read(buf) - if n > 0 { - frame := make([]byte, 1+n) - frame[0] = 0 // data frame - 
copy(frame[1:], buf[:n]) - if writeErr := writeFrame(stream, frame); writeErr != nil { - return - } - } - if readErr != nil { - return - } - } - }() - - // Stream → PTY: read length-prefixed frames, dispatch by type - go func() { - defer closeAll() - for { - frame, readErr := readFrame(stream) - if readErr != nil { - return - } - if len(frame) < 1 { - continue - } - - frameType := frame[0] - payload := frame[1:] - - switch frameType { - case 0: // data → write to PTY - if _, writeErr := ptmx.Write(payload); writeErr != nil { - return - } - case 1: // resize → set PTY window size - var size struct { - Cols int `json:"cols"` - Rows int `json:"rows"` - } - if jsonErr := json.Unmarshal(payload, &size); jsonErr != nil { - continue - } - if size.Cols > 0 && size.Rows > 0 { - _ = pty.Setsize(ptmx, &pty.Winsize{ - Rows: uint16(size.Rows), - Cols: uint16(size.Cols), - }) - } - } - } - }() - - <-done -} - -// writeFrame writes a length-prefixed frame: [4 bytes big-endian length][payload] -func writeFrame(w io.Writer, data []byte) error { - header := make([]byte, 4) - binary.BigEndian.PutUint32(header, uint32(len(data))) - if _, err := w.Write(header); err != nil { - return err - } - _, err := w.Write(data) - return err -} - -// readFrame reads a length-prefixed frame: [4 bytes big-endian length][payload] -func readFrame(r io.Reader) ([]byte, error) { - header := make([]byte, 4) - if _, err := io.ReadFull(r, header); err != nil { - return nil, err - } - length := binary.BigEndian.Uint32(header) - if length > maxFrameSize { - return nil, fmt.Errorf("frame too large: %d", length) - } - data := make([]byte, length) - if _, err := io.ReadFull(r, data); err != nil { - return nil, err - } - return data, nil + cfg := &config.ClientConfig{ + URL: *urlFlag, + Key: *keyFlag, + Secret: *secretFlag, + NodeID: *nodeIDFlag, + RedisAddr: *redisAddr, + StaticServices: *staticServices, + Exclude: exclude, + ReconnectDelay: *reconnectDelay, + MaxReconnectDelay: *maxReconnectDelay, + 
DiscoveryInterval: *discoveryInterval, + TLSInsecure: *tlsInsecure, + } + + connection.RunWithReconnect(ctx, cfg) } diff --git a/services/support/configuration/configuration.go b/services/support/configuration/configuration.go index b9a3e872..af696085 100644 --- a/services/support/configuration/configuration.go +++ b/services/support/configuration/configuration.go @@ -48,6 +48,12 @@ type Configuration struct { TerminalInactivityTimeout time.Duration `json:"terminal_inactivity_timeout"` TerminalMaxFrameSize int `json:"terminal_max_frame_size"` + // Rate limiting configuration + RateLimitTunnelPerIP int `json:"rate_limit_tunnel_per_ip"` + RateLimitTunnelPerKey int `json:"rate_limit_tunnel_per_key"` + RateLimitSessionPerID int `json:"rate_limit_session_per_id"` + RateLimitWindow time.Duration `json:"rate_limit_window"` + // Internal authentication (shared secret with backend) InternalSecret string `json:"-"` } @@ -89,6 +95,12 @@ func Init() { Config.TerminalInactivityTimeout = parseDurationWithDefault("TERMINAL_INACTIVITY_TIMEOUT", 30*time.Minute) Config.TerminalMaxFrameSize = parseIntWithDefault("TERMINAL_MAX_FRAME_SIZE", 65536) + // Rate limiting configuration + Config.RateLimitTunnelPerIP = parseIntWithDefault("RATE_LIMIT_TUNNEL_PER_IP", 10) + Config.RateLimitTunnelPerKey = parseIntWithDefault("RATE_LIMIT_TUNNEL_PER_KEY", 5) + Config.RateLimitSessionPerID = parseIntWithDefault("RATE_LIMIT_SESSION_PER_ID", 500) + Config.RateLimitWindow = parseDurationWithDefault("RATE_LIMIT_WINDOW", 1*time.Minute) + // Internal authentication Config.InternalSecret = os.Getenv("INTERNAL_SECRET") diff --git a/services/support/go.mod b/services/support/go.mod index 6554d70b..22da71d4 100644 --- a/services/support/go.mod +++ b/services/support/go.mod @@ -15,7 +15,7 @@ require ( github.com/lib/pq v1.10.9 github.com/redis/go-redis/v9 v9.11.0 github.com/rs/zerolog v1.34.0 - gopkg.in/yaml.v3 v3.0.1 + github.com/spf13/pflag v1.0.10 ) require ( @@ -39,7 +39,6 @@ require ( 
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect - github.com/spf13/pflag v1.0.10 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.3.0 // indirect golang.org/x/arch v0.18.0 // indirect @@ -48,4 +47,5 @@ require ( golang.org/x/sys v0.38.0 // indirect golang.org/x/text v0.31.0 // indirect google.golang.org/protobuf v1.36.6 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/services/support/main.go b/services/support/main.go index 6dcdbb4c..0959258b 100644 --- a/services/support/main.go +++ b/services/support/main.go @@ -59,6 +59,14 @@ func main() { configuration.Init() + // Initialize rate limiters from configuration + middleware.InitRateLimiters( + configuration.Config.RateLimitTunnelPerIP, + configuration.Config.RateLimitTunnelPerKey, + configuration.Config.RateLimitSessionPerID, + configuration.Config.RateLimitWindow, + ) + err = database.Init() if err != nil { logger.Fatal().Err(err).Msg("Failed to initialize database") diff --git a/services/support/middleware/ratelimit.go b/services/support/middleware/ratelimit.go index 82cc1ea3..f532154e 100644 --- a/services/support/middleware/ratelimit.go +++ b/services/support/middleware/ratelimit.go @@ -80,14 +80,19 @@ func (rl *rateLimiter) allow(key string) bool { return entry.count <= rl.limit } -// tunnelIPRateLimiter limits tunnel connection attempts per IP -var tunnelIPRateLimiter = newRateLimiter(10, 1*time.Minute) - -// tunnelKeyRateLimiter limits tunnel connection attempts per system_key (#14) -var tunnelKeyRateLimiter = newRateLimiter(5, 1*time.Minute) +var ( + tunnelIPRateLimiter *rateLimiter + tunnelKeyRateLimiter *rateLimiter + sessionRateLimiter *rateLimiter +) -// sessionRateLimiter limits requests per session ID on internal endpoints -var sessionRateLimiter = newRateLimiter(100, 1*time.Minute) +// InitRateLimiters 
initializes rate limiters from configuration. +// Must be called after configuration.Init(). +func InitRateLimiters(tunnelPerIP, tunnelPerKey, sessionPerID int, window time.Duration) { + tunnelIPRateLimiter = newRateLimiter(tunnelPerIP, window) + tunnelKeyRateLimiter = newRateLimiter(tunnelPerKey, window) + sessionRateLimiter = newRateLimiter(sessionPerID, window) +} // TunnelRateLimitMiddleware limits the rate of tunnel connection attempts // per client IP (10/min) and per system_key (5/min, checked after auth). From ecaff783b60c7f44d7cd23458c5c83b88d049d4c Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Fri, 13 Mar 2026 18:53:40 +0100 Subject: [PATCH 15/28] docs(support): add inline descriptions to all .env.example variables --- services/support/.env.example | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/services/support/.env.example b/services/support/.env.example index 774291a0..0bbf57c7 100644 --- a/services/support/.env.example +++ b/services/support/.env.example @@ -1,35 +1,35 @@ # Database -DATABASE_URL=postgresql://noc_user:noc_password@localhost:5432/noc?sslmode=disable +DATABASE_URL=postgresql://noc_user:noc_password@localhost:5432/noc?sslmode=disable # PostgreSQL connection string # Redis -REDIS_URL=redis://localhost:6379 -REDIS_DB=2 +REDIS_URL=redis://localhost:6379 # Redis connection URL +REDIS_DB=2 # Redis database number (isolated from other services) # Server -LISTEN_ADDRESS=127.0.0.1:8082 +LISTEN_ADDRESS=127.0.0.1:8082 # HTTP server bind address # Logging -LOG_LEVEL=debug -LOG_FORMAT=console +LOG_LEVEL=debug # Log level: debug, info, warn, error +LOG_FORMAT=console # Log format: console (human-readable) or json # Authentication -SYSTEM_AUTH_CACHE_TTL=24h -SYSTEM_SECRET_MIN_LENGTH=32 +SYSTEM_AUTH_CACHE_TTL=24h # How long to cache system credentials (avoids DB lookups) +SYSTEM_SECRET_MIN_LENGTH=32 # Minimum length for system secrets # Session defaults -SESSION_DEFAULT_DURATION=24h 
-SESSION_CLEANER_INTERVAL=5m +SESSION_DEFAULT_DURATION=24h # Default support session duration +SESSION_CLEANER_INTERVAL=5m # How often the background cleaner checks for expired sessions # Tunnel -TUNNEL_GRACE_PERIOD=2m -MAX_TUNNELS=1000 -MAX_SESSIONS_PER_SYSTEM=5 +TUNNEL_GRACE_PERIOD=2m # Time to wait before closing a disconnected tunnel +MAX_TUNNELS=1000 # Maximum number of concurrent tunnels +MAX_SESSIONS_PER_SYSTEM=5 # Maximum active sessions per system # Rate limiting -# RATE_LIMIT_TUNNEL_PER_IP=10 -# RATE_LIMIT_TUNNEL_PER_KEY=5 -# RATE_LIMIT_SESSION_PER_ID=500 -# RATE_LIMIT_WINDOW=1m +# RATE_LIMIT_TUNNEL_PER_IP=10 # Max tunnel connection attempts per IP per window +# RATE_LIMIT_TUNNEL_PER_KEY=5 # Max tunnel connection attempts per system_key per window +# RATE_LIMIT_SESSION_PER_ID=500 # Max requests per session ID per window (proxy, terminal, services) +# RATE_LIMIT_WINDOW=1m # Time window for all rate limits above # Internal authentication (shared secret with backend) # IMPORTANT: Generate with: openssl rand -hex 32 From 2fbfa44d291b3b5b50b4a4155c1ef63ad0684959 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Thu, 19 Mar 2026 10:31:50 +0100 Subject: [PATCH 16/28] chore(backend): renumber support migrations from 017-019 to 018-020 Shift migrations to avoid conflict with 017_inventory_fk_set_null added on main. 
--- .../{017_support_sessions.sql => 018_support_sessions.sql} | 0 ...rt_sessions_rollback.sql => 018_support_sessions_rollback.sql} | 0 .../{018_security_hardening.sql => 019_security_hardening.sql} | 0 ...hardening_rollback.sql => 019_security_hardening_rollback.sql} | 0 ..._support_sessions.sql => 020_add_node_id_support_sessions.sql} | 0 ...rollback.sql => 020_add_node_id_support_sessions_rollback.sql} | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename backend/database/migrations/{017_support_sessions.sql => 018_support_sessions.sql} (100%) rename backend/database/migrations/{017_support_sessions_rollback.sql => 018_support_sessions_rollback.sql} (100%) rename backend/database/migrations/{018_security_hardening.sql => 019_security_hardening.sql} (100%) rename backend/database/migrations/{018_security_hardening_rollback.sql => 019_security_hardening_rollback.sql} (100%) rename backend/database/migrations/{019_add_node_id_support_sessions.sql => 020_add_node_id_support_sessions.sql} (100%) rename backend/database/migrations/{019_add_node_id_support_sessions_rollback.sql => 020_add_node_id_support_sessions_rollback.sql} (100%) diff --git a/backend/database/migrations/017_support_sessions.sql b/backend/database/migrations/018_support_sessions.sql similarity index 100% rename from backend/database/migrations/017_support_sessions.sql rename to backend/database/migrations/018_support_sessions.sql diff --git a/backend/database/migrations/017_support_sessions_rollback.sql b/backend/database/migrations/018_support_sessions_rollback.sql similarity index 100% rename from backend/database/migrations/017_support_sessions_rollback.sql rename to backend/database/migrations/018_support_sessions_rollback.sql diff --git a/backend/database/migrations/018_security_hardening.sql b/backend/database/migrations/019_security_hardening.sql similarity index 100% rename from backend/database/migrations/018_security_hardening.sql rename to 
backend/database/migrations/019_security_hardening.sql diff --git a/backend/database/migrations/018_security_hardening_rollback.sql b/backend/database/migrations/019_security_hardening_rollback.sql similarity index 100% rename from backend/database/migrations/018_security_hardening_rollback.sql rename to backend/database/migrations/019_security_hardening_rollback.sql diff --git a/backend/database/migrations/019_add_node_id_support_sessions.sql b/backend/database/migrations/020_add_node_id_support_sessions.sql similarity index 100% rename from backend/database/migrations/019_add_node_id_support_sessions.sql rename to backend/database/migrations/020_add_node_id_support_sessions.sql diff --git a/backend/database/migrations/019_add_node_id_support_sessions_rollback.sql b/backend/database/migrations/020_add_node_id_support_sessions_rollback.sql similarity index 100% rename from backend/database/migrations/019_add_node_id_support_sessions_rollback.sql rename to backend/database/migrations/020_add_node_id_support_sessions_rollback.sql From c8c288e76f8b55e49f67b151ba83d7f47a7e5001 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Thu, 19 Mar 2026 12:24:03 +0100 Subject: [PATCH 17/28] feat(support): add pluggable diagnostics system to tunnel-client At connect time, the tunnel-client collects a health report and pushes it to the support service over a dedicated yamux stream. Operators see the results in the session popover before opening a terminal or proxy. Built-in system plugin always runs (CPU load, RAM, disk, uptime, OS info). External plugins can be dropped as executables in /usr/share/my/diagnostics.d/ - NS8 modules and NethSecurity can ship their own health checks independently. Each plugin writes JSON to stdout and signals severity via exit code (0=ok, 1=warning, 2=critical). The overall session status is the worst status across all plugins. Diagnostics run in parallel with the WebSocket connection to avoid adding latency. 
A per-plugin timeout (default 10s) and a total timeout (default 30s) prevent slow plugins from blocking the session. - tunnel-client: new internal/diagnostics package (runner + models), built-in system check, DIAGNOSTICS yamux stream after manifest - support service: acceptControlStream distinguishes DIAGNOSTICS header from manifest JSON, SaveDiagnostics() stores JSONB on session - backend: GET /api/support-sessions/:id/diagnostics with RBAC scoping, migration 021 adds diagnostics + diagnostics_at columns - frontend: diagnostics section in SupportSessionPopover with status dot and per-plugin summary rows --- .../021_add_diagnostics_support_sessions.sql | 7 + ..._diagnostics_support_sessions_rollback.sql | 4 + backend/database/schema.sql | 4 + backend/entities/support.go | 50 +++ backend/main.go | 1 + backend/methods/support.go | 25 ++ backend/openapi.yaml | 95 +++++ .../systems/SupportSessionPopover.vue | 66 +++ frontend/src/i18n/en/translation.json | 3 +- frontend/src/i18n/it/translation.json | 3 +- frontend/src/lib/support/support.ts | 41 ++ services/support/README.md | 40 +- .../tunnel-client/internal/config/config.go | 39 +- .../internal/connection/connection.go | 40 ++ .../internal/diagnostics/models.go | 48 +++ .../internal/diagnostics/runner.go | 394 ++++++++++++++++++ services/support/cmd/tunnel-client/main.go | 50 ++- services/support/methods/tunnel.go | 70 +++- services/support/models/session.go | 31 +- services/support/session/manager.go | 13 + 20 files changed, 962 insertions(+), 62 deletions(-) create mode 100644 backend/database/migrations/021_add_diagnostics_support_sessions.sql create mode 100644 backend/database/migrations/021_add_diagnostics_support_sessions_rollback.sql create mode 100644 services/support/cmd/tunnel-client/internal/diagnostics/models.go create mode 100644 services/support/cmd/tunnel-client/internal/diagnostics/runner.go diff --git a/backend/database/migrations/021_add_diagnostics_support_sessions.sql 
b/backend/database/migrations/021_add_diagnostics_support_sessions.sql new file mode 100644 index 00000000..e6144afb --- /dev/null +++ b/backend/database/migrations/021_add_diagnostics_support_sessions.sql @@ -0,0 +1,7 @@ +-- Migration 021: add diagnostics columns to support_sessions +ALTER TABLE support_sessions + ADD COLUMN IF NOT EXISTS diagnostics JSONB, + ADD COLUMN IF NOT EXISTS diagnostics_at TIMESTAMPTZ; + +COMMENT ON COLUMN support_sessions.diagnostics IS 'Diagnostic report collected by tunnel-client at connect time (JSON)'; +COMMENT ON COLUMN support_sessions.diagnostics_at IS 'Timestamp when diagnostics were last received from the tunnel-client'; diff --git a/backend/database/migrations/021_add_diagnostics_support_sessions_rollback.sql b/backend/database/migrations/021_add_diagnostics_support_sessions_rollback.sql new file mode 100644 index 00000000..bca1432e --- /dev/null +++ b/backend/database/migrations/021_add_diagnostics_support_sessions_rollback.sql @@ -0,0 +1,4 @@ +-- Rollback migration 021: remove diagnostics columns from support_sessions +ALTER TABLE support_sessions + DROP COLUMN IF EXISTS diagnostics, + DROP COLUMN IF EXISTS diagnostics_at; diff --git a/backend/database/schema.sql b/backend/database/schema.sql index 0abd67dc..5634d6db 100644 --- a/backend/database/schema.sql +++ b/backend/database/schema.sql @@ -942,6 +942,8 @@ CREATE TABLE IF NOT EXISTS support_sessions ( status VARCHAR(16) NOT NULL DEFAULT 'pending', closed_at TIMESTAMPTZ, closed_by VARCHAR(32), + diagnostics JSONB, + diagnostics_at TIMESTAMPTZ, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), CONSTRAINT support_sessions_status_check CHECK (status IN ('pending', 'active', 'expired', 'closed')) @@ -953,6 +955,8 @@ COMMENT ON COLUMN support_sessions.session_token IS 'Unique token for tunnel aut COMMENT ON COLUMN support_sessions.reconnect_token IS 'Token required to reconnect to a session during grace period'; COMMENT ON COLUMN 
support_sessions.status IS 'Session status: pending (no tunnel yet), active, expired, closed'; COMMENT ON COLUMN support_sessions.closed_by IS 'Who closed the session: client, operator, timeout, system'; +COMMENT ON COLUMN support_sessions.diagnostics IS 'Diagnostic report collected by tunnel-client at connect time (JSON)'; +COMMENT ON COLUMN support_sessions.diagnostics_at IS 'Timestamp when diagnostics were last received from the tunnel-client'; CREATE INDEX IF NOT EXISTS idx_support_sessions_system_id ON support_sessions(system_id); CREATE INDEX IF NOT EXISTS idx_support_sessions_status ON support_sessions(status); diff --git a/backend/entities/support.go b/backend/entities/support.go index 13cc692b..3efa0ca7 100644 --- a/backend/entities/support.go +++ b/backend/entities/support.go @@ -7,8 +7,10 @@ package entities import ( "database/sql" + "encoding/json" "fmt" "strings" + "time" "github.com/nethesis/my/backend/database" "github.com/nethesis/my/backend/models" @@ -441,6 +443,54 @@ func (r *SupportRepository) GetSessionByID(sessionID, userOrgRole, userOrgID str return &session, nil } +// GetDiagnostics returns the diagnostics data for a session, if available and accessible. +// Returns nil, nil, nil if diagnostics have not been received yet. +func (r *SupportRepository) GetDiagnostics(sessionID, userOrgRole, userOrgID string) (map[string]interface{}, *time.Time, error) { + conditions := []string{"ss.id = $1"} + args := []interface{}{sessionID} + argIdx := 2 + + // RBAC scope filter + rbacCondition, rbacArgs, _ := buildRBACFilter(userOrgRole, userOrgID, argIdx) + if rbacCondition != "" { + conditions = append(conditions, rbacCondition) + args = append(args, rbacArgs...) 
+ } + + query := fmt.Sprintf(`SELECT ss.diagnostics, ss.diagnostics_at + FROM support_sessions ss + JOIN systems s ON ss.system_id = s.id + WHERE %s`, strings.Join(conditions, " AND ")) + + var rawDiagnostics []byte + var diagnosticsAt sql.NullTime + + err := r.db.QueryRow(query, args...).Scan(&rawDiagnostics, &diagnosticsAt) + if err != nil { + if err == sql.ErrNoRows { + return nil, nil, nil + } + return nil, nil, fmt.Errorf("failed to get diagnostics: %w", err) + } + + if rawDiagnostics == nil { + return nil, nil, nil + } + + var data map[string]interface{} + if err := json.Unmarshal(rawDiagnostics, &data); err != nil { + return nil, nil, fmt.Errorf("failed to unmarshal diagnostics: %w", err) + } + + var at *time.Time + if diagnosticsAt.Valid { + t := diagnosticsAt.Time + at = &t + } + + return data, at, nil +} + // maxSessionDuration is the maximum total duration a session can have from its start time (30 days) const maxSessionDuration = 30 * 24 // hours diff --git a/backend/main.go b/backend/main.go index 87a0a4e5..37314fb8 100644 --- a/backend/main.go +++ b/backend/main.go @@ -480,6 +480,7 @@ func main() { supportGroup.PATCH("/:id/extend", methods.ExtendSupportSession) supportGroup.DELETE("/:id", methods.CloseSupportSession) supportGroup.GET("/:id/logs", methods.GetSupportSessionLogs) + supportGroup.GET("/:id/diagnostics", methods.GetSupportSessionDiagnostics) supportGroup.GET("/:id/services", methods.GetSupportSessionServices) supportGroup.POST("/:id/terminal-ticket", methods.GenerateTerminalTicket) supportGroup.Any("/:id/proxy/:service/*path", methods.ProxySupportSession) diff --git a/backend/methods/support.go b/backend/methods/support.go index f27a428b..1ec822de 100644 --- a/backend/methods/support.go +++ b/backend/methods/support.go @@ -156,3 +156,28 @@ func GetSupportSessionLogs(c *gin.Context) { "pagination": helpers.BuildPaginationInfoWithSorting(page, pageSize, totalCount, "connected_at", "desc"), })) } + +// GetSupportSessionDiagnostics handles GET 
/api/support-sessions/:id/diagnostics +func GetSupportSessionDiagnostics(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id required", nil)) + return + } + + _, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + + repo := entities.NewSupportRepository() + data, at, err := repo.GetDiagnostics(sessionID, userOrgRole, userOrgID) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to get diagnostics") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get diagnostics", nil)) + return + } + + c.JSON(http.StatusOK, response.OK("diagnostics retrieved successfully", gin.H{ + "session_id": sessionID, + "diagnostics": data, + "diagnostics_at": at, + })) +} diff --git a/backend/openapi.yaml b/backend/openapi.yaml index a36ffe58..2b4bd492 100644 --- a/backend/openapi.yaml +++ b/backend/openapi.yaml @@ -9085,6 +9085,101 @@ paths: '404': $ref: '#/components/responses/NotFound' + /support-sessions/{id}/diagnostics: + get: + operationId: getSupportSessionDiagnostics + tags: + - Backend - Support Sessions + summary: Get diagnostics for a support session + description: | + Returns the diagnostic report collected by the tunnel-client at connect time. + The report includes a built-in system check (CPU, RAM, disk, uptime) and any + additional plugin results from the remote system's diagnostics directory. + Returns `null` for `diagnostics` and `diagnostics_at` if no report has been + received yet from the tunnel-client. 
+ security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + description: Support session ID + schema: + type: string + format: uuid + responses: + '200': + description: Diagnostics retrieved successfully + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + example: diagnostics retrieved successfully + data: + type: object + properties: + session_id: + type: string + format: uuid + diagnostics: + nullable: true + type: object + description: Diagnostic report from the tunnel-client, null if not yet received + properties: + collected_at: + type: string + format: date-time + duration_ms: + type: integer + overall_status: + type: string + enum: [ok, warning, critical, error, timeout] + plugins: + type: array + items: + type: object + properties: + id: + type: string + name: + type: string + status: + type: string + enum: [ok, warning, critical, error, timeout] + summary: + type: string + checks: + type: array + items: + type: object + properties: + name: + type: string + status: + type: string + enum: [ok, warning, critical, error, timeout] + value: + type: string + details: + type: string + diagnostics_at: + type: string + format: date-time + nullable: true + description: Timestamp when diagnostics were last received + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + /support-sessions/{id}/proxy-token: post: operationId: generateSupportProxyToken diff --git a/frontend/src/components/systems/SupportSessionPopover.vue b/frontend/src/components/systems/SupportSessionPopover.vue index d38cfe21..0e05be2f 100644 --- a/frontend/src/components/systems/SupportSessionPopover.vue +++ b/frontend/src/components/systems/SupportSessionPopover.vue @@ -12,7 +12,9 @@ import { useI18n } from 'vue-i18n' import { getSystemActiveSessions, getSupportSessionLogs, + 
getSupportSessionDiagnostics, type SystemSessionGroup, + type SessionDiagnostics, } from '@/lib/support/support' function formatDateWithMonth(date: Date, loc: string): string { return date.toLocaleString(loc, { @@ -49,6 +51,7 @@ interface PopoverData { } const data = ref(null) +const diagnostics = ref(null) const loading = ref(false) const error = ref(false) const isOpen = ref(false) @@ -101,6 +104,18 @@ async function fetchData() { group, operators: Array.from(operatorMap.values()), } + + // Fetch diagnostics from the most recently started session + if (group.sessions && group.sessions.length > 0) { + const latestSession = [...group.sessions].sort( + (a, b) => new Date(b.started_at).getTime() - new Date(a.started_at).getTime(), + )[0] + try { + diagnostics.value = await getSupportSessionDiagnostics(latestSession.id) + } catch { + diagnostics.value = null + } + } } catch { error.value = true } finally { @@ -156,6 +171,32 @@ function formatConnectionBadge(conn: OperatorConnection): string { } return label } + +function diagnosticStatusDotClass(status: string): string { + switch (status) { + case 'ok': + return 'bg-green-500' + case 'warning': + return 'bg-amber-400' + case 'critical': + return 'bg-red-500' + default: + return 'bg-gray-400' + } +} + +function diagnosticStatusTextClass(status: string): string { + switch (status) { + case 'ok': + return 'text-green-600 dark:text-green-400' + case 'warning': + return 'text-amber-500 dark:text-amber-400' + case 'critical': + return 'text-red-600 dark:text-red-400' + default: + return 'text-gray-500 dark:text-gray-400' + } +} diff --git a/frontend/src/i18n/en/translation.json b/frontend/src/i18n/en/translation.json index 640ed7b9..93568690 100644 --- a/frontend/src/i18n/en/translation.json +++ b/frontend/src/i18n/en/translation.json @@ -652,6 +652,18 @@ "terminal_close_all_confirm": "This will close {count} active terminal session(s). 
Continue?", "terminal_select_node": "Select node", "terminal_select_node_description": "This system has multiple cluster nodes. Select which node to connect to.", - "diagnostics": "Diagnostics" + "diagnostics": "Diagnostics", + "add_service": "Add service", + "add_service_error": "Failed to add service", + "no_active_session": "No active session found", + "service_name": "Service name", + "service_name_placeholder": "e.g. my-service", + "service_name_helper": "Lowercase letters, digits, hyphens and underscores only", + "service_target": "Target (host:port)", + "service_target_placeholder": "e.g. 127.0.0.1:8080", + "service_target_helper": "Address and port of the service on the remote system", + "service_label": "Label (optional)", + "service_label_placeholder": "e.g. My Service", + "service_tls": "TLS" } } diff --git a/frontend/src/i18n/it/translation.json b/frontend/src/i18n/it/translation.json index fd6ce2d9..a042adbd 100644 --- a/frontend/src/i18n/it/translation.json +++ b/frontend/src/i18n/it/translation.json @@ -650,6 +650,18 @@ "terminal_new_tab": "Nuovo tab terminale", "terminal_close_all": "Chiudi tutti i terminali", "terminal_close_all_confirm": "Verranno chiuse {count} sessioni terminale attive. Continuare?", - "diagnostics": "Diagnostica" + "diagnostics": "Diagnostica", + "add_service": "Aggiungi servizio", + "add_service_error": "Impossibile aggiungere il servizio", + "no_active_session": "Nessuna sessione attiva trovata", + "service_name": "Nome servizio", + "service_name_placeholder": "es. mio-servizio", + "service_name_helper": "Solo lettere minuscole, cifre, trattini e underscore", + "service_target": "Target (host:porta)", + "service_target_placeholder": "es. 127.0.0.1:8080", + "service_target_helper": "Indirizzo e porta del servizio sul sistema remoto", + "service_label": "Etichetta (opzionale)", + "service_label_placeholder": "es. 
Il Mio Servizio", + "service_tls": "TLS" } } diff --git a/frontend/src/lib/support/support.ts b/frontend/src/lib/support/support.ts index 40f820f0..3aee9781 100644 --- a/frontend/src/lib/support/support.ts +++ b/frontend/src/lib/support/support.ts @@ -388,6 +388,24 @@ export const getSupportSessionLogs = (sessionId: string, pageNum: number, pageSi .then((res) => res.data.data) } +export interface AddSessionServiceItem { + name: string + target: string + label?: string + tls?: boolean +} + +export const addSupportSessionServices = (sessionId: string, services: AddSessionServiceItem[]) => { + const loginStore = useLoginStore() + return axios + .post( + `${API_URL}/support-sessions/${sessionId}/services`, + { services }, + { headers: { Authorization: `Bearer ${loginStore.jwtToken}` } }, + ) + .then((res) => res.data) +} + export const getSupportSessionDiagnostics = (sessionId: string): Promise => { const loginStore = useLoginStore() return axios diff --git a/services/support/README.md b/services/support/README.md index 1838edbd..1af4fbdb 100644 --- a/services/support/README.md +++ b/services/support/README.md @@ -62,11 +62,17 @@ MAX_SESSIONS_PER_SYSTEM=5 1. **System connects** via WebSocket with HTTP Basic Auth (same credentials as collect) 2. **yamux session** multiplexes streams over a single WebSocket connection -3. **Service manifest** is exchanged — the system advertises available services (e.g., cluster-admin, SSH) +3. **Service manifest** is exchanged — the system opens a control stream and sends the list of reachable services as JSON 4. **Diagnostics report** is sent — the system collects and pushes a health snapshot (CPU, RAM, disk, custom plugins) -5. **Operator requests** arrive as yamux streams with CONNECT headers routing to the target service +5. **Operator requests** arrive as yamux streams with `CONNECT \n` headers routing to the target service 6. 
**Reverse proxy** forwards HTTP/WebSocket traffic through the tunnel to remote services +The support service can also **push commands** to the tunnel-client by opening outbound yamux streams. The stream starts with a `COMMAND <version>\n` header followed by a JSON payload. Currently supported commands: + +| Command | Description | +|:---|:---| +| `add_services` | Inject one or more static `host:port` services into the running session without reconnection | + ### Session Lifecycle - `pending` — Session created by backend, waiting for system to connect - `active` — System connected, tunnel established @@ -74,9 +80,10 @@ MAX_SESSIONS_PER_SYSTEM=5 - `closed` — Session closed by operator or system disconnect ### Inter-Service Communication -- **Backend → Support**: Redis pub/sub on channel `support:commands` (close sessions) +- **Backend → Support**: Redis pub/sub on channel `support:commands` (`close` and `add_services` commands) - **Backend → Support**: Internal HTTP endpoints with `X-Internal-Secret` header (proxy, terminal, services) - **System → Support**: WebSocket with HTTP Basic Auth (tunnel establishment) +- **Support → System**: Outbound yamux COMMAND streams (server-initiated, e.g. `add_services`) ## Development @@ -194,6 +201,27 @@ EXCLUDE_PATTERNS="*-server-api,*-janus,*-middleware-*,*-provisioning,*-reports-a tunnel-client --exclude "*-server-api,*-janus,*-middleware-*" ``` +### Static Service Injection + +Operators can add arbitrary `host:port` services to a running tunnel without restarting the tunnel-client. This is useful for services not auto-discovered via Traefik — for example the web management interface of a device on the customer's LAN (IP phone, managed switch, etc.). 
+ +**Flow:** + +``` +Operator clicks "Add service" in the UI + → POST /api/support-sessions/:id/services {name, target, label, tls} + → Backend validates and publishes to Redis: {action: "add_services", session_id, services} + → Support service opens an outbound yamux stream to the tunnel-client + → Writes: COMMAND 1\n + JSON payload + → Tunnel-client merges the new service into its local map and re-sends the manifest + → Support service updates its service registry for that session + → Operator can immediately open the new service via the proxy +``` + +**Example:** to access a Yealink phone's web UI at `192.168.1.100:443` on a customer's system, add a service with `target: 192.168.1.100:443` and `tls: true`. The phone's interface becomes available through the subdomain proxy as if the operator were on the same LAN. + +Constraints: max 10 services per call, names must match `[a-z0-9][a-z0-9_-]{0,63}`, target must be `host:port`. + ### Diagnostics Plugin System At connect time, the tunnel-client collects a health report and pushes it to the support service. The report is stored with the session and shown to operators in the MY interface. diff --git a/services/support/cmd/tunnel-client/internal/connection/connection.go b/services/support/cmd/tunnel-client/internal/connection/connection.go index 79e0af35..1428fb5c 100644 --- a/services/support/cmd/tunnel-client/internal/connection/connection.go +++ b/services/support/cmd/tunnel-client/internal/connection/connection.go @@ -17,9 +17,13 @@ import ( "fmt" "io" "log" + "net" "net/http" "net/url" "os" + "regexp" + "strings" + "sync" "time" "github.com/gorilla/websocket" @@ -32,6 +36,49 @@ import ( "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/stream" ) +// validServiceName matches safe service names: lowercase alphanumeric, hyphens, underscores. 
+var validServiceName = regexp.MustCompile(`^[a-z0-9][a-z0-9_-]{0,63}$`) + +// commandPayload is the JSON body of a COMMAND stream sent by the support service. +type commandPayload struct { + Action string `json:"action"` + Services map[string]models.ServiceInfo `json:"services,omitempty"` +} + +// serviceStore is a goroutine-safe holder for the current service map. +type serviceStore struct { + mu sync.RWMutex + services map[string]models.ServiceInfo +} + +func newServiceStore(initial map[string]models.ServiceInfo) *serviceStore { + return &serviceStore{services: initial} +} + +func (s *serviceStore) get() map[string]models.ServiceInfo { + s.mu.RLock() + defer s.mu.RUnlock() + result := make(map[string]models.ServiceInfo, len(s.services)) + for k, v := range s.services { + result[k] = v + } + return result +} + +func (s *serviceStore) set(m map[string]models.ServiceInfo) { + s.mu.Lock() + defer s.mu.Unlock() + s.services = m +} + +func (s *serviceStore) merge(additional map[string]models.ServiceInfo) { + s.mu.Lock() + defer s.mu.Unlock() + for k, v := range additional { + s.services[k] = v + } +} + // closeCodeSessionClosed matches the server's CloseCodeSessionClosed. // When the operator closes a session, the server sends this code // to tell the client to exit without reconnecting. 
@@ -135,10 +182,11 @@ func connect(ctx context.Context, cfg *config.ClientConfig) error { log.Println("yamux session established") // Discover services - services := discovery.DiscoverServices(ctx, cfg) + initialServices := discovery.DiscoverServices(ctx, cfg) + store := newServiceStore(initialServices) // Send initial manifest - if err := sendManifest(session, services); err != nil { + if err := sendManifest(session, store.get()); err != nil { _ = session.Close() return fmt.Errorf("failed to send manifest: %w", err) } @@ -172,8 +220,8 @@ func connect(ctx context.Context, cfg *config.ClientConfig) error { if err := sendManifest(session, newServices); err != nil { log.Printf("Failed to send updated manifest: %v", err) } else { - services = newServices - log.Printf("Manifest updated with %d services", len(services)) + store.set(newServices) + log.Printf("Manifest updated with %d services", len(newServices)) } } } @@ -203,8 +251,87 @@ func connect(ctx context.Context, cfg *config.ClientConfig) error { } return fmt.Errorf("stream accept error: %w", err) } - go stream.HandleStream(yamuxStream, services) + go func() { + // Read the first line to determine stream type + firstLine, lineErr := stream.ReadLine(yamuxStream) + if lineErr != nil { + _ = yamuxStream.Close() + return + } + if strings.HasPrefix(firstLine, "COMMAND ") { + handleCommandStream(yamuxStream, firstLine, store, session) + } else { + stream.HandleStreamWithFirstLine(yamuxStream, firstLine, store.get()) + } + }() + } +} + +// handleCommandStream processes a COMMAND stream sent by the support service. +// It reads the JSON payload, applies the command, and writes OK or ERROR response. 
+func handleCommandStream(s net.Conn, firstLine string, store *serviceStore, session *yamux.Session) { + defer func() { _ = s.Close() }() + + version := strings.TrimPrefix(firstLine, "COMMAND ") + if version != "1" { + log.Printf("Unsupported COMMAND version: %q", version) + _, _ = fmt.Fprintf(s, "ERROR unsupported command version %q\n", version) + return + } + + // Read JSON payload (limit to 64 KB) + var payload commandPayload + dec := json.NewDecoder(io.LimitReader(s, 64*1024)) + if err := dec.Decode(&payload); err != nil { + log.Printf("Failed to decode command payload: %v", err) + _, _ = fmt.Fprintf(s, "ERROR invalid json: %v\n", err) + return + } + + switch payload.Action { + case "add_services": + if err := applyAddServices(payload.Services, store, session); err != nil { + log.Printf("add_services failed: %v", err) + _, _ = fmt.Fprintf(s, "ERROR %v\n", err) + return + } + log.Printf("add_services: added %d static service(s)", len(payload.Services)) + _, _ = fmt.Fprint(s, "OK\n") + default: + log.Printf("Unknown command action: %q", payload.Action) + _, _ = fmt.Fprintf(s, "ERROR unknown action %q\n", payload.Action) + } +} + +// applyAddServices validates and merges new static services into the store, +// then re-sends the manifest to the support service. 
+func applyAddServices(newSvcs map[string]models.ServiceInfo, store *serviceStore, session *yamux.Session) error { + if len(newSvcs) == 0 { + return fmt.Errorf("no services provided") + } + if len(newSvcs) > 10 { + return fmt.Errorf("too many services: max 10 per call") + } + + validated := make(map[string]models.ServiceInfo, len(newSvcs)) + for name, svc := range newSvcs { + if !validServiceName.MatchString(name) { + return fmt.Errorf("invalid service name %q: must match [a-z0-9][a-z0-9_-]{0,63}", name) + } + // Validate target format: must be host:port + if svc.Target == "" { + return fmt.Errorf("service %q has empty target", name) + } + validated[name] = svc } + + store.merge(validated) + + // Re-send manifest so the support service registers the new services + if err := sendManifest(session, store.get()); err != nil { + return fmt.Errorf("failed to resend manifest: %w", err) + } + return nil } func sendManifest(session *yamux.Session, services map[string]models.ServiceInfo) error { diff --git a/services/support/cmd/tunnel-client/internal/stream/handler.go b/services/support/cmd/tunnel-client/internal/stream/handler.go index e4fa29b0..719260c6 100644 --- a/services/support/cmd/tunnel-client/internal/stream/handler.go +++ b/services/support/cmd/tunnel-client/internal/stream/handler.go @@ -25,6 +25,32 @@ import ( const maxLineLength = 1024 +// ReadLine reads a newline-terminated line from r, byte by byte. +// Returns the line without the trailing newline. Returns an error if the line +// exceeds maxLineLength bytes or the reader fails. +func ReadLine(r io.Reader) (string, error) { + return readLine(r) +} + +// HandleStreamWithFirstLine processes an incoming yamux stream when the caller +// has already consumed the first line (e.g. to determine the stream type). +// It behaves identically to HandleStream but skips reading the header line, +// using firstLine instead. 
+func HandleStreamWithFirstLine(stream net.Conn, firstLine string, services map[string]models.ServiceInfo) { + defer func() { _ = stream.Close() }() + + if !strings.HasPrefix(firstLine, "CONNECT ") { + log.Printf("Invalid CONNECT header: %q", firstLine) + return + } + serviceName := strings.TrimPrefix(firstLine, "CONNECT ") + if serviceName == "" { + log.Printf("Empty service name in CONNECT header") + return + } + dispatchStream(stream, serviceName, services) +} + // HandleStream processes an incoming yamux stream by reading a CONNECT header, // resolving the target service, and proxying traffic bidirectionally. func HandleStream(stream net.Conn, services map[string]models.ServiceInfo) { @@ -37,6 +63,11 @@ func HandleStream(stream net.Conn, services map[string]models.ServiceInfo) { return } + dispatchStream(stream, serviceName, services) +} + +// dispatchStream routes an already-identified service name to the correct handler. +func dispatchStream(stream net.Conn, serviceName string, services map[string]models.ServiceInfo) { // Built-in terminal service: spawn a PTY instead of dialing TCP if serviceName == "terminal" { if err := writeConnectResponse(stream, nil); err != nil { @@ -55,6 +86,7 @@ func HandleStream(stream net.Conn, services map[string]models.ServiceInfo) { } // Connect to local target + var err error var targetConn net.Conn if svc.TLS { targetConn, err = tls.Dial("tcp", svc.Target, &tls.Config{ diff --git a/services/support/methods/commands.go b/services/support/methods/commands.go index 4400808c..553ed7bc 100644 --- a/services/support/methods/commands.go +++ b/services/support/methods/commands.go @@ -16,12 +16,14 @@ import ( "github.com/nethesis/my/services/support/logger" "github.com/nethesis/my/services/support/queue" "github.com/nethesis/my/services/support/session" + "github.com/nethesis/my/services/support/tunnel" ) // SupportCommand represents a command received via Redis pub/sub type SupportCommand struct { - Action string `json:"action"` - 
SessionID string `json:"session_id"` + Action string `json:"action"` + SessionID string `json:"session_id"` + Services map[string]tunnel.ServiceInfo `json:"services,omitempty"` } // StartCommandListener listens for commands from the backend via Redis pub/sub @@ -59,6 +61,8 @@ func StartCommandListener(ctx context.Context) { switch cmd.Action { case "close": handleCloseCommand(cmd.SessionID) + case "add_services": + handleAddServicesCommand(cmd) default: log.Warn().Str("action", cmd.Action).Msg("unknown command action") } @@ -66,6 +70,21 @@ func StartCommandListener(ctx context.Context) { } } +func handleAddServicesCommand(cmd SupportCommand) { + log := logger.ComponentLogger("commands") + + payload := tunnel.CommandPayload{ + Action: "add_services", + Services: cmd.Services, + } + + if err := TunnelManager.SendCommandToSession(cmd.SessionID, payload); err != nil { + log.Error().Err(err).Str("session_id", cmd.SessionID).Msg("failed to send add_services command to tunnel") + } else { + log.Info().Str("session_id", cmd.SessionID).Int("count", len(cmd.Services)).Msg("add_services command sent") + } +} + func handleCloseCommand(sessionID string) { log := logger.ComponentLogger("commands") diff --git a/services/support/tunnel/manager.go b/services/support/tunnel/manager.go index 0ccbf66a..434f4eb1 100644 --- a/services/support/tunnel/manager.go +++ b/services/support/tunnel/manager.go @@ -10,7 +10,9 @@ package tunnel import ( + "encoding/json" "fmt" + "io" "net" "regexp" "strings" @@ -539,6 +541,77 @@ func (t *Tunnel) ReleaseStream() { } } +// CommandPayload is the JSON body written to a COMMAND yamux stream. +type CommandPayload struct { + Action string `json:"action"` + Services map[string]ServiceInfo `json:"services,omitempty"` +} + +// SendCommandToSession opens a COMMAND yamux stream to the tunnel-client for the +// given session ID, writes the payload as JSON, and waits for an OK/ERROR response. 
+func (m *Manager) SendCommandToSession(sessionID string, payload CommandPayload) error { + t := m.GetBySessionID(sessionID) + if t == nil { + return fmt.Errorf("no active tunnel for session %s", sessionID) + } + + s, err := t.Session.Open() + if err != nil { + return fmt.Errorf("failed to open command stream: %w", err) + } + defer func() { _ = s.Close() }() + + // Write COMMAND header + if _, err := fmt.Fprintf(s, "COMMAND 1\n"); err != nil { + return fmt.Errorf("failed to write command header: %w", err) + } + + // Write JSON payload + if err := json.NewEncoder(s).Encode(payload); err != nil { + return fmt.Errorf("failed to encode command payload: %w", err) + } + + // Read response with timeout + if err := s.SetReadDeadline(time.Now().Add(30 * time.Second)); err != nil { + return fmt.Errorf("failed to set read deadline: %w", err) + } + line, err := readStreamLine(s) + if err != nil { + return fmt.Errorf("failed to read command response: %w", err) + } + if line == "OK" { + return nil + } + if strings.HasPrefix(line, "ERROR ") { + return fmt.Errorf("tunnel-client error: %s", strings.TrimPrefix(line, "ERROR ")) + } + return fmt.Errorf("unexpected command response: %q", line) +} + +// readStreamLine reads a newline-terminated line from r byte-by-byte. 
+func readStreamLine(r io.Reader) (string, error) { + var buf []byte + b := make([]byte, 1) + for { + n, err := r.Read(b) + if n > 0 { + if b[0] == '\n' { + return string(buf), nil + } + buf = append(buf, b[0]) + if len(buf) > 1024 { + return "", fmt.Errorf("response line too long") + } + } + if err != nil { + if err == io.EOF && len(buf) > 0 { + return string(buf), nil + } + return "", err + } + } +} + // TunnelInfo represents basic tunnel information type TunnelInfo struct { SystemID string `json:"system_id"` From df30e7d75b0a614903974e057c39124a94b10dae Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Thu, 19 Mar 2026 15:02:12 +0100 Subject: [PATCH 19/28] fix(security): address all findings from penetration test review Fixes 10 security issues identified in the pen-test review of the static service injection and diagnostics features: - SSRF bypass in applyAddServices (HMAC-signed Redis commands, server pre-check, and client-side validateTarget) - Diagnostics JSON schema validation, 512 KB size cap, and DB-enforced rate limit across reconnections - Diagnostic plugins rejected if not owned by root or writable by others; sanitized environment strips credentials - host:port validation uses net.SplitHostPort with numeric range check - DIAGNOSTICS stream version validated as exact "DIAGNOSTICS 1" - serviceStore total cap (500) prevents unbounded growth - Diagnostics goroutine starts only after yamux session is established --- backend/methods/support.go | 63 +++++++++-- .../internal/connection/connection.go | 102 ++++++++++++++++-- .../internal/diagnostics/runner.go | 26 ++++- services/support/methods/commands.go | 64 ++++++++++- services/support/methods/tunnel.go | 52 +++++++-- services/support/models/diagnostics.go | 37 +++++++ services/support/session/manager.go | 17 ++- services/support/tunnel/manager.go | 6 ++ 8 files changed, 333 insertions(+), 34 deletions(-) create mode 100644 services/support/models/diagnostics.go diff --git a/backend/methods/support.go 
b/backend/methods/support.go index 7aef7daf..86bb090e 100644 --- a/backend/methods/support.go +++ b/backend/methods/support.go @@ -6,14 +6,20 @@ SPDX-License-Identifier: AGPL-3.0-or-later package methods import ( + "crypto/hmac" + "crypto/sha256" + "encoding/hex" "encoding/json" + "fmt" + "net" "net/http" - "regexp" + "strconv" "github.com/gin-gonic/gin" "github.com/gin-gonic/gin/binding" "github.com/nethesis/my/backend/cache" + "github.com/nethesis/my/backend/configuration" "github.com/nethesis/my/backend/entities" "github.com/nethesis/my/backend/helpers" "github.com/nethesis/my/backend/logger" @@ -21,8 +27,44 @@ import ( "github.com/nethesis/my/backend/response" ) -// validHostPort matches a host:port string (IPv4, IPv6, or hostname with port). -var validHostPort = regexp.MustCompile(`^(?:[a-zA-Z0-9._\-\[\]]+):\d{1,5}$`) +// signedRedisMessage wraps a Redis pub/sub payload with an HMAC-SHA256 signature +// so the support service can verify the message came from the backend. +type signedRedisMessage struct { + Payload string `json:"payload"` + Sig string `json:"sig"` +} + +// signAndMarshal signs a payload with SUPPORT_INTERNAL_SECRET and returns the signed envelope. +// If SUPPORT_INTERNAL_SECRET is not configured, the envelope is published unsigned +// (backward-compatible, but a warning is logged). +func signAndMarshal(payload []byte) []byte { + secret := configuration.Config.SupportInternalSecret + var sig string + if secret != "" { + mac := hmac.New(sha256.New, []byte(secret)) + mac.Write(payload) + sig = hex.EncodeToString(mac.Sum(nil)) + } + envelope, _ := json.Marshal(signedRedisMessage{Payload: string(payload), Sig: sig}) + return envelope +} + +// validateHostPort checks that target is a valid host:port with port in range 1-65535. +// Fix #7: replaces the regex that accepted port numbers up to 99999. 
+func validateHostPort(target string) error { + host, portStr, err := net.SplitHostPort(target) + if err != nil { + return fmt.Errorf("invalid host:port format: %w", err) + } + if host == "" { + return fmt.Errorf("empty host") + } + port, convErr := strconv.Atoi(portStr) + if convErr != nil || port < 1 || port > 65535 { + return fmt.Errorf("invalid port: must be 1-65535") + } + return nil +} // GetSupportSessions handles GET /api/support-sessions // Returns support sessions grouped by system with server-side pagination. @@ -118,14 +160,17 @@ func CloseSupportSession(c *gin.Context) { return } - // Notify support service via Redis pub/sub to disconnect the tunnel + // Notify support service via Redis pub/sub to disconnect the tunnel. + // Fix #2: message is signed with SUPPORT_INTERNAL_SECRET so the support service + // can verify it was not injected by a third party with Redis access. if redisClient := cache.GetRedisClient(); redisClient != nil { cmd := map[string]string{ "action": "close", "session_id": sessionID, } payload, _ := json.Marshal(cmd) - if err := redisClient.Publish("support:commands", string(payload)); err != nil { + envelope := signAndMarshal(payload) + if err := redisClient.Publish("support:commands", string(envelope)); err != nil { logger.Warn().Err(err).Str("session_id", sessionID).Msg("failed to publish close command to support service") } } @@ -230,9 +275,9 @@ func AddSupportSessionServices(c *gin.Context) { )) return } - if !validHostPort.MatchString(svc.Target) { + if err := validateHostPort(svc.Target); err != nil { c.JSON(http.StatusBadRequest, response.BadRequest( - "invalid target format: must be host:port", nil, + "invalid target format: must be host:port with port 1-65535", nil, )) return } @@ -250,13 +295,15 @@ func AddSupportSessionServices(c *gin.Context) { return } + // Fix #2: sign the Redis message before publishing cmd := map[string]interface{}{ "action": "add_services", "session_id": sessionID, "services": services, } payload, _ 
:= json.Marshal(cmd) - if err := redisClient.Publish("support:commands", string(payload)); err != nil { + envelope := signAndMarshal(payload) + if err := redisClient.Publish("support:commands", string(envelope)); err != nil { logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to publish add_services command") c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to send command to support service", nil)) return diff --git a/services/support/cmd/tunnel-client/internal/connection/connection.go b/services/support/cmd/tunnel-client/internal/connection/connection.go index 1428fb5c..b4dbb73d 100644 --- a/services/support/cmd/tunnel-client/internal/connection/connection.go +++ b/services/support/cmd/tunnel-client/internal/connection/connection.go @@ -79,6 +79,82 @@ func (s *serviceStore) merge(additional map[string]models.ServiceInfo) { } } +func (s *serviceStore) len() int { + s.mu.RLock() + defer s.mu.RUnlock() + return len(s.services) +} + +// maxServicesTotal caps the total number of services in the tunnel-client store. +// Must match the server-side maxServicesPerManifest in tunnel/manager.go. +const maxServicesTotal = 500 + +// dangerousHostnames mirrors the server-side list in tunnel/manager.go. +var dangerousHostnames = map[string]bool{ + "metadata.google.internal": true, + "metadata": true, + "metadata.azure.internal": true, + "instance-data": true, + "metadata.platformequinix.com": true, +} + +// validateTarget rejects service targets pointing to dangerous addresses. +// Fix #1: mirrors the server-side validateServiceTarget in tunnel/manager.go so that +// COMMAND-injected services are SSRF-validated on the tunnel-client side as well. 
+func validateTarget(target string) error { + if target == "" { + return fmt.Errorf("empty target") + } + host, _, err := net.SplitHostPort(target) + if err != nil { + host = target + } + if dangerousHostnames[strings.ToLower(host)] { + return fmt.Errorf("cloud metadata hostname blocked: %s", host) + } + ip := net.ParseIP(host) + if ip == nil { + ips, lookupErr := net.LookupIP(host) + if lookupErr != nil { + return fmt.Errorf("DNS resolution failed for %s: %w", host, lookupErr) + } + for _, resolvedIP := range ips { + if err := validateTargetIP(resolvedIP); err != nil { + return fmt.Errorf("hostname %s resolves to blocked address: %w", host, err) + } + } + return nil + } + return validateTargetIP(ip) +} + +func validateTargetIP(ip net.IP) error { + if ip.IsUnspecified() { + return fmt.Errorf("unspecified address blocked: %s", ip) + } + if ip.To4() != nil { + linkLocal := net.IPNet{IP: net.IPv4(169, 254, 0, 0), Mask: net.CIDRMask(16, 32)} + if linkLocal.Contains(ip) { + return fmt.Errorf("link-local/cloud metadata address blocked: %s", ip) + } + multicast := net.IPNet{IP: net.IPv4(224, 0, 0, 0), Mask: net.CIDRMask(4, 32)} + if multicast.Contains(ip) { + return fmt.Errorf("multicast address blocked: %s", ip) + } + if ip.Equal(net.IPv4bcast) { + return fmt.Errorf("broadcast address blocked: %s", ip) + } + } else { + if ip.IsLinkLocalUnicast() { + return fmt.Errorf("IPv6 link-local address blocked: %s", ip) + } + if ip.IsMulticast() { + return fmt.Errorf("IPv6 multicast address blocked: %s", ip) + } + } + return nil +} + // closeCodeSessionClosed matches the server's CloseCodeSessionClosed. // When the operator closes a session, the server sends this code // to tell the client to exit without reconnecting. 
@@ -144,13 +220,6 @@ func connect(ctx context.Context, cfg *config.ClientConfig) error { connectURL = parsed.String() } - // Start diagnostics collection in background (runs while connecting) - diagCh := make(chan diagnostics.DiagnosticsReport, 1) - go func() { - report := diagnostics.Collect(cfg.DiagnosticsDir, cfg.DiagnosticsPluginTimeout) - diagCh <- report - }() - log.Printf("Connecting to %s ...", connectURL) dialer := websocket.Dialer{ @@ -181,6 +250,14 @@ func connect(ctx context.Context, cfg *config.ClientConfig) error { } log.Println("yamux session established") + // Fix #10: start diagnostics collection only after the connection is established. + // Running plugins during a failed dial attempt wastes resources and slows reconnect loops. + diagCh := make(chan diagnostics.DiagnosticsReport, 1) + go func() { + report := diagnostics.Collect(cfg.DiagnosticsDir, cfg.DiagnosticsPluginTimeout) + diagCh <- report + }() + // Discover services initialServices := discovery.DiscoverServices(ctx, cfg) store := newServiceStore(initialServices) @@ -312,15 +389,20 @@ func applyAddServices(newSvcs map[string]models.ServiceInfo, store *serviceStore if len(newSvcs) > 10 { return fmt.Errorf("too many services: max 10 per call") } + // Fix #9: enforce total services cap to prevent unbounded store growth + // via repeated add_services calls. + if store.len()+len(newSvcs) > maxServicesTotal { + return fmt.Errorf("service limit exceeded: max %d total services", maxServicesTotal) + } validated := make(map[string]models.ServiceInfo, len(newSvcs)) for name, svc := range newSvcs { if !validServiceName.MatchString(name) { return fmt.Errorf("invalid service name %q: must match [a-z0-9][a-z0-9_-]{0,63}", name) } - // Validate target format: must be host:port - if svc.Target == "" { - return fmt.Errorf("service %q has empty target", name) + // Fix #1: SSRF validation on injected targets — mirrors server-side validateServiceTarget. 
+ if err := validateTarget(svc.Target); err != nil { + return fmt.Errorf("service %q rejected: %w", name, err) } validated[name] = svc } diff --git a/services/support/cmd/tunnel-client/internal/diagnostics/runner.go b/services/support/cmd/tunnel-client/internal/diagnostics/runner.go index 98b01b24..cdcb3ce6 100644 --- a/services/support/cmd/tunnel-client/internal/diagnostics/runner.go +++ b/services/support/cmd/tunnel-client/internal/diagnostics/runner.go @@ -232,6 +232,13 @@ func runPlugin(path string, timeout time.Duration) PluginResult { cmd := exec.CommandContext(ctx, path) //nolint:gosec // path comes from a configured directory, not user input + // Fix #6: run plugins with a minimal environment to prevent credential leakage. + // The inherited environment may contain SYSTEM_KEY, SYSTEM_SECRET, SUPPORT_URL, + // and other sensitive values that plugins should not access. + cmd.Env = []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + } + stdoutPipe, err := cmd.StdoutPipe() if err != nil { result.Summary = fmt.Sprintf("failed to create stdout pipe: %v", err) @@ -342,6 +349,7 @@ func Collect(pluginsDir string, pluginTimeout time.Duration) DiagnosticsReport { // Silently skip if directory does not exist } else { // Collect and sort plugin paths + currentUID := os.Getuid() var pluginPaths []string for _, entry := range entries { if !entry.Type().IsRegular() { @@ -355,7 +363,23 @@ func Collect(pluginsDir string, pluginTimeout time.Duration) DiagnosticsReport { if info.Mode()&0o111 == 0 { continue } - pluginPaths = append(pluginPaths, filepath.Join(pluginsDir, entry.Name())) + pluginPath := filepath.Join(pluginsDir, entry.Name()) + // Fix #6: only run plugins owned by root (UID 0) or the current process user. + // Prevents privilege escalation if a less-privileged process can write to + // the plugins directory. 
+ if sysInfo, ok := info.Sys().(*syscall.Stat_t); ok { + ownerUID := int(sysInfo.Uid) + if ownerUID != 0 && ownerUID != currentUID { + log.Printf("Skipping plugin %q: owned by UID %d (must be root or UID %d)", pluginPath, ownerUID, currentUID) + continue + } + } + // Fix #6: reject group-writable or world-writable plugins to prevent tampering. + if info.Mode().Perm()&0o022 != 0 { + log.Printf("Skipping plugin %q: file is group- or world-writable (mode=%04o)", pluginPath, info.Mode().Perm()) + continue + } + pluginPaths = append(pluginPaths, pluginPath) } sort.Strings(pluginPaths) diff --git a/services/support/methods/commands.go b/services/support/methods/commands.go index 553ed7bc..4153c9ba 100644 --- a/services/support/methods/commands.go +++ b/services/support/methods/commands.go @@ -11,14 +11,49 @@ package methods import ( "context" + "crypto/hmac" + "crypto/sha256" + "encoding/hex" "encoding/json" + "github.com/nethesis/my/services/support/configuration" "github.com/nethesis/my/services/support/logger" "github.com/nethesis/my/services/support/queue" "github.com/nethesis/my/services/support/session" "github.com/nethesis/my/services/support/tunnel" ) +// signedEnvelope wraps a Redis pub/sub payload with an HMAC-SHA256 signature. +// The backend signs all messages with SUPPORT_INTERNAL_SECRET; the support service verifies +// using INTERNAL_SECRET (the same shared secret) before processing any command. +type signedEnvelope struct { + Payload string `json:"payload"` + Sig string `json:"sig"` +} + +// verifyAndUnwrap authenticates a signed Redis message and returns the inner payload. +// If INTERNAL_SECRET is not configured, messages are accepted without verification +// (backward-compatible with deployments that have not yet set the secret). 
+func verifyAndUnwrap(raw string) (string, bool) { + var env signedEnvelope + if err := json.Unmarshal([]byte(raw), &env); err != nil { + return "", false + } + secret := configuration.Config.InternalSecret + if secret == "" { + // No secret configured: accept but log a warning so operators know to fix it. + logger.ComponentLogger("commands").Warn().Msg("INTERNAL_SECRET not set: Redis commands accepted without HMAC verification") + return env.Payload, true + } + mac := hmac.New(sha256.New, []byte(secret)) + mac.Write([]byte(env.Payload)) + expected := hex.EncodeToString(mac.Sum(nil)) + if !hmac.Equal([]byte(expected), []byte(env.Sig)) { + return "", false + } + return env.Payload, true +} + // SupportCommand represents a command received via Redis pub/sub type SupportCommand struct { Action string `json:"action"` @@ -41,15 +76,22 @@ func StartCommandListener(ctx context.Context) { case <-ctx.Done(): log.Info().Msg("command listener stopped") return - case msg, ok := <-ch: - if !ok { + case msg, chanOk := <-ch: + if !chanOk { log.Warn().Msg("command channel closed") return } + // Fix #2: verify HMAC signature before processing any command + payload, valid := verifyAndUnwrap(msg.Payload) + if !valid { + log.Error().Msg("rejected Redis command: invalid or missing HMAC signature") + continue + } + var cmd SupportCommand - if err := json.Unmarshal([]byte(msg.Payload), &cmd); err != nil { - log.Error().Err(err).Str("payload", msg.Payload).Msg("invalid command payload") + if err := json.Unmarshal([]byte(payload), &cmd); err != nil { + log.Error().Err(err).Str("payload", payload).Msg("invalid command payload") continue } @@ -73,6 +115,20 @@ func StartCommandListener(ctx context.Context) { func handleAddServicesCommand(cmd SupportCommand) { log := logger.ComponentLogger("commands") + // Fix #3: SSRF pre-check — validate each service target before forwarding to the tunnel-client. 
+ // This is a defense-in-depth layer; the tunnel-client also validates, but the server + // should reject dangerous targets before they reach the customer's machine at all. + for name, svc := range cmd.Services { + if err := tunnel.ValidateServiceTarget(svc.Target); err != nil { + log.Error().Err(err). + Str("session_id", cmd.SessionID). + Str("service", name). + Str("target", svc.Target). + Msg("rejected add_services command: dangerous service target") + return + } + } + payload := tunnel.CommandPayload{ Action: "add_services", Services: cmd.Services, diff --git a/services/support/methods/tunnel.go b/services/support/methods/tunnel.go index 42953682..1ce3a462 100644 --- a/services/support/methods/tunnel.go +++ b/services/support/methods/tunnel.go @@ -24,6 +24,7 @@ import ( "github.com/nethesis/my/services/support/configuration" "github.com/nethesis/my/services/support/logger" + "github.com/nethesis/my/services/support/models" "github.com/nethesis/my/services/support/session" "github.com/nethesis/my/services/support/tunnel" ) @@ -162,7 +163,7 @@ func HandleTunnel(c *gin.Context) { func acceptControlStream(t *tunnel.Tunnel, systemID, sessionID string) { log := logger.ComponentLogger("tunnel") var lastManifest time.Time - var lastDiagnostics time.Time + var lastDiagnostics time.Time // fast in-memory pre-check; DB enforces cross-reconnect rate limit for { stream, err := t.Session.Accept() @@ -183,8 +184,10 @@ func acceptControlStream(t *tunnel.Tunnel, systemID, sessionID string) { // Read the rest of the header line (br already consumed the 'D') rest, _ := br.ReadString('\n') headerLine := "D" + rest - if strings.HasPrefix(strings.TrimSpace(headerLine), "DIAGNOSTICS") { - // Rate-limit diagnostics: max 1 per 30 seconds + // Fix #8: validate exact version to reject unknown protocol versions + diagParts := strings.Fields(strings.TrimSpace(headerLine)) + if len(diagParts) == 2 && diagParts[0] == "DIAGNOSTICS" && diagParts[1] == "1" { + // Fast in-memory rate-limit 
(pre-check before hitting the DB) if !lastDiagnostics.IsZero() && time.Since(lastDiagnostics) < 30*time.Second { _ = stream.Close() continue @@ -201,12 +204,49 @@ func acceptControlStream(t *tunnel.Tunnel, systemID, sessionID string) { continue } - var raw json.RawMessage = rawJSON - if jsonErr := session.SaveDiagnostics(sessionID, raw); jsonErr != nil { + // Fix #4: explicit size guard — reject payloads larger than 512 KB + if len(rawJSON) > 512*1024 { + log.Warn(). + Str("system_id", systemID). + Str("session_id", sessionID). + Int("bytes", len(rawJSON)). + Msg("diagnostics payload exceeds 512 KB limit, skipping") + continue + } + + // Fix #4: schema validation — unmarshal into typed struct and re-serialize + // to reject malformed JSON and strip unknown fields (prevents stored XSS). + var report models.DiagnosticsReport + if parseErr := json.Unmarshal(rawJSON, &report); parseErr != nil { + log.Warn().Err(parseErr). + Str("system_id", systemID). + Str("session_id", sessionID). + Msg("invalid diagnostics JSON schema, skipping") + continue + } + sanitized, marshalErr := json.Marshal(report) + if marshalErr != nil { + log.Warn().Err(marshalErr). + Str("system_id", systemID). + Str("session_id", sessionID). + Msg("failed to re-serialize diagnostics, skipping") + continue + } + + var raw json.RawMessage = sanitized + // Fix #5: SaveDiagnostics enforces the rate limit in the DB + // to handle reconnect-based bypass of the in-memory check above. + saved, jsonErr := session.SaveDiagnostics(sessionID, raw) + if jsonErr != nil { log.Warn().Err(jsonErr). Str("system_id", systemID). Str("session_id", sessionID). Msg("failed to save diagnostics") + } else if !saved { + log.Debug(). + Str("system_id", systemID). + Str("session_id", sessionID). + Msg("diagnostics update skipped: rate-limited") } else { lastDiagnostics = time.Now() log.Info(). 
@@ -217,7 +257,7 @@ func acceptControlStream(t *tunnel.Tunnel, systemID, sessionID string) { } continue } - // Unknown header starting with 'D' — skip stream + // Unknown or unsupported header starting with 'D' — skip stream _ = stream.Close() continue } diff --git a/services/support/models/diagnostics.go b/services/support/models/diagnostics.go new file mode 100644 index 00000000..6026c785 --- /dev/null +++ b/services/support/models/diagnostics.go @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package models + +import "time" + +// DiagnosticCheck is a single named check within a plugin result. +type DiagnosticCheck struct { + Name string `json:"name"` + Status string `json:"status"` + Value string `json:"value,omitempty"` + Details string `json:"details,omitempty"` +} + +// DiagnosticPlugin is the result from a single diagnostics plugin. +type DiagnosticPlugin struct { + ID string `json:"id"` + Name string `json:"name"` + Status string `json:"status"` + Summary string `json:"summary,omitempty"` + Checks []DiagnosticCheck `json:"checks,omitempty"` +} + +// DiagnosticsReport is the full diagnostics report collected by a tunnel-client at connect time. +type DiagnosticsReport struct { + CollectedAt time.Time `json:"collected_at"` + DurationMs int64 `json:"duration_ms"` + OverallStatus string `json:"overall_status"` + Plugins []DiagnosticPlugin `json:"plugins"` +} diff --git a/services/support/session/manager.go b/services/support/session/manager.go index 9c9c0d23..f91565bd 100644 --- a/services/support/session/manager.go +++ b/services/support/session/manager.go @@ -333,13 +333,20 @@ func GetActiveSessions() (int, error) { } // SaveDiagnostics stores diagnostic report data on a session. -// This is best-effort: if the session no longer exists the error is ignored by the caller. 
-func SaveDiagnostics(sessionID string, data json.RawMessage) error { - _, err := database.DB.Exec( +// The update is skipped if a diagnostics record was saved within the last 30 seconds, +// enforcing the rate limit persistently across tunnel reconnections. +// Returns (true, nil) if saved, (false, nil) if rate-limited, (false, err) on error. +func SaveDiagnostics(sessionID string, data json.RawMessage) (bool, error) { + result, err := database.DB.Exec( `UPDATE support_sessions SET diagnostics = $1, diagnostics_at = NOW(), updated_at = NOW() - WHERE id = $2`, + WHERE id = $2 + AND (diagnostics_at IS NULL OR diagnostics_at < NOW() - INTERVAL '30 seconds')`, string(data), sessionID, ) - return err + if err != nil { + return false, err + } + rows, _ := result.RowsAffected() + return rows > 0, nil } diff --git a/services/support/tunnel/manager.go b/services/support/tunnel/manager.go index 434f4eb1..532aa4c3 100644 --- a/services/support/tunnel/manager.go +++ b/services/support/tunnel/manager.go @@ -426,6 +426,12 @@ var dangerousHostnames = map[string]bool{ "metadata.platformequinix.com": true, } +// ValidateServiceTarget is the exported version of validateServiceTarget, +// used by the commands handler to pre-check targets received from Redis messages. +func ValidateServiceTarget(target string) error { + return validateServiceTarget(target) +} + // validateServiceTarget rejects targets pointing to dangerous addresses. // For non-IP hostnames, DNS is resolved to block DNS rebinding attacks (#2). 
func validateServiceTarget(target string) error { From 50624ac03f3be3e947458afdaad7ed09582357d5 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Fri, 20 Mar 2026 11:04:21 +0100 Subject: [PATCH 20/28] fix(support): add cross-subdomain CORS and session-scoped proxy auth Remote apps (NethVoice, NethCTI) proxied through different subdomains make cross-origin API calls that require CORS headers and shared cookie authentication across sibling subdomains of the same support session. Backend: - Move CORS middleware from router to /api group so it does not intercept /support-proxy/* routes - Add CORS preflight (OPTIONS 204) and response headers for same-session sibling subdomains (validated by session slug match) - Scope proxy cookie to .support.{domain} with SameSite=Lax so it is shared across all service subdomains of the same session - Remove per-service token validation: session ID match is sufficient since users have session-level access Support service: - Fix non-deterministic hostname rewriting in buildHostRewriteMap: when multiple services share the same original hostname, the current service's proxy subdomain is always preferred, keeping API calls same-origin and letting Traefik handle path-based routing --- backend/main.go | 9 +++-- backend/methods/support_proxy.go | 67 ++++++++++++++++++++++++++----- services/support/methods/proxy.go | 16 +++++++- 3 files changed, 76 insertions(+), 16 deletions(-) diff --git a/backend/main.go b/backend/main.go index 8db2175a..ba262e9d 100644 --- a/backend/main.go +++ b/backend/main.go @@ -144,7 +144,11 @@ func main() { // Add compression (exclude WebSocket terminal endpoint and support proxy) router.Use(gzip.Gzip(gzip.DefaultCompression, gzip.WithExcludedPathsRegexs([]string{".*/terminal$", ".*/support-proxy/.*"}))) + // Define API group + api := router.Group("/api") + // CORS configuration in debug mode: restrict to local development origins (#1) + // Applied only to /api routes — /support-proxy handles its own CORS if 
gin.Mode() == gin.DebugMode { corsConf := cors.DefaultConfig() corsConf.AllowHeaders = []string{"Authorization", "Content-Type", "Accept"} @@ -154,12 +158,9 @@ func main() { strings.HasPrefix(origin, "http://127.0.0.1") || strings.HasPrefix(origin, "https://127.0.0.1") } - router.Use(cors.New(corsConf)) + api.Use(cors.New(corsConf)) } - // Define API group - api := router.Group("/api") - // Health check endpoint api.GET("/health", func(c *gin.Context) { c.JSON(http.StatusOK, response.OK("service healthy", version.Get())) diff --git a/backend/methods/support_proxy.go b/backend/methods/support_proxy.go index 07b2f7aa..58b406b6 100644 --- a/backend/methods/support_proxy.go +++ b/backend/methods/support_proxy.go @@ -545,6 +545,26 @@ func SubdomainProxy(c *gin.Context) { return } + // CORS: allow cross-origin requests between same-session sibling subdomains. + // Remote apps (e.g., NethVoice) make cross-subdomain API calls that require + // CORS headers and credentials support. + corsOrigin := supportProxyCORSOrigin(c.GetHeader("Origin"), sessionSlug) + + // Set CORS headers for all cross-subdomain responses (errors included) + if corsOrigin != "" { + c.Header("Access-Control-Allow-Origin", corsOrigin) + c.Header("Access-Control-Allow-Credentials", "true") + } + + // Handle OPTIONS preflight for cross-subdomain requests + if c.Request.Method == http.MethodOptions && corsOrigin != "" { + c.Header("Access-Control-Allow-Methods", "GET, POST, PUT, PATCH, DELETE, OPTIONS") + c.Header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With") + c.Header("Access-Control-Max-Age", "3600") + c.Status(http.StatusNoContent) + return + } + // Prefer query param token (fresh from the UI) over cookie (may be stale // from a previous session — the cookie domain covers all support subdomains). 
tokenString := c.Query("token") @@ -565,13 +585,10 @@ func SubdomainProxy(c *gin.Context) { return } - // Validate that the token's service name matches the subdomain service - if claims.ServiceName != serviceName { - c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "proxy token is not valid for this service", nil)) - return - } - - // Validate that the token's session ID (without dashes) matches the subdomain slug exactly + // Validate that the token's session ID (without dashes) matches the subdomain slug exactly. + // The service name is NOT validated: the cookie is shared across all services + // in the session (domain .support.{domain}), and the user has session-level + // access — they can generate proxy tokens for any service via the frontend. tokenSessionSlug := strings.ReplaceAll(claims.SessionID, "-", "") if tokenSessionSlug != sessionSlug { c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "proxy token does not match this session", nil)) @@ -583,8 +600,9 @@ func SubdomainProxy(c *gin.Context) { // If token came from query param, set cookie and redirect to same path without token if fromQueryParam { secureCookie := !strings.HasPrefix(configuration.Config.AppURL, "http://") - c.SetSameSite(http.SameSiteStrictMode) - c.SetCookie("support_proxy", tokenString, 8*60*60, "/", hostOnly, secureCookie, true) + cookieDomain := ".support." + configuration.Config.SupportProxyDomain + c.SetSameSite(http.SameSiteLaxMode) + c.SetCookie("support_proxy", tokenString, 8*60*60, "/", cookieDomain, secureCookie, true) // Sanitize redirect path to prevent open redirect via protocol-relative URLs (#3). // "//evil.com" is interpreted by browsers as a redirect to evil.com. 
@@ -646,11 +664,17 @@ func SubdomainProxy(c *gin.Context) { resp.Header.Del("X-Frame-Options") resp.Header.Set("Content-Security-Policy", "frame-ancestors 'self'") - // Strip upstream CORS headers to avoid duplicates + // Strip upstream CORS headers and replace with proxy-controlled values resp.Header.Del("Access-Control-Allow-Origin") resp.Header.Del("Access-Control-Allow-Credentials") resp.Header.Del("Access-Control-Allow-Headers") resp.Header.Del("Access-Control-Allow-Methods") + + // Add CORS headers for cross-subdomain requests within the same session + if corsOrigin != "" { + resp.Header.Set("Access-Control-Allow-Origin", corsOrigin) + resp.Header.Set("Access-Control-Allow-Credentials", "true") + } return nil }, Transport: &sessionTokenTransport{inner: internalTransportNoCompression, sessionToken: sessionToken}, @@ -659,6 +683,29 @@ func SubdomainProxy(c *gin.Context) { proxy.ServeHTTP(c.Writer, c.Request) } +// supportProxyCORSOrigin checks if the request Origin is a sibling subdomain +// of the same support session (same session slug under *.support.{domain}). +// Returns the origin if valid, empty string otherwise. 
+func supportProxyCORSOrigin(origin, sessionSlug string) string { + if origin == "" || sessionSlug == "" || configuration.Config.SupportProxyDomain == "" { + return "" + } + u, err := url.Parse(origin) + if err != nil { + return "" + } + host := u.Hostname() + parts := strings.SplitN(host, ".support.", 2) + if len(parts) != 2 || parts[1] != configuration.Config.SupportProxyDomain { + return "" + } + subParts := strings.SplitN(parts[0], "--", 2) + if len(subParts) != 2 || subParts[1] != sessionSlug { + return "" + } + return origin +} + // filterSupportProxyCookie removes the support_proxy cookie from the request // while preserving all other cookies and headers (including Authorization) func filterSupportProxyCookie(req *http.Request) { diff --git a/services/support/methods/proxy.go b/services/support/methods/proxy.go index 89bf1ece..d4754d37 100644 --- a/services/support/methods/proxy.go +++ b/services/support/methods/proxy.go @@ -232,14 +232,18 @@ func buildHostRewriteMap(t *tunnel.Tunnel, currentProxyHost string) map[string]s domain := parts[0] domainSuffix := parts[1] - // Extract the session short ID from the subdomain + // Extract the current service name and session short ID from the subdomain subParts := strings.SplitN(domain, "--", 2) if len(subParts) != 2 { return nil } + currentService := subParts[0] sessionShort := subParts[1] - // Build rewrite map for all services with hostnames + // Build rewrite map for all services with hostnames. + // When multiple services share the same original hostname (common in NS8 + // where Traefik routes by path), prefer the current service's proxy hostname. + // This keeps API calls same-origin and lets Traefik handle path-based routing. 
rewrites := make(map[string]string) services := t.GetServices() for svcName, svc := range services { @@ -251,6 +255,14 @@ func buildHostRewriteMap(t *tunnel.Tunnel, currentProxyHost string) map[string]s rewrites[svc.Host] = proxyHostname } } + // Override: for the current service's hostname, always map to the current + // service's proxy subdomain. This ensures same-origin for shared hostnames. + if currentSvc, ok := services[currentService]; ok && currentSvc.Host != "" { + currentProxy := fmt.Sprintf("%s--%s.support.%s", currentService, sessionShort, domainSuffix) + if currentSvc.Host != currentProxy { + rewrites[currentSvc.Host] = currentProxy + } + } if len(rewrites) == 0 { return nil From 867b93df144c6bf2fdc8f3b36df78b68c58a87e7 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Fri, 20 Mar 2026 11:59:53 +0100 Subject: [PATCH 21/28] feat(support): add per-node diagnostics endpoint and multi-node popover display Add GET /api/support-sessions/diagnostics?system_id=X endpoint that returns diagnostics for all active sessions of a system grouped by node, with an overall_status reflecting the worst across all nodes. Update the frontend popover to show collapsible per-node sections for multi-node NS8 clusters while keeping the flat list for single-node systems. 
--- backend/entities/support.go | 95 +++++++++++++++ backend/main.go | 1 + backend/methods/support.go | 22 ++++ backend/models/support.go | 15 +++ backend/openapi.yaml | 110 ++++++++++++++++++ .../systems/SupportSessionPopover.vue | 102 ++++++++++++---- frontend/src/lib/support/support.ts | 26 +++++ 7 files changed, 347 insertions(+), 24 deletions(-) diff --git a/backend/entities/support.go b/backend/entities/support.go index 3efa0ca7..f0ab76d1 100644 --- a/backend/entities/support.go +++ b/backend/entities/support.go @@ -491,6 +491,101 @@ func (r *SupportRepository) GetDiagnostics(sessionID, userOrgRole, userOrgID str return data, at, nil } +// statusSeverity maps a diagnostic status to a numeric severity for comparison. +// Higher values indicate worse status. +var statusSeverity = map[string]int{ + "ok": 0, + "warning": 1, + "critical": 2, + "error": 3, + "timeout": 4, +} + +// worstStatus returns the most severe status among the given statuses. +func worstStatus(statuses []string) string { + worst := "ok" + for _, s := range statuses { + if statusSeverity[s] > statusSeverity[worst] { + worst = s + } + } + return worst +} + +// GetSystemDiagnostics returns diagnostics for all active sessions of a system, +// grouped by node, with an overall status reflecting the worst across all nodes. +func (r *SupportRepository) GetSystemDiagnostics(systemID, userOrgRole, userOrgID string) (*models.SystemDiagnostics, error) { + conditions := []string{"ss.system_id = $1", "ss.status IN ('active', 'pending')"} + args := []interface{}{systemID} + argIdx := 2 + + // RBAC scope filter + rbacCondition, rbacArgs, _ := buildRBACFilter(userOrgRole, userOrgID, argIdx) + if rbacCondition != "" { + conditions = append(conditions, rbacCondition) + args = append(args, rbacArgs...) 
+ } + + query := fmt.Sprintf(`SELECT ss.id, ss.node_id, ss.diagnostics, ss.diagnostics_at + FROM support_sessions ss + JOIN systems s ON ss.system_id = s.id + WHERE %s + ORDER BY ss.node_id NULLS FIRST, ss.started_at DESC`, + strings.Join(conditions, " AND ")) + + rows, err := r.db.Query(query, args...) + if err != nil { + return nil, fmt.Errorf("failed to query system diagnostics: %w", err) + } + defer func() { _ = rows.Close() }() + + result := &models.SystemDiagnostics{ + SystemID: systemID, + Nodes: []models.NodeDiagnostics{}, + } + + var statuses []string + for rows.Next() { + var nd models.NodeDiagnostics + var nodeID sql.NullString + var rawDiagnostics []byte + var diagnosticsAt sql.NullTime + + if err := rows.Scan(&nd.SessionID, &nodeID, &rawDiagnostics, &diagnosticsAt); err != nil { + return nil, fmt.Errorf("failed to scan system diagnostics: %w", err) + } + + if nodeID.Valid { + nd.NodeID = &nodeID.String + } + if diagnosticsAt.Valid { + t := diagnosticsAt.Time + nd.DiagnosticsAt = &t + } + if rawDiagnostics != nil { + var data map[string]interface{} + if err := json.Unmarshal(rawDiagnostics, &data); err != nil { + return nil, fmt.Errorf("failed to unmarshal diagnostics: %w", err) + } + nd.Diagnostics = data + if os, ok := data["overall_status"].(string); ok { + statuses = append(statuses, os) + } + } + + result.Nodes = append(result.Nodes, nd) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("failed to iterate system diagnostics: %w", err) + } + + if len(statuses) > 0 { + result.OverallStatus = worstStatus(statuses) + } + + return result, nil +} + // maxSessionDuration is the maximum total duration a session can have from its start time (30 days) const maxSessionDuration = 30 * 24 // hours diff --git a/backend/main.go b/backend/main.go index ba262e9d..12a4f0f4 100644 --- a/backend/main.go +++ b/backend/main.go @@ -477,6 +477,7 @@ func main() { supportGroup := customAuthWithAudit.Group("/support-sessions", 
middleware.RequirePermission("connect:systems")) { supportGroup.GET("", methods.GetSupportSessions) + supportGroup.GET("/diagnostics", methods.GetSystemSessionsDiagnostics) supportGroup.GET("/:id", methods.GetSupportSession) supportGroup.PATCH("/:id/extend", methods.ExtendSupportSession) supportGroup.DELETE("/:id", methods.CloseSupportSession) diff --git a/backend/methods/support.go b/backend/methods/support.go index 86bb090e..40dbd597 100644 --- a/backend/methods/support.go +++ b/backend/methods/support.go @@ -231,6 +231,28 @@ func GetSupportSessionDiagnostics(c *gin.Context) { })) } +// GetSystemSessionsDiagnostics handles GET /api/support-sessions/diagnostics?system_id=X +// Returns diagnostics for all active sessions of a system, grouped by node. +func GetSystemSessionsDiagnostics(c *gin.Context) { + systemID := c.Query("system_id") + if systemID == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("system_id query parameter required", nil)) + return + } + + _, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + + repo := entities.NewSupportRepository() + result, err := repo.GetSystemDiagnostics(systemID, userOrgRole, userOrgID) + if err != nil { + logger.Error().Err(err).Str("system_id", systemID).Msg("failed to get system diagnostics") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get system diagnostics", nil)) + return + } + + c.JSON(http.StatusOK, response.OK("system diagnostics retrieved successfully", result)) +} + // AddSupportSessionServices handles POST /api/support-sessions/:id/services // It sends an add_services command to the tunnel-client via Redis pub/sub, // dynamically injecting static services into the running tunnel without reconnection. 
diff --git a/backend/models/support.go b/backend/models/support.go index 8e2c163d..1b73cce3 100644 --- a/backend/models/support.go +++ b/backend/models/support.go @@ -69,6 +69,21 @@ type ExtendSessionRequest struct { Hours int `json:"hours" binding:"required,min=1,max=168"` } +// NodeDiagnostics contains diagnostics data for a single node/session +type NodeDiagnostics struct { + NodeID *string `json:"node_id"` + SessionID string `json:"session_id"` + Diagnostics map[string]interface{} `json:"diagnostics"` + DiagnosticsAt *time.Time `json:"diagnostics_at"` +} + +// SystemDiagnostics aggregates diagnostics across all active sessions of a system +type SystemDiagnostics struct { + SystemID string `json:"system_id"` + OverallStatus string `json:"overall_status"` + Nodes []NodeDiagnostics `json:"nodes"` +} + // AddSessionServiceItem describes a single static service to add to a tunnel type AddSessionServiceItem struct { Name string `json:"name" binding:"required"` diff --git a/backend/openapi.yaml b/backend/openapi.yaml index 18589e4e..368a0083 100644 --- a/backend/openapi.yaml +++ b/backend/openapi.yaml @@ -9092,6 +9092,116 @@ paths: '404': $ref: '#/components/responses/NotFound' + /support-sessions/diagnostics: + get: + operationId: getSystemSessionsDiagnostics + tags: + - Backend - Support Sessions + summary: Get diagnostics for all active sessions of a system + description: | + Returns diagnostics for all active/pending sessions of the given system, + grouped by node. Includes an overall_status reflecting the worst status + across all nodes. For single-node systems (NethSecurity), returns a single + entry with node_id null. 
+ security: + - BearerAuth: [] + parameters: + - name: system_id + in: query + required: true + description: System ID (database UUID) + schema: + type: string + format: uuid + responses: + '200': + description: System diagnostics retrieved successfully + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + example: system diagnostics retrieved successfully + data: + type: object + properties: + system_id: + type: string + format: uuid + overall_status: + type: string + enum: [ok, warning, critical, error, timeout, ''] + description: Worst status across all nodes, empty string if no diagnostics available + nodes: + type: array + items: + type: object + properties: + node_id: + type: string + nullable: true + description: Node identifier, null for single-node systems + session_id: + type: string + format: uuid + diagnostics: + nullable: true + type: object + description: Diagnostic report from the tunnel-client + properties: + collected_at: + type: string + format: date-time + duration_ms: + type: integer + overall_status: + type: string + enum: [ok, warning, critical, error, timeout] + plugins: + type: array + items: + type: object + properties: + id: + type: string + name: + type: string + status: + type: string + enum: [ok, warning, critical, error, timeout] + summary: + type: string + checks: + type: array + items: + type: object + properties: + name: + type: string + status: + type: string + enum: [ok, warning, critical, error, timeout] + value: + type: string + details: + type: string + diagnostics_at: + type: string + format: date-time + nullable: true + description: Timestamp when diagnostics were last received + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + /support-sessions/{id}/diagnostics: get: operationId: getSupportSessionDiagnostics diff --git 
a/frontend/src/components/systems/SupportSessionPopover.vue b/frontend/src/components/systems/SupportSessionPopover.vue index 0e05be2f..897372ce 100644 --- a/frontend/src/components/systems/SupportSessionPopover.vue +++ b/frontend/src/components/systems/SupportSessionPopover.vue @@ -12,9 +12,9 @@ import { useI18n } from 'vue-i18n' import { getSystemActiveSessions, getSupportSessionLogs, - getSupportSessionDiagnostics, + getSystemDiagnostics, type SystemSessionGroup, - type SessionDiagnostics, + type SystemDiagnostics, } from '@/lib/support/support' function formatDateWithMonth(date: Date, loc: string): string { return date.toLocaleString(loc, { @@ -51,7 +51,8 @@ interface PopoverData { } const data = ref(null) -const diagnostics = ref(null) +const diagnostics = ref(null) +const expandedNodes = ref>(new Set()) const loading = ref(false) const error = ref(false) const isOpen = ref(false) @@ -105,16 +106,16 @@ async function fetchData() { operators: Array.from(operatorMap.values()), } - // Fetch diagnostics from the most recently started session - if (group.sessions && group.sessions.length > 0) { - const latestSession = [...group.sessions].sort( - (a, b) => new Date(b.started_at).getTime() - new Date(a.started_at).getTime(), - )[0] - try { - diagnostics.value = await getSupportSessionDiagnostics(latestSession.id) - } catch { - diagnostics.value = null + // Fetch diagnostics for all active sessions of this system + try { + const sysDiag = await getSystemDiagnostics(props.systemId) + diagnostics.value = sysDiag + // Auto-expand all nodes in multi-node view + if (sysDiag.nodes.length > 1) { + expandedNodes.value = new Set(sysDiag.nodes.map((n) => n.node_id ?? '__null__')) } + } catch { + diagnostics.value = null } } catch { error.value = true @@ -172,6 +173,19 @@ function formatConnectionBadge(conn: OperatorConnection): string { return label } +function toggleNode(nodeId: string | null) { + const key = nodeId ?? 
'__null__' + if (expandedNodes.value.has(key)) { + expandedNodes.value.delete(key) + } else { + expandedNodes.value.add(key) + } +} + +function isNodeExpanded(nodeId: string | null): boolean { + return expandedNodes.value.has(nodeId ?? '__null__') +} + function diagnosticStatusDotClass(status: string): string { switch (status) { case 'ok': @@ -282,26 +296,66 @@ function diagnosticStatusTextClass(status: string): string {
{{ t('support.diagnostics') }}
-
-
- {{ plugin.name }} - - {{ plugin.summary || plugin.status }} - + +
+ +
+ +
+
+ +
+
+ {{ plugin.name }} + + {{ plugin.summary || plugin.status }} + +
+
diff --git a/frontend/src/lib/support/support.ts b/frontend/src/lib/support/support.ts index 3aee9781..c52b0869 100644 --- a/frontend/src/lib/support/support.ts +++ b/frontend/src/lib/support/support.ts @@ -102,6 +102,19 @@ export interface SessionDiagnostics { diagnostics_at: string | null } +export interface NodeDiagnostics { + node_id: string | null + session_id: string + diagnostics: DiagnosticsReport | null + diagnostics_at: string | null +} + +export interface SystemDiagnostics { + system_id: string + overall_status: string + nodes: NodeDiagnostics[] +} + interface SupportSessionsResponse { code: number message: string @@ -418,3 +431,16 @@ export const getSupportSessionDiagnostics = (sessionId: string): Promise res.data.data) } + +export const getSystemDiagnostics = (systemId: string): Promise => { + const loginStore = useLoginStore() + return axios + .get<{ + code: number + message: string + data: SystemDiagnostics + }>(`${API_URL}/support-sessions/diagnostics?system_id=${encodeURIComponent(systemId)}`, { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }) + .then((res) => res.data.data) +} From 39c7196eec28ccd27de708f8ceccc01b525d2b8b Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Tue, 24 Mar 2026 15:56:43 +0100 Subject: [PATCH 22/28] feat(support): add ephemeral user provisioning for support sessions Tunnel-client creates temporary users when a session starts and removes them when it ends, giving operators access to remote admin interfaces without requiring customer credentials. NS8: creates cluster-admin (Redis) + domain users per local LDAP/Samba provider. Worker nodes fetch credentials from the leader via USERS_FETCH yamux stream. NethSecurity: creates local admin user via nethsec Python module. Plugin system (users.d/): executable scripts configure applications for the support user. The tunnel-client passes --instances-file with module context (instances, domains, services) so plugins can configure per- instance credentials. 
Frontend: unified Services & Credentials modal replaces the old service dropdown, showing cluster admin, domain credentials per module accordion, and clickable service links. --- .../022_add_users_support_sessions.sql | 8 + ...22_add_users_support_sessions_rollback.sql | 4 + backend/database/schema.sql | 4 + backend/entities/support.go | 48 ++ backend/main.go | 1 + backend/methods/support.go | 25 + backend/openapi.yaml | 114 ++++ .../support/SupportSessionsTable.vue | 574 ++++++++++++++---- frontend/src/i18n/en/translation.json | 14 +- frontend/src/lib/support/support.ts | 56 ++ services/support/README.md | 38 ++ .../tunnel-client/internal/config/config.go | 8 + .../internal/connection/connection.go | 169 +++++- .../internal/users/configurator.go | 402 ++++++++++++ .../tunnel-client/internal/users/models.go | 91 +++ .../internal/users/nethsecurity.go | 121 ++++ .../internal/users/nethserver.go | 246 ++++++++ .../cmd/tunnel-client/internal/users/store.go | 137 +++++ services/support/cmd/tunnel-client/main.go | 22 + services/support/methods/tunnel.go | 94 +++ services/support/models/session.go | 2 + services/support/models/users.go | 60 ++ services/support/session/manager.go | 38 ++ 23 files changed, 2156 insertions(+), 120 deletions(-) create mode 100644 backend/database/migrations/022_add_users_support_sessions.sql create mode 100644 backend/database/migrations/022_add_users_support_sessions_rollback.sql create mode 100644 services/support/cmd/tunnel-client/internal/users/configurator.go create mode 100644 services/support/cmd/tunnel-client/internal/users/models.go create mode 100644 services/support/cmd/tunnel-client/internal/users/nethsecurity.go create mode 100644 services/support/cmd/tunnel-client/internal/users/nethserver.go create mode 100644 services/support/cmd/tunnel-client/internal/users/store.go create mode 100644 services/support/models/users.go diff --git a/backend/database/migrations/022_add_users_support_sessions.sql 
b/backend/database/migrations/022_add_users_support_sessions.sql new file mode 100644 index 00000000..30d0bd05 --- /dev/null +++ b/backend/database/migrations/022_add_users_support_sessions.sql @@ -0,0 +1,8 @@ +-- Migration 022: Add ephemeral support users to support_sessions +-- Stores the users report from tunnel-client (JSONB) alongside the session + +ALTER TABLE support_sessions ADD COLUMN IF NOT EXISTS users JSONB; +ALTER TABLE support_sessions ADD COLUMN IF NOT EXISTS users_at TIMESTAMPTZ; + +COMMENT ON COLUMN support_sessions.users IS 'Ephemeral support users created by tunnel-client for this session (JSON)'; +COMMENT ON COLUMN support_sessions.users_at IS 'Timestamp when users report was received from the tunnel-client'; diff --git a/backend/database/migrations/022_add_users_support_sessions_rollback.sql b/backend/database/migrations/022_add_users_support_sessions_rollback.sql new file mode 100644 index 00000000..db10caa6 --- /dev/null +++ b/backend/database/migrations/022_add_users_support_sessions_rollback.sql @@ -0,0 +1,4 @@ +-- Rollback Migration 022: Remove ephemeral support users columns from support_sessions + +ALTER TABLE support_sessions DROP COLUMN IF EXISTS users; +ALTER TABLE support_sessions DROP COLUMN IF EXISTS users_at; diff --git a/backend/database/schema.sql b/backend/database/schema.sql index 5634d6db..c95d5656 100644 --- a/backend/database/schema.sql +++ b/backend/database/schema.sql @@ -944,6 +944,8 @@ CREATE TABLE IF NOT EXISTS support_sessions ( closed_by VARCHAR(32), diagnostics JSONB, diagnostics_at TIMESTAMPTZ, + users JSONB, + users_at TIMESTAMPTZ, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), CONSTRAINT support_sessions_status_check CHECK (status IN ('pending', 'active', 'expired', 'closed')) @@ -957,6 +959,8 @@ COMMENT ON COLUMN support_sessions.status IS 'Session status: pending (no tunnel COMMENT ON COLUMN support_sessions.closed_by IS 'Who closed the session: client, operator, 
timeout, system'; COMMENT ON COLUMN support_sessions.diagnostics IS 'Diagnostic report collected by tunnel-client at connect time (JSON)'; COMMENT ON COLUMN support_sessions.diagnostics_at IS 'Timestamp when diagnostics were last received from the tunnel-client'; +COMMENT ON COLUMN support_sessions.users IS 'Ephemeral support users created by tunnel-client for this session (JSON)'; +COMMENT ON COLUMN support_sessions.users_at IS 'Timestamp when users report was received from the tunnel-client'; CREATE INDEX IF NOT EXISTS idx_support_sessions_system_id ON support_sessions(system_id); CREATE INDEX IF NOT EXISTS idx_support_sessions_status ON support_sessions(status); diff --git a/backend/entities/support.go b/backend/entities/support.go index f0ab76d1..9aadbaec 100644 --- a/backend/entities/support.go +++ b/backend/entities/support.go @@ -491,6 +491,54 @@ func (r *SupportRepository) GetDiagnostics(sessionID, userOrgRole, userOrgID str return data, at, nil } +// GetUsers returns the ephemeral support users for a session, if available and accessible. +// Returns nil, nil, nil if users have not been provisioned yet. +func (r *SupportRepository) GetUsers(sessionID, userOrgRole, userOrgID string) (map[string]interface{}, *time.Time, error) { + conditions := []string{"ss.id = $1"} + args := []interface{}{sessionID} + argIdx := 2 + + // RBAC scope filter + rbacCondition, rbacArgs, _ := buildRBACFilter(userOrgRole, userOrgID, argIdx) + if rbacCondition != "" { + conditions = append(conditions, rbacCondition) + args = append(args, rbacArgs...) 
+ } + + query := fmt.Sprintf(`SELECT ss.users, ss.users_at + FROM support_sessions ss + JOIN systems s ON ss.system_id = s.id + WHERE %s`, strings.Join(conditions, " AND ")) + + var rawUsers []byte + var usersAt sql.NullTime + + err := r.db.QueryRow(query, args...).Scan(&rawUsers, &usersAt) + if err != nil { + if err == sql.ErrNoRows { + return nil, nil, nil + } + return nil, nil, fmt.Errorf("failed to get users: %w", err) + } + + if rawUsers == nil { + return nil, nil, nil + } + + var data map[string]interface{} + if err := json.Unmarshal(rawUsers, &data); err != nil { + return nil, nil, fmt.Errorf("failed to unmarshal users: %w", err) + } + + var at *time.Time + if usersAt.Valid { + t := usersAt.Time + at = &t + } + + return data, at, nil +} + // statusSeverity maps a diagnostic status to a numeric severity for comparison. // Higher values indicate worse status. var statusSeverity = map[string]int{ diff --git a/backend/main.go b/backend/main.go index 12a4f0f4..28edab41 100644 --- a/backend/main.go +++ b/backend/main.go @@ -483,6 +483,7 @@ func main() { supportGroup.DELETE("/:id", methods.CloseSupportSession) supportGroup.GET("/:id/logs", methods.GetSupportSessionLogs) supportGroup.GET("/:id/diagnostics", methods.GetSupportSessionDiagnostics) + supportGroup.GET("/:id/users", methods.GetSupportSessionUsers) supportGroup.GET("/:id/services", methods.GetSupportSessionServices) supportGroup.POST("/:id/services", methods.AddSupportSessionServices) supportGroup.POST("/:id/terminal-ticket", methods.GenerateTerminalTicket) diff --git a/backend/methods/support.go b/backend/methods/support.go index 40dbd597..5a4dd6a3 100644 --- a/backend/methods/support.go +++ b/backend/methods/support.go @@ -231,6 +231,31 @@ func GetSupportSessionDiagnostics(c *gin.Context) { })) } +// GetSupportSessionUsers handles GET /api/support-sessions/:id/users +func GetSupportSessionUsers(c *gin.Context) { + sessionID := c.Param("id") + if sessionID == "" { + c.JSON(http.StatusBadRequest, 
response.BadRequest("session id required", nil)) + return + } + + _, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + + repo := entities.NewSupportRepository() + data, at, err := repo.GetUsers(sessionID, userOrgRole, userOrgID) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to get session users") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get session users", nil)) + return + } + + c.JSON(http.StatusOK, response.OK("session users retrieved successfully", gin.H{ + "session_id": sessionID, + "users": data, + "users_at": at, + })) +} + // GetSystemSessionsDiagnostics handles GET /api/support-sessions/diagnostics?system_id=X // Returns diagnostics for all active sessions of a system, grouped by node. func GetSystemSessionsDiagnostics(c *gin.Context) { diff --git a/backend/openapi.yaml b/backend/openapi.yaml index 368a0083..25e3831f 100644 --- a/backend/openapi.yaml +++ b/backend/openapi.yaml @@ -9297,6 +9297,120 @@ paths: '404': $ref: '#/components/responses/NotFound' + /support-sessions/{id}/users: + get: + operationId: getSupportSessionUsers + tags: + - Backend - Support Sessions + summary: Get ephemeral support users for a session + description: | + Returns the ephemeral users provisioned by the tunnel-client for this support session. + Includes cluster admin credentials (NS8), domain user credentials per LDAP/Samba provider, + and local user credentials (NethSecurity). Returns `null` for `users` if no users have + been provisioned yet. 
+ security: + - BearerAuth: [] + parameters: + - name: id + in: path + required: true + description: Support session ID + schema: + type: string + format: uuid + responses: + '200': + description: Session users retrieved successfully + content: + application/json: + schema: + type: object + properties: + code: + type: integer + example: 200 + message: + type: string + example: session users retrieved successfully + data: + type: object + properties: + session_id: + type: string + format: uuid + users: + type: object + nullable: true + description: Users report from tunnel-client + properties: + created_at: + type: string + format: date-time + duration_ms: + type: integer + users: + type: object + properties: + session_id: + type: string + platform: + type: string + enum: [ns8, nethsecurity] + cluster_admin: + type: object + nullable: true + properties: + username: + type: string + password: + type: string + domain_users: + type: array + items: + type: object + properties: + domain: + type: string + module: + type: string + username: + type: string + password: + type: string + local_users: + type: array + items: + type: object + properties: + username: + type: string + password: + type: string + apps: + type: array + items: + type: object + properties: + id: + type: string + name: + type: string + url: + type: string + notes: + type: string + users_at: + type: string + format: date-time + nullable: true + description: Timestamp when users report was received + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + /support-sessions/{id}/proxy-token: post: operationId: generateSupportProxyToken diff --git a/frontend/src/components/support/SupportSessionsTable.vue b/frontend/src/components/support/SupportSessionsTable.vue index 00c20908..b90b6591 100644 --- a/frontend/src/components/support/SupportSessionsTable.vue +++ 
b/frontend/src/components/support/SupportSessionsTable.vue @@ -11,6 +11,10 @@ import { faTerminal, faUpRightFromSquare, faPlug, + faCopy, + faCheck, + faChevronDown, + faChevronRight, } from '@fortawesome/free-solid-svg-icons' import { NeTable, @@ -24,11 +28,10 @@ import { NeEmptyState, NeInlineNotification, NeBadge, - NeDropdown, NeModal, NeTextInput, NeToggle, - type NeDropdownItem, + NeSpinner, } from '@nethesis/vue-components' import { computed, ref, watch } from 'vue' import { useI18n } from 'vue-i18n' @@ -41,11 +44,14 @@ import { type SupportSessionStatus, type SystemSessionGroup, type SupportServiceGroup, + type SessionUserCredential, + type SessionDomainUser, extendSupportSession, closeSupportSession, getSupportSessionServices, generateSupportProxyToken, addSupportSessionServices, + getSupportSessionUsers, } from '@/lib/support/support' import UpdatingSpinner from '@/components/UpdatingSpinner.vue' import { formatDateTimeNoSeconds } from '@/lib/dateTime' @@ -113,7 +119,6 @@ function handleCloseTerminal() { // Services map keyed by session ID (individual sessions, not groups) const servicesMap = ref>({}) -const openingServiceId = ref(null) watch( sessionGroups, @@ -135,109 +140,17 @@ watch( { immediate: true }, ) -function groupHasServices(group: SystemSessionGroup): boolean { - return group.sessions.some((s) => - (servicesMap.value[s.id] || []).some((g) => g.services.length > 0), - ) -} - -function getGroupServiceDropdownItems(group: SystemSessionGroup): NeDropdownItem[] { - const items: NeDropdownItem[] = [] - - // Collect all service groups from all sessions, deduplicating by service name. 
- interface ServiceEntry { - group: SupportServiceGroup - session: SessionRef - } - const allGroups: ServiceEntry[] = [] - const seenServices = new Set() - - for (const session of group.sessions) { - for (const sg of servicesMap.value[session.id] || []) { - const deduped: SupportServiceGroup = { ...sg, services: [] } - for (const svc of sg.services) { - if (seenServices.has(svc.name)) continue - seenServices.add(svc.name) - deduped.services.push(svc) - } - if (deduped.services.length > 0) { - allGroups.push({ group: deduped, session }) - } - } - } - - // Sort by nodeId then moduleId - allGroups.sort((a, b) => { - const nodeA = a.group.nodeId || '' - const nodeB = b.group.nodeId || '' - const nc = nodeA.localeCompare(nodeB, undefined, { numeric: true }) - if (nc !== 0) return nc - return a.group.moduleId.localeCompare(b.group.moduleId) - }) - - // Check if services span multiple nodes - const nodeIds = new Set(allGroups.map((e) => e.group.nodeId).filter(Boolean)) - const multiNode = nodeIds.size > 1 - let lastNodeId = '' - - for (const entry of allGroups) { - const { group: sg, session } = entry - - // Node header - if (multiNode && sg.nodeId && sg.nodeId !== lastNodeId) { - items.push({ - id: `node-${sg.nodeId}`, - label: `— Node ${sg.nodeId} —`, - disabled: true, - }) - lastNodeId = sg.nodeId - } - - // Module header - if (sg.moduleId) { - const header = sg.moduleLabel ? 
`${sg.moduleId} (${sg.moduleLabel})` : sg.moduleId - items.push({ - id: `header-${sg.nodeId}-${sg.moduleId}`, - label: header, - disabled: true, - }) - } - - // Service items - for (const svc of sg.services) { - let label = svc.name - if (svc.host || svc.path) { - const hostPath = (svc.host || '') + (svc.path || '') - label += ` (${hostPath})` - } - items.push({ - id: `${session.id}-${svc.name}`, - label, - icon: faUpRightFromSquare, - action: () => handleOpenService(session, svc.name, svc.path), - }) - } - } - - return items -} - async function handleOpenService(session: SessionRef, serviceName: string, path?: string) { - openingServiceId.value = session.id try { const result = await generateSupportProxyToken(session.id, serviceName) - // Append the route path (e.g., /pbx-report) so Traefik matches the correct route let baseUrl = result.url if (path && path !== '/') { - // result.url ends with '/', path starts with '/' baseUrl = baseUrl.replace(/\/$/, '') + path + '/' } const url = baseUrl + '?token=' + result.token window.open(url, '_blank') } catch (error) { console.error('Cannot generate proxy token:', error) - } finally { - openingServiceId.value = null } } @@ -275,12 +188,172 @@ function handleCloseAddService() { addServiceGroup.value = null } +// ── Unified Services & Credentials modal ── + +interface MergedCredentials { + clusterAdmin: SessionUserCredential | null + domainUsers: SessionDomainUser[] + localUsers: SessionUserCredential[] +} + +// A module group for the unified modal: module name + label + domain + credentials + services +interface UnifiedModuleGroup { + moduleId: string + moduleLabel: string + nodeId: string + domain: string + password: string + services: { name: string; session: SessionRef; host: string; path: string }[] +} + +const unifiedGroup = ref(null) +const unifiedLoading = ref(false) +const unifiedCredentials = ref(null) +const unifiedModules = ref([]) +const unifiedUngrouped = ref<{ name: string; label: string; session: SessionRef; 
host: string; path: string }[]>( + [], +) +const copiedField = ref(null) +const expandedModules = ref>(new Set()) + +function toggleModule(moduleId: string) { + if (expandedModules.value.has(moduleId)) { + expandedModules.value.delete(moduleId) + } else { + expandedModules.value.add(moduleId) + } +} + +async function handleOpenUnified(group: SystemSessionGroup) { + unifiedGroup.value = group + unifiedLoading.value = true + unifiedCredentials.value = null + unifiedModules.value = [] + unifiedUngrouped.value = [] + expandedModules.value = new Set() + + try { + // Fetch credentials from all active sessions + const activeSessions = group.sessions.filter((s) => s.status === 'active') + const usersResults = await Promise.all( + activeSessions.map((s) => getSupportSessionUsers(s.id).catch(() => null)), + ) + + // Merge credentials + const merged: MergedCredentials = { clusterAdmin: null, domainUsers: [], localUsers: [] } + const seenDomains = new Set() + const seenLocal = new Set() + for (const r of usersResults) { + if (!r?.users?.users) continue + const u = r.users.users + if (u.cluster_admin && !merged.clusterAdmin) merged.clusterAdmin = u.cluster_admin + for (const du of u.domain_users || []) { + if (!seenDomains.has(du.domain)) { + seenDomains.add(du.domain) + merged.domainUsers.push(du) + } + } + for (const lu of u.local_users || []) { + if (!seenLocal.has(lu.username)) { + seenLocal.add(lu.username) + merged.localUsers.push(lu) + } + } + } + const hasData = + merged.clusterAdmin || merged.domainUsers.length > 0 || merged.localUsers.length > 0 + unifiedCredentials.value = hasData ? 
merged : null + + // Build domain → password map + const domainPasswords = new Map() + for (const du of merged.domainUsers) { + domainPasswords.set(du.domain, du.password) + } + + // Build module_domains map from all user reports + const moduleDomains = new Map() + for (const r of usersResults) { + if (!r?.users?.users?.module_domains) continue + for (const [modId, domain] of Object.entries(r.users.users.module_domains)) { + moduleDomains.set(modId, domain) + } + } + + // Build unified module groups from services + const moduleMap = new Map() + const ungrouped: typeof unifiedUngrouped.value = [] + const seenServices = new Set() + + for (const session of activeSessions) { + for (const sg of servicesMap.value[session.id] || []) { + for (const svc of sg.services) { + if (seenServices.has(svc.name)) continue + seenServices.add(svc.name) + + if (!svc.moduleId) { + ungrouped.push({ name: svc.name, label: svc.label, session, host: svc.host, path: svc.path }) + continue + } + + let mg = moduleMap.get(svc.moduleId) + if (!mg) { + const domain = moduleDomains.get(svc.moduleId) || '' + mg = { + moduleId: svc.moduleId, + moduleLabel: svc.label || sg.moduleLabel || '', + nodeId: svc.nodeId || sg.nodeId || '', + domain, + password: domain ? 
domainPasswords.get(domain) || '' : '', + services: [], + } + moduleMap.set(svc.moduleId, mg) + } + if (!mg.moduleLabel && (svc.label || sg.moduleLabel)) { + mg.moduleLabel = svc.label || sg.moduleLabel + } + mg.services.push({ name: svc.name, session, host: svc.host, path: svc.path }) + } + } + } + + // Sort modules by nodeId then moduleId + unifiedModules.value = Array.from(moduleMap.values()).sort((a, b) => { + const nc = (a.nodeId || '').localeCompare(b.nodeId || '', undefined, { numeric: true }) + if (nc !== 0) return nc + return a.moduleId.localeCompare(b.moduleId) + }) + unifiedUngrouped.value = ungrouped.sort((a, b) => a.name.localeCompare(b.name)) + } catch (error) { + console.error('Cannot load services & credentials:', error) + } finally { + unifiedLoading.value = false + } +} + +function handleCloseUnified() { + unifiedGroup.value = null +} + +async function copyToClipboard(text: string, fieldId: string) { + await navigator.clipboard.writeText(text) + copiedField.value = fieldId + setTimeout(() => { + copiedField.value = null + }, 2000) +} + +// Check if unified modal has any services +function groupHasServices(group: SystemSessionGroup): boolean { + return group.sessions.some((s) => + (servicesMap.value[s.id] || []).some((g) => g.services.length > 0), + ) +} + async function handleAddService() { if (!addServiceGroup.value) return addServiceError.value = '' addServiceLoading.value = true try { - // Use the first active session id as the target const activeSession = addServiceGroup.value.sessions.find((s) => s.status === 'active') if (!activeSession) { addServiceError.value = t('support.no_active_session') @@ -294,8 +367,6 @@ async function handleAddService() { tls: addServiceTls.value, }, ]) - // Wait for the Redis → support service → tunnel-client → manifest round-trip - // before re-fetching, otherwise the GET arrives before the manifest is updated handleCloseAddService() setTimeout(() => { getSupportSessionServices(activeSession.id) @@ -400,25 
+471,17 @@ async function handleAddService() { {{ $t('support.terminal') }} - - + + diff --git a/frontend/src/i18n/en/translation.json b/frontend/src/i18n/en/translation.json index 93568690..f2f73214 100644 --- a/frontend/src/i18n/en/translation.json +++ b/frontend/src/i18n/en/translation.json @@ -664,6 +664,18 @@ "service_target_helper": "Address and port of the service on the remote system", "service_label": "Label (optional)", "service_label_placeholder": "e.g. My Service", - "service_tls": "TLS" + "service_tls": "TLS", + "credentials": "Credentials", + "credentials_title": "Support Credentials", + "services_and_credentials": "Services & Credentials", + "no_credentials": "No credentials available for this session.", + "cluster_admin": "Cluster Admin", + "domain_users": "Domain Users", + "local_users": "Admin Interface", + "other_services": "Other Services", + "username": "Username", + "password": "Password", + "domain": "Domain", + "copied": "Copied" } } diff --git a/frontend/src/lib/support/support.ts b/frontend/src/lib/support/support.ts index c52b0869..2cab9c24 100644 --- a/frontend/src/lib/support/support.ts +++ b/frontend/src/lib/support/support.ts @@ -432,6 +432,62 @@ export const getSupportSessionDiagnostics = (sessionId: string): Promise res.data.data) } +// Ephemeral support users + +export interface SessionUserCredential { + username: string + password: string +} + +export interface SessionDomainUser { + domain: string + module: string + username: string + password: string +} + +export interface SessionAppConfig { + id: string + name: string + url?: string + notes?: string +} + +export interface SessionUsersData { + session_id: string + platform: string + cluster_admin?: SessionUserCredential + domain_users?: SessionDomainUser[] + local_users?: SessionUserCredential[] + apps?: SessionAppConfig[] + module_domains?: Record // moduleID → user domain +} + +export interface SessionUsersReport { + created_at: string + duration_ms: number + users: 
SessionUsersData +} + +export interface SessionUsersResponse { + session_id: string + users: SessionUsersReport | null + users_at: string | null +} + +export const getSupportSessionUsers = (sessionId: string): Promise => { + const loginStore = useLoginStore() + return axios + .get<{ + code: number + message: string + data: SessionUsersResponse + }>(`${API_URL}/support-sessions/${sessionId}/users`, { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }) + .then((res) => res.data.data) +} + export const getSystemDiagnostics = (systemId: string): Promise => { const loginStore = useLoginStore() return axios diff --git a/services/support/README.md b/services/support/README.md index 1af4fbdb..027dfe61 100644 --- a/services/support/README.md +++ b/services/support/README.md @@ -256,6 +256,44 @@ The overall session status is the worst status across all plugins. If the `id` o --diagnostics-total-timeout duration # Default: 30s (env: DIAGNOSTICS_TOTAL_TIMEOUT) ``` +### Ephemeral Support Users + +The tunnel-client provisions temporary users when a support session starts and removes them when it ends. This gives operators access to the remote system's admin interfaces without requiring customer credentials. + +**Platform detection**: Redis available → NS8 mode, Redis absent → NethSecurity mode. + +**NS8 (NethServer)**: +- Creates a cluster-admin user via `runagent` + `agent.tasks` (leader node only) +- Creates a domain user on each local LDAP/Samba provider (skips remote/read-only providers) +- Worker nodes fetch credentials from the server (created by the leader) via `USERS_FETCH` stream + +**NethSecurity**: +- Creates a local user via `python3` + `nethsec.users` module +- Promotes to admin for web UI access + +**Plugin system** (`users.d/`): executable scripts in `/usr/share/my/users.d/` configure applications for the support user. 
Each plugin receives `setup` or `teardown` as the first argument and `--users-file ` pointing to a JSON file with the provisioned credentials. + +Plugin output format (setup): +```json +{ + "id": "nethvoice", + "name": "NethVoice Admin", + "notes": "Optional notes for the operator" +} +``` + +**Crash recovery**: a state file (default `/var/run/my-support-users.json`) persists the created users. On startup, orphaned users from a previous crash are cleaned up before connecting. + +**Reconnection**: user provisioning happens only on the first successful connection. Subsequent reconnections re-send the report without re-provisioning. + +```bash +# User provisioning flags +--users-dir string # Default: /usr/share/my/users.d (env: USERS_DIR) +--users-plugin-timeout duration # Default: 15s (env: USERS_PLUGIN_TIMEOUT) +--users-total-timeout duration # Default: 60s (env: USERS_TOTAL_TIMEOUT) +--users-state-file string # Default: /var/run/my-support-users.json (env: USERS_STATE_FILE) +``` + ## Related - [openapi.yaml](../../backend/openapi.yaml) - API specification - [Backend](../../backend/README.md) - API server diff --git a/services/support/cmd/tunnel-client/internal/config/config.go b/services/support/cmd/tunnel-client/internal/config/config.go index ad4f5213..6160d29c 100644 --- a/services/support/cmd/tunnel-client/internal/config/config.go +++ b/services/support/cmd/tunnel-client/internal/config/config.go @@ -26,6 +26,10 @@ const ( RedisPingTimeout = 2 * time.Second DefaultDiagnosticsPluginTimeout = 10 * time.Second DefaultDiagnosticsTotalTimeout = 30 * time.Second + DefaultUsersDir = "/usr/share/my/users.d" + DefaultUsersPluginTimeout = 15 * time.Second + DefaultUsersTotalTimeout = 60 * time.Second + DefaultUsersStateFile = "/var/run/my-support-users.json" ) // ClientConfig holds the runtime configuration for the tunnel client @@ -44,6 +48,10 @@ type ClientConfig struct { DiagnosticsDir string DiagnosticsPluginTimeout time.Duration DiagnosticsTotalTimeout 
time.Duration + UsersDir string + UsersPluginTimeout time.Duration + UsersTotalTimeout time.Duration + UsersStateFile string } // ParseExcludePatterns parses a comma-separated string of glob patterns. diff --git a/services/support/cmd/tunnel-client/internal/connection/connection.go b/services/support/cmd/tunnel-client/internal/connection/connection.go index b4dbb73d..d401ccd1 100644 --- a/services/support/cmd/tunnel-client/internal/connection/connection.go +++ b/services/support/cmd/tunnel-client/internal/connection/connection.go @@ -20,7 +20,6 @@ import ( "net" "net/http" "net/url" - "os" "regexp" "strings" "sync" @@ -34,6 +33,7 @@ import ( "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/discovery" "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/models" "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/stream" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/users" ) // validServiceName matches safe service names: lowercase alphanumeric, hyphens, underscores. @@ -161,13 +161,29 @@ func validateTargetIP(ip net.IP) error { const closeCodeSessionClosed = 4000 // RunWithReconnect connects to the support service and reconnects on failure -// with exponential backoff. +// with exponential backoff. User provisioning happens once on first successful +// connection and cleanup happens when the session is permanently closed. 
func RunWithReconnect(ctx context.Context, cfg *config.ClientConfig) { delay := cfg.ReconnectDelay + var sessionUsers *users.SessionUsers + var provisioner users.Provisioner + var lastServices map[string]models.ServiceInfo + usersProvisioned := false + + // Cleanup users on final exit (context cancel or session closed) + defer func() { + if sessionUsers != nil { + cleanupCtx := context.Background() + users.RunTeardown(cleanupCtx, cfg.UsersDir, sessionUsers, lastServices, cfg.RedisAddr, cfg.UsersPluginTimeout) + _ = provisioner.Delete(sessionUsers) + users.RemoveState(cfg.UsersStateFile) + log.Println("Support users cleaned up") + } + }() for { start := time.Now() - err := connect(ctx, cfg) + err := connect(ctx, cfg, &sessionUsers, &provisioner, &usersProvisioned, &lastServices) if ctx.Err() != nil { return // context cancelled, clean shutdown } @@ -175,7 +191,7 @@ func RunWithReconnect(ctx context.Context, cfg *config.ClientConfig) { // Check if the server sent a "session closed" close frame if websocket.IsCloseError(err, closeCodeSessionClosed) { log.Println("Session closed by operator. 
Exiting.") - os.Exit(0) + return } log.Printf("Connection lost: %v", err) @@ -201,7 +217,7 @@ func RunWithReconnect(ctx context.Context, cfg *config.ClientConfig) { } } -func connect(ctx context.Context, cfg *config.ClientConfig) error { +func connect(ctx context.Context, cfg *config.ClientConfig, sessionUsers **users.SessionUsers, provisioner *users.Provisioner, usersProvisioned *bool, lastServices *map[string]models.ServiceInfo) error { // Build Basic Auth header creds := base64.StdEncoding.EncodeToString([]byte(cfg.Key + ":" + cfg.Secret)) header := http.Header{} @@ -280,6 +296,64 @@ func connect(ctx context.Context, cfg *config.ClientConfig) error { log.Printf("Diagnostics timed out after %v, skipping", cfg.DiagnosticsTotalTimeout) } + // Provision ephemeral support users (only on first successful connection) + if !*usersProvisioned { + *provisioner = users.NewProvisioner(cfg.RedisAddr) + su, provisionErr := (*provisioner).Create(cfg.Key) + if provisionErr != nil { + log.Printf("User provisioning failed: %v", provisionErr) + } + + // If provisioner didn't create credentials (e.g., worker node), + // fetch them from the server (created by the leader node) + hasCredentials := su != nil && (su.ClusterAdmin != nil || len(su.DomainUsers) > 0 || len(su.LocalUsers) > 0) + if !hasCredentials { + log.Println("No local credentials created, fetching from server...") + fetched := fetchUsersFromServer(session, 3, 10*time.Second) + if fetched != nil { + if su == nil { + su = fetched + } else { + // Merge fetched credentials into the local result + su.ClusterAdmin = fetched.ClusterAdmin + su.DomainUsers = fetched.DomainUsers + su.LocalUsers = fetched.LocalUsers + } + } + } + + if su != nil { + // Run users.d/ setup plugins, passing discovered services for module context + setupCtx, setupCancel := context.WithTimeout(ctx, cfg.UsersTotalTimeout) + currentServices := store.get() + apps, pluginErrors := users.RunSetup(setupCtx, cfg.UsersDir, su, currentServices, cfg.RedisAddr, 
cfg.UsersPluginTimeout) + setupCancel() + su.Apps = apps + su.Errors = pluginErrors + *lastServices = currentServices + + // Save state for crash recovery + if stateErr := users.SaveState(cfg.UsersStateFile, su); stateErr != nil { + log.Printf("Failed to save users state: %v", stateErr) + } + + *sessionUsers = su + } + *usersProvisioned = true + + // Send users report to support service + if *sessionUsers != nil { + if sendErr := sendUsersReport(session, *sessionUsers); sendErr != nil { + log.Printf("Failed to send users report: %v", sendErr) + } + } + } else if *sessionUsers != nil { + // Re-send users report on reconnect so the new session gets the data + if sendErr := sendUsersReport(session, *sessionUsers); sendErr != nil { + log.Printf("Failed to re-send users report: %v", sendErr) + } + } + // Start periodic re-discovery go func() { ticker := time.NewTicker(cfg.DiscoveryInterval) @@ -436,6 +510,91 @@ func sendManifest(session *yamux.Session, services map[string]models.ServiceInfo return nil } +// fetchUsersFromServer asks the support service for credentials already created +// by another node (e.g., the leader). Returns nil if no credentials are available. +// Retries up to maxAttempts with a delay between attempts to handle the case +// where the leader hasn't connected yet. 
+func fetchUsersFromServer(session *yamux.Session, maxAttempts int, retryDelay time.Duration) *users.SessionUsers { + for attempt := 1; attempt <= maxAttempts; attempt++ { + yamuxStream, err := session.Open() + if err != nil { + return nil + } + + if _, err := fmt.Fprintf(yamuxStream, "USERS_FETCH 1\n"); err != nil { + _ = yamuxStream.Close() + return nil + } + + // Read response (up to 256 KB) + data, readErr := io.ReadAll(io.LimitReader(yamuxStream, 256*1024)) + _ = yamuxStream.Close() + + if readErr != nil || len(data) == 0 { + return nil + } + + // Parse the response — it's a UsersReport JSON (same format as USERS 1) + var report users.UsersReport + if err := json.Unmarshal(data, &report); err != nil { + // Try parsing as empty object + if string(data) == "{}\n" || string(data) == "{}" { + if attempt < maxAttempts { + log.Printf("No credentials available from server yet (attempt %d/%d), retrying in %v...", attempt, maxAttempts, retryDelay) + time.Sleep(retryDelay) + continue + } + return nil + } + log.Printf("Failed to parse fetched users: %v", err) + return nil + } + + // Check if there are actual credentials + su := &report.Users + if su.ClusterAdmin == nil && len(su.DomainUsers) == 0 && len(su.LocalUsers) == 0 { + if attempt < maxAttempts { + log.Printf("No credentials available from server yet (attempt %d/%d), retrying in %v...", attempt, maxAttempts, retryDelay) + time.Sleep(retryDelay) + continue + } + return nil + } + + log.Printf("Fetched credentials from server: cluster_admin=%v, domain_users=%d, local_users=%d", + su.ClusterAdmin != nil, len(su.DomainUsers), len(su.LocalUsers)) + return su + } + return nil +} + +func sendUsersReport(session *yamux.Session, sessionUsers *users.SessionUsers) error { + yamuxStream, err := session.Open() + if err != nil { + return fmt.Errorf("failed to open users stream: %w", err) + } + defer func() { _ = yamuxStream.Close() }() + + // Write header line to identify this as a users stream + if _, err := 
fmt.Fprintf(yamuxStream, "USERS 1\n"); err != nil { + return fmt.Errorf("failed to write users header: %w", err) + } + + report := users.UsersReport{ + CreatedAt: sessionUsers.CreatedAt, + DurationMs: time.Since(sessionUsers.CreatedAt).Milliseconds(), + Users: *sessionUsers, + } + + if err := json.NewEncoder(yamuxStream).Encode(report); err != nil { + return fmt.Errorf("failed to encode users report: %w", err) + } + + log.Printf("Users report sent: platform=%s, domain_users=%d, local_users=%d, apps=%d", + sessionUsers.Platform, len(sessionUsers.DomainUsers), len(sessionUsers.LocalUsers), len(sessionUsers.Apps)) + return nil +} + func sendDiagnostics(session *yamux.Session, report diagnostics.DiagnosticsReport) error { yamuxStream, err := session.Open() if err != nil { diff --git a/services/support/cmd/tunnel-client/internal/users/configurator.go b/services/support/cmd/tunnel-client/internal/users/configurator.go new file mode 100644 index 00000000..335e1e60 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/users/configurator.go @@ -0,0 +1,402 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package users + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "log" + "os" + "os/exec" + "path/filepath" + "regexp" + "sort" + "syscall" + "time" + + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/models" +) + +const defaultPluginTimeout = 15 * time.Second + +// moduleBaseNameRegex extracts the base name from a module ID by stripping the trailing instance number. +// e.g., "nethvoice103" → "nethvoice", "n8n2" → "n8n", "nethsecurity-controller4" → "nethsecurity-controller" +var moduleBaseNameRegex = regexp.MustCompile(`^(.+?)\d+$`) + +// ExtractModuleBaseName returns the base name of a module ID (without trailing digits). 
+func ExtractModuleBaseName(moduleID string) string { + m := moduleBaseNameRegex.FindStringSubmatch(moduleID) + if len(m) > 1 { + return m[1] + } + return moduleID +} + +// RunSetup discovers and executes plugin scripts in usersDir with the "setup" action. +// For plugins that match a discovered module (by base name), the module's instances +// are passed via --instances-file. Other plugins run without instance context. +func RunSetup(ctx context.Context, usersDir string, users *SessionUsers, services map[string]models.ServiceInfo, redisAddr string, pluginTimeout time.Duration) ([]AppConfig, []PluginError) { + if pluginTimeout <= 0 { + pluginTimeout = defaultPluginTimeout + } + + plugins := discoverPlugins(usersDir) + if len(plugins) == 0 { + return nil, nil + } + + // Write users data to a temp file for plugins to read + usersFile, err := writeUsersFile(users) + if err != nil { + log.Printf("Users configurator: cannot write temp users file: %v", err) + return nil, nil + } + defer func() { _ = os.Remove(usersFile) }() + + // Build module contexts from discovered services + moduleContexts := buildModuleContexts(services, redisAddr) + + // Populate module_domains mapping on the session users so the frontend + // can display per-module domain credentials inside each accordion. 
+ if users.ModuleDomains == nil { + users.ModuleDomains = make(map[string]string) + } + for _, mc := range moduleContexts { + for _, inst := range mc.Instances { + if inst.Domain != "" { + users.ModuleDomains[inst.ID] = inst.Domain + } + } + } + + var apps []AppConfig + var errors []PluginError + + for _, pluginPath := range plugins { + pluginName := filepath.Base(pluginPath) + + // Check if this plugin matches a discovered module + var instancesFile string + if mc, ok := moduleContexts[pluginName]; ok && len(mc.Instances) > 0 { + var writeErr error + instancesFile, writeErr = writeTempJSON("my-instances-*.json", mc) + if writeErr != nil { + log.Printf("Users configurator: cannot write instances file for %q: %v", pluginName, writeErr) + } else { + defer func(f string) { _ = os.Remove(f) }(instancesFile) + } + } + + results, err := runPlugin(ctx, pluginPath, "setup", usersFile, instancesFile, pluginTimeout) + if err != nil { + log.Printf("Users configurator: plugin %q setup failed: %v", pluginName, err) + errors = append(errors, PluginError{ + ID: pluginName, + Message: err.Error(), + }) + continue + } + for _, app := range results { + apps = append(apps, app) + log.Printf("Users configurator: plugin %q configured app %q", pluginName, app.Name) + } + } + + return apps, errors +} + +// RunTeardown executes plugin scripts with the "teardown" action to undo app configurations. 
+func RunTeardown(ctx context.Context, usersDir string, users *SessionUsers, services map[string]models.ServiceInfo, redisAddr string, pluginTimeout time.Duration) { + if pluginTimeout <= 0 { + pluginTimeout = defaultPluginTimeout + } + + plugins := discoverPlugins(usersDir) + if len(plugins) == 0 { + return + } + + usersFile, err := writeUsersFile(users) + if err != nil { + log.Printf("Users configurator: cannot write temp users file for teardown: %v", err) + return + } + defer func() { _ = os.Remove(usersFile) }() + + moduleContexts := buildModuleContexts(services, redisAddr) + + // Run teardown in reverse order + for i := len(plugins) - 1; i >= 0; i-- { + pluginName := filepath.Base(plugins[i]) + + var instancesFile string + if mc, ok := moduleContexts[pluginName]; ok && len(mc.Instances) > 0 { + var writeErr error + instancesFile, writeErr = writeTempJSON("my-instances-*.json", mc) + if writeErr == nil { + defer func(f string) { _ = os.Remove(f) }(instancesFile) + } + } + + if _, err := runPlugin(ctx, plugins[i], "teardown", usersFile, instancesFile, pluginTimeout); err != nil { + log.Printf("Users configurator: plugin %q teardown failed: %v", pluginName, err) + } else { + log.Printf("Users configurator: plugin %q teardown complete", pluginName) + } + } +} + +// buildModuleContexts groups discovered services by module base name. 
+func buildModuleContexts(services map[string]models.ServiceInfo, redisAddr string) map[string]*ModuleContext { + if len(services) == 0 { + return nil + } + + // Group services by moduleID + type moduleInfo struct { + nodeID string + label string + services map[string]ModuleServiceInfo + } + moduleMap := make(map[string]*moduleInfo) + + for serviceName, svc := range services { + if svc.ModuleID == "" { + continue + } + mi, ok := moduleMap[svc.ModuleID] + if !ok { + mi = &moduleInfo{ + nodeID: svc.NodeID, + label: svc.Label, + services: make(map[string]ModuleServiceInfo), + } + moduleMap[svc.ModuleID] = mi + } + if mi.label == "" && svc.Label != "" { + mi.label = svc.Label + } + mi.services[serviceName] = ModuleServiceInfo{ + Host: svc.Host, + Path: svc.Path, + PathPrefix: svc.PathPrefix, + TLS: svc.TLS, + } + } + + // Fetch USER_DOMAIN for each module instance (NS8 only) + domains := make(map[string]string) + if redisAddr != "" { + for moduleID := range moduleMap { + domain := fetchModuleDomain(moduleID) + if domain != "" { + domains[moduleID] = domain + } + } + } + + // Group module instances by base name + contexts := make(map[string]*ModuleContext) + for moduleID, mi := range moduleMap { + baseName := ExtractModuleBaseName(moduleID) + mc, ok := contexts[baseName] + if !ok { + mc = &ModuleContext{Module: baseName} + contexts[baseName] = mc + } + mc.Instances = append(mc.Instances, ModuleInstance{ + ID: moduleID, + NodeID: mi.nodeID, + Label: mi.label, + Domain: domains[moduleID], + Services: mi.services, + }) + } + + // Sort instances within each context + for _, mc := range contexts { + sort.Slice(mc.Instances, func(i, j int) bool { + return mc.Instances[i].ID < mc.Instances[j].ID + }) + } + + return contexts +} + +// fetchModuleDomain reads USER_DOMAIN from Redis for a module instance. 
+func fetchModuleDomain(moduleID string) string { + cmd := exec.Command("redis-cli", "HGET", fmt.Sprintf("module/%s/environment", moduleID), "USER_DOMAIN") //nolint:gosec // redis-cli is trusted + output, err := cmd.Output() + if err != nil { + return "" + } + domain := string(bytes.TrimSpace(output)) + if domain == "" || domain == "(nil)" { + return "" + } + return domain +} + +// discoverPlugins scans usersDir for valid executable plugin files. +// Uses the same security checks as diagnostics: ownership, permissions. +func discoverPlugins(usersDir string) []string { + if usersDir == "" { + return nil + } + + entries, err := os.ReadDir(usersDir) + if err != nil { + if !os.IsNotExist(err) { + log.Printf("Users configurator: failed to read directory %q: %v", usersDir, err) + } + return nil + } + + currentUID := os.Getuid() + var paths []string + + for _, entry := range entries { + if !entry.Type().IsRegular() { + continue + } + info, infoErr := entry.Info() + if infoErr != nil { + continue + } + // Must be executable + if info.Mode()&0o111 == 0 { + continue + } + + pluginPath := filepath.Join(usersDir, entry.Name()) + + // Ownership check: only root or current process user + if sysInfo, ok := info.Sys().(*syscall.Stat_t); ok { + ownerUID := int(sysInfo.Uid) + if ownerUID != 0 && ownerUID != currentUID { + log.Printf("Users configurator: skipping %q: owned by UID %d (must be root or UID %d)", pluginPath, ownerUID, currentUID) + continue + } + } + + // Reject group-writable or world-writable + if info.Mode().Perm()&0o022 != 0 { + log.Printf("Users configurator: skipping %q: group- or world-writable (mode=%04o)", pluginPath, info.Mode().Perm()) + continue + } + + paths = append(paths, pluginPath) + } + + sort.Strings(paths) + return paths +} + +// runPlugin executes a single plugin with the given action and context files. +// For "setup", parses stdout as a single AppConfig or an array of AppConfig. +// For "teardown", ignores stdout. 
instancesFile may be empty for generic plugins. +func runPlugin(ctx context.Context, path, action, usersFile, instancesFile string, timeout time.Duration) ([]AppConfig, error) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + args := []string{action, "--users-file", usersFile} + if instancesFile != "" { + args = append(args, "--instances-file", instancesFile) + } + + cmd := exec.CommandContext(ctx, path, args...) //nolint:gosec // path comes from a configured directory + // Minimal environment to prevent credential leakage + cmd.Env = []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + } + + stdoutPipe, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("failed to create stdout pipe: %w", err) + } + + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("failed to start: %w", err) + } + + var stdout bytes.Buffer + if _, readErr := io.Copy(&stdout, io.LimitReader(stdoutPipe, 64*1024)); readErr != nil { + log.Printf("Users configurator: partial read from %q: %v", path, readErr) + } + + runErr := cmd.Wait() + if ctx.Err() != nil { + return nil, fmt.Errorf("timed out after %v", timeout) + } + if runErr != nil { + return nil, fmt.Errorf("exit error: %w", runErr) + } + + if action != "setup" || stdout.Len() == 0 { + return nil, nil + } + + raw := bytes.TrimSpace(stdout.Bytes()) + pluginName := filepath.Base(path) + + // Try parsing as array first, then as single object + if len(raw) > 0 && raw[0] == '[' { + var apps []AppConfig + if err := json.Unmarshal(raw, &apps); err != nil { + return nil, fmt.Errorf("invalid JSON array output: %w", err) + } + for i := range apps { + if apps[i].ID == "" { + apps[i].ID = pluginName + } + if apps[i].Name == "" { + apps[i].Name = apps[i].ID + } + } + return apps, nil + } + + var app AppConfig + if err := json.Unmarshal(raw, &app); err != nil { + return nil, fmt.Errorf("invalid JSON output: %w", err) + } + if app.ID == "" { + app.ID = pluginName + } + if 
app.Name == "" { + app.Name = app.ID + } + return []AppConfig{app}, nil +} + +// writeUsersFile writes SessionUsers data to a temporary file and returns its path. +func writeUsersFile(users *SessionUsers) (string, error) { + return writeTempJSON("my-support-users-*.json", users) +} + +// writeTempJSON writes data as JSON to a temporary file and returns its path. +func writeTempJSON(pattern string, data interface{}) (string, error) { + f, err := os.CreateTemp("", pattern) + if err != nil { + return "", err + } + + if err := json.NewEncoder(f).Encode(data); err != nil { + _ = f.Close() + _ = os.Remove(f.Name()) + return "", err + } + + _ = f.Close() + return f.Name(), nil +} diff --git a/services/support/cmd/tunnel-client/internal/users/models.go b/services/support/cmd/tunnel-client/internal/users/models.go new file mode 100644 index 00000000..bceb6503 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/users/models.go @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package users + +import "time" + +// UserCredential holds username and password for an ephemeral support user +type UserCredential struct { + Username string `json:"username"` + Password string `json:"password"` +} + +// DomainUser represents a user created on a specific LDAP/Samba domain +type DomainUser struct { + Domain string `json:"domain"` + Module string `json:"module"` + Username string `json:"username"` + Password string `json:"password"` +} + +// AppConfig describes an application configured for the support user +type AppConfig struct { + ID string `json:"id"` + Name string `json:"name"` + URL string `json:"url,omitempty"` + Notes string `json:"notes,omitempty"` +} + +// PluginError records a users.d plugin that failed during setup +type PluginError struct { + ID string `json:"id"` + Message string `json:"message"` +} + +// SessionUsers is the full user provisioning result for a support session +type SessionUsers struct { + SessionID string `json:"session_id"` + Platform string `json:"platform"` + ClusterAdmin *UserCredential `json:"cluster_admin,omitempty"` + DomainUsers []DomainUser `json:"domain_users,omitempty"` + LocalUsers []UserCredential `json:"local_users,omitempty"` + Apps []AppConfig `json:"apps,omitempty"` + Errors []PluginError `json:"errors,omitempty"` + ModuleDomains map[string]string `json:"module_domains,omitempty"` // moduleID → user domain (e.g., "nethvoice103" → "sf.nethserver.net") + CreatedAt time.Time `json:"created_at"` +} + +// UsersReport is sent to the support service via the USERS yamux stream +type UsersReport struct { + CreatedAt time.Time `json:"created_at"` + DurationMs int64 `json:"duration_ms"` + Users SessionUsers `json:"users"` +} + +// Provisioner creates and deletes ephemeral support users. +// Implementations are platform-specific (NS8 vs NethSecurity). 
+type Provisioner interface { + Create(sessionID string) (*SessionUsers, error) + Delete(users *SessionUsers) error +} + +// ModuleServiceInfo describes a single service route for a module instance +type ModuleServiceInfo struct { + Host string `json:"host"` + Path string `json:"path,omitempty"` + PathPrefix string `json:"path_prefix,omitempty"` + TLS bool `json:"tls,omitempty"` +} + +// ModuleInstance describes a single instance of a module (e.g., nethvoice103) +type ModuleInstance struct { + ID string `json:"id"` + NodeID string `json:"node_id,omitempty"` + Label string `json:"label,omitempty"` + Domain string `json:"domain,omitempty"` + Services map[string]ModuleServiceInfo `json:"services"` +} + +// ModuleContext is the context passed to a users.d plugin via --instances-file. +// It contains all instances of the module that the plugin manages. +type ModuleContext struct { + Module string `json:"module"` + Instances []ModuleInstance `json:"instances"` +} diff --git a/services/support/cmd/tunnel-client/internal/users/nethsecurity.go b/services/support/cmd/tunnel-client/internal/users/nethsecurity.go new file mode 100644 index 00000000..79ec1af4 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/users/nethsecurity.go @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package users + +import ( + "fmt" + "log" + "os/exec" + "time" +) + +// NethSecurityProvisioner creates ephemeral users on NethSecurity (OpenWrt): +// - A local user via the nethsec Python module +// - Promoted to admin via ubus call ns.users set-admin +type NethSecurityProvisioner struct{} + +func (p *NethSecurityProvisioner) Create(sessionID string) (*SessionUsers, error) { + username := generateUsername(sessionID) + password := generatePassword() + result := &SessionUsers{ + SessionID: sessionID, + Platform: "nethsecurity", + CreatedAt: time.Now(), + } + + // Create local user via Python (ubus has timeout issues) + if err := p.addLocalUser(username, password); err != nil { + return result, fmt.Errorf("failed to create local user: %w", err) + } + + // Promote to admin (web UI access) + if err := p.setAdmin(username); err != nil { + // Rollback: remove the user we just created + _ = p.deleteLocalUser(username) + return result, fmt.Errorf("failed to set admin role: %w", err) + } + + result.LocalUsers = append(result.LocalUsers, UserCredential{ + Username: username, + Password: password, + }) + log.Printf("NethSecurity user provisioning: admin user %q created", username) + + return result, nil +} + +func (p *NethSecurityProvisioner) Delete(users *SessionUsers) error { + if users == nil { + return nil + } + + for _, u := range users.LocalUsers { + if err := p.removeAdmin(u.Username); err != nil { + log.Printf("NethSecurity user cleanup: failed to remove admin role for %q: %v", u.Username, err) + } + if err := p.deleteLocalUser(u.Username); err != nil { + log.Printf("NethSecurity user cleanup: failed to delete user %q: %v", u.Username, err) + } else { + log.Printf("NethSecurity user cleanup: user %q removed", u.Username) + } + } + + return nil +} + +func (p *NethSecurityProvisioner) addLocalUser(username, password string) error { + 
script := fmt.Sprintf(` +from nethsec import users +from euci import EUci +u = EUci() +users.add_local_user(u, %q, %q, "Support Session", "main") +`, username, password) + return runPython(script) +} + +func (p *NethSecurityProvisioner) setAdmin(username string) error { + script := fmt.Sprintf(` +from nethsec import users +from euci import EUci +u = EUci() +users.set_admin(u, %q, "main") +`, username) + return runPython(script) +} + +func (p *NethSecurityProvisioner) removeAdmin(username string) error { + script := fmt.Sprintf(` +from nethsec import users +from euci import EUci +u = EUci() +users.remove_admin(u, %q) +`, username) + return runPython(script) +} + +func (p *NethSecurityProvisioner) deleteLocalUser(username string) error { + script := fmt.Sprintf(` +from nethsec import users +from euci import EUci +u = EUci() +users.delete_local_user(u, %q, "main") +`, username) + return runPython(script) +} + +// runPython executes a Python3 script on NethSecurity. +func runPython(script string) error { + cmd := exec.Command("python3", "-c", script) //nolint:gosec // python3 is a trusted system binary + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("python3 failed: %w: %s", err, output) + } + return nil +} diff --git a/services/support/cmd/tunnel-client/internal/users/nethserver.go b/services/support/cmd/tunnel-client/internal/users/nethserver.go new file mode 100644 index 00000000..970e6a7a --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/users/nethserver.go @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package users + +import ( + "crypto/sha256" + "encoding/json" + "fmt" + "log" + "os/exec" + "strings" + "time" +) + +// NethServerProvisioner creates ephemeral users on NS8 clusters: +// - A cluster-admin user (Redis-based, only from the leader node) +// - A domain user per local LDAP/Samba provider (skips remote/external providers) +// +// On worker nodes, user provisioning is skipped entirely because cluster tasks +// can only be submitted from the leader. The leader's tunnel-client handles +// user creation for all domains in the cluster. +type NethServerProvisioner struct { + RedisAddr string +} + +// userDomainsResponse is the top-level response from cluster/list-user-domains +type userDomainsResponse struct { + Domains []userDomain `json:"domains"` +} + +// userDomain represents a single user domain from the NS8 API +type userDomain struct { + Name string `json:"name"` + Location string `json:"location"` // "internal" or "external" + Schema string `json:"schema"` // "ad" or "rfc2307" + Providers []userProvider `json:"providers"` +} + +// userProvider identifies a module providing the LDAP/Samba service +type userProvider struct { + ID string `json:"id"` // module ID (e.g., "openldap3", "samba1") +} + +func (p *NethServerProvisioner) Create(sessionID string) (*SessionUsers, error) { + username := generateUsername(sessionID) + result := &SessionUsers{ + SessionID: sessionID, + Platform: "ns8", + CreatedAt: time.Now(), + } + + // Skip user provisioning on worker nodes — only the leader can submit cluster tasks + if !p.isLeaderNode() { + log.Println("NS8 user provisioning: skipping (not the leader node)") + return result, nil + } + + // 1. 
Create cluster-admin user + adminPwd := generatePassword() + if err := p.createClusterAdmin(username, adminPwd); err != nil { + log.Printf("NS8 user provisioning: cluster-admin creation failed: %v", err) + } else { + result.ClusterAdmin = &UserCredential{Username: username, Password: adminPwd} + log.Printf("NS8 user provisioning: cluster-admin %q created", username) + } + + // 2. List user domains and create users on local providers + domains, err := p.listUserDomains() + if err != nil { + log.Printf("NS8 user provisioning: cannot list user domains: %v", err) + return result, nil + } + + for _, domain := range domains { + if domain.Location == "external" { + log.Printf("NS8 user provisioning: skipping external domain %q (read-only)", domain.Name) + continue + } + if len(domain.Providers) == 0 { + log.Printf("NS8 user provisioning: skipping domain %q (no providers)", domain.Name) + continue + } + + // Use the first provider for the domain + provider := domain.Providers[0].ID + domainPwd := generatePassword() + if err := p.createDomainUser(provider, username, domainPwd); err != nil { + log.Printf("NS8 user provisioning: domain user creation on %q failed: %v", provider, err) + continue + } + + result.DomainUsers = append(result.DomainUsers, DomainUser{ + Domain: domain.Name, + Module: provider, + Username: username, + Password: domainPwd, + }) + log.Printf("NS8 user provisioning: domain user %q created on %s (%s)", username, provider, domain.Name) + } + + return result, nil +} + +func (p *NethServerProvisioner) Delete(users *SessionUsers) error { + if users == nil { + return nil + } + + // Only the leader node can remove cluster/domain users + if !p.isLeaderNode() { + log.Println("NS8 user cleanup: skipping (not the leader node)") + return nil + } + + // Remove domain users (reverse order) + for i := len(users.DomainUsers) - 1; i >= 0; i-- { + du := users.DomainUsers[i] + if err := p.deleteDomainUser(du.Module, du.Username); err != nil { + log.Printf("NS8 user 
cleanup: failed to remove domain user %q from %s: %v", du.Username, du.Module, err) + } else { + log.Printf("NS8 user cleanup: domain user %q removed from %s", du.Username, du.Module) + } + } + + // Remove cluster-admin + if users.ClusterAdmin != nil { + if err := p.deleteClusterAdmin(users.ClusterAdmin.Username); err != nil { + log.Printf("NS8 user cleanup: failed to remove cluster-admin %q: %v", users.ClusterAdmin.Username, err) + } else { + log.Printf("NS8 user cleanup: cluster-admin %q removed", users.ClusterAdmin.Username) + } + } + + return nil +} + +func (p *NethServerProvisioner) createClusterAdmin(username, password string) error { + hash := fmt.Sprintf("%x", sha256.Sum256([]byte(password))) + + payload := map[string]interface{}{ + "user": username, + "password_hash": hash, + "set": map[string]interface{}{ + "display_name": "Support Session", + }, + "grant": []map[string]interface{}{ + {"role": "owner", "on": "*"}, + }, + } + + data, _ := json.Marshal(payload) + return runAgentTask("cluster", "add-user", string(data)) +} + +func (p *NethServerProvisioner) deleteClusterAdmin(username string) error { + data, _ := json.Marshal(map[string]string{"user": username}) + return runAgentTask("cluster", "remove-user", string(data)) +} + +func (p *NethServerProvisioner) listUserDomains() ([]userDomain, error) { + output, err := runAgentTaskOutput("cluster", "list-user-domains", "{}") + if err != nil { + return nil, fmt.Errorf("list-user-domains failed: %w", err) + } + + var resp userDomainsResponse + if err := json.Unmarshal(output, &resp); err != nil { + return nil, fmt.Errorf("cannot parse user domains: %w", err) + } + return resp.Domains, nil +} + +func (p *NethServerProvisioner) createDomainUser(provider, username, password string) error { + payload := map[string]interface{}{ + "user": username, + "display_name": "Support Session", + "password": password, + "locked": false, + } + data, _ := json.Marshal(payload) + return runAgentTask(fmt.Sprintf("module/%s", 
+// runAgentTask executes an NS8 agent task via a Python helper using runagent.
+// This uses the agent.tasks framework with the redis://cluster-leader endpoint,
+// which bypasses the API server and runs tasks directly via Redis.
+func runAgentTaskOutput(agentID, action, data string) ([]byte, error) { + script := fmt.Sprintf(` +import agent.tasks, json, sys +result = agent.tasks.run( + agent_id=%q, + action=%q, + data=json.loads(%q), + extra={"isNotificationHidden": True}, + endpoint="redis://cluster-leader", +) +if result["exit_code"] != 0: + print(json.dumps(result.get("error", "task failed")), file=sys.stderr) + sys.exit(1) +print(json.dumps(result["output"])) +`, agentID, action, data) + + cmd := exec.Command("runagent", "python3", "-c", script) //nolint:gosec // runagent is a trusted NS8 system binary + output, err := cmd.CombinedOutput() + if err != nil { + // Truncate output to avoid dumping full Python stack traces in logs + msg := strings.TrimSpace(string(output)) + if len(msg) > 200 { + msg = msg[:200] + "..." + } + return nil, fmt.Errorf("agent task %s/%s failed: %w: %s", agentID, action, err, msg) + } + return output, nil +} diff --git a/services/support/cmd/tunnel-client/internal/users/store.go b/services/support/cmd/tunnel-client/internal/users/store.go new file mode 100644 index 00000000..a1ceb770 --- /dev/null +++ b/services/support/cmd/tunnel-client/internal/users/store.go @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. + * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package users + +import ( + "crypto/rand" + "encoding/json" + "fmt" + "math/big" + "os" + "path/filepath" + "strings" +) + +const passwordLength = 20 +const passwordLower = "abcdefghijklmnopqrstuvwxyz" +const passwordUpper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +const passwordDigits = "0123456789" +const passwordSpecial = "!@#%^&*" +const passwordAll = passwordLower + passwordUpper + passwordDigits + passwordSpecial + +// SaveState writes the provisioned users to a state file for crash recovery. 
+// If the tunnel-client crashes before cleanup, the next startup reads this +// file and runs Delete + Teardown to remove orphaned users. +func SaveState(stateFile string, users *SessionUsers) error { + // Ensure parent directory exists + dir := filepath.Dir(stateFile) + if err := os.MkdirAll(dir, 0o700); err != nil { + return fmt.Errorf("cannot create state directory: %w", err) + } + + data, err := json.Marshal(users) + if err != nil { + return fmt.Errorf("cannot marshal state: %w", err) + } + + // Write atomically via temp file + rename + tmp := stateFile + ".tmp" + if err := os.WriteFile(tmp, data, 0o600); err != nil { + return fmt.Errorf("cannot write state file: %w", err) + } + if err := os.Rename(tmp, stateFile); err != nil { + _ = os.Remove(tmp) + return fmt.Errorf("cannot rename state file: %w", err) + } + + return nil +} + +// LoadState reads the state file. Returns nil if the file does not exist. +func LoadState(stateFile string) (*SessionUsers, error) { + data, err := os.ReadFile(stateFile) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + var users SessionUsers + if err := json.Unmarshal(data, &users); err != nil { + return nil, fmt.Errorf("cannot parse state file: %w", err) + } + return &users, nil +} + +// RemoveState deletes the state file. +func RemoveState(stateFile string) { + _ = os.Remove(stateFile) +} + +// generateUsername creates a support username from a system key. 
+// Uses the first two and last segment for readability and uniqueness: +// "NETH-2239-DE49-87D6-44AA-B1CE-E07F-9B03-7D37" → "support-neth-2239-7d37" +func generateUsername(systemKey string) string { + key := strings.ToLower(systemKey) + parts := strings.Split(key, "-") + var suffix string + if len(parts) >= 3 { + suffix = parts[0] + "-" + parts[1] + "-" + parts[len(parts)-1] + } else if len(parts) == 2 { + suffix = parts[0] + "-" + parts[1] + } else if len(parts) == 1 && len(parts[0]) >= 4 { + suffix = parts[0] + } else { + b := make([]byte, 8) + for i := range b { + n, _ := rand.Int(rand.Reader, big.NewInt(26)) + b[i] = byte('a') + byte(n.Int64()) + } + suffix = string(b) + } + return "support-" + suffix +} + +// randChar picks a random character from the given charset. +func randChar(charset string) byte { + n, _ := rand.Int(rand.Reader, big.NewInt(int64(len(charset)))) + return charset[n.Int64()] +} + +// generatePassword creates a cryptographically random password that satisfies +// common LDAP password quality policies (uppercase, lowercase, digit, special). +func generatePassword() string { + b := make([]byte, passwordLength) + // Guarantee at least one character from each class + b[0] = randChar(passwordLower) + b[1] = randChar(passwordUpper) + b[2] = randChar(passwordDigits) + b[3] = randChar(passwordSpecial) + // Fill the rest from the full charset + for i := 4; i < passwordLength; i++ { + b[i] = randChar(passwordAll) + } + // Shuffle to avoid predictable positions + for i := len(b) - 1; i > 0; i-- { + j, _ := rand.Int(rand.Reader, big.NewInt(int64(i+1))) + b[i], b[j.Int64()] = b[j.Int64()], b[i] + } + return string(b) +} + +// NewProvisioner returns the appropriate provisioner based on platform detection. +// If redisAddr is set, NS8 is assumed; otherwise NethSecurity. 
+func NewProvisioner(redisAddr string) Provisioner { + if redisAddr != "" { + return &NethServerProvisioner{RedisAddr: redisAddr} + } + return &NethSecurityProvisioner{} +} diff --git a/services/support/cmd/tunnel-client/main.go b/services/support/cmd/tunnel-client/main.go index a0ee314e..f61007dd 100644 --- a/services/support/cmd/tunnel-client/main.go +++ b/services/support/cmd/tunnel-client/main.go @@ -33,6 +33,7 @@ import ( "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/config" "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/connection" "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/discovery" + "github.com/nethesis/my/services/support/cmd/tunnel-client/internal/users" ) func main() { @@ -51,6 +52,11 @@ func main() { diagnosticsDir = flag.String("diagnostics-dir", config.EnvWithDefault("DIAGNOSTICS_DIR", "/usr/share/my/diagnostics.d"), "Directory with diagnostic plugin scripts (env: DIAGNOSTICS_DIR)") diagnosticsPluginTimeout = flag.Duration("diagnostics-plugin-timeout", config.ParseDurationDefault(config.EnvWithDefault("DIAGNOSTICS_PLUGIN_TIMEOUT", ""), config.DefaultDiagnosticsPluginTimeout), "Timeout per diagnostic plugin (env: DIAGNOSTICS_PLUGIN_TIMEOUT)") diagnosticsTotalTimeout = flag.Duration("diagnostics-total-timeout", config.ParseDurationDefault(config.EnvWithDefault("DIAGNOSTICS_TOTAL_TIMEOUT", ""), config.DefaultDiagnosticsTotalTimeout), "Max time to wait for all diagnostics (env: DIAGNOSTICS_TOTAL_TIMEOUT)") + + usersDir = flag.String("users-dir", config.EnvWithDefault("USERS_DIR", config.DefaultUsersDir), "Directory with user configuration plugin scripts (env: USERS_DIR)") + usersPluginTimeout = flag.Duration("users-plugin-timeout", config.ParseDurationDefault(config.EnvWithDefault("USERS_PLUGIN_TIMEOUT", ""), config.DefaultUsersPluginTimeout), "Timeout per user plugin (env: USERS_PLUGIN_TIMEOUT)") + usersTotalTimeout = flag.Duration("users-total-timeout", 
config.ParseDurationDefault(config.EnvWithDefault("USERS_TOTAL_TIMEOUT", ""), config.DefaultUsersTotalTimeout), "Max time to wait for user provisioning (env: USERS_TOTAL_TIMEOUT)") + usersStateFile = flag.String("users-state-file", config.EnvWithDefault("USERS_STATE_FILE", config.DefaultUsersStateFile), "State file for orphan user cleanup (env: USERS_STATE_FILE)") ) flag.Parse() @@ -112,6 +118,22 @@ func main() { DiagnosticsDir: *diagnosticsDir, DiagnosticsPluginTimeout: *diagnosticsPluginTimeout, DiagnosticsTotalTimeout: *diagnosticsTotalTimeout, + UsersDir: *usersDir, + UsersPluginTimeout: *usersPluginTimeout, + UsersTotalTimeout: *usersTotalTimeout, + UsersStateFile: *usersStateFile, + } + + // Clean up orphaned support users from a previous crash + if state, loadErr := users.LoadState(cfg.UsersStateFile); loadErr != nil { + log.Printf("Warning: cannot read users state file: %v", loadErr) + } else if state != nil { + log.Printf("Found orphaned support users from session %s, cleaning up...", state.SessionID) + provisioner := users.NewProvisioner(cfg.RedisAddr) + users.RunTeardown(ctx, cfg.UsersDir, state, nil, cfg.RedisAddr, cfg.UsersPluginTimeout) + _ = provisioner.Delete(state) + users.RemoveState(cfg.UsersStateFile) + log.Println("Orphaned support users cleaned up") } connection.RunWithReconnect(ctx, cfg) diff --git a/services/support/methods/tunnel.go b/services/support/methods/tunnel.go index 1ce3a462..38d40c4f 100644 --- a/services/support/methods/tunnel.go +++ b/services/support/methods/tunnel.go @@ -180,6 +180,100 @@ func acceptControlStream(t *tunnel.Tunnel, systemID, sessionID string) { continue } + if firstByte == 'U' { + // Read the rest of the header line (br already consumed the 'U') + rest, _ := br.ReadString('\n') + headerLine := "U" + rest + headerParts := strings.Fields(strings.TrimSpace(headerLine)) + + // USERS_FETCH 1: client requests credentials from another node's session + if len(headerParts) == 2 && headerParts[0] == "USERS_FETCH" && 
headerParts[1] == "1" { + usersData, fetchErr := session.GetUsersBySystemID(systemID) + if fetchErr != nil { + log.Warn().Err(fetchErr). + Str("system_id", systemID). + Str("session_id", sessionID). + Msg("failed to fetch users for system") + _, _ = stream.Write([]byte("{}\n")) + } else if usersData == nil { + _, _ = stream.Write([]byte("{}\n")) + } else { + _, _ = stream.Write(append(usersData, '\n')) + log.Info(). + Str("system_id", systemID). + Str("session_id", sessionID). + Int("payload_bytes", len(usersData)). + Msg("users credentials sent to requesting node") + } + _ = stream.Close() + continue + } + + // USERS 1: client sends its provisioned credentials + if len(headerParts) == 2 && headerParts[0] == "USERS" && headerParts[1] == "1" { + rawJSON, readErr := io.ReadAll(br) + _ = stream.Close() + if readErr != nil { + log.Warn().Err(readErr). + Str("system_id", systemID). + Str("session_id", sessionID). + Msg("failed to read users payload from control stream") + continue + } + + // Reject payloads larger than 256 KB + if len(rawJSON) > 256*1024 { + log.Warn(). + Str("system_id", systemID). + Str("session_id", sessionID). + Int("bytes", len(rawJSON)). + Msg("users payload exceeds 256 KB limit, skipping") + continue + } + + // Schema validation: unmarshal into typed struct and re-serialize + var report models.UsersReport + if parseErr := json.Unmarshal(rawJSON, &report); parseErr != nil { + log.Warn().Err(parseErr). + Str("system_id", systemID). + Str("session_id", sessionID). + Msg("invalid users JSON schema, skipping") + continue + } + sanitized, marshalErr := json.Marshal(report) + if marshalErr != nil { + log.Warn().Err(marshalErr). + Str("system_id", systemID). + Str("session_id", sessionID). + Msg("failed to re-serialize users, skipping") + continue + } + + var raw json.RawMessage = sanitized + saved, jsonErr := session.SaveUsers(sessionID, raw) + if jsonErr != nil { + log.Warn().Err(jsonErr). + Str("system_id", systemID). + Str("session_id", sessionID). 
+ Msg("failed to save users") + } else if !saved { + log.Debug(). + Str("system_id", systemID). + Str("session_id", sessionID). + Msg("users update skipped: already present") + } else { + log.Info(). + Str("system_id", systemID). + Str("session_id", sessionID). + Int("payload_bytes", len(rawJSON)). + Msg("users report received") + } + continue + } + _ = stream.Close() + continue + } + if firstByte == 'D' { // Read the rest of the header line (br already consumed the 'D') rest, _ := br.ReadString('\n') diff --git a/services/support/models/session.go b/services/support/models/session.go index 71e92473..3f01d736 100644 --- a/services/support/models/session.go +++ b/services/support/models/session.go @@ -28,6 +28,8 @@ type SupportSession struct { ClosedBy *string `json:"closed_by,omitempty"` Diagnostics *json.RawMessage `json:"diagnostics,omitempty"` DiagnosticsAt *time.Time `json:"diagnostics_at,omitempty"` + Users *json.RawMessage `json:"users,omitempty"` + UsersAt *time.Time `json:"users_at,omitempty"` CreatedAt time.Time `json:"created_at"` UpdatedAt time.Time `json:"updated_at"` } diff --git a/services/support/models/users.go b/services/support/models/users.go new file mode 100644 index 00000000..52249163 --- /dev/null +++ b/services/support/models/users.go @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2026 Nethesis S.r.l. 
+ * http://www.nethesis.it - info@nethesis.it + * + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * author: Edoardo Spadoni + */ + +package models + +import "time" + +// UserCredential holds username and password for an ephemeral support user +type UserCredential struct { + Username string `json:"username"` + Password string `json:"password"` +} + +// DomainUser represents a user created on a specific LDAP/Samba domain +type DomainUser struct { + Domain string `json:"domain"` + Module string `json:"module"` + Username string `json:"username"` + Password string `json:"password"` +} + +// AppConfig describes an application configured for the support user +type AppConfig struct { + ID string `json:"id"` + Name string `json:"name"` + URL string `json:"url,omitempty"` + Notes string `json:"notes,omitempty"` +} + +// PluginError records a users.d plugin that failed during setup +type PluginError struct { + ID string `json:"id"` + Message string `json:"message"` +} + +// SessionUsersData is the user provisioning data for a support session +type SessionUsersData struct { + SessionID string `json:"session_id"` + Platform string `json:"platform"` + ClusterAdmin *UserCredential `json:"cluster_admin,omitempty"` + DomainUsers []DomainUser `json:"domain_users,omitempty"` + LocalUsers []UserCredential `json:"local_users,omitempty"` + Apps []AppConfig `json:"apps,omitempty"` + Errors []PluginError `json:"errors,omitempty"` + ModuleDomains map[string]string `json:"module_domains,omitempty"` + CreatedAt time.Time `json:"created_at"` +} + +// UsersReport is the report sent by the tunnel-client via the USERS yamux stream +type UsersReport struct { + CreatedAt time.Time `json:"created_at"` + DurationMs int64 `json:"duration_ms"` + Users SessionUsersData `json:"users"` +} diff --git a/services/support/session/manager.go b/services/support/session/manager.go index f91565bd..f67040af 100644 --- a/services/support/session/manager.go +++ b/services/support/session/manager.go @@ -350,3 
+350,41 @@ func SaveDiagnostics(sessionID string, data json.RawMessage) (bool, error) { rows, _ := result.RowsAffected() return rows > 0, nil } + +// SaveUsers stores the ephemeral support users report on a session. +// Similar to SaveDiagnostics, only one update is allowed per session. +// Returns (true, nil) if saved, (false, nil) if already present, (false, err) on error. +func SaveUsers(sessionID string, data json.RawMessage) (bool, error) { + result, err := database.DB.Exec( + `UPDATE support_sessions + SET users = $1, users_at = NOW(), updated_at = NOW() + WHERE id = $2 + AND users IS NULL`, + string(data), sessionID, + ) + if err != nil { + return false, err + } + rows, _ := result.RowsAffected() + return rows > 0, nil +} + +// GetUsersBySystemID returns the users report from any active/pending session +// of the same system that already has credentials. This allows worker nodes +// to fetch credentials created by the leader node's tunnel-client. +func GetUsersBySystemID(systemID string) (json.RawMessage, error) { + var rawUsers []byte + err := database.DB.QueryRow( + `SELECT users FROM support_sessions + WHERE system_id = $1 AND status IN ('pending', 'active') AND users IS NOT NULL + ORDER BY users_at DESC LIMIT 1`, + systemID, + ).Scan(&rawUsers) + if err != nil { + if err == sql.ErrNoRows { + return nil, nil + } + return nil, err + } + return rawUsers, nil +} From 6aec2a2160870b1afa773c0620df85dc5caf968e Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Tue, 24 Mar 2026 16:37:02 +0100 Subject: [PATCH 23/28] feat(support): add custom service management and fix re-discovery merge - Fix re-discovery overwriting injected services: the serviceStore now tracks COMMAND-injected services separately and preserves them when periodic re-discovery replaces discovered services. - Add remove_services COMMAND: tunnel-client removes injected services from its store and re-sends the manifest. 
The support service also removes them server-side immediately for instant API consistency. - Add DELETE /api/support-sessions/:id/services/:name endpoint to remove custom services via the frontend. - Rename "Other Services" to "Custom Services" in the frontend with a delete button (trash icon) for each custom service. - Frontend: re-fetch services on modal open for fresh data. --- backend/main.go | 1 + backend/methods/support.go | 55 +++++++++++ .../support/SupportSessionsTable.vue | 95 +++++++++++++++---- frontend/src/i18n/en/translation.json | 2 +- frontend/src/lib/support/support.ts | 9 ++ .../internal/connection/connection.go | 67 +++++++++++-- services/support/methods/commands.go | 28 +++++- services/support/tunnel/manager.go | 18 +++- 8 files changed, 243 insertions(+), 32 deletions(-) diff --git a/backend/main.go b/backend/main.go index 28edab41..278212b8 100644 --- a/backend/main.go +++ b/backend/main.go @@ -486,6 +486,7 @@ func main() { supportGroup.GET("/:id/users", methods.GetSupportSessionUsers) supportGroup.GET("/:id/services", methods.GetSupportSessionServices) supportGroup.POST("/:id/services", methods.AddSupportSessionServices) + supportGroup.DELETE("/:id/services/:name", methods.RemoveSupportSessionService) supportGroup.POST("/:id/terminal-ticket", methods.GenerateTerminalTicket) supportGroup.Any("/:id/proxy/:service/*path", methods.ProxySupportSession) supportGroup.POST("/:id/proxy-token", methods.GenerateSupportProxyToken) diff --git a/backend/methods/support.go b/backend/methods/support.go index 5a4dd6a3..2ceab967 100644 --- a/backend/methods/support.go +++ b/backend/methods/support.go @@ -363,3 +363,58 @@ func AddSupportSessionServices(c *gin.Context) { "count": len(request.Services), })) } + +// RemoveSupportSessionService handles DELETE /api/support-sessions/:id/services/:name +// It sends a remove_services command to the tunnel-client via Redis pub/sub. 
+func RemoveSupportSessionService(c *gin.Context) { + sessionID := c.Param("id") + serviceName := c.Param("name") + if sessionID == "" || serviceName == "" { + c.JSON(http.StatusBadRequest, response.BadRequest("session id and service name required", nil)) + return + } + + _, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + + repo := entities.NewSupportRepository() + sess, err := repo.GetSessionByID(sessionID, userOrgRole, userOrgID) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to get support session") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get support session", nil)) + return + } + if sess == nil { + c.JSON(http.StatusNotFound, response.NotFound("support session not found", nil)) + return + } + if sess.Status != "active" { + c.JSON(http.StatusConflict, response.Conflict("session is not active", nil)) + return + } + + redisClient := cache.GetRedisClient() + if redisClient == nil { + c.JSON(http.StatusServiceUnavailable, response.InternalServerError("redis not available", nil)) + return + } + + cmd := map[string]interface{}{ + "action": "remove_services", + "session_id": sessionID, + "service_names": []string{serviceName}, + } + payload, _ := json.Marshal(cmd) + envelope := signAndMarshal(payload) + if err := redisClient.Publish("support:commands", string(envelope)); err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to publish remove_services command") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to send command to support service", nil)) + return + } + + logger.LogBusinessOperation(c, "support", "remove_service", "session", sessionID, true, nil) + + c.JSON(http.StatusOK, response.OK("service removed successfully", gin.H{ + "session_id": sessionID, + "service_name": serviceName, + })) +} diff --git a/frontend/src/components/support/SupportSessionsTable.vue 
b/frontend/src/components/support/SupportSessionsTable.vue index b90b6591..ea57003c 100644 --- a/frontend/src/components/support/SupportSessionsTable.vue +++ b/frontend/src/components/support/SupportSessionsTable.vue @@ -15,6 +15,7 @@ import { faCheck, faChevronDown, faChevronRight, + faTrashCan, } from '@fortawesome/free-solid-svg-icons' import { NeTable, @@ -51,6 +52,7 @@ import { getSupportSessionServices, generateSupportProxyToken, addSupportSessionServices, + removeSupportSessionService, getSupportSessionUsers, } from '@/lib/support/support' import UpdatingSpinner from '@/components/UpdatingSpinner.vue' @@ -210,9 +212,9 @@ const unifiedGroup = ref(null) const unifiedLoading = ref(false) const unifiedCredentials = ref(null) const unifiedModules = ref([]) -const unifiedUngrouped = ref<{ name: string; label: string; session: SessionRef; host: string; path: string }[]>( - [], -) +const unifiedUngrouped = ref< + { name: string; label: string; session: SessionRef; host: string; path: string }[] +>([]) const copiedField = ref(null) const expandedModules = ref>(new Set()) @@ -233,8 +235,20 @@ async function handleOpenUnified(group: SystemSessionGroup) { expandedModules.value = new Set() try { - // Fetch credentials from all active sessions + // Fetch fresh services and credentials from all active sessions const activeSessions = group.sessions.filter((s) => s.status === 'active') + + // Re-fetch services to get latest state (handles add/remove since last open) + await Promise.all( + activeSessions.map((s) => + getSupportSessionServices(s.id) + .then((svcGroups) => { + servicesMap.value[s.id] = svcGroups || [] + }) + .catch(() => {}), + ), + ) + const usersResults = await Promise.all( activeSessions.map((s) => getSupportSessionUsers(s.id).catch(() => null)), ) @@ -291,7 +305,13 @@ async function handleOpenUnified(group: SystemSessionGroup) { seenServices.add(svc.name) if (!svc.moduleId) { - ungrouped.push({ name: svc.name, label: svc.label, session, host: svc.host, 
path: svc.path }) + ungrouped.push({ + name: svc.name, + label: svc.label, + session, + host: svc.host, + path: svc.path, + }) continue } @@ -349,6 +369,31 @@ function groupHasServices(group: SystemSessionGroup): boolean { ) } +async function handleRemoveService(session: SessionRef, serviceName: string) { + try { + await removeSupportSessionService(session.id, serviceName) + // Remove from local modal state immediately + unifiedUngrouped.value = unifiedUngrouped.value.filter((s) => s.name !== serviceName) + // Also remove from servicesMap cache so reopening the modal won't show it + const cached = servicesMap.value[session.id] + if (cached) { + servicesMap.value[session.id] = cached + .map((g) => ({ ...g, services: g.services.filter((s) => s.name !== serviceName) })) + .filter((g) => g.services.length > 0) + } + // Re-fetch after delay to sync with server + setTimeout(() => { + getSupportSessionServices(session.id) + .then((svcGroups) => { + servicesMap.value[session.id] = svcGroups || [] + }) + .catch(() => {}) + }, 2000) + } catch (error) { + console.error('Cannot remove service:', error) + } +} + async function handleAddService() { if (!addServiceGroup.value) return addServiceError.value = '' @@ -679,7 +724,7 @@ async function handleAddService() { + + +
diff --git a/frontend/src/i18n/en/translation.json b/frontend/src/i18n/en/translation.json index f2f73214..f6bfd076 100644 --- a/frontend/src/i18n/en/translation.json +++ b/frontend/src/i18n/en/translation.json @@ -672,7 +672,7 @@ "cluster_admin": "Cluster Admin", "domain_users": "Domain Users", "local_users": "Admin Interface", - "other_services": "Other Services", + "custom_services": "Custom Services", "username": "Username", "password": "Password", "domain": "Domain", diff --git a/frontend/src/lib/support/support.ts b/frontend/src/lib/support/support.ts index 2cab9c24..c6784ad0 100644 --- a/frontend/src/lib/support/support.ts +++ b/frontend/src/lib/support/support.ts @@ -408,6 +408,15 @@ export interface AddSessionServiceItem { tls?: boolean } +export const removeSupportSessionService = (sessionId: string, serviceName: string) => { + const loginStore = useLoginStore() + return axios + .delete(`${API_URL}/support-sessions/${sessionId}/services/${serviceName}`, { + headers: { Authorization: `Bearer ${loginStore.jwtToken}` }, + }) + .then((res) => res.data) +} + export const addSupportSessionServices = (sessionId: string, services: AddSessionServiceItem[]) => { const loginStore = useLoginStore() return axios diff --git a/services/support/cmd/tunnel-client/internal/connection/connection.go b/services/support/cmd/tunnel-client/internal/connection/connection.go index d401ccd1..f8a373f9 100644 --- a/services/support/cmd/tunnel-client/internal/connection/connection.go +++ b/services/support/cmd/tunnel-client/internal/connection/connection.go @@ -41,18 +41,24 @@ var validServiceName = regexp.MustCompile(`^[a-z0-9][a-z0-9_-]{0,63}$`) // commandPayload is the JSON body of a COMMAND stream sent by the support service. 
type commandPayload struct { - Action string `json:"action"` - Services map[string]models.ServiceInfo `json:"services,omitempty"` + Action string `json:"action"` + Services map[string]models.ServiceInfo `json:"services,omitempty"` + ServiceNames []string `json:"service_names,omitempty"` // for remove_services } // serviceStore is a goroutine-safe holder for the current service map. +// It tracks which services were injected via COMMAND so they survive re-discovery. type serviceStore struct { mu sync.RWMutex services map[string]models.ServiceInfo + injected map[string]models.ServiceInfo // services added via COMMAND (preserved across re-discovery) } func newServiceStore(initial map[string]models.ServiceInfo) *serviceStore { - return &serviceStore{services: initial} + return &serviceStore{ + services: initial, + injected: make(map[string]models.ServiceInfo), + } } func (s *serviceStore) get() map[string]models.ServiceInfo { @@ -65,18 +71,43 @@ func (s *serviceStore) get() map[string]models.ServiceInfo { return result } -func (s *serviceStore) set(m map[string]models.ServiceInfo) { +// setDiscovered replaces discovered services but preserves injected (COMMAND) services. +func (s *serviceStore) setDiscovered(discovered map[string]models.ServiceInfo) { s.mu.Lock() defer s.mu.Unlock() - s.services = m + merged := make(map[string]models.ServiceInfo, len(discovered)+len(s.injected)) + for k, v := range discovered { + merged[k] = v + } + // Re-add injected services (COMMAND-added) that weren't in the new discovery + for k, v := range s.injected { + if _, exists := merged[k]; !exists { + merged[k] = v + } + } + s.services = merged } -func (s *serviceStore) merge(additional map[string]models.ServiceInfo) { +// addInjected adds services via COMMAND and marks them as injected. 
+func (s *serviceStore) addInjected(additional map[string]models.ServiceInfo) { s.mu.Lock() defer s.mu.Unlock() for k, v := range additional { s.services[k] = v + s.injected[k] = v + } +} + +// removeInjected removes an injected service by name. +func (s *serviceStore) removeInjected(name string) bool { + s.mu.Lock() + defer s.mu.Unlock() + if _, ok := s.injected[name]; !ok { + return false } + delete(s.injected, name) + delete(s.services, name) + return true } func (s *serviceStore) len() int { @@ -368,10 +399,10 @@ func connect(ctx context.Context, cfg *config.ClientConfig, sessionUsers **users case <-ticker.C: newServices := discovery.DiscoverServices(ctx, cfg) if len(newServices) > 0 { - if err := sendManifest(session, newServices); err != nil { + store.setDiscovered(newServices) + if err := sendManifest(session, store.get()); err != nil { log.Printf("Failed to send updated manifest: %v", err) } else { - store.set(newServices) log.Printf("Manifest updated with %d services", len(newServices)) } } @@ -448,6 +479,24 @@ func handleCommandStream(s net.Conn, firstLine string, store *serviceStore, sess } log.Printf("add_services: added %d static service(s)", len(payload.Services)) _, _ = fmt.Fprint(s, "OK\n") + case "remove_services": + if len(payload.ServiceNames) == 0 { + _, _ = fmt.Fprint(s, "ERROR no service names provided\n") + return + } + removed := 0 + for _, name := range payload.ServiceNames { + if store.removeInjected(name) { + removed++ + } + } + if removed > 0 { + if err := sendManifest(session, store.get()); err != nil { + log.Printf("Failed to resend manifest after remove: %v", err) + } + } + log.Printf("remove_services: removed %d service(s)", removed) + _, _ = fmt.Fprint(s, "OK\n") default: log.Printf("Unknown command action: %q", payload.Action) _, _ = fmt.Fprintf(s, "ERROR unknown action %q\n", payload.Action) @@ -481,7 +530,7 @@ func applyAddServices(newSvcs map[string]models.ServiceInfo, store *serviceStore validated[name] = svc } - 
store.merge(validated) + store.addInjected(validated) // Re-send manifest so the support service registers the new services if err := sendManifest(session, store.get()); err != nil { diff --git a/services/support/methods/commands.go b/services/support/methods/commands.go index 4153c9ba..98e47f95 100644 --- a/services/support/methods/commands.go +++ b/services/support/methods/commands.go @@ -56,9 +56,10 @@ func verifyAndUnwrap(raw string) (string, bool) { // SupportCommand represents a command received via Redis pub/sub type SupportCommand struct { - Action string `json:"action"` - SessionID string `json:"session_id"` - Services map[string]tunnel.ServiceInfo `json:"services,omitempty"` + Action string `json:"action"` + SessionID string `json:"session_id"` + Services map[string]tunnel.ServiceInfo `json:"services,omitempty"` + ServiceNames []string `json:"service_names,omitempty"` // for remove_services } // StartCommandListener listens for commands from the backend via Redis pub/sub @@ -105,6 +106,8 @@ func StartCommandListener(ctx context.Context) { handleCloseCommand(cmd.SessionID) case "add_services": handleAddServicesCommand(cmd) + case "remove_services": + handleRemoveServicesCommand(cmd) default: log.Warn().Str("action", cmd.Action).Msg("unknown command action") } @@ -141,6 +144,25 @@ func handleAddServicesCommand(cmd SupportCommand) { } } +func handleRemoveServicesCommand(cmd SupportCommand) { + log := logger.ComponentLogger("commands") + + // Remove services from the server-side tunnel registry immediately + // so GET /services reflects the change without waiting for the tunnel-client manifest + TunnelManager.RemoveServicesBySessionID(cmd.SessionID, cmd.ServiceNames) + + payload := tunnel.CommandPayload{ + Action: "remove_services", + ServiceNames: cmd.ServiceNames, + } + + if err := TunnelManager.SendCommandToSession(cmd.SessionID, payload); err != nil { + log.Error().Err(err).Str("session_id", cmd.SessionID).Msg("failed to send remove_services command to 
tunnel") + } else { + log.Info().Str("session_id", cmd.SessionID).Int("count", len(cmd.ServiceNames)).Msg("remove_services command sent") + } +} + func handleCloseCommand(sessionID string) { log := logger.ComponentLogger("commands") diff --git a/services/support/tunnel/manager.go b/services/support/tunnel/manager.go index 532aa4c3..ab047d27 100644 --- a/services/support/tunnel/manager.go +++ b/services/support/tunnel/manager.go @@ -549,8 +549,22 @@ func (t *Tunnel) ReleaseStream() { // CommandPayload is the JSON body written to a COMMAND yamux stream. type CommandPayload struct { - Action string `json:"action"` - Services map[string]ServiceInfo `json:"services,omitempty"` + Action string `json:"action"` + Services map[string]ServiceInfo `json:"services,omitempty"` + ServiceNames []string `json:"service_names,omitempty"` +} + +// RemoveServicesBySessionID removes specific services from the server-side tunnel registry. +func (m *Manager) RemoveServicesBySessionID(sessionID string, names []string) { + t := m.GetBySessionID(sessionID) + if t == nil { + return + } + t.servicesMu.Lock() + defer t.servicesMu.Unlock() + for _, name := range names { + delete(t.services, name) + } } // SendCommandToSession opens a COMMAND yamux stream to the tunnel-client for the From d6fcd93686cbe731982441de3ef3d14faf2e9ea8 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Tue, 24 Mar 2026 20:22:05 +0100 Subject: [PATCH 24/28] fix(security): harden support tunnel system after security audit - Make INTERNAL_SECRET mandatory at startup (fail-fast), remove fallback that accepted unsigned Redis commands and unauthenticated internal requests when secret was empty - Add RBAC scope verification to CloseSupportSession and ExtendSupportSession to prevent cross-tenant session manipulation - Clear ephemeral credentials (users JSONB) from database on session close, expire, and replace to limit credential exposure window - Add HTTP server timeouts (ReadHeaderTimeout, IdleTimeout) to prevent slowloris 
denial-of-service attacks - Re-validate service target DNS at proxy connection time to prevent TOCTOU DNS rebinding attacks (previously only validated at manifest registration) - Move plugin temp files from /tmp to /var/run/my-support-tmp/ with 0700 permissions to prevent inotify-based credential snooping - Add PostgreSQL advisory lock on session creation to prevent race conditions when two tunnel-clients connect simultaneously --- backend/entities/support.go | 4 +-- backend/methods/support.go | 30 ++++++++++++++++++- .../internal/users/configurator.go | 13 ++++++-- .../support/configuration/configuration.go | 5 +++- services/support/main.go | 9 ++++-- services/support/methods/commands.go | 8 +---- services/support/methods/proxy.go | 12 ++++++++ services/support/middleware/auth.go | 5 +--- services/support/session/manager.go | 29 ++++++++++++++---- 9 files changed, 89 insertions(+), 26 deletions(-) diff --git a/backend/entities/support.go b/backend/entities/support.go index 9aadbaec..8b7b6968 100644 --- a/backend/entities/support.go +++ b/backend/entities/support.go @@ -667,11 +667,11 @@ func (r *SupportRepository) ExtendSession(sessionID string, hours int) error { return nil } -// CloseSession force-closes a session +// CloseSession force-closes a session and clears ephemeral credentials func (r *SupportRepository) CloseSession(sessionID string) error { result, err := r.db.Exec( `UPDATE support_sessions - SET status = 'closed', closed_at = NOW(), closed_by = 'operator', updated_at = NOW() + SET status = 'closed', closed_at = NOW(), closed_by = 'operator', users = NULL, users_at = NULL, updated_at = NOW() WHERE id = $1 AND status IN ('pending', 'active')`, sessionID, ) diff --git a/backend/methods/support.go b/backend/methods/support.go index 2ceab967..c0d9da48 100644 --- a/backend/methods/support.go +++ b/backend/methods/support.go @@ -124,13 +124,27 @@ func ExtendSupportSession(c *gin.Context) { return } + _, userOrgID, userOrgRole, _ := 
helpers.GetUserContextExtended(c) + + // RBAC: verify session belongs to the caller's scope + repo := entities.NewSupportRepository() + sess, err := repo.GetSessionByID(sessionID, userOrgRole, userOrgID) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to get support session") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get support session", nil)) + return + } + if sess == nil { + c.JSON(http.StatusNotFound, response.NotFound("support session not found", nil)) + return + } + var request models.ExtendSessionRequest if err := c.ShouldBindBodyWith(&request, binding.JSON); err != nil { c.JSON(http.StatusBadRequest, response.ValidationBadRequestMultiple(err)) return } - repo := entities.NewSupportRepository() if err := repo.ExtendSession(sessionID, request.Hours); err != nil { logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to extend support session") c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to extend support session", nil)) @@ -153,7 +167,21 @@ func CloseSupportSession(c *gin.Context) { return } + _, userOrgID, userOrgRole, _ := helpers.GetUserContextExtended(c) + + // RBAC: verify session belongs to the caller's scope repo := entities.NewSupportRepository() + sess, err := repo.GetSessionByID(sessionID, userOrgRole, userOrgID) + if err != nil { + logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to get support session") + c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to get support session", nil)) + return + } + if sess == nil { + c.JSON(http.StatusNotFound, response.NotFound("support session not found", nil)) + return + } + if err := repo.CloseSession(sessionID); err != nil { logger.Error().Err(err).Str("session_id", sessionID).Msg("failed to close support session") c.JSON(http.StatusInternalServerError, response.InternalServerError("failed to close support session", nil)) diff --git 
a/services/support/cmd/tunnel-client/internal/users/configurator.go b/services/support/cmd/tunnel-client/internal/users/configurator.go index 335e1e60..a778c3e7 100644 --- a/services/support/cmd/tunnel-client/internal/users/configurator.go +++ b/services/support/cmd/tunnel-client/internal/users/configurator.go @@ -384,9 +384,18 @@ func writeUsersFile(users *SessionUsers) (string, error) { return writeTempJSON("my-support-users-*.json", users) } -// writeTempJSON writes data as JSON to a temporary file and returns its path. +// pluginTempDir is a restricted directory for plugin temp files containing credentials. +// Using a dedicated directory instead of /tmp prevents inotify-based snooping by +// other processes. Created with 0700 permissions (owner-only). +const pluginTempDir = "/var/run/my-support-tmp" + +// writeTempJSON writes data as JSON to a temporary file in a restricted directory. func writeTempJSON(pattern string, data interface{}) (string, error) { - f, err := os.CreateTemp("", pattern) + if err := os.MkdirAll(pluginTempDir, 0o700); err != nil { + return "", fmt.Errorf("cannot create temp directory: %w", err) + } + + f, err := os.CreateTemp(pluginTempDir, pattern) if err != nil { return "", err } diff --git a/services/support/configuration/configuration.go b/services/support/configuration/configuration.go index af696085..d5a0ea15 100644 --- a/services/support/configuration/configuration.go +++ b/services/support/configuration/configuration.go @@ -101,8 +101,11 @@ func Init() { Config.RateLimitSessionPerID = parseIntWithDefault("RATE_LIMIT_SESSION_PER_ID", 500) Config.RateLimitWindow = parseDurationWithDefault("RATE_LIMIT_WINDOW", 1*time.Minute) - // Internal authentication + // Internal authentication (required: fail fast if empty) Config.InternalSecret = os.Getenv("INTERNAL_SECRET") + if Config.InternalSecret == "" { + logger.Fatal().Msg("INTERNAL_SECRET is required but not set") + } logger.LogConfigLoad("env", "configuration", true, nil) } diff --git 
a/services/support/main.go b/services/support/main.go index 0959258b..b39decc8 100644 --- a/services/support/main.go +++ b/services/support/main.go @@ -153,10 +153,13 @@ func main() { c.JSON(http.StatusNotFound, response.NotFound("api not found", nil)) }) - // Start HTTP server + // Start HTTP server with timeouts to prevent slowloris attacks. + // WriteTimeout is 0 because WebSocket and tunnel connections are long-lived. srv := &http.Server{ - Addr: configuration.Config.ListenAddress, - Handler: router, + Addr: configuration.Config.ListenAddress, + Handler: router, + ReadHeaderTimeout: 15 * time.Second, + IdleTimeout: 120 * time.Second, } go func() { diff --git a/services/support/methods/commands.go b/services/support/methods/commands.go index 98e47f95..eb9c2d2e 100644 --- a/services/support/methods/commands.go +++ b/services/support/methods/commands.go @@ -32,19 +32,13 @@ type signedEnvelope struct { } // verifyAndUnwrap authenticates a signed Redis message and returns the inner payload. -// If INTERNAL_SECRET is not configured, messages are accepted without verification -// (backward-compatible with deployments that have not yet set the secret). +// INTERNAL_SECRET is required at startup, so HMAC verification is always enforced. func verifyAndUnwrap(raw string) (string, bool) { var env signedEnvelope if err := json.Unmarshal([]byte(raw), &env); err != nil { return "", false } secret := configuration.Config.InternalSecret - if secret == "" { - // No secret configured: accept but log a warning so operators know to fix it. 
- logger.ComponentLogger("commands").Warn().Msg("INTERNAL_SECRET not set: Redis commands accepted without HMAC verification") - return env.Payload, true - } mac := hmac.New(sha256.New, []byte(secret)) mac.Write([]byte(env.Payload)) expected := hex.EncodeToString(mac.Sum(nil)) diff --git a/services/support/methods/proxy.go b/services/support/methods/proxy.go index d4754d37..6b3cb4be 100644 --- a/services/support/methods/proxy.go +++ b/services/support/methods/proxy.go @@ -63,6 +63,18 @@ func HandleProxy(c *gin.Context) { return } + // Re-validate service target at connection time to prevent DNS rebinding attacks. + // The target was validated at manifest registration, but DNS records can change. + if err := tunnel.ValidateServiceTarget(svc.Target); err != nil { + log.Warn().Err(err). + Str("session_id", sessionID). + Str("service", serviceName). + Str("target", svc.Target). + Msg("service target failed DNS re-validation at proxy time") + c.JSON(http.StatusForbidden, response.Error(http.StatusForbidden, "service target blocked by security policy", nil)) + return + } + // Strip Traefik PathPrefix from the request path. // In NS8, Traefik routes e.g. PathPrefix('/cluster-admin') to the backend // and strips the prefix. Our proxy bypasses Traefik, so we must strip it too. diff --git a/services/support/middleware/auth.go b/services/support/middleware/auth.go index 44c9eda8..13e18f9b 100644 --- a/services/support/middleware/auth.go +++ b/services/support/middleware/auth.go @@ -35,13 +35,10 @@ import ( // InternalSecretMiddleware validates the X-Internal-Secret header (#4). // Provides defense-in-depth: even if a session token leaks, the caller // must also know the shared internal secret to access tunnel endpoints. +// INTERNAL_SECRET is required at startup; this middleware always enforces it. 
func InternalSecretMiddleware() gin.HandlerFunc { return func(c *gin.Context) { secret := configuration.Config.InternalSecret - if secret == "" { - c.Next() - return - } provided := c.GetHeader("X-Internal-Secret") if subtle.ConstantTimeCompare([]byte(provided), []byte(secret)) != 1 { logger.Warn(). diff --git a/services/support/session/manager.go b/services/support/session/manager.go index f67040af..842be5fc 100644 --- a/services/support/session/manager.go +++ b/services/support/session/manager.go @@ -13,9 +13,11 @@ import ( "crypto/rand" "crypto/subtle" "database/sql" + "encoding/binary" "encoding/hex" "encoding/json" "fmt" + "hash/fnv" "time" "github.com/nethesis/my/services/support/configuration" @@ -24,6 +26,14 @@ import ( "github.com/nethesis/my/services/support/models" ) +// systemAdvisoryLockKey derives a deterministic int64 key for pg_advisory_xact_lock +// from a system_id string. The lock serializes session creation per system. +func systemAdvisoryLockKey(systemID string) int64 { + h := fnv.New64a() + _, _ = h.Write([]byte(systemID)) + return int64(binary.BigEndian.Uint64(h.Sum(nil)[:8])) +} + // GenerateToken creates a cryptographically secure session token. // Tokens are stored in plaintext in the database because they serve as a shared // secret for inter-service communication (backend reads them to authenticate @@ -72,19 +82,26 @@ func CreateSession(systemID, nodeID string) (*models.SupportSession, error) { } defer func() { _ = tx.Rollback() }() + // Acquire advisory lock to serialize session creation for this system. + // Prevents race conditions when two tunnel-clients connect simultaneously. + // The lock is automatically released when the transaction commits or rolls back. 
+ if _, err := tx.Exec(`SELECT pg_advisory_xact_lock($1)`, systemAdvisoryLockKey(systemID)); err != nil { + return nil, fmt.Errorf("failed to acquire advisory lock: %w", err) + } + // Close any existing active/pending sessions for this system+node combination var closeResult sql.Result if nodeID == "" { closeResult, err = tx.Exec( `UPDATE support_sessions - SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', updated_at = NOW() + SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', users = NULL, users_at = NULL, updated_at = NOW() WHERE system_id = $1 AND node_id IS NULL AND status IN ('pending', 'active')`, systemID, ) } else { closeResult, err = tx.Exec( `UPDATE support_sessions - SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', updated_at = NOW() + SET status = 'closed', closed_at = NOW(), closed_by = 'replaced', users = NULL, users_at = NULL, updated_at = NOW() WHERE system_id = $1 AND node_id = $2 AND status IN ('pending', 'active')`, systemID, nodeID, ) @@ -283,11 +300,11 @@ func GetSessionByID(sessionID string) (*models.SupportSession, error) { return &session, nil } -// CloseSession closes a support session +// CloseSession closes a support session and clears ephemeral credentials func CloseSession(sessionID, closedBy string) error { result, err := database.DB.Exec( `UPDATE support_sessions - SET status = 'closed', closed_at = NOW(), closed_by = $2, updated_at = NOW() + SET status = 'closed', closed_at = NOW(), closed_by = $2, users = NULL, users_at = NULL, updated_at = NOW() WHERE id = $1 AND status IN ('pending', 'active')`, sessionID, closedBy, ) @@ -308,11 +325,11 @@ func CloseSession(sessionID, closedBy string) error { return nil } -// ExpireSessions marks expired sessions +// ExpireSessions marks expired sessions and clears ephemeral credentials func ExpireSessions() (int64, error) { result, err := database.DB.Exec( `UPDATE support_sessions - SET status = 'expired', closed_at = NOW(), closed_by = 'timeout', 
updated_at = NOW() + SET status = 'expired', closed_at = NOW(), closed_by = 'timeout', users = NULL, users_at = NULL, updated_at = NOW() WHERE status IN ('pending', 'active') AND expires_at < NOW()`, ) if err != nil { From 882231a00bd877395492b3ccb6144c009981f18f Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Wed, 25 Mar 2026 14:49:18 +0100 Subject: [PATCH 25/28] docs(support): expand plugin system documentation in README Add comprehensive developer reference for users.d/ plugins: --users-file and --instances-file JSON formats, module name matching, AppConfig output format, and example reference. Add shared plugin security model table covering ownership, permissions, environment, timeouts, and temp file handling for both diagnostics.d/ and users.d/ systems. Update credential lifecycle with database cleanup. Add remove_services command, users/ directory, and examples/ to project structure. --- services/support/README.md | 135 +++++++++++++++++++++++++++++++------ 1 file changed, 115 insertions(+), 20 deletions(-) diff --git a/services/support/README.md b/services/support/README.md index 027dfe61..0e8e048f 100644 --- a/services/support/README.md +++ b/services/support/README.md @@ -72,6 +72,7 @@ The support service can also **push commands** to the tunnel-client by opening o | Command | Description | |:---|:---| | `add_services` | Inject one or more static `host:port` services into the running session without reconnection | +| `remove_services` | Remove one or more services from the running session by name | ### Session Lifecycle - `pending` — Session created by backend, waiting for system to connect @@ -162,7 +163,8 @@ services/support/ │ ├── discovery/ # Service discovery (Traefik, NethSecurity, static) │ ├── models/ # ServiceInfo, ServiceManifest, ApiCliRoute │ ├── stream/ # CONNECT protocol stream handler -│ └── terminal/ # PTY spawning, binary frame protocol +│ ├── terminal/ # PTY spawning, binary frame protocol +│ └── users/ # Ephemeral user provisioning, 
users.d plugin runner, state file ├── configuration/ # Environment configuration ├── database/ # PostgreSQL connection ├── helpers/ # SHA256 verification @@ -183,6 +185,8 @@ services/support/ │ ├── manager.go # In-memory tunnel registry │ ├── protocol.go # CONNECT header protocol │ └── stream.go # WebSocket-to-net.Conn adapter +├── examples/ +│ └── users.d/ # Example users.d plugins (nethvoice) ├── pkg/version/ # Build version info └── .env.example # Environment variables template ``` @@ -229,11 +233,14 @@ At connect time, the tunnel-client collects a health report and pushes it to the **Built-in plugin** (`system`): always runs, collects OS info, CPU load averages, RAM usage, disk usage, and uptime from `/proc` and `syscall.Statfs`. **External plugins**: executable files dropped in `/usr/share/my/diagnostics.d/` (configurable via `DIAGNOSTICS_DIR`). Each plugin: -- Runs with a per-plugin timeout (default 10s, configurable via `DIAGNOSTICS_PLUGIN_TIMEOUT`) -- Writes JSON to stdout -- Uses exit code to signal status: `0` = ok, `1` = warning, `2` = critical - -Plugin output format: +- Is any executable (bash, python, Go binary, etc.) +- Runs with **no arguments** and **no stdin** +- Writes JSON to stdout (or raw text as fallback) +- Uses exit code to signal status: `0` = ok, `1` = warning, `2` = critical, other = error +- Has a per-plugin timeout (default 10s, configurable via `DIAGNOSTICS_PLUGIN_TIMEOUT`) +- Stdout is capped at 512 KB + +**Plugin output format** (JSON on stdout): ```json { "id": "nethvoice", @@ -247,7 +254,7 @@ Plugin output format: } ``` -The overall session status is the worst status across all plugins. If the `id` or `name` fields are omitted, they are derived from the filename. If stdout is not valid JSON, the raw text is used as `summary`. +The overall session status is the worst status across all plugins. If the `id` or `name` fields are omitted, they are derived from the filename. 
If stdout is not valid JSON, the raw text is used as `summary` and status comes from the exit code. ```bash # Diagnostics flags @@ -271,29 +278,117 @@ The tunnel-client provisions temporary users when a support session starts and r - Creates a local user via `python3` + `nethsec.users` module - Promotes to admin for web UI access -**Plugin system** (`users.d/`): executable scripts in `/usr/share/my/users.d/` configure applications for the support user. Each plugin receives `setup` or `teardown` as the first argument and `--users-file ` pointing to a JSON file with the provisioned credentials. +**Credential lifecycle**: +1. **On connect**: tunnel-client creates users, stores credentials in a local state file (`/var/run/my-support-users.json`), reports them to the server via yamux `USERS_REPORT` stream, the server stores them in the `users` JSONB column +2. **On disconnect/session end**: tunnel-client deletes all ephemeral users and removes the state file; the server clears the `users` column from the database +3. **On crash recovery**: next startup reads the orphaned state file, runs cleanup (delete users + teardown plugins), then removes the file +4. **Reconnection**: user provisioning happens only on the first successful connection; subsequent reconnections re-send the existing report without re-provisioning + +```bash +# User provisioning flags +--users-dir string # Default: /usr/share/my/users.d (env: USERS_DIR) +--users-plugin-timeout duration # Default: 15s (env: USERS_PLUGIN_TIMEOUT) +--users-total-timeout duration # Default: 60s (env: USERS_TOTAL_TIMEOUT) +--users-state-file string # Default: /var/run/my-support-users.json (env: USERS_STATE_FILE) +``` -Plugin output format (setup): +### Users Plugin System (`users.d/`) + +After ephemeral users are provisioned, plugins in `users.d/` configure **applications** to accept those credentials. For example, the `nethvoice` plugin creates a FreePBX `ampusers` entry so the support user can log in to NethVoice. 
+ +**Invocation**: each plugin is called with two actions during the session lifecycle: +```bash +# Setup: configure the application for the support user +/usr/share/my/users.d/nethvoice setup --users-file /var/run/my-support-tmp/users.json --instances-file /var/run/my-support-tmp/instances.json + +# Teardown: undo the configuration (called on session end or tunnel-client shutdown) +/usr/share/my/users.d/nethvoice teardown --users-file /var/run/my-support-tmp/users.json --instances-file /var/run/my-support-tmp/instances.json +``` + +**`--users-file` format** (`SessionUsers` — the provisioned credentials): ```json { - "id": "nethvoice", - "name": "NethVoice Admin", - "notes": "Optional notes for the operator" + "session_id": "NETH-2239-DE49-...", + "platform": "nethserver", + "cluster_admin": { + "username": "support-neth-2239-7d37", + "password": "IWUPPBp8HQk#@6Ep#T9@" + }, + "domain_users": [ + { + "domain": "sf.nethserver.net", + "module": "openldap1", + "username": "support-neth-2239-7d37", + "password": "xK9#mLp2@vRtNw4&jB7q" + } + ], + "local_users": [], + "module_domains": { + "nethvoice103": "sf.nethserver.net", + "webtop5": "sf.nethserver.net" + }, + "created_at": "2026-03-24T19:13:28Z" } ``` -**Crash recovery**: a state file (default `/var/run/my-support-users.json`) persists the created users. On startup, orphaned users from a previous crash are cleaned up before connecting. +**`--instances-file` format** (`ModuleContext` — passed only when the plugin name matches a discovered NS8 module): +```json +{ + "module": "nethvoice", + "instances": [ + { + "id": "nethvoice103", + "node_id": "1", + "label": "Main PBX", + "domain": "sf.nethserver.net", + "services": { + "nethvoice103-wizard": { + "host": "nethvoice103.sf.nethserver.net", + "path": "/nethvoice103-wizard", + "path_prefix": "/nethvoice103-wizard", + "tls": true + } + } + } + ] +} +``` -**Reconnection**: user provisioning happens only on the first successful connection. 
Subsequent reconnections re-send the report without re-provisioning. +**Module matching**: the plugin filename determines module matching. If a plugin is named `nethvoice`, the tunnel-client checks if any discovered NS8 module has base name `nethvoice` (trailing digits are stripped: `nethvoice103` → `nethvoice`). If a match is found, `--instances-file` is provided with all matching instances. If no match, the plugin runs without `--instances-file` (useful for generic plugins). -```bash -# User provisioning flags ---users-dir string # Default: /usr/share/my/users.d (env: USERS_DIR) ---users-plugin-timeout duration # Default: 15s (env: USERS_PLUGIN_TIMEOUT) ---users-total-timeout duration # Default: 60s (env: USERS_TOTAL_TIMEOUT) ---users-state-file string # Default: /var/run/my-support-users.json (env: USERS_STATE_FILE) +**Setup output** (JSON on stdout — single object or array of `AppConfig`): +```json +[ + { + "id": "nethvoice103", + "name": "NethVoice (Main PBX)", + "url": "https://optional-direct-url/", + "notes": "Domain: sf.nethserver.net | Service: nethvoice103-wizard" + } +] ``` +These `AppConfig` entries appear in the support session UI so operators know which applications are configured and how to access them. The `teardown` action ignores stdout. + +**Example**: see `examples/users.d/nethvoice` for a complete reference implementation that creates FreePBX admin users for each NethVoice instance. 
+
+### Plugin Security Model
+
+Both `diagnostics.d/` and `users.d/` apply identical security checks before executing any plugin:
+
+| Check | Rule |
+|:---|:---|
+| **File type** | Must be a regular file (no symlinks, no directories) |
+| **Executable** | Must have at least one execute bit set |
+| **Ownership** | Must be owned by **root (UID 0)** or the tunnel-client process UID |
+| **Write permissions** | Must **not** be group-writable or world-writable |
+| **Environment** | Plugins run with a minimal environment (`PATH` only) — no inherited secrets |
+| **Timeout** | Per-plugin timeout enforced via `context.WithTimeout` |
+| **Output limit** | Stdout capped (512 KB for diagnostics, 64 KB for users) |
+| **Temp files** | Credential files (`--users-file`, `--instances-file`) are written to `/var/run/my-support-tmp/` with 0700 permissions and deleted after execution |
+
+If any check fails, the plugin is skipped and a log message records the reason. Plugins are executed in alphabetical order.
+ ## Related - [openapi.yaml](../../backend/openapi.yaml) - API specification - [Backend](../../backend/README.md) - API server From f2325a93b559c82d5ad56244cf656035fbe6fe5d Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Thu, 26 Mar 2026 21:53:18 +0100 Subject: [PATCH 26/28] feat(diagnostics): use structured JSON details in built-in system plugin --- .../internal/diagnostics/runner.go | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/services/support/cmd/tunnel-client/internal/diagnostics/runner.go b/services/support/cmd/tunnel-client/internal/diagnostics/runner.go index cdcb3ce6..ffb69344 100644 --- a/services/support/cmd/tunnel-client/internal/diagnostics/runner.go +++ b/services/support/cmd/tunnel-client/internal/diagnostics/runner.go @@ -107,11 +107,14 @@ func runBuiltinSystem() PluginResult { loadStatus = StatusWarning } overallStatus = worstStatus(overallStatus, loadStatus) + loadDetails, _ := json.Marshal(map[string]interface{}{ + "load_1m": load1m, "load_5m": load5m, "load_15m": load15m, "cpu_count": cpuCount, + }) checks = append(checks, DiagnosticCheck{ Name: "load_average", Status: loadStatus, Value: fmt.Sprintf("%.2f %.2f %.2f", load1m, load5m, load15m), - Details: fmt.Sprintf("1m/5m/15m (cpu_count=%d)", cpuCount), + Details: string(loadDetails), }) // --- RAM usage --- @@ -145,11 +148,14 @@ func runBuiltinSystem() PluginResult { } } overallStatus = worstStatus(overallStatus, ramStatus) + ramDetails, _ := json.Marshal(map[string]interface{}{ + "total_kb": memTotal, "available_kb": memAvailable, "used_pct": ramUsagePct, + }) checks = append(checks, DiagnosticCheck{ Name: "ram_usage", Status: ramStatus, Value: fmt.Sprintf("%.1f%%", ramUsagePct), - Details: fmt.Sprintf("total=%dkB available=%dkB", memTotal, memAvailable), + Details: string(ramDetails), }) // --- Root filesystem usage --- @@ -168,11 +174,14 @@ func runBuiltinSystem() PluginResult { } } overallStatus = worstStatus(overallStatus, diskStatus) + 
diskDetails, _ := json.Marshal(map[string]interface{}{ + "total_bytes": total, "available_bytes": available, "used_pct": diskUsagePct, + }) checks = append(checks, DiagnosticCheck{ Name: "disk_usage", Status: diskStatus, Value: fmt.Sprintf("%.1f%%", diskUsagePct), - Details: fmt.Sprintf("total=%dB available=%dB", total, available), + Details: string(diskDetails), }) } else { checks = append(checks, DiagnosticCheck{ @@ -192,10 +201,14 @@ func runBuiltinSystem() PluginResult { days := totalSecs / 86400 hours := (totalSecs % 86400) / 3600 minutes := (totalSecs % 3600) / 60 + uptimeDetails, _ := json.Marshal(map[string]interface{}{ + "days": days, "hours": hours, "minutes": minutes, "total_seconds": totalSecs, + }) checks = append(checks, DiagnosticCheck{ - Name: "uptime", - Status: StatusOK, - Value: fmt.Sprintf("%d days %d hours %d minutes", days, hours, minutes), + Name: "uptime", + Status: StatusOK, + Value: fmt.Sprintf("%d days %d hours %d minutes", days, hours, minutes), + Details: string(uptimeDetails), }) } } From b735dc2cef453aeed64047dab861689964041c69 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Fri, 27 Mar 2026 11:10:40 +0100 Subject: [PATCH 27/28] fix(diagnostics): use json.RawMessage for structured details in checks --- .../internal/diagnostics/models.go | 7 ++- .../internal/diagnostics/runner.go | 44 ++++++++++--------- services/support/models/diagnostics.go | 13 +++--- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/services/support/cmd/tunnel-client/internal/diagnostics/models.go b/services/support/cmd/tunnel-client/internal/diagnostics/models.go index 7e802bbe..fb6fcfb1 100644 --- a/services/support/cmd/tunnel-client/internal/diagnostics/models.go +++ b/services/support/cmd/tunnel-client/internal/diagnostics/models.go @@ -9,7 +9,10 @@ package diagnostics -import "time" +import ( + "encoding/json" + "time" +) // DiagnosticStatus represents the health status of a diagnostic check type DiagnosticStatus string @@ -27,7 +30,7 @@ 
type DiagnosticCheck struct { Name string `json:"name"` Status DiagnosticStatus `json:"status"` Value string `json:"value,omitempty"` - Details string `json:"details,omitempty"` + Details json.RawMessage `json:"details,omitempty"` } // PluginResult is the output of a single diagnostic plugin diff --git a/services/support/cmd/tunnel-client/internal/diagnostics/runner.go b/services/support/cmd/tunnel-client/internal/diagnostics/runner.go index ffb69344..2a53f4ef 100644 --- a/services/support/cmd/tunnel-client/internal/diagnostics/runner.go +++ b/services/support/cmd/tunnel-client/internal/diagnostics/runner.go @@ -57,6 +57,12 @@ func worstStatus(a, b DiagnosticStatus) DiagnosticStatus { return b } +// jsonDetails marshals a value to json.RawMessage for use in DiagnosticCheck.Details. +func jsonDetails(v interface{}) json.RawMessage { + b, _ := json.Marshal(v) + return b +} + // runBuiltinSystem collects basic OS and resource metrics from /proc and syscall. func runBuiltinSystem() PluginResult { result := PluginResult{ @@ -107,14 +113,13 @@ func runBuiltinSystem() PluginResult { loadStatus = StatusWarning } overallStatus = worstStatus(overallStatus, loadStatus) - loadDetails, _ := json.Marshal(map[string]interface{}{ - "load_1m": load1m, "load_5m": load5m, "load_15m": load15m, "cpu_count": cpuCount, - }) checks = append(checks, DiagnosticCheck{ Name: "load_average", Status: loadStatus, Value: fmt.Sprintf("%.2f %.2f %.2f", load1m, load5m, load15m), - Details: string(loadDetails), + Details: jsonDetails(map[string]interface{}{ + "load_1m": load1m, "load_5m": load5m, "load_15m": load15m, "cpu_count": cpuCount, + }), }) // --- RAM usage --- @@ -148,14 +153,13 @@ func runBuiltinSystem() PluginResult { } } overallStatus = worstStatus(overallStatus, ramStatus) - ramDetails, _ := json.Marshal(map[string]interface{}{ - "total_kb": memTotal, "available_kb": memAvailable, "used_pct": ramUsagePct, - }) checks = append(checks, DiagnosticCheck{ Name: "ram_usage", Status: ramStatus, 
Value: fmt.Sprintf("%.1f%%", ramUsagePct), - Details: string(ramDetails), + Details: jsonDetails(map[string]interface{}{ + "total_kb": memTotal, "available_kb": memAvailable, "used_pct": ramUsagePct, + }), }) // --- Root filesystem usage --- @@ -174,21 +178,20 @@ func runBuiltinSystem() PluginResult { } } overallStatus = worstStatus(overallStatus, diskStatus) - diskDetails, _ := json.Marshal(map[string]interface{}{ - "total_bytes": total, "available_bytes": available, "used_pct": diskUsagePct, - }) checks = append(checks, DiagnosticCheck{ Name: "disk_usage", Status: diskStatus, Value: fmt.Sprintf("%.1f%%", diskUsagePct), - Details: string(diskDetails), + Details: jsonDetails(map[string]interface{}{ + "total_bytes": total, "available_bytes": available, "used_pct": diskUsagePct, + }), }) } else { checks = append(checks, DiagnosticCheck{ Name: "disk_usage", Status: StatusError, Value: "unavailable", - Details: err.Error(), + Details: jsonDetails(map[string]string{"error": err.Error()}), }) } @@ -201,14 +204,13 @@ func runBuiltinSystem() PluginResult { days := totalSecs / 86400 hours := (totalSecs % 86400) / 3600 minutes := (totalSecs % 3600) / 60 - uptimeDetails, _ := json.Marshal(map[string]interface{}{ - "days": days, "hours": hours, "minutes": minutes, "total_seconds": totalSecs, - }) checks = append(checks, DiagnosticCheck{ - Name: "uptime", - Status: StatusOK, - Value: fmt.Sprintf("%d days %d hours %d minutes", days, hours, minutes), - Details: string(uptimeDetails), + Name: "uptime", + Status: StatusOK, + Value: fmt.Sprintf("%d days %d hours %d minutes", days, hours, minutes), + Details: jsonDetails(map[string]interface{}{ + "days": days, "hours": hours, "minutes": minutes, "total_seconds": totalSecs, + }), }) } } @@ -217,7 +219,7 @@ func runBuiltinSystem() PluginResult { Name: "uptime", Status: StatusError, Value: "unavailable", - Details: err.Error(), + Details: jsonDetails(map[string]string{"error": err.Error()}), }) } diff --git 
a/services/support/models/diagnostics.go b/services/support/models/diagnostics.go index 6026c785..6e02acee 100644 --- a/services/support/models/diagnostics.go +++ b/services/support/models/diagnostics.go @@ -9,14 +9,17 @@ package models -import "time" +import ( + "encoding/json" + "time" +) // DiagnosticCheck is a single named check within a plugin result. type DiagnosticCheck struct { - Name string `json:"name"` - Status string `json:"status"` - Value string `json:"value,omitempty"` - Details string `json:"details,omitempty"` + Name string `json:"name"` + Status string `json:"status"` + Value string `json:"value,omitempty"` + Details json.RawMessage `json:"details,omitempty"` } // DiagnosticPlugin is the result from a single diagnostics plugin. From c8779a93549b6444c4aa5618340c5d2834fd45e3 Mon Sep 17 00:00:00 2001 From: Edoardo Spadoni Date: Fri, 27 Mar 2026 12:30:29 +0100 Subject: [PATCH 28/28] fix(support): show host as primary label in services list, sort alphabetically --- .../support/SupportSessionsTable.vue | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/support/SupportSessionsTable.vue b/frontend/src/components/support/SupportSessionsTable.vue index ea57003c..ccb67e45 100644 --- a/frontend/src/components/support/SupportSessionsTable.vue +++ b/frontend/src/components/support/SupportSessionsTable.vue @@ -336,6 +336,15 @@ async function handleOpenUnified(group: SystemSessionGroup) { } } + // Sort services within each module by host (or name if no host) + for (const mg of moduleMap.values()) { + mg.services.sort((a, b) => { + const ak = a.host ? `${a.host}${a.path || ''}` : a.name + const bk = b.host ? 
`${b.host}${b.path || ''}` : b.name + return ak.localeCompare(bk) + }) + } + // Sort modules by nodeId then moduleId unifiedModules.value = Array.from(moduleMap.values()).sort((a, b) => { const nc = (a.nodeId || '').localeCompare(b.nodeId || '', undefined, { numeric: true }) @@ -826,9 +835,12 @@ async function handleAddService() { @click="handleOpenService(svc.session, svc.name, svc.path)" > - {{ svc.name }} - ({{ svc.host }}{{ svc.path || '' }}){{ svc.host }}{{ svc.path || '' }} + {{ svc.name }} + ({{ svc.name }})