From d18d502e642d654acfff29cbb7d3f8d747816fde Mon Sep 17 00:00:00 2001 From: Ed Date: Sun, 23 Nov 2025 10:35:19 -0600 Subject: [PATCH 1/8] Added robots.txt endpoint --- helm/gen3/Chart.yaml | 4 ++-- helm/gen3/README.md | 4 ++-- helm/revproxy/Chart.yaml | 2 +- helm/revproxy/README.md | 3 ++- helm/revproxy/gen3.nginx.conf/robots-txt.conf | 6 ++++++ helm/revproxy/nginx/nginx.conf | 3 +++ helm/revproxy/values.yaml | 3 +++ 7 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 helm/revproxy/gen3.nginx.conf/robots-txt.conf diff --git a/helm/gen3/Chart.yaml b/helm/gen3/Chart.yaml index 9c9ed51b5..bf349b63b 100644 --- a/helm/gen3/Chart.yaml +++ b/helm/gen3/Chart.yaml @@ -100,7 +100,7 @@ dependencies: repository: "file://../requestor" condition: requestor.enabled - name: revproxy - version: 0.1.48 + version: 0.1.49 repository: "file://../revproxy" condition: revproxy.enabled - name: sheepdog @@ -173,7 +173,7 @@ type: application # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.96 +version: 0.2.97 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/helm/gen3/README.md b/helm/gen3/README.md index 33e6be575..7b9a88842 100644 --- a/helm/gen3/README.md +++ b/helm/gen3/README.md @@ -1,6 +1,6 @@ # gen3 -![Version: 0.2.96](https://img.shields.io/badge/Version-0.2.96-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: master](https://img.shields.io/badge/AppVersion-master-informational?style=flat-square) +![Version: 0.2.97](https://img.shields.io/badge/Version-0.2.97-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: master](https://img.shields.io/badge/AppVersion-master-informational?style=flat-square) Helm chart to deploy Gen3 Data Commons @@ -48,7 +48,7 @@ Helm chart to deploy Gen3 Data Commons | file://../peregrine | peregrine | 0.1.35 | | file://../portal | portal | 0.1.49 | | file://../requestor | requestor | 0.1.27 | -| file://../revproxy | revproxy | 0.1.48 | +| file://../revproxy | revproxy | 0.1.49 | | file://../sheepdog | sheepdog | 0.1.35 | | file://../sower | sower | 0.1.38 | | file://../ssjdispatcher | ssjdispatcher | 0.1.37 | diff --git a/helm/revproxy/Chart.yaml b/helm/revproxy/Chart.yaml index 5c1a5335f..75e8ec128 100644 --- a/helm/revproxy/Chart.yaml +++ b/helm/revproxy/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.48 +version: 0.1.49 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/helm/revproxy/README.md b/helm/revproxy/README.md index 3ce73484a..b4dd9d63e 100644 --- a/helm/revproxy/README.md +++ b/helm/revproxy/README.md @@ -1,6 +1,6 @@ # revproxy -![Version: 0.1.48](https://img.shields.io/badge/Version-0.1.48-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: master](https://img.shields.io/badge/AppVersion-master-informational?style=flat-square) +![Version: 0.1.49](https://img.shields.io/badge/Version-0.1.49-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: master](https://img.shields.io/badge/AppVersion-master-informational?style=flat-square) A Helm chart for gen3 revproxy @@ -18,6 +18,7 @@ A Helm chart for gen3 revproxy | autoscaling | object | `{}` | | | commonLabels | map | `nil` | Will completely override the commonLabels defined in the common chart's _label_setup.tpl | | criticalService | string | `"true"` | Valid options are "true" or "false". If invalid option is set- the value will default to "false". | +| enableRobotsTxt | bool | `false` | Whether to enable robots.txt generation and serving. | | extraServices | map | `nil` | Configuration to add any extra service endpoints outside of gen3 to be served by revproxy | | fullnameOverride | string | `""` | Override the full name of the deployment. 
| | global.autoscaling.averageCPUValue | string | `"500m"` | | diff --git a/helm/revproxy/gen3.nginx.conf/robots-txt.conf b/helm/revproxy/gen3.nginx.conf/robots-txt.conf new file mode 100644 index 000000000..d9d7ae3f2 --- /dev/null +++ b/helm/revproxy/gen3.nginx.conf/robots-txt.conf @@ -0,0 +1,6 @@ +{{- if .Values.enableRobotsTxt }} +location /robots.txt { + default_type text/plain; + return 200 "User-agent: *\nDisallow: /\n"; +} +{{- end }} \ No newline at end of file diff --git a/helm/revproxy/nginx/nginx.conf b/helm/revproxy/nginx/nginx.conf index c38743d93..78654ec81 100644 --- a/helm/revproxy/nginx/nginx.conf +++ b/helm/revproxy/nginx/nginx.conf @@ -177,6 +177,9 @@ map $http_user_agent $loggable { add_header "X-Frame-Options" "SAMEORIGIN" always; add_header "X-Content-Type-Options" "nosniff" always; add_header "X-Xss-Protection" "1; mode=block" always; + {{- if .Values.enableRobotsTxt }} + add_header "X-Robots-Tag" "noindex, nofollow" always; + {{- end }} if ($http_x_forwarded_proto = "http") { return 301 https://$host$request_uri; } # diff --git a/helm/revproxy/values.yaml b/helm/revproxy/values.yaml index f169ae61b..cddd618a4 100644 --- a/helm/revproxy/values.yaml +++ b/helm/revproxy/values.yaml @@ -254,3 +254,6 @@ extraServices: # - name: "protein-paint" # path: /protein-paint # serviceName: protein-paint + +# -- (bool) Whether to enable robots.txt generation and serving. 
+enableRobotsTxt: false From 71a20fbc60d4f0829aa4c300dbc6abec609d1ff7 Mon Sep 17 00:00:00 2001 From: Ed Date: Sun, 23 Nov 2025 10:50:26 -0600 Subject: [PATCH 2/8] Added robots.txt endpoint --- helm/revproxy/nginx/nginx.conf | 3 - helm/revproxy/nginxPrivate/helpers.js | 283 +++++++++++++++++++ helm/revproxy/nginxPrivate/nginx.conf | 348 ++++++++++++++++++++++++ helm/revproxy/templates/configMaps.yaml | 7 + 4 files changed, 638 insertions(+), 3 deletions(-) create mode 100644 helm/revproxy/nginxPrivate/helpers.js create mode 100644 helm/revproxy/nginxPrivate/nginx.conf diff --git a/helm/revproxy/nginx/nginx.conf b/helm/revproxy/nginx/nginx.conf index 78654ec81..c38743d93 100644 --- a/helm/revproxy/nginx/nginx.conf +++ b/helm/revproxy/nginx/nginx.conf @@ -177,9 +177,6 @@ map $http_user_agent $loggable { add_header "X-Frame-Options" "SAMEORIGIN" always; add_header "X-Content-Type-Options" "nosniff" always; add_header "X-Xss-Protection" "1; mode=block" always; - {{- if .Values.enableRobotsTxt }} - add_header "X-Robots-Tag" "noindex, nofollow" always; - {{- end }} if ($http_x_forwarded_proto = "http") { return 301 https://$host$request_uri; } # diff --git a/helm/revproxy/nginxPrivate/helpers.js b/helm/revproxy/nginxPrivate/helpers.js new file mode 100644 index 000000000..9dcb8d524 --- /dev/null +++ b/helm/revproxy/nginxPrivate/helpers.js @@ -0,0 +1,283 @@ +/** + * This is a helper script used in the reverse proxy + * Note that this is not technically javascript, but nginscript (or njs) + * See here for info: + * - http://nginx.org/en/docs/njs/ + * - https://www.nginx.com/blog/introduction-nginscript/ + */ + +/** global supporting atob polyfill below */ +var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='; //pragma: allowlist secret +// default threshold for assigning a service to production +// e.g. 
weight of 0 would mean all services are assigned to production +var DEFAULT_WEIGHT = 0; + +/** + * base64 decode polyfill from + * https://github.com/davidchambers/Base64.js/blob/master/base64.js + */ +function atob(input) { + var str = String(input).replace(/[=]+$/, ''); // #31: ExtendScript bad parse of /= + if (str.length % 4 == 1) { + return input; + } + for ( + // initialize result and counters + var bc = 0, bs, buffer, idx = 0, output = ''; + // get next character + buffer = str.charAt(idx++); + // character found in table? initialize bit storage and add its ascii value; + ~buffer && (bs = bc % 4 ? bs * 64 + buffer : buffer, + // and if not first of each 4 characters, + // convert the first 8 bits to one ascii character + bc++ % 4) ? output += String.fromCharCode(255 & bs >> (-2 * bc & 6)) : 0 + ) { + // try to find character in table (0-63, not found => -1) + buffer = chars.indexOf(buffer); + } + return output; +} + +/** + * nginscript helper for parsing user out of JWT tokens. + * We appear to have access to the 'access_token' variable + * defined in nginx.conf when this function runs via 'js_set'. + * see https://www.nginx.com/blog/introduction-nginscript/ + * + * @param {*} req + * @param {*} res + */ +function userid(req, res) { + var token = req.variables["access_token"]; + var user = "uid:null,unknown@unknown"; + + if (token) { + // note - raw token is secret, so do not expose in userid; the JWT payload is base64url, so map EVERY '-' and '_' back to '+' and '/' before decoding + var raw = atob((token.split('.')[1] || "").replace(/-/g, '+').replace(/_/g, '/')); + if (raw) { + try { + var data = JSON.parse(raw); + if (data) { + if (data.context && data.context.user && data.context.user.name) { + user = "uid:" + data.sub + "," + data.context.user.name; + } + } + } catch (err) {} + } + } + return user; +} + +/** + * returns absolute value of a number + */ +function MathAbs(x) { + x = +x; + return (x > 0) ? 
x : 0 - x; +} + +/** + * util for hashing a string into given range + * Source: http://pmav.eu/stuff/javascript-hashing-functions/source.html + * + * @param s - string to hash + */ +function simpleHash(s) { + var i, hash = 0; + for (i = 0; i < s.length; i++) { + hash += (s[i].charCodeAt() * (i+1)); + } + // mod 100 b/c we want a percentage range (ie 0-99) + return MathAbs(hash) % 100; +} + +/** + * Returns a release (string) depending on the given + * values provided + * + * @param hash_res - an integer to compare to service_weight + * @param service_weight - integer threshold for assigning release as 'production' + * @param default_weight - if service_weight is undefined, compare hash to this value + * @returns {string} - release + */ +function selectRelease(hash_res, w) { + // determine release by comparing hash val to service weight + if (hash_res < parseInt(w)) { + return 'canary'; + } + return 'production'; +} + +function getWeight(service, weights) { + if (typeof weights[service] === 'undefined') { + return weights['default']; + } + return weights[service]; +} + +function releasesObjToString(releases) { + var res = ''; + for (var service in releases) { + if (releases.hasOwnProperty(service)) { + res = res + service + '.' + releases[service] + '&'; + } + } + return res; +} + +/** + * Checks cookie (dev_canaries or service_releases) + * for service release versions and assigns + * release versions for services not in the cookie based + * on hash value and the percent weight of the canary. + * If the weight for a service is 0, it ignores the cookie + * and sets the release to production. + * + * @param req - nginx request object + * @return a string of service assignments. 
E.g: + * "fence.canary&sheepdog.production&" + */ +function getServiceReleases(req) { + // + // client cookie containing releases + // developer override can force canary even when canary has + // been deployed for general users by setting the canary weights to zero + // + var devOverride= !!req.variables['cookie_dev_canaries']; + var release_cookie = req.variables['cookie_dev_canaries'] || req.variables['cookie_service_releases'] || ''; + // services to assign to a service (edit this if adding a new canary service) + var services = ['fence', 'fenceshib', 'sheepdog', 'indexd', 'peregrine']; + // weights for services - if given a default weight, use it; else use the default weight from this file + var canary_weights = JSON.parse(req.variables['canary_percent_json']); + if (typeof canary_weights['default'] === 'undefined') { + canary_weights['default'] = DEFAULT_WEIGHT + } else { + canary_weights['default'] = parseInt(canary_weights['default']) + } + // the string to be hashed + var hash_str = ['app', req.variables['realip'], req.variables['http_user_agent'], req.variables['date_gmt']].join(); + var hash_res = -1; + + // for each service: + // if it's weight == 0, ignore the cookie and set release to production + // else if it's in the cookie, use that release + // else select one by hashing and comparing to weight + var updated_releases = {}; + for (var i=0; i < services.length; i++) { + var service = services[i]; + var parsed_release = release_cookie.match(service+'\.(production|canary)'); + if ((!devOverride) && getWeight(service, canary_weights) === 0) { + updated_releases[service] = 'production'; + } else if (!parsed_release) { + // if we haven't yet generated a hash value, do that now + if (hash_res < 0) { + hash_res = simpleHash(hash_str); + } + updated_releases[service] = selectRelease(hash_res, getWeight(service, canary_weights)); + } else { + // append the matched values from the cookie + updated_releases[service] = parsed_release[1]; + } + } + + return 
releasesObjToString(updated_releases); +} + +/** + * Controls the value of Access-Control-Allow-Credentials by environment variable + * ORIGINS_ALLOW_CREDENTIALS. + * + * ORIGINS_ALLOW_CREDENTIALS is supposed to be a list of origins in JSON string. Only + * requests with origins in this list are allowed to send credentials like cookies to + * this website. See also: https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS#Requests_with_credentials + * + * In most cases, credentials shouldn't be sent cross-site to mitigate CSRF attack risks. + * This is useful when Gen3 is deployed as an SSO and centralized service in a cross-site + * manner. The NDEF for example, serves two sub-commons at sub1.example.com and + * sub2.example.com with a centralized commons at example.com running Fence, Indexd and + * Arborist. When logged in at example.com, requests sent to both sub1 and sub2 are + * allowed to carry the same authentication cookie, therefore extra login is not needed + * for sub1 or sub2. + * + * @param req - nginx request object + * @returns {string} value used in Access-Control-Allow-Credentials header, empty string + * to not include this header + */ +function isCredentialsAllowed(req) { + if (!!req.variables['http_origin']) { + var origins = JSON.parse(req.variables['origins_allow_credentials'] || '[]') || []; + for (var i = 0; i < origins.length; i++) { + // cannot use === to compare byte strings, whose "typeof" is also confusingly "string" + if (origins[i].fromUTF8().toLowerCase().trim() === + req.variables['http_origin'].fromUTF8().toLowerCase().trim()) { + return 'true'; + } + } + } + return ''; +} + +/** + * Test whether the given ipAddrStr is in the global blackListStr. 
+ * Currently does not support CIDR format - just list of IP's + * + * @param {string} ipAddrStr + * @param {string} blackListStr comma separated black list - defaults to globalBlackListStr (see below) + * @return {boolean} true if ipAddrStr is in the black list + */ +function isOnBlackList(ipAddrStr, blackListStr) { + return blackListStr.includes(ipAddrStr); +} + +/** + * Call via nginx.conf js_set after setting the blackListStr and + * ipAddrStr variables via set: + * + * set blackListStr="whatever" + * set ipAddrStr="whatever" + * js_set blackListCheck checkBlackList + * + * Note: kube-setup-revproxy generates gen3-blacklist.conf - which + * gets sucked into the nginx.conf config + * + * @param {Request} req + * @param {Response} res + * @return "ok" or "block" - fail to "ok" in ambiguous situation + */ +function checkBlackList(req,res) { + var ipAddrStr = req.variables["ip_addr_str"]; + var blackListStr = req.variables["black_list_str"]; + + if (ipAddrStr && blackListStr && isOnBlackList(ipAddrStr, blackListStr)) { + return "block"; + } + return "ok"; // + "-" + ipAddrStr + "-" + blackListStr; +} + + +/** + * Handle the js_content callout from /workspace-authorize. + * Basically - redirect to a subdomain /wts/authorize endpoint + * based on the state=SUBDOMAIN-... query parameter with + * some guards to stop attacks. + * + * @param {*} req + * @param {*} res + */ +function gen3_workspace_authorize_handler(req) { + var subdomain = ''; + var query = req.variables["args"] || ""; + var matchGroups = null; + + if (matchGroups = query.match(/(^state=|&state=)(\w+)-/)) { + subdomain = matchGroups[2]; + var location = "https://" + subdomain + "." + req.variables["host"] + + "/wts/oauth2/authorize?" 
+ query; + req.return(302, location); + } else { + req.headersOut["Content-Type"] = "application/json" + req.return(400, '{ "status": "redirect failed validation" }'); + } +} + +export default {userid, isCredentialsAllowed}; diff --git a/helm/revproxy/nginxPrivate/nginx.conf b/helm/revproxy/nginxPrivate/nginx.conf new file mode 100644 index 000000000..989b0affc --- /dev/null +++ b/helm/revproxy/nginxPrivate/nginx.conf @@ -0,0 +1,348 @@ +user nginx; +worker_processes 4; +pid /var/run/nginx.pid; + +load_module modules/ngx_http_js_module.so; +load_module modules/ngx_http_perl_module.so; + +## +# Preserve environment variables +# Note: to use the variable in blocks below, you must use +# perl to set the variable. eg: +# perl_set $my_var 'sub { return $ENV{"MY_ENVIRONMENT_VAIRABLE"}; }'; +## +env POD_NAMESPACE; +env CANARY_PERCENT_JSON; +env COOKIE_DOMAIN; +env ORIGINS_ALLOW_CREDENTIALS; +env DES_NAMESPACE; +env MAINTENANCE_MODE; +env INDEXD_AUTHZ; +env MDS_AUTHZ; +env FRONTEND_ROOT; +env DOCUMENT_URL; + +events { + worker_connections 768; +} + +http { + ## + # Basic Settings + ## + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + port_in_redirect off; + server_tokens off; + + # For websockets + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + + map $proxy_protocol_addr $initialip { + "" $http_x_forwarded_for; + default $proxy_protocol_addr; + } + + map $initialip $realip { + "" $remote_addr; #if this header missing set remote_addr as real ip + default $initialip; + } + +# Log filtering for health checks +map $http_user_agent $loggable { + default 1; + "ELB-HealthChecker/2.0" 0; + ~^Uptime-Kuma 0; + ~^kube-probe 0; + ~GoogleStackdriverMonitoring 0; +} + + # server_names_hash_bucket_size 64; + # server_name_in_redirect off; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # ## + # # Note - nginscript js_set, etc get processed + # # on demand: 
https://www.nginx.com/blog/introduction-nginscript/ + # # # + js_import helpers.js; + js_set $userid helpers.userid; + + + perl_set $document_url_env 'sub { return $ENV{"DOCUMENT_URL"} || ""; }'; + + # see portal-conf + perl_set $maintenance_mode_env 'sub { return $ENV{"MAINTENANCE_MODE"} || "undefined"; }'; + + # Setup root path frontend service + perl_set $frontend_root_service 'sub { return $ENV{"FRONTEND_ROOT"} eq "gen3ff" ? "gen3ff" : "portal"; }'; + + + ## + # Logging Settings + ## + log_format json '{"gen3log": "nginx", ' + '"date_access": "$time_iso8601", ' + '"user_id": "$userid", ' + '"request_id": "$request_id", ' + '"session_id": "$session_id", ' + '"visitor_id": "$visitor_id", ' + '"network_client_ip": "$realip", ' + '"network_bytes_write": $body_bytes_sent, ' + '"response_secs": $request_time, ' + '"http_status_code": $status, ' + '"http_request": "$request_uri", ' + '"http_verb": "$request_method", ' + '"http_referer": "$http_referer", ' + '"http_useragent": "$http_user_agent", ' + '"http_upstream": "$upstream", ' + '"proxy_service": "$proxy_service", ' + '"message": "$request" }'; + + access_log /dev/stdout json if=$loggable; + + + ## + # Gzip Settings + ## + gzip on; + gzip_disable "msie6"; + gzip_proxied any; + gzip_types + text/css + text/javascript + text/xml + text/plain + application/javascript + application/x-javascript + application/json; + + # ## + # # Namespace + # ## + perl_set $namespace 'sub { return $ENV{"POD_NAMESPACE"}; }'; + + # ## + # # Fence Namespace + # ## + # # For using fence, indexd, etc from a different namespace within the same k8 cluster - + # # support data ecosystem feature ... + # ## + perl_set $des_domain 'sub { return $ENV{"DES_NAMESPACE"} ? 
qq{.$ENV{"DES_NAMESPACE"}.svc.cluster.local} : qq{.$ENV{"POD_NAMESPACE"}.svc.cluster.local}; }'; + + # ## + # # CORS Credential White List + # ## + perl_set $origins_allow_credentials 'sub { return $ENV{"ORIGINS_ALLOW_CREDENTIALS"}; }'; + js_set $credentials_allowed helpers.isCredentialsAllowed; + + # ## For multi-domain deployments + perl_set $csrf_cookie_domain 'sub { return $ENV{"COOKIE_DOMAIN"} ? qq{;domain=$ENV{"COOKIE_DOMAIN"}} : ""; }'; + + # # indexd password for admin endpoint + perl_set $indexd_b64 'sub { $_ = $ENV{"INDEXD_AUTHZ"}; chomp; return "$_"; }'; + # # metadata service password for admin endpoint + perl_set $mds_b64 'sub { $_ = $ENV{"MDS_AUTHZ"}; chomp; return "$_"; }'; + + + server { + listen 6567; + + root /var/www/metrics; + + location /aggregated_metrics { + types {} + default_type text/plain; + try_files $uri $uri/ /metrics.txt; + autoindex on; + access_log off; + } + } + + server { + listen 80; + + server_tokens off; + proxy_hide_header server; + proxy_hide_header X-Powered-By; + add_header "X-Frame-Options" "SAMEORIGIN" always; + add_header "X-Content-Type-Options" "nosniff" always; + add_header "X-Xss-Protection" "1; mode=block" always; + add_header "X-Robots-Tag" "noindex, nofollow" always; + + if ($http_x_forwarded_proto = "http") { return 301 https://$host$request_uri; } + # + # Strict-Transport-Security only applys for https traffic - set after testing protocol + # + add_header "Strict-Transport-Security" "max-age=63072000; includeSubdomains;" always; + + # + # From https://enable-cors.org/server_nginx.html + # This overrides the individual services + # + set $allow_origin "*"; + if ($http_origin = "https://$host") { + set $allow_origin "$http_origin"; + } + + proxy_hide_header Access-Control-Allow-Origin; # Remove existing header + add_header "Access-Control-Allow-Origin" "$allow_origin" always; + add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, DELETE, PUT" always; + add_header "Access-Control-Allow-Credentials" 
"$credentials_allowed" always; + add_header "Access-Control-Allow-Headers" "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization,Cookie,X-CSRF-Token" always; + add_header "Access-Control-Expose-Headers" "Content-Length,Content-Range" always; + + + + # update service release cookie + # add_header Set-Cookie "service_releases=${service_releases};Path=/;Max-Age=600;HttpOnly;Secure;SameSite=Lax" always; + + if ($request_method = 'OPTIONS') { + return 204; + } + + # + # DNS resolver required to resolve dynamic hostnames, btw - kubedns may not support ipv6 + # see https://www.nginx.com/blog/dns-service-discovery-nginx-plus/ + # https://distinctplace.com/2017/04/19/nginx-resolver-explained/ + # + resolver kube-dns.kube-system.svc.cluster.local ipv6=off; + + set $access_token ""; + set $csrf_check "ok-tokenauth"; + + # + # Note: add_header blocks are inheritted iff the current block does not call add_header: + # http://nginx.org/en/docs/http/ngx_http_headers_module.html + # + set $csrf_token "$request_id$request_length$request_time$time_iso8601"; + if ($cookie_csrftoken) { + set $csrf_token "$cookie_csrftoken"; + } + add_header Set-Cookie "csrftoken=$csrf_token$csrf_cookie_domain;Path=/;Secure;SameSite=Lax"; + + # visitor and session tracking for analytics - + # https://developers.google.com/analytics/devguides/collection/analyticsjs/cookies-user-id + # + # Simple session tracking - expire the session if not active for 20 minutes + set $session_id "$request_id"; + if ($cookie_session) { + set $session_id "$cookie_session"; + } + add_header Set-Cookie "session=$session_id;Path=/;Max-Age=1200;HttpOnly;Secure;SameSite=Lax"; + # Simple visitor tracking - immortal + set $visitor_id "$request_id"; + if ($cookie_visitor) { + set $visitor_id "$cookie_visitor"; + } + add_header Set-Cookie "visitor=$visitor_id;Path=/;Max-Age=36000000;HttpOnly;Secure;SameSite=Lax"; + + if ($cookie_access_token) { + set $access_token "bearer 
$cookie_access_token"; + # cookie auth requires csrf check + set $csrf_check "fail"; + } + if ($http_authorization) { + # Authorization header is present - prefer that token over cookie token + set $access_token "$http_authorization"; + } + + # + # initialize proxy_service and upstream used as key in logs to + # unspecified values - + # individual service locations should override to "peregrine", ... + # + set $proxy_service "noproxy"; + + # + # Note - need to repeat this line in location blocks that call proxy_set_header, + # as nginx proxy module inherits proxy_set_header if and only if current level does + # not set headers ... http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_set_header + # + proxy_set_header Authorization "$access_token"; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For "$realip"; + proxy_set_header X-UserId "$userid"; + # Can propagate this request id through downstream microservice requests for tracing + proxy_set_header X-ReqId "$request_id"; + proxy_set_header X-SessionId "$session_id"; + proxy_set_header X-VisitorId "$visitor_id"; + proxy_intercept_errors on; + + # + # Accomodate large jwt token headers + # * http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_buffer_size + # * https://ma.ttias.be/nginx-proxy-upstream-sent-big-header-reading-response-header-upstream/ + # + proxy_buffer_size 16k; + proxy_buffers 8 16k; + proxy_busy_buffers_size 32k; + client_body_buffer_size 16k; + proxy_read_timeout 400; + proxy_send_timeout 400; + proxy_connect_timeout 400; + + # + # also incoming from client: + # * https://fullvalence.com/2016/07/05/cookie-size-in-nginx/ + # * https://nginx.org/en/docs/http/ngx_http_core_module.html#client_header_buffer_size + large_client_header_buffers 4 64k; + client_header_buffer_size 4k; + + # + # CSRF check + # This block requires a csrftoken for all POST requests. 
+ # + if ($cookie_csrftoken = $http_x_csrf_token) { + # this will fail further below if cookie_csrftoken is empty + set $csrf_check "ok-$cookie_csrftoken"; + } + if ($request_method != "POST") { + set $csrf_check "ok-$request_method"; + } + if ($cookie_access_token = "") { + # do this again here b/c empty cookie_csrftoken == empty http_x_csrf_token - ugh + set $csrf_check "ok-tokenauth"; + } + + error_page 500 501 502 503 504 @5xx; + + location @5xx { + internal; + return 500 "{ \"error\": \"service failure - try again later\"}"; + } + + location = /_status { + default_type application/json; + set $upstream http://localhost; + access_log off; + return 200 "{ \"message\": \"Feelin good!\", \"csrf\": \"$csrf_token\" }\n"; + } + + include /etc/nginx/gen3.conf/*.conf; + if ($document_url_env != "") { + include /etc/nginx/gen3.conf/documentation-site/*.conf; + } + + location @errorworkspace { + # if ($frontend_root_service = "gen3ff") { + # return 302 https://$host/portal/no-workspace-access; + # } + return 302 https://$host/no-workspace-access; + } + + location /canary { + add_header Content-Type text/html; + return 200 'You are running the Helm version of this commons'; + } + } +} diff --git a/helm/revproxy/templates/configMaps.yaml b/helm/revproxy/templates/configMaps.yaml index eb0d5655e..f0374c588 100644 --- a/helm/revproxy/templates/configMaps.yaml +++ b/helm/revproxy/templates/configMaps.yaml @@ -38,7 +38,14 @@ kind: ConfigMap metadata: name: revproxy-nginx-conf data: +{{- if .Values.enableRobotsTxt }} +{{- range $path, $bytes := .Files.Glob "nginxPrivate/*" }} + {{ ($a := split "/" $path)._1 }}: | + {{- $bytes | toString | nindent 4 }} +{{- end}} +{{- else }} {{- range $path, $bytes := .Files.Glob "nginx/*" }} {{ ($a := split "/" $path)._1 }}: | {{- $bytes | toString | nindent 4 }} {{- end}} +{{- end }} From caf53e76754ff0a998d842e3b218ec791298db72 Mon Sep 17 00:00:00 2001 From: Ed Date: Sun, 23 Nov 2025 10:57:14 -0600 Subject: [PATCH 3/8] Added robots.txt 
endpoint --- helm/revproxy/gen3.nginx.conf/robots-txt.conf | 6 ------ helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf | 4 ++++ helm/revproxy/templates/configMaps.yaml | 4 ++++ 3 files changed, 8 insertions(+), 6 deletions(-) delete mode 100644 helm/revproxy/gen3.nginx.conf/robots-txt.conf create mode 100644 helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf diff --git a/helm/revproxy/gen3.nginx.conf/robots-txt.conf b/helm/revproxy/gen3.nginx.conf/robots-txt.conf deleted file mode 100644 index d9d7ae3f2..000000000 --- a/helm/revproxy/gen3.nginx.conf/robots-txt.conf +++ /dev/null @@ -1,6 +0,0 @@ -{{- if .Values.enableRobotsTxt }} -location /robots.txt { - default_type text/plain; - return 200 "User-agent: *\nDisallow: /\n"; -} -{{- end }} \ No newline at end of file diff --git a/helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf b/helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf new file mode 100644 index 000000000..e878dd0d9 --- /dev/null +++ b/helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf @@ -0,0 +1,4 @@ + location /robots.txt { + default_type text/plain; + return 200 "User-agent: *\nDisallow: /\n"; + } \ No newline at end of file diff --git a/helm/revproxy/templates/configMaps.yaml b/helm/revproxy/templates/configMaps.yaml index f0374c588..97d1fb6e1 100644 --- a/helm/revproxy/templates/configMaps.yaml +++ b/helm/revproxy/templates/configMaps.yaml @@ -18,6 +18,10 @@ data: {{ "portal-service.conf" }}: | {{- .Files.Get "gen3.nginx.conf/portal-as-root/portal-service.conf" | nindent 4}} {{- end }} +{{- if .Values.enableRobotsTxt -}} + {{ "robots-txt.conf" }}: | + {{- .Files.Get "gen3.nginx.conf/robots/robots-txt.conf" | nindent 4 }} +{{- end }} {{- range .Values.extraServices }} {{ printf "%s-service.conf" .name }}: | location {{ .path }}/ { From 5b18987a73dd65f11f587cea95c322eb1282a9e5 Mon Sep 17 00:00:00 2001 From: Ed Date: Sun, 23 Nov 2025 11:26:27 -0600 Subject: [PATCH 4/8] Added robots.txt endpoint --- 
helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf | 2 +- helm/revproxy/templates/configMaps.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf b/helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf index e878dd0d9..9ea7ad24d 100644 --- a/helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf +++ b/helm/revproxy/gen3.nginx.conf/robots/robots-txt.conf @@ -1,4 +1,4 @@ location /robots.txt { default_type text/plain; return 200 "User-agent: *\nDisallow: /\n"; - } \ No newline at end of file + } diff --git a/helm/revproxy/templates/configMaps.yaml b/helm/revproxy/templates/configMaps.yaml index 97d1fb6e1..f05e4e5ed 100644 --- a/helm/revproxy/templates/configMaps.yaml +++ b/helm/revproxy/templates/configMaps.yaml @@ -18,9 +18,9 @@ data: {{ "portal-service.conf" }}: | {{- .Files.Get "gen3.nginx.conf/portal-as-root/portal-service.conf" | nindent 4}} {{- end }} -{{- if .Values.enableRobotsTxt -}} +{{- if .Values.enableRobotsTxt }} {{ "robots-txt.conf" }}: | - {{- .Files.Get "gen3.nginx.conf/robots/robots-txt.conf" | nindent 4 }} + {{- .Files.Get "gen3.nginx.conf/robots/robots-txt.conf" | nindent 4}} {{- end }} {{- range .Values.extraServices }} {{ printf "%s-service.conf" .name }}: | From 367b682568cce95853ff0ab98e3087d09f7d5427 Mon Sep 17 00:00:00 2001 From: Ed Date: Wed, 3 Dec 2025 16:35:50 -0600 Subject: [PATCH 5/8] Allowed for setting authz for extra revproxy services --- helm/gen3/Chart.yaml | 2 +- helm/gen3/README.md | 2 +- helm/revproxy/templates/configMaps.yaml | 7 +++++++ helm/revproxy/values.yaml | 2 ++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/helm/gen3/Chart.yaml b/helm/gen3/Chart.yaml index bf349b63b..cb30290ea 100644 --- a/helm/gen3/Chart.yaml +++ b/helm/gen3/Chart.yaml @@ -173,7 +173,7 @@ type: application # to the chart and its templates, including the app version. 
# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.97 +version: 0.2.98 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/helm/gen3/README.md b/helm/gen3/README.md index 7b9a88842..adf8ee7ea 100644 --- a/helm/gen3/README.md +++ b/helm/gen3/README.md @@ -1,6 +1,6 @@ # gen3 -![Version: 0.2.97](https://img.shields.io/badge/Version-0.2.97-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: master](https://img.shields.io/badge/AppVersion-master-informational?style=flat-square) +![Version: 0.2.98](https://img.shields.io/badge/Version-0.2.98-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: master](https://img.shields.io/badge/AppVersion-master-informational?style=flat-square) Helm chart to deploy Gen3 Data Commons diff --git a/helm/revproxy/templates/configMaps.yaml b/helm/revproxy/templates/configMaps.yaml index f05e4e5ed..e185295e7 100644 --- a/helm/revproxy/templates/configMaps.yaml +++ b/helm/revproxy/templates/configMaps.yaml @@ -28,6 +28,13 @@ data: if ($csrf_check !~ ^ok-\S.+$) { return 403 "failed csrf check"; } + {{- if and .authzPolicy .authzService -}} + set $authz_resource "/{{ .authzPolicy }}"; + set $authz_method "access"; + set $authz_service "{{ .authzService }}"; + # be careful - sub-request runs in same context as this request + auth_request /gen3-authz; + {{- end }} set $proxy_service "{{ .name }}"; set $upstream http://{{ .serviceName }}$des_domain; diff --git a/helm/revproxy/values.yaml b/helm/revproxy/values.yaml index cddd618a4..38f9a75ea 100644 --- a/helm/revproxy/values.yaml +++ b/helm/revproxy/values.yaml @@ -254,6 +254,8 @@ extraServices: # - name: "protein-paint" # path: 
/protein-paint # serviceName: protein-paint +# authzPolicy: "protein-paint" +# authzService: "protein-paint" # -- (bool) Whether to enable robots.txt generation and serving. enableRobotsTxt: false From 004c3d0aa8ea46758067f54f085ae68397592735 Mon Sep 17 00:00:00 2001 From: Ed Date: Mon, 22 Dec 2025 02:48:19 -0600 Subject: [PATCH 6/8] Made csrf check optional for extra services --- helm/revproxy/templates/configMaps.yaml | 2 ++ helm/revproxy/values.yaml | 1 + 2 files changed, 3 insertions(+) diff --git a/helm/revproxy/templates/configMaps.yaml b/helm/revproxy/templates/configMaps.yaml index e185295e7..3d3703f21 100644 --- a/helm/revproxy/templates/configMaps.yaml +++ b/helm/revproxy/templates/configMaps.yaml @@ -25,9 +25,11 @@ data: {{- range .Values.extraServices }} {{ printf "%s-service.conf" .name }}: | location {{ .path }}/ { + {{- if .csrfCheck -}} if ($csrf_check !~ ^ok-\S.+$) { return 403 "failed csrf check"; } + {{- end }} {{- if and .authzPolicy .authzService -}} set $authz_resource "/{{ .authzPolicy }}"; set $authz_method "access"; diff --git a/helm/revproxy/values.yaml b/helm/revproxy/values.yaml index 38f9a75ea..04f6001c4 100644 --- a/helm/revproxy/values.yaml +++ b/helm/revproxy/values.yaml @@ -256,6 +256,7 @@ extraServices: # serviceName: protein-paint # authzPolicy: "protein-paint" # authzService: "protein-paint" +# csrfCheck: true # -- (bool) Whether to enable robots.txt generation and serving. 
enableRobotsTxt: false From 9010bbbf65f8aeab25bb4786aaca0eddaf38f149 Mon Sep 17 00:00:00 2001 From: Ed Date: Mon, 5 Jan 2026 15:13:46 -0600 Subject: [PATCH 7/8] Made csrf check optional for extra services --- helm/gen3/Chart.yaml | 4 ++-- helm/gen3/README.md | 2 +- helm/revproxy/Chart.yaml | 2 +- helm/revproxy/README.md | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/helm/gen3/Chart.yaml b/helm/gen3/Chart.yaml index 079015239..01350ba71 100644 --- a/helm/gen3/Chart.yaml +++ b/helm/gen3/Chart.yaml @@ -104,7 +104,7 @@ dependencies: repository: "file://../requestor" condition: requestor.enabled - name: revproxy - version: 0.1.49 + version: 0.1.50 repository: "file://../revproxy" condition: revproxy.enabled - name: sheepdog @@ -177,7 +177,7 @@ type: application # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.115 +version: 0.2.116 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/helm/gen3/README.md b/helm/gen3/README.md index ad1f505a8..42c007b34 100644 --- a/helm/gen3/README.md +++ b/helm/gen3/README.md @@ -49,7 +49,7 @@ Helm chart to deploy Gen3 Data Commons | file://../peregrine | peregrine | 0.1.36 | | file://../portal | portal | 0.1.50 | | file://../requestor | requestor | 0.1.28 | -| file://../revproxy | revproxy | 0.1.49 | +| file://../revproxy | revproxy | 0.1.50 | | file://../sheepdog | sheepdog | 0.1.36 | | file://../sower | sower | 0.1.39 | | file://../ssjdispatcher | ssjdispatcher | 0.1.38 | diff --git a/helm/revproxy/Chart.yaml b/helm/revproxy/Chart.yaml index 358d44cbd..d5b1a3597 100644 --- a/helm/revproxy/Chart.yaml +++ b/helm/revproxy/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. 
This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.49 +version: 0.1.50 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/helm/revproxy/README.md b/helm/revproxy/README.md index d90c5e3d9..4e94c938b 100644 --- a/helm/revproxy/README.md +++ b/helm/revproxy/README.md @@ -1,6 +1,6 @@ # revproxy -![Version: 0.1.49](https://img.shields.io/badge/Version-0.1.49-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: master](https://img.shields.io/badge/AppVersion-master-informational?style=flat-square) +![Version: 0.1.50](https://img.shields.io/badge/Version-0.1.50-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: master](https://img.shields.io/badge/AppVersion-master-informational?style=flat-square) A Helm chart for gen3 revproxy From bfef9b83da7f9e3dc57f9445494229fe010ba43d Mon Sep 17 00:00:00 2001 From: Ed Date: Mon, 5 Jan 2026 15:16:50 -0600 Subject: [PATCH 8/8] Made csrf check optional for extra services --- helm/revproxy/Chart.yaml | 2 +- helm/revproxy/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/revproxy/Chart.yaml b/helm/revproxy/Chart.yaml index d5b1a3597..51cb1a41f 100644 --- a/helm/revproxy/Chart.yaml +++ b/helm/revproxy/Chart.yaml @@ -25,5 +25,5 @@ appVersion: "master" dependencies: - name: common - version: 0.1.28 + version: 0.1.29 repository: file://../common diff --git a/helm/revproxy/README.md b/helm/revproxy/README.md index 4e94c938b..e5f503049 100644 --- a/helm/revproxy/README.md +++ b/helm/revproxy/README.md @@ -8,7 +8,7 
@@ A Helm chart for gen3 revproxy | Repository | Name | Version | |------------|------|---------| -| file://../common | common | 0.1.28 | +| file://../common | common | 0.1.29 | ## Values