diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 4aea442bc3ce1..bd461a8712378 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -500,8 +500,25 @@ class SparkContext(config: SparkConf) extends Logging {
 
     _ui =
       if (conf.get(UI_ENABLED)) {
-        Some(SparkUI.create(Some(this), _statusStore, _conf, _env.securityManager, appName, "",
-          startTime))
+        // Prefer spark.ui.proxyBasePath; fall back to spark.ui.proxyBase for backward
+        // compatibility with infrastructure that passes --conf spark.ui.proxyBase=... directly.
+        val rawBasePath = _conf.get(UI_PROXY_BASE_PATH)
+          .orElse(_conf.getOption("spark.ui.proxyBase"))
+          .getOrElse("")
+        // Strip surrounding whitespace and every leading/trailing slash. A blank or slash-only
+        // value means "no base path": normalizing it to "/" would mount handlers under "//..."
+        // and set spark.ui.proxyBase to "/", so generated links would start with "//".
+        val trimmedBasePath = rawBasePath.trim.replaceAll("^/+|/+$", "")
+        val basePath = if (trimmedBasePath.nonEmpty) {
+          val normalized = "/" + trimmedBasePath
+          logInfo(s"Spark UI proxyBasePath configured: " +
+            s"raw='$rawBasePath', normalized='$normalized'")
+          System.setProperty("spark.ui.proxyBase", normalized)
+          normalized
+        } else {
+          ""
+        }
+        Some(SparkUI.create(Some(this), _statusStore, _conf, _env.securityManager, appName,
+          basePath, startTime))
       } else {
         // For tests, do not enable the UI
         None
@@ -617,7 +634,15 @@ class SparkContext(config: SparkConf) extends Logging {
       _conf.set(ShuffleDataIOUtils.SHUFFLE_SPARK_CONF_PREFIX + k, v)
     }
 
-    if (_conf.get(UI_REVERSE_PROXY)) {
+    // Only apply the YARN-style reverse proxy URL when no explicit base path is in effect.
+    // The test mirrors the normalization applied when the UI is created: a blank or slash-only
+    // spark.ui.proxyBasePath / spark.ui.proxyBase does not count as configured. Checking mere
+    // presence instead would leave spark.ui.proxyBase unset in that case, and overwriting an
+    // explicit base path here would break link generation in all generated HTML.
+    val explicitBasePath = _conf.get(UI_PROXY_BASE_PATH)
+      .orElse(_conf.getOption("spark.ui.proxyBase"))
+      .exists(_.trim.exists(_ != '/'))
+    if (_conf.get(UI_REVERSE_PROXY) && !explicitBasePath) {
       val proxyUrl = _conf.get(UI_REVERSE_PROXY_URL).getOrElse("").stripSuffix("/")
       System.setProperty("spark.ui.proxyBase", proxyUrl + "/proxy/" + _applicationId)
     }
diff --git a/core/src/main/scala/org/apache/spark/internal/config/UI.scala b/core/src/main/scala/org/apache/spark/internal/config/UI.scala
index d0db5a9085481..d7fbe8eafca9a 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/UI.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/UI.scala
@@ -214,6 +214,15 @@ private[spark] object UI {
     .stringConf
     .createOptional
 
+  val UI_PROXY_BASE_PATH = ConfigBuilder("spark.ui.proxyBasePath")
+    .doc("Base path prefix for the Spark UI. Used when Spark is served behind a reverse proxy " +
+      "at a non-root path. All UI servlet handlers will be mounted under this prefix, and " +
+      "spark.ui.proxyBase system property will be set so UIUtils.uiRoot() generates correct " +
+      "links. Surrounding slashes are normalized; a blank or slash-only value is ignored.")
+    .version("3.5.0")
+    .stringConf
+    .createOptional
+
   val CUSTOM_EXECUTOR_LOG_URL = ConfigBuilder("spark.ui.custom.executor.log.url")
     .doc("Specifies custom spark executor log url for supporting external log service instead of " +
       "using cluster managers' application log urls in the Spark UI. Spark will support " +
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
index cc21c1488f67c..656fc2c0f9384 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
@@ -28,7 +28,7 @@ import org.glassfish.jersey.server.ServerProperties
 import org.glassfish.jersey.servlet.ServletContainer
 
 import org.apache.spark.SecurityManager
-import org.apache.spark.ui.{SparkUI, UIUtils}
+import org.apache.spark.ui.{JettyUtils, SparkUI, UIUtils}
 
 /**
  * Main entry point for serving spark application metrics as json, using JAX-RS.
@@ -57,9 +57,21 @@ private[v1] class ApiRootResource extends ApiRequestContext {
 
 private[spark] object ApiRootResource {
 
-  def getServletHandler(uiRoot: UIRoot): ServletContextHandler = {
+  /**
+   * Creates the servlet context handler for the REST API.
+   *
+   * @param uiRoot stored on the servlet context through UIRootFromServletContext
+   * @param basePath proxy base path; the API is mounted at basePath + "/api", and a non-empty
+   *                 value is also published as the PROXY_BASE_PATH_ATTRIBUTE context attribute
+   */
+  def getServletHandler(uiRoot: UIRoot, basePath: String = ""): ServletContextHandler = {
     val jerseyContext = new ServletContextHandler(ServletContextHandler.NO_SESSIONS)
-    jerseyContext.setContextPath("/api")
+    // Drop a trailing slash from basePath before appending "/api", so that "/proxy/" does not
+    // become "/proxy//api". An empty basePath yields the plain "/api".
+    jerseyContext.setContextPath(basePath.stripSuffix("/") + "/api")
+    if (basePath.nonEmpty) {
+      jerseyContext.setAttribute(JettyUtils.PROXY_BASE_PATH_ATTRIBUTE, basePath)
+    }
     val holder: ServletHolder = new ServletHolder(classOf[ServletContainer])
     holder.setInitParameter(ServerProperties.PROVIDER_PACKAGES, "org.apache.spark.status.api.v1")
     UIRootFromServletContext.setUiRoot(jerseyContext, uiRoot)
diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
index 9582bdbf52641..3db4d18247f52 100644
--- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.ui
 
-import java.net.{URI, URL, URLDecoder}
+import java.net.{URI, URLDecoder}
 import java.util.EnumSet
 import javax.servlet.DispatcherType
 import javax.servlet.http._
@@ -104,6 +104,8 @@ private[spark] object JettyUtils extends Logging {
     createServletHandler(path, createServlet(servletParams, conf), basePath)
   }
 
+  val PROXY_BASE_PATH_ATTRIBUTE = "spark.ui.proxyBasePath"
+
   /** Create a context handler that responds to a request with the given path prefix */
   def createServletHandler(
       path: String,
@@ -117,6 +119,9 @@ private[spark] object JettyUtils extends Logging {
     val contextHandler = new ServletContextHandler
     val holder = new ServletHolder(servlet)
     contextHandler.setContextPath(prefixedPath)
+    if (basePath.nonEmpty) {
+      contextHandler.setAttribute(PROXY_BASE_PATH_ATTRIBUTE, basePath)
+    }
     contextHandler.addServlet(holder, "/")
     contextHandler
   }
@@ -128,7 +133,6 @@ private[spark] object JettyUtils extends Logging {
       beforeRedirect: HttpServletRequest => Unit = x => (),
       basePath: String = "",
       httpMethods: Set[String] = Set("GET")): ServletContextHandler = {
-    val prefixedDestPath = basePath + destPath
     val servlet = new HttpServlet {
       override def doGet(request: HttpServletRequest, response: HttpServletResponse): Unit = {
         if (httpMethods.contains("GET")) {
@@ -146,9 +150,29 @@ private[spark] object JettyUtils extends Logging {
       }
       private def doRequest(request: HttpServletRequest, response: HttpServletResponse): Unit = {
         beforeRedirect(request)
-        // Make sure we don't end up with "//" in the middle
-        val newUrl = new URL(new URL(request.getRequestURL.toString), prefixedDestPath).toString
-        response.sendRedirect(newUrl)
+        // Derive the basePath dynamically from the matched context path at request time.
+        // request.getContextPath() is set by Jetty to the handler's mounted context path,
+        // which already includes the basePath (e.g. "/sparkrb/.../sparkui").
+        // Stripping srcPath from it gives us the effective basePath, regardless of whether
+        // spark.ui.proxyBasePath was configured. A trailing slash on srcPath is dropped first,
+        // since Jetty context paths do not end in "/"; this also covers srcPath == "/", where
+        // the suffix becomes empty and the context path already is the effective basePath.
+        val mountedSrcPath = srcPath.stripSuffix("/")
+        val effectiveBasePath = request.getContextPath.stripSuffix(mountedSrcPath)
+        // Make sure we don't end up with "//" in the middle
+        val prefixedDestPath = (effectiveBasePath + destPath).replaceAll("//+", "/")
+        // Set Location header directly instead of calling response.sendRedirect().
+        // sendRedirect() in the Servlet spec always converts the path to an absolute URL using
+        // request.getServerName()/getServerPort() -- the internal Spark address, not the external
+        // proxy URL. This causes the browser to receive e.g. http://10.x.x.x:4040/myapp/jobs/
+        // which it cannot reach. By setting the header ourselves we send the raw path and let
+        // the browser resolve it against the external proxy origin it actually used.
+        // RFC 7231 allows relative references in the Location header and all modern browsers
+        // handle them correctly.
+        // 302 Found is the status sendRedirect() itself would have used.
+        response.setStatus(HttpServletResponse.SC_FOUND)
+        response.setHeader("Location", prefixedDestPath)
+        logDebug(s"Redirect: ${request.getRequestURI} -> $prefixedDestPath")
       }
       // SPARK-5983 ensure TRACE is not supported
       protected override def doTrace(req: HttpServletRequest, res: HttpServletResponse): Unit = {
@@ -159,7 +183,10 @@ private[spark] object JettyUtils extends Logging {
   }
 
   /** Create a handler for serving files from a static directory */
-  def createStaticHandler(resourceBase: String, path: String): ServletContextHandler = {
+  def createStaticHandler(
+      resourceBase: String,
+      path: String,
+      basePath: String = ""): ServletContextHandler = {
     val contextHandler = new ServletContextHandler
     contextHandler.setInitParameter("org.eclipse.jetty.servlet.Default.gzip", "false")
     val staticHandler = new DefaultServlet
@@ -170,7 +197,15 @@ private[spark] object JettyUtils extends Logging {
       case None =>
         throw new Exception("Could not find resource path for Web UI: " + resourceBase)
     }
-    contextHandler.setContextPath(path)
+    val prefixedPath = if (basePath.nonEmpty) {
+      (basePath + path).stripSuffix("/")
+    } else {
+      path
+    }
+    contextHandler.setContextPath(prefixedPath)
+    if (basePath.nonEmpty) {
+      contextHandler.setAttribute(PROXY_BASE_PATH_ATTRIBUTE, basePath)
+    }
     contextHandler.addServlet(holder, "/")
     contextHandler
   }
@@ -570,7 +605,7 @@ private[spark] case class ServerInfo(
  * a servlet context without the trailing slash (e.g. "/jobs") - Jetty will send a redirect to the
  * same URL, but with a trailing slash.
*/ -private class ProxyRedirectHandler(_proxyUri: String) extends HandlerWrapper { +private class ProxyRedirectHandler(_proxyUri: String) extends HandlerWrapper with Logging { private val proxyUri = _proxyUri.stripSuffix("/") @@ -590,14 +625,36 @@ private class ProxyRedirectHandler(_proxyUri: String) extends HandlerWrapper { override def sendRedirect(location: String): Unit = { val newTarget = if (location != null) { val target = new URI(location) - // The target path should already be encoded, so don't re-encode it, just the - // proxy address part. - val proxyBase = UIUtils.uiRoot(req) - val proxyPrefix = if (proxyBase.nonEmpty) s"$proxyUri$proxyBase" else proxyUri - s"${res.encodeURL(proxyPrefix)}${target.getPath()}" + val targetPath = target.getPath() + // UIUtils.uiRoot is the authoritative source for the configured base path: + // it checks spark.ui.proxyBase sys prop, APPLICATION_WEB_PROXY_BASE env var, + // X-Forwarded-Context header, and servlet context attribute -- in that order. + // req.getContextPath is only set after a context is matched, so it can be empty + // for Jetty's own pre-dispatch trailing-slash redirects. Use uiRoot first. + val uiRootBase = UIUtils.uiRoot(req) + val contextBasePath = if (uiRootBase.nonEmpty) { + uiRootBase + } else { + Option(req.getContextPath).filter(_.nonEmpty).getOrElse("") + } + val result = if (contextBasePath.nonEmpty && targetPath.startsWith(contextBasePath)) { + // Redirect target already contains the basePath -- just prepend the proxy host + s"${res.encodeURL(proxyUri)}$targetPath" + } else { + // Jetty generated a short redirect (e.g. 
/jobs/) missing the basePath -- + // prepend proxyUri + basePath + val proxyPrefix = if (contextBasePath.nonEmpty) { + s"$proxyUri$contextBasePath" + } else { + proxyUri + } + s"${res.encodeURL(proxyPrefix)}$targetPath" + } + result } else { null } + logDebug(s"ProxyRedirect: '$location' -> '$newTarget'") super.sendRedirect(newTarget) } } diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index 685407c11208f..fcc69739d4440 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -94,6 +94,9 @@ private[spark] class SparkUI private ( /** Initialize all components of the server. */ def initialize(): Unit = { + if (basePath.nonEmpty) { + logInfo(s"Initializing SparkUI with basePath='$basePath'") + } val jobsTab = new JobsTab(this, store) attachTab(jobsTab) val stagesTab = new StagesTab(this, store) @@ -103,17 +106,18 @@ private[spark] class SparkUI private ( attachTab(new ExecutorsTab(this)) addStaticHandler(SparkUI.STATIC_RESOURCE_DIR) attachHandler(createRedirectHandler("/", "/jobs/", basePath = basePath)) - attachHandler(ApiRootResource.getServletHandler(this)) + attachHandler(ApiRootResource.getServletHandler(this, basePath)) if (sc.map(_.conf.get(UI_PROMETHEUS_ENABLED)).getOrElse(false)) { attachHandler(PrometheusResource.getServletHandler(this)) } // These should be POST only, but, the YARN AM proxy won't proxy POSTs attachHandler(createRedirectHandler( - "/jobs/job/kill", "/jobs/", jobsTab.handleKillRequest, httpMethods = Set("GET", "POST"))) + "/jobs/job/kill", "/jobs/", jobsTab.handleKillRequest, + basePath = basePath, httpMethods = Set("GET", "POST"))) attachHandler(createRedirectHandler( "/stages/stage/kill", "/stages/", stagesTab.handleKillRequest, - httpMethods = Set("GET", "POST"))) + basePath = basePath, httpMethods = Set("GET", "POST"))) } initialize() diff --git 
a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 286c0a1625150..6bd4543568bd9 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -209,18 +209,37 @@ private[spark] object UIUtils extends Logging {
   def uiRoot(request: HttpServletRequest): String = {
     // Knox uses X-Forwarded-Context to notify the application the base path
     val knoxBasePath = Option(request.getHeader("X-Forwarded-Context"))
+    // Check servlet context attribute set by JettyUtils when basePath is configured
+    val servletContextBasePath = Option(request.getServletContext)
+      .flatMap(ctx => Option(ctx.getAttribute(JettyUtils.PROXY_BASE_PATH_ATTRIBUTE)))
+      .map(_.toString)
     // SPARK-11484 - Use the proxyBase set by the AM, if not found then use env.
     sys.props.get("spark.ui.proxyBase")
       .orElse(sys.env.get("APPLICATION_WEB_PROXY_BASE"))
       .orElse(knoxBasePath)
+      .orElse(servletContextBasePath)
       .getOrElse("")
   }
 
+  /**
+   * Returns the part of `path` that follows `root` when `path` lies under `root` on a
+   * path-segment boundary, so that "/app" is not mistaken for a prefix of "/application".
+   * Returns None when `root` is empty or `path` is not under it.
+   */
+  private def relativeToRoot(root: String, path: String): Option[String] = {
+    val prefix = root.stripSuffix("/")
+    val underRoot = root.nonEmpty && (path == prefix || path.startsWith(prefix + "/"))
+    if (underRoot) Some(path.substring(prefix.length)) else None
+  }
+
   def prependBaseUri(
       request: HttpServletRequest,
       basePath: String = "",
       resource: String = ""): String = {
-    uiRoot(request) + basePath + resource
+    val root = uiRoot(request)
+    // A basePath that already lies under the UI root must not get the root prepended again.
+    val prefix = if (relativeToRoot(root, basePath).isDefined) "" else root
+    prefix + basePath + resource
   }
 
   def commonHeaderNodes(request: HttpServletRequest): Seq[Node] = {
@@ -287,10 +306,15 @@ private[spark] object UIUtils extends Logging {
     }
     val helpButton: Seq[Node] = helpText.map(tooltip(_, "top")).getOrElse(Seq.empty)
 
+    // Keep only the part of the active tab's base path that follows the UI root; a path that
+    // is not under the root is used unchanged.
+    val jsAppBasePath =
+      relativeToRoot(uiRoot(request), activeTab.basePath).getOrElse(activeTab.basePath)
+
 {commonHeaderNodes(request)} - + {if (showVisualization) vizHeaderNodes(request) else Seq.empty} {if (useDataTables) dataTablesHeaderNodes(request) else Seq.empty} + assert(conn.getResponseCode() === 
HttpServletResponse.SC_FOUND)
+        val location = Option(conn.getHeaderFields().get("Location"))
+          .map(_.get(0)).orNull
+        assert(location === s"$basePath/jobs/",
+          s"Root redirect should use basePath prefix, got: $location")
+      }
+    } finally {
+      stopServer(serverInfo)
+    }
+  }
+
+  test("redirect handler without basePath produces bare Location header") {
+    val (conf, securityMgr, sslOptions) = sslDisabledConf()
+    val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)
+    try {
+      val redirect = JettyUtils.createRedirectHandler("/", "/jobs/")
+      serverInfo.addHandler(redirect, securityMgr)
+
+      val serverAddr = s"http://$localhost:${serverInfo.boundPort}"
+
+      TestUtils.withHttpConnection(new URL(s"$serverAddr/")) { conn =>
+        // The Location is same-origin, so HttpURLConnection would follow it by default and
+        // report the status of the redirect target instead of the 302 under test.
+        conn.setInstanceFollowRedirects(false)
+        assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND)
+        val location = Option(conn.getHeaderFields().get("Location"))
+          .map(_.get(0)).orNull
+        assert(location === "/jobs/",
+          s"Without basePath, Location should be bare /jobs/, got: $location")
+      }
+    } finally {
+      stopServer(serverInfo)
+    }
+  }
+
+  test("servlet handler with basePath is accessible at prefixed path") {
+    val basePath = "/myproxy/sparkui"
+    val (conf, securityMgr, sslOptions) = sslDisabledConf()
+    val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)
+    try {
+      // Only the context handler is needed here; the servlet half of the pair is unused.
+      val (_, ctx) = newContext(s"$basePath/test")
+      serverInfo.addHandler(ctx, securityMgr)
+
+      val serverAddr = s"http://$localhost:${serverInfo.boundPort}"
+
+      // Accessible at prefixed path
+      assert(TestUtils.httpResponseCode(
+        new URL(s"$serverAddr$basePath/test/root")) === HttpServletResponse.SC_OK)
+
+      // Not accessible at bare path
+      assert(TestUtils.httpResponseCode(
+        new URL(s"$serverAddr/test/root")) === HttpServletResponse.SC_NOT_FOUND)
+    } finally {
+      stopServer(serverInfo)
+    }
+  }
+
+  test("createServletHandler sets PROXY_BASE_PATH_ATTRIBUTE when basePath is non-empty") {
+    val basePath = "/proxy/app123"
+    val servlet = new HttpServlet {
+      override def doGet(req: HttpServletRequest, res: HttpServletResponse): Unit = {
+        val attr = req.getServletContext.getAttribute(JettyUtils.PROXY_BASE_PATH_ATTRIBUTE)
+        res.setContentType("text/plain")
+        res.getWriter.write(String.valueOf(attr))
+      }
+    }
+    val handler = JettyUtils.createServletHandler("/test", servlet, basePath)
+
+    val (conf, securityMgr, sslOptions) = sslDisabledConf()
+    val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)
+    try {
+      serverInfo.addHandler(handler, securityMgr)
+      val serverAddr = s"http://$localhost:${serverInfo.boundPort}"
+      val body = TestUtils.httpResponseMessage(
+        new URL(s"$serverAddr$basePath/test/"))
+      assert(body === basePath,
+        s"PROXY_BASE_PATH_ATTRIBUTE should be '$basePath', got: $body")
+    } finally {
+      stopServer(serverInfo)
+    }
+  }
+
+  test("redirect handler with basePath + ProxyRedirectHandler rewrites Location correctly") {
+    val basePath = "/myproxy/sparkui"
+    val proxyRoot = "https://external.example.com"
+    val (conf, securityMgr, sslOptions) = sslDisabledConf()
+    conf.set(UI.PROXY_REDIRECT_URI, proxyRoot)
+
+    val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)
+    try {
+      val redirect = JettyUtils.createRedirectHandler("/", "/jobs/", basePath = basePath)
+      serverInfo.addHandler(redirect, securityMgr)
+
+      val serverAddr = s"http://$localhost:${serverInfo.boundPort}"
+
+      // spark.ui.proxyBase must be set for ProxyRedirectHandler to discover the basePath
+      val oldProp = sys.props.get("spark.ui.proxyBase")
+      try {
+        System.setProperty("spark.ui.proxyBase", basePath)
+
+        TestUtils.withHttpConnection(new URL(s"$serverAddr$basePath/")) { conn =>
+          // Inspect the 302 itself rather than whatever the redirect target answers.
+          conn.setInstanceFollowRedirects(false)
+          assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND)
+          val location = Option(conn.getHeaderFields().get("Location"))
+            .map(_.get(0)).orNull
+          assert(location === s"$proxyRoot$basePath/jobs/",
+            s"ProxyRedirectHandler should prepend proxyUri+basePath, got: $location")
+        }
+      } finally {
+        oldProp match {
+          case Some(v) => System.setProperty("spark.ui.proxyBase", v)
+          case None => System.clearProperty("spark.ui.proxyBase")
+        }
+      }
+    } finally {
+      stopServer(serverInfo)
+    }
+  }
+
+  test("static handler with basePath is accessible at prefixed path") {
+    val basePath = "/myproxy/sparkui"
+    val (conf, securityMgr, sslOptions) = sslDisabledConf()
+    val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)
+    try {
+      val handler = JettyUtils.createStaticHandler(
+        SparkUI.STATIC_RESOURCE_DIR, "/static", basePath)
+      serverInfo.addHandler(handler, securityMgr)
+
+      val serverAddr = s"http://$localhost:${serverInfo.boundPort}"
+
+      // bootstrap.min.css exists in static resources
+      assert(TestUtils.httpResponseCode(
+        new URL(s"$serverAddr$basePath/static/bootstrap.min.css")) === HttpServletResponse.SC_OK)
+
+      // Should not be accessible at bare /static
+      assert(TestUtils.httpResponseCode(
+        new URL(s"$serverAddr/static/bootstrap.min.css")) === HttpServletResponse.SC_NOT_FOUND)
+    } finally {
+      stopServer(serverInfo)
+    }
+  }
+
+  test("multiple redirect handlers with basePath each resolve their own effectiveBasePath") {
+    val basePath = "/myproxy/sparkui"
+    val (conf, securityMgr, sslOptions) = sslDisabledConf()
+    val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)
+    try {
+      val rootRedirect = JettyUtils.createRedirectHandler("/", "/jobs/", basePath = basePath)
+      val killRedirect = JettyUtils.createRedirectHandler(
+        "/jobs/job/kill", "/jobs/", basePath = basePath, httpMethods = Set("GET", "POST"))
+      serverInfo.addHandler(rootRedirect, securityMgr)
+      serverInfo.addHandler(killRedirect, securityMgr)
+
+      val serverAddr = s"http://$localhost:${serverInfo.boundPort}"
+
+      // Root redirect
+      TestUtils.withHttpConnection(new URL(s"$serverAddr$basePath/")) { conn =>
+        // Do not follow the same-origin redirect; the 302 and its Location are under test.
+        conn.setInstanceFollowRedirects(false)
+        assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND)
+        val location = Option(conn.getHeaderFields().get("Location"))
+          .map(_.get(0)).orNull
+        assert(location === s"$basePath/jobs/")
+      }
+
+      // Kill redirect
+      TestUtils.withHttpConnection(
+        new URL(s"$serverAddr$basePath/jobs/job/kill/")) { conn =>
+        // Do not follow the same-origin redirect; the 302 and its Location are under test.
+        conn.setInstanceFollowRedirects(false)
+        assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND)
+        val location = Option(conn.getHeaderFields().get("Location"))
+          .map(_.get(0)).orNull
+        assert(location === s"$basePath/jobs/")
+      }
+    } finally {
+      stopServer(serverInfo)
+    }
+  }
+
+  test("redirect handler at bare path returns 404 when basePath is configured") {
+    val basePath = "/myproxy/sparkui"
+    val (conf, securityMgr, sslOptions) = sslDisabledConf()
+    val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)
+    try {
+      val redirect = JettyUtils.createRedirectHandler("/src", "/dst", basePath = basePath)
+      serverInfo.addHandler(redirect, securityMgr)
+
+      val serverAddr = s"http://$localhost:${serverInfo.boundPort}"
+
+      // Prefixed path works
+      TestUtils.withHttpConnection(new URL(s"$serverAddr$basePath/src/")) { conn =>
+        // No handler is mounted at the redirect target, so following it would yield a 404.
+        conn.setInstanceFollowRedirects(false)
+        assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND)
+      }
+
+      // Bare path should 404
+      assert(TestUtils.httpResponseCode(
+        new URL(s"$serverAddr/src/")) === HttpServletResponse.SC_NOT_FOUND)
+    } finally {
+      stopServer(serverInfo)
+    }
+  }
+
   /**
    * Create a new context handler for the given path, with a single servlet that responds to
    * requests in `$path/root`.