From d646fe002e11980b5989650400ee1baa40a6d4d8 Mon Sep 17 00:00:00 2001 From: Arnav Sharma <2006arnavsharma@gmail.com> Date: Mon, 1 Dec 2025 14:45:27 +0000 Subject: [PATCH 1/3] fix(parser): support non-standard SCM connection strings (fixes #1645) --- .../data/cleanup/GithubRepoExtractor.scala | 2 +- .../scaladex/data/cleanup/ScmInfoParser.scala | 3 ++- .../data/cleanup/ScmInfoParserTests.scala | 22 +++++++++++-------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/modules/data/src/main/scala/scaladex/data/cleanup/GithubRepoExtractor.scala b/modules/data/src/main/scala/scaladex/data/cleanup/GithubRepoExtractor.scala index fcb745bfc..9e776e435 100644 --- a/modules/data/src/main/scala/scaladex/data/cleanup/GithubRepoExtractor.scala +++ b/modules/data/src/main/scala/scaladex/data/cleanup/GithubRepoExtractor.scala @@ -56,7 +56,7 @@ class GithubRepoExtractor(paths: DataPaths): val fromPoms = pom.scm match case Some(scm) => List(scm.connection, scm.developerConnection, scm.url).flatten - .flatMap(ScmInfoParser.parse) + .flatMap(ScmInfoParser.parseRawConnection) .filter(g => !g.organization.isEmpty() && !g.repository.isEmpty()) case None => List() diff --git a/modules/data/src/main/scala/scaladex/data/cleanup/ScmInfoParser.scala b/modules/data/src/main/scala/scaladex/data/cleanup/ScmInfoParser.scala index 0c753c5b1..b4cb67a00 100644 --- a/modules/data/src/main/scala/scaladex/data/cleanup/ScmInfoParser.scala +++ b/modules/data/src/main/scala/scaladex/data/cleanup/ScmInfoParser.scala @@ -20,13 +20,14 @@ object ScmInfoParser extends Parsers: else v private def ScmUrl[A: P] = P( + // "git:" is optional to handle "scm:git@github.com:..." format "scm:".? ~ "git:".? ~ ("git@" | "https://" | "git://" | ("ssh://" ~ "git@".?) | "//") ~ "github.com" ~ (":" | "/") ~ Segment .rep(1) .! 
~ "/" ~ Segment.rep(1).!.map(removeDotGit) ) - def parse(scmInfo: String): Option[Project.Reference] = + def parseRawConnection(scmInfo: String): Option[Project.Reference] = fastparse.parse(scmInfo, x => ScmUrl(x)) match case Parsed.Success((organization, repo), _) => Some(Project.Reference.from(organization, repo)) diff --git a/modules/data/src/test/scala/scaladex/data/cleanup/ScmInfoParserTests.scala b/modules/data/src/test/scala/scaladex/data/cleanup/ScmInfoParserTests.scala index 62d3b13ef..53b03381b 100644 --- a/modules/data/src/test/scala/scaladex/data/cleanup/ScmInfoParserTests.scala +++ b/modules/data/src/test/scala/scaladex/data/cleanup/ScmInfoParserTests.scala @@ -9,36 +9,40 @@ class ScmInfoParserTests extends AnyFunSpec with Matchers: it("correctly parse valid SCM strings") { // Implicit protocol ScmInfoParser - .parse("scm:git:git@github.com:foobarbuz/example.git") + .parseRawConnection("scm:git:git@github.com:foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") // HTTPS ScmInfoParser - .parse("scm:https://github.com/foobarbuz/example.git") + .parseRawConnection("scm:https://github.com/foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") ScmInfoParser - .parse("scm:https://github.com/foobarbuz/example") + .parseRawConnection("scm:https://github.com/foobarbuz/example") .map(_.toString) shouldBe Some("foobarbuz/example") // Git ScmInfoParser - .parse("scm:git:git://github.com:foobarbuz/example.git") + .parseRawConnection("scm:git:git://github.com:foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") ScmInfoParser - .parse("scm:git://github.com:foobarbuz/example.git") + .parseRawConnection("scm:git://github.com:foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") // SSH ScmInfoParser - .parse("scm:git:ssh://git@github.com:foobarbuz/example.git") + .parseRawConnection("scm:git:ssh://git@github.com:foobarbuz/example.git") .map(_.toString) shouldBe 
Some("foobarbuz/example") ScmInfoParser - .parse("scm:git:ssh://github.com:foobarbuz/example.git") + .parseRawConnection("scm:git:ssh://github.com:foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") // Unknown protocol ScmInfoParser - .parse("scm:git:unknown://git@github.com:foobarbuz/example.git") + .parseRawConnection("scm:git:unknown://git@github.com:foobarbuz/example.git") .map(_.toString) shouldBe None ScmInfoParser - .parse("scm:git:unknown://github.com:foobarbuz/example.git") + .parseRawConnection("scm:git:unknown://github.com:foobarbuz/example.git") .map(_.toString) shouldBe None + + ScmInfoParser + .parseRawConnection("scm:git@github.com:mghmay/play-json-shaper.git") + .map(_.toString) shouldBe Some("mghmay/play-json-shaper") } } end ScmInfoParserTests From dfc6be390a37cfbd7ab6326c02dafb9f8d602e49 Mon Sep 17 00:00:00 2001 From: Arnav Sharma <2006arnavsharma@gmail.com> Date: Tue, 2 Dec 2025 23:46:14 +0530 Subject: [PATCH 2/3] Remove RFC reference and optional "git:" comments from ScmInfoParser --- .../src/main/scala/scaladex/data/cleanup/ScmInfoParser.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/data/src/main/scala/scaladex/data/cleanup/ScmInfoParser.scala b/modules/data/src/main/scala/scaladex/data/cleanup/ScmInfoParser.scala index b4cb67a00..f9e555b44 100644 --- a/modules/data/src/main/scala/scaladex/data/cleanup/ScmInfoParser.scala +++ b/modules/data/src/main/scala/scaladex/data/cleanup/ScmInfoParser.scala @@ -9,7 +9,6 @@ import fastparse.* object ScmInfoParser extends Parsers: import fastparse.NoWhitespace.* - // More info in Rfc3986 private def Unreserved[A: P] = P(Alpha | Digit | "-".! | ".".! | "_".! | "~".!).! private def Segment[A: P] = P(Unreserved | SubDelims | ":" | "@").! @@ -20,7 +19,6 @@ object ScmInfoParser extends Parsers: else v private def ScmUrl[A: P] = P( - // "git:" is optional to handle "scm:git@github.com:..." format "scm:".? ~ "git:".? ~ ("git@" | "https://" | "git://" | ("ssh://" ~ "git@".?) 
| "//") ~ "github.com" ~ (":" | "/") ~ Segment .rep(1) From b267622aa30e161bf4f156057ef4d42f266d1543 Mon Sep 17 00:00:00 2001 From: Arnav Sharma <2006arnavsharma@gmail.com> Date: Tue, 2 Dec 2025 23:46:50 +0530 Subject: [PATCH 3/3] Clean up comments in ScmInfoParserTests Removed comments for different SCM protocols in tests. --- .../scala/scaladex/data/cleanup/ScmInfoParserTests.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/modules/data/src/test/scala/scaladex/data/cleanup/ScmInfoParserTests.scala b/modules/data/src/test/scala/scaladex/data/cleanup/ScmInfoParserTests.scala index 53b03381b..8ce6ce075 100644 --- a/modules/data/src/test/scala/scaladex/data/cleanup/ScmInfoParserTests.scala +++ b/modules/data/src/test/scala/scaladex/data/cleanup/ScmInfoParserTests.scala @@ -7,39 +7,33 @@ import org.scalatest.matchers.should.Matchers class ScmInfoParserTests extends AnyFunSpec with Matchers: describe("ScmInfoParse") { it("correctly parse valid SCM strings") { - // Implicit protocol ScmInfoParser .parseRawConnection("scm:git:git@github.com:foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") - // HTTPS ScmInfoParser .parseRawConnection("scm:https://github.com/foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") ScmInfoParser .parseRawConnection("scm:https://github.com/foobarbuz/example") .map(_.toString) shouldBe Some("foobarbuz/example") - // Git ScmInfoParser .parseRawConnection("scm:git:git://github.com:foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") ScmInfoParser .parseRawConnection("scm:git://github.com:foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") - // SSH ScmInfoParser .parseRawConnection("scm:git:ssh://git@github.com:foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") ScmInfoParser .parseRawConnection("scm:git:ssh://github.com:foobarbuz/example.git") .map(_.toString) shouldBe Some("foobarbuz/example") - // Unknown protocol 
ScmInfoParser .parseRawConnection("scm:git:unknown://git@github.com:foobarbuz/example.git") .map(_.toString) shouldBe None ScmInfoParser .parseRawConnection("scm:git:unknown://github.com:foobarbuz/example.git") .map(_.toString) shouldBe None - ScmInfoParser .parseRawConnection("scm:git@github.com:mghmay/play-json-shaper.git") .map(_.toString) shouldBe Some("mghmay/play-json-shaper")