Skip to content

Commit

Permalink
Fix Scaladex API integration
Browse files Browse the repository at this point in the history
  • Loading branch information
WojciechMazur committed Aug 14, 2024
1 parent 108c013 commit 01fd851
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 144 deletions.
130 changes: 59 additions & 71 deletions coordinator/src/main/scala/Scaladex.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,84 +3,72 @@ import java.util.concurrent.TimeUnit.SECONDS
import scala.concurrent.*
import scala.concurrent.duration.*
import java.io.IOException
import java.time.Instant
import java.time.LocalDate

object Scaladex {
// Paging information carried by a paginated response: current page, number of pages, total item count
case class Pagination(current: Int, pageCount: Int, totalSize: Int)
// releaseDate is always UTC zoned
case class ArtifactMetadata(
// Base URL of the Scaladex service; the API endpoint paths used in this object are appended to it
final val ScaladexUrl = "https://index.scala-lang.org"

/** Performs an HTTP GET on `url`, retrying transient failures with exponential backoff.
  *
  * Retried failures are request timeouts and request errors whose message mentions `GOAWAY`
  * (presumably the HTTP/2 GOAWAY frame reported by the underlying client). The delay starts at
  * 1 second, doubles after every attempt and is capped at 60 seconds; the number of attempts is
  * not limited. Any other failure is propagated unchanged.
  *
  * @param url absolute URL to fetch
  * @return the response of the first attempt that did not hit a retried failure
  */
private def asyncGetWithRetry(url: String): AsyncResponse[requests.Response] = {
  // Logs the failure, waits for the current backoff and schedules the next attempt
  def retryAfter(reason: String, backoffSeconds: Int): AsyncResponse[requests.Response] = {
    Console.err.println(
      s"Failed to fetch artifact metadata, Scaladex request $reason, retry with backoff ${backoffSeconds}s for $url"
    )
    // The sleep occupies a thread of the execution context; `blocking` lets the pool compensate
    blocking(SECONDS.sleep(backoffSeconds))
    tryGet((backoffSeconds * 2).min(60))
  }
  def tryGet(backoffSeconds: Int): AsyncResponse[requests.Response] =
    Future { requests.get(url) }
      .recoverWith {
        case _: requests.TimeoutException =>
          retryAfter("timeout", backoffSeconds)
        // getMessage may be null, so it is wrapped before inspecting it
        case e: requests.RequestsException if Option(e.getMessage).exists(_.contains("GOAWAY")) =>
          retryAfter("terminated", backoffSeconds)
      }
  tryGet(1)
}

/** Lists the projects returned by the `/api/projects` endpoint.
  *
  * The response body is decoded as a JSON list of `organization`/`repository` entries, each of
  * which is converted into a `Project`.
  */
def projects: AsyncResponse[Seq[Project]] = {
  // Mirrors the JSON shape of a single entry of the response
  case class ProjectEntry(organization: String, repository: String)
  for response <- asyncGetWithRetry(s"$ScaladexUrl/api/projects")
  yield {
    val entries = fromJson[List[ProjectEntry]](response.text())
    entries.map(entry => Project(entry.organization, entry.repository))
  }
}

// Coordinates (group id, artifact id, version) of one artifact, as decoded from a project's artifacts listing
case class ProjectArtifact(groupId: String, artifactId: String, version: String)
/** Lists the artifacts published by `project`, as returned by
  * `/api/projects/<org>/<name>/artifacts`.
  *
  * @param project project whose `org` and `name` select the endpoint
  * @return the decoded artifact coordinates
  */
def artifacts(project: Project): AsyncResponse[Seq[ProjectArtifact]] = {
  val endpoint = s"$ScaladexUrl/api/projects/${project.org}/${project.name}/artifacts"
  asyncGetWithRetry(endpoint).map(response => fromJson[Seq[ProjectArtifact]](response.text()))
}

case class Artifact(
groupId: String,
artifactId: String,
version: String,
releaseDate: java.time.OffsetDateTime
)
case class ArtifactMetadataResponse(
pagination: Pagination,
items: List[ArtifactMetadata]
)
artifactName: String,
project: String,
releaseDate: Long, // epoch-millis
licenses: Seq[String],
language: String,
platform: String
):
/** Calendar day of the release, derived from the epoch-millis `releaseDate` interpreted in UTC.
  *
  * An `Instant` carries no zone, so `LocalDate.from(instant)` always throws a
  * `DateTimeException`; the instant has to be placed at an offset before a date can be read.
  */
def releaseLocalData: LocalDate =
  Instant.ofEpochMilli(releaseDate).atOffset(java.time.ZoneOffset.UTC).toLocalDate

/** Fetches the details of a single artifact from
  * `/api/artifacts/<groupId>/<artifactId>/<version>`.
  *
  * @param artifact coordinates selecting the artifact
  * @return the decoded `Artifact`
  */
def artifact(artifact: ProjectArtifact): AsyncResponse[Artifact] = {
  val endpoint =
    s"$ScaladexUrl/api/artifacts/${artifact.groupId}/${artifact.artifactId}/${artifact.version}"
  asyncGetWithRetry(endpoint).map(response => fromJson[Artifact](response.text()))
}

// Summary of a project, as decoded from the body of an `/api/project` response
case class ProjectSummary(
groupId: String,
artifacts: List[String], // List of artifacts with suffixes
version: String, // latest known versions
versions: List[String] // all published versions
)

// Base URL of the Scaladex service; the API endpoint paths used in this object are appended to it
final val ScaladexUrl = "https://index.scala-lang.org"

/** Fetches and decodes the metadata of `groupId:artifactId` from
  * `/api/artifacts/<groupId>/<artifactId>`.
  *
  * The following failures are retried without an attempt limit, using an exponential backoff
  * that starts at 1 second, doubles every time and is capped at 60 seconds:
  *   - an `org.jsoup.HttpStatusException` with status 503, unless the current thread is
  *     interrupted,
  *   - a request timeout,
  *   - a request error whose message mentions `GOAWAY` (presumably the HTTP/2 GOAWAY frame
  *     reported by the underlying client).
  * Any other failure is propagated unchanged.
  *
  * @param groupId    group id of the artifact
  * @param artifactId artifact id, including any suffix expected by the endpoint
  */
def artifactMetadata(
    groupId: String,
    artifactId: String
): AsyncResponse[ArtifactMetadataResponse] = {
  // Logs the failure, waits for the current backoff and schedules the next attempt
  def retryAfter(reason: String, backoffSeconds: Int): AsyncResponse[ArtifactMetadataResponse] = {
    Console.err.println(
      s"Failed to fetch artifact metadata, Scaladex $reason, retry with backoff ${backoffSeconds}s for $groupId:$artifactId"
    )
    // The sleep occupies a thread of the execution context; `blocking` lets the pool compensate
    blocking(SECONDS.sleep(backoffSeconds))
    tryFetch((backoffSeconds * 2).min(60))
  }
  def tryFetch(backoffSeconds: Int): AsyncResponse[ArtifactMetadataResponse] =
    Future {
      val response = requests.get(
        url = s"$ScaladexUrl/api/artifacts/$groupId/$artifactId"
      )
      fromJson[ArtifactMetadataResponse](response.text())
    }.recoverWith {
      // Thread.interrupted() also clears the interrupt flag of the current thread
      case err: org.jsoup.HttpStatusException
          if err.getStatusCode == 503 && !Thread.interrupted() =>
        retryAfter("unavailable", backoffSeconds)
      case _: requests.TimeoutException =>
        retryAfter("request timeout", backoffSeconds)
      // getMessage may be null, so it is wrapped before inspecting it
      case e: requests.RequestsException if Option(e.getMessage).exists(_.contains("GOAWAY")) =>
        retryAfter("request terminated", backoffSeconds)
    }
  tryFetch(1)
}

/** Queries `/api/project` for the JVM summary of `organization/repository` built for the given
  * Scala binary version.
  *
  * Yields `None` when the response is reported as empty, which means the project defines no JVM
  * modules for that Scala version. A request timeout is retried after a random pause of less
  * than 10 seconds; the number of retries is not limited.
  */
def projectSummary(
    organization: String,
    repository: String,
    scalaBinaryVersion: String
): AsyncResponse[Option[ProjectSummary]] = {
  val query = Map(
    "organization" -> organization,
    "repository" -> repository,
    "target" -> "JVM",
    "scalaVersion" -> scalaBinaryVersion
  )
  val attempt = Future {
    val response = requests.get(url = s"$ScaladexUrl/api/project", params = query)
    // NOTE(review): verify the element type of `contentLength`; if it holds the raw header
    // text, the comparison with the number 0 can never succeed
    if response.contentLength.contains(0) then None
    else Some(fromJson[ProjectSummary](response.text()))
  }
  attempt.recoverWith { case _: requests.TimeoutException =>
    val maxPauseMillis = 10.seconds.toMillis.toInt
    Thread.sleep(scala.util.Random.nextInt(maxPauseMillis))
    projectSummary(organization, repository, scalaBinaryVersion)
  }
}

}
115 changes: 42 additions & 73 deletions coordinator/src/main/scala/deps.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,13 @@ def loadProjects(scalaBinaryVersion: String): Seq[StarredProject] =
d.select(".list-result .row").asScala.flatMap { e =>
e.select("h4").get(0).text().takeWhile(!_.isWhitespace) match {
case s"${organization}/${repository}" =>
for
ghStars <- e.select(".stats [title=Stars]")
.asScala
.headOption
.flatMap(_.text.toIntOption)
.orElse(Some(-1))
yield
StarredProject(organization, repository)(ghStars)
for ghStars <- e
.select(".stats [title=Stars]")
.asScala
.headOption
.flatMap(_.text.toIntOption)
.orElse(Some(-1))
yield StarredProject(organization, repository)(ghStars)
case _ => None
}
}
Expand All @@ -58,72 +57,42 @@ enum CandidateProject:
case BuildSelected(project: Project, mvs: Seq[ModuleInVersion])
// A project together with its per-version module listings
case class ProjectModules(project: Project, mvs: Seq[ModuleInVersion])

def loadScaladexProject(
scalaBinaryVersion: String,
releaseCutOffDate: Option[LocalDate]
)(
def loadScaladexProject(releaseCutOffDate: Option[LocalDate] = None)(
project: Project
): AsyncResponse[ProjectModules] =
): AsyncResponse[ProjectModules] = {
import util.*
val binaryVersionSuffix = "_" + scalaBinaryVersion
Scaladex
.projectSummary(project.org, project.name, scalaBinaryVersion)
.flatMap {
case None =>
Console.err.println(
s"No project summary for ${project.org}/${project.name}"
)
Future.successful(Nil)
case Some(projectSummary) =>
val releaseDates = collection.mutable.Map.empty[String, OffsetDateTime]
case class VersionRelease(version: String, releaseDate: OffsetDateTime)
for
artifactsMetadata <- Future
.traverse(projectSummary.artifacts) { artifact =>
Scaladex
.artifactMetadata(
groupId = projectSummary.groupId,
artifactId = s"${artifact}_3"
)
.map { response =>
if (response.pagination.pageCount != 1)
Console.err.println(
"Scaladex now implementes pagination! Ignoring artifact metadata from additional pages"
)
// Order versions by their release date; this should be more stable for hash-based pre-releases.
// The previous approach of sorting by SemVersion was not stable and could lead to runtime errors (due to a non-transitive ordering of elements)
val versions = response.items
.filter(v =>
releaseCutOffDate
.forall(_.isAfter(v.releaseDate.toLocalDate()))
)
.tapEach(v => releaseDates += v.version -> v.releaseDate)
.map(_.version)
artifact -> versions
}
}
.map(_.toMap)
orderedVersions = projectSummary.versions
.flatMap(v => releaseDates.get(v).map(VersionRelease(v, _)))
.sortBy(_.releaseDate)(using
summon[Ordering[OffsetDateTime]].reverse
)
.map(_.version)
yield for version <- orderedVersions
yield ModuleInVersion(
version,
modules = artifactsMetadata.collect {
case (module, versions) if versions.contains(version) => module
}.toSeq
)
}
.map { moduleVersions =>
val modules = moduleVersions
.filter(_.modules.nonEmpty)
.map(mvs => VersionedModules(mvs, mvs.version))
.map(_.modules)
ProjectModules(project, modules)
}
// Resolves the Scala 3 JVM modules of `project`, grouped by version and ordered from the most
// recent release to the oldest. Versions that are not released before `releaseCutOffDate` are
// left out of the result.
for {
  scala3JvmArtifacts <- Scaladex
    .artifacts(project)
    .map(_.filter { artifact =>
      // The platform-specific suffixes are tested first, so e.g. `foo_native0.4_3` is rejected
      artifact.artifactId match {
        case s"${_}_native${_}" => false
        case s"${_}_sjs${_}"    => false
        case s"${_}_3"          => true
        case _                  => false
      }
    })
  artifactsByVersion = scala3JvmArtifacts.groupBy(_.version)
  versionReleaseData <- Future
    .traverse(artifactsByVersion.toSeq) { case (version, artifacts) =>
      // groupBy never produces an empty group, so `head` is safe; one artifact per version is
      // enough to learn the release date of that version
      Scaladex
        .artifact(artifacts.head)
        .map { artifact =>
          // A rejected version must become None: Future.filter would fail the future with
          // NoSuchElementException and thereby fail the whole traversal
          Option.when(releaseCutOffDate.forall(_.isAfter(artifact.releaseLocalData))) {
            version -> artifact.releaseDate
          }
        }
    }
    .map(_.flatten.toMap)
  orderedVersions = versionReleaseData.toSeq
    .sortBy { case (_, releaseMillis) => -releaseMillis } // release date (epoch-millis), descending
    .map { case (version, _) => version }
  versionModules = orderedVersions.map { version =>
    ModuleInVersion(
      version = version,
      modules = artifactsByVersion(version).map(_.artifactId.stripSuffix("_3"))
    )
  }
} yield ProjectModules(project, versionModules)
}

// Associates a ModuleInVersion entry with a SemVersion
case class VersionedModules(modules: ModuleInVersion, semVersion: SemVersion)
// One module identified by name and version, carrying a Project `p`
// NOTE(review): presumably `p` is the project that publishes the module — confirm against usages
case class ModuleVersion(name: String, version: String, p: Project)
Expand Down Expand Up @@ -219,7 +188,7 @@ def loadDepenenecyGraph(
if customProjects.contains(p) then Future.successful(CandidateProject.BuildAll(p))
else
cachedAsync { (p: Project) =>
loadScaladexProject(scalaBinaryVersion, releaseCutOffDate)(p)
loadScaladexProject(releaseCutOffDate)(p)
.map(projectModulesFilter(patterns))
}(p).map { case ProjectModules(project, mvs) =>
CandidateProject.BuildSelected(project, mvs)
Expand Down

0 comments on commit 01fd851

Please sign in to comment.