Skip to content

Commit

Permalink
Scalafmt (#574)
Browse files Browse the repository at this point in the history
  • Loading branch information
mdellabitta authored Mar 21, 2024
1 parent 6ab738b commit d954add
Show file tree
Hide file tree
Showing 185 changed files with 9,957 additions and 6,868 deletions.
2 changes: 2 additions & 0 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
version=3.7.13
runner.dialect=scala213
3 changes: 2 additions & 1 deletion project/plugins.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ resolvers += "Typesafe Repository" at "https://repo.typesafe.com/typesafe/releas
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.6.1")
addSbtPlugin("com.codacy" % "sbt-codacy-coverage" % "3.0.3")
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0")
addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.10.0-RC1")
addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.10.0-RC1")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
61 changes: 34 additions & 27 deletions src/main/scala/dpla/ingestion3/confs/ConfUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,54 +9,61 @@ import scala.util.{Failure, Success, Try}

trait ConfUtils {

/**
* TODO: There are multiple "validateUrl" methods floating around
* the project. Need to find them all and consolidate.
/** TODO: There are multiple "validateUrl" methods floating around the
* project. Need to find them all and consolidate.
*
* @param url
* @return Boolean
* @return
* Boolean
*/
def validateUrl(url: String): Boolean = Try{ url match {
case str if str.startsWith("s3") => false
case str if str.startsWith("http") => new URL(url)
}}.isSuccess
def validateUrl(url: String): Boolean = Try {
url match {
case str if str.startsWith("s3") => false
case str if str.startsWith("http") => new URL(url)
}
}.isSuccess

/**
* Get the contents of the configuration file
/** Get the contents of the configuration file
*
* @param path Path to configuration file
* @return Contents of file as a String
* @param path
* Path to configuration file
* @return
* Contents of file as a String
*/
def getConfigContents(path: String): Option[String] = {
path match {
case path if path.startsWith("http") => throw new UnsupportedOperationException("HTTP not supported yet")
case path if path.startsWith("http") =>
throw new UnsupportedOperationException("HTTP not supported yet")
case _ => getLocalConf(path)
}
}

/**
* Reads contents of file on path
/** Reads contents of file on path
*
* @param path Path to file
* @return Option[String] The contents of the file or None
* @param path
* Path to file
* @return
* Option[String] The contents of the file or None
*/
def getLocalConf(path: String): Option[String] = Try {
Source.fromFile(path).getLines.mkString("\n")
} match {
case Success(s) => Some(s)
case Failure(_) => None
}
Source.fromFile(path).getLines.mkString("\n")
} match {
case Success(s) => Some(s)
case Failure(_) => None
}

/**
*
* @param conf
/** @param conf
* @param prop
* @param default
* @return
*/
def getProp(conf: Config, prop: String, default: Option[String] = None): Option[String] = {
def getProp(
conf: Config,
prop: String,
default: Option[String] = None
): Option[String] = {
conf.hasPath(prop) match {
case true => Some(conf.getString(prop))
case true => Some(conf.getString(prop))
case false => default
}
}
Expand Down
132 changes: 69 additions & 63 deletions src/main/scala/dpla/ingestion3/confs/Ingestion3Conf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,38 @@ package dpla.ingestion3.confs
import com.typesafe.config.ConfigFactory
import org.rogach.scallop.{ScallopConf, ScallopOption}

/**
*
* @param confFilePath Required for all operations (harvest, mapping,
* or enrichment)
* @param providerName Optional - Provider shortName used to lookup provider
* specific settings in application configuration file.
*
* Harvest operations require a set of provider settings
/** @param confFilePath
* Required for all operations (harvest, mapping, or enrichment)
* @param providerName
* Optional - Provider shortName used to lookup provider specific settings in
* application configuration file.
*
* Harvest operations require a set of provider settings
*/
class Ingestion3Conf(confFilePath: String, providerName: Option[String] = None) extends ConfUtils {
class Ingestion3Conf(confFilePath: String, providerName: Option[String] = None)
extends ConfUtils {
def load(): i3Conf = {
ConfigFactory.invalidateCaches()

if (confFilePath.isEmpty) throw new IllegalArgumentException("Missing path to conf file")
if (confFilePath.isEmpty)
throw new IllegalArgumentException("Missing path to conf file")

val confString = getConfigContents(confFilePath)

val baseConfig = ConfigFactory.parseString(confString.getOrElse(
throw new RuntimeException(s"Unable to load configuration file at $confFilePath")))
val baseConfig = ConfigFactory.parseString(
confString.getOrElse(
throw new RuntimeException(
s"Unable to load configuration file at $confFilePath"
)
)
)

val providerConf = providerName match {
case Some(name) => baseConfig.getConfig(name)
.withFallback(baseConfig)
.resolve()
case Some(name) =>
baseConfig
.getConfig(name)
.withFallback(baseConfig)
.resolve()
case _ => baseConfig.resolve()
}

Expand Down Expand Up @@ -56,16 +63,16 @@ class Ingestion3Conf(confFilePath: String, providerName: Option[String] = None)
i3Spark(
// FIXME these should be removed
sparkDriverMemory = getProp(providerConf, "spark.driverMemory"),
sparkExecutorMemory= getProp(providerConf, "spark.executorMemory")
sparkExecutorMemory = getProp(providerConf, "spark.executorMemory")
)
)
}
}

/**
* Command line arguments
/** Command line arguments
*
* @param arguments Command line arguments
* @param arguments
* Command line arguments
*/
class CmdArgs(arguments: Seq[String]) extends ScallopConf(arguments) {
val input: ScallopOption[String] = opt[String](
Expand Down Expand Up @@ -110,34 +117,34 @@ class CmdArgs(arguments: Seq[String]) extends ScallopConf(arguments) {
validate = _.nonEmpty
)

/**
* Gets the configuration file property from command line arguments
/** Gets the configuration file property from command line arguments
*
* @return Configuration file location
* @return
* Configuration file location
*/
def getConfigFile: String = configFile.toOption
.getOrElse(throw new RuntimeException("No configuration file specified."))

/**
* Gets the input property from command line arguments
/** Gets the input property from command line arguments
*
* @return Input location
* @return
* Input location
*/
def getInput: String = input.toOption
.getOrElse(throw new RuntimeException("No input specified."))

/**
* Gets the output property from command line arguments
/** Gets the output property from command line arguments
*
* @return Output location
* @return
* Output location
*/
def getOutput: String = output.toOption
.getOrElse(throw new RuntimeException("No output specified."))

/**
* Gets the provider short name from command line arguments
/** Gets the provider short name from command line arguments
*
* @return Provider short name
* @return
* Provider short name
*/
def getProviderName: String = providerName.toOption
.getOrElse(throw new RuntimeException("No provider name specified."))
Expand All @@ -149,38 +156,37 @@ class CmdArgs(arguments: Seq[String]) extends ScallopConf(arguments) {
verify()
}

/**
* Classes for defining the application.conf file
/** Classes for defining the application.conf file
*/
case class Harvest (
// General
endpoint: Option[String] = None,
setlist: Option[String] = None,
blacklist: Option[String] = None,
harvestType: Option[String] = None,
// OAI
verb: Option[String] = None,
metadataPrefix: Option[String] = None,
harvestAllSets: Option[String] = None,
// API
rows: Option[String] = None,
query: Option[String] = None,
apiKey: Option[String] = None,
// File delta
// Process NARA ingest using an incremental update of records
update: Option[String] = None, // Path to delta update records
previous: Option[String] = None, // Path to previously harvested records
deletes: Option[String] = None // Path to deletes
)
case class Harvest(
// General
endpoint: Option[String] = None,
setlist: Option[String] = None,
blacklist: Option[String] = None,
harvestType: Option[String] = None,
// OAI
verb: Option[String] = None,
metadataPrefix: Option[String] = None,
harvestAllSets: Option[String] = None,
// API
rows: Option[String] = None,
query: Option[String] = None,
apiKey: Option[String] = None,
// File delta
// Process NARA ingest using an incremental update of records
update: Option[String] = None, // Path to delta update records
previous: Option[String] = None, // Path to previously harvested records
deletes: Option[String] = None // Path to deletes
)

case class i3Conf(
email: Option[String] = None,
provider: Option[String] = None,
harvest: Harvest = Harvest(),
spark: i3Spark = i3Spark()
)

case class i3Spark (
sparkDriverMemory: Option[String] = None,
sparkExecutorMemory: Option[String] = None
)
email: Option[String] = None,
provider: Option[String] = None,
harvest: Harvest = Harvest(),
spark: i3Spark = i3Spark()
)

case class i3Spark(
sparkDriverMemory: Option[String] = None,
sparkExecutorMemory: Option[String] = None
)
40 changes: 22 additions & 18 deletions src/main/scala/dpla/ingestion3/dataStorage/DataStorage.scala
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
package dpla.ingestion3


package object dataStorage {

lazy val s3Protocols: List[String] = List("s3", "s3a", "s3n")


/**
* Component parts of an S3 address.
/** Component parts of an S3 address.
*
* @param protocol One of ["s3", "s3n", "s3a"]
* @param bucket The name of the S3 bucket
* @param prefix Nested folder(s) beneath the bucket
* @param protocol
* One of ["s3", "s3n", "s3a"]
* @param bucket
* The name of the S3 bucket
* @param prefix
* Nested folder(s) beneath the bucket
*/
case class S3Address(protocol: String,
bucket: String,
prefix: Option[String])
case class S3Address(protocol: String, bucket: String, prefix: Option[String])

object S3Address {
// Get full S3 path. For sanity, handle leading/trailing slashes.
Expand All @@ -25,13 +23,15 @@ package object dataStorage {
address.prefix.getOrElse("").stripPrefix("/").stripSuffix("/")
}

/**
* Parse an S3 address from a given String.
/** Parse an S3 address from a given String.
*
* @param path Path to an S3 folder
* @return The component parts of an S3 address
* @param path
* Path to an S3 folder
* @return
* The component parts of an S3 address
*
* @throws RuntimeException if unable to parse valid S3 address.
* @throws RuntimeException
* if unable to parse valid S3 address.
*/
def parseS3Address(path: String): S3Address = {
val protocol: String = path.split("://").headOption.getOrElse("")
Expand All @@ -45,12 +45,16 @@ package object dataStorage {
throw new RuntimeException(s"Unable to parse S3 bucket from $path.")
}

val prefixString: String = path.stripPrefix(protocol).stripPrefix("://")
.stripPrefix(bucket).stripPrefix("/").stripSuffix("/")
val prefixString: String = path
.stripPrefix(protocol)
.stripPrefix("://")
.stripPrefix(bucket)
.stripPrefix("/")
.stripSuffix("/")

val prefix: Option[String] =
if (prefixString.isEmpty) None else Some(prefixString)

S3Address(protocol=protocol, bucket=bucket, prefix=prefix)
S3Address(protocol = protocol, bucket = bucket, prefix = prefix)
}
}
Loading

0 comments on commit d954add

Please sign in to comment.