diff --git a/src/main/scala/de/upb/cs/swt/delphi/crawler/discovery/maven/MavenDiscoveryProcess.scala b/src/main/scala/de/upb/cs/swt/delphi/crawler/discovery/maven/MavenDiscoveryProcess.scala index 14c3825..01e2e27 100644 --- a/src/main/scala/de/upb/cs/swt/delphi/crawler/discovery/maven/MavenDiscoveryProcess.scala +++ b/src/main/scala/de/upb/cs/swt/delphi/crawler/discovery/maven/MavenDiscoveryProcess.scala @@ -28,8 +28,8 @@ import de.upb.cs.swt.delphi.crawler.{AppLogging, Configuration} import de.upb.cs.swt.delphi.crawler.control.Phase import de.upb.cs.swt.delphi.crawler.control.Phase.Phase import de.upb.cs.swt.delphi.crawler.tools.ActorStreamIntegrationSignals.{Ack, StreamCompleted, StreamFailure, StreamInitialized} -import de.upb.cs.swt.delphi.crawler.preprocessing.{MavenArtifact, MavenDownloadActor} -import de.upb.cs.swt.delphi.crawler.processing.{HermesActor, HermesResults} +import de.upb.cs.swt.delphi.crawler.preprocessing.{MavenArtifact, MavenArtifactMetadata, MavenDownloadActor} +import de.upb.cs.swt.delphi.crawler.processing.{HermesActor, HermesResults, PomFileReadActor} import de.upb.cs.swt.delphi.crawler.storage.ArtifactExistsQuery import de.upb.cs.swt.delphi.crawler.tools.NotYetImplementedException @@ -57,6 +57,7 @@ class MavenDiscoveryProcess(configuration: Configuration, elasticPool: ActorRef) private val seen = mutable.HashSet[MavenIdentifier]() val downloaderPool = system.actorOf(SmallestMailboxPool(8).props(MavenDownloadActor.props)) + val pomReaderPool = system.actorOf(SmallestMailboxPool(8).props(PomFileReadActor.props(configuration))) val hermesPool = system.actorOf(SmallestMailboxPool(configuration.hermesActorPoolSize).props(HermesActor.props())) override def phase: Phase = Phase.Discovery @@ -92,6 +93,8 @@ class MavenDiscoveryProcess(configuration: Configuration, elasticPool: ActorRef) val finalizer = preprocessing + .mapAsync(8)(artifact => (pomReaderPool ? 
artifact).mapTo[MavenArtifact]) + .alsoTo(createSinkFromActorRef[MavenArtifact](elasticPool)) .mapAsync(configuration.hermesActorPoolSize)(artifact => (hermesPool ? artifact).mapTo[Try[HermesResults]]) .filter(results => results.isSuccess) .map(results => results.get) diff --git a/src/main/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenArtifact.scala b/src/main/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenArtifact.scala index 3025eff..2709ebf 100644 --- a/src/main/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenArtifact.scala +++ b/src/main/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenArtifact.scala @@ -17,5 +17,29 @@ package de.upb.cs.swt.delphi.crawler.preprocessing import de.upb.cs.swt.delphi.crawler.discovery.maven.MavenIdentifier +import org.joda.time.DateTime -case class MavenArtifact(identifier : MavenIdentifier, jarFile: JarFile, pomFile: PomFile) +/** A downloaded Maven artifact: its identifier, JAR and POM, plus an optional publication date and optional POM metadata. */ +case class MavenArtifact(identifier : MavenIdentifier, jarFile: JarFile, pomFile: PomFile, + publicationDate: Option[DateTime], metadata: Option[MavenArtifactMetadata]) + +/** Values read from an artifact's POM file. */ +case class MavenArtifactMetadata(name: String, + description: String, + developers: List[String], + licenses: List[ArtifactLicense], + issueManagement: Option[IssueManagementData], + dependencies: Set[ArtifactDependency], + parent:Option[MavenIdentifier], + packaging: String) + +case class IssueManagementData(system: String, url: String) +case class ArtifactLicense(name: String, url:String) +case class ArtifactDependency(identifier: MavenIdentifier, scope: Option[String]) + +object MavenArtifact{ + /** Returns a copy of the given artifact with the given metadata attached; every other field is left unchanged. */ + def withMetadata(artifact: MavenArtifact, metadata: MavenArtifactMetadata): MavenArtifact = { + artifact.copy(metadata = Some(metadata)) + } +} diff --git a/src/main/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenDownloadActor.scala b/src/main/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenDownloadActor.scala index 52be089..cf9aea9 
100644 --- a/src/main/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenDownloadActor.scala +++ b/src/main/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenDownloadActor.scala @@ -16,11 +16,14 @@ package de.upb.cs.swt.delphi.crawler.preprocessing +import java.util.Locale + import akka.actor.{Actor, ActorLogging, ActorSystem, Props} import de.upb.cs.swt.delphi.crawler.discovery.maven.MavenIdentifier import de.upb.cs.swt.delphi.crawler.tools.HttpDownloader +import org.joda.time.format.DateTimeFormat -import scala.util.{Failure, Success} +import scala.util.{Failure, Success, Try} class MavenDownloadActor extends Actor with ActorLogging { override def receive: Receive = { @@ -30,14 +33,25 @@ class MavenDownloadActor extends Actor with ActorLogging { val downloader = new HttpDownloader val jarStream = downloader.downloadFromUri(m.toJarLocation.toString()) - val pomStream = downloader.downloadFromUri(m.toPomLocation.toString()) + val pomResponse = downloader.downloadFromUriWithHeaders(m.toPomLocation.toString()) jarStream match { case Success(jar) => { - pomStream match { - case Success(pom) => { + pomResponse match { + case Success((pomStream, pomHeaders)) => { log.info(s"Downloaded $m") - sender() ! Success(MavenArtifact(m, JarFile(jar, m.toJarLocation.toURL), PomFile(pom))) + + // Publication date = Last-Modified header of the POM response; a missing or unparsable header yields None + val datePattern = DateTimeFormat.forPattern("E, dd MMM yyyy HH:mm:ss zzz").withLocale(Locale.ENGLISH) + val pomPublicationDate = pomHeaders.find( _.lowercaseName().equals("last-modified") ) + .map( header => Try(datePattern.parseDateTime(header.value())) ) match { + case Some(Success(date)) => Some(date) + case Some(Failure(x)) => log.warning(s"Could not parse Last-Modified header of POM for $m: ${x.getMessage}"); None + case _ => None + } + + sender() ! 
Success(MavenArtifact(m, JarFile(jar, m.toJarLocation.toURL), PomFile(pomStream), + pomPublicationDate, None)) } case Failure(e) => { // TODO: push error to actor diff --git a/src/main/scala/de/upb/cs/swt/delphi/crawler/processing/PomFileReadActor.scala b/src/main/scala/de/upb/cs/swt/delphi/crawler/processing/PomFileReadActor.scala new file mode 100644 index 0000000..77eedaa --- /dev/null +++ b/src/main/scala/de/upb/cs/swt/delphi/crawler/processing/PomFileReadActor.scala @@ -0,0 +1,319 @@ +// Copyright (C) 2018 The Delphi Team. +// See the LICENCE file distributed with this work for additional +// information regarding copyright ownership. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package de.upb.cs.swt.delphi.crawler.processing + +import akka.actor.{Actor, ActorLogging, ActorSystem, Props} +import de.upb.cs.swt.delphi.crawler.Configuration +import de.upb.cs.swt.delphi.crawler.discovery.maven.MavenIdentifier +import de.upb.cs.swt.delphi.crawler.preprocessing.{ArtifactDependency, ArtifactLicense, IssueManagementData, MavenArtifact, MavenArtifactMetadata, PomFile} +import de.upb.cs.swt.delphi.crawler.tools.HttpDownloader +import org.apache.maven.model.{Dependency, Model} +import org.apache.maven.model.io.xpp3.MavenXpp3Reader + +import scala.collection.JavaConverters._ +import scala.util.{Failure, Success, Try} + +/** + * An Actor that receives MavenArtifacts and extracts metadata from its POM file. 
If successful, a + * MavenArtifactMetadata object is attached to the artifact and the artifact is returned. If failures occur, + * the artifact is returned without metadata. + * + * @author Johannes Düsing + */ +class PomFileReadActor(configuration: Configuration) extends Actor with ActorLogging{ + + val pomReader: MavenXpp3Reader = new MavenXpp3Reader() + implicit val system : ActorSystem = context.system + + override def receive: Receive = { + case artifact@MavenArtifact(identifier, _ ,PomFile(pomStream), _, _) => + + val pomObject = Try(pomReader.read(pomStream)) + pomStream.close() + + pomObject match { + case Success(pom) => + val issueManagement = Option(pom.getIssueManagement) + .map(i => IssueManagementData(i.getSystem, i.getUrl)) + + val parent = Option(pom.getParent) + .map(p => MavenIdentifier(configuration.mavenRepoBase.toString, p.getGroupId, p.getArtifactId, p.getVersion)) + + val dependencies = getDependencies(pom, identifier) + + val metadata = MavenArtifactMetadata(pom.getName, + pom.getDescription, + pom.getDevelopers.asScala.map(_.getId).toList, + pom.getLicenses.asScala.map(l => ArtifactLicense(l.getName, l.getUrl)).toList, + issueManagement, + dependencies, + parent, + pom.getPackaging) + + sender() ! MavenArtifact.withMetadata(artifact, metadata) + + log.info(s"Successfully processed POM file for $identifier") + + case Failure(ex) => + // Throwable goes first: the error(template, arg) overload would treat it as a format argument and drop the stack trace + log.error(ex, s"Failed to parse POM file for artifact $identifier") + // Best effort semantics: If parsing fails, artifact is returned without metadata + sender() ! artifact + } + + } + + /** + * Tries to resolve, download and parse the parent POM file of the given POM. 
+ * @param pomContent Content of a POM file to resolve parent for + * @return Content of Parent POM, or None if no parent is specified or an error occurred + */ + private def getParentPomModel(implicit pomContent: Model): Option[Model] = { + val parentDef = pomContent.getParent + + if (parentDef != null && parentDef.getGroupId != null && parentDef.getArtifactId != null && parentDef.getVersion != null){ + val parentIdentifier = MavenIdentifier(configuration.mavenRepoBase.toString, parentDef.getGroupId, + parentDef.getArtifactId, parentDef.getVersion) + + new HttpDownloader().downloadFromUri(parentIdentifier.toPomLocation.toString) match { + case Success(pomStream) => + // Parse defensively: a malformed parent POM yields None (see @return) instead of throwing with the stream left open + val parentPom = Try(pomReader.read(pomStream)) + pomStream.close() + parentPom.failed.foreach(x => log.error(x, s"Failed to parse parent POM")) + parentPom.toOption + case Failure(x) => + log.error(x, s"Failed to download parent POM") + None + } + } + else { + None + } + } + + /** + * Recursive method building the parent hierarchy of the given POM. Will download and parse all parent POMs and + * return them in a list, nearest parent first. + * @param pomContent POM file to build the parent hierarchy for + * @return List of parent POMs. Might be empty, if no parent is specified at all + */ + private def buildParentHierarchy(implicit pomContent: Model): List[Model] = { + getParentPomModel(pomContent) match { + case Some(parentContent) => + List(parentContent) ++ buildParentHierarchy(parentContent) + case _ => + List() + } + } + + /** Builds the identifier of the parent referenced by the given POM; callers must ensure the POM declares a parent. 
*/ + private def buildParentIdentifier(implicit pomContent:Model): MavenIdentifier = { + MavenIdentifier(configuration.mavenRepoBase.toString, pomContent.getParent.getGroupId, + pomContent.getParent.getArtifactId, pomContent.getParent.getVersion) + } + + /** + * Retrieve all dependencies specified in the given POM file as ArtifactDependency objects. Try to resolve variables as well. + * Only returns successfully resolved dependencies, omits failures. 
+ * @param pomContent Object holding POM file contents + * @param identifier Maven identifier, as sometimes version / groupID is not part of POM file! + * @return Set of ArtifactDependency objects, one for each successfully parsed dependency + */ + private def getDependencies(implicit pomContent: Model, identifier: MavenIdentifier): Set[ArtifactDependency] = { + + // Always build the parent hierarchy exactly once (and only if a dependency actually needs it) + lazy val parentHierarchy: List[Model] = buildParentHierarchy(pomContent) + + // Try to resolve each dependency specified in the POM + val dependencies = pomContent + .getDependencies + .asScala + .toSet[Dependency] + .map(resolveDependency(_, parentHierarchy)) + + if (dependencies.exists(_.isFailure)) { + log.warning(s"Failed to resolve some dependencies for $identifier") + } + + // Only return those dependencies that have been successfully resolved + dependencies.collect { case Success(dependency) => dependency } + } + + /** + * Process raw dependency specification from POM file, validate text values and try to resolve project variables. + * @param dependency Raw dependency specification as given in the POM file + * @param parentHierarchy Parent POMs of the POM file, evaluated lazily and only when a value must be looked up in a parent + * @param pomContent Contents of the POM file + * @param identifier Artifact identifier, as sometimes version / groupID is not part of POM file + * @return Try object holding the resolved ArtifactDependency if successful + */ + private def resolveDependency(dependency: Dependency, parentHierarchy: => List[Model]) + (implicit pomContent: Model, identifier: MavenIdentifier) + : Try[ArtifactDependency] = { + lazy val parents = parentHierarchy + + Try { + // Resolve groupID and artifact id in current POM + val groupId = resolveProperty(dependency.getGroupId, "groupID", parents) + val artifactId = resolveProperty(dependency.getArtifactId, "artifactID", parents) + + // Often dependency versions are left empty, as they are specified in the parent! 
+ val version: String = if(dependency.getVersion == null && parents.nonEmpty){ + // If there are parents and version is empty => Try to resolve version in parents + resolveDependencyVersion(dependency, pomContent, identifier, parents) + } else { + // If no parents are present or version is specified => Resolve as regular property + resolveProperty(dependency.getVersion, "version", parents) + } + + val scope = Option(dependency.getScope) + + ArtifactDependency(MavenIdentifier(configuration.mavenRepoBase.toString, groupId, artifactId, version), scope) + } + } + + /** + * Resolve the version of the given dependency by inspecting the dependencyManagement tag of the current POM and then of its parent POMs. + * @param dependency Dependency to resolve version for, ie. no explicit version is defined for this dependency! + * @param pomContent Content of the current POM file to inspect + * @param identifier Identifier of the current POM file + * @param level Level in the parent hierarchy, needed for recursion + * @param parentHierarchy Parent hierarchy object + * @return String value of the resolved version + * @throws NullPointerException If version could not be resolved in any parent + */ + @scala.annotation.tailrec + private def resolveDependencyVersion(dependency: Dependency, pomContent: Model, identifier: MavenIdentifier, + parentHierarchy: => List[Model], level: Int = 0): String = { + lazy val parents = parentHierarchy + + if(pomContent.getDependencyManagement != null){ + // If there is a dependency management tag: Try to find matching groupID and artifactID + pomContent + .getDependencyManagement.getDependencies + .asScala.toSet[Dependency] + .filter(d => d.getGroupId.equals(dependency.getGroupId) && d.getArtifactId.equals(dependency.getArtifactId)) + .map(_.getVersion) + .find(_ != null) match { + case Some(version) => + // Found matching version definition, try to resolve it if it is a variable + resolveProperty(version, "version", parents, level)(pomContent, identifier) + case None if level < parents.length => + // 
Found no matching version definition, but there are parents left to recurse to + resolveDependencyVersion(dependency, parents(level), buildParentIdentifier(pomContent), parents, level + 1) + case None if level >= parents.length => + // No parent left to recurse, so this really is a dependency without a version + throw new NullPointerException(s"Version was null and could not be resolved in parent") + } + } + else if(level < parents.length) { + // There is no dependency management tag, immediately recurse into parent if parent left + resolveDependencyVersion(dependency, parents(level), buildParentIdentifier(pomContent), parents, level + 1) + } + else { + // No parent left to recurse, so this really is a dependency without a version + throw new NullPointerException(s"Version was null and could not be resolved in parent") + } + + + } + + /** + * Resolve the given property value of a dependency specification and do input validation + * @param propValue Value to resolve + * @param propName Name of the property (for error logging) + * @param parentHierarchy Parent POMs used to resolve variables that the current POM does not define + * @param level Current level in the parent hierarchy, passed on to variable resolution + * @param pomContent Contents of the POM file + * @return Fully resolved string value of the property if successful + * @throws NullPointerException If a null value was found for a required property + * @throws RuntimeException If actor failed to resolve a variable inside the POM file + */ + private def resolveProperty(propValue: String, propName: String, parentHierarchy: => List[Model], level: Int = 0) + (implicit pomContent:Model, identifier:MavenIdentifier) + : String = { + lazy val parents = parentHierarchy + if(propValue == null){ + throw new NullPointerException(s"Property '$propName' must not be null for dependencies") + } + else if (propValue.startsWith("$")){ + resolveProjectVariable(propValue, parents, level) + .getOrElse(throw new RuntimeException(s"Failed to resolve variable '$propValue' for property '$propName'")) + } + else { + propValue + } + } + + /** + * Resolves a Maven variable reference (project.* / pom.* attributes or a POM property) against the given POM and, + * if that fails, against the remaining parent POMs. + * @param variableName Variable reference including the Maven dollar-and-braces syntax + * @param parentHierarchy Parent POMs, nearest parent first + * @param level Index into parentHierarchy of the next parent to try; pomContent itself is tried first + * @return Resolved value, or None if neither the POM nor any remaining parent defines the variable + */ + //noinspection ScalaStyle + @scala.annotation.tailrec + private def 
resolveProjectVariable(variableName: String, parentHierarchy: => List[Model], level: Int) + (implicit pomContent: Model, identifier: MavenIdentifier) + : Option[String] = { + lazy val parents = parentHierarchy + + // Drop Maven Syntax from variable reference (e.g. ${varname}) + val rawVariableName = variableName.drop(2).dropRight(1) + + // Split dot-separated variable names + val variableParts = rawVariableName.split("\\.", 2) + + var result: Option[String] = None + + // Resolve special references to POM attributes (only when a sub-key follows the prefix, e.g. project.version) + if (variableParts.length > 1 && (variableParts(0).equals("project") || variableParts(0).equals("pom"))) { + result = variableParts(1) match { + // groupID always present in identifier, but not always explicit in POM + case "groupId" => Some(identifier.groupId) + // artifactID always present in POM + case "artifactId" => Some(pomContent.getArtifactId) + // Version always present in identifier, but not always explicit in POM + case "version" => Some(identifier.version) + // Can only extract parent version if explicitly stated + case "parent.version" if pomContent.getParent != null && pomContent.getParent.getVersion != null => + Some(pomContent.getParent.getVersion) + case _ => None + } + } + else { + // All other formats are interpreted as POM property names + result = Option(pomContent.getProperties.getProperty(rawVariableName)) + } + + // If not resolved -> try to resolve in parent! 
+ // parents(level) is only valid for level < parents.length; once all parents are exhausted the result is None + if (result.isEmpty && level < parents.length){ + resolveProjectVariable(variableName, parents, level + 1)(parents(level), buildParentIdentifier(pomContent)) + } + else { + result + } + } +} + +object PomFileReadActor { + def props(configuration: Configuration):Props = Props(new PomFileReadActor(configuration)) +} diff --git a/src/main/scala/de/upb/cs/swt/delphi/crawler/storage/ElasticActor.scala b/src/main/scala/de/upb/cs/swt/delphi/crawler/storage/ElasticActor.scala index 7235f50..98e44b2 100644 --- a/src/main/scala/de/upb/cs/swt/delphi/crawler/storage/ElasticActor.scala +++ b/src/main/scala/de/upb/cs/swt/delphi/crawler/storage/ElasticActor.scala @@ -23,6 +23,7 @@ import de.upb.cs.swt.delphi.crawler.Identifier import de.upb.cs.swt.delphi.crawler.discovery.git.GitIdentifier import de.upb.cs.swt.delphi.crawler.tools.ActorStreamIntegrationSignals.{Ack, StreamCompleted, StreamFailure, StreamInitialized} import de.upb.cs.swt.delphi.crawler.discovery.maven.MavenIdentifier +import de.upb.cs.swt.delphi.crawler.preprocessing.MavenArtifact import de.upb.cs.swt.delphi.crawler.processing.HermesResults /** @@ -47,6 +48,10 @@ class ElasticActor(client: ElasticClient) extends Actor with ActorLogging with A store(m) sender() ! Ack } + case a : MavenArtifact => { + store(a) + sender() ! Ack + } case g : GitIdentifier => { store(g) sender() ! 
Ack diff --git a/src/main/scala/de/upb/cs/swt/delphi/crawler/storage/ElasticStoreQueries.scala b/src/main/scala/de/upb/cs/swt/delphi/crawler/storage/ElasticStoreQueries.scala index e26ba3a..91dc91e 100644 --- a/src/main/scala/de/upb/cs/swt/delphi/crawler/storage/ElasticStoreQueries.scala +++ b/src/main/scala/de/upb/cs/swt/delphi/crawler/storage/ElasticStoreQueries.scala @@ -23,6 +23,7 @@ import com.sksamuel.elastic4s.http.update.UpdateResponse import com.sksamuel.elastic4s.http.{ElasticClient, Response} import de.upb.cs.swt.delphi.crawler.discovery.git.GitIdentifier import de.upb.cs.swt.delphi.crawler.discovery.maven.MavenIdentifier +import de.upb.cs.swt.delphi.crawler.preprocessing.MavenArtifact import de.upb.cs.swt.delphi.crawler.processing.{HermesAnalyzer, HermesResults} import org.joda.time.DateTime @@ -49,6 +50,47 @@ trait ElasticStoreQueries { } } + /** Updates the document found via elasticId(m.identifier) with a "pom" object built from the artifact's metadata and a "published" value. Returns None, after logging a warning, when the artifact carries no metadata or elasticId yields no id. */ def store(m: MavenArtifact)(implicit client: ElasticClient, log: LoggingAdapter): Option[Response[UpdateResponse]] = { + elasticId(m.identifier) match { + case Some(id) => + log.info(s"Pushing POM file contents for ${m.identifier} under id $id") + + m.metadata match { + case Some(metadata) => + Some(client.execute { + update(id).in(delphiProjectType).doc(fields = "pom" -> Map( + "name" -> metadata.name, + "description" -> metadata.description, + "issueManagement" -> metadata.issueManagement + .map(management => Map("url" -> management.url, "system" -> management.system)).getOrElse("None"), /* NOTE(review): this field is either a nested object or the string "None" (same for "parent" below) - confirm the index mapping accepts both shapes */ + "developers" -> metadata.developers.mkString(","), + "licenses" -> metadata.licenses.map(l => Map("name" -> l.name, "url" -> l.url)), + "dependencies" -> metadata.dependencies.map(d => Map( + "groupId" -> d.identifier.groupId, + "artifactId" -> d.identifier.artifactId, + "version" -> d.identifier.version, + "scope" -> d.scope.getOrElse("default") + )), + "parent" -> metadata.parent.map(p => Map( + "groupId" -> p.groupId, + "artifactId" -> p.artifactId, + "version" -> p.version + )).getOrElse("None"), + "packaging" -> 
metadata.packaging + ), "published" -> m.publicationDate.getOrElse("Unknown")) /* NOTE(review): "published" is a DateTime or the string "Unknown" - confirm mapping and serialization handle both */ + }.await) + case None => + log.warning(s"Tried to push POM file results to database, but no results are present for identifier: ${m.identifier}") + None + } + + + case None => + log.warning(s"Tried to push POM file results for non-existing identifier: ${m.identifier}.") + None + } + } + def store(g: GitIdentifier)(implicit client: ElasticClient, log: LoggingAdapter): Response[IndexResponse] = { log.info("Pushing new git identifier to elastic: [{}]", g) client.execute { diff --git a/src/main/scala/de/upb/cs/swt/delphi/crawler/tools/HttpDownloader.scala b/src/main/scala/de/upb/cs/swt/delphi/crawler/tools/HttpDownloader.scala index 452b6cf..f43aec5 100644 --- a/src/main/scala/de/upb/cs/swt/delphi/crawler/tools/HttpDownloader.scala +++ b/src/main/scala/de/upb/cs/swt/delphi/crawler/tools/HttpDownloader.scala @@ -21,7 +21,7 @@ import java.util.concurrent.TimeUnit import akka.actor.ActorSystem import akka.http.scaladsl.Http -import akka.http.scaladsl.model.{HttpRequest, HttpResponse, StatusCodes} +import akka.http.scaladsl.model.{HttpHeader, HttpRequest, HttpResponse, StatusCodes} import akka.stream.ActorMaterializer import akka.stream.scaladsl.{Sink, StreamConverters} import akka.util.ByteString @@ -48,4 +48,20 @@ class HttpDownloader(implicit val system: ActorSystem) { Failure(new HttpException(code)) } } + + /** Requests the given URI and, on status 200, returns the fully buffered body as a stream together with the response headers; any other status discards the entity and yields Failure(HttpException). Blocks the calling thread: both Await calls use Duration.Inf. */ def downloadFromUriWithHeaders(requestedUri: String): Try[(InputStream, Seq[HttpHeader])] = { + val responseFuture: Future[HttpResponse] = + Http().singleRequest(HttpRequest(uri = requestedUri)) + + + Await.result(responseFuture, Duration.Inf) match { + case HttpResponse(StatusCodes.OK, headers, entity, _) => + Try(( + new ByteArrayInputStream(Await.result(entity.dataBytes.runFold(ByteString.empty)(_ ++ _).map(_.toArray), Duration.Inf)), + headers)) + case resp@HttpResponse(code, _, _, _) => + resp.discardEntityBytes() + Failure(new HttpException(code)) + } + } } diff --git 
a/src/test/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenDownloadActorTest.scala b/src/test/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenDownloadActorTest.scala index 022369e..f8cd7c1 100644 --- a/src/test/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenDownloadActorTest.scala +++ b/src/test/scala/de/upb/cs/swt/delphi/crawler/preprocessing/MavenDownloadActorTest.scala @@ -44,7 +44,7 @@ class MavenDownloadActorTest extends TestKit(ActorSystem("DownloadActor")) "The maven download actor" must { "create a maven artifact with a jar and pom file" in { - val mavenIdentifier = new MavenIdentifier("http://central.maven.org/maven2/", "junit", "junit", "4.12") + val mavenIdentifier = new MavenIdentifier("https://repo1.maven.org/maven2/", "junit", "junit", "4.12") val downloadActor = system.actorOf(MavenDownloadActor.props) implicit val timeout = Timeout(10 seconds) @@ -59,7 +59,8 @@ class MavenDownloadActorTest extends TestKit(ActorSystem("DownloadActor")) checkJar(artifact.jarFile.is) checkPom(artifact.pomFile.is) - + assert(artifact.metadata.isEmpty) + assert(artifact.publicationDate.isDefined && artifact.publicationDate.get != null) } } } diff --git a/src/test/scala/de/upb/cs/swt/delphi/crawler/processing/PomFileReadActorTest.scala b/src/test/scala/de/upb/cs/swt/delphi/crawler/processing/PomFileReadActorTest.scala new file mode 100644 index 0000000..2fd338a --- /dev/null +++ b/src/test/scala/de/upb/cs/swt/delphi/crawler/processing/PomFileReadActorTest.scala @@ -0,0 +1,102 @@ +// Copyright (C) 2018 The Delphi Team. +// See the LICENCE file distributed with this work for additional +// information regarding copyright ownership. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package de.upb.cs.swt.delphi.crawler.processing + +import akka.actor.ActorSystem +import akka.pattern.ask +import akka.testkit.{ImplicitSender, TestKit} +import akka.util.Timeout +import de.upb.cs.swt.delphi.crawler.Configuration +import de.upb.cs.swt.delphi.crawler.discovery.maven.MavenIdentifier +import de.upb.cs.swt.delphi.crawler.preprocessing.{ArtifactDependency, MavenArtifact, MavenDownloadActor} +import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} + +import scala.concurrent.duration._ +import scala.concurrent.{Await, ExecutionContext} +import scala.util.Success + +class PomFileReadActorTest extends TestKit(ActorSystem("PomFileReadActorTest")) + with ImplicitSender + with WordSpecLike + with Matchers + with BeforeAndAfterAll { + + /** Stops the test actor system once all tests have run, so its threads do not outlive the suite. */ + override def afterAll(): Unit = TestKit.shutdownActorSystem(system) + + final val RepoUrl = new Configuration().mavenRepoBase.toString + + /** Downloads the artifact via MavenDownloadActor, passes it through PomFileReadActor and returns the reply. */ + private def readPomFileFor(identifier: MavenIdentifier): MavenArtifact = { + val downloadActor = system.actorOf(MavenDownloadActor.props) + val readerActor = system.actorOf(PomFileReadActor.props(new Configuration())) + + implicit val timeout: Timeout = Timeout(10 seconds) + implicit val ec: ExecutionContext = system.dispatcher + + val f = downloadActor ? identifier + + val msg = Await.result(f, 10 seconds) + + assert(msg.isInstanceOf[Success[MavenArtifact]]) + val artifact = msg.asInstanceOf[Success[MavenArtifact]].get + + assert(artifact.metadata.isEmpty) + assert(artifact.publicationDate.isDefined && artifact.publicationDate.get != null) + + val result = Await.result(readerActor ? 
artifact, 10 seconds) + assert(result.isInstanceOf[MavenArtifact]) + result.asInstanceOf[MavenArtifact] + } + + "The POM file reader actor " must { + "create a maven artifact with valid metadata" in { + val annotatedArtifact = readPomFileFor(MavenIdentifier(RepoUrl, "junit", "junit", "4.12")) + + assert(annotatedArtifact.metadata.isDefined) + val metadata = annotatedArtifact.metadata.get + + assert(metadata.name != null && metadata.name.equals("JUnit")) + assert(metadata.description != null && metadata.description.startsWith("JUnit is a unit testing framework for Java,")) + + assert(metadata.issueManagement.isDefined) + assertResult("https://github.com/junit-team/junit/issues")(metadata.issueManagement.get.url) + assertResult("github")(metadata.issueManagement.get.system) + + assertResult(4)(metadata.developers.size) + + assertResult(1)(metadata.licenses.size) + assertResult("Eclipse Public License 1.0")(metadata.licenses.head.name) + } + + "process dependencies as expected" in { + val annotatedArtifact = readPomFileFor(MavenIdentifier(RepoUrl, "org.apache.bookkeeper", "bookkeeper-server", "4.9.2")) + + val dependencies = annotatedArtifact.metadata.get.dependencies + + assertResult(25)(dependencies.size) + assertResult(9)(dependencies.count(_.identifier.version == "4.9.2")) + // Version is local POM reference + assert(dependencies.contains(ArtifactDependency(MavenIdentifier(RepoUrl,"org.apache.bookkeeper", "circe-checksum", "4.9.2"), None))) + // Version in a variable which is defined in parent POM + assert(dependencies.contains(ArtifactDependency(MavenIdentifier(RepoUrl,"org.apache.kerby", "kerby-config", "1.1.1"), Some("test")))) + // Version is not defined in local POM, and must be derived from parent POM + assert(dependencies.contains(ArtifactDependency(MavenIdentifier(RepoUrl,"commons-codec", "commons-codec", "1.6"), None))) + } + } + +}