I’ve been using Spark for a few months now. It’s great for tasks that don’t fit easily into a Pig script. If you’ve already been writing Java for a while, it can be a drag to switch everything over to sbt just for Spark.
Here’s a basic pom.xml that can build Scala code (you’ll need Maven 3) and bundle all dependencies into one giant jar.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Minimal Maven 3 POM for a project that contains Scala (and optionally Java)
  sources. Scala is compiled by scala-maven-plugin, and the package phase
  additionally builds one "jar-with-dependencies" archive that bundles the
  project classes together with all of its dependencies.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates: placeholders, replace with your own. -->
  <groupId>com.mycompany.app</groupId>
  <artifactId>my-app</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>my-app</name>
  <url>http://maven.apache.org</url>

  <properties>
    <!-- Java language level picked up by maven-compiler-plugin. -->
    <maven.compiler.source>1.6</maven.compiler.source>
    <maven.compiler.target>1.6</maven.compiler.target>
    <!--
      Standard Maven property that plugins use as their default source
      encoding; without it the build depends on the platform encoding.
    -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Legacy "encoding" user property, kept for backward compatibility. -->
    <encoding>UTF-8</encoding>
    <!-- Referenced by the scala-library dependency below. -->
    <scala.version>2.10.4</scala.version>
  </properties>

  <build>
    <!--
      Version pins only. A plugin listed here is not executed unless it is
      also declared under build/plugins or bound by the default lifecycle
      (as maven-compiler-plugin is for jar packaging).
    -->
    <pluginManagement>
      <plugins>
        <plugin>
          <groupId>net.alchim31.maven</groupId>
          <artifactId>scala-maven-plugin</artifactId>
          <version>3.1.5</version>
        </plugin>
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>2.0.2</version>
        </plugin>
      </plugins>
    </pluginManagement>

    <plugins>
      <!-- Scala compilation; the version comes from pluginManagement above. -->
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <executions>
          <!--
            Bound to process-resources, which runs before the compile phase,
            so Scala sources are compiled ahead of maven-compiler-plugin's
            Java compilation.
          -->
          <execution>
            <id>scala-compile-first</id>
            <phase>process-resources</phase>
            <goals>
              <!-- add-source registers the Scala source folders with the build. -->
              <goal>add-source</goal>
              <goal>compile</goal>
            </goals>
          </execution>
          <!-- Same ordering trick for test sources: runs before test-compile. -->
          <execution>
            <id>scala-test-compile</id>
            <phase>process-test-resources</phase>
            <goals>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <!-- Plugin to create a single jar that includes all dependencies -->
      <plugin>
        <!-- Explicit groupId; this is the value Maven would otherwise assume. -->
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.4</version>
        <configuration>
          <descriptorRefs>
            <!-- Built-in descriptor: project classes plus unpacked dependencies. -->
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <!-- Build the assembly as part of "mvn package". -->
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <dependencies>
    <!-- Scala runtime library; version is controlled by the scala.version property. -->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
  </dependencies>
</project>