diff --git a/pom.xml b/pom.xml
index 152ed5f..94cf72e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,30 +1,31 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.warcbase</groupId>
   <artifactId>warcbase</artifactId>
   <packaging>pom</packaging>
   <version>0.1.0-SNAPSHOT</version>
   <name>Warcbase</name>
   <description>An open-source platform for managing web archives built on Hadoop and HBase</description>
   <url>http://warcbase.org/</url>
 
   <licenses>
     <license>
       <name>The Apache Software License, Version 2.0</name>
       <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
       <distribution>repo</distribution>
     </license>
   </licenses>
 
   <scm>
     <connection>scm:git:git@github.com:lintool/warcbase.git</connection>
     <developerConnection>scm:git:git@github.com:lintool/warcbase.git</developerConnection>
     <url>git@github.com:lintool/warcbase.git</url>
   </scm>
 
   <modules>
     <module>warcbase-core</module>
+    <module>warcbase-hbase</module>
   </modules>
 
 </project>
diff --git a/warcbase-hbase/pom.xml b/warcbase-hbase/pom.xml
index f33d240..f367b80 100644
--- a/warcbase-hbase/pom.xml
+++ b/warcbase-hbase/pom.xml
@@ -1,539 +1,327 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 
   <parent>
     <groupId>org.warcbase</groupId>
     <artifactId>warcbase</artifactId>
     <version>0.1.0-SNAPSHOT</version>
   </parent>
 
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.warcbase</groupId>
-  <artifactId>warcbase-core</artifactId>
+  <artifactId>warcbase-hbase</artifactId>
   <packaging>jar</packaging>
   <version>0.1.0-SNAPSHOT</version>
   <name>Warcbase</name>
   <description>An open-source platform for managing web archives built on Hadoop and HBase</description>
   <url>http://warcbase.org/</url>
 
   <licenses>
     <license>
       <name>The Apache Software License, Version 2.0</name>
       <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
       <distribution>repo</distribution>
     </license>
   </licenses>
 
   <scm>
     <connection>scm:git:git@github.com:lintool/warcbase.git</connection>
     <developerConnection>scm:git:git@github.com:lintool/warcbase.git</developerConnection>
     <url>git@github.com:lintool/warcbase.git</url>
   </scm>
 
   <developers>
     <developer>
       <id>lintool</id>
       <name>Jimmy Lin</name>
       <email>jimmylin@umd.edu</email>
     </developer>
     <developer>
       <id>milad621</id>
       <name>Milad Gholami</name>
       <email>mgholami@cs.umd.edu</email>
     </developer>
     <developer>
       <id>jeffyRao</id>
       <name>Jinfeng Rao</name>
       <email>jinfeng@cs.umd.edu</email>
     </developer>
   </developers>
 
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
     <jettyVersion>8.1.12.v20130726</jettyVersion>
     <hadoop.version>2.6.0-cdh5.4.1</hadoop.version>
     <hbase.version>1.0.0-cdh5.4.1</hbase.version>
     <zookeeper.version>3.4.5-cdh5.4.1</zookeeper.version>
     <spark.version>1.3.0-cdh5.4.1</spark.version>    
     <scala.version>2.10.4</scala.version>
   </properties>
 
   <build>
     <plugins>
 
  <plugin>
     <artifactId>maven-clean-plugin</artifactId>
     <version>2.6.1</version>
     <configuration>
       <filesets>
         <fileset>
           <directory>src/main/solr/lib</directory>
           <followSymlinks>false</followSymlinks>
         </fileset>
       </filesets>
     </configuration>
   </plugin>
 
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-compiler-plugin</artifactId>
         <version>3.2</version>
         <configuration>
           <source>1.7</source>
           <target>1.7</target>
         </configuration>
       </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-shade-plugin</artifactId>
         <version>2.3</version>
         <executions>
           <execution>
             <phase>package</phase>
             <goals>
               <goal>shade</goal>
             </goals>
             <configuration>
 
 <!--
 http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130822094206.310452@isped.u-bordeaux2.fr%3E
 -->
 
 <transformers><transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
     <resource>META-INF/services/org.apache.lucene.codecs.Codec</resource></transformer></transformers>
 
 
               <!-- This fixes the issue "Invalid signature file digest for Manifest main attributes"
                    cf. http://zhentao-li.blogspot.com/2012/06/maven-shade-plugin-invalid-signature.html -->
               <filters>
                 <filter>
                   <artifact>*:*</artifact>
                   <excludes>
                     <exclude>META-INF/*.SF</exclude>
                     <exclude>META-INF/*.DSA</exclude>
                     <exclude>META-INF/*.RSA</exclude>
                   </excludes>
                 </filter>
               </filters>
               <!-- this will create both a normal thin jar and also a fatjar -->
               <shadedArtifactAttached>true</shadedArtifactAttached>
               <shadedClassifierName>fatjar</shadedClassifierName>
               <artifactSet>
                 <excludes>
                   <exclude>org.apache.hadoop:*</exclude>
                 </excludes>
               </artifactSet>
             </configuration>
           </execution>
         </executions>
       </plugin>
 
 <plugin>
     <groupId>org.apache.maven.plugins</groupId>
     <artifactId>maven-dependency-plugin</artifactId>
     <version>2.4</version>
     <executions>
         <execution>
             <id>copy</id>
             <phase>package</phase>
             <goals>
                 <goal>copy-dependencies</goal>
             </goals>
             <configuration>
                 <outputDirectory>
                     src/main/solr/lib
                 </outputDirectory>
             </configuration>
         </execution>
     </executions>
 </plugin>
 
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>appassembler-maven-plugin</artifactId>
         <version>1.9</version>
         <configuration>
           <extraJvmArguments>-Xms512M -Xmx24576M</extraJvmArguments>
           <programs>
             <program>
               <mainClass>org.warcbase.WarcbaseAdmin</mainClass>
               <name>WarcbaseAdmin</name>
             </program>
             <program>
               <mainClass>org.warcbase.data.UrlMappingBuilder</mainClass>
               <name>UrlMappingBuilder</name>
             </program>
             <program>
               <mainClass>org.warcbase.data.UrlMapping</mainClass>
               <name>UrlMapping</name>
             </program>
             <program>
               <mainClass>org.warcbase.data.ExtractLinks</mainClass>
               <name>ExtractLinks</name>
             </program>
             <program>
               <mainClass>org.warcbase.data.ExtractSiteLinks</mainClass>
               <name>ExtractSiteLinks</name>
             </program>
             <program>
               <mainClass>org.warcbase.ingest.IngestFiles</mainClass>
               <name>IngestFiles</name>
             </program>
             <program>
               <mainClass>org.warcbase.ingest.SearchForUrl</mainClass>
               <name>SearchForUrl</name>
             </program>
             <program>
               <mainClass>org.warcbase.browser.WarcBrowser</mainClass>
               <name>WarcBrowser</name>
             </program>
             <program>
               <mainClass>org.warcbase.analysis.DetectDuplicates</mainClass>
               <name>DetectDuplicates</name>
             </program>
             <program>
               <mainClass>org.warcbase.browser.SeleniumBrowser</mainClass>
               <name>SeleniumBrowser</name>
             </program>
           </programs>
         </configuration>
       </plugin>
       <!-- for Scala -->
       <plugin>
         <groupId>org.scala-tools</groupId>
         <artifactId>maven-scala-plugin</artifactId>
         <version>2.15.2</version>
         <executions>
           <execution>
             <phase>process-resources</phase>
             <goals>
               <goal>add-source</goal>
               <goal>compile</goal>
             </goals>
           </execution>
           <execution>
             <id>scala-test-compile</id>
             <phase>process-test-resources</phase>
             <goals>
               <goal>testCompile</goal>
             </goals>
           </execution>
         </executions>
         <configuration>
           <scalaVersion>${scala.version}</scalaVersion>
           <sendJavaToScalac>true</sendJavaToScalac>
           <args>
             <arg>-target:jvm-1.7</arg>
             <arg>-g:vars</arg>
             <arg>-deprecation</arg>
             <arg>-dependencyfile</arg>
             <arg>${project.build.directory}/.scala_dependencies</arg>
           </args>
         </configuration>
       </plugin>
     </plugins>
   </build>
 
   <repositories>
     <repository>
       <id>maven</id>
       <url>http://repo.maven.apache.org/maven2/</url>
     </repository>
     <repository>
       <id>cloudera</id>
       <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
     </repository>
     <repository>
       <id>internetarchive</id>
       <name>Internet Archive Maven Repository</name>
       <url>http://builds.archive.org:8080/maven2</url>
     </repository>
   </repositories>
 
   <dependencies>
     <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>4.12</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_2.10</artifactId>
-      <version>2.2.4</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>commons-codec</groupId>
-      <artifactId>commons-codec</artifactId>
-      <version>1.8</version>
-    </dependency>
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-      <version>2.4</version>
-    </dependency>
-    <dependency>
-      <groupId>org.jsoup</groupId>
-      <artifactId>jsoup</artifactId>
-      <version>1.7.3</version>
-    </dependency>
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-      <version>14.0.1</version> 
-      <!-- downgrade for Hadoop WARC indexer, see also https://issues.apache.org/jira/browse/HADOOP-10961 -->
-    </dependency>
-    <dependency>
-      <groupId>tl.lin</groupId>
-      <artifactId>lintools-datatypes</artifactId>
-      <version>1.0.0</version>
+      <groupId>org.warcbase</groupId>
+      <artifactId>warcbase-core</artifactId>
+      <version>0.1.0-SNAPSHOT</version>
     </dependency>
 
-    <!-- Begin: Hadoop-related dependencies -->
     <dependency>
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-client</artifactId>
       <version>${hbase.version}</version>
       <exclusions>
         <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-core</artifactId></exclusion>
       </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-server</artifactId>
       <version>${hbase.version}</version>
       <exclusions>
         <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-core</artifactId></exclusion>
         <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>servlet-api-2.5</artifactId></exclusion>
         <exclusion><groupId>javax.servlet</groupId><artifactId>servlet-api</artifactId></exclusion>
         <exclusion><groupId>asm</groupId><artifactId>asm</artifactId></exclusion>
       </exclusions>
     </dependency>
 
-    <!-- See http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/cdhvd_hadoop_api_dependencies.html -->
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-      <version>${hadoop.version}</version>
-      <exclusions>
-        <exclusion><groupId>javax.servlet</groupId><artifactId>servlet-api</artifactId></exclusion>
-      </exclusions>
-    </dependency>
-
     <dependency>
       <groupId>org.apache.zookeeper</groupId>
       <artifactId>zookeeper</artifactId>
       <version>${zookeeper.version}</version>
     </dependency>
 
-    <!-- End: Hadoop-related dependencies -->
-
-    <dependency>
-      <groupId>org.netpreserve.openwayback</groupId>
-      <artifactId>openwayback-core</artifactId>
-      <version>2.0.0.BETA.2</version>
-      <exclusions>
-        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-core</artifactId></exclusion>
-        <exclusion><groupId>ch.qos.logback</groupId><artifactId>logback-classic</artifactId></exclusion>
-        <exclusion><groupId>org.netpreserve.openwayback</groupId><artifactId>openwayback-cdx-server</artifactId></exclusion>
-        <exclusion><groupId>org.netpreserve.openwayback</groupId><artifactId>openwayback-access-control-core</artifactId></exclusion>
-        <!-- swap in our own latest versions -->
-        <exclusion><groupId>it.unimi.dsi</groupId><artifactId>dsiutils</artifactId></exclusion>
-        <exclusion><groupId>fastutil</groupId><artifactId>fastutil</artifactId></exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>org.netpreserve.commons</groupId>
-      <artifactId>webarchive-commons</artifactId>
-      <version>1.1.4</version>
-      <exclusions>
-        <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-core</artifactId></exclusion>
-        <exclusion><groupId>commons-lang</groupId><artifactId>commons-lang</artifactId></exclusion>
-        <exclusion><groupId>fastutil</groupId><artifactId>fastutil</artifactId></exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>it.unimi.dsi</groupId>
-      <artifactId>dsiutils</artifactId>
-      <version>2.2.0</version>
-      <exclusions>
-        <exclusion><groupId>ch.qos.logback</groupId><artifactId>logback-classic</artifactId></exclusion>
-        <exclusion><groupId>commons-lang</groupId><artifactId>commons-lang</artifactId></exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>it.unimi.dsi</groupId>
-      <artifactId>fastutil</artifactId>
-      <version>6.5.15</version>
-      <exclusions>
-        <exclusion><groupId>commons-lang</groupId><artifactId>commons-lang</artifactId></exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.eclipse.jetty</groupId>
-      <artifactId>jetty-server</artifactId>
-      <version>${jettyVersion}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.eclipse.jetty</groupId>
-      <artifactId>jetty-webapp</artifactId>
-      <version>${jettyVersion}</version>
-      <optional>true</optional>
-    </dependency>
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-log4j12</artifactId>
-      <version>1.6.4</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-      <version>3.0</version>
-    </dependency>
-    <dependency>
-      <groupId>commons-cli</groupId>
-      <artifactId>commons-cli</artifactId>
-      <version>1.2</version>
-    </dependency>
-
-    <dependency>
-      <groupId>net.sf.opencsv</groupId>
-      <artifactId>opencsv</artifactId>
-      <version>2.3</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.tika</groupId>
-      <artifactId>tika-core</artifactId>
-      <version>1.9</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.tika</groupId>
-      <artifactId>tika-parsers</artifactId>
-      <version>1.9</version>
-    </dependency>
-
     <dependency>
-      <groupId>org.antlr</groupId>
-      <artifactId>antlr</artifactId>
-      <version>3.5.2</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.seleniumhq.selenium</groupId>
-      <artifactId>selenium-java</artifactId>
-      <version>2.42.2</version>
-      <exclusions>
-        <exclusion><groupId>org.seleniumhq.selenium</groupId><artifactId>selenium-htmlunit-driver</artifactId></exclusion>
-        <exclusion><groupId>org.seleniumhq.selenium</groupId><artifactId>selenium-ie-driver</artifactId></exclusion>
-        <exclusion><groupId>org.webbitserver</groupId><artifactId>webbit</artifactId></exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.scala-lang</groupId>
-      <artifactId>scala-library</artifactId>
-      <version>2.10.4</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_2.10</artifactId>
-      <version>${spark.version}</version>
+      <groupId>uk.bl.wa.discovery</groupId>
+      <artifactId>warc-hadoop-indexer</artifactId>
+      <version>2.2.0-BETA-5</version>
       <exclusions>
+        <exclusion><groupId>asm</groupId><artifactId>asm</artifactId></exclusion>
         <exclusion><groupId>com.typesafe</groupId><artifactId>config</artifactId></exclusion>
-        <exclusion><groupId>org.xerial.snappy</groupId><artifactId>snappy-java</artifactId>
-      </exclusion>
       </exclusions>
     </dependency>
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-graphx_2.10</artifactId>
-      <version>${spark.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.chuusai</groupId>
-      <artifactId>shapeless_2.10.4</artifactId>
-      <version>2.0.0</version>
-    </dependency>
-    <dependency>
-      <groupId>com.fasterxml.jackson.core</groupId>
-      <artifactId>jackson-core</artifactId>
-      <version>2.7.2</version>
-    </dependency>
-    <dependency>
-      <groupId>com.fasterxml.jackson.core</groupId>
-      <artifactId>jackson-databind</artifactId>
-      <version>2.7.2</version>
-    </dependency>
-    <dependency>
-      <groupId>org.json4s</groupId>
-      <artifactId>json4s-jackson_2.10</artifactId>
-      <version>3.2.10</version>
-      <!-- see this issue: http://stackoverflow.com/questions/32400061/spark-streaming-json4s-jackson-dependency-problems -->
-    </dependency>
-
-    <dependency>
-      <groupId>com.typesafe</groupId>
-      <artifactId>config</artifactId>
-      <version>1.2.1</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.xerial.snappy</groupId>
-      <artifactId>snappy-java</artifactId>
-      <version>1.0.5</version>
-    </dependency>
-
-    <dependency>
-      <groupId>edu.stanford.nlp</groupId>
-      <artifactId>stanford-corenlp</artifactId>
-      <version>3.4.1</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.syncthemall</groupId>
-      <artifactId>boilerpipe</artifactId>
-      <version>1.2.2</version>
-    </dependency>
-
-    <dependency>
-      <groupId>xerces</groupId>
-      <artifactId>xercesImpl</artifactId>
-      <version>2.11.0</version>
-    </dependency>
 
     <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-core</artifactId>
       <version>4.7.2</version>
     </dependency>
     <dependency>
       <groupId>org.apache.solr</groupId>
       <artifactId>solr-core</artifactId>
       <version>4.7.2</version>
       <exclusions>
         <exclusion><artifactId>slf4j-api</artifactId><groupId>org.slf4j</groupId></exclusion>
         <exclusion><artifactId>org.apache.hadoop</artifactId><groupId>hadoop-annotations</groupId></exclusion>
         <exclusion><artifactId>org.apache.hadoop</artifactId><groupId>hadoop-common</groupId></exclusion>
         <exclusion><artifactId>org.apache.hadoop</artifactId><groupId>hadoop-hdfs</groupId></exclusion>
         <exclusion><groupId>com.typesafe</groupId><artifactId>config</artifactId></exclusion>
       </exclusions>
     </dependency>
 
     <dependency>
-      <groupId>uk.bl.wa.discovery</groupId>
-      <artifactId>warc-hadoop-indexer</artifactId>
-      <version>2.2.0-BETA-5</version>
+      <groupId>org.seleniumhq.selenium</groupId>
+      <artifactId>selenium-java</artifactId>
+      <version>2.42.2</version>
       <exclusions>
-        <exclusion><groupId>asm</groupId><artifactId>asm</artifactId></exclusion>
-        <exclusion><groupId>com.typesafe</groupId><artifactId>config</artifactId></exclusion>
+        <exclusion><groupId>org.seleniumhq.selenium</groupId><artifactId>selenium-htmlunit-driver</artifactId></exclusion>
+        <exclusion><groupId>org.seleniumhq.selenium</groupId><artifactId>selenium-ie-driver</artifactId></exclusion>
+        <exclusion><groupId>org.webbitserver</groupId><artifactId>webbit</artifactId></exclusion>
       </exclusions>
     </dependency>
 
+
   </dependencies>
 </project>