diff --git a/pom.xml b/pom.xml
index 94cf72e..22e30f0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,31 +1,41 @@
4.0.0
org.warcbase
warcbase
pom
0.1.0-SNAPSHOT
Warcbase
- An open-source platform for managing web archives built on Hadoop and HBase
+ An open-source platform for managing and analyzing web archives
http://warcbase.org/
+
+ UTF-8
+ UTF-8
+ 2.10.5
+ 2.6.0-cdh5.4.1
+ 1.3.0-cdh5.4.1
+ 1.0.0-cdh5.4.1
+ 3.4.5-cdh5.4.1
+
+
The Apache Software License, Version 2.0
http://www.apache.org/licenses/LICENSE-2.0.txt
repo
scm:git:git@github.com:lintool/warcbase.git
scm:git:git@github.com:lintool/warcbase.git
git@github.com:lintool/warcbase.git
warcbase-core
warcbase-hbase
diff --git a/warcbase-core/pom.xml b/warcbase-core/pom.xml
index db54b2c..5e38055 100644
--- a/warcbase-core/pom.xml
+++ b/warcbase-core/pom.xml
@@ -1,447 +1,228 @@
org.warcbase
warcbase
0.1.0-SNAPSHOT
4.0.0
org.warcbase
warcbase-core
jar
0.1.0-SNAPSHOT
- Warcbase
- An open-source platform for managing web archives built on Hadoop and HBase
+ Warcbase (Core)
+ An open-source platform for managing and analyzing web archives
http://warcbase.org/
The Apache Software License, Version 2.0
http://www.apache.org/licenses/LICENSE-2.0.txt
repo
scm:git:git@github.com:lintool/warcbase.git
scm:git:git@github.com:lintool/warcbase.git
git@github.com:lintool/warcbase.git
-
-
- lintool
- Jimmy Lin
- jimmylin@umd.edu
-
-
- milad621
- Milad Gholami
- mgholami@cs.umd.edu
-
-
- jeffyRao
- Jinfeng Rao
- jinfeng@cs.umd.edu
-
-
-
-
- UTF-8
- UTF-8
- 8.1.12.v20130726
- 2.6.0-cdh5.4.1
- 1.0.0-cdh5.4.1
- 3.4.5-cdh5.4.1
- 1.3.0-cdh5.4.1
- 2.10.4
-
-
-
-
-
org.apache.maven.plugins
maven-compiler-plugin
3.2
1.7
org.apache.maven.plugins
maven-shade-plugin
2.3
package
shade
-
-
-
-
- META-INF/services/org.apache.lucene.codecs.Codec
-
+
+
+ META-INF/services/org.apache.lucene.codecs.Codec
+
+
*:*
META-INF/*.SF
META-INF/*.DSA
META-INF/*.RSA
true
fatjar
org.apache.hadoop:*
+ org.apache.spark:*
-
-
-
- org.codehaus.mojo
- appassembler-maven-plugin
- 1.9
-
- -Xms512M -Xmx24576M
-
-
- org.warcbase.WarcbaseAdmin
- WarcbaseAdmin
-
-
- org.warcbase.data.UrlMappingBuilder
- UrlMappingBuilder
-
-
- org.warcbase.data.UrlMapping
- UrlMapping
-
-
- org.warcbase.data.ExtractLinks
- ExtractLinks
-
-
- org.warcbase.data.ExtractSiteLinks
- ExtractSiteLinks
-
-
- org.warcbase.ingest.IngestFiles
- IngestFiles
-
-
- org.warcbase.ingest.SearchForUrl
- SearchForUrl
-
-
- org.warcbase.browser.WarcBrowser
- WarcBrowser
-
-
- org.warcbase.analysis.DetectDuplicates
- DetectDuplicates
-
-
- org.warcbase.browser.SeleniumBrowser
- SeleniumBrowser
-
-
-
-
org.scala-tools
maven-scala-plugin
2.15.2
process-resources
add-source
compile
scala-test-compile
process-test-resources
testCompile
${scala.version}
true
-target:jvm-1.7
-g:vars
-deprecation
-dependencyfile
${project.build.directory}/.scala_dependencies
maven
http://repo.maven.apache.org/maven2/
cloudera
https://repository.cloudera.com/artifactory/cloudera-repos/
internetarchive
Internet Archive Maven Repository
http://builds.archive.org:8080/maven2
junit
junit
4.12
test
org.scalatest
scalatest_2.10
- 2.2.4
+ 2.2.5
test
- org.jsoup
- jsoup
- 1.7.3
+ org.scala-lang
+ scala-library
+ ${scala.version}
+
+
+ com.chuusai
+ shapeless_2.10.5
+ 2.0.0
+
+
+
+ org.apache.spark
+ spark-core_2.10
+ ${spark.version}
+
+
+ org.apache.spark
+ spark-graphx_2.10
+ ${spark.version}
com.google.guava
guava
14.0.1
-
+
- tl.lin
- lintools-datatypes
- 1.0.0
+ org.xerial.snappy
+ snappy-java
+ 1.0.5
-
-
- org.apache.hadoop
- hadoop-client
- ${hadoop.version}
-
- javax.servletservlet-api
-
+ org.jsoup
+ jsoup
+ 1.7.3
-
-
org.netpreserve.openwayback
openwayback-core
2.0.0.BETA.2
-
- org.apache.hadoophadoop-core
- ch.qos.logbacklogback-classic
- org.netpreserve.openwaybackopenwayback-cdx-server
- org.netpreserve.openwaybackopenwayback-access-control-core
-
- it.unimi.dsidsiutils
- fastutilfastutil
-
org.netpreserve.commons
webarchive-commons
1.1.4
-
- org.apache.hadoophadoop-core
- commons-langcommons-lang
- fastutilfastutil
-
-
-
-
- org.apache.commons
- commons-lang3
- 3.0
-
-
- commons-codec
- commons-codec
- 1.8
-
-
- commons-io
- commons-io
- 2.4
-
-
- commons-cli
- commons-cli
- 1.2
-
org.apache.tika
tika-core
1.9
org.apache.tika
tika-parsers
1.9
- org.antlr
- antlr
- 3.5.2
-
-
-
- org.scala-lang
- scala-library
- 2.10.4
-
-
- org.apache.spark
- spark-core_2.10
- ${spark.version}
-
- com.typesafeconfig
- org.xerial.snappysnappy-java
-
-
-
-
- org.apache.spark
- spark-graphx_2.10
- ${spark.version}
-
-
-
- com.chuusai
- shapeless_2.10.4
- 2.0.0
-
-
- com.fasterxml.jackson.core
- jackson-core
- 2.7.2
-
-
- com.fasterxml.jackson.core
- jackson-databind
- 2.7.2
-
-
- org.json4s
- json4s-jackson_2.10
- 3.2.10
-
-
-
-
- com.typesafe
- config
- 1.2.1
-
-
-
-
-
- edu.stanford.nlp
- stanford-corenlp
- 3.4.1
-
-
-
- com.syncthemall
- boilerpipe
- 1.2.2
-
-
-
- xerces
- xercesImpl
- 2.11.0
+ tl.lin
+ lintools-datatypes
+ 1.0.0
-
diff --git a/warcbase-hbase/pom.xml b/warcbase-hbase/pom.xml
index f367b80..b2b25e4 100644
--- a/warcbase-hbase/pom.xml
+++ b/warcbase-hbase/pom.xml
@@ -1,327 +1,250 @@
org.warcbase
warcbase
0.1.0-SNAPSHOT
4.0.0
org.warcbase
warcbase-hbase
jar
0.1.0-SNAPSHOT
- Warcbase
- An open-source platform for managing web archives built on Hadoop and HBase
+ Warcbase (HBase)
+ An open-source platform for managing and analyzing web archives
http://warcbase.org/
The Apache Software License, Version 2.0
http://www.apache.org/licenses/LICENSE-2.0.txt
repo
scm:git:git@github.com:lintool/warcbase.git
scm:git:git@github.com:lintool/warcbase.git
git@github.com:lintool/warcbase.git
-
-
- lintool
- Jimmy Lin
- jimmylin@umd.edu
-
-
- milad621
- Milad Gholami
- mgholami@cs.umd.edu
-
-
- jeffyRao
- Jinfeng Rao
- jinfeng@cs.umd.edu
-
-
-
-
- UTF-8
- UTF-8
- 8.1.12.v20130726
- 2.6.0-cdh5.4.1
- 1.0.0-cdh5.4.1
- 3.4.5-cdh5.4.1
- 1.3.0-cdh5.4.1
- 2.10.4
-
-
-
-
- maven-clean-plugin
- 2.6.1
-
-
-
- src/main/solr/lib
- false
-
-
-
-
-
org.apache.maven.plugins
maven-compiler-plugin
3.2
1.7
org.apache.maven.plugins
maven-shade-plugin
2.3
package
shade
-
-
-
-
- META-INF/services/org.apache.lucene.codecs.Codec
-
+
+
+ META-INF/services/org.apache.lucene.codecs.Codec
+
+
+
*:*
META-INF/*.SF
META-INF/*.DSA
META-INF/*.RSA
true
fatjar
org.apache.hadoop:*
-
- org.apache.maven.plugins
- maven-dependency-plugin
- 2.4
-
-
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+ 2.4
+
+
copy
package
copy-dependencies
src/main/solr/lib
-
-
-
+
+
+
org.codehaus.mojo
appassembler-maven-plugin
1.9
-Xms512M -Xmx24576M
org.warcbase.WarcbaseAdmin
WarcbaseAdmin
org.warcbase.data.UrlMappingBuilder
UrlMappingBuilder
org.warcbase.data.UrlMapping
UrlMapping
org.warcbase.data.ExtractLinks
ExtractLinks
org.warcbase.data.ExtractSiteLinks
ExtractSiteLinks
org.warcbase.ingest.IngestFiles
IngestFiles
org.warcbase.ingest.SearchForUrl
SearchForUrl
org.warcbase.browser.WarcBrowser
WarcBrowser
org.warcbase.analysis.DetectDuplicates
DetectDuplicates
org.warcbase.browser.SeleniumBrowser
SeleniumBrowser
org.scala-tools
maven-scala-plugin
2.15.2
process-resources
add-source
compile
scala-test-compile
process-test-resources
testCompile
${scala.version}
true
-target:jvm-1.7
-g:vars
-deprecation
-dependencyfile
${project.build.directory}/.scala_dependencies
maven
http://repo.maven.apache.org/maven2/
cloudera
https://repository.cloudera.com/artifactory/cloudera-repos/
internetarchive
Internet Archive Maven Repository
http://builds.archive.org:8080/maven2
org.warcbase
warcbase-core
0.1.0-SNAPSHOT
org.apache.hbase
hbase-client
${hbase.version}
-
- org.apache.hadoophadoop-core
-
org.apache.hbase
hbase-server
${hbase.version}
-
- org.apache.hadoophadoop-core
- org.mortbay.jettyservlet-api-2.5
- javax.servletservlet-api
- asmasm
-
-
-
-
- org.apache.zookeeper
- zookeeper
- ${zookeeper.version}
uk.bl.wa.discovery
warc-hadoop-indexer
2.2.0-BETA-5
-
- asmasm
- com.typesafeconfig
-
org.apache.lucene
lucene-core
4.7.2
org.apache.solr
solr-core
4.7.2
-
- slf4j-apiorg.slf4j
- org.apache.hadoophadoop-annotations
- org.apache.hadoophadoop-common
- org.apache.hadoophadoop-hdfs
- com.typesafeconfig
-
org.seleniumhq.selenium
selenium-java
2.42.2
-
- org.seleniumhq.seleniumselenium-htmlunit-driver
- org.seleniumhq.seleniumselenium-ie-driver
- org.webbitserverwebbit
-
-