4.0.0 org.warcbase warcbase jar 0.1.0-SNAPSHOT Warcbase An open-source platform for managing web archives built on Hadoop and HBase http://warcbase.org/ The Apache Software License, Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.txt repo scm:git:git@github.com:lintool/warcbase.git scm:git:git@github.com:lintool/warcbase.git git@github.com:lintool/warcbase.git lintool Jimmy Lin jimmylin@umd.edu milad621 Milad Gholami mgholami@cs.umd.edu jeffyRao Jinfeng Rao jinfeng@cs.umd.edu org.sonatype.oss oss-parent 7 UTF-8 UTF-8 8.1.12.v20130726 2.6.0-cdh5.4.1 1.0.0-cdh5.4.1 3.4.5-cdh5.4.1 0.12.0-cdh5.4.1 1.3.0-cdh5.4.1 2.10.4 maven-clean-plugin 2.6.1 src/main/solr/lib false org.apache.maven.plugins maven-compiler-plugin 3.2 1.7 1.7 org.apache.maven.plugins maven-shade-plugin 2.3 package shade META-INF/services/org.apache.lucene.codecs.Codec *:* META-INF/*.SF META-INF/*.DSA META-INF/*.RSA true fatjar org.apache.hadoop:* org.apache.maven.plugins maven-dependency-plugin 2.4 copy package copy-dependencies src/main/solr/lib org.codehaus.mojo appassembler-maven-plugin 1.9 -Xms512M -Xmx24576M org.warcbase.WarcbaseAdmin WarcbaseAdmin org.warcbase.data.UrlMappingBuilder UrlMappingBuilder org.warcbase.data.UrlMapping UrlMapping org.warcbase.data.ExtractLinks ExtractLinks org.warcbase.data.ExtractSiteLinks ExtractSiteLinks org.warcbase.ingest.IngestFiles IngestFiles org.warcbase.ingest.SearchForUrl SearchForUrl org.warcbase.browser.WarcBrowser WarcBrowser org.warcbase.analysis.DetectDuplicates DetectDuplicates org.warcbase.browser.SeleniumBrowser SeleniumBrowser org.scala-tools maven-scala-plugin 2.15.2 process-resources add-source compile scala-test-compile process-test-resources testCompile ${scala.version} true -target:jvm-1.7 -g:vars -deprecation -dependencyfile ${project.build.directory}/.scala_dependencies maven http://repo.maven.apache.org/maven2/ cloudera https://repository.cloudera.com/artifactory/cloudera-repos/ internetarchive Internet Archive Maven Repository http://builds.archive.org:8080/maven2 junit junit 4.12 test org.scalatest scalatest_2.10 2.2.4 test commons-codec commons-codec 1.8 commons-io commons-io 2.4 org.jsoup jsoup 1.7.3 com.google.guava guava 14.0.1 tl.lin lintools-datatypes 1.0.0 org.apache.hbase hbase-client ${hbase.version} org.apache.hadoophadoop-core org.apache.hbase hbase-server ${hbase.version} org.apache.hadoophadoop-core org.mortbay.jettyservlet-api-2.5 javax.servletservlet-api asmasm org.apache.hadoop hadoop-client ${hadoop.version} javax.servletservlet-api org.apache.zookeeper zookeeper ${zookeeper.version} org.apache.pig pig ${pig.version} org.mortbay.jettyservlet-api-2.5 javax.servletservlet-api org.apache.pig pigunit ${pig.version} commons-langcommons-lang commons-loggingcommons-logging org.netpreserve.openwayback openwayback-core 2.0.0.BETA.2 org.apache.hadoophadoop-core ch.qos.logbacklogback-classic org.netpreserve.openwaybackopenwayback-cdx-server org.netpreserve.openwaybackopenwayback-access-control-core it.unimi.dsidsiutils fastutilfastutil org.netpreserve.commons webarchive-commons 1.1.4 org.apache.hadoophadoop-core commons-langcommons-lang fastutilfastutil it.unimi.dsi dsiutils 2.2.0 ch.qos.logbacklogback-classic commons-langcommons-lang it.unimi.dsi fastutil 6.5.15 commons-langcommons-lang org.eclipse.jetty jetty-server ${jettyVersion} org.eclipse.jetty jetty-webapp ${jettyVersion} true org.slf4j slf4j-log4j12 1.6.4 org.apache.commons commons-lang3 3.0 commons-cli commons-cli 1.2 net.sf.opencsv opencsv 2.3 org.apache.tika tika-core 1.9 org.apache.tika tika-parsers 1.9 org.antlr antlr 3.5.2 org.seleniumhq.selenium selenium-java 2.42.2 org.seleniumhq.seleniumselenium-htmlunit-driver org.seleniumhq.seleniumselenium-ie-driver org.webbitserverwebbit org.scala-lang scala-library 2.10.4 org.apache.spark spark-core_2.10 ${spark.version} com.typesafeconfig com.typesafe config 1.2.1 edu.stanford.nlp stanford-corenlp 3.4.1 com.syncthemall boilerpipe 1.2.2 xerces xercesImpl 2.11.0 org.apache.lucene lucene-core 4.7.2 org.apache.solr solr-core 4.7.2 slf4j-apiorg.slf4j org.apache.hadoophadoop-annotations org.apache.hadoophadoop-common org.apache.hadoophadoop-hdfs com.typesafeconfig uk.bl.wa.discovery warc-hadoop-indexer 2.2.0-BETA-5 asmasm com.typesafeconfig