#!/usr/bin/env python
#
# Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License. See accompanying
# LICENSE file.
#
# NOTE(review): this is the post-image of the following patch hunk, with the
# '+' lines applied and the '-' lines (the old "hbase" / "hbase1" database
# entries) dropped:
#   diff --git a/bin/ycsb b/bin/ycsb
#   index 1f836486..d18de33c 100755
#   --- a/bin/ycsb
#   +++ b/bin/ycsb
#   @@ -1,225 +1,226 @@
"""Launcher for the YCSB Java clients.

Usage: ``ycsb command database [options]``.  The script resolves the Java
class for the requested command and database binding, assembles a classpath
(from the packaged ``lib`` directories of a distribution, or by invoking
Maven in a source checkout) and then runs ``java`` with those settings.
"""

import argparse
import fnmatch
import io
import os
import shlex
import sys
import subprocess

BASE_URL = "https://github.com/brianfrankcooper/YCSB/tree/master/"

# command name -> extra client flag, help text and Java entry point.
COMMANDS = {
    "shell": {
        "command": "",
        "description": "Interactive mode",
        "main": "com.yahoo.ycsb.CommandLine",
    },
    "load": {
        "command": "-load",
        "description": "Execute the load phase",
        "main": "com.yahoo.ycsb.Client",
    },
    "run": {
        "command": "-t",
        "description": "Execute the transaction phase",
        "main": "com.yahoo.ycsb.Client",
    },
}

# database name -> Java client class.  The part of the name before the first
# "-" is also used as the binding's directory / Maven artifact prefix.
DATABASES = {
    "accumulo": "com.yahoo.ycsb.db.AccumuloClient",
    "aerospike": "com.yahoo.ycsb.db.AerospikeClient",
    "basic": "com.yahoo.ycsb.BasicDB",
    "cassandra-7": "com.yahoo.ycsb.db.CassandraClient7",
    "cassandra-8": "com.yahoo.ycsb.db.CassandraClient8",
    "cassandra-10": "com.yahoo.ycsb.db.CassandraClient10",
    "cassandra-cql": "com.yahoo.ycsb.db.CassandraCQLClient",
    "couchbase": "com.yahoo.ycsb.db.CouchbaseClient",
    "dynamodb": "com.yahoo.ycsb.db.DynamoDBClient",
    "elasticsearch": "com.yahoo.ycsb.db.ElasticSearchClient",
    "gemfire": "com.yahoo.ycsb.db.GemFireClient",
    # hbase094 and hbase098 are separate bindings that share one client class.
    "hbase094": "com.yahoo.ycsb.db.HBaseClient",
    "hbase098": "com.yahoo.ycsb.db.HBaseClient",
    "hbase10": "com.yahoo.ycsb.db.HBaseClient10",
    "hypertable": "com.yahoo.ycsb.db.HypertableClient",
    "infinispan-cs": "com.yahoo.ycsb.db.InfinispanRemoteClient",
    "infinispan": "com.yahoo.ycsb.db.InfinispanClient",
    "jdbc": "com.yahoo.ycsb.db.JdbcDBClient",
    "mapkeeper": "com.yahoo.ycsb.db.MapKeeperClient",
    "mongodb": "com.yahoo.ycsb.db.MongoDbClient",
    "mongodb-async": "com.yahoo.ycsb.db.AsyncMongoDbClient",
    "nosqldb": "com.yahoo.ycsb.db.NoSqlDbClient",
    "orientdb": "com.yahoo.ycsb.db.OrientDBClient",
    "redis": "com.yahoo.ycsb.db.RedisClient",
    "tarantool": "com.yahoo.ycsb.db.TarantoolClient",
    "voldemort": "com.yahoo.ycsb.db.VoldemortClient",
}

# Option synopsis -> help text; only used to render the usage message.
OPTIONS = {
    "-P file": "Specify workload file",
    "-p key=value": "Override workload property",
    "-s": "Print status to stderr",
    "-target n": "Target ops/sec (default: unthrottled)",
    "-threads n": "Number of client threads (default: 1)",
    "-cp path": "Additional Java classpath entries",
    "-jvm-args args": "Additional arguments to the JVM",
}

# NOTE(review): the patch text this script was recovered from had its
# whitespace collapsed, so the indent width of the usage rows could not be
# read back; four spaces assumed -- confirm against the upstream script.
_USAGE_INDENT = "    "


def _usage_row(name, text):
    """Format one aligned 'name  description' row of the usage message."""
    return "%s%s %s" % (_USAGE_INDENT, name.ljust(14), text)


def usage():
    """Return the full usage/help text as a newline-terminated string.

    The text is assembled as a list of lines rather than printed into an
    ``io.BytesIO``, so it yields a native ``str`` on Python 2 and Python 3.
    """
    lines = ["%s command database [options]" % sys.argv[0]]

    lines.append("\nCommands:")
    for command in sorted(COMMANDS):
        lines.append(_usage_row(command, COMMANDS[command]["description"]))

    lines.append("\nDatabases:")
    for db in sorted(DATABASES):
        # Link to the binding directory, e.g. "cassandra-cql" -> ".../cassandra".
        lines.append(_usage_row(db, BASE_URL + db.split("-")[0]))

    lines.append("\nOptions:")
    for option in sorted(OPTIONS):
        lines.append(_usage_row(option, OPTIONS[option]))

    lines.append(
        "\nWorkload Files:\n"
        + _USAGE_INDENT
        + "There are various predefined workloads under workloads/ directory.\n"
        + _USAGE_INDENT
        + "See https://github.com/brianfrankcooper/YCSB/wiki/Core-Properties\n"
        + _USAGE_INDENT
        + "for the list of workload properties.")

    return "\n".join(lines) + "\n"


def _log(tag, message):
    """Write one tagged diagnostic line to stderr."""
    sys.stderr.write("%s %s\n" % (tag, message))


def debug(message):
    """Print a [DEBUG] line to stderr."""
    _log("[DEBUG] ", message)


def warn(message):
    """Print a [WARN] line to stderr."""
    _log("[WARN] ", message)


def error(message):
    """Print an [ERROR] line to stderr."""
    _log("[ERROR] ", message)


def find_jars(dir, glob='*.jar'):
    """Return paths of all files under ``dir`` (recursively) matching ``glob``.

    A missing directory yields an empty list, because ``os.walk`` ignores
    listing errors by default.
    """
    jars = []
    for dirpath, _dirnames, filenames in os.walk(dir):
        jars.extend(os.path.join(dirpath, name)
                    for name in fnmatch.filter(filenames, glob))
    return jars


def get_ycsb_home():
    """Return the nearest ancestor of this script's directory that contains
    LICENSE.txt.

    Exits with status 1 if the filesystem root is reached without finding
    the file; the search previously looped forever in that case.
    """
    start = os.path.abspath(os.path.dirname(sys.argv[0]))
    current = start
    while "LICENSE.txt" not in os.listdir(current):
        parent = os.path.dirname(current)
        if parent == current:
            # dirname() of a filesystem root is the root itself: nowhere
            # left to look.
            error("Unable to locate the YCSB home directory: no LICENSE.txt "
                  "found in '" + start + "' or any parent directory.")
            sys.exit(1)
        current = parent
    return current


def is_distribution():
    """Return True for a packaged distribution, False for a source checkout."""
    # If there's a top level pom, we're a source checkout. otherwise a dist
    # artifact
    return "pom.xml" not in os.listdir(get_ycsb_home())


def parse_maven_classpath(mvn_output):
    """Return the value of the last ``classpath=...`` line in ``mvn_output``.

    Maven prints one such line per built module and the datastore binding is
    the last module.  Returns None when no such line is present.
    """
    marker = "classpath="
    found = [line for line in mvn_output.splitlines()
             if line.startswith(marker)]
    if not found:
        return None
    return found[-1][len(marker):]


# Run the maven dependency plugin to get the local jar paths.
# presumes maven can run, so should only be run on source checkouts
# will invoke the 'package' goal for the given binding in order to resolve
# intra-project deps
# presumes maven properly handles system-specific path separators
def get_classpath_from_maven(database):
    """Build ``database``'s binding with Maven and return its classpath string.

    Exits the process (after printing an [ERROR] message) when Maven cannot
    be started, returns a non-zero status, or prints no classpath line.
    """
    mvn_command = ["mvn", "-pl", "com.yahoo.ycsb:" + database + "-binding",
                   "-am", "package", "-DskipTests",
                   "dependency:build-classpath",
                   "-DincludeScope=compile",
                   "-Dmdep.outputFilterFile=true"]
    debug("Running '" + " ".join(mvn_command) + "'")
    try:
        # universal_newlines=True makes the output text on Python 3 as well.
        mvn_output = subprocess.check_output(mvn_command,
                                             universal_newlines=True)
    except subprocess.CalledProcessError as err:
        error("Attempting to generate a classpath from Maven failed "
              "with return code '" + str(err.returncode) + "'. The output "
              "from Maven follows, try running "
              "'mvn -DskipTests package dependency:build-classpath' on your "
              "own and correct errors." + os.linesep + os.linesep +
              "mvn output:" + os.linesep + (err.output or ""))
        sys.exit(err.returncode)
    except OSError as err:
        # Raised when the 'mvn' executable itself cannot be launched.
        error("Unable to run 'mvn': " + str(err))
        sys.exit(1)

    classpath = parse_maven_classpath(mvn_output)
    if classpath is None:
        error("Maven finished successfully but printed no 'classpath=' "
              "line." + os.linesep + os.linesep +
              "mvn output:" + os.linesep + mvn_output)
        sys.exit(1)
    return classpath


def main():
    """Parse the command line, assemble the classpath and run the Java client.

    Returns the exit status of the ``java`` process.
    """
    p = argparse.ArgumentParser(
        usage=usage(),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument('-cp', dest='classpath',
                   help="Additional classpath entries, e.g. "
                        "'-cp /tmp/hbase-1.0.1.1/conf'. Will be "
                        "prepended to the YCSB classpath.")
    p.add_argument("-jvm-args", default=[], type=shlex.split,
                   help="Additional arguments to pass to 'java', "
                        "e.g. '-Xmx4g'")
    p.add_argument("command", choices=sorted(COMMANDS),
                   help="Command to run.")
    p.add_argument("database", choices=sorted(DATABASES),
                   help="Database to test.")
    # Anything argparse does not recognise is forwarded to the Java client.
    args, remaining = p.parse_known_args()
    ycsb_home = get_ycsb_home()

    # Use JAVA_HOME to find java binary if set, otherwise just use PATH.
    java = "java"
    java_home = os.getenv("JAVA_HOME")
    if java_home:
        java = os.path.join(java_home, "bin", "java")

    db_classname = DATABASES[args.database]
    command = COMMANDS[args.command]["command"]
    main_classname = COMMANDS[args.command]["main"]

    # Classpath set up
    binding = args.database.split("-")[0]
    if is_distribution():
        db_dir = os.path.join(ycsb_home, binding + "-binding")
        # include top-level conf for when we're a binding-specific artifact.
        # If we add top-level conf to the general artifact, starting here
        # will allow binding-specific conf to override (because it's
        # prepended)
        cp = [os.path.join(ycsb_home, "conf")]
        cp.extend(find_jars(os.path.join(ycsb_home, "lib")))
        cp.extend(find_jars(os.path.join(db_dir, "lib")))
    else:
        warn("Running against a source checkout. In order to get our runtime "
             "dependencies we'll have to invoke Maven. Depending on the state "
             "of your system, this may take ~30-45 seconds")
        db_dir = os.path.join(ycsb_home, binding)
        # goes first so we can rely on side-effect of package
        maven_says = get_classpath_from_maven(binding)
        # TODO when we have a version property, skip the glob
        cp = find_jars(os.path.join(db_dir, "target"),
                       binding + "-binding*.jar")
        # already in jar:jar:jar form
        cp.append(maven_says)
    cp.insert(0, os.path.join(db_dir, "conf"))
    classpath = os.pathsep.join(cp)
    if args.classpath:
        classpath = os.pathsep.join([args.classpath, classpath])

    ycsb_command = ([java] + args.jvm_args +
                    ["-cp", classpath, main_classname,
                     "-db", db_classname] + remaining)
    if command:
        ycsb_command.append(command)
    # Echo the exact command line before running it.
    sys.stderr.write(" ".join(ycsb_command) + "\n")
    return subprocess.call(ycsb_command)


if __name__ == '__main__':
    sys.exit(main())

# ---------------------------------------------------------------------------
# Remainder of the patch that followed this script on the same physical line
# (not Python; kept verbatim so no patch content is lost):
#
#   diff --git a/distribution/pom.xml b/distribution/pom.xml
#   index 66675125..82592028 100644
#   --- a/distribution/pom.xml
#   +++ b/distribution/pom.xml
#   @@ -1,156 +1,161 @@
#   4.0.0 com.yahoo.ycsb root 0.4.0-SNAPSHOT ycsb YCSB Release Distribution
#   Builder pom This module creates the release package of the YCSB with all
#   DB library bindings. It is only used by the build process and does not
#   contain any real code of itself.
# ---------------------------------------------------------------------------
com.yahoo.ycsb core ${project.version} com.yahoo.ycsb accumulo-binding ${project.version} com.yahoo.ycsb aerospike-binding ${project.version} com.yahoo.ycsb cassandra-binding ${project.version} com.yahoo.ycsb couchbase-binding ${project.version} com.yahoo.ycsb dynamodb-binding ${project.version} com.yahoo.ycsb elasticsearch-binding ${project.version} com.yahoo.ycsb gemfire-binding ${project.version} com.yahoo.ycsb - hbase-binding + hbase094-binding ${project.version} com.yahoo.ycsb - hbase1-binding + hbase098-binding + ${project.version} + + + com.yahoo.ycsb + hbase10-binding ${project.version} com.yahoo.ycsb hypertable-binding ${project.version} com.yahoo.ycsb infinispan-binding ${project.version} com.yahoo.ycsb jdbc-binding ${project.version} com.yahoo.ycsb mongodb-binding ${project.version} com.yahoo.ycsb orientdb-binding ${project.version} com.yahoo.ycsb redis-binding ${project.version} com.yahoo.ycsb tarantool-binding ${project.version} org.apache.maven.plugins maven-assembly-plugin ${maven.assembly.version} src/main/assembly/distribution.xml false package single diff --git a/hbase094/README.md b/hbase094/README.md new file mode 100644 index 00000000..3df8264c --- /dev/null +++ b/hbase094/README.md @@ -0,0 +1,23 @@ + + +# HBase (0.94.x) Driver for YCSB +This driver is a binding for the YCSB facilities to operate against a HBase 0.94.x Server cluster. It may also work against 0.92.x +To run against an HBase 0.98.x cluster, use the `hbase098` binding. +To run against an HBase >= 1.0 cluster, use the `hbase10` binding. + +See `hbase098/README.md` for configuration details. 
diff --git a/hbase1/pom.xml b/hbase094/pom.xml similarity index 58% copy from hbase1/pom.xml copy to hbase094/pom.xml index 6d5ea1fd..3a3ea06e 100644 --- a/hbase1/pom.xml +++ b/hbase094/pom.xml @@ -1,44 +1,68 @@ - 4.0.0 com.yahoo.ycsb binding-parent 0.4.0-SNAPSHOT ../binding-parent/ - hbase1-binding - HBase 1.0 DB Binding + hbase094-binding + HBase 0.94.x DB Binding + org.apache.hbase - hbase-client - ${hbase1.version} + hbase + ${hbase094.version} + + + org.apache.hadoop + hadoop-core + 1.0.4 + + + com.yahoo.ycsb + hbase098-binding + ${project.version} + + + + * + * + + + + + org.slf4j + slf4j-simple + 1.7.12 com.yahoo.ycsb core ${project.version} provided diff --git a/hbase/README.md b/hbase098/README.md similarity index 92% rename from hbase/README.md rename to hbase098/README.md index 9bc01c4c..fc75c626 100644 --- a/hbase/README.md +++ b/hbase098/README.md @@ -1,72 +1,73 @@ -# HBase (0.9x) Driver for YCSB -This driver is a binding for the YCSB facilities to operate against a HBase 0.9x Server cluster. -To run against an HBase >= 1.0 cluster, use the `hbase1` binding. +# HBase (0.98.x) Driver for YCSB +This driver is a binding for the YCSB facilities to operate against a HBase 0.98.x Server cluster. +To run against an HBase 0.94.x cluster, use the `hbase094` binding. +To run against an HBase >= 1.0 cluster, use the `hbase10` binding. ## Quickstart ### 1. Start a HBase Server You need to start a single node or a cluster to point the client at. Please see [Apache HBase Reference Guide](http://hbase.apache.org/book.html) for more details and instructions. ### 2. Set up YCSB You need to clone the repository and compile everything. ``` git clone git://github.com/brianfrankcooper/YCSB.git cd YCSB mvn clean package ``` ### 3. 
Create a HBase table for testing For best results, use the pre-splitting strategy recommended in [HBASE-4163](https://issues.apache.org/jira/browse/HBASE-4163): ``` hbase(main):001:0> n_splits = 200 # HBase recommends (10 * number of regionservers) hbase(main):002:0> create 'usertable', 'family', {SPLITS => (1..n_splits).map {|i| "user#{1000+i*(9999-1000)/n_splits}"}} ``` *Failing to do so will cause all writes to initially target a single region server*. ### 4. Run the Workload Before you can actually run the workload, you need to "load" the data first. You should specify a HBase config directory (or any other directory containing your hbase-site.xml), a table name and a column family (-cp is used to set the Java classpath and -p is used to set various properties). ``` bin/ycsb load hbase098 -P workloads/workloada -cp /HBASE-HOME-DIR/conf -p table=usertable -p columnfamily=family ``` Then, you can run the workload: ``` bin/ycsb run hbase098 -P workloads/workloada -cp /HBASE-HOME-DIR/conf -p table=usertable -p columnfamily=family ``` Please see the general instructions in the `doc` folder if you are not sure how it all works. You can apply additional properties (as seen in the next section) like this: ``` bin/ycsb run hbase098 -P workloads/workloada -cp /HBASE-HOME-DIR/conf -p table=usertable -p columnfamily=family -p clientbuffering=true ``` ## Configuration Options The following options can be configured using `-p`. * `columnfamily`: The HBase column family to target. * `debug` : If true, debugging logs are activated. The default is false.
diff --git a/hbase/pom.xml b/hbase098/pom.xml similarity index 91% rename from hbase/pom.xml rename to hbase098/pom.xml index 1d42f71b..627f31e8 100644 --- a/hbase/pom.xml +++ b/hbase098/pom.xml @@ -1,44 +1,44 @@ - 4.0.0 com.yahoo.ycsb binding-parent 0.4.0-SNAPSHOT ../binding-parent/ - hbase-binding - HBase 0.9x DB Binding + hbase098-binding + HBase 0.98.x DB Binding org.apache.hbase hbase-client - ${hbase.version} + ${hbase098.version} com.yahoo.ycsb core ${project.version} provided diff --git a/hbase/src/main/java/com/yahoo/ycsb/db/HBaseClient.java b/hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java similarity index 100% rename from hbase/src/main/java/com/yahoo/ycsb/db/HBaseClient.java rename to hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java diff --git a/hbase1/README.md b/hbase10/README.md similarity index 100% rename from hbase1/README.md rename to hbase10/README.md diff --git a/hbase1/pom.xml b/hbase10/pom.xml similarity index 94% rename from hbase1/pom.xml rename to hbase10/pom.xml index 6d5ea1fd..77693b16 100644 --- a/hbase1/pom.xml +++ b/hbase10/pom.xml @@ -1,44 +1,44 @@ - 4.0.0 com.yahoo.ycsb binding-parent 0.4.0-SNAPSHOT ../binding-parent/ - hbase1-binding + hbase10-binding HBase 1.0 DB Binding org.apache.hbase hbase-client - ${hbase1.version} + ${hbase10.version} com.yahoo.ycsb core ${project.version} provided diff --git a/hbase1/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java b/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java similarity index 100% rename from hbase1/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java rename to hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java diff --git a/pom.xml b/pom.xml index 9e15425f..c47cb468 100644 --- a/pom.xml +++ b/pom.xml @@ -1,152 +1,154 @@ 4.0.0 com.yahoo.ycsb root 0.4.0-SNAPSHOT pom YCSB Root This is the top level project that builds, packages the core and all the DB bindings for YCSB infrastructure. 
scm:git:git://github.com/brianfrankcooper/YCSB.git master https://github.com/brianfrankcooper/YCSB checkstyle checkstyle 5.0 org.jdom jdom 1.1 com.google.collections google-collections 1.0 org.slf4j slf4j-api 1.6.4 2.5.5 2.10 - 0.98.13-hadoop2 - 1.0.0 + 0.94.27 + 0.98.13-hadoop2 + 1.0.1.1 1.6.0 1.2.9 1.0.3 8.1.0 7.2.2.Final 2.1.1 3.0.3 2.0.1 1.0.1 2.0.0 0.81 UTF-8 0.8.0 0.9.5.6 1.1.8 1.6.1 3.1.2 core binding-parent accumulo aerospike cassandra couchbase distribution dynamodb elasticsearch gemfire - hbase - hbase1 + hbase094 + hbase098 + hbase10 hypertable infinispan jdbc mongodb orientdb redis tarantool org.apache.maven.plugins maven-compiler-plugin 3.3 1.6 1.6 org.apache.maven.plugins maven-checkstyle-plugin 2.15 true checkstyle.xml validate validate checkstyle