diff --git a/bin/ycsb b/bin/ycsb index 3f32ea9a..d18de33c 100755 --- a/bin/ycsb +++ b/bin/ycsb @@ -1,225 +1,226 @@ #!/usr/bin/env python # # Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you # may not use this file except in compliance with the License. You # may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. See accompanying # LICENSE file. # import argparse import fnmatch import io import os import shlex import sys import subprocess BASE_URL = "https://github.com/brianfrankcooper/YCSB/tree/master/" COMMANDS = { "shell" : { "command" : "", "description" : "Interactive mode", "main" : "com.yahoo.ycsb.CommandLine", }, "load" : { "command" : "-load", "description" : "Execute the load phase", "main" : "com.yahoo.ycsb.Client", }, "run" : { "command" : "-t", "description" : "Execute the transaction phase", "main" : "com.yahoo.ycsb.Client", }, } DATABASES = { "accumulo" : "com.yahoo.ycsb.db.AccumuloClient", "aerospike" : "com.yahoo.ycsb.db.AerospikeClient", "basic" : "com.yahoo.ycsb.BasicDB", "cassandra-7" : "com.yahoo.ycsb.db.CassandraClient7", "cassandra-8" : "com.yahoo.ycsb.db.CassandraClient8", "cassandra-10" : "com.yahoo.ycsb.db.CassandraClient10", "cassandra-cql": "com.yahoo.ycsb.db.CassandraCQLClient", "couchbase" : "com.yahoo.ycsb.db.CouchbaseClient", "dynamodb" : "com.yahoo.ycsb.db.DynamoDBClient", "elasticsearch": "com.yahoo.ycsb.db.ElasticSearchClient", "gemfire" : "com.yahoo.ycsb.db.GemFireClient", - "hbase" : "com.yahoo.ycsb.db.HBaseClient", - "hbase-10" : "com.yahoo.ycsb.db.HBaseClient10", + "hbase094" : 
"com.yahoo.ycsb.db.HBaseClient", + "hbase098" : "com.yahoo.ycsb.db.HBaseClient", + "hbase10" : "com.yahoo.ycsb.db.HBaseClient10", "hypertable" : "com.yahoo.ycsb.db.HypertableClient", "infinispan-cs": "com.yahoo.ycsb.db.InfinispanRemoteClient", "infinispan" : "com.yahoo.ycsb.db.InfinispanClient", "jdbc" : "com.yahoo.ycsb.db.JdbcDBClient", "mapkeeper" : "com.yahoo.ycsb.db.MapKeeperClient", "mongodb" : "com.yahoo.ycsb.db.MongoDbClient", "mongodb-async": "com.yahoo.ycsb.db.AsyncMongoDbClient", "nosqldb" : "com.yahoo.ycsb.db.NoSqlDbClient", "orientdb" : "com.yahoo.ycsb.db.OrientDBClient", "redis" : "com.yahoo.ycsb.db.RedisClient", "tarantool" : "com.yahoo.ycsb.db.TarantoolClient", "voldemort" : "com.yahoo.ycsb.db.VoldemortClient" } OPTIONS = { "-P file" : "Specify workload file", "-p key=value" : "Override workload property", "-s" : "Print status to stderr", "-target n" : "Target ops/sec (default: unthrottled)", "-threads n" : "Number of client threads (default: 1)", "-cp path" : "Additional Java classpath entries", "-jvm-args args" : "Additional arguments to the JVM", } def usage(): output = io.BytesIO() print >> output, "%s command database [options]" % sys.argv[0] print >> output, "\nCommands:" for command in sorted(COMMANDS.keys()): print >> output, " %s %s" % (command.ljust(14), COMMANDS[command]["description"]) print >> output, "\nDatabases:" for db in sorted(DATABASES.keys()): print >> output, " %s %s" % (db.ljust(14), BASE_URL + db.split("-")[0]) print >> output, "\nOptions:" for option in sorted(OPTIONS.keys()): print >> output, " %s %s" % (option.ljust(14), OPTIONS[option]) print >> output, """\nWorkload Files: There are various predefined workloads under workloads/ directory. 
See https://github.com/brianfrankcooper/YCSB/wiki/Core-Properties for the list of workload properties.""" return output.getvalue() def debug(message): print >> sys.stderr, "[DEBUG] ", message def warn(message): print >> sys.stderr, "[WARN] ", message def error(message): print >> sys.stderr, "[ERROR] ", message def find_jars(dir, glob='*.jar'): jars = [] for (dirpath, dirnames, filenames) in os.walk(dir): for filename in fnmatch.filter(filenames, glob): jars.append(os.path.join(dirpath, filename)) return jars def get_ycsb_home(): dir = os.path.abspath(os.path.dirname(sys.argv[0])) while "LICENSE.txt" not in os.listdir(dir): dir = os.path.join(dir, os.path.pardir) return os.path.abspath(dir) def is_distribution(): # If there's a top level pom, we're a source checkout. otherwise a dist artifact return "pom.xml" not in os.listdir(get_ycsb_home()) # Run the maven dependency plugin to get the local jar paths. # presumes maven can run, so should only be run on source checkouts # will invoke the 'package' goal for the given binding in order to resolve intra-project deps # presumes maven properly handles system-specific path separators def get_classpath_from_maven(database): try: debug("Running 'mvn -pl com.yahoo.ycsb:"+database+"-binding -am package -DskipTests " "dependency:build-classpath -DincludeScope=compile -Dmdep.outputFilterFile=true'") mvn_output = subprocess.check_output(["mvn", "-pl", "com.yahoo.ycsb:"+database+"-binding", "-am", "package", "-DskipTests", "dependency:build-classpath", "-DincludeScope=compile", "-Dmdep.outputFilterFile=true"]) # the above outputs a "classpath=/path/tojar:/path/to/other/jar" for each module # the last module will be the datastore binding line = [x for x in mvn_output.splitlines() if x.startswith("classpath=")][-1:] return line[0][len("classpath="):] except subprocess.CalledProcessError, err: error("Attempting to generate a classpath from Maven failed " "with return code '" + str(err.returncode) + "'. 
The output from " "Maven follows, try running " "'mvn -DskipTests package dependency:build=classpath' on your " "own and correct errors." + os.linesep + os.linesep + "mvn output:" + os.linesep + err.output) sys.exit(err.returncode) def main(): p = argparse.ArgumentParser( usage=usage(), formatter_class=argparse.RawDescriptionHelpFormatter) p.add_argument('-cp', dest='classpath', help="""Additional classpath entries, e.g. '-cp /tmp/hbase-1.0.1.1/conf'. Will be prepended to the YCSB classpath.""") p.add_argument("-jvm-args", default=[], type=shlex.split, help="""Additional arguments to pass to 'java', e.g. '-Xmx4g'""") p.add_argument("command", choices=sorted(COMMANDS), help="""Command to run.""") p.add_argument("database", choices=sorted(DATABASES), help="""Database to test.""") args, remaining = p.parse_known_args() ycsb_home = get_ycsb_home() # Use JAVA_HOME to find java binary if set, otherwise just use PATH. java = "java" java_home = os.getenv("JAVA_HOME") if java_home: java = os.path.join(java_home, "bin", "java") db_classname = DATABASES[args.database] command = COMMANDS[args.command]["command"] main_classname = COMMANDS[args.command]["main"] # Classpath set up binding = args.database.split("-")[0] if is_distribution(): db_dir = os.path.join(ycsb_home, binding + "-binding") # include top-level conf for when we're a binding-specific artifact. # If we add top-level conf to the general artifact, starting here # will allow binding-specific conf to override (because it's prepended) cp = [os.path.join(ycsb_home, "conf")] cp.extend(find_jars(os.path.join(ycsb_home, "lib"))) cp.extend(find_jars(os.path.join(db_dir, "lib"))) else: warn("Running against a source checkout. In order to get our runtime " "dependencies we'll have to invoke Maven. 
Depending on the state " "of your system, this may take ~30-45 seconds") db_dir = os.path.join(ycsb_home, binding) # goes first so we can rely on side-effect of package maven_says = get_classpath_from_maven(binding) # TODO when we have a version property, skip the glob cp = find_jars(os.path.join(db_dir, "target"), binding + "-binding*.jar") # alredy in jar:jar:jar form cp.append(maven_says) cp.insert(0, os.path.join(db_dir, "conf")) classpath = os.pathsep.join(cp) if args.classpath: classpath = os.pathsep.join([args.classpath, classpath]) ycsb_command = ([java] + args.jvm_args + ["-cp", classpath, main_classname, "-db", db_classname] + remaining) if command: ycsb_command.append(command) print >> sys.stderr, " ".join(ycsb_command) return subprocess.call(ycsb_command) if __name__ == '__main__': sys.exit(main()) diff --git a/distribution/pom.xml b/distribution/pom.xml index 2dd32d0c..82592028 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -1,151 +1,161 @@ 4.0.0 com.yahoo.ycsb root 0.4.0-SNAPSHOT ycsb YCSB Release Distribution Builder pom This module creates the release package of the YCSB with all DB library bindings. It is only used by the build process and does not contain any real code of itself. 
com.yahoo.ycsb core ${project.version} com.yahoo.ycsb accumulo-binding ${project.version} com.yahoo.ycsb aerospike-binding ${project.version} com.yahoo.ycsb cassandra-binding ${project.version} com.yahoo.ycsb couchbase-binding ${project.version} com.yahoo.ycsb dynamodb-binding ${project.version} com.yahoo.ycsb elasticsearch-binding ${project.version} com.yahoo.ycsb gemfire-binding ${project.version} com.yahoo.ycsb - hbase-binding + hbase094-binding + ${project.version} + + + com.yahoo.ycsb + hbase098-binding + ${project.version} + + + com.yahoo.ycsb + hbase10-binding ${project.version} com.yahoo.ycsb hypertable-binding ${project.version} com.yahoo.ycsb infinispan-binding ${project.version} com.yahoo.ycsb jdbc-binding ${project.version} com.yahoo.ycsb mongodb-binding ${project.version} com.yahoo.ycsb orientdb-binding ${project.version} com.yahoo.ycsb redis-binding ${project.version} com.yahoo.ycsb tarantool-binding ${project.version} org.apache.maven.plugins maven-assembly-plugin ${maven.assembly.version} src/main/assembly/distribution.xml false package single diff --git a/hbase094/README.md b/hbase094/README.md new file mode 100644 index 00000000..3df8264c --- /dev/null +++ b/hbase094/README.md @@ -0,0 +1,23 @@ + + +# HBase (0.94.x) Driver for YCSB +This driver is a binding for the YCSB facilities to operate against an HBase 0.94.x Server cluster. It may also work against 0.92.x. +To run against an HBase 0.98.x cluster, use the `hbase098` binding. +To run against an HBase >= 1.0 cluster, use the `hbase10` binding. + +See `hbase098/README.md` for configuration details. 
diff --git a/hbase/pom.xml b/hbase094/pom.xml similarity index 58% copy from hbase/pom.xml copy to hbase094/pom.xml index a5241798..3a3ea06e 100644 --- a/hbase/pom.xml +++ b/hbase094/pom.xml @@ -1,44 +1,68 @@ - 4.0.0 com.yahoo.ycsb binding-parent 0.4.0-SNAPSHOT ../binding-parent/ - hbase-binding - HBase DB Binding + hbase094-binding + HBase 0.94.x DB Binding + org.apache.hbase - hbase-client - ${hbase.version} + hbase + ${hbase094.version} + + + org.apache.hadoop + hadoop-core + 1.0.4 + + + com.yahoo.ycsb + hbase098-binding + ${project.version} + + + + * + * + + + + + org.slf4j + slf4j-simple + 1.7.12 com.yahoo.ycsb core ${project.version} provided diff --git a/hbase/README.md b/hbase098/README.md similarity index 91% rename from hbase/README.md rename to hbase098/README.md index 7316c620..fc75c626 100644 --- a/hbase/README.md +++ b/hbase098/README.md @@ -1,73 +1,73 @@ -# HBase Driver for YCSB -This driver is a binding for the YCSB facilities to operate against a HBase Server cluster. +# HBase (0.98.x) Driver for YCSB +This driver is a binding for the YCSB facilities to operate against an HBase 0.98.x Server cluster. +To run against an HBase 0.94.x cluster, use the `hbase094` binding. +To run against an HBase >= 1.0 cluster, use the `hbase10` binding. ## Quickstart ### 1. Start a HBase Server You need to start a single node or a cluster to point the client at. Please see [Apache HBase Reference Guide](http://hbase.apache.org/book.html) for more details and instructions. ### 2. Set up YCSB You need to clone the repository and compile everything. ``` git clone git://github.com/brianfrankcooper/YCSB.git cd YCSB mvn clean package ``` ### 3. 
Create a HBase table for testing For best results, use the pre-splitting strategy recommended in [HBASE-4163](https://issues.apache.org/jira/browse/HBASE-4163): ``` hbase(main):001:0> n_splits = 200 # HBase recommends (10 * number of regionservers) hbase(main):002:0> create 'usertable', 'family', {SPLITS => (1..n_splits).map {|i| "user#{1000+i*(9999-1000)/n_splits}"}} ``` *Failing to do so will cause all writes to initially target a single region server*. ### 4. Run the Workload Before you can actually run the workload, you need to "load" the data first. You should specify a HBase config directory(or any other directory containing your hbase-site.xml) and a table name and a column family(-cp is used to set java classpath and -p is used to set various properties). ``` bin/ycsb load hbase -P workloads/workloada -cp /HBASE-HOME-DIR/conf -p table=usertable -p columnfamily=family ``` Then, you can run the workload: ``` bin/ycsb run hbase -P workloads/workloada -cp /HBASE-HOME-DIR/conf -p table=usertable -p columnfamily=family ``` Please see the general instructions in the `doc` folder if you are not sure how it all works. You can apply additional properties (as seen in the next section) like this: ``` bin/ycsb run hbase -P workloads/workloada -cp /HBASE-HOME-DIR/conf -p table=usertable -p columnfamily=family -p clientbuffering=true ``` ## Configuration Options Following options can be configurable using `-p`. * `columnfamily`: The HBase column family to target. -* `clientbuffering` : If true, buffer mutations on the client. The default is false. -* `writebuffersize` : Buffer size to be used when `clientbuffering` is activated. The default is 12MB. * `debug` : If true, debugging logs are activated. The default is false. 
diff --git a/hbase/pom.xml b/hbase098/pom.xml similarity index 91% copy from hbase/pom.xml copy to hbase098/pom.xml index a5241798..627f31e8 100644 --- a/hbase/pom.xml +++ b/hbase098/pom.xml @@ -1,44 +1,44 @@ - 4.0.0 com.yahoo.ycsb binding-parent 0.4.0-SNAPSHOT ../binding-parent/ - hbase-binding - HBase DB Binding + hbase098-binding + HBase 0.98.x DB Binding org.apache.hbase hbase-client - ${hbase.version} + ${hbase098.version} com.yahoo.ycsb core ${project.version} provided diff --git a/hbase/src/main/java/com/yahoo/ycsb/db/HBaseClient.java b/hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java similarity index 100% rename from hbase/src/main/java/com/yahoo/ycsb/db/HBaseClient.java rename to hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java diff --git a/hbase10/README.md b/hbase10/README.md new file mode 100644 index 00000000..1da5bc43 --- /dev/null +++ b/hbase10/README.md @@ -0,0 +1,23 @@ + + +# HBase (1.0.x) Driver for YCSB +This driver is a binding for the YCSB facilities to operate against an HBase 1.0.x Server cluster. +To run against an HBase 0.94.x cluster, use the `hbase094` binding. +To run against an HBase 0.98.x cluster, use the `hbase098` binding. + +See `hbase098/README.md` for configuration details. 
diff --git a/hbase/pom.xml b/hbase10/pom.xml similarity index 91% rename from hbase/pom.xml rename to hbase10/pom.xml index a5241798..77693b16 100644 --- a/hbase/pom.xml +++ b/hbase10/pom.xml @@ -1,44 +1,44 @@ - 4.0.0 com.yahoo.ycsb binding-parent 0.4.0-SNAPSHOT ../binding-parent/ - hbase-binding - HBase DB Binding + hbase10-binding + HBase 1.0 DB Binding org.apache.hbase hbase-client - ${hbase.version} + ${hbase10.version} com.yahoo.ycsb core ${project.version} provided diff --git a/hbase/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java b/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java similarity index 97% rename from hbase/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java rename to hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java index b4e94987..4caaf935 100644 --- a/hbase/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java +++ b/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java @@ -1,500 +1,504 @@ /** * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You * may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. See accompanying * LICENSE file. 
*/ package com.yahoo.ycsb.db; import com.google.common.base.Preconditions; import com.yahoo.ycsb.ByteArrayByteIterator; import com.yahoo.ycsb.ByteIterator; import com.yahoo.ycsb.DBException; import com.yahoo.ycsb.measurements.Measurements; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.BufferedMutator; import org.apache.hadoop.hbase.client.BufferedMutatorParams; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; import java.io.IOException; import java.util.ConcurrentModificationException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Properties; import java.util.Random; import java.util.Set; import java.util.Vector; /** * HBase 1.0 client for YCSB framework. * * A modified version of HBaseClient (which targets HBase v0.9) utilizing the * HBase 1.0.0 API. * * This client also adds toggleable client-side buffering and configurable write durability. 
*/ public class HBaseClient10 extends com.yahoo.ycsb.DB { private static final Configuration config = HBaseConfiguration.create(); public boolean _debug=false; public String _tableName=""; public Connection _connection=null; // Depending on the value of _clientBuffering, either _bufferedMutator // (_clientBuffering) or _hTable (!_clientBuffering) will be used. public Table _table=null; public BufferedMutator _bufferedMutator=null; public String _columnFamily=""; public byte _columnFamilyBytes[]; /** * Durability to use for puts and deletes. */ public Durability _durability = Durability.USE_DEFAULT; /** * If true, buffer mutations on the client. * This is the default behavior for HBaseClient. For measuring * insert/update/delete latencies, client side buffering should be disabled. */ public boolean _clientSideBuffering = false; public long _writeBufferSize = 1024 * 1024 * 12; public static final int Ok=0; public static final int ServerError=-1; public static final int HttpError=-2; public static final int NoMatchingRecord=-3; /** * Initialize any state for this DB. * Called once per DB instance; there is one DB instance per client thread. 
*/ @Override public void init() throws DBException { if ("true".equals(getProperties().getProperty("clientbuffering", "false"))) { this._clientSideBuffering = true; } if (getProperties().containsKey("writebuffersize")) { _writeBufferSize = Long.parseLong(getProperties().getProperty("writebuffersize")); } if (getProperties().getProperty("durability") != null) { this._durability = Durability.valueOf(getProperties().getProperty("durability")); } try { _connection = ConnectionFactory.createConnection(config); } catch (java.io.IOException e) { throw new DBException(e); } if ( (getProperties().getProperty("debug")!=null) && (getProperties().getProperty("debug").compareTo("true")==0) ) { _debug=true; } _columnFamily = getProperties().getProperty("columnfamily"); if (_columnFamily == null) { System.err.println("Error, must specify a columnfamily for HBase table"); throw new DBException("No columnfamily specified"); } _columnFamilyBytes = Bytes.toBytes(_columnFamily); // Terminate right now if table does not exist, since the client // will not propagate this error upstream once the workload // starts. String table = com.yahoo.ycsb.workloads.CoreWorkload.table; try { final TableName tableName = TableName.valueOf(table); HTableDescriptor dsc = _connection.getTable(tableName).getTableDescriptor(); } catch (IOException e) { throw new DBException(e); } } /** * Cleanup any state for this DB. * Called once per DB instance; there is one DB instance per client thread. */ @Override public void cleanup() throws DBException { // Get the measurements instance as this is the only client that should // count clean up time like an update if client-side buffering is // enabled. Measurements _measurements = Measurements.getMeasurements(); try { long st=System.nanoTime(); if (_bufferedMutator != null) { _bufferedMutator.close(); } if (_table != null) { _table.close(); } long en=System.nanoTime(); final String type = _clientSideBuffering ? 
"UPDATE" : "CLEANUP"; _measurements.measure(type, (int)((en-st)/1000)); _connection.close(); } catch (IOException e) { throw new DBException(e); } } public void getHTable(String table) throws IOException { final TableName tableName = TableName.valueOf(table); this._table = this._connection.getTable(tableName); //suggestions from http://ryantwopointoh.blogspot.com/2009/01/performance-of-hbase-importing.html if (_clientSideBuffering) { final BufferedMutatorParams p = new BufferedMutatorParams(tableName); p.writeBufferSize(_writeBufferSize); this._bufferedMutator = this._connection.getBufferedMutator(p); } } /** * Read a record from the database. Each field/value pair from the result will be stored in a HashMap. * * @param table The name of the table * @param key The record key of the record to read. * @param fields The list of fields to read, or null for all of them * @param result A HashMap of field/value pairs for the result * @return Zero on success, a non-zero error code on error */ public int read(String table, String key, Set fields, HashMap result) { //if this is a "new" table, init HTable object. 
Else, use existing one if (!_tableName.equals(table)) { _table = null; try { getHTable(table); _tableName = table; } catch (IOException e) { System.err.println("Error accessing HBase table: " + e); return ServerError; } } Result r = null; try { if (_debug) { System.out.println("Doing read from HBase columnfamily "+_columnFamily); System.out.println("Doing read for key: "+key); } Get g = new Get(Bytes.toBytes(key)); if (fields == null) { g.addFamily(_columnFamilyBytes); } else { for (String field : fields) { g.addColumn(_columnFamilyBytes, Bytes.toBytes(field)); } } r = _table.get(g); } catch (IOException e) { if (_debug) { System.err.println("Error doing get: "+e); } return ServerError; } catch (ConcurrentModificationException e) { //do nothing for now...need to understand HBase concurrency model better return ServerError; } if (r.isEmpty()) { return NoMatchingRecord; } - for (Cell c : r.listCells()) { + + while (r.advance()) { + final Cell c = r.current(); result.put(Bytes.toString(CellUtil.cloneQualifier(c)), new ByteArrayByteIterator(CellUtil.cloneValue(c))); if (_debug) { System.out.println("Result for field: "+Bytes.toString(CellUtil.cloneQualifier(c))+ " is: "+Bytes.toString(CellUtil.cloneValue(c))); } } return Ok; } /** * Perform a range scan for a set of records in the database. Each field/value pair from the result will be stored in a HashMap. * * @param table The name of the table * @param startkey The record key of the first record to read. * @param recordcount The number of records to read * @param fields The list of fields to read, or null for all of them * @param result A Vector of HashMaps, where each HashMap is a set field/value pairs for one record * @return Zero on success, a non-zero error code on error */ @Override public int scan(String table, String startkey, int recordcount, Set fields, Vector> result) { //if this is a "new" table, init HTable object. 
Else, use existing one if (!_tableName.equals(table)) { _table = null; try { getHTable(table); _tableName = table; } catch (IOException e) { System.err.println("Error accessing HBase table: "+e); return ServerError; } } Scan s = new Scan(Bytes.toBytes(startkey)); //HBase has no record limit. Here, assume recordcount is small enough to bring back in one call. //We get back recordcount records s.setCaching(recordcount); //add specified fields or else all fields if (fields == null) { s.addFamily(_columnFamilyBytes); } else { for (String field : fields) { s.addColumn(_columnFamilyBytes,Bytes.toBytes(field)); } } //get results ResultScanner scanner = null; try { scanner = _table.getScanner(s); int numResults = 0; for (Result rr = scanner.next(); rr != null; rr = scanner.next()) { //get row key String key = Bytes.toString(rr.getRow()); + if (_debug) { System.out.println("Got scan result for key: "+key); } HashMap rowResult = new HashMap(); - for (KeyValue kv : rr.raw()) { + while (rr.advance()) { + final Cell cell = rr.current(); rowResult.put( - Bytes.toString(kv.getQualifier()), - new ByteArrayByteIterator(kv.getValue())); + Bytes.toString(CellUtil.cloneQualifier(cell)), + new ByteArrayByteIterator(CellUtil.cloneValue(cell))); } + //add rowResult to result vector result.add(rowResult); numResults++; if (numResults >= recordcount) //if hit recordcount, bail out { break; } } //done with row } catch (IOException e) { if (_debug) { System.out.println("Error in getting/parsing scan result: "+e); } return ServerError; } finally { if (scanner != null) { scanner.close(); } } return Ok; } /** * Update a record in the database. Any field/value pairs in the specified values HashMap will be written into the record with the specified * record key, overwriting any existing values with the same field name. 
* * @param table The name of the table * @param key The record key of the record to write * @param values A HashMap of field/value pairs to update in the record * @return Zero on success, a non-zero error code on error */ @Override public int update(String table, String key, HashMap values) { //if this is a "new" table, init HTable object. Else, use existing one if (!_tableName.equals(table)) { _table = null; try { getHTable(table); _tableName = table; } catch (IOException e) { System.err.println("Error accessing HBase table: "+e); return ServerError; } } if (_debug) { System.out.println("Setting up put for key: "+key); } Put p = new Put(Bytes.toBytes(key)); p.setDurability(_durability); for (Map.Entry entry : values.entrySet()) { byte[] value = entry.getValue().toArray(); if (_debug) { System.out.println("Adding field/value " + entry.getKey() + "/"+ Bytes.toStringBinary(value) + " to put request"); } - p.add(_columnFamilyBytes,Bytes.toBytes(entry.getKey()), value); + p.addColumn(_columnFamilyBytes,Bytes.toBytes(entry.getKey()), value); } try { if (_clientSideBuffering) { Preconditions.checkNotNull(_bufferedMutator); _bufferedMutator.mutate(p); } else{ _table.put(p); } } catch (IOException e) { if (_debug) { System.err.println("Error doing put: "+e); } return ServerError; } catch (ConcurrentModificationException e) { //do nothing for now...hope this is rare return ServerError; } return Ok; } /** * Insert a record in the database. Any field/value pairs in the specified values HashMap will be written into the record with the specified * record key. * * @param table The name of the table * @param key The record key of the record to insert. * @param values A HashMap of field/value pairs to insert in the record * @return Zero on success, a non-zero error code on error */ @Override public int insert(String table, String key, HashMap values) { return update(table,key,values); } /** * Delete a record from the database. 
* * @param table The name of the table * @param key The record key of the record to delete. * @return Zero on success, a non-zero error code on error */ @Override public int delete(String table, String key) { //if this is a "new" table, init HTable object. Else, use existing one if (!_tableName.equals(table)) { _table = null; try { getHTable(table); _tableName = table; } catch (IOException e) { System.err.println("Error accessing HBase table: "+e); return ServerError; } } if (_debug) { System.out.println("Doing delete for key: "+key); } final Delete d = new Delete(Bytes.toBytes(key)); d.setDurability(_durability); try { if (_clientSideBuffering) { Preconditions.checkNotNull(_bufferedMutator); _bufferedMutator.mutate(d); } else { _table.delete(d); } } catch (IOException e) { if (_debug) { System.err.println("Error doing delete: "+e); } return ServerError; } return Ok; } } /* For customized vim control * set autoindent * set si * set shiftwidth=4 */ diff --git a/pom.xml b/pom.xml index ed2b9b3f..383ce6d1 100644 --- a/pom.xml +++ b/pom.xml @@ -1,150 +1,154 @@ 4.0.0 com.yahoo.ycsb root 0.4.0-SNAPSHOT pom YCSB Root This is the top level project that builds, packages the core and all the DB bindings for YCSB infrastructure. scm:git:git://github.com/brianfrankcooper/YCSB.git master https://github.com/brianfrankcooper/YCSB checkstyle checkstyle 5.0 org.jdom jdom 1.1 com.google.collections google-collections 1.0 org.slf4j slf4j-api 1.6.4 2.5.5 2.10 - 1.0.0 + 0.94.27 + 0.98.14-hadoop2 + 1.0.2 1.6.0 1.2.9 1.0.3 8.1.0 7.2.2.Final 2.1.1 3.0.3 2.0.1 1.0.1 2.0.0 0.81 UTF-8 0.8.0 0.9.5.6 1.1.8 1.6.1 3.1.2 core binding-parent accumulo aerospike cassandra couchbase distribution dynamodb elasticsearch gemfire - hbase + hbase094 + hbase098 + hbase10 hypertable infinispan jdbc mongodb orientdb redis tarantool org.apache.maven.plugins maven-compiler-plugin 3.3 1.6 1.6 org.apache.maven.plugins maven-checkstyle-plugin 2.15 true checkstyle.xml validate validate checkstyle