订阅云计算RSS CSDN首页> 云计算

性能测试:SequoiaDB vs. MongoDB vs. Cassandra vs. HBase

发表于2014-09-22 14:58| 次阅读| 来源CSDN| 0 条评论| 作者云知秋

摘要:NoSQL通过弱化一部分关系型数据库特性(如一致性和关系模型)来提升其可扩展性及高可用性,弥补了关系型数据库在很多互联网类应用中的不足。因此,不同的NoSQL有着不同的杀手级应用,这里我们通过基准测试来摸索各自的适用场景。

附录B:YCSB调整

一、 驱动调整

1. MongoDB

  • 默认没有采用连接池的形式实现,调整为连接池形式
  • 默认不支持批量插入,增加支持批量插入
  • 默认不支持选择查询接口,增加支持选择查询接口
  • 默认不支持选择readpreference,增加支持选择readpreference
  • 为适应2.12.1版本的driver作了些调整

详细调整如下:

// YCSB MongoDB-binding initialization (excerpt): reads connection and tuning
// properties, then builds a pooled MongoClient from a connection-string URI.
// Properties read here: mongodb.url, mongodb.database, mongodb.writeConcern,
// mongodb.insertmode (single|bulk), mongodb.readpreference, mongodb.bulknumber,
// mongodb.usefindinterface.
void init()
   {
    ...
      Properties props = getProperties();
      // Connection string; the database name is appended below.
      String url = props.getProperty("mongodb.url",
                                           "mongodb://localhost:27017");
      database = props.getProperty("mongodb.database", "ycsb");
      // NOTE(review): writeConcernType is parsed but not used in this excerpt;
      // presumably consumed by code elided above/below — verify in full source.
      String writeConcernType = props.getProperty("mongodb.writeConcern",
                                                        "safe").toLowerCase();
      //final String maxConnections = props.getProperty(
      //        "mongodb.maxconnections", "100");
      // "bulk" batches inserts client-side (see insert()); "single" writes one at a time.
      insertmode = props.getProperty("mongodb.insertmode", "single");
      readpreference = props.getProperty("mongodb.readpreference",
            		                           "primary");
      // Number of documents accumulated before a bulk insert is flushed.
      bulknumber=Integer.parseInt(props.getProperty("mongodb.bulknumber", 
            		                                      "5000"));
      // NOTE(review): 'find' is read but not used in this excerpt — confirm
      // it drives the findOne/find choice elsewhere.
      final String find = props.getProperty("mongodb.usefindinterface", 
            		                              "false");
    // Replica acknowledgement: require replWriteNum replicas when > 1.
    if (replWriteNum != 1){
            	writeConcern = new WriteConcern(replWriteNum);
            }
            
            try {
                // strip out prefix since Java driver doesn't currently support
                // standard connection format URL yet
                // <a href="http://www.mongodb.org/display/DOCS/Connections">http://www.mongodb.org/display/DOCS/Connections</a>
               /* if (url.startsWith("mongodb://")) {
                    url = url.substring(10);
                }*/
                // need to append db to url.
                url += "/" + database;
                System.out.println("new database url = " + url);
                // MongoClient maintains an internal connection pool, replacing
                // the older non-pooled setup mentioned in the article.
                MongoClientURI uri = new MongoClientURI(url);
                mongo = new MongoClient(uri);
                mongo.setReadPreference(ReadPreference.valueOf(readpreference));
                mongo.setWriteConcern(writeConcern);
                System.out.println("mongo connection created with " + url);
            }
            catch (Exception e1) {
                System.err
                        .println("Could not initialize MongoDB connection pool for Loader: "
                                + e1.toString());
                e1.printStackTrace();
                return;
            }
}
/**
 * Insert one record into the given collection.
 *
 * Builds a document keyed by "_id" from the supplied field map and writes it
 * either immediately ("single" mode) or via a client-side batch that is
 * flushed every {@code bulknumber} documents ("bulk" mode).
 *
 * @return 0 on success, 1 on any error.
 */
public int insert(String table, String key,
            HashMap<String, ByteIterator> values) {
        com.mongodb.DB db = null;
        try {
            db = mongo.getDB(database);
            // On the very first insert, report which server flavour we are
            // talking to (TokuMX advertises a tokumxVersion build field).
            if (!outputclientflag) {
                CommandResult buildInfo = db.command("buildInfo");
                System.out.println(buildInfo.containsField("tokumxVersion")
                        ? "tokumx" : "mongodb");
                outputclientflag = true;
            }
            db.requestStart();

            DBObject doc = new BasicDBObject().append("_id", key);
            for (String field : values.keySet()) {
                doc.put(field, values.get(field).toArray());
            }

            DBCollection collection = db.getCollection(table);
            if (insertmode.equals("bulk")) {
                // Accumulate documents; flush once the batch is full.
                objs.add(doc);
                if (objs.size() == bulknumber) {
                    collection.insert(objs);
                    objs.clear();
                }
            } else {
                collection.insert(doc);
            }
            return 0;
        }
        catch (Exception e) {
            e.printStackTrace();
            return 1;
        }
        finally {
            if (db != null) {
                db.requestDone();
            }
        }
    }

/**
 * Read one record by "_id", optionally projecting a subset of fields.
 *
 * Uses findOne() when the findone flag is set; otherwise uses the cursor-based
 * find() interface (reading from a secondary when no projection is requested,
 * matching the original driver tweak).
 *
 * @return 0 when the document was found, 1 otherwise.
 */
public int read(String table, String key, Set<String> fields,
            HashMap<String, ByteIterator> result) {
        com.mongodb.DB db = null;
        DBCursor cursor = null;
        try {
            db = mongo.getDB(database);
            db.requestStart();
            DBCollection collection = db.getCollection(table);
            DBObject query = new BasicDBObject().append("_id", key);

            DBObject doc = null;
            if (fields == null) {
                // No projection: fetch the whole document.
                if (findone) {
                    doc = collection.findOne(query);
                } else {
                    cursor = collection.find(query)
                            .setReadPreference(ReadPreference.secondaryPreferred());
                }
            } else {
                // Project only the requested fields.
                DBObject projection = new BasicDBObject();
                for (String field : fields) {
                    projection.put(field, INCLUDE);
                }
                if (findone) {
                    doc = collection.findOne(query, projection);
                } else {
                    cursor = collection.find(query, projection);
                }
            }

            if (cursor != null && cursor.hasNext()) {
                doc = cursor.next();
            }
            if (doc != null) {
                result.putAll(doc.toMap());
            }
            return doc != null ? 0 : 1;
        }
        catch (Exception e) {
            System.err.println(e.toString());
            return 1;
        }
        finally {
            if (db != null) {
                db.requestDone();
            }
            if (cursor != null) {
                cursor.close();
            }
        }
    }

2. HBase

  • 支持通过参数控制WriteBufferSize
  • 适应驱动做微调

详细如下:

/**
 * YCSB HBase-binding initialization: reads the debug flag, the mandatory
 * column family, and the client-side write buffer size (in KB).
 *
 * @throws DBException when no column family is configured.
 */
public void init() throws DBException
{
        // "true".equals(...) is null-safe, replacing the explicit null check
        // plus compareTo of the original.
        if ("true".equals(getProperties().getProperty("debug")))
        {
            _debug=true;
        }

        _columnFamily = getProperties().getProperty("columnfamily");
        if (_columnFamily == null)
        {
            System.err.println("Error, must specify a columnfamily for HBase table");
            throw new DBException("No columnfamily specified");
        }
        _columnFamilyBytes = Bytes.toBytes(_columnFamily);

        // BUG FIX: the original getProperty("clientbuffersize") had no default,
        // so a missing property made Integer.parseInt(null) throw. Default to
        // 2048 KB (= 2 MB, matching HBase's stock write buffer size); see
        // getHTable(), which multiplies this by 1024.
        clientbufsize = Integer.parseInt(
                getProperties().getProperty("clientbuffersize", "2048"));
 }


/**
 * (Re)create the shared HTable handle for the given table, tuned for bulk
 * loading: auto-flush is disabled and the write buffer is sized from the
 * "clientbuffersize" property (KB, converted to bytes here).
 *
 * Access is serialized on tableLock because HTable is not thread-safe.
 * Tuning suggestions from
 * <a href="http://ryantwopointoh.blogspot.com/2009/01/performance-of-hbase-importing.html">http://ryantwopointoh.blogspot.com/2009/01/performance-of-hbase-importing.html</a>
 */
public void getHTable(String table) throws IOException
    {
        synchronized (tableLock) {
            _hTable = new HTable(config, table);
            // Buffer puts client-side instead of flushing each one.
            _hTable.setAutoFlush(false);
            long bufferBytes = clientbufsize * 1024;
            _hTable.setWriteBufferSize(bufferBytes);
        }

    }

二、 统计数据收集

从原有的Measurements派生出ExcelMeasurementsExporter用于将生成的统计数据导出到excel文件中,ExcelMeasurementsExporter调用jxl.jar开源库实现。



统计数据由Overalloperresult、Overallresult、Periodresult这几个类存储;为了保存这些统计数据,原有的Measurements、StatusThread也都相应作了些调整。

三、 预热

增加如下xml配置文件

<?xml version="1.0" encoding="utf-8"?>
<!-- Scenario list consumed by the Python driver script: each
     <transaction name="..."/> names a YCSB workload file under workloads/.
     Every entry under <load> is executed first (data loading / warm-up),
     then every entry under <run> (measured workloads). -->
<Test>
<load>
    <transaction name="bulkload_concurrent" /> 
    <!--transaction name="bulkload" /-->
    <transaction name="singleload_concurrent" />
    <!--transaction name="singleload" /-->        
</load>
<run>  
 <transaction name="readonly_concurrent" /> 
 <transaction name="readheavy_concurrent" /> 
 <transaction name="updateheavy_concurrent" /> 
<transaction name="insertheavy_concurrent" /> 
 <transaction name="readlastest_concurrent" /> 
</run>
</Test>
我们增加了如下python脚本用于连续运行:

#!/usr/bin/python
#!/bin/env python

import os
import sys
import subprocess
from xml.etree import ElementTree
import ycsb
from ycsb import (DATABASES,COMMANDS,BASE_URL,
                  get_ycsb_home, find_jars) 

def getloadtrans(workloadpath, root):
   """Return the workload file path for every <transaction> under <load>."""
   return [workloadpath + node.attrib['name'] for node in root.find("load")]

def getruntrans(workloadpath, root):
   """Return the workload file path for every <transaction> under <run>."""
   return [workloadpath + node.attrib['name'] for node in root.find("run")]

def ushelp():
   """Print usage plus the list of supported databases, then exit(1)."""
   out = sys.stdout
   out.write("Usage: %s database  \n" % sys.argv[0])
   out.write("\nDatabases:\n")
   for db in sorted(DATABASES.keys()):
      out.write("    %s %s\n" % (db.ljust(13), BASE_URL + db.split("-")[0]))
   sys.exit(1)

def runscene(trans, cmd, db_classname, pervscene):
   """Launch one YCSB java process per workload file in *trans*.

   cmd selects the COMMANDS entry ("load" or "run"); returns the basename of
   the last workload executed (or pervscene unchanged when trans is empty).
   """
   for workload in trans:
      scene = os.path.basename(workload)
      classpath = ":".join(find_jars(ycsb_home, database))
      ycsb_command = ["java", "-cp", classpath,
                      COMMANDS[cmd]["main"], "-db", db_classname,
                      "-s", "-P", workload]
      # Some commands (e.g. load vs run) pass an extra trailing argument.
      extra = COMMANDS[cmd]["command"]
      if extra:
         ycsb_command.append(extra)
      subprocess.call(ycsb_command)
      pervscene = scene
   return pervscene

# ---- command-line validation -------------------------------------------
if len(sys.argv) < 2:
    ushelp()
if sys.argv[1] not in DATABASES:
    print "ERROR: Database '%s' not found" % sys.argv[1]
    ushelp()

# Resolve config/result paths relative to the project root (parent of bin/).
os.chdir('..')
conffile   = os.getcwd()+"/conf/workload.xml"
resultfile = os.getcwd()+"/result/report_output.xls"
resultdir  = os.getcwd()+"/result/"
workloadsdir = os.getcwd()+"/workloads/"

if not os.path.exists(conffile):
    # BUG FIX: message previously lacked a separating space ("...xmlnot exist").
    print conffile + " does not exist"
    sys.exit(1)

# Parse the <load>/<run> transaction lists from the XML config.
root = ElementTree.parse(conffile)
loadtrans = getloadtrans(workloadsdir, root)
runtrans = getruntrans(workloadsdir, root)

os.chdir('bin')
ycsb_home = get_ycsb_home()
database = sys.argv[1]
db_classname = DATABASES[database]

# BUG FIX: pervscene was referenced before any assignment (NameError at
# runtime). Initialize it and thread runscene's return value through the
# load phase into the run phase.
pervscene = ""
pervscene = runscene(loadtrans, "load", db_classname, pervscene)
pervscene = runscene(runtrans, "run", db_classname, pervscene)

# Timestamp the generated report so repeated runs do not overwrite it.
import time
curtime = time.strftime("%Y-%m-%d_%H_%M_%S", time.localtime(time.time()))
newreportfile = resultdir + "report_" + curtime + ".xls"
os.rename(resultfile, newreportfile)

这样可以尽量保证后续的查询、更新操作基于前面的load操作,从而保证缓存的高命中率。

四、 数据类型

本次测试的数据皆为字符串类型:

  • fieldcount=10
  • fieldlength=10
  • key字段由单词"user"后面加上64位的Fowler-Noll-Vo哈希值构成
  • key大小为23字节
  • 所有字段的值采用zipfian算法生成


免费订阅“CSDN云计算(左)CSDN大数据(右)”微信公众号,实时掌握第一手云中消息,了解最新的大数据进展!

CSDN发布虚拟化、Docker、OpenStack、CloudStack、数据中心等相关云计算资讯,     分享Hadoop、Spark、NoSQL/NewSQL、HBase、Impala、内存计算、流计算、机器学习和智能算法等相关大数据观点,提供云计算和大数据技术、平台、实践和产业信息等服务。   

  • CSDN官方微信
  • 扫描二维码,向CSDN吐槽
  • 微信号:CSDNnews
程序员移动端订阅下载

微博关注

相关热门文章