Integrating Druid with CDH


Host: SZB-L0032017

Druid 0.9.2

CDH 5.7.0

[root@SZB-L0032017 druid]# tar -zxvf druid-0.9.2-bin.tar.gz

[root@SZB-L0032017 druid]# cd druid-0.9.2

# mv conf-quickstart conf

1. Configuration file changes

# cd conf/druid/_common

# vi common.runtime.properties

druid.extensions.loadList=["druid-hdfs-storage","mysql-metadata-storage"]
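
Note: the mysql-metadata-storage extension is not shipped inside the stock Druid 0.9.2 tarball, so it may need to be fetched before this loadList works. A sketch using the pull-deps tool (the exact coordinate, the --no-default-hadoop flag, and the MySQL JDBC driver location are assumptions to verify against your distribution):

# ./bin/pull-deps --no-default-hadoop -c io.druid.extensions:mysql-metadata-storage:0.9.2

# cp mysql-connector-java-*.jar extensions/mysql-metadata-storage/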

 

druid.extensions.hadoopDependenciesDir=/opt/druid/druid-0.9.2/hadoop-dependencies

 

druid.zk.service.host=SZB-L0032013:2181,SZB-L0032014:2181,SZB-L0032015:2181

druid.zk.paths.base=/druid
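
Before starting any Druid service it is worth confirming that the ZooKeeper quorum listed in druid.zk.service.host is reachable; a quick check using ZooKeeper's standard four-letter command (assumes nc is installed):

# echo ruok | nc SZB-L0032013 2181
imok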

 

# For MySQL:

druid.metadata.storage.type=mysql

druid.metadata.storage.connector.connectURI=jdbc:mysql://SZB-L0032013:3306/druid?characterEncoding=UTF-8

druid.metadata.storage.connector.user=druid

druid.metadata.storage.connector.password=123456
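
The druid database and user referenced above must already exist on the MySQL host; a minimal sketch to create them on SZB-L0032013 (tighten the grant to match your security policy):

# mysql -u root -p -e "CREATE DATABASE IF NOT EXISTS druid DEFAULT CHARACTER SET utf8; GRANT ALL ON druid.* TO 'druid'@'%' IDENTIFIED BY '123456'; FLUSH PRIVILEGES;"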

 

# For HDFS:

druid.storage.type=hdfs

druid.storage.storageDirectory=/druid/segments

 

# For HDFS:

druid.indexer.logs.type=hdfs

druid.indexer.logs.directory=/druid/indexing-logs
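
Both paths are on HDFS (deep storage for segments and the task-log directory), so they should exist and be writable by whichever user runs the Druid services; a sketch assuming the hdfs superuser creates them (adjust ownership to the account that actually runs Druid, root in this walkthrough):

# sudo -u hdfs hadoop fs -mkdir -p /druid/segments /druid/indexing-logs

# sudo -u hdfs hadoop fs -chown -R root /druid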

 

# ll

-rw-r--r-- 1  501 games 3911 May 22 10:16 common.runtime.properties

-rw-r--r-- 1 root root  3855 May 16 16:00 core-site.xml

-rw-r--r-- 1 root root  1757 May 16 16:00 hdfs-site.xml

-rw-r--r-- 1  501 games 1185 Oct 26  2016 log4j2.xml

-rw-r--r-- 1 root root  4560 May 16 16:00 mapred-site.xml

-rw-r--r-- 1 root root  3769 May 16 16:00 yarn-site.xml
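
The four Hadoop XML files in the listing are the CDH client configurations copied into _common so that Druid can locate HDFS and YARN; a sketch assuming the standard CDH client config directory /etc/hadoop/conf:

# cp /etc/hadoop/conf/core-site.xml /etc/hadoop/conf/hdfs-site.xml /etc/hadoop/conf/mapred-site.xml /etc/hadoop/conf/yarn-site.xml /opt/druid/druid-0.9.2/conf/druid/_common/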

 

[root@SZB-L0032017 druid]# cd middleManager/

# vi runtime.properties

druid.indexer.runner.javaOpts=-server -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager

Optional parameter: -Dhadoop.mapreduce.job.classloader=true

druid.indexer.task.baseTaskDir=/var/druid/task

 

druid.indexer.task.hadoopWorkingPath=/druid/hadoop-tmp

druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.6.0-cdh5.7.0"]

 

[root@SZB-L0032017 druid-0.9.2]# cd hadoop-dependencies/

# cd hadoop-client/

# mkdir 2.6.0-cdh5.7.0

Copy the CDH Hadoop client jars into this directory.
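
Two possible ways to populate 2.6.0-cdh5.7.0: copy the Hadoop client jars from the local CDH parcel, or let pull-deps download them from Cloudera's repository (the same tool used in the community answer at the end of this article). Both commands below are sketches; verify the parcel path and coordinate against your install:

# cp /opt/cloudera/parcels/CDH/jars/hadoop-*.jar /opt/druid/druid-0.9.2/hadoop-dependencies/hadoop-client/2.6.0-cdh5.7.0/

# cd /opt/druid/druid-0.9.2 && ./bin/pull-deps -r https://repository.cloudera.com/content/repositories/releases/ -h org.apache.hadoop:hadoop-client:2.6.0-cdh5.7.0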

 

Startup:

# bin/init

# bin/broker.sh start

# bin/coordinator.sh start

# bin/historical.sh start

# bin/middleManager.sh start

# nohup: redirecting stderr to stdout

# bin/overlord.sh start

[root@SZB-L0032017 druid-0.9.2]#
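
Each service exposes a /status endpoint that returns its version once it is up; a quick sanity check assuming the quickstart default ports (coordinator 8081, broker 8082, historical 8083, overlord 8090, middleManager 8091):

# for p in 8081 8082 8083 8090 8091; do curl -s http://localhost:$p/status; echo; done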

 

Local-path settings (historical segment cache and middleManager task directory):

druid.segmentCache.infoDir=/druid/segment-cache/infoDir

druid.segmentCache.locations=[{"path":"/druid/segment-cache","maxSize":300000000000}]

druid.indexer.task.baseTaskDir=/druid/task

Note that these paths are not on HDFS; they are local filesystem paths.
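
They therefore need to exist (or be creatable) on every node running a historical or middleManager; created here as root to match how the services are started above:

# mkdir -p /druid/segment-cache/infoDir /druid/task /var/druid/task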



1. Create a datasource from local data

Appendix 1 — OK

Appendix 2 (query) — OK

 

2. Create a datasource from HDFS data (Appendix 3)


Appendix 1

# vi quickstart/my-index.json

{   "type" : "index",   "spec" : {     "ioConfig" : {       "type" : "index",       "firehose" : {         "type" : "local",         "baseDir" : "/opt/druid/druid-0.9.2/quickstart",         "filter" : "wikiticker-2015-09-12-sampled.json"       }     },     "dataSchema" : {       "dataSource" : "wikiticker_local2hdfs",       "granularitySpec" : {         "type" : "uniform",         "segmentGranularity" : "day",         "queryGranularity" : "none",         "intervals" : ["2015-09-12/2015-09-13"]       },       "parser" : {         "type" : "string",         "parseSpec" : {           "format" : "json",           "dimensionsSpec" : {             "dimensions" : [               "channel",               "cityName",               "comment",               "countryIsoCode",               "countryName",               "isAnonymous",               "isMinor",               "isNew",               "isRobot",               "isUnpatrolled",               "metroCode",               "namespace",               "page",               "regionIsoCode",               "regionName",               "user"             ]           },           "timestampSpec" : {             "format" : "auto",             "column" : "time"           }         }       },       "metricsSpec" : [         {           "name" : "count",           "type" : "count"         },         {           "name" : "added",           "type" : "longSum",           "fieldName" : "added"         },         {           "name" : "deleted",           "type" : "longSum",           "fieldName" : "deleted"         },         {           "name" : "delta",           "type" : "longSum",           "fieldName" : "delta"         },         {           "name" : "user_unique",           "type" : "hyperUnique",           "fieldName" : "user"         }       ]     },     "tuningConfig" : {       "type" : "index",       "partitionsSpec" : {         "type" : "hashed",         "targetPartitionSize" : 5000000       },       "jobProperties" : {}     }   } }

 

Appendix 2: Query

[root@SZB-L0032017 druid-0.9.2]# vi quickstart/my-top.json

{
  "queryType" : "topN",
  "dataSource" : "wikiticker_local2hdfs",
  "intervals" : ["2015-09-12/2015-09-13"],
  "granularity" : "all",
  "dimension" : "page",
  "metric" : "edits",
  "threshold" : 25,
  "aggregations" : [
    {
      "type" : "longSum",
      "name" : "edits",
      "fieldName" : "count"
    }
  ]
}

# curl -L -H 'Content-Type: application/json' -XPOST --data-binary @quickstart/my-top.json http://localhost:8082/druid/v2/?pretty

 

Appendix 3: Creating a datasource from an HDFS data source

# cat quickstart/wikiticker-index.json

{
  "type" : "index_hadoop",
  "spec" : {
    "ioConfig" : {
      "type" : "hadoop",
      "inputSpec" : {
        "type" : "static",
        "paths" : "/druid/quickstart/wikiticker-2015-09-12-sampled.json"
      }
    },
    "dataSchema" : {
      "dataSource" : "wikiticker",
      "granularitySpec" : {
        "type" : "uniform",
        "segmentGranularity" : "day",
        "queryGranularity" : "none",
        "intervals" : ["2015-09-12/2015-09-13"]
      },
      "parser" : {
        "type" : "hadoopyString",
        "parseSpec" : {
          "format" : "json",
          "dimensionsSpec" : {
            "dimensions" : [
              "channel",
              "cityName",
              "comment",
              "countryIsoCode",
              "countryName",
              "isAnonymous",
              "isMinor",
              "isNew",
              "isRobot",
              "isUnpatrolled",
              "metroCode",
              "namespace",
              "page",
              "regionIsoCode",
              "regionName",
              "user"
            ]
          },
          "timestampSpec" : {
            "format" : "auto",
            "column" : "time"
          }
        }
      },
      "metricsSpec" : [
        { "name" : "count", "type" : "count" },
        { "name" : "added", "type" : "longSum", "fieldName" : "added" },
        { "name" : "deleted", "type" : "longSum", "fieldName" : "deleted" },
        { "name" : "delta", "type" : "longSum", "fieldName" : "delta" },
        { "name" : "user_unique", "type" : "hyperUnique", "fieldName" : "user" }
      ]
    },
    "tuningConfig" : {
      "type" : "hadoop",
      "partitionsSpec" : {
        "type" : "hashed",
        "targetPartitionSize" : 5000000
      },
      "jobProperties" : {
        "mapreduce.job.classloader": "true",
        "mapreduce.job.classloader.system.classes": "-javax.validation.,java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop."
      }
    }
  }
}
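
Because the inputSpec points at an HDFS path, the sample file has to be uploaded there before the task is submitted to the overlord (again assuming port 8090):

# hadoop fs -mkdir -p /druid/quickstart

# hadoop fs -put quickstart/wikiticker-2015-09-12-sampled.json /druid/quickstart/

# curl -X POST -H 'Content-Type: application/json' -d @quickstart/wikiticker-index.json http://localhost:8090/druid/indexer/v1/task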

 


Appendix: Resolving exceptions when creating a datasource from an HDFS data source

 

Exception fix: Error: java.lang.ClassNotFoundException: javax.validation.Validator

# vi quickstart/wikiticker-index.json

"tuningConfig" : {
  "type" : "hadoop",
  "partitionsSpec" : {
    "type" : "hashed",
    "targetPartitionSize" : 5000000
  },
  "jobProperties" : {}
}

Set jobProperties as follows:

"jobProperties": {
  "mapreduce.job.classloader": "true",
  "mapreduce.job.classloader.system.classes": "-javax.validation.,java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop."
}

 

org.apache.hadoop.hdfs.DFSClient - Exception in createBlockOutputStream
java.net.ConnectException: Connection refused

Caused by: java.lang.RuntimeException: No buckets?? seems there is no data to index.

 

This is a timezone problem; change

-Duser.timezone=UTC

to

-Duser.timezone=UTC+0800
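
In this setup the flag lives in the middleManager's druid.indexer.runner.javaOpts, so the corrected line would look like the following (only the timezone value changes; which zone is right depends on the timestamps in your data):

druid.indexer.runner.javaOpts=-server -Xmx2g -Duser.timezone=UTC+0800 -Dfile.encoding=UTF-8 -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager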

 

 

Reference (from a community Q&A answer on the same problem):

I was able to run the indexing job. No shaded jars necessary. Here are the steps.

1) Download the Hadoop client that matches your CDH. In the Druid distribution directory run this command; it will download the hadoop-client into the hadoop-dependencies directory.

./bin/pull-deps -r https://repository.cloudera.com/content/repositories/releases/ -h org.apache.hadoop:hadoop-client:2.6.0-mr1-cdh5.7.1

2) Set the client version in the middleManager config.

hadoopDependencyCoordinates=["org.apache.hadoop:hadoop-client:2.6.0-mr1-cdh5.7.1"]

3) I used the indexing service to run the Hadoop job. Create the JSON, and set jobProperties like this.

"jobProperties": {
  "mapreduce.job.classloader": "true",
  "mapreduce.job.classloader.system.classes": "-javax.validation.,java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop.,org.xerial.snappy."
}

Add classes to the list as necessary if you still see errors and know the class exists in both CDH and Druid.

4) HTTP POST the JSON to the Coordinator.

 
