Host: SZB-L0032017
Druid 0.9.2
CDH 5.7.0
[root@SZB-L0032017 druid]# tar -zxvf druid-0.9.2-bin.tar.gz
[root@SZB-L0032017 druid]# cd druid-0.9.2
# mv conf-quickstart conf
1. Configuration file changes
# cd conf/druid/_common
# vi common.runtime.properties
druid.extensions.loadList=["druid-hdfs-storage","mysql-metadata-storage"]
druid.extensions.hadoopDependenciesDir=/opt/druid/druid-0.9.2/hadoop-dependencies
druid.zk.service.host=SZB-L0032013:2181,SZB-L0032014:2181,SZB-L0032015:2181
druid.zk.paths.base=/druid
# For MySQL:
druid.metadata.storage.type=mysql
druid.metadata.storage.connector.connectURI=jdbc:mysql://SZB-L0032013:3306/druid?characterEncoding=UTF-8
druid.metadata.storage.connector.user=druid
druid.metadata.storage.connector.password=123456
# For HDFS (deep storage):
druid.storage.type=hdfs
druid.storage.storageDirectory=/druid/segments
# For HDFS (indexer logs):
druid.indexer.logs.type=hdfs
druid.indexer.logs.directory=/druid/indexing-logs
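Before starting anything, the MySQL metadata database referenced above must already exist. A minimal sketch (the host, database name, user and password simply mirror the connector settings above; adjust for your environment):
# mysql -h SZB-L0032013 -u root -p -e "CREATE DATABASE druid DEFAULT CHARACTER SET utf8; GRANT ALL ON druid.* TO 'druid'@'%' IDENTIFIED BY '123456';"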
# ll
-rw-r--r-- 1 501 games 3911 May 22 10:16 common.runtime.properties
-rw-r--r-- 1 root root 3855 May 16 16:00 core-site.xml
-rw-r--r-- 1 root root 1757 May 16 16:00 hdfs-site.xml
-rw-r--r-- 1 501 games 1185 Oct 26 2016 log4j2.xml
-rw-r--r-- 1 root root 4560 May 16 16:00 mapred-site.xml
-rw-r--r-- 1 root root 3769 May 16 16:00 yarn-site.xml
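The Hadoop client XMLs in the listing (core-site.xml, hdfs-site.xml, mapred-site.xml, yarn-site.xml) are copied into _common so Druid can reach the CDH cluster. Assuming the standard CDH client configuration path, something like:
# cp /etc/hadoop/conf/core-site.xml /etc/hadoop/conf/hdfs-site.xml /etc/hadoop/conf/mapred-site.xml /etc/hadoop/conf/yarn-site.xml .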
[root@SZB-L0032017 druid]# cd middleManager/
# vi runtime.properties
druid.indexer.runner.javaOpts=-server -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager
Optional additional parameter: -Dhadoop.mapreduce.job.classloader=true
druid.indexer.task.baseTaskDir=/var/druid/task
druid.indexer.task.hadoopWorkingPath=/druid/hadoop-tmp
druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.6.0-cdh5.7.0"]
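The HDFS paths referenced in the configuration (/druid/segments, /druid/indexing-logs, /druid/hadoop-tmp, and /druid/quickstart used later for the HDFS ingestion example) need to exist and be writable by the account running Druid. A sketch, assuming the hadoop CLI is on the PATH and Druid runs as root:
# sudo -u hdfs hadoop fs -mkdir -p /druid/segments /druid/indexing-logs /druid/hadoop-tmp /druid/quickstart
# sudo -u hdfs hadoop fs -chown -R root /druid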
[root@SZB-L0032017 druid-0.9.2]# cd hadoop-dependencies/
# cd hadoop-client/
# mkdir 2.6.0-cdh5.7.0
Copy the CDH jars into this directory.
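One way to populate it, assuming a parcel-based CDH install where the client jars live under /opt/cloudera/parcels/CDH/jars (alternatively, use bin/pull-deps as described at the end of this note):
# cp /opt/cloudera/parcels/CDH/jars/hadoop-*.jar 2.6.0-cdh5.7.0/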
Start the services:
# bin/init
# bin/broker.sh start
# bin/coordinator.sh start
# bin/historical.sh start
# bin/middleManager.sh start
# nohup: redirecting stderr to stdout
# bin/overlord.sh start
[root@SZB-L0032017 druid-0.9.2]#
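A quick sanity check that the services came up (ports are the conf-quickstart defaults: coordinator 8081, broker 8082, overlord 8090):
# curl http://localhost:8081/status
# curl http://localhost:8082/status
# curl http://localhost:8090/status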
Segment cache and task directory settings (the druid.segmentCache.* properties belong to the historical node's runtime.properties, druid.indexer.task.baseTaskDir to the middleManager's):
druid.segmentCache.infoDir=/druid/segment-cache/infoDir
druid.segmentCache.locations=[{"path":"/druid/segment-cache","maxSize":300000000000}]
druid.indexer.task.baseTaskDir=/druid/task
Note: these are local filesystem paths, not HDFS paths.
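Since they are local paths, they have to exist before the historical and middleManager start; for example:
# mkdir -p /druid/segment-cache/infoDir /druid/task /var/druid/task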
1. Create a datasource from local data
Appendix 1: OK
Appendix query 1: OK
2. Create a datasource from HDFS data
Appendix 1
# vi quickstart/my-index.json
{ "type" : "index", "spec" : { "ioConfig" : { "type" : "index", "firehose" : { "type" : "local", "baseDir" : "/opt/druid/druid-0.9.2/quickstart", "filter" : "wikiticker-2015-09-12-sampled.json" } }, "dataSchema" : { "dataSource" : "wikiticker_local2hdfs", "granularitySpec" : { "type" : "uniform", "segmentGranularity" : "day", "queryGranularity" : "none", "intervals" : ["2015-09-12/2015-09-13"] }, "parser" : { "type" : "string", "parseSpec" : { "format" : "json", "dimensionsSpec" : { "dimensions" : [ "channel", "cityName", "comment", "countryIsoCode", "countryName", "isAnonymous", "isMinor", "isNew", "isRobot", "isUnpatrolled", "metroCode", "namespace", "page", "regionIsoCode", "regionName", "user" ] }, "timestampSpec" : { "format" : "auto", "column" : "time" } } }, "metricsSpec" : [ { "name" : "count", "type" : "count" }, { "name" : "added", "type" : "longSum", "fieldName" : "added" }, { "name" : "deleted", "type" : "longSum", "fieldName" : "deleted" }, { "name" : "delta", "type" : "longSum", "fieldName" : "delta" }, { "name" : "user_unique", "type" : "hyperUnique", "fieldName" : "user" } ] }, "tuningConfig" : { "type" : "index", "partitionsSpec" : { "type" : "hashed", "targetPartitionSize" : 5000000 }, "jobProperties" : {} } } }
Appendix 2: query
[root@SZB-L0032017 druid-0.9.2]# vi quickstart/my-top.json
{
  "queryType" : "topN",
  "dataSource" : "wikiticker_local2hdfs",
  "intervals" : ["2015-09-12/2015-09-13"],
  "granularity" : "all",
  "dimension" : "page",
  "metric" : "edits",
  "threshold" : 25,
  "aggregations" : [
    { "type" : "longSum", "name" : "edits", "fieldName" : "count" }
  ]
}
# curl -L -H 'Content-Type: application/json' -XPOST --data-binary @quickstart/my-top.json http://localhost:8082/druid/v2/?pretty
Appendix 3: create a datasource from HDFS data
# cat quickstart/wikiticker-index.json
{
  "type" : "index_hadoop",
  "spec" : {
    "ioConfig" : {
      "type" : "hadoop",
      "inputSpec" : {
        "type" : "static",
        "paths" : "/druid/quickstart/wikiticker-2015-09-12-sampled.json"
      }
    },
    "dataSchema" : {
      "dataSource" : "wikiticker",
      "granularitySpec" : {
        "type" : "uniform",
        "segmentGranularity" : "day",
        "queryGranularity" : "none",
        "intervals" : ["2015-09-12/2015-09-13"]
      },
      "parser" : {
        "type" : "hadoopyString",
        "parseSpec" : {
          "format" : "json",
          "dimensionsSpec" : {
            "dimensions" : [
              "channel", "cityName", "comment", "countryIsoCode", "countryName",
              "isAnonymous", "isMinor", "isNew", "isRobot", "isUnpatrolled",
              "metroCode", "namespace", "page", "regionIsoCode", "regionName", "user"
            ]
          },
          "timestampSpec" : {
            "format" : "auto",
            "column" : "time"
          }
        }
      },
      "metricsSpec" : [
        { "name" : "count", "type" : "count" },
        { "name" : "added", "type" : "longSum", "fieldName" : "added" },
        { "name" : "deleted", "type" : "longSum", "fieldName" : "deleted" },
        { "name" : "delta", "type" : "longSum", "fieldName" : "delta" },
        { "name" : "user_unique", "type" : "hyperUnique", "fieldName" : "user" }
      ]
    },
    "tuningConfig" : {
      "type" : "hadoop",
      "partitionsSpec" : { "type" : "hashed", "targetPartitionSize" : 5000000 },
      "jobProperties" : {
        "mapreduce.job.classloader": "true",
        "mapreduce.job.classloader.system.classes": "-javax.validation.,java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop."
      }
    }
  }
}
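Submit it the same way as the local task; the POST returns a task id that can be polled for status (<task_id> below is a placeholder):
# curl -X POST -H 'Content-Type: application/json' -d @quickstart/wikiticker-index.json http://localhost:8090/druid/indexer/v1/task
# curl http://localhost:8090/druid/indexer/v1/task/<task_id>/status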
Appendix: resolving exceptions when creating a datasource from HDFS data
Exception: Error: java.lang.ClassNotFoundException: javax.validation.Validator
Fix: edit quickstart/wikiticker-index.json and change the empty jobProperties in the tuningConfig:
"tuningConfig" : {
  "type" : "hadoop",
  "partitionsSpec" : { "type" : "hashed", "targetPartitionSize" : 5000000 },
  "jobProperties" : { }
}
to:
"jobProperties": {
  "mapreduce.job.classloader": "true",
  "mapreduce.job.classloader.system.classes": "-javax.validation.,java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop."
}
org.apache.hadoop.hdfs.DFSClient - Exception in createBlockOutputStream
java.net.ConnectException: Connection refused
…
Caused by: java.lang.RuntimeException: No buckets?? seems there is no data to index.
Timezone problem: change
-Duser.timezone=UTC
to
-Duser.timezone=UTC+0800
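The setting lives in the peon JVM options in conf/druid/middleManager/runtime.properties, i.e. the javaOpts line configured earlier becomes (a sketch, mirroring the change above):
druid.indexer.runner.javaOpts=-server -Xmx2g -Duser.timezone=UTC+0800 -Dfile.encoding=UTF-8 -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager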
A forum answer on the same issue: "I was able to run the indexing job. No shaded jars necessary. Here are the steps.
1) Download the Hadoop client that matches your CDH. In the Druid distribution directory run this command; it will download the hadoop-client into the hadoop-dependencies directory:
./bin/pull-deps -r https://repository.cloudera.com/content/repositories/releases/ -h org.apache.hadoop:hadoop-client:2.6.0-mr1-cdh5.7.1
2) Set the client version in the middleManager config:
hadoopDependencyCoordinates=["org.apache.hadoop:hadoop-client:2.6.0-mr1-cdh5.7.1"]
3) I used the indexing service to run the Hadoop job. Create the JSON and put jobProperties like this:
"jobProperties": {
  "mapreduce.job.classloader": "true",
  "mapreduce.job.classloader.system.classes": "-javax.validation.,java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop.,org.xerial.snappy."
}
Add classes to the list as necessary if you still see errors and know the class exists in both CDH and Druid.
4) HTTP POST the JSON to the overlord (indexing service)."