With the Griffin shell, users can run DQ (data quality) jobs from the command line. This is useful for debugging and running user-defined DQ jobs.
- Compile Griffin using Maven.
- Decompress the Griffin tool package `measure-x.x.x-package.tar.gz` found in the target directory of the measure module.
- Install and configure Spark.
- Run the Griffin tool with a user-defined env file and DQ file, e.g. `measure-x.x.x/bin/griffin-tool.sh ENV_FILE DQ_FILE` (a full example invocation and the two configuration files follow below).
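A minimal end-to-end sketch of the steps above; the config file names `env.json` and `dq.json` are placeholders for the two example files shown below:

```bash
# Build Griffin from the project root (skipping tests is optional)
mvn clean package -DskipTests

# Unpack the tool package from the measure module's target directory
tar -xzf measure/target/measure-x.x.x-package.tar.gz

# Run a batch DQ job with your environment and DQ configuration files
measure-x.x.x/bin/griffin-tool.sh env.json dq.json
```

The ENV_FILE configures the Spark context and declares the sinks that metrics and records can be written to, for example: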
```json
{
  "spark": {
    "log.level": "WARN",
    "config": {
      "spark.master": "local[*]"
    }
  },
  "sinks": [
    {
      "name": "MyConsoleSink",
      "type": "CONSOLE",
      "config": {
        "max.log.lines": 10
      }
    },
    {
      "name": "MyHDFSSink",
      "type": "HDFS",
      "config": {
        "path": "hdfs://localhost/griffin/batch/persist",
        "max.persist.lines": 10000,
        "max.lines.per.file": 10000
      }
    },
    {
      "name": "MyElasticSearchSink",
      "type": "ELASTICSEARCH",
      "config": {
        "method": "post",
        "api": "http://localhost:9200/griffin/accuracy",
        "connection.timeout": "1m",
        "retry": 10
      }
    }
  ],
  "griffin.checkpoint": []
}
```
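Before running, it can help to confirm that the endpoints referenced above are reachable. A quick check for the sample Elasticsearch sink (assuming a local instance, as in the config above):

```bash
# Prints an HTTP status code such as 200 when Elasticsearch is reachable
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:9200
```

The DQ_FILE defines the data sources, the rules to evaluate, and which of the configured sinks to use. The following example is a batch accuracy job that compares two MySQL tables over JDBC: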
```json
{
  "name": "accu_batch",
  "process.type": "batch",
  "data.sources": [
    {
      "name": "source",
      "baseline": true,
      "connector": {
        "type": "jdbc",
        "config": {
          "user": "xxx",
          "password": "xxx",
          "tablename": "stu",
          "where": "id < 3",
          "url": "jdbc:mysql://localhost:3306/test",
          "database": "test",
          "driver": "com.mysql.jdbc.Driver"
        }
      }
    },
    {
      "name": "target",
      "connector": {
        "type": "jdbc",
        "config": {
          "user": "xxx",
          "password": "xxx",
          "tablename": "stu2",
          "where": "id < 3",
          "url": "jdbc:mysql://localhost:3306/test",
          "database": "test",
          "driver": "com.mysql.jdbc.Driver"
        }
      }
    }
  ],
  "evaluate.rule": {
    "rules": [
      {
        "dsl.type": "griffin-dsl",
        "dq.type": "accuracy",
        "out.dataframe.name": "accu",
        "rule": "source.id = target.id AND upper(source.name) = upper(target.name)",
        "details": {
          "source": "source",
          "target": "target",
          "miss": "miss_count",
          "total": "total_count",
          "matched": "matched_count"
        },
        "out": [
          {
            "type": "record",
            "name": "missRecords"
          }
        ]
      }
    ]
  },
  "sinks": [
    "MyConsoleSink"
  ]
}
```
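The `details` block renames the accuracy metric fields (`total_count`, `miss_count`, `matched_count`), the `out` block exposes the mismatched rows under the name `missRecords`, and each entry in `"sinks"` must match a sink defined in the ENV_FILE (here `MyConsoleSink`), which is where the job's metrics are written.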