-
Notifications
You must be signed in to change notification settings - Fork 1
/
zipcode-hbase.sh
executable file
·132 lines (111 loc) · 4.49 KB
/
zipcode-hbase.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/bin/bash
HIVE_TABLE=ZIPCODE_HBASE
HBASE_TABLE=zipcode_hive
IMPALA_HOST=`hostname`:21000
IMPALA_OPTS=-k
clear
unzip -u median_income_by_zipcode_census_2000.zip
# Remove Header
awk 'FNR>2' DEC_00_SF3_P077_with_ann.csv > DEC_00_SF3_P077_with_ann_noheader.csv
sed -i 's/\"//g' DEC_00_SF3_P077_with_ann_noheader.csv
sed -i 's/\ //g' DEC_00_SF3_P077_with_ann_noheader.csv
# drop any existing tables
echo
echo ------------------------------------------------------
echo Delete old data
echo ------------------------------------------------------
echo
hive -e "drop table $HIVE_TABLE"
echo "disable '$HBASE_TABLE'" > hb1
echo "drop '$HBASE_TABLE'" >> hb1
echo "exit" >> hb1
hbase shell hb1
clear
echo ------------------------------------------------------
echo Display the structure of the original file
echo ------------------------------------------------------
echo
echo cat DEC_00_SF3_P077_with_ann_noheader.csv | head -5
echo "(press Enter)"
read
cat DEC_00_SF3_P077_with_ann_noheader.csv | head -5
echo
echo ------------------------------------------------------
echo "Create the HBase table"
echo ------------------------------------------------------
echo
echo "create '$HBASE_TABLE', 'id', 'zip', 'desc', 'income'"
echo "(press Enter)"
read
echo "create '$HBASE_TABLE', 'id', 'zip', 'desc', 'income'" > hb1
echo "exit" >> hb1
hbase shell hb1
rm -rf hb1
echo
echo ------------------------------------------------------
echo "Create Hive's external table using HBaseStorageHandler"
echo ------------------------------------------------------
echo
echo "CREATE EXTERNAL TABLE $HIVE_TABLE (key STRING,zip STRING,desc1 STRING,desc2 STRING,income STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES (\"hbase.columns.mapping\" = \":key,zip:zip,desc:desc1,desc:desc2,income:income\") TBLPROPERTIES(\"hbase.table.name\" = \"$HBASE_TABLE\");"
echo "(press Enter)"
read
hive -e "CREATE EXTERNAL TABLE $HIVE_TABLE (key STRING,zip STRING,desc1 STRING,desc2 STRING,income STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES (\"hbase.columns.mapping\" = \":key,zip:zip,desc:desc1,desc:desc2,income:income\") TBLPROPERTIES(\"hbase.table.name\" = \"$HBASE_TABLE\");"
echo
echo ------------------------------------------------------
echo "Load data into HBase via PIG"
echo ------------------------------------------------------
echo
echo "copyFromLocal DEC_00_SF3_P077_with_ann_noheader.csv ziptest.csv" > pig1
echo "A = LOAD 'ziptest.csv' USING PigStorage(',') as (id:chararray, zip:chararray, desc1:chararray, desc2:chararray, income:chararray); STORE A INTO 'hbase://$HBASE_TABLE' USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('zip:zip,desc:desc1,desc:desc2,income:income');" >> pig1
pig pig1 && rm -f pig1
echo
echo ------------------------------------------------------
echo "Scan the HBase table"
echo ------------------------------------------------------
echo
echo "scan '$HBASE_TABLE', LIMIT => 2"
echo "(press Enter)"
read
echo
echo "scan '$HBASE_TABLE', LIMIT => 2" > hb2
echo "exit" >> hb2
hbase shell hb2
echo
echo ------------------------------------------------------
echo "Get a HBase row"
echo ------------------------------------------------------
echo
echo "get 'zipcode_hive', '8600000US00601' , 'zip' , 'income'"
echo "(press Enter)"
read
echo "get 'zipcode_hive', '8600000US00601' , 'zip' , 'income'" > hb2
echo "exit" >> hb2
hbase shell hb2 && rm -rf hb2
echo
echo ------------------------------------------------------
echo "Run sample queries "
echo ------------------------------------------------------
echo
echo "=> select * from $HIVE_TABLE limit 4 (press Enter)"
echo
read
impala-shell -i $IMPALA_HOST $IMPALA_OPTS -q "select * from $HIVE_TABLE limit 4"
echo
echo
echo "=> select count(*) from $HIVE_TABLE (press Enter)"
echo
read
impala-shell -i $IMPALA_HOST $IMPALA_OPTS -q "select count(*) from $HIVE_TABLE;"
echo
echo "=> select count(*) from $HIVE_TABLE where income>'0' and income<'7000' (press Enter)"
echo
read
impala-shell -i $IMPALA_HOST $IMPALA_OPTS -q "select count(*) from $HIVE_TABLE where income>'0' and income<'7000';"
echo
echo " => select * from ZIPCODE_HBASE where income between '1000' and '5000' order by income DESC limit 20; (press Enter)"
echo
read
impala-shell -i $IMPALA_HOST $IMPALA_OPTS -q "select * from ZIPCODE_HBASE where income between '1000' and '5000' order by income DESC limit 20;"
echo
# delete temp data
rm -f DEC_00* aff*.txt