Skip to content

Commit 30182c1

Browse files
committed
Subindo novas variáveis de ambiente. Atualização no Dockerfile.
1 parent bac1c92 commit 30182c1

File tree

3 files changed

+104
-3
lines changed

3 files changed

+104
-3
lines changed

Diff for: .env-example

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
COVERALLS_SERVICE_NAME=travis-pro
22
COVERALLS_REPO_TOKEN=
33
DOCKER_REGISTRY=
4+
AWS_ACCESS_KEY_ID=
5+
AWS_SECRET_ACCESS_KEY=
6+
AWS_DEFAULT_REGION=us-east-1

Diff for: Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ ENV PYENV_ROOT $WORKDIR/.pyenv
1414
ENV PIPENV_CACHE_DIR $WORKDIR/.pipenv
1515
ENV PIPENV_PYTHON ${PYENV_ROOT}/shims/python
1616
ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
17-
ENV BUILD_PACKAGES bzip2-devel gcc git wget which libxml2-dev libxslt-dev make \
18-
openssl-devel python36-dev readline-devel postgresql-devel \
17+
ENV BUILD_PACKAGES bzip2-devel gcc git wget which make \
18+
openssl-devel python37-dev readline-devel postgresql-devel \
1919
libffi-devel sqlite-devel tar
2020

2121
WORKDIR ${WORKDIR}

Diff for: README.md

+99-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,99 @@
1-
# tutorial-batch-data-engineering
1+
# tutorial-batch-data-engineering
2+
3+
[![Python Version](https://img.shields.io/badge/python-3.7.0-green.svg)](https://img.shields.io/badge/python-3.7.0-green.svg)
4+
[![Coverage Status](https://coveralls.io/repos/github/arthuralvim/tutorial-batch-data-engineering/badge.svg?branch=master)](https://coveralls.io/github/arthuralvim/tutorial-batch-data-engineering?branch=master)
5+
6+
7+
## DESENVOLVIMENTO
8+
9+
```bash
10+
$ pre-commit install
11+
```
12+
13+
```bash
14+
$ pipenv install --dev
15+
```
16+
17+
18+
## BASE DE DADOS
19+
20+
CRIAR BALDE NO S3
21+
22+
https://brasil.io/dataset/gastos-deputados/cota_parlamentar
23+
24+
## AWS ECR
25+
26+
## AWS BATCH
27+
28+
## AWS ATHENA
29+
30+
https://docs.aws.amazon.com/athena/latest/ug/select.html
31+
https://docs.aws.amazon.com/athena/latest/ug/data-types.html
32+
33+
```sql
34+
-- Registers an Athena external table over the comma-delimited text objects stored
-- under s3://tutorial-batch-data-engineering/cota-parlamentar/.
-- IF NOT EXISTS makes the statement safe to re-run; an existing table definition
-- is left untouched.
-- NOTE(review): LazySimpleSerDe splits on every comma and does not understand
-- quoted fields; confirm that the free-text txt* columns never contain commas.
CREATE EXTERNAL TABLE IF NOT EXISTS dados_brasil.deputados_cota_parlamentar (
  `codlegislatura` int,
  `datemissao` timestamp,
  `idedocumento` int,
  `idecadastro` int,
  `indtipodocumento` int,
  `nucarteiraparlamentar` int,
  `nudeputadoid` int,
  `nulegislatura` int,
  `numano` int,
  `numespecificacaosubcota` int,
  `numlote` int,
  `nummes` int,
  `numparcela` int,
  `numressarcimento` int,
  `numsubcota` int,
  `sgpartido` string,
  `sguf` string,
  `txnomeparlamentar` string,
  `txtcnpjcpf` string,
  `txtdescricao` string,
  `txtdescricaoespecificacao` string,
  `txtfornecedor` string,
  `txtnumero` string,
  `txtpassageiro` string,
  `txttrecho` string,
  -- The vlr* columns are aggregated with SUM() over millions of rows, so they use
  -- an exact decimal type instead of 32-bit float to avoid accumulated rounding error.
  -- NOTE(review): scale 2 assumes amounts carry at most two decimal places - confirm.
  `vlrdocumento` decimal(15,2),
  `vlrglosa` decimal(15,2),
  `vlrliquido` decimal(15,2),
  `vlrrestituicao` decimal(15,2)
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
  'serialization.format' = ',',
  'field.delim' = ','
)
LOCATION 's3://tutorial-batch-data-engineering/cota-parlamentar/'
TBLPROPERTIES ('has_encrypted_data'='false');
71+
```
72+
73+
https://aws.amazon.com/blogs/big-data/top-10-performance-tuning-tips-for-amazon-athena/
74+
75+
```sql
76+
-- Preview: fetch 10 rows with every column. There is no ORDER BY, so which
-- rows come back is not guaranteed.
SELECT *
FROM "dados_brasil"."deputados_cota_parlamentar"
LIMIT 10;
77+
```
78+
79+
```sql
80+
-- Total number of rows in the table.
SELECT
    COUNT(*)
FROM "dados_brasil"."deputados_cota_parlamentar";
81+
```
82+
83+
```sql
84+
-- The 10 txnomeparlamentar values with the most rows, highest count first.
SELECT
    txnomeparlamentar,
    COUNT(*) AS count
FROM "dados_brasil"."deputados_cota_parlamentar"
GROUP BY txnomeparlamentar
ORDER BY
    count DESC,
    txnomeparlamentar  -- tie-breaker: equal counts come back in a stable order
LIMIT 10;
85+
```
86+
87+
```sql
88+
-- Sum of vlrliquido per (txnomeparlamentar, sgpartido, sguf) group, returning the
-- first 10 groups in descending txnomeparlamentar order.
-- NOTE(review): the sort key is the name, not the computed total; if the goal is
-- the 10 largest totals, ORDER BY vlrtotal DESC was probably intended - confirm.
SELECT
    txnomeparlamentar,
    sgpartido,
    sguf,
    SUM(vlrliquido) AS vlrtotal
FROM "dados_brasil"."deputados_cota_parlamentar"
GROUP BY
    txnomeparlamentar,
    sgpartido,
    sguf
ORDER BY
    txnomeparlamentar DESC,
    sgpartido,  -- tie-breakers: the same name can appear under several
    sguf        -- party/UF groups, so fix their relative order
LIMIT 10;
89+
```
90+
91+
92+
1 (Run time: 2.36 seconds, Data scanned: 713.04 MB)
93+
3.697.967
94+
95+
2 (Run time: 2.21 seconds, Data scanned: 99.33 MB)
96+
3.697.983
97+
98+
99+
16 linhas de diferença. 8 arquivos.

0 commit comments

Comments
 (0)