Skip to content

Commit b0152b6

Browse files
committed
Novo Script Validar Verbos Python
1 parent 3e4d9a4 commit b0152b6

File tree

4 files changed

+1251
-29
lines changed

4 files changed

+1251
-29
lines changed

getVerbos.py

+156
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import urllib
4+
from bs4 import BeautifulSoup
5+
import codecs
6+
import MySQLdb
7+
import types
8+
9+
from unicodedata import normalize
10+
11+
# Initial letters whose dictionary pages are scraped by getVerbos().
# Full alphabet, kept for reference:
#'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'
# The trailing comma matters: ('c') is just the string 'c', not a tuple.
arrayLetras = ('c',)
13+
14+
def remover_acentos(txt, codif='utf-8'):
    """Strip accents from ``txt`` and return the result as ASCII bytes.

    The text is decomposed with NFKD, which splits an accented letter into
    its base letter plus combining marks; encoding to ASCII with ``'ignore'``
    then drops the marks together with any other non-ASCII character.

    Args:
        txt: Byte string encoded with ``codif``, or already-decoded text.
        codif: Encoding used to decode ``txt`` when it is a byte string.

    Returns:
        An ASCII-only byte string.
    """
    # Only byte strings need decoding. Calling ``decode`` on text that is
    # already decoded fails for accented input (implicit ASCII round-trip on
    # Python 2, no such method on Python 3).
    if isinstance(txt, bytes):
        txt = txt.decode(codif)
    return normalize('NFKD', txt).encode('ASCII', 'ignore')
16+
17+
def getConvertObjectSoup(url):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``BeautifulSoup`` object built from the raw response body.
    """
    resposta = urllib.urlopen(url)
    try:
        html_doc = resposta.read()
    finally:
        # Always release the connection, even if reading fails; this helper
        # is called inside loops, so unclosed handles would pile up.
        resposta.close()

    return BeautifulSoup(html_doc)
23+
24+
def _coletarCelulas(soup, palavras):
    """Append the text of every non-empty ``<td>`` of ``soup`` to ``palavras``."""
    for celula in soup.find_all('td', text=True):
        if celula.string:
            palavras.append(celula.string.encode('utf-8') + '\n')


def getVerbos():
    """Scrape the verb dictionary pages and save the verbs to ``verbos.txt``.

    For each letter in ``arrayLetras`` the matching dictionary page is
    downloaded. When the page has no ``<li>`` elements its own ``<td>`` cells
    are collected; otherwise the first link of each ``<li>`` is followed and
    the ``<td>`` cells of the linked page are collected.

    Returns:
        List of UTF-8 encoded entries, each terminated by a newline. An
        empty list is returned when any step fails, so callers can still
        iterate over the result.
    """
    palavras = []

    try:
        for letra in arrayLetras:
            url = 'http://www.achando.info/verbos/conjugar/dicionario-' + letra + '.html'
            soup = getConvertObjectSoup(url)

            print(letra)
            itens = soup.find_all('li')
            if not itens:
                _coletarCelulas(soup, palavras)
            else:
                for item in itens:
                    pagina = getConvertObjectSoup(item.find('a')['href'])
                    _coletarCelulas(pagina, palavras)

        # The file is written only after scraping succeeds, so the handle is
        # not held open across network requests and a failed run does not
        # leave a truncated verbos.txt behind.
        arquivo = open('verbos.txt', 'w')
        try:
            arquivo.writelines(palavras)
        finally:
            arquivo.close()
        return palavras
    except Exception as e:
        # The exception must be bound to a name; the handler used to read an
        # undefined ``e`` and raised NameError instead of reporting the error.
        print('Erro ao pegar verbos' + str(e))
        return []
64+
65+
def validaVerbos(lista):
    """Check each verb against the online dictionary and store the valid ones.

    For every entry of ``lista`` the page
    ``http://www.dicionariodoaurelio.com/<verb>.html`` is downloaded (accents
    removed from the URL). When the page has no element with id ``box1`` the
    entry is removed from ``lista``; otherwise the verb is inserted into the
    ``verbos`` table.

    Args:
        lista: List of verb strings; entries are removed from it in place
            when the dictionary page has no ``box1`` element.

    Returns:
        The remaining entries of ``lista``, sorted. An empty list is returned
        when any step fails, so callers can still iterate over the result.
    """
    db = MySQLdb.connect(host='localhost' , port=3306 ,user='root', passwd='', db='verbos', use_unicode=1, charset='utf8')
    cursor = db.cursor()
    try:
        # sorted() returns a copy, so removing from ``lista`` inside the loop
        # does not disturb the iteration.
        for i in sorted(lista):
            nome = i.strip() if i else ''
            if not nome:
                print('None')
                continue

            url = remover_acentos('http://www.dicionariodoaurelio.com/' + nome + '.html')
            soup = getConvertObjectSoup(url)

            # A missing ``box1`` element marks an unknown verb. The previous
            # test (``is None and ... is False``) could never be true, so
            # every verb ended up being inserted.
            if soup.find(id="box1") is None:
                print('Verbo ' + nome + ' Inexistente!! Delete BD')
                lista.remove(i)
            else:
                print('Insert ' + nome)
                # Parameterized query: the verb comes from scraped pages and
                # must not be spliced into the SQL text.
                cursor.execute('insert into verbos (nome) values (%s)', (nome,))
                # Commit after the insert; committing first left the last
                # inserted row uncommitted.
                db.commit()
        return sorted(lista)
    except Exception as e:
        print('Erro ao verificar verbos.' + str(e))
        return []
    finally:
        # Release the database resources on both success and failure.
        cursor.close()
        db.close()
97+
98+
def getVerbosConjugados(listaVerbos):
    """Download the conjugations of each verb and store them in the database.

    For every verb the page ``http://www.conjuga-me.net/verbo-<verb>`` is
    downloaded and each ``<td class="output">`` cell is reduced to plain
    ASCII with pronouns and a few other fragments removed. Non-empty results
    are inserted into ``conjulgacao_verbal`` together with the verb id read
    from the ``verbos`` table and a tense id taken from ``count``.

    Args:
        listaVerbos: Iterable of verb strings; surrounding whitespace is
            ignored.

    Returns:
        None. Errors are reported on stdout and stop the processing.
    """
    # ``count`` holds the tense ids for the current group of cells and
    # ``num`` selects one of them; ``contador`` counts processed cells and
    # switches ``count`` after 18, 36 and 54 cells, starting over at 72.
    # NOTE(review): presumably this mirrors a 3-column page layout; the
    # counters are not reset between verbs -- confirm this is intended.
    count = [1, 3, 2]
    contador = 0
    num = 0
    db = MySQLdb.connect(host='localhost' , port=3306 ,user='root', passwd='', db='verbos', use_unicode=1, charset='utf8')
    cursor = db.cursor()
    sql = 'insert into conjulgacao_verbal (nome_conjulgacao, id_tempo_verbal, id_verbo) value (%s, %s, %s)'

    try:
        for verbo in listaVerbos:
            # Strip first so no newline ends up in the URL, and encode only
            # decoded text: calling encode() on a Python 2 byte string with
            # accents raises UnicodeDecodeError.
            nome = verbo.strip()
            if isinstance(nome, type(u'')):
                nome = nome.encode('utf-8')

            soup = getConvertObjectSoup('http://www.conjuga-me.net/verbo-' + nome)
            print('Verbo: ' + nome)

            # Look the verb up once per verb, with a parameterized query.
            cursor.execute('select id_verbos from verbos where nome = %s', (nome,))
            linha = cursor.fetchone()
            if linha is None:
                # Without a row there is no id to attach conjugations to;
                # skip this verb instead of failing the whole run.
                print('Verbo ' + nome + ' nao encontrado na tabela verbos')
                continue
            id_verbo = linha[0]

            for s in soup.find_all('td', attrs={'class': 'output'}):
                if s.text and s.text not in ('\t', '\n', ' '):
                    palavra = s.text.encode('ascii', 'ignore').replace('para ', '').replace('eu','').replace('voc','').replace(' ele/ela','').replace('vocs','').replace('tu','').replace('vs','').replace('no','').replace('ns','').replace('eles/elas','').replace('ele/ela','').replace(' s','').replace(' ','')
                    if palavra:
                        print('id_verbo: ' + str(id_verbo))
                        cursor.execute(sql, (palavra, count[num], id_verbo))
                        # Commit after the insert; committing first left the
                        # last inserted row uncommitted.
                        db.commit()

                    # NOTE(review): the source had lost its indentation; the
                    # counters are assumed to advance for every non-blank
                    # cell, not only for inserted ones -- confirm.
                    num += 1
                    if num == 3:
                        num = 0
                    contador += 1
                    if contador == 18:
                        count = [4, 6, 5]
                    if contador == 36:
                        count = [7, 9, 8]
                    if contador == 54:
                        count = [10, 10, 10]
                    if contador == 72:
                        contador = 0
                        count = [1, 3, 2]
    except Exception as e:
        print('Erro ao verificar verbos.' + str(e))
    finally:
        # Release the database resources on both success and failure.
        cursor.close()
        db.close()
149+
150+
def main():
    """Run the pipeline: scrape verbs, validate them, store conjugations."""
    getVerbosConjugados(validaVerbos(getVerbos()))


if __name__ == '__main__':
    main()

soup.py

+22-29
Original file line numberDiff line numberDiff line change
@@ -9,43 +9,36 @@
99
nao_encontrados = open('/Users/gilmar/Documents/Projetos/verbos/verbos_nao_encontrados.txt', 'w')
1010

1111
try:
12-
db = MySQLdb.connect(host='localhost' , port=3306 ,user='gilmar', passwd='', db='verbos')
12+
db = MySQLdb.connect(host='localhost' , port=3306 ,user='gilmar', passwd='', db='verbos')
1313

14-
cursor = db.cursor()
14+
cursor = db.cursor()
1515

16-
cursor.execute('select * from verbos')
16+
cursor.execute('select * from verbos')
1717

18-
for x in cursor.fetchall():
19-
verbo = x[1]
18+
for x in cursor.fetchall():
19+
verbo = x[1]
2020

21-
url = 'http://www.dicionariodoaurelio.com/'+verbo+'.html'
22-
f = urllib.urlopen(url)
21+
url = 'http://www.dicionariodoaurelio.com/'+verbo+'.html'
22+
f = urllib.urlopen(url)
2323

24-
html_doc = f.read()
25-
soup = BeautifulSoup(html_doc)
24+
html_doc = f.read()
25+
soup = BeautifulSoup(html_doc)
2626

27-
if soup.find(id="box1") == None:
28-
print 'Verbo ' + verbo +' Inexistente!! Delete BD'
29-
nao_encontrados.write('Verbo ' + verbo +' Inexistente!! Delete BD. \n')
30-
else:
31-
#print (soup.find(id="box1").h4.string)
32-
print 'Verbo '+ verbo + ' Ok na base do Aurelio'
33-
encontrados.write('Verbo '+ verbo + ' Ok na base do Aurelio. \n')
27+
if soup.find(id="box1") == None:
28+
print 'Verbo ' + verbo +' Inexistente!! Delete BD'
29+
nao_encontrados.write('Verbo ' + verbo +' Inexistente!! Delete BD. \n')
30+
else:
31+
#print (soup.find(id="box1").h4.string)
32+
print 'Verbo '+ verbo + ' Ok na base do Aurelio'
33+
encontrados.write('Verbo '+ verbo + ' Ok na base do Aurelio. \n')
3434

35-
nao_encontrados.close()
36-
encontrados.close()
37-
db.close()
35+
nao_encontrados.close()
36+
encontrados.close()
37+
db.close()
3838

3939
except Exception, e:
40-
print 'Erro ao connectar no BD ' + e.message
40+
print 'Erro ao connectar no BD ' + e.message
4141
else:
42-
pass
42+
pass
4343
finally:
44-
pass
45-
46-
47-
48-
49-
50-
51-
44+
pass

0 commit comments

Comments
 (0)