Skip to content

Commit

Permalink
Merge pull request #19 from mlsecproject/arch
Browse files Browse the repository at this point in the history
Process all the sources
  • Loading branch information
krmaxwell committed Jul 19, 2014
2 parents 8fc172c + 112c8dd commit def42ed
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 18 deletions.
21 changes: 11 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,20 @@ entity, datatype, direction, source, notes, date
- The `source` field contains the original URL.
- The `notes` field should cover any extra tag info we may want to persist with the data.
- The `date` field will be in `YYYY-MM-DD` format.
- All fields are quoted with double-quotes (`"`).
An output example:
```
entity, type, direction, source, notes, date
24.210.174.91,IPv4,inbound,openbl,SSH scan,2014-06-01
201.216.191.174,IPv4,inbound,openbl,SSH scan,2014-06-01
114.130.9.21,IPv4,inbound,openbl,FTP scan,2014-06-01
175.45.187.30,IPv4,inbound,openbl,SSH scan,2014-06-01
118.69.201.55,IPv4,inbound,openbl,SSH scan,2014-06-01
citi-bank.ru,FQDN,outbound,mtc_malwaredns,Malware,2014-06-01
ilo.brenz.pl,FQDN,outbound,mtc_malwaredns,Malware,2014-06-01
utenti.lycos.it,FQDN,outbound,mtc_malwaredns,Malware,2014-06-01
bgr.runk.pl,FQDN,outbound,mtc_malwaredns,Malware,2014-06-01
"entity","type","direction","source","notes","date"
"24.210.174.91","IPv4","inbound","openbl","SSHscan","2014-06-01"
"201.216.191.174","IPv4","inbound","openbl","SSHscan","2014-06-01"
"114.130.9.21","IPv4","inbound","openbl","FTPscan","2014-06-01"
"175.45.187.30","IPv4","inbound","openbl","SSHscan","2014-06-01"
"118.69.201.55","IPv4","inbound","openbl","SSHscan","2014-06-01"
"citi-bank.ru","FQDN","outbound","mtc_malwaredns","Malware","2014-06-01"
"ilo.brenz.pl","FQDN","outbound","mtc_malwaredns","Malware","2014-06-01"
"utenti.lycos.it","FQDN","outbound","mtc_malwaredns","Malware","2014-06-01"
"bgr.runk.pl","FQDN","outbound","mtc_malwaredns","Malware","2014-06-01"
```
### Copyright Info
Expand Down
2 changes: 1 addition & 1 deletion baler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

def bale_csv(harvest, output_file):
with open(output_file, 'wb') as csv_file:
bale_writer = csv.writer(csv_file)
bale_writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)

# header row
bale_writer.writerow(('entity', 'type', 'direction', 'source', 'notes', 'date'))
Expand Down
137 changes: 137 additions & 0 deletions data/harvest-20140718.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion harvest.json
90 changes: 84 additions & 6 deletions thresher.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import bs4
import datetime
import json
import re
Expand All @@ -16,22 +17,99 @@ def indicator_type(indicator):


def process_simple_list(response, source, direction):
    """Parse a plain-text list that carries one indicator per line.

    Lines starting with ``#`` and lines with no content are ignored. For every
    other line the first whitespace-delimited token is used as the indicator.

    Args:
        response: Full text body of the fetched list.
        source: Identifier of the feed, stored unchanged in each row.
        direction: Traffic direction label, stored unchanged in each row.

    Returns:
        A list of ``(entity, type, direction, source, notes, date)`` tuples.
        ``notes`` is always the empty string and ``date`` is today's date in
        ``YYYY-MM-DD`` form.
    """
    # Compute the date once so every row of a single run carries the same value.
    today = datetime.date.today().isoformat()
    data = []
    for line in response.split('\n'):
        if line.startswith('#'):
            continue
        tokens = line.split()
        # Empty and whitespace-only lines (e.g. a lone '\r' left over from
        # CRLF line endings) yield no tokens; indexing them would raise
        # IndexError.
        if not tokens:
            continue
        i = tokens[0]
        data.append((i, indicator_type(i), direction, source, '', today))
    return data


def process_virbl(response, source, direction):
    """Parse a VIRBL-style list that carries one indicator per line.

    Lines starting with ``E`` and lines with no content are ignored. For every
    other line the first whitespace-delimited token is used as the indicator.

    Args:
        response: Full text body of the fetched list.
        source: Identifier of the feed, stored unchanged in each row.
        direction: Traffic direction label, stored unchanged in each row.

    Returns:
        A list of ``(entity, type, direction, source, notes, date)`` tuples.
        ``notes`` is always the empty string and ``date`` is today's date in
        ``YYYY-MM-DD`` form.
    """
    # Compute the date once so every row of a single run carries the same value.
    today = datetime.date.today().isoformat()
    data = []
    for line in response.split('\n'):
        # NOTE(review): 'E' presumably marks a non-data header line in this
        # feed -- confirm against the upstream format.
        if line.startswith('E'):
            continue
        tokens = line.split()
        # Empty and whitespace-only lines (e.g. a lone '\r' left over from
        # CRLF line endings) yield no tokens; indexing them would raise
        # IndexError.
        if not tokens:
            continue
        i = tokens[0]
        data.append((i, indicator_type(i), direction, source, '', today))
    return data


def process_project_honeypot(response, source, direction):
    """Extract indicators from a Project Honey Pot HTML page.

    The text of every ``<a>`` element matched by ``find_all('a', 'bnone')`` is
    used as an indicator.

    Args:
        response: HTML body of the fetched page.
        source: Identifier of the feed, stored unchanged in each row.
        direction: Traffic direction label, stored unchanged in each row.

    Returns:
        A list of ``(entity, type, direction, source, notes, date)`` tuples.
        ``notes`` is always the empty string and ``date`` is today's date in
        ``YYYY-MM-DD`` form.
    """
    # Compute the date once so every row of a single run carries the same value.
    today = datetime.date.today().isoformat()
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser library is installed, so results can differ between machines.
    soup = bs4.BeautifulSoup(response, 'html.parser')
    data = []
    for link in soup.find_all('a', 'bnone'):
        i = link.text
        data.append((i, indicator_type(i), direction, source, '', today))
    return data


def process_drg(response, source, direction):
    """Parse a pipe-delimited list, taking the third field as the indicator.

    Lines starting with ``#`` are ignored, as are lines that do not have at
    least three ``|``-separated fields or whose third field is blank.

    Args:
        response: Full text body of the fetched list.
        source: Identifier of the feed, stored unchanged in each row.
        direction: Traffic direction label, stored unchanged in each row.

    Returns:
        A list of ``(entity, type, direction, source, notes, date)`` tuples.
        ``notes`` is always the empty string and ``date`` is today's date in
        ``YYYY-MM-DD`` form.
    """
    # Compute the date once so every row of a single run carries the same value.
    today = datetime.date.today().isoformat()
    data = []
    for line in response.split('\n'):
        if line.startswith('#'):
            continue
        fields = line.split('|')
        # Blank, '\r'-only or otherwise short lines have no third field;
        # indexing [2] on them would raise IndexError.
        if len(fields) < 3:
            continue
        i = fields[2].strip()
        if not i:
            continue
        data.append((i, indicator_type(i), direction, source, '', today))
    return data


def process_alienvault(response, source, direction):
    """Parse a list whose lines look like ``<indicator> # <trailing text>``.

    Lines starting with ``#`` are ignored. For every other line the text
    before the first ``#`` (or the whole line when there is none) is stripped
    and used as the indicator; lines that leave nothing are skipped.

    Args:
        response: Full text body of the fetched list.
        source: Identifier of the feed, stored unchanged in each row.
        direction: Traffic direction label, stored unchanged in each row.

    Returns:
        A list of ``(entity, type, direction, source, notes, date)`` tuples.
        ``notes`` is always the empty string and ``date`` is today's date in
        ``YYYY-MM-DD`` form.
    """
    # Compute the date once so every row of a single run carries the same value.
    today = datetime.date.today().isoformat()
    data = []
    for line in response.split('\n'):
        if line.startswith('#'):
            continue
        i = line.partition('#')[0].strip()
        # Empty and whitespace-only lines (e.g. a lone '\r' from CRLF line
        # endings) would otherwise be emitted as an empty indicator.
        if not i:
            continue
        data.append((i, indicator_type(i), direction, source, '', today))
    return data


def process_packetmail(response, source, direction):
    """Parse a semicolon-delimited list, taking the first field as indicator.

    Lines starting with ``#`` are ignored. For every other line the text
    before the first ``;`` (or the whole line when there is none) is stripped
    and used as the indicator; lines that leave nothing are skipped.

    Args:
        response: Full text body of the fetched list.
        source: Identifier of the feed, stored unchanged in each row.
        direction: Traffic direction label, stored unchanged in each row.

    Returns:
        A list of ``(entity, type, direction, source, notes, date)`` tuples.
        ``notes`` is always the empty string and ``date`` is today's date in
        ``YYYY-MM-DD`` form.
    """
    # Compute the date once so every row of a single run carries the same value.
    today = datetime.date.today().isoformat()
    data = []
    for line in response.split('\n'):
        if line.startswith('#'):
            continue
        i = line.partition(';')[0].strip()
        # Empty and whitespace-only lines (e.g. a lone '\r' from CRLF line
        # endings) would otherwise be emitted as an empty indicator.
        if not i:
            continue
        data.append((i, indicator_type(i), direction, source, '', today))
    return data


def process_autoshun(response, source, direction):
    """Parse a comma-delimited list, taking the first field as indicator.

    Lines starting with ``S`` are ignored. For every other line the text
    before the first ``,`` (or the whole line when there is none) is stripped
    and used as the indicator; lines that leave nothing are skipped.

    Args:
        response: Full text body of the fetched list.
        source: Identifier of the feed, stored unchanged in each row.
        direction: Traffic direction label, stored unchanged in each row.

    Returns:
        A list of ``(entity, type, direction, source, notes, date)`` tuples.
        ``notes`` is always the empty string and ``date`` is today's date in
        ``YYYY-MM-DD`` form.
    """
    # Compute the date once so every row of a single run carries the same value.
    today = datetime.date.today().isoformat()
    data = []
    for line in response.split('\n'):
        # NOTE(review): 'S' presumably marks a non-data header line in this
        # feed -- confirm against the upstream format.
        if line.startswith('S'):
            continue
        i = line.partition(',')[0].strip()
        # Empty and whitespace-only lines (e.g. a lone '\r' from CRLF line
        # endings) would otherwise be emitted as an empty indicator.
        if not i:
            continue
        data.append((i, indicator_type(i), direction, source, '', today))
    return data


def process_haleys(response, source, direction):
    """Parse a list whose lines look like ``<label>: <indicator>``.

    Lines starting with ``#`` are ignored. For every other line the text after
    the first ``:`` is stripped and used as the indicator; lines without a
    ``:`` or with nothing after it are skipped.

    Args:
        response: Full text body of the fetched list.
        source: Identifier of the feed, stored unchanged in each row.
        direction: Traffic direction label, stored unchanged in each row.

    Returns:
        A list of ``(entity, type, direction, source, notes, date)`` tuples.
        ``notes`` is always the empty string and ``date`` is today's date in
        ``YYYY-MM-DD`` form.
    """
    # Compute the date once so every row of a single run carries the same value.
    today = datetime.date.today().isoformat()
    data = []
    for line in response.split('\n'):
        if line.startswith('#'):
            continue
        # partition() returns '' for the tail when ':' is absent, so lines
        # without a separator fall through to the emptiness check below.
        i = line.partition(':')[2].strip()
        if not i:
            continue
        data.append((i, indicator_type(i), direction, source, '', today))
    return data


def thresh(input_file, output_file):
with open(input_file, 'rb') as f:
crop = json.load(f)

harvest = []
thresher_map = {'blocklist': process_simple_list}
thresher_map = {'blocklist.de': process_simple_list,
'openbl': process_simple_list,
'projecthoneypot': process_project_honeypot,
'ciarmy': process_simple_list,
'alienvault': process_alienvault,
'rulez': process_alienvault,
'sans': process_simple_list,
'nothink': process_simple_list,
'packetmail': process_packetmail,
'autoshun': process_autoshun,
'the-haleys': process_haleys,
'virbl': process_simple_list,
'dragonresearchgroup': process_drg}

for response in crop:
if response[1] == 200:
if 'blocklist.de' in response[0]:
harvest += thresher_map['blocklist'](response[2], response[0], 'inbound')
else: # include other site types
pass
for site in thresher_map:
if site in response[0]:
harvest += thresher_map[site](response[2], response[0], 'inbound')
else: # how to handle non-mapped sites?
pass
else: # how to handle non-200 non-404?
pass

Expand Down

0 comments on commit def42ed

Please sign in to comment.