Skip to content

Commit 01c6f49

Browse files
committed
first commit
0 parents  commit 01c6f49

9 files changed

+351
-0
lines changed

Diff for: .DS_Store

6 KB
Binary file not shown.

Diff for: .project

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<projectDescription>
3+
<name>fileLogger</name>
4+
<comment></comment>
5+
<projects>
6+
</projects>
7+
<buildSpec>
8+
<buildCommand>
9+
<name>org.python.pydev.PyDevBuilder</name>
10+
<arguments>
11+
</arguments>
12+
</buildCommand>
13+
</buildSpec>
14+
<natures>
15+
<nature>org.python.pydev.pythonNature</nature>
16+
</natures>
17+
</projectDescription>

Diff for: .pydevproject

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
2+
<?eclipse-pydev version="1.0"?><pydev_project>
3+
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
4+
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
5+
</pydev_project>

Diff for: Configs.py

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Directory Settings
2+
logPath = '/var/log/applications/VAFileExtractor/info.log'
3+
data_directory = '/Users/josiah/Documents/workspace/VA/va/'
4+
5+
#MYSQL Settings
6+
mysql = {
7+
'user': 'root',
8+
'password': 'r00t',
9+
'host': '127.0.0.1',
10+
'database': 'va',
11+
'raise_on_warnings': True,
12+
}
13+
14+
redis = {
15+
'host': '127.0.0.1',
16+
'port': 6379,
17+
'db': 0
18+
}

Diff for: Configs.pyc

531 Bytes
Binary file not shown.

Diff for: Daemon.py

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#!/usr/bin/env python
2+
3+
import sys, os, time, atexit
4+
from signal import SIGTERM
5+
6+
class Daemon:
7+
"""
8+
A generic daemon class.
9+
10+
Usage: subclass the Daemon class and override the run() method
11+
"""
12+
def __init__(self, pidfile, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
13+
self.stdin = stdin
14+
self.stdout = stdout
15+
self.stderr = stderr
16+
self.pidfile = pidfile
17+
18+
def daemonize(self):
19+
"""
20+
do the UNIX double-fork magic, see Stevens' "Advanced
21+
Programming in the UNIX Environment" for details (ISBN 0201563177)
22+
http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
23+
"""
24+
try:
25+
pid = os.fork()
26+
if pid > 0:
27+
# exit first parent
28+
sys.exit(0)
29+
except OSError, e:
30+
sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
31+
sys.exit(1)
32+
33+
# decouple from parent environment
34+
os.chdir("/")
35+
os.setsid()
36+
os.umask(0)
37+
38+
# do second fork
39+
try:
40+
pid = os.fork()
41+
if pid > 0:
42+
# exit from second parent
43+
sys.exit(0)
44+
except OSError, e:
45+
sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
46+
sys.exit(1)
47+
48+
# redirect standard file descriptors
49+
sys.stdout.flush()
50+
sys.stderr.flush()
51+
si = file(self.stdin, 'r')
52+
so = file(self.stdout, 'a+')
53+
se = file(self.stderr, 'a+', 0)
54+
os.dup2(si.fileno(), sys.stdin.fileno())
55+
os.dup2(so.fileno(), sys.stdout.fileno())
56+
os.dup2(se.fileno(), sys.stderr.fileno())
57+
58+
# write pidfile
59+
atexit.register(self.delpid)
60+
pid = str(os.getpid())
61+
file(self.pidfile, 'w+').write("%s\n" % pid)
62+
63+
def delpid(self):
64+
os.remove(self.pidfile)
65+
66+
def start(self):
67+
"""
68+
Start the daemon
69+
"""
70+
# Check for a pidfile to see if the daemon already runs
71+
try:
72+
pf = file(self.pidfile, 'r')
73+
pid = int(pf.read().strip())
74+
pf.close()
75+
except IOError:
76+
pid = None
77+
78+
if pid:
79+
message = "pidfile %s already exist. Daemon already running?\n"
80+
sys.stderr.write(message % self.pidfile)
81+
sys.exit(1)
82+
83+
# Start the daemon
84+
self.daemonize()
85+
self.run()
86+
87+
def stop(self):
88+
"""
89+
Stop the daemon
90+
"""
91+
# Get the pid from the pidfile
92+
try:
93+
pf = file(self.pidfile, 'r')
94+
pid = int(pf.read().strip())
95+
pf.close()
96+
except IOError:
97+
pid = None
98+
99+
if not pid:
100+
message = "pidfile %s does not exist. Daemon not running?\n"
101+
sys.stderr.write(message % self.pidfile)
102+
return # not an error in a restart
103+
104+
# Try killing the daemon process
105+
try:
106+
while 1:
107+
os.kill(pid, SIGTERM)
108+
time.sleep(0.1)
109+
except OSError, err:
110+
err = str(err)
111+
if err.find("No such process") > 0:
112+
if os.path.exists(self.pidfile):
113+
os.remove(self.pidfile)
114+
else:
115+
print str(err)
116+
sys.exit(1)
117+
118+
def restart(self):
119+
"""
120+
Restart the daemon
121+
"""
122+
self.stop()
123+
self.start()
124+
125+
def run(self):
126+
"""
127+
You should override this method when you subclass Daemon. It will be called after the process has been
128+
daemonized by start() or restart().
129+
"""

Diff for: Daemon.pyc

4.23 KB
Binary file not shown.

Diff for: README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# VAFileExtractor

Diff for: VAFileExtractor.py

+181
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
#!/usr/bin/env python
2+
3+
import sys, time, csv, logging, datetime, redis, pickle, mysql.connector
4+
from xlrd import open_workbook
5+
from Daemon import Daemon
6+
import Configs #import configs
7+
class VAFileExtractor(Daemon):
8+
9+
#connect to database
10+
def mysql_connect(self):
11+
logging.info("Connecting to mysql ...")
12+
try:
13+
cnx = mysql.connector.connect(**Configs.mysql)
14+
logging.info("successfully connected to mysql ..." )
15+
return cnx
16+
except Exception, e:
17+
logging.exception("Exception raised while connecting to mysql ...=> " + str(e))
18+
19+
def redis_connect(self):
20+
logging.info("Connecting to redis server ...")
21+
try:
22+
cnx = redis.StrictRedis(**Configs.redis)
23+
logging.info("successfully connected to redis ...")
24+
25+
return cnx
26+
except Exception, e:
27+
logging.exception("Exception raised while connecting to redis ...=> " + str(e))
28+
29+
30+
def get_active_sites(self):
31+
try:
32+
active = 1
33+
cnx = self.mysql_connect()
34+
cursor = cnx.cursor()
35+
query = "SELECT * FROM va_sites WHERE status = %s" % active
36+
logging.debug("SITES QUERY ...=> " + str(query))
37+
cursor.execute(query)
38+
39+
return cursor.fetchall()
40+
cnx.close()
41+
except Exception, e:
42+
logging.exception("Exception raised while querying sites ...=> " + str(e))
43+
cnx.close()
44+
45+
def get_pending_file_upload(self, site_id):
46+
try:
47+
active = 1
48+
cnx = self.mysql_connect()
49+
cursor = cnx.cursor()
50+
query = "SELECT * FROM va_rawdatafiles WHERE siteid_id = %s AND status = %s AND (datecached IS NULL OR refreshcache = true)" % (site_id, active)
51+
logging.debug("FILES QUERY ...=> " + str(query))
52+
cursor.execute(query)
53+
54+
cnx.close()
55+
return cursor.fetchone()
56+
except Exception, e:
57+
logging.exception("Exception raised while querying va_rawdatafiles ...=> " + str(e))
58+
cnx.close()
59+
60+
def update_file_upload(self, fileid):
61+
try:
62+
inactive = 2
63+
cnx = self.mysql_connect()
64+
cursor = cnx.cursor()
65+
cursor.execute ("UPDATE va_rawdatafiles SET status = %s, datecached = now(), refreshcache = false WHERE fileid = %s " % (inactive, fileid))
66+
cnx.commit()
67+
logging.info("file upload updated so that its not processed a second time ... ")
68+
cnx.close()
69+
except Exception, e:
70+
logging.exception("Exception raised while updating va_rawdatafiles ...=> " + str(e))
71+
cnx.close()
72+
73+
74+
def update_site_with_file_upload(self, fileid, siteid):
75+
try:
76+
inactive = 2
77+
cnx = self.mysql_connect()
78+
cursor = cnx.cursor()
79+
cursor.execute ("UPDATE va_sites SET current_fileid = %s WHERE siteid = %s " % (fileid, siteid))
80+
cnx.commit()
81+
logging.info("va_sites updated with current_fileid ... ")
82+
cnx.close()
83+
except Exception, e:
84+
logging.exception("Exception raised while updating va_sites with current_fileid ...=> " + str(e))
85+
cnx.close()
86+
87+
88+
89+
def file_to_array(self, file_to_cache):
90+
data = []
91+
try:
92+
logging.info("file_to_array start...")
93+
book = open_workbook(file_to_cache,on_demand=True)
94+
for name in book.sheet_names():
95+
if name:
96+
logging.info("sheet name ..." + str(name))
97+
worksheet = book.sheet_by_name(name)
98+
num_rows = worksheet.nrows - 1
99+
num_cells = worksheet.ncols - 1
100+
curr_row = 2
101+
while curr_row < num_rows:#each row
102+
curr_row += 1
103+
row = worksheet.row(curr_row)
104+
curr_cell = -1
105+
thisRow = []
106+
while curr_cell < num_cells: #each cell
107+
curr_cell += 1
108+
cell_value = worksheet.cell_value(curr_row, curr_cell)
109+
if isinstance(cell_value, unicode):
110+
cell_value = cell_value.encode('utf-8','ignore')
111+
thisRow.append(str(cell_value))
112+
theString = ','.join(thisRow)
113+
logging.info("add row to list " + str(theString[:100] + "..."))
114+
data.append(thisRow)
115+
logging.info("now return array ... ")
116+
return data
117+
except Exception, e:
118+
logging.exception("Exception raised converting the upload while to array ...=> " + str(e))
119+
return None
120+
121+
122+
123+
124+
#book = open_workbook(data_directory + '/' + output_file,on_demand=True)
125+
126+
def doJob(self):
127+
logging.info("start job...")
128+
sites = self.get_active_sites()
129+
if(sites):
130+
logging.info("active sites exist, loop through them and check if data needs caching ...=> " + str(sites))
131+
for site in sites:
132+
site_id = site[0]
133+
site_name = site[2]
134+
site_datakey = site[3]
135+
logging.info("current site_id=>" + str(site_id) + ", site_name=>" + str(site_name))
136+
#check if there is a file that needs caching
137+
file_row = self.get_pending_file_upload(site_id)
138+
if(file_row):
139+
fileid = file_row[0]
140+
file_name = file_row[2]
141+
datecached = file_row[8]
142+
refreshcache = file_row[9]
143+
logging.info("file found that needs caching: file_name=>" + str(file_name) + ", datecached=>" + str(datecached) + ", refreshcache=>" + str(refreshcache))
144+
file_to_cache = Configs.data_directory + file_name
145+
146+
file_to_array = self.file_to_array(file_to_cache)
147+
if(file_to_array): #if pushed in array, commit to memcache
148+
redis_cnx = self.redis_connect()
149+
pickled_object = pickle.dumps(file_to_array)
150+
redis_cnx.set(site_datakey, pickled_object)
151+
#we are done here, so lets update the upload record as cached.
152+
self.update_file_upload(fileid)
153+
self.update_site_with_file_upload(fileid, site_id)
154+
155+
def run(self):
156+
try:
157+
logging.basicConfig(filename=Configs.logPath, level=logging.DEBUG, format='%(levelname)s: %(asctime)s%(message)s on line %(lineno)d')
158+
except Exception as e:
159+
print str(e)
160+
161+
while True:
162+
self.doJob()
163+
time.sleep(1)
164+
165+
166+
if __name__ == "__main__":
167+
daemon = VAFileExtractor('/tmp/vafileectractor.pid')
168+
if len(sys.argv) == 2:
169+
if 'start' == sys.argv[1]:
170+
daemon.start()
171+
elif 'stop' == sys.argv[1]:
172+
daemon.stop()
173+
elif 'restart' == sys.argv[1]:
174+
daemon.restart()
175+
else:
176+
print "Unknown command"
177+
sys.exit(2)
178+
sys.exit(0)
179+
else:
180+
print "usage: %s start|stop|restart" % sys.argv[0]
181+
sys.exit(2)

0 commit comments

Comments
 (0)