Skip to content

Commit 74ceed9

Browse files
author
root
committed
add mendeley and read right now feature
1 parent b5d990f commit 74ceed9

File tree

12 files changed

+985
-232
lines changed

12 files changed

+985
-232
lines changed

dlmonitor/latex.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import urllib2
2+
import os
3+
4+
from dlmonitor import settings
5+
6+
def _run_command(argv, cwd=None):
    """Run an external program without a shell and return its exit status.

    Returns -1 when the program cannot be started (for example because it is
    not installed), so a missing tool results in "no HTML produced" rather
    than an exception.
    """
    import subprocess

    try:
        return subprocess.call(argv, cwd=cwd)
    except OSError:
        return -1


def build_paper_html(arxiv_id):
    """Download the arXiv source of a paper and convert it to HTML.

    The source archive is fetched from ``https://arxiv.org/e-print/<id>``,
    unpacked into ``<settings.SOURCE_PATH>/<arxiv_id>`` and converted with the
    ``latexml`` and ``latexmlpost`` command-line tools, which write
    ``main.html`` into that directory.

    Args:
        arxiv_id: arXiv identifier; used in the download URL and as the name
            of the working directory.

    Returns:
        The path of ``main.html`` when it exists after the call, ``None``
        when the working directory exists but holds no ``main.html``, and
        ``False`` when the identifier is rejected or the source is larger
        than 15 MB.
    """
    import re

    # The identifier is used to build file paths and command arguments, so
    # accept only a single plain path segment (no separators, no leading dot
    # or dash, no shell metacharacters).
    if not re.match(r"[A-Za-z0-9][A-Za-z0-9._-]*\Z", arxiv_id or ""):
        return False

    src_path = "{}/{}".format(settings.SOURCE_PATH, arxiv_id)
    html_path = "{}/main.html".format(src_path)
    # NOTE(review): the previous revision had an unconditional `return False`
    # here, which made everything below unreachable. It is removed on the
    # assumption that it was a debugging leftover -- confirm before deploying.

    # An existing directory means an earlier attempt already ran; never
    # download the same paper twice.
    if os.path.exists(src_path):
        return html_path if os.path.exists(html_path) else None

    max_bytes = 15 * 1024 * 1024
    opener = urllib2.build_opener()
    opener.addheaders = [
        ('Referer', 'https://arxiv.org/format/{}'.format(arxiv_id)),
        ('User-Agent', 'Mozilla/5.0'),
    ]
    page = opener.open("https://arxiv.org/e-print/{}".format(arxiv_id))
    try:
        lengths = page.info().getheaders("Content-Length")
        file_size = lengths[0] if lengths else None
        too_big = file_size is not None and int(file_size) > max_bytes
        if not too_big:
            print("download {}: {}".format(arxiv_id, file_size))
            # Read one byte past the limit so an oversized body is detected
            # even when the server sent no Content-Length header.
            data = page.read(max_bytes + 1)
            too_big = len(data) > max_bytes
    finally:
        page.close()

    # The directory is created even when the file is too big: the existence
    # check above then short-circuits every later call for this paper.
    os.mkdir(src_path)
    if too_big:
        return False

    tgz_path = "{}/source.tgz".format(src_path)
    with open(tgz_path, "wb") as archive:
        archive.write(data)

    # Argument lists (no shell) keep archive member names and the identifier
    # from being interpreted as shell syntax; `cwd` replaces the former
    # process-wide os.chdir().
    _run_command(["tar", "xzf", tgz_path, "--directory", src_path])

    texfiles = [fn for fn in os.listdir(src_path) if fn.endswith(".tex")]
    if texfiles:
        select_texfile = texfiles[0]
        if len(texfiles) > 1:
            # Prefer the first file that contains the document environment.
            for fn in texfiles:
                with open("{}/{}".format(src_path, fn)) as tex_file:
                    text = tex_file.read()
                if "begin{document}" in text:
                    select_texfile = fn
                    break
        # latexml is given the file name without its ".tex" suffix.
        tex_stem = select_texfile[:-len(".tex")]
        _run_command(
            ["latexml", "--includestyles", "--dest=main.xml", tex_stem],
            cwd=src_path)
        _run_command(
            ["latexmlpost", "--dest=main.html", "main.xml"],
            cwd=src_path)

    os.remove(tgz_path)
    return html_path if os.path.exists(html_path) else None
41+
42+
def retrieve_paper_html(arxiv_token):
    """Return the embeddable HTML fragment of a converted paper.

    Looks for ``<settings.SOURCE_PATH>/<arxiv_token>/main.html``.

    Args:
        arxiv_token: name of the paper's directory under
            ``settings.SOURCE_PATH``.

    Returns:
        ``"NOT_EXIST"`` when the paper directory does not exist,
        ``"NOT_AVAILABE"`` when the directory exists without ``main.html``,
        otherwise the content between ``<body>`` and ``</body>`` with image
        sources and the link colour rewritten.
    """
    src_path = "{}/{}".format(settings.SOURCE_PATH, arxiv_token)
    html_path = "{}/main.html".format(src_path)

    if not os.path.exists(src_path):
        return "NOT_EXIST"
    if not os.path.exists(html_path):
        # NOTE(review): the sentinel is misspelled, but it is a runtime value
        # that callers or templates may compare against, so it is kept as is.
        return "NOT_AVAILABE"

    # Read bytes and decode explicitly; the context manager closes the file
    # handle that the previous one-liner leaked.
    with open(html_path, "rb") as html_file:
        html_body = html_file.read().decode("utf-8")

    # Keep only what is inside the <body> element.
    html_body = html_body.split("<body>")[-1]
    html_body = html_body.split("</body>")[0]
    # Prefix image sources with the /arxiv_files/<token>/ URL path.
    html_body = html_body.replace(
        '<img src="', '<img src="/arxiv_files/{}/'.format(arxiv_token))
    # Swap the colour #0000FF for #6666FF.
    html_body = html_body.replace("#0000FF", "#6666FF")
    return html_body

dlmonitor/settings.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,11 @@
2424
TWITTER_ACCESS_SECRET = os.environ.get('TWITTER_ACCESS_SECRET', "")
2525

2626
PDF_PATH = os.environ.get("PDF_PATH", "/tmp")
27+
SOURCE_PATH = os.environ.get("SOURCE_PATH", "/tmp")
28+
HOME_URL = os.environ.get("HOME_URL", "https://deeplearn.org")
29+
30+
31+
MENDELEY_CLIENTID = os.environ.get("MENDELEY_CLIENTID", "")
32+
MENDELEY_SECRET = os.environ.get("MENDELEY_SECRET", "")
33+
34+
SESSION_KEY = os.environ.get("SESSION_KEY", "DEEPLEARN.ORG SECRET KEY")

dlmonitor/webapp/app.py

Lines changed: 103 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,31 @@
1-
from flask import Flask, request
1+
import os
2+
from flask import Flask, request, redirect, session, send_from_directory
23
from flask import render_template, send_from_directory
34
from dlmonitor.db import close_global_session
45
from dlmonitor.fetcher import get_posts
6+
from dlmonitor import settings
57
from urllib2 import unquote
68
import datetime as DT
79

10+
from mendeley import Mendeley
11+
from mendeley.session import MendeleySession
12+
import oauthlib
13+
14+
15+
816
app = Flask(__name__, static_url_path='/static')
17+
app.secret_key = settings.SESSION_KEY
18+
app.config['SESSION_TYPE'] = 'filesystem'
19+
920

1021
NUMBER_EACH_PAGE = 30
1122
DEFAULT_KEYWORDS = "Hot Tweets,Hot Papers,Fresh Papers,reinforcement learning,language"
12-
1323
DATE_TOKEN_SET = set(['1-week', '2-week', '1-month'])
1424

25+
# Mendeley
26+
MENDELEY_REDIRECT = "{}/oauth".format(settings.HOME_URL)
27+
mendeley = Mendeley(settings.MENDELEY_CLIENTID, settings.MENDELEY_SECRET, MENDELEY_REDIRECT)
28+
1529
def get_date_str(token):
1630
"""
1731
Convert token to date string.
@@ -43,7 +57,25 @@ def index():
4357
posts = get_posts(src, keywords=kw, since=target_date, start=0, num=num_page)
4458
column_list.append((src, kw, posts))
4559

46-
return render_template("index.html", columns=column_list)
60+
# Mendeley
61+
auth = mendeley.start_authorization_code_flow()
62+
if "ma_token" in session and session["ma_token"] is not None:
63+
ma_session = MendeleySession(mendeley, session['ma_token'])
64+
try:
65+
ma_firstname = ma_session.profiles.me.first_name
66+
except:
67+
session['ma_token'] = None
68+
ma_session =None
69+
ma_firstname = None
70+
else:
71+
ma_session = None
72+
ma_firstname = None
73+
74+
ma_authorized = ma_session is not None and ma_session.authorized
75+
return render_template(
76+
"index.html", columns=column_list, mendeley_login=auth.get_login_url(),
77+
ma_session=ma_session, ma_authorized=ma_authorized, ma_firstname=ma_firstname
78+
)
4779

4880
@app.route('/fetch', methods=['POST'])
4981
def fetch():
@@ -64,15 +96,25 @@ def fetch():
6496

6597
num_page = 80 if src == "twitter" else NUMBER_EACH_PAGE
6698

99+
# Mendeley
100+
ma_authorized = "ma_token" in session and session["ma_token"] is not None
101+
67102
return render_template(
68103
"post_{}.html".format(src),
69-
posts=get_posts(src, keywords=kw, since=target_date, start=start, num=num_page))
104+
posts=get_posts(src, keywords=kw, since=target_date, start=start, num=num_page),
105+
ma_authorized=ma_authorized)
70106

71107
@app.route("/arxiv/<int:arxiv_id>/<paper_str>")
def arxiv(arxiv_id, paper_str):
    """Render the single-paper page for one arXiv post.

    Args:
        arxiv_id: integer id passed to ``ArxivSource.get_one_post``.
        paper_str: trailing URL segment; not used by this view.

    Returns:
        The rendered ``single.html`` template, given the post, its arXiv
        token and the result of ``retrieve_paper_html`` for that token.
    """
    from dlmonitor.sources.arxivsrc import ArxivSource
    from dlmonitor.latex import retrieve_paper_html

    post = ArxivSource().get_one_post(arxiv_id)
    # The token is the last path segment of the post's arXiv URL.
    url_segments = post.arxiv_url.split("/")
    arxiv_token = url_segments[-1]

    # Check the HTML page
    fulltext = retrieve_paper_html(arxiv_token)
    return render_template(
        "single.html",
        post=post,
        arxiv_token=arxiv_token,
        html_body=fulltext,
    )
76118

77119
@app.route("/about")
78120
def about():
@@ -82,5 +124,61 @@ def about():
82124
def search():
83125
return render_template("search.html")
84126

127+
@app.route('/oauth')
def auth_return():
    """Complete the Mendeley authorization-code flow.

    Resumes the flow with the ``state`` query parameter, authenticates
    against the full request URL, stores the resulting token and the state
    in the Flask session, and redirects to the home page.
    """
    state = request.args.get("state")
    flow = mendeley.start_authorization_code_flow(state=state)
    authenticated = flow.authenticate(request.url)

    session["ma_token"] = authenticated.token
    session["ma_state"] = state

    return redirect('/')
136+
137+
@app.route("/save_mendeley")
def save_mendeley():
    """Save an arXiv PDF into the logged-in user's Mendeley library.

    Reads the paper URL from the ``url`` query parameter, derives the arXiv
    id from its last path segment, downloads the PDF into
    ``settings.PDF_PATH`` unless it is already there, and uploads it through
    the Mendeley session built from the token stored in the Flask session.

    Returns:
        A plain-text status message. A request without a usable ``url``
        parameter gets a 400 response.
    """
    import urllib

    token = session.get("ma_token")
    ma_session = None
    if token is not None:
        ma_session = MendeleySession(mendeley, token)

    if ma_session is None or not ma_session.authorized:
        return "Please log in into Mendeley."

    pdf_url = request.args.get('url')
    if not pdf_url:
        # Previously a missing parameter crashed on None.split().
        return "No paper URL was given.", 400

    # Retrieve pdf file
    arxiv_id = pdf_url.split("/")[-1].replace(".pdf", "")
    if not arxiv_id:
        return "No paper URL was given.", 400
    local_pdf = "{}/{}.pdf".format(settings.PDF_PATH, arxiv_id)
    remote_pdf = "http://arxiv.org/pdf/{}.pdf".format(arxiv_id)
    if not os.path.exists(local_pdf):
        # Download under a temporary name and rename afterwards, so an
        # interrupted transfer is never mistaken for a cached PDF.
        partial_pdf = local_pdf + ".part"
        urllib.urlretrieve(remote_pdf, partial_pdf)
        os.rename(partial_pdf, local_pdf)

    # Create file
    ma_session.documents.create_from_file(local_pdf)

    return "{} is saved into Mendeley".format(os.path.basename(local_pdf))
161+
162+
@app.route("/load_fulltext/<arxiv_token>")
def load_fulltext(arxiv_token):
    """Build the HTML version of a paper, then return its HTML fragment.

    Args:
        arxiv_token: arXiv token taken from the URL.

    Returns:
        Whatever ``retrieve_paper_html`` returns for the token.
    """
    from dlmonitor.latex import build_paper_html, retrieve_paper_html

    # The build result is not inspected; the lookup below reports the state.
    build_paper_html(arxiv_token)
    fragment = retrieve_paper_html(arxiv_token)
    return fragment
168+
169+
@app.route("/arxiv_files/<arxiv_token>/<path:fp>")
def arxiv_files(arxiv_token, fp):
    """Serve a file that belongs to a paper's extracted source directory.

    Args:
        arxiv_token: name of the paper directory under
            ``settings.SOURCE_PATH``.
        fp: path of the requested file relative to that directory.

    Returns:
        The file as a response, or an empty body when the file does not
        exist or the request resolves outside the paper's directory.
    """
    # Both URL parts are untrusted. Resolve ".." segments and symlinks first,
    # then require that the result stays inside the paper's own directory,
    # which in turn must stay inside SOURCE_PATH.
    root = os.path.realpath(settings.SOURCE_PATH)
    paper_dir = os.path.realpath(os.path.join(root, arxiv_token))
    target = os.path.realpath(os.path.join(paper_dir, fp))

    # os.path.join(x, "") appends exactly one trailing separator, so that
    # "/data/12" is not accepted as being inside "/data/1".
    if not paper_dir.startswith(os.path.join(root, "")):
        return ""
    if not target.startswith(os.path.join(paper_dir, "")):
        return ""
    if not os.path.exists(target):
        return ""
    return send_from_directory(
        os.path.dirname(target), os.path.basename(target))
176+
177+
@app.route("/logout")
def logout():
    """Clear the stored Mendeley token and redirect to the home page."""
    home = "/"
    # The key is kept and set to None rather than deleted.
    session["ma_token"] = None
    return redirect(home)
181+
85182
if __name__ == '__main__':
    # The server listens on all interfaces, and Flask's debug mode exposes an
    # interactive debugger that can execute arbitrary code. Debug mode is
    # therefore off unless FLASK_DEBUG=1 is set in the environment.
    debug_enabled = os.environ.get("FLASK_DEBUG", "") == "1"
    # Pass ssl_context='adhoc' to app.run() to serve over HTTPS with a
    # self-signed certificate.
    app.run(host='0.0.0.0', debug=debug_enabled)

0 commit comments

Comments
 (0)