1
1
import urllib2
2
2
import os
3
+ import subprocess
3
4
4
5
from dlmonitor import settings
5
6
6
7
def build_paper_html (arxiv_id ):
7
8
src_path = "{}/{}" .format (settings .SOURCE_PATH , arxiv_id )
8
9
html_path = "{}/main.html" .format (src_path )
9
- return False
10
10
if os .path .exists (src_path ):
11
11
return html_path if os .path .exists (html_path ) else None
12
12
opener = urllib2 .build_opener ()
@@ -17,7 +17,7 @@ def build_paper_html(arxiv_id):
17
17
if (int (file_size ) / 1024. / 1024. > 15 ):
18
18
# File too big
19
19
os .mkdir (src_path )
20
- return False
20
+ return None
21
21
print ("download {}: {}" .format (arxiv_id , file_size ))
22
22
data = page .read ()
23
23
os .mkdir (src_path )
@@ -26,23 +26,28 @@ def build_paper_html(arxiv_id):
26
26
os .chdir (src_path )
27
27
os .system ("tar xzf {} --directory {}" .format (tgz_path , src_path ))
28
28
texfiles = [fn for fn in os .listdir (src_path ) if fn .endswith (".tex" )]
29
- select_texfile = texfiles [0 ]
30
- if len (texfiles ) > 1 :
31
- for fn in texfiles :
32
- text = open ("{}/{}" .format (src_path , fn )).read ()
33
- if "begin{document}" in text :
34
- select_texfile = fn
35
- break
36
29
if texfiles :
37
- os .system ("latexml --includestyles --dest=main.xml {}" .format (select_texfile .replace (".tex" , "" )))
30
+ select_texfile = texfiles [0 ]
31
+ if len (texfiles ) > 1 :
32
+ for fn in texfiles :
33
+ text = open ("{}/{}" .format (src_path , fn )).read ()
34
+ if "begin{document}" in text :
35
+ select_texfile = fn
36
+ break
37
+ cmd = "latexml --includestyles --dest=main.xml {}" .format (select_texfile .replace (".tex" , "" ))
38
+ os .system (cmd )
39
+ os .system ("latexmlpost --dest=main.html main.xml" )
38
40
os .system ("latexmlpost --dest=main.html main.xml" )
39
41
os .remove (tgz_path )
42
+ open ("{}/.loaded" .format (src_path ), "wb" ).write ("loaded" )
40
43
return html_path if os .path .exists (html_path ) else None
41
44
42
45
def retrieve_paper_html (arxiv_token ):
43
46
src_path = "{}/{}" .format (settings .SOURCE_PATH , arxiv_token )
44
47
html_path = "{}/main.html" .format (src_path )
45
- if os .path .exists (src_path ) and not os .path .exists (html_path ):
48
+ if os .path .exists (src_path ) and not os .path .exists ("{}/.loaded" .format (src_path )):
49
+ html_body = "PROCESSING"
50
+ elif os .path .exists (src_path ) and not os .path .exists (html_path ):
46
51
html_body = "NOT_AVAILABE"
47
52
elif os .path .exists (src_path ) and os .path .exists (html_path ):
48
53
html_body = open (html_path ).read ().decode ("utf-8" )
0 commit comments