File tree 1 file changed +62
-0
lines changed
1 file changed +62
-0
lines changed Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[9]:
5
+
6
+
7
+ import requests
8
+ import os
9
+ import pandas as pd
10
+ import time
11
# Every file this script opens uses a relative name, so switch to the
# download directory before anything else runs.
os.chdir('d:/python')
12
+
13
+
14
+ # In[4]:
15
+
16
+
17
def scrape(url, timeout=None):
    """Fetch *url* with an HTTP GET and return the raw response body.

    Args:
        url: Address to download.
        timeout: Optional timeout in seconds handed to ``requests``. The
            default ``None`` waits indefinitely, which is what this
            function did before the parameter existed.

    Returns:
        The response body as ``bytes``. The HTTP status code is not
        inspected, so the body of an error response is returned as well.
    """
    # A fresh session is created per call; the context manager makes sure
    # its pooled connections are released instead of being left open.
    with requests.Session() as session:
        # NOTE(review): verify=False disables TLS certificate validation.
        # It is kept to preserve existing behaviour, but it exposes the
        # download to man-in-the-middle tampering -- confirm it is needed.
        page = session.get(url, verify=False, timeout=timeout)
        return page.content
24
+
25
+
26
+ # In[5]:
27
def _safe_filename(title):
    """Return *title* as text with characters illegal in file names replaced by ``_``."""
    forbidden = '<>:"/\\|?*'
    return ''.join('_' if ch in forbidden else ch for ch in str(title)).strip()


def main():
    """Download the textbook catalogue and then every PDF it lists.

    The catalogue spreadsheet is saved as ``textbook.xlsx`` in the current
    working directory and its ``eBook list`` sheet is read back with pandas.
    For each row, the ``DOI URL`` column is turned into a PDF download
    address and the result is saved as ``<Book Title>.pdf``. The script
    sleeps five seconds before each PDF request.
    """
    # Get the textbook list.
    content = scrape('https://resource-cms.springernature.com/springer-cms/rest/v1/content/17858272/data/v4')

    # The with-block flushes and closes the file before pandas reopens it;
    # the previous ``f.close`` (no parentheses) never actually closed it.
    with open('textbook.xlsx', 'wb') as f:
        f.write(content)

    df = pd.read_excel('textbook.xlsx', sheet_name='eBook list')

    prefix = 'https://rd.springer.com/content/pdf/'

    # Iterate through all books; with the pause per request this is slow.
    for name, doi_url in zip(df['Book Title'], df['DOI URL']):
        print(name)

        # Keep only the DOI itself and percent-encode its slash so it fits
        # into a single path segment of the download address.
        postfix = doi_url.split('http://doi.org/')[-1].replace('/', '%2F')
        url = prefix + postfix + '.pdf'

        time.sleep(5)
        content = scrape(url)

        # Titles may contain characters such as ':' or '/' that cannot
        # appear in a file name, so sanitise before opening.
        with open(f'{_safe_filename(name)}.pdf', 'wb') as f:
            f.write(content)
56
+
57
+
58
# Start the download only when the file is executed as a script.
if __name__ == "__main__":
    main()
60
+
61
+
62
+
You can’t perform that action at this time.
0 commit comments