Skip to content

Commit 07fea2b

Browse files
authored May 1, 2020
take advantage of Springer free textbook due to COVID-19
1 parent 76f8def commit 07fea2b

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed
 

‎Springer.py

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# In[9]:
5+
6+
7+
import requests
8+
import os
9+
import pandas as pd
10+
import time
11+
# Switch the process working directory at import time; the relative paths
# opened later in this file ('textbook.xlsx' and the per-book PDFs) are
# therefore created inside this directory.
# NOTE(review): the path is hard-coded and Windows-specific; os.chdir raises
# if the directory does not exist -- confirm it matches your machine.
os.chdir('d:/python')
12+
13+
14+
# In[4]:
15+
16+
17+
def scrape(url, timeout=60):
    """Fetch ``url`` with an HTTP GET and return the raw response body.

    Args:
        url: Address to download.
        timeout: Seconds ``requests`` waits for the server to connect or to
            send data before giving up. Without a timeout a stalled server
            would block the caller indefinitely.

    Returns:
        The response body as bytes (``page.content``). The HTTP status code
        is not checked, so an error page is returned as-is.

    Raises:
        Exceptions raised by ``requests`` (connection failures, timeouts)
        propagate to the caller.
    """
    # Use the session as a context manager so its pooled connections are
    # released once the download finishes instead of leaking per call.
    with requests.Session() as session:
        # NOTE(review): verify=False disables TLS certificate verification,
        # which exposes the download to man-in-the-middle tampering. It is
        # kept to preserve existing behaviour; drop it if the hosts present
        # valid certificates in your environment.
        page = session.get(url, verify=False, timeout=timeout)
        return page.content
24+
25+
26+
# In[5]:
27+
def main():
    """Download the Springer textbook list, then every listed book as a PDF.

    The spreadsheet is saved as ``textbook.xlsx`` and each book as
    ``<Book Title>.pdf`` (with characters that are invalid in file names
    replaced by ``_``), all relative to the current working directory.
    A book whose download raises a ``requests`` error is reported and
    skipped so that one failure does not abort the whole run.
    """
    # Get the textbook list.
    content = scrape('https://resource-cms.springernature.com/springer-cms/rest/v1/content/17858272/data/v4')

    # The context manager guarantees the workbook is flushed and closed
    # before pandas reads it back. The previous `f.close` lacked the call
    # parentheses, so the file was never actually closed.
    with open('textbook.xlsx', 'wb') as f:
        f.write(content)

    df = pd.read_excel('textbook.xlsx', sheet_name='eBook list')

    prefix = 'https://rd.springer.com/content/pdf/'

    # Titles may contain characters that cannot appear in a file name
    # (path separators, ':' and friends on Windows); map them to '_'.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    # Iterate through all books. This takes a long time: there is a
    # 5 second pause before every download.
    for name, doi_url in zip(df['Book Title'], df['DOI URL']):
        print(name)

        # The PDF address is built from the DOI with '/' percent-encoded.
        postfix = doi_url.split('http://doi.org/')[-1].replace('/', '%2F')
        url = prefix + postfix + '.pdf'

        time.sleep(5)
        try:
            content = scrape(url)
        except requests.RequestException as err:
            # Report and move on rather than losing the rest of the batch.
            print(f'failed to download {name}: {err}')
            continue

        filename = f'{str(name).translate(unsafe_chars)}.pdf'
        with open(filename, 'wb') as f:
            f.write(content)
56+
57+
58+
# Start the download only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
60+
61+
62+

0 commit comments

Comments
 (0)
Please sign in to comment.