Skip to content

Commit 62edd5b

Browse files
committed
Add a pandas CSV reader test case
1 parent 58ef0f6 commit 62edd5b

File tree

2 files changed

+92
-47
lines changed

2 files changed

+92
-47
lines changed

Diff for: requirements-dev.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
coverage
22
flake8
3+
pandas
34
sphinx_rtd_theme

Diff for: test_module.py

+91-47
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
import requests
1111
from bs4 import BeautifulSoup
1212
from contextlib import contextmanager
13+
try:
14+
import pandas as pd
15+
except ImportError:
16+
pd = None
1317

1418

1519
class TestLuminati(unittest.TestCase):
@@ -140,6 +144,21 @@ def setUpClass(cls):
140144

141145
scholarly.use_proxy(proxy_generator, secondary_proxy_generator)
142146

147+
# Try storing the file temporarily as `scholarly.csv` and delete it.
148+
# If a file with that name already exists, generate a random name
149+
# that does not exist yet, so we can safely delete it.
150+
cls.mandates_filename = "scholarly.csv"
151+
while os.path.exists(cls.mandates_filename):
152+
cls.mandates_filename = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz', k=10)) + ".csv"
153+
154+
@classmethod
155+
def tearDownClass(cls):
156+
"""
157+
Clean up the downloaded mandates CSV file.
158+
"""
159+
if os.path.exists(cls.mandates_filename):
160+
os.remove(cls.mandates_filename)
161+
143162
@staticmethod
144163
@contextmanager
145164
def suppress_stdout():
@@ -632,53 +651,79 @@ def test_pubs_custom_url(self):
632651
self.assertGreaterEqual(pub['num_citations'], 581)
633652

634653
def test_download_mandates_csv(self):
635-
# Try storing the file temporarily as `scholarly.csv` and delete it.
636-
# If there exists already a file with that name, generate a random name
637-
# that does not exist yet, so we can safely delete it.
638-
filename = "scholarly.csv"
639-
while os.path.exists(filename):
640-
filename = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz', k=10)) + ".csv"
641-
642-
# Delete the file with a finally block no matter what happens
643-
try:
644-
scholarly.download_mandates_csv(filename)
645-
funder, policy, percentage2020, percentageOverall = [], [], [], []
646-
with open(filename, "r") as f:
647-
csv_reader = csv.DictReader(f)
648-
for row in csv_reader:
649-
funder.append(row['\ufeffFunder'])
650-
policy.append(row['Policy'])
651-
percentage2020.append(row['2020'])
652-
percentageOverall.append(row['Overall'])
653-
654-
agency_policy = {
655-
"US National Science Foundation": "https://www.nsf.gov/pubs/2015/nsf15052/nsf15052.pdf",
656-
"Department of Science & Technology, India": "http://www.dst.gov.in/sites/default/files/APPROVED%20OPEN%20ACCESS%20POLICY-DBT%26DST%2812.12.2014%29_1.pdf",
657-
"Swedish Research Council": "https://www.vr.se/english/applying-for-funding/requirements-terms-and-conditions/publishing-open-access.html",
658-
"Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning": ""
659-
}
660-
agency_2020 = {
661-
"US National Science Foundation": "87%",
662-
"Department of Science & Technology, India": "49%",
663-
"Swedish Research Council": "89%",
664-
"Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning": "88%"
665-
}
666-
667-
response = requests.get("https://scholar.google.com/citations?view_op=mandates_leaderboard&hl=en")
668-
soup = BeautifulSoup(response.text, "html.parser")
669-
agency_overall = soup.find_all("td", class_="gsc_mlt_n gsc_mlt_bd")
670-
671-
for agency, index in zip(agency_policy, [4-1,10-1, 19-1, 64-1]):
672-
agency_index = funder.index(agency)
673-
self.assertEqual(policy[agency_index], agency_policy[agency])
674-
# Check that the percentage values from CSV and on the page agree.
675-
self.assertEqual(percentageOverall[agency_index], agency_overall[index].text)
676-
# The percentage fluctuates, so we can't check the exact value.
677-
self.assertAlmostEqual(int(percentage2020[agency_index][:-1]), int(agency_2020[agency][:-1]), delta=2)
678-
finally:
679-
if os.path.exists(filename):
680-
os.remove(filename)
654+
"""
655+
Test that we can download the mandates CSV and read it.
656+
"""
657+
if not os.path.exists(self.mandates_filename):
658+
text = scholarly.download_mandates_csv(self.mandates_filename)
659+
self.assertGreater(len(text), 0)
660+
funder, policy, percentage2020, percentageOverall = [], [], [], []
661+
with open(self.mandates_filename, "r") as f:
662+
csv_reader = csv.DictReader(f)
663+
for row in csv_reader:
664+
funder.append(row['\ufeffFunder'])
665+
policy.append(row['Policy'])
666+
percentage2020.append(row['2020'])
667+
percentageOverall.append(row['Overall'])
668+
669+
agency_policy = {
670+
"US National Science Foundation": "https://www.nsf.gov/pubs/2015/nsf15052/nsf15052.pdf",
671+
"Department of Science & Technology, India": "http://www.dst.gov.in/sites/default/files/APPROVED%20OPEN%20ACCESS%20POLICY-DBT%26DST%2812.12.2014%29_1.pdf",
672+
"Swedish Research Council": "https://www.vr.se/english/applying-for-funding/requirements-terms-and-conditions/publishing-open-access.html",
673+
"Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning": ""
674+
}
675+
agency_2020 = {
676+
"US National Science Foundation": "87%",
677+
"Department of Science & Technology, India": "49%",
678+
"Swedish Research Council": "89%",
679+
"Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning": "88%"
680+
}
681681

682+
response = requests.get("https://scholar.google.com/citations?view_op=mandates_leaderboard&hl=en")
683+
soup = BeautifulSoup(response.text, "html.parser")
684+
agency_overall = soup.find_all("td", class_="gsc_mlt_n gsc_mlt_bd")
685+
686+
for agency, index in zip(agency_policy, [4-1,10-1, 19-1, 64-1]):
687+
agency_index = funder.index(agency)
688+
self.assertEqual(policy[agency_index], agency_policy[agency])
689+
# Check that the percentage values from CSV and on the page agree.
690+
self.assertEqual(percentageOverall[agency_index], agency_overall[index].text)
691+
# The percentage fluctuates, so we can't check the exact value.
692+
self.assertAlmostEqual(int(percentage2020[agency_index][:-1]), int(agency_2020[agency][:-1]), delta=2)
693+
694+
@unittest.skipIf(pd is None, reason="pandas is not installed")
695+
def test_download_mandates_csv_with_pandas(self):
696+
"""
697+
Test that we can use pandas to read the CSV file.
698+
"""
699+
if not os.path.exists(self.mandates_filename):
700+
text = scholarly.download_mandates_csv(self.mandates_filename)
701+
self.assertGreater(len(text), 0)
702+
df = pd.read_csv(self.mandates_filename, usecols=["Funder", "Policy", "2020", "Overall"]).fillna("")
703+
self.assertGreater(len(df), 0)
704+
705+
funders = ["US National Science Foundation",
706+
"Department of Science & Technology, India",
707+
"Swedish Research Council",
708+
"Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning"
709+
]
710+
711+
policies = ["https://www.nsf.gov/pubs/2015/nsf15052/nsf15052.pdf",
712+
"http://www.dst.gov.in/sites/default/files/APPROVED%20OPEN%20ACCESS%20POLICY-DBT%26DST%2812.12.2014%29_1.pdf",
713+
"https://www.vr.se/english/applying-for-funding/requirements-terms-and-conditions/publishing-open-access.html",
714+
""
715+
]
716+
percentage_overall = [84, 54, 83, 83]
717+
percentage_2020 = [87, 49, 89, 88]
718+
719+
rows = df["Funder"].isin(funders)
720+
self.assertEqual(rows.sum(), 4)
721+
self.assertEqual(df["Policy"][rows].tolist(), policies)
722+
df_overall = df["Overall"][rows].tolist()
723+
df_2020 = df["2020"][rows].tolist()
724+
for idx in range(4):
725+
self.assertAlmostEqual(int(df_overall[idx][:-1]), percentage_overall[idx], delta=2)
726+
self.assertAlmostEqual(int(df_2020[idx][:-1]), percentage_2020[idx], delta=2)
682727

683728
def test_save_journal_leaderboard(self):
684729
"""
@@ -694,7 +739,6 @@ def test_save_journal_leaderboard(self):
694739
with open(filename, "r") as f:
695740
csv_reader = csv.DictReader(f)
696741
for row in csv_reader:
697-
#import pdb; pdb.set_trace()
698742
self.assertEqual(row['Publication'], 'The Astrophysical Journal')
699743
self.assertEqual(row['h5-index'], '161')
700744
self.assertEqual(row['h5-median'], '239')

0 commit comments

Comments
 (0)