10
10
import requests
11
11
from bs4 import BeautifulSoup
12
12
from contextlib import contextmanager
13
+ try :
14
+ import pandas as pd
15
+ except ImportError :
16
+ pd = None
13
17
14
18
15
19
class TestLuminati (unittest .TestCase ):
@@ -140,6 +144,21 @@ def setUpClass(cls):
140
144
141
145
scholarly .use_proxy (proxy_generator , secondary_proxy_generator )
142
146
147
+ # Try storing the file temporarily as `scholarly.csv` and delete it.
148
+ # If a file with that name already exists, generate a random name
149
+ # that does not exist yet, so we can safely delete it.
150
+ cls .mandates_filename = "scholarly.csv"
151
+ while os .path .exists (cls .mandates_filename ):
152
+ cls .mandates_filename = '' .join (random .choices ('abcdefghijklmnopqrstuvwxyz' , k = 10 )) + ".csv"
153
+
154
@classmethod
def tearDownClass(cls):
    """
    Clean up the mandates CSV file downloaded by the tests.

    Removes the file named by ``cls.mandates_filename``. A file that is
    already gone is not treated as an error.
    """
    # EAFP: attempt the removal directly instead of checking
    # os.path.exists() first, so a file that disappears between the check
    # and the removal cannot make the teardown fail.
    try:
        os.remove(cls.mandates_filename)
    except FileNotFoundError:
        pass
161
+
143
162
@staticmethod
144
163
@contextmanager
145
164
def suppress_stdout ():
@@ -632,53 +651,79 @@ def test_pubs_custom_url(self):
632
651
self .assertGreaterEqual (pub ['num_citations' ], 581 )
633
652
634
653
def test_download_mandates_csv(self):
    """
    Test that we can download the mandates CSV and read it.

    The file is downloaded to ``self.mandates_filename`` only when it is not
    already present. A few funders are then checked against their expected
    policy URLs, against the percentages shown on the mandates leaderboard
    page, and against approximate 2020 percentages.
    """
    if not os.path.exists(self.mandates_filename):
        text = scholarly.download_mandates_csv(self.mandates_filename)
        self.assertGreater(len(text), 0)
    funder, policy, percentage2020, percentageOverall = [], [], [], []
    # "utf-8-sig" strips the leading byte-order mark, so the first column
    # is read as plain "Funder" regardless of the platform's default
    # encoding. newline="" is what the csv module recommends for readers.
    with open(self.mandates_filename, "r", encoding="utf-8-sig", newline="") as f:
        csv_reader = csv.DictReader(f)
        for row in csv_reader:
            funder.append(row['Funder'])
            policy.append(row['Policy'])
            percentage2020.append(row['2020'])
            percentageOverall.append(row['Overall'])

    agency_policy = {
        "US National Science Foundation": "https://www.nsf.gov/pubs/2015/nsf15052/nsf15052.pdf",
        "Department of Science & Technology, India": "http://www.dst.gov.in/sites/default/files/APPROVED%20OPEN%20ACCESS%20POLICY-DBT%26DST%2812.12.2014%29_1.pdf",
        "Swedish Research Council": "https://www.vr.se/english/applying-for-funding/requirements-terms-and-conditions/publishing-open-access.html",
        "Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning": ""
    }
    agency_2020 = {
        "US National Science Foundation": "87%",
        "Department of Science & Technology, India": "49%",
        "Swedish Research Council": "89%",
        "Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning": "88%"
    }

    # A timeout keeps the test from hanging forever if the server stalls.
    response = requests.get(
        "https://scholar.google.com/citations?view_op=mandates_leaderboard&hl=en",
        timeout=30,
    )
    soup = BeautifulSoup(response.text, "html.parser")
    agency_overall = soup.find_all("td", class_="gsc_mlt_n gsc_mlt_bd")

    # NOTE(review): the indices below are presumably the funders' 1-based
    # positions on the leaderboard converted to 0-based -- they are
    # hard-coded and need updating if the ranking changes.
    for agency, index in zip(agency_policy, [4 - 1, 10 - 1, 19 - 1, 64 - 1]):
        agency_index = funder.index(agency)
        self.assertEqual(policy[agency_index], agency_policy[agency])
        # Check that the percentage values from CSV and on the page agree.
        self.assertEqual(percentageOverall[agency_index], agency_overall[index].text)
        # The percentage fluctuates, so we can't check the exact value.
        self.assertAlmostEqual(int(percentage2020[agency_index][:-1]), int(agency_2020[agency][:-1]), delta=2)
693
+
694
@unittest.skipIf(pd is None, reason="pandas is not installed")
def test_download_mandates_csv_with_pandas(self):
    """
    Test that we can use pandas to read the CSV file.

    The file is downloaded to ``self.mandates_filename`` only when it is not
    already present. Each expected funder is looked up by name, so the
    checks do not depend on the order of the rows in the CSV.
    """
    if not os.path.exists(self.mandates_filename):
        text = scholarly.download_mandates_csv(self.mandates_filename)
        self.assertGreater(len(text), 0)
    df = pd.read_csv(self.mandates_filename, usecols=["Funder", "Policy", "2020", "Overall"]).fillna("")
    self.assertGreater(len(df), 0)

    # funder name -> (policy URL, expected "Overall" %, expected "2020" %)
    expected = {
        "US National Science Foundation": (
            "https://www.nsf.gov/pubs/2015/nsf15052/nsf15052.pdf", 84, 87),
        "Department of Science & Technology, India": (
            "http://www.dst.gov.in/sites/default/files/APPROVED%20OPEN%20ACCESS%20POLICY-DBT%26DST%2812.12.2014%29_1.pdf", 54, 49),
        "Swedish Research Council": (
            "https://www.vr.se/english/applying-for-funding/requirements-terms-and-conditions/publishing-open-access.html", 83, 89),
        "Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning": (
            "", 83, 88),
    }

    rows = df["Funder"].isin(list(expected))
    self.assertEqual(rows.sum(), len(expected))

    # Index the matching rows by funder name; comparing positional lists
    # would silently depend on the CSV listing the funders in this order.
    by_funder = df[rows].set_index("Funder")
    for name, (policy_url, overall, in_2020) in expected.items():
        record = by_funder.loc[name]
        self.assertEqual(record["Policy"], policy_url, msg=name)
        # The percentages fluctuate, so only check them approximately.
        self.assertAlmostEqual(int(record["Overall"][:-1]), overall, delta=2, msg=name)
        self.assertAlmostEqual(int(record["2020"][:-1]), in_2020, delta=2, msg=name)
682
727
683
728
def test_save_journal_leaderboard (self ):
684
729
"""
@@ -694,7 +739,6 @@ def test_save_journal_leaderboard(self):
694
739
with open (filename , "r" ) as f :
695
740
csv_reader = csv .DictReader (f )
696
741
for row in csv_reader :
697
- #import pdb; pdb.set_trace()
698
742
self .assertEqual (row ['Publication' ], 'The Astrophysical Journal' )
699
743
self .assertEqual (row ['h5-index' ], '161' )
700
744
self .assertEqual (row ['h5-median' ], '239' )
0 commit comments