-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetBibInfo.m
51 lines (49 loc) · 1.98 KB
/
getBibInfo.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
function [textStruct] = getBibInfo(nFiles, fileNames, options)
% GETBIBINFO Retrieves bibliographic information and text of given files
%   [textStruct] = getBibInfo(nFiles, fileNames, options)
%
%   Iterates over the first nFiles entries of fileNames, extracts the text
%   of each file with extractFileText, passes that text to getDoi, and
%   (optionally) passes the resulting DOI to bibTexfromDoi. The results are
%   collected in a 1-by-nFiles struct array. A waitbar dialog reports
%   progress; it is left open with a completion message on success and is
%   removed if processing fails.
%
%   ====INPUT=====
%   nFiles          integer     Number of files detected in folder. Must
%                               be a non-negative integer scalar that does
%                               not exceed numel(fileNames).
%   fileNames       cell array  List of files parsed from folder
%
%   -options-
%   downloadBibTex  logical     Download additional BibTex info for a
%                               given doi from crossref.org
%                               (default: true)
%
%   ====OUTPUT====
%   textStruct      struct      Struct array containing extracted text of
%                               each pdf as well as retrieved
%                               bibliographic information. Empty (1-by-0,
%                               same fields) when nFiles is 0.
%       -fields-
%       text, doi, file, optional: bibtex
%
%   ====ERRORS====
%   getBibInfo:tooFewFileNames  nFiles is larger than numel(fileNames).
%   Errors raised by extractFileText, getDoi or bibTexfromDoi are rethrown
%   unchanged after the progress dialog has been removed.

    arguments
        nFiles (1,1) double {mustBeInteger, mustBeNonnegative}
        fileNames cell
        options.downloadBibTex logical {mustBeNumericOrLogical} = true
    end

    % Fail fast instead of hitting an index-out-of-range error part-way
    % through the loop, after earlier files have already been processed.
    if nFiles > numel(fileNames)
        error("getBibInfo:tooFewFileNames", ...
            "nFiles (%d) exceeds the number of entries in fileNames (%d).", ...
            nFiles, numel(fileNames));
    end

    % Preallocate the result with its final set of fields so that the
    % nFiles == 0 case yields a well-formed empty struct array rather than
    % an indexing error.
    fieldArgs = {'text', [], 'doi', [], 'file', []};
    if options.downloadBibTex
        fieldArgs = [fieldArgs, {'bibtex', []}];
    end
    textStruct = repmat(struct(fieldArgs{:}), 1, nFiles);

    % Nothing to process: skip the progress dialog entirely.
    if nFiles == 0
        return
    end

    % Initialise waitbar to track progress
    progBar = waitbar(0, "Please Wait", Name = "Preprocessing text...");

    try
        for idx = 1:nFiles
            waitbar(idx/nFiles, progBar);

            % Fetch text from pdf file. Brace indexing hands the file name
            % itself to extractFileText instead of a 1x1 cell.
            textStruct(idx).text = extractFileText(fileNames{idx});

            % Retrieve DOI and assign to field
            doi = getDoi(textStruct(idx).text);
            textStruct(idx).doi = doi;

            % Add field with file location to struct (kept as a 1x1 cell,
            % exactly as callers received it before)
            textStruct(idx).file = fileNames(idx);

            if options.downloadBibTex
                textStruct(idx).bibtex = bibTexfromDoi(doi);
            end
        end
    catch err
        % Do not leave a stale progress dialog behind when a file fails.
        if isgraphics(progBar)
            delete(progBar);
        end
        rethrow(err);
    end

    % Signal end of preprocessing
    waitbar(1, progBar, "Bibliographic info retrieved." + ...
        " You may close this window now.")