Skip to content
80 changes: 80 additions & 0 deletions config/thesisdeptmap.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
campus_unitid,dept_unitid,thesis_department
ucsc_etd,ucsc_anthro_etd,Anthropology
ucsc_etd,ucsc_anthro_etd,Anthropology (Feminist Studies)
ucsc_etd,ucsc_anthro_etd,Anthropology (Latin American and Latino Studies)
ucsc_etd,ucsc_appms_etd,Applied Mathematics and Statistics
ucsc_etd,ucsc_appms_etd,Applied Mathematics and Statistics (Robotics and Control)
ucsc_etd,ucsc_astro_etd,Astronomy & Astrophysics
ucsc_etd,ucsc_astro_etd,Astronomy and Astrophysics (Statistics)
ucsc_etd,ucsc_chem_etd,Biochemistry
ucsc_etd,ucsc_chem_etd,Bioinformatics
ucsc_etd,ucsc_biomeng_etd,Biomolecular Engineering and Bioinformatics
ucsc_etd,ucsc_chem_etd,Chemistry
ucsc_etd,ucsc_compmed_etd,Computational Media
ucsc_etd,ucsc_cseng_etd,Computer Engineering
ucsc_etd,ucsc_cseng_etd,Computer Engineering (Robotics and Control)
ucsc_etd,ucsc_cseng_etd,Computer Science
ucsc_etd,ucsc_digartmedia_etd,Digital Arts and New Media
ucsc_etd,ucsc_earth_etd,Earth Science
ucsc_etd,ucsc_eeb_etd,Ecology and Evolutionary Biology
ucsc_etd,ucsc_eeb_etd,Ecology and Evolutionary Biology (Coastal Science and Policy)
ucsc_etd,ucsc_eeb_etd,Ecology and Evolutionary Biology (Education)
ucsc_etd,ucsc_eeb_etd,Ecology and Evolutionary Biology (Environmental Studies)
ucsc_etd,ucscecon_etd,Economics
ucsc_etd,ucsced_etd,Education
ucsc_etd,ucsced_etd,Education (Sociology)
ucsc_etd,ucsc_eceng_etd,Electrical Engineering
ucsc_etd,ucsc_eceng_etd,Electrical Engineering (Robotics and Control)
ucsc_etd,ucsc_env_etd,Environmental Studies
ucsc_etd,ucsc_env_etd,Environmental Studies (Ecology & Evolutionary Biology)
ucsc_etd,ucsc_env_etd,Environmental Studies (Sociology)
ucsc_etd,ucsc_env_etd,Environmental Toxicology
ucsc_etd,ucscfeministstudies_etd,Feminist Studies
ucsc_etd,ucsc_film_etd,Film & Digital Media (Visual Studies)
ucsc_etd,ucsc_film_etd,Film and Digital Media
ucsc_etd,ucsc_hist_etd,History
ucsc_etd,ucsc_hist_etd,History (Feminist Studies)
ucsc_etd,ucsc_histcons_etd,History of Consciousness
ucsc_etd,ucsc_histcons_etd,History of Consciousness (American Studies)
ucsc_etd,ucsc_histcons_etd,History of Consciousness (Anthropology)
ucsc_etd,ucsc_histcons_etd,History of Consciousness (Feminist Studies)
ucsc_etd,ucsc_histcons_etd,History of Consciousness (Literature)
ucsc_etd,ucsc_histcons_etd,History of Consciousness (Philosophy)
ucsc_etd,ucsc_histcons_etd,History of Consciousness (Visual Studies)
ucsc_etd,ucscecon_etd,International Economics
ucsc_etd,ucsc_latin_etd,Latin American Latino Studies
ucsc_etd,ucsc_ling_etd,Linguistics
ucsc_etd,ucsc_lit_etd,Literature
ucsc_etd,ucsc_lit_etd,Literature (American Studies)
ucsc_etd,ucsc_lit_etd,Literature (Feminist Studies)
ucsc_etd,ucscmath_etd,Mathematics
ucsc_etd,metx_etd,Microbiology and Environmental Toxicology
ucsc_etd,ucsc_cellbio_etd,Molecular Cell and Developmental Biology
ucsc_etd,ucsc_music_etd,Music
ucsc_etd,ucsc_music_etd,Music Composition
ucsc_etd,ucsc_ocean_etd,Ocean Sciences
ucsc_etd,ucsc_philo_etd,Philosophy
ucsc_etd,ucsc_phys_etd,Physics
ucsc_etd,ucsc_pol_etd,Politics
ucsc_etd,ucsc_pol_etd,Politics (American Studies)
ucsc_etd,ucsc_pol_etd,Politics (Environmental Studies)
ucsc_etd,ucsc_pol_etd,Politics (Feminist Studies and Latin American & Latino Studies)
ucsc_etd,ucsc_pol_etd,Politics (Feminist Studies)
ucsc_etd,ucsc_pol_etd,Politics (History of Consciousness and Latin American & Latino Studies)
ucsc_etd,ucsc_pol_etd,Politics (History of Consciousness)
ucsc_etd,ucsc_pol_etd,Politics (Latin American and Latino Studies)
ucsc_etd,ucsc_psych_etd,Psychology
ucsc_etd,ucsc_psych_etd,Psychology (Feminist Studies)
ucsc_etd,ucsc_psych_etd,Psychology (Latin American and Latino Studies)
ucsc_etd,ucsc_cseng_etd,Scientific Computing and Applied Mathematics
ucsc_etd,ucsc_socio_etd,Sociology
ucsc_etd,ucsc_socio_etd,Sociology (Anthropology)
ucsc_etd,ucsc_socio_etd,Sociology (Feminist Studies and Latin American & Latino Studies)
ucsc_etd,ucsc_socio_etd,Sociology (Feminist Studies)
ucsc_etd,ucsc_socio_etd,Sociology (Latin American and Latino Studies)
ucsc_etd,ucsc_socio_etd,Sociology (Philosophy)
ucsc_etd,ucsc_stats_etd,Statistical Science
ucsc_etd,ucsc_stats_etd,Statistics and Applied Mathematics
ucsc_etd,ucsc_cseng_etd,Technology and Information Management
ucsc_etd,ucsc_ppd_etd,Theater Arts
ucsc_etd,ucsc_arthist_etd,Visual Studies
31 changes: 29 additions & 2 deletions tools/convert.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
require 'sequel'
require 'time'
require 'unindent'
require 'csv'
require 'cgi'
require_relative '../util/nailgun.rb'
require_relative '../util/normalize.rb'
require_relative '../util/sanitize.rb'
Expand Down Expand Up @@ -111,6 +113,16 @@ def getEnv(name)
$issueCoverCache = {}
$issueNumberingCache = {}

# Build the nested thesis-department lookup:
#   $thesisdeptmap[campus_unitid][thesis_department] => dept_unitid
# Rows come from config/thesisdeptmap.csv (header: campus_unitid,dept_unitid,thesis_department).
# Every cell is whitespace-stripped, and the department name is HTML-unescaped before it is used
# as a key.
$thesisdeptmap = Hash.new { |h, k| h[k] = {} }
mappath = File.join(__dir__, '..', 'config', 'thesisdeptmap.csv')
CSV.foreach(mappath, headers: true) do |row|
  campus      = row['campus_unitid'].to_s.strip
  dept_unitid = row['dept_unitid'].to_s.strip
  department  = CGI.unescapeHTML(row['thesis_department'].to_s).strip
  # A blank or truncated CSV line yields nil cells; skip it instead of crashing at startup.
  next if campus.empty? || dept_unitid.empty? || department.empty?
  $thesisdeptmap[campus][department] = dept_unitid
end
# Loading is finished. Replace the auto-vivifying default with one that hands back a shared empty
# hash WITHOUT storing it, then freeze everything. Otherwise every lookup of an unknown campus
# (via [] or dig) would insert a new entry into this global, which is read from worker threads.
# Lookups still behave the same: unknown campus => empty hash, unknown department => nil.
$thesisdeptmap.default_proc = proc { |_hash, _key| {}.freeze }
$thesisdeptmap.each_value(&:freeze)
$thesisdeptmap.freeze
# Make puts thread-safe, and prepend each line with the thread it's coming from. While we're at it,
# let's auto-flush the output.
$stdoutMutex = Mutex.new
Expand Down Expand Up @@ -1001,6 +1013,7 @@ def tryMainAndSequester(path)

###################################################################################################
def parseUCIngest(itemID, inMeta, fileType, isPending)
puts "IN Parse"
attrs = {}
attrs[:addl_info] = inMeta.html_at("./comments") and sanitizeHTML(inMeta.html_at("./comments"))
attrs[:author_hide] = !!inMeta.at("./authors[@hideAuthor]") # Only journal items can have this attribute
Expand All @@ -1023,7 +1036,9 @@ def parseUCIngest(itemID, inMeta, fileType, isPending)
attrs[:pub_submit] = parseDate(inMeta.text_at("./context/dateSubmitted"))
attrs[:pub_accept] = parseDate(inMeta.text_at("./context/dateAccepted"))
attrs[:pub_publish] = parseDate(inMeta.text_at("./context/datePublished"))

attrs[:thesis_dept] = inMeta.text_at("./context/department")
puts "ATTRS THESIS Dept #{attrs[:thesis_dept]}"
#
# Record submitter (especially useful for forensics)
attrs[:submitter] = inMeta.xpath("./history/stateChange").map { |sc|
sc[:state] =~ /^(new|uploaded|pending|published)/ && sc[:who] ? sc[:who] : nil
Expand Down Expand Up @@ -1261,7 +1276,7 @@ def parseUCIngest(itemID, inMeta, fileType, isPending)
dbItem[:content_type].nil? &&
attrs[:supp_files]) ? "multimedia" :
fileType == "ETD" ? "dissertation" :
inMeta[:type] ? inMeta[:type].sub("paper", "article") :
inMeta[:type] ? inMeta[:type].sub("paper", "article").sub("etd","dissertation"):
"article"
dbItem[:submitted] = submissionDate
dbItem[:added] = addDate
Expand Down Expand Up @@ -1442,6 +1457,18 @@ def indexItem(itemID, batch, nailgun)
parseUCIngest(itemID, rawMeta, "UCIngest", isPending)
end

# Add department unit id if possible
if attrs[:thesis_dept]
puts "found thesis dept"
etd_units = units.select { |unit| unit.include?('_etd') }
etd_units.each do |campus|
dept_unitid = $thesisdeptmap.dig(campus, attrs[:thesis_dept])
puts dept_unitid
units << dept_unitid unless dept_unitid.nil?
end
puts "All units"
puts units
end
text = $noCloudSearchMode ? "" : grabText(itemID, dbItem.content_type)

# Create JSON for the full text index
Expand Down
5 changes: 5 additions & 0 deletions tools/updIndex.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@
search_enabled: true,
facet_enabled: true,
sort_enabled: false } },
{ index_field_name: "thesis_dept", index_field_type: "literal",
literal_options: { facet_enabled: true,
search_enabled: true,
return_enabled: true,
sort_enabled: true } },
]

###################################################################################################
Expand Down