Skip to content

Commit

Permalink
fix data library job
Browse files Browse the repository at this point in the history
  • Loading branch information
hexylena committed Apr 3, 2024
1 parent e5ca8e1 commit de2a583
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 5 deletions.
29 changes: 24 additions & 5 deletions bin/update-data-library
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,26 @@ require 'commander/import'
require 'net/http'
require 'json'
require 'yaml'
require 'httparty'

# Program metadata registered through the commander DSL.
program :name, 'Data Library Updater'
program :version, '0.0.1'
program :description, 'Updates data libraries from zenodo_links'

# Table parsed once at load time from shared/datatypes.yaml; it is looked up
# by file extension when choosing the 'ext' value for a Zenodo file.
@SHARED_DATATYPES = YAML.load_file('shared/datatypes.yaml')

# Fetch +url+ with an HTTP GET that asks for JSON and return the parsed body.
#
# Net::HTTP does not follow redirects by itself, so a 3xx response carrying a
# Location header is followed here, for at most +redirect_limit+ hops. Any
# other response (including error statuses) has its body parsed as JSON,
# exactly as before.
#
# @param url [String] absolute http(s) URL to fetch
# @param redirect_limit [Integer] number of redirects that may still be followed
# @return [Object] whatever JSON.parse yields for the final response body
# @raise [RuntimeError] when the redirect chain exceeds the limit
# @raise [JSON::ParserError] when the final body is not valid JSON
def request(url, redirect_limit = 5)
  raise "Too many redirects while fetching #{url}" if redirect_limit.negative?

  uri = URI.parse(url)
  # Named +get+ rather than +request+ so the local does not shadow this
  # method, which is called recursively below.
  get = Net::HTTP::Get.new(uri)
  get['Accept'] = 'application/json'

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(get)
  end

  location = response['location']
  if response.is_a?(Net::HTTPRedirection) && location
    # Location may be relative; resolve it against the URL just requested.
    return request(URI.join(url, location).to_s, redirect_limit - 1)
  end

  JSON.parse(response.body)
end

def parse_zenodo_id_formats(link)
# https://zenodo.org/record/1234567
# https://zenodo.org/record/1234567#.X0X0X0X0X0X
Expand Down Expand Up @@ -44,15 +56,23 @@ def update_data_library(path, topic, tutorial, zenodo_record)
zenodo_id = zenodo_record['id'].to_s
zenodo_files = zenodo_record.fetch('files', []).map do |f|
official_extension = f['type']
unofficial_extension = f['links']['self'].split('.')[-2..].join('.')

link = f['links']['self'].sub(%r{/content$}, '')
unofficial_extension = link.split('.')[-2..].join('.')
ext = @SHARED_DATATYPES.fetch(unofficial_extension, nil) || @SHARED_DATATYPES.fetch(official_extension, nil)

# Example:
# https://zenodo.org/api/records/10870107/files/elem_s2_r1.fq.gz/content
# Needs to be
# https://zenodo.org/record/10870107/files/elem_s2_r1.fq.gz
real_link = f['links']['self'].sub(%r{/content$}, '').sub('/api/records/', '/record/')
# puts "Processing file: #{f['type']} #{f['links']['self']} => #{ext}"
# puts "#{unofficial_extension} => #{@SHARED_DATATYPES.fetch(unofficial_extension, nil)}"
# puts "#{official_extension} => #{@SHARED_DATATYPES.fetch(official_extension, nil)}"
warn "Unknown file type: #{f['type']}. Consider adding this to shared/datatypes.yaml" if ext.nil?

{
'url' => f['links']['self'],
'url' => real_link,
'src' => 'url',
'ext' => ext || f['type'],
'info' => "https://doi.org/10.5281/zenodo.#{zenodo_id}",
Expand Down Expand Up @@ -88,8 +108,7 @@ end

def write_data_library(path, topic, tutorial, tutorial_zenodo_id, force)
# Fetch the zenodo record
uri = URI("https://zenodo.org/api/records/#{tutorial_zenodo_id}")
zenodo_record = HTTParty.get(uri)
zenodo_record = request("https://zenodo.org/api/records/#{tutorial_zenodo_id}")
new_zenodo_id = zenodo_record['id'].to_s

# If it's redirected we'll get a different ID here
Expand Down
3 changes: 3 additions & 0 deletions shared/datatypes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ fastqsanger.bz: fastqsanger.bz
fa: fasta
fna: fasta
fq: fastqsanger
fq.gz: fastqsanger.gz
fq.bz: fastqsanger.bz
fq.bz2: fastqsanger.bz2
gbk: gbk
gff: gff
gff3: gff3
Expand Down

0 comments on commit de2a583

Please sign in to comment.