Skip to content

Commit b51d569

Browse files
committed
port more test
1 parent 9d4bdf7 commit b51d569

File tree

1 file changed

+122
-1
lines changed

1 file changed

+122
-1
lines changed

Diff for: test/runtests.jl

+122-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ using OhMyArtifacts
33
using Dates
44
using Test, Pkg
55

6-
using HuggingFaceApi: CONFIG_NAME, get_etag
6+
using HuggingFaceApi: CONFIG_NAME, get_etag, with_endpoint
77

88
# https://github.com/huggingface/huggingface_hub/blob/f124f8be1e02ca9fbcda7a849e70271299ad5738/tests/testing_utils.py
99
const DUMMY_MODEL_ID = "julien-c/dummy-unknown"
@@ -15,10 +15,24 @@ const DUMMY_MODEL_ID_PINNED_SHA256 =
1515
const SAMPLE_DATASET_IDENTIFIER = "lhoestq/custom_squad"
1616
const DATASET_ID = SAMPLE_DATASET_IDENTIFIER
1717
const DUMMY_DATASET_ID = "lhoestq/test"
18+
const DUMMY_DATASET_ID_REVISION_ONE_SPECIFIC_COMMIT =
19+
"81d06f998585f8ee10e6e3a2ea47203dc75f2a16"
20+
1821
const DATASET_REVISION_ID_ONE_SPECIFIC_COMMIT = "e25d55a1c4933f987c46cc75d8ffadd67f257c61"
1922
# One particular commit for DATASET_ID
2023
const DATASET_SAMPLE_PY_FILE = "custom_squad.py"
2124

25+
# https://github.com/huggingface/huggingface_hub/blob/0c78398d42af1bb605b8d69c277b1751067d0d57/tests/testing_constants.py
26+
const USER = "__DUMMY_TRANSFORMERS_USER__"
27+
const FULL_NAME = "Dummy User"
28+
const PASS = "__DUMMY_TRANSFORMERS_PASS__"
29+
30+
# Not critical, only usable on the sandboxed CI instance.
31+
const TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL"
32+
33+
const ENDPOINT_STAGING = "https://hub-ci.huggingface.co"
34+
35+
2236
@testset "HuggingFaceApi.jl" begin
2337
@test HuggingFaceURL(split("a/b/c",'/')...) == HuggingFaceURL("a", "b", "c")
2438
@test get_etag(HuggingFaceURL(DUMMY_MODEL_ID, CONFIG_NAME)) == DUMMY_MODEL_ID_PINNED_SHA1
@@ -59,6 +73,15 @@ const DATASET_SAMPLE_PY_FILE = "custom_squad.py"
5973
end
6074

6175
@testset "Api endpoint" begin
76+
info = with_endpoint(ENDPOINT_STAGING) do
77+
HuggingFaceApi.whoami(TOKEN)
78+
end
79+
@test info.name == USER
80+
@test info.fullname == FULL_NAME
81+
@test info.orgs isa AbstractVector
82+
valid_org_i = findfirst(org->org.name == "valid_org", info.orgs)
83+
@test info.orgs[valid_org_i].apiToken isa AbstractString
84+
6285
_api = HuggingFaceApi
6386
model_tags = _api.get_model_tags()
6487
for kind in ("library", "language", "license", "dataset", "pipeline_tag")
@@ -112,4 +135,102 @@ end
112135
@test length(d) == 1
113136
@test "huggingface" == d[1].author
114137
@test occursin("DataMeasurementsFiles", d[1].id)
138+
d_raft = _api.list_datasets(; filter = "benchmark:raft")
139+
@test length(d_raft) > 0
140+
@test "benchmark:raft" in d_raft[1].tags
141+
d_lang_creat = _api.list_datasets(; filter = "language_creators:crowdsourced")
142+
@test length(d_lang_creat) > 0
143+
@test "language_creators:crowdsourced" in d_lang_creat[1].tags
144+
d_lang_en = _api.list_datasets(; filter = "languages:en", limit=3)
145+
@test length(d_lang_en) > 0
146+
@test "languages:en" in d_lang_en[1].tags
147+
d_lang_en_fr = _api.list_datasets(; filter = ["languages:en", "languages:fr"])
148+
@test length(d_lang_en_fr) > 0
149+
@test "languages:en" in d_lang_en_fr[1].tags
150+
@test "languages:fr" in d_lang_en_fr[1].tags
151+
d_multiling = _api.list_datasets(; filter = "multilinguality:unknown")
152+
@test length(d_multiling) > 0
153+
@test "multilinguality:unknown" in d_multiling[1].tags
154+
d_sized = _api.list_datasets(; filter = "size_categories:100K<n<1M")
155+
@test length(d_sized) > 0
156+
@test "size_categories:100K<n<1M" in d_sized[1].tags
157+
d_task = _api.list_datasets(; filter = "task_categories:audio-classification")
158+
@test length(d_task) > 0
159+
@test "task_categories:audio-classification" in d_task[1].tags
160+
d_task_id = _api.list_datasets(; filter = "task_ids:automatic-speech-recognition")
161+
@test length(d_task_id) > 0
162+
@test "task_ids:automatic-speech-recognition" in d_task_id[1].tags
163+
d_full = _api.list_datasets(; full = true)
164+
@test length(d_full) > 100
165+
@test any(d->haskey(d, :cardData), d_full)
166+
d_author = _api.list_datasets(; author = "huggingface")
167+
@test length(d_author) > 1
168+
d_search = _api.list_datasets(; search = "wikipedia")
169+
@test length(d_search) > 10
170+
d_card = _api.list_datasets(; cardData = true)
171+
@test count(d->haskey(d, :cardData), d_card) > 0
172+
d_all = _api.list_datasets()
173+
@test all(d->!haskey(d, :cardData), d_all)
174+
175+
d_info = _api.dataset_info(DUMMY_DATASET_ID)
176+
@test d_info.cardData isa AbstractDict && length(d_info.cardData) > 0
177+
@test d_info.siblings isa AbstractVector && length(d_info.siblings) > 0
178+
@test d_info.sha != DUMMY_DATASET_ID_REVISION_ONE_SPECIFIC_COMMIT
179+
@test _api.dataset_info(DUMMY_DATASET_ID, revision=DUMMY_DATASET_ID_REVISION_ONE_SPECIFIC_COMMIT).sha ==
180+
DUMMY_DATASET_ID_REVISION_ONE_SPECIFIC_COMMIT
181+
182+
mtr_all = _api.list_metrics()
183+
@test length(mtr_all) > 10
184+
@test any(m->haskey(m, :description), mtr_all)
185+
186+
m_author = _api.list_models(; author = "muellerzr")
187+
@test length(m_author) > 0
188+
@test occursin("muellerzr", m_author[1].modelId)
189+
m_fb_bart = _api.list_models(; search = "facebook/bart-base")
190+
@test occursin("facebook/bart-base", m_fb_bart[1].modelId)
191+
m_fail = _api.list_models(; search = "muellerzr/testme")
192+
@test length(m_fail) == 0
193+
m_ms_tf = _api.list_models(; search = "microsoft/wavlm-base-sd", filter = "tensorflow")
194+
@test length(m_ms_tf) == 0
195+
m_ms_pt = _api.list_models(; search = "microsoft/wavlm-base-sd", filter = "pytorch")
196+
@test length(m_ms_pt) > 0
197+
m_task = _api.list_models(; search = "albert-base-v2", filter = "fill-mask")
198+
@test "fill-mask" == m_task[1].pipeline_tag
199+
@test occursin("albert-base-v2" , m_task[1].modelId)
200+
@test length(_api.list_models(; filter = "dummytask")) == 0
201+
@test length(_api.list_models(; filter = "en")) != length(_api.list_models(; filter = "fr"))
202+
m_cplx = _api.list_models(; filter = ("text-classification", "pytorch", "tensorflow"))
203+
@test length(m_cplx) > 1
204+
@test all(m->"text-classification" == m.pipeline_tag || "text-classification" in m.tags, m_cplx)
205+
@test all(m->"pytorch" in m.tags && "tensorflow" in m.tags, m_cplx)
206+
@test all(m->haskey(m, :cardData), _api.list_models(filter="co2_eq_emissions", cardData = true))
207+
@test all(m->!haskey(m, :cardData), _api.list_models(filter="co2_eq_emissions"))
208+
209+
s_all = _api.list_spaces(; full = true)
210+
@test length(s_all) > 100
211+
@test any(s->haskey(s, :cardData), s_all)
212+
s_eval = _api.list_spaces(; author = "evaluate-metric")
213+
@test ["evaluate-metric/trec_eval", "evaluate-metric/perplexity"] ⊆ [s.id for s in s_eval]
214+
s_wiki = _api.list_spaces(; search = "wikipedia")
215+
@test occursin("wikipedia", lowercase(s_wiki[1].id))
216+
s_des = _api.list_spaces(; sort = "likes", direction = -1)
217+
s_asc = _api.list_spaces(; sort = "likes")
218+
@test s_des[1].likes > s_des[2].likes
219+
@test s_asc[end-1].likes < s_asc[end].likes
220+
@test length(_api.list_spaces(; limit=5)) == 5
221+
s_bert = _api.list_spaces(; models = "bert-base-uncased")
222+
@test "bert-base-uncased" in s_bert[1].models
223+
s_d_wiki = _api.list_spaces(; datasets = "wikipedia")
224+
@test "wikipedia" in s_d_wiki[1].datasets
225+
s_link = _api.list_spaces(; linked = true)
226+
@test any(s->haskey(s, :models), s_link)
227+
@test any(s->haskey(s, :datasets), s_link)
228+
@test any(s->haskey(s, :models) && haskey(s, :datasets), s_link)
229+
230+
@test length(with_endpoint(_api.list_datasets, ENDPOINT_STAGING)) <
231+
length(with_endpoint(()->_api.list_datasets(TOKEN), ENDPOINT_STAGING))
232+
@test length(with_endpoint(_api.list_models, ENDPOINT_STAGING)) <
233+
length(with_endpoint(()->_api.list_models(TOKEN), ENDPOINT_STAGING))
234+
@test length(with_endpoint(_api.list_spaces, ENDPOINT_STAGING)) <=
235+
length(with_endpoint(()->_api.list_spaces(TOKEN), ENDPOINT_STAGING))
115236
end

0 commit comments

Comments
 (0)