@@ -3,7 +3,7 @@ using OhMyArtifacts
 using Dates
 using Test, Pkg

-using HuggingFaceApi: CONFIG_NAME, get_etag
+using HuggingFaceApi: CONFIG_NAME, get_etag, with_endpoint

 # https://github.com/huggingface/huggingface_hub/blob/f124f8be1e02ca9fbcda7a849e70271299ad5738/tests/testing_utils.py
 const DUMMY_MODEL_ID = "julien-c/dummy-unknown"
@@ -15,10 +15,24 @@ const DUMMY_MODEL_ID_PINNED_SHA256 =
 const SAMPLE_DATASET_IDENTIFIER = "lhoestq/custom_squad"
 const DATASET_ID = SAMPLE_DATASET_IDENTIFIER
 const DUMMY_DATASET_ID = "lhoestq/test"
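+# One particular commit for DUMMY_DATASET_ID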
+const DUMMY_DATASET_ID_REVISION_ONE_SPECIFIC_COMMIT =
+    "81d06f998585f8ee10e6e3a2ea47203dc75f2a16"
+
 const DATASET_REVISION_ID_ONE_SPECIFIC_COMMIT = "e25d55a1c4933f987c46cc75d8ffadd67f257c61"
 # One particular commit for DATASET_ID
 const DATASET_SAMPLE_PY_FILE = "custom_squad.py"

+# https://github.com/huggingface/huggingface_hub/blob/0c78398d42af1bb605b8d69c277b1751067d0d57/tests/testing_constants.py
+const USER = "__DUMMY_TRANSFORMERS_USER__"
+const FULL_NAME = "Dummy User"
+const PASS = "__DUMMY_TRANSFORMERS_PASS__"
+
+# Not critical, only usable on the sandboxed CI instance.
+const TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL"
+
+const ENDPOINT_STAGING = "https://hub-ci.huggingface.co"
+
+
 @testset "HuggingFaceApi.jl" begin
     @test HuggingFaceURL(split("a/b/c", '/')...) == HuggingFaceURL("a", "b", "c")
     @test get_etag(HuggingFaceURL(DUMMY_MODEL_ID, CONFIG_NAME)) == DUMMY_MODEL_ID_PINNED_SHA1
@@ -59,6 +73,15 @@ const DATASET_SAMPLE_PY_FILE = "custom_squad.py"
 end

 @testset "Api endpoint" begin
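+    # whoami against the staging endpoint should report the CI test account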
+    info = with_endpoint(ENDPOINT_STAGING) do
+        HuggingFaceApi.whoami(TOKEN)
+    end
+    @test info.name == USER
+    @test info.fullname == FULL_NAME
+    @test info.orgs isa AbstractVector
+    valid_org_i = findfirst(org -> org.name == "valid_org", info.orgs)
+    @test info.orgs[valid_org_i].apiToken isa AbstractString
+
     _api = HuggingFaceApi
     model_tags = _api.get_model_tags()
     for kind in ("library", "language", "license", "dataset", "pipeline_tag")
@@ -112,4 +135,102 @@ end
     @test length(d) == 1
     @test "huggingface" == d[1].author
     @test occursin("DataMeasurementsFiles", d[1].id)
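+    # Dataset listing: each filter facet should appear in the first result's tags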
+    d_raft = _api.list_datasets(; filter = "benchmark:raft")
+    @test length(d_raft) > 0
+    @test "benchmark:raft" in d_raft[1].tags
+    d_lang_creat = _api.list_datasets(; filter = "language_creators:crowdsourced")
+    @test length(d_lang_creat) > 0
+    @test "language_creators:crowdsourced" in d_lang_creat[1].tags
+    d_lang_en = _api.list_datasets(; filter = "languages:en", limit = 3)
+    @test length(d_lang_en) > 0
+    @test "languages:en" in d_lang_en[1].tags
+    d_lang_en_fr = _api.list_datasets(; filter = ["languages:en", "languages:fr"])
+    @test length(d_lang_en_fr) > 0
+    @test "languages:en" in d_lang_en_fr[1].tags
+    @test "languages:fr" in d_lang_en_fr[1].tags
+    d_multiling = _api.list_datasets(; filter = "multilinguality:unknown")
+    @test length(d_multiling) > 0
+    @test "multilinguality:unknown" in d_multiling[1].tags
+    d_sized = _api.list_datasets(; filter = "size_categories:100K<n<1M")
+    @test length(d_sized) > 0
+    @test "size_categories:100K<n<1M" in d_sized[1].tags
+    d_task = _api.list_datasets(; filter = "task_categories:audio-classification")
+    @test length(d_task) > 0
+    @test "task_categories:audio-classification" in d_task[1].tags
+    d_task_id = _api.list_datasets(; filter = "task_ids:automatic-speech-recognition")
+    @test length(d_task_id) > 0
+    @test "task_ids:automatic-speech-recognition" in d_task_id[1].tags
+    d_full = _api.list_datasets(; full = true)
+    @test length(d_full) > 100
+    @test any(d -> haskey(d, :cardData), d_full)
+    d_author = _api.list_datasets(; author = "huggingface")
+    @test length(d_author) > 1
+    d_search = _api.list_datasets(; search = "wikipedia")
+    @test length(d_search) > 10
+    d_card = _api.list_datasets(; cardData = true)
+    @test count(d -> haskey(d, :cardData), d_card) > 0
+    d_all = _api.list_datasets()
+    @test all(d -> !haskey(d, :cardData), d_all)
+
+    d_info = _api.dataset_info(DUMMY_DATASET_ID)
+    @test d_info.cardData isa AbstractDict && length(d_info.cardData) > 0
+    @test d_info.siblings isa AbstractVector && length(d_info.siblings) > 0
+    @test d_info.sha != DUMMY_DATASET_ID_REVISION_ONE_SPECIFIC_COMMIT
+    @test _api.dataset_info(DUMMY_DATASET_ID, revision = DUMMY_DATASET_ID_REVISION_ONE_SPECIFIC_COMMIT).sha ==
+        DUMMY_DATASET_ID_REVISION_ONE_SPECIFIC_COMMIT
+
+    mtr_all = _api.list_metrics()
+    @test length(mtr_all) > 10
+    @test any(m -> haskey(m, :description), mtr_all)
+
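+    # Model listing: author, full-text search, and framework/task filters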
+    m_author = _api.list_models(; author = "muellerzr")
+    @test length(m_author) > 0
+    @test occursin("muellerzr", m_author[1].modelId)
+    m_fb_bart = _api.list_models(; search = "facebook/bart-base")
+    @test occursin("facebook/bart-base", m_fb_bart[1].modelId)
+    m_fail = _api.list_models(; search = "muellerzr/testme")
+    @test length(m_fail) == 0
+    m_ms_tf = _api.list_models(; search = "microsoft/wavlm-base-sd", filter = "tensorflow")
+    @test length(m_ms_tf) == 0
+    m_ms_pt = _api.list_models(; search = "microsoft/wavlm-base-sd", filter = "pytorch")
+    @test length(m_ms_pt) > 0
+    m_task = _api.list_models(; search = "albert-base-v2", filter = "fill-mask")
+    @test "fill-mask" == m_task[1].pipeline_tag
+    @test occursin("albert-base-v2", m_task[1].modelId)
+    @test length(_api.list_models(; filter = "dummytask")) == 0
+    @test length(_api.list_models(; filter = "en")) != length(_api.list_models(; filter = "fr"))
+    m_cplx = _api.list_models(; filter = ("text-classification", "pytorch", "tensorflow"))
+    @test length(m_cplx) > 1
+    @test all(m -> "text-classification" == m.pipeline_tag || "text-classification" in m.tags, m_cplx)
+    @test all(m -> "pytorch" in m.tags && "tensorflow" in m.tags, m_cplx)
+    @test all(m -> haskey(m, :cardData), _api.list_models(filter = "co2_eq_emissions", cardData = true))
+    @test all(m -> !haskey(m, :cardData), _api.list_models(filter = "co2_eq_emissions"))
+
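+    # Space listing: search, sort order, pagination, and linked models/datasets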
+    s_all = _api.list_spaces(; full = true)
+    @test length(s_all) > 100
+    @test any(s -> haskey(s, :cardData), s_all)
+    s_eval = _api.list_spaces(; author = "evaluate-metric")
+    @test ["evaluate-metric/trec_eval", "evaluate-metric/perplexity"] ⊆ [s.id for s in s_eval]
+    s_wiki = _api.list_spaces(; search = "wikipedia")
+    @test occursin("wikipedia", lowercase(s_wiki[1].id))
+    s_des = _api.list_spaces(; sort = "likes", direction = -1)
+    s_asc = _api.list_spaces(; sort = "likes")
+    @test s_des[1].likes > s_des[2].likes
+    @test s_asc[end-1].likes < s_asc[end].likes
+    @test length(_api.list_spaces(; limit = 5)) == 5
+    s_bert = _api.list_spaces(; models = "bert-base-uncased")
+    @test "bert-base-uncased" in s_bert[1].models
+    s_d_wiki = _api.list_spaces(; datasets = "wikipedia")
+    @test "wikipedia" in s_d_wiki[1].datasets
+    s_link = _api.list_spaces(; linked = true)
+    @test any(s -> haskey(s, :models), s_link)
+    @test any(s -> haskey(s, :datasets), s_link)
+    @test any(s -> haskey(s, :models) && haskey(s, :datasets), s_link)
+
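+    # Authenticated listings on staging should see at least as many repos as anonymous ones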
+    @test length(with_endpoint(_api.list_datasets, ENDPOINT_STAGING)) <
+        length(with_endpoint(() -> _api.list_datasets(TOKEN), ENDPOINT_STAGING))
+    @test length(with_endpoint(_api.list_models, ENDPOINT_STAGING)) <
+        length(with_endpoint(() -> _api.list_models(TOKEN), ENDPOINT_STAGING))
+    @test length(with_endpoint(_api.list_spaces, ENDPOINT_STAGING)) <=
+        length(with_endpoint(() -> _api.list_spaces(TOKEN), ENDPOINT_STAGING))
 end