diff --git a/packages/hub/src/lib/index.ts b/packages/hub/src/lib/index.ts
index cd777784f7..19c933a759 100644
--- a/packages/hub/src/lib/index.ts
+++ b/packages/hub/src/lib/index.ts
@@ -18,6 +18,7 @@ export * from "./list-datasets";
 export * from "./list-files";
 export * from "./list-models";
 export * from "./list-spaces";
+export * from "./list-collections";
 export * from "./model-info";
 export * from "./oauth-handle-redirect";
 export * from "./oauth-login-url";
diff --git a/packages/hub/src/lib/list-collections.spec.ts b/packages/hub/src/lib/list-collections.spec.ts
new file mode 100644
index 0000000000..25692f5901
--- /dev/null
+++ b/packages/hub/src/lib/list-collections.spec.ts
@@ -0,0 +1,186 @@
+import { describe, expect, it } from "vitest";
+import { listCollections } from "./list-collections";
+import type { ApiCollectionInfo } from "../types/api/api-collection";
+import { TEST_HUB_URL } from "../test/consts";
+
+describe("listCollections", () => {
+	it("should list collections", async () => {
+		const results: ApiCollectionInfo[] = [];
+
+		for await (const entry of listCollections({
+			search: { owner: ["quanghuynt14"] },
+			hubUrl: TEST_HUB_URL,
+		})) {
+			if (entry.slug !== "quanghuynt14/test-collection-6866ff686ca2d2e0a1931507") {
+				continue;
+			}
+
+			// Overwrite every timestamp with a fixed value so the deep-equality checks below are deterministic.
+			if (typeof entry.lastUpdated === "string") {
+				entry.lastUpdated = "2025-07-03T22:18:56.239Z";
+			}
+
+			if (Array.isArray(entry.items)) {
+				for (const item of entry.items) {
+					if ("lastModified" in item && typeof item.lastModified === "string") {
+						item.lastModified = "2025-07-01T00:36:29.000Z";
+					}
+					if ("lastUpdated" in item && typeof item.lastUpdated === "string") {
+						item.lastUpdated = "2025-07-01T00:41:27.525Z";
+					}
+				}
+			}
+
+			results.push(entry);
+		}
+
+		const collection = results[0];
+		const items = collection.items;
+		collection.items = [];
+
+		// Check all properties of the collection except items
+		expect(collection).deep.equal({
+			slug: "quanghuynt14/test-collection-6866ff686ca2d2e0a1931507",
+			title: "Test Collection",
+			description: "This collection is only for test",
+			gating: false,
+			lastUpdated: "2025-07-03T22:18:56.239Z",
+			owner: {
+				_id: "6866ff3936a7677f427f99e3",
+				avatarUrl: "/avatars/b51088e22fb7194888551365b1bafada.svg",
+				fullname: "Quang-Huy Tran",
+				name: "quanghuynt14",
+				type: "user",
+				isPro: false,
+				isHf: false,
+				isHfAdmin: false,
+				isMod: false,
+			},
+			items: [],
+			theme: "purple",
+			private: false,
+			upvotes: 0,
+			isUpvotedByUser: false,
+		});
+
+		// Check for item type model
+		expect(items[0]).deep.equal({
+			_id: "686700086ca2d2e0a193150b",
+			position: 0,
+			type: "model",
+			author: "quanghuynt14",
+			authorData: {
+				_id: "6866ff3936a7677f427f99e3",
+				avatarUrl: "/avatars/b51088e22fb7194888551365b1bafada.svg",
+				fullname: "Quang-Huy Tran",
+				name: "quanghuynt14",
+				type: "user",
+				isPro: false,
+				isHf: false,
+				isHfAdmin: false,
+				isMod: false,
+			},
+			downloads: 0,
+			gated: false,
+			id: "quanghuynt14/TestModel",
+			availableInferenceProviders: [],
+			lastModified: "2025-07-01T00:36:29.000Z",
+			likes: 0,
+			private: false,
+			repoType: "model",
+			isLikedByUser: false,
+		});
+
+		// Check for item type dataset
+		expect(items[1]).deep.equal({
+			_id: "686701cd86ea6972ba6c9da5",
+			position: 1,
+			type: "dataset",
+			author: "quanghuynt14",
+			downloads: 0,
+			gated: false,
+			id: "quanghuynt14/TestDataset",
+			lastModified: "2025-07-01T00:36:29.000Z",
+			private: false,
+			repoType: "dataset",
+			likes: 0,
+			isLikedByUser: false,
+		});
+
+		// Check for item type space
+		expect(items[2]).deep.equal({
+			_id: "6867000f6ca2d2e0a193150e",
+			position: 2,
+			type: "space",
+			author: "quanghuynt14",
+			authorData: {
+				_id: "6866ff3936a7677f427f99e3",
+				avatarUrl: "/avatars/b51088e22fb7194888551365b1bafada.svg",
+				fullname: "Quang-Huy Tran",
+				name: "quanghuynt14",
+				type: "user",
+				isPro: false,
+				isHf: false,
+				isHfAdmin: false,
+				isMod: false,
+			},
+			colorFrom: "pink",
+			colorTo: "indigo",
+			createdAt: "2025-07-03T22:10:39.000Z",
+			emoji: "🏆",
+			id: "quanghuynt14/TestSpace",
+			lastModified: "2025-07-01T00:36:29.000Z",
+			likes: 0,
+			pinned: false,
+			private: false,
+			sdk: "docker",
+			repoType: "space",
+			runtime: {
+				stage: "BUILDING",
+				hardware: {
+					current: null,
+					requested: "cpu-basic",
+				},
+				storage: null,
+				gcTimeout: 172800,
+				replicas: {
+					current: 0,
+					requested: 1,
+				},
+			},
+			shortDescription: "This space is only for test",
+			title: "TestSpace",
+			isLikedByUser: false,
+			trendingScore: 0,
+			tags: ["docker", "region:us"],
+		});
+
+		// Check for item type collection
+		expect(items[3]).deep.equal({
+			_id: "68670014f25517a0a7eaf505",
+			position: 3,
+			type: "collection",
+			id: "6866ff686ca2d2e0a1931507",
+			slug: "quanghuynt14/test-collection-6866ff686ca2d2e0a1931507",
+			title: "Test Collection",
+			description: "This collection is only for test",
+			lastUpdated: "2025-07-01T00:41:27.525Z",
+			numberItems: 5,
+			owner: {
+				_id: "6866ff3936a7677f427f99e3",
+				avatarUrl: "/avatars/b51088e22fb7194888551365b1bafada.svg",
+				fullname: "Quang-Huy Tran",
+				name: "quanghuynt14",
+				type: "user",
+				isPro: false,
+				isHf: false,
+				isHfAdmin: false,
+				isMod: false,
+			},
+			theme: "purple",
+			shareUrl: "https://hub-ci.huggingface.co/collections/quanghuynt14/test-collection-6866ff686ca2d2e0a1931507",
+			upvotes: 0,
+			isUpvotedByUser: false,
+		});
+	});
+});
diff --git a/packages/hub/src/lib/list-collections.ts b/packages/hub/src/lib/list-collections.ts
new file mode 100644
index 0000000000..e667803410
--- /dev/null
+++ b/packages/hub/src/lib/list-collections.ts
@@ -0,0 +1,117 @@
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+import type { CredentialsParams } from "../types/public";
+import { checkCredentials } from "../utils/checkCredentials";
+import { parseLinkHeader } from "../utils/parseLinkHeader";
+import type { ApiCollectionInfo } from "../types/api/api-collection";
+
+/**
+ * Lazily list the collections matching the given filters.
+ *
+ * Pages of at most 100 collections are requested one after the other, following the `next` URL of each
+ * response's `Link` header, until no page is left or `limit` collections have been yielded.
+ *
+ * @throws the error built by `createApiError` when a request does not succeed.
+ */
+export async function* listCollections(
+	params?: {
+		search?: {
+			/**
+			 * Filter collections created by specific owners (users or organizations).
+			 */
+			owner?: string[];
+			/**
+			 * Filter collections containing specific items.
+			 * Value must be the item_type and item_id concatenated.
+			 * Example: "models/teknium/OpenHermes-2.5-Mistral-7B", "datasets/rajpurkar/squad" or "papers/2311.12983".
+			 */
+			item?: string[];
+			/**
+			 * Filter based on substrings for titles & descriptions.
+			 */
+			q?: string;
+		};
+		/**
+		 * Sort the returned collections. Supported values are "lastModified", "trending" (default) and "upvotes".
+		 */
+		sort?: "lastModified" | "trending" | "upvotes";
+		/**
+		 * Set to limit the number of collections returned. A limit of 0 or less yields nothing.
+		 */
+		limit?: number;
+		/**
+		 * Base URL of the Hub to query; defaults to `HUB_URL`.
+		 */
+		hubUrl?: string;
+		/**
+		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+		 */
+		fetch?: typeof fetch;
+	} & Partial<CredentialsParams>
+): AsyncGenerator<ApiCollectionInfo> {
+	const accessToken = params && checkCredentials(params);
+
+	let totalToFetch = params?.limit ?? Infinity;
+
+	// Nothing to yield for a non-positive limit: return before any request is made.
+	if (totalToFetch <= 0) {
+		return;
+	}
+
+	const searchParams = new URLSearchParams();
+
+	// The page size is capped at 100; `limit` itself is enforced while yielding below.
+	searchParams.append("limit", String(Math.min(totalToFetch, 100)));
+
+	if (params?.sort) {
+		searchParams.append("sort", params.sort);
+	}
+
+	if (params?.search?.owner) {
+		for (const owner of params.search.owner) {
+			searchParams.append("owner", owner);
+		}
+	}
+
+	if (params?.search?.item) {
+		for (const item of params.search.item) {
+			searchParams.append("item", item);
+		}
+	}
+
+	if (params?.search?.q) {
+		searchParams.append("q", params.search.q);
+	}
+
+	let url: string | undefined = `${params?.hubUrl || HUB_URL}/api/collections?${searchParams}`;
+
+	// Keep requesting pages for as long as the previous response advertised a next one.
+	while (url) {
+		const res: Response = await (params?.fetch ?? fetch)(url, {
+			headers: {
+				accept: "application/json",
+				...(accessToken ? { Authorization: `Bearer ${accessToken}` } : undefined),
+			},
+		});
+
+		if (!res.ok) {
+			throw await createApiError(res);
+		}
+
+		const collections: ApiCollectionInfo[] = await res.json();
+
+		for (const collection of collections) {
+			yield collection;
+
+			totalToFetch--;
+
+			if (totalToFetch <= 0) {
+				return;
+			}
+		}
+
+		const linkHeader = res.headers.get("Link");
+
+		url = linkHeader ? parseLinkHeader(linkHeader).next : undefined;
+	}
+}
diff --git a/packages/hub/src/types/api/api-author.ts b/packages/hub/src/types/api/api-author.ts
new file mode 100644
index 0000000000..e5bb77735b
--- /dev/null
+++ b/packages/hub/src/types/api/api-author.ts
@@ -0,0 +1,27 @@
+/** Author payload, either an organization or a user, discriminated by `type`. */
+export type ApiAuthor =
+	| {
+			avatarUrl: string;
+			fullname: string;
+			name: string;
+			isHf: boolean;
+			isHfAdmin: boolean;
+			isMod: boolean;
+			followerCount?: number;
+			type: "org";
+			isEnterprise: boolean;
+			isUserFollowing?: boolean;
+	  }
+	| {
+			avatarUrl: string;
+			fullname: string;
+			name: string;
+			isHf: boolean;
+			isHfAdmin: boolean;
+			isMod: boolean;
+			followerCount?: number;
+			type: "user";
+			isPro: boolean;
+			_id: string;
+			isUserFollowing?: boolean;
+	  };
diff --git a/packages/hub/src/types/api/api-collection.ts b/packages/hub/src/types/api/api-collection.ts
new file mode 100644
index 0000000000..740bf6b8cd
--- /dev/null
+++ b/packages/hub/src/types/api/api-collection.ts
@@ -0,0 +1,336 @@
+import type { ApiAuthor } from "./api-author";
+
+/** Shape of one collection in the `/api/collections` response. */
+export interface ApiCollectionInfo {
+	slug: string;
+	title: string;
+	description?: string;
+	lastUpdated: string;
+	gating:
+		| true
+		| (
+				| false
+				| {
+						mode: "auto";
+				  }
+				| {
+						mode: "manual";
+						notifications: {
+							mode: "bulk" | "real-time";
+							email?: string;
+						};
+				  }
+		  );
+	owner: ApiAuthor;
+	/**
+	 * The items list per collection is truncated to 4 items maximum.
+	 * To retrieve all items from a collection, you need to make an additional call using its collection slug.
+	 */
+	items: ApiCollectionItem[];
+	theme: "orange" | "blue" | "green" | "purple" | "pink" | "indigo";
+	private: boolean;
+	upvotes: number;
+	isUpvotedByUser: boolean;
+}
+
+/** Fields shared by every kind of collection item. */
+interface ApiCollectionItemBase {
+	_id: string;
+	position: number;
+	note?: {
+		html: string;
+		text: string;
+	};
+	gallery?: string[];
+}
+
+/** Collection item of type "model". */
+interface ApiCollectionItemModel extends ApiCollectionItemBase {
+	type: "model";
+	author: string;
+	downloads: number;
+	id: string;
+	availableInferenceProviders: {
+		provider:
+			| "black-forest-labs"
+			| "cerebras"
+			| "cohere"
+			| "fal-ai"
+			| "featherless-ai"
+			| "fireworks-ai"
+			| "groq"
+			| "hf-inference"
+			| "hyperbolic"
+			| "nebius"
+			| "novita"
+			| "nscale"
+			| "openai"
+			| "ovhcloud"
+			| "replicate"
+			| "sambanova"
+			| "together";
+		providerStatus: "live" | "staging" | "error";
+		modelStatus: "live" | "staging" | "error";
+		providerId: string;
+		task:
+			| "text-classification"
+			| "token-classification"
+			| "table-question-answering"
+			| "question-answering"
+			| "zero-shot-classification"
+			| "translation"
+			| "summarization"
+			| "feature-extraction"
+			| "text-generation"
+			| "text2text-generation"
+			| "fill-mask"
+			| "sentence-similarity"
+			| "text-to-speech"
+			| "text-to-audio"
+			| "automatic-speech-recognition"
+			| "audio-to-audio"
+			| "audio-classification"
+			| "audio-text-to-text"
+			| "voice-activity-detection"
+			| "depth-estimation"
+			| "image-classification"
+			| "object-detection"
+			| "image-segmentation"
+			| "text-to-image"
+			| "image-to-text"
+			| "image-to-image"
+			| "image-to-video"
+			| "unconditional-image-generation"
+			| "video-classification"
+			| "reinforcement-learning"
+			| "robotics"
+			| "tabular-classification"
+			| "tabular-regression"
+			| "tabular-to-text"
+			| "table-to-text"
+			| "multiple-choice"
+			| "text-ranking"
+			| "text-retrieval"
+			| "time-series-forecasting"
+			| "text-to-video"
+			| "image-text-to-text"
+			| "visual-question-answering"
+			| "document-question-answering"
+			| "zero-shot-image-classification"
+			| "graph-ml"
+			| "mask-generation"
+			| "zero-shot-object-detection"
+			| "text-to-3d"
+			| "image-to-3d"
+			| "image-feature-extraction"
+			| "video-text-to-text"
+			| "keypoint-detection"
+			| "visual-document-retrieval"
+			| "any-to-any"
+			| "video-to-video"
+			| "other"
+			| "conversational";
+		adapterType?: "lora";
+		adapterWeightsPath?: string;
+	}[];
+	isLikedByUser: boolean;
+	lastModified: string;
+	likes: number;
+	pipeline_tag?: string;
+	private: boolean;
+	repoType: "model";
+	gated: false | ("auto" | "manual");
+	resourceGroup?: {
+		id: string;
+		name: string;
+		numUsers: number;
+	};
+	numParameters?: number;
+	authorData?: ApiAuthor;
+	widgetOutputUrls?: string[];
+}
+
+/** Collection item of type "dataset". */
+interface ApiCollectionItemDataset extends ApiCollectionItemBase {
+	type: "dataset";
+	author: string;
+	id: string;
+	isLikedByUser: boolean;
+	likes: number;
+	datasetsServerInfo?: {
+		viewer: "preview" | "viewer-partial" | "viewer";
+		numRows: number | null;
+		libraries: (
+			| "mlcroissant"
+			| "webdataset"
+			| "datasets"
+			| "pandas"
+			| "dask"
+			| "distilabel"
+			| "fiftyone"
+			| "argilla"
+			| "polars"
+			| "duckdb"
+		)[];
+		formats: ("json" | "csv" | "parquet" | "imagefolder" | "audiofolder" | "webdataset" | "text" | "arrow")[];
+		modalities: ("3d" | "audio" | "document" | "geospatial" | "image" | "tabular" | "text" | "timeseries" | "video")[];
+	};
+	private: boolean;
+	repoType: "dataset";
+	downloads: number;
+	gated: false | ("auto" | "manual");
+	lastModified: string;
+	resourceGroup?: {
+		id: string;
+		name: string;
+		numUsers: number;
+	};
+}
+
+/** Collection item of type "space". */
+interface ApiCollectionItemSpace extends ApiCollectionItemBase {
+	type: "space";
+	author: string;
+	colorFrom: string;
+	colorTo: string;
+	createdAt: string;
+	emoji: string;
+	id: string;
+	isLikedByUser: boolean;
+	lastModified: string;
+	likes: number;
+	pinned: boolean;
+	private: boolean;
+	repoType: "space";
+	title: string;
+	sdk?: "gradio" | "docker" | "static" | "streamlit";
+	runtime: {
+		stage:
+			| "NO_APP_FILE"
+			| "CONFIG_ERROR"
+			| "BUILDING"
+			| "BUILD_ERROR"
+			| "APP_STARTING"
+			| "RUNNING"
+			| "RUNNING_BUILDING"
+			| "RUNNING_APP_STARTING"
+			| "RUNTIME_ERROR"
+			| "DELETING"
+			| "STOPPED"
+			| "PAUSED"
+			| "SLEEPING";
+		hardware: {
+			current:
+				| (
+						| "cpu-basic"
+						| "cpu-upgrade"
+						| "cpu-performance"
+						| "cpu-xl"
+						| "zero-a10g"
+						| "t4-small"
+						| "t4-medium"
+						| "l4x1"
+						| "l4x4"
+						| "l40sx1"
+						| "l40sx4"
+						| "l40sx8"
+						| "a10g-small"
+						| "a10g-large"
+						| "a10g-largex2"
+						| "a10g-largex4"
+						| "a100-large"
+						| "h100"
+						| "h100x8"
+				  )
+				| null;
+			requested:
+				| (
+						| "cpu-basic"
+						| "cpu-upgrade"
+						| "cpu-performance"
+						| "cpu-xl"
+						| "zero-a10g"
+						| "t4-small"
+						| "t4-medium"
+						| "l4x1"
+						| "l4x4"
+						| "l40sx1"
+						| "l40sx4"
+						| "l40sx8"
+						| "a10g-small"
+						| "a10g-large"
+						| "a10g-largex2"
+						| "a10g-largex4"
+						| "a100-large"
+						| "h100"
+						| "h100x8"
+				  )
+				| null;
+		};
+		storage: ("small" | "medium" | "large") | null;
+		errorMessage?: string;
+		gcTimeout?: number | null;
+		replicas: {
+			current?: number | null;
+			requested: number | "auto";
+		};
+		devMode?: boolean;
+		domains?: {
+			domain: string;
+			isCustom?: boolean | null;
+			stage: "READY" | "PENDING";
+		}[];
+		sha?: string;
+	};
+	originSpace?: {
+		author: ApiAuthor;
+		name: string;
+	};
+	ai_short_description?: string;
+	ai_category?: string;
+	trendingScore?: number;
+	resourceGroup?: {
+		id: string;
+		name: string;
+		numUsers: number;
+	};
+	tags: string[];
+	authorData?: ApiAuthor;
+	shortDescription?: string;
+	semanticRelevancyScore?: number;
+}
+
+/** Collection item of type "paper". */
+interface ApiCollectionItemPaper extends ApiCollectionItemBase {
+	type: "paper";
+	id: string;
+	title: string;
+	upvotes: number;
+	publishedAt: string;
+	thumbnailUrl?: string;
+	isUpvotedByUser?: boolean;
+}
+
+/** Collection item of type "collection", i.e. a collection referenced from another collection. */
+interface ApiCollectionItemCollection extends ApiCollectionItemBase {
+	type: "collection";
+	slug: string;
+	lastUpdated: string;
+	description?: string;
+	owner: ApiAuthor;
+	title: string;
+	theme: "orange" | "blue" | "green" | "purple" | "pink" | "indigo";
+	upvotes: number;
+	isUpvotedByUser: boolean;
+	id: string;
+	numberItems: number;
+	shareUrl: string;
+}
+
+/** Any item a collection can contain, discriminated by `type`. */
+export type ApiCollectionItem =
+	| ApiCollectionItemModel
+	| ApiCollectionItemDataset
+	| ApiCollectionItemSpace
+	| ApiCollectionItemPaper
+	| ApiCollectionItemCollection;