
Commit fa5cead

Merge branch 'main' of github.com:huggingface/huggingface.js into google-hai-libraries

2 parents: 60a89d5 + 1a07402

52 files changed: +4351 -893 lines


.github/workflows/test.yml (+12)

```diff
@@ -41,6 +41,10 @@ jobs:
         run: VCR_MODE=playback pnpm --filter ...[${{ steps.since.outputs.SINCE }}] test
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_FAL_KEY: dummy
+          HF_REPLICATE_KEY: dummy
+          HF_SAMBANOVA_KEY: dummy
+          HF_TOGETHER_KEY: dummy
 
   browser:
     runs-on: ubuntu-latest
@@ -77,6 +81,10 @@ jobs:
         run: VCR_MODE=playback pnpm --filter ...[${{ steps.since.outputs.SINCE }}] test:browser
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_FAL_KEY: dummy
+          HF_REPLICATE_KEY: dummy
+          HF_SAMBANOVA_KEY: dummy
+          HF_TOGETHER_KEY: dummy
 
   e2e:
     runs-on: ubuntu-latest
@@ -140,3 +148,7 @@ jobs:
         env:
           NPM_CONFIG_REGISTRY: http://localhost:4874/
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_FAL_KEY: dummy
+          HF_REPLICATE_KEY: dummy
+          HF_SAMBANOVA_KEY: dummy
+          HF_TOGETHER_KEY: dummy
```
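The `dummy` values are safe here because all three jobs run with `VCR_MODE=playback`, where provider requests are presumably answered from recorded fixtures rather than live APIs, so the keys only need to be present. A hypothetical helper (not part of this repo) illustrating the pattern:

```ts
// Hypothetical sketch: under VCR playback, a provider key is checked for
// presence but never sent to a live API, so any placeholder value works.
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) {
    throw new Error(`${name} must be set (any placeholder works in playback mode)`);
  }
  return value;
}

const falKey = requireEnv("HF_FAL_KEY"); // "dummy" in CI
```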

README.md (+32 -17)

````diff
@@ -13,7 +13,7 @@
 // Programatically interact with the Hub
 
 await createRepo({
-  repo: {type: "model", name: "my-user/nlp-model"},
+  repo: { type: "model", name: "my-user/nlp-model" },
   accessToken: HF_TOKEN
 });
 
@@ -27,7 +27,7 @@ await uploadFile({
   }
 });
 
-// Use Inference API
+// Use HF Inference API, or external Inference Providers!
 
 await inference.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
@@ -39,6 +39,7 @@ await inference.chatCompletion({
   ],
   max_tokens: 512,
   temperature: 0.5,
+  provider: "sambanova", // or together, fal-ai, replicate, …
 });
 
 await inference.textToImage({
@@ -53,11 +54,13 @@ await inference.textToImage({
 
 This is a collection of JS libraries to interact with the Hugging Face API, with TS types included.
 
-- [@huggingface/inference](packages/inference/README.md): Use Inference Endpoints (dedicated) and Inference API (serverless) to make calls to 100,000+ Machine Learning models
+- [@huggingface/inference](packages/inference/README.md): Use Inference API (serverless), Inference Endpoints (dedicated) and third-party Inference providers to make calls to 100,000+ Machine Learning models
 - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files
 - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface
 - [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files.
+- [@huggingface/dduf](packages/dduf/README.md): Similar package for DDUF (DDUF Diffusers Unified Format)
 - [@huggingface/tasks](packages/tasks/README.md): The definition files and source-of-truth for the Hub's main primitives like pipeline tasks, model libraries, etc.
+- [@huggingface/jinja](packages/jinja/README.md): A minimalistic JS implementation of the Jinja templating engine, to be used for ML chat templates.
 - [@huggingface/space-header](packages/space-header/README.md): Use the Space `mini_header` outside Hugging Face
 
 
@@ -92,7 +95,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 
 ```html
 <script type="module">
-import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@2.8.1/+esm';
+import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.0.0/+esm';
 import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm";
 </script>
 ```
@@ -142,6 +145,22 @@ for await (const chunk of inference.chatCompletionStream({
   console.log(chunk.choices[0].delta.content);
 }
 
+/// Using a third-party provider:
+await inference.chatCompletion({
+  model: "meta-llama/Llama-3.1-8B-Instruct",
+  messages: [{ role: "user", content: "Hello, nice to meet you!" }],
+  max_tokens: 512,
+  provider: "sambanova", // or together, fal-ai, replicate, …
+})
+
+await inference.textToImage({
+  model: "black-forest-labs/FLUX.1-dev",
+  inputs: "a picture of a green bird",
+  provider: "fal-ai",
+})
+
+
+
 // You can also omit "model" to use the recommended model for the task
 await inference.translation({
   inputs: "My name is Wolfgang and I live in Amsterdam",
@@ -151,28 +170,24 @@ await inference.translation({
   },
 });
 
-await inference.textToImage({
-  model: 'black-forest-labs/FLUX.1-dev',
-  inputs: 'a picture of a green bird',
-})
-
+// pass multimodal files or URLs as inputs
 await inference.imageToText({
+  model: 'nlpconnect/vit-gpt2-image-captioning',
   data: await (await fetch('https://picsum.photos/300/300')).blob(),
-  model: 'nlpconnect/vit-gpt2-image-captioning',
 })
 
 // Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/
 const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
 const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
 
-//Chat Completion
+// Chat Completion
 const llamaEndpoint = inference.endpoint(
   "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
 );
 const out = await llamaEndpoint.chatCompletion({
-	model: "meta-llama/Llama-3.1-8B-Instruct",
-	messages: [{ role: "user", content: "Hello, nice to meet you!" }],
-	max_tokens: 512,
+  model: "meta-llama/Llama-3.1-8B-Instruct",
+  messages: [{ role: "user", content: "Hello, nice to meet you!" }],
+  max_tokens: 512,
 });
 console.log(out.choices[0].message);
 ```
@@ -185,7 +200,7 @@ import { createRepo, uploadFile, deleteFiles } from "@huggingface/hub";
 const HF_TOKEN = "hf_...";
 
 await createRepo({
-  repo: "my-user/nlp-model", // or {type: "model", name: "my-user/nlp-test"},
+  repo: "my-user/nlp-model", // or { type: "model", name: "my-user/nlp-test" },
   accessToken: HF_TOKEN
 });
 
@@ -200,7 +215,7 @@ await uploadFile({
 });
 
 await deleteFiles({
-  repo: {type: "space", name: "my-user/my-space"}, // or "spaces/my-user/my-space"
+  repo: { type: "space", name: "my-user/my-space" }, // or "spaces/my-user/my-space"
   accessToken: HF_TOKEN,
   paths: ["README.md", ".gitattributes"]
 });
@@ -209,7 +224,7 @@ await deleteFiles({
 ### @huggingface/agents example
 
 ```ts
-import {HfAgent, LLMFromHub, defaultTools} from '@huggingface/agents';
+import { HfAgent, LLMFromHub, defaultTools } from '@huggingface/agents';
 
 const HF_TOKEN = "hf_...";
````
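Worth noting alongside this diff: the new `provider` option should apply equally when calling the package's tree-shaken task functions, not just the `HfInference` class. A hedged sketch, assuming the standalone `textToImage` accepts an `accessToken` plus the same arguments shown above:

```ts
// Sketch under stated assumptions; not code from this commit.
import { textToImage } from "@huggingface/inference";

const image = await textToImage({
  accessToken: "hf_...",
  model: "black-forest-labs/FLUX.1-dev",
  inputs: "a picture of a green bird",
  provider: "fal-ai", // fulfilled by Fal.ai rather than HF's serverless API
});
// textToImage resolves to a Blob that can be saved or displayed
```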

packages/inference/LICENSE (+1 -1)

```diff
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2022 Tim Mikeladze
+Copyright (c) 2022 Tim Mikeladze and the Hugging Face team
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
```

packages/inference/README.md (+36 -3)

````diff
@@ -1,7 +1,7 @@
 # 🤗 Hugging Face Inference Endpoints
 
-A Typescript powered wrapper for the Hugging Face Inference Endpoints API. Learn more about Inference Endpoints at [Hugging Face](https://huggingface.co/inference-endpoints).
-It works with both [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index).
+A Typescript powered wrapper for the Hugging Face Inference API (serverless), Inference Endpoints (dedicated), and third-party Inference Providers.
+It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with supported third-party Inference Providers.
 
 Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).
 
@@ -42,7 +42,40 @@ const hf = new HfInference('your access token')
 
 Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.
 
-#### Tree-shaking
+### Third-party inference providers
+
+You can send inference requests to third-party providers with the inference client.
+
+Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz) and [Sambanova](https://sambanova.ai).
+
+To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
+```ts
+const accessToken = "hf_..."; // Either a HF access token, or an API key from the third-party provider (Replicate in this example)
+
+const client = new HfInference(accessToken);
+await client.textToImage({
+  provider: "replicate",
+  model: "black-forest-labs/Flux.1-dev",
+  inputs: "A black forest cake"
+})
+```
+
+When authenticated with a Hugging Face access token, the request is routed through https://huggingface.co.
+When authenticated with a third-party provider key, the request is made directly against that provider's inference API.
+
+Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline task here:
+- [Fal.ai supported models](./src/providers/fal-ai.ts)
+- [Replicate supported models](./src/providers/replicate.ts)
+- [Sambanova supported models](./src/providers/sambanova.ts)
+- [Together supported models](./src/providers/together.ts)
+- [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)
+
+**Important note:** To be compatible, the third-party API must adhere to the "standard" API shape we expect on HF model pages for each pipeline task type.
+This is not an issue for LLMs as everyone converged on the OpenAI API anyway, but can be trickier for other tasks like "text-to-image" or "automatic-speech-recognition", where no standard API exists. Let us know if any help is needed or if we can make things easier for you!
+
+👋 **Want to add another provider?** Get in touch if you'd like to add support for another Inference provider, and/or request it on https://huggingface.co/spaces/huggingface/HuggingDiscussions/discussions/49
+
+### Tree-shaking
 
 You can import the functions you need directly from the module instead of using the `HfInference` class.
 
````
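The routing rule this section describes (HF token → proxied through huggingface.co, provider key → direct call) can be made concrete. In the sketch below, the `r8_...` prefix is only an example of what a Replicate-issued key looks like, not detection logic taken from this commit:

```ts
import { HfInference } from "@huggingface/inference";

// 1) A Hugging Face access token: provider requests are routed
//    through https://huggingface.co on your behalf.
const viaHF = new HfInference("hf_...");

// 2) The provider's own API key (a Replicate token, for example):
//    requests go straight to that provider's inference API.
const direct = new HfInference("r8_..."); // example Replicate-style key

await viaHF.textToImage({
  provider: "replicate",
  model: "black-forest-labs/Flux.1-dev",
  inputs: "A black forest cake",
});
```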

packages/inference/package.json (+1 -1)

```diff
@@ -1,6 +1,6 @@
 {
 	"name": "@huggingface/inference",
-	"version": "2.8.1",
+	"version": "3.0.0",
 	"packageManager": "[email protected]",
 	"license": "MIT",
 	"author": "Tim Mikeladze <[email protected]>",
```

packages/inference/src/config.ts (+2, new file)

```diff
@@ -0,0 +1,2 @@
+export const HF_HUB_URL = "https://huggingface.co";
+export const HF_INFERENCE_API_URL = "https://api-inference.huggingface.co";
```

packages/inference/src/index.ts (+5)

```diff
@@ -1,4 +1,9 @@
+export type { ProviderMapping } from "./providers/types";
 export { HfInference, HfInferenceEndpoint } from "./HfInference";
 export { InferenceOutputError } from "./lib/InferenceOutputError";
+export { FAL_AI_SUPPORTED_MODEL_IDS } from "./providers/fal-ai";
+export { REPLICATE_SUPPORTED_MODEL_IDS } from "./providers/replicate";
+export { SAMBANOVA_SUPPORTED_MODEL_IDS } from "./providers/sambanova";
+export { TOGETHER_SUPPORTED_MODEL_IDS } from "./providers/together";
 export * from "./types";
 export * from "./tasks";
```
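These new exports let consumers inspect provider coverage without making a request. The diff does not show the shape of `ProviderMapping`, so the lookup below is an assumption (pipeline task → Hub model ID → the provider's own model identifier):

```ts
// Assumed shape, not confirmed by this diff: a per-task map from Hub
// model IDs to the provider's internal model identifiers.
import { FAL_AI_SUPPORTED_MODEL_IDS } from "@huggingface/inference";

const falId = FAL_AI_SUPPORTED_MODEL_IDS["text-to-image"]?.["black-forest-labs/FLUX.1-dev"];
console.log(falId ? `supported on fal-ai as ${falId}` : "not mapped for fal-ai");
```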

packages/inference/src/lib/getDefaultTask.ts (+1 -1)

```diff
@@ -1,3 +1,4 @@
+import { HF_HUB_URL } from "../config";
 import { isUrl } from "./isUrl";
 
 /**
@@ -8,7 +9,6 @@ import { isUrl } from "./isUrl";
 const taskCache = new Map<string, { task: string; date: Date }>();
 const CACHE_DURATION = 10 * 60 * 1000;
 const MAX_CACHE_ITEMS = 1000;
-export const HF_HUB_URL = "https://huggingface.co";
 
 export interface DefaultTaskOptions {
 	fetch?: typeof fetch;
```
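Net effect: `HF_HUB_URL` moves from this module into the shared `config.ts`, and is therefore no longer exported from `getDefaultTask`; dropping a public export like this is consistent with the major bump to 3.0.0 above. Internal usage presumably stays along these lines (the exact URL built by `getDefaultTask` is not shown in this diff):

```ts
import { HF_HUB_URL } from "../config";

// Illustrative sketch only: resolving a model's default pipeline task
// from the Hub API, which is what getDefaultTask caches.
const res = await fetch(`${HF_HUB_URL}/api/models/gpt2`);
const { pipeline_tag } = await res.json();
```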
