From c64cfdbd42b682d840dcf6652e5ad1ee8f96b5e5 Mon Sep 17 00:00:00 2001
From: Harry Hu
Date: Mon, 25 Aug 2025 12:29:22 -0400
Subject: [PATCH] [FFI] Refactored WebLLM per new TVM ffi changes

---
 examples/model-tests/README.md            |   0
 examples/model-tests/package.json         |  20 ++
 examples/model-tests/src/model_tests.html |  26 +++
 examples/model-tests/src/model_tests.ts   | 234 ++++++++++++++++++++++
 src/config.ts                             |   8 +-
 src/embedding.ts                          |   2 +-
 src/llm_chat.ts                           |   2 +-
 7 files changed, 286 insertions(+), 6 deletions(-)
 create mode 100644 examples/model-tests/README.md
 create mode 100644 examples/model-tests/package.json
 create mode 100644 examples/model-tests/src/model_tests.html
 create mode 100644 examples/model-tests/src/model_tests.ts

diff --git a/examples/model-tests/README.md b/examples/model-tests/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/model-tests/package.json b/examples/model-tests/package.json
new file mode 100644
index 00000000..132ff655
--- /dev/null
+++ b/examples/model-tests/package.json
@@ -0,0 +1,20 @@
+{
+  "name": "model-tests",
+  "version": "0.1.0",
+  "private": true,
+  "scripts": {
+    "start": "parcel src/model_tests.html --port 8889",
+    "build": "parcel build src/model_tests.html --dist-dir lib"
+  },
+  "devDependencies": {
+    "buffer": "^5.7.1",
+    "parcel": "^2.8.3",
+    "process": "^0.11.10",
+    "tslib": "^2.3.1",
+    "typescript": "^4.9.5",
+    "url": "^0.11.3"
+  },
+  "dependencies": {
+    "@mlc-ai/web-llm": "file:../../"
+  }
+}
diff --git a/examples/model-tests/src/model_tests.html b/examples/model-tests/src/model_tests.html
new file mode 100644
index 00000000..a15fc678
--- /dev/null
+++ b/examples/model-tests/src/model_tests.html
@@ -0,0 +1,26 @@
+<!doctype html>
+<html>
+  <script>
+    webLLMGlobal = {};
+  </script>
+  <body>
+    <h2>WebLLM Model Tester</h2>
+    Open console to see output
+    <br />
+    <br />
+    <label id="init-label"> </label>
+
+    <h3>Current Model</h3>
+    <label id="current-model-label"> </label>
+
+    <h3>Progress</h3>
+    <label id="progress-label"> </label>
+
+    <h3>Latest Response</h3>
+    <label id="response-label"> </label>
+    <br />
+    <label id="stats-label"> </label>
+
+    <script type="module" src="./model_tests.ts"></script>
+  </body>
+</html>
diff --git a/examples/model-tests/src/model_tests.ts b/examples/model-tests/src/model_tests.ts
new file mode 100644
index 00000000..24cab338
--- /dev/null
+++ b/examples/model-tests/src/model_tests.ts
@@ -0,0 +1,234 @@
+import * as webllm from "@mlc-ai/web-llm";
+
+function setLabel(id: string, text: string) {
+  const label = document.getElementById(id);
+  if (label == null) {
+    throw Error("Cannot find label " + id);
+  }
+  label.innerText = text;
+}
+
+// Models to test: uncomment the specific ones you want to test
+const TEST_MODELS = [
+  // Llama 2 7B
+  // "Llama-2-7b-chat-hf-q4f16_1-MLC",
+  // "Llama-2-7b-chat-hf-q4f32_1-MLC",
+
+  // // Llama 3 8B
+  // "Llama-3-8B-Instruct-q4f16_1-MLC",
+  // "Llama-3-8B-Instruct-q4f32_1-MLC",
+
+  // // Llama 3.1 8B
+  // "Llama-3.1-8B-Instruct-q4f16_1-MLC",
+  // "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+
+  // // Llama 3.2 1B, 3B
+  // "Llama-3.2-1B-Instruct-q4f16_1-MLC",
+  // "Llama-3.2-1B-Instruct-q4f32_1-MLC",
+  // "Llama-3.2-3B-Instruct-q4f16_1-MLC",
+  // "Llama-3.2-3B-Instruct-q4f32_1-MLC",
+
+  // // Mistral 7B v0.3
+  // "Mistral-7B-Instruct-v0.3-q4f16_1-MLC",
+  // "Mistral-7B-Instruct-v0.3-q4f32_1-MLC",
+
+  // // Phi models
+  // "phi-1_5-q4f16_1-MLC",
+  // "phi-1_5-q4f32_1-MLC",
+  // "phi-2-q4f16_1-MLC",
+  // "phi-2-q4f32_1-MLC",
+  // "Phi-3-mini-4k-instruct-q4f16_1-MLC",
+  // "Phi-3-mini-4k-instruct-q4f32_1-MLC",
+  // "Phi-3.5-mini-instruct-q4f16_1-MLC",
+  // "Phi-3.5-mini-instruct-q4f32_1-MLC",
+
+  // // Qwen2
+  "Qwen2-0.5B-Instruct-q4f16_1-MLC",
+  // "Qwen2-0.5B-Instruct-q4f32_1-MLC",
+  // "Qwen2-1.5B-Instruct-q4f16_1-MLC",
+  // "Qwen2-1.5B-Instruct-q4f32_1-MLC",
+
+  // // Qwen2.5
+  // "Qwen2.5-3B-Instruct-q4f16_1-MLC",
+  // "Qwen2.5-3B-Instruct-q4f32_1-MLC",
+
+  // // Qwen3 (including q0 for 0.6B)
+  // "Qwen3-0.6B-q4f16_1-MLC",
+  // "Qwen3-0.6B-q4f32_1-MLC",
+  // "Qwen3-0.6B-q0f32-MLC",
+  // "Qwen3-1.7B-q4f16_1-MLC",
+  // "Qwen3-1.7B-q4f32_1-MLC",
+  // "Qwen3-4B-q4f16_1-MLC",
+  // "Qwen3-4B-q4f32_1-MLC",
+  // "Qwen3-8B-q4f16_1-MLC",
+  // "Qwen3-8B-q4f32_1-MLC",
+
+  // // RedPajama
+  // "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC",
+  // "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC",
+
+  // // SmolLM2 (including q0 for smaller ones)
+  // "SmolLM2-135M-Instruct-q0f16-MLC",
+  // "SmolLM2-135M-Instruct-q0f32-MLC",
+  // "SmolLM2-360M-Instruct-q0f16-MLC",
+  // "SmolLM2-360M-Instruct-q0f32-MLC",
+  // "SmolLM2-1.7B-Instruct-q4f16_1-MLC",
+  // "SmolLM2-1.7B-Instruct-q4f32_1-MLC",
+
+  // // TinyLlama v1.0
+  // "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC",
+  // "TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC",
+
+  // // Gemma models
+  // "gemma-2b-it-q4f16_1-MLC",
+  // "gemma-2b-it-q4f32_1-MLC",
+  // "gemma-2-2b-it-q4f16_1-MLC",
+  // "gemma-2-2b-it-q4f32_1-MLC",
+  // "gemma-2-9b-it-q4f16_1-MLC",
+  // "gemma-2-9b-it-q4f32_1-MLC",
+
+  // // StableLM
+  // "stablelm-2-zephyr-1_6b-q4f16_1-MLC",
+  // "stablelm-2-zephyr-1_6b-q4f32_1-MLC",
+];
+
+const TEST_PROMPT = "Tell me a joke.";
+
+const initProgressCallback = (report: webllm.InitProgressReport) => {
+  setLabel("init-label", report.text);
+};
+
+async function testModel(
+  modelId: string,
+  modelIndex: number,
+  totalModels: number,
+): Promise<boolean> {
+  try {
+    // print output into console
+    console.log(
+      `\n=== Testing Model ${modelIndex + 1}/${totalModels}: ${modelId} ===`,
+    );
+    setLabel(
+      "current-model-label",
+      `${modelId} (${modelIndex + 1}/${totalModels})`,
+    );
+    setLabel("progress-label", `Loading model...`);
+    setLabel("response-label", "");
+
+    const startTime = Date.now();
+
+    const appConfig = webllm.prebuiltAppConfig;
+    appConfig.useIndexedDBCache = true;
+
+    const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
+      modelId,
+      {
+        initProgressCallback: initProgressCallback,
+        appConfig: appConfig,
+        logLevel: "ERROR",
+      },
+    );
+
+    const loadTime = Date.now() - startTime;
+    console.log(`Model loaded in ${(loadTime / 1000).toFixed(1)}s`);
+    setLabel(
+      "progress-label",
+      `Model loaded in ${(loadTime / 1000).toFixed(1)}s. Generating...`,
+    );
+
+    // Test chat completion
+    const generateStart = Date.now();
+    const reply = await engine.chat.completions.create({
+      messages: [{ role: "user", content: TEST_PROMPT }],
+      temperature: 0.1,
+      max_tokens: 500,
+    });
+
+    const generateTime = Date.now() - generateStart;
+    const response = reply.choices[0]?.message?.content || "No response";
+
+    console.log(`Generated response in ${(generateTime / 1000).toFixed(1)}s`);
+    console.log(`Response: "${response}"`);
+
+    setLabel(
+      "response-label",
+      response.substring(0, 200) + (response.length > 200 ? "..." : ""),
+    );
+    setLabel(
+      "stats-label",
+      `Load: ${(loadTime / 1000).toFixed(1)}s, Generate: ${(generateTime / 1000).toFixed(1)}s, Tokens: ${reply.usage?.completion_tokens || "?"}`,
+    );
+
+    // Clear cache for this model
+    setLabel("progress-label", `Clearing cache...`);
+    await webllm.deleteModelAllInfoInCache(modelId, appConfig);
+    console.log(`Cleared cache for ${modelId}`);
+
+    return true;
+  } catch (error) {
+    console.error(`Error testing ${modelId}:`, error);
+    setLabel("response-label", `Error: ${error.message}`);
+    setLabel("progress-label", `Error with ${modelId}`);
+
+    // Still try to clear cache even if test failed
+    try {
+      const appConfig = webllm.prebuiltAppConfig;
+      appConfig.useIndexedDBCache = true;
+      await webllm.deleteModelAllInfoInCache(modelId, appConfig);
+      console.log(`Cleared cache for ${modelId} (after error)`);
+    } catch (clearError) {
+      console.error(`Failed to clear cache for ${modelId}:`, clearError);
+    }
+
+    return false;
+  }
+}
+
+async function main() {
+  console.log("Starting WebLLM Model Testing");
+  console.log(`Testing ${TEST_MODELS.length} chat models`);
+
+  const results = {
+    passed: 0,
+    failed: 0,
+    total: TEST_MODELS.length,
+  };
+
+  setLabel("current-model-label", "Starting tests...");
+  setLabel("progress-label", `0/${TEST_MODELS.length} models tested`);
+
+  for (let i = 0; i < TEST_MODELS.length; i++) {
+    const modelId = TEST_MODELS[i];
+    const success = await testModel(modelId, i, TEST_MODELS.length);
+
+    if (success) {
+      results.passed++;
+    } else {
+      results.failed++;
+    }
+
+    setLabel(
+      "progress-label",
+      `${i + 1}/${TEST_MODELS.length} models tested (${results.passed} passed, ${results.failed} failed)`,
+    );
+
+    await new Promise((resolve) => setTimeout(resolve, 1000));
+  }
+
+  console.log(`\nTesting completed!`);
+  console.log(
+    `Results: ${results.passed}/${results.total} models passed (${Math.round((results.passed / results.total) * 100)}%)`,
+  );
+  console.log(`Passed: ${results.passed}`);
+  console.log(`Failed: ${results.failed}`);
+
+  setLabel("current-model-label", "All tests completed!");
+  setLabel(
+    "progress-label",
+    `Final: ${results.passed}/${results.total} passed (${Math.round((results.passed / results.total) * 100)}%)`,
+  );
+  setLabel("response-label", "Check console for full results");
+  setLabel("stats-label", `${results.passed} passed, ${results.failed} failed`);
+}
+
+main();
diff --git a/src/config.ts b/src/config.ts
index dfeb1913..93f141e2 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -287,7 +287,7 @@ export interface AppConfig {
  * @note The model version does not have to match the npm version, since not each npm update
  * requires an update of the model libraries.
  */
-export const modelVersion = "v0_2_48";
+export const modelVersion = "v0_2_80";
 export const modelLibURLPrefix =
   "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/web-llm-models/";
 
@@ -1190,7 +1190,7 @@ export const prebuiltAppConfig: AppConfig = {
       model_lib:
         modelLibURLPrefix +
         modelVersion +
-        "/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
+        "/Qwen2-0.5B-Instruct-testtokenizer-q4f16_1-ctx4k_cs1k-webgpu.wasm",
       low_resource_required: true,
       vram_required_MB: 944.62,
       overrides: {
@@ -1322,7 +1322,7 @@ export const prebuiltAppConfig: AppConfig = {
       model_lib:
         modelLibURLPrefix +
         modelVersion +
-        "/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
+        "/Qwen2-0.5B-Instruct-testtokenizer-q4f16_1-ctx4k_cs1k-webgpu.wasm",
       low_resource_required: true,
       vram_required_MB: 944.62,
       overrides: {
@@ -1677,7 +1677,7 @@ export const prebuiltAppConfig: AppConfig = {
       model_lib:
         modelLibURLPrefix +
         modelVersion +
-        "/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
+        "/Qwen2-0.5B-Instruct-testtokenizer-q4f16_1-ctx4k_cs1k-webgpu.wasm",
       low_resource_required: true,
       vram_required_MB: 944.62,
       overrides: {
diff --git a/src/embedding.ts b/src/embedding.ts
index ae5d9123..7b23e246 100644
--- a/src/embedding.ts
+++ b/src/embedding.ts
@@ -49,7 +49,7 @@ export class EmbeddingPipeline {
     // 2. Get json stored in the vm's metadata function
     const fgetMetadata = this.vm.getFunction("_metadata");
     const ret_value = fgetMetadata();
-    const metadataStr = this.tvm.detachFromCurrentScope(ret_value).toString();
+    const metadataStr = ret_value.toString();
     const metadata = JSON.parse(metadataStr);
 
     // 3. Load parameters by name
diff --git a/src/llm_chat.ts b/src/llm_chat.ts
index 5f8ecf00..7a4d80c9 100644
--- a/src/llm_chat.ts
+++ b/src/llm_chat.ts
@@ -201,7 +201,7 @@ export class LLMChatPipeline {
     // 2. Get json stored in the vm's metadata function
     const fgetMetadata = this.vm.getFunction("_metadata");
     const ret_value = fgetMetadata();
-    const metadataStr = this.tvm.detachFromCurrentScope(ret_value).toString();
+    const metadataStr = ret_value.toString();
     const metadata = JSON.parse(metadataStr);
 
     // 3. Load parameters by name