diff --git a/.github/workflows/ollama-template-update.yml b/.github/workflows/ollama-template-update.yml
new file mode 100644
index 000000000..8b33b4e6f
--- /dev/null
+++ b/.github/workflows/ollama-template-update.yml
@@ -0,0 +1,102 @@
+name: Ollama template update
+on:
+  push: # for debugging
+  workflow_dispatch:
+  #schedule:
+  #  - cron: '0 7 * * 1' # every Monday at 7am UTC, so I'll review it after having a 🥐
+
+permissions:
+  pull-requests: write # for creating PR
+  issues: write # for adding labels to the created PR
+  contents: write # for git push new branch
+
+jobs:
+  create-pull-request:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Prepare
+        id: prepare
+        shell: bash
+        run: |
+          git config --global user.name machineuser
+          git config --global user.email infra+machineuser@huggingface.co
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+          npm install -g pnpm
+          CURRENT_DATE=$(date -u +"%Y-%m-%d")
+          echo "CURRENT_DATE=$CURRENT_DATE"
+          echo "CURRENT_DATE=$CURRENT_DATE" >> $GITHUB_OUTPUT
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          cd packages/ollama-utils
+          pnpm install --frozen-lockfile
+
+      - name: Run update script
+        shell: bash
+        run: |
+          cd packages/ollama-utils
+          pnpm run build:automap
+
+      - name: Check for changed files
+        id: changes
+        shell: bash
+        env:
+          CURRENT_DATE: ${{ steps.prepare.outputs.CURRENT_DATE }}
+        run: |
+          set -x
+
+          FILE_TO_ADD="packages/ollama-utils/src/chat-template-automap.ts"
+
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            NEW_BRANCH="ollama-${CURRENT_DATE}"
+            echo "NEW_BRANCH=${NEW_BRANCH}"
+            echo "Changes detected, will create a new branch:"
+            echo "${modified_files}"
+            git add "${FILE_TO_ADD}"
+            git commit -m "ollama update ${CURRENT_DATE}"
+            git checkout -b "${NEW_BRANCH}"
+            git push -f origin "${NEW_BRANCH}"
+            echo "HAS_CHANGES=true" >> $GITHUB_OUTPUT
+            echo "NEW_BRANCH=${NEW_BRANCH}" >> $GITHUB_OUTPUT
+          else
+            echo "No files changed, skipping..."
+            echo "HAS_CHANGES=false" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Create PR
+        if: steps.changes.outputs.HAS_CHANGES == 'true'
+        uses: actions/github-script@v6
+        env:
+          CURRENT_DATE: ${{ steps.prepare.outputs.CURRENT_DATE }}
+          NEW_BRANCH: ${{ steps.changes.outputs.NEW_BRANCH }}
+        with:
+          script: |
+            const { repo, owner } = context.repo;
+            const currDate = process.env.CURRENT_DATE;
+            const newBranch = process.env.NEW_BRANCH;
+
+            const result = await github.rest.pulls.create({
+              title: '[ollama-utils] 🤖 Auto-update chat templates (' + currDate + ')',
+              owner,
+              repo,
+              head: newBranch,
+              base: 'main',
+              body: [
+                'This PR is auto-generated by',
+                '[generate-automap.ts](https://github.com/huggingface/huggingface.js/blob/main/packages/ollama-utils/scripts/generate-automap.ts).'
+              ].join('\n')
+            });
+
+            console.log({ result });
+            // github.rest.issues.addLabels({
+            //   owner,
+            //   repo,
+            //   issue_number: result.data.number,
+            //   labels: ['feature', 'automated pr']
+            // });
diff --git a/.github/workflows/ollama-utils-publish.yml b/.github/workflows/ollama-utils-publish.yml
new file mode 100644
index 000000000..0140e707e
--- /dev/null
+++ b/.github/workflows/ollama-utils-publish.yml
@@ -0,0 +1,68 @@
+name: Ollama Utils - Version and Release
+
+on:
+  workflow_dispatch:
+    inputs:
+      newversion:
+        type: choice
+        description: "Semantic Version Bump Type"
+        default: patch
+        options:
+          - patch
+          - minor
+          - major
+
+concurrency:
+  group: "push-to-main"
+
+defaults:
+  run:
+    working-directory: packages/ollama-utils
+
+jobs:
+  version_and_release:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          # Needed to push the tag and the commit on the main branch, otherwise we get:
+          # > Run git push --follow-tags
+          #   remote: error: GH006: Protected branch update failed for refs/heads/main.
+          #   remote: error: Changes must be made through a pull request. Required status check "lint" is expected.
+          token: ${{ secrets.BOT_ACCESS_TOKEN }}
+      - run: npm install -g corepack@latest && corepack enable
+      - uses: actions/setup-node@v3
+        with:
+          node-version: "20"
+          cache: "pnpm"
+          cache-dependency-path: |
+            packages/ollama-utils/pnpm-lock.yaml
+          # setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. REQUIRED
+          registry-url: "https://registry.npmjs.org"
+      - run: pnpm install
+      - run: git config --global user.name machineuser
+      - run: git config --global user.email infra+machineuser@huggingface.co
+      - run: |
+          PACKAGE_VERSION=$(node -p "require('./package.json').version")
+          BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')")
+          # Update package.json with the new version
+          node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
+          git commit . -m "🔖 @huggingface/ollama-utils $BUMPED_VERSION"
+          git tag "ollama-utils-v$BUMPED_VERSION"
+
+      - name: "Check Deps are published before publishing this package"
+        run: pnpm -w check-deps tasks
+
+      - run: pnpm publish --no-git-checks .
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+      - run: git pull --rebase && git push --follow-tags
+      # hack - reuse actions/setup-node@v3 just to set a new registry
+      - uses: actions/setup-node@v3
+        with:
+          node-version: "20"
+          registry-url: "https://npm.pkg.github.com"
+      # Disabled for now, until GitHub supports PATs for writing GitHub Packages (https://github.com/github/roadmap/issues/558)
+      # - run: pnpm publish --no-git-checks .
+      #   env:
+      #     NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/CODEOWNERS b/CODEOWNERS
index 235616b9a..4d953eeda 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -23,4 +23,9 @@
 /packages/gguf @mishig25 @ngxson @julien-c
 
 # Ownership for the space-header Package
+
 /packages/space-header @enzostvs
+
+# Ownership for the ollama-utils Package
+
+/packages/ollama-utils @ngxson
diff --git a/packages/ollama-utils/scripts/generate-automap.ts b/packages/ollama-utils/scripts/generate-automap.ts
index f6774197a..53b3cc512 100644
--- a/packages/ollama-utils/scripts/generate-automap.ts
+++ b/packages/ollama-utils/scripts/generate-automap.ts
@@ -107,6 +107,9 @@ const getSpecialTokens = (tmpl: string): string[] => {
 	nDoing = 0;
 	nAll = modelsWithTag.length;
 
+	const addedModels: string[] = [];
+	const skippedModelsDueToErr: string[] = [];
+
 	const workerGetTemplate = async () => {
 		while (true) {
 			const modelWithTag = modelsWithTag.shift();
@@ -137,44 +140,52 @@ const getSpecialTokens = (tmpl: string): string[] => {
 			try {
 				ggufData = await gguf(modelUrl);
 			} catch (e) {
-				console.log(" --> [X] FATAL: GGUF error", { model, tag, modelUrl });
-				throw e; // rethrow
+				console.log(` --> [X] Skipping ${modelWithTag} due to error while calling gguf()`, e);
+				skippedModelsDueToErr.push(modelWithTag);
+				continue;
 			}
 			const { metadata } = ggufData;
 			const ggufTmpl = metadata["tokenizer.chat_template"];
 			if (ggufTmpl) {
-				if (seenGGUFTemplate.has(ggufTmpl)) {
-					console.log(" --> Already seen this GGUF template, skip...");
+				try {
+					if (seenGGUFTemplate.has(ggufTmpl)) {
+						console.log(" --> Already seen this GGUF template, skip...");
+						continue;
+					}
+					seenGGUFTemplate.add(ggufTmpl);
+					console.log(" --> GGUF chat template OK");
+					const tmplBlob = manifest.layers.find((l) => l.mediaType.match(/\.template/));
+					if (!tmplBlob) continue;
+					const ollamaTmplUrl = getBlobUrl(tmplBlob.digest);
+					if (!ollamaTmplUrl) {
+						console.log(" --> [X] No ollama template");
+						continue;
+					}
+					const ollamaTmpl = await (await fetch(ollamaTmplUrl)).text();
+					console.log(" --> All OK");
+					const record: OutputItem = {
+						model: modelWithTag,
+						gguf: ggufTmpl,
+						ollama: {
+							template: ollamaTmpl,
+							tokens: getSpecialTokens(ggufTmpl),
+						},
+					};
+					// get params
+					const ollamaParamsBlob = manifest.layers.find((l) => l.mediaType.match(/\.params/));
+					const ollamaParamsUrl = ollamaParamsBlob ? getBlobUrl(ollamaParamsBlob.digest) : null;
+					if (ollamaParamsUrl) {
+						console.log(" --> Got params");
+						record.ollama.params = await (await fetch(ollamaParamsUrl)).json();
+					}
+					output.push(record);
+					addedModels.push(modelWithTag);
+					if (DEBUG) appendFileSync("ollama_tmp.jsonl", JSON.stringify(record) + "\n");
+				} catch (e) {
+					console.log(` --> [X] Skipping ${modelWithTag} due to error`, e);
+					skippedModelsDueToErr.push(modelWithTag);
 					continue;
 				}
-				seenGGUFTemplate.add(ggufTmpl);
-				console.log(" --> GGUF chat template OK");
-				const tmplBlob = manifest.layers.find((l) => l.mediaType.match(/\.template/));
-				if (!tmplBlob) continue;
-				const ollamaTmplUrl = getBlobUrl(tmplBlob.digest);
-				if (!ollamaTmplUrl) {
-					console.log(" --> [X] No ollama template");
-					continue;
-				}
-				const ollamaTmpl = await (await fetch(ollamaTmplUrl)).text();
-				console.log(" --> All OK");
-				const record: OutputItem = {
-					model: modelWithTag,
-					gguf: ggufTmpl,
-					ollama: {
-						template: ollamaTmpl,
-						tokens: getSpecialTokens(ggufTmpl),
-					},
-				};
-				// get params
-				const ollamaParamsBlob = manifest.layers.find((l) => l.mediaType.match(/\.params/));
-				const ollamaParamsUrl = ollamaParamsBlob ? getBlobUrl(ollamaParamsBlob.digest) : null;
-				if (ollamaParamsUrl) {
-					console.log(" --> Got params");
-					record.ollama.params = await (await fetch(ollamaParamsUrl)).json();
-				}
-				output.push(record);
-				if (DEBUG) appendFileSync("ollama_tmp.jsonl", JSON.stringify(record) + "\n");
 			} else {
 				console.log(" --> [X] No GGUF template");
 				continue;
@@ -190,7 +201,13 @@ const getSpecialTokens = (tmpl: string): string[] => {
 			.map(() => workerGetTemplate())
 	);
 
+	console.log("====================================");
 	console.log("DONE");
+	console.log("Added templates for:");
+	console.log(addedModels.join("\n"));
+	console.log("Skipped these models due to error:");
+	console.log(skippedModelsDueToErr.join("\n"));
+
 	output.sort((a, b) => a.model.localeCompare(b.model));
 
 	writeFileSync(
@@ -201,6 +218,11 @@ const getSpecialTokens = (tmpl: string): string[] => {
 
 import { OllamaChatTemplateMapEntry } from "./types";
 
+/**
+ * Skipped these models due to error:
+${skippedModelsDueToErr.map((m) => ` * - ${m}`).join("\n")}
+ */
+
 export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = ${JSON.stringify(output, null, "\t")};
 `.trim()
 	);
diff --git a/packages/ollama-utils/src/chat-template-automap.ts b/packages/ollama-utils/src/chat-template-automap.ts
index 9bcc81785..f7337711b 100644
--- a/packages/ollama-utils/src/chat-template-automap.ts
+++ b/packages/ollama-utils/src/chat-template-automap.ts
@@ -1,7 +1,12 @@
 // This file is auto generated, please do not modify manually
 // To update it, run "pnpm run build:automap"
 
-import type { OllamaChatTemplateMapEntry } from "./types";
+import { OllamaChatTemplateMapEntry } from "./types";
+
+/**
+ * Skipped these models due to error:
+ * - library/reflection:70b
+ */
 
 export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [
 	{
@@ -545,6 +550,18 @@ export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [
 			},
 		},
 	},
+	{
+		model: "library/mistral-small:24b",
+		gguf: "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST]' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}",
+		ollama: {
+			template:
+				'{{- range $index, $_ := .Messages }}\n{{- if eq .Role "system" }}[SYSTEM_PROMPT]{{ .Content }}[/SYSTEM_PROMPT]\n{{- else if eq .Role "user" }}\n{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS]{{ $.Tools }}[/AVAILABLE_TOOLS]\n{{- end }}[INST]{{ .Content }}[/INST]\n{{- else if eq .Role "assistant" }}\n{{- if .Content }}{{ .Content }}\n{{- if not (eq (len (slice $.Messages $index)) 1) }}\n{{- end }}\n{{- else if .ToolCalls }}[TOOL_CALLS][\n{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}\n{{- end }}]\n{{- end }}\n{{- else if eq .Role "tool" }}[TOOL_RESULTS]{"content": {{ .Content }}}[/TOOL_RESULTS]\n{{- end }}\n{{- end }}',
+			tokens: ["[INST]"],
+			params: {
+				temperature: 0.15,
+			},
+		},
+	},
 	{
 		model: "library/mixtral:8x22b",
 		gguf: "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.last and system_message is defined %}\n {{- '[INST] ' + system_message + '\\n\\n' + message['content'] + '[/INST]' }}\n {%- else %}\n {{- '[INST] ' + message['content'] + '[/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n",
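---
A usage sketch, not part of the patch above: entries in OLLAMA_CHAT_TEMPLATE_MAPPING are keyed by the exact GGUF chat template string (the generator de-duplicates via seenGGUFTemplate), so a consumer can recover the Ollama-side template, its special tokens, and any sampling params from a template read out of GGUF metadata. The helper name findOllamaTemplate below is hypothetical, not the package's public API; the entry shape comes from ./types and chat-template-automap.ts in this diff.

import { OLLAMA_CHAT_TEMPLATE_MAPPING } from "./chat-template-automap";
import type { OllamaChatTemplateMapEntry } from "./types";

// Hypothetical helper (illustration only): exact-match lookup on the GGUF
// chat template string, returning the Ollama-side half of the entry.
function findOllamaTemplate(ggufTmpl: string): OllamaChatTemplateMapEntry["ollama"] | undefined {
	return OLLAMA_CHAT_TEMPLATE_MAPPING.find((entry) => entry.gguf === ggufTmpl)?.ollama;
}

// Round-trip check against the mistral-small entry added in this diff:
const sample = OLLAMA_CHAT_TEMPLATE_MAPPING.find((e) => e.model === "library/mistral-small:24b");
if (sample) {
	const ollamaSide = findOllamaTemplate(sample.gguf);
	console.log(ollamaSide?.tokens); // ["[INST]"]
	console.log(ollamaSide?.params); // { temperature: 0.15 }
}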