|
# Homebrew formula for Text Generation Inference.
#
# Installs the Rust crates found under `backends/v3` and `launcher` with
# cargo, then installs the Python package from `server` into a virtualenv
# under libexec and links its `text-generation-server` executable into bin.
class TextGenerationInference < Formula
  include Language::Python::Virtualenv

  desc "Large Language Model Text Generation Inference"
  homepage "https://hf.co/docs/text-generation-inference"
  url "https://github.com/huggingface/text-generation-inference/archive/refs/tags/v3.1.1.tar.gz"
  sha256 "5d67c581fa5af71bfdd7b57e24f36b559a50e3c8472597432b27cc1a1d45b03d"
  license "Apache-2.0"

  depends_on "cmake" => :build
  depends_on "rust" => :build
  depends_on "uv" => :build
  depends_on "protobuf"
  # The virtualenv created in `install` is built from the `python3.13` interpreter.
  depends_on "python@3.13"

  def install
    # Build and install the two Rust crates.
    system "cargo", "install", *std_cargo_args(path: "backends/v3")
    system "cargo", "install", *std_cargo_args(path: "launcher")

    # prevent error with outlines installation due to location of uv cache
    rm "Cargo.toml"

    # Isolated virtualenv; VIRTUAL_ENV points the `uv` calls below at it
    # (`uv run --active` uses the active environment).
    venv = virtualenv_create(libexec, "python3.13", system_site_packages: false)
    ENV["VIRTUAL_ENV"] = venv.root

    uv = Formula["uv"].opt_bin/"uv"
    cd "server" do
      # Run the `gen-server-raw` make target inside the virtualenv, then
      # install the server package with its optional extras.
      system uv, "run", "--active", "--extra", "gen", "--no-binary-package", "safetensors",
                 "--", "make", "gen-server-raw"
      system uv, "pip", "install", ".[accelerate,compressed-tensors,quantize,peft,outlines]"
    end
    bin.install_symlink libexec/"bin/text-generation-server"
  end

  test do
    port = free_port
    # Start the launcher in a child process; keep the pid so it can be stopped.
    pid = fork do
      exec bin/"text-generation-launcher", "-p", port.to_s, "--model-id", "distilbert/distilgpt2"
    end

    begin
      # Single-quoted so the JSON needs no escaping; braces must balance.
      data = '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":1}}'
      header = "Content-Type: application/json"
      # Retry while the launcher is still starting and refusing connections.
      retries = "--retry 10 --retry-connrefused"
      endpoint = "127.0.0.1:#{port}/generate_stream"
      output = shell_output("curl -s #{endpoint} -X POST -d '#{data}' -H '#{header}' #{retries}")
      assert_match "generated_text", output
    ensure
      # Always stop and reap the launcher so the test leaves no stray process.
      Process.kill("TERM", pid)
      Process.wait(pid)
    end
  end
end
0 commit comments