Skip to content

Commit cd088db

Browse files
docs: regenerate serverless create reference for hub model-reference
1 parent 253ece4 commit cd088db

1 file changed

Lines changed: 4 additions & 1 deletion

File tree

docs/runpodctl_serverless_create.md

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,9 @@ examples:
23 23
runpodctl hub search vllm # find the hub id
24 24
runpodctl serverless create --hub-id <id> --gpu-id "NVIDIA GeForce RTX 4090"
25 25

26 +
# create from a hub repo and attach a model
27 +
runpodctl serverless create --hub-id <id> --gpu-id "NVIDIA GeForce RTX 4090" --model-reference https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct:main
28 +
26 29
# override or add env vars (hub defaults are included automatically)
27 30
runpodctl serverless create --hub-id <id> --env MODEL_NAME=my-model --env MAX_TOKENS=4096
28 31

@@ -45,7 +48,7 @@ runpodctl serverless create [flags]
45 48
--idle-timeout int seconds before idle worker scales down (1-3600) (default -1)
46 49
--instance-id string cpu instance id for --compute-type CPU (e.g. cpu3g-4-16)
47 50
--min-cuda-version string minimum cuda version (e.g., 12.6)
48 -
--model-reference stringArray model reference to attach to the endpoint (repeatable)
51 +
--model-reference stringArray hugging face model url with a ref to cache on the endpoint, e.g. https://huggingface.co/<org>/<model>:main; works with --template-id or --hub-id, gpu only (repeatable)
49 52
--name string endpoint name
50 53
--network-volume-id string network volume id to attach
51 54
--network-volume-ids string comma-separated network volume ids for multi-region

0 commit comments

Comments
 (0)