Conversation
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
vllm/tgis_utils/args.py
Outdated
| # map to ssl_ca_certs | ||
| parser.add_argument('--tls-client-ca-cert-path', type=str) | ||
| # add a path when lora adapters will be loaded from | ||
| parser.add_argument('--lora-adapter-cache', type=str) |
There was a problem hiding this comment.
open to ideas on naming here
vllm/entrypoints/grpc/adapters.py
Outdated
| lora_id = request.lora_id | ||
| if lora_id: | ||
| if not lora_adapter_store: | ||
| # using raise/format instead of .error so mypy knows this raises | ||
| raise ValueError(TGISValidationError.LoraDisabled.value.format()) | ||
|
|
||
| local_lora_path = os.path.join(lora_adapter_store.cache_path, lora_id) | ||
|
|
||
| # Do a bit of up-front validation so that we don't ask the engine | ||
| # to try to load an invalid adapter | ||
| if not os.path.exists(local_lora_path): | ||
| TGISValidationError.LoraAdapterNotFound.error( | ||
| lora_id, "directory does not exist") | ||
| if not os.path.exists( | ||
| os.path.join(local_lora_path, "adapter_config.json")): | ||
| TGISValidationError.LoraAdapterNotFound.error( | ||
| lora_id, "invalid adapter: no adapter_config.json found") | ||
|
|
||
| # We need to track a unique integer for vLLM to identify the lora | ||
| # adapters | ||
| if lora_id not in lora_adapter_store.unique_id_map: | ||
| lora_adapter_store.unique_id_map[ | ||
| lora_id] = lora_adapter_store.next_unique_id | ||
| lora_adapter_store.next_unique_id += 1 | ||
| unique_id = lora_adapter_store.unique_id_map[lora_id] | ||
| lora_request = LoRARequest(lora_name=lora_id, | ||
| lora_int_id=unique_id, | ||
| lora_local_path=local_lora_path) | ||
| else: | ||
| lora_request = None | ||
|
|
||
| if request.prefix_id: | ||
| # TODO: hook up PromptAdapterRequest once implemented in the engine | ||
| raise ValueError("prefix_id not implemented yet") | ||
|
|
||
| # Second return slot left here for the incoming PromptAdapterRequest | ||
| # See https://github.com/vllm-project/vllm/pull/4645/files | ||
| return lora_request, None |
There was a problem hiding this comment.
How about flattening this a bit?
| lora_id = request.lora_id | |
| if lora_id: | |
| if not lora_adapter_store: | |
| # using raise/format instead of .error so mypy knows this raises | |
| raise ValueError(TGISValidationError.LoraDisabled.value.format()) | |
| local_lora_path = os.path.join(lora_adapter_store.cache_path, lora_id) | |
| # Do a bit of up-front validation so that we don't ask the engine | |
| # to try to load an invalid adapter | |
| if not os.path.exists(local_lora_path): | |
| TGISValidationError.LoraAdapterNotFound.error( | |
| lora_id, "directory does not exist") | |
| if not os.path.exists( | |
| os.path.join(local_lora_path, "adapter_config.json")): | |
| TGISValidationError.LoraAdapterNotFound.error( | |
| lora_id, "invalid adapter: no adapter_config.json found") | |
| # We need to track a unique integer for vLLM to identify the lora | |
| # adapters | |
| if lora_id not in lora_adapter_store.unique_id_map: | |
| lora_adapter_store.unique_id_map[ | |
| lora_id] = lora_adapter_store.next_unique_id | |
| lora_adapter_store.next_unique_id += 1 | |
| unique_id = lora_adapter_store.unique_id_map[lora_id] | |
| lora_request = LoRARequest(lora_name=lora_id, | |
| lora_int_id=unique_id, | |
| lora_local_path=local_lora_path) | |
| else: | |
| lora_request = None | |
| if request.prefix_id: | |
| # TODO: hook up PromptAdapterRequest once implemented in the engine | |
| raise ValueError("prefix_id not implemented yet") | |
| # Second return slot left here for the incoming PromptAdapterRequest | |
| # See https://github.com/vllm-project/vllm/pull/4645/files | |
| return lora_request, None | |
| if request.prefix_id: | |
| # TODO: hook up PromptAdapterRequest once implemented in the engine | |
| raise ValueError("prefix_id not implemented yet") | |
| lora_id = request.lora_id | |
| if not lora_id: | |
| return None, None | |
| if not lora_adapter_store: | |
| # using raise/format instead of .error so mypy knows this raises | |
| raise ValueError(TGISValidationError.LoraDisabled.value.format()) | |
| local_lora_path = os.path.join(lora_adapter_store.cache_path, lora_id) | |
| # Do a bit of up-front validation so that we don't ask the engine | |
| # to try to load an invalid adapter | |
| if not os.path.exists(local_lora_path): | |
| TGISValidationError.LoraAdapterNotFound.error( | |
| lora_id, "directory does not exist") | |
| if not os.path.exists( | |
| os.path.join(local_lora_path, "adapter_config.json")): | |
| TGISValidationError.LoraAdapterNotFound.error( | |
| lora_id, "invalid adapter: no adapter_config.json found") | |
| # We need to track a unique integer for vLLM to identify the lora | |
| # adapters | |
| if lora_id not in lora_adapter_store.unique_id_map: | |
| lora_adapter_store.unique_id_map[ | |
| lora_id] = lora_adapter_store.next_unique_id | |
| lora_adapter_store.next_unique_id += 1 | |
| unique_id = lora_adapter_store.unique_id_map[lora_id] | |
| lora_request = LoRARequest(lora_name=lora_id, | |
| lora_int_id=unique_id, | |
| lora_local_path=local_lora_path) | |
| # Second return slot left here for the incoming PromptAdapterRequest | |
| # See https://github.com/vllm-project/vllm/pull/4645/files | |
| return lora_request, None | |
There was a problem hiding this comment.
Hah, I un-nested but then re-nested so that the file checking and opening will only happen if the adapter wasn't already loaded
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
vllm/entrypoints/grpc/grpc_server.py
Outdated
| context) | ||
|
|
||
| lora_request, _ = await self._validate_adapters(request, context) | ||
| adapter_kwargs, _ = await self._validate_adapters(request, context) |
There was a problem hiding this comment.
oh yeah, totally not. Interestingly python seems totally fine with the unpacking mismatch if you leave an underscore, TIL
vllm/entrypoints/grpc/adapters.py
Outdated
| TGISValidationError.AdapterNotFound.error( | ||
| adapter_id, "invalid adapter: no adapter_config.json found") | ||
|
|
||
| # NB: blocks event loop |
There was a problem hiding this comment.
I think this will be important to address - to remove all the file access from the event loop
There was a problem hiding this comment.
Yeah, I looked into this a bit and it sounds like the asyncio file access in third party libs is... not very good.
I'm not 100% up to speed on event loops, would we want to make a new executor for this sorta like
file_load_executor = ThreadPoolExecutor(max_workers=n)
task = _load_the_config_json_file(...)
await loop.run_in_executor(file_load_executor, task)
or would that just also block the loop?
There was a problem hiding this comment.
Yeah exactly .. probably should just make that function be all the code that's run if we don't find the adapter in the dict (i.e. checking on disk, loading it etc).
There's a default asyncio executor that can be used for this kind of thing, or we may want a static one rather than creating one on the fly (not that you were necessarily suggesting that).
There was a problem hiding this comment.
Cool, I'll see if I can get that working quickly
There was a problem hiding this comment.
@njhill can I get a run from your static analysis on this change?
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
| # If not already cached, we need to validate that files exist and | ||
| # grab the type out of the adapter_config.json file | ||
| if (adapter_metadata := adapter_store.adapters.get(adapter_id)) is None: | ||
| local_adapter_path = os.path.join(adapter_store.cache_path, adapter_id) |
There was a problem hiding this comment.
I think we should sanitize the adapter_id here to make sure that the user can't send funny things like ../../../etc/passwd.
maxdebayser
left a comment
There was a problem hiding this comment.
I've left a comment suggesting a security improvement, but otherwise it looks good to me.
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This PR simplifies model loading by taking advantage of the new functionality of `get_model()` from `fms.models`. The current implementation automatically infers `architecture` and `variant` from a given `model_path` pointing to a directory with weights in **hf** (Hugging Face) format.

### Changes:
- replacing `as_fms_model()` by `get_model()` for **hf** models.
- removing the if condition for **meta** weights

Note: make sure to use the **hf** format of the weights for model **7B-F** (checkpoint trained by meta) from now on...
Adds support for multi-lora adapters.
Passing tests added over in this PR: https://github.ibm.com/ai-foundation/tgis-deploy-tests/pull/25/files