#!/bin/bash
################################################################################
# Shell script that starts a copy of vLLM with a base model plus all the
# available LoRA adapters in this repository.
#
# To run this script:
# 1. Install an appropriate build of vLLM for your machine
# 2. Install the Hugging Face CLI (`hf`)
# 3. Download the intrinsics library by running:
#      hf download ibm-granite/intrinsics-lib --local-dir ./intrinsics-lib
# 4. Edit the constants BASE_MODEL_NAME and BASE_MODEL_ORG as needed
# 5. Run this script from the root of your local copy of intrinsics-lib.
################################################################################
BASE_MODEL_NAME=granite-3.3-8b-instruct
BASE_MODEL_ORG=ibm-granite
export VLLM_API_KEY=rag_intrinsics_1234
# Find all LoRA adapters for the target base model.
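# Each intrinsic in this repository is expected to provide its adapter under
# ./<adapter-name>/lora/<base-model-name>; directories without a LoRA for
# ${BASE_MODEL_NAME} are skipped.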
LORAS=""
for item in ./*; do
    # Strip the leading "./" to get the adapter directory name
    name=$(basename -- "${item}")
    if [ -d "./${name}/lora/${BASE_MODEL_NAME}" ]; then
        LORAS+="${name}=./${name}/lora/${BASE_MODEL_NAME} "
    fi
done
CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
    --port 55555 \
    --gpu-memory-utilization 0.45 \
    --max-model-len 8192 \
    --enable-lora \
    --max-lora-rank 64 \
    --lora-modules ${LORAS}"
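# Print the assembled command for reference, then launch the server.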
echo $CMD
$CMD
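################################################################################
# Example usage (sketch): once the server is up, each discovered adapter can be
# selected by passing its directory name as the "model" field of a request to
# vLLM's OpenAI-compatible API. The adapter name "answerability" below is only
# an illustration; substitute any adapter directory present in your checkout of
# intrinsics-lib.
#
#   curl http://localhost:55555/v1/chat/completions \
#     -H "Authorization: Bearer rag_intrinsics_1234" \
#     -H "Content-Type: application/json" \
#     -d '{"model": "answerability",
#          "messages": [{"role": "user", "content": "Hello"}]}'
################################################################################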