Spaces:
Runtime error
Runtime error
Expose n_threads as a user-configurable setting
Browse files
app.py
CHANGED
|
@@ -49,6 +49,7 @@ model_cache = {
|
|
| 49 |
'model_file': None,
|
| 50 |
'clip_file': None,
|
| 51 |
'verbose': None,
|
|
|
|
| 52 |
'llm': None
|
| 53 |
}
|
| 54 |
|
|
@@ -83,9 +84,10 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
|
|
| 83 |
"{% if add_generation_prompt %}Assistant:{% endif %}"
|
| 84 |
)
|
| 85 |
|
| 86 |
-
# Load and cache LLM (only on dropdown or verbose change)
|
| 87 |
-
def update_llm(size, model_file, clip_file, verbose_mode):
|
| 88 |
-
|
|
|
|
| 89 |
mf, cf = ensure_weights(MODELS[size], model_file, clip_file)
|
| 90 |
handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
|
| 91 |
llm = Llama(
|
|
@@ -93,9 +95,9 @@ def update_llm(size, model_file, clip_file, verbose_mode):
|
|
| 93 |
chat_handler=handler,
|
| 94 |
n_ctx=512,
|
| 95 |
verbose=verbose_mode,
|
| 96 |
-
n_threads=
|
| 97 |
)
|
| 98 |
-
model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'verbose': verbose_mode, 'llm': llm})
|
| 99 |
return None
|
| 100 |
|
| 101 |
# Build weight filename lists
|
|
@@ -155,6 +157,8 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
|
|
| 155 |
|
| 156 |
timestamp = time.strftime('%H:%M:%S')
|
| 157 |
debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
|
|
|
|
|
|
|
| 158 |
|
| 159 |
t_start = time.time()
|
| 160 |
buf = io.StringIO()
|
|
@@ -188,6 +192,7 @@ def main():
|
|
| 188 |
logging.basicConfig(level=logging.INFO)
|
| 189 |
default = '256M'
|
| 190 |
default_verbose = True
|
|
|
|
| 191 |
mf, cf = get_weight_files(default)
|
| 192 |
|
| 193 |
with gr.Blocks() as demo:
|
|
@@ -197,33 +202,40 @@ def main():
|
|
| 197 |
model_dd = gr.Dropdown(mf, value=mf[0], label='Decoder Weights')
|
| 198 |
clip_dd = gr.Dropdown(cf, value=cf[0], label='CLIP Weights')
|
| 199 |
verbose_cb= gr.Checkbox(value=default_verbose, label='Verbose Mode')
|
|
|
|
| 200 |
|
| 201 |
-
def on_size_change(sz, verbose):
|
| 202 |
mlist, clist = get_weight_files(sz)
|
| 203 |
-
update_llm(sz, mlist[0], clist[0], verbose)
|
| 204 |
return gr.update(choices=mlist, value=mlist[0]), gr.update(choices=clist, value=clist[0])
|
| 205 |
|
| 206 |
size_dd.change(
|
| 207 |
fn=on_size_change,
|
| 208 |
-
inputs=[size_dd, verbose_cb],
|
| 209 |
outputs=[model_dd, clip_dd]
|
| 210 |
)
|
| 211 |
model_dd.change(
|
| 212 |
-
fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
|
| 213 |
-
inputs=[size_dd, model_dd, clip_dd, verbose_cb],
|
| 214 |
outputs=[]
|
| 215 |
)
|
| 216 |
clip_dd.change(
|
| 217 |
-
fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
|
| 218 |
-
inputs=[size_dd, model_dd, clip_dd, verbose_cb],
|
| 219 |
outputs=[]
|
| 220 |
)
|
| 221 |
verbose_cb.change(
|
| 222 |
-
fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
|
| 223 |
-
inputs=[size_dd, model_dd, clip_dd, verbose_cb],
|
| 224 |
outputs=[]
|
| 225 |
)
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
|
| 229 |
sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
|
|
|
|
| 49 |
'model_file': None,
|
| 50 |
'clip_file': None,
|
| 51 |
'verbose': None,
|
| 52 |
+
'n_threads': None,
|
| 53 |
'llm': None
|
| 54 |
}
|
| 55 |
|
|
|
|
| 84 |
"{% if add_generation_prompt %}Assistant:{% endif %}"
|
| 85 |
)
|
| 86 |
|
| 87 |
+
# Load and cache the LLM (reloads only when a dropdown, the verbose flag, or the thread count changes)
|
| 88 |
+
def update_llm(size, model_file, clip_file, verbose_mode, n_threads):
|
| 89 |
+
# Only reload if any of the parameters changed
|
| 90 |
+
if (model_cache['size'], model_cache['model_file'], model_cache['clip_file'], model_cache['verbose'], model_cache['n_threads']) != (size, model_file, clip_file, verbose_mode, n_threads):
|
| 91 |
mf, cf = ensure_weights(MODELS[size], model_file, clip_file)
|
| 92 |
handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
|
| 93 |
llm = Llama(
|
|
|
|
| 95 |
chat_handler=handler,
|
| 96 |
n_ctx=512,
|
| 97 |
verbose=verbose_mode,
|
| 98 |
+
n_threads=n_threads
|
| 99 |
)
|
| 100 |
+
model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'verbose': verbose_mode, 'n_threads': n_threads, 'llm': llm})
|
| 101 |
return None
|
| 102 |
|
| 103 |
# Build weight filename lists
|
|
|
|
| 157 |
|
| 158 |
timestamp = time.strftime('%H:%M:%S')
|
| 159 |
debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
|
| 160 |
+
if model_cache.get('n_threads') is not None:
|
| 161 |
+
debug_msgs.append(f"[{timestamp}] llama_cpp n_threads = {model_cache['n_threads']}")
|
| 162 |
|
| 163 |
t_start = time.time()
|
| 164 |
buf = io.StringIO()
|
|
|
|
| 192 |
logging.basicConfig(level=logging.INFO)
|
| 193 |
default = '256M'
|
| 194 |
default_verbose = True
|
| 195 |
+
default_threads = os.cpu_count() or 1
|
| 196 |
mf, cf = get_weight_files(default)
|
| 197 |
|
| 198 |
with gr.Blocks() as demo:
|
|
|
|
| 202 |
model_dd = gr.Dropdown(mf, value=mf[0], label='Decoder Weights')
|
| 203 |
clip_dd = gr.Dropdown(cf, value=cf[0], label='CLIP Weights')
|
| 204 |
verbose_cb= gr.Checkbox(value=default_verbose, label='Verbose Mode')
|
| 205 |
+
thread_dd = gr.Slider(minimum=1, maximum=default_threads, step=1, value=default_threads, label='CPU Threads (n_threads)')
|
| 206 |
|
| 207 |
+
def on_size_change(sz, verbose, n_threads):
|
| 208 |
mlist, clist = get_weight_files(sz)
|
| 209 |
+
update_llm(sz, mlist[0], clist[0], verbose, n_threads)
|
| 210 |
return gr.update(choices=mlist, value=mlist[0]), gr.update(choices=clist, value=clist[0])
|
| 211 |
|
| 212 |
size_dd.change(
|
| 213 |
fn=on_size_change,
|
| 214 |
+
inputs=[size_dd, verbose_cb, thread_dd],
|
| 215 |
outputs=[model_dd, clip_dd]
|
| 216 |
)
|
| 217 |
model_dd.change(
|
| 218 |
+
fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
|
| 219 |
+
inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
|
| 220 |
outputs=[]
|
| 221 |
)
|
| 222 |
clip_dd.change(
|
| 223 |
+
fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
|
| 224 |
+
inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
|
| 225 |
outputs=[]
|
| 226 |
)
|
| 227 |
verbose_cb.change(
|
| 228 |
+
fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
|
| 229 |
+
inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
|
| 230 |
outputs=[]
|
| 231 |
)
|
| 232 |
+
thread_dd.change(
|
| 233 |
+
fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
|
| 234 |
+
inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
|
| 235 |
+
outputs=[]
|
| 236 |
+
)
|
| 237 |
+
# Initial load
|
| 238 |
+
update_llm(default, mf[0], cf[0], default_verbose, default_threads)
|
| 239 |
|
| 240 |
interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
|
| 241 |
sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
|