Spaces:

prithivMLmods
/

Multimodal-OCR3

Running on Zero

prithivMLmods committed 14 days ago

Commit

a9a60a5

verified ·

1 Parent(s): e6ddca2

update app

Files changed (1) hide show

app.py CHANGED Viewed

@@ -133,6 +133,7 @@ MODEL_ID_V = "datalab-to/chandra"
 processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
 model_v = Qwen3VLForConditionalGeneration.from_pretrained(
     MODEL_ID_V,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -142,6 +143,7 @@ MODEL_ID_X = "nanonets/Nanonets-OCR2-3B"
 processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
 model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
     trust_remote_code=True,
     torch_dtype=torch.bfloat16,
 ).to(device).eval()
@@ -162,6 +164,7 @@ MODEL_ID_M = "allenai/olmOCR-2-7B-1025"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -235,7 +238,7 @@ image_examples = [
     ["Extract the contents. [page].", "examples/2.jpg"],
 ]
-with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     gr.Markdown("# **Multimodal [OCR3](https://huggingface.co/collections/prithivMLmods/multimodal-implementations)**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=2):
@@ -274,4 +277,4 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     )
 if __name__ == "__main__":
-    demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)

 processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
 model_v = Qwen3VLForConditionalGeneration.from_pretrained(
     MODEL_ID_V,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
 model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.bfloat16,
 ).to(device).eval()
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
     ["Extract the contents. [page].", "examples/2.jpg"],
 ]
+with gr.Blocks() as demo:
     gr.Markdown("# **Multimodal [OCR3](https://huggingface.co/collections/prithivMLmods/multimodal-implementations)**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=2):
     )
 if __name__ == "__main__":
+    demo.queue(max_size=50).launch(css=css, theme=steel_blue_theme, mcp_server=True, ssr_mode=False, show_error=True)