Spaces:

akhaliq
/

granite-docling-258M

Paused

App Files Files Community

akhaliq HF Staff committed on Sep 17

Commit

4422fa9

verified ·

1 Parent(s): 0d29d88

Update app.py

Browse files

Files changed (1) hide show

app.py +222 -59

app.py CHANGED Viewed

@@ -4,39 +4,125 @@ import gradio as gr
 from docling_core.types.doc import DoclingDocument
 from docling_core.types.doc.document import DocTagsDocument
 from transformers import AutoProcessor, AutoModelForVision2Seq
-from transformers.image_utils import load_image
 from pathlib import Path
 import tempfile
 import subprocess
 import sys
-# Install flash attention before anything else
-@spaces.GPU(duration=120)
-def install_flash_attention():
-    """Install flash attention if not available"""
     try:
-        import flash_attn
-        return True
-    except ImportError:
-        print("Installing flash-attention...")
-        subprocess.check_call([sys.executable, "-m", "pip", "install", "flash-attn"])
-        return True
-install_flash_attention()
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# Load processor and model
-processor = AutoProcessor.from_pretrained("ibm-granite/granite-docling-258M")
-model = AutoModelForVision2Seq.from_pretrained(
-    "ibm-granite/granite-docling-258M",
-    dtype=torch.bfloat16,
-    attn_implementation="flash_attention_2",
-).to(DEVICE)
-def process_document(image, output_format="markdown"):
-    """Process uploaded image to generate Docling document"""
     try:
         # Prepare messages
         messages = [
             {
@@ -51,12 +137,29 @@ def process_document(image, output_format="markdown"):
         # Prepare inputs
         prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
         inputs = processor(text=prompt, images=[image], return_tensors="pt")
-        inputs = inputs.to(DEVICE)
-        # Generate outputs
         with torch.no_grad():
-            generated_ids = model.generate(**inputs, max_new_tokens=8192)
         prompt_length = inputs.input_ids.shape[1]
         trimmed_generated_ids = generated_ids[:, prompt_length:]
         doctags = processor.batch_decode(
@@ -64,6 +167,8 @@ def process_document(image, output_format="markdown"):
             skip_special_tokens=False,
         )[0].lstrip()
         # Create Docling document
         doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
         doc = DoclingDocument.load_from_doctags(doctags_doc, document_name="Document")
@@ -87,28 +192,55 @@ def process_document(image, output_format="markdown"):
             return markdown_content, html_file, doctags
     except Exception as e:
-        return f"Error processing document: {str(e)}", None, None
 def clear_results():
     """Clear all outputs"""
-    return None, None, None
 with gr.Blocks(
     title="Docling Document Converter",
     theme=gr.themes.Soft(),
     css="""
-    .header { text-align: center; margin-bottom: 2rem; }
-    .format-selector { margin-top: 1rem; }
     """
 ) as demo:
     gr.Markdown(
         """
-        # Docling Document Converter
-        Upload an image of a document page and convert it to structured markdown or HTML using the Granite-Docling model.
-        <div class="header">
-        <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a>
-        </div>
         """,
         elem_classes="header"
     )
@@ -118,50 +250,67 @@ with gr.Blocks(
             image_input = gr.Image(
                 label="Upload Document Image",
                 type="pil",
-                height=600,
-                width=400,
-                sources=["upload", "webcam", "clipboard"],
-                show_share_button=True
             )
             format_choice = gr.Radio(
                 choices=["markdown", "html", "both"],
                 value="markdown",
                 label="Output Format",
                 elem_classes="format-selector"
             )
-            process_btn = gr.Button(
-                "Convert to Docling",
-                variant="primary",
-                size="lg"
-            )
-            clear_btn = gr.Button(
-                "Clear Results",
-                variant="secondary"
             )
         with gr.Column(scale=2):
-            with gr.Tab("Markdown Output"):
                 markdown_output = gr.Markdown(
                     label="Structured Markdown",
                     show_copy_button=True,
-                    lines=20
                 )
-            with gr.Tab("HTML Output"):
                 html_output = gr.File(
                     label="Download HTML File",
                     file_types=[".html"],
-                    show_download_button=True
                 )
-            with gr.Tab("Raw DocTags"):
                 doctags_output = gr.Textbox(
                     label="Raw DocTags Output",
                     lines=15,
-                    show_copy_button=True
                 )
     # Event handlers
@@ -177,14 +326,28 @@ with gr.Blocks(
         outputs=[markdown_output, html_output, doctags_output]
     )
-    # Example button
-    gr.Examples(
-        examples=[
-            ["https://huggingface.co/ibm-granite/granite-docling-258M/resolve/main/assets/new_arxiv.png"],
-        ],
-        inputs=[image_input],
-        label="Try this example"
     )
 if __name__ == "__main__":
     demo.launch()

 from docling_core.types.doc import DoclingDocument
 from docling_core.types.doc.document import DocTagsDocument
 from transformers import AutoProcessor, AutoModelForVision2Seq
 from pathlib import Path
 import tempfile
+import os
 import subprocess
 import sys
+# Try to install flash-attn at startup if not available
+# Best-effort: a failed pip run is reported and startup continues without it.
+try:
+    import flash_attn
+    print("Flash attention already installed")
+except ImportError:
+    print("Flash attention not found, attempting to install...")
     try:
+        # check=True raises CalledProcessError on a non-zero pip exit status;
+        # capture_output=True captures pip's stdout/stderr instead of
+        # letting them reach the console.
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
+            check=True,
+            capture_output=True,
+            text=True
+        )
+        print("Flash attention installed successfully")
+    except subprocess.CalledProcessError as e:
+        print(f"Could not install flash attention: {e}")
+        print("Continuing without flash attention...")
+# Global variables for model and processor
+model = None
+processor = None
+model_loaded = False
+def load_model():
+    """Load the model and processor"""
+    global model, processor, model_loaded
+    if not model_loaded:
+        try:
+            # Load processor
+            processor = AutoProcessor.from_pretrained("ibm-granite/granite-docling-258M")
+            # Determine device
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            # Check if flash attention is available
+            attn_implementation = "eager"  # default
+            if device == "cuda":
+                try:
+                    import flash_attn
+                    attn_implementation = "flash_attention_2"
+                    print("Using Flash Attention 2")
+                except ImportError:
+                    print("Flash attention not available, using eager attention")
+                    attn_implementation = "eager"
+            # Load model with appropriate settings
+            print(f"Loading model on {device} with {attn_implementation}...")
+            if device == "cuda":
+                # For GPU, use bfloat16 for better performance
+                model = AutoModelForVision2Seq.from_pretrained(
+                    "ibm-granite/granite-docling-258M",
+                    torch_dtype=torch.bfloat16,
+                    attn_implementation=attn_implementation,
+                    device_map="auto",
+                    trust_remote_code=True
+                )
+            else:
+                # For CPU, use float32
+                model = AutoModelForVision2Seq.from_pretrained(
+                    "ibm-granite/granite-docling-258M",
+                    torch_dtype=torch.float32,
+                    attn_implementation="eager",
+                    trust_remote_code=True
+                )
+                model = model.to(device)
+            model_loaded = True
+            print(f"Model loaded successfully on {device}")
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            # Fallback loading without special attention
+            try:
+                processor = AutoProcessor.from_pretrained("ibm-granite/granite-docling-258M")
+                model = AutoModelForVision2Seq.from_pretrained(
+                    "ibm-granite/granite-docling-258M",
+                    torch_dtype=torch.float32,
+                    trust_remote_code=True
+                )
+                device = "cpu"
+                model = model.to(device)
+                model_loaded = True
+                print("Model loaded on CPU as fallback")
+            except Exception as fallback_error:
+                print(f"Fallback loading also failed: {fallback_error}")
+                raise
+# Load model at startup
+load_model()
+@spaces.GPU(duration=120)
+def process_document_gpu(image, output_format="markdown"):
+    """Process uploaded image to generate Docling document - GPU version"""
+    global model, processor
     try:
+        # Ensure model is loaded
+        if not model_loaded:
+            load_model()
+        # Move model to GPU if available (for ZeroGPU)
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        # For ZeroGPU, the model might need to be moved to GPU
+        if device == "cuda":
+            # Only move if not already on cuda
+            if hasattr(model, 'device') and model.device.type != 'cuda':
+                model = model.to(device)
+        print(f"Processing on {device}")
         # Prepare messages
         messages = [
             {
         # Prepare inputs
         prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
         inputs = processor(text=prompt, images=[image], return_tensors="pt")
+        # Move inputs to the same device as the model
+        inputs = {k: v.to(device) if hasattr(v, 'to') else v for k, v in inputs.items()}
+        # Generate outputs with memory-efficient settings
         with torch.no_grad():
+            if device == "cuda":
+                with torch.cuda.amp.autocast(dtype=torch.bfloat16):
+                    generated_ids = model.generate(
+                        **inputs,
+                        max_new_tokens=8192,
+                        do_sample=False,
+                        temperature=None,
+                        top_p=None
+                    )
+            else:
+                generated_ids = model.generate(
+                    **inputs,
+                    max_new_tokens=8192,
+                    do_sample=False
+                )
+        # Process the output
         prompt_length = inputs.input_ids.shape[1]
         trimmed_generated_ids = generated_ids[:, prompt_length:]
         doctags = processor.batch_decode(
             skip_special_tokens=False,
         )[0].lstrip()
+        print(f"Generated {len(doctags)} characters of DocTags")
         # Create Docling document
         doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
         doc = DoclingDocument.load_from_doctags(doctags_doc, document_name="Document")
             return markdown_content, html_file, doctags
     except Exception as e:
+        error_msg = f"Error processing document: {str(e)}"
+        print(error_msg)
+        import traceback
+        print(traceback.format_exc())
+        return error_msg, None, None
+def process_document(image, output_format="markdown"):
+    """Wrapper function to handle processing"""
+    if image is None:
+        return "Please upload an image first.", None, None
+    # Call the GPU-decorated function
+    return process_document_gpu(image, output_format)
 def clear_results():
     """Clear all outputs"""
+    # Three reset values: empty string, None, empty string.
+    # NOTE(review): presumably these line up with the markdown, file and
+    # DocTags output components; the handler wiring is not visible here.
+    return "", None, ""
+# Create Gradio interface
 with gr.Blocks(
     title="Docling Document Converter",
     theme=gr.themes.Soft(),
     css="""
+    .header {
+        text-align: center;
+        margin-bottom: 2rem;
+    }
+    .format-selector {
+        margin-top: 1rem;
+    }
+    .markdown-output {
+        max-height: 600px;
+        overflow-y: auto;
+        padding: 10px;
+        border: 1px solid #ddd;
+        border-radius: 5px;
+        background-color: #f9f9f9;
+    }
     """
 ) as demo:
     gr.Markdown(
         """
+        # 📄 Docling Document Converter
+        Upload an image of a document page and convert it to structured markdown or HTML using the IBM Granite-Docling model.
+        This space uses ZeroGPU for efficient processing. The model converts document images into structured formats while preserving layout and formatting.
+        ---
         """,
         elem_classes="header"
     )
             image_input = gr.Image(
                 label="Upload Document Image",
                 type="pil",
+                height=400,
+                sources=["upload", "clipboard"],
+                show_label=True
             )
             format_choice = gr.Radio(
                 choices=["markdown", "html", "both"],
                 value="markdown",
                 label="Output Format",
+                info="Choose the output format for the converted document",
                 elem_classes="format-selector"
             )
+            with gr.Row():
+                process_btn = gr.Button(
+                    "🚀 Convert Document",
+                    variant="primary",
+                    size="lg",
+                    scale=2
+                )
+                clear_btn = gr.Button(
+                    "🗑️ Clear",
+                    variant="secondary",
+                    size="lg",
+                    scale=1
+                )
+            # Status indicator
+            gr.Markdown(
+                """
+                ### ℹ️ Tips:
+                - Upload clear, high-resolution images for best results
+                - The model works best with text documents, tables, and structured content
+                - Processing may take a few moments depending on document complexity
+                """
             )
         with gr.Column(scale=2):
+            with gr.Tab("📝 Markdown Output"):
                 markdown_output = gr.Markdown(
+                    value="",
                     label="Structured Markdown",
                     show_copy_button=True,
+                    elem_classes="markdown-output"
                 )
+            with gr.Tab("🌐 HTML Output"):
                 html_output = gr.File(
                     label="Download HTML File",
                     file_types=[".html"],
+                    visible=True
                 )
+            with gr.Tab("🏷️ Raw DocTags"):
                 doctags_output = gr.Textbox(
                     label="Raw DocTags Output",
                     lines=15,
+                    max_lines=30,
+                    show_copy_button=True,
+                    placeholder="Raw DocTags will appear here after processing..."
                 )
     # Event handlers
         outputs=[markdown_output, html_output, doctags_output]
     )
+    # Examples section
+    with gr.Accordion("📚 Example Documents", open=False):
+        gr.Examples(
+            examples=[
+                ["https://huggingface.co/ibm-granite/granite-docling-258M/resolve/main/assets/new_arxiv.png"],
+            ],
+            inputs=[image_input],
+            label="Click to load an example document",
+            cache_examples=False
+        )
+    # Footer
+    gr.Markdown(
+        """
+        ---
+        <div style="text-align: center; margin-top: 2rem;">
+            <p>Powered by <a href="https://huggingface.co/ibm-granite/granite-docling-258M" target="_blank">IBM Granite-Docling-258M</a></p>
+            <p>Built with ❤️ using Gradio and Hugging Face Spaces</p>
+        </div>
+        """
     )
+# Launch the app
 if __name__ == "__main__":
     demo.launch()