alexrs-cohere committed
Commit fb7858e · 1 Parent(s): 7045760

Command A Reasoning

Files changed (5):
  1. README.md +1 -1
  2. app.py +161 -120
  3. pyproject.toml +1 -1
  4. requirements.txt +10 -10
  5. uv.lock +1 -1
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: Command A Vision
+title: Command A Reasoning
 emoji: ⚡
 colorFrom: red
 colorTo: purple
app.py CHANGED
@@ -1,175 +1,216 @@
 import os
-import base64
 from collections.abc import Iterator
 
 import gradio as gr
+from gradio import ChatMessage
 from cohere import ClientV2
+from cohere.core import RequestOptions
 
-model_id = "command-a-vision-07-2025"
+model_id = "command-a-reasoning-08-2025"
 
 # Initialize Cohere client
 api_key = os.getenv("COHERE_API_KEY")
 if not api_key:
     raise ValueError("COHERE_API_KEY environment variable is required")
-client = ClientV2(api_key=api_key, client_name="hf-command-a-vision-07-2025")
-
-IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
-
-def count_files_in_new_message(paths: list[str]) -> int:
-    image_count = 0
-    for path in paths:
-        if path.endswith(IMAGE_FILE_TYPES):
-            image_count += 1
-    return image_count
-
-
-def validate_media_constraints(message: dict) -> bool:
-    image_count = count_files_in_new_message(message["files"])
-    if image_count > 10:
-        gr.Warning("Maximum 10 images are supported.")
-        return False
-    return True
-
-
-def encode_image_to_base64(image_path: str) -> str:
-    """Encode an image file to base64 data URL format."""
-    with open(image_path, "rb") as image_file:
-        encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
-    # Determine file extension for MIME type
-    if image_path.lower().endswith('.png'):
-        mime_type = "image/png"
-    elif image_path.lower().endswith('.jpg') or image_path.lower().endswith('.jpeg'):
-        mime_type = "image/jpeg"
-    elif image_path.lower().endswith('.webp'):
-        mime_type = "image/webp"
-    else:
-        mime_type = "image/jpeg"  # default
-    return f"data:{mime_type};base64,{encoded_string}"
-
-
-def generate(message: dict, history: list[dict], max_new_tokens: int = 512) -> Iterator[str]:
-    if not validate_media_constraints(message):
-        yield ""
-        return
 
-    # Build messages for Cohere API
-    messages = []
-
-    # Add conversation history
-    for item in history:
-        if item["role"] == "assistant":
-            messages.append({"role": "assistant", "content": item["content"]})
+client = ClientV2(api_key=api_key, client_name="hf-command-a-reasoning-08-2025")
+
+def format_chat_history(messages: list) -> list:
+    """
+    Formats the chat history into a structure Cohere can understand
+    """
+    formatted_history = []
+    for message in messages:
+        # Handle both ChatMessage objects and regular dictionaries
+        if hasattr(message, "metadata") and message.metadata:
+            # Skip thinking messages (messages with metadata)
+            continue
+
+        # Extract role and content safely
+        if hasattr(message, "role"):
+            role = message.role
+            content = message.content
+        elif isinstance(message, dict):
+            role = message.get("role")
+            content = message.get("content")
         else:
-            content = item["content"]
-            if isinstance(content, str):
-                messages.append({"role": "user", "content": [{"type": "text", "text": content}]})
-            else:
-                filepath = content[0]
-                # For file-only messages, don't include empty text
-                messages.append({
-                    "role": "user",
-                    "content": [
-                        {"type": "image_url", "image_url": {"url": encode_image_to_base64(filepath)}}
-                    ]
-                })
+            continue
+
+        if role and content:
+            # Ensure content is a string to prevent validation issues
+            if content is None:
+                content = ""
+            elif not isinstance(content, str):
+                content = str(content)
+
+            formatted_history.append({
+                "role": role,
+                "content": content
+            })
+    return formatted_history
+
+def generate(message: str, history: list, thinking_budget: int) -> Iterator[list]:
+    # Create a clean working copy of the history (excluding thinking messages)
+    working_history = []
+    for msg in history:
+        # Skip thinking messages (messages with metadata)
+        if hasattr(msg, "metadata") and msg.metadata:
+            continue
+        working_history.append(msg)
+
+    # Format chat history for Cohere API (exclude thinking messages)
+    messages = format_chat_history(working_history)
 
     # Add current message
-    current_content = []
-    if message["text"]:
-        current_content.append({"type": "text", "text": message["text"]})
-
-    for file_path in message["files"]:
-        current_content.append({
-            "type": "image_url",
-            "image_url": {"url": encode_image_to_base64(file_path)}
-        })
-
-    # Only add the message if there's content
-    if current_content:
-        messages.append({"role": "user", "content": current_content})
+    if message:
+        messages.append({"role": "user", "content": message})
 
     try:
+        # Set thinking type based on thinking_budget
+        if thinking_budget == 0:
+            thinking_param = {"type": "disabled"}
+        else:
+            thinking_param = {"type": "enabled", "token_budget": thinking_budget}
         # Call Cohere API using the correct event type and delta access
         response = client.chat_stream(
             model=model_id,
             messages=messages,
             temperature=0.3,
-            max_tokens=max_new_tokens,
+            request_options=RequestOptions(additional_body_parameters={"thinking": thinking_param})
         )
 
-        output = ""
+        # Initialize buffers
+        thought_buffer = ""
+        response_buffer = ""
+        thinking_complete = False
+
+        # Start with just the new assistant messages for this interaction
+        current_interaction = [
+            ChatMessage(
+                role="assistant",
+                content="",
+                metadata={"title": "🧠 Thinking..."}
+            )
+        ]
+
         for event in response:
             if getattr(event, "type", None) == "content-delta":
-                # event.delta.message.content.text is the streamed text
-                text = getattr(event.delta.message.content, "text", "")
-                output += text
-                yield output
+                delta = event.delta
+
+                if hasattr(delta, 'message'):
+                    message = delta.message
+
+                    if hasattr(message, 'content'):
+                        content = message.content
+
+                        # Check for thinking tokens first
+                        thinking_text = getattr(content, 'thinking', None)
+                        if thinking_text:
+                            thought_buffer += thinking_text
+                            # Update thinking message with metadata
+                            current_interaction[0] = ChatMessage(
+                                role="assistant",
+                                content=thought_buffer,
+                                metadata={"title": "🧠 Thinking..."}
+                            )
+                            # Yield only the current interaction, but ensure proper formatting
+                            yield [
+                                {
+                                    "role": msg.role,
+                                    "content": msg.content,
+                                    "metadata": getattr(msg, "metadata", None)
+                                } for msg in current_interaction
+                            ]
+                            continue
+
+                        # Check for regular text tokens
+                        text = getattr(content, 'text', None)
+                        if text:
+                            # Ensure text is a string
+                            if text is None:
+                                text = ""
+                            elif not isinstance(text, str):
+                                text = str(text)
+
+                            # If we haven't completed thinking yet, this might be the start of the response
+                            if not thinking_complete and thought_buffer:
+                                thinking_complete = True
+                                # Add response message below thinking
+                                current_interaction.append(
+                                    ChatMessage(
+                                        role="assistant",
+                                        content=""
+                                    )
+                                )
+
+                            if thinking_complete:
+                                # if thinking is complete, we collapse the thinking message
+                                current_interaction[0] = ChatMessage(
+                                    role="assistant",
+                                    content=thought_buffer,
+                                    metadata={"title": "🧠 Thoughts", "status": "done"}
+                                )
+
+                            response_buffer += text
+                            # Update response message
+                            current_interaction[-1] = ChatMessage(
+                                role="assistant",
+                                content=response_buffer
+                            )
+                            # Yield only the current interaction, but ensure proper formatting
+                            yield [
+                                {
+                                    "role": msg.role,
+                                    "content": msg.content,
+                                    "metadata": getattr(msg, "metadata", None)
+                                } for msg in current_interaction
+                            ]
+
+        # Final cleanup: ensure the final response is clean
+        if thought_buffer and response_buffer:
+            # Keep both thinking and response messages in the final history
+            # The thinking message will be preserved with its metadata
+            pass
 
     except Exception as e:
         gr.Warning(f"Error calling Cohere API: {str(e)}")
-        yield ""
+        yield []
 
 
 examples = [
     [
-        {
-            "text": "Write a COBOL function to reverse a string",
-            "files": [],
-        }
+        "Write a COBOL function to reverse a string"
     ],
     [
-        {
-            "text": "Como sair de um helicóptero que caiu na água?",
-            "files": [],
-        }
+        "Como sair de um helicóptero que caiu na água?"
     ],
     [
-        {
-            "text": "What is the total amount of the invoice with and without tax?",
-            "files": ["assets/invoice-1.jpg"],
-        }
+        "What is the best way to learn machine learning?"
     ],
     [
-        {
-            "text": "¿Contra qué modelo gana más Aya Vision 8B?",
-            "files": ["assets/aya-vision-win-rates.png"],
-        }
+        "Explain quantum computing in simple terms"
     ],
     [
-        {
-            "text": "Erläutern Sie die Ergebnisse in der Tabelle",
-            "files": ["assets/command-a-longbech-v2.png"],
-        }
+        "How do I implement a binary search tree?"
     ],
     [
-        {
-            "text": "Explique la théorie de la relativité en français",
-            "files": [],
-        }
+        "Explique la théorie de la relativité en français"
     ],
-
-
 ]
 
 demo = gr.ChatInterface(
     fn=generate,
     type="messages",
-    textbox=gr.MultimodalTextbox(
-        file_types=list(IMAGE_FILE_TYPES),
-        file_count="multiple",
-        autofocus=True,
-    ),
-    multimodal=True,
-    additional_inputs=[
-        gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
-    ],
-    stop_btn=False,
-    title="Command A Vision",
+    autofocus=True,
+    title="Command A Reasoning",
     examples=examples,
-    run_examples_on_click=False,
-    cache_examples=False,
+    run_examples_on_click=True,
     css_paths="style.css",
     delete_cache=(1800, 1800),
+    cache_examples=False,
+    additional_inputs=[
+        gr.Slider(label="Thinking Budget", minimum=0, maximum=2000, step=10, value=500),
+    ],
 )
 
 if __name__ == "__main__":
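
For quick reference, the new call pattern above reduces, outside of Gradio, to a minimal sketch like the following. It assumes only what the diff itself uses: COHERE_API_KEY in the environment, the "thinking" option passed through the request body via RequestOptions(additional_body_parameters=...), and content-delta events whose delta.message.content carries separate thinking and text fields. The prompt string and the print-based consumer are illustrative.

import os

from cohere import ClientV2
from cohere.core import RequestOptions

client = ClientV2(api_key=os.environ["COHERE_API_KEY"])

response = client.chat_stream(
    model="command-a-reasoning-08-2025",
    messages=[{"role": "user", "content": "Write a COBOL function to reverse a string"}],
    temperature=0.3,
    # "thinking" is not a named SDK argument here; app.py injects it into the
    # request body the same way. {"type": "disabled"} turns reasoning off.
    request_options=RequestOptions(
        additional_body_parameters={"thinking": {"type": "enabled", "token_budget": 500}}
    ),
)

for event in response:
    if getattr(event, "type", None) == "content-delta":
        content = event.delta.message.content
        if getattr(content, "thinking", None):
            print(content.thinking, end="", flush=True)  # streamed reasoning tokens
        if getattr(content, "text", None):
            print(content.text, end="", flush=True)  # streamed answer tokens

Setting the Thinking Budget slider to 0 in the Space maps to {"type": "disabled"}, which is why the UI exposes 0 as a valid slider value.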
pyproject.toml CHANGED
@@ -1,5 +1,5 @@
 [project]
-name = "command-a-vision-07-2025"
+name = "command-a-reasoning-07-2025"
 version = "0.1.0"
 description = ""
 readme = "README.md"
requirements.txt CHANGED
@@ -1,7 +1,7 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile pyproject.toml -o requirements.txt
 accelerate==1.8.1
-    # via command-a-vision-07-2025 (pyproject.toml)
+    # via command-a-reasoning-08-2025 (pyproject.toml)
 aiofiles==24.1.0
     # via gradio
 annotated-types==0.7.0
@@ -14,7 +14,7 @@ anyio==4.9.0
 audioread==3.0.1
     # via librosa
 av==14.4.0
-    # via command-a-vision-07-2025 (pyproject.toml)
+    # via command-a-reasoning-08-2025 (pyproject.toml)
 certifi==2025.6.15
     # via
     #   httpcore
@@ -49,7 +49,7 @@ fsspec==2025.5.1
     #   torch
 gradio==5.34.2
     # via
-    #   command-a-vision-07-2025 (pyproject.toml)
+    #   command-a-reasoning-08-2025 (pyproject.toml)
     #   spaces
 gradio-client==1.10.3
     # via gradio
@@ -60,7 +60,7 @@ h11==0.16.0
     #   httpcore
     #   uvicorn
 hf-transfer==0.1.9
-    # via command-a-vision-07-2025 (pyproject.toml)
+    # via command-a-reasoning-08-2025 (pyproject.toml)
 hf-xet==1.1.5
     # via huggingface-hub
 httpcore==1.0.9
@@ -95,7 +95,7 @@ joblib==1.5.1
 lazy-loader==0.4
     # via librosa
 librosa==0.11.0
-    # via command-a-vision-07-2025 (pyproject.toml)
+    # via command-a-reasoning-08-2025 (pyproject.toml)
 llvmlite==0.44.0
     # via numba
 markdown-it-py==3.0.0
@@ -249,7 +249,7 @@ soundfile==0.13.1
 soxr==0.5.0.post1
     # via librosa
 spaces==0.37.1
-    # via command-a-vision-07-2025 (pyproject.toml)
+    # via command-a-reasoning-08-2025 (pyproject.toml)
 starlette==0.46.2
     # via
     #   fastapi
@@ -259,27 +259,27 @@ sympy==1.13.1
 threadpoolctl==3.6.0
     # via scikit-learn
 timm==1.0.16
-    # via command-a-vision-07-2025 (pyproject.toml)
+    # via command-a-reasoning-08-2025 (pyproject.toml)
 tokenizers==0.21.2
     # via transformers
 tomlkit==0.13.3
     # via gradio
 torch==2.5.1
     # via
-    #   command-a-vision-07-2025 (pyproject.toml)
+    #   command-a-reasoning-08-2025 (pyproject.toml)
     #   accelerate
     #   timm
     #   torchvision
 torchvision==0.20.1
     # via
-    #   command-a-vision-07-2025 (pyproject.toml)
+    #   command-a-reasoning-08-2025 (pyproject.toml)
     #   timm
 tqdm==4.67.1
     # via
     #   huggingface-hub
     #   transformers
 transformers==4.53.0
-    # via command-a-vision-07-2025 (pyproject.toml)
+    # via command-a-reasoning-08-2025 (pyproject.toml)
 triton==3.1.0
     # via torch
 typer==0.16.0
uv.lock CHANGED
@@ -321,7 +321,7 @@ wheels = [
 ]
 
 [[package]]
-name = "command-a-vision-07-2025"
+name = "command-a-reasoning-07-2025"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [