Commit 966ffcd · Parent(s): 2eac01a
Add cost tracking functionality across various components, including Executor, Planner, and FieldMapperAgent. Integrate CostTracker to monitor LLM and document intelligence costs, enhancing logging for cost-related metrics and providing detailed cost breakdowns in the user interface.
Files changed:
- src/agents/__pycache__/field_mapper_agent.cpython-312.pyc +0 -0
- src/agents/__pycache__/pdf_agent.cpython-312.pyc +0 -0
- src/agents/__pycache__/table_agent.cpython-312.pyc +0 -0
- src/agents/field_mapper_agent.py +57 -3
- src/agents/pdf_agent.py +6 -0
- src/app.py +25 -2
- src/orchestrator/__pycache__/executor.cpython-312.pyc +0 -0
- src/orchestrator/__pycache__/planner.cpython-312.pyc +0 -0
- src/orchestrator/executor.py +12 -1
- src/orchestrator/planner.py +9 -2
- src/services/__pycache__/llm_client.cpython-312.pyc +0 -0
- src/services/cost_tracker.py +221 -0
- src/services/llm_client.py +29 -2
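
Taken together, the wiring introduced by this commit is: the Streamlit app creates one CostTracker per run, hands it to the Planner and Executor, the Executor puts it into the shared agent context, and LLMClient.responses and PDFAgent report token and page usage into it. A minimal sketch of that flow, assuming module paths rooted at src/ as in the repo (plan and pdf_file come from the surrounding app code and are omitted here):

    # Sketch of the end-to-end wiring shown in the diffs below; not a drop-in script.
    from services.cost_tracker import CostTracker
    from orchestrator.planner import Planner
    from orchestrator.executor import Executor
    from config.settings import settings

    cost_tracker = CostTracker()                                        # one tracker per run
    planner = Planner(cost_tracker=cost_tracker)                        # planner LLM calls are tracked
    executor = Executor(settings=settings, cost_tracker=cost_tracker)   # same instance shared

    # results, logs = executor.run(plan, pdf_file)                      # agents see ctx["cost_tracker"]
    costs = cost_tracker.calculate_current_file_costs()
    total = costs["openai"]["total_cost"] + costs["document_intelligence"]["total_cost"]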
src/agents/__pycache__/field_mapper_agent.cpython-312.pyc
CHANGED
Binary files a/src/agents/__pycache__/field_mapper_agent.cpython-312.pyc and b/src/agents/__pycache__/field_mapper_agent.cpython-312.pyc differ

src/agents/__pycache__/pdf_agent.cpython-312.pyc
CHANGED
Binary files a/src/agents/__pycache__/pdf_agent.cpython-312.pyc and b/src/agents/__pycache__/pdf_agent.cpython-312.pyc differ

src/agents/__pycache__/table_agent.cpython-312.pyc
CHANGED
Binary files a/src/agents/__pycache__/table_agent.cpython-312.pyc and b/src/agents/__pycache__/table_agent.cpython-312.pyc differ
src/agents/field_mapper_agent.py
CHANGED

@@ -35,7 +35,25 @@ class FieldMapperAgent(BaseAgent):
         try:
             self.logger.info("Inferring document context...")
             self.logger.debug(f"Using text preview: {text[:500]}...")
-
+
+            # Get cost tracker from context
+            cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, 'ctx') else None
+            if cost_tracker:
+                self.logger.info("Cost tracker found in context")
+            else:
+                self.logger.warning("No cost tracker found in context")
+
+            context = self.llm.responses(
+                prompt, temperature=0.0,
+                ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
+                description="Document Context Inference"
+            )
+
+            # Log cost tracking results if available
+            if cost_tracker:
+                self.logger.info(f"Context inference costs - Input tokens: {cost_tracker.llm_input_tokens}, Output tokens: {cost_tracker.llm_output_tokens}")
+                self.logger.info(f"Context inference cost: ${cost_tracker.calculate_current_file_costs()['openai']['total_cost']:.4f}")
+
             self.logger.info(f"Inferred context: {context}")
             return context
         except Exception as e:
@@ -142,7 +160,25 @@ class FieldMapperAgent(BaseAgent):
         try:
             self.logger.info(f"Calling LLM to extract value for field '{field}'")
             self.logger.debug(f"Using prompt: {prompt}")
-
+
+            # Get cost tracker from context
+            cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, 'ctx') else None
+            if cost_tracker:
+                self.logger.info("Cost tracker found in context")
+            else:
+                self.logger.warning("No cost tracker found in context")
+
+            value = self.llm.responses(
+                prompt, temperature=0.0,
+                ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
+                description=f"Field Extraction - {field} (Search)"
+            )
+
+            # Log cost tracking results if available
+            if cost_tracker:
+                self.logger.info(f"Field extraction costs - Input tokens: {cost_tracker.llm_input_tokens}, Output tokens: {cost_tracker.llm_output_tokens}")
+                self.logger.info(f"Field extraction cost: ${cost_tracker.calculate_current_file_costs()['openai']['total_cost']:.4f}")
+
             self.logger.debug(f"Raw LLM response: {value}")

             if value and value.lower() not in ["none", "null", "n/a"]:
@@ -191,7 +227,25 @@ class FieldMapperAgent(BaseAgent):

         try:
             self.logger.info(f"Calling LLM to extract value for field '{field}' from page")
-
+
+            # Get cost tracker from context
+            cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, 'ctx') else None
+            if cost_tracker:
+                self.logger.info("Cost tracker found in context")
+            else:
+                self.logger.warning("No cost tracker found in context")
+
+            value = self.llm.responses(
+                prompt, temperature=0.0,
+                ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
+                description=f"Field Extraction - {field} (Page)"
+            )
+
+            # Log cost tracking results if available
+            if cost_tracker:
+                self.logger.info(f"Page extraction costs - Input tokens: {cost_tracker.llm_input_tokens}, Output tokens: {cost_tracker.llm_output_tokens}")
+                self.logger.info(f"Page extraction cost: ${cost_tracker.calculate_current_file_costs()['openai']['total_cost']:.4f}")
+
             self.logger.debug(f"Raw LLM response: {value}")

             if value and value.lower() not in ["none", "null", "n/a"]:
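
The same retrieve-tracker / call / log sequence appears three times in this file. If one wanted to factor it out later, a small helper along these lines could work; the _tracked_llm_call name is hypothetical and not part of the commit:

    # Hypothetical refactor (not in the commit): one helper on FieldMapperAgent for
    # the repeated pattern of threading the cost tracker through an LLM call.
    def _tracked_llm_call(self, prompt: str, description: str) -> str:
        cost_tracker = self.ctx.get("cost_tracker") if hasattr(self, "ctx") else None
        result = self.llm.responses(
            prompt, temperature=0.0,
            ctx={"cost_tracker": cost_tracker} if cost_tracker else None,
            description=description,
        )
        if cost_tracker:
            costs = cost_tracker.calculate_current_file_costs()
            self.logger.info(f"{description} cost so far: ${costs['openai']['total_cost']:.4f}")
        return result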
src/agents/pdf_agent.py
CHANGED

@@ -25,4 +25,10 @@ class PDFAgent(BaseAgent):
         pdf_bytes = pdf_file.read()
         text = self._extract_text(pdf_bytes)
         ctx["text"] = text
+
+        # After extracting pages
+        num_pages = len(fitz.open(stream=pdf_bytes, filetype="pdf"))  # type: ignore[arg-type]
+        if "cost_tracker" in ctx:
+            ctx["cost_tracker"].add_di_pages(num_pages)
+
         return text
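
PDFAgent counts pages by reopening the PDF bytes with PyMuPDF (len() on a fitz document is its page count) and reports them to the tracker, which books them under the LAYOUT rate defined in cost_tracker.py ($10 per 1,000 pages). A quick back-of-the-envelope check of what that accrues, using a made-up 12-page document:

    # Rough check of the DI cost a 12-page document would accrue; the page count is
    # an illustrative example, the rate comes from src/services/cost_tracker.py.
    from services.cost_tracker import CostTracker

    tracker = CostTracker()
    tracker.add_di_pages(12)                      # what PDFAgent would report for 12 pages
    di = tracker.calculate_current_file_costs()["document_intelligence"]
    print(di["total_cost"])                       # (12 / 1000) * 10.0 == 0.12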
src/app.py
CHANGED

@@ -13,6 +13,7 @@ from datetime import datetime
 import io
 import sys
 from io import StringIO
+from services.cost_tracker import CostTracker

 # Create a custom stream handler to capture logs
 class LogCaptureHandler(logging.StreamHandler):
@@ -317,7 +318,10 @@ else:  # page == "Execution"
     doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf")  # type: ignore[arg-type]
     preview = "\n".join(page.get_text() for page in doc[:10])[:20000]  # first 2 pages, 2k chars

-
+    # Create a cost tracker for this run
+    cost_tracker = CostTracker()
+
+    planner = Planner(cost_tracker=cost_tracker)
     plan = planner.build_plan(
         pdf_meta={"filename": pdf_file.name},
         doc_preview=preview,
@@ -329,9 +333,28 @@ else:  # page == "Execution"
     st.markdown("---")

     with st.spinner("Executing …"):
-        executor = Executor(settings=settings)
+        executor = Executor(settings=settings, cost_tracker=cost_tracker)
         results, logs = executor.run(plan, pdf_file)

+        # Get detailed costs
+        costs = executor.cost_tracker.calculate_current_file_costs()
+        model_cost = costs["openai"]["total_cost"]
+        di_cost = costs["document_intelligence"]["total_cost"]
+
+        # Display detailed costs table
+        st.subheader("Detailed Costs")
+        costs_df = executor.cost_tracker.get_detailed_costs_table()
+        st.dataframe(costs_df, use_container_width=True)
+
+        st.info(
+            f"LLM input tokens: {executor.cost_tracker.llm_input_tokens}, "
+            f"LLM output tokens: {executor.cost_tracker.llm_output_tokens}, "
+            f"DI pages: {executor.cost_tracker.di_pages}, "
+            f"Model cost: ${model_cost:.4f}, "
+            f"DI cost: ${di_cost:.4f}, "
+            f"Total cost: ${model_cost + di_cost:.4f}"
+        )
+
     # Add detailed logging about what executor returned
     logger.info(f"Executor returned results of type: {type(results)}")
     logger.info(f"Results content: {results}")
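
The keys app.py reads here (costs["openai"]["total_cost"] and costs["document_intelligence"]["total_cost"]) come from CostTracker.calculate_current_file_costs(), whose return value is shaped roughly as below (see src/services/cost_tracker.py later in this diff; the values are placeholders, not real output):

    # Approximate shape of CostTracker.calculate_current_file_costs() as consumed
    # by app.py above; values are illustrative placeholders.
    costs = {
        "openai": {
            "input_tokens": 0,
            "output_tokens": 0,
            "input_cost": 0.0,
            "output_cost": 0.0,
            "total_cost": 0.0,          # model_cost in app.py
            "calls": [],                # per-call details (LLMCall.__dict__)
        },
        "document_intelligence": {
            "total_pages": 0,
            "operations": {},           # e.g. {"LAYOUT": {"pages": 0, "cost": 0.0}}
            "total_cost": 0.0,          # di_cost in app.py
        },
    }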
src/orchestrator/__pycache__/executor.cpython-312.pyc
CHANGED
Binary files a/src/orchestrator/__pycache__/executor.cpython-312.pyc and b/src/orchestrator/__pycache__/executor.cpython-312.pyc differ

src/orchestrator/__pycache__/planner.cpython-312.pyc
CHANGED
Binary files a/src/orchestrator/__pycache__/planner.cpython-312.pyc and b/src/orchestrator/__pycache__/planner.cpython-312.pyc differ
src/orchestrator/executor.py
CHANGED

@@ -16,9 +16,12 @@ from agents.semantic_reasoner import SemanticReasonerAgent
 from agents.confidence_scorer import ConfidenceScorer
 from agents.query_generator import QueryGenerator

+# Add import for CostTracker
+from services.cost_tracker import CostTracker
+

 class Executor:
-    def __init__(self, settings):
+    def __init__(self, settings, cost_tracker=None):
         # map name → instance
         self.tools = {
             "PDFAgent": PDFAgent(),
@@ -32,6 +35,7 @@ class Executor:

         self.logs: List[Dict[str, Any]] = []
         self.logger = logging.getLogger(__name__)
+        self.cost_tracker = cost_tracker or CostTracker()

     # ---------------------------------------------------------
     def run(self, plan: Dict[str, Any], pdf_file) -> tuple[pd.DataFrame, List[Dict[str, Any]]]:  # noqa: D401
@@ -47,6 +51,7 @@ class Executor:
             "results": [],
             "conf": 1.0,
             "pdf_meta": plan.get("pdf_meta", {}),  # Include the plan's metadata
+            "cost_tracker": self.cost_tracker,
         }

         try:
@@ -112,6 +117,12 @@ class Executor:
         else:
             df = pd.DataFrame()

+        # At the end, log the costs
+        self.logger.info(f"Total LLM input tokens: {self.cost_tracker.llm_input_tokens}")
+        self.logger.info(f"Total LLM output tokens: {self.cost_tracker.llm_output_tokens}")
+        self.logger.info(f"Total DI pages: {self.cost_tracker.di_pages}")
+        self.logger.info(f"Total cost: ${self.cost_tracker.total_cost():.4f}")
+
         return df, self.logs

     # ---------------------------------------------------------
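
The cost_tracker or CostTracker() default keeps the Executor usable on its own, while app.py passes a shared instance so planner, executor, and agent costs all land in one tracker. A small sketch of that distinction, assuming only the constructor signatures shown above:

    # Standalone use falls back to a private tracker; passing one in shares it.
    from services.cost_tracker import CostTracker
    from orchestrator.executor import Executor
    from config.settings import settings

    solo = Executor(settings=settings)                       # gets its own CostTracker()
    shared_tracker = CostTracker()
    ex = Executor(settings=settings, cost_tracker=shared_tracker)
    assert ex.cost_tracker is shared_tracker                 # same object the app reads later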
src/orchestrator/planner.py
CHANGED

@@ -11,6 +11,7 @@ import yaml

 from services.llm_client import LLMClient
 from config.settings import settings
+from services.cost_tracker import CostTracker


 _PROMPTS_FILE = Path(__file__).parent.parent / "config" / "prompts.yaml"
@@ -23,9 +24,10 @@ logger = logging.getLogger(__name__)
 class Planner:
     """Generate a plan with the Responses API; fall back to a static template if parsing fails."""

-    def __init__(self) -> None:
+    def __init__(self, cost_tracker=None) -> None:
         self.prompt_template = self._load_prompt("planner")
         self.llm = LLMClient(settings)
+        self.cost_tracker = cost_tracker or CostTracker()
         logger.info("Planner initialized with prompt template")

     # --------------------------------------------------
@@ -53,7 +55,12 @@ class Planner:

         try:
             logger.info("Calling LLM to generate plan")
-            raw = self.llm.responses(
+            raw = self.llm.responses(
+                prompt,
+                temperature=0.0,
+                ctx={"cost_tracker": self.cost_tracker},
+                description="Execution Plan Generation"
+            )
             logger.debug(f"Raw LLM response: {raw}")

             try:
src/services/__pycache__/llm_client.cpython-312.pyc
CHANGED
Binary files a/src/services/__pycache__/llm_client.cpython-312.pyc and b/src/services/__pycache__/llm_client.cpython-312.pyc differ
src/services/cost_tracker.py
ADDED

@@ -0,0 +1,221 @@
+"""Cost tracking service for Azure OpenAI and Document Intelligence."""
+
+from dataclasses import dataclass
+from typing import Dict, Optional, List
+import logging
+from datetime import datetime
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class TokenCosts:
+    """Costs per 1M tokens for different models."""
+    GPT41: float = 2.0  # $10 per 1M input tokens for GPT-4.1
+    GPT41_OUTPUT: float = 8.0  # $30 per 1M output tokens for GPT-4.1
+
+@dataclass
+class DocumentIntelligenceCosts:
+    """Costs for Document Intelligence."""
+    READ: float = 1.5  # $1.5 per 1,000 pages
+    LAYOUT: float = 10.0  # $10 per 1,000 pages
+    CUSTOM: float = 30.0  # $30 per 1,000 pages
+
+@dataclass
+class LLMCall:
+    """Represents a single LLM call with its details."""
+    description: str
+    input_tokens: int
+    output_tokens: int
+    timestamp: datetime = datetime.now()
+
+    @property
+    def input_cost(self) -> float:
+        return (self.input_tokens / 1_000_000) * TokenCosts.GPT41
+
+    @property
+    def output_cost(self) -> float:
+        return (self.output_tokens / 1_000_000) * TokenCosts.GPT41_OUTPUT
+
+    @property
+    def total_cost(self) -> float:
+        return self.input_cost + self.output_cost
+
+class CostTracker:
+    """Tracks costs for Azure OpenAI and Document Intelligence usage."""
+
+    def __init__(self):
+        self.token_costs = TokenCosts()
+        self.di_costs = DocumentIntelligenceCosts()
+        self.current_file_costs = {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "di_pages": 0,
+            "di_operations": {}  # Track different DI operations
+        }
+        self.total_costs = {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "di_pages": 0,
+            "di_operations": {}
+        }
+        self.llm_input_tokens = 0
+        self.llm_output_tokens = 0
+        self.di_pages = 0
+        self.llm_calls: List[LLMCall] = []  # Track individual LLM calls
+
+    def reset_current_file(self):
+        """Reset costs for current file."""
+        self.current_file_costs = {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "di_pages": 0,
+            "di_operations": {}
+        }
+        self.llm_calls = []  # Reset LLM calls for new file
+
+    def add_tokens(self, input_tokens: int, output_tokens: int, model: str = "GPT41"):
+        """Add tokens for current file and total."""
+        self.current_file_costs["input_tokens"] += input_tokens
+        self.current_file_costs["output_tokens"] += output_tokens
+        self.total_costs["input_tokens"] += input_tokens
+        self.total_costs["output_tokens"] += output_tokens
+
+        logger.info(f"Added tokens - Input: {input_tokens}, Output: {output_tokens} for model {model}")
+
+    def add_di_operation(self, operation: str, pages: int):
+        """Add Document Intelligence operation costs."""
+        if operation not in self.current_file_costs["di_operations"]:
+            self.current_file_costs["di_operations"][operation] = 0
+            self.total_costs["di_operations"][operation] = 0
+
+        self.current_file_costs["di_operations"][operation] += pages
+        self.current_file_costs["di_pages"] += pages
+        self.total_costs["di_operations"][operation] += pages
+        self.total_costs["di_pages"] += pages
+
+        logger.info(f"Added DI operation - {operation}: {pages} pages")
+
+    def calculate_current_file_costs(self) -> Dict:
+        """Calculate costs for current file."""
+        costs = {
+            "openai": {
+                "input_tokens": self.current_file_costs["input_tokens"],
+                "output_tokens": self.current_file_costs["output_tokens"],
+                "input_cost": (self.current_file_costs["input_tokens"] / 1_000_000) * self.token_costs.GPT41,
+                "output_cost": (self.current_file_costs["output_tokens"] / 1_000_000) * self.token_costs.GPT41_OUTPUT,
+                "total_cost": 0,
+                "calls": [call.__dict__ for call in self.llm_calls]  # Include detailed call information
+            },
+            "document_intelligence": {
+                "total_pages": self.current_file_costs["di_pages"],
+                "operations": {},
+                "total_cost": 0
+            }
+        }
+
+        # Calculate OpenAI total cost
+        costs["openai"]["total_cost"] = costs["openai"]["input_cost"] + costs["openai"]["output_cost"]
+
+        # Calculate Document Intelligence costs
+        for operation, pages in self.current_file_costs["di_operations"].items():
+            cost = (pages / 1000) * getattr(self.di_costs, operation.upper(), self.di_costs.READ)
+            costs["document_intelligence"]["operations"][operation] = {
+                "pages": pages,
+                "cost": cost
+            }
+            costs["document_intelligence"]["total_cost"] += cost
+
+        return costs
+
+    def get_total_costs(self) -> Dict:
+        """Get total costs across all files."""
+        return {
+            "openai": {
+                "input_tokens": self.total_costs["input_tokens"],
+                "output_tokens": self.total_costs["output_tokens"],
+                "input_cost": (self.total_costs["input_tokens"] / 1_000_000) * self.token_costs.GPT41,
+                "output_cost": (self.total_costs["output_tokens"] / 1_000_000) * self.token_costs.GPT41_OUTPUT,
+                "total_cost": 0
+            },
+            "document_intelligence": {
+                "total_pages": self.total_costs["di_pages"],
+                "operations": {},
+                "total_cost": 0
+            }
+        }
+
+    def add_llm_tokens(self, input_tokens, output_tokens, description: str = "LLM Call"):
+        """Add tokens for an LLM call with a description."""
+        self.llm_input_tokens += input_tokens
+        self.llm_output_tokens += output_tokens
+        # Also update the current file costs
+        self.current_file_costs["input_tokens"] += input_tokens
+        self.current_file_costs["output_tokens"] += output_tokens
+        self.total_costs["input_tokens"] += input_tokens
+        self.total_costs["output_tokens"] += output_tokens
+        # Add to LLM calls list
+        self.llm_calls.append(LLMCall(
+            description=description,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens
+        ))
+
+    def add_di_pages(self, num_pages):
+        self.di_pages += num_pages
+        # Also update the current file costs
+        self.current_file_costs["di_pages"] += num_pages
+        self.total_costs["di_pages"] += num_pages
+        # Add to LAYOUT operation by default since we're using layout analysis
+        if "LAYOUT" not in self.current_file_costs["di_operations"]:
+            self.current_file_costs["di_operations"]["LAYOUT"] = 0
+            self.total_costs["di_operations"]["LAYOUT"] = 0
+        self.current_file_costs["di_operations"]["LAYOUT"] += num_pages
+        self.total_costs["di_operations"]["LAYOUT"] += num_pages
+
+    def total_cost(self):
+        """Calculate total cost using the same pricing as calculate_current_file_costs."""
+        costs = self.calculate_current_file_costs()
+        return costs["openai"]["total_cost"] + costs["document_intelligence"]["total_cost"]
+
+    def get_detailed_costs_table(self) -> pd.DataFrame:
+        """Return a DataFrame of detailed costs."""
+        if not self.llm_calls:
+            return pd.DataFrame()
+
+        # Create list of dictionaries for DataFrame
+        rows = []
+        for call in self.llm_calls:
+            rows.append({
+                'Description': call.description,
+                'Input Tokens': call.input_tokens,
+                'Output Tokens': call.output_tokens,
+                'Input Cost': f"${call.input_cost:.4f}",
+                'Output Cost': f"${call.output_cost:.4f}",
+                'Total Cost': f"${call.total_cost:.4f}"
+            })
+
+        # Calculate totals
+        total_input = sum(call.input_tokens for call in self.llm_calls)
+        total_output = sum(call.output_tokens for call in self.llm_calls)
+        total_input_cost = sum(call.input_cost for call in self.llm_calls)
+        total_output_cost = sum(call.output_cost for call in self.llm_calls)
+        total_cost = total_input_cost + total_output_cost
+
+        # Add total row
+        rows.append({
+            'Description': 'TOTAL',
+            'Input Tokens': total_input,
+            'Output Tokens': total_output,
+            'Input Cost': f"${total_input_cost:.4f}",
+            'Output Cost': f"${total_output_cost:.4f}",
+            'Total Cost': f"${total_cost:.4f}"
+        })
+
+        # Create DataFrame
+        df = pd.DataFrame(rows)
+
+        # Set column order
+        df = df[['Description', 'Input Tokens', 'Output Tokens', 'Input Cost', 'Output Cost', 'Total Cost']]
+
+        return df
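
For reference, a minimal usage sketch of the new service, using the method names defined above; the token and page counts are made-up examples:

    # Record one LLM call and some DI pages, then read back the per-file breakdown.
    from services.cost_tracker import CostTracker

    tracker = CostTracker()
    tracker.add_llm_tokens(input_tokens=1200, output_tokens=300,
                           description="Document Context Inference")
    tracker.add_di_pages(4)                                  # booked under LAYOUT

    breakdown = tracker.calculate_current_file_costs()
    print(breakdown["openai"]["total_cost"])                 # (1200/1e6)*2.0 + (300/1e6)*8.0
    print(breakdown["document_intelligence"]["total_cost"])  # (4/1000)*10.0
    print(tracker.total_cost())                              # sum of both
    print(tracker.get_detailed_costs_table())                # per-call DataFrame incl. TOTAL row

One design note worth flagging: LLMCall.timestamp uses a dataclass default of datetime.now(), which is evaluated once when the class is defined, so every recorded call shares that same timestamp unless one is passed explicitly.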
src/services/llm_client.py
CHANGED

@@ -35,12 +35,15 @@ class LLMClient:
        logger.info(f"API Key length: {len(openai.api_key) if openai.api_key else 0}")

    # --------------------------------------------------
-    def responses(self, prompt: str, tools: List[dict] | None = None, **kwargs: Any) -> str:
+    def responses(self, prompt: str, tools: List[dict] | None = None, description: str = "LLM Call", **kwargs: Any) -> str:
        """Call the Responses API and return the assistant content as string."""
        logger = logging.getLogger(__name__)
        logger.info(f"Making request with API version: {openai.api_version}")
        logger.info(f"Request URL will be: {openai.api_base}/openai/responses?api-version={openai.api_version}")

+        # Remove ctx from kwargs before passing to openai
+        ctx = kwargs.pop("ctx", None)
+
        resp = openai.responses.create(
            input=prompt,
            model=self._deployment,
@@ -49,7 +52,31 @@ class LLMClient:
        )
        # Log the raw response for debugging
        logging.debug(f"LLM raw response: {resp}")
-
+
+        # --- Cost tracking: must be BEFORE any return! ---
+        logger.info(f"LLMClient.responses: ctx is {ctx}")
+        if ctx and "cost_tracker" in ctx:
+            logger.info(f"LLMClient.responses: cost_tracker is {ctx['cost_tracker']}")
+            usage = getattr(resp, "usage", None)
+            if usage:
+                logger.info(f"LLMClient.responses: usage is {usage}")
+                ctx["cost_tracker"].add_llm_tokens(
+                    input_tokens=getattr(usage, "input_tokens", 0),
+                    output_tokens=getattr(usage, "output_tokens", 0),
+                    description=description
+                )
+            logger.info(f"LLMClient.responses: prompt: {prompt[:200]}...")  # Log first 200 chars
+            logger.info(f"LLMClient.responses: resp: {str(resp)[:200]}...")  # Log first 200 chars
+            if usage:
+                logger.info(f"LLMClient.responses: usage.input_tokens={getattr(usage, 'input_tokens', None)}, usage.output_tokens={getattr(usage, 'output_tokens', None)}, usage.total_tokens={getattr(usage, 'total_tokens', None)}")
+            else:
+                # Fallback: estimate tokens (very rough)
+                ctx["cost_tracker"].add_llm_tokens(
+                    input_tokens=len(prompt.split()),
+                    output_tokens=len(str(resp).split()),
+                    description=description
+                )
+
        # Extract the text content from the response
        if hasattr(resp, "output") and isinstance(resp.output, list):
            # Handle list of ResponseOutputMessage objects