Spaces:

DataQuests
/

DeepCritical

Running

App Files Files Community

Tonic committed 12 days ago

Commit

3f9bc77

2 Parent(s): 97d41ab ef18c90

Merge branch 'dev' into feat/deepresearch

Browse files

Signed-off-by: Tonic <[email protected]>

Files changed (7) hide show

.env.example +11 -6
.gitignore +1 -0
src/app.py +2 -0
src/services/neo4j_service.py +107 -0
src/tools/neo4j_search.py +65 -0
src/tools/search_handler.py +27 -0
src/utils/models.py +1 -1

.env.example CHANGED Viewed

@@ -1,11 +1,11 @@
-# ============== LLM CONFIGURATION ==============
-# Provider: "openai" or "anthropic"
-LLM_PROVIDER=openai
-# API Keys (at least one required for full LLM analysis)
-OPENAI_API_KEY=sk-your-key-here
-ANTHROPIC_API_KEY=sk-ant-your-key-here
 # Model names (optional - sensible defaults set in config.py)
 # ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
@@ -100,3 +100,8 @@ NCBI_API_KEY=your-ncbi-key-here
 # Vector Database (optional - for LlamaIndex RAG)
 CHROMA_DB_PATH=./chroma_db

+# HuggingFace
+HF_TOKEN=your_huggingface_token_here
+# OpenAI (optional)
+OPENAI_API_KEY=your_openai_key_here
+# Anthropic (optional)
+ANTHROPIC_API_KEY=your_anthropic_key_here
 # Model names (optional - sensible defaults set in config.py)
 # ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
 # Vector Database (optional - for LlamaIndex RAG)
 CHROMA_DB_PATH=./chroma_db
+# Neo4j Knowledge Graph
+NEO4J_URI=bolt://localhost:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_neo4j_password_here
+NEO4J_DATABASE=your_database_name

.gitignore CHANGED Viewed

	@@ -77,3 +77,4 @@ chroma_db/
77
78
79	# Trigger rebuild Wed Nov 26 17:51:41 EST 2025


77
78
79	# Trigger rebuild Wed Nov 26 17:51:41 EST 2025
80	+ .env

src/app.py CHANGED Viewed

@@ -35,6 +35,7 @@ from src.tools.clinicaltrials import ClinicalTrialsTool
 from src.tools.europepmc import EuropePMCTool
 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
 from src.utils.config import settings
 from src.utils.models import AgentEvent, OrchestratorConfig
@@ -85,6 +86,7 @@ def configure_orchestrator(
     search_handler = SearchHandler(
         tools=tools,
         timeout=config.search_timeout,
         include_rag=True,
         auto_ingest_to_rag=True,

 from src.tools.europepmc import EuropePMCTool
 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
+from src.tools.neo4j_search import Neo4jSearchTool
 from src.utils.config import settings
 from src.utils.models import AgentEvent, OrchestratorConfig
     search_handler = SearchHandler(
         tools=tools,
+        tools=[Neo4jSearchTool(), PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()],
         timeout=config.search_timeout,
         include_rag=True,
         auto_ingest_to_rag=True,

src/services/neo4j_service.py ADDED Viewed

	@@ -0,0 +1,107 @@

+"""Neo4j Knowledge Graph Service for Drug Repurposing"""
+from neo4j import GraphDatabase
+from typing import List, Dict, Optional, Any
+import os
+from dotenv import load_dotenv
+import logging
+load_dotenv()
+logger = logging.getLogger(__name__)
+class Neo4jService:
+    """Thin wrapper around a Neo4j driver for the drug-repurposing knowledge graph.
+
+    Connection settings come from the environment variables ``NEO4J_URI``,
+    ``NEO4J_USER``, ``NEO4J_PASSWORD`` and ``NEO4J_DATABASE``. When the password
+    is missing or connectivity cannot be verified, ``self.driver`` is ``None``
+    and the instance reports itself as not connected instead of raising.
+    """
+    def __init__(self):
+        """Read the connection settings and try to open a verified driver."""
+        self.uri = os.getenv("NEO4J_URI", "bolt://localhost:7687")
+        self.user = os.getenv("NEO4J_USER", "neo4j")
+        self.password = os.getenv("NEO4J_PASSWORD")
+        self.database = os.getenv("NEO4J_DATABASE", "neo4j")
+        self.driver = None
+        if not self.password:
+            logger.warning("⚠️ NEO4J_PASSWORD not set")
+            return
+        driver = None
+        try:
+            driver = GraphDatabase.driver(self.uri, auth=(self.user, self.password))
+            driver.verify_connectivity()
+        except Exception as e:
+            logger.error(f"❌ Neo4j connection failed: {e}")
+            if driver is not None:
+                # The driver object was created but never became usable;
+                # close it so it is not left dangling.
+                try:
+                    driver.close()
+                except Exception as close_error:
+                    logger.debug("Closing unusable Neo4j driver failed: %s", close_error)
+            return
+        self.driver = driver
+        logger.info(f"✅ Neo4j connected: {self.uri} (db: {self.database})")
+    def is_connected(self) -> bool:
+        """Return True while a driver is held by this instance."""
+        return self.driver is not None
+    def close(self):
+        """Close the driver, if any, and mark the service as disconnected."""
+        # Drop the reference first so is_connected() never reports a closed
+        # driver, even if close() itself raises.
+        driver, self.driver = self.driver, None
+        if driver:
+            driver.close()
+    def ingest_search_results(self, disease_name: str, papers: List[Dict[str, Any]],
+                             drugs_mentioned: Optional[List[str]] = None) -> Dict[str, Any]:
+        """Upsert a disease, its papers and optionally related drugs into the graph.
+
+        Args:
+            disease_name: Name stored on the ``Disease`` node every paper links to.
+            papers: Dicts read with the keys ``id``, ``url``, ``title``,
+                ``abstract`` and ``source``; entries with neither ``id`` nor
+                ``url`` are skipped.
+            drugs_mentioned: Optional drug names, each linked to the disease
+                through a ``POTENTIAL_TREATMENT`` relationship.
+
+        Returns:
+            Counters for ``papers``, ``drugs``, ``relationships`` and ``errors``,
+            or ``{"error": "Neo4j not connected"}`` when there is no driver.
+        """
+        if not self.driver:
+            return {"error": "Neo4j not connected"}
+        stats = {"papers": 0, "drugs": 0, "relationships": 0, "errors": 0}
+        try:
+            with self.driver.session(database=self.database) as session:
+                session.run("MERGE (d:Disease {name: $name})", name=disease_name)
+                for paper in papers or []:
+                    paper_id = None
+                    try:
+                        paper_id = paper.get('id') or paper.get('url', '')
+                        if not paper_id:
+                            continue
+                        # `or ''` keeps a present-but-None field from being
+                        # stored as the literal string "None".
+                        session.run("""
+                            MERGE (p:Paper {paper_id: $id})
+                            SET p.title = $title,
+                                p.abstract = $abstract,
+                                p.url = $url,
+                                p.source = $source,
+                                p.updated_at = datetime()
+                        """,
+                        id=paper_id,
+                        title=str(paper.get('title') or '')[:500],
+                        abstract=str(paper.get('abstract') or '')[:2000],
+                        url=str(paper.get('url') or '')[:500],
+                        source=str(paper.get('source') or '')[:100])
+                        session.run("""
+                            MATCH (p:Paper {paper_id: $id})
+                            MATCH (d:Disease {name: $disease})
+                            MERGE (p)-[r:ABOUT]->(d)
+                        """, id=paper_id, disease=disease_name)
+                        stats['papers'] += 1
+                        stats['relationships'] += 1
+                    except Exception as e:
+                        # Best effort: one bad paper must not abort the batch,
+                        # but the failure is logged instead of silently dropped.
+                        logger.warning("Neo4j paper ingestion failed for %r: %s", paper_id, e)
+                        stats['errors'] += 1
+                for drug in drugs_mentioned or []:
+                    try:
+                        session.run("MERGE (d:Drug {name: $name})", name=drug)
+                        session.run("""
+                            MATCH (drug:Drug {name: $drug})
+                            MATCH (disease:Disease {name: $disease})
+                            MERGE (drug)-[r:POTENTIAL_TREATMENT]->(disease)
+                        """, drug=drug, disease=disease_name)
+                        stats['drugs'] += 1
+                        stats['relationships'] += 1
+                    except Exception as e:
+                        logger.warning("Neo4j drug ingestion failed for %r: %s", drug, e)
+                        stats['errors'] += 1
+            logger.info(f"Neo4j ingestion: {stats['papers']} papers, {stats['drugs']} drugs")
+        except Exception as e:
+            logger.error(f"Neo4j ingestion error: {e}")
+            stats['errors'] += 1
+        return stats
+# Lazily created module-wide instance; populated on the first accessor call.
+_neo4j_service = None
+def get_neo4j_service() -> Optional[Neo4jService]:
+    """Return the shared ``Neo4jService`` if it is connected, otherwise ``None``.
+
+    The instance is built once on first use and cached. A service whose
+    connection attempt failed stays cached, so later calls keep returning
+    ``None`` without trying to reconnect.
+    """
+    global _neo4j_service
+    if _neo4j_service is None:
+        _neo4j_service = Neo4jService()
+    service = _neo4j_service
+    if service and service.is_connected():
+        return service
+    return None

src/tools/neo4j_search.py ADDED Viewed

	@@ -0,0 +1,65 @@

+"""Neo4j knowledge graph search tool."""
+import structlog
+from src.utils.models import Citation, Evidence
+from src.services.neo4j_service import get_neo4j_service
+logger = structlog.get_logger()
+class Neo4jSearchTool:
+    """Search the Neo4j knowledge graph for previously stored papers."""
+    def __init__(self):
+        """Set the tool name."""
+        self.name = "neo4j"  # Defined explicitly
+    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
+        """Return stored papers whose disease name matches the query.
+
+        Args:
+            query: Free-text query; the text after the last "for" is used as
+                the disease name, otherwise the whole query.
+            max_results: Upper bound passed to the Cypher ``LIMIT`` clause.
+
+        Returns:
+            One ``Evidence`` per matching paper, most recently updated first.
+            An empty list when the service is unavailable, the extracted
+            disease is empty, or the lookup fails.
+        """
+        import asyncio  # local import: only this method needs it
+        try:
+            service = get_neo4j_service()
+            if not service:
+                logger.warning("Neo4j service not available")
+                return []
+            # Extract disease name from query.
+            # NOTE(review): the substring test also fires on words that merely
+            # contain "for" (e.g. "metformin"). It is kept unchanged because
+            # the ingestion code in search_handler derives the stored disease
+            # name the same way; both sides should be fixed together.
+            disease = query
+            if "for" in query.lower():
+                disease = query.split("for")[-1].strip().rstrip("?")
+            if not disease.strip():
+                # An empty needle would make CONTAINS match every disease and
+                # return unrelated papers.
+                return []
+            def _fetch_records():
+                # Runs in a worker thread: the driver used here is blocking.
+                with service.driver.session(database=service.database) as session:
+                    result = session.run("""
+                        MATCH (p:Paper)-[:ABOUT]->(d:Disease)
+                        WHERE toLower(d.name) CONTAINS toLower($disease)
+                        RETURN p.title as title, p.abstract as abstract,
+                               p.url as url, p.source as source
+                        ORDER BY p.updated_at DESC
+                        LIMIT $max_results
+                    """, disease=disease, max_results=max_results)
+                    return list(result)
+            # Keep the event loop free while the query is in flight.
+            records = await asyncio.to_thread(_fetch_records)
+            results = []
+            for record in records:
+                citation = Citation(
+                    source="neo4j",
+                    title=record["title"] or "Untitled",
+                    url=record["url"] or "",
+                    date="",
+                    authors=[]
+                )
+                evidence = Evidence(
+                    content=record["abstract"] or record["title"] or "",
+                    citation=citation,
+                    relevance=1.0,
+                    metadata={
+                        "from_kb": True,
+                        "original_source": record["source"]
+                    }
+                )
+                results.append(evidence)
+            logger.info(f"📊 Neo4j returned {len(results)} results")
+            return results
+        except Exception as e:
+            # Best effort: a knowledge-graph failure must not break the search.
+            logger.error(f"Neo4j search failed: {e}")
+            return []

src/tools/search_handler.py CHANGED Viewed

@@ -9,6 +9,7 @@ from src.tools.base import SearchTool
 from src.tools.rag_tool import create_rag_tool
 from src.utils.exceptions import ConfigurationError, SearchError
 from src.utils.models import Evidence, SearchResult, SourceName
 if TYPE_CHECKING:
     from src.services.llamaindex_rag import LlamaIndexRAGService
@@ -163,6 +164,32 @@ class SearchHandler:
                 except Exception as e:
                     logger.warning("Failed to ingest evidence into RAG", error=str(e))
         return search_result
     async def _search_with_timeout(

 from src.tools.rag_tool import create_rag_tool
 from src.utils.exceptions import ConfigurationError, SearchError
 from src.utils.models import Evidence, SearchResult, SourceName
+from src.services.neo4j_service import get_neo4j_service
 if TYPE_CHECKING:
     from src.services.llamaindex_rag import LlamaIndexRAGService
                 except Exception as e:
                     logger.warning("Failed to ingest evidence into RAG", error=str(e))
+        # 🔥 INGEST INTO NEO4J KNOWLEDGE GRAPH 🔥
+        if all_evidence:
+            try:
+                neo4j_service = get_neo4j_service()
+                if neo4j_service:
+                    # Extract disease from query
+                    disease = query
+                    if "for" in query.lower():
+                        disease = query.split("for")[-1].strip().rstrip("?")
+                    # Convert Evidence objects to dicts for Neo4j
+                    papers = []
+                    for ev in all_evidence:
+                        papers.append({
+                            'id': ev.citation.url or '',
+                            'title': ev.citation.title or '',
+                            'abstract': ev.content,
+                            'url': ev.citation.url or '',
+                            'source': ev.citation.source,
+                        })
+                    stats = neo4j_service.ingest_search_results(disease, papers)
+                    logger.info("💾 Saved to Neo4j", stats=stats)
+            except Exception as e:
+                logger.warning("Neo4j ingestion failed", error=str(e))
         return search_result
     async def _search_with_timeout(

src/utils/models.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Any, ClassVar, Literal
 from pydantic import BaseModel, Field
 # Centralized source type - add new sources here (e.g., "biorxiv" in Phase 11)
-SourceName = Literal["pubmed", "clinicaltrials", "biorxiv", "europepmc", "preprint", "rag", "web"]
 class Citation(BaseModel):

 from pydantic import BaseModel, Field
 # Centralized source type - add new sources here (e.g., "biorxiv" in Phase 11)
+SourceName = Literal["pubmed", "clinicaltrials", "biorxiv", "europepmc", "preprint", "rag", "web", "neo4j"]
 class Citation(BaseModel):