Tonic commited on
Commit
3f9bc77
·
2 Parent(s): 97d41ab ef18c90

Merge branch 'dev' into feat/deepresearch

Browse files

Signed-off-by: Tonic <[email protected]>

.env.example CHANGED
@@ -1,11 +1,11 @@
1
- # ============== LLM CONFIGURATION ==============
 
2
 
3
- # Provider: "openai" or "anthropic"
4
- LLM_PROVIDER=openai
5
 
6
- # API Keys (at least one required for full LLM analysis)
7
- OPENAI_API_KEY=sk-your-key-here
8
- ANTHROPIC_API_KEY=sk-ant-your-key-here
9
 
10
  # Model names (optional - sensible defaults set in config.py)
11
  # ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
@@ -100,3 +100,8 @@ NCBI_API_KEY=your-ncbi-key-here
100
 
101
  # Vector Database (optional - for LlamaIndex RAG)
102
  CHROMA_DB_PATH=./chroma_db
 
 
 
 
 
 
1
+ # HuggingFace
2
+ HF_TOKEN=your_huggingface_token_here
3
 
4
+ # OpenAI (optional)
5
+ OPENAI_API_KEY=your_openai_key_here
6
 
7
+ # Anthropic (optional)
8
+ ANTHROPIC_API_KEY=your_anthropic_key_here
 
9
 
10
  # Model names (optional - sensible defaults set in config.py)
11
  # ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
 
100
 
101
  # Vector Database (optional - for LlamaIndex RAG)
102
  CHROMA_DB_PATH=./chroma_db
103
+ # Neo4j Knowledge Graph
104
+ NEO4J_URI=bolt://localhost:7687
105
+ NEO4J_USER=neo4j
106
+ NEO4J_PASSWORD=your_neo4j_password_here
107
+ NEO4J_DATABASE=your_database_name
.gitignore CHANGED
@@ -77,3 +77,4 @@ chroma_db/
77
 
78
 
79
  # Trigger rebuild Wed Nov 26 17:51:41 EST 2025
 
 
77
 
78
 
79
  # Trigger rebuild Wed Nov 26 17:51:41 EST 2025
80
+ .env
src/app.py CHANGED
@@ -35,6 +35,7 @@ from src.tools.clinicaltrials import ClinicalTrialsTool
35
  from src.tools.europepmc import EuropePMCTool
36
  from src.tools.pubmed import PubMedTool
37
  from src.tools.search_handler import SearchHandler
 
38
  from src.utils.config import settings
39
  from src.utils.models import AgentEvent, OrchestratorConfig
40
 
@@ -85,6 +86,7 @@ def configure_orchestrator(
85
 
86
  search_handler = SearchHandler(
87
  tools=tools,
 
88
  timeout=config.search_timeout,
89
  include_rag=True,
90
  auto_ingest_to_rag=True,
 
35
  from src.tools.europepmc import EuropePMCTool
36
  from src.tools.pubmed import PubMedTool
37
  from src.tools.search_handler import SearchHandler
38
+ from src.tools.neo4j_search import Neo4jSearchTool
39
  from src.utils.config import settings
40
  from src.utils.models import AgentEvent, OrchestratorConfig
41
 
 
86
 
87
  search_handler = SearchHandler(
88
  tools=tools,
89
+ tools=[Neo4jSearchTool(), PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()],
90
  timeout=config.search_timeout,
91
  include_rag=True,
92
  auto_ingest_to_rag=True,
src/services/neo4j_service.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Neo4j Knowledge Graph Service for Drug Repurposing"""
2
+ from neo4j import GraphDatabase
3
+ from typing import List, Dict, Optional, Any
4
+ import os
5
+ from dotenv import load_dotenv
6
+ import logging
7
+
8
+ load_dotenv()
9
+ logger = logging.getLogger(__name__)
10
+
11
+ class Neo4jService:
12
+ def __init__(self):
13
+ self.uri = os.getenv("NEO4J_URI", "bolt://localhost:7687")
14
+ self.user = os.getenv("NEO4J_USER", "neo4j")
15
+ self.password = os.getenv("NEO4J_PASSWORD")
16
+ self.database = os.getenv("NEO4J_DATABASE", "neo4j")
17
+
18
+ if not self.password:
19
+ logger.warning("⚠️ NEO4J_PASSWORD not set")
20
+ self.driver = None
21
+ return
22
+
23
+ try:
24
+ self.driver = GraphDatabase.driver(self.uri, auth=(self.user, self.password))
25
+ self.driver.verify_connectivity()
26
+ logger.info(f"✅ Neo4j connected: {self.uri} (db: {self.database})")
27
+ except Exception as e:
28
+ logger.error(f"❌ Neo4j connection failed: {e}")
29
+ self.driver = None
30
+
31
+ def is_connected(self) -> bool:
32
+ return self.driver is not None
33
+
34
+ def close(self):
35
+ if self.driver:
36
+ self.driver.close()
37
+
38
+ def ingest_search_results(self, disease_name: str, papers: List[Dict[str, Any]],
39
+ drugs_mentioned: List[str] = None) -> Dict[str, int]:
40
+ if not self.driver:
41
+ return {"error": "Neo4j not connected"}
42
+
43
+ stats = {"papers": 0, "drugs": 0, "relationships": 0, "errors": 0}
44
+
45
+ try:
46
+ with self.driver.session(database=self.database) as session:
47
+ session.run("MERGE (d:Disease {name: $name})", name=disease_name)
48
+
49
+ for paper in papers:
50
+ try:
51
+ paper_id = paper.get('id') or paper.get('url', '')
52
+ if not paper_id:
53
+ continue
54
+
55
+ session.run("""
56
+ MERGE (p:Paper {paper_id: $id})
57
+ SET p.title = $title,
58
+ p.abstract = $abstract,
59
+ p.url = $url,
60
+ p.source = $source,
61
+ p.updated_at = datetime()
62
+ """,
63
+ id=paper_id,
64
+ title=str(paper.get('title', ''))[:500],
65
+ abstract=str(paper.get('abstract', ''))[:2000],
66
+ url=str(paper.get('url', ''))[:500],
67
+ source=str(paper.get('source', ''))[:100])
68
+
69
+ session.run("""
70
+ MATCH (p:Paper {paper_id: $id})
71
+ MATCH (d:Disease {name: $disease})
72
+ MERGE (p)-[r:ABOUT]->(d)
73
+ """, id=paper_id, disease=disease_name)
74
+
75
+ stats['papers'] += 1
76
+ stats['relationships'] += 1
77
+ except Exception as e:
78
+ stats['errors'] += 1
79
+
80
+ if drugs_mentioned:
81
+ for drug in drugs_mentioned:
82
+ try:
83
+ session.run("MERGE (d:Drug {name: $name})", name=drug)
84
+ session.run("""
85
+ MATCH (drug:Drug {name: $drug})
86
+ MATCH (disease:Disease {name: $disease})
87
+ MERGE (drug)-[r:POTENTIAL_TREATMENT]->(disease)
88
+ """, drug=drug, disease=disease_name)
89
+ stats['drugs'] += 1
90
+ stats['relationships'] += 1
91
+ except Exception as e:
92
+ stats['errors'] += 1
93
+
94
+ logger.info(f"�� Neo4j ingestion: {stats['papers']} papers, {stats['drugs']} drugs")
95
+ except Exception as e:
96
+ logger.error(f"Neo4j ingestion error: {e}")
97
+ stats['errors'] += 1
98
+
99
+ return stats
100
+
101
+ _neo4j_service = None
102
+
103
+ def get_neo4j_service() -> Optional[Neo4jService]:
104
+ global _neo4j_service
105
+ if _neo4j_service is None:
106
+ _neo4j_service = Neo4jService()
107
+ return _neo4j_service if _neo4j_service and _neo4j_service.is_connected() else None
src/tools/neo4j_search.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Neo4j knowledge graph search tool."""
2
+ import structlog
3
+ from src.utils.models import Citation, Evidence
4
+ from src.services.neo4j_service import get_neo4j_service
5
+
6
+ logger = structlog.get_logger()
7
+
8
+ class Neo4jSearchTool:
9
+ """Search Neo4j knowledge graph for papers."""
10
+
11
+ def __init__(self):
12
+ self.name = "neo4j" # ✅ Definir explícitamente
13
+
14
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
15
+ """Search Neo4j for papers about diseases in the query."""
16
+ try:
17
+ service = get_neo4j_service()
18
+ if not service:
19
+ logger.warning("Neo4j service not available")
20
+ return []
21
+
22
+ # Extract disease name from query
23
+ disease = query
24
+ if "for" in query.lower():
25
+ disease = query.split("for")[-1].strip().rstrip("?")
26
+
27
+ # Query Neo4j
28
+ with service.driver.session(database=service.database) as session:
29
+ result = session.run("""
30
+ MATCH (p:Paper)-[:ABOUT]->(d:Disease)
31
+ WHERE d.name CONTAINS $disease
32
+ RETURN p.title as title, p.abstract as abstract,
33
+ p.url as url, p.source as source
34
+ ORDER BY p.updated_at DESC
35
+ LIMIT $max_results
36
+ """, disease=disease, max_results=max_results)
37
+
38
+ records = list(result)
39
+
40
+ results = []
41
+ for record in records:
42
+ citation = Citation(
43
+ source="neo4j",
44
+ title=record["title"] or "Untitled",
45
+ url=record["url"] or "",
46
+ date="",
47
+ authors=[]
48
+ )
49
+
50
+ evidence = Evidence(
51
+ content=record["abstract"] or record["title"] or "",
52
+ citation=citation,
53
+ relevance=1.0,
54
+ metadata={
55
+ "from_kb": True,
56
+ "original_source": record["source"]
57
+ }
58
+ )
59
+ results.append(evidence)
60
+
61
+ logger.info(f"📊 Neo4j returned {len(results)} results")
62
+ return results
63
+ except Exception as e:
64
+ logger.error(f"Neo4j search failed: {e}")
65
+ return []
src/tools/search_handler.py CHANGED
@@ -9,6 +9,7 @@ from src.tools.base import SearchTool
9
  from src.tools.rag_tool import create_rag_tool
10
  from src.utils.exceptions import ConfigurationError, SearchError
11
  from src.utils.models import Evidence, SearchResult, SourceName
 
12
 
13
  if TYPE_CHECKING:
14
  from src.services.llamaindex_rag import LlamaIndexRAGService
@@ -163,6 +164,32 @@ class SearchHandler:
163
  except Exception as e:
164
  logger.warning("Failed to ingest evidence into RAG", error=str(e))
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  return search_result
167
 
168
  async def _search_with_timeout(
 
9
  from src.tools.rag_tool import create_rag_tool
10
  from src.utils.exceptions import ConfigurationError, SearchError
11
  from src.utils.models import Evidence, SearchResult, SourceName
12
+ from src.services.neo4j_service import get_neo4j_service
13
 
14
  if TYPE_CHECKING:
15
  from src.services.llamaindex_rag import LlamaIndexRAGService
 
164
  except Exception as e:
165
  logger.warning("Failed to ingest evidence into RAG", error=str(e))
166
 
167
+ # 🔥 INGEST INTO NEO4J KNOWLEDGE GRAPH 🔥
168
+ if all_evidence:
169
+ try:
170
+ neo4j_service = get_neo4j_service()
171
+ if neo4j_service:
172
+ # Extract disease from query
173
+ disease = query
174
+ if "for" in query.lower():
175
+ disease = query.split("for")[-1].strip().rstrip("?")
176
+
177
+ # Convert Evidence objects to dicts for Neo4j
178
+ papers = []
179
+ for ev in all_evidence:
180
+ papers.append({
181
+ 'id': ev.citation.url or '',
182
+ 'title': ev.citation.title or '',
183
+ 'abstract': ev.content,
184
+ 'url': ev.citation.url or '',
185
+ 'source': ev.citation.source,
186
+ })
187
+
188
+ stats = neo4j_service.ingest_search_results(disease, papers)
189
+ logger.info("💾 Saved to Neo4j", stats=stats)
190
+ except Exception as e:
191
+ logger.warning("Neo4j ingestion failed", error=str(e))
192
+
193
  return search_result
194
 
195
  async def _search_with_timeout(
src/utils/models.py CHANGED
@@ -6,7 +6,7 @@ from typing import Any, ClassVar, Literal
6
  from pydantic import BaseModel, Field
7
 
8
  # Centralized source type - add new sources here (e.g., "biorxiv" in Phase 11)
9
- SourceName = Literal["pubmed", "clinicaltrials", "biorxiv", "europepmc", "preprint", "rag", "web"]
10
 
11
 
12
  class Citation(BaseModel):
 
6
  from pydantic import BaseModel, Field
7
 
8
  # Centralized source type - add new sources here (e.g., "biorxiv" in Phase 11)
9
+ SourceName = Literal["pubmed", "clinicaltrials", "biorxiv", "europepmc", "preprint", "rag", "web", "neo4j"]
10
 
11
 
12
  class Citation(BaseModel):