|
|
|
|
|
""" |
|
|
Enricher Agent - Enriches prospects with real-time web search data |
|
|
Now uses actual web search instead of static/mock data |
|
|
""" |
|
|
from datetime import datetime |
|
|
from app.schema import Prospect, Fact |
|
|
from app.config import FACT_TTL_HOURS, SKIP_WEB_SEARCH |
|
|
import uuid |
|
|
import logging |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
class Enricher:
    """Enrich a prospect with facts gathered from web search and discovery data.

    For each prospect the enricher:

    - runs a fixed set of web-search queries (company news, customer
      experience, challenges, support contact) unless ``SKIP_WEB_SEARCH``
      is set, turning the top results of each query into ``Fact`` objects;
    - turns the company's already-known pain points and notes into
      ``Fact`` objects;
    - persists every fact, and finally the prospect itself, through the
      store client.
    """

    # Only the first few results of each query are turned into facts.
    _MAX_RESULTS_PER_QUERY = 3
    # Result snippets shorter than this are skipped.
    _MIN_RESULT_TEXT_LEN = 20
    # Stored fact text is truncated to this many characters.
    _MAX_FACT_TEXT_LEN = 500
    # Pain points / notes must be strictly longer than this to become facts.
    _MIN_DISCOVERY_TEXT_LEN = 10

    def __init__(self, mcp_registry):
        """Keep the registry and resolve the search and store clients from it.

        Args:
            mcp_registry: Object exposing ``get_search_client()`` and
                ``get_store_client()``.
        """
        self.mcp = mcp_registry
        self.search = mcp_registry.get_search_client()
        self.store = mcp_registry.get_store_client()

    async def run(self, prospect: Prospect) -> Prospect:
        """Enrich ``prospect`` with facts and persist the result.

        Replaces ``prospect.facts`` with the newly collected facts, sets
        ``prospect.status`` to ``"enriched"`` and saves the prospect via the
        store client.

        Args:
            prospect: The prospect to enrich; its ``company`` supplies the
                name, industry, domain, id, pains and notes used here.

        Returns:
            The same ``prospect`` instance, mutated in place.

        Raises:
            Exception: Whatever the store client raises while saving a
                discovery fact or the prospect. Failures during web search
                are logged and skipped instead.
        """
        logger.info("Enricher: Enriching prospect '%s'", prospect.company.name)

        facts = []

        if not SKIP_WEB_SEARCH:
            logger.info("Enricher: Performing web search for facts")
            await self._collect_web_facts(prospect, facts)
        else:
            logger.info("Enricher: Skipping web search (SKIP_WEB_SEARCH=true)")

        # Discovery data is added whether or not web search ran.
        await self._collect_discovery_facts(prospect, facts)

        prospect.facts = facts
        prospect.status = "enriched"
        await self.store.save_prospect(prospect)

        logger.info(
            "Enricher: Added %d facts for '%s'", len(facts), prospect.company.name
        )

        return prospect

    async def _collect_web_facts(self, prospect: Prospect, facts: list) -> None:
        """Run the search queries and append one fact per usable, unseen result."""
        company = prospect.company
        queries = [
            f"{company.name} news latest updates",
            f"{company.name} {company.industry} customer experience",
            f"{company.name} challenges problems",
            f"{company.domain} customer support contact",
        ]

        # Keyed on the text that is actually stored (after truncation), so two
        # long results sharing the same prefix do not become identical facts.
        seen_texts = set()

        for query in queries:
            # Best-effort boundary: one failing query (search or store error)
            # must not prevent the remaining queries from running.
            try:
                logger.info("Enricher: Searching for: '%s'", query)
                results = await self.search.query(query)

                # A None/empty result set simply yields no facts.
                for result in (results or [])[: self._MAX_RESULTS_PER_QUERY]:
                    fact_text = self._result_text(result)
                    if not fact_text or fact_text in seen_texts:
                        continue
                    seen_texts.add(fact_text)

                    fact = await self._record_fact(
                        facts,
                        company_id=company.id,
                        source=result.get("source", "web search"),
                        text=fact_text,
                        ttl_hours=FACT_TTL_HOURS,
                        confidence=result.get("confidence", 0.75),
                    )
                    logger.info("Enricher: Added fact from %s", fact.source)
            except Exception as e:
                logger.error("Enricher: Error searching for '%s': %s", query, e)

    def _result_text(self, result) -> str:
        """Return the fact text for one search result, or ``""`` if unusable.

        The title is prepended unless it already occurs in the snippet, and
        the combined text is truncated to ``_MAX_FACT_TEXT_LEN`` characters.
        """
        # `or ""` also covers keys that are present but hold None, which would
        # otherwise raise on .strip() and abort the rest of the query.
        text = (result.get("text") or "").strip()
        title = (result.get("title") or "").strip()

        if len(text) < self._MIN_RESULT_TEXT_LEN:
            return ""

        if title and title not in text:
            text = f"{title}. {text}"

        return text[: self._MAX_FACT_TEXT_LEN]

    async def _collect_discovery_facts(self, prospect: Prospect, facts: list) -> None:
        """Append facts built from the company's known pain points and notes."""
        company = prospect.company

        # Discovery facts get twice the TTL of web-search facts.
        discovery_ttl = FACT_TTL_HOURS * 2

        # `or []` tolerates a company whose pains/notes were never populated.
        for pain in company.pains or []:
            if pain and len(pain) > self._MIN_DISCOVERY_TEXT_LEN:
                await self._record_fact(
                    facts,
                    company_id=company.id,
                    source="company_discovery",
                    text=f"Known challenge: {pain}",
                    ttl_hours=discovery_ttl,
                    confidence=0.85,
                )

        for note in company.notes or []:
            if note and len(note) > self._MIN_DISCOVERY_TEXT_LEN:
                await self._record_fact(
                    facts,
                    company_id=company.id,
                    source="company_discovery",
                    text=note,
                    ttl_hours=discovery_ttl,
                    confidence=0.8,
                )

    async def _record_fact(
        self, facts: list, *, company_id, source, text, ttl_hours, confidence
    ) -> Fact:
        """Build a ``Fact``, append it to ``facts``, persist it and return it."""
        fact = Fact(
            id=str(uuid.uuid4()),
            source=source,
            text=text,
            # NOTE(review): naive UTC timestamp; datetime.utcnow() is deprecated
            # since Python 3.12. Kept as-is because switching to an aware
            # datetime could break comparisons elsewhere - confirm before changing.
            collected_at=datetime.utcnow(),
            ttl_hours=ttl_hours,
            confidence=confidence,
            company_id=company_id,
        )
        # Appended before saving, so a fact whose save fails is still reported
        # on the prospect (matches the original ordering).
        facts.append(fact)
        await self.store.save_fact(fact)
        return fact