Spaces:
Sleeping
Sleeping
Update workflow.py
Browse files- workflow.py +27 -32
workflow.py
CHANGED
|
@@ -23,19 +23,18 @@ class AgentState(TypedDict):
|
|
| 23 |
|
| 24 |
class ResearchWorkflow:
|
| 25 |
"""
|
| 26 |
-
A multi-step research workflow that leverages
|
| 27 |
-
|
| 28 |
-
Supported domains include:
|
| 29 |
- Biomedical Research
|
| 30 |
- Legal Research
|
| 31 |
- Environmental and Energy Studies
|
| 32 |
- Competitive Programming and Theoretical Computer Science
|
| 33 |
- Social Sciences
|
|
|
|
| 34 |
"""
|
| 35 |
def __init__(self) -> None:
|
| 36 |
self.processor = EnhancedCognitiveProcessor()
|
| 37 |
-
#
|
| 38 |
-
self.workflow = StateGraph(AgentState)
|
| 39 |
self._build_workflow()
|
| 40 |
self.app = self.workflow.compile()
|
| 41 |
|
|
@@ -55,7 +54,7 @@ class ResearchWorkflow:
|
|
| 55 |
)
|
| 56 |
self.workflow.add_edge("validate", END)
|
| 57 |
self.workflow.add_edge("refine", "retrieve")
|
| 58 |
-
# Extended node for multi-modal enhancement
|
| 59 |
self.workflow.add_node("enhance", self.enhance_analysis)
|
| 60 |
self.workflow.add_edge("validate", "enhance")
|
| 61 |
self.workflow.add_edge("enhance", END)
|
|
@@ -63,8 +62,8 @@ class ResearchWorkflow:
|
|
| 63 |
def ingest_query(self, state: Dict) -> Dict:
|
| 64 |
try:
|
| 65 |
query = state["messages"][-1].content
|
| 66 |
-
#
|
| 67 |
-
domain = state.get("context", {}).get("domain", "Biomedical Research")
|
| 68 |
new_context = {
|
| 69 |
"raw_query": query,
|
| 70 |
"domain": domain,
|
|
@@ -84,9 +83,8 @@ class ResearchWorkflow:
|
|
| 84 |
def retrieve_documents(self, state: Dict) -> Dict:
|
| 85 |
try:
|
| 86 |
query = state["context"]["raw_query"]
|
| 87 |
-
#
|
| 88 |
-
|
| 89 |
-
docs = []
|
| 90 |
logger.info(f"Retrieved {len(docs)} documents for query.")
|
| 91 |
return {
|
| 92 |
"messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
|
|
@@ -95,7 +93,7 @@ class ResearchWorkflow:
|
|
| 95 |
"retrieval_time": time.time(),
|
| 96 |
"refine_count": state["context"].get("refine_count", 0),
|
| 97 |
"refinement_history": state["context"].get("refinement_history", []),
|
| 98 |
-
"domain": state["context"].get("domain", "Biomedical Research")
|
| 99 |
}
|
| 100 |
}
|
| 101 |
except Exception as e:
|
|
@@ -104,16 +102,18 @@ class ResearchWorkflow:
|
|
| 104 |
|
| 105 |
def analyze_content(self, state: Dict) -> Dict:
|
| 106 |
try:
|
| 107 |
-
|
|
|
|
| 108 |
docs = state["context"].get("documents", [])
|
| 109 |
-
#
|
| 110 |
if docs:
|
| 111 |
docs_text = "\n\n".join([d.page_content for d in docs])
|
| 112 |
else:
|
| 113 |
docs_text = state["context"].get("raw_query", "")
|
| 114 |
-
logger.info("No documents retrieved; using dynamic synthesis (RAG mode).")
|
| 115 |
-
|
| 116 |
-
|
|
|
|
| 117 |
full_prompt = f"Domain: {state['context'].get('domain', 'Biomedical Research')}\n" \
|
| 118 |
f"{domain_prompt}\n\n" + \
|
| 119 |
ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
|
|
@@ -134,13 +134,13 @@ class ResearchWorkflow:
|
|
| 134 |
try:
|
| 135 |
analysis = state["messages"][-1].content
|
| 136 |
validation_prompt = (
|
| 137 |
-
f"Validate the following
|
| 138 |
-
"
|
| 139 |
-
"1. Technical accuracy\n"
|
| 140 |
-
"2.
|
| 141 |
"3. Logical consistency\n"
|
| 142 |
"4. Methodological soundness\n\n"
|
| 143 |
-
"Respond with 'VALID: [justification]' or 'INVALID: [justification]'."
|
| 144 |
)
|
| 145 |
response = self.processor.process_query(validation_prompt)
|
| 146 |
logger.info("Output validation completed.")
|
|
@@ -160,15 +160,15 @@ class ResearchWorkflow:
|
|
| 160 |
current_analysis = state["messages"][-1].content
|
| 161 |
refinement_history.append(current_analysis)
|
| 162 |
difficulty_level = max(0, 3 - state["context"]["refine_count"])
|
| 163 |
-
domain = state["context"].get("domain", "Biomedical Research")
|
| 164 |
logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
|
| 165 |
if state["context"]["refine_count"] >= 3:
|
| 166 |
meta_prompt = (
|
| 167 |
f"Domain: {domain}\n"
|
| 168 |
"You are given the following series of refinement outputs:\n" +
|
| 169 |
"\n---\n".join(refinement_history) +
|
| 170 |
-
"\n\nSynthesize
|
| 171 |
-
"Focus on
|
| 172 |
)
|
| 173 |
meta_response = self.processor.process_query(meta_prompt)
|
| 174 |
logger.info("Meta-refinement completed.")
|
|
@@ -180,12 +180,8 @@ class ResearchWorkflow:
|
|
| 180 |
refinement_prompt = (
|
| 181 |
f"Domain: {domain}\n"
|
| 182 |
f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
|
| 183 |
-
"First,
|
| 184 |
-
"Then, improve the
|
| 185 |
-
"1. Technical precision\n"
|
| 186 |
-
"2. Empirical grounding\n"
|
| 187 |
-
"3. Theoretical coherence\n\n"
|
| 188 |
-
"Use a structured difficulty gradient approach to produce a simpler yet more accurate variant, addressing the identified weaknesses."
|
| 189 |
)
|
| 190 |
response = self.processor.process_query(refinement_prompt)
|
| 191 |
logger.info("Refinement completed.")
|
|
@@ -235,4 +231,3 @@ class ResearchWorkflow:
|
|
| 235 |
except Exception as e:
|
| 236 |
logger.exception("Error during multi-modal enhancement.")
|
| 237 |
return self._error_state(f"Enhancement Error: {str(e)}")
|
| 238 |
-
|
|
|
|
| 23 |
|
| 24 |
class ResearchWorkflow:
|
| 25 |
"""
|
| 26 |
+
A multi-step research workflow that leverages Retrieval-Augmented Generation (RAG).
|
| 27 |
+
Supports domains including:
|
|
|
|
| 28 |
- Biomedical Research
|
| 29 |
- Legal Research
|
| 30 |
- Environmental and Energy Studies
|
| 31 |
- Competitive Programming and Theoretical Computer Science
|
| 32 |
- Social Sciences
|
| 33 |
+
This implementation normalizes the domain and uses domain-specific prompts and fallbacks.
|
| 34 |
"""
|
| 35 |
def __init__(self) -> None:
|
| 36 |
self.processor = EnhancedCognitiveProcessor()
|
| 37 |
+
self.workflow = StateGraph(AgentState) # Supply state schema
|
|
|
|
| 38 |
self._build_workflow()
|
| 39 |
self.app = self.workflow.compile()
|
| 40 |
|
|
|
|
| 54 |
)
|
| 55 |
self.workflow.add_edge("validate", END)
|
| 56 |
self.workflow.add_edge("refine", "retrieve")
|
| 57 |
+
# Extended node for multi-modal enhancement
|
| 58 |
self.workflow.add_node("enhance", self.enhance_analysis)
|
| 59 |
self.workflow.add_edge("validate", "enhance")
|
| 60 |
self.workflow.add_edge("enhance", END)
|
|
|
|
| 62 |
def ingest_query(self, state: Dict) -> Dict:
|
| 63 |
try:
|
| 64 |
query = state["messages"][-1].content
|
| 65 |
+
# Normalize domain string to lower-case; default to 'biomedical research'
|
| 66 |
+
domain = state.get("context", {}).get("domain", "Biomedical Research").strip().lower()
|
| 67 |
new_context = {
|
| 68 |
"raw_query": query,
|
| 69 |
"domain": domain,
|
|
|
|
| 83 |
def retrieve_documents(self, state: Dict) -> Dict:
|
| 84 |
try:
|
| 85 |
query = state["context"]["raw_query"]
|
| 86 |
+
# Simulate retrieval; for now, an empty list indicates no external documents found.
|
| 87 |
+
docs = []
|
|
|
|
| 88 |
logger.info(f"Retrieved {len(docs)} documents for query.")
|
| 89 |
return {
|
| 90 |
"messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
|
|
|
|
| 93 |
"retrieval_time": time.time(),
|
| 94 |
"refine_count": state["context"].get("refine_count", 0),
|
| 95 |
"refinement_history": state["context"].get("refinement_history", []),
|
| 96 |
+
"domain": state["context"].get("domain", "biomedical research")
|
| 97 |
}
|
| 98 |
}
|
| 99 |
except Exception as e:
|
|
|
|
| 102 |
|
| 103 |
def analyze_content(self, state: Dict) -> Dict:
|
| 104 |
try:
|
| 105 |
+
# Normalize domain and use it for prompt generation
|
| 106 |
+
domain = state["context"].get("domain", "biomedical research").strip().lower()
|
| 107 |
docs = state["context"].get("documents", [])
|
| 108 |
+
# Use retrieved documents if available; else, use raw query as fallback.
|
| 109 |
if docs:
|
| 110 |
docs_text = "\n\n".join([d.page_content for d in docs])
|
| 111 |
else:
|
| 112 |
docs_text = state["context"].get("raw_query", "")
|
| 113 |
+
logger.info("No documents retrieved; using dynamic synthesis (RAG mode).")
|
| 114 |
+
# Get domain-specific prompt; ensure fallback prompts exist for all supported domains.
|
| 115 |
+
domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "Consider relevant legal cases and statutory interpretations.")
|
| 116 |
+
# Build the final prompt with domain tag for clarity.
|
| 117 |
full_prompt = f"Domain: {state['context'].get('domain', 'Biomedical Research')}\n" \
|
| 118 |
f"{domain_prompt}\n\n" + \
|
| 119 |
ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
|
|
|
|
| 134 |
try:
|
| 135 |
analysis = state["messages"][-1].content
|
| 136 |
validation_prompt = (
|
| 137 |
+
f"Validate the following analysis for correctness, clarity, and legal grounding:\n{analysis}\n\n"
|
| 138 |
+
"Criteria:\n"
|
| 139 |
+
"1. Technical and legal accuracy\n"
|
| 140 |
+
"2. Evidence and citation support\n"
|
| 141 |
"3. Logical consistency\n"
|
| 142 |
"4. Methodological soundness\n\n"
|
| 143 |
+
"Respond with 'VALID: [justification]' or 'INVALID: [justification]'."
|
| 144 |
)
|
| 145 |
response = self.processor.process_query(validation_prompt)
|
| 146 |
logger.info("Output validation completed.")
|
|
|
|
| 160 |
current_analysis = state["messages"][-1].content
|
| 161 |
refinement_history.append(current_analysis)
|
| 162 |
difficulty_level = max(0, 3 - state["context"]["refine_count"])
|
| 163 |
+
domain = state["context"].get("domain", "biomedical research")
|
| 164 |
logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
|
| 165 |
if state["context"]["refine_count"] >= 3:
|
| 166 |
meta_prompt = (
|
| 167 |
f"Domain: {domain}\n"
|
| 168 |
"You are given the following series of refinement outputs:\n" +
|
| 169 |
"\n---\n".join(refinement_history) +
|
| 170 |
+
"\n\nSynthesize these into a final, concise legal analysis report, highlighting key precedents and statutory interpretations. "
|
| 171 |
+
"Focus on improving accuracy and relevance for legal research."
|
| 172 |
)
|
| 173 |
meta_response = self.processor.process_query(meta_prompt)
|
| 174 |
logger.info("Meta-refinement completed.")
|
|
|
|
| 180 |
refinement_prompt = (
|
| 181 |
f"Domain: {domain}\n"
|
| 182 |
f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
|
| 183 |
+
"First, identify weaknesses such as lack of legal grounding or misinterpretation of cases. "
|
| 184 |
+
"Then, improve the analysis with clear references to legal precedents and statutory language."
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
)
|
| 186 |
response = self.processor.process_query(refinement_prompt)
|
| 187 |
logger.info("Refinement completed.")
|
|
|
|
| 231 |
except Exception as e:
|
| 232 |
logger.exception("Error during multi-modal enhancement.")
|
| 233 |
return self._error_state(f"Enhancement Error: {str(e)}")
|
|
|