File size: 6,361 Bytes
e147c33
b87b5f6
 
e147c33
c43538b
 
4ec4b26
 
 
c43538b
4ec4b26
b87b5f6
e147c33
b87b5f6
c43538b
b87b5f6
 
e147c33
 
b87b5f6
e147c33
 
c43538b
b87b5f6
4ec4b26
b87b5f6
 
4ec4b26
b87b5f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ec4b26
b87b5f6
4ec4b26
 
 
b87b5f6
4ec4b26
b87b5f6
 
4ec4b26
b87b5f6
e147c33
4ec4b26
 
e147c33
 
 
 
b87b5f6
e147c33
 
4ec4b26
e147c33
b87b5f6
e147c33
b87b5f6
c43538b
4ec4b26
e147c33
c43538b
b87b5f6
c43538b
b87b5f6
4ec4b26
e147c33
 
4ec4b26
 
e147c33
 
 
 
 
 
4ec4b26
 
e147c33
 
 
 
 
 
4ec4b26
 
e147c33
 
 
 
 
 
4ec4b26
 
e147c33
 
 
 
 
 
4ec4b26
 
e147c33
 
 
 
 
 
 
b87b5f6
c43538b
b87b5f6
e147c33
b87b5f6
4ec4b26
b87b5f6
4ec4b26
 
 
 
 
 
e147c33
 
4ec4b26
 
e147c33
 
 
 
4ec4b26
 
e147c33
 
 
 
4ec4b26
 
e147c33
 
 
 
b87b5f6
4ec4b26
e147c33
 
 
 
4ec4b26
 
e147c33
 
 
 
 
b87b5f6
c43538b
b87b5f6
e147c33
a47d7d2
e147c33
 
 
4ec4b26
e147c33
 
4ec4b26
e147c33
 
a47d7d2
e147c33
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import os
import requests
from typing import List
from crewai import Agent, Task, Crew, Process
from dotenv import load_dotenv

# ✅ Gemini client (modern import)
from google import genai

load_dotenv()

# ---------------------------------
# CONFIG
# ---------------------------------
# API key for the Gemini client; mandatory (validated just below).
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Optional GitHub token; when unset, the GitHub API request in
# fetch_repo_files is sent without an Authorization header.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
# Gemini model name, overridable through the GEMINI_MODEL environment variable.
MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")

# Fail at import time rather than on the first model call.
if not GOOGLE_API_KEY:
    raise RuntimeError("❌ Missing GOOGLE_API_KEY — get one at https://aistudio.google.com")

# Shared client used by GeminiLLM.generate.
client = genai.Client(api_key=GOOGLE_API_KEY)

# ---------------------------------
# SIMPLE GITHUB FETCHER (no embeddings)
# ---------------------------------
def fetch_repo_files(repo_url: str, max_files: int = 10) -> List[str]:
    """Return raw download URLs for a few code/text files at a repo's root.

    Only the top-level directory listing of the repository is inspected
    (GitHub "contents" REST endpoint); sub-directories are not traversed.

    Args:
        repo_url: GitHub repository URL (``https://github.com/owner/repo``,
            optionally with a trailing slash, ``.git`` suffix, extra path
            segments, query string or fragment) or ``owner/repo`` shorthand.
        max_files: Upper bound on the number of URLs returned. Values
            less than or equal to zero yield an empty list.

    Returns:
        A list of download URLs for files ending in ``.py``, ``.js``,
        ``.ts`` or ``.md``. On failure the list instead holds a single
        message starting with ``"⚠️ Error fetching repo:"``; this function
        does not raise.
    """
    if max_files <= 0:
        return []

    # Reduce whatever the caller passed to the bare "owner/repo" pair so that
    # trailing slashes, ".git", "/tree/<branch>" etc. do not corrupt the API URL.
    tail = repo_url.strip().split("github.com/")[-1]
    tail = tail.split("?", 1)[0].split("#", 1)[0]
    segments = [part for part in tail.split("/") if part]
    if len(segments) < 2:
        return [f"⚠️ Error fetching repo: cannot parse owner/repo from {repo_url!r}"]
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]

    wanted_suffixes = (".py", ".js", ".ts", ".md")
    try:
        api_url = f"https://api.github.com/repos/{owner}/{repo}/contents"
        headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
        # Same timeout as fetch_file_content so a stalled connection cannot hang the run.
        response = requests.get(api_url, headers=headers, timeout=10)
        response.raise_for_status()
        entries = response.json()
        if not isinstance(entries, list):
            raise ValueError("unexpected response shape from GitHub contents API")

        files = []
        for entry in entries:
            url = entry.get("download_url")
            is_wanted = entry.get("type") == "file" and entry.get("name", "").endswith(wanted_suffixes)
            # Skip entries without a download URL: callers treat every item as fetchable.
            if is_wanted and url:
                files.append(url)
                if len(files) >= max_files:
                    break
        return files
    except Exception as e:
        # Deliberately best-effort: callers expect a list, never an exception.
        return [f"⚠️ Error fetching repo: {e}"]


def fetch_file_content(url: str) -> str:
    """Download ``url`` and return its body as text.

    Any failure (connection problem, non-2xx status, ...) is reported as a
    warning string containing the URL and the error instead of raising.
    """
    try:
        reply = requests.get(url, timeout=10)
        reply.raise_for_status()
        body = reply.text
    except Exception as err:
        return f"⚠️ Could not fetch {url}\nError: {err}"
    return body

# ---------------------------------
# GEMINI WRAPPER
# ---------------------------------
class GeminiLLM:
    """Thin wrapper that sends a prompt to Gemini through the module-level client.

    NOTE(review): the wrapper is handed to CrewAI agents as ``llm=``; whether
    CrewAI accepts an object exposing only ``generate`` should be confirmed
    against the CrewAI LLM documentation.
    """

    def __init__(self, model: str):
        """Remember the model name used for every request."""
        self.model = model

    def generate(self, prompt: str) -> str:
        """Return the model's text reply to ``prompt``.

        Errors are not raised; they are returned as a string starting with
        ``"⚠️ Gemini Error:"``.
        """
        try:
            res = client.models.generate_content(
                model=self.model,
                contents=prompt,
                # The google-genai SDK names this parameter ``config``; the
                # previous ``generation_config`` keyword was rejected, so every
                # call fell into the error branch below.
                config={"temperature": 0.6, "max_output_tokens": 2048},
            )
            # ``text`` can be None when the response carries no text part.
            return res.text or ""
        except Exception as e:
            return f"⚠️ Gemini Error: {e}"

# Module-level LLM instance shared by every agent built in make_agents.
gemini_llm = GeminiLLM(MODEL_NAME)

# ---------------------------------
# AGENTS
# ---------------------------------
def make_agents(repo_url: str):
    """Build the four specialist agents plus the manager agent.

    ``repo_url`` is accepted for signature symmetry with ``make_tasks`` but is
    not used when constructing the agents.

    Returns:
        Tuple ``(repo_mapper, code_reviewer, security_auditor, doc_explainer,
        manager)``. Every agent shares the module-level ``gemini_llm`` and is
        verbose; only the manager has ``allow_delegation=True``.
    """

    def _build(role, goal, backstory, **extra):
        # All agents share the same LLM and verbosity; extras vary per agent.
        return Agent(
            role=role,
            goal=goal,
            backstory=backstory,
            llm=gemini_llm,
            verbose=True,
            **extra,
        )

    repo_mapper = _build(
        "Repository Mapper",
        "Map project structure, detect tech stack, and summarize key components.",
        "You analyze folder trees and dependencies for architecture insights.",
    )
    code_reviewer = _build(
        "Code Reviewer",
        "Perform pragmatic code reviews with clear, actionable feedback.",
        "An experienced engineer providing concise improvement tips.",
    )
    security_auditor = _build(
        "Security Auditor",
        "Find and describe security issues, secrets, or risky dependencies.",
        "You think like an attacker but document like a pro auditor.",
    )
    doc_explainer = _build(
        "Documentation Explainer",
        "Explain repository purpose, architecture, and how to run or contribute.",
        "You make complex projects understandable for new contributors.",
    )
    manager = _build(
        "Engineering Manager",
        "Coordinate all other agents and compile a cohesive final report.",
        "A seasoned manager merging all insights into one structured summary.",
        allow_delegation=True,
    )

    return repo_mapper, code_reviewer, security_auditor, doc_explainer, manager

# ---------------------------------
# TASKS
# ---------------------------------
def make_tasks(repo_url: str, brief: str = ""):
    """Build the five review tasks, each seeded with a snapshot of the repo.

    Up to five files are listed via ``fetch_repo_files`` and downloaded via
    ``fetch_file_content``; their concatenated text (truncated to 5000
    characters) is embedded in the description of the first four tasks.

    Args:
        repo_url: Repository URL forwarded to ``fetch_repo_files``.
        brief: Optional free-text brief added to the shared context.

    Returns:
        Tuple ``(t_map, t_review, t_sec, t_doc, t_merge)``.
    """
    listing = fetch_repo_files(repo_url)[:5]

    # fetch_repo_files reports failure as a message inside the list. Such an
    # entry is not a URL, so it is kept as a notice in the context instead of
    # being passed to fetch_file_content (which would issue a bogus request).
    url_prefixes = ("http://", "https://")
    urls = [item for item in listing if item.startswith(url_prefixes)]
    notices = [item for item in listing if not item.startswith(url_prefixes)]
    file_contents = "\n\n".join(notices + [fetch_file_content(u) for u in urls])

    context = (
        f"Repository: {repo_url}\n"
        f"{'Brief: ' + brief if brief else ''}\n"
        f"Fetched files: {', '.join(urls)}\n\n"
        f"{file_contents[:5000]}"
    )

    # NOTE(review): ``agent_role`` is passed through unchanged; confirm that
    # Task recognises it (CrewAI documents binding a task with ``agent=``).
    t_map = Task(
        description=f"{context}\n\nMap structure, dependencies, and key technologies.",
        expected_output="Markdown summary: Structure | Frameworks | Key Files.",
        agent_role="Repository Mapper",
    )

    t_review = Task(
        description=f"{context}\n\nPerform a detailed review and suggest refactors.",
        expected_output="Code review bullets grouped by improvement type.",
        agent_role="Code Reviewer",
    )

    t_sec = Task(
        description=f"{context}\n\nPerform security audit of visible files.",
        expected_output="Table: Issue | Evidence | Risk | Fix.",
        agent_role="Security Auditor",
    )

    t_doc = Task(
        description=f"{context}\n\nExplain what this repo does and how to run it.",
        expected_output="Architecture overview + Quickstart guide.",
        agent_role="Documentation Explainer",
    )

    # The merge task carries no repo context of its own.
    t_merge = Task(
        description="Merge all reports into one well-structured Markdown file with title, TOC, and clear sections.",
        expected_output="Final cohesive Markdown report.",
        agent_role="Engineering Manager",
    )

    return t_map, t_review, t_sec, t_doc, t_merge

# ---------------------------------
# RUNNER
# ---------------------------------
def run_repo_review(repo_url: str, brief: str = "") -> str:
    """Run the full multi-agent review of ``repo_url`` and return the report.

    Agents and tasks are built with ``make_agents`` / ``make_tasks`` and run
    in a hierarchical crew; the manager agent is supplied separately as
    ``manager_agent`` and is not part of the ``agents`` list. The result of
    ``kickoff`` is returned as a string.
    """
    # The last agent returned is the manager; the rest are the specialists.
    *specialists, lead = make_agents(repo_url)
    task_list = list(make_tasks(repo_url, brief))

    review_crew = Crew(
        agents=specialists,  # manager excluded
        tasks=task_list,
        process=Process.hierarchical,
        manager_agent=lead,
        verbose=True,
    )

    return str(review_crew.kickoff())