- app.py +131 -0
- embedding_service.py +102 -0
- github_repo_downloader.py +132 -0
- graph_converter.py +41 -0
- level_computer.py +57 -0
- modal_client.py +40 -0
- modal_functions.py +109 -0
- prompt_generator.py +165 -0
- requirements.txt +11 -0
app.py
ADDED
@@ -0,0 +1,131 @@
from dotenv import load_dotenv
load_dotenv(".env")

import uuid
import ast

from llama_index.core.schema import TextNode

from github_repo_downloader import GitHubRepoDownloader
from pyan_insperation.analyzer import CallGraphVisitor
from graph_converter import pyan_to_networkx
from level_computer import compute_node_levels
from prompt_generator import generate_explaination_by_level
from embedding_service import EmbeddingService

from structlog import get_logger

logger = get_logger(__name__)

import gradio as gr

local_db = {}


def ingest(repo_url, branch="main"):
    """
    Clone a GitHub repo, parse its Python code, build the code graph, and index it.

    Args:
        repo_url: public GitHub repo URL.
        branch: branch to index, defaults to "main".

    Returns:
        project_id: internal ID used to reference the indexed project.
    """
    repo = GitHubRepoDownloader(repo_url=repo_url, branch=branch)
    yield "repo downloaded"

    files = repo.read_files(file_filter=lambda path: path.endswith(".py"))
    yield "Python files loaded"

    pyan_graph = CallGraphVisitor(files=files)
    graph = pyan_to_networkx(pyan_graph=pyan_graph)
    yield "graph built"

    levels = compute_node_levels(graph=graph)
    yield "generating explanations"
    prompts_by_level = generate_explaination_by_level(graph=graph, levels=levels)
    yield "embedding"

    nodes = []
    for node in graph.nodes:
        # Skip builtins, lambdas, and nodes without source.
        if node.namespace is None or node.get_short_name() in ["lambda"] or node.ast_node is None:
            continue
        if hasattr(node, "explination"):
            # Index the generated explanation when one exists.
            nodes.append(TextNode(
                text=node.explination,
                metadata={
                    "name": node.name,
                    "filename": node.filename,
                    "type": node.flavor.name,
                    "namespace": node.namespace,
                },
            ))
        else:
            # Otherwise index the source code itself.
            nodes.append(TextNode(
                text=ast.unparse(node.ast_node),
                metadata={
                    "name": node.name,
                    "filename": node.filename,
                    "type": node.flavor.name,
                    "namespace": node.namespace,
                },
            ))

    embedding = EmbeddingService("test")
    embedding.prepare_index(nodes)
    project_id = uuid.uuid4().hex
    local_db[project_id] = embedding

    yield project_id


def query(project_id, question, top_k=10):
    """
    Retrieve the most relevant nodes so a reasoning LLM can answer the question.

    Args:
        project_id: ID returned from ingest().
        question: user question about the codebase.
        top_k: number of nodes to retrieve.

    Returns:
        answer: retrieved context (node explanations and code) for the question.
    """
    retrievers = local_db[project_id].infer(question, top_k=top_k)
    yield retrievers


demo = gr.TabbedInterface(
    [
        gr.Interface(
            ingest,
            [
                gr.Textbox(label="repo_url"),
                gr.Textbox(label="branch", value="main"),
            ],
            gr.Textbox(label="project_id"),
        ),
        gr.Interface(
            query,
            [
                gr.Textbox(label="project_id"),
                gr.Textbox(label="query"),
                gr.Number(value=5, label="top_k", maximum=20, minimum=2),
            ],
            gr.Textbox(label="answer"),
        ),
    ],
    [
        "Ingest Repo",
        "Query Project",
    ],
)

demo.launch(mcp_server=True)

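Usage sketch (not part of the diff): the two functions can also be driven directly instead of through the Gradio UI. Both are generators; ingest's final yield is the project_id. The repo URL below is only a placeholder.

    # Ingest a repo, then ask a question against the resulting index.
    status = None
    for status in ingest("https://github.com/owner/repo", branch="main"):
        print("ingest:", status)
    project_id = status  # last yielded value is the project id

    for answer in query(project_id, "What does the EmbeddingService class do?", top_k=5):
        print(answer)
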
embedding_service.py
ADDED
@@ -0,0 +1,102 @@
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext, Settings
from openai import OpenAI
from modal_client import ModalClient

from structlog import get_logger

logger = get_logger(__name__)

from typing import Any, List

from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.embeddings import BaseEmbedding


class CustomEmbeddings(BaseEmbedding):
    """OpenAI-compatible embedding client pointed at the Modal-hosted vLLM server."""

    # Declare the private attribute so the pydantic-based BaseEmbedding accepts it.
    _client: Any = PrivateAttr()

    def __init__(
        self,
        base_url: str,
        api_key: str,
        model_name: str,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self._client = OpenAI(
            base_url=base_url,
            api_key=api_key,
        )
        self.model_name = model_name

    @classmethod
    def class_name(cls) -> str:
        return "custom"

    async def _aget_query_embedding(self, query: str) -> List[float]:
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        return self._get_text_embedding(text)

    def _get_query_embedding(self, query: str) -> List[float]:
        embeddings = self._client.embeddings.create(
            model=self.model_name,
            input=[query],
        ).data[0].embedding
        return embeddings

    def _get_text_embedding(self, text: str) -> List[float]:
        embeddings = self._client.embeddings.create(
            model=self.model_name,
            input=[text],
        ).data[0].embedding
        return embeddings

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        embeddings_data = self._client.embeddings.create(
            model=self.model_name,
            input=texts,
        )
        return [embedding.embedding for embedding in embeddings_data.data]


class EmbeddingService:
    def __init__(self, collection_name):
        config = ModalClient.embedding_config()
        Settings.embed_model = CustomEmbeddings(
            api_key=config.get("api_key"),
            base_url=config.get("base_url"),
            model_name=config.get("model"),
            embed_batch_size=32,
        )
        Settings.chunk_size = 1024
        chroma_client = chromadb.EphemeralClient()
        chroma_collection = chroma_client.create_collection(collection_name)
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        self.storage_context = StorageContext.from_defaults(vector_store=vector_store)

    def prepare_index(self, nodes):
        # The inputs are already TextNode objects, so build the index from nodes
        # directly rather than via from_documents().
        self.index = VectorStoreIndex(nodes, storage_context=self.storage_context)

    def infer(self, query, top_k=10):
        retriever = self.index.as_retriever(similarity_top_k=top_k)
        results = retriever.retrieve(query)
        text = ""
        for result in results:
            text += "\n -------------------------- \n"
            text += f"name = {result.metadata['name']}\n"
            text += f"filename = {result.metadata['filename']}\n"
            text += f"type = {result.metadata['type']}\n"
            text += f"namespace = {result.metadata['namespace']}\n"
            text += f"content = {result.text}\n"
        return text

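Usage sketch (not part of the diff): indexing a couple of hand-made nodes and querying them. This assumes MODAL_URL_LLM_EMBEDDING and VLLM_API_KEY point at a live embedding endpoint; the node contents and metadata values are made up for illustration.

    from llama_index.core.schema import TextNode

    service = EmbeddingService("demo")
    service.prepare_index([
        TextNode(
            text="Parses a GitHub URL into an (owner, repo) pair.",
            metadata={
                "name": "_parse_repo_url",
                "filename": "github_repo_downloader.py",
                "type": "METHOD",
                "namespace": "GitHubRepoDownloader",
            },
        ),
    ])
    print(service.infer("How are repo URLs parsed?", top_k=1))
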
github_repo_downloader.py
ADDED
@@ -0,0 +1,132 @@
from pathlib import Path
from typing import Callable, Dict, Optional
from urllib.parse import urlparse

from structlog import get_logger
import requests
import zipfile


logger = get_logger(__name__)


class GitHubRepoDownloader:
    def __init__(self, repo_url: str, branch: str = "main", cache_dir: str = ".cache"):
        """
        Initialize the downloader with a GitHub repo URL.

        Args:
            repo_url: Full GitHub repo URL (e.g., https://github.com/owner/repo)
            branch: Branch name to download (default: main)
            cache_dir: Directory to cache downloaded files
        """
        self.owner, self.repo = self._parse_repo_url(repo_url)
        self.branch = branch
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(exist_ok=True)
        self._validate_branch()

    def _parse_repo_url(self, repo_url: str) -> tuple[str, str]:
        """Extract owner and repo name from a GitHub URL"""
        repo_url = repo_url.rstrip('/').removesuffix('.git')

        # Validate that it is a GitHub URL
        parsed = urlparse(repo_url)
        if 'github.com' not in parsed.netloc:
            message = f"Not a GitHub URL: {repo_url}"
            logger.error(message)
            raise ValueError(message)

        parts = repo_url.split('/')
        if len(parts) < 2:
            message = f"Invalid GitHub URL format: {repo_url}"
            logger.error(message)
            raise ValueError(message)

        repo = parts[-1]
        owner = parts[-2]

        return owner, repo

    def _validate_branch(self) -> None:
        """Validate that the branch exists in the repository"""
        url = f"https://api.github.com/repos/{self.owner}/{self.repo}/branches/{self.branch}"
        logger.info(f"Validating branch: {self.branch}")

        response = requests.get(url)
        if response.status_code == 404:
            message = f"Branch '{self.branch}' not found in {self.owner}/{self.repo}"
            logger.error(message)
            raise ValueError(message)
        response.raise_for_status()

    def _get_cache_path(self) -> Path:
        """Get the cache file path for this repo"""
        return self.cache_dir / f"{self.owner}_{self.repo}_{self.branch}.zip"

    def _download_zip(self) -> Path:
        """Download the repo ZIP to the cache"""
        cache_path = self._get_cache_path()

        # Return the cached file if it exists
        if cache_path.exists():
            logger.info(f"Using cached file: {cache_path}")
            return cache_path

        # Download the ZIP
        url = f"https://github.com/{self.owner}/{self.repo}/archive/refs/heads/{self.branch}.zip"
        logger.info(f"Downloading {self.owner}/{self.repo} (branch: {self.branch})...")

        response = requests.get(url)
        response.raise_for_status()

        with open(cache_path, 'wb') as f:
            f.write(response.content)

        logger.info(f"Saved to cache: {cache_path}")
        return cache_path

    def read_files(self, file_filter: Optional[Callable[[str], bool]] = None) -> Dict[str, str]:
        """
        Read files from the repo without extracting the archive.

        Args:
            file_filter: Optional function to filter files (e.g., lambda path: path.endswith('.py'))

        Returns:
            Dictionary mapping file paths to their contents
        """
        cache_path = self._download_zip()

        files_content = {}

        with zipfile.ZipFile(cache_path) as zip_file:
            for filename in zip_file.namelist():
                if filename.endswith('/'):
                    continue

                # Remove the root folder (format: repo-branch/path/to/file)
                clean_path = '/'.join(filename.split('/')[1:])
                if not clean_path:
                    continue

                # Apply the filter
                if file_filter and not file_filter(clean_path):
                    continue

                logger.info(f"Reading: {clean_path}")

                try:
                    with zip_file.open(filename) as file:
                        content = file.read().decode('utf-8', errors='ignore')
                        files_content[clean_path] = content
                except Exception as e:
                    logger.exception(f"⚠️ Error reading {clean_path}: {e}")

        return files_content

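Usage sketch (not part of the diff): downloading a public repo and reading only its Python files; the repo URL is a placeholder.

    downloader = GitHubRepoDownloader("https://github.com/owner/repo", branch="main")
    py_files = downloader.read_files(file_filter=lambda path: path.endswith(".py"))
    for path, source in py_files.items():
        print(path, len(source), "chars")
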
graph_converter.py
ADDED
@@ -0,0 +1,41 @@
"""
Graph Converter Module

Converts Pyan call graphs to NetworkX directed graphs for further processing.
"""

import networkx as nx
from pyan_insperation.analyzer import CallGraphVisitor


def pyan_to_networkx(pyan_graph: CallGraphVisitor) -> nx.DiGraph:
    """
    Convert a Pyan call graph to a NetworkX directed graph.

    This function processes both defines_edges (containment relationships like
    class-contains-method) and uses_edges (usage relationships like function-calls-function)
    from the Pyan analyzer and creates a unified NetworkX graph with labeled edges.

    Args:
        pyan_graph: CallGraphVisitor instance with defines_edges and uses_edges

    Returns:
        nx.DiGraph with nodes and labeled edges ("contains" or "use")
    """
    graph = nx.DiGraph()

    # Process defines_edges - containment relationships
    for node, defined_nodes in pyan_graph.defines_edges.items():
        graph.add_node(node)
        for defined_node in defined_nodes:
            graph.add_node(defined_node)
            graph.add_edge(node, defined_node, label="contains")

    # Process uses_edges - usage relationships
    for node, used_nodes in pyan_graph.uses_edges.items():
        graph.add_node(node)
        for used_node in used_nodes:
            graph.add_node(used_node)
            graph.add_edge(node, used_node, label="use")

    return graph

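A quick way to sanity-check the conversion is to dump the labeled edges of the resulting graph. Sketch only; it assumes `files` is the path-to-source mapping returned by GitHubRepoDownloader.read_files.

    graph = pyan_to_networkx(pyan_graph=CallGraphVisitor(files=files))
    for src, dst, data in graph.edges(data=True):
        print(f"{src} --{data['label']}--> {dst}")
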
level_computer.py
ADDED
@@ -0,0 +1,57 @@
"""
Level Computer Module

Computes dependency levels for graph nodes to enable efficient batching for LLM processing.
"""

import networkx as nx


def compute_node_levels(graph: nx.DiGraph) -> dict:
    """
    Compute the level of each node based on successor depth.

    Nodes are assigned levels based on their position in the dependency graph:
    - Level 0: nodes with no successors (leaf nodes)
    - Level N: 1 + max(successor levels)

    This function handles cycles by condensing the graph into strongly connected
    components before computing levels.

    Args:
        graph: NetworkX directed graph

    Returns:
        Dictionary mapping each level (int) to the list of nodes at that level
    """
    # Condense the graph to handle strongly connected components (cycles)
    C_graph = nx.condensation(graph)
    scc_map = C_graph.graph['mapping']

    levels = {}

    def level(node):
        """Recursively compute the level of a condensed node with memoization."""
        if node in levels:
            return levels[node]

        succ = list(C_graph.successors(node))

        if not succ:  # No outgoing edges → level 0
            levels[node] = 0
        else:
            levels[node] = 1 + max(level(s) for s in succ)

        return levels[node]

    # Compute levels for all nodes in the condensed graph
    for node in C_graph.nodes():
        level(node)

    # Map each original node to the level of its strongly connected component
    node_to_level = {node: levels[scc_map[node]] for node in graph.nodes()}

    # Group nodes by level
    level_to_node = {}
    for node, node_level in node_to_level.items():
        level_to_node.setdefault(node_level, []).append(node)
    return level_to_node

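Worked example (not part of the diff): a small dependency graph with a cycle, showing how the condensation keeps the recursion finite.

    import networkx as nx
    from level_computer import compute_node_levels

    g = nx.DiGraph()
    g.add_edges_from([("a", "b"), ("b", "a"), ("b", "c"), ("c", "d")])
    print(compute_node_levels(g))
    # "d" has no successors -> level 0, "c" -> level 1, and the a<->b cycle is
    # condensed into a single component at level 2:
    # level 0 -> ['d'], level 1 -> ['c'], level 2 -> ['a', 'b']
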
modal_client.py
ADDED
@@ -0,0 +1,40 @@
import os
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor

# Define your models
EXPLANATION_MODEL = "Qwen/Qwen3-4B-Instruct-2507"
EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-8B"


class ModalClient:

    @staticmethod
    def infer_llm(prompts: list[str], max_tokens: int = 800):
        client = OpenAI(
            base_url=os.environ.get("MODAL_URL_LLM_INFERENCE"),
            api_key=os.environ.get('VLLM_API_KEY', 'not-needed')
        )

        def process_one(prompt):
            response = client.chat.completions.create(
                model=EXPLANATION_MODEL,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens
            )
            return response.choices[0].message.content

        # Fan the prompts out over a thread pool; results keep the input order.
        with ThreadPoolExecutor(max_workers=32) as executor:
            results = list(executor.map(process_one, prompts))

        return results

    @staticmethod
    def embedding_config():
        return {
            "base_url": os.environ.get("MODAL_URL_LLM_EMBEDDING"),
            "api_key": os.environ.get('VLLM_API_KEY', 'not-needed'),
            "model": EMBEDDING_MODEL
        }

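Usage sketch (not part of the diff): calling the hosted explanation model directly, assuming MODAL_URL_LLM_INFERENCE (and, if required, VLLM_API_KEY) are set in the environment.

    explanations = ModalClient.infer_llm(
        ["Explain what a strongly connected component is in one sentence."],
        max_tokens=100,
    )
    print(explanations[0])
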
modal_functions.py
ADDED
@@ -0,0 +1,109 @@
import os
import modal


app = modal.App("code-understanding")


vllm_image = (
    modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.12")
    .entrypoint([])
    .uv_pip_install(
        "vllm==0.11.2",
        "huggingface-hub==0.36.0",
        "flashinfer-python==0.5.2",
    )
    .env({"HF_XET_HIGH_PERFORMANCE": "1"})  # faster model transfers
)


# Configuration
EXPLANATION_MODEL = os.environ.get("EXPLANATION_MODEL", "Qwen/Qwen3-4B-Instruct-2507")
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-8B")
VLLM_PORT = 8000
MINUTES = 60
N_GPU = 1
FAST_BOOT = True


@app.function(
    image=vllm_image,
    gpu=f"A10:{N_GPU}",
    scaledown_window=55 * MINUTES,  # how long should we stay up with no requests?
    timeout=10 * MINUTES,  # how long should we wait for container start?
    secrets=[modal.Secret.from_name("vllm-auth")],
)
@modal.concurrent(max_inputs=32)
@modal.web_server(port=VLLM_PORT, startup_timeout=10 * MINUTES)
def explain_code_batch():
    import subprocess

    cmd = [
        "vllm",
        "serve",
        "--uvicorn-log-level=info",
        EXPLANATION_MODEL,
        "--served-model-name",
        EXPLANATION_MODEL,
        "--host",
        "0.0.0.0",
        "--port",
        str(VLLM_PORT),
        "--max-model-len", "40000",
    ]

    cmd += ["--enforce-eager" if FAST_BOOT else "--no-enforce-eager"]
    cmd += ["--tensor-parallel-size", str(N_GPU)]

    print(cmd)

    subprocess.Popen(" ".join(cmd), shell=True)


@app.function(
    image=vllm_image,
    gpu=f"A10:{N_GPU}",
    scaledown_window=55 * MINUTES,
    timeout=10 * MINUTES,
    secrets=[modal.Secret.from_name("vllm-auth")],
)
@modal.concurrent(max_inputs=32)
@modal.web_server(port=VLLM_PORT, startup_timeout=10 * MINUTES)
def generate_embeddings_batch():
    import subprocess

    cmd = [
        "vllm",
        "serve",
        "--uvicorn-log-level=info",
        EMBEDDING_MODEL,
        "--served-model-name",
        EMBEDDING_MODEL,
        "--host",
        "0.0.0.0",
        "--port",
        str(VLLM_PORT),
        "--task",
        "embedding",
        "--max-model-len", "40000",
    ]

    cmd += ["--enforce-eager" if FAST_BOOT else "--no-enforce-eager"]
    cmd += ["--tensor-parallel-size", str(N_GPU)]

    print(cmd)

    subprocess.Popen(" ".join(cmd), shell=True)

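Deployment sketch (not part of the diff): the two web servers are deployed with the Modal CLI, and their endpoint URLs presumably become the values modal_client.py reads from the environment (vLLM serves an OpenAI-compatible API, typically under a /v1 path). Hostnames below are placeholders.

    # Deploy both vLLM servers:
    #   modal deploy modal_functions.py
    #
    # Then point the client at the printed endpoints in .env (placeholder URLs):
    # MODAL_URL_LLM_INFERENCE=https://<workspace>--<explain-endpoint>.modal.run/v1
    # MODAL_URL_LLM_EMBEDDING=https://<workspace>--<embedding-endpoint>.modal.run/v1
    # VLLM_API_KEY=<key from the vllm-auth secret, if the server enforces one>
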
prompt_generator.py
ADDED
@@ -0,0 +1,165 @@
"""
Prompt Generator Module

Generates structured prompts for LLM-based code explanation, organized by node level.
"""
import ast
import copy
import networkx as nx
from structlog import get_logger
from modal_client import ModalClient


logger = get_logger(__name__)


def generate_explaination_by_level(graph: nx.DiGraph, levels: dict) -> dict[int, dict]:
    """
    Generate LLM prompts organized by node level.

    Creates a prompt for each node that includes:
    - File path
    - Used modules (name + content from graph successors)
    - Node content (unparsed AST)

    Nodes without a namespace are skipped as they typically represent
    external or incomplete references.

    Args:
        graph: NetworkX directed graph with code nodes
        levels: Dictionary mapping each level to its list of nodes

    Returns:
        Dictionary mapping level → {node: prompt_string}
    """
    # Generate prompts for each level
    prompts_by_level = {}

    for level in range(max(levels.keys()) + 1):
        if level not in levels:
            continue

        batch = {}

        for node in levels[level]:
            # Skip builtins, lambdas, and nodes without source.
            if node.namespace is None or node.get_short_name() in ["lambda"] or node.ast_node is None:
                continue

            # Only generate explanations for larger code blocks; short nodes are
            # indexed verbatim at embedding time.
            if len(ast.unparse(node.ast_node)) < 1000:
                continue

            prompt = """You are a Python code analysis expert.

**CRITICAL RULES:**
1. ONLY use information directly visible in the "TARGET CODE" section
2. For methods marked as "[SUMMARIZED]", reference them by their actual name shown
3. If a method body is replaced with a summary, DO NOT invent details about its implementation
4. State "implementation details not shown" for summarized methods

Your explanation must be brief and cover:
- Purpose: What this code does (1-2 sentences)
- Inputs: Parameters (only those visible)
- Outputs: Return values (only those visible)
- Exceptions: Only exceptions explicitly raised in the visible code (1 sentence)

"""
            node_copy = copy.deepcopy(node)

            # Extract used modules from graph successors
            used_modules = []
            summarized_methods = []
            for used_node in graph.successors(node):
                # Ignore Python built-in functions, lambdas, and nodes without source.
                if used_node.namespace is None or used_node.get_short_name() in ["lambda"] or used_node.ast_node is None:
                    continue

                label = graph.get_edge_data(node, used_node).get("label")

                if label == 'contains':
                    # Replace already-explained child definitions with a summary marker
                    # so the target code stays short.
                    if used_node.ast_node in node.ast_node.body and hasattr(used_node, "explination"):
                        if isinstance(used_node.ast_node, ast.FunctionDef):
                            # Keep the function signature visible
                            signature = f"def {used_node.ast_node.name}({ast.unparse(used_node.ast_node.args)})"
                            if used_node.ast_node.returns:
                                signature += f" -> {ast.unparse(used_node.ast_node.returns)}"

                            marker_text = f"""[SUMMARIZED METHOD]
Method: {used_node.name}
Signature: {signature}
Summary: {used_node.explination}
Note: Full implementation replaced for brevity"""

                        elif isinstance(used_node.ast_node, ast.ClassDef):
                            marker_text = f"""[SUMMARIZED CLASS]
Class: {used_node.name}
Summary: {used_node.explination}
Note: Full implementation replaced for brevity"""

                        else:
                            marker_text = f"""[SUMMARIZED]
Name: {used_node.name}
Summary: {used_node.explination}"""

                        new_child = ast.Expr(value=ast.Constant(value=marker_text))
                        for i, child in enumerate(node.ast_node.body):
                            if child == used_node.ast_node:
                                node_copy.ast_node.body[i] = new_child
                                summarized_methods.append(used_node.name)
                                break

                elif label == 'use':
                    used_modules.append(used_node)

            # Build the prompt
            prompt += f"**Target File Path:** {node.filename}\n\n"
            logger.info(f"used modules: {len(used_modules)}")
            if used_modules:
                # NOTE: the dependency list is not truncated, even when it is long.
                prompt += "**External Dependencies Used:**\n"
                for used_node in used_modules:
                    if hasattr(used_node, "explination"):
                        prompt += f"""- **{used_node.name}** [EXPLAINED]
  - File: {used_node.filename}
  - Explanation: {used_node.explination}"""
                    else:
                        prompt += f"""- **{used_node.name}**
  - File: {used_node.filename}
  - Python Code: {ast.unparse(used_node.ast_node)}"""

            if summarized_methods:
                prompt += f"**Note:** The following methods are summarized in the code below: {', '.join(summarized_methods)}\n\n"

            prompt += f"""**TARGET CODE:**
```python
{ast.unparse(node_copy.ast_node)}
```

Explain the TARGET CODE above. Be brief and precise.
"""

            batch[node] = prompt

        if batch:
            results = ModalClient.infer_llm(list(batch.values()))
            for index, node in enumerate(batch.keys()):
                node.explination = results[index]
            prompts_by_level[level] = batch

    return prompts_by_level

requirements.txt
ADDED
@@ -0,0 +1,11 @@
gradio[mcp]
structlog
requests
networkx
matplotlib
modal
openai
python-dotenv
llama-index-vector-stores-chroma
llama-index