CodeMode Agent committed on
Commit ·
0498daa
1
Parent(s): fbf5c49
Deploy CodeMode via Agent
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# CodeMode Comparison Demo - Last updated: 2026-02-11 23:59
|
| 2 |
import gradio as gr
|
| 3 |
import torch
|
| 4 |
import torch.nn.functional as F
|
|
@@ -115,6 +114,125 @@ def list_finetuned_files():
|
|
| 115 |
except Exception as e:
|
| 116 |
return [[f"Error: {str(e)}", "-", "-"]]
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
# --- Search Functions ---
|
| 119 |
def search_baseline(query, top_k=5):
|
| 120 |
if baseline_collection.count() == 0: return []
|
|
@@ -600,6 +718,66 @@ with gr.Blocks(theme=theme, css=css, title="CodeMode - Baseline vs Fine-tuned")
|
|
| 600 |
value=[["No data yet", "-", "-"]]
|
| 601 |
)
|
| 602 |
inspect_finetuned_btn.click(list_finetuned_files, inputs=[], outputs=finetuned_files_df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 603 |
|
| 604 |
# TAB 2: COMPARISON SEARCH
|
| 605 |
with gr.Tab("2. Comparison Search (Note: Semantic search is sensitive to query phrasing)"):
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
import torch.nn.functional as F
|
|
|
|
| 114 |
except Exception as e:
|
| 115 |
return [[f"Error: {str(e)}", "-", "-"]]
|
| 116 |
|
| 117 |
+
# --- Chunk Inspector Functions ---
|
| 118 |
+
def get_files_list_baseline():
    """Return the sorted unique file names indexed in the baseline collection.

    Reads only metadata from the collection; each metadata entry is expected
    to carry a "file_name" key (entries missing it map to "unknown" —
    assumption based on the indexer's metadata shape, confirm against the
    ingestion code).

    Returns:
        list[str]: sorted unique file names, or [] when the collection is
        empty or the lookup fails.
    """
    try:
        data = baseline_collection.get(include=["metadatas"])
        if not data['metadatas']:
            return []
        # De-duplicate with a set comprehension and sort for a stable
        # dropdown order (original built an intermediate list first).
        return sorted({m.get("file_name", "unknown") for m in data['metadatas']})
    except Exception:
        # Best-effort: a failed lookup leaves the dropdown empty rather than
        # crashing the UI. The exception value was never used, so don't bind it.
        return []
|
| 128 |
+
|
| 129 |
+
def get_files_list_finetuned():
    """Return the sorted unique file names indexed in the fine-tuned collection.

    Reads only metadata from the collection; each metadata entry is expected
    to carry a "file_name" key (entries missing it map to "unknown" —
    assumption based on the indexer's metadata shape, confirm against the
    ingestion code).

    Returns:
        list[str]: sorted unique file names, or [] when the collection is
        empty or the lookup fails.
    """
    try:
        data = finetuned_collection.get(include=["metadatas"])
        if not data['metadatas']:
            return []
        # De-duplicate with a set comprehension and sort for a stable
        # dropdown order (original built an intermediate list first).
        return sorted({m.get("file_name", "unknown") for m in data['metadatas']})
    except Exception:
        # Best-effort: a failed lookup leaves the dropdown empty rather than
        # crashing the UI. The exception value was never used, so don't bind it.
        return []
|
| 139 |
+
|
| 140 |
+
def get_chunks_for_file_baseline(file_name):
    """Return all indexed chunks for *file_name* from the baseline collection.

    Args:
        file_name: name of the file whose chunks to fetch; a falsy value
            short-circuits to an error payload.

    Returns:
        dict: on success ``{"file_name", "total_chunks", "chunks"}`` where
        each chunk holds a content preview (first 500 chars), the full
        length, its metadata, and the embedding dimensionality; on any
        failure ``{"error": msg}`` so the gr.JSON component always has
        something displayable.
    """
    if not file_name:
        return {"error": "No file selected"}

    try:
        data = baseline_collection.get(
            where={"file_name": file_name},
            include=["documents", "metadatas", "embeddings"]
        )

        if not data['documents']:
            return {"error": "No chunks found for this file"}

        # Recent chroma versions return embeddings as numpy arrays, whose
        # truth value is ambiguous — `if data['embeddings']` raises
        # ValueError there. Test explicitly for None/empty once, outside
        # the loop.
        embeddings = data.get('embeddings')
        has_embeddings = embeddings is not None and len(embeddings) > 0

        chunks = []
        for i, (doc, meta) in enumerate(zip(data['documents'], data['metadatas'])):
            chunks.append({
                "chunk_id": i + 1,
                # Truncate long documents so the JSON viewer stays readable.
                "content": doc[:500] + "..." if len(doc) > 500 else doc,
                "full_length": len(doc),
                "metadata": meta,
                "embedding_dim": len(embeddings[i]) if has_embeddings else 0
            })

        return {
            "file_name": file_name,
            "total_chunks": len(chunks),
            "chunks": chunks
        }
    except Exception as e:
        return {"error": str(e)}
|
| 171 |
+
|
| 172 |
+
def get_chunks_for_file_finetuned(file_name):
    """Return all indexed chunks for *file_name* from the fine-tuned collection.

    Args:
        file_name: name of the file whose chunks to fetch; a falsy value
            short-circuits to an error payload.

    Returns:
        dict: on success ``{"file_name", "total_chunks", "chunks"}`` where
        each chunk holds a content preview (first 500 chars), the full
        length, its metadata, and the embedding dimensionality; on any
        failure ``{"error": msg}`` so the gr.JSON component always has
        something displayable.
    """
    if not file_name:
        return {"error": "No file selected"}

    try:
        data = finetuned_collection.get(
            where={"file_name": file_name},
            include=["documents", "metadatas", "embeddings"]
        )

        if not data['documents']:
            return {"error": "No chunks found for this file"}

        # Recent chroma versions return embeddings as numpy arrays, whose
        # truth value is ambiguous — `if data['embeddings']` raises
        # ValueError there. Test explicitly for None/empty once, outside
        # the loop.
        embeddings = data.get('embeddings')
        has_embeddings = embeddings is not None and len(embeddings) > 0

        chunks = []
        for i, (doc, meta) in enumerate(zip(data['documents'], data['metadatas'])):
            chunks.append({
                "chunk_id": i + 1,
                # Truncate long documents so the JSON viewer stays readable.
                "content": doc[:500] + "..." if len(doc) > 500 else doc,
                "full_length": len(doc),
                "metadata": meta,
                "embedding_dim": len(embeddings[i]) if has_embeddings else 0
            })

        return {
            "file_name": file_name,
            "total_chunks": len(chunks),
            "chunks": chunks
        }
    except Exception as e:
        return {"error": str(e)}
|
| 203 |
+
|
| 204 |
+
def download_chunks_baseline(file_name):
    """Export the baseline chunks for *file_name* to a temporary JSON file.

    Args:
        file_name: file whose chunks to export; a falsy value means nothing
            is selected.

    Returns:
        str | None: path of the generated ``.json`` temp file for the
        gr.File component, or None when no file is selected.
    """
    if not file_name:
        return None

    import json
    import tempfile

    chunks_data = get_chunks_for_file_baseline(file_name)

    # delete=False so the file outlives this function for Gradio to serve.
    # The context manager guarantees the handle is closed even if json.dump
    # raises (the original leaked the open handle on that path), and an
    # explicit utf-8 encoding avoids locale-dependent output.
    with tempfile.NamedTemporaryFile(
        mode='w', delete=False, suffix='.json', encoding='utf-8'
    ) as temp_file:
        json.dump(chunks_data, temp_file, indent=2)

    return temp_file.name
|
| 219 |
+
|
| 220 |
+
def download_chunks_finetuned(file_name):
    """Export the fine-tuned chunks for *file_name* to a temporary JSON file.

    Args:
        file_name: file whose chunks to export; a falsy value means nothing
            is selected.

    Returns:
        str | None: path of the generated ``.json`` temp file for the
        gr.File component, or None when no file is selected.
    """
    if not file_name:
        return None

    import json
    import tempfile

    chunks_data = get_chunks_for_file_finetuned(file_name)

    # delete=False so the file outlives this function for Gradio to serve.
    # The context manager guarantees the handle is closed even if json.dump
    # raises (the original leaked the open handle on that path), and an
    # explicit utf-8 encoding avoids locale-dependent output.
    with tempfile.NamedTemporaryFile(
        mode='w', delete=False, suffix='.json', encoding='utf-8'
    ) as temp_file:
        json.dump(chunks_data, temp_file, indent=2)

    return temp_file.name
|
| 235 |
+
|
| 236 |
# --- Search Functions ---
|
| 237 |
def search_baseline(query, top_k=5):
|
| 238 |
if baseline_collection.count() == 0: return []
|
|
|
|
| 718 |
value=[["No data yet", "-", "-"]]
|
| 719 |
)
|
| 720 |
inspect_finetuned_btn.click(list_finetuned_files, inputs=[], outputs=finetuned_files_df)
|
| 721 |
+
|
| 722 |
+
gr.Markdown("---")
|
| 723 |
+
gr.Markdown("### Chunk Inspector")
|
| 724 |
+
gr.Markdown("View detailed chunk information for indexed files (content, metadata, schema)")
|
| 725 |
+
|
| 726 |
+
with gr.Row():
|
| 727 |
+
with gr.Column():
|
| 728 |
+
gr.Markdown("#### Baseline Collection")
|
| 729 |
+
baseline_file_dropdown = gr.Dropdown(
|
| 730 |
+
label="Select File to Inspect",
|
| 731 |
+
choices=[],
|
| 732 |
+
interactive=True
|
| 733 |
+
)
|
| 734 |
+
baseline_refresh_files = gr.Button("Refresh File List", variant="secondary")
|
| 735 |
+
baseline_chunks_display = gr.JSON(label="Chunk Details")
|
| 736 |
+
baseline_download_btn = gr.Button("Download Chunks as JSON", variant="primary")
|
| 737 |
+
baseline_download_output = gr.File(label="Download")
|
| 738 |
+
|
| 739 |
+
with gr.Column():
|
| 740 |
+
gr.Markdown("#### Fine-tuned Collection")
|
| 741 |
+
finetuned_file_dropdown = gr.Dropdown(
|
| 742 |
+
label="Select File to Inspect",
|
| 743 |
+
choices=[],
|
| 744 |
+
interactive=True
|
| 745 |
+
)
|
| 746 |
+
finetuned_refresh_files = gr.Button("Refresh File List", variant="secondary")
|
| 747 |
+
finetuned_chunks_display = gr.JSON(label="Chunk Details")
|
| 748 |
+
finetuned_download_btn = gr.Button("Download Chunks as JSON", variant="primary")
|
| 749 |
+
finetuned_download_output = gr.File(label="Download")
|
| 750 |
+
|
| 751 |
+
# Wire up Chunk Inspector events
|
| 752 |
+
baseline_refresh_files.click(
|
| 753 |
+
lambda: gr.Dropdown(choices=get_files_list_baseline()),
|
| 754 |
+
outputs=baseline_file_dropdown
|
| 755 |
+
)
|
| 756 |
+
baseline_file_dropdown.change(
|
| 757 |
+
get_chunks_for_file_baseline,
|
| 758 |
+
inputs=baseline_file_dropdown,
|
| 759 |
+
outputs=baseline_chunks_display
|
| 760 |
+
)
|
| 761 |
+
baseline_download_btn.click(
|
| 762 |
+
download_chunks_baseline,
|
| 763 |
+
inputs=baseline_file_dropdown,
|
| 764 |
+
outputs=baseline_download_output
|
| 765 |
+
)
|
| 766 |
+
|
| 767 |
+
finetuned_refresh_files.click(
|
| 768 |
+
lambda: gr.Dropdown(choices=get_files_list_finetuned()),
|
| 769 |
+
outputs=finetuned_file_dropdown
|
| 770 |
+
)
|
| 771 |
+
finetuned_file_dropdown.change(
|
| 772 |
+
get_chunks_for_file_finetuned,
|
| 773 |
+
inputs=finetuned_file_dropdown,
|
| 774 |
+
outputs=finetuned_chunks_display
|
| 775 |
+
)
|
| 776 |
+
finetuned_download_btn.click(
|
| 777 |
+
download_chunks_finetuned,
|
| 778 |
+
inputs=finetuned_file_dropdown,
|
| 779 |
+
outputs=finetuned_download_output
|
| 780 |
+
)
|
| 781 |
|
| 782 |
# TAB 2: COMPARISON SEARCH
|
| 783 |
with gr.Tab("2. Comparison Search (Note: Semantic search is sensitive to query phrasing)"):
|