CodeMode Agent committed on
Commit
0498daa
·
1 Parent(s): fbf5c49

Deploy CodeMode via Agent

Browse files
Files changed (1) hide show
  1. app.py +179 -1
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # CodeMode Comparison Demo - Last updated: 2026-02-11 23:59
2
  import gradio as gr
3
  import torch
4
  import torch.nn.functional as F
@@ -115,6 +114,125 @@ def list_finetuned_files():
115
  except Exception as e:
116
  return [[f"Error: {str(e)}", "-", "-"]]
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  # --- Search Functions ---
119
  def search_baseline(query, top_k=5):
120
  if baseline_collection.count() == 0: return []
@@ -600,6 +718,66 @@ with gr.Blocks(theme=theme, css=css, title="CodeMode - Baseline vs Fine-tuned")
600
  value=[["No data yet", "-", "-"]]
601
  )
602
  inspect_finetuned_btn.click(list_finetuned_files, inputs=[], outputs=finetuned_files_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
603
 
604
  # TAB 2: COMPARISON SEARCH
605
  with gr.Tab("2. Comparison Search (Note: Semantic search is sensitive to query phrasing)"):
 
 
1
  import gradio as gr
2
  import torch
3
  import torch.nn.functional as F
 
114
  except Exception as e:
115
  return [[f"Error: {str(e)}", "-", "-"]]
116
 
117
+ # --- Chunk Inspector Functions ---
118
def _list_collection_files(collection, label):
    """Return the sorted, de-duplicated ``file_name`` values in *collection*.

    Args:
        collection: Object whose ``get(include=["metadatas"])`` returns a
            mapping with a ``"metadatas"`` list (presumably a Chroma
            collection -- confirm where the collections are created).
        label: Human-readable collection name, used only in the log message.

    Returns:
        Sorted list of distinct file names. Records without a ``file_name``
        key (or without metadata at all) are reported as ``"unknown"``.
        An empty list is returned when there is no metadata or the lookup
        fails.
    """
    try:
        metadatas = collection.get(include=["metadatas"])["metadatas"]
        # A record stored without metadata can come back as None; treat it
        # as "unknown" instead of letting one bad record blank the whole list.
        return sorted(
            {(meta or {}).get("file_name", "unknown") for meta in metadatas or []}
        )
    except Exception:
        # Deliberately best-effort: the caller falls back to an empty list,
        # but the reason is logged rather than silently discarded.
        import logging

        logging.getLogger(__name__).exception(
            "Could not list files in the %s collection", label
        )
        return []


def get_files_list_baseline():
    """Get list of unique files in baseline collection"""
    return _list_collection_files(baseline_collection, "baseline")


def get_files_list_finetuned():
    """Get list of unique files in fine-tuned collection"""
    return _list_collection_files(finetuned_collection, "fine-tuned")
139
+
140
def _collect_file_chunks(collection, file_name, preview_chars=500):
    """Build a JSON-friendly summary of every chunk stored for *file_name*.

    Args:
        collection: Object whose ``get(where=..., include=...)`` returns a
            mapping with ``"documents"``, ``"metadatas"`` and ``"embeddings"``
            (presumably a Chroma collection -- confirm against its creation).
        file_name: Value matched against the ``file_name`` metadata field.
        preview_chars: Maximum number of characters of each chunk kept in
            ``"content"``; longer chunks are cut and suffixed with ``"..."``.

    Returns:
        ``{"file_name", "total_chunks", "chunks"}`` on success, where each
        chunk has ``chunk_id`` (1-based), ``content``, ``full_length``,
        ``metadata`` and ``embedding_dim``. On any problem the result is
        ``{"error": <message>}`` instead.
    """
    if not file_name:
        return {"error": "No file selected"}

    try:
        data = collection.get(
            where={"file_name": file_name},
            include=["documents", "metadatas", "embeddings"],
        )

        documents = data["documents"]
        if not documents:
            return {"error": "No chunks found for this file"}

        embeddings = data["embeddings"]
        # Embeddings may be a NumPy array (newer Chroma releases appear to
        # return one); truth-testing an array raises ValueError, so check
        # for presence explicitly.
        has_embeddings = embeddings is not None and len(embeddings) > 0

        chunks = []
        for index, (doc, meta) in enumerate(zip(documents, data["metadatas"])):
            # A record stored without a document comes back as None.
            text = doc if doc is not None else ""
            if len(text) > preview_chars:
                preview = text[:preview_chars] + "..."
            else:
                preview = text
            chunks.append({
                "chunk_id": index + 1,
                "content": preview,
                "full_length": len(text),
                "metadata": meta,
                "embedding_dim": len(embeddings[index]) if has_embeddings else 0,
            })

        return {
            "file_name": file_name,
            "total_chunks": len(chunks),
            "chunks": chunks,
        }
    except Exception as e:
        # The result is shown in a JSON widget, so report the failure as data.
        return {"error": str(e)}


def get_chunks_for_file_baseline(file_name):
    """Get all chunks for a specific file from baseline collection"""
    if not file_name:
        return {"error": "No file selected"}
    return _collect_file_chunks(baseline_collection, file_name)


def get_chunks_for_file_finetuned(file_name):
    """Get all chunks for a specific file from fine-tuned collection"""
    if not file_name:
        return {"error": "No file selected"}
    return _collect_file_chunks(finetuned_collection, file_name)
203
+
204
def _write_json_tempfile(payload):
    """Serialize *payload* to a new ``.json`` temporary file and return its path.

    The file is created with ``delete=False`` so it outlives this call and
    can be handed to the caller as a download. If serialization fails, the
    partially written file is removed and the exception is re-raised.
    """
    import json
    import os
    import tempfile

    handle = tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".json", encoding="utf-8"
    )
    try:
        # The context manager guarantees the handle is closed even when
        # json.dump raises part-way through.
        with handle:
            json.dump(payload, handle, indent=2)
    except Exception:
        os.unlink(handle.name)  # do not leave an orphaned, truncated file
        raise
    return handle.name


def download_chunks_baseline(file_name):
    """Export chunks to JSON file for baseline

    Returns the path of the written temporary file, or ``None`` when no
    file name was given.
    """
    if not file_name:
        return None
    return _write_json_tempfile(get_chunks_for_file_baseline(file_name))


def download_chunks_finetuned(file_name):
    """Export chunks to JSON file for fine-tuned

    Returns the path of the written temporary file, or ``None`` when no
    file name was given.
    """
    if not file_name:
        return None
    return _write_json_tempfile(get_chunks_for_file_finetuned(file_name))
235
+
236
  # --- Search Functions ---
237
  def search_baseline(query, top_k=5):
238
  if baseline_collection.count() == 0: return []
 
718
  value=[["No data yet", "-", "-"]]
719
  )
720
  inspect_finetuned_btn.click(list_finetuned_files, inputs=[], outputs=finetuned_files_df)
721
+
722
+ gr.Markdown("---")
723
+ gr.Markdown("### Chunk Inspector")
724
+ gr.Markdown("View detailed chunk information for indexed files (content, metadata, schema)")
725
+
726
+ with gr.Row():
727
+ with gr.Column():
728
+ gr.Markdown("#### Baseline Collection")
729
+ baseline_file_dropdown = gr.Dropdown(
730
+ label="Select File to Inspect",
731
+ choices=[],
732
+ interactive=True
733
+ )
734
+ baseline_refresh_files = gr.Button("Refresh File List", variant="secondary")
735
+ baseline_chunks_display = gr.JSON(label="Chunk Details")
736
+ baseline_download_btn = gr.Button("Download Chunks as JSON", variant="primary")
737
+ baseline_download_output = gr.File(label="Download")
738
+
739
+ with gr.Column():
740
+ gr.Markdown("#### Fine-tuned Collection")
741
+ finetuned_file_dropdown = gr.Dropdown(
742
+ label="Select File to Inspect",
743
+ choices=[],
744
+ interactive=True
745
+ )
746
+ finetuned_refresh_files = gr.Button("Refresh File List", variant="secondary")
747
+ finetuned_chunks_display = gr.JSON(label="Chunk Details")
748
+ finetuned_download_btn = gr.Button("Download Chunks as JSON", variant="primary")
749
+ finetuned_download_output = gr.File(label="Download")
750
+
751
+ # Wire up Chunk Inspector events
752
+ baseline_refresh_files.click(
753
+ lambda: gr.Dropdown(choices=get_files_list_baseline()),
754
+ outputs=baseline_file_dropdown
755
+ )
756
+ baseline_file_dropdown.change(
757
+ get_chunks_for_file_baseline,
758
+ inputs=baseline_file_dropdown,
759
+ outputs=baseline_chunks_display
760
+ )
761
+ baseline_download_btn.click(
762
+ download_chunks_baseline,
763
+ inputs=baseline_file_dropdown,
764
+ outputs=baseline_download_output
765
+ )
766
+
767
+ finetuned_refresh_files.click(
768
+ lambda: gr.Dropdown(choices=get_files_list_finetuned()),
769
+ outputs=finetuned_file_dropdown
770
+ )
771
+ finetuned_file_dropdown.change(
772
+ get_chunks_for_file_finetuned,
773
+ inputs=finetuned_file_dropdown,
774
+ outputs=finetuned_chunks_display
775
+ )
776
+ finetuned_download_btn.click(
777
+ download_chunks_finetuned,
778
+ inputs=finetuned_file_dropdown,
779
+ outputs=finetuned_download_output
780
+ )
781
 
782
  # TAB 2: COMPARISON SEARCH
783
  with gr.Tab("2. Comparison Search (Note: Semantic search is sensitive to query phrasing)"):