sammoftah commited on
Commit
a6356f4
·
verified ·
1 Parent(s): f6281b2

Deploy Code Search Engine

Browse files
Files changed (6) hide show
  1. README.md +53 -4
  2. app.py +311 -0
  3. requirements.txt +5 -0
  4. shared/components.py +375 -0
  5. shared/styles.css +425 -0
  6. shared/utils.py +366 -0
README.md CHANGED
@@ -1,12 +1,61 @@
1
  ---
2
  title: Code Search Engine
3
- emoji: 🐠
4
- colorFrom: pink
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 6.13.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Code Search Engine
3
+ emoji: 💻
4
+ colorFrom: yellow
5
  colorTo: blue
6
  sdk: gradio
 
7
  app_file: app.py
8
  pinned: false
9
+ license: mit
10
  ---
11
 
12
+ # Code Search Engine
13
+
14
+ ## Question
15
+
16
+ Can we search code by intent instead of exact identifiers?
17
+
18
+ ## System Boundary
19
+
20
+ This Space is a small semantic code retrieval demo. It does not attempt full repository understanding; it focuses on embedding snippets and ranking them against natural-language queries.
21
+
22
+ ## Method
23
+
24
+ Code samples are loaded from a Hub dataset, embedded with a code-oriented transformer, and compared to the embedded user query. Results are syntax-highlighted so the returned artifact is readable.
25
+
26
+ ## Technique
27
+
28
+ Code search uses representation learning to place code and natural language in a shared semantic space. The query "read csv and group by column" can retrieve code even if the function is not named that way.
29
+
30
+ This is a retrieval problem before it is a generation problem. Good code assistants need to find the right context before they can edit or explain it.
31
+
32
+ ## Output
33
+
34
+ The app returns ranked code snippets, similarity scores, metadata, and highlighted source text.
35
+
36
+ ## Why It Matters
37
+
38
+ Developer tools increasingly depend on code intelligence: semantic search, repair, generation, review, and retrieval-augmented coding. This Space isolates the retrieval layer.
39
+
40
+ ## What To Notice
41
+
42
+ Look for whether retrieved code matches intent or only shares surface words. A strong embedding model should recover functional similarity.
43
+
44
+ ## Effect In Practice
45
+
46
+ Semantic code retrieval can power internal codebase search, example discovery, migration tools, and coding-agent context selection.
47
+
48
+ ## Hugging Face Extension
49
+
50
+ This can grow into a code-search evaluation Space using query-snippet relevance labels and comparing CodeBERT-style embeddings against newer code embedding models.
51
+
52
+ ## Limitations
53
+
54
+ The demo uses a sampled dataset and a single embedding model. Production code search should parse symbols, track repository context, index dependencies, and evaluate relevance with developer judgments.
55
+
56
+ ## Run Locally
57
+
58
+ ```bash
59
+ pip install -r requirements.txt
60
+ python app.py
61
+ ```
app.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ from sentence_transformers import SentenceTransformer
4
+ import numpy as np
5
+ from pygments import highlight
6
+ from pygments.lexers import get_lexer_by_name, guess_lexer
7
+ from pygments.formatters import HtmlFormatter
8
+ import re
9
+ import os
10
+ import sys
11
+
12
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
13
+ from shared.components import create_method_panel, create_premium_hero
14
+
15
# CodeBERT checkpoint loaded through Sentence-Transformers; the same encoder
# is used for code snippets and for natural-language queries.
embedder = SentenceTransformer('microsoft/codebert-base')

# Module-level state. load_code_dataset() fills these in; the search helpers
# only read them.
code_samples = []      # one metadata dict per retained snippet
embeddings = None      # embedder.encode() output for code_samples; None until loaded
dataset_sample = None  # handle returned by load_dataset()
22
+
23
def load_code_dataset(progress=gr.Progress()):
    """Load a sample of The Stack dataset and embed it for searching.

    Streams the Python subset of ``bigcode/the-stack-smol``, looks at the first
    500 records, keeps at most 300 files whose content is 50-5000 characters
    long, and embeds the first 512 characters of each with the module-level
    ``embedder``.

    The module globals ``dataset_sample``, ``code_samples`` and ``embeddings``
    are replaced together, and only after every step succeeded, so a failed
    (re)load never leaves the sample list and the embedding matrix out of sync.

    Args:
        progress: Gradio progress tracker, called with a fraction and a
            ``desc`` label at each stage.

    Returns:
        A status string for the UI: a "✅ Loaded ..." summary on success or a
        "❌ Error: ..." message on failure.
    """
    global dataset_sample, embeddings, code_samples

    progress(0, desc="Loading The Stack dataset...")
    try:
        # Load Python subset (smaller and more accessible)
        stream = load_dataset(
            "bigcode/the-stack-smol",
            data_dir="data/python",
            split="train",
            streaming=True
        )

        progress(0.3, desc="Sampling code repositories...")
        samples = []
        for i, item in enumerate(stream):
            if i >= 500:
                break

            # Records may carry an explicit None, which .get(key, default)
            # does not replace; `or` covers both missing and None.
            code = item.get('content') or ''
            if len(code) < 50 or len(code) > 5000:  # Filter very short/long
                continue

            # NOTE(review): the-stack-smol appears to expose `repository_name`
            # and `licenses` rather than the `max_stars_*` columns of the full
            # Stack - confirm against the dataset card. Both spellings are
            # accepted so the metadata is filled in either way.
            licenses = item.get('max_stars_repo_licenses') or item.get('licenses') or []
            if isinstance(licenses, str):
                licenses = [licenses]

            samples.append({
                'code': code,
                'language': 'python',
                'size': len(code),
                'max_stars_repo_name': (
                    item.get('max_stars_repo_name')
                    or item.get('repository_name')
                    or 'unknown'
                ),
                # Always an int, so np.mean() and the min-stars filter cannot
                # trip over None.
                'max_stars_count': item.get('max_stars_count') or 0,
                'license': licenses[0] if licenses else 'unknown',
            })

            if len(samples) >= 300:
                break

        if not samples:
            return "❌ Error: no usable code samples were found in the dataset"

        progress(0.7, desc="Creating code embeddings...")
        code_texts = [s['code'][:512] for s in samples]  # Use first 512 chars
        vectors = embedder.encode(code_texts, show_progress_bar=False)

        # Publish the new state in one step, now that nothing can fail.
        dataset_sample, code_samples, embeddings = stream, samples, vectors

        progress(1.0, desc="Ready!")
        avg_stars = np.mean([s['max_stars_count'] for s in code_samples])
        return f"✅ Loaded {len(code_samples)} code samples (avg stars: {avg_stars:.0f})"

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"❌ Error: {str(e)}\nNote: loading the dataset requires internet access"
70
+
71
def extract_function_name(code):
    """Return the name of the first function or class defined in *code*.

    Definitions are matched only at the start of a line (after optional
    indentation), so "def" inside another word or in the middle of a comment
    is ignored. Whichever definition comes first in the text wins, so a file
    that starts with a class is labelled with the class name, not with its
    first method.

    Args:
        code: Source text to inspect.

    Returns:
        The matched identifier, or the literal "code snippet" when the text
        contains no function or class definition.
    """
    match = re.search(
        r'^[ \t]*(?:async[ \t]+)?'
        r'(?:def[ \t]+(\w+)\s*\(|class[ \t]+(\w+)\s*[:\(])',
        code,
        re.MULTILINE,
    )
    if match:
        # Exactly one of the two groups is set, depending on which branch matched.
        return match.group(1) or match.group(2)

    return "code snippet"
84
+
85
def syntax_highlight_code(code, language='python'):
    """Return *code* as syntax-highlighted HTML.

    Uses the Pygments lexer named *language* with the inline-styled "monokai"
    formatter. Highlighting is best-effort: if anything goes wrong (for
    example an unknown language name) the code is returned in a plain
    ``<pre><code>`` block instead.

    Args:
        code: Source text to render.
        language: Pygments lexer alias.

    Returns:
        An HTML fragment.
    """
    # Local import keeps this block self-contained.
    from html import escape

    try:
        lexer = get_lexer_by_name(language)
        formatter = HtmlFormatter(style='monokai', noclasses=True)
        return highlight(code, lexer, formatter)
    except Exception:
        # The fallback is raw source going into HTML, so it must be escaped;
        # otherwise "<", ">" and "&" in the code would be parsed as markup.
        return f"<pre><code>{escape(code)}</code></pre>"
94
+
95
def search_code(query, language='python', min_stars=0, top_k=5):
    """Rank the loaded code samples against a natural-language query.

    Reads the module globals ``code_samples``, ``embeddings`` and ``embedder``.
    Samples are first filtered by language and minimum star count; if nothing
    passes the filters, every loaded sample is searched instead. Candidates are
    ranked by cosine similarity between their embedding and the query
    embedding.

    Args:
        query: Free-text description of the code being looked for.
        language: Value compared with each sample's ``'language'`` field.
        min_stars: Lower bound for a sample's ``'max_stars_count'``.
        top_k: Maximum number of results; coerced to ``int`` because UI
            sliders may deliver a float.

    Returns:
        Up to ``top_k`` copies of sample dicts, best match first, each with an
        added ``'similarity'`` float. Empty when no dataset is loaded or
        ``top_k`` is not positive.
    """
    if embeddings is None or not code_samples:
        return []

    top_k = int(top_k)
    if top_k <= 0:
        return []

    # Filter by language and stars; a missing star count counts as 0.
    indices = [
        i for i, sample in enumerate(code_samples)
        if sample['language'] == language
        and (sample['max_stars_count'] or 0) >= min_stars
    ]

    if not indices:
        # Fallback: drop both filters and search every loaded sample.
        indices = list(range(len(code_samples)))

    candidates = np.asarray(embeddings, dtype=float)[indices]
    query_vector = np.asarray(embedder.encode([query]), dtype=float).reshape(-1)

    # Normalise both sides so the score is a cosine similarity rather than a
    # raw dot product, which would favour vectors with a large norm. The
    # epsilon guards against division by zero for an all-zero vector.
    candidates = candidates / np.maximum(
        np.linalg.norm(candidates, axis=1, keepdims=True), 1e-12
    )
    query_vector = query_vector / max(np.linalg.norm(query_vector), 1e-12)
    similarities = candidates @ query_vector

    ranked = np.argsort(similarities)[::-1][:top_k]

    # Map positions in the filtered matrix back to the original samples.
    results = []
    for position in ranked:
        sample = code_samples[indices[position]].copy()
        sample['similarity'] = float(similarities[position])
        results.append(sample)

    return results
127
+
128
def format_code_results(results, query):
    """Render search results as an HTML fragment.

    Every value that originates from the user or from the dataset (query, repo
    name, license, extracted definition name) is HTML-escaped before it is
    interpolated. Each result shows its metadata, the first 1000 characters of
    the code (highlighted) and a button that copies the full code.

    Args:
        results: List of sample dicts as returned by ``search_code``.
        query: The query string that produced the results.

    Returns:
        An HTML string; a short "no results" paragraph when *results* is empty.
    """
    # Local imports keep this block self-contained.
    import json
    from html import escape

    if not results:
        return "<p>No code samples found. Try adjusting filters or query.</p>"

    parts = [
        "<h2>🔍 Code Search Results</h2>",
        f"<p><strong>Query:</strong> {escape(str(query))}</p>",
        f"<p><strong>Found:</strong> {len(results)} relevant code samples</p>",
        "<hr>",
    ]

    for i, result in enumerate(results, 1):
        name = escape(extract_function_name(result['code']))
        repo = escape(str(result['max_stars_repo_name']))
        stars = escape(str(result['max_stars_count']))
        license_name = escape(str(result['license']))

        parts.append("<div style='margin: 20px 0; padding: 15px; background: #1e1e1e; border-radius: 8px;'>")
        parts.append(f"<h3 style='color: #fff;'>Result {i}: {name}</h3>")

        # Metadata
        parts.append(
            "<p style='color: #888;'>"
            f"<strong>Repo:</strong> {repo} | "
            f"<strong>Stars:</strong> ⭐ {stars} | "
            f"<strong>License:</strong> {license_name} | "
            f"<strong>Relevance:</strong> {result['similarity']:.3f}"
            "</p>"
        )

        # Code (display is limited to the first 1000 characters)
        parts.append(syntax_highlight_code(result['code'][:1000], result['language']))

        # Copy button. json.dumps yields a valid JavaScript string literal
        # (quotes, backslashes and newlines escaped); escape(quote=True) then
        # makes that literal safe inside the double-quoted onclick attribute.
        copy_literal = escape(json.dumps(result['code']), quote=True)
        parts.append(f"""
        <button onclick="navigator.clipboard.writeText({copy_literal});
        this.innerText='Copied!';
        setTimeout(() => this.innerText='Copy Code', 2000);"
        style="margin-top: 10px; padding: 8px 16px; background: #4CAF50; color: white;
        border: none; border-radius: 4px; cursor: pointer;">
        Copy Code
        </button>
        """)

        parts.append("</div>")

    return "".join(parts)
170
+
171
def perform_code_search(query, language, min_stars, num_results, progress=gr.Progress()):
    """Gradio event handler: run a search and build both output panels.

    Args:
        query: Text from the query box; blank or whitespace-only input is
            rejected with a prompt instead of being searched.
        language: Selected language filter.
        min_stars: Minimum star count filter.
        num_results: Requested number of results; coerced to ``int`` before it
            is passed on as ``top_k``.
        progress: Gradio progress tracker.

    Returns:
        A ``(results_html, stats_markdown)`` tuple. The second element is an
        empty string when the search could not be run.
    """
    query = (query or "").strip()
    if not query:
        return "<p>Please enter a search query</p>", ""

    if embeddings is None:
        return "<p>Please load the dataset first</p>", ""

    progress(0, desc="Searching code...")
    results = search_code(query, language, min_stars, top_k=int(num_results))

    progress(0.7, desc="Formatting results...")
    formatted = format_code_results(results, query)

    progress(1.0, desc="Done!")

    # Stats
    stats = f"""
    ### 📊 Search Statistics

    - **Total samples**: {len(code_samples)}
    - **Results**: {len(results)}
    - **Language**: {language}
    - **Min stars**: {min_stars}
    - **Model**: CodeBERT (Microsoft)

    ### 🧠 How CodeBERT Works

    CodeBERT is trained on code and documentation:
    - Understands programming patterns
    - Maps code to natural language
    - Trained on GitHub repos
    - Supports multiple languages
    """

    return formatted, stats
207
+
208
# Gradio Interface
# NOTE(review): the nesting below was reconstructed from a flattened diff view;
# confirm column/row membership against the original layout.

# Static help text, kept apart from the layout code.
_EXAMPLE_SEARCHES_MD = """
### 💡 Example Searches:
- "binary search tree"
- "web scraper with requests"
- "recursive fibonacci"
- "API client with authentication"
- "data validation decorator"
"""

_ABOUT_MD = """
### 🎯 Why Semantic Code Search?

**Traditional search** (GitHub, Google):
- Keyword matching only
- Must know exact function names
- Hard to find by functionality

**Semantic search** (this tool):
- Search by what code does, not what it's called
- "sort a list" finds quicksort, mergesort, etc.
- Understands programming concepts

### 🔧 Features:

- **Syntax highlighting** with Pygments
- **Copy to clipboard** button
- **Filter by stars** (code quality proxy)
- **License information** (know before you use)
- **CodeBERT embeddings** (code + NL understanding)

Perfect for developers learning, debugging, or finding code examples!
"""

with gr.Blocks(title="Code Search Engine", theme=gr.themes.Soft()) as demo:
    # Landing section: hero banner followed by the three-card method panel.
    create_premium_hero(
        "Semantic Code Search Engine",
        "Search code with natural language using code embeddings, dataset sampling, and syntax-highlighted retrieval results.",
        "💻",
        badge="Code Intelligence",
        highlights=["CodeBERT", "The Stack sample", "Semantic retrieval"],
    )
    create_method_panel({
        "Technique": "Encode code snippets into vectors and rank them against natural-language queries.",
        "What it proves": "You can adapt embedding search beyond documents into developer tooling.",
        "HF capability": "Combines Hub datasets with transformer embeddings in an interactive Space.",
    })

    with gr.Row():
        # Left column: dataset loading and search controls.
        with gr.Column(scale=1):
            gr.Markdown("### Step 1: Load Dataset")
            load_btn = gr.Button("Load Code Dataset", variant="primary")
            load_status = gr.Textbox(label="Status", interactive=False)

            gr.Markdown("### Step 2: Search Code")
            query_input = gr.Textbox(
                label="What are you looking for?",
                placeholder="e.g., binary search implementation",
                lines=2,
            )
            language = gr.Dropdown(
                choices=['python'],
                value='python',
                label="Language (more coming soon)",
            )
            min_stars = gr.Slider(
                minimum=0, maximum=1000, value=0, step=10,
                label="Minimum GitHub Stars",
            )
            num_results = gr.Slider(
                minimum=3, maximum=10, value=5, step=1,
                label="Number of Results",
            )
            search_btn = gr.Button("Search Code", variant="primary")

            gr.Markdown(_EXAMPLE_SEARCHES_MD)

        # Right column: rendered results plus collapsible statistics.
        with gr.Column(scale=2):
            results_output = gr.HTML(label="Code Results")

            with gr.Accordion("📊 Statistics & Info", open=False):
                stats_output = gr.Markdown()

    gr.Markdown(_ABOUT_MD)

    # Event wiring.
    load_btn.click(load_code_dataset, outputs=[load_status])
    search_btn.click(
        perform_code_search,
        inputs=[query_input, language, min_stars, num_results],
        outputs=[results_output, stats_output],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==4.44.0
2
+ huggingface-hub==0.25.0
3
+ datasets==3.2.0
4
+ sentence-transformers==3.3.1
5
+ pygments==2.18.0
shared/components.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HF-Master Shared Components
3
+ Reusable Gradio components for all projects
4
+ """
5
+
6
+ import html
7
+ from pathlib import Path
8
+ import gradio as gr
9
+ from typing import List, Tuple, Optional, Dict, Any
10
+
11
+
12
class SharedComponents:
    """Shared UI components for all HF-Master projects.

    Every member is a static method, so the class is used as a namespace and
    never instantiated. Helpers that build HTML escape their text arguments
    with ``_esc``; helpers that build Markdown interpolate their arguments
    unchanged.
    """

    @staticmethod
    def _esc(value: Any) -> str:
        """Escape text before placing it inside shared HTML components."""
        return html.escape(str(value), quote=True)

    @staticmethod
    def _style_tag() -> str:
        """Inline the shared stylesheet for Gradio Spaces that do not pass css=.

        Returns:
            A ``<style>`` element holding the contents of ``styles.css`` from
            this module's directory, or an empty string if the file cannot be
            read.
        """
        style_path = Path(__file__).with_name("styles.css")
        try:
            return f"<style>{style_path.read_text(encoding='utf-8')}</style>"
        except OSError:
            return ""

    @staticmethod
    def create_header(title: str, description: str, emoji: str = "🚀") -> gr.HTML:
        """Create standardized project header.

        Delegates to ``create_premium_hero`` with a fixed badge and highlight
        list, so the result is a ``gr.HTML`` component.
        """
        return SharedComponents.create_premium_hero(
            title=title,
            description=description,
            emoji=emoji,
            badge="AI/ML Space",
            highlights=["Interactive demo", "Explainable workflow", "HF-ready"],
        )

    @staticmethod
    def create_footer(version: str = "1.0.0") -> gr.Markdown:
        """Create standardized project footer showing the escaped *version*."""
        return gr.Markdown(f"""
        <div class="hf-footer">
        <strong>{SharedComponents._esc(version)}</strong> · Built as a practical AI/ML learning Space for the Hugging Face community.
        </div>
        """)

    @staticmethod
    def create_premium_hero(
        title: str,
        description: str,
        emoji: str = "🚀",
        badge: str = "Featured Space",
        highlights: Optional[List[str]] = None,
    ) -> gr.HTML:
        """Create a richer landing-page hero for Spaces.

        Args:
            title: Heading text.
            description: Paragraph shown under the heading.
            emoji: Icon shown next to the text.
            badge: Small label shown above the heading.
            highlights: Optional chip labels shown below the text.

        Returns:
            A ``gr.HTML`` component containing the inlined shared stylesheet
            followed by the hero markup. All text arguments are HTML-escaped.
        """
        highlights = highlights or []
        esc = SharedComponents._esc
        chips = "".join(
            f"""
            <span class="hf-chip">{esc(item)}</span>
            """
            for item in highlights
        )

        return gr.HTML(f"""
        {SharedComponents._style_tag()}
        <div class="hf-hero">
        <div class="hf-hero-grid">
        <div class="hf-hero-copy">
        <div class="hf-icon">{esc(emoji)}</div>
        <div>
        <div class="hf-badge">{esc(badge)}</div>
        <h1>{esc(title)}</h1>
        <p>{esc(description)}</p>
        </div>
        </div>
        </div>
        <div class="hf-chip-row">
        {chips}
        </div>
        </div>
        """)

    @staticmethod
    def create_method_panel(items: Dict[str, str]) -> gr.HTML:
        """Create a compact method/pipeline explainer panel.

        Args:
            items: Mapping of card label to card text; one card is rendered
                per entry, in iteration order.

        Returns:
            A ``gr.HTML`` component with the inlined stylesheet and the cards.
        """
        esc = SharedComponents._esc
        cards = "".join(
            f"""
            <div class="hf-method-card">
            <span>{esc(label)}</span>
            <p>{esc(text)}</p>
            </div>
            """
            for label, text in items.items()
        )
        return gr.HTML(f"""{SharedComponents._style_tag()}<div class="hf-method-grid">{cards}</div>""")

    @staticmethod
    def create_status_badge(status: str) -> str:
        """Create status badge.

        Returns the emoji mapped to *status* (case-insensitive), or "⚪" for
        any status that is not in the table.
        """
        colors = {
            "complete": "🟢",
            "in-progress": "🟡",
            "planned": "⚪",
            "experimental": "🔴"
        }
        return colors.get(status.lower(), "⚪")

    @staticmethod
    def create_project_card(
        title: str,
        description: str,
        tech_stack: List[str],
        difficulty: str,
        viral_potential: str
    ) -> str:
        """Create markdown project card.

        Each entry of *tech_stack* is wrapped in backticks; the other
        arguments are interpolated as given.
        """
        tech_badges = " ".join([f"`{t}`" for t in tech_stack])

        return f"""
        ## {title}

        {description}

        **Tech Stack:** {tech_badges}

        **Difficulty:** {difficulty} | **Viral Potential:** {viral_potential}
        """

    @staticmethod
    def create_risk_chart(risk_factors: Dict[str, float]) -> Any:
        """Create risk factor visualization.

        Args:
            risk_factors: Mapping of factor name to score; each score is
                multiplied by 100 for display, and underscores in names are
                replaced by spaces and title-cased.

        Returns:
            A Plotly figure with one horizontal bar per factor.
        """
        # Imported here so Plotly is only required when a chart is requested.
        import plotly.graph_objects as go

        factors = list(risk_factors.keys())
        scores = [risk_factors[f] * 100 for f in factors]

        fig = go.Figure(data=[
            go.Bar(
                x=scores,
                y=[f.replace('_', ' ').title() for f in factors],
                orientation='h',
                marker=dict(
                    color=scores,
                    colorscale='RdYlGn_r',
                    cmin=0,
                    cmax=100
                )
            )
        ])

        fig.update_layout(
            title="Risk Factor Breakdown",
            xaxis_title="Risk Score",
            yaxis_title="Factor",
            height=400,
            template="plotly_white"
        )

        return fig

    @staticmethod
    def create_comparison_chart(items: List[Dict], keys: List[str]) -> Any:
        """Create comparison visualization.

        Args:
            items: One dict per series; ``'name'`` labels the series (default
                "Item N") and missing *keys* count as 0.
            keys: Categories shown on the x axis.

        Returns:
            A Plotly figure with grouped bars.
        """
        # Imported here so Plotly is only required when a chart is requested.
        import plotly.graph_objects as go

        fig = go.Figure()

        for i, item in enumerate(items):
            fig.add_trace(go.Bar(
                name=item.get('name', f'Item {i+1}'),
                x=keys,
                y=[item.get(k, 0) for k in keys]
            ))

        fig.update_layout(
            barmode='group',
            height=400
        )

        return fig

    @staticmethod
    def create_metric_card(label: str, value: str, emoji: str = "📊") -> gr.Markdown:
        """Create metric display card: a heading with *emoji* and *label*, then *value* in bold."""
        return gr.Markdown(f"""
        ### {emoji} {label}

        **{value}**
        """)

    @staticmethod
    def create_error_display(error: str) -> gr.Markdown:
        """Create error message display."""
        return gr.Markdown(f"""
        ❌ **Error**

        {error}
        """)

    @staticmethod
    def create_success_display(message: str) -> gr.Markdown:
        """Create success message display."""
        return gr.Markdown(f"""
        ✅ **Success**

        {message}
        """)
212
+
213
+
214
class LoadingSpinner:
    """Loading state display"""

    @staticmethod
    def create_spinner(message: str = "Loading...") -> gr.Markdown:
        """Create loading spinner: *message* in bold plus a short waiting note."""
        return gr.Markdown(f"""
        ⏳ **{message}**

        _This may take a moment..._
        """)

    @staticmethod
    def create_progress_bar(initial: float = 0) -> gr.Markdown:
        """Create progress display.

        Args:
            initial: Progress as a percentage. The bar is filled in
                proportion to this value, clamped to 0-100; the number itself
                is printed as given.

        Returns:
            A ``gr.Markdown`` component with a text bar and the percentage.
            For ``initial=0`` the bar is entirely empty.
        """
        track = "░░░░░░░░░"
        try:
            fraction = min(max(float(initial), 0.0), 100.0) / 100.0
        except (TypeError, ValueError):
            # Non-numeric input: keep the empty bar and still print the value.
            fraction = 0.0
        filled = int(round(fraction * len(track)))
        bar = "█" * filled + track[filled:]

        return gr.Markdown(f"""
        {bar} **{initial}%**
        """)
232
+
233
+
234
class TableFormatter:
    """Format data as tables (a header row followed by data rows)."""

    @staticmethod
    def format_dict_table(data: Dict[str, Any], headers: Optional[List[str]] = None) -> List[List[Any]]:
        """Format dictionary as table rows.

        Args:
            data: Mapping to tabulate; each item becomes ``[key, str(value)]``.
            headers: Header row; defaults to ``["Key", "Value"]`` when omitted
                or empty.

        Returns:
            The header row followed by one row per item. The header row is a
            copy, so editing the table never changes the caller's list.
        """
        header_row = list(headers) if headers else ["Key", "Value"]
        return [header_row] + [[key, str(value)] for key, value in data.items()]

    @staticmethod
    def create_dataframe(data: List[Dict[str, Any]], columns: Optional[List[str]] = None) -> List[List[Any]]:
        """Create dataframe-compatible data structure.

        Args:
            data: Records to tabulate.
            columns: Column names to include, in order; defaults to the keys
                of the first record.

        Returns:
            ``[]`` for empty *data*; otherwise the header row followed by one
            row per record, with ``""`` for keys a record lacks.
        """
        if not data:
            return []

        header_row = list(columns) if columns else list(data[0].keys())
        return [header_row] + [[row.get(h, "") for h in header_row] for row in data]
263
+
264
+
265
class CodeHighlighter:
    """Code display and highlighting"""

    @staticmethod
    def create_code_display(code: str, language: str = "python") -> gr.Code:
        """Create a 20-line code display block showing *code* in *language*."""
        return gr.Code(value=code, language=language, lines=20)

    @staticmethod
    def create_copy_button(code: str) -> gr.Button:
        """Create copy-to-clipboard button.

        NOTE(review): *code* is not used; the returned button has no click
        handler attached here, so copying must be wired up by the caller.
        """
        button = gr.Button("📋 Copy Code")
        return button

    @staticmethod
    def create_diff_view(old_code: str, new_code: str) -> Tuple[gr.Code, gr.Code]:
        """Create side-by-side diff view as a ``(before, after)`` pair of code blocks."""
        before = gr.Code(value=old_code, language="python", lines=15, label="Before")
        after = gr.Code(value=new_code, language="python", lines=15, label="After")
        return (before, after)
289
+
290
+
291
def create_header(title: str, description: str, emoji: str = "🚀") -> gr.HTML:
    """Module-level shortcut for ``SharedComponents.create_header``.

    The underlying helper builds its result with ``create_premium_hero``, so
    the returned component is a ``gr.HTML``.
    """
    return SharedComponents.create_header(title, description, emoji)
293
+
294
+
295
def create_footer(version: str = "1.0.0") -> gr.Markdown:
    """Module-level shortcut for ``SharedComponents.create_footer``."""
    footer = SharedComponents.create_footer(version)
    return footer
297
+
298
+
299
def create_premium_hero(
    title: str,
    description: str,
    emoji: str = "🚀",
    badge: str = "Featured Space",
    highlights: Optional[List[str]] = None,
) -> gr.HTML:
    """Module-level shortcut for ``SharedComponents.create_premium_hero``."""
    return SharedComponents.create_premium_hero(
        title=title,
        description=description,
        emoji=emoji,
        badge=badge,
        highlights=highlights,
    )
307
+
308
+
309
def create_method_panel(items: Dict[str, str]) -> gr.HTML:
    """Module-level shortcut for ``SharedComponents.create_method_panel``."""
    panel = SharedComponents.create_method_panel(items)
    return panel
311
+
312
+
313
class ProgressTracker:
    """Track multi-step progress.

    ``current`` is the index of the step in progress; it equals
    ``len(steps)`` once every step has been completed.
    """

    def __init__(self, steps: List[str]):
        self.steps = steps
        self.current = 0

    def get_status(self) -> str:
        """Get current status.

        Returns:
            A Markdown string with a "## Progress" heading followed by one
            line per step: "✅" for completed steps, "🔄" for the step in
            progress (if any) and "⬜" for the remaining ones.
        """
        # Every completed step gets its own marker. Joining first and
        # prefixing once would mark only the first step and print a stray
        # "✅ " when nothing is complete yet.
        lines = [f"✅ {step}" for step in self.steps[:self.current]]
        if self.current < len(self.steps):
            lines.append(f"🔄 {self.steps[self.current]}")
        lines.extend(f"⬜ {step}" for step in self.steps[self.current + 1:])

        # Built without indentation so the heading is not read as a Markdown
        # code block.
        return "\n## Progress\n\n" + "\n".join(lines) + "\n"

    def advance(self) -> bool:
        """Move to next step.

        Returns:
            True if a step was completed, False if all steps were already done.
        """
        if self.current < len(self.steps):
            self.current += 1
            return True
        return False

    def reset(self):
        """Reset progress to the first step."""
        self.current = 0
344
+
345
+
346
def create_tabbed_interface(tabs: Dict[str, Any]) -> gr.Blocks:
    """Create tabbed interface helper.

    Args:
        tabs: Mapping of tab title to tab content. Content may be a Gradio
            component or Blocks built outside any Blocks context (it is
            attached with ``.render()``), or a zero-argument callable that
            builds its components when called. Anything else is ignored.

    Returns:
        A new ``gr.Blocks`` with one tab per entry.
    """
    with gr.Blocks() as demo:
        with gr.Tabs():
            for tab_name, tab_content in tabs.items():
                with gr.Tab(tab_name):
                    # Merely naming an object inside the `with` block places
                    # nothing in the tab; pre-built components have to be
                    # attached explicitly. `.render` is checked first because
                    # Blocks objects are also callable.
                    if hasattr(tab_content, "render"):
                        tab_content.render()
                    elif callable(tab_content):
                        tab_content()

    return demo
355
+
356
+
357
def create_side_by_side(left_content: Any, right_content: Any) -> Tuple[Any, Any]:
    """Create side-by-side layout.

    Opens a row with two columns in the current Blocks context and places
    *left_content* and *right_content* in them. Content may be a component
    that has not been placed yet (attached with ``.render()``) or a
    zero-argument callable that builds components when called. A component
    that already belongs to the current Blocks is left where it is.

    Returns:
        ``(left_content, right_content)``, unchanged.
    """
    from gradio.exceptions import DuplicateBlockError

    def _place(content: Any) -> None:
        # Merely naming an object inside the `with` block places nothing.
        if hasattr(content, "render"):
            try:
                content.render()
            except DuplicateBlockError:
                # Already placed elsewhere in this Blocks; it cannot be moved.
                pass
        elif callable(content):
            content()

    with gr.Row():
        with gr.Column():
            _place(left_content)
        with gr.Column():
            _place(right_content)

    return left_content, right_content
366
+
367
+
368
def create_accordion(items: List[Tuple[str, Any]]) -> gr.Accordion:
    """Create accordion-style expandable sections.

    Args:
        items: ``(title, content)`` pairs. Each title is rendered as a level-3
            Markdown heading. Content may be a component that has not been
            placed yet (attached with ``.render()``) or a zero-argument
            callable that builds components when called. A component that
            already belongs to the current Blocks is left where it is.

    Returns:
        The single ``gr.Accordion`` (labelled "Click to expand") that holds
        all sections.
    """
    from gradio.exceptions import DuplicateBlockError

    with gr.Accordion("Click to expand") as accordion:
        for title, content in items:
            gr.Markdown(f"### {title}")
            # Merely naming an object inside the `with` block places nothing.
            if hasattr(content, "render"):
                try:
                    content.render()
                except DuplicateBlockError:
                    # Already placed elsewhere in this Blocks; it cannot be moved.
                    pass
            elif callable(content):
                content()

    return accordion
shared/styles.css ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* HF-Master Shared Styles
2
+ Light research-studio system inspired by MCP Video Localizer. */
3
+
4
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
5
+
6
+ :root {
7
+ --peach: #ffad7a;
8
+ --peach-dark: #e8935c;
9
+ --lavender: #b8a9d9;
10
+ --sky-blue: #7accff;
11
+ --ink: #1f2937;
12
+ --slate: #4b5563;
13
+ --muted: #6b7280;
14
+ --bg-light: #f9fafb;
15
+ --surface: #ffffff;
16
+ --surface-soft: #fff7f1;
17
+ --border-default: #e5e7eb;
18
+ --border-subtle: #f3f4f6;
19
+ --accent-subtle: rgba(255, 173, 122, 0.14);
20
+ --shadow-sm: 0 1px 2px rgba(31, 41, 55, 0.05);
21
+ --shadow-md: 0 8px 24px rgba(31, 41, 55, 0.08);
22
+ --shadow-lg: 0 18px 48px rgba(31, 41, 55, 0.12);
23
+ --radius: 8px;
24
+ }
25
+
26
+ html {
27
+ scroll-behavior: smooth;
28
+ }
29
+
30
+ body,
31
+ .gradio-container {
32
+ background:
33
+ radial-gradient(circle at 7% 4%, rgba(255, 173, 122, 0.22), transparent 28%),
34
+ radial-gradient(circle at 88% 8%, rgba(122, 204, 255, 0.20), transparent 30%),
35
+ linear-gradient(180deg, #ffffff 0%, var(--bg-light) 44%, #f7f2fb 100%) !important;
36
+ color: var(--ink) !important;
37
+ font-family: Inter, "Helvetica Neue", "Segoe UI", system-ui, -apple-system, sans-serif !important;
38
+ font-weight: 400;
39
+ letter-spacing: 0;
40
+ -webkit-font-smoothing: antialiased;
41
+ }
42
+
43
+ .gradio-container {
44
+ max-width: 1180px !important;
45
+ margin: 0 auto !important;
46
+ }
47
+
48
+ .main,
49
+ .block-container {
50
+ background: transparent !important;
51
+ }
52
+
53
+ .block-container {
54
+ max-width: 1180px;
55
+ padding-top: 2rem;
56
+ padding-bottom: 2rem;
57
+ }
58
+
59
+ /* Shared hero used by Gradio helpers and Streamlit HTML. */
60
+ .hf-hero,
61
+ .hero {
62
+ position: relative;
63
+ overflow: hidden;
64
+ background:
65
+ radial-gradient(circle at 20% 8%, rgba(255, 255, 255, 0.52), transparent 26%),
66
+ linear-gradient(135deg, var(--peach) 0%, var(--lavender) 54%, var(--sky-blue) 100%);
67
+ border: 1px solid rgba(255, 255, 255, 0.72);
68
+ border-radius: 16px;
69
+ box-shadow: var(--shadow-lg), 0 0 34px rgba(255, 173, 122, 0.18);
70
+ color: #ffffff;
71
+ margin: 1rem 0 1.25rem 0;
72
+ padding: clamp(1.25rem, 3vw, 2.25rem);
73
+ }
74
+
75
+ .hf-hero::after,
76
+ .hero::after {
77
+ content: "";
78
+ position: absolute;
79
+ inset: auto -12% -45% auto;
80
+ width: 360px;
81
+ height: 360px;
82
+ background: rgba(255, 255, 255, 0.22);
83
+ border-radius: 999px;
84
+ pointer-events: none;
85
+ }
86
+
87
+ .hf-hero-grid,
88
+ .hf-hero-copy {
89
+ position: relative;
90
+ z-index: 1;
91
+ }
92
+
93
+ .hf-hero-copy {
94
+ display: flex;
95
+ align-items: flex-start;
96
+ gap: 1rem;
97
+ }
98
+
99
+ .hf-icon {
100
+ align-items: center;
101
+ background: rgba(255, 255, 255, 0.24);
102
+ border: 1px solid rgba(255, 255, 255, 0.45);
103
+ border-radius: 8px;
104
+ box-shadow: var(--shadow-sm);
105
+ display: inline-flex;
106
+ flex: 0 0 auto;
107
+ font-size: 1.7rem;
108
+ height: 3.75rem;
109
+ justify-content: center;
110
+ width: 3.75rem;
111
+ }
112
+
113
+ .hf-badge {
114
+ background: rgba(255, 255, 255, 0.22);
115
+ border: 1px solid rgba(255, 255, 255, 0.42);
116
+ border-radius: 999px;
117
+ color: rgba(255, 255, 255, 0.96);
118
+ display: inline-flex;
119
+ font-size: 0.76rem;
120
+ font-weight: 800;
121
+ letter-spacing: 0.06em;
122
+ margin-bottom: 0.7rem;
123
+ padding: 0.34rem 0.7rem;
124
+ text-transform: uppercase;
125
+ }
126
+
127
+ .hf-hero h1,
128
+ .hero h1 {
129
+ color: #ffffff !important;
130
+ font-size: clamp(2rem, 4vw, 3.35rem);
131
+ font-weight: 800;
132
+ letter-spacing: 0;
133
+ line-height: 1.04;
134
+ margin: 0 0 0.45rem 0;
135
+ text-shadow: 0 2px 12px rgba(31, 41, 55, 0.18);
136
+ }
137
+
138
+ .hf-hero p,
139
+ .hero p {
140
+ color: rgba(255, 255, 255, 0.96) !important;
141
+ font-size: 1.03rem;
142
+ line-height: 1.65;
143
+ margin: 0;
144
+ max-width: 68ch;
145
+ }
146
+
147
+ .hf-chip-row,
148
+ .pill-row {
149
+ display: flex;
150
+ flex-wrap: wrap;
151
+ gap: 0.55rem;
152
+ margin-top: 1.15rem;
153
+ position: relative;
154
+ z-index: 1;
155
+ }
156
+
157
+ .hf-chip,
158
+ .badge,
159
+ .tech-tag {
160
+ background: rgba(255, 255, 255, 0.24);
161
+ border: 1px solid rgba(255, 255, 255, 0.45);
162
+ border-radius: 999px;
163
+ color: #ffffff;
164
+ display: inline-flex;
165
+ font-size: 0.86rem;
166
+ font-weight: 700;
167
+ padding: 0.42rem 0.75rem;
168
+ }
169
+
170
+ .hf-method-grid {
171
+ display: grid;
172
+ gap: 0.9rem;
173
+ grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
174
+ margin: 1rem 0;
175
+ }
176
+
177
+ .hf-method-card,
178
+ .glass-card,
179
+ .project-card,
180
+ .info-card,
181
+ .metric-card,
182
+ .stat-box,
183
+ .gradio-container .form,
184
+ .gradio-container .panel {
185
+ background: rgba(255, 255, 255, 0.88) !important;
186
+ border: 1px solid var(--border-default) !important;
187
+ border-radius: var(--radius) !important;
188
+ box-shadow: var(--shadow-md) !important;
189
+ }
190
+
191
+ .hf-method-card {
192
+ padding: 1rem;
193
+ }
194
+
195
+ .hf-method-card span {
196
+ color: var(--peach-dark);
197
+ display: block;
198
+ font-size: 0.78rem;
199
+ font-weight: 800;
200
+ letter-spacing: 0.04em;
201
+ margin-bottom: 0.35rem;
202
+ text-transform: uppercase;
203
+ }
204
+
205
+ .hf-method-card p {
206
+ color: var(--slate);
207
+ line-height: 1.55;
208
+ margin: 0;
209
+ }
210
+
211
+ .project-card,
212
+ .info-card,
213
+ .metric-card,
214
+ .stat-box {
215
+ padding: 1rem;
216
+ }
217
+
218
+ h1,
219
+ h2,
220
+ h3,
221
+ h4,
222
+ .markdown-text h1,
223
+ .markdown-text h2,
224
+ .markdown-text h3 {
225
+ color: var(--ink) !important;
226
+ font-family: Inter, "Helvetica Neue", system-ui, sans-serif !important;
227
+ letter-spacing: 0;
228
+ }
229
+
230
+ p,
231
+ li,
232
+ label,
233
+ .markdown-text,
234
+ .markdown-text p,
235
+ .markdown-text span {
236
+ color: var(--slate) !important;
237
+ font-family: Inter, "Helvetica Neue", system-ui, sans-serif !important;
238
+ }
239
+
240
+ strong {
241
+ color: var(--ink);
242
+ font-weight: 700;
243
+ }
244
+
245
+ input,
246
+ select,
247
+ textarea,
248
+ .gr-textbox,
249
+ .gr-dropdown {
250
+ background: #ffffff !important;
251
+ border: 1px solid var(--border-default) !important;
252
+ border-radius: var(--radius) !important;
253
+ color: var(--ink) !important;
254
+ font-family: Inter, "Helvetica Neue", system-ui, sans-serif !important;
255
+ transition: border-color 0.15s ease, box-shadow 0.15s ease !important;
256
+ }
257
+
258
+ input:focus,
259
+ select:focus,
260
+ textarea:focus,
261
+ .gr-textbox:focus {
262
+ border-color: var(--peach) !important;
263
+ box-shadow: 0 0 0 3px var(--accent-subtle) !important;
264
+ outline: none !important;
265
+ }
266
+
267
+ button.primary,
268
+ button[class*="primary"],
269
+ div[data-testid="stButton"] > button {
270
+ background: linear-gradient(135deg, var(--peach) 0%, var(--peach-dark) 100%) !important;
271
+ border: 0 !important;
272
+ border-radius: var(--radius) !important;
273
+ box-shadow: 0 8px 20px rgba(255, 173, 122, 0.28) !important;
274
+ color: #ffffff !important;
275
+ font-family: Inter, "Helvetica Neue", system-ui, sans-serif !important;
276
+ font-weight: 800 !important;
277
+ padding: 0.72rem 1.1rem !important;
278
+ transition: transform 0.18s ease, box-shadow 0.18s ease !important;
279
+ }
280
+
281
+ button.primary:hover,
282
+ button[class*="primary"]:hover,
283
+ div[data-testid="stButton"] > button:hover {
284
+ box-shadow: 0 12px 26px rgba(255, 173, 122, 0.36) !important;
285
+ transform: translateY(-1px) !important;
286
+ }
287
+
288
+ button.secondary,
289
+ button[class*="secondary"] {
290
+ background: #ffffff !important;
291
+ border: 1px solid var(--border-default) !important;
292
+ border-radius: var(--radius) !important;
293
+ color: var(--ink) !important;
294
+ font-weight: 700 !important;
295
+ }
296
+
297
+ code,
298
+ pre {
299
+ border-radius: var(--radius) !important;
300
+ }
301
+
302
+ code {
303
+ background: #fff2e8 !important;
304
+ color: #9a4f1f !important;
305
+ }
306
+
307
+ pre {
308
+ background: #111827 !important;
309
+ border: 1px solid #273244 !important;
310
+ color: #f9fafb !important;
311
+ }
312
+
313
+ table {
314
+ border-collapse: collapse;
315
+ width: 100%;
316
+ }
317
+
318
+ th {
319
+ background: #fff2e8;
320
+ color: var(--ink);
321
+ font-weight: 800;
322
+ }
323
+
324
+ td,
325
+ th {
326
+ border-bottom: 1px solid var(--border-default);
327
+ padding: 0.7rem;
328
+ }
329
+
330
+ blockquote,
331
+ .markdown-text blockquote {
332
+ background: #faf9fc !important;
333
+ border-left: 3px solid var(--lavender) !important;
334
+ border-radius: 0 var(--radius) var(--radius) 0 !important;
335
+ color: var(--slate) !important;
336
+ margin: 0.5rem 0 !important;
337
+ padding: 0.75rem 1rem !important;
338
+ }
339
+
340
+ a {
341
+ color: #2774a9 !important;
342
+ font-weight: 700;
343
+ }
344
+
345
+ .hf-footer {
346
+ border-top: 1px solid var(--border-default);
347
+ color: var(--muted);
348
+ font-size: 0.92rem;
349
+ margin-top: 1.5rem;
350
+ padding: 1rem 0;
351
+ text-align: center;
352
+ }
353
+
354
+ .hf-footer strong {
355
+ color: var(--ink);
356
+ }
357
+
358
+ /* Streamlit shell polish. */
359
+ div[data-testid="stHeader"],
360
+ div[data-testid="stToolbar"] {
361
+ background: transparent !important;
362
+ }
363
+
364
+ div[data-testid="stSidebar"] {
365
+ background: rgba(255, 255, 255, 0.82) !important;
366
+ border-right: 1px solid var(--border-default);
367
+ }
368
+
369
+ div[data-baseweb="input"],
370
+ div[data-baseweb="textarea"],
371
+ div[data-baseweb="select"] {
372
+ background: #ffffff !important;
373
+ }
374
+
375
+ div[data-testid="stTextInput"] input,
376
+ div[data-testid="stTextArea"] textarea,
377
+ div[data-testid="stSelectbox"] div {
378
+ border-radius: var(--radius) !important;
379
+ border-color: var(--border-default) !important;
380
+ }
381
+
382
+ div[data-testid="stMetric"] {
383
+ background: rgba(255, 255, 255, 0.9);
384
+ border: 1px solid var(--border-default);
385
+ border-radius: var(--radius);
386
+ box-shadow: var(--shadow-md);
387
+ padding: 0.8rem 1rem;
388
+ }
389
+
390
+ .stPlotlyChart {
391
+ background: rgba(255, 255, 255, 0.86);
392
+ border: 1px solid var(--border-default);
393
+ border-radius: var(--radius);
394
+ box-shadow: var(--shadow-sm);
395
+ padding: 0.3rem;
396
+ }
397
+
398
+ @keyframes fadeIn {
399
+ from {
400
+ opacity: 0;
401
+ transform: translateY(8px);
402
+ }
403
+ to {
404
+ opacity: 1;
405
+ transform: translateY(0);
406
+ }
407
+ }
408
+
409
+ .hf-hero,
410
+ .hf-method-card,
411
+ .metric-card,
412
+ .info-card {
413
+ animation: fadeIn 0.28s ease-out;
414
+ }
415
+
416
+ @media (max-width: 720px) {
417
+ .hf-hero-copy {
418
+ flex-direction: column;
419
+ }
420
+
421
+ .hf-icon {
422
+ height: 3.2rem;
423
+ width: 3.2rem;
424
+ }
425
+ }
shared/utils.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HF-Master Shared Utilities
3
+ Helper functions for all projects
4
+ """
5
+
6
+ import os
7
+ import re
8
+ import json
9
+ import hashlib
10
+ from typing import Dict, List, Optional, Any, Union
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ import sqlite3
14
+
15
+
16
def load_env(var_name: str, default: Optional[str] = None) -> Optional[str]:
    """Return the value of the environment variable ``var_name``.

    Args:
        var_name: Name of the environment variable to read.
        default: Value returned when the variable is not set.

    Returns:
        The variable's value, or ``default`` when it is absent.
    """
    return os.environ.get(var_name, default)
19
+
20
+
21
def load_api_key(provider: str = "openai") -> Optional[str]:
    """Look up the API key for ``provider`` in the environment.

    The provider name is matched case-insensitively against a fixed table
    of provider -> environment-variable names.

    Args:
        provider: Provider identifier such as ``"openai"`` or ``"anthropic"``.

    Returns:
        The value of the provider's environment variable, or ``None`` when
        the provider is unknown or the variable is not set.
    """
    env_names = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "huggingface": "HF_TOKEN",
        "cohere": "COHERE_API_KEY",
        "together": "TOGETHER_API_KEY",
    }

    env_name = env_names.get(provider.lower())
    if not env_name:
        return None
    return os.getenv(env_name)
36
+
37
+
38
def estimate_token_count(text: str, model: str = "gpt-4") -> int:
    """Roughly estimate how many tokens ``text`` occupies for ``model``.

    The estimate is the character length of ``text`` integer-divided by a
    per-model characters-per-token figure; models not in the table use 4.

    Args:
        text: Text to measure.
        model: Model key used to pick the characters-per-token figure.

    Returns:
        The estimated token count (floor division, so it may be 0).
    """
    # Approximate number of characters that make up one token, per model.
    chars_per_token_by_model = {
        "gpt-4": 4,
        "gpt-3.5": 4,
        "claude": 4,
        "llama": 3,
    }

    divisor = chars_per_token_by_model.get(model, 4)
    return len(text) // divisor
49
+
50
+
51
def estimate_tokens(text: str, model: str = "gpt-4") -> int:
    """Backward-compatible alias for ``estimate_token_count``.

    Kept so older apps that import this name keep working; it forwards both
    arguments unchanged.
    """
    return estimate_token_count(text=text, model=model)
54
+
55
+
56
def calculate_api_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
    provider: str = "openai"
) -> float:
    """Compute the cost of one model call from a built-in rate table.

    Rates are applied per 1,000 tokens, separately for input and output.
    An unknown provider or model falls back to a default rate pair.

    Args:
        model: Model name to look up under ``provider``.
        input_tokens: Number of prompt tokens.
        output_tokens: Number of completion tokens.
        provider: Provider key in the rate table.

    Returns:
        Input cost plus output cost, in the rate table's currency units.
    """
    # Used when either the provider or the model is missing from the table.
    fallback_rates = {"input": 0.01, "output": 0.03}

    rate_table = {
        "openai": {
            "gpt-4": {"input": 0.03, "output": 0.06},
            "gpt-3.5-turbo": {"input": 0.001, "output": 0.002},
            "gpt-4-turbo": {"input": 0.01, "output": 0.03},
        },
        "anthropic": {
            "claude-3-opus": {"input": 0.015, "output": 0.075},
            "claude-3-sonnet": {"input": 0.003, "output": 0.015},
        },
    }

    rates = rate_table.get(provider, {}).get(model, fallback_rates)

    cost_in = (input_tokens / 1000) * rates["input"]
    cost_out = (output_tokens / 1000) * rates["output"]
    return cost_in + cost_out
83
+
84
+
85
def calculate_cost(tokens: int, model: str = "gpt-4", provider: str = "openai") -> float:
    """Backward-compatible alias for ``calculate_api_cost``.

    All ``tokens`` are billed as input tokens; the output token count is
    fixed at zero.
    """
    return calculate_api_cost(model, tokens, 0, provider)
88
+
89
+
90
def sanitize_filename(name: str) -> str:
    """Turn an arbitrary string into a lowercase, hyphen-separated slug.

    Steps: lowercase and trim, drop every character that is not a word
    character, whitespace or hyphen, then collapse each whitespace run into
    a single hyphen.

    Args:
        name: Raw name to convert.

    Returns:
        The slug; it may be empty if ``name`` had no usable characters.
    """
    lowered = name.lower().strip()
    without_punctuation = re.sub(r'[^\w\s-]', '', lowered)
    return re.sub(r'[\s]+', '-', without_punctuation)
96
+
97
+
98
def create_hash(text: str, length: int = 8) -> str:
    """Return the first ``length`` hex characters of the MD5 digest of ``text``.

    Args:
        text: String to hash; it is encoded with ``str.encode()`` defaults.
        length: Number of leading hex characters to keep.

    Returns:
        The truncated lowercase hexadecimal digest.
    """
    hex_digest = hashlib.md5(text.encode()).hexdigest()
    return hex_digest[:length]
101
+
102
+
103
def format_duration(seconds: float) -> str:
    """Render a duration in seconds as a short human-readable string.

    Values under a minute are shown in seconds, values under an hour in
    minutes, and everything else in hours, each with one decimal place.

    Args:
        seconds: Duration in seconds.

    Returns:
        A string such as ``"12.3s"``, ``"4.5m"`` or ``"1.2h"``.
    """
    if seconds < 60:
        return f"{seconds:.1f}s"
    if seconds < 3600:
        return f"{seconds/60:.1f}m"
    return f"{seconds/3600:.1f}h"
111
+
112
+
113
def format_bytes(bytes: int) -> str:
    """Render a byte count using binary (1024-based) units.

    The value is divided by 1024 until it drops below 1024 or the unit
    list (B through TB) is exhausted, in which case PB is used.

    Args:
        bytes: Size in bytes.

    Returns:
        The size with one decimal place and a unit suffix, e.g. ``"1.5 KB"``.
    """
    size = bytes
    units = ('B', 'KB', 'MB', 'GB', 'TB')

    index = 0
    while index < len(units):
        if size < 1024:
            return f"{size:.1f} {units[index]}"
        size /= 1024
        index += 1

    return f"{size:.1f} PB"
120
+
121
+
122
def truncate_text(text: str, max_length: int = 100, suffix: str = "...") -> str:
    """Shorten ``text`` to at most ``max_length`` characters.

    Text that already fits is returned unchanged. Otherwise the text is cut
    and ``suffix`` appended so that the result is exactly ``max_length``
    characters long.

    Args:
        text: Text to shorten.
        max_length: Maximum length of the returned string.
        suffix: Marker appended to truncated text.

    Returns:
        The original or truncated text; never longer than ``max_length``
        (and empty when ``max_length`` is zero or negative).
    """
    if len(text) <= max_length:
        return text

    keep = max_length - len(suffix)
    if keep <= 0:
        # No room for any text: a negative slice bound would otherwise keep
        # most of the text and exceed max_length. Return as much of the
        # suffix as fits instead.
        return suffix[:max(max_length, 0)]

    return text[:keep] + suffix
127
+
128
+
129
def parse_dice_notation(notation: str) -> Dict[str, Any]:
    """Parse dice notation such as ``2d6+3`` or ``4d6kh3``.

    Matching is case-insensitive and ignores surrounding whitespace; the
    whole string must be valid notation.

    Args:
        notation: ``<count>d<sides>`` optionally followed by ``kh<n>``
            and/or a signed integer modifier.

    Returns:
        A dict with ``num_dice`` (int), ``die_size`` (int), ``keep_high``
        (the lowercase ``"kh<n>"`` token, or ``None``) and ``modifier``
        (int, 0 when absent).

    Raises:
        ValueError: If ``notation`` is not valid dice notation.
    """
    # Normalise to lowercase so the input agrees with the lowercase pattern;
    # upper-casing the input made the literal "d" and "kh" never match.
    normalized = notation.strip().lower()
    match = re.fullmatch(r'(\d+)d(\d+)(kh\d+)?([+-]\d+)?', normalized)
    if not match:
        raise ValueError(f"Invalid dice notation: {notation}")

    count, sides, keep_high, modifier = match.groups()

    return {
        "num_dice": int(count),
        "die_size": int(sides),
        "keep_high": keep_high,
        "modifier": int(modifier) if modifier else 0,
    }
146
+
147
+
148
def roll_dice(notation: str) -> List[int]:
    """Roll the dice described by ``notation`` and return each die's result.

    When the notation carries a keep-highest clause (``kh<n>``), only the
    ``n`` highest rolls are returned, in descending order. The modifier
    part of the notation is not applied to the returned rolls.

    Args:
        notation: Dice notation accepted by ``parse_dice_notation``.

    Returns:
        The individual (possibly filtered) die results.
    """
    import random

    spec = parse_dice_notation(notation)
    results = [random.randint(1, spec["die_size"]) for _ in range(spec["num_dice"])]

    keep_token = spec["keep_high"]
    if not keep_token:
        return results

    # The token looks like "kh3"; everything after the two letters is the count.
    keep_count = int(keep_token[2:])
    results.sort(reverse=True)
    return results[:keep_count]
160
+
161
+
162
def calculate_modifier(ability_score: int) -> int:
    """Return the D&D ability modifier for ``ability_score``.

    The modifier is ``(score - 10) / 2`` rounded down, so odd scores below
    10 round toward the more negative value.
    """
    quotient, _ = divmod(ability_score - 10, 2)
    return quotient
165
+
166
+
167
def validate_ethereum_address(address: str) -> bool:
    """Check that ``address`` has the textual form of an Ethereum address.

    The format is ``0x`` followed by exactly 40 hexadecimal characters.
    Only the format is checked; no checksum validation is performed.

    Args:
        address: Candidate address string.

    Returns:
        ``True`` when the whole string matches the format, else ``False``.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which let "0x...\n" pass validation.
    return re.fullmatch(r'0x[a-fA-F0-9]{40}', address) is not None
171
+
172
+
173
def validate_solana_address(address: str) -> bool:
    """Check that ``address`` has the textual form of a Solana address.

    The format is 32 to 44 characters drawn from the alphabet in the
    pattern (digits 1-9 and letters excluding ``I``, ``O`` and ``l``).
    Only the character set and length are checked; the value is not decoded.

    Args:
        address: Candidate address string.

    Returns:
        ``True`` when the whole string matches the format, else ``False``.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which let addresses ending in "\n" pass validation.
    return re.fullmatch(r'[1-9A-HJ-NP-Za-km-z]{32,44}', address) is not None
177
+
178
+
179
def extract_urls(text: str) -> List[str]:
    """Find every ``http://`` or ``https://`` URL in ``text``.

    A URL runs until the first whitespace character or one of the
    delimiter characters excluded by the pattern (angle brackets, quotes,
    braces, pipe, backslash, caret, backtick, square brackets).

    Args:
        text: Text to scan.

    Returns:
        The matched URLs in order of appearance; empty when none are found.
    """
    url_matcher = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')
    return url_matcher.findall(text)
183
+
184
+
185
def extract_code_blocks(text: str) -> List[str]:
    """Return the bodies of fenced code blocks found in markdown ``text``.

    A block starts with three backticks, an optional language word and a
    newline, and ends at the next three backticks. The fence lines are not
    part of the returned strings.

    Args:
        text: Markdown source to scan.

    Returns:
        The block contents in order of appearance; empty when none are found.
    """
    # DOTALL lets the lazy body match span multiple lines.
    fence_matcher = re.compile(r'```(?:\w+)?\n(.*?)```', re.DOTALL)
    return fence_matcher.findall(text)
189
+
190
+
191
def parse_math_expression(expr: str) -> float:
    """Evaluate a simple arithmetic expression.

    Only digits, ``+ - * / . ( )`` and spaces are accepted, so names,
    attribute access and calls to functions cannot be expressed.

    Args:
        expr: Arithmetic expression such as ``"(1 + 2) * 3"``.

    Returns:
        The value of the expression.

    Raises:
        ValueError: If ``expr`` is empty/blank or contains a character
            outside the allowed set.
        SyntaxError: If ``expr`` uses only allowed characters but is not a
            valid expression (e.g. ``"1 +"``).
        ZeroDivisionError: If the expression divides by zero.
    """
    allowed_chars = set("0123456789+-*/.() ")

    # Blank input used to reach eval() and surface as a SyntaxError; treat it
    # as invalid input like every other rejected expression.
    if not expr.strip():
        raise ValueError(f"Unsafe expression: {expr}")

    if not all(c in allowed_chars for c in expr):
        raise ValueError(f"Unsafe expression: {expr}")

    # SECURITY NOTE(review): this still relies on eval(). The character
    # whitelist prevents names and calls, and builtins are removed as defence
    # in depth, but "**" remains expressible, so inputs like "9**9**9**9" can
    # consume unbounded CPU/memory. Do not expose this to untrusted input
    # without an additional size/complexity limit or an AST-based evaluator.
    return eval(expr, {"__builtins__": {}}, {})
197
+
198
+
199
def create_timer(func):
    """Decorator that prints how long each call to ``func`` takes.

    The elapsed time is printed as ``"<name> took <duration>"`` after the
    call returns; nothing is printed when ``func`` raises.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper with the same signature and return value as ``func``.
    """
    import time
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and high-resolution; time.time() is a
        # wall clock and can jump (NTP, DST), giving wrong or negative
        # durations.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        duration = time.perf_counter() - start
        print(f"{func.__name__} took {format_duration(duration)}")
        return result

    return wrapper
213
+
214
+
215
def retry_on_failure(max_attempts: int = 3, delay: float = 1.0):
    """Decorator factory that retries a function when it raises.

    The wrapped function is called up to ``max_attempts`` times. After the
    n-th failed attempt (1-based) the wrapper sleeps ``delay * n`` seconds
    before trying again; the exception from the final attempt propagates.

    Args:
        max_attempts: Total number of attempts; must be at least 1.
        delay: Base delay in seconds, multiplied by the attempt number.

    Returns:
        A decorator that adds the retry behaviour to a function.

    Raises:
        ValueError: If ``max_attempts`` is less than 1. Such a value used to
            make the wrapper return ``None`` without ever calling the function.
    """
    from functools import wraps
    import time

    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    # Out of attempts: let the last error reach the caller.
                    if attempt == max_attempts:
                        raise
                    time.sleep(delay * attempt)

        return wrapper

    return decorator
234
+
235
+
236
class SimpleCache:
    """Small in-memory cache that evicts the least recently accessed entry.

    Entries live in ``cache``; ``access_times`` records when each key was
    last read or written and drives eviction once ``max_size`` is reached.
    """

    def __init__(self, max_size: int = 100):
        """Create an empty cache holding at most ``max_size`` entries."""
        self.cache: Dict[str, Any] = {}
        self.max_size = max_size
        self.access_times: Dict[str, datetime] = {}

    def get(self, key: str) -> Optional[Any]:
        """Return the value stored under ``key``, or ``None`` if absent.

        A hit refreshes the key's access time.
        """
        if key in self.cache:
            self.access_times[key] = datetime.now()
            return self.cache[key]
        return None

    def set(self, key: str, value: Any):
        """Store ``value`` under ``key``, evicting an old entry if needed.

        Overwriting an existing key never evicts anything. A cache whose
        ``max_size`` is zero or negative stores nothing.
        """
        if self.max_size <= 0:
            # Nothing can be held; previously this crashed on min() of an
            # empty mapping.
            return

        # Only a brand-new key grows the cache, so only then make room.
        if key not in self.cache and len(self.cache) >= self.max_size:
            oldest = min(self.access_times, key=self.access_times.get)
            del self.cache[oldest]
            del self.access_times[oldest]

        self.cache[key] = value
        self.access_times[key] = datetime.now()

    def clear(self):
        """Remove every entry and its access time."""
        self.cache.clear()
        self.access_times.clear()
265
+
266
+
267
class Database:
    """Thin convenience wrapper around a single SQLite connection.

    The connection is opened lazily on first use and rows are returned as
    plain dicts. The instance can also be used as a context manager, which
    commits on a clean exit, rolls back on an exception, and then closes.
    """

    def __init__(self, db_path: str = "data.db"):
        """Remember ``db_path`` and make sure its parent directory exists."""
        self.db_path = db_path
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        self.conn = None

    def __enter__(self):
        """Open the connection if needed and return this wrapper."""
        if not self.conn:
            self.connect()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Commit (or roll back on error) and close the connection."""
        if self.conn:
            if exc_type is None:
                self.conn.commit()
            else:
                self.conn.rollback()
        self.close()

    def connect(self):
        """Open the connection; rows support access by column name."""
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row

    def close(self):
        """Close the connection, if one is open."""
        if self.conn:
            self.conn.close()
            # Forget the closed handle so the next call reconnects instead
            # of failing on a closed connection.
            self.conn = None

    def execute(self, query: str, params: tuple = ()) -> sqlite3.Cursor:
        """Run ``query`` with bound ``params`` and return the cursor.

        Connects first when no connection is open. Does not commit.
        """
        if not self.conn:
            self.connect()
        return self.conn.execute(query, params)

    def commit(self):
        """Commit the current transaction, if a connection is open."""
        if self.conn:
            self.conn.commit()

    def fetchall(self, query: str, params: tuple = ()) -> List[Dict]:
        """Run ``query`` and return every row as a dict."""
        cursor = self.execute(query, params)
        return [dict(row) for row in cursor.fetchall()]

    def fetchone(self, query: str, params: tuple = ()) -> Optional[Dict]:
        """Run ``query`` and return the first row as a dict, or ``None``."""
        cursor = self.execute(query, params)
        row = cursor.fetchone()
        return dict(row) if row else None

    def create_table(self, name: str, columns: Dict[str, str]):
        """Create table ``name`` if it does not exist, then commit.

        ``columns`` maps column names to their SQL type/constraint text.
        """
        # NOTE(review): identifiers cannot be bound as SQL parameters, so the
        # table and column text is interpolated directly. Only pass trusted,
        # code-defined names here, never user input.
        cols = ", ".join([f"{k} {v}" for k, v in columns.items()])
        self.execute(f"CREATE TABLE IF NOT EXISTS {name} ({cols})")
        self.commit()
312
+
313
+
314
def load_json_file(filepath: str) -> Dict:
    """Read and parse the JSON document stored at ``filepath``.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8; without an explicit encoding open() uses the
    # platform default and can mis-decode non-ASCII content.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)
318
+
319
+
320
def save_json_file(data: Dict, filepath: str):
    """Write ``data`` to ``filepath`` as JSON indented by two spaces.

    Missing parent directories are created first; an existing file is
    overwritten.

    Args:
        data: JSON-serialisable content to write.
        filepath: Destination path.
    """
    Path(filepath).parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 keeps the file readable by load_json_file regardless of
    # the platform's default encoding.
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
325
+
326
+
327
def merge_dicts(*dicts: Dict) -> Dict:
    """Combine any number of dictionaries into a new one.

    Later arguments win when the same key appears more than once. The
    inputs are not modified, and the merge is shallow.

    Args:
        *dicts: Dictionaries to merge, in priority order (last wins).

    Returns:
        A new dictionary with the merged contents.
    """
    merged: Dict = {}
    for mapping in dicts:
        merged.update(mapping)
    return merged
333
+
334
+
335
def flatten_list(nested: List[Any]) -> List[Any]:
    """Flatten arbitrarily nested lists into a single flat list.

    Only ``list`` instances are expanded; other containers such as tuples
    are kept as single items. Element order is preserved.

    Args:
        nested: List that may contain further lists at any depth.

    Returns:
        A new list with every non-list element in depth-first order.
    """
    flat: List[Any] = []
    # Stack of iterators: the top one is the list currently being walked.
    pending = [iter(nested)]

    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # Descend; the parent iterator keeps its position for later.
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # Current list exhausted: resume its parent.
            pending.pop()

    return flat
344
+
345
+
346
def chunk_text(text: str, chunk_size: int, overlap: int = 0) -> List[str]:
    """Split ``text`` into chunks of ``chunk_size`` characters.

    Each chunk starts ``chunk_size - overlap`` characters after the previous
    one, so consecutive chunks share ``overlap`` characters. The final
    chunk(s) may be shorter than ``chunk_size``.

    Args:
        text: Text to split.
        chunk_size: Maximum length of each chunk; must be positive.
        overlap: Characters shared between neighbouring chunks; must be
            smaller than ``chunk_size``.

    Returns:
        The chunks in order; empty when ``text`` is empty.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size``. Either case would keep the start
            position from advancing and loop forever.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
357
+
358
+
359
def get_project_root() -> Path:
    """Return the directory two levels above this module's file.

    The project root is taken to be the parent of the directory that
    contains this file.
    """
    module_dir = Path(__file__).parent
    return module_dir.parent
362
+
363
+
364
def ensure_dir(path: str):
    """Create the directory ``path`` if it does not already exist.

    Missing parent directories are created too, and an existing directory
    is left untouched.

    Args:
        path: Directory path to create.
    """
    target = Path(path)
    target.mkdir(parents=True, exist_ok=True)