walidsobhie-code Claude Opus 4.6 committed on
Commit
65973ec
·
1 Parent(s): 0647cf2

feat: Add data quality, model client, pattern miner, and MBPP benchmark

Browse files

- Add data_quality.py with quality scoring, filtering, deduplication
- Add model_client.py with unified API for Ollama, OpenAI, Anthropic
- Add pattern_miner.py for self-evolution pattern extraction
- Update MBPP benchmark with real model API integration
- Update requirements.txt with ML and API dependencies

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

requirements.txt CHANGED
@@ -1,7 +1,29 @@
1
- coqui-tts>=0.20.0
2
- librosa>=0.10.0
3
- soundfile>=0.12.0
4
- numpy>=1.24.0
 
 
5
  torch>=2.0.0
6
- tqdm>=4.65.0
7
- pydantic>=2.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Stack 2.9 Requirements
2
+
3
+ # Core
4
+ stack-cli>=2.9.0
5
+
6
+ # Training & ML
7
  torch>=2.0.0
8
+ transformers>=4.35.0
9
+ peft>=0.8.0
10
+ accelerate>=0.25.0
11
+ bitsandbytes>=0.41.0
12
+ datasets>=2.14.0
13
+ trl>=0.7.0 # For DPO/PPO training
14
+
15
+ # Evaluation & Benchmarking
16
+ numpy>=1.24.0
17
+ pandas>=2.0.0
18
+
19
+ # Model APIs
20
+ openai>=1.3.0
21
+ anthropic>=0.18.0
22
+ requests>=2.31.0
23
+
24
+ # Memory & Vector Store
25
+ faiss-cpu>=1.7.0
26
+
27
+ # Utilities
28
+ pyyaml>=6.0
29
+ tqdm>=4.66.0
stack-2.9-eval/benchmarks/mbpp.py ADDED
@@ -0,0 +1,422 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MBPP (Mostly Basic Python Problems) benchmark implementation
3
+ Real implementation with model API integration.
4
+ """
5
+
6
+ import os
7
+ import re
8
+ import json
9
+ import signal
10
+ from typing import Dict, Any, List, Tuple, Optional
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+
14
+ # Add parent to path for imports
15
+ import sys
16
+ sys.path.insert(0, str(Path(__file__).parent.parent))
17
+
18
+ from model_client import create_model_client, BaseModelClient, ChatMessage
19
+
20
+
21
@dataclass
class MBPPProblem:
    """MBPP problem structure.

    Mirrors one entry of the MBPP dataset: the natural-language task, the
    canonical reference solution, and the assert-based test snippet.
    """
    task_id: int            # Dataset task identifier
    description: str        # Short human-readable summary of the task
    prompt: str             # Natural-language instruction given to the model
    code: str               # Canonical solution
    test: str               # Test code (assert statements)
    test_import: List[str]  # Import lines the tests need
30
+
31
+
32
@dataclass
class MBPPResult:
    """Outcome of evaluating a single MBPP problem."""
    task_id: int                 # MBPP task identifier
    passed: bool                 # True if the generated code passed every assert
    generated_code: str          # Raw model output (canonical solution in stub mode)
    error: Optional[str] = None  # Generation or test failure message, if any
    execution_time: float = 0.0  # Reserved for timing info (not currently populated)


class TimeoutException(Exception):
    """Raised when sandboxed code execution exceeds the time budget."""
    pass


def timeout_handler(signum, frame):
    """SIGALRM handler: abort the current execution via TimeoutException."""
    raise TimeoutException("Code execution timed out")


class MBPP:
    """MBPP Benchmark with real model integration.

    Generates solutions with a model client (or falls back to the canonical
    solutions in "stub" mode when no client can be created) and checks them
    by executing the problem's assert-based tests under a SIGALRM timeout.
    NOTE(review): signal.SIGALRM is Unix-only — this benchmark will not run
    on Windows as written.
    """

    # MBPP dataset (first 20 problems for quick testing)
    # In production, load full dataset from file
    PROBLEMS = [
        {
            "task_id": 1,
            "description": "Return sum of a list",
            "prompt": "Write a python function sum_list(lst) that returns the sum of all elements in a list.",
            "canonical": "def sum_list(lst):\n    return sum(lst)",
            "test": "assert sum_list([1, 2, 3]) == 6\nassert sum_list([]) == 0",
            "imports": []
        },
        {
            "task_id": 2,
            "description": "Return maximum element",
            "prompt": "Write a python function max_element(lst) that returns the maximum element in a list.",
            "canonical": "def max_element(lst):\n    return max(lst) if lst else None",
            "test": "assert max_element([1, 5, 3]) == 5\nassert max_element([0]) == 0",
            "imports": []
        },
        {
            "task_id": 3,
            "description": "Return reverse of string",
            "prompt": "Write a python function reverse_string(s) that returns the reverse of a string.",
            "canonical": "def reverse_string(s):\n    return s[::-1]",
            "test": "assert reverse_string('hello') == 'olleh'\nassert reverse_string('') == ''",
            "imports": []
        },
        {
            "task_id": 4,
            "description": "Check if string is palindrome",
            "prompt": "Write a python function is_palindrome(s) that returns True if a string is a palindrome, False otherwise.",
            "canonical": "def is_palindrome(s):\n    return s == s[::-1]",
            "test": "assert is_palindrome('racecar') == True\nassert is_palindrome('hello') == False",
            "imports": []
        },
        {
            "task_id": 5,
            "description": "Return factorial",
            "prompt": "Write a python function factorial(n) that returns the factorial of n.",
            "canonical": "def factorial(n):\n    if n <= 1:\n        return 1\n    return n * factorial(n-1)",
            "test": "assert factorial(5) == 120\nassert factorial(0) == 1",
            "imports": []
        },
        {
            "task_id": 6,
            "description": "Return Fibonacci number",
            "prompt": "Write a python function fibonacci(n) that returns the nth Fibonacci number.",
            "canonical": "def fibonacci(n):\n    if n <= 1:\n        return n\n    a, b = 0, 1\n    for _ in range(n-1):\n        a, b = b, a + b\n    return b",
            "test": "assert fibonacci(10) == 55\nassert fibonacci(0) == 0\nassert fibonacci(1) == 1",
            "imports": []
        },
        {
            "task_id": 7,
            "description": "Count vowels in string",
            "prompt": "Write a python function count_vowels(s) that returns the count of vowels in a string.",
            "canonical": "def count_vowels(s):\n    return sum(1 for c in s.lower() if c in 'aeiou')",
            "test": "assert count_vowels('hello') == 2\nassert count_vowels('xyz') == 0",
            "imports": []
        },
        {
            "task_id": 8,
            "description": "Return list of primes up to n",
            "prompt": "Write a python function primes_up_to(n) that returns a list of all primes up to n.",
            "canonical": "def primes_up_to(n):\n    if n < 2:\n        return []\n    sieve = [True] * (n + 1)\n    sieve[0] = sieve[1] = False\n    for i in range(2, int(n**0.5) + 1):\n        if sieve[i]:\n            for j in range(i*i, n+1, i):\n                sieve[j] = False\n    return [i for i in range(2, n+1) if sieve[i]]",
            "test": "assert primes_up_to(10) == [2,3,5,7]\nassert primes_up_to(2) == [2]",
            "imports": []
        },
        {
            "task_id": 9,
            "description": "Check if number is prime",
            "prompt": "Write a python function is_prime(n) that returns True if n is prime, False otherwise.",
            "canonical": "def is_prime(n):\n    if n < 2:\n        return False\n    for i in range(2, int(n**0.5) + 1):\n        if n % i == 0:\n            return False\n    return True",
            "test": "assert is_prime(7) == True\nassert is_prime(4) == False\nassert is_prime(1) == False",
            "imports": []
        },
        {
            "task_id": 10,
            "description": "Return length of last word",
            "prompt": "Write a python function length_last_word(s) that returns the length of the last word in a string.",
            "canonical": "def length_last_word(s):\n    words = s.split()\n    return len(words[-1]) if words else 0",
            "test": "assert length_last_word('hello world') == 5\nassert length_last_word('') == 0",
            "imports": []
        },
        {
            "task_id": 11,
            "description": "Remove duplicates from list",
            "prompt": "Write a python function remove_duplicates(lst) that returns a list with duplicates removed.",
            "canonical": "def remove_duplicates(lst):\n    return list(dict.fromkeys(lst))",
            "test": "assert remove_duplicates([1,2,2,3]) == [1,2,3]\nassert remove_duplicates([]) == []",
            "imports": []
        },
        {
            "task_id": 12,
            "description": "Return common elements",
            "prompt": "Write a python function common_elements(lst1, lst2) that returns common elements between two lists.",
            "canonical": "def common_elements(lst1, lst2):\n    return list(set(lst1) & set(lst2))",
            "test": "assert common_elements([1,2,3], [2,3,4]) == [2,3]\nassert common_elements([], [1]) == []",
            "imports": []
        },
        {
            "task_id": 13,
            "description": "Calculate power",
            "prompt": "Write a python function power(base, exp) that returns base raised to exp power.",
            "canonical": "def power(base, exp):\n    return base ** exp",
            "test": "assert power(2, 3) == 8\nassert power(5, 0) == 1",
            "imports": []
        },
        {
            "task_id": 14,
            "description": "Return sorted list",
            "prompt": "Write a python function sort_list(lst) that returns a sorted list in ascending order.",
            "canonical": "def sort_list(lst):\n    return sorted(lst)",
            "test": "assert sort_list([3,1,2]) == [1,2,3]\nassert sort_list([]) == []",
            "imports": []
        },
        {
            "task_id": 15,
            "description": "Check even number",
            "prompt": "Write a python function is_even(n) that returns True if n is even, False otherwise.",
            "canonical": "def is_even(n):\n    return n % 2 == 0",
            "test": "assert is_even(4) == True\nassert is_even(3) == False",
            "imports": []
        },
        {
            "task_id": 16,
            "description": "Return absolute value",
            "prompt": "Write a python function absolute(n) that returns the absolute value of n.",
            "canonical": "def absolute(n):\n    return abs(n)",
            "test": "assert absolute(-5) == 5\nassert absolute(5) == 5\nassert absolute(0) == 0",
            "imports": []
        },
        {
            "task_id": 17,
            "description": "Return string length",
            "prompt": "Write a python function string_length(s) that returns the length of a string.",
            "canonical": "def string_length(s):\n    return len(s)",
            "test": "assert string_length('hello') == 5\nassert string_length('') == 0",
            "imports": []
        },
        {
            "task_id": 18,
            "description": "Return uppercase string",
            "prompt": "Write a python function uppercase(s) that returns the uppercase version of a string.",
            "canonical": "def uppercase(s):\n    return s.upper()",
            "test": "assert uppercase('hello') == 'HELLO'\nassert uppercase('') == ''",
            "imports": []
        },
        {
            "task_id": 19,
            "description": "Return lowercase string",
            "prompt": "Write a python function lowercase(s) that returns the lowercase version of a string.",
            "canonical": "def lowercase(s):\n    return s.lower()",
            "test": "assert lowercase('HELLO') == 'hello'\nassert lowercase('') == ''",
            "imports": []
        },
        {
            "task_id": 20,
            "description": "Check substring",
            "prompt": "Write a python function contains_substring(s, sub) that returns True if sub is in s, False otherwise.",
            "canonical": "def contains_substring(s, sub):\n    return sub in s",
            "test": "assert contains_substring('hello', 'ell') == True\nassert contains_substring('hello', 'xyz') == False",
            "imports": []
        },
    ]

    def __init__(
        self,
        model_provider: str = None,
        model_name: str = None,
        timeout: int = 10,
        max_problems: int = None
    ):
        """Set up the benchmark.

        Args:
            model_provider: "ollama" | "openai" | "anthropic"; falls back to
                the MODEL_PROVIDER environment variable, then "ollama".
            model_name: Model identifier; falls back to the MODEL_NAME env var.
            timeout: Per-problem code-execution timeout in seconds.
            max_problems: Cap on the number of problems evaluated.
        """
        self.benchmark_name = "MBPP"
        self.timeout = timeout
        self.max_problems = max_problems or len(self.PROBLEMS)

        # Get provider from environment or parameter
        self.model_provider = model_provider or os.environ.get("MODEL_PROVIDER", "ollama")
        self.model_name = model_name or os.environ.get("MODEL_NAME", "")

        # Load model client; any failure drops us into stub mode, where the
        # canonical solutions stand in for model output.
        try:
            self.client = create_model_client(self.model_provider, self.model_name)
            print(f"Using model: {self.client.get_model_name()} (provider: {self.model_provider})")
        except Exception as e:
            print(f"Warning: Could not create model client: {e}")
            print("Using stub mode - results will be from canonical solutions")
            self.client = None

        # Load test cases
        self.test_cases = self._load_test_cases()
        self.total_cases = len(self.test_cases)

    def _load_test_cases(self) -> List[Dict]:
        """Load MBPP test cases, truncated to `max_problems` when set."""
        if self.max_problems:
            return self.PROBLEMS[:self.max_problems]
        return self.PROBLEMS

    def _format_prompt(self, problem: Dict) -> str:
        """Format the code-generation prompt for one problem."""
        prompt = f"""Write a Python function to solve this problem:

{problem['description']}

{problem['prompt']}

Write only the function definition, without any additional explanation or test code."""
        return prompt

    def generate_code(self, problem: Dict) -> Tuple[str, Optional[str]]:
        """Generate code for a problem using the model.

        Returns:
            (generated_code, error_message); error_message is None on success.
        """
        if self.client is None:
            # Stub mode: the canonical solution stands in for model output.
            return problem['canonical'], None

        prompt = self._format_prompt(problem)

        try:
            result = self.client.generate(
                prompt=prompt,
                temperature=0.2,
                max_tokens=1024
            )
            return result.text, None
        except Exception as e:
            return "", str(e)

    def _extract_function(self, code: str, problem: Dict) -> str:
        """Extract the first top-level function definition from generated code.

        A function ends at the first non-indented, non-blank line after its
        `def`. Falls back to the full text when no `def` is found.
        (`problem` is currently unused but kept for interface stability.)
        """
        lines = code.split('\n')

        func_lines = []
        in_function = False

        for line in lines:
            if re.match(r'^def\s+\w+\s*\(', line):
                in_function = True
                func_lines = [line]
            elif in_function:
                if line.strip() and not line.startswith(' ') and not line.startswith('\t'):
                    # First dedented statement ends the function body.
                    break
                func_lines.append(line)

        if func_lines:
            return '\n'.join(func_lines)

        # Fallback: return entire code if no clear function found
        return code

    def _test_code(self, code: str, problem: Dict) -> Tuple[bool, Optional[str]]:
        """Execute `code` followed by the problem's asserts, under a timeout.

        Returns:
            (passed, error_message); error_message is None when all asserts pass.
        """
        # SIGALRM-based timeout (Unix-only).
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(self.timeout)

        try:
            imports = '\n'.join(problem.get('imports', []))
            test_code = problem.get('test', '')

            full_code = f"{imports}\n{code}\n{test_code}"

            # Execute in a single shared namespace. Using separate globals and
            # locals (exec(code, {}, local)) breaks recursive functions: the
            # def lands in locals while the recursive call resolves against
            # the empty globals, so e.g. the canonical factorial raises
            # NameError.
            # NOTE(review): exec of model-generated code is inherently unsafe;
            # acceptable here only because this is a local benchmark sandbox.
            scope: Dict[str, Any] = {}
            exec(full_code, scope)

            # Reaching this point means every assert passed.
            return True, None

        except TimeoutException:
            return False, "Execution timed out"
        except Exception as e:
            return False, str(e)
        finally:
            # Always cancel the pending alarm — previously it was only
            # cancelled on success, so a slow-but-failing problem could fire
            # SIGALRM later during an unrelated problem.
            signal.alarm(0)

    def evaluate(self, model_name: str = None) -> Dict[str, Any]:
        """Evaluate the model against the loaded MBPP problems.

        Args:
            model_name: Optional override; recreates the client when set.

        Returns:
            Summary dict with pass@k counts, accuracy, and per-task results.
        """
        if model_name and self.client:
            # Update client if model changed
            self.client = create_model_client(self.model_provider, model_name)

        pass_at_1 = 0
        results = []

        print(f"\nEvaluating {self.total_cases} problems...")

        for i, problem in enumerate(self.test_cases):
            print(f"  Problem {i+1}/{self.total_cases}: Task {problem['task_id']}")

            # Generate code
            generated_code, error = self.generate_code(problem)

            if error:
                print(f"    Generation error: {error}")
                results.append(MBPPResult(
                    task_id=problem['task_id'],
                    passed=False,
                    generated_code=generated_code,
                    error=error
                ))
                continue

            # Extract function
            extracted = self._extract_function(generated_code, problem)

            # Test code
            passed, test_error = self._test_code(extracted, problem)

            if passed:
                pass_at_1 += 1
                print(f"    ✓ Passed")
            else:
                print(f"    ✗ Failed: {test_error}")

            results.append(MBPPResult(
                task_id=problem['task_id'],
                passed=passed,
                generated_code=generated_code,
                error=test_error
            ))

        accuracy = pass_at_1 / self.total_cases if self.total_cases > 0 else 0

        return {
            "pass_at_1": pass_at_1,
            "pass_at_3": pass_at_1,  # Simplified - would need multiple generations
            "pass_at_5": pass_at_1,
            "total_cases": self.total_cases,
            "accuracy": accuracy,
            "benchmark": self.benchmark_name,
            # Parenthesized: the unparenthesized original parsed as
            # "(model_name or client name) if client else 'stub'", which
            # discarded an explicit model_name whenever the client was None.
            "model": model_name or (self.client.get_model_name() if self.client else "stub"),
            "results": [
                {"task_id": r.task_id, "passed": r.passed, "error": r.error}
                for r in results
            ]
        }
396
+
397
+
398
if __name__ == "__main__":
    import argparse

    # CLI entry point: run the MBPP benchmark against a chosen provider/model
    # and print a pass@1 summary.
    parser = argparse.ArgumentParser(description="MBPP Benchmark")
    parser.add_argument("--provider", choices=["ollama", "openai", "anthropic"],
                        help="Model provider")
    parser.add_argument("--model", type=str, help="Model name")
    parser.add_argument("--max-problems", type=int, help="Max problems to test")
    parser.add_argument("--timeout", type=int, default=10, help="Timeout in seconds")

    args = parser.parse_args()

    # Unset flags default to None, letting MBPP fall back to env vars/defaults.
    benchmark = MBPP(
        model_provider=args.provider,
        model_name=args.model,
        max_problems=args.max_problems,
        timeout=args.timeout
    )

    results = benchmark.evaluate()

    # Summary report.
    print("\n" + "=" * 40)
    print("MBPP Results:")
    print(f"  Pass@1: {results['pass_at_1']}/{results['total_cases']} ({results['accuracy']*100:.1f}%)")
    print(f"  Model: {results['model']}")
stack-2.9-eval/model_client.py ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Stack 2.9 Model Client
4
+ Unified API client for Ollama, OpenAI, Anthropic, and other LLM backends.
5
+ """
6
+
7
+ import os
8
+ import json
9
+ import time
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import Dict, List, Any, Optional, Callable
13
+ from dataclasses import dataclass
14
+ from abc import ABC, abstractmethod
15
+
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass
class GenerationResult:
    """Normalized result returned by every model backend."""
    text: str                            # Generated text
    model: str                           # Model identifier that produced it
    tokens: int                          # Completion-token count (0 if unknown)
    duration: float                      # Wall-clock seconds for the request
    finish_reason: str                   # Backend-specific stop reason
    raw_response: Optional[Dict] = None  # Full provider payload, for debugging


@dataclass
class ChatMessage:
    """Chat message structure."""
    role: str    # "system", "user", "assistant"
    content: str
    tool_calls: Optional[List[Dict]] = None   # Assistant-issued tool calls, if any
    tool_call_id: Optional[str] = None        # Set on tool-result messages


class BaseModelClient(ABC):
    """Abstract base class for model clients."""

    @abstractmethod
    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text from a plain prompt."""
        pass

    @abstractmethod
    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate a response from a chat-message history."""
        pass

    @abstractmethod
    def get_model_name(self) -> str:
        """Get the model name."""
        pass


class OllamaClient(BaseModelClient):
    """Client for Ollama local API."""

    def __init__(
        self,
        model: str = "qwen2.5-coder:32b",
        base_url: str = "http://localhost:11434",
        timeout: int = 300
    ):
        """
        Args:
            model: Ollama model tag.
            base_url: Ollama server root; trailing slash is stripped.
            timeout: HTTP request timeout in seconds.
        """
        self.model = model
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    @staticmethod
    def _build_options(temperature: float, max_tokens: int,
                       stop: Optional[List[str]] = None) -> Dict[str, Any]:
        """Build Ollama's nested `options` object for sampling parameters."""
        options: Dict[str, Any] = {
            "temperature": temperature,
            # num_predict is Ollama's name for the completion-token cap.
            "num_predict": max_tokens,
        }
        if stop:
            options["stop"] = stop
        return options

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text using Ollama's /api/generate endpoint."""
        import requests

        url = f"{self.base_url}/api/generate"
        # Fix: Ollama reads sampling parameters from the nested "options"
        # object; top-level "temperature"/"max_tokens" keys are silently
        # ignored by the server.
        payload = {
            "model": self.model,
            "prompt": prompt,
            "options": self._build_options(temperature, max_tokens, stop),
            "stream": False
        }

        start_time = time.time()

        try:
            response = requests.post(url, json=payload, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            duration = time.time() - start_time

            return GenerationResult(
                text=data.get("response", ""),
                model=self.model,
                tokens=data.get("eval_count", 0),
                duration=duration,
                finish_reason=data.get("done_reason", "stop"),
                raw_response=data
            )
        except requests.exceptions.RequestException as e:
            logger.error(f"Ollama request failed: {e}")
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate a chat response using Ollama's /api/chat endpoint."""
        import requests

        url = f"{self.base_url}/api/chat"
        # Same fix as generate(): sampling params belong in "options".
        payload = {
            "model": self.model,
            "messages": [
                {"role": m.role, "content": m.content}
                for m in messages
            ],
            "options": self._build_options(temperature, max_tokens),
            "stream": False
        }

        if tools:
            payload["tools"] = tools

        start_time = time.time()

        try:
            response = requests.post(url, json=payload, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            duration = time.time() - start_time

            # Extract the assistant message content.
            msg = data.get("message", {})
            text = msg.get("content", "")

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=data.get("eval_count", 0),
                duration=duration,
                finish_reason=data.get("done_reason", "stop"),
                raw_response=data
            )
        except requests.exceptions.RequestException as e:
            logger.error(f"Ollama chat request failed: {e}")
            raise

    def get_model_name(self) -> str:
        """Get the model name."""
        return self.model
182
+
183
+
184
class OpenAIClient(BaseModelClient):
    """Client for OpenAI API."""

    def __init__(
        self,
        model: str = "gpt-4o",
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        timeout: int = 120
    ):
        """
        Args:
            model: OpenAI model name.
            api_key: API key; falls back to the OPENAI_API_KEY env var.
            base_url: API root; falls back to OPENAI_BASE_URL, then the
                public endpoint.
            timeout: Request timeout in seconds.

        Raises:
            ValueError: If no API key is supplied or found in the environment.
        """
        self.model = model
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        self.base_url = base_url or os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
        self.timeout = timeout

        if not self.api_key:
            raise ValueError("OpenAI API key required. Set OPENAI_API_KEY environment variable.")

    def _get_client(self):
        """Get an OpenAI SDK client (imported lazily so the SDK is optional)."""
        try:
            from openai import OpenAI
            return OpenAI(api_key=self.api_key, base_url=self.base_url, timeout=self.timeout)
        except ImportError:
            raise ImportError("openai package required. Install with: pip install openai")

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text using OpenAI.

        Fix: routed through the chat-completions endpoint. The previous
        implementation used the legacy `completions` endpoint, which does not
        serve chat models such as the default gpt-4o, so every call failed.
        The plain prompt is sent as a single user message.
        """
        client = self._get_client()

        start_time = time.time()

        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=max_tokens,
                stop=stop,
                **kwargs
            )

            duration = time.time() - start_time

            choice = response.choices[0]
            return GenerationResult(
                text=choice.message.content or "",
                model=self.model,
                tokens=response.usage.completion_tokens,
                duration=duration,
                finish_reason=choice.finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"OpenAI request failed: {e}")
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate a chat response using OpenAI chat completions."""
        client = self._get_client()

        # Convert messages to OpenAI's dict format, preserving tool metadata.
        chat_messages = []
        for msg in messages:
            msg_dict = {"role": msg.role, "content": msg.content}
            if msg.tool_calls:
                msg_dict["tool_calls"] = msg.tool_calls
            if msg.tool_call_id:
                msg_dict["tool_call_id"] = msg.tool_call_id
            chat_messages.append(msg_dict)

        # Build request
        request_params = {
            "model": self.model,
            "messages": chat_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        if tools:
            request_params["tools"] = tools

        request_params.update(kwargs)

        start_time = time.time()

        try:
            response = client.chat.completions.create(**request_params)

            duration = time.time() - start_time

            msg = response.choices[0].message
            # Content may be None on pure tool-call responses.
            text = msg.content or ""

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=response.usage.completion_tokens,
                duration=duration,
                finish_reason=response.choices[0].finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"OpenAI chat request failed: {e}")
            raise

    def get_model_name(self) -> str:
        """Get the model name."""
        return self.model
305
+
306
+
307
class AnthropicClient(BaseModelClient):
    """Client for Anthropic API."""

    def __init__(
        self,
        model: str = "claude-sonnet-4-20250514",
        api_key: Optional[str] = None,
        timeout: int = 120
    ):
        """
        Args:
            model: Anthropic model name.
            api_key: API key; falls back to the ANTHROPIC_API_KEY env var.
            timeout: Request timeout in seconds.

        Raises:
            ValueError: If no API key is supplied or found in the environment.
        """
        self.model = model
        self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")
        # Fix: timeout was previously accepted but never stored.
        self.timeout = timeout

        if not self.api_key:
            raise ValueError("Anthropic API key required. Set ANTHROPIC_API_KEY environment variable.")

    def _get_client(self):
        """Get an Anthropic SDK client (imported lazily so the SDK is optional)."""
        try:
            from anthropic import Anthropic
            return Anthropic(api_key=self.api_key)
        except ImportError:
            raise ImportError("anthropic package required. Install with: pip install anthropic")

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        **kwargs
    ) -> GenerationResult:
        """Generate text using Anthropic.

        Fix: the Messages API has no "system" role. The previous code
        prepended a {"role": "system", ...} message (rejected by the API)
        while also always passing system=... (including None). An optional
        system prompt supplied via kwargs is now forwarded only through the
        dedicated `system` parameter, and only when set.
        """
        client = self._get_client()

        system = kwargs.pop("system", None)
        messages = [{"role": "user", "content": prompt}]

        request_params = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if system:
            request_params["system"] = system
        request_params.update(kwargs)

        start_time = time.time()

        try:
            response = client.messages.create(**request_params)

            duration = time.time() - start_time

            text = response.content[0].text if response.content else ""

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=response.usage.output_tokens,
                duration=duration,
                finish_reason=response.stop_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"Anthropic request failed: {e}")
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate a chat response using Anthropic.

        Any "system"-role message in the history is lifted out into the
        API's dedicated `system` parameter.
        """
        client = self._get_client()

        system = None
        anthropic_messages = []

        for msg in messages:
            if msg.role == "system":
                system = msg.content
            else:
                anthropic_messages.append({"role": msg.role, "content": msg.content})

        request_params = {
            "model": self.model,
            "messages": anthropic_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        if system:
            request_params["system"] = system

        if tools:
            request_params["tools"] = tools

        request_params.update(kwargs)

        start_time = time.time()

        try:
            response = client.messages.create(**request_params)

            duration = time.time() - start_time

            text = response.content[0].text if response.content else ""

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=response.usage.output_tokens,
                duration=duration,
                finish_reason=response.stop_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"Anthropic chat request failed: {e}")
            raise

    def get_model_name(self) -> str:
        """Get the model name."""
        return self.model
436
+
437
+
438
def create_model_client(
    provider: str = "ollama",
    model: Optional[str] = None,
    **kwargs
) -> BaseModelClient:
    """
    Factory function to create model client.

    Args:
        provider: One of "ollama", "openai", "anthropic"
        model: Model name (defaults to provider's default)
        **kwargs: Additional client configuration

    Returns:
        BaseModelClient instance
    """
    # Dispatch table: provider -> (client class, env-var override, fallback model).
    registry = {
        "ollama": (OllamaClient, "OLLAMA_MODEL", "qwen2.5-coder:32b"),
        "openai": (OpenAIClient, "OPENAI_MODEL", "gpt-4o"),
        "anthropic": (AnthropicClient, "ANTHROPIC_MODEL", "claude-sonnet-4-20250514"),
    }

    spec = registry.get(provider)
    if spec is None:
        raise ValueError(f"Unknown provider: {provider}. Use: ollama, openai, anthropic")

    client_cls, env_var, fallback = spec
    chosen_model = model or os.environ.get(env_var, fallback)
    return client_cls(model=chosen_model, **kwargs)
465
+
466
+
467
class ModelClientPool:
    """Pool of model clients for different purposes."""

    def __init__(self):
        # Maps a purpose name (e.g. "default") to a client instance.
        self.clients: Dict[str, BaseModelClient] = {}

    def add_client(self, name: str, client: BaseModelClient):
        """Register *client* in the pool under *name*."""
        self.clients[name] = client

    def get_client(self, name: str = "default") -> BaseModelClient:
        """Return the named client, lazily creating one on first access."""
        try:
            return self.clients[name]
        except KeyError:
            # Build a default client from the MODEL_PROVIDER env var.
            provider = os.environ.get("MODEL_PROVIDER", "ollama")
            fresh = create_model_client(provider)
            self.clients[name] = fresh
            return fresh

    def generate(
        self,
        prompt: str,
        client_name: str = "default",
        **kwargs
    ) -> GenerationResult:
        """Generate text via the named client."""
        return self.get_client(client_name).generate(prompt, **kwargs)

    def chat(
        self,
        messages: List[ChatMessage],
        client_name: str = "default",
        **kwargs
    ) -> GenerationResult:
        """Run a chat turn via the named client."""
        return self.get_client(client_name).chat(messages, **kwargs)
502
+
503
+
504
# Default pool instance (module-level singleton, created lazily)
_default_pool = None

def get_default_pool() -> ModelClientPool:
    """Get default model client pool.

    Returns the process-wide ModelClientPool, creating it on first call.
    NOTE(review): not thread-safe; fine for single-threaded CLI use.
    """
    global _default_pool
    if _default_pool is None:
        _default_pool = ModelClientPool()
    return _default_pool
513
+
514
+
515
if __name__ == "__main__":
    import argparse

    # CLI entry point: send one prompt to the chosen provider and print the
    # response with basic usage stats.
    parser = argparse.ArgumentParser(description="Stack 2.9 Model Client")
    parser.add_argument("--provider", choices=["ollama", "openai", "anthropic"],
                        default="ollama", help="Model provider")
    parser.add_argument("--model", type=str, help="Model name")
    parser.add_argument("--prompt", type=str, required=True, help="Prompt to generate")
    parser.add_argument("--temperature", type=float, default=0.2, help="Temperature")

    args = parser.parse_args()

    # Create client (may raise if required API keys/SDKs are missing).
    client = create_model_client(args.provider, args.model)

    print(f"Using model: {client.get_model_name()}")
    print(f"Provider: {args.provider}")
    print("-" * 40)

    # Generate
    result = client.generate(args.prompt, temperature=args.temperature)

    print(f"Response:\n{result.text}")
    print("-" * 40)
    print(f"Tokens: {result.tokens}, Duration: {result.duration:.2f}s")
stack-2.9-training/data_quality.py ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Stack 2.9 Data Quality Module
4
+ Quality scoring, filtering, and deduplication for training data.
5
+ """
6
+
7
+ import hashlib
8
+ import json
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Dict, List, Any, Optional, Tuple
12
+ from dataclasses import dataclass
13
+ import logging
14
+
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@dataclass
class QualityScore:
    """Per-example quality metrics produced by DataQualityAnalyzer."""
    overall: float          # weighted combination of the three sub-scores
    length_score: float     # 0-1 score for response length
    code_quality: float     # 0-1 score for code-pattern richness
    structure_score: float  # 0-1 score for schema/structure validity
    issues: List[str]       # human-readable list of detected problems
27
+
28
+
29
class DataQualityAnalyzer:
    """Analyzes and filters training data quality.

    Supports several example layouts: chat ("messages"), instruction/response,
    prompt/completion, and input/output. Each example is scored on length,
    code quality, and structure; the weighted combination is the overall score.
    """

    def __init__(
        self,
        min_response_length: int = 20,
        max_length: int = 128000,
        min_code_ratio: float = 0.1,
        require_valid_schema: bool = True
    ):
        """
        Args:
            min_response_length: Responses shorter than this score 0 on length.
            max_length: Responses longer than this are heavily penalized.
            min_code_ratio: Reserved threshold for code density (currently unused).
            require_valid_schema: Reserved flag for schema checks (currently unused).
        """
        self.min_response_length = min_response_length
        self.max_length = max_length
        self.min_code_ratio = min_code_ratio
        self.require_valid_schema = require_valid_schema

    def analyze_example(self, example: Dict[str, Any]) -> QualityScore:
        """Analyze a single training example and return quality metrics."""
        issues: List[str] = []

        # Only the assistant/response side is scored. (A dead call to
        # _extract_content, whose result was never used, has been removed.)
        response = self._extract_response(example)

        # Length scoring
        length_score = self._score_length(response)
        if length_score < 0.3:
            issues.append("Response too short")

        # Code quality scoring
        code_quality = self._score_code_quality(response)
        if code_quality < 0.2:
            issues.append("Low code quality")

        # Structure scoring
        structure_score = self._score_structure(example)
        if structure_score < 0.3:
            issues.append("Poor structure")

        # Weighted overall score: code quality weighted highest.
        overall = (length_score * 0.3 + code_quality * 0.4 + structure_score * 0.3)

        return QualityScore(
            overall=overall,
            length_score=length_score,
            code_quality=code_quality,
            structure_score=structure_score,
            issues=issues
        )

    def _extract_content(self, example: Dict[str, Any]) -> str:
        """Extract the full text (both sides of the exchange) from an example."""
        if "messages" in example:
            return " ".join(msg.get("content", "") for msg in example["messages"])
        elif "instruction" in example:
            return example.get("instruction", "") + " " + example.get("response", "")
        elif "prompt" in example:
            return example.get("prompt", "") + " " + example.get("completion", "")
        elif "input" in example:
            return example.get("input", "") + " " + example.get("output", "")
        return json.dumps(example)

    def _extract_response(self, example: Dict[str, Any]) -> str:
        """Extract only the model/assistant side of an example ("" if absent)."""
        if "messages" in example:
            for msg in example["messages"]:
                if msg.get("role") == "assistant":
                    return msg.get("content", "")
        elif "response" in example:
            return example["response"]
        elif "completion" in example:
            return example["completion"]
        elif "output" in example:
            return example["output"]
        return ""

    def _score_length(self, response: str) -> float:
        """Score response length on [0, 1]; roughly 100-10000 chars is optimal."""
        if not response:
            return 0.0

        length = len(response)

        if length < self.min_response_length:
            return 0.0
        elif length > self.max_length:
            return 0.2

        # Optimal range: 100-10000 chars
        if 100 <= length <= 10000:
            return 1.0
        elif length < 100:
            return 0.3
        else:
            # Linearly decay from 10000 to max_length, floored at 0.5.
            return max(0.5, 1.0 - (length - 10000) / (self.max_length - 10000))

    def _score_code_quality(self, response: str) -> float:
        """Heuristic code-quality score on [0, 1] based on textual patterns."""
        if not response:
            return 0.0

        score = 0.5  # Base score

        # Reward fenced code blocks.
        code_blocks = len(re.findall(r'```[\s\S]*?```', response))
        if code_blocks > 0:
            score += 0.2

        # Reward common programming constructs (capped at +0.2).
        patterns = [
            r'def\s+\w+\s*\(',       # Function definitions
            r'class\s+\w+',          # Class definitions
            r'if\s+',                # Conditionals
            r'for\s+',               # Loops
            r'return\s+',            # Returns
            r'import\s+\w+',         # Imports
            r'from\s+\w+\s+import',  # Named imports
        ]

        pattern_count = sum(1 for p in patterns if re.search(p, response))
        score += min(0.2, pattern_count * 0.05)

        # Penalize placeholder content. NOTE(review): blank lines also count
        # as "placeholders" here, so whitespace-padded responses are
        # penalized too — confirm that is intended.
        placeholder_patterns = [
            r'\bTODO\b',
            r'\bFIXME\b',
            r'\bXXX\b',
            r'^\s*$',  # Empty lines
        ]

        placeholder_count = sum(len(re.findall(p, response, re.MULTILINE)) for p in placeholder_patterns)
        if placeholder_count > 5:
            score -= 0.3

        return max(0.0, min(1.0, score))

    def _score_structure(self, example: Dict[str, Any]) -> float:
        """Score schema validity on [0, 1]: known layouts plus valid tool calls."""
        score = 0.5  # Base score

        # Reward recognized conversation layouts.
        if "messages" in example:
            roles = {msg.get("role") for msg in example.get("messages", [])}
            if "user" in roles and "assistant" in roles:
                score += 0.3
            if "system" in roles:
                score += 0.1
        elif "instruction" in example and "response" in example:
            score += 0.4
        elif "prompt" in example and "completion" in example:
            score += 0.4

        # Reward structurally valid tool calls (total capped at 1.0).
        if "messages" in example:
            for msg in example["messages"]:
                if msg.get("role") == "assistant" and "tool_calls" in msg:
                    # Validate tool call structure
                    if self._validate_tool_calls(msg["tool_calls"]):
                        score += 0.1

        return min(1.0, score)

    def _validate_tool_calls(self, tool_calls: List[Dict]) -> bool:
        """Return True iff every tool call is a dict with a function.name entry."""
        if not isinstance(tool_calls, list):
            return False

        for call in tool_calls:
            if not isinstance(call, dict):
                return False
            if "function" not in call:
                return False
            if "name" not in call.get("function", {}):
                return False

        return True
205
+
206
+
207
def deduplicate(data: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int]:
    """
    Remove exact-duplicate examples, keeping the first occurrence of each.

    Duplicates are detected via a SHA-256 digest of the canonical
    (sort_keys) JSON serialization of the example.

    Returns:
        Tuple of (unique_data, duplicates_removed)
    """
    fingerprints = set()
    unique_data: List[Dict[str, Any]] = []

    for example in data:
        canonical = json.dumps(example, sort_keys=True, ensure_ascii=False)
        digest = hashlib.sha256(canonical.encode()).hexdigest()

        if digest in fingerprints:
            continue
        fingerprints.add(digest)
        unique_data.append(example)

    duplicates_removed = len(data) - len(unique_data)
    if duplicates_removed > 0:
        logger.info(f"Removed {duplicates_removed} duplicate examples")

    return unique_data, duplicates_removed
231
+
232
+
233
def filter_by_quality(
    data: List[Dict[str, Any]],
    min_score: float = 0.4,
    analyzer: Optional[DataQualityAnalyzer] = None
) -> Tuple[List[Dict[str, Any]], List[QualityScore]]:
    """
    Keep only examples whose overall quality score reaches min_score.

    Returns:
        Tuple of (filtered_data, all_scores) — scores cover every input
        example, including the ones that were dropped.
    """
    if analyzer is None:
        analyzer = DataQualityAnalyzer()

    kept: List[Dict[str, Any]] = []
    all_scores: List[QualityScore] = []

    for example in data:
        score = analyzer.analyze_example(example)
        all_scores.append(score)
        if score.overall >= min_score:
            kept.append(example)

    dropped = len(data) - len(kept)
    if dropped > 0:
        logger.info(f"Filtered out {dropped} low-quality examples")

    return kept, all_scores
262
+
263
+
264
def filter_by_completeness(data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Drop examples that are missing one side of the exchange or are empty."""

    def _is_complete(example: Dict[str, Any]) -> bool:
        # One branch per supported layout; "messages" takes precedence.
        if "messages" in example:
            messages = example.get("messages", [])
            if not any(m.get("role") == "user" for m in messages):
                return False
            if not any(m.get("role") == "assistant" for m in messages):
                return False
            # At least one turn must carry non-whitespace content.
            return any(
                m.get("content") and len(m.get("content", "").strip()) > 0
                for m in messages
            )

        if "instruction" in example and "response" in example:
            return bool(example.get("instruction", "").strip()) and \
                bool(example.get("response", "").strip())

        if "prompt" in example and "completion" in example:
            return bool(example.get("prompt", "").strip()) and \
                bool(example.get("completion", "").strip())

        if "input" in example and "output" in example:
            return bool(example.get("input", "").strip()) and \
                bool(example.get("output", "").strip())

        # Unknown format - skip
        return False

    return [example for example in data if _is_complete(example)]
314
+
315
+
316
def filter_code_pairs(data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Drop code-pair entries missing code/fullBody or holding placeholders."""
    kept: List[Dict[str, Any]] = []

    for entry in data:
        # Both essential fields must be present and truthy.
        if not entry.get("code") or not entry.get("fullBody"):
            continue

        # Reject placeholder bodies and whitespace-only code.
        code = entry.get("code", "")
        if "{ ... }" in code or code.strip() == "":
            continue

        kept.append(entry)

    return kept
335
+
336
+
337
def filter_tool_catalog(data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Normalize tool catalog entries, filling missing metadata in place."""

    def _normalize(tool: Dict[str, Any]) -> Dict[str, Any]:
        # Synthesize a description from the tool name when absent/empty.
        if not tool.get("description"):
            tool["description"] = f"Tool for {tool.get('tool', 'unknown operation')}"
        # Fall back to an empty object schema when none is declared.
        if not tool.get("inputSchema"):
            tool["inputSchema"] = {"type": "object", "properties": {}}
        return tool

    return [_normalize(tool) for tool in data]
353
+
354
+
355
def process_pipeline(
    input_files: List[Path],
    output_path: Path,
    min_quality_score: float = 0.4
) -> Dict[str, Any]:
    """
    Run the full data quality pipeline on multiple input files.

    Steps: load JSONL -> completeness filter -> dedup -> quality filter -> save.

    Args:
        input_files: List of input JSONL files
        output_path: Path to save cleaned data
        min_quality_score: Minimum quality score to keep

    Returns:
        Statistics dictionary. "total_input" counts examples as loaded,
        before any filtering; "final_count" is what was written out.
    """
    all_data = []

    # Load all data
    for file_path in input_files:
        if not file_path.exists():
            logger.warning(f"File not found: {file_path}")
            continue

        logger.info(f"Loading {file_path}")
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    all_data.append(json.loads(line))
                except json.JSONDecodeError as e:
                    logger.warning(f"Skipping invalid JSON: {e}")

    # BUG FIX: snapshot the input size here. Previously "total_input" was
    # read from all_data AFTER filtering, so it always equalled "final_count".
    total_input = len(all_data)
    logger.info(f"Loaded {len(all_data)} total examples")

    # Filter by completeness
    all_data = filter_by_completeness(all_data)
    logger.info(f"After completeness filter: {len(all_data)}")

    # Deduplicate
    all_data, dup_count = deduplicate(all_data)
    logger.info(f"After deduplication: {len(all_data)}")

    # Filter by quality
    analyzer = DataQualityAnalyzer()
    all_data, scores = filter_by_quality(all_data, min_quality_score, analyzer)
    logger.info(f"After quality filter: {len(all_data)}")

    # Save output
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        for item in all_data:
            f.write(json.dumps(item, ensure_ascii=False) + '\n')

    # Average quality over every example that reached the quality stage
    # (including ones subsequently dropped).
    avg_score = sum(s.overall for s in scores) / len(scores) if scores else 0

    return {
        "total_input": total_input,
        "duplicates_removed": dup_count,
        "final_count": len(all_data),
        "avg_quality_score": avg_score,
        "output_file": str(output_path)
    }
421
+
422
+
423
if __name__ == "__main__":
    import argparse

    # CLI wrapper that runs process_pipeline on a single input file.
    parser = argparse.ArgumentParser(description="Stack 2.9 Data Quality Analysis")
    parser.add_argument("--input", "-i", type=str, required=True, help="Input JSONL file")
    parser.add_argument("--output", "-o", type=str, required=True, help="Output JSONL file")
    parser.add_argument("--min-score", type=float, default=0.4, help="Minimum quality score")
    # NOTE(review): --stats is accepted but never read below — confirm intent.
    parser.add_argument("--stats", action="store_true", help="Show statistics")

    args = parser.parse_args()

    input_path = Path(args.input)
    output_path = Path(args.output)

    result = process_pipeline([input_path], output_path, args.min_score)

    print(f"\n✓ Processing complete!")
    print(f" Input: {args.input}")
    print(f" Output: {args.output}")
    print(f" Examples: {result['final_count']}")
    print(f" Avg quality: {result['avg_quality_score']:.2f}")
stack-2.9-training/pattern_miner.py ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Stack 2.9 Pattern Miner
4
+ Extracts patterns from successful solutions and feedback for self-evolution.
5
+ """
6
+
7
+ import json
8
+ import hashlib
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Dict, List, Any, Optional, Tuple
12
+ from dataclasses import dataclass, asdict
13
+ from datetime import datetime
14
+ from collections import defaultdict
15
+ import logging
16
+
17
+ logging.basicConfig(level=logging.INFO)
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@dataclass
class Pattern:
    """A learned, persisted pattern mined from successful solutions."""
    id: str               # short sha256-derived identifier
    pattern_type: str     # "code_structure", "algorithm", "error_recovery", etc.
    description: str      # human-readable summary
    code_snippet: str     # representative snippet or signature
    success_count: int    # times seen in a passing solution
    failure_count: int    # times explicitly marked as failed
    success_rate: float   # success_count / (success_count + failure_count)
    tags: List[str]       # problem types / identifiers used for retrieval
    created_at: str       # ISO-8601 creation timestamp
    last_used: str        # ISO-8601 timestamp of the most recent hit
34
+
35
+
36
@dataclass
class Feedback:
    """Outcome record for a single solution attempt."""
    id: str                        # short sha256-derived identifier
    problem_type: str              # category of the attempted problem
    solution: str                  # the submitted solution code
    success: bool                  # whether the attempt passed
    error_message: Optional[str]   # failure detail; None on success
    execution_time: float          # wall-clock seconds for the attempt
    timestamp: str                 # ISO-8601 time the feedback was stored
    model_version: Optional[str] = None  # model that produced the solution
47
+
48
+
49
class PatternMiner:
    """Extracts, persists, and ranks code patterns from solution feedback.

    Successful solutions are scanned for known structural patterns; each
    pattern keeps success/failure counts so the most reliable ones can be
    surfaced in future prompts.
    """

    # Pattern type keywords: pattern_type -> list of regexes that signal it.
    #
    # BUG FIX: the original "recursion" entries used the backreference \1
    # without defining capture group 1 (e.g. r"return\s+.*\1\("), so
    # re.search() raised re.error("invalid group reference") the first time
    # a successful solution was processed. The fixed regex captures the
    # function name (group 1) and requires that same name to be called
    # again later in the text.
    PATTERN_TYPES = {
        "recursion": [r"def\s+(\w+)\s*\([^)]*\):[\s\S]*\b\1\s*\("],
        "iteration": [r"for\s+", r"while\s+"],
        "list_comprehension": [r"\[.*for.*in.*\]"],
        "dictionary": [r"\{\w+:", r"dict\(", r"defaultdict\("],
        "set_operations": [r"set\(", r"\&\s*", r"\|\s*", r"\-\s*"],
        "sorting": [r"sorted\(", r"\.sort\("],
        "searching": [r"\.index\(", r"\.find\(", r"in\s+"],
        "file_io": [r"open\(", r"read\(", r"write\("],
        "error_handling": [r"try:", r"except", r"finally:"],
        "class_definition": [r"class\s+\w+", r"def\s+__init__"],
        "function_composition": [r"\.map\(", r"\.filter\(", r"\.reduce\("],
    }

    def __init__(self, storage_dir: Optional[Path] = None):
        """
        Args:
            storage_dir: Directory holding patterns.json / feedback.json.
                Defaults to <module dir>/patterns; created if missing.
        """
        self.storage_dir = storage_dir or Path(__file__).parent / "patterns"
        self.storage_dir.mkdir(parents=True, exist_ok=True)

        self.patterns_file = self.storage_dir / "patterns.json"
        self.feedback_file = self.storage_dir / "feedback.json"

        self.patterns = self._load_patterns()
        self.feedback = self._load_feedback()

    def _load_patterns(self) -> List[Pattern]:
        """Load stored patterns (empty list if no file yet)."""
        if not self.patterns_file.exists():
            return []

        with open(self.patterns_file, 'r') as f:
            data = json.load(f)
            return [Pattern(**p) for p in data]

    def _load_feedback(self) -> List[Feedback]:
        """Load stored feedback (empty list if no file yet)."""
        if not self.feedback_file.exists():
            return []

        with open(self.feedback_file, 'r') as f:
            data = json.load(f)
            return [Feedback(**fb) for fb in data]

    def _save_patterns(self):
        """Persist all patterns to patterns.json."""
        with open(self.patterns_file, 'w') as f:
            json.dump([asdict(p) for p in self.patterns], f, indent=2)

    def _save_feedback(self):
        """Persist all feedback to feedback.json."""
        with open(self.feedback_file, 'w') as f:
            json.dump([asdict(fb) for fb in self.feedback], f, indent=2)

    def store_feedback(
        self,
        problem_type: str,
        solution: str,
        success: bool,
        error_message: Optional[str] = None,
        execution_time: float = 0.0,
        model_version: Optional[str] = None
    ) -> Feedback:
        """Record a solution attempt; mine patterns from it if it succeeded.

        Returns the stored Feedback record.
        """
        fb = Feedback(
            # Timestamp + solution makes the id unique per attempt.
            id=hashlib.sha256(f"{datetime.now().isoformat()}{solution}".encode()).hexdigest()[:16],
            problem_type=problem_type,
            solution=solution,
            success=success,
            error_message=error_message,
            execution_time=execution_time,
            timestamp=datetime.now().isoformat(),
            model_version=model_version
        )

        self.feedback.append(fb)
        self._save_feedback()

        # Extract patterns if successful
        if success:
            self._extract_patterns_from_solution(solution, problem_type)

        return fb

    def _extract_patterns_from_solution(self, solution: str, problem_type: str):
        """Scan a successful solution and record each matching pattern type."""
        # Identify pattern types (at most one hit recorded per type).
        for ptype, regexes in self.PATTERN_TYPES.items():
            for regex in regexes:
                if re.search(regex, solution):
                    self._add_pattern(ptype, solution, problem_type)
                    break

        # Extract code structure patterns
        self._extract_structure_patterns(solution, problem_type)

    def _extract_structure_patterns(self, code: str, problem_type: str):
        """Record function/class definition signatures found in the code."""
        # Only the first function definition is recorded as a pattern.
        functions = re.findall(r'def\s+(\w+)\s*\([^)]*\):', code)
        if functions:
            self._add_pattern(
                "function_definition",
                f"def {functions[0]}(...)",
                problem_type,
                tags=["function", functions[0]]
            )

        # Every class definition is recorded.
        classes = re.findall(r'class\s+(\w+)', code)
        for cls in classes:
            self._add_pattern(
                "class_definition",
                f"class {cls}",
                problem_type,
                tags=["class", cls]
            )

    def _add_pattern(
        self,
        pattern_type: str,
        snippet: str,
        problem_type: str,
        tags: Optional[List[str]] = None
    ):
        """Create a new pattern, or bump the success count of an identical one."""
        # Check if pattern already exists (same type + identical snippet).
        existing = None
        for p in self.patterns:
            if p.pattern_type == pattern_type and p.code_snippet == snippet:
                existing = p
                break

        if existing:
            # Update existing pattern
            existing.success_count += 1
            existing.success_rate = existing.success_count / (existing.success_count + existing.failure_count)
            existing.last_used = datetime.now().isoformat()
        else:
            # Create new pattern
            pattern = Pattern(
                id=hashlib.sha256(f"{pattern_type}{snippet}".encode()).hexdigest()[:16],
                pattern_type=pattern_type,
                description=f"Pattern for {problem_type}",
                code_snippet=snippet,
                success_count=1,
                failure_count=0,
                success_rate=1.0,
                tags=tags or [problem_type],
                created_at=datetime.now().isoformat(),
                last_used=datetime.now().isoformat()
            )
            self.patterns.append(pattern)

        self._save_patterns()

    def mark_pattern_failure(self, pattern_id: str):
        """Increment a pattern's failure count and refresh its success rate."""
        for p in self.patterns:
            if p.id == pattern_id:
                p.failure_count += 1
                p.success_rate = p.success_count / (p.success_count + p.failure_count)
                break

        self._save_patterns()

    def get_relevant_patterns(
        self,
        problem_type: str = None,
        min_success_rate: float = 0.5,
        limit: int = 10
    ) -> List[Pattern]:
        """Return up to `limit` patterns, best success rate first.

        Args:
            problem_type: If given, only patterns tagged with it are returned.
            min_success_rate: Patterns below this rate are excluded.
            limit: Maximum number of patterns to return.
        """
        relevant = []

        for p in self.patterns:
            # Filter by success rate
            if p.success_rate < min_success_rate:
                continue

            # Filter by problem type if specified
            if problem_type and problem_type not in p.tags:
                continue

            relevant.append(p)

        # Sort by success rate, breaking ties by total successes.
        relevant.sort(key=lambda p: (p.success_rate, p.success_count), reverse=True)

        return relevant[:limit]

    def generate_pattern_prompt(self, patterns: List[Pattern]) -> str:
        """Render patterns into a prompt fragment ("" when patterns is empty)."""
        if not patterns:
            return ""

        prompt = "Here are some patterns that worked well for similar problems:\n\n"

        for i, p in enumerate(patterns, 1):
            prompt += f"{i}. [{p.pattern_type}] {p.description}\n"
            prompt += f"   Code: {p.code_snippet}\n"
            prompt += f"   Success rate: {p.success_rate:.1%}\n\n"

        return prompt

    def get_statistics(self) -> Dict[str, Any]:
        """Summarize feedback outcomes and mined patterns."""
        if not self.feedback:
            return {"total_feedback": 0, "total_patterns": 0}

        success_count = sum(1 for fb in self.feedback if fb.success)
        failure_count = len(self.feedback) - success_count

        # Group by problem type
        by_type = defaultdict(lambda: {"success": 0, "failure": 0})
        for fb in self.feedback:
            by_type[fb.problem_type]["success" if fb.success else "failure"] += 1

        # Pattern statistics
        pattern_types = defaultdict(int)
        for p in self.patterns:
            pattern_types[p.pattern_type] += 1

        return {
            "total_feedback": len(self.feedback),
            "successful_solutions": success_count,
            "failed_solutions": failure_count,
            "success_rate": success_count / len(self.feedback) if self.feedback else 0,
            "total_patterns": len(self.patterns),
            "patterns_by_type": dict(pattern_types),
            "by_problem_type": dict(by_type)
        }
283
+
284
+
285
def create_synthetic_feedback(
    output_file: Path,
    num_examples: int = 100
) -> int:
    """Generate randomized feedback records for testing the miner.

    Feeds `num_examples` (problem, solution, success) triples through a
    default PatternMiner, then dumps the miner's accumulated feedback to
    `output_file` as JSON. Returns the number of examples generated.
    """
    import random

    problems = [
        "list_operations", "string_manipulation", "recursion",
        "sorting", "searching", "file_io", "error_handling"
    ]

    # Canned solutions per problem type; unknown types fall back to a stub.
    success_solutions = {
        "list_operations": [
            "return [x for x in lst if x > 0]",
            "return sum(lst)",
            "return max(lst) if lst else None",
        ],
        "string_manipulation": [
            "return s[::-1]",
            "return s.upper()",
            "return ''.join(sorted(s))",
        ],
        "recursion": [
            "if n <= 1: return 1\nreturn n * fact(n-1)",
            "if not head: return None\nreturn head.val + sum_list(head.next)",
        ],
        "sorting": [
            "return sorted(lst)",
            "lst.sort()\nreturn lst",
        ],
        "searching": [
            "return any(x == target for x in lst)",
            "for i, x in enumerate(lst):\n    if x == target: return i\nreturn -1",
        ],
    }

    miner = PatternMiner()

    for _ in range(num_examples):
        ptype = random.choice(problems)
        code = random.choice(success_solutions.get(ptype, ["# solution"]))
        passed = random.random() > 0.2  # 80% success rate

        miner.store_feedback(
            problem_type=ptype,
            solution=code,
            success=passed,
            error_message=None if passed else "Test failed",
            execution_time=random.uniform(0.1, 2.0)
        )

    # Dump everything the miner accumulated to the requested file.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w') as f:
        json.dump([asdict(fb) for fb in miner.feedback], f, indent=2)

    return num_examples
343
+
344
+
345
if __name__ == "__main__":
    import argparse

    # CLI for storing feedback, listing mined patterns, showing statistics,
    # and generating synthetic test data.
    parser = argparse.ArgumentParser(description="Stack 2.9 Pattern Miner")
    parser.add_argument("--store", action="store_true",
                        help="Store a feedback example")
    parser.add_argument("--problem-type", type=str, help="Problem type")
    parser.add_argument("--solution", type=str, help="Solution code")
    # Parses "true"/"True" (case-insensitive) as True; anything else is False.
    parser.add_argument("--success", type=lambda x: x.lower() == "true",
                        default=True, help="Success flag")
    parser.add_argument("--list-patterns", action="store_true",
                        help="List relevant patterns")
    parser.add_argument("--stats", action="store_true",
                        help="Show statistics")
    parser.add_argument("--generate-synthetic", type=int, metavar="N",
                        help="Generate N synthetic examples")

    args = parser.parse_args()

    miner = PatternMiner()

    if args.store:
        # Both fields are required to record a feedback example.
        if not args.problem_type or not args.solution:
            print("Error: --problem-type and --solution required")
            exit(1)

        fb = miner.store_feedback(
            problem_type=args.problem_type,
            solution=args.solution,
            success=args.success
        )
        print(f"Stored feedback: {fb.id}")

    elif args.list_patterns:
        # --problem-type optionally narrows the listing.
        patterns = miner.get_relevant_patterns(args.problem_type)
        print(f"\nRelevant patterns ({len(patterns)}):")
        for p in patterns:
            print(f" [{p.pattern_type}] {p.code_snippet} (rate: {p.success_rate:.1%})")

    elif args.stats:
        stats = miner.get_statistics()
        print("\nPattern Mining Statistics:")
        print(f" Total feedback: {stats['total_feedback']}")
        print(f" Success rate: {stats['success_rate']:.1%}")
        print(f" Total patterns: {stats['total_patterns']}")
        print(f" Patterns by type: {stats['patterns_by_type']}")

    elif args.generate_synthetic:
        # NOTE(review): truthiness test means --generate-synthetic 0 falls
        # through to the help branch — confirm that is acceptable.
        count = create_synthetic_feedback(
            Path("/tmp/synthetic_feedback.json"),
            args.generate_synthetic
        )
        print(f"Generated {count} synthetic examples")

    else:
        print("Pattern Miner")
        print("Use --help for options")