Yooshiii committed on
Commit
f8a39f0
·
verified ·
1 Parent(s): 5a428e1

Upload 36 files

Browse files
Files changed (36) hide show
  1. codesense/__init__.py +0 -0
  2. codesense/__pycache__/__init__.cpython-310.pyc +0 -0
  3. codesense/__pycache__/__init__.cpython-314.pyc +0 -0
  4. codesense/__pycache__/analyzer.cpython-310.pyc +0 -0
  5. codesense/__pycache__/analyzer.cpython-314.pyc +0 -0
  6. codesense/__pycache__/complexity.cpython-310.pyc +0 -0
  7. codesense/__pycache__/complexity.cpython-314.pyc +0 -0
  8. codesense/__pycache__/embedder.cpython-310.pyc +0 -0
  9. codesense/__pycache__/embedder.cpython-314.pyc +0 -0
  10. codesense/__pycache__/explanations.cpython-310.pyc +0 -0
  11. codesense/__pycache__/explanations.cpython-314.pyc +0 -0
  12. codesense/__pycache__/features.cpython-310.pyc +0 -0
  13. codesense/__pycache__/features.cpython-314.pyc +0 -0
  14. codesense/__pycache__/parser.cpython-310.pyc +0 -0
  15. codesense/__pycache__/parser.cpython-314.pyc +0 -0
  16. codesense/__pycache__/rules.cpython-310.pyc +0 -0
  17. codesense/__pycache__/rules.cpython-314.pyc +0 -0
  18. codesense/__pycache__/similarity.cpython-310.pyc +0 -0
  19. codesense/__pycache__/similarity.cpython-314.pyc +0 -0
  20. codesense/analyzer.py +69 -0
  21. codesense/complexity.py +95 -0
  22. codesense/embedder.py +49 -0
  23. codesense/explanations.py +201 -0
  24. codesense/features.py +417 -0
  25. codesense/ml/__init__.py +0 -0
  26. codesense/ml/__pycache__/__init__.cpython-310.pyc +0 -0
  27. codesense/ml/__pycache__/__init__.cpython-314.pyc +0 -0
  28. codesense/ml/__pycache__/embedder.cpython-310.pyc +0 -0
  29. codesense/ml/__pycache__/embedder.cpython-314.pyc +0 -0
  30. codesense/ml/__pycache__/interface.cpython-310.pyc +0 -0
  31. codesense/ml/__pycache__/interface.cpython-314.pyc +0 -0
  32. codesense/ml/__pycache__/similarity.cpython-310.pyc +0 -0
  33. codesense/ml/__pycache__/similarity.cpython-314.pyc +0 -0
  34. codesense/parser.py +13 -0
  35. codesense/rules.py +92 -0
  36. codesense/similarity.py +366 -0
codesense/__init__.py ADDED
File without changes
codesense/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (149 Bytes). View file
 
codesense/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (155 Bytes). View file
 
codesense/__pycache__/analyzer.cpython-310.pyc ADDED
Binary file (1.46 kB). View file
 
codesense/__pycache__/analyzer.cpython-314.pyc ADDED
Binary file (2.9 kB). View file
 
codesense/__pycache__/complexity.cpython-310.pyc ADDED
Binary file (1.47 kB). View file
 
codesense/__pycache__/complexity.cpython-314.pyc ADDED
Binary file (2.52 kB). View file
 
codesense/__pycache__/embedder.cpython-310.pyc ADDED
Binary file (1.88 kB). View file
 
codesense/__pycache__/embedder.cpython-314.pyc ADDED
Binary file (2.91 kB). View file
 
codesense/__pycache__/explanations.cpython-310.pyc ADDED
Binary file (4.76 kB). View file
 
codesense/__pycache__/explanations.cpython-314.pyc ADDED
Binary file (6.36 kB). View file
 
codesense/__pycache__/features.cpython-310.pyc ADDED
Binary file (8.15 kB). View file
 
codesense/__pycache__/features.cpython-314.pyc ADDED
Binary file (21.6 kB). View file
 
codesense/__pycache__/parser.cpython-310.pyc ADDED
Binary file (535 Bytes). View file
 
codesense/__pycache__/parser.cpython-314.pyc ADDED
Binary file (757 Bytes). View file
 
codesense/__pycache__/rules.cpython-310.pyc ADDED
Binary file (1.76 kB). View file
 
codesense/__pycache__/rules.cpython-314.pyc ADDED
Binary file (3.32 kB). View file
 
codesense/__pycache__/similarity.cpython-310.pyc ADDED
Binary file (7.07 kB). View file
 
codesense/__pycache__/similarity.cpython-314.pyc ADDED
Binary file (8.07 kB). View file
 
codesense/analyzer.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from codesense.parser import parse_code
2
+ from codesense.features import extract_features
3
+ from codesense.rules import detect_algorithm
4
+ from codesense.complexity import estimate_complexity
5
+ from codesense.explanations import generate_explanation
6
+
7
+ # ============================================================
8
+ # STRICT ML IMPORT (Mentor Requirement)
9
+ # This file provides the logic, NOT the server.
10
+ # ============================================================
11
+ from codesense.similarity import predict_algorithm
12
+
13
+ def analyze_code(source: str) -> dict:
14
+ """
15
+ Main analysis pipeline called by app.py.
16
+ """
17
+ # 1. Structural Analysis via AST
18
+ tree = parse_code(source)
19
+ features = extract_features(tree)
20
+ detection = detect_algorithm(features)
21
+
22
+ # 2. Semantic Analysis via CodeT5 (Checker 2)
23
+ ml_result = predict_algorithm(source)
24
+
25
+ rule_pattern = detection.get("pattern")
26
+ category = detection.get("category")
27
+ ml_prediction = ml_result.get("ml_prediction")
28
+ ml_confidence = ml_result.get("confidence")
29
+
30
+ # Override Policy: Does CodeT5 see something the Rules missed?
31
+ resolved_pattern = rule_pattern
32
+ ml_refined = False
33
+
34
+ if ml_confidence is not None:
35
+ if (ml_confidence >= 0.93 and ml_prediction != rule_pattern):
36
+ resolved_pattern = ml_prediction
37
+ category = ml_result.get("ml_category")
38
+ ml_refined = True
39
+ elif (ml_confidence >= 0.90 and rule_pattern in ["Linear Iterative", "Nested Iterative"]):
40
+ resolved_pattern = ml_prediction
41
+ ml_refined = True
42
+
43
+ # 3. Complexity
44
+ complexity = estimate_complexity(features)
45
+
46
+ # Clean up for JSON
47
+ if "function_calls" in features:
48
+ features["function_calls"] = list(features["function_calls"])
49
+
50
+ detection["pattern"] = resolved_pattern
51
+
52
+ base_result = {
53
+ "features": features,
54
+ "analysis": detection,
55
+ "complexity": complexity
56
+ }
57
+ explanation = generate_explanation(base_result)
58
+
59
+ return {
60
+ "pattern": resolved_pattern,
61
+ "category": category,
62
+ "time_complexity": complexity.get("time_complexity"),
63
+ "summary": explanation.get("summary"),
64
+ "ml_insights": {
65
+ "ml_prediction": ml_prediction,
66
+ "confidence": ml_confidence if ml_confidence is not None else 0.0,
67
+ "refined": ml_refined
68
+ }
69
+ }
codesense/complexity.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def estimate_complexity(features: dict) -> dict:
    """Estimate asymptotic time complexity from static code features.

    Heuristic rules are applied in priority order: graph traversal,
    binary search, dynamic programming, named divide-and-conquer sorts,
    recursion shape, simple sorting patterns, sliding window, heaps,
    and finally plain loop-nesting depth.

    Parameters:
        features (dict): Feature flags/counters produced by
            extract_features. Missing keys are treated as absent
            (previously ``features["recursion"]`` raised KeyError on a
            sparse dict).

    Returns:
        dict: Always ``{"time_complexity": str, "explanation": str}``
        (previously some branches omitted "explanation", and others
        returned a stale "no loops" explanation with a non-O(1) bound).
    """

    def verdict(big_o: str, why: str) -> dict:
        # Single construction point keeps the result shape consistent.
        return {"time_complexity": big_o, "explanation": why}

    # ----- BFS/DFS pattern -----
    if features.get("bfs_pattern") or features.get("dfs_pattern"):
        return verdict(
            "O(V + E)",
            "Graph traversal detected: each vertex and edge is processed once.",
        )

    # ----- Binary search heuristic -----
    if features.get("binary_search_pattern"):
        return verdict(
            "O(log n)",
            "Binary search detected: the search space is halved each iteration.",
        )

    # ----- Dynamic programming -----
    if features.get("dp_pattern"):
        return verdict(
            "O(n) or O(n²)",
            "Dynamic programming detected. Complexity depends on the number "
            "of states and transitions.",
        )

    # ----- Merge sort -----
    if features.get("merge_sort_pattern"):
        return verdict(
            "O(n log n)",
            "Merge sort detected: recursive halving with a linear merge step.",
        )

    # ----- Quick sort -----
    if features.get("quick_sort_pattern"):
        return verdict(
            "O(n log n) average, O(n²) worst-case",
            "Quick sort detected: partition-based recursive sorting.",
        )

    # ----- Recursion heuristics -----
    if features.get("recursion"):
        if features.get("divide_and_conquer"):
            return verdict(
                "O(n log n)",
                "Divide-and-conquer recursion detected.",
            )
        if features.get("recursive_call_count", 0) >= 2:
            return verdict(
                "O(2^n)",
                "Multiple recursive calls per invocation suggest exponential growth.",
            )
        return verdict(
            "O(n)",
            "A single recursive call per invocation suggests linear recursion depth.",
        )

    # ----- Sorting algorithms -----
    if features.get("bubble_sort_pattern"):
        return verdict(
            "O(n²) worst-case (O(n) best-case if optimized)",
            "Bubble sort detected: repeated adjacent comparisons and swaps.",
        )

    if features.get("insertion_sort_pattern"):
        return verdict(
            "O(n²) worst-case (O(n) best-case for nearly sorted input)",
            "Insertion sort detected: elements are shifted into a sorted prefix.",
        )

    # ----- Sliding window -----
    if features.get("sliding_window_pattern"):
        return verdict(
            "O(n)",
            "Sliding window detected: each element enters and leaves the window at most once.",
        )

    # ----- Heap-based -----
    if features.get("heap_pattern"):
        return verdict(
            "O(n log n) or O(log n) per operation",
            "Heap usage detected: each push/pop costs O(log n).",
        )

    # ----- Iterative heuristics (plain loop depth) -----
    max_depth = features.get("max_loop_depth", 0)
    if max_depth == 1:
        return verdict("O(n)", "Single loop traversal detected.")
    if max_depth >= 2:
        return verdict(
            f"O(n^{max_depth})",
            "Nested loops multiply the work done per nesting level.",
        )

    # Default: nothing costly was observed.
    return verdict(
        "O(1)",
        "No loops or recursion were detected, suggesting constant time operations.",
    )
codesense/embedder.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import os
3
+ from transformers import AutoTokenizer, T5EncoderModel
4
+
5
class CodeT5Embedder:
    """Wraps a CodeT5 encoder to produce fixed-size code embeddings.

    Loads the tokenizer and encoder weights once at construction, moves
    the model to GPU when available, and exposes mean-pooled embeddings
    via embed() / get_embedding().
    """

    def __init__(self, model_name="Salesforce/codet5-base"):
        print(f"⏳ Initializing CodeT5 Engine ({model_name})...")

        # Prefer the slow tokenizer: use_fast=False is the specific fix
        # for the 'List' error on Windows. Fall back to the default
        # (fast) loader if the slow path fails.
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
        except Exception as e:
            print(f"⚠️ Primary loader failed, attempting fast-mode fallback: {e}")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        print("⏳ Loading CodeT5 Model weights (this may take a moment)...")
        self.model = T5EncoderModel.from_pretrained(model_name)

        # Use the GPU when one is visible, otherwise stay on CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        print(f"✅ CodeT5 Engine is Live on {str(self.device).upper()}")

    def embed(self, code: str):
        """Standard method name used by similarity.py"""
        return self.get_embedding(code)

    def get_embedding(self, code: str):
        """Return a 1-D numpy vector for *code* (mean over token states).

        Non-string or empty input is replaced with a single space so the
        tokenizer always receives valid text.
        """
        if not code or not isinstance(code, str):
            code = " "

        encoded = self.tokenizer(
            code,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True,
        ).to(self.device)

        # Inference only — no gradients needed.
        with torch.no_grad():
            encoder_out = self.model(**encoded)

        # Global average pooling of the hidden states → flat numpy vector.
        return encoder_out.last_hidden_state.mean(dim=1).cpu().numpy().flatten()
codesense/explanations.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def generate_explanation(analysis_result: dict) -> dict:
    """Produce a human-readable explanation of the detected algorithm.

    Resolution order:
      1. the final resolved pattern name (highest authority),
      2. strong structural feature flags, checked in priority order,
      3. generic structural fallbacks (recursion shape, loop depth).

    Returns:
        dict: {"summary": str, "details": [str]}.
    """
    features = analysis_result.get("features", {})
    pattern = analysis_result.get("analysis", {}).get("pattern")

    # ------------------------------------------------------------
    # 1) Pattern-driven explanations (highest priority).
    # ------------------------------------------------------------
    pattern_texts = {
        "Quick Sort": (
            "|Quick Sort Pattern| A pivot element partitions the array into "
            "smaller subarrays which are recursively sorted."
        ),
        "Merge Sort": (
            "|Merge Sort Pattern| The array is recursively divided into halves, "
            "sorted independently, and merged back together."
        ),
        "Bubble Sort": (
            "|Bubble Sort Pattern| Adjacent elements are repeatedly compared "
            "and swapped if out of order. Larger elements 'bubble' to the end "
            "with each pass."
        ),
        "Insertion Sort": (
            "|Insertion Sort Pattern| Elements are inserted into their correct "
            "position within the sorted portion of the list by shifting larger elements."
        ),
        "Heap-Based Algorithm": (
            "|Heap / Priority Queue Pattern| The algorithm uses a heap data "
            "structure to maintain ordered elements efficiently, typically "
            "enabling O(log n) insertions and removals."
        ),
        "Breadth-First Search": (
            "|Breadth-first search Pattern| The algorithm explores nodes "
            "level by level using a queue."
        ),
        "Depth-First Search": (
            "|Depth-first traversal Pattern| The algorithm explores as far "
            "as possible along each branch before backtracking."
        ),
        "Binary Search": (
            "|Binary search Pattern| The algorithm repeatedly halves the "
            "search space, resulting in logarithmic time complexity."
        ),
        "Memoization": (
            "|Memoization Pattern| Previously computed results are stored "
            "to avoid redundant recursive calls."
        ),
        "Tabulation": (
            "|Tabulation Dynamic Programming Pattern| A table is built "
            "iteratively using previously computed subproblem results."
        ),
        "Sliding Window": (
            "|Sliding Window Pattern| A window expands and contracts across "
            "the data structure while maintaining a running condition."
        ),
        "Two-Pointer Technique": (
            "|Two-pointer technique Pattern| Two indices move toward each other "
            "in a controlled manner during a single traversal."
        ),
    }
    # "Heap Sort" shares the heap explanation.
    pattern_texts["Heap Sort"] = pattern_texts["Heap-Based Algorithm"]

    summary = pattern_texts.get(pattern)

    # ------------------------------------------------------------
    # 2) Structural feature fallback (only if no pattern matched).
    #    Order matters: stronger signals come first.
    # ------------------------------------------------------------
    if summary is None:
        fallbacks = [
            (lambda f: f.get("heap_pattern"),
             "|Heap Pattern| The algorithm relies on a heap data structure "
             "for ordered extraction or insertion."),
            (lambda f: f.get("memoization_pattern"),
             "|Memoization Pattern| Previously computed results are reused "
             "to reduce redundant computation."),
            (lambda f: f.get("tabulation_pattern"),
             "|Tabulation Pattern| A dynamic programming table is constructed "
             "iteratively to build the final solution."),
            (lambda f: f.get("bfs_pattern"),
             "|Breadth-first search Pattern| The algorithm processes elements "
             "level by level using a queue."),
            (lambda f: f.get("binary_search_pattern"),
             "|Binary search Pattern| The search space is repeatedly divided in half."),
            (lambda f: f.get("sliding_window_pattern"),
             "|Sliding Window Pattern| A dynamic window adjusts across input "
             "to maintain a condition efficiently."),
            (lambda f: f.get("pointer_updates", 0) >= 2,
             "|Two-pointer Pattern| Two pointers are adjusted during traversal "
             "to control search or comparison."),
            (lambda f: f.get("dfs_pattern"),
             "|Depth-first traversal Pattern| The algorithm explores branches "
             "deeply before backtracking."),
            (lambda f: f.get("merge_sort_pattern"),
             "|Merge Sort Pattern| Recursive division and merging strategy detected."),
            (lambda f: f.get("quick_sort_pattern"),
             "|Quick Sort Pattern| Partition-based recursive sorting detected."),
            (lambda f: f.get("divide_and_conquer"),
             "|Divide-and-conquer Pattern| The problem is split into smaller "
             "subproblems and their results are combined."),
            (lambda f: f.get("recursion") and f.get("recursive_call_count", 0) > 1,
             "Multiple recursive calls per invocation detected, suggesting "
             "exponential growth."),
            (lambda f: f.get("recursion"),
             "Single recursive call per invocation detected, suggesting "
             "linear recursion depth."),
            (lambda f: f.get("bubble_sort_pattern"),
             "|Bubble Sort Pattern| Repeated adjacent swaps detected."),
            (lambda f: f.get("insertion_sort_pattern"),
             "|Insertion Sort Pattern| Element shifting within a sorted subarray detected."),
            (lambda f: f.get("max_loop_depth", 0) > 1,
             "Nested loop structures detected, indicating polynomial behavior."),
            (lambda f: f.get("max_loop_depth", 0) == 1,
             "Single loop traversal detected, indicating linear iteration."),
            # 3) Final generic fallback.
            (lambda f: True,
             "No significant structural patterns were detected."),
        ]
        for matches, text in fallbacks:
            if matches(features):
                summary = text
                break

    return {
        "summary": summary,
        "details": [summary]
    }
codesense/features.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+
3
class FeatureExtractor(ast.NodeVisitor):
    """
    Traverses the AST and extracts structural features
    from Python source code.

    Usage: instantiate, call .visit(tree), then read .features.
    The raw flags and counters collected here are combined into
    higher-level pattern booleans by extract_features().
    """

    def __init__(self):
        # Raw feature flags/counters populated during the AST walk.
        self.features = {
            # General structure
            "for_loops": 0,
            "while_loops": 0,
            "function_calls": set(),
            "recursion": False,
            "max_loop_depth": 0,
            "recursive_call_count": 0,
            "divide_and_conquer": False,
            "binary_search_pattern": False,

            # Two-pointer technique
            "pointer_variables": set(),
            "pointer_updates": 0,

            # BFS signals
            "bfs_pattern": False,
            "queue_variables": set(),
            "queue_operations": 0,
            "queue_pop_front": False,
            "queue_append_detected": False,
            "graph_iteration": False,

            # DFS signals
            "dfs_pattern": False,
            "uses_stack": False,
            "uses_pop": False,

            # Dynamic programming (combined flag + raw signal)
            "dp_pattern": False,
            "uses_dp_array": False,

            # Sorting signals
            "sorting_pattern": False,
            "bubble_sort_pattern": False,
            "insertion_sort_pattern": False,
            "adjacent_swap_detected": False,
            "insertion_shift_detected": False,

            # Memoization signals
            "memoization_pattern": False,
            "memo_dict_defined": False,
            "memo_lookup_detected": False,
            "memo_store_detected": False,

            # Tabulation signals
            "tabulation_pattern": False,
            "dp_self_dependency": False,
            "dp_dimension": 1,

            # Divide-and-conquer sorts
            "merge_sort_pattern": False,
            "quick_sort_pattern": False,

            # Sliding window signals
            "sliding_window_pattern": False,
            "window_updates": 0,
            "window_shrinks": 0,

            # Heap signals
            "heap_imported": False,
            "heap_operations": 0,
            "heap_pattern": False,
        }
        # NOTE(review): appears unused within this class;
        # current_function_name (below) is the field actually consulted.
        self.current_function = None

        # Loop-depth tracking for the nested-loop complexity heuristic.
        self.current_loop_depth = 0
        self.max_loop_depth = 0

        # Name of the function currently being visited (recursion detection).
        self.current_function_name = None

    def visit_Import(self, node):
        """Flag `import heapq` for heap-pattern detection."""
        for alias in node.names:
            if alias.name == "heapq":
                self.features["heap_imported"] = True
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        """Flag `from heapq import ...` for heap-pattern detection."""
        if node.module == "heapq":
            self.features["heap_imported"] = True
        self.generic_visit(node)

    def visit_FunctionDef(self, node):
        """Track the enclosing function name so visit_Call can spot recursion."""
        previous_function = self.current_function_name
        self.current_function_name = node.name

        self.generic_visit(node)

        # Restore on exit so nested defs do not leak their name outward.
        self.current_function_name = previous_function

    def visit_For(self, node):
        """Count for-loops, track nesting depth, and spot graph/window idioms."""
        self.features["for_loops"] += 1

        self.current_loop_depth += 1
        self.max_loop_depth = max(self.max_loop_depth, self.current_loop_depth)

        # Detect graph[node] iteration (adjacency-list traversal)
        if isinstance(node.iter, ast.Subscript):
            self.features["graph_iteration"] = True

        self.generic_visit(node)
        self.current_loop_depth -= 1

        # Loop variable named like a window's right edge → window expansion.
        if isinstance(node.target, ast.Name):
            var = node.target.id.lower()
            if var in ("right", "r", "end"):
                self.features["window_updates"] += 1

    def visit_While(self, node):
        """Count while-loops and track nesting depth."""
        self.features["while_loops"] += 1

        self.current_loop_depth += 1
        self.max_loop_depth = max(self.max_loop_depth, self.current_loop_depth)

        self.generic_visit(node)
        self.current_loop_depth -= 1

    def visit_Call(self, node):
        """Inspect calls for recursion, divide-and-conquer, queue/stack/heap use."""
        if isinstance(node.func, ast.Name):
            function_name = node.func.id

            self.features["function_calls"].add(function_name)

            # Detect recursion: call to the function we are currently inside
            if function_name == self.current_function_name:
                self.features["recursion"] = True
                self.features["recursive_call_count"] += 1

                # If recursion + loop present → DFS-style
                if self.features["for_loops"] >= 1:
                    self.features["dfs_pattern"] = True

            # Detect divide-and-conquer via the call's arguments
            for arg in node.args:

                # Case 1: n // 2 or n / 2
                if isinstance(arg, ast.BinOp) and isinstance(arg.op, (ast.FloorDiv, ast.Div)):
                    self.features["divide_and_conquer"] = True

                # Case 2: slicing like arr[:mid]
                if isinstance(arg, ast.Subscript):
                    if isinstance(arg.slice, ast.Slice):
                        self.features["divide_and_conquer"] = True
                # Recursing on slices is also treated as a merge-sort signal.
                if isinstance(arg, ast.Subscript) and isinstance(arg.slice, ast.Slice):
                    self.features["merge_sort_pattern"] = True

        # Detect queue operations on variables known to hold a deque
        if isinstance(node.func, ast.Attribute):
            if isinstance(node.func.value, ast.Name):
                var = node.func.value.id

                if var in self.features["queue_variables"]:
                    if node.func.attr in ("append", "popleft"):
                        self.features["queue_operations"] += 1

        # Detect stack.pop() or queue.pop()
        if isinstance(node.func, ast.Attribute):
            method = node.func.attr

            if method == "pop":
                self.features["uses_pop"] = True

            if method == "append":
                # mark append usage
                # NOTE(review): intentionally a no-op here; append is
                # recorded as queue_append_detected in the block below.
                pass

        # Detect pop(0) for list-based BFS
        if isinstance(node.func, ast.Attribute):
            method = node.func.attr

            # pop(0) — removing the front element (queue behaviour)
            if method == "pop":
                if node.args and isinstance(node.args[0], ast.Constant):
                    if node.args[0].value == 0:
                        self.features["queue_pop_front"] = True

            # append()
            if method == "append":
                self.features["queue_append_detected"] = True

            # popleft() — deque front removal
            if method == "popleft":
                self.features["queue_pop_front"] = True

        # Iterative DFS heuristic: explicit stack + pop + loop
        if (
            self.features["uses_stack"]
            and self.features["uses_pop"]
            and self.features["for_loops"] >= 1
        ):
            self.features["dfs_pattern"] = True

        # Heap operations via the heapq module
        if isinstance(node.func, ast.Attribute):
            if isinstance(node.func.value, ast.Name):
                if node.func.value.id == "heapq":
                    if node.func.attr in ("heappush", "heappop", "heapify"):
                        self.features["heap_operations"] += 1

        self.generic_visit(node)

    def visit_Assign(self, node):
        """Inspect assignments for the many idioms they can signal:
        midpoint computation, pointer init, deque/stack/memo creation,
        DP-table writes, swaps, shifts, and pivot selection."""

        # -------- Binary Search Pattern Detection --------
        # mid = (lo + hi) // 2 — an Add inside a FloorDiv.
        if isinstance(node.value, ast.BinOp):
            if isinstance(node.value.op, ast.FloorDiv):
                if isinstance(node.value.left, ast.BinOp):
                    if isinstance(node.value.left.op, ast.Add):
                        self.features["binary_search_pattern"] = True

        # -------- Two Pointer Detection --------
        if node.targets and isinstance(node.targets[0], ast.Name):
            var = node.targets[0].id

            # Case 1: left = 0
            if isinstance(node.value, (ast.Constant, ast.Num)):
                self.features["pointer_variables"].add(var)

            # Case 2: right = len(arr) - 1
            if isinstance(node.value, ast.BinOp):
                self.features["pointer_variables"].add(var)

        # -------- BFS Detection: q = deque(...) --------
        if isinstance(node.value, ast.Call):
            if isinstance(node.value.func, ast.Name):
                if node.value.func.id == "deque":
                    if node.targets and isinstance(node.targets[0], ast.Name):
                        var = node.targets[0].id
                        self.features["queue_variables"].add(var)

        # ------- Detect stack initialization: stack = [] / stack = list(...)
        if node.targets and isinstance(node.targets[0], ast.Name):
            var = node.targets[0].id

            if isinstance(node.value, (ast.List, ast.Call)):
                if var.lower() == "stack":
                    self.features["uses_stack"] = True

        # ------- Detect memo dictionary initialization: memo = {}
        if isinstance(node.value, ast.Dict):
            if node.targets and isinstance(node.targets[0], ast.Name):
                var = node.targets[0].id.lower()
                if var in ("memo", "cache", "dp"):
                    self.features["memo_dict_defined"] = True

        # Detect memo[n] = ... (storing a computed subresult)
        if node.targets and isinstance(node.targets[0], ast.Subscript):
            target = node.targets[0]

            if isinstance(target.value, ast.Name):
                var = target.value.id.lower()
                if var in ("memo", "cache", "dp"):
                    self.features["memo_store_detected"] = True

        # Detect 2D DP tables (list comprehension or nested list literal)
        if isinstance(node.value, ast.ListComp):
            self.features["dp_dimension"] = 2

        if isinstance(node.value, ast.List):
            if any(isinstance(el, ast.List) for el in node.value.elts):
                self.features["dp_dimension"] = 2

        # Detect true tabulation recurrence && 2D KNAPSACK FIX:
        # dp[...] = <expr referencing dp> (table depends on itself).
        if node.targets and isinstance(node.targets[0], ast.Subscript):
            target = node.targets[0]

            # Find base name under nested subscripts (e.g. dp[i][j])
            base = target.value
            while isinstance(base, ast.Subscript):
                base = base.value

            if isinstance(base, ast.Name):
                var = base.id.lower()

                if var in ("dp", "memo", "cache"):
                    for child in ast.walk(node.value):
                        if isinstance(child, ast.Name) and child.id.lower() == var:
                            self.features["dp_self_dependency"] = True

        # -------- Bubble Sort Adjacent Swap Detection --------
        # a[i], a[j] = a[j], a[i] — tuple swap of two subscripts.
        if (
            isinstance(node.targets[0], ast.Tuple)
            and isinstance(node.value, ast.Tuple)
            and len(node.targets[0].elts) == 2
            and len(node.value.elts) == 2
        ):
            left = node.targets[0].elts
            right = node.value.elts

            if all(isinstance(el, ast.Subscript) for el in left + right):
                self.features["adjacent_swap_detected"] = True

        # -------- Insertion Sort Shift Detection --------
        # a[j+1] = a[j] — subscript assigned from another subscript.
        if node.targets and isinstance(node.targets[0], ast.Subscript):
            target = node.targets[0]

            if isinstance(node.value, ast.Subscript):
                self.features["insertion_shift_detected"] = True

        # Quick Sort: assignment to a variable named "pivot" signals partitioning
        if node.targets and isinstance(node.targets[0], ast.Name):
            var = node.targets[0].id.lower()
            if var == "pivot":
                self.features["quick_sort_pattern"] = True

        self.generic_visit(node)

    def visit_AugAssign(self, node):
        """Track pointer movement (+=/-=) and window shrinks."""
        if isinstance(node.target, ast.Name):
            var = node.target.id

            # Moving a previously-seen pointer variable counts as an update.
            if isinstance(node.op, (ast.Add, ast.Sub)):
                if var in self.features["pointer_variables"]:
                    self.features["pointer_updates"] += 1

        if isinstance(node.target, ast.Name):
            var = node.target.id.lower()

            # Advancing a left edge looks like a sliding-window shrink.
            if var in ("left", "l", "start"):
                self.features["window_shrinks"] += 1

        self.generic_visit(node)

    # ------ subscript access -----
    def visit_Subscript(self, node):
        """Flag any read/write of a dp/memo/cache array."""
        # Walk up until we find the base name (handles dp[i][j] chains)
        base = node.value
        while isinstance(base, ast.Subscript):
            base = base.value

        if isinstance(base, ast.Name):
            var = base.id.lower()

            if var in ("dp", "memo", "cache"):
                self.features["uses_dp_array"] = True

        self.generic_visit(node)

    def visit_Compare(self, node):
        """Detect memo lookups of the form `x in memo/cache/dp`."""
        if any(isinstance(op, ast.In) for op in node.ops):
            for comparator in node.comparators:
                if isinstance(comparator, ast.Name):
                    if comparator.id.lower() in ("memo", "cache", "dp"):
                        self.features["memo_lookup_detected"] = True

        self.generic_visit(node)
352
def extract_features(tree: ast.AST) -> dict:
    """Walk *tree* with a FeatureExtractor and return the feature dict.

    After the raw traversal, the low-level signals are combined into
    higher-level composite flags: BFS, memoization, tabulation, DP,
    sorting variants, sliding window, and heap usage.
    """
    visitor = FeatureExtractor()
    visitor.visit(tree)

    feats = visitor.features
    feats["max_loop_depth"] = visitor.max_loop_depth

    # BFS: a while-driven queue (front pops + appends) over graph adjacency.
    bfs_signals = (
        feats["while_loops"] >= 1
        and feats["queue_pop_front"]
        and feats["queue_append_detected"]
        and feats["graph_iteration"]
    )
    if bfs_signals:
        feats["bfs_pattern"] = True

    # High-confidence memoization: recursive code that defines, checks,
    # and writes a memo/cache dictionary.
    memo_signals = (
        feats["recursion"]
        and feats["memo_dict_defined"]
        and feats["memo_lookup_detected"]
        and feats["memo_store_detected"]
    )
    if memo_signals:
        feats["memoization_pattern"] = True

    # Tabulation: iterative dp[] updates that reference dp[] itself.
    if feats["uses_dp_array"] and feats["dp_self_dependency"] and feats["for_loops"] >= 1:
        feats["tabulation_pattern"] = True

    # Either DP style counts as dynamic programming overall.
    if feats["memoization_pattern"] or feats["tabulation_pattern"]:
        feats["dp_pattern"] = True

    # Sorting detection requires nested loops; swap vs. shift picks the variant.
    if feats["max_loop_depth"] >= 2:
        if feats["adjacent_swap_detected"]:
            feats["bubble_sort_pattern"] = True
            feats["sorting_pattern"] = True
        elif feats["insertion_shift_detected"]:
            feats["insertion_sort_pattern"] = True
            feats["sorting_pattern"] = True

    # Sliding window: an expanding edge (for) plus a shrinking edge (while).
    window_signals = (
        feats["for_loops"] >= 1
        and feats["while_loops"] >= 1
        and feats["window_updates"] >= 1
        and feats["window_shrinks"] >= 1
    )
    if window_signals:
        feats["sliding_window_pattern"] = True

    # Heap: heapq imported AND actually used at least once.
    if feats["heap_imported"] and feats["heap_operations"] >= 1:
        feats["heap_pattern"] = True

    return feats
codesense/ml/__init__.py ADDED
File without changes
codesense/ml/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (152 Bytes). View file
 
codesense/ml/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (158 Bytes). View file
 
codesense/ml/__pycache__/embedder.cpython-310.pyc ADDED
Binary file (1.64 kB). View file
 
codesense/ml/__pycache__/embedder.cpython-314.pyc ADDED
Binary file (2.91 kB). View file
 
codesense/ml/__pycache__/interface.cpython-310.pyc ADDED
Binary file (609 Bytes). View file
 
codesense/ml/__pycache__/interface.cpython-314.pyc ADDED
Binary file (938 Bytes). View file
 
codesense/ml/__pycache__/similarity.cpython-310.pyc ADDED
Binary file (7.05 kB). View file
 
codesense/ml/__pycache__/similarity.cpython-314.pyc ADDED
Binary file (8.07 kB). View file
 
codesense/parser.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+
3
def parse_code(source_code: str) -> ast.AST:
    """Convert a string of Python source into its AST representation.

    Parameters:
        source_code (str): Python source code as a string.

    Returns:
        ast.AST: Root node of the parsed abstract syntax tree.

    Raises:
        SyntaxError: If *source_code* is not valid Python.
    """
    tree = ast.parse(source_code)
    return tree
codesense/rules.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def detect_algorithm(features: dict) -> dict:
    """Map extracted code features to the most specific matching algorithm.

    Rules are checked from most specific (dynamic programming, heap, search,
    graph, pointer techniques, sorting, recursion) down to generic iterative
    shapes; the first rule that matches wins.

    Parameters:
        features (dict): Feature flags and counters produced by the
            feature-extraction pass (missing keys are treated as falsy/zero).

    Returns:
        dict: {"pattern": <name>, "category": <family>}; both are
            "Unknown" when no rule matches.
    """
    loop_count = features.get("for_loops", 0) + features.get("while_loops", 0)

    # Two-pointer heuristic: at least two pointer variables moving inside
    # at least one while-loop.
    is_two_pointer = (
        len(features.get("pointer_variables", [])) >= 2
        and features.get("pointer_updates", 0) >= 2
        and features.get("while_loops", 0) >= 1
    )

    # Ordered rule table: (matched?, pattern, category). Order matters —
    # it preserves the original specificity cascade.
    rules = [
        (features.get("memoization_pattern"), "Memoization", "Dynamic Programming"),
        (features.get("tabulation_pattern"), "Tabulation", "Dynamic Programming"),
        (features.get("heap_pattern"), "Heap-Based Algorithm", "Data Structure Based"),
        (features.get("binary_search_pattern"), "Binary Search", "Search Algorithm"),
        (features.get("bfs_pattern"), "Breadth-First Search", "Graph Algorithm"),
        (features.get("dfs_pattern"), "Depth-First Search", "Graph Algorithm"),
        (features.get("sliding_window_pattern"), "Sliding Window", "Pointer Technique"),
        (is_two_pointer, "Two-Pointer Technique", "Pointer Technique"),
        (features.get("bubble_sort_pattern"), "Bubble Sort", "Sorting Algorithm"),
        (features.get("insertion_sort_pattern"), "Insertion Sort", "Sorting Algorithm"),
        (features.get("merge_sort_pattern"), "Merge Sort", "Sorting Algorithm"),
        (features.get("quick_sort_pattern"), "Quick Sort", "Sorting Algorithm"),
        (features.get("divide_and_conquer"), "Recursive Divide-and-Conquer", "Divide-and-Conquer"),
        (
            features.get("recursion") and features.get("recursive_call_count", 0) >= 2,
            "Recursive (Exponential)",
            "Recursive Pattern",
        ),
        (features.get("recursion"), "Recursive (Linear)", "Recursive Pattern"),
        (features.get("max_loop_depth", 0) >= 2, "Nested Iterative", "Iterative Pattern"),
        (loop_count == 1, "Linear Iterative", "Iterative Pattern"),
        (loop_count == 0, "Constant-Time", "Direct Computation"),
    ]

    for matched, pattern, category in rules:
        if matched:
            return {"pattern": pattern, "category": category}

    return {"pattern": "Unknown", "category": "Unknown"}
codesense/similarity.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import warnings
4
+ warnings.filterwarnings("ignore", category=FutureWarning)
5
+
6
+ from .embedder import CodeT5Embedder
7
+
8
+ # -------- Singleton Embedder --------
9
+ _embedder = CodeT5Embedder()
10
+
11
+ # ============================================================
12
+ # ML v2 PROTOTYPE STRUCTURE
13
+ # Category → Algorithm → [Variants]
14
+ # ============================================================
15
+
16
# Reference implementations ("prototypes") for each algorithm family.
# Structure: category name -> algorithm name -> list of source-code variants.
# Each variant string is embedded once at import time (by the precompute
# pass defined later in this module) and then compared against user code
# via cosine similarity in predict_algorithm().
PROTOTYPES = {

    "Sorting Algorithm": {

        "Bubble Sort": [
            # Classic
            """
            def bubble_sort(arr):
                for i in range(len(arr)):
                    for j in range(len(arr)-i-1):
                        if arr[j] > arr[j+1]:
                            arr[j], arr[j+1] = arr[j+1], arr[j]
            """,
            # Optimized (swapped flag)
            """
            def bubble_sort(arr):
                n = len(arr)
                for i in range(n):
                    swapped = False
                    for j in range(0, n-i-1):
                        if arr[j] > arr[j+1]:
                            arr[j], arr[j+1] = arr[j+1], arr[j]
                            swapped = True
                    if not swapped:
                        break
            """
        ],

        "Insertion Sort": [
            # Shift-based
            """
            def insertion_sort(arr):
                for i in range(1, len(arr)):
                    key = arr[i]
                    j = i - 1
                    while j >= 0 and arr[j] > key:
                        arr[j+1] = arr[j]
                        j -= 1
                    arr[j+1] = key
            """,
            # Swap-based variant
            """
            def insertion_sort(arr):
                for i in range(1, len(arr)):
                    j = i
                    while j > 0 and arr[j] < arr[j-1]:
                        arr[j], arr[j-1] = arr[j-1], arr[j]
                        j -= 1
            """
        ],

        "Merge Sort": [
            # Slicing-based
            """
            def merge_sort(arr):
                if len(arr) <= 1:
                    return arr
                mid = len(arr)//2
                left = merge_sort(arr[:mid])
                right = merge_sort(arr[mid:])
                return merge(left, right)
            """,
            # Index-based (GFG style)
            """
            def merge(arr, l, m, r):
                n1 = m - l + 1
                n2 = r - m
                L = [0] * n1
                R = [0] * n2
                for i in range(n1):
                    L[i] = arr[l + i]
                for j in range(n2):
                    R[j] = arr[m + 1 + j]
                i = j = 0
                k = l
                while i < n1 and j < n2:
                    if L[i] <= R[j]:
                        arr[k] = L[i]
                        i += 1
                    else:
                        arr[k] = R[j]
                        j += 1
                    k += 1

            def merge_sort(arr, l, r):
                if l < r:
                    m = l + (r - l)//2
                    merge_sort(arr, l, m)
                    merge_sort(arr, m+1, r)
                    merge(arr, l, m, r)
            """
        ],

        "Quick Sort": [
            # List-comprehension variant
            """
            def quick_sort(arr):
                if len(arr) <= 1:
                    return arr
                pivot = arr[0]
                left = [x for x in arr[1:] if x <= pivot]
                right = [x for x in arr[1:] if x > pivot]
                return quick_sort(left) + [pivot] + quick_sort(right)
            """,
            # Partition-based (GFG style)
            """
            def partition(arr, low, high):
                pivot = arr[high]
                i = low - 1
                for j in range(low, high):
                    if arr[j] <= pivot:
                        i += 1
                        arr[i], arr[j] = arr[j], arr[i]
                arr[i+1], arr[high] = arr[high], arr[i+1]
                return i+1

            def quick_sort(arr, low, high):
                if low < high:
                    pi = partition(arr, low, high)
                    quick_sort(arr, low, pi-1)
                    quick_sort(arr, pi+1, high)
            """
        ],

        "Heap Sort": [
            # heapq-based
            """
            import heapq
            def heap_sort(arr):
                heapq.heapify(arr)
                return [heapq.heappop(arr) for _ in range(len(arr))]
            """,
            # Manual heapify (GFG style)
            """
            def heapify(arr, n, i):
                largest = i
                l = 2*i + 1
                r = 2*i + 2
                if l < n and arr[l] > arr[largest]:
                    largest = l
                if r < n and arr[r] > arr[largest]:
                    largest = r
                if largest != i:
                    arr[i], arr[largest] = arr[largest], arr[i]
                    heapify(arr, n, largest)

            def heap_sort(arr):
                n = len(arr)
                for i in range(n//2 - 1, -1, -1):
                    heapify(arr, n, i)
                for i in range(n-1, 0, -1):
                    arr[i], arr[0] = arr[0], arr[i]
                    heapify(arr, i, 0)
            """
        ]
    },

    "Dynamic Programming": {
        "Memoization": [
            """
            memo = {}
            def fib(n):
                if n in memo:
                    return memo[n]
                if n <= 1:
                    return n
                memo[n] = fib(n-1) + fib(n-2)
                return memo[n]
            """
        ],

        "Tabulation": [
            """
            def fib(n):
                dp = [0]*(n+1)
                dp[1] = 1
                for i in range(2, n+1):
                    dp[i] = dp[i-1] + dp[i-2]
                return dp[n]
            """,
            """
            def knapsack(weights, values, capacity):
                n = len(weights)
                dp = [[0]*(capacity+1) for _ in range(n+1)]
                for i in range(1, n+1):
                    for w in range(capacity+1):
                        if weights[i-1] <= w:
                            dp[i][w] = max(values[i-1] + dp[i-1][w-weights[i-1]],
                                           dp[i-1][w])
                        else:
                            dp[i][w] = dp[i-1][w]
            """
        ]
    },

    "Graph Algorithm": {
        "Breadth-First Search": [
            """
            from collections import deque
            def bfs(graph, start):
                visited = set()
                queue = deque([start])
                while queue:
                    node = queue.popleft()
                    for neighbor in graph[node]:
                        if neighbor not in visited:
                            visited.add(neighbor)
                            queue.append(neighbor)
            """
        ],

        "Depth-First Search": [
            # Recursive
            """
            def dfs(graph, node, visited):
                visited.add(node)
                for neighbor in graph[node]:
                    if neighbor not in visited:
                        dfs(graph, neighbor, visited)
            """,
            # Iterative
            """
            def dfs(graph, start):
                visited = set()
                stack = [start]
                while stack:
                    node = stack.pop()
                    if node not in visited:
                        visited.add(node)
                        for neighbor in graph[node]:
                            stack.append(neighbor)
            """
        ]
    },

    "Pointer Technique": {
        "Two-Pointer Technique": [
            """
            def two_sum_sorted(arr, target):
                left, right = 0, len(arr)-1
                while left < right:
                    s = arr[left] + arr[right]
                    if s == target:
                        return True
                    elif s < target:
                        left += 1
                    else:
                        right -= 1
            """
        ],

        "Sliding Window": [
            """
            def max_subarray(arr, k):
                current_sum = 0
                left = 0
                for right in range(len(arr)):
                    current_sum += arr[right]
                    if right-left+1 > k:
                        current_sum -= arr[left]
                        left += 1
            """
        ]
    },

    "Search Algorithm": {
        "Binary Search": [
            """
            def binary_search(arr, target):
                left, right = 0, len(arr)-1
                while left <= right:
                    mid = (left+right)//2
                    if arr[mid] == target:
                        return mid
                    elif arr[mid] < target:
                        left = mid+1
                    else:
                        right = mid-1
            """
        ]
    },

    "Data Structure Based": {
        "Heap-Based Algorithm": [
            """
            import heapq
            def top_k(nums, k):
                heap = []
                for num in nums:
                    heapq.heappush(heap, num)
                    if len(heap) > k:
                        heapq.heappop(heap)
            """
        ]
    }
}
312
+
313
+ # ============================================================
314
+ # PRECOMPUTE EMBEDDINGS
315
+ # ============================================================
316
+
317
# Embed every prototype variant once at import time so each similarity
# query only has to embed the user's code, not the prototypes.
# Mirrors the PROTOTYPES structure: category -> algorithm -> [embeddings].
_PROTOTYPE_EMBEDDINGS = {
    category: {
        algo_name: [_embedder.embed(snippet) for snippet in variants]
        for algo_name, variants in algorithms.items()
    }
    for category, algorithms in PROTOTYPES.items()
}
326
+ # ============================================================
327
+ # ML v2 PREDICTION
328
+ # ============================================================
329
+
330
def predict_algorithm(code: str) -> dict:
    """Predict the closest-matching algorithm for *code* via embedding similarity.

    Embeds the user's code once, then compares it against every precomputed
    prototype embedding using cosine similarity. The single best match across
    all categories determines the prediction; the best score within each
    category is also reported.

    Parameters:
        code (str): Source code to classify.

    Returns:
        dict: {
            "ml_prediction": best-matching algorithm name (None if no prototypes),
            "ml_category": category of the best match (None if no prototypes),
            "confidence": best cosine similarity, rounded to 3 decimals,
            "category_scores": best similarity per category, rounded to 3 decimals,
        }
    """
    user_embedding = _embedder.embed(code)
    # Hoisted out of the loops: the user's embedding tensor is invariant
    # across all prototype comparisons (originally rebuilt per comparison).
    user_tensor = torch.tensor(user_embedding).unsqueeze(0)

    best_algorithm = None
    best_category = None
    best_score = -1.0

    category_scores = {}

    for category, algorithms in _PROTOTYPE_EMBEDDINGS.items():
        category_best = -1.0

        for algo_name, variant_embeddings in algorithms.items():
            for proto_embedding in variant_embeddings:
                similarity = F.cosine_similarity(
                    user_tensor,
                    torch.tensor(proto_embedding).unsqueeze(0)
                ).item()

                # Track global best
                if similarity > best_score:
                    best_score = similarity
                    best_algorithm = algo_name
                    best_category = category

                # Track best per category
                if similarity > category_best:
                    category_best = similarity

        category_scores[category] = round(category_best, 3)

    return {
        "ml_prediction": best_algorithm,
        "ml_category": best_category,
        "confidence": round(best_score, 3),
        "category_scores": category_scores
    }