Yooshiii committed on
Commit
f8a39f0
·
verified ·
1 Parent(s): 5a428e1

Upload 36 files

Browse files
Files changed (36) hide show
  1. codesense/__init__.py +0 -0
  2. codesense/__pycache__/__init__.cpython-310.pyc +0 -0
  3. codesense/__pycache__/__init__.cpython-314.pyc +0 -0
  4. codesense/__pycache__/analyzer.cpython-310.pyc +0 -0
  5. codesense/__pycache__/analyzer.cpython-314.pyc +0 -0
  6. codesense/__pycache__/complexity.cpython-310.pyc +0 -0
  7. codesense/__pycache__/complexity.cpython-314.pyc +0 -0
  8. codesense/__pycache__/embedder.cpython-310.pyc +0 -0
  9. codesense/__pycache__/embedder.cpython-314.pyc +0 -0
  10. codesense/__pycache__/explanations.cpython-310.pyc +0 -0
  11. codesense/__pycache__/explanations.cpython-314.pyc +0 -0
  12. codesense/__pycache__/features.cpython-310.pyc +0 -0
  13. codesense/__pycache__/features.cpython-314.pyc +0 -0
  14. codesense/__pycache__/parser.cpython-310.pyc +0 -0
  15. codesense/__pycache__/parser.cpython-314.pyc +0 -0
  16. codesense/__pycache__/rules.cpython-310.pyc +0 -0
  17. codesense/__pycache__/rules.cpython-314.pyc +0 -0
  18. codesense/__pycache__/similarity.cpython-310.pyc +0 -0
  19. codesense/__pycache__/similarity.cpython-314.pyc +0 -0
  20. codesense/analyzer.py +69 -0
  21. codesense/complexity.py +95 -0
  22. codesense/embedder.py +49 -0
  23. codesense/explanations.py +201 -0
  24. codesense/features.py +417 -0
  25. codesense/ml/__init__.py +0 -0
  26. codesense/ml/__pycache__/__init__.cpython-310.pyc +0 -0
  27. codesense/ml/__pycache__/__init__.cpython-314.pyc +0 -0
  28. codesense/ml/__pycache__/embedder.cpython-310.pyc +0 -0
  29. codesense/ml/__pycache__/embedder.cpython-314.pyc +0 -0
  30. codesense/ml/__pycache__/interface.cpython-310.pyc +0 -0
  31. codesense/ml/__pycache__/interface.cpython-314.pyc +0 -0
  32. codesense/ml/__pycache__/similarity.cpython-310.pyc +0 -0
  33. codesense/ml/__pycache__/similarity.cpython-314.pyc +0 -0
  34. codesense/parser.py +13 -0
  35. codesense/rules.py +92 -0
  36. codesense/similarity.py +366 -0
codesense/__init__.py ADDED
File without changes
codesense/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (149 Bytes). View file
 
codesense/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (155 Bytes). View file
 
codesense/__pycache__/analyzer.cpython-310.pyc ADDED
Binary file (1.46 kB). View file
 
codesense/__pycache__/analyzer.cpython-314.pyc ADDED
Binary file (2.9 kB). View file
 
codesense/__pycache__/complexity.cpython-310.pyc ADDED
Binary file (1.47 kB). View file
 
codesense/__pycache__/complexity.cpython-314.pyc ADDED
Binary file (2.52 kB). View file
 
codesense/__pycache__/embedder.cpython-310.pyc ADDED
Binary file (1.88 kB). View file
 
codesense/__pycache__/embedder.cpython-314.pyc ADDED
Binary file (2.91 kB). View file
 
codesense/__pycache__/explanations.cpython-310.pyc ADDED
Binary file (4.76 kB). View file
 
codesense/__pycache__/explanations.cpython-314.pyc ADDED
Binary file (6.36 kB). View file
 
codesense/__pycache__/features.cpython-310.pyc ADDED
Binary file (8.15 kB). View file
 
codesense/__pycache__/features.cpython-314.pyc ADDED
Binary file (21.6 kB). View file
 
codesense/__pycache__/parser.cpython-310.pyc ADDED
Binary file (535 Bytes). View file
 
codesense/__pycache__/parser.cpython-314.pyc ADDED
Binary file (757 Bytes). View file
 
codesense/__pycache__/rules.cpython-310.pyc ADDED
Binary file (1.76 kB). View file
 
codesense/__pycache__/rules.cpython-314.pyc ADDED
Binary file (3.32 kB). View file
 
codesense/__pycache__/similarity.cpython-310.pyc ADDED
Binary file (7.07 kB). View file
 
codesense/__pycache__/similarity.cpython-314.pyc ADDED
Binary file (8.07 kB). View file
 
codesense/analyzer.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from codesense.parser import parse_code
2
+ from codesense.features import extract_features
3
+ from codesense.rules import detect_algorithm
4
+ from codesense.complexity import estimate_complexity
5
+ from codesense.explanations import generate_explanation
6
+
7
+ # ============================================================
8
+ # STRICT ML IMPORT (Mentor Requirement)
9
+ # This file provides the logic, NOT the server.
10
+ # ============================================================
11
+ from codesense.similarity import predict_algorithm
12
+
13
+ def analyze_code(source: str) -> dict:
14
+ """
15
+ Main analysis pipeline called by app.py.
16
+ """
17
+ # 1. Structural Analysis via AST
18
+ tree = parse_code(source)
19
+ features = extract_features(tree)
20
+ detection = detect_algorithm(features)
21
+
22
+ # 2. Semantic Analysis via CodeT5 (Checker 2)
23
+ ml_result = predict_algorithm(source)
24
+
25
+ rule_pattern = detection.get("pattern")
26
+ category = detection.get("category")
27
+ ml_prediction = ml_result.get("ml_prediction")
28
+ ml_confidence = ml_result.get("confidence")
29
+
30
+ # Override Policy: Does CodeT5 see something the Rules missed?
31
+ resolved_pattern = rule_pattern
32
+ ml_refined = False
33
+
34
+ if ml_confidence is not None:
35
+ if (ml_confidence >= 0.93 and ml_prediction != rule_pattern):
36
+ resolved_pattern = ml_prediction
37
+ category = ml_result.get("ml_category")
38
+ ml_refined = True
39
+ elif (ml_confidence >= 0.90 and rule_pattern in ["Linear Iterative", "Nested Iterative"]):
40
+ resolved_pattern = ml_prediction
41
+ ml_refined = True
42
+
43
+ # 3. Complexity
44
+ complexity = estimate_complexity(features)
45
+
46
+ # Clean up for JSON
47
+ if "function_calls" in features:
48
+ features["function_calls"] = list(features["function_calls"])
49
+
50
+ detection["pattern"] = resolved_pattern
51
+
52
+ base_result = {
53
+ "features": features,
54
+ "analysis": detection,
55
+ "complexity": complexity
56
+ }
57
+ explanation = generate_explanation(base_result)
58
+
59
+ return {
60
+ "pattern": resolved_pattern,
61
+ "category": category,
62
+ "time_complexity": complexity.get("time_complexity"),
63
+ "summary": explanation.get("summary"),
64
+ "ml_insights": {
65
+ "ml_prediction": ml_prediction,
66
+ "confidence": ml_confidence if ml_confidence is not None else 0.0,
67
+ "refined": ml_refined
68
+ }
69
+ }
codesense/complexity.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def estimate_complexity(features: dict) -> dict:
    """Estimate asymptotic time complexity from static code features.

    Heuristic rules are applied in priority order: graph traversal,
    binary search, dynamic programming, named divide-and-conquer sorts,
    recursion shape, simple sorting patterns, sliding window, heaps,
    and finally plain loop-nesting depth.

    Parameters:
        features (dict): Feature flags/counters produced by
            extract_features. Missing keys are treated as absent
            (previously ``features["recursion"]`` raised KeyError on a
            sparse dict).

    Returns:
        dict: Always ``{"time_complexity": str, "explanation": str}``
        (previously some branches omitted "explanation", and others
        returned a stale "no loops" explanation with a non-O(1) bound).
    """

    def verdict(big_o: str, why: str) -> dict:
        # Single construction point keeps the result shape consistent.
        return {"time_complexity": big_o, "explanation": why}

    # ----- BFS/DFS pattern -----
    if features.get("bfs_pattern") or features.get("dfs_pattern"):
        return verdict(
            "O(V + E)",
            "Graph traversal detected: each vertex and edge is processed once.",
        )

    # ----- Binary search heuristic -----
    if features.get("binary_search_pattern"):
        return verdict(
            "O(log n)",
            "Binary search detected: the search space is halved each iteration.",
        )

    # ----- Dynamic programming -----
    if features.get("dp_pattern"):
        return verdict(
            "O(n) or O(n²)",
            "Dynamic programming detected. Complexity depends on the number "
            "of states and transitions.",
        )

    # ----- Merge sort -----
    if features.get("merge_sort_pattern"):
        return verdict(
            "O(n log n)",
            "Merge sort detected: recursive halving with a linear merge step.",
        )

    # ----- Quick sort -----
    if features.get("quick_sort_pattern"):
        return verdict(
            "O(n log n) average, O(n²) worst-case",
            "Quick sort detected: partition-based recursive sorting.",
        )

    # ----- Recursion heuristics -----
    if features.get("recursion"):
        if features.get("divide_and_conquer"):
            return verdict(
                "O(n log n)",
                "Divide-and-conquer recursion detected.",
            )
        if features.get("recursive_call_count", 0) >= 2:
            return verdict(
                "O(2^n)",
                "Multiple recursive calls per invocation suggest exponential growth.",
            )
        return verdict(
            "O(n)",
            "A single recursive call per invocation suggests linear recursion depth.",
        )

    # ----- Sorting algorithms -----
    if features.get("bubble_sort_pattern"):
        return verdict(
            "O(n²) worst-case (O(n) best-case if optimized)",
            "Bubble sort detected: repeated adjacent comparisons and swaps.",
        )

    if features.get("insertion_sort_pattern"):
        return verdict(
            "O(n²) worst-case (O(n) best-case for nearly sorted input)",
            "Insertion sort detected: elements are shifted into a sorted prefix.",
        )

    # ----- Sliding window -----
    if features.get("sliding_window_pattern"):
        return verdict(
            "O(n)",
            "Sliding window detected: each element enters and leaves the window at most once.",
        )

    # ----- Heap-based -----
    if features.get("heap_pattern"):
        return verdict(
            "O(n log n) or O(log n) per operation",
            "Heap usage detected: each push/pop costs O(log n).",
        )

    # ----- Iterative heuristics (plain loop depth) -----
    max_depth = features.get("max_loop_depth", 0)
    if max_depth == 1:
        return verdict("O(n)", "Single loop traversal detected.")
    if max_depth >= 2:
        return verdict(
            f"O(n^{max_depth})",
            "Nested loops multiply the work done per nesting level.",
        )

    # Default: nothing costly was observed.
    return verdict(
        "O(1)",
        "No loops or recursion were detected, suggesting constant time operations.",
    )
codesense/embedder.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import os
3
+ from transformers import AutoTokenizer, T5EncoderModel
4
+
5
class CodeT5Embedder:
    """Wraps a CodeT5 encoder to produce fixed-size code embeddings.

    Loads the tokenizer and encoder weights once at construction, moves
    the model to GPU when available, and exposes mean-pooled embeddings
    via embed() / get_embedding().
    """

    def __init__(self, model_name="Salesforce/codet5-base"):
        print(f"⏳ Initializing CodeT5 Engine ({model_name})...")

        # Prefer the slow tokenizer: use_fast=False is the specific fix
        # for the 'List' error on Windows. Fall back to the default
        # (fast) loader if the slow path fails.
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
        except Exception as e:
            print(f"⚠️ Primary loader failed, attempting fast-mode fallback: {e}")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        print("⏳ Loading CodeT5 Model weights (this may take a moment)...")
        self.model = T5EncoderModel.from_pretrained(model_name)

        # Use the GPU when one is visible, otherwise stay on CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        print(f"✅ CodeT5 Engine is Live on {str(self.device).upper()}")

    def embed(self, code: str):
        """Standard method name used by similarity.py"""
        return self.get_embedding(code)

    def get_embedding(self, code: str):
        """Return a 1-D numpy vector for *code* (mean over token states).

        Non-string or empty input is replaced with a single space so the
        tokenizer always receives valid text.
        """
        if not code or not isinstance(code, str):
            code = " "

        encoded = self.tokenizer(
            code,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True,
        ).to(self.device)

        # Inference only — no gradients needed.
        with torch.no_grad():
            encoder_out = self.model(**encoded)

        # Global average pooling of the hidden states → flat numpy vector.
        return encoder_out.last_hidden_state.mean(dim=1).cpu().numpy().flatten()
codesense/explanations.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def generate_explanation(analysis_result: dict) -> dict:
    """Produce a human-readable explanation of the detected algorithm.

    Resolution order:
      1. the final resolved pattern name (highest authority),
      2. strong structural feature flags, checked in priority order,
      3. generic structural fallbacks (recursion shape, loop depth).

    Returns:
        dict: {"summary": str, "details": [str]}.
    """
    features = analysis_result.get("features", {})
    pattern = analysis_result.get("analysis", {}).get("pattern")

    # ------------------------------------------------------------
    # 1) Pattern-driven explanations (highest priority).
    # ------------------------------------------------------------
    pattern_texts = {
        "Quick Sort": (
            "|Quick Sort Pattern| A pivot element partitions the array into "
            "smaller subarrays which are recursively sorted."
        ),
        "Merge Sort": (
            "|Merge Sort Pattern| The array is recursively divided into halves, "
            "sorted independently, and merged back together."
        ),
        "Bubble Sort": (
            "|Bubble Sort Pattern| Adjacent elements are repeatedly compared "
            "and swapped if out of order. Larger elements 'bubble' to the end "
            "with each pass."
        ),
        "Insertion Sort": (
            "|Insertion Sort Pattern| Elements are inserted into their correct "
            "position within the sorted portion of the list by shifting larger elements."
        ),
        "Heap-Based Algorithm": (
            "|Heap / Priority Queue Pattern| The algorithm uses a heap data "
            "structure to maintain ordered elements efficiently, typically "
            "enabling O(log n) insertions and removals."
        ),
        "Breadth-First Search": (
            "|Breadth-first search Pattern| The algorithm explores nodes "
            "level by level using a queue."
        ),
        "Depth-First Search": (
            "|Depth-first traversal Pattern| The algorithm explores as far "
            "as possible along each branch before backtracking."
        ),
        "Binary Search": (
            "|Binary search Pattern| The algorithm repeatedly halves the "
            "search space, resulting in logarithmic time complexity."
        ),
        "Memoization": (
            "|Memoization Pattern| Previously computed results are stored "
            "to avoid redundant recursive calls."
        ),
        "Tabulation": (
            "|Tabulation Dynamic Programming Pattern| A table is built "
            "iteratively using previously computed subproblem results."
        ),
        "Sliding Window": (
            "|Sliding Window Pattern| A window expands and contracts across "
            "the data structure while maintaining a running condition."
        ),
        "Two-Pointer Technique": (
            "|Two-pointer technique Pattern| Two indices move toward each other "
            "in a controlled manner during a single traversal."
        ),
    }
    # "Heap Sort" shares the heap explanation.
    pattern_texts["Heap Sort"] = pattern_texts["Heap-Based Algorithm"]

    summary = pattern_texts.get(pattern)

    # ------------------------------------------------------------
    # 2) Structural feature fallback (only if no pattern matched).
    #    Order matters: stronger signals come first.
    # ------------------------------------------------------------
    if summary is None:
        fallbacks = [
            (lambda f: f.get("heap_pattern"),
             "|Heap Pattern| The algorithm relies on a heap data structure "
             "for ordered extraction or insertion."),
            (lambda f: f.get("memoization_pattern"),
             "|Memoization Pattern| Previously computed results are reused "
             "to reduce redundant computation."),
            (lambda f: f.get("tabulation_pattern"),
             "|Tabulation Pattern| A dynamic programming table is constructed "
             "iteratively to build the final solution."),
            (lambda f: f.get("bfs_pattern"),
             "|Breadth-first search Pattern| The algorithm processes elements "
             "level by level using a queue."),
            (lambda f: f.get("binary_search_pattern"),
             "|Binary search Pattern| The search space is repeatedly divided in half."),
            (lambda f: f.get("sliding_window_pattern"),
             "|Sliding Window Pattern| A dynamic window adjusts across input "
             "to maintain a condition efficiently."),
            (lambda f: f.get("pointer_updates", 0) >= 2,
             "|Two-pointer Pattern| Two pointers are adjusted during traversal "
             "to control search or comparison."),
            (lambda f: f.get("dfs_pattern"),
             "|Depth-first traversal Pattern| The algorithm explores branches "
             "deeply before backtracking."),
            (lambda f: f.get("merge_sort_pattern"),
             "|Merge Sort Pattern| Recursive division and merging strategy detected."),
            (lambda f: f.get("quick_sort_pattern"),
             "|Quick Sort Pattern| Partition-based recursive sorting detected."),
            (lambda f: f.get("divide_and_conquer"),
             "|Divide-and-conquer Pattern| The problem is split into smaller "
             "subproblems and their results are combined."),
            (lambda f: f.get("recursion") and f.get("recursive_call_count", 0) > 1,
             "Multiple recursive calls per invocation detected, suggesting "
             "exponential growth."),
            (lambda f: f.get("recursion"),
             "Single recursive call per invocation detected, suggesting "
             "linear recursion depth."),
            (lambda f: f.get("bubble_sort_pattern"),
             "|Bubble Sort Pattern| Repeated adjacent swaps detected."),
            (lambda f: f.get("insertion_sort_pattern"),
             "|Insertion Sort Pattern| Element shifting within a sorted subarray detected."),
            (lambda f: f.get("max_loop_depth", 0) > 1,
             "Nested loop structures detected, indicating polynomial behavior."),
            (lambda f: f.get("max_loop_depth", 0) == 1,
             "Single loop traversal detected, indicating linear iteration."),
            # 3) Final generic fallback.
            (lambda f: True,
             "No significant structural patterns were detected."),
        ]
        for matches, text in fallbacks:
            if matches(features):
                summary = text
                break

    return {
        "summary": summary,
        "details": [summary]
    }
codesense/features.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+
3
class FeatureExtractor(ast.NodeVisitor):
    """
    Traverses the AST and extracts structural features
    from Python source code.

    Usage: instantiate, call .visit(tree), then read .features.
    The raw flags and counters collected here are combined into
    higher-level pattern booleans by extract_features().
    """

    def __init__(self):
        # Raw feature flags/counters populated during the AST walk.
        self.features = {
            # General structure
            "for_loops": 0,
            "while_loops": 0,
            "function_calls": set(),
            "recursion": False,
            "max_loop_depth": 0,
            "recursive_call_count": 0,
            "divide_and_conquer": False,
            "binary_search_pattern": False,

            # Two-pointer technique
            "pointer_variables": set(),
            "pointer_updates": 0,

            # BFS signals
            "bfs_pattern": False,
            "queue_variables": set(),
            "queue_operations": 0,
            "queue_pop_front": False,
            "queue_append_detected": False,
            "graph_iteration": False,

            # DFS signals
            "dfs_pattern": False,
            "uses_stack": False,
            "uses_pop": False,

            # Dynamic programming (combined flag + raw signal)
            "dp_pattern": False,
            "uses_dp_array": False,

            # Sorting signals
            "sorting_pattern": False,
            "bubble_sort_pattern": False,
            "insertion_sort_pattern": False,
            "adjacent_swap_detected": False,
            "insertion_shift_detected": False,

            # Memoization signals
            "memoization_pattern": False,
            "memo_dict_defined": False,
            "memo_lookup_detected": False,
            "memo_store_detected": False,

            # Tabulation signals
            "tabulation_pattern": False,
            "dp_self_dependency": False,
            "dp_dimension": 1,

            # Divide-and-conquer sorts
            "merge_sort_pattern": False,
            "quick_sort_pattern": False,

            # Sliding window signals
            "sliding_window_pattern": False,
            "window_updates": 0,
            "window_shrinks": 0,

            # Heap signals
            "heap_imported": False,
            "heap_operations": 0,
            "heap_pattern": False,
        }
        # NOTE(review): appears unused within this class;
        # current_function_name (below) is the field actually consulted.
        self.current_function = None

        # Loop-depth tracking for the nested-loop complexity heuristic.
        self.current_loop_depth = 0
        self.max_loop_depth = 0

        # Name of the function currently being visited (recursion detection).
        self.current_function_name = None

    def visit_Import(self, node):
        """Flag `import heapq` for heap-pattern detection."""
        for alias in node.names:
            if alias.name == "heapq":
                self.features["heap_imported"] = True
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        """Flag `from heapq import ...` for heap-pattern detection."""
        if node.module == "heapq":
            self.features["heap_imported"] = True
        self.generic_visit(node)

    def visit_FunctionDef(self, node):
        """Track the enclosing function name so visit_Call can spot recursion."""
        previous_function = self.current_function_name
        self.current_function_name = node.name

        self.generic_visit(node)

        # Restore on exit so nested defs do not leak their name outward.
        self.current_function_name = previous_function

    def visit_For(self, node):
        """Count for-loops, track nesting depth, and spot graph/window idioms."""
        self.features["for_loops"] += 1

        self.current_loop_depth += 1
        self.max_loop_depth = max(self.max_loop_depth, self.current_loop_depth)

        # Detect graph[node] iteration (adjacency-list traversal)
        if isinstance(node.iter, ast.Subscript):
            self.features["graph_iteration"] = True

        self.generic_visit(node)
        self.current_loop_depth -= 1

        # Loop variable named like a window's right edge → window expansion.
        if isinstance(node.target, ast.Name):
            var = node.target.id.lower()
            if var in ("right", "r", "end"):
                self.features["window_updates"] += 1

    def visit_While(self, node):
        """Count while-loops and track nesting depth."""
        self.features["while_loops"] += 1

        self.current_loop_depth += 1
        self.max_loop_depth = max(self.max_loop_depth, self.current_loop_depth)

        self.generic_visit(node)
        self.current_loop_depth -= 1

    def visit_Call(self, node):
        """Inspect calls for recursion, divide-and-conquer, queue/stack/heap use."""
        if isinstance(node.func, ast.Name):
            function_name = node.func.id

            self.features["function_calls"].add(function_name)

            # Detect recursion: call to the function we are currently inside
            if function_name == self.current_function_name:
                self.features["recursion"] = True
                self.features["recursive_call_count"] += 1

                # If recursion + loop present → DFS-style
                if self.features["for_loops"] >= 1:
                    self.features["dfs_pattern"] = True

            # Detect divide-and-conquer via the call's arguments
            for arg in node.args:

                # Case 1: n // 2 or n / 2
                if isinstance(arg, ast.BinOp) and isinstance(arg.op, (ast.FloorDiv, ast.Div)):
                    self.features["divide_and_conquer"] = True

                # Case 2: slicing like arr[:mid]
                if isinstance(arg, ast.Subscript):
                    if isinstance(arg.slice, ast.Slice):
                        self.features["divide_and_conquer"] = True
                # Recursing on slices is also treated as a merge-sort signal.
                if isinstance(arg, ast.Subscript) and isinstance(arg.slice, ast.Slice):
                    self.features["merge_sort_pattern"] = True

        # Detect queue operations on variables known to hold a deque
        if isinstance(node.func, ast.Attribute):
            if isinstance(node.func.value, ast.Name):
                var = node.func.value.id

                if var in self.features["queue_variables"]:
                    if node.func.attr in ("append", "popleft"):
                        self.features["queue_operations"] += 1

        # Detect stack.pop() or queue.pop()
        if isinstance(node.func, ast.Attribute):
            method = node.func.attr

            if method == "pop":
                self.features["uses_pop"] = True

            if method == "append":
                # mark append usage
                # NOTE(review): intentionally a no-op here; append is
                # recorded as queue_append_detected in the block below.
                pass

        # Detect pop(0) for list-based BFS
        if isinstance(node.func, ast.Attribute):
            method = node.func.attr

            # pop(0) — removing the front element (queue behaviour)
            if method == "pop":
                if node.args and isinstance(node.args[0], ast.Constant):
                    if node.args[0].value == 0:
                        self.features["queue_pop_front"] = True

            # append()
            if method == "append":
                self.features["queue_append_detected"] = True

            # popleft() — deque front removal
            if method == "popleft":
                self.features["queue_pop_front"] = True

        # Iterative DFS heuristic: explicit stack + pop + loop
        if (
            self.features["uses_stack"]
            and self.features["uses_pop"]
            and self.features["for_loops"] >= 1
        ):
            self.features["dfs_pattern"] = True

        # Heap operations via the heapq module
        if isinstance(node.func, ast.Attribute):
            if isinstance(node.func.value, ast.Name):
                if node.func.value.id == "heapq":
                    if node.func.attr in ("heappush", "heappop", "heapify"):
                        self.features["heap_operations"] += 1

        self.generic_visit(node)

    def visit_Assign(self, node):
        """Inspect assignments for the many idioms they can signal:
        midpoint computation, pointer init, deque/stack/memo creation,
        DP-table writes, swaps, shifts, and pivot selection."""

        # -------- Binary Search Pattern Detection --------
        # mid = (lo + hi) // 2 — an Add inside a FloorDiv.
        if isinstance(node.value, ast.BinOp):
            if isinstance(node.value.op, ast.FloorDiv):
                if isinstance(node.value.left, ast.BinOp):
                    if isinstance(node.value.left.op, ast.Add):
                        self.features["binary_search_pattern"] = True

        # -------- Two Pointer Detection --------
        if node.targets and isinstance(node.targets[0], ast.Name):
            var = node.targets[0].id

            # Case 1: left = 0
            if isinstance(node.value, (ast.Constant, ast.Num)):
                self.features["pointer_variables"].add(var)

            # Case 2: right = len(arr) - 1
            if isinstance(node.value, ast.BinOp):
                self.features["pointer_variables"].add(var)

        # -------- BFS Detection: q = deque(...) --------
        if isinstance(node.value, ast.Call):
            if isinstance(node.value.func, ast.Name):
                if node.value.func.id == "deque":
                    if node.targets and isinstance(node.targets[0], ast.Name):
                        var = node.targets[0].id
                        self.features["queue_variables"].add(var)

        # ------- Detect stack initialization: stack = [] / stack = list(...)
        if node.targets and isinstance(node.targets[0], ast.Name):
            var = node.targets[0].id

            if isinstance(node.value, (ast.List, ast.Call)):
                if var.lower() == "stack":
                    self.features["uses_stack"] = True

        # ------- Detect memo dictionary initialization: memo = {}
        if isinstance(node.value, ast.Dict):
            if node.targets and isinstance(node.targets[0], ast.Name):
                var = node.targets[0].id.lower()
                if var in ("memo", "cache", "dp"):
                    self.features["memo_dict_defined"] = True

        # Detect memo[n] = ... (storing a computed subresult)
        if node.targets and isinstance(node.targets[0], ast.Subscript):
            target = node.targets[0]

            if isinstance(target.value, ast.Name):
                var = target.value.id.lower()
                if var in ("memo", "cache", "dp"):
                    self.features["memo_store_detected"] = True

        # Detect 2D DP tables (list comprehension or nested list literal)
        if isinstance(node.value, ast.ListComp):
            self.features["dp_dimension"] = 2

        if isinstance(node.value, ast.List):
            if any(isinstance(el, ast.List) for el in node.value.elts):
                self.features["dp_dimension"] = 2

        # Detect true tabulation recurrence && 2D KNAPSACK FIX:
        # dp[...] = <expr referencing dp> (table depends on itself).
        if node.targets and isinstance(node.targets[0], ast.Subscript):
            target = node.targets[0]

            # Find base name under nested subscripts (e.g. dp[i][j])
            base = target.value
            while isinstance(base, ast.Subscript):
                base = base.value

            if isinstance(base, ast.Name):
                var = base.id.lower()

                if var in ("dp", "memo", "cache"):
                    for child in ast.walk(node.value):
                        if isinstance(child, ast.Name) and child.id.lower() == var:
                            self.features["dp_self_dependency"] = True

        # -------- Bubble Sort Adjacent Swap Detection --------
        # a[i], a[j] = a[j], a[i] — tuple swap of two subscripts.
        if (
            isinstance(node.targets[0], ast.Tuple)
            and isinstance(node.value, ast.Tuple)
            and len(node.targets[0].elts) == 2
            and len(node.value.elts) == 2
        ):
            left = node.targets[0].elts
            right = node.value.elts

            if all(isinstance(el, ast.Subscript) for el in left + right):
                self.features["adjacent_swap_detected"] = True

        # -------- Insertion Sort Shift Detection --------
        # a[j+1] = a[j] — subscript assigned from another subscript.
        if node.targets and isinstance(node.targets[0], ast.Subscript):
            target = node.targets[0]

            if isinstance(node.value, ast.Subscript):
                self.features["insertion_shift_detected"] = True

        # Quick Sort: assignment to a variable named "pivot" signals partitioning
        if node.targets and isinstance(node.targets[0], ast.Name):
            var = node.targets[0].id.lower()
            if var == "pivot":
                self.features["quick_sort_pattern"] = True

        self.generic_visit(node)

    def visit_AugAssign(self, node):
        """Track pointer movement (+=/-=) and window shrinks."""
        if isinstance(node.target, ast.Name):
            var = node.target.id

            # Moving a previously-seen pointer variable counts as an update.
            if isinstance(node.op, (ast.Add, ast.Sub)):
                if var in self.features["pointer_variables"]:
                    self.features["pointer_updates"] += 1

        if isinstance(node.target, ast.Name):
            var = node.target.id.lower()

            # Advancing a left edge looks like a sliding-window shrink.
            if var in ("left", "l", "start"):
                self.features["window_shrinks"] += 1

        self.generic_visit(node)

    # ------ subscript access -----
    def visit_Subscript(self, node):
        """Flag any read/write of a dp/memo/cache array."""
        # Walk up until we find the base name (handles dp[i][j] chains)
        base = node.value
        while isinstance(base, ast.Subscript):
            base = base.value

        if isinstance(base, ast.Name):
            var = base.id.lower()

            if var in ("dp", "memo", "cache"):
                self.features["uses_dp_array"] = True

        self.generic_visit(node)

    def visit_Compare(self, node):
        """Detect memo lookups of the form `x in memo/cache/dp`."""
        if any(isinstance(op, ast.In) for op in node.ops):
            for comparator in node.comparators:
                if isinstance(comparator, ast.Name):
                    if comparator.id.lower() in ("memo", "cache", "dp"):
                        self.features["memo_lookup_detected"] = True

        self.generic_visit(node)
352
def extract_features(tree: ast.AST) -> dict:
    """Walk *tree* with a FeatureExtractor and return the feature dict.

    After the raw traversal, the low-level signals are combined into
    higher-level composite flags: BFS, memoization, tabulation, DP,
    sorting variants, sliding window, and heap usage.
    """
    visitor = FeatureExtractor()
    visitor.visit(tree)

    feats = visitor.features
    feats["max_loop_depth"] = visitor.max_loop_depth

    # BFS: a while-driven queue (front pops + appends) over graph adjacency.
    bfs_signals = (
        feats["while_loops"] >= 1
        and feats["queue_pop_front"]
        and feats["queue_append_detected"]
        and feats["graph_iteration"]
    )
    if bfs_signals:
        feats["bfs_pattern"] = True

    # High-confidence memoization: recursive code that defines, checks,
    # and writes a memo/cache dictionary.
    memo_signals = (
        feats["recursion"]
        and feats["memo_dict_defined"]
        and feats["memo_lookup_detected"]
        and feats["memo_store_detected"]
    )
    if memo_signals:
        feats["memoization_pattern"] = True

    # Tabulation: iterative dp[] updates that reference dp[] itself.
    if feats["uses_dp_array"] and feats["dp_self_dependency"] and feats["for_loops"] >= 1:
        feats["tabulation_pattern"] = True

    # Either DP style counts as dynamic programming overall.
    if feats["memoization_pattern"] or feats["tabulation_pattern"]:
        feats["dp_pattern"] = True

    # Sorting detection requires nested loops; swap vs. shift picks the variant.
    if feats["max_loop_depth"] >= 2:
        if feats["adjacent_swap_detected"]:
            feats["bubble_sort_pattern"] = True
            feats["sorting_pattern"] = True
        elif feats["insertion_shift_detected"]:
            feats["insertion_sort_pattern"] = True
            feats["sorting_pattern"] = True

    # Sliding window: an expanding edge (for) plus a shrinking edge (while).
    window_signals = (
        feats["for_loops"] >= 1
        and feats["while_loops"] >= 1
        and feats["window_updates"] >= 1
        and feats["window_shrinks"] >= 1
    )
    if window_signals:
        feats["sliding_window_pattern"] = True

    # Heap: heapq imported AND actually used at least once.
    if feats["heap_imported"] and feats["heap_operations"] >= 1:
        feats["heap_pattern"] = True

    return feats
codesense/ml/__init__.py ADDED
File without changes
codesense/ml/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (152 Bytes). View file
 
codesense/ml/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (158 Bytes). View file
 
codesense/ml/__pycache__/embedder.cpython-310.pyc ADDED
Binary file (1.64 kB). View file
 
codesense/ml/__pycache__/embedder.cpython-314.pyc ADDED
Binary file (2.91 kB). View file
 
codesense/ml/__pycache__/interface.cpython-310.pyc ADDED
Binary file (609 Bytes). View file
 
codesense/ml/__pycache__/interface.cpython-314.pyc ADDED
Binary file (938 Bytes). View file
 
codesense/ml/__pycache__/similarity.cpython-310.pyc ADDED
Binary file (7.05 kB). View file
 
codesense/ml/__pycache__/similarity.cpython-314.pyc ADDED
Binary file (8.07 kB). View file
 
codesense/parser.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+
3
def parse_code(source_code: str) -> ast.AST:
    """Convert a string of Python source into its AST representation.

    Parameters:
        source_code (str): Python source code as a string.

    Returns:
        ast.AST: Root node of the parsed abstract syntax tree.

    Raises:
        SyntaxError: If *source_code* is not valid Python.
    """
    tree = ast.parse(source_code)
    return tree
codesense/rules.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def detect_algorithm(features: dict) -> dict:
    """Map extracted code features to the most specific matching algorithm.

    Rules are checked from most specific (dynamic programming, heap, search,
    graph, pointer techniques, sorting, recursion) down to generic iterative
    shapes; the first rule that matches wins.

    Parameters:
        features (dict): Feature flags and counters produced by the
            feature-extraction pass (missing keys are treated as falsy/zero).

    Returns:
        dict: {"pattern": <name>, "category": <family>}; both are
            "Unknown" when no rule matches.
    """
    loop_count = features.get("for_loops", 0) + features.get("while_loops", 0)

    # Two-pointer heuristic: at least two pointer variables moving inside
    # at least one while-loop.
    is_two_pointer = (
        len(features.get("pointer_variables", [])) >= 2
        and features.get("pointer_updates", 0) >= 2
        and features.get("while_loops", 0) >= 1
    )

    # Ordered rule table: (matched?, pattern, category). Order matters —
    # it preserves the original specificity cascade.
    rules = [
        (features.get("memoization_pattern"), "Memoization", "Dynamic Programming"),
        (features.get("tabulation_pattern"), "Tabulation", "Dynamic Programming"),
        (features.get("heap_pattern"), "Heap-Based Algorithm", "Data Structure Based"),
        (features.get("binary_search_pattern"), "Binary Search", "Search Algorithm"),
        (features.get("bfs_pattern"), "Breadth-First Search", "Graph Algorithm"),
        (features.get("dfs_pattern"), "Depth-First Search", "Graph Algorithm"),
        (features.get("sliding_window_pattern"), "Sliding Window", "Pointer Technique"),
        (is_two_pointer, "Two-Pointer Technique", "Pointer Technique"),
        (features.get("bubble_sort_pattern"), "Bubble Sort", "Sorting Algorithm"),
        (features.get("insertion_sort_pattern"), "Insertion Sort", "Sorting Algorithm"),
        (features.get("merge_sort_pattern"), "Merge Sort", "Sorting Algorithm"),
        (features.get("quick_sort_pattern"), "Quick Sort", "Sorting Algorithm"),
        (features.get("divide_and_conquer"), "Recursive Divide-and-Conquer", "Divide-and-Conquer"),
        (
            features.get("recursion") and features.get("recursive_call_count", 0) >= 2,
            "Recursive (Exponential)",
            "Recursive Pattern",
        ),
        (features.get("recursion"), "Recursive (Linear)", "Recursive Pattern"),
        (features.get("max_loop_depth", 0) >= 2, "Nested Iterative", "Iterative Pattern"),
        (loop_count == 1, "Linear Iterative", "Iterative Pattern"),
        (loop_count == 0, "Constant-Time", "Direct Computation"),
    ]

    for matched, pattern, category in rules:
        if matched:
            return {"pattern": pattern, "category": category}

    return {"pattern": "Unknown", "category": "Unknown"}
codesense/similarity.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import warnings
4
+ warnings.filterwarnings("ignore", category=FutureWarning)
5
+
6
+ from .embedder import CodeT5Embedder
7
+
8
+ # -------- Singleton Embedder --------
9
+ _embedder = CodeT5Embedder()
10
+
11
+ # ============================================================
12
+ # ML v2 PROTOTYPE STRUCTURE
13
+ # Category → Algorithm → [Variants]
14
+ # ============================================================
15
+
16
# Reference implementations ("prototypes") for each algorithm family.
# Structure: category name -> algorithm name -> list of source-code variants.
# Each variant string is embedded once at import time (by the precompute
# pass defined later in this module) and then compared against user code
# via cosine similarity in predict_algorithm().
PROTOTYPES = {

    "Sorting Algorithm": {

        "Bubble Sort": [
            # Classic
            """
            def bubble_sort(arr):
                for i in range(len(arr)):
                    for j in range(len(arr)-i-1):
                        if arr[j] > arr[j+1]:
                            arr[j], arr[j+1] = arr[j+1], arr[j]
            """,
            # Optimized (swapped flag)
            """
            def bubble_sort(arr):
                n = len(arr)
                for i in range(n):
                    swapped = False
                    for j in range(0, n-i-1):
                        if arr[j] > arr[j+1]:
                            arr[j], arr[j+1] = arr[j+1], arr[j]
                            swapped = True
                    if not swapped:
                        break
            """
        ],

        "Insertion Sort": [
            # Shift-based
            """
            def insertion_sort(arr):
                for i in range(1, len(arr)):
                    key = arr[i]
                    j = i - 1
                    while j >= 0 and arr[j] > key:
                        arr[j+1] = arr[j]
                        j -= 1
                    arr[j+1] = key
            """,
            # Swap-based variant
            """
            def insertion_sort(arr):
                for i in range(1, len(arr)):
                    j = i
                    while j > 0 and arr[j] < arr[j-1]:
                        arr[j], arr[j-1] = arr[j-1], arr[j]
                        j -= 1
            """
        ],

        "Merge Sort": [
            # Slicing-based
            """
            def merge_sort(arr):
                if len(arr) <= 1:
                    return arr
                mid = len(arr)//2
                left = merge_sort(arr[:mid])
                right = merge_sort(arr[mid:])
                return merge(left, right)
            """,
            # Index-based (GFG style)
            """
            def merge(arr, l, m, r):
                n1 = m - l + 1
                n2 = r - m
                L = [0] * n1
                R = [0] * n2
                for i in range(n1):
                    L[i] = arr[l + i]
                for j in range(n2):
                    R[j] = arr[m + 1 + j]
                i = j = 0
                k = l
                while i < n1 and j < n2:
                    if L[i] <= R[j]:
                        arr[k] = L[i]
                        i += 1
                    else:
                        arr[k] = R[j]
                        j += 1
                    k += 1

            def merge_sort(arr, l, r):
                if l < r:
                    m = l + (r - l)//2
                    merge_sort(arr, l, m)
                    merge_sort(arr, m+1, r)
                    merge(arr, l, m, r)
            """
        ],

        "Quick Sort": [
            # List-comprehension variant
            """
            def quick_sort(arr):
                if len(arr) <= 1:
                    return arr
                pivot = arr[0]
                left = [x for x in arr[1:] if x <= pivot]
                right = [x for x in arr[1:] if x > pivot]
                return quick_sort(left) + [pivot] + quick_sort(right)
            """,
            # Partition-based (GFG style)
            """
            def partition(arr, low, high):
                pivot = arr[high]
                i = low - 1
                for j in range(low, high):
                    if arr[j] <= pivot:
                        i += 1
                        arr[i], arr[j] = arr[j], arr[i]
                arr[i+1], arr[high] = arr[high], arr[i+1]
                return i+1

            def quick_sort(arr, low, high):
                if low < high:
                    pi = partition(arr, low, high)
                    quick_sort(arr, low, pi-1)
                    quick_sort(arr, pi+1, high)
            """
        ],

        "Heap Sort": [
            # heapq-based
            """
            import heapq
            def heap_sort(arr):
                heapq.heapify(arr)
                return [heapq.heappop(arr) for _ in range(len(arr))]
            """,
            # Manual heapify (GFG style)
            """
            def heapify(arr, n, i):
                largest = i
                l = 2*i + 1
                r = 2*i + 2
                if l < n and arr[l] > arr[largest]:
                    largest = l
                if r < n and arr[r] > arr[largest]:
                    largest = r
                if largest != i:
                    arr[i], arr[largest] = arr[largest], arr[i]
                    heapify(arr, n, largest)

            def heap_sort(arr):
                n = len(arr)
                for i in range(n//2 - 1, -1, -1):
                    heapify(arr, n, i)
                for i in range(n-1, 0, -1):
                    arr[i], arr[0] = arr[0], arr[i]
                    heapify(arr, i, 0)
            """
        ]
    },

    "Dynamic Programming": {
        "Memoization": [
            """
            memo = {}
            def fib(n):
                if n in memo:
                    return memo[n]
                if n <= 1:
                    return n
                memo[n] = fib(n-1) + fib(n-2)
                return memo[n]
            """
        ],

        "Tabulation": [
            """
            def fib(n):
                dp = [0]*(n+1)
                dp[1] = 1
                for i in range(2, n+1):
                    dp[i] = dp[i-1] + dp[i-2]
                return dp[n]
            """,
            """
            def knapsack(weights, values, capacity):
                n = len(weights)
                dp = [[0]*(capacity+1) for _ in range(n+1)]
                for i in range(1, n+1):
                    for w in range(capacity+1):
                        if weights[i-1] <= w:
                            dp[i][w] = max(values[i-1] + dp[i-1][w-weights[i-1]],
                                           dp[i-1][w])
                        else:
                            dp[i][w] = dp[i-1][w]
            """
        ]
    },

    "Graph Algorithm": {
        "Breadth-First Search": [
            """
            from collections import deque
            def bfs(graph, start):
                visited = set()
                queue = deque([start])
                while queue:
                    node = queue.popleft()
                    for neighbor in graph[node]:
                        if neighbor not in visited:
                            visited.add(neighbor)
                            queue.append(neighbor)
            """
        ],

        "Depth-First Search": [
            # Recursive
            """
            def dfs(graph, node, visited):
                visited.add(node)
                for neighbor in graph[node]:
                    if neighbor not in visited:
                        dfs(graph, neighbor, visited)
            """,
            # Iterative
            """
            def dfs(graph, start):
                visited = set()
                stack = [start]
                while stack:
                    node = stack.pop()
                    if node not in visited:
                        visited.add(node)
                        for neighbor in graph[node]:
                            stack.append(neighbor)
            """
        ]
    },

    "Pointer Technique": {
        "Two-Pointer Technique": [
            """
            def two_sum_sorted(arr, target):
                left, right = 0, len(arr)-1
                while left < right:
                    s = arr[left] + arr[right]
                    if s == target:
                        return True
                    elif s < target:
                        left += 1
                    else:
                        right -= 1
            """
        ],

        "Sliding Window": [
            """
            def max_subarray(arr, k):
                current_sum = 0
                left = 0
                for right in range(len(arr)):
                    current_sum += arr[right]
                    if right-left+1 > k:
                        current_sum -= arr[left]
                        left += 1
            """
        ]
    },

    "Search Algorithm": {
        "Binary Search": [
            """
            def binary_search(arr, target):
                left, right = 0, len(arr)-1
                while left <= right:
                    mid = (left+right)//2
                    if arr[mid] == target:
                        return mid
                    elif arr[mid] < target:
                        left = mid+1
                    else:
                        right = mid-1
            """
        ]
    },

    "Data Structure Based": {
        "Heap-Based Algorithm": [
            """
            import heapq
            def top_k(nums, k):
                heap = []
                for num in nums:
                    heapq.heappush(heap, num)
                    if len(heap) > k:
                        heapq.heappop(heap)
            """
        ]
    }
}
312
+
313
+ # ============================================================
314
+ # PRECOMPUTE EMBEDDINGS
315
+ # ============================================================
316
+
317
# Embed every prototype variant once at import time so each similarity
# query only has to embed the user's code, not the prototypes.
# Mirrors the PROTOTYPES structure: category -> algorithm -> [embeddings].
_PROTOTYPE_EMBEDDINGS = {
    category: {
        algo_name: [_embedder.embed(snippet) for snippet in variants]
        for algo_name, variants in algorithms.items()
    }
    for category, algorithms in PROTOTYPES.items()
}
326
+ # ============================================================
327
+ # ML v2 PREDICTION
328
+ # ============================================================
329
+
330
def predict_algorithm(code: str) -> dict:
    """Predict the closest-matching algorithm for *code* via embedding similarity.

    Embeds the user's code once, then compares it against every precomputed
    prototype embedding using cosine similarity. The single best match across
    all categories determines the prediction; the best score within each
    category is also reported.

    Parameters:
        code (str): Source code to classify.

    Returns:
        dict: {
            "ml_prediction": best-matching algorithm name (None if no prototypes),
            "ml_category": category of the best match (None if no prototypes),
            "confidence": best cosine similarity, rounded to 3 decimals,
            "category_scores": best similarity per category, rounded to 3 decimals,
        }
    """
    user_embedding = _embedder.embed(code)
    # Hoisted out of the loops: the user's embedding tensor is invariant
    # across all prototype comparisons (originally rebuilt per comparison).
    user_tensor = torch.tensor(user_embedding).unsqueeze(0)

    best_algorithm = None
    best_category = None
    best_score = -1.0

    category_scores = {}

    for category, algorithms in _PROTOTYPE_EMBEDDINGS.items():
        category_best = -1.0

        for algo_name, variant_embeddings in algorithms.items():
            for proto_embedding in variant_embeddings:
                similarity = F.cosine_similarity(
                    user_tensor,
                    torch.tensor(proto_embedding).unsqueeze(0)
                ).item()

                # Track global best
                if similarity > best_score:
                    best_score = similarity
                    best_algorithm = algo_name
                    best_category = category

                # Track best per category
                if similarity > category_best:
                    category_best = similarity

        category_scores[category] = round(category_best, 3)

    return {
        "ml_prediction": best_algorithm,
        "ml_category": best_category,
        "confidence": round(best_score, 3),
        "category_scores": category_scores
    }