''' Module for analyzing code snippets to determine the environments, dependencies, and other information needed to run the code. ''' from enum import StrEnum from typing import Any, Generator, TypeAlias, TypedDict, Set import base64 import ast from tree_sitter import Language, Node, Parser import tree_sitter_javascript import tree_sitter_typescript import sys import re class SandboxEnvironment(StrEnum): AUTO = 'Auto' # Web UI Frameworks HTML = 'HTML' REACT = 'React' VUE = 'Vue' GRADIO = 'Gradio' STREAMLIT = 'Streamlit' PYGAME = 'PyGame' MERMAID = 'Mermaid' # Runner PYTHON_RUNNER = 'Python Runner' JAVASCRIPT_RUNNER = 'Javascript Runner' # Compiler C_RUNNER = 'C Runner' CPP_RUNNER = 'C++ Runner' # CSHARP_RUNNER = 'C# Runner' JAVA_RUNNER = 'Java Runner' RUST_RUNNER = 'Rust Runner' GOLANG_RUNNER = 'Golang Runner' def extract_python_imports(code: str) -> list[str]: ''' Extract Python package imports using AST parsing. Returns a list of top-level package names. ''' try: tree = ast.parse(code) except SyntaxError: return [] packages: Set[str] = set() for node in ast.walk(tree): try: if isinstance(node, ast.Import): for name in node.names: # Get the top-level package name from any dotted path # e.g., 'foo.bar.baz' -> 'foo' if name.name: # Ensure there's a name packages.add(name.name.split('.')[0]) elif isinstance(node, ast.ImportFrom): # Skip relative imports (those starting with dots) if node.level == 0 and node.module: # Get the top-level package name # e.g., from foo.bar import baz -> 'foo' packages.add(node.module.split('.')[0]) # Also check for common dynamic import patterns elif isinstance(node, ast.Call): if isinstance(node.func, ast.Name) and node.func.id == 'importlib': # Handle importlib.import_module('package') if len(node.args) > 0 and isinstance(node.args[0], ast.Str): packages.add(node.args[0].s.split('.')[0]) elif isinstance(node.func, ast.Attribute) and isinstance(node.func.value, ast.Name): # Handle __import__('package') and importlib.import_module('package') if node.func.value.id == 'importlib' and node.func.attr == 'import_module': if len(node.args) > 0 and isinstance(node.args[0], ast.Str): packages.add(node.args[0].s.split('.')[0]) elif node.func.attr == '__import__': if len(node.args) > 0 and isinstance(node.args[0], ast.Str): packages.add(node.args[0].s.split('.')[0]) except Exception as e: pass continue # Filter out standard library modules using sys.stdlib_module_names std_libs = set(sys.stdlib_module_names) return list(packages - std_libs) def extract_js_imports(code: str) -> list[str]: ''' Extract npm package imports using Tree-sitter for robust parsing. Handles both JavaScript and TypeScript code, including Vue SFC. Returns a list of package names. ''' try: # For Vue SFC, extract the script section first script_match = re.search(r'(.*?)', code, re.DOTALL) if script_match: code = script_match.group(1).strip() # Initialize parsers with language modules ts_parser = Parser(Language(tree_sitter_typescript.language_tsx())) js_parser = Parser(Language(tree_sitter_javascript.language())) # Try parsing as TypeScript first, then JavaScript code_bytes = bytes(code, "utf8") try: tree = ts_parser.parse(code_bytes) except Exception as e: pass try: tree = js_parser.parse(code_bytes) except Exception as e: pass tree = None if tree is None: raise Exception("Both TypeScript and JavaScript parsing failed") packages: Set[str] = set() def extract_package_name(node: Node) -> str | None: """Extract npm package name from string or template string. Returns None for local aliases like @/ or relative paths.""" if node.type in ['string', 'string_fragment']: pkg_path = code[node.start_byte:node.end_byte].strip('"\'') if pkg_path.startswith('.') or pkg_path.startswith('/') or pkg_path.startswith('@/'): return None # relative, absolute, or alias path # Scoped npm package: @scope/package/... if pkg_path.startswith('@'): parts = pkg_path.split('/') if len(parts) >= 2: return '/'.join(parts[:2]) # Regular npm package: "lodash/cloneDeep" -> "lodash" return pkg_path.split('/')[0] elif node.type == 'template_string': content = '' has_template_var = False for child in node.children: if child.type == 'string_fragment': content += code[child.start_byte:child.end_byte] elif child.type == 'template_substitution': has_template_var = True if not content or content.startswith('.') or content.startswith('/') or content.startswith('@/'): return None if has_template_var: if content.endswith('-literal'): return 'package-template-literal' return None if content.startswith('@'): parts = content.split('/') if len(parts) >= 2: return '/'.join(parts[:2]) return content.split('/')[0] return None def visit_node(node: Node) -> None: if node.type == 'import_statement': # Handle ES6 imports string_node = node.child_by_field_name('source') if string_node: pkg_name = extract_package_name(string_node) if pkg_name: packages.add(pkg_name) elif node.type == 'export_statement': # Handle re-exports source = node.child_by_field_name('source') if source: pkg_name = extract_package_name(source) if pkg_name: packages.add(pkg_name) elif node.type == 'call_expression': # Handle require calls and dynamic imports func_node = node.child_by_field_name('function') if func_node and func_node.text: func_name = func_node.text.decode('utf8') if func_name in ['require', 'import']: args = node.child_by_field_name('arguments') if args and args.named_children: arg = args.named_children[0] pkg_name = extract_package_name(arg) if pkg_name: packages.add(pkg_name) # Recursively visit children for child in node.children: visit_node(child) visit_node(tree.root_node) return list(packages) except Exception as e: pass # Fallback to basic regex parsing if tree-sitter fails packages: Set[str] = set() # First try to extract script section for Vue SFC script_match = re.search(r'(.*?)', code, re.DOTALL) if script_match: code = script_match.group(1).strip() # Look for imports import_patterns = [ # dynamic imports r'(?:import|require)\s*\(\s*[\'"](@?[\w-]+(?:/[\w-]+)*)[\'"]', # static imports r'(?:import|from)\s+[\'"](@?[\w-]+(?:/[\w-]+)*)[\'"]', # require statements r'require\s*\(\s*[\'"](@?[\w-]+(?:/[\w-]+)*)[\'"]', ] for pattern in import_patterns: matches = re.finditer(pattern, code) for match in matches: pkg_name = match.group(1) if not pkg_name.startswith('.'): if pkg_name.startswith('@'): parts = pkg_name.split('/') if len(parts) >= 2: packages.add('/'.join(parts[:2])) else: packages.add(pkg_name.split('/')[0]) return list(packages) def determine_python_environment(code: str, install_command: str) -> SandboxEnvironment | None: ''' Determine Python sandbox environment based on install command and AST analysis. ''' try: tree = ast.parse(code) for node in ast.walk(tree): # Check for specific framework usage patterns if isinstance(node, ast.Name) and node.id == 'gr': return SandboxEnvironment.GRADIO elif isinstance(node, ast.Name) and node.id == 'st': return SandboxEnvironment.STREAMLIT except SyntaxError: pass # Check install command for framework detection if install_command and 'pygame' in install_command: return SandboxEnvironment.PYGAME elif install_command and 'gradio' in install_command: return SandboxEnvironment.GRADIO elif install_command and 'streamlit' in install_command: return SandboxEnvironment.STREAMLIT # elif install_command and 'nicegui' in install_command: # return SandboxEnvironment.NICEGUI return SandboxEnvironment.PYTHON_RUNNER def determine_jsts_environment(code: str, install_command: str) -> SandboxEnvironment | None: ''' Determine JavaScript/TypeScript sandbox environment based on install command and AST analysis. ''' # First check for Vue SFC structure if '