Spaces:

sabssag
/

Latex_to_Python_CodeT5-base

Runtime error

App Files Files Community

sabssag committed on Sep 4, 2024

Commit

4f37b5c

verified ·

1 Parent(s): c7260b4

Update app.py

Browse files

Files changed (1) hide show

app.py +198 -8

app.py CHANGED Viewed

@@ -8,18 +8,208 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 model_repo_path = 'sabssag/Latex_to_Python_CodeT5-base'
 model = AutoModelForSeq2SeqLM.from_pretrained(model_repo_path)
 tokenizer = AutoTokenizer.from_pretrained(model_repo_path)
-# Function to generate Python code from LaTeX expression
-def generate_code_from_latex(latex_expression, max_length=256):
-    inputs = tokenizer(f"Latex Expression: {latex_expression} Solution:", return_tensors="pt").to(model.device)
-    # Generate the output
-    outputs = model.generate(**inputs, max_length=max_length)
-    # Decode the output into Python code
-    generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return generated_code
 # Streamlit app layout
 st.title("LaTeX to Python Code Generator")

 model_repo_path = 'sabssag/Latex_to_Python_CodeT5-base'
 model = AutoModelForSeq2SeqLM.from_pretrained(model_repo_path)
 tokenizer = AutoTokenizer.from_pretrained(model_repo_path)
+model.eval()
+import re
+import ast
+import torch
# Fix unmatched brackets
def fix_unmatched_brackets(code):
    """
    Return ``code`` with its round, square and curly brackets balanced.

    The text is scanned once from left to right:

    * a closing bracket whose opener is currently open is kept; any brackets
      opened after that opener and still unclosed are closed just before it
      (``"print(a[0)"`` -> ``"print(a[0])"``);
    * a closing bracket with no open counterpart is dropped
      (``"(]"`` -> ``"()"``);
    * brackets still open at the end are closed there, innermost first. If
      the text ends with a newline, that single newline is removed first so
      the closers stay on the last line of code.

    Brackets inside string literals or comments are not distinguished from
    real ones.

    Args:
        code: Source text to repair.

    Returns:
        The repaired source text.
    """
    bracket_pairs = {'(': ')', '[': ']', '{': '}'}
    closers = set(bracket_pairs.values())
    expected = []  # closers owed for the brackets that are currently open
    pieces = []
    for char in code:
        if char in bracket_pairs:
            expected.append(bracket_pairs[char])
        elif char in closers:
            if char not in expected:
                # Nothing open that this could close: drop the stray closer.
                continue
            # Close inner brackets that were left open, then match this one.
            while expected[-1] != char:
                pieces.append(expected.pop())
            expected.pop()
        pieces.append(char)
    new_code = ''.join(pieces)
    if expected:
        if new_code.endswith('\n'):
            new_code = new_code[:-1]
        new_code += ''.join(reversed(expected))
    return new_code
# Validate and correct bracket balance
def validate_bracket_balance(code):
    """
    Return ``code`` with misaligned closing brackets removed and missing
    closing brackets appended.

    A closing bracket is misaligned when it does not match the most recently
    opened bracket that is still open. Such a bracket is dropped; it is not
    replaced by ``#``, because that would comment out the remainder of its
    line together with any closers appended afterwards. The whole text is
    scanned, so openers that follow a misaligned closer are still closed at
    the end.

    Brackets inside string literals or comments are not distinguished from
    real ones.

    Args:
        code: Source text to check.

    Returns:
        The text with balanced brackets.
    """
    bracket_map = {')': '(', ']': '[', '}': '{'}
    closer_for = {opener: closer for closer, opener in bracket_map.items()}
    stack = []  # openers that have not been closed yet
    kept = []
    for char in code:
        if char in closer_for:
            stack.append(char)
        elif char in bracket_map:
            if stack and stack[-1] == bracket_map[char]:
                stack.pop()
            else:
                # Misaligned closer: leave it out of the result.
                continue
        kept.append(char)
    # Close whatever is still open, innermost first.
    kept.extend(closer_for[opener] for opener in reversed(stack))
    return ''.join(kept)
# Add missing imports based on used functions
def add_missing_imports(code):
    """
    Prepend the sympy / numpy imports that ``code`` appears to need.

    Every identifier in ``code`` is compared against a fixed set of sympy
    names. If any of them is not covered by an existing
    ``from sympy import ...`` line, all such lines that consist only of a
    name list are removed (wherever they are, including indented ones, with
    their indentation) and a single consolidated import is placed at the top.
    Lines of the malformed form ``from sympy import, ...`` are deleted.
    ``import numpy as np`` is prepended when ``np.`` is used as a name and
    that import text is not already present.

    Identifier detection is purely textual, so names that only occur in
    strings, comments or after a dot (``np.sqrt``) count as used.

    Args:
        code: Source text to inspect.

    Returns:
        The source text with imports added / cleaned up.
    """
    sympy_funcs = {
        "cot", "sqrt", "pi", "sin", "cos", "tan", "log", "Abs", "exp",
        "factorial", "csc", "sec", "asin", "acos", "atan", "Eq", "symbols", "Function", "Derivative"
    }
    used_names = set(re.findall(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b', code))
    # Names already imported from sympy; empty entries (from a trailing or
    # doubled comma) are discarded so they cannot end up in the new import.
    import_lists = re.findall(r'from sympy import ([a-zA-Z0-9_, ]+)', code)
    existing_imports_set = {
        name.strip()
        for import_list in import_lists
        for name in import_list.split(',')
        if name.strip()
    }
    required_imports = (used_names & sympy_funcs) - existing_imports_set
    if required_imports:
        all_names = sorted(existing_imports_set | required_imports)
        import_statement = f"from sympy import {', '.join(all_names)}\n"
        # Remove whole import lines, including leading indentation, so that
        # no stray whitespace is glued onto the following line; also handle
        # an import on the last line that has no trailing newline.
        code = re.sub(
            r'^[ \t]*from sympy import [a-zA-Z0-9_, ]+(?:\n|$)',
            '',
            code,
            flags=re.MULTILINE,
        )
        code = import_statement + code
    # Fully remove incorrect import statements (like `from sympy import, pi`)
    # without ever consuming the line that follows them.
    code = re.sub(
        r'^[ \t]*from sympy import,[^\n]*(?:\n|$)',
        '',
        code,
        flags=re.MULTILINE,
    )
    # `\b` keeps identifiers that merely end in "np" (e.g. `inp.`) from
    # triggering the numpy import.
    if re.search(r'\bnp\.', code) and "import numpy as np" not in code:
        code = "import numpy as np\n" + code
    return code
# Strip evalf() calls, including the malformed `*evalf()` spelling
def remove_evalf(code):
    """
    Delete every ``.evalf()`` and ``*evalf()`` occurrence from ``code``.

    After the deletions the text is passed through
    ``fix_unmatched_brackets`` and that result is returned.
    """
    evalf_patterns = (
        r'\.evalf\(\)',  # regular method call
        r'\*evalf\(\)',  # malformed form such as `x*evalf()`
    )
    for pattern in evalf_patterns:
        code = re.sub(pattern, '', code)
    return fix_unmatched_brackets(code)
def handle_sum_errors(code):
    """
    Unwrap calls to the builtin ``sum()`` whose argument does not look
    iterable.

    Only a bare ``sum(...)`` call without nested parentheses is considered;
    ``np.sum(...)``, ``obj.sum(...)`` and names that merely end in ``sum``
    (``cumsum``) are left untouched. Calls whose argument is a list / set /
    dict literal or comprehension, or a generator expression, are kept as
    they are. Any other call is replaced by its argument, wrapped in
    parentheses unless it is a single name, number or dotted name, so that
    operator precedence around the call is preserved.

    Args:
        code: Source text to rewrite.

    Returns:
        The rewritten source text.
    """
    def _unwrap(match):
        argument = match.group(1)
        stripped = argument.strip()
        if stripped.startswith(('[', '{')) or re.search(r'\bfor\b', stripped):
            # Clearly an iterable: the call is valid, keep it.
            return match.group(0)
        if re.fullmatch(r'[\w.]+', stripped):
            return argument
        return f'({argument})'

    # The lookbehind rejects `xsum(`, `cumsum(` and attribute calls `.sum(`.
    return re.sub(r'(?<![\w.])sum\(([^()]+)\)', _unwrap, code)
def complete_try_catch_block(code):
    """
    Give every ``try:`` block in ``code`` a generic ``except`` handler.

    Nothing is changed unless ``code`` contains ``try:`` and does not contain
    the text ``except`` anywhere. Otherwise, for each line that consists only
    of ``try:``, the handler

        except Exception as e:
            print(f"Error: {e}")

    is inserted after the block's body at the indentation of that ``try:``
    line, so the body stays inside the ``try``. The body is the run of
    following lines that are indented deeper than the ``try:`` line; an empty
    body is filled with ``pass``. Nested ``try:`` blocks are handled the same
    way.

    Args:
        code: Source text to repair.

    Returns:
        The source text with the handlers added.
    """
    if 'try:' not in code or 'except' in code:
        return code
    return '\n'.join(_close_try_blocks(code.split('\n')))


def _indent_width(line):
    """Return the number of leading whitespace characters of ``line``."""
    return len(line) - len(line.lstrip())


def _close_try_blocks(lines):
    """Return ``lines`` with a generic handler added after each ``try:`` body."""
    result = []
    index = 0
    total = len(lines)
    while index < total:
        line = lines[index]
        result.append(line)
        index += 1
        if line.strip() != 'try:':
            continue
        indent = line[:_indent_width(line)]
        # Collect the body: deeper-indented lines plus blank lines between them.
        body = []
        while index < total and (
            not lines[index].strip()
            or _indent_width(lines[index]) > len(indent)
        ):
            body.append(lines[index])
            index += 1
        # Blank lines at the end of the body belong after the handler.
        trailing_blanks = []
        while body and not body[-1].strip():
            trailing_blanks.append(body.pop())
        body = _close_try_blocks(body) if body else [indent + '    pass']
        result.extend(body)
        result.append(indent + 'except Exception as e:')
        result.append(indent + '    print(f"Error: {e}")')
        result.extend(reversed(trailing_blanks))
    return result
+import re
def remove_extra_variables_from_function(code):
    """
    Drop parameters of the first function in ``code`` that are never used.

    The first ``def name(...):`` header is located and each of its
    comma-separated parameters is checked against the identifiers that occur
    in the text after that header. A parameter is kept, together with any
    annotation or default value it carries, when its name occurs there;
    otherwise it is removed. Only that one header is rewritten; every other
    part of ``code``, including further function definitions, is left as is.

    Parameters are split on commas, so a default value that itself contains a
    comma is not supported. Bare ``*`` and ``/`` markers are dropped.

    Args:
        code: Source text containing the function.

    Returns:
        The source text with the trimmed function header, or ``code``
        unchanged when no function header is found.
    """
    match = re.search(r'def\s+([a-zA-Z_][a-zA-Z0-9_]*)\((.*?)\):', code)
    if not match:
        return code
    func_name = match.group(1)
    # Everything after the matched header counts as the body.
    func_body = code[match.end():]
    used_vars = set(re.findall(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b', func_body))
    kept_args = []
    for raw_arg in match.group(2).split(','):
        arg = raw_arg.strip()
        # The parameter name is what precedes any `: annotation` / `= default`,
        # after an optional `*` / `**` prefix.
        name_match = re.match(r'\*{0,2}\s*([a-zA-Z_][a-zA-Z0-9_]*)', arg)
        if name_match and name_match.group(1) in used_vars:
            kept_args.append(arg)
    new_func_def = f"def {func_name}({', '.join(kept_args)}):"
    # Splice by position so only this header changes and the new text is
    # never interpreted as a regex replacement template.
    return code[:match.start()] + new_func_def + code[match.end():]
# Run all clean-up passes over the generated code
def post_process_code(code):
    """
    Apply each clean-up pass to ``code`` in a fixed order and return the
    final text. Every pass receives the output of the previous one.
    """
    passes = (
        fix_unmatched_brackets,
        validate_bracket_balance,
        add_missing_imports,
        remove_evalf,
        handle_sum_errors,
        complete_try_catch_block,
        remove_extra_variables_from_function,
    )
    for apply_pass in passes:
        code = apply_pass(code)
    return code
# Generate the final code from LaTeX
def generate_code(latex_expression, max_length=512):
    """
    Generate post-processed Python code for a LaTeX expression.

    The expression is embedded in the prompt
    ``"Latex Expression: <expression> Solution:"``, tokenized with the
    module-level ``tokenizer``, passed to the module-level ``model`` via
    ``generate``, decoded with special tokens skipped, and finally run
    through ``post_process_code``.

    Args:
        latex_expression: The LaTeX expression to translate.
        max_length: Passed to ``model.generate`` as ``max_length``.

    Returns:
        The decoded model output after post-processing.
    """
    prompt = f"Latex Expression: {latex_expression} Solution:"
    # Put the inputs on the device the model is actually on instead of
    # assuming "cuda": the model is loaded without being moved to a GPU, so a
    # hard-coded device fails on CPU-only hosts and mismatches otherwise.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=max_length)
    generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return post_process_code(generated_code)
 # Streamlit app layout
 st.title("LaTeX to Python Code Generator")