Spaces:

sabssag
/

Latex_to_Python_CodeT5-base

Runtime error

App Files Files Community

Latex_to_Python_CodeT5-base / app.py

sabssag

Update app.py

e6bfaba verified 10 months ago

raw

history blame contribute delete

8.98 kB

	import streamlit as st
	import torch
	from transformers import T5ForConditionalGeneration, RobertaTokenizer
	import re
	import ast

	# Load the fine-tuned model and tokenizer
	model_repo_path = 'sabssag/Latex_to_Python_CodeT5-base'


	@st.cache_resource
	def _load_model_and_tokenizer(repo_path):
	    """Load the model and tokenizer stored under *repo_path* exactly once.

	    Streamlit re-executes this script from the top on every widget
	    interaction; caching the loaded objects as a resource avoids calling
	    ``from_pretrained`` again on each rerun.

	    Returns:
	        A ``(model, tokenizer)`` tuple; the model has been put in eval mode.
	    """
	    # NOTE(review): float16 weights are kept as in the original code; half
	    # precision on CPU may be slow or unsupported for some ops depending on
	    # the installed torch version -- confirm against the target hardware.
	    loaded_model = T5ForConditionalGeneration.from_pretrained(repo_path, torch_dtype=torch.float16)
	    loaded_tokenizer = RobertaTokenizer.from_pretrained(repo_path)
	    loaded_model.eval()
	    return loaded_model, loaded_tokenizer


	model, tokenizer = _load_model_and_tokenizer(model_repo_path)

	# Fix unmatched brackets
	def fix_unmatched_brackets(code):
	    """Return *code* with its round, square and curly brackets balanced.

	    Rules applied while scanning left to right:

	    * A closing bracket with no pending opener of its kind is dropped.
	    * A closing bracket whose opener is pending but hidden behind
	      still-open inner brackets first closes those inner brackets
	      (``"f(a[1)"`` becomes ``"f(a[1])"``).
	    * Openers still pending at the end are closed in nesting order. If the
	      text ends with a newline, that single newline is removed first so the
	      closers land on the last line of code.

	    Brackets inside string literals or comments are not treated specially.

	    Args:
	        code: Source text to repair.

	    Returns:
	        The repaired text; every bracket in it has a matching partner.
	    """
	    bracket_pairs = {'(': ')', '[': ']', '{': '}'}
	    closers = set(bracket_pairs.values())

	    expected = []  # closers we still owe, innermost last
	    pieces = []

	    for char in code:
	        if char in bracket_pairs:
	            expected.append(bracket_pairs[char])
	        elif char in closers:
	            if char not in expected:
	                # Stray closer: nothing to match it against, so drop it.
	                continue
	            # Close any inner brackets that were left open before this one.
	            while expected[-1] != char:
	                pieces.append(expected.pop())
	            expected.pop()
	        pieces.append(char)

	    new_code = "".join(pieces)

	    if expected:
	        # Keep the appended closers on the last code line, not on a new one.
	        if new_code.endswith('\n'):
	            new_code = new_code[:-1]
	        new_code += "".join(reversed(expected))

	    return new_code
	# Validate and correct bracket balance
	def validate_bracket_balance(code):
	    """Check bracket balance and patch the first problem found.

	    The text is scanned left to right. The first closing bracket that does
	    not match the most recent pending opener is overwritten with ``'#'``
	    and scanning stops there. Closers for every opener that was pending at
	    that point (or at the end of the text, if no mismatch occurred) are then
	    appended to the end of the text, innermost first.

	    Args:
	        code: Source text to check.

	    Returns:
	        The patched text.
	    """
	    closer_for = {'(': ')', '[': ']', '{': '}'}
	    opener_for = {closer: opener for opener, closer in closer_for.items()}

	    pending = []
	    for pos, ch in enumerate(code):
	        if ch in closer_for:
	            pending.append(ch)
	            continue
	        if ch not in opener_for:
	            continue
	        if pending and pending[-1] == opener_for[ch]:
	            pending.pop()
	            continue
	        # Mismatched closer: overwrite it with '#' and stop scanning.
	        code = code[:pos] + '#' + code[pos + 1:]
	        break

	    return code + ''.join(closer_for[opener] for opener in reversed(pending))

	# Add missing imports based on used functions
	def add_missing_imports(code):
	    """Prepend the sympy / numpy imports that *code* appears to need.

	    Every identifier in *code* that is one of the known sympy names below
	    and is not already listed in a ``from sympy import ...`` line is added.
	    When something has to be added, all existing single-line sympy imports
	    are merged with the new names into one sorted import at the top.
	    Malformed lines of the form ``from sympy import, pi`` are removed, and
	    ``import numpy as np`` is prepended when ``np.`` is used without it.

	    Args:
	        code: Generated source text.

	    Returns:
	        The source text with imports added / cleaned up.
	    """
	    sympy_funcs = {
	        "cot", "sqrt", "pi", "sin", "cos", "tan", "log", "Abs", "exp",
	        "factorial", "csc", "sec", "asin", "acos", "atan", "Eq", "symbols", "Function", "Derivative"
	    }
	    # Name list of a single-line sympy import (digits allowed, e.g. atan2).
	    import_names = r'[a-zA-Z0-9_, ]+'

	    # Every identifier-like word in the code counts as "used".
	    used_functions = set(re.findall(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b', code))

	    # Names already imported from sympy; empty fragments (from stray commas)
	    # are skipped so they cannot leak into the rebuilt import line.
	    existing_imports = re.findall(rf'from sympy import ({import_names})', code)
	    existing_imports_set = {
	        name.strip()
	        for names in existing_imports
	        for name in names.split(',')
	        if name.strip()
	    }

	    required_imports = (used_functions & sympy_funcs) - existing_imports_set

	    if required_imports:
	        all_names = sorted(existing_imports_set | required_imports)
	        import_statement = f"from sympy import {', '.join(all_names)}\n"

	        # Drop the old sympy import lines (also one on the very last line,
	        # which has no trailing newline) before adding the merged one.
	        code = re.sub(rf'from sympy import {import_names}(?:\n|$)', '', code)
	        code = import_statement + code

	    # Remove malformed imports such as `from sympy import, pi` entirely.
	    code = re.sub(r'from sympy import,[^\n]*\n?', '', code)

	    # Add numpy import if necessary
	    if "np." in code and "import numpy as np" not in code:
	        code = "import numpy as np\n" + code

	    return code
	# Enhanced removal of evalf() calls, handling malformed cases
	def remove_evalf(code):
	    """Strip argument-less ``evalf()`` calls from *code*.

	    Both the method form (``expr.evalf()``) and a bare ``evalf()`` are
	    removed. The result is then passed through ``fix_unmatched_brackets``
	    so brackets stay balanced after the removal.

	    Args:
	        code: Generated source text.

	    Returns:
	        The source text without ``evalf()`` calls.
	    """
	    # An optional leading dot covers `.evalf()`; the word boundary keeps
	    # longer identifiers that merely end in "evalf" intact. (The previous
	    # pattern used `\e`, which `re` rejects as an invalid escape.)
	    code = re.sub(r'\.?\bevalf\(\)', '', code)

	    # Ensure parentheses remain balanced even after removing evalf()
	    return fix_unmatched_brackets(code)

	def handle_sum_errors(code):
	    """Unwrap ``sum(...)`` calls whose argument contains no parentheses.

	    Such calls are treated as ``sum`` applied to a non-iterable and are
	    replaced by their argument. Only the built-in name is targeted: names
	    that merely end in ``sum`` (``checksum(x)``) and attribute calls
	    (``np.sum(a)``) are left untouched. A compound argument is kept inside
	    parentheses so the surrounding expression keeps its grouping
	    (``2*sum(a + b)`` becomes ``2*(a + b)``).

	    Args:
	        code: Generated source text.

	    Returns:
	        The source text with the matching ``sum(...)`` calls unwrapped.
	    """
	    def _unwrap(match):
	        inner = match.group(1)
	        # A single name / number / dotted name needs no grouping.
	        if re.fullmatch(r'\s*[\w.]+\s*', inner):
	            return inner
	        return f'({inner})'

	    # The lookbehind rejects `xsum(` and `obj.sum(`.
	    return re.sub(r'(?<![\w.])sum\(([^()]+)\)', _unwrap, code)

	def complete_try_catch_block(code):
	    """Give every bare ``try:`` in *code* a generic ``except`` handler.

	    Nothing is changed unless the text contains ``try:`` and does not
	    contain the word ``except`` anywhere. Otherwise, for each line that is
	    exactly ``try:`` (ignoring surrounding whitespace), the following more
	    deeply indented lines are taken as its body and an
	    ``except Exception as e:`` clause that prints the error is inserted
	    right after that body, at the indentation of the ``try``. An empty body
	    gets a ``pass``. A ``try`` directly followed by ``finally`` at the same
	    indentation is already valid and is left without a handler.

	    Args:
	        code: Generated source text.

	    Returns:
	        The source text with the missing handlers added.
	    """
	    if 'try:' not in code or 'except' in code:
	        return code

	    def _indent_of(text):
	        return text[:len(text) - len(text.lstrip())]

	    lines = code.split('\n')
	    total = len(lines)
	    result = []
	    idx = 0

	    while idx < total:
	        line = lines[idx]
	        result.append(line)
	        idx += 1
	        if line.strip() != 'try:':
	            continue

	        indent = _indent_of(line)

	        # The body is every following line that is blank or indented deeper.
	        body = []
	        while idx < total:
	            candidate = lines[idx]
	            if candidate.strip() and len(_indent_of(candidate)) <= len(indent):
	                break
	            body.append(candidate)
	            idx += 1

	        # Blank lines after the body stay after the inserted handler.
	        trailing = []
	        while body and not body[-1].strip():
	            trailing.insert(0, body.pop())

	        if body:
	            inner = _indent_of(next(b for b in body if b.strip()))
	            # A nested bare `try:` inside the body needs a handler as well.
	            body = complete_try_catch_block('\n'.join(body)).split('\n')
	        else:
	            inner = indent + '    '
	            body = [inner + 'pass']
	        result.extend(body)

	        has_finally = (
	            idx < total
	            and _indent_of(lines[idx]) == indent
	            and lines[idx].strip().startswith('finally')
	        )
	        if not has_finally:
	            result.append(f'{indent}except Exception as e:')
	            result.append(inner + 'print(f"Error: {e}")')
	        result.extend(trailing)

	    return '\n'.join(result)

	import re

	def remove_extra_variables_from_function(code):
	    """Drop parameters of the first function in *code* that are never used.

	    The first ``def name(...):`` header is located, and each parameter whose
	    name does not occur as an identifier anywhere in the text after that
	    header is removed from the parameter list. Defaults and annotations
	    are kept on the surviving parameters. Only that first header is
	    rewritten; all other text is returned unchanged.

	    Args:
	        code: Generated source text.

	    Returns:
	        The source text with the trimmed function header, or *code*
	        unchanged when no function header is found.
	    """
	    # Find the function definition
	    match = re.search(r'def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\((.*?)\)\s*:', code)
	    if not match:
	        return code

	    func_name = match.group(1)
	    arg_list = [arg.strip() for arg in match.group(2).split(',') if arg.strip()]

	    # Everything after the header counts as the body.
	    func_body = code[match.end():]
	    used_vars = set(re.findall(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b', func_body))

	    def _param_name(arg):
	        # `*args`, `x: int`, `y=3` -> `args`, `x`, `y`
	        return re.split(r'[:=]', arg.lstrip('*'), maxsplit=1)[0].strip()

	    filtered_args = [arg for arg in arg_list if _param_name(arg) in used_vars]
	    new_func_def = f"def {func_name}({', '.join(filtered_args)}):"

	    # Splice by position so only this header changes and nothing in the new
	    # header is interpreted as a regex replacement template.
	    return code[:match.start()] + new_func_def + func_body

	# Post-process the generated code
	def post_process_code(code):
	    """Run the generated code through every clean-up step, in order.

	    Args:
	        code: Raw generated source text.

	    Returns:
	        The text returned by the last clean-up step.
	    """
	    cleanup_steps = (
	        fix_unmatched_brackets,
	        validate_bracket_balance,
	        add_missing_imports,
	        remove_evalf,
	        handle_sum_errors,
	        complete_try_catch_block,
	        remove_extra_variables_from_function,
	    )
	    for step in cleanup_steps:
	        code = step(code)
	    return code

	# Generate the final code from LaTeX
	def generate_code(latex_expression, max_length=512):
	    """Turn a LaTeX expression into post-processed Python code.

	    The expression is embedded in the prompt, encoded with the module-level
	    ``tokenizer``, and passed to the module-level ``model`` for generation.
	    The first generated sequence is decoded and cleaned up with
	    ``post_process_code``.

	    Args:
	        latex_expression: The LaTeX text to translate.
	        max_length: Forwarded to ``model.generate`` as ``max_length``.

	    Returns:
	        The post-processed generated code as a string.
	    """
	    prompt = f"Latex Expression: {latex_expression} Solution:"
	    encoded = tokenizer(prompt, return_tensors="pt")
	    first_sequence = model.generate(**encoded, max_length=max_length)[0]
	    raw_code = tokenizer.decode(first_sequence, skip_special_tokens=True)
	    return post_process_code(raw_code)




	# Streamlit app layout
	st.title("LaTeX to Python Code Generator")

	# Make sure the session always carries a LaTeX expression entry
	if 'latex_expr' not in st.session_state:
	    st.session_state.latex_expr = ""

	# Text box pre-filled with the expression stored in the session
	latex_input = st.text_area(
	    "Enter the LaTeX Expression",
	    value=st.session_state.latex_expr,
	    height=150,
	)

	# React to the button: warn on empty input, otherwise generate and show code
	generate_clicked = st.button("Generate Code")
	if generate_clicked and not latex_input:
	    st.warning("Please enter a LaTeX expression to generate Python code.")
	elif generate_clicked:
	    # Remember the submitted expression for later reruns
	    st.session_state.latex_expr = latex_input
	    with st.spinner("Generating Python Code..."):
	        try:
	            generated_code = generate_code(latex_expression=st.session_state.latex_expr)
	            # Display the generated code
	            st.subheader("Generated Python Code")
	            st.code(generated_code, language='python')
	        except Exception as e:
	            # Any failure during generation or display is reported in the page
	            st.error(f"Error during code generation: {e}")