| | import re |
| | import pandas as pd |
| | import numpy as np |
| | from sympy import sympify, Eq |
| | from sympy.parsing.sympy_parser import parse_expr |
| | from sympy.core.sympify import SympifyError |
| | from concurrent.futures import ProcessPoolExecutor |
| | import multiprocessing as mp |
| | from sympy import simplify, sympify |
| | from sympy.core.sympify import SympifyError |
| | import swifter |
| | import random |
| |
|
| | from joblib import Parallel, delayed |
| |
|
| |
|
| | from tqdm.auto import tqdm |
| |
|
def apply_chunk(chunk, func):
    """Run ``func`` over one chunk of data through the chunk's ``apply`` method.

    ``parallel_apply`` hands this function to ``Pool.starmap`` together with
    ``(chunk, func)`` argument pairs.

    Args:
        chunk: Object exposing an ``apply`` method.
        func: Callable forwarded unchanged to ``chunk.apply``.

    Returns:
        Whatever ``chunk.apply(func)`` returns.
    """
    applied = chunk.apply(func)
    return applied
| |
|
def parallel_apply(series, func, n_jobs=None):
    """Apply ``func`` to every element of ``series`` using a process pool.

    The series is cut into contiguous positional slices, each slice is passed
    to ``apply_chunk`` in a worker process, and the partial results are
    concatenated in their original order.

    Args:
        series: pandas Series (must support ``len``, ``.iloc`` slicing and
            ``.apply``).
        func: Callable applied to each element. It has to be picklable
            whenever more than one worker process is used.
        n_jobs: Number of worker processes; defaults to ``mp.cpu_count()``.

    Returns:
        The concatenation of the per-chunk ``apply`` results.

    Raises:
        ValueError: If ``n_jobs`` is smaller than 1.
    """
    n_jobs = mp.cpu_count() if n_jobs is None else n_jobs
    if n_jobs < 1:
        raise ValueError("n_jobs must be a positive integer")

    total = len(series)
    # Never start more workers than there are rows: the surplus processes
    # would only receive empty chunks.
    n_workers = min(n_jobs, total)
    if n_workers <= 1:
        # Nothing to parallelise; skip the cost of spawning a pool.
        return series.apply(func)

    # Slice positionally with .iloc instead of np.array_split so the chunks
    # are always pandas objects. The first `extra` chunks get one more row.
    base, extra = divmod(total, n_workers)
    chunks = []
    start = 0
    for worker in range(n_workers):
        stop = start + base + (1 if worker < extra else 0)
        chunks.append(series.iloc[start:stop])
        start = stop

    with mp.Pool(n_workers) as pool:
        results = pool.starmap(apply_chunk, [(chunk, func) for chunk in chunks])

    return pd.concat(results)
| |
|
def canonicalize_expr(expr, canonicalizer=simplify):
    """Pair an expression with its canonical form and that form's hash.

    Args:
        expr: Expression handed to ``canonicalizer``.
        canonicalizer: One-argument callable producing the canonical form;
            defaults to ``simplify``.

    Returns:
        A 3-tuple ``(hash(canonical), canonical, expr)``.
    """
    canonical = canonicalizer(expr)
    digest = hash(canonical)
    return digest, canonical, expr
| |
|
def replace_constants(equation):
    """Replace every standalone numeric literal in ``equation`` with ``'C'``.

    Integers, decimals and scientific notation (``3``, ``0.5``, ``.5``,
    ``1e5``, ``2.5e-3``) are recognised. Digits that belong to an identifier
    (``x_1``) or that touch a letter or dot are left alone.

    A sign directly in front of a decimal literal is absorbed into the
    placeholder only when it is unary (``-0.5*x`` -> ``C*x``). When the sign
    acts as a binary operator it is kept (``sin(x)-0.5`` -> ``sin(x)-C``).

    Args:
        equation: Expression text.

    Returns:
        The text with numeric literals replaced by ``'C'``.
    """
    pattern = (
        r'(?<![\w.])'
        r'(?:(?P<sign>[-+])?\d*\.\d+(?:[eE][-+]?\d+)?'
        r'|\d+(?:[eE][-+]?\d+)?)'
        r'(?![\w.])'
    )

    def _to_placeholder(match):
        sign = match.group('sign')
        if sign:
            # Look at the nearest non-blank character before the sign: if it
            # ends an operand, the sign is a binary operator and must survive.
            text = match.string
            pos = match.start() - 1
            while pos >= 0 and text[pos].isspace():
                pos -= 1
            if pos >= 0 and (text[pos].isalnum() or text[pos] in '_.)]'):
                return sign + 'C'
        return 'C'

    return re.sub(pattern, _to_placeholder, equation)
| |
|
| |
|
def augment_expression(equation, var_prefix='x', max_index=10, p=0.5):
    """Abstract numeric constants and randomly re-index variables.

    1. Every standalone numeric literal (integer, decimal or scientific
       notation) becomes ``'C'``. A sign in front of a decimal literal is
       absorbed only when it is unary; a binary ``+``/``-`` is preserved
       (``sin(x_1)-0.5`` -> ``sin(x_1)-C``).
    2. Each occurrence of a variable ``<var_prefix>_<digits>`` is, with
       probability ``p``, replaced by ``<var_prefix>_<k>`` where ``k`` is
       drawn uniformly from ``1..max_index``. The drawn index may equal the
       original one. Otherwise the occurrence is left as is.

    Args:
        equation: Expression text.
        var_prefix: Literal variable prefix (regex metacharacters in it are
            escaped).
        max_index: Largest index that may be drawn.
        p: Per-occurrence replacement probability.

    Returns:
        The augmented expression text.
    """
    const_pattern = (
        r'(?<![\w.])'
        r'(?:(?P<sign>[-+])?\d*\.\d+(?:[eE][-+]?\d+)?'
        r'|\d+(?:[eE][-+]?\d+)?)'
        r'(?![\w.])'
    )

    def _to_placeholder(match):
        sign = match.group('sign')
        if sign:
            # A sign that follows an operand (after optional blanks) is a
            # binary operator, so it must not be folded into the constant.
            text = match.string
            pos = match.start() - 1
            while pos >= 0 and text[pos].isspace():
                pos -= 1
            if pos >= 0 and (text[pos].isalnum() or text[pos] in '_.)]'):
                return sign + 'C'
        return 'C'

    without_constants = re.sub(const_pattern, _to_placeholder, equation)

    # Escape the prefix so it is matched literally.
    var_pattern = rf'\b{re.escape(var_prefix)}_\d+\b'

    def _maybe_rename(match):
        if random.random() < p:
            new_idx = random.randint(1, max_index)
            return f"{var_prefix}_{new_idx}"
        return match.group(0)

    return re.sub(var_pattern, _maybe_rename, without_constants)
| |
|
| |
|
| |
|
def is_valid_equation(equation_str):
    """Check whether a string is an expression SymPy's ``parse_expr`` accepts.

    Args:
        equation_str: Candidate expression. Anything that is not a ``str``
            (including ``None`` and NaN) is rejected.

    Returns:
        ``True`` if the stripped string is non-empty and ``parse_expr`` parses
        it without raising one of the handled errors, ``False`` otherwise. On
        a parse failure a message containing the input is printed.
    """
    # Imported locally: the tokenizer used by parse_expr raises TokenError
    # for input such as unterminated brackets, and that is not a SyntaxError.
    from tokenize import TokenError

    if not isinstance(equation_str, str):
        return False

    candidate = equation_str.strip()
    if not candidate:
        return False

    try:
        # NOTE(security): parse_expr evaluates the text with eval(); do not
        # feed it untrusted input without sanitising it first.
        parse_expr(candidate)
    except (SympifyError, TokenError, SyntaxError, ValueError, TypeError,
            AttributeError):
        print(f"Erro ao analisar a equação: {equation_str}")
        return False
    return True
| |
|
def canonical_form(expr_str):
    """Return the simplified and expanded form of an expression as a string.

    Args:
        expr_str: Expression handed to ``simplify``.

    Returns:
        ``str(simplify(expr_str).expand())``. If a ``SympifyError`` is raised
        along the way, a message string that embeds the input is returned
        instead.
    """
    try:
        simplified = simplify(expr_str)
        return str(simplified.expand())
    except SympifyError:
        return f"Erro ao interpretar a expressão: {expr_str}"