# Spaces: MilesCranmer / PySR_Old2 (Sleeping)
# File size: 6,994 Bytes

import gradio as gr
import numpy as np
import pandas as pd
import pysr
import tempfile
from typing import Optional

# Zero-row results table; `greet` returns it next to a validation message
# whenever a run is rejected before the search starts.
empty_df = pd.DataFrame(
    {column: [] for column in ("equation", "loss", "complexity")}
)

# Expressions in `x` offered as synthetic example data; `main` uses this list
# as the choices of the "Test Equation" radio group.
test_equations = ["sin(x) + cos(2*x) + tan(x/3)"]


def generate_data(s: str, num_points: int, noise_level: float):
    """Sample a noisy curve from a textual expression in ``x``.

    ``x`` is ``num_points`` evenly spaced values on ``[0, 10]``. The names
    ``sin``, ``cos``, ``exp``, ``log`` and ``tan`` in ``s`` are rewritten to
    their ``np.`` counterparts and ``^`` is rewritten to ``**`` before the
    expression is evaluated.

    Args:
        s: Expression in the variable ``x``, e.g. ``"sin(x) + x^2"``.
        num_points: Number of sample points (coerced to ``int``).
        noise_level: Standard deviation of the zero-mean Gaussian noise
            added to the evaluated expression.

    Returns:
        A tuple ``(X, y_noisy)`` where ``X`` is a DataFrame with the single
        column ``"x"`` and ``y_noisy`` is a 1-D array of the same length.
    """
    x = np.linspace(0, 10, int(num_points))
    for (k, v) in {
        "sin": "np.sin",
        "cos": "np.cos",
        "exp": "np.exp",
        "log": "np.log",
        "tan": "np.tan",
        "^": "**",
    }.items():
        s = s.replace(k, v)
    # NOTE(security): `eval` on a string that ultimately comes from the UI is
    # still dangerous. The namespace is narrowed to `np` and `x` (no builtins,
    # no module globals) to shrink the attack surface, but this is not a
    # sandbox; only feed this function expressions from a trusted list.
    y = eval(s, {"__builtins__": {}, "np": np, "x": x})
    # A constant expression such as "2" evaluates to a scalar with no
    # `.shape`; broadcast so that `y` always has one value per sample point.
    y = np.broadcast_to(np.asarray(y, dtype=float), x.shape)
    noise = np.random.normal(0, noise_level, y.shape)
    y_noisy = y + noise
    return pd.DataFrame({"x": x}), y_noisy


def greet(
    file_obj: Optional[tempfile._TemporaryFileWrapper],
    test_equation: str,
    num_points: int,
    noise_level: float,
    niterations: int,
    maxsize: int,
    binary_operators: list,
    unary_operators: list,
    force_run: bool,
):
    """Run a PySR search on uploaded or synthetic data and report the result.

    When ``file_obj`` is given, it is read as CSV; the last column is the
    target and all other columns are the features. Otherwise the data is
    generated from ``test_equation`` via ``generate_data``.

    Args:
        file_obj: Uploaded CSV file, or ``None`` to use the example data.
        test_equation: Expression used to generate example data.
        num_points: Number of example data points.
        noise_level: Noise standard deviation for the example data.
        niterations: Passed to ``PySRRegressor(niterations=...)``.
        maxsize: Passed to ``PySRRegressor(maxsize=...)``.
        binary_operators: Binary operator names for the search.
        unary_operators: Unary operator names for the search.
        force_run: If true, skip the "more than 10,000 rows" guard.

    Returns:
        A tuple ``(table, message)``. On a validation failure ``table`` is
        the module-level ``empty_df`` and ``message`` explains the problem.
        On success ``table`` holds the ``equation``, ``loss`` and
        ``complexity`` columns of ``model.equations_`` as strings, and
        ``message`` shows how to reproduce the run locally.

    Side effects:
        On success the table is also written to ``pysr_output.csv`` in the
        current working directory.
    """
    # Validate the operator selection for BOTH data sources; previously this
    # check only ran when a file had been uploaded.
    if not binary_operators and not unary_operators:
        return (
            empty_df,
            "Please select at least one operator!",
        )

    if file_obj is not None:
        # Look at some statistics of the file:
        try:
            df = pd.read_csv(file_obj)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header to parse; report it the same
            # way as a header-only file instead of raising.
            return (
                empty_df,
                "The file is empty!",
            )
        if len(df) == 0:
            return (
                empty_df,
                "The file is empty!",
            )
        if len(df.columns) == 1:
            return (
                empty_df,
                "The file has only one column!",
            )
        if len(df) > 10_000 and not force_run:
            return (
                empty_df,
                "You have uploaded a file with more than 10,000 rows. "
                "This will take very long to run. "
                "Please upload a subsample of the data, "
                "or check the box 'Ignore Warnings'.",
            )

        # The last column is the regression target; the rest are features.
        col_to_fit = df.columns[-1]
        y = np.array(df[col_to_fit])
        X = df.drop([col_to_fit], axis=1)
    else:
        X, y = generate_data(test_equation, num_points, noise_level)

    model = pysr.PySRRegressor(
        bumper=True,
        maxsize=maxsize,
        niterations=niterations,
        binary_operators=binary_operators,
        unary_operators=unary_operators,
        timeout_in_seconds=1000,
    )
    model.fit(X, y)

    df = model.equations_[["equation", "loss", "complexity"]]
    # Convert all columns to string type:
    df = df.astype(str)
    msg = (
        "Success!\n"
        f"You may run the model locally (faster) with "
        f"the following parameters:"
        + f"""
model = PySRRegressor(
    niterations={niterations},
    binary_operators={str(binary_operators)},
    unary_operators={str(unary_operators)},
    maxsize={maxsize},
)
model.fit(X, y)"""
    )

    df.to_csv("pysr_output.csv", index=False)
    return df, msg


def main():
    """Assemble the Gradio interface, wire up its callbacks, and launch it.

    Layout: one row with two columns. The left column holds the data source
    tabs ("Example Data" / "Upload Data") and the search settings; the right
    column holds the results table, the message box and the run button.
    """
    binary_choices = ["+", "-", "*", "/", "^"]
    unary_choices = [
        "sin",
        "cos",
        "exp",
        "log",
        "square",
        "cube",
        "sqrt",
        "abs",
        "tan",
    ]

    with gr.Blocks() as demo:
        with gr.Row():
            # Left column: data source and search settings.
            with gr.Column():
                with gr.Row():
                    with gr.Tab("Example Data"):
                        # Plot of the example data:
                        example_plot = gr.ScatterPlot(
                            x="x",
                            y="y",
                            x_lim=[0, 10],
                            y_lim=[-5, 5],
                            tooltip=["x", "y"],
                            height=300,
                            width=350,
                        )
                        test_equation = gr.Radio(
                            test_equations,
                            label="Test Equation",
                            value=test_equations[0],
                        )
                        num_points = gr.Slider(
                            label="Number of Data Points",
                            minimum=10,
                            maximum=1000,
                            step=1,
                            value=100,
                        )
                        noise_level = gr.Slider(
                            label="Noise Level",
                            minimum=0,
                            maximum=1,
                            value=0.1,
                        )
                    with gr.Tab("Upload Data"):
                        file_input = gr.File(label="Upload a CSV File")
                with gr.Row():
                    binary_operators = gr.CheckboxGroup(
                        label="Binary Operators",
                        choices=binary_choices,
                        value=["+", "-", "*", "/"],
                    )
                    unary_operators = gr.CheckboxGroup(
                        label="Unary Operators",
                        choices=unary_choices,
                        value=[],
                    )
                    niterations = gr.Slider(
                        label="Number of Iterations",
                        minimum=1,
                        maximum=1000,
                        step=1,
                        value=40,
                    )
                    maxsize = gr.Slider(
                        label="Maximum Complexity",
                        minimum=7,
                        maximum=35,
                        step=1,
                        value=20,
                    )
                    force_run = gr.Checkbox(
                        label="Ignore Warnings",
                        value=False,
                    )

            # Right column: search output and the trigger button.
            with gr.Column():
                with gr.Row():
                    results_table = gr.Dataframe(
                        headers=["Equation", "Loss", "Complexity"],
                        datatype=["str", "number", "number"],
                    )
                    error_log = gr.Textbox(label="Error Log")
                with gr.Row():
                    run_button = gr.Button()

        # The order here must match the positional parameters of `greet`.
        search_inputs = [
            file_input,
            test_equation,
            num_points,
            noise_level,
            niterations,
            maxsize,
            binary_operators,
            unary_operators,
            force_run,
        ]
        run_button.click(
            greet,
            inputs=search_inputs,
            outputs=[results_table, error_log],
        )

        # Any update to the equation choice will trigger a replot:
        plot_controls = (test_equation, num_points, noise_level)
        for control in plot_controls:
            control.change(replot, list(plot_controls), example_plot)

    demo.launch()

def replot(test_equation, num_points, noise_level):
    """Build the table plotted by the example-data scatter plot.

    Regenerates the synthetic data with ``generate_data`` and returns a
    DataFrame with the columns ``"x"`` and ``"y"``.
    """
    features, targets = generate_data(test_equation, num_points, noise_level)
    return pd.DataFrame({"x": features["x"], "y": targets})


# Build and launch the app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()