File size: 1,293 Bytes
9fa2182
 
 
a206d6a
9fa2182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519fcb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import numpy as np
import pandas as pd

# Sample equation strings, written in terms of ``x`` with bare function
# names (``sin``, not ``np.sin``) and ordinary arithmetic operators.
TEST_EQUATIONS = [
    "sin(2*x)/x + 0.1*x",
]


def generate_data(s: str, num_points: int, noise_level: float, data_seed: int):
    """Sample a noisy one-dimensional dataset from an equation string.

    ``x`` is drawn uniformly from ``[-10, 10)``, the equation is rewritten
    into a NumPy expression and evaluated on ``x``, and zero-mean Gaussian
    noise is added to the result.

    Args:
        s: Equation written in terms of ``x``, e.g. ``"sin(2*x)/x + 0.1*x"``.
            Every occurrence of ``sin``, ``cos``, ``exp``, ``log`` and
            ``tan`` is prefixed with ``np.`` and ``^`` is rewritten to
            ``**`` before evaluation. The replacement is purely textual.
        num_points: Number of samples to draw.
        noise_level: Scale (standard deviation) of the Gaussian noise.
        data_seed: Seed for the ``RandomState`` that produces both ``x`` and
            the noise, so a given seed always yields the same dataset.

    Returns:
        A tuple ``(X, y_noisy)``: ``X`` is a DataFrame with the single
        column ``"x"``, and ``y_noisy`` is an array holding one noisy value
        per sample.
    """
    rstate = np.random.RandomState(data_seed)
    x = rstate.uniform(-10, 10, num_points)

    # Translate the user-facing syntax into something NumPy can evaluate.
    expr = s
    for plain, numpy_form in {
        "sin": "np.sin",
        "cos": "np.cos",
        "exp": "np.exp",
        "log": "np.log",
        "tan": "np.tan",
        "^": "**",
    }.items():
        expr = expr.replace(plain, numpy_form)

    # SECURITY: eval() executes arbitrary Python. Only pass equation strings
    # from a trusted source; builtins are still reachable from here.
    # The explicit namespace exposes exactly ``np`` and ``x`` so the equation
    # cannot read or depend on this function's internal variables.
    y = np.asarray(eval(expr, {"np": np, "x": x}))

    # Size the noise from x rather than y: a constant equation such as "3"
    # evaluates to a scalar, which is then broadcast across all samples.
    noise = rstate.normal(0, noise_level, x.shape)
    y_noisy = y + noise
    return pd.DataFrame({"x": x}), y_noisy


def read_csv(file_input: str, force_run: bool):
    """Load a CSV and split it into feature columns and a target column.

    The last column of the file is treated as the target; all remaining
    columns are returned as the features.

    Args:
        file_input: Path (or anything ``pandas.read_csv`` accepts) of the
            CSV file to load.
        force_run: When true, skip the check that rejects files with more
            than 10,000 rows.

    Returns:
        A tuple ``(X, y)``: ``X`` is a DataFrame of every column except the
        last, and ``y`` is a NumPy array of the last column's values.

    Raises:
        ValueError: If the file has no data rows, has only one column, or
            has more than 10,000 rows while ``force_run`` is false.
    """
    frame = pd.read_csv(file_input)
    row_count, column_count = frame.shape

    # Sanity checks on the uploaded data before handing it on.
    if row_count == 0:
        raise ValueError("The file is empty!")
    if column_count == 1:
        raise ValueError("The file has only one column!")

    too_many_rows = row_count > 10_000
    if too_many_rows and not force_run:
        raise ValueError(
            "You have uploaded a file with more than 10,000 rows. "
            "This will take very long to run. "
            "Please upload a subsample of the data, "
            "or check the box 'Ignore Warnings'.",
        )

    # The final column is the quantity to fit; everything else is input.
    target_name = frame.columns[-1]
    features = frame.drop(columns=[target_name])
    target = np.array(frame[target_name])

    return features, target