File size: 2,476 Bytes
ca6c024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import gradio as gr
import pandas as pd
import statsmodels.formula.api as smf
from linearmodels.iv import IV2SLS
import warnings

# Silence FutureWarning messages process-wide so they do not clutter the app's console output.
warnings.simplefilter("ignore", FutureWarning)

def process_file(file):
    """Load the uploaded CSV into the module-level ``df`` and list its columns.

    Args:
        file: The value delivered by the upload widget. May be ``None`` (the
            widget was cleared), a filesystem path, or an object exposing the
            path through a ``name`` attribute.

    Returns:
        The column names of the loaded CSV as a list, or an empty list when
        no file is provided.
    """
    global df
    if file is None:
        # The upload was cleared: drop any previously loaded data instead of
        # failing on ``None.name``.
        df = None
        return []

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path)
    return df.columns.tolist()

def _split_names(raw):
    """Split a comma-separated string into stripped, non-empty names."""
    if not raw:
        return []
    return [name.strip() for name in raw.split(",") if name.strip()]


def run_2sls(dependent_var, endogenous_vars, instruments, exogenous_vars):
    """Run a manual two-stage least squares regression on the uploaded data.

    Stage one regresses every endogenous variable on the instruments plus the
    exogenous variables with OLS and stores the fitted values in a
    ``<name>_hat`` column. Stage two regresses the dependent variable on those
    fitted columns plus the exogenous variables, again with OLS.

    NOTE(review): the returned text is the ordinary OLS summary of the second
    stage, so its standard errors are not computed with the 2SLS covariance
    formula. ``IV2SLS`` is imported by this file but not used here; consider
    it if correct inference is required.

    Args:
        dependent_var: Name of the dependent variable column.
        endogenous_vars: Comma-separated endogenous column names.
        instruments: Comma-separated instrument column names.
        exogenous_vars: Comma-separated exogenous column names; may be empty
            or ``None``.

    Returns:
        The second-stage regression summary as text, or a string starting
        with ``"Error: "`` describing why the regression could not be run.
    """
    if isinstance(dependent_var, str):
        dependent_var = dependent_var.strip()
    # Strip whitespace and drop empty entries so that inputs such as
    # "x1, x2" or a trailing comma map onto the real column names.
    endogenous = _split_names(endogenous_vars)
    instrument_names = _split_names(instruments)
    exogenous = _split_names(exogenous_vars)

    if not (dependent_var and endogenous and instrument_names):
        return "Error: Please select all required variables."

    if len(instrument_names) < len(endogenous):
        return "Error: The number of instruments must be at least equal to the number of endogenous variables."

    # ``df`` only exists once a CSV has been loaded; look it up defensively so
    # a missing upload yields a readable message instead of a NameError text.
    data = globals().get("df")
    if data is None:
        return "Error: Please upload a CSV file first."

    try:
        # De-duplicate while keeping order so a name listed twice does not
        # produce duplicated columns in the working frame.
        columns = list(dict.fromkeys([dependent_var, *endogenous, *instrument_names, *exogenous]))
        missing = [name for name in columns if name not in data.columns]
        if missing:
            return f"Error: Column(s) not found in the uploaded data: {', '.join(missing)}"

        # Work on an independent copy: fitted columns are added below.
        sample = data[columns].dropna().copy()

        # First stage
        first_stage_rhs = ' + '.join(instrument_names + exogenous)
        fitted_names = []
        for var in endogenous:
            first_stage = smf.ols(f'{var} ~ {first_stage_rhs}', data=sample).fit()
            sample[f'{var}_hat'] = first_stage.fittedvalues
            fitted_names.append(f'{var}_hat')

        # Second stage
        second_stage_formula = f'{dependent_var} ~ ' + ' + '.join(fitted_names + exogenous)
        second_stage = smf.ols(second_stage_formula, data=sample).fit()

        return second_stage.summary().as_text()

    except Exception as e:
        # UI boundary: report any failure as text in the output box.
        return f"Error: {str(e)}"

def _load_columns(file):
    """Load the uploaded CSV and refresh the widgets that depend on its columns.

    Returns a pair: the column names joined into one string for the label,
    and an update that fills the dependent-variable dropdown with the column
    names and clears its current selection.
    """
    columns = process_file(file)
    # The label component does not accept a list, so hand it a single string.
    column_text = ", ".join(str(name) for name in columns)
    return column_text, gr.update(choices=columns, value=None)


with gr.Blocks() as app:
    gr.Markdown("## Two-Stage Least Squares Regression (2SLS)")

    file_input = gr.File(label="Upload CSV File")
    column_output = gr.Label(label="Available Columns")

    dependent_var = gr.Dropdown(label="Dependent Variable")
    endogenous_vars = gr.Textbox(label="Endogenous Variables (comma-separated)")
    instruments = gr.Textbox(label="Instruments (comma-separated)")
    exogenous_vars = gr.Textbox(label="Exogenous Variables (comma-separated, optional)")

    # Registered after the dropdown exists because the upload now also
    # populates its choices; without choices nothing could be selected.
    file_input.change(_load_columns, inputs=file_input, outputs=[column_output, dependent_var])

    run_button = gr.Button("Run 2SLS Regression")
    output = gr.Textbox(label="Regression Output", lines=20)

    run_button.click(run_2sls, inputs=[dependent_var, endogenous_vars, instruments, exogenous_vars], outputs=output)

app.launch()