File size: 6,068 Bytes
026b316
 
 
79a8be6
 
 
 
 
 
026b316
79a8be6
 
 
 
 
 
 
 
026b316
79a8be6
 
026b316
79a8be6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
026b316
79a8be6
026b316
 
 
 
79a8be6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30b9ad3
79a8be6
30b9ad3
 
 
 
 
 
 
79a8be6
 
 
30b9ad3
79a8be6
 
 
30b9ad3
 
79a8be6
 
 
 
 
026b316
79a8be6
 
 
 
 
30b9ad3
79a8be6
026b316
 
79a8be6
 
 
 
026b316
79a8be6
 
 
 
 
 
026b316
79a8be6
 
 
 
 
026b316
79a8be6
 
 
 
 
026b316
79a8be6
 
026b316
79a8be6
 
 
026b316
79a8be6
 
026b316
 
79a8be6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
026b316
79a8be6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
026b316
79a8be6
026b316
 
 
 
 
 
 
 
 
0a9a54a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import gradio as gr
import json
import subprocess
import logging
import sys
import io
import threading
import queue
from contextlib import redirect_stdout, redirect_stderr

class LiveLogHandler(logging.Handler):
    """Logging handler that forwards formatted log records to a queue.

    Useful for streaming log lines produced on a worker thread to a
    consumer (e.g. a UI poller) that reads from the queue.
    """

    def __init__(self, log_queue):
        """Remember *log_queue*, the queue that will receive log lines."""
        super().__init__()
        self.log_queue = log_queue

    def emit(self, record):
        """Format *record* and push the resulting string onto the queue."""
        self.log_queue.put(self.format(record))


def run_pipeline(pdf_file, system_prompt, max_step, learning_rate, epochs, model_name):
    """Run the fine-tuning handler as a subprocess and return its JSON result.

    Parameters mirror the Gradio inputs: *pdf_file* is the uploaded file
    object (must expose ``.name``; may be ``None`` when nothing was
    uploaded), *system_prompt* the instruction text, and the remaining
    arguments the numeric/string training settings forwarded verbatim.

    Returns a dict. On success: the ``model_name``, ``processing_time`` and
    ``evaluation_results`` reported by the handler. On any failure: a dict
    with ``status: "error"`` and a ``details`` field. Never raises — every
    failure is folded into the returned dict so the UI always gets JSON.
    """
    try:
        # Guard against a missing upload: Gradio passes None when the user
        # clicks "Run" without selecting a file (previously this crashed
        # with AttributeError on pdf_file.name).
        if pdf_file is None:
            return {"status": "error", "details": "No PDF file uploaded"}

        # Construct the job input the handler expects on its command line.
        data = {
            "input": {
                "pdf_file": pdf_file.name,
                "system_prompt": system_prompt,
                "max_step": max_step,
                "learning_rate": learning_rate,
                "epochs": epochs,
                "model_name": model_name
            }
        }

        # Print the start of pipeline to terminal
        print("Pipeline started with inputs:", json.dumps(data, indent=2))

        # Run the handler in a separate process
        process = subprocess.Popen(
            ['python3', 'handler.py', '--test_input', json.dumps(data)],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )

        stdout_lines = []
        stderr_lines = []

        # Drain stderr on a background thread. The previous sequential
        # reads (all of stdout, then all of stderr) could deadlock once
        # the child filled the stderr pipe buffer while we were still
        # blocked reading stdout.
        def _drain_stderr():
            for line in process.stderr:
                print(f"ERROR: {line.strip()}")  # Print stderr to terminal
                stderr_lines.append(line.strip())

        stderr_thread = threading.Thread(target=_drain_stderr, daemon=True)
        stderr_thread.start()

        # Stream stdout to the terminal in real time.
        for line in process.stdout:
            print(line.strip())  # Print stdout to terminal
            stdout_lines.append(line.strip())

        # Wait for the process to exit and the stderr drain to finish.
        process.wait()
        stderr_thread.join()

        # The handler prints its result as a JSON object containing a
        # "status" key; scan stdout for the first such line.
        handler_output = None
        for line in stdout_lines:
            try:
                parsed_line = json.loads(line)
                if isinstance(parsed_line, dict) and "status" in parsed_line:
                    handler_output = parsed_line
                    break
            except json.JSONDecodeError:
                continue

        # Prepare result
        if handler_output and handler_output.get("status") == "success":
            return {
                "model_name": handler_output.get("model_name", "N/A"),
                "processing_time": handler_output.get("processing_time", "N/A"),
                "evaluation_results": handler_output.get("evaluation_results", {})
            }
        return {
            "status": "error",
            "details": handler_output or "No valid output received"
        }

    except Exception as e:
        # Top-level boundary: report the failure to the UI instead of
        # letting the exception propagate into Gradio.
        print(f"Pipeline execution error: {str(e)}")
        return {
            "status": "error",
            "details": str(e)
        }

# Define Gradio interface with dark theme and light blue buttons.
# The CSS string is passed verbatim to gr.Blocks: it styles the page
# container, any component tagged with the 'custom-input' class, and
# the buttons/labels (light sky blue on dark).
with gr.Blocks(css='''
    .gradio-container {
        background-color: #121212;
        color: #e0e0e0;
        max-width: 800px;
        margin: 0 auto;
    }
    .custom-input {
        min-height: 50px;
        height: auto;
        background-color: #1e1e1e !important;
        color: #e0e0e0 !important;
        border: 1px solid #333 !important;
    }
    .custom-input textarea, 
    .custom-input input, 
    .custom-input .upload-icon {
        background-color: #1e1e1e !important;
        color: #e0e0e0 !important;
    }
    .gr-button {
        background-color: #87CEFA !important; /* Light Sky Blue */
        color: #121212 !important;
        border: none !important;
        font-weight: bold !important;
    }
    .gr-button:hover {
        background-color: #87CEEB !important; /* Slightly different light blue on hover */
    }
    .gr-form {
        background-color: #1e1e1e !important;
        border-color: #333 !important;
    }
    .gr-label {
        color: #87CEFA !important; /* Light Sky Blue for labels */
    }
''') as demo:
    # Page header: centered light-blue title, rendered as raw HTML
    # through gr.Markdown below.
    title = "🤖 Fine-tuning Pipeline Configurator"
    header_style = "color: #87CEFA;"  # Light Sky Blue

    html_content = f"""
    <div style="text-align: center;">
        <h2><span style="{header_style}">{title}</span></h2>
    </div>
    """

    gr.Markdown(html_content)
    
    # Input form: system prompt on its own row, then the file upload and
    # training hyper-parameters side by side.
    with gr.Column(scale=1):
        # System Prompt with consistent styling
        system_prompt = gr.Textbox(
            label="System Prompt", 
            placeholder="Enter system instructions", 
            lines=4,
            elem_classes=['custom-input'],
            value="You are a helpful assistant that provides detailed information based on the provided text."
        )

        # Numeric and model name inputs in a row with consistent styling
        with gr.Row():
            # PDF File Upload (source document for fine-tuning data)
            pdf_file = gr.File(
                label="Upload PDF", 
                file_types=[".pdf"], 
                elem_classes=['custom-input']
            )

            max_step = gr.Number(
                label="Max Steps", 
                value=150, 
                elem_classes=['custom-input']
            )
            learning_rate = gr.Number(
                label="Learning Rate", 
                value=2e-4, 
                elem_classes=['custom-input']
            )
            epochs = gr.Number(
                label="Epochs", 
                value=10, 
                elem_classes=['custom-input']
            )
            model_name = gr.Textbox(
                label="Model Name", 
                placeholder="Enter model name",
                elem_classes=['custom-input']
            )

    # Results pane (renders the dict returned by run_pipeline) and the
    # button that kicks off the pipeline.
    result_output = gr.JSON(label="Pipeline Results")
    run_button = gr.Button("Run Pipeline", variant="primary")

    # Wire the button to run_pipeline; inputs are passed positionally in
    # the same order as run_pipeline's signature.
    run_button.click(
        run_pipeline,
        inputs=[pdf_file, system_prompt, max_step, learning_rate, epochs, model_name],
        outputs=[result_output]
    )

# Launch the Gradio app at import/run time (blocking call; serves the UI
# on the default local address/port).
demo.launch()