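"""Streamlit demo for COFFEEPOTS.

For each HumanEvalPack problem, a critic model streams natural-language feedback
on the buggy solution, and an editor model then streams a corrected version of
the code conditioned on that feedback. Both models are served behind
OpenAI-compatible endpoints and accessed through LangChain.
"""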
import argparse
import re

import streamlit as st
from datasets import load_dataset
from langchain_community.llms import OpenAI
from tqdm import tqdm



def load_data(split="test"):
    # Load HumanEvalPack (trust_remote_code matches the module-level call below).
    data = load_dataset("bigcode/humanevalpack", trust_remote_code=True)
    print("=========== dataset statistics ===========")
    print(f"{split} split size: {len(data[split])}")
    print("==========================================")
    return data[split]



def split_function_header_and_docstring(s):
    # Match a triple-quoted docstring (double or single quotes).
    pattern = re.compile(r"(\"\"\"(.*?)\"\"\"|\'\'\'(.*?)\'\'\')", re.DOTALL)
    matches = pattern.findall(s)
    if not matches:
        raise ValueError(f"No docstring found in prompt:\n{s}")
    # findall yields (full_match, double_quoted_body, single_quoted_body) tuples;
    # keep the full quoted docstring of the last match.
    docstring = matches[-1][0]
    code_without_docstring = s.replace(docstring, "").replace('"' * 6, "").strip()
    docstring = docstring.replace('"', "")
    return code_without_docstring, docstring


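# Build the "Problem Description / Incorrect Code" prompt for a single
# HumanEvalPack example, returning the formatted prompt, the extracted problem
# description, and the function header.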
def prepare_model_input(code_data):
    prompt = """Provide feedback on the errors in the given code and suggest the correct code to address the described problem.

Problem Description:
{description}

Incorrect Code:
{wrong_code}"""

    # The HumanEvalPack "prompt" field is the function header plus its docstring.
    description = code_data["prompt"]
    function_header, docstring = split_function_header_and_docstring(description)
    # Keep only the natural-language part of the docstring (drop doctest examples).
    problem = docstring.split(">>>")[0]

    wrong_code = function_header + code_data["buggy_solution"]
    template_dict = {"description": problem, "wrong_code": wrong_code}
    model_input = prompt.format(**template_dict)
    return model_input, problem, function_header


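# Build the critic prompt for every problem in the split, keyed by task_id.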
def load_and_prepare_data():
    dataset = load_data()
    all_model_inputs = {}
    print("### load and prepare data")
    for data in tqdm(dataset):
        problem_id = data["task_id"]
        buggy_solution = data["buggy_solution"]
        model_input, problem, function_header = prepare_model_input(data)
        # The critic prompt used by the demo: problem description plus buggy code,
        # ending with "Feedback:" so the model continues with its critique.
        new_model_input = (
            "Provide feedback on the errors in the given code and suggest the correct "
            "code to address the described problem.\n"
            f"Problem Description:{problem}\n"
            f"Incorrect Code:\n{buggy_solution}\n"
            "Feedback:"
        )
        all_model_inputs[problem_id] = {
            "model_input": new_model_input,
            "header": function_header,
            "problem_description": problem,
            "data": data,
        }
    return all_model_inputs

 


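# Load the HumanEvalPack test split at module level so the task ids can
# populate the selector, and pre-build the critic prompt for every problem.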
dataset = load_dataset("bigcode/humanevalpack", split="test", trust_remote_code=True)

problem_ids = [problem['task_id'] for problem in dataset]
all_model_inputs = load_and_prepare_data()


# Ports for locally hosted OpenAI-compatible servers. They are kept for local
# runs, but this demo points at the hosted ngrok endpoints below instead.
parser = argparse.ArgumentParser()
parser.add_argument("--editor_port", type=str, default="6000")
parser.add_argument("--critic_port", type=str, default="6001")
args = parser.parse_args()

# LangChain OpenAI clients for the two COFFEEPOTS models. The endpoints speak
# the OpenAI-compatible API, so a dummy key ("EMPTY") is sufficient. For a local
# deployment, use e.g. openai_api_base=f"http://localhost:{args.critic_port}/v1".
editor_model = OpenAI(model="Anonymous-COFFEE/COFFEEPOTS-editor", api_key="EMPTY", openai_api_base="https://editor.jp.ngrok.io/v1")
critic_model = OpenAI(model="Anonymous-COFFEE/COFFEEPOTS-critic", api_key="EMPTY", openai_api_base="https://critic.jp.ngrok.io/v1")

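# Streamlit UI: pick a task, show its description and buggy code, and reserve
# placeholders for the streamed feedback and corrected code.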
st.title("Demo for COFFEEPOTS")

selected_task_id = st.selectbox("Select a problem ID:", problem_ids)

# Retrieve selected problem details
problem_details = dataset[problem_ids.index(selected_task_id)]

st.write(f"**Selected Problem ID:** {problem_details['task_id']}")
st.write(f"**Problem Description:**\n{all_model_inputs[selected_task_id]['problem_description']}")
# Display buggy code with syntax highlighting
st.code(problem_details['buggy_solution'], language='python')

# Placeholders that the streamed feedback and corrected code are written into.
status_text = st.empty()
code_output = st.empty()

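# Generators consumed by st.write_stream below: the critic streams feedback for
# the selected task, and the editor streams the corrected code.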
def generate_feedback():
    # Stream the critic's feedback for the selected task.
    return critic_model.stream(input=all_model_inputs[selected_task_id]["model_input"], logit_bias=None)


def generate_corrected_code(feedback):
    # Stream the editor's output, wrapped in a Markdown code fence so that
    # write_stream renders it as a Python code block.
    yield "```python"
    for text_chunk in editor_model.stream(
        input=f"[INST]Buggy Code:\n{problem_details['buggy_solution']}\nFeedback: {feedback}[/INST]",
        logit_bias=None,
    ):
        yield text_chunk
    yield "```"


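# Two-stage flow on button click: first stream the critic's feedback, then pass
# that feedback to the editor to stream the corrected solution.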
if st.button("Generate Feedback and Corrected Code"):
    with st.spinner("Generating feedback..."):
        # Stream the critic's feedback into its placeholder; write_stream
        # returns the full text once streaming finishes.
        print("model input for critic:")
        print(all_model_inputs[selected_task_id]["model_input"])
        feedback = status_text.write_stream(generate_feedback())

    with st.spinner("Generating corrected code..."):
        # Stream the editor's corrected code, conditioned on the feedback above.
        corrected_code = code_output.write_stream(generate_corrected_code(feedback))