Spaces:

jijivski
/

FreshBench

Build error

File size: 5,632 Bytes

import gradio as gr
import os
# from transformers import AutoTokenizer
# Install the lm-evaluation-harness from source when the app starts.
# Every os.system call runs in its own subshell, so a standalone `cd` would not
# carry over to the next command; the directory change and the editable install
# are therefore chained inside a single shell command.
if not os.path.isdir('lm-evaluation-harness'):
    # Skip the clone when a previous start already fetched the repository.
    os.system('git clone https://github.com/EleutherAI/lm-evaluation-harness')
os.system('cd lm-evaluation-harness && pip install -e .')
# Feature 1: pair each piece of input text with its loss value for colored display.
def color_text(text_list=None, loss_list=None):
    """
    Pair every text segment with its loss value for highlighted display.

    Args:
        text_list: Text segments to show. Defaults to ["hi", "FreshEval"].
        loss_list: Loss values, one per segment. Defaults to [0.1, 0.7].

    Returns:
        A list of (text, loss) tuples. The loss is passed through unchanged
        as the highlight value; no thresholding into fixed colors is applied.
        If the two lists differ in length, the extra items of the longer
        list are dropped.
    """
    # Fresh lists per call instead of shared mutable default arguments.
    if text_list is None:
        text_list = ["hi", "FreshEval"]
    if loss_list is None:
        loss_list = [0.1, 0.7]

    highlighted_text = list(zip(text_list, loss_list))

    # Log the final result once rather than the growing list on every iteration.
    print(highlighted_text)
    return highlighted_text

# Feature 2: turn a list of token IDs into text with a tokenizer, for display.
def get_text(ids_list=[0.1,0.7], tokenizer=None):
    """
    Convert a list of IDs into text pieces (placeholder implementation).

    Both arguments are currently ignored and a fixed sample list is returned.
    """
    # TODO: the drafted implementation loads the tokenizer with
    # AutoTokenizer.from_pretrained and decodes the IDs with
    # skip_special_tokens=True; colors or other styling could be added then.
    # NOTE(review): the draft parsed ids_list with eval(); avoid that for
    # user-supplied input when this is implemented.
    placeholder_pieces = ['Hi', 'Adam']
    return placeholder_pieces


def get_ids_loss(text, tokenizer, model):
    """
    Return the IDs and loss values for a text (placeholder implementation).

    The text, tokenizer and model arguments are currently ignored; a fixed
    pair of lists is returned as a tuple (ids, losses).
    """
    # TODO: load the tokenizer (AutoTokenizer.from_pretrained) and the model
    # (AutoModelForCausalLM.from_pretrained) and compute real values; colors
    # or other styling could be added as needed.
    stub_ids = [1, 2]
    stub_losses = [0.1, 0.7]
    return stub_ids, stub_losses


def color_pipeline(text=["hi", "FreshEval"],  model=None):
    """
    Run the full coloring pipeline for a text and return the colored result.

    Chains get_ids_loss -> get_text -> color_text and returns whatever
    color_text produces for the resulting text pieces and losses.
    """
    # No tokenizer lookup exists yet, so None is passed to the helpers.
    tokenizer = None
    token_ids, losses = get_ids_loss(text, tokenizer, model)
    pieces = get_text(token_ids, tokenizer)
    return color_text(pieces, losses)


# TODO: can this be global? We may need a per-user session to store the user's info.

# Build the Gradio interface.
with gr.Blocks() as demo:
    with gr.Tab("color your text"):
        with gr.Row():
            text_input = gr.Textbox(label="input text", placeholder="input your text here...")
            # TODO drag and drop the file

            model_input = gr.Textbox(label="model name", placeholder="input your model name here...")
            # TODO select models that can be used online
            # TODO maybe add our own models

            # color_pipeline returns a list of (text, loss) pairs, so the output
            # is a HighlightedText component and is wired to the button below.
            color_text_output = gr.HighlightedText(label="colored text")
            # TODO add gr.Examples for this tab once example inputs are decided

        # Keep a reference to the button itself and attach the listener separately.
        color_text_button = gr.Button("color the text")
        color_text_button.click(
            color_pipeline,
            inputs=[text_input, model_input],
            outputs=color_text_output,
        )

        date_time_input = gr.Textbox(label="the date when the text is generated")  # TODO add date time input
        description_input = gr.Textbox(label="description of the text")
        # No submit handler exists yet, so no click listener is attached;
        # wire one with submit_button.click(fn, inputs=..., outputs=...) later.
        submit_button = gr.Button("submit a post or record")
        # TODO add model and its score

    with gr.Tab('test your qeustion'):
        # Planned scoring: use answer extraction, or use perplexity (ppl).
        question = gr.Textbox(placeholder='input your question here...')
        answer = gr.Textbox(placeholder='input your answer here...')
        other_choices = gr.Textbox(placeholder='input your other choices here...')

        def test_question(question, answer, other_choices):
            """
            Score the answer and the other choices for a question.

            Returns the (answer_ppl, other_choices_ppl) pair produced by get_ppl.
            NOTE(review): get_ppl is not defined in this file; it must be
            provided before this handler can be called.
            """
            answer_ppl, other_choices_ppl = get_ppl(question, answer, other_choices)
            return answer_ppl, other_choices_ppl

        # test_question is intentionally not wired to the button yet (see the
        # note in its docstring), so no click listener is attached.
        test_button = gr.Button('test')
        # TODO add the model and its score

    with gr.Tab("model text ppl with time"):
        # TODO see the matplotlib example, to show ppl with time; select the models.
        # TODO load the JSON file with time.
        pass

    with gr.Tab("model quesion acc with time"):
        # TODO see the matplotlib example, to show results with time; select the models.
        pass

    with gr.Tab("hot questions"):
        # TODO show the questions and answers.
        with gr.Tab("ppl"):
            # TODO show the questions.
            pass


# Only start the server when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(debug=True)





# import gradio as gr
# import os
# os.system('python -m spacy download en_core_web_sm')
# import spacy
# from spacy import displacy

# nlp = spacy.load("en_core_web_sm")

# def text_analysis(text):
#     doc = nlp(text)
#     html = displacy.render(doc, style="dep", page=True)
#     html = (
#         "<div style='max-width:100%; max-height:360px; overflow:auto'>"
#         + html
#         + "</div>"
#     )
#     pos_count = {
#         "char_count": len(text),
#         "token_count": 0,
#     }
#     pos_tokens = []

#     for token in doc:
#         pos_tokens.extend([(token.text, token.pos_), (" ", None)])

#     return pos_tokens, pos_count, html

# demo = gr.Interface(
#     text_analysis,
#     gr.Textbox(placeholder="Enter sentence here..."),
#     ["highlight", "json", "html"],
#     examples=[
#         ["What a beautiful morning for a walk!"],
#         ["It was the best of times, it was the worst of times."],
#     ],
# )

# demo.launch()



# # lm-eval 
# # lm-evaluation-harness