import spaces
import textwrap
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

def wrap_text(text, width=90):
    """Wrap each line of `text` to `width` characters, preserving existing line breaks."""
    lines = text.split('\n')
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text

@spaces.GPU
def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:", example_instruction="produce a json", example_answer="please don't make small talk", max_length=512):
    # Build a Mistral-style prompt: one few-shot exchange, then the system prompt and the user's question
    formatted_input = f"<s> [INST] {example_instruction} [/INST] {example_answer}</s> [INST] {system_prompt}\n{user_input} [/INST]"

    # Encode the input text
    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
    model_inputs = encodeds.to(device)

    # Generate a response using the model
    output = model.generate(
        **model_inputs,
        max_length=max_length,
        use_cache=True,
        bos_token_id=model.config.bos_token_id,
        eos_token_id=model.config.eos_token_id,
        pad_token_id=model.config.eos_token_id,
        temperature=0.1,
        do_sample=True
    )

    # Decode the response
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)

    return response_text
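
# Alternative sketch (an assumption, not part of the original app): if the tokenizer
# ships Mistral's chat template (transformers >= 4.34), the manual "[INST] ... [/INST]"
# string above could instead be built with apply_chat_template:
#
#   messages = [
#       {"role": "user", "content": example_instruction},
#       {"role": "assistant", "content": example_answer},
#       {"role": "user", "content": f"{system_prompt}\n{user_input}"},
#   ]
#   model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)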

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Use the base model's ID
model_id = "SuperAGI/SAM"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side = 'left'

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
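
# Alternative loading sketch (untested assumption): on smaller GPUs the same model
# could be loaded in 4-bit instead, if the optional `bitsandbytes` package is installed:
#
#   from transformers import BitsAndBytesConfig
#   quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
#   model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quant_config, device_map="auto")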


class ChatBot:
    def __init__(self):
        # Initialize the ChatBot class with an empty history
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert medical analyst:", example_instruction="produce a json", example_answer="please don't make small talk"):
        # Build the same Mistral-style prompt: a few-shot exchange, then the system prompt and the user's question
        formatted_input = f"<s> [INST] {example_instruction} [/INST] {example_answer}</s> [INST] {system_prompt}\n{user_input} [/INST]"

        # Encode the formatted input and move it to the model's device
        user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt").to(model.device)

        # Generate a response using the model
        response = model.generate(input_ids=user_input_ids, max_length=512, pad_token_id=tokenizer.eos_token_id)

        # Decode the generated response to text
        response_text = tokenizer.decode(response[0], skip_special_tokens=True)

        return response_text  # Return the generated response

bot = ChatBot()
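
# Quick smoke test (commented out so the Space only launches the Gradio UI;
# the question below is just an illustrative input, not part of the app):
# print(bot.predict("What is the proper treatment for buccal herpes?"))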

title = "🚀👋🏻Welcome to Tonic's🤖SuperAGI/SAM Chat🚀"
description = "SAM is an agentic-native LLM that excels at complex reasoning. Use this Space to test the current model [Tonic/superagi-sam](https://huggingface.co/Tonic/superagi-sam), or duplicate it to run locally or on 🤗HuggingFace. [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
examples = [["[Question:] What is the proper treatment for buccal herpes?", "You are a medicine and public health expert; you will receive a question and provide a complete, well-reasoned answer"]]


def main():
    with gr.Blocks() as demo:
        gr.Markdown(title)
        gr.Markdown(description)
        with gr.Row():
            with gr.Column():
                example_instruction = gr.Textbox(label="Example Instruction")
                example_answer = gr.Textbox(label="Example Answer")
            with gr.Column():
                user_input = gr.Textbox(label="Your Question")
                system_prompt = gr.Textbox(label="System Prompt", value="You are an expert medical analyst:")
        submit_btn = gr.Button("Submit")
        output = gr.Textbox(label="Response")
        gr.Examples(examples=examples, inputs=[user_input, system_prompt])

        # Input order must match ChatBot.predict's signature
        submit_btn.click(
            fn=bot.predict,
            inputs=[user_input, system_prompt, example_instruction, example_answer],
            outputs=output
        )

    demo.launch()

if __name__ == "__main__":
    main()