from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
from peft import PeftModel, PeftConfig
import torch
import gradio as gr
import random
import textwrap

# Functions to Wrap the Prompt Correctly
def wrap_text(text, width=90):
    lines = text.split('\n')
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text
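
# Example (illustrative): wrap_text("line one\nA very long second line ...") returns
# the same text with each newline-separated segment re-wrapped to at most 90 characters.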

def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:", max_length=512):
    """
    Generates text using a large language model, given a user input and a system prompt.
    Args:
        user_input: The user's input text to generate a response for.
        system_prompt: Optional system prompt.
        max_length: Maximum total sequence length (prompt plus generated tokens).
    Returns:
        A string containing the generated text.
    """
    # Combine user input and system prompt
    formatted_input = f"<s>[INST]{system_prompt} {user_input}[/INST]"

    # Encode the input text
    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
    model_inputs = encodeds.to(device)

    # Generate a response using the model
    output = peft_model.generate(
        **model_inputs,
        max_length=max_length,
        use_cache=True,
        early_stopping=True,
        bos_token_id=peft_model.config.bos_token_id,
        eos_token_id=peft_model.config.eos_token_id,
        pad_token_id=peft_model.config.eos_token_id,
        temperature=0.1,
        do_sample=True
    )

    # Decode the response
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)

    return response_text
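
# Illustrative usage (the helper relies on `tokenizer`, `peft_model`, and `device`
# being defined further below before it is called):
#   answer = multimodal_prompt("What are the common causes of iron deficiency?")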

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Use the base model's ID
base_model_id = "mistralai/Mistral-7B-v0.1"
model_directory = "Tonic/mistralmed"

# Instantiate the Tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True, padding_side="left")
# tokenizer = AutoTokenizer.from_pretrained("Tonic/mistralmed", trust_remote_code=True, padding_side="left")
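# Mistral's tokenizer has no dedicated pad token, so the EOS token is reused for
# padding; left padding keeps prompts right-aligned for decoder-only generation.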
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'

# Specify the configuration class for the model
#model_config = AutoConfig.from_pretrained(base_model_id)

# Load the PEFT model with the specified configuration
#peft_model = AutoModelForCausalLM.from_pretrained(base_model_id, config=model_config)

# Load the PEFT model
peft_config = PeftConfig.from_pretrained(model_directory, token="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")
base_model = MistralForCausalLM.from_pretrained(base_model_id, trust_remote_code=True)
peft_model = PeftModel.from_pretrained(base_model, model_directory, token="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")
peft_model = peft_model.to(device)
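# `peft_model` now wraps the base Mistral-7B weights with the PEFT adapter published
# at Tonic/mistralmed; the generate() calls below run through this combined model.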
# Chat wrapper without persistent memory: history is stored per instance but not fed back into generation
class ChatBot:
    def __init__(self):
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert medical analyst:"):
        # Combine user input and system prompt
        formatted_input = f"<s>[INST]{system_prompt} {user_input}[/INST]"

        # Encode user input
        user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt").to(device)

        # Generate a response using the PEFT model
        response = peft_model.generate(user_input_ids, max_length=512, pad_token_id=tokenizer.eos_token_id)

        # Decode and return the response
        response_text = tokenizer.decode(response[0], skip_special_tokens=True)
        return response_text

bot = ChatBot()
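# Illustrative local check (not part of the Gradio flow):
#   print(bot.predict("What is the proper treatment for buccal herpes?"))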

title = "👋🏻토닉의 미스트랄메드 채팅에 오신 것을 환영합니다🚀👋🏻Welcome to Tonic's MistralMed Chat🚀"
description = "이 공간을 사용하여 현재 모델을 테스트할 수 있습니다. [(Tonic/MistralMed)](https://huggingface.co/Tonic/MistralMed) 또는 이 공간을 복제하고 로컬 또는 🤗HuggingFace에서 사용할 수 있습니다. [Discord에서 함께 만들기 위해 Discord에 가입하십시오](https://discord.gg/VqTxc76K3u). You can use this Space to test out the current model [(Tonic/MistralMed)](https://huggingface.co/Tonic/MistralMed) or duplicate this Space and use it locally or on 🤗HuggingFace. [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
examples = [["[Question:] What is the proper treatment for buccal herpes?", "You are a medicine and public health expert, you will receive a question, answer the question, and provide a complete answer"]]

iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
    examples=examples,
    inputs=["text", "text"],  # Take user input and system prompt separately
    outputs="text",
    theme="ParityError/Anime"
)

iface.launch()