File size: 954 Bytes
3e3c1ee
4550ee1
 
 
2b84e72
4550ee1
58b8a0e
4550ee1
58b8a0e
dfb6a5d
8ed0ff8
58b8a0e
1cb5b17
4550ee1
7d2c16c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4550ee1
8ed0ff8
10c7bd1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import time
import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load the 6-bit (Q6_K) GGUF quantization of Mistral-7B-Instruct via ctransformers.
# gpu_layers=0 keeps inference entirely on CPU; context_length caps the
# combined prompt + generation window at 2048 tokens.
model = AutoModelForCausalLM.from_pretrained("mistral-7b-instruct-v0.1.Q6_K.gguf", model_type="mistral", gpu_layers=0, context_length=2048)

# NOTE(review): this module-level list is shadowed by the `history`
# parameter of generateResponse below and looks dead — confirm nothing
# else imports it before removing.
history = []

def generateResponse(prompt, history):
    """Generate one assistant reply for gr.ChatInterface.

    Parameters
    ----------
    prompt : str
        The user's latest message.
    history : list
        Prior [user, assistant] turns, supplied and managed by Gradio.

    Returns
    -------
    str
        The model's reply text.
    """
    # Wrap the message in the Mistral-Instruct chat template.
    formatted_prompt = f"<s>[INST] {prompt} [/INST]"
    response = model(formatted_prompt, max_new_tokens=1024)
    # Do NOT append to `history` here: gr.ChatInterface records the
    # (prompt, response) pair itself after this function returns, so the
    # original manual append duplicated the turn in Gradio's history.
    return response

# Example prompts rendered as clickable suggestions in the chat UI.
examples = ['Write a poem', 'Tell me a joke', 'Write a marketing catch phrase for an AI app']

# Title shown at the top of the Space.
title = "Mistral-7B-Instruct-v0.1-GGUF"

# Fixed: the loaded weights file is mistral-7b-instruct-v0.1.Q6_K.gguf,
# i.e. 6-bit quantization — the original text wrongly said "4 bit".
description = "This space is an attempt to run the GGUF 6 bit quantized version of 'Mistral-7B-Instruct-v0.1'."

# Build the chat UI: fn receives (message, history) and returns the reply,
# which Gradio appends to the conversation automatically.
UI = gr.ChatInterface(
    fn=generateResponse,
    examples=examples,
    title=title,
    description=description,
    # NOTE(review): string-valued submit_btn/stop_btn/clear_btn belong to
    # the Gradio 3.x ChatInterface API; these arguments changed in
    # Gradio 4 — confirm the pinned gradio version before upgrading.
    submit_btn="Submit",
    stop_btn="Stop generating",
    clear_btn="Clear chat"
)

# Queue requests so long CPU generations don't drop connections.
# NOTE(review): `concurrency_count` was removed in Gradio 4 (replaced by
# `default_concurrency_limit`) — verify the installed version.
UI.queue(max_size=10, concurrency_count=16)
UI.launch()