import os

import bitsandbytes as bnb  # backend used by transformers for 4-bit weight loading
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Read the Hugging Face access token for the gated model repository.
access_token = os.environ["GATED_ACCESS_TOKEN"]

# Quantize to 4-bit NF4 with float16 compute so the 7B model fits on a single GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
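
# With NF4, weights occupy roughly 0.5 bytes per parameter, so the 7B model
# needs on the order of 4 GB of VRAM for the weights, plus activation memory.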
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=quantization_config,
    device_map="auto",  # place layers on the available GPU(s) automatically
    token=access_token,
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", token=access_token)

def generate_text(prompt):
    # Tokenize the prompt and move the input tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=20)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
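
# Example: generate_text("The capital of France is") returns the prompt followed
# by up to 20 new tokens. Generation is greedy by default; passing do_sample=True
# and a temperature to model.generate yields more varied completions.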

iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=5, label="Input Prompt"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Mistral-7B-v0.1 Text Generation",
    description="Use this interface to generate text using the Mistral-7B-v0.1 language model.",
)
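
# Optional: iface.queue() routes requests through a queue so simultaneous
# users don't contend for the single loaded model.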
iface.launch()