# Spaces: Build error
# Runtime bootstrap: auto-gptq is installed from a prebuilt wheel (CUDA 11.8,
# CPython 3.10, linux x86_64) before it is imported below.
# IPython "!" shell escapes are a SyntaxError in a plain .py script, so the
# install is done with subprocess instead.
import importlib.util
import os
import subprocess
import sys

_AUTO_GPTQ_WHEEL_URL = (
    "https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/"
    "auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl"
)

# Skip the install when the package is already importable (e.g. on restart).
if importlib.util.find_spec("auto_gptq") is None:
    # pip fetches the wheel straight from the release URL, so no separate
    # download step is needed. BUILD_CUDA_EXT=0 is passed through the
    # environment, as in the original command line.
    subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "-qqq",
            _AUTO_GPTQ_WHEEL_URL,
            "--progress-bar",
            "off",
        ],
        env={**os.environ, "BUILD_CUDA_EXT": "0"},
        check=True,  # fail here rather than with an ImportError further down
    )
    # Make the freshly installed package visible to the import system.
    importlib.invalidate_caches()

import gradio as gr
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer, TextStreamer
# Page title and sample prompts. In the visible code these are referenced only
# by the commented-out gr.load() demo, not by the active gr.Interface.
title = "Npradhaph"
examples = [
    ["The tower is 324 metres (1,063 ft) tall,"],
    ["The Moon's orbit around Earth has"],
    ["The smooth Borealis basin in the Northern Hemisphere covers 40%"],
]
# Load the quantized model and its tokenizer from the Hugging Face Hub.
# Hub repo ids have the form "<namespace>/<name>". The "huggingface/" prefix
# is a gr.load() source selector and is not a valid repo id for
# from_quantized() / from_pretrained().
model_path = "pradhaph/medical-falcon-7b"
model = AutoGPTQForCausalLM.from_quantized(
    model_path,
    revision="main",
    # revision="gptq-8bit-128g-actorder_True",
    model_basename="model",
    use_safetensors=True,
    trust_remote_code=True,
    inject_fused_attention=False,
    # Place the whole model on the first GPU. A bare "cuda" is not one of the
    # device_map placement strategies ("auto", "balanced", "balanced_low_0",
    # "sequential"), so the explicit `device` argument is used instead.
    device="cuda:0",
    quantize_config=None,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
# Inference callback invoked by the Gradio interface.
def answer_question(context: str) -> str:
    """Generate text from *context* using the module-level model and tokenizer.

    Args:
        context: Prompt text entered by the user. It is truncated to 512
            tokens before generation.

    Returns:
        The first generated sequence decoded with special tokens removed, or
        an empty string when *context* is empty or whitespace-only.
    """
    if not context or not context.strip():
        # Nothing to condition on; skip tokenization and generation entirely.
        return ""

    inputs = tokenizer(
        context,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        # generate() raises on model kwargs it does not use; some tokenizers
        # (reportedly Falcon's) emit token_type_ids, so suppress them here.
        return_token_type_ids=False,
    ).to(model.device)  # inputs must live on the same device as the model

    # max_new_tokens bounds only the generated part. max_length would also
    # count the prompt, which may be up to 512 tokens long.
    outputs = model.generate(**inputs, max_new_tokens=200, num_return_sequences=1)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Build the Gradio UI: a single text input and a single text output, with
# answer_question as the callback.
iface = gr.Interface(
    fn=answer_question,
    inputs="text",
    outputs="text",
    title="Question Answering with GPT",
    description="Enter a context to get an answer.",
)
# demo = gr.load(
#     "huggingface/pradhaph/medical-falcon-7b",
#     inputs=gr.Textbox(lines=5, max_lines=6, label="Input Text"),
#     title=title,
#     examples=examples,
#     trust_remote_code=True,
# )
# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
    # demo.launch()