from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr
## Download the GGUF model
model_name = "cris177/Qwen2-Simple-Arguments"
# We use the 4-bit (Q4_K_M) quantization in this example; other quantization
# levels are available in the model repo if preferred.
model_file = "Qwen2_arguments.Q4_K_M.gguf"
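# hf_hub_download caches the file locally and returns its path, so the
# download only happens on the first run.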
model_path = hf_hub_download(model_name, filename=model_file)
## Instantiate model from downloaded file
llm = Llama(
    model_path=model_path,
    n_ctx=2000,      # context window length (tokens)
    n_threads=2,     # number of CPU threads to use
    n_gpu_layers=0   # number of layers to offload to GPU (0 = CPU only)
)
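# Optional sanity check (illustrative only, not part of the app): calling the
# Llama object returns an OpenAI-style completion dict, e.g.
#   out = llm("Hello", max_tokens=8)
#   print(out['choices'][0]['text'])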
def analyze_argument(argument):
    # The fine-tuned model expects the Alpaca prompt format reproduced here.
    instruction = 'Based on the following argument, identify the following elements: premises, conclusion, propositions, type of argument, negation of propositions and validity.'
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:"""
    prompt = alpaca_prompt.format(instruction, argument)
    output = llm(prompt, max_tokens=1000)['choices'][0]['text'].strip()
    return output
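# Example call (hypothetical input; the exact output text depends on the model
# and quantization level):
#   analyze_argument("If it rains the ground gets wet, and it rained, therefore the ground is wet.")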
description = """This tool analyzes simple arguments, that is, arguments composed of at most two propositions.
It uses the fine-tuned LLM from https://huggingface.co/cris177/Qwen2-Simple-Arguments
For faster inference we use the 4-bit quantized model https://huggingface.co/cris177/Qwen2-Simple-Arguments/resolve/main/Qwen2_arguments.Q4_K_M.gguf.
It requires only 3 GB of RAM and runs on just 2 vCPUs, which makes this demo somewhat slow.
"""
gr.Interface(
    analyze_argument,
    inputs="text",
    outputs="text",
    title="Simple Arguments Analyzer",
    description=description,
    examples=[["If it's Wednesday it's cold, and it's cold, therefore it's Wednesday."]]
).launch()