import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
# Public 4-bit base (no login) + your LoRA
base_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
lora_name = "Quanttum/crypto-llama-lora-final"
tokenizer = AutoTokenizer.from_pretrained(base_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_name,
    device_map="auto",          # automatic placement/offload; works on a T4 or CPU
    torch_dtype=torch.float16,
    # 4-bit quantization is already baked into this checkpoint, so passing
    # load_in_4bit / quantization_config here is unnecessary and would
    # conflict with the embedded config.
)
model = PeftModel.from_pretrained(base_model, lora_name)
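# Optional sketch (assumption, not part of the original app): for serving, the
# LoRA weights could be folded into the base model with PEFT's
# merge_and_unload(), removing the adapter overhead on every forward pass.
# Merging into a bnb 4-bit checkpoint round-trips through dequantization and
# can cost some accuracy, so it is left commented out here.
# model = model.merge_and_unload()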
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,             # temperature only takes effect when sampling is on
    temperature=0.3,
    repetition_penalty=1.15,
)
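# Alternative sketch (assumption, not the original behavior): setting
# return_full_text=False on the pipeline makes it return only the completion,
# which would remove the manual prompt-length slice in chat() below, e.g.:
#   pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
#                   max_new_tokens=512, do_sample=True, temperature=0.3,
#                   repetition_penalty=1.15, return_full_text=False)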
def chat(message, history):
    prompt = f"""You are an expert crypto news summarizer and sentiment analyzer.
Summarize the following news/tweet in 2-4 sentences (TL;DR) and then give a sentiment score from -1.0 (extremely negative) to +1.0 (extremely positive).
News/Tweet:
{message}
TL;DR:"""
    output = pipe(prompt)[0]["generated_text"]
    # The pipeline returns prompt + completion by default; keep only the completion.
    response = output[len(prompt):].strip()
    return response
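# Alternative sketch (assumption, not wired into the UI below): Llama 3.1
# Instruct checkpoints are trained on a chat template, so formatting the
# request with tokenizer.apply_chat_template usually matches the model's
# training format more closely than a raw prompt string.
def chat_with_template(message, history):
    messages = [
        {"role": "system",
         "content": "You are an expert crypto news summarizer and sentiment analyzer."},
        {"role": "user",
         "content": f"Summarize this news/tweet in 2-4 sentences (TL;DR), then "
                    f"give a sentiment score from -1.0 to +1.0.\n\n{message}"},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = pipe(prompt)[0]["generated_text"]
    return output[len(prompt):].strip()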
# Gradio UI: Blocks layout wrapping a ChatInterface
with gr.Blocks() as demo:
    gr.Markdown("# Crypto News Summarizer + Sentiment Analyzer\nFine-tuned Llama 3.1 8B by @Quanttum")
    gr.ChatInterface(
        chat,
        examples=[
            ["Bitcoin ETF inflows hit $1.5B this week!"],
            ["SEC approves spot Ethereum ETF!"],
            ["Mt. Gox starts repaying creditors in BTC."],
            ["China just banned all crypto trading again."],
        ],
        cache_examples=True,
    )
demo.launch()