import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Public 4-bit base (no login) + your LoRA
base_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
lora_name = "Quanttum/crypto-llama-lora-final"

tokenizer = AutoTokenizer.from_pretrained(base_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_name,
    device_map="auto",          # auto offload = works on T4 and CPU
    torch_dtype=torch.float16,
    # 4-bit quantization is already baked into this checkpoint →
    # no load_in_4bit / quantization_config override needed here.
)
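
# Attach the LoRA adapter on top of the frozen 4-bit base model.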
model = PeftModel.from_pretrained(base_model, lora_name)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,             # required: temperature has no effect without sampling
    temperature=0.3,
    repetition_penalty=1.15,
)
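
# ChatInterface handler: `history` is ignored, so each message is
# summarized independently rather than as a running conversation.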
def chat(message, history):
    prompt = f"""You are an expert crypto news summarizer and sentiment analyzer.
Summarize the following news/tweet in 2-4 sentences (TL;DR) and then give a sentiment score from -1.0 (extremely negative) to +1.0 (extremely positive).
News/Tweet:
{message}
TL;DR:"""
    output = pipe(prompt)[0]["generated_text"]
    # The pipeline returns prompt + completion, so slice off the prompt.
    response = output[len(prompt):].strip()
    return response

# Fixed Gradio 6.0 syntax
with gr.Blocks() as demo:
    gr.Markdown("# Crypto News Summarizer + Sentiment Analyzer\nFine-tuned Llama 3.1 8B by @Quanttum")
    gr.ChatInterface(
        chat,
        examples=[
            ["Bitcoin ETF inflows hit $1.5B this week!"],
            ["SEC approves spot Ethereum ETF!"],
            ["Mt. Gox starts repaying creditors in BTC."],
            ["China just banned all crypto trading again."],
        ],
        cache_examples=True,
    )
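    # Note: cache_examples=True pre-runs the model on each example at startup,
    # which can make the first launch slow on CPU-only Spaces hardware.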

demo.launch()