import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
# Public 4-bit base (no login) + your LoRA
base_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
lora_name = "Quanttum/crypto-llama-lora-final"
tokenizer = AutoTokenizer.from_pretrained(base_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_name,
    device_map="auto",          # automatic placement/offload; works on a T4 or CPU
    torch_dtype=torch.float16,
    # 4-bit quantization is already baked into this checkpoint, so passing
    # load_in_4bit / quantization_config here is unnecessary and would
    # conflict with the embedded config.
)
model = PeftModel.from_pretrained(base_model, lora_name)
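# Optional sketch (assumption, not part of the original app): for serving, the
# LoRA weights could be folded into the base model with PEFT's
# merge_and_unload(), removing the adapter overhead on every forward pass.
# Merging into a bnb 4-bit checkpoint round-trips through dequantization and
# can cost some accuracy, so it is left commented out here.
# model = model.merge_and_unload()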
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,             # temperature only takes effect when sampling is on
    temperature=0.3,
    repetition_penalty=1.15,
)
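# Alternative sketch (assumption, not the original behavior): setting
# return_full_text=False on the pipeline makes it return only the completion,
# which would remove the manual prompt-length slice in chat() below, e.g.:
#   pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
#                   max_new_tokens=512, do_sample=True, temperature=0.3,
#                   repetition_penalty=1.15, return_full_text=False)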
def chat(message, history):
    prompt = f"""You are an expert crypto news summarizer and sentiment analyzer.
Summarize the following news/tweet in 2-4 sentences (TL;DR) and then give a sentiment score from -1.0 (extremely negative) to +1.0 (extremely positive).
News/Tweet:
{message}
TL;DR:"""
    output = pipe(prompt)[0]["generated_text"]
    # The pipeline returns prompt + completion by default; keep only the completion.
    response = output[len(prompt):].strip()
    return response
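# Alternative sketch (assumption, not wired into the UI below): Llama 3.1
# Instruct checkpoints are trained on a chat template, so formatting the
# request with tokenizer.apply_chat_template usually matches the model's
# training format more closely than a raw prompt string.
def chat_with_template(message, history):
    messages = [
        {"role": "system",
         "content": "You are an expert crypto news summarizer and sentiment analyzer."},
        {"role": "user",
         "content": f"Summarize this news/tweet in 2-4 sentences (TL;DR), then "
                    f"give a sentiment score from -1.0 to +1.0.\n\n{message}"},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = pipe(prompt)[0]["generated_text"]
    return output[len(prompt):].strip()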
# Gradio UI: Blocks layout wrapping a ChatInterface
with gr.Blocks() as demo:
    gr.Markdown("# Crypto News Summarizer + Sentiment Analyzer\nFine-tuned Llama 3.1 8B by @Quanttum")
    gr.ChatInterface(
        chat,
        examples=[
            ["Bitcoin ETF inflows hit $1.5B this week!"],
            ["SEC approves spot Ethereum ETF!"],
            ["Mt. Gox starts repaying creditors in BTC."],
            ["China just banned all crypto trading again."],
        ],
        cache_examples=True,
    )
demo.launch()