| import gradio as gr |
| import torch |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer |
| from threading import Thread |
|
|
# Hugging Face Hub IDs: the instruction-tuned base model and the LoRA
# adapter (fine-tuned weights) that is layered on top of it.
BASE_MODEL = "Qwen/Qwen2.5-Coder-7B-Instruct"
LORA_REPO = "alxstuff/Lumen-7b-v2"


print("Loading tokenizer...")
# trust_remote_code allows the Hub repo to supply custom tokenizer code.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)


print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,  # half precision halves the memory footprint
    device_map="auto",  # let accelerate place weights on available devices
    trust_remote_code=True,
    low_cpu_mem_usage=True,  # stream weights in instead of a full CPU copy
)


print("Loading LoRA adapter...")
# Apply the fine-tuned adapter weights on top of the frozen base model.
# NOTE(review): load_adapter requires peft to be installed — confirm it is
# listed in the project's dependencies.
model.load_adapter(LORA_REPO)
model.eval()  # inference mode: disables dropout / training-only behavior
print("✅ Lumen ready!")
|
|
def chat(message, history):
    """Stream Lumen's reply to *message*, given the prior chat *history*.

    Args:
        message: The latest user message as a string.
        history: Prior turns. Supports both Gradio history formats:
            a list of ``(user, assistant)`` tuples (classic format), or a
            list of ``{"role": ..., "content": ...}`` dicts
            (``type="messages"`` format).

    Yields:
        The accumulated response text, growing token by token so Gradio
        renders a live stream.
    """
    # Build a ChatML prompt (Qwen's native chat format) with a fixed
    # system turn that defines the assistant persona.
    prompt = "<|im_start|>system\nYou are Lumen, an expert AI coding assistant built by TheAlxLabs. You write clean, efficient code and explain it clearly.<|im_end|>\n"
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format: one dict per role.
            prompt += f"<|im_start|>{turn['role']}\n{turn['content']}<|im_end|>\n"
        else:
            # Classic tuple format: one (user, assistant) pair per turn.
            user, assistant = turn
            prompt += f"<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n{assistant}<|im_end|>\n"
    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Run generation on a worker thread so this generator can consume the
    # streamer incrementally on the request thread.
    thread = Thread(target=model.generate, kwargs={
        **inputs,
        "streamer": streamer,
        "max_new_tokens": 1024,
        "temperature": 0.2,
        "do_sample": True,
        # Explicit pad token avoids the "pad_token_id not set" warning
        # during open-ended generation.
        "pad_token_id": tokenizer.eos_token_id,
    })
    thread.start()

    try:
        response = ""
        for token in streamer:
            response += token
            yield response
    finally:
        # Always reap the worker, even if the client disconnects mid-stream,
        # so no dangling generation thread is left behind.
        thread.join()
|
|
# Canned prompts shown under the chat box as one-click examples.
EXAMPLE_PROMPTS = [
    "Write a Python function to reverse a linked list",
    "Explain what this does: `[x for x in range(10) if x % 2 == 0]`",
    "Fix this bug: TypeError: 'NoneType' object is not subscriptable",
]

# Wire the streaming chat handler into a Gradio chat UI and serve it.
demo = gr.ChatInterface(
    fn=chat,
    title="⚡ Lumen — AI Coding Assistant",
    description="Local-first AI coding assistant by TheAlxLabs.",
    examples=EXAMPLE_PROMPTS,
)
demo.launch()