| | import gradio as gr |
| | import torch |
| | import transformers |
| | import os |
| |
|
| | |
| | |
# Hugging Face Hub ID of the model to serve (7B instruct-tuned coding model).
MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct"


print("๋ชจ๋ธ์ ๋ก๋ํ๋ ์ค์๋๋ค... ์ด๊ธฐ ์คํ ์ ์๊ฐ์ด ๋ค์ ๊ฑธ๋ฆด ์ ์์ต๋๋ค.")
try:
    # BUG FIX: passing `load_in_4bit=True` directly to from_pretrained() is
    # deprecated (removed in recent transformers releases); quantization
    # options must be wrapped in a BitsAndBytesConfig. 4-bit loading still
    # requires the `bitsandbytes` package at runtime.
    model = transformers.AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",  # let accelerate place layers on available devices
        quantization_config=transformers.BitsAndBytesConfig(load_in_4bit=True),
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_ID)

    # Text-generation pipeline used by respond(); a call returns a list like
    # [{"generated_text": prompt + completion}].
    text_generator = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    print("โ ๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ!")
except Exception as e:
    print(f"โ ๋ชจ๋ธ ๋ก๋ฉ ์คํจ: {e}")

    # BUG FIX: the fallback used to yield a bare string, which crashed the
    # consumer in respond() (it indexes chunk[0]['generated_text'] and would
    # hit the string's first character instead). Mimic the real pipeline's
    # return shape so the error message actually reaches the chat UI.
    def text_generator(*args, **kwargs):
        # Prepend the prompt so callers that slice off len(prompt) still
        # recover the full error message.
        prompt = args[0] if args else ""
        yield [{"generated_text": prompt + "๋ชจ๋ธ์ ๋ก๋ํ๋ ๋ฐ ์คํจํ์ต๋๋ค. Space์ ํ๋์จ์ด ์ค์ ์ ํ์ธํ๊ฑฐ๋ ๋ชจ๋ธ ์ด๋ฆ์ด ์ฌ๋ฐ๋ฅธ์ง ํ์ธํด์ฃผ์ธ์."}]
| |
|
| |
|
def respond(
    message,
    history: list,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate a reply to the user's message and yield it to the chat UI.

    Args:
        message: The latest user message (plain text).
        history: Prior turns. With ``ChatInterface(type="messages")`` Gradio
            passes openai-style ``{"role", "content"}`` dicts; the legacy
            ``(user, assistant)`` tuple format is also accepted.
        system_message: System prompt placed first in the conversation.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The model's completion text (prompt stripped off).
    """
    messages = [{"role": "system", "content": system_message}]

    # BUG FIX: with type="messages" the history items are dicts, not tuples;
    # unpacking them as `user_msg, bot_msg` raised at the first prior turn.
    # Support both shapes so the function works regardless of UI config.
    for item in history:
        if isinstance(item, dict):
            messages.append({"role": item["role"], "content": item["content"]})
        else:
            user_msg, bot_msg = item
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})

    # Render the conversation with the model's chat template; the pipeline
    # receives a single prompt string.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    generation_args = {
        "max_new_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "do_sample": True,
        # BUG FIX: removed `"stream": True` — transformers pipelines have no
        # such kwarg; it is forwarded to generate(), which rejects unknown
        # arguments. Real token streaming would need TextIteratorStreamer
        # running generate() in a background thread.
    }

    # The pipeline returns [{"generated_text": prompt + completion}]; the
    # degraded fallback generator may yield either that shape or a bare
    # string. BUG FIX: the old code did chunk[0]['generated_text'] on a dict
    # (KeyError: 0) when iterating the pipeline's result list.
    for output in text_generator(prompt, **generation_args):
        if isinstance(output, str):
            # Fallback path: a plain error message, nothing to slice.
            yield output
            continue
        item = output if isinstance(output, dict) else output[0]
        # Strip the echoed prompt so only the completion reaches the UI.
        yield item["generated_text"][len(prompt):]
| |
|
| |
|
| | """ |
| | Gradio ChatInterface๋ฅผ ์ฌ์ฉํ์ฌ ์ฑ๋ด UI๋ฅผ ๋ง๋ญ๋๋ค. |
| | """ |
| | chatbot = gr.ChatInterface( |
| | respond, |
| | type="messages", |
| | additional_inputs_accordion="โ๏ธ ๋งค๊ฐ๋ณ์ ์ค์ ", |
| | additional_inputs=[ |
| | gr.Textbox( |
| | value="You are Qwen2.5-Coder, created by Alibaba Cloud. You are a helpful assistant specialized in coding and programming.", |
| | label="System message" |
| | ), |
| | gr.Slider( |
| | minimum=1, |
| | maximum=4096, |
| | value=1024, |
| | step=1, |
| | label="Max new tokens" |
| | ), |
| | gr.Slider( |
| | minimum=0.1, |
| | maximum=4.0, |
| | value=0.7, |
| | step=0.1, |
| | label="Temperature" |
| | ), |
| | gr.Slider( |
| | minimum=0.1, |
| | maximum=1.0, |
| | value=0.95, |
| | step=0.05, |
| | label="Top-p (nucleus sampling)", |
| | ), |
| | ], |
| | examples=[ |
| | ["PyTorch๋ก ๊ฐ๋จํ CNN ๋ชจ๋ธ์ ๋ง๋ค์ด์ค."], |
| | ["์ด ํ์ด์ฌ ์ฝ๋๋ฅผ ์ต์ ํํด์ค:\n\n```python\nfor i in range(len(my_list)):\n print(my_list[i])\n```"], |
| | ["FastAPI๋ก 'hello world'๋ฅผ ์ถ๋ ฅํ๋ API ์๋ํฌ์ธํธ๋ฅผ ๋ง๋ค์ด์ค."], |
| | ], |
| | cache_examples=False, |
| | ) |
| |
|
| | |
# Wrap the chat interface in a Blocks layout so a title and header markdown
# can be placed above it; `demo` is the app object that gets launched.
with gr.Blocks(theme=gr.themes.Soft(), title="๋๋ง์ AI ์ฝ๋ ๋ฆฌ๋") as demo:
    gr.Markdown("# ๐ค ๋๋ง์ AI ์ฝ๋ ๋ฆฌ๋ (Qwen2.5-Coder)")
    gr.Markdown("์ด ์ฑ๋ด์ **Qwen2.5-Coder-7B-Instruct** ๋ชจ๋ธ์ ๊ธฐ๋ฐ์ผ๋ก ์ฝ๋๋ฅผ ์์ฑํ๊ณ  ๋ถ์ํฉ๋๋ค.")
    # Render the ChatInterface constructed above inside this Blocks context.
    chatbot.render()


# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
| |
|