# app.py
import os

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL = "speakleash/Bielik-1.5B-v3.0-Instruct"

HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN is missing. Add an 'HF_TOKEN' secret in the Space settings (Settings → Secrets)."
    )

# Load explicitly with the token so the auth token is definitely passed through.
token_kwargs = {"token": HF_TOKEN}

tokenizer = AutoTokenizer.from_pretrained(MODEL, **token_kwargs)
model = AutoModelForCausalLM.from_pretrained(MODEL, device_map="auto", **token_kwargs)

chat_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


def respond(message, history):
    # Simple concatenation of the conversation history into the prompt
    # (optional; this can be extended further).
    prompt = ""
    if history:
        for u, b in history:
            prompt += f"User: {u}\nAssistant: {b}\n"
    prompt += f"User: {message}\nAssistant:"

    out = chat_pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    gen = out[0]["generated_text"]

    # Keep only the newly generated part (strip the prompt if the model echoed it).
    reply = gen[len(prompt):] if gen.startswith(prompt) else gen

    # gr.ChatInterface manages the conversation history itself,
    # so the function should return only the reply text.
    return reply.strip()


gr.ChatInterface(respond).launch()
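Since Bielik-1.5B-v3.0-Instruct is an instruct-tuned model, the hand-built "User:/Assistant:" prompt could instead be produced with the tokenizer's chat template. A minimal sketch, assuming the tokenizer ships a chat template and that history arrives as (user, assistant) pairs; build_prompt is a hypothetical helper, not part of the original app:

# Sketch only (assumption): build the prompt via the model's chat template
# instead of the hand-rolled "User:/Assistant:" format.
def build_prompt(message, history):
    messages = []
    for u, b in history or []:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": b})
    messages.append({"role": "user", "content": message})
    # tokenize=False returns the formatted prompt string;
    # add_generation_prompt=True appends the assistant turn marker.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

The returned string can be passed to chat_pipe in place of the manually concatenated prompt; the rest of respond stays unchanged.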