| import subprocess |
| import sys |
|
|
| |
def install(package):
    """Install *package* with pip, using the interpreter that runs this script.

    Args:
        package: Requirement specifier handed to ``pip install`` as a single
            command-line argument.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # Invoking pip through sys.executable targets the current interpreter's
    # environment rather than whichever "pip" happens to be first on PATH.
    pip_command = [sys.executable, "-m", "pip", "install"]
    pip_command.append(package)
    subprocess.check_call(pip_command)
|
|
| |
# Pinned runtime dependencies, installed one by one when the script starts.
packages = [
    "transformers==4.34.0",
    "torch==2.0.1+cu118",
    "gradio==3.39.0",
    "accelerate==0.23.0",
    "bitsandbytes==0.41.1",
    "sentencepiece==0.1.99",
    "python-dotenv==1.0.0",
]

# Extra pip arguments needed by individual requirements. The "+cu118" local
# version of torch is published on the PyTorch wheel index rather than PyPI,
# so pip cannot resolve that pin without the additional index.
extra_pip_args = {
    "torch==2.0.1+cu118": [
        "--extra-index-url",
        "https://download.pytorch.org/whl/cu118",
    ],
}

for package in packages:
    try:
        pip_args = extra_pip_args.get(package)
        if pip_args:
            # Each option must be its own argv entry, so these requirements
            # bypass install(), which forwards a single argument only.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", package, *pip_args]
            )
        else:
            install(package)
    except Exception as e:
        # Best effort: report the failure and keep installing the rest.
        print(f"Failed to install {package}: {e}")
|
|
| |
# Make sure bitsandbytes is importable; if not, retry the install from the
# alternative package index.
try:
    import bitsandbytes
except ImportError:
    # pip treats every argv entry as one token, so the index option and its
    # URL must be separate arguments. Passing "bitsandbytes -i <url>" as a
    # single string makes pip reject it as an invalid requirement.
    # NOTE(review): this points at Test PyPI, which is not a vetted package
    # source - confirm that this index is really intended.
    subprocess.check_call(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "bitsandbytes",
            "-i",
            "https://test.pypi.org/simple/",
        ]
    )
|
|
| from transformers import LlamaTokenizer, AutoModelForCausalLM, BitsAndBytesConfig |
| import gradio as gr |
| from huggingface_hub import login |
| from dotenv import load_dotenv |
| import os |
| import torch |
|
|
| |
# Load variables defined in a local .env file into the environment.
load_dotenv()

# Authenticate against the Hugging Face Hub; stop early when no token is set.
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if not huggingface_token:
    raise ValueError("HUGGINGFACE_TOKEN is missing in .env file!")
login(token=huggingface_token)
|
|
| |
# Hub repository ID shared by the tokenizer and the model loaded below.
model_name = "mistralai/Mistral-7B-v0.3"

# Tokenizer used both to encode prompts and to decode generated token IDs.
tokenizer = LlamaTokenizer.from_pretrained(model_name)
|
|
| |
# Choose where the model weights are placed: "auto" when a CUDA device is
# present, otherwise the CPU. An explicit check is used instead of `assert`
# because assertions are stripped when Python runs with -O, which would skip
# the CPU fallback entirely.
if torch.cuda.is_available():
    device_map = "auto"
else:
    print("CUDA is not available. Install CUDA or use CPU mode.")
    print("Falling back to CPU mode.")
    device_map = "cpu"
|
|
| |
# bitsandbytes 4-bit quantization needs a CUDA GPU, so it is only requested
# when one is available. Without this guard the CPU fallback announced earlier
# in the script could never load the model.
_has_cuda = torch.cuda.is_available()

# NF4 4-bit quantization with float16 compute on GPU; no quantization on CPU.
bnb_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    if _has_cuda
    else None
)

# NOTE(review): on CPU the weights are loaded unquantized in float32 because
# float16 support in CPU kernels is limited; this needs roughly 4 bytes of RAM
# per model parameter - confirm the target machine can hold that.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map,
    torch_dtype=torch.float16 if _has_cuda else torch.float32,
)
|
|
def respond(message, history):
    """Generate the assistant's reply to a single chat message.

    Args:
        message: Text the user just submitted.
        history: Earlier turns supplied by the chat UI. It is part of the
            callback signature used with ``gr.ChatInterface`` but is not read,
            so every reply is generated without conversational context.

    Returns:
        The generated reply text with surrounding whitespace removed.
    """
    inputs = tokenizer(
        f"User: {message}\nAssistant:",
        return_tensors="pt",
        return_attention_mask=True,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
    )

    # For a causal LM the output sequence starts with the prompt tokens.
    # Decoding only what follows them avoids searching the echoed prompt
    # (and the user's own text) for the "Assistant:" marker.
    prompt_length = inputs["input_ids"].shape[-1]
    reply = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # A base model often keeps writing further "User: ... Assistant: ..."
    # turns. Cut at the first invented user turn so the answer to the real
    # message is returned, not the last turn the model made up.
    return reply.split("User:", 1)[0].strip()
|
|
| |
# Build the chat UI around respond(), then start the web server.
chat_ui = gr.ChatInterface(
    respond,
    title="Shërbimi i Konsumatorit",
    examples=[
        "Si mund të rivendos fjalëkalimin?",
        "A e keni në dispozicion këtë produkt?",
    ],
    cache_examples=True,
)

# share=True asks Gradio for a public share link in addition to the local
# server listening on port 7860.
chat_ui.launch(server_port=7860, share=True)
|
|