import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import (
    MistralForCausalLM,
    TextIteratorStreamer,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from threading import Thread
from queue import Queue
from time import sleep
from os import getenv
from datetime import datetime

from torch import float16
import spaces
import huggingface_hub

from data_logger import log_data


def check_thread(logging_queue: Queue):
    """Background worker: once a minute, flush queued user inputs to the HF dataset."""
    logging_callback = log_data(
        hf_token=getenv("HF_API_TOKEN"),
        dataset_name=getenv("OUTPUT_DATASET"),
        private=True,
    )

    while True:
        sleep(60)
        batch = []
        while not logging_queue.empty():
            batch.append(logging_queue.get())

        if len(batch) > 0:
            try:
                logging_callback(batch)
            except Exception:
                print(
                    "Error happened while pushing data to HF. Putting items back in queue..."
                )
                for item in batch:
                    logging_queue.put(item)


if getenv("HF_API_TOKEN") is not None:
    # print("Starting logging thread...")
    # log_queue = Queue()
    # t = Thread(target=check_thread, args=(log_queue,))
    # t.start()
    logging_callback = log_data(
        hf_token=getenv("HF_API_TOKEN"),
        dataset_name=getenv("OUTPUT_DATASET"),
        private=True,
    )
else:
    print("No HF_API_TOKEN found. Logging is disabled.")


config = PeftConfig.from_pretrained("lang-uk/dragoman")

# Load the Mistral base model in 4-bit NF4 and attach the Dragoman LoRA adapter.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=float16,
    bnb_4bit_use_double_quant=False,
)

model = MistralForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", quantization_config=quant_config
)  # device_map="auto",
model = PeftModel.from_pretrained(model, "lang-uk/dragoman").to("cuda")

tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-v0.1", use_fast=False, add_bos_token=False
)


@spaces.GPU(duration=30)
def translate(input_text):
    # global log_queue  # only needed when the queue-based logging thread above is enabled
    # generated_text = ""
    input_text = input_text.strip()

    print(f"{datetime.utcnow()} | Translating: {input_text}")

    # Collect the raw user input for future research and model improvements.
    if getenv("HF_API_TOKEN") is not None:
        try:
            logging_callback([[input_text]])
        except Exception:
            print("Error happened while pushing data to HF.")

    # Dragoman expects the Mistral instruction format.
    input_text = f"[INST] {input_text} [/INST]"
    inputs = tokenizer([input_text], return_tensors="pt").to(model.device)

    generation_kwargs = dict(
        inputs,
        max_new_tokens=200,
        num_beams=10,
        temperature=1,
        pad_token_id=tokenizer.eos_token_id,
    )  # streamer=streamer,

    # Streaming support, disabled for now (`generate` cannot stream beam-search output):
    # streamer = TextIteratorStreamer(
    #     tokenizer, skip_prompt=True, skip_special_tokens=True
    # )
    # thread = Thread(target=model.generate, kwargs=generation_kwargs)
    # thread.start()
    # for new_text in streamer:
    #     generated_text += new_text
    #     yield generated_text
    # generated_text += "\n"
    # yield generated_text

    output = model.generate(**generation_kwargs)
    # Decode and keep only the text after the instruction prompt.
    output = (
        tokenizer.decode(output[0], skip_special_tokens=True)
        .split("[/INST] ")[-1]
        .strip()
    )
    return output


# Download the model card and strip its YAML front matter for the article section.
desc_file = huggingface_hub.hf_hub_download("lang-uk/dragoman", "README.md")
with open(desc_file, "r") as f:
    model_description = f.read()
model_description = model_description[model_description.find("---", 1) + 5 :]

model_description = (
    """### By using this service, users are required to agree to the following terms: you agree that user input will be collected for future research and model improvements.\n\n"""
    + model_description
)
\n\n""" + model_description ) iface = gr.Interface( fn=translate, inputs=gr.Textbox( value='This demo contains a model from paper "Setting up the Data Printer with Improved English to Ukrainian Machine Translation", accepted to UNLP 2024 workshop at the LREC-COLING 2024 conference.', label="Source sentence", ), outputs=gr.Textbox( value='Ця демо-версія містить модель із статті "Налаштування принтера даних із покращеним машинним перекладом з англійської на українську", яка була прийнята до семінару UNLP 2024 на конференції LREC-COLING 2024.', label="Translated sentence", ), examples=[ [ "The Colosseum in Rome was a symbol of the grandeur and power of the Roman Empire and was a place for the emperor to connect with the people by providing them with entertainment and free food." ], [ "How many leaves would it drop in a month of February in a non-leap year?", ], [ "ChatGPT (Chat Generative Pre-trained Transformer) is a chatbot developed by OpenAI and launched on November 30, 2022. Based on a large language model, it enables users to refine and steer a conversation towards a desired length, format, style, level of detail, and language. Successive prompts and replies, known as prompt engineering, are considered at each conversation stage as a context.[2] ", ], [ "who holds this neighborhood?", ], ], title="Dragoman: SOTA English-Ukrainian translation model", description='This demo contains a model from paper "Setting up the Data Printer with Improved English to Ukrainian Machine Translation", accepted to UNLP 2024 workshop at the LREC-COLING 2024 conference.', article=model_description, # thumbnail: str | None = None, # css: str | None = None, # batch: bool = False, # max_batch_size: int = 4, # api_name: str | Literal[False] | None = "predict", submit_btn="Translate", ) iface.launch()