"""Gradio demo that rewrites Spanish text using inclusive language.

Loads the ``somosnlp/es-inclusivo-translator`` causal language model with
4-bit quantization and exposes a single text-in / text-out interface.
"""

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,  # noqa: F401 - imported by the original script; kept
    AutoTokenizer,
    BitsAndBytesConfig,
)

# Example inputs offered in the UI.
mis_ejemplos = [
    ["La cocina de los gallegos es fabulosa"],
    ["Los niños juegan a la pelota"],
    ["Los científicos son muy trabajadores"],
    ["Las enfermeras se esforzaron mucho durante la pandemia"],
    ["Los ciudadanos Marcos y Ernesto no están contentos con los políticos"],
]

# Generation budget used for ordinary prompts.
DEFAULT_MAX_NEW_TOKENS = 100
# Prompts longer than this many tokens get a budget proportional to their
# length instead of the default one.
LONG_PROMPT_TOKEN_THRESHOLD = 80

# --- Load the complete model in 4 bits ------------------------------------
hub_model = 'somosnlp/es-inclusivo-translator'

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(hub_model, trust_remote_code=True)

# bitsandbytes configuration: 4-bit NF4 weights, bfloat16 compute,
# double quantization disabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Model
model = AutoModelForCausalLM.from_pretrained(
    hub_model,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto",
)

# Generation settings. NOTE: this is the model's own config object, not a
# copy, so anything assigned here is shared by every later generate() call.
generation_config = model.generation_config
generation_config.max_new_tokens = DEFAULT_MAX_NEW_TOKENS
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
generation_config.do_sample = True


def translate_es_inclusivo(exclusive_text):
    """Rewrite a Spanish text using inclusive language.

    Args:
        exclusive_text: The Spanish text to rewrite.

    Returns:
        The text generated by the model after the prompt, decoded with
        special tokens removed. An empty string is returned for empty or
        whitespace-only input, without calling the model.
    """
    if not exclusive_text or not exclusive_text.strip():
        return ""

    # NOTE(review): the whitespace inside this prompt was reconstructed from a
    # collapsed source; confirm it matches the template used for fine-tuning.
    eval_prompt = (
        "Reescribe el siguiente texto utilizando lenguaje inclusivo.\n"
        f" Texto: {exclusive_text}\n"
        " Texto en lenguaje inclusivo:"
    )

    model_input = tokenizer(eval_prompt, return_tensors="pt").to(model.device)
    prompt_token_len = len(model_input['input_ids'][0])

    # Long prompts get room for roughly 1.2x their own length. The value is
    # kept an integer and is passed per call, so one long request does not
    # change the shared generation config for later requests.
    if prompt_token_len > LONG_PROMPT_TOKEN_THRESHOLD:
        max_new_tokens = prompt_token_len + prompt_token_len // 5
    else:
        max_new_tokens = DEFAULT_MAX_NEW_TOKENS

    with torch.no_grad():
        output_ids = model.generate(
            **model_input,
            generation_config=generation_config,
            max_new_tokens=max_new_tokens,
        )

    # Drop the prompt tokens so only the newly generated text is decoded.
    inclusive_text = tokenizer.decode(
        output_ids[0][prompt_token_len:],
        skip_special_tokens=True,
    )
    return inclusive_text


# Markdown shown below the interface.
article = (
    "- **Motivation:** Languages are powerful tools to communicate ideas, but their use is not impartial. "
    "The selection of words carries inherent biases and reflects subjective perspectives. "
    "In some cases, language is wielded to enforce ideologies, "
    "the purpose of this app is to automatically translate Spanish phrases into neutral/inclusive phrases, "
    "while maintaining grammar correctness and consistency.\n"
    "- **Team Members:** Gaia Quintana Fleitas (gaiaq), Andrés Martínez Fernández-Salguero (andresmfs), "
    "Imanuel Rozenberg (manu_20392), Miguel López (wizmik12), Josué Sauca (josue_sauca).\n "
    "- **Social Impact:** An inclusive translator holds significant social impact by promoting equity and "
    "representation within texts. By rectifying biases ingrained in language and fostering inclusivity, "
    "it combats discrimination, amplifies the visibility of marginalized groups, and contributes to the "
    "cultivation of a more inclusive and respectful society."
)

iface = gr.Interface(
    fn=translate_es_inclusivo,
    inputs="text",
    outputs="text",
    title="ES Inclusive Language (Hackathon SomosNLP '24)",
    description="Enter a Spanish phrase and get it converted into neutral/inclusive form.",
    examples=mis_ejemplos,
    article=article,
)

iface.launch()