File size: 3,822 Bytes
5b1fbfd
 
 
 
73fa793
 
c226f5a
 
 
 
 
73fa793
 
5b1fbfd
022b401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294be51
 
c226f5a
 
294be51
5b1fbfd
 
022b401
5b1fbfd
 
294be51
f53a293
294be51
 
5b1fbfd
0337b58
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


# Example input sentences for the demo.
# Each entry is a one-element list: a single row holding the value of the
# single text input.
mis_ejemplos = [
    [sentence]
    for sentence in (
        "La cocina de los gallegos es fabulosa",
        "Los niños juegan a la pelota",
        "Los científicos son muy trabajadores",
        "Las enfermeras se esforzaron mucho durante la pandemia",
        "Los ciudadanos Marcos y Ernesto no están contentos los políticos",
    )
]

# Load complete model in 4bits
##################
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Hub repository that holds both the tokenizer and the model weights.
hub_model = 'Andresmfs/merged_aguila-prueba-guardado'

# Tokenizer
# NOTE(review): trust_remote_code=True lets custom code shipped in the hub
# repository run locally -- confirm this is required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(hub_model, trust_remote_code=True)

# Quantization settings: 4-bit NF4 weights, bfloat16 compute dtype,
# double quantization disabled.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True,
)

# Model, loaded with the quantization settings above; device placement is
# delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    hub_model,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# Generation settings. This is the model's own config object (not a copy),
# so the assignments below modify model.generation_config in place.
generation_config = model.generation_config

# Sampling: temperature / top_p only matter when do_sample is enabled.
generation_config.do_sample = True
generation_config.temperature = 0.7
generation_config.top_p = 0.7

# Output size: one sequence, at most 100 newly generated tokens by default.
generation_config.num_return_sequences = 1
generation_config.max_new_tokens = 100

# Use the tokenizer's EOS token both to stop generation and as padding.
generation_config.eos_token_id = tokenizer.eos_token_id
generation_config.pad_token_id = tokenizer.eos_token_id

# Define inference function
def translate_es_inclusivo(exclusive_text):
    """Rewrite a Spanish text using inclusive language.

    Builds the instruction prompt around ``exclusive_text``, lets the
    module-level ``model`` generate a continuation and returns only the newly
    generated part (the prompt tokens are sliced off before decoding).

    The shared ``generation_config`` is never modified here: when a long
    prompt needs a larger generation budget, the budget is passed to
    ``generate`` as a per-call override, so one long request does not change
    the limit used by later requests.

    Args:
        exclusive_text: Spanish text to be rewritten.

    Returns:
        The decoded model output that follows the prompt, with special
        tokens removed.
    """
    # Build the input prompt. Wording and whitespace are kept exactly as they
    # were; changing them changes what the model receives.
    eval_prompt = f"""Reescribe el siguiente texto utilizando lenguaje inclusivo.\n
      Texto: {exclusive_text}\n
      Texto en lenguaje inclusivo:"""

    # Tokenize and move the tensors to the device the model lives on.
    model_input = tokenizer(eval_prompt, return_tensors="pt").to(model.device)

    # Number of prompt tokens; used to size the budget and to strip the
    # prompt from the generated sequence.
    prompt_token_len = len(model_input['input_ids'][0])

    # For long prompts allow roughly 1.2x the prompt length in new tokens.
    # Integer arithmetic keeps max_new_tokens an int (a token count), and the
    # value is a per-call override instead of a write to the shared config.
    generate_overrides = {}
    if prompt_token_len > 80:
        generate_overrides["max_new_tokens"] = prompt_token_len + prompt_token_len // 5

    # Generate without tracking gradients (inference only).
    with torch.no_grad():
        output_ids = model.generate(
            **model_input,
            generation_config=generation_config,
            **generate_overrides,
        )

    # Keep only the tokens produced after the prompt and decode them.
    inclusive_text = tokenizer.decode(
        output_ids[0][prompt_token_len:],
        skip_special_tokens=True,
    )

    return inclusive_text
    

# Markdown shown with the demo; handed to gr.Interface through its `article`
# argument. Three bullet points separated by newlines.
# NOTE(review): the text is reproduced verbatim, including the existing typos
# "th purpose" and "mantaining" and the leading space before the last bullet.
article = "\n".join(
    [
        "- **Motivation:** Languages are powerful tools to communicate ideas, but their use is not impartial. "
        "The selection of words carries inherent biases and reflects subjective perspectives. "
        "In some cases, language is wielded to enforce ideologies, "
        "th purpose of this app is to automatically translate Spanish phrases into neutral/inclusive phrases, "
        "while mantaining grammar correctness and consistency.",
        "- **Team Members:** Gaia Quintana Fleitas (gaiaq), Andrés Martínez Fernández-Salguero (andresmfs), "
        "Imanuel Rozenberg (manu_20392), Miguel López (wizmik12), Josué Sauca (josue_sauca).",
        " - **Social Impact:** An inclusive translator holds significant social impact by promoting equity and "
        "representation within texts. By rectifying biases ingrained in language and fostering inclusivity, "
        "it combats discrimination, amplifies the visibility of marginalized groups, and contributes to the "
        "cultivation of a more inclusive and respectful society.",
    ]
)
    
# Gradio UI: a single text input fed to translate_es_inclusivo, a single
# text output, plus the example sentences and the descriptive article.
iface = gr.Interface(
    # Presentation
    title="ES Inclusive Language (Hackathon SomosNLP '24)",
    description="Enter a Spanish phrase and get it converted into neutral/inclusive form.",
    article=article,
    examples=mis_ejemplos,
    # Behaviour
    fn=translate_es_inclusivo,
    inputs="text",
    outputs="text",
)

# Start serving the interface as soon as the module is executed.
iface.launch()