app.py
Add the app.py file with the AI code
app.py
ADDED
@@ -0,0 +1,172 @@
import pickle

import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import VotingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer


def train_creative_model(text_data):
    if not text_data:
        print("Not enough data to train the creative model.")
        return None, None, None

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(text_data)
    total_words = len(tokenizer.word_index) + 1

    # Build (current word, next word) pairs so the model learns next-word
    # prediction, which is what generate_creative_text expects.
    input_sequences = []
    for line in text_data:
        for token in line.split('\t'):  # phrases are tab-separated
            token_list = tokenizer.texts_to_sequences([token])[0]
            for i in range(len(token_list) - 1):
                input_sequences.append((token_list[i], token_list[i + 1]))

    if not input_sequences:
        print("Not enough sequences to train the creative model.")
        return None, None, None

    pairs = np.array(input_sequences)
    X = pairs[:, 0].reshape(-1, 1)  # shape (n, 1) so Embedding gives LSTM a time axis
    y = tf.keras.utils.to_categorical(pairs[:, 1], num_classes=total_words)

    model = Sequential()
    model.add(Embedding(total_words, 50, input_length=1))
    model.add(LSTM(100))
    model.add(Dense(total_words, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(X, y, epochs=50, verbose=0)

    return model, tokenizer, None  # None stands in for creative_max_sequence_length

file_path = 'dialogs.csv'
df = pd.read_csv(file_path)

# Create a TF-IDF vectorizer
vectorizer = TfidfVectorizer()

# Vectorize the prompts; the answers serve as labels
X = vectorizer.fit_transform(df['Prompt']).toarray()
y = df['Answer']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the base models
tree_model = DecisionTreeClassifier()
nn_model = MLPClassifier(batch_size=32)

# Combine them into a hard-voting classifier
voting_clf = VotingClassifier(estimators=[('tree', tree_model), ('nn', nn_model)], voting='hard')

# Load the pre-trained models
with open('Voting_model.pkl', 'rb') as file:
    voting_model = pickle.load(file)
with open('Creative_model.pkl', 'rb') as file:
    creative_model = pickle.load(file)
# The fitted tokenizer is also needed at inference time; the file name below
# is an assumption about how it was exported alongside the model.
with open('Creative_tokenizer.pkl', 'rb') as file:
    creative_tokenizer = pickle.load(file)

def get_combined_response(prompt, voting_model, creative_model, tokenizer, creative_max_sequence_length):
    # Let the voting model pick the closest known answer
    prompt_vector = vectorizer.transform([prompt]).toarray()
    response_index = voting_model.predict(prompt_vector)[0]

    # Seed the creative model with the prompt that maps to that answer
    seed_text = df.loc[df['Answer'] == response_index, 'Prompt'].values[0]
    creative_response = generate_creative_text(seed_text, CREATIVE_NEXT_WORDS, creative_model, tokenizer, creative_max_sequence_length)
    return "Answer 1: " + df.loc[df['Answer'] == response_index, 'Answer'].values[0] + " // Answer 2: " + creative_response


def generate_creative_text(seed_text, next_words, model, tokenizer, max_sequence_length):
    generated_text = seed_text
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        if max_sequence_length is not None:
            token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')
        else:
            token_list = np.array([token_list])

        # Take the most probable next word
        predicted_probabilities = model.predict(token_list, verbose=0)
        predicted = np.argmax(predicted_probabilities)

        # Map the predicted index back to its word
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break

        seed_text += " " + output_word
        generated_text += " " + output_word

    return generated_text


# Inference-time constants
creative_max_sequence_length = 10  # Must match the value used during training
VOTING_RESPONSE_INDEX = 0          # Index for voting-model responses (currently unused)
CREATIVE_NEXT_WORDS = 10           # Number of words the creative model appends

def chat_interface(user_input):
    global df, X, y

    response = get_combined_response(user_input, voting_model, creative_model, creative_tokenizer, creative_max_sequence_length)

    # Display the response to the user
    print(f"Model Response: {response}")

    # Ask the user for a score (input() reads from the console where the
    # app is running, not from the Gradio page)
    score = int(input(f"Score for the response '{response}': "))

    # Lightweight retraining process
    if score <= 2:
        # Ask the user for the correct response
        correct_response = input(f"The current response is '{response}'. What is the correct response?: ")

        # Update the data only if the correct response differs from the current one
        if correct_response.lower() != response.lower():
            new_data = {'Prompt': user_input, 'Answer': correct_response}
            df = pd.concat([df, pd.DataFrame([new_data])], ignore_index=True)

            with open('dialogs.txt', 'a') as dialogs_file:
                dialogs_file.write(f"{user_input}\t{correct_response}\n")

            new_X = vectorizer.transform([user_input]).toarray()
            new_y = [correct_response]
            X = np.concatenate((X, new_X))
            y = np.concatenate((y, new_y))

            # Re-train the voting classifier with the new data
            #voting_clf.fit(X, y)

            print("Thanks for your correction! The model has been updated to improve. Next time it will take your corrected answers into account.")
        else:
            print("Understood. No correction needed.")
    else:
        print("Thanks for your feedback!")

    # Save the updated DataFrame back to the CSV
    df.to_csv('dialogs.csv', index=False)

    return response

# Create a Gradio interface
iface = gr.Interface(fn=chat_interface, inputs="text", outputs="text")
iface.launch()
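
The app assumes Voting_model.pkl, Creative_model.pkl and a pickled tokenizer already exist, but the commit does not show how they were produced. A minimal one-off export sketch follows, assuming it runs with train_creative_model from app.py in scope and on the same dialogs.csv that app.py re-fits its TF-IDF vectorizer on, so the pickled voting model and the runtime features stay aligned; everything beyond the pickle file names app.py loads is illustrative:

import pickle

import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

# Assumes train_creative_model (defined in app.py) is available in this scope;
# importing app.py directly would run its module-level pickle loads.

df = pd.read_csv('dialogs.csv')

# Fit the TF-IDF features and the hard-voting classifier on the full data.
# TfidfVectorizer fitting is deterministic, so app.py's re-fit on the same
# CSV reproduces the same feature space.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Prompt']).toarray()
y = df['Answer']

voting_clf = VotingClassifier(
    estimators=[('tree', DecisionTreeClassifier()), ('nn', MLPClassifier(batch_size=32))],
    voting='hard',
)
voting_clf.fit(X, y)

# Train the creative next-word model on the answers
creative_model, creative_tokenizer, _ = train_creative_model(df['Answer'].tolist())

with open('Voting_model.pkl', 'wb') as f:
    pickle.dump(voting_clf, f)
with open('Creative_model.pkl', 'wb') as f:
    pickle.dump(creative_model, f)  # recent tf.keras models support pickling
with open('Creative_tokenizer.pkl', 'wb') as f:
    pickle.dump(creative_tokenizer, f)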