Update app.py
Browse files
app.py
CHANGED
@@ -24,22 +24,297 @@ PINECONE_ENVIRONMENT = "us-east-1" # Use the environment you set in the secrets
|
|
24 |
# Initialize Pinecone with the API key
|
25 |
pc = Pinecone(api_key=PINECONE_API_KEY)
|
26 |
|
27 |
-
#
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
print(f"
|
39 |
-
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
# Build the Pinecone client.
# NOTE(review): PINECONE_API_KEY is not defined in the visible part of the
# file; presumably it is set above this point - confirm.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Fixed embedding model and its vector size, stored as module-level settings.
EMBED_MODEL = 'BGE_M3-1024'
DIMENSIONS = 1024

# Echo the selection at start-up (one line per setting).
print(
    f"Model selected: {EMBED_MODEL}",
    f"Dimensions set as: {DIMENSIONS}",
    sep="\n",
)
|
34 |
+
|
35 |
+
def print_current_selection():
    """Print the module-level EMBED_MODEL and DIMENSIONS, one per line."""
    model_line = f"Currently selected model: {EMBED_MODEL}"
    dimensions_line = f"Dimensions: {DIMENSIONS}"
    print(model_line)
    print(dimensions_line)
|
39 |
+
|
40 |
+
# Set the index name automatically (fixed value, no user selection).
INDEX_NAME = 'vestidos'

# Fetch the Pinecone API key from the `userdata` store.
# NOTE(review): `userdata` is not defined in the visible part of the file;
# confirm it is imported above, otherwise this line raises NameError.
PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')
|
45 |
+
|
46 |
+
def connect_to_pinecone(index_name):
    """Connect to a Pinecone index and record it as the current index.

    A client is created with the module-level PINECONE_API_KEY, the index
    handle is opened and ``describe_index_stats()`` is called to make sure
    the connection is really established. On success the module-level
    INDEX_NAME is updated to ``index_name``.

    Failures are reported on stdout instead of being raised (best-effort
    start-up behaviour), and INDEX_NAME is left untouched.

    Args:
        index_name: Name of the Pinecone index to connect to.

    Returns:
        The connected index handle on success, or ``None`` when the
        connection attempt failed. Callers that ignore the return value
        keep working as before.
    """
    global INDEX_NAME
    try:
        # Only the calls that can actually fail live inside the try body.
        client = Pinecone(api_key=PINECONE_API_KEY)
        index = client.Index(index_name)
        # Round-trip to the service so a bad name/key surfaces here.
        index_stats = index.describe_index_stats()
    except Exception as e:
        # Deliberately broad: this is the top-level boundary for start-up
        # connection problems, which are reported rather than propagated.
        print(f"Failed to connect to Pinecone index '{index_name}':", str(e))
        return None

    print(f"Successfully connected to Pinecone index '{index_name}'!")
    print("Index Stats:", index_stats)

    # Update the global INDEX_NAME only after the connection was verified.
    INDEX_NAME = index_name
    print(f"Global INDEX_NAME updated to: {INDEX_NAME}")
    return index
|
63 |
+
|
64 |
+
# Conectar automáticamente al índice "vestidos"
|
65 |
+
connect_to_pinecone(INDEX_NAME)
|
66 |
+
|
67 |
+
def print_current_index():
    """Print the name currently stored in the module-level INDEX_NAME."""
    message = f"Current index name: {INDEX_NAME}"
    print(message)
|
70 |
+
|
71 |
+
# Stop early when a required module-level setting is missing or None.
if globals().get('INDEX_NAME') is None:
    raise ValueError("INDEX_NAME is not set. Please set the index name first.")

if globals().get('EMBED_MODEL') is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")

# Create the Pinecone client.
# NOTE(review): `userdata` is not defined in the visible part of the file;
# confirm it is available here.
PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')
pc = Pinecone(api_key=PINECONE_API_KEY)

# Open the index selected by INDEX_NAME.
index = pc.Index(INDEX_NAME)

# Read the vector dimension reported by the index statistics.
index_stats = index.describe_index_stats()
vector_dim = index_stats['dimension']
print(f"Index dimension: {vector_dim}")

# Metadata fields, set by hand: CONTEXT_FIELDS feed the prompt context and
# LINK_FIELDS are scanned for tags/links when the chat reply is built.
CONTEXT_FIELDS = ['Etiqueta', 'Pregunta 1', 'Pregunta 2', 'Pregunta 3', 'Respuesta Combinada']
LINK_FIELDS = ['Etiqueta', 'Respuesta Combinada']

# Confirm the chosen fields on stdout.
print(f"Context fields set to: {CONTEXT_FIELDS}")
print(f"Link fields set to: {LINK_FIELDS}")
|
97 |
+
|
98 |
+
def get_field_selections():
    """Return the current field selections.

    Returns:
        A dict with the keys ``"CONTEXT_FIELDS"`` and ``"LINK_FIELDS"``
        mapped to the module-level lists of the same names (the list
        objects themselves, not copies).
    """
    return dict(CONTEXT_FIELDS=CONTEXT_FIELDS, LINK_FIELDS=LINK_FIELDS)
|
104 |
+
|
105 |
+
#####################################

# Make sure the settings needed for searching exist before going on.
if globals().get('EMBED_MODEL') is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")
if globals().get('INDEX_NAME') is None:
    raise ValueError("INDEX_NAME is not set. Please create or select an index first.")
if not all(name in globals() for name in ('CONTEXT_FIELDS', 'LINK_FIELDS')):
    raise ValueError("CONTEXT_FIELDS and LINK_FIELDS are not set. Please run the field selection cell first.")

# Load the Sentence-Transformer model used to embed queries.
# NOTE(review): `model_name` is not defined in the visible part of the file
# (only EMBED_MODEL is) - confirm it is set above.
embedding_model = SentenceTransformer(model_name)

# Fresh Pinecone client and a handle on the configured index.
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)
index = pinecone_client.Index(INDEX_NAME)

# Maximum length, in characters, of a prompt built by create_prompt().
LIMIT = 3750
|
124 |
+
|
125 |
+
def vector_search(query):
    """Embed ``query`` and fetch the 3 closest matches from the index.

    Args:
        query: Text to embed with the module-level ``embedding_model``.

    Returns:
        A list with one dict per match that carries metadata:
        ``'content'`` is a space-joined ``"key: value"`` rendering of the
        metadata entries listed in CONTEXT_FIELDS (``'Etiqueta'`` is left
        out), and ``'metadata'`` is the match's metadata itself. An empty
        list is returned when the query yields no matches.
    """
    # Encode the query and hand the vector to Pinecone as a plain list.
    query_vector = embedding_model.encode(query).tolist()
    response = index.query(vector=query_vector, top_k=3, include_metadata=True)

    hits = response['matches']
    if not hits:
        return []

    results = []
    for hit in hits:
        if 'metadata' not in hit:
            continue
        metadata = hit['metadata']
        content = ' '.join(
            f"{k}: {v}"
            for k, v in metadata.items()
            if k in CONTEXT_FIELDS and k != 'Etiqueta'
        )
        results.append({'content': content, 'metadata': metadata})
    return results
|
141 |
+
|
142 |
+
def create_prompt(query, contexts, limit=None):
    """Build the prompt text from the retrieved contexts and the question.

    The prompt is ``"\\n\\nContexto:\\n"`` + the contexts joined by
    ``"\\n\\n---\\n\\n"`` + ``"\\n\\nPregunta: <query>\\nRespuesta:"``. The
    joined contexts are cut so that the whole prompt does not exceed
    ``limit`` characters; the header and the question are never cut.

    Args:
        query: The user's question.
        contexts: Iterable of dicts that each have a ``'content'`` string.
        limit: Maximum prompt length in characters. Defaults to the
            module-level LIMIT when omitted.

    Returns:
        The assembled prompt string.
    """
    if limit is None:
        limit = LIMIT

    prompt_start = "\n\nContexto:\n"
    prompt_end = f"\n\nPregunta: {query}\nRespuesta:"
    joined_contexts = "\n\n---\n\n".join(context['content'] for context in contexts)

    # Clamp at 0: when the header plus question alone exceed the limit the
    # difference is negative, and a negative slice bound would count from
    # the end of the string and keep almost all of the context.
    available_space = max(0, limit - len(prompt_start) - len(prompt_end))

    # Slicing is a no-op when the contexts already fit.
    return prompt_start + joined_contexts[:available_space] + prompt_end
|
154 |
+
|
155 |
+
def complete(prompt):
    """Return the completion(s) for ``prompt``.

    This is a placeholder: ``prompt`` is currently ignored and a fixed
    single-element list is returned.

    Args:
        prompt: The assembled prompt text (unused for now).

    Returns:
        ``["Hola"]``.
    """
    return ["Hola"]
|
157 |
+
|
158 |
+
def check_image_exists(filepath):
    """Tell whether ``filepath`` points at an existing regular file.

    Args:
        filepath: Path of the image to look for; may be empty or ``None``.

    Returns:
        ``True`` only when the path exists and is a file. Directories,
        missing paths, ``None`` and empty strings all give ``False``.
    """
    if not filepath:
        # os.path functions raise TypeError on None; treat "no path" as
        # "no image" instead.
        return False
    # isfile() rather than exists(): a directory is not a usable image.
    return os.path.isfile(filepath)
|
160 |
+
|
161 |
+
# Dress tags whose picture is stored as ".png"; every other tag uses ".jpeg".
_PNG_DRESS_TAGS = frozenset({
    "loro_27", "sirenita_01", "rosa_86", "rosa_82", "rosa_80", "rosa_81",
    "rosa_76", "blanco_58",
})

# All known dress tags. The order is kept exactly as in the original lookup
# table because the first tag contained in a value decides the image.
_DRESS_TAGS = (
    "lila_61", "lila_63", "lila_62", "lila_64",
    "fuxia_70", "fuxia_71", "fuxia_72", "fuxia_73", "fuxia_74",
    "melon_68", "melon_66", "melon_67", "melon_65",
    "vino_19", "vino_20",
    "barney_69",
    "loro_27",
    "lacre_02",
    "amarillo_03", "amarillo_04",
    "azulino_11", "azulino_14", "azulino_12", "azulino_13",
    "beigs_09", "beigs_10", "beigs_07", "beigs_06", "beigs_08", "beigs_05",
    "marina_32", "marina_29", "marina_28", "marina_31", "marina_30",
    "rojo_26", "rojo_23", "rojo_21", "rojo_22", "rojo_25", "rojo_24",
    "celeste_40", "celeste_38", "celeste_39", "celeste_33", "celeste_35",
    "celeste_37", "celeste_41", "celeste_42", "celeste_34", "celeste_36",
    "sirenita_01",
    "marino_18", "marino_17", "marino_16", "marino_15",
    "rosa_87", "rosa_86", "rosa_79", "rosa_82", "rosa_83", "rosa_78",
    "rosa_84", "rosa_85", "rosa_75", "rosa_80", "rosa_81", "rosa_77",
    "rosa_76",
    "blanco_55", "blanco_56", "blanco_53", "blanco_52", "blanco_57",
    "blanco_49", "blanco_51", "blanco_60", "blanco_47", "blanco_44",
    "blanco_50", "blanco_48", "blanco_59", "blanco_43", "blanco_58",
    "blanco_46", "blanco_45", "blanco_54",
)

# Tag -> image path under /content. Built once at import time instead of on
# every chat turn, and used both to recognise tags and to find their image.
_TAGS_TO_IMAGES = {
    tag: f"/content/{tag}.{'png' if tag in _PNG_DRESS_TAGS else 'jpeg'}"
    for tag in _DRESS_TAGS
}


def chat_function(message, history):
    """Answer one chat message and pick the image that goes with it.

    Steps: run ``vector_search`` on the message, build the prompt with
    ``create_prompt``, call ``complete`` and keep only the first line of
    the first completion. The LINK_FIELDS metadata values of the search
    results are then split into values that contain a known dress tag
    (kept internally, never shown) and all other values; the first of the
    other values is appended to the reply text. The image is the one
    belonging to the first tagged value whose file exists.

    Args:
        message: The user's message.
        history: Chat history; accepted for interface compatibility and
            not read here.

    Returns:
        A ``(full_response, image_url)`` tuple: the reply text and the
        path of the associated image, or ``None`` when no image applies.
    """
    # Retrieve contexts, build the prompt and generate the reply.
    search_results = vector_search(message)
    query_with_contexts = create_prompt(message, search_results)
    response = complete(query_with_contexts)

    # Only the first line of the first completion is used; an empty
    # completion list yields an empty reply instead of an IndexError.
    full_response = response[0].split("\n")[0] if response else ""

    # Separate tagged values (used for the image only) from displayable
    # ones, walking results in ranking order and fields in LINK_FIELDS order.
    tags_detected = []
    filtered_links = []
    for result in search_results:
        metadata = result['metadata']
        for field in LINK_FIELDS:
            if field not in metadata:
                continue
            link = metadata.get(field)
            if any(tag in link for tag in _TAGS_TO_IMAGES):
                tags_detected.append(link)  # kept, but never displayed
            else:
                filtered_links.append(link)

    # Append only the first displayable value to the reply.
    if filtered_links:
        full_response += ".\n\nTe detallamos nuestro contenido a continuación:\n" + filtered_links[0]

    # Pick the image of the FIRST tagged value that has a file on disk, so
    # the picture belongs to the same top-ranked result as the text. (A
    # `break` in the inner loop alone would let later results overwrite it.)
    image_url = None
    for tagged_value in tags_detected:
        for key, path in _TAGS_TO_IMAGES.items():
            if key in tagged_value and check_image_exists(path):
                image_url = path
                break
        if image_url is not None:
            break

    return full_response, image_url
|
290 |
+
|
291 |
+
|
292 |
+
def update_image(image_url):
    """Pass a usable image path through; map every falsy value to ``None``.

    Args:
        image_url: Image path, or a falsy value when there is no image.

    Returns:
        ``image_url`` itself when it is truthy, otherwise ``None``.
    """
    return image_url or None
|
297 |
+
|
298 |
+
def process_input(message, history):
    """Handle one submitted message for the Gradio UI.

    Calls ``chat_function``, appends the ``(message, reply)`` pair to
    ``history`` in place and returns the values for the three outputs
    wired to the submit button: chatbot display, history state and image
    state.
    """
    reply, image_path = chat_function(message, history)
    history.append((message, reply))
    return history, history, image_path


# Gradio layout: chat controls in the left column, image in the right one.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            chatbot_input = gr.Textbox(label="Tu mensaje")
            chatbot_output = gr.Chatbot(label="ChatBot")
            chatbot_history = gr.State(value=[])
            image_url = gr.State(value=None)
            submit_button = gr.Button("Enviar")
        with gr.Column(scale=1):
            image_output = gr.Image(label="Imagen asociada")

    # The button updates the chat display, the history and the image state.
    submit_button.click(
        fn=process_input,
        inputs=[chatbot_input, chatbot_history],
        outputs=[chatbot_output, chatbot_history, image_url],
    )
    # A change of the image state is forwarded to the image component.
    image_url.change(fn=update_image, inputs=image_url, outputs=image_output)

# Start the interface.
demo.launch(debug=True)
|
320 |
+
|