C2MV committed on
Commit
9d8a1d3
verified
1 Parent(s): 521b86f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +294 -19
app.py CHANGED
@@ -24,22 +24,297 @@ PINECONE_ENVIRONMENT = "us-east-1" # Use the environment you set in the secrets
24
  # Initialize Pinecone with the API key
25
  pc = Pinecone(api_key=PINECONE_API_KEY)
26
 
27
- # Ruta del archivo CSV
28
- file_path = '/content/dataset.csv'
29
-
30
- # Cargar el dataset y establecerlo como variable global
31
- try:
32
- df = pd.read_csv(file_path)
33
- DATASET = Dataset.from_pandas(df)
34
- print(f"Dataset '{file_path}' loaded successfully.\n")
35
- print(DATASET)
36
- except Exception as e:
37
- DATASET = None
38
- print(f"Error loading dataset: {e}")
39
-
40
- # Función para imprimir la información del dataset actual
41
- def print_dataset_info():
42
- print(f"Current dataset: {DATASET}" if DATASET else "No dataset loaded.")
43
-
44
- # Guardar la variable global para uso en otras celdas
45
- get_ipython().run_cell_magic('capture', '', '%store DATASET')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # Initialize Pinecone with the API key
25
  pc = Pinecone(api_key=PINECONE_API_KEY)
26
 
27
# Embedding-model configuration, shared as module-level globals.
EMBED_MODEL = 'BGE_M3-1024'   # identifier of the embedding model in use
DIMENSIONS = 1024             # vector dimensionality matching that model

# Echo the selection so notebook users can see what was configured.
print(f"Model selected: {EMBED_MODEL}")
print(f"Dimensions set as: {DIMENSIONS}")
34
+
35
# Helper usable from other notebook cells to display the active config.
def print_current_selection():
    """Print the currently selected embedding model and its dimensions."""
    for line in (f"Currently selected model: {EMBED_MODEL}",
                 f"Dimensions: {DIMENSIONS}"):
        print(line)
39
+
40
# Name of the Pinecone index to use (set automatically).
INDEX_NAME = 'vestidos'

# Pinecone API key, pulled from the notebook secrets store.
PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')
45
+
46
def connect_to_pinecone(index_name):
    """Connect to the given Pinecone index and record it globally.

    On success the module-level INDEX_NAME is updated to *index_name*;
    on failure the error is printed and the global is left untouched.
    """
    global INDEX_NAME
    try:
        client = Pinecone(api_key=PINECONE_API_KEY)
        target = client.Index(index_name)

        # Force a round-trip now so a bad index name fails here, not later.
        stats = target.describe_index_stats()
        print(f"Successfully connected to Pinecone index '{index_name}'!")
        print("Index Stats:", stats)

        INDEX_NAME = index_name
        print(f"Global INDEX_NAME updated to: {INDEX_NAME}")
    except Exception as e:
        print(f"Failed to connect to Pinecone index '{index_name}':", str(e))

# Automatically connect to the "vestidos" index.
connect_to_pinecone(INDEX_NAME)
66
+
67
# Helper for other cells: show which index is currently active.
def print_current_index():
    """Print the name of the Pinecone index currently in use."""
    print(f"Current index name: {INDEX_NAME}")
70
+
71
# Guard: the earlier configuration cells must have run before this one.
if globals().get('INDEX_NAME') is None:
    raise ValueError("INDEX_NAME is not set. Please set the index name first.")

if globals().get('EMBED_MODEL') is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")

# Build the Pinecone client from the secrets store.
PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')
pc = Pinecone(api_key=PINECONE_API_KEY)

# Open the configured index.
index = pc.Index(INDEX_NAME)

# Read the index's vector dimensionality from its stats.
index_stats = index.describe_index_stats()
vector_dim = index_stats['dimension']
print(f"Index dimension: {vector_dim}")
89
+
90
# Metadata fields used to build the chat context vs. the link suggestions.
CONTEXT_FIELDS = ['Etiqueta', 'Pregunta 1', 'Pregunta 2', 'Pregunta 3', 'Respuesta Combinada']
LINK_FIELDS = ['Etiqueta', 'Respuesta Combinada']

# Confirm the selected fields.
print(f"Context fields set to: {CONTEXT_FIELDS}")
print(f"Link fields set to: {LINK_FIELDS}")
97
+
98
# Accessor usable from other cells for the current field configuration.
def get_field_selections():
    """Expose the current context/link field selection as a dict."""
    selection = dict(CONTEXT_FIELDS=CONTEXT_FIELDS, LINK_FIELDS=LINK_FIELDS)
    return selection
104
+
105
#####################################

# Guard: these globals must have been set by the earlier cells.
if 'EMBED_MODEL' not in globals() or EMBED_MODEL is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")
if 'INDEX_NAME' not in globals() or INDEX_NAME is None:
    raise ValueError("INDEX_NAME is not set. Please create or select an index first.")
if 'CONTEXT_FIELDS' not in globals() or 'LINK_FIELDS' not in globals():
    raise ValueError("CONTEXT_FIELDS and LINK_FIELDS are not set. Please run the field selection cell first.")

# Initialize the Sentence-Transformer model.
# FIX: the original referenced an undefined name `model_name` (NameError at
# import time); EMBED_MODEL is the model identifier declared above.
# NOTE(review): EMBED_MODEL must be a valid sentence-transformers model id —
# confirm 'BGE_M3-1024' resolves (e.g. vs. 'BAAI/bge-m3').
embedding_model = SentenceTransformer(EMBED_MODEL)

# Connect to the Pinecone index used for retrieval.
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)
index = pinecone_client.Index(INDEX_NAME)

# Maximum prompt length, in characters.
LIMIT = 3750
124
+
125
def vector_search(query):
    """Embed *query* and return the top-3 Pinecone matches as context dicts.

    Each result is ``{'content': <joined field text>, 'metadata': <raw match
    metadata>}``; 'Etiqueta' is excluded from the content text.  Returns an
    empty list when nothing matches.
    """
    query_vec = embedding_model.encode(query)
    response = index.query(vector=query_vec.tolist(), top_k=3, include_metadata=True)

    results = []
    for match in response['matches']:
        if 'metadata' not in match:
            continue
        metadata = match['metadata']
        parts = [f"{k}: {v}" for k, v in metadata.items()
                 if k in CONTEXT_FIELDS and k != 'Etiqueta']
        results.append({'content': ' '.join(parts), 'metadata': metadata})
    return results
141
+
142
def create_prompt(query, contexts):
    """Assemble the RAG prompt, keeping the total length under LIMIT chars.

    Only the context section is truncated; the header and the question/answer
    footer are always preserved intact.
    """
    header = "\n\nContexto:\n"
    footer = f"\n\nPregunta: {query}\nRespuesta:"
    body = "\n\n---\n\n".join(c['content'] for c in contexts)

    if len(header) + len(body) + len(footer) >= LIMIT:
        # Trim the context block to whatever space remains.
        body = body[:LIMIT - len(header) - len(footer)]
    return header + body + footer
154
+
155
def complete(prompt):
    """Placeholder completion backend: always returns one canned greeting.

    NOTE(review): *prompt* is ignored — wire this up to a real LLM call.
    """
    return ["Hola"]  # plain literal; the original used an f-string with no fields
157
+
158
def check_image_exists(filepath):
    """Return True when *filepath* exists on the local filesystem."""
    exists = os.path.exists(filepath)
    return exists
160
+
161
def chat_function(message, history):
    """Answer *message* using RAG over the Pinecone index.

    Returns a tuple ``(full_response, image_url)``.  ``image_url`` is None
    unless a known product tag appears in the link fields AND the tag's
    image file exists on disk.  *history* is accepted for interface
    compatibility but is not read here.
    """
    # Single source of truth mapping product tag -> image file.  The tag set
    # used for detection below is derived from these keys; the original code
    # duplicated the full 88-entry list by hand, which lets the two copies
    # drift apart silently.
    tags_to_images = {
        "lila_61": "/content/lila_61.jpeg",
        "lila_63": "/content/lila_63.jpeg",
        "lila_62": "/content/lila_62.jpeg",
        "lila_64": "/content/lila_64.jpeg",
        "fuxia_70": "/content/fuxia_70.jpeg",
        "fuxia_71": "/content/fuxia_71.jpeg",
        "fuxia_72": "/content/fuxia_72.jpeg",
        "fuxia_73": "/content/fuxia_73.jpeg",
        "fuxia_74": "/content/fuxia_74.jpeg",
        "melon_68": "/content/melon_68.jpeg",
        "melon_66": "/content/melon_66.jpeg",
        "melon_67": "/content/melon_67.jpeg",
        "melon_65": "/content/melon_65.jpeg",
        "vino_19": "/content/vino_19.jpeg",
        "vino_20": "/content/vino_20.jpeg",
        "barney_69": "/content/barney_69.jpeg",
        "loro_27": "/content/loro_27.png",
        "lacre_02": "/content/lacre_02.jpeg",
        "amarillo_03": "/content/amarillo_03.jpeg",
        "amarillo_04": "/content/amarillo_04.jpeg",
        "azulino_11": "/content/azulino_11.jpeg",
        "azulino_14": "/content/azulino_14.jpeg",
        "azulino_12": "/content/azulino_12.jpeg",
        "azulino_13": "/content/azulino_13.jpeg",
        "beigs_09": "/content/beigs_09.jpeg",
        "beigs_10": "/content/beigs_10.jpeg",
        "beigs_07": "/content/beigs_07.jpeg",
        "beigs_06": "/content/beigs_06.jpeg",
        "beigs_08": "/content/beigs_08.jpeg",
        "beigs_05": "/content/beigs_05.jpeg",
        "marina_32": "/content/marina_32.jpeg",
        "marina_29": "/content/marina_29.jpeg",
        "marina_28": "/content/marina_28.jpeg",
        "marina_31": "/content/marina_31.jpeg",
        "marina_30": "/content/marina_30.jpeg",
        "rojo_26": "/content/rojo_26.jpeg",
        "rojo_23": "/content/rojo_23.jpeg",
        "rojo_21": "/content/rojo_21.jpeg",
        "rojo_22": "/content/rojo_22.jpeg",
        "rojo_25": "/content/rojo_25.jpeg",
        "rojo_24": "/content/rojo_24.jpeg",
        "celeste_40": "/content/celeste_40.jpeg",
        "celeste_38": "/content/celeste_38.jpeg",
        "celeste_39": "/content/celeste_39.jpeg",
        "celeste_33": "/content/celeste_33.jpeg",
        "celeste_35": "/content/celeste_35.jpeg",
        "celeste_37": "/content/celeste_37.jpeg",
        "celeste_41": "/content/celeste_41.jpeg",
        "celeste_42": "/content/celeste_42.jpeg",
        "celeste_34": "/content/celeste_34.jpeg",
        "celeste_36": "/content/celeste_36.jpeg",
        "sirenita_01": "/content/sirenita_01.png",
        "marino_18": "/content/marino_18.jpeg",
        "marino_17": "/content/marino_17.jpeg",
        "marino_16": "/content/marino_16.jpeg",
        "marino_15": "/content/marino_15.jpeg",
        "rosa_87": "/content/rosa_87.jpeg",
        "rosa_86": "/content/rosa_86.png",
        "rosa_79": "/content/rosa_79.jpeg",
        "rosa_82": "/content/rosa_82.png",
        "rosa_83": "/content/rosa_83.jpeg",
        "rosa_78": "/content/rosa_78.jpeg",
        "rosa_84": "/content/rosa_84.jpeg",
        "rosa_85": "/content/rosa_85.jpeg",
        "rosa_75": "/content/rosa_75.jpeg",
        "rosa_80": "/content/rosa_80.png",
        "rosa_81": "/content/rosa_81.png",
        "rosa_77": "/content/rosa_77.jpeg",
        "rosa_76": "/content/rosa_76.png",
        "blanco_55": "/content/blanco_55.jpeg",
        "blanco_56": "/content/blanco_56.jpeg",
        "blanco_53": "/content/blanco_53.jpeg",
        "blanco_52": "/content/blanco_52.jpeg",
        "blanco_57": "/content/blanco_57.jpeg",
        "blanco_49": "/content/blanco_49.jpeg",
        "blanco_51": "/content/blanco_51.jpeg",
        "blanco_60": "/content/blanco_60.jpeg",
        "blanco_47": "/content/blanco_47.jpeg",
        "blanco_44": "/content/blanco_44.jpeg",
        "blanco_50": "/content/blanco_50.jpeg",
        "blanco_48": "/content/blanco_48.jpeg",
        "blanco_59": "/content/blanco_59.jpeg",
        "blanco_43": "/content/blanco_43.jpeg",
        "blanco_58": "/content/blanco_58.png",
        "blanco_46": "/content/blanco_46.jpeg",
        "blanco_45": "/content/blanco_45.jpeg",
        "blanco_54": "/content/blanco_54.jpeg",
    }

    # Retrieve contexts and run the (stubbed) completion.
    search_results = vector_search(message)
    query_with_contexts = create_prompt(message, search_results)
    response = complete(query_with_contexts)

    # Only the first line of the generated answer is shown.
    partial_message = response[0].split("\n")[0]

    # Gather link-field values from the matched metadata.
    relevant_links = [
        result['metadata'].get(field)
        for result in search_results
        for field in LINK_FIELDS
        if field in result['metadata']
    ]

    full_response = partial_message
    image_url = None
    tags_detected = []
    filtered_links = []

    # Split links into product tags (kept internal) and displayable links.
    for link in relevant_links:
        if any(tag in link for tag in tags_to_images):
            tags_detected.append(link)  # remember the tag but never display it
        else:
            filtered_links.append(link)

    # Show only the first displayable link, under a single section.
    # (Runtime literal repaired from a mojibake'd source: "continuaci贸n".)
    if filtered_links:
        full_response += ".\n\nTe detallamos nuestro contenido a continuación:\n" + filtered_links[0]

    # Pick the image for a detected tag.  The inner break only exits the dict
    # scan, so a LATER detected tag can overwrite image_url — last match wins,
    # as in the original control flow.  NOTE(review): confirm last-wins is
    # the intended priority.
    for tag in tags_detected:
        for key, path in tags_to_images.items():
            if key in tag and check_image_exists(path):
                image_url = path
                break

    return full_response, image_url
290
+
291
+
292
def update_image(image_url):
    """Pass the image URL through to the display; falsy values become None."""
    return image_url if image_url else None
297
+
298
# Gradio layout setup
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            # Left column: chat input, transcript, hidden state, submit button.
            chatbot_input = gr.Textbox(label="Tu mensaje")
            chatbot_output = gr.Chatbot(label="ChatBot")
            chatbot_history = gr.State(value=[])  # list of (user, bot) turns
            image_url = gr.State(value=None)      # path of the matched product image
            submit_button = gr.Button("Enviar")
        with gr.Column(scale=1):
            # Right column: image associated with the detected product tag.
            image_output = gr.Image(label="Imagen asociada")

    def process_input(message, history):
        # Run the RAG pipeline, then append the new turn to the history state.
        full_response, image = chat_function(message, history)
        history.append((message, full_response))
        return history, history, image

    submit_button.click(process_input, inputs=[chatbot_input, chatbot_history], outputs=[chatbot_output, chatbot_history, image_url])
    # When the image state changes, refresh the displayed image.
    image_url.change(fn=update_image, inputs=image_url, outputs=image_output)

# Launch the interface
demo.launch(debug=True)
320
+