rahgadda commited on
Commit
e02721d
1 Parent(s): 7b2240e

Initial Draft

Browse files
Files changed (1) hide show
  1. train.py +750 -0
train.py ADDED
@@ -0,0 +1,750 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import time
3
+ import gradio as gr
4
+ from weaviate.client import Client
5
+ from pypdf import PdfReader
6
+ from langchain.text_splitter import CharacterTextSplitter
7
+ import tempfile
8
+ import pandas as pd
9
+ from bs4 import BeautifulSoup
10
+ from sentence_transformers import SentenceTransformer
11
+
12
+ ############################
13
+ ### Variable Declaration ###
14
+ ############################
15
+
16
# -- UI Variables
# Product details typed in or uploaded by the user
ui_product_name = gr.Textbox(
    placeholder="Product Name, OFSLL",
    label="Product Name",
)
ui_product_description = gr.Textbox(
    placeholder="Product Desc, Oracle Financial Lending and Leasing",
    label="Product Description",
)
ui_product_prompt = gr.Textbox(
    placeholder="Prompt,what {text} w.r.t OFSLL",
    label="Prompt",
)
ui_product_um = gr.File(label="Upload User Manual", file_types=[".pdf"])
ui_product_mapping = gr.File(label="Upload Mapping Excel", file_types=[".xlsx"])

# Environment settings: embedding model and vector database location
ui_model_name = gr.Textbox(
    placeholder="Semantic Search Model, https://www.sbert.net/docs/pretrained_models.html#semantic-search",
    label="Semantic Search Model",
)
ui_weaviate_url = gr.Textbox(
    placeholder="Weaviate URL, https://weaviate.xxx",
    label="Weaviate URL",
)

# Output area that shows the text returned by the submit handler
ui_output = gr.Textbox(lines=22, label="Output")


# -- Placeholder Variables
# Order matters: it must match the parameter order of submit().
p_inputs = [
    ui_model_name,
    ui_weaviate_url,
    ui_product_name,
    ui_product_description,
    ui_product_prompt,
    ui_product_um,
    ui_product_mapping,
]

# -- Global variables
# Settings copied from the form by update_global_variables()
g_ui_model_name = ""
g_product_name = ""
g_product_description = ""
g_product_prompt = ""
# Text accumulated during a run and returned to the UI
g_output = ""
g_weaviate_url = ""
# Weaviate client, assigned by weaviate_client()
g_client = None
51
+
52
+ ############################
53
+ ###### Generic Code #######
54
+ ############################
55
+
56
+ # -- Updating global variables
57
def update_global_variables(ui_model_name, ui_weaviate_url, ui_product_name, ui_product_description, ui_product_prompt):
    """Validate the form values and copy them into the module-level settings.

    The model name, Weaviate URL, product name and product description are
    checked in that order; the first one that is an empty string raises
    ``ValueError``. The prompt is optional. One line per processed value is
    appended to ``g_output``.

    Raises:
        ValueError: if a mandatory value is an empty string.
    """
    global g_ui_model_name, g_weaviate_url, g_product_name
    global g_product_description, g_product_prompt, g_output

    # Reset every setting before validating the new values.
    g_ui_model_name = g_weaviate_url = g_product_name = ""
    g_product_description = g_product_prompt = ""

    print("started function - update_global_variables")

    try:
        # Sbert model name (mandatory)
        if ui_model_name == "":
            print("exception in function - update_global_variables")
            raise ValueError('Required Sbert Model Name')
        note = 'Setting g_ui_model_name - ' + ui_model_name
        print(note)
        g_ui_model_name = ui_model_name
        g_output = g_output + note

        # Weaviate URL (mandatory)
        if ui_weaviate_url == "":
            print("exception in function - update_global_variables")
            raise ValueError('Required Weaviate VectorDB URL')
        note = 'Setting g_weaviate_url - ' + ui_weaviate_url
        print(note)
        g_weaviate_url = ui_weaviate_url
        g_output = g_output + '\n' + note

        # Product name (mandatory)
        if ui_product_name == "":
            print("exception in function - update_global_variables")
            raise ValueError('Required Product Name')
        note = 'Setting g_product_name - ' + ui_product_name
        print(note)
        g_product_name = ui_product_name
        g_output = g_output + '\n' + note

        # Product description (mandatory)
        if ui_product_description == "":
            print("exception in function - update_global_variables")
            raise ValueError('Required Product Description')
        note = 'Setting g_product_description - ' + ui_product_description
        print(note)
        g_product_description = ui_product_description
        g_output = g_output + '\n' + note

        # Prompt (optional): an empty value is reported, not rejected
        if ui_product_prompt == "":
            print("No prompting specified")
            g_output = g_output + '\nNo values set for g_product_prompt'
        else:
            note = 'Setting g_product_prompt - ' + ui_product_prompt
            print(note)
            g_product_prompt = ui_product_prompt
            g_output = g_output + '\n' + note

    finally:
        print("completed function - update_global_variables")
122
+
123
+ # -- Create Weaviate Connection
124
def weaviate_client():
    """Create a Weaviate ``Client`` for ``g_weaviate_url`` and store it in ``g_client``.

    On success a confirmation line is appended to ``g_output``.

    Raises:
        ValueError: if constructing the client raises any exception.
    """
    global g_client, g_output, g_weaviate_url

    try:
        g_client = Client(url=g_weaviate_url, timeout_config=(3.05, 9.1))
        success_note = "Weaviate client connected successfully!"
        print(success_note)
        g_output = g_output + success_note
    except Exception as error:
        print("Failed to connect to the Weaviate instance." + str(error))
        raise ValueError('Failed to connect to the Weaviate instance.')
136
+
137
+ # -- Convert input to CamelCase
138
def convert_to_camel_case(string):
    """Return *string* as a CamelCase identifier.

    The input is split on every run of underscores, whitespace or other
    non-word characters; each piece is capitalised with ``str.capitalize``
    (first character upper-cased, the rest lower-cased) and the pieces are
    joined without a separator. Splitting on more than ``_`` keeps spaces and
    punctuation in a product name out of the derived Weaviate class name.
    Underscore-separated input gives the same result as before.

    Args:
        string: text such as ``"ofsll_um"`` or ``"Loan Product_mapping"``.

    Returns:
        The CamelCase form, e.g. ``"OfsllUm"``; ``""`` for empty input.
    """
    # [\W_]+ matches any run of separators: "_" plus every non-word character.
    words = re.split(r"[\W_]+", string)
    return ''.join(word.capitalize() for word in words)
142
+
143
+ # -- Create Sbert Embedding
144
# Loaded SentenceTransformer instances keyed by model name, so that a model is
# constructed once instead of once per embedded chunk.
_g_embedding_models = {}


def creating_embeddings(sentences):
    """Encode *sentences* with the Sbert model named by ``g_ui_model_name``.

    The model is built on first use and reused on later calls with the same
    model name; a different name loads (and caches) a different model.

    Args:
        sentences: value passed unchanged to ``SentenceTransformer.encode``.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for *sentences*.
    """
    global g_ui_model_name

    model = _g_embedding_models.get(g_ui_model_name)
    if model is None:
        model = SentenceTransformer(g_ui_model_name)
        _g_embedding_models[g_ui_model_name] = model

    return model.encode(sentences)
157
+
158
+ # -- Generate OpenAI Description
159
def generate_openAI_description(key,prompt):
    """Ask the OpenAI completion endpoint to describe *key* and return plain text.

    ``{text}`` in *prompt* is replaced by *key*, the result is sent to the
    ``text-davinci-003`` engine, and the first choice is stripped and passed
    through BeautifulSoup to drop any HTML markup.

    NOTE(review): ``openai`` is not imported in the visible part of this file,
    so calling this function presumably fails with ``NameError`` - confirm and
    add the import where the dependency is managed.

    Args:
        key: text substituted for ``{text}``.
        prompt: template containing the ``{text}`` placeholder.

    Returns:
        The completion text with markup removed, pieces separated by spaces.
    """
    filled_prompt = prompt.replace('{text}', key)

    # Request a completion for the filled-in prompt
    completion = openai.Completion.create(
        engine='text-davinci-003',
        prompt=filled_prompt,
        max_tokens=1000
    )
    raw_answer = completion.choices[0].text.strip()

    # Keep only the text content of the answer
    return BeautifulSoup(raw_answer, 'html.parser').get_text(separator=' ')
177
+
178
+ ############################
179
+ ##### Create Product DB ####
180
+ ############################
181
+
182
+ # -- Check for Product Class/Table
183
def create_product_class():
    """Create the ``Product`` class in Weaviate.

    The class has four text properties (``name``, ``description``, ``prompt``
    and ``um_indicator``) and no built-in vectorizer. The outcome is appended
    to ``g_output``.

    Raises:
        ValueError: if the schema call fails; the message is the error text.
    """
    global g_client, g_output

    print("started function - create_product_class")

    # (property name, property description) - every property is of type text
    property_specs = [
        ("name", "Product Name"),
        ("description", "Product Description"),
        ("prompt", "Prompt variable to store mapping description. This is non-mandatory"),
        ("um_indicator", "Indicator to check in User Manual exist"),
    ]
    product_class = {
        "classes": [{
            "class": "Product",
            "description": "Store Product Names and Description",
            "vectorizer": "none",
            "properties": [
                {"name": prop_name, "dataType": ["text"], "description": prop_text}
                for prop_name, prop_text in property_specs
            ],
        }]
    }

    try:
        g_client.schema.create(product_class)
        g_output = g_output + "Class 'Product' created successfully!\n"
        print("Class 'Product' created successfully!")
    except Exception as error:
        g_output = g_output + f"Failed to create class 'Product': {error}" + "\n"
        print(f"Failed to create class 'Product': {error}")
        raise ValueError(str(error))
    finally:
        print("completed function - create_product_class")
231
+
232
+ # -- Check for Product Object/Row
233
def validate_product_object_exist():
    """Report whether a ``Product`` object still has to be created.

    Queries the ``Product`` class for objects whose ``name`` equals
    ``g_product_name``.

    Returns:
        ``True`` when the query finds no object (one must be created),
        ``False`` when at least one object already exists.
    """
    global g_client, g_product_name, g_output

    print("started function - validate_product_object_exist")

    # Match on the product name entered in the form
    name_filter = {
        "path": ["name"],
        "operator": "Equal",
        "valueString": g_product_name,
    }
    query = g_client.query.get("Product", "name").with_where(name_filter)
    query_result = query.do()

    print("Product Table Query Result - " + str(query_result))
    is_missing = len(query_result['data']['Get']['Product']) == 0
    if is_missing:
        g_output = g_output + "Product object does not exists\n"
    else:
        g_output = g_output + "Product object already exists\n"

    print("completed function - validate_product_object_exist")
    return is_missing
263
+
264
+ # -- Create new Product Object/Row
265
def create_new_product_object():
    """Store the current product settings as a new ``Product`` object.

    The object carries ``g_product_name``, ``g_product_description`` and
    ``g_product_prompt``; ``um_indicator`` is set to ``'N'``.

    Raises:
        ValueError: if building or storing the object fails.
    """
    global g_client, g_product_name, g_product_description
    global g_product_prompt, g_output

    print("started function - create_new_product_object")
    try:
        new_product = dict(
            name=g_product_name,
            description=g_product_description,
            prompt=g_product_prompt,
            um_indicator='N',
        )
        g_client.data_object.create(new_product, class_name="Product")

        print("Product object Created Successfully")
        g_output = g_output + "Product object Created Successfully\n"
    except Exception as error:
        raise ValueError("Creating Product Object" + str(error))
    finally:
        print("completed function - create_new_product_object")
288
+
289
+ # -- Add Product Object/Row
290
def add_product_data():
    """Make sure the ``Product`` class and the current product object exist.

    If reading the ``Product`` class from the schema raises, the class is
    created. A product object is then added only when
    ``validate_product_object_exist`` reports that none is stored yet.
    """
    global g_product_name, g_product_description, g_client, g_output

    print("started function - add_product_data")

    # Any failure while reading the class is treated as "class is missing".
    try:
        g_client.schema.get("Product")
        print("Class 'Product' already exists!")
        g_output = g_output + "Class 'Product' already exists!\n"
    except Exception as error:
        print(f"Error Verifying Class Product : {error}")
        create_product_class()

    # validate_product_object_exist() is True when the object is absent.
    object_is_missing = validate_product_object_exist()
    if object_is_missing:
        create_new_product_object()

    print("completed function - add_product_data")
312
+
313
+ ############################
314
+ ##### Create Product UM ####
315
+ ############################
316
+
317
+ # -- Check for User Manual Class/Table
318
def create_um_class():
    """Create the Weaviate class that stores the product's user-manual chunks.

    The class name is ``convert_to_camel_case(g_product_name + "_um")``. The
    class has a ``content`` text property and a ``page_no`` int property and
    uses no built-in vectorizer. The outcome is appended to ``g_output``.

    Raises:
        ValueError: if the schema call fails; the message is the error text.
    """
    global g_product_name
    global g_client
    global g_output

    print("started function - create_um_class")
    product_class_name_camel_case = convert_to_camel_case(str(g_product_name+"_um"))
    print("Creating UM Artefact of "+product_class_name_camel_case)

    # Schema of the per-product user-manual class
    product_um = {
        "classes": [{
            "class": product_class_name_camel_case,
            "description": "Vector store of "+g_product_name+" user manual",
            "vectorizer": "none",
            "properties": [
                {
                    "name": "content",
                    "dataType": ["text"],
                    "description": "Store product "+g_product_name+" user manual details"
                },
                {
                    "name": "page_no",
                    "dataType": ["int"],
                    "description": "Page number in user manual details"
                }
            ]
        }]
    }

    # Create the class in Weaviate
    try:
        g_client.schema.create(product_um)
        g_output=g_output+"Class '"+product_class_name_camel_case+"' created successfully!\n"
        print("Class '"+product_class_name_camel_case+"' created successfully!")
    except Exception as e:
        # One f-string for the whole message: when the text is split across
        # concatenated literals, only the literal carrying the f prefix is
        # interpolated and "{e}" would be emitted verbatim.
        failure = f"Failed to create class '{product_class_name_camel_case}': {e}"
        g_output=g_output+failure+"\n"
        print(failure)
        raise ValueError(str(e)) from e
    finally:
        print("completed function - create_um_class")
359
+
360
+ # -- Check for User Manual Object/Row
361
def validate_um_object_exist():
    """Check whether the product's user-manual class is present in Weaviate.

    The class looked for is ``convert_to_camel_case(g_product_name + "_um")``.
    The finding is appended to ``g_output``.

    Returns:
        ``True`` if the schema lists the class, otherwise ``False``.

    Raises:
        ValueError: if the schema cannot be read.
    """
    global g_client, g_product_name, g_output
    return_val = False

    print("started function - validate_um_object_exist")
    target_class = convert_to_camel_case(str(g_product_name + "_um"))

    try:
        schema = g_client.schema.get()

        # Stop at the first schema entry with the expected class name
        for entry in schema['classes']:
            if entry['class'] == target_class:
                return_val = True
                break

        if return_val:
            g_output = g_output + "Class " + target_class + " exists in Weaviate.\n"
            print("Class " + target_class + " exists in Weaviate.")
        else:
            g_output = g_output + "Class " + target_class + " does not exists in Weaviate.\n"
            print("Class " + target_class + " does not exist in Weaviate.")

    except Exception as error:
        g_output = g_output + f"Failed to retrieve schema: {error}" + "\n"
        print(f"Failed to retrieve schema: {error}" + "\n")
        raise ValueError(str(error))
    finally:
        print("completed function - validate_um_object_exist")

    # Returned after the try statement so the ValueError above can propagate.
    return return_val
390
+
391
+ # -- Delete User Manual Class/Table
392
def delete_um_class():
    """Delete the product's user-manual class from Weaviate.

    The class removed is ``convert_to_camel_case(g_product_name + "_um")``.
    The outcome is appended to ``g_output``.

    Raises:
        ValueError: if the delete call fails; the message is the error text.
    """
    global g_client, g_product_name, g_output

    print("started function - delete_um_class")
    target_class = convert_to_camel_case(str(g_product_name + "_um"))

    try:
        g_client.schema.delete_class(target_class)
        print("Class " + target_class + " deleted successfully.")
        g_output = g_output + "Class " + target_class + " deleted successfully.\n"
    except Exception as error:
        print(f"Failed to delete class: {error}")
        g_output = g_output + f"Failed to delete class: {error}" + "\n"
        raise ValueError(str(error))
    finally:
        print("completed function - delete_um_class")
410
+
411
+ # -- Create new User Manual Object/Row
412
def create_new_um_object(item):
    """Store one user-manual chunk, with its vector, in the product's UM class.

    Args:
        item: mapping with the keys ``'text'``, ``'page_no'`` and
            ``'embedding'``; the embedding is passed as the object's vector.

    Raises:
        ValueError: if storing the object fails.
    """
    global g_client, g_product_name

    print("started function - create_new_um_object")
    print("Storing UM chunk data into Weaviate")

    chunk_record = {"content": item['text'], 'page_no': item['page_no']}
    try:
        # Resolve the class name and vector, then add the object to Weaviate
        target_class = convert_to_camel_case(str(g_product_name + "_um"))
        chunk_vector = item['embedding']
        g_client.data_object.create(chunk_record, class_name=target_class, vector=chunk_vector)
    except Exception as error:
        print("Error storing UM chunk")
        raise ValueError(str(error))
    finally:
        print("completed function - create_new_um_object")
431
+
432
+ # -- Extract text from PDF file
433
def extract_text_from_pdf(file):
    """Read the uploaded PDF page by page and store every text chunk in Weaviate.

    Each page's text is cleaned (hyphenated line breaks merged, single
    newlines turned into spaces, blank-line runs collapsed), split into chunks
    with ``CharacterTextSplitter`` (``chunk_size=1000``, no overlap), embedded
    and stored together with its 1-based page number.

    Args:
        file: uploaded file object; only its ``name`` attribute (a path) is used.

    Raises:
        ValueError: if reading, embedding or storing fails.
    """
    file_path = file.name

    print("started function - extract_text_from_pdf")
    print("Uploaded pdf location - " + file_path)

    # Splitter that cuts a page into smaller pieces
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        length_function=len,
    )

    hyphen_break = re.compile(r"(\w+)-\n(\w+)")
    inline_newline = re.compile(r"(?<!\n\s)\n(?!\s\n)")
    blank_line_run = re.compile(r"\n\s*\n")

    try:
        with open(file_path, "rb") as pdf_file:
            reader = PdfReader(pdf_file)
            for page_no, page in enumerate(reader.pages, start=1):
                # Merge hyphenated words, join mid-sentence line breaks,
                # then collapse repeated blank lines
                text = hyphen_break.sub(r"\1\2", page.extract_text())
                text = inline_newline.sub(" ", text.strip())
                text = blank_line_run.sub("\n\n", text)

                print('Processing Page Content - ' + str(page_no))

                if not text:
                    continue

                # Embed and store each chunk of the page individually
                for chunk in text_splitter.split_text(text):
                    create_new_um_object({
                        'text': chunk,
                        'embedding': creating_embeddings(chunk),
                        'page_no': page_no,
                    })
    except Exception as error:
        raise ValueError(str(error))

    print("completed function - extract_text_from_pdf")
482
+
483
+ # -- Process User Manual
484
def process_um_data(file):
    """Rebuild the product's user-manual class from the uploaded PDF.

    When no file was uploaded (``file`` is ``None``) the step is skipped and
    the existing class is left untouched; otherwise an existing class is
    deleted, a new one is created and the PDF content is loaded into it.

    Args:
        file: uploaded PDF file object, or ``None`` when nothing was uploaded.

    Raises:
        ValueError: propagated from the class-management and extraction helpers.
    """
    global g_output

    # Check before deleting anything: without a file there is nothing to load,
    # and dropping the class would discard the previously stored manual.
    if file is None:
        print("No user manual uploaded - skipping user manual processing")
        g_output = g_output + "No user manual uploaded - skipping user manual processing\n"
        return

    # If um table/class exists, system will delete and recreate
    if validate_um_object_exist():
        delete_um_class()

    if not(validate_um_object_exist()):
        create_um_class()
        extract_text_from_pdf(file)
493
+
494
+ ############################
495
+ #### Create Product Map ####
496
+ ############################
497
+
498
+ # -- Check for Mapping Class/Table
499
def create_mapping_class():
    """Create the Weaviate class that stores the product's mapping rows.

    The class name is ``convert_to_camel_case(g_product_name + "_mapping")``.
    The class has ``key`` and ``description`` text properties and uses no
    built-in vectorizer. The outcome is appended to ``g_output``.

    Raises:
        ValueError: if the schema call fails; the message is the error text.
    """
    global g_product_name
    global g_client
    global g_output

    print("started function - create_mapping_class")
    product_class_name_camel_case = convert_to_camel_case(str(g_product_name+"_mapping"))
    print("Creating Mapping Artefact of "+product_class_name_camel_case)

    # Schema of the per-product mapping class
    product_mapping = {
        "classes": [{
            "class": product_class_name_camel_case,
            "description": "Vector store of "+g_product_name+" mapping",
            "vectorizer": "none",
            "properties": [
                {
                    "name": "key",
                    "dataType": ["text"],
                    "description": "Key Column"
                },
                {
                    "name": "description",
                    "dataType": ["text"],
                    "description": "Description of Master Table Key Data"
                }
            ]
        }]
    }

    # Create the class in Weaviate
    try:
        g_client.schema.create(product_mapping)
        g_output=g_output+"Class '"+product_class_name_camel_case+"' created successfully!\n"
        print("Class '"+product_class_name_camel_case+"' created successfully!")
    except Exception as e:
        # One f-string for the whole message: when the text is split across
        # concatenated literals, only the literal carrying the f prefix is
        # interpolated and "{e}" would be emitted verbatim.
        failure = f"Failed to create class '{product_class_name_camel_case}': {e}"
        g_output=g_output+failure+"\n"
        print(failure)
        raise ValueError(str(e)) from e
    finally:
        print("completed function - create_mapping_class")
540
+
541
+ # -- Delete Mapping Class/Table
542
def delete_mapping_class():
    """Delete the product's mapping class from Weaviate.

    The class removed is ``convert_to_camel_case(g_product_name + "_mapping")``.
    The outcome is appended to ``g_output``.

    Raises:
        ValueError: if the delete call fails; the message is the error text.
    """
    global g_client, g_product_name, g_output

    print("started function - delete_mapping_class")
    target_class = convert_to_camel_case(str(g_product_name + "_mapping"))

    try:
        g_client.schema.delete_class(target_class)
        print("Class " + target_class + " deleted successfully.")
        g_output = g_output + "Class " + target_class + " deleted successfully.\n"
    except Exception as error:
        print(f"Failed to delete class: {error}")
        g_output = g_output + f"Failed to delete class: {error}" + "\n"
        raise ValueError(str(error))
    finally:
        print("completed function - delete_mapping_class")
560
+
561
+ # -- Check for Mapping Object/Row
562
def validate_mapping_object_exist():
    """Check whether the product's mapping class is present in Weaviate.

    The class looked for is ``convert_to_camel_case(g_product_name + "_mapping")``.
    The finding is appended to ``g_output``.

    Returns:
        ``True`` if the schema lists the class, otherwise ``False``.

    Raises:
        ValueError: if the schema cannot be read.
    """
    global g_client, g_product_name, g_output
    return_val = False

    print("started function - validate_mapping_object_exist")
    target_class = convert_to_camel_case(str(g_product_name + "_mapping"))

    try:
        schema = g_client.schema.get()

        # Stop at the first schema entry with the expected class name
        for entry in schema['classes']:
            if entry['class'] == target_class:
                return_val = True
                break

        if return_val:
            g_output = g_output + "Class " + target_class + " exists in Weaviate.\n"
            print("Class " + target_class + " exists in Weaviate.")
        else:
            g_output = g_output + "Class " + target_class + " does not exists in Weaviate.\n"
            print("Class " + target_class + " does not exist in Weaviate.")

    except Exception as error:
        g_output = g_output + f"Failed to retrieve schema: {error}" + "\n"
        print(f"Failed to retrieve schema: {error}" + "\n")
        raise ValueError(str(error))
    finally:
        print("completed function - validate_mapping_object_exist")

    # Returned after the try statement so the ValueError above can propagate.
    return return_val
591
+
592
+ # -- Create new Mapping Object/Row
593
def create_new_mapping_object(item):
    """Store one mapping row, with its vector, in the product's mapping class.

    Args:
        item: mapping with the keys ``'key'``, ``'description'`` and
            ``'embedding'``; the embedding is passed as the object's vector.

    Raises:
        ValueError: if storing the object fails.
    """
    global g_client, g_product_name

    print("started function - create_new_mapping_object")
    print("Storing mapping data into Weaviate")

    mapping_record = {"key": item['key'], "description": item['description']}
    try:
        # Resolve the class name and vector, then add the object to Weaviate
        target_class = convert_to_camel_case(str(g_product_name + "_mapping"))
        row_vector = item['embedding']
        g_client.data_object.create(mapping_record, class_name=target_class, vector=row_vector)
    except Exception as error:
        print("Error storing mapping record/object")
        raise ValueError(str(error))
    finally:
        print("completed function - create_new_mapping_object")
612
+
613
+ # -- Extract text from Excel Mapping File
614
def extract_text_from_xlsx(file):
    """Load every sheet of the mapping workbook and store its rows in Weaviate.

    Each sheet must have the columns ``Column Name`` and ``Hint``. A row
    becomes an entry keyed ``"<sheet name>.<column name>"`` whose value is the
    hint rendered as text; a later row with the same key replaces the earlier
    value. After all sheets are read, each entry is embedded and stored.

    Args:
        file: uploaded file object; only its ``name`` attribute (a path) is used.

    Raises:
        ValueError: if reading, embedding or storing fails.
    """
    global g_product_prompt

    file_path = file.name

    print("started function - extract_text_from_xlsx")
    print("Uploaded xlsx location - " + file_path)

    try:
        # sheet_name=None returns a dict of DataFrames, one per tab
        workbook = pd.read_excel(file_path, sheet_name=None)

        # Collect all rows first; nothing is stored until every sheet is read
        hints_by_key = {}
        for sheet_name, sheet in workbook.items():
            names = sheet['Column Name'].tolist()
            hints = sheet['Hint'].tolist()
            for column_name, hint in zip(names, hints):
                hints_by_key[f"{sheet_name}.{column_name}"] = f"{hint}"

        for key, value in hints_by_key.items():
            print(f"Key: {key}")
            print(f"Initial Value: {value}")

            # Enriching the hint through generate_openAI_description() with
            # g_product_prompt is currently disabled.

            print("-------------------------")
            create_new_mapping_object({
                'key': key,
                'description': value,
                'embedding': creating_embeddings(value),
            })

    except Exception as error:
        raise ValueError(str(error))
    finally:
        print("completed function - extract_text_from_xlsx")
662
+
663
+ # -- Process Mapping Excel Data
664
def process_mapping_data(file):
    """Rebuild the product's mapping class from the uploaded Excel workbook.

    When no file was uploaded (``file`` is ``None``) the step is skipped and
    the existing class is left untouched; otherwise an existing class is
    deleted, a new one is created and the workbook rows are loaded into it.

    Args:
        file: uploaded workbook file object, or ``None`` when nothing was uploaded.

    Raises:
        ValueError: propagated from the class-management and extraction helpers.
    """
    global g_output

    # Check before deleting anything: without a file there is nothing to load,
    # and dropping the class would discard the previously stored mapping.
    if file is None:
        print("No mapping file uploaded - skipping mapping processing")
        g_output = g_output + "No mapping file uploaded - skipping mapping processing\n"
        return

    # If mapping table/class exists, system will delete and recreate
    if validate_mapping_object_exist():
        delete_mapping_class()

    if not(validate_mapping_object_exist()):
        create_mapping_class()
        extract_text_from_xlsx(file)
673
+
674
+ ############################
675
+ ###### Submit Button #######
676
+ ############################
677
+
678
+ # -- On Click of Submit Button in UI
679
def submit(ui_model_name, ui_weaviate_url, ui_product_name, ui_product_description, ui_product_prompt, ui_product_um, ui_product_mapping):
    """Run the training pipeline for one form submission and return its log.

    When the model name, product name and product description are all empty,
    a welcome message is returned and nothing else happens. Otherwise five
    steps run in order: set variables, connect to Weaviate, create the product
    class/object, load the user manual, load the mapping workbook. The first
    exception stops the run and its text is appended to the returned log.

    Returns:
        The text accumulated in ``g_output`` (plus ``"Error -> ..."`` on failure).
    """
    global g_output

    print("\n>>> Started Training <<<")
    g_output = ""

    # Nothing entered yet: greet the user and stop.
    if ui_model_name == "" and ui_product_name == "" and ui_product_description == "":
        print(">>> Completed Training <<<\n")
        g_output = (
            "Welcome to Migration Assistance Training Bot !!!\n"
            "Enter input value to proceed"
        )
        return g_output

    try:
        # Step 1: copy the form values into the global settings
        g_output = ">>> 1 - Setting Variables <<<\n"
        print(">>> 1 - Setting Variables <<<")
        update_global_variables(ui_model_name, ui_weaviate_url, ui_product_name, ui_product_description, ui_product_prompt)
        g_output = g_output + "\n>>> 1 - Completed <<<\n"
        print(">>> 1 - Completed <<<\n")

        # Step 2: create the Weaviate client
        g_output = g_output + "\n>>> 2 - Validate Weaviate Connection <<<\n"
        print(">>> 2 - Validate Weaviate Connection <<<")
        weaviate_client()
        g_output = g_output + "\n>>> 2 - Completed <<<\n"
        print(">>> 2 - Completed <<<\n")

        # Step 3: product class and product object
        g_output = g_output + "\n>>> 3 - Create Product Class & Object <<<\n"
        print(">>> 3 - Create Product Class & Object <<<")
        add_product_data()
        g_output = g_output + ">>> 3 - Completed <<<\n"
        print(">>> 3 - Completed <<<\n")

        # Step 4: user-manual class and objects from the uploaded PDF
        g_output = g_output + "\n>>> 4 - Create UserManual Class & Object <<<\n"
        print(">>> 4 - Create UserManual Class & Object <<<")
        process_um_data(ui_product_um)
        g_output = g_output + ">>> 4 - Completed <<<\n"
        print(">>> 4 - Completed <<<\n")

        # Step 5: mapping class and objects from the uploaded workbook
        g_output = g_output + "\n>>> 5 - Create Mapping Class & Object <<<\n"
        print(">>> 5 - Create Mapping Class & Object <<<")
        process_mapping_data(ui_product_mapping)
        g_output = g_output + ">>> 5 - Completed <<<\n"
        print(">>> 5 - Completed <<<\n")

    except Exception as error:
        failure_text = "Error -> " + str(error)
        print(failure_text)
        print(">>> Completed Training <<<\n")
        return g_output + failure_text

    return g_output
732
+
733
+ # -- Start of Program - Main
734
def main():
    """Build the Gradio interface around ``submit`` and serve it.

    The interface uses ``p_inputs`` as inputs and ``ui_output`` as output,
    with flagging disabled, and is launched with queuing on ``0.0.0.0:8081``.
    """
    global p_inputs, ui_output

    app = gr.Interface(
        fn=submit,
        inputs=p_inputs,
        outputs=ui_output,
        allow_flagging="never",
    )

    # Replace SpooledTemporaryFile with TemporaryFile process-wide.
    # NOTE(review): presumably so uploads are real files on disk with a usable
    # path - confirm against the Gradio version in use.
    tempfile.SpooledTemporaryFile = tempfile.TemporaryFile

    queued_app = app.queue()
    queued_app.launch(server_name="0.0.0.0", server_port=8081)


# -- Calling Main Function
if __name__ == "__main__":
    main()