nouf-sst committed on
Commit 0224aa7 · verified · 1 Parent(s): c794041

Add refactoring component
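
Note: the refactoring paths added in this commit call a prompt(instructions, text) helper (plus json and itertools) that is defined elsewhere in app.py and does not appear in the hunks below. For orientation only, here is a minimal sketch of what such a helper might look like, assuming an OpenAI-style chat-completions client; the client setup, model name, and temperature are assumptions, not part of this commit.

# Hypothetical sketch of the prompt() helper the new code relies on; not part of this diff.
from openai import OpenAI

client = OpenAI()  # assumes an API key is configured in the Space's environment

def prompt(instructions, text):
    # Send one system instruction and one user message; return the raw reply text.
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",  # placeholder model name
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": text},
        ],
        temperature=0,
    )
    return response.choices[0].message.content.strip()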

Files changed (1)
  1. app.py +415 -143
app.py CHANGED
@@ -156,12 +156,23 @@ def get_long_elements(elements, size_threshold): # Using RegEx
156
  for i in range(0, len(elements[key])):
157
  if len(re.findall(r'\w+', elements[key][i])) > size_threshold:
158
  long_elements.append(elements[key][i])
159
-
160
  if long_elements:
161
- long_elements = "\n".join(long_elements)
162
- return "Long elements:\n" + long_elements
 
 
 
 
 
 
 
 
 
 
 
163
  else:
164
- return "Long elements:\nNone."
165
  # #####################################
166
 
167
  # ######### Complex Sentences #########
@@ -178,18 +189,29 @@ def is_complex_sentence(sentence):
178
 
179
  def get_complex_sentences(elements):
180
 
181
- complex_sentences = []
182
-
183
  for key, value in elements.items():
184
  for i in range(0, len(elements[key])):
185
  if is_complex_sentence(elements[key][i]):
186
  complex_sentences.append(elements[key][i])
187
-
188
  if complex_sentences:
189
- complex_sentences = "\n".join(complex_sentences)
190
- return "Complex sentences:\n" + complex_sentences
 
 
 
 
 
 
 
 
 
 
 
191
  else:
192
- return "Complex sentences:\nNone."
193
 
194
  # #####################################
195
 
@@ -199,103 +221,200 @@ def get_punctuations(elements):
199
  punctuations = []
200
 
201
  for key, value in elements.items():
202
- for i in range(0, len(elements[key])):
203
- if len(re.findall("[^\s\w\d-]", elements[key][i])) > 0:
204
- punctuations.append(elements[key][i])
205
 
206
  if punctuations:
207
- punctuations = "\n".join(punctuations)
208
- return "Punctuations:\n" + punctuations
 
 
 
 
 
 
 
 
209
  else:
210
- return "Punctuations:\nNone."
211
  # #################################
212
 
213
  # ########## Incorrect Actor Syntax ##########
214
- def find_non_NPs(sentences):
215
 
216
- pipeline = TokenClassificationPipeline(model=pos_model, tokenizer=pos_tokenizer)
217
 
218
- outputs = pipeline(sentences)
219
 
220
- Non_NPs = []
221
 
222
- for idx, output in enumerate(outputs):
223
- if output[0]['entity'].startswith('V'):
224
- Non_NPs.append(sentences[idx])
225
 
226
- return Non_NPs
227
 
228
  def check_actor_syntax(actors):
229
 
230
- incorrect_actor_syntax = find_non_NPs(actors)
231
-
232
- if incorrect_actor_syntax:
233
- incorrect_actor_syntax = "\n".join(incorrect_actor_syntax)
234
- return "Incorrect Actors Syntax:\n" + incorrect_actor_syntax
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  else:
236
- return "All actors are syntactically correct."
237
  # ############################################
238
 
239
  # ########## Incorrect Goal Syntax ###########
240
  def check_goal_syntax(goals):
241
 
242
- incorrect_goal_syntax = find_non_NPs(goals)
243
-
244
- if incorrect_goal_syntax:
245
- incorrect_goal_syntax = "\n".join(incorrect_goal_syntax)
246
- return "Incorrect Goals Syntax:\n" + incorrect_goal_syntax
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  else:
248
- return "All goals are syntactically correct."
249
  # ############################################
250
 
251
  # ########## Incorrect Softgoal Syntax ###########
252
  def check_softgoal_syntax(softgoals):
253
 
254
- incorrect_softgoal_syntax = find_non_NPs(softgoals)
255
-
256
- if incorrect_softgoal_syntax:
257
- incorrect_softgoal_syntax = "\n".join(incorrect_softgoal_syntax)
258
- return "Incorrect Softgoals Syntax:\n" + incorrect_softgoal_syntax
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  else:
260
- return "All softgoal are syntactically correct."
261
  # ############################################
262
 
263
  # ########## Incorrect Task Syntax ###########
264
- def find_NPs(sentences):
265
 
266
- pipeline = TokenClassificationPipeline(model=pos_model, tokenizer=pos_tokenizer)
267
 
268
- outputs = pipeline(sentences)
269
 
270
- NPs = []
271
 
272
- for idx, output in enumerate(outputs):
273
- if not output[0]['entity'].startswith('V'):
274
- NPs.append(sentences[idx])
275
 
276
- return NPs
277
 
278
  def check_task_syntax(tasks):
279
 
280
- incorrect_task_syntax = find_NPs(tasks)
281
-
282
- if incorrect_task_syntax:
283
- incorrect_task_syntax = "\n".join(incorrect_task_syntax)
284
- return "Incorrect Tasks Syntax:\n" + incorrect_task_syntax
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  else:
286
- return "All tasks are syntactically correct."
287
  # ############################################
288
 
289
  # ########## Incorrect Resource Syntax ###########
290
  def check_resource_syntax(resources):
291
 
292
- incorrect_resource_syntax = find_non_NPs(resources)
293
-
294
- if incorrect_resource_syntax:
295
- incorrect_resource_syntax = "\n".join(incorrect_resource_syntax)
296
- return "Incorrect Resources Syntax:\n" + incorrect_resource_syntax
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  else:
298
- return "All resources are syntactically correct."
299
  # ############################################
300
 
301
  # ########## Similarity ###########
@@ -309,56 +428,72 @@ def get_similar_elements(elements_per_actor, similarity_threshold):
309
  for i in range(len(elements_per_actor[key])):
310
  for j in range(i+1,len(elements_per_actor[key])):
311
  sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
312
-
313
- # Predict semantic similarity
314
  semantic_similarity_scores = sentences_similarity_model.predict(sentence_pairs, show_progress_bar=True)
315
 
316
  similar_elements = []
 
317
  for index, value in enumerate(sentence_pairs):
318
  if semantic_similarity_scores[index] > similarity_threshold:
319
  similar_elements.append(value)
 
320
  #semantic_similarity["pair_"+str(index+1)] = [value,semantic_similarity_scores[index]]
321
 
322
  if similar_elements:
323
- similar_elements = [' and '.join(ele) for ele in similar_elements]
324
- similar_elements = "\n".join(similar_elements)
325
- return "The following elements are semantically similar:\n" + similar_elements
 
 
 
 
326
  else:
327
- return "There are no similar elements."
328
 
329
  return semantic_similarity
330
  # #################################
331
 
332
  # ########## Misspelling ###########
333
- def get_misspelled_words(sentence):
334
 
335
- spell = Speller(only_replacements=True)
336
 
337
- misspelled= []
338
 
339
- for word in sentence.split():
340
- correct_word = spell(word)
341
- if word != correct_word:
342
- misspelled.append([word, correct_word])
343
 
344
- return misspelled
345
 
346
  def check_spelling(elements):
347
 
348
- spelling_mistakes = []
349
- spelling_mistakes_string = ""
350
 
351
  for key, value in elements.items():
352
- for i in range(0, len(elements[key])):
353
- if get_misspelled_words(elements[key][i]):
354
- spelling_mistakes.append([elements[key][i], get_misspelled_words(elements[key][i])])
355
-
356
- for idx, element in enumerate(spelling_mistakes):
357
- for spelling_mistake in element[1]:
358
- temp = ' should be written as '.join(spelling_mistake)
359
- spelling_mistakes_string = spelling_mistakes_string + "\n" + element[0] + ": " + temp
360
 
361
- return spelling_mistakes_string
362
  # ##################################
363
 
364
  # ########## NLI ###########
@@ -435,91 +570,228 @@ def check_entailment(decomposed_elements):
435
  return result
436
 
437
  # Contradiction
438
- def check_contradiction(elements_per_actor):
439
 
440
- sentence_pairs = []
441
  contradicting_elements = []
442
 
 
443
  for key, value in elements_per_actor.items():
444
 
445
  for i in range(len(elements_per_actor[key])):
446
  for j in range(i+1,len(elements_per_actor[key])):
447
  sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
448
-
449
- #print(sentence_pairs)
450
- # Check contradiction
451
  for sentence_pair in sentence_pairs:
452
- result = do_nli(sentence_pair[0], sentence_pair[1])
453
- #print(result)
454
- if result == "Contradiction":
455
- contradicting_elements.append(sentence_pair)
456
 
457
  if contradicting_elements:
458
  contradicting_elements = [' and '.join(ele) for ele in contradicting_elements]
459
  contradicting_elements = "\n".join(contradicting_elements)
460
- return "The following elements are contradicting:\n" + contradicting_elements
 
461
  else:
462
- return "There are no contradicting elements."
463
  # ##########################
464
 
465
  # ************************* User Interface *************************
466
 
467
- def identify_bad_smells(tgrl_file, selected_bad_smells, size_threshold, similarity_threshold):
468
 
469
  output = ""
470
 
471
  tgrl_text = parse_tgrl(tgrl_file)
472
 
473
- elements, elements_per_actor, decomposed_elements = extract_elements(tgrl_text)
474
-
475
- if 'Size' in selected_bad_smells:
476
- output = output + get_long_elements(elements, size_threshold) + "\n\n"
477
-
478
- if 'Complexity' in selected_bad_smells:
479
- output = output + get_complex_sentences(elements) + "\n\n"
480
-
481
- if 'Punctuations' in selected_bad_smells:
482
- output = output + get_punctuations(elements) + "\n\n"
483
-
484
- if 'Actors Syntax' in selected_bad_smells:
485
- output = output + check_actor_syntax(elements['actors']) + "\n\n"
486
 
487
- if 'Goals Syntax' in selected_bad_smells:
488
- output = output + check_goal_syntax(elements['goals']) + "\n\n"
489
-
490
- if 'Softgoals Syntax' in selected_bad_smells:
491
- output = output + check_softgoal_syntax(elements['softGoals']) + "\n\n"
492
-
493
- if 'Tasks Syntax' in selected_bad_smells:
494
- output = output + check_task_syntax(elements['tasks']) + "\n\n"
495
-
496
- if 'Resources Syntax' in selected_bad_smells:
497
- output = output + check_resource_syntax(elements['resources']) + "\n\n"
498
-
499
- if 'Similar Elements' in selected_bad_smells:
500
- output = output + get_similar_elements(elements_per_actor, similarity_threshold) + "\n\n"
501
-
502
- if 'Spelling Mistakes' in selected_bad_smells:
503
- output = output + check_spelling(elements) + "\n\n"
504
-
505
- if 'Goal-Subgoal Mismatch' in selected_bad_smells:
506
- output = output + check_entailment(decomposed_elements) + "\n\n"
507
-
508
- if 'Contradicting Elements' in selected_bad_smells:
509
- output = output + check_contradiction(elements_per_actor) + "\n\n"
510
-
511
  return output
512
 
513
 
514
- interface = gr.Interface(fn = identify_bad_smells,
515
  inputs = [gr.File(label="TGRL File"),
516
- gr.CheckboxGroup(["Size", "Complexity", "Punctuations", "Actors Syntax", "Goals Syntax", "Softgoals Syntax", "Tasks Syntax", "Resources Syntax", "Similar Elements", "Spelling Mistakes", "Goal-Subgoal Mismatch", "Contradicting Elements"],
517
- label="Which bad smells you want to detect?"),
518
- gr.Slider(label= "Size threshold", value = 5, minimum = 2, maximum = 10, step = 1),
519
  gr.Slider(label= "Similarity threshold", value = 0.9, minimum = 0, maximum = 1, step = 0.1)],
520
- outputs = [gr.Textbox(label= "Detected bad smells:")],
521
- title = "TGRL Linguistic Bad Smells Detection",
522
- description = "Upload your .xgrl file and we will find the bad smells for you!",
523
  theme = gr.themes.Soft())
524
 
525
 
 
156
  for i in range(0, len(elements[key])):
157
  if len(re.findall(r'\w+', elements[key][i])) > size_threshold:
158
  long_elements.append(elements[key][i])
159
+
160
  if long_elements:
161
+ output = ""
162
+ for long_element in long_elements:
163
+ refactored_element = prompt(
164
+ '''You are a specialist in English linguistics.
165
+ You will be provided with a sentence, and your task is to summarize it in ''' + str(size_threshold) + ''' words or fewer.
166
+ Comply with the following conditions:
167
+ (1) Do not convert a verb phrase to a noun phrase, and vice versa.
168
+ (2) Change as few words as possible.
169
+ Answer with the new sentence only.''',
170
+ long_element)
171
+ output = output + '"' + long_element + '" should be refactored to "' + refactored_element + '"\n'
172
+ #long_elements = "\n".join(long_elements)
173
+ return "Lengthy elements:\n" + output
174
  else:
175
+ return ""
176
  # #####################################
177
 
178
  # ######### Complex Sentences #########
 
189
 
190
  def get_complex_sentences(elements):
191
 
192
+ complex_sentences = []
193
+
194
  for key, value in elements.items():
195
  for i in range(0, len(elements[key])):
196
  if is_complex_sentence(elements[key][i]):
197
  complex_sentences.append(elements[key][i])
198
+
199
  if complex_sentences:
200
+ output = ""
201
+ for complex_sentence in complex_sentences:
202
+ refactored_element = prompt(
203
+ '''
204
+ You are a specialist in English linguistics.
205
+ A complex sentence is a sentence with one independent clause and at least one dependent clause. A simple sentence has a single independent clause.
206
+ You will be provided with a complex sentence, and your task is to make it a simple sentence.
207
+ Do not convert a verb phrase to a noun phrase, and vice versa.
208
+ Answer with the new sentence only.
209
+ ''', complex_sentence)
210
+ output = output + '"' + complex_sentence + '" should be refactored to "' + refactored_element + '"\n'
211
+ #complex_sentences = "\n".join(complex_sentences)
212
+ return "Complex elements:\n" + output
213
  else:
214
+ return ""
215
 
216
  # #####################################
217
 
 
221
  punctuations = []
222
 
223
  for key, value in elements.items():
224
+ for i in range(0, len(elements[key])):
225
+ if len(re.findall(r"[^\s\w\d-]", elements[key][i])) > 0:
226
+ punctuations.append(elements[key][i])
227
 
228
  if punctuations:
229
+ output = ""
230
+ for punctuation in punctuations:
231
+ refactored_element = prompt(
232
+ '''
233
+ You are a specialist in English linguistics.
234
+ You will be provided with a sentence, and your task is to remove all punctuation marks.
235
+ Answer with the new sentence only.''', punctuation)
236
+ output = output + '"' + punctuation + '" should be refactored to "' + refactored_element + '"\n'
237
+ #punctuations = "\n".join(punctuations)
238
+ return "Punctuation-marked elements:\n" + output
239
  else:
240
+ return ""
241
  # #################################
242
 
243
  # ########## Incorrect Actor Syntax ##########
244
+ def check_verb_or_noun_phrase(sentence):
245
+
246
+ result = prompt(
247
+ '''
248
+ You are a specialist in English linguistics.
249
+ You will be provided with a sentence, and your task is to determine whether the sentence is a noun phrase or a verb phrase.
250
+ Answer with "noun phrase" or "verb phrase" and your reasons.
251
+ Use JSON format with keys "answer" and "reason".''', sentence)
252
+ result = json.loads(result)
253
+ return result["answer"]
254
+
255
+ # def find_non_NPs(sentences):
256
 
257
+ # pipeline = TokenClassificationPipeline(model=pos_model, tokenizer=pos_tokenizer)
258
 
259
+ # outputs = pipeline(sentences)
260
 
261
+ # Non_NPs = []
262
 
263
+ # for idx, output in enumerate(outputs):
264
+ # if output[0]['entity'].startswith('V'):
265
+ # Non_NPs.append(sentences[idx])
266
 
267
+ # return Non_NPs
268
 
269
  def check_actor_syntax(actors):
270
 
271
+ incorrect_actors_syntax = []
272
+ for actor in actors:
273
+ result = check_verb_or_noun_phrase(actor)
274
+ if result == "verb phrase":
275
+ incorrect_actors_syntax.append(actor)
276
+
277
+
278
+ if incorrect_actors_syntax:
279
+ output = ""
280
+ for incorrect_actor_syntax in incorrect_actors_syntax:
281
+ refactored_element = prompt(
282
+ '''
283
+ You are a specialist in English linguistics.
284
+ You will be provided with a sentence that is a verb phrase, and your task is to make it a noun phrase representing an actor.
285
+ A noun phrase should start with a noun.
286
+ Examples of actors: System, PC User, and Privacy Officer.
287
+ Answer with the new sentence only.''', incorrect_actor_syntax)
288
+ output = output + '"' + incorrect_actor_syntax + '" should be refactored to "' + refactored_element + '"\n'
289
+ #incorrect_actor_syntax = "\n".join(incorrect_actor_syntax)
290
+ return "Incorrect actors syntax:\n" + output
291
  else:
292
+ return ""
293
  # ############################################
294
 
295
  # ########## Incorrect Goal Syntax ###########
296
  def check_goal_syntax(goals):
297
 
298
+ incorrect_goals_syntax = []
299
+ for goal in goals:
300
+ result = check_verb_or_noun_phrase(goal)
301
+ if result == "verb phrase":
302
+ incorrect_goals_syntax.append(goal)
303
+
304
+ if incorrect_goals_syntax:
305
+ output = ""
306
+ for incorrect_goal_syntax in incorrect_goals_syntax:
307
+ refactored_element = prompt(
308
+ '''
309
+ You are a specialist in English linguistics.
310
+ You will be provided with a sentence that is not a noun phrase, and your task is to make it a noun phrase representing a goal.
311
+ A noun phrase should start with a noun.
312
+ For example: high data quality, fast response time, and course registration.
313
+ Answer with the new sentence only.''', incorrect_goal_syntax)
314
+ output = output + '"' + incorrect_goal_syntax + '" should be refactored to "' + refactored_element + '"\n'
315
+ #incorrect_goal_syntax = "\n".join(incorrect_goal_syntax)
316
+ return "Incorrect goals syntax:\n" + output
317
  else:
318
+ return ""
319
  # ############################################
320
 
321
  # ########## Incorrect Softgoal Syntax ###########
322
  def check_softgoal_syntax(softgoals):
323
 
324
+ incorrect_softgoals_syntax = []
325
+ for softgoal in softgoals:
326
+ result = check_verb_or_noun_phrase(softgoal)
327
+ if result == "verb phrase":
328
+ incorrect_softgoals_syntax.append(softgoal)
329
+
330
+ if incorrect_softgoals_syntax:
331
+
332
+ output = ""
333
+ for incorrect_softgoal_syntax in incorrect_softgoals_syntax:
334
+ refactored_element = prompt(
335
+ '''
336
+ You are a specialist in English linguistics.
337
+ You will be provided with a sentence that is not a noun phrase, and your task is to make it a noun phrase representing a softgoal.
338
+ A noun phrase should start with a noun.
339
+ For example: high data quality, fast response time, and course registration.
340
+ Answer with the new sentence only.''', incorrect_softgoal_syntax)
341
+ output = output + '"' + incorrect_softgoal_syntax + '" should be refactored to "' + refactored_element + '"\n'
342
+ #incorrect_softgoal_syntax = "\n".join(incorrect_softgoal_syntax)
343
+ return "Incorrect softgoals syntax:\n" + output
344
  else:
345
+ return ""
346
  # ############################################
347
 
348
  # ########## Incorrect Task Syntax ###########
349
+ # def find_NPs(sentences):
350
 
351
+ # pipeline = TokenClassificationPipeline(model=pos_model, tokenizer=pos_tokenizer)
352
 
353
+ # outputs = pipeline(sentences)
354
 
355
+ # NPs = []
356
 
357
+ # for idx, output in enumerate(outputs):
358
+ # if not output[0]['entity'].startswith('V'):
359
+ # NPs.append(sentences[idx])
360
 
361
+ # return NPs
362
 
363
  def check_task_syntax(tasks):
364
 
365
+ incorrect_tasks_syntax = []
366
+ for task in tasks:
367
+ result = check_verb_or_noun_phrase(task)
368
+ if result == "noun phrase":
369
+ incorrect_tasks_syntax.append(task)
370
+
371
+
372
+ if incorrect_tasks_syntax:
373
+ output = ""
374
+ for incorrect_task_syntax in incorrect_tasks_syntax:
375
+ refactored_element = prompt(
376
+ '''
377
+ You are a specialist in English linguistics.
378
+ You will be provided with a sentence that is not a verb phrase, and your task is to make it a verb phrase representing a task.
379
+ A verb phrase should start with a verb.
380
+ For example: provide maintenance services, help co-workers, and enhance quality.
381
+ Answer with the new sentence only.''', incorrect_task_syntax)
382
+ output = output + '"' + incorrect_task_syntax + '" should be refactored to "' + refactored_element + '"\n'
383
+ #incorrect_task_syntax = "\n".join(incorrect_task_syntax)
384
+ return "Incorrect tasks syntax:\n" + output
385
  else:
386
+ return ""
387
  # ############################################
388
 
389
  # ########## Incorrect Resource Syntax ###########
390
  def check_resource_syntax(resources):
391
 
392
+ if len(resources) == 0:
393
+ return ""
394
+
395
+ #incorrect_resources_syntax = find_non_NPs(resources)
396
+ incorrect_resources_syntax = []
397
+ for resource in resources:
398
+ result = check_verb_or_noun_phrase(resource)
399
+ if result == "verb phrase":
400
+ incorrect_resources_syntax.append(resource)
401
+
402
+ if incorrect_resources_syntax:
403
+ output = ""
404
+ for incorrect_resource_syntax in incorrect_resources_syntax:
405
+ refactored_element = prompt(
406
+ '''
407
+ You are a specialist in English linguistics.
408
+ You will be provided with a sentence that is not a noun phrase, and your task is to make it a noun phrase representing a resource.
409
+ A noun phrase should start with a noun.
410
+ For example: internet, database, and file system.
411
+ Answer with the new sentence only.''', incorrect_resource_syntax)
412
+ output = output + '"' + incorrect_resource_syntax + '" should be refactored to "' + refactored_element + '"\n'
413
+
414
+ #incorrect_resource_syntax = "\n".join(incorrect_resource_syntax)
415
+ return "Incorrect resources syntax:\n" + output
416
  else:
417
+ return ""
418
  # ############################################
419
 
420
  # ########## Similarity ###########
 
428
  for i in range(len(elements_per_actor[key])):
429
  for j in range(i+1,len(elements_per_actor[key])):
430
  sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
431
+
432
+ # Predict semantic similarity
433
  semantic_similarity_scores = sentences_similarity_model.predict(sentence_pairs, show_progress_bar=True)
434
 
435
  similar_elements = []
436
+
437
  for index, value in enumerate(sentence_pairs):
438
  if semantic_similarity_scores[index] > similarity_threshold:
439
  similar_elements.append(value)
440
+ #similar_elements.append('"'+value+'"')
441
  #semantic_similarity["pair_"+str(index+1)] = [value,semantic_similarity_scores[index]]
442
 
443
  if similar_elements:
444
+ result_string = ""
445
+ for sublist in similar_elements:
446
+ result_string += ' and '.join(f'"{item}"' for item in sublist) + '\n'
447
+
448
+ #similar_elements = [' and '.join('"' + ele + '"') for ele in similar_elements]
449
+ #similar_elements = "\n".join(similar_elements)
450
+ return "Similar elements:\n" + result_string
451
  else:
452
+ return ""
453
 
454
  return semantic_similarity
455
  # #################################
456
 
457
  # ########## Misspelling ###########
458
+ # def get_misspelled_words(sentence):
459
 
460
+ # spell = Speller(only_replacements=True)
461
 
462
+ # misspelled= []
463
 
464
+ # for word in sentence.split():
465
+ # correct_word = spell(word)
466
+ # if word != correct_word:
467
+ # misspelled.append([word, correct_word])
468
 
469
+ # return misspelled
470
 
471
  def check_spelling(elements):
472
 
473
+ refactored_elements = []
 
474
 
475
  for key, value in elements.items():
476
+ for i in range(0, len(elements[key])):
477
+ refactored_element = prompt(
478
+ '''
479
+ You are a specialist in English linguistics.
480
+ You will be provided with a sentence, and your task is to report any misspelled words and correct the spelling if needed.
481
+ Answer with "correct" or "misspilled". In case the sentence is misspilled, correct it with the right spelling.
482
+ Use a JSON format with keys 'original sentence', 'answer', and 'correct sentence'.
483
+ For example: {'original sentence': 'incraese value', 'answer': 'misspelled', 'correct sentence': 'increase value'}''', elements[key][i])
484
+
485
+ refactored_element = refactored_element.replace("'", '"')
486
+ refactored_element = json.loads(refactored_element)
487
+
488
+ if refactored_element['answer'] == 'misspelled':
489
+ refactored_elements.append('"' + refactored_element["original sentence"] + '" should be written as "' + refactored_element["correct sentence"] + '"')
490
+
491
 
492
+ if refactored_elements:
493
+ refactored_elements = "\n".join(refactored_elements)
494
+ return "Misspilled elements:\n" + refactored_elements
495
+ else:
496
+ return ""
497
  # ##################################
498
 
499
  # ########## NLI ###########
 
570
  return result
571
 
572
  # Contradiction
573
+ def check_for_linguistic_conflict(pairs):
574
+
575
+ pairs = ",".join(str(element) for element in pairs)
576
+
577
+ contradicting_pairs = []
578
+ result = prompt(
579
+ '''
580
+ You are a specialist in English linguistics.
581
+ You will be provided with a list of sentence pairs, and your task is to determine whether each pair can be conflicting or not.
582
+ For example: "Inrease quality of service" AND "Cut expenses" are conflicting because increasing quality usually requires spending money.
583
+ For each pair, answer with "yes" or "no" along with a short reason.
584
+ Use a list of dictionaries format with keys "pair" and "answer". Omit "reason" from your response.''', pairs)
585
+
586
+ result = result.replace("'", '"')
587
+
588
+ results = json.loads(result)
589
+ for result in results:
590
+ if result["answer"] == "yes":
591
+ contradicting_pairs.append(result["pair"])
592
+
593
+ return contradicting_pairs
594
+
595
+ def find_paths_between_elements(elements, start_element, end_element, visited, path=[]):
596
+
597
+ visited[start_element] = True
598
+ path.append(start_element)
599
 
600
+ if start_element == end_element:
601
+ yield list(path)
602
+ else:
603
+ for contrib in elements:
604
+ if contrib[1] in visited: ## added
605
+ if contrib[0] == start_element and not visited[contrib[1]]:
606
+ yield from find_paths_between_elements(elements, contrib[1], end_element, visited, path)
607
+
608
+ path.pop()
609
+ visited[start_element] = False
610
+
611
+ def check_contradiction(elements_per_actor, contributing_elements):
612
+
613
+ pairs_to_check_1 = []
614
+ pairs_to_check_2 = []
615
+ pairs_to_check_3 = []
616
+
617
+ all_values_contributing_elements = []
618
+ for values_list in contributing_elements.values():
619
+ all_values_contributing_elements.extend(values_list)
620
+
621
+
622
+ sentence_pairs = []
623
  contradicting_elements = []
624
 
625
+ # case 1: contradicting elements contributing similarly to other elements
626
  for key, value in elements_per_actor.items():
627
 
628
  for i in range(len(elements_per_actor[key])):
629
  for j in range(i+1,len(elements_per_actor[key])):
630
  sentence_pairs.append([elements_per_actor[key][i], elements_per_actor[key][j]])
631
+
 
 
632
  for sentence_pair in sentence_pairs:
633
+ contribution_scores = []
634
+
635
+ for contributing_element in all_values_contributing_elements:
636
+
637
+ if contributing_element[0] == sentence_pair[0] or contributing_element[0] == sentence_pair[1]:
638
+
639
+ if contributing_element[2] == "make":
640
+ contribution_score = 75
641
+ elif contributing_element[2] == "help":
642
+ contribution_score = 50
643
+ elif contributing_element[2] == "somePositive":
644
+ contribution_score = 25
645
+ elif contributing_element[2] == "unknown":
646
+ contribution_score = 0
647
+ elif contributing_element[2] == "someNegative":
648
+ contribution_score = -25
649
+ elif contributing_element[2] == "break":
650
+ contribution_score = -50
651
+ elif contributing_element[2] == "hurt":
652
+ contribution_score = -75
653
+ else:
654
+ contribution_score = int(contributing_element[2])
655
+
656
+ contribution_scores.append((contributing_element[0], contribution_score))
657
+
658
+ if len(contribution_scores) < 2:
659
+ pairs_to_check_1.append([sentence_pair[0].replace("'", ""), sentence_pair[1].replace("'", "")])
660
+ else:
661
+ flag = 0
662
+ for pair in itertools.combinations(contribution_scores, r=2):
663
+ if pair[0][0] != pair[1][0]:
664
+ if pair[0][1] * pair[1][1] < 0:
665
+ flag = 1
666
+
667
+ if flag == 0:
668
+ pairs_to_check_2.append([sentence_pair[0].replace("'", ""), sentence_pair[1].replace("'", "")])
669
+
670
+ # case 2: contradicting elements contributing similarly to each other, taking into consideration the full path between the two elements
671
+ for key, value in elements_per_actor.items():
672
+ for element1 in value:
673
+ for element2 in value:
674
+ if element1 != element2:
675
+ visited = {e: False for e in value}
676
+
677
+ for path in find_paths_between_elements(all_values_contributing_elements, element1, element2, visited):
678
+
679
+ first_edge_value = next((contrib[2] for contrib in all_values_contributing_elements if contrib[0] == path[0] and contrib[1] == path[1]), None)
680
+ last_edge_value = next((contrib[2] for contrib in all_values_contributing_elements if contrib[0] == path[-2] and contrib[1] == path[-1]), None)
681
+
682
+ if first_edge_value is not None and last_edge_value is not None and int(first_edge_value) * int(last_edge_value) > 0:
683
+ pairs_to_check_3.append([element1.replace("'", ""), element2.replace("'", "")])
684
+
685
+ pairs_to_check = pairs_to_check_1 + pairs_to_check_2 + pairs_to_check_3
686
+
687
+ # Initialize an empty list to store the divided lists
688
+ divided_lists = []
689
+
690
+ # Iterate over the long list and create sublists of 30 items each
691
+ for i in range(0, len(pairs_to_check), 30):
692
+ sublist = pairs_to_check[i:i + 30]
693
+ divided_lists.append(sublist)
694
+
695
+ for divided_list in divided_lists:
696
+ contradicting_elements = contradicting_elements + check_for_linguistic_conflict(divided_list)
697
 
698
  if contradicting_elements:
699
+ # Using a set to store unique sublists
700
+ contradicting_elements = set(tuple(sublist) for sublist in contradicting_elements)
701
+ # Converting back to a list of lists
702
+ contradicting_elements = [list(sublist) for sublist in contradicting_elements]
703
+
704
  contradicting_elements = [' and '.join(ele) for ele in contradicting_elements]
705
  contradicting_elements = "\n".join(contradicting_elements)
706
+
707
+ return "Conflicting elements:\n" + contradicting_elements
708
  else:
709
+ return ""
710
  # ##########################
711
 
712
  # ************************* User Interface *************************
713
 
714
+ def detect_bad_smells(tgrl_file, selected_bad_smells, size_threshold, similarity_threshold):
715
 
716
  output = ""
717
 
718
  tgrl_text = parse_tgrl(tgrl_file)
719
 
720
+ all_elements, elements_per_actor, decomposed_elements, contributing_elements = extract_elements(tgrl_text)
721
+
722
+ if 'Lengthy element' in selected_bad_smells:
723
+ print(output)
724
+ result = get_long_elements(all_elements, size_threshold)
725
+ if result != "":
726
+ output = output + result + "\n\n"
727
+
728
+ if 'Complex element' in selected_bad_smells:
729
+ result = get_complex_sentences(all_elements)
730
+ if result != "":
731
+ output = output + result + "\n\n"
732
+
733
+ if 'Punctuation-marked element' in selected_bad_smells:
734
+ result = get_punctuations(all_elements)
735
+ if result != "":
736
+ output = output + result + "\n\n"
737
+
738
+ if 'Incorrect actor syntax' in selected_bad_smells:
739
+ result = check_actor_syntax(all_elements['actors'])
740
+ if result != "":
741
+ output = output + result + "\n\n"
742
+
743
+ if 'Incorrect goal syntax' in selected_bad_smells:
744
+ result = check_goal_syntax(all_elements['goals'])
745
+ if result != "":
746
+ output = output + result + "\n\n"
747
+
748
+ if 'Incorrect softgoal syntax' in selected_bad_smells:
749
+ result = check_softgoal_syntax(all_elements['softGoals'])
750
+ if result != "":
751
+ output = output + result + "\n\n"
752
+
753
+ if 'Incorrect task syntax' in selected_bad_smells:
754
+ result = check_task_syntax(all_elements['tasks'])
755
+ if result != "":
756
+ output = output + result + "\n\n"
757
+
758
+ if 'Incorrect resource syntax' in selected_bad_smells:
759
+ result = check_resource_syntax(all_elements['resources'])
760
+ if result != "":
761
+ output = output + result + "\n\n"
762
+
763
+ if 'Similar elements' in selected_bad_smells:
764
+ result = get_similar_elements(elements_per_actor, similarity_threshold)
765
+ if result != "":
766
+ output = output + result + "\n\n"
767
+
768
+ if 'Misspelled element' in selected_bad_smells:
769
+ result = check_spelling(all_elements)
770
+ if result != "":
771
+ output = output + result + "\n\n"
772
+
773
+ if 'Goal/Task and Sub-goal/Sub-task mismatch' in selected_bad_smells:
774
+ result = check_entailment(decomposed_elements)
775
+ if result != "":
776
+ output = output + result + "\n\n"
777
+
778
+ if 'Conflicting elements' in selected_bad_smells:
779
+ result = check_contradiction(elements_per_actor, contributing_elements)
780
+ if result != "":
781
+ output = output + result + "\n\n"
782
 
783
  return output
784
 
785
 
786
+ interface = gr.Interface(fn = detect_bad_smells,
787
  inputs = [gr.File(label="TGRL File"),
788
+ gr.CheckboxGroup(["Lengthy element", "Complex element", "Punctuation-marked element", "Incorrect actor syntax", "Incorrect goal syntax", "Incorrect softgoal syntax", "Incorrect task syntax", "Incorrect resource syntax", "Similar elements", "Misspelled element", "Goal/Task and Sub-goal/Sub-task mismatch", "Conflicting elements"],
789
+ label="Which bad smells you want to detect and refactor?"),
790
+ gr.Slider(label= "Length threshold", value = 5, minimum = 2, maximum = 10, step = 1),
791
  gr.Slider(label= "Similarity threshold", value = 0.9, minimum = 0, maximum = 1, step = 0.1)],
792
+ outputs = [gr.Textbox(label= "Detected and refactored bad smells:")],
793
+ title = "TGRL Bad Smells Detection and Refactoring",
794
+ description = "Upload your .xgrl file and we will find the bad smells and refactor them for you!",
795
  theme = gr.themes.Soft())
796
 
797
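
The gr.Interface construction above is only the UI wiring; the launch call sits outside the hunks shown in this diff. A typical invocation (a sketch, not taken from this commit) would be:

# Sketch only: the launch call is not visible in the hunks above.
interface.launch()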