barunsaha committed on
Commit
80a7ca8
1 Parent(s): 69fbdcb

Remove leading ```json when cleaning JSON

Browse files
Files changed (1) hide show
  1. helpers/text_helper.py +17 -23
helpers/text_helper.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import json_repair as jr
2
 
3
 
@@ -17,28 +20,19 @@ def is_valid_prompt(prompt: str) -> bool:
17
 
18
  def get_clean_json(json_str: str) -> str:
19
  """
20
- Attempt to clean a JSON response string from the LLM by removing the trailing ```
21
- and any text beyond that.
22
  CAUTION: May not always be accurate.
23
 
24
  :param json_str: The input string in JSON format.
25
  :return: The "cleaned" JSON string.
26
  """
27
 
28
- # An example of response containing JSON and other text:
29
- # {
30
- # "title": "AI and the Future: A Transformative Journey",
31
- # "slides": [
32
- # ...
33
- # ]
34
- # } <<---- This is end of valid JSON content
35
- # ```
36
- #
37
- # ```vbnet
38
- # Please note that the JSON output is in valid format but the content of the "Role of GPUs in AI" slide is just an example and may not be factually accurate. For accurate information, you should consult relevant resources and update the content accordingly.
39
- # ```
40
  response_cleaned = json_str
41
 
 
 
 
42
  while True:
43
  idx = json_str.rfind('```') # -1 on failure
44
 
@@ -46,7 +40,7 @@ def get_clean_json(json_str: str) -> str:
46
  break
47
 
48
  # In the ideal scenario, the character before the last ``` should be
49
- # a new line or a closing bracket }
50
  prev_char = json_str[idx - 1]
51
 
52
  if (prev_char == '}') or (prev_char == '\n' and json_str[idx - 2] == '}'):
@@ -69,13 +63,13 @@ def fix_malformed_json(json_str: str) -> str:
69
 
70
 
71
  if __name__ == '__main__':
72
- json1 = '''{
73
  "key": "value"
74
  }
75
  '''
76
- json2 = '''["Reason": "Regular updates help protect against known vulnerabilities."]'''
77
- json3 = '''["Reason" Regular updates help protect against known vulnerabilities."]'''
78
- json4 = '''
79
  {"bullet_points": [
80
  ">> Write without stopping or editing",
81
  >> Set daily writing goals and stick to them,
@@ -83,7 +77,7 @@ if __name__ == '__main__':
83
  ],}
84
  '''
85
 
86
- print(fix_malformed_json(json1))
87
- print(fix_malformed_json(json2))
88
- print(fix_malformed_json(json3))
89
- print(fix_malformed_json(json4))
 
1
+ """
2
+ Utility functions to help with text processing.
3
+ """
4
  import json_repair as jr
5
 
6
 
 
20
 
21
  def get_clean_json(json_str: str) -> str:
22
  """
23
+ Attempt to clean a JSON response string from the LLM by removing ```json at the beginning and
24
+ trailing ``` and any text beyond that.
25
  CAUTION: May not always be accurate.
26
 
27
  :param json_str: The input string in JSON format.
28
  :return: The "cleaned" JSON string.
29
  """
30
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  response_cleaned = json_str
32
 
33
+ if json_str.startswith('```json'):
34
+ json_str = json_str[7:]
35
+
36
  while True:
37
  idx = json_str.rfind('```') # -1 on failure
38
 
 
40
  break
41
 
42
  # In the ideal scenario, the character before the last ``` should be
43
+ # a new line or a closing bracket
44
  prev_char = json_str[idx - 1]
45
 
46
  if (prev_char == '}') or (prev_char == '\n' and json_str[idx - 2] == '}'):
 
63
 
64
 
65
  if __name__ == '__main__':
66
+ JSON1 = '''{
67
  "key": "value"
68
  }
69
  '''
70
+ JSON2 = '''["Reason": "Regular updates help protect against known vulnerabilities."]'''
71
+ JSON3 = '''["Reason" Regular updates help protect against known vulnerabilities."]'''
72
+ JSON4 = '''
73
  {"bullet_points": [
74
  ">> Write without stopping or editing",
75
  >> Set daily writing goals and stick to them,
 
77
  ],}
78
  '''
79
 
80
+ print(fix_malformed_json(JSON1))
81
+ print(fix_malformed_json(JSON2))
82
+ print(fix_malformed_json(JSON3))
83
+ print(fix_malformed_json(JSON4))