Spaces:
Running
Running
Remove leading ```json when cleaning JSON
Browse files - helpers/text_helper.py (+17 -23)
helpers/text_helper.py
CHANGED
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
1 |
import json_repair as jr
|
2 |
|
3 |
|
@@ -17,28 +20,19 @@ def is_valid_prompt(prompt: str) -> bool:
|
|
17 |
|
18 |
def get_clean_json(json_str: str) -> str:
|
19 |
"""
|
20 |
-
Attempt to clean a JSON response string from the LLM by removing the
|
21 |
-
and any text beyond that.
|
22 |
CAUTION: May not be always accurate.
|
23 |
|
24 |
:param json_str: The input string in JSON format.
|
25 |
:return: The "cleaned" JSON string.
|
26 |
"""
|
27 |
|
28 |
-
# An example of response containing JSON and other text:
|
29 |
-
# {
|
30 |
-
# "title": "AI and the Future: A Transformative Journey",
|
31 |
-
# "slides": [
|
32 |
-
# ...
|
33 |
-
# ]
|
34 |
-
# } <<---- This is end of valid JSON content
|
35 |
-
# ```
|
36 |
-
#
|
37 |
-
# ```vbnet
|
38 |
-
# Please note that the JSON output is in valid format but the content of the "Role of GPUs in AI" slide is just an example and may not be factually accurate. For accurate information, you should consult relevant resources and update the content accordingly.
|
39 |
-
# ```
|
40 |
response_cleaned = json_str
|
41 |
|
|
|
|
|
|
|
42 |
while True:
|
43 |
idx = json_str.rfind('```') # -1 on failure
|
44 |
|
@@ -46,7 +40,7 @@ def get_clean_json(json_str: str) -> str:
|
|
46 |
break
|
47 |
|
48 |
# In the ideal scenario, the character before the last ``` should be
|
49 |
-
# a new line or a closing bracket
|
50 |
prev_char = json_str[idx - 1]
|
51 |
|
52 |
if (prev_char == '}') or (prev_char == '\n' and json_str[idx - 2] == '}'):
|
@@ -69,13 +63,13 @@ def fix_malformed_json(json_str: str) -> str:
|
|
69 |
|
70 |
|
71 |
if __name__ == '__main__':
|
72 |
-
|
73 |
"key": "value"
|
74 |
}
|
75 |
'''
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
{"bullet_points": [
|
80 |
">> Write without stopping or editing",
|
81 |
>> Set daily writing goals and stick to them,
|
@@ -83,7 +77,7 @@ if __name__ == '__main__':
|
|
83 |
],}
|
84 |
'''
|
85 |
|
86 |
-
print(fix_malformed_json(
|
87 |
-
print(fix_malformed_json(
|
88 |
-
print(fix_malformed_json(
|
89 |
-
print(fix_malformed_json(
|
|
|
1 |
+
"""
|
2 |
+
Utility functions to help with text processing.
|
3 |
+
"""
|
4 |
import json_repair as jr
|
5 |
|
6 |
|
|
|
20 |
|
21 |
def get_clean_json(json_str: str) -> str:
|
22 |
"""
|
23 |
+
Attempt to clean a JSON response string from the LLM by removing ```json at the beginning and
|
24 |
+
trailing ``` and any text beyond that.
|
25 |
CAUTION: May not be always accurate.
|
26 |
|
27 |
:param json_str: The input string in JSON format.
|
28 |
:return: The "cleaned" JSON string.
|
29 |
"""
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
response_cleaned = json_str
|
32 |
|
33 |
+
if json_str.startswith('```json'):
|
34 |
+
json_str = json_str[7:]
|
35 |
+
|
36 |
while True:
|
37 |
idx = json_str.rfind('```') # -1 on failure
|
38 |
|
|
|
40 |
break
|
41 |
|
42 |
# In the ideal scenario, the character before the last ``` should be
|
43 |
+
# a new line or a closing bracket
|
44 |
prev_char = json_str[idx - 1]
|
45 |
|
46 |
if (prev_char == '}') or (prev_char == '\n' and json_str[idx - 2] == '}'):
|
|
|
63 |
|
64 |
|
65 |
if __name__ == '__main__':
|
66 |
+
JSON1 = '''{
|
67 |
"key": "value"
|
68 |
}
|
69 |
'''
|
70 |
+
JSON2 = '''["Reason": "Regular updates help protect against known vulnerabilities."]'''
|
71 |
+
JSON3 = '''["Reason" Regular updates help protect against known vulnerabilities."]'''
|
72 |
+
JSON4 = '''
|
73 |
{"bullet_points": [
|
74 |
">> Write without stopping or editing",
|
75 |
>> Set daily writing goals and stick to them,
|
|
|
77 |
],}
|
78 |
'''
|
79 |
|
80 |
+
print(fix_malformed_json(JSON1))
|
81 |
+
print(fix_malformed_json(JSON2))
|
82 |
+
print(fix_malformed_json(JSON3))
|
83 |
+
print(fix_malformed_json(JSON4))
|