Spaces:
Sleeping
Sleeping
File size: 7,187 Bytes
7b28cbc 2610aed 7b28cbc 58eb5eb 7b28cbc 58eb5eb 7b28cbc 58eb5eb 7b28cbc 2610aed caa11cf c97edc1 d59d12f caa11cf 2610aed c97edc1 2610aed 23812aa 2610aed 4433993 2610aed caa11cf 4433993 7b28cbc caa11cf c97edc1 caa11cf 7b28cbc 3c0c6d3 caa11cf 7b28cbc caa11cf 7b28cbc caa11cf c97edc1 caa11cf 7b28cbc 2610aed 7b28cbc c97edc1 caa11cf 7b28cbc caa11cf 7b28cbc caa11cf 849dc1e 5b74304 acff824 caa11cf 7b28cbc 8b37720 2610aed 8b37720 c1bdfcc c97edc1 c1bdfcc c97edc1 c11817c c1bdfcc caa11cf 7b28cbc 116ac08 7b28cbc 116ac08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
from bert_similarity import get_similarity
from text_generator import get_gpt_response
from arte_score import ping_api
from textstat import flesch_reading_ease
def generate_user_prompt(prompt_type, base_text):
    """Build the user prompt asking the model to rewrite ``base_text``.

    Args:
        prompt_type: ``"too_simple"`` to request a higher reading level, or
            ``"too_complex"`` to request a simpler version.
        base_text: The text to be rewritten; inserted verbatim into the prompt.

    Returns:
        The prompt string with ``base_text`` substituted in.

    Raises:
        KeyError: If ``prompt_type`` is not one of the two supported values.
    """
    # Plain (non-f) templates: the text is substituted exactly once by
    # ``str.format`` below, so braces inside ``base_text`` are left untouched
    # instead of being re-interpreted as replacement fields.
    templates = {
        "too_simple": """
Convert this text to a higher reading level of the original text.
The higher reading level text should have more syllables per word and more words per sentence.
It should retain the core meaning of the original text.
The output text should also have a similar number of total words as the input text.
Here is the input text:
{base_text}
""",
        "too_complex": """
Convert this text to a simpler version of the original text.
The simpler version should have simpler words, fewer syllables per word, and fewer words per sentence.
It should retain the core meaning of the original text.
The output text should also have a similar number of total words as the input text.
Here is the input text:
{base_text}
""",
    }
    return templates[prompt_type].format(base_text=base_text)
# Identifiers of the readability scoring models this module knows about.
model_types = ["FRE", "SBERT"]

# Flesch Reading Ease bands: display label -> (lower bound, upper bound).
# NOTE: the bands are not contiguous; no band covers scores strictly
# between -1 and 0.
fre_levels = {
    "5th Grade (90-100)": (90, 100),
    "6th Grade (80-90)": (80, 90),
    "7th Grade (70-80)": (70, 80),
    "8th - 9th Grade (60-70)": (60, 70),
    "10th - 12th Grade (50-60)": (50, 60),
    "College (30-50)": (30, 50),
    "College Graduate + Professionals (0-30)": (0, 30),
    "Research + Nobel laureate ((-infinity)-(-1))": (float("-inf"), -1),
}

# SBERT score bands: display label -> (lower bound, upper bound).
# NOTE: adjacent bands leave 0.01-wide gaps (e.g. between -1.88 and -1.87).
sbert_levels = {
    "Difficult (-100 to -1.88)": (-100, -1.88),
    "Somewhat Difficult (-1.87 to -1.21)": (-1.87, -1.21),
    "Intermediate (-1.20 to -0.65)": (-1.2, -0.65),
    "Somewhat Easy (-0.64 to -0.05)": (-0.64, -0.05),
    "Easy (> -0.04)": (-0.04, 100),
}

# SBERT is the default table until set_reading_levels() selects another one.
reading_levels = sbert_levels
# Reverse lookup of the active table: (lower, upper) -> display label.
inverse_reading_levels = dict(zip(reading_levels.values(), reading_levels.keys()))
def set_reading_levels(level_type):
    """Switch the active reading-level table and rebuild its reverse lookup.

    ``"FRE"`` selects ``fre_levels`` and ``"SBERT"`` selects ``sbert_levels``;
    any other value leaves the currently active table in place. The reverse
    lookup is rebuilt in every case.

    Returns:
        ``level_type``, unchanged.
    """
    global reading_levels, inverse_reading_levels

    # Start from the table that is already active so unknown values are a no-op.
    selected = reading_levels
    if level_type == "FRE":
        selected = fre_levels
    elif level_type == "SBERT":
        selected = sbert_levels

    reading_levels = selected
    inverse_reading_levels = dict(zip(selected.values(), selected.keys()))
    return level_type
def user_input_readability_level(input_text, model_type):
    """Score ``input_text`` and map the score onto the active reading-level table.

    The score comes from ``ping_api(input_text, model_type)``. The label is the
    first entry of ``inverse_reading_levels`` whose inclusive ``(low, high)``
    range contains the score.

    Returns:
        ``(score, label)``; ``label`` is ``''`` when no range contains the score.
    """
    current_score = ping_api(input_text, model_type)
    print(f'Reading score for user input is: {current_score} for model type: {model_type}')

    # First matching band wins, in the table's insertion order.
    matched_label = next(
        (
            label
            for (low, high), label in inverse_reading_levels.items()
            if low <= current_score <= high
        ),
        None,
    )
    if matched_label is None:
        return current_score, ''

    print(f'Reading level for user input is: {matched_label}')
    return current_score, matched_label
def generate_similar_sentence(input_text, min_reading_level, max_reading_level, min_entailment, system_prompt, max_iter, curr_reading_level, model_type):
    """Iteratively rewrite ``input_text`` until it fits the target reading range.

    Each iteration asks ``get_gpt_response`` for a rewrite, scores it with
    ``get_similarity`` (against the original input) and ``ping_api`` (reading
    score), and stops as soon as a rewrite is inside
    ``[min_reading_level, max_reading_level]`` with similarity of at least
    ``min_entailment``. If no rewrite qualifies within ``max_iter`` iterations,
    the best fallback candidate seen so far is returned instead.

    Args:
        input_text: The original text to convert.
        min_reading_level: Inclusive lower bound of the target reading score.
        max_reading_level: Inclusive upper bound of the target reading score.
        min_entailment: Minimum acceptable similarity to ``input_text``.
        system_prompt: System prompt forwarded to ``get_gpt_response``.
        max_iter: Maximum number of rewrite attempts.
        curr_reading_level: Reading score of ``input_text``.
        model_type: Scoring model identifier forwarded to ``ping_api``.

    Returns:
        A 4-tuple ``(text, similarity, reading_score, status_message)``. When
        the input is already in range, or no rewrite was attempted at all, the
        input text is returned with similarity ``1`` and its own reading score.
    """
    failure_message = "Failed. We could not reach the target reading level while maintaining the text meaning."

    # Inclusive bounds, consistent with the acceptance check applied to
    # generated text inside the loop; an input sitting exactly on a bound is
    # already on target and needs no rewrite.
    if min_reading_level <= curr_reading_level <= max_reading_level:
        return input_text, 1, curr_reading_level, "Input text was already within the target reading level!"

    i = 0
    completed = False
    curr_text = input_text
    similarity = 0
    generated_texts = []
    generated_text_scores = []
    result_index = -1
    # Distance from the target range of the currently selected fallback.
    closeness = float('inf')

    while i < max_iter and not completed:
        if curr_reading_level > max_reading_level:
            print(f"Too simple, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_simple", curr_text)
        elif curr_reading_level < min_reading_level:
            print(f"Too complex, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_complex", curr_text)
        elif similarity < min_entailment:
            print(f"Entailment level is too low: {similarity}")
            user_prompt = f"Can you convert this text '{input_text}' to a grade level more similar to this text '{curr_text}'"
        else:
            # Neither the reading level nor the similarity needs adjusting.
            print(f"Reading level is within target range: {curr_reading_level}")
            completed = True
            break

        response = get_gpt_response(user_prompt, system_prompt)
        generated_texts.append(response)
        similarity = get_similarity(response, input_text)
        reading_level = ping_api(response, model_type)
        generated_text_scores.append((similarity, reading_level))

        # How far outside the target range this rewrite landed (0 if not outside).
        if reading_level < min_reading_level:
            distance = min_reading_level - reading_level
        elif reading_level > max_reading_level:
            distance = reading_level - max_reading_level
        else:
            distance = 0

        if similarity >= min_entailment and min_reading_level <= reading_level <= max_reading_level:
            # Fully acceptable rewrite: stop immediately.
            result_index = i
            completed = True
            break

        if result_index == -1:
            # The first rewrite is the initial fallback. Record its distance so
            # later candidates must actually be closer to replace it.
            result_index = i
            if distance > 0:
                closeness = distance
        elif 0 < distance < closeness and (
            similarity > generated_text_scores[result_index][0] or similarity > min_entailment
        ):
            # Out-of-range rewrite that is closer to the target than the current
            # fallback and is more similar to the input (or above the threshold).
            closeness = distance
            result_index = i

        curr_text = response
        curr_reading_level = reading_level
        i += 1

    if not generated_texts:
        # No rewrite was produced (e.g. max_iter <= 0), so there is nothing to
        # index into; hand the input back rather than raising IndexError.
        return input_text, 1, curr_reading_level, failure_message

    # Printing all generated texts to the console.
    print(f"=============== Model Type: {model_type} ===============\n")
    for index, (text, (text_similarity, text_score)) in enumerate(zip(generated_texts, generated_text_scores)):
        print(f"=============== Iteration {index} ===============")
        print(f"Generated text: {text}\nSimilarity: {text_similarity}\nReadability Score: {text_score}\n")

    selected_text = generated_texts[result_index]
    selected_similarity, selected_score = generated_text_scores[result_index]

    # Printing the selected index and text.
    print(f"=============== Start Printing Selected Information ===============")
    print(f"Selected Iteration: {result_index + 1}")
    print(f"Selected Result: {selected_text}")
    print(f"Selected Result's Similarity score: {selected_similarity}")
    print(f"Selected Result's Readability score: {selected_score}")
    print(f"=============== End Printing Selected Information ===============")

    if completed:
        status = "Success! Please see the converted text at your target reading level."
    else:
        status = failure_message
    return selected_text, selected_similarity, selected_score, status