Spaces:
Sleeping
Sleeping
from bert_similarity import get_similarity | |
from text_generator import get_gpt_response | |
from textstat import flesch_reading_ease | |
def generate_user_prompt(prompt_type, base_text):
    """Build the user prompt asking the model to raise or lower the reading level.

    Args:
        prompt_type: ``"too_simple"`` to request a higher reading level, or
            ``"too_complex"`` to request a simpler version.
        base_text: The text to be rewritten; inserted verbatim into the prompt.

    Returns:
        The prompt string with ``base_text`` substituted in.

    Raises:
        KeyError: If ``prompt_type`` is not one of the two supported keys.
    """
    # Plain (non-f) templates: the text is substituted exactly once below.
    # Interpolating via an f-string and then calling .format() on the result
    # would re-parse the user's text as a format string, so any '{' or '}'
    # in it would raise or be silently altered.
    prompts = {
        "too_simple": """
        Convert this text to a higher reading level of the original text.
        The higher reading level text should have more syllables per word and more words per sentence.
        It should retain the core meaning of the original text.
        The output text should also have a similar number of total words as the input text.
        Here is the input text:
        {base_text}
        """,
        "too_complex": """
        Convert this text to a simpler version of the original text.
        The simpler version should have simpler words, fewer syllables per word, and fewer words per sentence.
        It should retain the core meaning of the original text.
        The output text should also have a similar number of total words as the input text.
        Here is the input text:
        {base_text}
        """,
    }
    return prompts[prompt_type].format(base_text=base_text)
# Display label -> (lowest score, highest score) for that reading level.
# Entries are listed from the highest scores to the lowest; adjacent ranges
# share their boundary value (e.g. 90 appears in both of the first two rows).
# NOTE(review): scores strictly between -1 and 0, and scores above 100, fall
# inside none of these ranges -- confirm whether that is intended.
reading_levels = {
    "5th Grade (90-100)": (90, 100),
    "6th Grade (80-90)": (80, 90),
    "7th Grade (70-80)": (70, 80),
    "8th - 9th Grade (60-70)": (60, 70),
    "10th - 12th Grade (50-60)": (50, 60),
    "College (30-50)": (30, 50),
    "College Graduate + Professionals (0-30)": (0, 30),
    "Research + Nobel laureate ((-infinity)-(-1))": (float("-inf"), -1),
}

# Reverse lookup: (lowest score, highest score) -> display label, same order.
inverse_reading_levels = dict(zip(reading_levels.values(), reading_levels.keys()))
def user_input_readability_level(input_text):
    """Score ``input_text`` and look up the label of the range containing it.

    The score comes from ``flesch_reading_ease``; the label is the first entry
    of ``inverse_reading_levels`` whose inclusive ``(low, high)`` range
    contains that score. Both values are also printed to the console.

    Args:
        input_text: The text to score.

    Returns:
        A ``(score, label)`` tuple. ``label`` is the empty string when the
        score lies in none of the configured ranges.
    """
    current_score = flesch_reading_ease(input_text)
    print(f'Reading score for user input is: {current_score}')
    current_level = ''
    # Named low/high rather than min/max so the builtins are not shadowed.
    for (low, high), level in inverse_reading_levels.items():
        if low <= current_score <= high:
            print(f'Reading level for user input is: {level}')
            current_level = level
            # First match wins: a score on a shared boundary gets the label
            # that appears earlier in the table.
            break
    return current_score, current_level
def generate_similar_sentence(input_text, min_reading_level, max_reading_level, min_entailment, system_prompt, max_iter, curr_reading_level):
    """Repeatedly rewrite ``input_text`` until it fits a target readability range.

    Each round builds a prompt from the most recent text, asks
    ``get_gpt_response`` for a rewrite, then scores the rewrite with
    ``get_similarity`` (against the original ``input_text``) and
    ``flesch_reading_ease``. The loop stops as soon as a rewrite has a score
    inside ``[min_reading_level, max_reading_level]`` and a similarity of at
    least ``min_entailment``, or after ``max_iter`` rounds. Every rewrite and
    the finally selected one are printed to the console.

    Args:
        input_text: The original text to convert.
        min_reading_level: Lowest acceptable readability score (inclusive).
        max_reading_level: Highest acceptable readability score (inclusive).
        min_entailment: Lowest acceptable similarity to ``input_text``.
        system_prompt: Passed through unchanged to ``get_gpt_response``.
        max_iter: Maximum number of rewrite rounds.
        curr_reading_level: Readability score of ``input_text``.

    Returns:
        A ``(text, similarity, reading_level, message)`` tuple. When the input
        is already in range, or no rewrite was produced at all, ``text`` is
        ``input_text`` with similarity ``1`` and ``curr_reading_level``.
        Otherwise the tuple describes the selected rewrite, and ``message``
        says whether the target was reached.
    """
    failure_message = "Failed. We could not reach the target reading level while maintaining the text meaning."

    # Inclusive bounds, matching every in-loop check: a score sitting exactly
    # on a bound is already on target and needs no model call.
    if min_reading_level <= curr_reading_level <= max_reading_level:
        return input_text, 1, curr_reading_level, "Input text was already within the target reading level!"

    completed = False
    curr_text = input_text
    similarity = 0
    generated_texts = []
    generated_text_scores = []
    result_index = -1
    closeness = float('inf')

    # A while loop (not range()) so a non-integer max_iter still works.
    i = 0
    while i < max_iter:
        # A score above the range is treated as too simple, below as too complex.
        if curr_reading_level > max_reading_level:
            print(f"Too simple, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_simple", curr_text)
        elif curr_reading_level < min_reading_level:
            print(f"Too complex, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_complex", curr_text)
        elif similarity < min_entailment:
            print(f"Entailment level is too low: {similarity}")
            user_prompt = f"Can you convert this text '{input_text}' to a grade level more similar to this text '{curr_text}'"
        else:
            # Defensive: a rewrite that satisfies both criteria normally ends
            # the loop below before this branch can be reached.
            print(f"Reading level is within target range: {curr_reading_level}")
            completed = True
            break

        response = get_gpt_response(user_prompt, system_prompt)
        # Record every rewrite together with its (similarity, readability) pair.
        generated_texts.append(response)
        similarity = get_similarity(response, input_text)
        reading_level = flesch_reading_ease(response)
        generated_text_scores.append((similarity, reading_level))

        # Both criteria met: select this rewrite and stop.
        if similarity >= min_entailment and min_reading_level <= reading_level <= max_reading_level:
            result_index = i
            completed = True
            break

        # How far the score lies outside the target range (0 when inside it).
        if reading_level < min_reading_level:
            outside_by = min_reading_level - reading_level
        elif reading_level > max_reading_level:
            outside_by = reading_level - max_reading_level
        else:
            outside_by = 0

        if result_index == -1:
            # The first rewrite is the initial fallback. Remember its distance
            # so a later rewrite must actually be closer to replace it.
            result_index = i
            if outside_by > 0:
                closeness = outside_by
        elif 0 < outside_by < closeness and (
            similarity > generated_text_scores[result_index][0] or similarity > min_entailment
        ):
            # Closer to the range than the current fallback, and either more
            # similar than it or above the similarity threshold.
            closeness = outside_by
            result_index = i

        # The next round refines the latest rewrite.
        curr_text = response
        curr_reading_level = reading_level
        i += 1

    # No rewrite was produced (e.g. max_iter <= 0): there is nothing to index
    # into, so hand back the input with the failure message.
    if not generated_texts:
        return input_text, 1, curr_reading_level, failure_message

    # Printing all generated texts to the console.
    for index, text in enumerate(generated_texts):
        print(f"=============== Iteration {index} ===============")
        print(f"Generated text: {text}\nSimilarity: {generated_text_scores[index][0]}\nReadability Score: {generated_text_scores[index][1]}\n")

    selected_text = generated_texts[result_index]
    selected_similarity, selected_reading_level = generated_text_scores[result_index]

    # Printing the selected index and text.
    print("=============== Start Printing Selected Information ===============")
    print(f"Selected Iteration: {result_index + 1}")
    print(f"Selected Result: {selected_text}")
    print(f"Selected Result's Similarity score: {selected_similarity}")
    print(f"Selected Result's Readability score: {selected_reading_level}")
    print("=============== End Printing Selected Information ===============")

    if completed:
        message = "Success! Please see the converted text at your target reading level."
    else:
        message = failure_message
    return selected_text, selected_similarity, selected_reading_level, message