readinglevelconverter / text_converter.py
HakshaySundar's picture
Update text_converter.py
3c0c6d3 verified
raw history blame
No virus
4.86 kB
from bert_similarity import get_similarity
from text_generator import get_gpt_response
from textstat import flesch_reading_ease
def generate_user_prompt(prompt_type, base_text):
    """Build the user prompt asking the model to raise or lower the reading level.

    Args:
        prompt_type: ``"too_simple"`` to request a higher reading level, or
            ``"too_complex"`` to request a simpler version.
        base_text: The text to be rewritten. It is embedded verbatim.

    Returns:
        The prompt string with ``base_text`` on the line after
        "Here is the text:".

    Raises:
        KeyError: If ``prompt_type`` is not one of the two supported keys.
    """
    # base_text is interpolated exactly once, by the f-strings below. Running
    # str.format() over the finished prompt would re-parse any "{" or "}" that
    # the user's text contains and either raise or silently rewrite it.
    prompts = {
        "too_simple": (
            "\n"
            "Convert this text to a higher reading level of the original text.\n"
            "The higher reading level text should have more syllables per word and more words per sentence.\n"
            "It should retain the core meaning of the original text.\n"
            "Here is the text:\n"
            f"{base_text}\n"
        ),
        "too_complex": (
            "\n"
            "Convert this text to a simpler version of the original text.\n"
            "The simpler versions of text have fewer syllables per word and fewer words per sentence.\n"
            "It should retain the core meaning of the original text.\n"
            "Here is the text:\n"
            f"{base_text}\n"
        ),
    }
    return prompts[prompt_type]
def user_input_readability_level(input_text):
    """Score ``input_text`` with Flesch Reading Ease and name its grade band.

    Args:
        input_text: The text to score.

    Returns:
        A ``(score, level)`` tuple: the value returned by
        ``flesch_reading_ease`` and the label of the band containing it.
        Scores above the easiest band or below the hardest band are assigned
        to that extreme band. ``level`` is ``''`` only when the score cannot
        be compared to any band (e.g. NaN).
    """
    # Ordered easiest -> hardest. Neighbouring bands share a boundary value and
    # the first match wins, so a score of exactly 90 is "5th Grade".
    reading_levels = (
        ("5th Grade (90-100)", 90, 100),
        ("6th Grade (80-90)", 80, 90),
        ("7th Grade (70-80)", 70, 80),
        ("8th - 9th Grade (60-70)", 60, 70),
        ("10th - 12th Grade (50-60)", 50, 60),
        ("College (30-50)", 30, 50),
        ("College Graduate + Professionals (0-30)", 0, 30),
    )

    current_score = flesch_reading_ease(input_text)
    print(f'Reading score for user input is: {current_score}')

    easiest_label, _, highest_bound = reading_levels[0]
    hardest_label, lowest_bound, _ = reading_levels[-1]

    # Flesch scores are not confined to 0-100: very easy text can exceed 100
    # and very dense text can go negative, so fold those into the end bands
    # instead of reporting no level at all.
    if current_score > highest_bound:
        current_level = easiest_label
    elif current_score < lowest_bound:
        current_level = hardest_label
    else:
        current_level = next(
            (
                label
                for label, low, high in reading_levels
                if low <= current_score <= high
            ),
            '',
        )

    if current_level:
        print(f'Reading level for user input is: {current_level}')
    return current_score, current_level
def generate_similar_sentence(input_text, min_reading_level, max_reading_level, min_entailment, system_prompt, max_iter, curr_reading_level):
    """Repeatedly rewrite ``input_text`` until it falls in the target reading band.

    Each round builds a prompt from the current state, obtains a rewrite from
    ``get_gpt_response``, then scores the rewrite with ``flesch_reading_ease``
    and with ``get_similarity`` against the original input. The loop stops as
    soon as a rewrite scores inside ``[min_reading_level, max_reading_level]``
    with similarity of at least ``min_entailment``, or after ``max_iter``
    rounds.

    Args:
        input_text: The original text to convert.
        min_reading_level: Lower bound (inclusive) of the target score band.
        max_reading_level: Upper bound (inclusive) of the target score band.
            A score above it is treated as "too simple".
        min_entailment: Minimum similarity to ``input_text`` that a rewrite
            must reach to be accepted.
        system_prompt: Passed through unchanged to ``get_gpt_response``.
        max_iter: Maximum number of rewrite rounds.
        curr_reading_level: Reading score of ``input_text``.

    Returns:
        A ``(text, similarity, reading_level, message)`` tuple. The similarity
        and reading level always describe the returned ``text``:

        * input already in band: the input itself, similarity ``1``;
        * success: the accepted rewrite;
        * failure: the out-of-band rewrite closest to the band, else the last
          rewrite, else (no rewrite was produced) the input itself.
    """
    already_ok_msg = "Input text was already within the target reading level!"
    success_msg = "Success! Please see the converted text at your target reading level."
    failure_msg = "Failed. We could not reach the target reading level while maintaining the text meaning."

    # Inclusive bounds, matching the acceptance test applied to rewrites below;
    # an input sitting exactly on a bound needs no conversion.
    if min_reading_level <= curr_reading_level <= max_reading_level:
        return input_text, 1, curr_reading_level, already_ok_msg

    curr_text = input_text
    similarity = 0
    reading_level = 0
    generated_texts = []
    best = None  # (text, similarity, reading_level) of the result candidate
    closeness = float('inf')
    completed = False

    attempt = 0
    while attempt < max_iter:
        if curr_reading_level > max_reading_level:
            print(f"Too simple, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_simple", curr_text)
        elif curr_reading_level < min_reading_level:
            print(f"Too complex, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_complex", curr_text)
        elif similarity < min_entailment:
            print(f"Entailment level is too low: {similarity}")
            user_prompt = f"Can you convert this text '{input_text}' to a grade level more similar to this text '{curr_text}'"
        else:
            # Defensive: in-band text with adequate similarity is normally
            # accepted by the check further down before reaching this point.
            print(f"Reading level is within target range: {curr_reading_level}")
            completed = True
            break

        response = get_gpt_response(user_prompt, system_prompt)
        generated_texts.append(response)
        similarity = get_similarity(response, input_text)
        reading_level = flesch_reading_ease(response)
        candidate = (response, similarity, reading_level)

        if similarity >= min_entailment and min_reading_level <= reading_level <= max_reading_level:
            best = candidate
            completed = True
            break

        # Remember the out-of-band rewrite nearest to the band, together with
        # its own scores, so a failed run reports consistent values.
        if reading_level < min_reading_level:
            distance = min_reading_level - reading_level
        elif reading_level > max_reading_level:
            distance = reading_level - max_reading_level
        else:
            distance = None  # in band but not similar enough: not a candidate
        if distance is not None and distance < closeness:
            closeness = distance
            best = candidate

        # The next round works from the latest rewrite.
        curr_text = response
        curr_reading_level = reading_level
        attempt += 1

    # Printing all generated texts to the console.
    for index, text in enumerate(generated_texts):
        print(f"Generated text {index}: {text}")

    if best is None:
        if generated_texts:
            # No out-of-band candidate was recorded; use the last rewrite,
            # whose scores are the ones currently held.
            best = (generated_texts[-1], similarity, reading_level)
        else:
            # Nothing was generated (e.g. max_iter <= 0): hand back the input.
            best = (input_text, 1, curr_reading_level)

    result_text, result_similarity, result_level = best
    message = success_msg if completed else failure_msg
    return result_text, result_similarity, result_level, message