|
from bert_similarity import get_similarity |
|
from text_generator import get_gpt_response |
|
from textstat import flesch_reading_ease |
|
|
|
def generate_user_prompt(prompt_type, base_text):
    """Build the rewrite instruction that is sent along with ``base_text``.

    Args:
        prompt_type: ``"too_simple"`` to request a higher reading level, or
            ``"too_complex"`` to request a simpler version.
        base_text: The text to be rewritten. It is inserted verbatim, so
            literal ``{`` / ``}`` characters inside it are safe.

    Returns:
        The prompt string: the instructions followed by ``base_text``.

    Raises:
        KeyError: If ``prompt_type`` is not one of the two supported keys.
    """
    # Plain (non-f) templates: the text is substituted exactly once by
    # str.format() below. Interpolating with an f-string and then calling
    # .format() on the result would re-parse any braces contained in the
    # user's text and either raise or substitute the wrong value.
    prompts = {
        "too_simple": """
        Convert this text to a higher reading level of the original text.
        The higher reading level text should have more syllables per word and more words per sentence.
        It should retain the core meaning of the original text.
        Here is the text:
        {base_text}
        """,
        "too_complex": """
        Convert this text to a simpler version of the original text.
        The simpler versions of text have fewer syllables per word and fewer words per sentence.
        It should retain the core meaning of the original text.
        Here is the text:
        {base_text}
        """,
    }

    return prompts[prompt_type].format(base_text=base_text)
|
|
|
# Flesch Reading Ease score bands, keyed by a human-readable label.
# Each value is a (lowest score, highest score) pair; the easier grades sit
# at the high end of the scale. Insertion order runs from easiest to hardest.
reading_levels = {
    "5th Grade (90-100)": (90, 100),
    "6th Grade (80-90)": (80, 90),
    "7th Grade (70-80)": (70, 80),
    "8th - 9th Grade (60-70)": (60, 70),
    "10th - 12th Grade (50-60)": (50, 60),
    "College (30-50)": (30, 50),
    "College Graduate + Professionals (0-30)": (0, 30),
    # Open-ended at the bottom of the scale.
    "Research + Nobel laureate ((-infinity)-(-1))": (float("-inf"), -1),
}

# Reverse lookup: (lowest, highest) score pair -> band label.
inverse_reading_levels = {
    bounds: label for label, bounds in reading_levels.items()
}
|
|
|
def user_input_readability_level(input_text):
    """Score ``input_text`` and name the reading-level band it falls in.

    The score comes from ``flesch_reading_ease``. The band is looked up in
    ``inverse_reading_levels`` (``(low, high)`` bounds -> label); both bounds
    are inclusive and the first matching band wins. The score and the chosen
    label are also printed.

    The bands do not cover every possible score (nothing above 100, nothing
    strictly between -1 and 0), so a score that matches no band is assigned
    the band whose nearest bound is closest rather than being left unlabeled.

    Args:
        input_text: Text to evaluate.

    Returns:
        A ``(score, label)`` tuple. ``label`` is ``''`` only when no band can
        be chosen at all (no bands defined, or the score is NaN).
    """
    current_score = flesch_reading_ease(input_text)
    print(f'Reading score for user input is: {current_score}')

    current_level = ''
    fallback_level = ''
    fallback_gap = float('inf')

    # `low` / `high` rather than `min` / `max` so the builtins stay usable.
    for (low, high), level in inverse_reading_levels.items():
        if low <= current_score <= high:
            current_level = level
            break
        # Distance from the score to this band's nearest bound.
        gap = low - current_score if current_score < low else current_score - high
        if gap < fallback_gap:
            fallback_gap = gap
            fallback_level = level
    else:
        # No band contained the score: use the closest one. On a tie the
        # band listed first is kept.
        current_level = fallback_level

    if current_level:
        print(f'Reading level for user input is: {current_level}')

    return current_score, current_level
|
|
|
def generate_similar_sentence(
    input_text,
    min_reading_level,
    max_reading_level,
    min_entailment,
    system_prompt,
    max_iter,
    curr_reading_level,
):
    """Repeatedly rewrite ``input_text`` until it lands in the target range.

    Each round builds a prompt from the most recent text, sends it through
    ``get_gpt_response`` and scores the reply with ``get_similarity``
    (against the original ``input_text``) and ``flesch_reading_ease``. A
    reply is accepted when its similarity is at least ``min_entailment`` and
    its reading score lies within ``[min_reading_level, max_reading_level]``.
    If no reply is accepted within ``max_iter`` rounds, a fallback reply is
    returned instead. Every generated reply and the selected one are printed.

    Args:
        input_text: The original text to convert.
        min_reading_level: Lowest acceptable reading score (inclusive).
        max_reading_level: Highest acceptable reading score (inclusive).
        min_entailment: Lowest acceptable similarity to ``input_text``.
        system_prompt: Forwarded unchanged to ``get_gpt_response``.
        max_iter: Maximum number of generation rounds.
        curr_reading_level: Reading score of ``input_text``.

    Returns:
        A ``(text, similarity, reading_score, message)`` tuple. When the
        input is already in range, or nothing was generated, ``text`` is
        ``input_text`` itself, ``similarity`` is ``1`` and ``reading_score``
        is the current reading score.
    """
    already_ok_message = "Input text was already within the target reading level!"
    success_message = "Success! Please see the converted text at your target reading level."
    failure_message = "Failed. We could not reach the target reading level while maintaining the text meaning."

    # Inclusive bounds, matching the acceptance test applied to generated
    # replies below; a score sitting exactly on a bound needs no rewriting.
    if min_reading_level <= curr_reading_level <= max_reading_level:
        return input_text, 1, curr_reading_level, already_ok_message

    curr_text = input_text
    similarity = 0
    generated_texts = []
    generated_text_scores = []
    result_index = -1
    closeness = float('inf')
    completed = False
    iteration = 0

    while iteration < max_iter:
        # A higher score means easier text, so above-range is "too simple".
        if curr_reading_level > max_reading_level:
            print(f"Too simple, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_simple", curr_text)
        elif curr_reading_level < min_reading_level:
            print(f"Too complex, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_complex", curr_text)
        elif similarity < min_entailment:
            print(f"Entailment level is too low: {similarity}")
            user_prompt = f"Can you convert this text '{input_text}' to a grade level more similar to this text '{curr_text}'"
        else:
            print(f"Reading level is within target range: {curr_reading_level}")
            completed = True
            break

        response = get_gpt_response(user_prompt, system_prompt)
        similarity = get_similarity(response, input_text)
        reading_level = flesch_reading_ease(response)

        generated_texts.append(response)
        generated_text_scores.append((similarity, reading_level))

        # Distance of this reply from the target range; None when in range.
        if reading_level < min_reading_level:
            distance = abs(reading_level - min_reading_level)
        elif reading_level > max_reading_level:
            distance = abs(reading_level - max_reading_level)
        else:
            distance = None

        if similarity >= min_entailment and min_reading_level <= reading_level <= max_reading_level:
            result_index = iteration
            completed = True
            break
        elif result_index == -1:
            # NOTE(review): the first reply becomes the fallback without
            # recording `closeness`, so the next out-of-range reply only has
            # to be more similar to replace it - confirm this is intended.
            result_index = iteration
        elif (
            distance is not None
            and distance < closeness
            and similarity > generated_text_scores[result_index][0]
        ):
            # Out of range, but both closer to it and more faithful than the
            # current fallback.
            closeness = distance
            result_index = iteration

        curr_text = response
        curr_reading_level = reading_level
        iteration += 1

    if not generated_texts:
        # Nothing was generated (e.g. max_iter <= 0), so there is no reply to
        # select; hand the input back instead of indexing an empty list.
        message = already_ok_message if completed else failure_message
        return input_text, 1, curr_reading_level, message

    for index, (text, scores) in enumerate(zip(generated_texts, generated_text_scores)):
        print(f"=============== Iteration {index} ===============")
        print(f"Generated text: {text}\nSimilarity: {scores[0]}\nReadability Score: {scores[1]}\n")

    selected_text = generated_texts[result_index]
    selected_similarity, selected_reading_level = generated_text_scores[result_index]

    print("=============== Start Printing Selected Information ===============")
    print(f"Selected Iteration: {result_index + 1}")
    print(f"Selected Result: {selected_text}")
    print(f"Selected Result's Similarity score: {selected_similarity}")
    print(f"Selected Result's Readability score: {selected_reading_level}")
    print("=============== End Printing Selected Information ===============")

    message = success_message if completed else failure_message
    return selected_text, selected_similarity, selected_reading_level, message