File size: 7,187 Bytes
7b28cbc
 
2610aed
7b28cbc
 
 
 
 
 
 
 
58eb5eb
 
7b28cbc
 
 
 
58eb5eb
7b28cbc
58eb5eb
 
7b28cbc
 
 
 
 
 
2610aed
 
 
caa11cf
 
 
 
 
 
c97edc1
 
d59d12f
caa11cf
2610aed
 
 
 
 
 
 
 
 
 
 
c97edc1
 
2610aed
 
 
 
 
 
 
 
23812aa
2610aed
4433993
 
2610aed
caa11cf
 
 
 
 
 
 
 
4433993
7b28cbc
 
 
 
 
 
caa11cf
c97edc1
caa11cf
 
7b28cbc
3c0c6d3
caa11cf
7b28cbc
 
 
 
 
 
 
 
 
 
 
 
caa11cf
 
 
 
7b28cbc
 
caa11cf
c97edc1
caa11cf
 
7b28cbc
2610aed
7b28cbc
c97edc1
 
 
caa11cf
7b28cbc
caa11cf
7b28cbc
caa11cf
849dc1e
 
5b74304
 
acff824
caa11cf
 
7b28cbc
 
 
 
 
8b37720
2610aed
8b37720
c1bdfcc
 
c97edc1
 
c1bdfcc
c97edc1
 
c11817c
 
c1bdfcc
caa11cf
 
7b28cbc
116ac08
7b28cbc
116ac08
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from bert_similarity import get_similarity
from text_generator import get_gpt_response
from arte_score import ping_api
from textstat import flesch_reading_ease

def generate_user_prompt(prompt_type, base_text):
    """Build the user prompt asking the model to raise or lower the reading level.

    Args:
        prompt_type: ``"too_simple"`` to request a higher reading level, or
            ``"too_complex"`` to request a simpler version.
        base_text: The text to be rewritten; inserted verbatim into the prompt.

    Returns:
        The prompt string for the requested ``prompt_type``.

    Raises:
        KeyError: If ``prompt_type`` is not one of the two supported keys.
    """
    # The f-strings already interpolate base_text. The result must NOT be passed
    # through str.format() again: any "{" or "}" in the user's text would then be
    # parsed as a replacement field (raising KeyError/ValueError/IndexError) or
    # silently collapsed ("{{" -> "{").
    prompts = {
        "too_simple": f"""
            Convert this text to a higher reading level of the original text.
            The higher reading level text should have more syllables per word and more words per sentence.
            It should retain the core meaning of the original text.
            The output text should also have a similar number of total words as the input text.
            Here is the input text:
            {base_text}
        """,
        "too_complex": f"""
            Convert this text to a simpler version of the original text.
            The simpler version should have simpler words, fewer syllables per word, and fewer words per sentence.
            It should retain the core meaning of the original text.
            The output text should also have a similar number of total words as the input text.
            Here is the input text:
            {base_text}
        """
    }

    return prompts[prompt_type]

# Identifiers of the supported scoring models; these are the values that
# set_reading_levels() compares against.
model_types = ["FRE", "SBERT"]

# Label -> (lower bound, upper bound) score bands used when model type is "FRE".
# NOTE(review): adjacent bands share an endpoint (e.g. 90 is in two bands), there
# is no band for scores strictly between -1 and 0, and none above 100 — confirm.
fre_levels = {
    "5th Grade (90-100)": (90, 100),
    "6th Grade (80-90)": (80, 90),
    "7th Grade (70-80)": (70, 80),
    "8th - 9th Grade (60-70)": (60, 70),
    "10th - 12th Grade (50-60)": (50, 60),
    "College (30-50)": (30, 50),
    "College Graduate + Professionals (0-30)": (0, 30),
    "Research + Nobel laureate ((-infinity)-(-1))": (float('-inf'), -1),
}

# Label -> (lower bound, upper bound) score bands used when model type is "SBERT".
# NOTE(review): consecutive bands leave 0.01-wide gaps (e.g. between -1.88 and
# -1.87), so a score falling in a gap matches no label — confirm this is intended.
sbert_levels = {
    "Difficult (-100 to -1.88)": (-100, -1.88),
    "Somewhat Difficult (-1.87 to -1.21)": (-1.87, -1.21),
    "Intermediate (-1.20 to -0.65)": (-1.20, -0.65),
    "Somewhat Easy (-0.64 to -0.05)": (-0.64, -0.05),
    "Easy (> -0.04)": (-0.04, 100),
}

# The SBERT table is the active one until set_reading_levels() selects another.
reading_levels = sbert_levels

# Reverse lookup of the active table: (lower, upper) -> label.
inverse_reading_levels = {
    bounds: label for label, bounds in reading_levels.items()
}

def set_reading_levels(level_type):
    """Switch the module-wide reading-level table and rebuild its reverse lookup.

    ``"FRE"`` selects ``fre_levels`` and ``"SBERT"`` selects ``sbert_levels``.
    Any other value leaves the currently active table in place. The reverse
    lookup is rebuilt from whichever table is active afterwards.

    Args:
        level_type: Name of the scoring model whose table should become active.

    Returns:
        ``level_type``, unchanged.
    """
    global reading_levels
    global inverse_reading_levels

    for known_type, table in (("FRE", fre_levels), ("SBERT", sbert_levels)):
        if level_type == known_type:
            reading_levels = table
            break

    inverse_reading_levels = {
        bounds: label for label, bounds in reading_levels.items()
    }
    return level_type

def user_input_readability_level(input_text, model_type):
    """Score ``input_text`` and look up the label of the band containing the score.

    The score comes from ``ping_api(input_text, model_type)``. Bands are read
    from the module-level ``inverse_reading_levels`` mapping, and both ends of a
    band are inclusive. The first matching band in iteration order wins.

    Args:
        input_text: Text to score.
        model_type: Forwarded to ``ping_api``.

    Returns:
        A ``(score, label)`` tuple; ``label`` is ``''`` when no band contains
        the score.
    """
    score = ping_api(input_text, model_type)
    print(f'Reading score for user input is: {score} for model type: {model_type}')

    matched_level = ''
    for bounds, label in inverse_reading_levels.items():
        lower, upper = bounds
        if not (lower <= score <= upper):
            continue
        print(f'Reading level for user input is: {label}')
        matched_level = label
        break

    return score, matched_level

def generate_similar_sentence(input_text, min_reading_level, max_reading_level, min_entailment, system_prompt, max_iter, curr_reading_level, model_type):
    """Repeatedly rewrite ``input_text`` until it scores inside the target range.

    Each iteration builds a prompt from the latest text, asks
    ``get_gpt_response`` for a rewrite, then scores the rewrite with
    ``get_similarity`` (against the original input) and ``ping_api``. The loop
    stops as soon as a rewrite is inside ``[min_reading_level,
    max_reading_level]`` with similarity of at least ``min_entailment``, or
    after ``max_iter`` attempts.

    Args:
        input_text: The original text to convert.
        min_reading_level: Inclusive lower bound of the target score range.
        max_reading_level: Inclusive upper bound of the target score range.
        min_entailment: Minimum similarity a rewrite needs to be accepted.
        system_prompt: Forwarded to ``get_gpt_response``.
        max_iter: Maximum number of rewrites to request.
        curr_reading_level: Score of ``input_text`` as supplied by the caller.
        model_type: Forwarded to ``ping_api``.

    Returns:
        A ``(text, similarity, reading_score, message)`` tuple. When the input
        is already in range, or no rewrite was produced, ``text`` is
        ``input_text`` and ``similarity`` is ``1``.
    """
    # Inclusive bounds, consistent with the acceptance test applied to generated
    # text below. With a strict test, an input scoring exactly on a boundary
    # entered the loop and produced a prompt comparing the text with itself.
    if min_reading_level <= curr_reading_level <= max_reading_level:
        return input_text, 1, curr_reading_level, "Input text was already within the target reading level!"

    i = 0
    completed = False
    curr_text = input_text
    similarity = 0
    generated_texts = []
    generated_text_scores = []
    result_index = -1
    closeness = float('inf')

    while i < max_iter and not completed:
        # A score above the range is treated as "too simple", below as "too complex".
        if curr_reading_level > max_reading_level:
            print(f"Too simple, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_simple", curr_text)
        elif curr_reading_level < min_reading_level:
            print(f"Too complex, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_complex", curr_text)
        elif similarity < min_entailment:
            # In range but drifted from the original meaning: retry from the original.
            print(f"Entailment level is too low: {similarity}")
            user_prompt = f"Can you convert this text '{input_text}' to a grade level more similar to this text '{curr_text}'"
        else:
            # Defensive: reached only when every comparison above is false
            # (for example with NaN scores).
            print(f"Reading level is within target range: {curr_reading_level}")
            completed = True
            break

        response = get_gpt_response(user_prompt, system_prompt)

        # We add the generated text to the list of generated texts.
        generated_texts.append(response)

        similarity = get_similarity(response, input_text)
        reading_level = ping_api(response, model_type)

        # We add the generated text's scores to the list.
        generated_text_scores.append((similarity, reading_level))

        below_range = reading_level < min_reading_level
        above_range = reading_level > max_reading_level

        if similarity >= min_entailment and min_reading_level <= reading_level <= max_reading_level:
            # Fully acceptable rewrite: select it and stop.
            result_index = i
            completed = True
            break
        elif result_index == -1:
            # The first rewrite is the fallback until something better shows up.
            # NOTE(review): its distance is not stored in `closeness`, so the next
            # out-of-range rewrite always passes the distance test — confirm intended.
            result_index = i
        elif ((below_range and abs(reading_level - min_reading_level) < closeness)
                or (above_range and abs(reading_level - max_reading_level) < closeness)) and \
                (similarity > generated_text_scores[result_index][0] or similarity > min_entailment):
            # Closer to the target range than the recorded distance, and either
            # more similar than the current fallback or above the threshold.
            closeness = abs(reading_level - min_reading_level) if below_range else abs(reading_level - max_reading_level)
            result_index = i

        curr_text = response
        curr_reading_level = reading_level
        i += 1

    failure_message = "Failed. We could not reach the target reading level while maintaining the text meaning."

    # Nothing was generated (e.g. max_iter <= 0): indexing the empty result lists
    # below would raise IndexError, so hand back the unchanged input instead.
    if not generated_texts:
        return input_text, 1, curr_reading_level, failure_message

    # Printing all generated texts to the console.
    print(f"=============== Model Type: {model_type} ===============\n")
    for index, text in enumerate(generated_texts):
        print(f"=============== Iteration {index} ===============")
        print(f"Generated text: {text}\nSimilarity: {generated_text_scores[index][0]}\nReadability Score: {generated_text_scores[index][1]}\n")

    selected_text = generated_texts[result_index]
    selected_similarity, selected_score = generated_text_scores[result_index]

    # Printing the selected index and text.
    print(f"=============== Start Printing Selected Information ===============")
    print(f"Selected Iteration: {result_index + 1}")
    print(f"Selected Result: {selected_text}")
    print(f"Selected Result's Similarity score: {selected_similarity}")
    print(f"Selected Result's Readability score: {selected_score}")
    print(f"=============== End Printing Selected Information ===============")

    # Returning the final result.
    if completed:
        return selected_text, selected_similarity, selected_score, "Success! Please see the converted text at your target reading level."
    return selected_text, selected_similarity, selected_score, failure_message