Mjwarren3 commited on
Commit
7b28cbc
1 Parent(s): 7be825c

Adding real application

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ /venv/
__pycache__/bert_similarity.cpython-311.pyc ADDED
Binary file (4.09 kB). View file
 
__pycache__/text_converter.cpython-311.pyc ADDED
Binary file (3.38 kB). View file
 
__pycache__/text_generator.cpython-311.pyc ADDED
Binary file (921 Bytes). View file
 
app.py CHANGED
@@ -1,55 +1,54 @@
1
  import gradio as gr
 
2
 
3
- def calc_input_reading_level(input_text):
4
- # Placeholder for actual implementation of the reading level calculation.
5
- return len(input_text) % 10 # Random operation as a placeholder for demonstration.
6
-
7
- def generate_and_analyze_text(input_text, target_level):
8
- # Placeholder for generating text and analyzing it against the target level.
9
- output_text = input_text[::-1] # Reversing text as a simple example operation.
10
- output_reading_level = int(target_level) # Placeholder for demonstration.
11
- similarity = 0.75 # Fixed similarity value for demonstration.
12
- input_level = calc_input_reading_level(input_text) # Reuse the reading level calc for input level.
13
- return input_level, output_text, output_reading_level, similarity
14
-
15
- with gr.Blocks() as app:
16
- with gr.Row():
17
- with gr.Column(scale=1):
18
- input_text1 = gr.Textbox(label="Input Text for Reading Level")
19
- button1 = gr.Button("Calculate Reading Level", elem_id="button1")
20
- with gr.Column(scale=1):
21
- input_reading_level = gr.Textbox(label="Input Text Reading Level")
22
-
23
- with gr.Row():
24
- with gr.Column(scale=1):
25
- input_text2 = gr.Textbox(label="Input Text for Generation")
26
- target_level = gr.Dropdown(choices=["1", "2", "3", "4", "5"], label="Target Reading Level")
27
- button2 = gr.Button("Generate and Analyze Text", elem_id="button2")
28
- with gr.Column(scale=1):
29
- display_input_level = gr.Textbox(label="Input Text Reading Level (Post-Generation)")
30
- output_text = gr.Textbox(label="Output Text")
31
- output_reading_level = gr.Textbox(label="Output Text Reading Level")
32
- output_text_similarity = gr.Textbox(label="Output Text Similarity to Input Text")
33
-
34
- button1.click(
35
- fn=calc_input_reading_level,
36
- inputs=input_text1,
37
- outputs=input_reading_level
38
- )
39
-
40
- button2.click(
41
- fn=generate_and_analyze_text,
42
- inputs=[input_text2, target_level],
43
- outputs=[display_input_level, output_text, output_reading_level, output_text_similarity]
44
- )
45
-
46
- # Custom CSS to style the buttons
47
- app.css = """
48
- #button1, #button2 {
49
- background-color: orange;
50
- color: white;
51
- width: 100%;
52
- }
53
- """
54
-
55
- app.launch()
 
1
  import gradio as gr
2
+ from text_converter import generate_similar_sentence
3
 
4
# Markdown header rendered at the top of the Gradio app.
APP_DESCRIPTION = '''# Reading Level Converter
<div id="content_align">Convert any text to a specified reading level while retaining the core text meaning</div>'''

# Minimum BERT entailment probability for a rewrite to count as
# meaning-preserving (consumed by generate_similar_sentence).
MIN_ENTAILMENT = 0.5
# Maximum number of rewrite attempts before giving up.
MAX_ITER = 5
SYSTEM_PROMPT = "You are a writing assistant. You help convert complex texts to simpler texts while maintaining the core meaning of the text."

# Dictionary mapping grade levels to reading ease scores
# (Flesch reading-ease bands: higher score = easier text; keys double as the
# radio-button labels in the UI).
reading_levels = {
    "5th Grade (90-100)": (90, 100),
    "6th Grade (80-90)": (80, 90),
    "7th Grade (70-80)": (70, 80),
    "8th - 9th Grade (60-70)": (60, 70),
    "10th - 12th Grade (50-60)": (50, 60),
    "College (30-50)": (30, 50),
    "College Graduate + Professionals (0-30)": (0, 30)
}
21
+
22
def convert_text(input_text, grade_level):
    """Gradio callback: rewrite input_text toward the chosen reading-level band.

    Looks up the (min, max) Flesch reading-ease bounds for grade_level and
    delegates to generate_similar_sentence, returning its 5-tuple unchanged:
    (output_text, similarity, reading_level, input_reading_level, message).
    """
    low, high = reading_levels[grade_level]
    return generate_similar_sentence(
        input_text,
        low,
        high,
        MIN_ENTAILMENT,
        SYSTEM_PROMPT,
        MAX_ITER,
    )
26
+
27
def main():
    """Build and launch the Gradio UI for the reading level converter."""
    with gr.Blocks(css='styles.css') as app:
        gr.Markdown(APP_DESCRIPTION)

        with gr.Tab("Reading Level Calculator"):
            # Inputs: free text plus the target Flesch reading-ease band.
            input_text = gr.Textbox(label="Input Text", placeholder="Type here...", lines=4)
            grade_level = gr.Radio(choices=list(reading_levels.keys()), label="Target Reading Level", value=list(reading_levels.keys())[0])

            # Output widgets, in display order. Note convert_text returns its
            # values in a different order than the layout; the outputs= list
            # on the click handler below does the mapping.
            output_input_reading_level = gr.Textbox(label="Input Text Reading Level", placeholder="Input Text Reading Level...", lines=1)
            output_reading_level = gr.Textbox(label="Output Reading Level", placeholder="Output Reading Level...", lines=1)
            output_similarity = gr.Textbox(label="Similarity", placeholder="Similarity Score...", lines=1)
            output_converted_text = gr.Textbox(label="Converted Text", placeholder="Results will appear here...", lines=4)

            output_message = gr.Textbox(label="Message", placeholder="System Message...", lines=2)

            convert_button = gr.Button("Convert Text")

            # convert_text returns (output_text, similarity, reading_level,
            # input_reading_level, message); map each onto its widget.
            convert_button.click(
                fn=convert_text,
                inputs=[input_text, grade_level],
                outputs=[output_converted_text, output_similarity, output_reading_level, output_input_reading_level, output_message]
            )

        # inbrowser=True opens a browser tab automatically on launch.
        app.launch(inbrowser=True)


if __name__ == '__main__':
    main()
 
 
bert_similarity.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import from_pretrained_keras
2
+ import tensorflow as tf
3
+ import numpy as np
4
+ import transformers
5
+
6
# Class order matches the output head of the pretrained similarity model;
# get_similarity indexes probabilities by these names.
labels = ["contradiction", "entailment", "neutral"]
# Downloaded from the Hugging Face Hub at import time (network required).
model = from_pretrained_keras("keras-io/bert-semantic-similarity")
8
+
9
class BertSemanticDataGenerator(tf.keras.utils.Sequence):
    """Generates batches of BERT-tokenized sentence pairs for the
    keras-io/bert-semantic-similarity model.

    Parameters:
        sentence_pairs: array of [sentence1, sentence2] string pairs.
        labels: integer class labels, or None at inference time.
        batch_size: pairs per batch.
        shuffle: reshuffle sample order at the end of every epoch.
        include_targets: when True, __getitem__ also returns labels
            (training/validation); when False it returns features only
            (inference).
    """

    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=32,
        shuffle=True,
        include_targets=True,
    ):
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        # Load our BERT Tokenizer to encode the text.
        # We will use bert-base-uncased pretrained model.
        self.tokenizer = transformers.BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True
        )
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()

    def __len__(self):
        # Denotes the number of batches per epoch. Floor division drops a
        # trailing partial batch; harmless at the batch_size=1 inference use
        # in get_similarity.
        return len(self.sentence_pairs) // self.batch_size

    def __getitem__(self, idx):
        # Retrieves the batch of index.
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # With BERT tokenizer's batch_encode_plus batch of both the sentences are
        # encoded together and separated by [SEP] token.
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            max_length=128,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
            # Fix: pad_to_max_length=True is deprecated in transformers;
            # padding="max_length" is the equivalent supported form.
            padding="max_length",
            return_tensors="tf",
        )

        # Convert batch of encoded features to numpy array.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")

        # Set to true if data generator is used for training/validation.
        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            return [input_ids, attention_masks, token_type_ids]

    def on_epoch_end(self):
        # Bug fix: shuffle=True was previously accepted but never acted upon
        # (the base class's on_epoch_end is a no-op). Reshuffle the sample
        # order between epochs when requested.
        if self.shuffle:
            np.random.shuffle(self.indexes)
65
+
66
+
67
+
68
def get_similarity(sentence1, sentence2):
    """Return the model's entailment probability for the two sentences.

    Wraps the pair in a single-item, non-shuffled BertSemanticDataGenerator,
    runs one prediction, and extracts the probability assigned to the
    "entailment" class as a float.
    """
    pair = np.array([[str(sentence1), str(sentence2)]])
    generator = BertSemanticDataGenerator(
        pair, labels=None, batch_size=1, shuffle=False, include_targets=False,
    )
    probs = model.predict(generator[0])[0]

    scores = dict(zip(labels, (float(p) for p in probs)))
    return scores['entailment']
requirements.txt ADDED
Binary file (254 Bytes). View file
 
styles.css ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
/* Center the page heading rendered from APP_DESCRIPTION's markdown. */
h1 {
    text-align: center;
}

/* Center the subtitle div (id set inline in APP_DESCRIPTION). */
#content_align {
    text-align: center;
}
text_converter.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bert_similarity import get_similarity
2
+ from text_generator import get_gpt_response
3
+ from textstat import flesch_reading_ease
4
+
5
def generate_user_prompt(prompt_type, base_text):
    """Return the LLM user prompt for one rewrite direction.

    Parameters:
        prompt_type: "too_simple" (raise the reading level) or "too_complex"
            (lower it); any other value raises KeyError.
        base_text: the text to embed in the prompt.
    """
    prompts = {
        "too_simple": f"""
        Convert this text to a higher reading level of the original text.
        The higher reading level text should have more syllables per word and more words per sentence.
        It should retain the core meaning of the original text.
        Here is the text:
        {base_text}
        """,
        "too_complex": f"""
        Convert this text to a simpler version of the original text.
        The simpler versions of text have fewer syllables per word and fewer words per sentence.
        It should retain the core meaning of the original text.
        Here is the text:
        {base_text}
        """
    }

    # Bug fix: the dict values are f-strings, so base_text is already
    # interpolated above. The previous `.format(base_text=base_text)` call
    # re-parsed the finished string and crashed (KeyError/ValueError) whenever
    # base_text itself contained literal braces. Return the string directly.
    return prompts[prompt_type]
24
+
25
+
26
def generate_similar_sentence(input_text, min_reading_level, max_reading_level, min_entailment, system_prompt, max_iter):
    """Iteratively rewrite input_text until its Flesch reading-ease score lands
    inside (min_reading_level, max_reading_level) while BERT entailment against
    the original stays at or above min_entailment, giving up after max_iter
    GPT calls.

    Returns a 5-tuple: (output_text, similarity, reading_level,
    input_reading_level, message).

    NOTE(review): if max_iter is 0 (or the loop never produces a response),
    output_text is returned as None — confirm callers tolerate that.
    """
    i = 0
    completed = False
    user_prompt = ""
    curr_reading_level = flesch_reading_ease(input_text)
    input_reading_level = flesch_reading_ease(input_text)
    curr_text = input_text
    response = None
    similarity = 0
    reading_level = 0
    print(f"Current reading level is: {curr_reading_level}")

    # Strict inequalities here, but the in-loop success test below uses <=;
    # an input sitting exactly on a band boundary is not accepted up front.
    if curr_reading_level > min_reading_level and curr_reading_level < max_reading_level:
        return input_text, 1, curr_reading_level, input_reading_level, "Input text was already within the target reading level!"

    else:
        while i < max_iter and not completed:
            # Choose this iteration's rewrite direction. Higher Flesch score
            # means easier text, so "above max" reads as too simple.
            if curr_reading_level > max_reading_level:
                print(f"Too simple, current reading level is {curr_reading_level}")
                user_prompt = generate_user_prompt("too_simple", curr_text)
            elif curr_reading_level < min_reading_level:
                print(f"Too complex, current reading level is {curr_reading_level}")
                user_prompt = generate_user_prompt("too_complex", curr_text)
            elif similarity < min_entailment:
                # Reading level is acceptable but the meaning drifted too far
                # from the original; ask GPT to pull the text back.
                print(f"Entailment level is too low: {similarity}")
                user_prompt = f"Can you convert this text '{input_text}' to a grade level more similar to this text '{curr_text}'"

            response = get_gpt_response(user_prompt, system_prompt)
            similarity = get_similarity(response, input_text)
            reading_level = flesch_reading_ease(response)

            # Success requires both the meaning check and the level band.
            if similarity >= min_entailment and min_reading_level <= reading_level <= max_reading_level:
                completed = True

            # The latest response becomes the base text for the next attempt.
            curr_text = response
            curr_reading_level = reading_level
            print(response)
            i += 1

    if completed:
        return response, similarity, reading_level, input_reading_level, "Success! Please see the converted text at your target reading level."
    else:
        return response, similarity, reading_level, input_reading_level, "Failed. We could not reach the target reading level while maintaining the text meaning."
text_generator.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ import os
3
+
4
# API key read from the environment; os.environ.get returns None when unset.
# NOTE(review): the client is constructed at import time — confirm every
# deployment sets OPENAI_APIKEY, since a missing key fails here or at the
# first request rather than with a clear configuration error.
OPENAI_APIKEY = os.environ.get("OPENAI_APIKEY")

client = OpenAI(api_key=OPENAI_APIKEY)
7
+
8
def get_gpt_response(user_prompt, system_prompt):
    """Send a system+user prompt pair to gpt-3.5-turbo and return the reply text."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    return response.choices[0].message.content