Spaces:
Sleeping
Sleeping
Adding real application
Browse files- .gitignore +1 -0
- __pycache__/bert_similarity.cpython-311.pyc +0 -0
- __pycache__/text_converter.cpython-311.pyc +0 -0
- __pycache__/text_generator.cpython-311.pyc +0 -0
- app.py +52 -53
- bert_similarity.py +76 -0
- requirements.txt +0 -0
- styles.css +8 -0
- text_converter.py +68 -0
- text_generator.py +17 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/venv/
|
__pycache__/bert_similarity.cpython-311.pyc
ADDED
Binary file (4.09 kB). View file
|
|
__pycache__/text_converter.cpython-311.pyc
ADDED
Binary file (3.38 kB). View file
|
|
__pycache__/text_generator.cpython-311.pyc
ADDED
Binary file (921 Bytes). View file
|
|
app.py
CHANGED
@@ -1,55 +1,54 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
app.launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
from text_converter import generate_similar_sentence
|
3 |
|
4 |
+
# Markdown banner rendered at the top of the Gradio app.
APP_DESCRIPTION = '''# Reading Level Converter
<div id="content_align">Convert any text to a specified reading level while retaining the core text meaning</div>'''

# Minimum entailment probability for a rewrite to count as meaning-preserving.
MIN_ENTAILMENT = 0.5
# Maximum number of LLM rewrite attempts per conversion.
MAX_ITER = 5
SYSTEM_PROMPT = "You are a writing assistant. You help convert complex texts to simpler texts while maintaining the core meaning of the text."

# Target Flesch reading-ease band (min, max) for each grade-level choice.
reading_levels = {
    "5th Grade (90-100)": (90, 100),
    "6th Grade (80-90)": (80, 90),
    "7th Grade (70-80)": (70, 80),
    "8th - 9th Grade (60-70)": (60, 70),
    "10th - 12th Grade (50-60)": (50, 60),
    "College (30-50)": (30, 50),
    "College Graduate + Professionals (0-30)": (0, 30),
}
|
21 |
+
|
22 |
+
def convert_text(input_text, grade_level):
    """Convert `input_text` to the selected reading level.

    Looks up the (min, max) reading-ease band for `grade_level` and
    delegates to `generate_similar_sentence`. Returns its 5-tuple:
    (converted text, similarity, output reading level, input reading
    level, status message) — the order wired to the Gradio outputs.
    """
    lo, hi = reading_levels[grade_level]
    return generate_similar_sentence(
        input_text, lo, hi, MIN_ENTAILMENT, SYSTEM_PROMPT, MAX_ITER
    )
|
26 |
+
|
27 |
+
def main():
    """Build the Gradio UI for the reading-level converter and launch it."""
    with gr.Blocks(css='styles.css') as app:
        gr.Markdown(APP_DESCRIPTION)

        with gr.Tab("Reading Level Calculator"):
            source_box = gr.Textbox(label="Input Text", placeholder="Type here...", lines=4)
            level_choice = gr.Radio(
                choices=list(reading_levels.keys()),
                label="Target Reading Level",
                value=list(reading_levels.keys())[0],
            )

            # Read-only result fields, filled by the click handler below.
            in_level_box = gr.Textbox(label="Input Text Reading Level", placeholder="Input Text Reading Level...", lines=1)
            out_level_box = gr.Textbox(label="Output Reading Level", placeholder="Output Reading Level...", lines=1)
            similarity_box = gr.Textbox(label="Similarity", placeholder="Similarity Score...", lines=1)
            converted_box = gr.Textbox(label="Converted Text", placeholder="Results will appear here...", lines=4)

            message_box = gr.Textbox(label="Message", placeholder="System Message...", lines=2)

            convert_button = gr.Button("Convert Text")
            convert_button.click(
                fn=convert_text,
                inputs=[source_box, level_choice],
                # Order must match convert_text's returned 5-tuple.
                outputs=[converted_box, similarity_box, out_level_box, in_level_box, message_box],
            )

        app.launch(inbrowser=True)


if __name__ == '__main__':
    main()
|
|
|
|
bert_similarity.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import from_pretrained_keras
|
2 |
+
import tensorflow as tf
|
3 |
+
import numpy as np
|
4 |
+
import transformers
|
5 |
+
|
6 |
+
# Class order must match the output order of the pretrained model below.
labels = ["contradiction", "entailment", "neutral"]
# NOTE: loads (and on first run downloads) the model at import time.
model = from_pretrained_keras("keras-io/bert-semantic-similarity")
|
8 |
+
|
9 |
+
class BertSemanticDataGenerator(tf.keras.utils.Sequence):
    """Generates batches of BERT-encoded sentence pairs.

    Args:
        sentence_pairs: np.ndarray of shape (n, 2) holding text pairs.
        labels: np.ndarray of integer class ids, or None for inference.
        batch_size: number of pairs per batch.
        shuffle: if True, reshuffle sample order after each epoch.
        include_targets: whether __getitem__ also returns labels
            (True for training/validation, False for inference).
    """

    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=32,
        shuffle=True,
        include_targets=True,
    ):
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        # Load the BERT tokenizer matching the pretrained
        # bert-base-uncased checkpoint used by the model.
        self.tokenizer = transformers.BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True
        )
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()

    def __len__(self):
        # Number of full batches per epoch.
        return len(self.sentence_pairs) // self.batch_size

    def __getitem__(self, idx):
        # Select the sample indices belonging to batch `idx`.
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # batch_encode_plus encodes both sentences of each pair together,
        # separated by the [SEP] token.
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            max_length=128,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
            # Fix: `pad_to_max_length=True` is deprecated in transformers;
            # padding="max_length" is the supported equivalent.
            padding="max_length",
            return_tensors="tf",
        )

        # Convert the encoded features to numpy arrays.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")

        if self.include_targets:
            # Training/validation: also return the target labels.
            labels = np.array(self.labels[indexes], dtype="int32")
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            # Inference: features only.
            return [input_ids, attention_masks, token_type_ids]

    def on_epoch_end(self):
        # Fix: the `shuffle` flag previously had no effect because the base
        # Sequence.on_epoch_end is a no-op; honor it by reshuffling here
        # (also called once from __init__).
        if self.shuffle:
            np.random.shuffle(self.indexes)
|
65 |
+
|
66 |
+
|
67 |
+
|
68 |
+
def get_similarity(sentence1, sentence2):
    """Return the model's entailment probability for the two sentences."""
    pair = np.array([[str(sentence1), str(sentence2)]])
    generator = BertSemanticDataGenerator(
        pair, labels=None, batch_size=1, shuffle=False, include_targets=False,
    )
    # Single batch -> first (and only) row of class probabilities.
    probs = model.predict(generator[0])[0]

    by_label = dict(zip(labels, (float(p) for p in probs)))
    return by_label['entailment']
|
requirements.txt
ADDED
Binary file (254 Bytes). View file
|
|
styles.css
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Center both the page heading and the app description block. */
h1,
#content_align {
    text-align: center;
}
|
8 |
+
|
text_converter.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from bert_similarity import get_similarity
|
2 |
+
from text_generator import get_gpt_response
|
3 |
+
from textstat import flesch_reading_ease
|
4 |
+
|
5 |
+
def generate_user_prompt(prompt_type, base_text):
    """Build the user prompt sent to the LLM.

    Args:
        prompt_type: "too_simple" to raise the reading level, or
            "too_complex" to lower it.
        base_text: the text to convert.

    Returns:
        The filled-in prompt string.

    Raises:
        KeyError: if prompt_type is not a known prompt name.
    """
    prompts = {
        "too_simple": f"""
    Convert this text to a higher reading level of the original text.
    The higher reading level text should have more syllables per word and more words per sentence.
    It should retain the core meaning of the original text.
    Here is the text:
    {base_text}
    """,
        "too_complex": f"""
    Convert this text to a simpler version of the original text.
    The simpler versions of text have fewer syllables per word and fewer words per sentence.
    It should retain the core meaning of the original text.
    Here is the text:
    {base_text}
    """
    }

    # Bug fix: the f-strings above already interpolate base_text, so the
    # previous `.format(base_text=base_text)` call was redundant and crashed
    # (KeyError/ValueError) whenever the user's text contained '{' or '}'.
    return prompts[prompt_type]
|
24 |
+
|
25 |
+
|
26 |
+
def generate_similar_sentence(input_text, min_reading_level, max_reading_level, min_entailment, system_prompt, max_iter):
    """Iteratively rewrite input_text until its Flesch reading-ease score
    falls inside [min_reading_level, max_reading_level] while the rewrite
    still entails the original text.

    Args:
        input_text: source text to convert.
        min_reading_level: inclusive lower bound of the target band.
        max_reading_level: inclusive upper bound of the target band.
        min_entailment: minimum acceptable entailment probability between
            the rewrite and the original.
        system_prompt: system message for the LLM.
        max_iter: maximum number of rewrite attempts.

    Returns:
        (output_text, similarity, reading_level, input_reading_level, message)
    """
    completed = False
    user_prompt = ""
    input_reading_level = flesch_reading_ease(input_text)
    curr_reading_level = input_reading_level
    curr_text = input_text
    response = None
    similarity = 0
    reading_level = 0
    print(f"Current reading level is: {curr_reading_level}")

    # Fix: use the same inclusive bounds as the success check inside the
    # loop — previously this early-accept used strict '>' / '<', so a text
    # sitting exactly on a band boundary was rejected here yet accepted
    # after a (pointless) rewrite.
    if min_reading_level <= curr_reading_level <= max_reading_level:
        return input_text, 1, curr_reading_level, input_reading_level, "Input text was already within the target reading level!"

    i = 0
    while i < max_iter and not completed:
        if curr_reading_level > max_reading_level:
            # Reading ease above the band -> text is too simple.
            print(f"Too simple, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_simple", curr_text)
        elif curr_reading_level < min_reading_level:
            # Reading ease below the band -> text is too complex.
            print(f"Too complex, current reading level is {curr_reading_level}")
            user_prompt = generate_user_prompt("too_complex", curr_text)
        elif similarity < min_entailment:
            # In the band, but meaning drifted too far from the original.
            print(f"Entailment level is too low: {similarity}")
            user_prompt = f"Can you convert this text '{input_text}' to a grade level more similar to this text '{curr_text}'"

        response = get_gpt_response(user_prompt, system_prompt)
        similarity = get_similarity(response, input_text)
        reading_level = flesch_reading_ease(response)

        if similarity >= min_entailment and min_reading_level <= reading_level <= max_reading_level:
            completed = True

        curr_text = response
        curr_reading_level = reading_level
        print(response)
        i += 1

    if completed:
        return response, similarity, reading_level, input_reading_level, "Success! Please see the converted text at your target reading level."
    else:
        return response, similarity, reading_level, input_reading_level, "Failed. We could not reach the target reading level while maintaining the text meaning."
|
text_generator.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
import os
|
3 |
+
|
4 |
+
OPENAI_APIKEY = os.environ.get("OPENAI_APIKEY")
|
5 |
+
|
6 |
+
client = OpenAI(api_key=OPENAI_APIKEY)
|
7 |
+
|
8 |
+
def get_gpt_response(user_prompt, system_prompt):
    """Send the system and user prompts to gpt-3.5-turbo and return the
    assistant's reply text."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    return completion.choices[0].message.content
|