# Residue from the hosting page's file-view header (not part of the program):
#   rbughao's picture
#   Update app.py
#   8537469 verified
#   raw
#   history blame
#   4.6 kB
import gradio as gr
import os
import textstat
import urllib.request
import re
from bs4 import BeautifulSoup
# Non-greedy pattern matching anything shaped like a markup tag: "<" up to the
# nearest ">". measure_readability uses it to strip tag-like remnants from
# fetched page text.
CLEANR = re.compile('<.*?>')
# Explanation of the readability metrics reported by the tool; it is passed as
# the `description` of the ChatInterface built at the bottom of this file.
# NOTE(review): the text below contains the typo "diffult" (presumably
# "difficult"); it is left untouched here because it is a runtime string.
DEFAULT_SYSTEM_PROMPT = """
The Flesch Reading Ease score uses the number of syllables and sentence lengths to determine the reading ease of the content.
A Flesch score of 60 is taken to be plain English. A score in the range of 60-70 corresponds to 8th/9th grade English level.
A score between 50 and 60 corresponds to a 10th/12th grade level. Below 30 is college graduate level.
The SMOG (Simple Measure of Gobbledygook) grade is commonly used in health care.
The score represents the number of years of education needed to understand a passage of writing.
The Coleman Liau Index is designed to evaluate the U.S. grade level necessary to understand text.
Your score indicates the U.S. school level a person needs to be to understand the text.
Dale-Chall is one of the most accurate readability metrics. Rather than rely on syllable counts to identify diffult words,
Dale-Chall incorporates a list of 3,000 easy words which were understood by 80% of fourth-grade students.
The readability score is then computed based on how many words present in the passage are not in the list of easy words.
A score of 4.9 or lower indicates the passage is easily readable by the average 4th grade.
Scores more than 9.0 indicate the passage is at a college level of readability.
The Gunning Fog scale is similar to the Flesch scale where it uses syllable counts and sentence length.
The scale uses the percentage of 'Foggy' words, those that contain 3 or more syllables.
A fog score of 5 is readable, 10 is hard, 15 is difficult, and 20 is very difficult.
Detailed references:
1. https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
2. https://en.wikipedia.org/wiki/SMOG
3. https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index
4. https://en.wikipedia.org/wiki/Dale%E2%80%93Chall_readability_formula
5. https://en.wikipedia.org/wiki/Gunning_fog_index
"""
def _fetch_page_text(url, timeout=15):
    """Download *url* and return the text content of the page.

    Args:
        url: Address to fetch; the caller has already checked the scheme.
        timeout: Seconds to wait on the network before giving up, so a
            stalled server cannot hang the request indefinitely.

    Returns:
        The page text as extracted by BeautifulSoup, with any remaining
        tag-shaped fragments removed via ``CLEANR``.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    # The context manager closes the HTTP response even when reading fails.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        raw = response.read()
        charset = response.headers.get_content_charset() or "utf-8"
    try:
        # Honour the charset the server declared; undecodable bytes are
        # replaced instead of aborting the whole evaluation.
        html = raw.decode(charset, errors="replace")
    except LookupError:
        # The server announced an encoding name Python does not know.
        html = raw.decode("utf-8", errors="replace")
    # Name the parser explicitly so results do not depend on which optional
    # parsers are installed (and to avoid bs4's "no parser specified" warning).
    page_text = BeautifulSoup(html, "html.parser").text
    return re.sub(CLEANR, "", page_text)


def measure_readability(message, history):
    """Build a readability report for a text or for the page behind a URL.

    Args:
        message: Either the text to evaluate, or a single ``https://`` URL
            whose page text should be evaluated.
        history: Chat history supplied by the chat interface; not used.

    Returns:
        A multi-line string with content statistics (character, word,
        syllable and sentence counts) followed by the readability scores
        and the estimated reading time. Every line ends with ``os.linesep``.
    """
    candidate = message.strip()
    # Only fetch when the whole message is one https URL. A substring test
    # would hand arbitrary input to urlopen (e.g. "file:///x?https://") and
    # would also try to "open" ordinary prose that merely mentions a link.
    # NOTE(review): the URL is user-supplied, so this can still reach hosts
    # visible from the server; restrict targets if that matters in deployment.
    if candidate.startswith("https://") and len(candidate.split()) == 1:
        text = _fetch_page_text(candidate)
    else:
        text = message

    report_lines = [
        "==== Content Info ==== ",
        f"Character Count {textstat.char_count(text, ignore_spaces=True)}",
        f"Lexicon Count {textstat.lexicon_count(text, removepunct=True)}",
        f"Syllable Count {textstat.syllable_count(text)}",
        f"Sentence Count {textstat.sentence_count(text)}",
        " ",
        "==== Result ==== ",
        f"Flesch Reading Ease = {textstat.flesch_reading_ease(text)}",
        f"Smog Index = {textstat.smog_index(text)}",
        f"Coleman Liau Index = {textstat.coleman_liau_index(text)}",
        f"Dale-Chall Readability Score = {textstat.dale_chall_readability_score(text)}",
        f"Gunning Fog Index = {textstat.gunning_fog(text)}",
        f"Reading Time = {textstat.reading_time(text, ms_per_char=14.69)} seconds",
    ]
    # Each line, including the last one, is terminated with the platform
    # line separator.
    return "".join(line + os.linesep for line in report_lines)
# Build the chat UI only; the server is started by the __main__ guard below.
# `Conversing` must be the ChatInterface itself: chaining `.launch()` onto the
# constructor would start the server on import and bind launch()'s return
# value to the name, so the guarded `Conversing.launch()` could not work.
Conversing = gr.ChatInterface(
    measure_readability,
    chatbot=gr.Chatbot(height=400, label="Enter URL or String to evaluate"),
    retry_btn=None,
    theme=gr.themes.Monochrome(),
    title='Ecommerce Content Readability Tool',
    description=DEFAULT_SYSTEM_PROMPT,
    undo_btn=None,
    clear_btn=None,
    css='footer {visibility: hidden}',
)
#"Algorithm for this site is based on Readability Wiki - https://en.wikipedia.org/wiki/Readability "

if __name__ == "__main__":
    Conversing.launch()