Spaces:
Running
Running
import gradio as gr | |
import os | |
import textstat | |
import urllib.request | |
import re | |
from bs4 import BeautifulSoup | |
# Matches the shortest "<...>" run on a single line; used to strip tag-like
# leftovers from text extracted from a fetched page.
CLEANR = re.compile(r'<.*?>')

# Explanation of each reported metric; passed to the chat interface as its
# description text.
DEFAULT_SYSTEM_PROMPT = """
The Flesch Reading Ease score uses the number of syllables and sentence lengths to determine the reading ease of the content.
A Flesch score of 60 is taken to be plain English. A score in the range of 60-70 corresponds to 8th/9th grade English level.
A score between 50 and 60 corresponds to a 10th/12th grade level. Below 30 is college graduate level.
The SMOG (Simple Measure of Gobbledygook) grade is commonly used in health care.
The score represents the number of years of education needed to understand a passage of writing.
The Coleman Liau Index is designed to evaluate the U.S. grade level necessary to understand text.
Your score indicates the U.S. school level a person needs to be to understand the text.
Dale-Chall is one of the most accurate readability metrics. Rather than rely on syllable counts to identify difficult words,
Dale-Chall incorporates a list of 3,000 easy words which were understood by 80% of fourth-grade students.
The readability score is then computed based on how many words present in the passage are not in the list of easy words.
A score of 4.9 or lower indicates the passage is easily readable by the average 4th grader.
Scores more than 9.0 indicate the passage is at a college level of readability.
The Gunning Fog scale is similar to the Flesch scale where it uses syllable counts and sentence length.
The scale uses the percentage of 'Foggy' words, those that contain 3 or more syllables.
A fog score of 5 is readable, 10 is hard, 15 is difficult, and 20 is very difficult.
Detailed references:
1. https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
2. https://en.wikipedia.org/wiki/SMOG
3. https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index
4. https://en.wikipedia.org/wiki/Dale%E2%80%93Chall_readability_formula
5. https://en.wikipedia.org/wiki/Gunning_fog_index
"""
def measure_readability(message, history):
    """Build a plain-text readability report for a URL or a literal string.

    When ``message`` (ignoring surrounding whitespace) starts with
    ``https://`` the page is downloaded, its markup is stripped and the
    remaining text is scored; otherwise ``message`` itself is scored.

    Args:
        message: Text typed into the chat box -- either an ``https://`` URL
            or the content to evaluate.
        history: Previous chat turns supplied by the chat interface; unused.

    Returns:
        A multi-line string: a "Content Info" section with character, word,
        syllable and sentence counts, followed by a "Result" section with the
        readability scores and the estimated reading time. If the URL cannot
        be fetched, a one-line error message is returned instead.
    """
    candidate_url = message.strip()

    # Treat the input as a URL only when it actually *is* one. A substring
    # test would hand prose that merely mentions a link to urlopen(), and
    # would also let other schemes through (e.g. "file:///...#https://").
    if candidate_url.startswith("https://"):
        try:
            # The context manager closes the connection once the body is read;
            # the timeout keeps an unresponsive host from hanging the chat.
            with urllib.request.urlopen(candidate_url, timeout=30) as response:
                # Undecodable bytes are replaced rather than aborting the
                # whole report on pages that are not valid UTF-8.
                html = response.read().decode('utf8', errors='replace')
        except OSError as exc:
            # URLError, HTTPError and socket timeouts all derive from OSError.
            return "Could not fetch " + candidate_url + ": " + str(exc)

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        cleantext = BeautifulSoup(html, 'html.parser').text
        # Drop anything tag-like that survived text extraction.
        text = re.sub(CLEANR, '', cleantext)
    else:
        text = message

    # One entry per output line, in display order; each is terminated with
    # os.linesep when the report is assembled.
    report_lines = (
        "==== Content Info ==== ",
        "Character Count " + str(textstat.char_count(text, ignore_spaces=True)),
        "Lexicon Count " + str(textstat.lexicon_count(text, removepunct=True)),
        "Syllable Count " + str(textstat.syllable_count(text)),
        "Sentence Count " + str(textstat.sentence_count(text)),
        " ",
        "==== Result ==== ",
        "Flesch Reading Ease = " + str(textstat.flesch_reading_ease(text)),
        "Smog Index = " + str(textstat.smog_index(text)),
        "Coleman Liau Index = " + str(textstat.coleman_liau_index(text)),
        "Dale-Chall Readability Score = " + str(textstat.dale_chall_readability_score(text)),
        "Gunning Fog Index = " + str(textstat.gunning_fog(text)),
        "Reading Time = " + str(textstat.reading_time(text, ms_per_char=14.69)) + " seconds",
    )
    return "".join(line + os.linesep for line in report_lines)
# Chat-style UI: every submitted message is passed to measure_readability and
# the returned report is shown as the reply. The interface is only *built*
# here; chaining .launch() onto the constructor would start the server on
# import and bind this name to launch()'s return value instead of the
# interface, breaking the guarded launch below.
Conversing = gr.ChatInterface(
    measure_readability,
    chatbot=gr.Chatbot(height=400, label="Enter URL or String to evaluate"),
    retry_btn=None,
    theme=gr.themes.Monochrome(),
    title='Ecommerce Content Readability Tool',
    description=DEFAULT_SYSTEM_PROMPT,
    undo_btn=None,
    clear_btn=None,
    css='footer {visibility: hidden}',
)

# Algorithm for this site is based on Readability Wiki - https://en.wikipedia.org/wiki/Readability

if __name__ == "__main__":
    # Start the web server only when the file is run as a script.
    Conversing.launch()