"""Gradio chat app that reports readability metrics and a spam verdict.

The user sends either plain text or an ``https://`` URL. For a URL the page
is downloaded and reduced to its visible text first. The reply lists basic
counts, several textstat readability scores, an estimated reading time and a
"Spammy" / "Not Spammy" label produced by a Hugging Face text classifier.
"""
import functools
import os
import re
import urllib.request

import gradio as gr
import textstat
from bs4 import BeautifulSoup
from transformers import pipeline

# Removes anything that still looks like an HTML tag after text extraction.
CLEANR = re.compile(r'<.*?>')

# Hugging Face model used for the spam verdict.
_SPAM_MODEL_NAME = "Titeiiko/OTIS-Official-Spam-Model"

# Upper bound (seconds) on how long a page download may block a chat reply.
_URL_TIMEOUT_SECONDS = 15

# Shown as the description of the chat interface.
DEFAULT_SYSTEM_PROMPT = """
The Flesch Reading Ease score uses the number of syllables and sentence lengths to determine the reading ease of the content. A Flesch score of 60 is taken to be plain English. A score in the range of 60-70 corresponds to 8th/9th grade English level. A score between 50 and 60 corresponds to a 10th/12th grade level. Below 30 is college graduate level.

The SMOG (Simple Measure of Gobbledygook) grade is commonly used in health care. The score represents the number of years of education needed to understand a passage of writing.

The Coleman Liau Index is designed to evaluate the U.S. grade level necessary to understand text. Your score indicates the U.S. school level a person needs to be to understand the text.

Dale-Chall is one of the most accurate readability metrics. Rather than rely on syllable counts to identify diffult words, Dale-Chall incorporates a list of 3,000 easy words which were understood by 80% of fourth-grade students. The readability score is then computed based on how many words present in the passage are not in the list of easy words. A score of 4.9 or lower indicates the passage is easily readable by the average 4th grade. Scores more than 9.0 indicate the passage is at a college level of readability.

The Gunning Fog scale is similar to the Flesch scale where it uses syllable counts and sentence length. The scale uses the percentage of 'Foggy' words, those that contain 3 or more syllables. A fog score of 5 is readable, 10 is hard, 15 is difficult, and 20 is very difficult.

Detailed references:
1. https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
2. https://en.wikipedia.org/wiki/SMOG
3. https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index
4. https://en.wikipedia.org/wiki/Dale%E2%80%93Chall_readability_formula
5. https://en.wikipedia.org/wiki/Gunning_fog_index
"""


@functools.lru_cache(maxsize=1)
def _spam_classifier():
    """Build the spam text-classification pipeline once and reuse it.

    Constructing the pipeline loads the model, so doing it for every chat
    message would repeat that work on each request.
    """
    return pipeline("text-classification", model=_SPAM_MODEL_NAME)


def analyze_output(vtext: str) -> str:
    """Classify *vtext* with the spam model and return a readable verdict.

    Args:
        vtext: The text to classify.

    Returns:
        ``"Not Spammy"`` when the top label is ``LABEL_0``, otherwise
        ``"Spammy"``.
    """
    # truncation=True keeps inputs longer than the model's maximum sequence
    # length (for example a whole web page) from failing inside the model.
    top = _spam_classifier()(vtext, truncation=True)[0]
    return "Not Spammy" if top["label"] == "LABEL_0" else "Spammy"


def _extract_text(message: str) -> str:
    """Return the text to analyse for a chat message.

    A message that is an ``https://`` URL is downloaded and reduced to its
    visible text; any other message is returned unchanged.
    """
    candidate = message.strip()
    # Only treat the message as a URL when it *is* one. A sentence that
    # merely mentions a link is analysed as text instead of being handed to
    # urlopen, which would reject it.
    if not candidate.startswith("https://"):
        return message

    # NOTE(security): this fetches a user-supplied address from the server.
    # If the app is reachable by untrusted users, consider restricting the
    # hosts that may be requested.
    with urllib.request.urlopen(candidate, timeout=_URL_TIMEOUT_SECONDS) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        html = response.read().decode(charset, errors="replace")

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    page_text = BeautifulSoup(html, "html.parser").text
    return CLEANR.sub("", page_text)


def measure_readability(message, history):
    """Build the readability report shown as the chatbot's reply.

    Args:
        message: Text to analyse, or an ``https://`` URL whose page text
            should be analysed.
        history: Previous chat turns supplied by ``gr.ChatInterface``; unused.

    Returns:
        A multi-line string with content counts, readability scores, the
        estimated reading time and the spam verdict.
    """
    text = _extract_text(message)

    report_lines = [
        "==== Content Info ==== ",
        f"Character Count {textstat.char_count(text, ignore_spaces=True)}",
        f"Lexicon Count {textstat.lexicon_count(text, removepunct=True)}",
        f"Syllable Count {textstat.syllable_count(text)}",
        f"Sentence Count {textstat.sentence_count(text)}",
        " ",
        "==== Result ==== ",
        f"Flesch Reading Ease = {textstat.flesch_reading_ease(text)}",
        f"Smog Index = {textstat.smog_index(text)}",
        f"Coleman Liau Index = {textstat.coleman_liau_index(text)}",
        f"Dale-Chall Readability Score = {textstat.dale_chall_readability_score(text)}",
        f"Gunning Fog Index = {textstat.gunning_fog(text)}",
        f"Reading Time = {textstat.reading_time(text, ms_per_char=14.69)} seconds",
        analyze_output(text),
    ]
    return os.linesep.join(report_lines)


# Build the interface once at import time; starting the server is left to the
# entry-point guard below so that importing this module does not launch it.
Conversing = gr.ChatInterface(
    measure_readability,
    chatbot=gr.Chatbot(height=400, label="Enter URL or String to evaluate"),
    retry_btn=None,
    theme=gr.themes.Monochrome(),
    title='Ecommerce Content Readability Tool',
    description=DEFAULT_SYSTEM_PROMPT,
    undo_btn=None,
    clear_btn=None,
    css='footer {visibility: hidden}',
)
# "Algorithm for this site is based on Readability Wiki - https://en.wikipedia.org/wiki/Readability "

if __name__ == "__main__":
    Conversing.launch()