Spaces:
Running
Running
File size: 5,110 Bytes
2ed84e2 2e88643 07d2d4a 99dd8f0 49461e6 174330a 07d2d4a 2e88643 9f9cc13 8537469 9f9cc13 8537469 9f9cc13 8537469 9f9cc13 1dab96d 9f9cc13 1dab96d 41ba0d5 481cd29 41ba0d5 52e141d b3dd91d 74701ca 52e141d 03e3e40 52e141d 74701ca 52e141d 56dde06 07d2d4a 130f1be 07d2d4a 130f1be 07d2d4a 130f1be 07d2d4a 6ad229a 130f1be e350981 1b229e8 52e141d 571c5ba 2ed84e2 1dab96d 05a1140 6ad229a 1dab96d c7caadb 2ed84e2 a367380 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import gradio as gr
import os
import textstat
import urllib.request
import re
from bs4 import BeautifulSoup
from transformers import pipeline
# Non-greedy match for anything that looks like an HTML/XML tag; applied to
# text extracted from fetched pages to drop any markup that survived parsing.
CLEANR = re.compile(r'<.*?>')

# Help text passed as the chat interface's description: it explains how to
# interpret each readability score the tool reports.
DEFAULT_SYSTEM_PROMPT = """
The Flesch Reading Ease score uses the number of syllables and sentence lengths to determine the reading ease of the content.
A Flesch score of 60 is taken to be plain English. A score in the range of 60-70 corresponds to 8th/9th grade English level.
A score between 50 and 60 corresponds to a 10th/12th grade level. Below 30 is college graduate level.
The SMOG (Simple Measure of Gobbledygook) grade is commonly used in health care.
The score represents the number of years of education needed to understand a passage of writing.
The Coleman Liau Index is designed to evaluate the U.S. grade level necessary to understand text.
Your score indicates the U.S. school level a person needs to be to understand the text.
Dale-Chall is one of the most accurate readability metrics. Rather than rely on syllable counts to identify difficult words,
Dale-Chall incorporates a list of 3,000 easy words which were understood by 80% of fourth-grade students.
The readability score is then computed based on how many words present in the passage are not in the list of easy words.
A score of 4.9 or lower indicates the passage is easily readable by the average 4th grader.
Scores more than 9.0 indicate the passage is at a college level of readability.
The Gunning Fog scale is similar to the Flesch scale where it uses syllable counts and sentence length.
The scale uses the percentage of 'Foggy' words, those that contain 3 or more syllables.
A fog score of 5 is readable, 10 is hard, 15 is difficult, and 20 is very difficult.
Detailed references:
1. https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
2. https://en.wikipedia.org/wiki/SMOG
3. https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index
4. https://en.wikipedia.org/wiki/Dale%E2%80%93Chall_readability_formula
5. https://en.wikipedia.org/wiki/Gunning_fog_index
"""
# URL schemes that make a message count as "a page to fetch"; anything else
# is scored as literal text.
_URL_SCHEMES = ("http://", "https://")

# Seconds to wait on the remote server before giving up on a fetch.
_FETCH_TIMEOUT_SECONDS = 10

# Spam classifier, created on first use and reused for later messages so the
# model is not reloaded for every chat turn.
_SPAM_CLASSIFIER = None


def _looks_like_url(candidate):
    """Return True when *candidate* is a single http(s) URL and nothing else."""
    # Requiring the scheme at the start (rather than anywhere in the message)
    # keeps other schemes such as file:// away from urlopen, and requiring a
    # single token keeps sentences that merely mention a link out of the
    # fetch path.
    return (
        candidate.lower().startswith(_URL_SCHEMES)
        and len(candidate.split()) == 1
    )


def _fetch_page_text(url):
    """Download *url* and return its text content with leftover tags removed."""
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(url, timeout=_FETCH_TIMEOUT_SECONDS) as response:
        raw_html = response.read()
    # Raw bytes are handed to BeautifulSoup so it can work out the page
    # encoding itself instead of assuming UTF-8; the parser is named
    # explicitly so the result does not depend on which optional parsers
    # happen to be installed.
    page_text = BeautifulSoup(raw_html, "html.parser").text
    return re.sub(CLEANR, '', page_text)


def _spam_verdict(text):
    """Classify *text* with the roberta-spam model and return the verdict line."""
    global _SPAM_CLASSIFIER
    if _SPAM_CLASSIFIER is None:
        _SPAM_CLASSIFIER = pipeline("text-classification", model="mshenoda/roberta-spam")
    # truncation=True asks the tokenizer to cut over-long input down to the
    # model's maximum length instead of failing on it.
    top = _SPAM_CLASSIFIER(text, truncation=True)[0]
    if top["label"] == "LABEL_0":
        return "Content is not spammy-ish based on 125 million parameter AI Model called Roberta-Spam"
    return "Content is spammy-ish based on 125 million parameter AI Model called Roberta-Spam"


def measure_readability(message, history):
    """Build a plain-text readability report for a chat message.

    If the message is a single http(s) URL, the page is fetched and its text
    is scored; no spam verdict is produced in that case. Otherwise the
    message itself is scored and a spam verdict from the roberta-spam
    classifier is appended as the last line.

    Args:
        message: The text typed by the user, or a URL to fetch.
        history: Previous chat turns. Unused; present because the function is
            registered as a chat-interface callback that is called with it.

    Returns:
        A string of ``os.linesep``-separated lines containing content counts,
        the readability scores, the estimated reading time and, for literal
        text, the spam verdict.

    Raises:
        urllib.error.URLError: If the URL cannot be fetched (raised by
            ``urllib.request.urlopen``).
    """
    candidate = message.strip()
    if _looks_like_url(candidate):
        text = _fetch_page_text(candidate)
        spammy = ""
    else:
        text = message
        spammy = _spam_verdict(text)

    # Trailing spaces in the header lines are part of the established output.
    report = [
        "==== Content Info ==== ",
        f"Character Count {textstat.char_count(text, ignore_spaces=True)}",
        f"Lexicon Count {textstat.lexicon_count(text, removepunct=True)}",
        f"Syllable Count {textstat.syllable_count(text)}",
        f"Sentence Count {textstat.sentence_count(text)}",
        " ",
        "==== Result ==== ",
        f"Flesch Reading Ease = {textstat.flesch_reading_ease(text)}",
        f"Smog Index = {textstat.smog_index(text)}",
        f"Coleman Liau Index = {textstat.coleman_liau_index(text)}",
        f"Dale-Chall Readability Score = {textstat.dale_chall_readability_score(text)}",
        f"Gunning Fog Index = {textstat.gunning_fog(text)}",
        f"Reading Time = {textstat.reading_time(text, ms_per_char=14.69)} seconds",
        # The spam verdict (possibly empty) always follows the final separator.
        spammy,
    ]
    return os.linesep.join(report)
# Build the chat UI only; starting the server is left to the entry-point
# guard below. Chaining `.launch()` onto the constructor would bind
# `Conversing` to launch()'s return value instead of the interface and would
# start the server as a side effect of importing this module.
Conversing = gr.ChatInterface(
    measure_readability,
    chatbot=gr.Chatbot(height=400, label="Enter URL or String to evaluate"),
    retry_btn=None,
    theme=gr.themes.Monochrome(),
    title='Ecommerce Content Readability Tool',
    description=DEFAULT_SYSTEM_PROMPT,
    undo_btn=None,
    clear_btn=None,
    css='footer {visibility: hidden}',
)
#"Algorithm for this site is based on Readability Wiki - https://en.wikipedia.org/wiki/Readability "

if __name__ == "__main__":
    Conversing.launch()