File size: 5,110 Bytes
2ed84e2
2e88643
07d2d4a
 
 
 
99dd8f0
49461e6
174330a
07d2d4a
2e88643
9f9cc13
 
 
 
 
 
 
 
 
 
 
 
 
8537469
9f9cc13
8537469
9f9cc13
8537469
9f9cc13
 
 
 
1dab96d
 
 
 
9f9cc13
1dab96d
 
41ba0d5
 
 
 
481cd29
 
41ba0d5
52e141d
b3dd91d
 
74701ca
 
52e141d
 
03e3e40
52e141d
 
74701ca
52e141d
56dde06
07d2d4a
 
 
 
 
 
 
130f1be
07d2d4a
130f1be
 
07d2d4a
130f1be
 
07d2d4a
6ad229a
130f1be
e350981
1b229e8
52e141d
571c5ba
2ed84e2
1dab96d
05a1140
6ad229a
 
1dab96d
c7caadb
2ed84e2
 
a367380
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
import os
import textstat
import urllib.request
import re
from bs4 import BeautifulSoup
from transformers import pipeline


# Non-greedy match for anything that looks like an HTML/XML tag ("<...>");
# used to scrub leftover markup from text extracted from web pages.
CLEANR = re.compile(r"<.*?>")

# Explanatory text shown as the description of the chat interface: a short
# guide to interpreting each readability metric that the report prints.
DEFAULT_SYSTEM_PROMPT = """
The Flesch Reading Ease score uses the number of syllables and sentence lengths to determine the reading ease of the content.
A Flesch score of 60 is taken to be plain English. A score in the range of 60-70 corresponds to 8th/9th grade English level. 
A score between 50 and 60 corresponds to a 10th/12th grade level. Below 30 is college graduate level.

The SMOG (Simple Measure of Gobbledygook) grade is commonly used in health care. 
The score represents the number of years of education needed to understand a passage of writing.

The Coleman Liau Index is designed to evaluate the U.S. grade level necessary to understand text.
Your score indicates the U.S. school level a person needs to be to understand the text.

Dale-Chall is one of the most accurate readability metrics. Rather than rely on syllable counts to identify difficult words, 
Dale-Chall incorporates a list of 3,000 easy words which were understood by 80% of fourth-grade students. 
The readability score is then computed based on how many words present in the passage are not in the list of easy words.
A score of 4.9 or lower indicates the passage is easily readable by the average 4th grade. 
Scores more than 9.0 indicate the passage is at a college level of readability.

The Gunning Fog scale is similar to the Flesch scale where it uses syllable counts and sentence length. 
The scale uses the percentage of 'Foggy' words, those that contain 3 or more syllables. 
A fog score of 5 is readable, 10 is hard, 15 is difficult, and 20 is very difficult.

Detailed references:
1. https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
2. https://en.wikipedia.org/wiki/SMOG
3. https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index
4. https://en.wikipedia.org/wiki/Dale%E2%80%93Chall_readability_formula
5. https://en.wikipedia.org/wiki/Gunning_fog_index
"""

# Seconds to wait for a remote page before giving up, so that one slow site
# cannot hang the chat handler indefinitely.
_FETCH_TIMEOUT_SECONDS = 15

# Shared spam classifier, created on first use by _get_spam_classifier().
_spam_classifier = None


def _looks_like_url(message):
    """Return True when *message* is a single http(s) URL and nothing else."""
    candidate = message.strip()
    # Requiring the scheme at the very start (rather than anywhere in the
    # message) keeps prose that merely mentions a link from being fetched and
    # stops other schemes such as file:// from reaching urlopen.
    return (
        candidate.lower().startswith(("http://", "https://"))
        and len(candidate.split()) == 1
    )


def _fetch_page_text(url):
    """Download *url* and return its visible text with markup removed."""
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(url, timeout=_FETCH_TIMEOUT_SECONDS) as response:
        raw = response.read()
        charset = response.headers.get_content_charset() or "utf-8"
    try:
        html = raw.decode(charset, errors="replace")
    except LookupError:
        # The server advertised an encoding Python does not know.
        html = raw.decode("utf-8", errors="replace")
    # Naming the parser makes the extraction independent of which optional
    # parsers happen to be installed.
    page_text = BeautifulSoup(html, "html.parser").text
    return re.sub(CLEANR, "", page_text)


def _get_spam_classifier():
    """Return the shared text-classification pipeline, building it on first use."""
    global _spam_classifier
    if _spam_classifier is None:
        _spam_classifier = pipeline("text-classification", model="mshenoda/roberta-spam")
    return _spam_classifier


def _spam_verdict(text):
    """Classify *text* with the spam model and return the sentence to display."""
    prediction = _get_spam_classifier()(text)[0]
    # The original code treats the model's "LABEL_0" output as "not spam".
    if prediction["label"] == "LABEL_0":
        return "Content is not spammy-ish based on 125 million parameter AI Model called Roberta-Spam"
    return "Content is spammy-ish based on 125 million parameter AI Model called Roberta-Spam"


def measure_readability(message: str, history) -> str:
    """Build a readability report for a URL or a piece of text.

    When *message* is a single http(s) URL, the page is downloaded and its
    extracted text is analysed; no spam verdict is produced in that case.
    Otherwise *message* itself is analysed and a spam verdict from the
    Roberta-Spam classifier is appended to the report.

    Args:
        message: The chat message; either a URL or the text to evaluate.
        history: Previous chat turns supplied by the chat interface. Unused.

    Returns:
        A multi-line report with content statistics and readability scores,
        or a short error message when the URL could not be fetched.
    """
    if _looks_like_url(message):
        url = message.strip()
        try:
            text = _fetch_page_text(url)
        except OSError as err:
            # URLError, HTTPError and socket timeouts are all OSError
            # subclasses; report the failure in the chat instead of crashing.
            return f"Unable to fetch {url}: {err}"
        spammy = ""
    else:
        text = message
        spammy = _spam_verdict(text)

    report_lines = [
        "==== Content Info ==== ",
        f"Character Count {textstat.char_count(text, ignore_spaces=True)}",
        f"Lexicon Count {textstat.lexicon_count(text, removepunct=True)}",
        f"Syllable Count {textstat.syllable_count(text)}",
        f"Sentence Count {textstat.sentence_count(text)}",
        " ",
        "==== Result ==== ",
        f"Flesch Reading Ease = {textstat.flesch_reading_ease(text)}",
        f"Smog Index = {textstat.smog_index(text)}",
        f"Coleman Liau Index = {textstat.coleman_liau_index(text)}",
        f"Dale-Chall Readability Score = {textstat.dale_chall_readability_score(text)}",
        f"Gunning Fog Index = {textstat.gunning_fog(text)}",
        f"Reading Time = {textstat.reading_time(text, ms_per_char=14.69)} seconds",
        # The spam verdict (possibly empty) is the final, unterminated line.
        spammy,
    ]
    return os.linesep.join(report_lines)


    
# Chat UI definition. The interface object itself is bound to `Conversing`
# (not the return value of .launch()), so that it can be launched exactly once
# from the entry-point guard below and importing this module has no
# server-starting side effect.
Conversing = gr.ChatInterface(
    measure_readability,
    chatbot=gr.Chatbot(height=400, label="Enter URL or String to evaluate"),
    retry_btn=None,
    theme=gr.themes.Monochrome(),
    title='Ecommerce Content Readability Tool',
    description=DEFAULT_SYSTEM_PROMPT,
    undo_btn=None,
    clear_btn=None,
    css='footer {visibility: hidden}',
)
# Algorithm for this site is based on Readability Wiki - https://en.wikipedia.org/wiki/Readability


if __name__ == "__main__":
    Conversing.launch()