import asyncio
import os
import re
import subprocess
import time
from datetime import date

import fitz
import httpx
import nltk
import numpy as np
import plotly.graph_objects as go
import requests
import spacy
import torch
import torch.nn.functional as F
import yaml
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from evaluate import load
from scipy.special import softmax
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    GPT2LMHeadModel,
    GPT2TokenizerFast,
)
from unidecode import unidecode

from explainability import *
nltk.download("punkt") | |
nltk.download("stopwords") | |
load_dotenv() | |
with open("config.yaml", "r") as file: | |
params = yaml.safe_load(file) | |
device = "cuda" if torch.cuda.is_available() else "cpu" | |
readability_model_id = params["READABILITY_MODEL_ID"] | |
gpt2_model = GPT2LMHeadModel.from_pretrained(readability_model_id).to(device) | |
gpt2_tokenizer = GPT2TokenizerFast.from_pretrained(readability_model_id) | |
command = ["python", "-m", "spacy", "download", "en_core_web_sm"] | |
subprocess.run(command) | |
nlp = spacy.load("en_core_web_sm") | |
def depth_analysis(input_text):
    """Compute normalized stylometric features for input_text and return a radar plot."""
    # Lexical diversity: type-token ratio over preprocessed words.
    processed_words = preprocess_text1(input_text)
    ttr_value = vocabulary_richness_ttr(processed_words)

    # Readability: Gunning fog index, rescaled onto the plot's common axis.
    gunning_fog = calculate_gunning_fog(input_text)
    gunning_fog_norm = normalize(gunning_fog, min_value=0, max_value=20)

    # Surface statistics: average sentence length and average word length.
    words, sentences = preprocess_text2(input_text)
    average_sentence_length = calculate_average_sentence_length(sentences)
    average_word_length = calculate_average_word_length(words)
    average_sentence_length_norm = normalize(
        average_sentence_length, min_value=0, max_value=40
    )
    average_word_length_norm = normalize(
        average_word_length, min_value=0, max_value=8
    )

    # Syntactic complexity: average dependency-tree depth via spaCy.
    average_tree_depth = calculate_syntactic_tree_depth(nlp, input_text)
    average_tree_depth_norm = normalize(
        average_tree_depth, min_value=0, max_value=10
    )

    # Fluency: GPT-2 perplexity of the input text.
    perplexity = calculate_perplexity(
        input_text, gpt2_model, gpt2_tokenizer, device
    )
    perplexity_norm = normalize(perplexity, min_value=0, max_value=30)

    features = {
        "readability": gunning_fog_norm,
        "syntactic tree depth": average_tree_depth_norm,
        "vocabulary richness": ttr_value,
        "perplexity": perplexity_norm,
        "average sentence length": average_sentence_length_norm,
        "average word length": average_word_length_norm,
    }

    # Radar (spider) plot of all features on a shared 0-100 radial axis.
    fig = go.Figure()
    fig.add_trace(
        go.Scatterpolar(
            r=list(features.values()),
            theta=list(features.keys()),
            fill="toself",
            name="Radar Plot",
        )
    )
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],
            )
        ),
        showlegend=False,
        margin=dict(
            l=10,
            r=20,
            b=10,
            t=10,
        ),
    )
    return fig
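
# Minimal local usage sketch (assumptions: the preprocess_*, calculate_*, and
# normalize helpers come from explainability.py as imported above; the sample
# text and output filename below are purely illustrative, not part of the app).
if __name__ == "__main__":
    sample_text = (
        "Large language models generate fluent text, but stylometric signals "
        "such as perplexity and syntactic tree depth can still help characterize it."
    )
    radar_fig = depth_analysis(sample_text)
    # Save the radar plot to an HTML file for quick inspection in a browser.
    radar_fig.write_html("depth_analysis_radar.html")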