from hazm import Normalizer, Lemmatizer, Stemmer
import gradio as gr
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

# LDA topic model and hazm text-processing tools used for the SEO estimate.
lda = LatentDirichletAllocation(n_components=4, random_state=101)
normalizer = Normalizer()
lemmatizer = Lemmatizer()
stemmer = Stemmer()
vectorizer = CountVectorizer(analyzer='word', ngram_range=(1, 1))


def compute_seo_score(normalized_text, keywords):
    # Build a bag-of-words matrix for the single document and fit the LDA model on it.
    x = vectorizer.fit_transform([normalized_text])
    lda.fit(x)

    # For each of the 4 topics, collect the 10 highest-weighted vocabulary terms.
    feature_names = vectorizer.get_feature_names_out()
    key_words = [
        [feature_names[index] for index in topic.argsort()[-10:]]
        for topic in lda.components_
    ]

    # Keywords arrive as a single dash-separated string.
    query_terms = keywords.split('-')

    # Count how many query terms appear (after lemmatization) among the topic key terms.
    score = 0
    for topic_terms in key_words:
        lemmatized_terms = [lemmatizer.lemmatize(word) for word in topic_terms]
        for query in query_terms:
            if query in lemmatized_terms:
                score += 1

    # Average the match count over the 4 topics and scale both values for the Label output.
    final_score = score / 4
    return {'Estimated_number': score / 100, 'score': final_score / 100}


def normalize_text(text, keywords):
    # Normalize the raw text with hazm, then score it against the supplied keywords.
    normalized_text = normalizer.normalize(text)
    label = compute_seo_score(normalized_text, keywords)
    return normalized_text, label


demo = gr.Interface(
    fn=normalize_text,
    inputs=["text", "text"],
    outputs=["text", "label"],
)
demo.launch()