Spaces:

azaninello
/

azioni_ICC

Runtime error

App Files Files Community

azioni_ICC / app.py

azaninello

Update app.py

7398647 about 2 years ago

raw

history blame contribute delete

No virus

2.96 kB

	import gradio as gr

	import nltk
	import simplemma
	from nltk.tokenize import word_tokenize
	from nltk.tokenize import sent_tokenize
	from nltk.probability import FreqDist
	from simplemma import text_lemmatizer
	# Download NLTK's 'punkt' resource at import time (sentence-tokenizer data
	# that sent_tokenize relies on).
	nltk.download('punkt')

	# Path of the corpus file that get_lists() reads and splits into sentences.
	file = "text.txt"

	import spacy
	# Italian spaCy pipeline; used as the default `nlp` argument of
	# search_engine_collocations for POS tagging and lemmatization.
	nlp_IT = spacy.load("it_core_news_sm")

	def get_lists(file):
	    """Read a UTF-8 text file and split it into Italian sentences.

	    Args:
	        file: Path of the text file to read.

	    Returns:
	        A 2-tuple ``(sentences, sentences_lower)``: the sentences as
	        produced by ``sent_tokenize`` and the same sentences lower-cased,
	        in the same order.
	    """
	    with open(file, 'r', encoding='utf-8') as handle:
	        raw_text = handle.read()

	    original_sentences = sent_tokenize(raw_text, language='italian')
	    # Lower-cased copy, kept index-aligned with the original sentences.
	    lowered_sentences = list(map(str.lower, original_sentences))

	    return original_sentences, lowered_sentences

	# Load and tokenize the corpus once at import time; both lists are used as
	# default arguments of search_engine_collocations.
	sentences, sentences_lower = get_lists(file)

	def search_engine_collocations(target='scarto', colloc='azioni', nlp=nlp_IT, sentences_lower=sentences_lower, sentences=sentences):
	    """Rank the lemmas that co-occur with ``target`` in the corpus.

	    Every sentence of ``sentences_lower`` that contains ``target``
	    (case-insensitive substring match) is analysed with ``nlp``; the lemmas
	    of its verbs, adjectives and nouns are collected, and the lemmas of the
	    category selected by ``colloc`` are returned ranked by frequency.

	    Args:
	        target: Word (or substring) to look for in the sentences.
	        colloc: Category to report: ``'azioni'`` (verbs),
	            ``'caratteristiche'`` (adjectives) or ``'concetti'`` (nouns).
	        nlp: Callable turning a sentence into an iterable of tokens that
	            expose ``pos_`` and ``lemma_`` (a spaCy pipeline by default).
	        sentences_lower: Lower-cased sentences to search.
	        sentences: Unused; kept so existing callers keep working.

	    Returns:
	        An Italian, user-facing message: either the frequency ranking for
	        the requested category or a "not found" notice. ``None`` is
	        returned when ``colloc`` is not one of the three known categories.
	    """
	    needle = target.lower()

	    verbs = []
	    adjectives = []
	    nouns = []
	    # Route each token's lemma to its list by coarse part-of-speech tag.
	    lemmas_by_pos = {'VERB': verbs, 'ADJ': adjectives, 'NOUN': nouns}
	    found = False

	    for sent in sentences_lower:
	        if needle not in sent:
	            continue
	        found = True
	        for token in nlp(sent):
	            bucket = lemmas_by_pos.get(token.pos_)
	            if bucket is not None:
	                bucket.append(token.lemma_)

	    if not found:
	        # Must be an f-string, otherwise the user sees a literal "{target}".
	        return f"Non ho trovato la parola '{target}'.\n\n"

	    # Category label -> (collected lemmas, participle agreeing with the label).
	    categories = {
	        'azioni': (verbs, 'legate'),
	        'caratteristiche': (adjectives, 'legate'),
	        'concetti': (nouns, 'legati'),
	    }
	    if colloc not in categories:
	        # Unknown or missing category (e.g. no radio option selected).
	        return None

	    # Only the requested category decides the outcome, so an empty list in
	    # another category can no longer produce the wrong "not found" message.
	    lemmas, linked = categories[colloc]
	    if not lemmas:
	        return f"Non ho trovato {colloc} {linked} a '{target}'"

	    ranking = ''.join(
	        f"{rank}: {entry}\n\n"
	        for rank, entry in enumerate(FreqDist(lemmas).most_common(), start=1)
	    )
	    return f"Ho trovato {len(lemmas)} {colloc} {linked} a '{target}'\n{ranking}\n\n"


	# Gradio UI: a free-text box for the target word and a radio selector for
	# the category, wired to search_engine_collocations with plain-text output.
	demo = gr.Interface(
	    fn=search_engine_collocations,
	    inputs=[
	        gr.Textbox(),
	        gr.Radio(["azioni", "caratteristiche", "concetti"]),
	    ],
	    outputs="text",
	    # Each example row is [target word, category].
	    examples=[
	        ["scarto", "azioni"],
	        ["rifiuto", "caratteristiche"],
	        ["sostenibilità", "concetti"],
	    ],
	)

	# Start the web app.
	demo.launch()