IinjyI committed on
Commit
a2e8dd9
·
verified ·
1 Parent(s): af38f47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -1
app.py CHANGED
@@ -3,5 +3,87 @@ import gradio as gr
3
  def greet(name):
4
  return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  demo.launch()
 
 
3
def greet(name):
    """Return an enthusiastic greeting for *name*.

    The value is interpolated with an f-string, so non-string values are
    formatted with ``str()`` instead of raising ``TypeError`` as string
    concatenation would.
    """
    return f"Hello {name}!!"
5
 
6
+
7
# Load the word -> vector lookup that get_vector_representation() indexes
# by word.
# SECURITY: pickle.load() can execute arbitrary code embedded in the file,
# so "cleaned_word_embeddings.pkl" must only ever come from a trusted source.
with open("cleaned_word_embeddings.pkl", "rb") as f:
    cleaned_word_embeddings = pickle.load(f)
10
+
11
def get_clean_sentences(text):
    """Split *text* into sentences and blank out unwanted characters.

    Every regex match inside a sentence is replaced by a single space.
    The pattern matches either a backslash together with the non-newline
    character that follows it, or any single character that is not a
    backslash, an apostrophe, a word character or a space. Digits and
    underscores are word characters, so they are kept.

    Returns:
        A list with one cleaned string per sentence, in sentence order.
    """
    unwanted = r"\\.|[^\\'\w ]"
    return [re.sub(unwanted, " ", raw) for raw in sent_tokenize(text)]
19
+
20
+
21
def filter_sentences(text):
    """Return the cleaned sentences of *text* with English stopwords dropped.

    Each sentence from ``get_clean_sentences`` is tokenized with
    ``nltk.word_tokenize``; tokens whose lowercase form is in the English
    stopword list are discarded and the survivors are re-joined with single
    spaces. Surviving tokens keep their original casing.

    Returns:
        A list with one filtered string per sentence, in sentence order.
    """
    cleaned = get_clean_sentences(text)
    english_stopwords = set(stopwords.words("english"))
    result = []
    for sentence in cleaned:
        kept = []
        for token in nltk.word_tokenize(sentence):
            if token.lower() in english_stopwords:
                continue
            kept.append(token)
        result.append(" ".join(kept))
    return result
33
+
34
+
35
def get_vector_representation(text):
    """Build one averaged embedding vector per sentence of *text*.

    For every filtered sentence, the embeddings of the words found in
    ``cleaned_word_embeddings`` are summed into a 25-element vector, which
    is then divided by the total number of words in the sentence (words
    without an embedding still count towards the divisor). A sentence with
    no words yields an all-zero vector.

    Returns:
        A list of 25-element NumPy arrays, one per sentence.
    """
    vectors = []
    for sentence in filter_sentences(text):
        tokens = sentence.split()
        accumulator = np.zeros((25,))
        for token in tokens:
            if token not in cleaned_word_embeddings:
                continue
            accumulator += cleaned_word_embeddings[token]
        # Only average when there is something to divide by.
        if tokens:
            accumulator /= len(tokens)
        vectors.append(accumulator)
    return vectors
49
+
50
+
51
def calculate_cosine_similarity(sentence_vectors):
    """Return the pairwise cosine-similarity matrix of the sentence vectors.

    Args:
        sentence_vectors: Sequence of equal-length 1-D vectors, one per
            sentence.

    Returns:
        The matrix produced by ``cosine_similarity(sentence_vectors)``, or
        an empty ``(0, 0)`` array when no vectors are given.
    """
    # len() rather than truthiness so a NumPy array argument works too.
    # With nothing to compare, return an empty matrix rather than handing
    # empty input to cosine_similarity (presumably scikit-learn's, which
    # rejects it -- verify against the file's imports).
    if len(sentence_vectors) == 0:
        return np.zeros((0, 0))
    return cosine_similarity(sentence_vectors)
58
+
59
+
60
def get_scores(similarity_matrix):
    """Score sentences by running PageRank over the similarity graph.

    The matrix is turned into a graph with ``nx.from_numpy_array`` and the
    result of ``nx.pagerank`` on that graph is returned unchanged.
    """
    return nx.pagerank(nx.from_numpy_array(similarity_matrix))
66
+
67
+
68
def rank_sentences(text):
    """Return the sentences of *text* paired with their scores, best first.

    Sentences are vectorized, compared with cosine similarity and scored
    via ``get_scores``; each original sentence from ``sent_tokenize`` is
    then paired with the score stored under its position.

    Returns:
        A list of ``(score, sentence)`` tuples sorted in descending order.
    """
    vectors = get_vector_representation(text)
    scores = get_scores(calculate_cosine_similarity(vectors))
    scored = [
        (scores[position], original)
        for position, original in enumerate(sent_tokenize(text))
    ]
    scored.sort(reverse=True)
    return scored
77
+
78
+
79
def summarize(text):
    """Return an extractive summary made of the top-ranked sentences of *text*.

    Roughly the best tenth of the ranked sentences is kept, but never fewer
    than one, so inputs shorter than ten sentences still get a summary.
    Sentences appear in rank order, each followed by a single space.

    Returns:
        The summary string; empty when *text* contains no sentences.
    """
    ranked_sentences = rank_sentences(text)
    # Plain floor division yields 0 for fewer than ten sentences, which
    # used to produce an empty summary; always keep at least one.
    keep = max(1, len(ranked_sentences) // 10)
    # Slicing is safe even when the ranking is empty.
    return "".join(sentence + " " for _, sentence in ranked_sentences[:keep])
85
+
86
+
87
# Expose summarize() through a single text-in / text-out Gradio interface
# and start it.
demo = gr.Interface(
    fn=summarize,
    inputs="text",
    outputs="text",
)
demo.launch()
89
+