|
from flask import Flask, request, jsonify |
|
from nltk import sent_tokenize, word_tokenize, FreqDist |
|
from nltk.corpus import stopwords |
|
from nltk.tokenize.treebank import TreebankWordDetokenizer |
|
import nltk |
|
|
|
app = Flask(__name__)



# Fetch the tokenizer models and stopword corpus at import time so the
# request handlers never hit a missing-resource error; nltk.download is
# a no-op when the data is already present locally.
nltk.download("punkt")

nltk.download("stopwords")



# Built once at module level; set membership tests are O(1) in the
# per-token filters below.
stop_words = set(stopwords.words("english"))
|
|
|
def summarize_text(text):
    """Return an extractive summary of *text*: its three highest-scoring sentences.

    Sentences are scored by summing the corpus frequencies of their
    content words (lowercased, alphanumeric, non-stopword). The chosen
    sentences are returned joined by spaces, in original document order.

    Args:
        text: The document to summarize.

    Returns:
        A string containing up to three sentences. Texts of three
        sentences or fewer are returned unchanged (re-joined).
    """
    sentences = sent_tokenize(text)

    # A text of <= 3 sentences is already its own summary; also covers
    # the empty-string case (sent_tokenize("") == []).
    if len(sentences) <= 3:
        return " ".join(sentences)

    words = word_tokenize(text)
    words = [word.lower() for word in words
             if word.isalnum() and word.lower() not in stop_words]

    freq_dist = FreqDist(words)

    def _score(sentence):
        # BUG FIX: lowercase each token before the freq_dist lookup.
        # freq_dist keys are lowercased, so capitalized words
        # (sentence-initial words, proper nouns) previously scored 0.
        return sum(freq_dist[w.lower()] for w in word_tokenize(sentence))

    # Take the indices of the three best sentences, then re-sort those
    # indices so the summary preserves the original document order —
    # the previous version emitted sentences in score order, which
    # reads incoherently.
    ranked = sorted(range(len(sentences)), key=lambda i: _score(sentences[i]))
    top_indices = sorted(ranked[-3:])

    return " ".join(sentences[i] for i in top_indices)
|
|
|
def extract_keywords(text):
    """Tokenize *text* and return its lowercased content words.

    A token is kept when it is alphanumeric and its lowercase form is
    not an English stopword; kept tokens are returned lowercased, in
    document order (duplicates included).
    """
    keywords = []
    for token in word_tokenize(text):
        lowered = token.lower()
        if token.isalnum() and lowered not in stop_words:
            keywords.append(lowered)
    return keywords
|
|
|
@app.route('/text/summarize', methods=['POST'])
def summarize_text_route():
    """POST /text/summarize — JSON body {"text": "..."} -> {"summary": "..."}.

    Returns HTTP 400 with an error payload when the body is not JSON,
    lacks the 'text' field, or 'text' is not a string.
    """
    # silent=True makes a non-JSON body yield None instead of raising,
    # so malformed requests become a clean 400 rather than a 500.
    data = request.get_json(silent=True)

    if not data or 'text' not in data:
        return jsonify({'error': "Missing required field 'text'"}), 400

    text = data['text']
    if not isinstance(text, str):
        return jsonify({'error': "'text' must be a string"}), 400

    summary = summarize_text(text)

    return jsonify({'summary': summary})
|
|
|
@app.route('/text/extract', methods=['POST'])
def extract_keywords_route():
    """POST /text/extract — JSON body {"text": "..."} -> {"keywords": [...]}.

    Returns HTTP 400 with an error payload when the body is not JSON,
    lacks the 'text' field, or 'text' is not a string.
    """
    # silent=True makes a non-JSON body yield None instead of raising,
    # so malformed requests become a clean 400 rather than a 500.
    data = request.get_json(silent=True)

    if not data or 'text' not in data:
        return jsonify({'error': "Missing required field 'text'"}), 400

    text = data['text']
    if not isinstance(text, str):
        return jsonify({'error': "'text' must be a string"}), 400

    keywords = extract_keywords(text)

    return jsonify({'keywords': keywords})
|
|
|
if __name__ == '__main__':

    # NOTE(review): debug=True enables the Werkzeug interactive debugger
    # and auto-reloader — fine for local development, never for
    # production deployment.
    app.run(debug=True)
|
|