File size: 4,997 Bytes
9c37e72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e58c44
275c70c
 
6e1636a
18124fd
 
 
275c70c
9c37e72
 
 
 
 
 
 
 
 
 
 
 
 
419e04c
9c37e72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ccaea15
9c37e72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1272866
fd37fd8
9c37e72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
## App: NLP App with Streamlit
Credits: Streamlit Team, Marc Skov Madsen (for the Awesome-streamlit gallery)
Description
This is a Natural Language Processing (NLP) based app covering basic NLP tasks such as the following:

+ Tokenization & Lemmatization using Spacy

+ Named Entity Recognition(NER) using SpaCy

+ Sentiment Analysis using TextBlob

+ Document/Text Summarization using Gensim/T5

This is built with Streamlit Framework, an awesome framework for building ML and NLP tools.
Purpose
To perform basic and useful NLP tasks with Streamlit, spaCy, TextBlob and Gensim
"""
# Core Pkgs
import os
# Environment setup: these shell commands run as top-level statements, i.e.
# every time this script is executed, and their exit statuses are ignored.
# Install the English and Bengali Tesseract language packages through apt.
# NOTE(review): apt-get is invoked without -y, so it may stop at a
# confirmation prompt in a non-interactive environment - confirm.
os.system('sudo apt-get install tesseract-ocr-eng')
os.system('sudo apt-get install tesseract-ocr-ben')
#os.system('sudo apt update')
# Download the Bengali trained-data file from the tesseract-ocr/tessdata repo
# into the current working directory.
os.system('wget https://github.com/tesseract-ocr/tessdata/raw/main/ben.traineddata')
# NOTE(review): the wget above fetches ben.traineddata, not ben.traineddata.gz,
# so this gunzip presumably has nothing to decompress - confirm and drop if so.
os.system('gunzip ben.traineddata.gz ')
# Move the downloaded trained-data file into /usr/local/share/tessdata/.
os.system('sudo mv -v ben.traineddata /usr/local/share/tessdata/')
# Install the pytesseract package (quiet mode) before it is imported below.
os.system('pip install -q pytesseract')
import streamlit as st 
import os
import torch
from transformers import AutoTokenizer, AutoModelWithLMHead

# NLP Pkgs
from textblob import TextBlob 
import spacy
from gensim.summarization import summarize
import requests
import cv2
import numpy as np
import pytesseract
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
from PIL import Image
@st.cache
def text_analyzer(my_text):
	"""Tokenize ``my_text`` with spaCy and describe each token with its lemma.

	Loads the ``en_core_web_sm`` pipeline, runs it over ``my_text`` and
	returns a list with one formatted string per token; each string holds
	the token text and, on a second line, its lemma.
	"""
	pipeline = spacy.load('en_core_web_sm')
	entry_template = '"Token":{},\n"Lemma":{}'
	token_entries = []
	for tok in pipeline(my_text):
		token_entries.append(entry_template.format(tok.text, tok.lemma_))
	return token_entries

# Function For Extracting Entities
@st.cache
def entity_analyzer(my_text):
	"""Report the tokens and named entities spaCy finds in ``my_text``.

	Loads the ``en_core_web_sm`` pipeline and runs it over ``my_text``.
	Returns a one-element list whose single string contains the list of
	token texts and, on a second line, the list of
	``(entity text, entity label)`` tuples.
	"""
	parsed = spacy.load('en_core_web_sm')(my_text)

	token_texts = []
	for tok in parsed:
		token_texts.append(tok.text)

	named_entities = []
	for ent in parsed.ents:
		named_entities.append((ent.text, ent.label_))

	report = '"Token":{},\n"Entities":{}'.format(token_texts, named_entities)
	return [report]


def _ocr_image(image_file, lang=None):
	"""Run Tesseract OCR on an image file object and return the extracted text.

	Args:
		image_file: File-like object that ``PIL.Image.open`` can read
			(here: the value returned by a Streamlit upload/camera widget).
		lang: Tesseract language code passed to pytesseract; ``None`` leaves
			pytesseract's default language in effect.

	Returns:
		The text produced by ``pytesseract.image_to_string``.
	"""
	# Hand the PIL image straight to pytesseract instead of saving it to a
	# fixed "img.png" and re-reading it: that file lived in the working
	# directory and would be overwritten by any other session doing OCR.
	return pytesseract.image_to_string(Image.open(image_file), lang=lang)


def main():
	"""Render the Streamlit NLP app.

	The page has two parts:

	* A chain of checkboxes offering named-entity extraction, TextBlob
	  sentiment analysis and TextBlob spell correction. They are chained
	  with ``elif``, so a later checkbox is only rendered while the earlier
	  ones are unchecked.
	* A summary section that takes text from an uploaded image (OCR with
	  ``lang="ben"``), a camera photo (OCR with pytesseract's default
	  language) or a text input, and then offers Gensim or T5 summarization.
	"""

	def change_photo_state():
		# Widget callback: record that a photo widget has changed.
		st.session_state["photo"] = "done"

	# Title
	st.title("Streamlit NLP APP")
	st.markdown("""
    	#### Description
    	+ This is a Natural Language Processing(NLP) Based App useful for basic NLP task
         NER,Sentiment, Spell Corrections and Summarization
    	""")

	# Entity Extraction
	if st.checkbox("Show Named Entities"):
		st.subheader("Analyze Your Text")

		message = st.text_area("Enter your Text", "Typing Here ..")
		if st.button("Extract"):
			entity_result = entity_analyzer(message)
			st.json(entity_result)

	# Sentiment Analysis
	elif st.checkbox("Show Sentiment Analysis"):
		st.subheader("Analyse Your Text")
		message = st.text_area("Enter Text plz", "Type Here .")
		if st.button("Analyze"):
			blob = TextBlob(message)
			result_sentiment = blob.sentiment
			st.success(result_sentiment)

	# Text Corrections
	elif st.checkbox("Spell Corrections"):
		st.subheader("Correct Your Text")
		message = st.text_area("Enter the Text", "Type please ..")
		if st.button("Spell Corrections"):
			st.text("Using TextBlob ..")
			st.success(TextBlob(message).correct())

	# Summary section: collect the text to summarize.
	st.subheader("Summary section, feed your image!")
	camera_photo = st.camera_input("Take a photo", on_change=change_photo_state)
	uploaded_photo = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'], on_change=change_photo_state)
	message = st.text_input("Or, drop your text here!")
	if "photo" not in st.session_state:
		st.session_state["photo"] = "not done"

	if st.session_state["photo"] == "done" or message:
		# Default to the typed text so `text` is always bound; an image, when
		# present, replaces it (the camera photo wins if both are supplied,
		# matching the original evaluation order).
		text = message
		if uploaded_photo is not None:
			text = _ocr_image(uploaded_photo, lang="ben")
			st.success(text)
		if camera_photo is not None:
			# NOTE(review): unlike the upload branch, no language is passed
			# here, so pytesseract's default is used - confirm this is intended.
			text = _ocr_image(camera_photo)
			st.success(text)

		# Summarization
		if st.checkbox("Show Text Summarization Genism"):
			st.subheader("Summarize Your Text")
			st.text("Using Gensim Summarizer ..")
			try:
				summary_result = summarize(text)
			except ValueError as err:
				# Gensim is expected to reject input that is too short to
				# summarize (e.g. a single sentence); show that as a warning
				# instead of an unhandled traceback.
				st.warning(str(err))
			else:
				st.success(summary_result)
		elif st.checkbox("Show Text Summarization T5"):
			st.subheader("Summarize Your Text")
			tokenizer = AutoTokenizer.from_pretrained('t5-base')
			model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
			st.text("Using Google T5 Transformer ..")
			inputs = tokenizer.encode(
				"summarize: " + text,
				return_tensors='pt',
				max_length=512,
				truncation=True,
			)
			summary_ids = model.generate(inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2)
			# Drop special tokens so markers such as <pad> and </s> are not
			# shown to the user as part of the summary.
			summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
			st.success(summary)

	st.sidebar.subheader("About App")
	st.sidebar.subheader("By")
	st.sidebar.text("Soumen Sarker")

if __name__ == '__main__':
	main()