Soumen's picture
Update app.py
18124fd
raw
history blame
No virus
5 kB
"""
## App: NLP App with Streamlit
Credits: Streamlit Team, Marc Skov Madsen (for the Awesome-Streamlit gallery)
Description
This is a Natural Language Processing (NLP) based app that demonstrates basic NLP tasks such as the following:
+ Tokenization & Lemmatization using Spacy
+ Named Entity Recognition(NER) using SpaCy
+ Sentiment Analysis using TextBlob
+ Document/Text Summarization using Gensim/T5
This is built with Streamlit Framework, an awesome framework for building ML and NLP tools.
Purpose
To perform basic and useful NLP tasks with Streamlit, spaCy, TextBlob and Gensim
"""
# Core Pkgs
import os
# Environment bootstrap for OCR: installs the Tesseract English and Bengali
# language packs, downloads the Bengali trained-data file, and installs the
# pytesseract wrapper. These commands run every time this module is executed.
#
# `-y` answers apt-get's confirmation prompt automatically; without it the
# install stops (or waits for input) whenever apt needs to pull in additional
# packages, which makes an unattended start-up fail.
os.system('sudo apt-get install -y tesseract-ocr-eng')
os.system('sudo apt-get install -y tesseract-ocr-ben')
#os.system('sudo apt update')
# Download the Bengali model; wget stores it as `ben.traineddata` in the
# current working directory.
os.system('wget https://github.com/tesseract-ocr/tessdata/raw/main/ben.traineddata')
# NOTE(review): the wget above does not produce a `.gz` file, so this step
# presumably fails with "No such file" unless an archive is shipped alongside
# the app -- confirm and remove if it is dead.
os.system('gunzip ben.traineddata.gz ')
# Move the model to the directory Tesseract is expected to search.
# NOTE(review): assumes /usr/local/share/tessdata/ already exists -- confirm.
os.system('sudo mv -v ben.traineddata /usr/local/share/tessdata/')
os.system('pip install -q pytesseract')
import streamlit as st
import os
import torch
from transformers import AutoTokenizer, AutoModelWithLMHead
# NLP Pkgs
from textblob import TextBlob
import spacy
from gensim.summarization import summarize
import requests
import cv2
import numpy as np
import pytesseract
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
from PIL import Image
@st.cache
def text_analyzer(my_text):
    """Tokenize ``my_text`` with spaCy and pair each token with its lemma.

    Args:
        my_text: The raw text to analyse.

    Returns:
        A list with one formatted string per token; each string holds the
        token text on a ``"Token":`` line followed by its lemma on a
        ``"Lemma":`` line.
    """
    pipeline = spacy.load('en_core_web_sm')
    template = '"Token":{},\n"Lemma":{}'
    formatted = []
    for tok in pipeline(my_text):
        formatted.append(template.format(tok.text, tok.lemma_))
    return formatted
# Function For Extracting Entities
@st.cache
def entity_analyzer(my_text):
    """Extract the tokens and named entities of ``my_text`` with spaCy.

    Args:
        my_text: The raw text to analyse.

    Returns:
        A single-element list whose string contains the list of token texts
        on a ``"Token":`` line and the list of ``(entity text, entity label)``
        tuples on an ``"Entities":`` line.
    """
    parsed = spacy.load('en_core_web_sm')(my_text)
    token_texts = []
    for tok in parsed:
        token_texts.append(tok.text)
    found_entities = []
    for ent in parsed.ents:
        found_entities.append((ent.text, ent.label_))
    report = '"Token":{},\n"Entities":{}'.format(token_texts, found_entities)
    return [report]
def _ocr_image(image_file, lang=None):
    """Run Tesseract OCR on an uploaded or captured image and return its text.

    The image is handed to pytesseract as a PIL image, so no intermediate
    file is written to disk and colour channels keep their original order.

    Args:
        image_file: File-like object holding the image (as returned by
            ``st.file_uploader`` / ``st.camera_input``).
        lang: Tesseract language code (e.g. ``"ben"``); ``None`` uses
            pytesseract's default language.

    Returns:
        The recognised text as a string.
    """
    picture = Image.open(image_file)
    return pytesseract.image_to_string(picture, lang=lang)


@st.cache(allow_output_mutation=True)
def _load_t5():
    """Load the ``t5-base`` tokenizer and model once and reuse them on reruns.

    ``allow_output_mutation=True`` stops Streamlit from hashing the returned
    model objects on every call.
    """
    tokenizer = AutoTokenizer.from_pretrained('t5-base')
    model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
    return tokenizer, model


def main():
    """Render the Streamlit NLP app.

    The page offers, in order: named-entity extraction, sentiment analysis
    and spell correction on typed text, followed by a summary section that
    takes its text from an uploaded image (OCR with the Bengali model), a
    camera photo (OCR with the default language) or a text box, and
    summarises it with either Gensim or T5.
    """
    # Title
    st.title("Streamlit NLP APP")
    st.markdown(
        "\n#### Description\n"
        "+ This is a Natural Language Processing(NLP) Based App useful for basic NLP task\n"
        "NER,Sentiment, Spell Corrections and Summarization\n"
    )

    # Entity Extraction
    if st.checkbox("Show Named Entities"):
        st.subheader("Analyze Your Text")
        message = st.text_area("Enter your Text", "Typing Here ..")
        if st.button("Extract"):
            entity_result = entity_analyzer(message)
            st.json(entity_result)
    # Sentiment Analysis
    elif st.checkbox("Show Sentiment Analysis"):
        st.subheader("Analyse Your Text")
        message = st.text_area("Enter Text plz", "Type Here .")
        if st.button("Analyze"):
            blob = TextBlob(message)
            result_sentiment = blob.sentiment
            st.success(result_sentiment)
    # Text Corrections
    elif st.checkbox("Spell Corrections"):
        st.subheader("Correct Your Text")
        message = st.text_area("Enter the Text", "Type please ..")
        if st.button("Spell Corrections"):
            st.text("Using TextBlob ..")
            st.success(TextBlob(message).correct())

    def change_photo_state():
        # Widget callback: remember that the user interacted with an image input.
        st.session_state["photo"] = "done"

    st.subheader("Summary section, feed your image!")
    camera_photo = st.camera_input("Take a photo", on_change=change_photo_state)
    uploaded_photo = st.file_uploader(
        "Upload Image", type=['jpg', 'png', 'jpeg'], on_change=change_photo_state
    )
    message = st.text_input("Or, drop your text here!")
    if "photo" not in st.session_state:
        st.session_state["photo"] = "not done"

    # `text` is always bound so the summarizers below can never hit an
    # undefined name when no input has been supplied yet.
    text = ""
    if st.session_state["photo"] == "done" or message:
        # Typed text is the fallback; an image, when present, takes precedence.
        text = message
        if uploaded_photo is not None:
            text = _ocr_image(uploaded_photo, lang="ben")
            st.success(text)
        if camera_photo is not None:
            text = _ocr_image(camera_photo)
            st.success(text)

    # Summarization
    if st.checkbox("Show Text Summarization Genism"):
        st.subheader("Summarize Your Text")
        st.text("Using Gensim Summarizer ..")
        if not text.strip():
            st.warning("Please provide an image or some text to summarize first.")
        else:
            try:
                summary_result = summarize(text)
            except ValueError as err:
                # Gensim refuses inputs that contain only a single sentence.
                st.warning("Text is too short to summarize: {}".format(err))
            else:
                st.success(summary_result)
    elif st.checkbox("Show Text Summarization T5"):
        st.subheader("Summarize Your Text")
        if not text.strip():
            st.warning("Please provide an image or some text to summarize first.")
        else:
            tokenizer, model = _load_t5()
            st.text("Using Google T5 Transformer ..")
            inputs = tokenizer.encode(
                "summarize: " + text,
                return_tensors='pt',
                max_length=512,
                truncation=True,
            )
            summary_ids = model.generate(
                inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2
            )
            # Drop markers such as <pad> and </s> from the displayed summary.
            summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            st.success(summary)

    st.sidebar.subheader("About App")
    st.sidebar.subheader("By")
    st.sidebar.text("Soumen Sarker")
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()