""" ## App: NLP App with Streamlit Credits: Streamlit Team,Marc Skov Madsen(For Awesome-streamlit gallery) Description This is a Natural Language Processing(NLP) Based App useful for basic NLP concepts such as follows; + Tokenization & Lemmatization using Spacy + Named Entity Recognition(NER) using SpaCy + Sentiment Analysis using TextBlob + Document/Text Summarization using Gensim/T5 This is built with Streamlit Framework, an awesome framework for building ML and NLP tools. Purpose To perform basic and useful NLP task with Streamlit, Spacy, Textblob and Gensim """ # Core Pkgs import os os.system('sudo apt-get install tesseract-ocr-eng') os.system('sudo apt-get install tesseract-ocr-ben') #os.system('sudo apt update') os.system('wget https://github.com/tesseract-ocr/tessdata/raw/main/ben.traineddata') os.system('gunzip ben.traineddata.gz ') os.system('sudo mv -v ben.traineddata /usr/local/share/tessdata/') os.system('pip install -q pytesseract') import streamlit as st import os import torch from transformers import AutoTokenizer, AutoModelWithLMHead # NLP Pkgs from textblob import TextBlob import spacy from gensim.summarization import summarize import requests import cv2 import numpy as np import pytesseract #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe" from PIL import Image @st.cache def text_analyzer(my_text): nlp = spacy.load('en_core_web_sm') docx = nlp(my_text) # tokens = [ token.text for token in docx] allData = [('"Token":{},\n"Lemma":{}'.format(token.text,token.lemma_))for token in docx ] return allData # Function For Extracting Entities @st.cache def entity_analyzer(my_text): nlp = spacy.load('en_core_web_sm') docx = nlp(my_text) tokens = [ token.text for token in docx] entities = [(entity.text,entity.label_)for entity in docx.ents] allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)] return allData def main(): """ NLP Based App with Streamlit """ # Title st.title("Streamlit NLP APP") st.markdown(""" #### Description + This is a Natural Language Processing(NLP) Based App useful for basic NLP task NER,Sentiment, Spell Corrections and Summarization """) # Entity Extraction if st.checkbox("Show Named Entities"): st.subheader("Analyze Your Text") message = st.text_area("Enter your Text","Typing Here ..") if st.button("Extract"): entity_result = entity_analyzer(message) st.json(entity_result) # Sentiment Analysis elif st.checkbox("Show Sentiment Analysis"): st.subheader("Analyse Your Text") message = st.text_area("Enter Text plz","Type Here .") if st.button("Analyze"): blob = TextBlob(message) result_sentiment = blob.sentiment st.success(result_sentiment) #Text Corrections elif st.checkbox("Spell Corrections"): st.subheader("Correct Your Text") message = st.text_area("Enter the Text","Type please ..") if st.button("Spell Corrections"): st.text("Using TextBlob ..") st.success(TextBlob(message).correct()) def change_photo_state(): st.session_state["photo"]="done" st.subheader("Summary section, feed your image!") camera_photo = st.camera_input("Take a photo", on_change=change_photo_state) uploaded_photo = st.file_uploader("Upload Image",type=['jpg','png','jpeg'], on_change=change_photo_state) message = st.text_input("Or, drop your text here!") if "photo" not in st.session_state: st.session_state["photo"]="not done" if st.session_state["photo"]=="done" or message: if uploaded_photo: img = Image.open(uploaded_photo) img = img.save("img.png") img = cv2.imread("img.png") text = pytesseract.image_to_string(img, lang="ben") st.success(text) if camera_photo: img = Image.open(camera_photo) img = img.save("img.png") img = cv2.imread("img.png") text = pytesseract.image_to_string(img) st.success(text) if uploaded_photo==None and camera_photo==None: #our_image=load_image("image.jpg") #img = cv2.imread("scholarly_text.jpg") text = message # Summarization if st.checkbox("Show Text Summarization Genism"): st.subheader("Summarize Your Text") #message = st.text_area("Enter the Text","Type please ..") st.text("Using Gensim Summarizer ..") #st.success(mess) summary_result = summarize(text) st.success(summary_result) elif st.checkbox("Show Text Summarization T5"): st.subheader("Summarize Your Text") tokenizer = AutoTokenizer.from_pretrained('t5-base') model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True) st.text("Using Google T5 Transformer ..") inputs = tokenizer.encode("summarize: " + text, return_tensors='pt', max_length=512, truncation=True) summary_ids = model.generate(inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2) summary = tokenizer.decode(summary_ids[0]) st.success(summary) st.sidebar.subheader("About App") st.sidebar.subheader("By") st.sidebar.text("Soumen Sarker") if __name__ == '__main__': main()