"""Streamlit NLP web app: summarization, keyword extraction, QA and question generation."""

import streamlit as st
from time import sleep
from stqdm import stqdm
import pandas as pd
from transformers import pipeline
import json
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


def draw_all(key, plot=False):
    """Render the static app description.

    Parameters
    ----------
    key : str
        Widget key namespace (currently unused by the body; kept for interface
        compatibility with existing callers).
    plot : bool, optional
        Reserved flag; not used.
    """
    st.write(
        """
        # NLP Web App

        This Natural Language Processing Based Web App can do anything
        you can imagine with Text. 😱

        This App is built using pretrained transformers which are capable
        of doing wonders with the Textual data.

        ```python
        # Key Features of this App.
        1. Advanced Text Summarizer
        2. Key Word Extractor
        3. Question Answering
        4. Question Generation
        ```
        """
    )


with st.sidebar:
    draw_all("sidebar")


def _capitalize_sentences(text):
    """Capitalize each '.'-separated sentence of *text* and rejoin with '. '."""
    return '. '.join(part.strip().capitalize() for part in text.split('.'))


# Cache heavy model/pipeline construction so Streamlit reruns don't reload
# weights on every widget interaction.
@st.cache_resource
def _load_pipeline(task, model_name=None, **kwargs):
    """Build (and cache) a transformers pipeline for *task*."""
    if model_name is None:
        return pipeline(task, **kwargs)
    return pipeline(task, model=model_name, tokenizer=model_name, **kwargs)


@st.cache_resource
def _load_seq2seq(model_name):
    """Build (and cache) a seq2seq model + tokenizer pair."""
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


def main():
    """Entry point: route the sidebar task selection to the matching NLP tool."""
    st.title("NLP IE Web App")
    menu = ["--Select--", "Summarizer", "Keyword Extractor",
            "Question Answering", "Question Generation"]
    choice = st.sidebar.selectbox("What task would you like to do?", menu)

    if choice == "--Select--":
        st.write("""
                 Welcome to the Web App of Data Dynamos. As an IE student of
                 the Master of Business Analytics and Big Data you have the
                 opportunity to do anything with your lectures you like
                 """)
        st.write("""
                 Never heard of NLP? No way! Natural Language Processing (NLP)
                 is a computational technique to process human language in all
                 of it's complexity
                 """)
        st.write("""
                 NLP is an vital discipline in Artificial Intelligence and
                 keeps growing
                 """)
        st.image('banner_image.jpg')

    elif choice == "Summarizer":
        st.subheader("Text Summarization")
        st.write(" Enter the Text you want to summarize !")
        raw_text = st.text_area("Your Text", "Enter Your Text Here")
        # Integer widget with a sane floor: the bare number_input previously
        # returned a float defaulting to 0.0 (and `is not None` was always true).
        num_words = int(st.number_input(
            "Enter Number of Words in Summary", min_value=1, value=30, step=1))
        if raw_text not in ("", "Enter Your Text Here"):
            summarizer = _load_pipeline('summarization')
            # Keep max_length above min_length; the original hard-coded
            # max_length=50 crashed whenever num_words > 50.
            summary = summarizer(raw_text,
                                 min_length=num_words,
                                 max_length=max(50, num_words + 20))
            # Read the dict directly — no json round-trip needed.
            result_summary = _capitalize_sentences(summary[0]['summary_text'])
            st.write(f"Here's your Summary : {result_summary}")

    elif choice == "Keyword Extractor":
        st.subheader("Keyword Extraction")
        # BUG FIX: this checkpoint is a BERT token-classification model, not a
        # seq2seq one — "text2text-generation" + ["generated_text"] never worked.
        # Load it with the token-classification task and merge sub-word pieces.
        model_name = "yanekyuk/bert-uncased-keyword-extractor"
        keyword_extractor = _load_pipeline("token-classification",
                                           model_name,
                                           aggregation_strategy="simple")
        input_text = st.text_area("Enter some text:")
        if st.button("Extract Keywords"):
            entities = keyword_extractor(input_text)
            # De-duplicate while preserving first-seen order.
            keywords = list(dict.fromkeys(ent["word"] for ent in entities))
            st.write("Keywords:", ", ".join(keywords))

    elif choice == "Question Answering":
        st.subheader("Question Answering")
        st.write(" Enter the Context and ask the Question to find out the Answer !")
        question_answering = _load_pipeline(
            "question-answering",
            "distilbert-base-cased-distilled-squad")
        context = st.text_area("Context", "Enter the Context Here")
        question = st.text_area("Your Question", "Enter your Question Here")
        # BUG FIX: the guard compared against "Enter Text Here", which is not
        # the widget default, so it never filtered the placeholder context.
        if context != "Enter the Context Here" and question != "Enter your Question Here":
            result = question_answering(question=question, context=context)
            generated_text = _capitalize_sentences(result['answer'])
            st.write(f" Here's your Answer :\n {generated_text}")

    elif choice == "Question Generation":
        st.subheader("Question Generation")
        st.write(" Enter the text to get questions generated !")
        model, tokenizer = _load_seq2seq(
            "mrm8488/t5-base-finetuned-question-generation-ap")
        text_input2 = st.text_area("Your Text", "Enter the Text to complete")
        if st.button("Generate Questions"):
            # T5 task prefix expected by this fine-tuned checkpoint.
            input_ids = tokenizer.encode(
                "generate questions: " + text_input2, return_tensors="pt")
            question_ids = model.generate(input_ids)
            questions = tokenizer.decode(question_ids[0],
                                         skip_special_tokens=True)
            st.write(questions)


if __name__ == '__main__':
    main()