# Source: Hugging Face Space ashishraics/KeyPhraseExtraction
# Space status: Build error
# File size: 3,691 Bytes
# Revision: 50f868a

import pandas as pd
import streamlit as st
from PIL import Image

# Top padding (in rem) applied to the main block container through the
# injected stylesheet below.
padding_top = 0
st.markdown(
    unsafe_allow_html=True,
    body=f"""
    <style>
        .reportview-container .main .block-container{{
            padding-top: {padding_top}rem;
        }}
    </style>""",
)

def set_page_title(title):
    """Set the browser tab title from inside the Streamlit app.

    Renders a zero-height iframe in the sidebar whose inline script writes
    ``title`` into the parent document's ``<title>`` element and installs a
    ``MutationObserver`` that re-applies it whenever the element's children
    change. An observer previously stored on ``window.parent.titleObserver``
    is disconnected first, so only one observer is active at a time.

    Args:
        title: Text to show as the page title. It is converted with ``str()``
            and escaped, so it may contain quotes, backslashes or markup.
    """
    # Function-scope imports keep this helper self-contained.
    import html
    import json

    # Encode the title as a JavaScript string literal so quotes, backslashes
    # and newlines cannot terminate the string early. "<" is emitted as
    # \u003c so a literal "</script>" in the title cannot close the script.
    js_title = json.dumps(str(title)).replace("<", "\\u003c")
    # The script sits inside a double-quoted srcdoc attribute, so the literal
    # also has to be escaped for an HTML attribute value.
    js_title = html.escape(js_title, quote=True)

    # Every JavaScript statement ends with an explicit semicolon so the script
    # stays valid even if statements end up on one line. There are
    # deliberately no blank lines inside the markup.
    st.sidebar.markdown(unsafe_allow_html=True, body=f"""
        <iframe height=0 srcdoc="<script>
            const title = window.parent.document.querySelector('title');
            const oldObserver = window.parent.titleObserver;
            if (oldObserver) {{
                oldObserver.disconnect();
            }}
            const newObserver = new MutationObserver(function(mutations) {{
                const target = mutations[0].target;
                if (target.text !== {js_title}) {{
                    target.text = {js_title};
                }}
            }});
            newObserver.observe(title, {{ childList: true }});
            window.parent.titleObserver = newObserver;
            title.text = {js_title};
        </script>" />
    """)


set_page_title(title='NLP use cases')

# CSS override that makes the element with id "MainMenu" and the page footer
# invisible.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
st.markdown(body=hide_streamlit_style, unsafe_allow_html=True)

# Load the image once and convert it to RGB in memory. The file on disk is
# left untouched: this script is re-executed on every interaction, so
# re-encoding and overwriting the asset on each run is wasted work and can
# collide with another session that is reading the file at the same time.
# The context manager closes the underlying file handle; `convert` returns an
# independent in-memory copy that stays usable afterwards.
with Image.open("hf2.png") as source_image:
    img = source_image.convert("RGB")


st.markdown("<h1 style='text-align: center; color: #3366ff;'>NLP Basic Use Cases</h1>", unsafe_allow_html=True)
st.markdown("---")
with st.sidebar:
    # Sidebar heading rendered through markdown so it can carry inline styling.
    st.markdown("<h1 style='text-align: left; color: ;'>NLP Tasks</h1>", unsafe_allow_html=True)
    # NOTE(review): only 'README' and 'Topic Modeling Using KeyPhrases' have a
    # handler in the visible part of this file; confirm whether the NER/POS
    # options are handled elsewhere.
    select_task = st.selectbox(
        label="Select task from drop down menu",
        options=[
            'README',
            'Topic Modeling Using KeyPhrases',
            'NER Extraction',
            'POS Extraction',
        ],
    )

# Landing view: short description of the app followed by the image.
if select_task == 'README':
    st.header("NLP Summary")
    st.write("The App gives you ability to 1) Detect Topics using Key-Phrase extraction technique")
    st.markdown("---")
    st.image(img)

from keybert import KeyBERT
from keyphrase_vectorizers import KeyphraseTfidfVectorizer,KeyphraseCountVectorizer

# Topic view: extract key phrases from the entered text with KeyBERT, once
# with a count-based and once with a TF-IDF-based keyphrase vectorizer, and
# show each result as a (Topic, Score) table.
if select_task == 'Topic Modeling Using KeyPhrases':

    default_paratext = """ A molar tooth from Southeast Asia probably belonged to a member of a cryptic group of Stone Age hominids called Denisovans, researchers say.  If so, this relatively large tooth joins only a handful of fossils from Denisovans, who are known from ancient DNA pegging them as close Neandertal relatives.  Analyses of the tooth’s internal structure and protein makeup indicate that the molar came from a girl in the Homo genus. She died between the ages of 3½ and 8½, paleoanthropologist Fabrice Demeter of the University of Copenhagen and colleagues say."""

    input_texts = st.text_area(label="Input text to classify into topics",
                               height=250, max_chars=1000,
                               value=default_paratext)

    if not input_texts.strip():
        # An empty or whitespace-only box gives the vectorizers nothing to
        # work with, so ask for input instead of running the extraction.
        st.warning("Please enter some text to extract topics from.")
    else:
        # One KeyBERT instance serves both extractions, so the embedding
        # model is not constructed twice per run.
        kb = KeyBERT()

        final = kb.extract_keywords([input_texts], vectorizer=KeyphraseCountVectorizer())
        st.subheader("Topics using Count Vectorizer")
        st.dataframe(pd.DataFrame(final, columns=['Topic', 'Score']))

        final2 = kb.extract_keywords([input_texts], vectorizer=KeyphraseTfidfVectorizer())
        st.subheader("Topics using Tf-IDF Vectorizer")
        st.dataframe(pd.DataFrame(final2, columns=['Topic', 'Score']))