Spaces:

LampOfSocrates
/

hf_streamlit_plodcw_group27

Sleeping

App Files Files Community

Lamp Socrates commited on May 23, 2024

Commit

0f52168

1 Parent(s): aed0940

Initial commit

Browse files

Files changed (3) hide show

README.md +11 -4
app.py +230 -0
requirements.txt +31 -0

README.md CHANGED Viewed

@@ -1,13 +1,20 @@
 ---
-title: Hf Streamlit Plodcw Group27
 emoji: 👀
-colorFrom: indigo
-colorTo: yellow
 sdk: streamlit
-sdk_version: 1.35.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Hf Streamlit Cw Group27
 emoji: 👀
+colorFrom: blue
+colorTo: blue
 sdk: streamlit
+sdk_version: 1.34.0
 app_file: app.py
 pinned: false
 license: apache-2.0
+python_version: 3.11.5
+short_description: Sample space for group coursework for NLP
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## Setting up Github Actions
+https://huggingface.co/docs/hub/en/spaces-github-actions

app.py ADDED Viewed

	@@ -0,0 +1,230 @@

+import streamlit as st
+from transformers import pipeline
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+import pandas as pd
+from pprint import pprint
+@st.cache_resource()
+def load_trained_model():
+    tokenizer = AutoTokenizer.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
+    model = AutoModelForTokenClassification.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
+    # Mapping labels
+    id2label = model.config.id2label
+    # Print the label mapping
+    print(f"Can recognise the following labels {id2label}")
+    # Load the NER model and tokenizer from Hugging Face
+    #ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
+    ner_pipeline = pipeline("ner", model=model, tokenizer = tokenizer)
+    return ner_pipeline
+@st.cache_data()
+def load_plod_cw_dataset():
+    from datasets import load_dataset
+    dataset = load_dataset("surrey-nlp/PLOD-CW")
+    return dataset
+def load_random_examples(dataset_name, num_examples=5):
+    """
+    Load random examples from the specified Hugging Face dataset.
+    Args:
+        dataset_name (str): The name of the dataset to load.
+        num_examples (int): The number of random examples to load.
+    Returns:
+        pd.DataFrame: A DataFrame containing the random examples.
+    """
+    # Load the dataset
+    dat = load_plod_cw_dataset()
+    # Convert the dataset to a pandas DataFrame
+    df = pd.DataFrame(dat['test'])
+    # Select random examples
+    random_examples = df.sample(n=1)
+    tokens = random_examples.tokens
+    ner_tags = random_examples.ner_tags
+    return pd.DataFrame((tokens, ner_tags))
+def render_entities(tokens, entities):
+    """
+    Renders a page with a 2-column table showing the entity corresponding to each token.
+    """
+    # Custom CSS for chilled and cool theme
+    st.markdown("""
+        <style>
+        body {
+            font-family: 'Arial', sans-serif;
+            background-color: #f0f0f5;
+            color: #333333;
+        }
+        table {
+            width: 100%;
+            border-collapse: collapse;
+        }
+        th, td {
+            padding: 12px;
+            text-align: left;
+            border-bottom: 1px solid #dddddd;
+        }
+        th {
+            background-color: #4CAF50;
+            color: white;
+            width: 16.66%;
+        }
+        tr:hover {
+            background-color: #f5f5f5;
+        }
+        td {
+            width: 16.66%;
+        }
+        </style>
+        """, unsafe_allow_html=True)
+    # Title and description
+    st.title("Model predicted Token vs Entities Table")
+    st.write("This table shows the entity corresponding to each token in a cool and chilled theme.")
+    # Create the table
+    table_data = {"Token": tokens, "Entity": entities}
+    st.table(table_data)
+def render_random_examples():
+    """
+    Render random examples from the PLOD-CW dataset in a Streamlit table.
+    """
+    # Load random examples
+    # Custom CSS for chilled and cool theme
+    st.markdown("""
+        <style>
+        body {
+            font-family: 'Arial', sans-serif;
+            background-color: #f0f0f5;
+            color: #333333;
+        }
+        table {
+            width: 100%;
+            border-collapse: collapse;
+        }
+        th, td {
+            padding: 12px;
+            text-align: left;
+            border-bottom: 1px solid #dddddd;
+        }
+        th {
+            background-color: #4CAF50;
+            color: white;
+            width: 16.66%;
+        }
+        tr:hover {
+            background-color: #f5f5f5;
+        }
+        td {
+            width: 16.66%;
+        }
+        </style>
+        """, unsafe_allow_html=True)
+    # Title and description
+    st.title("Random Examples from PLOD-CW")
+    st.write("This table shows 1 random examples from the PLOD-CW dataset in a cool and chilled theme.")
+    # Add a button to select a different set of random samples
+    if st.button('Show another set of random examples'):
+        st.session_state['random_examples'] = load_random_examples("surrey-nlp/PLOD-CW")
+    # Load random examples if not already loaded
+    if 'random_examples' not in st.session_state:
+        st.session_state['random_examples'] = load_random_examples("surrey-nlp/PLOD-CW")
+    # Display the table
+    st.table(st.session_state['random_examples'])
+def predict_using_trained(sentence):
+    model = load_trained_model()
+    entities = model(sentence)
+    return entities
+def prep_page():
+    model = load_trained_model()
+    # Streamlit app
+    # Page configuration
+    #st.set_page_config(page_title="NER Token Entities", layout="centered")
+    st.title("Named Entity Recognition with BERT on PLOD-CW")
+    st.write("Enter a sentence to see the named entities recognized by the model.")
+    # Text input
+    text = st.text_area("Enter your sentence here:")
+    # Perform NER and display results
+    if text:
+        st.write("Entities recognized:")
+        entities = model(text)
+        pprint(entities)
+        # Create a dictionary to map entity labels to colors
+        label_colors = {
+            'B-LF': 'lightblue',
+            'B-O': 'lightgreen',
+            'B-AC': 'lightcoral',
+            'I-LF': 'lightyellow'
+        }
+        # Prepare the HTML output with styled entities
+        def get_entity_html(text, entities):
+            html = "<div>"
+            last_idx = 0
+            for entity in entities:
+                start = entity['start']
+                end = entity['end']
+                label = entity['entity']
+                entity_text = text[start:end]
+                color = label_colors.get(label, 'lightgray')
+                # Append the text before the entity
+                html += text[last_idx:start].replace(" ", "<br>")
+                # Append the entity with styling
+                html += f'<div style="background-color: {color}; padding: 5px; border-radius: 3px; margin: 5px 0;">{entity_text}</div>'
+                last_idx = end
+            # Append any remaining text after the last entity
+            html += text[last_idx:].replace(" ", "<br>")
+            html += "</div>"
+            return html
+        # Generate and display the styled HTML
+        styled_text = get_entity_html(text, entities)
+        st.markdown(styled_text, unsafe_allow_html=True)
+        render_entities(text, entities)
+    render_random_examples()
+if __name__ == '__main__':
+    query_params = st.query_params
+    if 'api' in query_params:
+        sentence = query_params.get('sentence')
+        entities = predict_using_trained(sentence)
+        response = {"sentence" : sentence , "entities" : entities}
+        pprint(response)
+        st.write(response)
+    else:
+        prep_page()

requirements.txt ADDED Viewed

	@@ -0,0 +1,31 @@

+torch==2.2.0
+tensorflow==2.15.0
+datasets==2.18.0
+torchtext==0.17.0
+torchvision==0.17.0
+torchsummary==1.5.1
+accelerate==0.26.0
+gensim==4.3.2
+transformers==4.39.3
+pynvml==11.5.0
+seqeval==1.2.2
+triton==2.2.0
+jupyter==1.0.0
+jupyterlab-git==0.50.0
+urllib3<2
+scikit-learn
+scipy==1.10.1
+numpy
+fastai==2.7.14
+timm==0.9.12
+tensorboard
+albumentations==1.4.3
+seaborn
+tqdm==4.66.2
+nbdime
+matplotlib
+opencv-python
+Keras-Preprocessing==1.1.2
+flask==2.1.0
+Werkzeug==2.2.2
+wandb==0.17.0