Spaces:

saccharinedreams
/

sentiment-analysis-app

Runtime error

App Files Files Community

benliang99 committed on May 3, 2023

Commit

2d7ed25

•

1 Parent(s): 480b6a8

Updated app with patent abstract dropdown

Browse files

Files changed (2) hide show

app.py +28 -6
finetunehupd.ipynb +46 -4

app.py CHANGED Viewed

@@ -5,6 +5,24 @@ from transformers import pipeline, DistilBertForSequenceClassification, DistilBe
 MODEL_OPTS = ['finetuned', 'default', 'bertweet-base-sentiment-analysis', 'twitter-roberta-base', 'distilRoberta-financial-sentiment']
 FINETUNED_OPT = MODEL_OPTS[0]
 DEFAULT_OPT = MODEL_OPTS[1]
 # returns loaded model and tokenizer, if any
 def load_model(opt):
@@ -37,19 +55,23 @@ def sentiment_analysis(model, tokenizer):
         return pipeline('text-classification', model=model, tokenizer=tokenizer)
     else: return pipeline('text-classification')
-# Title the Streamlit app 'Sentiment Analysis'
-st.title('Sentiment Analysis')
 st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')
 # Take in user input
-user_text = st.text_input('Input text to perform sentiment analysis on here.', 'I love AI!')
 # The user can interact with a dropdown menu to choose a sentiment analysis model.
-dropdown_value = st.selectbox('Select one of the following sentiment analysis models', MODEL_OPTS, index=MODEL_OPTS.index(DEFAULT_OPT))
-model, tokenizer = load_model(dropdown_value)
 # Perform sentiment analysis on the user's input
-result = sentiment_analysis(model, tokenizer)(user_text)
 # Display the sentiment analysis results
 st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])

 MODEL_OPTS = ['finetuned', 'default', 'bertweet-base-sentiment-analysis', 'twitter-roberta-base', 'distilRoberta-financial-sentiment']
 FINETUNED_OPT = MODEL_OPTS[0]
 DEFAULT_OPT = MODEL_OPTS[1]
+abstracts = []
+# Helper function
+def map_decision_to_string(example):
+    return {'decision': decision_to_str[example['decision']]}
+def load_abstracts():
+    dataset_dict = load_dataset('HUPD/hupd',
+        name='sample',
+        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
+        icpr_label=None,
+        train_filing_start_date='2016-01-01',
+        train_filing_end_date='2016-01-31',
+        val_filing_start_date='2016-01-01',
+        val_filing_end_date='2016-01-01',
+    )
+    abstracts = dataset_dict['train']['abstract']
+    dataset_dict = [] # free up space
 # returns loaded model and tokenizer, if any
 def load_model(opt):
         return pipeline('text-classification', model=model, tokenizer=tokenizer)
     else: return pipeline('text-classification')
+# Title the Streamlit app 'Finetuned Harvard USPTO Patent Dataset (using DistilBert-Base-Uncased)'
+st.title('Finetuned Harvard USPTO Patent Dataset (using DistilBert-Base-Uncased)')
 st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')
+dropdown_abstracts = st.selectbox('Select one of the following abstracts from the HUPD dataset:', abstracts, index=abstracts.index(abstracts[0]))
+model, tokenizer = load_model('finetuned')
 # Take in user input
+#user_text = st.text_input('Input text to perform sentiment analysis on here.', 'I love AI!')
 # The user can interact with a dropdown menu to choose a sentiment analysis model.
+#dropdown_value = st.selectbox('Select one of the following sentiment analysis models', MODEL_OPTS, index=MODEL_OPTS.index(DEFAULT_OPT))
+#model, tokenizer = load_model(dropdown_value)
 # Perform sentiment analysis on the user's input
+result = sentiment_analysis(model, tokenizer)(dropdown_abstracts)
 # Display the sentiment analysis results
 st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])

finetunehupd.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
    "id": "1df3c609-62a6-49c3-bcc6-29c520f9501c",
    "metadata": {},
    "outputs": [],
@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
    "id": "58167c28-eb27-4f82-b7d0-8216dbeaf650",
    "metadata": {},
    "outputs": [
@@ -33,7 +33,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "345008775bf549b5a548948949710507",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1262,9 +1262,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "9b496593-c0de-4ce2-95d5-d5d3bf09d93c",
    "metadata": {},
    "outputs": [],
    "source": []
   }

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "1df3c609-62a6-49c3-bcc6-29c520f9501c",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "id": "58167c28-eb27-4f82-b7d0-8216dbeaf650",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a2f090474cb148548ce3eb73698fcc6c",
        "version_major": 2,
        "version_minor": 0
       },
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "id": "9b496593-c0de-4ce2-95d5-d5d3bf09d93c",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'The present invention relates to passive optical network (PON), and in particular, to an optical network terminal (ONT) in the PON system. In one embodiment, the optical network terminal includes a first interface coupled to a communications network, a second interface coupled to a network client and a processor including a memory coupled to the first interface and to the second interface, wherein the processor is capable of converting optical signals to electric signals, such that the network client can access the communications network.'"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset_dict['train']['abstract'][0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "6b6ad778-15aa-492a-9484-40106269e10d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Embodiments of the invention provide a method of reading and verifying a tag based on inherent disorder during a manufacturing process. The method includes using a first reader to take a first reading of an inherent disorder feature of the tag, and using a second reader to take a second reading of the inherent disorder feature of the tag. The method further includes matching the first reading with the second reading, and determining one or more acceptance criteria, wherein at least one of the acceptance criteria is based on whether the first reading and the second reading match within a predetermined threshold. If the acceptance criteria are met, then the tag is accepted, and a fingerprint for the tag is recorded. The invention further provides a method of testing and characterizing a reader of inherent disorder tags during a manufacturing process. The method includes taking a reading of a known inherent disorder tag, using the reading to measure a characteristic of the reader, and storing the measured characteristic for use when reading inherent disorder tags.'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset_dict['train']['abstract'][1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cf9d94a7-4d7e-4f6b-88f8-46c9855289f4",
+   "metadata": {},
    "outputs": [],
    "source": []
   }