dbleek committed on
Commit
806796c
1 Parent(s): 4d1c892

new classifier

Browse files
Files changed (3) hide show
  1. milestone-3.py +1 -2
  2. milestone_2.py +0 -26
  3. patent_classification_v2.pt +3 -0
milestone-3.py CHANGED
@@ -25,7 +25,7 @@ dataset = filtered_dataset.shuffle(seed=42).select(range(20))
25
  dataset = dataset.sort("patent_number")
26
 
27
  # Create pipeline using model trained on Colab
28
- model = torch.load("patent_classifier.pt", map_location=torch.device("cpu"))
29
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
30
  classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
31
 
@@ -74,4 +74,3 @@ if submitted:
74
  pred, score
75
  )
76
  )
77
- check = st.markdown("Actual Label: **{}**.".format(label))
 
25
  dataset = dataset.sort("patent_number")
26
 
27
  # Create pipeline using model trained on Colab
28
+ model = torch.load("patent_classifier_v2.pt", map_location=torch.device("cpu"))
29
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
30
  classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
31
 
 
74
  pred, score
75
  )
76
  )
 
milestone_2.py DELETED
@@ -1,26 +0,0 @@
1
- import streamlit as st
2
- from transformers import (AutoTokenizer, TFAutoModelForSequenceClassification,
3
- pipeline)
4
-
5
- st.title("CS-GY-6613 Project Milestone 2")
6
- model_choices = (
7
- "distilbert-base-uncased-finetuned-sst-2-english",
8
- "j-hartmann/emotion-english-distilroberta-base",
9
- "joeddav/distilbert-base-uncased-go-emotions-student",
10
- )
11
-
12
- with st.form("Input Form"):
13
- text = st.text_area("Write your text here:", "CS-GY-6613 is a great course!")
14
- model_name = st.selectbox("Select a model:", model_choices)
15
- submitted = st.form_submit_button("Submit")
16
-
17
- if submitted:
18
- model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
19
- tokenizer = AutoTokenizer.from_pretrained(model_name)
20
- classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
21
- res = classifier(text)
22
- label = res[0]["label"].upper()
23
- score = res[0]["score"]
24
- st.markdown(
25
- f"This text was classified as **{label}** with a confidence score of **{score}**."
26
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
patent_classification_v2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fbbdc470f673703431aa31cc7451af0d0608df3bd6e7006ab32866803f4eece
3
+ size 267882633