Spaces:
Runtime error
Runtime error
Nick Canu
committed on
Commit
•
ae88252
1
Parent(s):
25bc366
spacy fix
Browse files
- Home.py +1 -0
- description_generator.py +2 -3
- requirements.txt +1 -3
- title_generator.py +3 -3
Home.py
CHANGED
@@ -160,6 +160,7 @@ def application():
|
|
160 |
###Models
|
161 |
@st.cache_resource
|
162 |
def setup_models():
|
|
|
163 |
return Title_Generator('./t5_model', slim_df), input_manager(vector_df, slim_df, search_tokens), model_control(apikey=st.secrets.key,model_id=st.secrets.model)
|
164 |
|
165 |
Tgen, iman, mctrl = setup_models()
|
|
|
160 |
###Models
|
161 |
@st.cache_resource
|
162 |
def setup_models():
|
163 |
+
spacy.cli.download("en_core_web_md")
|
164 |
return Title_Generator('./t5_model', slim_df), input_manager(vector_df, slim_df, search_tokens), model_control(apikey=st.secrets.key,model_id=st.secrets.model)
|
165 |
|
166 |
Tgen, iman, mctrl = setup_models()
|
description_generator.py
CHANGED
@@ -7,13 +7,12 @@ from operator import itemgetter
|
|
7 |
#user input manager class
|
8 |
class input_manager:
|
9 |
|
10 |
-
#initialize key dictionary from vector data frame
|
11 |
-
def __init__(self,key_df, slim_df, search_tokens
|
12 |
self.key_df = key_df
|
13 |
self.slim_df = slim_df
|
14 |
self.search_tokens = search_tokens
|
15 |
self.key = dict(zip(list(key_df.columns),np.zeros(len(key_df.columns))))
|
16 |
-
self.top_n = top_n
|
17 |
self.nlp = spacy.load("en_core_web_md")
|
18 |
|
19 |
#translate input text to vector
|
|
|
7 |
#user input manager class
|
8 |
class input_manager:
|
9 |
|
10 |
+
#initialize key dictionary from vector data frame
|
11 |
+
def __init__(self,key_df, slim_df, search_tokens):
|
12 |
self.key_df = key_df
|
13 |
self.slim_df = slim_df
|
14 |
self.search_tokens = search_tokens
|
15 |
self.key = dict(zip(list(key_df.columns),np.zeros(len(key_df.columns))))
|
|
|
16 |
self.nlp = spacy.load("en_core_web_md")
|
17 |
|
18 |
#translate input text to vector
|
requirements.txt
CHANGED
@@ -8,6 +8,4 @@ sentencepiece==0.1.97
|
|
8 |
spacy==3.5.1
|
9 |
streamlit==1.20.0
|
10 |
torch==2.0.0
|
11 |
-
transformers==4.27.3
|
12 |
-
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
|
13 |
-
en_core_web_md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.0/en_core_web_md-3.4.0-py3-none-any.whl
|
|
|
8 |
spacy==3.5.1
|
9 |
streamlit==1.20.0
|
10 |
torch==2.0.0
|
11 |
+
transformers==4.27.3
|
|
|
|
title_generator.py
CHANGED
@@ -5,10 +5,12 @@ from gensim.parsing import preprocess_string, strip_tags, strip_numeric, strip_m
|
|
5 |
import spacy
|
6 |
import torch
|
7 |
from transformers import T5ForConditionalGeneration,T5Tokenizer
|
|
|
|
|
8 |
|
9 |
#Custom text tokenizer from https://github.com/canunj/deconstructing_games by N Canu & K Chen
|
10 |
def doc_text_preprocessing(ser):
|
11 |
-
nlp=spacy.load("
|
12 |
|
13 |
"""text processing steps"""
|
14 |
import re
|
@@ -68,8 +70,6 @@ class Title_Generator:
|
|
68 |
return candidates, description
|
69 |
|
70 |
def candidate_score(self,candidates,ex_check=None):
|
71 |
-
import random
|
72 |
-
from operator import itemgetter
|
73 |
|
74 |
if ex_check != None:
|
75 |
pat = re.compile("((?:" + "|".join(map(re.escape, candidates[0]+[cand.upper() for cand in candidates[0]])) + "|" + "|".join(ex_check) +"))")
|
|
|
5 |
import spacy
|
6 |
import torch
|
7 |
from transformers import T5ForConditionalGeneration,T5Tokenizer
|
8 |
+
import random
|
9 |
+
import itemgetter
|
10 |
|
11 |
#Custom text tokenizer from https://github.com/canunj/deconstructing_games by N Canu & K Chen
|
12 |
def doc_text_preprocessing(ser):
|
13 |
+
nlp=spacy.load("en_core_web_md", exclude=['parser','ner','textcat'])
|
14 |
|
15 |
"""text processing steps"""
|
16 |
import re
|
|
|
70 |
return candidates, description
|
71 |
|
72 |
def candidate_score(self,candidates,ex_check=None):
|
|
|
|
|
73 |
|
74 |
if ex_check != None:
|
75 |
pat = re.compile("((?:" + "|".join(map(re.escape, candidates[0]+[cand.upper() for cand in candidates[0]])) + "|" + "|".join(ex_check) +"))")
|