nickmuchi committed on
Commit
f4b6788
·
1 Parent(s): 23253c8

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +4 -5
functions.py CHANGED
@@ -81,7 +81,7 @@ def inference(link, upload, _asr_model):
81
  def sentiment_pipe(earnings_text):
82
  '''Determine the sentiment of the text'''
83
 
84
- earnings_sentences = chunk_long_text(earnings_text,200,1)
85
  earnings_sentiment = sent_pipe(earnings_sentences)
86
 
87
  return earnings_sentiment, earnings_sentences
@@ -99,12 +99,11 @@ def clean_text(text):
99
  return text
100
 
101
  @st.experimental_memo(suppress_st_warning=True)
102
- def chunk_long_text(text,threshold,window_size=3):
103
  '''Preprocess text and chunk for semantic search and sentiment analysis'''
104
 
105
  #Convert cleaned text into sentences
106
  sentences = sent_tokenize(text)
107
-
108
  out = []
109
 
110
  #Limit the length of each sentence to a threshold
@@ -121,12 +120,12 @@ def chunk_long_text(text,threshold,window_size=3):
121
 
122
  #Combine sentences into a window of size window_size
123
  for paragraph in [out]:
124
- for start_idx in range(0, len(paragraph), window_size):
125
  end_idx = min(start_idx+window_size, len(paragraph))
126
  passages.append(" ".join(paragraph[start_idx:end_idx]))
127
 
128
  return passages
129
-
130
  @st.experimental_memo(suppress_st_warning=True)
131
  def chunk_and_preprocess_text(text,thresh=500):
132
 
 
81
  def sentiment_pipe(earnings_text):
82
  '''Determine the sentiment of the text'''
83
 
84
+ earnings_sentences = chunk_long_text(earnings_text,150,1,1)
85
  earnings_sentiment = sent_pipe(earnings_sentences)
86
 
87
  return earnings_sentiment, earnings_sentences
 
99
  return text
100
 
101
  @st.experimental_memo(suppress_st_warning=True)
102
+ def chunk_long_text(text,threshold,window_size=3,stride=2):
103
  '''Preprocess text and chunk for semantic search and sentiment analysis'''
104
 
105
  #Convert cleaned text into sentences
106
  sentences = sent_tokenize(text)
 
107
  out = []
108
 
109
  #Limit the length of each sentence to a threshold
 
120
 
121
  #Combine sentences into a window of size window_size
122
  for paragraph in [out]:
123
+ for start_idx in range(0, len(paragraph), stride):
124
  end_idx = min(start_idx+window_size, len(paragraph))
125
  passages.append(" ".join(paragraph[start_idx:end_idx]))
126
 
127
  return passages
128
+
129
  @st.experimental_memo(suppress_st_warning=True)
130
  def chunk_and_preprocess_text(text,thresh=500):
131