awinml committed on
Commit
fbd690d
1 Parent(s): 8cd1f1e

Upload 3 files

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. app.py +30 -2
  3. earnings_calls_sentencewise.csv +3 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ earnings_calls_sentencewise.csv filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -12,10 +12,13 @@ from transformers import (
12
  import streamlit as st
13
  import openai
14
 
 
 
 
 
15
 
16
  # Initialize models from HuggingFace
17
 
18
-
19
  @st.experimental_singleton
20
  def get_t5_model():
21
  return pipeline("summarization", model="t5-small", tokenizer="t5-small")
@@ -66,6 +69,26 @@ def format_query(query_results):
66
  context = [result["metadata"]["Text"] for result in query_results["matches"]]
67
  return context
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  def gpt3_summary(text):
71
  response = openai.Completion.create(
@@ -136,7 +159,12 @@ elif encoder_model == "SGPT":
136
 
137
  query_results = query_pinecone(query_text, num_results, retriever_model, pinecone_index)
138
 
139
- context_list = format_query(query_results)
 
 
 
 
 
140
 
141
 
142
  st.subheader("Answer:")
 
12
  import streamlit as st
13
  import openai
14
 
15
@st.experimental_singleton
def get_data():
    """Load the sentence-level earnings-call table from disk.

    Reads ``earnings_calls_sentencewise.csv`` from the working directory
    with pandas and returns the resulting DataFrame.  The function is
    wrapped in ``st.experimental_singleton``; presumably this makes
    Streamlit reuse one loaded copy across reruns (confirm against the
    Streamlit caching docs).
    """
    return pd.read_csv("earnings_calls_sentencewise.csv")
19
 
20
  # Initialize models from HuggingFace
21
 
 
22
  @st.experimental_singleton
23
  def get_t5_model():
24
  return pipeline("summarization", model="t5-small", tokenizer="t5-small")
 
69
  context = [result["metadata"]["Text"] for result in query_results["matches"]]
70
  return context
71
 
72
def sentence_id_combine(data, query_results, lag=2):
    """Build context passages from each matched sentence and its neighbours.

    For every match in ``query_results["matches"]`` the sentence at
    position ``metadata["Sentence_id"]`` is expanded to the window
    ``[id - lag, id + lag]`` and the ``Text`` values of those rows are
    joined with single spaces.

    Args:
        data: DataFrame with a ``Text`` column; sentence IDs are used as
            positional row indices (``iloc``).
        query_results: Mapping with a ``"matches"`` list whose items carry
            ``["metadata"]["Sentence_id"]``.
        lag: Number of neighbouring sentences to include on each side.

    Returns:
        A list of passages (strings), ordered by sentence position.  Each
        passage holds at most ``2 * lag + 1`` sentences, and no sentence
        appears in more than one passage.
    """
    n_rows = len(data)
    texts = data["Text"]

    # Coerce to int: metadata values may arrive as floats (e.g. 12.0),
    # which cannot be used as positional indices.  The set drops repeated
    # hits; sorting keeps the output in document order.
    hit_ids = sorted(
        {int(match["metadata"]["Sentence_id"]) for match in query_results["matches"]}
    )

    used_positions = set()
    context_list = []
    for hit in hit_ids:
        # Clamp the window to the table: a negative position would make
        # ``iloc`` wrap around to the end of the data, and a position past
        # the last row would raise IndexError.
        first = max(hit - lag, 0)
        stop = min(hit + lag + 1, n_rows)

        # Build each passage from its own hit's window so that sentences
        # belonging to unrelated hits are never spliced together; positions
        # already emitted by an overlapping earlier window are skipped.
        window = [pos for pos in range(first, stop) if pos not in used_positions]
        if not window:
            continue
        used_positions.update(window)
        context_list.append(" ".join(texts.iloc[window].to_list()))

    return context_list
88
+
89
def text_lookup(data, sentence_ids):
    """Join the text of the rows at the given positions into one string.

    Args:
        data: Either a DataFrame with a ``Text`` column or a Series of
            sentence strings.
        sentence_ids: List of positional row indices (``iloc``).

    Returns:
        The selected sentences joined with single spaces.
    """
    # A DataFrame has no ``to_list``; select its ``Text`` column first
    # (the same column ``sentence_id_combine`` reads).  One-dimensional
    # input is used as-is, so callers passing a Series keep working.
    texts = data["Text"] if getattr(data, "ndim", 1) == 2 else data
    return " ".join(texts.iloc[sentence_ids].to_list())
92
 
93
  def gpt3_summary(text):
94
  response = openai.Completion.create(
 
159
 
160
  query_results = query_pinecone(query_text, num_results, retriever_model, pinecone_index)
161
 
162
# Number of neighbouring sentences to pull in on each side of a match.
window = int(
    st.number_input("Sentence Window Size", min_value=1, max_value=3, value=1)
)

data = get_data()

# Expand each match with its surrounding sentences instead of using only
# the matched text returned by format_query.
context_list = sentence_id_combine(data, query_results, lag=window)
168
 
169
 
170
  st.subheader("Answer:")
earnings_calls_sentencewise.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a24373c9cb8d68b4681f7590b5d94916ef748bd259636d93728e99b8e50678a5
3
+ size 12926317