awinml committed on
Commit
a1d1460
1 Parent(s): 92808fd

Upload 7 files

Browse files
Files changed (2) hide show
  1. app.py +28 -21
  2. utils/models.py +3 -3
app.py CHANGED
@@ -28,7 +28,7 @@ st.title("Instructor XL Embeddings")
28
 
29
 
30
  st.write(
31
- "The app compares the performance of the Instructor-XL Embedding Model on the text from AMD's Q1 2020 Earnings Call Transcript.'"
32
  )
33
 
34
  data = get_data()
@@ -114,29 +114,36 @@ index_mapping = {
114
  "Represent the earnings call transcript answer for retrieval:": "week14-instructor-xl-amd-ecta-6",
115
  }
116
 
 
 
 
 
 
 
117
 
118
- with st.form("my_form"):
119
- text_embedding_instruction = st.selectbox(
120
- "Select instruction for Text Embedding",
121
- text_embedding_instructions_choice,
122
- )
123
-
124
- pinecone_index_name = index_mapping[text_embedding_instruction]
125
- pinecone.init(
126
- api_key=st.secrets[f"pinecone_{pinecone_index_name}"],
127
- environment="asia-southeast1-gcp-free",
128
- )
129
-
130
- pinecone_index = pinecone.Index(pinecone_index_name)
131
-
132
- submitted = st.form_submit_button("Submit")
133
- if submitted:
134
- matches = query_pinecone(
135
- dense_embedding, num_results, pinecone_index, indices
136
  )
137
- context = format_query(matches)
138
- output_text = format_context(context)
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
  tab1 = st.tabs(["View transcript"])
142
 
 
28
 
29
 
30
  st.write(
31
+ "The app compares the performance of the Instructor-XL Embedding Model on the text from AMD's Q1 2020 Earnings Call Transcript."
32
  )
33
 
34
  data = get_data()
 
114
  "Represent the earnings call transcript answer for retrieval:": "week14-instructor-xl-amd-ecta-6",
115
  }
116
 
117
+ with col2:
118
+ with st.form("my_form"):
119
+ text_embedding_instruction = st.selectbox(
120
+ "Select instruction for Text Embedding",
121
+ text_embedding_instructions_choice,
122
+ )
123
 
124
+ pinecone_index_name = index_mapping[text_embedding_instruction]
125
+ pinecone.init(
126
+ api_key=st.secrets[f"pinecone_{pinecone_index_name}"],
127
+ environment="asia-southeast1-gcp-free",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  )
 
 
129
 
130
+ pinecone_index = pinecone.Index(pinecone_index_name)
131
+
132
+ submitted = st.form_submit_button("Submit")
133
+ if submitted:
134
+ matches = query_pinecone(
135
+ dense_embedding, num_results, pinecone_index, indices
136
+ )
137
+ context = format_query(matches)
138
+ output_text = format_context(context)
139
+
140
+ st.subheader("Retrieved Text:")
141
+ for output in output_text:
142
+ output = f"""{output}"""
143
+ st.write(
144
+ f"<ul><li><p>{output}</p></li></ul>",
145
+ unsafe_allow_html=True,
146
+ )
147
 
148
  tab1 = st.tabs(["View transcript"])
149
 
utils/models.py CHANGED
@@ -37,19 +37,19 @@ def preprocess_text(text):
37
  return preprocessed_text
38
 
39
 
40
- @st.experimental_singleton
41
  def get_data():
42
  data = pd.read_csv("AMD_Q1_2020_earnings_call_data_keywords.csv")
43
  return data
44
 
45
 
46
- @st.experimental_singleton
47
  def get_instructor_embedding_model():
48
  client = Client("https://awinml-api-instructor-xl-1.hf.space/")
49
  return client
50
 
51
 
52
- @st.experimental_singleton
53
  def get_bm25_model(data):
54
  corpus = data.Text.tolist()
55
  corpus_clean = [preprocess_text(x) for x in corpus]
 
37
  return preprocessed_text
38
 
39
 
40
+ @st.cache_resource
41
  def get_data():
42
  data = pd.read_csv("AMD_Q1_2020_earnings_call_data_keywords.csv")
43
  return data
44
 
45
 
46
+ @st.cache_resource
47
  def get_instructor_embedding_model():
48
  client = Client("https://awinml-api-instructor-xl-1.hf.space/")
49
  return client
50
 
51
 
52
+ @st.cache_resource
53
  def get_bm25_model(data):
54
  corpus = data.Text.tolist()
55
  corpus_clean = [preprocess_text(x) for x in corpus]