Spaces:

awinml
/

2-qa-earnings-sentencewise

Build error

App Files Files Community

awinml committed on Mar 13, 2023

Commit

ac5b87a

•

1 Parent(s): bf8b612

Upload 2 files

Browse files

Files changed (2) hide show

app.py +6 -2
utils.py +65 -14

app.py CHANGED Viewed

@@ -42,13 +42,16 @@ with col1:
     )
 with col1:
-    years_choice = ["2020", "2019", "2018", "2017", "2016"]
 with col1:
     year = st.selectbox("Year", years_choice)
 with col1:
-    quarter = st.selectbox("Quarter", ["Q1", "Q2", "Q3", "Q4"])
 ticker_choice = [
     "AAPL",
@@ -127,6 +130,7 @@ query_results = query_pinecone(
     year,
     quarter,
     ticker,
     threshold,
 )

     )
 with col1:
+    years_choice = ["2020", "2019", "2018", "2017", "2016", "All"]
 with col1:
     year = st.selectbox("Year", years_choice)
 with col1:
+    quarter = st.selectbox("Quarter", ["Q1", "Q2", "Q3", "Q4", "All"])
+with col1:
+    participant_type = st.selectbox("Speaker", ["Company Speaker", "Analyst"])
 ticker_choice = [
     "AAPL",
     year,
     quarter,
     ticker,
+    participant_type,
     threshold,
 )

utils.py CHANGED Viewed

@@ -61,21 +61,72 @@ def save_key(api_key):
     return api_key
-def query_pinecone(query, top_k, model, index, year, quarter, ticker, threshold=0.5):
     # generate embeddings for the query
     xq = model.encode([query]).tolist()
-    # search pinecone index for context passage with the answer
-    xc = index.query(
-        xq,
-        top_k=top_k,
-        filter={
-            "Year": int(year),
-            "Quarter": {"$eq": quarter},
-            "Ticker": {"$eq": ticker},
-            "QA_Flag": {"$eq": "Answer"},
-        },
-        include_metadata=True,
-    )
     # filter the context passages based on the score threshold
     filtered_matches = []
     for match in xc["matches"]:
@@ -91,7 +142,7 @@ def format_query(query_results):
     return context
-def sentence_id_combine(data, query_results, lag=2):
     # Extract sentence IDs from query results
     ids = [result["metadata"]["Sentence_id"] for result in query_results["matches"]]
     # Generate new IDs by adding a lag value to the original IDs

     return api_key
+def query_pinecone(
+    query, top_k, model, index, year, quarter, ticker, participant_type, threshold=0.25
+):
     # generate embeddings for the query
     xq = model.encode([query]).tolist()
+    if participant_type == "Company Speaker":
+        participant = "Speaker"
+    else:
+        participant = participant_type
+    if year == "All":
+        if quarter == "All":
+            xc = index.query(
+                xq,
+                top_k=top_k,
+                filter={
+                    "Year": {
+                        "$in": [
+                            int("2020"),
+                            int("2019"),
+                            int("2018"),
+                            int("2017"),
+                            int("2016"),
+                        ]
+                    },
+                    "Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
+                    "Ticker": {"$eq": ticker},
+                    "QA_Flag": {"$eq": participant},
+                },
+                include_metadata=True,
+            )
+        else:
+            xc = index.query(
+                xq,
+                top_k=top_k,
+                filter={
+                    "Year": {
+                        "$in": [
+                            int("2020"),
+                            int("2019"),
+                            int("2018"),
+                            int("2017"),
+                            int("2016"),
+                        ]
+                    },
+                    "Quarter": {"$eq": quarter},
+                    "Ticker": {"$eq": ticker},
+                    "QA_Flag": {"$eq": participant},
+                },
+                include_metadata=True,
+            )
+    else:
+        # search pinecone index for context passage with the answer
+        xc = index.query(
+            xq,
+            top_k=top_k,
+            filter={
+                "Year": int(year),
+                "Quarter": {"$eq": quarter},
+                "Ticker": {"$eq": ticker},
+                "QA_Flag": {"$eq": participant},
+            },
+            include_metadata=True,
+        )
     # filter the context passages based on the score threshold
     filtered_matches = []
     for match in xc["matches"]:
     return context
+def sentence_id_combine(data, query_results, lag=1):
     # Extract sentence IDs from query results
     ids = [result["metadata"]["Sentence_id"] for result in query_results["matches"]]
     # Generate new IDs by adding a lag value to the original IDs