Spaces:

vectara
/

media-demo

Running

App Files Files Community

ofermend committed on May 23, 2024

Commit

893c5d9

verified ·

1 Parent(s): 3324b14

Upload 4 files

Browse files

Files changed (4) hide show

Vectara-logo.png +0 -0
app.py +56 -0
query.py +74 -0
requirements.txt +5 -0

Vectara-logo.png ADDED Viewed

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from omegaconf import OmegaConf
+from query import VectaraQuery
+import os
+import requests
+import streamlit as st
+from PIL import Image
def _parse_match_url(match_url):
    """Split a snippet URL of the form ``<video_url>&t=<seconds>s``.

    Returns a ``(video_url, start_seconds)`` tuple. When the ``&t=`` part is
    missing or is not a number, the start offset falls back to 0 so that the
    video can still be shown from the beginning.
    """
    video_url, separator, raw_start = match_url.partition('&t=')
    if not separator:
        return video_url, 0
    # Drop the trailing 's' unit marker only when it is actually present.
    if raw_start.endswith('s'):
        raw_start = raw_start[:-1]
    try:
        return video_url, int(float(raw_start))
    except ValueError:
        return video_url, 0


def launch_bot():
    """Render the Media Demo page and answer "which movie is this quote from".

    On first run the Vectara/OMDB settings are read from environment
    variables (a missing variable raises ``KeyError``) and cached, together
    with a ``VectaraQuery`` client, in ``st.session_state``. When the user
    presses the button, the question is sent to Vectara and the best matching
    video is embedded, starting at the matched timestamp.
    """
    # Streamlit requires set_page_config to be the first Streamlit command.
    st.set_page_config(page_title="Media Demo", layout="wide")

    if 'cfg' not in st.session_state:
        cfg = OmegaConf.create({
            'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
            'corpus_id': str(os.environ['VECTARA_CORPUS_ID']),
            'api_key': str(os.environ['VECTARA_API_KEY']),
            'omdb_api_key': str(os.environ['OMDB_API_KEY']),
            'streaming': False
        })
        st.session_state.cfg = cfg
        st.session_state.vq = VectaraQuery(cfg.api_key, cfg.customer_id, [cfg.corpus_id],
                                           "vectara-summary-ext-24-05-large")
    vq = st.session_state.vq

    # left side content
    with st.sidebar:
        image = Image.open('Vectara-logo.png')
        st.markdown("## Welcome to Media Demo\n\n"
                    "This demo uses Vectara to find the movie where a quote is from\n\n")
        st.markdown("---")
        st.markdown(
            "## How this works?\n"
            "This app was built with [Vectara](https://vectara.com).\n"
        )
        st.markdown("---")
        st.image(image, width=250)

    st.markdown("<center> <h2> Vectara Media Demo</h2> </center>", unsafe_allow_html=True)
    question = st.text_input("Enter your question:")
    if not st.button("find the match"):
        return

    if not question.strip():
        st.warning("Please enter a question first.")
        return

    result = vq.submit_query(question)
    # submit_query may hand back a bare error string instead of a
    # (title, url) pair when the request fails; show it rather than crash
    # while trying to unpack it.
    if isinstance(result, str):
        st.error(result)
        return

    movie_name, match_url = result
    if not match_url:
        st.error("Sorry, no matching video was found.")
        return

    video_url, start_seconds = _parse_match_url(match_url)
    # Three columns with the video in the wider middle one, to center it.
    _, middle, _ = st.columns([1, 2, 1])
    with middle:
        st.write(f"Here's a useful video for you: {movie_name}")
        st.video(video_url, start_time=start_seconds)


if __name__ == "__main__":
    launch_bot()

query.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import requests
+import json
+import re
class VectaraQuery():
    """Small client that queries Vectara and returns the best matching snippet.

    Attributes:
        customer_id: Vectara customer id, sent in the headers and corpus keys.
        corpus_ids: Corpus ids to search; one corpus key is built per id.
        api_key: API key sent in the ``x-api-key`` header.
        prompt_name: Summarizer prompt name. Stored on the instance but not
            used by the request body built in ``get_body``.
    """

    def __init__(self, api_key: str, customer_id: str, corpus_ids: list[str], prompt_name: str = None):
        self.customer_id = customer_id
        self.corpus_ids = corpus_ids
        self.api_key = api_key
        self.prompt_name = prompt_name if prompt_name else "vectara-experimental-summary-ext-2023-12-11-sml"

    def get_body(self, query_str: str):
        """Build the JSON-serializable request body for ``query_str``.

        The body asks for the first 10 results across all configured corpora,
        with two sentences of context on each side of a hit.
        """
        corpora_key_list = [{
                'customer_id': self.customer_id, 'corpus_id': corpus_id, 'lexical_interpolation_config': {'lambda': 0.005}
            } for corpus_id in self.corpus_ids
        ]
        return {
            'query': [
                {
                    'query': query_str,
                    'start': 0,
                    'numResults': 10,
                    'corpusKey': corpora_key_list,
                    'context_config': {
                        'sentences_before': 2,
                        'sentences_after': 2,
                        'start_tag': "%START_SNIPPET%",
                        'end_tag': "%END_SNIPPET%",
                    },
                    'rerankingConfig':
                    {
                        # NOTE(review): hard-coded reranker id; confirm which
                        # Vectara reranker this refers to.
                        'rerankerId': 272725719,
                    },
                }
            ]
        }

    def get_headers(self):
        """Return the HTTP headers (content type, credentials, timeout)."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "customer-id": self.customer_id,
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }

    @staticmethod
    def _extract_top_match(res: dict):
        """Pull ``(doc_title, url)`` for the top hit out of a query response.

        Part-level metadata is merged with the owning document's metadata,
        whose keys are prefixed with ``doc_``. Returns ``(None, None)`` when
        the response contains no hits; either element may also be ``None`` if
        the corresponding metadata field is absent.
        """
        response_sets = res.get('responseSet') or [{}]
        response_set = response_sets[0]
        hits = response_set.get('response') or []
        if not hits:
            return None, None
        documents = response_set.get('document') or []

        top_hit = hits[0]
        metadata = {m["name"]: m["value"] for m in top_hit.get("metadata", [])}
        doc_num = top_hit.get("documentIndex")
        if isinstance(doc_num, int) and 0 <= doc_num < len(documents):
            metadata.update({
                f'doc_{m["name"]}': m["value"]
                for m in documents[doc_num].get("metadata", [])
            })
        return metadata.get("doc_title", None), metadata.get("url", None)

    def submit_query(self, query_str: str):
        """Run ``query_str`` against Vectara and return the best match.

        Returns:
            A ``(movie_title, snippet_url)`` tuple. Both elements are ``None``
            when the request fails, the response is not valid JSON, or there
            are no results, so callers can always unpack two values.
        """
        endpoint = "https://api.vectara.io/v1/query"
        body = self.get_body(query_str)
        try:
            # Client-side timeout mirrors the 60S grpc-timeout header so a
            # stalled connection cannot hang the caller forever.
            response = requests.post(endpoint, data=json.dumps(body), verify=True,
                                     headers=self.get_headers(), timeout=60)
        except requests.RequestException as err:
            print(f"Query failed with error {err}")
            return None, None
        if response.status_code != 200:
            print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
            return None, None
        try:
            res = response.json()
        except ValueError as err:
            print(f"Query returned a non-JSON response: {err}")
            return None, None
        return self._extract_top_match(res)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+requests_to_curl==1.1.0
+toml==0.10.2
+omegaconf==2.3.0
+syrupy==4.0.8
+streamlit==1.32.2