Spaces:
Running
Running
Upload 4 files
Browse files- Vectara-logo.png +0 -0
- app.py +56 -0
- query.py +74 -0
- requirements.txt +5 -0
Vectara-logo.png
ADDED
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from omegaconf import OmegaConf
|
2 |
+
from query import VectaraQuery
|
3 |
+
import os
|
4 |
+
import requests
|
5 |
+
|
6 |
+
import streamlit as st
|
7 |
+
from PIL import Image
|
8 |
+
|
9 |
+
def parse_match_url(match_url):
    """Split a ``<video_url>&t=<seconds>s`` match URL into its two parts.

    Args:
        match_url: URL string, optionally ending in ``&t=<seconds>`` with an
            optional trailing ``s`` (e.g. ``"https://host/v.mp4&t=12.5s"``).

    Returns:
        A ``(video_url, start_seconds)`` tuple. ``start_seconds`` is a
        non-negative int; it falls back to 0 when the URL carries no ``&t=``
        marker or the offset cannot be parsed as a number.
    """
    video_url, marker, raw_start = match_url.partition('&t=')
    if not marker:
        # No time offset present: play the whole URL from the beginning.
        return match_url, 0
    if raw_start.endswith('s'):
        # Only strip the unit suffix when it is really there.
        raw_start = raw_start[:-1]
    try:
        start_seconds = int(float(raw_start))
    except (ValueError, OverflowError):
        # Non-numeric, NaN or infinite offsets: start at the beginning.
        return video_url, 0
    return video_url, max(start_seconds, 0)


def launch_bot():
    """Render the Streamlit media demo page and handle one query round-trip.

    On first run of a session, reads the Vectara/OMDB settings from the
    environment (raises ``KeyError`` if any is missing) and stores the config
    and a ``VectaraQuery`` client in ``st.session_state``. Then draws the
    sidebar, the title and the question box; when the button is pressed the
    question is sent to Vectara and the best match is shown as a video
    positioned at the matched timestamp.
    """
    if 'cfg' not in st.session_state:
        cfg = OmegaConf.create({
            'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
            'corpus_id': str(os.environ['VECTARA_CORPUS_ID']),
            'api_key': str(os.environ['VECTARA_API_KEY']),
            'omdb_api_key': str(os.environ['OMDB_API_KEY']),
            'streaming': False
        })
        st.session_state.cfg = cfg
        st.session_state.vq = VectaraQuery(cfg.api_key, cfg.customer_id, [cfg.corpus_id],
                                           "vectara-summary-ext-24-05-large")

    vq = st.session_state.vq
    st.set_page_config(page_title="Media Demo", layout="wide")

    # left side content
    with st.sidebar:
        image = Image.open('Vectara-logo.png')
        st.markdown("## Welcome to Media Demo\n\n"
                    "This demo uses Vectara to find the movie where a quote is from\n\n")

        st.markdown("---")
        st.markdown(
            "## How this works?\n"
            "This app was built with [Vectara](https://vectara.com).\n"
        )
        st.markdown("---")
        st.image(image, width=250)

    st.markdown("<center> <h2> Vectara Media Demo</h2> </center>", unsafe_allow_html=True)

    question = st.text_input("Enter your question:")
    if not st.button("find the match"):
        return

    if not question.strip():
        # Don't spend an API call on an empty query.
        st.warning("Please enter a question first.")
        return

    result = vq.submit_query(question)
    if isinstance(result, str):
        # submit_query reports a failed request as a plain message string
        # rather than a (title, url) pair; show it instead of unpacking it.
        st.error(result)
        return

    movie_name, match_url = result
    if not match_url:
        # No results, or the best match carries no 'url' metadata.
        st.warning("Sorry, no matching video was found for that question.")
        return

    video_url, start_seconds = parse_match_url(match_url)

    # Use the wide middle column so the player is centered on the page.
    _, center_col, _ = st.columns([1, 2, 1])
    with center_col:
        st.write(f"Here's a useful video for you: {movie_name}")
        st.video(video_url, start_time=start_seconds)
|
53 |
+
|
54 |
+
# Script entry point: build and render the demo page when this file is run
# directly (e.g. via `streamlit run app.py`), not when it is imported.
if __name__ == "__main__":
    launch_bot()
|
56 |
+
|
query.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import json
|
3 |
+
import re
|
4 |
+
|
5 |
+
class VectaraQuery():
    """Small client for the Vectara v1 query endpoint used by the media demo.

    Builds the JSON request for one or more corpora, posts it, and returns the
    title and URL metadata of the top-ranked match.
    """

    # Client-side cap (seconds) so a stalled connection cannot hang the app;
    # matches the 60 second value sent in the "grpc-timeout" header.
    REQUEST_TIMEOUT_SECONDS = 60

    # Message returned by submit_query() when the request itself fails.
    ERROR_MESSAGE = "Sorry, something went wrong in my brain. Please try again later."

    def __init__(self, api_key: str, customer_id: str, corpus_ids: list[str], prompt_name: str = None):
        """Store the credentials and query settings.

        Args:
            api_key: Value sent in the ``x-api-key`` header.
            customer_id: Value sent in the ``customer-id`` header and in each
                corpus key.
            corpus_ids: IDs of the corpora to search.
            prompt_name: Summarizer prompt name; a default is used when falsy.
                NOTE(review): stored but not referenced by get_body() in this
                class — confirm whether it is still needed.
        """
        self.customer_id = customer_id
        self.corpus_ids = corpus_ids
        self.api_key = api_key
        self.prompt_name = prompt_name if prompt_name else "vectara-experimental-summary-ext-2023-12-11-sml"

    def get_body(self, query_str: str):
        """Return the request payload (a dict) for ``query_str``.

        The payload asks for the first 10 results across all configured
        corpora, with two sentences of context on each side of a match.
        """
        corpora_key_list = [
            {
                'customer_id': self.customer_id,
                'corpus_id': corpus_id,
                'lexical_interpolation_config': {'lambda': 0.005},
            }
            for corpus_id in self.corpus_ids
        ]
        return {
            'query': [
                {
                    'query': query_str,
                    'start': 0,
                    'numResults': 10,
                    'corpusKey': corpora_key_list,
                    'context_config': {
                        'sentences_before': 2,
                        'sentences_after': 2,
                        'start_tag': "%START_SNIPPET%",
                        'end_tag': "%END_SNIPPET%",
                    },
                    # Reranker is selected by numeric ID.
                    # TODO(review): confirm which reranker this ID refers to.
                    'rerankingConfig':
                    {
                        'rerankerId': 272725719,
                    },
                }
            ]
        }

    def get_headers(self):
        """Return the HTTP headers (content type, credentials, timeout) for a query."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "customer-id": self.customer_id,
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }

    @staticmethod
    def _extract_top_match(res: dict):
        """Return ``(doc_title, url)`` for the top-ranked result in ``res``.

        ``res`` is the decoded JSON response. Part-level metadata is merged
        with the owning document's metadata, the latter under ``doc_``-prefixed
        keys. Either element is ``None`` when missing; ``(None, None)`` is
        returned when the response contains no results at all.
        """
        response_sets = res.get('responseSet') or []
        if not response_sets:
            return None, None

        first_set = response_sets[0]
        responses = first_set.get('response') or []
        if not responses:
            return None, None
        documents = first_set.get('document') or []

        top = responses[0]
        md = {m["name"]: m["value"] for m in top.get("metadata", [])}

        # Attach the parent document's metadata when the index is usable.
        doc_num = top.get("documentIndex")
        if isinstance(doc_num, int) and 0 <= doc_num < len(documents):
            md.update({
                f'doc_{m["name"]}': m["value"]
                for m in documents[doc_num].get("metadata", [])
            })

        return md.get("doc_title", None), md.get("url", None)

    def submit_query(self, query_str: str):
        """Run ``query_str`` against Vectara and return the best match.

        Returns:
            ``(movie_title, snippet_url)`` for the top result, where either
            value may be ``None`` (both are ``None`` when nothing matched).
            If the request fails (network error, timeout, or a non-200
            status) the failure is printed and a user-facing error message
            string is returned instead of a tuple.
        """
        endpoint = "https://api.vectara.io/v1/query"
        body = self.get_body(query_str)

        try:
            response = requests.post(endpoint, data=json.dumps(body), verify=True,
                                     headers=self.get_headers(),
                                     timeout=self.REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as err:
            # Treat transport failures like HTTP failures: report and bail out.
            print(f"Query failed with error: {err}")
            return self.ERROR_MESSAGE

        if response.status_code != 200:
            print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
            return self.ERROR_MESSAGE

        return self._extract_top_match(response.json())
|
74 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
requests_to_curl==1.1.0
|
2 |
+
toml==0.10.2
|
3 |
+
omegaconf==2.3.0
|
4 |
+
syrupy==4.0.8
|
5 |
+
streamlit==1.32.2
|