ofermend committed on
Commit
893c5d9
·
verified ·
1 Parent(s): 3324b14

Upload 4 files

Browse files
Files changed (4) hide show
  1. Vectara-logo.png +0 -0
  2. app.py +56 -0
  3. query.py +74 -0
  4. requirements.txt +5 -0
Vectara-logo.png ADDED
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from omegaconf import OmegaConf
2
+ from query import VectaraQuery
3
+ import os
4
+ import requests
5
+
6
+ import streamlit as st
7
+ from PIL import Image
8
+
9
def launch_bot():
    """Render the Streamlit "Media Demo" page and answer quote look-ups.

    On the first run of a session the Vectara settings are read from the
    environment variables ``VECTARA_CUSTOMER_ID``, ``VECTARA_CORPUS_ID``,
    ``VECTARA_API_KEY`` and ``OMDB_API_KEY`` (a missing variable raises
    ``KeyError``) and cached, together with a ``VectaraQuery`` client, in
    ``st.session_state``.

    When the button is pressed the question is sent to Vectara and the
    matching video is shown, positioned at the matched offset.  Failed
    queries, empty questions and results without a URL are reported in the
    page instead of raising.
    """
    if 'cfg' not in st.session_state:
        cfg = OmegaConf.create({
            'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
            'corpus_id': str(os.environ['VECTARA_CORPUS_ID']),
            'api_key': str(os.environ['VECTARA_API_KEY']),
            # Stored in the config but not used anywhere else in this function.
            'omdb_api_key': str(os.environ['OMDB_API_KEY']),
            'streaming': False
        })
        st.session_state.cfg = cfg
        st.session_state.vq = VectaraQuery(cfg.api_key, cfg.customer_id, [cfg.corpus_id],
                                           "vectara-summary-ext-24-05-large")

    vq = st.session_state.vq
    st.set_page_config(page_title="Media Demo", layout="wide")

    # left side content
    with st.sidebar:
        image = Image.open('Vectara-logo.png')
        st.markdown("## Welcome to Media Demo\n\n"
                    "This demo uses Vectara to find the movie where a quote is from\n\n")

        st.markdown("---")
        st.markdown(
            "## How this works?\n"
            "This app was built with [Vectara](https://vectara.com).\n"
        )
        st.markdown("---")
        st.image(image, width=250)

    st.markdown("<center> <h2> Vectara Media Demo</h2> </center>", unsafe_allow_html=True)

    question = st.text_input("Enter your question:")
    if st.button("find the match"):
        # Do not spend an API call on an empty query.
        if not question.strip():
            st.warning("Please enter a question first.")
            return

        result = vq.submit_query(question)
        # submit_query may hand back a plain message string when the request
        # failed; unpacking that as a pair would raise.
        if isinstance(result, str):
            st.error(result)
            return

        movie_name, match_url = result
        if not match_url:
            st.warning("No matching video was found for this question.")
            return

        video_url, start_seconds = split_match_url(match_url)

        # Three columns with the video in the wider middle one.
        _left, center, _right = st.columns([1, 2, 1])
        with center:
            st.write(f"Here's a useful video for you: {movie_name}")
            st.video(video_url, start_time=start_seconds)


def split_match_url(match_url: str) -> tuple[str, int]:
    """Split a snippet URL of the form ``<video>&t=<seconds>s`` for launch_bot.

    Returns ``(video_url, start_seconds)``.  The offset falls back to ``0``
    when the URL has no ``&t=`` part or the offset is not a number; a trailing
    ``s`` is removed only when it is actually present, and fractional offsets
    are truncated to whole seconds.
    """
    video_url, _, offset_text = match_url.partition('&t=')
    if offset_text.endswith('s'):
        offset_text = offset_text[:-1]
    try:
        start_seconds = int(float(offset_text))
    except (ValueError, OverflowError):
        # Empty, non-numeric, NaN or infinite offset: start from the beginning.
        start_seconds = 0
    return video_url, max(start_seconds, 0)
53
+
54
# Start the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    launch_bot()
56
+
query.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import re
4
+
5
class VectaraQuery():
    """Small client for the Vectara v1 query endpoint.

    It builds the query request for one or more corpora, posts it, and
    returns the title and snippet URL found in the metadata of the
    best-ranked hit.
    """

    # Seconds to wait for the HTTP call; mirrors the "grpc-timeout" header value.
    REQUEST_TIMEOUT_SECONDS = 60

    def __init__(self, api_key: str, customer_id: str, corpus_ids: list[str], prompt_name: str = None):
        """Store the credentials and the corpora to search.

        Args:
            api_key: Value sent in the ``x-api-key`` header.
            customer_id: Value sent in the ``customer-id`` header and in each corpus key.
            corpus_ids: Corpora to query; one corpus key is built per entry.
            prompt_name: Stored on the instance; a falsy value selects the
                default name below.  The request built by ``get_body`` does
                not include it.
        """
        self.customer_id = customer_id
        self.corpus_ids = corpus_ids
        self.api_key = api_key
        self.prompt_name = prompt_name if prompt_name else "vectara-experimental-summary-ext-2023-12-11-sml"

    def get_body(self, query_str: str):
        """Return the JSON-serialisable request body for ``query_str``.

        The body asks for the first 10 results across all configured corpora,
        with two sentences of context on each side of a hit and the snippet
        wrapped in ``%START_SNIPPET%`` / ``%END_SNIPPET%`` tags.
        """
        corpora_key_list = [
            {
                'customer_id': self.customer_id,
                'corpus_id': corpus_id,
                'lexical_interpolation_config': {'lambda': 0.005},
            }
            for corpus_id in self.corpus_ids
        ]
        return {
            'query': [
                {
                    'query': query_str,
                    'start': 0,
                    'numResults': 10,
                    'corpusKey': corpora_key_list,
                    'context_config': {
                        'sentences_before': 2,
                        'sentences_after': 2,
                        'start_tag': "%START_SNIPPET%",
                        'end_tag': "%END_SNIPPET%",
                    },
                    'rerankingConfig':
                    {
                        'rerankerId': 272725719,
                    },
                }
            ]
        }

    def get_headers(self):
        """Return the HTTP headers (JSON content type plus credentials) for a query."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "customer-id": self.customer_id,
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }

    def submit_query(self, query_str: str):
        """Run ``query_str`` and return ``(movie_title, snippet_url)`` of the top hit.

        The result is always a 2-tuple so callers can unpack it
        unconditionally.  ``(None, None)`` is returned when the request cannot
        be completed, the service answers with a non-200 status, the body is
        not JSON, or there are no hits; either element may also be ``None``
        when the corresponding metadata field is absent.
        """
        endpoint = "https://api.vectara.io/v1/query"
        body = self.get_body(query_str)

        try:
            response = requests.post(
                endpoint,
                data=json.dumps(body),
                verify=True,
                headers=self.get_headers(),
                # Without a timeout a stalled connection would block forever.
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as err:
            print(f"Query failed before a response was received: {err}")
            return None, None

        if response.status_code != 200:
            print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
            return None, None

        try:
            payload = response.json()
        except ValueError:
            print("Query returned a body that is not valid JSON")
            return None, None

        return self._extract_top_match(payload)

    @staticmethod
    def _extract_top_match(res: dict):
        """Pull ``(doc_title, url)`` for the best-ranked hit out of a decoded response.

        Metadata of the hit itself is merged with the metadata of the document
        it points to; document-level names are prefixed with ``doc_``.
        Returns ``(None, None)`` when the response contains no hits.
        """
        response_sets = res.get('responseSet') or []
        if not response_sets:
            return None, None

        first_set = response_sets[0]
        hits = first_set.get('response') or []
        if not hits:
            return None, None

        documents = first_set.get('document') or []
        best = hits[0]
        md = {m["name"]: m["value"] for m in best.get("metadata") or []}

        # A missing index is treated as 0; an out-of-range one is ignored.
        doc_num = best.get("documentIndex", 0)
        if isinstance(doc_num, int) and 0 <= doc_num < len(documents):
            doc_metadata = documents[doc_num].get("metadata") or []
            md.update({f'doc_{m["name"]}': m["value"] for m in doc_metadata})

        return md.get("doc_title", None), md.get("url", None)
74
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ requests_to_curl==1.1.0
2
+ toml==0.10.2
3
+ omegaconf==2.3.0
4
+ syrupy==4.0.8
5
+ streamlit==1.32.2