eaglelandsonce committed on
Commit
e84a43c
1 Parent(s): 37d3b4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -0
app.py CHANGED
@@ -3,6 +3,110 @@ import requests
3
  import json
4
  import os
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Assuming the environment variables are already set, we directly use them.
8
  # However, in a Streamlit app, you might want to set them up within the script for demonstration purposes
@@ -84,3 +188,5 @@ if st.button("Query Vectara"):
84
  st.write("No results found.")
85
 
86
  # Note: The integration of the model for HHEM scores is omitted as it requires the specific model details and implementation.
 
 
 
3
  import json
4
  import os
5
  import pandas as pd
6
+ from sentence_transformers import CrossEncoder
7
+ import numpy as np
8
+
9
+ # Initialize the HHEM model
10
+ model = CrossEncoder('vectara/hallucination_evaluation_model')
11
+
12
+ # Function to compute HHEM scores
13
def compute_hhem_scores(texts, summary):
    """Score each source text against ``summary`` with the module-level HHEM model.

    Args:
        texts: Iterable of source passages; each one is paired with ``summary``.
        summary: The summary text placed second in every ``[text, summary]`` pair.

    Returns:
        The value of ``model.predict`` for the pairs (one score per text,
        presumably a numpy array -- confirm against the installed
        sentence-transformers version), or an empty numpy array when
        ``texts`` yields nothing.
    """
    # Materialise first so generators work and emptiness can be checked reliably.
    pairs = [[text, summary] for text in texts]
    if not pairs:
        # Skip the model call entirely: there is nothing to score.
        return np.array([])
    return model.predict(pairs)
17
+
18
+ # Define the Vectara query function
19
def vectara_query(query: str, config: dict):
    """Send ``query`` to the Vectara v1 query endpoint and return matches plus summary.

    Args:
        query: Query text placed in the request payload.
        config: Settings dict. ``customer_id``, ``corpus_id`` and ``api_key``
            are required (a missing key raises ``KeyError``); ``lambda_val``
            (default 0.5) and ``top_k`` (default 10) are optional.

    Returns:
        A ``(results, summary)`` tuple. ``results`` is a list of
        ``[text, score]`` pairs taken from the first response set and
        ``summary`` is the text of its first summary entry (``""`` if absent).
        On a network error, a non-200 status or an unparsable body, an error
        is reported through ``st.error`` and ``([], "")`` is returned. A reply
        without any response set also yields ``([], "")``.
    """
    corpus_key = [{
        "customerId": config["customer_id"],
        "corpusId": config["corpus_id"],
        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
    }]
    payload = {
        "query": [{
            "query": query,
            "start": 0,
            "numResults": config.get("top_k", 10),
            "contextConfig": {
                "sentencesBefore": 2,
                "sentencesAfter": 2,
            },
            "corpusKey": corpus_key,
            "summary": [{
                "responseLang": "eng",
                "maxSummarizedResults": 5,
            }],
        }]
    }
    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            url="https://api.vectara.io/v1/query",
            headers=headers,
            data=json.dumps(payload),
            # Without a timeout a stalled connection would hang the app forever.
            timeout=30,
        )
    except requests.RequestException as exc:
        st.error(f"Query failed (request error: {exc})")
        return [], ""

    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return [], ""

    try:
        result = response.json()
    except ValueError:
        st.error("Query failed (response body was not valid JSON)")
        return [], ""

    # A 200 reply may still lack results or a summary; treat that as "nothing
    # found" instead of raising KeyError/IndexError.
    response_sets = result.get("responseSet") or []
    if not response_sets:
        return [], ""
    first_set = response_sets[0]
    hits = first_set.get("response") or []
    summaries = first_set.get("summary") or []
    summary = summaries[0].get("text", "") if summaries else ""

    res = [[hit["text"], hit["score"]] for hit in hits]
    return res, summary
62
+
63
+ # Streamlit UI setup
64
st.title("Vectara Content Query Interface")

# User inputs
query = st.text_input("Enter your query here", "")
lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)

if st.button("Query Vectara"):
    # Credentials come from the environment; map each config key to its variable
    # so a missing one can be reported by name.
    env_names = {
        "api_key": "VECTARA_API_KEY",
        "customer_id": "VECTARA_CUSTOMER_ID",
        "corpus_id": "VECTARA_CORPUS_ID",
    }
    config = {key: os.environ.get(env, "") for key, env in env_names.items()}
    config["lambda_val"] = lambda_val
    config["top_k"] = top_k

    missing = [env for key, env in env_names.items() if not config[key]]

    if not query.strip():
        # Do not spend an API call on a blank query.
        st.warning("Please enter a query.")
    elif missing:
        st.error(f"Missing environment variables: {', '.join(missing)}")
    else:
        results, summary = vectara_query(query, config)

        if results:
            st.subheader("Summary")
            st.write(summary)

            st.subheader("Top Results")

            # Only the first five result texts are scored against the summary
            # (presumably mirroring the summary's five-result limit -- confirm).
            texts = [r[0] for r in results[:5]]

            # Compute HHEM scores
            scores = compute_hhem_scores(texts, summary)

            # Prepare and display the dataframe
            df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
            st.dataframe(df)
        else:
            st.write("No results found.")
99
+
100
+
101
+
102
+
103
+
104
+ """
105
+ import streamlit as st
106
+ import requests
107
+ import json
108
+ import os
109
+ import pandas as pd
110
 
111
  # Assuming the environment variables are already set, we directly use them.
112
  # However, in a Streamlit app, you might want to set them up within the script for demonstration purposes
 
188
  st.write("No results found.")
189
 
190
  # Note: The integration of the model for HHEM scores is omitted as it requires the specific model details and implementation.
191
+
192
+ """