Spaces:
Running
Running
tanveeshsingh
committed on
Commit
·
23648c7
1
Parent(s):
c3ccbd8
Change
Browse files
app.py
CHANGED
@@ -32,6 +32,7 @@ def update_inputs(input_style):
|
|
32 |
|
33 |
|
34 |
async def lynx(input_style_dropdown,document_input,question_input,answer_input):
|
|
|
35 |
if input_style_dropdown=='QA format':
|
36 |
client = AsyncOpenAI(
|
37 |
base_url="https://s6mipt5j797e6fql.us-east-1.aws.endpoints.huggingface.cloud/v1/",
|
@@ -59,13 +60,19 @@ Your output should be in JSON FORMAT with the keys "REASONING" and "SCORE":
|
|
59 |
frequency_penalty=None,
|
60 |
presence_penalty=None
|
61 |
)
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
64 |
else:
|
65 |
-
|
66 |
-
|
|
|
67 |
# Function to judge reliability based on the selected input format
|
68 |
async def judge_reliability(input_style, document, conversation, claim, question, answer):
|
|
|
69 |
if input_style == "Dialog":
|
70 |
conversation = json.loads(conversation)
|
71 |
print(conversation)
|
@@ -74,8 +81,13 @@ async def judge_reliability(input_style, document, conversation, claim, question
|
|
74 |
outputs = await collinear.judge.veritas.natural_language_inference(document,claim)
|
75 |
elif input_style == "QA format":
|
76 |
outputs = await collinear.judge.veritas.question_answer(document,question,answer)
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
|
81 |
|
@@ -115,9 +127,11 @@ Raises Full Year FY25 Operating Cash Flow Growth Guidance to 23% to 25% Y/Y""")
|
|
115 |
answer_input = gr.Textbox(label="Answer", lines=5, visible=False, value="Salesforce revenue guidance for next year is about $37.8Bn ")
|
116 |
|
117 |
with gr.Row():
|
118 |
-
result_output = gr.Textbox(label="Veritas Model")
|
|
|
119 |
|
120 |
-
lynx_output = gr.Textbox(label="Lynx Model")
|
|
|
121 |
|
122 |
|
123 |
# Set the visibility of inputs based on the selected input style
|
@@ -131,11 +145,11 @@ Raises Full Year FY25 Operating Cash Flow Growth Guidance to 23% to 25% Y/Y""")
|
|
131 |
gr.Button("Submit").click(
|
132 |
fn=judge_reliability,
|
133 |
inputs=[input_style_dropdown, document_input, conversation_input, claim_input, question_input, answer_input],
|
134 |
-
outputs=result_output
|
135 |
).then(
|
136 |
fn=lynx,
|
137 |
inputs=[input_style_dropdown,document_input,question_input,answer_input],
|
138 |
-
outputs=lynx_output
|
139 |
)
|
140 |
|
141 |
# Launch the demo
|
|
|
32 |
|
33 |
|
34 |
async def lynx(input_style_dropdown,document_input,question_input,answer_input):
|
35 |
+
start_time = time.time()
|
36 |
if input_style_dropdown=='QA format':
|
37 |
client = AsyncOpenAI(
|
38 |
base_url="https://s6mipt5j797e6fql.us-east-1.aws.endpoints.huggingface.cloud/v1/",
|
|
|
60 |
frequency_penalty=None,
|
61 |
presence_penalty=None
|
62 |
)
|
63 |
+
message = chat_completion.choices.pop().message.content
|
64 |
+
message_new = message[len(message)-6:len(message)]
|
65 |
+
if 'FAIL' in message_new:
|
66 |
+
results = "🟥"
|
67 |
+
else:
|
68 |
+
results = "🟩"
|
69 |
else:
|
70 |
+
results = 'NA'
|
71 |
+
lynx_time = round(time.time() - start_time, 2) # Calculate time taken for Lynx
|
72 |
+
return results, lynx_time
|
73 |
# Function to judge reliability based on the selected input format
|
74 |
async def judge_reliability(input_style, document, conversation, claim, question, answer):
|
75 |
+
start_time = time.time()
|
76 |
if input_style == "Dialog":
|
77 |
conversation = json.loads(conversation)
|
78 |
print(conversation)
|
|
|
81 |
outputs = await collinear.judge.veritas.natural_language_inference(document,claim)
|
82 |
elif input_style == "QA format":
|
83 |
outputs = await collinear.judge.veritas.question_answer(document,question,answer)
|
84 |
+
output = outputs.judgement
|
85 |
+
if output ==1:
|
86 |
+
results = "🟩"
|
87 |
+
else:
|
88 |
+
results = "🟥"
|
89 |
+
veritas_time = round(time.time() - start_time, 2) # Calculate time taken for Veritas
|
90 |
+
return result, veritas_time
|
91 |
|
92 |
|
93 |
|
|
|
127 |
answer_input = gr.Textbox(label="Answer", lines=5, visible=False, value="Salesforce revenue guidance for next year is about $37.8Bn ")
|
128 |
|
129 |
with gr.Row():
|
130 |
+
result_output = gr.Textbox(label="Veritas Model Result")
|
131 |
+
veritas_time_output = gr.Textbox(label="Veritas Model Time (seconds)")
|
132 |
|
133 |
+
lynx_output = gr.Textbox(label="Lynx Model Result")
|
134 |
+
lynx_time_output = gr.Textbox(label="Lynx Model Time (seconds)")
|
135 |
|
136 |
|
137 |
# Set the visibility of inputs based on the selected input style
|
|
|
145 |
gr.Button("Submit").click(
|
146 |
fn=judge_reliability,
|
147 |
inputs=[input_style_dropdown, document_input, conversation_input, claim_input, question_input, answer_input],
|
148 |
+
outputs=[result_output,veritas_time_output]
|
149 |
).then(
|
150 |
fn=lynx,
|
151 |
inputs=[input_style_dropdown,document_input,question_input,answer_input],
|
152 |
+
outputs=[lynx_output, lynx_time_output]
|
153 |
)
|
154 |
|
155 |
# Launch the demo
|