whyu committed
Commit d873adb
1 Parent(s): 9133cc3

initial commit

Files changed (1)
  1. app.py +3 -5
app.py CHANGED
@@ -16,10 +16,8 @@ openai.api_version = os.environ.get("AZURE_OPENAI_API_VERSION")
 deployment_id = os.environ.get("AZURE_OPENAI_DEP_ID")
 gpt_model = deployment_id
 
-print(os.environ.get("AZURE_OPENAI_KEY"))
-print(os.environ.get("AZURE_OPENAI_ENDPOINT"))
-print(os.environ.get("AZURE_OPENAI_API_VERSION"))
-print(gpt_model)
+
+
 
 prompt = """Compare the ground truth and prediction from AI models, to give a correctness score for the prediction. <AND> in the ground truth means it is totally right only when all elements in the ground truth are present in the prediction, and <OR> means it is totally right when any one element in the ground truth is present in the prediction. The correctness score is 0.0 (totally wrong), 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, or 1.0 (totally right). Just complete the last space of the correctness score.
 
@@ -300,7 +298,7 @@ markdown = """
 
 In this demo, we offer MM-Vet LLM-based (GPT-4) evaluator to grade open-ended outputs from your models.
 
-Plese upload your json file of your model results containing `\{v1_0\: ..., v1_1\: ..., \}`.
+Plese upload your json file of your model results containing `\{v1_0\: ..., v1_1\: ..., \}`like
 
 The grading may last 5 minutes. Sine we only support 1 queue, the grading time may be longer when you need to wait for other users' grading to finish.
 
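For context, the grading prompt in the first hunk is sent to the Azure OpenAI deployment named by AZURE_OPENAI_DEP_ID. A rough sketch of such a call with the pre-1.0 openai SDK; the message layout, sample question fields, and the api_key/api_base assignments below are assumptions, since the actual grading loop is outside this diff:

import os
import openai

# Azure OpenAI configuration mirroring the environment variables used in
# app.py. Assigning api_key/api_base here is an assumption; the diff only
# shows api_version and the deployment ID being read.
openai.api_type = "azure"
openai.api_key = os.environ.get("AZURE_OPENAI_KEY")
openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT")
openai.api_version = os.environ.get("AZURE_OPENAI_API_VERSION")
deployment_id = os.environ.get("AZURE_OPENAI_DEP_ID")

# Abridged grading instruction; the full text is in the diff above.
prompt = (
    "Compare the ground truth and prediction from AI models, to give a "
    "correctness score for the prediction. ... Just complete the last "
    "space of the correctness score."
)

# One question / ground truth / prediction triple (sample values are
# illustrative; '<OR>' means any listed element makes the answer correct).
grading_input = (
    prompt
    + "\nQuestion: How many apples are there?"
    + "\nGround truth: three <OR> 3"
    + "\nPrediction: There are three apples on the table."
    + "\nCorrectness:"
)

response = openai.ChatCompletion.create(
    engine=deployment_id,  # Azure routes requests by deployment name
    messages=[{"role": "user", "content": grading_input}],
    temperature=0.0,  # deterministic scoring
)
score = response["choices"][0]["message"]["content"].strip()  # e.g. "1.0"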