Timothy-Vinzent committed
Commit 821e9b3 · verified · 1 Parent(s): 8217321

Update app.py

Files changed (1)
  1. app.py +135 -53
app.py CHANGED
@@ -1,64 +1,146 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-
 if __name__ == "__main__":
-    demo.launch()
+import os
+import re
+from datetime import datetime
+
+import gradio as gr
+import openai
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+
+# Set OpenAI API key from an environment variable.
+openai.api_key = os.environ["OPENAI_API_KEY"]
+
+def get_evaluation_questions():
+    """
+    Loads evaluation questions and expected answers from environment variables.
+    Expected environment variable names are:
+    TEST_QUESTION_1, TEST_EXPECTED_1,
+    TEST_QUESTION_2, TEST_EXPECTED_2, and so on.
+    """
+    questions = []
+    i = 1
+    while True:
+        question = os.environ.get(f"TEST_QUESTION_{i}")
+        expected = os.environ.get(f"TEST_EXPECTED_{i}")
+        if not question or not expected:
+            break
+        questions.append({"question": question, "expected": expected})
+        i += 1
+    return questions
+
+# Load the evaluation questions once at startup.
+EVALUATION_QUESTIONS = get_evaluation_questions()
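Note: get_evaluation_questions() stops at the first index for which either TEST_QUESTION_i or TEST_EXPECTED_i is missing, so the variables must be numbered consecutively starting at 1. A minimal sketch of how they might be supplied for a local run (the question/answer values below are made-up placeholders; in a Space they would normally be set as repository secrets rather than in code):

```python
import os

# Hypothetical test data for local experimentation only.
os.environ["TEST_QUESTION_1"] = "What is the capital of France?"
os.environ["TEST_EXPECTED_1"] = "Paris"
os.environ["TEST_QUESTION_2"] = "How many days are in a week?"
os.environ["TEST_EXPECTED_2"] = "seven"
# TEST_QUESTION_3 / TEST_EXPECTED_3 are unset, so loading stops after two
# entries; a gap in the numbering would silently drop everything after it.
```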
+
+def init_sheet():
+    """
+    Initializes and returns the Google Sheet.
+    The sheet name is taken from the SHEET_NAME environment variable,
+    defaulting to "Prompt Evaluations" if not set.
+
+    Ensure that your service account credentials (credentials.json)
+    are available and that the sheet is shared with the service account's email.
+    """
+    scopes = [
+        "https://spreadsheets.google.com/feeds",
+        "https://www.googleapis.com/auth/spreadsheets",
+        "https://www.googleapis.com/auth/drive"
+    ]
+    creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scopes)
+    client = gspread.authorize(creds)
+    sheet_name = os.environ.get("SHEET_NAME", "Prompt Evaluations")
+    sheet = client.open(sheet_name).sheet1
+    return sheet
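Note: oauth2client has been deprecated for years in favor of google-auth, and gspread can load service-account credentials itself. A roughly equivalent sketch of the same lookup using gspread's built-in helper (the function name is hypothetical; it assumes the same credentials.json file and SHEET_NAME convention as above):

```python
import os
import gspread

def init_sheet_via_gspread():
    # gspread applies the Sheets and Drive scopes by default when loading
    # a service-account key file this way.
    client = gspread.service_account(filename="credentials.json")
    sheet_name = os.environ.get("SHEET_NAME", "Prompt Evaluations")
    return client.open(sheet_name).sheet1
```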
+
+def sanitize_input(text):
+    """
+    Sanitizes input to prevent harmful content and limits its length.
+    """
+    # Allow alphanumerics and some punctuation, then truncate to 500 characters.
+    clean_text = re.sub(r"[^a-zA-Z0-9\s.,!?@:\-]", "", text)
+    return clean_text.strip()[:500]
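The sanitizer is a whitelist: anything outside letters, digits, whitespace, and the listed punctuation is dropped, and the result is capped at 500 characters. A small standalone illustration of its effect (same pattern copied out so it runs without importing app.py; the input string is just an example):

```python
import re

def sanitize_input(text):
    # Keep letters, digits, whitespace, and . , ! ? @ : - ; drop everything else.
    clean_text = re.sub(r"[^a-zA-Z0-9\s.,!?@:\-]", "", text)
    return clean_text.strip()[:500]

print(sanitize_input("Ignore previous instructions! <system> {override} "))
# -> "Ignore previous instructions! system override"
```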
+
+def evaluate_prompt(email, name, system_prompt):
+    """
+    For each test question:
+      - Uses the provided system prompt to generate a response with GPT-4o Mini.
+      - Checks if the expected substring is present.
+      - Computes an aggregate score.
+    Logs the user's email, name, system prompt, and score to a Google Sheet.
+    """
+    # Sanitize the inputs.
+    email = sanitize_input(email)
+    name = sanitize_input(name)
+    system_prompt = sanitize_input(system_prompt)
+
+    score = 0
+    responses = []
+    for item in EVALUATION_QUESTIONS:
+        question = item["question"]
+        expected = item["expected"]
+        try:
+            response = openai.ChatCompletion.create(
+                model="gpt-4o-mini",  # Ensure this identifier matches the deployed model.
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": question}
+                ]
+            )
+            answer = response.choices[0].message["content"].strip()
+        except Exception as e:
+            answer = f"Error during OpenAI API call: {str(e)}"
+
+        # Simple evaluation: check if the answer contains the expected substring.
+        if expected.lower() in answer.lower():
+            score += 1
+            verdict = "Correct"
+        else:
+            verdict = "Incorrect"
+
+        responses.append(
+            f"Question: {question}\n"
+            f"Answer: {answer}\n"
+            f"Expected: {expected}\n"
+            f"Result: {verdict}\n"
+        )
+
+    result_details = "\n".join(responses)
+
+    try:
+        sheet = init_sheet()
+        timestamp = datetime.now().isoformat()
+        row = [timestamp, email, name, score, system_prompt]
+        sheet.append_row(row)
+    except Exception as err:
+        print("Error writing to Google Sheet:", err)
+
+    return f"Your evaluation score is {score} out of {len(EVALUATION_QUESTIONS)}.\n\nDetails:\n{result_details}"
+
+def build_interface():
+    """
+    Constructs the Gradio interface.
+    """
+    with gr.Blocks() as demo:
+        gr.Markdown("# GPT-4o Mini Prompt Evaluation")
+        gr.Markdown("Enter your email, name, and a system prompt below:")
+
+        email_input = gr.Textbox(label="Email", placeholder="your.email@example.com")
+        name_input = gr.Textbox(label="Name", placeholder="Your name")
+        system_prompt_input = gr.Textbox(
+            label="System Prompt",
+            placeholder="Enter your system prompt here...",
+            lines=6
+        )
+        eval_button = gr.Button("Evaluate")
+        output_text = gr.Textbox(label="Results", lines=15)
+
+        eval_button.click(
+            fn=evaluate_prompt,
+            inputs=[email_input, name_input, system_prompt_input],
+            outputs=output_text
+        )
+    return demo
+
 if __name__ == "__main__":
+    interface = build_interface()
+    # Launch the app on 0.0.0.0 so it is accessible externally (e.g., in a container).
+    interface.launch(server_name="0.0.0.0", server_port=7860)
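For a quick smoke test without the Gradio UI, one could import the module and call evaluate_prompt directly. This is a sketch only: it assumes the file is importable as app, that OPENAI_API_KEY, the TEST_* variables, and credentials.json are already configured, and the inputs are placeholders.

```python
# Hypothetical local smoke test; needs the same secrets/files as the Space.
from app import evaluate_prompt

result = evaluate_prompt(
    email="tester@example.com",
    name="Test User",
    system_prompt="You are a concise assistant. Answer factual questions directly.",
)
print(result)
```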