Spaces:
Runtime error
Runtime error
Merge pull request #7 from Badrivishal/prometheus
Browse files- Dockerfile +9 -1
- app.py +15 -2
- prometheus.yml +11 -0
- prometheus_helper.py +56 -0
- requirements.txt +2 -1
Dockerfile
CHANGED
|
@@ -6,7 +6,15 @@ RUN pip install --no-cache-dir --upgrade pip \
|
|
| 6 |
&& if [ -f requirements.txt ]; then pip install --no-cache-dir -r requirements.txt; else pip install --no-cache-dir gradio; fi
|
| 7 |
COPY . .
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
EXPOSE 7860
|
|
|
|
|
|
|
| 10 |
ENV GRADIO_SERVER_NAME=0.0.0.0
|
| 11 |
|
| 12 |
-
CMD
|
|
|
|
| 6 |
&& if [ -f requirements.txt ]; then pip install --no-cache-dir -r requirements.txt; else pip install --no-cache-dir gradio; fi
|
| 7 |
COPY . .
|
| 8 |
|
| 9 |
+
ENV DEBIAN_FRONTEND noninteractive
|
| 10 |
+
RUN apt-get update && \
|
| 11 |
+
apt-get upgrade -yq ca-certificates && \
|
| 12 |
+
apt-get install -yq --no-install-recommends \
|
| 13 |
+
prometheus-node-exporter
|
| 14 |
+
|
| 15 |
EXPOSE 7860
|
| 16 |
+
EXPOSE 8000
|
| 17 |
+
EXPOSE 9100
|
| 18 |
ENV GRADIO_SERVER_NAME=0.0.0.0
|
| 19 |
|
| 20 |
+
CMD bash -c "prometheus-node-exporter --web.listen-address=':9100' & python app.py"
|
app.py
CHANGED
|
@@ -7,7 +7,7 @@ import gradio as gr
|
|
| 7 |
from datasets import load_dataset
|
| 8 |
from sentence_transformers import SentenceTransformer
|
| 9 |
from huggingface_hub import InferenceClient
|
| 10 |
-
|
| 11 |
# --- Credit ---
|
| 12 |
# Most of this code was generated using AI (ChatGPT, GitHub Copilot).
|
| 13 |
# Please refer to the references of the report for concrete links to the respective AI interactions.
|
|
@@ -16,9 +16,11 @@ from huggingface_hub import InferenceClient
|
|
| 16 |
INDEX_FILE = "xkcd.index"
|
| 17 |
META_FILE = "meta.pkl"
|
| 18 |
CHAT_MODEL = os.getenv("CHAT_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
|
|
|
|
| 19 |
|
| 20 |
# --- Build / load index ---
|
| 21 |
def build_index():
|
|
|
|
| 22 |
print("Building FAISS index...")
|
| 23 |
ds = load_dataset("olivierdehaene/xkcd", split="train")
|
| 24 |
model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
@@ -48,6 +50,7 @@ def build_index():
|
|
| 48 |
with open(META_FILE, "wb") as f:
|
| 49 |
pickle.dump(meta, f)
|
| 50 |
|
|
|
|
| 51 |
return index, meta
|
| 52 |
|
| 53 |
def get_index():
|
|
@@ -77,11 +80,16 @@ def respond(
|
|
| 77 |
else:
|
| 78 |
return "⚠️ Please sign in with your Hugging Face account (top of the page) or set the HF_TOKEN environment variable"
|
| 79 |
|
|
|
|
| 80 |
# Embed the query and search FAISS
|
|
|
|
| 81 |
query_vec = embedder.encode([message], convert_to_numpy=True)
|
| 82 |
D, I = index.search(query_vec, 5)
|
| 83 |
candidates = [meta[int(i)] for i in I[0]]
|
| 84 |
|
|
|
|
|
|
|
|
|
|
| 85 |
context = "\n".join(
|
| 86 |
f"[{c['id']}] {c['title']}\nTranscript: {c['transcript']}\nExplanation: {c['explanation']}"
|
| 87 |
for c in candidates
|
|
@@ -109,6 +117,8 @@ EXPLANATION
|
|
| 109 |
temperature=0.0, # TODO
|
| 110 |
)
|
| 111 |
|
|
|
|
|
|
|
| 112 |
# Be tolerant to slight schema differences
|
| 113 |
try:
|
| 114 |
choice = resp.choices[0]
|
|
@@ -132,14 +142,17 @@ EXPLANATION
|
|
| 132 |
img_url = json.load(url)["img"]
|
| 133 |
print(f'Got image url: {img_url}')
|
| 134 |
|
|
|
|
| 135 |
return [out_text, gr.Image(value=img_url)]
|
| 136 |
except ValueError:
|
| 137 |
print("Couldn't parse xkcd ID or get image! That should not happen.")
|
| 138 |
-
|
|
|
|
| 139 |
return out_text
|
| 140 |
|
| 141 |
if __name__ == "__main__":
|
| 142 |
# --- UI ---
|
|
|
|
| 143 |
with gr.Blocks(theme='gstaff/xkcd') as demo:
|
| 144 |
gr.Markdown("# xkcd Comic Finder")
|
| 145 |
gr.Markdown(
|
|
|
|
| 7 |
from datasets import load_dataset
|
| 8 |
from sentence_transformers import SentenceTransformer
|
| 9 |
from huggingface_hub import InferenceClient
|
| 10 |
+
from prometheus_helper import PrometheusHelper
|
| 11 |
# --- Credit ---
|
| 12 |
# Most of this code was generated using AI (ChatGPT, GitHub Copilot).
|
| 13 |
# Please refer to the references of the report for concrete links to the respective AI interactions.
|
|
|
|
| 16 |
INDEX_FILE = "xkcd.index"
|
| 17 |
META_FILE = "meta.pkl"
|
| 18 |
CHAT_MODEL = os.getenv("CHAT_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
|
| 19 |
+
prometheus_helper = PrometheusHelper()
|
| 20 |
|
| 21 |
# --- Build / load index ---
|
| 22 |
def build_index():
|
| 23 |
+
prometheus_helper.start_index_build_timer()
|
| 24 |
print("Building FAISS index...")
|
| 25 |
ds = load_dataset("olivierdehaene/xkcd", split="train")
|
| 26 |
model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
|
|
| 50 |
with open(META_FILE, "wb") as f:
|
| 51 |
pickle.dump(meta, f)
|
| 52 |
|
| 53 |
+
prometheus_helper.stop_index_build_timer()
|
| 54 |
return index, meta
|
| 55 |
|
| 56 |
def get_index():
|
|
|
|
| 80 |
else:
|
| 81 |
return "⚠️ Please sign in with your Hugging Face account (top of the page) or set the HF_TOKEN environment variable"
|
| 82 |
|
| 83 |
+
prometheus_helper.start_request_timer()
|
| 84 |
# Embed the query and search FAISS
|
| 85 |
+
prometheus_helper.start_faiss_index_search_timer()
|
| 86 |
query_vec = embedder.encode([message], convert_to_numpy=True)
|
| 87 |
D, I = index.search(query_vec, 5)
|
| 88 |
candidates = [meta[int(i)] for i in I[0]]
|
| 89 |
|
| 90 |
+
prometheus_helper.stop_faiss_index_search_timer()
|
| 91 |
+
prometheus_helper.start_chat_model_call_timer()
|
| 92 |
+
|
| 93 |
context = "\n".join(
|
| 94 |
f"[{c['id']}] {c['title']}\nTranscript: {c['transcript']}\nExplanation: {c['explanation']}"
|
| 95 |
for c in candidates
|
|
|
|
| 117 |
temperature=0.0, # TODO
|
| 118 |
)
|
| 119 |
|
| 120 |
+
prometheus_helper.stop_chat_model_call_timer()
|
| 121 |
+
|
| 122 |
# Be tolerant to slight schema differences
|
| 123 |
try:
|
| 124 |
choice = resp.choices[0]
|
|
|
|
| 142 |
img_url = json.load(url)["img"]
|
| 143 |
print(f'Got image url: {img_url}')
|
| 144 |
|
| 145 |
+
prometheus_helper.record_frequency(int(id))
|
| 146 |
return [out_text, gr.Image(value=img_url)]
|
| 147 |
except ValueError:
|
| 148 |
print("Couldn't parse xkcd ID or get image! That should not happen.")
|
| 149 |
+
prometheus_helper.record_request(True)
|
| 150 |
+
prometheus_helper.stop_request_timer()
|
| 151 |
return out_text
|
| 152 |
|
| 153 |
if __name__ == "__main__":
|
| 154 |
# --- UI ---
|
| 155 |
+
prometheus_helper.setup_prometheus()
|
| 156 |
with gr.Blocks(theme='gstaff/xkcd') as demo:
|
| 157 |
gr.Markdown("# xkcd Comic Finder")
|
| 158 |
gr.Markdown(
|
prometheus.yml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prometheus scrape configuration for the xkcd finder deployment.
global:
  # Poll every target once per 15 seconds.
  scrape_interval: 15s

scrape_configs:
  # Application metrics — served on port 8000 by the app's
  # prometheus_client start_http_server(8000) call.
  - job_name: 'gradio_app'
    static_configs:
      - targets: ['xkcd_finder_app:8000']

  # Host-level metrics — prometheus-node-exporter is started alongside
  # the app in the Dockerfile CMD, listening on :9100.
  # NOTE(review): 'xkcd_finder_app' is assumed to be the container's
  # network alias (e.g. docker-compose service name) — confirm.
  - job_name: 'node_exporter'
    static_configs:
      - targets: ['xkcd_finder_app:9100']
|
prometheus_helper.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from prometheus_client import start_http_server, Counter, Summary, Histogram
|
| 2 |
+
import time
|
| 3 |
+
|
| 4 |
+
class PrometheusHelper:
    """Collect and expose Prometheus metrics for the xkcd finder app.

    Wraps ``prometheus_client`` counters/summaries plus simple
    ``start_*``/``stop_*`` timer pairs used around the FAISS search,
    the chat-model call, the index build, and the whole request.

    Fix over the original: every timer's start timestamp is initialized
    to ``None`` in ``__init__`` (the request timer's ``start_time``
    previously was not), and each ``stop_*`` method is a safe no-op when
    its matching ``start_*`` was never called, instead of raising.
    """

    def __init__(self):
        # Request outcome counters.
        self.request_counter = Counter('app_requests_total', 'Total number of requests')
        self.successful_requests_counter = Counter('app_successful_requests_total', 'Total number of successful requests')
        self.failed_requests_counter = Counter('app_failed_requests_total', 'Total number of failed requests')
        # Latency summaries.
        self.request_duration_summary = Summary('app_request_duration_seconds', 'Time spent processing request')
        self.index_build_duration = Summary('index_build_duration_seconds', 'Time spent building the index')
        self.faiss_index_search_duration = Summary('faiss_index_search_duration_seconds', 'Time spent searching the index')
        self.chat_model_call_duration = Summary('chat_model_call_duration_seconds', 'Time spent calling the chat model')
        # Per-comic selection counter, one label value per comic id.
        self.comic_frequency = Counter('comic_frequency', 'Frequency of comics being selected', ['comic_id'])
        # Timer start timestamps; None means "timer not running".
        self.start_time = None  # request timer (name kept for compatibility)
        self.index_build_start_time = None
        self.faiss_index_search_start_time = None
        self.chat_model_call_start_time = None

    def setup_prometheus(self):
        """Start the metrics HTTP endpoint on port 8000 (scraped by Prometheus)."""
        start_http_server(8000)

    # --- whole-request timing and counting ---

    def start_request_timer(self):
        """Mark the beginning of a user request."""
        self.start_time = time.time()

    def stop_request_timer(self):
        """Record elapsed request time; no-op if the timer was never started."""
        if self.start_time is not None:
            self.request_duration_summary.observe(time.time() - self.start_time)
            self.start_time = None

    def record_request(self, success: bool):
        """Count one finished request, bucketed by success/failure."""
        self.request_counter.inc()
        if success:
            self.successful_requests_counter.inc()
        else:
            self.failed_requests_counter.inc()

    # --- index build timing ---

    def start_index_build_timer(self):
        """Mark the beginning of a FAISS index build."""
        self.index_build_start_time = time.time()

    def stop_index_build_timer(self):
        """Record index-build time; no-op if the timer was never started."""
        if self.index_build_start_time is not None:
            self.index_build_duration.observe(time.time() - self.index_build_start_time)
            self.index_build_start_time = None

    # --- FAISS search timing ---

    def start_faiss_index_search_timer(self):
        """Mark the beginning of a FAISS similarity search."""
        self.faiss_index_search_start_time = time.time()

    def stop_faiss_index_search_timer(self):
        """Record FAISS search time; no-op if the timer was never started."""
        if self.faiss_index_search_start_time is not None:
            self.faiss_index_search_duration.observe(time.time() - self.faiss_index_search_start_time)
            self.faiss_index_search_start_time = None

    # --- chat-model call timing ---

    def start_chat_model_call_timer(self):
        """Mark the beginning of a chat-model inference call."""
        self.chat_model_call_start_time = time.time()

    def stop_chat_model_call_timer(self):
        """Record chat-model call time; no-op if the timer was never started."""
        if self.chat_model_call_start_time is not None:
            self.chat_model_call_duration.observe(time.time() - self.chat_model_call_start_time)
            self.chat_model_call_start_time = None

    # --- comic selection frequency ---

    def record_frequency(self, comic_id: int):
        """Increment the selection counter for *comic_id*."""
        self.comic_frequency.labels(comic_id=comic_id).inc()
|
| 56 |
+
|
requirements.txt
CHANGED
|
@@ -3,4 +3,5 @@ faiss-cpu
|
|
| 3 |
transformers
|
| 4 |
sentence-transformers
|
| 5 |
datasets
|
| 6 |
-
gradio[oauth]
|
|
|
|
|
|
| 3 |
transformers
|
| 4 |
sentence-transformers
|
| 5 |
datasets
|
| 6 |
+
gradio[oauth]
|
| 7 |
+
prometheus_client==0.16.*
|