import os

import pandas as pd
import pymupdf4llm
import weave
from firerequests import FireRequests


@weave.op()
def get_markdown_from_pdf_url(url: str) -> str:
    # Download the PDF to a temporary local file, convert it to markdown
    # with pymupdf4llm, then delete the file before returning the text.
    FireRequests().download(url, "temp.pdf", show_progress=False)
    markdown = pymupdf4llm.to_markdown("temp.pdf", show_progress=False)
    os.remove("temp.pdf")
    return markdown
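
# Example usage (a minimal sketch; the URL below is a hypothetical
# placeholder, not something defined in this module):
#
#     markdown_text = get_markdown_from_pdf_url("https://example.com/sample.pdf")
#     print(markdown_text[:200])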


class EvaluationCallManager:
    """Collects and renders guardrail evaluation calls from a Weave project."""

    def __init__(self, entity: str, project: str, call_id: str, max_count: int = 10):
        # Fetch the root evaluation call from the Weave project; its children
        # are the per-sample predict-and-score calls walked below.
        self.base_call = weave.init(f"{entity}/{project}").get_call(call_id=call_id)
        self.max_count = max_count
        self.show_warning_in_app = False
        self.call_list = []

    def collect_guardrail_guard_calls_from_eval(self):
        # Walk the evaluation's child calls; each one is a predict-and-score
        # call until the trailing Evaluation.summarize call, which marks the
        # end of the per-sample calls.
        guard_calls, count = [], 0
        for eval_predict_and_score_call in self.base_call.children():
            if "Evaluation.summarize" in eval_predict_and_score_call._op_name:
                break
            # The first child is the guardrail's predict call, whose first
            # child is the underlying guard call; the second child of the
            # predict-and-score call is the scorer call.
            guardrail_predict_call = eval_predict_and_score_call.children()[0]
            guard_call = guardrail_predict_call.children()[0]
            score_call = eval_predict_and_score_call.children()[1]
            guard_calls.append(
                {
                    "input_prompt": str(guard_call.inputs["prompt"]),
                    "outputs": dict(guard_call.output),
                    "score": dict(score_call.output),
                }
            )
            count += 1
            # Cap the number of collected calls and flag the app to warn
            # that the results were truncated.
            if count >= self.max_count:
                self.show_warning_in_app = True
                break
        return guard_calls

    def render_calls_to_streamlit(self):
        # Build one shared column of input prompts, then a "<name>.safe" and
        # a "<name>.prediction_correctness" column per guardrail in call_list.
        dataframe = {
            "input_prompt": [
                call["input_prompt"] for call in self.call_list[0]["calls"]
            ]
        }
        for guardrail_call in self.call_list:
            dataframe[guardrail_call["guardrail_name"] + ".safe"] = [
                call["outputs"]["safe"] for call in guardrail_call["calls"]
            ]
            dataframe[guardrail_call["guardrail_name"] + ".prediction_correctness"] = [
                call["score"]["correct"] for call in guardrail_call["calls"]
            ]
        return pd.DataFrame(dataframe)
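

# A minimal usage sketch (assumption: the entity, project, call id, and
# guardrail name below are hypothetical placeholders; call_list is populated
# by the caller before rendering, as the app does):
if __name__ == "__main__":
    manager = EvaluationCallManager(
        entity="my-entity", project="my-project", call_id="0123456789abcdef"
    )
    manager.call_list.append(
        {
            "guardrail_name": "PromptInjectionGuardrail",
            "calls": manager.collect_guardrail_guard_calls_from_eval(),
        }
    )
    print(manager.render_calls_to_streamlit())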