File size: 7,502 Bytes
2875866
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import json
import os
import logging
import re
import subprocess
from functools import wraps

from tools.tools import verify_sql_query
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate

# Configure root logging once at import time: INFO level, timestamped records.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

class ValidLM:
    """Validation & Logging System for LLM Applications.

    Each project is persisted as a JSON file under ``PROJECTS_DIR`` that
    stores the configured assertions plus log/accuracy history.  Assertions
    are checked against LLM outputs via :meth:`verify_assertions`, either
    directly or through the :meth:`trace` decorator.
    """

    PROJECTS_DIR = "projects"  # Directory where per-project JSON files live

    def __init__(self, project_name="default_project"):
        """Create (or attach to) the project named *project_name*.

        Args:
            project_name: Basename of the project; its state is stored at
                ``PROJECTS_DIR/<project_name>.json``.
        """
        self.project_name = project_name
        self.project_file = os.path.join(self.PROJECTS_DIR, f"{project_name}.json")
        self.knowledge_base = None  # Could be a link, PDF, or CSV
        self._initialize_project()

    def _initialize_project(self):
        """Create an empty project file if it doesn't exist.

        Fix: ensure ``PROJECTS_DIR`` exists before opening the file —
        previously ``open(..., "w")`` raised ``FileNotFoundError`` on a
        fresh checkout with no ``projects/`` directory.
        """
        os.makedirs(self.PROJECTS_DIR, exist_ok=True)
        if not os.path.exists(self.project_file):
            initial_data = {
                "project_name": self.project_name,
                "assertions": {
                    "deterministic": [],
                    "misc": [],
                    "factual": False,
                    "sql-only": False,
                    "knowledgebase": None
                },
                "log_history": [],
                "accuracy_history": []
            }
            with open(self.project_file, "w") as f:
                json.dump(initial_data, f, indent=4)

    def _load_project(self):
        """Return the project state parsed from the JSON file."""
        with open(self.project_file, "r") as f:
            return json.load(f)

    def _save_project(self, data):
        """Write *data* back to the project JSON file (pretty-printed)."""
        with open(self.project_file, "w") as f:
            json.dump(data, f, indent=4)

    def _start_streamlit_ui(self):
        """Start the Streamlit UI in the background without blocking.

        Assumes ``app.py`` lives one directory above this module — TODO
        confirm against the repository layout.
        """
        app_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "app.py"))
        # List argv + shell=False (default): no shell-injection surface.
        subprocess.Popen(
            ["streamlit", "run", app_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        print(f"βœ… Streamlit UI started for project '{self.project_name}'")

    def add_assertion(self, assertion_type, assertion):
        """Record an assertion in the project file.

        Args:
            assertion_type: One of ``deterministic``, ``factual``, ``misc``,
                ``sql-only`` or ``knowledgebase``.  Boolean-like types
                (``factual``, ``sql-only``) and ``knowledgebase`` overwrite
                the stored value; list types append.
            assertion: The assertion payload (dict for deterministic checks,
                bool for flags, path/URL for the knowledge base).

        Raises:
            ValueError: If *assertion_type* is not a recognised type.
        """
        valid_types = {"deterministic", "factual", "misc", "sql-only", "knowledgebase"}
        if assertion_type not in valid_types:
            raise ValueError(f"Invalid assertion type. Choose from {valid_types}")

        project_data = self._load_project()
        if assertion_type in {"factual", "sql-only", "knowledgebase"}:
            # Scalar slots: replace the stored value.
            project_data["assertions"][assertion_type] = assertion
        else:
            # List slots ("deterministic", "misc"): accumulate.
            project_data["assertions"][assertion_type].append(assertion)

        self._save_project(project_data)
        logging.info(f"Added {assertion_type} assertion: {assertion}")

    def generate_clarifying_questions(self, user_input):
        """Generate clarifying multiple-choice questions for *user_input*.

        Fix: the literal JSON braces in the example are now escaped as
        ``{{``/``}}`` — ``ChatPromptTemplate`` treats single braces as
        template variables, so the original template raised ``KeyError``
        the moment ``prompt.format(...)`` ran.

        Returns:
            A list of ``{"question": ..., "options": [...]}`` dicts, or an
            empty list if the LLM response is not valid JSON.  The result is
            also cached on ``self.clarifying_questions``.
        """
        # NOTE(review): ChatGroq may expect the JSON switch via
        # model_kwargs={"response_format": {"type": "json_object"}} rather
        # than a direct kwarg — confirm against the installed langchain_groq.
        llm = ChatGroq(temperature=0, response_format="json")

        prompt = ChatPromptTemplate.from_template("""
        Given the user prompt: "{user_input}", generate clarifying multiple-choice questions
        to define constraints, preferences, and requirements.

        Example Output:
        [
            {{
                "question": "What is the preferred programming language?",
                "options": ["Python", "Java", "C++"]
            }},
            {{
                "question": "Should the solution be optimized for speed?",
                "options": ["Yes", "No"]
            }}
        ]

        Return ONLY valid JSON as per the format above.
        """)

        response = llm.predict(prompt.format(user_input=user_input))

        try:
            clarifying_questions = json.loads(response)
        except json.JSONDecodeError:
            logging.error("Invalid JSON response from LLM.")
            clarifying_questions = []
        self.clarifying_questions = clarifying_questions
        return clarifying_questions

    def verify_assertions(self, user_input, llm_output):
        """Run *llm_output* through all stored assertions.

        Args:
            user_input: The prompt that produced *llm_output* (currently
                unused by the checks themselves; kept for logging parity).
            llm_output: The LLM response string to validate.

        Returns:
            Dict with keys ``deterministic``, ``factual`` and ``misc``, each
            a list of ``(assertion, result)`` pairs.
        """
        project_data = self._load_project()
        assertions = project_data["assertions"]
        results = {"deterministic": [], "factual": [], "misc": []}

        # πŸ”΅ Deterministic Assertions — cheap, rule-based checks.
        for assertion in assertions["deterministic"]:
            pattern = assertion.get("value")
            check_type = assertion.get("check_type")

            if check_type == "regex":
                match = re.search(pattern, llm_output) is not None
            elif check_type == "contains":
                match = pattern in llm_output
            elif check_type == "not-contains":
                match = pattern not in llm_output
            elif check_type == "json_format":
                try:
                    json.loads(llm_output)
                    match = True
                except json.JSONDecodeError:
                    match = False
            elif check_type == "sql_format":
                match = verify_sql_query(llm_output)
            else:
                # Unknown check types fail closed rather than silently pass.
                match = False

            results["deterministic"].append((assertion, match))

        # 🟑 Factual Assertions — placeholder until knowledge-base parsing
        # (module 3) is wired in.
        if assertions["factual"] and assertions["knowledgebase"]:
            for fact in ["sample fact"]:  # TODO: derive facts from the KB
                match = fact in llm_output
                results["factual"].append((fact, match))
        else:
            results["factual"].append(("Knowledge Base Missing or Disabled", False))

        # 🟒 Miscellaneous Assertions — placeholder for LLM-graded checks.
        for assertion in assertions["misc"]:
            validation = "complex check passed"  # Placeholder for complex checks
            results["misc"].append((assertion, validation))

        return results

    def trace(self, func):
        """Decorator: log a call, verify its output, persist the results.

        Wraps *func* so that each invocation logs the input/output, runs
        :meth:`verify_assertions` on the returned value, and appends the
        verification results to the project's ``accuracy_history``.  The
        wrapped function's return value is passed through unchanged.
        """
        @wraps(func)
        def wrapper(*args, **kwargs):
            user_input = args[0] if args else None
            logging.info(f"Executing {func.__name__} with input: {user_input}")

            result = func(*args, **kwargs)
            logging.info(f"Received Output: {result}")

            verification_results = self.verify_assertions(user_input, result)
            logging.info(f"Verification Results: {verification_results}")

            # Persist the verification outcome alongside earlier runs.
            project_data = self._load_project()
            project_data["accuracy_history"].append(verification_results)
            self._save_project(project_data)

            return result
        return wrapper