import json
import os
import logging
import re
import subprocess
from functools import wraps
from tools.tools import verify_sql_query
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
class ValidLM:
    """Validation & Logging System for LLM Applications"""

    PROJECTS_DIR = "projects"  # Directory for project files

    def __init__(self, project_name="default_project"):
        self.project_name = project_name
        self.project_file = os.path.join(self.PROJECTS_DIR, f"{project_name}.json")
        self.knowledge_base = None  # Could be a link, PDF, or CSV
        self._initialize_project()
        # self._start_streamlit_ui()

    def _initialize_project(self):
        """Create an empty project file if it doesn't exist."""
        os.makedirs(self.PROJECTS_DIR, exist_ok=True)  # Ensure the projects directory exists
        if not os.path.exists(self.project_file):
            initial_data = {
                "project_name": self.project_name,
                "assertions": {
                    "deterministic": [],
                    "misc": [],
                    "factual": False,
                    "sql-only": False,
                    "knowledgebase": None
                },
                "log_history": [],
                "accuracy_history": []
            }
            with open(self.project_file, "w") as f:
                json.dump(initial_data, f, indent=4)

    def _load_project(self):
        """Load the project data from the JSON file."""
        with open(self.project_file, "r") as f:
            return json.load(f)

    def _save_project(self, data):
        """Save the project data to the JSON file."""
        with open(self.project_file, "w") as f:
            json.dump(data, f, indent=4)
    def _start_streamlit_ui(self):
        """Start the Streamlit UI in the background."""
        app_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "app.py"))
        # Start Streamlit without blocking the main thread
        subprocess.Popen(
            ["streamlit", "run", app_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        print(f"✅ Streamlit UI started for project '{self.project_name}'")
    def add_assertion(self, assertion_type, assertion):
        """Add an assertion to the project file."""
        valid_types = {"deterministic", "factual", "misc", "sql-only", "knowledgebase"}
        if assertion_type not in valid_types:
            raise ValueError(f"Invalid assertion type. Choose from {valid_types}")
        project_data = self._load_project()
        if assertion_type in {"factual", "sql-only"}:
            project_data["assertions"][assertion_type] = assertion  # Boolean flags
        elif assertion_type == "knowledgebase":
            project_data["assertions"]["knowledgebase"] = assertion  # Path or URL
        else:
            project_data["assertions"][assertion_type].append(assertion)  # List entries
        self._save_project(project_data)
        logging.info(f"Added {assertion_type} assertion: {assertion}")
    def generate_clarifying_questions(self, user_input):
        """Generate clarifying questions using ChatGroq in JSON mode."""
        # Groq's JSON mode is enabled via response_format={"type": "json_object"};
        # the model name is an example — any JSON-mode-capable Groq model works.
        llm = ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0,
            model_kwargs={"response_format": {"type": "json_object"}},
        )
        # Literal braces in the JSON example are doubled so ChatPromptTemplate
        # does not parse them as template variables.
        prompt = ChatPromptTemplate.from_template("""
        Given the user prompt: "{user_input}", generate clarifying multiple-choice questions
        to define constraints, preferences, and requirements.
        Example Output:
        [
            {{
                "question": "What is the preferred programming language?",
                "options": ["Python", "Java", "C++"]
            }},
            {{
                "question": "Should the solution be optimized for speed?",
                "options": ["Yes", "No"]
            }}
        ]
        Return ONLY valid JSON as per the format above.
        """)
        response = llm.invoke(prompt.format(user_input=user_input)).content
        try:
            clarifying_questions = json.loads(response)
            self.clarifying_questions = clarifying_questions
            return clarifying_questions
        except json.JSONDecodeError:
            logging.error("Invalid JSON response from LLM.")
            self.clarifying_questions = []
            return []
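    # Usage sketch (assumes GROQ_API_KEY is set in the environment and the
    # model honors the JSON instruction; the output shape is not guaranteed):
    #
    #   questions = validator.generate_clarifying_questions("Build a REST API")
    #   for q in questions:
    #       print(q["question"], q["options"])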
    def verify_assertions(self, user_input, llm_output):
        """Run checks against stored assertions:
        1. Deterministic checks
        2. Fact correction against the knowledge base
        3. Misc checks via LLM
        4. Behaviour checks
        """
        project_data = self._load_project()
        assertions = project_data["assertions"]
        results = {"deterministic": [], "factual": [], "misc": []}

        # 🔵 Deterministic Assertions
        for assertion in assertions["deterministic"]:
            pattern = assertion.get("value")
            check_type = assertion.get("check_type")
            if check_type == "regex":
                match = re.search(pattern, llm_output) is not None
            elif check_type == "contains":
                match = pattern in llm_output
            elif check_type == "not-contains":
                match = pattern not in llm_output
            elif check_type == "json_format":
                try:
                    json.loads(llm_output)
                    match = True
                except json.JSONDecodeError:
                    match = False
            elif check_type == "sql_format":
                match = verify_sql_query(llm_output)
            else:
                match = False
            results["deterministic"].append((assertion, match))

        # 🟡 Factual Assertions (TODO: use module 3)
        if assertions["factual"] and assertions["knowledgebase"]:
            # Load and parse the knowledge base (PDF, etc.) here for comparison
            kb_path = assertions["knowledgebase"]
            # Placeholder for actual factual verification
            for fact in ["sample fact"]:
                match = fact in llm_output
                results["factual"].append((fact, match))
        else:
            results["factual"].append(("Knowledge Base Missing or Disabled", False))

        # 🟢 Miscellaneous Assertions
        for assertion in assertions["misc"]:
            validation = "complex check passed"  # Placeholder for complex checks
            results["misc"].append((assertion, validation))
        return results
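    # Usage sketch (hypothetical I/O; the factual and misc branches are still
    # placeholders, so only deterministic checks produce a real signal):
    #
    #   results = validator.verify_assertions(
    #       user_input="List all users",
    #       llm_output="SELECT * FROM users;",
    #   )
    #   # results["deterministic"] -> [(assertion_dict, True), ...]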
    # def trace(self, func):
    #     """Decorator for tracing function calls and verifying LLM responses"""
    #     @wraps(func)
    #     def wrapper(*args, **kwargs):
    #         user_input = args[0] if args else None
    #         logging.info(f"Executing {func.__name__} with input: {user_input}")
    #         result = func(*args, **kwargs)
    #         logging.info(f"Received Output: {result}")
    #         verification_results = self.verify_assertions(user_input, result)
    #         logging.info(f"Verification Results: {verification_results}")
    #         # Update accuracy history
    #         project_data = self._load_project()
    #         project_data["accuracy_history"].append(verification_results)
    #         self._save_project(project_data)
    #         return result
    #     return wrapper
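# Minimal end-to-end sketch (hypothetical project name and assertion values;
# assumes tools.tools.verify_sql_query is importable and returns a bool):
if __name__ == "__main__":
    validator = ValidLM(project_name="demo_project")
    validator.add_assertion("deterministic", {
        "check_type": "regex",
        "value": r"SELECT\s+.+\s+FROM",
    })
    results = validator.verify_assertions(
        user_input="Write a query for all users",
        llm_output="SELECT id, name FROM users;",
    )
    print(json.dumps(results, indent=2))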