MrAl3x0
committed on
Commit
·
01e1af5
1
Parent(s):
da46652
refactor(service): update LexAIService to use new formatter utilities
Browse files
- Improved module and class docstrings for clarity and professionalism
- Integrated `format_legal_response` and `format_references` to standardize HTML formatting
- Enhanced maintainability by separating formatting logic from business logic
- lexai/services/lexai_service.py +42 -9
- lexai/ui/formatters.py +26 -5
- lexai/ui/gradio_interface.py +21 -13
- tests/test_matcher.py +1 -2
- tests/test_openai_client.py +86 -26
lexai/services/lexai_service.py
CHANGED
|
@@ -1,23 +1,56 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
|
| 4 |
-
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
from lexai.core.match_engine import generate_matches
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
class LexAIService:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
@staticmethod
|
| 12 |
def handle_query(query: str, location: str) -> str:
|
| 13 |
"""
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
Parameters
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
Returns
|
| 21 |
-
|
|
|
|
|
|
|
| 22 |
"""
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
LexAI service layer for handling user queries.
|
| 3 |
|
| 4 |
+
This module defines a service class that interfaces with the core match engine,
|
| 5 |
+
processes the results, and formats them for display in the UI.
|
| 6 |
"""
|
| 7 |
|
| 8 |
from lexai.core.match_engine import generate_matches
|
| 9 |
+
from lexai.ui.formatters import format_legal_response, format_references
|
| 10 |
|
| 11 |
|
| 12 |
class LexAIService:
|
| 13 |
+
"""
|
| 14 |
+
Service class that handles legal queries by invoking the match engine and
|
| 15 |
+
formatting the results for UI presentation.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
@staticmethod
|
| 19 |
def handle_query(query: str, location: str) -> str:
|
| 20 |
"""
|
| 21 |
+
Handles a user query and returns an HTML-formatted response.
|
| 22 |
+
|
| 23 |
+
This method:
|
| 24 |
+
- Calls the semantic match engine with the given query and location.
|
| 25 |
+
- Extracts both the AI-generated response and the list of matched legal entries.
|
| 26 |
+
- Constructs an HTML string that includes the AI's response followed by
|
| 27 |
+
a reference list linking to legal documents.
|
| 28 |
|
| 29 |
+
Parameters
|
| 30 |
+
----------
|
| 31 |
+
query : str
|
| 32 |
+
The legal question asked by the user.
|
| 33 |
+
location : str
|
| 34 |
+
The jurisdiction to search within.
|
| 35 |
|
| 36 |
+
Returns
|
| 37 |
+
-------
|
| 38 |
+
str
|
| 39 |
+
A formatted HTML string with the AI response and relevant matches.
|
| 40 |
"""
|
| 41 |
+
result = generate_matches(query, location)
|
| 42 |
+
|
| 43 |
+
gpt_response = result.get("response", "").strip()
|
| 44 |
+
matches = result.get("matches", [])
|
| 45 |
+
|
| 46 |
+
if (
|
| 47 |
+
not isinstance(matches, list)
|
| 48 |
+
or not matches
|
| 49 |
+
or not isinstance(matches[0], dict)
|
| 50 |
+
):
|
| 51 |
+
return format_legal_response(gpt_response or "No matches found.")
|
| 52 |
+
|
| 53 |
+
return (
|
| 54 |
+
format_legal_response(gpt_response) +
|
| 55 |
+
format_references(matches)
|
| 56 |
+
)
|
lexai/ui/formatters.py
CHANGED
|
@@ -11,6 +11,16 @@ from html import escape
|
|
| 11 |
def format_legal_response(response_text: str) -> str:
|
| 12 |
"""
|
| 13 |
Wrap the AI-generated legal response in HTML for UI rendering.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
"""
|
| 15 |
return (
|
| 16 |
"<p><strong>Response:</strong></p>"
|
|
@@ -21,18 +31,29 @@ def format_legal_response(response_text: str) -> str:
|
|
| 21 |
def format_references(matches: list[dict]) -> str:
|
| 22 |
"""
|
| 23 |
Format a list of top document matches into an HTML reference list.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
"""
|
| 25 |
if not matches:
|
| 26 |
return "<p><strong>References:</strong> None found.</p>"
|
| 27 |
|
| 28 |
-
|
| 29 |
for match in matches:
|
| 30 |
url = escape(match.get("url", "#"))
|
| 31 |
title = escape(match.get("title", "Untitled"))
|
| 32 |
subtitle = escape(match.get("subtitle", ""))
|
| 33 |
-
|
| 34 |
-
|
|
|
|
| 35 |
f"{title}: {subtitle}</a></li>"
|
| 36 |
)
|
| 37 |
-
|
| 38 |
-
return
|
|
|
|
| 11 |
def format_legal_response(response_text: str) -> str:
|
| 12 |
"""
|
| 13 |
Wrap the AI-generated legal response in HTML for UI rendering.
|
| 14 |
+
|
| 15 |
+
Parameters
|
| 16 |
+
----------
|
| 17 |
+
response_text : str
|
| 18 |
+
The main response text from the assistant.
|
| 19 |
+
|
| 20 |
+
Returns
|
| 21 |
+
-------
|
| 22 |
+
str
|
| 23 |
+
HTML-formatted string with a 'Response' header and the content.
|
| 24 |
"""
|
| 25 |
return (
|
| 26 |
"<p><strong>Response:</strong></p>"
|
|
|
|
| 31 |
def format_references(matches: list[dict]) -> str:
|
| 32 |
"""
|
| 33 |
Format a list of top document matches into an HTML reference list.
|
| 34 |
+
|
| 35 |
+
Parameters
|
| 36 |
+
----------
|
| 37 |
+
matches : list of dict
|
| 38 |
+
List of matched legal documents, each containing 'url', 'title', and 'subtitle'.
|
| 39 |
+
|
| 40 |
+
Returns
|
| 41 |
+
-------
|
| 42 |
+
str
|
| 43 |
+
HTML-formatted reference section with clickable links.
|
| 44 |
"""
|
| 45 |
if not matches:
|
| 46 |
return "<p><strong>References:</strong> None found.</p>"
|
| 47 |
|
| 48 |
+
html = "<p><strong>References:</strong></p><ul>"
|
| 49 |
for match in matches:
|
| 50 |
url = escape(match.get("url", "#"))
|
| 51 |
title = escape(match.get("title", "Untitled"))
|
| 52 |
subtitle = escape(match.get("subtitle", ""))
|
| 53 |
+
html += (
|
| 54 |
+
"<li>"
|
| 55 |
+
f"<a href=\"{url}\" target=\"_blank\" rel=\"noopener noreferrer\">"
|
| 56 |
f"{title}: {subtitle}</a></li>"
|
| 57 |
)
|
| 58 |
+
html += "</ul>"
|
| 59 |
+
return html
|
lexai/ui/gradio_interface.py
CHANGED
|
@@ -15,10 +15,25 @@ from lexai.services.lexai_service import LexAIService
|
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
| 17 |
APP_DESCRIPTION = """
|
| 18 |
-
LexAI is an AI-powered legal assistant that
|
| 19 |
-
|
| 20 |
"""
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def build_interface():
|
| 24 |
"""
|
|
@@ -55,9 +70,7 @@ def build_interface():
|
|
| 55 |
gr.Button("Flag", variant="secondary")
|
| 56 |
|
| 57 |
def handle_submit(query, location):
|
| 58 |
-
return gr.update(
|
| 59 |
-
value=LexAIService.handle_query(query, location)
|
| 60 |
-
)
|
| 61 |
|
| 62 |
def handle_clear():
|
| 63 |
return gr.update(value="Response will appear here.")
|
|
@@ -73,16 +86,11 @@ def build_interface():
|
|
| 73 |
)
|
| 74 |
|
| 75 |
gr.Examples(
|
| 76 |
-
examples=
|
| 77 |
-
["Is building a rock cairn outdoors allowed by law?", "Boulder"],
|
| 78 |
-
["Can I legally possess a dog as a pet?", "Denver"],
|
| 79 |
-
["Am I allowed to go shirtless in public?", "Boulder"],
|
| 80 |
-
["What is the max legal height for a structure?", "Denver"],
|
| 81 |
-
["Is indoor furniture on porches allowed?", "Boulder"],
|
| 82 |
-
["Can I graze llamas on public land?", "Denver"],
|
| 83 |
-
],
|
| 84 |
inputs=[query_input, location_input]
|
| 85 |
)
|
| 86 |
|
|
|
|
|
|
|
| 87 |
logger.info("LexAI interface built.")
|
| 88 |
return iface
|
|
|
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
| 17 |
APP_DESCRIPTION = """
|
| 18 |
+
LexAI is an AI-powered legal assistant that provides jurisdiction-specific guidance.
|
| 19 |
+
It combines GPT-4 with semantic search to retrieve relevant legal information quickly.
|
| 20 |
"""
|
| 21 |
|
| 22 |
+
DISCLAIMER_TEXT = """
|
| 23 |
+
<div style='text-align: center; font-size: 0.9em; color: gray; margin-top: 1em;'>
|
| 24 |
+
Results may be inaccurate. Always verify with a legal professional.
|
| 25 |
+
</div>
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
EXAMPLE_QUERIES = [
|
| 29 |
+
["Is building a rock cairn outdoors allowed by law?", "Boulder"],
|
| 30 |
+
["Can I legally possess a dog as a pet?", "Denver"],
|
| 31 |
+
["Am I allowed to go shirtless in public?", "Boulder"],
|
| 32 |
+
["What is the max legal height for a structure?", "Denver"],
|
| 33 |
+
["Is indoor furniture on porches allowed?", "Boulder"],
|
| 34 |
+
["Can I graze llamas on public land?", "Denver"],
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
|
| 38 |
def build_interface():
|
| 39 |
"""
|
|
|
|
| 70 |
gr.Button("Flag", variant="secondary")
|
| 71 |
|
| 72 |
def handle_submit(query, location):
|
| 73 |
+
return gr.update(value=LexAIService.handle_query(query, location))
|
|
|
|
|
|
|
| 74 |
|
| 75 |
def handle_clear():
|
| 76 |
return gr.update(value="Response will appear here.")
|
|
|
|
| 86 |
)
|
| 87 |
|
| 88 |
gr.Examples(
|
| 89 |
+
examples=EXAMPLE_QUERIES,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
inputs=[query_input, location_input]
|
| 91 |
)
|
| 92 |
|
| 93 |
+
gr.HTML(DISCLAIMER_TEXT)
|
| 94 |
+
|
| 95 |
logger.info("LexAI interface built.")
|
| 96 |
return iface
|
tests/test_matcher.py
CHANGED
|
@@ -14,14 +14,13 @@ from lexai.core.matcher import find_top_matches
|
|
| 14 |
|
| 15 |
@pytest.fixture
|
| 16 |
def sample_embeddings():
|
| 17 |
-
"""Sample array of
|
| 18 |
return np.array(
|
| 19 |
[
|
| 20 |
[1.0, 0.1, 0.1],
|
| 21 |
[0.8, 0.3, 0.2],
|
| 22 |
[0.5, 0.5, 0.5],
|
| 23 |
[0.1, 0.1, 1.0],
|
| 24 |
-
[0.0, 0.0, 0.0],
|
| 25 |
],
|
| 26 |
dtype=np.float32,
|
| 27 |
)
|
|
|
|
| 14 |
|
| 15 |
@pytest.fixture
|
| 16 |
def sample_embeddings():
|
| 17 |
+
"""Sample array of 4 embedding vectors."""
|
| 18 |
return np.array(
|
| 19 |
[
|
| 20 |
[1.0, 0.1, 0.1],
|
| 21 |
[0.8, 0.3, 0.2],
|
| 22 |
[0.5, 0.5, 0.5],
|
| 23 |
[0.1, 0.1, 1.0],
|
|
|
|
| 24 |
],
|
| 25 |
dtype=np.float32,
|
| 26 |
)
|
tests/test_openai_client.py
CHANGED
|
@@ -1,39 +1,99 @@
|
|
| 1 |
"""
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
|
| 6 |
|
| 7 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
embedding = get_embedding("Test input")
|
| 20 |
-
assert isinstance(embedding, np.ndarray)
|
| 21 |
-
np.testing.assert_array_equal(embedding, np.array([0.1, 0.2, 0.3]))
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
)
|
| 38 |
-
|
| 39 |
-
assert response == "Here is your legal summary."
|
|
|
|
| 1 |
"""
|
| 2 |
+
OpenAI client functions for embedding generation and GPT-4 completions.
|
| 3 |
+
|
| 4 |
+
This module provides utilities to interact with OpenAI’s API, including
|
| 5 |
+
embedding generation and chat-based completion using the configured models.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
import os
|
| 9 |
|
| 10 |
import numpy as np
|
| 11 |
+
from openai import OpenAI
|
| 12 |
+
from openai.types.chat import ChatCompletion
|
| 13 |
+
from openai.types.embedding import Embedding
|
| 14 |
+
|
| 15 |
+
from lexai.config import (
|
| 16 |
+
EMBEDDING_MODEL,
|
| 17 |
+
GPT4_FREQUENCY_PENALTY,
|
| 18 |
+
GPT4_MAX_TOKENS,
|
| 19 |
+
GPT4_MODEL,
|
| 20 |
+
GPT4_PRESENCE_PENALTY,
|
| 21 |
+
GPT4_TEMPERATURE,
|
| 22 |
+
GPT4_TOP_P,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def get_client() -> OpenAI:
|
| 27 |
+
"""
|
| 28 |
+
Returns a new instance of the OpenAI client using the current API key.
|
| 29 |
+
|
| 30 |
+
Returns
|
| 31 |
+
-------
|
| 32 |
+
OpenAI
|
| 33 |
+
An authenticated OpenAI client.
|
| 34 |
+
"""
|
| 35 |
+
api_key = os.getenv("OPENAI_API_KEY")
|
| 36 |
+
if not api_key:
|
| 37 |
+
raise EnvironmentError(
|
| 38 |
+
"OPENAI_API_KEY environment variable is not set.")
|
| 39 |
+
return OpenAI(api_key=api_key)
|
| 40 |
+
|
| 41 |
|
| 42 |
+
def get_embedding(text: str) -> np.ndarray:
|
| 43 |
+
"""
|
| 44 |
+
Generates a numerical embedding for the provided text using OpenAI's model.
|
| 45 |
|
| 46 |
+
Parameters
|
| 47 |
+
----------
|
| 48 |
+
text : str
|
| 49 |
+
The input text to embed.
|
| 50 |
|
| 51 |
+
Returns
|
| 52 |
+
-------
|
| 53 |
+
np.ndarray
|
| 54 |
+
The embedding vector as a NumPy array.
|
| 55 |
+
"""
|
| 56 |
+
client = get_client()
|
| 57 |
+
response: Embedding = client.embeddings.create(
|
| 58 |
+
input=text, model=EMBEDDING_MODEL)
|
| 59 |
+
return np.array(response.data[0].embedding)
|
| 60 |
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
+
def get_chat_completion(
|
| 63 |
+
role_description: str,
|
| 64 |
+
jurisdiction_summary: str,
|
| 65 |
+
query: str,
|
| 66 |
+
) -> str:
|
| 67 |
+
"""
|
| 68 |
+
Generates a GPT-4 response based on the user’s query and legal context.
|
| 69 |
|
| 70 |
+
Parameters
|
| 71 |
+
----------
|
| 72 |
+
role_description : str
|
| 73 |
+
Describes the assistant's role and intended tone or expertise.
|
| 74 |
+
jurisdiction_summary : str
|
| 75 |
+
A stringified summary of relevant legal documents or search results.
|
| 76 |
+
query : str
|
| 77 |
+
The user's legal question.
|
| 78 |
|
| 79 |
+
Returns
|
| 80 |
+
-------
|
| 81 |
+
str
|
| 82 |
+
The assistant's response.
|
| 83 |
+
"""
|
| 84 |
+
client = get_client()
|
| 85 |
+
response: ChatCompletion = client.chat.completions.create(
|
| 86 |
+
model=GPT4_MODEL,
|
| 87 |
+
messages=[
|
| 88 |
+
{"role": "system", "content": role_description.strip()},
|
| 89 |
+
{"role": "system", "content": jurisdiction_summary.strip()},
|
| 90 |
+
{"role": "user", "content": query.strip()},
|
| 91 |
+
{"role": "assistant", "content": ""},
|
| 92 |
+
],
|
| 93 |
+
temperature=GPT4_TEMPERATURE,
|
| 94 |
+
max_tokens=GPT4_MAX_TOKENS,
|
| 95 |
+
top_p=GPT4_TOP_P,
|
| 96 |
+
frequency_penalty=GPT4_FREQUENCY_PENALTY,
|
| 97 |
+
presence_penalty=GPT4_PRESENCE_PENALTY,
|
| 98 |
)
|
| 99 |
+
return response.choices[0].message.content.strip()
|
|
|