Upload 25 files
- app.py +32 -0
- config.py +12 -0
- pipeline/__pycache__/chromadb_search.cpython-311.pyc +0 -0
- pipeline/__pycache__/document_processor.cpython-311.pyc +0 -0
- pipeline/__pycache__/embeddings.cpython-311.pyc +0 -0
- pipeline/__pycache__/generate_sql.cpython-311.pyc +0 -0
- pipeline/__pycache__/groq_client.cpython-311.pyc +0 -0
- pipeline/__pycache__/main_pipeline.cpython-311.pyc +0 -0
- pipeline/__pycache__/qa_pipeline.cpython-311.pyc +0 -0
- pipeline/__pycache__/sql_query.cpython-311.pyc +0 -0
- pipeline/__pycache__/sql_response.cpython-311.pyc +0 -0
- pipeline/chromadb_search.py +25 -0
- pipeline/document_processor.py +16 -0
- pipeline/embeddings.py +8 -0
- pipeline/groq_client.py +38 -0
- pipeline/main_pipeline.py +60 -0
- pipeline/sql_query.py +45 -0
- pipeline/sql_response.py +38 -0
- requirements.txt +6 -0
- utils/__pycache__/mysql_util.cpython-311.pyc +0 -0
- utils/__pycache__/regex.cpython-311.pyc +0 -0
- utils/__pycache__/sentence_transformer_util.cpython-311.pyc +0 -0
- utils/mysql_util.py +43 -0
- utils/regex.py +24 -0
- utils/sentence_transformer_util.py +10 -0
app.py
ADDED
@@ -0,0 +1,32 @@
import gradio as gr
from config import Config
from pipeline.main_pipeline import QAPipeline
from typing import List

def create_app():
    config = Config()
    pipeline = QAPipeline(config)

    # history is kept for chat-style callers but has no matching Gradio component,
    # so it defaults to None to line up with the two inputs defined below.
    def process_message(message: str, collection_name: str, history: List[List[str]] = None):
        retrieved_docs, response = pipeline.process(message, collection_name)
        return response, retrieved_docs

    interface = gr.Interface(
        fn=process_message,
        inputs=[
            gr.Textbox(label="Your Question"),
            gr.Dropdown(choices=["QnA data", "Semantic Data"], label="Select Collection", value="Semantic Data")
        ],
        outputs=[
            gr.Textbox(label="AI Response"),
            gr.Textbox(label="Retrieved Documents from ChromaDB"),
        ],
        title="Muhammad Adhiem Wicaksana Vidavox Technical Test",
        description="Ask questions and choose a ChromaDB collection; see the documentation for what each collection contains :)",
    )

    return interface

if __name__ == "__main__":
    app = create_app()
    app.launch()
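As a quick smoke test outside the Gradio UI, the pipeline can also be called directly. This is a hypothetical sketch, not part of the commit; it assumes the dependencies in requirements.txt (plus gradio) are installed and the ChromaDB, Groq, and MySQL endpoints in config.py are reachable, and the question string is only a placeholder.

# Hypothetical smoke test: run the pipeline once from the command line.
from config import Config
from pipeline.main_pipeline import QAPipeline

if __name__ == "__main__":
    pipeline = QAPipeline(Config())
    docs, answer = pipeline.process("What is the operating temperature of the AV-826?", "Semantic Data")
    print(docs)    # text retrieved from ChromaDB
    print(answer)  # generated response (possibly with the SQL-agent section appended)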
config.py
ADDED
@@ -0,0 +1,12 @@
class Config:
    CHROMADB_HOST = "https://chromadb-production-7af5.up.railway.app"
    CHROMADB_TOKEN = "w587yz1cvfk8tcbf21taesnxmdt2rcsm"
    GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
    GROQ_API_KEY = "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"
    DEFAULT_COLLECTION = "test11"
    N_RESULTS = 5
    MYSQL_HOST = "junction.proxy.rlwy.net"
    MYSQL_USER = "root"
    MYSQL_PASSWORD = "YtxIHzEUaRIJyNKaecBOTylHKGbbEitk"
    MYSQL_DATABASE = "railway"
    MYSQL_PORT = 54826
pipeline/__pycache__/chromadb_search.cpython-311.pyc
ADDED
Binary file (1.92 kB).
pipeline/__pycache__/document_processor.cpython-311.pyc
ADDED
Binary file (1.6 kB).
pipeline/__pycache__/embeddings.cpython-311.pyc
ADDED
Binary file (829 Bytes).
pipeline/__pycache__/generate_sql.cpython-311.pyc
ADDED
Binary file (2.89 kB).
pipeline/__pycache__/groq_client.cpython-311.pyc
ADDED
Binary file (2.67 kB).
pipeline/__pycache__/main_pipeline.cpython-311.pyc
ADDED
Binary file (3.8 kB).
pipeline/__pycache__/qa_pipeline.cpython-311.pyc
ADDED
Binary file (3.02 kB).
pipeline/__pycache__/sql_query.cpython-311.pyc
ADDED
Binary file (3.05 kB).
pipeline/__pycache__/sql_response.cpython-311.pyc
ADDED
Binary file (2.56 kB).
pipeline/chromadb_search.py
ADDED
@@ -0,0 +1,25 @@
import chromadb
from typing import List, Dict, Any
from config import Config

class ChromaDBPipeline:
    def __init__(self, config: Config):
        self.client = chromadb.HttpClient(
            host=config.CHROMADB_HOST,
            headers={"Authorization": f"Bearer {config.CHROMADB_TOKEN}"}
        )
        self.n_results = config.N_RESULTS

    def query(self, embedding: List[float], collection_name: str) -> Dict[str, Any]:
        """Search ChromaDB with an embedding"""
        try:
            collection = self.client.get_collection(
                name=collection_name,
                embedding_function=None
            )
            return collection.query(
                query_embeddings=[embedding],
                n_results=self.n_results
            )
        except Exception as e:
            raise Exception(f"ChromaDB search error: {str(e)}")
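For reference, a minimal usage sketch of this class (not part of the commit), assuming the ChromaDB instance from config.py is reachable and the "test12" collection referenced in main_pipeline.py exists; the query text is a placeholder.

# Hypothetical usage: embed a question with the project's encoder, then search a collection.
from config import Config
from pipeline.chromadb_search import ChromaDBPipeline
from pipeline.embeddings import EmbeddingPipeline

config = Config()
searcher = ChromaDBPipeline(config)
embedding = EmbeddingPipeline.process("operating temperature of the AV-826")
results = searcher.query(embedding.tolist(), "test12")  # "test12" is the Semantic Data collection
print(results["documents"])  # nested list of matched document texts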
pipeline/document_processor.py
ADDED
@@ -0,0 +1,16 @@
from typing import List

class DocumentProcessor:
    @staticmethod
    def process(results: dict) -> str:
        """Process and combine ChromaDB results"""
        if not results or 'documents' not in results:
            return ""

        relevant_documents = results['documents']
        if isinstance(relevant_documents, list):
            if relevant_documents and isinstance(relevant_documents[0], list):
                relevant_documents = [item for sublist in relevant_documents for item in sublist]
            relevant_documents = [str(doc) for doc in relevant_documents]

        return " ".join(relevant_documents)
pipeline/embeddings.py
ADDED
@@ -0,0 +1,8 @@
from utils.sentence_transformer_util import encode_query

class EmbeddingPipeline:
    @staticmethod
    def process(text: str):
        """Convert input text to an embedding"""
        # Pair the text with the label "pertanyaan" (Indonesian for "question") before encoding
        chat = ("pertanyaan", text)
        return encode_query(chat)
pipeline/groq_client.py
ADDED
@@ -0,0 +1,38 @@
import requests
from typing import Dict
from config import Config

class GroqPipeline:
    def __init__(self, config: Config):
        self.api_url = config.GROQ_API_URL
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {config.GROQ_API_KEY}"
        }

    def generate_response(self, context: str, question: str) -> str:
        """Generate a response using the Groq API"""
        try:
            data = {
                "model": "llama-3.1-8b-instant",
                "messages": [{
                    "role": "user",
                    "content": (f"embedding result: {context}\n"
                                f"Answer the following user question: {question}. "
                                "The name of the robot is AV-826; "
                                "answer as a professional customer-service agent. "
                                "Use only the embedding result as facts for the answer. "
                                "If the question relates to efficiency, add a '#table' tag at the end of your output; "
                                "the database contains AV-826 testing data and performance metrics such as operating temperature and sensor accuracy. "
                                "Do not create any fake data, fake tables, or fake SQL code; just summarize the embedding result. "
                                "If the question has no relation to the robot product, give polite feedback.")
                }],
                "temperature": 0.1
            }

            response = requests.post(self.api_url, json=data, headers=self.headers)

            if response.status_code == 200:
                return response.json()['choices'][0]['message']['content']
            else:
                raise Exception(f"Groq API error: {response.status_code}")
        except Exception as e:
            raise Exception(f"Groq generation error: {str(e)}")
pipeline/main_pipeline.py
ADDED
@@ -0,0 +1,60 @@
from config import Config
from pipeline.embeddings import EmbeddingPipeline
from pipeline.chromadb_search import ChromaDBPipeline
from pipeline.document_processor import DocumentProcessor
from pipeline.groq_client import GroqPipeline
from pipeline.sql_query import MYSQL_Generator
from pipeline.sql_response import DB_Response_Generator
from utils.regex import remove_table_and_text, detect_none_in_text
from utils.mysql_util import MySQLDatabase_execute

class QAPipeline:
    def __init__(self, config: Config):
        self.config = config
        self.embedding_pipeline = EmbeddingPipeline()
        self.chromadb_pipeline = ChromaDBPipeline(config)
        self.document_processor = DocumentProcessor()
        self.groq_pipeline = GroqPipeline(config)
        self.sql_generator = MYSQL_Generator(config)
        self.sql_execute = MySQLDatabase_execute(config)
        self.sql_response = DB_Response_Generator(config)

    def process(self, question: str, collection_name: str):
        """Run the complete QA pipeline and return retrieved documents + AI response"""
        try:
            # Map the UI collection labels to the actual ChromaDB collection names
            if collection_name == "QnA data":
                collection_name = "test11"
            elif collection_name == "Semantic Data":
                collection_name = "test12"

            embedding = self.embedding_pipeline.process(question)

            search_results = self.chromadb_pipeline.query(
                embedding,
                collection_name
            )

            retrieved_text = self.document_processor.process(search_results)

            if not retrieved_text:
                return "No relevant documents found.", "No relevant information found."

            response = self.groq_pipeline.generate_response(retrieved_text, question)
            # remove_table_and_text returns False when no '#table' tag is present,
            # otherwise it returns the response with the tag stripped off.
            tabledetect = remove_table_and_text(response)
            if tabledetect is False:
                return retrieved_text, response
            else:
                generated_sql = self.sql_generator.generate_response(question)
                if generated_sql is not None:
                    sql_result = self.sql_execute.execute_query(generated_sql)
                    if not sql_result:
                        return retrieved_text, tabledetect
                    else:
                        sql_response = self.sql_response.generate_response(context=question, db_result=sql_result, sqlcode=generated_sql)
                        final_response = tabledetect + "\n\n --------connected with SQL Agent--------\n\n--------Database analyzing result--------\n\n" + sql_response
                        return retrieved_text, final_response
                else:
                    return retrieved_text, tabledetect

        except Exception as e:
            return f"Error retrieving documents: {str(e)}", f"Pipeline error: {str(e)}"
pipeline/sql_query.py
ADDED
@@ -0,0 +1,45 @@
import requests
from config import Config
from utils.regex import extract_sql_query

class MYSQL_Generator:
    def __init__(self, config: Config):
        self.api_url = config.GROQ_API_URL
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {config.GROQ_API_KEY}"
        }

    def generate_response(self, context: str) -> str:
        """Generate a SQL query using the Groq API"""
        try:
            data = {
                "model": "llama-3.1-8b-instant",
                "messages": [{
                    "role": "user",
                    "content": (f"question: {context}\n"
                                "The table name is vidavox. "
                                "The table contains AV826 testing data.\n"
                                "---below are the columns of the MySQL table---\n"
                                "test_id VARCHAR(10) PRIMARY KEY, test_date DATE, model_name VARCHAR(50), noise_level_db DECIMAL(5, 2), cleaning_efficiency_percent DECIMAL(5, 2), battery_duration_minutes INT, area_covered_sqm DECIMAL(10, 2), dust_collection_grams DECIMAL(10, 2), operating_temperature_celsius DECIMAL(5, 2), maintenance_score DECIMAL(5, 2), navigation_accuracy_percent DECIMAL(5, 2), software_version VARCHAR(50)\n"
                                "---and below is an example row from the DB---\n"
                                "('TST0000020', '2024-01-07', 'AV826', 56.9, 95.8, 112, 42.41, 87.0, 26.6, 9, 96.9, 'v2.1.0')\n"
                                "Your task is to generate exactly 1 MySQL query related to the columns above; do not add any description, answer the user question with MySQL code only. "
                                "Always limit the result to 10 rows. "
                                "Do not create any fake MySQL code. "
                                "Return None if the question has no relation to the columns and example data above.")
                }],
                "temperature": 0.1
            }

            response = requests.post(self.api_url, json=data, headers=self.headers)

            if response.status_code == 200:
                response_text = response.json()['choices'][0]['message']['content']
                return extract_sql_query(response_text)
            else:
                raise Exception(f"Groq API error: {response.status_code}, {response.text}")

        except Exception as e:
            return str(e)
pipeline/sql_response.py
ADDED
@@ -0,0 +1,38 @@
import requests
from config import Config

class DB_Response_Generator:
    def __init__(self, config: Config):
        self.api_url = config.GROQ_API_URL
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {config.GROQ_API_KEY}"
        }

    def generate_response(self, context: str, db_result: str, sqlcode: str) -> str:
        """Summarize a SQL query result using the Groq API"""
        try:
            data = {
                "model": "llama-3.1-8b-instant",
                "messages": [{
                    "role": "user",
                    "content": (f"question: {context}\n"
                                "Here are the table columns: test_id, test_date, model_name, noise_level_db, cleaning_efficiency_percent, battery_duration_minutes, area_covered_sqm, dust_collection_grams, operating_temperature_celsius, maintenance_score, navigation_accuracy_percent, software_version\n"
                                f"and here is the SQL code: {sqlcode}\n"
                                "Below is the result of the SQL query:\n"
                                f"{db_result}\n"
                                "Summarize it in non-technical language for the user and use it to answer the user question.")
                }],
                "temperature": 0.1
            }

            response = requests.post(self.api_url, json=data, headers=self.headers)

            if response.status_code == 200:
                response_text = response.json()['choices'][0]['message']['content']
                return response_text
            else:
                raise Exception(f"Groq API error: {response.status_code}, {response.text}")

        except Exception as e:
            return str(e)
requirements.txt
ADDED
@@ -0,0 +1,6 @@
sentence-transformers
scikit-learn
requests
regex
chromadb
mysql-connector-python
utils/__pycache__/mysql_util.cpython-311.pyc
ADDED
Binary file (2.86 kB).
utils/__pycache__/regex.cpython-311.pyc
ADDED
Binary file (1.45 kB).
utils/__pycache__/sentence_transformer_util.cpython-311.pyc
ADDED
Binary file (915 Bytes).
utils/mysql_util.py
ADDED
@@ -0,0 +1,43 @@
import mysql.connector
from config import Config

class MySQLDatabase_execute:
    def __init__(self, config: Config):
        self.config = config
        self.connection = None

    def connect(self):
        """Establish a connection to the MySQL database"""
        try:
            self.connection = mysql.connector.connect(
                host=self.config.MYSQL_HOST,
                user=self.config.MYSQL_USER,
                password=self.config.MYSQL_PASSWORD,
                database=self.config.MYSQL_DATABASE,
                port=self.config.MYSQL_PORT
            )
            print("Connected to MySQL database successfully.")
        except mysql.connector.Error as err:
            print(f"Error: {err}")

    def execute_query(self, query):
        """Execute the SQL query and return the results"""
        try:
            if self.connection is None:
                self.connect()

            cursor = self.connection.cursor()
            cursor.execute(query)
            results = cursor.fetchall()
            cursor.close()
            return results

        except mysql.connector.Error as err:
            print(f"Query Error: {err}")
            return None

    def close(self):
        """Close the database connection"""
        if self.connection:
            self.connection.close()
            print("Database connection closed.")
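A minimal usage sketch (not part of the commit), assuming the Railway MySQL instance from config.py is reachable and the vidavox table referenced in pipeline/sql_query.py exists; the query here is only an example.

# Hypothetical usage: run one read-only query against the test-data table, then close the connection.
from config import Config
from utils.mysql_util import MySQLDatabase_execute

db = MySQLDatabase_execute(Config())
rows = db.execute_query("SELECT test_id, cleaning_efficiency_percent FROM vidavox LIMIT 5")
print(rows)  # list of tuples, or None if the query failed
db.close()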
utils/regex.py
ADDED
@@ -0,0 +1,24 @@
import re

def remove_table_and_text(text):
    """Strip the '#table' tag and anything after it; return False if no tag is present."""
    pattern = r"#table.*"
    if not re.search(pattern, text, re.DOTALL):
        return False
    cleaned_text = re.sub(pattern, "", text, flags=re.DOTALL).strip()
    return cleaned_text


def extract_sql_query(text):
    """Extract and clean up the SQL query from the response text"""
    match = re.search(r'```sql\n(.*?)```', text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return None


def detect_none_in_text(text: str) -> bool:
    """Return True if the text contains the word 'None'."""
    if not text:
        return False

    pattern = r"None"
    return bool(re.search(pattern, text))
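For reference, a small sketch of how these helpers behave on typical inputs; the strings below are illustrative, not real model output.

# Illustrative inputs only; actual LLM output will differ.
from utils.regex import remove_table_and_text, extract_sql_query, detect_none_in_text

print(remove_table_and_text("The AV-826 cleans at 95% efficiency. #table"))  # -> "The AV-826 cleans at 95% efficiency."
print(remove_table_and_text("No tag here."))                                 # -> False

sql_text = "```sql\nSELECT * FROM vidavox LIMIT 10;\n```"
print(extract_sql_query(sql_text))   # -> "SELECT * FROM vidavox LIMIT 10;"
print(detect_none_in_text("None"))   # -> True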
utils/sentence_transformer_util.py
ADDED
@@ -0,0 +1,10 @@
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('LazarusNLP/all-indo-e5-small-v4')

def encode_query(query: str):
    """Encodes a query into an embedding vector using SentenceTransformer."""
    try:
        return model.encode([query])[0]
    except Exception as e:
        raise Exception(f"Error encoding query: {str(e)}")
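A quick sanity check of the encoder (not part of the commit); the model weights are downloaded on first use, and the embedding dimension depends on the model, so it is printed rather than assumed.

# Hypothetical sanity check: encode a short query and inspect the vector.
from utils.sentence_transformer_util import encode_query

vector = encode_query("operating temperature of the AV-826")
print(len(vector), vector[:5])  # embedding dimension and the first few values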