madhiemw commited on
Commit
aa2d91f
·
verified ·
1 Parent(s): 0444545

Upload 25 files

Browse files
app.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from config import Config
from pipeline.main_pipeline import QAPipeline
from typing import List


def create_app():
    """Build and return the Gradio Q&A interface wired to the RAG pipeline.

    Returns:
        gr.Interface: question box + collection dropdown in, AI response +
        retrieved documents out.
    """
    config = Config()
    pipeline = QAPipeline(config)

    # BUG FIX: gr.Interface passes exactly one positional argument per
    # declared input. Only two inputs are declared below, but the original
    # handler also required a `history` parameter, so every submission
    # would have raised TypeError. The unused parameter is removed.
    def process_message(message: str, collection_name: str):
        retrieved_docs, response = pipeline.process(message, collection_name)
        # Outputs are (AI Response, Retrieved Documents) — note the swap
        # relative to pipeline.process()'s (docs, response) return order.
        return response, retrieved_docs

    interface = gr.Interface(
        fn=process_message,
        inputs=[
            gr.Textbox(label="Your Question"),
            gr.Dropdown(choices=["QnA data", "Semantic Data"], label="Select Collection", value="Semantic Data")
        ],
        outputs=[
            gr.Textbox(label="AI Response"),
            gr.Textbox(label="Retrieved Documents from ChromaDB"),
        ],
        title="Muhammad Adhiem Wicaksana Vidavox Technical Test",
        # Typo fix: "crhoma db" -> "ChromaDB".
        description="Ask questions and choose a ChromaDB collection, read my documentation for the purpose of the collection :)",
    )

    return interface


if __name__ == "__main__":
    app = create_app()
    app.launch()
config.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os


class Config:
    """Central runtime configuration for the QA app.

    SECURITY: earlier revisions committed live credentials (Groq API key,
    ChromaDB token, MySQL password) directly in this file. They are now read
    from environment variables. The committed fallback values below are
    already public/compromised — rotate them on the respective services and
    then delete the fallbacks entirely.
    """

    # --- ChromaDB (remote, token-authenticated HTTP) ---
    CHROMADB_HOST = os.getenv("CHROMADB_HOST", "https://chromadb-production-7af5.up.railway.app")
    CHROMADB_TOKEN = os.getenv("CHROMADB_TOKEN", "w587yz1cvfk8tcbf21taesnxmdt2rcsm")  # ROTATE: leaked in git history

    # --- Groq LLM API ---
    GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
    GROQ_API_KEY = os.getenv("GROQ_API_KEY", "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")  # ROTATE: leaked in git history

    # --- Retrieval defaults ---
    DEFAULT_COLLECTION = "test11"
    N_RESULTS = 5  # top-k documents returned per ChromaDB query

    # --- MySQL (Railway-hosted) ---
    MYSQL_HOST = os.getenv("MYSQL_HOST", "junction.proxy.rlwy.net")
    MYSQL_USER = os.getenv("MYSQL_USER", "root")
    MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD", "YtxIHzEUaRIJyNKaecBOTylHKGbbEitk")  # ROTATE: leaked in git history
    MYSQL_DATABASE = os.getenv("MYSQL_DATABASE", "railway")
    MYSQL_PORT = int(os.getenv("MYSQL_PORT", "54826"))  # port stays an int for mysql.connector
pipeline/__pycache__/chromadb_search.cpython-311.pyc ADDED
Binary file (1.92 kB). View file
 
pipeline/__pycache__/document_processor.cpython-311.pyc ADDED
Binary file (1.6 kB). View file
 
pipeline/__pycache__/embeddings.cpython-311.pyc ADDED
Binary file (829 Bytes). View file
 
pipeline/__pycache__/generate_sql.cpython-311.pyc ADDED
Binary file (2.89 kB). View file
 
pipeline/__pycache__/groq_client.cpython-311.pyc ADDED
Binary file (2.67 kB). View file
 
pipeline/__pycache__/main_pipeline.cpython-311.pyc ADDED
Binary file (3.8 kB). View file
 
pipeline/__pycache__/qa_pipeline.cpython-311.pyc ADDED
Binary file (3.02 kB). View file
 
pipeline/__pycache__/sql_query.cpython-311.pyc ADDED
Binary file (3.05 kB). View file
 
pipeline/__pycache__/sql_response.cpython-311.pyc ADDED
Binary file (2.56 kB). View file
 
pipeline/chromadb_search.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import chromadb
from typing import List, Dict, Any
from config import Config

class ChromaDBPipeline:
    """Thin wrapper around a remote, token-authenticated ChromaDB instance."""

    def __init__(self, config: Config):
        # HTTP client against the remote host; auth is a bearer token header.
        self.client = chromadb.HttpClient(
            host=config.CHROMADB_HOST,
            headers={"Authorization": f"Bearer {config.CHROMADB_TOKEN}"}
        )
        self.n_results = config.N_RESULTS

    def query(self, embedding: List[float], collection_name: str) -> Dict[str, Any]:
        """Search ChromaDB with a precomputed embedding.

        Args:
            embedding: dense query vector (computed by the caller).
            collection_name: existing collection to search.

        Returns:
            Raw ChromaDB query result dict (top ``n_results`` matches).

        Raises:
            Exception: wraps any client/collection failure, with the original
            exception chained as the cause.
        """
        try:
            collection = self.client.get_collection(
                name=collection_name,
                # embeddings are supplied explicitly, so no server-side
                # embedding function is needed
                embedding_function=None
            )
            return collection.query(
                query_embeddings=[embedding],
                n_results=self.n_results
            )
        except Exception as e:
            # Chain the cause so the underlying chromadb error survives the re-wrap.
            raise Exception(f"ChromaDB search error: {str(e)}") from e
pipeline/document_processor.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import List

class DocumentProcessor:
    """Flattens ChromaDB query results into a single context string."""

    @staticmethod
    def process(results: dict) -> str:
        """Return all retrieved documents joined by single spaces.

        ChromaDB returns ``{'documents': [[doc, ...]]}`` — one inner list per
        query embedding — so one level of nesting is flattened and every
        entry is stringified.

        Args:
            results: raw ChromaDB query result (may be None/empty).

        Returns:
            Space-joined document text, or "" when there is nothing to join.
        """
        # BUG FIX: the original only checked key presence, so a present-but-None
        # 'documents' value crashed on " ".join(None). get() + truthiness
        # covers missing, None, and empty in one test.
        if not results or not results.get('documents'):
            return ""

        relevant_documents = results['documents']
        if isinstance(relevant_documents, list):
            # Flatten [[...], [...]] -> [...] (one inner list per query).
            if relevant_documents and isinstance(relevant_documents[0], list):
                relevant_documents = [item for sublist in relevant_documents for item in sublist]
            relevant_documents = [str(doc) for doc in relevant_documents]

        return " ".join(relevant_documents)
pipeline/embeddings.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
from utils.sentence_transformer_util import encode_query

class EmbeddingPipeline:
    """Turns raw question text into a dense embedding vector."""

    @staticmethod
    def process(text: str):
        """Embed *text* and return its vector.

        NOTE(review): the text is wrapped as the pair ("pertanyaan", text)
        before encoding — presumably to mirror how the indexed documents
        were encoded; confirm against the indexing script.
        """
        return encode_query(("pertanyaan", text))
pipeline/groq_client.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
from typing import Dict
from config import Config

class GroqPipeline:
    """Client for the Groq chat-completions API used to answer questions."""

    def __init__(self, config: Config):
        self.api_url = config.GROQ_API_URL
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {config.GROQ_API_KEY}"
        }

    def generate_response(self, context: str, question: str) -> str:
        """Generate an answer grounded in *context* using the Groq API.

        Args:
            context: retrieved document text used as the factual basis.
            question: the user's question.

        Returns:
            The model's answer text. The prompt instructs the model to append
            a '#table' tag when the question relates to efficiency, which the
            caller later uses to trigger the SQL agent.

        Raises:
            Exception: on non-200 responses or network errors (cause chained).
        """
        try:
            data = {
                "model": "llama-3.1-8b-instant",
                "messages": [{
                    "role": "user",
                    # Prompt kept verbatim: its exact wording (including the
                    # '#table' convention) is load-bearing for downstream parsing.
                    "content": (f"embedding result: {context}\n"
                                f"Answer the following user question: {question}, "
                                "the name of the robot is AV-826 "
                                "and answer it as a professional CS."
                                "just use embedding result as a fact for the answer and "
                                "if the question is has relation with effecency just add '#table' tag as a regex at the end of your output the db contain data about av-826 testing data and performance such as operation temperature and sensor accuracy"
                                "dont create any fake data, fake table or fake sql code. just summary the embedding result"
                                "if the question doesnt has relation with the robot product give polite feedback")
                }],
                "temperature": 0.1  # low temperature: keep answers close to the retrieved facts
            }

            # BUG FIX: the original post had no timeout and could hang the app
            # forever on a stalled connection.
            response = requests.post(self.api_url, json=data, headers=self.headers, timeout=30)

            if response.status_code == 200:
                return response.json()['choices'][0]['message']['content']
            else:
                raise Exception(f"Groq API error: {response.status_code}")
        except Exception as e:
            # Chain so the original HTTP/parse error survives the re-wrap.
            raise Exception(f"Groq generation error: {str(e)}") from e
pipeline/main_pipeline.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from config import Config
from pipeline.embeddings import EmbeddingPipeline
from pipeline.chromadb_search import ChromaDBPipeline
from pipeline.document_processor import DocumentProcessor
from pipeline.groq_client import GroqPipeline
from pipeline.sql_query import MYSQL_Generator
from pipeline.sql_response import DB_Response_Generator
# NOTE(review): detect_none_in_text is imported but never used below —
# possibly it was meant to validate generateqsl; confirm intent before removing.
from utils.regex import remove_table_and_text, detect_none_in_text
from utils.mysql_util import MySQLDatabase_execute

class QAPipeline:
    """End-to-end RAG pipeline: embed -> ChromaDB search -> LLM answer,
    with an optional SQL-agent detour when the answer carries a '#table' tag."""

    def __init__(self, config: Config):
        self.config = config
        self.embedding_pipeline = EmbeddingPipeline()
        self.chromadb_pipeline = ChromaDBPipeline(config)
        self.document_processor = DocumentProcessor()
        self.groq_pipeline = GroqPipeline(config)
        self.sql_generator = MYSQL_Generator(config)
        self.sql_execute = MySQLDatabase_execute(config)
        self.sql_response = DB_Response_Generator(config)

    def process(self, question: str, collection_name: str):
        """Run the complete QA pipeline and return retrieved documents + AI response"""
        try:
            # Map the UI-facing dropdown labels to actual collection names.
            if collection_name == "QnA data":
                collection_name = "test11"
            elif collection_name == "Semantic Data":
                collection_name = "test12"

            # 1) Embed the question.
            embedding = self.embedding_pipeline.process(question)

            # 2) Vector search in ChromaDB.
            search_results = self.chromadb_pipeline.query(
                embedding,
                collection_name
            )

            # 3) Flatten search hits into one context string.
            retrieved_text = self.document_processor.process(search_results)

            if not retrieved_text:
                return "No relevant documents found.", "No relevant information found."

            # 4) Ask the LLM; its prompt appends '#table' for efficiency questions.
            response = self.groq_pipeline.generate_response(retrieved_text, question)
            # remove_table_and_text returns False when no '#table' tag is present,
            # otherwise the response text with the tag stripped.
            tabledetect = remove_table_and_text(response)
            # NOTE(review): `== False` also matches a stripped response of ""
            # (empty string == False is False, but a falsy non-False value would
            # fall through to the SQL branch) — confirm that is intended.
            if tabledetect == False:
                return retrieved_text, response
            else:
                # 5) '#table' seen: ask the LLM for a SQL query over the test DB.
                generateqsl = self.sql_generator.generate_response(question)
                if generateqsl is not None:
                    sql_execute = self.sql_execute.execute_query(generateqsl)
                    # NOTE(review): execute_query returns rows or None, never the
                    # string "None" — this comparison looks always-False; was
                    # `sql_execute is None` (or detect_none_in_text) intended?
                    if sql_execute == "None":
                        return retrieved_text, tabledetect
                    else:
                        # 6) Summarize the DB rows in plain language and append.
                        sql_response = self.sql_response.generate_response(context=question, db_result=sql_execute, sqlcode=generateqsl)
                        final_response = tabledetect + "\n\n --------connected with SQL Agent--------\n\n--------Database analyzing result--------\n\n" + sql_response
                        return retrieved_text, final_response
                else :
                    # SQL generation declined/failed: fall back to the stripped answer.
                    return retrieved_text, tabledetect

        except Exception as e:
            # Errors are surfaced in both output boxes rather than raised to Gradio.
            return f"Error retrieving documents: {str(e)}", f"Pipeline error: {str(e)}"
pipeline/sql_query.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
from config import Config
from utils.regex import extract_sql_query

class MYSQL_Generator:
    """Asks the Groq API to translate a user question into one MySQL query
    against the fixed `vidavox` test-data table."""

    def __init__(self, config: Config):
        self.api_url = config.GROQ_API_URL
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {config.GROQ_API_KEY}"
        }

    def generate_response(self, context: str) -> str:
        """Generate a SQL query for *context* using the Groq API.

        Returns:
            The extracted SQL string, or None when no ```sql fence was found
            in the model output OR when the request failed. Callers treat any
            non-None value as executable SQL, so failure must map to None.
        """
        try:
            data = {
                "model": "llama-3.1-8b-instant",
                "messages": [{
                    "role": "user",
                    # Prompt kept verbatim (schema + example row + constraints).
                    "content": (f"question: {context}\n"
                                "the table name is vidavox"
                                "the table is about av826 testing data"
                                "---below is the column table of MySQL db---"
                                "test_id VARCHAR(10) PRIMARY KEY,test_date DATE,model_name VARCHAR(50),noise_level_db DECIMAL(5, 2),cleaning_efficiency_percent DECIMAL(5, 2), battery_duration_minutes INT, area_covered_sqm DECIMAL(10, 2), dust_collection_grams DECIMAL(10, 2), operating_temperature_celsius DECIMAL(5, 2),maintenance_score DECIMAL(5, 2), navigation_accuracy_percent DECIMAL(5, 2),software_version VARCHAR(50)"
                                "---and below is the data example from the DB---"
                                "('TST0000020', '2024-01-07', 'AV826', 56.9, 95.8, 112, 42.41, 87.0, 26.6, 9, 96.9, 'v2.1.0')"
                                "your task is to generate just 1 MySQL code relateed above column and dont create any description to answer user question just use MySQL code "
                                "always limit the data only 10 line "
                                "dont create any fake mysql code"
                                "return none if the question doesn't has relation with the column and example data above"
                                )
                }],
                "temperature": 0.1
            }

            # BUG FIX: no timeout meant a stalled connection hung the pipeline.
            response = requests.post(self.api_url, json=data, headers=self.headers, timeout=30)

            if response.status_code == 200:
                response_text = response.json()['choices'][0]['message']['content']
                return extract_sql_query(response_text)
            else:
                raise Exception(f"Groq API error: {response.status_code}, {response.text}")

        except Exception as e:
            # BUG FIX: the original returned str(e) here; the caller only checks
            # `is not None` and would then EXECUTE the error message as SQL.
            # Returning None routes failures to the caller's safe fallback path.
            print(f"SQL generation error: {e}")
            return None
pipeline/sql_response.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
from config import Config

class DB_Response_Generator:
    """Asks the Groq API to summarize raw SQL results in plain language."""

    def __init__(self, config: Config):
        self.api_url = config.GROQ_API_URL
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {config.GROQ_API_KEY}"
        }

    def generate_response(self, context: str, db_result: str, sqlcode: str) -> str:
        """Summarize *db_result* (rows returned by *sqlcode*) to answer *context*.

        Args:
            context: the original user question.
            db_result: rows fetched from MySQL.
            sqlcode: the SQL statement that produced those rows.

        Returns:
            A non-technical summary from the model, or — preserving existing
            behavior — the stringified error when the request fails (the
            caller displays this text directly to the user).
        """
        try:
            data = {
                "model": "llama-3.1-8b-instant",
                "messages": [{
                    "role": "user",
                    # Prompt kept verbatim.
                    "content": (f"question: {context}\n"
                                "heres the table column test_id,test_date,model_name,noise_level_db,cleaning_efficiency_percent, battery_duration_minutes INT, area_covered_sqm, dust_collection_grams, operating_temperature_celsius ,maintenance_score, navigation_accuracy_percent ,software_version\n"
                                f"and heres the sql code: {sqlcode} "
                                "below is result of sql query\n"
                                f"{db_result}\n"
                                "make summary and use non technical sentence to explain it into user and use it to answer user question ")
                }],
                "temperature": 0.1
            }

            # BUG FIX: no timeout meant a stalled connection hung the pipeline.
            response = requests.post(self.api_url, json=data, headers=self.headers, timeout=30)

            if response.status_code == 200:
                response_text = response.json()['choices'][0]['message']['content']
                return response_text
            else:
                raise Exception(f"Groq API error: {response.status_code}, {response.text}")

        except Exception as e:
            return str(e)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ sentence-transformers
2
+ scikit-learn
3
+ requests
4
+ regex
5
+ chromadb
6
+ mysql-connector-python
utils/__pycache__/mysql_util.cpython-311.pyc ADDED
Binary file (2.86 kB). View file
 
utils/__pycache__/regex.cpython-311.pyc ADDED
Binary file (1.45 kB). View file
 
utils/__pycache__/sentence_transformer_util.cpython-311.pyc ADDED
Binary file (915 Bytes). View file
 
utils/mysql_util.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import mysql.connector
from config import Config

class MySQLDatabase_execute:
    """Lazily-connected MySQL helper: connects on first query, returns rows."""

    def __init__(self, config: Config):
        self.config = config
        self.connection = None  # opened on demand by execute_query()

    def connect(self):
        """Establish connection to MySQL database.

        Failures are logged and swallowed, leaving self.connection as None —
        callers must check for that.
        """
        try:
            self.connection = mysql.connector.connect(
                host=self.config.MYSQL_HOST,
                user=self.config.MYSQL_USER,
                password=self.config.MYSQL_PASSWORD,
                database=self.config.MYSQL_DATABASE,
                port=self.config.MYSQL_PORT
            )
            print("Connected to MySQL database successfully.")
        except mysql.connector.Error as err:
            print(f"Error: {err}")

    def execute_query(self, query):
        """Execute the SQL query and return the fetched rows, or None on failure."""
        try:
            if self.connection is None:
                self.connect()

            # BUG FIX: connect() swallows failures, so self.connection could
            # still be None here; the original then crashed with AttributeError
            # on None.cursor(). Bail out with the documented failure value.
            if self.connection is None:
                return None

            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                return cursor.fetchall()
            finally:
                # BUG FIX: the original leaked the cursor when execute/fetch raised.
                cursor.close()

        except mysql.connector.Error as err:
            print(f"Query Error: {err}")
            return None

    def close(self):
        """Close the database connection"""
        if self.connection:
            self.connection.close()
            print("Database connection closed.")
utils/regex.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

def remove_table_and_text(text):
    """Look for the '#table' marker emitted by the LLM.

    Returns False when the marker is absent; otherwise returns *text* with
    the marker and everything after it removed (stripped of whitespace).
    """
    marker = re.compile(r"#table.*", re.DOTALL)
    if marker.search(text) is None:
        return False
    return marker.sub("", text).strip()
9
+
10
+
11
def extract_sql_query(text):
    """Pull the SQL statement out of a ```sql fenced block.

    Returns the stripped statement, or None when no fence is present.
    """
    fenced = re.search(r'```sql\n(.*?)```', text, re.DOTALL)
    return fenced.group(1).strip() if fenced else None
def detect_none_in_text(text: str) -> bool:
    """Return True when the literal token 'None' (case-sensitive) appears in *text*.

    Fix: the original had a stray duplicate `import re` in the middle of the
    module (it is already imported at the top); the regex search over the
    metacharacter-free pattern "None" is also just a substring test.
    """
    if not text:
        return False
    return "None" in text
utils/sentence_transformer_util.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
from sentence_transformers import SentenceTransformer

# Loaded once at import time; model download/initialization happens here,
# so importing this module is slow on first run.
model = SentenceTransformer('LazarusNLP/all-indo-e5-small-v4')

def encode_query(query: str):
    """Encode *query* into an embedding vector using SentenceTransformer.

    Returns the first (only) vector from a single-item batch.

    Raises:
        Exception: wraps any encoding failure, with the cause chained.
    """
    try:
        return model.encode([query])[0]
    except Exception as e:
        # Chain the cause so the underlying encoder error survives the re-wrap.
        raise Exception(f"Error encoding query: {str(e)}") from e