Spaces:

avnishkanungo
/

AudioNLtoSQL

Sleeping

App Files Files Community

avnishkanungo committed on Jul 6, 2024

Commit

7c30d23

verified ·

1 Parent(s): 6bdf49f

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

NLToSQL.py +1 -0
NLToSQL_test.ipynb +6 -6
app.py +35 -20
gradio_demo.ipynb +529 -16
requirements.txt +276 -16

NLToSQL.py CHANGED Viewed

@@ -226,6 +226,7 @@ def install_libportaudio():
 if __name__ == '__main__':
     parser = ArgumentParser()
     parser.add_argument('--example_path', type=str, default=os.getcwd()+"/few_shot_samples.json")
     parser.add_argument('--desc_path', type=str, default=os.getcwd()+"/database_table_descriptions.csv")

 if __name__ == '__main__':
+    # Please configure your DB credentials and paths of the files for few shot learning and fine tuning
     parser = ArgumentParser()
     parser.add_argument('--example_path', type=str, default=os.getcwd()+"/few_shot_samples.json")
     parser.add_argument('--desc_path', type=str, default=os.getcwd()+"/database_table_descriptions.csv")

NLToSQL_test.ipynb CHANGED Viewed

@@ -317,14 +317,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "(5,)\n"
      ]
     }
    ],
@@ -332,13 +332,13 @@
     "import mysql.connector\n",
     "\n",
     "# Create a connection to the MySQL server\n",
-    "conn = mysql.connector.connect(user='root', password = '' ,host='localhost', database='classicmodels')\n",
     "\n",
     "# Create a cursor to interact with the MySQL server\n",
     "cursor = conn.cursor()\n",
     "\n",
     "# Execute the SELECT query\n",
-    "cursor.execute(\"SELECT COUNT(*) AS numberOfEmployees FROM employees e JOIN offices o ON e.officeCode = o.officeCode WHERE o.city = 'Paris'\")\n",
     "\n",
     "# Fetch all the results\n",
     "records = cursor.fetchall()\n",
@@ -361,7 +361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -547,7 +547,7 @@
     "import os\n",
     "\n",
     "db_user = \"root\"\n",
-    "db_password = \"root\"\n",
     "db_host = \"localhost\"\n",
     "db_name = \"classicmodels\"\n",
     "from langchain_community.utilities.sql_database import SQLDatabase\n",

   },
   {
    "cell_type": "code",
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "(23,)\n"
      ]
     }
    ],
     "import mysql.connector\n",
     "\n",
     "# Create a connection to the MySQL server\n",
+    "conn = mysql.connector.connect(user='admin', password = 'avnishk96' ,host='demo-db.cdm44iseol25.us-east-1.rds.amazonaws.com', database='classicmodels')\n",
     "\n",
     "# Create a cursor to interact with the MySQL server\n",
     "cursor = conn.cursor()\n",
     "\n",
     "# Execute the SELECT query\n",
+    "cursor.execute(\"SELECT COUNT(*) AS numberOfEmployees FROM employees;\")\n",
     "\n",
     "# Fetch all the results\n",
     "records = cursor.fetchall()\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
     "import os\n",
     "\n",
     "db_user = \"root\"\n",
+    "db_password = \"\"\n",
     "db_host = \"localhost\"\n",
     "db_name = \"classicmodels\"\n",
     "from langchain_community.utilities.sql_database import SQLDatabase\n",

app.py CHANGED Viewed

@@ -226,9 +226,9 @@ def sql_translator(filepath, key):
         return True
-    db_user = "root"
-    db_password = ""
-    db_host = "localhost"
     db_name = "classicmodels"
     db = SQLDatabase.from_uri(f"mysql+pymysql://{db_user}:{db_password}@{db_host}/{db_name}")
@@ -265,7 +265,12 @@ def sql_translator(filepath, key):
     else:
         print("libportaudio installation failed. Please install it manually.")
-    sql_query = transcribe_speech(filepath)
     chain = (
     RunnablePassthrough.assign(table_names_to_use=select_table(os.getcwd()+"/database_table_descriptions.csv")) |
     RunnablePassthrough.assign(query=generate_query).assign(
@@ -282,25 +287,35 @@ def sql_translator(filepath, key):
 def create_interface():
-    with gr.Blocks() as interface:
-        gr.Markdown("## Audio and Text Processing Interface")
-        # Text input component
-        text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text")
-        # Audio input component
-        audio_input = gr.Audio(sources="microphone", type="filepath", label="Record or Upload Audio")
-        # Button to trigger processing
-        process_button = gr.Button("Process")
-        # Output component
-        output_text = gr.Textbox(label="Output")
-        # Define the action for the button click
-        process_button.click(fn=sql_translator, inputs=[audio_input, text_input], outputs=output_text)
-    return interface
 if __name__ == "__main__":

         return True
+    db_user = "admin"
+    db_password = "avnishk96"
+    db_host = "demo-db.cdm44iseol25.us-east-1.rds.amazonaws.com"
     db_name = "classicmodels"
     db = SQLDatabase.from_uri(f"mysql+pymysql://{db_user}:{db_password}@{db_host}/{db_name}")
     else:
         print("libportaudio installation failed. Please install it manually.")
+    if os.path.isfile(filepath):
+        sql_query = transcribe_speech(filepath)
+    else:
+        sql_query = filepath
+    # sql_query = transcribe_speech(filepath)
     chain = (
     RunnablePassthrough.assign(table_names_to_use=select_table(os.getcwd()+"/database_table_descriptions.csv")) |
     RunnablePassthrough.assign(query=generate_query).assign(
 def create_interface():
+    demo = gr.Blocks()
+    mic_transcribe = gr.Interface(
+        fn=sql_translator,
+        # key_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Open AI Key"),
+        # audio_input = gr.Audio(sources="microphone", type="filepath"),
+        inputs = [gr.Audio(sources="microphone", type="filepath"),gr.Textbox(lines=2, placeholder="Enter text here...", label="Open AI Key")],
+        outputs=gr.components.Textbox(),
+    )
+    file_transcribe = gr.Interface(
+        fn=sql_translator,
+        # key_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Open AI Key"),
+        # query_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text..."),
+        inputs = [gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text...") ,gr.Textbox(lines=2, placeholder="Enter text here...", label="Open AI Key")],
+        # inputs=gr.Audio(sources="upload", type="filepath"),
+        outputs=gr.components.Textbox(),
+    )
+    with demo:
+        gr.TabbedInterface(
+            [mic_transcribe, file_transcribe],
+            ["Transcribe Microphone", "Transcribe Audio File"],
+        )
+    demo.launch(debug=True)
+    # return interface
 if __name__ == "__main__":

gradio_demo.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 71,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -43,7 +43,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
    "metadata": {},
    "outputs": [
     {
@@ -63,7 +63,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 73,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -105,7 +105,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -156,7 +156,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -177,7 +177,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -210,7 +210,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 77,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -258,11 +258,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
    "metadata": {},
    "outputs": [],
    "source": [
     "def sql_translator(filepath,key):\n",
     "    db_user = \"root\"\n",
     "    db_password = \"\"\n",
     "    db_host = \"localhost\"\n",
@@ -302,7 +303,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 79,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -311,10 +312,19 @@
     "        gr.Markdown(\"## Audio and Text Processing Interface\")\n",
     "\n",
     "        # Text input component\n",
-    "        text_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Input Text\")\n",
     "        \n",
     "        # Audio input component\n",
     "        audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\", label=\"Record or Upload Audio\")\n",
     "        \n",
     "        # Button to trigger processing\n",
     "        process_button = gr.Button(\"Process\")\n",
@@ -328,15 +338,31 @@
     "    return interface"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
-       "<div><iframe src=\"https://dc05dcb51237bf845d.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -345,27 +371,514 @@
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "data": {
       "text/plain": []
      },
-     "execution_count": 81,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "interface = create_interface()\n",
-    "interface.launch(share=True)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 62,
    "metadata": {},
    "outputs": [],
    "source": [
-    "!gradio deploy"
    ]
   },
   {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
     "def sql_translator(filepath,key):\n",
+    "    # Please configure your DB credentials and paths of the files for few shot learning and fine tuning\n",
     "    db_user = \"root\"\n",
     "    db_password = \"\"\n",
     "    db_host = \"localhost\"\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
     "        gr.Markdown(\"## Audio and Text Processing Interface\")\n",
     "\n",
     "        # Text input component\n",
+    "        text_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\")\n",
     "        \n",
     "        # Audio input component\n",
     "        audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\", label=\"Record or Upload Audio\")\n",
+    "\n",
+    "        # with gr.TabbedInterface([\"Audio Input\", \"Text Input\"]) as tabs:\n",
+    "        #     with gr.Tab(\"Audio Input\"):\n",
+    "        #         audio_input = gr.Audio(source=\"microphone\", type=\"filepath\", label=\"Record or Upload Audio\")\n",
+    "        #         selected_input = audio_input  # Reference to the selected input component\n",
+    "\n",
+    "        #     with gr.Tab(\"Text Input\"):\n",
+    "        #         query_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Input Text\")\n",
+    "        #         selected_input = query_input  # Reference to the selected input component\n",
     "        \n",
     "        # Button to trigger processing\n",
     "        process_button = gr.Button(\"Process\")\n",
     "    return interface"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Just Audio UI"
+   ]
+  },
   {
    "cell_type": "code",
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7861\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
      "metadata": {},
      "output_type": "display_data"
     },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Keyboard interruption in main thread... closing server.\n"
+     ]
+    },
     {
      "data": {
       "text/plain": []
      },
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "interface = create_interface()\n",
+    "interface.launch(debug=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Audio and Text UI(Version 1)"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
+    "demo = gr.Blocks()\n",
+    "\n",
+    "# key_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\")\n",
+    "# audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\")\n",
+    "# query_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Input Text...\")\n",
+    "\n",
+    "mic_transcribe = gr.Interface(\n",
+    "    fn=sql_translator,\n",
+    "    # key_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\"),\n",
+    "    # audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\"),\n",
+    "    inputs = [gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\"), gr.Audio(sources=\"microphone\", type=\"filepath\")],\n",
+    "    outputs=gr.components.Textbox(),\n",
+    ")\n",
+    "\n",
+    "file_transcribe = gr.Interface(\n",
+    "    fn=sql_translator,\n",
+    "    # key_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\"),\n",
+    "    # query_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Input Text...\"),\n",
+    "    inputs = [gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\"), gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Input Text...\")],\n",
+    "    # inputs=gr.Audio(sources=\"upload\", type=\"filepath\"),\n",
+    "    outputs=gr.components.Textbox(),\n",
+    ")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7861\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Keyboard interruption in main thread... closing server.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "with demo:\n",
+    "    gr.TabbedInterface(\n",
+    "        [mic_transcribe, file_transcribe],\n",
+    "        [\"Transcribe Microphone\", \"Transcribe Audio File\"],\n",
+    "    )\n",
+    "\n",
+    "demo.launch(debug=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Audio and Text UI (Version 2)\n",
+    "## Run all the 3 cells below:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_new_interface(translate_function):\n",
+    "    demo = gr.Blocks()\n",
+    "    \n",
+    "    # demo = gr.Blocks()\n",
+    "\n",
+    "# key_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\")\n",
+    "# audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\")\n",
+    "# query_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Input Text...\")\n",
+    "\n",
+    "    mic_transcribe = gr.Interface(\n",
+    "        fn=translate_function,\n",
+    "        # key_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\"),\n",
+    "        # audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\"),\n",
+    "        inputs = [gr.Audio(sources=\"microphone\", type=\"filepath\"),gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\")],\n",
+    "        outputs=gr.components.Textbox(),\n",
+    "    )\n",
+    "\n",
+    "    file_transcribe = gr.Interface(\n",
+    "        fn=translate_function,\n",
+    "        # key_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\"),\n",
+    "        # query_input = gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Input Text...\"),\n",
+    "        inputs = [gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Input Text...\") ,gr.Textbox(lines=2, placeholder=\"Enter text here...\", label=\"Open AI Key\")],\n",
+    "        # inputs=gr.Audio(sources=\"upload\", type=\"filepath\"),\n",
+    "        outputs=gr.components.Textbox(),\n",
+    "    )\n",
+    "\n",
+    "    with demo:\n",
+    "        gr.TabbedInterface(\n",
+    "            [mic_transcribe, file_transcribe],\n",
+    "            [\"Transcribe Microphone\", \"Transcribe Audio File\"],\n",
+    "        )\n",
+    "    \n",
+    "    demo.launch(debug=True)\n",
+    "\n",
+    "    # return interface"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def sql_translator1(filepath, key):    \n",
+    "    def select_table(desc_path):\n",
+    "        def get_table_details():\n",
+    "            # Read the CSV file into a DataFrame\n",
+    "            table_description = pd.read_csv(desc_path) ##\"/teamspace/studios/this_studio/database_table_descriptions.csv\"\n",
+    "            table_docs = []\n",
+    "\n",
+    "            # Iterate over the DataFrame rows to create Document objects\n",
+    "            table_details = \"\"\n",
+    "            for index, row in table_description.iterrows():\n",
+    "                table_details = table_details + \"Table Name:\" + row['Table'] + \"\\n\" + \"Table Description:\" + row['Description'] + \"\\n\\n\"\n",
+    "\n",
+    "            return table_details\n",
+    "        \n",
+    "        class Table(BaseModel):\n",
+    "            \"\"\"Table in SQL database.\"\"\"\n",
+    "\n",
+    "            name: str = Field(description=\"Name of table in SQL database.\")\n",
+    "        \n",
+    "        table_details_prompt = f\"\"\"Return the names of ALL the SQL tables that MIGHT be relevant to the user question. \\\n",
+    "                The tables are:\n",
+    "\n",
+    "                {get_table_details()}\n",
+    "\n",
+    "                Remember to include ALL POTENTIALLY RELEVANT tables, even if you're not sure that they're needed.\"\"\"\n",
+    "\n",
+    "        table_chain = create_extraction_chain_pydantic(Table, llm, system_message=table_details_prompt)\n",
+    "        \n",
+    "        def get_tables(tables: List[Table]) -> List[str]:\n",
+    "            tables  = [table.name for table in tables]\n",
+    "            return tables\n",
+    "\n",
+    "        select_table = {\"input\": itemgetter(\"question\")} | create_extraction_chain_pydantic(Table, llm, system_message=table_details_prompt) | get_tables\n",
+    "\n",
+    "        return select_table\n",
+    "\n",
+    "\n",
+    "    def prompt_creation(example_path):\n",
+    "\n",
+    "        with open(example_path, 'r') as file: ##'/teamspace/studios/this_studio/few_shot_samples.json'\n",
+    "            data = json.load(file)\n",
+    "\n",
+    "        examples = data[\"examples\"]\n",
+    "\n",
+    "        example_prompt = ChatPromptTemplate.from_messages(\n",
+    "            [\n",
+    "                (\"human\", \"{input}\\nSQLQuery:\"),\n",
+    "                (\"ai\", \"{query}\"),\n",
+    "            ]\n",
+    "        )\n",
+    "\n",
+    "        vectorstore = Chroma()\n",
+    "        vectorstore.delete_collection()\n",
+    "        example_selector = SemanticSimilarityExampleSelector.from_examples(\n",
+    "            examples,\n",
+    "            OpenAIEmbeddings(),\n",
+    "            vectorstore,\n",
+    "            k=2,\n",
+    "            input_keys=[\"input\"],\n",
+    "        )\n",
+    "        \n",
+    "        few_shot_prompt = FewShotChatMessagePromptTemplate(\n",
+    "            example_prompt=example_prompt,\n",
+    "            example_selector=example_selector,\n",
+    "            input_variables=[\"input\",\"top_k\"],\n",
+    "        )\n",
+    "\n",
+    "\n",
+    "        final_prompt = ChatPromptTemplate.from_messages(\n",
+    "            [\n",
+    "                (\"system\", \"You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\\n\\nHere is the relevant table info: {table_info}\\n\\nBelow are a number of examples of questions and their corresponding SQL queries.\"),\n",
+    "                few_shot_prompt,\n",
+    "                MessagesPlaceholder(variable_name=\"messages\"),\n",
+    "                (\"human\", \"{input}\"),\n",
+    "            ]\n",
+    "        )\n",
+    "\n",
+    "        print(few_shot_prompt.format(input=\"How many products are there?\"))\n",
+    "            \n",
+    "        return final_prompt\n",
+    "\n",
+    "    def rephrase_answer():\n",
+    "        answer_prompt = PromptTemplate.from_template(\n",
+    "        \"\"\"Given the following user question, corresponding SQL query, and SQL result, answer the user question.\n",
+    "\n",
+    "            Question: {question}\n",
+    "            SQL Query: {query}\n",
+    "            SQL Result: {result}\n",
+    "            Answer: \"\"\"\n",
+    "        )\n",
+    "\n",
+    "        rephrase_answer = answer_prompt | llm | StrOutputParser()\n",
+    "\n",
+    "        return rephrase_answer\n",
+    "\n",
+    "\n",
+    "    def is_ffmpeg_installed():\n",
+    "        try:\n",
+    "            # Run `ffmpeg -version` to check if ffmpeg is installed\n",
+    "            subprocess.run(['ffmpeg', '-version'], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            return True\n",
+    "        except (subprocess.CalledProcessError, FileNotFoundError):\n",
+    "            return False\n",
+    "\n",
+    "    def install_ffmpeg():\n",
+    "        try:\n",
+    "            if sys.platform.startswith('linux'):\n",
+    "                subprocess.run(['sudo', 'apt-get', 'update'], check=True)\n",
+    "                subprocess.run(['sudo', 'apt-get', 'install', '-y', 'ffmpeg'], check=True)\n",
+    "            elif sys.platform == 'darwin':  # macOS\n",
+    "                subprocess.run(['/bin/bash', '-c', 'brew install ffmpeg'], check=True)\n",
+    "            elif sys.platform == 'win32':\n",
+    "                print(\"Please download ffmpeg from https://ffmpeg.org/download.html and install it manually.\")\n",
+    "                return False\n",
+    "            else:\n",
+    "                print(\"Unsupported OS. Please install ffmpeg manually.\")\n",
+    "                return False\n",
+    "        except subprocess.CalledProcessError as e:\n",
+    "            print(f\"Failed to install ffmpeg: {e}\")\n",
+    "            return False\n",
+    "        return True\n",
+    "\n",
+    "    def transcribe_speech(filepath):\n",
+    "            output = pipe(\n",
+    "                filepath,\n",
+    "                max_new_tokens=256,\n",
+    "                generate_kwargs={\n",
+    "                    \"task\": \"transcribe\",\n",
+    "                    \"language\": \"english\",\n",
+    "                },  # update with the language you've fine-tuned on\n",
+    "                chunk_length_s=30,\n",
+    "                batch_size=8,\n",
+    "            )\n",
+    "            return output[\"text\"]\n",
+    "        \n",
+    "    # def record_command():\n",
+    "    #         sample_rate = 16000  # Sample rate in Hz\n",
+    "    #         duration = 8  # Duration in seconds\n",
+    "\n",
+    "    #         print(\"Recording...\")\n",
+    "\n",
+    "    #         # Record audio\n",
+    "    #         audio = sd.rec(int(sample_rate * duration), samplerate=sample_rate, channels=1, dtype='float32')\n",
+    "    #         sd.wait()  # Wait until recording is finished\n",
+    "\n",
+    "    #         print(\"Recording finished\")\n",
+    "\n",
+    "    #         # Convert the audio to a binary stream and save it to a variable\n",
+    "    #         audio_buffer = io.BytesIO()\n",
+    "    #         soundfile.write(audio_buffer, audio, sample_rate, format='WAV')\n",
+    "    #         audio_buffer.seek(0)  # Reset buffer position to the beginning\n",
+    "\n",
+    "    #         # The audio file is now saved in audio_buffer\n",
+    "    #         # You can read it again using soundfile or any other audio library\n",
+    "    #         audio_data, sample_rate = soundfile.read(audio_buffer)\n",
+    "\n",
+    "    #         # Optional: Save the audio to a file for verification\n",
+    "    #         # with open('recorded_audio.wav', 'wb') as f:\n",
+    "    #         #     f.write(audio_buffer.getbuffer())\n",
+    "\n",
+    "    #         print(\"Audio saved to variable\")\n",
+    "    #         return audio_data\n",
+    "    \n",
+    "    def check_libportaudio_installed():\n",
+    "        try:\n",
+    "            # Run `libportaudio2 -version` to check if libportaudio2 is installed\n",
+    "            subprocess.run(['libportaudio2', '-version'], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            return True\n",
+    "        except (subprocess.CalledProcessError, FileNotFoundError):\n",
+    "            return False\n",
+    "\n",
+    "    def install_libportaudio():\n",
+    "        try:\n",
+    "            if sys.platform.startswith('linux'):\n",
+    "                subprocess.run(['sudo', 'apt-get', 'update'], check=True)\n",
+    "                subprocess.run(['sudo', 'apt-get', 'install', '-y', 'libportaudio2'], check=True)\n",
+    "            elif sys.platform == 'darwin':  # macOS\n",
+    "                subprocess.run(['/bin/bash', '-c', 'brew install portaudio'], check=True)\n",
+    "            elif sys.platform == 'win32':\n",
+    "                print(\"Please download ffmpeg from https://ffmpeg.org/download.html and install it manually.\")\n",
+    "                return False\n",
+    "            else:\n",
+    "                print(\"Unsupported OS. Please install ffmpeg manually.\")\n",
+    "                return False\n",
+    "        except subprocess.CalledProcessError as e:\n",
+    "            print(f\"Failed to install ffmpeg: {e}\")\n",
+    "            return False\n",
+    "        return True\n",
+    "\n",
+    "    # Please configure your DB credentials and paths of the files for few shot learning and fine tuning\n",
+    "    db_user = \"root\"\n",
+    "    db_password = \"\"\n",
+    "    db_host = \"localhost\"\n",
+    "    db_name = \"classicmodels\"\n",
+    "\n",
+    "    db = SQLDatabase.from_uri(f\"mysql+pymysql://{db_user}:{db_password}@{db_host}/{db_name}\")\n",
+    "    # print(db.dialect)\n",
+    "    # print(db.get_usable_table_names())\n",
+    "    # print(db.table_info)\n",
+    "    os.environ[\"OPENAI_API_KEY\"] =  key\n",
+    "\n",
+    "    llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
+    "\n",
+    "    history = ChatMessageHistory()\n",
+    "\n",
+    "    final_prompt = prompt_creation(os.getcwd()+\"/few_shot_samples.json\")\n",
+    "\n",
+    "    generate_query = create_sql_query_chain(llm, db, final_prompt)\n",
+    "\n",
+    "    execute_query = QuerySQLDataBaseTool(db=db)\n",
+    "\n",
+    "    if is_ffmpeg_installed():\n",
+    "        print(\"ffmpeg is already installed.\")\n",
+    "    else:\n",
+    "        print(\"ffmpeg is not installed. Installing ffmpeg...\")\n",
+    "    if install_ffmpeg():\n",
+    "        print(\"ffmpeg installation successful.\")\n",
+    "    else:\n",
+    "        print(\"ffmpeg installation failed. Please install it manually.\")\n",
+    "    \n",
+    "    if check_libportaudio_installed():\n",
+    "        print(\"libportaudio is already installed.\")\n",
+    "    else:\n",
+    "        print(\"libportaudio is not installed. Installing ffmpeg...\")\n",
+    "    if install_libportaudio():\n",
+    "        print(\"libportaudio installation successful.\")\n",
+    "    else:\n",
+    "        print(\"libportaudio installation failed. Please install it manually.\")\n",
+    "\n",
+    "    if os.path.isfile(filepath):\n",
+    "        sql_query = transcribe_speech(filepath)\n",
+    "    else:\n",
+    "        sql_query = filepath\n",
+    "        \n",
+    "    chain = (\n",
+    "    RunnablePassthrough.assign(table_names_to_use=select_table(os.getcwd()+\"/database_table_descriptions.csv\")) |\n",
+    "    RunnablePassthrough.assign(query=generate_query).assign(\n",
+    "    result=itemgetter(\"query\") | execute_query\n",
+    "    )\n",
+    "    | rephrase_answer()\n",
+    "    )\n",
+    "\n",
+    "    output = chain.invoke({\"question\": sql_query, \"messages\":history.messages})\n",
+    "    history.add_user_message(sql_query)\n",
+    "    history.add_ai_message(output)\n",
+    "\n",
+    "    return output\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7861\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Human: List the product lines and the number of products in each line.\n",
+      "SQLQuery:\n",
+      "AI: SELECT productLine, COUNT(*) AS numberOfProducts FROM products GROUP BY productLine;\n",
+      "Human: How many employees are there\n",
+      "SQLQuery:\n",
+      "AI: SELECT COUNT(*) FROM 'Employee'\n",
+      "ffmpeg is already installed.\n",
+      "Failed to install ffmpeg: Command '['/bin/bash', '-c', 'brew install ffmpeg']' returned non-zero exit status 127.\n",
+      "ffmpeg installation failed. Please install it manually.\n",
+      "libportaudio is not installed. Installing ffmpeg...\n",
+      "Failed to install ffmpeg: Command '['/bin/bash', '-c', 'brew install portaudio']' returned non-zero exit status 127.\n",
+      "libportaudio installation failed. Please install it manually.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/bin/bash: brew: command not found\n",
+      "/bin/bash: brew: command not found\n",
+      "/Users/avnishkanungo/anaconda3/envs/DLStudioEnv/lib/python3.10/site-packages/transformers/models/whisper/generation_whisper.py:480: FutureWarning: The input name `inputs` is deprecated. Please make sure to use `input_features` instead.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Human: List the product lines and the number of products in each line.\n",
+      "SQLQuery:\n",
+      "AI: SELECT productLine, COUNT(*) AS numberOfProducts FROM products GROUP BY productLine;\n",
+      "Human: List the product names and their respective buy prices.\n",
+      "SQLQuery:\n",
+      "AI: SELECT productName, buyPrice FROM products;\n",
+      "ffmpeg is already installed.\n",
+      "Failed to install ffmpeg: Command '['/bin/bash', '-c', 'brew install ffmpeg']' returned non-zero exit status 127.\n",
+      "ffmpeg installation failed. Please install it manually.\n",
+      "libportaudio is not installed. Installing ffmpeg...\n",
+      "Failed to install ffmpeg: Command '['/bin/bash', '-c', 'brew install portaudio']' returned non-zero exit status 127.\n",
+      "libportaudio installation failed. Please install it manually.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/bin/bash: brew: command not found\n",
+      "/bin/bash: brew: command not found\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Keyboard interruption in main thread... closing server.\n"
+     ]
+    }
+   ],
+   "source": [
+    "create_new_interface(sql_translator1)\n",
+    "# interface_new.launch(debug=True)"
    ]
   },
   {

requirements.txt CHANGED Viewed

@@ -1,16 +1,276 @@
-langchain==0.2.5  # Replace with the correct version for langchain
-transformers
-langchain_huggingface == 0.0.3  # Replace with the correct version for langchain_huggingface
-langchain_openai == 0.1.9  # Replace with the correct version for langchain_openai
-langchain_community == 0.2.5  # Replace with the correct version for langchain_community
-langchain_core == 0.2.9
-langchain-text-splitters == 0.2.1
-chromadb==0.3.22  # Replace with the correct version for chromadb
-pandas==1.5.3
-pydantic==1.10.8  # Ensure to match the version if langchain requires a specific one
-librosa == 0.10.2
-soundfile == 0.12.1
-gradio == 1.0.2
-datasets == 2.20.0
-PyMySQL == 1.1.1
-sounddevice == 0.4.7

+absl-py==2.1.0
+accelerate==0.31.0
+aiofiles==23.2.1
+aiohttp==3.9.5
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.4.0
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asgiref==3.8.1
+asttokens==2.4.1
+async-lru==2.0.4
+async-timeout==4.0.3
+attrs==23.2.0
+audioread==3.0.1
+awscrt==0.20.11
+Babel==2.15.0
+backoff==2.2.1
+bcrypt==4.1.3
+beautifulsoup4==4.12.3
+bitsandbytes==0.43.1
+bleach==6.1.0
+boto3==1.34.129
+botocore==1.34.129
+build==1.2.1
+cachetools==5.3.3
+certifi==2024.6.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+chroma-hnswlib==0.7.3
+chromadb==0.5.3
+click==8.1.7
+coloredlogs==15.0.1
+comm==0.2.2
+contourpy==1.2.1
+cycler==0.12.1
+dataclasses-json==0.6.7
+datasets==2.20.0
+debugpy==1.8.1
+decorator==5.1.1
+defusedxml==0.7.1
+Deprecated==1.2.14
+dill==0.3.8
+distro==1.9.0
+dnspython==2.6.1
+email_validator==2.1.2
+exceptiongroup==1.2.1
+executing==2.0.1
+fastapi==0.111.0
+fastapi-cli==0.0.4
+fastjsonschema==2.20.0
+ffmpy==0.3.2
+filelock==3.15.3
+fire==0.6.0
+flatbuffers==24.3.25
+fonttools==4.53.0
+fqdn==1.5.1
+frozenlist==1.4.1
+fsspec==2024.5.0
+google-auth==2.30.0
+google-auth-oauthlib==1.2.0
+googleapis-common-protos==1.63.2
+gradio==4.37.2
+gradio_client==1.0.2
+greenlet==3.0.3
+grpcio==1.64.1
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.4
+humanfriendly==10.0
+idna==3.7
+importlib_metadata==7.1.0
+importlib_resources==6.4.0
+ipykernel==6.26.0
+ipython==8.17.2
+ipywidgets==8.1.1
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.4
+jmespath==1.0.1
+joblib==1.4.2
+json5==0.9.25
+jsonpatch==1.33
+jsonpointer==3.0.0
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyter_server==2.14.1
+jupyter_server_terminals==0.5.3
+jupyterlab==4.2.0
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.2
+jupyterlab_widgets==3.0.11
+kiwisolver==1.4.5
+kubernetes==30.1.0
+langchain==0.2.5
+langchain-community==0.2.5
+langchain-core==0.2.9
+langchain-huggingface==0.0.3
+langchain-openai==0.1.9
+langchain-text-splitters==0.2.1
+langsmith==0.1.82
+lazy_loader==0.4
+librosa==0.10.2.post1
+lightning==2.3.0
+lightning-cloud==0.5.69
+lightning-utilities==0.11.2
+lightning_sdk==0.1.10
+litdata==0.2.10
+llvmlite==0.43.0
+Markdown==3.6
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.3
+matplotlib==3.8.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mistune==3.0.2
+mmh3==4.1.0
+monotonic==1.6
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+multiprocess==0.70.16
+mypy-extensions==1.0.0
+mysql-connector-python==8.4.0
+nbclient==0.10.0
+nbconvert==7.16.4
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.3
+notebook_shim==0.2.4
+numba==0.60.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.5.40
+nvidia-nvtx-cu12==12.1.105
+oauthlib==3.2.2
+onnxruntime==1.18.0
+openai==1.35.3
+opentelemetry-api==1.25.0
+opentelemetry-exporter-otlp-proto-common==1.25.0
+opentelemetry-exporter-otlp-proto-grpc==1.25.0
+opentelemetry-instrumentation==0.46b0
+opentelemetry-instrumentation-asgi==0.46b0
+opentelemetry-instrumentation-fastapi==0.46b0
+opentelemetry-proto==1.25.0
+opentelemetry-sdk==1.25.0
+opentelemetry-semantic-conventions==0.46b0
+opentelemetry-util-http==0.46b0
+orjson==3.10.5
+overrides==7.7.0
+packaging==24.1
+pandas==2.1.4
+pandocfilters==1.5.1
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.3.0
+platformdirs==4.2.2
+pooch==1.8.2
+posthog==3.5.0
+prometheus_client==0.20.0
+prompt_toolkit==3.0.47
+protobuf==4.23.4
+psutil==6.0.0
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow==16.1.0
+pyarrow-hotfix==0.6
+pyasn1==0.6.0
+pyasn1_modules==0.4.0
+pycparser==2.22
+pydantic==2.7.4
+pydantic_core==2.18.4
+pydub==0.25.1
+Pygments==2.18.0
+PyJWT==2.8.0
+PyMySQL==1.1.1
+pyparsing==3.1.2
+PyPika==0.48.9
+pyproject_hooks==1.1.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-json-logger==2.0.7
+python-multipart==0.0.9
+pytorch-lightning==2.3.0
+pytz==2024.1
+PyYAML==6.0.1
+pyzmq==26.0.3
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+requests-oauthlib==2.0.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.1
+rpds-py==0.18.1
+rsa==4.9
+ruff==0.5.0
+s3transfer==0.10.1
+safetensors==0.4.3
+scikit-learn==1.3.2
+scipy==1.11.4
+semantic-version==2.10.0
+Send2Trash==1.8.3
+sentence-transformers==3.0.1
+sentencepiece==0.2.0
+shellingham==1.5.4
+simple-term-menu==1.6.4
+six==1.16.0
+sniffio==1.3.1
+soundfile==0.12.1
+soupsieve==2.5
+soxr==0.3.7
+SQLAlchemy==2.0.31
+stack-data==0.6.3
+starlette==0.37.2
+sympy==1.12.1
+tenacity==8.4.2
+tensorboard==2.15.1
+tensorboard-data-server==0.7.2
+termcolor==2.4.0
+terminado==0.18.1
+threadpoolctl==3.5.0
+tiktoken==0.7.0
+tinycss2==1.3.0
+tokenizers==0.19.1
+tomli==2.0.1
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.2.1+cu121
+torchmetrics==1.3.1
+torchvision==0.17.1+cu121
+tornado==6.4.1
+tqdm==4.66.4
+traitlets==5.14.3
+transformers==4.41.2
+triton==2.2.0
+typer==0.12.3
+types-python-dateutil==2.9.0.20240316
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+tzdata==2024.1
+ujson==5.10.0
+uri-template==1.3.0
+urllib3==2.2.2
+uvicorn==0.30.1
+uvloop==0.19.0
+watchfiles==0.22.0
+wcwidth==0.2.13
+webcolors==24.6.0
+webencodings==0.5.1
+websocket-client==1.8.0
+websockets==11.0.3
+Werkzeug==3.0.3
+widgetsnbextension==4.0.11
+wrapt==1.16.0
+xxhash==3.4.1
+yarl==1.9.4
+zipp==3.19.2