import os import sys # insert current directory to sys.path sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) import re import numpy as np import pandas as pd import streamlit as st import requests from googletrans import Translator from langdetect import detect translator = Translator() st.set_page_config( layout="wide", page_title="Text To SQL", page_icon="📊", ) # TEXT_2_SQL_API = "http://83.219.197.235:40172/api/text2sql/ask" TEXT_2_SQL_API = os.environ.get( "TEXT_2_SQL_API", "http://localhost:8501/api/text2sql/ask" ) @st.cache_data def ask_text2sql(question, context): if detect(question) != "en": question = translate_question(question) st.write("The question is translated to Vietnamese:") st.code(question, language="en") r = requests.post( TEXT_2_SQL_API, json={ "context": context, "question": question, }, ) return r.json()["answers"][0] @st.cache_data def translate_question(question): return translator.translate(question, dest="en").text @st.cache_data def load_example_df(): example_df = pd.read_csv("resources/examples.csv") return example_df def introduction(): st.title("📊 Introduction") st.write("👋 Welcome to the Text to SQL app!") st.write( "🔍 This app allows you to explore the ability of Text to SQL model. The model is CodeLlama-13b finetuned using QLoRA on NSText2SQL dataset." ) st.write( "📈 The NSText2SQL dataset contains more than 290.000 training samples. Then, the model is evaluated on Spider and VMLP datasets." ) st.write("📑 The other pages in this app include:") st.write( " - 📊 EDA Page: This page includes several visualizations to help you understand the two dataset: Spider and VMLP." ) st.write( " - 💰 Text2SQL Page: This page allows you to generate SQL query from a given question and context." ) st.write( " - 🧑‍💻 About Page: This page provides information about the app and its creators." ) st.write( " - 📚 Reference Page: This page lists the references used in building this app." ) # Define a function for the EDA page def eda(): st.title("📊 Dataset Exploration") # st.subheader("Candlestick Chart") # fig = go.Figure( # data=[ # go.Candlestick( # x=df["date"], # open=df["open"], # high=df["high"], # low=df["low"], # close=df["close"], # increasing_line_color="green", # decreasing_line_color="red", # ) # ], # layout=go.Layout( # title="Tesla Stock Price", # xaxis_title="Date", # yaxis_title="Price (USD)", # xaxis_rangeslider_visible=True, # ), # ) # st.plotly_chart(fig) # st.subheader("Line Chart") # # Plot the closing price over time # plot_column = st.selectbox( # "Select a column", ["open", "close", "low", "high"], index=0 # ) # fig = px.line( # df, x="date", y=plot_column, title=f"Tesla {plot_column} Price Over Time" # ) # st.plotly_chart(fig) # st.subheader("Distribution of Closing Price") # # Plot the distribution of the closing price # closing_price_hist = px.histogram( # df, x="close", nbins=30, title="Distribution of Tesla Closing Price" # ) # st.plotly_chart(closing_price_hist) # st.subheader("Raw Data") # st.write("You can see the raw data below.") # # Display the dataset # st.dataframe(df) def preprocess_context(context): context = context.replace("\n", " ").replace("\t", " ").replace("\r", " ") # Remove multiple spaces context = re.sub(" +", " ", context) return context def examples(): st.title("Examples") st.write( "This page uses CodeLlama-13b finetuned using QLoRA on NSText2SQL dataset to generate SQL query from a given question and context.\nThe examples are listed below" ) example_df = load_example_df() example_tabs = st.tabs([f"Example {i+1}" for i in range(len(example_df))]) example_btns = [] for idx, row in example_df.iterrows(): with example_tabs[idx]: st.markdown("##### Context:") st.code(row["context"], language="sql") st.markdown("##### Question:") st.text(row["question"]) example_btns.append(st.button("Generate SQL query", key=f"exp-btn-{idx}")) if example_btns[idx]: st.markdown("##### SQL query:") query = ask_text2sql(row["question"], row["context"]) st.code(query, language="sql") # Define a function for the Stock Prediction page def interactive_demo(): st.title("Text to SQL using CodeLlama-13b") st.write( "This page uses CodeLlama-13b finetuned using QLoRA on NSText2SQL dataset to generate SQL query from a given question and context." ) st.subheader("Input") context_placeholder = st.empty() question_placeholder = st.empty() context = context_placeholder.text_area( "##### Context", "CREATE TABLE head (name VARCHAR, born_state VARCHAR, age VARCHAR)", key="context", ) question = question_placeholder.text_input( "##### Question", "List the name, born state and age of the heads of departments ordered by age.", key="question", ) get_sql_button = st.button("Generate SQL query") if get_sql_button: st.markdown("##### Output") query = ask_text2sql(question, context) st.write("The SQL query generated by the model is:") # Display the SQL query in a code block st.code(query, language="sql") # Define a function for the About page def about(): st.title("🧑‍💻 About") st.write( "This Streamlit app allows you to explore stock prices and make predictions using an LSTM model." ) st.header("Author") st.write( "This app was developed by Minh Nam. You can contact the author at trminhnam20082002@gmail.com." ) st.header("Data Sources") st.markdown( "The Spider dataset was sourced from [Spider](https://yale-lily.github.io/spider)." ) st.markdown("The vMLP dataset is a private dataset from Viettel.") st.header("Acknowledgments") st.write( "The author would like to thank Dr. Nguyen Van Nam for his proper guidance, Mr. Nguyen Chi Dong for his support." ) st.header("License") st.write( # "This app is licensed under the MIT License. See LICENSE.txt for more information." "N/A" ) def references(): st.title("📚 References") st.header( "References for Text to SQL project using foundation model - CodeLlama-13b" ) st.subheader("1. 'Project for time-series data' by AI VIET NAM, et al.") st.write( "This organization provides a tutorial on how to build a stock price prediction model using LSTM in the AIO2022 course." ) st.write("Link: https://www.facebook.com/aivietnam.edu.vn") st.subheader( "2. 'PyTorch LSTMs for time series forecasting of Indian Stocks' by Vinayak Nayak" ) st.write( "This blog post describes how to build a stock price prediction model using LSTM, RNN and CNN-sliding window model." ) st.write( "Link: https://medium.com/analytics-vidhya/pytorch-lstms-for-time-series-forecasting-of-indian-stocks-8a49157da8b9#b052" ) st.header("References for Streamlit") st.subheader("1. Streamlit Documentation") st.write( "The official documentation for Streamlit provides detailed information about how to use the library and build Streamlit apps." ) st.write("Link: https://docs.streamlit.io/") st.subheader("2. Streamlit Community") st.write( "The Streamlit community includes a forum and a GitHub repository with examples and resources for building Streamlit apps." ) st.write( "Link: https://discuss.streamlit.io/ and https://github.com/streamlit/streamlit/" ) # Create the sidebar st.sidebar.title("Menu") pages = [ "Introduction", "Datasets", "Examples", "Interactive Demo", "About", "References", ] selected_page = st.sidebar.radio("Go to", pages) # Show the appropriate page based on the selection if selected_page == "Introduction": introduction() elif selected_page == "EDA": eda() elif selected_page == "Examples": examples() elif selected_page == "Interactive Demo": interactive_demo() elif selected_page == "About": about() elif selected_page == "References": references()