Spaces:

Hjayswal
/

Huggingface_test1

Sleeping

File size: 4,417 Bytes

# -*- coding: utf-8 -*-
"""
Created on Fri May 26 14:07:22 2023

@author: vibin
"""

import streamlit as st
from pandasql import sqldf
import pandas as pd
import re
from typing import List
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import re






### Main

nav = st.sidebar.radio("Navigation",["TAPAS","Text2SQL"])
if nav == "TAPAS":

    
    
    col1 , col2, col3 = st.columns(3)
    col2.title("TAPAS")
    
    col3 , col4 = st.columns([3,12])
    col4.text("Tabular Data Text Extraction using text")
    
    table = pd.read_csv("data.csv")
    table = table.astype(str)
    st.text("DataSet - ")
    st.dataframe(table,width=3000,height= 400)
    
    st.title("")
    
    lst_q = ["Which country has low medicare","Who are the patients from india","Who are the patients from india","Patients who have Edema","CUI code for diabetes patients","Patients having oxygen less than 94 but 91"]
   
    v2 = st.selectbox("Choose your text",lst_q,index = 0)

    st.title("")
    
    sql_txt = st.text_area("TAPAS Input",v2)
    
    if st.button("Predict"):
        tqa = pipeline(task="table-question-answering", 
                   model="google/tapas-base-finetuned-wtq")
        
        txt_sql = tqa(table=table, query=sql_txt)["answer"]
        st.text("Output - ") 
        st.success(f"{txt_sql}")
        # st.write(all_students)
    
    
    
elif nav == "Text2SQL":
    
    ### Function

    def prepare_input(question: str, table: List[str]):
        table_prefix = "table:"
        question_prefix = "question:"
        join_table = ",".join(table)
        inputs = f"{question_prefix} {question} {table_prefix} {join_table}"
        input_ids = tokenizer(inputs, max_length=512, return_tensors="pt").input_ids
        return input_ids

    def inference(question: str, table: List[str]) -> str:
        input_data = prepare_input(question=question, table=table)
        input_data = input_data.to(model.device)
        outputs = model.generate(inputs=input_data, num_beams=10, top_k=10, max_length=700)
        result = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
        return result
    
    
    col1 , col2, col3 = st.columns(3)
    col2.title("Text2SQL")
    
    col3 , col4 = st.columns([1,20])
    col4.text("Text will be converted to SQL Query and can extract the data from DataSet")
    
    # Import Data
    
    df_qna = pd.read_csv("data.csv", encoding= 'unicode_escape')
    
    st.title("")
    
    st.text("DataSet - ")
    st.dataframe(df_qna,width=3000,height= 500)
    
    st.title("")
    
    lst_q = ["what interface is measure indicator code = 72_HR_ABX and version is 1 and source is TD", "get class code with measure = 72_HR_ABX", "get sum of version for Class_Code is Antibiotic Stewardship", "what interface is measure indicator code = 72_HR_ABX"]
    v2 = st.selectbox("Choose your text",lst_q,index = 0)

    st.title("")
    
    
    sql_txt = st.text_area("Text for SQL Conversion",v2)
    
    
    if st.button("Predict"):
        tokenizer = AutoTokenizer.from_pretrained("juierror/flan-t5-text2sql-with-schema")
        model = AutoModelForSeq2SeqLM.from_pretrained("juierror/flan-t5-text2sql-with-schema")
        
        # text = "what interface is measure indicator code = 72_HR_ABX and version is 1 and source is TD"
        table_name = "df_qna"
        table_col = ["Patient_Name","Country","Disease","CUI","Snomed","Oxygen_Rate","Med_Type","Admission_Date"]
        
        txt_sql = inference(question=sql_txt, table=table_col)
        
        
        ### SQL Modification
        txt_sql = txt_sql.replace("table",table_name)
        sql_quotes = []
        for match in re.finditer("=",txt_sql):
            new_txt = txt_sql[match.span()[1]+1:]
            try:
                match2 = re.search("AND",new_txt)
                sql_quotes.append((new_txt[:match2.span()[0]]).strip())
            except:
                sql_quotes.append(new_txt.strip())
        
        for i in sql_quotes:
            qts = "'" + i + "'"
            txt_sql = txt_sql.replace(i, qts)
            
        st.success(f"{txt_sql}")
        all_students = sqldf(txt_sql)
        
        st.text("Output - ")
        st.write(all_students)