File size: 3,275 Bytes
1d35b34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import sqlite3
from sqlite3 import Error
import csv
import pandas as pd
import os


def create_connection(db_file):
    """ create a database connection to a database that resides
        in the memory
    """
    conn = None;
    try:
        conn = sqlite3.connect(db_file)
        return conn
    
    except Error as e:
        print(e)

    return conn


def create_table(conn, create_table_sql, table_name):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return:
    """
    try:
        c = conn.cursor()
        c.execute(f"""DROP TABLE IF EXISTS {table_name}""")
        c.execute(create_table_sql)
    except Error as e:
        print(e)


def insert_values(conn, task, sql):

    cur = conn.cursor()
    cur.execute(sql, task)
    conn.commit()
    return cur.lastrowid


def populate(csv_file, db_file, table_insert):
    
    conn = create_connection(db_file)
    with conn:
        
        
        with open(csv_file, mode ='r')as file:
            csvfile = csv.reader(file)

            for n,lines in enumerate(csvfile):
                
                if n>0:
                    lines = tuple(i for i in lines)
                    insert_values(conn, lines, table_insert)

                else:pass

def main():

    # name = "sql_pdf.xlsx"
    name="ticket_dataset.xlsx"
    excel_file = (pd.read_excel(name))
    csv_file = f"""{name.split(".")[0]}.csv"""
    excel_file.to_csv(csv_file,
                    index=None,
                    header=True)
    column = [x for x in excel_file.columns]
    column_type = {}
    type_map = {
        "<class 'str'>": "TEXT",
        "<class 'int'>": "INTEGER",
        "<class 'float'>": "REAL",
    }


    for i in range(len(column)):
        datatype = {}
        for j in excel_file.values:
            if type(j[i]) not in list(datatype.keys()):datatype[type(j[i])] = 1
            else: datatype[type(j[i])] += 1
        
        ma_x = 0
        max_type = "<class 'str'>"
        
        for k in list(datatype.keys()):
            if ma_x < datatype[k]:max_type = str(k)

        try:
            column_type[column[i]] = type_map[max_type]

        except KeyError:
            column_type[column[i]] = "TEXT"

    print(column_type)

    table_construct = f"""CREATE TABLE IF NOT EXISTS {name.split(".")[0]}( """
    table_insert = f"""INSERT INTO {name.split(".")[0]}("""
    table_values = f"""VALUES ("""
    for l in list(column_type.keys()):
        table_construct += f"""{l} {column_type[l]}, """
        table_insert += f"""{l}, """
        table_values += "?, "
    table_construct = f"""{table_construct[:-2]});"""
    table_values = f"""{table_values[:-2]})"""
    table_insert = f"""{table_insert[:-2]})\n{table_values}"""

    print(table_construct)
    print("\n\n", table_insert)

    database = f"""{name.split(".")[0]}.db"""
    
    conn = create_connection(database)

    # create tables
    if conn is not None:
        # create projects table
        create_table(conn, table_construct, name.split(".")[0])
    else:
        print("Error! cannot create the database connection.")


    populate(csv_file, database, table_insert)

if __name__ == '__main__':
    main()