Post
303
This post shows how to load a dataset using the `datasets` library and save it to an SQLite database. It also includes a function that queries the database and prints the first five rows.
from datasets import load_dataset
import sqlite3
# Load the 'train' split of the 'aifeifei798/song_lyrics_min' dataset.
# Each row is later read by save_dataset_to_sqlite using the keys
# 'title', 'tag' and 'lyrics'.
dataset = load_dataset('aifeifei798/song_lyrics_min', split='train')
# Define a function to save the dataset to an SQLite database
def save_dataset_to_sqlite(dataset, db_path='temp_dataset.db'):
    """Write every row of *dataset* into the ``songs`` table of an SQLite file.

    Each row is stored with its position in *dataset* as the ``id`` primary
    key. Calling the function again on the same database overwrites the rows
    with matching ids instead of failing with ``sqlite3.IntegrityError``.
    Rows whose id is beyond the length of the new dataset are left untouched.

    Args:
        dataset: Iterable of mappings; each row is read with the keys
            ``'title'``, ``'tag'`` and ``'lyrics'``.
        db_path: Path of the SQLite database file. ``sqlite3.connect``
            creates the file if it does not exist.

    Raises:
        KeyError: If a row lacks one of the three expected keys.
        sqlite3.Error: If the database cannot be opened or written.
    """
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` wraps the work in one transaction: it commits on
        # success and rolls back if any statement (or row lookup) raises,
        # so a failed run never leaves a half-written table behind.
        with conn:
            conn.execute(
                '''CREATE TABLE IF NOT EXISTS songs
                (id INTEGER PRIMARY KEY, title TEXT, tag TEXT, lyrics TEXT)'''
            )
            # OR REPLACE keeps re-runs idempotent: ids always restart at 0,
            # so a plain INSERT would collide with the rows of a prior run.
            conn.executemany(
                "INSERT OR REPLACE INTO songs (id, title, tag, lyrics) "
                "VALUES (?, ?, ?, ?)",
                (
                    (i, row['title'], row['tag'], row['lyrics'])
                    for i, row in enumerate(dataset)
                ),
            )
    finally:
        # The connection context manager does not close the connection;
        # do it explicitly, on the error path too.
        conn.close()
# Save the loaded dataset to the SQLite database at the function's default
# path ('temp_dataset.db').
save_dataset_to_sqlite(dataset)
# Define a function to query the database
def query_database(db_path='temp_dataset.db', limit=5):
    """Print the first *limit* rows of the ``songs`` table, ordered by ``id``.

    Each row is printed as the tuple returned by ``sqlite3``.

    Args:
        db_path: Path of the SQLite database file to read.
        limit: Maximum number of rows to print. Defaults to 5.

    Raises:
        sqlite3.OperationalError: If the ``songs`` table does not exist.
    """
    conn = sqlite3.connect(db_path)
    try:
        # ORDER BY makes "first rows" well defined; without it SQL does not
        # guarantee any particular row order. The limit is bound as a
        # parameter rather than formatted into the statement.
        rows = conn.execute(
            "SELECT * FROM songs ORDER BY id LIMIT ?", (limit,)
        ).fetchall()
    finally:
        # Close even when the query fails (e.g. missing table).
        conn.close()

    for row in rows:
        print(row)
# Query the database at the function's default path ('temp_dataset.db') and
# print the rows it returns.
query_database()