Spaces:
Build error
Build error
import tabula | |
import yfinance as yfi | |
import sqlite3 | |
import pandas as pd | |
import json | |
import talib | |
import time | |
import datetime | |
import warnings | |
warnings.filterwarnings("ignore") | |
with open("patterns.json", "r") as f: | |
patterns = json.load(f) | |
update_time = datetime.datetime.now() | |
# dummy update time | |
def find_patterns(df): | |
result = pd.DataFrame( | |
columns=[ | |
"Date", | |
"Kode", | |
"Pattern", | |
"Pattern_Score", | |
"Open_Close_Change", | |
"High_Low_Change", | |
] | |
) | |
for attr, pattern in patterns.items(): | |
scores = getattr(talib, attr)(df["Open"], df["High"], df["Low"], df["Close"]) | |
mask = scores != 0 | |
temp_result = df[mask] | |
if len(temp_result) > 0: | |
temp_result = temp_result.assign( | |
Open_Close_Change=(temp_result["Close"] - temp_result["Open"]) / temp_result["Open"], | |
High_Low_Change=(temp_result["High"] - temp_result["Low"]) / temp_result["Low"], | |
Pattern=[pattern] * len(temp_result), | |
Pattern_Score=scores[mask].values, | |
)[result.columns] | |
result = result.append(temp_result) | |
result = result.assign(time_updated = update_time) | |
return result | |
def pull_data_yfi(): | |
start = time.time() | |
with sqlite3.connect("ihsg.db") as con: | |
tickers = pd.read_sql( | |
""" | |
SELECT Kode FROM list_perusahaan | |
WHERE Kode != "IHSG" | |
""", | |
con=con, | |
).values.flatten() | |
ihsg = ( | |
yfi.download("^JKSE", start="2017-01-01", end="2023-01-10", progress=False) | |
.reset_index() | |
.dropna() | |
.assign(Kode="IHSG") | |
) | |
ihsg = ihsg[["Date", "Kode", "Open", "High", "Low", "Close", "Volume"]] | |
ihsg = ihsg.assign(time_updated = update_time) | |
ihsg.to_sql("historical", if_exists="replace", con=con, index=False) | |
pattern_search = find_patterns(ihsg) | |
pattern_search.to_sql("patterns", if_exists="replace", con=con, index=False) | |
print("INSERTION RESULT: \n") | |
print(pd.read_sql("SELECT * FROM historical", con=con).tail(10)) | |
print(pd.read_sql("SELECT * FROM historical", con=con).shape) | |
print("\n\n*--\n") | |
for i in range(0, len(tickers), 50): | |
ticker = [f"{kode}.JK" for kode in tickers[i : i + 50]] | |
df = ( | |
yfi.download(ticker, start="2017-01-01", end="2023-01-10", progress=False) | |
.T.unstack(level=1) | |
.T.reset_index() | |
.dropna() | |
.rename(columns={"level_1": "Kode"}) | |
) | |
df = df[["Date", "Kode", "Open", "High", "Low", "Close", "Volume"]] | |
df["Kode"] = df["Kode"].str.replace(".JK", "") | |
for j, kode in enumerate(df["Kode"].unique()): | |
print(f"Finding Patterns for {kode} #{i+j+1}\t\t time elapsed = {time.time() - start:.2f} s") | |
pattern_search = find_patterns(df[df["Kode"] == kode]) | |
pattern_search.to_sql("patterns", if_exists="append", con=con, index=False) | |
df = df.assign(time_updated = update_time) | |
df.to_sql("historical", if_exists="append", con=con, index=False) | |
print("INSERTION RESULT: \n") | |
print(pd.read_sql("SELECT * FROM historical", con=con).tail(10)) | |
print(pd.read_sql("SELECT * FROM historical", con=con).shape) | |
print("\n\n*--\n") | |
time.sleep(60) | |
con.commit() | |
def pull_data_klasifikasi_industri(): | |
with sqlite3.connect("ihsg.db") as con: | |
cur = con.cursor() | |
cur.execute("DROP TABLE IF EXISTS list_perusahaan") | |
cur.execute(""" | |
CREATE TABLE list_perusahaan ( | |
Kode VARCHAR(4), | |
Nama TEXT, | |
Sektor TEXT, | |
Instrumen TEXT) | |
""") | |
cur.execute(""" | |
INSERT INTO list_perusahaan VALUES | |
('IHSG', 'Indeks Harga Saham Gabungan', NULL, 'Indeks') | |
""") | |
# TODO: Change Schema from Star Schema to Snowflake Schema | |
# list_perusahaan table will be the dimension table for sector and sub-sector fact tables | |
# note: list_perusahaan table is a dimension table for historical fact table | |
dfs = tabula.read_pdf("Klasifikasi Industri Perusahaan Tercatat.pdf", pages="all", stream=True) | |
# print(len(dfs)) | |
for df in dfs: | |
kode, nama, sektor = None, None, None | |
for row in df.iloc[2:,:].itertuples(): | |
if kode is not None and pd.notna(row[2]): | |
cur.execute(f""" | |
INSERT INTO list_perusahaan VALUES | |
('{kode}', '{nama}', '{sektor}', 'Saham') | |
""") | |
kode, nama, sektor = None, None, None | |
elif kode is not None and pd.isna(row[2]): | |
if pd.notna(row[3]): | |
nama += " " + row[3] | |
if pd.notna(row[5]): | |
sektor += " " + row[5] | |
if kode is None and nama is None and sektor is None and pd.notna(row[2]): | |
if "saham" in row[8].lower(): | |
kode = row[2] | |
nama = row[3] | |
sektor = row[5] | |
else: | |
if kode is not None: | |
cur.execute(f""" | |
INSERT INTO list_perusahaan VALUES | |
('{kode}', '{nama}', '{sektor}', 'Saham') | |
""") | |
print("INSERTION RESULT: \n") | |
print(pd.read_sql("SELECT * FROM list_perusahaan", con=con).tail(10)) | |
print(pd.read_sql("SELECT * FROM list_perusahaan", con=con).shape) | |
print("\n\n*--\n") | |
con.commit() | |
if __name__ == "__main__": | |
pull_data_klasifikasi_industri() | |
pull_data_yfi() | |