Spaces:

lamonkey
/

portfolio_management

Runtime error

File size: 11,552 Bytes

import sys
# sys.path.append('/Users/lamonkey/Desktop/risk-monitor-dashboard')
import panel as pn
import datetime as dt
import asyncio
import random
from sqlalchemy import create_engine, text
import pandas as pd
from streamz import Stream
from datetime import timedelta
import settings
import os
import utils
import api
import numpy as np
import pytz
import table_schema as ts
import db_operation as db
# Stream that carries stock price DataFrames to whoever subscribes to it;
# update() and run() in this module emit on it.
stock_price_stream = Stream()

# Disabled experiments kept for reference: sinking the stream into a db
# writer, a dask distributed client, and nest_asyncio.
# stock_price_stream.sink(save_stock_price)
# from dask.distributed import Client
# client = Client()
# import nest_asyncio
# nest_asyncio.apply()
# import settings

# run using  --setup
# SQLAlchemy URL of the SQLite database the functions in this module open.
db_url = 'sqlite:///instance/local.db'


def create_portfolio_profile_df(stocks: list[dict]):
    '''
    build a portfolio profile df from a list of stock records

    Each record becomes one row; the stock details are joined in through
    add_details_to_stock_df before the result is validated.

    Parameters
    ----------
    stocks: list[dict]
        one dict per stock, each must carry a `ticker`

    Returns
    -------
    pd.DataFrame
        the records with the detail columns attached

    Raises
    ------
    Exception
        if a ticker shows up more than once in the resulting df
    '''
    enriched = add_details_to_stock_df(pd.DataFrame(stocks))

    # a ticker may only appear once in a profile
    has_duplicates = enriched.ticker.duplicated().any()
    if has_duplicates:
        raise Exception(
            'VALIDATION ERROR: cannot have duplicate ticker with the same date')

    return enriched


def need_to_update(table_name: str, freq: dt.timedelta):
    '''check whether the table with table_name needs an update

    Parameters
    ----------
    table_name: str
        table whose `date` column is inspected; the name is interpolated
        into the SQL text, so pass trusted identifiers only
    freq: dt.timedelta
        maximum age of the newest row before an update is due

    Return
    ------
    None if no need to update
    (start_date, end_date, freq) if need to update, where start_date is the
    newest stored date plus freq and end_date is utils.time_in_beijing()
    '''
    with create_engine(db_url).connect() as conn:
        # MAX() always produces exactly one row, so scalar() is sufficient
        newest = conn.execute(
            text(f"SELECT MAX(date) FROM {table_name}")).scalar()

    # NOTE(review): an empty table yields NULL here and the value is handed
    # to utils.convert_string_to_datetime unchanged - confirm it copes
    max_date = utils.convert_string_to_datetime(newest)
    current_time = utils.time_in_beijing()
    if current_time - max_date > freq:
        return (max_date + freq, current_time, freq)
    return None


def need_to_fetch_new_stock_price():
    '''
    check if need to pull new stock price from jq

    Compares the earliest date in portfolio_profile with the earliest time
    in stocks_price. Prices are missing only when the portfolio starts
    strictly before the first stored price.

    RETURN
    ------
    (min_date, max_date) : if update is needed
       the start and end date need to fetch new stock price; max_date is
       one day before the earliest stored price time
    None if no need to fetch new stock price

    '''
    # one connection serves both lookups
    with create_engine(db_url).connect() as conn:
        profile_dates = pd.read_sql(
            "SELECT DISTINCT date FROM portfolio_profile ORDER BY date ASC LIMIT 1",
            con=conn)
        price_times = pd.read_sql(
            "SELECT DISTINCT time FROM stocks_price ORDER BY time ASC LIMIT 1",
            con=conn)

    # NOTE(review): both tables are assumed to hold at least one row;
    # indexing [0] on an empty result raises
    min_date = pd.to_datetime(profile_dates.date)[0]
    first_price_time = pd.to_datetime(price_times.time)[0]

    # strict comparison: when both values are equal the first needed price is
    # already stored, and "<=" would hand back a window that ends before it
    # starts
    if min_date < first_price_time:
        return (min_date, first_price_time - dt.timedelta(days=1))
    return None


def get_most_recent_profile(type):
    '''
    load the rows carrying the latest date from a profile table

    Parameters
    ----------
    type: str
        'benchmark' reads benchmark_profile, any other value reads
        portfolio_profile

    Returns
    -------
    pd.DataFrame
        rows of the newest date, with `date` parsed to datetime
    '''
    if type == 'benchmark':
        table_name = 'benchmark_profile'
    else:
        table_name = 'portfolio_profile'

    latest_rows_sql = (
        f"SELECT * FROM {table_name} "
        f"WHERE date = (SELECT MAX(date) FROM {table_name})"
    )
    engine = create_engine(db_url)
    with engine.connect() as conn:
        profile = pd.read_sql(latest_rows_sql, con=conn)

    # convert date to datetime object
    profile['date'] = pd.to_datetime(profile['date'])
    return profile


def update_stocks_details_to_db():
    '''
    rebuild the stocks details table from api.get_all_stocks_detail()

    The existing table, if any, is replaced.

    Table Schema
    ------------
    'display_name', 'name', 'start_date', 'end_date', 'type', 'ticker',
       'sector', 'aggregate_sector'

    Raises
    ------
    ValueError
        if the fetched df does not pass _validate_schema against
        ts.STOCKS_DETAILS_TABLE_SCHEMA
    '''
    details = api.get_all_stocks_detail()

    # refuse to write anything that does not match the declared schema
    schema_ok = _validate_schema(details, ts.STOCKS_DETAILS_TABLE_SCHEMA)
    if not schema_ok:
        raise ValueError(
            'df has different schema than STOCKS_DETAILS_TABLE_SCHEMA')

    engine = create_engine(db_url)
    with engine.connect() as conn:
        details.to_sql(
            ts.STOCKS_DETAILS_TABLE,
            con=conn,
            if_exists='replace',
            index=False,
        )


def fetch_new_stocks_price():
    '''
    fetch daily prices for every ticker of the latest portfolio and
    benchmark profiles

    The window starts one day after the first `date` of the latest portfolio
    profile and ends at utils.time_in_beijing().

    Returns
    -------
    the frame returned by api.fetch_stocks_price, extended with two boolean
    columns `in_portfolio` and `in_benchmark`
    '''
    portfolio = get_most_recent_profile('portfolio')
    benchmark = get_most_recent_profile('benchmark')

    # tickers of either profile, each listed once
    all_tickers = pd.concat([portfolio, benchmark])['ticker'].unique().tolist()

    # TODO: hard code delta time to 1 day
    window_start = portfolio.date[0] + dt.timedelta(days=1)
    window_end = utils.time_in_beijing()
    prices = api.fetch_stocks_price(
        all_tickers, window_start, window_end, 'daily')

    # flag which profile(s) each price row belongs to
    portfolio_tickers = portfolio['ticker'].unique().tolist()
    benchmark_tickers = benchmark['ticker'].unique().tolist()
    prices['in_portfolio'] = prices['ticker'].isin(portfolio_tickers)
    prices['in_benchmark'] = prices['ticker'].isin(benchmark_tickers)
    return prices


def need_to_update_stocks_price(delta_time):
    '''
    check whether the stocks_price table is stale

    Parameters
    ----------
    delta_time: dt.timedelta
        maximum tolerated age of the newest stored price

    Returns
    -------
    bool
        True if the table does not exist, holds no rows, or its newest
        `time` lies more than delta_time before utils.time_in_beijing();
        False otherwise
    '''
    tz = pytz.timezone('Asia/Shanghai')
    with create_engine(db_url).connect() as conn:
        # check if a table exist
        if not conn.dialect.has_table(conn, 'stocks_price'):
            return True
        query = "SELECT * FROM stocks_price WHERE time = (SELECT MAX(time) FROM stocks_price)"
        most_recent_price = pd.read_sql(query, con=conn)

    # an empty table has no price at all, so it is as stale as a missing one
    if most_recent_price.empty:
        return True

    newest = pd.to_datetime(most_recent_price.time).iloc[0]
    # stored times are naive; attach the Shanghai zone so they can be
    # compared with the timezone-aware current time
    date_time = tz.localize(newest.to_pydatetime())
    return utils.time_in_beijing() - date_time > delta_time


def processing():
    '''
    placeholder for the whole processing pipeline, does nothing yet
    '''
    return None


def add_details_to_stock_df(stock_df):
    '''
    attach stock details to every row of stock_df

    Left-joins stock_df with the stocks details table on `ticker`, adding
    the columns 'sector', 'name', 'aggregate_sector' and 'display_name'.

    Parameters
    ----------
    stock_df: pd.DataFrame
        must contain a `ticker` column

    Returns
    -------
    pd.DataFrame
        a new df; rows without a matching ticker keep missing details,
        except `aggregate_sector`, which falls back to '其他' ("other")
    '''
    detail_columns = [
        'sector', 'name', 'aggregate_sector', 'display_name', 'ticker']
    with create_engine(db_url).connect() as conn:
        detail_df = pd.read_sql(ts.STOCKS_DETAILS_TABLE, con=conn)

    merged_df = pd.merge(
        stock_df, detail_df[detail_columns], on='ticker', how='left')
    # assign the filled column back: fillna(inplace=True) on a selected
    # column is chained assignment, which pandas deprecates and which leaves
    # merged_df untouched once copy-on-write is active
    merged_df['aggregate_sector'] = merged_df['aggregate_sector'].fillna(
        '其他')
    return merged_df


def _validate_schema(df, schema):
    '''
    tell whether df carries exactly the columns named by schema

    Parameters
    ----------
    df: pd.DataFrame
    schema: dict
        {column_name: data_type}

    Returns
    -------
    bool
        True if the column names of df and the keys of schema are the same
        set (order is irrelevant), False otherwise
    '''
    # TODO: ignoring type check for now, the data types in schema are not
    # compared against the dtypes of df
    expected_columns = set(schema.keys())
    actual_columns = set(df.columns)
    return actual_columns == expected_columns


def save_stock_price_to_db(df: pd.DataFrame):
    '''append every row of df to the stocks_price table'''
    print('saving to stock to db')
    engine = create_engine(db_url)
    with engine.connect() as conn:
        df.to_sql(
            name='stocks_price',
            con=conn,
            if_exists='append',
            index=False,
        )


def update_portfolio_profile_to_db(portfolio_df):
    '''append portfolio_df to the portfolio profile table in db

    Only the columns named in ts.PORTFOLIO_TABLE_SCHEMA are written; any
    other column of portfolio_df is left out.

    Parameters
    ----------
    portfolio_df: pd.DataFrame
        must contain every column of ts.PORTFOLIO_TABLE_SCHEMA

    Returns
    -------
    bool
        True if the rows were written, False if the write failed

    Raises
    ------
    ValueError
        if portfolio_df lacks a column of ts.PORTFOLIO_TABLE_SCHEMA
    '''
    schema_columns = list(ts.PORTFOLIO_TABLE_SCHEMA.keys())

    # A frame that matches the schema must pass. Extra columns are harmless
    # because only the schema columns are selected below; a missing schema
    # column is what makes the frame unusable.
    missing = [
        col for col in schema_columns if col not in portfolio_df.columns]
    if missing:
        raise ValueError(
            'portfolio_df has different schema than PORTFOLIO_TABLE_SCHEMA, '
            f'missing columns: {missing}')

    with create_engine(db_url).connect() as conn:
        print("updating profile to db")
        try:
            portfolio_df[schema_columns].to_sql(
                ts.PORTFOLIO_TABLE, con=conn, if_exists='append', index=False)
        except Exception as exc:
            # best effort: report the reason and signal failure to the caller
            print(f"failed to update profile to db: {exc}")
            return False
        return True
    # TODO trigger recomputation of analysis

def update_daily_stocks_price():
    '''
    bring the stored stock prices up to today

    Returns silently when there is no portfolio. The fetch window starts at
    the first `time` of the most recent stored prices or, when no price is
    stored, at the first `date` of the most recent portfolio profile; it
    ends at utils.time_in_beijing().
    '''
    latest_portfolio = db.get_most_recent_portfolio_profile()
    latest_prices = db.get_most_recent_stocks_price()

    price_times = latest_prices.time
    profile_dates = latest_portfolio.date
    # nothing to update without a portfolio
    if len(profile_dates) == 0:
        return

    if len(price_times) > 0:
        start = price_times[0]
    else:
        start = profile_dates[0]
    end = utils.time_in_beijing()

    # frequency is set to daily: only fetch once more than a day has passed
    if end - start > dt.timedelta(days=1):
        fetched = fetch_all_stocks_price_between(start, end)
        db.append_to_stocks_price_table(fetched)

def update_stock_price():
    '''get daily stocks price until today

    Delegates to fetch_new_stocks_price so that the fetch logic lives in a
    single place instead of being repeated line for line here.

    Returns
    -------
    whatever fetch_new_stocks_price returns: the fetched prices of the
    latest portfolio and benchmark tickers with the `in_portfolio` and
    `in_benchmark` flag columns
    '''
    return fetch_new_stocks_price()


def fetch_all_stocks_price_between(start, end):
    '''
    fetch the daily price of every stock whose listing range overlaps a
    window

    A stock from db.get_all_stocks() is selected when its start_date is not
    after `end` and its end_date is not before `start` (both boundaries
    inclusive).

    Parameters
    ----------
    start:
        first date of the window
    end:
        last date of the window

    Returns
    -------
    the frame returned by api.fetch_stocks_price for the selected tickers,
    without the rows whose `close` is null
    '''
    listed = db.get_all_stocks()
    started_in_time = listed.start_date <= end
    still_listed = listed.end_date >= start
    tickers = listed[started_in_time & still_listed].ticker.to_list()

    prices = api.fetch_stocks_price(
        security=tickers,
        start_date=start,
        end_date=end,
        frequency='daily',
    )
    # rows without a closing price carry no usable information
    prices.dropna(subset=['close'], inplace=True)
    return prices


def update():
    '''
    one-shot refresh of the stored stock prices

    When need_to_update_stocks_price reports the table as older than one
    day, the new prices are fetched, enriched with the stock details, saved
    to db and emitted on stock_price_stream. Otherwise nothing happens.
    '''
    print("Checking stock_price table")
    one_day = dt.timedelta(days=1)
    # prices are collected daily, so anything younger than a day is current
    if not need_to_update_stocks_price(one_day):
        return

    print("Updating stock_price table")
    enriched = add_details_to_stock_df(update_stock_price())
    save_stock_price_to_db(enriched)
    stock_price_stream.emit(enriched)


async def run():
    '''
    background task that pushes the stored stock prices onto
    stock_price_stream

    For now it always reads the whole stocks_price table and emits it; the
    fetching of new data is not wired in yet.
    '''
    print("background_task running!")
    # TODO: update benchmark_profile
    # TODO: only run when need_to_update_stocks_price() asks for it
    print("running update")

    # TODO testing code get stock price df
    engine = create_engine(db_url)
    with engine.connect() as conn:
        stored_prices = pd.read_sql('stocks_price', con=conn)
        print('sending data!')
        stock_price_stream.emit(stored_prices)

    # Steps still to be implemented:
    #   - fetch the latest stock price, add display name and sector,
    #     save it to db and emit it
    #   - run processing and send the processed data