# Inference Data
# get company news online

from datetime import date, datetime, timedelta

import akshare as ak
import pandas as pd

from Ashare_data import *

# default symbol
symbol = "600519"

# Presumably begin/end markers for instruction and system sections of a prompt;
# they are not used in the visible part of this file.
# NOTE(review): the system markers contain only "<>" -- if these are meant to be
# Llama-2 style "<<SYS>>" tags the text may have been stripped; confirm.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<>\n", "\n<>\n\n"


def get_curday():
    """Return today's date as a ``YYYYMMDD`` string."""
    return date.today().strftime("%Y%m%d")


def n_weeks_before(date_string, n, format="%Y%m%d"):
    """Return the date that lies ``n`` weeks before ``date_string``.

    Args:
        date_string: Date text; always parsed with the ``%Y%m%d`` layout.
        n: Number of weeks (7-day steps) to go back.
        format: ``strftime`` pattern applied to the returned string only.

    Returns:
        The shifted date rendered with ``format``.

    Raises:
        ValueError: If ``date_string`` does not match ``%Y%m%d``.
    """
    # Named ``shifted`` rather than ``date`` so the imported ``date`` class is
    # not shadowed inside this function.
    shifted = datetime.strptime(date_string, "%Y%m%d") - timedelta(days=7 * n)
    return shifted.strftime(format)


def get_news(symbol, max_page=3):
    """Fetch news for ``symbol`` page by page and concatenate the results.

    Pages ``1 .. max_page - 1`` are requested from ``ak.stock_news_em``; a
    ``KeyError`` from that call stops the paging early.

    Args:
        symbol: Stock code passed to ``ak.stock_news_em``.
        max_page: Exclusive upper bound of the page range.
            NOTE(review): ``max_page`` itself is never requested -- confirm
            that the exclusive bound is intended.

    Returns:
        One DataFrame holding all fetched pages with a fresh integer index.

    Raises:
        ValueError: From ``pd.concat`` when no page was fetched at all.
    """
    df_list = []
    for page in range(1, max_page):
        try:
            df_list.append(ak.stock_news_em(symbol, page))
        except KeyError:
            # The original concatenated ``str + int`` here, which raised
            # TypeError instead of reporting progress. ``page`` is the page
            # that failed, so ``page - 1`` pages were collected.
            print(f"{page - 1} pages obtained for symbol: {symbol}")
            break
    return pd.concat(df_list, ignore_index=True)


# get return
def get_cur_return(symbol, start_date, end_date, adjust="qfq"):
    """Build a table of week-over-week closing-price returns for ``symbol``.

    Args:
        symbol: Stock code passed to ``ak.stock_zh_a_hist``.
        start_date: First day to load, formatted ``yyyymmdd``.
        end_date: Last day to load, formatted ``yyyymmdd``.
        adjust: Price-adjustment mode forwarded unchanged to
            ``ak.stock_zh_a_hist``.

    Returns:
        DataFrame with one row per pair of consecutive weekly samples and the
        columns 起始日期, 起始价, 结算日期, 结算价, 周收益 and 简化周收益. The frame
        is empty when fewer than two weekly samples are available.
    """
    # load data
    return_data = ak.stock_zh_a_hist(
        symbol=symbol,
        period="daily",
        start_date=start_date,
        end_date=end_date,
        adjust=adjust,
    )

    # process timestamp
    return_data["日期"] = pd.to_datetime(return_data["日期"])
    return_data.set_index("日期", inplace=True)

    # resample to weekly closes, forward-filling from the latest daily close
    weekly_close = return_data["收盘"].resample("W").ffill()
    weekly_returns = weekly_close.pct_change()[1:]
    weekly_start_prices = weekly_close[:-1]
    weekly_end_prices = weekly_close[1:]

    weekly_data = pd.DataFrame({
        '起始日期': weekly_start_prices.index,
        '起始价': weekly_start_prices.values,
        '结算日期': weekly_end_prices.index,
        '结算价': weekly_end_prices.values,
        '周收益': weekly_returns.values,
    })
    # ``return_transform`` is not defined in the visible file; presumably it
    # comes from the ``Ashare_data`` star import.
    weekly_data["简化周收益"] = weekly_data["周收益"].map(return_transform)

    # check enddate: the last weekly label can fall after ``end_date``, so clamp
    # it. The emptiness guard avoids an IndexError from ``iloc[-1, 2]`` when
    # there are no rows. Column 2 is 结算日期.
    end_ts = pd.to_datetime(end_date)
    if not weekly_data.empty and weekly_data.iloc[-1, 2] > end_ts:
        weekly_data.iloc[-1, 2] = end_ts

    return weekly_data


# NOTE(review): `cur_financial_data` starts here but runs past the end of the
# visible source, so its collapsed text is kept verbatim (unformatted) below.
# get basics def cur_financial_data(symbol, start_date, end_date, with_basics = True): # get data data = get_cur_return(symbol=symbol, start_date=start_date, end_date=end_date) news_df = get_news(symbol=symbol) news_df["发布时间"] = 
pd.to_datetime(news_df["发布时间"], exact=False, format="%Y-%m-%d") news_df.sort_values(by=["发布时间"], inplace=True) # match weekly news for return data news_list = [] for a, row in data.iterrows(): week_start_date = row['起始日期'].strftime('%Y-%m-%d') week_end_date = row['结算日期'].strftime('%Y-%m-%d') print(symbol, ': ', week_start_date, ' - ', week_end_date) weekly_news = news_df.loc[(news_df["发布时间"]>week_start_date) & (news_df["发布时间"]