"""Download, update and post-process daily stock bars from baostock.

Steps, in order:
1. Read the stock list from ``./data/股票基础信息.csv`` and build baostock codes.
2. For every stock, download the full daily history the first time, or
   append the missing days to the CSV that already exists.
3. Download the adjust factors for every stock and save them to one CSV.
4. Merge the ``backAdjustFactor`` column into every per-stock daily CSV.
"""
import datetime
import os

import baostock as bs
import numpy as np  # kept: only referenced by commented-out code in the original script
import pandas as pd

# Stock list produced elsewhere; must provide secID, exchangeCD, listDate, delistDate.
INFO_CSV = "./data/股票基础信息.csv"
# One CSV of daily bars per stock, named "<code>.csv".
DATA_ROOT = "./data/stock/baostock/day/"
# Combined adjust-factor table for all stocks.
FACTOR_CSV = "./data/baostock复权数据.csv"
FACTOR_START_DATE = "1990-01-01"
FACTOR_COLUMN = "backAdjustFactor"
# Exchange code used in the stock list -> prefix used in baostock codes.
EXCHANGE_PREFIX = {"XSHG": "sh", "XSHE": "sz"}
DAILY_FIELDS = (
    "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,"
    "tradestatus,pctChg,peTTM,pbMRQ,psTTM,pcfNcfTTM,isST"
)


def _today_str():
    """Return today's date formatted as YYYY-MM-DD."""
    return datetime.date.today().strftime("%Y-%m-%d")


def _drain(result_set):
    """Collect every row of a baostock result set into a DataFrame.

    Raises:
        RuntimeError: if baostock reports a non-zero error code, so a failed
            query is not mistaken for "no data".
    """
    rows = []
    while result_set.error_code == "0" and result_set.next():
        rows.append(result_set.get_row_data())
    if result_set.error_code != "0":
        raise RuntimeError(
            f"baostock error {result_set.error_code}: {result_set.error_msg}"
        )
    return pd.DataFrame(rows, columns=result_set.fields)


def _load_stock_info(path=INFO_CSV):
    """Read the stock list and add a baostock-style ``code`` column (e.g. sh.600000)."""
    info = pd.read_csv(path, index_col=0)
    info["code"] = [
        EXCHANGE_PREFIX[exchange] + "." + sec_id.split(".")[0]
        for sec_id, exchange in zip(info["secID"], info["exchangeCD"])
    ]
    return info


def _query_daily(code, start_date, end_date):
    """Fetch daily bars for ``code`` between the two dates (both inclusive)."""
    result_set = bs.query_history_k_data_plus(
        code,
        DAILY_FIELDS,
        start_date=start_date,
        end_date=end_date,
        frequency="d",
        adjustflag="1",  # per the baostock docs, "1" selects back-adjusted prices
    )
    return _drain(result_set)


def _update_daily_file(path, code, list_date, end_date):
    """Append the days missing from an existing per-stock CSV and rewrite it."""
    stored = pd.read_csv(path)
    # A file holding only a header has no last date; restart from the listing date.
    last_date = list_date if stored.empty else str(stored["date"].iloc[-1])
    if not last_date < end_date:
        return
    fresh = _query_daily(code, last_date, end_date)
    if fresh.empty:
        return
    merged = pd.concat([stored, fresh], ignore_index=True)
    # The query starts at last_date inclusive, so that day comes back again;
    # keep the newly downloaded row and drop the stored duplicate.
    merged = merged.drop_duplicates(subset="date", keep="last")
    # Newly downloaded values are strings; rows with no volume carry "".
    merged = merged[merged["volume"] != ""]
    merged = merged.assign(volume=merged["volume"].astype("float"))
    merged = merged[merged["volume"] > 0]
    merged.to_csv(path, index=False)


def _sync_daily_bars(info, data_root=DATA_ROOT):
    """Download new stocks and update existing per-stock daily CSVs."""
    os.makedirs(data_root, exist_ok=True)
    # Snapshot taken once: a file created in this run is not updated again.
    existing = set(os.listdir(data_root))
    today = _today_str()
    for _, row in info.iterrows():
        code = row["code"]
        file_name = code + ".csv"
        path = os.path.join(data_root, file_name)
        end_date = row["delistDate"]
        # A missing delistDate is read as NaN (not a str): query up to today.
        if not isinstance(end_date, str):
            end_date = today
        try:
            if file_name not in existing:
                bars = _query_daily(code, row["listDate"], end_date)
                if not bars.empty:
                    bars.to_csv(path, index=False)
            else:
                _update_daily_file(path, code, row["listDate"], end_date)
        except Exception as exc:  # one bad stock must not abort the whole run
            print(f"下载{code}失败", repr(exc))


def _download_adjust_factors(info, out_path=FACTOR_CSV):
    """Download adjust factors for every stock and save them to ``out_path``.

    Returns:
        (factor_df, failed_codes): the combined factor table and the set of
        codes whose download failed.
    """
    end_date = _today_str()
    frames = []
    failed_codes = set()
    for _, row in info.iterrows():
        code = row["code"]
        print(code)
        try:
            result_set = bs.query_adjust_factor(
                code=code, start_date=FACTOR_START_DATE, end_date=end_date
            )
            factors = _drain(result_set)
        except Exception as exc:
            print(f"下载{code}失败", repr(exc))
            failed_codes.add(code)
            continue
        if not factors.empty:
            frames.append(factors)
    if frames:
        # One concat instead of repeated appends; the per-stock index is kept
        # so the saved CSV has the same layout as before.
        factor_df = pd.concat(frames)
    else:
        factor_df = pd.DataFrame(columns=["code", "dividOperateDate", FACTOR_COLUMN])
    factor_df.to_csv(out_path)
    return factor_df, failed_codes


def _merge_factors_into_daily(factor_df, skip_codes=frozenset(), data_root=DATA_ROOT):
    """Write the ``backAdjustFactor`` column into every per-stock daily CSV.

    Files whose code is in ``skip_codes`` are left untouched, so a failed
    factor download is not recorded as "no dividends".
    """
    for file_name in os.listdir(data_root):
        if not file_name.endswith(".csv"):
            continue
        code = file_name[:-4]
        if code in skip_codes:
            continue
        path = os.path.join(data_root, file_name)
        daily = pd.read_csv(path)
        # Drop the column written by a previous run; otherwise the merge
        # below would produce backAdjustFactor_x / backAdjustFactor_y.
        daily = daily.drop(columns=[FACTOR_COLUMN], errors="ignore")
        factors = factor_df.loc[
            factor_df["code"] == code, ["dividOperateDate", FACTOR_COLUMN]
        ]
        if factors.empty:
            daily[FACTOR_COLUMN] = 1
            print(file_name, "没有除权除息")
        else:
            factors = factors.rename(columns={"dividOperateDate": "date"})
            daily = pd.merge(daily, factors, on=["date"], how="left")
            # NOTE(review): this forward-fills every column, not only the
            # factor, and rows before the first factor date stay NaN.
            # Kept as in the original script; confirm this is intended.
            daily = daily.ffill()
        daily.to_csv(path, index=False)


def main():
    """Run the full download / update / merge pipeline."""
    login = bs.login()
    if login.error_code != "0":
        raise RuntimeError(f"baostock login failed: {login.error_msg}")
    try:
        info = _load_stock_info()
        _sync_daily_bars(info)
        factor_df, failed_codes = _download_adjust_factors(info)
        _merge_factors_into_daily(factor_df, failed_codes)
    finally:
        bs.logout()


if __name__ == "__main__":
    main()
【答读者问20】从baostock中获取股票行情数据(尽可能避免了幸存者偏差)
作者:yunjinqi
类别:教程
日期:2021-12-23 17:59:21
阅读:2294 次
消耗积分:0 分
版权所有,转载本站文章请注明出处:云子量化, https://www.yunjinqi.top/article/49
最新文章
系统当前共有 404 篇文章