抓資料
import requests
from io import StringIO
import pandas as pd
import numpy as np
import datetime
import time
def crawl_price(date):
    """Download one day's MI_INDEX report from www.twse.com.tw as a DataFrame.

    Args:
        date: Anything whose ``str()`` begins with ``YYYY-MM-DD`` (for
            example a ``datetime.datetime`` or ``datetime.date``). Only the
            date part is used, with the dashes removed.

    Returns:
        A DataFrame indexed by the '證券代號' (security code) column. The
        '成交金額' and '成交股數' columns have their thousands separators
        removed but are still strings.

    Raises:
        ValueError: If the response has no line containing '證券代號', i.e.
            no price table was returned for that date.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    day = str(date).split(' ')[0].replace('-', '')
    url = ('http://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date='
           + day + '&type=ALL')
    # Without a timeout a stalled connection would block the crawl forever.
    r = requests.post(url, timeout=30)

    # Locate the line holding the column names of the per-security table.
    header_line = next(
        (i for i, line in enumerate(r.text.split("\n")) if "證券代號" in line),
        None,
    )
    if header_line is None:
        raise ValueError('no price table in the response for date ' + day)

    # NOTE(review): the "- 1" offset is kept from the original code;
    # presumably it compensates for a line pandas skips before the header.
    # The "=" characters are stripped before parsing; presumably the report
    # wraps some values as ="..." -- confirm against a real response.
    ret = pd.read_csv(StringIO(r.text.replace("=", "")), header=header_line - 1)
    ret = ret.set_index('證券代號')
    # Drop thousands separators so the columns can later be cast to numbers.
    ret['成交金額'] = ret['成交金額'].str.replace(',', '')
    ret['成交股數'] = ret['成交股數'].str.replace(',', '')
    return ret
# Walk backwards one calendar day at a time from today, collecting daily
# price tables until n_days of them have been downloaded.
data = {}  # datetime.date -> DataFrame returned by crawl_price
n_days = 60
date = datetime.datetime.now()
fail_count = 0
allow_continuous_fail_count = 5

while len(data) < n_days:
    print('parsing', date)
    try:
        # Fetch the table for this day.
        data[date.date()] = crawl_price(date)
    except Exception:
        # Days without data (e.g. holidays) cannot be crawled. Only
        # Exception is caught so that Ctrl-C / SystemExit still stop the
        # script instead of being reported as a holiday.
        print('fail! check the date is holiday')
        fail_count += 1
        if fail_count == allow_continuous_fail_count:
            # Too many failures in a row: surface the last error.
            raise
    else:
        print('success!')
        fail_count = 0
    # Step back one day.
    date -= datetime.timedelta(days=1)
    # Pause between requests (presumably to avoid being throttled).
    time.sleep(10)
輸出結果
parsing 2020-04-22 18:11:39.219593
success!
parsing 2020-04-21 18:11:39.219593
success!
parsing 2020-04-20 18:11:39.219593
success!
parsing 2020-04-19 18:11:39.219593
fail! check the date is holiday
parsing 2020-04-18 18:11:39.219593
fail! check the date is holiday
parsing 2020-04-17 18:11:39.219593
success!
parsing 2020-04-16 18:11:39.219593
success!
parsing 2020-04-15 18:11:39.219593
success!
parsing 2020-04-14 18:11:39.219593
success!
parsing 2020-04-13 18:11:39.219593
success!
:
:
# Build one table per price field: rows are crawl dates, columns are the
# security codes taken from each day's table.
# NOTE: the name `open` shadows the builtin; it is kept unchanged here.
close, open, high, low, volume = (
    pd.DataFrame({day: quotes[field] for day, quotes in data.items()}).T
    for field in ('收盤價', '開盤價', '最高價', '最低價', '成交股數')
)
for frame in (close, open, high, low, volume):
    frame.index = pd.to_datetime(frame.index)
del frame

# Series for security '2330', restricted to rows dated in 2020, with missing
# values dropped and the remaining values cast to float.
tsmc = {
    label: table['2330']['2020'].dropna().astype(float)
    for label, table in (
        ('close', close),
        ('open', open),
        ('high', high),
        ('low', low),
        ('volume', volume),
    )
}
tsmc['close'].plot()