# 선요약
1. polyfit, poly1d로 데이터를 대표하는 함수를 만들 수 있다.
2. prophet, predict로 간단하게 시계열 데이터를 예측할 수 있다.
시계열 데이터 분석하기
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import matplotlib.pyplot as plt
from fbprophet import Prophet
from datetime import datetime
from matplotlib import font_manager, rc
# Font setup so Korean text renders: keep the minus sign drawable and select
# the AppleGothic family (NOTE(review): presumably only available on macOS).
plt.rcParams['axes.unicode_minus'] = False
rc('font', family='AppleGothic')

# Load the web-traffic CSV. The file is read without a header row; the first
# column becomes the 'date' index and the second is the 'hit' count, with ','
# treated as a thousands separator.
pinkwink_web = pd.read_csv(
    './data/08. PinkWink Web Traffic.csv',
    encoding='utf-8',
    thousands=',',
    names=['date', 'hit'],
    index_col=0,
)
# Discard rows that have no hit count.
pinkwink_web = pinkwink_web.dropna(subset=['hit'])

# Model inputs: `traffic` is the observed series, `time` is its 0-based
# position axis, and `fx` is a dense 1000-point grid over the same range used
# for drawing smooth fitted curves.
traffic = pinkwink_web['hit'].values
time = np.arange(len(pinkwink_web))
fx = np.linspace(0, time[-1], num=1000)
# Error function
def error(f, x, y):
    """Return the root-mean-square error of model ``f`` against targets ``y``.

    Args:
        f: Callable evaluated as ``f(x)`` (for example an ``np.poly1d``).
        x: Inputs handed to ``f``.
        y: Observed values; must broadcast against ``f(x)``.

    Returns:
        ``sqrt(mean((f(x) - y) ** 2))``.
    """
    # Coerce both sides to arrays so plain Python sequences are accepted too.
    residual = np.asarray(f(x)) - np.asarray(y)
    return np.sqrt(np.mean(residual ** 2))
# Fit polynomials of degree 1, 2, 3, 15 and 100 to the traffic series.
# np.polyfit produces the coefficient vectors (f<N>p); np.poly1d wraps each
# one into a callable polynomial (f<N>).
f1p, f2p, f3p, f15p, f100p = (
    np.polyfit(time, traffic, degree) for degree in (1, 2, 3, 15, 100)
)
f1, f2, f3, f15, f100 = (
    np.poly1d(coeffs) for coeffs in (f1p, f2p, f3p, f15p, f100p)
)

# Print the fitting error of the degree 1, 2, 3 and 15 models on the data they
# were fitted to (the degree-100 model is fitted but not scored here).
for fitted in (f1, f2, f3, f15):
    print(error(fitted, time, traffic))
plt.figure(figsize=(10, 6))

# Original observations drawn as points.
plt.scatter(time, traffic, s=10)

# Each fitted polynomial evaluated on the dense grid `fx`, drawn in the same
# order as the models were defined so colours and legend entries line up.
for curve_label, poly in (
    ('f1', f1),
    ('f2', f2),
    ('f3', f3),
    ('f15', f15),
    ('f100', f100),
):
    plt.plot(fx, poly(fx), lw=4, label=curve_label)

# Finish the figure: light-grey grid, legend in the upper-left corner, render.
plt.grid(True, linestyle='-', color='0.75')
plt.legend(loc=2)
plt.show()
# Forecasting with Prophet.
# Build the two-column frame used for fitting: 'ds' holds the dates and 'y'
# holds the hit counts.
df = pd.DataFrame({'ds': pinkwink_web.index, 'y': pinkwink_web['hit']})
# The frame inherits the 'date' index; turn it into a column so the index is
# a plain range, parse the date strings, then discard the leftover column.
df = df.reset_index()
df['ds'] = pd.to_datetime(df['ds'], format='%y. %m. %d.')
df = df.drop(columns='date')

# Create the Prophet model with yearly seasonality enabled and fit it on df.
m = Prophet(yearly_seasonality=True)
m.fit(df)

# Extend the timeline by 60 further periods and predict over all of it.
forecast_periods = 60
future = m.make_future_dataframe(periods=forecast_periods)
forecast = m.predict(future)

# Bare expression kept from the notebook: it displays the table only in an
# interactive session and prints nothing when run as a script.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

# Draw the forecast first, then its component breakdown, showing each figure.
for draw in (m.plot, m.plot_components):
    draw(forecast)
    plt.show()
예측 라이브러리 사용
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import matplotlib.pyplot as plt
from fbprophet import Prophet
from datetime import datetime
from matplotlib import font_manager, rc
# Font setup so Korean text renders: keep the minus sign drawable and select
# the AppleGothic family (NOTE(review): presumably only available on macOS).
plt.rcParams['axes.unicode_minus'] = False
rc('font', family='AppleGothic')

# Download price data for ticker 005930.KS from the 'yahoo' source for
# 2000-01-01 through 2020-03-31 and plot the closing price.
start = datetime(2000, 1, 1)
end = datetime(2020, 3, 31)
SS = web.DataReader('005930.KS', 'yahoo', start, end)
closing_price = SS['Close']
closing_price.plot(figsize=(12, 6), grid=True)
plt.show()

# Only the data up to the end of 2018 is kept for fitting, so the forecast
# can extend past it.
SS_trunc = SS.loc[:'2018-12-31']
df = pd.DataFrame({'ds': SS_trunc.index, 'y': SS_trunc['Close']})
# Move the inherited 'Date' index into a column, then discard that column so
# only 'ds' and 'y' remain.
df = df.reset_index()
df = df.drop(columns='Date')

# Fit a default Prophet model and forecast 730 periods beyond the fitted data.
m = Prophet()
m.fit(df)
forecast_periods = 730
future = m.make_future_dataframe(periods=forecast_periods)
forecast = m.predict(future)
m.plot(forecast)
plt.show()
예측 라이브러리 사용
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import matplotlib.pyplot as plt
from fbprophet import Prophet
from datetime import datetime
from matplotlib import font_manager, rc
# Font setup so Korean text renders: keep the minus sign drawable and select
# the AppleGothic family (NOTE(review): presumably only available on macOS).
plt.rcParams['axes.unicode_minus'] = False
rc('font', family='AppleGothic')

# Full set of actual data: ticker 005930.KS from the 'yahoo' source,
# 2015-01-01 through 2020-03-31.
start = datetime(2015, 1, 1)
end = datetime(2020, 3, 31)
SS = web.DataReader('005930.KS', 'yahoo', start, end)

# Subset of the actual data used for fitting: everything up to end of 2019.
SS_trunc = SS.loc[:'2019-12-31']
df = pd.DataFrame({'ds': SS_trunc.index, 'y': SS_trunc['Close']})
# Move the inherited 'Date' index into a column, then discard that column so
# only 'ds' and 'y' remain.
df = df.reset_index()
df = df.drop(columns='Date')

# Forecast data: fit a default Prophet model and predict 90 periods beyond
# the fitted range.
m = Prophet()
m.fit(df)
forecast_periods = 90
future = m.make_future_dataframe(periods=forecast_periods)
forecast = m.predict(future)

# Draw the actual closing prices and the forecast on one figure for comparison.
plt.figure(figsize=(12, 6))
for x_values, y_values, series_label in (
    (SS.index, SS['Close'], 'real'),
    (forecast['ds'], forecast['yhat'], 'forecast'),
):
    plt.plot(x_values, y_values, label=series_label)
plt.grid()
plt.legend()
plt.show()
'데이터 사이언스 공부 > 파이썬으로데이터주무르기' 카테고리의 다른 글
self gas station analysis (0) | 2020.05.12 |
---|---|
chicago analysis (0) | 2020.05.12 |
crime analysis (0) | 2020.05.12 |
cctv analysis (0) | 2020.05.12 |