Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 46 additions & 28 deletions scripts/forecast_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,44 @@
warnings.filterwarnings("ignore", category=FitFailedWarning)


def forecast_dec2019():
def forecast_dec2019(two_features=False):
''' forecast model for Dec 2019 eruption
'''
# constants
month = timedelta(days=365.25/12)
day = timedelta(days=1)
td = TremorData()

# construct model object
data_streams = ['rsam','mf','hf','dsar']
fm = ForecastModel(ti='2011-01-01', tf='2020-01-01', window=2., overlap=0.75,
look_forward=2., data_streams=data_streams)

# columns to manually drop from feature matrix because they are highly correlated to other
# linear regressors
drop_features = ['linear_trend_timewise','agg_linear_trend']


# set the available CPUs higher or lower as appropriate
n_jobs = 6

# construct model object
if two_features:
data_streams = ['rsam']
fm = ForecastModel(ti='2012-04-01', tf='2012-10-01', window=2., overlap=0.75,
look_forward=2., data_streams=data_streams, root='twoFeatures')
use_only_features = ['rsam__maximum', 'rsam__fft_coefficient__coeff_12__attr_"abs"']
classifier = 'DTPBF'
drop_features = []
else:
data_streams = ['rsam','mf','hf','dsar']
fm = ForecastModel(ti='2011-01-01', tf='2020-01-01', window=2., overlap=0.75,
look_forward=2., data_streams=data_streams)

# columns to manually drop from feature matrix because they are highly correlated to other
# linear regressors
drop_features = ['linear_trend_timewise', 'agg_linear_trend']
use_only_features = []
classifier = "DT"

# train the model, excluding 2019 eruption
# note: building the feature matrix may take several hours, but only has to be done once
# note: building the feature matrix may take several hours, but only has to be done once
# and will intermittently save progress in ../features/
# trained scikit-learn models will be saved to ../models/*root*/
te = td.tes[-1]
fm.train(ti='2011-01-01', tf='2020-01-01', drop_features=drop_features, retrain=True,
exclude_dates=[[te-month,te+month],], n_jobs=n_jobs)
fm.train(ti='2011-01-01', tf='2020-01-01', drop_features=drop_features, retrain=True,
exclude_dates=[[te-month,te+month],], n_jobs=n_jobs,
classifier=classifier, use_only_features=use_only_features)

# run forecast from 2011 to 2020
# model predictions will be saved to ../predictions/*root*/
Expand All @@ -58,24 +69,31 @@ def forecast_dec2019():
fm.hires_forecast(ti=te-fm.dtw-fm.dtf, tf=te+month/30, recalculate=True,
save=r'{:s}/forecast_hires.png'.format(fm.plotdir), n_jobs=n_jobs)

def forecast_test():
def forecast_test(two_features=False):
''' test scale forecast model
'''
# constants
month = timedelta(days=365.25/12)

# set up model
data_streams = ['rsam','mf','hf','dsar']
fm = ForecastModel(ti='2012-04-01', tf='2012-10-01', window=2., overlap=0.75,
look_forward=2., data_streams=data_streams, root='test')


# set the available CPUs higher or lower as appropriate
n_jobs = 6

# train the model
drop_features = ['linear_trend_timewise','agg_linear_trend']
fm.train(ti='2012-04-01', tf='2012-10-01', drop_features=drop_features, retrain=True,
n_jobs=n_jobs)

# set up model
if two_features:
data_streams = ['rsam']
fm = ForecastModel(ti='2012-04-01', tf='2012-10-01', window=2., overlap=0.75,
look_forward=2., data_streams=data_streams, root='testPF')
fm.train(ti='2012-04-01', tf='2012-10-01', retrain=True,
n_jobs=n_jobs, classifier='DTPBF',
use_only_features=['rsam__maximum', 'rsam__fft_coefficient__coeff_12__attr_"abs"'])
else:
data_streams = ['rsam', 'mf', 'hf', 'dsar']
fm = ForecastModel(ti='2012-04-01', tf='2012-10-01', window=2., overlap=0.75,
look_forward=2., data_streams=data_streams, root='test')
# train the model
drop_features = ['linear_trend_timewise', 'agg_linear_trend']
fm.train(ti='2012-04-01', tf='2012-10-01', drop_features=drop_features, retrain=True,
n_jobs=n_jobs)

# plot a forecast for a future eruption
te = fm.data.tes[1]
Expand Down Expand Up @@ -115,7 +133,7 @@ def forecast_now():
save='current_forecast.png', nztimezone=True, n_jobs=n_jobs)

if __name__ == "__main__":
#forecast_dec2019()
forecast_test()
#forecast_dec2019(two_features=True)
forecast_test(two_features=True)
#forecast_now()

13 changes: 12 additions & 1 deletion whakaari/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import PolynomialFeatures

datas = ['rsam','mf','hf','dsar']
all_classifiers = ["SVM","KNN",'DT','RF','NN','NB','LR']
all_classifiers = ["SVM","KNN",'DT', 'DTPBF', 'RF','NN','NB','LR']
_MONTH = timedelta(days=365.25/12)
_DAY = timedelta(days=1.)

Expand Down Expand Up @@ -969,6 +971,7 @@ def train(self, ti=None, tf=None, Nfts=20, Ncl=100, retrain=False, classifier="D
SVM - Support Vector Machine.
KNN - k-Nearest Neighbors
DT - Decision Tree
DTPBF - Decision Tree with Polynomial Basis Functions
RF - Random Forest
NN - Neural Network
NB - Naive Bayes
Expand Down Expand Up @@ -1590,6 +1593,7 @@ def get_classifier(classifier):
SVM - Support Vector Machine.
KNN - k-Nearest Neighbors
DT - Decision Tree
DTPBF - Decision Tree with Polynomial Basis Functions
RF - Random Forest
NN - Neural Network
NB - Naive Bayes
Expand All @@ -1607,6 +1611,13 @@ def get_classifier(classifier):
model = DecisionTreeClassifier(class_weight='balanced')
grid = {'max_depth': [3,5,7], 'criterion': ['gini','entropy'],
'max_features': ['auto','sqrt','log2',None]}
elif classifier == 'DTPBF':
model = Pipeline([('polynomial', PolynomialFeatures()),
('clf', DecisionTreeClassifier())
])
grid = {'clf__max_depth': [3, 5, 7], 'clf__criterion': ['gini', 'entropy'],
'clf__max_features': ['auto', 'sqrt', 'log2', None],
'polynomial__degree': [1, 2, 3, 4, 5]}
elif classifier == "RF": # random forest
model = RandomForestClassifier(class_weight='balanced')
grid = {'n_estimators': [10,30,100], 'max_depth': [3,5,7], 'criterion': ['gini','entropy'],
Expand Down