From 20f2a415d0335fcf65ab14df33d2e180e51f0864 Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Sat, 13 Jun 2015 00:02:46 -0700 Subject: [PATCH 01/13] started working on StreamingOlympicModel Started working on proper batch training --- .../egads/batch/TrainForecastingModel.java | 3 +- .../adm/AnomalyDetectionAbstractModel.java | 4 +- .../yahoo/egads/models/tsmm/OlympicModel.java | 20 --- .../models/tsmm/StreamingOlympicModel.java | 158 ++++++++++++++++++ .../models/tsmm/TimeSeriesAbstractModel.java | 2 +- 5 files changed, 164 insertions(+), 23 deletions(-) create mode 100644 src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java diff --git a/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java b/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java index 72bfc7f..8aea763 100644 --- a/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java +++ b/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java @@ -8,6 +8,7 @@ import com.yahoo.egads.data.ModelStore; import com.yahoo.egads.data.TimeSeries; import com.yahoo.egads.models.tsmm.OlympicModel; +import com.yahoo.egads.models.tsmm.StreamingOlympicModel; public class TrainForecastingModel { public static void main(String[] args) { @@ -42,7 +43,7 @@ public static void main(String[] args) { sc.close(); for (String series : inputs.keySet()) { TimeSeries.DataSequence seq = inputs.get(series); - OlympicModel o = new OlympicModel(osProps); + StreamingOlympicModel o = new StreamingOlympicModel(osProps); o.train(seq); System.out.println (series + ":"); for (TimeSeries.Entry e : seq) { diff --git a/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java b/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java index 159cc87..2460cb4 100644 --- a/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java @@ -12,15 +12,17 @@ import org.json.JSONStringer; import com.yahoo.egads.data.JsonEncoder; +import com.yahoo.egads.models.tsmm.TimeSeriesModel; public abstract class AnomalyDetectionAbstractModel implements AnomalyDetectionModel { - protected org.apache.logging.log4j.Logger logger; protected float sDAutoSensitivity = 3; protected float amntAutoSensitivity = (float) 0.05; protected String outputDest = ""; protected String modelName; + protected static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(AnomalyDetectionModel.class.getName()); + public String getModelName() { return modelName; } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/OlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/OlympicModel.java index f6097fd..078a941 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/OlympicModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/OlympicModel.java @@ -23,26 +23,6 @@ public class OlympicModel extends TimeSeriesAbstractModel { private static final long serialVersionUID = 1L; - public int getNumWeeks() { - return numWeeks; - } - - public int getNumToDrop() { - return numToDrop; - } - - public int[] getTimeShifts() { - return timeShifts; - } - - public int[] getBaseWindows() { - return baseWindows; - } - - public ArrayList getModel() { - return model; - } - // Number of weeks to look back when computing the // estimate. protected int numWeeks; diff --git a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java new file mode 100644 index 0000000..aeee40f --- /dev/null +++ b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java @@ -0,0 +1,158 @@ +/* + * Copyright 2015, Yahoo Inc. + * Copyrights licensed under the GPL License. + * See the accompanying LICENSE file for terms. + */ + +// Olympic scoring model considers the average of the last k weeks +// (dropping the b highest and lowest values) as the current prediction. + +package com.yahoo.egads.models.tsmm; + +import com.yahoo.egads.data.*; +import com.yahoo.egads.data.TimeSeries.Entry; + +import java.util.HashMap; +import java.util.Properties; +import java.util.ArrayList; +import java.util.Collections; + +import com.yahoo.egads.utilities.FileUtils; + +public class StreamingOlympicModel extends TimeSeriesAbstractModel { + // methods //////////////////////////////////////////////// + + private static final long serialVersionUID = 1L; + + private HashMap model; + private int period; + private double smoothingFactor; + + public StreamingOlympicModel(Properties config) { + super(config); + smoothingFactor = 0.5; + period = 86400 * 7; + model = new HashMap(); + } + + public void reset() { + model = new HashMap(); + } + private long timeToModelTime (long time) { + return time % period; + } + + private void update (TimeSeries.Entry entry) { + long modelTime = timeToModelTime(entry.time); + if (model.containsKey(timeToModelTime(modelTime))) { + model.put(modelTime, model.get(modelTime) * (1 - smoothingFactor) + entry.value * smoothingFactor); + } else { + model.put(modelTime, (double)entry.value); + } + } + + private double forecast (TimeSeries.Entry entry) { + long modelTime = timeToModelTime(entry.time); + if (model.containsKey(timeToModelTime(modelTime))) { + return model.get(modelTime); + } else { + return entry.value; + } + } + + private void runSeries (TimeSeries.DataSequence data) { + // Reset various helper summations + double sumErr = 0.0; + double sumAbsErr = 0.0; + double sumAbsPercentErr = 0.0; + double sumErrSquared = 0.0; + int processedPoints = 0; + for (TimeSeries.Entry entry : data) { + double error = entry.value - forecast(entry); + update(entry); + sumErr += error; + sumAbsErr += Math.abs(error); + sumAbsPercentErr += Math.abs(error / entry.value); + sumErrSquared += error * error; + processedPoints++; + } + this.bias = sumErr / processedPoints; + this.mad = sumAbsErr / processedPoints; + this.mape = sumAbsPercentErr / processedPoints; + this.mse = sumErrSquared / processedPoints; + this.sae = sumAbsErr; + errorsInit = true; + } + + public void train(TimeSeries.DataSequence data) { + reset(); + runSeries(data); + + logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0); + } + + public void update(TimeSeries.DataSequence data) { + + } + + public String getModelName() { + return "OlympicModel"; + } + + private Float sum(ArrayList list) { + float sum = 0; + for (float i : list) { + sum = sum + i; + } + return sum; + } + + private float computeExpected(int i, int pl) { + ArrayList vals = new ArrayList(); + float precision = (float) 0.000001; + + int j = 1; + + if ((i - pl * j) < 0) { + return Float.POSITIVE_INFINITY; + } + while (j <= this.numWeeks && (i - pl * j) >= 0) { + float lastWeeksVal = data.get(i - pl * j).value; + // If dynamic parameters are turned on, + // then we check if our error improved from last time, + // if not, then we stop and use the old result. + if (dynamicParameters == 1 && vals.size() > 0) { + float withNewVal = (sum(vals) + lastWeeksVal) / (vals.size() + 1); + float withoutNewVal = (sum(vals)) / (vals.size()); + if ((Math.abs(withNewVal - data.get(i).value) - Math.abs(withoutNewVal - data.get(i).value)) > precision) { + break; + } + } + vals.add(lastWeeksVal); + j++; + } + + Collections.sort(vals); + j = 0; + + if (vals.size() > (2 * this.numToDrop)) { + while (j < this.numToDrop) { + vals.remove(vals.size() - 1); + vals.remove(0); + j++; + } + } + + float baseVal = sum(vals) / vals.size(); + return baseVal; + } + + public void predict(TimeSeries.DataSequence sequence) throws Exception { + int n = data.size(); + for (int i = 0; i < n; i++) { + sequence.set(i, (new Entry(data.get(i).time, model.get(i)))); + logger.info(data.get(i).time + "," + data.get(i).value + "," + model.get(i)); + } + } + +} diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java index 35c23b5..515d6a3 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java @@ -29,7 +29,7 @@ public abstract class TimeSeriesAbstractModel implements TimeSeriesModel { protected double sae; protected String modelName; - static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(TimeSeriesModel.class.getName()); + protected static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(TimeSeriesModel.class.getName()); protected boolean errorsInit = false; protected int dynamicParameters = 0; From 6bbd9b0da3d0a85e7c69a0a5021431ad2d92768d Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Tue, 16 Jun 2015 15:53:10 -0700 Subject: [PATCH 02/13] Continued testing streaming olympic model and batch training/serialization --- .../egads/batch/TrainForecastingModel.java | 6 ++- .../models/tsmm/StreamingOlympicModel.java | 45 ++----------------- 2 files changed, 7 insertions(+), 44 deletions(-) diff --git a/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java b/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java index 8aea763..5a60cd6 100644 --- a/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java +++ b/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java @@ -1,5 +1,7 @@ package com.yahoo.egads.batch; +import java.io.File; +import java.io.FileNotFoundException; import java.util.Properties; import java.util.Scanner; import java.util.HashMap; @@ -11,9 +13,9 @@ import com.yahoo.egads.models.tsmm.StreamingOlympicModel; public class TrainForecastingModel { - public static void main(String[] args) { + public static void main(String[] args) throws FileNotFoundException { HashMap inputs = new HashMap(); - Scanner sc = new Scanner(System.in); + Scanner sc = new Scanner(new File (args[0])); ModelStore m = new FileModelStore ("models"); Properties osProps = new Properties(); osProps.setProperty("TIME_SHIFTS", "0,1"); diff --git a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java index aeee40f..bff2f75 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java @@ -38,6 +38,7 @@ public StreamingOlympicModel(Properties config) { public void reset() { model = new HashMap(); } + private long timeToModelTime (long time) { return time % period; } @@ -108,51 +109,11 @@ private Float sum(ArrayList list) { } private float computeExpected(int i, int pl) { - ArrayList vals = new ArrayList(); - float precision = (float) 0.000001; - - int j = 1; - - if ((i - pl * j) < 0) { - return Float.POSITIVE_INFINITY; - } - while (j <= this.numWeeks && (i - pl * j) >= 0) { - float lastWeeksVal = data.get(i - pl * j).value; - // If dynamic parameters are turned on, - // then we check if our error improved from last time, - // if not, then we stop and use the old result. - if (dynamicParameters == 1 && vals.size() > 0) { - float withNewVal = (sum(vals) + lastWeeksVal) / (vals.size() + 1); - float withoutNewVal = (sum(vals)) / (vals.size()); - if ((Math.abs(withNewVal - data.get(i).value) - Math.abs(withoutNewVal - data.get(i).value)) > precision) { - break; - } - } - vals.add(lastWeeksVal); - j++; - } - - Collections.sort(vals); - j = 0; - - if (vals.size() > (2 * this.numToDrop)) { - while (j < this.numToDrop) { - vals.remove(vals.size() - 1); - vals.remove(0); - j++; - } - } - - float baseVal = sum(vals) / vals.size(); - return baseVal; + return (float)0.0; } public void predict(TimeSeries.DataSequence sequence) throws Exception { - int n = data.size(); - for (int i = 0; i < n; i++) { - sequence.set(i, (new Entry(data.get(i).time, model.get(i)))); - logger.info(data.get(i).time + "," + data.get(i).value + "," + model.get(i)); - } + return; } } From e46b7aa1225b087113c0cd96b3eb8d3c5abd21ef Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Tue, 16 Jun 2015 22:41:58 -0700 Subject: [PATCH 03/13] Better training algorithm --- .../egads/batch/TrainForecastingModel.java | 8 +-- .../models/tsmm/StreamingOlympicModel.java | 61 ++++++++++++++++--- .../models/tsmm/TimeSeriesAbstractModel.java | 40 +++++++++--- 3 files changed, 88 insertions(+), 21 deletions(-) diff --git a/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java b/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java index 5a60cd6..dfbb4ef 100644 --- a/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java +++ b/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java @@ -47,10 +47,10 @@ public static void main(String[] args) throws FileNotFoundException { TimeSeries.DataSequence seq = inputs.get(series); StreamingOlympicModel o = new StreamingOlympicModel(osProps); o.train(seq); - System.out.println (series + ":"); - for (TimeSeries.Entry e : seq) { - System.out.println(e.time + ": " + e.value); - } +// System.out.println (series + ":"); +// for (TimeSeries.Entry e : seq) { +// System.out.println(e.time + ": " + e.value); +// } m.storeModel(series, o); } } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java index bff2f75..0a0a074 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java @@ -25,27 +25,39 @@ public class StreamingOlympicModel extends TimeSeriesAbstractModel { private static final long serialVersionUID = 1L; private HashMap model; - private int period; - private double smoothingFactor; + protected int period; + protected double smoothingFactor; public StreamingOlympicModel(Properties config) { super(config); - smoothingFactor = 0.5; + smoothingFactor = 0.4; period = 86400 * 7; model = new HashMap(); } + public StreamingOlympicModel(Properties config, double smoothingFactor, int period) { + super(config); + this.smoothingFactor = smoothingFactor; + this.period = period; + this.model = new HashMap(); + } public void reset() { model = new HashMap(); } private long timeToModelTime (long time) { + if (period == 86400 * 7) { + return weeklyOffset(time); + } + if (period == 86400) { + return dailyOffset(time); + } return time % period; } private void update (TimeSeries.Entry entry) { long modelTime = timeToModelTime(entry.time); - if (model.containsKey(timeToModelTime(modelTime))) { + if (model.containsKey(modelTime)) { model.put(modelTime, model.get(modelTime) * (1 - smoothingFactor) + entry.value * smoothingFactor); } else { model.put(modelTime, (double)entry.value); @@ -54,7 +66,7 @@ private void update (TimeSeries.Entry entry) { private double forecast (TimeSeries.Entry entry) { long modelTime = timeToModelTime(entry.time); - if (model.containsKey(timeToModelTime(modelTime))) { + if (model.containsKey(modelTime)) { return model.get(modelTime); } else { return entry.value; @@ -73,7 +85,7 @@ private void runSeries (TimeSeries.DataSequence data) { update(entry); sumErr += error; sumAbsErr += Math.abs(error); - sumAbsPercentErr += Math.abs(error / entry.value); + sumAbsPercentErr += 100 * Math.abs(error / entry.value); sumErrSquared += error * error; processedPoints++; } @@ -86,13 +98,42 @@ private void runSeries (TimeSeries.DataSequence data) { } public void train(TimeSeries.DataSequence data) { + StreamingOlympicModel winner = null; + double sf = 0.0; + for (sf = 0.0; sf <= 1; sf += 0.1) { + StreamingOlympicModel m = new StreamingOlympicModel(this.config, sf, this.period); + m.runSeries(data); + logger.debug ("Testing Smoothing Factor " + String.format("%.2f", m.smoothingFactor) + " -> "+ m.errorSummaryString()); + if (betterThan(m, winner)) { + winner = m; + } + } + double min = winner.smoothingFactor - 0.09; + if (min < 0) min = 0; + double max = winner.smoothingFactor + 0.09; + if (max >= 1) max = .99; + for (sf = min; sf < max; sf += 0.01) { + StreamingOlympicModel m = new StreamingOlympicModel(this.config, sf, this.period); + m.runSeries(data); + logger.debug ("Testing Smoothing Factor " + String.format("%.2f", m.smoothingFactor) + " -> "+ m.errorSummaryString()); + if (betterThan(m, winner)) { + winner = m; + } + } + this.smoothingFactor = winner.smoothingFactor; reset(); - runSeries(data); - - logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0); + logger.debug ("Winner: Smoothing Factor = " + String.format("%.2f", sf)); } - public void update(TimeSeries.DataSequence data) { + public double getSmoothingFactor() { + return smoothingFactor; + } + + public void setSmoothingFactor(double smoothingFactor) { + this.smoothingFactor = smoothingFactor; + } + + public void update(TimeSeries.DataSequence data) { } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java index 515d6a3..ed8d5ae 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java @@ -6,6 +6,8 @@ package com.yahoo.egads.models.tsmm; +import java.util.Calendar; +import java.util.Date; import java.util.Properties; import org.json.JSONObject; @@ -28,12 +30,32 @@ public abstract class TimeSeriesAbstractModel implements TimeSeriesModel { protected double mse; protected double sae; protected String modelName; + protected Properties config; protected static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(TimeSeriesModel.class.getName()); protected boolean errorsInit = false; protected int dynamicParameters = 0; + public TimeSeriesAbstractModel(Properties config) { + this.config = config; + if (config.getProperty("DYNAMIC_PARAMETERS") != null) { + this.dynamicParameters = new Integer(config.getProperty("DYNAMIC_PARAMETERS")); + } + } + + protected long weeklyOffset (long time) { + Date d = new Date(time * 1000); + long ret = d.getDay() * 86400 + d.getHours() * 3600 + d.getMinutes() * 60 + d.getSeconds(); + return ret; + } + + protected long dailyOffset (long time) { + Date d = new Date(time * 1000); + long ret = d.getHours() * 3600 + d.getMinutes() * 60 + d.getSeconds(); + return ret; + } + public String getModelName() { return modelName; } @@ -53,13 +75,6 @@ public void fromJson(JSONObject json_obj) throws Exception { } // Acts as a factory method. - public TimeSeriesAbstractModel(Properties config) { - if (config.getProperty("DYNAMIC_PARAMETERS") != null) { - this.dynamicParameters = new Integer(config.getProperty("DYNAMIC_PARAMETERS")); - } - - } - protected static boolean betterThan(TimeSeriesAbstractModel model1, TimeSeriesAbstractModel model2) { // Special case. Any model is better than no model! if (model2 == null) { @@ -97,6 +112,8 @@ protected static boolean betterThan(TimeSeriesAbstractModel model1, TimeSeriesAb } else if (model1.getSAE() - model2.getSAE() >= tolerance) { score--; } + + logger.debug ("Comparison score: " + score); if (score == 0) { // At this point, we're still unsure which one is best @@ -105,11 +122,20 @@ protected static boolean betterThan(TimeSeriesAbstractModel model1, TimeSeriesAb model1.getBias() - model2.getBias() + model1.getMAD() - model2.getMAD() + model1.getMAPE() - model2.getMAPE() + model1.getMSE() - model2.getMSE() + model1.getSAE() - model2.getSAE(); + logger.debug ("Diff: " + diff); return (diff < 0); } return (score > 0); } + + public String errorSummaryString () { + return ("B:" + String.format("%.2f", getBias()) + + "\tMAD:" + String.format("%.2f", getMAD()) + + "\tMAPE:" + String.format("%.2f", getMAPE()) + + "\tMSE:" + String.format("%.2f", getMSE()) + + "\tSAE:" + String.format("%.2f", getSAE())); + } /* * Forecasting model already has the errors defined. From 8f3c0ba52e91eae80aecbb9b6d8d49ca924eab15 Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Tue, 16 Jun 2015 23:40:23 -0700 Subject: [PATCH 04/13] Fixed a bug in output --- .../com/yahoo/egads/models/tsmm/StreamingOlympicModel.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java index 0a0a074..742e4cf 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java @@ -112,7 +112,7 @@ public void train(TimeSeries.DataSequence data) { if (min < 0) min = 0; double max = winner.smoothingFactor + 0.09; if (max >= 1) max = .99; - for (sf = min; sf < max; sf += 0.01) { + for (sf = min; sf <= max; sf += 0.01) { StreamingOlympicModel m = new StreamingOlympicModel(this.config, sf, this.period); m.runSeries(data); logger.debug ("Testing Smoothing Factor " + String.format("%.2f", m.smoothingFactor) + " -> "+ m.errorSummaryString()); @@ -122,7 +122,7 @@ public void train(TimeSeries.DataSequence data) { } this.smoothingFactor = winner.smoothingFactor; reset(); - logger.debug ("Winner: Smoothing Factor = " + String.format("%.2f", sf)); + logger.debug ("Winner: Smoothing Factor = " + String.format("%.2f", this.smoothingFactor)); } public double getSmoothingFactor() { From dca4c7ae5e7eb4f072eba6879e1c8d06c353fce3 Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Wed, 17 Jun 2015 16:11:15 -0700 Subject: [PATCH 05/13] First cut at streaming prediction - only enabled for streaming olympic scoring at this point --- src/main/java/com/yahoo/egads/data/Entry.java | 23 -------------- .../com/yahoo/egads/data/FileModelStore.java | 12 +++++-- .../java/com/yahoo/egads/data/ModelStore.java | 2 +- .../models/tsmm/StreamingOlympicModel.java | 7 +++-- .../models/tsmm/TimeSeriesAbstractModel.java | 8 ++++- .../egads/models/tsmm/TimeSeriesModel.java | 2 ++ .../yahoo/egads/streaming/ProcessStream.java | 31 ------------------- 7 files changed, 24 insertions(+), 61 deletions(-) delete mode 100644 src/main/java/com/yahoo/egads/data/Entry.java delete mode 100644 src/main/java/com/yahoo/egads/streaming/ProcessStream.java diff --git a/src/main/java/com/yahoo/egads/data/Entry.java b/src/main/java/com/yahoo/egads/data/Entry.java deleted file mode 100644 index a7d1b54..0000000 --- a/src/main/java/com/yahoo/egads/data/Entry.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2015, Yahoo Inc. - * Copyrights licensed under the GPL License. - * See the accompanying LICENSE file for terms. - */ - -// A simple egads entry class. - -package com.yahoo.egads.data; - -public class Entry { - public T ts; - public F val; - - public Entry(T ts, F val) { - this.ts = ts; - this.val = val; - } - - public String toString() { - return this.ts + "," + this.val; - } -} diff --git a/src/main/java/com/yahoo/egads/data/FileModelStore.java b/src/main/java/com/yahoo/egads/data/FileModelStore.java index c5a72f9..1e8d995 100644 --- a/src/main/java/com/yahoo/egads/data/FileModelStore.java +++ b/src/main/java/com/yahoo/egads/data/FileModelStore.java @@ -5,13 +5,16 @@ import java.io.FileOutputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.util.HashMap; public class FileModelStore implements ModelStore { + HashMap cache; String path; public FileModelStore (String path) { File dir = new File (path); dir.mkdirs(); this.path = path; + cache = new HashMap(); } @Override @@ -28,18 +31,23 @@ public void storeModel(String tag, Model m) { } @Override - public Model retrieveModel(String tag) { + public Model getModel(String tag) { String filename = tag.replaceAll("[^\\w_-]", "_"); + if (cache.containsKey(filename)) { + return cache.get(filename); + } String fqn = path + "/" + filename; Model m = null; try { ObjectInputStream o = new ObjectInputStream(new FileInputStream(fqn)); m = (Model) o.readObject(); o.close(); + cache.put(filename, m); + return m; } catch (Exception e) { e.printStackTrace(); } - return m; + return null; } } diff --git a/src/main/java/com/yahoo/egads/data/ModelStore.java b/src/main/java/com/yahoo/egads/data/ModelStore.java index a8316e2..4165987 100644 --- a/src/main/java/com/yahoo/egads/data/ModelStore.java +++ b/src/main/java/com/yahoo/egads/data/ModelStore.java @@ -2,5 +2,5 @@ public interface ModelStore { public void storeModel(String tag, Model m); - public Model retrieveModel (String tag); + public Model getModel (String tag); } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java index 742e4cf..b4703c7 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java @@ -64,7 +64,7 @@ private void update (TimeSeries.Entry entry) { } } - private double forecast (TimeSeries.Entry entry) { + public double predict (TimeSeries.Entry entry) { long modelTime = timeToModelTime(entry.time); if (model.containsKey(modelTime)) { return model.get(modelTime); @@ -81,7 +81,7 @@ private void runSeries (TimeSeries.DataSequence data) { double sumErrSquared = 0.0; int processedPoints = 0; for (TimeSeries.Entry entry : data) { - double error = entry.value - forecast(entry); + double error = entry.value - predict(entry); update(entry); sumErr += error; sumAbsErr += Math.abs(error); @@ -115,6 +115,7 @@ public void train(TimeSeries.DataSequence data) { for (sf = min; sf <= max; sf += 0.01) { StreamingOlympicModel m = new StreamingOlympicModel(this.config, sf, this.period); m.runSeries(data); + m.runSeries(data); logger.debug ("Testing Smoothing Factor " + String.format("%.2f", m.smoothingFactor) + " -> "+ m.errorSummaryString()); if (betterThan(m, winner)) { winner = m; @@ -122,6 +123,7 @@ public void train(TimeSeries.DataSequence data) { } this.smoothingFactor = winner.smoothingFactor; reset(); + runSeries(data); logger.debug ("Winner: Smoothing Factor = " + String.format("%.2f", this.smoothingFactor)); } @@ -156,5 +158,4 @@ private float computeExpected(int i, int pl) { public void predict(TimeSeries.DataSequence sequence) throws Exception { return; } - } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java index ed8d5ae..0dc2e05 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java @@ -23,7 +23,8 @@ public abstract class TimeSeriesAbstractModel implements TimeSeriesModel { - // Accuracy stats for this model. + private static final long serialVersionUID = 1L; + // Accuracy stats for this model. protected double bias; protected double mad; protected double mape; @@ -73,6 +74,11 @@ public void toJson(JSONStringer json_out) throws Exception { public void fromJson(JSONObject json_obj) throws Exception { JsonEncoder.fromJson(this, json_obj); } + + public double predict (TimeSeries.Entry entry) { + return 0.0; + } + // Acts as a factory method. protected static boolean betterThan(TimeSeriesAbstractModel model1, TimeSeriesAbstractModel model2) { diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java index 562b485..1ef0c13 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java @@ -20,4 +20,6 @@ public interface TimeSeriesModel extends Model { // predicts the values of the time series specified by the 'time' fields of the sequence and sets the 'value' fields of the sequence public abstract void predict(TimeSeries.DataSequence sequence) throws Exception; + + public abstract double predict (TimeSeries.Entry entry); } diff --git a/src/main/java/com/yahoo/egads/streaming/ProcessStream.java b/src/main/java/com/yahoo/egads/streaming/ProcessStream.java deleted file mode 100644 index fa6bc51..0000000 --- a/src/main/java/com/yahoo/egads/streaming/ProcessStream.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.yahoo.egads.streaming; - -import java.util.Scanner; - -public class ProcessStream { - public static void main(String[] args) { - Scanner sc = new Scanner(System.in); - while (sc.hasNextLine()) { - String line = sc.nextLine(); - String[] fields = line.split(","); - String series; - int timestamp; - double measured; - double forecast = Double.NaN; - try { - series = fields[0]; - timestamp = Integer.parseInt(fields[1]); - measured = Double.parseDouble(fields[2]); - if (fields.length > 3) { - forecast = Double.parseDouble(fields[3]); - } - } catch (Exception e) { - System.err.println("Invalid input line " + line); - continue; - } - System.out.println(series + "/" + timestamp + ": " + measured + "/" + forecast); - } - sc.close(); - } - -} From f6dcaf4ebd603ace1634a8cf1becfaca40d66147 Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Wed, 17 Jun 2015 16:35:45 -0700 Subject: [PATCH 06/13] Finished streaming capability for streaming olympic scoring model SOS updates itself when streaming in data points Bootstrap new model if new data comes in --- .../java/com/yahoo/egads/data/FileModelStore.java | 13 ++++++++++++- src/main/java/com/yahoo/egads/data/Model.java | 3 +++ src/main/java/com/yahoo/egads/data/ModelStore.java | 1 + .../models/adm/AnomalyDetectionAbstractModel.java | 9 +++++++++ .../egads/models/tsmm/StreamingOlympicModel.java | 3 ++- .../egads/models/tsmm/TimeSeriesAbstractModel.java | 12 ++++++++++++ .../yahoo/egads/models/tsmm/TimeSeriesModel.java | 3 +++ 7 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/yahoo/egads/data/FileModelStore.java b/src/main/java/com/yahoo/egads/data/FileModelStore.java index 1e8d995..2ead4ba 100644 --- a/src/main/java/com/yahoo/egads/data/FileModelStore.java +++ b/src/main/java/com/yahoo/egads/data/FileModelStore.java @@ -10,6 +10,8 @@ public class FileModelStore implements ModelStore { HashMap cache; String path; + protected static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(FileModelStore.class.getName()); + public FileModelStore (String path) { File dir = new File (path); dir.mkdirs(); @@ -22,6 +24,7 @@ public void storeModel(String tag, Model m) { String filename = tag.replaceAll("[^\\w_-]", "_"); String fqn = path + "/" + filename; try { + m.clearModified(); ObjectOutputStream o = new ObjectOutputStream(new FileOutputStream (fqn)); o.writeObject(m); o.close(); @@ -45,9 +48,17 @@ public Model getModel(String tag) { cache.put(filename, m); return m; } catch (Exception e) { - e.printStackTrace(); + logger.debug("Model not found: " + tag); } return null; } + public void writeCachedModels() { + for (String key : cache.keySet()) { + Model model = cache.get(key); + if (model.isModified()) { + storeModel(key, model); + } + } + } } diff --git a/src/main/java/com/yahoo/egads/data/Model.java b/src/main/java/com/yahoo/egads/data/Model.java index f6b6f22..8916e6c 100644 --- a/src/main/java/com/yahoo/egads/data/Model.java +++ b/src/main/java/com/yahoo/egads/data/Model.java @@ -20,4 +20,7 @@ public interface Model extends JsonAble, Serializable { // Gets the model name and type public String getModelName(); public String getModelType(); + + public abstract boolean isModified (); + public abstract void clearModified(); } diff --git a/src/main/java/com/yahoo/egads/data/ModelStore.java b/src/main/java/com/yahoo/egads/data/ModelStore.java index 4165987..d53c28e 100644 --- a/src/main/java/com/yahoo/egads/data/ModelStore.java +++ b/src/main/java/com/yahoo/egads/data/ModelStore.java @@ -3,4 +3,5 @@ public interface ModelStore { public void storeModel(String tag, Model m); public Model getModel (String tag); + public void writeCachedModels(); } diff --git a/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java b/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java index 6950735..b792885 100644 --- a/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java @@ -20,6 +20,7 @@ public abstract class AnomalyDetectionAbstractModel implements AnomalyDetectionM protected float amntAutoSensitivity = (float) 0.05; protected String outputDest = ""; protected String modelName; + protected boolean modified; protected static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(AnomalyDetectionModel.class.getName()); @@ -67,4 +68,12 @@ public AnomalyDetectionAbstractModel(Properties config) { } this.outputDest = config.getProperty("OUTPUT"); } + + public boolean isModified () { + return modified; + } + public void clearModified() { + modified = false; + } + } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java index b4703c7..f71b0bf 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java @@ -55,13 +55,14 @@ private long timeToModelTime (long time) { return time % period; } - private void update (TimeSeries.Entry entry) { + public void update (TimeSeries.Entry entry) { long modelTime = timeToModelTime(entry.time); if (model.containsKey(modelTime)) { model.put(modelTime, model.get(modelTime) * (1 - smoothingFactor) + entry.value * smoothingFactor); } else { model.put(modelTime, (double)entry.value); } + modified = true; } public double predict (TimeSeries.Entry entry) { diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java index 0dc2e05..e40a660 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java @@ -32,6 +32,7 @@ public abstract class TimeSeriesAbstractModel implements TimeSeriesModel { protected double sae; protected String modelName; protected Properties config; + protected boolean modified; protected static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(TimeSeriesModel.class.getName()); @@ -40,6 +41,7 @@ public abstract class TimeSeriesAbstractModel implements TimeSeriesModel { public TimeSeriesAbstractModel(Properties config) { this.config = config; + this.modified = false; if (config.getProperty("DYNAMIC_PARAMETERS") != null) { this.dynamicParameters = new Integer(config.getProperty("DYNAMIC_PARAMETERS")); } @@ -78,6 +80,16 @@ public void fromJson(JSONObject json_obj) throws Exception { public double predict (TimeSeries.Entry entry) { return 0.0; } + public void update (TimeSeries.Entry entry) { + return; + } + public boolean isModified () { + return modified; + } + public void clearModified() { + modified = false; + } + // Acts as a factory method. diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java index 1ef0c13..35182cb 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java @@ -22,4 +22,7 @@ public interface TimeSeriesModel extends Model { public abstract void predict(TimeSeries.DataSequence sequence) throws Exception; public abstract double predict (TimeSeries.Entry entry); + public abstract void update (TimeSeries.Entry entry); + public abstract boolean isModified (); + public abstract void clearModified(); } From 311169a9ee5a697068ec0a53298030e1e96f19e8 Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Wed, 17 Jun 2015 16:45:19 -0700 Subject: [PATCH 07/13] Fixed a small bug in batch training Removed abstract keyword from interfaces --- src/main/java/com/yahoo/egads/data/Model.java | 4 ++-- .../egads/models/tsmm/StreamingOlympicModel.java | 1 - .../com/yahoo/egads/models/tsmm/TimeSeriesModel.java | 12 +++++------- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/yahoo/egads/data/Model.java b/src/main/java/com/yahoo/egads/data/Model.java index 8916e6c..f7bc11f 100644 --- a/src/main/java/com/yahoo/egads/data/Model.java +++ b/src/main/java/com/yahoo/egads/data/Model.java @@ -21,6 +21,6 @@ public interface Model extends JsonAble, Serializable { public String getModelName(); public String getModelType(); - public abstract boolean isModified (); - public abstract void clearModified(); + public boolean isModified (); + public void clearModified(); } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java index f71b0bf..1be7b7d 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java @@ -116,7 +116,6 @@ public void train(TimeSeries.DataSequence data) { for (sf = min; sf <= max; sf += 0.01) { StreamingOlympicModel m = new StreamingOlympicModel(this.config, sf, this.period); m.runSeries(data); - m.runSeries(data); logger.debug ("Testing Smoothing Factor " + String.format("%.2f", m.smoothingFactor) + " -> "+ m.errorSummaryString()); if (betterThan(m, winner)) { winner = m; diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java index 35182cb..c0c8488 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java @@ -14,15 +14,13 @@ public interface TimeSeriesModel extends Model { // methods //////////////////////////////////////////////// - public abstract void train(TimeSeries.DataSequence data) throws Exception; + public void train(TimeSeries.DataSequence data) throws Exception; - public abstract void update(TimeSeries.DataSequence data) throws Exception; + public void update(TimeSeries.DataSequence data) throws Exception; // predicts the values of the time series specified by the 'time' fields of the sequence and sets the 'value' fields of the sequence - public abstract void predict(TimeSeries.DataSequence sequence) throws Exception; + public void predict(TimeSeries.DataSequence sequence) throws Exception; - public abstract double predict (TimeSeries.Entry entry); - public abstract void update (TimeSeries.Entry entry); - public abstract boolean isModified (); - public abstract void clearModified(); + public double predict (TimeSeries.Entry entry); + public void update (TimeSeries.Entry entry); } From 994a971c46ff7031cefa5bb6234eceef654623df Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Wed, 17 Jun 2015 21:00:40 -0700 Subject: [PATCH 08/13] Moving files around --- .../java/com/yahoo/egads/StreamForecast.java | 48 +++++++++++++++++++ .../{batch => }/TrainForecastingModel.java | 2 +- 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 src/main/java/com/yahoo/egads/StreamForecast.java rename src/main/java/com/yahoo/egads/{batch => }/TrainForecastingModel.java (98%) diff --git a/src/main/java/com/yahoo/egads/StreamForecast.java b/src/main/java/com/yahoo/egads/StreamForecast.java new file mode 100644 index 0000000..44ee9dc --- /dev/null +++ b/src/main/java/com/yahoo/egads/StreamForecast.java @@ -0,0 +1,48 @@ +package com.yahoo.egads; + +import java.util.Properties; +import java.util.Scanner; + +import com.yahoo.egads.data.FileModelStore; +import com.yahoo.egads.data.Model; +import com.yahoo.egads.data.ModelStore; +import com.yahoo.egads.data.TimeSeries; +import com.yahoo.egads.data.TimeSeries.Entry; +import com.yahoo.egads.models.tsmm.StreamingOlympicModel; +import com.yahoo.egads.models.tsmm.TimeSeriesAbstractModel; + +public class StreamForecast { + public static void main(String[] args) { + Scanner sc = new Scanner(System.in); + ModelStore ms = new FileModelStore ("models"); + while (sc.hasNextLine()) { + String line = sc.nextLine(); + String[] fields = line.split(","); + String series; + int timestamp; + double measured; + double forecast = Double.NaN; + try { + series = fields[0]; + timestamp = Integer.parseInt(fields[1]); + measured = Double.parseDouble(fields[2]); + TimeSeriesAbstractModel model = (TimeSeriesAbstractModel) ms.getModel(series); + if (model == null) { + model = new StreamingOlympicModel(new Properties()); + ms.storeModel(series, model); + System.err.println ("No such model " + series); + } + TimeSeries.Entry e = new TimeSeries.Entry(timestamp, (float)measured); + forecast = model.predict(e); + model.update(e); + System.out.println(String.join(",", series, String.format("%d", timestamp), String.format("%f", measured), String.format("%f", forecast))); + } catch (Exception e) { + System.err.println("Invalid input line " + line); + continue; + } + } + ms.writeCachedModels(); + sc.close(); + } + +} diff --git a/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java b/src/main/java/com/yahoo/egads/TrainForecastingModel.java similarity index 98% rename from src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java rename to src/main/java/com/yahoo/egads/TrainForecastingModel.java index dfbb4ef..7a0540f 100644 --- a/src/main/java/com/yahoo/egads/batch/TrainForecastingModel.java +++ b/src/main/java/com/yahoo/egads/TrainForecastingModel.java @@ -1,4 +1,4 @@ -package com.yahoo.egads.batch; +package com.yahoo.egads; import java.io.File; import java.io.FileNotFoundException; From 04b032e29def57a81d642cf2cb808bc4e2383f0a Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Wed, 17 Jun 2015 21:03:08 -0700 Subject: [PATCH 09/13] Read from stdin rather than a file --- src/main/java/com/yahoo/egads/TrainForecastingModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/yahoo/egads/TrainForecastingModel.java b/src/main/java/com/yahoo/egads/TrainForecastingModel.java index 7a0540f..4cbbbcc 100644 --- a/src/main/java/com/yahoo/egads/TrainForecastingModel.java +++ b/src/main/java/com/yahoo/egads/TrainForecastingModel.java @@ -15,7 +15,7 @@ public class TrainForecastingModel { public static void main(String[] args) throws FileNotFoundException { HashMap inputs = new HashMap(); - Scanner sc = new Scanner(new File (args[0])); + Scanner sc = new Scanner(System.in); ModelStore m = new FileModelStore ("models"); Properties osProps = new Properties(); osProps.setProperty("TIME_SHIFTS", "0,1"); From 1f06ef12c838c85816c8787a3520e91f10192665 Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Thu, 18 Jun 2015 13:48:09 -0700 Subject: [PATCH 10/13] Working on option processing --- pom.xml | 5 ++ .../yahoo/egads/TrainForecastingModel.java | 51 ++++++++++++++++--- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index fd2f3ba..5073e90 100644 --- a/pom.xml +++ b/pom.xml @@ -20,6 +20,11 @@ + + gnu.getopt + java-getopt + 1.0.13 + org.apache.logging.log4j log4j-api diff --git a/src/main/java/com/yahoo/egads/TrainForecastingModel.java b/src/main/java/com/yahoo/egads/TrainForecastingModel.java index 4cbbbcc..3e7f2e8 100644 --- a/src/main/java/com/yahoo/egads/TrainForecastingModel.java +++ b/src/main/java/com/yahoo/egads/TrainForecastingModel.java @@ -1,27 +1,31 @@ package com.yahoo.egads; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.IOException; +import java.sql.ResultSet; import java.util.Properties; import java.util.Scanner; import java.util.HashMap; import com.yahoo.egads.data.FileModelStore; +import com.yahoo.egads.data.Model; +import com.yahoo.egads.data.ModelFactory; import com.yahoo.egads.data.ModelStore; import com.yahoo.egads.data.TimeSeries; -import com.yahoo.egads.models.tsmm.OlympicModel; import com.yahoo.egads.models.tsmm.StreamingOlympicModel; +import gnu.getopt.*; + public class TrainForecastingModel { - public static void main(String[] args) throws FileNotFoundException { + public static void main(String[] args) throws IOException { + HashMap options = processOptions(args); HashMap inputs = new HashMap(); Scanner sc = new Scanner(System.in); - ModelStore m = new FileModelStore ("models"); + ModelStore ms = new FileModelStore ("models"); Properties osProps = new Properties(); - osProps.setProperty("TIME_SHIFTS", "0,1"); - osProps.setProperty("BASE_WINDOWS", "24,168"); - osProps.setProperty("NUM_WEEKS", "6"); - osProps.setProperty("NUM_TO_DROP", "1"); + osProps.load (new FileInputStream(options.get('p'))); while (sc.hasNextLine()) { String line = sc.nextLine(); String[] fields = line.split(","); @@ -43,16 +47,47 @@ public static void main(String[] args) throws FileNotFoundException { seq.add(new TimeSeries.Entry(timestamp, measured)); } sc.close(); + ModelFactory mf = new ModelFactory(osProps); for (String series : inputs.keySet()) { TimeSeries.DataSequence seq = inputs.get(series); + Model m = mf.getModel(options.get('m')); StreamingOlympicModel o = new StreamingOlympicModel(osProps); o.train(seq); // System.out.println (series + ":"); // for (TimeSeries.Entry e : seq) { // System.out.println(e.time + ": " + e.value); // } - m.storeModel(series, o); + ms.storeModel(series, o); + } + } + + public static void usage() { + System.out.println ("Usage: TrainForecastingModel [-m ] [-p ] [-h]"); + System.out.println (" Modeltypes:"); + System.out.println (" sos: Streaming Olympic Scoring"); + System.out.println (" Default preperties file is config.ini"); + System.exit(0); + } + + public static HashMap processOptions (String[] args) { + HashMap result = new HashMap(); + // defaults + result.put(new Integer('m'), "sos"); + result.put(new Integer('p'), "config.ini"); + + Getopt g = new Getopt("TrainForecastingModel", args, "m:p:h"); + int c; + while ((c = g.getopt()) != -1) { + switch (c) { + case 'm': + case 'p': + result.put(c, g.getOptarg()); + break; + case 'h': + usage(); + } } + return result; } } From b12379104ffffb0700c68ea981f12d041f2023ba Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Thu, 18 Jun 2015 16:21:59 -0700 Subject: [PATCH 11/13] Added a simple command line wrapper (so I don't have to remember how to invoke java) --- egads | 29 +++++++++++++++++ src/main/java/com/yahoo/egads/Egads.java | 7 ++-- .../java/com/yahoo/egads/StreamForecast.java | 2 -- .../yahoo/egads/TrainForecastingModel.java | 32 ++++++++----------- 4 files changed, 47 insertions(+), 23 deletions(-) create mode 100755 egads diff --git a/egads b/egads new file mode 100755 index 0000000..ab8b304 --- /dev/null +++ b/egads @@ -0,0 +1,29 @@ +#!/usr/bin/perl + +sub usage { + print "Usage: egads args\n"; + print " Commands:\n"; + print " StreamForecast\n"; + print " TrainForecastingModel\n"; + print " MakeEmptyModel\n"; + exit (0); +} + +sub launch { + my $cmd = "java -cp lib/OpenForecast-0.5.0.jar:target/egads-jar-with-dependencies.jar -Dlog4j.configurationFile=log4j.xml com.yahoo.egads." . join (' ', @ARGV); + print STDERR $cmd, "\n"; + system $cmd; +} + +my $command = $ARGV[0]; + +if ($command eq "MakeEmptyModel") { + launch(); +} elsif ($command eq "StreamForecast") { + launch(); +} elsif ($command eq "TrainForecastingModel") { + launch(); +} else { + usage(); +} + diff --git a/src/main/java/com/yahoo/egads/Egads.java b/src/main/java/com/yahoo/egads/Egads.java index 9ee8be7..07c4eeb 100644 --- a/src/main/java/com/yahoo/egads/Egads.java +++ b/src/main/java/com/yahoo/egads/Egads.java @@ -2,10 +2,13 @@ package com.yahoo.egads; -import java.util.Properties; import java.io.FileInputStream; import java.io.InputStream; -import com.yahoo.egads.utilities.*; +import java.util.Properties; + +import com.yahoo.egads.utilities.FileInputProcessor; +import com.yahoo.egads.utilities.InputProcessor; +import com.yahoo.egads.utilities.StdinProcessor; /* * Call stack. diff --git a/src/main/java/com/yahoo/egads/StreamForecast.java b/src/main/java/com/yahoo/egads/StreamForecast.java index 44ee9dc..b495d65 100644 --- a/src/main/java/com/yahoo/egads/StreamForecast.java +++ b/src/main/java/com/yahoo/egads/StreamForecast.java @@ -4,10 +4,8 @@ import java.util.Scanner; import com.yahoo.egads.data.FileModelStore; -import com.yahoo.egads.data.Model; import com.yahoo.egads.data.ModelStore; import com.yahoo.egads.data.TimeSeries; -import com.yahoo.egads.data.TimeSeries.Entry; import com.yahoo.egads.models.tsmm.StreamingOlympicModel; import com.yahoo.egads.models.tsmm.TimeSeriesAbstractModel; diff --git a/src/main/java/com/yahoo/egads/TrainForecastingModel.java b/src/main/java/com/yahoo/egads/TrainForecastingModel.java index 3e7f2e8..1d53d7d 100644 --- a/src/main/java/com/yahoo/egads/TrainForecastingModel.java +++ b/src/main/java/com/yahoo/egads/TrainForecastingModel.java @@ -1,31 +1,26 @@ package com.yahoo.egads; -import java.io.File; +import gnu.getopt.Getopt; + import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.sql.ResultSet; +import java.util.HashMap; import java.util.Properties; import java.util.Scanner; -import java.util.HashMap; import com.yahoo.egads.data.FileModelStore; -import com.yahoo.egads.data.Model; import com.yahoo.egads.data.ModelFactory; import com.yahoo.egads.data.ModelStore; import com.yahoo.egads.data.TimeSeries; -import com.yahoo.egads.models.tsmm.StreamingOlympicModel; - -import gnu.getopt.*; +import com.yahoo.egads.models.tsmm.TimeSeriesModel; public class TrainForecastingModel { - public static void main(String[] args) throws IOException { + public static void main(String[] args) throws Exception { HashMap options = processOptions(args); HashMap inputs = new HashMap(); Scanner sc = new Scanner(System.in); ModelStore ms = new FileModelStore ("models"); Properties osProps = new Properties(); - osProps.load (new FileInputStream(options.get('p'))); + osProps.load (new FileInputStream(options.get(new Integer('p')))); while (sc.hasNextLine()) { String line = sc.nextLine(); String[] fields = line.split(","); @@ -50,14 +45,11 @@ public static void main(String[] args) throws IOException { ModelFactory mf = new ModelFactory(osProps); for (String series : inputs.keySet()) { TimeSeries.DataSequence seq = inputs.get(series); - Model m = mf.getModel(options.get('m')); - StreamingOlympicModel o = new StreamingOlympicModel(osProps); - o.train(seq); -// System.out.println (series + ":"); -// for (TimeSeries.Entry e : seq) { -// System.out.println(e.time + ": " + e.value); -// } - ms.storeModel(series, o); + TimeSeriesModel m = mf.getTSModel(options.get(new Integer('m'))); + if (m != null) { + m.train(seq); + ms.storeModel(series, m); + } } } @@ -80,6 +72,8 @@ public static HashMap processOptions (String[] args) { while ((c = g.getopt()) != -1) { switch (c) { case 'm': + result.put(c, g.getOptarg()); + break; case 'p': result.put(c, g.getOptarg()); break; From f8d17db3f12f3c79c10d29c75469ef1d263fc83e Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Thu, 18 Jun 2015 17:25:14 -0700 Subject: [PATCH 12/13] More work on streaming prediction models - allow to measure error even if the data is streamed over time rather than all at once --- .../java/com/yahoo/egads/StreamForecast.java | 49 ++++++++++++++++- .../com/yahoo/egads/data/FileModelStore.java | 4 ++ src/main/java/com/yahoo/egads/data/Model.java | 3 ++ .../java/com/yahoo/egads/data/ModelStore.java | 3 ++ .../adm/AnomalyDetectionAbstractModel.java | 7 +++ .../models/tsmm/StreamingOlympicModel.java | 32 +++++------ .../models/tsmm/TimeSeriesAbstractModel.java | 54 ++++++++++++++----- 7 files changed, 118 insertions(+), 34 deletions(-) diff --git a/src/main/java/com/yahoo/egads/StreamForecast.java b/src/main/java/com/yahoo/egads/StreamForecast.java index b495d65..8ff007c 100644 --- a/src/main/java/com/yahoo/egads/StreamForecast.java +++ b/src/main/java/com/yahoo/egads/StreamForecast.java @@ -1,9 +1,13 @@ package com.yahoo.egads; +import gnu.getopt.Getopt; + +import java.util.HashMap; import java.util.Properties; import java.util.Scanner; import com.yahoo.egads.data.FileModelStore; +import com.yahoo.egads.data.Model; import com.yahoo.egads.data.ModelStore; import com.yahoo.egads.data.TimeSeries; import com.yahoo.egads.models.tsmm.StreamingOlympicModel; @@ -11,6 +15,7 @@ public class StreamForecast { public static void main(String[] args) { + HashMap options = processOptions(args); Scanner sc = new Scanner(System.in); ModelStore ms = new FileModelStore ("models"); while (sc.hasNextLine()) { @@ -33,14 +38,56 @@ public static void main(String[] args) { TimeSeries.Entry e = new TimeSeries.Entry(timestamp, (float)measured); forecast = model.predict(e); model.update(e); - System.out.println(String.join(",", series, String.format("%d", timestamp), String.format("%f", measured), String.format("%f", forecast))); + if (!options.containsKey(new Integer('q'))) { + System.out.println(String.join(",", series, String.format("%d", timestamp), String.format("%f", measured), String.format("%f", forecast))); + } } catch (Exception e) { System.err.println("Invalid input line " + line); continue; } } ms.writeCachedModels(); + if (options.containsKey(new Integer('t'))) { + for (Model m : ms.getCachedModels()) { + System.out.println(m.errorSummaryString()); + } + } sc.close(); } + public static void usage() { + System.out.println ("Usage: StreamForecast [-m ] [-p ] [-t] [-q] [-h]"); + System.out.println (" Modeltypes:"); + System.out.println (" sos: Streaming Olympic Scoring"); + System.out.println (" Default preperties file is config.ini"); + System.out.println (" -t: Run in model test mode. This outputs error stats at the end of the model run"); + System.exit(0); + } + + public static HashMap processOptions (String[] args) { + HashMap result = new HashMap(); + // defaults + result.put(new Integer('m'), "sos"); + result.put(new Integer('p'), "config.ini"); + + Getopt g = new Getopt("TrainForecastingModel", args, "m:p:htq"); + int c; + while ((c = g.getopt()) != -1) { + switch (c) { + case 'm': + case 'n': + case 'p': + result.put(c, g.getOptarg()); + break; + case 't': + case 'q': + result.put(c, "True"); + break; + case 'h': + usage(); + } + } + return result; + } + } diff --git a/src/main/java/com/yahoo/egads/data/FileModelStore.java b/src/main/java/com/yahoo/egads/data/FileModelStore.java index 2ead4ba..6bf3586 100644 --- a/src/main/java/com/yahoo/egads/data/FileModelStore.java +++ b/src/main/java/com/yahoo/egads/data/FileModelStore.java @@ -5,6 +5,7 @@ import java.io.FileOutputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.util.Collection; import java.util.HashMap; public class FileModelStore implements ModelStore { @@ -60,5 +61,8 @@ public void writeCachedModels() { } } } + public Collection getCachedModels() { + return cache.values(); + } } diff --git a/src/main/java/com/yahoo/egads/data/Model.java b/src/main/java/com/yahoo/egads/data/Model.java index f7bc11f..98e1e5a 100644 --- a/src/main/java/com/yahoo/egads/data/Model.java +++ b/src/main/java/com/yahoo/egads/data/Model.java @@ -23,4 +23,7 @@ public interface Model extends JsonAble, Serializable { public boolean isModified (); public void clearModified(); + + public void clearErrorStats(); + public String errorSummaryString(); } diff --git a/src/main/java/com/yahoo/egads/data/ModelStore.java b/src/main/java/com/yahoo/egads/data/ModelStore.java index d53c28e..3b09b12 100644 --- a/src/main/java/com/yahoo/egads/data/ModelStore.java +++ b/src/main/java/com/yahoo/egads/data/ModelStore.java @@ -1,7 +1,10 @@ package com.yahoo.egads.data; +import java.util.Collection; + public interface ModelStore { public void storeModel(String tag, Model m); public Model getModel (String tag); public void writeCachedModels(); + public Collection getCachedModels(); } diff --git a/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java b/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java index b792885..7847309 100644 --- a/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java @@ -76,4 +76,11 @@ public void clearModified() { modified = false; } + public String errorSummaryString() { + return ""; + } + + public void clearErrorStats() { + } + } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java index 1be7b7d..4c1f0a7 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java @@ -67,35 +67,27 @@ public void update (TimeSeries.Entry entry) { public double predict (TimeSeries.Entry entry) { long modelTime = timeToModelTime(entry.time); + double prediction; if (model.containsKey(modelTime)) { - return model.get(modelTime); + prediction = model.get(modelTime); } else { - return entry.value; + prediction = entry.value; } + double error = entry.value - prediction; + sumErr += error; + sumAbsErr += Math.abs(error); + sumAbsPercentErr += 100 * Math.abs(error / entry.value); + sumErrSquared += error * error; + processedPoints++; + return prediction; } private void runSeries (TimeSeries.DataSequence data) { - // Reset various helper summations - double sumErr = 0.0; - double sumAbsErr = 0.0; - double sumAbsPercentErr = 0.0; - double sumErrSquared = 0.0; - int processedPoints = 0; + clearErrorStats(); for (TimeSeries.Entry entry : data) { - double error = entry.value - predict(entry); + predict(entry); update(entry); - sumErr += error; - sumAbsErr += Math.abs(error); - sumAbsPercentErr += 100 * Math.abs(error / entry.value); - sumErrSquared += error * error; - processedPoints++; } - this.bias = sumErr / processedPoints; - this.mad = sumAbsErr / processedPoints; - this.mape = sumAbsPercentErr / processedPoints; - this.mse = sumErrSquared / processedPoints; - this.sae = sumAbsErr; - errorsInit = true; } public void train(TimeSeries.DataSequence data) { diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java index e40a660..74d08e6 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java @@ -33,6 +33,11 @@ public abstract class TimeSeriesAbstractModel implements TimeSeriesModel { protected String modelName; protected Properties config; protected boolean modified; + protected double sumErr; + protected double sumAbsErr; + protected double sumAbsPercentErr; + protected double sumErrSquared; + protected int processedPoints; protected static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(TimeSeriesModel.class.getName()); @@ -44,7 +49,7 @@ public TimeSeriesAbstractModel(Properties config) { this.modified = false; if (config.getProperty("DYNAMIC_PARAMETERS") != null) { this.dynamicParameters = new Integer(config.getProperty("DYNAMIC_PARAMETERS")); - } + } } protected long weeklyOffset (long time) { @@ -90,7 +95,19 @@ public void clearModified() { modified = false; } - + public void clearErrorStats() { + sumErr = 0.0; + sumAbsErr = 0.0; + sumAbsPercentErr = 0.0; + sumErrSquared = 0.0; + processedPoints = 0; + bias = 0.0; + mad = 0.0; + mape = 0.0; + mse = 0.0; + sae = 0.0; + errorsInit = false; + } // Acts as a factory method. protected static boolean betterThan(TimeSeriesAbstractModel model1, TimeSeriesAbstractModel model2) { @@ -148,6 +165,7 @@ protected static boolean betterThan(TimeSeriesAbstractModel model1, TimeSeriesAb } public String errorSummaryString () { + computeForecastErrors(); return ("B:" + String.format("%.2f", getBias()) + "\tMAD:" + String.format("%.2f", getMAD()) + "\tMAPE:" + String.format("%.2f", getMAPE()) + @@ -183,12 +201,7 @@ protected void initForecastErrors(ForecastingModel forecaster, TimeSeries.DataSe * Initializes all errors given the model. */ protected void initForecastErrors(ArrayList model, TimeSeries.DataSequence data) { - // Reset various helper summations - double sumErr = 0.0; - double sumAbsErr = 0.0; - double sumAbsPercentErr = 0.0; - double sumErrSquared = 0.0; - int processedPoints = 0; + clearErrorStats(); int n = data.size(); @@ -201,11 +214,26 @@ protected void initForecastErrors(ArrayList model, TimeSeries.DataSequenc sumErrSquared += error * error; processedPoints++; } - this.bias = sumErr / processedPoints; - this.mad = sumAbsErr / processedPoints; - this.mape = sumAbsPercentErr / processedPoints; - this.mse = sumErrSquared / processedPoints; - this.sae = sumAbsErr; + bias = sumErr / processedPoints; + mad = sumAbsErr / processedPoints; + mape = sumAbsPercentErr / processedPoints; + mse = sumErrSquared / processedPoints; + sae = sumAbsErr; + errorsInit = true; + } + + protected void computeForecastErrors() { + if (processedPoints <= 0) { + return; + } + if (errorsInit) { + return; + } + bias = sumErr / processedPoints; + mad = sumAbsErr / processedPoints; + mape = sumAbsPercentErr / processedPoints; + mse = sumErrSquared / processedPoints; + sae = sumAbsErr; errorsInit = true; } From 98d8dd61927cb1d263f0f4f7af49abfdabdc59b2 Mon Sep 17 00:00:00 2001 From: Ian Flint Date: Thu, 18 Jun 2015 23:40:16 -0700 Subject: [PATCH 13/13] Separate forecast and anomaly models in file storage Made a new type of model - a Streaming model, and pulled all of the streaming specific functions into it TODO: Make everything streaming and then get rid of the differentiation --- .../java/com/yahoo/egads/StreamForecast.java | 7 +-- .../com/yahoo/egads/data/FileModelStore.java | 21 +++++-- src/main/java/com/yahoo/egads/data/Model.java | 6 +- .../java/com/yahoo/egads/data/ModelStore.java | 4 +- .../adm/AnomalyDetectionAbstractModel.java | 6 +- .../models/tsmm/StreamingOlympicModel.java | 26 +++------ .../models/tsmm/TimeSeriesAbstractModel.java | 56 ++++++------------- .../egads/models/tsmm/TimeSeriesModel.java | 3 - 8 files changed, 51 insertions(+), 78 deletions(-) diff --git a/src/main/java/com/yahoo/egads/StreamForecast.java b/src/main/java/com/yahoo/egads/StreamForecast.java index 8ff007c..fa43aa1 100644 --- a/src/main/java/com/yahoo/egads/StreamForecast.java +++ b/src/main/java/com/yahoo/egads/StreamForecast.java @@ -3,7 +3,6 @@ import gnu.getopt.Getopt; import java.util.HashMap; -import java.util.Properties; import java.util.Scanner; import com.yahoo.egads.data.FileModelStore; @@ -11,7 +10,7 @@ import com.yahoo.egads.data.ModelStore; import com.yahoo.egads.data.TimeSeries; import com.yahoo.egads.models.tsmm.StreamingOlympicModel; -import com.yahoo.egads.models.tsmm.TimeSeriesAbstractModel; +import com.yahoo.egads.models.tsmm.TimeSeriesStreamingModel; public class StreamForecast { public static void main(String[] args) { @@ -29,9 +28,9 @@ public static void main(String[] args) { series = fields[0]; timestamp = Integer.parseInt(fields[1]); measured = Double.parseDouble(fields[2]); - TimeSeriesAbstractModel model = (TimeSeriesAbstractModel) ms.getModel(series); + TimeSeriesStreamingModel model = (TimeSeriesStreamingModel) ms.getModel(series, Model.ModelType.FORECAST); if (model == null) { - model = new StreamingOlympicModel(new Properties()); + model = new StreamingOlympicModel(); ms.storeModel(series, model); System.err.println ("No such model " + series); } diff --git a/src/main/java/com/yahoo/egads/data/FileModelStore.java b/src/main/java/com/yahoo/egads/data/FileModelStore.java index 6bf3586..65f3eff 100644 --- a/src/main/java/com/yahoo/egads/data/FileModelStore.java +++ b/src/main/java/com/yahoo/egads/data/FileModelStore.java @@ -14,15 +14,24 @@ public class FileModelStore implements ModelStore { protected static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(FileModelStore.class.getName()); public FileModelStore (String path) { - File dir = new File (path); - dir.mkdirs(); this.path = path; cache = new HashMap(); + new File (path).mkdirs(); + } + + private String getFilename (String tag, Model.ModelType type) { + String filename = tag.replaceAll("[^\\w_-]", "_"); + if (type == Model.ModelType.ANOMALY) { + filename = "anomaly." + filename; + } else if (type == Model.ModelType.FORECAST) { + filename = "forecast." + filename; + } + return filename; } @Override public void storeModel(String tag, Model m) { - String filename = tag.replaceAll("[^\\w_-]", "_"); + String filename = getFilename(tag, m.getModelType()); String fqn = path + "/" + filename; try { m.clearModified(); @@ -35,8 +44,8 @@ public void storeModel(String tag, Model m) { } @Override - public Model getModel(String tag) { - String filename = tag.replaceAll("[^\\w_-]", "_"); + public Model getModel(String tag, Model.ModelType type) { + String filename = getFilename(tag, type); if (cache.containsKey(filename)) { return cache.get(filename); } @@ -57,6 +66,8 @@ public void writeCachedModels() { for (String key : cache.keySet()) { Model model = cache.get(key); if (model.isModified()) { +// The key always has the model type prepended - remove it before storing + key = key.replaceFirst("[a-zA-Z]*\\.", ""); storeModel(key, model); } } diff --git a/src/main/java/com/yahoo/egads/data/Model.java b/src/main/java/com/yahoo/egads/data/Model.java index 98e1e5a..f681970 100644 --- a/src/main/java/com/yahoo/egads/data/Model.java +++ b/src/main/java/com/yahoo/egads/data/Model.java @@ -13,17 +13,15 @@ import java.io.Serializable; public interface Model extends JsonAble, Serializable { + enum ModelType {FORECAST, ANOMALY}; // resets the model. public void reset(); // Gets the model name and type public String getModelName(); - public String getModelType(); - + public ModelType getModelType(); public boolean isModified (); public void clearModified(); - - public void clearErrorStats(); public String errorSummaryString(); } diff --git a/src/main/java/com/yahoo/egads/data/ModelStore.java b/src/main/java/com/yahoo/egads/data/ModelStore.java index 3b09b12..ab83b2b 100644 --- a/src/main/java/com/yahoo/egads/data/ModelStore.java +++ b/src/main/java/com/yahoo/egads/data/ModelStore.java @@ -2,9 +2,11 @@ import java.util.Collection; +import com.yahoo.egads.data.Model.ModelType; + public interface ModelStore { public void storeModel(String tag, Model m); - public Model getModel (String tag); + Model getModel(String tag, ModelType type); public void writeCachedModels(); public Collection getCachedModels(); } diff --git a/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java b/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java index 7847309..b6bd87d 100644 --- a/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java @@ -12,6 +12,8 @@ import org.json.JSONStringer; import com.yahoo.egads.data.JsonEncoder; +import com.yahoo.egads.data.Model; +import com.yahoo.egads.data.Model.ModelType; import com.yahoo.egads.models.tsmm.TimeSeriesModel; public abstract class AnomalyDetectionAbstractModel implements AnomalyDetectionModel { @@ -28,8 +30,8 @@ public String getModelName() { return modelName; } - public String getModelType() { - return "Anomaly"; + public ModelType getModelType() { + return Model.ModelType.ANOMALY; } @Override diff --git a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java index 4c1f0a7..3c00b37 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/StreamingOlympicModel.java @@ -19,7 +19,7 @@ import com.yahoo.egads.utilities.FileUtils; -public class StreamingOlympicModel extends TimeSeriesAbstractModel { +public class StreamingOlympicModel extends TimeSeriesStreamingModel { // methods //////////////////////////////////////////////// private static final long serialVersionUID = 1L; @@ -28,14 +28,14 @@ public class StreamingOlympicModel extends TimeSeriesAbstractModel { protected int period; protected double smoothingFactor; - public StreamingOlympicModel(Properties config) { - super(config); + public StreamingOlympicModel() { + super(); smoothingFactor = 0.4; period = 86400 * 7; model = new HashMap(); } - public StreamingOlympicModel(Properties config, double smoothingFactor, int period) { - super(config); + public StreamingOlympicModel(double smoothingFactor, int period) { + super(); this.smoothingFactor = smoothingFactor; this.period = period; this.model = new HashMap(); @@ -94,7 +94,7 @@ public void train(TimeSeries.DataSequence data) { StreamingOlympicModel winner = null; double sf = 0.0; for (sf = 0.0; sf <= 1; sf += 0.1) { - StreamingOlympicModel m = new StreamingOlympicModel(this.config, sf, this.period); + StreamingOlympicModel m = new StreamingOlympicModel(sf, this.period); m.runSeries(data); logger.debug ("Testing Smoothing Factor " + String.format("%.2f", m.smoothingFactor) + " -> "+ m.errorSummaryString()); if (betterThan(m, winner)) { @@ -106,7 +106,7 @@ public void train(TimeSeries.DataSequence data) { double max = winner.smoothingFactor + 0.09; if (max >= 1) max = .99; for (sf = min; sf <= max; sf += 0.01) { - StreamingOlympicModel m = new StreamingOlympicModel(this.config, sf, this.period); + StreamingOlympicModel m = new StreamingOlympicModel(sf, this.period); m.runSeries(data); logger.debug ("Testing Smoothing Factor " + String.format("%.2f", m.smoothingFactor) + " -> "+ m.errorSummaryString()); if (betterThan(m, winner)) { @@ -135,18 +135,6 @@ public String getModelName() { return "OlympicModel"; } - private Float sum(ArrayList list) { - float sum = 0; - for (float i : list) { - sum = sum + i; - } - return sum; - } - - private float computeExpected(int i, int pl) { - return (float)0.0; - } - public void predict(TimeSeries.DataSequence sequence) throws Exception { return; } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java index 74d08e6..84bec2e 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesAbstractModel.java @@ -33,11 +33,6 @@ public abstract class TimeSeriesAbstractModel implements TimeSeriesModel { protected String modelName; protected Properties config; protected boolean modified; - protected double sumErr; - protected double sumAbsErr; - protected double sumAbsPercentErr; - protected double sumErrSquared; - protected int processedPoints; protected static org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getLogger(TimeSeriesModel.class.getName()); @@ -46,7 +41,7 @@ public abstract class TimeSeriesAbstractModel implements TimeSeriesModel { public TimeSeriesAbstractModel(Properties config) { this.config = config; - this.modified = false; + modified = false; if (config.getProperty("DYNAMIC_PARAMETERS") != null) { this.dynamicParameters = new Integer(config.getProperty("DYNAMIC_PARAMETERS")); } @@ -68,8 +63,8 @@ public String getModelName() { return modelName; } - public String getModelType() { - return "Forecast"; + public ModelType getModelType() { + return Model.ModelType.FORECAST; } @Override @@ -82,25 +77,8 @@ public void fromJson(JSONObject json_obj) throws Exception { JsonEncoder.fromJson(this, json_obj); } - public double predict (TimeSeries.Entry entry) { - return 0.0; - } - public void update (TimeSeries.Entry entry) { - return; - } - public boolean isModified () { - return modified; - } - public void clearModified() { - modified = false; - } public void clearErrorStats() { - sumErr = 0.0; - sumAbsErr = 0.0; - sumAbsPercentErr = 0.0; - sumErrSquared = 0.0; - processedPoints = 0; bias = 0.0; mad = 0.0; mape = 0.0; @@ -165,7 +143,6 @@ protected static boolean betterThan(TimeSeriesAbstractModel model1, TimeSeriesAb } public String errorSummaryString () { - computeForecastErrors(); return ("B:" + String.format("%.2f", getBias()) + "\tMAD:" + String.format("%.2f", getMAD()) + "\tMAPE:" + String.format("%.2f", getMAPE()) + @@ -204,6 +181,11 @@ protected void initForecastErrors(ArrayList model, TimeSeries.DataSequenc clearErrorStats(); int n = data.size(); + double sumErr = 0.0; + double sumAbsErr = 0.0; + double sumAbsPercentErr = 0.0; + double sumErrSquared = 0.0; + int processedPoints = 0; for (int i = 0; i < n; i++) { // Calculate error in forecast, and update sums appropriately @@ -222,20 +204,6 @@ protected void initForecastErrors(ArrayList model, TimeSeries.DataSequenc errorsInit = true; } - protected void computeForecastErrors() { - if (processedPoints <= 0) { - return; - } - if (errorsInit) { - return; - } - bias = sumErr / processedPoints; - mad = sumAbsErr / processedPoints; - mape = sumAbsPercentErr / processedPoints; - mse = sumErrSquared / processedPoints; - sae = sumAbsErr; - errorsInit = true; - } /** * Returns the bias - the arithmetic mean of the errors - obtained from applying the current forecasting model to @@ -306,4 +274,12 @@ public double getSAE() { } return sae; } + public boolean isModified () { + return modified; + } + + public void clearModified() { + modified = false; + } + } diff --git a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java index c0c8488..4ef7449 100644 --- a/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java +++ b/src/main/java/com/yahoo/egads/models/tsmm/TimeSeriesModel.java @@ -20,7 +20,4 @@ public interface TimeSeriesModel extends Model { // predicts the values of the time series specified by the 'time' fields of the sequence and sets the 'value' fields of the sequence public void predict(TimeSeries.DataSequence sequence) throws Exception; - - public double predict (TimeSeries.Entry entry); - public void update (TimeSeries.Entry entry); }