-
Notifications
You must be signed in to change notification settings - Fork 42
ETL APIs for handling Time Series #366
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: adatao
Are you sure you want to change the base?
Changes from all commits
4897a27
d5bd977
5bed828
1dc3203
966c6b6
fe70d3e
42c4e90
8c241bf
b120df6
41fff42
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,127 @@ | ||
| package io.ddf.etl; | ||
|
|
||
|
|
||
| import io.ddf.DDF; | ||
| import io.ddf.analytics.ABinningHandler.BinningType; | ||
| import io.ddf.exception.DDFException; | ||
| import io.ddf.misc.ADDFFunctionalGroupHandler; | ||
| import java.util.List; | ||
| import java.util.concurrent.TimeUnit; | ||
| import com.google.common.collect.Lists; | ||
|
|
||
| public abstract class ATimeSeriesHandler extends ADDFFunctionalGroupHandler implements IHandleTimeSeries { | ||
|
|
||
| protected String mTimestampColumn; | ||
| protected String mTsIDColumn = null; | ||
|
|
||
|
|
||
| public ATimeSeriesHandler(DDF theDDF) { | ||
| super(theDDF); | ||
|
|
||
| } | ||
|
|
||
| public void setTimeStampColumn(String colName) { | ||
| mTimestampColumn = colName; | ||
| } | ||
|
|
||
| public String getTimeStampColumn() { | ||
| return mTimestampColumn; | ||
| } | ||
|
|
||
|
|
||
| public String getTsIDColumn() { | ||
| return mTsIDColumn; | ||
| } | ||
|
|
||
| public void setTsIDColumn(String colName) { | ||
| this.mTsIDColumn = colName; | ||
| } | ||
|
|
||
| @Override | ||
| public DDF downsample(String timestampColumn, List<String> aggregateFunctions, int interval, TimeUnit timeUnit) | ||
| throws DDFException { | ||
|
|
||
| this.mTimestampColumn = timestampColumn; | ||
| List<String> groupByCols = Lists.newArrayList(timestampColumn); | ||
| if (mTsIDColumn != null && !mTsIDColumn.isEmpty()) { | ||
| groupByCols.add(mTsIDColumn); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. downsampling should be for each mTsIDColumn
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is mTsIDColumn?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ID of a time series |
||
| } | ||
|
|
||
| long intervalInSeconds = timeUnit.toSeconds(interval); | ||
|
|
||
| int numBins = getNumBins(intervalInSeconds); | ||
| DDF binnedDDF = this.getDDF().binning(timestampColumn, BinningType.EQUALINTERVAL.toString(), numBins, null, false, | ||
| true, true); | ||
| DDF newDDF = binnedDDF.groupBy(groupByCols, aggregateFunctions); | ||
|
|
||
| return newDDF; | ||
| } | ||
|
|
||
| @Override | ||
| public DDF downsample(String timestampColumn, String tsIDColumn, List<String> aggregateFunctions, int interval, | ||
| TimeUnit timeUnit) throws DDFException { | ||
|
|
||
| this.mTsIDColumn = tsIDColumn; | ||
| List<String> rs = getDistinctValues(tsIDColumn); | ||
|
|
||
| DDF ddf0 = filterByValue(tsIDColumn, rs.get(0)); | ||
|
|
||
| ddf0.getTimeSeriesHandler().setTsIDColumn(tsIDColumn); | ||
| DDF newDDF = ddf0.getTimeSeriesHandler().downsample(timestampColumn, aggregateFunctions, interval, timeUnit); | ||
| if (rs.size() > 1) { | ||
| for (int i = 1; i < rs.size(); i++) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There 1 key for most usecases for now. I wanted to quickly reuse DDF binning method here. Other KPIs like col diff or moving average used SparkDF window functions. |
||
| DDF filteredDDF = filterByValue(tsIDColumn, rs.get(i)); | ||
| filteredDDF.getTimeSeriesHandler().setTsIDColumn(tsIDColumn); | ||
| DDF nextDDF = filteredDDF.getTimeSeriesHandler().downsample(timestampColumn, aggregateFunctions, interval, | ||
| timeUnit); | ||
| newDDF = newDDF.getJoinsHandler().merge(nextDDF); | ||
| } | ||
| } | ||
| return newDDF; | ||
| } | ||
|
|
||
| @Override | ||
| public DDF addDiffColumn(String timestampColumn, String colToGetDiff, String diffColumn) throws DDFException{ | ||
| return addDiffColumn(timestampColumn, null, colToGetDiff, diffColumn); | ||
| } | ||
|
|
||
| @Override | ||
| public DDF addDiffColumn(String timestampColumn, String tsIDColumn, String colToGetDiff, String diffColumn) | ||
| throws DDFException { | ||
| // TODO Auto-generated method stub | ||
| return null; | ||
| } | ||
|
|
||
| @Override | ||
| public DDF computeMovingAverage(String timestampColumn, String tsIDColumn, String colToComputeMovingAverage, | ||
| String movingAverageColName, int windowSize) throws DDFException { | ||
| // TODO Auto-generated method stub | ||
| return null; | ||
| } | ||
|
|
||
| @Override | ||
| public void saveTimeSeriesToCSV(String pathToStorage) { | ||
| // TODO Auto-generated method stub | ||
|
|
||
| } | ||
|
|
||
| private int getNumBins(long intervalInSeconds) throws DDFException { | ||
| long minTimeStamp = this.getDDF().getVectorMin(mTimestampColumn).longValue(); | ||
| long maxTimeStamp = this.getDDF().getVectorMax(mTimestampColumn).longValue(); | ||
| int numBins = (int) ((maxTimeStamp - minTimeStamp) / intervalInSeconds); | ||
| return numBins; | ||
|
|
||
| } | ||
|
|
||
| private List<String> getDistinctValues(String colName) throws DDFException { | ||
| String sqlCmd = String.format("SELECT distinct(%s) FROM %s", colName, this.getDDF().getTableName()); | ||
| List<String> rs = this.getManager().sql(sqlCmd, this.getEngine()).getRows(); | ||
| return rs; | ||
| } | ||
|
|
||
| private DDF filterByValue(String colName, String value) throws DDFException { | ||
| String sqlCmd = String.format("SELECT * FROM %s WHERE %s = '%s'", this.getDDF().getTableName(), colName, value); | ||
| DDF filteredDDF = this.getDDF().getSqlHandler().sql2ddf(sqlCmd); | ||
| return filteredDDF; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| package io.ddf.etl; | ||
|
|
||
| import java.util.List; | ||
| import io.ddf.DDF; | ||
| import io.ddf.exception.DDFException; | ||
| import io.ddf.misc.IHandleDDFFunctionalGroup; | ||
| import java.util.concurrent.TimeUnit; | ||
|
|
||
/**
 * Functional-group handler exposing time-series ETL operations on a DDF:
 * downsampling, column differencing, moving averages, and CSV export.
 */
public interface IHandleTimeSeries extends IHandleDDFFunctionalGroup {

  /** Sets the name of the column holding the observation timestamps. */
  void setTimeStampColumn(String colName);

  /** Sets the name of the column identifying individual time series within the DDF. */
  void setTsIDColumn(String colName);

  /** Returns the name of the timestamp column. */
  String getTimeStampColumn() throws DDFException;

  /**
   * Downsamples the DDF by bucketing {@code timestampColumn} into equal intervals of
   * {@code interval} {@code timeUnit}s and applying {@code aggregateFunctions} per bucket.
   */
  DDF downsample(String timestampColumn, List<String> aggregateFunctions, int interval, TimeUnit timeUnit) throws DDFException;

  /**
   * Downsamples each time series identified by {@code tsIDColumn} independently and
   * merges the per-series results.
   */
  DDF downsample(String timestampColumn, String tsIDColumn, List<String> aggregateFunctions, int interval, TimeUnit timeUnit) throws DDFException;

  /**
   * Intended to add a column {@code diffColName} derived from {@code colToGetDiff}
   * (presumably successive differences ordered by timestamp — implementation is still
   * a stub in ATimeSeriesHandler; confirm semantics there).
   */
  DDF addDiffColumn(String timestampColumn, String colToGetDiff, String diffColName) throws DDFException;

  /** Per-series variant of {@link #addDiffColumn(String, String, String)}. */
  DDF addDiffColumn(String timestampColumn, String tsIDColumn, String colToGetDiff, String diffColName) throws DDFException;

  /**
   * Intended to add a moving-average column {@code movingAverageColName} over windows of
   * {@code windowSize} (implementation is still a stub in ATimeSeriesHandler).
   */
  DDF computeMovingAverage(String timestampColumn, String tsIDColumn, String colToComputeMovingAverage, String movingAverageColName,
      int windowSize) throws DDFException;

  /** Saves the time series to CSV at the given storage path (not yet implemented). */
  void saveTimeSeriesToCSV(String path);
}
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need to keep mTimestampColumn and mTsIDColumn as instance variables? They are already passed in by the downsample function, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I planned to use them in the save_ts method; you can modify that when working on the save_ts implementation.