sparktk.frame.ops.timeseries_augmented_dickey_fuller_test module
# vim: set encoding=utf-8
# Copyright (c) 2016 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from sparktk.propobj import PropertiesObject
def timeseries_augmented_dickey_fuller_test(self, ts_column, max_lag, regression="c"):
    """
    Performs the Augmented Dickey-Fuller (ADF) Test, which tests the null hypothesis of whether a unit root is present
    in a time series sample. The test statistic that is returned is a negative number. The lower the value, the
    stronger the rejection of the hypothesis that there is a unit root at some level of confidence.

    Parameters
    ----------

    :param ts_column: (str) Name of the column that contains the time series values to use with the ADF test.
    :param max_lag: (int) The lag order to calculate the test statistic. Booleans are not accepted.
    :param regression: (Optional(str)) The method of regression to use. Following MacKinnon's notation, this
                       can be "c" for constant, "nc" for no constant, "ct" for constant and trend, and "ctt" for
                       constant, trend, and trend-squared.
    :return: (AugmentedDickeyFullerTestResult) Object contains the ADF test statistic and p-value.
    :raises TypeError: if ts_column or regression is not a str, or if max_lag is not an int (or is a bool).

    Example
    -------

    Consider the following frame of time series values:

        >>> frame.inspect()
        [#] timeseries_values
        ======================
        [0] 3.201
        [1] 3.3178
        [2] 3.6279
        [3] 3.5902
        [4] 3.43
        [5] 4.0546
        [6] 3.7606
        [7] 3.1231
        [8] 3.2077
        [9] 4.3383

    Calculate augmented Dickey-Fuller test statistic by giving it the name of the column that has the time series
    values and the max_lag. The function returns an object that has properties for the p-value and test statistic.

        >>> frame.timeseries_augmented_dickey_fuller_test("timeseries_values", 0)
        p_value   = 0.0
        test_stat = -9.93422373369

    """
    if not isinstance(ts_column, str):
        raise TypeError("ts_column parameter should be a str")
    # bool is a subclass of int, so isinstance(True, int) is True; exclude it explicitly so a
    # boolean lag is reported here instead of being forwarded to the Scala call.
    if isinstance(max_lag, bool) or not isinstance(max_lag, int):
        raise TypeError("max_lag parameter should be a int")
    if not isinstance(regression, str):
        raise TypeError("regression parameter should be a str")

    # The computation itself is delegated to the Scala frame; the result is wrapped for Python callers.
    scala_result = self._scala.timeSeriesAugmentedDickeyFullerTest(ts_column, max_lag, regression)
    return AugmentedDickeyFullerTestResult(scala_result)
class AugmentedDickeyFullerTestResult(PropertiesObject):
    """
    Holds the values produced by the augmented Dickey-Fuller test: the test statistic and the p-value.

    Both values are read from the Scala result object when the instance is created and are exposed
    afterwards through read-only properties.
    """

    def __init__(self, scala_result):
        # Read each value once from the Scala-side result and keep it on the instance.
        self._test_stat = scala_result.testStat()
        self._p_value = scala_result.pValue()

    @property
    def test_stat(self):
        """ADF test statistic"""
        return self._test_stat

    @property
    def p_value(self):
        """p-value"""
        return self._p_value
Functions
def timeseries_augmented_dickey_fuller_test(
self, ts_column, max_lag, regression='c')
Performs the Augmented Dickey-Fuller (ADF) Test, which tests the null hypothesis of whether a unit root is present in a time series sample. The test statistic that is returned is a negative number. The lower the value, the stronger the rejection of the hypothesis that there is a unit root at some level of confidence.
ts_column | (str): | Name of the column that contains the time series values to use with the ADF test. |
max_lag | (int): | The lag order to calculate the test statistic. |
regression | (Optional(str)): | The method of regression to use. Following MacKinnon's notation, this can be "c" for constant, "nc" for no constant, "ct" for constant and trend, and "ctt" for constant, trend, and trend-squared. |
Returns | (AugmentedDickeyFullerTestResult): | Object contains the ADF test statistic and p-value. |
Consider the following frame of time series values:
>>> frame.inspect()
[#] timeseries_values
======================
[0] 3.201
[1] 3.3178
[2] 3.6279
[3] 3.5902
[4] 3.43
[5] 4.0546
[6] 3.7606
[7] 3.1231
[8] 3.2077
[9] 4.3383
Calculate augmented Dickey-Fuller test statistic by giving it the name of the column that has the time series values and the max_lag. The function returns an object that has properties for the p-value and test statistic.
>>> frame.timeseries_augmented_dickey_fuller_test("timeseries_values", 0)
p_value = 0.0
test_stat = -9.93422373369
def timeseries_augmented_dickey_fuller_test(self, ts_column, max_lag, regression="c"):
    """
    Performs the Augmented Dickey-Fuller (ADF) Test, which tests the null hypothesis of whether a unit root is present
    in a time series sample. The test statistic that is returned is a negative number. The lower the value, the
    stronger the rejection of the hypothesis that there is a unit root at some level of confidence.

    Parameters
    ----------

    :param ts_column: (str) Name of the column that contains the time series values to use with the ADF test.
    :param max_lag: (int) The lag order to calculate the test statistic. Booleans are not accepted.
    :param regression: (Optional(str)) The method of regression to use. Following MacKinnon's notation, this
                       can be "c" for constant, "nc" for no constant, "ct" for constant and trend, and "ctt" for
                       constant, trend, and trend-squared.
    :return: (AugmentedDickeyFullerTestResult) Object contains the ADF test statistic and p-value.
    :raises TypeError: if ts_column or regression is not a str, or if max_lag is not an int (or is a bool).

    Example
    -------

    Consider the following frame of time series values:

        >>> frame.inspect()
        [#] timeseries_values
        ======================
        [0] 3.201
        [1] 3.3178
        [2] 3.6279
        [3] 3.5902
        [4] 3.43
        [5] 4.0546
        [6] 3.7606
        [7] 3.1231
        [8] 3.2077
        [9] 4.3383

    Calculate augmented Dickey-Fuller test statistic by giving it the name of the column that has the time series
    values and the max_lag. The function returns an object that has properties for the p-value and test statistic.

        >>> frame.timeseries_augmented_dickey_fuller_test("timeseries_values", 0)
        p_value   = 0.0
        test_stat = -9.93422373369

    """
    if not isinstance(ts_column, str):
        raise TypeError("ts_column parameter should be a str")
    # bool is a subclass of int, so isinstance(True, int) is True; exclude it explicitly so a
    # boolean lag is reported here instead of being forwarded to the Scala call.
    if isinstance(max_lag, bool) or not isinstance(max_lag, int):
        raise TypeError("max_lag parameter should be a int")
    if not isinstance(regression, str):
        raise TypeError("regression parameter should be a str")

    # The computation itself is delegated to the Scala frame; the result is wrapped for Python callers.
    scala_result = self._scala.timeSeriesAugmentedDickeyFullerTest(ts_column, max_lag, regression)
    return AugmentedDickeyFullerTestResult(scala_result)
Classes
class AugmentedDickeyFullerTestResult
AugmentedDickeyFullerTestResult class contains values that are returned from the augmented_dickey_fuller_test.
class AugmentedDickeyFullerTestResult(PropertiesObject):
    """
    Holds the values produced by the augmented Dickey-Fuller test: the test statistic and the p-value.

    Both values are read from the Scala result object when the instance is created and are exposed
    afterwards through read-only properties.
    """

    def __init__(self, scala_result):
        # Read each value once from the Scala-side result and keep it on the instance.
        self._test_stat = scala_result.testStat()
        self._p_value = scala_result.pValue()

    @property
    def test_stat(self):
        """ADF test statistic"""
        return self._test_stat

    @property
    def p_value(self):
        """p-value"""
        return self._p_value
Ancestors (in MRO)
- AugmentedDickeyFullerTestResult
- sparktk.propobj.PropertiesObject
- __builtin__.object
Instance variables
var p_value
p-value
var test_stat
ADF test statistic
Methods
def __init__(
self, scala_result)
def __init__(self, scala_result):
    """Store the test statistic and p-value read from the Scala result object."""
    adf_statistic = scala_result.testStat()
    self._test_stat = adf_statistic
    adf_p_value = scala_result.pValue()
    self._p_value = adf_p_value
def to_dict(
self)
def to_dict(self):
    """
    Return one dictionary combining the object's properties and attributes.

    The dictionary returned by ``_properties()`` is updated in place with the entries from
    ``_attributes()``, so on a key clash the attribute value wins.
    """
    combined = self._properties()
    extra = self._attributes()
    combined.update(extra)
    return combined
def to_json(
self)
def to_json(self):
    """Return the dictionary produced by ``to_dict()`` serialized as a JSON string."""
    as_dict = self.to_dict()
    return json.dumps(as_dict)