sparktk.frame.ops.timeseries_augmented_dickey_fuller_test module

Show source ≡

# vim: set encoding=utf-8

#  Copyright (c) 2016 Intel Corporation 
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from sparktk.propobj import PropertiesObject

def timeseries_augmented_dickey_fuller_test(self, ts_column, max_lag, regression = "c"):
    """
    Performs the Augmented Dickey-Fuller (ADF) Test, which tests the null hypothesis of whether a unit root is present
    in a time series sample. The test statistic that is returned is a negative number.  The lower the value, the
    stronger the rejection of the hypothesis that there is a unit root at some level of confidence.

    A TypeError is raised if ts_column or regression is not a str, or if max_lag is not an int (bool values are
    rejected as well, even though bool is a subclass of int).

    Parameters
    ----------

    :param ts_column: (str) Name of the column that contains the time series values to use with the ADF test.
    :param max_lag: (int) The lag order to calculate the test statistic.
    :param regression: (Optional(str)) The method of regression to use. Following MacKinnon's notation, this
                       can be "c" for constant, "nc" for no constant, "ct" for constant and trend, and "ctt" for
                       constant, trend, and trend-squared.
    :return: (AugmentedDickeyFullerTestResult) Object contains the ADF test statistic and p-value.

    Example
    -------


    Consider the following frame of time series values:

        >>> frame.inspect()
        [#]  timeseries_values
        ======================
        [0]              3.201
        [1]             3.3178
        [2]             3.6279
        [3]             3.5902
        [4]               3.43
        [5]             4.0546
        [6]             3.7606
        [7]             3.1231
        [8]             3.2077
        [9]             4.3383

    Calculate augmented Dickey-Fuller test statistic by giving it the name of the column that has the time series
    values and the max_lag.  The function returns an object that has properties for the p-value and test statistic.

        >>> frame.timeseries_augmented_dickey_fuller_test("timeseries_values", 0)
        p_value   = 0.0
        test_stat = -9.93422373369


    """

    # Validate argument types before handing anything to the Scala side.
    if not isinstance(ts_column, str):
        raise TypeError("ts_column parameter should be a str")
    # bool is a subclass of int, so isinstance(True, int) is True; reject booleans explicitly because
    # True/False is not a lag order.
    if isinstance(max_lag, bool) or not isinstance(max_lag, int):
        raise TypeError("max_lag parameter should be a int")
    if not isinstance(regression, str):
        raise TypeError("regression parameter should be a str")

    # The computation itself is delegated to the Scala frame; the result is wrapped in a Python object.
    scala_result = self._scala.timeSeriesAugmentedDickeyFullerTest(ts_column, max_lag, regression)
    return AugmentedDickeyFullerTestResult(scala_result)

class AugmentedDickeyFullerTestResult(PropertiesObject):
    """
    Holds the values produced by timeseries_augmented_dickey_fuller_test: the ADF test statistic and the p-value,
    both read from the Scala result object at construction time.
    """

    def __init__(self, scala_result):
        # Pull each value out of the Scala result once; the properties below are plain attribute reads.
        stat_value = scala_result.testStat()
        self._test_stat = stat_value
        p_val = scala_result.pValue()
        self._p_value = p_val

    @property
    def p_value(self):
        """
        p-value reported by the test
        """
        return self._p_value

    @property
    def test_stat(self):
        """
        ADF test statistic reported by the test
        """
        return self._test_stat

Functions

def timeseries_augmented_dickey_fuller_test(

self, ts_column, max_lag, regression='c')

Performs the Augmented Dickey-Fuller (ADF) Test, which tests the null hypothesis of whether a unit root is present in a time series sample. The test statistic that is returned is a negative number. The lower the value, the stronger the rejection of the hypothesis that there is a unit root at some level of confidence.

Parameters:

ts_column

(str):

Name of the column that contains the time series values to use with the ADF test.

max_lag

(int):

The lag order to calculate the test statistic.

regression

(Optional(str)):

The method of regression to use. Following MacKinnon's notation, this can be "c" for constant, "nc" for no constant, "ct" for constant and trend, and "ctt" for constant, trend, and trend-squared.

Returns

(AugmentedDickeyFullerTestResult):

Object contains the ADF test statistic and p-value.

Example:

Consider the following frame of time series values:

>>> frame.inspect()
[#]  timeseries_values
======================
[0]              3.201
[1]             3.3178
[2]             3.6279
[3]             3.5902
[4]               3.43
[5]             4.0546
[6]             3.7606
[7]             3.1231
[8]             3.2077
[9]             4.3383

Calculate augmented Dickey-Fuller test statistic by giving it the name of the column that has the time series values and the max_lag. The function returns an object that has properties for the p-value and test statistic.

>>> frame.timeseries_augmented_dickey_fuller_test("timeseries_values", 0)
p_value   = 0.0
test_stat = -9.93422373369

Show source ≡

def timeseries_augmented_dickey_fuller_test(self, ts_column, max_lag, regression = "c"):
    """
    Performs the Augmented Dickey-Fuller (ADF) Test, which tests the null hypothesis of whether a unit root is present
    in a time series sample. The test statistic that is returned is a negative number.  The lower the value, the
    stronger the rejection of the hypothesis that there is a unit root at some level of confidence.

    A TypeError is raised if ts_column or regression is not a str, or if max_lag is not an int (bool values are
    rejected as well, even though bool is a subclass of int).

    Parameters
    ----------

    :param ts_column: (str) Name of the column that contains the time series values to use with the ADF test.
    :param max_lag: (int) The lag order to calculate the test statistic.
    :param regression: (Optional(str)) The method of regression to use. Following MacKinnon's notation, this
                       can be "c" for constant, "nc" for no constant, "ct" for constant and trend, and "ctt" for
                       constant, trend, and trend-squared.
    :return: (AugmentedDickeyFullerTestResult) Object contains the ADF test statistic and p-value.

    Example
    -------


    Consider the following frame of time series values:

        >>> frame.inspect()
        [#]  timeseries_values
        ======================
        [0]              3.201
        [1]             3.3178
        [2]             3.6279
        [3]             3.5902
        [4]               3.43
        [5]             4.0546
        [6]             3.7606
        [7]             3.1231
        [8]             3.2077
        [9]             4.3383

    Calculate augmented Dickey-Fuller test statistic by giving it the name of the column that has the time series
    values and the max_lag.  The function returns an object that has properties for the p-value and test statistic.

        >>> frame.timeseries_augmented_dickey_fuller_test("timeseries_values", 0)
        p_value   = 0.0
        test_stat = -9.93422373369


    """

    # Validate argument types before handing anything to the Scala side.
    if not isinstance(ts_column, str):
        raise TypeError("ts_column parameter should be a str")
    # bool is a subclass of int, so isinstance(True, int) is True; reject booleans explicitly because
    # True/False is not a lag order.
    if isinstance(max_lag, bool) or not isinstance(max_lag, int):
        raise TypeError("max_lag parameter should be a int")
    if not isinstance(regression, str):
        raise TypeError("regression parameter should be a str")

    # The computation itself is delegated to the Scala frame; the result is wrapped in a Python object.
    scala_result = self._scala.timeSeriesAugmentedDickeyFullerTest(ts_column, max_lag, regression)
    return AugmentedDickeyFullerTestResult(scala_result)

Classes

class AugmentedDickeyFullerTestResult

AugmentedDickeyFullerTestResult class contains values that are returned from the augmented_dickey_fuller_test.

Show source ≡

class AugmentedDickeyFullerTestResult(PropertiesObject):
    """
    Holds the values produced by timeseries_augmented_dickey_fuller_test: the ADF test statistic and the p-value,
    both read from the Scala result object at construction time.
    """

    def __init__(self, scala_result):
        # Pull each value out of the Scala result once; the properties below are plain attribute reads.
        stat_value = scala_result.testStat()
        self._test_stat = stat_value
        p_val = scala_result.pValue()
        self._p_value = p_val

    @property
    def p_value(self):
        """
        p-value reported by the test
        """
        return self._p_value

    @property
    def test_stat(self):
        """
        ADF test statistic reported by the test
        """
        return self._test_stat

Ancestors (in MRO)

AugmentedDickeyFullerTestResult
sparktk.propobj.PropertiesObject
__builtin__.object

Instance variables

var p_value

p-value

var test_stat

ADF test statistic

Methods

def __init__(

self, scala_result)

Show source ≡

def __init__(self, scala_result):
    # Read the test statistic and p-value from the Scala result and keep them on the instance.
    stat_value = scala_result.testStat()
    self._test_stat = stat_value
    p_val = scala_result.pValue()
    self._p_value = p_val

def to_dict(

self)

Show source ≡

def to_dict(self):
    # Start from the mapping returned by _properties() and fold the _attributes() mapping into it in place;
    # the same object that _properties() returned is what gets handed back.
    combined = self._properties()
    extra = self._attributes()
    combined.update(extra)
    return combined

def to_json(

self)

Show source ≡

def to_json(self):
    # Serialize whatever to_dict() returns with the default json encoder settings.
    as_dict = self.to_dict()
    return json.dumps(as_dict)

Index

Functions

Classes

Functions

Classes

Ancestors (in MRO)

Instance variables

Methods