Up

sparktk.frame.ops.tally module

# vim: set encoding=utf-8

#  Copyright (c) 2016 Intel Corporation 
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

def tally(self, sample_col, count_val):
    """
    Count number of times a value is seen.

    Parameters
    ----------

    :param sample_col: (str) The name of the column from which to compute the cumulative count.
    :param count_val: (str) The column value to be used for the counts.

    A cumulative count is computed by sequentially stepping through the rows,
    observing the column values and keeping track of the number of times the specified
    *count_value* has been seen.

    Examples
    --------
    Consider Frame *my_frame*, which accesses a frame that contains a single
    column named *obs*:

        >>> my_frame.inspect()
        [#]  obs
        ========
        [0]    0
        [1]    1
        [2]    2
        [3]    0
        [4]    1
        [5]    2

    The cumulative percent count for column *obs* is obtained by:

        >>> my_frame.tally("obs", "1")
        [===Job Progress===]

    The Frame *my_frame* accesses the original frame that now contains two
    columns, *obs* that contains the original column values, and
    *obsCumulativePercentCount* that contains the cumulative percent count:

        >>> my_frame.inspect()
        [#]  obs  obs_tally
        ===================
        [0]    0        0.0
        [1]    1        1.0
        [2]    2        1.0
        [3]    0        1.0
        [4]    1        2.0
        [5]    2        2.0

    """
    self._scala.tally(sample_col, count_val)

Functions

def tally(

self, sample_col, count_val)

Count number of times a value is seen.

Parameters:
sample_col(str):The name of the column from which to compute the cumulative count.
count_val(str):The column value to be used for the counts.

A cumulative count is computed by sequentially stepping through the rows, observing the column values and keeping track of the number of times the specified count_value has been seen.

Examples:

Consider Frame my_frame, which accesses a frame that contains a single column named obs:

>>> my_frame.inspect()
[#]  obs
========
[0]    0
[1]    1
[2]    2
[3]    0
[4]    1
[5]    2

The cumulative percent count for column obs is obtained by:

>>> my_frame.tally("obs", "1")
[===Job Progress===]

The Frame my_frame accesses the original frame that now contains two columns, obs that contains the original column values, and obsCumulativePercentCount that contains the cumulative percent count:

>>> my_frame.inspect()
[#]  obs  obs_tally
===================
[0]    0        0.0
[1]    1        1.0
[2]    2        1.0
[3]    0        1.0
[4]    1        2.0
[5]    2        2.0
def tally(self, sample_col, count_val):
    """
    Count number of times a value is seen.

    Parameters
    ----------

    :param sample_col: (str) The name of the column from which to compute the cumulative count.
    :param count_val: (str) The column value to be used for the counts.

    A cumulative count is computed by sequentially stepping through the rows,
    observing the column values and keeping track of the number of times the specified
    *count_value* has been seen.

    Examples
    --------
    Consider Frame *my_frame*, which accesses a frame that contains a single
    column named *obs*:

        >>> my_frame.inspect()
        [#]  obs
        ========
        [0]    0
        [1]    1
        [2]    2
        [3]    0
        [4]    1
        [5]    2

    The cumulative percent count for column *obs* is obtained by:

        >>> my_frame.tally("obs", "1")
        [===Job Progress===]

    The Frame *my_frame* accesses the original frame that now contains two
    columns, *obs* that contains the original column values, and
    *obsCumulativePercentCount* that contains the cumulative percent count:

        >>> my_frame.inspect()
        [#]  obs  obs_tally
        ===================
        [0]    0        0.0
        [1]    1        1.0
        [2]    2        1.0
        [3]    0        1.0
        [4]    1        2.0
        [5]    2        2.0

    """
    self._scala.tally(sample_col, count_val)