sparktk.frame.ops.ecdf module
# vim: set encoding=utf-8
# Copyright (c) 2016 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
def ecdf(self, column):
    """
    Builds a new frame with columns for the data and its distribution.

    Parameters
    ----------

    :param column: (str) The name of the input column containing the sample.
    :return: (Frame) A new Frame containing each distinct value in the sample and its corresponding ECDF value.

    Generates the :term:`empirical cumulative distribution` for the input column.

    Examples
    --------

    Consider the following sample data set in *frame* 'frame' containing several numbers.

        >>> frame.inspect()
        [#]  numbers
        ============
        [0]        1
        [1]        3
        [2]        1
        [3]        0
        [4]        2
        [5]        1
        [6]        4
        [7]        3

        >>> ecdf_frame = frame.ecdf('numbers')
        [===Job Progress===]

        >>> ecdf_frame.inspect()
        [#]  numbers  numbers_ecdf
        ==========================
        [0]        0         0.125
        [1]        1           0.5
        [2]        2         0.625
        [3]        3         0.875
        [4]        4           1.0

    """
    # Frame is imported at call time, inside the function, exactly as before.
    from sparktk.frame.frame import Frame

    # The computation itself is delegated to self._scala (presumably the
    # backing Scala frame -- confirm against the enclosing class); the result
    # is then wrapped in a Python Frame built with this object's _tc.
    context = self._tc
    scala_ecdf = self._scala.ecdf(column)
    return Frame(context, scala_ecdf)
Functions
def ecdf(
self, column)
Builds a new frame with columns for the data and its distribution.
Parameters:
column | (str): | The name of the input column containing the sample. |
Returns | (Frame): | A new Frame containing each distinct value in the sample and its corresponding ECDF value. |
Generates the empirical cumulative distribution
for the input column.
Examples:
Consider the following sample data set in frame 'frame' containing several numbers.
>>> frame.inspect()
[#] numbers
============
[0] 1
[1] 3
[2] 1
[3] 0
[4] 2
[5] 1
[6] 4
[7] 3
>>> ecdf_frame = frame.ecdf('numbers')
[===Job Progress===]
>>> ecdf_frame.inspect()
[#] numbers numbers_ecdf
==========================
[0] 0 0.125
[1] 1 0.5
[2] 2 0.625
[3] 3 0.875
[4] 4 1.0
def ecdf(self, column):
    """
    Builds a new frame with columns for the data and its distribution.

    Parameters
    ----------

    :param column: (str) The name of the input column containing the sample.
    :return: (Frame) A new Frame containing each distinct value in the sample and its corresponding ECDF value.

    Generates the :term:`empirical cumulative distribution` for the input column.

    Examples
    --------

    Consider the following sample data set in *frame* 'frame' containing several numbers.

        >>> frame.inspect()
        [#]  numbers
        ============
        [0]        1
        [1]        3
        [2]        1
        [3]        0
        [4]        2
        [5]        1
        [6]        4
        [7]        3

        >>> ecdf_frame = frame.ecdf('numbers')
        [===Job Progress===]

        >>> ecdf_frame.inspect()
        [#]  numbers  numbers_ecdf
        ==========================
        [0]        0         0.125
        [1]        1           0.5
        [2]        2         0.625
        [3]        3         0.875
        [4]        4           1.0

    """
    # Frame is imported at call time, inside the function, exactly as before.
    from sparktk.frame.frame import Frame

    # The computation itself is delegated to self._scala (presumably the
    # backing Scala frame -- confirm against the enclosing class); the result
    # is then wrapped in a Python Frame built with this object's _tc.
    context = self._tc
    scala_ecdf = self._scala.ecdf(column)
    return Frame(context, scala_ecdf)