import pandas as pd
import numpy as np
import xgboost
import scipy.cluster.hierarchy as sch

import warnings
warnings.filterwarnings('ignore')

from QMS_tables import *

import scipy.stats as stats
from scipy.stats import shapiro, normaltest

import logging
_logger = logging.getLogger(__name__)

def DFPreprocess(rawdata_in):
    """
    Pivot long-format measurement records into one row per ID.

    Blank strings are treated as missing and every row containing a missing
    value (in any column) is dropped. For each remaining ID the result holds
    the earliest CreateTime (column ``time``) and, per distinct ItemName, the
    mean of the Value column (NaN when the ID has no record for that item).

    The input frame is not modified.

    :param rawdata_in: DataFrame with the columns ID, ItemName, Value and CreateTime
    :return: DataFrame with the column ``time`` followed by one column per ItemName,
             one row per ID in order of first appearance
    """
    # Blank strings must become NaN *before* the float cast, otherwise the
    # cast itself fails on them.
    cleaned = rawdata_in.replace('', np.nan).dropna()
    cleaned = cleaned.assign(
        Value=cleaned['Value'].astype('float'),
        CreateTime=pd.to_datetime(cleaned['CreateTime']),
    )

    ftr_list = cleaned['ItemName'].unique().tolist()

    # Collect plain dicts and build the frame once; DataFrame.append no longer
    # exists in pandas 2 and was quadratic when called in a loop.
    rows = []
    for _, singleCC in cleaned.groupby('ID', sort=False):
        item_means = singleCC.groupby('ItemName')['Value'].mean()
        entryCache = {'time': singleCC['CreateTime'].min()}
        for currFtr in ftr_list:
            entryCache[currFtr] = item_means.get(currFtr, np.nan)
        rows.append(entryCache)

    return pd.DataFrame(rows, columns=['time'] + ftr_list)



def DFPreprocessComplete(rawdata_in):
    """
    Build one row per serial number (SN) with the mean Value of each mapped item.

    For every distinct SN the output row holds the SN, the earliest
    TransDateTime (column ``time``) and, for each entry of
    ``fmap_in['simpleItems']``, the mean Value of the matching ``newItem``
    records (NaN when there are none).

    NOTE(review): this function does not look runnable as written; see the
    inline notes on ``newItem``, ``feature_mapping`` and ``fmap_in``.

    :param rawdata_in: DataFrame with at least the columns SN, Value and TransDateTime
    :return: DataFrame with one row per SN
    """
    
    # NOTE(review): this assigns the whole frame to a single column, which
    # looks unintended -- presumably a derived item-name column was meant.
    # Confirm the intended mapping before relying on this function.
    rawdata_in['newItem'] = rawdata_in

    # NOTE(review): `feature_mapping` is not defined in this file; presumably
    # it comes from `from QMS_tables import *` -- confirm.
    subbed_rawdata = rawdata_in.loc[rawdata_in.newItem.isin(feature_mapping['simpleItems'])]
    subbed_rawdata['Value'] = subbed_rawdata.Value.astype('float')

    # dict4rename = dict(zip(fmap_in['Items'].tolist(), fmap_in['Alias'].tolist()))

    SN_list = subbed_rawdata.SN.unique()

    df2output = pd.DataFrame()
        
    for currSN in SN_list:
        # All records belonging to the current serial number.
        singleCC = subbed_rawdata.loc[subbed_rawdata.SN == currSN]

        # ftr_list = fmap_in['Items'].tolist()
        # NOTE(review): `fmap_in` is neither a parameter nor defined in this
        # file (the filter above uses `feature_mapping` instead) -- confirm
        # which name is correct.
        ftr_list = fmap_in['simpleItems'].tolist()

        entryCache = {}
        entryCache['SN'] = currSN
        # Earliest timestamp recorded for this serial number.
        entryCache['time'] = singleCC.TransDateTime.astype('datetime64').min()

        for currFtr in ftr_list:
            currBlock = singleCC.loc[singleCC.newItem == currFtr]
            if len(currBlock) == 0:
                # No measurement of this item for the current SN.
                currValue = np.nan
            else:
                currValue = np.mean(currBlock.Value)
            entryCache[currFtr] = currValue


        # NOTE(review): DataFrame.append was removed in pandas 2.0; this call
        # only works on older pandas versions.
        df2output = df2output.append(entryCache, ignore_index = True)

    # df2output.rename(columns = dict4rename, inplace = True)

    return df2output


def CoerceInput(data: (pd.Series, np.array)):
    """
    Ensure that the data is a pandas.Series with a fresh 0..n-1 index.

    Anything that is not already a pandas.Series is passed to the
    pandas.Series constructor. The returned series always has its index
    reset, so positional and label-based access agree.

    :param data: a pandas.Series, numpy.array or list of numbers
    :return: the data as a pandas.Series with a default integer index
    :raises ValueError: if the data cannot be converted into a pandas.Series
    """
    if not isinstance(data, pd.Series):
        log = logging.getLogger(__name__)
        log.debug('attempting to convert data into pandas.Series...')
        try:
            data = pd.Series(data)
        except (TypeError, ValueError) as err:
            # A failed construction is the only way the input can be "of the
            # wrong type"; report it with the documented message.
            raise ValueError('data is not of the correct type; expecting a list of integers, '
                             'floats, a pandas.Series, or numpy.array') from err
        log.debug('...conversion successful')

    return data.reset_index(drop=True)




def normality_test(data,
                   alpha: float = 0.05):
    """
    Run the Shapiro-Wilk and D'Agostino K^2 tests and report whether both pass.

    A test "passes" when its p-value is greater than ``alpha``. If the K^2
    test raises a ValueError, the error is logged and that test is counted
    as passed.

    :param data: the data to be analyzed
    :param alpha: significance threshold compared against each p-value
    :return: True if neither test rejects normality; else False
    """
    _logger.debug('checking data for normality')

    # --- Shapiro-Wilk ---
    w_stat, w_p = shapiro(data)
    _logger.debug(f'shapiro statistics={w_stat:.03f}, p={w_p:.03f}')
    is_normal_shapiro_test = bool(w_p > alpha)
    if is_normal_shapiro_test:
        _logger.debug('shapiro test indicates that the distribution is normal')
    else:
        _logger.warning('shapiro test indicates that the distribution is NOT normal')

    # --- D'Agostino K^2 ---
    try:
        k2_stat, k2_p = normaltest(data)
    except ValueError as err:
        _logger.warning(err)
        # The test could not be evaluated, so it does not veto the result.
        is_normal_k2 = True
    else:
        _logger.debug(f'k^2 statistics={k2_stat:.03f}, p={k2_p:.03f}')
        is_normal_k2 = bool(k2_p > alpha)
        if is_normal_k2:
            _logger.debug('k^2 test indicates that the distribution is normal')
        else:
            _logger.warning('k^2 test indicates that the distribution is NOT normal')

    verdict = is_normal_shapiro_test and is_normal_k2

    if verdict:
        _logger.info('there is a strong likelyhood that the data set is normally distributed')
    else:
        _logger.warning('the data set is most likely not normally distributed')

    return verdict




def calc_ppu(data,
             upper_control_limit: (int, float), skip_normality_test: bool = True):
    """
    Compute the upper process performance index (Ppu) of a data set.

    Ppu is the distance from the mean to the upper limit, expressed in
    units of three standard deviations.

    :param data: the data to be analyzed; must provide ``mean()`` and ``std()``
    :param upper_control_limit: the upper control limit
    :param skip_normality_test: when false, the normality test is run (and logged) first
    :return: the Ppu value
    """
    run_normality_check = not skip_normality_test
    if run_normality_check:
        normality_test(data)

    center = data.mean()
    sigma = data.std()

    headroom = upper_control_limit - center
    ppu = headroom / (3 * sigma)

    _logger.debug(f'dataset of length {len(data)}, mean={center}, std_dev={sigma}')
    _logger.debug(f'ppu = {ppu}')

    return ppu


def calc_ppl(data,
             lower_control_limit: (int, float), skip_normality_test = True):
    """
    Compute the lower process performance index (Ppl) of a data set.

    Ppl is the distance from the lower limit to the mean, expressed in
    units of three standard deviations.

    :param data: the data to be analyzed; must provide ``mean()`` and ``std()``
    :param lower_control_limit: the lower control limit
    :param skip_normality_test: when false, the normality test is run (and logged) first
    :return: the Ppl value
    """
    run_normality_check = not skip_normality_test
    if run_normality_check:
        normality_test(data)

    center = data.mean()
    sigma = data.std()

    footroom = center - lower_control_limit
    ppl = footroom / (3 * sigma)

    _logger.debug(f'dataset of length {len(data)}, mean={center}, std_dev={sigma}')
    _logger.debug(f'ppl = {ppl}')

    return ppl


def calc_ppk(data,
             upper_control_limit: (int, float), lower_control_limit: (int, float)):
    """
    Compute the process performance index (Ppk) of a data set.

    The result is the smaller of the absolute Ppu and the absolute Ppl.
    A warning is logged when one of the two is more than 1.5 times the
    other, i.e. when the process sits noticeably off-center between the limits.

    :param data: the data to be analyzed
    :param upper_control_limit: the upper control limit
    :param lower_control_limit: the lower control limit
    :return: the ppk level
    """
    upper_index = calc_ppu(data=data,
                           upper_control_limit=upper_control_limit,
                           skip_normality_test=True)
    zupper = abs(upper_index)

    lower_index = calc_ppl(data=data,
                           lower_control_limit=lower_control_limit,
                           skip_normality_test=True)
    zlower = abs(lower_index)

    cpk = min(zupper, zlower)

    _logger.debug(f'dataset of length {len(data)}, zupper={zupper:.03f}, zlower={zlower:.03f}')
    _logger.debug(f'cpk = {cpk:.03f}')

    # Express the imbalance as a ratio >= 1 regardless of which side is larger.
    imbalance = zupper / zlower
    if imbalance < 1:
        imbalance = 1.0 / imbalance

    if imbalance > 1.5:
        _logger.warning('the zupper and zlower limits are strongly '
                        'imbalanced, indicating that the process is off-center '
                        'with reference to the limits')

    return cpk


#-------------------------------------------------------------------------
# Inputs shared by the control-rule functions below:
#   data: a pd.Series
#   UCL:  upper control limit
#   LCL:  lower control limit

# Rule 1: beyond_limits
def control_beyond_limits(data,
                          upper_control_limit: (int, float, list), lower_control_limit: (int, float, list)):
    """
    Return the points that lie strictly above the upper or below the lower limit.

    When the upper limit is given as a list, both limits are converted to
    numpy arrays and compared element-wise with the data.

    :param data: the data to be analyzed
    :param upper_control_limit: the upper control limit (scalar, or a list with one limit per point)
    :param lower_control_limit: the lower control limit (scalar, or a list with one limit per point)
    :return: a pandas.Series object with all out-of-control points
    """
    if isinstance(upper_control_limit, list):
        upper = np.array(upper_control_limit)
        lower = np.array(lower_control_limit)
    else:
        upper = upper_control_limit
        lower = lower_control_limit

    too_high = data > upper
    too_low = data < lower
    return data.where(too_high | too_low).dropna()

# Rule 2: zone C
def control_zone_c(data,
                   upper_control_limit: (int, float,list), lower_control_limit: (int, float,list), center_in = None):
    """
    Return the points belonging to any run of 7 consecutive points on the same side of the center.

    The center is the midpoint of the two limits, or ``center_in`` when the
    upper limit is given as a list. Points exactly on the center break a run.

    :param data: the data to be analyzed (pandas.Series)
    :param upper_control_limit: the upper control limit
    :param lower_control_limit: the lower control limit
    :param center_in: center line to use when the limits are lists
    :return: a pandas.Series of the violating points, indexed by position;
             empty if there are none
    """
    if isinstance(upper_control_limit, list):
        spec_center = center_in
    else:
        spec_center = (upper_control_limit + lower_control_limit) / 2

    window = 7
    violations = []
    for i in range(len(data) - window + 1):
        points = data[i:i + window].to_numpy()

        # Vectorised comparison instead of `flag is True`: numpy booleans are
        # never identical to the built-in True, which silently disabled the
        # "above the center" branch.
        if np.all(points > spec_center) or np.all(points < spec_center):
            violations.append(pd.Series(data=points, index=i + np.arange(window)))

    if not violations:
        return pd.Series(dtype='float64')

    # Overlapping windows report the same point several times; keep one copy.
    s = pd.concat(violations)
    return s.loc[~s.index.duplicated()]


# Rule 3: trend
def control_trend(data):
    """
    Return the points belonging to any run of 7 consecutive points that strictly rise or strictly fall.

    :param data: the data to be analyzed (pandas.Series)
    :return: a pandas.Series of the violating points, indexed by position;
             empty if there are none
    """
    span = 7
    hits = []
    for start in range(len(data) - 6):
        window = data[start:start + span].to_numpy()

        # Compare each point with its predecessor; a 7-point window has 6 steps.
        later, earlier = window[1:], window[:-1]
        rises = int(np.count_nonzero(later > earlier))
        falls = int(np.count_nonzero(later < earlier))

        if rises >= 6 or falls >= 6:
            positions = start + np.arange(span)
            hits.append(pd.Series(data=window, index=positions))

    if not hits:
        return pd.Series()

    # Overlapping windows repeat points; keep the first occurrence of each.
    merged = pd.concat(hits)
    return merged.loc[~merged.index.duplicated()]

# Rule 4: zone A

def control_zone_a(data,
                   upper_control_limit: (int, float, list), lower_control_limit: (int, float, list),  center_in = None):
    """
    Return the points of every 3-point window in which 2 out of 3 are in zone A or beyond.

    "Zone A or beyond" means more than two standard deviations (``np.std``
    of the data) away from the center, with both points on the same side.
    The center is the midpoint of the two limits, or ``center_in`` when the
    upper limit is given as a list.

    :param data: the data to be analyzed (pandas.Series)
    :param upper_control_limit: the upper control limit
    :param lower_control_limit: the lower control limit
    :param center_in: center line to use when the limits are lists
    :return: a pandas.Series of the violating points, indexed by position;
             empty if there are none
    """
    if isinstance(upper_control_limit, list):
        spec_center = center_in
    else:
        spec_center = (upper_control_limit + lower_control_limit) / 2

    spec_range = np.std(data)
    zone_b_upper_limit = spec_center + 2 * spec_range
    zone_b_lower_limit = spec_center - 2 * spec_range

    window, required = 3, 2
    violations = []
    for i in range(len(data) - window + 1):
        points = data[i:i + window].to_numpy()

        # Count each side separately anywhere in the window. The previous
        # version tested `flag is True` against a numpy boolean (always
        # False) and stopped at the first point that did not qualify.
        n_high = np.count_nonzero(points > zone_b_upper_limit)
        n_low = np.count_nonzero(points < zone_b_lower_limit)

        if n_high >= required or n_low >= required:
            violations.append(pd.Series(data=points, index=i + np.arange(window)))

    if not violations:
        return pd.Series(dtype='float64')

    # Overlapping windows report the same point several times; keep one copy.
    s = pd.concat(violations)
    return s.loc[~s.index.duplicated()]


# Rule 5: stratification
def control_stratification(data,
                                upper_control_limit: (int, float,list), lower_control_limit: (int, float, list), center_in = None):
    """
    Return the points belonging to any run of 15 consecutive points that all lie inside zone C.

    Zone C is the open interval of one standard deviation (``np.std`` of
    the data) around the center. The center is the midpoint of the two
    limits, or ``center_in`` when the upper limit is given as a list.

    :param data: the data to be analyzed (pandas.Series)
    :param upper_control_limit: the upper control limit
    :param lower_control_limit: the lower control limit
    :param center_in: center line to use when the limits are lists
    :return: a pandas.Series of the violating points, indexed by position;
             empty if there are none
    """
    sigma = np.std(data)
    if isinstance(upper_control_limit, list):
        center = center_in
    else:
        center = (upper_control_limit + lower_control_limit) / 2

    band_top = center + sigma
    band_bottom = center - sigma

    span = 15
    hits = []
    for start in range(len(data) - 14):
        window = data[start:start + span].to_numpy()

        # Keep only the points strictly inside the band; the window violates
        # the rule when nothing was filtered out.
        inside = window[(window < band_top) & (window > band_bottom)]
        if len(inside) < span:
            continue

        hits.append(pd.Series(data=inside, index=start + np.arange(span)))

    if not hits:
        return pd.Series()

    # Overlapping windows repeat points; keep the first occurrence of each.
    merged = pd.concat(hits)
    return merged.loc[~merged.index.duplicated()]

# Rule 6: mixture

def control_mixture(data,
                         upper_control_limit: (int, float), lower_control_limit: (int, float), center_in = None):
    """
    Return the points belonging to any run of 8 consecutive points with none inside zone C.

    Zone C is the open interval of one standard deviation (``np.std`` of
    the data) around the center. The center is the midpoint of the two
    limits, or ``center_in`` when the upper limit is given as a list.

    :param data: the data to be analyzed (pandas.Series)
    :param upper_control_limit: the upper control limit
    :param lower_control_limit: the lower control limit
    :param center_in: center line to use when the limits are lists
    :return: a pandas.Series of the violating points, indexed by position;
             empty if there are none
    """
    sigma = np.std(data)
    if isinstance(upper_control_limit, list):
        center = center_in
    else:
        center = (upper_control_limit + lower_control_limit) / 2
    band_top = center + sigma
    band_bottom = center - sigma

    span = 8
    hits = []
    for start in range(len(data) - 7):
        window = data[start:start + span].to_numpy()

        # A single point strictly inside the band disqualifies the window.
        in_band = (window > band_bottom) & (window < band_top)
        if in_band.any():
            continue

        hits.append(pd.Series(data=window, index=start + np.arange(span)))

    if not hits:
        return pd.Series()

    # Overlapping windows repeat points; keep the first occurrence of each.
    merged = pd.concat(hits)
    return merged.loc[~merged.index.duplicated()]

# Rule 7: zone B

def control_zone_b(data,
                   upper_control_limit: (int, float,list), lower_control_limit: (int, float,list), center_in = None):
    """
    Return the points of every 5-point window in which 4 out of 5 are in zone B or beyond.

    "Zone B or beyond" means more than one standard deviation (``np.std``
    of the data) away from the center, with all four points on the same
    side. The center is the midpoint of the two limits, or ``center_in``
    when the upper limit is given as a list.

    :param data: the data to be analyzed (pandas.Series)
    :param upper_control_limit: the upper control limit
    :param lower_control_limit: the lower control limit
    :param center_in: center line to use when the limits are lists
    :return: a pandas.Series of the violating points, indexed by position;
             empty if there are none
    """
    if isinstance(upper_control_limit, list):
        spec_center = center_in
    else:
        spec_center = (upper_control_limit + lower_control_limit) / 2

    spec_range = np.std(data)
    zone_c_upper_limit = spec_center + spec_range
    zone_c_lower_limit = spec_center - spec_range

    window, required = 5, 4
    violations = []
    for i in range(len(data) - window + 1):
        points = data[i:i + window].to_numpy()

        # Count each side separately anywhere in the window. The previous
        # version tested `flag is True` against a numpy boolean (always
        # False) and stopped at the first point that did not qualify.
        n_high = np.count_nonzero(points > zone_c_upper_limit)
        n_low = np.count_nonzero(points < zone_c_lower_limit)

        if n_high >= required or n_low >= required:
            violations.append(pd.Series(data=points, index=i + np.arange(window)))

    if not violations:
        return pd.Series(dtype='float64')

    # Overlapping windows report the same point several times; keep one copy.
    s = pd.concat(violations)
    return s.loc[~s.index.duplicated()]


# Rule 8: overcontrol

def control_overcontrol(data,
                             upper_control_limit: (int, float), lower_control_limit: (int, float), center_in = None):
    """
    Return the points belonging to any run of 14 consecutive points that alternate above/below the center.

    The center is the midpoint of the two limits, or ``center_in`` when the
    upper limit is given as a list. Points exactly on the center break a run.

    :param data: the data to be analyzed (pandas.Series)
    :param upper_control_limit: the upper control limit
    :param lower_control_limit: the lower control limit
    :param center_in: center line to use when the limits are lists
    :return: a pandas.Series of the violating points, indexed by position;
             empty if there are none
    """
    if isinstance(upper_control_limit, list):
        spec_center = center_in
    else:
        # Midpoint of the limits. This used to be (UCL - LCL) / 2, which is
        # the half-width of the band, not its center.
        spec_center = (upper_control_limit + lower_control_limit) / 2

    window = 14
    violations = []
    # `+ 1` so that the final full window is inspected as well.
    for i in range(len(data) - window + 1):
        points = data[i:i + window].to_numpy()

        above = points > spec_center
        below = points < spec_center

        # Positions 0, 2, 4, ... must sit on one side and 1, 3, 5, ... on the other.
        starts_high = above[0::2].all() and below[1::2].all()
        starts_low = below[0::2].all() and above[1::2].all()

        if starts_high or starts_low:
            violations.append(pd.Series(data=points, index=i + np.arange(window)))

    if not violations:
        return pd.Series(dtype='float64')

    # Overlapping windows report the same point several times; keep one copy.
    s = pd.concat(violations)
    return s.loc[~s.index.duplicated()]




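#-------------------------------------------------------------------------
# Control rules (8 in total)
# 1 beyond_limits: a point falls outside the control limits
# 2 zone C: 7 or more consecutive points on the same side of the center line (zone C or beyond)
# 3 trend: 7 or more consecutive points strictly increasing or decreasing
# 4 zone A: 2 out of 3 consecutive points at least 2 standard deviations from the center line (zone A or beyond)
# 5 stratification: 15 consecutive points within 1 standard deviation of the center line (zone C)
# 6 mixture: 8 consecutive points more than 1 standard deviation from the center line
# 7 zone B: 4 out of 5 consecutive points at least 1 standard deviation from the center line
# 8 over-control: 14 consecutive points alternating up and down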
def CalcControlArgs(series_in,
                 upper_control_limit: (int, float), lower_control_limit: (int, float)):
    """
    Evaluate the eight control rules on a data series against fixed limits.

    The center is the midpoint of the two limits and the zone limits are
    one, two and three standard deviations (``np.std`` of the data) around
    that center.

    :param series_in: the data to be analyzed (anything CoerceInput accepts)
    :param upper_control_limit: the upper control limit
    :param lower_control_limit: the lower control limit
    :return: dict with the keys UCL, LCL, Center, Zone_a_limit, Zone_b_limit,
             Zone_c_limit (each ``[upper, lower]``) and Rule1 .. Rule8
             (the points reported by each rule)
    """
    samples = CoerceInput(series_in)

    sigma = np.std(samples)
    center = (upper_control_limit + lower_control_limit) / 2

    zone_a = [center + 3 * sigma, center - 3 * sigma]
    zone_b = [center + 2 * sigma, center - 2 * sigma]
    zone_c = [center + sigma, center - sigma]

    rule_hits = {
        'Rule1': control_beyond_limits(samples, upper_control_limit, lower_control_limit),
        'Rule2': control_zone_c(samples, upper_control_limit, lower_control_limit),
        'Rule3': control_trend(samples),
        'Rule4': control_zone_a(samples, upper_control_limit, lower_control_limit),
        'Rule5': control_stratification(samples, upper_control_limit, lower_control_limit),
        'Rule6': control_mixture(samples, upper_control_limit, lower_control_limit),
        'Rule7': control_zone_b(samples, upper_control_limit, lower_control_limit),
        'Rule8': control_overcontrol(samples, upper_control_limit, lower_control_limit),
    }

    summary = {
        'UCL': float(upper_control_limit),
        'LCL': float(lower_control_limit),
        'Center': float(center),
        'Zone_a_limit': zone_a,
        'Zone_b_limit': zone_b,
        'Zone_c_limit': zone_c,
    }
    summary.update(rule_hits)

    return summary

def CalcMRArgs(series_in):
    """
    Build the moving-range series of the data and evaluate the eight control rules on it.

    The moving range is the absolute difference between neighbouring
    points; its first entry is NaN because the first point has no
    predecessor. The center is the mean moving range (Rbar), the lower
    limit is 0 and the upper limit is 3.267 * Rbar (presumably the D4
    constant for ranges of two points).

    NOTE(review): the rule functions derive their own center from the
    midpoint of the scalar limits, so the rules are evaluated around
    UCL / 2 rather than around the Rbar reported under ``Center``.

    :param series_in: the data to be analyzed (anything CoerceInput accepts)
    :return: tuple ``(R, result)`` where ``R`` is the moving-range
             pandas.Series and ``result`` is a dict with the keys UCL, LCL,
             Center, Zone_a_limit, Zone_b_limit, Zone_c_limit and Rule1 .. Rule8
    """
    lower_control_limit = 0

    series_in = CoerceInput(series_in)
    seq = np.array(series_in)

    # Leading NaN keeps R aligned with the input positions.
    R = np.concatenate(([np.nan], np.abs(np.diff(seq))))
    Rbar = np.nanmean(R)
    upper_control_limit = 3.267 * Rbar

    # nanstd, not std: the leading NaN would otherwise turn the spread, and
    # with it every reported zone limit, into NaN.
    spec_range = np.nanstd(R)
    spec_center = Rbar
    zone_c_upper_limit = spec_center + spec_range
    zone_c_lower_limit = spec_center - spec_range
    zone_b_upper_limit = spec_center + 2 * spec_range
    zone_b_lower_limit = spec_center - 2 * spec_range
    zone_a_upper_limit = spec_center + 3 * spec_range
    zone_a_lower_limit = spec_center - 3 * spec_range

    R = pd.Series(R)

    rule1_res = control_beyond_limits(R, upper_control_limit, lower_control_limit)
    rule2_res = control_zone_c(R, upper_control_limit, lower_control_limit)
    rule3_res = control_trend(R)
    rule4_res = control_zone_a(R, upper_control_limit, lower_control_limit)
    rule5_res = control_stratification(R, upper_control_limit, lower_control_limit)
    rule6_res = control_mixture(R, upper_control_limit, lower_control_limit)
    rule7_res = control_zone_b(R, upper_control_limit, lower_control_limit)
    rule8_res = control_overcontrol(R, upper_control_limit, lower_control_limit)

    dict2output = {
        'UCL': float(upper_control_limit),
        'LCL': float(lower_control_limit),
        'Center': float(spec_center),

        'Zone_a_limit': [zone_a_upper_limit, zone_a_lower_limit],
        'Zone_b_limit': [zone_b_upper_limit, zone_b_lower_limit],
        'Zone_c_limit': [zone_c_upper_limit, zone_c_lower_limit],

        'Rule1': rule1_res,
        'Rule2': rule2_res,
        'Rule3': rule3_res,
        'Rule4': rule4_res,
        'Rule5': rule5_res,
        'Rule6': rule6_res,
        'Rule7': rule7_res,
        'Rule8': rule8_res,
    }

    return R, dict2output

def CalcXbarControlArgs(series_in,chunksize):
    """
    Split the data into subgroups and evaluate the eight control rules on the subgroup means.

    The data is divided with ``np.array_split`` into
    ``len(series_in)/(chunksize+1)`` sections. The center is the mean of
    the subgroup means and the limits are the center plus/minus
    ``A2[chunksize]`` times the mean subgroup range.

    NOTE(review): ``A2`` is not defined in this file; presumably it comes
    from ``from QMS_tables import *`` -- confirm, together with how its
    index relates to the actual subgroup size.

    :param series_in: the data to be analyzed (anything CoerceInput accepts)
    :param chunksize: subgroup size parameter (index into ``A2``)
    :return: tuple ``(Xbar_series, result)`` where ``Xbar_series`` holds the
             subgroup means and ``result`` is a dict with the keys UCL, LCL,
             Center, Zone_a_limit, Zone_b_limit, Zone_c_limit and Rule1 .. Rule8
    """
    samples = CoerceInput(series_in)

    subgroups = np.array_split(samples, len(samples)/(chunksize+1))

    Xbar_series = pd.Series([np.mean(group) for group in subgroups])

    center = np.mean(Xbar_series)
    sigma = np.std(Xbar_series)

    # Range (max - min) of every subgroup, averaged into Rbar.
    ranges = [np.max(group) - np.min(group) for group in subgroups]
    Rbar = np.mean(ranges)

    upper_control_limit = center + A2[chunksize]*Rbar
    lower_control_limit = center - A2[chunksize]*Rbar

    zone_a = [center + 3 * sigma, center - 3 * sigma]
    zone_b = [center + 2 * sigma, center - 2 * sigma]
    zone_c = [center + sigma, center - sigma]

    rule_hits = {
        'Rule1': control_beyond_limits(Xbar_series, upper_control_limit, lower_control_limit),
        'Rule2': control_zone_c(Xbar_series, upper_control_limit, lower_control_limit),
        'Rule3': control_trend(Xbar_series),
        'Rule4': control_zone_a(Xbar_series, upper_control_limit, lower_control_limit),
        'Rule5': control_stratification(Xbar_series, upper_control_limit, lower_control_limit),
        'Rule6': control_mixture(Xbar_series, upper_control_limit, lower_control_limit),
        'Rule7': control_zone_b(Xbar_series, upper_control_limit, lower_control_limit),
        'Rule8': control_overcontrol(Xbar_series, upper_control_limit, lower_control_limit),
    }

    summary = {
        'UCL': float(upper_control_limit),
        'LCL': float(lower_control_limit),
        'Center': float(center),
        'Zone_a_limit': zone_a,
        'Zone_b_limit': zone_b,
        'Zone_c_limit': zone_c,
    }
    summary.update(rule_hits)

    return Xbar_series, summary


def CalcXbarMRControlArgs(series_in,chunksize):
    """
    Split the data into subgroups and evaluate the eight control rules on the subgroup ranges.

    The data is divided with ``np.array_split`` into
    ``len(series_in)/(chunksize+1)`` sections. The center is the mean
    subgroup range (Rbar); the limits are ``D4[chunksize] * Rbar`` and
    ``D3[chunksize] * Rbar``.

    NOTE(review): ``D3`` and ``D4`` are not defined in this file; presumably
    they come from ``from QMS_tables import *`` -- confirm, together with
    how their index relates to the actual subgroup size.

    :param series_in: the data to be analyzed (anything CoerceInput accepts)
    :param chunksize: subgroup size parameter (index into ``D3`` / ``D4``)
    :return: tuple ``(R, result)`` where ``R`` is the pandas.Series of
             subgroup ranges and ``result`` is a dict with the keys UCL, LCL,
             Center, Zone_a_limit, Zone_b_limit, Zone_c_limit and Rule1 .. Rule8
    """
    samples = CoerceInput(series_in)

    subgroups = np.array_split(samples, len(samples)/(chunksize+1))

    # Range (max - min) of every subgroup.
    ranges = [np.max(group) - np.min(group) for group in subgroups]

    Rbar = np.mean(ranges)

    center = Rbar
    sigma = np.std(ranges)

    upper_control_limit = D4[chunksize] * Rbar
    lower_control_limit = D3[chunksize] * Rbar

    zone_a = [center + 3 * sigma, center - 3 * sigma]
    zone_b = [center + 2 * sigma, center - 2 * sigma]
    zone_c = [center + sigma, center - sigma]

    R = pd.Series(ranges)

    rule_hits = {
        'Rule1': control_beyond_limits(R, upper_control_limit, lower_control_limit),
        'Rule2': control_zone_c(R, upper_control_limit, lower_control_limit),
        'Rule3': control_trend(R),
        'Rule4': control_zone_a(R, upper_control_limit, lower_control_limit),
        'Rule5': control_stratification(R, upper_control_limit, lower_control_limit),
        'Rule6': control_mixture(R, upper_control_limit, lower_control_limit),
        'Rule7': control_zone_b(R, upper_control_limit, lower_control_limit),
        'Rule8': control_overcontrol(R, upper_control_limit, lower_control_limit),
    }

    summary = {
        'UCL': float(upper_control_limit),
        'LCL': float(lower_control_limit),
        'Center': float(center),
        'Zone_a_limit': zone_a,
        'Zone_b_limit': zone_b,
        'Zone_c_limit': zone_c,
    }
    summary.update(rule_hits)

    return R, summary


def CalcPControlArgs(test_count, bad_count):
    """
    Evaluate the eight control rules on the per-sample defect fractions (p chart).

    The center is the pooled fraction ``sum(bad_count) / sum(test_count)``.
    Each sample gets its own limits, three times
    ``sqrt(pBar * (1 - pBar) / n)`` either side of the center, where ``n``
    is that sample's test count.

    :param test_count: number of units tested per sample (must support element-wise division)
    :param bad_count: number of defective units per sample
    :return: tuple ``(p_vec, result)`` where ``p_vec`` is the pandas.Series of
             fractions and ``result`` is a dict with the keys UCL and LCL
             (lists, one entry per sample), Center, Zone_a_limit,
             Zone_b_limit, Zone_c_limit and Rule1 .. Rule8
    """
    fractions = bad_count/test_count

    pBar = np.sum(bad_count)/np.sum(test_count)

    center = pBar
    sigma = np.std(fractions)

    # Per-sample half width of the control band.
    half_widths = [3 * np.sqrt(( pBar * (1 - pBar)) / n) for n in test_count]
    lower_control_limit = [pBar - width for width in half_widths]
    upper_control_limit = [pBar + width for width in half_widths]

    zone_a = [center + 3 * sigma, center - 3 * sigma]
    zone_b = [center + 2 * sigma, center - 2 * sigma]
    zone_c = [center + sigma, center - sigma]

    p_vec = pd.Series(fractions)

    rule_hits = {
        'Rule1': control_beyond_limits(p_vec, upper_control_limit, lower_control_limit),
        'Rule2': control_zone_c(p_vec, upper_control_limit, lower_control_limit, pBar),
        'Rule3': control_trend(p_vec),
        'Rule4': control_zone_a(p_vec, upper_control_limit, lower_control_limit, pBar),
        'Rule5': control_stratification(p_vec, upper_control_limit, lower_control_limit, pBar),
        'Rule6': control_mixture(p_vec, upper_control_limit, lower_control_limit, pBar),
        'Rule7': control_zone_b(p_vec, upper_control_limit, lower_control_limit, pBar),
        'Rule8': control_overcontrol(p_vec, upper_control_limit, lower_control_limit, pBar),
    }

    summary = {
        'UCL': [float(limit) for limit in upper_control_limit],
        'LCL': [float(limit) for limit in lower_control_limit],
        'Center': float(center),
        'Zone_a_limit': zone_a,
        'Zone_b_limit': zone_b,
        'Zone_c_limit': zone_c,
    }
    summary.update(rule_hits)

    return p_vec, summary

    
def CalcUControlArgs(sample_size, fault_count):
    """
    Evaluate the eight control rules on the per-sample fault rates (u chart).

    The center is the pooled rate ``sum(fault_count) / sum(sample_size)``.
    Each sample gets its own limits, three times ``sqrt(uBar / n)`` either
    side of the center, where ``n`` is that sample's size.

    :param sample_size: size of each sample (must support element-wise division)
    :param fault_count: number of faults found in each sample
    :return: tuple ``(u_vec, result)`` where ``u_vec`` is the pandas.Series of
             rates and ``result`` is a dict with the keys UCL and LCL
             (lists, one entry per sample), Center, Zone_a_limit,
             Zone_b_limit, Zone_c_limit and Rule1 .. Rule8
    """
    rates = fault_count/sample_size

    uBar = np.sum(fault_count)/np.sum(sample_size)

    center = uBar
    sigma = np.std(rates)

    # Per-sample half width of the control band.
    half_widths = [3 * np.sqrt(uBar/n) for n in sample_size]
    lower_control_limit = [uBar - width for width in half_widths]
    upper_control_limit = [uBar + width for width in half_widths]

    zone_a = [center + 3 * sigma, center - 3 * sigma]
    zone_b = [center + 2 * sigma, center - 2 * sigma]
    zone_c = [center + sigma, center - sigma]

    u_vec = pd.Series(rates)

    rule_hits = {
        'Rule1': control_beyond_limits(u_vec, upper_control_limit, lower_control_limit),
        'Rule2': control_zone_c(u_vec, upper_control_limit, lower_control_limit, uBar),
        'Rule3': control_trend(u_vec),
        'Rule4': control_zone_a(u_vec, upper_control_limit, lower_control_limit, uBar),
        'Rule5': control_stratification(u_vec, upper_control_limit, lower_control_limit, uBar),
        'Rule6': control_mixture(u_vec, upper_control_limit, lower_control_limit, uBar),
        'Rule7': control_zone_b(u_vec, upper_control_limit, lower_control_limit, uBar),
        'Rule8': control_overcontrol(u_vec, upper_control_limit, lower_control_limit, uBar),
    }

    summary = {
        'UCL': [float(limit) for limit in upper_control_limit],
        'LCL': [float(limit) for limit in lower_control_limit],
        'Center': float(center),
        'Zone_a_limit': zone_a,
        'Zone_b_limit': zone_b,
        'Zone_c_limit': zone_c,
    }
    summary.update(rule_hits)

    return u_vec, summary


#------------------------------------------------------------------------------------------------------------
def CalcCpkArgs(data,
             upper_control_limit: (int, float), lower_control_limit: (int, float),
             subgroup_size: int = 10, max_subgroups: int = 30):
    """
    Collect the data needed to box-plot the Cpk in subgroups of size `subgroup_size`.

    Subgroups are cut starting from the END of the data, so only the most
    recent `max_subgroups` groups are kept; they are returned oldest first.
    The values inside each subgroup are in reversed order.

    :param data: a pandas.Series or numpy.array representing the data set
    :param upper_control_limit: an integer or float which represents the upper control limit, commonly called the UCL
    :param lower_control_limit: an integer or float which represents the lower control limit, commonly called the LCL
    :param subgroup_size: the number of samples to include in each subgroup
    :param max_subgroups: the maximum number of subgroups to keep
    :return: dict with the keys UCL, LCL, ListedDataForBoxplot (list of
             subgroups), CpkSeq (pandas.Series with one value per subgroup)
             and Ppk (value for the whole data set)
    """
    # todo: offer options of historical subgrouping, such as subgroup history = 'all' or 'recent', something that
    # allows a better historical subgrouping
    newest_first = data[::-1]
    starts = range(0, len(newest_first), subgroup_size)[:max(max_subgroups, 0)]
    recent_groups = [newest_first[pos:pos + subgroup_size] for pos in starts]

    # Restore chronological order of the groups.
    data_subgroups = recent_groups[::-1]

    cpks = pd.Series([
        calc_ppk(group, upper_control_limit=upper_control_limit, lower_control_limit=lower_control_limit)
        for group in data_subgroups
    ])

    ppk = calc_ppk(data, upper_control_limit=upper_control_limit, lower_control_limit=lower_control_limit)

    return {
        'UCL': upper_control_limit,
        'LCL': lower_control_limit,
        'ListedDataForBoxplot': data_subgroups,
        'CpkSeq': cpks,
        'Ppk': ppk,
    }




def CalcPpkArgs(series_in,
                 upper_control_limit: (int, float), lower_control_limit: (int, float)):
    """
    Collect the data needed to plot a Ppk chart: a fitted normal curve plus tail and sigma figures.

    A normal distribution is fitted with ``np.mean`` and ``np.std`` of the
    data and sampled on 300 grid points spanning four standard deviations
    either side of the mean.

    :param series_in: the data to be analyzed
    :param upper_control_limit: the upper control limit
    :param lower_control_limit: the lower control limit
    :return: dict with the keys Sample_mean, Sigma, DensityCurve_x,
             DensityCurve_y, HigherPercent>UCL, LowerPercent<LCL, Ppk,
             LowerSigmaLvl and UpperSigmaLvl
    """
    mu = np.mean(series_in)
    sigma = np.std(series_in)

    # Density curve of the fitted normal distribution.
    grid = np.linspace(mu - 4 * sigma, mu + 4 * sigma, 300)
    density = stats.norm.pdf(grid, mu, sigma)

    # Share of the fitted distribution falling outside each limit, in percent.
    pct_below_lcl = 100.0 * stats.norm.cdf(lower_control_limit, mu, sigma)
    pct_above_ucl = 100.0 - 100.0 * stats.norm.cdf(upper_control_limit, mu, sigma)

    ppk_value = calc_ppk(series_in, upper_control_limit=upper_control_limit, lower_control_limit=lower_control_limit)

    # Distance from the mean to each limit, in standard deviations.
    sigmas_to_lcl = (mu - lower_control_limit) / sigma
    sigmas_to_ucl = (upper_control_limit - mu) / sigma

    return {
        'Sample_mean': mu,
        'Sigma': sigma,
        'DensityCurve_x': grid,
        'DensityCurve_y': density,
        'HigherPercent>UCL': pct_above_ucl,
        'LowerPercent<LCL': pct_below_lcl,
        'Ppk': ppk_value,
        'LowerSigmaLvl': sigmas_to_lcl,
        'UpperSigmaLvl': sigmas_to_ucl,
    }


def CalcXGBArgs(obs_in,outcomes_in):
    """
    Fit a default XGBoost classifier and return the node table of its first tree.

    :param obs_in: feature matrix passed to ``XGBClassifier.fit``
    :param outcomes_in: target labels passed to ``XGBClassifier.fit``
    :return: the rows of ``trees_to_dataframe()`` whose ``Tree`` column equals 0
    """
    model = xgboost.XGBClassifier()
    model.fit(obs_in, outcomes_in)

    # Use the public accessor instead of the private `_Booster` attribute.
    all_trees = model.get_booster().trees_to_dataframe()

    return all_trees.loc[all_trees.Tree == 0]




















