Source code for limix.stats._chi2

[docs]class Chi2Mixture(object): """ Mixture of 𝜒² distributions. Class for evaluation of P values for a test statistic that follows a two-component mixture of chi2 .. math:: p(x) = (1-𝑝)𝜒²(0) + 𝑝𝑎𝜒²(𝑑). Here 𝑝 is the probability being in the first component and 𝑎 and 𝑑 are the scale parameter and the number of degrees of freedom of the second component. .. warning:: This class is not production-ready. Keep in mind that this interface is likely to change. Parameters ---------- scale_min : float Minimum value used for fitting the scale parameter. scale_max : float Maximum value used for fitting the scale parameter. dofmin : float Minimum value used for fitting the dof parameter. dofmax : float Maximum value used for fitting the dof parameter. qmax : float Only the top qmax quantile is used for the fit. n_interval : int Number of intervals when performing gridsearch. tol : float Tolerance of being zero. Example ------- .. doctest:: >>> from numpy.random import RandomState >>> import scipy as sp >>> from limix.stats import Chi2Mixture >>> >>> scale = 0.3 >>> dof = 2 >>> mixture = 0.2 >>> n = 100 >>> >>> random = RandomState(1) >>> x = random.chisquare(dof, n) >>> n0 = int( (1-mixture) * n) >>> idxs = random.choice(n, n0, replace=False) >>> x[idxs] = 0 >>> >>> chi2mix = Chi2Mixture(scale_min=0.1, scale_max=5.0, ... dof_min=0.1, dof_max=5.0, ... qmax=0.1, tol=4e-3) >>> chi2mix.estimate_chi2mixture(x) >>> pv = chi2mix.sf(x) >>> print(pv[:4]) # doctest: +FLOAT_CMP [0.2 0.2 0.2 0.2] >>> >>> print('%.2f' % chi2mix.scale) 1.98 >>> print('%.2f' % chi2mix.dof) 0.89 >>> print('%.2f' % chi2mix.mixture) 0.20 """
[docs] def __init__( self, scale_min=0.1, scale_max=5.0, dof_min=0.1, dof_max=5.0, n_intervals=100, qmax=0.1, tol=0, lrt=None, ): self.scale_min = scale_min self.scale_max = scale_max self.dof_min = dof_min self.dof_max = dof_max self.qmax = qmax self.n_intervals = n_intervals self.tol = tol
[docs] def estimate_chi2mixture(self, lrt): r"""Estimates the parameters of a chi2 mixture. Estimates the parameters of a chi2 mixture by fitting the empirical distribution of null test statistic. Parameters ---------- lrt : array_like Null test statistcs. """ import scipy as sp import scipy.stats as st # step 1: estimate the probability of being in component one self.mixture = 1 - (lrt <= self.tol).mean() n_false = sp.sum(lrt > self.tol) # step 2: only use the largest qmax fraction of test statistics to # estimate the remaining parameters n_fitting = int(sp.ceil(self.qmax * n_false)) lrt_sorted = -sp.sort(-lrt)[:n_fitting] q = sp.linspace(0, 1, n_false)[1 : n_fitting + 1] log_q = sp.log10(q) # step 3: fitting scale and dof by minimizing the squared error # of the log10 p-values with their theorietical values # [uniform distribution] MSE_opt = sp.inf MSE = sp.zeros((self.n_intervals, self.n_intervals)) for i, scale in enumerate( sp.linspace(self.scale_min, self.scale_max, self.n_intervals) ): for j, dof in enumerate( sp.linspace(self.dof_min, self.dof_max, self.n_intervals) ): p = st.chi2.sf(lrt_sorted / scale, dof) log_p = sp.log10(p) MSE[i, j] = sp.mean((log_q - log_p) ** 2) if MSE[i, j] < MSE_opt: MSE_opt = MSE[i, j] self.scale = scale self.dof = dof
[docs] def sf(self, lrt): r"""Computes the p-values from test statistics lrt. Parameters ---------- lrt : array_like Test statistics. Returns ------- array_like P-values. """ from numpy import asarray import scipy as sp import scipy.stats as st lrt = asarray(lrt, float) _lrt = sp.copy(lrt) _lrt[lrt < self.tol] = 0 pv = self.mixture * st.chi2.sf(_lrt / self.scale, self.dof) return pv