# Source code for limix.stats._kinship

from __future__ import division

[docs]def linear_kinship(G, out=None, verbose=True):
"""
Estimate Kinship matrix via linear kernel.

Let 𝑑 be the number of columns of G. The resulting matrix is given by:

.. math::

𝙺 = 𝚇𝚇ᵀ/𝑑

where

.. math::

𝚇ᵢⱼ = (𝙶ᵢⱼ - 𝑚ⱼ) / 𝑠ⱼ

is the matrix G column-wise normalized by means 𝑚ⱼ and standard deviations 𝑠ⱼ.
NaNs are ignored so as to produce matrix K having only real numbers.

This functions is specially designed to also handle large matrices G that would
otherwise require a large amount of memory if it were to be loaded in memory first.
For those cases, libraries like Dask come in handy.

Parameters
----------
G : array_like
Samples-by-variants matrix.
out : ndarray
A location into which the result is stored.
verbose : bool, optional
True for showing progress; False otherwise. Defauts to True.

Examples
--------
.. doctest::

>>> from numpy.random import RandomState
>>> from limix.stats import linear_kinship
>>>
>>> random = RandomState(1)
>>> X = random.randn(4, 100)
>>> K = linear_kinship(X, verbose=False)
>>> print(K) # doctest: +FLOAT_CMP
[[ 0.91314823 -0.19283362 -0.34133897 -0.37897564]
[-0.19283362  0.89885153 -0.2356003  -0.47041761]
[-0.34133897 -0.2356003   0.95777313 -0.38083386]
[-0.37897564 -0.47041761 -0.38083386  1.23022711]]
"""
from numpy import sqrt, zeros, asfortranarray, where, asarray, nanmean, std, isnan
from scipy.linalg.blas import get_blas_funcs
from tqdm import tqdm

(n, p) = G.shape
if out is None:
out = zeros((n, n), order="F")
else:
out = asfortranarray(out)

chunks = _get_chunks(G)
gemm = get_blas_funcs("gemm", [out])

start = 0
for chunk in tqdm(chunks, desc="Kinship", disable=not verbose):
end = start + chunk

g = asarray(G[:, start:end])
m = nanmean(g, 0)
g = where(isnan(g), m, g)
g = g - m
g /= std(g, 0)
g /= sqrt(p)

gemm(1.0, g, g, 1.0, out, 0, 1, 1)

start = end

return out

def _get_chunks(G):
from numpy import isfinite

if hasattr(G, "chunks") and G.chunks is not None:
if len(G.chunks) > 1 and all(isfinite(G.chunks)):
return G.chunks

siz = G.shape // 100
sizl = G.shape - siz * 100
chunks = [siz] * 100
if sizl > 0:
chunks += [sizl]
return tuple(chunks)