import numpy as np
import scipy.stats
def unweighted_entropy_distance(p, q):
    r"""
    Unweighted entropy distance (entropy increase on merging two spectra):

    .. math::

        2\times S_{PQ}-S_P-S_Q, \quad S_I=-\sum_{i} {I_i ln(I_i)}

    where :math:`S_{PQ}` is the entropy of the element-wise sum ``p + q``.
    Entropies come from ``scipy.stats.entropy``. The value is returned as-is;
    no division by :math:`ln(4)` is performed here.

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: ``2 * entropy(p + q) - entropy(p) - entropy(q)``.
    """
    entropy_of = scipy.stats.entropy
    combined = p + q
    return 2 * entropy_of(combined) - entropy_of(p) - entropy_of(q)
def entropy_distance(p, q):
    r"""
    Entropy distance computed on entropy-weighted intensities:

    .. math::

        2\times S_{PQ}^{'}-S_P^{'}-S_Q^{'}, \quad S_I^{'}=-\sum_{i} {I_i^{'} ln(I_i^{'})}, \quad I^{'}=I^{w}, with\ w=0.25+S\times 0.25\ (S<3)

    Each input is first passed through ``_weight_intensity_by_entropy`` and the
    results are handed to ``unweighted_entropy_distance``.

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: the unweighted entropy distance of the re-weighted inputs.
    """
    weighted_p, weighted_q = (
        _weight_intensity_by_entropy(spectrum) for spectrum in (p, q)
    )
    return unweighted_entropy_distance(weighted_p, weighted_q)
def _weight_intensity_by_entropy(x):
WEIGHT_START = 0.25
ENTROPY_CUTOFF = 3
weight_slope = (1 - WEIGHT_START) / ENTROPY_CUTOFF
if np.sum(x) > 0:
entropy_x = scipy.stats.entropy(x)
if entropy_x < ENTROPY_CUTOFF:
weight = WEIGHT_START + weight_slope * entropy_x
x = np.power(x, weight)
x_sum = np.sum(x)
x = x / x_sum
return x
def _select_common_peaks(p, q):
select = q > 0
p = p[select]
p_sum = np.sum(p)
if p_sum > 0:
p = p / p_sum
q = q[select]
q = q / np.sum(q)
return p, q
def euclidean_distance(p, q):
    r"""
    Euclidean distance:

    .. math::

        (\sum|P_{i}-Q_{i}|^2)^{1/2}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: square root of the summed squared differences.
    """
    delta = p - q
    squared_total = np.sum(np.power(delta, 2))
    return np.sqrt(squared_total)
def manhattan_distance(p, q):
    r"""
    Manhattan distance:

    .. math::

        \sum|P_{i}-Q_{i}|

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: sum of the absolute element-wise differences.
    """
    absolute_delta = np.abs(p - q)
    return np.sum(absolute_delta)
def chebyshev_distance(p, q):
    r"""
    Chebyshev distance:

    .. math::

        \underset{i}{\max}{(|P_{i}\ -\ Q_{i}|)}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: largest absolute element-wise difference.
    """
    absolute_delta = np.abs(p - q)
    return np.max(absolute_delta)
def squared_euclidean_distance(p, q):
    r"""
    Squared Euclidean distance:

    .. math::

        \sum(P_{i}-Q_{i})^2

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: sum of the squared element-wise differences.
    """
    delta = p - q
    squared_delta = np.power(delta, 2)
    return np.sum(squared_delta)
def fidelity_distance(p, q):
    r"""
    Fidelity distance:

    .. math::

        1-\sum\sqrt{P_{i}Q_{i}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: one minus the summed square roots of the element-wise products.
    """
    overlap = np.sum(np.sqrt(p * q))
    return 1 - overlap
def matusita_distance(p, q):
    r"""
    Matusita distance:

    .. math::

        \sqrt{\sum(\sqrt{P_{i}}-\sqrt{Q_{i}})^2}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: square root of the summed squared differences of the square roots.
    """
    root_delta = np.sqrt(p) - np.sqrt(q)
    squared_total = np.sum(np.power(root_delta, 2))
    return np.sqrt(squared_total)
def squared_chord_distance(p, q):
    r"""
    Squared-chord distance:

    .. math::

        \sum(\sqrt{P_{i}}-\sqrt{Q_{i}})^2

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: sum of the squared differences of the square roots.
    """
    root_delta = np.sqrt(p) - np.sqrt(q)
    squared_delta = np.power(root_delta, 2)
    return np.sum(squared_delta)
def bhattacharya_1_distance(p, q):
    r"""
    Bhattacharya 1 distance:

    .. math::

        (\arccos{(\sum\sqrt{P_{i}Q_{i}})})^2

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: squared arccosine of the summed square-rooted products.
    """
    overlap = np.sum(np.sqrt(p * q))
    # Cap at 1 so the value stays inside arccos' domain.
    overlap = 1 if overlap > 1 else overlap
    angle = np.arccos(overlap)
    return np.power(angle, 2)
def bhattacharya_2_distance(p, q):
    r"""
    Bhattacharya 2 distance:

    .. math::

        -\ln{(\sum\sqrt{P_{i}Q_{i}})}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: negative log of the summed square-rooted products, or ``np.inf``
        when that sum is exactly zero.
    """
    overlap = np.sum(np.sqrt(p * q))
    # The log of zero is undefined, so report an infinite distance instead.
    if overlap == 0:
        return np.inf
    return -np.log(overlap)
def harmonic_mean_distance(p, q):
    r"""
    Harmonic mean distance:

    .. math::

        1-2\sum(\frac{P_{i}Q_{i}}{P_{i}+Q_{i}})

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: one minus twice the summed ratio of product to sum.
    """
    ratio = p * q / (p + q)
    return 1 - 2 * np.sum(ratio)
def probabilistic_symmetric_chi_squared_distance(p, q):
    r"""
    Probabilistic symmetric χ2 distance:

    .. math::

        \frac{1}{2} \times \sum\frac{(P_{i}-Q_{i}\ )^2}{P_{i}+Q_{i}\ }

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: half the summed ratio of squared difference to sum.
    """
    terms = np.power(p - q, 2) / (p + q)
    return 1 / 2 * np.sum(terms)
def ruzicka_distance(p, q):
    r"""
    Ruzicka distance:

    .. math::

        \frac{\sum{|P_{i}-Q_{i}|}}{\sum{\max(P_{i},Q_{i})}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: summed absolute differences divided by the summed element-wise maxima.
    """
    total_difference = np.sum(np.abs(p - q))
    total_maximum = np.sum(np.maximum(p, q))
    return total_difference / total_maximum
def roberts_distance(p, q):
    r"""
    Roberts distance:

    .. math::

        1-\sum\frac{(P_{i}+Q_{i})\frac{\min{(P_{i},Q_{i})}}{\max{(P_{i},Q_{i})}}}{\sum(P_{i}+Q_{i})}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: one minus the min/max ratios weighted by each position's share of
        the total intensity.
    """
    pair_total = p + q
    share = pair_total / np.sum(pair_total)
    weighted_ratio = share * np.minimum(p, q) / np.maximum(p, q)
    return 1 - np.sum(weighted_ratio)
def intersection_distance(p, q):
    r"""
    Intersection distance:

    .. math::

        1-\frac{\sum\min{(P_{i},Q_{i})}}{\min(\sum{P_{i},\sum{Q_{i})}}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: one minus the summed element-wise minima over the smaller array total.
    """
    shared = np.sum(np.minimum(p, q))
    smaller_total = min(np.sum(p), np.sum(q))
    return 1 - shared / smaller_total
def motyka_distance(p, q):
    r"""
    Motyka distance:

    .. math::

        -\frac{\sum\min{(P_{i},Q_{i})}}{\sum(P_{i}+Q_{i})}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: the negated ratio of summed element-wise minima to summed totals.
    """
    shared = np.sum(np.minimum(p, q))
    combined = np.sum(p + q)
    return -(shared / combined)
def canberra_distance(p, q):
    r"""
    Canberra distance:

    .. math::

        \sum\frac{|P_{i}-Q_{i}|}{|P_{i}|+|Q_{i}|}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: summed ratio of absolute difference to summed magnitudes.
    """
    numerator = np.abs(p - q)
    denominator = np.abs(p) + np.abs(q)
    return np.sum(numerator / denominator)
def baroni_urbani_buser_distance(p, q):
    r"""
    Baroni-Urbani-Buser distance:

    .. math::

        1-\frac{\sum\min{(P_i,Q_i)}+\sqrt{\sum\min{(P_i,Q_i)}\sum(\max{(P)}-\max{(P_i,Q_i)})}}{\sum{\max{(P_i,Q_i)}+\sqrt{\sum{\min{(P_i,Q_i)}\sum(\max{(P)}-\max{(P_i,Q_i)})}}}}

    The inputs are swapped first when ``q`` holds the larger maximum, so
    :math:`\max(P)` is the overall maximum of both arrays.

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: the Baroni-Urbani-Buser distance as given by the formula above.
    """
    # Make ``p`` the array that holds the larger peak.
    if np.max(p) < np.max(q):
        p, q = q, p

    lower = np.minimum(p, q)
    upper = np.maximum(p, q)
    headroom = np.sum(max(p) - upper)
    root_term = np.sqrt(np.sum(lower * headroom))
    return 1 - (np.sum(lower) + root_term) / (np.sum(upper) + root_term)
def penrose_size_distance(p, q):
    r"""
    Penrose size distance:

    .. math::

        \sqrt N\sum{|P_i-Q_i|}

    Here :math:`N` is the number of strictly positive entries in ``p``.

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: summed absolute differences scaled by :math:`\sqrt N`.
    """
    positive_count = np.sum(p > 0)
    total_difference = np.sum(np.abs(p - q))
    return np.sqrt(positive_count) * total_difference
def mean_character_distance(p, q):
    r"""
    Mean character distance:

    .. math::

        \frac{1}{N}\sum{|P_i-Q_i|}

    Here :math:`N` is the number of strictly positive entries in ``p``.

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: summed absolute differences scaled by :math:`1/N`.
    """
    positive_count = np.sum(p > 0)
    total_difference = np.sum(np.abs(p - q))
    return 1 / positive_count * total_difference
def lorentzian_distance(p, q):
    r"""
    Lorentzian distance:

    .. math::

        \sum{\ln(1+|P_i-Q_i|)}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: summed natural log of one plus the absolute differences.
    """
    shifted = 1 + np.abs(p - q)
    return np.sum(np.log(shifted))
def penrose_shape_distance(p, q):
    r"""
    Penrose shape distance:

    .. math::

        \sqrt{\sum((P_i-\bar{P})-(Q_i-\bar{Q}))^2}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: Euclidean norm of the difference between the mean-centred arrays.
    """
    centred_p = p - np.mean(p)
    centred_q = q - np.mean(q)
    squared_total = np.sum(np.power(centred_p - centred_q, 2))
    return np.sqrt(squared_total)
def clark_distance(p, q):
    r"""
    Clark distance:

    .. math::

        (\frac{1}{N}\sum(\frac{P_i-Q_i}{|P_i|+|Q_i|})^2)^\frac{1}{2}

    Here :math:`N` is the number of strictly positive entries in ``p``.

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: root of the :math:`1/N`-scaled sum of squared relative differences.
    """
    positive_count = np.sum(p > 0)
    relative_delta = (p - q) / (np.abs(p) + np.abs(q))
    squared_total = np.sum(np.power(relative_delta, 2))
    return np.sqrt(1 / positive_count * squared_total)
def hellinger_distance(p, q):
    r"""
    Hellinger distance:

    .. math::

        \sqrt{2\sum(\sqrt{\frac{P_i}{\bar{P}}}-\sqrt{\frac{Q_i}{\bar{Q}}})^2}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: the Hellinger distance of the mean-scaled arrays.
    """
    scaled_root_p = np.sqrt(p / np.mean(p))
    scaled_root_q = np.sqrt(q / np.mean(q))
    squared_total = np.sum(np.power(scaled_root_p - scaled_root_q, 2))
    return np.sqrt(2 * squared_total)
def whittaker_index_of_association_distance(p, q):
    r"""
    Whittaker index of association distance:

    .. math::

        \frac{1}{2}\sum|\frac{P_i}{\bar{P}}-\frac{Q_i}{\bar{Q}}|

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: half the summed absolute differences of the mean-scaled arrays.
    """
    scaled_p = p / np.mean(p)
    scaled_q = q / np.mean(q)
    return 1 / 2 * np.sum(np.abs(scaled_p - scaled_q))
def symmetric_chi_squared_distance(p, q):
    r"""
    Symmetric χ2 distance:

    .. math::

        \sqrt{\sum{\frac{\bar{P}+\bar{Q}}{N(\bar{P}+\bar{Q})^2}\frac{(P_i\bar{Q}-Q_i\bar{P})^2}{P_i+Q_i}\ }}

    Here :math:`N` is the number of strictly positive entries in ``p``.

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: the symmetric χ2 distance as given by the formula above.
    """
    mean_p, mean_q = np.mean(p), np.mean(q)
    positive_count = np.sum(p > 0)
    mean_total = mean_p + mean_q
    prefactor = mean_total / (positive_count * np.power(mean_total, 2))
    cross_terms = np.power(p * mean_q - q * mean_p, 2) / (p + q)
    return np.sqrt(prefactor * np.sum(cross_terms))
def pearson_correlation_distance(p, q):
    r"""
    Negated Pearson correlation coefficient:

    .. math::

        -\frac{\sum[(Q_i-\bar{Q})(P_i-\bar{P})]}{\sqrt{\sum(Q_i-\bar{Q})^2\sum(P_i-\bar{P})^2}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: the negated correlation, or ``0.`` when numerator and denominator
        are both exactly zero.
    """
    centred_p = p - np.mean(p)
    centred_q = q - np.mean(q)
    covariance = np.sum(centred_q * centred_p)
    scale = np.sqrt(
        np.sum(np.power(centred_q, 2)) * np.sum(np.power(centred_p, 2))
    )
    # 0/0 case: the correlation is undefined, so it is reported as zero.
    if covariance == 0 and scale == 0:
        return 0.
    return -covariance / scale
def improved_similarity_distance(p, q):
    r"""
    Improved Similarity Index:

    .. math::

        \sqrt{\frac{1}{N}\sum\{\frac{P_i-Q_i}{P_i+Q_i}\}^2}

    Here :math:`N` is the number of strictly positive entries in ``p``.

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: root of the :math:`1/N`-scaled sum of squared relative differences.
    """
    positive_count = np.sum(p > 0)
    relative_delta = (p - q) / (p + q)
    squared_total = np.sum(np.power(relative_delta, 2))
    return np.sqrt(1 / positive_count * squared_total)
def absolute_value_distance(p, q):
    r"""
    Absolute Value Distance:

    .. math::

        \frac { \sum(|Q_i-P_i|)}{\sum P_i}

    :param p: first intensity array; its sum is the denominator.
    :param q: second intensity array (assumed aligned element-wise with ``p``).
    :return: summed absolute differences divided by the sum of ``p``.
    """
    total_difference = np.sum(np.abs(q - p))
    return total_difference / np.sum(p)
def dot_product_distance(p, q):
    r"""
    Dot product distance:

    .. math::

        1 - \sqrt{\frac{(\sum{Q_iP_i})^2}{\sum{Q_i^2\sum P_i^2}}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: one minus the square root of the squared-dot-product ratio.
    """
    numerator = np.power(np.sum(q * p), 2)
    denominator = np.sum(np.power(q, 2)) * np.sum(np.power(p, 2))
    return 1 - np.sqrt(numerator / denominator)
def cosine_distance(p, q):
    r"""
    Cosine distance; an alias that delegates to ``dot_product_distance``.

    .. math::

        1 - \sqrt{\frac{(\sum{Q_iP_i})^2}{\sum{Q_i^2\sum P_i^2}}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: whatever ``dot_product_distance(p, q)`` returns.
    """
    distance = dot_product_distance(p, q)
    return distance
def dot_product_reverse_distance(p, q):
    r"""
    Reverse dot product distance; only positions where ``q`` is positive count.

    .. math::

        1 - \sqrt{\frac{(\sum{Q_i^{'}P_i^{'}})^2}{{\sum{(Q_i^{'})^2}{\sum (P_i^{'})^2}}}}, with:

        P^{'}_{i}=\frac{P^{''}_{i}}{\sum_{i}{P^{''}_{i}}},

        P^{''}_{i}=\begin{cases}
        0 & \text{ if } Q_{i}=0 \\
        P_{i} & \text{ if } Q_{i}\neq0
        \end{cases}

    Both arrays are first filtered and renormalised by ``_select_common_peaks``.

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array; its positive entries select the positions.
    :return: the dot product distance of the filtered arrays, with the score
        taken as 0 when the filtered ``p`` sums to exactly zero.
    """
    p, q = _select_common_peaks(p, q)

    # With no intensity left in p, the score stays at zero.
    score = 0
    if np.sum(p) != 0:
        numerator = np.power(np.sum(q * p), 2)
        denominator = np.sum(np.power(q, 2)) * np.sum(np.power(p, 2))
        score = numerator / denominator
    return 1 - np.sqrt(score)
def spectral_contrast_angle_distance(p, q):
    r"""
    Spectral Contrast Angle distance.

    The value returned is :math:`1-\cos\theta`. To obtain :math:`\theta`
    itself, compute :math:`\arccos(1-distance)`.

    .. math::

        1 - \frac{\sum{Q_iP_i}}{\sqrt{\sum Q_i^2\sum P_i^2}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: one minus the normalised dot product of ``q`` and ``p``.
    """
    dot = np.sum(q * p)
    norm_product = np.sqrt(np.sum(np.power(q, 2)) * np.sum(np.power(p, 2)))
    return 1 - dot / norm_product
def wave_hedges_distance(p, q):
    r"""
    Wave Hedges distance:

    .. math::

        \sum\frac{|P_i-Q_i|}{\max{(P_i,Q_i)}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: summed ratio of absolute difference to element-wise maximum.
    """
    absolute_delta = np.abs(p - q)
    larger = np.maximum(p, q)
    return np.sum(absolute_delta / larger)
def jaccard_distance(p, q):
    r"""
    Jaccard distance:

    .. math::

        \frac{\sum(P_i-Q_i)^2}{\sum P_i^2+\sum{Q_i^2-\sum{P_iQ_i}}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: summed squared differences over (sum of squares minus dot product).
    """
    squared_difference = np.sum(np.power(p - q, 2))
    denominator = (
        np.sum(np.power(p, 2)) + np.sum(np.power(q, 2)) - np.sum(p * q)
    )
    return squared_difference / denominator
def dice_distance(p, q):
    r"""
    Dice distance:

    .. math::

        \frac{\sum(P_i-Q_i)^2}{\sum P_i^2+\sum Q_i^2}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: summed squared differences over the combined sums of squares.
    """
    squared_difference = np.sum(np.power(p - q, 2))
    squared_magnitudes = np.sum(np.power(p, 2)) + np.sum(np.power(q, 2))
    return squared_difference / squared_magnitudes
def inner_product_distance(p, q):
    r"""
    Inner Product distance:

    .. math::

        1-\sum{P_iQ_i}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: one minus the sum of the element-wise products.
    """
    inner = np.sum(p * q)
    return 1 - inner
def divergence_distance(p, q):
    r"""
    Divergence distance:

    .. math::

        2\sum\frac{(P_i-Q_i)^2}{(P_i+Q_i)^2}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: twice the summed ratio of squared difference to squared sum.
    """
    squared_delta = np.power(p - q, 2)
    squared_total = np.power(p + q, 2)
    return 2 * np.sum(squared_delta / squared_total)
def avg_l_distance(p, q):
    r"""
    Avg (L1, L∞) distance:

    .. math::

        \frac{1}{2}(\sum|P_i-Q_i|+\underset{i}{\max}{|P_i-Q_i|})

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: the mean of the L1 distance and the L∞ distance between the arrays.
    """
    absolute_delta = np.abs(p - q)
    # Average the two norms: the 1/2 factor is part of the definition.
    return (np.sum(absolute_delta) + np.max(absolute_delta)) / 2
def vicis_symmetric_chi_squared_3_distance(p, q):
    r"""
    Vicis-Symmetric χ2 3 distance:

    .. math::

        \sum\frac{(P_i-Q_i)^2}{\max{(P_i,Q_i)}}

    :param p: first intensity array (assumed aligned element-wise with ``q``).
    :param q: second intensity array.
    :return: summed ratio of squared difference to element-wise maximum.
    """
    squared_delta = np.power(p - q, 2)
    larger = np.maximum(p, q)
    return np.sum(squared_delta / larger)