Source code for math_distance

import numpy as np
import scipy.stats


[docs]def unweighted_entropy_distance(p, q): r""" Unweighted entropy distance: .. math:: -\frac{2\times S_{PQ}-S_P-S_Q} {ln(4)}, S_I=\sum_{i} {I_i ln(I_i)} """ merged = p + q entropy_increase = 2 * \ scipy.stats.entropy(merged) - scipy.stats.entropy(p) - \ scipy.stats.entropy(q) return entropy_increase
[docs]def entropy_distance(p, q): r""" Entropy distance: .. math:: -\frac{2\times S_{PQ}^{'}-S_P^{'}-S_Q^{'}} {ln(4)}, S_I^{'}=\sum_{i} {I_i^{'} ln(I_i^{'})}, I^{'}=I^{w}, with\ w=0.25+S\times 0.5\ (S<1.5) """ p = _weight_intensity_by_entropy(p) q = _weight_intensity_by_entropy(q) return unweighted_entropy_distance(p, q)
def _weight_intensity_by_entropy(x): WEIGHT_START = 0.25 ENTROPY_CUTOFF = 3 weight_slope = (1 - WEIGHT_START) / ENTROPY_CUTOFF if np.sum(x) > 0: entropy_x = scipy.stats.entropy(x) if entropy_x < ENTROPY_CUTOFF: weight = WEIGHT_START + weight_slope * entropy_x x = np.power(x, weight) x_sum = np.sum(x) x = x / x_sum return x def _select_common_peaks(p, q): select = q > 0 p = p[select] p_sum = np.sum(p) if p_sum > 0: p = p / p_sum q = q[select] q = q / np.sum(q) return p, q
[docs]def euclidean_distance(p, q): r""" Euclidean distance: .. math:: (\sum|P_{i}-Q_{i}|^2)^{1/2} """ return np.sqrt(np.sum(np.power(p - q, 2)))
[docs]def manhattan_distance(p, q): r""" Manhattan distance: .. math:: \sum|P_{i}-Q_{i}| """ return np.sum(np.abs(p - q))
[docs]def chebyshev_distance(p, q): r""" Chebyshev distance: .. math:: \underset{i}{\max}{(|P_{i}\ -\ Q_{i}|)} """ return np.max(np.abs(p - q))
[docs]def squared_euclidean_distance(p, q): r""" Squared Euclidean distance: .. math:: \sum(P_{i}-Q_{i})^2 """ return np.sum(np.power(p - q, 2))
[docs]def fidelity_distance(p, q): r""" Fidelity distance: .. math:: 1-\sum\sqrt{P_{i}Q_{i}} """ return 1 - np.sum(np.sqrt(p * q))
[docs]def matusita_distance(p, q): r""" Matusita distance: .. math:: \sqrt{\sum(\sqrt{P_{i}}-\sqrt{Q_{i}})^2} """ return np.sqrt(np.sum(np.power(np.sqrt(p) - np.sqrt(q), 2)))
[docs]def squared_chord_distance(p, q): r""" Squared-chord distance: .. math:: \sum(\sqrt{P_{i}}-\sqrt{Q_{i}})^2 """ return np.sum(np.power(np.sqrt(p) - np.sqrt(q), 2))
[docs]def bhattacharya_1_distance(p, q): r""" Bhattacharya 1 distance: .. math:: (\arccos{(\sum\sqrt{P_{i}Q_{i}})})^2 """ s = np.sum(np.sqrt(p * q)) if s > 1: s = 1 return np.power(np.arccos(s), 2)
[docs]def bhattacharya_2_distance(p, q): r""" Bhattacharya 2 distance: .. math:: -\ln{(\sum\sqrt{P_{i}Q_{i}})} """ s = np.sum(np.sqrt(p * q)) if s == 0: return np.inf else: return -np.log(s)
[docs]def harmonic_mean_distance(p, q): r""" Harmonic mean distance: .. math:: 1-2\sum(\frac{P_{i}Q_{i}}{P_{i}+Q_{i}}) """ return 1 - 2 * np.sum(p * q / (p + q))
[docs]def probabilistic_symmetric_chi_squared_distance(p, q): r""" Probabilistic symmetric χ2 distance: .. math:: \frac{1}{2} \times \sum\frac{(P_{i}-Q_{i}\ )^2}{P_{i}+Q_{i}\ } """ return 1 / 2 * np.sum(np.power(p - q, 2) / (p + q))
[docs]def ruzicka_distance(p, q): r""" Ruzicka distance: .. math:: \frac{\sum{|P_{i}-Q_{i}|}}{\sum{\max(P_{i},Q_{i})}} """ dist = np.sum(np.abs(p - q)) / np.sum(np.maximum(p, q)) return dist
[docs]def roberts_distance(p, q): r""" Roberts distance: .. math:: 1-\sum\frac{(P_{i}+Q_{i})\frac{\min{(P_{i},Q_{i})}}{\max{(P_{i},Q_{i})}}}{\sum(P_{i}+Q_{i})} """ return 1 - np.sum((p + q) / np.sum(p + q) * np.minimum(p, q) / np.maximum(p, q))
[docs]def intersection_distance(p, q): r""" Intersection distance: .. math:: 1-\frac{\sum\min{(P_{i},Q_{i})}}{\min(\sum{P_{i},\sum{Q_{i})}}} """ return 1 - np.sum(np.minimum(p, q)) / min(np.sum(p), np.sum(q))
[docs]def motyka_distance(p, q): r""" Motyka distance: .. math:: -\frac{\sum\min{(P_{i},Q_{i})}}{\sum(P_{i}+Q_{i})} """ dist = np.sum(np.minimum(p, q)) / np.sum(p + q) return -dist
[docs]def canberra_distance(p, q): r""" Canberra distance: .. math:: \sum\frac{|P_{i}-Q_{i}|}{|P_{i}|+|Q_{i}|} """ return np.sum(np.abs(p - q) / (np.abs(p) + np.abs(q)))
[docs]def baroni_urbani_buser_distance(p, q): r""" Baroni-Urbani-Buser distance: .. math:: 1-\frac{\sum\min{(P_i,Q_i)}+\sqrt{\sum\min{(P_i,Q_i)}\sum(\max{(P)}-\max{(P_i,Q_i)})}}{\sum{\max{(P_i,Q_i)}+\sqrt{\sum{\min{(P_i,Q_i)}\sum(\max{(P)}-\max{(P_i,Q_i)})}}}} """ if np.max(p) < np.max(q): p, q = q, p d1 = np.sqrt(np.sum(np.minimum(p, q) * np.sum(max(p) - np.maximum(p, q)))) return 1 - (np.sum(np.minimum(p, q)) + d1) / (np.sum(np.maximum(p, q)) + d1)
[docs]def penrose_size_distance(p, q): r""" Penrose size distance: .. math:: \sqrt N\sum{|P_i-Q_i|} """ n = np.sum(p > 0) return np.sqrt(n) * np.sum(np.abs(p - q))
[docs]def mean_character_distance(p, q): r""" Mean character distance: .. math:: \frac{1}{N}\sum{|P_i-Q_i|} """ n = np.sum(p > 0) return 1 / n * np.sum(np.abs(p - q))
[docs]def lorentzian_distance(p, q): r""" Lorentzian distance: .. math:: \sum{\ln(1+|P_i-Q_i|)} """ return np.sum(np.log(1 + np.abs(p - q)))
[docs]def penrose_shape_distance(p, q): r""" Penrose shape distance: .. math:: \sqrt{\sum((P_i-\bar{P})-(Q_i-\bar{Q}))^2} """ p_avg = np.mean(p) q_avg = np.mean(q) return np.sqrt(np.sum(np.power((p - p_avg) - (q - q_avg), 2)))
[docs]def clark_distance(p, q): r""" Clark distance: .. math:: (\frac{1}{N}\sum(\frac{P_i-Q_i}{|P_i|+|Q_i|})^2)^\frac{1}{2} """ n = np.sum(p > 0) return np.sqrt(1 / n * np.sum(np.power((p - q) / (np.abs(p) + np.abs(q)), 2)))
[docs]def hellinger_distance(p, q): r""" Hellinger distance: .. math:: \sqrt{2\sum(\sqrt{\frac{P_i}{\bar{P}}}-\sqrt{\frac{Q_i}{\bar{Q}}})^2} """ p_avg = np.mean(p) q_avg = np.mean(q) return np.sqrt(2 * np.sum(np.power(np.sqrt(p / p_avg) - np.sqrt(q / q_avg), 2)))
[docs]def whittaker_index_of_association_distance(p, q): r""" Whittaker index of association distance: .. math:: \frac{1}{2}\sum|\frac{P_i}{\bar{P}}-\frac{Q_i}{\bar{Q}}| """ p_avg = np.mean(p) q_avg = np.mean(q) return 1 / 2 * np.sum(np.abs(p / p_avg - q / q_avg))
[docs]def symmetric_chi_squared_distance(p, q): r""" Symmetric χ2 distance: .. math:: \sqrt{\sum{\frac{\bar{P}+\bar{Q}}{N(\bar{P}+\bar{Q})^2}\frac{(P_i\bar{Q}-Q_i\bar{P})^2}{P_i+Q_i}\ }} """ p_avg = np.mean(p) q_avg = np.mean(q) n = np.sum(p > 0) d1 = (p_avg + q_avg) / (n * np.power(p_avg + q_avg, 2)) return np.sqrt(d1 * np.sum(np.power(p * q_avg - q * p_avg, 2) / (p + q)))
[docs]def pearson_correlation_distance(p, q): r""" Pearson/Spearman Correlation Coefficient: .. math:: \frac{\sum[(Q_i-\bar{Q})(P_i-\bar{P})]}{\sqrt{\sum(Q_i-\bar{Q})^2\sum(P_i-\bar{P})^2}} """ p_avg = np.mean(p) q_avg = np.mean(q) x = np.sum((q - q_avg) * (p - p_avg)) y = np.sqrt(np.sum(np.power(q - q_avg, 2)) * np.sum(np.power(p - p_avg, 2))) if x == 0 and y == 0: return 0. else: return -x / y
[docs]def improved_similarity_distance(p, q): r""" Improved Similarity Index: .. math:: \sqrt{\frac{1}{N}\sum\{\frac{P_i-Q_i}{P_i+Q_i}\}^2} """ n = np.sum(p > 0) return np.sqrt(1 / n * np.sum(np.power((p - q) / (p + q), 2)))
[docs]def absolute_value_distance(p, q): r""" Absolute Value Distance: .. math:: \frac { \sum(|Q_i-P_i|)}{\sum P_i} """ dist = np.sum(np.abs(q - p)) / np.sum(p) return dist
[docs]def dot_product_distance(p, q): r""" Dot product distance: .. math:: 1 - \sqrt{\frac{(\sum{Q_iP_i})^2}{\sum{Q_i^2\sum P_i^2}}} """ score = np.power(np.sum(q * p), 2) / \ (np.sum(np.power(q, 2)) * np.sum(np.power(p, 2))) return 1 - np.sqrt(score)
[docs]def cosine_distance(p, q): r""" Cosine distance, it gives the same result as the dot product. .. math:: 1 - \sqrt{\frac{(\sum{Q_iP_i})^2}{\sum{Q_i^2\sum P_i^2}}} """ return dot_product_distance(p, q)
[docs]def dot_product_reverse_distance(p, q): r""" Reverse dot product distance, only consider peaks existed in spectrum Q. .. math:: 1 - \sqrt{\frac{(\sum{{} {P_i^{'}}})^2}{{\sum{(Q_i^{'})^2}{\sum (P_i^{'})^2}}}}, with: P^{'}_{i}=\frac{P^{''}_{i}}{\sum_{i}{P^{''}_{i}}}, P^{''}_{i}=\begin{cases} 0 & \text{ if } Q_{i}=0 \\ P_{i} & \text{ if } Q_{i}\neq0 \end{cases} """ p, q = _select_common_peaks(p, q) if np.sum(p) == 0: score = 0 else: score = np.power(np.sum(q * p), 2) / \ (np.sum(np.power(q, 2)) * np.sum(np.power(p, 2))) return 1 - np.sqrt(score)
[docs]def spectral_contrast_angle_distance(p, q): r""" Spectral Contrast Angle distance. Please note that the value calculated here is :math:`\cos\theta`. If you want to get the :math:`\theta`, you can calculate with: :math:`\arccos(1-distance)` .. math:: 1 - \frac{\sum{Q_iP_i}}{\sqrt{\sum Q_i^2\sum P_i^2}} """ return 1 - np.sum(q * p) / \ np.sqrt(np.sum(np.power(q, 2)) * np.sum(np.power(p, 2)))
[docs]def wave_hedges_distance(p, q): r""" Wave Hedges distance: .. math:: \sum\frac{|P_i-Q_i|}{\max{(P_i,Q_i)}} """ return np.sum(np.abs(p - q) / np.maximum(p, q))
[docs]def jaccard_distance(p, q): r""" Jaccard distance: .. math:: \frac{\sum(P_i-Q_i)^2}{\sum P_i^2+\sum{Q_i^2-\sum{P_iQ_i}}} """ return np.sum(np.power(p - q, 2)) / \ (np.sum(np.power(p, 2)) + np.sum(np.power(q, 2)) - np.sum(p * q))
[docs]def dice_distance(p, q): r""" Dice distance: .. math:: \frac{\sum(P_i-Q_i)^2}{\sum P_i^2+\sum Q_i^2} """ return np.sum(np.power(p - q, 2)) / \ (np.sum(np.power(p, 2)) + np.sum(np.power(q, 2)))
[docs]def inner_product_distance(p, q): r""" Inner Product distance: .. math:: 1-\sum{P_iQ_i} """ return 1 - np.sum(p * q)
[docs]def divergence_distance(p, q): r""" Divergence distance: .. math:: 2\sum\frac{(P_i-Q_i)^2}{(P_i+Q_i)^2} """ return 2 * np.sum((np.power(p - q, 2)) / np.power(p + q, 2))
[docs]def avg_l_distance(p, q): r""" Avg (L1, L∞) distance: .. math:: \frac{1}{2}(\sum|P_i-Q_i|+\underset{i}{\max}{|P_i-Q_i|}) """ return (np.sum(np.abs(p - q)) + max(np.abs(p - q)))
[docs]def vicis_symmetric_chi_squared_3_distance(p, q): r""" Vicis-Symmetric χ2 3 distance: .. math:: \sum\frac{(P_i-Q_i)^2}{\max{(P_i,Q_i)}} """ return np.sum(np.power(p - q, 2) / np.maximum(p, q))