From 5f0664e8e93eb2e1ba47b0f3758b32adb0351d38 Mon Sep 17 00:00:00 2001
From: Benjamin Pedigo
Date: Tue, 14 Mar 2023 09:27:56 -0400
Subject: [PATCH] Delete sandbox directory

---
 sandbox/align.py              |  35 -----
 sandbox/binom_sims.py         | 114 --------------
 sandbox/gm_ranking.py         |  53 -------
 sandbox/rdpg_lr_comparison.py | 278 ----------------------------------
 sandbox/rounding.py           |   5 -
 sandbox/test_fishers_exact.py |  28 ----
 sandbox/test_poetry.py        |   3 -
 sandbox/test_pymaid.py        |  12 --
 8 files changed, 528 deletions(-)
 delete mode 100644 sandbox/align.py
 delete mode 100644 sandbox/binom_sims.py
 delete mode 100644 sandbox/gm_ranking.py
 delete mode 100644 sandbox/rdpg_lr_comparison.py
 delete mode 100644 sandbox/rounding.py
 delete mode 100644 sandbox/test_fishers_exact.py
 delete mode 100644 sandbox/test_poetry.py
 delete mode 100644 sandbox/test_pymaid.py

diff --git a/sandbox/align.py b/sandbox/align.py
deleted file mode 100644
index becfeb5..0000000
--- a/sandbox/align.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#%%
-import numpy as np
-from scipy.stats import special_ortho_group
-import matplotlib.pyplot as plt
-
-np.random.seed(88887)
-Q = special_ortho_group.rvs(2)
-X = np.random.uniform(0, 1, (20, 2))
-Y = X @ Q
-fig, ax = plt.subplots(1, 1, figsize=(6, 6))
-ax.axvline(0, color="k")
-ax.axhline(0, color="k")
-ax.scatter(X[:, 0], X[:, 1], color="lightblue")
-ax.scatter(Y[:, 0], Y[:, 1], color="red")
-
-rads_start = 0
-rads_end = np.arctan2(Q[1, 0], Q[0, 0])
-
-Zs = []
-for rads in np.linspace(rads_start, rads_end, 100):
-    semi_Q = np.array(
-        [
-            [np.cos(rads), -np.sin(rads)],
-            [np.sin(rads), np.cos(rads)],
-        ]
-    )
-    Z = X @ semi_Q
-    Zs.append(Z)
-Zs = np.stack(Zs)
-
-ax.plot(Zs[:, :, 0], Zs[:, :, 1], color="lightgrey", zorder=-1)
-
-ax.axis("off")
-
-#%%
diff --git a/sandbox/binom_sims.py b/sandbox/binom_sims.py
deleted file mode 100644
index 50c14b1..0000000
--- a/sandbox/binom_sims.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#%%
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import seaborn as sns
-from pkg.plot import set_theme
-from scipy.stats import binom, chi2_contingency, ks_1samp, uniform
-
-c = 1.5
-ns = np.array([1000, 1500, 750, 1250])
-ps = np.array([0.2, 0.02, 0.05, 0.1])
-
-rng = np.random.default_rng(8888)
-
-
-# for i in range(len(ns)):
-
-K = len(ns)
-
-n_sims = 1000
-rows = []
-for sim in range(n_sims):
-    for i in range(K):
-        x = rng.binomial(ns, ps)
-        y = rng.binomial(ns, c * ps)
-
-        cont_table = np.array([[x[i], ns[i] - x[i]], [y[i], ns[i] - y[i]]])
-        stat, pvalue, _, _ = chi2_contingency(cont_table)
-        rows.append(
-            {
-                "dim": i,
-                "stat": stat,
-                "pvalue": pvalue,
-                "sim": sim,
-                "c": c,
-                "correction": False,
-            }
-        )
-
-        corrected_cont_table = cont_table.copy()
-        # corrected_cont_table[1, 0] = cont_table[1, 0] +
-        n = corrected_cont_table[1].sum()
-        phat = corrected_cont_table[1, 0] / n
-        phat = 1 / c * phat
-        corrected_cont_table[1, 0] = phat * n
-        corrected_cont_table[1, 1] = (1 - phat) * n
-        stat, pvalue, _, _ = chi2_contingency(corrected_cont_table)
-        rows.append(
-            {
-                "dim": i,
-                "stat": stat,
-                "pvalue": pvalue,
-                "sim": sim,
-                "c": c,
-                "correction": True,
-            }
-        )
-
-results = pd.DataFrame(rows)
-
-#%%
-
-set_theme()
-for i in range(K):
-    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
-    sns.histplot(data=results[results["dim"] == i], x="pvalue", hue="correction", ax=ax)
-
-
-def subuniformity_plot(x, ax=None, **kwargs):
-    if ax is None:
-        _, ax = plt.subplots(1, 1, figsize=(6, 6))
-    sns.histplot(x, ax=ax, stat="density", cumulative=True, **kwargs)
-    stat, pvalue = ks_1samp(x, uniform(0, 1).cdf, alternative="greater")
-    ax.plot([0, 1], [0, 1], linewidth=3, linestyle=":", color="black")
-    ax.text(0, 1, f"p-value: {pvalue:.3f}")
-    ax.set_ylabel("Cumulative density")
-    return ax, stat, pvalue
-
-
-for i in range(K):
-    data = results[(results["dim"] == i) & (results["correction"])]["pvalue"]
-    subuniformity_plot(data)
-
-#%%
-
-
-ps = np.linspace(0.1, 0.3, 1000)
-
-x = 30  # p ~= 0.2
-n = 100
-
-y = 40  # p ~= 0.3 so c ~= 1.5
-m = 200
-
-c = 1.5
-
-
-def func(p):
-    return (
-        x * np.log(c * p)
-        + (n - x) * np.log(1 - c * p)
-        + y * np.log(p)
-        + (m - y) * np.log(1 - p)
-    )
-
-
-vals = func(ps)
-
-fig, ax = plt.subplots(1, 1, figsize=(10, 10))
-ax.plot(ps, vals)
-
-est = 1 / c * x / (n + m) + y / (n + m)
-
-ax.axvline(est, color="red")
diff --git a/sandbox/gm_ranking.py b/sandbox/gm_ranking.py
deleted file mode 100644
index 0814cd8..0000000
--- a/sandbox/gm_ranking.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from graspologic.utils import remove_loops
-import numpy as np
-from graspologic.match import GraphMatch
-
-
-def signal_flow(A):
-    """Implementation of the signal flow metric from Varshney et al 2011
-
-    Parameters
-    ----------
-    A : np.ndarray
-        Adjacency matrix of the network
-
-    Returns
-    -------
-    np.ndarray
-        Signal flow value for each node
-    """
-    A = A.copy()
-    A = remove_loops(A)
-    W = (A + A.T) / 2
-
-    D = np.diag(np.sum(W, axis=1))
-
-    L = D - W
-
-    b = np.sum(W * np.sign(A - A.T), axis=1)
-    L_pinv = np.linalg.pinv(L)
-    z = L_pinv @ b
-
-    return z
-
-
-def rank_signal_flow(A):
-    sf = signal_flow(A)
-    perm_inds = np.argsort(-sf)
-    return perm_inds
-
-
-def rank_graph_match_flow(A, n_init=10, max_iter=30, eps=1e-4, **kwargs):
-    n = len(A)
-    try:
-        initial_perm = rank_signal_flow(A)
-        init = np.eye(n)[initial_perm]
-    except np.linalg.LinAlgError:
-        print("SVD did not converge in signal flow")
-        init = np.full((n, n), 1 / n)
-    match_mat = np.zeros((n, n))
-    triu_inds = np.triu_indices(n, k=1)
-    match_mat[triu_inds] = 1
-    gm = GraphMatch(n_init=n_init, max_iter=max_iter, init=init, eps=eps, **kwargs)
-    perm_inds = gm.fit_predict(match_mat, A)
-    return perm_inds
diff --git a/sandbox/rdpg_lr_comparison.py b/sandbox/rdpg_lr_comparison.py
deleted file mode 100644
index 8bb518c..0000000
--- a/sandbox/rdpg_lr_comparison.py
+++ /dev/null
@@ -1,278 +0,0 @@
-#%%
-from pkg.data import load_matched
-
-left_adj, left_nodes = load_matched("left", weights=False)
-right_adj, right_nodes = load_matched("right", weights=False)
-assert (left_nodes["pair_id"].values == right_nodes["pair_id"].values).all()
-
-#%%
-max_n_components = 48
-
-from graspologic.embed import AdjacencySpectralEmbed
-
-
-ase = AdjacencySpectralEmbed(n_components=max_n_components)
-X_left, Y_left = ase.fit_transform(left_adj)
-X_right, Y_right = ase.fit_transform(right_adj)
-
-#%%
-
-import numpy as np
-
-
-def regularize_P(P):
-    P -= np.diag(np.diag(P))
-    n = P.shape[0]
-    lower_lim = 1 / n**2
-    upper_lim = 1 - lower_lim
-    P[P >= upper_lim] = upper_lim
-    P[P <= lower_lim] = lower_lim
-    return P
-
-
-def compute_P(X, Y):
-    P = X @ Y.T
-    P = regularize_P(P)
-    return P
-
-
-#%%
-from scipy.stats import bernoulli
-
-
-#%%
-from tqdm.notebook import tqdm
-
-n_components_range = np.arange(1, max_n_components + 1)
-rows = []
-for n_components in tqdm(n_components_range):
-    P_left = compute_P(X_left[:, :n_components], Y_left[:, :n_components])
-    log_lik = bernoulli(P_left).logpmf(right_adj).sum()
-    norm_log_lik = log_lik / right_adj.sum()
-    rows.append(
-        {
-            "n_components": n_components,
-            "log_lik": log_lik,
-            "norm_log_lik": norm_log_lik,
-            "train_side": "left",
-            "test_side": "right",
-        }
-    )
-for n_components in tqdm(n_components_range):
-    P_right = compute_P(X_right[:, :n_components], Y_right[:, :n_components])
-    log_lik = bernoulli(P_right).logpmf(left_adj).sum()
-    norm_log_lik = log_lik / left_adj.sum()
-    rows.append(
-        {
-            "n_components": n_components,
-            "log_lik": log_lik,
-            "norm_log_lik": norm_log_lik,
-            "train_side": "right",
-            "test_side": "left",
-        }
-    )
-
-# %%
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-fig, ax = plt.subplots(1, 1, figsize=(10, 10))
-
-results = pd.DataFrame(rows)
-y = "norm_log_lik"
-sns.lineplot(data=results, x="n_components", y=y, hue="train_side", ax=ax)
-
-for side, side_results in results.groupby("train_side"):
-    idx = side_results[y].idxmax()
-    n_components = side_results.loc[idx, "n_components"]
-    ax.axvline(n_components, color="k", linestyle="--", alpha=0.5)
-    print(n_components)
-
-#%%
-
-
-from graspologic.models import DCSBMEstimator, SBMEstimator
-from graspologic.utils import binarize, remove_loops
-from scipy.stats import poisson
-
-
-def calculate_blockmodel_likelihood(source_adj, target_adj, labels, model="DCSBM"):
-    if model == "DCSBM":
-        Model = DCSBMEstimator
-    elif model == "SBM":
-        Model = SBMEstimator
-
-    estimator = Model(directed=True, loops=False)
-    uni_labels, inv = np.unique(labels, return_inverse=True)
-    estimator.fit(source_adj, inv)
-    train_P = estimator.p_mat_
-    train_P = regularize_P(train_P)
-
-    n_params = estimator._n_parameters() + len(labels)
-
-    score = poisson.logpmf(target_adj, train_P).sum()
-    out = dict(
-        score=score,
-        model=model,
-        n_params=n_params,
-        norm_score=score / target_adj.sum(),
-        n_communities=len(uni_labels),
-    )
-
-    return out
-
-
-#%%
-
-n_components = 24
-method = "average"
-metric = "cosine"
-from scipy.cluster import hierarchy
-
-n_clusters_range = np.arange(5, 125, 1)
-
-rows = []
-for (X_fit, adj_fit), side_fit in zip(
-    [(X_left, left_adj), (X_right, right_adj)], ["left", "right"]
-):
-    Z = hierarchy.linkage(X_fit[:, :n_components], method=method, metric=metric)
-    for n_clusters in tqdm(n_clusters_range):
-        labels = hierarchy.fcluster(Z, n_clusters, criterion="maxclust")
-        for adj_eval, side_eval in zip([left_adj, right_adj], ["left", "right"]):
-            row = calculate_blockmodel_likelihood(adj_fit, adj_eval, labels)
-            row["side_fit"] = side_fit
-            row["side_eval"] = side_eval
-            rows.append(row)
-
-results = pd.DataFrame(rows)
-results
-# %%
-results["fit_eval"] = results["side_fit"] + "-" + results["side_eval"]
-results["is_same"] = results["side_fit"] == results["side_eval"]
-from pkg.plot import set_theme
-
-set_theme()
-
-fig, ax = plt.subplots(1, 1, figsize=(10, 10))
-sns.lineplot(
-    data=results.query("model == 'DCSBM'"),
-    x="n_communities",
-    y="norm_score",
-    hue="side_eval",
-    style="is_same",
-)
-
-# %%
-mean_results = (
-    results.groupby(["n_communities", "is_same"]).mean(numeric_only=True).reset_index()
-)
-
-fig, ax = plt.subplots(1, 1, figsize=(10, 10))
-sns.lineplot(
-    data=mean_results,
-    x="n_communities",
-    y="norm_score",
-    style="is_same",
-)
-
-
-#%%
-
-import numpy as np
-from sklearn.preprocessing import QuantileTransformer
-from graspologic.utils import pass_to_ranks, symmetrize
-from scipy.stats.mstats import gmean
-
-
-def preprocess_nblast(nblast_scores, symmetrize_mode="geom", transform="ptr"):
-    if isinstance(nblast_scores, np.ndarray):
-        distance = nblast_scores  # the raw nblast scores are dissimilarities/distances
-        index = None
-    else:
-        distance = nblast_scores.values
-        index = nblast_scores.index
-    indices = np.triu_indices_from(distance, k=1)
-
-    if symmetrize_mode == "geom":
-        fwd_dist = distance[indices]
-        back_dist = distance[indices[::-1]]
-        stack_dist = np.concatenate(
-            (fwd_dist.reshape(-1, 1), back_dist.reshape(-1, 1)), axis=1
-        )
-        geom_mean = gmean(stack_dist, axis=1)
-        sym_distance = np.zeros_like(distance)
-        sym_distance[indices] = geom_mean
-        sym_distance[indices[::-1]] = geom_mean
-    else:  # simple average
-        sym_distance = symmetrize(distance)
-    # make the distances between 0 and 1
-    sym_distance /= sym_distance.max()
-    sym_distance -= sym_distance.min()
-    # and then convert to similarity
-    morph_sim = 1 - sym_distance
-
-    if transform == "quantile":
-        quant = QuantileTransformer(n_quantiles=2000)
-        transformed_vals = quant.fit_transform(morph_sim[indices].reshape(-1, 1))
-        transformed_vals = np.squeeze(transformed_vals)
-        transformed_morph = np.ones_like(morph_sim)
-        transformed_morph[indices] = transformed_vals
-        transformed_morph[indices[::-1]] = transformed_vals
-    elif transform == "ptr":
-        transformed_morph = pass_to_ranks(morph_sim)
-        np.fill_diagonal(
-            transformed_morph, 1
-        )  # should be exactly 1; isn't, because of ties
-    elif transform == "log":
-        transformed_morph = np.log(morph_sim) + 1
-    else:
-        transformed_morph = morph_sim
-
-    if index is not None:
-        transformed_morph = pd.DataFrame(transformed_morph, index=index, columns=index)
-
-    return transformed_morph
-
-
-symmetrize_mode = "geom"
-transform = "ptr"
-nblast_type = "scores"
-
-from pathlib import Path
-
-data_dir = Path(
-    "/Users/bpedigo/JHU_code/maggot_models/maggot_models/experiments/nblast/outs"
-)
-
-
-side = "left"
-nblast_sim = pd.read_csv(data_dir / f"{side}-nblast-{nblast_type}.csv", index_col=0)
-nblast_sim.columns = nblast_sim.columns.values.astype(int)
-
-# intersect_index = np.intersect1d(nblast_sim.index, left_nodes.index)
-left_sim = preprocess_nblast(
-    nblast_sim, symmetrize_mode=symmetrize_mode, transform=transform
-)
-
-nblast_sim = left_sim.reindex(index=left_nodes.index, columns=left_nodes.index)
-valid = ~nblast_sim.isna().all(axis=1)
-
-#%%
-cluster_win_nblasts = []
-valid_labels = labels[valid]
-uni_clusters = np.unique(valid_labels)
-for label in uni_clusters:
-    within_sim = nblast_sim.values[labels == label][:, labels == label]
-    triu_inds = np.triu_indices_from(within_sim, k=1)
-    upper = within_sim[triu_inds]
-    if len(upper) > 0:
-        mean_within_sim = np.nanmean(upper)
-    else:
-        mean_within_sim = np.nan
-    cluster_win_nblasts.append(mean_within_sim)
-
-# %%
-fig, ax = plt.subplots(1, 1, figsize=(8, 6))
-sns.histplot(x=cluster_win_nblasts, ax=ax)
diff --git a/sandbox/rounding.py b/sandbox/rounding.py
deleted file mode 100644
index 06e2ec7..0000000
--- a/sandbox/rounding.py
+++ /dev/null
@@ -1,5 +0,0 @@
-#%%
-import numpy as np
-
-x = 0.00001
-10 ** (np.ceil(np.log10(x)))
diff --git a/sandbox/test_fishers_exact.py b/sandbox/test_fishers_exact.py
deleted file mode 100644
index ed3e993..0000000
--- a/sandbox/test_fishers_exact.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#%%
-from scipy.stats import binom, fisher_exact
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-from pkg.plot import set_theme
-
-set_theme()
-
-n_trials = 10000
-
-num = 3
-denom = 18960
-
-pvalues = []
-for i in range(n_trials):
-    x1 = binom(denom, num / denom).rvs()
-    x2 = binom(denom, num / denom).rvs()
-    table = np.array([[x1, denom - x1], [x2, denom - x2]])
-    stat, pvalue = fisher_exact(table, alternative="two-sided")
-    pvalues.append(pvalue)
-pvalues = np.array(pvalues)
-
-fig, ax = plt.subplots(1, 1, figsize=(8, 6))
-sns.histplot(x=pvalues, bins=np.linspace(0, 1, 11), stat="probability")
-ax.tick_params(axis="x", length=5)
-
-# %%
diff --git a/sandbox/test_poetry.py b/sandbox/test_poetry.py
deleted file mode 100644
index cfdbc6d..0000000
--- a/sandbox/test_poetry.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import sys
-
-print(sys.version)
diff --git a/sandbox/test_pymaid.py b/sandbox/test_pymaid.py
deleted file mode 100644
index c76990c..0000000
--- a/sandbox/test_pymaid.py
+++ /dev/null
@@ -1,12 +0,0 @@
-#%%
-from pkg.data import load_unmatched
-from pkg.plot.neuron import simple_plot_neurons
-from pkg.pymaid import start_instance
-import pymaid
-
-start_instance()
-
-
-left_adj, left_nodes = load_unmatched("left")
-
-print(pymaid.get_names(left_nodes.index.values))