From 5f0664e8e93eb2e1ba47b0f3758b32adb0351d38 Mon Sep 17 00:00:00 2001
From: Benjamin Pedigo
Date: Tue, 14 Mar 2023 09:27:56 -0400
Subject: [PATCH] Delete sandbox directory

---
 sandbox/align.py              |  35 -----
 sandbox/binom_sims.py         | 114 --------------
 sandbox/gm_ranking.py         |  53 -------
 sandbox/rdpg_lr_comparison.py | 278 ----------------------------------
 sandbox/rounding.py           |   5 -
 sandbox/test_fishers_exact.py |  28 ----
 sandbox/test_poetry.py        |   3 -
 sandbox/test_pymaid.py        |  12 --
 8 files changed, 528 deletions(-)
 delete mode 100644 sandbox/align.py
 delete mode 100644 sandbox/binom_sims.py
 delete mode 100644 sandbox/gm_ranking.py
 delete mode 100644 sandbox/rdpg_lr_comparison.py
 delete mode 100644 sandbox/rounding.py
 delete mode 100644 sandbox/test_fishers_exact.py
 delete mode 100644 sandbox/test_poetry.py
 delete mode 100644 sandbox/test_pymaid.py

diff --git a/sandbox/align.py b/sandbox/align.py
deleted file mode 100644
index becfeb5..0000000
--- a/sandbox/align.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#%%
-import numpy as np
-from scipy.stats import special_ortho_group
-import matplotlib.pyplot as plt
-
-np.random.seed(88887)
-Q = special_ortho_group.rvs(2)
-X = np.random.uniform(0, 1, (20, 2))
-Y = X @ Q
-fig, ax = plt.subplots(1, 1, figsize=(6, 6))
-ax.axvline(0, color="k")
-ax.axhline(0, color="k")
-ax.scatter(X[:, 0], X[:, 1], color="lightblue")
-ax.scatter(Y[:, 0], Y[:, 1], color="red")
-
-rads_start = 0
-rads_end = np.arctan2(Q[1, 0], Q[0, 0])
-
-Zs = []
-for rads in np.linspace(rads_start, rads_end, 100):
-    semi_Q = np.array(
-        [
-            [np.cos(rads), -np.sin(rads)],
-            [np.sin(rads), np.cos(rads)],
-        ]
-    )
-    Z = X @ semi_Q
-    Zs.append(Z)
-Zs = np.stack(Zs)
-
-ax.plot(Zs[:, :, 0], Zs[:, :, 1], color="lightgrey", zorder=-1)
-
-ax.axis("off")
-
-#%%
diff --git a/sandbox/binom_sims.py b/sandbox/binom_sims.py
deleted file mode 100644
index 50c14b1..0000000
--- a/sandbox/binom_sims.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#%%
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import seaborn as sns
-from pkg.plot import set_theme
-from scipy.stats import binom, chi2_contingency, ks_1samp, uniform
-
-c = 1.5
-ns = np.array([1000, 1500, 750, 1250])
-ps = np.array([0.2, 0.02, 0.05, 0.1])
-
-rng = np.random.default_rng(8888)
-
-
-# for i in range(len(ns)):
-
-K = len(ns)
-
-n_sims = 1000
-rows = []
-for sim in range(n_sims):
-    for i in range(K):
-        x = rng.binomial(ns, ps)
-        y = rng.binomial(ns, c * ps)
-
-        cont_table = np.array([[x[i], ns[i] - x[i]], [y[i], ns[i] - y[i]]])
-        stat, pvalue, _, _ = chi2_contingency(cont_table)
-        rows.append(
-            {
-                "dim": i,
-                "stat": stat,
-                "pvalue": pvalue,
-                "sim": sim,
-                "c": c,
-                "correction": False,
-            }
-        )
-
-        corrected_cont_table = cont_table.copy()
-        # corrected_cont_table[1, 0] = cont_table[1, 0] +
-        n = corrected_cont_table[1].sum()
-        phat = corrected_cont_table[1, 0] / n
-        phat = 1 / c * phat
-        corrected_cont_table[1, 0] = phat * n
-        corrected_cont_table[1, 1] = (1 - phat) * n
-        stat, pvalue, _, _ = chi2_contingency(corrected_cont_table)
-        rows.append(
-            {
-                "dim": i,
-                "stat": stat,
-                "pvalue": pvalue,
-                "sim": sim,
-                "c": c,
-                "correction": True,
-            }
-        )
-
-results = pd.DataFrame(rows)
-
-#%%
-
-set_theme()
-for i in range(K):
-    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
-    sns.histplot(data=results[results["dim"] == i], x="pvalue", hue="correction", ax=ax)
-
-
-def subuniformity_plot(x, ax=None, **kwargs):
-    if ax is None:
-        _, ax = plt.subplots(1, 1, figsize=(6, 6))
-    sns.histplot(x, ax=ax, stat="density", cumulative=True, **kwargs)
-    stat, pvalue = ks_1samp(x, uniform(0, 1).cdf, alternative="greater")
-    ax.plot([0, 1], [0, 1], linewidth=3, linestyle=":", color="black")
-    ax.text(0, 1, f"p-value: {pvalue:.3f}")
-    ax.set_ylabel("Cumulative density")
-    return ax, stat, pvalue
-
-
-for i in range(K):
-    data = results[(results["dim"] == i) & (results["correction"])]["pvalue"]
-    subuniformity_plot(data)
-
-#%%
-
-
-ps = np.linspace(0.1, 0.3, 1000)
-
-x = 30  # p ~= 0.2
-n = 100
-
-y = 40  # p ~= 0.3 so c ~= 1.5
-m = 200
-
-c = 1.5
-
-
-def func(p):
-    return (
-        x * np.log(c * p)
-        + (n - x) * np.log(1 - c * p)
-        + y * np.log(p)
-        + (m - y) * np.log(1 - p)
-    )
-
-
-vals = func(ps)
-
-fig, ax = plt.subplots(1, 1, figsize=(10, 10))
-ax.plot(ps, vals)
-
-est = 1 / c * x / (n + m) + y / (n + m)
-
-ax.axvline(est, color="red")
diff --git a/sandbox/gm_ranking.py b/sandbox/gm_ranking.py
deleted file mode 100644
index 0814cd8..0000000
--- a/sandbox/gm_ranking.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from graspologic.utils import remove_loops
-import numpy as np
-from graspologic.match import GraphMatch
-
-
-def signal_flow(A):
-    """Implementation of the signal flow metric from Varshney et al 2011
-
-    Parameters
-    ----------
-    A : np.ndarray
-        Adjacency matrix of the network
-
-    Returns
-    -------
-    np.ndarray
-        Signal flow value for each node
-    """
-    A = A.copy()
-    A = remove_loops(A)
-    W = (A + A.T) / 2
-
-    D = np.diag(np.sum(W, axis=1))
-
-    L = D - W
-
-    b = np.sum(W * np.sign(A - A.T), axis=1)
-    L_pinv = np.linalg.pinv(L)
-    z = L_pinv @ b
-
-    return z
-
-
-def rank_signal_flow(A):
-    sf = signal_flow(A)
-    perm_inds = np.argsort(-sf)
-    return perm_inds
-
-
-def rank_graph_match_flow(A, n_init=10, max_iter=30, eps=1e-4, **kwargs):
-    n = len(A)
-    try:
-        initial_perm = rank_signal_flow(A)
-        init = np.eye(n)[initial_perm]
-    except np.linalg.LinAlgError:
-        print("SVD did not converge in signal flow")
-        init = np.full((n, n), 1 / n)
-    match_mat = np.zeros((n, n))
-    triu_inds = np.triu_indices(n, k=1)
-    match_mat[triu_inds] = 1
-    gm = GraphMatch(n_init=n_init, max_iter=max_iter, init=init, eps=eps, **kwargs)
-    perm_inds = gm.fit_predict(match_mat, A)
-    return perm_inds
diff --git a/sandbox/rdpg_lr_comparison.py b/sandbox/rdpg_lr_comparison.py
deleted file mode 100644
index 8bb518c..0000000
--- a/sandbox/rdpg_lr_comparison.py
+++ /dev/null
@@ -1,278 +0,0 @@
-#%%
-from pkg.data import load_matched
-
-left_adj, left_nodes = load_matched("left", weights=False)
-right_adj, right_nodes = load_matched("right", weights=False)
-assert (left_nodes["pair_id"].values == right_nodes["pair_id"].values).all()
-
-#%%
-max_n_components = 48
-
-from graspologic.embed import AdjacencySpectralEmbed
-
-
-ase = AdjacencySpectralEmbed(n_components=max_n_components)
-X_left, Y_left = ase.fit_transform(left_adj)
-X_right, Y_right = ase.fit_transform(right_adj)
-
-#%%
-
-import numpy as np
-
-
-def regularize_P(P):
-    P -= np.diag(np.diag(P))
-    n = P.shape[0]
-    lower_lim = 1 / n**2
-    upper_lim = 1 - lower_lim
-    P[P >= upper_lim] = upper_lim
-    P[P <= lower_lim] = lower_lim
-    return P
-
-
-def compute_P(X, Y):
-    P = X @ Y.T
-    P = regularize_P(P)
-    return P
-
-
-#%%
-from scipy.stats import bernoulli
-
-
-#%%
-from tqdm.notebook import tqdm
-
-n_components_range = np.arange(1, max_n_components + 1)
-rows = []
-for n_components in tqdm(n_components_range):
-    P_left = compute_P(X_left[:, :n_components], Y_left[:, :n_components])
-    log_lik = bernoulli(P_left).logpmf(right_adj).sum()
-    norm_log_lik = log_lik / right_adj.sum()
-    rows.append(
-        {
-            "n_components": n_components,
-            "log_lik": log_lik,
-            "norm_log_lik": norm_log_lik,
-            "train_side": "left",
-            "test_side": "right",
-        }
-    )
-for n_components in tqdm(n_components_range):
-    P_right = compute_P(X_right[:, :n_components], Y_right[:, :n_components])
-    log_lik = bernoulli(P_right).logpmf(left_adj).sum()
-    norm_log_lik = log_lik / left_adj.sum()
-    rows.append(
-        {
-            "n_components": n_components,
-            "log_lik": log_lik,
-            "norm_log_lik": norm_log_lik,
-            "train_side": "right",
-            "test_side": "left",
-        }
-    )
-
-# %%
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-fig, ax = plt.subplots(1, 1, figsize=(10, 10))
-
-results = pd.DataFrame(rows)
-y = "norm_log_lik"
-sns.lineplot(data=results, x="n_components", y=y, hue="train_side", ax=ax)
-
-for side, side_results in results.groupby("train_side"):
-    idx = side_results[y].idxmax()
-    n_components = side_results.loc[idx, "n_components"]
-    ax.axvline(n_components, color="k", linestyle="--", alpha=0.5)
-    print(n_components)
-
-#%%
-
-
-from graspologic.models import DCSBMEstimator, SBMEstimator
-from graspologic.utils import binarize, remove_loops
-from scipy.stats import poisson
-
-
-def calculate_blockmodel_likelihood(source_adj, target_adj, labels, model="DCSBM"):
-    if model == "DCSBM":
-        Model = DCSBMEstimator
-    elif model == "SBM":
-        Model = SBMEstimator
-
-    estimator = Model(directed=True, loops=False)
-    uni_labels, inv = np.unique(labels, return_inverse=True)
-    estimator.fit(source_adj, inv)
-    train_P = estimator.p_mat_
-    train_P = regularize_P(train_P)
-
-    n_params = estimator._n_parameters() + len(labels)
-
-    score = poisson.logpmf(target_adj, train_P).sum()
-    out = dict(
-        score=score,
-        model=model,
-        n_params=n_params,
-        norm_score=score / target_adj.sum(),
-        n_communities=len(uni_labels),
-    )
-
-    return out
-
-
-#%%
-
-n_components = 24
-method = "average"
-metric = "cosine"
-from scipy.cluster import hierarchy
-
-n_clusters_range = np.arange(5, 125, 1)
-
-rows = []
-for (X_fit, adj_fit), side_fit in zip(
-    [(X_left, left_adj), (X_right, right_adj)], ["left", "right"]
-):
-    Z = hierarchy.linkage(X_fit[:, :n_components], method=method, metric=metric)
-    for n_clusters in tqdm(n_clusters_range):
-        labels = hierarchy.fcluster(Z, n_clusters, criterion="maxclust")
-        for adj_eval, side_eval in zip([left_adj, right_adj], ["left", "right"]):
-            row = calculate_blockmodel_likelihood(adj_fit, adj_eval, labels)
-            row["side_fit"] = side_fit
-            row["side_eval"] = side_eval
-            rows.append(row)
-
-results = pd.DataFrame(rows)
-results
-# %%
-results["fit_eval"] = results["side_fit"] + "-" + results["side_eval"]
-results["is_same"] = results["side_fit"] == results["side_eval"]
-from pkg.plot import set_theme
-
-set_theme()
-
-fig, ax = plt.subplots(1, 1, figsize=(10, 10))
-sns.lineplot(
-    data=results.query("model == 'DCSBM'"),
-    x="n_communities",
-    y="norm_score",
-    hue="side_eval",
-    style="is_same",
-)
-
-# %%
-mean_results = (
-    results.groupby(["n_communities", "is_same"]).mean(numeric_only=True).reset_index()
-)
-
-fig, ax = plt.subplots(1, 1, figsize=(10, 10))
-sns.lineplot(
-    data=mean_results,
-    x="n_communities",
-    y="norm_score",
-    style="is_same",
-)
-
-
-#%%
-
-import numpy as np
-from sklearn.preprocessing import QuantileTransformer
-from graspologic.utils import pass_to_ranks, symmetrize
-from scipy.stats.mstats import gmean
-
-
-def preprocess_nblast(nblast_scores, symmetrize_mode="geom", transform="ptr"):
-    if isinstance(nblast_scores, np.ndarray):
-        distance = nblast_scores  # the raw nblast scores are dissimilarities/distances
-        index = None
-    else:
-        distance = nblast_scores.values
-        index = nblast_scores.index
-    indices = np.triu_indices_from(distance, k=1)
-
-    if symmetrize_mode == "geom":
-        fwd_dist = distance[indices]
-        back_dist = distance[indices[::-1]]
-        stack_dist = np.concatenate(
-            (fwd_dist.reshape(-1, 1), back_dist.reshape(-1, 1)), axis=1
-        )
-        geom_mean = gmean(stack_dist, axis=1)
-        sym_distance = np.zeros_like(distance)
-        sym_distance[indices] = geom_mean
-        sym_distance[indices[::-1]] = geom_mean
-    else:  # simple average
-        sym_distance = symmetrize(distance)
-    # make the distances between 0 and 1
-    sym_distance /= sym_distance.max()
-    sym_distance -= sym_distance.min()
-    # and then convert to similarity
-    morph_sim = 1 - sym_distance
-
-    if transform == "quantile":
-        quant = QuantileTransformer(n_quantiles=2000)
-        transformed_vals = quant.fit_transform(morph_sim[indices].reshape(-1, 1))
-        transformed_vals = np.squeeze(transformed_vals)
-        transformed_morph = np.ones_like(morph_sim)
-        transformed_morph[indices] = transformed_vals
-        transformed_morph[indices[::-1]] = transformed_vals
-    elif transform == "ptr":
-        transformed_morph = pass_to_ranks(morph_sim)
-        np.fill_diagonal(
-            transformed_morph, 1
-        )  # should be exactly 1; isn't, because of ties
-    elif transform == "log":
-        transformed_morph = np.log(morph_sim) + 1
-    else:
-        transformed_morph = morph_sim
-
-    if index is not None:
-        transformed_morph = pd.DataFrame(transformed_morph, index=index, columns=index)
-
-    return transformed_morph
-
-
-symmetrize_mode = "geom"
-transform = "ptr"
-nblast_type = "scores"
-
-from pathlib import Path
-
-data_dir = Path(
-    "/Users/bpedigo/JHU_code/maggot_models/maggot_models/experiments/nblast/outs"
-)
-
-
-side = "left"
-nblast_sim = pd.read_csv(data_dir / f"{side}-nblast-{nblast_type}.csv", index_col=0)
-nblast_sim.columns = nblast_sim.columns.values.astype(int)
-
-# intersect_index = np.intersect1d(nblast_sim.index, left_nodes.index)
-left_sim = preprocess_nblast(
-    nblast_sim, symmetrize_mode=symmetrize_mode, transform=transform
-)
-
-nblast_sim = left_sim.reindex(index=left_nodes.index, columns=left_nodes.index)
-valid = ~nblast_sim.isna().all(axis=1)
-
-#%%
-cluster_win_nblasts = []
-valid_labels = labels[valid]
-uni_clusters = np.unique(valid_labels)
-for label in uni_clusters:
-    within_sim = nblast_sim.values[labels == label][:, labels == label]
-    triu_inds = np.triu_indices_from(within_sim, k=1)
-    upper = within_sim[triu_inds]
-    if len(upper) > 0:
-        mean_within_sim = np.nanmean(upper)
-    else:
-        mean_within_sim = np.nan
-    cluster_win_nblasts.append(mean_within_sim)
-
-# %%
-fig, ax = plt.subplots(1, 1, figsize=(8, 6))
-sns.histplot(x=cluster_win_nblasts, ax=ax)
diff --git a/sandbox/rounding.py b/sandbox/rounding.py
deleted file mode 100644
index 06e2ec7..0000000
--- a/sandbox/rounding.py
+++ /dev/null
@@ -1,5 +0,0 @@
-#%%
-import numpy as np
-
-x = 0.00001
-10 ** (np.ceil(np.log10(x)))
diff --git a/sandbox/test_fishers_exact.py b/sandbox/test_fishers_exact.py
deleted file mode 100644
index ed3e993..0000000
--- a/sandbox/test_fishers_exact.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#%%
-from scipy.stats import binom, fisher_exact
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-from pkg.plot import set_theme
-
-set_theme()
-
-n_trials = 10000
-
-num = 3
-denom = 18960
-
-pvalues = []
-for i in range(n_trials):
-    x1 = binom(denom, num / denom).rvs()
-    x2 = binom(denom, num / denom).rvs()
-    table = np.array([[x1, denom - x1], [x2, denom - x2]])
-    stat, pvalue = fisher_exact(table, alternative="two-sided")
-    pvalues.append(pvalue)
-pvalues = np.array(pvalues)
-
-fig, ax = plt.subplots(1, 1, figsize=(8, 6))
-sns.histplot(x=pvalues, bins=np.linspace(0, 1, 11), stat="probability")
-ax.tick_params(axis="x", length=5)
-
-# %%
diff --git a/sandbox/test_poetry.py b/sandbox/test_poetry.py
deleted file mode 100644
index cfdbc6d..0000000
--- a/sandbox/test_poetry.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import sys
-
-print(sys.version)
diff --git a/sandbox/test_pymaid.py b/sandbox/test_pymaid.py
deleted file mode 100644
index c76990c..0000000
--- a/sandbox/test_pymaid.py
+++ /dev/null
@@ -1,12 +0,0 @@
-#%%
-from pkg.data import load_unmatched
-from pkg.plot.neuron import simple_plot_neurons
-from pkg.pymaid import start_instance
-import pymaid
-
-start_instance()
-
-
-left_adj, left_nodes = load_unmatched("left")
-
-print(pymaid.get_names(left_nodes.index.values))