Changes from all commits (30 commits)
5823920  add lambertw gaussian and lambertw weibull (gmgeorg, Nov 19, 2023)
cf112f8  Add file (gmgeorg, Nov 19, 2023)
960f356  add weibull file (gmgeorg, Nov 19, 2023)
9433561  clean up gaussian (gmgeorg, Nov 19, 2023)
5b315e5  add gamm/exponential/weibull/lognormal distributions (gmgeorg, Nov 23, 2023)
c431439  update dist utuls (gmgeorg, Nov 23, 2023)
426a35f  set mean variance = true (gmgeorg, Nov 23, 2023)
62e03d2  fix gamma distr (gmgeorg, Nov 23, 2023)
9b73c10  update distribution utils (gmgeorg, Nov 26, 2023)
e575da3  fix pandas issue (gmgeorg, Nov 26, 2023)
950d147  updte notebooks (gmgeorg, Nov 27, 2023)
d11e642  change order of scale/conc (gmgeorg, Nov 29, 2023)
9ea4f9f  update notebook (gmgeorg, Nov 30, 2023)
3ceec0d  add lambertw gaussian and lambertw weibull (gmgeorg, Nov 19, 2023)
86a68ef  Add file (gmgeorg, Nov 19, 2023)
81b70dd  add weibull file (gmgeorg, Nov 19, 2023)
fe2cb54  clean up gaussian (gmgeorg, Nov 19, 2023)
2c481f1  add gamm/exponential/weibull/lognormal distributions (gmgeorg, Nov 23, 2023)
8610ffa  update dist utuls (gmgeorg, Nov 23, 2023)
cd8127b  set mean variance = true (gmgeorg, Nov 23, 2023)
51c359b  fix gamma distr (gmgeorg, Nov 23, 2023)
334220d  update distribution utils (gmgeorg, Nov 26, 2023)
a3d1761  fix pandas issue (gmgeorg, Nov 26, 2023)
ecae1ed  updte notebooks (gmgeorg, Nov 27, 2023)
803008e  change order of scale/conc (gmgeorg, Nov 29, 2023)
86ff341  update notebook (gmgeorg, Nov 30, 2023)
2e11176  update setup.py (gmgeorg, Nov 30, 2023)
8642160  fix unit tests; update torchlambertw; speedup computations for lamber… (gmgeorg, Dec 24, 2023)
131cf09  resolve conflict (gmgeorg, May 8, 2025)
434998a  fix test and update dependnecy (gmgeorg, May 8, 2025)
2 changes: 2 additions & 0 deletions .gitignore
@@ -2,6 +2,8 @@
 .idea/
 dist/
 latex_distributions
+venv/
+xgboostlss.egg-info/
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
806 changes: 507 additions & 299 deletions docs/examples/Gamma_Regression_CaliforniaHousing.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/examples/Gaussian_Regression.ipynb
@@ -1167,7 +1167,7 @@
 ],
 "metadata": {
  "kernelspec": {
-  "display_name": "Python 3 (ipykernel)",
+  "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
@@ -1181,7 +1181,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
-  "version": "3.9.0"
+  "version": "3.8.10"
 }
 },
 "nbformat": 4,
354 changes: 296 additions & 58 deletions docs/examples/How_To_Select_A_Univariate_Distribution.ipynb

Large diffs are not rendered by default.

1,311 changes: 1,311 additions & 0 deletions docs/examples/LambertWGamma_Regression_CaliforniaHousing.ipynb

Large diffs are not rendered by default.

1,756 changes: 1,756 additions & 0 deletions docs/examples/LambertW_Gaussian_Regression.ipynb

Large diffs are not rendered by default.
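The two Lambert W notebooks above carry the headline feature of this PR. For orientation, here is a minimal sketch of the training loop such a notebook would follow; the module path and class name (LambertWGaussian) are assumptions inferred from the notebook titles, not identifiers confirmed by the diff, while the XGBoostLSS train/predict calls follow the library's documented pattern.

# Hypothetical usage sketch; LambertWGaussian import path is assumed, not confirmed.
import numpy as np
import xgboost as xgb

from xgboostlss.model import XGBoostLSS
from xgboostlss.distributions.LambertWGaussian import LambertWGaussian  # hypothetical path

# Toy data with a heavy-tailed target, the case Lambert W x Gaussian is built for.
rng = np.random.default_rng(123)
X = rng.normal(size=(500, 4))
y = rng.standard_t(df=3, size=500)  # heavier tails than a Gaussian
dtrain = xgb.DMatrix(X, label=y)

xgblss = XGBoostLSS(LambertWGaussian())
xgblss.train({"eta": 0.1, "max_depth": 2}, dtrain, num_boost_round=20)

# Predicted distributional parameters, one column per parameter.
params = xgblss.predict(xgb.DMatrix(X), pred_type="parameters")
print(params.head())

Compared with a plain Gaussian, a Lambert W x Gaussian adds a tail/skew transformation parameter on top of location and scale, so the predicted parameter frame gains an extra column per transformation parameter.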

9 changes: 7 additions & 2 deletions pyproject.toml
@@ -10,7 +10,7 @@ license = { text = "Apache License 2.0" }
 requires-python = ">=3.10"
 dependencies = [
     "xgboost~=2.0.3",
-    "torch~=2.1.2",
+    "torch>=2.1.2",
     "pyro-ppl~=1.8.6",
     "optuna~=3.5.0",
     "properscoring~=0.1",
@@ -23,7 +23,8 @@ dependencies = [
     "seaborn~=0.13.1",
     "tqdm~=4.66.1",
     "matplotlib~=3.8.2",
-    "ipython~=8.20.0"
+    "ipython~=8.20.0",
+    "torchlambertw @ git+ssh://git@github.com/gmgeorg/torchlambertw.git#egg=torchlambertw-0.0.3"
 ]
 
 [project.optional-dependencies]
@@ -53,3 +54,7 @@ xgboostlss = ["datasets/*.csv"]
 [build-system]
 requires = ["setuptools>=61", "wheel"]
 build-backend = "setuptools.build_meta"
+
+dependencies = [
+    "torchlambertw @ git+ssh://git@github.com/gmgeorg/torchlambertw.git#egg=torchlambertw-0.0.3"
+]
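Two things stand out in this dependency change: torch is relaxed from a compatible-release pin (~=2.1.2) to a floor (>=2.1.2), and torchlambertw is installed straight from GitHub over SSH, which requires key-based access to github.com at install time. The last hunk also appends a second dependencies table after [build-system]; since pip reads project dependencies only from [project], that trailing block looks redundant with the entry added in the second hunk. A quick sanity check of the resulting environment, as a sketch; whether torchlambertw exposes __version__ is an assumption, so the lookup is guarded.

# Environment sanity check for the new pins; the import itself is the real test.
import torch
import torchlambertw  # installed from git+ssh://git@github.com/gmgeorg/torchlambertw.git

print(torch.__version__)  # any release >= 2.1.2 now satisfies pyproject.toml
print(getattr(torchlambertw, "__version__", "unknown"))  # 0.0.3 per the egg fragment, if exposed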
143 changes: 97 additions & 46 deletions tests/test_distribution_utils/test_dist_select.py
@@ -9,16 +9,25 @@
     LogNormal,
     Weibull,
     Gumbel,
-    Laplace)
+    Laplace,
+)
 from xgboostlss.distributions.Mixture import *
 from xgboostlss.distributions.SplineFlow import *
 from xgboostlss.distributions.MVN import *
 from xgboostlss.distributions.MVT import *
 from xgboostlss.distributions.MVN_LoRa import *
-from xgboostlss.distributions.distribution_utils import DistributionClass as univariate_dist_class
-from xgboostlss.distributions.multivariate_distribution_utils import Multivariate_DistributionClass as multivariate_dist_class
+from xgboostlss.distributions.distribution_utils import (
+    DistributionClass as univariate_dist_class,
+)
+from xgboostlss.distributions.multivariate_distribution_utils import (
+    Multivariate_DistributionClass as multivariate_dist_class,
+)
 from xgboostlss.distributions.flow_utils import NormalizingFlowClass as flow_dist_class
-from xgboostlss.distributions.mixture_distribution_utils import MixtureDistributionClass as mixture_dist_class
+from xgboostlss.distributions.mixture_distribution_utils import (
+    MixtureDistributionClass as mixture_dist_class,
+)
+
+import xgboostlss.distributions.distribution_utils as du
 
 
 class TestClass(BaseTestClass):
@@ -28,38 +37,58 @@ class TestClass(BaseTestClass):
     def test_univar_dist_select(self):
         # Create data for testing
        target = np.array([0.2, 0.4, 0.6, 0.8]).reshape(-1, 1)
-        candidate_distributions = [Beta, Gaussian, StudentT, Gamma, Cauchy, LogNormal, Weibull, Gumbel, Laplace]
+        candidate_distributions = [
+            Beta.Beta(),
+            Gaussian.Gaussian(),
+            StudentT.StudentT(),
+            Gamma.Gamma(),
+            Cauchy.Cauchy(),
+            LogNormal.LogNormal(),
+            Weibull.Weibull(),
+            Gumbel.Gumbel(),
+            Laplace.Laplace(),
+        ]
 
         # Call the function
-        dist_df = univariate_dist_class().dist_select(
+        dist_df = du.dist_select(
             target, candidate_distributions, plot=False, max_iter=2
         ).reset_index(drop=True)
 
         # Assertions
         assert isinstance(dist_df, pd.DataFrame)
         assert not dist_df.isna().any().any()
         assert isinstance(dist_df["distribution"].values[0], str)
-        assert np.issubdtype(dist_df["nll"].dtype, np.float64)
-        assert not np.isnan(dist_df["nll"].values).any()
-        assert not np.isinf(dist_df["nll"].values).any()
+        assert np.issubdtype(dist_df["loss"].dtype, np.float64)
+        assert not np.isnan(dist_df["loss"].values).any()
+        assert not np.isinf(dist_df["loss"].values).any()
 
     def test_univar_dist_select_plot(self):
         # Create data for testing
         target = np.array([0.2, 0.4, 0.6, 0.8]).reshape(-1, 1)
-        candidate_distributions = [Beta, Gaussian, StudentT, Gamma, Cauchy, LogNormal, Weibull, Gumbel, Laplace]
+        candidate_distributions = [
+            Beta.Beta(),
+            Gaussian.Gaussian(),
+            StudentT.StudentT(),
+            Gamma.Gamma(),
+            Cauchy.Cauchy(),
+            LogNormal.LogNormal(),
+            Weibull.Weibull(),
+            Gumbel.Gumbel(),
+            Laplace.Laplace(),
+        ]
 
         # Call the function
-        dist_df = univariate_dist_class().dist_select(
+        dist_df = du.dist_select(
             target, candidate_distributions, plot=True, max_iter=2
         ).reset_index(drop=True)
 
         # Assertions
         assert isinstance(dist_df, pd.DataFrame)
         assert not dist_df.isna().any().any()
         assert isinstance(dist_df["distribution"].values[0], str)
-        assert np.issubdtype(dist_df["nll"].dtype, np.float64)
-        assert not np.isnan(dist_df["nll"].values).any()
-        assert not np.isinf(dist_df["nll"].values).any()
+        assert np.issubdtype(dist_df["loss"].dtype, np.float64)
+        assert not np.isnan(dist_df["loss"].values).any()
+        assert not np.isinf(dist_df["loss"].values).any()
 
     ####################################################################################################################
     # Normalizing Flows
@@ -71,14 +100,23 @@ def test_flow_select(self):
         target_support = "real"
 
         candidate_flows = [
-            SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="linear"),
-            SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="quadratic")
+            SplineFlow(
+                target_support=target_support, count_bins=2, bound=bound, order="linear"
+            ),
+            SplineFlow(
+                target_support=target_support,
+                count_bins=2,
+                bound=bound,
+                order="quadratic",
+            ),
         ]
 
         # Call the function
-        dist_df = flow_dist_class().flow_select(
-            target, candidate_flows, plot=False, max_iter=2
-        ).reset_index(drop=True)
+        dist_df = (
+            flow_dist_class()
+            .flow_select(target, candidate_flows, plot=False, max_iter=2)
+            .reset_index(drop=True)
+        )
 
         # Assertions
         assert isinstance(dist_df, pd.DataFrame)
@@ -95,14 +133,23 @@ def test_flow_select_plot(self):
         target_support = "real"
 
         candidate_flows = [
-            SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="linear"),
-            SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="quadratic")
+            SplineFlow(
+                target_support=target_support, count_bins=2, bound=bound, order="linear"
+            ),
+            SplineFlow(
+                target_support=target_support,
+                count_bins=2,
+                bound=bound,
+                order="quadratic",
+            ),
         ]
 
         # Call the function
-        dist_df = flow_dist_class().flow_select(
-            target, candidate_flows, plot=True, max_iter=2
-        ).reset_index(drop=True)
+        dist_df = (
+            flow_dist_class()
+            .flow_select(target, candidate_flows, plot=True, max_iter=2)
+            .reset_index(drop=True)
+        )
 
         # Assertions
         assert isinstance(dist_df, pd.DataFrame)
@@ -127,13 +174,15 @@ def test_mixture_dist_select(self):
             Mixture(LogNormal.LogNormal()),
             Mixture(Weibull.Weibull()),
             Mixture(Gumbel.Gumbel()),
-            Mixture(Laplace.Laplace())
+            Mixture(Laplace.Laplace()),
         ]
 
         # Call the function
-        dist_df = mixture_dist_class().dist_select(
-            target, candidate_distributions, plot=False, max_iter=2
-        ).reset_index(drop=True)
+        dist_df = (
+            mixture_dist_class()
+            .dist_select(target, candidate_distributions, plot=False, max_iter=2)
+            .reset_index(drop=True)
+        )
 
         # Assertions
         assert isinstance(dist_df, pd.DataFrame)
@@ -155,13 +204,15 @@ def test_mixture_dist_select_plot(self):
             Mixture(LogNormal.LogNormal()),
             Mixture(Weibull.Weibull()),
             Mixture(Gumbel.Gumbel()),
-            Mixture(Laplace.Laplace())
+            Mixture(Laplace.Laplace()),
        ]
 
         # Call the function
-        dist_df = mixture_dist_class().dist_select(
-            target, candidate_distributions, plot=True, max_iter=2
-        ).reset_index(drop=True)
+        dist_df = (
+            mixture_dist_class()
+            .dist_select(target, candidate_distributions, plot=True, max_iter=2)
+            .reset_index(drop=True)
+        )
 
         # Assertions
         assert isinstance(dist_df, pd.DataFrame)
@@ -179,16 +230,16 @@ def test_multivar_dist_select(self):
         multivar_dist_class = MVN()
         target = np.arange(0.1, 0.9, 0.1)
         target = multivar_dist_class.target_append(
-            target,
-            multivar_dist_class.n_targets,
-            multivar_dist_class.n_dist_param
-        )[:, :multivar_dist_class.n_targets]
+            target, multivar_dist_class.n_targets, multivar_dist_class.n_dist_param
+        )[:, : multivar_dist_class.n_targets]
         candidate_distributions = [MVN(), MVT(), MVN_LoRa()]
 
         # Call the function
-        dist_df = multivariate_dist_class().dist_select(
-            target, candidate_distributions, plot=False, max_iter=2
-        ).reset_index(drop=True)
+        dist_df = (
+            multivariate_dist_class()
+            .dist_select(target, candidate_distributions, plot=False, max_iter=2)
+            .reset_index(drop=True)
+        )
 
         # Assertions
         assert isinstance(dist_df, pd.DataFrame)
@@ -203,16 +254,16 @@ def test_multivar_dist_select_plot(self):
         multivar_dist_class = MVN()
         target = np.arange(0.1, 0.9, 0.1)
         target = multivar_dist_class.target_append(
-            target,
-            multivar_dist_class.n_targets,
-            multivar_dist_class.n_dist_param
-        )[:, :multivar_dist_class.n_targets]
+            target, multivar_dist_class.n_targets, multivar_dist_class.n_dist_param
+        )[:, : multivar_dist_class.n_targets]
         candidate_distributions = [MVN(), MVT(), MVN_LoRa()]
 
         # Call the function
-        dist_df = multivariate_dist_class().dist_select(
-            target, candidate_distributions, plot=True, ncol=1, max_iter=2
-        ).reset_index(drop=True)
+        dist_df = (
+            multivariate_dist_class()
+            .dist_select(target, candidate_distributions, plot=True, ncol=1, max_iter=2)
+            .reset_index(drop=True)
+        )
 
         # Assertions
         assert isinstance(dist_df, pd.DataFrame)
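The updated tests double as documentation for the revised univariate selection API: dist_select now lives at module level in distribution_utils rather than as a DistributionClass method, candidates are passed as instances rather than classes, and the result column is named loss instead of nll. A usage sketch under exactly those assumptions, with a larger max_iter than the tests' smoke value:

# Rank candidate distributions by unconditional fit to a target, per the new API.
import numpy as np

import xgboostlss.distributions.distribution_utils as du
from xgboostlss.distributions import Gamma, Gaussian, Weibull

target = np.random.lognormal(mean=0.0, sigma=0.5, size=(1000, 1))
candidates = [Gaussian.Gaussian(), Gamma.Gamma(), Weibull.Weibull()]

dist_df = du.dist_select(target, candidates, plot=False, max_iter=50).reset_index(drop=True)
print(dist_df[["distribution", "loss"]])  # lowest loss = best-fitting candidate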
38 changes: 25 additions & 13 deletions tests/test_distribution_utils/test_draw_samples.py
@@ -8,11 +8,13 @@ class TestClass(BaseTestClass):
     def test_draw_samples(self, dist_class):
         if dist_class.dist.univariate:
             # Create data for testing
-            predt_params = pd.DataFrame(np.array([0.5 for _ in range(dist_class.dist.n_dist_param)], dtype="float32")).T
-
+            predt_params_dict = {
+                k: [float(v(torch.tensor(0.0)).numpy())]
+                for k, v in dist_class.dist.param_dict.items()
+            }
+            predt_params = pd.DataFrame.from_dict(predt_params_dict).astype("float32")
             # Call the function
             dist_samples = dist_class.dist.draw_samples(predt_params)
-
             # Assertions
             if str(dist_class.dist).split(".")[2] != "Expectile":
                 assert isinstance(dist_samples, (pd.DataFrame, type(None)))
@@ -25,18 +27,26 @@ def test_draw_samples(self, dist_class):
             predt = np.array([0.5 for _ in range(dist_class.dist.n_dist_param)])
             predt = predt.reshape(-1, dist_class.dist.n_dist_param)
             predt = [
-                torch.tensor(predt[:, i].reshape(-1, 1), requires_grad=False) for i in
-                range(dist_class.dist.n_dist_param)
+                torch.tensor(predt[:, i].reshape(-1, 1), requires_grad=False)
+                for i in range(dist_class.dist.n_dist_param)
             ]
-            predt_transformed = dist_class.dist.param_transform(predt, dist_class.dist.param_dict,
-                                                                dist_class.dist.n_targets, rank=dist_class.dist.rank,
-                                                                n_obs=n_obs)
+            predt_transformed = dist_class.dist.param_transform(
+                predt,
+                dist_class.dist.param_dict,
+                dist_class.dist.n_targets,
+                rank=dist_class.dist.rank,
+                n_obs=n_obs,
+            )
 
             # Call the function
             if dist_class.dist.distribution.__name__ == "Dirichlet":
-                dist_kwargs = dict(zip(dist_class.dist.distribution_arg_names, [predt_transformed]))
+                dist_kwargs = dict(
+                    zip(dist_class.dist.distribution_arg_names, [predt_transformed])
+                )
             else:
-                dist_kwargs = dict(zip(dist_class.dist.distribution_arg_names, predt_transformed))
+                dist_kwargs = dict(
+                    zip(dist_class.dist.distribution_arg_names, predt_transformed)
+                )
             dist_pred = dist_class.dist.distribution(**dist_kwargs)
             dist_samples = dist_class.dist.draw_samples(dist_pred)
 
@@ -47,7 +57,11 @@ def test_draw_samples(self, dist_class):
 
     def test_draw_samples_mixture(self, mixture_class):
         # Create data for testing
-        predt_params = pd.DataFrame(np.array([0.5 for _ in range(mixture_class.dist.n_dist_param)], dtype="float32")).T
+        predt_params = pd.DataFrame(
+            np.array(
+                [0.5 for _ in range(mixture_class.dist.n_dist_param)], dtype="float32"
+            )
+        ).T
 
         # Call the function
         dist_samples = mixture_class.dist.draw_samples(predt_params)
@@ -56,5 +70,3 @@ def test_draw_samples_mixture(self, mixture_class):
         assert isinstance(dist_samples, (pd.DataFrame, type(None)))
         assert not dist_samples.isna().any().any()
         assert not np.isinf(dist_samples).any().any()
-
-
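The first hunk's replacement is worth imitating outside the test suite: rather than feeding an arbitrary 0.5 for every parameter, it derives one valid default per parameter by pushing 0.0 through each response function in param_dict, which lands on each parameter's natural scale (e.g. a softplus or exp response maps 0.0 to a strictly positive scale). A standalone sketch of the same pattern, assuming an instantiated distribution exposes param_dict and draw_samples directly, as the test's dist_class.dist fixture does:

# Build a one-row frame of valid default parameters, then sample from it.
import pandas as pd
import torch

from xgboostlss.distributions import Gaussian

dist = Gaussian.Gaussian()
predt_params_dict = {
    k: [float(v(torch.tensor(0.0)).numpy())]  # response_fn(0.0) per parameter
    for k, v in dist.param_dict.items()
}
predt_params = pd.DataFrame.from_dict(predt_params_dict).astype("float32")

samples = dist.draw_samples(predt_params)  # one parameter row -> sampled draws
print(predt_params)
print(samples)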