Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 40 additions & 16 deletions src/cooler/create/_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -1239,34 +1239,62 @@ def create_scool(
n_chroms = len(chroms)
n_bins = len(bins)

# Create root group
# Create root group (updated)
bins_identical = False
with h5py.File(file_path, mode) as f:
logger.info(f'Creating cooler at "{file_path}::{group_path}"')
if group_path == "/":
for name in ["chroms", "bins"]:
if name in f:
del f[name]
if group_path in f:
grp = f[group_path]
# Check if a bins group already exists
if "bins" in grp:
# Read the existing bins datasets
existing_chrom = grp["bins/chrom"][()]
existing_start = grp["bins/start"][()]
existing_end = grp["bins/end"][()]
# If stored as bytes, decode to string
if existing_chrom.dtype.kind == "S":
existing_chrom = np.char.decode(existing_chrom, "utf-8")
existing_bins = pd.DataFrame({
"chrom": existing_chrom,
"start": existing_start,
"end": existing_end,
}).reset_index(drop=True)
new_bins = bins[["chrom", "start", "end"]].reset_index(drop=True)
if existing_bins.equals(new_bins):
logger.info(
"Existing bins are identical; skipping bins re-creation."
)
bins_identical = True
else:
logger.info("Bins differ; re-creating bins group.")
del grp["bins"]
# Always re-create chroms for consistency
# (or add a similar check if desired)
if "chroms" in grp:
del grp["chroms"]
else:
try:
f.create_group(group_path)
except ValueError:
del f[group_path]
f.create_group(group_path)

# Write chroms and bins
with h5py.File(file_path, "r+") as f:
h5 = f[group_path]

logger.info("Writing chroms")
grp = h5.create_group("chroms")
write_chroms(grp, chroms, h5opts)
if not bins_identical:
logger.info("Writing bins")
grp = h5.create_group("bins")
write_bins(grp, bins, chroms["name"], h5opts)
else:
logger.info("Skipping writing bins since they are identical.")

logger.info("Writing bins")
grp = h5.create_group("bins")
write_bins(grp, bins, chroms["name"], h5opts)

# Write info
with h5py.File(file_path, "r+") as f:
h5 = f[group_path]

logger.info("Writing info")
info = {}
info["bin-type"] = "fixed" if binsize is not None else "variable"
Expand All @@ -1282,11 +1310,7 @@ def create_scool(

# Append single cells
for key in cell_names:
if "/" in key:
cell_name = key.split("/")[-1]
else:
cell_name = key

cell_name = key.split("/")[-1] if "/" in key else key
create(
cool_uri + "::/cells/" + cell_name,
bins_dict[key],
Expand Down