Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
683 changes: 683 additions & 0 deletions docs/data2_recovery_survey.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ dependencies = [
"polars>=1.33.0",
"hydra-submitit-launcher>=1.2.0",
"rdkit>=2024.9.4",
"openbabel-wheel>=3.1.1", # For ligand structure minimization
]

[build-system]
Expand Down
44 changes: 44 additions & 0 deletions slurm/scripts/train_latent_generator.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

#SBATCH --partition b200
#SBATCH --nodes 1
#SBATCH --ntasks-per-node 8
#SBATCH --gpus-per-node 8
#SBATCH --cpus-per-task 16
#SBATCH -o /data2/ume/latent_generator_/slurm/logs/train/%J_%x.out
#SBATCH -q preempt
#SBATCH --mem=256G
#SBATCH --job-name=latent_generator
#SBATCH -t 7-00:00:00

# Launch latent-generator training on one node (8 GPUs, one task per GPU).
#
# Fail fast: abort on any command failure, unset variable, or pipeline
# error so the job does not silently continue with a broken environment
# (e.g. a missing virtualenv) and burn its 7-day allocation.
set -euo pipefail

# Log GPU inventory/driver state at job start for debugging.
nvidia-smi

source .venv/bin/activate
echo "SLURM_JOB_ID = ${SLURM_JOB_ID}"

# EFA / OpenMPI / OFI-NCCL libraries for multi-GPU communication on AWS.
export LD_LIBRARY_PATH=/opt/amazon/efa/lib64:/opt/amazon/openmpi/lib64:/opt/amazon/ofi-nccl/lib64

export WANDB_INSECURE_DISABLE_SSL=true
export HYDRA_FULL_ERROR=1
export PYTHONUNBUFFERED=1
export NCCL_DEBUG=INFO

export LOBSTER_RUNS_DIR="/data2/ume/latent_generator_/runs/" #"s3://prescient-lobster/ume/runs" # CHANGE TO YOUR S3 BUCKET
export LOBSTER_DATA_DIR="/data2/ume/.cache2/" # CHANGE TO YOUR DATA DIRECTORY
export LOBSTER_USER=$(whoami) # CHANGE TO YOUR WANDB USERNAME IF NOT YOUR UNIXID
export WANDB_BASE_URL=https://genentech.wandb.io

export TOKENIZERS_PARALLELISM=true

# Sets default permissions to allow group write
# access for newly created files. Remove if not needed
umask g+w

# Quote SLURM-provided variables so the command is robust under `set -u`
# and word splitting; no trailing backslash after the final argument.
srun -u --cpus-per-task "$SLURM_CPUS_PER_TASK" --cpu-bind=cores,verbose \
    lobster_train \
    experiment=train_latent_generator \
    data.num_workers=8 \
    ++trainer.num_nodes="$SLURM_JOB_NUM_NODES" \
    trainer.num_sanity_val_steps=0 \
    +trainer.strategy=ddp_find_unused_parameters_true
59 changes: 59 additions & 0 deletions slurm/scripts/train_latent_generator_ligand.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env bash

#SBATCH --partition b200
#SBATCH --nodes 1
#SBATCH --ntasks-per-node 7
#SBATCH --gpus-per-node 7
#SBATCH --cpus-per-task 16
#SBATCH -o /data2/ume/latent_generator_/slurm/logs/train/%J_%x.out
#SBATCH -q preempt
#SBATCH --mem=256G
#SBATCH --job-name=latent_generator
#SBATCH -t 7-00:00:00

# Launch latent-generator training with ligand structure support on one
# node (7 GPUs, one task per GPU).
#
# Fail fast: abort on any command failure, unset variable, or pipeline
# error so the job does not silently continue with a broken environment
# (e.g. a missing virtualenv) and burn its 7-day allocation.
set -euo pipefail

# Log GPU inventory/driver state at job start for debugging.
nvidia-smi

source .venv/bin/activate
echo "SLURM_JOB_ID = ${SLURM_JOB_ID}"

# EFA / OpenMPI / OFI-NCCL libraries for multi-GPU communication on AWS.
export LD_LIBRARY_PATH=/opt/amazon/efa/lib64:/opt/amazon/openmpi/lib64:/opt/amazon/ofi-nccl/lib64

export WANDB_INSECURE_DISABLE_SSL=true
export HYDRA_FULL_ERROR=1
export PYTHONUNBUFFERED=1
export NCCL_DEBUG=INFO

export LOBSTER_RUNS_DIR="/data2/ume/latent_generator_/runs/" #"s3://prescient-lobster/ume/runs" # CHANGE TO YOUR S3 BUCKET
export LOBSTER_DATA_DIR="/data2/ume/.cache2/" # CHANGE TO YOUR DATA DIRECTORY
export LOBSTER_USER=$(whoami) # CHANGE TO YOUR WANDB USERNAME IF NOT YOUR UNIXID
export WANDB_BASE_URL=https://genentech.wandb.io

export TOKENIZERS_PARALLELISM=true

# Sets default permissions to allow group write
# access for newly created files. Remove if not needed
umask g+w

# Quote SLURM-provided variables so the command is robust under `set -u`
# and word splitting; no trailing backslash after the final argument.
# NOTE(review): warmup/training step counts are intentionally duplicated
# for model.* and model.lr_scheduler.* — keep them in sync when editing.
srun -u --cpus-per-task "$SLURM_CPUS_PER_TASK" --cpu-bind=cores,verbose \
    lobster_train \
    experiment=train_latent_generator \
    data=structure_ligand \
    model=latent_generator_ligand \
    model.num_warmup_steps=10000 \
    model.num_training_steps=500000 \
    model.lr_scheduler.num_warmup_steps=10000 \
    model.lr_scheduler.num_training_steps=500000 \
    data.num_workers=8 \
    ++trainer.num_nodes="$SLURM_JOB_NUM_NODES" \
    trainer.num_sanity_val_steps=0 \
    +trainer.strategy=ddp_find_unused_parameters_true \
    model.structure_encoder.encode_ligand=true \
    model.structure_encoder.embed_dim=256 \
    model.quantizer.ligand_n_tokens=512 \
    model.decoder_factory.decoder_mapping.vit_decoder.encode_ligand=true \
    +model.decoder_factory.decoder_mapping.vit_decoder.ligand_struc_token_codebook_size=512 \
    +model.decoder_factory.decoder_mapping.vit_decoder.ligand_struc_token_dim=512



14 changes: 14 additions & 0 deletions src/lobster/callbacks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
from ._structure_decode import StructureDecodeCallback
from ._unconditional_generation import UnconditionalGenerationCallback
from ._auxiliary_task_loss_weight_scheduler import AuxiliaryTaskWeightScheduler, MultiTaskWeightScheduler
from ._inverse_folding_callback import InverseFoldingCallback
from ._forward_folding_callback import ForwardFoldingCallback
from ._protein_ligand_decode import ProteinLigandDecodeCallback
from ._protein_ligand_inverse_folding import ProteinLigandInverseFoldingCallback
from ._protein_ligand_forward_folding import ProteinLigandForwardFoldingCallback
from ._s3_checkpoint_callback import S3CheckpointBackupCallback

__all__ = [
"MoleculeACELinearProbeCallback",
Expand All @@ -27,4 +33,12 @@
"UmeGrpoLoggingCallback",
"AuxiliaryTaskWeightScheduler",
"MultiTaskWeightScheduler",
"StructureDecodeCallback",
"UnconditionalGenerationCallback",
"InverseFoldingCallback",
"ForwardFoldingCallback",
"ProteinLigandDecodeCallback",
"ProteinLigandInverseFoldingCallback",
"ProteinLigandForwardFoldingCallback",
"S3CheckpointBackupCallback",
]
Loading