diff --git a/.gitignore b/.gitignore
index bf0e1ac..c7091ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,12 @@
 *.pyc
 .molns/
 molns_install.log
+.ec2_creds_molns
+.idea/
+*.tar.gz
+*.tar
+notes
+qsubscript
+/dockerfile_*
+\#qsubscript\#
+docker_test.py
diff --git a/MolnsLib/DockerProvider.py b/MolnsLib/DockerProvider.py
new file mode 100644
index 0000000..b2f7ccd
--- /dev/null
+++ b/MolnsLib/DockerProvider.py
@@ -0,0 +1,245 @@
+import logging
+import os
+import tempfile
+import time
+import DockerProxy
+import constants
+import installSoftware
+from collections import OrderedDict
+from DockerSSH import DockerSSH
+from constants import Constants
+from molns_provider import ProviderBase, ProviderException
+
+
+def docker_provider_default_key_name():
+    user = os.environ.get('USER') or 'USER'
+    return "{0}_molns_docker_sshkey_{1}".format(user, hex(int(time.time())).replace('0x', ''))
+
+
+class DockerBase(ProviderBase):
+    """ Base class for Docker. """
+
+    SSH_KEY_EXTENSION = ".pem"
+    PROVIDER_TYPE = 'Docker'
+
+    def __init__(self, name, config=None, config_dir=None, **kwargs):
+        ProviderBase.__init__(self, name, config, config_dir, **kwargs)
+        self.docker = DockerProxy.DockerProxy()
+        self.ssh = DockerSSH(self.docker)
+
+    def _get_container_status(self, container_id):
+        return self.docker.container_status(container_id)
+
+    def start_instance(self, num=1):
+        """ Start the given number of containers (default 1). """
+        started_containers = []
+        for i in range(num):
+            container_id = self.docker.create_container(self.provider.config["molns_image_name"], name=self.name,
+                                                        port_bindings={
+                                                            Constants.DEFAULT_PUBLIC_WEBSERVER_PORT:
+                                                                ('127.0.0.1', self.config['web_server_port']),
+                                                            Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT:
+                                                                ('127.0.0.1', self.config['notebook_port'])},
+                                                        working_directory=self.config["working_directory"])
+            stored_container = self.datastore.get_instance(provider_instance_identifier=container_id,
+                                                           ip_address=self.docker.get_container_ip_address(container_id),
+                                                           provider_id=self.provider.id, controller_id=self.id,
+                                                           provider_type=constants.Constants.DockerProvider)
+            started_containers.append(stored_container)
+        if num == 1:
+            return started_containers[0]
+        return started_containers
+
+    def resume_instance(self, instances):
+        instance_ids = []
+        if isinstance(instances, list):
+            for instance in instances:
+                instance_ids.append(instance.provider_instance_identifier)
+        else:
+            instance_ids.append(instances.provider_instance_identifier)
+        self.docker.start_containers(instance_ids)
+
+    def stop_instance(self, instances):
+        instance_ids = []
+        if isinstance(instances, list):
+            for instance in instances:
+                instance_ids.append(instance.provider_instance_identifier)
+        else:
+            instance_ids.append(instances.provider_instance_identifier)
+        self.docker.stop_containers(instance_ids)
+
+    def terminate_instance(self, instances):
+        instance_ids = []
+        if isinstance(instances, list):
+            for instance in instances:
+                instance_ids.append(instance.provider_instance_identifier)
+                self.datastore.delete_instance(instance)
+        else:
+            instance_ids.append(instances.provider_instance_identifier)
+            self.datastore.delete_instance(instances)
+        self.docker.terminate_containers(instance_ids)
+
+    def exec_command(self, container_id, command):
+        return self.docker.execute_command(container_id, command)
+
+
+class DockerProvider(DockerBase):
+    """ Provider handle for local Docker based service.
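+
+    Usage sketch (illustrative, not part of the molns CLI flow; assumes a
+    provider entry created via the molns datastore and a reachable local
+    Docker daemon):
+
+        provider = DockerProvider(name='docker_local', config_dir='.molns/')
+        if not provider.check_molns_image():
+            provider.config['molns_image_name'] = provider.create_molns_image()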
""" + + OBJ_NAME = 'DockerProvider' + + CONFIG_VARS = OrderedDict([ + ('ubuntu_image_name', + {'q': 'Base Ubuntu image to use', 'default': constants.Constants.DOCKER_DEFAULT_IMAGE, + 'ask': True}), + ('molns_image_name', + {'q': 'Local MOLNs image (Docker image ID or image tag) to use ', 'default': 'briandrawert/molns-default-image:latest', 'ask': True}), + ('key_name', + {'q': 'Docker Key Pair name', 'default': "docker-default", 'ask': False}), # Unused. + ('group_name', + {'q': 'Docker Security Group name', 'default': 'molns', 'ask': False}), # Unused. + ('login_username', + {'default': 'ubuntu', 'ask': False}), # Unused. + ('provider_type', + {'default': constants.Constants.DockerProvider, 'ask': False}) + ]) + + def get_config_credentials(self): + return None + + @staticmethod + def __get_new_dockerfile_name(): + import uuid + filename = constants.Constants.DOCKERFILE_NAME + str(uuid.uuid4()) + return filename + + def check_ssh_key(self): + """ Returns true. (Implementation does not use SSH.) """ + return True + + def create_ssh_key(self): + """ Returns true. """ + ssh_key_dir = os.path.join(self.config_dir, self.name) + with open(ssh_key_dir, 'w') as fp: + fp.write("This is a dummy key.") + os.chmod(ssh_key_dir, 0o600) + + def check_security_group(self): + """ Returns true. (Implementation does not use SSH.) """ + return True + + def create_seurity_group(self): + """ Returns true. (Implementation does not use SSH.) """ + return True + + def create_molns_image(self): + """ Create a molns image, save it on localhost and return DockerImage ID of created image. """ + file_to_remove = None + try: + dockerfile, file_to_remove = self._create_dockerfile(installSoftware.InstallSW.get_command_list()) + image_id = self.docker.build_image(dockerfile) + return image_id + except Exception as e: + logging.exception(e) + raise ProviderException("Failed to create molns image: {0}".format(e)) + finally: + if file_to_remove is not None: + os.remove(file_to_remove) + + def check_molns_image(self): + """ Check if the molns image exists. """ + if 'molns_image_name' in self.config and self.config['molns_image_name'] is not None \ + and self.config['molns_image_name'] != '': + return self.docker.image_exists(self.config['molns_image_name']) + return False + + def _create_dockerfile(self, commands): + """ Create Dockerfile from given commands. 
""" + import Utils + + user_id = Utils.get_sudo_user_id() + dockerfile = '''FROM ubuntu:14.04\nRUN apt-get update\n\n# Add user ubuntu.\nRUN useradd -u {0} -ms /bin/bash ubuntu\n + # Set up base environment.\nRUN apt-get install -yy \ \n software-properties-common \ \n + python-software-properties \ \n wget \ \n curl \ \n git \ \n ipython \ \n sudo \ \n + screen \ \n iptables \nRUN echo "ubuntu ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + \nWORKDIR /home/ubuntu\n\nUSER ubuntu\nENV HOME /home/ubuntu'''.format(user_id) + + flag = False + + for entry in commands: + if isinstance(entry, list): + dockerfile += '''\n\nRUN ''' + first = True + flag = False + for sub_entry in entry: + if first is True: + dockerfile += self._preprocess(sub_entry) + first = False + else: + dockerfile += ''' && \ \n ''' + self._preprocess(sub_entry) + else: + if flag is False: + dockerfile += '''\n\nRUN ''' + flag = True + dockerfile += self._preprocess(entry) + else: + dockerfile += ''' && \ \n ''' + self._preprocess(entry) + + dockerfile += '''\n\n\n''' + + dockerfile_file = DockerProvider.__get_new_dockerfile_name() + with open(dockerfile_file, 'w') as Dockerfile: + Dockerfile.write(dockerfile) + named_dockerfile = tempfile.NamedTemporaryFile() + named_dockerfile.write(dockerfile) + named_dockerfile.seek(0) + + return named_dockerfile, dockerfile_file + + @staticmethod + def _preprocess(command): + """ Prepends "shell only" commands with '/bin/bash -c'. """ + for shell_command in DockerProxy.DockerProxy.shell_commands: + if shell_command in command: + replace_string = "/bin/bash -c \"" + shell_command + command = command.replace(shell_command, replace_string) + command += "\"" + return command + + +def get_default_working_directory(config=None): + if config is None: + raise Exception("Config should not be None.") + return os.path.realpath(os.path.join(config.config_dir, "docker_controller_working_dirs", config.name)) + + +class DockerController(DockerBase): + """ Provider handle for a Docker controller. """ + + OBJ_NAME = 'DockerController' + CONFIG_VARS = OrderedDict([ + ('web_server_port', + {'q': 'Port to use for web server', 'default': "8080", + 'ask': True}), + ('notebook_port', + {'q': 'Port to use for jupyter notebook', 'default': "8081", + 'ask': True}), + ('working_directory', + {'q': 'Working directory for this controller', 'default': get_default_working_directory, 'ask': True}), + ('ssh_key_file', + {'q': 'SSH key to a qsub and docker enabled cluster', 'default': "None", 'ask': True}) + ]) + + def get_instance_status(self, instance): + return self.docker.container_status(instance.provider_instance_identifier) + + +class DockerWorkerGroup(DockerController): + """ Provider handle for Docker worker group. """ + + OBJ_NAME = 'DockerWorkerGroup' + + CONFIG_VARS = OrderedDict([ + ('num_vms', + {'q': 'Number of containers in group', 'default': '1', 'ask': True}), + ]) diff --git a/MolnsLib/DockerProxy.py b/MolnsLib/DockerProxy.py new file mode 100644 index 0000000..d8a7fdc --- /dev/null +++ b/MolnsLib/DockerProxy.py @@ -0,0 +1,335 @@ +import logging +import os +import re +import time +import constants +from molns_provider import ProviderBase +from constants import Constants +from docker import APIClient as Client +from docker.errors import NotFound, NullResource, APIError + + +class InvalidVolumeName(Exception): + pass + +class ExecuteCommandException(Exception): + pass + + +class DockerProxy: + + """ A wrapper over docker-py and some utility methods and classes. 
""" + + LOG_TAG = "Docker " + + shell_commands = ["source"] + + class ImageBuildException(Exception): + def __init__(self, message=None): + super("Something went wrong while building docker container image.\n{0}".format(message)) + + def __init__(self): + if os.environ.get('DOCKER_HOST') is not None: + self.client = Client(base_url=os.environ.get('DOCKER_HOST')) + else: + self.client = Client(base_url=Constants.DOCKER_BASE_URL) + self.build_count = 0 + logging.basicConfig(level=logging.DEBUG) + + @staticmethod + def get_container_volume_from_working_dir(working_directory): + import os + return os.path.join("/home/ubuntu/", os.path.basename(working_directory)) + + def create_container(self, image_str, working_directory=None, name=None, + port_bindings={Constants.DEFAULT_PUBLIC_WEBSERVER_PORT: ('127.0.0.1', 8080), + Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT: ('127.0.0.1', 8081)}): + """Creates a new container with elevated privileges. Returns the container ID. Maps port 80 of container + to 8080 of locahost by default""" + + docker_image = DockerImage.from_string(image_str) + volume_dir = DockerProxy.get_container_volume_from_working_dir(working_directory) + + if name is None: + import uuid + random_str = str(uuid.uuid4()) + name = constants.Constants.MolnsDockerContainerNamePrefix + random_str[:8] + image = docker_image.image_id if docker_image.image_id is not Constants.DockerNonExistentTag \ + else docker_image.image_tag + + logging.info("Using image {0}".format(image)) + import os + if DockerProxy._verify_directory(working_directory) is False: + if working_directory is not None: + raise InvalidVolumeName("\n\nMOLNs uses certain reserved names for its configuration files in the " + "controller environment, and unfortunately the provided name for working " + "directory of the controller cannot be one of these. Please configure this " + "controller again with a different volume name and retry. " + "Here is the list of forbidden names: \n{0}" + .format(Constants.ForbiddenVolumeNames)) + + logging.warning(DockerProxy.LOG_TAG + "Unable to verify provided directory to use to as volume. 
+                                                  "be created.")
+            hc = self.client.create_host_config(privileged=True, port_bindings=port_bindings)
+            container = self.client.create_container(image=image, name=name, command="/bin/bash", tty=True, detach=True,
+                                                      ports=[Constants.DEFAULT_PUBLIC_WEBSERVER_PORT,
+                                                             Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT],
+                                                      host_config=hc,
+                                                      environment={"PYTHONPATH": "/usr/local/"})
+
+        else:
+            container_mount_point = '/home/ubuntu/{0}'.format(os.path.basename(working_directory))
+            hc = self.client.create_host_config(privileged=True, port_bindings=port_bindings,
+                                                binds={working_directory: {'bind': container_mount_point,
+                                                                           'mode': 'rw'}})
+
+            container = self.client.create_container(image=image, name=name, command="/bin/bash", tty=True, detach=True,
+                                                      ports=[Constants.DEFAULT_PUBLIC_WEBSERVER_PORT,
+                                                             Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT],
+                                                      volumes=container_mount_point, host_config=hc,
+                                                      working_dir=volume_dir,
+                                                      environment={"PYTHONPATH": "/usr/local/"})
+
+        container_id = container.get("Id")
+
+        return container_id
+
+    # noinspection PyBroadException
+    @staticmethod
+    def _verify_directory(working_directory):
+        import os
+        if working_directory is None or os.path.basename(working_directory) in Constants.ForbiddenVolumeNames:
+            return False
+        try:
+            if not os.path.exists(working_directory):
+                os.makedirs(working_directory)
+            return True
+        except:
+            return False
+
+    def stop_containers(self, container_ids):
+        """Stops the given containers."""
+        for container_id in container_ids:
+            self.stop_container(container_id)
+
+    def stop_container(self, container_id):
+        """Stops the container with the given ID."""
+        self.client.stop(container_id)
+
+    def container_status(self, container_id):
+        """Checks whether the container with the given ID is running."""
+        status = ProviderBase.STATUS_TERMINATED
+        try:
+            ret_val = str(self.client.inspect_container(container_id).get('State').get('Status'))
+            if ret_val.startswith("running"):
+                status = ProviderBase.STATUS_RUNNING
+            else:
+                status = ProviderBase.STATUS_STOPPED
+        except NotFound:
+            pass
+        return status
+
+    def start_containers(self, container_ids):
+        """Starts each container in the given list of container IDs."""
+        for container_id in container_ids:
+            self.start_container(container_id)
+
+    def start_container(self, container_id):
+        """ Start the container with the given ID."""
+        logging.info(DockerProxy.LOG_TAG + " Starting container " + container_id)
+        try:
+            self.client.start(container=container_id)
+        except (NotFound, NullResource) as e:
+            print DockerProxy.LOG_TAG + "Something went wrong while starting container: {0}".format(e)
+            return False
+        return True
+
+    def execute_command(self, container_id, command):
+        """Executes the given command as a shell command in the given container. Raises
+        ExecuteCommandException if anything goes wrong."""
+        run_command = "/bin/bash -c \"" + command + "\""
+        # print("CONTAINER: {0} COMMAND: {1}".format(container_id, run_command))
+        if self.start_container(container_id) is False:
+            raise ExecuteCommandException(DockerProxy.LOG_TAG + "Could not start container.")
+        try:
+            exec_instance = self.client.exec_create(container_id, run_command)
+            response = self.client.exec_start(exec_instance)
+            return [self.client.exec_inspect(exec_instance), response]
+        except (NotFound, APIError) as e:
+            raise ExecuteCommandException(DockerProxy.LOG_TAG + " Could not execute command: " + str(e))
+
+    def build_image(self, dockerfile):
+        """ Build an image from the given Dockerfile object and return the ID of the created image.
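+
+        The returned string is a serialized DockerImage of the form
+        '<image_id>|||<image_tag>' (Constants.DockerImageDelimiter between
+        the two), e.g. 'f1d2e3a4b5c6|||molns-docker-provider-<uuid>' with
+        illustrative values; image_exists() and create_container() accept
+        this format via DockerImage.from_string().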
""" + import uuid + logging.info("Building image...") + random_string = str(uuid.uuid4()) + image_tag = Constants.DOCKER_IMAGE_PREFIX + "{0}".format(random_string[:]) + last_line = "" + try: + for line in self.client.build(fileobj=dockerfile, rm=True, tag=image_tag): + print(DockerProxy._decorate(line)) + if "errorDetail" in line: + raise DockerProxy.ImageBuildException() + last_line = line + + # Return image ID. It's a hack around the fact that docker-py's build image command doesn't return an image + # id. + image_id = get_docker_image_id_from_string(str(last_line)) + logging.info("Image ID: {0}".format(image_id)) + return str(DockerImage(image_id, image_tag)) + + except (DockerProxy.ImageBuildException, IndexError) as e: + raise DockerProxy.ImageBuildException(e) + + @staticmethod + def _decorate(some_line): + return some_line[11:-4].rstrip() + + def image_exists(self, image_str): + """Checks if an image with the given ID/tag exists locally.""" + docker_image = DockerImage.from_string(image_str) + logging.debug("DockerProxy.image_exists(image={0})".format(image_str)) + logging.debug("DockerProxy.image_exists() docker_image.image_id={0}".format(docker_image.image_id)) + + if docker_image.image_id is Constants.DockerNonExistentTag \ + and docker_image.image_tag is Constants.DockerNonExistentTag: + logging.debug("DockerProxy.image_exists() raising 'raise InvalidDockerImageException'"); + raise InvalidDockerImageException("Neither image_id nor image_tag provided.") + + for image in self.client.images(): + some_id = image["Id"] + some_tags = image["RepoTags"] or [None] + logging.debug("DockerProxy.image_exists() some_id={0}".format(some_id)) + logging.debug("DockerProxy.image_exists() some_tags={0}".format(some_tags)) + if docker_image.image_id in \ + some_id[:(Constants.DOCKER_PY_IMAGE_ID_PREFIX_LENGTH + Constants.DOKCER_IMAGE_ID_LENGTH)]: + return True + if docker_image.image_tag in some_tags: + return True + return False + + def terminate_containers(self, container_ids): + """ Terminates containers with given container ids.""" + for container_id in container_ids: + try: + if self.container_status(container_id) == ProviderBase.STATUS_RUNNING: + self.stop_container(container_id) + self.terminate_container(container_id) + except NotFound: + pass + + def terminate_container(self, container_id): + self.client.remove_container(container_id) + + def get_mapped_ports(self, container_id): + container_ins = self.client.inspect_container(container_id) + mapped_ports = container_ins['HostConfig']['PortBindings'] + ret_val = [] + if mapped_ports is None: + logging.info("No mapped ports for {0}".format(container_id)) + return + for k, v in mapped_ports.iteritems(): + host_port = v[0]['HostPort'] + ret_val.append(host_port) + return ret_val + + def get_working_directory(self, container_id): + wd = self.client.inspect_container(container_id)["Config"]["WorkingDir"] + if len(wd)==0: + return '/' + return wd + + def get_home_directory(self, container_id): + try: + env_vars = self.client.inspect_container(container_id)["Config"]["Env"] + home = [i for i in env_vars if i.startswith("HOME")] + if len(home) == 0: + return self.get_working_directory(container_id) + logging.debug("DockerProxy.get_home_directory(container_id={0}) home={1}".format(container_id, home)) + return home[0].split("=")[1] + except IndexError as e: + logging.debug("DockerProxy.get_home_directory(container_id={0}): {1} ".format(container_id,e)) + return self.get_working_directory(container_id) + + def put_archive(self, container_id, 
+                    tar_file_bytes, target_path_in_container):
+        """ Copies and unpacks a given tarfile in the container at the specified location.
+        The location must exist in the container."""
+        if self.start_container(container_id) is False:
+            raise Exception("Could not start container.")
+
+        # Prepend file path with /home/ubuntu/. TODO Should be refined.
+        if not target_path_in_container.startswith("/home/ubuntu/"):
+            import os
+            target_path_in_container = os.path.join("/home/ubuntu/", target_path_in_container)
+
+        logging.info("target path in container: {0}".format(target_path_in_container))
+        if not self.client.put_archive(container_id, target_path_in_container, tar_file_bytes):
+            logging.error(DockerProxy.LOG_TAG + "Failed to copy.")
+
+    def get_container_ip_address(self, container_id):
+        """ Returns the IP address of the given container, polling until Docker reports one. """
+        self.start_container(container_id)
+        while True:
+            ins = self.client.inspect_container(container_id)
+            ip_address = str(ins.get("NetworkSettings").get("IPAddress"))
+            if ip_address.startswith("1"):
+                break
+            time.sleep(3)
+        return ip_address
+
+
+def get_docker_image_id_from_string(some_string):
+    exp = r'[a-z0-9]{12}'
+    matches = re.findall(exp, some_string)
+    if len(matches) == 0:
+        return None
+    else:
+        return matches[0]
+
+
+class InvalidDockerImageException(Exception):
+    def __init__(self, message):
+        super(InvalidDockerImageException, self).__init__(message)
+
+
+class DockerImage:
+    def __init__(self, image_id=None, image_tag=None):
+        if image_id in [None, Constants.DockerNonExistentTag] and image_tag in [None, Constants.DockerNonExistentTag]:
+            raise InvalidDockerImageException("Both image_id and image_tag cannot be None.")
+
+        self.image_id = image_id if image_id is not None else Constants.DockerNonExistentTag
+        self.image_tag = image_tag if image_tag is not None else Constants.DockerNonExistentTag
+
+    def __str__(self):
+        if self.image_id == Constants.DockerNonExistentTag and self.image_tag == Constants.DockerNonExistentTag:
+            raise InvalidDockerImageException(
+                "Cannot serialize DockerImage object because both image_id and image_tag are None.")
+
+        return "{0}{1}{2}".format(self.image_id, Constants.DockerImageDelimiter, self.image_tag)
+
+    @staticmethod
+    def from_string(serialized_docker_image):
+        temp = serialized_docker_image.split(Constants.DockerImageDelimiter)
+
+        if len(temp) == 2:
+            return DockerImage(image_id=temp[0], image_tag=temp[1])
+
+        if len(temp) > 2 or len(temp) == 0:
+            raise InvalidDockerImageException("Unexpected format, cannot deserialize to DockerImage.")
+
+        temp = temp[0]
+        # Figure out if temp is an image_id or an image_name.
+        if DockerImage.looks_like_image_id(temp):
+            return DockerImage(image_id=temp)
+        else:
+            return DockerImage(image_tag=temp)
+
+    @staticmethod
+    def looks_like_image_id(some_string):
+        possible_image_id = get_docker_image_id_from_string(some_string)
+        return some_string == possible_image_id
diff --git a/MolnsLib/DockerSSH.py b/MolnsLib/DockerSSH.py
new file mode 100644
index 0000000..d4f6f23
--- /dev/null
+++ b/MolnsLib/DockerSSH.py
@@ -0,0 +1,98 @@
+import StringIO
+import tarfile
+import os
+import re
+import paramiko
+
+
+# "Unused" arguments on some methods are kept for compatibility with the existing upper-level APIs.
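+#
+# Rough equivalence with the paramiko-backed wrapper in ssh.py (a sketch;
+# 'instance' is a datastore Instance row whose provider_instance_identifier
+# holds a container id):
+#
+#     ssh = DockerSSH(DockerProxy())
+#     ssh.connect(instance, endpoint=None)  # records the container id
+#     ssh.exec_command("ls ~")              # runs via 'docker exec', not sshd
+#     ssh.close()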
+
+class DockerSSH(object):
+    def __init__(self, docker):
+        self.docker = docker
+        self.container_id = None
+
+    def exec_command(self, command, verbose=None):
+        cmd = re.sub("\"", "\\\"", command)  # Escape all occurrences of ".
+        ret_val, response = self.docker.execute_command(self.container_id, cmd)
+        if 'ExitCode' in ret_val and ret_val['ExitCode'] > 0:
+            raise paramiko.SSHException("DockerSSH.exec_command({0}) exit_code={1}: {2}".format(command, ret_val['ExitCode'], response))
+        return response
+
+    def exec_multi_command(self, command, verbose=None):
+        return self.exec_command(command)
+
+    def open_sftp(self):
+        return MockSFTP(self.docker, self.container_id)
+
+    def connect(self, instance, endpoint, username=None, key_filename=None):
+        self.container_id = instance.provider_instance_identifier
+
+    def connect_cluster_node(self, ip_address, port, username, keyfile):
+        raise DockerSSHException("This invocation means that an error has occurred.")
+
+    def close(self):
+        self.container_id = None
+
+
+class MockSFTPFileException(Exception):
+    pass
+
+
+class DockerSSHException(Exception):
+    pass
+
+
+class MockSFTP:
+    def __init__(self, docker, container_id):
+        self.docker = docker
+        self.container_id = container_id
+
+    def file(self, filename, flag):
+        return MockSFTPFile(filename, flag, self.docker, self.container_id)
+
+    def close(self):
+        pass
+
+
+class MockSFTPFile:
+    def __init__(self, filename, flag, docker, container_id):
+        self.filename = filename  # Absolute path of file.
+        self.file_contents = ""
+        self.docker = docker
+        self.container_id = container_id
+        if flag == 'w':
+            self.flag = flag
+        # else:
+        #     print("WARNING Unrecognized file mode. Filename: {0}, Flag: {1}".format(filename, flag))
+
+    def write(self, write_this):
+        self.file_contents += write_this
+
+    def close(self):
+        # Make tarfile.
+        import uuid
+        rand_str = str(uuid.uuid4())
+        temp_tar = "transport-{0}.tar".format(rand_str[:8])
+        try:
+            tar = tarfile.TarFile(temp_tar, "w")
+            string = StringIO.StringIO()
+            string.write(self.file_contents)
+            string.seek(0)
+            tar_file_info = tarfile.TarInfo(name=os.path.basename(self.filename))
+            tar_file_info.size = len(string.getvalue())
+            tar.addfile(tarinfo=tar_file_info, fileobj=string)
+            tar.close()
+
+            path_to_file = os.path.dirname(self.filename)
+
+            if not path_to_file.startswith("/home"):
+                path_to_file = os.path.join(self.docker.get_home_directory(self.container_id), path_to_file)
+
+            with open(temp_tar, mode='rb') as f:
+                tar_file_bytes = f.read()
+
+            # print("path to file: {0}".format(path_to_file))
+            self.docker.put_archive(self.container_id, tar_file_bytes, path_to_file)
+        finally:
+            os.remove(temp_tar)  # Remove temporary tar file.
diff --git a/MolnsLib/EC2Provider.py b/MolnsLib/EC2Provider.py
index 3451570..42f706f 100644
--- a/MolnsLib/EC2Provider.py
+++ b/MolnsLib/EC2Provider.py
@@ -169,7 +169,7 @@ def _get_image_name(self):
 ##########################################
 class EC2Controller(EC2Base):
-    """ Provider handle for an open stack controller. """
+    """ Provider handle for an EC2 controller. """
 
     OBJ_NAME = 'EC2Controller'
@@ -272,7 +272,7 @@ def get_instance_status(self, instance):
 ##########################################
 class EC2WorkerGroup(EC2Controller):
-    """ Provider handle for an open stack controller. """
+    """ Provider handle for an EC2 worker group. """
""" OBJ_NAME = 'EC2WorkerGroup' @@ -464,7 +464,7 @@ def start_ec2_instances(self, image_id=None, key_name=None, group_name=None, num raise ProviderException("Could not find image_id={0}".format(image_id)) if img.state != "available": if img.state != "pending": - raise ProviderException("Image {0} is not available, it has state is {1}.".format(image_id, img.state)) + raise ProviderException("Image {0} is not available, it's state is {1}.".format(image_id, img.state)) while img.state == "pending": print "Image {0} has state {1}, waiting {2} seconds for it to become available.".format(image_id, img.state, self.PENDING_IMAGE_WAITTIME) time.sleep(self.PENDING_IMAGE_WAITTIME) diff --git a/MolnsLib/OpenStackProvider.py b/MolnsLib/OpenStackProvider.py index 26c8779..5a35c98 100644 --- a/MolnsLib/OpenStackProvider.py +++ b/MolnsLib/OpenStackProvider.py @@ -43,6 +43,8 @@ class OpenStackProvider(OpenStackBase): {'q':'OpenStack project_name', 'default':os.environ.get('OS_TENANT_NAME'), 'ask':True}), ('neutron_nic', {'q':'Network ID (leave empty if only one possible network)', 'default':None, 'ask':True}), + ('region_name', + {'q':'Specify the region (leave empty if only one region)', 'default':os.environ.get('OS_REGION_NAME'), 'ask':True}), ('floating_ip_pool', {'q':'Name of Floating IP Pool (leave empty if only one possible pool)', 'default':None, 'ask':True}), ('nova_version', @@ -192,6 +194,8 @@ def _connect(self): creds['api_key'] = self.config['nova_password'] creds['auth_url'] = self.config['nova_auth_url'] creds['project_id'] = self.config['nova_project_id'] + if 'region_name' in self.config and self.config['region_name'] is not None: + creds['region_name'] = self.config['region_name'] self.nova = novaclient.Client(self.config['nova_version'], **creds) self.connected = True diff --git a/MolnsLib/Utils.py b/MolnsLib/Utils.py new file mode 100644 index 0000000..a6b75f2 --- /dev/null +++ b/MolnsLib/Utils.py @@ -0,0 +1,45 @@ +def get_user_name(): + try: + import os + return os.environ['SUDO_USER'] + except KeyError: + import getpass + return getpass.getuser() + + +def get_sudo_user_id(): + import pwd + u_name = get_user_name() + return pwd.getpwnam(u_name).pw_uid + + +def get_sudo_group_id(): + import grp + u_name = get_user_name() + return grp.getgrnam(u_name).gr_gid + + +def ensure_sudo_mode(some_function): + import os + import sys + if sys.platform.startswith("linux") and os.getuid() != 0: + pass + raise NoPrivilegedMode("\n\nOn Linux platforms, 'docker' is a priviledged command. 
" + "To use 'docker' functionality, please run in sudo mode or as root user.") + return some_function + + +class Log: + verbose = True + + def __init__(self): + pass + + @staticmethod + def write_log(message): + if Log.verbose: + print message + + +class NoPrivilegedMode(Exception): + pass diff --git a/MolnsLib/constants.py b/MolnsLib/constants.py new file mode 100644 index 0000000..7ac7ffb --- /dev/null +++ b/MolnsLib/constants.py @@ -0,0 +1,28 @@ +import os + + +class Constants: + DockerWorkingDirectoryPrefix = "working_dir_" + LOGGING_DIRECTORY = "~/MOLNS_LOG" + DOCKER_BASE_URL = "unix://var/run/docker.sock" + DOCKER_DEFAULT_IMAGE = "ubuntu:latest" + DOCKER_DEFAULT_PORT = '9000' + DOCKER_CONTAINER_RUNNING = "running" + DOCKER_CONTAINER_EXITED = "exited" + DOCKERFILE_NAME = "dockerfile_" + DOKCER_IMAGE_ID_LENGTH = 12 + DOCKER_IMAGE_PREFIX = "molns-docker-provider-" + DOCKER_PY_IMAGE_ID_PREFIX_LENGTH = 7 + DockerProvider = "Docker" + DockerNonExistentTag = "**NA**" + DockerImageDelimiter = "|||" + MolnsDockerContainerNamePrefix = "Molns-" + MolnsExecHelper = "molns_exec_helper.py" + DEFAULT_PRIVATE_NOTEBOOK_PORT = 8081 + DEFAULT_PUBLIC_NOTEBOOK_PORT = 443 + DEFAULT_PRIVATE_WEBSERVER_PORT = 8001 + DEFAULT_PUBLIC_WEBSERVER_PORT = 80 + DEFAULT_QSUB_SSH_PORT = 22 + ForbiddenVolumeNames = [".ssh", ".ipython", ".molns", "ipython", "localarea", "shared"] + ConfigDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".molns/") + ClusterKeyFileNameOnController = "molns_cluster_secretkey" diff --git a/MolnsLib/installSoftware.py b/MolnsLib/installSoftware.py index 92cb2e5..fcd74e8 100644 --- a/MolnsLib/installSoftware.py +++ b/MolnsLib/installSoftware.py @@ -40,29 +40,40 @@ class InstallSW: "sudo pip install dill cloud pygments", "sudo pip install tornado Jinja2", - # Molnsutil + # Molnsutil develop [ "sudo pip install jsonschema jsonpointer", # EC2/S3 and OpenStack APIs "sudo pip install boto", "sudo apt-get -y install pandoc", - # This set of packages is needed for OpenStack, as molnsutil uses them for hybrid cloud deployment + # This set of packages is needed for OpenStack, as molns_util uses them for hybrid cloud deployment "sudo apt-get -y install libxml2-dev libxslt1-dev python-dev", "sudo pip install python-novaclient", - "sudo easy_install -U pip", "sudo pip install python-keystoneclient", "sudo pip install python-swiftclient", ], - [ - "sudo rm -rf /usr/local/molnsutil;sudo mkdir -p /usr/local/molnsutil;sudo chown ubuntu /usr/local/molnsutil", - "cd /usr/local/ && git clone https://github.com/Molns/molnsutil.git", - "cd /usr/local/molnsutil && sudo python setup.py install" + "sudo rm -rf /usr/local/molnsutil;sudo mkdir -p /usr/local/molnsutil;sudo chown ubuntu /usr/local/molnsutil", + #"cd /usr/local/ && git clone https://github.com/briandrawert/molnsutil.git && cd /usr/local/molnsutil && git checkout qsub_support" + "cd /usr/local/ && git clone https://github.com/briandrawert/molnsutil.git" + ], + + # Molns develop + [ + "sudo rm -rf /usr/local/molns;sudo mkdir -p /usr/local/molns;sudo chown ubuntu /usr/local/molns", + "cd /usr/local/ && git clone https://github.com/briandrawert/molns.git && cd /usr/local/molns" + ], + + # Cluster execution + [ + "sudo rm -rf /usr/local/cluster_execution;sudo mkdir -p /usr/local/cluster_execution;sudo chown ubuntu /usr/local/cluster_execution", + "cd /usr/local/ && git clone https://github.com/briandrawert/cluster_execution.git" ], # So the workers can mount the controller via SSHfs [ "sudo apt-get -y install sshfs", "sudo gpasswd -a ubuntu 
fuse", + "mkdir -p /home/ubuntu/.ssh/", "echo 'ServerAliveInterval 60' >> /home/ubuntu/.ssh/config", ], @@ -83,19 +94,20 @@ class InstallSW: "cd /usr/local/ && git clone https://github.com/StochSS/stochkit.git StochKit", "cd /usr/local/StochKit && ./install.sh", - "sudo rm -rf /usr/local/ode-1.0.2;sudo mkdir -p /usr/local/ode-1.0.2/;sudo chown ubuntu /usr/local/ode-1.0.2", - "wget https://github.com/StochSS/stochss/blob/master/ode-1.0.2.tgz?raw=true -q -O /tmp/ode.tgz", - "cd /usr/local/ && tar -xzf /tmp/ode.tgz", + #"wget https://github.com/StochSS/stochss/blob/master/ode-1.0.4.tgz?raw=true -q -O /tmp/ode.tgz", + "wget https://github.com/StochSS/StochKit_ode/archive/master.tar.gz?raw=true -q -O /tmp/ode.tgz", + "cd /tmp && tar -xzf /tmp/ode.tgz", + "sudo mv /tmp/StochKit_ode-master /usr/local/ode", "rm /tmp/ode.tgz", - "cd /usr/local/ode-1.0.2/cvodes/ && tar -xzf \"cvodes-2.7.0.tar.gz\"", - "cd /usr/local/ode-1.0.2/cvodes/cvodes-2.7.0/ && ./configure --prefix=\"/usr/local/ode-1.0.2/cvodes/cvodes-2.7.0/cvodes\" 1>stdout.log 2>stderr.log", - "cd /usr/local/ode-1.0.2/cvodes/cvodes-2.7.0/ && make 1>stdout.log 2>stderr.log", - "cd /usr/local/ode-1.0.2/cvodes/cvodes-2.7.0/ && make install 1>stdout.log 2>stderr.log", - "cd /usr/local/ode-1.0.2/ && STOCHKIT_HOME=/usr/local/StochKit/ STOCHKIT_ODE=/usr/local/ode-1.0.2/ make 1>stdout.log 2>stderr.log", + "cd /usr/local/ode/cvodes/ && tar -xzf \"cvodes-2.7.0.tar.gz\"", + "cd /usr/local/ode/cvodes/cvodes-2.7.0/ && ./configure --prefix=\"/usr/local/ode/cvodes/cvodes-2.7.0/cvodes\" 1>stdout.log 2>stderr.log", + "cd /usr/local/ode/cvodes/cvodes-2.7.0/ && make 1>stdout.log 2>stderr.log", + "cd /usr/local/ode/cvodes/cvodes-2.7.0/ && make install 1>stdout.log 2>stderr.log", + "cd /usr/local/ode/ && STOCHKIT_HOME=/usr/local/StochKit/ STOCHKIT_ODE=/usr/local/ode/ make 1>stdout.log 2>stderr.log", "sudo rm -rf /usr/local/gillespy;sudo mkdir -p /usr/local/gillespy;sudo chown ubuntu /usr/local/gillespy", - "cd /usr/local/ && git clone https://github.com/MOLNs/gillespy.git", - "cd /usr/local/gillespy && sudo STOCHKIT_HOME=/usr/local/StochKit/ STOCHKIT_ODE_HOME=/usr/local/ode-1.0.2/ python setup.py install" + "cd /usr/local/ && git clone https://github.com/briandrawert/gillespy.git", + "cd /usr/local/gillespy && sudo STOCHKIT_HOME=/usr/local/StochKit/ STOCHKIT_ODE_HOME=/usr/local/ode/ python setup.py install" ], @@ -106,18 +118,20 @@ class InstallSW: # Gmsh for Finite Element meshes "sudo apt-get install -y gmsh", ], - + + ["sudo apt-get install docker", "sudo pip install docker", "sudo pip install sqlalchemy", + "sudo pip install boto", "sudo pip install python-novaclient", "sudo pip install paramiko"], # pyurdme - [ "sudo rm -rf /usr/local/pyurdme;sudo mkdir -p /usr/local/pyurdme;sudo chown ubuntu /usr/local/pyurdme", + [ "sudo rm -rf /usr/local/pyurdme && sudo mkdir -p /usr/local/pyurdme && sudo chown ubuntu /usr/local/pyurdme", "cd /usr/local/ && git clone https://github.com/MOLNs/pyurdme.git", #"cd /usr/local/pyurdme && git checkout develop", # for development only - "cp /usr/local/pyurdme/pyurdme/data/three.js_templates/js/* .ipython/profile_default/static/custom/", + "cp /usr/local/pyurdme/pyurdme/data/three.js_templates/js/* $HOME/.ipython/profile_default/static/custom/", "source /usr/local/pyurdme/pyurdme_init && python -c 'import pyurdme'", ], # example notebooks - [ "rm -rf MOLNS_notebooks;git clone https://github.com/Molns/MOLNS_notebooks.git", - "cp MOLNS_notebooks/*.ipynb .;rm -rf MOLNS_notebooks;", + [ "rm -rf MOLNS_notebooks && git clone 
https://github.com/Molns/MOLNS_notebooks.git", + "cp MOLNS_notebooks/*.ipynb . && rm -rf MOLNS_notebooks", "ls *.ipynb" ], @@ -125,10 +139,10 @@ class InstallSW: "sudo apt-get -y remove python-scipy", "sudo pip install scipy", - "sudo pip install jsonschema jsonpointer", #redo this install to be sure it has not been removed. - + "sudo pip install jsonschema jsonpointer", # redo this install to be sure it has not been removed. + "sudo pip install paramiko", - "sync", # This is critial for some infrastructures. + "sync", # This is critical for some infrastructures. ] # How many time do we try to install each package. @@ -268,7 +282,6 @@ def exec_command_list_switch(self, command_list): raise SystemExit("CRITICAL ERROR: could not complete command '{0}'. Exiting.".format(command)) print "Installation complete in {0}s".format(time.time() - tic) - def log_exec(self, msg): if self.log_file is not None: self.log_file.write(msg) @@ -332,6 +345,11 @@ def exec_multi_command(self, command, next_command): print "FAILED......\t{0}:{1}\t{2}\t{3}".format(self.hostname, self.ssh_endpoint, command, e) raise InstallSWException() + @staticmethod + def get_command_list(): + """Returns the whole list of dependency installation commands. """ + return InstallSW.command_list + if __name__ == "__main__": print "{0}".format(InstallSW.command_list) print "len={0}".format(len(InstallSW.command_list)) @@ -342,4 +360,3 @@ def exec_multi_command(self, command, next_command): else: cnt += 1 print "cnt={0}".format(cnt) - diff --git a/MolnsLib/molns_datastore.py b/MolnsLib/molns_datastore.py index 00093c5..dee8428 100644 --- a/MolnsLib/molns_datastore.py +++ b/MolnsLib/molns_datastore.py @@ -1,15 +1,20 @@ #!/usr/bin/env python from sqlalchemy import create_engine from sqlalchemy.ext.declarative import declarative_base + Base = declarative_base() from sqlalchemy import Column, Integer, String, Sequence from sqlalchemy.orm import sessionmaker import os import logging import sys +import uuid +import datetime + ############################################################# -#VALID_PROVIDER_TYPES = ['OpenStack', 'EC2', 'Rackspace'] -VALID_PROVIDER_TYPES = ['OpenStack', 'EC2', 'Eucalyptus'] +VALID_PROVIDER_TYPES = ['OpenStack', 'EC2', 'Eucalyptus', 'Docker'] + + ############################################################# #### SCHEMA ################################################# ############################################################# @@ -18,12 +23,13 @@ class Provider(Base): """ DB object for an infrastructure service provider. """ __tablename__ = 'providers' id = Column(Integer, Sequence('provider_id_seq'), primary_key=True) - type = Column(String) #'EC2', 'Azure', 'OpenStack' + type = Column(String) # 'EC2', 'Azure', 'OpenStack' name = Column(String) def __str__(self): return "Provider({0}): name={1} type={2}".format(self.id, self.name, self.type) + class ProviderData(Base): """ DB object to store the key/value pairs for a service provider. """ __tablename__ = 'provider_data' @@ -33,19 +39,22 @@ class ProviderData(Base): value = Column(String) def __str__(self): - return "ProviderData({0}): provider_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, self.value) + return "ProviderData({0}): provider_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, + self.value) + class Controller(Base): """ DB object for a MOLNS controller. 
""" __tablename__ = 'controllers' id = Column(Integer, Sequence('controller_id_seq'), primary_key=True) - type = Column(String) #'EC2', 'Azure', 'OpenStack' + type = Column(String) # 'EC2', 'Azure', 'OpenStack' name = Column(String) provider_id = Column(Integer) - + def __str__(self): return "Controller({0}): name={1} provider_id={2}".format(self.id, self.name, self.provider_id) + class ControllerData(Base): """ DB object to store the key/value pairs for a controller. """ __tablename__ = 'controller_data' @@ -55,19 +64,24 @@ class ControllerData(Base): value = Column(String) def __str__(self): - return "ControllerData({0}): controller_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, self.value) + return "ControllerData({0}): controller_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, + self.value) + class WorkerGroup(Base): """ DB object for a MOLNS WorkerGroup. """ __tablename__ = 'worker_groups' id = Column(Integer, Sequence('worker_group_id_seq'), primary_key=True) - type = Column(String) #'EC2', 'Azure', 'OpenStack' + type = Column(String) # 'EC2', 'Azure', 'OpenStack' name = Column(String) provider_id = Column(Integer) controller_id = Column(Integer) - + def __str__(self): - return "WorkerGroup({0}): name={1} provider_id={2} controller_id={3}".format(self.id, self.name, self.provider_id, self.controller_id) + return "WorkerGroup({0}): name={1} provider_id={2} controller_id={3}".format(self.id, self.name, + self.provider_id, + self.controller_id) + class WorkerGroupData(Base): """ DB object to store the key/value pairs for a worker groups. """ @@ -78,36 +92,54 @@ class WorkerGroupData(Base): value = Column(String) def __str__(self): - return "WorkerGrouprData({0}): worker_group_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, self.value) + return "WorkerGrouprData({0}): worker_group_id={1} name={2} value={3}".format(self.id, self.parent_id, + self.name, self.value) class Instance(Base): """ DB object for a MOLNS VM instance. """ __tablename__ = 'instances' id = Column(Integer, Sequence('instance_id_seq'), primary_key=True) - type = Column(String) #'head-node' or 'worker' + type = Column(String) # 'head-node' or 'worker' controller_id = Column(Integer) worker_group_id = Column(Integer) provider_id = Column(Integer) ip_address = Column(String) provider_instance_identifier = Column(String) - + + def __str__(self): + return "Instance({0}): provider_instance_identifier={1} provider_id={2} controller_id={3} worker_group_id={4}".format( + self.id, self.provider_instance_identifier, self.provider_id, self.controller_id, self.worker_group_id) + + +class ExecJob(Base): + """ DB object for MOLNS exec jobs. 
""" + __tablename__ = 'jobs' + id = Column(Integer, Sequence('instance_id_seq'), primary_key=True) + controller_id = Column(Integer) + exec_str = Column(String) + jobID = Column(String) + date = Column(String) + def __str__(self): - return "Instance({0}): provider_instance_identifier={1} provider_id={2} controller_id={3} worker_group_id={4}".format(self.id, self.provider_instance_identifier, self.provider_id, self.controller_id, self.worker_group_id) + return "ExecJob({0}): jobID={1} controller_id={2}, exec_str={3}".format(self.id, self.jobID, self.controller_id, + self.exec_str) class DatastoreException(Exception): pass + ############################################################# HANDLE_MAPPING = { - 'Provider':(Provider,ProviderData), - 'Controller':(Controller,ControllerData), - 'WorkerGroup':(WorkerGroup,WorkerGroupData), + 'Provider': (Provider, ProviderData), + 'Controller': (Controller, ControllerData), + 'WorkerGroup': (WorkerGroup, WorkerGroupData), } -#from OpenStackProvider import OpenStackProvider, OpenStackController, OpenStackWorkerGroup -#from EC2Provider import EC2Provider, EC2Controller, EC2WorkerGroup + +# from OpenStackProvider import OpenStackProvider, OpenStackController, OpenStackWorkerGroup +# from EC2Provider import EC2Provider, EC2Controller, EC2WorkerGroup def dynamic_module_import(name): mod = __import__(name) @@ -116,26 +148,30 @@ def dynamic_module_import(name): mod = getattr(mod, comp) return mod + def get_provider_handle(kind, ptype): """ Return object of 'kind' (Provider, Controller or WokerGroup) for provider of type 'ptype'. Load the module if necessary. """ - #logging.debug("get_provider_handle(kind={0}, ptype={1})".format(kind, ptype)) + # logging.debug("get_provider_handle(kind={0}, ptype={1})".format(kind, ptype)) valid_handles = ['Provider', 'Controller', 'WorkerGroup'] if kind not in valid_handles: raise DatastoreException("Unknown kind {0}".format(kind)) if ptype not in VALID_PROVIDER_TYPES: - raise DatastoreException("Unknown {1} type {0}".format(ptype, kind)) + # raise DatastoreException("Unknown {1} type {0}".format(ptype, kind)) + return None cls_name = "{0}{1}".format(ptype, kind) pkg_name = "MolnsLib.{0}Provider".format(ptype) if pkg_name not in sys.modules: logging.debug("loading {0} from {1}".format(cls_name, pkg_name)) + # pkg = dynamic_module_import(pkg_name) pkg = dynamic_module_import(pkg_name) try: - #logging.debug("dir(pkg={0})={1}".format(pkg, dir(pkg))) + # logging.debug("dir(pkg={0})={1}".format(pkg, dir(pkg))) mod = getattr(pkg, cls_name) except AttributeError: raise DatastoreException("module {0} does not contain {1}".format(pkg_name, cls_name)) return mod + ############################################################# @@ -160,14 +196,13 @@ def __init__(self, db_file=None, config_dir=None): os.makedirs(self.MOLNS_CONFIG_DIR) self.engine = create_engine('sqlite:///{0}/{1}'.format(self.MOLNS_CONFIG_DIR, self.MOLNS_DATASTORE)) - Base.metadata.create_all(self.engine) # Create all the tables + Base.metadata.create_all(self.engine) # Create all the tables Session = sessionmaker(bind=self.engine) self.session = Session() def __del__(self): """ Destructor. """ self.session.commit() - def list_objects(self, kind): """ Get all the currently configured objects of kind (Provider, Controller, WorkerGroup). 
@@ -197,16 +232,16 @@ def create_object(self, ptype, name, kind, **kwargs): raise DatastoreException("{1} {0} already exists with type".format(name, kind, p.type)) p_handle = get_provider_handle(kind, ptype) - #logging.debug("create_object() {1}(name={0})".format(name, p_handle)) + # logging.debug("create_object() {1}(name={0})".format(name, p_handle)) p = p_handle(name=name, config_dir=self.config_dir) if 'provider_id' in kwargs: p.provider_id = kwargs['provider_id'] - #logging.debug("create_object() provider_id={0}".format(kwargs['provider_id'])) + # logging.debug("create_object() provider_id={0}".format(kwargs['provider_id'])) if 'controller_id' in kwargs: p.controller_id = kwargs['controller_id'] - #logging.debug("create_object() controller_id={0}".format(kwargs['controller_id'])) + # logging.debug("create_object() controller_id={0}".format(kwargs['controller_id'])) return p - + def delete_object(self, name, kind): """ Delete a objects of kind (Provider, Controller, WorkerGroup). @@ -225,7 +260,7 @@ def delete_object(self, name, kind): logging.debug("Deleting entry: {0}".format(p)) self.session.delete(p) self.session.commit() - + def get_object(self, name, kind): """ Get a config object of of kind (Provider, Controller, WorkerGroup). @@ -271,19 +306,21 @@ def _get_object_data(self, d_handle, kind, ptype, p): data[d.name] = d.value p_handle = get_provider_handle(kind, ptype) - #logging.debug("{2}(name={0}, data={1})".format(name,data,p_handle)) + # logging.debug("{2}(name={0}, data={1})".format(name,data,p_handle)) + if p_handle is None: + return None ret = p_handle(name=p.name, config=data, config_dir=self.config_dir) ret.id = p.id ret.datastore = self if 'provider_id' in p.__dict__: - #logging.debug("_get_object_data(): provider_id={0}".format(p.provider_id)) + # logging.debug("_get_object_data(): provider_id={0}".format(p.provider_id)) try: ret.provider = self.get_object_by_id(id=p.provider_id, kind='Provider') except DatastoreException as e: logging.debug('Error: provider {0} not found'.format(p.provider_id)) ret.provider = None if 'controller_id' in p.__dict__: - #logging.debug("_get_object_data(): controller_id={0}".format(p.controller_id)) + # logging.debug("_get_object_data(): controller_id={0}".format(p.controller_id)) try: ret.controller = self.get_object_by_id(id=p.controller_id, kind='Controller') except DatastoreException as e: @@ -291,8 +328,6 @@ def _get_object_data(self, d_handle, kind, ptype, p): ret.controller = None return ret - - def save_object(self, config, kind): """ Save the configuration of a provider object. @@ -308,15 +343,16 @@ def save_object(self, config, kind): # Add new entry. 
p = handle(name=config.name, type=config.type) self.session.add(p) - #logging.debug("Created new DB entry: {0}".format(p)) - #print "save_object() config.__dict__={0}".format(config.__dict__) + # logging.debug("Created new DB entry: {0}".format(p)) + # print "save_object() config.__dict__={0}".format(config.__dict__) if 'provider_id' in config.__dict__: - logging.debug("provider_id is in config.__dict__ {0} {1}".format(config.provider_id, type(config.provider_id))) + logging.debug( + "provider_id is in config.__dict__ {0} {1}".format(config.provider_id, type(config.provider_id))) p.provider_id = config.provider_id if 'controller_id' in config.__dict__: logging.debug("controller_id is in config.__dict__ {0}".format(config.controller_id)) p.controller_id = config.controller_id - #logging.debug("Updated DB entry: {0}".format(p)) + # logging.debug("Updated DB entry: {0}".format(p)) self.session.commit() data = config.config.copy() @@ -326,33 +362,34 @@ def save_object(self, config, kind): d.value = data[d.name] del data[d.name] else: - #logging.debug("Deleting entry: {0}".format(d)) + # logging.debug("Deleting entry: {0}".format(d)) self.session.delete(d) for d in data.keys(): dd = d_handle(parent_id=p.id, name=d, value=data[d]) - #logging.debug("Created new entry: {0}".format(dd)) + # logging.debug("Created new entry: {0}".format(dd)) self.session.add(dd) self.session.commit() - def get_instance_by_id(self, id): """ Create or get the value for an instance. """ return self.session.query(Instance).filter_by(id=id).first() - - def get_instance(self, provider_instance_identifier, ip_address, provider_id=None, controller_id=None, worker_group_id=None): + + def get_instance(self, provider_instance_identifier, ip_address, provider_id=None, controller_id=None, + worker_group_id=None, provider_type=None): """ Create or get the value for an instance. 
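+
+        Lookup is keyed on provider_instance_identifier; if no row matches,
+        one is created with the given ids. (provider_type is accepted for
+        call-site compatibility but is not stored on the Instance row.)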
""" p = self.session.query(Instance).filter_by(provider_instance_identifier=provider_instance_identifier).first() if p is None: - p = Instance(provider_instance_identifier=provider_instance_identifier, ip_address=ip_address, provider_id=provider_id, controller_id=controller_id, worker_group_id=worker_group_id) + p = Instance(provider_instance_identifier=provider_instance_identifier, ip_address=ip_address, + provider_id=provider_id, controller_id=controller_id, worker_group_id=worker_group_id) self.session.add(p) self.session.commit() - #logging.debug("Creating instance: {0}".format(p)) + # logging.debug("Creating instance: {0}".format(p)) else: - #logging.debug("Fetching instance: {0}".format(p)) + # logging.debug("Fetching instance: {0}".format(p)) pass return p - def get_controller_instances(self,controller_id=None): + def get_controller_instances(self, controller_id=None): logging.debug("get_controller_instances by controller_id={0}".format(controller_id)) ret = self.session.query(Instance).filter_by(controller_id=controller_id, worker_group_id=None).all() if ret is None: @@ -360,24 +397,24 @@ def get_controller_instances(self,controller_id=None): else: return ret - def get_worker_instances(self,controller_id=None): - #logging.debug("get_worker_instances by controller_id={0}".format(controller_id)) - ret = self.session.query(Instance).filter_by(controller_id=controller_id).filter(Instance.worker_group_id!=None).all() + def get_worker_instances(self, controller_id=None): + # logging.debug("get_worker_instances by controller_id={0}".format(controller_id)) + ret = self.session.query(Instance).filter_by(controller_id=controller_id).filter( + Instance.worker_group_id != None).all() if ret is None: return [] else: return ret - def get_all_instances(self, provider_id=None, controller_id=None, worker_group_id=None): if provider_id is not None: - #logging.debug("get_all_instances by provider_id={0}".format(provider_id)) + # logging.debug("get_all_instances by provider_id={0}".format(provider_id)) ret = self.session.query(Instance).filter_by(provider_id=provider_id).all() elif controller_id is not None: - #logging.debug("get_all_instances by controller_id={0}".format(controller_id)) + # logging.debug("get_all_instances by controller_id={0}".format(controller_id)) ret = self.session.query(Instance).filter_by(controller_id=controller_id).all() elif worker_group_id is not None: - #logging.debug("get_all_instances by worker_group_id={0}".format(worker_group_id)) + # logging.debug("get_all_instances by worker_group_id={0}".format(worker_group_id)) ret = self.session.query(Instance).filter_by(worker_group_id=worker_group_id).all() else: ret = self.session.query(Instance).all() @@ -388,9 +425,43 @@ def get_all_instances(self, provider_id=None, controller_id=None, worker_group_i def delete_instance(self, instance): """ Delete an instance. """ - #logging.debug("Deleting instance: {0}".format(instance)) + # logging.debug("Deleting instance: {0}".format(instance)) self.session.delete(instance) self.session.commit() + def get_all_jobs(self, controller_id=None): + if controller_id is not None: + # logging.debug("get_all_instances by controller_id={0}".format(controller_id)) + ret = self.session.query(ExecJob).filter_by(controller_id=controller_id).all() + else: + ret = self.session.query(ExecJob).all() + if ret is None: + return [] + else: + return ret + def get_job(self, jobID): + """ Get the objet for a job. 
""" + # logging.debug("get_job(jobID={0})".format(jobID)) + try: + id = int(jobID) + j = self.session.query(ExecJob).filter_by(id=id).first() + except Exception: + j = self.session.query(ExecJob).filter_by(jobID=jobID).first() + if j is None: + raise DatastoreException("Job {0} not found".format(jobID)) + return j + + def start_job(self, controller_id=None, exec_str=None): + """ Create the objet for a job. """ + date_str = str(datetime.datetime.now()) + jobID = str(uuid.uuid4()) + j = ExecJob(jobID=jobID, controller_id=controller_id, exec_str=exec_str, date=date_str) + self.session.add(j) + self.session.commit() + logging.debug("Creating ExecJob: {0}".format(j)) + return j + def delete_job(self, job): + self.session.delete(job) + self.session.commit() diff --git a/MolnsLib/molns_exec_helper.py b/MolnsLib/molns_exec_helper.py new file mode 100644 index 0000000..6f3a5b2 --- /dev/null +++ b/MolnsLib/molns_exec_helper.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +import os +import subprocess +import shlex +import json +import traceback +import sys + + +def run_job(exec_str, stdout_file): + with open(stdout_file, 'w') as stdout_fh: + try: + p = subprocess.Popen( + shlex.split(exec_str), + stdout=stdout_fh, + stderr=stdout_fh, + ) + pid = p.pid + # create pid file + pid_file = ".molns/pid" + return_code_file = ".molns/return_value" + with open(pid_file, 'w+') as fd: + fd.write(str(pid)) + # Wait on program execution... + return_code = p.wait() + print "Return code:", return_code + if return_code_file is not None: + with open(return_code_file, 'w+') as fd: + fd.write(str(return_code)) + except Exception as e: + stdout_fh.write('Error: {}'.format(str(e))) + stdout_fh.write(traceback.format_exc()) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + +if __name__ == "__main__": + with open(".molns/cmd",'r') as fd: + exec_str = fd.read() + print "exec_str", exec_str + run_job(exec_str, ".molns/stdout") diff --git a/MolnsLib/molns_landing_page.py b/MolnsLib/molns_landing_page.py new file mode 100644 index 0000000..0238f26 --- /dev/null +++ b/MolnsLib/molns_landing_page.py @@ -0,0 +1,93 @@ +from pipes import quote + +class MolnsLandingPage: + def __init__(self, port): + self.molns_landing_page = quote(""" + + + + + MOLNs + + + + + + +
+[HTML template body: a landing page titled "MOLNs", with the tagline
+"A cloud computing appliance for spatial stochastic simulation of
+biochemical systems.", a "To the IPython Interface" link (the {0} port
+placeholder is filled by .format(port) below), a note that the
+self-signed certificate triggers a browser warning that must be accepted,
+and three feature blurbs: "Write PyURDME models as sharable IPython
+notebooks" (with a "PyURDME API reference" link), "Advanced analysis with
+Python scientific libraries", and "Large scale computational experiments
+made easy".]
+ + +""".format(port)) diff --git a/MolnsLib/molns_provider.py b/MolnsLib/molns_provider.py index 587c3eb..fbee62f 100644 --- a/MolnsLib/molns_provider.py +++ b/MolnsLib/molns_provider.py @@ -1,16 +1,21 @@ import os import collections +from ssh import SSH + + class ProviderException(Exception): pass -class ProviderBase(): + +class ProviderBase: """ Abstract class. """ STATUS_RUNNING = 'running' STATUS_STOPPED = 'stopped' STATUS_TERMINATED = 'terminated' - SecurityGroupRule = collections.namedtuple("SecurityGroupRule", ["ip_protocol", "from_port", "to_port", "cidr_ip", "src_group_name"]) + SecurityGroupRule = collections.namedtuple("SecurityGroupRule", ["ip_protocol", "from_port", "to_port", "cidr_ip", + "src_group_name"]) FIREWALL_RULES = [ SecurityGroupRule("tcp", "22", "22", "0.0.0.0/0", None), @@ -22,7 +27,7 @@ class ProviderBase(): SecurityGroupRule("tcp", "9000", "65535", "0.0.0.0/0", None), ] - def __init__(self, name, config=None, config_dir=None,**kwargs): + def __init__(self, name, config=None, config_dir=None, **kwargs): self.config = {} self.name = name self.type = self.PROVIDER_TYPE @@ -35,6 +40,7 @@ def __init__(self, name, config=None, config_dir=None,**kwargs): self.config[k] = v for k,v in kwargs.iteritems(): self.__dict__[k] = v + self.ssh = SSH() def __getitem__(self, key): if key not in self.CONFIG_VARS.keys(): diff --git a/MolnsLib/ssh.py b/MolnsLib/ssh.py new file mode 100644 index 0000000..d4f7d0a --- /dev/null +++ b/MolnsLib/ssh.py @@ -0,0 +1,71 @@ +import paramiko +import time + + +class SSHException(Exception): + pass + + +class SSH: + def __init__(self): + self.ssh = paramiko.SSHClient() + self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + def exec_command(self, command, verbose=True): + try: + stdout_data = [] + stderr_data = [] + session = self.ssh.get_transport().open_session() + session.exec_command(command) + nbytes = 4096 + # TODO add a timeout here, don't wait for commands forever. 
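+            # One way (a sketch, not wired in): set a deadline before the loop,
+            #     deadline = time.time() + max_wait_seconds
+            # and bail out inside it with
+            #     if time.time() > deadline:
+            #         raise SSHException("Timed out: {0}".format(command))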
+ while True: + if session.recv_ready(): + msg = session.recv(nbytes) + stdout_data.append(msg) + if session.recv_stderr_ready(): + msg = session.recv_stderr(nbytes) + stderr_data.append(msg) + if session.exit_status_ready(): + break + time.sleep(0.1) # Sleep briefly to prevent over-polling + + status = session.recv_exit_status() + str_return = ''.join(stdout_data).splitlines() + stderr_str = ''.join(stderr_data) + session.close() + if status != 0: + raise paramiko.SSHException( + "Exit Code: {0}\tSTDOUT: {1}\tSTDERR: {2}\n\n".format(status, "\n".join(str_return), stderr_str)) + if verbose: + print "EXECUTING...\t{0}".format(command) + return str_return + except paramiko.SSHException as e: + if verbose: + print "FAILED......\t{0}\t{1}".format(command, e) + raise SSHException("{0}\t{1}".format(command, e)) + + def exec_multi_command(self, command, next_command): + try: + stdin, stdout, stderr = self.ssh.exec_command(command) + stdin.write(next_command) + stdin.flush() + status = stdout.channel.recv_exit_status() + if status != 0: + raise paramiko.SSHException("Exit Code: {0}\tSTDOUT: {1}\tSTDERR: {2}\n\n".format(status, stdout.read(), + stderr.read())) + except paramiko.SSHException as e: + print "FAILED......\t{0}\t{1}".format(command, e) + raise e + + def open_sftp(self): + return self.ssh.open_sftp() + + def connect(self, instance, port, username=None, key_filename=None): + return self.ssh.connect(instance.ip_address, port, username, key_filename=key_filename) + + def connect_cluster_node(self, ip_address, port, username, key_filename): + return self.ssh.connect(ip_address, port, username, key_filename=key_filename) + + def close(self): + self.ssh.close() \ No newline at end of file diff --git a/MolnsLib/ssh_deploy.py b/MolnsLib/ssh_deploy.py index 129b5ce..22771e9 100644 --- a/MolnsLib/ssh_deploy.py +++ b/MolnsLib/ssh_deploy.py @@ -1,8 +1,7 @@ - import json import logging import os -import paramiko +import Utils import string import sys import time @@ -10,9 +9,17 @@ import webbrowser import urllib2 +from constants import Constants + +from DockerProxy import DockerProxy +from ssh import SSH +from DockerSSH import DockerSSH + + class SSHDeployException(Exception): pass + class SSHDeploy: ''' This class is used for deploy IPython @@ -20,19 +27,16 @@ class SSHDeploy: DEFAULT_STOCHSS_PORT = 1443 DEFAULT_INTERNAL_STOCHSS_PORT = 8080 DEFAULT_GAE_ADMIN_PORT = 8000 - DEFAULT_PRIVATE_NOTEBOOK_PORT = 8081 - DEFAULT_PUBLIC_NOTEBOOK_PORT = 443 - DEFAULT_PRIVATE_WEBSERVER_PORT = 8001 - DEFAULT_PUBLIC_WEBSERVER_PORT = 80 SSH_CONNECT_WAITTIME = 5 MAX_NUMBER_SSH_CONNECT_ATTEMPTS = 25 DEFAULT_SSH_PORT = 22 DEFAULT_IPCONTROLLER_PORT = 9000 - DEFAULT_PYURDME_TEMPDIR="/mnt/pyurdme_tmp" - + DEFAULT_PYURDME_TEMPDIR = "/mnt/pyurdme_tmp" + + REMOTE_EXEC_JOB_PATH = "/mnt/molnsexec" - def __init__(self, config=None, config_dir=None): + def __init__(self, ssh, config=None, config_dir=None): if config is None: raise SSHDeployException("No config given") self.config = config @@ -40,23 +44,25 @@ def __init__(self, config=None, config_dir=None): if config_dir is None: self.config_dir = os.path.join(os.path.dirname(__file__), '/../.molns/') self.username = config['login_username'] - self.endpoint = self.DEFAULT_PRIVATE_NOTEBOOK_PORT + self.endpoint = Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT self.ssh_endpoint = self.DEFAULT_SSH_PORT self.keyfile = config.sshkeyfilename() - self.ssh = paramiko.SSHClient() - self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + if not (isinstance(ssh, SSH) or isinstance(ssh, 
DockerSSH)): + raise SSHDeployException("SSH object invalid.") + self.ssh = ssh + self.provider_name = config.name self.profile = 'default' - self.profile_dir = "/home/%s/.ipython/profile_default/" %(self.username) - self.ipengine_env = 'export INSTANT_OS_CALL_METHOD=SUBPROCESS;export PYURDME_TMPDIR={0};'.format(self.DEFAULT_PYURDME_TEMPDIR) + self.profile_dir = "/home/%s/.ipython/profile_default/" % (self.username) + self.ipengine_env = 'export INSTANT_OS_CALL_METHOD=SUBPROCESS;export PYURDME_TMPDIR={0};'.format( + self.DEFAULT_PYURDME_TEMPDIR) self.profile_dir_server = self.profile_dir self.profile_dir_client = self.profile_dir self.ipython_port = self.DEFAULT_IPCONTROLLER_PORT - - def scp_command(self, hostname): + def scp_command(self, hostname): return "scp -o 'StrictHostKeyChecking no' \ %s@%s:%ssecurity/ipcontroller-engine.json %ssecurity/" \ - %(self.username, hostname, self.profile_dir_server, self.profile_dir_client) + % (self.username, hostname, self.profile_dir_server, self.profile_dir_client) def prompt_for_password(self): import getpass @@ -72,12 +78,12 @@ def prompt_for_password(self): print "Passwords do not match, try again." def create_ssl_cert(self, cert_directory, cert_name_prefix, hostname): - self.exec_command("mkdir -p '{0}'".format(cert_directory)) + self.ssh.exec_command("mkdir -p '{0}'".format(cert_directory)) user_cert = cert_directory + '{0}-user_cert.pem'.format(cert_name_prefix) ssl_key = cert_directory + '{0}-ssl_key.pem'.format(cert_name_prefix) ssl_cert = cert_directory + '{0}-ssl_cert.pem'.format(cert_name_prefix) - ssl_subj = "/C=CN/ST=SH/L=STAR/O=Dis/CN=%s" % hostname - self.exec_command( + ssl_subj = "/C=CN/ST=SH/L=STAR/O=Dis/CN=%s" % hostname + self.ssh.exec_command( "openssl req -new -newkey rsa:4096 -days 365 " '-nodes -x509 -subj %s -keyout %s -out %s' % (ssl_subj, ssl_key, ssl_cert)) @@ -94,67 +100,73 @@ def create_ipython_config(self, hostname, notebook_password=None): else: passwd = notebook_password try: - sha1pass_out = self.exec_command(sha1cmd % passwd , verbose=False) - sha1pass = sha1pass_out[0].strip() + sha1pass_out = self.ssh.exec_command(sha1cmd % passwd, verbose=False) + if isinstance(sha1pass_out, list): + sha1pass = sha1pass_out[0].strip() + else: + sha1pass = sha1pass_out.strip() + logging.debug("SHA1PASS_OUT: {0}".format(sha1pass_out)) + logging.debug("SHA1PASS: " + sha1pass) except Exception as e: print "Failed: {0}\t{1}:{2}".format(e, hostname, self.ssh_endpoint) raise e - sftp = self.ssh.open_sftp() notebook_config_file = sftp.file(remote_file_name, 'w+') - notebook_config_file.write('\n'.join([ - "c = get_config()", - "c.IPKernelApp.pylab = 'inline'", - "c.NotebookApp.certfile = u'%s'" % ssl_cert, - "c.NotebookApp.keyfile = u'%s'" % ssl_key, - "c.NotebookApp.ip = '*'", - "c.NotebookApp.open_browser = False", - "c.NotebookApp.password = u'%s'" % sha1pass, - "c.NotebookApp.port = %d" % int(notebook_port), - #"c.Global.exec_lines = ['import dill', 'from IPython.utils import pickleutil', 'pickleutil.use_dill()', 'import logging','logging.getLogger(\'UFL\').setLevel(logging.ERROR)','logging.getLogger(\'FFC\').setLevel(logging.ERROR)']", - ])) + notebook_config_file.write('\n'.join([ + "c = get_config()", + "c.IPKernelApp.pylab = 'inline'", + "c.NotebookApp.certfile = u'%s'" % ssl_cert, + "c.NotebookApp.keyfile = u'%s'" % ssl_key, + "c.NotebookApp.ip = '*'", + "c.NotebookApp.open_browser = False", + "c.NotebookApp.password = u'%s'" % sha1pass, + "c.NotebookApp.port = %d" % int(notebook_port), + # "c.Global.exec_lines = ['import dill', 'from 
IPython.utils import pickleutil', 'pickleutil.use_dill()', 'import logging','logging.getLogger(\'UFL\').setLevel(logging.ERROR)','logging.getLogger(\'FFC\').setLevel(logging.ERROR)']", + ])) notebook_config_file.close() - - remote_file_name='%sipcontroller_config.py' % self.profile_dir_server + + remote_file_name = '%sipcontroller_config.py' % self.profile_dir_server notebook_config_file = sftp.file(remote_file_name, 'w+') notebook_config_file.write('\n'.join([ "c = get_config()", "c.IPControllerApp.log_level=20", "c.HeartMonitor.period=10000", "c.HeartMonitor.max_heartmonitor_misses=10", + "c.HubFactory.db_class = \"SQLiteDB\"", ])) notebook_config_file.close() - # IPython startup code - remote_file_name='{0}startup/molns_dill_startup.py'.format(self.profile_dir_server) - dill_init_file = sftp.file(remote_file_name, 'w+') - dill_init_file.write('\n'.join([ - 'import dill', - 'from IPython.utils import pickleutil', - 'pickleutil.use_dill()', - 'import logging', - "logging.getLogger('UFL').setLevel(logging.ERROR)", - "logging.getLogger('FFC').setLevel(logging.ERROR)" - "import cloud", - "logging.getLogger('Cloud').setLevel(logging.ERROR)" - ])) - dill_init_file.close() +# # IPython startup code +# remote_file_name='{0}startup/molns_dill_startup.py'.format(self.profile_dir_server) +# dill_init_file = sftp.file(remote_file_name, 'w+') +# dill_init_file.write('\n'.join([ +# 'import dill', +# 'from IPython.utils import pickleutil', +# 'pickleutil.use_dill()', +# 'import logging', +# "logging.getLogger('UFL').setLevel(logging.ERROR)", +# "logging.getLogger('FFC').setLevel(logging.ERROR)" +# "import cloud", +# "logging.getLogger('Cloud').setLevel(logging.ERROR)" +# ])) +# dill_init_file.close() sftp.close() def create_s3_config(self): + self.ssh.exec_command("mkdir -p .molns/") sftp = self.ssh.open_sftp() - remote_file_name='.molns/s3.json' + remote_file_name = '.molns/s3.json' s3_config_file = sftp.file(remote_file_name, 'w') config = {} config["provider_type"] = self.config.type - config["bucket_name"] = "molns_storage_{0}".format(self.get_cluster_id()) + config["bucket_name"] = "molns_storage_{1}_{0}".format(self.get_cluster_id(), self.provider_name) config["credentials"] = self.config.get_config_credentials() s3_config_file.write(json.dumps(config)) s3_config_file.close() sftp.close() def get_cluster_id(self): - """ retreive the cluster id from the config. """ + """ Retrieve the cluster id from the config. 
""" filename = os.path.join(self.config_dir, 'cluster_id') if not os.path.isfile(filename): new_id = str(uuid.uuid4()) @@ -163,22 +175,22 @@ def get_cluster_id(self): wfd.write(new_id) with open(filename) as fd: idstr = fd.readline().rstrip() - logging.debug("get_cluster_id() file {0} found id = {1}".format(filename,idstr)) + logging.debug("get_cluster_id() file {0} found id = {1}".format(filename, idstr)) if idstr is None or len(idstr) == 0: - raise SSHDeployException("error getting id for cluster from file, please check your file '{0}'".format(filename)) + raise SSHDeployException( + "error getting id for cluster from file, please check your file '{0}'".format(filename)) return idstr - def create_engine_config(self): sftp = self.ssh.open_sftp() - remote_file_name='%sipengine_config.py' % self.profile_dir_server + remote_file_name = '%sipengine_config.py' % self.profile_dir_server notebook_config_file = sftp.file(remote_file_name, 'w+') notebook_config_file.write('\n'.join([ - "c = get_config()", - "c.IPEngineApp.log_level=20", - "c.IPEngineApp.log_to_file = True", - "c.Global.exec_lines = ['import dill', 'from IPython.utils import pickleutil', 'pickleutil.use_dill()']", - ])) + "c = get_config()", + "c.IPEngineApp.log_level=20", + "c.IPEngineApp.log_to_file = True", + "c.Global.exec_lines = ['import dill', 'from IPython.utils import pickleutil', 'pickleutil.use_dill()']", + ])) notebook_config_file.close() sftp.close() self.create_s3_config() @@ -191,7 +203,7 @@ def _get_ipython_client_file(self): engine_file.close() sftp.close() return file_data - + def _put_ipython_client_file(self, file_data): sftp = self.ssh.open_sftp() engine_file = sftp.file(self.profile_dir_server + 'security/ipcontroller-client.json', 'w+') @@ -207,7 +219,7 @@ def _get_ipython_engine_file(self): engine_file.close() sftp.close() return file_data - + def _put_ipython_engine_file(self, file_data): sftp = self.ssh.open_sftp() engine_file = sftp.file(self.profile_dir_server + 'security/ipcontroller-engine.json', 'w+') @@ -217,90 +229,71 @@ def _put_ipython_engine_file(self, file_data): def exec_command_list_switch(self, command_list): for command in command_list: - self.exec_command(command) + self.ssh.exec_command(command) - def exec_command(self, command, verbose=True): - try: - stdout_data = [] - stderr_data = [] - session = self.ssh.get_transport().open_session() - session.exec_command(command) - nbytes = 4096 - #TODO add a timeout here, don't wait for commands forever. 
- while True: - if session.recv_ready(): - msg = session.recv(nbytes) - stdout_data.append(msg) - if session.recv_stderr_ready(): - msg = session.recv_stderr(nbytes) - stderr_data.append(msg) - if session.exit_status_ready(): - break - time.sleep(0.1) # Sleep breifly to prevent over-polling - - status = session.recv_exit_status() - str_return = ''.join(stdout_data).splitlines() - stderr_str = ''.join(stderr_data) - session.close() - if status != 0: - raise paramiko.SSHException("Exit Code: {0}\tSTDOUT: {1}\tSTDERR: {2}\n\n".format(status, "\n".join(str_return), stderr_str)) - if verbose: - print "EXECUTING...\t{0}".format(command) - return str_return - except paramiko.SSHException as e: - if verbose: - print "FAILED......\t{0}\t{1}".format(command,e) - raise SSHDeployException("{0}\t{1}".format(command,e)) - - def exec_multi_command(self, command, next_command): - try: - stdin, stdout, stderr = self.ssh.exec_command(command) - stdin.write(next_command) - stdin.flush() - status = stdout.channel.recv_exit_status() - if status != 0: - raise paramiko.SSHException("Exit Code: {0}\tSTDOUT: {1}\tSTDERR: {2}\n\n".format(status, stdout.read(), stderr.read())) - except paramiko.SSHException as e: - print "FAILED......\t{0}\t{1}".format(command,e) - raise e - - def connect(self, hostname, port): - print "Connecting to {0}:{1} keyfile={2}".format(hostname,port,self.keyfile) + def connect(self, instance, port=None): + if port is None: + port = self.ssh_endpoint + print "Connecting to {0}:{1} keyfile={2}".format(instance.ip_address, port, self.keyfile) for i in range(self.MAX_NUMBER_SSH_CONNECT_ATTEMPTS): try: - self.ssh.connect(hostname, port, username=self.username, - key_filename=self.keyfile) - print "SSH connection established" + self.ssh.connect(instance, port, username=self.username, + key_filename=self.keyfile) + if not isinstance(self.ssh, DockerSSH): + print "SSH connection established" + else: + print "Ready to execute commands in local container." 
return except Exception as e: - print "Retry in {0} seconds...\t\t{1}".format(self.SSH_CONNECT_WAITTIME,e) + print "Retry in {0} seconds...\t\t{1}".format(self.SSH_CONNECT_WAITTIME, e) time.sleep(self.SSH_CONNECT_WAITTIME) - raise SSHDeployException("ssh connect Failed!!!\t{0}:{1}".format(hostname,self.ssh_endpoint)) + raise SSHDeployException("ssh connect Failed!!!\t{0}:{1}".format(instance.ip_address, self.ssh_endpoint)) + + def deploy_molns_webserver(self, instance, controller_obj, openWebBrowser=True): + ip_address = instance.ip_address + logging.debug('deploy_molns_webserver(): openWebBrowser={0}, controller_obj.provider.type={1}\n'.format( + openWebBrowser, controller_obj.provider.type)) + + if controller_obj.provider.type == Constants.DockerProvider: + ip_address = "0.0.0.0:{0}".format(controller_obj.config["web_server_port"]) + logging.debug('deploy_molns_webserver(): ip_address={0}\n'.format(ip_address)) - def deploy_molns_webserver(self, ip_address): try: - self.connect(ip_address, self.ssh_endpoint) - self.exec_command("sudo rm -rf /usr/local/molns_webroot") - self.exec_command("sudo mkdir -p /usr/local/molns_webroot") - self.exec_command("sudo chown ubuntu /usr/local/molns_webroot") - self.exec_command("git clone https://github.com/Molns/MOLNS_web_landing_page.git /usr/local/molns_webroot") - self.exec_multi_command("cd /usr/local/molns_webroot; python -m SimpleHTTPServer {0} > ~/.molns_webserver.log 2>&1 &".format(self.DEFAULT_PRIVATE_WEBSERVER_PORT), '\n') - self.exec_command("sudo iptables -t nat -A PREROUTING -i eth0 -p tcp --dport {0} -j REDIRECT --to-port {1}".format(self.DEFAULT_PUBLIC_WEBSERVER_PORT,self.DEFAULT_PRIVATE_WEBSERVER_PORT)) + self.connect(instance, self.ssh_endpoint) + self.ssh.exec_command("sudo rm -rf /usr/local/molns_webroot") + self.ssh.exec_command("sudo mkdir -p /usr/local/molns_webroot") + self.ssh.exec_command("sudo chown ubuntu /usr/local/molns_webroot") + self.ssh.exec_command( + "git clone https://github.com/Molns/MOLNS_web_landing_page.git /usr/local/molns_webroot") + + # If DockerProvider, replace index page. 
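+ # The regenerated index embeds controller_obj.config["notebook_port"], so the "To the IPython Interface" link resolves through the container's host port mapping.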
+ if controller_obj.provider.type == Constants.DockerProvider: + from molns_landing_page import MolnsLandingPage + index_page = MolnsLandingPage(controller_obj.config["notebook_port"]).molns_landing_page + self.ssh.exec_command("echo {0} > /usr/local/molns_webroot/index.html".format(index_page)) + + self.ssh.exec_multi_command( + "cd /usr/local/molns_webroot; python -m SimpleHTTPServer {0} > ~/.molns_webserver.log 2>&1 &".format( + Constants.DEFAULT_PRIVATE_WEBSERVER_PORT), '\n') + self.ssh.exec_command( + "sudo iptables -t nat -A PREROUTING -i eth0 -p tcp --dport {0} -j REDIRECT --to-port {1}".format( + Constants.DEFAULT_PUBLIC_WEBSERVER_PORT, Constants.DEFAULT_PRIVATE_WEBSERVER_PORT)) self.ssh.close() - print "Deploying MOLNs webserver" url = "http://{0}/".format(ip_address) - while True: - try: - req = urllib2.urlopen(url) - sys.stdout.write("\n") - sys.stdout.flush() - break - except Exception as e: - #sys.stdout.write("{0}".format(e)) - sys.stdout.write(".") - sys.stdout.flush() - time.sleep(1) - webbrowser.open(url) + print "Deploying MOLNs webserver at {0}".format(url) + if openWebBrowser: + while True: + try: + req = urllib2.urlopen(url) + sys.stdout.write("\n") + sys.stdout.flush() + break + except Exception as e: + #sys.stdout.write("{0}".format(e)) + sys.stdout.write(".") + sys.stdout.flush() + time.sleep(1) + webbrowser.open(url) except Exception as e: print "Failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] @@ -308,11 +301,139 @@ def get_number_processors(self): cmd = 'python -c "import multiprocessing;print multiprocessing.cpu_count()"' try: - output = self.exec_command(cmd)[0].strip() + output = self.ssh.exec_command(cmd)[0].strip() return int(output) except Exception as e: raise SSHDeployException("Could not determine the number of processors on the remote system: {0}".format(e)) + def deploy_remote_execution_job(self, ip_address, jobID, exec_str): + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH, jobID) + EXEC_HELPER_FILENAME = 'molns_exec_helper.py' + try: + self.connect(ip_address, self.ssh_endpoint) + # parse command, retrieve files to upload (iff they are in the local directory) + # create remote directory + self.ssh.exec_command("sudo mkdir -p {0}".format(base_path)) + self.ssh.exec_command("sudo chown ubuntu {0}".format(base_path)) + self.ssh.exec_command("mkdir -p {0}/.molns/".format(base_path)) + sftp = self.ssh.open_sftp() + # Parse exec_str to get job files + files_to_transfer = [] + remote_command_list = [] + for c in exec_str.split(): + c2 = c + if c.startswith('~'): + c2 = os.path.expanduser(c) + if os.path.isfile(c2): + files_to_transfer.append(c2) + remote_command_list.append(os.path.basename(c2)) + else: + remote_command_list.append(c) + # Transfer job files + for f in files_to_transfer: + logging.debug('Uploading file {0}'.format(f)) + sftp.put(f, "{0}/{1}".format(base_path, os.path.basename(f))) + # Transfer helper file (to .molns subdirectory) + logging.debug('Uploading file {0}'.format(EXEC_HELPER_FILENAME)) + sftp.put( + os.path.join(os.path.dirname(os.path.abspath(__file__)), EXEC_HELPER_FILENAME), + "{0}/.molns/{1}".format(base_path, EXEC_HELPER_FILENAME) + ) + # Write 'cmd' file + remote_command = " ".join(remote_command_list) + logging.debug("Writing remote_command = {0}".format(remote_command)) + cmd_file = sftp.file("{0}/.molns/{1}".format(base_path, 'cmd'), 'w') + cmd_file.write(remote_command) + cmd_file.close() + # execute command + 
logging.debug("Executing command") + self.ssh.exec_command("cd {0};python {0}/.molns/{1} &".format(base_path, EXEC_HELPER_FILENAME)) + self.ssh.close() + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + def remote_execution_job_status(self, ip_address, jobID): + ''' Check the status of a remote process. + + Returns: Tuple with two elements: (Is_Running, Message) + Is_Running: bool True if the process is running + Message: str Description of the status + ''' + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH,jobID) + try: + self.connect(ip_address, self.ssh_endpoint) + sftp = self.ssh.open_sftp() + # Does the 'pid' file exists remotely? + try: + sftp.stat("{0}/.molns/pid".format(base_path)) + except (IOError, OSError) as e: + self.ssh.close() + raise SSHDeployException("Remote process not started (pid file not found") + # Does the 'return_value' file exist? + try: + sftp.stat("{0}/.molns/return_value".format(base_path)) + # Process is complete + return (False, "Remote process finished") + except (IOError, OSError) as e: + pass + # is the process running? + try: + self.ssh.exec_command("kill -0 `cat {0}/.molns/pid` > /dev/null 2&>1".format(base_path)) + return (True, "Remote process running") + except SSHDeployException as e: + raise SSHDeployException("Remote process not running (process not found)") + finally: + self.ssh.close() + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + def remote_execution_get_job_logs(self, ip_address, jobID, seek): + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH,jobID) + try: + self.connect(ip_address, self.ssh_endpoint) + sftp = self.ssh.open_sftp() + log = sftp.file("{0}/.molns/stdout".format(base_path), 'r') + log.seek(seek) + output = log.read() + self.ssh.close() + return output + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + def remote_execution_delete_job(self, ip_address, jobID): + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH,jobID) + try: + self.connect(ip_address, self.ssh_endpoint) + ### If process is still running, terminate it + try: + self.ssh.exec_command("kill -TERM `cat {0}/.molns/pid` > /dev/null 2&>1".format(base_path)) + except Exception as e: + pass + ### Remove the filess on the remote server + self.ssh.exec_command("rm -rf {0}/* {0}/.molns*".format(base_path)) + self.ssh.exec_command("sudo rmdir {0}".format(base_path)) + self.ssh.close() + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + def remote_execution_fetch_file(self, ip_address, jobID, filename, localfilename): + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH,jobID) + try: + self.connect(ip_address, self.ssh_endpoint) + sftp = self.ssh.open_sftp() + sftp.get("{0}/{1}".format(base_path, filename), localfilename) + self.ssh.close() + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% def deploy_stochss(self, ip_address, port=1443): try: print "{0}:{1}".format(ip_address, self.ssh_endpoint) @@ -320,7 
+441,8 @@ def deploy_stochss(self, ip_address, port=1443): print "Configure Nginx" (ssl_key, ssl_cert) = self.create_ssl_cert('/home/ubuntu/.nginx_cert/', 'stochss', ip_address) sftp = self.ssh.open_sftp() - with open(os.path.dirname(os.path.abspath(__file__))+os.sep+'..'+os.sep+'templates'+os.sep+'nginx.conf') as fd: + with open(os.path.dirname( + os.path.abspath(__file__)) + os.sep + '..' + os.sep + 'templates' + os.sep + 'nginx.conf') as fd: web_file = sftp.file("/tmp/nginx.conf", 'w+') buff = fd.read() buff = string.replace(buff, '###LISTEN_PORT###', str(port)) @@ -329,16 +451,16 @@ def deploy_stochss(self, ip_address, port=1443): print buff web_file.write(buff) web_file.close() - self.exec_command("sudo chown root /tmp/nginx.conf") - self.exec_command("sudo mv /tmp/nginx.conf /etc/nginx/nginx.conf") + self.ssh.exec_command("sudo chown root /tmp/nginx.conf") + self.ssh.exec_command("sudo mv /tmp/nginx.conf /etc/nginx/nginx.conf") print "Starting Nginx" - self.exec_command("sudo nginx") + self.ssh.exec_command("sudo nginx") print "Modifying StochSS to not open a webbrowser (TODO: move to install)" - self.exec_command("sed -i 's/webbrowser.open_new(stochss_url)/pass/' /usr/local/stochss/run.ubuntu.sh") + self.ssh.exec_command("sed -i 's/webbrowser.open_new(stochss_url)/pass/' /usr/local/stochss/launchapp.py") print "Starting StochSS" - self.exec_command("cd /usr/local/stochss/ && screen -d -m ./run.ubuntu.sh") + self.ssh.exec_command("cd /usr/local/stochss/ && screen -d -m ./run.ubuntu.sh") print "Waiting for StochSS to become available:" stochss_url = "https://{0}/".format(ip_address) while True: @@ -346,15 +468,16 @@ def deploy_stochss(self, ip_address, port=1443): req = urllib2.urlopen(stochss_url) break except Exception as e: - #sys.stdout.write("{0}".format(e)) + # sys.stdout.write("{0}".format(e)) sys.stdout.write(".") sys.stdout.flush() time.sleep(1) print "Success!" print "Configuring StochSS" admin_token = uuid.uuid4() - create_and_exchange_admin_token = "python /usr/local/stochss/generate_admin_token.py {0}".format(admin_token) - self.exec_command(create_and_exchange_admin_token) + create_and_exchange_admin_token = "python /usr/local/stochss/generate_admin_token.py {0}".format( + admin_token) + self.ssh.exec_command(create_and_exchange_admin_token) time.sleep(1) stochss_url = "{0}login?secret_key={1}".format(stochss_url, admin_token) print "StochSS available: {0}".format(stochss_url) @@ -363,50 +486,124 @@ def deploy_stochss(self, ip_address, port=1443): print "StochSS launch failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] - def deploy_ipython_controller(self, ip_address, notebook_password=None): - controller_hostname = '' - engine_file_data = '' + def __transfer_cluster_ssh_key_file(self, remote_target_dir, controller_obj): + local_ssh_key_file_path = controller_obj.config["ssh_key_file"] + + if local_ssh_key_file_path is None: + print "No SSH key file provided for cluster access." + return + + if not os.access(local_ssh_key_file_path, os.R_OK): + print "No read access to SSH key file. Skipping transfer." + return + + # Transfer secret key file. 
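+        # Copy the key byte-for-byte over SFTP; the chown/chmod below restrict it to the ubuntu user (mode 400), which ssh requires before it will use a key file.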
+ sftp = self.ssh.open_sftp() + remote_file_abs_path = os.path.join(remote_target_dir, Constants.ClusterKeyFileNameOnController) + remote_ssh_key_file = sftp.file(remote_file_abs_path, 'w') + + with open(local_ssh_key_file_path, "r") as local_ssh_key_file: + remote_ssh_key_file.write(local_ssh_key_file.read()) + + remote_ssh_key_file.close() + sftp.close() + + # Only user ubuntu has permission to access file. + self.ssh.exec_command("sudo chown ubuntu:ubuntu {0}".format(remote_file_abs_path)) + self.ssh.exec_command("sudo chmod 400 {0}".format(remote_file_abs_path)) + + def deploy_ipython_controller(self, instance, controller_obj, notebook_password=None, reserved_cpus=2, + resume=False): + ip_address = instance.ip_address + + logging.debug('deploy_ipython_controller(ip_address={0}, reserved_cpus={1})'.format(ip_address, reserved_cpus)) try: print "{0}:{1}".format(ip_address, self.ssh_endpoint) - self.connect(ip_address, self.ssh_endpoint) - + self.connect(instance, self.ssh_endpoint) + # Set up the symlink to local scratch space - self.exec_command("sudo mkdir -p /mnt/molnsarea") - self.exec_command("sudo chown ubuntu /mnt/molnsarea") - self.exec_command("sudo mkdir -p /mnt/molnsarea/cache") - self.exec_command("sudo chown ubuntu /mnt/molnsarea/cache") + self.ssh.exec_command("sudo mkdir -p /mnt/molnsarea") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsarea") + self.ssh.exec_command("sudo mkdir -p /mnt/molnsarea/cache") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsarea/cache") + + self.ssh.exec_command( + "test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsarea {0}".format('/home/ubuntu/localarea')) - self.exec_command("test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsarea {0}".format('/home/ubuntu/localarea')) - # Setup symlink to the shared scratch space - self.exec_command("sudo mkdir -p /mnt/molnsshared") - self.exec_command("sudo chown ubuntu /mnt/molnsshared") - self.exec_command("test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsshared {0}".format('/home/ubuntu/shared')) + self.ssh.exec_command("sudo mkdir -p /mnt/molnsshared") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsshared") + self.ssh.exec_command( + "test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsshared {0}".format('/home/ubuntu/shared')) # - self.exec_command("sudo mkdir -p {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) - self.exec_command("sudo chown ubuntu {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) + self.ssh.exec_command("sudo mkdir -p {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) + self.ssh.exec_command("sudo chown ubuntu {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) # - #self.exec_command("cd /usr/local/molnsutil && git pull && sudo python setup.py install") - self.exec_command("mkdir -p .molns") - self.create_s3_config() + # self.exec_command("cd /usr/local/molns_util && git pull && sudo python setup.py install") + + home_dir = "" + if controller_obj.provider.type == Constants.DockerProvider: + home_dir = "/home/ubuntu/" + + # If it's not a DockerController being resumed, then create config files and move sample notebooks to volume.
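+ # (A resumed Docker container keeps its volume, so the config files and sample notebooks written on first start should still be present; recreating them is skipped.)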
+ if not (controller_obj.provider.type == Constants.DockerProvider and resume is True): + self.ssh.exec_command("mkdir -p {0}.molns".format(home_dir)) + self.create_s3_config() + self.ssh.exec_command("ipython profile create {0}".format(self.profile)) + self.create_ipython_config(ip_address, notebook_password) + self.create_engine_config() + self.__transfer_cluster_ssh_key_file(remote_target_dir=home_dir, controller_obj=controller_obj) + #if controller_obj.provider.type == Constants.DockerProvider: + # self.ssh.exec_command("mv {0}*.ipynb {1}".format(home_dir, + # DockerProxy.get_container_volume_from_working_dir( + # controller_obj.config["working_directory"]))) + + # If provider is Docker, then ipython controller and ipengines aren't started + + if controller_obj.provider.type != Constants.DockerProvider: + self.ssh.exec_command( + "source /usr/local/pyurdme/pyurdme_init; screen -d -m ipcontroller --profile={1} --ip='*' --location={0} " + "--port={2} --log-to-file".format( + ip_address, self.profile, self.ipython_port), '\n') + # Give the controller time to startup + import time + logging.debug('Waiting 5 seconds for the IPython controller to start.') + time.sleep(5) + + # Start one ipengine per processor + num_procs = self.get_number_processors() + num_engines = num_procs - reserved_cpus + logging.debug( + 'Starting {0} engines (#cpu={1}, reserved_cpus={2})'.format(num_engines, num_procs, reserved_cpus)) + + for _ in range(num_engines): + self.ssh.exec_command( + "{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipengine --profile={0} --debug".format( + self.profile, self.ipengine_env)) + self.ssh.exec_command( + "{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipython notebook --profile={0}".format( + self.profile, self.ipengine_env)) + else: + #print "{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipython notebook --profile={0} --port={2} --ip='*'".format(self.profile, self.ipengine_env,Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT) + self.ssh.exec_command( + "{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipython notebook --profile={0} --port={2} --ip='*'".format(self.profile, self.ipengine_env, Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT) + ) + + + if controller_obj.provider.type != Constants.DockerProvider: + self.ssh.exec_command( + "sudo iptables -t nat -A PREROUTING -i eth0 -p tcp --dport {0} -j REDIRECT --to-port {1}".format( + Constants.DEFAULT_PUBLIC_NOTEBOOK_PORT, Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT)) - self.exec_command("ipython profile create {0}".format(self.profile)) - self.create_ipython_config(ip_address, notebook_password) - self.create_engine_config() - self.exec_command("source /usr/local/pyurdme/pyurdme_init; screen -d -m ipcontroller --profile={1} --ip='*' --location={0} --port={2} --log-to-file".format(ip_address, self.profile, self.ipython_port), '\n') - # Start one ipengine per processor - num_procs = self.get_number_processors() - num_engines = num_procs - 2 - for _ in range(num_engines): - self.exec_command("{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipengine --profile={0} --debug".format(self.profile, self.ipengine_env)) - self.exec_command("{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipython notebook --profile={0}".format(self.profile, self.ipengine_env)) - self.exec_command("sudo iptables -t nat -A PREROUTING -i eth0 -p tcp --dport {0} -j REDIRECT --to-port {1}".format(self.DEFAULT_PUBLIC_NOTEBOOK_PORT,self.DEFAULT_PRIVATE_NOTEBOOK_PORT)) - self.ssh.close() except Exception as e: print "Failed: {0}\t{1}:{2}".format(e, 
ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] - url = "http://%s" %(ip_address) - print "\nThe URL for your MOLNs cluster is: %s." % url + + finally: + self.ssh.close() + + url = "https://%s" % (ip_address) + print "\nThe URL for your MOLNs head node is: %s." % url def get_ipython_engine_file(self, ip_address): try: @@ -430,30 +627,28 @@ def get_ipython_client_file(self, ip_address): print "Failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] - def deploy_ipython_engine(self, ip_address, controler_ip, engine_file_data, controller_ssh_keyfile): try: print "{0}:{1}".format(ip_address, self.ssh_endpoint) self.connect(ip_address, self.ssh_endpoint) - - # Setup the symlink to local scratch space - self.exec_command("sudo mkdir -p /mnt/molnsarea") - self.exec_command("sudo chown ubuntu /mnt/molnsarea") - self.exec_command("sudo mkdir -p /mnt/molnsarea/cache") - self.exec_command("sudo chown ubuntu /mnt/molnsarea/cache") + # Setup the symlink to local scratch space + self.ssh.exec_command("sudo mkdir -p /mnt/molnsarea") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsarea") + self.ssh.exec_command("sudo mkdir -p /mnt/molnsarea/cache") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsarea/cache") - self.exec_command("test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsarea {0}".format('/home/ubuntu/localarea')) + self.ssh.exec_command( + "test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsarea {0}".format('/home/ubuntu/localarea')) # - self.exec_command("sudo mkdir -p {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) - self.exec_command("sudo chown ubuntu {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) + self.ssh.exec_command("sudo mkdir -p {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) + self.ssh.exec_command("sudo chown ubuntu {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) # Setup config for object store - self.exec_command("mkdir -p .molns") + self.ssh.exec_command("mkdir -p .molns") self.create_s3_config() - - + # SSH mount the controller on each engine - remote_file_name='.ssh/id_dsa' + remote_file_name = '/home/ubuntu/.ssh/controller_ssh_key' with open(controller_ssh_keyfile) as fd: sftp = self.ssh.open_sftp() controller_keyfile = sftp.file(remote_file_name, 'w') @@ -464,27 +659,25 @@ def deploy_ipython_engine(self, ip_address, controler_ip, engine_file_data, cont print "Remote file {0} has {1} bytes".format(remote_file_name, sftp.stat(remote_file_name).st_size) sftp.close() self.exec_command("chmod 0600 {0}".format(remote_file_name)) + self.ssh.exec_command("sudo rm -rf {0}".format('/home/ubuntu/shared')) self.exec_command("mkdir -p /home/ubuntu/shared") - self.exec_command("sshfs -o Ciphers=arcfour -o Compression=no -o reconnect -o idmap=user -o StrictHostKeyChecking=no ubuntu@{0}:/mnt/molnsshared /home/ubuntu/shared".format(controler_ip)) + self.ssh.exec_command("sshfs -o IdentityFile={1} -o Ciphers=arcfour -o Compression=no -o reconnect -o idmap=user -o StrictHostKeyChecking=no ubuntu@{0}:/mnt/molnsshared /home/ubuntu/shared".format(controler_ip, remote_file_name)) - # Update the Molnsutil package: TODO remove when molnsutil is stable - #self.exec_command("cd /usr/local/molnsutil && git pull && sudo python setup.py install") + # Update the Molnsutil package: TODO remove when molns_util is stable + # self.exec_command("cd /usr/local/molns_util && git pull && sudo python setup.py install") - self.exec_command("ipython profile create {0}".format(self.profile)) + self.ssh.exec_command("ipython profile create 
{0}".format(self.profile)) self.create_engine_config() # Just write the engine_file to the engine self._put_ipython_engine_file(engine_file_data) # Start one ipengine per processor for _ in range(self.get_number_processors()): - self.exec_command("{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipengine --profile={0} --debug".format(self.profile, self.ipengine_env)) + self.ssh.exec_command( + "{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipengine --profile={0} --debug".format( + self.profile, self.ipengine_env)) self.ssh.close() except Exception as e: print "Failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] - - -if __name__ == "__main__": - sshdeploy = SSHDeploy() - sshdeploy.deploy_ipython_controller() diff --git a/README.md b/README.md index 0cfee6c..55a1cab 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ MOLNs is a cloud appliance that will set up, start and manage a virtual platform for scalable, distributed computational experiments using (spatial) stochastic simulation software such as PyURDME (www.pyurdme.org) and StochKit/Gillespy (www.github.com/Gillespy/gillespy). In addition, MOLNs by default makes FEniCS/Dolfin available as-a Service. -Since MOLNs will configure and manage a virtual IPython Cluster (with a Notebook frontend), with Numpy, SciPy and Ipython Parallel enabled, it can also be useful for general contextualization and management of dynamic, cloud-agnostic (supports EC2 and OpenStack-based clouds) virtual IPython environments, even if you are not into spatial stochstic simulations in systems biology. +Since MOLNs will configure and manage a virtual IPython Cluster (with a Notebook frontend), with Numpy, SciPy and Ipython Parallel enabled, it can also be useful for general contextualization and management of dynamic, cloud-agnostic (supports EC2 and OpenStack-based clouds) virtual IPython environments, even if you are not into spatial stochastic simulations in systems biology. -Note: MOLNs is currenly compatible only with 'EC2-Classic', we are working on supporting Amazon VPC. +Note: MOLNs is currently compatible only with 'EC2-Classic', we are working on supporting Amazon VPC. ### Prerequisites ### To use MOLNs, you need valid credentials to an OpenStack cloud, Amazon Elastic Compute Cloud (EC2) or HP Helion public cloud. 
You also need Python, and the following packages: diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..ff7cc60 --- /dev/null +++ b/__init__.py @@ -0,0 +1 @@ +from molns import * diff --git a/molns.py b/molns.py index d7f9acd..766d1d4 100755 --- a/molns.py +++ b/molns.py @@ -1,35 +1,43 @@ #!/usr/bin/env python import os -import re import sys + +from MolnsLib.Utils import Log from MolnsLib.molns_datastore import Datastore, DatastoreException, VALID_PROVIDER_TYPES, get_provider_handle from MolnsLib.molns_provider import ProviderException -from collections import OrderedDict import subprocess from MolnsLib.ssh_deploy import SSHDeploy import multiprocessing import json - import logging + +from MolnsLib import constants + logger = logging.getLogger() -#logger.setLevel(logging.INFO) #for Debugging logger.setLevel(logging.CRITICAL) + # logger.setLevel(logging.INFO) #for Debugging logger.setLevel(logging.CRITICAL) + + + from collections import OrderedDict + ############################################### class MOLNSException(Exception): pass + ############################################### class MOLNSConfig(Datastore): def __init__(self, config_dir=None, db_file=None): - Datastore.__init__(self,config_dir=config_dir, db_file=db_file) - + Datastore.__init__(self, config_dir=config_dir, db_file=db_file) + def __str__(self): return "MOLNSConfig(config_dir={0})".format(self.config_dir) + ############################################### class MOLNSbase(): @classmethod - def merge_config(self, obj, config): + def merge_config(cls, obj, config): for key, conf, value in obj.get_config_vars(): if key not in config: if value is not None: @@ -61,9 +69,10 @@ def _get_workerobj(cls, args, config): worker_obj = config.get_object(name=worker_name, kind='WorkerGroup') except DatastoreException: worker_obj = None - #logging.debug("controller_obj {0}".format(controller_obj)) + # logging.debug("controller_obj {0}".format(controller_obj)) if worker_obj is None: - print "worker group '{0}' is not initialized, use 'molns worker setup {0}' to initialize the controller.".format(worker_name) + print "worker group '{0}' is not initialized, use 'molns worker setup {0}' to initialize the worker group.".format( + worker_name) else: print "No worker name specified, please specify a name" return worker_obj @@ -80,18 +89,21 @@ def _get_controllerobj(cls, args, config): controller_obj = config.get_object(name=controller_name, kind='Controller') except DatastoreException: controller_obj = None - #logging.debug("controller_obj {0}".format(controller_obj)) + # logging.debug("controller_obj {0}".format(controller_obj)) if controller_obj is None: - raise MOLNSException("controller '{0}' is not initialized, use 'molns controller setup {0}' to initialize the controller.".format(controller_name)) + raise MOLNSException( + "controller '{0}' is not initialized, use 'molns controller setup {0}' to initialize the controller.".format( + controller_name)) return controller_obj + class MOLNSController(MOLNSbase): @classmethod def controller_export(cls, args, config): """ Export the configuration of a controller. 
""" if len(args) < 1: - raise MOLNSException("USAGE: molns controller export name [Filename]\n"\ - "\tExport the data from the controller with the given name.") + raise MOLNSException("USAGE: molns controller export name [Filename]\n" \ + "\tExport the data from the controller with the given name.") controller_name = args[0] if len(args) > 1: filename = args[1] @@ -114,8 +126,8 @@ def controller_import(cls, args, config, json_data=None): """ Import the configuration of a controller. """ if json_data is None: if len(args) < 1: - raise MOLNSException("USAGE: molns controller import [Filename.json]\n"\ - "\Import the data from the controller with the given name.") + raise MOLNSException("USAGE: molns controller import [Filename.json]\n" \ + "\Import the data from the controller with the given name.") filename = args[0] with open(filename) as fd: data = json.load(fd) @@ -131,25 +143,28 @@ def controller_import(cls, args, config, json_data=None): controller_obj = config.get_object(controller_name, kind='Controller') msg += "Found existing controller\n" if controller_obj.provider.name != provider_obj.name: - raise MOLNSException("Import data has provider '{0}'. Controller {1} exists with provider {2}. provider conversion is not possible.".format(data['provider_name'], controller_obj.name, controller_obj.provider.name)) + raise MOLNSException( + "Import data has provider '{0}'. Controller {1} exists with provider {2}. provider conversion is not possible.".format( + data['provider_name'], controller_obj.name, controller_obj.provider.name)) except DatastoreException as e: - controller_obj = config.create_object(ptype=provider_obj.type, name=controller_name, kind='Controller', provider_id=provider_obj.id) + controller_obj = config.create_object(ptype=provider_obj.type, name=controller_name, kind='Controller', + provider_id=provider_obj.id) msg += "Creating new controller\n" cls.merge_config(controller_obj, data['config']) config.save_object(controller_obj, kind='Controller') msg += "Controller data imported\n" - return {'msg':msg} + return {'msg': msg} @classmethod def controller_get_config(cls, name=None, provider_type=None, config=None): """ Return a list of dict of config var for the controller config. Each dict in the list has the keys: 'key', 'value', 'type' - + Either 'name' or 'provider_type' must be specified. If 'name' is specified, then it will retreive the value from that config and return it in 'value' (or return the string '********' if that config is obfuscated, such passwords). 
- + """ if config is None: raise MOLNSException("no config specified") @@ -164,8 +179,8 @@ def controller_get_config(cls, name=None, provider_type=None, config=None): if obj is None and provider_type is not None: if provider_type not in VALID_PROVIDER_TYPES: raise MOLNSException("Unknown provider type '{0}'".format(provider_type)) - p_hand = get_provider_handle('Controller',provider_type) - obj = p_hand('__tmp__',data={},config_dir=config.config_dir) + p_hand = get_provider_handle('Controller', provider_type) + obj = p_hand('__tmp__', data={}, config_dir=config.config_dir) if obj is None: raise MOLNSException("Controller {0} not found".format(name)) @@ -191,16 +206,17 @@ def controller_get_config(cls, name=None, provider_type=None, config=None): if myval is not None and 'obfuscate' in conf and conf['obfuscate']: myval = '********' ret.append({ - 'question':question, - 'key':key, + 'question': question, + 'key': key, 'value': myval, - 'type':'string' + 'type': 'string' }) return ret @classmethod def setup_controller(cls, args, config): - """Setup a controller. Set the provider configuration for the head node. Use 'worker setup' to set the configuration for worker nodes + """Setup a controller. Set the provider configuration for the head node. + Use 'worker setup' to set the configuration for worker nodes. """ logging.debug("MOLNSController.setup_controller(config={0})".format(config)) # name @@ -214,19 +230,21 @@ def setup_controller(cls, args, config): except DatastoreException as e: # provider providers = config.list_objects(kind='Provider') - if len(providers)==0: - print "No providers configured, please configure one ('molns provider setup') before initializing controller." + if len(providers) == 0: + print "No providers configured, " \ + "please configure one ('molns provider setup') before initializing controller." return print "Select a provider:" - for n,p in enumerate(providers): - print "\t[{0}] {1}".format(n,p.name) - provider_ndx = int(raw_input_default("enter the number of provider:", default='0')) + for n, p in enumerate(providers): + print "\t[{0}] {1}".format(n, p.name) + provider_ndx = int(raw_input_default("Enter the number of provider:", default='0')) provider_id = providers[provider_ndx].id provider_obj = config.get_object(name=providers[provider_ndx].name, kind='Provider') logging.debug("using provider {0}".format(provider_obj)) # create object try: - controller_obj = config.create_object(ptype=provider_obj.type, name=controller_name, kind='Controller', provider_id=provider_id) + controller_obj = config.create_object(ptype=provider_obj.type, name=controller_name, kind='Controller', + provider_id=provider_id) except DatastoreException as e: print e return @@ -238,7 +256,7 @@ def list_controller(cls, args, config): """ List all the currently configured controllers.""" controllers = config.list_objects(kind='Controller') if len(controllers) == 0: - return {'msg':"No controllers configured"} + return {'msg': "No controllers configured"} else: table_data = [] for c in controllers: @@ -248,19 +266,19 @@ def list_controller(cls, args, config): except DatastoreException as e: provider_name = 'ERROR: {0}'.format(e) table_data.append([c.name, provider_name]) - return {'type':'table','column_names':['name', 'provider'], 'data':table_data} - + return {'type': 'table', 'column_names': ['name', 'provider'], 'data': table_data} + @classmethod def show_controller(cls, args, config): """ Show all the details of a controller config. 
""" if len(args) == 0: raise MOLNSException("USAGE: molns controller show name") - return {'msg':str(config.get_object(name=args[0], kind='Controller'))} + return {'msg': str(config.get_object(name=args[0], kind='Controller'))} @classmethod def delete_controller(cls, args, config): """ Delete a controller config. """ - #print "MOLNSProvider.delete_provider(args={0}, config={1})".format(args, config) + # print "MOLNSProvider.delete_provider(args={0}, config={1})".format(args, config) if len(args) == 0: raise MOLNSException("USAGE: molns cluser delete name") config.delete_object(name=args[0], kind='Controller') @@ -273,7 +291,7 @@ def ssh_controller(cls, args, config): if controller_obj is None: return # Check if any instances are assigned to this controller instance_list = config.get_controller_instances(controller_id=controller_obj.id) - #logging.debug("instance_list={0}".format(instance_list)) + # logging.debug("instance_list={0}".format(instance_list)) # Check if they are running ip = None if len(instance_list) > 0: @@ -283,11 +301,11 @@ def ssh_controller(cls, args, config): if status == controller_obj.STATUS_RUNNING: ip = i.ip_address if ip is None: - print "No active instance for this controller" - return + raise MOLNSException("No active instance for this controller") #print " ".join(['/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)]) #os.execl('/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)) - cmd = ['/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)] + cmd = ['/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i', + controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)] print " ".join(cmd) subprocess.call(cmd) print "SSH process completed" @@ -297,10 +315,54 @@ def upload_controller(cls, args, config): """ Copy a local file to the controller's home directory. 
""" logging.debug("MOLNSController.upload_controller(args={0})".format(args)) controller_obj = cls._get_controllerobj(args, config) - if controller_obj is None: return + if controller_obj is None: + return + # Check if any instances are assigned to this controller + instance_list = config.get_controller_instances(controller_id=controller_obj.id) + + # Check if they are running + inst = None + if len(instance_list) > 0: + for i in instance_list: + status = controller_obj.get_instance_status(i) + logging.debug("instance={0} has status={1}".format(i, status)) + if status == controller_obj.STATUS_RUNNING: + inst = i + if inst is None: + raise MOLNSException("No active instance for this controller") + + file_to_transfer = args[1] + logging.debug("File to transfer: {0}".format(file_to_transfer)) + + remote_file_path = os.path.join("/home/ubuntu/", os.path.basename(file_to_transfer)) + + controller_obj.ssh.connect(inst, SSHDeploy.DEFAULT_SSH_PORT, "ubuntu", controller_obj.provider.sshkeyfilename()) + + sftp = controller_obj.ssh.open_sftp() + remote_fh = sftp.file(remote_file_path, "w") + try: + with open(file_to_transfer, "r") as fh: + remote_fh.write(fh.read()) + finally: + remote_fh.close() + sftp.close() + + print "Transferred {0} to {1}@{2}:{3}".format(file_to_transfer, inst.ip_address, "ubuntu", remote_file_path) + + @classmethod + def get_controller(cls, args, config): + """ Copy a controller's file to the local filesystem. """ + logging.debug("MOLNSController.get_controller(args={0})".format(args)) + controller_obj = cls._get_controllerobj(args, config) + if controller_obj is None: + return + + if controller_obj.provider.type == constants.Constants.DockerProvider: + raise NotImplementedError("DockerController does not support this feature yet.") + # Check if any instances are assigned to this controller instance_list = config.get_controller_instances(controller_id=controller_obj.id) - #logging.debug("instance_list={0}".format(instance_list)) + # Check if they are running ip = None if len(instance_list) > 0: @@ -312,39 +374,70 @@ def upload_controller(cls, args, config): if ip is None: print "No active instance for this controller" return - #print " ".join(['/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)]) - #os.execl('/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)) - cmd = ['/usr/bin/scp','-r','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(), args[1], 'ubuntu@{0}:/home/ubuntu/'.format(ip)] + cmd = ['/usr/bin/scp','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i', + controller_obj.provider.sshkeyfilename(), 'ubuntu@{0}:{1}'.format(ip, args[1]), '.'] print " ".join(cmd) subprocess.call(cmd) - print "SCP process completed" + print "SSH process completed" @classmethod def put_controller(cls, args, config): - """ Copy a local file to the controller's shared area. """ + """ Copy a local file to the controller's and workers' shared area. 
""" logging.debug("MOLNSController.put_controller(args={0})".format(args)) controller_obj = cls._get_controllerobj(args, config) - if controller_obj is None: return + if controller_obj is None: + return + # Check if any instances are assigned to this controller instance_list = config.get_controller_instances(controller_id=controller_obj.id) - #logging.debug("instance_list={0}".format(instance_list)) + # Check if they are running - ip = None + inst = None if len(instance_list) > 0: for i in instance_list: status = controller_obj.get_instance_status(i) logging.debug("instance={0} has status={1}".format(i, status)) if status == controller_obj.STATUS_RUNNING: - ip = i.ip_address - if ip is None: - print "No active instance for this controller" - return - #print " ".join(['/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)]) - #os.execl('/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)) - cmd = ['/usr/bin/scp','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(), args[1], 'ubuntu@{0}:/home/ubuntu/shared'.format(ip)] - print " ".join(cmd) - subprocess.call(cmd) - print "SSH process completed" + inst = i + if inst is None: + raise MOLNSException("No active instance for this controller") + + file_to_transfer = args[1] + logging.debug("File to transfer: {0}".format(file_to_transfer)) + + remote_file_path = os.path.join("/home/ubuntu/shared", os.path.basename(file_to_transfer)) + + controller_obj.ssh.connect(inst, SSHDeploy.DEFAULT_SSH_PORT, "ubuntu", controller_obj.provider.sshkeyfilename()) + + sftp = controller_obj.ssh.open_sftp() + remote_fh = sftp.file(remote_file_path, "w") + try: + with open(file_to_transfer, "r") as fh: + remote_fh.write(fh.read()) + finally: + remote_fh.close() + sftp.close() + + print "Transferred {0} to {1}@{2}:{3}".format(file_to_transfer, inst.ip_address, "ubuntu", remote_file_path) + + @classmethod + def is_controller_running(cls, args, config): + logging.debug("MOLNSController.is_controller_running(args={0})".format(args)) + if len(args) > 0: + try: + controller_obj = cls._get_controllerobj(args, config) + except MOLNSException: + return {} + if controller_obj is None: return False + # Check if any instances are assigned to this controller + instance_list = config.get_controller_instances(controller_id=controller_obj.id) + if len(instance_list) > 0: + for i in instance_list: + status = controller_obj.get_instance_status(i) + if status == controller_obj.get_instance_status.STATUS_RUNNING: + return True + + return False @classmethod @@ -352,14 +445,17 @@ def status_controller(cls, args, config): """ Get status of the head node of a MOLNs controller. 
""" logging.debug("MOLNSController.status_controller(args={0})".format(args)) if len(args) > 0: - controller_obj = cls._get_controllerobj(args, config) - if controller_obj is None: return + try: + controller_obj = cls._get_controllerobj(args, config) + except MOLNSException: + return {} + if controller_obj is None: return {} # Check if any instances are assigned to this controller instance_list = config.get_controller_instances(controller_id=controller_obj.id) table_data = [] if len(instance_list) > 0: for i in instance_list: - #provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + # provider_name = config.get_object_by_id(i.provider_id, 'Provider').name try: p = config.get_object_by_id(i.provider_id, 'Provider') provider_name = p.name @@ -367,7 +463,9 @@ def status_controller(cls, args, config): provider_name = 'ERROR: {0}'.format(e) controller_name = config.get_object_by_id(i.controller_id, 'Controller').name status = controller_obj.get_instance_status(i) - table_data.append([controller_name, status, 'controller', provider_name, i.provider_instance_identifier, i.ip_address]) + table_data.append( + [controller_name, status, 'controller', provider_name, i.provider_instance_identifier, + i.ip_address]) else: return {'msg': "No instance running for this controller"} @@ -377,43 +475,50 @@ def status_controller(cls, args, config): for i in instance_list: worker_name = config.get_object_by_id(i.worker_group_id, 'WorkerGroup').name worker_obj = cls._get_workerobj([worker_name], config) - #provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + # provider_name = config.get_object_by_id(i.provider_id, 'Provider').name try: p = config.get_object_by_id(i.provider_id, 'Provider') provider_name = p.name except DatastoreException as e: provider_name = 'ERROR: {0}'.format(e) status = worker_obj.get_instance_status(i) - table_data.append([worker_name, status, 'worker', provider_name, i.provider_instance_identifier, i.ip_address]) - #table_print(['name','status','type','provider','instance id', 'IP address'],table_data) - r = {'type':'table', 'column_names':['name','status','type','provider','instance id', 'IP address'], 'data':table_data} + table_data.append( + [worker_name, status, 'worker', provider_name, i.provider_instance_identifier, i.ip_address]) + # table_print(['name','status','type','provider','instance id', 'IP address'],table_data) + r = {'type': 'table', 'column_names': ['name', 'status', 'type', 'provider', 'instance id', 'IP address'], + 'data': table_data} return r else: instance_list = config.get_all_instances() if len(instance_list) > 0: table_data = [] for i in instance_list: - provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + provider_obj = config.get_object_by_id(i.provider_id, 'Provider') + if provider_obj is None: + continue + provider_name = provider_obj.name controller_name = config.get_object_by_id(i.controller_id, 'Controller').name if i.worker_group_id is not None: worker_name = config.get_object_by_id(i.worker_group_id, 'WorkerGroup').name table_data.append([worker_name, 'worker', provider_name, i.provider_instance_identifier]) else: - table_data.append([controller_name, 'controller', provider_name, i.provider_instance_identifier]) + table_data.append( + [controller_name, 'controller', provider_name, i.provider_instance_identifier]) - r = {'type':'table', 'column_names':['name','type','provider','instance id'], 'data':table_data} - r['msg']= "\n\tUse 'molns status NAME' to see current status of each instance." 
+ r = {'type': 'table', 'column_names': ['name', 'type', 'provider', 'instance id'], 'data': table_data} + r['msg'] = "\n\tUse 'molns status NAME' to see current status of each instance." return r else: return {'msg': "No instance found"} - @classmethod - def start_controller(cls, args, config, password=None): + def start_controller(cls, args, config, password=None, openWebBrowser=True, reserved_cpus=2): """ Start the MOLNs controller. """ + resume = False logging.debug("MOLNSController.start_controller(args={0})".format(args)) controller_obj = cls._get_controllerobj(args, config) - if controller_obj is None: return + if controller_obj is None: + return # Check if any instances are assigned to this controller instance_list = config.get_all_instances(controller_id=controller_obj.id) # Check if they are running or stopped (if so, resume them) @@ -428,16 +533,19 @@ def start_controller(cls, args, config, password=None): print "Resuming instance at {0}".format(i.ip_address) controller_obj.resume_instance(i) inst = i + resume=True break if inst is None: # Start a new instance print "Starting new controller" inst = controller_obj.start_instance() + # deploying - sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir) - sshdeploy.deploy_ipython_controller(inst.ip_address, notebook_password=password) - sshdeploy.deploy_molns_webserver(inst.ip_address) - #sshdeploy.deploy_stochss(inst.ip_address, port=443) + sshdeploy = SSHDeploy(controller_obj.ssh, config=controller_obj.provider, config_dir=config.config_dir) + sshdeploy.deploy_ipython_controller(inst, controller_obj, notebook_password=password, resume=resume, + reserved_cpus=reserved_cpus) + sshdeploy.deploy_molns_webserver(inst, controller_obj, openWebBrowser=openWebBrowser) + # sshdeploy.deploy_stochss(inst.ip_address, port=443) @classmethod def stop_controller(cls, args, config): @@ -462,20 +570,20 @@ def stop_controller(cls, args, config): if status == worker_obj.STATUS_RUNNING or status == worker_obj.STATUS_STOPPED: print "Terminating worker '{1}' running at {0}".format(i.ip_address, worker_name) worker_obj.terminate_instance(i) - else: print "No instance running for this controller" - @classmethod def terminate_controller(cls, args, config): """ Terminate the head node of a MOLNs controller. 
""" logging.debug("MOLNSController.terminate_controller(args={0})".format(args)) controller_obj = cls._get_controllerobj(args, config) - if controller_obj is None: return + if controller_obj is None: + return instance_list = config.get_all_instances(controller_id=controller_obj.id) logging.debug("\tinstance_list={0}".format([str(i) for i in instance_list])) - # Check if they are running or stopped + print("\tinstance_list={0}".format([str(i) for i in instance_list])) + # Check if they are running or stopped if len(instance_list) > 0: for i in instance_list: if i.worker_group_id is None: @@ -490,8 +598,6 @@ def terminate_controller(cls, args, config): if status == worker_obj.STATUS_RUNNING or status == worker_obj.STATUS_STOPPED: print "Terminating worker '{1}' running at {0}".format(i.ip_address, worker_name) worker_obj.terminate_instance(i) - - else: print "No instance running for this controller" @@ -525,7 +631,8 @@ def connect_controller_to_local(cls, args, config): sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir) client_file_data = sshdeploy.get_ipython_client_file(inst.ip_address) home_dir = os.environ.get('HOME') - ipython_client_filename = os.path.join(home_dir, '.ipython/profile_{0}/'.format(profile_name), 'security/ipcontroller-client.json') + ipython_client_filename = os.path.join(home_dir, '.ipython/profile_{0}/'.format(profile_name), + 'security/ipcontroller-client.json') logging.debug("Writing file {0}".format(ipython_client_filename)) with open(ipython_client_filename, 'w') as fd: fd.write(client_file_data) @@ -539,8 +646,8 @@ class MOLNSWorkerGroup(MOLNSbase): def worker_group_export(cls, args, config): """ Export the configuration of a worker group. """ if len(args) < 1: - raise MOLNSException("USAGE: molns worker export name [Filename]\n"\ - "\tExport the data from the worker group with the given name.") + raise MOLNSException("USAGE: molns worker export name [Filename]\n" \ + "\tExport the data from the worker group with the given name.") worker_name = args[0] if len(args) > 1: filename = args[1] @@ -564,8 +671,8 @@ def worker_group_import(cls, args, config, json_data=None): """ Import the configuration of a worker group. """ if json_data is None: if len(args) < 1: - raise MOLNSException("USAGE: molns worker import [Filename.json]\n"\ - "\Import the data from the worker with the given name.") + raise MOLNSException("USAGE: molns worker import [Filename.json]\n" \ + "\Import the data from the worker with the given name.") filename = args[0] with open(filename) as fd: data = json.load(fd) @@ -585,27 +692,32 @@ def worker_group_import(cls, args, config, json_data=None): worker_obj = config.get_object(worker_name, kind='WorkerGroup') msg += "Found existing worker group\n" if worker_obj.provider.name != provider_obj.name: - raise MOLNSException("Import data has provider '{0}'. Worker group {1} exists with provider {2}. provider conversion is not possible.".format(data['provider_name'], worker_obj.name, worker_obj.provider.name)) + raise MOLNSException( + "Import data has provider '{0}'. Worker group {1} exists with provider {2}. provider conversion is not possible.".format( + data['provider_name'], worker_obj.name, worker_obj.provider.name)) if worker_obj.controller.name != controller_obj.name: - raise MOLNSException("Import data has controller '{0}'. Worker group {1} exists with controller {2}. 
-                raise MOLNSException("Import data has controller '{0}'. Worker group {1} exists with controller {2}. provider conversion is not possible.".format(data['controller_name'], worker_obj.name, worker_obj.controller.name))
+                raise MOLNSException(
+                    "Import data has controller '{0}'. Worker group {1} exists with controller {2}. Controller conversion is not possible.".format(
+                        data['controller_name'], worker_obj.name, worker_obj.controller.name))
        except DatastoreException as e:
-            worker_obj = config.create_object(ptype=provider_obj.type, name=worker_name, kind='WorkerGroup', provider_id=provider_obj.id, controller_id=controller_obj.id)
+            worker_obj = config.create_object(ptype=provider_obj.type, name=worker_name, kind='WorkerGroup',
+                                              provider_id=provider_obj.id, controller_id=controller_obj.id)
            msg += "Creating new worker group\n"
        cls.merge_config(worker_obj, data['config'])
        config.save_object(worker_obj, kind='WorkerGroup')
        msg += "Worker group data imported\n"
-        return {'msg':msg}
+        return {'msg': msg}

    @classmethod
    def worker_group_get_config(cls, name=None, provider_type=None, config=None):
        """ Return a list of dict of config var for the worker group config.
        Each dict in the list has the keys:
            'key', 'value', 'type'
-
+
        Either 'name' or 'provider_type' must be specified. If 'name' is
        specified, then it will retrieve the value from that config and return it
        in 'value' (or return the string '********' if that config is obfuscated,
        such as passwords).
-
+
        """
        if config is None:
            raise MOLNSException("no config specified")
@@ -620,8 +732,8 @@ def worker_group_get_config(cls, name=None, provider_type=None, config=None):
        if obj is None and provider_type is not None:
            if provider_type not in VALID_PROVIDER_TYPES:
                raise MOLNSException("Unknown provider type '{0}'".format(provider_type))
-            p_hand = get_provider_handle('WorkerGroup',provider_type)
-            obj = p_hand('__tmp__',data={},config_dir=config.config_dir)
+            p_hand = get_provider_handle('WorkerGroup', provider_type)
+            obj = p_hand('__tmp__', data={}, config_dir=config.config_dir)
        if obj is None:
            raise MOLNSException("Worker group {0} not found".format(name))
        ret = []
@@ -646,13 +758,13 @@ def worker_group_get_config(cls, name=None, provider_type=None, config=None):
        if myval is not None and 'obfuscate' in conf and conf['obfuscate']:
            myval = '********'
        ret.append({
-            'question':question,
-            'key':key,
+            'question': question,
+            'key': key,
            'value': myval,
-            'type':'string'
+            'type': 'string'
        })
        return ret
-
+
    @classmethod
    def setup_worker_groups(cls, args, config):
        """ Configure a worker group. """
@@ -667,31 +779,33 @@ def setup_worker_groups(cls, args, config):
        except DatastoreException as e:
            # provider
            providers = config.list_objects(kind='Provider')
-            if len(providers)==0:
-                print "No providers configured, please configure one ('molns provider setup') before initializing worker group."
+            if len(providers) == 0:
+                print "No providers configured, " \
+                      "please configure one ('molns provider setup') before initializing worker group."
return print "Select a provider:" - for n,p in enumerate(providers): - print "\t[{0}] {1}".format(n,p.name) - provider_ndx = int(raw_input_default("enter the number of provider:", default='0')) + for n, p in enumerate(providers): + print "\t[{0}] {1}".format(n, p.name) + provider_ndx = int(raw_input_default("Enter the number of provider:", default='0')) provider_id = providers[provider_ndx].id provider_obj = config.get_object(name=providers[provider_ndx].name, kind='Provider') logging.debug("using provider {0}".format(provider_obj)) # controller controllers = config.list_objects(kind='Controller') - if len(controllers)==0: - print "No controllers configured, please configure one ('molns controller setup') before initializing worker group." + if len(controllers) == 0: + print "No controllers configured, " \ + "please configure one ('molns controller setup') before initializing worker group." return print "Select a controller:" - for n,p in enumerate(controllers): - print "\t[{0}] {1}".format(n,p.name) - controller_ndx = int(raw_input_default("enter the number of controller:", default='0')) - controller_id = controllers[controller_ndx].id + for n, p in enumerate(controllers): + print "\t[{0}] {1}".format(n, p.name) + controller_ndx = int(raw_input_default("Enter the number of controller:", default='0')) controller_obj = config.get_object(name=controllers[controller_ndx].name, kind='Controller') logging.debug("using controller {0}".format(controller_obj)) # create object try: - worker_obj = config.create_object(ptype=provider_obj.type, name=group_name, kind='WorkerGroup', provider_id=provider_id, controller_id=controller_obj.id) + worker_obj = config.create_object(ptype=provider_obj.type, name=group_name, kind='WorkerGroup', + provider_id=provider_id, controller_id=controller_obj.id) except DatastoreException as e: print e return @@ -707,7 +821,7 @@ def list_worker_groups(cls, args, config): else: table_data = [] for g in groups: - #provider_name = config.get_object_by_id(g.provider_id, 'Provider').name + # provider_name = config.get_object_by_id(g.provider_id, 'Provider').name try: p = config.get_object_by_id(g.provider_id, 'Provider') provider_name = p.name @@ -719,7 +833,7 @@ def list_worker_groups(cls, args, config): except DatastoreException as e: controller_name = 'ERROR: {0}'.format(e) table_data.append([g.name, provider_name, controller_name]) - return {'type':'table','column_names':['name', 'provider', 'controller'], 'data':table_data} + return {'type': 'table', 'column_names': ['name', 'provider', 'controller'], 'data': table_data} @classmethod def show_worker_groups(cls, args, config): @@ -746,17 +860,20 @@ def status_worker_groups(cls, args, config): if worker_obj is None: return # Check if any instances are assigned to this worker instance_list = config.get_all_instances(worker_group_id=worker_obj.id) - # Check if they are running or stopped + # Check if they are running or stopped if len(instance_list) > 0: table_data = [] for i in instance_list: status = worker_obj.get_instance_status(i) - #print "{0} type={3} ip={1} id={2}".format(status, i.ip_address, i.provider_instance_identifier, worker_obj.PROVIDER_TYPE) + # print "{0} type={3} ip={1} id={2}".format(status, i.ip_address, i.provider_instance_identifier, worker_obj.PROVIDER_TYPE) worker_name = config.get_object_by_id(i.worker_group_id, 'WorkerGroup').name provider_name = config.get_object_by_id(i.provider_id, 'Provider').name status = worker_obj.get_instance_status(i) - table_data.append([worker_name, status, 'worker', 
provider_name, i.provider_instance_identifier, i.ip_address]) - return {'type':'table','column_names':['name','status','type','provider','instance id', 'IP address'],'data':table_data} + table_data.append( + [worker_name, status, 'worker', provider_name, i.provider_instance_identifier, i.ip_address]) + return {'type': 'table', + 'column_names': ['name', 'status', 'type', 'provider', 'instance id', 'IP address'], + 'data': table_data} else: return {'msg': "No worker instances running for this cluster"} else: @@ -772,15 +889,14 @@ def start_worker_groups(cls, args, config): num_vms_to_start = int(num_vms) controller_ip = cls.__launch_workers__get_controller(worker_obj, config) if controller_ip is None: return - #logging.debug("\tcontroller_ip={0}".format(controller_ip)) + # logging.debug("\tcontroller_ip={0}".format(controller_ip)) try: inst_to_deploy = cls.__launch_worker__start_or_resume_vms(worker_obj, config, num_vms_to_start) - #logging.debug("\tinst_to_deploy={0}".format(inst_to_deploy)) + # logging.debug("\tinst_to_deploy={0}".format(inst_to_deploy)) cls.__launch_worker__deploy_engines(worker_obj, controller_ip, inst_to_deploy, config) except ProviderException as e: print "Could not start workers: {0}".format(e) - @classmethod def add_worker_groups(cls, args, config): """ Add workers of a MOLNs cluster. """ @@ -822,7 +938,6 @@ def __launch_workers__get_controller(cls, worker_obj, config): print "No controller running for this worker group." return return controller_ip - @classmethod def __launch_worker__start_or_resume_vms(cls, worker_obj, config, num_vms_to_start=0): @@ -841,12 +956,12 @@ def __launch_worker__start_or_resume_vms(cls, worker_obj, config, num_vms_to_sta print "Resuming worker at {0}".format(i.ip_address) inst_to_resume.append(i) num_vms_to_start -= 1 - #logging.debug("inst_to_resume={0}".format(inst_to_resume)) + # logging.debug("inst_to_resume={0}".format(inst_to_resume)) if len(inst_to_resume) > 0: worker_obj.resume_instance(inst_to_resume) inst_to_deploy.extend(inst_to_resume) inst_to_deploy.extend(cls.__launch_worker__start_vms(worker_obj, num_vms_to_start)) - #logging.debug("inst_to_deploy={0}".format(inst_to_deploy)) + # logging.debug("inst_to_deploy={0}".format(inst_to_deploy)) return inst_to_deploy @classmethod @@ -856,12 +971,11 @@ def __launch_worker__start_vms(cls, worker_obj, num_vms_to_start=0): if num_vms_to_start > 0: # Start a new instances print "Starting {0} new workers".format(num_vms_to_start) - inst_to_deploy = worker_obj.start_instance(num=num_vms_to_start) - if not isinstance(inst_to_deploy,list): + inst_to_deploy = worker_obj.start_instance(num=num_vms_to_start) + if not isinstance(inst_to_deploy, list): inst_to_deploy = [inst_to_deploy] return inst_to_deploy - @classmethod def __launch_worker__deploy_engines(cls, worker_obj, controller_ip, inst_to_deploy, config): print "Deploying on {0} workers".format(len(inst_to_deploy)) @@ -875,8 +989,11 @@ def __launch_worker__deploy_engines(cls, worker_obj, controller_ip, inst_to_depl logging.debug("__launch_worker__deploy_engines() workpool(size={0})".format(len(inst_to_deploy))) jobs = [] for i in inst_to_deploy: - logging.debug("multiprocessing.Process(target=engine_ssh.deploy_ipython_engine({0}, engine_file)".format(i.ip_address)) - p = multiprocessing.Process(target=engine_ssh.deploy_ipython_engine, args=(i.ip_address, controller_ip, engine_file, controller_ssh_keyfile,)) + logging.debug( + "multiprocessing.Process(target=engine_ssh.deploy_ipython_engine({0}, engine_file)".format( + i.ip_address)) + 
p = multiprocessing.Process(target=engine_ssh.deploy_ipython_engine, args=(
+                i.ip_address, controller_ip, engine_file, controller_ssh_keyfile,))
            jobs.append(p)
            p.start()
        logging.debug("__launch_worker__deploy_engines() joining processes.")
@@ -933,6 +1050,7 @@ def terminate_worker_groups(cls, args, config):
        else:
            print "No workers running in the worker group"

+
###############################################
class MOLNSProvider(MOLNSbase):
@@ -940,8 +1058,8 @@ class MOLNSProvider(MOLNSbase):
    def provider_export(cls, args, config):
        """ Export the configuration of a provider. """
        if len(args) < 1:
-            raise MOLNSException("USAGE: molns provider export name [Filename]\n"\
-                                 "\tExport the data from the provider with the given name.")
+            raise MOLNSException("USAGE: molns provider export name [Filename]\n" \
+                                 "\tExport the data from the provider with the given name.")
        provider_name = args[0]
        if len(args) > 1:
            filename = args[1]
@@ -964,8 +1082,8 @@ def provider_import(cls, args, config, json_data=None):
        """ Import the configuration of a provider. """
        if json_data is None:
            if len(args) < 1:
-                raise MOLNSException("USAGE: molns provider import [Filename.json]\n"\
-                                     "\Import the data from the provider with the given name.")
+                raise MOLNSException("USAGE: molns provider import [Filename.json]\n" \
+                                     "\tImport provider data from the given file.")
            filename = args[0]
            with open(filename) as fd:
                data = json.load(fd)
@@ -979,26 +1097,27 @@ def provider_import(cls, args, config, json_data=None):
            provider_obj = config.get_object(provider_name, kind='Provider')
            msg += "Found existing provider\n"
            if provider_obj.type != data['type']:
-                raise MOLNSException("Import data has provider type '{0}'. Provier {1} exists with type {2}. Type conversion is not possible.".format(data['type'], provider_obj.name, provider_obj.type))
+                raise MOLNSException(
+                    "Import data has provider type '{0}'. Provider {1} exists with type {2}. Type conversion is not possible.".format(
+                        data['type'], provider_obj.name, provider_obj.type))
        except DatastoreException as e:
            provider_obj = config.create_object(name=provider_name, ptype=data['type'], kind='Provider')
            msg += "Creating new provider\n"
        cls.merge_config(provider_obj, data['config'])
        config.save_object(provider_obj, kind='Provider')
        msg += "Provider data imported\n"
-        return {'msg':msg}
-
+        return {'msg': msg}

    @classmethod
    def provider_get_config(cls, name=None, provider_type=None, config=None):
        """ Return a list of dict of config var for the provider config.
        Each dict in the list has the keys:
            'key', 'value', 'type'
-
+
        Either 'name' or 'provider_type' must be specified. If 'name' is
        specified, then it will retrieve the value from that config and return it
        in 'value' (or return the string '********' if that config is obfuscated,
        such as passwords).
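+
+        For example, a single returned entry might look like this (illustrative
+        placeholder values, not a real config key):
+
+            {'question': 'EC2 access key', 'key': 'aws_access_key',
+             'value': '********', 'type': 'string'}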
- + """ if config is None: raise MOLNSException("no config specified") @@ -1013,8 +1132,8 @@ def provider_get_config(cls, name=None, provider_type=None, config=None): if obj is None and provider_type is not None: if provider_type not in VALID_PROVIDER_TYPES: raise MOLNSException("unknown provider type '{0}'".format(provider_type)) - p_hand = get_provider_handle('Provider',provider_type) - obj = p_hand('__tmp__',data={},config_dir=config.config_dir) + p_hand = get_provider_handle('Provider', provider_type) + obj = p_hand('__tmp__', data={}, config_dir=config.config_dir) if obj is None: raise MOLNSException("provider {0} not found".format(name)) ret = [] @@ -1039,40 +1158,36 @@ def provider_get_config(cls, name=None, provider_type=None, config=None): if myval is not None and 'obfuscate' in conf and conf['obfuscate']: myval = '********' ret.append({ - 'question':question, - 'key':key, + 'question': question, + 'key': key, 'value': myval, - 'type':'string' + 'type': 'string' }) return ret @classmethod def provider_setup(cls, args, config): """ Setup a new provider. Create the MOLNS image and SSH key if necessary.""" - #print "MOLNSProvider.provider_setup(args={0})".format(args) if len(args) < 1: print "USAGE: molns provider setup name" print "\tCreates a new provider with the given name." return - # find the \n\tWhere PROVIDER_TYPE is one of: {0}".format(VALID_PROVIDER_TYPES) - # provider name - provider_name = args[0] # check if provider exists try: provider_obj = config.get_object(args[0], kind='Provider') - except DatastoreException as e: + except DatastoreException: # ask provider type print "Select a provider type:" - for n,p in enumerate(VALID_PROVIDER_TYPES): - print "\t[{0}] {1}".format(n,p) + for n, p in enumerate(VALID_PROVIDER_TYPES): + print "\t[{0}] {1}".format(n, p) while True: try: - provider_ndx = int(raw_input_default("enter the number of type:", default='0')) + provider_ndx = int(raw_input_default("Enter the number of type:", default='0')) provider_type = VALID_PROVIDER_TYPES[provider_ndx] break except (ValueError, IndexError): pass - logging.debug("provider type '{0}'".format(provider_type)) + logging.debug("Provider type '{0}'".format(provider_type)) # Create provider try: provider_obj = config.create_object(name=args[0], ptype=provider_type, kind='Provider') @@ -1083,15 +1198,15 @@ def provider_setup(cls, args, config): print "Enter configuration for provider {0}:".format(args[0]) setup_object(provider_obj) config.save_object(provider_obj, kind='Provider') - # - cls.provider_initialize(args[0], config) + cls.provider_initialize(args[0], config) @classmethod def provider_initialize(cls, provider_name, config): """ Create the MOLNS image and SSH key if necessary.""" try: provider_obj = config.get_object(provider_name, kind='Provider') + print "Provider object {0}".format(provider_obj) except DatastoreException as e: raise MOLNSException("provider not found") # @@ -1115,7 +1230,7 @@ def provider_initialize(cls, provider_name, config): provider_obj.create_seurity_group() else: print "security group={0} is valid.".format(provider_obj['group_name']) - + # check for MOLNS image if provider_obj['molns_image_name'] is None or provider_obj['molns_image_name'] == '': if provider_obj['ubuntu_image_name'] is None or provider_obj['ubuntu_image_name'] == '': @@ -1124,13 +1239,12 @@ def provider_initialize(cls, provider_name, config): print "Creating new image, this process can take a long time (10-30 minutes)." 
provider_obj['molns_image_name'] = provider_obj.create_molns_image() elif not provider_obj.check_molns_image(): - print "Error: an molns image was provided, but it is not available in cloud." + print "Error: a molns image ID was provided, but it does not exist." return print "Success." config.save_object(provider_obj, kind='Provider') - - + @classmethod def provider_rebuild(cls, args, config): """ Rebuild the MOLNS image.""" @@ -1155,7 +1269,7 @@ def provider_rebuild(cls, args, config): @classmethod def provider_list(cls, args, config): """ List all the currently configured providers.""" - #print "MOLNSProvider.provider_list(args={0}, config={1})".format(args, config) + # print "MOLNSProvider.provider_list(args={0}, config={1})".format(args, config) providers = config.list_objects(kind='Provider') if len(providers) == 0: print "No providers configured" @@ -1163,14 +1277,14 @@ def provider_list(cls, args, config): table_data = [] for p in providers: table_data.append([p.name, p.type]) - #table_print(['name', 'type'], table_data) - r = {'type':'table', 'column_names':['name', 'type'],'data':table_data} + # table_print(['name', 'type'], table_data) + r = {'type': 'table', 'column_names': ['name', 'type'], 'data': table_data} return r @classmethod def show_provider(cls, args, config): """ Show all the details of a provider config. """ - #print "MOLNSProvider.show_provider(args={0}, config={1})".format(args, config) + # print "MOLNSProvider.show_provider(args={0}, config={1})".format(args, config) if len(args) == 0: print "USAGE: molns provider show name" return @@ -1179,11 +1293,13 @@ def show_provider(cls, args, config): @classmethod def delete_provider(cls, args, config): """ Delete a provider config. """ - #print "MOLNSProvider.delete_provider(args={0}, config={1})".format(args, config) + # print "MOLNSProvider.delete_provider(args={0}, config={1})".format(args, config) if len(args) == 0: print "USAGE: molns provider delete name" return config.delete_object(name=args[0], kind='Provider') + + ############################################### class MOLNSInstances(MOLNSbase): @@ -1194,15 +1310,19 @@ def show_instances(cls, args, config): if len(instance_list) > 0: table_data = [] for i in instance_list: - provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + provider_obj = config.get_object_by_id(i.provider_id, 'Provider') + if provider_obj is None: + continue + provider_name = provider_obj.name + #print "provider_obj.type",provider_obj.type if i.worker_group_id is not None: - name = config.get_object_by_id(i.worker_id, 'WorkerGroup').name + name = config.get_object_by_id(i.worker_group_id, 'WorkerGroup').name itype = 'worker' else: name = config.get_object_by_id(i.controller_id, 'Controller').name itype = 'controller' table_data.append([i.id, provider_name, i.provider_instance_identifier, itype, name]) - table_print(['ID', 'provider', 'instance id', 'type', 'name'],table_data) + table_print(['ID', 'provider', 'instance id', 'type', 'name'], table_data) else: print "No instance found" @@ -1224,7 +1344,6 @@ def delete_instance(cls, args, config): config.delete_instance(instance) print "instance {0} deleted".format(instance_id) - @classmethod def clear_instances(cls, args, config): """ delete all instances in the db """ @@ -1237,6 +1356,157 @@ def clear_instances(cls, args, config): else: print "No instance found" +############################################### + +class MOLNSExec(MOLNSbase): + @classmethod + def _get_ip_for_job(cls, job, config): + instance_list = 
config.get_controller_instances(controller_id=job.controller_id)
+        controller_obj = config.get_object_by_id(job.controller_id, 'Controller')
+        if controller_obj is None:
+            raise MOLNSException("Could not find the controller for this job")
+        # Check if they are running
+        ip = None
+        if len(instance_list) > 0:
+            for i in instance_list:
+                status = controller_obj.get_instance_status(i)
+                logging.debug("instance={0} has status={1}".format(i, status))
+                if status == controller_obj.STATUS_RUNNING:
+                    ip = i.ip_address
+        return ip, controller_obj
+
+    @classmethod
+    def start_job(cls, args, config):
+        ''' Execute a process on the controller.'''
+        # Get Controller
+        if len(args) < 2:
+            raise MOLNSException("USAGE: molns exec start name [Command]\n"\
+                                 "\tExecute 'Command' on the controller with the given name.")
+
+        else:
+            controller_obj = cls._get_controllerobj(args, config)
+            if controller_obj is None:
+                raise Exception("Controller {0} not found".format(args[0]))
+            # Check if controller is running
+            instance_list = config.get_all_instances(controller_id=controller_obj.id)
+            inst = None
+            if len(instance_list) > 0:
+                for i in instance_list:
+                    status = controller_obj.get_instance_status(i)
+                    if status == controller_obj.STATUS_RUNNING:
+                        inst = i
+                        break
+            if inst is None:
+                raise MOLNSException("Controller {0} is not running.".format(args[0]))
+            # Create Datastore object
+            exec_str = args[1]
+            job = config.start_job(controller_id=controller_obj.id, exec_str=exec_str)
+            # execute command
+            sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir)
+            sshdeploy.deploy_remote_execution_job(inst.ip_address, job.jobID, exec_str)
+            #
+            return {'JobID':job.jobID, 'id':job.id, 'msg':"Job started, ID={1} JobID={0}".format(job.jobID,job.id)}
+
+    @classmethod
+    def job_status(cls, args, config):
+        ''' Check if a process is still running on the controller.'''
+        if len(args) < 1:
+            raise MOLNSException("USAGE: molns exec status [JobID]\n"\
+                                 "\tCheck if a process is still running on the controller.")
+        j = config.get_job(jobID=args[0])
+        ip, controller_obj = cls._get_ip_for_job(j, config)
+        if ip is None:
+            return {'running':False, 'msg': "No active instance for this controller"}
+        sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir)
+        (running, msg) = sshdeploy.remote_execution_job_status(ip, j.jobID)
+        return {'running':running, 'msg':msg}
+
+    @classmethod
+    def job_logs(cls, args, config):
+        ''' Return the output (stdout/stderr) of the process.'''
+        if len(args) < 1:
+            raise MOLNSException("USAGE: molns exec logs [JobID] [seek]\n"\
+                                 "\tReturn the output (stdout/stderr) of the process (starting from 'seek').")
+        j = config.get_job(jobID=args[0])
+        ip, controller_obj = cls._get_ip_for_job(j, config)
+        if ip is None:
+            raise MOLNSException("No active instance for this controller")
+        seek = 0
+        if len(args) > 1:
+            try:
+                seek = int(args[1])
+            except Exception:
+                raise MOLNSException("'seek' must be an integer")
+        sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir)
+        logs = sshdeploy.remote_execution_get_job_logs(ip, j.jobID, seek)
+        return {'msg': logs}
+
+    @classmethod
+    def fetch_job_results(cls, args, config, overwrite=False):
+        ''' Transfer files created by the process from the controller to the local file system.'''
+        if len(args) < 2:
+            raise MOLNSException("USAGE: molns exec fetch [JobID] [filename] (destination filename)\n"\
+                                 "\tTransfer 'filename' created by the job from the controller to the local file system.")
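+        # 'filename' is the path of the file on the controller; the optional
+        # third argument names the local destination (defaults to 'filename').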
+        filename = args[1]
+        j = config.get_job(jobID=args[0])
+        if j is None:
+            raise MOLNSException("Job not found")
+        ip, controller_obj = cls._get_ip_for_job(j, config)
+        if ip is None:
+            raise MOLNSException("No active instance for this controller")
+        sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir)
+        if os.path.isfile(filename) and not overwrite and (len(args) < 3 or args[-1] != '--force'):
+            raise MOLNSException("File {0} exists, use '--force' or overwrite=True to ignore.".format(filename))
+        if len(args) >= 3 and not args[2].startswith('--'):
+            localfile = args[2]
+        else:
+            localfile = filename
+        sshdeploy.remote_execution_fetch_file(ip, j.jobID, filename, localfile)
+        return {'msg': "File transfer complete."}
+
+    @classmethod
+    def cleanup_job(cls, args, config):
+        ''' Remove process files from the controller (will kill active processes if running).'''
+        if len(args) < 1:
+            raise MOLNSException("USAGE: molns exec cleanup [JobID]\n"\
+                                 "\tRemove process files from the controller (will kill active processes if running).")
+        j = config.get_job(jobID=args[0])
+        if j is None:
+            return {'msg':"Job not found"}
+        ip, controller_obj = cls._get_ip_for_job(j, config)
+        if ip is None:
+            raise MOLNSException("No active instance for this controller")
+        sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir)
+        sshdeploy.remote_execution_delete_job(ip, j.jobID)
+        config.delete_job(j)
+        return {'msg':"Job {0} deleted".format(args[0])}
+
+    @classmethod
+    def list_jobs(cls, args, config):
+        ''' List all jobs. If 'name' is specified, list all jobs on the named controller.'''
+        if len(args) > 0:
+            controller_obj = cls._get_controllerobj(args, config)
+            if controller_obj is None:
+                raise Exception("Controller {0} not found".format(args[0]))
+            jobs = config.get_all_jobs(controller_id=controller_obj.id)
+        else:
+            jobs = config.get_all_jobs()
+
+        if len(jobs) == 0:
+            return {'msg':"No jobs found"}
+        else:
+            table_data = []
+            for j in jobs:
+                try:
+                    p = config.get_object_by_id(j.controller_id, 'Controller')
+                    controller_name = p.name
+                except DatastoreException as e:
+                    controller_name = 'ERROR: {0}'.format(e)
+                table_data.append([j.id, j.jobID, controller_name, j.exec_str, j.date])
+            return {'type':'table','column_names':['ID', 'JobID', 'Controller', 'Command', 'Date'], 'data':table_data}
+
##############################################################################################
##############################################################################################
@@ -1249,47 +1519,52 @@ def clear_instances(cls, args, config):
class CommandException(Exception):
    pass

+
def process_output_exception(e):
    logging.exception(e)
    sys.stderr.write("Error: {0}\n".format(e))

+
def process_output(result):
    if result is not None:
-        if type(result)==dict and 'type' in result:
+        if type(result) == dict and 'type' in result:
            if result['type'] == 'table' and 'column_names' in result and 'data' in result:
-                table_print(result['column_names'],result['data'])
+                table_print(result['column_names'], result['data'])
            if result['type'] == 'file' and 'filename' in result and 'data' in result:
-                output_to_file(result['filename'],result['data'])
-        elif type(result)==dict and 'msg' in result:
+                output_to_file(result['filename'], result['data'])
+        elif type(result) == dict and 'msg' in result:
            print result['msg']
        else:
            print result

+
def output_to_file(filename, data):
-    with open(filename,'w+') as fd:
+    with open(filename, 'w+') as fd:
        fd.write(data)

+
def table_print(column_names, data):
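+    """ Print 'data' as an ASCII table: a header row of 'column_names'
+    followed by one row per entry, each column padded to its widest cell. """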
- column_width = [0]*len(column_names) - for i,n in enumerate(column_names): + column_width = [0] * len(column_names) + for i, n in enumerate(column_names): column_width[i] = len(str(n)) for row in data: if len(row) != len(column_names): raise Exception("len(row) != len(column_names): {0} vs {1}".format(len(row), len(column_names))) - for i,n in enumerate(row): + for i, n in enumerate(row): if len(str(n)) > column_width[i]: column_width[i] = len(str(n)) - out = "|".join([ "-"*(column_width[i]+2) for i in range(len(column_names))]) - print '|'+out+'|' - out = " | ".join([ column_names[i].ljust(column_width[i]) for i in range(len(column_names))]) - print '| '+out+' |' - out = "|".join([ "-"*(column_width[i]+2) for i in range(len(column_names))]) - print '|'+out+'|' + out = "|".join(["-" * (column_width[i] + 2) for i in range(len(column_names))]) + print '|' + out + '|' + out = " | ".join([column_names[i].ljust(column_width[i]) for i in range(len(column_names))]) + print '| ' + out + ' |' + out = "|".join(["-" * (column_width[i] + 2) for i in range(len(column_names))]) + print '|' + out + '|' for row in data: - out = " | ".join([ str(n).ljust(column_width[i]) for i,n in enumerate(row)]) - print '| '+out+' |' - out = "|".join([ "-"*(column_width[i]+2) for i in range(len(column_names))]) - print '|'+out+'|' + out = " | ".join([str(n).ljust(column_width[i]) for i, n in enumerate(row)]) + print '| ' + out + ' |' + out = "|".join(["-" * (column_width[i] + 2) for i in range(len(column_names))]) + print '|' + out + '|' + def raw_input_default(q, default=None, obfuscate=False): if default is None or default == '': @@ -1304,6 +1579,7 @@ def raw_input_default(q, default=None, obfuscate=False): else: return ret.strip() + def raw_input_default_config(q, default=None, obj=None): """ Ask the user and process the response with a default value. """ if default is None: @@ -1322,26 +1598,29 @@ def raw_input_default_config(q, default=None, obj=None): else: return raw_input_default(q['q'], default=default, obfuscate=False) + def setup_object(obj): """ Setup a molns_datastore object using raw_input_default function. 
""" for key, conf, value in obj.get_config_vars(): obj[key] = raw_input_default_config(conf, default=value, obj=obj) + ############################################### -class SubCommand(): +class SubCommand: def __init__(self, command, subcommands): self.command = command self.subcommands = subcommands + def __str__(self): r = '' for c in self.subcommands: - r += self.command + " " + c.__str__() + "\n" + r += self.command + " " + c.__str__() + "\n" return r[:-1] + def __eq__(self, other): return self.command == other def run(self, args, config_dir=None): - #print "SubCommand().run({0}, {1})".format(self.command, args) if len(args) > 0: cmd = args[0] for c in self.subcommands: @@ -1349,8 +1628,11 @@ def run(self, args, config_dir=None): return c.run(args[1:], config_dir=config_dir) raise CommandException("command not found") + ############################################### -class Command(): + + +class Command: def __init__(self, command, args_defs={}, description=None, function=None): self.command = command self.args_defs = args_defs @@ -1361,112 +1643,131 @@ def __init__(self, command, args_defs={}, description=None, function=None): self.description = function.__doc__.strip() else: self.description = description + def __str__(self): - ret = self.command+" " - for k,v in self.args_defs.iteritems(): + ret = self.command + " " + for k, v in self.args_defs.iteritems(): if v is None: ret += "[{0}] ".format(k) else: - ret += "[{0}={1}] ".format(k,v) - ret += "\n\t"+self.description + ret += "[{0}={1}] ".format(k, v) + ret += "\n\t" + self.description return ret - + def __eq__(self, other): return self.command == other def run(self, args, config_dir=None): config = MOLNSConfig(config_dir=config_dir) return self.function(args, config=config) + + ############################################### COMMAND_LIST = [ - # Commands to interact with the head-node. - Command('ssh', {'name':None}, + # Commands to interact with the head-node. 
+ Command('ssh', {'name': None}, function=MOLNSController.ssh_controller), - Command('status', {'name':None}, + Command('status', {'name': None}, function=MOLNSController.status_controller), - Command('start', {'name':None}, + Command('start', {'name': None}, function=MOLNSController.start_controller), - Command('stop', {'name':None}, + Command('stop', {'name': None}, function=MOLNSController.stop_controller), - Command('terminate', {'name':None}, + Command('terminate', {'name': None}, function=MOLNSController.terminate_controller), - Command('put', {'name':None, 'file':None}, + Command('get', {'name':None, 'file':None}, + function=MOLNSController.get_controller), + Command('put', {'name':None, 'file':None}, function=MOLNSController.put_controller), - Command('upload', {'name':None, 'file':None}, + Command('upload', {'name': None, 'file': None}, function=MOLNSController.upload_controller), - #Command('local-connect', {'name':None}, - # function=MOLNSController.connect_controller_to_local), - # Commands to interact with controller - SubCommand('controller',[ - Command('setup', {'name':None}, + # Command('local-connect', {'name':None}, + # function=MOLNSController.connect_controller_to_local), + # Commands to interact with controller + SubCommand('controller', [ + Command('setup', {'name': None}, function=MOLNSController.setup_controller), - Command('list', {'name':None}, + Command('list', {'name': None}, function=MOLNSController.list_controller), - Command('show', {'name':None}, + Command('show', {'name': None}, function=MOLNSController.show_controller), - Command('delete', {'name':None}, + Command('delete', {'name': None}, function=MOLNSController.delete_controller), - Command('export',{'name':None}, + Command('export', {'name': None}, function=MOLNSController.controller_export), - Command('import',{'filename.json':None}, + Command('import', {'filename.json': None}, function=MOLNSController.controller_import), - ]), - # Commands to interact with Worker-Groups - SubCommand('worker',[ - Command('setup', {'name':None}, + ]), + # Commands to interact with Worker-Groups + SubCommand('worker', [ + Command('setup', {'name': None}, function=MOLNSWorkerGroup.setup_worker_groups), - Command('list', {'name':None}, + Command('list', {'name': None}, function=MOLNSWorkerGroup.list_worker_groups), - Command('show', {'name':None}, + Command('show', {'name': None}, function=MOLNSWorkerGroup.show_worker_groups), - Command('delete', {'name':None}, + Command('delete', {'name': None}, function=MOLNSWorkerGroup.delete_worker_groups), - Command('start', {'name':None}, + Command('start', {'name': None}, function=MOLNSWorkerGroup.start_worker_groups), - Command('add', {'name':None}, + Command('add', {'name': None}, function=MOLNSWorkerGroup.add_worker_groups), - Command('status', {'name':None}, + Command('status', {'name': None}, function=MOLNSWorkerGroup.status_worker_groups), - #Command('stop', {'name':None}, - # function=MOLNSWorkerGroup.stop_worker_groups), - Command('terminate', {'name':None}, + Command('stop', {'name':None}, function=MOLNSWorkerGroup.terminate_worker_groups), - Command('export',{'name':None}, + Command('terminate', {'name':None}, + function=MOLNSWorkerGroup.terminate_worker_groups), + Command('export', {'name': None}, function=MOLNSWorkerGroup.worker_group_export), - Command('import',{'filename.json':None}, + Command('import', {'filename.json': None}, function=MOLNSWorkerGroup.worker_group_import), - ]), - # Commands to interact with Infrastructure-Providers - SubCommand('provider',[ - 
Command('setup',{'name':None},
+        Command('setup', {'name': None},
                function=MOLNSProvider.provider_setup),
-        Command('rebuild',{'name':None},
+        Command('rebuild', {'name': None},
                function=MOLNSProvider.provider_rebuild),
-        Command('list',{'name':None},
+        Command('list', {'name': None},
                function=MOLNSProvider.provider_list),
-        Command('show',{'name':None},
+        Command('show', {'name': None},
                function=MOLNSProvider.show_provider),
-        Command('delete',{'name':None},
+        Command('delete', {'name': None},
                function=MOLNSProvider.delete_provider),
-        Command('export',{'name':None},
+        Command('export', {'name': None},
                function=MOLNSProvider.provider_export),
-        Command('import',{'filename.json':None},
+        Command('import', {'filename.json': None},
                function=MOLNSProvider.provider_import),
-    ]),
-    # Commands to interact with the instance DB
-    SubCommand('instancedb',[
-        Command('list', {},
+    ]),
+    # Commands to interact with the instance DB
+    SubCommand('instancedb', [
+        Command('list', {},
                function=MOLNSInstances.show_instances),
-        Command('delete', {'ID':None},
+        Command('delete', {'ID': None},
                function=MOLNSInstances.delete_instance),
-        Command('clear', {},
+        Command('clear', {},
                function=MOLNSInstances.clear_instances),
    ]),
+    SubCommand('exec', [
+        Command('start', OrderedDict([('name', None), ('command', None)]),
+                function=MOLNSExec.start_job),
+        Command('status', {'jobID': None},
+                function=MOLNSExec.job_status),
+        Command('logs', {'jobID': None},
+                function=MOLNSExec.job_logs),
+        Command('fetch', OrderedDict([('jobID', None), ('filename', None)]),
+                function=MOLNSExec.fetch_job_results),
+        Command('cleanup', {'jobID': None},
+                function=MOLNSExec.cleanup_job),
+        Command('list', {'name': None},
+                function=MOLNSExec.list_jobs),
+    ]),
]


-def printHelp():
+def print_help():
    print "molns "
    print " --config=[Config Directory=./.molns/]"
    print "\tSpecify an alternate config location. (Must come before the command.)"
    print c


-def parseArgs():
+def parse_args():
    if len(sys.argv) < 2 or sys.argv[1] == '-h':
-        printHelp()
+        print_help()
        return
-
+    Log.verbose = True
    arg_list = sys.argv[1:]
    config_dir = './.molns/'
+    while len(arg_list) > 0 and arg_list[0].startswith('--'):
+
        if arg_list[0].startswith('--config='):
-            config_dir = sys.argv[1].split('=',2)[1]
+            config_dir = arg_list[0].split('=', 1)[1]
+
        if arg_list[0].startswith('--debug'):
            print "Turning on Debugging output"
-            logger.setLevel(logging.DEBUG) #for Debugging
+            logging.getLogger().setLevel(logging.DEBUG)
+            Log.verbose = True
+
+        arg_list = arg_list[1:]

-    if len(arg_list) == 0 or arg_list[0] =='help' or arg_list[0] == '-h':
-        printHelp()
+    if len(arg_list) == 0 or arg_list[0] == 'help' or arg_list[0] == '-h':
+        print_help()
        return

    if arg_list[0] in COMMAND_LIST:
        for cmd in COMMAND_LIST:
            if cmd == arg_list[0]:
@@ -1506,10 +1812,12 @@
                process_output_exception(e)
            return

-    print "unknown command: " + " ".join(arg_list)
-    #printHelp()
+    print "unknown command: " + " ".join(arg_list)
    print "use 'molns help' to see all possible commands"


if __name__ == "__main__":
-    parseArgs()
+    logger = logging.getLogger()
+    #logger.setLevel(logging.INFO) #for Debugging
+    #logger.setLevel(logging.DEBUG)
+    parse_args()
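+
+# Example workflow for the new 'exec' subcommands (sketch; 'mycontroller' and
+# the command string are placeholders):
+#
+#   molns exec start mycontroller "python run_model.py"     # prints the JobID
+#   molns exec status <JobID>                                # is it still running?
+#   molns exec logs <JobID> 0                                # stdout/stderr from offset 0
+#   molns exec fetch <JobID> results.dat local_results.dat   # copy a result file
+#   molns exec cleanup <JobID>                               # delete remote job files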