diff --git a/.gitignore b/.gitignore
index bf0e1ac..c7091ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,12 @@
 *.pyc
 .molns/
 molns_install.log
+.ec2_creds_molns
+.idea/
+*.tar.gz
+*.tar
+notes
+qsubscript
+/dockerfile_*
+\#qsubscript\#
+docker_test.py
diff --git a/MolnsLib/DockerProvider.py b/MolnsLib/DockerProvider.py
new file mode 100644
index 0000000..cc04fe1
--- /dev/null
+++ b/MolnsLib/DockerProvider.py
@@ -0,0 +1,243 @@
+import logging
+import os
+import tempfile
+import time
+import DockerProxy
+import constants
+import installSoftware
+from collections import OrderedDict
+from DockerSSH import DockerSSH
+from constants import Constants
+from molns_provider import ProviderBase, ProviderException
+
+
+def docker_provider_default_key_name():
+    user = os.environ.get('USER') or 'USER'
+    return "{0}_molns_docker_sshkey_{1}".format(user, hex(int(time.time())).replace('0x', ''))
+
+
+class DockerBase(ProviderBase):
+    """ Base class for Docker-based providers. """
+
+    SSH_KEY_EXTENSION = ".pem"
+    PROVIDER_TYPE = 'Docker'
+
+    def __init__(self, name, config=None, config_dir=None, **kwargs):
+        ProviderBase.__init__(self, name, config, config_dir, **kwargs)
+        self.docker = DockerProxy.DockerProxy()
+        self.ssh = DockerSSH(self.docker)
+
+    def _get_container_status(self, container_id):
+        return self.docker.container_status(container_id)
+
+    def start_instance(self, num=1):
+        """ Start the given number of containers (default 1). """
+        started_containers = []
+        for i in range(num):
+            container_id = self.docker.create_container(self.provider.config["molns_image_name"], name=self.name,
+                                                        port_bindings={
+                                                            Constants.DEFAULT_PUBLIC_WEBSERVER_PORT:
+                                                                ('127.0.0.1', self.config['web_server_port']),
+                                                            Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT:
+                                                                ('127.0.0.1', self.config['notebook_port'])},
+                                                        working_directory=self.config["working_directory"])
+            stored_container = self.datastore.get_instance(provider_instance_identifier=container_id,
+                                                           ip_address=self.docker.get_container_ip_address(container_id),
+                                                           provider_id=self.provider.id, controller_id=self.id,
+                                                           provider_type=constants.Constants.DockerProvider)
+            started_containers.append(stored_container)
+        if num == 1:
+            return started_containers[0]
+        return started_containers
+
+    def resume_instance(self, instances):
+        instance_ids = []
+        if isinstance(instances, list):
+            for instance in instances:
+                instance_ids.append(instance.provider_instance_identifier)
+        else:
+            instance_ids.append(instances.provider_instance_identifier)
+        self.docker.start_containers(instance_ids)
+
+    def stop_instance(self, instances):
+        instance_ids = []
+        if isinstance(instances, list):
+            for instance in instances:
+                instance_ids.append(instance.provider_instance_identifier)
+        else:
+            instance_ids.append(instances.provider_instance_identifier)
+        self.docker.stop_containers(instance_ids)
+
+    def terminate_instance(self, instances):
+        instance_ids = []
+        if isinstance(instances, list):
+            for instance in instances:
+                instance_ids.append(instance.provider_instance_identifier)
+        else:
+            instance_ids.append(instances.provider_instance_identifier)
+        self.docker.terminate_containers(instance_ids)
+
+    def exec_command(self, container_id, command):
+        return self.docker.execute_command(container_id, command)
+
+
+class DockerProvider(DockerBase):
+    """ Provider handle for a local Docker-based service.
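+
+    A minimal usage sketch (illustrative only; ``my_config`` is a hypothetical,
+    already-populated config object):
+
+        provider = DockerProvider('dev', config=my_config)
+        if not provider.check_molns_image():
+            provider.config['molns_image_name'] = provider.create_molns_image()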
""" + + OBJ_NAME = 'DockerProvider' + + CONFIG_VARS = OrderedDict([ + ('ubuntu_image_name', + {'q': 'Base Ubuntu image to use', 'default': constants.Constants.DOCKER_DEFAULT_IMAGE, + 'ask': True}), + ('molns_image_name', + {'q': 'Local MOLNs image (Docker image ID or image tag) to use ', 'default': None, 'ask': True}), + ('key_name', + {'q': 'Docker Key Pair name', 'default': "docker-default", 'ask': False}), # Unused. + ('group_name', + {'q': 'Docker Security Group name', 'default': 'molns', 'ask': False}), # Unused. + ('login_username', + {'default': 'ubuntu', 'ask': False}), # Unused. + ('provider_type', + {'default': constants.Constants.DockerProvider, 'ask': False}) + ]) + + def get_config_credentials(self): + return None + + @staticmethod + def __get_new_dockerfile_name(): + import uuid + filename = constants.Constants.DOCKERFILE_NAME + str(uuid.uuid4()) + return filename + + def check_ssh_key(self): + """ Returns true. (Implementation does not use SSH.) """ + return True + + def create_ssh_key(self): + """ Returns true. """ + ssh_key_dir = os.path.join(self.config_dir, self.name) + with open(ssh_key_dir, 'w') as fp: + fp.write("This is a dummy key.") + os.chmod(ssh_key_dir, 0o600) + + def check_security_group(self): + """ Returns true. (Implementation does not use SSH.) """ + return True + + def create_seurity_group(self): + """ Returns true. (Implementation does not use SSH.) """ + return True + + def create_molns_image(self): + """ Create a molns image, save it on localhost and return DockerImage ID of created image. """ + file_to_remove = None + try: + dockerfile, file_to_remove = self._create_dockerfile(installSoftware.InstallSW.get_command_list()) + image_id = self.docker.build_image(dockerfile) + return image_id + except Exception as e: + logging.exception(e) + raise ProviderException("Failed to create molns image: {0}".format(e)) + finally: + if file_to_remove is not None: + os.remove(file_to_remove) + + def check_molns_image(self): + """ Check if the molns image exists. """ + if 'molns_image_name' in self.config and self.config['molns_image_name'] is not None \ + and self.config['molns_image_name'] != '': + return self.docker.image_exists(self.config['molns_image_name']) + return False + + def _create_dockerfile(self, commands): + """ Create Dockerfile from given commands. 
""" + import Utils + + user_id = Utils.get_sudo_user_id() + dockerfile = '''FROM ubuntu:14.04\nRUN apt-get update\n\n# Add user ubuntu.\nRUN useradd -u {0} -ms /bin/bash ubuntu\n + # Set up base environment.\nRUN apt-get install -yy \ \n software-properties-common \ \n + python-software-properties \ \n wget \ \n curl \ \n git \ \n ipython \ \n sudo \ \n + screen \ \n iptables \nRUN echo "ubuntu ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + \nWORKDIR /home/ubuntu\n\nUSER ubuntu\nENV HOME /home/ubuntu'''.format(user_id) + + flag = False + + for entry in commands: + if isinstance(entry, list): + dockerfile += '''\n\nRUN ''' + first = True + flag = False + for sub_entry in entry: + if first is True: + dockerfile += self._preprocess(sub_entry) + first = False + else: + dockerfile += ''' && \ \n ''' + self._preprocess(sub_entry) + else: + if flag is False: + dockerfile += '''\n\nRUN ''' + flag = True + dockerfile += self._preprocess(entry) + else: + dockerfile += ''' && \ \n ''' + self._preprocess(entry) + + dockerfile += '''\n\n\n''' + + dockerfile_file = DockerProvider.__get_new_dockerfile_name() + with open(dockerfile_file, 'w') as Dockerfile: + Dockerfile.write(dockerfile) + named_dockerfile = tempfile.NamedTemporaryFile() + named_dockerfile.write(dockerfile) + named_dockerfile.seek(0) + + return named_dockerfile, dockerfile_file + + @staticmethod + def _preprocess(command): + """ Prepends "shell only" commands with '/bin/bash -c'. """ + for shell_command in DockerProxy.DockerProxy.shell_commands: + if shell_command in command: + replace_string = "/bin/bash -c \"" + shell_command + command = command.replace(shell_command, replace_string) + command += "\"" + return command + + +def get_default_working_directory(config=None): + if config is None: + raise Exception("Config should not be None.") + return os.path.realpath(os.path.join(config.config_dir, "docker_controller_working_dirs", config.name)) + + +class DockerController(DockerBase): + """ Provider handle for a Docker controller. """ + + OBJ_NAME = 'DockerController' + CONFIG_VARS = OrderedDict([ + ('web_server_port', + {'q': 'Port to use for web server', 'default': "8080", + 'ask': True}), + ('notebook_port', + {'q': 'Port to use for jupyter notebook', 'default': "8081", + 'ask': True}), + ('working_directory', + {'q': 'Working directory for this controller', 'default': get_default_working_directory, 'ask': True}), + ('ssh_key_file', + {'q': 'SSH key to a qsub and docker enabled cluster', 'default': "None", 'ask': True}) + ]) + + def get_instance_status(self, instance): + return self.docker.container_status(instance.provider_instance_identifier) + + +class DockerWorkerGroup(DockerController): + """ Provider handle for Docker worker group. """ + + OBJ_NAME = 'DockerWorkerGroup' + + CONFIG_VARS = OrderedDict([ + ('num_vms', + {'q': 'Number of containers in group', 'default': '1', 'ask': True}), + ]) diff --git a/MolnsLib/DockerProxy.py b/MolnsLib/DockerProxy.py new file mode 100644 index 0000000..5d0d6b6 --- /dev/null +++ b/MolnsLib/DockerProxy.py @@ -0,0 +1,314 @@ +import logging +import re +import time +import constants +from molns_provider import ProviderBase +from constants import Constants +from docker import APIClient as Client +from docker.errors import NotFound, NullResource, APIError + + +class InvalidVolumeName(Exception): + pass + + +class DockerProxy: + + """ A wrapper over docker-py and some utility methods and classes. 
""" + + LOG_TAG = "Docker " + + shell_commands = ["source"] + + class ImageBuildException(Exception): + def __init__(self, message=None): + super("Something went wrong while building docker container image.\n{0}".format(message)) + + def __init__(self): + self.client = Client(base_url=Constants.DOCKER_BASE_URL) + self.build_count = 0 + logging.basicConfig(level=logging.DEBUG) + + @staticmethod + def get_container_volume_from_working_dir(working_directory): + import os + return os.path.join("/home/ubuntu/", os.path.basename(working_directory)) + + def create_container(self, image_str, working_directory=None, name=None, + port_bindings={Constants.DEFAULT_PUBLIC_WEBSERVER_PORT: ('127.0.0.1', 8080), + Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT: ('127.0.0.1', 8081)}): + """Creates a new container with elevated privileges. Returns the container ID. Maps port 80 of container + to 8080 of locahost by default""" + + docker_image = DockerImage.from_string(image_str) + volume_dir = DockerProxy.get_container_volume_from_working_dir(working_directory) + + if name is None: + import uuid + random_str = str(uuid.uuid4()) + name = constants.Constants.MolnsDockerContainerNamePrefix + random_str[:8] + image = docker_image.image_id if docker_image.image_id is not Constants.DockerNonExistentTag \ + else docker_image.image_tag + + logging.info("Using image {0}".format(image)) + import os + if DockerProxy._verify_directory(working_directory) is False: + if working_directory is not None: + raise InvalidVolumeName("\n\nMOLNs uses certain reserved names for its configuration files in the " + "controller environment, and unfortunately the provided name for working " + "directory of the controller cannot be one of these. Please configure this " + "controller again with a different volume name and retry. " + "Here is the list of forbidden names: \n{0}" + .format(Constants.ForbiddenVolumeNames)) + + logging.warning(DockerProxy.LOG_TAG + "Unable to verify provided directory to use to as volume. 
Volume will NOT "
+                                                 "be created.")
+            hc = self.client.create_host_config(privileged=True, port_bindings=port_bindings)
+            container = self.client.create_container(image=image, name=name, command="/bin/bash", tty=True, detach=True,
+                                                     ports=[Constants.DEFAULT_PUBLIC_WEBSERVER_PORT,
+                                                            Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT],
+                                                     host_config=hc,
+                                                     environment={"PYTHONPATH": "/usr/local/"})
+
+        else:
+            container_mount_point = '/home/ubuntu/{0}'.format(os.path.basename(working_directory))
+            hc = self.client.create_host_config(privileged=True, port_bindings=port_bindings,
+                                                binds={working_directory: {'bind': container_mount_point,
+                                                                           'mode': 'rw'}})
+
+            container = self.client.create_container(image=image, name=name, command="/bin/bash", tty=True, detach=True,
+                                                     ports=[Constants.DEFAULT_PUBLIC_WEBSERVER_PORT,
+                                                            Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT],
+                                                     volumes=container_mount_point, host_config=hc,
+                                                     working_dir=volume_dir,
+                                                     environment={"PYTHONPATH": "/usr/local/"})
+
+        container_id = container.get("Id")
+
+        return container_id
+
+    # noinspection PyBroadException
+    @staticmethod
+    def _verify_directory(working_directory):
+        import os
+        if working_directory is None or os.path.basename(working_directory) in Constants.ForbiddenVolumeNames:
+            return False
+        try:
+            if not os.path.exists(working_directory):
+                os.makedirs(working_directory)
+            return True
+        except:
+            return False
+
+    def stop_containers(self, container_ids):
+        """Stops the given containers."""
+        for container_id in container_ids:
+            self.stop_container(container_id)
+
+    def stop_container(self, container_id):
+        """Stops the container with the given ID."""
+        self.client.stop(container_id)
+
+    def container_status(self, container_id):
+        """Checks whether the container with the given ID is running."""
+        status = ProviderBase.STATUS_TERMINATED
+        try:
+            ret_val = str(self.client.inspect_container(container_id).get('State').get('Status'))
+            if ret_val.startswith("running"):
+                status = ProviderBase.STATUS_RUNNING
+            else:
+                status = ProviderBase.STATUS_STOPPED
+        except NotFound:
+            pass
+        return status
+
+    def start_containers(self, container_ids):
+        """Starts each container in the given list of container IDs."""
+        for container_id in container_ids:
+            self.start_container(container_id)
+
+    def start_container(self, container_id):
+        """ Start the container with the given ID."""
+        logging.info(DockerProxy.LOG_TAG + " Starting container " + container_id)
+        try:
+            self.client.start(container=container_id)
+        except (NotFound, NullResource) as e:
+            print DockerProxy.LOG_TAG + "Something went wrong while starting container. {0}".format(e)
+            return False
+        return True
+
+    def execute_command(self, container_id, command):
+        """Executes the given command as a shell command in the given container. Returns None if anything goes
+        wrong, otherwise a list [exec_inspect_dict, output]."""
+        run_command = "/bin/bash -c \"" + command + "\""
+        # print("CONTAINER: {0} COMMAND: {1}".format(container_id, run_command))
+        if self.start_container(container_id) is False:
+            print DockerProxy.LOG_TAG + "Could not start container."
+            return None
+        try:
+            exec_instance = self.client.exec_create(container_id, run_command)
+            response = self.client.exec_start(exec_instance)
+            return [self.client.exec_inspect(exec_instance), response]
+        except (NotFound, APIError) as e:
+            print DockerProxy.LOG_TAG + " Could not execute command. {0}".format(e)
+            return None
+
+    def build_image(self, dockerfile):
+        """ Build an image from the given Dockerfile object and return the identifier of the created image.
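+
+        Note that the returned value is actually a serialized DockerImage
+        string of the form '<image_id>|||<image_tag>' (see
+        Constants.DockerImageDelimiter), which DockerImage.from_string() can
+        parse back into a DockerImage object.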
""" + import uuid + logging.info("Building image...") + random_string = str(uuid.uuid4()) + image_tag = Constants.DOCKER_IMAGE_PREFIX + "{0}".format(random_string[:]) + last_line = "" + try: + for line in self.client.build(fileobj=dockerfile, rm=True, tag=image_tag): + print(DockerProxy._decorate(line)) + if "errorDetail" in line: + raise DockerProxy.ImageBuildException() + last_line = line + + # Return image ID. It's a hack around the fact that docker-py's build image command doesn't return an image + # id. + image_id = get_docker_image_id_from_string(str(last_line)) + logging.info("Image ID: {0}".format(image_id)) + return str(DockerImage(image_id, image_tag)) + + except (DockerProxy.ImageBuildException, IndexError) as e: + raise DockerProxy.ImageBuildException(e) + + @staticmethod + def _decorate(some_line): + return some_line[11:-4].rstrip() + + def image_exists(self, image_str): + """Checks if an image with the given ID/tag exists locally.""" + docker_image = DockerImage.from_string(image_str) + + if docker_image.image_id is Constants.DockerNonExistentTag \ + and docker_image.image_tag is Constants.DockerNonExistentTag: + raise InvalidDockerImageException("Neither image_id nor image_tag provided.") + + for image in self.client.images(): + some_id = image["Id"] + some_tags = image["RepoTags"] or [None] + if docker_image.image_id in \ + some_id[:(Constants.DOCKER_PY_IMAGE_ID_PREFIX_LENGTH + Constants.DOKCER_IMAGE_ID_LENGTH)]: + return True + if docker_image.image_tag in some_tags: + return True + return False + + def terminate_containers(self, container_ids): + """ Terminates containers with given container ids.""" + for container_id in container_ids: + try: + if self.container_status(container_id) == ProviderBase.STATUS_RUNNING: + self.stop_container(container_id) + self.terminate_container(container_id) + except NotFound: + pass + + def terminate_container(self, container_id): + self.client.remove_container(container_id) + + def get_mapped_ports(self, container_id): + container_ins = self.client.inspect_container(container_id) + mapped_ports = container_ins['HostConfig']['PortBindings'] + ret_val = [] + if mapped_ports is None: + logging.info("No mapped ports for {0}".format(container_id)) + return + for k, v in mapped_ports.iteritems(): + host_port = v[0]['HostPort'] + ret_val.append(host_port) + return ret_val + + def get_working_directory(self, container_id): + return self.client.inspect_container(container_id)["Config"]["WorkingDir"] + + def get_home_directory(self, container_id): + env_vars = self.client.inspect_container(container_id)["Config"]["Env"] + home = [i for i in env_vars if i.startswith("HOME")] + return home[0].split("=")[1] + + def put_archive(self, container_id, tar_file_bytes, target_path_in_container): + """ Copies and unpacks a given tarfile in the container at specified location. + Location must exist in container.""" + if self.start_container(container_id) is False: + raise Exception("Could not start container.") + + # Prepend file path with /home/ubuntu/. TODO Should be refined. 
+        if not target_path_in_container.startswith("/home/ubuntu/"):
+            import os
+            target_path_in_container = os.path.join("/home/ubuntu/", target_path_in_container)
+
+        logging.info("target path in container: {0}".format(target_path_in_container))
+        if not self.client.put_archive(container_id, target_path_in_container, tar_file_bytes):
+            logging.error(DockerProxy.LOG_TAG + "Failed to copy.")
+
+    def get_container_ip_address(self, container_id):
+        """ Returns the IP address of the given container, polling until Docker reports one. """
+        self.start_container(container_id)
+        while True:
+            # Re-inspect on every iteration so a freshly assigned address is seen.
+            ins = self.client.inspect_container(container_id)
+            ip_address = str(ins.get("NetworkSettings").get("IPAddress"))
+            if ip_address.startswith("1"):
+                break
+            time.sleep(3)
+        return ip_address
+
+
+def get_docker_image_id_from_string(some_string):
+    exp = r'[a-z0-9]{12}'
+    matches = re.findall(exp, some_string)
+    if len(matches) == 0:
+        return None
+    else:
+        return matches[0]
+
+
+class InvalidDockerImageException(Exception):
+    def __init__(self, message):
+        super(InvalidDockerImageException, self).__init__(message)
+
+
+class DockerImage:
+    def __init__(self, image_id=None, image_tag=None):
+        if image_id in [None, Constants.DockerNonExistentTag] and image_tag in [None, Constants.DockerNonExistentTag]:
+            raise InvalidDockerImageException("Both image_id and image_tag cannot be None.")
+
+        self.image_id = image_id if image_id is not None else Constants.DockerNonExistentTag
+        self.image_tag = image_tag if image_tag is not None else Constants.DockerNonExistentTag
+
+    def __str__(self):
+        if self.image_id == Constants.DockerNonExistentTag and self.image_tag == Constants.DockerNonExistentTag:
+            raise InvalidDockerImageException(
+                "Cannot serialize DockerImage object because both image_id and image_tag are None.")
+
+        return "{0}{1}{2}".format(self.image_id, Constants.DockerImageDelimiter, self.image_tag)
+
+    @staticmethod
+    def from_string(serialized_docker_image):
+        temp = serialized_docker_image.split(Constants.DockerImageDelimiter)
+
+        if len(temp) == 2:
+            return DockerImage(image_id=temp[0], image_tag=temp[1])
+
+        if len(temp) > 2 or len(temp) == 0:
+            raise InvalidDockerImageException("Unexpected format, cannot deserialize to DockerImage.")
+
+        temp = temp[0]
+        # Figure out if temp is image_id or image_name.
+        if DockerImage.looks_like_image_id(temp):
+            return DockerImage(image_id=temp)
+        else:
+            return DockerImage(image_tag=temp)
+
+    @staticmethod
+    def looks_like_image_id(some_string):
+        # Compare by value: re.findall() returns new string objects, so an 'is' check would always fail.
+        return some_string == get_docker_image_id_from_string(some_string)
diff --git a/MolnsLib/DockerSSH.py b/MolnsLib/DockerSSH.py
new file mode 100644
index 0000000..6f2032a
--- /dev/null
+++ b/MolnsLib/DockerSSH.py
@@ -0,0 +1,95 @@
+import StringIO
+import tarfile
+import os
+import re
+
+
+# "unused" arguments to some methods are added to maintain compatibility with existing upper level APIs.
+
+class DockerSSH(object):
+    def __init__(self, docker):
+        self.docker = docker
+        self.container_id = None
+
+    def exec_command(self, command, verbose=None):
+        cmd = re.sub("\"", "\\\"", command)  # Escape all occurrences of ".
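+        # DockerProxy.execute_command() wraps the command in /bin/bash -c "...", so any
+        # embedded double quotes have to be escaped before they reach the shell.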
+        ret_val, response = self.docker.execute_command(self.container_id, cmd)
+        return response
+
+    def exec_multi_command(self, command, verbose=None):
+        return self.exec_command(command)
+
+    def open_sftp(self):
+        return MockSFTP(self.docker, self.container_id)
+
+    def connect(self, instance, endpoint, username=None, key_filename=None):
+        self.container_id = instance.provider_instance_identifier
+
+    def connect_cluster_node(self, ip_address, port, username, keyfile):
+        raise DockerSSHException("This invocation means that an error has occurred.")
+
+    def close(self):
+        self.container_id = None
+
+
+class MockSFTPFileException(Exception):
+    pass
+
+
+class DockerSSHException(Exception):
+    pass
+
+
+class MockSFTP:
+    def __init__(self, docker, container_id):
+        self.docker = docker
+        self.container_id = container_id
+
+    def file(self, filename, flag):
+        return MockSFTPFile(filename, flag, self.docker, self.container_id)
+
+    def close(self):
+        pass
+
+
+class MockSFTPFile:
+    def __init__(self, filename, flag, docker, container_id):
+        self.filename = filename  # Absolute path of file.
+        self.file_contents = ""
+        self.docker = docker
+        self.container_id = container_id
+        if flag == 'w':
+            self.flag = flag
+        # else:
+        #    print("WARNING Unrecognized file mode. Filename: {0}, Flag: {1}".format(filename, flag))
+
+    def write(self, write_this):
+        self.file_contents += write_this
+
+    def close(self):
+        # Make tarfile.
+        import uuid
+        rand_str = str(uuid.uuid4())
+        temp_tar = "transport-{0}.tar".format(rand_str[:8])
+        try:
+            tar = tarfile.TarFile(temp_tar, "w")
+            string = StringIO.StringIO()
+            string.write(self.file_contents)
+            string.seek(0)
+            tar_file_info = tarfile.TarInfo(name=os.path.basename(self.filename))
+            # Size by the buffered contents; StringIO's internal 'buf' attribute is not reliable here.
+            tar_file_info.size = len(self.file_contents)
+            tar.addfile(tarinfo=tar_file_info, fileobj=string)
+            tar.close()
+
+            path_to_file = os.path.dirname(self.filename)
+
+            if not path_to_file.startswith("/home"):
+                path_to_file = os.path.join(self.docker.get_home_directory(self.container_id), path_to_file)
+
+            with open(temp_tar, mode='rb') as f:
+                tar_file_bytes = f.read()
+
+            # print("path to file: {0}".format(path_to_file))
+            self.docker.put_archive(self.container_id, tar_file_bytes, path_to_file)
+        finally:
+            os.remove(temp_tar)  # Remove the temporary tar file.
diff --git a/MolnsLib/EC2Provider.py b/MolnsLib/EC2Provider.py
index 3451570..42f706f 100644
--- a/MolnsLib/EC2Provider.py
+++ b/MolnsLib/EC2Provider.py
@@ -169,7 +169,7 @@ def _get_image_name(self):
 ##########################################
 class EC2Controller(EC2Base):
-    """ Provider handle for an open stack controller. """
+    """ Provider handle for an EC2 controller. """
 
     OBJ_NAME = 'EC2Controller'
 
@@ -272,7 +272,7 @@ def get_instance_status(self, instance):
 ##########################################
 class EC2WorkerGroup(EC2Controller):
-    """ Provider handle for an open stack controller. """
+    """ Provider handle for an EC2 worker group. """
""" OBJ_NAME = 'EC2WorkerGroup' @@ -464,7 +464,7 @@ def start_ec2_instances(self, image_id=None, key_name=None, group_name=None, num raise ProviderException("Could not find image_id={0}".format(image_id)) if img.state != "available": if img.state != "pending": - raise ProviderException("Image {0} is not available, it has state is {1}.".format(image_id, img.state)) + raise ProviderException("Image {0} is not available, it's state is {1}.".format(image_id, img.state)) while img.state == "pending": print "Image {0} has state {1}, waiting {2} seconds for it to become available.".format(image_id, img.state, self.PENDING_IMAGE_WAITTIME) time.sleep(self.PENDING_IMAGE_WAITTIME) diff --git a/MolnsLib/EucalyptusProvider.py b/MolnsLib/EucalyptusProvider.py new file mode 100644 index 0000000..b79e9f1 --- /dev/null +++ b/MolnsLib/EucalyptusProvider.py @@ -0,0 +1,685 @@ +import boto +import boto.ec2 +from boto.exception import EC2ResponseError +from boto.ec2.regioninfo import RegionInfo +import collections +import os +import time +import sys +import logging +from urlparse import urlparse +from collections import OrderedDict +import installSoftware +import ssh_deploy +from molns_provider import ProviderBase, ProviderException + +#logging.getLogger('boto').setLevel(logging.ERROR) +logging.getLogger('boto').setLevel(logging.CRITICAL) + + +########################################## +class EucalyptusBase(ProviderBase): + """ Abstract class for Eucalyptus. """ + + SSH_KEY_EXTENSION = ".pem" + PROVIDER_TYPE = 'Eucalyptus' + +#def EucalyptusProvider_config_get_region(): +# if os.environ.get('AWS_DEFAULT_REGION') is None: +# return 'us-east-1' +# return os.environ.get('AWS_DEFAULT_REGION') + +def EucalyptusProvider_config_get_ubuntu_images_by_region(conf=None): + if conf is not None: + access_key = conf['aws_access_key'] + secret_key = conf['aws_secret_key'] + ec2_url = conf['ec2_url'] + else: + access_key = os.environ.get('EC2_ACCESS_KEY') + secret_key = os.environ.get('EC2_SECRET_KEY') + ec2_url = os.environ.get('EC2_URL') + + try: + o = urlparse(ec2_url) + ec2_host = o.hostname + ec2_port = o.port + ec2_path = o.path + # Setup connection to Eucalyptus + conn = boto.connect_ec2(aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=False, + region=RegionInfo(name="eucalyptus", endpoint=ec2_host), + port=ec2_port, + path=ec2_path) + + # Run commands + images = conn.get_all_images() + for i in images: + if 'trusty' in i.name.lower(): + return i.id + except Exception as e: + logging.debug('EucalyptusProvider_config_get_ubuntu_images_by_region() caught exception {0}'.format(e)) + return None + +def EucalyptusProvider_default_key_name(): + user = os.environ.get('USER') or 'USER' + return "{0}_molns_sshkey_{1}".format(user, hex(int(time.time())).replace('0x','')) +########################################## +class EucalyptusProvider(EucalyptusBase): + """ Provider handle for an Eucalyptus service. 
""" + + OBJ_NAME = 'EucalyptusProvider' + + CONFIG_VARS = OrderedDict( + [ + ('aws_access_key', + {'q':'Eucalyptus access Key', 'default':os.environ.get('EC2_ACCESS_KEY'), 'ask':True, 'obfuscate':True}), + ('aws_secret_key', + {'q':'Eucalyptus secret key', 'default':os.environ.get('EC2_SECRET_KEY'), 'ask':True, 'obfuscate':True}), + ('ec2_url', + {'q':'URL of Eucalyptus service (EC2_URL)', 'default':os.environ.get('EC2_URL'), 'ask':True, 'obfuscate':False}), + ('key_name', + {'q':'Eucalyptus Key Pair name', 'default':EucalyptusProvider_default_key_name(), 'ask':True}), + ('group_name', + {'q':'Eucalyptus Security Group name', 'default':'molns', 'ask':True}), + ('ubuntu_image_name', + {'q':'ID of the base Ubuntu image to use', 'default':EucalyptusProvider_config_get_ubuntu_images_by_region, 'ask':True}), + ('molns_image_name', + {'q':'ID of the MOLNs image (leave empty for none)', 'default':None, 'ask':True}), + ('default_instance_type', + {'q':'Default Instance Type', 'default':'c3.large', 'ask':True}), + ('login_username', + {'default':'ubuntu', 'ask':False}) + ]) + + def get_config_credentials(self): + """ Return a dict with the credentials necessary for authentication. """ + return { + 'aws_access_key_id' : self.config['aws_access_key'], + 'aws_secret_access_key' : self.config['aws_secret_key'] + } + + + def check_ssh_key(self): + """ Check that the SSH key is found locally and remotely. + Returns: + True if the key is valid, otherwise False. + """ + ssh_key_dir = os.path.join(self.config_dir, self.name) + logging.debug('ssh_key_dir={0}'.format(ssh_key_dir)) + if not os.path.isdir(ssh_key_dir): + logging.debug('making ssh_key_dir={0}'.format(ssh_key_dir)) + os.makedirs(ssh_key_dir) + ssh_key_file = os.path.join(ssh_key_dir,self.config['key_name']+self.SSH_KEY_EXTENSION) + if not os.path.isfile(ssh_key_file): + logging.debug("ssh_key_file '{0}' not found".format(ssh_key_file)) + return False + self._connect() + return self.eucalyptus.keypair_exists(self.config['key_name']) + + def create_ssh_key(self): + """ Create the ssh key and write the file locally. """ + self._connect() + ssh_key_dir = os.path.join(self.config_dir, self.name) + logging.debug('creating ssh key {0} in dir{1}'.format(self.config['key_name'], ssh_key_dir)) + self.eucalyptus.create_keypair(self.config['key_name'], ssh_key_dir) + + def check_security_group(self): + """ Check if the security group is created. """ + self._connect() + return self.eucalyptus.security_group_exists(self.config['group_name']) + + def create_seurity_group(self): + """ Create the security group. """ + self._connect() + return self.eucalyptus.create_security_group(self.config['group_name']) + + def check_molns_image(self): + """ Check if the molns image is created. """ + if 'molns_image_name' in self.config and self.config['molns_image_name'] is not None and self.config['molns_image_name'] != '': + self._connect() + return self.eucalyptus.image_exists(self.config['molns_image_name']) + return False + + def create_molns_image(self): + """ Create the molns image is created. 
""" + self._connect() + # clear the network-related persisent udev rules: + #echo "" > /etc/udev/rules.d/70-persistent-net.rules + #echo "" > /lib/udev/rules.d/75-persistent-net-generator.rules + # + + # start vm + instances = self.eucalyptus.start_eucalyptus_instances(image_id=self.config["ubuntu_image_name"]) + instance = instances[0] + # get login ip + ip = instance.public_dns_name + # install software + try: + logging.debug("installing software on server (ip={0})".format(ip)) + install_vm_instance = installSoftware.InstallSW(ip, config=self) + #install_vm_instance.run_with_logging() + # create image + logging.debug("Shutting down instance") + self.eucalyptus.stop_eucalyptus_instances([instance]) + logging.debug("Creating image") + image_id = instance.create_image(name=self._get_image_name()) + #logging.debug("Finding volume of instance") + #vol = None + #for v in self.eucalyptus.conn.get_all_volumes(): + # if v.attach_data is not None and v.attach_data.instance_id == instance.id: + # vol = v + # break + #if vol is None: + # raise Exception("Can not find volume associated with instance. Base image must be an EBS backed image.") + #snap = vol.create_snapshot() + #logging.debug('Snapshot {0} of volume {1}'.format(snap.id, vol.id)) + #image_id = self.eucalyptus.conn.register_image(name=self._get_image_name(), snapshot_id=snap.id, delete_root_volume_on_termination=True) + ##deleteOnTermination + #image_id = self.eucalyptus.conn.register_image(name=self._get_image_name(), snapshot_id=snap.id) + logging.debug("Image created: {0}".format(image_id)) + except Exception as e: + logging.exception(e) + raise ProviderException("Failed to create molns image: {0}".format(e)) + finally: + logging.debug("terminating {0}".format(instance)) + self.eucalyptus.terminate_eucalyptus_instances([instance]) + return image_id + + def _connect(self): + if self.connected: return + self.eucalyptus = CreateVM(config=self) + self.connected = True + + def _get_image_name(self): + return "MOLNS_{0}_{1}_{2}".format(self.PROVIDER_TYPE, self.name, int(time.time())) + +########################################## +class EucalyptusController(EucalyptusBase): + """ Provider handle for an open stack controller. """ + + OBJ_NAME = 'EucalyptusController' + + CONFIG_VARS = OrderedDict( + [ + ('instance_type', + {'q':'Default Instance Type', 'default':'c3.large', 'ask':True}), + ]) + + def _connect(self): + if self.connected: return + self.eucalyptus = CreateVM(config=self.provider) + self.connected = True + + def start_instance(self, num=1): + """ Start or resume the controller. 
""" + try: + self._connect() + instances = self.eucalyptus.start_eucalyptus_instances(image_id=self.provider.config["molns_image_name"], num=int(num), instance_type=self.config["instance_type"]) + ret = [] + for instance in instances: + ip = instance.public_dns_name + i = self.datastore.get_instance(provider_instance_identifier=instance.id, ip_address=ip, provider_id=self.provider.id, controller_id=self.id) + ret.append(i) + if num == 1: + return ret[0] + else: + return ret + except Exception as e: + logging.exception(e) + raise ProviderException("Failed to start molns instance: {0}".format(e)) + + def resume_instance(self, instances): + self._connect() + if isinstance(instances, list): + eucalyptus_instances = [] + for instance in instances: + eucalyptus_instance = self.eucalyptus.get_instance(instance.provider_instance_identifier) + eucalyptus_instances.append(eucalyptus_instance) + new_eucalyptus_instances = self.eucalyptus.resume_eucalyptus_instances(eucalyptus_instances) + instances_to_update = list(instances) + while len(instances_to_update) > 0: + instance = instances_to_update.pop() + success=False + for eucalyptus_inst in new_eucalyptus_instances: + if eucalyptus_inst.id == instance.provider_instance_identifier: + instance.ip_address = eucalyptus_inst.public_dns_name + logging.debug("instance.id={0} updated with ip={1}".format(instance.provider_instance_identifier, instance.ip_address)) + success=True + break + if not success: + raise ProviderException("Could not update the IP of id={0} after resume".format(instance.provider_instance_identifier)) + else: + eucalyptus_instance = self.eucalyptus.get_instance(instances.provider_instance_identifier) + new_instance = self.eucalyptus.resume_eucalyptus_instances([eucalyptus_instance]) + instances.ip_address = new_instance[0].public_dns_name + logging.debug("instance.id={0} updated with ip={1}".format(instances.provider_instance_identifier, instances.ip_address)) + + def stop_instance(self, instances): + self._connect() + if isinstance(instances, list): + eucalyptus_instances = [] + for instance in instances: + eucalyptus_instance = self.eucalyptus.get_instance(instance.provider_instance_identifier) + eucalyptus_instances.append(eucalyptus_instance) + self.eucalyptus.stop_eucalyptus_instances(eucalyptus_instances) + else: + eucalyptus_instance = self.eucalyptus.get_instance(instances.provider_instance_identifier) + self.eucalyptus.stop_eucalyptus_instances([eucalyptus_instance]) + + def terminate_instance(self, instances): + self._connect() + if isinstance(instances, list): + eucalyptus_instances = [] + for instance in instances: + eucalyptus_instance = self.eucalyptus.get_instance(instances.provider_instance_identifier) + eucalyptus_instances.append(eucalyptus_instance) + self.datastore.delete_instance(instance) + self.eucalyptus.terminate_eucalyptus_instances(eucalyptus_instances) + else: + eucalyptus_instance = self.eucalyptus.get_instance(instances.provider_instance_identifier) + self.eucalyptus.terminate_eucalyptus_instances([eucalyptus_instance]) + self.datastore.delete_instance(instances) + + def get_instance_status(self, instance): + self._connect() + try: + status = self.eucalyptus.get_instance_status(instance.provider_instance_identifier) + except Exception as e: + #logging.exception(e) + return self.STATUS_TERMINATED + if status == 'running' or status == 'pending': + return self.STATUS_RUNNING + if status == 'stopped' or status == 'stopping': + return self.STATUS_STOPPED + if status == 'terminated' or status == 'shutting-down': 
+            return self.STATUS_TERMINATED
+        raise ProviderException("EucalyptusController.get_instance_status() got unknown status '{0}'".format(status))
+
+
+##########################################
+class EucalyptusWorkerGroup(EucalyptusController):
+    """ Provider handle for a Eucalyptus worker group. """
+
+    OBJ_NAME = 'EucalyptusWorkerGroup'
+
+    CONFIG_VARS = OrderedDict(
+        [
+        ('instance_type',
+         {'q':'Default Instance Type', 'default':'c3.large', 'ask':True}),
+        ('num_vms',
+         {'q':'Number of virtual machines in group', 'default':'1', 'ask':True}),
+        ])
+
+    def start_instance(self, num=1):
+        """ Start worker group vms. """
+        try:
+            self._connect()
+            instances = self.eucalyptus.start_eucalyptus_instances(image_id=self.provider.config["molns_image_name"], num=int(num), instance_type=self.config["instance_type"])
+            ret = []
+            for instance in instances:
+                ip = instance.public_dns_name
+                i = self.datastore.get_instance(provider_instance_identifier=instance.id, ip_address=ip, provider_id=self.provider.id, controller_id=self.controller.id, worker_group_id=self.id)
+                ret.append(i)
+            if num == 1:
+                return ret[0]
+            else:
+                return ret
+        except Exception as e:
+            logging.exception(e)
+            raise ProviderException("Failed to start molns instance: {0}".format(e))
+
+    def terminate_instance(self, instances):
+        self._connect()
+        if isinstance(instances, list):
+            eucalyptus_instances = []
+            for instance in instances:
+                eucalyptus_instance = self.eucalyptus.get_instance(instance.provider_instance_identifier)
+                eucalyptus_instances.append(eucalyptus_instance)
+                self.datastore.delete_instance(instance)
+            self.eucalyptus.terminate_eucalyptus_instances(eucalyptus_instances)
+        else:
+            eucalyptus_instance = self.eucalyptus.get_instance(instances.provider_instance_identifier)
+            self.eucalyptus.terminate_eucalyptus_instances([eucalyptus_instance])
+            self.datastore.delete_instance(instances)
+
+
+##########################################
+class CreateVM:
+    '''
+    This class is used to create VMs for Eucalyptus.
+    '''
+    PENDING_IMAGE_WAITTIME = 60
+
+    def __init__(self, config=None, connect=True):
+        if config is not None:
+            self.config = config
+        if self.config['aws_access_key'] is None or self.config['aws_secret_key'] is None:
+            raise ProviderException("AWS_SECRET_KEY or AWS_ACCESS_KEY not set")
+        if connect:
+            self.connect()
+
+    def connect(self):
+        #self.conn = boto.ec2.connect_to_region(
+        #    self.config['aws_region'],
+        #    aws_access_key_id=self.config['aws_access_key'],
+        #    aws_secret_access_key=self.config['aws_secret_key']
+        #)
+        access_key = self.config['aws_access_key']
+        secret_key = self.config['aws_secret_key']
+        ec2_url = self.config['ec2_url']
+        o = urlparse(ec2_url)
+        ec2_host = o.hostname
+        ec2_port = o.port
+        ec2_path = o.path
+        # Set up the connection to Eucalyptus.
+        self.conn = boto.connect_ec2(aws_access_key_id=access_key,
+                                     aws_secret_access_key=secret_key,
+                                     is_secure=False,
+                                     region=RegionInfo(name="eucalyptus", endpoint=ec2_host),
+                                     port=ec2_port,
+                                     path=ec2_path)
+
+
+    def get_instance(self, instance_id):
+        #logging.debug("get_instance(instance_id={0})".format(instance_id))
+        try:
+            reservations = self.conn.get_all_reservations(instance_ids=[instance_id])
+        except EC2ResponseError:
+            raise ProviderException("instance not found {0}".format(instance_id))
+        #logging.debug("get_instance() reservations:{0}".format(reservations))
+        for reservation in reservations:
+            #logging.debug("get_instance() reservation.instances:{0}".format(reservation.instances))
+            for instance in reservation.instances:
+                if instance.id == instance_id:
+                    return instance
+        raise ProviderException("instance not found {0}".format(instance_id))
+
+    def get_instance_status(self, instance_id):
+        return self.get_instance(instance_id).state
+
+
+    def get_vm_status(self, key_name=None, verbose=False, show_all=False):
+        if key_name is None:
+            key_name = self.config['key_name']
+        reservations = self.conn.get_all_reservations()
+        stopped_vms = []
+        running_vms = []
+        for reservation in reservations:
+            for instance in reservation.instances:
+                if verbose and show_all:
+                    print "{0}\t{1}\t{2}\t{3}".format(instance.id,instance.key_name,instance.state,instance.public_dns_name)
+                if instance.key_name == key_name:
+                    if verbose and not show_all:
+                        print "{0}\t{1}\t{2}\t{3}".format(instance.id,instance.key_name,instance.state,instance.public_dns_name)
+                    if instance.state == 'running':
+                        running_vms.append(instance)
+                    elif instance.state == 'stopped':
+                        stopped_vms.append(instance)
+        #return (stopped_vms, running_vms)
+        return (stopped_vms, sorted(running_vms, key=lambda vm: vm.id))
+
+    def image_exists(self, image_id):
+        try:
+            img = self.conn.get_all_images(image_ids=[image_id])[0]
+            return True
+        except IndexError:
+            return False
+
+    def start_vms(self, image_id=None, key_name=None, group_name=None, num=None, instance_type=None):
+        if key_name is None:
+            key_name = self.config['key_name']
+        if group_name is None:
+            group_name = self.config['group_name']
+        if num is None:
+            num = 1
+        if instance_type is None:
+            instance_type = self.config['default_instance_type']
+        # Check the group
+        self.create_security_group(group_name)
+
+        # stopped_vms is used below, so the status lookup must actually run.
+        (stopped_vms, running_vms) = self.get_vm_status(key_name)
+        #if len(running_vms) > 0:
+        #    msg = "Error: {0} VMs are already running with key_name={1}".format(len(running_vms),
+        #                                                                        key_name)
+        #    print msg
+        #    raise ProviderException(msg)
+
+        if len(stopped_vms) > 0:
+            return self.resume_eucalyptus_instances(stopped_vms)
+
+        if image_id is None:
+            raise ProviderException("Base Ubuntu image not specified.")
+        else:
+            self.image_id = image_id
+
+        # Check image
+        try:
+            img = self.conn.get_all_images(image_ids=[self.image_id])[0]
+        except IndexError:
+            raise ProviderException("Could not find image_id={0}".format(self.image_id))
+
+        if img.state != "available":
+            if img.state != "pending":
+                raise ProviderException("Image {0} is not available, its state is {1}.".format(self.image_id, img.state))
+            while img.state == "pending":
+                print "Image {0} has state {1}, waiting {2} seconds for it to become available.".format(self.image_id, img.state, self.PENDING_IMAGE_WAITTIME)
+                time.sleep(self.PENDING_IMAGE_WAITTIME)
+                img.update()
+
+        self.key_name = key_name
+        self.group_name = group_name
+        group_list = []
+        for _ in range(num):
+            group_list.append(group_name)
+
+        print "Starting {0} Eucalyptus instance(s). This will take a minute...".format(num)
+        reservation = self.conn.run_instances(self.image_id, min_count=num, max_count=num, key_name=key_name, security_groups=group_list, instance_type=instance_type)
+
+        instances = reservation.instances
+        num_instance = len(instances)
+        num_running = 0
+        while num_running < num_instance:
+            num_running = 0
+            for instance in instances:
+                instance.update()
+                if instance.state == 'running':
+                    num_running += 1
+            if num_running < num_instance:
+                time.sleep(5)
+        print "Eucalyptus instances started."
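+        # Sort by instance id so repeated calls return the instances in a stable order.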
+        return sorted(instances, key=lambda vm: vm.id)
+
+    def start_eucalyptus_instances(self, image_id=None, key_name=None, group_name=None, num=1, instance_type=None):
+        if key_name is None:
+            key_name = self.config['key_name']
+        if group_name is None:
+            group_name = self.config['group_name']
+        if num is None:
+            num = 1
+        if instance_type is None:
+            instance_type = self.config['default_instance_type']
+        try:
+            img = self.conn.get_all_images(image_ids=[image_id])[0]
+        except IndexError:
+            raise ProviderException("Could not find image_id={0}".format(image_id))
+        if img.state != "available":
+            if img.state != "pending":
+                raise ProviderException("Image {0} is not available, its state is {1}.".format(image_id, img.state))
+            while img.state == "pending":
+                print "Image {0} has state {1}, waiting {2} seconds for it to become available.".format(image_id, img.state, self.PENDING_IMAGE_WAITTIME)
+                time.sleep(self.PENDING_IMAGE_WAITTIME)
+                img.update()
+        print "Starting {0} Eucalyptus instance(s). This will take a minute...".format(num)
+        reservation = self.conn.run_instances(image_id, min_count=num, max_count=num, key_name=key_name, security_groups=[group_name], instance_type=instance_type)
+        instances = reservation.instances
+        num_instance = len(instances)
+        num_running = 0
+        while num_running < num_instance:
+            num_running = 0
+            for instance in instances:
+                instance.update()
+                if instance.state == 'running':
+                    num_running += 1
+            if num_running < num_instance:
+                time.sleep(5)
+        print "Eucalyptus instances started."
+        return sorted(instances, key=lambda vm: vm.id)
+
+    def stop_vms(self, key_name=None):
+        if key_name is None:
+            key_name = self.config['key_name']
+        (stopped_vms, running_vms) = self.get_vm_status(key_name)
+        self.stop_eucalyptus_instances(running_vms)
+
+    def terminate_vms(self, key_name=None):
+        if key_name is None:
+            key_name = self.config['key_name']
+        (stopped_vms, running_vms) = self.get_vm_status(key_name)
+        self.terminate_eucalyptus_instances(running_vms+stopped_vms)
+
+    def resume_eucalyptus_instances(self, instances):
+        num_instance = len(instances)
+        print "Resuming Eucalyptus instance(s). This will take a minute..."
+        for instance in instances:
+            print "\t{0}.".format(instance.id)
+            instance.start()
+        num_running = 0
+        while num_running < num_instance:
+            num_running = 0
+            for instance in instances:
+                instance.update()
+                if instance.state == 'running':
+                    num_running += 1
+            if num_running < num_instance:
+                time.sleep(5)
+        print "Eucalyptus instances resumed."
+        return instances
+
+    def stop_eucalyptus_instances(self, instances):
+        num_instance = len(instances)
+        print "Stopping Eucalyptus instance(s). This will take a minute..."
+        for instance in instances:
+            print "\t{0}.".format(instance.id)
+            instance.stop()
+        num_stopped = 0
+        while num_stopped < num_instance:
+            num_stopped = 0
+            for instance in instances:
+                instance.update()
+                if instance.state == 'stopped':
+                    num_stopped += 1
+            if num_stopped < num_instance:
+                time.sleep(5)
+        print "Eucalyptus instances stopped."
+
+    def terminate_eucalyptus_instances(self, instances):
+        num_instance = len(instances)
+        print "Terminating Eucalyptus instance(s). This will take a minute..."
+        for instance in instances:
+            print "\t{0}.".format(instance.id)
+            instance.terminate()
+        num_terminated = 0
+        while num_terminated < num_instance:
+            num_terminated = 0
+            for instance in instances:
+                instance.update()
+                if instance.state == 'terminated':
+                    num_terminated += 1
+            if num_terminated < num_instance:
+                time.sleep(5)
+        print "Eucalyptus instances terminated."
+
+    def create_vm_image(self, image_name=None, key_name=None):
+        if key_name is None:
+            key_name = self.config['key_name']
+        if image_name is None:
+            image_name = "MOLNS_{0}_{1}".format(key_name,int(time.time()))
+        (stopped_vms, running_vms) = self.get_vm_status(key_name)
+        if len(running_vms) != 1:
+            raise ProviderException("Expected only one running vm, {0} are running".format(len(running_vms)))
+        self.stop_eucalyptus_instances(running_vms)
+        instance = running_vms[0]
+        image_ami = instance.create_image(image_name)
+        print "Image created id={0} name={1}".format(image_ami, image_name)
+        self.terminate_eucalyptus_instances(running_vms)
+        return image_ami
+
+
+
+    def keypair_exists(self, key_name):
+        for sg in self.conn.get_all_key_pairs():
+            if sg.name == key_name:
+                return True
+        return False
+
+    def keypair_file_exists(self, key_name, conf_dir):
+        return os.path.exists(conf_dir + os.sep + key_name + ".pem")
+
+    def create_keypair(self, key_name, conf_dir):
+        key_pair = self.conn.create_key_pair(key_name)
+        key_pair.save(conf_dir)
+
+    def security_group_exists(self, group_name):
+        for sg in self.conn.get_all_security_groups():
+            if sg.name == group_name:
+                return True
+        return False
+
+    def create_security_group(self, group_name):
+        security_group = None
+        for sg in self.conn.get_all_security_groups():
+            if sg.name == group_name:
+                security_group = sg
+                break
+        if security_group is None:
+            print "Security group not found, creating one."
+            security_group = self.conn.create_security_group(group_name, 'MOLNs Security Group')
+            self.set_security_group_rules(security_group)
+        elif not self.check_security_group_rules(security_group):
+            raise ProviderException("Security group {0} exists, but has the wrong firewall rules. Please delete the group, or choose a different one.".format(group_name))
+        return security_group
+
+
+    def set_security_group_rules(self, group, expected_rules=ProviderBase.FIREWALL_RULES):
+        for rule in expected_rules:
+            if not group.authorize(ip_protocol=rule.ip_protocol,
+                                   from_port=rule.from_port,
+                                   to_port=rule.to_port,
+                                   cidr_ip=rule.cidr_ip):
+                return False
+        return True
+
+    def check_security_group_rules(self, group, expected_rules=ProviderBase.FIREWALL_RULES):
+        """ Check to be sure the expected_rules are set for this group.
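+
+        Returns True only when the group's rules and expected_rules match
+        exactly; any unexpected rule or any missing rule fails the check.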
""" + ret = True + + current_rules = [] + for rule in group.rules: + if not rule.grants[0].cidr_ip: + current_rule = self.SecurityGroupRule(rule.ip_protocol, + rule.from_port, + rule.to_port, + "0.0.0.0/0", + rule.grants[0].name) + else: + current_rule = self.SecurityGroupRule(rule.ip_protocol, + rule.from_port, + rule.to_port, + rule.grants[0].cidr_ip, + None) + + if current_rule not in expected_rules: + print "Unexpected Rule: {0}".format(current_rule) + ret = False + else: + #print "Current Rule: {0}".format(current_rule) + current_rules.append(current_rule) + + for rule in expected_rules: + if rule not in current_rules: + print "Rule not found: {0}".format(rule) + ret = False + + return ret + diff --git a/MolnsLib/OpenStackProvider.py b/MolnsLib/OpenStackProvider.py index 26c8779..5a35c98 100644 --- a/MolnsLib/OpenStackProvider.py +++ b/MolnsLib/OpenStackProvider.py @@ -43,6 +43,8 @@ class OpenStackProvider(OpenStackBase): {'q':'OpenStack project_name', 'default':os.environ.get('OS_TENANT_NAME'), 'ask':True}), ('neutron_nic', {'q':'Network ID (leave empty if only one possible network)', 'default':None, 'ask':True}), + ('region_name', + {'q':'Specify the region (leave empty if only one region)', 'default':os.environ.get('OS_REGION_NAME'), 'ask':True}), ('floating_ip_pool', {'q':'Name of Floating IP Pool (leave empty if only one possible pool)', 'default':None, 'ask':True}), ('nova_version', @@ -192,6 +194,8 @@ def _connect(self): creds['api_key'] = self.config['nova_password'] creds['auth_url'] = self.config['nova_auth_url'] creds['project_id'] = self.config['nova_project_id'] + if 'region_name' in self.config and self.config['region_name'] is not None: + creds['region_name'] = self.config['region_name'] self.nova = novaclient.Client(self.config['nova_version'], **creds) self.connected = True diff --git a/MolnsLib/Utils.py b/MolnsLib/Utils.py new file mode 100644 index 0000000..a6b75f2 --- /dev/null +++ b/MolnsLib/Utils.py @@ -0,0 +1,45 @@ +def get_user_name(): + try: + import os + return os.environ['SUDO_USER'] + except KeyError: + import getpass + return getpass.getuser() + + +def get_sudo_user_id(): + import pwd + u_name = get_user_name() + return pwd.getpwnam(u_name).pw_uid + + +def get_sudo_group_id(): + import grp + u_name = get_user_name() + return grp.getgrnam(u_name).gr_gid + + +def ensure_sudo_mode(some_function): + import os + import sys + if sys.platform.startswith("linux") and os.getuid() != 0: + pass + raise NoPrivilegedMode("\n\nOn Linux platforms, 'docker' is a priviledged command. 
" + "To use 'docker' functionality, please run in sudo mode or as root user.") + return some_function + + +class Log: + verbose = True + + def __init__(self): + pass + + @staticmethod + def write_log(message): + if Log.verbose: + print message + + +class NoPrivilegedMode(Exception): + pass diff --git a/MolnsLib/constants.py b/MolnsLib/constants.py new file mode 100644 index 0000000..7ac7ffb --- /dev/null +++ b/MolnsLib/constants.py @@ -0,0 +1,28 @@ +import os + + +class Constants: + DockerWorkingDirectoryPrefix = "working_dir_" + LOGGING_DIRECTORY = "~/MOLNS_LOG" + DOCKER_BASE_URL = "unix://var/run/docker.sock" + DOCKER_DEFAULT_IMAGE = "ubuntu:latest" + DOCKER_DEFAULT_PORT = '9000' + DOCKER_CONTAINER_RUNNING = "running" + DOCKER_CONTAINER_EXITED = "exited" + DOCKERFILE_NAME = "dockerfile_" + DOKCER_IMAGE_ID_LENGTH = 12 + DOCKER_IMAGE_PREFIX = "molns-docker-provider-" + DOCKER_PY_IMAGE_ID_PREFIX_LENGTH = 7 + DockerProvider = "Docker" + DockerNonExistentTag = "**NA**" + DockerImageDelimiter = "|||" + MolnsDockerContainerNamePrefix = "Molns-" + MolnsExecHelper = "molns_exec_helper.py" + DEFAULT_PRIVATE_NOTEBOOK_PORT = 8081 + DEFAULT_PUBLIC_NOTEBOOK_PORT = 443 + DEFAULT_PRIVATE_WEBSERVER_PORT = 8001 + DEFAULT_PUBLIC_WEBSERVER_PORT = 80 + DEFAULT_QSUB_SSH_PORT = 22 + ForbiddenVolumeNames = [".ssh", ".ipython", ".molns", "ipython", "localarea", "shared"] + ConfigDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".molns/") + ClusterKeyFileNameOnController = "molns_cluster_secretkey" diff --git a/MolnsLib/installSoftware.py b/MolnsLib/installSoftware.py index 2094ccf..a89fc90 100644 --- a/MolnsLib/installSoftware.py +++ b/MolnsLib/installSoftware.py @@ -16,7 +16,7 @@ class InstallSW: This class is used for installing software ''' - # Install the necessary software for IPython and PyURDME. + # Contextualization, install the software for IPython and PyURDME. # Commands can be specified in 3 ways: # 1: a string # 2: a list a strings @@ -24,6 +24,8 @@ class InstallSW: # item is a 'check' command, which should error (return code 1) if the first item(s) did not # install correctly command_list = [ + + # Basic contextualization "curl http://www.ubuntu.com", # Check to make sure networking is up. 
"sudo apt-get update", "sudo apt-get -y install git", @@ -32,69 +34,115 @@ class InstallSW: "sudo apt-get -y install python-matplotlib python-numpy python-scipy", "sudo apt-get -y install make", "sudo apt-get -y install python-software-properties", - "sudo add-apt-repository ppa:fenics-packages/fenics", - "sudo apt-get update", - "sudo apt-get -y install fenics", "sudo apt-get -y install cython python-h5py", "sudo apt-get -y install python-pip python-dev build-essential", "sudo pip install pyzmq --upgrade", "sudo pip install dill cloud pygments", "sudo pip install tornado Jinja2", + + # Molnsutil develop + [ + "sudo pip install jsonschema jsonpointer", + # EC2/S3 and OpenStack APIs + "sudo pip install boto", + "sudo apt-get -y install pandoc", + # This set of packages is needed for OpenStack, as molns_util uses them for hybrid cloud deployment + "sudo apt-get -y install libxml2-dev libxslt1-dev python-dev", + "sudo pip install python-novaclient", + "sudo easy_install -U pip", + "sudo pip install python-keystoneclient", + "sudo pip install python-swiftclient", + ], + [ + "sudo rm -rf /usr/local/molnsutil;sudo mkdir -p /usr/local/molnsutil;sudo chown ubuntu /usr/local/molnsutil", + "cd /usr/local/ && git clone https://github.com/aviral26/molnsutil.git && cd /usr/local/molnsutil && git checkout qsub_support" + ], + + # Molns develop + [ + "sudo rm -rf /usr/local/molns;sudo mkdir -p /usr/local/molns;sudo chown ubuntu /usr/local/molns", + "cd /usr/local/ && git clone https://github.com/aviral26/molns.git && cd /usr/local/molns" + ], + + # Cluster execution + [ + "sudo rm -rf /usr/local/cluster_execution;sudo mkdir -p /usr/local/cluster_execution;sudo chown ubuntu /usr/local/cluster_execution", + "cd /usr/local/ && git clone https://github.com/aviral26/cluster_execution.git" + ], - - # For molnsutil - "sudo pip install jsonschema jsonpointer", - # S3 and OS APIs - "sudo pip install boto", - "sudo apt-get -y install pandoc", - # This set of packages is really only needed for OpenStack, but molnsutil uses them - "sudo apt-get -y install libxml2-dev libxslt1-dev python-dev", - "sudo pip install python-novaclient", - "sudo easy_install -U pip", - "sudo pip install python-keystoneclient", - "sudo pip install python-swiftclient", # So the workers can mount the controller via SSHfs [ "sudo apt-get -y install sshfs", "sudo gpasswd -a ubuntu fuse", + "mkdir -p /home/ubuntu/.ssh/", "echo 'ServerAliveInterval 60' >> /home/ubuntu/.ssh/config", ], - # FOR DEVELOPMENT, NEEDS TO BE TESTED - # High-performance ssh-hpn - #[ - # "sudo add-apt-repository ppa:w-rouesnel/openssh-hpn -y", - # "sudo apt-get update -y", - #], - - # IPython install + + # IPython [ "sudo rm -rf ipython;git clone --recursive https://github.com/Molns/ipython.git", "cd ipython && git checkout 3.0.0-molns_fixes && python setup.py submodule && sudo python setup.py install", "sudo rm -rf ipython", "ipython profile create default", "sudo pip install terminado", #Jupyter terminals + "python -c \"from IPython.external import mathjax; mathjax.install_mathjax(tag='2.2.0')\"" ], - [ "sudo rm -rf /usr/local/pyurdme;sudo mkdir -p /usr/local/pyurdme;sudo chown ubuntu /usr/local/pyurdme", - "cd /usr/local/ && git clone https://github.com/MOLNs/pyurdme.git", - "cd /usr/local/pyurdme && git checkout develop", + + + ### Simulation software related to pyurdme and StochSS + + # Gillespy + [ "sudo rm -rf /usr/local/StochKit;sudo mkdir -p /usr/local/StochKit;sudo chown ubuntu /usr/local/StochKit", + "cd /usr/local/ && git clone 
https://github.com/StochSS/stochkit.git StochKit", + "cd /usr/local/StochKit && ./install.sh", - "cp /usr/local/pyurdme/pyurdme/data/three.js_templates/js/* .ipython/profile_default/static/custom/", + #"wget https://github.com/StochSS/stochss/blob/master/ode-1.0.4.tgz?raw=true -q -O /tmp/ode.tgz", + "wget https://github.com/StochSS/StochKit_ode/archive/master.tar.gz?raw=true -q -O /tmp/ode.tgz", + "cd /tmp && tar -xzf /tmp/ode.tgz", + "sudo mv /tmp/StochKit_ode-master /usr/local/ode", + "rm /tmp/ode.tgz", + "cd /usr/local/ode/cvodes/ && tar -xzf \"cvodes-2.7.0.tar.gz\"", + "cd /usr/local/ode/cvodes/cvodes-2.7.0/ && ./configure --prefix=\"/usr/local/ode/cvodes/cvodes-2.7.0/cvodes\" 1>stdout.log 2>stderr.log", + "cd /usr/local/ode/cvodes/cvodes-2.7.0/ && make 1>stdout.log 2>stderr.log", + "cd /usr/local/ode/cvodes/cvodes-2.7.0/ && make install 1>stdout.log 2>stderr.log", + "cd /usr/local/ode/ && STOCHKIT_HOME=/usr/local/StochKit/ STOCHKIT_ODE=/usr/local/ode/ make 1>stdout.log 2>stderr.log", + + "sudo rm -rf /usr/local/gillespy;sudo mkdir -p /usr/local/gillespy;sudo chown ubuntu /usr/local/gillespy", + "cd /usr/local/ && git clone https://github.com/briandrawert/gillespy.git", + "cd /usr/local/gillespy && sudo STOCHKIT_HOME=/usr/local/StochKit/ STOCHKIT_ODE_HOME=/usr/local/ode/ python setup.py install" + + ], + + # FeniCS/Dolfin/pyurdme + [ "sudo add-apt-repository -y ppa:fenics-packages/fenics", + "sudo apt-get update", + "sudo apt-get -y install fenics", + # Gmsh for Finite Element meshes + "sudo apt-get install -y gmsh", + ], + + ["sudo apt-get install docker", "sudo pip install docker", "sudo pip install sqlalchemy", + "sudo pip install boto", "sudo pip install python-novaclient", "sudo pip install paramiko"], + # pyurdme + [ "sudo rm -rf /usr/local/pyurdme && sudo mkdir -p /usr/local/pyurdme && sudo chown ubuntu /usr/local/pyurdme", + "cd /usr/local/ && git clone https://github.com/MOLNs/pyurdme.git", + #"cd /usr/local/pyurdme && git checkout develop", # for development only + "cp /usr/local/pyurdme/pyurdme/data/three.js_templates/js/* $HOME/.ipython/profile_default/static/custom/", "source /usr/local/pyurdme/pyurdme_init && python -c 'import pyurdme'", ], - [ "rm -rf MOLNS_notebooks;git clone https://github.com/Molns/MOLNS_notebooks.git", - "cp MOLNS_notebooks/*.ipynb .;rm -rf MOLNS_notebooks;", + + # example notebooks + [ "rm -rf MOLNS_notebooks && git clone https://github.com/Molns/MOLNS_notebooks.git", + "cp MOLNS_notebooks/*.ipynb . && rm -rf MOLNS_notebooks", "ls *.ipynb" ], - [ - "sudo rm -rf /usr/local/molnsutil;sudo mkdir -p /usr/local/molnsutil;sudo chown ubuntu /usr/local/molnsutil", - "cd /usr/local/ && git clone https://github.com/Molns/molnsutil.git", - "cd /usr/local/molnsutil && sudo python setup.py install" - ], - "python -c \"from IPython.external import mathjax; mathjax.install_mathjax(tag='2.2.0')\"", - - # Upgrade scipy from pip to get rid of six.py bug on Trusty + + # Upgrade scipy from pip to get rid of super-annoying six.py bug on Trusty "sudo apt-get -y remove python-scipy", "sudo pip install scipy", - - "sync", # This is critial for some infrastructures. + + "sudo pip install jsonschema jsonpointer", # redo this install to be sure it has not been removed. + "sudo pip install paramiko", + + "sync", # This is critical for some infrastructures. ] # How many times do we try to install each package. @@ -234,7 +282,6 @@ def exec_command_list_switch(self, command_list): raise SystemExit("CRITICAL ERROR: could not complete command '{0}'. 
Exiting.".format(command)) print "Installation complete in {0}s".format(time.time() - tic) - def log_exec(self, msg): if self.log_file is not None: self.log_file.write(msg) @@ -298,6 +345,11 @@ def exec_multi_command(self, command, next_command): print "FAILED......\t{0}:{1}\t{2}\t{3}".format(self.hostname, self.ssh_endpoint, command, e) raise InstallSWException() + @staticmethod + def get_command_list(): + """Returns the whole list of dependency installation commands. """ + return InstallSW.command_list + if __name__ == "__main__": print "{0}".format(InstallSW.command_list) print "len={0}".format(len(InstallSW.command_list)) @@ -308,4 +360,3 @@ def exec_multi_command(self, command, next_command): else: cnt += 1 print "cnt={0}".format(cnt) - diff --git a/MolnsLib/molns_datastore.py b/MolnsLib/molns_datastore.py index b54bf43..dee8428 100644 --- a/MolnsLib/molns_datastore.py +++ b/MolnsLib/molns_datastore.py @@ -1,15 +1,20 @@ #!/usr/bin/env python from sqlalchemy import create_engine from sqlalchemy.ext.declarative import declarative_base + Base = declarative_base() from sqlalchemy import Column, Integer, String, Sequence from sqlalchemy.orm import sessionmaker import os import logging import sys +import uuid +import datetime + ############################################################# -#VALID_PROVIDER_TYPES = ['OpenStack', 'EC2', 'Rackspace'] -VALID_PROVIDER_TYPES = ['OpenStack', 'EC2'] +VALID_PROVIDER_TYPES = ['OpenStack', 'EC2', 'Eucalyptus', 'Docker'] + + ############################################################# #### SCHEMA ################################################# ############################################################# @@ -18,12 +23,13 @@ class Provider(Base): """ DB object for an infrastructure service provider. """ __tablename__ = 'providers' id = Column(Integer, Sequence('provider_id_seq'), primary_key=True) - type = Column(String) #'EC2', 'Azure', 'OpenStack' + type = Column(String) # 'EC2', 'Azure', 'OpenStack' name = Column(String) def __str__(self): return "Provider({0}): name={1} type={2}".format(self.id, self.name, self.type) + class ProviderData(Base): """ DB object to store the key/value pairs for a service provider. """ __tablename__ = 'provider_data' @@ -33,19 +39,22 @@ class ProviderData(Base): value = Column(String) def __str__(self): - return "ProviderData({0}): provider_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, self.value) + return "ProviderData({0}): provider_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, + self.value) + class Controller(Base): """ DB object for a MOLNS controller. """ __tablename__ = 'controllers' id = Column(Integer, Sequence('controller_id_seq'), primary_key=True) - type = Column(String) #'EC2', 'Azure', 'OpenStack' + type = Column(String) # 'EC2', 'Azure', 'OpenStack' name = Column(String) provider_id = Column(Integer) - + def __str__(self): return "Controller({0}): name={1} provider_id={2}".format(self.id, self.name, self.provider_id) + class ControllerData(Base): """ DB object to store the key/value pairs for a controller. """ __tablename__ = 'controller_data' @@ -55,19 +64,24 @@ class ControllerData(Base): value = Column(String) def __str__(self): - return "ControllerData({0}): controller_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, self.value) + return "ControllerData({0}): controller_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, + self.value) + class WorkerGroup(Base): """ DB object for a MOLNS WorkerGroup. 
""" __tablename__ = 'worker_groups' id = Column(Integer, Sequence('worker_group_id_seq'), primary_key=True) - type = Column(String) #'EC2', 'Azure', 'OpenStack' + type = Column(String) # 'EC2', 'Azure', 'OpenStack' name = Column(String) provider_id = Column(Integer) controller_id = Column(Integer) - + def __str__(self): - return "WorkerGroup({0}): name={1} provider_id={2} controller_id={3}".format(self.id, self.name, self.provider_id, self.controller_id) + return "WorkerGroup({0}): name={1} provider_id={2} controller_id={3}".format(self.id, self.name, + self.provider_id, + self.controller_id) + class WorkerGroupData(Base): """ DB object to store the key/value pairs for a worker group. """ @@ -78,36 +92,54 @@ class WorkerGroupData(Base): value = Column(String) def __str__(self): - return "WorkerGrouprData({0}): worker_group_id={1} name={2} value={3}".format(self.id, self.parent_id, self.name, self.value) + return "WorkerGroupData({0}): worker_group_id={1} name={2} value={3}".format(self.id, self.parent_id, + self.name, self.value) class Instance(Base): """ DB object for a MOLNS VM instance. """ __tablename__ = 'instances' id = Column(Integer, Sequence('instance_id_seq'), primary_key=True) - type = Column(String) #'head-node' or 'worker' + type = Column(String) # 'head-node' or 'worker' controller_id = Column(Integer) worker_group_id = Column(Integer) provider_id = Column(Integer) ip_address = Column(String) provider_instance_identifier = Column(String) - + + def __str__(self): + return "Instance({0}): provider_instance_identifier={1} provider_id={2} controller_id={3} worker_group_id={4}".format( + self.id, self.provider_instance_identifier, self.provider_id, self.controller_id, self.worker_group_id) + + +class ExecJob(Base): + """ DB object for MOLNS exec jobs. """ + __tablename__ = 'jobs' + id = Column(Integer, Sequence('instance_id_seq'), primary_key=True) + controller_id = Column(Integer) + exec_str = Column(String) + jobID = Column(String) + date = Column(String) + def __str__(self): - return "Instance({0}): provider_instance_identifier={1} provider_id={2} controller_id={3} worker_group_id={4}".format(self.id, self.provider_instance_identifier, self.provider_id, self.controller_id, self.worker_group_id) + return "ExecJob({0}): jobID={1} controller_id={2}, exec_str={3}".format(self.id, self.jobID, self.controller_id, + self.exec_str) class DatastoreException(Exception): pass + ############################################################# HANDLE_MAPPING = { - 'Provider':(Provider,ProviderData), - 'Controller':(Controller,ControllerData), - 'WorkerGroup':(WorkerGroup,WorkerGroupData), + 'Provider': (Provider, ProviderData), + 'Controller': (Controller, ControllerData), + 'WorkerGroup': (WorkerGroup, WorkerGroupData), } -#from OpenStackProvider import OpenStackProvider, OpenStackController, OpenStackWorkerGroup -#from EC2Provider import EC2Provider, EC2Controller, EC2WorkerGroup + +# from OpenStackProvider import OpenStackProvider, OpenStackController, OpenStackWorkerGroup
# from EC2Provider import EC2Provider, EC2Controller, EC2WorkerGroup def dynamic_module_import(name): mod = __import__(name) @@ -116,26 +148,30 @@ def dynamic_module_import(name): mod = getattr(mod, comp) return mod + def get_provider_handle(kind, ptype): """ Return object of 'kind' (Provider, Controller or WorkerGroup) for provider of type 'ptype'. Load the module if necessary. 
""" - #logging.debug("get_provider_handle(kind={0}, ptype={1})".format(kind, ptype)) + # logging.debug("get_provider_handle(kind={0}, ptype={1})".format(kind, ptype)) valid_handles = ['Provider', 'Controller', 'WorkerGroup'] if kind not in valid_handles: raise DatastoreException("Unknown kind {0}".format(kind)) if ptype not in VALID_PROVIDER_TYPES: - raise DatastoreException("Unknown {1} type {0}".format(ptype, kind)) + # raise DatastoreException("Unknown {1} type {0}".format(ptype, kind)) + return None cls_name = "{0}{1}".format(ptype, kind) pkg_name = "MolnsLib.{0}Provider".format(ptype) if pkg_name not in sys.modules: logging.debug("loading {0} from {1}".format(cls_name, pkg_name)) + # pkg = dynamic_module_import(pkg_name) pkg = dynamic_module_import(pkg_name) try: - #logging.debug("dir(pkg={0})={1}".format(pkg, dir(pkg))) + # logging.debug("dir(pkg={0})={1}".format(pkg, dir(pkg))) mod = getattr(pkg, cls_name) except AttributeError: raise DatastoreException("module {0} does not contain {1}".format(pkg_name, cls_name)) return mod + ############################################################# @@ -148,24 +184,25 @@ def __init__(self, db_file=None, config_dir=None): """ Constructor. """ if db_file is not None: self.engine = create_engine('sqlite:///{0}'.format(db_file)) + if config_dir is None: + self.config_dir = os.path.abspath(os.path.dirname(db_file)) elif config_dir is not None: if not os.path.exists(config_dir): os.makedirs(config_dir) self.engine = create_engine('sqlite:///{0}/{1}'.format(config_dir, self.MOLNS_DATASTORE)) + self.config_dir = config_dir else: if not os.path.exists(self.MOLNS_CONFIG_DIR): os.makedirs(self.MOLNS_CONFIG_DIR) self.engine = create_engine('sqlite:///{0}/{1}'.format(self.MOLNS_CONFIG_DIR, self.MOLNS_DATASTORE)) - Base.metadata.create_all(self.engine) # Create all the tables + Base.metadata.create_all(self.engine) # Create all the tables Session = sessionmaker(bind=self.engine) self.session = Session() - self.config_dir = config_dir def __del__(self): """ Destructor. """ self.session.commit() - def list_objects(self, kind): """ Get all the currently configured objects of kind (Provider, Controller, WorkerGroup). @@ -195,16 +232,16 @@ def create_object(self, ptype, name, kind, **kwargs): raise DatastoreException("{1} {0} already exists with type {2}".format(name, kind, p.type)) p_handle = get_provider_handle(kind, ptype) - #logging.debug("create_object() {1}(name={0})".format(name, p_handle)) + # logging.debug("create_object() {1}(name={0})".format(name, p_handle)) p = p_handle(name=name, config_dir=self.config_dir) if 'provider_id' in kwargs: p.provider_id = kwargs['provider_id'] - #logging.debug("create_object() provider_id={0}".format(kwargs['provider_id'])) + # logging.debug("create_object() provider_id={0}".format(kwargs['provider_id'])) if 'controller_id' in kwargs: p.controller_id = kwargs['controller_id'] - #logging.debug("create_object() controller_id={0}".format(kwargs['controller_id'])) + # logging.debug("create_object() controller_id={0}".format(kwargs['controller_id'])) return p - + def delete_object(self, name, kind): """ Delete an object of kind (Provider, Controller, WorkerGroup). @@ -223,7 +260,7 @@ def delete_object(self, name, kind): logging.debug("Deleting entry: {0}".format(p)) self.session.delete(p) self.session.commit() - + def get_object(self, name, kind): """ Get a config object of kind (Provider, Controller, WorkerGroup). 
@@ -259,7 +296,7 @@ def get_object_by_id(self, id, kind): (handle, d_handle) = HANDLE_MAPPING[kind] p = self.session.query(handle).filter_by(id=id).first() if p is None: - raise DatastoreException("{0} {1} not found".format(kind, name)) + raise DatastoreException("{0} {1} not found".format(kind, id)) return self._get_object_data(d_handle, kind, p.type, p) def _get_object_data(self, d_handle, kind, ptype, p): @@ -269,16 +306,26 @@ def _get_object_data(self, d_handle, kind, ptype, p): data[d.name] = d.value p_handle = get_provider_handle(kind, ptype) - #logging.debug("{2}(name={0}, data={1})".format(name,data,p_handle)) + # logging.debug("{2}(name={0}, data={1})".format(name,data,p_handle)) + if p_handle is None: + return None ret = p_handle(name=p.name, config=data, config_dir=self.config_dir) ret.id = p.id ret.datastore = self if 'provider_id' in p.__dict__: - #logging.debug("_get_object_data(): provider_id={0}".format(p.provider_id)) - ret.provider = self.get_object_by_id(id=p.provider_id, kind='Provider') + # logging.debug("_get_object_data(): provider_id={0}".format(p.provider_id)) + try: + ret.provider = self.get_object_by_id(id=p.provider_id, kind='Provider') + except DatastoreException as e: + logging.debug('Error: provider {0} not found'.format(p.provider_id)) + ret.provider = None if 'controller_id' in p.__dict__: - #logging.debug("_get_object_data(): controller_id={0}".format(p.controller_id)) - ret.controller = self.get_object_by_id(id=p.controller_id, kind='Controller') + # logging.debug("_get_object_data(): controller_id={0}".format(p.controller_id)) + try: + ret.controller = self.get_object_by_id(id=p.controller_id, kind='Controller') + except DatastoreException as e: + logging.debug('Error: controller {0} not found'.format(p.controller_id)) + ret.controller = None return ret def save_object(self, config, kind): @@ -296,15 +343,16 @@ def save_object(self, config, kind): # Add new entry. p = handle(name=config.name, type=config.type) self.session.add(p) - #logging.debug("Created new DB entry: {0}".format(p)) - #print "save_object() config.__dict__={0}".format(config.__dict__) + # logging.debug("Created new DB entry: {0}".format(p)) + # print "save_object() config.__dict__={0}".format(config.__dict__) if 'provider_id' in config.__dict__: - logging.debug("provider_id is in config.__dict__ {0} {1}".format(config.provider_id, type(config.provider_id))) + logging.debug( + "provider_id is in config.__dict__ {0} {1}".format(config.provider_id, type(config.provider_id))) p.provider_id = config.provider_id if 'controller_id' in config.__dict__: logging.debug("controller_id is in config.__dict__ {0}".format(config.controller_id)) p.controller_id = config.controller_id - #logging.debug("Updated DB entry: {0}".format(p)) + # logging.debug("Updated DB entry: {0}".format(p)) self.session.commit() data = config.config.copy() @@ -314,33 +362,34 @@ def save_object(self, config, kind): d.value = data[d.name] del data[d.name] else: - #logging.debug("Deleting entry: {0}".format(d)) + # logging.debug("Deleting entry: {0}".format(d)) self.session.delete(d) for d in data.keys(): dd = d_handle(parent_id=p.id, name=d, value=data[d]) - #logging.debug("Created new entry: {0}".format(dd)) + # logging.debug("Created new entry: {0}".format(dd)) self.session.add(dd) self.session.commit() - def get_instance_by_id(self, id): """ Get an instance by its id. 
""" return self.session.query(Instance).filter_by(id=id).first() - - def get_instance(self, provider_instance_identifier, ip_address, provider_id=None, controller_id=None, worker_group_id=None): + + def get_instance(self, provider_instance_identifier, ip_address, provider_id=None, controller_id=None, + worker_group_id=None, provider_type=None): """ Create or get the value for an instance. """ p = self.session.query(Instance).filter_by(provider_instance_identifier=provider_instance_identifier).first() if p is None: - p = Instance(provider_instance_identifier=provider_instance_identifier, ip_address=ip_address, provider_id=provider_id, controller_id=controller_id, worker_group_id=worker_group_id) + p = Instance(provider_instance_identifier=provider_instance_identifier, ip_address=ip_address, + provider_id=provider_id, controller_id=controller_id, worker_group_id=worker_group_id) self.session.add(p) self.session.commit() - #logging.debug("Creating instance: {0}".format(p)) + # logging.debug("Creating instance: {0}".format(p)) else: - #logging.debug("Fetching instance: {0}".format(p)) + # logging.debug("Fetching instance: {0}".format(p)) pass return p - def get_controller_instances(self,controller_id=None): + def get_controller_instances(self, controller_id=None): logging.debug("get_controller_instances by controller_id={0}".format(controller_id)) ret = self.session.query(Instance).filter_by(controller_id=controller_id, worker_group_id=None).all() if ret is None: @@ -348,24 +397,24 @@ else: return ret - def get_worker_instances(self,controller_id=None): - #logging.debug("get_worker_instances by controller_id={0}".format(controller_id)) - ret = self.session.query(Instance).filter_by(controller_id=controller_id).filter(Instance.worker_group_id!=None).all() + def get_worker_instances(self, controller_id=None): + # logging.debug("get_worker_instances by controller_id={0}".format(controller_id)) + ret = self.session.query(Instance).filter_by(controller_id=controller_id).filter( + Instance.worker_group_id != None).all() if ret is None: return [] else: return ret - def get_all_instances(self, provider_id=None, controller_id=None, worker_group_id=None): if provider_id is not None: - #logging.debug("get_all_instances by provider_id={0}".format(provider_id)) + # logging.debug("get_all_instances by provider_id={0}".format(provider_id)) ret = self.session.query(Instance).filter_by(provider_id=provider_id).all() elif controller_id is not None: - #logging.debug("get_all_instances by controller_id={0}".format(controller_id)) + # logging.debug("get_all_instances by controller_id={0}".format(controller_id)) ret = self.session.query(Instance).filter_by(controller_id=controller_id).all() elif worker_group_id is not None: - #logging.debug("get_all_instances by worker_group_id={0}".format(worker_group_id)) + # logging.debug("get_all_instances by worker_group_id={0}".format(worker_group_id)) ret = self.session.query(Instance).filter_by(worker_group_id=worker_group_id).all() else: ret = self.session.query(Instance).all() @@ -376,9 +425,43 @@ def get_all_instances(self, provider_id=None, controller_id=None, worker_group_i def delete_instance(self, instance): """ Delete an instance. 
""" - #logging.debug("Deleting instance: {0}".format(instance)) + # logging.debug("Deleting instance: {0}".format(instance)) self.session.delete(instance) self.session.commit() + def get_all_jobs(self, controller_id=None): + if controller_id is not None: + # logging.debug("get_all_instances by controller_id={0}".format(controller_id)) + ret = self.session.query(ExecJob).filter_by(controller_id=controller_id).all() + else: + ret = self.session.query(ExecJob).all() + if ret is None: + return [] + else: + return ret + def get_job(self, jobID): + """ Get the object for a job. """ + # logging.debug("get_job(jobID={0})".format(jobID)) + try: + id = int(jobID) + j = self.session.query(ExecJob).filter_by(id=id).first() + except Exception: + j = self.session.query(ExecJob).filter_by(jobID=jobID).first() + if j is None: + raise DatastoreException("Job {0} not found".format(jobID)) + return j + + def start_job(self, controller_id=None, exec_str=None): + """ Create the object for a job. """ + date_str = str(datetime.datetime.now()) + jobID = str(uuid.uuid4()) + j = ExecJob(jobID=jobID, controller_id=controller_id, exec_str=exec_str, date=date_str) + self.session.add(j) + self.session.commit() + logging.debug("Creating ExecJob: {0}".format(j)) + return j + def delete_job(self, job): + self.session.delete(job) + self.session.commit() diff --git a/MolnsLib/molns_exec_helper.py b/MolnsLib/molns_exec_helper.py new file mode 100644 index 0000000..6f3a5b2 --- /dev/null +++ b/MolnsLib/molns_exec_helper.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +import os +import subprocess +import shlex +import json +import traceback +import sys + + +def run_job(exec_str, stdout_file): + with open(stdout_file, 'w') as stdout_fh: + try: + p = subprocess.Popen( + shlex.split(exec_str), + stdout=stdout_fh, + stderr=stdout_fh, + ) + pid = p.pid + # create pid file + pid_file = ".molns/pid" + return_code_file = ".molns/return_value" + with open(pid_file, 'w+') as fd: + fd.write(str(pid)) + # Wait on program execution... + return_code = p.wait() + print "Return code:", return_code + if return_code_file is not None: + with open(return_code_file, 'w+') as fd: + fd.write(str(return_code)) + except Exception as e: + stdout_fh.write('Error: {}'.format(str(e))) + stdout_fh.write(traceback.format_exc()) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + +if __name__ == "__main__": + with open(".molns/cmd",'r') as fd: + exec_str = fd.read() + print "exec_str", exec_str + run_job(exec_str, ".molns/stdout") diff --git a/MolnsLib/molns_landing_page.py b/MolnsLib/molns_landing_page.py new file mode 100644 index 0000000..0238f26 --- /dev/null +++ b/MolnsLib/molns_landing_page.py @@ -0,0 +1,93 @@ +from pipes import quote + +class MolnsLandingPage: + def __init__(self, port): + self.molns_landing_page = quote(""" +<!DOCTYPE html> +<html> +<head> +<title>MOLNs</title> +</head> +<body>
+<div class="container">
+    <h1>MOLNs</h1>
+    <p>A cloud computing appliance for spatial stochastic simulation of biochemical systems.</p>
+    <p><a href="https://127.0.0.1:{0}/">To the IPython Interface</a></p>
+    <p>Please note that due to the self-signed certificate, you will see a warning before you can view the page. Please accept the warning and proceed.</p>
+    <p>&nbsp;</p>
+    <div class="row">
+        <div class="column">
+            <p>Write PyURDME models as sharable IPython notebooks</p>
+            <p><a href="http://www.pyurdme.org/">PyURDME API reference</a></p>
+        </div>
+        <div class="column">
+            <p>Advanced analysis with Python scientific libraries</p>
+        </div>
+        <div class="column">
+            <p>Large scale computational experiments made easy</p>
+        </div>
+    </div>
+</div>
+</body>
+</html>
+ + +""".format(port)) diff --git a/MolnsLib/molns_provider.py b/MolnsLib/molns_provider.py index 587c3eb..fbee62f 100644 --- a/MolnsLib/molns_provider.py +++ b/MolnsLib/molns_provider.py @@ -1,16 +1,21 @@ import os import collections +from ssh import SSH + + class ProviderException(Exception): pass -class ProviderBase(): + +class ProviderBase: """ Abstract class. """ STATUS_RUNNING = 'running' STATUS_STOPPED = 'stopped' STATUS_TERMINATED = 'terminated' - SecurityGroupRule = collections.namedtuple("SecurityGroupRule", ["ip_protocol", "from_port", "to_port", "cidr_ip", "src_group_name"]) + SecurityGroupRule = collections.namedtuple("SecurityGroupRule", ["ip_protocol", "from_port", "to_port", "cidr_ip", + "src_group_name"]) FIREWALL_RULES = [ SecurityGroupRule("tcp", "22", "22", "0.0.0.0/0", None), @@ -22,7 +27,7 @@ class ProviderBase(): SecurityGroupRule("tcp", "9000", "65535", "0.0.0.0/0", None), ] - def __init__(self, name, config=None, config_dir=None,**kwargs): + def __init__(self, name, config=None, config_dir=None, **kwargs): self.config = {} self.name = name self.type = self.PROVIDER_TYPE @@ -35,6 +40,7 @@ def __init__(self, name, config=None, config_dir=None,**kwargs): self.config[k] = v for k,v in kwargs.iteritems(): self.__dict__[k] = v + self.ssh = SSH() def __getitem__(self, key): if key not in self.CONFIG_VARS.keys(): diff --git a/MolnsLib/ssh.py b/MolnsLib/ssh.py new file mode 100644 index 0000000..d4f7d0a --- /dev/null +++ b/MolnsLib/ssh.py @@ -0,0 +1,71 @@ +import paramiko +import time + + +class SSHException(Exception): + pass + + +class SSH: + def __init__(self): + self.ssh = paramiko.SSHClient() + self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + def exec_command(self, command, verbose=True): + try: + stdout_data = [] + stderr_data = [] + session = self.ssh.get_transport().open_session() + session.exec_command(command) + nbytes = 4096 + # TODO add a timeout here, don't wait for commands forever. 
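+            # (One possible shape for that timeout, left as a comment so current
+            # behavior is unchanged: compute deadline = time.time() + timeout_seconds
+            # before entering the loop below, and raise SSHException once
+            # time.time() passes the deadline while the command is still running.)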
+ while True: + if session.recv_ready(): + msg = session.recv(nbytes) + stdout_data.append(msg) + if session.recv_stderr_ready(): + msg = session.recv_stderr(nbytes) + stderr_data.append(msg) + if session.exit_status_ready(): + break + time.sleep(0.1) # Sleep briefly to prevent over-polling + + status = session.recv_exit_status() + str_return = ''.join(stdout_data).splitlines() + stderr_str = ''.join(stderr_data) + session.close() + if status != 0: + raise paramiko.SSHException( + "Exit Code: {0}\tSTDOUT: {1}\tSTDERR: {2}\n\n".format(status, "\n".join(str_return), stderr_str)) + if verbose: + print "EXECUTING...\t{0}".format(command) + return str_return + except paramiko.SSHException as e: + if verbose: + print "FAILED......\t{0}\t{1}".format(command, e) + raise SSHException("{0}\t{1}".format(command, e)) + + def exec_multi_command(self, command, next_command): + try: + stdin, stdout, stderr = self.ssh.exec_command(command) + stdin.write(next_command) + stdin.flush() + status = stdout.channel.recv_exit_status() + if status != 0: + raise paramiko.SSHException("Exit Code: {0}\tSTDOUT: {1}\tSTDERR: {2}\n\n".format(status, stdout.read(), + stderr.read())) + except paramiko.SSHException as e: + print "FAILED......\t{0}\t{1}".format(command, e) + raise e + + def open_sftp(self): + return self.ssh.open_sftp() + + def connect(self, instance, port, username=None, key_filename=None): + return self.ssh.connect(instance.ip_address, port, username, key_filename=key_filename) + + def connect_cluster_node(self, ip_address, port, username, key_filename): + return self.ssh.connect(ip_address, port, username, key_filename=key_filename) + + def close(self): + self.ssh.close() \ No newline at end of file diff --git a/MolnsLib/ssh_deploy.py b/MolnsLib/ssh_deploy.py index 0574840..2167192 100644 --- a/MolnsLib/ssh_deploy.py +++ b/MolnsLib/ssh_deploy.py @@ -1,8 +1,7 @@ - import json import logging import os -import paramiko +import Utils import string import sys import time @@ -10,9 +9,17 @@ import webbrowser import urllib2 +from constants import Constants + +from DockerProxy import DockerProxy +from ssh import SSH +from DockerSSH import DockerSSH + + class SSHDeployException(Exception): pass + class SSHDeploy: ''' This class is used to deploy IPython @@ -20,19 +27,16 @@ class SSHDeploy: DEFAULT_STOCHSS_PORT = 1443 DEFAULT_INTERNAL_STOCHSS_PORT = 8080 DEFAULT_GAE_ADMIN_PORT = 8000 - DEFAULT_PRIVATE_NOTEBOOK_PORT = 8081 - DEFAULT_PUBLIC_NOTEBOOK_PORT = 443 - DEFAULT_PRIVATE_WEBSERVER_PORT = 8001 - DEFAULT_PUBLIC_WEBSERVER_PORT = 80 SSH_CONNECT_WAITTIME = 5 MAX_NUMBER_SSH_CONNECT_ATTEMPTS = 25 DEFAULT_SSH_PORT = 22 DEFAULT_IPCONTROLLER_PORT = 9000 - DEFAULT_PYURDME_TEMPDIR="/mnt/pyurdme_tmp" - + DEFAULT_PYURDME_TEMPDIR = "/mnt/pyurdme_tmp" + + REMOTE_EXEC_JOB_PATH = "/mnt/molnsexec" - def __init__(self, config=None, config_dir=None): + def __init__(self, ssh, config=None, config_dir=None): if config is None: raise SSHDeployException("No config given") self.config = config @@ -40,23 +44,25 @@ def __init__(self, config=None, config_dir=None): if config_dir is None: self.config_dir = os.path.join(os.path.dirname(__file__), '/../.molns/') self.username = config['login_username'] - self.endpoint = self.DEFAULT_PRIVATE_NOTEBOOK_PORT + self.endpoint = Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT self.ssh_endpoint = self.DEFAULT_SSH_PORT self.keyfile = config.sshkeyfilename() - self.ssh = paramiko.SSHClient() - self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + if not (isinstance(ssh, SSH) or isinstance(ssh, 
DockerSSH)): + raise SSHDeployException("SSH object invalid.") + self.ssh = ssh + self.provider_name = config.name self.profile = 'default' - self.profile_dir = "/home/%s/.ipython/profile_default/" %(self.username) - self.ipengine_env = 'export INSTANT_OS_CALL_METHOD=SUBPROCESS;export PYURDME_TMPDIR={0};'.format(self.DEFAULT_PYURDME_TEMPDIR) + self.profile_dir = "/home/%s/.ipython/profile_default/" % (self.username) + self.ipengine_env = 'export INSTANT_OS_CALL_METHOD=SUBPROCESS;export PYURDME_TMPDIR={0};'.format( + self.DEFAULT_PYURDME_TEMPDIR) self.profile_dir_server = self.profile_dir self.profile_dir_client = self.profile_dir self.ipython_port = self.DEFAULT_IPCONTROLLER_PORT - - def scp_command(self, hostname): + def scp_command(self, hostname): return "scp -o 'StrictHostKeyChecking no' \ %s@%s:%ssecurity/ipcontroller-engine.json %ssecurity/" \ - %(self.username, hostname, self.profile_dir_server, self.profile_dir_client) + % (self.username, hostname, self.profile_dir_server, self.profile_dir_client) def prompt_for_password(self): import getpass @@ -72,12 +78,12 @@ def prompt_for_password(self): print "Passwords do not match, try again." def create_ssl_cert(self, cert_directory, cert_name_prefix, hostname): - self.exec_command("mkdir -p '{0}'".format(cert_directory)) + self.ssh.exec_command("mkdir -p '{0}'".format(cert_directory)) user_cert = cert_directory + '{0}-user_cert.pem'.format(cert_name_prefix) ssl_key = cert_directory + '{0}-ssl_key.pem'.format(cert_name_prefix) ssl_cert = cert_directory + '{0}-ssl_cert.pem'.format(cert_name_prefix) - ssl_subj = "/C=CN/ST=SH/L=STAR/O=Dis/CN=%s" % hostname - self.exec_command( + ssl_subj = "/C=CN/ST=SH/L=STAR/O=Dis/CN=%s" % hostname + self.ssh.exec_command( "openssl req -new -newkey rsa:4096 -days 365 " '-nodes -x509 -subj %s -keyout %s -out %s' % (ssl_subj, ssl_key, ssl_cert)) @@ -94,67 +100,72 @@ def create_ipython_config(self, hostname, notebook_password=None): else: passwd = notebook_password try: - sha1pass_out = self.exec_command(sha1cmd % passwd , verbose=False) - sha1pass = sha1pass_out[0].strip() + sha1pass_out = self.ssh.exec_command(sha1cmd % passwd, verbose=False) + if isinstance(sha1pass_out, list): + sha1pass = sha1pass_out[0].strip() + else: + sha1pass = sha1pass_out.strip() + Utils.Log.write_log("SHA1PASS_OUT: {0}".format(sha1pass_out)) + Utils.Log.write_log("SHA1PASS: {0}".format(sha1pass)) except Exception as e: print "Failed: {0}\t{1}:{2}".format(e, hostname, self.ssh_endpoint) raise e - sftp = self.ssh.open_sftp() notebook_config_file = sftp.file(remote_file_name, 'w+') - notebook_config_file.write('\n'.join([ - "c = get_config()", - "c.IPKernelApp.pylab = 'inline'", - "c.NotebookApp.certfile = u'%s'" % ssl_cert, - "c.NotebookApp.keyfile = u'%s'" % ssl_key, - "c.NotebookApp.ip = '*'", - "c.NotebookApp.open_browser = False", - "c.NotebookApp.password = u'%s'" % sha1pass, - "c.NotebookApp.port = %d" % int(notebook_port), - #"c.Global.exec_lines = ['import dill', 'from IPython.utils import pickleutil', 'pickleutil.use_dill()', 'import logging','logging.getLogger(\'UFL\').setLevel(logging.ERROR)','logging.getLogger(\'FFC\').setLevel(logging.ERROR)']", - ])) + notebook_config_file.write('\n'.join([ + "c = get_config()", + "c.IPKernelApp.pylab = 'inline'", + "c.NotebookApp.certfile = u'%s'" % ssl_cert, + "c.NotebookApp.keyfile = u'%s'" % ssl_key, + "c.NotebookApp.ip = '*'", + "c.NotebookApp.open_browser = False", + "c.NotebookApp.password = u'%s'" % sha1pass, + "c.NotebookApp.port = %d" % int(notebook_port), + # "c.Global.exec_lines = ['import 
dill', 'from IPython.utils import pickleutil', 'pickleutil.use_dill()', 'import logging','logging.getLogger(\'UFL\').setLevel(logging.ERROR)','logging.getLogger(\'FFC\').setLevel(logging.ERROR)']", + ])) notebook_config_file.close() - - remote_file_name='%sipcontroller_config.py' % self.profile_dir_server + + remote_file_name = '%sipcontroller_config.py' % self.profile_dir_server notebook_config_file = sftp.file(remote_file_name, 'w+') notebook_config_file.write('\n'.join([ "c = get_config()", "c.IPControllerApp.log_level=20", "c.HeartMonitor.period=10000", "c.HeartMonitor.max_heartmonitor_misses=10", + "c.HubFactory.db_class = \"SQLiteDB\"", ])) notebook_config_file.close() - # IPython startup code - remote_file_name='{0}startup/molns_dill_startup.py'.format(self.profile_dir_server) - dill_init_file = sftp.file(remote_file_name, 'w+') - dill_init_file.write('\n'.join([ - 'import dill', - 'from IPython.utils import pickleutil', - 'pickleutil.use_dill()', - 'import logging', - "logging.getLogger('UFL').setLevel(logging.ERROR)", - "logging.getLogger('FFC').setLevel(logging.ERROR)" - "import cloud", - "logging.getLogger('Cloud').setLevel(logging.ERROR)" - ])) - dill_init_file.close() +# # IPython startup code +# remote_file_name='{0}startup/molns_dill_startup.py'.format(self.profile_dir_server) +# dill_init_file = sftp.file(remote_file_name, 'w+') +# dill_init_file.write('\n'.join([ +# 'import dill', +# 'from IPython.utils import pickleutil', +# 'pickleutil.use_dill()', +# 'import logging', +# "logging.getLogger('UFL').setLevel(logging.ERROR)", +# "logging.getLogger('FFC').setLevel(logging.ERROR)" +# "import cloud", +# "logging.getLogger('Cloud').setLevel(logging.ERROR)" +# ])) +# dill_init_file.close() sftp.close() def create_s3_config(self): sftp = self.ssh.open_sftp() - remote_file_name='.molns/s3.json' + remote_file_name = '.molns/s3.json' s3_config_file = sftp.file(remote_file_name, 'w') config = {} config["provider_type"] = self.config.type - config["bucket_name"] = "molns_storage_{0}".format(self.get_cluster_id()) + config["bucket_name"] = "molns_storage_{1}_{0}".format(self.get_cluster_id(), self.provider_name) config["credentials"] = self.config.get_config_credentials() s3_config_file.write(json.dumps(config)) s3_config_file.close() sftp.close() def get_cluster_id(self): - """ retreive the cluster id from the config. """ + """ Retrieve the cluster id from the config. 
""" filename = os.path.join(self.config_dir, 'cluster_id') if not os.path.isfile(filename): new_id = str(uuid.uuid4()) @@ -163,22 +174,22 @@ def get_cluster_id(self): wfd.write(new_id) with open(filename) as fd: idstr = fd.readline().rstrip() - logging.debug("get_cluster_id() file {0} found id = {1}".format(filename,idstr)) + logging.debug("get_cluster_id() file {0} found id = {1}".format(filename, idstr)) if idstr is None or len(idstr) == 0: - raise SSHDeployException("error getting id for cluster from file, please check your file '{0}'".format(filename)) + raise SSHDeployException( + "error getting id for cluster from file, please check your file '{0}'".format(filename)) return idstr - def create_engine_config(self): sftp = self.ssh.open_sftp() - remote_file_name='%sipengine_config.py' % self.profile_dir_server + remote_file_name = '%sipengine_config.py' % self.profile_dir_server notebook_config_file = sftp.file(remote_file_name, 'w+') notebook_config_file.write('\n'.join([ - "c = get_config()", - "c.IPEngineApp.log_level=20", - "c.IPEngineApp.log_to_file = True", - "c.Global.exec_lines = ['import dill', 'from IPython.utils import pickleutil', 'pickleutil.use_dill()']", - ])) + "c = get_config()", + "c.IPEngineApp.log_level=20", + "c.IPEngineApp.log_to_file = True", + "c.Global.exec_lines = ['import dill', 'from IPython.utils import pickleutil', 'pickleutil.use_dill()']", + ])) notebook_config_file.close() sftp.close() self.create_s3_config() @@ -191,7 +202,7 @@ def _get_ipython_client_file(self): engine_file.close() sftp.close() return file_data - + def _put_ipython_client_file(self, file_data): sftp = self.ssh.open_sftp() engine_file = sftp.file(self.profile_dir_server + 'security/ipcontroller-client.json', 'w+') @@ -207,7 +218,7 @@ def _get_ipython_engine_file(self): engine_file.close() sftp.close() return file_data - + def _put_ipython_engine_file(self, file_data): sftp = self.ssh.open_sftp() engine_file = sftp.file(self.profile_dir_server + 'security/ipcontroller-engine.json', 'w+') @@ -217,90 +228,71 @@ def _put_ipython_engine_file(self, file_data): def exec_command_list_switch(self, command_list): for command in command_list: - self.exec_command(command) + self.ssh.exec_command(command) - def exec_command(self, command, verbose=True): - try: - stdout_data = [] - stderr_data = [] - session = self.ssh.get_transport().open_session() - session.exec_command(command) - nbytes = 4096 - #TODO add a timeout here, don't wait for commands forever. 
- while True: - if session.recv_ready(): - msg = session.recv(nbytes) - stdout_data.append(msg) - if session.recv_stderr_ready(): - msg = session.recv_stderr(nbytes) - stderr_data.append(msg) - if session.exit_status_ready(): - break - time.sleep(0.1) # Sleep breifly to prevent over-polling - - status = session.recv_exit_status() - str_return = ''.join(stdout_data).splitlines() - stderr_str = ''.join(stderr_data) - session.close() - if status != 0: - raise paramiko.SSHException("Exit Code: {0}\tSTDOUT: {1}\tSTDERR: {2}\n\n".format(status, "\n".join(str_return), stderr_str)) - if verbose: - print "EXECUTING...\t{0}".format(command) - return str_return - except paramiko.SSHException as e: - if verbose: - print "FAILED......\t{0}\t{1}".format(command,e) - raise SSHDeployException("{0}\t{1}".format(command,e)) - - def exec_multi_command(self, command, next_command): - try: - stdin, stdout, stderr = self.ssh.exec_command(command) - stdin.write(next_command) - stdin.flush() - status = stdout.channel.recv_exit_status() - if status != 0: - raise paramiko.SSHException("Exit Code: {0}\tSTDOUT: {1}\tSTDERR: {2}\n\n".format(status, stdout.read(), stderr.read())) - except paramiko.SSHException as e: - print "FAILED......\t{0}\t{1}".format(command,e) - raise e - - def connect(self, hostname, port): - print "Connecting to {0}:{1} keyfile={2}".format(hostname,port,self.keyfile) + def connect(self, instance, port=None): + if port is None: + port = self.ssh_endpoint + print "Connecting to {0}:{1} keyfile={2}".format(instance.ip_address, port, self.keyfile) for i in range(self.MAX_NUMBER_SSH_CONNECT_ATTEMPTS): try: - self.ssh.connect(hostname, port, username=self.username, - key_filename=self.keyfile) - print "SSH connection established" + self.ssh.connect(instance, port, username=self.username, + key_filename=self.keyfile) + if not isinstance(self.ssh, DockerSSH): + print "SSH connection established" + else: + print "Ready to execute commands in local container." 
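+                # (DockerSSH mirrors the SSH wrapper's interface -- connect,
+                # exec_command, open_sftp, close -- so the deployment code below can
+                # drive a local container and a remote VM through the same calls.)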
return + except Exception as e: - print "Retry in {0} seconds...\t\t{1}".format(self.SSH_CONNECT_WAITTIME,e) + print "Retry in {0} seconds...\t\t{1}".format(self.SSH_CONNECT_WAITTIME, e) time.sleep(self.SSH_CONNECT_WAITTIME) - raise SSHDeployException("ssh connect Failed!!!\t{0}:{1}".format(hostname,self.ssh_endpoint)) + raise SSHDeployException("ssh connect Failed!!!\t{0}:{1}".format(instance.ip_address, self.ssh_endpoint)) + + def deploy_molns_webserver(self, instance, controller_obj, openWebBrowser=True): + ip_address = instance.ip_address + logging.debug('deploy_molns_webserver(): openWebBrowser={0}, controller_obj.provider.type={1}'.format( + openWebBrowser, controller_obj.provider.type)) + + if controller_obj.provider.type == Constants.DockerProvider: + ip_address = "0.0.0.0:{0}".format(controller_obj.config["web_server_port"]) + logging.debug('deploy_molns_webserver(): ip_address={0}'.format(ip_address)) - def deploy_molns_webserver(self, ip_address): try: - self.connect(ip_address, self.ssh_endpoint) - self.exec_command("sudo rm -rf /usr/local/molns_webroot") - self.exec_command("sudo mkdir -p /usr/local/molns_webroot") - self.exec_command("sudo chown ubuntu /usr/local/molns_webroot") - self.exec_command("git clone https://github.com/Molns/MOLNS_web_landing_page.git /usr/local/molns_webroot") - self.exec_multi_command("cd /usr/local/molns_webroot; python -m SimpleHTTPServer {0} > ~/.molns_webserver.log 2>&1 &".format(self.DEFAULT_PRIVATE_WEBSERVER_PORT), '\n') - self.exec_command("sudo iptables -t nat -A PREROUTING -i eth0 -p tcp --dport {0} -j REDIRECT --to-port {1}".format(self.DEFAULT_PUBLIC_WEBSERVER_PORT,self.DEFAULT_PRIVATE_WEBSERVER_PORT)) + self.connect(instance, self.ssh_endpoint) + self.ssh.exec_command("sudo rm -rf /usr/local/molns_webroot") + self.ssh.exec_command("sudo mkdir -p /usr/local/molns_webroot") + self.ssh.exec_command("sudo chown ubuntu /usr/local/molns_webroot") + self.ssh.exec_command( + "git clone https://github.com/Molns/MOLNS_web_landing_page.git /usr/local/molns_webroot") + + # If DockerProvider, replace index page. 
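+            # (The cloned landing page links to the public notebook port; a local
+            # container instead publishes the notebook on a host port chosen at
+            # container-creation time, so a page generated with that mapped port
+            # is written over index.html.)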
+ if controller_obj.provider.type == Constants.DockerProvider: + from molns_landing_page import MolnsLandingPage + index_page = MolnsLandingPage(controller_obj.config["notebook_port"]).molns_landing_page + self.ssh.exec_command("echo {0} > /usr/local/molns_webroot/index.html".format(index_page)) + + self.ssh.exec_multi_command( + "cd /usr/local/molns_webroot; python -m SimpleHTTPServer {0} > ~/.molns_webserver.log 2>&1 &".format( + Constants.DEFAULT_PRIVATE_WEBSERVER_PORT), '\n') + self.ssh.exec_command( + "sudo iptables -t nat -A PREROUTING -i eth0 -p tcp --dport {0} -j REDIRECT --to-port {1}".format( + Constants.DEFAULT_PUBLIC_WEBSERVER_PORT, Constants.DEFAULT_PRIVATE_WEBSERVER_PORT)) self.ssh.close() print "Deploying MOLNs webserver" url = "http://{0}/".format(ip_address) - while True: - try: - req = urllib2.urlopen(url) - sys.stdout.write("\n") - sys.stdout.flush() - break - except Exception as e: - #sys.stdout.write("{0}".format(e)) - sys.stdout.write(".") - sys.stdout.flush() - time.sleep(1) - webbrowser.open(url) + if openWebBrowser: + while True: + try: + req = urllib2.urlopen(url) + sys.stdout.write("\n") + sys.stdout.flush() + break + except Exception as e: + #sys.stdout.write("{0}".format(e)) + sys.stdout.write(".") + sys.stdout.flush() + time.sleep(1) + webbrowser.open(url) except Exception as e: print "Failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] @@ -308,11 +300,139 @@ def deploy_molns_webserver(self, ip_address): def get_number_processors(self): cmd = 'python -c "import multiprocessing;print multiprocessing.cpu_count()"' try: - output = self.exec_command(cmd)[0].strip() + output = self.ssh.exec_command(cmd)[0].strip() return int(output) except Exception as e: raise SSHDeployException("Could not determine the number of processors on the remote system: {0}".format(e)) + def deploy_remote_execution_job(self, ip_address, jobID, exec_str): + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH,jobID) + EXEC_HELPER_FILENAME = 'molns_exec_helper.py' + try: + self.connect(ip_address, self.ssh_endpoint) + # parse command, retrieve files to upload (iff they are in the local directory) + # create remote directory + self.ssh.exec_command("sudo mkdir -p {0}".format(base_path)) + self.ssh.exec_command("sudo chown ubuntu {0}".format(base_path)) + self.ssh.exec_command("mkdir -p {0}/.molns/".format(base_path)) + sftp = self.ssh.open_sftp() + # Parse exec_str to get job files + files_to_transfer = [] + remote_command_list = [] + for c in exec_str.split(): + c2 = c + if c.startswith('~'): + c2 = os.path.expanduser(c) + if os.path.isfile(c2): + files_to_transfer.append(c2) + remote_command_list.append(os.path.basename(c2)) + else: + remote_command_list.append(c) + # Transfer job files + for f in files_to_transfer: + logging.debug('Uploading file {0}'.format(f)) + sftp.put(f, "{0}/{1}".format(base_path, os.path.basename(f))) + # Transfer helper file (to .molns subdirectory) + logging.debug('Uploading file {0}'.format(EXEC_HELPER_FILENAME)) + sftp.put( + os.path.join(os.path.dirname(os.path.abspath(__file__)),EXEC_HELPER_FILENAME), + "{0}/.molns/{1}".format(base_path,EXEC_HELPER_FILENAME) + ) + # Write 'cmd' file + remote_command = " ".join(remote_command_list) + logging.debug("Writing remote_command = {0}".format(remote_command)) + cmd_file = sftp.file("{0}/.molns/{1}".format(base_path,'cmd'), 'w') + cmd_file.write(remote_command) + cmd_file.close() + # execute command + logging.debug("Executing command") + 
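+            # (The helper launched below reads the command back from .molns/cmd,
+            # runs it with stdout/stderr redirected to .molns/stdout, and records
+            # .molns/pid and .molns/return_value -- the files that
+            # remote_execution_job_status() polls.)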
self.ssh.exec_command("cd {0};python {0}/.molns/{1} &".format(base_path, EXEC_HELPER_FILENAME)) + self.ssh.close() + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + def remote_execution_job_status(self, ip_address, jobID): + ''' Check the status of a remote process. + + Returns: Tuple with two elements: (Is_Running, Message) + Is_Running: bool True if the process is running + Message: str Description of the status + ''' + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH,jobID) + try: + self.connect(ip_address, self.ssh_endpoint) + sftp = self.ssh.open_sftp() + # Does the 'pid' file exist remotely? + try: + sftp.stat("{0}/.molns/pid".format(base_path)) + except (IOError, OSError) as e: + self.ssh.close() + raise SSHDeployException("Remote process not started (pid file not found)") + # Does the 'return_value' file exist? + try: + sftp.stat("{0}/.molns/return_value".format(base_path)) + # Process is complete + return (False, "Remote process finished") + except (IOError, OSError) as e: + pass + # is the process running? + try: + self.ssh.exec_command("kill -0 `cat {0}/.molns/pid` > /dev/null 2>&1".format(base_path)) + return (True, "Remote process running") + except Exception as e: + raise SSHDeployException("Remote process not running (process not found)") + finally: + self.ssh.close() + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + def remote_execution_get_job_logs(self, ip_address, jobID, seek): + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH,jobID) + try: + self.connect(ip_address, self.ssh_endpoint) + sftp = self.ssh.open_sftp() + log = sftp.file("{0}/.molns/stdout".format(base_path), 'r') + log.seek(seek) + output = log.read() + self.ssh.close() + return output + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + def remote_execution_delete_job(self, ip_address, jobID): + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH,jobID) + try: + self.connect(ip_address, self.ssh_endpoint) + ### If process is still running, terminate it + try: + self.ssh.exec_command("kill -TERM `cat {0}/.molns/pid` > /dev/null 2>&1".format(base_path)) + except Exception as e: + pass + ### Remove the files on the remote server + self.ssh.exec_command("rm -rf {0}/* {0}/.molns*".format(base_path)) + self.ssh.exec_command("sudo rmdir {0}".format(base_path)) + self.ssh.close() + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + def remote_execution_fetch_file(self, ip_address, jobID, filename, localfilename): + base_path = "{0}/{1}".format(self.REMOTE_EXEC_JOB_PATH,jobID) + try: + self.connect(ip_address, self.ssh_endpoint) + sftp = self.ssh.open_sftp() + sftp.get("{0}/{1}".format(base_path, filename), localfilename) + self.ssh.close() + except Exception as e: + print "Remote execution failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) + raise sys.exc_info()[1], None, sys.exc_info()[2] + + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% def deploy_stochss(self, 
ip_address, port=1443): print "Configure Nginx" (ssl_key, ssl_cert) = self.create_ssl_cert('/home/ubuntu/.nginx_cert/', 'stochss', ip_address) sftp = self.ssh.open_sftp() - with open(os.path.dirname(os.path.abspath(__file__))+os.sep+'..'+os.sep+'templates'+os.sep+'nginx.conf') as fd: + with open(os.path.dirname( + os.path.abspath(__file__)) + os.sep + '..' + os.sep + 'templates' + os.sep + 'nginx.conf') as fd: web_file = sftp.file("/tmp/nginx.conf", 'w+') buff = fd.read() buff = string.replace(buff, '###LISTEN_PORT###', str(port)) @@ -329,16 +450,16 @@ def deploy_stochss(self, ip_address, port=1443): print buff web_file.write(buff) web_file.close() - self.exec_command("sudo chown root /tmp/nginx.conf") - self.exec_command("sudo mv /tmp/nginx.conf /etc/nginx/nginx.conf") + self.ssh.exec_command("sudo chown root /tmp/nginx.conf") + self.ssh.exec_command("sudo mv /tmp/nginx.conf /etc/nginx/nginx.conf") print "Starting Nginx" - self.exec_command("sudo nginx") + self.ssh.exec_command("sudo nginx") print "Modifying StochSS to not open a webbrowser (TODO: move to install)" - self.exec_command("sed -i 's/webbrowser.open_new(stochss_url)/pass/' /usr/local/stochss/run.ubuntu.sh") + self.ssh.exec_command("sed -i 's/webbrowser.open_new(stochss_url)/pass/' /usr/local/stochss/run.ubuntu.sh") print "Starting StochSS" - self.exec_command("cd /usr/local/stochss/ && screen -d -m ./run.ubuntu.sh") + self.ssh.exec_command("cd /usr/local/stochss/ && screen -d -m ./run.ubuntu.sh") print "Waiting for StochSS to become available:" stochss_url = "https://{0}/".format(ip_address) while True: @@ -346,15 +467,16 @@ def deploy_stochss(self, ip_address, port=1443): req = urllib2.urlopen(stochss_url) break except Exception as e: - #sys.stdout.write("{0}".format(e)) + # sys.stdout.write("{0}".format(e)) sys.stdout.write(".") sys.stdout.flush() time.sleep(1) print "Success!" print "Configuring StochSS" admin_token = uuid.uuid4() - create_and_exchange_admin_token = "python /usr/local/stochss/generate_admin_token.py {0}".format(admin_token) - self.exec_command(create_and_exchange_admin_token) + create_and_exchange_admin_token = "python /usr/local/stochss/generate_admin_token.py {0}".format( + admin_token) + self.ssh.exec_command(create_and_exchange_admin_token) time.sleep(1) stochss_url = "{0}login?secret_key={1}".format(stochss_url, admin_token) print "StochSS available: {0}".format(stochss_url) @@ -363,54 +485,125 @@ def deploy_stochss(self, ip_address, port=1443): print "StochSS launch failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] - def deploy_ipython_controller(self, ip_address, notebook_password=None): - controller_hostname = '' - engine_file_data = '' + def __transfer_cluster_ssh_key_file(self, remote_target_dir, controller_obj): + local_ssh_key_file_path = controller_obj.config["ssh_key_file"] + + if local_ssh_key_file_path is None: + print "No SSH key file provided for cluster access." + return + + if not os.access(local_ssh_key_file_path, os.R_OK): + print "No read access to SSH key file. Skipping transfer." + return + + # Transfer secret key file. 
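+        # (sftp.file() creates the remote copy with default permissions, so the
+        # chown/chmod 400 issued after the transfer is what actually restricts
+        # the key to the ubuntu user.)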
+ sftp = self.ssh.open_sftp() + remote_file_abs_path = os.path.join(remote_target_dir, Constants.ClusterKeyFileNameOnController) + remote_ssh_key_file = sftp.file(remote_file_abs_path, 'w') + + with open(local_ssh_key_file_path, "r") as local_ssh_key_file: + remote_ssh_key_file.write(local_ssh_key_file.read()) + + remote_ssh_key_file.close() + sftp.close() + + # Only user ubuntu has permission to access file. + self.ssh.exec_command("sudo chown ubuntu:ubuntu {0}".format(remote_file_abs_path)) + self.ssh.exec_command("sudo chmod 400 {0}".format(remote_file_abs_path)) + + def deploy_ipython_controller(self, instance, controller_obj, notebook_password=None, reserved_cpus=2, + resume=False): + ip_address = instance.ip_address + + logging.debug('deploy_ipython_controller(ip_address={0}, reserved_cpus={1})'.format(ip_address, reserved_cpus)) try: print "{0}:{1}".format(ip_address, self.ssh_endpoint) - self.connect(ip_address, self.ssh_endpoint) - + self.connect(instance, self.ssh_endpoint) + # Set up the symlink to local scratch space - self.exec_command("sudo mkdir -p /mnt/molnsarea") - self.exec_command("sudo chown ubuntu /mnt/molnsarea") - self.exec_command("sudo mkdir -p /mnt/molnsarea/cache") - self.exec_command("sudo chown ubuntu /mnt/molnsarea/cache") + self.ssh.exec_command("sudo mkdir -p /mnt/molnsarea") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsarea") + self.ssh.exec_command("sudo mkdir -p /mnt/molnsarea/cache") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsarea/cache") + + self.ssh.exec_command( + "test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsarea {0}".format('/home/ubuntu/localarea')) - self.exec_command("test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsarea {0}".format('/home/ubuntu/localarea')) - # Setup symlink to the shared scratch space - self.exec_command("sudo mkdir -p /mnt/molnsshared") - self.exec_command("sudo chown ubuntu /mnt/molnsshared") - self.exec_command("test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsshared {0}".format('/home/ubuntu/shared')) + self.ssh.exec_command("sudo mkdir -p /mnt/molnsshared") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsshared") + self.ssh.exec_command( + "test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsshared {0}".format('/home/ubuntu/shared')) # - self.exec_command("sudo mkdir -p {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) - self.exec_command("sudo chown ubuntu {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) + self.ssh.exec_command("sudo mkdir -p {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) + self.ssh.exec_command("sudo chown ubuntu {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) # - #self.exec_command("cd /usr/local/molnsutil && git pull && sudo python setup.py install") - self.exec_command("mkdir -p .molns") - self.create_s3_config() - - self.exec_command("ipython profile create {0}".format(self.profile)) - self.create_ipython_config(ip_address, notebook_password) - self.create_engine_config() - self.exec_command("source /usr/local/pyurdme/pyurdme_init; screen -d -m ipcontroller --profile={1} --ip='*' --location={0} --port={2} --log-to-file".format(ip_address, self.profile, self.ipython_port), '\n') - # Start one ipengine per processor - - import time - time.sleep(10) - - num_procs = self.get_number_processors() - num_engines = num_procs - 2 - for _ in range(num_engines): - self.exec_command("{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipengine --profile={0} --debug".format(self.profile, self.ipengine_env)) - self.exec_command("{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipython notebook 
--profile={0}".format(self.profile, self.ipengine_env)) - self.exec_command("sudo iptables -t nat -A PREROUTING -i eth0 -p tcp --dport {0} -j REDIRECT --to-port {1}".format(self.DEFAULT_PUBLIC_NOTEBOOK_PORT,self.DEFAULT_PRIVATE_NOTEBOOK_PORT)) - self.ssh.close() + # self.exec_command("cd /usr/local/molns_util && git pull && sudo python setup.py install") + + home_dir = "" + if controller_obj.provider.type == Constants.DockerProvider: + home_dir = "/home/ubuntu/" + + # If it's not a DockerController being resumed, then create config files and move sample notebooks to volume. + if not (controller_obj.provider.type == Constants.DockerProvider and resume is True): + self.ssh.exec_command("mkdir -p {0}.molns".format(home_dir)) + self.create_s3_config() + self.ssh.exec_command("ipython profile create {0}".format(self.profile)) + self.create_ipython_config(ip_address, notebook_password) + self.create_engine_config() + self.__transfer_cluster_ssh_key_file(remote_target_dir=home_dir, controller_obj=controller_obj) + if controller_obj.provider.type == Constants.DockerProvider: + self.ssh.exec_command("mv {0}*.ipynb {1}".format(home_dir, + DockerProxy.get_container_volume_from_working_dir( + controller_obj.config["working_directory"]))) + + # If the provider is Docker, then the IPython controller and ipengines aren't started + + if controller_obj.provider.type != Constants.DockerProvider: + self.ssh.exec_command( + "source /usr/local/pyurdme/pyurdme_init; screen -d -m ipcontroller --profile={1} --ip='*' --location={0} " + "--port={2} --log-to-file".format( + ip_address, self.profile, self.ipython_port), '\n') + # Give the controller time to startup + import time + logging.debug('Waiting 5 seconds for the IPython controller to start.') + time.sleep(5) + + # Start one ipengine per processor + num_procs = self.get_number_processors() + num_engines = num_procs - reserved_cpus + logging.debug( + 'Starting {0} engines (#cpu={1}, reserved_cpus={2})'.format(num_engines, num_procs, reserved_cpus)) + + for _ in range(num_engines): + self.ssh.exec_command( + "{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipengine --profile={0} --debug".format( + self.profile, self.ipengine_env)) + self.ssh.exec_command( + "{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipython notebook --profile={0}".format( + self.profile, self.ipengine_env)) + else: + self.ssh.exec_command( + "sudo pip install /usr/local/pyurdme/; screen -d -m ipython notebook --profile={0}".format( + self.profile)) + + # TODO remove next three commands after testing. Put them in the image instead. + # self.ssh.exec_command("git clone https://github.com/aviral26/cluster_execution.git") + # self.ssh.exec_command("git clone https://github.com/aviral26/molns.git") + # self.ssh.exec_command("cd /usr/local/molnsutil; git checkout qsub_support; git pull") + + self.ssh.exec_command( + "sudo iptables -t nat -A PREROUTING -i eth0 -p tcp --dport {0} -j REDIRECT --to-port {1}".format( + Constants.DEFAULT_PUBLIC_NOTEBOOK_PORT, Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT)) except Exception as e: print "Failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] - url = "http://%s" %(ip_address) - print "\nThe URL for your MOLNs cluster is: %s." % url + + finally: + self.ssh.close() + + url = "https://%s" % (ip_address) + print "\nThe URL for your MOLNs head node is: %s." 
% url def get_ipython_engine_file(self, ip_address): try: @@ -434,30 +627,28 @@ def get_ipython_client_file(self, ip_address): print "Failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] - def deploy_ipython_engine(self, ip_address, controler_ip, engine_file_data, controller_ssh_keyfile): try: print "{0}:{1}".format(ip_address, self.ssh_endpoint) self.connect(ip_address, self.ssh_endpoint) - - # Setup the symlink to local scratch space - self.exec_command("sudo mkdir -p /mnt/molnsarea") - self.exec_command("sudo chown ubuntu /mnt/molnsarea") - self.exec_command("sudo mkdir -p /mnt/molnsarea/cache") - self.exec_command("sudo chown ubuntu /mnt/molnsarea/cache") + # Setup the symlink to local scratch space + self.ssh.exec_command("sudo mkdir -p /mnt/molnsarea") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsarea") + self.ssh.exec_command("sudo mkdir -p /mnt/molnsarea/cache") + self.ssh.exec_command("sudo chown ubuntu /mnt/molnsarea/cache") - self.exec_command("test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsarea {0}".format('/home/ubuntu/localarea')) + self.ssh.exec_command( + "test -e {0} && sudo rm {0} ; sudo ln -s /mnt/molnsarea {0}".format('/home/ubuntu/localarea')) # - self.exec_command("sudo mkdir -p {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) - self.exec_command("sudo chown ubuntu {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) + self.ssh.exec_command("sudo mkdir -p {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) + self.ssh.exec_command("sudo chown ubuntu {0}".format(self.DEFAULT_PYURDME_TEMPDIR)) # Setup config for object store - self.exec_command("mkdir -p .molns") + self.ssh.exec_command("mkdir -p .molns") self.create_s3_config() - - + # SSH mount the controller on each engine - remote_file_name='.ssh/id_dsa' + remote_file_name='/home/ubuntu/.ssh/controller_ssh_key' with open(controller_ssh_keyfile) as fd: sftp = self.ssh.open_sftp() controller_keyfile = sftp.file(remote_file_name, 'w') @@ -468,27 +659,25 @@ def deploy_ipython_engine(self, ip_address, controler_ip, engine_file_data, cont print "Remote file {0} has {1} bytes".format(remote_file_name, sftp.stat(remote_file_name).st_size) sftp.close() self.exec_command("chmod 0600 {0}".format(remote_file_name)) + self.exec_command("sudo rm -rf {0}".format('/home/ubuntu/shared')) self.exec_command("mkdir -p /home/ubuntu/shared") - self.exec_command("sshfs -o Ciphers=arcfour -o Compression=no -o reconnect -o idmap=user -o StrictHostKeyChecking=no ubuntu@{0}:/mnt/molnsshared /home/ubuntu/shared".format(controler_ip)) + self.exec_command("sshfs -o IdentityFile={1} -o Ciphers=arcfour -o Compression=no -o reconnect -o idmap=user -o StrictHostKeyChecking=no ubuntu@{0}:/mnt/molnsshared /home/ubuntu/shared".format(controler_ip,remote_file_name)) - # Update the Molnsutil package: TODO remove when molnsutil is stable - #self.exec_command("cd /usr/local/molnsutil && git pull && sudo python setup.py install") + # Update the Molnsutil package: TODO remove when molns_util is stable + # self.exec_command("cd /usr/local/molns_util && git pull && sudo python setup.py install") - self.exec_command("ipython profile create {0}".format(self.profile)) + self.ssh.exec_command("ipython profile create {0}".format(self.profile)) self.create_engine_config() # Just write the engine_file to the engine self._put_ipython_engine_file(engine_file_data) # Start one ipengine per processor for _ in range(self.get_number_processors()): - self.exec_command("{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m 
ipengine --profile={0} --debug".format(self.profile, self.ipengine_env)) + self.ssh.exec_command( + "{1}source /usr/local/pyurdme/pyurdme_init; screen -d -m ipengine --profile={0} --debug".format( + self.profile, self.ipengine_env)) self.ssh.close() except Exception as e: print "Failed: {0}\t{1}:{2}".format(e, ip_address, self.ssh_endpoint) raise sys.exc_info()[1], None, sys.exc_info()[2] - - -if __name__ == "__main__": - sshdeploy = SSHDeploy() - sshdeploy.deploy_ipython_controller() diff --git a/README.md b/README.md index 19b0c63..55a1cab 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # MOLNs spatial stochastic simulation appliance # -MOLNs is a cloud appliance that will set up, start and manage a virtual platform for scalable, distributed computational experiments using PyURDME (www.pyurdme.org). +MOLNs is a cloud appliance that will set up, start and manage a virtual platform for scalable, distributed computational experiments using (spatial) stochastic simulation software such as PyURDME (www.pyurdme.org) and StochKit/Gillespy (www.github.com/Gillespy/gillespy). In addition, MOLNs by default makes FEniCS/Dolfin available as-a Service. + +Since MOLNs will configure and manage a virtual IPython Cluster (with a Notebook frontend), with Numpy, SciPy and Ipython Parallel enabled, it can also be useful for general contextualization and management of dynamic, cloud-agnostic (supports EC2 and OpenStack-based clouds) virtual IPython environments, even if you are not into spatial stochastic simulations in systems biology. + +Note: MOLNs is currently compatible only with 'EC2-Classic', we are working on supporting Amazon VPC. ### Prerequisites ### To use MOLNs, you need valid credentials to an OpenStack cloud, Amazon Elastic Compute Cloud (EC2) or HP Helion public cloud. You also need Python, and the following packages: @@ -71,9 +75,10 @@ To set up a start a MOLNs virtual platform named "molns-test" in a cloud provide $ molns start molns-test $ molns worker start molns-test-workers -You will be presented with a URL for the controller node of your platform. Navigate there using a browser (Google Chrome or Firefox are recommended). The easiest way to get started using the platform is to dive into one of the provided tutorial notebooks that are made available in every fresh MOLNs virtual platform. +You will be presented with a URL for the controller node of your platform. Navigate there using a browser (Google Chrome is strongly recommended, and Safari should be avoided). The easiest way to get started using the platform is to dive into one of the provided tutorial notebooks that are made available in every fresh MOLNs virtual platform. 
For a complete list of the valid subcommands for molns, type + $ molns help ### Above commands explained ### diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..ff7cc60 --- /dev/null +++ b/__init__.py @@ -0,0 +1 @@ +from molns import * diff --git a/molns.py b/molns.py index 6702a15..766d1d4 100755 --- a/molns.py +++ b/molns.py @@ -1,142 +1,63 @@ #!/usr/bin/env python import os -import re import sys -from MolnsLib.molns_datastore import Datastore, DatastoreException, VALID_PROVIDER_TYPES + +from MolnsLib.Utils import Log +from MolnsLib.molns_datastore import Datastore, DatastoreException, VALID_PROVIDER_TYPES, get_provider_handle from MolnsLib.molns_provider import ProviderException -from collections import OrderedDict import subprocess from MolnsLib.ssh_deploy import SSHDeploy import multiprocessing - +import json import logging -logger = logging.getLogger() -#logger.setLevel(logging.INFO) #for Debugging -logger.setLevel(logging.CRITICAL) -############################################### -class CommandException(Exception): - pass -############################################### -def table_print(column_names, data): - column_width = [0]*len(column_names) - for i,n in enumerate(column_names): - column_width[i] = len(str(n)) - for row in data: - if len(row) != len(column_names): - print "len(row) != len(column_names): {0} vs {1}".format(len(row), len(column_names)) - for i,n in enumerate(row): - if len(str(n)) > column_width[i]: - column_width[i] = len(str(n)) - out = "|".join([ "-"*(column_width[i]+2) for i in range(len(column_names))]) - print '|'+out+'|' - out = " | ".join([ column_names[i].ljust(column_width[i]) for i in range(len(column_names))]) - print '| '+out+' |' - out = "|".join([ "-"*(column_width[i]+2) for i in range(len(column_names))]) - print '|'+out+'|' - for row in data: - out = " | ".join([ str(n).ljust(column_width[i]) for i,n in enumerate(row)]) - print '| '+out+' |' - out = "|".join([ "-"*(column_width[i]+2) for i in range(len(column_names))]) - print '|'+out+'|' +from MolnsLib import constants -def raw_input_default(q, default=None, obfuscate=False): - if default is None or default == '': - return raw_input("{0}:".format(q)) - else: - if obfuscate: - ret = raw_input("{0} [******]: ".format(q)) - else: - ret = raw_input("{0} [{1}]: ".format(q, default)) - if ret == '': - return default - else: - return ret.strip() - -def raw_input_default_config(q, default=None, obj=None): - """ Ask the user and process the response with a default value. """ - if default is None: - if callable(q['default']): - f1 = q['default'] - try: - default = f1(obj) - except TypeError: - pass - else: - default = q['default'] - if 'ask' in q and not q['ask']: - return default - if 'obfuscate' in q and q['obfuscate']: - return raw_input_default(q['q'], default=default, obfuscate=True) - else: - return raw_input_default(q['q'], default=default, obfuscate=False) - -def setup_object(obj): - """ Setup a molns_datastore object using raw_input_default function. 
""" - for key, conf, value in obj.get_config_vars(): - obj[key] = raw_input_default_config(conf, default=value, obj=obj) +logger = logging.getLogger() +# logger.setLevel(logging.INFO) #for Debugging +logger.setLevel(logging.CRITICAL) -############################################### -class SubCommand(): - def __init__(self, command, subcommands): - self.command = command - self.subcommands = subcommands - def __str__(self): - r = '' - for c in self.subcommands: - r += self.command + " " + c.__str__() + "\n" - return r[:-1] - def __eq__(self, other): - return self.command == other - def run(self, args, config_dir=None): - #print "SubCommand().run({0}, {1})".format(self.command, args) - if len(args) > 0: - cmd = args[0] - for c in self.subcommands: - if c == cmd: - return c.run(args[1:], config_dir=config_dir) - raise CommandException("command not found") +from collections import OrderedDict ############################################### -class Command(): - def __init__(self, command, args_defs={}, description=None, function=None): - self.command = command - self.args_defs = args_defs - if function is None: - raise Exception("Command must have a function") - self.function = function - if description is None: - self.description = function.__doc__.strip() - else: - self.description = description - def __str__(self): - ret = self.command+" " - for k,v in self.args_defs.iteritems(): - if v is None: - ret += "[{0}] ".format(k) - else: - ret += "[{0}={1}] ".format(k,v) - ret += "\n\t"+self.description - return ret - - def __eq__(self, other): - return self.command == other +class MOLNSException(Exception): + pass - def run(self, args, config_dir=None): - config = MOLNSConfig(config_dir=config_dir) - self.function(args, config=config) ############################################### class MOLNSConfig(Datastore): - def __init__(self, config_dir): - Datastore.__init__(self,config_dir=config_dir) - + def __init__(self, config_dir=None, db_file=None): + Datastore.__init__(self, config_dir=config_dir, db_file=db_file) + def __str__(self): return "MOLNSConfig(config_dir={0})".format(self.config_dir) + ############################################### class MOLNSbase(): + @classmethod + def merge_config(cls, obj, config): + for key, conf, value in obj.get_config_vars(): + if key not in config: + if value is not None: + myval = value + else: + if 'default' in conf and conf['default']: + if callable(conf['default']): + f1 = conf['default'] + try: + myval = f1(obj) + except TypeError: + myval = None + else: + myval = conf['default'] + else: + myval = None + obj.config[key] = myval + else: + obj.config[key] = config[key] + @classmethod def _get_workerobj(cls, args, config): # Name @@ -148,9 +69,10 @@ def _get_workerobj(cls, args, config): worker_obj = config.get_object(name=worker_name, kind='WorkerGroup') except DatastoreException: worker_obj = None - #logging.debug("controller_obj {0}".format(controller_obj)) + # logging.debug("controller_obj {0}".format(controller_obj)) if worker_obj is None: - print "worker group '{0}' is not initialized, use 'molns worker setup {0}' to initialize the controller.".format(worker_name) + print "worker group '{0}' is not initialized, use 'molns worker setup {0}' to initialize the controller.".format( + worker_name) else: print "No worker name specified, please specify a name" return worker_obj @@ -161,22 +83,140 @@ def _get_controllerobj(cls, args, config): if len(args) > 0: controller_name = args[0] else: - print "No controller name given" - return None + raise 
MOLNSException("No controller name given") # Get controller db object try: controller_obj = config.get_object(name=controller_name, kind='Controller') except DatastoreException: controller_obj = None - #logging.debug("controller_obj {0}".format(controller_obj)) + # logging.debug("controller_obj {0}".format(controller_obj)) if controller_obj is None: - print "controller '{0}' is not initialized, use 'molns controller setup {0}' to initialize the controller.".format(controller_name) + raise MOLNSException( + "controller '{0}' is not initialized, use 'molns controller setup {0}' to initialize the controller.".format( + controller_name)) return controller_obj + class MOLNSController(MOLNSbase): + @classmethod + def controller_export(cls, args, config): + """ Export the configuration of a controller. """ + if len(args) < 1: + raise MOLNSException("USAGE: molns controller export name [Filename]\n" \ + "\tExport the data from the controller with the given name.") + controller_name = args[0] + if len(args) > 1: + filename = args[1] + else: + filename = 'Molns-Export-Controller-' + controller_name + '.json' + # check if provider exists + try: + controller_obj = config.get_object(controller_name, kind='Controller') + except DatastoreException as e: + raise MOLNSException("provider not found") + data = {'name': controller_obj.name, + 'provider_name': controller_obj.provider.name, + 'config': controller_obj.config} + return {'data': json.dumps(data), + 'type': 'file', + 'filename': filename} + + @classmethod + def controller_import(cls, args, config, json_data=None): + """ Import the configuration of a controller. """ + if json_data is None: + if len(args) < 1: + raise MOLNSException("USAGE: molns controller import [Filename.json]\n" \ + "\Import the data from the controller with the given name.") + filename = args[0] + with open(filename) as fd: + data = json.load(fd) + else: + data = json_data + controller_name = data['name'] + msg = '' + try: + provider_obj = config.get_object(data['provider_name'], kind='Provider') + except DatastoreException as e: + raise MOLNSException("unknown provider '{0}'".format(data['provider_name'])) + try: + controller_obj = config.get_object(controller_name, kind='Controller') + msg += "Found existing controller\n" + if controller_obj.provider.name != provider_obj.name: + raise MOLNSException( + "Import data has provider '{0}'. Controller {1} exists with provider {2}. provider conversion is not possible.".format( + data['provider_name'], controller_obj.name, controller_obj.provider.name)) + except DatastoreException as e: + controller_obj = config.create_object(ptype=provider_obj.type, name=controller_name, kind='Controller', + provider_id=provider_obj.id) + msg += "Creating new controller\n" + cls.merge_config(controller_obj, data['config']) + config.save_object(controller_obj, kind='Controller') + msg += "Controller data imported\n" + return {'msg': msg} + + @classmethod + def controller_get_config(cls, name=None, provider_type=None, config=None): + """ Return a list of dict of config var for the controller config. + Each dict in the list has the keys: 'key', 'value', 'type' + + Either 'name' or 'provider_type' must be specified. + If 'name' is specified, then it will retreive the value from that + config and return it in 'value' (or return the string '********' + if that config is obfuscated, such passwords). 
+ + """ + if config is None: + raise MOLNSException("no config specified") + if name is None and provider_type is None: + raise MOLNSException("Controller name or provider type must be specified") + obj = None + if obj is None and name is not None: + try: + obj = config.get_object(name, kind='Controller') + except DatastoreException as e: + pass + if obj is None and provider_type is not None: + if provider_type not in VALID_PROVIDER_TYPES: + raise MOLNSException("Unknown provider type '{0}'".format(provider_type)) + p_hand = get_provider_handle('Controller', provider_type) + obj = p_hand('__tmp__', data={}, config_dir=config.config_dir) + if obj is None: + raise MOLNSException("Controller {0} not found".format(name)) + + ret = [] + for key, conf, value in obj.get_config_vars(): + if 'ask' in conf and not conf['ask']: + continue + question = conf['q'] + if value is not None: + myval = value + else: + if 'default' in conf and conf['default']: + if callable(conf['default']): + f1 = conf['default'] + try: + myval = f1() + except TypeError: + pass + else: + myval = conf['default'] + else: + myval = None + if myval is not None and 'obfuscate' in conf and conf['obfuscate']: + myval = '********' + ret.append({ + 'question': question, + 'key': key, + 'value': myval, + 'type': 'string' + }) + return ret + @classmethod def setup_controller(cls, args, config): - """Setup a controller. Set the provider configuration for the head node. Use 'worker setup' to set the configuration for worker nodes + """Setup a controller. Set the provider configuration for the head node. + Use 'worker setup' to set the configuration for worker nodes. """ logging.debug("MOLNSController.setup_controller(config={0})".format(config)) # name @@ -190,19 +230,21 @@ def setup_controller(cls, args, config): except DatastoreException as e: # provider providers = config.list_objects(kind='Provider') - if len(providers)==0: - print "No providers configured, please configure one ('molns provider setup') before initializing controller." + if len(providers) == 0: + print "No providers configured, " \ + "please configure one ('molns provider setup') before initializing controller." 
return print "Select a provider:" - for n,p in enumerate(providers): - print "\t[{0}] {1}".format(n,p.name) - provider_ndx = int(raw_input_default("enter the number of provider:", default='0')) + for n, p in enumerate(providers): + print "\t[{0}] {1}".format(n, p.name) + provider_ndx = int(raw_input_default("Enter the number of provider:", default='0')) provider_id = providers[provider_ndx].id provider_obj = config.get_object(name=providers[provider_ndx].name, kind='Provider') logging.debug("using provider {0}".format(provider_obj)) # create object try: - controller_obj = config.create_object(ptype=provider_obj.type, name=controller_name, kind='Controller', provider_id=provider_id) + controller_obj = config.create_object(ptype=provider_obj.type, name=controller_name, kind='Controller', + provider_id=provider_id) except DatastoreException as e: print e return @@ -214,29 +256,31 @@ def list_controller(cls, args, config): """ List all the currently configured controllers.""" controllers = config.list_objects(kind='Controller') if len(controllers) == 0: - print "No controllers configured" + return {'msg': "No controllers configured"} else: table_data = [] for c in controllers: - provider_name = config.get_object_by_id(c.provider_id, 'Provider').name + try: + p = config.get_object_by_id(c.provider_id, 'Provider') + provider_name = p.name + except DatastoreException as e: + provider_name = 'ERROR: {0}'.format(e) table_data.append([c.name, provider_name]) - table_print(['name', 'provider'], table_data) - + return {'type': 'table', 'column_names': ['name', 'provider'], 'data': table_data} + @classmethod def show_controller(cls, args, config): """ Show all the details of a controller config. """ if len(args) == 0: - print "USAGE: molns controller show name" - return - print config.get_object(name=args[0], kind='Controller') + raise MOLNSException("USAGE: molns controller show name") + return {'msg': str(config.get_object(name=args[0], kind='Controller'))} @classmethod def delete_controller(cls, args, config): """ Delete a controller config. 
""" - #print "MOLNSProvider.delete_provider(args={0}, config={1})".format(args, config) + # print "MOLNSProvider.delete_provider(args={0}, config={1})".format(args, config) if len(args) == 0: - print "USAGE: molns cluser delete name" - return + raise MOLNSException("USAGE: molns cluser delete name") config.delete_object(name=args[0], kind='Controller') @classmethod @@ -247,7 +291,7 @@ def ssh_controller(cls, args, config): if controller_obj is None: return # Check if any instances are assigned to this controller instance_list = config.get_controller_instances(controller_id=controller_obj.id) - #logging.debug("instance_list={0}".format(instance_list)) + # logging.debug("instance_list={0}".format(instance_list)) # Check if they are running ip = None if len(instance_list) > 0: @@ -257,25 +301,68 @@ def ssh_controller(cls, args, config): if status == controller_obj.STATUS_RUNNING: ip = i.ip_address if ip is None: - print "No active instance for this controller" - return + raise MOLNSException("No active instance for this controller") #print " ".join(['/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)]) #os.execl('/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)) - cmd = ['/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)] + cmd = ['/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i', + controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)] print " ".join(cmd) subprocess.call(cmd) print "SSH process completed" + @classmethod + def upload_controller(cls, args, config): + """ Copy a local file to the controller's home directory. """ + logging.debug("MOLNSController.upload_controller(args={0})".format(args)) + controller_obj = cls._get_controllerobj(args, config) + if controller_obj is None: + return + # Check if any instances are assigned to this controller + instance_list = config.get_controller_instances(controller_id=controller_obj.id) + + # Check if they are running + inst = None + if len(instance_list) > 0: + for i in instance_list: + status = controller_obj.get_instance_status(i) + logging.debug("instance={0} has status={1}".format(i, status)) + if status == controller_obj.STATUS_RUNNING: + inst = i + if inst is None: + raise MOLNSException("No active instance for this controller") + + file_to_transfer = args[1] + logging.debug("File to transfer: {0}".format(file_to_transfer)) + + remote_file_path = os.path.join("/home/ubuntu/", os.path.basename(file_to_transfer)) + + controller_obj.ssh.connect(inst, SSHDeploy.DEFAULT_SSH_PORT, "ubuntu", controller_obj.provider.sshkeyfilename()) + + sftp = controller_obj.ssh.open_sftp() + remote_fh = sftp.file(remote_file_path, "w") + try: + with open(file_to_transfer, "r") as fh: + remote_fh.write(fh.read()) + finally: + remote_fh.close() + sftp.close() + + print "Transferred {0} to {1}@{2}:{3}".format(file_to_transfer, inst.ip_address, "ubuntu", remote_file_path) @classmethod - def put_controller(cls, args, config): - """ Copy a local file to the controller's shared area. """ - logging.debug("MOLNSController.put_controller(args={0})".format(args)) + def get_controller(cls, args, config): + """ Copy a controller's file to the local filesystem. 
""" + logging.debug("MOLNSController.get_controller(args={0})".format(args)) controller_obj = cls._get_controllerobj(args, config) - if controller_obj is None: return + if controller_obj is None: + return + + if controller_obj.provider.type == constants.Constants.DockerProvider: + raise NotImplementedError("DockerController does not support this feature yet.") + # Check if any instances are assigned to this controller instance_list = config.get_controller_instances(controller_id=controller_obj.id) - #logging.debug("instance_list={0}".format(instance_list)) + # Check if they are running ip = None if len(instance_list) > 0: @@ -287,12 +374,70 @@ def put_controller(cls, args, config): if ip is None: print "No active instance for this controller" return - #print " ".join(['/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)]) - #os.execl('/usr/bin/ssh','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(),'ubuntu@{0}'.format(ip)) - cmd = ['/usr/bin/scp','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i',controller_obj.provider.sshkeyfilename(), args[1], 'ubuntu@{0}:/home/ubuntu/shared'.format(ip)] + cmd = ['/usr/bin/scp','-oStrictHostKeyChecking=no','-oUserKnownHostsFile=/dev/null','-i', + controller_obj.provider.sshkeyfilename(), 'ubuntu@{0}:{1}'.format(ip, args[1]), '.'] print " ".join(cmd) subprocess.call(cmd) print "SSH process completed" + + @classmethod + def put_controller(cls, args, config): + """ Copy a local file to the controller's and workers' shared area. """ + logging.debug("MOLNSController.put_controller(args={0})".format(args)) + controller_obj = cls._get_controllerobj(args, config) + if controller_obj is None: + return + + # Check if any instances are assigned to this controller + instance_list = config.get_controller_instances(controller_id=controller_obj.id) + + # Check if they are running + inst = None + if len(instance_list) > 0: + for i in instance_list: + status = controller_obj.get_instance_status(i) + logging.debug("instance={0} has status={1}".format(i, status)) + if status == controller_obj.STATUS_RUNNING: + inst = i + if inst is None: + raise MOLNSException("No active instance for this controller") + + file_to_transfer = args[1] + logging.debug("File to transfer: {0}".format(file_to_transfer)) + + remote_file_path = os.path.join("/home/ubuntu/shared", os.path.basename(file_to_transfer)) + + controller_obj.ssh.connect(inst, SSHDeploy.DEFAULT_SSH_PORT, "ubuntu", controller_obj.provider.sshkeyfilename()) + + sftp = controller_obj.ssh.open_sftp() + remote_fh = sftp.file(remote_file_path, "w") + try: + with open(file_to_transfer, "r") as fh: + remote_fh.write(fh.read()) + finally: + remote_fh.close() + sftp.close() + + print "Transferred {0} to {1}@{2}:{3}".format(file_to_transfer, inst.ip_address, "ubuntu", remote_file_path) + + @classmethod + def is_controller_running(cls, args, config): + logging.debug("MOLNSController.is_controller_running(args={0})".format(args)) + if len(args) > 0: + try: + controller_obj = cls._get_controllerobj(args, config) + except MOLNSException: + return {} + if controller_obj is None: return False + # Check if any instances are assigned to this controller + instance_list = config.get_controller_instances(controller_id=controller_obj.id) + if len(instance_list) > 0: + for i in instance_list: + status = controller_obj.get_instance_status(i) + if status == 
controller_obj.get_instance_status.STATUS_RUNNING: + return True + + return False @classmethod @@ -300,57 +445,80 @@ def status_controller(cls, args, config): """ Get status of the head node of a MOLNs controller. """ logging.debug("MOLNSController.status_controller(args={0})".format(args)) if len(args) > 0: - controller_obj = cls._get_controllerobj(args, config) - if controller_obj is None: return + try: + controller_obj = cls._get_controllerobj(args, config) + except MOLNSException: + return {} + if controller_obj is None: return {} # Check if any instances are assigned to this controller instance_list = config.get_controller_instances(controller_id=controller_obj.id) table_data = [] if len(instance_list) > 0: for i in instance_list: - provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + # provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + try: + p = config.get_object_by_id(i.provider_id, 'Provider') + provider_name = p.name + except DatastoreException as e: + provider_name = 'ERROR: {0}'.format(e) controller_name = config.get_object_by_id(i.controller_id, 'Controller').name status = controller_obj.get_instance_status(i) - table_data.append([controller_name, status, 'controller', provider_name, i.provider_instance_identifier, i.ip_address]) + table_data.append( + [controller_name, status, 'controller', provider_name, i.provider_instance_identifier, + i.ip_address]) else: - print "No instance running for this controller" - return + return {'msg': "No instance running for this controller"} # Check if any worker instances are assigned to this controller instance_list = config.get_worker_instances(controller_id=controller_obj.id) if len(instance_list) > 0: for i in instance_list: worker_name = config.get_object_by_id(i.worker_group_id, 'WorkerGroup').name worker_obj = cls._get_workerobj([worker_name], config) - provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + # provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + try: + p = config.get_object_by_id(i.provider_id, 'Provider') + provider_name = p.name + except DatastoreException as e: + provider_name = 'ERROR: {0}'.format(e) status = worker_obj.get_instance_status(i) - table_data.append([worker_name, status, 'worker', provider_name, i.provider_instance_identifier, i.ip_address]) - table_print(['name','status','type','provider','instance id', 'IP address'],table_data) + table_data.append( + [worker_name, status, 'worker', provider_name, i.provider_instance_identifier, i.ip_address]) + # table_print(['name','status','type','provider','instance id', 'IP address'],table_data) + r = {'type': 'table', 'column_names': ['name', 'status', 'type', 'provider', 'instance id', 'IP address'], + 'data': table_data} + return r else: instance_list = config.get_all_instances() if len(instance_list) > 0: - print "Current instances:" table_data = [] for i in instance_list: - provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + provider_obj = config.get_object_by_id(i.provider_id, 'Provider') + if provider_obj is None: + continue + provider_name = provider_obj.name controller_name = config.get_object_by_id(i.controller_id, 'Controller').name if i.worker_group_id is not None: worker_name = config.get_object_by_id(i.worker_group_id, 'WorkerGroup').name table_data.append([worker_name, 'worker', provider_name, i.provider_instance_identifier]) else: - table_data.append([controller_name, 'controller', provider_name, i.provider_instance_identifier]) + 
table_data.append( + [controller_name, 'controller', provider_name, i.provider_instance_identifier]) - table_print(['name','type','provider','instance id'],table_data) - print "\n\tUse 'molns status NAME' to see current status of each instance." + r = {'type': 'table', 'column_names': ['name', 'type', 'provider', 'instance id'], 'data': table_data} + r['msg'] = "\n\tUse 'molns status NAME' to see current status of each instance." + return r else: - print "No instance found" - + return {'msg': "No instance found"} @classmethod - def start_controller(cls, args, config): + def start_controller(cls, args, config, password=None, openWebBrowser=True, reserved_cpus=2): """ Start the MOLNs controller. """ + resume = False logging.debug("MOLNSController.start_controller(args={0})".format(args)) controller_obj = cls._get_controllerobj(args, config) - if controller_obj is None: return + if controller_obj is None: + return # Check if any instances are assigned to this controller instance_list = config.get_all_instances(controller_id=controller_obj.id) # Check if they are running or stopped (if so, resume them) @@ -365,16 +533,19 @@ def start_controller(cls, args, config): print "Resuming instance at {0}".format(i.ip_address) controller_obj.resume_instance(i) inst = i + resume=True break if inst is None: # Start a new instance print "Starting new controller" inst = controller_obj.start_instance() + # deploying - sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir) - sshdeploy.deploy_ipython_controller(inst.ip_address) - sshdeploy.deploy_molns_webserver(inst.ip_address) - #sshdeploy.deploy_stochss(inst.ip_address, port=443) + sshdeploy = SSHDeploy(controller_obj.ssh, config=controller_obj.provider, config_dir=config.config_dir) + sshdeploy.deploy_ipython_controller(inst, controller_obj, notebook_password=password, resume=resume, + reserved_cpus=reserved_cpus) + sshdeploy.deploy_molns_webserver(inst, controller_obj, openWebBrowser=openWebBrowser) + # sshdeploy.deploy_stochss(inst.ip_address, port=443) @classmethod def stop_controller(cls, args, config): @@ -399,20 +570,20 @@ def stop_controller(cls, args, config): if status == worker_obj.STATUS_RUNNING or status == worker_obj.STATUS_STOPPED: print "Terminating worker '{1}' running at {0}".format(i.ip_address, worker_name) worker_obj.terminate_instance(i) - else: print "No instance running for this controller" - @classmethod def terminate_controller(cls, args, config): """ Terminate the head node of a MOLNs controller. 
""" logging.debug("MOLNSController.terminate_controller(args={0})".format(args)) controller_obj = cls._get_controllerobj(args, config) - if controller_obj is None: return + if controller_obj is None: + return instance_list = config.get_all_instances(controller_id=controller_obj.id) logging.debug("\tinstance_list={0}".format([str(i) for i in instance_list])) - # Check if they are running or stopped + print("\tinstance_list={0}".format([str(i) for i in instance_list])) + # Check if they are running or stopped if len(instance_list) > 0: for i in instance_list: if i.worker_group_id is None: @@ -427,8 +598,6 @@ def terminate_controller(cls, args, config): if status == worker_obj.STATUS_RUNNING or status == worker_obj.STATUS_STOPPED: print "Terminating worker '{1}' running at {0}".format(i.ip_address, worker_name) worker_obj.terminate_instance(i) - - else: print "No instance running for this controller" @@ -462,19 +631,140 @@ def connect_controller_to_local(cls, args, config): sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir) client_file_data = sshdeploy.get_ipython_client_file(inst.ip_address) home_dir = os.environ.get('HOME') - ipython_client_filename = os.path.join(home_dir, '.ipython/profile_{0}/'.format(profile_name), 'security/ipcontroller-client.json') + ipython_client_filename = os.path.join(home_dir, '.ipython/profile_{0}/'.format(profile_name), + 'security/ipcontroller-client.json') logging.debug("Writing file {0}".format(ipython_client_filename)) with open(ipython_client_filename, 'w') as fd: fd.write(client_file_data) print "Success" - @classmethod - def start_spark(cls, args, config): - """ Start Apache Spark on the cluster. """ ############################################### class MOLNSWorkerGroup(MOLNSbase): + @classmethod + def worker_group_export(cls, args, config): + """ Export the configuration of a worker group. """ + if len(args) < 1: + raise MOLNSException("USAGE: molns worker export name [Filename]\n" \ + "\tExport the data from the worker group with the given name.") + worker_name = args[0] + if len(args) > 1: + filename = args[1] + else: + filename = 'Molns-Export-Worker-' + worker_name + '.json' + # check if provider exists + try: + worker_obj = config.get_object(worker_name, kind='WorkerGroup') + except DatastoreException as e: + raise MOLNSException("worker group not found") + data = {'name': worker_obj.name, + 'provider_name': worker_obj.provider.name, + 'controller_name': worker_obj.controller.name, + 'config': worker_obj.config} + return {'data': json.dumps(data), + 'type': 'file', + 'filename': filename} + + @classmethod + def worker_group_import(cls, args, config, json_data=None): + """ Import the configuration of a worker group. 
""" + if json_data is None: + if len(args) < 1: + raise MOLNSException("USAGE: molns worker import [Filename.json]\n" \ + "\Import the data from the worker with the given name.") + filename = args[0] + with open(filename) as fd: + data = json.load(fd) + else: + data = json_data + worker_name = data['name'] + msg = '' + try: + provider_obj = config.get_object(data['provider_name'], kind='Provider') + except DatastoreException as e: + raise MOLNSException("unknown provider '{0}'".format(data['provider_name'])) + try: + controller_obj = config.get_object(data['controller_name'], kind='Controller') + except DatastoreException as e: + raise MOLNSException("unknown controller '{0}'".format(data['provider_name'])) + try: + worker_obj = config.get_object(worker_name, kind='WorkerGroup') + msg += "Found existing worker group\n" + if worker_obj.provider.name != provider_obj.name: + raise MOLNSException( + "Import data has provider '{0}'. Worker group {1} exists with provider {2}. provider conversion is not possible.".format( + data['provider_name'], worker_obj.name, worker_obj.provider.name)) + if worker_obj.controller.name != controller_obj.name: + raise MOLNSException( + "Import data has controller '{0}'. Worker group {1} exists with controller {2}. provider conversion is not possible.".format( + data['controller_name'], worker_obj.name, worker_obj.controller.name)) + except DatastoreException as e: + worker_obj = config.create_object(ptype=provider_obj.type, name=worker_name, kind='WorkerGroup', + provider_id=provider_obj.id, controller_id=controller_obj.id) + msg += "Creating new worker group\n" + cls.merge_config(worker_obj, data['config']) + config.save_object(worker_obj, kind='WorkerGroup') + msg += "Worker group data imported\n" + return {'msg': msg} + + @classmethod + def worker_group_get_config(cls, name=None, provider_type=None, config=None): + """ Return a list of dict of config var for the worker group config. + Each dict in the list has the keys: 'key', 'value', 'type' + + Either 'name' or 'provider_type' must be specified. + If 'name' is specified, then it will retreive the value from that + config and return it in 'value' (or return the string '********' + if that config is obfuscated, such passwords). 
+ + """ + if config is None: + raise MOLNSException("no config specified") + if name is None and provider_type is None: + raise MOLNSException("'name' or 'provider_type' must be specified.") + obj = None + if obj is None and name is not None: + try: + obj = config.get_object(name, kind='WorkerGroup') + except DatastoreException as e: + pass + if obj is None and provider_type is not None: + if provider_type not in VALID_PROVIDER_TYPES: + raise MOLNSException("Unknown provider type '{0}'".format(provider_type)) + p_hand = get_provider_handle('WorkerGroup', provider_type) + obj = p_hand('__tmp__', data={}, config_dir=config.config_dir) + if obj is None: + raise MOLNSException("Worker group {0} not found".format(name)) + ret = [] + for key, conf, value in obj.get_config_vars(): + if 'ask' in conf and not conf['ask']: + continue + question = conf['q'] + if value is not None: + myval = value + else: + if 'default' in conf and conf['default']: + if callable(conf['default']): + f1 = conf['default'] + try: + myval = f1() + except TypeError: + pass + else: + myval = conf['default'] + else: + myval = None + if myval is not None and 'obfuscate' in conf and conf['obfuscate']: + myval = '********' + ret.append({ + 'question': question, + 'key': key, + 'value': myval, + 'type': 'string' + }) + return ret + @classmethod def setup_worker_groups(cls, args, config): """ Configure a worker group. """ @@ -489,31 +779,33 @@ def setup_worker_groups(cls, args, config): except DatastoreException as e: # provider providers = config.list_objects(kind='Provider') - if len(providers)==0: - print "No providers configured, please configure one ('molns provider setup') before initializing worker group." + if len(providers) == 0: + print "No providers configured, " \ + "please configure one ('molns provider setup') before initializing worker group." return print "Select a provider:" - for n,p in enumerate(providers): - print "\t[{0}] {1}".format(n,p.name) - provider_ndx = int(raw_input_default("enter the number of provider:", default='0')) + for n, p in enumerate(providers): + print "\t[{0}] {1}".format(n, p.name) + provider_ndx = int(raw_input_default("Enter the number of provider:", default='0')) provider_id = providers[provider_ndx].id provider_obj = config.get_object(name=providers[provider_ndx].name, kind='Provider') logging.debug("using provider {0}".format(provider_obj)) # controller controllers = config.list_objects(kind='Controller') - if len(controllers)==0: - print "No controllers configured, please configure one ('molns controller setup') before initializing worker group." + if len(controllers) == 0: + print "No controllers configured, " \ + "please configure one ('molns controller setup') before initializing worker group." 
return print "Select a controller:" - for n,p in enumerate(controllers): - print "\t[{0}] {1}".format(n,p.name) - controller_ndx = int(raw_input_default("enter the number of controller:", default='0')) - controller_id = controllers[controller_ndx].id + for n, p in enumerate(controllers): + print "\t[{0}] {1}".format(n, p.name) + controller_ndx = int(raw_input_default("Enter the number of controller:", default='0')) controller_obj = config.get_object(name=controllers[controller_ndx].name, kind='Controller') logging.debug("using controller {0}".format(controller_obj)) # create object try: - worker_obj = config.create_object(ptype=provider_obj.type, name=group_name, kind='WorkerGroup', provider_id=provider_id, controller_id=controller_obj.id) + worker_obj = config.create_object(ptype=provider_obj.type, name=group_name, kind='WorkerGroup', + provider_id=provider_id, controller_id=controller_obj.id) except DatastoreException as e: print e return @@ -525,28 +817,37 @@ def list_worker_groups(cls, args, config): """ List all the currently configured worker groups.""" groups = config.list_objects(kind='WorkerGroup') if len(groups) == 0: - print "No worker groups configured" + raise MOLNSException("No worker groups configured") else: table_data = [] for g in groups: - provider_name = config.get_object_by_id(g.provider_id, 'Provider').name - controller_name = config.get_object_by_id(g.controller_id, 'Controller').name + # provider_name = config.get_object_by_id(g.provider_id, 'Provider').name + try: + p = config.get_object_by_id(g.provider_id, 'Provider') + provider_name = p.name + except DatastoreException as e: + provider_name = 'ERROR: {0}'.format(e) + try: + c = config.get_object_by_id(g.controller_id, 'Controller') + controller_name = c.name + except DatastoreException as e: + controller_name = 'ERROR: {0}'.format(e) table_data.append([g.name, provider_name, controller_name]) - table_print(['name', 'provider', 'controller'], table_data) + return {'type': 'table', 'column_names': ['name', 'provider', 'controller'], 'data': table_data} @classmethod def show_worker_groups(cls, args, config): """ Show all the details of a worker group config. """ if len(args) == 0: - print "USAGE: molns worker show name" + raise MOLNSException("USAGE: molns worker show name") return - print config.get_object(name=args[0], kind='WorkerGroup') + return {'msg': str(config.get_object(name=args[0], kind='WorkerGroup'))} @classmethod def delete_worker_groups(cls, args, config): """ Delete a worker group config. 
""" if len(args) == 0: - print "USAGE: molns worker delete name" + raise MOLNSException("USAGE: molns worker delete name") return config.delete_object(name=args[0], kind='WorkerGroup') @@ -559,21 +860,24 @@ def status_worker_groups(cls, args, config): if worker_obj is None: return # Check if any instances are assigned to this worker instance_list = config.get_all_instances(worker_group_id=worker_obj.id) - # Check if they are running or stopped + # Check if they are running or stopped if len(instance_list) > 0: table_data = [] for i in instance_list: status = worker_obj.get_instance_status(i) - #print "{0} type={3} ip={1} id={2}".format(status, i.ip_address, i.provider_instance_identifier, worker_obj.PROVIDER_TYPE) + # print "{0} type={3} ip={1} id={2}".format(status, i.ip_address, i.provider_instance_identifier, worker_obj.PROVIDER_TYPE) worker_name = config.get_object_by_id(i.worker_group_id, 'WorkerGroup').name provider_name = config.get_object_by_id(i.provider_id, 'Provider').name status = worker_obj.get_instance_status(i) - table_data.append([worker_name, status, 'worker', provider_name, i.provider_instance_identifier, i.ip_address]) - table_print(['name','status','type','provider','instance id', 'IP address'],table_data) + table_data.append( + [worker_name, status, 'worker', provider_name, i.provider_instance_identifier, i.ip_address]) + return {'type': 'table', + 'column_names': ['name', 'status', 'type', 'provider', 'instance id', 'IP address'], + 'data': table_data} else: - print "No worker instances running for this cluster" + return {'msg': "No worker instances running for this cluster"} else: - print "USAGE: molns worker status NAME" + raise MOLNSException("USAGE: molns worker status NAME") @classmethod def start_worker_groups(cls, args, config): @@ -585,15 +889,14 @@ def start_worker_groups(cls, args, config): num_vms_to_start = int(num_vms) controller_ip = cls.__launch_workers__get_controller(worker_obj, config) if controller_ip is None: return - #logging.debug("\tcontroller_ip={0}".format(controller_ip)) + # logging.debug("\tcontroller_ip={0}".format(controller_ip)) try: inst_to_deploy = cls.__launch_worker__start_or_resume_vms(worker_obj, config, num_vms_to_start) - #logging.debug("\tinst_to_deploy={0}".format(inst_to_deploy)) + # logging.debug("\tinst_to_deploy={0}".format(inst_to_deploy)) cls.__launch_worker__deploy_engines(worker_obj, controller_ip, inst_to_deploy, config) except ProviderException as e: print "Could not start workers: {0}".format(e) - @classmethod def add_worker_groups(cls, args, config): """ Add workers of a MOLNs cluster. """ @@ -635,7 +938,6 @@ def __launch_workers__get_controller(cls, worker_obj, config): print "No controller running for this worker group." 
return return controller_ip - @classmethod def __launch_worker__start_or_resume_vms(cls, worker_obj, config, num_vms_to_start=0): @@ -654,12 +956,12 @@ def __launch_worker__start_or_resume_vms(cls, worker_obj, config, num_vms_to_sta print "Resuming worker at {0}".format(i.ip_address) inst_to_resume.append(i) num_vms_to_start -= 1 - #logging.debug("inst_to_resume={0}".format(inst_to_resume)) + # logging.debug("inst_to_resume={0}".format(inst_to_resume)) if len(inst_to_resume) > 0: worker_obj.resume_instance(inst_to_resume) inst_to_deploy.extend(inst_to_resume) inst_to_deploy.extend(cls.__launch_worker__start_vms(worker_obj, num_vms_to_start)) - #logging.debug("inst_to_deploy={0}".format(inst_to_deploy)) + # logging.debug("inst_to_deploy={0}".format(inst_to_deploy)) return inst_to_deploy @classmethod @@ -669,12 +971,11 @@ def __launch_worker__start_vms(cls, worker_obj, num_vms_to_start=0): if num_vms_to_start > 0: # Start a new instances print "Starting {0} new workers".format(num_vms_to_start) - inst_to_deploy = worker_obj.start_instance(num=num_vms_to_start) - if not isinstance(inst_to_deploy,list): + inst_to_deploy = worker_obj.start_instance(num=num_vms_to_start) + if not isinstance(inst_to_deploy, list): inst_to_deploy = [inst_to_deploy] return inst_to_deploy - @classmethod def __launch_worker__deploy_engines(cls, worker_obj, controller_ip, inst_to_deploy, config): print "Deploying on {0} workers".format(len(inst_to_deploy)) @@ -688,8 +989,11 @@ def __launch_worker__deploy_engines(cls, worker_obj, controller_ip, inst_to_depl logging.debug("__launch_worker__deploy_engines() workpool(size={0})".format(len(inst_to_deploy))) jobs = [] for i in inst_to_deploy: - logging.debug("multiprocessing.Process(target=engine_ssh.deploy_ipython_engine({0}, engine_file)".format(i.ip_address)) - p = multiprocessing.Process(target=engine_ssh.deploy_ipython_engine, args=(i.ip_address, controller_ip, engine_file, controller_ssh_keyfile,)) + logging.debug( + "multiprocessing.Process(target=engine_ssh.deploy_ipython_engine({0}, engine_file)".format( + i.ip_address)) + p = multiprocessing.Process(target=engine_ssh.deploy_ipython_engine, args=( + i.ip_address, controller_ip, engine_file, controller_ssh_keyfile,)) jobs.append(p) p.start() logging.debug("__launch_worker__deploy_engines() joining processes.") @@ -746,36 +1050,144 @@ def terminate_worker_groups(cls, args, config): else: print "No workers running in the worker group" + ############################################### -class MOLNSProvider(): +class MOLNSProvider(MOLNSbase): + @classmethod + def provider_export(cls, args, config): + """ Export the configuration of a provider. """ + if len(args) < 1: + raise MOLNSException("USAGE: molns provider export name [Filename]\n" \ + "\tExport the data from the provider with the given name.") + provider_name = args[0] + if len(args) > 1: + filename = args[1] + else: + filename = 'Molns-Export-Provider-' + provider_name + '.json' + # check if provider exists + try: + provider_obj = config.get_object(args[0], kind='Provider') + except DatastoreException as e: + raise MOLNSException("provider not found") + data = {'name': provider_obj.name, + 'type': provider_obj.type, + 'config': provider_obj.config} + return {'data': json.dumps(data), + 'type': 'file', + 'filename': filename} + + @classmethod + def provider_import(cls, args, config, json_data=None): + """ Import the configuration of a provider. 
""" + if json_data is None: + if len(args) < 1: + raise MOLNSException("USAGE: molns provider import [Filename.json]\n" \ + "\Import the data from the provider with the given name.") + filename = args[0] + with open(filename) as fd: + data = json.load(fd) + else: + data = json_data + provider_name = data['name'] + msg = '' + if data['type'] not in VALID_PROVIDER_TYPES: + raise MOLNSException("unknown provider type '{0}'".format(data['type'])) + try: + provider_obj = config.get_object(provider_name, kind='Provider') + msg += "Found existing provider\n" + if provider_obj.type != data['type']: + raise MOLNSException( + "Import data has provider type '{0}'. Provier {1} exists with type {2}. Type conversion is not possible.".format( + data['type'], provider_obj.name, provider_obj.type)) + except DatastoreException as e: + provider_obj = config.create_object(name=provider_name, ptype=data['type'], kind='Provider') + msg += "Creating new provider\n" + cls.merge_config(provider_obj, data['config']) + config.save_object(provider_obj, kind='Provider') + msg += "Provider data imported\n" + return {'msg': msg} + + @classmethod + def provider_get_config(cls, name=None, provider_type=None, config=None): + """ Return a list of dict of config var for the provider config. + Each dict in the list has the keys: 'key', 'value', 'type' + + Either 'name' or 'provider_type' must be specified. + If 'name' is specified, then it will retreive the value from that + config and return it in 'value' (or return the string '********' + if that config is obfuscated, such passwords). + + """ + if config is None: + raise MOLNSException("no config specified") + if name is None and provider_type is None: + raise MOLNSException("provider name or type must be specified") + obj = None + if obj is None and name is not None: + try: + obj = config.get_object(name, kind='Provider') + except DatastoreException as e: + pass + if obj is None and provider_type is not None: + if provider_type not in VALID_PROVIDER_TYPES: + raise MOLNSException("unknown provider type '{0}'".format(provider_type)) + p_hand = get_provider_handle('Provider', provider_type) + obj = p_hand('__tmp__', data={}, config_dir=config.config_dir) + if obj is None: + raise MOLNSException("provider {0} not found".format(name)) + ret = [] + for key, conf, value in obj.get_config_vars(): + if 'ask' in conf and not conf['ask']: + continue + question = conf['q'] + if value is not None: + myval = value + else: + if 'default' in conf and conf['default']: + if callable(conf['default']): + f1 = conf['default'] + try: + myval = f1() + except TypeError: + pass + else: + myval = conf['default'] + else: + myval = None + if myval is not None and 'obfuscate' in conf and conf['obfuscate']: + myval = '********' + ret.append({ + 'question': question, + 'key': key, + 'value': myval, + 'type': 'string' + }) + return ret + @classmethod def provider_setup(cls, args, config): """ Setup a new provider. Create the MOLNS image and SSH key if necessary.""" - #print "MOLNSProvider.provider_setup(args={0})".format(args) if len(args) < 1: print "USAGE: molns provider setup name" print "\tCreates a new provider with the given name." 
return - # find the \n\tWhere PROVIDER_TYPE is one of: {0}".format(VALID_PROVIDER_TYPES) - # provider name - provider_name = args[0] # check if provider exists try: provider_obj = config.get_object(args[0], kind='Provider') - except DatastoreException as e: + except DatastoreException: # ask provider type print "Select a provider type:" - for n,p in enumerate(VALID_PROVIDER_TYPES): - print "\t[{0}] {1}".format(n,p) + for n, p in enumerate(VALID_PROVIDER_TYPES): + print "\t[{0}] {1}".format(n, p) while True: try: - provider_ndx = int(raw_input_default("enter the number of type:", default='0')) + provider_ndx = int(raw_input_default("Enter the number of type:", default='0')) provider_type = VALID_PROVIDER_TYPES[provider_ndx] break except (ValueError, IndexError): pass - logging.debug("provider type '{0}'".format(provider_type)) + logging.debug("Provider type '{0}'".format(provider_type)) # Create provider try: provider_obj = config.create_object(name=args[0], ptype=provider_type, kind='Provider') @@ -786,6 +1198,17 @@ def provider_setup(cls, args, config): print "Enter configuration for provider {0}:".format(args[0]) setup_object(provider_obj) config.save_object(provider_obj, kind='Provider') + + cls.provider_initialize(args[0], config) + + @classmethod + def provider_initialize(cls, provider_name, config): + """ Create the MOLNS image and SSH key if necessary.""" + try: + provider_obj = config.get_object(provider_name, kind='Provider') + print "Provider object {0}".format(provider_obj) + except DatastoreException as e: + raise MOLNSException("provider not found") # print "Checking all config artifacts." # check for ssh key @@ -807,7 +1230,7 @@ def provider_setup(cls, args, config): provider_obj.create_seurity_group() else: print "security group={0} is valid.".format(provider_obj['group_name']) - + # check for MOLNS image if provider_obj['molns_image_name'] is None or provider_obj['molns_image_name'] == '': if provider_obj['ubuntu_image_name'] is None or provider_obj['ubuntu_image_name'] == '': @@ -816,13 +1239,12 @@ def provider_setup(cls, args, config): print "Creating new image, this process can take a long time (10-30 minutes)." provider_obj['molns_image_name'] = provider_obj.create_molns_image() elif not provider_obj.check_molns_image(): - print "Error: an molns image was provided, but it is not available in cloud." + print "Error: a molns image ID was provided, but it does not exist." return print "Success." config.save_object(provider_obj, kind='Provider') - - + @classmethod def provider_rebuild(cls, args, config): """ Rebuild the MOLNS image.""" @@ -847,7 +1269,7 @@ def provider_rebuild(cls, args, config): @classmethod def provider_list(cls, args, config): """ List all the currently configured providers.""" - #print "MOLNSProvider.provider_list(args={0}, config={1})".format(args, config) + # print "MOLNSProvider.provider_list(args={0}, config={1})".format(args, config) providers = config.list_objects(kind='Provider') if len(providers) == 0: print "No providers configured" @@ -855,12 +1277,14 @@ def provider_list(cls, args, config): table_data = [] for p in providers: table_data.append([p.name, p.type]) - table_print(['name', 'type'], table_data) + # table_print(['name', 'type'], table_data) + r = {'type': 'table', 'column_names': ['name', 'type'], 'data': table_data} + return r @classmethod def show_provider(cls, args, config): """ Show all the details of a provider config. 
""" - #print "MOLNSProvider.show_provider(args={0}, config={1})".format(args, config) + # print "MOLNSProvider.show_provider(args={0}, config={1})".format(args, config) if len(args) == 0: print "USAGE: molns provider show name" return @@ -869,14 +1293,16 @@ def show_provider(cls, args, config): @classmethod def delete_provider(cls, args, config): """ Delete a provider config. """ - #print "MOLNSProvider.delete_provider(args={0}, config={1})".format(args, config) + # print "MOLNSProvider.delete_provider(args={0}, config={1})".format(args, config) if len(args) == 0: print "USAGE: molns provider delete name" return config.delete_object(name=args[0], kind='Provider') + + ############################################### -class MOLNSInstances(): +class MOLNSInstances(MOLNSbase): @classmethod def show_instances(cls, args, config): """ List all instances in the db """ @@ -884,15 +1310,19 @@ def show_instances(cls, args, config): if len(instance_list) > 0: table_data = [] for i in instance_list: - provider_name = config.get_object_by_id(i.provider_id, 'Provider').name + provider_obj = config.get_object_by_id(i.provider_id, 'Provider') + if provider_obj is None: + continue + provider_name = provider_obj.name + #print "provider_obj.type",provider_obj.type if i.worker_group_id is not None: - name = config.get_object_by_id(i.worker_id, 'WorkerGroup').name + name = config.get_object_by_id(i.worker_group_id, 'WorkerGroup').name itype = 'worker' else: name = config.get_object_by_id(i.controller_id, 'Controller').name itype = 'controller' table_data.append([i.id, provider_name, i.provider_instance_identifier, itype, name]) - table_print(['ID', 'provider', 'instance id', 'type', 'name'],table_data) + table_print(['ID', 'provider', 'instance id', 'type', 'name'], table_data) else: print "No instance found" @@ -914,7 +1344,6 @@ def delete_instance(cls, args, config): config.delete_instance(instance) print "instance {0} deleted".format(instance_id) - @classmethod def clear_instances(cls, args, config): """ delete all instances in the db """ @@ -927,85 +1356,418 @@ def clear_instances(cls, args, config): else: print "No instance found" +############################################### + +class MOLNSExec(MOLNSbase): + @classmethod + def _get_ip_for_job(cls, job, config): + instance_list = config.get_controller_instances(controller_id=job.controller_id) + controller_obj = config.get_object_by_id(job.controller_id, 'Controller') + if controller_obj is None: + raise MOLNSException("Could not find the controller for this job") + # Check if they are running + ip = None + if len(instance_list) > 0: + for i in instance_list: + status = controller_obj.get_instance_status(i) + logging.debug("instance={0} has status={1}".format(i, status)) + if status == controller_obj.STATUS_RUNNING: + ip = i.ip_address + return ip, controller_obj + + @classmethod + def start_job(cls, args, config): + ''' Execute a process on the controller.''' + # Get Controller + if len(args) < 2: + raise MOLNSException("USAGE: molns exec start name [Command]\n"\ + "\tExecute 'Command' on the controller with the given name.") + + else: + controller_obj = cls._get_controllerobj(args, config) + if controller_obj is None: + raise Exception("Countroller {0} not found".format(args[0])) + # Check if controller is running + instance_list = config.get_all_instances(controller_id=controller_obj.id) + inst = None + if len(instance_list) > 0: + for i in instance_list: + status = controller_obj.get_instance_status(i) + if status == controller_obj.STATUS_RUNNING: + 
+
+    @classmethod
+    def job_status(cls, args, config):
+        ''' Check if a process is still running on the controller.'''
+        if len(args) < 1:
+            raise MOLNSException("USAGE: molns exec status [JobID]\n"\
+                "\tCheck if a process is still running on the controller.")
+        j = config.get_job(jobID=args[0])
+        ip, controller_obj = cls._get_ip_for_job(j, config)
+        if ip is None:
+            return {'running': False, 'msg': "No active instance for this controller"}
+        sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir)
+        (running, msg) = sshdeploy.remote_execution_job_status(ip, j.jobID)
+        return {'running': running, 'msg': msg}
+
+    @classmethod
+    def job_logs(cls, args, config):
+        ''' Return the output (stdout/stderr) of the process.'''
+        if len(args) < 1:
+            raise MOLNSException("USAGE: molns exec logs [JobID] [seek]\n"\
+                "\tReturn the output (stdout/stderr) of the process (starting from 'seek').")
+        j = config.get_job(jobID=args[0])
+        ip, controller_obj = cls._get_ip_for_job(j, config)
+        if ip is None:
+            raise MOLNSException("No active instance for this controller")
+        seek = 0
+        if len(args) > 1:
+            try:
+                seek = int(args[1])
+            except Exception:
+                raise MOLNSException("'seek' must be an integer")
+        sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir)
+        logs = sshdeploy.remote_execution_get_job_logs(ip, j.jobID, seek)
+        return {'msg': logs}
+
+    @classmethod
+    def fetch_job_results(cls, args, config, overwrite=False):
+        ''' Transfer files created by the process from the controller to local file system.'''
+        if len(args) < 2:
+            raise MOLNSException("USAGE: molns exec fetch [JobID] [filename] (destination filename)\n"\
+                "\tTransfer files created by the process from the controller to the local file system.")
+        filename = args[1]
+        j = config.get_job(jobID=args[0])
+        if j is None:
+            raise MOLNSException("Job not found")
+        ip, controller_obj = cls._get_ip_for_job(j, config)
+        if ip is None:
+            raise MOLNSException("No active instance for this controller")
+        sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir)
+        if os.path.isfile(filename) and not overwrite and (len(args) < 3 or args[-1] != '--force'):
+            raise MOLNSException("File {0} exists, use '--force' or overwrite=True to ignore.".format(filename))
+        if len(args) >= 3 and not args[2].startswith('--'):
+            localfile = args[2]
+        else:
+            localfile = filename
+        sshdeploy.remote_execution_fetch_file(ip, j.jobID, filename, localfile)
+        return {'msg': "File transfer complete."}
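+
+    # Example of the argument handling above (hypothetical job ID and file
+    # names): fetch 'out.dat' into 'local_out.dat', overwriting any existing
+    # local file:
+    #
+    #     molns exec fetch 2a7f9c out.dat local_out.dat --force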
+
+    @classmethod
+    def cleanup_job(cls, args, config):
+        ''' Remove process files from the controller (will kill active processes if running).'''
+        if len(args) < 1:
+            raise MOLNSException("USAGE: molns exec cleanup [JobID]\n"\
+                "\tRemove process files from the controller (will kill active processes if running).")
+        j = config.get_job(jobID=args[0])
+        if j is None:
+            return {'msg': "Job not found"}
+        ip, controller_obj = cls._get_ip_for_job(j, config)
+        if ip is None:
+            raise MOLNSException("No active instance for this controller")
+        sshdeploy = SSHDeploy(config=controller_obj.provider, config_dir=config.config_dir)
+        sshdeploy.remote_execution_delete_job(ip, j.jobID)
+        config.delete_job(j)
+        return {'msg': "Job {0} deleted".format(args[0])}
+
+    @classmethod
+    def list_jobs(cls, args, config):
+        ''' List all jobs. If 'name' is specified, list all jobs on named controller.'''
+        if len(args) > 0:
+            controller_obj = cls._get_controllerobj(args, config)
+            if controller_obj is None:
+                raise Exception("Controller {0} not found".format(args[0]))
+            jobs = config.get_all_jobs(controller_id=controller_obj.id)
+        else:
+            jobs = config.get_all_jobs()
+
+        if len(jobs) == 0:
+            return {'msg': "No jobs found"}
+        else:
+            table_data = []
+            for j in jobs:
+                try:
+                    p = config.get_object_by_id(j.controller_id, 'Controller')
+                    controller_name = p.name
+                except DatastoreException as e:
+                    controller_name = 'ERROR: {0}'.format(e)
+                table_data.append([j.id, j.jobID, controller_name, j.exec_str, j.date])
+            return {'type': 'table', 'column_names': ['ID', 'JobID', 'Controller', 'Command', 'Date'],
+                    'data': table_data}
+
+
+##############################################################################################
+##############################################################################################
+##############################################################################################
+##############################################################################################
+##############################################################################################
+##############################################################################################
+# Below is the API for the command line execution
+
+class CommandException(Exception):
+    pass
+
+
+def process_output_exception(e):
+    logging.exception(e)
+    sys.stderr.write("Error: {0}\n".format(e))
+
+
+def process_output(result):
+    if result is not None:
+        if type(result) == dict and 'type' in result:
+            if result['type'] == 'table' and 'column_names' in result and 'data' in result:
+                table_print(result['column_names'], result['data'])
+            if result['type'] == 'file' and 'filename' in result and 'data' in result:
+                output_to_file(result['filename'], result['data'])
+        elif type(result) == dict and 'msg' in result:
+            print result['msg']
+        else:
+            print result
+
+
+def output_to_file(filename, data):
+    with open(filename, 'w+') as fd:
+        fd.write(data)
+
+
+def table_print(column_names, data):
+    column_width = [0] * len(column_names)
+    for i, n in enumerate(column_names):
+        column_width[i] = len(str(n))
+    for row in data:
+        if len(row) != len(column_names):
+            raise Exception("len(row) != len(column_names): {0} vs {1}".format(len(row), len(column_names)))
+        for i, n in enumerate(row):
+            if len(str(n)) > column_width[i]:
+                column_width[i] = len(str(n))
+    out = "|".join(["-" * (column_width[i] + 2) for i in range(len(column_names))])
+    print '|' + out + '|'
+    out = " | ".join([column_names[i].ljust(column_width[i]) for i in range(len(column_names))])
+    print '| ' + out + ' |'
+    out = "|".join(["-" * (column_width[i] + 2) for i in range(len(column_names))])
+    print '|' + out + '|'
+    for row in data:
+        out = " | ".join([str(n).ljust(column_width[i]) for i, n in enumerate(row)])
+        print '| ' + out + ' |'
+    out = "|".join(["-" * (column_width[i] + 2) for i in range(len(column_names))])
+    print '|' + out + '|'
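+
+
+# Illustrative output of table_print(['name', 'type'], [['myprov', 'EC2']]):
+#
+#     |--------|------|
+#     | name   | type |
+#     |--------|------|
+#     | myprov | EC2  |
+#     |--------|------|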
+
+
+def raw_input_default(q, default=None, obfuscate=False):
+    if default is None or default == '':
+        return raw_input("{0}:".format(q))
+    else:
+        if obfuscate:
+            ret = raw_input("{0} [******]: ".format(q))
+        else:
+            ret = raw_input("{0} [{1}]: ".format(q, default))
+        if ret == '':
+            return default
+        else:
+            return ret.strip()
+
+
+def raw_input_default_config(q, default=None, obj=None):
+    """ Ask the user and process the response with a default value. """
+    if default is None:
+        if callable(q['default']):
+            f1 = q['default']
+            try:
+                default = f1(obj)
+            except TypeError:
+                pass
+        else:
+            default = q['default']
+    if 'ask' in q and not q['ask']:
+        return default
+    if 'obfuscate' in q and q['obfuscate']:
+        return raw_input_default(q['q'], default=default, obfuscate=True)
+    else:
+        return raw_input_default(q['q'], default=default, obfuscate=False)
+
+
+def setup_object(obj):
+    """ Setup a molns_datastore object using raw_input_default function. """
+    for key, conf, value in obj.get_config_vars():
+        obj[key] = raw_input_default_config(conf, default=value, obj=obj)
+
+
+###############################################
+class SubCommand:
+    def __init__(self, command, subcommands):
+        self.command = command
+        self.subcommands = subcommands
+
+    def __str__(self):
+        r = ''
+        for c in self.subcommands:
+            r += self.command + " " + c.__str__() + "\n"
+        return r[:-1]
+
+    def __eq__(self, other):
+        return self.command == other
+
+    def run(self, args, config_dir=None):
+        if len(args) > 0:
+            cmd = args[0]
+            for c in self.subcommands:
+                if c == cmd:
+                    return c.run(args[1:], config_dir=config_dir)
+        raise CommandException("command not found")
+
+
+###############################################
+
+
+class Command:
+    def __init__(self, command, args_defs={}, description=None, function=None):
+        self.command = command
+        self.args_defs = args_defs
+        if function is None:
+            raise Exception("Command must have a function")
+        self.function = function
+        if description is None:
+            self.description = function.__doc__.strip()
+        else:
+            self.description = description
+
+    def __str__(self):
+        ret = self.command + " "
+        for k, v in self.args_defs.iteritems():
+            if v is None:
+                ret += "[{0}] ".format(k)
+            else:
+                ret += "[{0}={1}] ".format(k, v)
+        ret += "\n\t" + self.description
+        return ret
+
+    def __eq__(self, other):
+        return self.command == other
+
+    def run(self, args, config_dir=None):
+        config = MOLNSConfig(config_dir=config_dir)
+        return self.function(args, config=config)
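+
+
+# Minimal sketch of how Command and SubCommand compose (the 'ping' command is
+# hypothetical; the real commands are registered in COMMAND_LIST below):
+#
+#     ping = Command('ping', {}, description='Reply with pong.',
+#                    function=lambda args, config: {'msg': 'pong'})
+#     tools = SubCommand('tools', [ping])
+#     process_output(tools.run(['ping'], config_dir='./.molns/'))  # prints: pong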
+
+
 ###############################################
 COMMAND_LIST = [
-    # Commands to interact with the head-node.
-    Command('ssh', {'name':None},
+    # Commands to interact with the head-node.
+    Command('ssh', {'name': None},
         function=MOLNSController.ssh_controller),
-    Command('status', {'name':None},
+    Command('status', {'name': None},
         function=MOLNSController.status_controller),
-    Command('start', {'name':None},
+    Command('start', {'name': None},
         function=MOLNSController.start_controller),
-    Command('stop', {'name':None},
+    Command('stop', {'name': None},
         function=MOLNSController.stop_controller),
-    Command('terminate', {'name':None},
+    Command('terminate', {'name': None},
        function=MOLNSController.terminate_controller),
-    Command('put', {'name':None, 'file':None},
+    Command('get', {'name': None, 'file': None},
+        function=MOLNSController.get_controller),
+    Command('put', {'name': None, 'file': None},
         function=MOLNSController.put_controller),
-
-    #Command('local-connect', {'name':None},
-    #        function=MOLNSController.connect_controller_to_local),
-    # Commands to interact with controller
-    SubCommand('controller',[
-        Command('setup', {'name':None},
+    Command('upload', {'name': None, 'file': None},
+        function=MOLNSController.upload_controller),
+    # Command('local-connect', {'name':None},
+    #        function=MOLNSController.connect_controller_to_local),
+    # Commands to interact with controller
+    SubCommand('controller', [
+        Command('setup', {'name': None},
            function=MOLNSController.setup_controller),
-        Command('list', {'name':None},
+        Command('list', {'name': None},
            function=MOLNSController.list_controller),
-        Command('show', {'name':None},
+        Command('show', {'name': None},
            function=MOLNSController.show_controller),
-        Command('delete', {'name':None},
+        Command('delete', {'name': None},
            function=MOLNSController.delete_controller),
-    ]),
-    # Commands to interact with Worker-Groups
-    SubCommand('worker',[
-        Command('setup', {'name':None},
+        Command('export', {'name': None},
+            function=MOLNSController.controller_export),
+        Command('import', {'filename.json': None},
+            function=MOLNSController.controller_import),
+    ]),
+    # Commands to interact with Worker-Groups
+    SubCommand('worker', [
+        Command('setup', {'name': None},
            function=MOLNSWorkerGroup.setup_worker_groups),
-        Command('list', {'name':None},
+        Command('list', {'name': None},
            function=MOLNSWorkerGroup.list_worker_groups),
-        Command('show', {'name':None},
+        Command('show', {'name': None},
            function=MOLNSWorkerGroup.show_worker_groups),
-        Command('delete', {'name':None},
+        Command('delete', {'name': None},
            function=MOLNSWorkerGroup.delete_worker_groups),
-        Command('start', {'name':None},
+        Command('start', {'name': None},
            function=MOLNSWorkerGroup.start_worker_groups),
-        Command('add', {'name':None},
+        Command('add', {'name': None},
            function=MOLNSWorkerGroup.add_worker_groups),
-        Command('status', {'name':None},
+        Command('status', {'name': None},
            function=MOLNSWorkerGroup.status_worker_groups),
-        #Command('stop', {'name':None},
-        #        function=MOLNSWorkerGroup.stop_worker_groups),
-        Command('terminate', {'name':None},
+        Command('stop', {'name': None},
            function=MOLNSWorkerGroup.terminate_worker_groups),
-    ]),
-    # Commands to interact with Infrastructure-Providers
-    SubCommand('provider',[
-        Command('setup',{'name':None},
+        Command('terminate', {'name': None},
+            function=MOLNSWorkerGroup.terminate_worker_groups),
+        Command('export', {'name': None},
+            function=MOLNSWorkerGroup.worker_group_export),
+        Command('import', {'filename.json': None},
+            function=MOLNSWorkerGroup.worker_group_import),
+    ]),
+    # Commands to interact with Infrastructure-Providers
+    SubCommand('provider', [
+        Command('setup', {'name': None},
            function=MOLNSProvider.provider_setup),
-        Command('rebuild',{'name':None},
+        Command('rebuild', {'name': None},
            function=MOLNSProvider.provider_rebuild),
-        Command('list',{'name':None},
+        Command('list', {'name': None},
            function=MOLNSProvider.provider_list),
-        Command('show',{'name':None},
+        Command('show', {'name': None},
            function=MOLNSProvider.show_provider),
-        Command('delete',{'name':None},
+        Command('delete', {'name': None},
            function=MOLNSProvider.delete_provider),
-    ]),
-    # Commands to interact with the instance DB
-    SubCommand('instances',[
-    Command('list', {},
+        Command('export', {'name': None},
+            function=MOLNSProvider.provider_export),
+        Command('import', {'filename.json': None},
+            function=MOLNSProvider.provider_import),
+    ]),
+    # Commands to interact with the instance DB
+    SubCommand('instancedb', [
+        Command('list', {},
            function=MOLNSInstances.show_instances),
-    Command('delete', {'ID':None},
+        Command('delete', {'ID': None},
            function=MOLNSInstances.delete_instance),
-    Command('clear', {},
+        Command('clear', {},
            function=MOLNSInstances.clear_instances),
     ]),
+    SubCommand('exec', [
+        Command('start', OrderedDict([('name', None), ('command', None)]),
+            function=MOLNSExec.start_job),
+        Command('status', {'jobID': None},
+            function=MOLNSExec.job_status),
+        Command('logs', {'jobID': None},
+            function=MOLNSExec.job_logs),
+        Command('fetch', OrderedDict([('jobID', None), ('filename', None)]),
+            function=MOLNSExec.fetch_job_results),
+        Command('cleanup', {'jobID': None},
+            function=MOLNSExec.cleanup_job),
+        Command('list', {'name': None},
+            function=MOLNSExec.list_jobs),
+    ]),
 ]
 
 
-def printHelp():
+def print_help():
     print "molns "
     print " --config=[Config Directory=./.molns/]"
     print "\tSpecify an alternate config location. (Must be first argument.)"
@@ -1013,41 +1775,49 @@ def printHelp():
         print c
 
 
-def parseArgs():
+def parse_args():
     if len(sys.argv) < 2 or sys.argv[1] == '-h':
-        printHelp()
+        print_help()
         return
-
+
+    Log.verbose = True
     arg_list = sys.argv[1:]
     config_dir = './.molns/'
+    while len(arg_list) > 0 and arg_list[0].startswith('--'):
+
         if arg_list[0].startswith('--config='):
-            config_dir = sys.argv[1].split('=',2)[1]
+            config_dir = sys.argv[1].split('=', 2)[1]
+
         if arg_list[0].startswith('--debug'):
             print "Turning on Debugging output"
-            logger.setLevel(logging.DEBUG) #for Debugging
-            #logger.setLevel(logging.INFO) #for Debugging
+            logger.setLevel(logging.DEBUG)
+            Log.verbose = True
+
+        arg_list = arg_list[1:]
-
-    #print "config_dir", config_dir
-    #print "arg_list ", arg_list
-    if len(arg_list) == 0 or arg_list[0] =='help' or arg_list[0] == '-h':
-        printHelp()
+
+    if len(arg_list) == 0 or arg_list[0] == 'help' or arg_list[0] == '-h':
+        print_help()
         return
-
+
     if arg_list[0] in COMMAND_LIST:
-        #print arg_list[0] + " in COMMAND_LIST"
         for cmd in COMMAND_LIST:
             if cmd == arg_list[0]:
                try:
-                    cmd.run(arg_list[1:], config_dir=config_dir)
+                    output = cmd.run(arg_list[1:], config_dir=config_dir)
+                    process_output(output)
                     return
                except CommandException:
                    pass
-    print "unknown command: " + " ".join(arg_list)
-    #printHelp()
+                except Exception as e:
+                    process_output_exception(e)
+                    return
+
+    print "unknown command: " + " ".join(arg_list)
     print "use 'molns help' to see all possible commands"
 
 
 if __name__ == "__main__":
-    parseArgs()
+    logger = logging.getLogger()
+    #logger.setLevel(logging.INFO) #for Debugging
+    #logger.setLevel(logging.DEBUG)
+    parse_args()
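+
+# Typical invocations of the command-line interface defined above, using
+# hypothetical provider and controller names:
+#
+#     molns provider setup myprovider
+#     molns start mycontroller
+#     molns exec start mycontroller "echo hello"
+#     molns instancedb list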