diff --git a/.github/ISSUE_TEMPLATE b/.github/ISSUE_TEMPLATE new file mode 100644 index 00000000..a94b3afb --- /dev/null +++ b/.github/ISSUE_TEMPLATE @@ -0,0 +1 @@ +spark-ec2 is no longer in active development. Please refer to the README. diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE new file mode 100644 index 00000000..a94b3afb --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE @@ -0,0 +1 @@ +spark-ec2 is no longer in active development. Please refer to the README. diff --git a/README.md b/README.md index 523ecd1a..854325f3 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +_Please note: spark-ec2 is **no longer under active development** and the project has been archived. All the existing code, PRs and issues are still accessible but are now read-only. If you're looking for a similar tool that is under active development, we recommend you take a look at [Flintrock](https://github.com/nchammas/flintrock)._ + # EC2 Cluster Setup for Apache Spark `spark-ec2` allows you @@ -52,8 +54,8 @@ identify machines belonging to each cluster in the Amazon EC2 Console. ```bash export AWS_SECRET_ACCESS_KEY=AaBbCcDdEeFGgHhIiJjKkLlMmNnOoPpQqRrSsTtU export AWS_ACCESS_KEY_ID=ABCDEFG1234567890123 ./spark-ec2 --key-pair=awskey --identity-file=awskey.pem --region=us-west-1 --zone=us-west-1a launch my-spark-cluster ``` - After everything launches, check that the cluster scheduler is up and sees @@ -65,7 +67,7 @@ following options are worth pointing out: - `--instance-type=` can be used to specify an EC2 instance type to use. For now, the script only supports 64-bit instance -types, and the default type is `m1.large` (which has 2 cores and 7.5 GB +types, and the default type is `m3.large` (which has 2 cores and 7.5 GB RAM). 
Refer to the Amazon pages about [EC2 instance types](http://aws.amazon.com/ec2/instance-types) and [EC2 pricing](http://aws.amazon.com/ec2/#pricing) for information about other @@ -110,8 +112,8 @@ permissions on your private key file, you can run `launch` with the ```bash export AWS_SECRET_ACCESS_KEY=AaBbCcDdEeFGgHhIiJjKkLlMmNnOoPpQqRrSsTtU export AWS_ACCESS_KEY_ID=ABCDEFG1234567890123 ./spark-ec2 --key-pair=awskey --identity-file=awskey.pem --region=us-west-1 --zone=us-west-1a --vpc-id=vpc-a28d24c7 --subnet-id=subnet-4eb27b39 --spark-version=1.1.0 launch my-spark-cluster ``` ## Running Applications @@ -148,7 +150,7 @@ as JVM options. This file needs to be copied to **every machine** to reflect the do this is to use a script we provide called `copy-dir`. First edit your `spark-env.sh` file on the master, then run `~/spark-ec2/copy-dir /root/spark/conf` to RSYNC it to all the workers. -The [configuration guide](configuration.html) describes the available configuration options. +The [configuration guide](http://spark.apache.org/docs/latest/configuration.html) describes the available configuration options. 
## Terminating a Cluster diff --git a/spark_ec2.py b/spark_ec2.py index f6aa00aa..28d72f43 100644 --- a/spark_ec2.py +++ b/spark_ec2.py @@ -51,7 +51,7 @@ raw_input = input xrange = range -SPARK_EC2_VERSION = "1.6.0" +SPARK_EC2_VERSION = "1.6.2" SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) VALID_SPARK_VERSIONS = set([ @@ -76,6 +76,8 @@ "1.5.1", "1.5.2", "1.6.0", + "1.6.1", + "1.6.2", ]) SPARK_TACHYON_MAP = { @@ -94,6 +96,8 @@ "1.5.1": "0.7.1", "1.5.2": "0.7.1", "1.6.0": "0.8.2", + "1.6.1": "0.8.2", + "1.6.2": "0.8.2", } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION @@ -101,7 +105,7 @@ # Default location to get the spark-ec2 scripts (and ami-list) from DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/amplab/spark-ec2" -DEFAULT_SPARK_EC2_BRANCH = "branch-1.5" +DEFAULT_SPARK_EC2_BRANCH = "branch-1.6" def setup_external_libs(libs): @@ -192,7 +196,7 @@ def parse_args(): help="If you have multiple profiles (AWS or boto config), you can configure " + "additional, named profiles by using this option (default: %default)") parser.add_option( - "-t", "--instance-type", default="m1.large", + "-t", "--instance-type", default="m3.large", help="Type of instance to launch (default: %default). " + "WARNING: must be 64-bit; small instances won't work") parser.add_option(