From 260e4b106011cdc3d2ffea486d4291159eeeb976 Mon Sep 17 00:00:00 2001 From: wangyingjie Date: Fri, 30 Jun 2023 23:17:56 +0800 Subject: [PATCH 1/6] Add Cpp Deployment in Ray Serve Signed-off-by: wangyingjie --- reps/2023-06-30-serve-cpp-deployment.md | 411 ++++++++++++++++++++++++ 1 file changed, 411 insertions(+) create mode 100644 reps/2023-06-30-serve-cpp-deployment.md diff --git a/reps/2023-06-30-serve-cpp-deployment.md b/reps/2023-06-30-serve-cpp-deployment.md new file mode 100644 index 00000000..210ea58a --- /dev/null +++ b/reps/2023-06-30-serve-cpp-deployment.md @@ -0,0 +1,411 @@ +## Summary +### General Motivation +In scenarios such as search and inference, system performance is of utmost importance. Taking the model inference and search engines of Ant Group as an example, these systems require high throughput, low latency, and high concurrency because they handle a massive volume of business traffic. To meet these requirements, they have all been designed and developed in C++, which ensures high efficiency and stability. These systems now plan to run on Ray Serve to enhance their distributed capabilities. Therefore, Ray Serve needs to provide C++ deployment so that users can easily deploy their services. +### Should this change be within `ray` or outside? +main ray project. Changes are made to the Ray Serve level. +## Stewardship + +### Required Reviewers +@sihanwang41 + +### Shepherd of the Proposal (should be a senior committer) +@sihanwang41 + +## Design and Architecture + +### Example Model +Taking a recommendation system as an example, when a user enters a search word, the system returns recommended words that are similar to it. 
+ +![image](https://user-images.githubusercontent.com/40862361/250098983-a6ef70f7-667b-47c3-9838-7b39111a5240.png) + +The RecommendService receives user requests and calls the FeatureService, SimilarityService, and RankService to calculate similar words, then return the results to users: +```cpp +#pragma once + +#include + +#include "FeatureService.h" +#include "SimilarityService.h" +#include "RankService.h" + +namespace ray { +namespace serve { + +class RecommendService { + public: + RecommendService() { + feature_service_ = std::make_shared(); + similarity_service_ = std::make_shared(); + rank_service_ = std::make_shared(); + } + + std::vector Recommend(const std::string &request, const int num) { + // 1. Calculate vector + std::vector features = feature_service_->GetVector(request); + + // 2. Calculate similarity + std::unordered_map similarities = similarity_service_->GetSimilarity(features); + + // 3. Get similar words + std::vector result = rank_service_->Rank(similarities, num); + return result; +} + + private: + std::shared_ptr feature_service_; + std::shared_ptr similarity_service_; + std::shared_ptr rank_service_; +}; + +} // namespace serve +} // namespace ray + +``` +The FeatureService convert requests to vector: +```cpp +#pragma once + +#include +#include +#include + +namespace ray { +namespace serve { + +class FeatureService { + public: + FeatureService() { + model_.load("model/word2vec.bin"); + } + + ~FeatureService() { + delete model_; + model_ = nullptr; + } + + std::vector GetVector(const std::string &request) { + return model_.getVector(request); + } + + private: + word2vec::Word2Vec model_; +}; + +} // namespace serve +} // namespace ray + +``` +The SimilarityService is used to calculate similarity: +```cpp +#pragma once + +#include +#include +#include +#include + +namespace ray { +namespace serve { + +class SimilarityService { + public: + std::unordered_map GetSimilarity(std::vector request_vec) { + std::unordered_map result; + for (auto it = 
recommend_cache_.begin(); it != recommend_cache_.end(); it++) { + result.insert({it->first, ComputeCosineSimilarity(request_vec, it->second)}); + } + return result; + } + + private: + float ComputeCosineSimilarity(std::vector v1, std::vector v2) { + int len = v1.size(); + float dotProduct = 0; + float magnitude1 = 0; + float magnitude2 = 0; + for (int i = 0; i < len; i++) { + dotProduct += v1[i] * v2[i]; + magnitude1 += v1[i] * v1[i]; + magnitude2 += v2[i] * v2[i]; + } + magnitude1 = std::sqrt(magnitude1); + magnitude2 = std::sqrt(magnitude2); + return dotProduct / (magnitude1 * magnitude2); + } + + std::unordered_map> recommend_cache_ = { + {"mac", {1.5, 2.3, 3.5, 5.5}}, + {"car", {1.5, 3.2, 3.9, 7.5}}, + {"phone", {1.5, 2.0, 4.5, 8.1}}, + }; +}; + +} // namespace serve +} // namespace ray + +``` +The RankService is used to sort based on similarity: +```cpp +#pragma once + +#include +#include +#include +#include +#include + +namespace ray { +namespace serve { + +class RankService { + public: + std::vector Rank(std::unordered_map recommends, int num) { + std::priority_queue, std::vector>, std::greater>> pq; + for (auto& pair : recommends) { + pq.push(pair); + if (pq.size() > num) { + pq.pop(); + } + } + + std::vector result; + while (!pq.empty()) { + result.push_back(pq.top().first); + pq.pop(); + } + std::reverse(result.begin(), result.end()); + return result; + } +}; + +} // namespace serve +} // namespace ray + +``` +This is the code that uses the RecommendService class: +```cpp +#include "RecommendService.h" + +int main(int argc, char *argv[]) { + ray::serve::RecommendService recommend_service; + std::vector recommends = recommend_service.Recommend("computer", 1); + for(std::string recommend_word : recommends) { + printf("Recommend word is %s", recommend_word.c_str()); + } + return 0; +} +``` +In this way, all services need to be deployed together, which increases the system load and is not conducive to expansion. 
+### Converting to a Ray Serve Deployment +Through Ray Serve, the core computing logic can be deployed as a scalable distributed service. +First, convert these Services to run on Ray Serve. +FeatureService: +```cpp +#pragma once + +#include +#include +#include + +namespace ray { +namespace serve { + +class FeatureService { + public: + FeatureService() { + model_.load("model/word2vec.bin"); + } + + ~FeatureService() { + delete model_; + model_ = nullptr; + } + + std::vector GetVector(const std::string &request) { + return model_.getVector(request); + } + + static FeatureService *FactoryCreate() { + return new FeatureService(); + } + private: + word2vec::Word2Vec model_; +}; + +// Register function +SERVE_DEPLOYMENT(FeatureService::FactoryCreate); + +} // namespace serve +} // namespace ray + +``` +SimilarityService: +```cpp +#pragma once + +#include +#include +#include +#include + +namespace ray { +namespace serve { + +class SimilarityService { + public: + std::unordered_map GetSimilarity(std::vector request_vec) { + std::unordered_map result; + for (auto it = recommend_cache_.begin(); it != recommend_cache_.end(); it++) { + result.insert({it->first, ComputeCosineSimilarity(request_vec, it->second)}); + } + return result; + } + + static SimilarityService *FactoryCreate() { + return new SimilarityService(); + } + + private: + float ComputeCosineSimilarity(std::vector v1, std::vector v2) { + int len = v1.size(); + float dotProduct = 0; + float magnitude1 = 0; + float magnitude2 = 0; + for (int i = 0; i < len; i++) { + dotProduct += v1[i] * v2[i]; + magnitude1 += v1[i] * v1[i]; + magnitude2 += v2[i] * v2[i]; + } + magnitude1 = std::sqrt(magnitude1); + magnitude2 = std::sqrt(magnitude2); + return dotProduct / (magnitude1 * magnitude2); + } + + std::unordered_map> recommend_cache_ = { + {"mac", {1.5, 2.3, 3.5, 5.5}}, + {"car", {1.5, 3.2, 3.9, 7.5}}, + {"phone", {1.5, 2.0, 4.5, 8.1}}, + }; +}; + +// Register function +SERVE_DEPLOYMENT(SimilarityService::FactoryCreate); + 
+} // namespace serve +} // namespace ray + +``` +RankService: +```cpp +#pragma once + +#include +#include +#include +#include +#include + +namespace ray { +namespace serve { + +class RankService { + public: + std::vector Rank(std::unordered_map recommends, int num) { + std::priority_queue, std::vector>, std::greater>> pq; + for (auto& pair : recommends) { + pq.push(pair); + if (pq.size() > num) { + pq.pop(); + } + } + + std::vector result; + while (!pq.empty()) { + result.push_back(pq.top().first); + pq.pop(); + } + std::reverse(result.begin(), result.end()); + return result; + } + + static RankService *FactoryCreate() { + return new RankService(); + } +}; + +// Register function +SERVE_DEPLOYMENT(RankService::FactoryCreate); + +} // namespace serve +} // namespace ray + +``` +RecommendService is a sequential invocation of other services without complex processing logic, so we can directly use the DAG ability to connect these services, eliminating the need for RecommendService and simplifying user logic. 
+Next, we start the Ray Serve runtime and use Python Serve API deploy these Service as Deployment: +```python +feature_service = serve.deployment(_func_or_class='FeatureService::FactoryCreate', name='feature_service', language='CPP') +similarity_service = serve.deployment(_func_or_class='SimilarityService::FactoryCreate', name='similarity_service', language='CPP') +rank_service = serve.deployment(_func_or_class='RankService::FactoryCreate', name='rank_service', language='CPP') + +with InputNode() as input: + features = feature_service.GetVector.bind(input[0]) + similarities = similarity_service.GetSimilarity.bind(features) + rank_result = rank_service.Rank.bind(similarities, input[1]) + +graph = DAGDriver.bind(rank_result, http_adapter=json_request) +handle = serve.run(graph) +ref = handle.GetVector.remote() +result = ray.get(ref) +print(result) +``` +### Calling Ray Serve Deployment with HTTP +```java +curl http://127.0.0.1:8000?request=computer&num=1 +``` +### Overall Design + ![image](https://user-images.githubusercontent.com/40862361/250100276-5eeed3a1-70a2-4208-8563-e0b1bf7f7fd2.png) +Ray Serve maintains a Controller Actor and an Ingress Actor. These two roles are independent of the language the user chooses, and they are able to manage cross-language deployments and route requests to them. +### C++ Case Deduction +Users send control commands to the Ray Serve Controller Actor, such as creating an ingress or creating a deployment. When a C++ online service is published, the DeploymentState component needs to create C++ Deployment Actors. Users can then call their business logic from a Python/Java driver, a Ray task, or a Ray actor, and the requests are dispatched to the C++ Deployment Actors. +![image](https://user-images.githubusercontent.com/40862361/250100685-96466519-1f8e-414e-8d8a-1bcb63952a41.png) +### Package +C++ programs are typically compiled and packaged in one of three forms: an executable binary, a static library, or a shared library. 
+- binary: C++ has no standard API for loading an executable binary, and binaries built by different compilers are not guaranteed to be compatible. Loading a binary directly therefore introduces many uncontrollable factors. +- static library: A static library bundles all of its dependencies, whereas common utility libraries such as glog and boost may be loaded as dynamic dependencies in the Ray Serve Deployment. When the same library is present as both a dynamic and a static dependency, conflicts become much more likely. +- shared library: Shared libraries can be loaded through a standard API, and they reduce memory usage and avoid the conflicts described above. + +In conclusion, users need to package their system as a shared library to run it on Ray Serve. +### Register function +Ray Serve will add the `SERVE_FUNC` and `SERVE_DEPLOYMENT` macros so that users can publish their services as Deployments. +`SERVE_FUNC` resolves which overload of an overloaded function is registered. +`SERVE_DEPLOYMENT` publishes the user's services as Deployments. +Example: +```cpp +static RecommendService *CreateRecommendService(std::string request) { + return new RecommendService(request); +} + +static RecommendService *CreateRecommendService(std::string request, int num) { + return new RecommendService(request, num); +} + +static FeatureService *CreateFeatureService() { + return new FeatureService(); +} + +// Register function +SERVE_DEPLOYMENT(SERVE_FUNC(CreateRecommendService, std::string, int), + CreateFeatureService); +``` +## Compatibility, Deprecation, and Migration Plan +The new feature is to add C++ deployment for Ray Serve, without modifying or deprecating existing functionality. The changes to the Ray Serve API are purely additive. 
+ +## Test Plan and Acceptance Criteria +- Unit and integration test for core components +- Benchmarks on C++ Deployment + +## (Optional) Follow-on Work +- Init Ray Serve C++ project structure +- Create C++ Deployment through Python/Java API +- ServeHandler for C++ +- Accessing C++ Deployment using Python/Java/C++ ServeHandle or HTTP From 784e9d9696063c3d7b2eec196653497d9ae643bf Mon Sep 17 00:00:00 2001 From: wangyingjie Date: Tue, 7 Nov 2023 21:57:33 +0800 Subject: [PATCH 2/6] update to use bind Signed-off-by: wangyingjie --- reps/2023-06-30-serve-cpp-deployment.md | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/reps/2023-06-30-serve-cpp-deployment.md b/reps/2023-06-30-serve-cpp-deployment.md index 210ea58a..36da9a2c 100644 --- a/reps/2023-06-30-serve-cpp-deployment.md +++ b/reps/2023-06-30-serve-cpp-deployment.md @@ -7,9 +7,13 @@ main ray project. Changes are made to the Ray Serve level. ### Required Reviewers @sihanwang41 +@edoakes +@akshay-anyscale ### Shepherd of the Proposal (should be a senior committer) @sihanwang41 +@edoakes +@akshay-anyscale ## Design and Architecture @@ -343,17 +347,16 @@ SERVE_DEPLOYMENT(RankService::FactoryCreate); RecommendService is a sequential invocation of other services without complex processing logic, so we can directly use the DAG ability to connect these services, eliminating the need for RecommendService and simplifying user logic. 
Next, we start the Ray Serve runtime and use Python Serve API deploy these Service as Deployment: ```python -feature_service = serve.deployment(_func_or_class='FeatureService::FactoryCreate', name='feature_service', language='CPP') -similarity_service = serve.deployment(_func_or_class='SimilarityService::FactoryCreate', name='similarity_service', language='CPP') -rank_service = serve.deployment(_func_or_class='RankService::FactoryCreate', name='rank_service', language='CPP') +feature_deployment = serve.deployment(_func_or_class='FeatureService::FactoryCreate', name='feature_service', language='CPP') +similarity_deployment = serve.deployment(_func_or_class='SimilarityService::FactoryCreate', name='similarity_service', language='CPP') +rank_deployment = serve.deployment(_func_or_class='RankService::FactoryCreate', name='rank_service', language='CPP') -with InputNode() as input: - features = feature_service.GetVector.bind(input[0]) - similarities = similarity_service.GetSimilarity.bind(features) - rank_result = rank_service.Rank.bind(similarities, input[1]) +feature_service = feature_deployment.bind() +similarity_service = similarity_deployment.bind() +rank_service = rank_deployment.bind() -graph = DAGDriver.bind(rank_result, http_adapter=json_request) -handle = serve.run(graph) +app = RecommendService.bind(feature_service, similarity_service, rank_service) +handle = serve.run(app) ref = handle.GetVector.remote() result = ray.get(ref) print(result) From 5b9417eec499aa82b8497408725af41c26b812f3 Mon Sep 17 00:00:00 2001 From: wangyingjie Date: Wed, 8 Nov 2023 10:14:14 +0800 Subject: [PATCH 3/6] Introduction to deleting DAG Signed-off-by: wangyingjie --- reps/2023-06-30-serve-cpp-deployment.md | 1 - 1 file changed, 1 deletion(-) diff --git a/reps/2023-06-30-serve-cpp-deployment.md b/reps/2023-06-30-serve-cpp-deployment.md index 36da9a2c..678dd10b 100644 --- a/reps/2023-06-30-serve-cpp-deployment.md +++ b/reps/2023-06-30-serve-cpp-deployment.md @@ -344,7 +344,6 @@ 
SERVE_DEPLOYMENT(RankService::FactoryCreate); } // namespace ray ``` -RecommendService is a sequential invocation of other services without complex processing logic, so we can directly use the DAG ability to connect these services, eliminating the need for RecommendService and simplifying user logic. Next, we start the Ray Serve runtime and use Python Serve API deploy these Service as Deployment: ```python feature_deployment = serve.deployment(_func_or_class='FeatureService::FactoryCreate', name='feature_service', language='CPP') From fed4b3c95e701c3c78e2c52b19da7010834802da Mon Sep 17 00:00:00 2001 From: wangyingjie Date: Sun, 12 Nov 2023 16:46:44 +0800 Subject: [PATCH 4/6] Change python api Signed-off-by: wangyingjie --- reps/2023-06-30-serve-cpp-deployment.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/reps/2023-06-30-serve-cpp-deployment.md b/reps/2023-06-30-serve-cpp-deployment.md index 678dd10b..8fdfb885 100644 --- a/reps/2023-06-30-serve-cpp-deployment.md +++ b/reps/2023-06-30-serve-cpp-deployment.md @@ -346,9 +346,9 @@ SERVE_DEPLOYMENT(RankService::FactoryCreate); ``` Next, we start the Ray Serve runtime and use Python Serve API deploy these Service as Deployment: ```python -feature_deployment = serve.deployment(_func_or_class='FeatureService::FactoryCreate', name='feature_service', language='CPP') -similarity_deployment = serve.deployment(_func_or_class='SimilarityService::FactoryCreate', name='similarity_service', language='CPP') -rank_deployment = serve.deployment(_func_or_class='RankService::FactoryCreate', name='rank_service', language='CPP') +feature_deployment = serve.deployment('FeatureService::FactoryCreate', name='feature_service', language='CPP') +similarity_deployment = serve.deployment('SimilarityService::FactoryCreate', name='similarity_service', language='CPP') +rank_deployment = serve.deployment('RankService::FactoryCreate', name='rank_service', language='CPP') feature_service = feature_deployment.bind() 
similarity_service = similarity_deployment.bind() From 28732401479c5b0f695a48c04066b1e181eca314 Mon Sep 17 00:00:00 2001 From: wangyingjie Date: Fri, 1 Dec 2023 19:29:58 +0800 Subject: [PATCH 5/6] Change follow-on work Signed-off-by: wangyingjie --- reps/2023-06-30-serve-cpp-deployment.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/reps/2023-06-30-serve-cpp-deployment.md b/reps/2023-06-30-serve-cpp-deployment.md index 8fdfb885..f0662ba8 100644 --- a/reps/2023-06-30-serve-cpp-deployment.md +++ b/reps/2023-06-30-serve-cpp-deployment.md @@ -407,7 +407,6 @@ The new feature is to add C++ deployment for Ray Serve, without modifying or dep - Benchmarks on C++ Deployment ## (Optional) Follow-on Work -- Init Ray Serve C++ project structure -- Create C++ Deployment through Python/Java API +- Implement C++ Deployment +- Accessing C++ Deployment using HTTP or Python ServeHandle - ServeHandler for C++ -- Accessing C++ Deployment using Python/Java/C++ ServeHandle or HTTP From 8a1aa6660e98591b741f670f6c3b872c08cb3395 Mon Sep 17 00:00:00 2001 From: wangyingjie Date: Tue, 5 Dec 2023 11:15:00 +0800 Subject: [PATCH 6/6] Change http request Signed-off-by: wangyingjie --- reps/2023-06-30-serve-cpp-deployment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reps/2023-06-30-serve-cpp-deployment.md b/reps/2023-06-30-serve-cpp-deployment.md index f0662ba8..b0294867 100644 --- a/reps/2023-06-30-serve-cpp-deployment.md +++ b/reps/2023-06-30-serve-cpp-deployment.md @@ -362,7 +362,7 @@ print(result) ``` ### Calling Ray Serve Deployment with HTTP ```java -curl http://127.0.0.1:8000?request=computer&num=1 +curl -d '{"request": "computer", "num": 1}' http://127.0.0.1:8000 ``` ### Overall Design ![image](https://user-images.githubusercontent.com/40862361/250100276-5eeed3a1-70a2-4208-8563-e0b1bf7f7fd2.png)