
Commit cfe0dc5

feat: v3.0.1.2. custom response json data structure is supported

1 parent f73eecb commit cfe0dc5

18 files changed: +639 −70 lines

Diff for: CHANGELOG

+5
@@ -1,5 +1,10 @@
 # Changelog
 
+#### v3.0.1.2
+feat: custom response formats are supported.
+fix: test cases for hive, ODPS and spark.
+docs: sync/async requests
+
 #### v3.0.1.0 changes
 feat: multi-host license support
 refactor: test case cleanup

Diff for: README.md

+2
@@ -85,6 +85,7 @@ The SLB binding succeeded, Access path http://somehost:someport,
 - [Installation](docs/cn/install.md)
 - Service capabilities
 - [Services and APIs](docs/cn/service.md)
+- [Synchronous and asynchronous requests](docs/cn/task.md)
 - Data processing capabilities
 - [Working with rds/hive/odps data](docs/cn/dbm.md)
 - [Processing data with spark](docs/cn/service.md)
@@ -109,6 +110,7 @@ The SLB binding succeeded, Access path http://somehost:someport,
 - [Plugin mechanism](docs/cn/plugin.md)
 - Best practices
 - [How to create a new project](docs/cn/new-project.md)
+- [Customizing the response structure](docs/cn/custom-response-format.md)
 - [Best practices](docs/cn/best-practice.md)
 
 ## Videos

Diff for: docs/cn/abt.md

+1-1
@@ -1,6 +1,6 @@
 # ABT usage examples
 
-## How to use the simple deployment script
+## How to use the simple deployment script (license not supported)
 
 Prerequisites: complete the `全局配置` (global) and `项目配置` (project) configuration as described in the `ABT 参考` (ABT reference) chapter. Don't worry: in practice very little of the configuration needs to be changed.

Diff for: docs/cn/custom-response-format.md

+12
@@ -0,0 +1,12 @@
+# Customizing the response structure
+
+Sometimes you need to customize the data structure of a request's return value. Returning a custom JSON format is now supported; for example:
+
+```
+from ab import jsonify
+
+
+@algorithm()
+def custom_response():
+    return jsonify({"res": 1})
+```
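To see how such a jsonify-wrapped value can pass through the framework's JSON encoder untouched, here is a minimal stdlib-only sketch of the pattern. The `Response` and `jsonify` names below are illustrative stand-ins for the flask objects the framework actually uses, not the real implementations:

```python
import json

class Response:
    """Hypothetical stand-in for flask.Response: wraps pre-serialized JSON bytes."""
    def __init__(self, data: bytes):
        self.data = data

def jsonify(obj) -> Response:
    # serialize the payload up front so the framework can return it verbatim
    return Response(json.dumps(obj).encode("utf-8"))

class AlgorithmEncoder(json.JSONEncoder):
    """Mimics the serializer change in this commit: unwrap Response objects."""
    def default(self, o):
        if isinstance(o, Response):
            return o.data.decode("utf-8")
        return super().default(o)

# an algorithm's custom response still survives the outer response envelope
wrapped = {"code": 0, "data": jsonify({"res": 1})}
print(json.dumps(wrapped, cls=AlgorithmEncoder))
```

The real encoder in this commit converts `o.data` with `str()`; the sketch decodes the bytes instead so the embedded JSON stays readable.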

Diff for: docs/cn/service.md

+1-51
@@ -143,55 +143,6 @@ response:
 }
 ```
 
-### GET /api/task
-Get the list of asynchronous algorithm tasks
-
-query string:
-* page: page number
-* size: items per page
-
-response:
-```
-{
-    "code": 0,
-    "data": [{
-        "code": 2, // 0: created, 1: running, 2: finished, -1: error
-        "task_id": "d97704ef0b704b54bb777de090531eef",
-        "app_name": "algorithm-base-demo-app",
-        "algorithm_name": "async_example",
-        "status": "begin", // algorithm status
-        "gmt_create": "2019-09-02 19:35:52.000",
-        "gmt_modified": "2019-09-02 20:23:34.000"
-        }
-    ]
-}
-```
-
-
-### GET /api/task/{task_id}/
-Get the status of an asynchronous algorithm task. Has extra data/log fields compared with the task list above
-
-path variable:
-* task_id: the async task_id returned by the interface above
-
-response:
-```
-{
-    "code": 2, // 0: created, 1: running, 2: finished, -1: error
-    "task_id": "d97704ef0b704b54bb777de090531eef",
-    "app_name": "algorithm-base-demo-app",
-    "algorithm_name": "async_example",
-    "status": "begin", // algorithm status
-    "data": {
-        "sample_rate": 100, // sample rate
-        "sample_count": 50 // number of sampled rows
-    }, // the algorithm's return value
-    "spark_app_id": "xxx", // spark application id
-    "log": "yyy", // logs printed by spark and the algorithm
-    "gmt_create": "2019-09-02 19:35:52.000",
-    "gmt_modified": "2019-09-02 20:23:34.000"
-}
-```
 
 ### DELETE /api/data_source/{data_source_id}/table/{table_name}/cache
 Delete all caches of a table
@@ -256,5 +207,4 @@ except Exception as e:
 For more information, see [Exceptions and error handling](error.md)
 
 
-# Known issues
-- All finished tasks can be viewed via /api/task; there is currently no deletion mechanism
+

Diff for: docs/cn/task.md

+76
@@ -0,0 +1,76 @@
+# Request types
+
+## Synchronous requests
+
+A synchronous request blocks until the result is returned. Use it for requests that finish quickly.
+
+Just add the following parameter to the request; sync is the default value of the mode parameter.
+```
+"mode"="sync"
+```
+
+## Asynchronous requests
+
+An asynchronous request returns a `taskId` immediately; you then query the request status by `taskId` and finally fetch the return value. Use it for long-running, non-real-time tasks, such as Hive, ODPS and Spark.
+
+Just add the following parameter to the request
+```
+"mode"="async"
+```
+
+### GET /api/task
+Get the list of asynchronous algorithm tasks
+
+query string:
+* page: page number
+* size: items per page
+
+response:
+```
+{
+    "code": 0,
+    "data": [{
+        "code": 2, // 0: created, 1: running, 2: finished, -1: error
+        "task_id": "d97704ef0b704b54bb777de090531eef",
+        "app_name": "algorithm-base-demo-app",
+        "algorithm_name": "async_example",
+        "status": "begin", // algorithm status
+        "gmt_create": "2019-09-02 19:35:52.000",
+        "gmt_modified": "2019-09-02 20:23:34.000"
+        }
+    ]
+}
+```
+
+
+### GET /api/task/{task_id}/
+Get the status of an asynchronous algorithm task. Has extra data/log fields compared with the task list above. The status value `code`:
+
+```
+"code": 2, // 0: created, 1: running, 2: finished, -1: error
+```
+
+path variable:
+* task_id: the async task_id returned by the interface above
+
+response:
+```
+{
+    "code": 2, // 0: created, 1: running, 2: finished, -1: error
+    "task_id": "d97704ef0b704b54bb777de090531eef",
+    "app_name": "algorithm-base-demo-app",
+    "algorithm_name": "async_example",
+    "status": "begin", // algorithm status
+    "data": {
+        "sample_rate": 100, // sample rate
+        "sample_count": 50 // number of sampled rows
+    }, // the algorithm's return value
+    "spark_app_id": "xxx", // spark application id
+    "log": "yyy", // logs printed by spark and the algorithm
+    "gmt_create": "2019-09-02 19:35:52.000",
+    "gmt_modified": "2019-09-02 20:23:34.000"
+}
+```
+
+## Known issues
+- All finished tasks can be viewed via /api/task; they are never deleted
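The async flow described above can be sketched as a small client-side polling loop. `fetch_status` here is a hypothetical callable standing in for the HTTP call to GET /api/task/{task_id}/; only the status codes and field names come from the docs:

```python
import time

# task status codes from the response examples above
NEW, RUNNING, DONE, ERROR = 0, 1, 2, -1

def poll_task(fetch_status, interval=0.01, max_tries=100):
    """Poll until the task finishes; fetch_status stands in for the HTTP call."""
    for _ in range(max_tries):
        task = fetch_status()
        if task["code"] == DONE:
            return task.get("data")  # the algorithm's return value
        if task["code"] == ERROR:
            raise RuntimeError(task.get("log", "task failed"))
        time.sleep(interval)
    raise TimeoutError("task did not finish in time")

# simulate a task that reports running twice, then finishes
responses = iter([{"code": RUNNING},
                  {"code": RUNNING},
                  {"code": DONE, "data": {"sample_rate": 100, "sample_count": 50}}])
result = poll_task(lambda: next(responses))
print(result)
```

A real client would replace the lambda with a request for `/api/task/{task_id}/` and likely use a longer polling interval.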

Diff for: ext/license/license.py

+1-1
@@ -6,7 +6,7 @@
 # only enc
 def step1(input_line):
     sout = [chr(ord(a) ^ ord(b)) for (a, b) in
-            zip(input_line, cycle("your-key"))]
+            zip(input_line, cycle("utf-8oZFSxbFXYpZ4mcX5FIwEXecsYbutf-8"))]
     return "".join(sout)
 
 
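`step1` is a repeating-key XOR, so applying it twice with the same key restores the input. A quick self-contained check of that property, using an illustrative placeholder key rather than the one in the commit:

```python
from itertools import cycle

def step1(input_line, key="illustrative-key"):  # placeholder key, not the real one
    # XOR each character against the repeating key; the same call also decodes
    return "".join(chr(ord(a) ^ ord(b)) for a, b in zip(input_line, cycle(key)))

encoded = step1("some-license-payload")
print(step1(encoded) == "some-license-payload")  # True
```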

Diff for: src/ab/plugins/db/hive.py

+2-3
@@ -1,8 +1,7 @@
+import sqlalchemy
+
 from sqlalchemy import MetaData, Table
 from sqlalchemy.exc import NoSuchTableError
-from sqlalchemy import *
-from sqlalchemy.engine import create_engine
-from sqlalchemy.schema import *
 
 import thrift.transport.TSocket
 

Diff for: src/ab/task/recorder.py

+3-1
@@ -11,7 +11,9 @@
 
 
 class TaskRecorder:
-    # TODO Task status
+    """
+    the status will be updated to database
+    """
     ERROR = -1
     INIT = 0
     RUNNING = 1
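The constants above imply a simple task lifecycle. A hedged sketch of how a recorder might move through it: the `start`/`done`/`error` methods and the `DONE = 2` value are inferred from the task-status docs elsewhere in this commit, not shown in this diff, and the real class persists each change to the database:

```python
class TaskRecorder:
    """Illustrative lifecycle only; the real recorder writes status to a database."""
    ERROR = -1
    INIT = 0
    RUNNING = 1
    DONE = 2  # inferred from the task-status docs, not visible in this diff

    def __init__(self):
        self.status = self.INIT
        self.result = None

    def start(self):
        self.status = self.RUNNING

    def done(self, ret=None):
        # task.py calls recorder.done(ret) after run_algorithm() succeeds
        self.status = self.DONE
        self.result = ret

    def error(self):
        self.status = self.ERROR

r = TaskRecorder()
r.start()
r.done({"res": 1})
print(r.status)  # 2
```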

Diff for: src/ab/task/task.py

+1-1
@@ -1,4 +1,3 @@
-import contextlib
 import os
 import time
 import uuid
@@ -142,6 +141,7 @@ def run(self):
         self.lazy_init()
         '''2. run'''
         ret = self.run_algorithm()
+        # fixme: throw errors when return by jsonify
         self.recorder.done(ret)
         return ret
     finally:

Diff for: src/ab/utils/serializer.py

+3
@@ -2,6 +2,7 @@
 import sys
 import pickle
 from datetime import date, datetime
+from flask import Response
 
 import numpy
 import pandas as pd
@@ -13,6 +14,8 @@
 
 class AlgorithmEncoder(json.JSONEncoder):
     def default(self, o):
+        if isinstance(o, Response):
+            return str(o.data)
         if isinstance(o, pd.DataFrame):
             return o.to_dict('records')
         if isinstance(o, pd.Series):

Diff for: tests/api/algorithms/args.py

+11-2
@@ -1,15 +1,18 @@
 from ab.utils import logger
 from ab.utils.algorithm import algorithm
+from ab import jsonify
 
 
 @algorithm('args')
-def get_data(task_id, data_source_id=None, table_name=None, data=None, table_info=None, recorder=None, cache_client=None,
+def get_data(task_id, data_source_id=None, table_name=None, data=None, table_info=None, recorder=None,
+             cache_client=None,
              dfs_client=None, eureka_client=None, qs_arg=None, f1=None, single_form_arg: int = None,
              couple_form_args: int = None, the_file=None):
     logger.info('get task_id:', task_id)
     return {'task_id': task_id, 'data_source_id': data_source_id, 'table_name': table_name, 'data': data,
             'table_info': table_info, 'recorder': str(recorder), 'cache_client': str(cache_client),
-            'dfs_client': str(dfs_client), 'eureka_client': str(eureka_client), 'spark': None,  # spark init is slow, disable it here
+            'dfs_client': str(dfs_client), 'eureka_client': str(eureka_client), 'spark': None,
+            # spark init is slow, disable it here
             'qs_arg': qs_arg, 'f1': f1, 'single_form_arg': single_form_arg, 'couple_form_args': couple_form_args,
             'the_file_context': the_file.read().decode('utf-8') if the_file else None,
             'the_filename': the_file.filename if the_file else None
@@ -19,3 +22,9 @@ def get_data(task_id, data_source_id=None, table_name=None, data=None, table_inf
 @algorithm()
 def fixture_overwrite(f2=None, f3=None):
     return f2, f3
+
+
+@algorithm()
+def custom_response():
+    # return "hello"
+    return jsonify({"res": 1})

Diff for: tests/api/algorithms/task.py

+23
@@ -0,0 +1,23 @@
+# coding: utf-8
+
+
+from ab.utils.algorithm import algorithm
+
+
+@algorithm()
+def sync():
+    return "hello-sync-task"
+
+
+@algorithm()
+def async_unlimit():
+    import time
+    time.sleep(2)
+    return "hello-async-unlimit-task"
+
+
+@algorithm()
+def async_pool():
+    import time
+    time.sleep(2)
+    return "hello-async-pool-task"

Diff for: tests/api/test_hive_ldap.py

+6-7
@@ -2,20 +2,19 @@
 
 hive = {
     "type": "hive",
-    "host": ac.get_value("test_hive_host"),
-    "port": ac.get_value("test_hive_port"),
-    "username": ac.get_value("test_hive_username"),
-    "password": ac.get_value("test_hive_password"),
-    "db": ac.get_value("test_hive_db_zyq")
+    "host": ac.get_value("test_docker_hive_host"),
+    "port": ac.get_value("test_docker_hive_port"),
+    "username": ac.get_value("test_docker_hive_username"),
+    "password": ac.get_value("test_docker_hive_password"),
+    "db": ac.get_value("test_docker_hive_db_testdb")
 }
 
-
 def test_hive_ldap(client):
     input = {
         'data_source': hive,
         'cacheable': False,
         'args': {
-            'table_name': 't'
+            'table_name': 'student'
        }
    }
    resp = client.post_data('/api/algorithm/args?qs_arg=123', input)
