diff --git a/redash/query_runner/snowflake.py b/redash/query_runner/snowflake.py
index ce40e343b7..d1fbc311db 100644
--- a/redash/query_runner/snowflake.py
+++ b/redash/query_runner/snowflake.py
@@ -16,7 +16,9 @@
     BaseSQLQueryRunner,
     register,
 )
-from redash.utils import json_dumps
+
+from redash.utils import json_dumps, json_loads
+import re
 
 TYPES_MAP = {
     0: TYPE_INTEGER,
@@ -32,7 +34,53 @@
 }
 
 
+# Queries containing this marker skip the ``events`` table restrictions.
+_RESTRICTIONS_BYPASS_MARKER = "/*laspha*/"
+# ``/* ... */`` block comments (non-greedy) and ``--`` line comments.
+_SQL_COMMENT_RE = re.compile(r"/\*.*?\*/|--[^\n]*", re.DOTALL)
+# Database/schema qualifiers that may precede the table name.
+_SCHEMA_PREFIX_RE = re.compile(r"\b(?:bigbrain|final|raw)\.")
+# ``events`` as a FROM/JOIN source (not ``events_x`` or ``events.x``).
+_EVENTS_REFERENCE_RE = re.compile(r"\b(?:from|join) events(?![\w.$])")
+# Columns whose presence counts as a time constraint on ``events``.
+_EVENTS_TIME_COLUMNS = ("created_at", "ingestion_time")
+
+
+def _query_restrictions(query):
+    """Validate a query against the usage rules for the ``events`` table.
+
+    This is a text heuristic, not a SQL parser: comments are stripped, the
+    text is lower-cased, ``bigbrain.``/``final.``/``raw.`` qualifiers are
+    dropped and whitespace is collapsed before matching.
+
+    Returns a ``(passed, error)`` tuple; ``error`` is ``None`` when the query
+    is allowed and a user-facing message otherwise.
+    """
+    if _RESTRICTIONS_BYPASS_MARKER in query:
+        return True, None
+
+    sql = _SQL_COMMENT_RE.sub(" ", query).lower()
+    sql = _SCHEMA_PREFIX_RE.sub("", sql)
+    # Collapse newlines, tabs and repeated spaces so that exactly one space
+    # separates tokens for the patterns below.
+    sql = " ".join(sql.split())
+
+    occurrences = len(_EVENTS_REFERENCE_RE.findall(sql))
+    if occurrences > 1:
+        return False, (
+            "Querying events table multiple times is forbidden. "
+            f"The query contains {occurrences} occurrences of the table events."
+        )
+
+    if occurrences and not any(column in sql for column in _EVENTS_TIME_COLUMNS):
+        return False, (
+            "Querying events table should always be with time constraint (by created_at for "
+            "FINAL.events & ingestion_time for RAW.events) "
+        )
+    return True, None
+
+
 class Snowflake(BaseSQLQueryRunner):
     noop_query = "SELECT 1"
 
     @classmethod
@@ -124,6 +172,12 @@ def _parse_results(self, cursor):
     def run_query(self, query, user, query_id=None):
+        # Validate before opening a connection so a rejected query leaves
+        # nothing to clean up.
+        passed, error = _query_restrictions(query)
+        if not passed:
+            return None, error
+
         connection = self._get_connection()
         cursor = connection.cursor()
 
         try:
             cursor.execute("USE WAREHOUSE {}".format(self.configuration["warehouse"]))
@@ -131,7 +185,7 @@ def run_query(self, query, user, query_id=None):
 
             user_id = "redash" if user is None else user.email
             query_id = str(query_id) if query_id else ''
-            query += "-- REDASH USER: " + user_id + " QUERY ID: " + query_id
+            query += "-- " + json_dumps({"REDASH USER": user_id, "QUERY ID": query_id})
 
             cursor.execute(query)
 
diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py
index 69a51f2a57..c45dbeeff0 100644
--- a/redash/tasks/queries/execution.py
+++ b/redash/tasks/queries/execution.py
@@ -177,7 +177,10 @@ def run(self):
         annotated_query = self._annotate_query(query_runner)
 
         try:
-            data, error = query_runner.run_query(annotated_query, self.user, self.query_id)
+            if self.data_source.type.lower() == "snowflake":
+                data, error = query_runner.run_query(annotated_query, self.user, self.query_id)
+            else:
+                data, error = query_runner.run_query(annotated_query, self.user)
         except Exception as e:
             if isinstance(e, JobTimeoutException):
                 error = TIMEOUT_MESSAGE