diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 38a8295a96176..0da6ab8d0f688 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -401,6 +401,7 @@ def get_tables_for_schema( @serialized_lru_cache(maxsize=1) def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]]: + logger.debug(f"Fetching views for {db_name}") page_limit = SHOW_VIEWS_MAX_PAGE_SIZE views: Dict[str, List[SnowflakeView]] = {} @@ -425,6 +426,7 @@ def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]] view_name = view["name"] schema_name = view["schema_name"] + logger.debug(f"Fetched view: {view_name} for {schema_name}") if schema_name not in views: views[schema_name] = [] views[schema_name].append( @@ -449,6 +451,13 @@ def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]] ) view_pagination_marker = view_name + if logger.isEnabledFor(logging.DEBUG): + num_views_per_schema = { + schema_name: len(schema_views) + for schema_name, schema_views in views.items() + } + logger.debug(f"Fetched views per schema: {num_views_per_schema}") + return views @serialized_lru_cache(maxsize=SCHEMA_PARALLELISM) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 08d4bbe118513..2fde31c9fb8fb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -613,6 +613,9 @@ def fetch_views_for_schema( self.report.report_entity_scanned(view_name, "view") if not self.filters.filter_config.view_pattern.allowed(view_name): + logger.debug( + f"Dropping view {view_name} in database {db_name} and schema {schema_name} due to view pattern filter" + ) self.report.report_dropped(view_name) else: views.append(view) @@ -1227,9 +1230,13 @@ def get_views_for_schema( self, schema_name: str, db_name: str ) -> List[SnowflakeView]: views = self.data_dictionary.get_views_for_database(db_name) - # Some schema may not have any table - return views.get(schema_name, []) + result = views.get(schema_name, []) + logger.info( + f"Number of views in database {db_name} for schema {schema_name}: {len(result)}" + ) + + return result def get_columns_for_table( self, table_name: str, snowflake_schema: SnowflakeSchema, db_name: str