File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -137,6 +137,12 @@ def dataset_structure(self) -> DatasetStructure:
137137 def dataset_structure (self , value : DatasetStructure ) -> None :
138138 self ._dataset_info .dataset_structure = value
139139
140+ @property
141+ def logger (self ) -> logging .Logger :
142+ """Return the `logging.Logger` stored on this instance as `self._logger`.
143+ """
144+ return self ._logger
145+
140146 def shard_info_iterator (self , split : SplitT | None ) -> Iterator [ShardInfo ]:
141147 """Iterate all `ShardInfo` in the split.
142148
@@ -305,6 +311,15 @@ def __init__(
305311 if shard_filter (shard_info )
306312 ]
307313
314+ kept_metadata : set [str ] = {
315+ str (s .custom_metadata ) for s in shards_list
316+ }
317+ self .dataset .logger .info (
318+ "Filtered shards with custom metadata: %s from split: %s" ,
319+ kept_metadata ,
320+ split ,
321+ )
322+
308323 # Only use a limited amount of shards for each setting of
309324 # custom_metadata.
310325 if custom_metadata_type_limit :
@@ -319,6 +334,7 @@ def __init__(
319334 counts [k ] = counts .get (k , 0 ) + 1
320335 if counts [k ] <= custom_metadata_type_limit :
321336 shard_list .append (shard_info )
337+ self .dataset .logger .info ("Took %s shards total" , len (shard_list ))
322338
323339 # Limit the number of shards.
324340 if shards :
You can’t perform that action at this time.
0 commit comments