diff --git a/pom.xml b/pom.xml index e58e137f62..2ec68b9ac0 100644 --- a/pom.xml +++ b/pom.xml @@ -461,6 +461,9 @@ Arturo Bernal + + Ramanan Ravi + diff --git a/src/main/java/org/apache/commons/collections4/CollectionUtils.java b/src/main/java/org/apache/commons/collections4/CollectionUtils.java index 28b5f3a67d..1ffd79c7e5 100644 --- a/src/main/java/org/apache/commons/collections4/CollectionUtils.java +++ b/src/main/java/org/apache/commons/collections4/CollectionUtils.java @@ -17,6 +17,7 @@ package org.apache.commons.collections4; import java.lang.reflect.Array; +import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -30,6 +31,9 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; +import java.util.stream.Collectors; import org.apache.commons.collections4.bag.HashBag; import org.apache.commons.collections4.collection.PredicatedCollection; @@ -2153,4 +2157,53 @@ public static E extractSingleton(final Collection collection) { } return collection.iterator().next(); } + + /** + * Returns partitions of given {@code collection}, each of size + * {@code chunkSize} (the final partition may be smaller). + *

+ * For example, partitioning a {@code collection} containing + * {@code [a, b, c, d, e, f, g]} with a {@code chunkSize} of 3 yields an outer + * {@link List} containing three collections of type {@code collection} where + * the first two collections will have three elements each and the final + * collection will have one element. Ordering of elements would be based on that + * of the Stream of the given {@code collection}. + *

+ *

+ * Passing an empty {@code collection} as input would return an empty + * {@link List}. Passing {@code chunkSize} greater than the size of input + * {@code collection} would return a {@link List} with just one element which + * would in-turn be the input {@code collection} itself. + *

+ * + * @param the type of Collection + * @param collection the collection to be partitioned + * @param chunkSize the desired size of each partition (the last may be + * smaller) + * @return a list of collections (type as that of given input collection) + * @throws NullPointerException if the input collection is null + * @throws IllegalArgumentException if the input chunkSize is lesser than or + * equal to 0 + * @throws IllegalArgumentException if new instance of input collection cannot + * be instantiated + */ + @SuppressWarnings({ "unchecked", "rawtypes" }) + public static List partitionByChunkSize(final E collection, int chunkSize) { + Objects.requireNonNull(collection, "input collection must not be null"); + if (chunkSize <= 0) { + throw new IllegalArgumentException("input chunk size must be greater than 0"); + } + Supplier supplier = () -> { + try { + return (E) collection.getClass().getDeclaredConstructor().newInstance(); + } catch (InvocationTargetException | NoSuchMethodException | IllegalAccessException + | InstantiationException e) { + throw new IllegalArgumentException("unable to get instance of given input collection"); + } + }; + final AtomicInteger counter = new AtomicInteger(0); + final Map map = (Map) collection.stream().collect( + Collectors.groupingBy((i -> counter.getAndIncrement() / chunkSize), Collectors.toCollection(supplier))); + return new ArrayList<>(map.values()); + } } diff --git a/src/test/java/org/apache/commons/collections4/CollectionUtilsTest.java b/src/test/java/org/apache/commons/collections4/CollectionUtilsTest.java index de4480ec40..97d623e23c 100644 --- a/src/test/java/org/apache/commons/collections4/CollectionUtilsTest.java +++ b/src/test/java/org/apache/commons/collections4/CollectionUtilsTest.java @@ -2303,4 +2303,104 @@ public void union() { assertEquals(Integer.valueOf(1), freq2.get(5)); } + @Test + public void testPartitionByChunkSize() { + // test with Set + final Set set = new HashSet<>(); + for (int i = 1; i <= 7; i++) { + set.add(i); + } + List> setPartitions = CollectionUtils.partitionByChunkSize(set, 3); + assertEquals(3, setPartitions.size()); + assertEquals(3, setPartitions.get(0).size()); + assertEquals(3, setPartitions.get(1).size()); + assertEquals(1, setPartitions.get(2).size()); + + // test for max chunk size use-case + List> setPartitionMaxSize = CollectionUtils.partitionByChunkSize(set, Integer.MAX_VALUE); + assertEquals(1, setPartitionMaxSize.size()); + assertEquals(set, setPartitionMaxSize.get(0)); + + // test with List, have duplicate values + final List list = new ArrayList<>(); + for (int i = 1; i <= 7; i++) { + list.add(i); + } + list.add(7L); + list.add(7L); + List> listPartitions = CollectionUtils.partitionByChunkSize(list, 2); + assertEquals(5, listPartitions.size()); + assertEquals(2, listPartitions.get(0).size()); + assertEquals(2, listPartitions.get(1).size()); + assertEquals(2, listPartitions.get(2).size()); + assertEquals(2, listPartitions.get(3).size()); + assertEquals(1, listPartitions.get(4).size()); + + // test with List, have null elements + final List listWithNullElements = new ArrayList<>(); + for (int i = 1; i <= 6; i++) { + listWithNullElements.add(i); + } + listWithNullElements.add(null); + listWithNullElements.add(7L); + List> listPartitionsWithNullElements = CollectionUtils.partitionByChunkSize(listWithNullElements, + 2); + assertEquals(4, listPartitionsWithNullElements.size()); + assertEquals(2, listPartitionsWithNullElements.get(0).size()); + assertEquals(2, listPartitionsWithNullElements.get(1).size()); + assertEquals(2, listPartitionsWithNullElements.get(2).size()); + assertEquals(2, listPartitionsWithNullElements.get(3).size()); + + // test with nested Collection + List> strLists = new ArrayList<>(); + List strList1 = Arrays.asList("1", "one"); + strLists.add(strList1); + List strList2 = Arrays.asList("2"); + strLists.add(strList2); + List strList3 = Arrays.asList("3", "three"); + strLists.add(strList3); + List strList4 = Arrays.asList("4", null); + strLists.add(strList4); + List strList5 = Arrays.asList("5", "five"); + strLists.add(strList5); + List>> retStrLists = CollectionUtils.partitionByChunkSize(strLists, 2); + assertEquals(3, retStrLists.size()); + assertEquals(2, retStrLists.get(0).size()); + assertEquals(strList1, retStrLists.get(0).get(0)); + assertEquals(strList2, retStrLists.get(0).get(1)); + assertEquals(2, retStrLists.get(1).size()); + assertEquals(strList3, retStrLists.get(1).get(0)); + assertEquals(strList4, retStrLists.get(1).get(1)); + assertEquals(1, retStrLists.get(2).size()); + assertEquals(strList5, retStrLists.get(2).get(0)); + + // test with empty collection + List emptyList = new ArrayList<>(); + List> emptyPartitions = CollectionUtils.partitionByChunkSize(emptyList, 2); + assertEquals(0, emptyPartitions.size()); + // test exception scenarios + assertAll( + () -> { + assertThrows(IllegalArgumentException.class, () -> CollectionUtils.partitionByChunkSize(listWithNullElements, -2), + "failed to check if input chunk size is greater than 0"); + }, + () -> { + assertThrows(IllegalArgumentException.class, () -> CollectionUtils.partitionByChunkSize(listWithNullElements, 0), + "failed to check if input chunk size is greater than 0"); + }, + () -> { + assertThrows(NullPointerException.class, () -> CollectionUtils.partitionByChunkSize(null, 2), + "failed to check if input collection is null"); + }, + () -> { + @SuppressWarnings("rawtypes") + Collection mockCollection = createMock(Collection.class); + expect(mockCollection.stream()).andReturn(Arrays.asList("").stream()); + replay(); + assertThrows(IllegalArgumentException.class, () -> CollectionUtils.partitionByChunkSize(mockCollection, 2), + "failed to check instance of given input collection"); + } + ); + } + }