Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add partitionByChunkSize method in CollectionUtils #269

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
3 changes: 3 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,9 @@
<contributor>
<name>Arturo Bernal</name>
</contributor>
<contributor>
<name>Ramanan Ravi</name>
</contributor>
</contributors>

<dependencies>
Expand Down
53 changes: 53 additions & 0 deletions src/main/java/org/apache/commons/collections4/CollectionUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.commons.collections4;

import java.lang.reflect.Array;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
Expand All @@ -30,6 +31,9 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import org.apache.commons.collections4.bag.HashBag;
import org.apache.commons.collections4.collection.PredicatedCollection;
Expand Down Expand Up @@ -2153,4 +2157,53 @@ public static <E> E extractSingleton(final Collection<E> collection) {
}
return collection.iterator().next();
}

/**
 * Splits the given {@code collection} into consecutive partitions holding at
 * most {@code chunkSize} elements each (only the final partition may be
 * smaller).
 * <p>
 * For example, partitioning a {@code collection} containing
 * {@code [a, b, c, d, e, f, g]} with a {@code chunkSize} of 3 yields an outer
 * {@link List} containing three collections of the same type as
 * {@code collection}, where the first two collections have three elements each
 * and the final collection has one element. Elements are distributed in the
 * encounter order of the Stream of the given {@code collection}, and the
 * partitions appear in the returned {@link List} in that same order.
 * </p>
 * <p>
 * Every partition is a new, initially empty instance of the runtime class of
 * {@code collection}, created reflectively through its no-argument
 * constructor; configuration of the input instance (such as a comparator) is
 * therefore not carried over, and the input itself is never modified.
 * </p>
 * <p>
 * Passing an empty {@code collection} returns an empty {@link List} (no
 * instance is created in that case). Passing a {@code chunkSize} greater than
 * or equal to the size of the input {@code collection} returns a {@link List}
 * with a single partition that contains all elements of the input; it is a new
 * collection, not the input {@code collection} itself.
 * </p>
 *
 * @param <E> the type of Collection
 * @param collection the collection to be partitioned
 * @param chunkSize the desired size of each partition (the last may be
 *        smaller)
 * @return a list of collections (type as that of given input collection)
 * @throws NullPointerException if the input collection is null
 * @throws IllegalArgumentException if the input chunkSize is less than or
 *         equal to 0
 * @throws IllegalArgumentException if a new instance of the input collection
 *         class cannot be created; the reflective failure is attached as the
 *         cause
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static <E extends Collection> List<E> partitionByChunkSize(final E collection, final int chunkSize) {
    Objects.requireNonNull(collection, "input collection must not be null");
    if (chunkSize <= 0) {
        throw new IllegalArgumentException("input chunk size must be greater than 0");
    }
    // Snapshot the elements in stream encounter order; a plain indexed loop then
    // guarantees the order of both the elements and the partitions, instead of
    // depending on HashMap iteration order and a stateful stream classifier.
    final Object[] elements = collection.stream().toArray();
    final List<E> partitions = new ArrayList<>();
    E current = null;
    for (int i = 0; i < elements.length; i++) {
        // Open a new partition at every multiple of chunkSize (including index 0).
        if (i % chunkSize == 0) {
            current = newEmptyInstanceOf(collection);
            partitions.add(current);
        }
        current.add(elements[i]);
    }
    return partitions;
}

/**
 * Creates a new instance of the runtime class of {@code prototype} by invoking
 * its no-argument constructor.
 *
 * @param <E> the type of Collection
 * @param prototype the collection whose class is to be instantiated
 * @return a new instance of the class of {@code prototype}
 * @throws IllegalArgumentException if the class has no usable no-argument
 *         constructor or the constructor fails; the reflective failure is
 *         attached as the cause
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private static <E extends Collection> E newEmptyInstanceOf(final E prototype) {
    try {
        return (E) prototype.getClass().getDeclaredConstructor().newInstance();
    } catch (InvocationTargetException | NoSuchMethodException | IllegalAccessException
            | InstantiationException e) {
        // Keep the original failure as the cause so callers can diagnose it.
        throw new IllegalArgumentException("unable to get instance of given input collection", e);
    }
}
}
100 changes: 100 additions & 0 deletions src/test/java/org/apache/commons/collections4/CollectionUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2303,4 +2303,104 @@ public void union() {
assertEquals(Integer.valueOf(1), freq2.get(5));
}

/**
 * Covers {@code CollectionUtils.partitionByChunkSize} for sets, lists with
 * duplicates and null elements, nested collections, empty input and the
 * documented failure modes.
 */
@Test
public void testPartitionByChunkSize() {
    // Set input: seven distinct numbers split into chunks of three
    final Set<Number> numberSet = new HashSet<>();
    for (int n = 1; n <= 7; n++) {
        numberSet.add(n);
    }
    final List<Set<Number>> chunkedSet = CollectionUtils.partitionByChunkSize(numberSet, 3);
    assertEquals(3, chunkedSet.size());
    for (int idx = 0; idx < 2; idx++) {
        assertEquals(3, chunkedSet.get(idx).size());
    }
    assertEquals(1, chunkedSet.get(2).size());

    // a chunk size beyond the input size yields one partition equal to the input
    final List<Set<Number>> singleChunk = CollectionUtils.partitionByChunkSize(numberSet, Integer.MAX_VALUE);
    assertEquals(1, singleChunk.size());
    assertEquals(numberSet, singleChunk.get(0));

    // List input containing duplicate values
    final List<Number> numbers = new ArrayList<>();
    for (int n = 1; n <= 7; n++) {
        numbers.add(n);
    }
    for (int repeat = 0; repeat < 2; repeat++) {
        numbers.add(7L);
    }
    final List<List<Number>> chunkedNumbers = CollectionUtils.partitionByChunkSize(numbers, 2);
    assertEquals(5, chunkedNumbers.size());
    for (int idx = 0; idx < 4; idx++) {
        assertEquals(2, chunkedNumbers.get(idx).size());
    }
    assertEquals(1, chunkedNumbers.get(4).size());

    // List input containing a null element
    final List<Number> numbersWithNull = new ArrayList<>();
    for (int n = 1; n <= 6; n++) {
        numbersWithNull.add(n);
    }
    numbersWithNull.add(null);
    numbersWithNull.add(7L);
    final List<List<Number>> chunkedWithNull = CollectionUtils.partitionByChunkSize(numbersWithNull, 2);
    assertEquals(4, chunkedWithNull.size());
    for (int idx = 0; idx < 4; idx++) {
        assertEquals(2, chunkedWithNull.get(idx).size());
    }

    // nested collections: a list whose elements are themselves lists
    final List<String> first = Arrays.asList("1", "one");
    final List<String> second = Arrays.asList("2");
    final List<String> third = Arrays.asList("3", "three");
    final List<String> fourth = Arrays.asList("4", null);
    final List<String> fifth = Arrays.asList("5", "five");
    final List<List<String>> nested = new ArrayList<>(Arrays.asList(first, second, third, fourth, fifth));
    final List<List<List<String>>> chunkedNested = CollectionUtils.partitionByChunkSize(nested, 2);
    assertEquals(3, chunkedNested.size());
    final List<List<String>> headChunk = chunkedNested.get(0);
    assertEquals(2, headChunk.size());
    assertEquals(first, headChunk.get(0));
    assertEquals(second, headChunk.get(1));
    final List<List<String>> middleChunk = chunkedNested.get(1);
    assertEquals(2, middleChunk.size());
    assertEquals(third, middleChunk.get(0));
    assertEquals(fourth, middleChunk.get(1));
    final List<List<String>> tailChunk = chunkedNested.get(2);
    assertEquals(1, tailChunk.size());
    assertEquals(fifth, tailChunk.get(0));

    // empty input produces no partitions
    final List<String> nothing = new ArrayList<>();
    final List<List<String>> chunkedNothing = CollectionUtils.partitionByChunkSize(nothing, 2);
    assertEquals(0, chunkedNothing.size());

    // failure modes: invalid chunk sizes, null input, non-instantiable collection class
    assertAll(
        () -> assertThrows(IllegalArgumentException.class,
                () -> CollectionUtils.partitionByChunkSize(numbersWithNull, -2),
                "failed to check if input chunk size is greater than 0"),
        () -> assertThrows(IllegalArgumentException.class,
                () -> CollectionUtils.partitionByChunkSize(numbersWithNull, 0),
                "failed to check if input chunk size is greater than 0"),
        () -> assertThrows(NullPointerException.class,
                () -> CollectionUtils.partitionByChunkSize(null, 2),
                "failed to check if input collection is null"),
        () -> {
            @SuppressWarnings("rawtypes")
            final Collection mocked = createMock(Collection.class);
            expect(mocked.stream()).andReturn(Arrays.asList("").stream());
            replay();
            assertThrows(IllegalArgumentException.class,
                    () -> CollectionUtils.partitionByChunkSize(mocked, 2),
                    "failed to check instance of given input collection");
        }
    );
}

}