-
Notifications
You must be signed in to change notification settings - Fork 1
/
split.sh
executable file
·41 lines (32 loc) · 939 Bytes
/
split.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#! /bin/bash
#
# split.sh - split a large XML <collection> file into smaller chunk files.
#
# Usage: split.sh <infile> [<chunksize:10000>]
#
#   infile     Existing XML file containing <record> elements.
#   chunksize  Maximum number of <record> elements per output file
#              (positive decimal integer, default 10000).
#
# Output files are named <prefix>_<N>.xml (N = 0, 1, ...), where <prefix> is
# the basename of <infile> with a trailing ".xml" removed; they are created in
# the current working directory.  Every chunk starts with a fresh XML
# declaration and <collection> opening tag.  All chunks except the last are
# closed with </collection>; the last chunk is copied through to the end of
# the input, so it ends with whatever follows the final record there
# (presumably the input's own closing tag).
#
# Records are located by the literal byte sequence "<record>", so the file is
# never parsed as XML and never loaded into memory.
#
# Relies on `dd iflag=skip_bytes,count_bytes`, `grep -o -b` and `stdbuf`.
#
# Exit status: 0 on success, 1 on invalid arguments or a failed write.

set -u

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Copy bytes of the input file to stdout.
#   $1 - byte offset to start at
#   $2 - number of bytes to copy (optional; default: up to end of file)
copy_bytes() {
  local offset=$1
  local length=${2:-}
  # dd's transfer statistics go to stderr, hence the redirect; failures are
  # still detected by the caller through the exit status.
  # bs only affects the I/O granularity, not which bytes are copied.
  if [[ -n "$length" ]]; then
    dd if="$in_xml" bs=65536 skip="$offset" count="$length" \
      iflag=skip_bytes,count_bytes 2> /dev/null
  else
    dd if="$in_xml" bs=65536 skip="$offset" iflag=skip_bytes 2> /dev/null
  fi
}

in_xml=${1:-}
if [[ -z "$in_xml" || ! -f "$in_xml" ]]; then
  {
    echo "Invalid input file specified"
    echo "Usage: split.sh <infile> [<chunksize:10000>]"
    echo
  } >&2
  exit 1
fi

chunksize=${2:-10000}
[[ "$chunksize" =~ ^[0-9]+$ ]] \
  || die "Invalid chunk size specified: $chunksize"
# Force base 10 so that a leading zero is not interpreted as octal.
chunksize=$((10#$chunksize))
((chunksize > 0)) || die "Invalid chunk size specified: $chunksize"

readonly header=$'<?xml version="1.0" encoding="UTF-8"?>\n<collection>'
readonly footer='</collection>'

prefix=$(basename -- "$in_xml" .xml)
chunk=0
# Byte offsets delimiting the chunk being written.  Empty means "not seen
# yet"; an empty sentinel (rather than 0) keeps a record at offset 0 valid.
begin=''
end=''

# The pipeline emits the byte offset of the 1st, (chunksize+1)th,
# (2*chunksize+1)th, ... occurrence of "<record>": exactly the positions where
# a new chunk starts.  LC_ALL=C makes grep work on raw bytes regardless of the
# file's encoding; stdbuf keeps awk unbuffered so copying can start while grep
# is still scanning.
while IFS= read -r offset; do
  begin=$end
  end=$offset

  # The first offset only marks where the first record starts; everything
  # before it (the input's own preamble) is replaced by $header.
  [[ -n "$begin" ]] || continue

  file="${prefix}_${chunk}.xml"
  {
    printf '%s' "$header" \
      && copy_bytes "$begin" "$((end - begin))" \
      && printf '%s\n' "$footer"
  } > "$file" || die "Failed to write $file"
  chunk=$((chunk + 1))
done < <(
  LC_ALL=C grep -F -o -b -- '<record>' "$in_xml" \
    | stdbuf -o0 awk -F: -v n="$chunksize" '(NR - 1) % n == 0 { print $1 }'
)

# Without any record there is nothing to split; copying the whole input
# behind $header would only produce a file with two XML declarations.
[[ -n "$end" ]] || die "No <record> elements found in $in_xml"

# Last chunk: from the final boundary to end of file.  No footer is appended
# because the remainder of the input is copied verbatim.
file="${prefix}_${chunk}.xml"
{
  printf '%s' "$header" \
    && copy_bytes "$end"
} > "$file" || die "Failed to write $file"