Skip to content
This repository was archived by the owner on Feb 21, 2019. It is now read-only.

Commit cd1b50d

Browse files
committed
Sync'ing latest changes
1 parent 5177124 commit cd1b50d

File tree

8 files changed

+150
-89
lines changed

8 files changed

+150
-89
lines changed

README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ This code is built with the following assumptions. You may get mixed results if
1111
* [HBase](http://hbase.apache.org) 0.90+
1212
* [Pig](http://pig.apache.org) 0.9+
1313
* [Hive](https://github.com/xstevens/hive) 0.7 with [automatic promotion of certain types](https://github.com/xstevens/hive/commit/566ca633546e5231cf5ea20d554c1f61784f39e4)
14+
* [Jackson](https://github.com/FasterXML) 2.x (for all things JSON)
15+
* We don't use anything fancy, but the move to FasterXML changed the package names relative to Jackson 1.x. If you want to use Jackson 1.x (which is what ships with Hadoop projects), you can probably fork and recompile fairly easily.
1416

1517
### Building ###
1618
To make a jar you can do:
@@ -28,4 +30,5 @@ All aspects of this software written in Java are distributed under Apache Softwa
2830
### Contributors ###
2931

3032
* Xavier Stevens ([@xstevens](http://twitter.com/xstevens))
31-
* Daniel Einspanjer ([@deinspanjer](http://twitter/deinspanjer))
33+
* Daniel Einspanjer ([@deinspanjer](http://twitter.com/deinspanjer))
34+
* Mark Reid ([@reid_write](http://twitter.com/reid_write))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/*
2+
* Copyright 2012 Mozilla Foundation
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
package com.mozilla.hadoop.fs;
21+
22+
import java.io.BufferedReader;
23+
import java.io.IOException;
24+
import java.io.InputStreamReader;
25+
import java.util.ArrayList;
26+
import java.util.Iterator;
27+
import java.util.List;
28+
29+
import org.apache.hadoop.conf.Configuration;
30+
import org.apache.hadoop.fs.FileStatus;
31+
import org.apache.hadoop.fs.FileSystem;
32+
import org.apache.hadoop.fs.Path;
33+
import org.apache.log4j.Logger;
34+
35+
public class TextFileDirectoryReader {
36+
37+
private static final Logger LOG = Logger.getLogger(TextFileDirectoryReader.class);
38+
39+
private Configuration conf = new Configuration();
40+
private FileSystem fs;
41+
private List<Path> paths;
42+
private Iterator<Path> pathIter;
43+
private Path curPath;
44+
private BufferedReader curReader;
45+
46+
public TextFileDirectoryReader(Path inputPath) throws IOException {
47+
fs = FileSystem.get(inputPath.toUri(), conf);
48+
paths = new ArrayList<Path>();
49+
for(FileStatus status : fs.listStatus(inputPath)) {
50+
Path p = status.getPath();
51+
if (!status.isDir() && !p.getName().startsWith("_")) {
52+
paths.add(p);
53+
}
54+
}
55+
56+
pathIter = paths.iterator();
57+
}
58+
59+
private boolean nextReader() throws IOException {
60+
if (curReader != null) {
61+
curReader.close();
62+
}
63+
64+
if (!pathIter.hasNext()) {
65+
return false;
66+
}
67+
68+
curPath = pathIter.next();
69+
curReader = new BufferedReader(new InputStreamReader(fs.open(curPath)));
70+
71+
return true;
72+
}
73+
74+
public void close() {
75+
if (curReader != null) {
76+
try {
77+
curReader.close();
78+
} catch (IOException e) {
79+
LOG.error("Error closing reader", e);
80+
}
81+
}
82+
if (fs != null) {
83+
try {
84+
fs.close();
85+
} catch (IOException e) {
86+
LOG.error("Error closing filesystem", e);
87+
}
88+
}
89+
}
90+
91+
public String next() throws IOException {
92+
if (curReader == null) {
93+
if (!nextReader()) {
94+
return null;
95+
}
96+
}
97+
98+
String line = curReader.readLine();
99+
if (line == null) {
100+
boolean success = nextReader();
101+
if (success) {
102+
line = curReader.readLine();
103+
}
104+
}
105+
106+
return line;
107+
}
108+
109+
public static void main(String[] args) {
110+
String inputPath = args[0];
111+
TextFileDirectoryReader tfdr = null;
112+
try {
113+
tfdr = new TextFileDirectoryReader(new Path(inputPath));
114+
String line = null;
115+
while ((line = tfdr.next()) != null) {
116+
System.out.println(line);
117+
}
118+
} catch (IOException e) {
119+
LOG.error("IO error while reading directory", e);
120+
} finally {
121+
if (tfdr != null) {
122+
tfdr.close();
123+
}
124+
}
125+
}
126+
}

src/main/java/com/mozilla/pig/eval/date/DaysAgo.java

-63
This file was deleted.

src/main/java/com/mozilla/pig/eval/date/FormatDate.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public FormatDate(String format) {
3838

3939
@Override
4040
public String exec(Tuple input) throws IOException {
41-
if (input == null || input.size() == 0) {
41+
if (input == null || input.size() == 0 || input.get(0) == null) {
4242
return null;
4343
}
4444

src/main/java/com/mozilla/pig/eval/date/TimeDelta.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ public TimeDelta(String deltaUnitStr, String dateFormat) throws ParseException {
5858

5959
@Override
6060
public Long exec(Tuple input) throws IOException {
61-
if (input == null || input.size() == 0) {
61+
if (input == null || input.size() < 2 ||
62+
input.get(0) == null || input.get(1) == null) {
6263
return null;
6364
}
6465

src/main/java/com/mozilla/pig/eval/geoip/GeoIpLookup.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public class GeoIpLookup extends EvalFunc<Tuple> {
5151
*
5252
* This will expect a file in hdfs in /user/you/GeoIPCity.dat
5353
*
54-
* Using the getCacheFiles approach,y ou no longer need to specify the
54+
* Using the getCacheFiles approach, you no longer need to specify the
5555
* -Dmapred.cache.archives
5656
* -Dmapred.create.symlink
5757
* options to pig.

src/test/java/com/mozilla/pig/eval/ExampleTest.java

+2-11
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,15 @@
1919
*/
2020
package com.mozilla.pig.eval;
2121

22-
import static org.junit.Assert.assertEquals;
23-
import static org.junit.Assert.assertTrue;
24-
2522
import java.io.IOException;
2623

27-
import org.apache.pig.data.Tuple;
28-
import org.apache.pig.data.TupleFactory;
29-
import org.apache.pig.data.DataBag;
30-
import org.apache.pig.data.BagFactory;
3124
import org.junit.Test;
3225

3326
public class ExampleTest {
34-
35-
private TupleFactory tupleFactory = TupleFactory.getInstance();
36-
private BagFactory bagFactory = BagFactory.getInstance();
3727

3828
@Test
39-
public void testInstantiate() throws IOException {
29+
public void testConstructor() throws IOException {
4030
Example e = new Example();
4131
}
32+
4233
}

src/test/java/com/mozilla/pig/eval/date/DaysAgoTest.java src/test/java/com/mozilla/pig/eval/date/TimeDeltaTest.java

+14-11
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/*
1+
/**
22
* Copyright 2012 Mozilla Foundation
33
*
44
* Licensed to the Apache Software Foundation (ASF) under one
@@ -23,43 +23,46 @@
2323
import static org.junit.Assert.assertNull;
2424

2525
import java.io.IOException;
26+
import java.text.ParseException;
2627
import java.text.SimpleDateFormat;
2728
import java.util.Calendar;
2829

2930
import org.apache.pig.data.Tuple;
3031
import org.apache.pig.data.TupleFactory;
3132
import org.junit.Test;
3233

33-
public class DaysAgoTest {
34+
public class TimeDeltaTest {
3435

3536
private static final String TIME_FORMAT = "yyyyMMdd";
3637

37-
private DaysAgo daysAgo = new DaysAgo(TIME_FORMAT);
3838
private TupleFactory tupleFactory = TupleFactory.getInstance();
3939

4040
@Test
41-
public void testExec1() throws IOException {
42-
Integer deltaDays = daysAgo.exec(null);
41+
public void testExec1() throws IOException, ParseException {
42+
TimeDelta daysAgo = new TimeDelta("5", TIME_FORMAT);
43+
Long deltaDays = daysAgo.exec(null);
4344
assertNull(deltaDays);
4445
}
4546

4647
@Test
47-
public void testExec2() throws IOException {
48+
public void testExec2() throws IOException, ParseException {
4849
SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT);
4950
Calendar cal = Calendar.getInstance();
5051
cal.add(Calendar.DATE, -1);
5152

52-
Tuple input = tupleFactory.newTuple(1);
53-
53+
Tuple input = tupleFactory.newTuple(2);
5454
input.set(0, sdf.format(cal.getTime()));
55-
Integer deltaDays = daysAgo.exec(input);
56-
assertEquals(1, (int)deltaDays);
55+
input.set(1, sdf.format(Calendar.getInstance().getTime()));
56+
57+
TimeDelta daysAgo = new TimeDelta("5", TIME_FORMAT);
58+
Long deltaDays = daysAgo.exec(input);
59+
assertEquals(1, (long)deltaDays);
5760

5861
cal = Calendar.getInstance();
5962
cal.add(Calendar.DATE, -30);
6063
input.set(0, sdf.format(cal.getTime()));
6164
deltaDays = daysAgo.exec(input);
62-
assertEquals(30, (int)deltaDays);
65+
assertEquals(30, (long)deltaDays);
6366
}
6467

6568
}

0 commit comments

Comments
 (0)