Skip to content

Commit

Permalink
NUTCH-3101 src/java/org/apache/nutch/crawl/Inlink.java
Browse files Browse the repository at this point in the history
  • Loading branch information
Markus Jelsma committed Jan 9, 2025
1 parent 3b6d2c6 commit 18e7aeb
Showing 1 changed file with 40 additions and 2 deletions.
42 changes: 40 additions & 2 deletions src/java/org/apache/nutch/crawl/Inlink.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Map.Entry;

import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

Expand All @@ -28,6 +30,7 @@ public class Inlink implements Writable {

private String fromUrl;
private String anchor;
private MapWritable md = null;

public Inlink() {
}
Expand All @@ -41,6 +44,13 @@ public Inlink(String fromUrl, String anchor) {
public void readFields(DataInput in) throws IOException {
  // Field order must mirror write(): fromUrl, anchor, metadata flag, then
  // (only when the flag is set) the metadata map itself.
  fromUrl = Text.readString(in);
  anchor = Text.readString(in);

  if (!in.readBoolean()) {
    // No metadata in this record: drop whatever a previous read left behind,
    // so a reused instance does not carry stale metadata.
    md = null;
    return;
  }

  // Always start from a fresh map rather than refilling the old one.
  md = new MapWritable();
  md.readFields(in);
}

/**
Expand All @@ -51,12 +61,23 @@ public void readFields(DataInput in) throws IOException {
public static void skip(DataInput in) throws IOException {
  // Step over the two serialized strings: fromUrl first, then anchor.
  Text.skip(in);
  Text.skip(in);

  // A boolean flag follows; when set, a serialized metadata map comes next.
  if (in.readBoolean()) {
    // The map is read into a throwaway instance purely to consume its bytes
    // from the input; the result is discarded.
    new MapWritable().readFields(in);
  }
}

/**
 * Serializes this inlink as: fromUrl, anchor, a boolean metadata flag and,
 * only when that flag is true, the metadata map.
 *
 * <p>The flag is true only for a non-null, non-empty map, so a null map and
 * an empty map are written identically (flag {@code false}, no map).
 *
 * @param out the output to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
  Text.writeString(out, fromUrl);
  Text.writeString(out, anchor);

  final boolean hasMetadata = md != null && md.size() > 0;
  out.writeBoolean(hasMetadata);
  if (hasMetadata) {
    md.write(out);
  }
}

public static Inlink read(DataInput in) throws IOException {
Expand All @@ -73,6 +94,14 @@ public String getAnchor() {
return anchor;
}

/**
 * Returns the metadata map held by this inlink.
 *
 * <p>The internal reference is returned directly, not a copy. The result may
 * be {@code null}: the field starts out {@code null} and
 * {@link #readFields(DataInput)} resets it to {@code null} when the record
 * being read carries no metadata.
 *
 * @return the metadata map, or {@code null} if there is none
 */
public MapWritable getMetadata() {
return md;
}

/**
 * Replaces the metadata map of this inlink.
 *
 * <p>The given reference is stored as-is, without copying. Passing
 * {@code null} is allowed; {@link #write(DataOutput)} serializes a
 * {@code null} or empty map as "no metadata".
 *
 * @param md the new metadata map, may be {@code null}
 */
public void setMetadata(MapWritable md) {
this.md = md;
}

@Override
public boolean equals(Object o) {
if (!(o instanceof Inlink))
Expand All @@ -89,7 +118,16 @@ public int hashCode() {

/**
 * Renders this inlink as
 * {@code "fromUrl: <url> anchor: <anchor> metadata: <entries>"}, where each
 * metadata entry is appended as {@code " <key>: <value>"}. With no metadata
 * the string simply ends after {@code "metadata: "}.
 *
 * @return a human-readable description of this inlink
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("fromUrl: ");
  text.append(fromUrl).append(" anchor: ").append(anchor).append(" metadata: ");

  if (md != null && !md.isEmpty()) {
    for (Entry<Writable, Writable> pair : md.entrySet()) {
      text.append(" ").append(pair.getKey()).append(": ").append(pair.getValue());
    }
  }

  return text.toString();
}

}

0 comments on commit 18e7aeb

Please sign in to comment.