Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: Module reporting XML schema errors #972

Open
wants to merge 2 commits into
base: integration
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions jhove-bbt/scripts/create-1.33-target.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,34 @@ echo "TEST BASELINE: Creating baseline"
echo " - copying ${baselineRoot} baseline to ${targetRoot}"
cp -R "${baselineRoot}" "${targetRoot}"

# Bump the HTML module release (1.4.4 -> 1.4.5, dated 2024-11-27) in the copied baseline.
# Per-file results name the module in a reportingModule element.
find "${targetRoot}" -type f -name "*.html.jhove.xml" \
	-exec sed -i 's/<reportingModule release="1.4.4" date="2024-08-22">HTML-hul<\/reportingModule>/<reportingModule release="1.4.5" date="2024-11-27">HTML-hul<\/reportingModule>/' {} \;
# The global audit lists each module with its release only.
find "${targetRoot}" -type f -name "audit.jhove.xml" \
	-exec sed -i 's/<module release="1.4.4">HTML-hul<\/module>/<module release="1.4.5">HTML-hul<\/module>/' {} \;
# The module audit also carries the release date and two specification dates;
# all four substitutions are applied, in order, in a single pass over each file.
find "${targetRoot}" -type f -name "audit-HTML-hul.jhove.xml" -exec sed -i \
	-e 's/<release>1.4.4<\/release>/<release>1.4.5<\/release>/' \
	-e 's/2024-08-22/2024-11-27/' \
	-e 's/01-08-2002/2002-08-01/' \
	-e 's/31-05-2001/2001-05-31/' \
	{} \;

# Bump the JPEG 2000 module release (1.4.4 -> 1.4.5, dated 2024-11-27) in the copied baseline.
# The same reportingModule rewrite applies to .jp2, .jpx and .md result files,
# so they are selected by a single find expression.
find "${targetRoot}" -type f \
	\( -name "*.jp2.jhove.xml" -o -name "*.jpx.jhove.xml" -o -name "*.md.jhove.xml" \) \
	-exec sed -i 's/<reportingModule release="1.4.4" date="2023-03-16">JPEG2000-hul<\/reportingModule>/<reportingModule release="1.4.5" date="2024-11-27">JPEG2000-hul<\/reportingModule>/' {} \;
# The global audit lists each module with its release only.
find "${targetRoot}" -type f -name "audit.jhove.xml" \
	-exec sed -i 's/<module release="1.4.4">JPEG2000-hul<\/module>/<module release="1.4.5">JPEG2000-hul<\/module>/' {} \;
# The module audit carries both the release and its date; rewrite them in one pass.
find "${targetRoot}" -type f -name "audit-JPEG2000-hul.jhove.xml" -exec sed -i \
	-e 's/<release>1.4.4<\/release>/<release>1.4.5<\/release>/' \
	-e 's/2023-03-16/2024-11-27/' \
	{} \;

# Copy the files affected by the relative URL output changes to the XML reporting module
# Both results belong to inputs with non-ASCII file names. Each copy is guarded by a
# file test, so when the candidate result is absent the version already copied from
# the baseline is left in place.
if [[ -f "${candidateRoot}/errors/modules/JPEG2000-hul/ランダム日本語テキスト.jp2.jhove.xml" ]]; then
cp "${candidateRoot}/errors/modules/JPEG2000-hul/ランダム日本語テキスト.jp2.jhove.xml" "${targetRoot}/errors/modules/JPEG2000-hul/ランダム日本語テキスト.jp2.jhove.xml"
fi
if [[ -f "${candidateRoot}/errors/modules/JPEG2000-hul/隨機中國文字.jp2.jhove.xml" ]]; then
cp "${candidateRoot}/errors/modules/JPEG2000-hul/隨機中國文字.jp2.jhove.xml" "${targetRoot}/errors/modules/JPEG2000-hul/隨機中國文字.jp2.jhove.xml"
fi

# Copy the files affected by the change to the JPEG 2000 module that prevents empty CompositeLayerHeader lists from being created
# Each copy is guarded by a file test, so when the candidate result is absent the
# version already copied from the baseline is left in place.
if [[ -f "${candidateRoot}/errors/modules/JPEG2000-hul/is_jpx.jp2.jhove.xml" ]]; then
cp "${candidateRoot}/errors/modules/JPEG2000-hul/is_jpx.jp2.jhove.xml" "${targetRoot}/errors/modules/JPEG2000-hul/is_jpx.jp2.jhove.xml"
fi
if [[ -f "${candidateRoot}/examples/modules/JPEG2000-hul/ROITest.jpx.jhove.xml" ]]; then
cp "${candidateRoot}/examples/modules/JPEG2000-hul/ROITest.jpx.jhove.xml" "${targetRoot}/examples/modules/JPEG2000-hul/ROITest.jpx.jhove.xml"
fi
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

package edu.harvard.hul.ois.jhove.handler;

import java.io.File;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.NumberFormat;
import java.util.Date;
import java.util.Iterator;
Expand Down Expand Up @@ -352,11 +355,6 @@ public void show(RepInfo info) {
_writer.println(margn2
+ element("reportingModule", attr2, module.getName()));
}
/*
* else { String [][] attr2 = { {"severity", "error"} }; _writer.println
* (margn2 + element ("message", attr2,
* "file not found or not readable")); }
*/
Date date = info.getCreated();
if (date != null) {
_writer.println(margn2 + element("created", toDateTime(date)));
Expand Down Expand Up @@ -4455,51 +4453,41 @@ private void writeAESTimeRangePart(String indent, String elementName, AESAudioMe
element(elementName, attributes, String.valueOf(timeDesc.getSamples())));
}

/*
* Clean up a URI string by escaping forbidden characters. We assume
* (perhaps dangerously) that a % is the start of an already escaped
* hexadecimal sequence.
/**
* Returns a path normalised URI from the presented string path.
* Solution based upon the following post from Eugene Yokota:
* https://eed3si9n.com/encoding-file-path-as-URI-reference/
*/
private String cleanURIString(String uri) {
StringBuffer sb = new StringBuffer(uri.length() * 2);
boolean change = false;
for (int i = 0; i < uri.length(); i++) {
char c = uri.charAt(i);
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
|| (c >= '0' && c <= '9') || (c == '%') || // assume it's an
// escape
("-_.!~*'();/?:@=+$,".indexOf(c) >= 0)) {
sb.append(c);
} else {
int cval = c;

// More significant hex digit
int mshd = (cval >> 4);
if (mshd >= 10) {
mshd += 'A' - 10;
} else {
mshd += '0';
}
sb.append('%');
sb.append((char) mshd);

// Less significant hex digit
int lshd = (cval & 0X0F);
if (lshd >= 10) {
lshd += 'A' - 10;
private static final String cleanURIString(final String path) {
File input = new File(path);
final boolean isWindows = System.getProperty("os.name").toLowerCase(Locale.ENGLISH).contains("windows");
final String fileScheme = "file";
try {
if (isWindows && !path.isEmpty() && path.startsWith(Character.toString(File.separatorChar))) {
if (path.startsWith("\\")) {
return new URI(fileScheme, normaliseToSlash(path), null).toString();
} else {
lshd += '0';
return new URI(fileScheme, "", normaliseToSlash(path), null).toString();
}
sb.append((char) lshd);
change = true;
} else if (input.isAbsolute()) {
return new URI(fileScheme, "", normaliseToSlash(ensureHeadSlash(input.getAbsolutePath())), null)
.toString();
}
return new URI(null, normaliseToSlash(path), null).toString();
} catch (URISyntaxException e) {
// If this fails simply return the original path
return path;
}
// For efficiency, return the original string
// if nothing changed.
if (change) {
return sb.toString();
}
return uri;
}

/**
 * Ensures that a non-empty path begins with the platform file separator.
 * <p>
 * A path that already starts with the separator, or an empty path, is
 * returned unchanged. Any other path (for example a drive-letter path
 * where the separator is a backslash) has a single separator prepended,
 * so that once separators are normalised to slashes the result can be
 * used as the absolute path component of a hierarchical URI.
 *
 * @param name the file system path to check
 * @return {@code name} itself when it is empty or already starts with
 *         {@link File#separatorChar}, otherwise {@code name} prefixed
 *         with one separator
 */
private static final String ensureHeadSlash(final String name) {
    final String separator = Character.toString(File.separatorChar);
    // Only add the separator when it is missing; prepending to a path that
    // already has one would yield a doubled leading separator.
    if (name.isEmpty() || name.startsWith(separator)) {
        return name;
    }
    return separator + name;
}

/**
 * Converts platform file separators in a path to forward slashes.
 *
 * @param name the path to convert
 * @return {@code name} unchanged when the platform separator is already
 *         {@code '/'}, otherwise a copy with every occurrence of
 *         {@link File#separatorChar} replaced by {@code '/'}
 */
private static final String normaliseToSlash(final String name) {
    if (File.separatorChar == '/') {
        // Nothing to convert on platforms that already use a forward slash.
        return name;
    }
    return name.replace(File.separatorChar, '/');
}

/** Appends a Rational value to a StringBuffer */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ public class HtmlModule extends ModuleBase {
private static final String XHTML_1_1_STR = "XHTML 1.1";

private static final String NAME = "HTML-hul";
private static final String RELEASE = "1.4.4";
private static final int[] DATE = { 2024, 8, 22 };
private static final String RELEASE = "1.4.5";
private static final int[] DATE = { 2024, 11, 27 };
private static final String[] FORMAT = { "HTML" };
private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict,"
+ "HTML 4.0 Transitional, HTML 4.0 Frameset, "
Expand Down Expand Up @@ -231,7 +231,7 @@ public HtmlModule() {
+ "(Second Edition)",
DocumentType.REPORT);
doc.setPublisher(w3cAgent);
doc.setDate("01-08-2002");
doc.setDate("2002-08-01");
doc.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/",
IdentifierType.URL));
_specification.add(doc);
Expand All @@ -240,7 +240,7 @@ public HtmlModule() {
doc = new Document(" XHTML(TM) 1.1 - Module-based XHTML",
DocumentType.REPORT);
doc.setPublisher(w3cAgent);
doc.setDate("31-05-2001");
doc.setDate("2001-05-31");
doc.setIdentifier(new Identifier(
"http://www.w3.org/TR/2001/REC-xhtml11-20010531/",
IdentifierType.URL));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ public class Jpeg2000Module extends ModuleBase {
******************************************************************/

private static final String NAME = "JPEG2000-hul";
private static final String RELEASE = "1.4.4";
private static final int[] DATE = { 2023, 03, 16 };
private static final String RELEASE = "1.4.5";
private static final int[] DATE = { 2024, 11, 27 };
private static final String[] FORMAT = { "JPEG 2000", "JP2", "JPX" };
private static final String COVERAGE = "JP2 (ISO/IEC 15444-1:2000/"
+ "ITU-T Rec. T.800 (200)), JPX (ISO/IEC 15444-2:2004)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,138 +24,129 @@ public class ComposLayerHdrBox extends JP2Box {
private Property channelDefProp;
private Property codestreamRegProp;
private List<Property> colorSpecs;



/**
* Constructor with superbox.
* Constructor with superbox.
*
* @param parent parent superbox of this box
* @param parent parent superbox of this box
*/
public ComposLayerHdrBox(RandomAccessFile raf, BoxHolder parent) {
super(raf, parent);
}

/** Reads the box, putting appropriate information in
* the RepInfo object. setModule, setBoxHeader,
* setRepInfo and setDataInputStream must be called
* before <code>readBox</code> is called.
* <code>readBox</code> must completely consume the
* box, so that the next byte to be read by the
* DataInputStream is the <code>FF</code> byte of the next Box.
/**
* Reads the box, putting appropriate information in
* the RepInfo object. setModule, setBoxHeader,
* setRepInfo and setDataInputStream must be called
* before <code>readBox</code> is called.
* <code>readBox</code> must completely consume the
* box, so that the next byte to be read by the
* DataInputStream is the <code>FF</code> byte of the next Box.
*/
@Override
public boolean readBox() throws IOException {
public boolean readBox() throws IOException {
if (_parentBox != null) {
// Box must be at top level.
wrongBoxContext();
return false;
}
initBytesRead ();
initBytesRead();
hasBoxes = true;
colorSpecs = new LinkedList<> ();
colorSpecs = new LinkedList<>();

// Unlike some other boxes, compositing layer boxes
// are numbered by their order in the file, starting
// with 0. A definite case of design by committee.
// with 0. A definite case of design by committee.
JP2Box box = null;
boolean hasOpacity = false;
boolean hasChannelDef = false;
while (hasNext ()) {
box = (JP2Box) next ();
while (hasNext()) {
box = (JP2Box) next();
if (box == null) {
break;
}
if (box instanceof ColorGroupBox ||
box instanceof OpacityBox ||
box instanceof ChannelDefBox ||
box instanceof CodestreamRegBox ||
box instanceof IPRBox ||
box instanceof ResolutionBox ||
box instanceof LabelBox) {
if (!box.readBox ()) {
return false;
}
if (box instanceof OpacityBox) {
hasOpacity = true;
}
else if (box instanceof ChannelDefBox) {
hasChannelDef = true;
}
if (box instanceof LabelBox) {
label = new Property ("Label",
PropertyType.STRING,
((LabelBox) box).getLabel ());
}
}
else {
box.skipBox ();
box instanceof OpacityBox ||
box instanceof ChannelDefBox ||
box instanceof CodestreamRegBox ||
box instanceof IPRBox ||
box instanceof ResolutionBox ||
box instanceof LabelBox) {
if (!box.readBox()) {
return false;
}
if (box instanceof OpacityBox) {
hasOpacity = true;
} else if (box instanceof ChannelDefBox) {
hasChannelDef = true;
}
if (box instanceof LabelBox) {
label = new Property("Label",
PropertyType.STRING,
((LabelBox) box).getLabel());
}
} else {
box.skipBox();
}
}
if (hasOpacity && hasChannelDef) {
_repInfo.setMessage (new ErrorMessage
(MessageConstants.JPEG2000_HUL_12,
_module.getFilePos ()));
_repInfo.setValid (false);
_repInfo.setMessage(new ErrorMessage(MessageConstants.JPEG2000_HUL_12,
_module.getFilePos()));
_repInfo.setValid(false);
}
finalizeBytesRead ();
List<Property> propList = new ArrayList (4);
finalizeBytesRead();

List<Property> propList = new ArrayList(4);
if (label != null) {
propList.add (label);
propList.add(label);
}
if (!colorSpecs.isEmpty ()) {
propList.add (new Property ("ColorSpecs",
if (!colorSpecs.isEmpty()) {
propList.add(new Property("ColorSpecs",
PropertyType.PROPERTY,
PropertyArity.LIST,
colorSpecs));
}
if (opacityProp != null) {
propList.add (opacityProp);
propList.add(opacityProp);
}
if (channelDefProp != null) {
propList.add (channelDefProp);
propList.add(channelDefProp);
}
if (codestreamRegProp != null) {
propList.add (codestreamRegProp);
propList.add(codestreamRegProp);
}
_module.addComposLayer(new Property
("CompositeLayerHeader",
if (!propList.isEmpty()) {
_module.addComposLayer(new Property("CompositeLayerHeader",
PropertyType.PROPERTY,
PropertyArity.LIST,
propList));
}
return true;
}


/** Add a color specification property. */
protected void addColorSpec (Property p)
{
colorSpecs.add (p);
protected void addColorSpec(Property p) {
colorSpecs.add(p);
}

/** Add an opacity property. */
protected void addOpacity (Property p)
{
protected void addOpacity(Property p) {
opacityProp = p;
}

/** Add channel definition property. */
protected void addChannelDef (Property p)
{
protected void addChannelDef(Property p) {
channelDefProp = p;
}

/** Add codestream registration property. */
protected void addCodestreamReg (Property p)
{
protected void addCodestreamReg(Property p) {
codestreamRegProp = p;
}

/** Returns the name of the Box. */
/** Returns the name of the Box. */
@Override
protected String getSelfPropName ()
{
protected String getSelfPropName() {
return "Compositing Layer Header Box";
}
}
Loading