Skip to content

Commit

Permalink
Improve archive index format (#269)
Browse files Browse the repository at this point in the history
- Defines V2 archive index properties
- Implements parsers for both archive index versions
- Uses the parsers instead of direct properties access
- Adds new tests

Resolves #248
{minor}

Signed-off-by: Esta Nagy <[email protected]>
  • Loading branch information
nagyesta authored Jun 19, 2024
1 parent 43c4ca5 commit 6956ea7
Show file tree
Hide file tree
Showing 10 changed files with 415 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
import java.util.stream.IntStream;
import java.util.stream.LongStream;

import static com.github.nagyesta.filebarj.io.stream.BarjCargoUtil.*;
import static com.github.nagyesta.filebarj.io.stream.BarjCargoUtil.toChunkFileName;
import static com.github.nagyesta.filebarj.io.stream.BarjCargoUtil.toIndexFileName;
import static com.github.nagyesta.filebarj.io.stream.ReadOnlyArchiveIndex.INDEX_VERSION;
import static com.github.nagyesta.filebarj.io.stream.crypto.EncryptionUtil.newCipherInputStream;
import static com.github.nagyesta.filebarj.io.stream.internal.ChunkingOutputStream.MEBIBYTE;
import static org.apache.commons.io.FilenameUtils.normalizeNoEndSeparator;
Expand Down Expand Up @@ -133,6 +135,7 @@ public BarjCargoArchiveEntryIterator getIteratorForScope(

/**
* Returns the matching entries in order of occurrence in the archive.
*
* @param archiveEntriesInScope the entries in scope
* @return the matching entries
*/
Expand Down Expand Up @@ -298,10 +301,10 @@ protected Properties readProperties(
@NotNull
protected List<BarjCargoEntityIndex> parseEntityIndexes(
@NotNull final Properties properties) {
final var totalEntities = Long.parseLong(properties.getProperty(LAST_ENTITY_INDEX_PROPERTY));
return LongStream.rangeClosed(1L, totalEntities)
final var index = parse(properties);
return LongStream.rangeClosed(1L, index.getTotalEntities())
.mapToObj(BarjCargoUtil::entryIndexPrefix)
.map(prefix -> BarjCargoEntityIndex.fromProperties(properties, prefix))
.map(index::entity)
.toList();
}

Expand All @@ -316,10 +319,10 @@ protected List<BarjCargoEntityIndex> parseEntityIndexes(
protected SortedMap<String, Path> generateFilePathMap(
@NotNull final Properties properties,
@NotNull final BarjCargoInputStreamConfiguration config) {
final var totalChunks = Integer.parseInt(properties.getProperty(LAST_CHUNK_INDEX_PROPERTY));
final var index = parse(properties);
final var map = new TreeMap<String, Path>();
IntStream.rangeClosed(1, totalChunks)
.mapToObj(index -> toChunkFileName(config.getPrefix(), index))
IntStream.rangeClosed(1, index.getNumberOfChunks())
.mapToObj(i -> toChunkFileName(config.getPrefix(), i))
.map(p -> Path.of(config.getFolder().toAbsolutePath().toString(), p))
.map(Path::toAbsolutePath)
.forEach(path -> map.put(path.getFileName().toString(), path));
Expand All @@ -336,35 +339,45 @@ protected SortedMap<String, Path> generateFilePathMap(
protected void verifyFilesExistAndHaveExpectedSizes(
@NotNull final Properties properties,
@NotNull final SortedMap<String, Path> chunkPaths) throws ArchiveIntegrityException {
final var maxChunkSize = Long.parseLong(properties.getProperty(MAX_CHUNK_SIZE_PROPERTY));
final var lastChunkSize = Long.parseLong(properties.getProperty(LAST_CHUNK_SIZE_PROPERTY));
final var expectedTotalSize = Long.parseLong(properties.getProperty(TOTAL_SIZE_PROPERTY));
final var index = parse(properties);
var totalSize = 0L;
final var iterator = chunkPaths.keySet().iterator();
while (iterator.hasNext()) {
final var key = iterator.next();
final var path = chunkPaths.get(key);
final var file = path.toFile();
if (!file.exists()) {
throw new ArchiveIntegrityException("Chunk file does not exist: " + path);
}
final long expectedSize;
if (iterator.hasNext()) {
expectedSize = maxChunkSize;
} else {
expectedSize = lastChunkSize;
}
final var fileSize = file.length();
if (expectedSize != fileSize) {
throw new ArchiveIntegrityException("Chunk file size is wrong: " + path
+ ", expected: " + expectedSize + " bytes, actual: " + fileSize + " bytes.");
}
totalSize += fileSize;
totalSize += verifiedFileSize(index, path, iterator.hasNext());
}
if (totalSize != expectedTotalSize) {
if (totalSize != index.getTotalSize()) {
throw new ArchiveIntegrityException(
"Total size is wrong: " + totalSize + " bytes, expected: " + expectedTotalSize + " bytes.");
"Total size is wrong: " + totalSize + " bytes, expected: " + index.getTotalSize() + " bytes.");
}
}

/**
 * Checks a single chunk file against the sizes recorded in the index.
 *
 * @param index     the parsed archive index supplying the expected chunk sizes
 * @param path      the chunk file to check
 * @param isNotLast {@code true} if more chunks follow this one, in which case the
 *                  maximum chunk size is expected; {@code false} for the final chunk,
 *                  which is compared with the recorded last chunk size
 * @return the actual length of the chunk file in bytes
 * @throws ArchiveIntegrityException if the file does not exist or its length differs
 *                                   from the expected size
 */
private static long verifiedFileSize(
        final ReadOnlyArchiveIndex index,
        final Path path,
        final boolean isNotLast) {
    final var chunkFile = path.toFile();
    if (!chunkFile.exists()) {
        throw new ArchiveIntegrityException("Chunk file does not exist: " + path);
    }
    // Only the final chunk may differ from the maximum chunk size.
    final long expectedBytes = isNotLast
            ? index.getMaxChunkSizeInBytes()
            : index.getLastChunkSizeInBytes();
    final long actualBytes = chunkFile.length();
    if (actualBytes == expectedBytes) {
        return actualBytes;
    }
    throw new ArchiveIntegrityException("Chunk file size is wrong: " + path
            + ", expected: " + expectedBytes + " bytes, actual: " + actualBytes + " bytes.");
}

/**
 * Builds the read-only index that matches the version declared in the properties.
 *
 * @param properties the raw key-value content of the index file
 * @return the index created by the resolved {@link IndexVersion}
 */
private static ReadOnlyArchiveIndex parse(final Properties properties) {
    final var declaredVersion = properties.getProperty(INDEX_VERSION);
    final var indexVersion = IndexVersion.forVersionString(declaredVersion);
    return indexVersion.createIndex(properties);
}

private void validateEntityIndexes(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.github.nagyesta.filebarj.io.stream;

import com.github.nagyesta.filebarj.io.stream.index.ArchiveIndexV2;
import com.github.nagyesta.filebarj.io.stream.internal.BaseBarjCargoArchiverFileOutputStream;
import com.github.nagyesta.filebarj.io.stream.internal.model.BarjCargoEntityIndex;
import lombok.extern.slf4j.Slf4j;
Expand All @@ -11,7 +12,8 @@
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;

import static com.github.nagyesta.filebarj.io.stream.BarjCargoUtil.*;
import static com.github.nagyesta.filebarj.io.stream.BarjCargoUtil.entryIndexPrefix;
import static com.github.nagyesta.filebarj.io.stream.BarjCargoUtil.toIndexFileName;
import static com.github.nagyesta.filebarj.io.stream.crypto.EncryptionUtil.newCipherOutputStream;

/**
Expand Down Expand Up @@ -100,11 +102,14 @@ private void writeIndexFileHeader() throws IOException {

private void writeIndexFileFooter() throws IOException {
final var lastChunk = getCurrentFilePath();
final var footer = LAST_CHUNK_INDEX_PROPERTY + COLON + getCurrentChunkIndex() + LINE_BREAK
+ LAST_CHUNK_SIZE_PROPERTY + COLON + lastChunk.toFile().length() + LINE_BREAK
+ MAX_CHUNK_SIZE_PROPERTY + COLON + getMaxChunkSizeBytes() + LINE_BREAK
+ LAST_ENTITY_INDEX_PROPERTY + COLON + entryCount() + LINE_BREAK
+ TOTAL_SIZE_PROPERTY + COLON + getTotalByteCount() + LINE_BREAK;
final var footer = ArchiveIndexV2.builder()
.numberOfChunks(getCurrentChunkIndex())
.lastChunkSizeInBytes(lastChunk.toFile().length())
.maxChunkSizeInBytes(getMaxChunkSizeBytes())
.totalEntities(entryCount())
.totalSize(getTotalByteCount())
.build()
.footerAsString();
indexStreamWriter.write(footer);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,6 @@ public final class BarjCargoUtil {
* The file name suffix for index files.
*/
public static final String INDEX = ".index";
/**
* The name of the property storing the last entity index.
*/
public static final String LAST_ENTITY_INDEX_PROPERTY = "last.entity.index";
/**
* The name of the property storing the index of the last chunk.
*/
public static final String LAST_CHUNK_INDEX_PROPERTY = "last.cnunk.index";
/**
* The name of the property storing the size of the last chunk.
*/
public static final String LAST_CHUNK_SIZE_PROPERTY = "last.cnunk.size";
/**
* The name of the property storing the maximum chunk size.
*/
public static final String MAX_CHUNK_SIZE_PROPERTY = "max.cnunk.size";
/**
* The name of the property storing the total size of the archive.
*/
public static final String TOTAL_SIZE_PROPERTY = "total.size";
/**
* A colon character used for separating the key and value in properties.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.github.nagyesta.filebarj.io.stream;

import com.github.nagyesta.filebarj.io.stream.index.ArchiveIndexV1;
import com.github.nagyesta.filebarj.io.stream.index.ArchiveIndexV2;
import lombok.Getter;
import org.jetbrains.annotations.NotNull;

import java.util.Properties;

/**
 * Enumerates the known versions of the File Barj index specification.
 * <p>
 * Every constant carries the version string stored in the index file and knows how
 * to create the {@link ReadOnlyArchiveIndex} implementation for its own format.
 */
@Getter
public enum IndexVersion {
    /**
     * The initial version of the File Barj index specification.
     */
    V1("1") {
        @Override
        ReadOnlyArchiveIndex createIndex(@NotNull final Properties properties) {
            return new ArchiveIndexV1(properties);
        }
    },
    /**
     * The 2nd version of the File Barj index specification.
     */
    V2("2") {
        @Override
        ReadOnlyArchiveIndex createIndex(@NotNull final Properties properties) {
            return new ArchiveIndexV2(properties);
        }
    };

    /**
     * The textual form of the version as it is stored in the index file.
     */
    private final String version;

    IndexVersion(final String version) {
        this.version = version;
    }

    /**
     * Resolves the index version for the given version string.
     *
     * @param version the version string read from the index, may be {@code null}
     * @return the constant whose version string equals the input, or {@link #V1}
     * when the input is {@code null} or matches no constant
     */
    public static IndexVersion forVersionString(final String version) {
        // V1 is the default for a missing or unrecognised version string.
        IndexVersion resolved = V1;
        for (final var candidate : values()) {
            if (candidate.version.equals(version)) {
                resolved = candidate;
                break;
            }
        }
        return resolved;
    }

    /**
     * Instantiates a read-only archive index from the given properties.
     *
     * @param properties the properties
     * @return the read-only archive index
     */
    abstract ReadOnlyArchiveIndex createIndex(@NotNull Properties properties);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package com.github.nagyesta.filebarj.io.stream;

import com.github.nagyesta.filebarj.io.stream.internal.model.BarjCargoEntityIndex;

/**
* A read-only representation of an archive index file.
* <p>
* Exposes the summary values of the index (chunk count, chunk sizes, total size,
* entity count) and resolves individual entity entries by their key prefix.
*/
public interface ReadOnlyArchiveIndex {

/**
* The name of the property that contains the version of the index specification.
*/
String INDEX_VERSION = "version";

/**
* Returns the number of chunks recorded in the index.
*
* @return the number of chunks
*/
int getNumberOfChunks();

/**
* Returns the maximum chunk size recorded in the index.
*
* @return the maximum chunk size in bytes
*/
long getMaxChunkSizeInBytes();

/**
* Returns the size of the last chunk recorded in the index.
*
* @return the size of the last chunk in bytes
*/
long getLastChunkSizeInBytes();

/**
* Returns the total size recorded in the index.
*
* @return the total size in bytes
*/
long getTotalSize();

/**
* Returns the total number of entities recorded in the index.
*
* @return the number of entities
*/
long getTotalEntities();

/**
* Returns the entity index entry that is stored under the given key prefix.
*
* @param prefix the prefix of the property keys that belong to the entity
* @return the entity index entry
*/
BarjCargoEntityIndex entity(String prefix);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.github.nagyesta.filebarj.io.stream.index;

import com.github.nagyesta.filebarj.io.stream.IndexVersion;
import com.github.nagyesta.filebarj.io.stream.ReadOnlyArchiveIndex;
import com.github.nagyesta.filebarj.io.stream.internal.model.BarjCargoEntityIndex;
import lombok.Getter;
import org.jetbrains.annotations.NotNull;

import java.util.Properties;

/**
 * Read-only view of an archive index stored in the V1 format.
 * <p>
 * The chunk related keys of this format use the {@code cnunk} spelling. The literals
 * are intentionally left as they are, because they are the keys this parser looks up.
 */
@Getter
public class ArchiveIndexV1 implements ReadOnlyArchiveIndex {

    private static final String LAST_ENTITY_INDEX_PROPERTY = "last.entity.index";
    private static final String LAST_CHUNK_INDEX_PROPERTY = "last.cnunk.index";
    private static final String LAST_CHUNK_SIZE_PROPERTY = "last.cnunk.size";
    private static final String MAX_CHUNK_SIZE_PROPERTY = "max.cnunk.size";
    private static final String TOTAL_SIZE_PROPERTY = "total.size";
    private final Properties properties;
    private final IndexVersion indexVersion;
    private final long totalEntities;
    private final int numberOfChunks;
    private final long maxChunkSizeInBytes;
    private final long lastChunkSizeInBytes;
    private final long totalSize;

    /**
     * Parses the summary values of a V1 index eagerly from the given properties.
     *
     * @param properties the raw key-value content of the index file
     * @throws NumberFormatException if a required numeric property is missing or malformed
     */
    public ArchiveIndexV1(@NotNull final Properties properties) {
        this.properties = properties;
        this.indexVersion = IndexVersion.forVersionString(properties.getProperty(INDEX_VERSION));
        this.totalEntities = readLong(properties, LAST_ENTITY_INDEX_PROPERTY);
        this.numberOfChunks = readInt(properties, LAST_CHUNK_INDEX_PROPERTY);
        this.maxChunkSizeInBytes = readLong(properties, MAX_CHUNK_SIZE_PROPERTY);
        this.lastChunkSizeInBytes = readLong(properties, LAST_CHUNK_SIZE_PROPERTY);
        this.totalSize = readLong(properties, TOTAL_SIZE_PROPERTY);
    }

    /**
     * Returns the entity index entry that is stored under the given key prefix.
     *
     * @param prefix the prefix of the property keys that belong to the entity
     * @return the entity index entry read from the backing properties
     */
    @Override
    public BarjCargoEntityIndex entity(@NotNull final String prefix) {
        return BarjCargoEntityIndex.fromProperties(properties, prefix);
    }

    /**
     * Reads a mandatory {@code long} property and names the key when it cannot be parsed.
     */
    private static long readLong(final Properties source, final String key) {
        final String raw = source.getProperty(key);
        try {
            return Long.parseLong(raw);
        } catch (final NumberFormatException e) {
            throw invalidProperty(key, raw, e);
        }
    }

    /**
     * Reads a mandatory {@code int} property and names the key when it cannot be parsed.
     */
    private static int readInt(final Properties source, final String key) {
        final String raw = source.getProperty(key);
        try {
            return Integer.parseInt(raw);
        } catch (final NumberFormatException e) {
            throw invalidProperty(key, raw, e);
        }
    }

    /**
     * Creates the exception reported for a missing or malformed property.
     * The type stays {@link NumberFormatException}, which is what the plain parse call
     * raised before, so existing handlers keep working.
     */
    private static NumberFormatException invalidProperty(
            final String key,
            final String raw,
            final NumberFormatException cause) {
        final NumberFormatException failure = new NumberFormatException(
                "Archive index property \"" + key + "\" is missing or not a valid number: " + raw);
        failure.initCause(cause);
        return failure;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package com.github.nagyesta.filebarj.io.stream.index;

import com.github.nagyesta.filebarj.io.stream.IndexVersion;
import com.github.nagyesta.filebarj.io.stream.ReadOnlyArchiveIndex;
import com.github.nagyesta.filebarj.io.stream.internal.model.BarjCargoEntityIndex;
import lombok.Builder;
import lombok.Getter;
import org.jetbrains.annotations.NotNull;

import java.util.Properties;

import static com.github.nagyesta.filebarj.io.stream.BarjCargoUtil.COLON;
import static com.github.nagyesta.filebarj.io.stream.BarjCargoUtil.LINE_BREAK;

/**
 * Archive index in the V2 format.
 * <p>
 * An instance is either parsed from the properties of an existing index file, or
 * assembled through the builder from known values in order to render the footer
 * with {@link #footerAsString()}.
 */
@Getter
public class ArchiveIndexV2 implements ReadOnlyArchiveIndex {

    private static final String LAST_ENTITY_INDEX_PROPERTY = "last.entity.index";
    private static final String LAST_CHUNK_INDEX_PROPERTY = "last.chunk.index";
    private static final String LAST_CHUNK_SIZE_PROPERTY = "last.chunk.size";
    private static final String MAX_CHUNK_SIZE_PROPERTY = "max.chunk.size";
    private static final String TOTAL_SIZE_PROPERTY = "total.size";
    private final Properties properties;
    private final IndexVersion indexVersion;
    private final long totalEntities;
    private final int numberOfChunks;
    private final long maxChunkSizeInBytes;
    private final long lastChunkSizeInBytes;
    private final long totalSize;

    /**
     * Parses the summary values of a V2 index eagerly from the given properties.
     *
     * @param properties the raw key-value content of the index file
     * @throws NumberFormatException if a required numeric property is missing or malformed
     */
    public ArchiveIndexV2(@NotNull final Properties properties) {
        this.properties = properties;
        this.indexVersion = IndexVersion.forVersionString(properties.getProperty(INDEX_VERSION));
        this.totalEntities = readLong(properties, LAST_ENTITY_INDEX_PROPERTY);
        this.numberOfChunks = readInt(properties, LAST_CHUNK_INDEX_PROPERTY);
        this.maxChunkSizeInBytes = readLong(properties, MAX_CHUNK_SIZE_PROPERTY);
        this.lastChunkSizeInBytes = readLong(properties, LAST_CHUNK_SIZE_PROPERTY);
        this.totalSize = readLong(properties, TOTAL_SIZE_PROPERTY);
    }

    /**
     * Creates a V2 index from already known summary values.
     * <p>
     * No properties are retained by this constructor, therefore {@link #entity(String)}
     * is not available on the resulting instance.
     *
     * @param totalSize            the total size
     * @param lastChunkSizeInBytes the size of the last chunk in bytes
     * @param maxChunkSizeInBytes  the maximum chunk size in bytes
     * @param numberOfChunks       the number of chunks
     * @param totalEntities        the total number of entities
     */
    @Builder
    public ArchiveIndexV2(
            final long totalSize,
            final long lastChunkSizeInBytes,
            final long maxChunkSizeInBytes,
            final int numberOfChunks,
            final long totalEntities) {
        this.indexVersion = IndexVersion.V2;
        this.properties = null;
        this.totalSize = totalSize;
        this.lastChunkSizeInBytes = lastChunkSizeInBytes;
        this.maxChunkSizeInBytes = maxChunkSizeInBytes;
        this.numberOfChunks = numberOfChunks;
        this.totalEntities = totalEntities;
    }

    /**
     * Returns the entity index entry that is stored under the given key prefix.
     *
     * @param prefix the prefix of the property keys that belong to the entity
     * @return the entity index entry read from the backing properties
     * @throws IllegalStateException if this instance was created through the builder
     *                               and therefore has no backing properties
     */
    @Override
    public BarjCargoEntityIndex entity(@NotNull final String prefix) {
        if (properties == null) {
            // Builder-created instances only carry the footer values.
            throw new IllegalStateException(
                    "Entity lookup is not available: the index was built without backing properties.");
        }
        return BarjCargoEntityIndex.fromProperties(properties, prefix);
    }

    /**
     * Renders the footer lines of the index, one {@code key + COLON + value} entry per
     * line, ending with the index version entry.
     *
     * @return the footer as a string
     */
    public String footerAsString() {
        return LAST_CHUNK_INDEX_PROPERTY + COLON + numberOfChunks + LINE_BREAK
                + LAST_CHUNK_SIZE_PROPERTY + COLON + lastChunkSizeInBytes + LINE_BREAK
                + MAX_CHUNK_SIZE_PROPERTY + COLON + maxChunkSizeInBytes + LINE_BREAK
                + LAST_ENTITY_INDEX_PROPERTY + COLON + totalEntities + LINE_BREAK
                + TOTAL_SIZE_PROPERTY + COLON + totalSize + LINE_BREAK
                + INDEX_VERSION + COLON + indexVersion.getVersion() + LINE_BREAK;
    }

    /**
     * Reads a mandatory {@code long} property and names the key when it cannot be parsed.
     */
    private static long readLong(final Properties source, final String key) {
        final String raw = source.getProperty(key);
        try {
            return Long.parseLong(raw);
        } catch (final NumberFormatException e) {
            throw invalidProperty(key, raw, e);
        }
    }

    /**
     * Reads a mandatory {@code int} property and names the key when it cannot be parsed.
     */
    private static int readInt(final Properties source, final String key) {
        final String raw = source.getProperty(key);
        try {
            return Integer.parseInt(raw);
        } catch (final NumberFormatException e) {
            throw invalidProperty(key, raw, e);
        }
    }

    /**
     * Creates the exception reported for a missing or malformed property.
     * The type stays {@link NumberFormatException}, which is what the plain parse call
     * raised before, so existing handlers keep working.
     */
    private static NumberFormatException invalidProperty(
            final String key,
            final String raw,
            final NumberFormatException cause) {
        final NumberFormatException failure = new NumberFormatException(
                "Archive index property \"" + key + "\" is missing or not a valid number: " + raw);
        failure.initCause(cause);
        return failure;
    }
}
Loading

0 comments on commit 6956ea7

Please sign in to comment.