Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support s3 paths #103

Merged
merged 9 commits into from
Jul 18, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ StatsBase = "0.27, 0.28, 0.29, 0.30, 0.31, 0.32, 0.33"
julia = "1.3"

[extras]
Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20"
ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1"
LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Expand All @@ -33,4 +34,4 @@ ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7"
WAV = "8149f6b0-98f6-5db9-b78f-408fbbb8ef88"

[targets]
test = ["Test", "MLDatasets", "TestImages", "ImageMagick", "Logging", "LightGraphs", "Plots", "PyPlot", "WAV", "Tracker", "ValueHistories"]
test = ["Test", "MLDatasets", "TestImages", "ImageMagick", "Logging", "LightGraphs", "Plots", "PyPlot", "WAV", "Tracker", "ValueHistories", "Minio"]
10 changes: 5 additions & 5 deletions src/Deserialization/deserialization.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
export summary_iterator

"""
is_valid_event(f::IOStream) => Bool
is_valid_event(f::IO) => Bool

Returns true if the stream points to a valid TensorBoard event, false otherwise.
This is accomplished by checking the crc checksum on the header (first 8
bytes) of the event.
"""
function is_valid_event(f::IOStream)
function is_valid_event(f::IO)
eof(f) && return false

header = read(f, 8)
Expand All @@ -23,13 +23,13 @@ end


"""
read_event(f::IOStream) => Event
read_event(f::IO) => Event

Reads the stream `f`, assuming it's encoded according to TensorBoard format,
and decodes a single event.
This function assumes that `eof(f) == false`.
"""
function read_event(f::IOStream)
function read_event(f::IO)
header = read(f, 8)
crc_header = read(f, 4)

Expand Down Expand Up @@ -113,7 +113,7 @@ Iterator for iterating along a fstream.
The optional argument `stop_at_step` tells at what step the iterator should stop.
"""
struct TBEventFileIterator
fstream::IOStream
fstream::IO
stop_at_step::Int
end
TBEventFileIterator(fstream) = TBEventFileIterator(fstream, typemax(Int))
Expand Down
19 changes: 11 additions & 8 deletions src/Loggers/LogEmbeddings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ function log_embeddings(logger::TBLogger, name::AbstractString, mat::AbstractMat
write_pbtext(name, logger.logdir, matrix_path, metadata, img_labels, step)
end

function write_matrix(mat::AbstractMatrix, matrix_path::AbstractString)
function write_matrix(mat::AbstractMatrix, matrix_path)
matrix_path = joinpath(matrix_path, "tensor.tsv")
mat = convert(Array{Float64,2}, mat)
open(matrix_path, "w") do file
Expand All @@ -45,7 +45,7 @@ function write_matrix(mat::AbstractMatrix, matrix_path::AbstractString)
end
end

function write_metadata(metadata::AbstractArray, matrix_path::AbstractString)
function write_metadata(metadata::AbstractArray, matrix_path)
matrix_path = joinpath(matrix_path, "metadata.tsv")
open(matrix_path, "w") do file
for x in metadata
Expand All @@ -54,7 +54,7 @@ function write_metadata(metadata::AbstractArray, matrix_path::AbstractString)
end
end

function write_sprite(img_labels::AbstractArray, matrix_path::AbstractString)
function write_sprite(img_labels::AbstractArray, matrix_path)
n, _, _, w = size(img_labels)
sqrt(n)*w <= 8192 || throw(ErrorException("the value √N * W must be less than or equal to 8192 because of tensorboard restrictions"))
total_pixels = size(img_labels, 1)*size(img_labels, 3)*size(img_labels, 4)
Expand All @@ -65,7 +65,9 @@ function write_sprite(img_labels::AbstractArray, matrix_path::AbstractString)
arranged_augment_square_CHW = zeros((3, sprite_size, sprite_size))
arranged_augment_square_CHW[:, 1:size(arranged_img_CHW, 2), :] = arranged_img_CHW
sprite_path = joinpath(matrix_path, "sprite.png")
save(sprite_path, colorview(RGB, arranged_augment_square_CHW))
open(sprite_path; write=true) do io
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is annoying that this is needed.
Should an issue be opened in FileIO.jl?

Copy link
Member Author

@ericphanson ericphanson Jul 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So there are two issues here: one is easy (JuliaCloud/AWSS3.jl#174); the other is that ImageMagick types paths as AbstractString, which is too strict (FilePathsBase PosixPaths, for example, are probably OK too), but it isn't fully generic because it eventually goes to

ccall((:MagickWriteImages, libwand), Cint, (Ptr{Cvoid}, Ptr{UInt8}, Cint), wand, filename, true)

in writeimage and I don't think that can handle s3 paths. Possibly ImageMagick should add another method for writeimage where unknown filename types are handled by opening a stream like we do here?

save(Stream{format"PNG"}(io), colorview(RGB, arranged_augment_square_CHW))
end
end

function make_grid_of_images(img_labels::AbstractArray, ncols::Integer)
Expand All @@ -89,21 +91,22 @@ function make_grid_of_images(img_labels::AbstractArray, ncols::Integer)
grid
end

function write_pbtext(name::AbstractString, path::AbstractString, matrix_path::AbstractString, metadata, img_labels, step)
function write_pbtext(name::AbstractString, path, matrix_path, metadata, img_labels, step)
metadata_path = joinpath(matrix_path, "metadata.tsv")
img_labels_path = joinpath(matrix_path, "sprite.png")
matrix_path = joinpath(matrix_path, "tensor.tsv")
path = joinpath(path, "projector_config.pbtxt")
isfile(path) || write(path, "") # workaround https://github.com/JuliaCloud/AWSS3.jl/issues/173
open(path, "a") do file
write(file, "embeddings {\n")
write(file, "tensor_name: \""*name*":"*repr(step)*"\"\n")
write(file, "tensor_path: \""*matrix_path*"\"\n")
write(file, string("tensor_path: \"", matrix_path, "\"\n"))
if metadata != nothing
write(file, "metadata_path: \""*metadata_path*"\"\n")
write(file, string("metadata_path: \"", metadata_path, "\"\n"))
end
if img_labels != nothing
write(file, "sprite {\n")
write(file, "image_path: \""*img_labels_path*"\"\n")
write(file, string("image_path: \"", img_labels_path, "\"\n"))
write(file, "single_image_dim: "*string(size(img_labels, 4))*"\n")
write(file, "single_image_dim: "*string(size(img_labels, 3))*"\n")
write(file, "}\n")
Expand Down
12 changes: 6 additions & 6 deletions src/TBLogger.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
mutable struct TBLogger <: AbstractLogger
logdir::String
file::IOStream
all_files::Dict{String, IOStream}
logdir::Any
file::IO
all_files::Dict{String, IO}
global_step::Int
step_increment::Int
min_level::LogLevel
Expand Down Expand Up @@ -89,7 +89,7 @@ function init_logdir(logdir, overwrite=tb_increment)
end

"""
create_eventfile(logdir, [purge_step=nothing; time=time()]) -> IOStream
create_eventfile(logdir, [purge_step=nothing; time=time()]) -> IO

Creates a protobuffer events file in the logdir and returns the IO buffer for
writing to it. If `purge_step::Int` is passed then a special event is written
Expand Down Expand Up @@ -144,14 +144,14 @@ logdir(lg::TBLogger) = lg.logdir
"""
get_file(lg::TBLogger) -> IO

Returns the main `file` IOStream object of Logger `lg`.
Returns the main `file` IO object of Logger `lg`.
"""
get_file(lg::TBLogger) = lg.file

"""
get_file(lg, tags::String...) -> IO

Returns the `file` IOStream object of Logger `lg` writing to the tag
Returns the `file` IO object of Logger `lg` writing to the tag
`tags1/tags2.../tagsN`.
"""
function get_file(lg::TBLogger, tags::String...)
Expand Down
4 changes: 2 additions & 2 deletions src/event.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function make_event(logger::TBLogger, summary::GraphDef; step=TensorBoardLogger.
end

"""
write_event(out::IOStream, event::Event)
write_event(out::IO, event::Event)

Serializes the Event `event` to the `out` stream according to the TensorBoard
format. The format follows the following rule (in bytes)
Expand All @@ -26,7 +26,7 @@ format. The format follows the following rule (in bytes)
#3 16...N - serialized `event` as protobuffer
#4 N..N+8 UInt32 masked_CRC of #3
"""
function write_event(out::IOStream, event::Event)
function write_event(out::IO, event::Event)
data = PipeBuffer();
_writeproto(data, event)

Expand Down
36 changes: 23 additions & 13 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,29 @@ using TestImages
using ImageCore
using FileIO
using LightGraphs
using Minio


test_log_dir = "test_logs/"
ENV["DATADEPS_ALWAYS_ACCEPT"] = true
ENV["GKSwstype"] = "100"

ENV["DATADEPS_ALWAYS_ACCEPT"] = true

@testset "TensorBoardLogger" begin
# Setup Minio server to test s3 paths
minio_server = Minio.Server(mktempdir(); address="localhost:9001")
run(minio_server, wait=false)
config = MinioConfig("http://localhost:9001")
s3_create_bucket(config, "tensorboard-tests")
s3_log_dir = S3Path("s3://tensorboard-tests/logdir/"; config=config)


@testset "TensorBoardLogger with path $(test_log_dir)" for test_log_dir in ("test_logs/", s3_log_dir)

@testset "TBLogger" begin
include("test_TBLogger.jl")
end

@testset "Scalar Value Logger" begin
logger = TBLogger(test_log_dir*"t", tb_overwrite)
logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite)
step = 1

ss = TensorBoardLogger.scalar_summary("test", 12.0)
Expand Down Expand Up @@ -49,7 +57,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true
end

@testset "Histogram Value Logger" begin
logger = TBLogger(test_log_dir*"t", tb_overwrite)
logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite)
step = 1

x0 = 0.5+step/30; s0 = 0.5/(step/20);
Expand Down Expand Up @@ -96,7 +104,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true
end

@testset "Text Logger" begin
logger = TBLogger(test_log_dir*"t", tb_overwrite)
logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite)
step = 1

ss = TensorBoardLogger.text_summary("test", "Hello World")
Expand Down Expand Up @@ -127,7 +135,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true
end

@testset "Image Logger" begin
logger = TBLogger(test_log_dir*"t", tb_overwrite)
logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite)
step = 1

# The following tests are akin to @test_nothrow, which does not exist.
Expand Down Expand Up @@ -217,7 +225,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true
end

@testset "LogInterface" begin
logger = TBLogger(test_log_dir*"t", tb_overwrite)
logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite)
woman = testimage("woman_blonde")
mri = testimage("mri")
with_logger(logger) do
Expand All @@ -238,7 +246,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true
end

@testset "Audio Logger" begin
logger = TBLogger(test_log_dir*"t", tb_overwrite)
logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite)
step = 1

ss = TensorBoardLogger.audio_summary("test", rand(800), 800)
Expand All @@ -255,7 +263,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true
end

@testset "Graph Logger" begin
logger = TBLogger(test_log_dir*"t", tb_overwrite)
logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite)
step = 1
ss = TensorBoardLogger.graph_summary(DiGraph(1), ["1"], ["1"], ["cpu"], [nothing])
@test isa(ss, TensorBoardLogger.GraphDef)
Expand All @@ -272,14 +280,14 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true
end

@testset "Embedding Logger" begin
logger = TBLogger(test_log_dir*"t", tb_overwrite)
logger = TBLogger(joinpath(test_log_dir, "t/"), tb_overwrite)
step = 1
mat = rand(4, 4)
metadata = rand(4, 10)
metadata_header = Array(collect(1:10))
imgs = TBImages(rand(8, 8, 3, 4), HWCN)
@test π != log_embeddings(logger, "random1", mat, metadata = metadata, metadata_header = metadata_header, img_labels = imgs, step = step)
@test π != log_embeddings(logger, "random2", mat, step = step+1)
@test π != log_embeddings(logger, "random1/", mat, metadata = metadata, metadata_header = metadata_header, img_labels = imgs, step = step)
@test π != log_embeddings(logger, "random2/", mat, step = step+1)

close.(values(logger.all_files))
end
Expand Down Expand Up @@ -318,3 +326,5 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true
rm(test_log_dir, force=true, recursive=true)

end

kill(minio_server)