diff --git a/CHANGELOG.md b/CHANGELOG.md index f0e43fc..f7164be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 3.2.2 + - Fixed lack of consistent fingerprints on Hash/Map objects [#55](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/55) + ## 3.2.1 - Fixed concurrent SHA fingerprinting by making the instances thread local diff --git a/lib/logstash/filters/fingerprint.rb b/lib/logstash/filters/fingerprint.rb index ae08657..78a3063 100644 --- a/lib/logstash/filters/fingerprint.rb +++ b/lib/logstash/filters/fingerprint.rb @@ -119,12 +119,12 @@ def filter(event) if @concatenate_sources || @concatenate_all_fields to_string = "" if @concatenate_all_fields - event.to_hash.sort.map do |k,v| + deep_sort_hashes(event.to_hash).each do |k,v| to_string << "|#{k}|#{v}" end else @source.sort.each do |k| - to_string << "|#{k}|#{event.get(k)}" + to_string << "|#{k}|#{deep_sort_hashes(event.get(k))}" end end to_string << "|" @@ -134,9 +134,9 @@ def filter(event) @source.each do |field| next unless event.include?(field) if event.get(field).is_a?(Array) - event.set(@target, event.get(field).collect { |v| fingerprint(v) }) + event.set(@target, event.get(field).collect { |v| fingerprint(deep_sort_hashes(v)) }) else - event.set(@target, fingerprint(event.get(field))) + event.set(@target, fingerprint(deep_sort_hashes(event.get(field)))) end end end @@ -145,6 +145,20 @@ def filter(event) end private + def deep_sort_hashes(object) + case object + when Hash + sorted_hash = Hash.new + object.sort.each do |sorted_key, value| + sorted_hash[sorted_key] = deep_sort_hashes(value) + end + sorted_hash + when Array + object.map {|element| deep_sort_hashes(element) } + else + object + end + end def fingerprint_ipv4_network(ip_string) # in JRuby 1.7.11 outputs as US-ASCII diff --git a/logstash-filter-fingerprint.gemspec b/logstash-filter-fingerprint.gemspec index e76822f..f92b05d 100644 --- a/logstash-filter-fingerprint.gemspec +++ b/logstash-filter-fingerprint.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-filter-fingerprint' - s.version = '3.2.1' + s.version = '3.2.2' s.licenses = ['Apache-2.0'] s.summary = "Fingerprints fields by replacing values with a consistent hash" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/filters/fingerprint_spec.rb b/spec/filters/fingerprint_spec.rb index fea3c95..6fbba60 100644 --- a/spec/filters/fingerprint_spec.rb +++ b/spec/filters/fingerprint_spec.rb @@ -230,4 +230,58 @@ end end + describe "tolerance to hash order" do + # insertion order can influence the result of to_hash's keys + let(:data1) { { + "a" => {"a0" => 0, "a1" => 1}, + "b" => {"b0" => 0, "b1" => 1}, + } } + let(:event1) { LogStash::Event.new(data1) } + let(:data2) { { + "b" => {"b1" => 1, "b0" => 0}, + "a" => {"a1" => 1, "a0" => 0}, + } } + let(:event2) { LogStash::Event.new(data2) } + let(:config) { { "source" => [ "a" ] } } + + before(:each) do + # for testing purposes we want to ensure the hash order is different. + # since we can't easily control the order on the underlying Map, + # we're mocking the order here: + allow(event1).to receive(:to_hash).and_return(data1) + allow(event2).to receive(:to_hash).and_return(data2) + # by default event.get(key) fetches data from the event. + # mocking the default value has to be done first, and only + # then we can mock the getting "a" and "b" + allow(event1).to receive(:get).and_call_original + allow(event2).to receive(:get).and_call_original + # mock event.get("a") and event.get("b") for both events + # so we can inject an inconsistent order for the tests + allow(event1).to receive(:get).with("a") {|arg| data1["a"] } + allow(event1).to receive(:get).with("b") {|arg| data1["b"] } + allow(event2).to receive(:get).with("a") {|arg| data2["a"] } + allow(event2).to receive(:get).with("b") {|arg| data2["b"] } + plugin.filter(event1) + plugin.filter(event2) + end + it "computes the same hash" do + # confirm the order of the keys in the nested hash is different + # (of course it is, we just mocked the to_hash return) + expect(event1.to_hash["a"].keys).to_not eq(event2.to_hash["a"].keys) + # let's check that the fingerprint doesn't care about the insertion order + expect(event1.get("fingerprint")).to eq(event2.get("fingerprint")) + end + context "concatenate_sources" do + let("config") { { "source" => [ "a", "b"], "concatenate_sources" => true } } + it "computes the same hash" do + expect(event1.get("fingerprint")).to eq(event2.get("fingerprint")) + end + end + context "concatenate_all_fields => true" do + let(:config) { { "concatenate_all_fields" => true } } + it "computes the same hash" do + expect(event1.get("fingerprint")).to eq(event2.get("fingerprint")) + end + end + end end