diff --git a/src/ActiveMonitors.h b/src/ActiveMonitors.h index 633728aa..10064d66 100644 --- a/src/ActiveMonitors.h +++ b/src/ActiveMonitors.h @@ -176,7 +176,7 @@ struct ActiveMonitors : NonCopyable { auto res = allAuthors.try_emplace(Bytes32(f.authors->at(i))); res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); } - } else if (f.tags.size()) { + } else if (f.tags.size() || f.andTags.size()) { for (const auto &[tagName, filterSet] : f.tags) { for (size_t i = 0; i < filterSet.size(); i++) { auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); @@ -184,6 +184,13 @@ struct ActiveMonitors : NonCopyable { res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); } } + for (const auto &[tagName, filterSet] : f.andTags) { + for (size_t i = 0; i < filterSet.size(); i++) { + auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); + auto res = allTags.try_emplace(tagSpec); + res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); + } + } } else if (f.kinds) { for (size_t i = 0; i < f.kinds->size(); i++) { auto res = allKinds.try_emplace(f.kinds->at(i)); @@ -211,7 +218,7 @@ struct ActiveMonitors : NonCopyable { monSet.erase(&f); if (monSet.empty()) allAuthors.erase(author); } - } else if (f.tags.size()) { + } else if (f.tags.size() || f.andTags.size()) { for (const auto &[tagName, filterSet] : f.tags) { for (size_t i = 0; i < filterSet.size(); i++) { auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); @@ -220,6 +227,14 @@ struct ActiveMonitors : NonCopyable { if (monSet.empty()) allTags.erase(tagSpec); } } + for (const auto &[tagName, filterSet] : f.andTags) { + for (size_t i = 0; i < filterSet.size(); i++) { + auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); + auto &monSet = allTags.at(tagSpec); + monSet.erase(&f); + if (monSet.empty()) allTags.erase(tagSpec); + } + } } else if (f.kinds) { for (size_t i = 0; i < f.kinds->size(); i++) { uint64_t kind = f.kinds->at(i); diff --git a/src/DBQuery.h b/src/DBQuery.h index 7e91f4bd..1808b036 100644 --- a/src/DBQuery.h +++ b/src/DBQuery.h @@ -120,28 +120,37 @@ struct DBScan : NonCopyable { } ); } - } else if (f.tags.size()) { + } else if (f.tags.size() || f.andTags.size()) { indexDbi = env.dbi_Event__tag; desc = "Tag"; char tagName = '\0'; + bool useAndTag = false; { - uint64_t numTags = MAX_U64; + uint64_t bestSize = MAX_U64; for (const auto &[tn, filterSet] : f.tags) { - if (filterSet.size() < numTags) { - numTags = filterSet.size(); + if (filterSet.size() < bestSize) { + bestSize = filterSet.size(); tagName = tn; + useAndTag = false; + } + } + // AND tags only need 1 cursor (all values must match, so scanning for one suffices) + for (const auto &[tn, filterSet] : f.andTags) { + if (1 < bestSize) { + bestSize = 1; + tagName = tn; + useAndTag = true; } } } - const auto &filterSet = f.tags.at(tagName); - - cursors.reserve(filterSet.size()); - for (uint64_t i = 0; i < filterSet.size(); i++) { + if (useAndTag) { + const auto &filterSet = f.andTags.at(tagName); + cursors.reserve(1); std::string search; search += tagName; - search += filterSet.at(i); + search += filterSet.at(0); cursors.emplace_back( search + std::string(8, '\xFF'), @@ -150,6 +159,23 @@ struct DBScan : NonCopyable { return k.size() == search.size() + 8 && k.starts_with(search) ? KeyMatchResult::Yes : KeyMatchResult::No; } ); + } else { + const auto &filterSet = f.tags.at(tagName); + + cursors.reserve(filterSet.size()); + for (uint64_t i = 0; i < filterSet.size(); i++) { + std::string search; + search += tagName; + search += filterSet.at(i); + + cursors.emplace_back( + search + std::string(8, '\xFF'), + MAX_U64, + [search](std::string_view k){ + return k.size() == search.size() + 8 && k.starts_with(search) ? KeyMatchResult::Yes : KeyMatchResult::No; + } + ); + } } } else if (f.authors && f.kinds && f.authors->size() * f.kinds->size() < 1'000) { indexDbi = env.dbi_Event__pubkeyKind; diff --git a/src/apps/relay/RelayWebsocket.cpp b/src/apps/relay/RelayWebsocket.cpp index 45924964..7f9ebd8f 100644 --- a/src/apps/relay/RelayWebsocket.cpp +++ b/src/apps/relay/RelayWebsocket.cpp @@ -53,7 +53,7 @@ void RelayServer::runWebsocket(ThreadPool::Thread &thr) { auto supportedNips = []{ - tao::json::value output = tao::json::value::array({ 1, 2, 4, 9, 11, 22, 28, 40, 70 }); + tao::json::value output = tao::json::value::array({ 1, 2, 4, 9, 11, 22, 28, 40, 70, 91 }); if (cfg().relay__maxFilterLimitCount > 0) output.push_back(45); if (cfg().relay__negentropy__enabled) output.push_back(77); diff --git a/src/filters.h b/src/filters.h index 066adaac..1d9ee26e 100644 --- a/src/filters.h +++ b/src/filters.h @@ -113,6 +113,7 @@ struct NostrFilter { std::optional authors; std::optional kinds; flat_hash_map tags; + flat_hash_map andTags; // NIP-91: AND tag filters uint64_t since = 0; uint64_t until = MAX_U64; @@ -192,6 +193,29 @@ struct NostrFilter { } catch (std::exception &e) { throw herr("error parsing ", k, ": ", e.what()); } + } else if (k.starts_with('&')) { + checkArray(); + if (v.get_array().size() == 0) { + neverMatch = true; + continue; + } + numMajorFields++; + + try { + if (k.size() == 2) { + char tag = k[1]; + + if (tag == 'p' || tag == 'e') { + andTags.emplace(tag, FilterSetBytes(v, true, 32, 32)); + } else { + andTags.emplace(tag, FilterSetBytes(v, false, 0, MAX_INDEXED_TAG_VAL_SIZE)); + } + } else { + throw herr("unindexed tag filter"); + } + } catch (std::exception &e) { + throw herr("error parsing ", k, ": ", e.what()); + } } else if (k == "since") { since = jsonGetUnsigned(v, "error parsing since"); } else if (k == "until") { @@ -203,11 +227,11 @@ struct NostrFilter { } } - if (tags.size() > 3) throw herr("too many tags in filter"); // O(N^2) in matching, so prevent it from being too large + if (tags.size() + andTags.size() > 3) throw herr("too many tags in filter"); // O(N^2) in matching, so prevent it from being too large if (limit > maxFilterLimit) limit = maxFilterLimit; - indexOnlyScans = (numMajorFields <= 1) || (numMajorFields == 2 && authors && kinds); + indexOnlyScans = andTags.size() == 0 && ((numMajorFields <= 1) || (numMajorFields == 2 && authors && kinds)); } bool doesMatchTimes(uint64_t created) const { @@ -239,11 +263,29 @@ struct NostrFilter { if (!foundMatch) return false; } + // NIP-91: AND tag filters — ALL values must be present + for (const auto &[tag, filt] : andTags) { + for (size_t i = 0; i < filt.size(); i++) { + std::string val = filt.at(i); + bool found = false; + + ev.foreachTag([&](char tagName, std::string_view tagVal){ + if (tagName == tag && tagVal == val) { + found = true; + return false; + } + return true; + }); + + if (!found) return false; + } + } + return true; } bool isFullDbQuery() { - return !ids && !authors && !kinds && tags.size() == 0; + return !ids && !authors && !kinds && tags.size() == 0 && andTags.size() == 0; } }; diff --git a/test/dumbFilter.pl b/test/dumbFilter.pl index 1aefbec1..88655b56 100755 --- a/test/dumbFilter.pl +++ b/test/dumbFilter.pl @@ -115,5 +115,21 @@ sub doesMatchSingle { return 0 if !$found; } + # NIP-91: AND tag filters — ALL values must be present + foreach my $key (keys %$filter) { + next unless $key =~ /^&(.)$/; + my $tagName = $1; + foreach my $search (@{ $filter->{$key} }) { + my $found; + foreach my $tag (@{ $ev->{tags} }) { + if ($tag->[0] eq $tagName && $tag->[1] eq $search) { + $found = 1; + last; + } + } + return 0 if !$found; + } + } + return 1; } diff --git a/test/filterFuzzTest.pl b/test/filterFuzzTest.pl index a49bf630..12c76ff2 100755 --- a/test/filterFuzzTest.pl +++ b/test/filterFuzzTest.pl @@ -151,6 +151,14 @@ sub genRandomFilterGroup { push @{$f->{'#t'}}, $topics->[int(rand() * @$topics)]; } } + + # NIP-91: AND tag filter + if (rand() < .15) { + $f->{'&t'} = []; + for (1..(rand()*3)+1) { + push @{$f->{'&t'}}, $topics->[int(rand() * @$topics)]; + } + } } if (rand() < .2) { diff --git a/test/genTestData.pl b/test/genTestData.pl new file mode 100644 index 00000000..b59030c9 --- /dev/null +++ b/test/genTestData.pl @@ -0,0 +1,71 @@ +#!/usr/bin/env perl + +# Generates synthetic nostr-like events for testing filters including NIP-91 AND tag filters. +# Output is JSONL suitable for `strfry import`. + +use strict; +use JSON::XS; + +my $numEvents = shift || 5000; + +sub fakehex { sprintf("%064x", int(rand() * 2**48) ^ ($_[0] * 7919)) } +my @pubkeys = map { fakehex($_) } (0..19); +my @event_ids = map { fakehex($_ + 1000) } (0..49); +my @topics = qw(bitcoin nos nostr nostrnovember gitlog introductions jb55 damus chat meme cat dog art music); +my @kinds = (0, 1, 3, 4, 6, 7, 30, 42); + +srand(42); # deterministic + +for my $i (0..$numEvents-1) { + my $pubkey = $pubkeys[int(rand() * @pubkeys)]; + my $kind = $kinds[int(rand() * @kinds)]; + my $created_at = 1640300802 + int(rand() * 86400 * 365); + my $content = "test event $i"; + + my @tags; + + # Add e-tags + if (rand() < 0.3) { + my $num_e = int(rand() * 3) + 1; + for (1..$num_e) { + push @tags, ["e", $event_ids[int(rand() * @event_ids)]]; + } + } + + # Add p-tags + if (rand() < 0.3) { + my $num_p = int(rand() * 2) + 1; + for (1..$num_p) { + push @tags, ["p", $pubkeys[int(rand() * @pubkeys)]]; + } + } + + # Add t-tags (important for NIP-91 AND filter testing) + if (rand() < 0.5) { + my $num_t = int(rand() * 4) + 1; + my %used; + for (1..$num_t) { + my $topic = $topics[int(rand() * @topics)]; + next if $used{$topic}++; + push @tags, ["t", $topic]; + } + } + + # Compute a fake but valid-looking id + my $id = sprintf("%064x", int(rand() * 2**48) ^ ($i * 104729)); + + # Fake sig (128 hex chars) + my $sig = sprintf("%064x", int(rand() * 2**48)) . sprintf("%064x", int(rand() * 2**48)); + + my $event = { + id => $id, + pubkey => $pubkey, + created_at => $created_at + 0, + kind => $kind + 0, + tags => \@tags, + content => $content, + sig => $sig, + }; + + print encode_json($event), "\n"; +} diff --git a/test/runTests.sh b/test/runTests.sh new file mode 100644 index 00000000..f6a07cff --- /dev/null +++ b/test/runTests.sh @@ -0,0 +1,276 @@ +#!/bin/sh +set -e + +echo "=== Generating test data ===" +perl test/genTestData.pl 5000 > /tmp/testdata.jsonl +echo "Generated $(wc -l < /tmp/testdata.jsonl) events" + +echo "" +echo "=== Creating strfry config ===" +cat > /tmp/strfry.conf << 'CONF' +relay { + bind = "127.0.0.1" + port = 7777 + info { + name = "test" + } + maxFilterLimit = 500 +} +db { + path = "/tmp/strfry-db/" +} +CONF + +mkdir -p /tmp/strfry-db + +echo "" +echo "=== Importing test data ===" +./strfry --config /tmp/strfry.conf import < /tmp/testdata.jsonl 2>&1 | tail -3 + +echo "" +echo "=== Running scan fuzz test (50 iterations) ===" +# We modify the test to run a fixed number of iterations instead of forever +SEED=42 timeout 120 perl -e ' +use strict; +use JSON::XS; + +# Source the test +do "test/filterFuzzTest.pl" if 0; + +# Inline the needed parts +my $kinds = [qw/1 7 4 42 0 30 3 6/]; +my $pubkeys = []; +my $ids = []; +my $topics = [qw/bitcoin nos nostr nostrnovember gitlog introductions jb55 damus chat meme cat dog art music/]; + +# Get actual pubkeys and ids from our test data +open my $fh, "<", "/tmp/testdata.jsonl" or die; +my %seen_pk; +my %seen_id; +while (<$fh>) { + my $ev = decode_json($_); + push @$pubkeys, $ev->{pubkey} unless $seen_pk{$ev->{pubkey}}++; + push @$ids, $ev->{id} unless $seen_id{$ev->{id}}++; + last if @$pubkeys >= 20 && @$ids >= 50; +} +close $fh; + +$ENV{STRFRY_CONFIG} = "/tmp/strfry.conf"; + +srand(42); +my $pass = 0; +my $fail = 0; + +for my $iter (1..50) { + my $f = genRandomFilterGroup(0); + my $fge = encode_json($f); + + my $resA = `./strfry --config /tmp/strfry.conf export --reverse 2>/dev/null | perl test/dumbFilter.pl \x27$fge\x27 | jq -r .id | sort | sha256sum`; + my $resB = `./strfry --config /tmp/strfry.conf scan --pause 1 --metrics \x27$fge\x27 2>/dev/null | jq -r .id | sort | sha256sum`; + + if ($resA eq $resB) { + $pass++; + print " scan iter $iter: PASS\n"; + } else { + $fail++; + print " scan iter $iter: FAIL\n"; + print " filter: $fge\n"; + print " export|dumbFilter: $resA"; + print " scan: $resB"; + } +} + +print "\n=== Scan results: $pass passed, $fail failed ===\n"; +exit($fail > 0 ? 1 : 0); + +sub genRandomFilterGroup { + my $useLimit = shift; + my $numFilters = $useLimit ? 1 : int(rand()*10)+1; + my @filters; + for (1..$numFilters) { + my $f = {}; + while (!keys %$f) { + if (rand() < .15) { + $f->{ids} = []; + for (1..(rand()*10)) { + push @{$f->{ids}}, $ids->[int(rand() * @$ids)]; + } + } + if (rand() < .3) { + $f->{authors} = []; + for (1..(rand()*5)) { + push @{$f->{authors}}, $pubkeys->[int(rand() * @$pubkeys)]; + } + } + if (rand() < .2) { + $f->{kinds} = []; + for (1..(rand()*5)) { + push @{$f->{kinds}}, 0+$kinds->[int(rand() * @$kinds)]; + } + } + if (rand() < .2) { + $f->{"#e"} = []; + for (1..(rand()*10)) { + push @{$f->{"#e"}}, $ids->[int(rand() * @$ids)]; + } + } + if (rand() < .2) { + $f->{"#p"} = []; + for (1..(rand()*5)) { + push @{$f->{"#p"}}, $pubkeys->[int(rand() * @$pubkeys)]; + } + } + if (rand() < .2) { + $f->{"#t"} = []; + for (1..(rand()*5)) { + push @{$f->{"#t"}}, $topics->[int(rand() * @$topics)]; + } + } + # NIP-91: AND tag filter + if (rand() < .15) { + $f->{"&t"} = []; + for (1..(rand()*3)+1) { + push @{$f->{"&t"}}, $topics->[int(rand() * @$topics)]; + } + } + } + if (rand() < .2) { + $f->{since} = 1640300802 + int(rand() * 86400*365); + } + if (rand() < .2) { + $f->{until} = 1640300802 + int(rand() * 86400*365); + } + if ($useLimit) { + $f->{limit} = 1 + int(rand() * 1000); + } + if ($f->{since} && $f->{until} && $f->{since} > $f->{until}) { + delete $f->{since}; + delete $f->{until}; + } + push @filters, $f; + } + return \@filters; +} +' 2>&1 + +echo "" +echo "=== Running scan-limit fuzz test (50 iterations) ===" +SEED=42 timeout 120 perl -e ' +use strict; +use JSON::XS; + +my $kinds = [qw/1 7 4 42 0 30 3 6/]; +my $pubkeys = []; +my $ids = []; +my $topics = [qw/bitcoin nos nostr nostrnovember gitlog introductions jb55 damus chat meme cat dog art music/]; + +open my $fh, "<", "/tmp/testdata.jsonl" or die; +my %seen_pk; +my %seen_id; +while (<$fh>) { + my $ev = decode_json($_); + push @$pubkeys, $ev->{pubkey} unless $seen_pk{$ev->{pubkey}}++; + push @$ids, $ev->{id} unless $seen_id{$ev->{id}}++; + last if @$pubkeys >= 20 && @$ids >= 50; +} +close $fh; + +srand(42); +my $pass = 0; +my $fail = 0; + +for my $iter (1..50) { + my $f = genRandomFilterGroup(1); + my $fge = encode_json($f); + + my $headCmd = "| head -n $f->[0]->{limit}"; + my $resA = `./strfry --config /tmp/strfry.conf export --reverse 2>/dev/null | perl test/dumbFilter.pl \x27$fge\x27 $headCmd | jq -r .id | sort | sha256sum`; + my $resB = `./strfry --config /tmp/strfry.conf scan --pause 1 --metrics \x27$fge\x27 2>/dev/null | jq -r .id | sort | sha256sum`; + + if ($resA eq $resB) { + $pass++; + print " scan-limit iter $iter: PASS\n"; + } else { + $fail++; + print " scan-limit iter $iter: FAIL\n"; + print " filter: $fge\n"; + print " export|dumbFilter: $resA"; + print " scan: $resB"; + } +} + +print "\n=== Scan-limit results: $pass passed, $fail failed ===\n"; +exit($fail > 0 ? 1 : 0); + +sub genRandomFilterGroup { + my $useLimit = shift; + my $numFilters = $useLimit ? 1 : int(rand()*10)+1; + my @filters; + for (1..$numFilters) { + my $f = {}; + while (!keys %$f) { + if (rand() < .15) { + $f->{ids} = []; + for (1..(rand()*10)) { + push @{$f->{ids}}, $ids->[int(rand() * @$ids)]; + } + } + if (rand() < .3) { + $f->{authors} = []; + for (1..(rand()*5)) { + push @{$f->{authors}}, $pubkeys->[int(rand() * @$pubkeys)]; + } + } + if (rand() < .2) { + $f->{kinds} = []; + for (1..(rand()*5)) { + push @{$f->{kinds}}, 0+$kinds->[int(rand() * @$kinds)]; + } + } + if (rand() < .2) { + $f->{"#e"} = []; + for (1..(rand()*10)) { + push @{$f->{"#e"}}, $ids->[int(rand() * @$ids)]; + } + } + if (rand() < .2) { + $f->{"#p"} = []; + for (1..(rand()*5)) { + push @{$f->{"#p"}}, $pubkeys->[int(rand() * @$pubkeys)]; + } + } + if (rand() < .2) { + $f->{"#t"} = []; + for (1..(rand()*5)) { + push @{$f->{"#t"}}, $topics->[int(rand() * @$topics)]; + } + } + # NIP-91: AND tag filter + if (rand() < .15) { + $f->{"&t"} = []; + for (1..(rand()*3)+1) { + push @{$f->{"&t"}}, $topics->[int(rand() * @$topics)]; + } + } + } + if (rand() < .2) { + $f->{since} = 1640300802 + int(rand() * 86400*365); + } + if (rand() < .2) { + $f->{until} = 1640300802 + int(rand() * 86400*365); + } + if ($useLimit) { + $f->{limit} = 1 + int(rand() * 1000); + } + if ($f->{since} && $f->{until} && $f->{since} > $f->{until}) { + delete $f->{since}; + delete $f->{until}; + } + push @filters, $f; + } + return \@filters; +} +' 2>&1 + +echo "" +echo "=== All tests complete ==="