Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ COMMANDS ?= TOP_10 TOP_10_COUNT COUNT

# ENGINES ?= tantivy-0.13 lucene-8.4.0 pisa-0.8.2 rucene-0.1 bleve-0.8.0-scorch rucene-0.1 tantivy-0.11 tantivy-0.14 tantivy-0.15 tantivy-0.16 tantivy-0.17 tantivy-0.18 tantivy-0.19
# ENGINES ?= tantivy-0.16 lucene-8.10.1 pisa-0.8.2 bleve-0.8.0-scorch rucene-0.1
ENGINES ?= tantivy-0.16 tantivy-0.17 tantivy-0.18 tantivy-0.19
ENGINES ?= tantivy-0.19 lucene-8.10.1
PORT ?= 8080

help:
Expand Down
3 changes: 3 additions & 0 deletions corpus_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ def transform(text):

if doc["url"] == "":
continue
if len(doc["url"].split("curid=",1)) == 1:
continue

doc_transformed = {
"id": doc["url"],
"id_num": int(doc["url"].split("curid=",1)[1]),
"text": transform(doc["body"])
}

Expand Down
4 changes: 4 additions & 0 deletions engines/lucene-8.10.1/src/main/java/BuildIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ public static void main(String[] args) throws IOException {
final Document document = new Document();

StoredField idField = new StoredField("id", "");
IntPoint idNumField = new IntPoint("id_num", 4);
TextField textField = new TextField("text", "", Field.Store.NO);

document.add(idField);
document.add(idNumField);
document.add(textField);

String line;
Expand All @@ -37,8 +39,10 @@ public static void main(String[] args) throws IOException {
}
final JsonObject parsed_doc = Json.parse(line).asObject();
final String id = parsed_doc.get("id").asString();
final int id_num = parsed_doc.get("id_num").asInt();
final String text = parsed_doc.get("text").asString();
idField.setStringValue(id);
idNumField.setIntValue(id_num);
textField.setStringValue(text);
writer.addDocument(document);
}
Expand Down
3 changes: 2 additions & 1 deletion engines/tantivy-0.19/src/bin/build_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use futures::executor::block_on;
use std::env;
use std::io::BufRead;
use std::path::Path;
use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::schema::{Schema, FAST, INDEXED, STORED, TEXT};
use tantivy::Index;

fn main() {
Expand All @@ -12,6 +12,7 @@ fn main() {

fn create_schema() -> Schema {
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("id_num", FAST | INDEXED);
schema_builder.add_text_field("id", STORED);
schema_builder.add_text_field("text", TEXT);
schema_builder.build()
Expand Down
6 changes: 3 additions & 3 deletions format_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ def generate_queries(words):
}

for line in fileinput.input():
(count, query) = PTN.split(line.decode("utf-8").strip(), 1)
(count, query) = PTN.split(line.strip(), 1)
count = int(count)
if not LETTERS_ONLY.match(query):
continue
words = PTN.split(query)
for q in generate_queries(words):
try:
qdoc = json.dumps(q).encode("utf-8")
print qdoc
qdoc = json.dumps(q)
print(qdoc)
except:
pass
6 changes: 6 additions & 0 deletions queries.txt
Original file line number Diff line number Diff line change
Expand Up @@ -897,3 +897,9 @@
{"query": "\"laborers international union of north america\"", "tags": ["phrase", "phrase:num_tokens_>3"]}
{"query": "laborers international union of north america", "tags": ["union", "global", "union:num_tokens_>3"]}
{"query": "+\"the who\" +uk", "tags": ["two-phase-critic"]}
{"query": "id_num:[48694410 TO 48694420] +griffith +observatory", "tags": ["range", "range_selective"]}
{"query": "id_num:[48694410 TO 48694420] +the", "tags": ["range", "range_selective"]}
{"query": "id_num:[48694410 TO 48694420] niceville high school", "tags": ["range", "range_selective"]}
{"query": "id_num:[0 TO 10000000] +griffith +observatory", "tags": ["range", "range_unselective"]}
{"query": "id_num:[0 TO 10000000] +the", "tags": ["range", "range_unselective"]}
{"query": "id_num:[0 TO 10000000] niceville high school", "tags": ["range", "range_unselective"]}
2 changes: 1 addition & 1 deletion results.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion web/build/results.json

Large diffs are not rendered by default.