Skip to content

Commit 7066385

Browse files
committed
iterative development, token implemented
1 parent b388ccd commit 7066385

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

src/py_scripts/script.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,10 @@
1818

1919
def sentence_to_embeddings(query):
2020
# unpack the models
21-
print(query)
21+
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
22+
model = BertModel.from_pretrained('bert-base-uncased')
23+
tokens = tokenizer.tokenize(query)
24+
print(tokens)
2225

2326
if __name__ == "__main__":
2427
# The query is passed as the second argument
@@ -34,4 +37,5 @@ def sentence_to_embeddings(query):
3437
# SELECT run_py('/data/trino/src/py_scripts/script.py') AS result;
3538
# SELECT square(4);
3639
# docker exec -it trino-nlp-embeddings bash
37-
# docker cp src/py_scripts/requirements.txt trino-nlp-embeddings:/data/trino/src/py_scripts/requirements.txt
40+
# docker cp src/py_scripts/requirements.txt trino-nlp-embeddings:/data/trino/src/py_scripts/requirements.txt
41+
# docker cp src/py_scripts/script.py trino-nlp-embeddings:/data/trino/src/py_scripts/script.py

0 commit comments

Comments
 (0)