-
Notifications
You must be signed in to change notification settings - Fork 190
/
encode_texts.py
41 lines (32 loc) · 1.34 KB
/
encode_texts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# -*- coding: utf-8 -*-
""" Use torchMoji to encode texts into emotional feature vectors.
"""
from __future__ import print_function, division, unicode_literals
import json
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_feature_encoding
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
# Example sentences that get tokenized and encoded below.
TEST_SENTENCES = [
    "I love mom's cooking",
    "I love how you never reply back..",
    "I love cruising with my homies",
    "I love messing with yo mind!!",
    "I love you and now you're just gone..",
    "This is shit",
    "This is the shit",
]

# Passed as the second argument to SentenceTokenizer.
maxlen = 30
# Defined here but not referenced anywhere else in this script.
batch_size = 32

# Step 1: read the vocabulary JSON and turn the sentences into tokens.
print("Tokenizing using dictionary from {}".format(VOCAB_PATH))
with open(VOCAB_PATH, "r") as vocab_file:
    vocabulary = json.load(vocab_file)

st = SentenceTokenizer(vocabulary, maxlen)
# tokenize_sentences hands back three values; only the first is used here.
tokenized, _, _ = st.tokenize_sentences(TEST_SENTENCES)

# Step 2: build the feature-encoding model from the pretrained weights path.
print("Loading model from {}.".format(PRETRAINED_PATH))
model = torchmoji_feature_encoding(PRETRAINED_PATH)
print(model)

# Step 3: run the tokenized sentences through the model.
print("Encoding texts..")
encoding = model(tokenized)

# Show the leading five values of the first sentence's encoding.
first_sentence = TEST_SENTENCES[0]
print("First 5 dimensions for sentence: {}".format(first_sentence))
print(encoding[0, :5])

# From here the encodings can be visualized to compare sentences,
# fed to a logistic regression classifier,
# or used for basically anything else you'd like to do.