# Source: run.py from the JunnYu/RWKV-v2-RNN-paddle repository on GitHub (branch: main).
# -*- coding:utf-8 -*-
########################################################################################################
# The RWKV v2-RNN Language Model - https://github.com/BlinkDL/RWKV-LM
########################################################################################################

import numpy as np
import math
import time
import types
import copy
import paddle
import paddle.nn.functional as F
from src.utils import TOKENIZER, set_seed
from src.model_run import RWKV_RNN


# Compact numpy printing: 4 decimal places, no scientific notation for small
# values, and lines up to 200 characters wide.
np.set_printoptions(precision=4, suppress=True, linewidth=200)

### Step 0: set a global seed for reproducibility #####################################################
# set_seed(42)

### Step 1: set model ##################################################################################

# Model hyper-parameters. They are passed to RWKV_RNN when the model is built,
# and ctx_len is also the maximum window of tokens kept during generation.
# NOTE(review): presumably these must match the checkpoint named by MODEL_NAME
# (its file name contains "1024-RWKV-6-512") - confirm against src/model_run.py.
ctx_len = 1024
n_layer = 6
n_embd = 512
model_type = 'RWKV'           # 'RWKV' or 'RWKV-ffnPre'

# To run your own trained model, use settings like these instead:
# MODEL_NAME = 'trained-31'
# WORD_NAME = 'vocab'           # the .json vocab (generated by train.py)

# ########## Settings for the 27M params enwik8 model (active by default) ##########
MODEL_NAME = 'weights/enwik8-ppl1.65-6064-1024-RWKV-6-512-2022-03-25-21-05-13'
WORD_NAME = 'weights/enwik8-vocab'
# EVAL_DATA = 'enwik8'  # uncomment this for EVAL MODE (no text generation)
# ########################################################################

# --> set UNKNOWN_CHAR to the rarest token in your vocab.json <--
# --> all unknown tokens in your context will be denoted by it <--
UNKNOWN_CHAR = ' '   # here we just set it to [space] for simplicity

RUN_DEVICE = 'cpu'   # 'cpu' (already very fast) or 'gpu'
paddle.set_device(RUN_DEVICE)
# When True, the generation loop runs a single trial of a single step and
# prints the model input and raw output for that step.
DEBUG_DEBUG = False

### Step 2: set context ################################################################################

context = "\nIn the"       # ==> this is your prompt

NUM_TRIALS = 999           # number of independent generations from the same prompt
LENGTH_PER_TRIAL = 500     # number of tokens sampled in each trial

# Sampling settings forwarded to tokenizer.sample_logits
# (as temperature, top_p_usual and top_p_newline respectively).
TEMPERATURE = 1.0
top_p = 0.7
top_p_newline = 0.9

########################################################################################################

# Build the model and the tokenizer from the settings above. RWKV_RNN and
# TOKENIZER are project classes imported from src/ at the top of this file.
print(f'Loading {MODEL_NAME}...')
model = RWKV_RNN(MODEL_NAME, RUN_DEVICE, model_type, n_layer, n_embd, ctx_len)
# UNKNOWN_CHAR is the character used for symbols missing from the vocab.
tokenizer = TOKENIZER(WORD_NAME, UNKNOWN_CHAR=UNKNOWN_CHAR)

########################################################################################################

# EVAL MODE: runs only when EVAL_DATA has been defined in the settings above.
# It reports the running average per-token loss over random windows of the
# evaluation text and then exits without generating any text.
# (At module level vars() and globals() are the same dict, so one check is enough.)
if 'EVAL_DATA' in globals():
    print('Evaluating on ' + EVAL_DATA + ' ...')

    # Use a context manager so the file handle is closed right after reading.
    with open(EVAL_DATA, "r", encoding='utf-8') as eval_file:
        data = eval_file.read()

    # Each sample needs ctx_len input characters plus one final target.
    if len(data) <= ctx_len + 1:
        raise ValueError(
            f'{EVAL_DATA} has {len(data)} characters; '
            f'more than {ctx_len + 1} are needed to draw evaluation samples')

    # loss_table[k] accumulates, over all samples, the negative log-likelihood
    # of token k+1 given the k+1 tokens that precede it.
    loss_table = np.zeros(ctx_len)

    N_SAMPLE = 1000

    for sample_idx in range(N_SAMPLE):
        # Pick a random window of ctx_len + 1 characters and encode it,
        # mapping characters missing from the vocab to the unknown token.
        pos = np.random.randint(0, len(data) - ctx_len - 1)
        window = data[pos:pos + ctx_len + 1]
        ctx = [tokenizer.stoi.get(s, tokenizer.UNKNOWN_CHAR) for s in window]

        model.clear()
        for i in range(1, ctx_len + 1):
            out = model.run(ctx[:i])
            # log_softmax stays finite where log(softmax(...)) would hit
            # log(0) for a probability that underflows to zero.
            log_prob = F.log_softmax(paddle.to_tensor(out), axis=-1)
            loss_table[i - 1] -= float(log_prob[ctx[i]])

        print(f'Tested {sample_idx+1} samples: avg_loss over ctx_len =',
              np.mean(loss_table) / (sample_idx + 1))

    # Equivalent to exit(0) but does not depend on the helper injected by `site`.
    raise SystemExit(0)

########################################################################################################

# Text generation: clean up the prompt, then run up to NUM_TRIALS independent
# generations from it. The prompt is pushed through the model only in the
# first trial; the resulting model state is saved and reloaded in later trials.
context = tokenizer.refine_context(context)
print('\nYour prompt has ' + str(len(context)) + ' tokens.')
print('\n--> Currently the first run takes a while if your prompt is long, as we are using RNN to process the prompt. This will be much faster in future versions. <--\n')

# The prompt is the same for every trial, so encode it once up front.
# Characters missing from the vocab map to the tokenizer's unknown token.
src_len = len(context)
prompt_ids = [tokenizer.stoi.get(s, tokenizer.UNKNOWN_CHAR) for s in context]
if not prompt_ids:
    # Without at least one prompt token there is no model output to sample
    # the first generated token from.
    raise ValueError('the prompt is empty after tokenizer.refine_context()')

for TRIAL in range(1 if DEBUG_DEBUG else NUM_TRIALS):
    # perf_counter is monotonic, so the elapsed time is immune to system
    # clock adjustments.
    t_begin = time.perf_counter()

    # Fresh copy per trial: generated tokens are appended to ctx below.
    ctx = list(prompt_ids)
    print(('-' * 30) + context, end='')

    model.clear()
    if TRIAL == 0:
        # Feed the prompt one prefix at a time; keep the output of the last
        # step together with the model state saved right after it.
        init_state = types.SimpleNamespace()
        for i in range(src_len):
            out = model.run(ctx[:i+1])
        init_state.out = out
        model.save(init_state)
    else:
        model.load(init_state)

    n_steps = 1 if DEBUG_DEBUG else LENGTH_PER_TRIAL
    for i in range(src_len, src_len + n_steps):
        # len(ctx) == i here, so the model input is simply the last ctx_len tokens.
        x = ctx[-ctx_len:]

        if i == src_len:
            # First generated token: reuse the output cached for the prompt.
            # Copy it so the cached value stays intact for later trials.
            out = copy.deepcopy(init_state.out)
        else:
            out = model.run(x)
        if DEBUG_DEBUG:
            print('model', np.array(x), '==>', np.array(
                out), np.max(out), np.min(out))

        char = tokenizer.sample_logits(out, x, ctx_len, temperature=TEMPERATURE,
                                       top_p_usual=top_p, top_p_newline=top_p_newline)
        char = char.item()
        print(tokenizer.itos[int(char)], end='', flush=True)
        ctx.append(char)
    t_end = time.perf_counter()
    print("\n----------", round(t_end - t_begin, 2), end='s ')