|
1 | 1 | # Encoding Test Result
|
2 | 2 | | python tiktoken | golang tiktoken-go |
|
3 | 3 | | :------------------------------------------------------- | :------------------------------------------------------- |
|
| 4 | +| text: hallo world!, encoding: o200k_base, token: 4 | text: hallo world!, encoding: o200k_base, token: 4 | |
4 | 5 | | text: hallo world!, encoding: cl100k_base, token: 4 | text: hallo world!, encoding: cl100k_base, token: 4 |
|
5 | 6 | | text: hallo world!, encoding: p50k_base, token: 4 | text: hallo world!, encoding: p50k_base, token: 4 |
|
6 | 7 | | text: hallo world!, encoding: r50k_base, token: 4 | text: hallo world!, encoding: r50k_base, token: 4 |
|
| 8 | +| text: 你好世界!, encoding: o200k_base, token: 3 | text: 你好世界!, encoding: o200k_base, token: 3 | |
7 | 9 | | text: 你好世界!, encoding: cl100k_base, token: 6 | text: 你好世界!, encoding: cl100k_base, token: 6 |
|
8 | 10 | | text: 你好世界!, encoding: p50k_base, token: 11 | text: 你好世界!, encoding: p50k_base, token: 11 |
|
9 | 11 | | text: 你好世界!, encoding: r50k_base, token: 11 | text: 你好世界!, encoding: r50k_base, token: 11 |
|
| 12 | +| text: こんにちは世界!, encoding: o200k_base, token: 3 | text: こんにちは世界!, encoding: o200k_base, token: 3 | |
10 | 13 | | text: こんにちは世界!, encoding: cl100k_base, token: 5 | text: こんにちは世界!, encoding: cl100k_base, token: 5 |
|
11 | 14 | | text: こんにちは世界!, encoding: p50k_base, token: 13 | text: こんにちは世界!, encoding: p50k_base, token: 13 |
|
12 | 15 | | text: こんにちは世界!, encoding: r50k_base, token: 13 | text: こんにちは世界!, encoding: r50k_base, token: 13 |
|
| 16 | +| text: 안녕하세요 세계!, encoding: o200k_base, token: 4 | text: 안녕하세요 세계!, encoding: o200k_base, token: 4 | |
13 | 17 | | text: 안녕하세요 세계!, encoding: cl100k_base, token: 10 | text: 안녕하세요 세계!, encoding: cl100k_base, token: 10 |
|
14 | 18 | | text: 안녕하세요 세계!, encoding: p50k_base, token: 21 | text: 안녕하세요 세계!, encoding: p50k_base, token: 21 |
|
15 | 19 | | text: 안녕하세요 세계!, encoding: r50k_base, token: 21 | text: 안녕하세요 세계!, encoding: r50k_base, token: 21 |
|
| 20 | +| text: Привет мир!, encoding: o200k_base, token: 4 | text: Привет мир!, encoding: o200k_base, token: 4 | |
16 | 21 | | text: Привет мир!, encoding: cl100k_base, token: 6 | text: Привет мир!, encoding: cl100k_base, token: 6 |
|
17 | 22 | | text: Привет мир!, encoding: p50k_base, token: 12 | text: Привет мир!, encoding: p50k_base, token: 12 |
|
18 | 23 | | text: Привет мир!, encoding: r50k_base, token: 12 | text: Привет мир!, encoding: r50k_base, token: 12 |
|
| 24 | +| text: ¡Hola mundo!, encoding: o200k_base, token: 4 | text: ¡Hola mundo!, encoding: o200k_base, token: 4 | |
19 | 25 | | text: ¡Hola mundo!, encoding: cl100k_base, token: 4 | text: ¡Hola mundo!, encoding: cl100k_base, token: 4 |
|
20 | 26 | | text: ¡Hola mundo!, encoding: p50k_base, token: 7 | text: ¡Hola mundo!, encoding: p50k_base, token: 7 |
|
21 | 27 | | text: ¡Hola mundo!, encoding: r50k_base, token: 7 | text: ¡Hola mundo!, encoding: r50k_base, token: 7 |
|
| 28 | +| text: Hallo Welt!, encoding: o200k_base, token: 3 | text: Hallo Welt!, encoding: o200k_base, token: 3 | |
22 | 29 | | text: Hallo Welt!, encoding: cl100k_base, token: 3 | text: Hallo Welt!, encoding: cl100k_base, token: 3 |
|
23 | 30 | | text: Hallo Welt!, encoding: p50k_base, token: 5 | text: Hallo Welt!, encoding: p50k_base, token: 5 |
|
24 | 31 | | text: Hallo Welt!, encoding: r50k_base, token: 5 | text: Hallo Welt!, encoding: r50k_base, token: 5 |
|
| 32 | +| text: Bonjour le monde!, encoding: o200k_base, token: 4 | text: Bonjour le monde!, encoding: o200k_base, token: 4 | |
25 | 33 | | text: Bonjour le monde!, encoding: cl100k_base, token: 4 | text: Bonjour le monde!, encoding: cl100k_base, token: 4 |
|
26 | 34 | | text: Bonjour le monde!, encoding: p50k_base, token: 7 | text: Bonjour le monde!, encoding: p50k_base, token: 7 |
|
27 | 35 | | text: Bonjour le monde!, encoding: r50k_base, token: 7 | text: Bonjour le monde!, encoding: r50k_base, token: 7 |
|
| 36 | +| text: Ciao mondo!, encoding: o200k_base, token: 4 | text: Ciao mondo!, encoding: o200k_base, token: 4 | |
28 | 37 | | text: Ciao mondo!, encoding: cl100k_base, token: 4 | text: Ciao mondo!, encoding: cl100k_base, token: 4 |
|
29 | 38 | | text: Ciao mondo!, encoding: p50k_base, token: 5 | text: Ciao mondo!, encoding: p50k_base, token: 5 |
|
30 | 39 | | text: Ciao mondo!, encoding: r50k_base, token: 5 | text: Ciao mondo!, encoding: r50k_base, token: 5 |
|
| 40 | +| text: Hej världen!, encoding: o200k_base, token: 3 | text: Hej världen!, encoding: o200k_base, token: 3 | |
31 | 41 | | text: Hej världen!, encoding: cl100k_base, token: 7 | text: Hej världen!, encoding: cl100k_base, token: 7 |
|
32 | 42 | | text: Hej världen!, encoding: p50k_base, token: 8 | text: Hej världen!, encoding: p50k_base, token: 8 |
|
33 | 43 | | text: Hej världen!, encoding: r50k_base, token: 8 | text: Hej världen!, encoding: r50k_base, token: 8 |
|
| 44 | +| text: Hallo wereld!, encoding: o200k_base, token: 3 | text: Hallo wereld!, encoding: o200k_base, token: 3 | |
34 | 45 | | text: Hallo wereld!, encoding: cl100k_base, token: 3 | text: Hallo wereld!, encoding: cl100k_base, token: 3 |
|
35 | 46 | | text: Hallo wereld!, encoding: p50k_base, token: 5 | text: Hallo wereld!, encoding: p50k_base, token: 5 |
|
36 | 47 | | text: Hallo wereld!, encoding: r50k_base, token: 5 | text: Hallo wereld!, encoding: r50k_base, token: 5 |
|
37 | 48 | | text: Hallo verden!, encoding: cl100k_base, token: 4 | text: Hallo verden!, encoding: cl100k_base, token: 4 |
|
38 | 49 | | text: Hallo verden!, encoding: p50k_base, token: 5 | text: Hallo verden!, encoding: p50k_base, token: 5 |
|
39 | 50 | | text: Hallo verden!, encoding: r50k_base, token: 5 | text: Hallo verden!, encoding: r50k_base, token: 5 |
|
| 51 | +| text: Hallo wereld!, encoding: o200k_base, token: 3 | text: Hallo wereld!, encoding: o200k_base, token: 3 | |
40 | 52 | | text: Hallo wereld!, encoding: cl100k_base, token: 3 | text: Hallo wereld!, encoding: cl100k_base, token: 3 |
|
41 | 53 | | text: Hallo wereld!, encoding: p50k_base, token: 5 | text: Hallo wereld!, encoding: p50k_base, token: 5 |
|
42 | 54 | | text: Hallo wereld!, encoding: r50k_base, token: 5 | text: Hallo wereld!, encoding: r50k_base, token: 5 |
|
|
0 commit comments