Skip to content

Commit a65380a

Browse files
committed
WASM support
Add wasm support
1 parent 7c7db77 commit a65380a

File tree

12 files changed

+419
-13
lines changed

12 files changed

+419
-13
lines changed

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ The package has no library dependencies and provides pre-compiled wheels for all
1515
1616
```sh
1717
pip install tree-sitter
18+
# For wasm support
19+
pip install tree-sitter[wasm]
1820
```
1921

2022
## Usage
@@ -39,6 +41,22 @@ from tree_sitter import Language, Parser
3941
PY_LANGUAGE = Language(tspython.language())
4042
```
4143

44+
#### Wasm support
45+
46+
If you install the `wasm` extra, tree-sitter can use wasmtime to load languages that were compiled to wasm and parse with them. Example:
47+
48+
```python
49+
from pathlib import Path
50+
from wasmtime import Engine
51+
from tree_sitter import Language, Parser
52+
53+
engine = Engine()
54+
wasm_bytes = Path("my_language.wasm").read_bytes()
55+
MY_LANGUAGE = Language.from_wasm("my_language", engine, wasm_bytes)
56+
```
57+
58+
Languages loaded this way work the same way as languages loaded from native binaries.
59+
4260
### Basic parsing
4361

4462
Create a `Parser` and configure it to use a language:

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ tests = [
3838
"tree-sitter-python>=0.23.0",
3939
"tree-sitter-rust>=0.23.0",
4040
]
41+
wasm = ["wasmtime>=25"]
4142

4243
[tool.ruff]
4344
target-version = "py39"

setup.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,33 @@
2323
"tree_sitter/binding/range.c",
2424
"tree_sitter/binding/tree.c",
2525
"tree_sitter/binding/tree_cursor.c",
26+
"tree_sitter/binding/wasmtime.c",
2627
"tree_sitter/binding/module.c",
2728
],
2829
include_dirs=[
2930
"tree_sitter/binding",
3031
"tree_sitter/core/lib/include",
3132
"tree_sitter/core/lib/src",
33+
"tree_sitter/core/lib/src/wasm",
3234
],
3335
define_macros=[
3436
("PY_SSIZE_T_CLEAN", None),
3537
("TREE_SITTER_HIDE_SYMBOLS", None),
38+
("TREE_SITTER_FEATURE_WASM", None),
3639
],
37-
undef_macros=[
38-
"TREE_SITTER_FEATURE_WASM",
39-
],
40-
extra_compile_args=[
41-
"-std=c11",
42-
"-fvisibility=hidden",
43-
"-Wno-cast-function-type",
44-
"-Werror=implicit-function-declaration",
45-
] if system() != "Windows" else [
46-
"/std:c11",
47-
"/wd4244",
48-
],
40+
extra_compile_args=(
41+
[
42+
"-std=c11",
43+
"-fvisibility=hidden",
44+
"-Wno-cast-function-type",
45+
"-Werror=implicit-function-declaration",
46+
]
47+
if system() != "Windows"
48+
else [
49+
"/std:c11",
50+
"/wd4244",
51+
]
52+
),
4953
)
5054
],
5155
)

tests/test_wasm.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import importlib.resources
2+
from unittest import TestCase
3+
4+
from tree_sitter import Language, Parser, Tree
5+
6+
try:
7+
import wasmtime
8+
9+
class TestWasm(TestCase):
10+
@classmethod
11+
def setUpClass(cls):
12+
javascript_wasm = (
13+
importlib.resources.files("tests")
14+
.joinpath("wasm/tree-sitter-javascript.wasm")
15+
.read_bytes()
16+
)
17+
engine = wasmtime.Engine()
18+
cls.javascript = Language.from_wasm("javascript", engine, javascript_wasm)
19+
20+
def test_parser(self):
21+
parser = Parser(self.javascript)
22+
self.assertIsInstance(parser.parse(b"test"), Tree)
23+
24+
def test_language_is_wasm(self):
25+
self.assertEqual(self.javascript.is_wasm, True)
26+
27+
except ImportError:
28+
29+
class TestWasmDisabled(TestCase):
30+
def test_parser(self):
31+
def runtest():
32+
Language.from_wasm("javascript", None, b"")
33+
34+
self.assertRaisesRegex(
35+
RuntimeError, "wasmtime module is not loaded", runtest
36+
)
630 KB
Binary file not shown.

tree_sitter/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717
MIN_COMPATIBLE_LANGUAGE_VERSION,
1818
)
1919

20-
Point.__doc__ = "A position in a multi-line text document, in terms of rows and columns."
20+
21+
Point.__doc__ = (
22+
"A position in a multi-line text document, in terms of rows and columns."
23+
)
2124
Point.row.__doc__ = "The zero-based row of the document."
2225
Point.column.__doc__ = "The zero-based column of the document."
2326

tree_sitter/binding/language.c

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "types.h"
22

3+
extern void wasm_engine_delete(TSWasmEngine *engine);
4+
extern TSWasmEngine *wasmtime_engine_clone(TSWasmEngine *engine);
5+
36
int language_init(Language *self, PyObject *args, PyObject *Py_UNUSED(kwargs)) {
47
PyObject *language;
58
if (!PyArg_ParseTuple(args, "O:__init__", &language)) {
@@ -30,10 +33,119 @@ int language_init(Language *self, PyObject *args, PyObject *Py_UNUSED(kwargs)) {
3033
}
3134

3235
void language_dealloc(Language *self) {
36+
if (self->wasm_engine != NULL) {
37+
wasm_engine_delete(self->wasm_engine);
38+
}
3339
ts_language_delete(self->language);
3440
Py_TYPE(self)->tp_free(self);
3541
}
3642

43+
// C equivalent of the Python expression: ctypes.cast(managed_pointer.ptr(), ctypes.c_void_p).value
44+
static void *get_managed_pointer(PyObject *cast, PyObject *c_void_p, PyObject *managed_pointer) {
45+
void *ptr = NULL;
46+
PyObject *ptr_method = NULL;
47+
PyObject *ptr_result = NULL;
48+
PyObject *cast_result = NULL;
49+
PyObject *value_attr = NULL;
50+
51+
// Call .ptr() method on the managed pointer
52+
ptr_method = PyObject_GetAttrString(managed_pointer, "ptr");
53+
if (ptr_method == NULL) {
54+
goto cleanup;
55+
}
56+
ptr_result = PyObject_CallObject(ptr_method, NULL);
57+
if (ptr_result == NULL) {
58+
goto cleanup;
59+
}
60+
61+
// Call cast function
62+
cast_result = PyObject_CallFunctionObjArgs(cast, ptr_result, c_void_p, NULL);
63+
if (cast_result == NULL) {
64+
goto cleanup;
65+
}
66+
67+
// Get the 'value' attribute from the cast result
68+
value_attr = PyObject_GetAttrString(cast_result, "value");
69+
if (value_attr == NULL) {
70+
goto cleanup;
71+
}
72+
73+
// Convert the value attribute to a C void pointer
74+
ptr = PyLong_AsVoidPtr(value_attr);
75+
76+
cleanup:
77+
Py_XDECREF(value_attr);
78+
Py_XDECREF(cast_result);
79+
Py_XDECREF(ptr_result);
80+
Py_XDECREF(ptr_method);
81+
82+
if (PyErr_Occurred()) {
83+
return NULL;
84+
}
85+
86+
return ptr;
87+
}
88+
89+
PyObject *language_from_wasm(PyTypeObject *cls, PyObject *args) {
90+
ModuleState *state = (ModuleState *)PyType_GetModuleState(cls);
91+
TSWasmError error;
92+
TSWasmStore *wasm_store = NULL;
93+
TSLanguage *language = NULL;
94+
Language *self = NULL;
95+
char *name;
96+
PyObject *py_engine = NULL;
97+
char *wasm;
98+
Py_ssize_t wasm_length;
99+
if (state->wasmtime_engine_type == NULL) {
100+
PyErr_SetString(PyExc_RuntimeError, "wasmtime module is not loaded");
101+
return NULL;
102+
}
103+
if (!PyArg_ParseTuple(args, "sO!y#:from_wasm", &name, state->wasmtime_engine_type, &py_engine, &wasm, &wasm_length)) {
104+
return NULL;
105+
}
106+
107+
TSWasmEngine *engine = (TSWasmEngine *)get_managed_pointer(state->ctypes_cast, state->c_void_p, py_engine);
108+
if (engine == NULL) {
109+
goto fail;
110+
}
111+
engine = wasmtime_engine_clone(engine);
112+
if (engine == NULL) {
113+
goto fail;
114+
}
115+
116+
wasm_store = ts_wasm_store_new(engine, &error);
117+
if (wasm_store == NULL) {
118+
PyErr_Format(PyExc_RuntimeError, "Failed to create TSWasmStore: %s", error.message);
119+
goto fail;
120+
}
121+
122+
language = (TSLanguage *)ts_wasm_store_load_language(wasm_store, name, wasm, wasm_length, &error);
123+
if (language == NULL) {
124+
PyErr_Format(PyExc_RuntimeError, "Failed to load language: %s", error.message);
125+
goto fail;
126+
}
127+
128+
self = (Language *)cls->tp_alloc(cls, 0);
129+
if (self == NULL) {
130+
goto fail;
131+
}
132+
133+
self->language = language;
134+
self->wasm_engine = engine;
135+
self->version = ts_language_version(self->language);
136+
#if HAS_LANGUAGE_NAMES
137+
self->name = ts_language_name(self->language);
138+
#endif
139+
return (PyObject *)self;
140+
141+
fail:
142+
if (engine != NULL) {
143+
wasm_engine_delete(engine);
144+
}
145+
ts_language_delete(language);
146+
return NULL;
147+
}
148+
37149
PyObject *language_repr(Language *self) {
38150
#if HAS_LANGUAGE_NAMES
39151
if (self->name == NULL) {
@@ -82,6 +194,10 @@ PyObject *language_get_field_count(Language *self, void *Py_UNUSED(payload)) {
82194
return PyLong_FromUnsignedLong(ts_language_field_count(self->language));
83195
}
84196

197+
PyObject *language_is_wasm(Language *self, void *Py_UNUSED(payload)) {
198+
return PyBool_FromLong(ts_language_is_wasm(self->language));
199+
}
200+
85201
PyObject *language_node_kind_for_id(Language *self, PyObject *args) {
86202
TSSymbol symbol;
87203
if (!PyArg_ParseTuple(args, "H:node_kind_for_id", &symbol)) {
@@ -190,6 +306,9 @@ PyObject *language_query(Language *self, PyObject *args) {
190306
return PyObject_CallFunction((PyObject *)state->query_type, "Os#", self, source, length);
191307
}
192308

309+
PyDoc_STRVAR(language_from_wasm_doc,
310+
"from_wasm(self, name, engine, wasm, /)\n--\n\n"
311+
"Load a language compiled as wasm.");
193312
PyDoc_STRVAR(language_node_kind_for_id_doc,
194313
"node_kind_for_id(self, id, /)\n--\n\n"
195314
"Get the name of the node kind for the given numerical id.");
@@ -220,6 +339,12 @@ PyDoc_STRVAR(
220339
"Create a new :class:`Query` from a string containing one or more S-expression patterns.");
221340

222341
static PyMethodDef language_methods[] = {
342+
{
343+
.ml_name = "from_wasm",
344+
.ml_meth = (PyCFunction)language_from_wasm,
345+
.ml_flags = METH_CLASS | METH_VARARGS,
346+
.ml_doc = language_from_wasm_doc,
347+
},
223348
{
224349
.ml_name = "node_kind_for_id",
225350
.ml_meth = (PyCFunction)language_node_kind_for_id,
@@ -291,6 +416,8 @@ static PyGetSetDef language_accessors[] = {
291416
PyDoc_STR("The number of valid states in this language."), NULL},
292417
{"field_count", (getter)language_get_field_count, NULL,
293418
PyDoc_STR("The number of distinct field names in this language."), NULL},
419+
{"is_wasm", (getter)language_is_wasm, NULL,
420+
PyDoc_STR("Check if the language came from a wasm module."), NULL},
294421
{NULL},
295422
};
296423

tree_sitter/binding/module.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <wasm.h>
12
#include "types.h"
23

34
extern PyType_Spec language_type_spec;
@@ -15,6 +16,8 @@ extern PyType_Spec range_type_spec;
1516
extern PyType_Spec tree_cursor_type_spec;
1617
extern PyType_Spec tree_type_spec;
1718

19+
void tsp_load_wasmtime_symbols();
20+
1821
// TODO(0.24): drop Python 3.9 support
1922
#if PY_MINOR_VERSION > 9
2023
#define AddObjectRef PyModule_AddObjectRef
@@ -62,6 +65,9 @@ static void module_free(void *self) {
6265
Py_XDECREF(state->tree_type);
6366
Py_XDECREF(state->query_error);
6467
Py_XDECREF(state->re_compile);
68+
Py_XDECREF(state->wasmtime_engine_type);
69+
Py_XDECREF(state->ctypes_cast);
70+
Py_XDECREF(state->c_void_p);
6571
}
6672

6773
static struct PyModuleDef module_definition = {
@@ -147,6 +153,35 @@ PyMODINIT_FUNC PyInit__binding(void) {
147153
if (namedtuple == NULL) {
148154
goto cleanup;
149155
}
156+
157+
PyObject *wasmtime_engine = import_attribute("wasmtime", "Engine");
158+
if (wasmtime_engine == NULL) {
159+
// No worries, disable functionality.
160+
PyErr_Clear();
161+
} else {
162+
// Ensure wasmtime_engine is a PyTypeObject
163+
if (!PyType_Check(wasmtime_engine)) {
164+
PyErr_SetString(PyExc_TypeError, "wasmtime.Engine is not a type");
165+
goto cleanup;
166+
}
167+
state->wasmtime_engine_type = (PyTypeObject *)wasmtime_engine;
168+
169+
tsp_load_wasmtime_symbols();
170+
if (PyErr_Occurred()) {
171+
goto cleanup;
172+
}
173+
174+
state->ctypes_cast = import_attribute("ctypes", "cast");
175+
if (state->ctypes_cast == NULL) {
176+
goto cleanup;
177+
}
178+
179+
state->c_void_p = import_attribute("ctypes", "c_void_p");
180+
if (state->c_void_p == NULL) {
181+
goto cleanup;
182+
}
183+
}
184+
150185
PyObject *point_args = Py_BuildValue("s[ss]", "Point", "row", "column");
151186
PyObject *point_kwargs = PyDict_New();
152187
PyDict_SetItemString(point_kwargs, "module", PyUnicode_FromString("tree_sitter"));

0 commit comments

Comments
 (0)