Skip to content

Commit 88da914

Browse files
committed
Implement CSS selectors
1 parent 7defc23 commit 88da914

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+4073
-16
lines changed

UPGRADING.INTERNALS

+1
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ PHP 8.4 INTERNALS UPGRADE NOTES
247247
- Removed the "properties" HashTable field from php_libxml_node_object.
248248
- Added a way to attach private data to a php_libxml_ref_obj.
249249
- Added a way to fix a class type onto php_libxml_ref_obj.
250+
- Added a way to record quirks mode in php_libxml_ref_obj.
250251
- Added php_libxml_uses_internal_errors().
251252
- Added a way to override document handlers (e.g. serialization) with
252253
php_libxml_document_handlers.

codecov.yml

+10-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
11
ignore:
2-
- "ext/dom/lexbor/lexbor" # bundled library
3-
- "ext/pcre/pcre2lib" # bundled library
2+
# bundled libraries
3+
- "ext/dom/lexbor/lexbor/core"
4+
- "ext/dom/lexbor/lexbor/css"
5+
- "ext/dom/lexbor/lexbor/dom"
6+
- "ext/dom/lexbor/lexbor/encoding"
7+
- "ext/dom/lexbor/lexbor/html"
8+
- "ext/dom/lexbor/lexbor/ns"
9+
- "ext/dom/lexbor/lexbor/ports"
10+
- "ext/dom/lexbor/lexbor/tag"
11+
- "ext/pcre/pcre2lib"

ext/dom/config.m4

+9-3
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@ if test "$PHP_DOM" != "no"; then
2222
$LEXBOR_DIR/encoding/big5.c $LEXBOR_DIR/encoding/decode.c $LEXBOR_DIR/encoding/encode.c $LEXBOR_DIR/encoding/encoding.c $LEXBOR_DIR/encoding/euc_kr.c $LEXBOR_DIR/encoding/gb18030.c $LEXBOR_DIR/encoding/iso_2022_jp_katakana.c $LEXBOR_DIR/encoding/jis0208.c $LEXBOR_DIR/encoding/jis0212.c $LEXBOR_DIR/encoding/range.c $LEXBOR_DIR/encoding/res.c $LEXBOR_DIR/encoding/single.c \
2323
$LEXBOR_DIR/html/encoding.c $LEXBOR_DIR/html/interface.c $LEXBOR_DIR/html/parser.c $LEXBOR_DIR/html/token.c $LEXBOR_DIR/html/token_attr.c $LEXBOR_DIR/html/tokenizer.c $LEXBOR_DIR/html/tree.c \
2424
$LEXBOR_DIR/html/interfaces/anchor_element.c $LEXBOR_DIR/html/interfaces/area_element.c $LEXBOR_DIR/html/interfaces/audio_element.c $LEXBOR_DIR/html/interfaces/base_element.c $LEXBOR_DIR/html/interfaces/body_element.c $LEXBOR_DIR/html/interfaces/br_element.c $LEXBOR_DIR/html/interfaces/button_element.c $LEXBOR_DIR/html/interfaces/canvas_element.c $LEXBOR_DIR/html/interfaces/data_element.c $LEXBOR_DIR/html/interfaces/data_list_element.c $LEXBOR_DIR/html/interfaces/details_element.c $LEXBOR_DIR/html/interfaces/dialog_element.c $LEXBOR_DIR/html/interfaces/directory_element.c $LEXBOR_DIR/html/interfaces/div_element.c $LEXBOR_DIR/html/interfaces/d_list_element.c $LEXBOR_DIR/html/interfaces/document.c $LEXBOR_DIR/html/interfaces/element.c $LEXBOR_DIR/html/interfaces/embed_element.c $LEXBOR_DIR/html/interfaces/field_set_element.c $LEXBOR_DIR/html/interfaces/font_element.c $LEXBOR_DIR/html/interfaces/form_element.c $LEXBOR_DIR/html/interfaces/frame_element.c $LEXBOR_DIR/html/interfaces/frame_set_element.c $LEXBOR_DIR/html/interfaces/head_element.c $LEXBOR_DIR/html/interfaces/heading_element.c $LEXBOR_DIR/html/interfaces/hr_element.c $LEXBOR_DIR/html/interfaces/html_element.c $LEXBOR_DIR/html/interfaces/iframe_element.c $LEXBOR_DIR/html/interfaces/image_element.c $LEXBOR_DIR/html/interfaces/input_element.c $LEXBOR_DIR/html/interfaces/label_element.c $LEXBOR_DIR/html/interfaces/legend_element.c $LEXBOR_DIR/html/interfaces/li_element.c $LEXBOR_DIR/html/interfaces/link_element.c $LEXBOR_DIR/html/interfaces/map_element.c $LEXBOR_DIR/html/interfaces/marquee_element.c $LEXBOR_DIR/html/interfaces/media_element.c $LEXBOR_DIR/html/interfaces/menu_element.c $LEXBOR_DIR/html/interfaces/meta_element.c $LEXBOR_DIR/html/interfaces/meter_element.c $LEXBOR_DIR/html/interfaces/mod_element.c $LEXBOR_DIR/html/interfaces/object_element.c $LEXBOR_DIR/html/interfaces/o_list_element.c $LEXBOR_DIR/html/interfaces/opt_group_element.c $LEXBOR_DIR/html/interfaces/option_element.c 
$LEXBOR_DIR/html/interfaces/output_element.c $LEXBOR_DIR/html/interfaces/paragraph_element.c $LEXBOR_DIR/html/interfaces/param_element.c $LEXBOR_DIR/html/interfaces/picture_element.c $LEXBOR_DIR/html/interfaces/pre_element.c $LEXBOR_DIR/html/interfaces/progress_element.c $LEXBOR_DIR/html/interfaces/quote_element.c $LEXBOR_DIR/html/interfaces/script_element.c $LEXBOR_DIR/html/interfaces/select_element.c $LEXBOR_DIR/html/interfaces/slot_element.c $LEXBOR_DIR/html/interfaces/source_element.c $LEXBOR_DIR/html/interfaces/span_element.c $LEXBOR_DIR/html/interfaces/style_element.c $LEXBOR_DIR/html/interfaces/table_caption_element.c $LEXBOR_DIR/html/interfaces/table_cell_element.c $LEXBOR_DIR/html/interfaces/table_col_element.c $LEXBOR_DIR/html/interfaces/table_element.c $LEXBOR_DIR/html/interfaces/table_row_element.c $LEXBOR_DIR/html/interfaces/table_section_element.c $LEXBOR_DIR/html/interfaces/template_element.c $LEXBOR_DIR/html/interfaces/text_area_element.c $LEXBOR_DIR/html/interfaces/time_element.c $LEXBOR_DIR/html/interfaces/title_element.c $LEXBOR_DIR/html/interfaces/track_element.c $LEXBOR_DIR/html/interfaces/u_list_element.c $LEXBOR_DIR/html/interfaces/unknown_element.c $LEXBOR_DIR/html/interfaces/video_element.c $LEXBOR_DIR/html/interfaces/window.c \
25-
$LEXBOR_DIR/selectors/selectors.c \
25+
$LEXBOR_DIR/css/state.c $LEXBOR_DIR/css/log.c $LEXBOR_DIR/css/parser.c $LEXBOR_DIR/css/selectors/state.c $LEXBOR_DIR/css/selectors/selectors.c $LEXBOR_DIR/css/selectors/selector.c $LEXBOR_DIR/css/selectors/pseudo_state.c $LEXBOR_DIR/css/selectors/pseudo.c $LEXBOR_DIR/css/syntax/tokenizer/error.c $LEXBOR_DIR/css/syntax/state.c $LEXBOR_DIR/css/syntax/parser.c $LEXBOR_DIR/css/syntax/syntax.c $LEXBOR_DIR/css/syntax/anb.c $LEXBOR_DIR/css/syntax/tokenizer.c $LEXBOR_DIR/css/syntax/token.c $LEXBOR_DIR/css/css.c \
26+
$LEXBOR_DIR/selectors-adapted/selectors.c \
2627
$LEXBOR_DIR/ns/ns.c \
2728
$LEXBOR_DIR/tag/tag.c"
2829
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c infra.c \
2930
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
30-
domexception.c parentnode.c \
31+
domexception.c \
32+
parentnode/tree.c parentnode/css_selectors.c \
3133
processinginstruction.c cdatasection.c \
3234
documentfragment.c domimplementation.c \
3335
element.c node.c characterdata.c \
@@ -38,14 +40,18 @@ if test "$PHP_DOM" != "no"; then
3840
namednodemap.c xpath_callbacks.c \
3941
$LEXBOR_SOURCES],
4042
$ext_shared,,$PHP_LEXBOR_CFLAGS)
43+
PHP_ADD_BUILD_DIR($ext_builddir/parentnode)
4144
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/ports/posix/lexbor/core)
4245
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/core)
4346
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/dom/interfaces)
4447
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/tree/insertion_mode)
4548
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/tokenizer)
4649
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/interfaces)
4750
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/encoding)
48-
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/selectors)
51+
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/selectors)
52+
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/tokenizer)
53+
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/syntax/tokenizer)
54+
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/selectors-adapted)
4955
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/ns)
5056
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/tag)
5157
PHP_SUBST(DOM_SHARED_LIBADD)

ext/dom/config.w32

+7-2
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@ if (PHP_DOM == "yes") {
99
) {
1010
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
1111
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
12-
domexception.c parentnode.c processinginstruction.c \
12+
domexception.c processinginstruction.c \
1313
cdatasection.c documentfragment.c domimplementation.c element.c \
1414
node.c characterdata.c documenttype.c \
1515
entity.c nodelist.c html_collection.c text.c comment.c \
1616
entityreference.c \
1717
notation.c xpath.c dom_iterators.c \
1818
namednodemap.c xpath_callbacks.c", null, "-Iext/dom/lexbor");
1919

20+
ADD_SOURCES("ext/dom/parentnode", "tree.c css_selectors.c", "dom");
2021
ADD_SOURCES("ext/dom/lexbor/lexbor/ports/windows_nt/lexbor/core", "memory.c", "dom");
2122
ADD_SOURCES("ext/dom/lexbor/lexbor/core", "array_obj.c array.c avl.c bst.c diyfp.c conv.c dobject.c dtoa.c hash.c mem.c mraw.c print.c serialize.c shs.c str.c strtod.c", "dom");
2223
ADD_SOURCES("ext/dom/lexbor/lexbor/dom", "interface.c", "dom");
@@ -27,7 +28,11 @@ if (PHP_DOM == "yes") {
2728
ADD_SOURCES("ext/dom/lexbor/lexbor/html", "encoding.c interface.c parser.c token.c token_attr.c tokenizer.c tree.c", "dom");
2829
ADD_SOURCES("ext/dom/lexbor/lexbor/encoding", "big5.c decode.c encode.c encoding.c euc_kr.c gb18030.c iso_2022_jp_katakana.c jis0208.c jis0212.c range.c res.c single.c", "dom");
2930
ADD_SOURCES("ext/dom/lexbor/lexbor/html/interfaces", "anchor_element.c area_element.c audio_element.c base_element.c body_element.c br_element.c button_element.c canvas_element.c data_element.c data_list_element.c details_element.c dialog_element.c directory_element.c div_element.c d_list_element.c document.c element.c embed_element.c field_set_element.c font_element.c form_element.c frame_element.c frame_set_element.c head_element.c heading_element.c hr_element.c html_element.c iframe_element.c image_element.c input_element.c label_element.c legend_element.c li_element.c link_element.c map_element.c marquee_element.c media_element.c menu_element.c meta_element.c meter_element.c mod_element.c object_element.c o_list_element.c opt_group_element.c option_element.c output_element.c paragraph_element.c param_element.c picture_element.c pre_element.c progress_element.c quote_element.c script_element.c select_element.c slot_element.c source_element.c span_element.c style_element.c table_caption_element.c table_cell_element.c table_col_element.c table_element.c table_row_element.c table_section_element.c template_element.c text_area_element.c time_element.c title_element.c track_element.c u_list_element.c unknown_element.c video_element.c window.c", "dom");
30-
ADD_SOURCES("ext/dom/lexbor/lexbor/selectors", "selectors.c", "dom");
31+
ADD_SOURCES("ext/dom/lexbor/lexbor/selectors-adapted", "selectors.c", "dom");
32+
ADD_SOURCES("ext/dom/lexbor/lexbor/css", "state.c log.c parser.c css.c", "dom");
33+
ADD_SOURCES("ext/dom/lexbor/lexbor/css/selectors", "state.c selectors.c selector.c pseudo_state.c pseudo.c", "dom");
34+
ADD_SOURCES("ext/dom/lexbor/lexbor/css/syntax", "state.c parser.c syntax.c anb.c tokenizer.c token.c", "dom");
35+
ADD_SOURCES("ext/dom/lexbor/lexbor/css/syntax/tokenizer", "error.c", "dom");
3136
ADD_SOURCES("ext/dom/lexbor/lexbor/ns", "ns.c", "dom");
3237
ADD_SOURCES("ext/dom/lexbor/lexbor/tag", "tag.c", "dom");
3338
ADD_FLAG("CFLAGS_DOM", "/D LEXBOR_STATIC ");

ext/dom/domexception.h

+3
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,7 @@ typedef enum {
4646
VALIDATION_ERR = 16
4747
} dom_exception_code;
4848

49+
void php_dom_throw_error(dom_exception_code error_code, bool strict_error);
50+
void php_dom_throw_error_with_message(dom_exception_code error_code, const char *error_message, bool strict_error);
51+
4952
#endif /* DOM_EXCEPTION_H */

ext/dom/element.c

+62
Original file line numberDiff line numberDiff line change
@@ -1752,4 +1752,66 @@ PHP_METHOD(DOMElement, toggleAttribute)
17521752
}
17531753
/* }}} end DOMElement::prepend */
17541754

1755+
/*
 * Shared implementation behind Dom\Element::querySelector() and
 * Dom\Element::querySelectorAll().
 *
 * Takes the standard internal-function parameters plus `all`:
 *   - all == true:  forwards to dom_parent_node_query_selector_all()
 *   - all == false: forwards to dom_parent_node_query_selector()
 *
 * The userland call must pass exactly one string argument (the selector
 * text). `return_value` is handed to the helper unchanged; this function
 * does not write to it itself.
 */
static void php_dom_dispatch_query_selector(INTERNAL_FUNCTION_PARAMETERS, bool all)
1756+
{
1757+
zend_string *selectors_str;
1758+
1759+
/* Exactly one required argument: the selector string. */
ZEND_PARSE_PARAMETERS_START(1, 1)
1760+
Z_PARAM_STR(selectors_str)
1761+
ZEND_PARSE_PARAMETERS_END();
1762+
1763+
xmlNodePtr thisp;
1764+
dom_object *intern;
1765+
zval *id;
1766+
/* NOTE(review): presumably fills thisp/id/intern from $this and returns early on failure - confirm against the macro definition. */
DOM_GET_THIS_OBJ(thisp, id, xmlNodePtr, intern);
1767+
1768+
if (all) {
1769+
dom_parent_node_query_selector_all(thisp, intern, return_value, selectors_str);
1770+
} else {
1771+
dom_parent_node_query_selector(thisp, intern, return_value, selectors_str);
1772+
}
1773+
}
1774+
1775+
/*
 * Dom\Element::querySelector(): thin wrapper that delegates to
 * php_dom_dispatch_query_selector() with all == false.
 */
PHP_METHOD(Dom_Element, querySelector)
1776+
{
1777+
php_dom_dispatch_query_selector(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1778+
}
1779+
1780+
/*
 * Dom\Element::querySelectorAll(): thin wrapper that delegates to
 * php_dom_dispatch_query_selector() with all == true.
 */
PHP_METHOD(Dom_Element, querySelectorAll)
1781+
{
1782+
php_dom_dispatch_query_selector(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1783+
}
1784+
1785+
/*
 * Dom\Element::matches(): parses the single required string argument
 * (the selector text) and forwards it, together with the node behind
 * $this, to dom_element_matches(). `return_value` is passed through to
 * that helper; this method does not write to it itself.
 */
PHP_METHOD(Dom_Element, matches)
1786+
{
1787+
zend_string *selectors_str;
1788+
1789+
/* Exactly one required argument: the selector string. */
ZEND_PARSE_PARAMETERS_START(1, 1)
1790+
Z_PARAM_STR(selectors_str)
1791+
ZEND_PARSE_PARAMETERS_END();
1792+
1793+
xmlNodePtr thisp;
1794+
dom_object *intern;
1795+
zval *id;
1796+
/* NOTE(review): presumably fills thisp/id/intern from $this and returns early on failure - confirm against the macro definition. */
DOM_GET_THIS_OBJ(thisp, id, xmlNodePtr, intern);
1797+
1798+
dom_element_matches(thisp, intern, return_value, selectors_str);
1799+
}
1800+
1801+
/*
 * Dom\Element::closest(): parses the single required string argument
 * (the selector text) and forwards it, together with the node behind
 * $this, to dom_element_closest(). `return_value` is passed through to
 * that helper; this method does not write to it itself.
 */
PHP_METHOD(Dom_Element, closest)
1802+
{
1803+
zend_string *selectors_str;
1804+
1805+
/* Exactly one required argument: the selector string. */
ZEND_PARSE_PARAMETERS_START(1, 1)
1806+
Z_PARAM_STR(selectors_str)
1807+
ZEND_PARSE_PARAMETERS_END();
1808+
1809+
xmlNodePtr thisp;
1810+
dom_object *intern;
1811+
zval *id;
1812+
/* NOTE(review): presumably fills thisp/id/intern from $this and returns early on failure - confirm against the macro definition. */
DOM_GET_THIS_OBJ(thisp, id, xmlNodePtr, intern);
1813+
1814+
dom_element_closest(thisp, intern, return_value, selectors_str);
1815+
}
1816+
17551817
#endif

ext/dom/html5_parser.c

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "html5_parser.h"
2525
#include <lexbor/html/parser.h>
2626
#include <lexbor/html/interfaces/element.h>
27+
#include <lexbor/dom/dom.h>
2728
#include <libxml/parserInternals.h>
2829
#include <libxml/HTMLtree.h>
2930
#include <Zend/zend.h>
@@ -380,6 +381,7 @@ void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxm
380381
observations->has_explicit_html_tag = tree->has_explicit_html_tag;
381382
observations->has_explicit_head_tag = tree->has_explicit_head_tag;
382383
observations->has_explicit_body_tag = tree->has_explicit_body_tag;
384+
observations->quirks_mode = lxb_dom_interface_document(tree->document)->compat_mode == LXB_DOM_DOCUMENT_CMODE_QUIRKS;
383385
}
384386

385387
#endif /* HAVE_LIBXML && HAVE_DOM */

ext/dom/html5_parser.h

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ typedef struct _lexbor_libxml2_bridge_extracted_observations {
4747
bool has_explicit_html_tag;
4848
bool has_explicit_head_tag;
4949
bool has_explicit_body_tag;
50+
bool quirks_mode;
5051
} lexbor_libxml2_bridge_extracted_observations;
5152

5253
typedef struct _lexbor_libxml2_bridge_parse_context {

ext/dom/html_document.c

+2
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,7 @@ PHP_METHOD(Dom_HTMLDocument, createFromString)
917917
NULL
918918
);
919919
dom_set_xml_class(intern->document);
920+
intern->document->quirks_mode = ctx.observations.quirks_mode;
920921
intern->document->private_data = php_dom_libxml_ns_mapper_header(ns_mapper);
921922
return;
922923

@@ -1137,6 +1138,7 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
11371138
NULL
11381139
);
11391140
dom_set_xml_class(intern->document);
1141+
intern->document->quirks_mode = ctx.observations.quirks_mode;
11401142
intern->document->private_data = php_dom_libxml_ns_mapper_header(ns_mapper);
11411143
return;
11421144

0 commit comments

Comments
 (0)