diff --git a/src/htsparse.c b/src/htsparse.c index 46477a77..1bd2c423 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -386,7 +386,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // int emited_footer = 0; // emitted footer comment tag(s) count + int emited_footer_todo = 0; // Flag pour mise à jour différée + int skip_until_end_of_tag = 0; // Skip until the > of the end of tag ? + // int parent_relative = 0; // the parent is the base path (.js, .css..) @@ -661,6 +664,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { /* Meta ? */ if (check_tag(intag_start, "meta")) { int pos; + int please_skip_tag = 0; // if ((pos = rech_tageq_all(html, "http-equiv"))) { @@ -669,14 +673,54 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (len > 0) { if (strfield(token, "content-type")) { - intag_ctype = 1; - //NOPE-we do not convert the whole page actually - //intag_start[1] = 'X'; + if ((emited_footer > 0) || (emited_footer_todo > 0)) { + // Skip this tag that is redundant + please_skip_tag = 1; + } + else { + intag_ctype = 1; + //NOPE-we do not convert the whole page actually + //intag_start[1] = 'X'; + } } else if (strfield(token, "refresh")) { intag_ctype = 2; } } } + else if ((pos = rech_tageq_all(html, "charset"))) { + if ((emited_footer > 0) || (emited_footer_todo > 0)) { + // Skip this tag that is redundant + please_skip_tag = 1; + } + } + + if (please_skip_tag == 1) { + if (html - r->adr < r->size) { + /* Not on a starting tag yet */ + const char *adr_next = html + 1; + + while(*adr_next != '<' && (adr_next - r->adr) < r->size) { + adr_next++; + } + /* Jump to near end (index hack) */ + if (!adr_next || *adr_next != '<') { + if (html - r->adr < r->size - 4 + && r->size > 4 + ) { + html = r->adr + r->size - 2; + } + } else { + html = adr_next; + } + } + lastsaved = html; + } + } + + if (check_tag(intag_start, "base")) { + // Base tag will be empty so don't write it + html += 5; + lastsaved = html; } if (opt->getmode & 1) { // sauver html @@ -685,15 +729,19 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { case 0: // We are looking for the first head so that we can declare the HTTP-headers charset early // Emit as soon as we see the first
, , or tag. - // FIXME: we currently emit the tag BEFORE the tag, actually, which is not clean - if ((p = strfield(html, "")) != 0 - || ((p = strfield(html, "")) != 0 + if ((p = strfield(html, "")) != 0 || ((p = strfield(html, "")) != 0 + || ((p = strfield(html, "urlmode == 0) { // URL absolue dans tous les cas