Skip to content

Commit cd2097d

Browse files
committed
* integrated abandoned stringEncode/encode class
1 parent d388b3b commit cd2097d

File tree

8 files changed

+133
-10
lines changed

8 files changed

+133
-10
lines changed

composer.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"ext-mbstring": "*",
2323
"ext-zlib": "*",
2424
"ext-curl": "*",
25-
"translate5/string-encoder": "dev-master",
25+
"ext-iconv": "*",
2626
"php-http/httplug": "^2.1",
2727
"guzzlehttp/guzzle": "^7.8",
2828
"guzzlehttp/psr7": "^2.7",

src/PHPHtmlParser/DTO/Tag/AttributeDTO.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
namespace PHPHtmlParser\DTO\Tag;
66

7-
use stringEncode\Encode;
8-
use stringEncode\Exception;
7+
use Exception;
8+
use PHPHtmlParser\Encode;
99

1010
final class AttributeDTO
1111
{

src/PHPHtmlParser/Dom/Node/AbstractNode.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException;
1313
use PHPHtmlParser\Finder;
1414
use PHPHtmlParser\Selector\Selector;
15-
use stringEncode\Encode;
15+
use PHPHtmlParser\Encode;
1616

1717
/**
1818
* Dom node object.

src/PHPHtmlParser/Dom/Node/InnerNode.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use PHPHtmlParser\Exceptions\ChildNotFoundException;
99
use PHPHtmlParser\Exceptions\CircularException;
1010
use PHPHtmlParser\Exceptions\LogicalException;
11-
use stringEncode\Encode;
11+
use PHPHtmlParser\Encode;
1212

1313
/**
1414
* Inner node of the html tree, might have children.

src/PHPHtmlParser/Dom/Parser.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
use PHPHtmlParser\Exceptions\LogicalException;
1818
use PHPHtmlParser\Exceptions\StrictException;
1919
use PHPHtmlParser\Options;
20-
use stringEncode\Encode;
20+
use PHPHtmlParser\Encode;
2121

2222
class Parser implements ParserInterface
2323
{

src/PHPHtmlParser/Dom/Tag.php

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
use PHPHtmlParser\DTO\Tag\AttributeDTO;
88
use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException;
9-
use stringEncode\Encode;
9+
use PHPHtmlParser\Encode;
1010

1111
/**
1212
* Class Tag.
@@ -263,7 +263,7 @@ public function setAttributes(array $attr)
263263
/**
264264
* Returns all attributes of this tag.
265265
*
266-
* @throws \stringEncode\Exception
266+
* @throws \Exception
267267
*
268268
* @return AttributeDTO[]
269269
*/
@@ -286,7 +286,7 @@ public function getAttributes(): array
286286
* Returns an attribute by the key.
287287
*
288288
* @throws AttributeNotFoundException
289-
* @throws \stringEncode\Exception
289+
* @throws \Exception
290290
*/
291291
public function getAttribute(string $key): AttributeDTO
292292
{

src/PHPHtmlParser/Encode.php

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
<?php
2+
namespace PHPHtmlParser;
3+
4+
use Exception;
5+
6+
/**
 * Converts strings from one character set to another.
 *
 * The instance keeps a source ("from") and a target ("to") charset. Both can
 * be set explicitly through the fluent setters, and the source charset can
 * also be guessed from a sample string via detect().
 */
class Encode
{
    /**
     * The encoding that the string is currently in.
     *
     * @var string
     */
    protected $from;

    /**
     * The encoding that we would like the string to be in.
     *
     * @var string
     */
    protected $to;

    /**
     * Sets the default charsets for this package: CP1252 -> UTF-8.
     */
    public function __construct()
    {
        // default from encoding
        $this->from = 'CP1252';

        // default to encoding
        $this->to = 'UTF-8';
    }

    /**
     * Sets the charset that we will be converting to.
     *
     * The name is upper-cased before it is stored.
     *
     * @param string $charset
     *
     * @return $this
     */
    public function to($charset)
    {
        $this->to = strtoupper($charset);

        return $this;
    }

    /**
     * Sets the charset that we will be converting from.
     *
     * The name is upper-cased before it is stored.
     *
     * @param string $charset
     *
     * @return $this
     */
    public function from($charset)
    {
        $this->from = strtoupper($charset);

        // Return the instance so that from() is chainable, exactly like to().
        return $this;
    }

    /**
     * Returns the to and from charset that we will be using.
     *
     * @return array{from: string, to: string}
     */
    public function charset()
    {
        return [
            'from' => $this->from,
            'to' => $this->to,
        ];
    }

    /**
     * Attempts to detect the encoding of the given string from the encodingList.
     *
     * On success the detected charset becomes the new "from" charset, stored
     * exactly as mb_detect_encoding() reported it. On failure the current
     * "from" charset is left untouched.
     *
     * @param string   $str          The sample to inspect.
     * @param string[] $encodingList Candidate encodings passed to mb_detect_encoding().
     *
     * @return bool True when a charset was detected, false otherwise.
     */
    public function detect($str, $encodingList = ['UTF-8', 'CP1252'])
    {
        $charset = mb_detect_encoding($str, $encodingList);
        if ($charset === false) {
            // could not detect charset
            return false;
        }

        $this->from = $charset;

        return true;
    }

    /**
     * Attempts to convert the string to the proper charset.
     *
     * iconv() is only invoked when the source and target charsets differ.
     * When the target charset is UTF-8, a byte order mark found at the very
     * start and/or the very end of the result is removed.
     *
     * @param string $str The string to convert.
     *
     * @throws Exception When the string is false, e.g. because iconv() failed.
     *
     * @return string
     */
    public function convert($str)
    {
        if ($this->from !== $this->to) {
            $str = iconv($this->from, $this->to, $str);
        }

        if ($str === false) {
            // the conversion was a failure
            throw new Exception('The convertion from "'.$this->from.'" to "'.$this->to.'" was a failure.');
        }

        // deal with BOM issue for utf-8 text
        if ($this->to === 'UTF-8') {
            $bom = "\xef\xbb\xbf";

            if (substr($str, 0, 3) === $bom) {
                $str = substr($str, 3);
            }
            if (substr($str, -3, 3) === $bom) {
                $str = substr($str, 0, -3);
            }
        }

        return $str;
    }
}

tests/Node/TextTest.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
use PHPHtmlParser\Dom\Node\TextNode;
77
use PHPHtmlParser\Options;
88
use PHPUnit\Framework\TestCase;
9-
use stringEncode\Encode;
9+
use PHPHtmlParser\Encode;
1010

1111
class NodeTextTest extends TestCase
1212
{

0 commit comments

Comments
 (0)