diff --git a/composer.json b/composer.json index 6a7455b9d..8e5924a6c 100644 --- a/composer.json +++ b/composer.json @@ -52,7 +52,6 @@ "rockettheme/toolbox": "~1.5", "maximebf/debugbar": "~1.16", "league/climate": "^3.6", - "antoligy/dom-string-iterators": "^1.0", "miljar/php-exif": "^0.6", "composer/ca-bundle": "^1.2", "dragonmantank/cron-expression": "^1.2", @@ -97,7 +96,9 @@ "Twig\\": "system/src/Twig" }, "files": [ - "system/defines.php" + "system/defines.php", + "system/src/DOMLettersIterator.php", + "system/src/DOMWordsIterator.php" ] }, "archive": { diff --git a/composer.lock b/composer.lock index 2bc68f592..2c4c4ea44 100644 --- a/composer.lock +++ b/composer.lock @@ -4,56 +4,8 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "209e1ceeafd2ef97fbf90bf4b1ec5dd4", + "content-hash": "1067938388862c52927c6450e8a36df0", "packages": [ - { - "name": "antoligy/dom-string-iterators", - "version": "v1.0.1", - "source": { - "type": "git", - "url": "https://github.com/antoligy/dom-string-iterators.git", - "reference": "fae88f66e1970d68c5585fc42db44f1217bf74e6" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/antoligy/dom-string-iterators/zipball/fae88f66e1970d68c5585fc42db44f1217bf74e6", - "reference": "fae88f66e1970d68c5585fc42db44f1217bf74e6", - "shasum": "" - }, - "require": { - "php": ">=5.3.0" - }, - "type": "library", - "autoload": { - "psr-4": { - "": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "CC0-1.0" - ], - "authors": [ - { - "name": "Alex Wilson", - "email": "a@ax.gy" - }, - { - "name": "Kornel Lesinski", - "email": "pornel@pornel.net" - }, - { - "name": "Patrick Galbraith", - "email": "patrick.j.galbraith@gmail.com" - } - ], - "description": "Composer package for DOMWordsIterator and DOMLettersIterator", - "support": { - "issues": "https://github.com/antoligy/dom-string-iterators/issues", - "source": "https://github.com/antoligy/dom-string-iterators/tree/v1.0.1" - }, - "time": "2018-02-03T16:01:11+00:00" - }, { "name": "composer/ca-bundle", "version": "1.3.1", diff --git a/system/src/DOMLettersIterator.php b/system/src/DOMLettersIterator.php new file mode 100644 index 000000000..3ebe20e4f --- /dev/null +++ b/system/src/DOMLettersIterator.php @@ -0,0 +1,165 @@ +load('example.xml'); + * foreach(new DOMLettersIterator($doc) as $letter) echo $letter; + * + * NB: If you only need characters without their position + * in the document, use DOMNode->textContent instead. + * + * @author porneL http://pornel.net + * @license Public Domain + * @url https://github.com/antoligy/dom-string-iterators + * + * @implements Iterator + */ +final class DOMLettersIterator implements Iterator +{ + /** @var DOMElement */ + private $start; + /** @var DOMElement|null */ + private $current; + /** @var int */ + private $offset = -1; + /** @var int|null */ + private $key; + /** @var array|null */ + private $letters; + + /** + * expects DOMElement or DOMDocument (see DOMDocument::load and DOMDocument::loadHTML) + * + * @param DOMNode $el + */ + public function __construct(DOMNode $el) + { + if ($el instanceof DOMDocument) { + $el = $el->documentElement; + } + + if (!$el instanceof DOMElement) { + throw new InvalidArgumentException('Invalid arguments, expected DOMElement or DOMDocument'); + } + + $this->start = $el; + } + + /** + * Returns position in text as DOMText node and character offset. + * (it's NOT a byte offset, you must use mb_substr() or similar to use this offset properly). + * node may be NULL if iterator has finished. + * + * @return array + */ + public function currentTextPosition(): array + { + return [$this->current, $this->offset]; + } + + /** + * Returns DOMElement that is currently being iterated or NULL if iterator has finished. + * + * @return DOMElement|null + */ + public function currentElement(): ?DOMElement + { + return $this->current ? $this->current->parentNode : null; + } + + // Implementation of Iterator interface + + /** + * @return int|null + */ + public function key(): ?int + { + return $this->key; + } + + /** + * @return void + */ + public function next(): void + { + if (null === $this->current) { + return; + } + + if ($this->current->nodeType === XML_TEXT_NODE || $this->current->nodeType === XML_CDATA_SECTION_NODE) { + if ($this->offset === -1) { + preg_match_all('/./us', $this->current->textContent, $m); + $this->letters = $m[0]; + } + + $this->offset++; + $this->key++; + if ($this->letters && $this->offset < count($this->letters)) { + return; + } + + $this->offset = -1; + } + + while ($this->current->nodeType === XML_ELEMENT_NODE && $this->current->firstChild) { + $this->current = $this->current->firstChild; + if ($this->current->nodeType === XML_TEXT_NODE || $this->current->nodeType === XML_CDATA_SECTION_NODE) { + $this->next(); + return; + } + } + + while (!$this->current->nextSibling && $this->current->parentNode) { + $this->current = $this->current->parentNode; + if ($this->current === $this->start) { + $this->current = null; + return; + } + } + + $this->current = $this->current->nextSibling; + + $this->next(); + } + + /** + * Return the current element + * @link https://php.net/manual/en/iterator.current.php + * + * @return string|null + */ + public function current(): ?string + { + return $this->letters ? $this->letters[$this->offset] : null; + } + + /** + * Checks if current position is valid + * @link https://php.net/manual/en/iterator.valid.php + * + * @return bool + */ + public function valid(): bool + { + return (bool)$this->current; + } + + /** + * @return void + */ + public function rewind(): void + { + $this->current = $this->start; + $this->offset = -1; + $this->key = -1; + $this->letters = []; + + $this->next(); + } +} + diff --git a/system/src/DOMWordsIterator.php b/system/src/DOMWordsIterator.php new file mode 100644 index 000000000..31ca90fe9 --- /dev/null +++ b/system/src/DOMWordsIterator.php @@ -0,0 +1,158 @@ +load('example.xml'); + * foreach(new DOMWordsIterator($doc) as $word) echo $word; + * + * @author pjgalbraith http://www.pjgalbraith.com + * @author porneL http://pornel.net (based on DOMLettersIterator available at http://pornel.net/source/domlettersiterator.php) + * @license Public Domain + * @url https://github.com/antoligy/dom-string-iterators + * + * @implements Iterator + */ + +final class DOMWordsIterator implements Iterator +{ + /** @var DOMElement */ + private $start; + /** @var DOMElement|null */ + private $current; + /** @var int */ + private $offset = -1; + /** @var int|null */ + private $key; + /** @var array|null */ + private $words; + + /** + * expects DOMElement or DOMDocument (see DOMDocument::load and DOMDocument::loadHTML) + * + * @param DOMNode $el + */ + public function __construct(DOMNode $el) + { + if ($el instanceof DOMDocument) { + $el = $el->documentElement; + } + + if (!$el instanceof DOMElement) { + throw new InvalidArgumentException('Invalid arguments, expected DOMElement or DOMDocument'); + } + + $this->start = $el; + } + + /** + * Returns position in text as DOMText node and character offset. + * (it's NOT a byte offset, you must use mb_substr() or similar to use this offset properly). + * node may be NULL if iterator has finished. + * + * @return array + */ + public function currentWordPosition(): array + { + return [$this->current, $this->offset, $this->words]; + } + + /** + * Returns DOMElement that is currently being iterated or NULL if iterator has finished. + * + * @return DOMElement|null + */ + public function currentElement(): ?DOMElement + { + return $this->current ? $this->current->parentNode : null; + } + + // Implementation of Iterator interface + + /** + * Return the key of the current element + * @link https://php.net/manual/en/iterator.key.php + * @return int|null + */ + public function key(): ?int + { + return $this->key; + } + + /** + * @return void + */ + public function next(): void + { + if (null === $this->current) { + return; + } + + if ($this->current->nodeType === XML_TEXT_NODE || $this->current->nodeType === XML_CDATA_SECTION_NODE) { + if ($this->offset === -1) { + $this->words = preg_split("/[\n\r\t ]+/u", $this->current->textContent, -1, PREG_SPLIT_NO_EMPTY) ?: []; + } + $this->offset++; + + if ($this->words && $this->offset < count($this->words)) { + $this->key++; + return; + } + $this->offset = -1; + } + + while ($this->current->nodeType === XML_ELEMENT_NODE && $this->current->firstChild) { + $this->current = $this->current->firstChild; + if ($this->current->nodeType === XML_TEXT_NODE || $this->current->nodeType === XML_CDATA_SECTION_NODE) { + $this->next(); + return; + } + } + + while (!$this->current->nextSibling && $this->current->parentNode) { + $this->current = $this->current->parentNode; + if ($this->current === $this->start) { + $this->current = null; + return; + } + } + + $this->current = $this->current->nextSibling; + + $this->next(); + } + + /** + * Return the current element + * @link https://php.net/manual/en/iterator.current.php + * @return string|null + */ + public function current(): ?string + { + return $this->words ? $this->words[$this->offset] : null; + } + + /** + * Checks if current position is valid + * @link https://php.net/manual/en/iterator.valid.php + * @return bool + */ + public function valid(): bool + { + return (bool)$this->current; + } + + public function rewind(): void + { + $this->current = $this->start; + $this->offset = -1; + $this->key = -1; + $this->words = []; + + $this->next(); + } +}