| sidebar_position | 6 |
|---|
XmlUtil provides a dedicated CleanDocument class for selectively removing specific tags or content from XML or HTML documents. This is useful for cleaning up documents before processing or display.
<?php
$document = new \ByJG\XmlUtil\CleanDocument($documentXmlOrHtml);
$document
->removeContentByTag('a', 'name')
->removeContentByProperty('src')
->stripTagsExcept(['img'])
->get();Removes all HTML/XML tags from the document.
$document = new \ByJG\XmlUtil\CleanDocument($html);
$plainText = $document->stripAllTags();Strips all HTML/XML tags except those specified in the array.
$document = new \ByJG\XmlUtil\CleanDocument($html);
$cleanHtml = $document->stripTagsExcept(['p', 'div', 'span'])->get();Removes content from any tag that contains the specified property.
$document = new \ByJG\XmlUtil\CleanDocument($html);
// Removes all tags containing the "style" property and their content
$cleanHtml = $document->removeContentByProperty('style')->get();Removes content from the specified tag, optionally filtering by a property.
$document = new \ByJG\XmlUtil\CleanDocument($html);
// Removes all <script> tags and their content
$cleanHtml = $document->removeContentByTag('script')->get();
// Removes all <a> tags that have an "onclick" property
$cleanHtml = $document->removeContentByTag('a', 'onclick')->get();Removes content from the specified tag that does NOT have the specified property.
$document = new \ByJG\XmlUtil\CleanDocument($html);
// Removes all <a> tags that don't have an "href" property
$cleanHtml = $document->removeContentByTagWithoutProperty('a', 'href')->get();Returns the cleaned document as a string.
$document = new \ByJG\XmlUtil\CleanDocument($html);
// Apply cleaning operations
$document->removeContentByTag('script');
// Get the final result
$cleanHtml = $document->get();