/', $string, $matches, PREG_OFFSET_CAPTURE); $lastMatch = end($matches[0]); $result = substr($string, 0, $lastMatch[1] + strlen($lastMatch[0]) + 1); return static::sanitizeXML($result); }

    /**
     * Removes invalid characters from a UTF-8 XML string.
     *
     * Works in two passes:
     *  1. a byte-level regex drops bytes that cannot be part of well-formed
     *     UTF-8 (invalid lead bytes, overlong encodings, truncated sequences,
     *     stray continuation bytes);
     *  2. every code point outside the XML 1.0 "Char" production
     *     (#x9 | #xA | #xD | #x20-#xD7FF | #xE000-#xFFFD | #x10000-#x10FFFF)
     *     is removed.
     *
     * Invalid characters are stripped, not replaced. Empty input (as defined
     * by empty()) is returned untouched.
     *
     * @param string $string An XML string potentially containing invalid characters
     * @return string The sanitized string
     *
     * @source https://www.ryadel.com/php-eliminare-caratteri-non-validi-file-stringa-xml-utf8-utf-8/
     */
    protected static function sanitizeXML($string)
    {
        if (empty($string)) {
            return $string;
        }

        // NOTE(review): the alternatives from "Overlong Sequence" onwards and the
        // empty replacement string were restored from the article referenced in
        // @source -- confirm against upstream. The pattern is deliberately run
        // WITHOUT the "u" modifier so it can inspect raw, possibly malformed bytes;
        // the "x" modifier makes PCRE ignore the layout whitespace and #-comments.
        $regex = '/(
            [\xC0-\xC1] # Invalid UTF-8 Bytes
            | [\xF5-\xFF] # Invalid UTF-8 Bytes
            | \xE0[\x80-\x9F] # Overlong encoding of prior code point
            | \xF0[\x80-\x8F] # Overlong encoding of prior code point
            | [\xC2-\xDF](?![\x80-\xBF]) # Invalid UTF-8 Sequence Start
            | [\xE0-\xEF](?![\x80-\xBF]{2}) # Invalid UTF-8 Sequence Start
            | [\xF0-\xF4](?![\x80-\xBF]{3}) # Invalid UTF-8 Sequence Start
            | (?<=[\x0-\x7F\xF5-\xFF])[\x80-\xBF] # Invalid UTF-8 Sequence Middle
            | (?<![\xC2-\xDF]|[\xE0-\xEF]|[\xE0-\xEF][\x80-\xBF]|[\xF0-\xF4]|[\xF0-\xF4][\x80-\xBF]|[\xF0-\xF4][\x80-\xBF]{2})[\x80-\xBF] # Overlong Sequence
            | (?<=[\xE0-\xEF])[\x80-\xBF](?![\x80-\xBF]) # Short 3 byte sequence
            | (?<=[\xF0-\xF4])[\x80-\xBF](?![\x80-\xBF]{2}) # Short 4 byte sequence
            | (?<=[\xF0-\xF4][\x80-\xBF])[\x80-\xBF](?![\x80-\xBF]) # Short 4 byte sequence (2)
        )/x';

        $wellFormed = preg_replace($regex, '', $string);
        if ($wellFormed !== null) {
            // preg_replace() yields null on a PCRE runtime error; in that case
            // keep working on the unmodified input instead of losing it.
            $string = $wellFormed;
        }

        // Code-point-aware filter for the XML 1.0 Char production. A byte-wise
        // ord()/chr() loop can only ever see values 0-255, so it is unable to
        // reject U+FFFE / U+FFFF; matching in UTF-8 mode can.
        $xmlSafe = preg_replace(
            '/[^\x{9}\x{A}\x{D}\x{20}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u',
            '',
            $string
        );
        if ($xmlSafe !== null) {
            return $xmlSafe;
        }

        // The "u" modifier makes PCRE refuse subjects that are still not valid
        // UTF-8 (e.g. encoded surrogates). Fall back to removing the control
        // bytes XML forbids, leaving every other byte in place.
        $stripped = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $string);

        return $stripped !== null ? $stripped : $string;
    }
}