Okay, I rewrote the function to this but I can't upgrade these forums until I get home to test it live on here:
/**
 * More robust version of PHP's trim() function. Besides what trim() itself removes, it strips
 * leading and trailing blank characters that are matched as UTF-8 byte sequences. The list is taken
 * from http://kb.mozillazine.org/Network.IDN.blacklist_chars
 *
 * @param string $string The string to trim from
 * @param string|bool $charlist Optional. Characters handed to the final trim() call; false uses trim()'s defaults
 * @return string The trimmed string
 */
function trim_blank_chrs($string, $charlist=false)
{
    // Lookup tree of blank characters keyed by byte value, read left to right.
    // A value of 1 marks the end of a complete sequence, an array means more bytes follow.
    $hex_chrs = array(
        0x20 => 1,
        0x09 => 1,
        0x0A => 1,
        0x0D => 1,
        0x0B => 1,
        0xCC => array(0xB7 => 1, 0xB8 => 1), // \x{0337} or \x{0338}
        0xE1 => array(0x85 => array(0x9F => 1, 0xA0 => 1)), // \x{115F} or \x{1160}
        0xE2 => array(
            0x80 => array(
                0x80 => 1, 0x81 => 1, 0x82 => 1, 0x83 => 1, 0x84 => 1, 0x85 => 1,
                0x86 => 1, 0x87 => 1, 0x88 => 1, 0x89 => 1, 0x8A => 1, 0x8B => 1, // \x{2000} to \x{200B}
                0xA8 => 1, 0xA9 => 1, 0xAA => 1, 0xAB => 1, 0xAC => 1, 0xAD => 1, 0xAE => 1, 0xAF => 1, // \x{2028} to \x{202F}
            ),
            0x81 => array(0x9F => 1), // \x{205F}
        ),
        0xE3 => array(
            0x80 => array(0x80 => 1), // \x{3000}
            0x85 => array(0xA4 => 1), // \x{3164}
        ),
        0xEF => array(
            0xBB => array(0xBF => 1), // \x{FEFF}
            0xBE => array(0xA0 => 1), // \x{FFA0}
            0xBF => array(0xB9 => 1, 0xBA => 1, 0xBB => 1), // \x{FFF9} to \x{FFFB}
        ),
    );

    // The same sequences with their bytes in reverse order, used on the strrev()'d string.
    // Every top-level key must appear only once: PHP silently keeps just the last duplicate,
    // so sequences sharing a final byte are merged into one entry.
    $hex_chrs_rev = array(
        0x20 => 1,
        0x09 => 1,
        0x0A => 1,
        0x0D => 1,
        0x0B => 1,
        0xB8 => array(0xCC => 1), // \x{0338}
        0xB7 => array(0xCC => 1), // \x{0337}
        0xA0 => array(
            0x85 => array(0xE1 => 1), // \x{1160}
            0xBE => array(0xEF => 1), // \x{FFA0}
        ),
        0x9F => array(
            0x85 => array(0xE1 => 1), // \x{115F}
            0x81 => array(0xE2 => 1), // \x{205F}
        ),
        0x80 => array(0x80 => array(0xE3 => 1, 0xE2 => 1)), // \x{3000}, \x{2000}
        0x81 => array(0x80 => array(0xE2 => 1)), // \x{2001}
        0x82 => array(0x80 => array(0xE2 => 1)), // \x{2002}
        0x83 => array(0x80 => array(0xE2 => 1)), // \x{2003}
        0x84 => array(0x80 => array(0xE2 => 1)), // \x{2004}
        0x85 => array(0x80 => array(0xE2 => 1)), // \x{2005}
        0x86 => array(0x80 => array(0xE2 => 1)), // \x{2006}
        0x87 => array(0x80 => array(0xE2 => 1)), // \x{2007}
        0x88 => array(0x80 => array(0xE2 => 1)), // \x{2008}
        0x89 => array(0x80 => array(0xE2 => 1)), // \x{2009}
        0x8A => array(0x80 => array(0xE2 => 1)), // \x{200A}
        0x8B => array(0x80 => array(0xE2 => 1)), // \x{200B}
        0xA8 => array(0x80 => array(0xE2 => 1)), // \x{2028}
        0xA9 => array(0x80 => array(0xE2 => 1)), // \x{2029}
        0xAA => array(0x80 => array(0xE2 => 1)), // \x{202A}
        0xAB => array(0x80 => array(0xE2 => 1)), // \x{202B}
        0xAC => array(0x80 => array(0xE2 => 1)), // \x{202C}
        0xAD => array(0x80 => array(0xE2 => 1)), // \x{202D}
        0xAE => array(0x80 => array(0xE2 => 1)), // \x{202E}
        0xAF => array(0x80 => array(0xE2 => 1)), // \x{202F}
        0xA4 => array(0x85 => array(0xE3 => 1)), // \x{3164}
        0xBF => array(0xBB => array(0xEF => 1)), // \x{FEFF}
        0xB9 => array(0xBF => array(0xEF => 1)), // \x{FFF9}
        0xBA => array(0xBF => array(0xEF => 1)), // \x{FFFA}
        0xBB => array(0xBF => array(0xEF => 1)), // \x{FFFB}
    );

    // Start from the beginning and work our way in: keep cutting off blank sequences
    // until the string starts with a "good" character (match_sequence() returns 0).
    while(($offset = match_sequence($string, $hex_chrs)) > 0)
    {
        // On PHP < 8 substr() returns false rather than "" once everything is consumed
        $string = (string)substr($string, $offset);
    }

    // Start from the end and work our way in, by running the same scan on the reversed string
    $string = strrev($string);
    while(($offset = match_sequence($string, $hex_chrs_rev)) > 0)
    {
        $string = (string)substr($string, $offset);
    }
    $string = strrev($string);

    // Finish with PHP's own trim(), honouring the caller's character list when one was given
    if($charlist !== false)
    {
        $string = trim($string, $charlist);
    }
    else
    {
        $string = trim($string);
    }

    return $string;
}
/**
 * Checks whether the bytes of a string, starting at a given offset, spell out one of the
 * byte sequences stored in a nested lookup array.
 *
 * Each level of the array is keyed by byte value (as returned by ord()). A non-array value
 * marks the end of a complete sequence, a nested array means further bytes are required.
 *
 * @param string $string The string to inspect
 * @param array $array The (nested) lookup array of byte sequences
 * @param int $i Optional. Byte offset in $string at which matching starts
 * @param int $n Optional. Number of bytes already matched, added to the returned count
 * @return int The number of bytes matched (including $n), or 0 if no complete sequence matches
 */
function match_sequence($string, $array, $i=0, $n=0)
{
    $level = $array;

    do
    {
        // Stop when the string is empty or ends in the middle of a sequence; reading
        // past the end would raise an "Uninitialized string offset" notice/warning.
        if(!isset($string[$i]))
        {
            return 0;
        }

        $ord = ord($string[$i]);
        if(!array_key_exists($ord, $level))
        {
            return 0;
        }

        // This byte matched: descend one level and move on to the next byte
        $level = $level[$ord];
        ++$i;
        ++$n;
    }
    while(is_array($level));

    return $n;
}