You should be able to just use strtr with an associative array of characters to convert (the data is available from MSDN, and converted into a PHP array below). Note that in this code, reserved byte values are replaced with the U+FFFD replacement character ("\xef\xbf\xbd").
/**
 * Convert a Windows-1255 encoded byte string to UTF-8.
 *
 * Bytes 0x00-0x7F are not in the lookup table and pass through unchanged.
 * Each byte 0x80-0xFF is replaced by the UTF-8 sequence listed for it below;
 * reserved byte values map to U+FFFD ("\xef\xbf\xbd").
 *
 * @param string $str Windows-1255 encoded input.
 * @return string The input re-encoded as UTF-8.
 */
function win1255ToUtf8($str) {
    // Built once per request and cached across calls.
    static $tbl = null;
    if ($tbl === null) {
        // Keys are the 128 single bytes 0x80..0xFF; values are listed in the
        // same order, so entry i belongs to byte 0x80 + i.
        $tbl = array_combine(range("\x80", "\xff"), array(
            // 0x80 - 0x9F
            "\xe2\x82\xac", "\xef\xbf\xbd", "\xe2\x80\x9a", "\xc6\x92",
            "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1",
            "\xcb\x86", "\xe2\x80\xb0", "\xef\xbf\xbd", "\xe2\x80\xb9",
            "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd",
            "\xef\xbf\xbd", "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c",
            "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94",
            "\xcb\x9c", "\xe2\x84\xa2", "\xef\xbf\xbd", "\xe2\x80\xba",
            "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd",
            // 0xA0 - 0xFF
            "\xc2\xa0", "\xc2\xa1", "\xc2\xa2", "\xc2\xa3", "\xe2\x82\xaa",
            "\xc2\xa5", "\xc2\xa6", "\xc2\xa7", "\xc2\xa8", "\xc2\xa9",
            "\xc3\x97", "\xc2\xab", "\xc2\xac", "\xc2\xad", "\xc2\xae",
            "\xc2\xaf", "\xc2\xb0", "\xc2\xb1", "\xc2\xb2", "\xc2\xb3",
            "\xc2\xb4", "\xc2\xb5", "\xc2\xb6", "\xc2\xb7", "\xc2\xb8",
            "\xc2\xb9", "\xc3\xb7", "\xc2\xbb", "\xc2\xbc", "\xc2\xbd",
            "\xc2\xbe", "\xc2\xbf", "\xd6\xb0", "\xd6\xb1", "\xd6\xb2",
            "\xd6\xb3", "\xd6\xb4", "\xd6\xb5", "\xd6\xb6", "\xd6\xb7",
            "\xd6\xb8", "\xd6\xb9", "\xef\xbf\xbd", "\xd6\xbb", "\xd6\xbc",
            "\xd6\xbd", "\xd6\xbe", "\xd6\xbf", "\xd7\x80", "\xd7\x81",
            "\xd7\x82", "\xd7\x83", "\xd7\xb0", "\xd7\xb1", "\xd7\xb2",
            "\xd7\xb3", "\xd7\xb4", "\xef\xbf\xbd", "\xef\xbf\xbd",
            "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd",
            "\xef\xbf\xbd", "\xd7\x90", "\xd7\x91", "\xd7\x92", "\xd7\x93",
            "\xd7\x94", "\xd7\x95", "\xd7\x96", "\xd7\x97", "\xd7\x98",
            "\xd7\x99", "\xd7\x9a", "\xd7\x9b", "\xd7\x9c", "\xd7\x9d",
            "\xd7\x9e", "\xd7\x9f", "\xd7\xa0", "\xd7\xa1", "\xd7\xa2",
            "\xd7\xa3", "\xd7\xa4", "\xd7\xa5", "\xd7\xa6", "\xd7\xa7",
            "\xd7\xa8", "\xd7\xa9", "\xd7\xaa", "\xef\xbf\xbd", "\xef\xbf\xbd",
            "\xe2\x80\x8e", "\xe2\x80\x8f", "\xef\xbf\xbd",
        ));
    }
    // With an array argument strtr replaces each matching byte in one pass,
    // so replacement output is never re-translated.
    return strtr($str, $tbl);
}
I generated the above code with this PHP script:
function win1255ToUtf8($str) {
static $tbl = null;
if (!$tbl) {
$tbl = array_combine(range("\x80", "\xff"), array(
<?php
/**
 * Render a byte string as a double-quoted PHP string literal in which every
 * byte is written as a \xNN hex escape.
 *
 * @param string $str Raw bytes to encode.
 * @return string PHP source text such as "\xe2\x82\xac" (quotes included).
 */
function encodeString($str) {
    // bin2hex() yields two hex digits per byte; '/../' matches each pair.
    // '\\\\' is the two-character sequence \\ at runtime, which
    // preg_replace() treats as a single literal backslash in a replacement.
    return '"' . preg_replace('/../', '\\\\x$0', bin2hex($str)) . '"';
}
/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * @param int $n Code point number.
 * @return string UTF-8 bytes produced by mb_convert_encoding().
 */
function codepointToUtf8($n) {
    // pack('V') writes an unsigned 32-bit little-endian integer, which is
    // the UTF-32LE representation of one code point.
    $utf32le = pack('V', $n);
    return mb_convert_encoding($utf32le, 'UTF-8', 'UTF-32LE');
}
// Download the MSDN listing and drop the markup so only its text remains.
$html = file_get_contents('http://msdn.microsoft.com/en-us/goglobal/cc305148.aspx');
if ($html === false) {
    // Without this check a failed download would silently print a table
    // made entirely of U+FFFD entries.
    error_log('Could not download the code page listing from MSDN.');
    exit(1);
}
$text = strip_tags($html);

// Match entries of the form "XX = U+YYYY". The "+" must be escaped;
// unescaped it is a quantifier and the literal "U+" never matches.
preg_match_all('/([0-9A-F]{2}) = U\+([0-9A-F]{4})/', $text, $matches, PREG_SET_ORDER);

// Start with every slot for bytes 0x80..0xFF set to U+FFFD, then overwrite
// the slots the listing defines.
$table = array_fill(0, 128, "\xef\xbf\xbd");
foreach ($matches as $match) {
    $input = hexdec($match[1]) - 128;
    // Negative offsets are bytes below 0x80, which are not part of the table.
    if ($input >= 0) {
        $table[$input] = codepointToUtf8(hexdec($match[2]));
    }
}

// Emit the entries as comma-separated PHP string literals.
$buf = '';
foreach ($table as $to) {
    $buf .= encodeString($to) . ', ';
}
// substr() drops only the trailing space; the final comma is kept.
echo wordwrap(substr($buf, 0, -1), 68, "\n"), "\n";
?>
));
}
return strtr($str, $tbl);
}