Just like I said I would, here is your very own JSON object parser.
One word of warning: this kind of thing can be more art than science, so if your inputs vary from what was in your example, it could have issues. Given the small sample size (1 document), I make no guarantees about its functionality outside that one example.
I would try to explain how this works, but I fear it would be lost on mere mortals.
Seriously, this was fun, enjoyed the challenge for once.
<?php
/**
 * Tokenizes $subject with the supplied token patterns and parses the tokens.
 *
 * Every entry of $tokens becomes a named group in a single case-insensitive
 * alternation. Each match is turned into a token of the form
 * ['content' => text, 'type' => token name, 'offset' => byte offset] and the
 * resulting list is handed to parseJsonTokens().
 *
 * @param string $subject Text to parse.
 * @param array  $tokens  Map of token name => regex fragment; earlier entries
 *                        take precedence because they come first in the alternation.
 *
 * @return mixed false when the combined pattern matches nothing (or fails),
 *               otherwise whatever parseJsonTokens() returns.
 */
function parseJson($subject, $tokens)
{
    // One named group per token type, joined into a single alternation.
    $alternatives = [];
    foreach ($tokens as $name => $regex) {
        $alternatives[] = "(?P<$name>$regex)";
    }
    $combined = "/" . implode('|', $alternatives) . "/i";

    if (!preg_match_all($combined, $subject, $found, PREG_OFFSET_CAPTURE)) {
        return false;
    }

    $names = array_keys($tokens);
    $stream = [];

    foreach (array_keys($found[0]) as $index) {
        // Groups that did not take part in this match report offset -1;
        // the first group with a real offset identifies the token type.
        $hit = [];
        foreach ($names as $kind) {
            $hit = $found[$kind][$index];
            if (is_array($hit) && $hit[1] != -1) {
                break;
            }
        }

        $stream[] = [
            'content' => $hit[0],
            'type' => $kind,
            'offset' => $hit[1]
        ];
    }

    return parseJsonTokens($stream);
}
/**
 * Parses one brace-delimited block of tokens into an associative array.
 *
 * The stream's internal array pointer must be positioned on the token that
 * opened the block; that token is skipped. Tokens are then consumed up to and
 * including the matching T_CLOSE_BRACE. Nested blocks are handled recursively
 * on the same stream, which is why it is taken by reference.
 *
 * Values are returned as strings (numbers and tokens such as `!0` are not
 * converted), null for T_NULL, or a nested array. Only \uXXXX escapes inside
 * quoted strings are decoded; other backslash escapes are left as written.
 *
 * Unknown token types raise E_USER_ERROR via trigger_error(), as before.
 *
 * @param array $lexer_stream List of ['content' => string, 'type' => string, 'offset' => int].
 *
 * @return array|null The parsed items, or null when the stream ends before
 *                    the block is closed.
 */
function parseJsonTokens( array &$lexer_stream ){
    $result = [];
    next($lexer_stream); // skip the token that opened this block
    $mode = 'key'; // items start in key mode ( key => value )
    $key = '';
    $value = '';
    // True once the current item has received any token. Prevents "{}" and a
    // trailing comma from storing a bogus '' => '' entry.
    $pending = false;

    while ($current = current($lexer_stream)) {
        $content = $current['content'];
        $type = $current['type'];

        switch ($type) {
            case 'T_WHITESPACE': // ignore whitespace
                next($lexer_stream);
                break;

            case 'T_STRING':
                // keys are always strings, but strings are not always keys
                if ($mode == 'key') {
                    $key .= $content;
                } else {
                    $value .= $content;
                }
                $pending = true;
                next($lexer_stream);
                break;

            case 'T_COLON':
                $mode = 'value'; // everything up to the next comma/brace is the value
                $pending = true;
                next($lexer_stream);
                break;

            case 'T_ENCAP_STRING':
                // Remove exactly one delimiter quote per side *before* decoding, so
                // an escaped or decoded quote at the edge of the text survives.
                $text = unicode_decode(preg_replace('/^"|"$/D', '', $content));
                if ($mode == 'key') {
                    $key .= $text; // quoted keys, as in strict JSON
                } else {
                    $value .= $text;
                }
                $pending = true;
                next($lexer_stream);
                break;

            case 'T_NULL':
                $value = null;
                $pending = true;
                next($lexer_stream);
                break;

            case 'T_COMMA': // comma ends an item
                if ($pending) {
                    $result[$key] = $value;
                }
                // reset for the next item
                $mode = 'key';
                $key = '';
                $value = '';
                $pending = false;
                next($lexer_stream);
                break;

            case 'T_OPEN_BRACE': // start of a sub-block
                // The recursive call consumes through the matching close brace.
                $value = parseJsonTokens($lexer_stream);
                $pending = true;
                break;

            case 'T_CLOSE_BRACE': // end of this block
                if ($pending) {
                    $result[$key] = $value;
                }
                next($lexer_stream);
                return $result;

            default:
                print_r($current);
                trigger_error("Unknown token $type value $content", E_USER_ERROR);
                // Only reached when a custom error handler returns; advance so the
                // same token is not reported forever.
                next($lexer_stream);
                break;
        }
    }

    // The loop ends only when the stream is exhausted, i.e. the block was never closed.
    return null;
}
/**
 * Callback for preg_replace_callback(): turns one captured code unit into UTF-8.
 *
 * @see https://stackoverflow.com/questions/2934563/how-to-decode-unicode-escape-sequences-like-u00ed-to-proper-utf-8-encoded-char
 *
 * @param array $match Regex match; index 1 holds the hexadecimal digits of the code unit.
 *
 * @return string The character encoded as UTF-8.
 */
function replace_unicode_escape_sequence($match) {
    $hexDigits = $match[1];
    // Hex digits -> raw big-endian bytes -> UTF-8.
    $rawBytes = pack('H*', $hexDigits);

    return mb_convert_encoding($rawBytes, 'UTF-8', 'UCS-2BE');
}
/**
 * Replaces every \uXXXX escape (backslash, "u", four hex digits, any case)
 * in $str with the corresponding UTF-8 character.
 *
 * @param string $str Text that may contain \uXXXX escapes.
 *
 * @return string|null The decoded text; null if the regex engine reports an error.
 */
function unicode_decode($str) {
    // Four backslashes in a single-quoted PHP literal reach PCRE as two, which
    // match one literal backslash. With only two, PCRE receives "\u" -- an
    // escape it does not support -- so the pattern fails to compile and
    // preg_replace_callback() returns null.
    return preg_replace_callback('/\\\\u([0-9a-f]{4})/i', 'replace_unicode_escape_sequence', $str);
}
// Sample input: a JavaScript-style object literal with unquoted keys and
// \uXXXX escapes. Single quotes keep the backslashes literal, so the parser
// receives the raw escape sequences.
$str = '{
party:"bases",
number:"1",
id:"xx_3039366",
url:"systen01-ny.com",
target:"_self",
address:"Ch\u00e3o as Alminhas-Medas,Uteiros de Gatos e Fontes Longaq<br/>64320-761 ANHADOS LdA",
coordinate:{
x:90.995262145996094,
y:-1.3394836426
},
contactDetails:{
id:"366",
phone:"xxxxxx",
mobile:"",
fax:"xxxx 777 235",
c2c:!0
},
parameters:"Flex Am\u00e1vel Silva,hal,,EN_30336,S,786657,1,0,",
text:"Vila Nova de Loz C\u00f4a,os melhores vinhos, v\u00e1rias. Produtor/exportador/com\u00e9rcio",
website:null,
mail:"",
listing:"paid",
pCode:"64",
name:"xpto Am\u00e1vel Costa",
logo:{src:"http://ny.test.gif",
altname:"xpto Am\u00e1vel Costa"},
bookingUrl:"",
ipUrl:"",
ipLabel:"",
customerId:"7657",
addressId:"98760",
combined:null,
showReviews:!0
}';

// Token definitions. They are joined into one alternation in this order, so
// at any position the first pattern that matches wins.
$tokens = [
    'T_OPEN_BRACE'   => '{',
    'T_CLOSE_BRACE'  => '}',
    'T_NULL'         => 'null',
    // A quoted string ends at the first quote not preceded by a backslash.
    // The four backslashes reach PCRE as two (one literal backslash); with
    // only two, PCRE sees "\)" and the lookbehind group is never closed.
    'T_ENCAP_STRING' => '".*?(?<!\\\\)"',
    'T_COLON'        => ':',
    'T_COMMA'        => ',',
    'T_STRING'       => '[-a-z0-9_.!]+',
    // Any run of whitespace, including tabs and CR/LF line endings.
    'T_WHITESPACE'   => '\s+',
    // Catch-all so unexpected input is reported by the parser, not skipped.
    'T_UNKNOWN'      => '.+?'
];

var_export( parseJson($str, $tokens) );
Outputs ( this is what everyone wants )
array (
'party' => 'bases',
'number' => '1',
'id' => 'xx_3039366',
'url' => 'systen01-ny.com',
'target' => '_self',
'address' => 'Chão as Alminhas-Medas,Uteiros de Gatos e Fontes Longaq<br/>64320-761 ANHADOS LdA',
'coordinate' =>
array (
'x' => '90.995262145996094',
'y' => '-1.3394836426',
),
'contactDetails' =>
array (
'id' => '366',
'phone' => 'xxxxxx',
'mobile' => '',
'fax' => 'xxxx 777 235',
'c2c' => '!0',
),
'parameters' => 'Flex Amável Silva,hal,,EN_30336,S,786657,1,0,',
'text' => 'Vila Nova de Loz Côa,os melhores vinhos, várias. Produtor/exportador/comércio',
'website' => NULL,
'mail' => '',
'listing' => 'paid',
'pCode' => '64',
'name' => 'xpto Amável Costa',
'logo' =>
array (
'src' => 'http://ny.test.gif',
'altname' => 'xpto Amável Costa',
),
'bookingUrl' => '',
'ipUrl' => '',
'ipLabel' => '',
'customerId' => '7657',
'addressId' => '98760',
'combined' => NULL,
'showReviews' => '!0',
)
And you can even test it here ( because I am a nice guy )
http://sandbox.onlinephpfunctions.com/code/3c1dcafb59abbf19f7f3209724dbdd4a46546c57
I was able to fix the encoding issues (`\u00e3`, etc.) with the help of this SO post, so a shout-out to them, because I hate character encoding.
http://stackoverflow.com/questions/2934563/how-to-decode-unicode-escape-sequences-like-u00ed-to-proper-utf-8-encoded-char
Man I just love a beautiful piece of code, just umm.
Cheers!