Skip to content

Commit 8255bed

Browse files
committed
yyjson: move decode error line tracking off the hot path
1 parent 5cdd424 commit 8255bed

4 files changed

Lines changed: 73 additions & 99 deletions

File tree

ext/json/json_parser.y

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ int json_yydebug = 1;
3939

4040
}
4141

42-
%locations
4342
%define api.prefix {php_json_yy}
4443
%define api.pure full
4544
%param { php_json_parser *parser }
@@ -64,8 +63,8 @@ int json_yydebug = 1;
6463
%destructor { zval_ptr_dtor_nogc(&$$); } <value>
6564

6665
%code {
67-
static int php_json_yylex(union YYSTYPE *value, YYLTYPE *location, php_json_parser *parser);
68-
static void php_json_yyerror(YYLTYPE *location, php_json_parser *parser, char const *msg);
66+
static int php_json_yylex(union YYSTYPE *value, php_json_parser *parser);
67+
static void php_json_yyerror(php_json_parser *parser, char const *msg);
6968
static int php_json_parser_array_create(php_json_parser *parser, zval *array);
7069
static int php_json_parser_object_create(php_json_parser *parser, zval *array);
7170

@@ -275,7 +274,7 @@ static int php_json_parser_object_update_validate(php_json_parser *parser, zval
275274
return SUCCESS;
276275
}
277276

278-
static int php_json_yylex(union YYSTYPE *value, YYLTYPE *location, php_json_parser *parser)
277+
static int php_json_yylex(union YYSTYPE *value, php_json_parser *parser)
279278
{
280279
int token = php_json_scan(&parser->scanner);
281280

@@ -291,15 +290,10 @@ static int php_json_yylex(union YYSTYPE *value, YYLTYPE *location, php_json_pars
291290
value->value = parser->scanner.value;
292291
}
293292

294-
location->first_column = PHP_JSON_SCANNER_LOCATION(parser->scanner, first_column);
295-
location->first_line = PHP_JSON_SCANNER_LOCATION(parser->scanner, first_line);
296-
location->last_column = PHP_JSON_SCANNER_LOCATION(parser->scanner, last_column);
297-
location->last_line = PHP_JSON_SCANNER_LOCATION(parser->scanner, last_line);
298-
299293
return token;
300294
}
301295

302-
static void php_json_yyerror(YYLTYPE *location, php_json_parser *parser, char const *msg)
296+
static void php_json_yyerror(php_json_parser *parser, char const *msg)
303297
{
304298
if (!parser->scanner.errcode) {
305299
parser->scanner.errcode = PHP_JSON_ERROR_SYNTAX;
@@ -311,11 +305,64 @@ PHP_JSON_API php_json_error_code php_json_parser_error_code(const php_json_parse
311305
return parser->scanner.errcode;
312306
}
313307

308+
static zend_always_inline bool php_json_is_hex(php_json_ctype c, php_json_ctype lo, php_json_ctype hi)
309+
{
310+
php_json_ctype l = c | 0x20; /* fold ASCII case */
311+
return l >= (lo | 0x20) && l <= (hi | 0x20);
312+
}
313+
314+
static size_t php_json_compute_error_column(const php_json_scanner *s)
315+
{
316+
const php_json_ctype *p = s->line_start;
317+
const php_json_ctype *end = s->token;
318+
/* Replay the scanner's per-token column rules from the line start to the
319+
* failing token, keeping the decode success path free of column bookkeeping. */
320+
size_t column = 1;
321+
bool in_string = false;
322+
323+
while (p < end) {
324+
php_json_ctype c = *p;
325+
if (!in_string) {
326+
if (c == '"') {
327+
in_string = true;
328+
}
329+
column++;
330+
p++;
331+
} else if (c == '"') {
332+
in_string = false;
333+
column++;
334+
p++;
335+
} else if (c == '\\') {
336+
if (p + 5 < end && (p[1] | 0x20) == 'u') {
337+
/* \uXXXX, possibly the high half of a surrogate pair */
338+
if (php_json_is_hex(p[2], 'd', 'd') && php_json_is_hex(p[3], '8', 'b')
339+
&& p + 11 < end && p[6] == '\\' && (p[7] | 0x20) == 'u'
340+
&& php_json_is_hex(p[8], 'd', 'd') && php_json_is_hex(p[9], 'c', 'f')) {
341+
p += 12;
342+
} else {
343+
p += 6;
344+
}
345+
column++;
346+
} else {
347+
column += 2;
348+
p += 2;
349+
}
350+
} else if ((c & 0xC0) == 0x80) {
351+
/* UTF-8 continuation byte: counted with its leading byte */
352+
p++;
353+
} else {
354+
column++;
355+
p++;
356+
}
357+
}
358+
return column;
359+
}
360+
314361
PHP_JSON_API void php_json_parser_error_details(const php_json_parser *parser, php_json_error_details *out)
315362
{
316363
out->code = parser->scanner.errcode;
317-
out->line = parser->scanner.errloc.first_line;
318-
out->column = parser->scanner.errloc.first_column;
364+
out->line = parser->scanner.line;
365+
out->column = php_json_compute_error_column(&parser->scanner);
319366
}
320367

321368
static const php_json_parser_methods default_parser_methods =

ext/json/json_scanner.re

Lines changed: 12 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@
5151
#define PHP_JSON_INT_MAX_LENGTH (MAX_LENGTH_OF_LONG - 1)
5252

5353
#define PHP_JSON_TOKEN_LENGTH() ((size_t) (s->cursor - s->token))
54-
#define PHP_JSON_TOKEN_LOCATION(location) (s)->errloc.location
5554

5655
static void php_json_scanner_copy_string(php_json_scanner *s, size_t esc_size)
5756
{
@@ -96,10 +95,8 @@ void php_json_scanner_init(php_json_scanner *s, const char *str, size_t str_len,
9695
s->cursor = (php_json_ctype *) str;
9796
s->limit = (php_json_ctype *) str + str_len;
9897
s->options = options;
99-
PHP_JSON_TOKEN_LOCATION(first_column) = 1;
100-
PHP_JSON_TOKEN_LOCATION(first_line) = 1;
101-
PHP_JSON_TOKEN_LOCATION(last_column) = 1;
102-
PHP_JSON_TOKEN_LOCATION(last_line) = 1;
98+
s->line = 1;
99+
s->line_start = (php_json_ctype *) str;
103100
PHP_JSON_CONDITION_SET(JS);
104101
}
105102

@@ -108,8 +105,6 @@ int php_json_scan(php_json_scanner *s)
108105
ZVAL_NULL(&s->value);
109106

110107
std:
111-
PHP_JSON_TOKEN_LOCATION(first_column) = s->errloc.last_column;
112-
PHP_JSON_TOKEN_LOCATION(first_line) = s->errloc.last_line;
113108
s->token = s->cursor;
114109

115110
/*!re2c
@@ -155,49 +150,27 @@ std:
155150
UTF16_3 = UTFPREF ( ( ( HEXC | [efEF] ) HEX ) | ( [dD] HEX7 ) ) HEX{2} ;
156151
UTF16_4 = UTFPREF [dD] [89abAB] HEX{2} UTFPREF [dD] [c-fC-F] HEX{2} ;
157152
158-
<JS>"{" {
159-
PHP_JSON_TOKEN_LOCATION(last_column)++;
160-
return '{';
161-
}
162-
<JS>"}" {
163-
PHP_JSON_TOKEN_LOCATION(last_column)++;
164-
return '}';
165-
}
166-
<JS>"[" {
167-
PHP_JSON_TOKEN_LOCATION(last_column)++;
168-
return '[';
169-
}
170-
<JS>"]" {
171-
PHP_JSON_TOKEN_LOCATION(last_column)++;
172-
return ']';
173-
}
174-
<JS>":" {
175-
PHP_JSON_TOKEN_LOCATION(last_column)++;
176-
return ':';
177-
}
178-
<JS>"," {
179-
PHP_JSON_TOKEN_LOCATION(last_column)++;
180-
return ',';
181-
}
153+
<JS>"{" { return '{'; }
154+
<JS>"}" { return '}'; }
155+
<JS>"[" { return '['; }
156+
<JS>"]" { return ']'; }
157+
<JS>":" { return ':'; }
158+
<JS>"," { return ','; }
182159
<JS>"null" {
183-
PHP_JSON_TOKEN_LOCATION(last_column) += 4;
184160
ZVAL_NULL(&s->value);
185161
return PHP_JSON_T_NUL;
186162
}
187163
<JS>"true" {
188-
PHP_JSON_TOKEN_LOCATION(last_column) += 4;
189164
ZVAL_TRUE(&s->value);
190165
return PHP_JSON_T_TRUE;
191166
}
192167
<JS>"false" {
193-
PHP_JSON_TOKEN_LOCATION(last_column) += 5;
194168
ZVAL_FALSE(&s->value);
195169
return PHP_JSON_T_FALSE;
196170
}
197171
<JS>INT {
198172
bool bigint = 0, negative = s->token[0] == '-';
199173
size_t digits = PHP_JSON_TOKEN_LENGTH();
200-
PHP_JSON_TOKEN_LOCATION(last_column) += digits;
201174
digits -= negative;
202175
if (digits >= PHP_JSON_INT_MAX_LENGTH) {
203176
if (digits == PHP_JSON_INT_MAX_LENGTH) {
@@ -221,19 +194,15 @@ std:
221194
}
222195
}
223196
<JS>FLOAT|EXP {
224-
PHP_JSON_TOKEN_LOCATION(last_column) += PHP_JSON_TOKEN_LENGTH();
225197
ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL));
226198
return PHP_JSON_T_DOUBLE;
227199
}
228200
<JS>NL {
229-
PHP_JSON_TOKEN_LOCATION(last_line)++;
230-
PHP_JSON_TOKEN_LOCATION(last_column) = 1;
231-
goto std;
232-
}
233-
<JS>WS {
234-
PHP_JSON_TOKEN_LOCATION(last_column) += PHP_JSON_TOKEN_LENGTH();
201+
s->line++;
202+
s->line_start = s->cursor;
235203
goto std;
236204
}
205+
<JS>WS { goto std; }
237206
<JS>EOI {
238207
if (s->limit < s->cursor) {
239208
return PHP_JSON_T_EOI;
@@ -243,7 +212,6 @@ std:
243212
}
244213
}
245214
<JS>["] {
246-
PHP_JSON_TOKEN_LOCATION(last_column)++;
247215
s->str_start = s->cursor;
248216
s->str_esc = 0;
249217
s->utf8_invalid = 0;
@@ -268,22 +236,18 @@ std:
268236
return PHP_JSON_T_ERROR;
269237
}
270238
<STR_P1>UTF16_1 {
271-
PHP_JSON_TOKEN_LOCATION(last_column) += 1;
272239
s->str_esc += 5;
273240
PHP_JSON_CONDITION_GOTO(STR_P1);
274241
}
275242
<STR_P1>UTF16_2 {
276-
PHP_JSON_TOKEN_LOCATION(last_column) += 1;
277243
s->str_esc += 4;
278244
PHP_JSON_CONDITION_GOTO(STR_P1);
279245
}
280246
<STR_P1>UTF16_3 {
281-
PHP_JSON_TOKEN_LOCATION(last_column) += 1;
282247
s->str_esc += 3;
283248
PHP_JSON_CONDITION_GOTO(STR_P1);
284249
}
285250
<STR_P1>UTF16_4 {
286-
PHP_JSON_TOKEN_LOCATION(last_column) += 1;
287251
s->str_esc += 8;
288252
PHP_JSON_CONDITION_GOTO(STR_P1);
289253
}
@@ -292,7 +256,6 @@ std:
292256
return PHP_JSON_T_ERROR;
293257
}
294258
<STR_P1>ESC {
295-
PHP_JSON_TOKEN_LOCATION(last_column) += 2;
296259
s->str_esc++;
297260
PHP_JSON_CONDITION_GOTO(STR_P1);
298261
}
@@ -301,7 +264,6 @@ std:
301264
return PHP_JSON_T_ERROR;
302265
}
303266
<STR_P1>["] {
304-
PHP_JSON_TOKEN_LOCATION(last_column)++;
305267
zend_string *str;
306268
size_t len = (size_t)(s->cursor - s->str_start - s->str_esc - 1 + s->utf8_invalid_count);
307269
if (len == 0) {
@@ -322,22 +284,7 @@ std:
322284
return PHP_JSON_T_STRING;
323285
}
324286
}
325-
<STR_P1>UTF8_1 {
326-
PHP_JSON_TOKEN_LOCATION(last_column)++;
327-
PHP_JSON_CONDITION_GOTO(STR_P1);
328-
}
329-
<STR_P1>UTF8_2 {
330-
PHP_JSON_TOKEN_LOCATION(last_column) += 1;
331-
PHP_JSON_CONDITION_GOTO(STR_P1);
332-
}
333-
<STR_P1>UTF8_3 {
334-
PHP_JSON_TOKEN_LOCATION(last_column) += 1;
335-
PHP_JSON_CONDITION_GOTO(STR_P1);
336-
}
337-
<STR_P1>UTF8_4 {
338-
PHP_JSON_TOKEN_LOCATION(last_column) += 1;
339-
PHP_JSON_CONDITION_GOTO(STR_P1);
340-
}
287+
<STR_P1>UTF8 { PHP_JSON_CONDITION_GOTO(STR_P1); }
341288
<STR_P1>ANY {
342289
if (s->options & (PHP_JSON_INVALID_UTF8_IGNORE | PHP_JSON_INVALID_UTF8_SUBSTITUTE)) {
343290
if (s->options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) {

ext/json/php_json_parser.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,20 +48,12 @@ typedef struct _php_json_parser_methods {
4848
php_json_parser_func_object_end_t object_end;
4949
} php_json_parser_methods;
5050

51-
typedef struct _php_json_parser_location {
52-
size_t first_line;
53-
size_t first_column;
54-
size_t last_line;
55-
size_t last_column;
56-
} php_json_parser_location;
57-
5851
struct _php_json_parser {
5952
php_json_scanner scanner;
6053
zval *return_value;
6154
int depth;
6255
int max_depth;
6356
php_json_parser_methods methods;
64-
php_json_parser_location *location;
6557
};
6658

6759
PHP_JSON_API void php_json_parser_init_ex(

ext/json/php_json_scanner.h

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,6 @@
2020

2121
typedef unsigned char php_json_ctype;
2222

23-
typedef struct _php_json_error_location {
24-
/** first column of the error */
25-
size_t first_column;
26-
/** first line of the error */
27-
size_t first_line;
28-
/** last column of the error */
29-
size_t last_column;
30-
/** last line of the error */
31-
size_t last_line;
32-
} php_json_error_location;
33-
3423
typedef struct _php_json_scanner {
3524
php_json_ctype *cursor; /* cursor position */
3625
php_json_ctype *token; /* token position */
@@ -39,18 +28,17 @@ typedef struct _php_json_scanner {
3928
php_json_ctype *ctxmarker; /* marker position for context backtracking */
4029
php_json_ctype *str_start; /* start position of the string */
4130
php_json_ctype *pstr; /* string pointer for escapes conversion */
31+
php_json_ctype *line_start; /* start position of the current line */
32+
size_t line; /* current line number (1-based) */
4233
zval value; /* value */
4334
int str_esc; /* number of extra characters for escaping */
4435
int state; /* condition state */
4536
int options; /* options */
4637
php_json_error_code errcode; /* error type if there is an error */
47-
php_json_error_location errloc; /* error location */
4838
int utf8_invalid; /* whether utf8 is invalid */
4939
int utf8_invalid_count; /* number of extra character for invalid utf8 */
5040
} php_json_scanner;
5141

52-
#define PHP_JSON_SCANNER_LOCATION(scanner, slocation) (scanner).errloc.slocation
53-
5442
void php_json_scanner_init(php_json_scanner *scanner, const char *str, size_t str_len, int options);
5543
int php_json_scan(php_json_scanner *s);
5644

0 commit comments

Comments
 (0)