| 732 | | |
| | 734 | |
/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * Recognized breaks and what is appended to `string`:
 *
 *      CR LF                           ->  LF
 *      CR or LF                        ->  LF
 *      NEL (C2 85)                     ->  LF
 *      LS  (E2 80 A8), PS (E2 80 A9)   ->  copied unchanged (three bytes)
 *
 * On a match, parser->pointer is moved past the break, parser->index and
 * parser->unread are adjusted (by 2 for CR LF, by 1 otherwise),
 * parser->column is reset to 0 and parser->line is incremented.  If the
 * input is not positioned at a line break, nothing is modified.
 *
 * Notes for callers:
 *  - Both arguments are evaluated several times; they must be free of side
 *    effects.
 *  - The macro does not grow `string`; the caller has to make sure there is
 *    room for up to three bytes before invoking it.
 *  - The value of the expression is the last operand of the selected branch
 *    (the updated parser->unread), or 0 when no break was found, so it is
 *    not a reliable success indicator.
 */

#define COPY_LINE(parser,string) \
    ((CHECK_AT((parser),'\r',0) && CHECK_AT((parser),'\n',1)) ? /* CR LF -> LF */ \
     (*((string).pointer++) = (yaml_char_t) '\n', \
      (parser)->pointer += 2, \
      (parser)->index += 2, \
      (parser)->column = 0, \
      (parser)->line ++, \
      (parser)->unread -= 2) : \
     (CHECK_AT((parser),'\r',0) || CHECK_AT((parser),'\n',0)) ? /* CR|LF -> LF */ \
     (*((string).pointer++) = (yaml_char_t) '\n', \
      (parser)->pointer ++, \
      (parser)->index ++, \
      (parser)->column = 0, \
      (parser)->line ++, \
      (parser)->unread --) : \
     (CHECK_AT((parser),'\xC2',0) && CHECK_AT((parser),'\x85',1)) ? /* NEL -> LF */ \
     (*((string).pointer++) = (yaml_char_t) '\n', \
      (parser)->pointer += 2, \
      (parser)->index ++, \
      (parser)->column = 0, \
      (parser)->line ++, \
      (parser)->unread --) : \
     (CHECK_AT((parser),'\xE2',0) && \
      CHECK_AT((parser),'\x80',1) && \
      (CHECK_AT((parser),'\xA8',2) || \
       CHECK_AT((parser),'\xA9',2))) ? /* LS|PS -> LS|PS */ \
     (*((string).pointer++) = *((parser)->pointer++), \
      *((string).pointer++) = *((parser)->pointer++), \
      *((string).pointer++) = *((parser)->pointer++), \
      (parser)->index ++, \
      (parser)->column = 0, \
      (parser)->line ++, \
      (parser)->unread --) : 0)
| | 772 | |
/*
 * Append tail_string to head_string and then clear tail_string.
 *
 * Evaluates to 1 when both the join and the clear step report success and
 * to 0 otherwise.  The clear step is skipped when the join fails.
 */

#define JOIN(parser,head_string,tail_string) \
    (yaml_parser_join_string(parser, &(head_string), &(tail_string)) \
        ? (yaml_parser_clear_string(parser, &(tail_string)) != 0) \
        : 0)
| | 3242 | /* |
| | 3243 | * Scan a block scalar. |
| | 3244 | */ |
| | 3245 | |
| | 3246 | static yaml_token_t * |
| | 3247 | yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) |
| | 3248 | { |
| | 3249 | yaml_mark_t start_mark; |
| | 3250 | yaml_mark_t end_mark; |
| | 3251 | yaml_string_t string = yaml_parser_new_string(parser); |
| | 3252 | yaml_string_t line_break = yaml_parser_new_string(parser); |
| | 3253 | yaml_string_t breaks = yaml_parser_new_string(parser); |
| | 3254 | yaml_token_t *token = NULL; |
| | 3255 | int chomping = 0; |
| | 3256 | int increment = 0; |
| | 3257 | int indent = 0; |
| | 3258 | int leading_blank = 0; |
| | 3259 | int trailing_blank = 0; |
| | 3260 | |
| | 3261 | if (!string.buffer) goto error; |
| | 3262 | if (!line_break.buffer) goto error; |
| | 3263 | if (!breaks.buffer) goto error; |
| | 3264 | |
| | 3265 | /* Eat the indicator '|' or '>'. */ |
| | 3266 | |
| | 3267 | start_mark = yaml_parser_get_mark(parser); |
| | 3268 | |
| | 3269 | FORWARD(parser); |
| | 3270 | |
| | 3271 | /* Scan the additional block scalar indicators. */ |
| | 3272 | |
| | 3273 | if (!UPDATE(parser, 1)) goto error; |
| | 3274 | |
| | 3275 | /* Check for a chomping indicator. */ |
| | 3276 | |
| | 3277 | if (CHECK(parser, '+') || CHECK(parser, '-')) |
| | 3278 | { |
| | 3279 | /* Set the chomping method and eat the indicator. */ |
| | 3280 | |
| | 3281 | chomping = CHECK(parser, '+') ? +1 : -1; |
| | 3282 | |
| | 3283 | FORWARD(parser); |
| | 3284 | |
| | 3285 | /* Check for an indentation indicator. */ |
| | 3286 | |
| | 3287 | if (!UPDATE(parser, 1)) goto error; |
| | 3288 | |
| | 3289 | if (IS_DIGIT(parser)) |
| | 3290 | { |
| | 3291 | /* Check that the intendation is greater than 0. */ |
| | 3292 | |
| | 3293 | if (CHECK(parser, '0')) { |
| | 3294 | yaml_parser_set_scanner_error(parser, "while scanning a block scalar", |
| | 3295 | start_mark, "found an intendation indicator equal to 0"); |
| | 3296 | goto error; |
| | 3297 | } |
| | 3298 | |
| | 3299 | /* Get the intendation level and eat the indicator. */ |
| | 3300 | |
| | 3301 | increment = AS_DIGIT(parser); |
| | 3302 | |
| | 3303 | FORWARD(parser); |
| | 3304 | } |
| | 3305 | } |
| | 3306 | |
| | 3307 | /* Do the same as above, but in the opposite order. */ |
| | 3308 | |
| | 3309 | else if (IS_DIGIT(parser)) |
| | 3310 | { |
| | 3311 | if (CHECK(parser, '0')) { |
| | 3312 | yaml_parser_set_scanner_error(parser, "while scanning a block scalar", |
| | 3313 | start_mark, "found an intendation indicator equal to 0"); |
| | 3314 | goto error; |
| | 3315 | } |
| | 3316 | |
| | 3317 | increment = AS_DIGIT(parser); |
| | 3318 | |
| | 3319 | FORWARD(parser); |
| | 3320 | |
| | 3321 | if (!UPDATE(parser, 1)) goto error; |
| | 3322 | |
| | 3323 | if (CHECK(parser, '+') || CHECK(parser, '-')) { |
| | 3324 | chomping = CHECK(parser, '+') ? +1 : -1; |
| | 3325 | FORWARD(parser); |
| | 3326 | } |
| | 3327 | } |
| | 3328 | |
| | 3329 | /* Eat whitespaces and comments to the end of the line. */ |
| | 3330 | |
| | 3331 | if (!UPDATE(parser, 1)) goto error; |
| | 3332 | |
| | 3333 | while (IS_BLANK(parser)) { |
| | 3334 | FORWARD(parser); |
| | 3335 | if (!UPDATE(parser, 1)) goto error; |
| | 3336 | } |
| | 3337 | |
| | 3338 | if (CHECK(parser, '#')) { |
| | 3339 | while (!IS_BREAKZ(parser)) { |
| | 3340 | FORWARD(parser); |
| | 3341 | if (!UPDATE(parser, 1)) goto error; |
| | 3342 | } |
| | 3343 | } |
| | 3344 | |
| | 3345 | /* Check if we are at the end of the line. */ |
| | 3346 | |
| | 3347 | if (!IS_BREAKZ(parser)) { |
| | 3348 | yaml_parser_set_scanner_error(parser, "while scanning a block scalar", |
| | 3349 | start_mark, "did not found expected comment or line break"); |
| | 3350 | goto error; |
| | 3351 | } |
| | 3352 | |
| | 3353 | /* Eat a line break. */ |
| | 3354 | |
| | 3355 | if (IS_BREAK(parser)) { |
| | 3356 | if (!UPDATE(parser, 2)) goto error; |
| | 3357 | FORWARD_LINE(parser); |
| | 3358 | } |
| | 3359 | |
| | 3360 | end_mark = yaml_parser_get_mark(parser); |
| | 3361 | |
| | 3362 | /* Set the intendation level if it was specified. */ |
| | 3363 | |
| | 3364 | if (increment) { |
| | 3365 | indent = parser->indent >= 0 ? parser->indent+increment : increment; |
| | 3366 | } |
| | 3367 | |
| | 3368 | /* Scan the leading line breaks and determine the indentation level if needed. */ |
| | 3369 | |
| | 3370 | if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &breaks, |
| | 3371 | start_mark, &end_mark)) goto error; |
| | 3372 | |
| | 3373 | /* Scan the block scalar content. */ |
| | 3374 | |
| | 3375 | if (!UPDATE(parser, 1)) goto error; |
| | 3376 | |
| | 3377 | while (parser->column == indent && !IS_Z(parser)) |
| | 3378 | { |
| | 3379 | /* |
| | 3380 | * We are at the beginning of a non-empty line. |
| | 3381 | */ |
| | 3382 | |
| | 3383 | /* Is it a trailing whitespace? */ |
| | 3384 | |
| | 3385 | trailing_blank = IS_BLANK(parser); |
| | 3386 | |
| | 3387 | /* Check if we need to fold the leading line break. */ |
| | 3388 | |
| | 3389 | if (!literal && (*line_break.buffer == '\n') |
| | 3390 | && !leading_blank && !trailing_blank) |
| | 3391 | { |
| | 3392 | /* Do we need to join the lines by space? */ |
| | 3393 | |
| | 3394 | if (*breaks.buffer == '\0') { |
| | 3395 | if (!RESIZE(parser, string)) goto error; |
| | 3396 | *(string.pointer ++) = ' '; |
| | 3397 | } |
| | 3398 | |
| | 3399 | yaml_parser_clear_string(parser, &line_break); |
| | 3400 | } |
| | 3401 | else { |
| | 3402 | if (!JOIN(parser, string, line_break)) goto error; |
| | 3403 | } |
| | 3404 | |
| | 3405 | /* Append the remaining line breaks. */ |
| | 3406 | |
| | 3407 | if (!JOIN(parser, string, breaks)) goto error; |
| | 3408 | |
| | 3409 | /* Is it a leading whitespace? */ |
| | 3410 | |
| | 3411 | leading_blank = IS_BLANK(parser); |
| | 3412 | |
| | 3413 | /* Consume the current line. */ |
| | 3414 | |
| | 3415 | while (!IS_BREAKZ(parser)) { |
| | 3416 | if (!RESIZE(parser, string)) goto error; |
| | 3417 | COPY(parser, string); |
| | 3418 | if (!UPDATE(parser, 1)) goto error; |
| | 3419 | } |
| | 3420 | |
| | 3421 | /* Consume the line break. */ |
| | 3422 | |
| | 3423 | if (!UPDATE(parser, 2)) goto error; |
| | 3424 | |
| | 3425 | COPY_LINE(parser, line_break); |
| | 3426 | |
| | 3427 | /* Eat the following intendation spaces and line breaks. */ |
| | 3428 | |
| | 3429 | if (!yaml_parser_scan_block_scalar_breaks(parser, |
| | 3430 | &indent, &breaks, start_mark, &end_mark)) goto error; |
| | 3431 | } |
| | 3432 | |
| | 3433 | /* Chomp the tail. */ |
| | 3434 | |
| | 3435 | if (chomping != -1) { |
| | 3436 | if (!JOIN(parser, string, line_break)) goto error; |
| | 3437 | } |
| | 3438 | if (chomping == 1) { |
| | 3439 | if (!JOIN(parser, string, breaks)) goto error; |
| | 3440 | } |
| | 3441 | |
| | 3442 | /* Create a token. */ |
| | 3443 | |
| | 3444 | token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer, |
| | 3445 | literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE, |
| | 3446 | start_mark, end_mark); |
| | 3447 | if (!token) { |
| | 3448 | parser->error = YAML_MEMORY_ERROR; |
| | 3449 | return 0; |
| | 3450 | } |
| | 3451 | |
| | 3452 | yaml_free(line_break.buffer); |
| | 3453 | yaml_free(breaks.buffer); |
| | 3454 | |
| | 3455 | return token; |
| | 3456 | |
| | 3457 | error: |
| | 3458 | yaml_free(string.buffer); |
| | 3459 | yaml_free(line_break.buffer); |
| | 3460 | yaml_free(breaks.buffer); |
| | 3461 | |
| | 3462 | return NULL; |
| | 3463 | } |
| | 3464 | |
| | 3465 | /* |
| | 3466 | * Scan intendation spaces and line breaks for a block scalar. Determine the |
| | 3467 | * intendation level if needed. |
| | 3468 | */ |
| | 3469 | |
| | 3470 | static int |
| | 3471 | yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, |
| | 3472 | int *indent, yaml_string_t *breaks, |
| | 3473 | yaml_mark_t start_mark, yaml_mark_t *end_mark) |
| | 3474 | { |
| | 3475 | int max_indent = 0; |
| | 3476 | |
| | 3477 | *end_mark = yaml_parser_get_mark(parser); |
| | 3478 | |
| | 3479 | /* Eat the intendation spaces and line breaks. */ |
| | 3480 | |
| | 3481 | while (1) |
| | 3482 | { |
| | 3483 | /* Eat the intendation spaces. */ |
| | 3484 | |
| | 3485 | if (!UPDATE(parser, 1)) return 0; |
| | 3486 | |
| | 3487 | while ((!*indent || parser->column < *indent) && IS_SPACE(parser)) { |
| | 3488 | FORWARD(parser); |
| | 3489 | if (!UPDATE(parser, 1)) return 0; |
| | 3490 | } |
| | 3491 | |
| | 3492 | if (parser->column > max_indent) |
| | 3493 | max_indent = parser->column; |
| | 3494 | |
| | 3495 | /* Check for a tab character messing the intendation. */ |
| | 3496 | |
| | 3497 | if ((!*indent || parser->column < *indent) && IS_TAB(parser)) { |
| | 3498 | return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", |
| | 3499 | start_mark, "found a tab character where an intendation space is expected"); |
| | 3500 | } |
| | 3501 | |
| | 3502 | /* Have we found a non-empty line? */ |
| | 3503 | |
| | 3504 | if (!IS_BREAK(parser)) break; |
| | 3505 | |
| | 3506 | /* Consume the line break. */ |
| | 3507 | |
| | 3508 | if (!UPDATE(parser, 2)) return 0; |
| | 3509 | if (!RESIZE(parser, *breaks)) return 0; |
| | 3510 | COPY_LINE(parser, *breaks); |
| | 3511 | *end_mark = yaml_parser_get_mark(parser); |
| | 3512 | } |
| | 3513 | |
| | 3514 | /* Determine the indentation level if needed. */ |
| | 3515 | |
| | 3516 | if (!*indent) { |
| | 3517 | *indent = max_indent; |
| | 3518 | if (*indent < parser->indent + 1) |
| | 3519 | *indent = parser->indent + 1; |
| | 3520 | if (*indent < 1) |
| | 3521 | *indent = 1; |
| | 3522 | } |
| | 3523 | |
| | 3524 | return 1; |
| | 3525 | } |
| | 3526 | |