Changeset 187
- Timestamp:
- 06/11/06 13:29:09 (7 years ago)
- File:
-
- 1 edited
-
libyaml/trunk/src/scanner.c (modified) (14 diffs)
Legend:
- Unmodified
- Added
- Removed
-
libyaml/trunk/src/scanner.c
r186 r187 508 508 509 509 /* 510 * Check if the character at the specified position is an alphabetical 511 * character, a digit, '_', or '-'. 512 */ 513 514 #define IS_ALPHA_AT(parser,offset) \ 515 ((parser->pointer[offset] >= (yaml_char_t) '0' && \ 516 parser->pointer[offset] <= (yaml_char_t) '9') || \ 517 (parser->pointer[offset] >= (yaml_char_t) 'A' && \ 518 parser->pointer[offset] <= (yaml_char_t) 'Z') || \ 519 (parser->pointer[offset] >= (yaml_char_t) 'a' && \ 520 parser->pointer[offset] <= (yaml_char_t) 'z') || \ 521 parser->pointer[offset] == '_' || \ 522 parser->pointer[offset] == '-') 523 524 #define IS_ALPHA(parser) IS_ALPHA_AT(parser,0) 525 526 /* 527 * Check if the character at the specified position is a digit. 528 */ 529 530 #define IS_DIGIT_AT(parser,offset) \ 531 ((parser->pointer[offset] >= (yaml_char_t) '0' && \ 532 parser->pointer[offset] <= (yaml_char_t) '9')) 533 534 #define IS_DIGIT(parser) IS_DIGIT_AT(parser,0) 535 536 /* 537 * Get the value of a digit. 538 */ 539 540 #define AS_DIGIT_AT(parser,offset) \ 541 (parser->pointer[offset] - (yaml_char_t) '0') 542 543 #define AS_DIGIT(parser) AS_DIGIT_AT(parser,0) 544 545 /* 546 * Check if the character at the specified position is a hex-digit. 547 */ 548 549 #define IS_HEX_AT(parser,offset) \ 550 ((parser->pointer[offset] >= (yaml_char_t) '0' && \ 551 parser->pointer[offset] <= (yaml_char_t) '9') || \ 552 (parser->pointer[offset] >= (yaml_char_t) 'A' && \ 553 parser->pointer[offset] <= (yaml_char_t) 'F') || \ 554 (parser->pointer[offset] >= (yaml_char_t) 'a' && \ 555 parser->pointer[offset] <= (yaml_char_t) 'f')) 556 557 #define IS_HEX(parser) IS_HEX_AT(parser,0) 558 559 /* 560 * Get the value of a hex-digit. 561 */ 562 563 #define AS_HEX_AT(parser,offset) \ 564 ((parser->pointer[offset] >= (yaml_char_t) 'A' && \ 565 parser->pointer[offset] <= (yaml_char_t) 'F') ? \ 566 (parser->pointer[offset] - (yaml_char_t) 'A' + 10) : \ 567 (parser->pointer[offset] >= (yaml_char_t) 'a' && \ 568 parser->pointer[offset] <= (yaml_char_t) 'f') ? \ 569 (parser->pointer[offset] - (yaml_char_t) 'a' + 10) : \ 570 (parser->pointer[offset] - (yaml_char_t) '0')) 571 572 #define AS_HEX(parser) AS_HEX_AT(parser,0) 573 574 /* 510 575 * Check if the character at the specified position is NUL. 511 576 */ … … 514 579 515 580 #define IS_Z(parser) IS_Z_AT(parser,0) 581 582 /* 583 * Check if the character at the specified position is BOM. 584 */ 585 586 #define IS_BOM_AT(parser,offset) \ 587 (CHECK_AT(parser,'\xEF',(offset)) \ 588 && CHECK_AT(parser,'\xBB',(offset)+1) \ 589 && CHECK_AT(parser,'\xBF',(offset)+1)) /* BOM (#xFEFF) */ 590 591 #define IS_BOM(parser) IS_BOM_AT(parser,0) 516 592 517 593 /* … … 548 624 || CHECK_AT(parser,'\n',(offset)) /* LF (#xA) */ \ 549 625 || (CHECK_AT(parser,'\xC2',(offset)) \ 550 && CHECK_AT(parser,'\x85',(offset +1))) /* NEL (#x85) */ \626 && CHECK_AT(parser,'\x85',(offset)+1)) /* NEL (#x85) */ \ 551 627 || (CHECK_AT(parser,'\xE2',(offset)) \ 552 && CHECK_AT(parser,'\x80',(offset +1)) \553 && CHECK_AT(parser,'\xA8',(offset +2))) /* LS (#x2028) */ \628 && CHECK_AT(parser,'\x80',(offset)+1) \ 629 && CHECK_AT(parser,'\xA8',(offset)+2)) /* LS (#x2028) */ \ 554 630 || (CHECK_AT(parser,'\xE2',(offset)) \ 555 && CHECK_AT(parser,'\x80',(offset +1)) \556 && CHECK_AT(parser,'\xA9',(offset +2)))) /* LS (#x2029) */631 && CHECK_AT(parser,'\x80',(offset)+1) \ 632 && CHECK_AT(parser,'\xA9',(offset)+2))) /* LS (#x2029) */ 557 633 558 634 #define IS_BREAK(parser) IS_BREAK_AT(parser,0) … … 606 682 */ 607 683 608 #define FORWARD(parser) \684 #define FORWARD(parser) \ 609 685 (parser->index ++, \ 610 ((IS_BREAK(parser) && !IS_CRLF(parser)) ? \ 611 (parser->line ++, parser->column = 0) : \ 612 (parser->column ++)), \ 686 parser->column ++, \ 613 687 parser->unread --, \ 614 688 parser->pointer += WIDTH(parser)) 615 689 690 #define FORWARD_LINE(parser) \ 691 (IS_CRLF(parser) ? \ 692 (parser->index += 2, \ 693 parser->column = 0, \ 694 parser->unread -= 2, \ 695 parser->pointer += 2) : \ 696 IS_BREAK(parser) ? \ 697 (parser->index ++, \ 698 parser->column = 0, \ 699 parser->unread --, \ 700 parser->pointer += WIDTH(parser)) : 0) 701 702 /* 703 * Resize a string if needed. 704 */ 705 706 #define RESIZE(parser,string) \ 707 (string.pointer-string.buffer+5 < string.size ? 1 : \ 708 yaml_parser_resize_string(parser, &string)) 709 710 /* 711 * Copy a character to a string buffer and advance pointers. 712 */ 713 714 #define COPY(parser,string) \ 715 (((*parser->pointer & 0x80) == 0x00 ? \ 716 (*(string.pointer++) = *(parser->pointer++)) : \ 717 (*parser->pointer & 0xE0) == 0xC0 ? \ 718 (*(string.pointer++) = *(parser->pointer++), \ 719 *(string.pointer++) = *(parser->pointer++)) : \ 720 (*parser->pointer & 0xF0) == 0xE0 ? \ 721 (*(string.pointer++) = *(parser->pointer++), \ 722 *(string.pointer++) = *(parser->pointer++), \ 723 *(string.pointer++) = *(parser->pointer++)) : \ 724 (*parser->pointer & 0xF8) == 0xF0 ? \ 725 (*(string.pointer++) = *(parser->pointer++), \ 726 *(string.pointer++) = *(parser->pointer++), \ 727 *(string.pointer++) = *(parser->pointer++), \ 728 *(string.pointer++) = *(parser->pointer++)) : 0), \ 729 parser->index ++, \ 730 parser->column ++, \ 731 parser->unread --) 732 733 616 734 /* 617 735 * Public API declarations. … … 636 754 637 755 /* 756 * Buffers and lists. 757 */ 758 759 typedef struct { 760 yaml_char_t *buffer; 761 yaml_char_t *pointer; 762 size_t size; 763 } yaml_string_t; 764 765 static yaml_string_t 766 yaml_parser_new_string(yaml_parser_t *parser); 767 768 static int 769 yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string); 770 771 static int 772 yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, 773 size_t item_size); 774 775 /* 638 776 * High-level token API. 639 777 */ … … 753 891 754 892 static int 755 yaml_parser_scan_ yaml_directive_value(yaml_parser_t *parser,893 yaml_parser_scan_version_directive_value(yaml_parser_t *parser, 756 894 yaml_mark_t start_mark, int *major, int *minor); 757 895 758 896 static int 759 yaml_parser_scan_ yaml_directive_number(yaml_parser_t *parser,897 yaml_parser_scan_version_directive_number(yaml_parser_t *parser, 760 898 yaml_mark_t start_mark, int *number); 761 899 762 900 static int 763 901 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, 764 yaml_ char_t **handle, yaml_char_t **prefix);902 yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix); 765 903 766 904 static yaml_token_t * … … 777 915 static int 778 916 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, 779 yaml_mark_t start_mark, yaml_char_t **url); 917 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri); 918 919 static int 920 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, 921 yaml_mark_t start_mark, yaml_string_t *string); 780 922 781 923 static yaml_token_t * … … 840 982 841 983 return parser->tokens[parser->tokens_head]; 984 } 985 986 /* 987 * Create a new string. 988 */ 989 990 static yaml_string_t 991 yaml_parser_new_string(yaml_parser_t *parser) 992 { 993 yaml_string_t string = { NULL, NULL, 0 }; 994 995 string.buffer = yaml_malloc(YAML_DEFAULT_SIZE); 996 if (!string.buffer) { 997 parser->error = YAML_MEMORY_ERROR; 998 return string; 999 } 1000 1001 memset(string.buffer, 0, YAML_DEFAULT_SIZE); 1002 string.pointer = string.buffer; 1003 string.size = YAML_DEFAULT_SIZE; 1004 1005 return string; 1006 } 1007 1008 /* 1009 * Double the size of a string. 1010 */ 1011 1012 static int 1013 yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string) 1014 { 1015 yaml_char_t *new_buffer = yaml_realloc(string->buffer, string->size*2); 1016 1017 if (!new_buffer) { 1018 yaml_free(string->buffer); 1019 string->buffer = NULL; 1020 string->pointer = NULL; 1021 string->size = 0; 1022 parser->error = YAML_MEMORY_ERROR; 1023 return 0; 1024 } 1025 1026 memset(new_buffer+string->size, 0, string->size); 1027 1028 string->pointer = new_buffer + (string->buffer-string->pointer); 1029 string->buffer = new_buffer; 1030 string->size *= 2; 1031 1032 return 1; 1033 } 1034 1035 /* 1036 * Double a list. 1037 */ 1038 1039 static int 1040 yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, 1041 size_t item_size) 1042 { 1043 void *new_buffer = yaml_realloc(*buffer, item_size*(*size)*2); 1044 1045 if (!new_buffer) { 1046 parser->error = YAML_MEMORY_ERROR; 1047 return 0; 1048 } 1049 1050 memset(new_buffer+(*size), 0, item_size*(*size)); 1051 1052 *buffer = new_buffer; 1053 *size *= 2; 1054 1055 return 1; 842 1056 } 843 1057 … … 1235 1449 /* Check if we need to resize the list. */ 1236 1450 1237 if (parser->flow_level == parser->simple_keys_size-1) 1238 { 1239 yaml_simple_key_t **new_simple_keys = 1240 yaml_realloc(parser->simple_keys, 1241 sizeof(yaml_simple_key_t *) * parser->simple_keys_size * 2); 1242 1243 if (!new_simple_keys) { 1244 parser->error = YAML_MEMORY_ERROR; 1451 if (parser->flow_level == parser->simple_keys_size-1) { 1452 if (!yaml_parser_resize_list(parser, (void **)&parser->simple_keys, 1453 &parser->simple_keys_size, sizeof(yaml_simple_key_t *))) 1245 1454 return 0; 1246 }1247 1248 memset(new_simple_keys+parser->simple_keys_size, 0,1249 sizeof(yaml_simple_key_t *)*parser->simple_keys_size);1250 1251 parser->simple_keys = new_simple_keys;1252 parser->simple_keys_size *= 2;1253 1455 } 1254 1456 … … 1305 1507 /* Check if we need to resize the queue. */ 1306 1508 1307 if (parser->tokens_head == 0 && parser->tokens_tail == parser->tokens_size) 1308 { 1309 yaml_token_t **new_tokens = yaml_realloc(parser->tokens, 1310 sizeof(yaml_token_t *) * parser->tokens_size * 2); 1311 1312 if (!new_tokens) { 1313 parser->error = YAML_MEMORY_ERROR; 1509 if (parser->tokens_head == 0 && parser->tokens_tail == parser->tokens_size) { 1510 if (!yaml_parser_resize_list(parser, (void **)&parser->tokens, 1511 &parser->tokens_size, sizeof(yaml_token_t *))) 1314 1512 return 0; 1315 }1316 1317 memset(new_tokens+parser->tokens_size, 0,1318 sizeof(yaml_token_t *)*parser->tokens_size);1319 1320 parser->tokens = new_tokens;1321 parser->tokens_size *= 2;1322 1513 } 1323 1514 … … 1372 1563 /* Check if we need to expand the indents stack. */ 1373 1564 1374 if (parser->indents_length == parser->indents_size) 1375 { 1376 int *new_indents = yaml_realloc(parser->indents, 1377 sizeof(int) * parser->indents_size * 2); 1378 1379 if (!new_indents) { 1380 parser->error = YAML_MEMORY_ERROR; 1565 if (parser->indents_length == parser->indents_size) { 1566 if (!yaml_parser_resize_list(parser, (void **)&parser->indents, 1567 &parser->indents_size, sizeof(int))) 1381 1568 return 0; 1382 }1383 1384 memset(new_indents+parser->indents_size, 0,1385 sizeof(int)*parser->indents_size);1386 1387 parser->indents = new_indents;1388 parser->indents_size *= 2;1389 1569 } 1390 1570 … … 1941 2121 /* Remove the simple key from the list. */ 1942 2122 1943 if (!yaml_parser_remove_simple_key(parser)) return 0; 2123 yaml_free(simple_key); 2124 parser->simple_keys[parser->flow_level] = NULL; 1944 2125 1945 2126 /* A simple key cannot follow another simple key. */ … … 1970 2151 } 1971 2152 1972 /* Remove a potential simple key from the list. */1973 1974 if (!yaml_parser_remove_simple_key(parser)) return 0;1975 1976 2153 /* Simple keys after ':' are allowed in the block context. */ 1977 2154 … … 2168 2345 } 2169 2346 2347 /* 2348 * Eat whitespaces and comments until the next token is found. 2349 */ 2350 2351 static int 2352 yaml_parser_scan_to_next_token(yaml_parser_t *parser) 2353 { 2354 /* Until the next token is not found. */ 2355 2356 while (1) 2357 { 2358 /* Allow the BOM mark to start a line. */ 2359 2360 if (!UPDATE(parser, 1)) return 0; 2361 2362 if (parser->column == 0 && IS_BOM(parser)) 2363 FORWARD(parser); 2364 2365 /* 2366 * Eat whitespaces. 2367 * 2368 * Tabs are allowed: 2369 * 2370 * - in the flow context; 2371 * - in the block context, but not at the beginning of the line or 2372 * after '-', '?', or ':' (complex value). 2373 */ 2374 2375 if (!UPDATE(parser, 1)) return 0; 2376 2377 while (CHECK(parser,' ') || 2378 ((parser->flow_level || !parser->simple_key_allowed) && 2379 CHECK(parser, '\t'))) { 2380 FORWARD(parser); 2381 if (!UPDATE(parser, 1)) return 0; 2382 } 2383 2384 /* Eat a comment until a line break. */ 2385 2386 if (CHECK(parser, '#')) { 2387 while (!IS_BREAKZ(parser)) { 2388 FORWARD(parser); 2389 if (!UPDATE(parser, 1)) return 0; 2390 } 2391 } 2392 2393 /* If it is a line break, eat it. */ 2394 2395 if (IS_BREAK(parser)) 2396 { 2397 if (!UPDATE(parser, 2)) return 0; 2398 FORWARD_LINE(parser); 2399 2400 /* In the block context, a new line may start a simple key. */ 2401 2402 if (!parser->flow_level) { 2403 parser->simple_key_allowed = 1; 2404 } 2405 } 2406 else 2407 { 2408 /* We have found a token. */ 2409 2410 break; 2411 } 2412 } 2413 2414 return 1; 2415 } 2416 2417 /* 2418 * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. 2419 * 2420 * Scope: 2421 * %YAML 1.1 # a comment \n 2422 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2423 * %TAG !yaml! tag:yaml.org,2002: \n 2424 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2425 */ 2426 2427 static yaml_token_t * 2428 yaml_parser_scan_directive(yaml_parser_t *parser) 2429 { 2430 yaml_mark_t start_mark, end_mark; 2431 yaml_char_t *name = NULL; 2432 int major, minor; 2433 yaml_char_t *handle = NULL, *prefix = NULL; 2434 yaml_token_t *token = NULL; 2435 2436 /* Eat '%'. */ 2437 2438 start_mark = yaml_parser_get_mark(parser); 2439 2440 FORWARD(parser); 2441 2442 /* Scan the directive name. */ 2443 2444 if (!yaml_parser_scan_directive_name(parser, start_mark, &name)) 2445 goto error; 2446 2447 /* Is it a YAML directive? */ 2448 2449 if (strcmp((char *)name, "YAML") == 0) 2450 { 2451 /* Scan the VERSION directive value. */ 2452 2453 if (!yaml_parser_scan_version_directive_value(parser, start_mark, 2454 &major, &minor)) 2455 goto error; 2456 2457 end_mark = yaml_parser_get_mark(parser); 2458 2459 /* Create a VERSION-DIRECTIVE token. */ 2460 2461 token = yaml_version_directive_token_new(major, minor, 2462 start_mark, end_mark); 2463 if (!token) goto error; 2464 } 2465 2466 /* Is it a TAG directive? */ 2467 2468 else if (strcmp((char *)name, "TAG") == 0) 2469 { 2470 /* Scan the TAG directive value. */ 2471 2472 if (!yaml_parser_scan_tag_directive_value(parser, start_mark, 2473 &handle, &prefix)) 2474 goto error; 2475 2476 end_mark = yaml_parser_get_mark(parser); 2477 2478 /* Create a TAG-DIRECTIVE token. */ 2479 2480 token = yaml_tag_directive_token_new(handle, prefix, 2481 start_mark, end_mark); 2482 if (!token) goto error; 2483 } 2484 2485 /* Unknown directive. */ 2486 2487 else 2488 { 2489 yaml_parser_set_scanner_error(parser, "While scanning a directive", 2490 start_mark, "found uknown directive name"); 2491 goto error; 2492 } 2493 2494 /* Eat the rest of the line including any comments. */ 2495 2496 while (IS_BLANK(parser)) { 2497 FORWARD(parser); 2498 if (!UPDATE(parser, 1)) goto error; 2499 } 2500 2501 if (CHECK(parser, '#')) { 2502 while (!IS_BREAKZ(parser)) { 2503 FORWARD(parser); 2504 if (!UPDATE(parser, 1)) goto error; 2505 } 2506 } 2507 2508 /* Check if we are at the end of the line. */ 2509 2510 if (!IS_BREAKZ(parser)) { 2511 yaml_parser_set_scanner_error(parser, "While scanning a directive", 2512 start_mark, "did not found expected comment or line break"); 2513 goto error; 2514 } 2515 2516 /* Eat a line break. */ 2517 2518 if (IS_BREAK(parser)) { 2519 if (!UPDATE(parser, 2)) goto error; 2520 FORWARD_LINE(parser); 2521 } 2522 2523 yaml_free(name); 2524 2525 return token; 2526 2527 error: 2528 yaml_free(token); 2529 yaml_free(prefix); 2530 yaml_free(handle); 2531 yaml_free(name); 2532 return NULL; 2533 } 2534 2535 /* 2536 * Scan the directive name. 2537 * 2538 * Scope: 2539 * %YAML 1.1 # a comment \n 2540 * ^^^^ 2541 * %TAG !yaml! tag:yaml.org,2002: \n 2542 * ^^^ 2543 */ 2544 2545 static int 2546 yaml_parser_scan_directive_name(yaml_parser_t *parser, 2547 yaml_mark_t start_mark, yaml_char_t **name) 2548 { 2549 yaml_string_t string = yaml_parser_new_string(parser); 2550 2551 if (!string.buffer) goto error; 2552 2553 /* Consume the directive name. */ 2554 2555 if (!UPDATE(parser, 1)) goto error; 2556 2557 while (IS_ALPHA(parser)) 2558 { 2559 if (!RESIZE(parser, string)) goto error; 2560 COPY(parser, string); 2561 if (!UPDATE(parser, 1)) goto error; 2562 } 2563 2564 /* Check if the name is empty. */ 2565 2566 if (string.buffer == string.pointer) { 2567 yaml_parser_set_scanner_error(parser, "while scanning a directive", 2568 start_mark, "cannot found expected directive name"); 2569 goto error; 2570 } 2571 2572 /* Check for an blank character after the name. */ 2573 2574 if (!IS_BLANKZ(parser)) { 2575 yaml_parser_set_scanner_error(parser, "while scanning a directive", 2576 start_mark, "found unexpected non-alphabetical character"); 2577 goto error; 2578 } 2579 2580 *name = string.buffer; 2581 2582 return 1; 2583 2584 error: 2585 yaml_free(string.buffer); 2586 return 0; 2587 } 2588 2589 /* 2590 * Scan the value of VERSION-DIRECTIVE. 2591 * 2592 * Scope: 2593 * %YAML 1.1 # a comment \n 2594 * ^^^^^^ 2595 */ 2596 2597 static int 2598 yaml_parser_scan_version_directive_value(yaml_parser_t *parser, 2599 yaml_mark_t start_mark, int *major, int *minor) 2600 { 2601 /* Eat whitespaces. */ 2602 2603 if (!UPDATE(parser, 1)) return 0; 2604 2605 while (IS_BLANK(parser)) { 2606 FORWARD(parser); 2607 if (!UPDATE(parser, 1)) return 0; 2608 } 2609 2610 /* Consume the major version number. */ 2611 2612 if (!yaml_parser_scan_version_directive_number(parser, start_mark, major)) 2613 return 0; 2614 2615 /* Eat '.'. */ 2616 2617 if (!CHECK(parser, '.')) { 2618 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 2619 start_mark, "did not find expected digit or '.' character"); 2620 } 2621 2622 FORWARD(parser); 2623 2624 /* Consume the minor version number. */ 2625 2626 if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor)) 2627 return 0; 2628 } 2629 2630 #define MAX_NUMBER_LENGTH 9 2631 2632 /* 2633 * Scan the version number of VERSION-DIRECTIVE. 2634 * 2635 * Scope: 2636 * %YAML 1.1 # a comment \n 2637 * ^ 2638 * %YAML 1.1 # a comment \n 2639 * ^ 2640 */ 2641 2642 static int 2643 yaml_parser_scan_version_directive_number(yaml_parser_t *parser, 2644 yaml_mark_t start_mark, int *number) 2645 { 2646 int value = 0; 2647 size_t length = 0; 2648 2649 /* Repeat while the next character is digit. */ 2650 2651 if (!UPDATE(parser, 1)) return 0; 2652 2653 while (IS_DIGIT(parser)) 2654 { 2655 /* Check if the number is too long. */ 2656 2657 if (++length > MAX_NUMBER_LENGTH) { 2658 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 2659 start_mark, "found extremely long version number"); 2660 } 2661 2662 value = value*10 + AS_DIGIT(parser); 2663 2664 FORWARD(parser); 2665 2666 if (!UPDATE(parser, 1)) return 0; 2667 } 2668 2669 /* Check if the number was present. */ 2670 2671 if (!length) { 2672 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 2673 start_mark, "did not find expected version number"); 2674 } 2675 2676 *number = value; 2677 2678 return 1; 2679 } 2680 2681 /* 2682 * Scan the value of a TAG-DIRECTIVE token. 2683 * 2684 * Scope: 2685 * %TAG !yaml! tag:yaml.org,2002: \n 2686 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2687 */ 2688 2689 static int 2690 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, 2691 yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix) 2692 { 2693 yaml_char_t *handle_value = NULL; 2694 yaml_char_t *prefix_value = NULL; 2695 2696 /* Eat whitespaces. */ 2697 2698 if (!UPDATE(parser, 1)) goto error; 2699 2700 while (IS_BLANK(parser)) { 2701 FORWARD(parser); 2702 if (!UPDATE(parser, 1)) goto error; 2703 } 2704 2705 /* Scan a handle. */ 2706 2707 if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value)) 2708 goto error; 2709 2710 /* Expect a whitespace. */ 2711 2712 if (!UPDATE(parser, 1)) goto error; 2713 2714 if (!IS_BLANK(parser)) { 2715 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 2716 start_mark, "did not find expected whitespace"); 2717 goto error; 2718 } 2719 2720 /* Eat whitespaces. */ 2721 2722 while (IS_BLANK(parser)) { 2723 FORWARD(parser); 2724 if (!UPDATE(parser, 1)) goto error; 2725 } 2726 2727 /* Scan a prefix. */ 2728 2729 if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value)) 2730 goto error; 2731 2732 /* Expect a whitespace or line break. */ 2733 2734 if (!UPDATE(parser, 1)) goto error; 2735 2736 if (!IS_BLANKZ(parser)) { 2737 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 2738 start_mark, "did not find expected whitespace or line break"); 2739 goto error; 2740 } 2741 2742 *handle = handle_value; 2743 *prefix = prefix_value; 2744 2745 return 1; 2746 2747 error: 2748 yaml_free(handle_value); 2749 yaml_free(prefix_value); 2750 return 0; 2751 } 2752 2753 static yaml_token_t * 2754 yaml_parser_scan_anchor(yaml_parser_t *parser, 2755 yaml_token_type_t type) 2756 { 2757 int length = 0; 2758 yaml_mark_t start_mark, end_mark; 2759 yaml_token_t *token = NULL; 2760 yaml_string_t string = yaml_parser_new_string(parser); 2761 2762 if (!string.buffer) goto error; 2763 2764 /* Eat the indicator character. */ 2765 2766 start_mark = yaml_parser_get_mark(parser); 2767 2768 FORWARD(parser); 2769 2770 /* Consume the value. */ 2771 2772 if (!UPDATE(parser, 1)) goto error; 2773 2774 while (IS_ALPHA(parser)) { 2775 if (!RESIZE(parser, string)) goto error; 2776 COPY(parser, string); 2777 if (!UPDATE(parser, 1)) goto error; 2778 length ++; 2779 } 2780 2781 end_mark = yaml_parser_get_mark(parser); 2782 2783 /* 2784 * Check if length of the anchor is greater than 0 and it is followed by 2785 * a whitespace character or one of the indicators: 2786 * 2787 * '?', ':', ',', ']', '}', '%', '@', '`'. 2788 */ 2789 2790 if (!length || !(IS_BLANKZ(parser) || CHECK(parser, '?') || CHECK(parser, ':') || 2791 CHECK(parser, ',') || CHECK(parser, ']') || CHECK(parser, '}') || 2792 CHECK(parser, '%') || CHECK(parser, '@') || CHECK(parser, '`'))) { 2793 yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ? 2794 "while scanning an anchor" : "while scanning an alias", start_mark, 2795 "did not find expected alphabetic or numeric character"); 2796 goto error; 2797 } 2798 2799 /* Create a token. */ 2800 2801 token = type == YAML_ANCHOR_TOKEN ? 2802 yaml_anchor_token_new(string.buffer, start_mark, end_mark) : 2803 yaml_alias_token_new(string.buffer, start_mark, end_mark); 2804 if (!token) goto error; 2805 2806 return token; 2807 2808 error: 2809 yaml_free(string.buffer); 2810 yaml_free(token); 2811 return 0; 2812 } 2813 2814 /* 2815 * Scan a TAG token. 2816 */ 2817 2818 static yaml_token_t * 2819 yaml_parser_scan_tag(yaml_parser_t *parser) 2820 { 2821 yaml_char_t *handle = NULL; 2822 yaml_char_t *suffix = NULL; 2823 yaml_token_t *token = NULL; 2824 yaml_mark_t start_mark, end_mark; 2825 2826 start_mark = yaml_parser_get_mark(parser); 2827 2828 /* Check if the tag is in the canonical form. */ 2829 2830 if (!UPDATE(parser, 2)) goto error; 2831 2832 if (CHECK_AT(parser, '<', 1)) 2833 { 2834 /* Set the handle to '' */ 2835 2836 handle = yaml_malloc(1); 2837 if (!handle) goto error; 2838 handle[0] = '\0'; 2839 2840 /* Eat '!<' */ 2841 2842 FORWARD(parser); 2843 FORWARD(parser); 2844 2845 /* Consume the tag value. */ 2846 2847 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) 2848 goto error; 2849 2850 /* Check for '>' and eat it. */ 2851 2852 if (!CHECK(parser, '>')) { 2853 yaml_parser_set_scanner_error(parser, "while scanning a tag", 2854 start_mark, "did not find the expected '>'"); 2855 goto error; 2856 } 2857 2858 FORWARD(parser); 2859 } 2860 else 2861 { 2862 /* The tag has either the '!suffix' or the '!handle!suffix' form. */ 2863 2864 /* First, try to scan a handle. */ 2865 2866 if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle)) 2867 goto error; 2868 2869 /* Check if it is, indeed, handle. */ 2870 2871 if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!') 2872 { 2873 /* Scan the suffix now. */ 2874 2875 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) 2876 goto error; 2877 } 2878 else 2879 { 2880 /* It wasn't a handle after all. Scan the rest of the tag. */ 2881 2882 if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix)) 2883 goto error; 2884 2885 /* Set the handle to '!'. */ 2886 2887 yaml_free(handle); 2888 handle = yaml_malloc(2); 2889 if (!handle) goto error; 2890 handle[0] = '!'; 2891 handle[1] = '\0'; 2892 } 2893 } 2894 2895 /* Check the character which ends the tag. */ 2896 2897 if (!UPDATE(parser, 1)) goto error; 2898 2899 if (!IS_BLANKZ(parser)) { 2900 yaml_parser_set_scanner_error(parser, "while scanning a tag", 2901 start_mark, "did not found expected whitespace or line break"); 2902 goto error; 2903 } 2904 2905 end_mark = yaml_parser_get_mark(parser); 2906 2907 /* Create a token. */ 2908 2909 token = yaml_tag_token_new(handle, suffix, start_mark, end_mark); 2910 if (!token) goto error; 2911 2912 return token; 2913 2914 error: 2915 yaml_free(handle); 2916 yaml_free(suffix); 2917 return NULL; 2918 } 2919 2920 /* 2921 * Scan a tag handle. 2922 */ 2923 2924 static int 2925 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, 2926 yaml_mark_t start_mark, yaml_char_t **handle) 2927 { 2928 yaml_string_t string = yaml_parser_new_string(parser); 2929 2930 if (!string.buffer) goto error; 2931 2932 /* Check the initial '!' character. */ 2933 2934 if (!UPDATE(parser, 1)) goto error; 2935 2936 if (!CHECK(parser, '!')) { 2937 yaml_parser_set_scanner_error(parser, directive ? 2938 "while scanning a tag directive" : "while scanning a tag", 2939 start_mark, "did not find expected '!'"); 2940 goto error; 2941 } 2942 2943 /* Copy the '!' character. */ 2944 2945 COPY(parser, string); 2946 2947 /* Copy all subsequent alphabetical and numerical characters. */ 2948 2949 if (!UPDATE(parser, 1)) goto error; 2950 2951 while (IS_ALPHA(parser)) 2952 { 2953 if (!RESIZE(parser, string)) goto error; 2954 COPY(parser, string); 2955 if (!UPDATE(parser, 1)) goto error; 2956 } 2957 2958 /* Check if the trailing character is '!' and copy it. */ 2959 2960 if (CHECK(parser, '!')) 2961 { 2962 if (!RESIZE(parser, string)) goto error; 2963 COPY(parser, string); 2964 } 2965 else 2966 { 2967 /* 2968 * It's not really a tag handle. If it's a %TAG directive, it's an 2969 * error. If it's a tag token, it must be a part of URI. 2970 */ 2971 2972 if (directive) { 2973 yaml_parser_set_scanner_error(parser, "while parsing a directive", 2974 start_mark, "did not find expected '!'"); 2975 goto error; 2976 } 2977 } 2978 2979 *handle = string.buffer; 2980 2981 return 1; 2982 2983 error: 2984 yaml_free(string.buffer); 2985 return 0; 2986 } 2987 2988 /* 2989 * Scan a tag. 2990 */ 2991 2992 static int 2993 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, 2994 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri) 2995 { 2996 size_t length = head ? strlen((char *)head) : 0; 2997 yaml_string_t string = yaml_parser_new_string(parser); 2998 2999 if (!string.buffer) goto error; 3000 3001 /* Resize the string to include the head. */ 3002 3003 while (string.size <= length) { 3004 if (!yaml_parser_resize_string(parser, &string)) goto error; 3005 } 3006 3007 /* Copy the head if needed. */ 3008 3009 if (length) { 3010 memcpy(string.buffer, head, length); 3011 string.pointer += length; 3012 } 3013 3014 /* Scan the tag. */ 3015 3016 if (!UPDATE(parser, 1)) goto error; 3017 3018 /* 3019 * The set of characters that may appear in URI is as follows: 3020 * 3021 * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', 3022 * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', 3023 * '%'. 3024 */ 3025 3026 while (IS_ALPHA(parser) || CHECK(parser, ';') || CHECK(parser, '/') || 3027 CHECK(parser, '?') || CHECK(parser, ':') || CHECK(parser, '@') || 3028 CHECK(parser, '&') || CHECK(parser, '=') || CHECK(parser, '+') || 3029 CHECK(parser, '$') || CHECK(parser, ',') || CHECK(parser, '.') || 3030 CHECK(parser, '!') || CHECK(parser, '~') || CHECK(parser, '*') || 3031 CHECK(parser, '\'') || CHECK(parser, '(') || CHECK(parser, ')') || 3032 CHECK(parser, '[') || CHECK(parser, ']') || CHECK(parser, '%')) 3033 { 3034 if (!RESIZE(parser, string)) goto error; 3035 3036 /* Check if it is a URI-escape sequence. */ 3037 3038 if (CHECK(parser, '%')) { 3039 if (!yaml_parser_scan_uri_escapes(parser, 3040 directive, start_mark, &string)) goto error; 3041 } 3042 else { 3043 COPY(parser, string); 3044 } 3045 3046 length ++; 3047 if (!UPDATE(parser, 1)) goto error; 3048 } 3049 3050 /* Check if the tag is non-empty. */ 3051 3052 if (!length) { 3053 yaml_parser_set_scanner_error(parser, directive ? 3054 "while parsing a %TAG directive" : "while parsing a tag", 3055 start_mark, "did not find expected tag URI"); 3056 goto error; 3057 } 3058 3059 *uri = string.buffer; 3060 3061 return 1; 3062 3063 error: 3064 yaml_free(string.buffer); 3065 return 0; 3066 } 3067 3068 /* 3069 * Decode an URI-escape sequence corresponding to a single UTF-8 character. 3070 */ 3071 3072 static int 3073 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, 3074 yaml_mark_t start_mark, yaml_string_t *string) 3075 { 3076 int width = 0; 3077 3078 /* Decode the required number of characters. */ 3079 3080 do { 3081 3082 unsigned char octet = 0; 3083 3084 /* Check for a URI-escaped octet. */ 3085 3086 if (!UPDATE(parser, 3)) return 0; 3087 3088 if (!(CHECK(parser, '%') && IS_HEX_AT(parser, 1) && IS_HEX_AT(parser, 2))) { 3089 return yaml_parser_set_scanner_error(parser, directive ? 3090 "while parsing a %TAG directive" : "while parsing a tag", 3091 start_mark, "did not find URI escaped octet"); 3092 } 3093 3094 /* Get the octet. */ 3095 3096 octet = (AS_HEX_AT(parser, 1) << 4) + AS_HEX_AT(parser, 2); 3097 3098 /* If it is the leading octet, determine the length of the UTF-8 sequence. */ 3099 3100 if (!width) 3101 { 3102 width = (octet & 0x80) == 0x00 ? 1 : 3103 (octet & 0xE0) == 0xC0 ? 2 : 3104 (octet & 0xF0) == 0xE0 ? 3 : 3105 (octet & 0xF8) == 0xF0 ? 4 : 0; 3106 if (!width) { 3107 return yaml_parser_set_scanner_error(parser, directive ? 3108 "while parsing a %TAG directive" : "while parsing a tag", 3109 start_mark, "found an incorrect leading UTF-8 octet"); 3110 } 3111 } 3112 else 3113 { 3114 /* Check if the trailing octet is correct. */ 3115 3116 if ((octet & 0xC0) != 0x80) { 3117 return yaml_parser_set_scanner_error(parser, directive ? 3118 "while parsing a %TAG directive" : "while parsing a tag", 3119 start_mark, "found an incorrect trailing UTF-8 octet"); 3120 } 3121 } 3122 3123 /* Copy the octet and move the pointers. */ 3124 3125 *(string->pointer++) = octet; 3126 FORWARD(parser); 3127 FORWARD(parser); 3128 FORWARD(parser); 3129 3130 } while (--width); 3131 3132 return 1; 3133 } 3134
Note: See TracChangeset
for help on using the changeset viewer.
