Changeset 136 for pyyaml/trunk/lib/yaml/scanner.py
- Timestamp:
- 04/15/06 19:54:52 (6 years ago)
- Files:
-
- 1 modified
-
pyyaml/trunk/lib/yaml/scanner.py (modified) (63 diffs)
Legend:
- Unmodified
- Added
- Removed
-
pyyaml/trunk/lib/yaml/scanner.py
r132 r136 46 46 class Scanner: 47 47 48 49 def __init__(self, reader): 48 def __init__(self): 50 49 """Initialize the scanner.""" 51 # The input stream. The Reader class do the dirty work of checking for52 # BOM and converting the input data to Unicode. It also adds NUL to53 # the end.50 # It is assumed that Scanner and Reader will have a common descendant. 51 # Reader do the dirty work of checking for BOM and converting the 52 # input data to Unicode. It also adds NUL to the end. 54 53 # 55 54 # Reader supports the following methods 56 # self.reader.peek(i=0) # peek the next i-th character 57 # self.reader.prefix(l=1) # peek the next l characters 58 # self.reader.forward(l=1) # read the next l characters 59 # and move the pointer 60 self.reader = reader 55 # self.peek(i=0) # peek the next i-th character 56 # self.prefix(l=1) # peek the next l characters 57 # self.forward(l=1) # read the next l characters and move the pointer. 61 58 62 59 # Had we reached the end of the stream? … … 114 111 # Public methods. 115 112 116 def check (self, *choices):113 def check_token(self, *choices): 117 114 # Check if the next token is one of the given types. 118 115 while self.need_more_tokens(): 119 116 self.fetch_more_tokens() 120 117 if self.tokens: 118 if not choices: 119 return True 121 120 for choice in choices: 122 121 if isinstance(self.tokens[0], choice): … … 124 123 return False 125 124 126 def peek (self):125 def peek_token(self): 127 126 # Return the next token, but do not delete if from the queue. 128 127 while self.need_more_tokens(): … … 131 130 return self.tokens[0] 132 131 133 def get (self):132 def get_token(self): 134 133 # Return the next token. 135 134 while self.need_more_tokens(): … … 172 171 # Compare the current indentation and column. It may add some tokens 173 172 # and decrease the current indentation level. 174 self.unwind_indent(self. reader.column)173 self.unwind_indent(self.column) 175 174 176 175 # Peek the next character. 177 ch = self. reader.peek()176 ch = self.peek() 178 177 179 178 # Is it the end of stream? … … 266 265 raise ScannerError("while scanning for the next token", None, 267 266 "found character %r that cannot start any token" 268 % ch.encode('utf-8'), self. reader.get_mark())267 % ch.encode('utf-8'), self.get_mark()) 269 268 270 269 # Simple keys treatment. … … 294 293 for level in self.possible_simple_keys.keys(): 295 294 key = self.possible_simple_keys[level] 296 if key.line != self. reader.line \297 or self. reader.index-key.index > 1024:295 if key.line != self.line \ 296 or self.index-key.index > 1024: 298 297 if key.required: 299 298 raise ScannerError("while scanning a simple key", key.mark, 300 "could not found expected ':'", self. reader.get_mark())299 "could not found expected ':'", self.get_mark()) 301 300 del self.possible_simple_keys[level] 302 301 … … 307 306 308 307 # Check if a simple key is required at the current position. 309 required = not self.flow_level and self.indent == self. reader.column308 required = not self.flow_level and self.indent == self.column 310 309 311 310 # A simple key is required only if it is the first token in the current … … 318 317 self.remove_possible_simple_key() 319 318 token_number = self.tokens_taken+len(self.tokens) 320 index = self.reader.index321 line = self.reader.line322 column = self.reader.column323 mark = self.reader.get_mark()324 319 key = SimpleKey(token_number, required, 325 index, line, column, mark)320 self.index, self.line, self.column, self.get_mark()) 326 321 self.possible_simple_keys[self.flow_level] = key 327 322 … … 335 330 #if key.required: 336 331 # raise ScannerError("while scanning a simple key", key.mark, 337 # "could not found expected ':'", self. reader.get_mark())332 # "could not found expected ':'", self.get_mark()) 338 333 339 334 # Indentation functions. … … 350 345 # raise ScannerError(None, None, 351 346 # "invalid intendation or unclosed '[' or '{'", 352 # self. reader.get_mark())347 # self.get_mark()) 353 348 354 349 # In the flow context, indentation is ignored. We make the scanner less … … 359 354 # In block context, we may need to issue the BLOCK-END tokens. 360 355 while self.indent > column: 361 mark = self. reader.get_mark()356 mark = self.get_mark() 362 357 self.indent = self.indents.pop() 363 358 self.tokens.append(BlockEndToken(mark, mark)) … … 378 373 379 374 # Read the token. 380 mark = self. reader.get_mark()375 mark = self.get_mark() 381 376 382 377 # Add STREAM-START. 383 378 self.tokens.append(StreamStartToken(mark, mark, 384 encoding=self. reader.encoding))379 encoding=self.encoding)) 385 380 386 381 … … 395 390 396 391 # Read the token. 397 mark = self. reader.get_mark()392 mark = self.get_mark() 398 393 399 394 # Add STREAM-END. 400 395 self.tokens.append(StreamEndToken(mark, mark)) 401 396 402 # The reader is ended.397 # The steam is finished. 403 398 self.done = True 404 399 … … 432 427 433 428 # Add DOCUMENT-START or DOCUMENT-END. 434 start_mark = self. reader.get_mark()435 self. reader.forward(3)436 end_mark = self. reader.get_mark()429 start_mark = self.get_mark() 430 self.forward(3) 431 end_mark = self.get_mark() 437 432 self.tokens.append(TokenClass(start_mark, end_mark)) 438 433 … … 455 450 456 451 # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. 457 start_mark = self. reader.get_mark()458 self. reader.forward()459 end_mark = self. reader.get_mark()452 start_mark = self.get_mark() 453 self.forward() 454 end_mark = self.get_mark() 460 455 self.tokens.append(TokenClass(start_mark, end_mark)) 461 456 … … 478 473 479 474 # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. 480 start_mark = self. reader.get_mark()481 self. reader.forward()482 end_mark = self. reader.get_mark()475 start_mark = self.get_mark() 476 self.forward() 477 end_mark = self.get_mark() 483 478 self.tokens.append(TokenClass(start_mark, end_mark)) 484 479 … … 492 487 493 488 # Add FLOW-ENTRY. 494 start_mark = self. reader.get_mark()495 self. reader.forward()496 end_mark = self. reader.get_mark()489 start_mark = self.get_mark() 490 self.forward() 491 end_mark = self.get_mark() 497 492 self.tokens.append(FlowEntryToken(start_mark, end_mark)) 498 493 … … 506 501 raise ScannerError(None, None, 507 502 "sequence entries are not allowed here", 508 self. reader.get_mark())503 self.get_mark()) 509 504 510 505 # We may need to add BLOCK-SEQUENCE-START. 511 if self.add_indent(self. reader.column):512 mark = self. reader.get_mark()506 if self.add_indent(self.column): 507 mark = self.get_mark() 513 508 self.tokens.append(BlockSequenceStartToken(mark, mark)) 514 509 … … 525 520 526 521 # Add BLOCK-ENTRY. 527 start_mark = self. reader.get_mark()528 self. reader.forward()529 end_mark = self. reader.get_mark()522 start_mark = self.get_mark() 523 self.forward() 524 end_mark = self.get_mark() 530 525 self.tokens.append(BlockEntryToken(start_mark, end_mark)) 531 526 … … 539 534 raise ScannerError(None, None, 540 535 "mapping keys are not allowed here", 541 self. reader.get_mark())536 self.get_mark()) 542 537 543 538 # We may need to add BLOCK-MAPPING-START. 544 if self.add_indent(self. reader.column):545 mark = self. reader.get_mark()539 if self.add_indent(self.column): 540 mark = self.get_mark() 546 541 self.tokens.append(BlockMappingStartToken(mark, mark)) 547 542 … … 553 548 554 549 # Add KEY. 555 start_mark = self. reader.get_mark()556 self. reader.forward()557 end_mark = self. reader.get_mark()550 start_mark = self.get_mark() 551 self.forward() 552 end_mark = self.get_mark() 558 553 self.tokens.append(KeyToken(start_mark, end_mark)) 559 554 … … 592 587 raise ScannerError(None, None, 593 588 "mapping values are not allowed here", 594 self. reader.get_mark())589 self.get_mark()) 595 590 596 591 # Simple keys are allowed after ':' in the block context. … … 601 596 602 597 # Add VALUE. 603 start_mark = self. reader.get_mark()604 self. reader.forward()605 end_mark = self. reader.get_mark()598 start_mark = self.get_mark() 599 self.forward() 600 end_mark = self.get_mark() 606 601 self.tokens.append(ValueToken(start_mark, end_mark)) 607 602 … … 692 687 # DIRECTIVE: ^ '%' ... 693 688 # The '%' indicator is already checked. 694 if self. reader.column == 0:689 if self.column == 0: 695 690 return True 696 691 … … 698 693 699 694 # DOCUMENT-START: ^ '---' (' '|'\n') 700 if self. reader.column == 0:701 if self. reader.prefix(3) == u'---' \702 and self. reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':695 if self.column == 0: 696 if self.prefix(3) == u'---' \ 697 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 703 698 return True 704 699 … … 706 701 707 702 # DOCUMENT-END: ^ '...' (' '|'\n') 708 if self. reader.column == 0:709 prefix = self. reader.peek(4)710 if self. reader.prefix(3) == u'...' \711 and self. reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':703 if self.column == 0: 704 prefix = self.peek(4) 705 if self.prefix(3) == u'...' \ 706 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 712 707 return True 713 708 … … 715 710 716 711 # BLOCK-ENTRY: '-' (' '|'\n') 717 return self. reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'712 return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' 718 713 719 714 def check_key(self): … … 725 720 # KEY(block context): '?' (' '|'\n') 726 721 else: 727 return self. reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'722 return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' 728 723 729 724 def check_value(self): … … 735 730 # VALUE(block context): ':' (' '|'\n') 736 731 else: 737 return self. reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'732 return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' 738 733 739 734 def check_plain(self): … … 751 746 # '-' character) because we want the flow context to be space 752 747 # independent. 753 ch = self. reader.peek()748 ch = self.peek() 754 749 return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ 755 or (self. reader.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'750 or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' 756 751 and (ch == u'-' or (not self.flow_level and ch in u'?:'))) 757 752 … … 778 773 # Scanners for block, flow, and plain scalars need to be modified. 779 774 780 if self. reader.index == 0 and self.reader.peek() == u'\uFEFF':781 self. reader.forward()775 if self.index == 0 and self.peek() == u'\uFEFF': 776 self.forward() 782 777 found = False 783 778 while not found: 784 while self. reader.peek() == u' ':785 self. reader.forward()786 if self. reader.peek() == u'#':787 while self. reader.peek() not in u'\0\r\n\x85\u2028\u2029':788 self. reader.forward()779 while self.peek() == u' ': 780 self.forward() 781 if self.peek() == u'#': 782 while self.peek() not in u'\0\r\n\x85\u2028\u2029': 783 self.forward() 789 784 if self.scan_line_break(): 790 785 if not self.flow_level: … … 795 790 def scan_directive(self): 796 791 # See the specification for details. 797 start_mark = self. reader.get_mark()798 self. reader.forward()792 start_mark = self.get_mark() 793 self.forward() 799 794 name = self.scan_directive_name(start_mark) 800 795 value = None 801 796 if name == u'YAML': 802 797 value = self.scan_yaml_directive_value(start_mark) 803 end_mark = self. reader.get_mark()798 end_mark = self.get_mark() 804 799 elif name == u'TAG': 805 800 value = self.scan_tag_directive_value(start_mark) 806 end_mark = self. reader.get_mark()801 end_mark = self.get_mark() 807 802 else: 808 end_mark = self. reader.get_mark()809 while self. reader.peek() not in u'\0\r\n\x85\u2028\u2029':810 self. reader.forward()803 end_mark = self.get_mark() 804 while self.peek() not in u'\0\r\n\x85\u2028\u2029': 805 self.forward() 811 806 self.scan_directive_ignored_line(start_mark) 812 807 return DirectiveToken(name, value, start_mark, end_mark) … … 815 810 # See the specification for details. 816 811 length = 0 817 ch = self. reader.peek(length)812 ch = self.peek(length) 818 813 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ 819 814 or ch in u'-_': 820 815 length += 1 821 ch = self. reader.peek(length)816 ch = self.peek(length) 822 817 if not length: 823 818 raise ScannerError("while scanning a directive", start_mark, 824 819 "expected alphabetic or numeric character, but found %r" 825 % ch.encode('utf-8'), self. reader.get_mark())826 value = self. reader.prefix(length)827 self. reader.forward(length)828 ch = self. reader.peek()820 % ch.encode('utf-8'), self.get_mark()) 821 value = self.prefix(length) 822 self.forward(length) 823 ch = self.peek() 829 824 if ch not in u'\0 \r\n\x85\u2028\u2029': 830 825 raise ScannerError("while scanning a directive", start_mark, 831 826 "expected alphabetic or numeric character, but found %r" 832 % ch.encode('utf-8'), self. reader.get_mark())827 % ch.encode('utf-8'), self.get_mark()) 833 828 return value 834 829 835 830 def scan_yaml_directive_value(self, start_mark): 836 831 # See the specification for details. 837 while self. reader.peek() == u' ':838 self. reader.forward()832 while self.peek() == u' ': 833 self.forward() 839 834 major = self.scan_yaml_directive_number(start_mark) 840 if self. reader.peek() != '.':835 if self.peek() != '.': 841 836 raise ScannerError("while scanning a directive", start_mark, 842 837 "expected a digit or '.', but found %r" 843 % self. reader.peek().encode('utf-8'),844 self. reader.get_mark())845 self. reader.forward()838 % self.peek().encode('utf-8'), 839 self.get_mark()) 840 self.forward() 846 841 minor = self.scan_yaml_directive_number(start_mark) 847 if self. reader.peek() not in u'\0 \r\n\x85\u2028\u2029':842 if self.peek() not in u'\0 \r\n\x85\u2028\u2029': 848 843 raise ScannerError("while scanning a directive", start_mark, 849 844 "expected a digit or ' ', but found %r" 850 % self. reader.peek().encode('utf-8'),851 self. reader.get_mark())845 % self.peek().encode('utf-8'), 846 self.get_mark()) 852 847 return (major, minor) 853 848 854 849 def scan_yaml_directive_number(self, start_mark): 855 850 # See the specification for details. 856 ch = self. reader.peek()851 ch = self.peek() 857 852 if not (u'0' <= ch <= '9'): 858 853 raise ScannerError("while scanning a directive", start_mark, 859 854 "expected a digit, but found %r" % ch.encode('utf-8'), 860 self. reader.get_mark())855 self.get_mark()) 861 856 length = 0 862 while u'0' <= self. reader.peek(length) <= u'9':857 while u'0' <= self.peek(length) <= u'9': 863 858 length += 1 864 value = int(self. reader.prefix(length))865 self. reader.forward(length)859 value = int(self.prefix(length)) 860 self.forward(length) 866 861 return value 867 862 868 863 def scan_tag_directive_value(self, start_mark): 869 864 # See the specification for details. 870 while self. reader.peek() == u' ':871 self. reader.forward()865 while self.peek() == u' ': 866 self.forward() 872 867 handle = self.scan_tag_directive_handle(start_mark) 873 while self. reader.peek() == u' ':874 self. reader.forward()868 while self.peek() == u' ': 869 self.forward() 875 870 prefix = self.scan_tag_directive_prefix(start_mark) 876 871 return (handle, prefix) … … 879 874 # See the specification for details. 880 875 value = self.scan_tag_handle('directive', start_mark) 881 ch = self. reader.peek()876 ch = self.peek() 882 877 if ch != u' ': 883 878 raise ScannerError("while scanning a directive", start_mark, 884 879 "expected ' ', but found %r" % ch.encode('utf-8'), 885 self. reader.get_mark())880 self.get_mark()) 886 881 return value 887 882 … … 889 884 # See the specification for details. 890 885 value = self.scan_tag_uri('directive', start_mark) 891 ch = self. reader.peek()886 ch = self.peek() 892 887 if ch not in u'\0 \r\n\x85\u2028\u2029': 893 888 raise ScannerError("while scanning a directive", start_mark, 894 889 "expected ' ', but found %r" % ch.encode('utf-8'), 895 self. reader.get_mark())890 self.get_mark()) 896 891 return value 897 892 898 893 def scan_directive_ignored_line(self, start_mark): 899 894 # See the specification for details. 900 while self. reader.peek() == u' ':901 self. reader.forward()902 if self. reader.peek() == u'#':903 while self. reader.peek() not in u'\0\r\n\x85\u2028\u2029':904 self. reader.forward()905 ch = self. reader.peek()895 while self.peek() == u' ': 896 self.forward() 897 if self.peek() == u'#': 898 while self.peek() not in u'\0\r\n\x85\u2028\u2029': 899 self.forward() 900 ch = self.peek() 906 901 if ch not in u'\0\r\n\x85\u2028\u2029': 907 902 raise ScannerError("while scanning a directive", start_mark, 908 903 "expected a comment or a line break, but found %r" 909 % ch.encode('utf-8'), self. reader.get_mark())904 % ch.encode('utf-8'), self.get_mark()) 910 905 self.scan_line_break() 911 906 … … 919 914 # [ *alias , "value" ] 920 915 # Therefore we restrict aliases to numbers and ASCII letters. 921 start_mark = self. reader.get_mark()922 indicator = self. reader.peek()916 start_mark = self.get_mark() 917 indicator = self.peek() 923 918 if indicator == '*': 924 919 name = 'alias' 925 920 else: 926 921 name = 'anchor' 927 self. reader.forward()922 self.forward() 928 923 length = 0 929 ch = self. reader.peek(length)924 ch = self.peek(length) 930 925 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ 931 926 or ch in u'-_': 932 927 length += 1 933 ch = self. reader.peek(length)928 ch = self.peek(length) 934 929 if not length: 935 930 raise ScannerError("while scanning an %s" % name, start_mark, 936 931 "expected alphabetic or numeric character, but found %r" 937 % ch.encode('utf-8'), self. reader.get_mark())938 value = self. reader.prefix(length)939 self. reader.forward(length)940 ch = self. reader.peek()932 % ch.encode('utf-8'), self.get_mark()) 933 value = self.prefix(length) 934 self.forward(length) 935 ch = self.peek() 941 936 if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': 942 937 raise ScannerError("while scanning an %s" % name, start_mark, 943 938 "expected alphabetic or numeric character, but found %r" 944 % ch.encode('utf-8'), self. reader.get_mark())945 end_mark = self. reader.get_mark()939 % ch.encode('utf-8'), self.get_mark()) 940 end_mark = self.get_mark() 946 941 return TokenClass(value, start_mark, end_mark) 947 942 948 943 def scan_tag(self): 949 944 # See the specification for details. 950 start_mark = self. reader.get_mark()951 ch = self. reader.peek(1)945 start_mark = self.get_mark() 946 ch = self.peek(1) 952 947 if ch == u'<': 953 948 handle = None 954 self. reader.forward(2)949 self.forward(2) 955 950 suffix = self.scan_tag_uri('tag', start_mark) 956 if self. reader.peek() != u'>':951 if self.peek() != u'>': 957 952 raise ScannerError("while parsing a tag", start_mark, 958 "expected '>', but found %r" % self. reader.peek().encode('utf-8'),959 self. reader.get_mark())960 self. reader.forward()953 "expected '>', but found %r" % self.peek().encode('utf-8'), 954 self.get_mark()) 955 self.forward() 961 956 elif ch in u'\0 \t\r\n\x85\u2028\u2029': 962 957 handle = None 963 958 suffix = u'!' 964 self. reader.forward()959 self.forward() 965 960 else: 966 961 length = 1 … … 971 966 break 972 967 length += 1 973 ch = self. reader.peek(length)968 ch = self.peek(length) 974 969 handle = u'!' 975 970 if use_handle: … … 977 972 else: 978 973 handle = u'!' 979 self. reader.forward()974 self.forward() 980 975 suffix = self.scan_tag_uri('tag', start_mark) 981 ch = self. reader.peek()976 ch = self.peek() 982 977 if ch not in u'\0 \r\n\x85\u2028\u2029': 983 978 raise ScannerError("while scanning a tag", start_mark, 984 979 "expected ' ', but found %r" % ch.encode('utf-8'), 985 self. reader.get_mark())980 self.get_mark()) 986 981 value = (handle, suffix) 987 end_mark = self. reader.get_mark()982 end_mark = self.get_mark() 988 983 return TagToken(value, start_mark, end_mark) 989 984 … … 997 992 998 993 chunks = [] 999 start_mark = self. reader.get_mark()994 start_mark = self.get_mark() 1000 995 1001 996 # Scan the header. 1002 self. reader.forward()997 self.forward() 1003 998 chomping, increment = self.scan_block_scalar_indicators(start_mark) 1004 999 self.scan_block_scalar_ignored_line(start_mark) … … 1017 1012 1018 1013 # Scan the inner part of the block scalar. 1019 while self. reader.column == indent and self.reader.peek() != u'\0':1014 while self.column == indent and self.peek() != u'\0': 1020 1015 chunks.extend(breaks) 1021 leading_non_space = self. reader.peek() not in u' \t'1016 leading_non_space = self.peek() not in u' \t' 1022 1017 length = 0 1023 while self. reader.peek(length) not in u'\0\r\n\x85\u2028\u2029':1018 while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': 1024 1019 length += 1 1025 chunks.append(self. reader.prefix(length))1026 self. reader.forward(length)1020 chunks.append(self.prefix(length)) 1021 self.forward(length) 1027 1022 line_break = self.scan_line_break() 1028 1023 breaks, end_mark = self.scan_block_scalar_breaks(indent) 1029 if self. reader.column == indent and self.reader.peek() != u'\0':1024 if self.column == indent and self.peek() != u'\0': 1030 1025 1031 1026 # Unfortunately, folding rules are ambiguous. … … 1034 1029 1035 1030 if folded and line_break == u'\n' \ 1036 and leading_non_space and self. reader.peek() not in u' \t':1031 and leading_non_space and self.peek() not in u' \t': 1037 1032 if not breaks: 1038 1033 chunks.append(u' ') … … 1045 1040 #if folded and line_break == u'\n': 1046 1041 # if not breaks: 1047 # if self. reader.peek() not in ' \t':1042 # if self.peek() not in ' \t': 1048 1043 # chunks.append(u' ') 1049 1044 # else: … … 1068 1063 chomping = None 1069 1064 increment = None 1070 ch = self. reader.peek()1065 ch = self.peek() 1071 1066 if ch in u'+-': 1072 1067 if ch == '+': … … 1074 1069 else: 1075 1070 chomping = False 1076 self. reader.forward()1077 ch = self. reader.peek()1071 self.forward() 1072 ch = self.peek() 1078 1073 if ch in u'0123456789': 1079 1074 increment = int(ch) … … 1081 1076 raise ScannerError("while scanning a block scalar", start_mark, 1082 1077 "expected indentation indicator in the range 1-9, but found 0", 1083 self. reader.get_mark())1084 self. reader.forward()1078 self.get_mark()) 1079 self.forward() 1085 1080 elif ch in u'0123456789': 1086 1081 increment = int(ch) … … 1088 1083 raise ScannerError("while scanning a block scalar", start_mark, 1089 1084 "expected indentation indicator in the range 1-9, but found 0", 1090 self. reader.get_mark())1091 self. reader.forward()1092 ch = self. reader.peek()1085 self.get_mark()) 1086 self.forward() 1087 ch = self.peek() 1093 1088 if ch in u'+-': 1094 1089 if ch == '+': … … 1096 1091 else: 1097 1092 chomping = False 1098 self. reader.forward()1099 ch = self. reader.peek()1093 self.forward() 1094 ch = self.peek() 1100 1095 if ch not in u'\0 \r\n\x85\u2028\u2029': 1101 1096 raise ScannerError("while scanning a block scalar", start_mark, 1102 1097 "expected chomping or indentation indicators, but found %r" 1103 % ch.encode('utf-8'), self. reader.get_mark())1098 % ch.encode('utf-8'), self.get_mark()) 1104 1099 return chomping, increment 1105 1100 1106 1101 def scan_block_scalar_ignored_line(self, start_mark): 1107 1102 # See the specification for details. 1108 while self. reader.peek() == u' ':1109 self. reader.forward()1110 if self. reader.peek() == u'#':1111 while self. reader.peek() not in u'\0\r\n\x85\u2028\u2029':1112 self. reader.forward()1113 ch = self. reader.peek()1103 while self.peek() == u' ': 1104 self.forward() 1105 if self.peek() == u'#': 1106 while self.peek() not in u'\0\r\n\x85\u2028\u2029': 1107 self.forward() 1108 ch = self.peek() 1114 1109 if ch not in u'\0\r\n\x85\u2028\u2029': 1115 1110 raise ScannerError("while scanning a block scalar", start_mark, 1116 1111 "expected a comment or a line break, but found %r" 1117 % ch.encode('utf-8'), self. reader.get_mark())1112 % ch.encode('utf-8'), self.get_mark()) 1118 1113 self.scan_line_break() 1119 1114 … … 1122 1117 chunks = [] 1123 1118 max_indent = 0 1124 end_mark = self. reader.get_mark()1125 while self. reader.peek() in u' \r\n\x85\u2028\u2029':1126 if self. reader.peek() != u' ':1119 end_mark = self.get_mark() 1120 while self.peek() in u' \r\n\x85\u2028\u2029': 1121 if self.peek() != u' ': 1127 1122 chunks.append(self.scan_line_break()) 1128 end_mark = self. reader.get_mark()1123 end_mark = self.get_mark() 1129 1124 else: 1130 self. reader.forward()1131 if self. reader.column > max_indent:1132 max_indent = self. reader.column1125 self.forward() 1126 if self.column > max_indent: 1127 max_indent = self.column 1133 1128 return chunks, max_indent, end_mark 1134 1129 … … 1136 1131 # See the specification for details. 1137 1132 chunks = [] 1138 end_mark = self. reader.get_mark()1139 while self. reader.column < indent and self.reader.peek() == u' ':1140 self. reader.forward()1141 while self. reader.peek() in u'\r\n\x85\u2028\u2029':1133 end_mark = self.get_mark() 1134 while self.column < indent and self.peek() == u' ': 1135 self.forward() 1136 while self.peek() in u'\r\n\x85\u2028\u2029': 1142 1137 chunks.append(self.scan_line_break()) 1143 end_mark = self. reader.get_mark()1144 while self. reader.column < indent and self.reader.peek() == u' ':1145 self. reader.forward()1138 end_mark = self.get_mark() 1139 while self.column < indent and self.peek() == u' ': 1140 self.forward() 1146 1141 return chunks, end_mark 1147 1142 … … 1158 1153 double = False 1159 1154 chunks = [] 1160 start_mark = self. reader.get_mark()1161 quote = self. reader.peek()1162 self. reader.forward()1155 start_mark = self.get_mark() 1156 quote = self.peek() 1157 self.forward() 1163 1158 chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) 1164 while self. reader.peek() != quote:1159 while self.peek() != quote: 1165 1160 chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) 1166 1161 chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) 1167 self. reader.forward()1168 end_mark = self. reader.get_mark()1162 self.forward() 1163 end_mark = self.get_mark() 1169 1164 return ScalarToken(u''.join(chunks), False, start_mark, end_mark, 1170 1165 style) … … 1201 1196 while True: 1202 1197 length = 0 1203 while self. reader.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':1198 while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': 1204 1199 length += 1 1205 1200 if length: 1206 chunks.append(self. reader.prefix(length))1207 self. reader.forward(length)1208 ch = self. reader.peek()1209 if not double and ch == u'\'' and self. reader.peek(1) == u'\'':1201 chunks.append(self.prefix(length)) 1202 self.forward(length) 1203 ch = self.peek() 1204 if not double and ch == u'\'' and self.peek(1) == u'\'': 1210 1205 chunks.append(u'\'') 1211 self. reader.forward(2)1206 self.forward(2) 1212 1207 elif (double and ch == u'\'') or (not double and ch in u'\"\\'): 1213 1208 chunks.append(ch) 1214 self. reader.forward()1209 self.forward() 1215 1210 elif double and ch == u'\\': 1216 self. reader.forward()1217 ch = self. reader.peek()1211 self.forward() 1212 ch = self.peek() 1218 1213 if ch in self.ESCAPE_REPLACEMENTS: 1219 1214 chunks.append(self.ESCAPE_REPLACEMENTS[ch]) 1220 self. reader.forward()1215 self.forward() 1221 1216 elif ch in self.ESCAPE_CODES: 1222 1217 length = self.ESCAPE_CODES[ch] 1223 self. reader.forward()1218 self.forward() 1224 1219 for k in range(length): 1225 if self. reader.peek(k) not in u'0123456789ABCDEFabcdef':1220 if self.peek(k) not in u'0123456789ABCDEFabcdef': 1226 1221 raise ScannerError("while scanning a double-quoted scalar", start_mark, 1227 1222 "expected escape sequence of %d hexdecimal numbers, but found %r" % 1228 (length, self. reader.peek(k).encode('utf-8')), self.reader.get_mark())1229 code = int(self. reader.prefix(length), 16)1223 (length, self.peek(k).encode('utf-8')), self.get_mark()) 1224 code = int(self.prefix(length), 16) 1230 1225 chunks.append(unichr(code)) 1231 self. reader.forward(length)1226 self.forward(length) 1232 1227 elif ch in u'\r\n\x85\u2028\u2029': 1233 1228 self.scan_line_break() … … 1235 1230 else: 1236 1231 raise ScannerError("while scanning a double-quoted scalar", start_mark, 1237 "found unknown escape character %r" % ch.encode('utf-8'), self. reader.get_mark())1232 "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) 1238 1233 else: 1239 1234 return chunks … … 1243 1238 chunks = [] 1244 1239 length = 0 1245 while self. reader.peek(length) in u' \t':1240 while self.peek(length) in u' \t': 1246 1241 length += 1 1247 whitespaces = self. reader.prefix(length)1248 self. reader.forward(length)1249 ch = self. reader.peek()1242 whitespaces = self.prefix(length) 1243 self.forward(length) 1244 ch = self.peek() 1250 1245 if ch == u'\0': 1251 1246 raise ScannerError("while scanning a quoted scalar", start_mark, 1252 "found unexpected end of stream", self. reader.get_mark())1247 "found unexpected end of stream", self.get_mark()) 1253 1248 elif ch in u'\r\n\x85\u2028\u2029': 1254 1249 line_break = self.scan_line_break() … … 1269 1264 # Instead of checking indentation, we check for document 1270 1265 # separators. 1271 prefix = self. reader.prefix(3)1266 prefix = self.prefix(3) 1272 1267 if (prefix == u'---' or prefix == u'...') \ 1273 and self. reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':1268 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 1274 1269 raise ScannerError("while scanning a quoted scalar", start_mark, 1275 "found unexpected document separator", self. reader.get_mark())1276 while self. reader.peek() in u' \t':1277 self. reader.forward()1278 if self. reader.peek() in u'\r\n\x85\u2028\u2029':1270 "found unexpected document separator", self.get_mark()) 1271 while self.peek() in u' \t': 1272 self.forward() 1273 if self.peek() in u'\r\n\x85\u2028\u2029': 1279 1274 chunks.append(self.scan_line_break()) 1280 1275 else: … … 1288 1283 # Indentation rules are loosed for the flow context. 1289 1284 chunks = [] 1290 start_mark = self. reader.get_mark()1285 start_mark = self.get_mark() 1291 1286 end_mark = start_mark 1292 1287 indent = self.indent+1 … … 1298 1293 while True: 1299 1294 length = 0 1300 if self. reader.peek() == u'#':1295 if self.peek() == u'#': 1301 1296 break 1302 1297 while True: 1303 ch = self. reader.peek(length)1298 ch = self.peek(length) 1304 1299 if ch in u'\0 \t\r\n\x85\u2028\u2029' \ 1305 1300 or (not self.flow_level and ch == u':' and 1306 self. reader.peek(length+1) in u'\0 \t\r\n\x28\u2028\u2029') \1301 self.peek(length+1) in u'\0 \t\r\n\x28\u2028\u2029') \ 1307 1302 or (self.flow_level and ch in u',:?[]{}'): 1308 1303 break … … 1312 1307 self.allow_simple_key = False 1313 1308 chunks.extend(spaces) 1314 chunks.append(self. reader.prefix(length))1315 self. reader.forward(length)1316 end_mark = self. reader.get_mark()1309 chunks.append(self.prefix(length)) 1310 self.forward(length) 1311 end_mark = self.get_mark() 1317 1312 spaces = self.scan_plain_spaces(indent, start_mark) 1318 if not spaces or self. reader.peek() == u'#' \1319 or (not self.flow_level and self. reader.column < indent):1313 if not spaces or self.peek() == u'#' \ 1314 or (not self.flow_level and self.column < indent): 1320 1315 break 1321 1316 return ScalarToken(u''.join(chunks), True, start_mark, end_mark) … … 1327 1322 chunks = [] 1328 1323 length = 0 1329 while self. reader.peek(length) in u' ':1324 while self.peek(length) in u' ': 1330 1325 length += 1 1331 whitespaces = self. reader.prefix(length)1332 self. reader.forward(length)1333 ch = self. reader.peek()1326 whitespaces = self.prefix(length) 1327 self.forward(length) 1328 ch = self.peek() 1334 1329 if ch in u'\r\n\x85\u2028\u2029': 1335 1330 line_break = self.scan_line_break() 1336 1331 self.allow_simple_key = True 1337 prefix = self. reader.prefix(3)1332 prefix = self.prefix(3) 1338 1333 if (prefix == u'---' or prefix == u'...') \ 1339 and self. reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':1334 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 1340 1335 return 1341 1336 breaks = [] 1342 while self. reader.peek() in u' \r\n\x85\u2028\u2029':1343 if self. reader.peek() == ' ':1344 self. reader.forward()1337 while self.peek() in u' \r\n\x85\u2028\u2029': 1338 if self.peek() == ' ': 1339 self.forward() 1345 1340 else: 1346 1341 breaks.append(self.scan_line_break()) 1347 prefix = self. reader.prefix(3)1342 prefix = self.prefix(3) 1348 1343 if (prefix == u'---' or prefix == u'...') \ 1349 and self. reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':1344 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 1350 1345 return 1351 1346 if line_break != u'\n': … … 1362 1357 # For some strange reasons, the specification does not allow '_' in 1363 1358 # tag handles. I have allowed it anyway. 1364 ch = self. reader.peek()1359 ch = self.peek() 1365 1360 if ch != u'!': 1366 1361 raise ScannerError("while scanning a %s" % name, start_mark, 1367 1362 "expected '!', but found %r" % ch.encode('utf-8'), 1368 self. reader.get_mark())1363 self.get_mark()) 1369 1364 length = 1 1370 ch = self. reader.peek(length)1365 ch = self.peek(length) 1371 1366 if ch != u' ': 1372 1367 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ 1373 1368 or ch in u'-_': 1374 1369 length += 1 1375 ch = self. reader.peek(length)1370 ch = self.peek(length) 1376 1371 if ch != u'!': 1377 self. reader.forward(length)1372 self.forward(length) 1378 1373 raise ScannerError("while scanning a %s" % name, start_mark, 1379 1374 "expected '!', but found %r" % ch.encode('utf-8'), 1380 self. reader.get_mark())1375 self.get_mark()) 1381 1376 length += 1 1382 value = self. reader.prefix(length)1383 self. reader.forward(length)1377 value = self.prefix(length) 1378 self.forward(length) 1384 1379 return value 1385 1380 … … 1389 1384 chunks = [] 1390 1385 length = 0 1391 ch = self. reader.peek(length)1386 ch = self.peek(length) 1392 1387 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ 1393 1388 or ch in u'-;/?:@&=+$,_.!~*\'()[]%': 1394 1389 if ch == u'%': 1395 chunks.append(self. reader.prefix(length))1396 self. reader.forward(length)1390 chunks.append(self.prefix(length)) 1391 self.forward(length) 1397 1392 length = 0 1398 1393 chunks.append(self.scan_uri_escapes(name, start_mark)) 1399 1394 else: 1400 1395 length += 1 1401 ch = self. reader.peek(length)1396 ch = self.peek(length) 1402 1397 if length: 1403 chunks.append(self. reader.prefix(length))1404 self. reader.forward(length)1398 chunks.append(self.prefix(length)) 1399 self.forward(length) 1405 1400 length = 0 1406 1401 if not chunks: 1407 1402 raise ScannerError("while parsing a %s" % name, start_mark, 1408 1403 "expected URI, but found %r" % ch.encode('utf-8'), 1409 self. reader.get_mark())1404 self.get_mark()) 1410 1405 return u''.join(chunks) 1411 1406 … … 1413 1408 # See the specification for details. 1414 1409 bytes = [] 1415 mark = self. reader.get_mark()1416 while self. reader.peek() == u'%':1417 self. reader.forward()1410 mark = self.get_mark() 1411 while self.peek() == u'%': 1412 self.forward() 1418 1413 for k in range(2): 1419 if self. reader.peek(k) not in u'0123456789ABCDEFabcdef':1414 if self.peek(k) not in u'0123456789ABCDEFabcdef': 1420 1415 raise ScannerError("while scanning a %s" % name, start_mark, 1421 1416 "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % 1422 (self. reader.peek(k).encode('utf-8')), self.reader.get_mark())1423 bytes.append(chr(int(self. reader.prefix(2), 16)))1424 self. reader.forward(2)1417 (self.peek(k).encode('utf-8')), self.get_mark()) 1418 bytes.append(chr(int(self.prefix(2), 16))) 1419 self.forward(2) 1425 1420 try: 1426 1421 value = unicode(''.join(bytes), 'utf-8') … … 1438 1433 # '\u2029 : '\u2029' 1439 1434 # default : '' 1440 ch = self. reader.peek()1435 ch = self.peek() 1441 1436 if ch in u'\r\n\x85': 1442 if self. reader.prefix(2) == u'\r\n':1443 self. reader.forward(2)1437 if self.prefix(2) == u'\r\n': 1438 self.forward(2) 1444 1439 else: 1445 self. reader.forward()1440 self.forward() 1446 1441 return u'\n' 1447 1442 elif ch in u'\u2028\u2029': 1448 self. reader.forward()1443 self.forward() 1449 1444 return ch 1450 1445 return u''
