Changeset 47
- Timestamp: 02/17/06 17:39:52 (7 years ago)
- Location: branches/pyyaml3000
- Files:
-
- 6 added
- 9 edited
- 2 copied
-
lib/yaml/parser.py (modified) (4 diffs)
-
lib/yaml/reader.py (modified) (3 diffs)
-
lib/yaml/scanner.py (modified) (25 diffs)
-
lib/yaml/tokens.py (modified) (6 diffs)
-
tests/data/forbidden-entry.error-message (added)
-
tests/data/forbidden-key.error-message (added)
-
tests/data/forbidden-value.error-message (added)
-
tests/data/invalid-character.error-message (copied) (copied from branches/pyyaml3000/tests/data/invalid-character.stream-error)
-
tests/data/invalid-simple-key.error-message (added)
-
tests/data/invalid-utf8-byte.error-message (copied) (copied from branches/pyyaml3000/tests/data/invalid-utf8-byte.stream-error)
-
tests/data/spec-10-07.data (modified) (1 diff)
-
tests/data/unclosed-bracket.error-message (added)
-
tests/test_appliance.py (modified) (3 diffs)
-
tests/test_errors.py (added)
-
tests/test_marker.py (modified) (2 diffs)
-
tests/test_tokens.py (modified) (2 diffs)
-
tests/test_yaml.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
branches/pyyaml3000/lib/yaml/parser.py
r46 r47 96 96 def parse_stream(self): 97 97 documents = [] 98 if not self.is_token(DirectiveToken, DocumentStartToken, EndToken):98 if not self.is_token(DirectiveToken, DocumentStartToken, StreamEndToken): 99 99 documents.append(self.parse_block_node()) 100 while not self.is_token( EndToken):100 while not self.is_token(StreamEndToken): 101 101 while self.is_token(DirectiveToken): 102 102 self.get_token() … … 105 105 self.get_token() 106 106 if self.is_token(DirectiveToken, 107 DocumentStartToken, DocumentEndToken, EndToken):107 DocumentStartToken, DocumentEndToken, StreamEndToken): 108 108 documents.append(None) 109 109 else: … … 111 111 while self.is_token(DocumentEndToken): 112 112 self.get_token() 113 if not self.is_token( EndToken):114 self.fail(" END is expected")113 if not self.is_token(StreamEndToken): 114 self.fail("STREAM-END is expected") 115 115 return documents 116 116 … … 285 285 def fail(self, message): 286 286 marker = self.scanner.peek_token().start_marker 287 raise Error(message+':\n'+marker.get_snippet())288 287 raise ParserError(message+':\n'+marker.get_snippet()) 288 -
branches/pyyaml3000/lib/yaml/reader.py
r46 r47 66 66 self.pointer = pointer 67 67 68 def get_snippet(self, max_length=79):68 def get_snippet(self, indent=4, max_length=75): 69 69 if self.buffer is None: 70 70 return None … … 86 86 break 87 87 snippet = self.buffer[start:end].encode('utf-8') 88 return head + snippet + tail + '\n' \ 89 + ' '*(self.pointer-start+len(head)) + '^' + '\n' 88 return ' '*indent + head + snippet + tail + '\n' \ 89 + ' '*(indent+self.pointer-start+len(head)) + '^' 90 91 def __str__(self): 92 snippet = self.get_snippet() 93 where = " in \"%s\", line %d, column %d" \ 94 % (self.name, self.line+1, self.column+1) 95 if snippet is not None: 96 where += ":\n"+snippet 97 return where 90 98 91 99 class ReaderError(YAMLError): … … 101 109 if isinstance(self.character, str): 102 110 return "'%s' codec can't decode byte #x%02x: %s\n" \ 103 " \tin '%s', position %d."\111 " in \"%s\", position %d" \ 104 112 % (self.encoding, ord(self.character), self.reason, 105 113 self.name, self.position) 106 114 else: 107 115 return "unacceptable character #x%04x: %s\n" \ 108 " \tin '%s', position %d."\116 " in \"%s\", position %d" \ 109 117 % (ord(self.character), self.reason, 110 118 self.name, self.position) -
branches/pyyaml3000/lib/yaml/scanner.py
r46 r47 24 24 # key: "valu\?e" 25 25 # ^ 26 pass 26 def __init__(self, context=None, context_marker=None, 27 problem=None, problem_marker=None, description=None): 28 self.context = context 29 self.context_marker = context_marker 30 self.problem = problem 31 self.problem_marker = problem_marker 32 self.description = description 33 34 def __str__(self): 35 lines = [] 36 for (place, marker) in [(self.context, self.context_marker), 37 (self.problem, self.problem_marker)]: 38 if place is not None: 39 lines.append(place) 40 if marker is not None: 41 lines.append(str(marker)) 42 if self.description is not None: 43 lines.append(self.description) 44 return '\n'.join(lines) 27 45 28 46 class SimpleKey: … … 140 158 # and decrease the current indentation level. 141 159 self.unwind_indent(self.reader.column) 142 143 #print144 #print self.reader.get_marker().get_snippet()145 160 146 161 # Peek the next character. … … 257 272 or self.reader.index-key.index > 1024: 258 273 if key.required: 259 self.fail("simple key is required") 274 raise ScannerError("while scanning a simple key", key.marker, 275 "could not found expected ':'", self.reader.get_marker()) 260 276 del self.possible_simple_keys[level] 261 277 … … 267 283 # Check if a simple key is required at the current position. 268 284 required = not self.flow_level and self.indent == self.reader.column 285 286 # A simple key is required only if it is the first token in the current 287 # line. Therefore it is always allowed. 288 assert self.allow_simple_key or not required 269 289 270 290 # The next token might be a simple key. Let's save it's number and … … 281 301 self.possible_simple_keys[self.flow_level] = key 282 302 283 # A simple key is required at the current position.284 elif required:285 self.fail("simple key is required")286 287 303 def remove_possible_simple_key(self): 288 304 # Remove the saved possible key position at the current flow level. 
289 305 if self.flow_level in self.possible_simple_keys: 290 306 key = self.possible_simple_keys[self.flow_level] 291 if key.required: 292 self.fail("simple key is required") 307 308 # I don't think it's possible, but I could be wrong. 309 assert not key.required 310 #if key.required: 311 # raise ScannerError("while scanning a simple key", key.marker, 312 # "could not found expected ':'", self.reader.get_marker()) 293 313 294 314 # Indentation functions. … … 297 317 298 318 # In flow context, tokens should respect indentation. 319 # Actually the condition should be `self.indent >= column` according to 320 # the spec. But this condition will prohibit intuitively correct 321 # constructions such as 322 # key : { 323 # } 299 324 if self.flow_level and self.indent > column: 300 self.fail("invalid intendation in the flow context") 325 raise ScannerError(None, None, 326 "invalid intendation or unclosed '[' or '{'", 327 self.reader.get_marker()) 301 328 302 329 # In block context, we may need to issue the BLOCK-END tokens. … … 329 356 330 357 # Add END. 331 self.tokens.append( EndToken(marker, marker))358 self.tokens.append(StreamEndToken(marker, marker)) 332 359 333 360 # The reader is ended. … … 344 371 345 372 # Scan and add DIRECTIVE. 346 self. scan_directive()373 self.tokens.append(self.scan_directive()) 347 374 348 375 def fetch_document_start(self): … … 421 448 # Are we allowed to start a new entry? 422 449 if not self.allow_simple_key: 423 self.fail("Cannot start a new entry here") 450 raise ScannerError(None, None, 451 "sequence entries are not allowed here", 452 self.reader.get_marker()) 424 453 425 454 # We may need to add BLOCK-SEQUENCE-START. … … 447 476 # Are we allowed to start a key (not nessesary a simple)? 448 477 if not self.allow_simple_key: 449 self.fail("Cannot start a new key here") 478 raise ScannerError(None, None, 479 "mapping keys are not allowed here", 480 self.reader.get_marker()) 450 481 451 482 # We may need to add BLOCK-MAPPING-START. 
… … 490 521 else: 491 522 523 # Block context needs additional checks. 524 # (Do we really need them? They will be catched by the parser 525 # anyway.) 526 if not self.flow_level: 527 528 # We are allowed to start a complex value if and only if 529 # we can start a simple key. 530 if not self.allow_simple_key: 531 raise ScannerError(None, None, 532 "mapping values are not allowed here", 533 self.reader.get_marker()) 534 492 535 # Simple keys are allowed after ':' in the block context. 493 536 self.allow_simple_key = not self.flow_level … … 511 554 512 555 # Scan and add ALIAS. 513 self. scan_anchor(AliasToken)556 self.tokens.append(self.scan_anchor(AliasToken)) 514 557 515 558 def fetch_anchor(self): … … 522 565 523 566 # Scan and add ANCHOR. 524 self. scan_anchor(AnchorToken)567 self.tokens.append(self.scan_anchor(AnchorToken)) 525 568 526 569 def fetch_tag(self): … … 533 576 534 577 # Scan and add TAG. 535 self. scan_tag()578 self.tokens.append(self.scan_tag()) 536 579 537 580 def fetch_literal(self): … … 550 593 551 594 # Scan and add SCALAR. 552 self. scan_block_scalar(folded)595 self.tokens.append(self.scan_block_scalar(folded)) 553 596 554 597 def fetch_single(self): … … 567 610 568 611 # Scan and add SCALAR. 569 self. scan_flow_scalar(double)612 self.tokens.append(self.scan_flow_scalar(double)) 570 613 571 614 def fetch_plain(self): … … 580 623 581 624 # Scan and add SCALAR. May change `allow_simple_key`. 582 self. scan_plain()625 self.tokens.append(self.scan_plain()) 583 626 584 627 # Checkers. … … 646 689 647 690 def scan_to_next_token(self): 691 # We ignore spaces, line breaks and comments. 692 # If we find a line break in the block context, we set the flag 693 # `allow_simple_key` on. 
648 694 found = False 649 695 while not found: … … 651 697 self.reader.forward() 652 698 if self.reader.peek() == u'#': 653 while self.reader.peek() not in u'\ r\n':699 while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': 654 700 self.reader.forward() 655 if self.reader.peek() in u'\r\n': 656 self.reader.forward() 701 if self.scan_line_break(): 657 702 if not self.flow_level: 658 703 self.allow_simple_key = True … … 663 708 marker = self.reader.get_marker() 664 709 if self.reader.peek(5) == u'%YAML ': 665 self.tokens.append(YAMLDirectiveToken(1, 1, marker, marker))710 token = YAMLDirectiveToken(1, 1, marker, marker) 666 711 elif self.reader.peek(4) == u'%TAG ': 667 self.tokens.append(TagDirectiveToken(marker, marker))712 token = TagDirectiveToken(marker, marker) 668 713 else: 669 self.tokens.append(ReservedDirectiveToken('', marker, marker))714 token = ReservedDirectiveToken('', marker, marker) 670 715 while self.reader.peek() not in u'\0\r\n': 671 716 self.reader.forward() 672 717 self.reader.forward() 718 return token 673 719 674 720 def scan_anchor(self, TokenClass): … … 677 723 self.reader.forward() 678 724 end_marker = self.reader.get_marker() 679 self.tokens.append(TokenClass('', start_marker, end_marker))725 return TokenClass('', start_marker, end_marker) 680 726 681 727 def scan_tag(self): … … 684 730 self.reader.forward() 685 731 end_marker = self.reader.get_marker() 686 self.tokens.append(TagToken('', start_marker, end_marker))732 return TagToken('', start_marker, end_marker) 687 733 688 734 def scan_block_scalar(self, folded): … … 702 748 if count < indent and self.reader.peek() not in u'#\r\n\x85\u2028\u2029': 703 749 break 704 self.tokens.append(ScalarToken('', False, start_marker, start_marker))750 return ScalarToken('', False, start_marker, start_marker) 705 751 706 752 def scan_flow_scalar(self, double): … … 716 762 self.reader.forward(1) 717 763 self.reader.forward(1) 718 self.tokens.append(ScalarToken('', False, marker, marker))764 return 
ScalarToken('', False, marker, marker) 719 765 720 766 def scan_plain(self): … … 748 794 break 749 795 space = True 750 self.tokens.append(ScalarToken('', True, marker, marker)) 796 return ScalarToken('', True, marker, marker) 797 798 def scan_line_break(self): 799 # Transforms: 800 # '\r\n' : '\n' 801 # '\r' : '\n' 802 # '\n' : '\n' 803 # '\x85' : '\n' 804 # '\u2028' : '\u2028' 805 # '\u2029 : '\u2029' 806 # default : '' 807 ch = self.reader.peek() 808 if ch in u'\r\n\x85': 809 if self.reader.peek(2) == u'\r\n': 810 self.forward(2) 811 else: 812 self.reader.forward() 813 return u'\n' 814 elif ch in u'\u2028\u2029': 815 self.reader.forward() 816 return ch 817 return u'' 751 818 752 819 def invalid_token(self): 753 820 self.fail("invalid token") 754 755 def fail(self, message):756 raise ScannerError(message)757 821 758 822 #try: -
branches/pyyaml3000/lib/yaml/tokens.py
r46 r47 6 6 7 7 class DirectiveToken(Token): 8 pass8 code = '<directive>' 9 9 10 10 class YAMLDirectiveToken(DirectiveToken): 11 code = '<%YAML directive>' 11 12 def __init__(self, major_version, minor_version, start_marker, end_marker): 12 13 self.major_version = major_version … … 16 17 17 18 class TagDirectiveToken(DirectiveToken): 18 pass19 code = '<%TAG directive>' 19 20 20 21 class ReservedDirectiveToken(DirectiveToken): 22 code = '<unknown directive>' 21 23 def __init__(self, name, start_marker, end_marker): 22 24 self.name = name … … 25 27 26 28 class DocumentStartToken(Token): 27 pass29 code = '<document start>' 28 30 29 31 class DocumentEndToken(Token): 30 pass32 code = '<document end>' 31 33 32 class EndToken(Token):33 pass34 class StreamEndToken(Token): 35 code = '<stream end>' 34 36 35 37 class BlockSequenceStartToken(Token): 36 pass38 code = '<block sequence start>' 37 39 38 40 class BlockMappingStartToken(Token): 39 pass41 code = '<block mapping end>' 40 42 41 43 class BlockEndToken(Token): 42 pass44 code = '<block end>' 43 45 44 46 class FlowSequenceStartToken(Token): 45 pass47 code = '[' 46 48 47 49 class FlowMappingStartToken(Token): 48 pass50 code = '{' 49 51 50 52 class FlowSequenceEndToken(Token): 51 pass53 code = ']' 52 54 53 55 class FlowMappingEndToken(Token): 54 pass56 code = '}' 55 57 56 58 class KeyToken(Token): 57 pass59 code = '?' 
58 60 59 61 class ValueToken(Token): 60 pass62 code = ':' 61 63 62 64 class EntryToken(Token): 63 pass65 code = '- or ,' 64 66 65 67 class AliasToken(Token): 68 code = '<alias>' 66 69 def __init__(self, value, start_marker, end_marker): 67 70 self.value = value … … 70 73 71 74 class AnchorToken(Token): 75 code = '<anchor>' 72 76 def __init__(self, value, start_marker, end_marker): 73 77 self.value = value … … 76 80 77 81 class TagToken(Token): 82 code = '<tag>' 78 83 def __init__(self, value, start_marker, end_marker): 79 84 self.value = value … … 82 87 83 88 class ScalarToken(Token): 89 code = '<scalar>' 84 90 def __init__(self, value, plain, start_marker, end_marker): 85 91 self.value = value -
branches/pyyaml3000/tests/data/spec-10-07.data
r44 r47 1 1 { 2 #? : value # Empty key 3 ? ~ : value, # Empty key 2 ? : value, # Empty key 4 3 ? explicit 5 4 key: value, -
branches/pyyaml3000/tests/test_appliance.py
r45 r47 70 70 return "%s(%s)" % (self.__class__.__name__, ''.join(args)) 71 71 72 class EndToken(Token):72 class StreamEndToken(Token): 73 73 pass 74 74 … … 133 133 ch = self.data[self.index] 134 134 if ch == u'\0': 135 tokens.append( EndToken())135 tokens.append(StreamEndToken()) 136 136 break 137 137 elif ch == u'%': … … 286 286 def parse_stream(self): 287 287 documents = [] 288 while not self.test_token( EndToken):288 while not self.test_token(StreamEndToken): 289 289 if self.test_token(DirectiveToken, DocumentStartToken): 290 290 documents.append(self.parse_document()) -
branches/pyyaml3000/tests/test_marker.py
r46 r47 20 20 index += 1 21 21 marker = Marker(test_name, line, column, unicode(input), index) 22 snippet = marker.get_snippet( )22 snippet = marker.get_snippet(indent=2, max_length=79) 23 23 #print "INPUT:" 24 24 #print input … … 26 26 #print snippet 27 27 self.failUnless(isinstance(snippet, str)) 28 self.failUnlessEqual(snippet.count('\n'), 2)29 data, pointer , dummy= snippet.split('\n')30 self.failUnless(len(data) < 8 0)28 self.failUnlessEqual(snippet.count('\n'), 1) 29 data, pointer = snippet.split('\n') 30 self.failUnless(len(data) < 82) 31 31 self.failUnlessEqual(data[len(pointer)-1], '*') 32 32 -
branches/pyyaml3000/tests/test_tokens.py
r46 r47 55 55 scanner = Scanner(Reader(file(data_filename, 'rb'))) 56 56 tokens1 = [] 57 while not isinstance(scanner.peek_token(), EndToken):57 while not isinstance(scanner.peek_token(), StreamEndToken): 58 58 tokens1.append(scanner.get_token()) 59 59 tokens1 = [self.replaces[t.__class__] for t in tokens1] … … 77 77 scanner = Scanner(Reader(file(filename, 'rb'))) 78 78 tokens = [] 79 while not isinstance(scanner.peek_token(), EndToken):79 while not isinstance(scanner.peek_token(), StreamEndToken): 80 80 tokens.append(scanner.get_token().__class__.__name__) 81 81 except: -
branches/pyyaml3000/tests/test_yaml.py
r46 r47 7 7 from test_tokens import * 8 8 from test_structure import * 9 from test_errors import * 9 10 10 11 def main(module='__main__'):
Note: See TracChangeset for help on using the changeset viewer.
