Changeset 51
- Timestamp:
- 02/19/06 17:17:28 (7 years ago)
- Location:
- branches/pyyaml3000
- Files:
-
- 1 added
- 16 edited
-
lib/yaml/__init__.py (modified) (1 diff)
-
lib/yaml/events.py (added)
-
lib/yaml/parser.py (modified) (5 diffs)
-
lib/yaml/scanner.py (modified) (15 diffs)
-
lib/yaml/tokens.py (modified) (5 diffs)
-
tests/data/spec-05-08.canonical (modified) (1 diff)
-
tests/data/spec-06-01.canonical (modified) (1 diff)
-
tests/data/spec-09-20.canonical (modified) (1 diff)
-
tests/data/spec-09-30.canonical (modified) (1 diff)
-
tests/data/spec-09-31.canonical (modified) (1 diff)
-
tests/data/spec-09-32.canonical (modified) (1 diff)
-
tests/data/spec-09-33.canonical (modified) (1 diff)
-
tests/test_appliance.py (modified) (13 diffs)
-
tests/test_canonical.py (modified) (1 diff)
-
tests/test_errors.py (modified) (1 diff)
-
tests/test_structure.py (modified) (5 diffs)
-
tests/test_tokens.py (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
branches/pyyaml3000/lib/yaml/__init__.py
r39 r51 1 2 from reader import Reader 3 from scanner import Scanner 4 from parser import Parser 5 6 from tokens import * 7 from events import * 8 9 def scan(data, Reader=Reader, Scanner=Scanner): 10 reader = Reader(data) 11 scanner = Scanner(reader) 12 return iter(scanner) 13 14 def parse(data, Reader=Reader, Scanner=Scanner, Parser=Parser): 15 reader = Reader(data) 16 scanner = Scanner(reader) 17 parser = Parser(scanner) 18 return iter(parser) 19 -
branches/pyyaml3000/lib/yaml/parser.py
r47 r51 1 1 2 # Production rules: 3 # stream ::= implicit_document? explicit_document* END 2 # YAML can be parsed by an LL(1) parser! 3 # 4 # We use the following production rules: 5 # stream ::= implicit_document? explicit_document* STREAM-END 4 6 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END? 5 7 # implicit_document ::= block_node DOCUMENT-END? … … 10 12 # flow_content ::= flow_collection | SCALAR 11 13 # block_collection ::= block_sequence | block_mapping 12 # block_sequence ::= BLOCK-SEQUENCE-START ( ENTRY block_node?)* BLOCK-END14 # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END 13 15 # block_mapping ::= BLOCK-MAPPING_START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END 14 16 # block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence) 15 # indentless_block_sequence ::= ( ENTRY block_node?)+17 # indentless_block_sequence ::= (BLOCK-ENTRY block_node?)+ 16 18 # flow_collection ::= flow_sequence | flow_mapping 17 # flow_sequence ::= FLOW-SEQUENCE-START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END 18 # flow_mapping ::= FLOW-MAPPING-START flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW-MAPPING-END 19 # flow_sequence_entry ::= flow_node | KEY flow_node (VALUE flow_node?)? 20 # flow_mapping_entry ::= flow_node | KEY flow_node (VALUE flow_node?)? 21 22 # FIRST(rule) sets: 23 # stream: {} 19 # flow_sequence ::= FLOW-SEQUENCE-START (flow_sequence_entry FLOW-ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END 20 # flow_mapping ::= FLOW-MAPPING-START (flow_mapping_entry FLOW-ENTRY)* flow_mapping_entry? FLOW-MAPPING-END 21 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 22 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 23 # 24 # Note that there is a slight deviation from the specification. 
We require a 25 # non-empty node content if ANCHOR or TAG is specified. This disallows such 26 # documents as 27 # 28 # key: !!str # empty value 29 # 30 # This is done to prevent ambiguity in parsing tags and aliases: 31 # 32 # { !!perl/YAML::Parser: value } 33 # 34 # What is it? Should it be interpreted as 35 # { ? !<tag:yaml.org,2002:perl/YAML::Parser> '' : value } 36 # or 37 # { ? !<tag:yaml.org,2002:perl/YAML::Parser:> value : '' } 38 # Since we disallow empty node content, tags are always followed by spaces 39 # or line breaks. 40 41 # FIRST sets: 42 # stream: FIRST(block_node) + { DIRECTIVE DOCUMENT-START } 24 43 # explicit_document: { DIRECTIVE DOCUMENT-START } 25 # implicit_document: block_node44 # implicit_document: FIRST(block_node) 26 45 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } 27 46 # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } … … 32 51 # block_sequence: { BLOCK-SEQUENCE-START } 33 52 # block_mapping: { BLOCK-MAPPING-START } 34 # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START ENTRY }53 # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } 35 54 # indentless_sequence: { ENTRY } 36 55 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } … … 42 61 from error import YAMLError 43 62 from tokens import * 63 from events import * 44 64 45 65 class ParserError(YAMLError): 46 pass 47 48 class Node: 49 def __repr__(self): 50 args = [] 51 for attribute in ['anchor', 'tag', 'value']: 52 if hasattr(self, attribute): 53 args.append(repr(getattr(self, attribute))) 54 return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) 55 56 class AliasNode(Node): 57 def __init__(self, anchor): 58 self.anchor = anchor 59 60 class ScalarNode(Node): 61 def 
__init__(self, anchor, tag, value): 62 self.anchor = anchor 63 self.tag = tag 64 self.value = value 65 66 class SequenceNode(Node): 67 def __init__(self, anchor, tag, value): 68 self.anchor = anchor 69 self.tag = tag 70 self.value = value 71 72 class MappingNode(Node): 73 def __init__(self, anchor, tag, value): 74 self.anchor = anchor 75 self.tag = tag 76 self.value = value 66 67 def __init__(self, context=None, context_marker=None, 68 problem=None, problem_marker=None): 69 self.context = context 70 self.context_marker = context_marker 71 self.problem = problem 72 self.problem_marker = problem_marker 73 74 def __str__(self): 75 lines = [] 76 for (place, marker) in [(self.context, self.context_marker), 77 (self.problem, self.problem_marker)]: 78 if place is not None: 79 lines.append(place) 80 if marker is not None: 81 lines.append(str(marker)) 82 return '\n'.join(lines) 77 83 78 84 class Parser: 85 # Since writing an LL(1) parser is a straightforward task, we do not give 86 # many comments here. 87 # Note that we use Python generators. If you rewrite the parser to another 88 # language, you may replace all 'yield'-s with event handler calls. 89 90 DEFAULT_TAGS = { 91 u'!': u'!', 92 u'!!': u'tag:yaml.org,2002:', 93 } 79 94 80 95 def __init__(self, scanner): 81 96 self.scanner = scanner 82 83 def is_token(self, *choices): 84 token = self.scanner.peek_token() 85 for choice in choices: 86 if isinstance(token, choices): 87 return True 97 self.current_event = None 98 self.yaml_version = None 99 self.tag_handles = {} 100 self.event_generator = self.parse_stream() 101 102 def check(self, *choices): 103 # Check the type of the next event. 
104 if self.current_event is None: 105 try: 106 self.current_event = self.event_generator.next() 107 except StopIteration: 108 pass 109 if self.current_event is not None: 110 for choice in choices: 111 if isinstance(self.current_event, choice): 112 return True 88 113 return False 89 114 90 def get_token(self): 91 return self.scanner.get_token() 92 93 def parse(self): 94 return self.parse_stream() 115 def get(self): 116 # Get the next event. 117 if self.current_event is None: 118 try: 119 self.current_event = self.event_generator.next() 120 except StopIteration: 121 pass 122 value = self.current_event 123 self.current_event = None 124 return value 125 126 def __iter__(self): 127 # Iterator protocol. 128 return self.event_generator 95 129 96 130 def parse_stream(self): 97 documents = [] 98 if not self.is_token(DirectiveToken, DocumentStartToken, StreamEndToken): 99 documents.append(self.parse_block_node()) 100 while not self.is_token(StreamEndToken): 101 while self.is_token(DirectiveToken): 102 self.get_token() 103 if not self.is_token(DocumentStartToken): 104 self.fail('DOCUMENT-START is expected') 105 self.get_token() 106 if self.is_token(DirectiveToken, 131 # implicit_document? explicit_document* STREAM-END 132 133 # Parse implicit document. 134 if not self.scanner.check(DirectiveToken, DocumentStartToken, 135 StreamEndToken): 136 self.tag_handles = self.DEFAULT_TAGS 137 for event in self.parse_block_node(): 138 yield event 139 140 # Parse explicit documents. 
141 while not self.scanner.check(StreamEndToken): 142 self.process_directives() 143 if not self.scanner.check(DocumentStartToken): 144 raise ParserError(None, None, 145 "expected '<document start>', but found %r" 146 % self.scanner.peek().id, 147 self.scanner.peek().start_marker) 148 token = self.scanner.get() 149 if self.scanner.check(DirectiveToken, 107 150 DocumentStartToken, DocumentEndToken, StreamEndToken): 108 documents.append(None) 109 else: 110 documents.append(self.parse_block_node()) 111 while self.is_token(DocumentEndToken): 112 self.get_token() 113 if not self.is_token(StreamEndToken): 114 self.fail("STREAM-END is expected") 115 return documents 151 yield self.process_empty_scalar(token.end_marker) 152 else: 153 for event in self.parse_block_node(): 154 yield event 155 while self.scanner.check(DocumentEndToken): 156 self.scanner.get() 157 158 # Parse end of stream. 159 token = self.scanner.get() 160 yield StreamEndEvent(token.start_marker, token.end_marker) 161 162 def process_directives(self): 163 # DIRECTIVE* 164 self.yaml_version = None 165 self.tag_handles = {} 166 while self.scanner.check(DirectiveToken): 167 token = self.scanner.get() 168 if token.name == u'YAML': 169 if self.yaml_version is not None: 170 raise ParserError(None, None, 171 "found duplicate YAML directive", token.start_marker()) 172 major, minor = token.value 173 if major != 1: 174 raise ParserError(None, None, 175 "found incompatible YAML document (version 1.* is required)", 176 token.start_marker()) 177 self.yaml_version = token.value 178 elif token.name == u'TAG': 179 handle, prefix = token.value 180 if handle in self.tag_handles: 181 raise ParserError(None, None, 182 "duplicate tag handle %r" % handle.encode('utf-8'), 183 token.start_marker()) 184 self.tag_handles[handle] = prefix 185 for key in self.DEFAULT_TAGS: 186 if key not in self.tag_handles: 187 self.tag_handles[key] = self.DEFAULT_TAGS[key] 116 188 117 189 def parse_block_node(self): … … 125 197 126 198 def 
parse_node(self, block=False, indentless_sequence=False): 127 if self.is_token(AliasToken): 128 token = self.get_token() 129 return AliasNode(token.value) 130 anchor = None 131 tag = None 132 if self.is_token(AnchorToken): 133 anchor = self.get_token().value 134 if self.is_token(TagToken): 135 tag = self.get_token().value 136 elif self.is_token(TagToken): 137 tag = self.get_token().value 138 if self.is_token(AnchorToken): 139 anchor = self.get_token().value 140 if indentless_sequence and self.is_token(EntryToken): 141 NodeClass = SequenceNode 142 value = self.parse_indentless_sequence() 199 # block_node ::= ALIAS | properties? block_content 200 # flow_node ::= ALIAS | properties? flow_content 201 # properties ::= TAG ANCHOR? | ANCHOR TAG? 202 # block_content ::= block_collection | flow_collection | SCALAR 203 # flow_content ::= flow_collection | SCALAR 204 # block_collection ::= block_sequence | block_mapping 205 # block_node_or_indentless_sequence ::= ALIAS | properties? 206 # (block_content | indentless_block_sequence) 207 if self.scanner.check(AliasToken): 208 token = self.scanner.get() 209 yield AliasEvent(token.value, token.start_marker, token.end_marker) 143 210 else: 144 if self.is_token(ScalarToken): 145 NodeClass = ScalarNode 146 elif self.is_token(BlockSequenceStartToken, FlowSequenceStartToken): 147 NodeClass = SequenceNode 148 elif self.is_token(BlockMappingStartToken, FlowMappingStartToken): 149 NodeClass = MappingNode 150 if block: 151 value = self.parse_block_content() 152 else: 153 value = self.parse_flow_content() 154 return NodeClass(anchor, tag, value) 155 156 def parse_block_content(self): 157 if self.is_token(ScalarToken): 158 return self.get_token().value 159 elif self.is_token(BlockSequenceStartToken): 160 return self.parse_block_sequence() 161 elif self.is_token(BlockMappingStartToken): 162 return self.parse_block_mapping() 163 elif self.is_token(FlowSequenceStartToken): 164 return self.parse_flow_sequence() 165 elif 
self.is_token(FlowMappingStartToken): 166 return self.parse_flow_mapping() 167 else: 168 self.fail('block content is expected') 169 170 def parse_flow_content(self): 171 if self.is_token(ScalarToken): 172 return self.get_token().value 173 elif self.is_token(FlowSequenceStartToken): 174 return self.parse_flow_sequence() 175 elif self.is_token(FlowMappingStartToken): 176 return self.parse_flow_mapping() 177 else: 178 self.fail('flow content is expected') 211 anchor = None 212 tag = None 213 start_marker = end_marker = tag_marker = None 214 if self.scanner.check(AnchorToken): 215 token = self.scanner.get() 216 start_marker = end_marker = token.start_marker 217 anchor = token.value 218 if self.scanner.check(TagToken): 219 token = self.scanner.get() 220 end_marker = tag_marker = token.start_marker 221 tag = token.value 222 elif self.scanner.check(TagToken): 223 token = self.scanner.get() 224 start_marker = end_marker = tag_marker = token.start_marker 225 tag = token.value 226 if self.scanner.check(AnchorToken): 227 token = self.scanner.get() 228 end_marker = token.start_marker 229 anchor = token.value 230 if tag is not None: 231 handle, suffix = tag 232 if handle is not None: 233 if handle not in self.tag_handles: 234 raise ParserError("while parsing a node", start_marker, 235 "found undefined tag handle %r" % handle.encode('utf-8'), 236 tag_marker) 237 tag = self.tag_handles[handle]+suffix 238 else: 239 tag = suffix 240 if tag is None: 241 if not (self.scanner.check(ScalarToken) and 242 self.scanner.peek().plain): 243 tag = u'!' 
244 if start_marker is None: 245 start_marker = self.scanner.peek().start_marker 246 event = None 247 collection_events = None 248 if indentless_sequence and self.scanner.check(BlockEntryToken): 249 end_marker = self.scanner.peek().end_marker 250 event = SequenceEvent(anchor, tag, start_marker, end_marker) 251 collection_events = self.parse_indentless_sequence() 252 else: 253 if self.scanner.check(ScalarToken): 254 token = self.scanner.get() 255 end_marker = token.end_marker 256 event = ScalarEvent(anchor, tag, token.value, 257 start_marker, end_marker) 258 elif self.scanner.check(FlowSequenceStartToken): 259 end_marker = self.scanner.peek().end_marker 260 event = SequenceEvent(anchor, tag, start_marker, end_marker) 261 collection_events = self.parse_flow_sequence() 262 elif self.scanner.check(FlowMappingStartToken): 263 end_marker = self.scanner.peek().end_marker 264 event = MappingEvent(anchor, tag, start_marker, end_marker) 265 collection_events = self.parse_flow_mapping() 266 elif block and self.scanner.check(BlockSequenceStartToken): 267 end_marker = self.scanner.peek().start_marker 268 event = SequenceEvent(anchor, tag, start_marker, end_marker) 269 collection_events = self.parse_block_sequence() 270 elif block and self.scanner.check(BlockMappingStartToken): 271 end_marker = self.scanner.peek().start_marker 272 event = MappingEvent(anchor, tag, start_marker, end_marker) 273 collection_events = self.parse_block_mapping() 274 else: 275 if block: 276 node = 'block' 277 else: 278 node = 'flow' 279 token = self.scanner.peek() 280 raise ParserError("while scanning a %s node" % node, start_marker, 281 "expected the node content, but found %r" % token.id, 282 token.start_marker) 283 yield event 284 if collection_events is not None: 285 for event in collection_events: 286 yield event 179 287 180 288 def parse_block_sequence(self): 181 sequence = [] 182 if not self.is_token(BlockSequenceStartToken): 183 self.fail('BLOCK-SEQUENCE-START is expected') 184 self.get_token() 
185 while self.is_token(EntryToken): 186 self.get_token() 187 if not self.is_token(EntryToken, BlockEndToken): 188 sequence.append(self.parse_block_node()) 189 else: 190 sequence.append(None) 191 if not self.is_token(BlockEndToken): 192 self.fail('BLOCK-END is expected') 193 self.get_token() 194 return sequence 289 # BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END 290 token = self.scanner.get() 291 start_marker = token.start_marker 292 while self.scanner.check(BlockEntryToken): 293 token = self.scanner.get() 294 if not self.scanner.check(BlockEntryToken, BlockEndToken): 295 for event in self.parse_block_node(): 296 yield event 297 else: 298 yield self.process_empty_scalar(token.end_marker) 299 if not self.scanner.check(BlockEndToken): 300 token = self.scanner.peek() 301 raise ParserError("while scanning a block collection", start_marker, 302 "expected <block end>, but found %r" % token.id, token.start_marker) 303 token = self.scanner.get() 304 yield CollectionEndEvent(token.start_marker, token.end_marker) 195 305 196 306 def parse_indentless_sequence(self): 197 sequence = [] 198 while self.is_token(EntryToken): 199 self.get_token() 200 if not self.is_token(EntryToken): 201 sequence.append(self.parse_block_node()) 202 else: 203 sequence.append(None) 204 return sequence 307 # (BLOCK-ENTRY block_node?)+ 308 while self.scanner.check(BlockEntryToken): 309 token = self.scanner.get() 310 if not self.scanner.check(BlockEntryToken, 311 KeyToken, ValueToken, BlockEndToken): 312 for event in self.parse_block_node(): 313 yield event 314 else: 315 yield self.process_empty_scalar(token.end_marker) 316 token = self.scanner.peek() 317 yield CollectionEndEvent(token.start_marker, token.start_marker) 205 318 206 319 def parse_block_mapping(self): 207 mapping = [] 208 if not self.is_token(BlockMappingStartToken): 209 self.fail('BLOCK-MAPPING-START is expected') 210 self.get_token() 211 while self.is_token(KeyToken, ValueToken): 212 key = None 213 value = None 214 if 
self.is_token(KeyToken): 215 self.get_token() 216 if not self.is_token(KeyToken, ValueToken, BlockEndToken): 217 key = self.parse_block_node_or_indentless_sequence() 218 if self.is_token(ValueToken): 219 self.get_token() 220 if not self.is_token(KeyToken, ValueToken, BlockEndToken): 221 value = self.parse_block_node_or_indentless_sequence() 222 mapping.append((key, value)) 223 if not self.is_token(BlockEndToken): 224 self.fail('BLOCK-END is expected') 225 self.get_token() 226 return mapping 320 # BLOCK-MAPPING_START 321 # ((KEY block_node_or_indentless_sequence?)? 322 # (VALUE block_node_or_indentless_sequence?)?)* 323 # BLOCK-END 324 token = self.scanner.get() 325 start_marker = token.start_marker 326 while self.scanner.check(KeyToken, ValueToken): 327 if self.scanner.check(KeyToken): 328 token = self.scanner.get() 329 if not self.scanner.check(KeyToken, ValueToken, BlockEndToken): 330 for event in self.parse_block_node_or_indentless_sequence(): 331 yield event 332 else: 333 yield self.process_empty_scalar(token.end_marker) 334 if self.scanner.check(ValueToken): 335 token = self.scanner.get() 336 if not self.scanner.check(KeyToken, ValueToken, BlockEndToken): 337 for event in self.parse_block_node_or_indentless_sequence(): 338 yield event 339 else: 340 yield self.process_empty_scalar(token.end_marker) 341 else: 342 token = self.scanner.peek() 343 yield self.process_empty_scalar(token.start_marker) 344 if not self.scanner.check(BlockEndToken): 345 token = self.scanner.peek() 346 raise ParserError("while scanning a block mapping", start_marker, 347 "expected <block end>, but found %r" % token.id, token.start_marker) 348 token = self.scanner.get() 349 yield CollectionEndEvent(token.start_marker, token.end_marker) 227 350 228 351 def parse_flow_sequence(self): 229 sequence = [] 230 if not self.is_token(FlowSequenceStartToken): 231 self.fail('FLOW-SEQUENCE-START is expected') 232 self.get_token() 233 while not self.is_token(FlowSequenceEndToken): 234 if 
self.is_token(KeyToken): 235 self.get_token() 236 key = None 237 value = None 238 if not self.is_token(ValueToken): 239 key = self.parse_flow_node() 240 if self.is_token(ValueToken): 241 self.get_token() 242 if not self.is_token(EntryToken, FlowSequenceEndToken): 243 value = self.parse_flow_node() 244 node = MappingNode(None, None, [(key, value)]) 245 sequence.append(node) 246 else: 247 sequence.append(self.parse_flow_node()) 248 if not self.is_token(EntryToken, FlowSequenceEndToken): 249 self.fail("ENTRY or FLOW-SEQUENCE-END are expected") 250 if self.is_token(EntryToken): 251 self.get_token() 252 if not self.is_token(FlowSequenceEndToken): 253 self.fail('FLOW-SEQUENCE-END is expected') 254 self.get_token() 255 return sequence 352 # flow_sequence ::= FLOW-SEQUENCE-START 353 # (flow_sequence_entry FLOW-ENTRY)* 354 # flow_sequence_entry? 355 # FLOW-SEQUENCE-END 356 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 357 # 358 # Note that while production rules for both flow_sequence_entry and 359 # flow_mapping_entry are equal, their interpretations are different. 360 # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` 361 # generate an inline mapping (set syntax). 
362 token = self.scanner.get() 363 start_marker = token.start_marker 364 while not self.scanner.check(FlowSequenceEndToken): 365 if self.scanner.check(KeyToken): 366 token = self.scanner.get() 367 yield MappingEvent(None, u'!', 368 token.start_marker, token.end_marker) 369 if not self.scanner.check(ValueToken, 370 FlowEntryToken, FlowSequenceEndToken): 371 for event in self.parse_flow_node(): 372 yield event 373 else: 374 yield self.process_empty_scalar(token.end_marker) 375 if self.scanner.check(ValueToken): 376 token = self.scanner.get() 377 if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken): 378 for event in self.parse_flow_node(): 379 yield event 380 else: 381 yield self.process_empty_scalar(token.end_marker) 382 else: 383 token = self.scanner.peek() 384 yield self.process_empty_scalar(token.start_marker) 385 token = self.scanner.peek() 386 yield CollectionEndEvent(token.start_marker, token.start_marker) 387 else: 388 for event in self.parse_flow_node(): 389 yield event 390 if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken): 391 token = self.scanner.peek() 392 raise ParserError("while scanning a flow sequence", start_marker, 393 "expected ',' or ']', but got %r" % token.id, token.start_marker) 394 if self.scanner.check(FlowEntryToken): 395 self.scanner.get() 396 if not self.scanner.check(FlowSequenceEndToken): 397 token = self.scanner.peek() 398 raise ParserError("while scanning a flow sequence", start_marker, 399 "expected ']', but found %r" % token.id, token.start_marker) 400 token = self.scanner.get() 401 yield CollectionEndEvent(token.start_marker, token.end_marker) 256 402 257 403 def parse_flow_mapping(self): 258 mapping = [] 259 if not self.is_token(FlowMappingStartToken): 260 self.fail('FLOW-MAPPING-START is expected') 261 self.get_token() 262 while not self.is_token(FlowMappingEndToken): 263 if self.is_token(KeyToken): 264 self.get_token() 265 key = None 266 value = None 267 if not self.is_token(ValueToken): 268 key = 
self.parse_flow_node() 269 if self.is_token(ValueToken): 270 self.get_token() 271 if not self.is_token(EntryToken, FlowMappingEndToken): 272 value = self.parse_flow_node() 273 mapping.append((key, value)) 274 else: 275 mapping.append((self.parse_flow_node(), None)) 276 if not self.is_token(EntryToken, FlowMappingEndToken): 277 self.fail("ENTRY or FLOW-MAPPING-END are expected") 278 if self.is_token(EntryToken): 279 self.get_token() 280 if not self.is_token(FlowMappingEndToken): 281 self.fail('FLOW-MAPPING-END is expected') 282 self.get_token() 283 return mapping 284 285 def fail(self, message): 286 marker = self.scanner.peek_token().start_marker 287 raise ParserError(message+':\n'+marker.get_snippet()) 288 404 # flow_mapping ::= FLOW-MAPPING-START 405 # (flow_mapping_entry FLOW-ENTRY)* 406 # flow_mapping_entry? 407 # FLOW-MAPPING-END 408 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 409 token = self.scanner.get() 410 start_marker = token.start_marker 411 while not self.scanner.check(FlowMappingEndToken): 412 if self.scanner.check(KeyToken): 413 token = self.scanner.get() 414 if not self.scanner.check(ValueToken, 415 FlowEntryToken, FlowMappingEndToken): 416 for event in self.parse_flow_node(): 417 yield event 418 else: 419 yield self.process_empty_scalar(token.end_marker) 420 if self.scanner.check(ValueToken): 421 token = self.scanner.get() 422 if not self.scanner.check(FlowEntryToken, FlowMappingEndToken): 423 for event in self.parse_flow_node(): 424 yield event 425 else: 426 yield self.process_empty_scalar(token.end_marker) 427 else: 428 token = self.scanner.peek() 429 yield self.process_empty_scalar(token.start_marker) 430 else: 431 for event in self.parse_flow_node(): 432 yield event 433 yield self.process_empty_scalar(self.scanner.peek().start_marker) 434 if not self.scanner.check(FlowEntryToken, FlowMappingEndToken): 435 token = self.scanner.peek() 436 raise ParserError("while scanning a flow mapping", start_marker, 437 "expected ',' 
or '}', but got %r" % token.id, token.start_marker) 438 if self.scanner.check(FlowEntryToken): 439 self.scanner.get() 440 if not self.scanner.check(FlowMappingEndToken): 441 token = self.scanner.peek() 442 raise ParserError("while scanning a flow mapping", start_marker, 443 "expected '}', but found %r" % token.id, token.start_marker) 444 token = self.scanner.get() 445 yield CollectionEndEvent(token.start_marker, token.end_marker) 446 447 def process_empty_scalar(self, marker): 448 return ScalarEvent(None, None, u'', marker, marker) 449 -
branches/pyyaml3000/lib/yaml/scanner.py
r48 r51 15 15 16 16 class ScannerError(YAMLError): 17 # TODO:18 17 # ScannerError: while reading a quoted string 19 18 # in '...', line 5, column 10: … … 24 23 # key: "valu\?e" 25 24 # ^ 25 26 26 def __init__(self, context=None, context_marker=None, 27 27 problem=None, problem_marker=None): … … 42 42 43 43 class SimpleKey: 44 # See below simple keys treatment. 45 44 46 def __init__(self, token_number, required, index, line, column, marker): 45 47 self.token_number = token_number … … 115 117 self.possible_simple_keys = {} 116 118 117 # Two public methods. 118 119 def peek_token(self): 120 """Get the current token.""" 119 # Public methods. 120 121 def check(self, *choices): 122 # Check if the next token is one of the given types. 123 while self.need_more_tokens(): 124 self.fetch_more_tokens() 125 if self.tokens: 126 for choice in choices: 127 if isinstance(self.tokens[0], choice): 128 return True 129 return False 130 131 def peek(self): 132 # Return the next token, but do not delete it from the queue. 121 133 while self.need_more_tokens(): 122 134 self.fetch_more_tokens() … … 124 136 return self.tokens[0] 125 137 126 def get _token(self):127 "Get the current token and remove it from the list of pending tokens."""138 def get(self): 139 # Return the next token. 128 140 while self.need_more_tokens(): 129 141 self.fetch_more_tokens() … … 131 143 self.tokens_taken += 1 132 144 return self.tokens.pop(0) 145 146 def __iter__(self): 147 # Iterator protocol. 148 while self.need_more_tokens(): 149 self.fetch_more_tokens() 150 while self.tokens: 151 self.tokens_taken += 1 152 yield self.tokens.pop(0) 153 while self.need_more_tokens(): 154 self.fetch_more_tokens() 133 155 134 156 # Private methods. … … 164 186 return self.fetch_stream_end() 165 187 166 # Is it the byte order mark?167 if ch == u'\uFEFF':168 return self.fetch_bom()169 170 188 # Is it a directive? 
171 189 if ch == u'%' and self.check_directive(): … … 198 216 return self.fetch_flow_mapping_end() 199 217 200 # Is it the entry indicator? 201 if ch in u'-,' and self.check_entry(): 202 return self.fetch_entry() 218 # Is it the flow entry indicator? 219 if ch in u',': 220 return self.fetch_flow_entry() 221 222 # Is it the block entry indicator? 223 if ch in u'-' and self.check_block_entry(): 224 return self.fetch_block_entry() 203 225 204 226 # Is it the key indicator? … … 365 387 self.done = True 366 388 367 def fetch_bom(self):368 # We consider the BOM marker as a DOCUMENT-END indicator unless it's369 # the first character in the stream. It's a reasonable approximation370 # of the specification requirements. We can follow the specification371 # literally, but it will require a new token class. Probably later.372 373 # We ignore BOM if it is the first character in the stream.374 if self.reader.index == 0:375 slef.reader.forward()376 377 # Otherwise we issue DOCUMENT-END.378 else:379 380 # Set the current intendation to -1.381 self.unwind_indent(-1)382 383 # Reset simple keys. Note that there could not be a block384 # collection after BOM.385 self.remove_possible_simple_key()386 self.allow_simple_key = False387 388 # Add DOCUMENT-END.389 start_marker = self.reader.get_marker()390 self.reader.forward()391 end_marker = self.reader.get_marker()392 self.tokens.append(DocumentEndToken(start_marker, end_marker))393 394 389 def fetch_directive(self): 395 390 … … 472 467 self.tokens.append(TokenClass(start_marker, end_marker)) 473 468 474 def fetch_entry(self): 469 def fetch_flow_entry(self): 470 471 # Simple keys are allowed after ','. 472 self.allow_simple_key = True 473 474 # Reset possible simple key on the current level. 475 self.remove_possible_simple_key() 476 477 # Add FLOW-ENTRY. 
478 start_marker = self.reader.get_marker() 479 self.reader.forward() 480 end_marker = self.reader.get_marker() 481 self.tokens.append(FlowEntryToken(start_marker, end_marker)) 482 483 def fetch_block_entry(self): 475 484 476 485 # Block context needs additional checks. … … 488 497 self.tokens.append(BlockSequenceStartToken(marker, marker)) 489 498 490 # Simple keys are allowed after '-' and ','. 499 # It's an error for the block entry to occur in the flow context, 500 # but we let the parser detect this. 501 else: 502 pass 503 504 # Simple keys are allowed after '-'. 491 505 self.allow_simple_key = True 492 506 … … 494 508 self.remove_possible_simple_key() 495 509 496 # Add ENTRY.510 # Add BLOCK-ENTRY. 497 511 start_marker = self.reader.get_marker() 498 512 self.reader.forward() 499 513 end_marker = self.reader.get_marker() 500 self.tokens.append( EntryToken(start_marker, end_marker))514 self.tokens.append(BlockEntryToken(start_marker, end_marker)) 501 515 502 516 def fetch_key(self): … … 682 696 return True 683 697 684 def check_entry(self): 685 686 # ENTRY(flow context): ',' 687 if self.flow_level: 688 return self.reader.peek() == u',' 689 690 # ENTRY(block context): '-' (' '|'\n') 691 else: 692 return self.reader.peek() == u'-' \ 693 and self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' 698 def check_block_entry(self): 699 700 # BLOCK-ENTRY: '-' (' '|'\n') 701 return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' 694 702 695 703 def check_key(self): … … 738 746 # If we find a line break in the block context, we set the flag 739 747 # `allow_simple_key` on. 748 # The byte order mark is stripped if it's the first character in the 749 # stream. We do not yet support BOM inside the stream as the 750 # specification requires. Any such mark will be considered as a part 751 # of the document. 
752 if self.reader.index == 0 and self.reader.peek() == u'\uFEFF': 753 self.reader.forward() 740 754 found = False 741 755 while not found: … … 981 995 # 982 996 # This is the folding according to the specification: 983 #984 #if folded and line_break == u'\n' \985 #and leading_non_space and self.reader.peek() not in u' \t':986 #if not breaks:987 #chunks.append(u' ')988 #else:989 #chunks.append(line_break)990 #997 998 if folded and line_break == u'\n' \ 999 and leading_non_space and self.reader.peek() not in u' \t': 1000 if not breaks: 1001 chunks.append(u' ') 1002 else: 1003 chunks.append(line_break) 1004 991 1005 # This is Clark Evans's interpretation (also in the spec 992 1006 # examples): 993 1007 # 994 if folded and line_break == u'\n':995 if not breaks:996 if self.reader.peek() not in ' \t':997 chunks.append(u' ')998 else:999 chunks.append(line_break)1000 else:1001 chunks.append(line_break)1008 #if folded and line_break == u'\n': 1009 # if not breaks: 1010 # if self.reader.peek() not in ' \t': 1011 # chunks.append(u' ') 1012 # else: 1013 # chunks.append(line_break) 1014 #else: 1015 # chunks.append(line_break) 1002 1016 else: 1003 1017 break -
branches/pyyaml3000/lib/yaml/tokens.py
r48 r51 4 4 self.start_marker = start_marker 5 5 self.end_marker = end_marker 6 def __repr__(self): 7 attributes = [key for key in self.__dict__ 8 if not key.endswith('_marker')] 9 attributes.sort() 10 arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) 11 for key in attributes]) 12 return '%s(%s)' % (self.__class__.__name__, arguments) 13 14 #class BOMToken(Token): 15 # id = '<byte order mark>' 6 16 7 17 class DirectiveToken(Token): 8 code= '<directive>'18 id = '<directive>' 9 19 def __init__(self, name, value, start_marker, end_marker): 10 20 self.name = name … … 14 24 15 25 class DocumentStartToken(Token): 16 code= '<document start>'26 id = '<document start>' 17 27 18 28 class DocumentEndToken(Token): 19 code= '<document end>'29 id = '<document end>' 20 30 21 31 class StreamEndToken(Token): 22 code= '<stream end>'32 id = '<stream end>' 23 33 24 34 class BlockSequenceStartToken(Token): 25 code= '<block sequence start>'35 id = '<block sequence start>' 26 36 27 37 class BlockMappingStartToken(Token): 28 code= '<block mapping end>'38 id = '<block mapping end>' 29 39 30 40 class BlockEndToken(Token): 31 code= '<block end>'41 id = '<block end>' 32 42 33 43 class FlowSequenceStartToken(Token): 34 code= '['44 id = '[' 35 45 36 46 class FlowMappingStartToken(Token): 37 code= '{'47 id = '{' 38 48 39 49 class FlowSequenceEndToken(Token): 40 code= ']'50 id = ']' 41 51 42 52 class FlowMappingEndToken(Token): 43 code= '}'53 id = '}' 44 54 45 55 class KeyToken(Token): 46 code= '?'56 id = '?' 
47 57 48 58 class ValueToken(Token): 49 code= ':'59 id = ':' 50 60 51 class EntryToken(Token): 52 code = '- or ,' 61 class BlockEntryToken(Token): 62 id = '-' 63 64 class FlowEntryToken(Token): 65 id = ',' 53 66 54 67 class AliasToken(Token): 55 code= '<alias>'68 id = '<alias>' 56 69 def __init__(self, value, start_marker, end_marker): 57 70 self.value = value … … 60 73 61 74 class AnchorToken(Token): 62 code= '<anchor>'75 id = '<anchor>' 63 76 def __init__(self, value, start_marker, end_marker): 64 77 self.value = value … … 67 80 68 81 class TagToken(Token): 69 code= '<tag>'82 id = '<tag>' 70 83 def __init__(self, value, start_marker, end_marker): 71 84 self.value = value … … 74 87 75 88 class ScalarToken(Token): 76 code= '<scalar>'89 id = '<scalar>' 77 90 def __init__(self, value, plain, start_marker, end_marker): 78 91 self.value = value -
branches/pyyaml3000/tests/data/spec-05-08.canonical
r38 r51 2 2 --- 3 3 !!map { 4 ? !!str "single" 5 : !!str "text", 4 6 ? !!str "double" 5 7 : !!str "text", 6 ? !!str "single"7 : !!str "text",8 8 } -
branches/pyyaml3000/tests/data/spec-06-01.canonical
r38 r51 9 9 : !!seq [ 10 10 !!str "By two", 11 !!str "Also by two", 11 12 !!str "Still by two", 12 !!str "Again by two",13 13 ] 14 14 } -
branches/pyyaml3000/tests/data/spec-09-20.canonical
r38 r51 5 5 !!str "\n\n# detected\n", 6 6 !!str " explicit\n", 7 !!str "\t detected\n",7 !!str "\t\ndetected\n", 8 8 ] -
branches/pyyaml3000/tests/data/spec-09-30.canonical
r44 r51 2 2 --- 3 3 !!str "folded line\n\ 4 next line\n\ 4 next line\n\n\ 5 5 \ * bullet\n\ 6 \ * list\n\ 6 \ * list\n\n\ 7 7 last line\n" -
branches/pyyaml3000/tests/data/spec-09-31.canonical
r44 r51 2 2 --- 3 3 !!str "folded line\n\ 4 next line\n\ 4 next line\n\n\ 5 5 \ * bullet\n\ 6 \ * list\n\ 6 \ * list\n\n\ 7 7 last line\n" -
branches/pyyaml3000/tests/data/spec-09-32.canonical
r44 r51 2 2 --- 3 3 !!str "folded line\n\ 4 next line\n\ 4 next line\n\n\ 5 5 \ * bullet\n\ 6 \ * list\n\ 6 \ * list\n\n\ 7 7 last line\n" -
branches/pyyaml3000/tests/data/spec-09-33.canonical
r44 r51 2 2 --- 3 3 !!str "folded line\n\ 4 next line\n\ 4 next line\n\n\ 5 5 \ * bullet\n\ 6 \ * list\n\ 6 \ * list\n\n\ 7 7 last line\n" -
branches/pyyaml3000/tests/test_appliance.py
r48 r51 1 1 2 2 import unittest, os 3 4 from yaml.tokens import * 5 from yaml.events import * 3 6 4 7 class TestAppliance(unittest.TestCase): … … 33 36 add_tests = classmethod(add_tests) 34 37 35 class Node:36 def __repr__(self):37 args = []38 for attribute in ['anchor', 'tag', 'value']:39 if hasattr(self, attribute):40 args.append(repr(getattr(self, attribute)))41 return "%s(%s)" % (self.__class__.__name__, ', '.join(args))42 43 class AliasNode(Node):44 def __init__(self, anchor):45 self.anchor = anchor46 47 class ScalarNode(Node):48 def __init__(self, anchor, tag, value):49 self.anchor = anchor50 self.tag = tag51 self.value = value52 53 class SequenceNode(Node):54 def __init__(self, anchor, tag, value):55 self.anchor = anchor56 self.tag = tag57 self.value = value58 59 class MappingNode(Node):60 def __init__(self, anchor, tag, value):61 self.anchor = anchor62 self.tag = tag63 self.value = value64 65 class Token:66 def __repr__(self):67 args = []68 if hasattr(self, 'value'):69 args.append(repr(self.value))70 return "%s(%s)" % (self.__class__.__name__, ''.join(args))71 72 class StreamEndToken(Token):73 pass74 75 class DirectiveToken(Token):76 pass77 78 class DocumentStartToken(Token):79 pass80 81 class SequenceStartToken(Token):82 pass83 84 class MappingStartToken(Token):85 pass86 87 class SequenceEndToken(Token):88 pass89 90 class MappingEndToken(Token):91 pass92 93 class KeyToken(Token):94 pass95 96 class ValueToken(Token):97 pass98 99 class EntryToken(Token):100 pass101 102 class AliasToken(Token):103 def __init__(self, value):104 self.value = value105 106 class AnchorToken(Token):107 def __init__(self, value):108 self.value = value109 110 class TagToken(Token):111 def __init__(self, value):112 self.value = value113 114 class ScalarToken(Token):115 def __init__(self, value):116 self.value = value117 118 38 class Error(Exception): 119 39 pass … … 121 41 class CanonicalScanner: 122 42 123 def __init__(self, source, data): 124 self.source = source 43 def 
__init__(self, data): 125 44 self.data = unicode(data, 'utf-8')+u'\0' 126 45 self.index = 0 … … 133 52 ch = self.data[self.index] 134 53 if ch == u'\0': 135 tokens.append(StreamEndToken( ))54 tokens.append(StreamEndToken(None, None)) 136 55 break 137 56 elif ch == u'%': … … 139 58 elif ch == u'-' and self.data[self.index:self.index+3] == u'---': 140 59 self.index += 3 141 tokens.append(DocumentStartToken( ))60 tokens.append(DocumentStartToken(None, None)) 142 61 elif ch == u'[': 143 62 self.index += 1 144 tokens.append( SequenceStartToken())63 tokens.append(FlowSequenceStartToken(None, None)) 145 64 elif ch == u'{': 146 65 self.index += 1 147 tokens.append( MappingStartToken())66 tokens.append(FlowMappingStartToken(None, None)) 148 67 elif ch == u']': 149 68 self.index += 1 150 tokens.append( SequenceEndToken())69 tokens.append(FlowSequenceEndToken(None, None)) 151 70 elif ch == u'}': 152 71 self.index += 1 153 tokens.append( MappingEndToken())72 tokens.append(FlowMappingEndToken(None, None)) 154 73 elif ch == u'?': 155 74 self.index += 1 156 tokens.append(KeyToken( ))75 tokens.append(KeyToken(None, None)) 157 76 elif ch == u':': 158 77 self.index += 1 159 tokens.append(ValueToken( ))78 tokens.append(ValueToken(None, None)) 160 79 elif ch == u',': 161 80 self.index += 1 162 tokens.append( EntryToken())81 tokens.append(FlowEntryToken(None, None)) 163 82 elif ch == u'*' or ch == u'&': 164 83 tokens.append(self.scan_alias()) … … 177 96 self.data[self.index+len(self.DIRECTIVE)] in u' \n\0': 178 97 self.index += len(self.DIRECTIVE) 179 return DirectiveToken( )98 return DirectiveToken('YAML', (1, 1), None, None) 180 99 181 100 def scan_alias(self): … … 189 108 self.index += 1 190 109 value = self.data[start:self.index] 191 return TokenClass(value )110 return TokenClass(value, None, None) 192 111 193 112 def scan_tag(self): … … 199 118 if value[0] == u'!': 200 119 value = 'tag:yaml.org,2002:'+value[1:] 120 elif value[0] == u'<' and value[-1] == u'>': 121 value = 
value[1:-1] 201 122 else: 202 value = value[1:-1]203 return TagToken(value )123 value = u'!'+value 124 return TagToken(value, None, None) 204 125 205 126 QUOTE_CODES = { … … 265 186 chunks.append(self.data[start:self.index]) 266 187 self.index += 1 267 return ScalarToken(u''.join(chunks) )188 return ScalarToken(u''.join(chunks), False, None, None) 268 189 269 190 def find_token(self): … … 282 203 class CanonicalParser: 283 204 284 def __init__(self, source, data): 285 self.scanner = CanonicalScanner(source, data) 205 def __init__(self, data): 206 self.scanner = CanonicalScanner(data) 207 self.events = [] 286 208 287 209 # stream: document* END 288 210 def parse_stream(self): 289 documents = []290 211 while not self.test_token(StreamEndToken): 291 212 if self.test_token(DirectiveToken, DocumentStartToken): 292 documents.append(self.parse_document())213 self.parse_document() 293 214 else: 294 215 raise Error("document is expected, got "+repr(self.tokens[self.index])) 295 return documents296 297 # document: DIRECTIVE? DOCUMENT-START node ?216 self.events.append(StreamEndEvent(None, None)) 217 218 # document: DIRECTIVE? DOCUMENT-START node 298 219 def parse_document(self): 299 220 node = None … … 301 222 self.consume_token(DirectiveToken) 302 223 self.consume_token(DocumentStartToken) 303 if self.test_token(TagToken, AliasToken, AnchorToken, TagToken, 304 SequenceStartToken, MappingStartToken, ScalarToken): 305 node = self.parse_node() 306 return node 224 self.parse_node() 307 225 308 226 # node: ALIAS | ANCHOR? TAG? (SCALAR|sequence|mapping) 309 227 def parse_node(self): 310 228 if self.test_token(AliasToken): 311 return AliasNode(self.get_value())229 self.events.append(AliasEvent(self.get_value(), None, None)) 312 230 else: 313 231 anchor = None 314 232 if self.test_token(AnchorToken): 315 233 anchor = self.get_value() 316 tag = None234 tag = u'!' 
317 235 if self.test_token(TagToken): 318 236 tag = self.get_value() 319 237 if self.test_token(ScalarToken): 320 return ScalarNode(anchor, tag, self.get_value()) 321 elif self.test_token(SequenceStartToken): 322 return SequenceNode(anchor, tag, self.parse_sequence()) 323 elif self.test_token(MappingStartToken): 324 return MappingNode(anchor, tag, self.parse_mapping()) 238 self.events.append(ScalarEvent(anchor, tag, self.get_value(), None, None)) 239 elif self.test_token(FlowSequenceStartToken): 240 self.events.append(SequenceEvent(anchor, tag, None, None)) 241 self.parse_sequence() 242 elif self.test_token(FlowMappingStartToken): 243 self.events.append(MappingEvent(anchor, tag, None, None)) 244 self.parse_mapping() 325 245 else: 326 246 raise Error("SCALAR, '[', or '{' is expected, got "+repr(self.tokens[self.index])) … … 328 248 # sequence: SEQUENCE-START (node (ENTRY node)*)? ENTRY? SEQUENCE-END 329 249 def parse_sequence(self): 330 values = [] 331 self.consume_token(SequenceStartToken) 332 if not self.test_token(SequenceEndToken): 333 values.append(self.parse_node()) 334 while not self.test_token(SequenceEndToken): 335 self.consume_token(EntryToken) 336 if not self.test_token(SequenceEndToken): 337 values.append(self.parse_node()) 338 self.consume_token(SequenceEndToken) 339 return values 250 self.consume_token(FlowSequenceStartToken) 251 if not self.test_token(FlowSequenceEndToken): 252 self.parse_node() 253 while not self.test_token(FlowSequenceEndToken): 254 self.consume_token(FlowEntryToken) 255 if not self.test_token(FlowSequenceEndToken): 256 self.parse_node() 257 self.consume_token(FlowSequenceEndToken) 258 self.events.append(CollectionEndEvent(None, None)) 340 259 341 260 # mapping: MAPPING-START (map_entry (ENTRY map_entry)*)? ENTRY? 
MAPPING-END 342 261 def parse_mapping(self): 343 values = [] 344 self.consume_token(MappingStartToken) 345 if not self.test_token(MappingEndToken): 346 values.append(self.parse_map_entry()) 347 while not self.test_token(MappingEndToken): 348 self.consume_token(EntryToken) 349 if not self.test_token(MappingEndToken): 350 values.append(self.parse_map_entry()) 351 self.consume_token(MappingEndToken) 352 return values 262 self.consume_token(FlowMappingStartToken) 263 if not self.test_token(FlowMappingEndToken): 264 self.parse_map_entry() 265 while not self.test_token(FlowMappingEndToken): 266 self.consume_token(FlowEntryToken) 267 if not self.test_token(FlowMappingEndToken): 268 self.parse_map_entry() 269 self.consume_token(FlowMappingEndToken) 270 self.events.append(CollectionEndEvent(None, None)) 353 271 354 272 # map_entry: KEY node VALUE node 355 273 def parse_map_entry(self): 356 274 self.consume_token(KeyToken) 357 key =self.parse_node()275 self.parse_node() 358 276 self.consume_token(ValueToken) 359 value = self.parse_node() 360 return (key, value) 277 self.parse_node() 361 278 362 279 def test_token(self, *choices): … … 379 296 self.tokens = self.scanner.scan() 380 297 self.index = 0 381 return self.parse_stream() 382 298 self.parse_stream() 299 return self.events 300 -
branches/pyyaml3000/tests/test_canonical.py
r43 r51 6 6 def _testCanonicalScanner(self, test_name, canonical_filename): 7 7 data = file(canonical_filename, 'rb').read() 8 scanner = test_appliance.CanonicalScanner( canonical_filename,data)8 scanner = test_appliance.CanonicalScanner(data) 9 9 tokens = scanner.scan() 10 #print tokens 10 #for token in tokens: 11 # print token 11 12 12 13 def _testCanonicalParser(self, test_name, canonical_filename): 13 14 data = file(canonical_filename, 'rb').read() 14 parser = test_appliance.CanonicalParser( canonical_filename,data)15 documents = parser.parse()16 #for document in documents:17 # print document15 parser = test_appliance.CanonicalParser(data) 16 events = parser.parse() 17 #for event in events: 18 # print event 18 19 19 20 TestCanonicalAppliance.add_tests('testCanonicalScanner', '.canonical') -
branches/pyyaml3000/tests/test_errors.py
r47 r51 19 19 reader = Reader(file(filename, 'rb')) 20 20 scanner = Scanner(reader) 21 while scanner.peek_token(): 22 scanner.get_token() 21 return list(scanner) 23 22 24 23 def _load_string(self, filename): 25 24 reader = Reader(file(filename, 'rb').read()) 26 25 scanner = Scanner(reader) 27 while scanner.peek_token(): 28 scanner.get_token() 26 return list(scanner) 29 27 30 28 TestErrors.add_tests('testErrors', '.error-message') -
branches/pyyaml3000/tests/test_structure.py
r48 r51 13 13 try: 14 14 parser = Parser(Scanner(Reader(file(data_filename, 'rb')))) 15 node1 = parser.parse() 16 node1 = [self._convert(n) for n in node1] 15 node1 = [] 16 while not parser.check(StreamEndEvent): 17 node1.append(self._convert(parser)) 18 parser.get() 17 19 if len(node1) == 1: 18 20 node1 = node1[0] … … 26 28 raise 27 29 28 def _convert(self, node): 29 if isinstance(node, ScalarNode): 30 return True 31 elif isinstance(node, SequenceNode): 30 def _convert(self, parser): 31 if parser.check(ScalarEvent): 32 event = parser.get() 33 if event.tag or event.anchor or event.value: 34 return True 35 else: 36 return None 37 elif parser.check(SequenceEvent): 38 parser.get() 32 39 sequence = [] 33 for item in node.value: 34 sequence.append(self._convert(item)) 40 while not parser.check(CollectionEndEvent): 41 sequence.append(self._convert(parser)) 42 parser.get() 35 43 return sequence 36 elif isinstance(node, MappingNode): 44 elif parser.check(MappingEvent): 45 parser.get() 37 46 mapping = [] 38 for key, value in node.value: 39 mapping.append((self._convert(key), self._convert(value))) 47 while not parser.check(CollectionEndEvent): 48 key = self._convert(parser) 49 value = self._convert(parser) 50 mapping.append((key, value)) 51 parser.get() 40 52 return mapping 41 elif isinstance(node, AliasNode): 53 elif parser.check(AliasEvent): 54 parser.get() 42 55 return '*' 43 56 else: 44 return node 57 parser.get() 58 return '?' 
45 59 46 60 TestStructure.add_tests('testStructure', '.data', '.structure') … … 49 63 50 64 def _testParser(self, test_name, data_filename, canonical_filename): 51 documents1 = None52 documents2 = None65 events1 = None 66 events2 = None 53 67 try: 54 68 parser = Parser(Scanner(Reader(file(data_filename, 'rb')))) 55 documents1 = parser.parse()56 canonical = test_appliance.CanonicalParser( canonical_filename,file(canonical_filename, 'rb').read())57 documents2 = canonical.parse()58 self._compare( documents1, documents2)69 events1 = list(iter(parser)) 70 canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read()) 71 events2 = canonical.parse() 72 self._compare(events1, events2) 59 73 except: 60 74 print … … 63 77 print "DATA2:" 64 78 print file(canonical_filename, 'rb').read() 65 print " DOCUMENTS1:", documents166 print " DOCUMENTS2:", documents279 print "EVENTS1:", events1 80 print "EVENTS2:", events2 67 81 raise 68 82 69 def _compare(self, value1, value2): 70 if value1 is None and hasattr(value2, 'tag') and value2.tag == 'tag:yaml.org,2002:null': 71 return 72 self.failUnlessEqual(type(value1), type(value2)) 73 if isinstance(value1, list) or isinstance(value1, tuple): 74 self.failUnlessEqual(len(value1), len(value2)) 75 for item1, item2 in zip(value1, value2): 76 self._compare(item1, item2) 77 else: 78 self.failUnlessEqual(value1.__class__.__name__, value2.__class__.__name__) 79 if isinstance(value1, SequenceNode): # or isinstance(value1, MappingNode): 80 self._compare(value1.value, value2.value) 81 elif isinstance(value1, ScalarNode): 82 self.failUnlessEqual(value1.value, value2.value) 83 def _compare(self, events1, events2): 84 self.failUnlessEqual(len(events1), len(events2)) 85 for event1, event2 in zip(events1, events2): 86 self.failUnlessEqual(event1.__class__, event2.__class__) 87 if isinstance(event1, AliasEvent): 88 #self.failUnlessEqual(event1.name, event2.name) 89 pass 90 elif isinstance(event1, ScalarEvent): 91 
#self.failUnlessEqual(event1.anchor, event2.anchor) 92 #self.failUnlessEqual(event1.tag, event2.tag) 93 self.failUnlessEqual(event1.value, event2.value) 94 if isinstance(event1, CollectionEvent): 95 #self.failUnlessEqual(event1.anchor, event2.anchor) 96 #self.failUnlessEqual(event1.tag, event2.tag) 97 pass 98 83 99 84 100 TestParser.add_tests('testParser', '.data', '.canonical') … … 87 103 88 104 def _testParserOnCanonical(self, test_name, canonical_filename): 89 documents1 = None90 documents2 = None105 events1 = None 106 events2 = None 91 107 try: 92 108 parser = Parser(Scanner(Reader(file(canonical_filename, 'rb')))) 93 documents1 = parser.parse()94 canonical = test_appliance.CanonicalParser( canonical_filename,file(canonical_filename, 'rb').read())95 documents2 = canonical.parse()96 self._compare( documents1, documents2)109 events1 = list(iter(parser)) 110 canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read()) 111 events2 = canonical.parse() 112 self._compare(events1, events2) 97 113 except: 98 114 print 99 115 print "DATA:" 100 116 print file(canonical_filename, 'rb').read() 101 print " DOCUMENTS1:", documents1102 print " DOCUMENTS2:", documents2117 print "EVENTS1:", events1 118 print "EVENTS2:", events2 103 119 raise 104 120 105 def _compare(self, value1, value2): 106 if value1 is None and hasattr(value2, 'tag') and value2.tag == 'tag:yaml.org,2002:null': 107 return 108 self.failUnlessEqual(type(value1), type(value2)) 109 if isinstance(value1, list) or isinstance(value1, tuple): 110 self.failUnlessEqual(len(value1), len(value2)) 111 for item1, item2 in zip(value1, value2): 112 self._compare(item1, item2) 113 else: 114 self.failUnlessEqual(value1.__class__.__name__, value2.__class__.__name__) 115 if isinstance(value1, SequenceNode) or isinstance(value1, MappingNode): 116 self._compare(value1.value, value2.value) 117 elif isinstance(value1, ScalarNode): 118 self.failUnlessEqual(value1.value, value2.value) 121 def _compare(self, 
events1, events2): 122 self.failUnlessEqual(len(events1), len(events2)) 123 for event1, event2 in zip(events1, events2): 124 self.failUnlessEqual(event1.__class__, event2.__class__) 125 if isinstance(event1, AliasEvent): 126 self.failUnlessEqual(event1.name, event2.name) 127 elif isinstance(event1, ScalarEvent): 128 self.failUnlessEqual(event1.anchor, event2.anchor) 129 self.failUnlessEqual(event1.tag, event2.tag) 130 self.failUnlessEqual(event1.value, event2.value) 131 if isinstance(event1, CollectionEvent): 132 self.failUnlessEqual(event1.anchor, event2.anchor) 133 self.failUnlessEqual(event1.tag, event2.tag) 119 134 120 135 TestParserOnCanonical.add_tests('testParserOnCanonical', '.canonical') -
branches/pyyaml3000/tests/test_tokens.py
r48 r51 42 42 FlowMappingStartToken: '{', 43 43 FlowMappingEndToken: '}', 44 EntryToken: ',', 44 BlockEntryToken: ',', 45 FlowEntryToken: ',', 45 46 KeyToken: '?', 46 47 ValueToken: ':', … … 53 54 scanner = Scanner(Reader(file(data_filename, 'rb'))) 54 55 tokens1 = [] 55 while not isinstance(scanner.peek_token(), StreamEndToken): 56 tokens1.append(scanner.get_token()) 56 for token in scanner: 57 if not isinstance(token, StreamEndToken): 58 tokens1.append(token) 57 59 tokens1 = [self.replaces[t.__class__] for t in tokens1] 58 60 self.failUnlessEqual(tokens1, tokens2) … … 75 77 scanner = Scanner(Reader(file(filename, 'rb'))) 76 78 tokens = [] 77 while not isinstance(scanner.peek_token(), StreamEndToken): 78 tokens.append(scanner.get_token().__class__.__name__) 79 for token in scanner: 80 if not isinstance(token, StreamEndToken): 81 tokens.append(token.__class__.__name__) 79 82 except: 80 83 print
Note: See TracChangeset
for help on using the changeset viewer.
