auto merge of #11490 : wting/rust/wting_11362_update_extract_tests, r=alexcrichton

Refactored the file quite a bit, I can add unit tests if desired. There's a few changes from the previous version's behavior:

- destination directory will be created if it doesn't exist
- strings and file is written as unicode

I have a few questions, but will ask them in #11362.
This commit is contained in:
bors 2014-01-28 14:11:33 -08:00
commit edfb546e4b
18 changed files with 333 additions and 208 deletions

View file

@ -1,77 +1,209 @@
# xfail-license
# -*- coding: utf-8 -*-
"""
Script for extracting compilable fragments from markdown documentation. See
prep.js for a description of the format recognized by this tool. Expects
a directory fragments/ to exist under the current directory, and writes the
fragments in there as individual .rs files.
"""
from __future__ import print_function
from codecs import open
from collections import deque
from itertools import imap
import os
import re
import sys
# Script for extracting compilable fragments from markdown
# documentation. See prep.js for a description of the format
# recognized by this tool. Expects a directory fragments/ to exist
# under the current directory, and writes the fragments in there as
# individual .rs files.
# regexes
CHAPTER_NAME_REGEX = re.compile(r'# (.*)')
CODE_BLOCK_DELIM_REGEX = re.compile(r'~~~')
COMMENT_REGEX = re.compile(r'^# ')
COMPILER_DIRECTIVE_REGEX = re.compile(r'\#\[(.*)\];')
ELLIPSES_REGEX = re.compile(r'\.\.\.')
EXTERN_MOD_REGEX = re.compile(r'\bextern mod extra\b')
MAIN_FUNCTION_REGEX = re.compile(r'\bfn main\b')
TAGS_REGEX = re.compile(r'\.([\w-]*)')
import sys, re
# tags to ignore
IGNORE_TAGS = \
frozenset(["abnf", "ebnf", "field", "keyword", "notrust", "precedence"])
if len(sys.argv) < 3:
print("Please provide an input filename")
sys.exit(1)
# header for code snippet files
OUTPUT_BLOCK_HEADER = '\n'.join((
"#[ deny(warnings) ];",
"#[ allow(unused_variable) ];",
"#[ allow(dead_assignment) ];",
"#[ allow(unused_mut) ];",
"#[ allow(attribute_usage) ];",
"#[ allow(dead_code) ];",
"#[ feature(macro_rules, globs, struct_variant, managed_boxes) ];\n",))
filename = sys.argv[1]
dest = sys.argv[2]
f = open(filename)
lines = f.readlines()
f.close()
cur = 0
line = ""
chapter = ""
chapter_n = 0
def add_extern_mod(block):
if not has_extern_mod(block):
# add `extern mod extra;` after compiler directives
directives = []
while len(block) and is_compiler_directive(block[0]):
directives.append(block.popleft())
while cur < len(lines):
line = lines[cur]
cur += 1
chap = re.match("# (.*)", line)
if chap:
chapter = re.sub(r"\W", "_", chap.group(1)).lower()
chapter_n = 1
elif re.match("~~~", line):
# Parse the tags that open a code block in the pandoc format:
# ~~~ {.tag1 .tag2}
tags = re.findall("\.([\w-]*)", line)
block = ""
ignore = "notrust" in tags or "ignore" in tags
# Some tags used by the language ref that indicate not rust
ignore |= "ebnf" in tags
ignore |= "abnf" in tags
ignore |= "keyword" in tags
ignore |= "field" in tags
ignore |= "precedence" in tags
xfail = "xfail-test" in tags
while cur < len(lines):
line = lines[cur]
cur += 1
if re.match("~~~", line):
break
else:
# Lines beginning with '# ' are turned into valid code
line = re.sub("^# ", "", line)
# Allow ellipses in code snippets
line = re.sub("\.\.\.", "", line)
block += line
if not ignore:
if not re.search(r"\bfn main\b", block):
block = "fn main() {\n" + block + "\n}\n"
if not re.search(r"\bextern mod extra\b", block):
block = "extern mod extra;\n" + block
block = """#[ deny(warnings) ];
#[ allow(unused_variable) ];\n
#[ allow(dead_assignment) ];\n
#[ allow(unused_mut) ];\n
#[ allow(attribute_usage) ];\n
#[ allow(dead_code) ];\n
#[ feature(macro_rules, globs, struct_variant, managed_boxes) ];\n
""" + block
if xfail:
block = "// xfail-test\n" + block
filename = (dest + "/" + str(chapter)
+ "_" + str(chapter_n) + ".rs")
chapter_n += 1
f = open(filename, 'w')
f.write(block)
f.close()
block.appendleft("\nextern mod extra;\n\n")
block.extendleft(reversed(directives))
return block
def add_main_function(block):
if not has_main_function(block):
prepend_spaces = lambda x: ' ' + x
block = deque(imap(prepend_spaces, block))
block.appendleft("\nfn main() {\n")
block.append("\n}\n")
return block
def extract_code_fragments(dest_dir, lines):
"""
Extracts all the code fragments from a file that do not have ignored tags
writing them to the following file:
[dest dir]/[chapter name]_[chapter_index].rs
"""
chapter_name = None
chapter_index = 0
for line in lines:
if is_chapter_title(line):
chapter_name = get_chapter_name(line)
chapter_index = 1
continue
if not is_code_block_delim(line):
continue
assert chapter_name, "Chapter name missing for code block."
tags = get_tags(line)
block = get_code_block(lines)
if tags & IGNORE_TAGS:
continue
block = add_extern_mod(add_main_function(block))
block.appendleft(OUTPUT_BLOCK_HEADER)
if "ignore" in tags:
block.appendleft("//xfail-test\n")
elif "should_fail" in tags:
block.appendleft("//should-fail\n")
output_filename = os.path.join(
dest_dir,
chapter_name + '_' + str(chapter_index) + '.rs')
write_file(output_filename, block)
chapter_index += 1
def has_extern_mod(block):
"""Checks if a code block has the line `extern mod extra`."""
find_extern_mod = lambda x: re.search(EXTERN_MOD_REGEX, x)
return any(imap(find_extern_mod, block))
def has_main_function(block):
"""Checks if a code block has a main function."""
find_main_fn = lambda x: re.search(MAIN_FUNCTION_REGEX, x)
return any(imap(find_main_fn, block))
def is_chapter_title(line):
return re.match(CHAPTER_NAME_REGEX, line)
def is_code_block_delim(line):
return re.match(CODE_BLOCK_DELIM_REGEX, line)
def is_compiler_directive(line):
return re.match(COMPILER_DIRECTIVE_REGEX, line)
def get_chapter_name(line):
"""Get the chapter name from a `# Containers` line."""
return re.sub(
r'\W',
'_',
re.match(CHAPTER_NAME_REGEX, line).group(1)).lower()
def get_code_block(lines):
"""
Get a code block surrounded by ~~~, for example:
1: ~~~ { .tag }
2: let u: ~[u32] = ~[0, 1, 2];
3: let v: &[u32] = &[0, 1, 2, 3];
4: let w: [u32, .. 5] = [0, 1, 2, 3, 4];
5:
6: println!("u: {}, v: {}, w: {}", u.len(), v.len(), w.len());
7: ~~~
Returns lines 2-6. Assumes line 1 has been consumed by the caller.
"""
strip_comments = lambda x: re.sub(COMMENT_REGEX, '', x)
strip_ellipses = lambda x: re.sub(ELLIPSES_REGEX, '', x)
result = deque()
for line in lines:
if is_code_block_delim(line):
break
result.append(strip_comments(strip_ellipses(line)))
return result
def get_lines(filename):
with open(filename) as f:
for line in f:
yield line
def get_tags(line):
"""
Retrieves all tags from the line format:
~~~ { .tag1 .tag2 .tag3 }
"""
return set(re.findall(TAGS_REGEX, line))
def write_file(filename, lines):
with open(filename, 'w', encoding='utf-8') as f:
for line in lines:
f.write(unicode(line, encoding='utf-8', errors='replace'))
def main(argv=None):
if not argv:
argv = sys.argv
if len(sys.argv) < 2:
sys.stderr.write("Please provide an input filename.")
sys.exit(1)
elif len(sys.argv) < 3:
sys.stderr.write("Please provide a destination directory.")
sys.exit(1)
input_file = sys.argv[1]
dest_dir = sys.argv[2]
if not os.path.exists(input_file):
sys.stderr.write("Input file does not exist.")
sys.exit(1)
if not os.path.exists(dest_dir):
os.mkdir(dest_dir)
extract_code_fragments(dest_dir, get_lines(input_file))
if __name__ == "__main__":
sys.exit(main())