auto merge of #11490 : wting/rust/wting_11362_update_extract_tests, r=alexcrichton
I refactored the file quite a bit; I can add unit tests if desired. There are a few changes from the previous version's behavior: - the destination directory will be created if it doesn't exist - strings and the output file are written as Unicode. I have a few questions, but will ask them in #11362.
This commit is contained in:
commit
edfb546e4b
18 changed files with 333 additions and 208 deletions
|
|
@ -1,77 +1,209 @@
|
|||
# xfail-license
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Script for extracting compilable fragments from markdown documentation. See
|
||||
prep.js for a description of the format recognized by this tool. Expects
|
||||
a directory fragments/ to exist under the current directory, and writes the
|
||||
fragments in there as individual .rs files.
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from codecs import open
|
||||
from collections import deque
|
||||
from itertools import imap
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Script for extracting compilable fragments from markdown
|
||||
# documentation. See prep.js for a description of the format
|
||||
# recognized by this tool. Expects a directory fragments/ to exist
|
||||
# under the current directory, and writes the fragments in there as
|
||||
# individual .rs files.
|
||||
# Pre-compiled regexes shared by the helper functions in this script.
# Patterns used with `match` are implicitly anchored at the start of the line.
CHAPTER_NAME_REGEX = re.compile(r'# (.*)')
CODE_BLOCK_DELIM_REGEX = re.compile(r'~~~')
COMMENT_REGEX = re.compile(r'^# ')
COMPILER_DIRECTIVE_REGEX = re.compile(r'\#\[(.*)\];')
ELLIPSES_REGEX = re.compile(r'\.\.\.')
EXTERN_MOD_REGEX = re.compile(r'\bextern mod extra\b')
MAIN_FUNCTION_REGEX = re.compile(r'\bfn main\b')
# `+` rather than `*`: a stray '.' on the delimiter line must not produce an
# empty tag name.
TAGS_REGEX = re.compile(r'\.([\w-]+)')
|
||||
|
||||
import sys, re
|
||||
# tags to ignore
|
||||
IGNORE_TAGS = \
|
||||
frozenset(["abnf", "ebnf", "field", "keyword", "notrust", "precedence"])
|
||||
|
||||
if len(sys.argv) < 3:
|
||||
print("Please provide an input filename")
|
||||
sys.exit(1)
|
||||
# header for code snippet files
|
||||
OUTPUT_BLOCK_HEADER = '\n'.join((
|
||||
"#[ deny(warnings) ];",
|
||||
"#[ allow(unused_variable) ];",
|
||||
"#[ allow(dead_assignment) ];",
|
||||
"#[ allow(unused_mut) ];",
|
||||
"#[ allow(attribute_usage) ];",
|
||||
"#[ allow(dead_code) ];",
|
||||
"#[ feature(macro_rules, globs, struct_variant, managed_boxes) ];\n",))
|
||||
|
||||
filename = sys.argv[1]
|
||||
dest = sys.argv[2]
|
||||
f = open(filename)
|
||||
lines = f.readlines()
|
||||
f.close()
|
||||
|
||||
cur = 0
|
||||
line = ""
|
||||
chapter = ""
|
||||
chapter_n = 0
|
||||
def add_extern_mod(block):
|
||||
if not has_extern_mod(block):
|
||||
# add `extern mod extra;` after compiler directives
|
||||
directives = []
|
||||
while len(block) and is_compiler_directive(block[0]):
|
||||
directives.append(block.popleft())
|
||||
|
||||
while cur < len(lines):
|
||||
line = lines[cur]
|
||||
cur += 1
|
||||
chap = re.match("# (.*)", line)
|
||||
if chap:
|
||||
chapter = re.sub(r"\W", "_", chap.group(1)).lower()
|
||||
chapter_n = 1
|
||||
elif re.match("~~~", line):
|
||||
# Parse the tags that open a code block in the pandoc format:
|
||||
# ~~~ {.tag1 .tag2}
|
||||
tags = re.findall("\.([\w-]*)", line)
|
||||
block = ""
|
||||
ignore = "notrust" in tags or "ignore" in tags
|
||||
# Some tags used by the language ref that indicate not rust
|
||||
ignore |= "ebnf" in tags
|
||||
ignore |= "abnf" in tags
|
||||
ignore |= "keyword" in tags
|
||||
ignore |= "field" in tags
|
||||
ignore |= "precedence" in tags
|
||||
xfail = "xfail-test" in tags
|
||||
while cur < len(lines):
|
||||
line = lines[cur]
|
||||
cur += 1
|
||||
if re.match("~~~", line):
|
||||
break
|
||||
else:
|
||||
# Lines beginning with '# ' are turned into valid code
|
||||
line = re.sub("^# ", "", line)
|
||||
# Allow ellipses in code snippets
|
||||
line = re.sub("\.\.\.", "", line)
|
||||
block += line
|
||||
if not ignore:
|
||||
if not re.search(r"\bfn main\b", block):
|
||||
block = "fn main() {\n" + block + "\n}\n"
|
||||
if not re.search(r"\bextern mod extra\b", block):
|
||||
block = "extern mod extra;\n" + block
|
||||
block = """#[ deny(warnings) ];
|
||||
#[ allow(unused_variable) ];\n
|
||||
#[ allow(dead_assignment) ];\n
|
||||
#[ allow(unused_mut) ];\n
|
||||
#[ allow(attribute_usage) ];\n
|
||||
#[ allow(dead_code) ];\n
|
||||
#[ feature(macro_rules, globs, struct_variant, managed_boxes) ];\n
|
||||
""" + block
|
||||
if xfail:
|
||||
block = "// xfail-test\n" + block
|
||||
filename = (dest + "/" + str(chapter)
|
||||
+ "_" + str(chapter_n) + ".rs")
|
||||
chapter_n += 1
|
||||
f = open(filename, 'w')
|
||||
f.write(block)
|
||||
f.close()
|
||||
block.appendleft("\nextern mod extra;\n\n")
|
||||
block.extendleft(reversed(directives))
|
||||
|
||||
return block
|
||||
|
||||
|
||||
def add_main_function(block):
    """
    Wrap `block` in a `fn main` body unless it already defines one.

    When no line of `block` matches the main-function pattern, a new deque is
    returned in which every original line is indented and the whole block is
    surrounded by the opening and closing lines of `fn main`. Otherwise
    `block` is returned unchanged.
    """
    if has_main_function(block):
        return block

    wrapped = deque(' ' + code_line for code_line in block)
    wrapped.appendleft("\nfn main() {\n")
    wrapped.append("\n}\n")
    return wrapped
|
||||
|
||||
|
||||
def extract_code_fragments(dest_dir, lines):
    """
    Extract every code fragment that carries no ignored tag and write each
    one to its own file:

        [dest dir]/[chapter name]_[chapter_index].rs

    `lines` may be any iterable of text lines. The chapter index restarts at
    1 on every chapter title and only advances when a fragment is written.

    Raises AssertionError if a code block appears before any chapter title.
    """
    # get_code_block() must continue from where this loop stopped, so both
    # have to share a single iterator. Without iter(), passing a list would
    # make the helper restart from the first line on every call.
    lines = iter(lines)

    chapter_name = None
    chapter_index = 0

    for line in lines:
        if is_chapter_title(line):
            chapter_name = get_chapter_name(line)
            chapter_index = 1
            continue

        if not is_code_block_delim(line):
            continue

        assert chapter_name, "Chapter name missing for code block."
        tags = get_tags(line)
        # Always consume the block body, even when it is going to be skipped,
        # so that its closing delimiter is not mistaken for an opening one.
        block = get_code_block(lines)

        if tags & IGNORE_TAGS:
            continue

        block = add_extern_mod(add_main_function(block))
        block.appendleft(OUTPUT_BLOCK_HEADER)

        # The test directive has to precede the header in the output file.
        if "ignore" in tags:
            block.appendleft("//xfail-test\n")
        elif "should_fail" in tags:
            block.appendleft("//should-fail\n")

        output_filename = os.path.join(
            dest_dir,
            chapter_name + '_' + str(chapter_index) + '.rs')

        write_file(output_filename, block)
        chapter_index += 1
|
||||
|
||||
|
||||
def has_extern_mod(block):
    """Return True if any line of `block` contains `extern mod extra`."""
    return any(EXTERN_MOD_REGEX.search(code_line) for code_line in block)
|
||||
|
||||
|
||||
def has_main_function(block):
    """Return True if any line of `block` contains a `fn main` definition."""
    return any(MAIN_FUNCTION_REGEX.search(code_line) for code_line in block)
|
||||
|
||||
|
||||
def is_chapter_title(line):
    """Return a match object if `line` starts a chapter (`# Title`), else None."""
    return CHAPTER_NAME_REGEX.match(line)
|
||||
|
||||
|
||||
def is_code_block_delim(line):
    """Return a match object if `line` begins with a `~~~` fence, else None."""
    return CODE_BLOCK_DELIM_REGEX.match(line)
|
||||
|
||||
|
||||
def is_compiler_directive(line):
    """Return a match object if `line` begins with a `#[...];` directive, else None."""
    return COMPILER_DIRECTIVE_REGEX.match(line)
|
||||
|
||||
|
||||
def get_chapter_name(line):
    """
    Derive a chapter name from a title line such as `# Containers`.

    The title text has every non-word character replaced by `_` and is then
    lowercased. The caller must pass a line that matches the chapter pattern;
    otherwise the failed match raises AttributeError.
    """
    title = CHAPTER_NAME_REGEX.match(line).group(1)
    return re.sub(r'\W', '_', title).lower()
|
||||
|
||||
|
||||
def get_code_block(lines):
    """
    Collect the body of a `~~~`-fenced code block, for example:

        1: ~~~ { .tag }
        2: let u: ~[u32] = ~[0, 1, 2];
        3: let v: &[u32] = &[0, 1, 2, 3];
        4: ~~~

    Returns lines 2-3 as a deque. The opening fence (line 1) must already
    have been consumed by the caller; the closing fence is consumed here and
    not returned. `...` is removed from every line, and then a leading `# `
    is removed.
    """
    body = deque()

    for raw_line in lines:
        if is_code_block_delim(raw_line):
            break
        without_ellipses = ELLIPSES_REGEX.sub('', raw_line)
        body.append(COMMENT_REGEX.sub('', without_ellipses))

    return body
|
||||
|
||||
|
||||
def get_lines(filename):
    """Lazily yield the lines of `filename`; the file is closed when iteration ends."""
    with open(filename) as source:
        for text_line in source:
            yield text_line
|
||||
|
||||
|
||||
def get_tags(line):
    """
    Return the set of tag names found on a fence line of the form:

        ~~~ { .tag1 .tag2 .tag3 }
    """
    return set(TAGS_REGEX.findall(line))
|
||||
|
||||
|
||||
def write_file(filename, lines):
    """
    Write `lines` to `filename` as UTF-8 text.

    Each element may be a byte string or already-decoded text. Byte strings
    are decoded as UTF-8 with undecodable bytes replaced; text is written
    unchanged. No line terminators are added.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        for line in lines:
            # Decode only raw bytes: decoding text that is already unicode
            # raises TypeError.
            if isinstance(line, bytes):
                line = line.decode('utf-8', 'replace')
            f.write(line)
|
||||
|
||||
|
||||
def main(argv=None):
    """
    Command-line entry point.

    `argv` is the full argument vector (program name first) and defaults to
    `sys.argv`. It must supply an input filename and a destination directory;
    the destination directory is created if it does not exist.

    Exits with status 1 when an argument is missing or the input file does
    not exist.
    """
    if argv is None:
        argv = sys.argv

    # Validate the vector that was actually passed in, not sys.argv.
    if len(argv) < 2:
        sys.stderr.write("Please provide an input filename.")
        sys.exit(1)
    elif len(argv) < 3:
        sys.stderr.write("Please provide a destination directory.")
        sys.exit(1)

    input_file = argv[1]
    dest_dir = argv[2]

    if not os.path.exists(input_file):
        sys.stderr.write("Input file does not exist.")
        sys.exit(1)

    if not os.path.exists(dest_dir):
        # makedirs also creates missing parents, so nested destinations work.
        os.makedirs(dest_dir)

    extract_code_fragments(dest_dir, get_lines(input_file))


if __name__ == "__main__":
    sys.exit(main())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue