diff --git a/tools/generate_intrinsics.py b/tools/generate_intrinsics.py index 56b425cdb0cd..539032340777 100644 --- a/tools/generate_intrinsics.py +++ b/tools/generate_intrinsics.py @@ -12,7 +12,7 @@ def run_command(command, cwd=None): sys.exit(1) -def clone_repository(repo_name, path, repo_url, branch="master", sub_paths=None): +def clone_repository(repo_name, path, repo_url, sub_paths): if os.path.exists(path): while True: choice = input("There is already a `{}` folder, do you want to update it? [y/N]".format(path)) @@ -21,18 +21,15 @@ def clone_repository(repo_name, path, repo_url, branch="master", sub_paths=None) return elif choice.lower() == "y": print("Updating repository...") - run_command(["git", "pull", "origin", branch], cwd=path) + run_command(["git", "pull", "origin", "main"], cwd=path) return else: print("Didn't understand answer...") print("Cloning {} repository...".format(repo_name)) - if sub_paths is None: - run_command(["git", "clone", repo_url, "--depth", "1", path]) - else: - run_command(["git", "clone", repo_url, "--filter=tree:0", "--no-checkout", path]) - run_command(["git", "sparse-checkout", "init"], cwd=path) - run_command(["git", "sparse-checkout", "set", *sub_paths], cwd=path) - run_command(["git", "checkout"], cwd=path) + run_command(["git", "clone", repo_url, "--filter=tree:0", "--no-checkout", path]) + run_command(["git", "sparse-checkout", "init"], cwd=path) + run_command(["git", "sparse-checkout", "set", *sub_paths], cwd=path) + run_command(["git", "checkout"], cwd=path) def append_intrinsic(array, intrinsic_name, translation): @@ -45,119 +42,36 @@ def convert_to_string(content): return content -def extract_intrinsics_from_llvm(llvm_path, intrinsics): - command = ["llvm-tblgen", "llvm/IR/Intrinsics.td"] +def extract_intrinsics_from_llvm(llvm_path): + intrinsics = {} + command = ["llvm-tblgen", "llvm/IR/Intrinsics.td", "--dump-json"] cwd = os.path.join(llvm_path, "llvm/include") print("=> Running command `{}` from `{}`".format(command, cwd)) p = subprocess.Popen(command, cwd=cwd, stdout=subprocess.PIPE) output, err = p.communicate() - lines = convert_to_string(output).splitlines() - pos = 0 - while pos < len(lines): - line = lines[pos] - if not line.startswith("def "): - pos += 1 + content = json.loads(convert_to_string(output)) + for intrinsic in content: + data = content[intrinsic] + if not isinstance(data, dict): continue - intrinsic = line.split(" ")[1].strip() - content = line - while pos < len(lines): - line = lines[pos].split(" // ")[0].strip() - content += line - pos += 1 - if line == "}": - break - entries = re.findall('string ClangBuiltinName = "(\\w+)";', content) - current_arch = re.findall('string TargetPrefix = "(\\w+)";', content) - if len(entries) == 1 and len(current_arch) == 1: - current_arch = current_arch[0] - intrinsic = intrinsic.split("_") - if len(intrinsic) < 2 or intrinsic[0] != "int": - continue - intrinsic[0] = "llvm" - intrinsic = ".".join(intrinsic) - if current_arch not in intrinsics: - intrinsics[current_arch] = [] - append_intrinsic(intrinsics[current_arch], intrinsic, entries[0]) - - -def append_translation(json_data, p, array): - it = json_data["index"][p] - content = it["docs"].split('`') - if len(content) != 5: - return - append_intrinsic(array, content[1], content[3]) - - -def extract_intrinsics_from_llvmint(llvmint, intrinsics): - archs = [ - "AMDGPU", - "aarch64", - "arm", - "cuda", - "hexagon", - "mips", - "nvvm", - "ppc", - "ptx", - "x86", - "xcore", - ] - - json_file = os.path.join(llvmint, "target/doc/llvmint.json") - # We need to regenerate the documentation! - run_command( - ["cargo", "rustdoc", "--", "-Zunstable-options", "--output-format", "json"], - cwd=llvmint, - ) - with open(json_file, "r", encoding="utf8") as f: - json_data = json.loads(f.read()) - for p in json_data["paths"]: - it = json_data["paths"][p] - if it["crate_id"] != 0: - # This is from an external crate. + current_arch = data.get("TargetPrefix") + builtin_name = data.get("ClangBuiltinName") + if current_arch is None or current_arch == "" or builtin_name is None: continue - if it["kind"] != "function": - # We're only looking for functions. + intrinsic = intrinsic.split("_") + if len(intrinsic) < 2 or intrinsic[0] != "int": continue - # if len(it["path"]) == 2: - # # This is a "general" intrinsic, not bound to a specific arch. - # append_translation(json_data, p, general) - # continue - if len(it["path"]) != 3 or it["path"][1] not in archs: - continue - arch = it["path"][1] - if arch not in intrinsics: - intrinsics[arch] = [] - append_translation(json_data, p, intrinsics[arch]) + intrinsic[0] = "llvm" + intrinsic = ".".join(intrinsic) + if current_arch not in intrinsics: + intrinsics[current_arch] = [] + append_intrinsic(intrinsics[current_arch], intrinsic, builtin_name) - -def fill_intrinsics(intrinsics, from_intrinsics, all_intrinsics): - for arch in from_intrinsics: - if arch not in intrinsics: - intrinsics[arch] = [] - for entry in from_intrinsics[arch]: - if entry[0] in all_intrinsics: - if all_intrinsics[entry[0]] == entry[1]: - # This is a "full" duplicate, both the LLVM instruction and the GCC - # translation are the same. - continue - intrinsics[arch].append((entry[0], entry[1], True)) - else: - intrinsics[arch].append((entry[0], entry[1], False)) - all_intrinsics[entry[0]] = entry[1] + return intrinsics def update_intrinsics(llvm_path): - intrinsics_llvm = {} - intrinsics_llvmint = {} - all_intrinsics = {} - - extract_intrinsics_from_llvm(llvm_path, intrinsics_llvm) - - intrinsics = {} - # We give priority to translations from LLVM over the ones from llvmint. - fill_intrinsics(intrinsics, intrinsics_llvm, all_intrinsics) - fill_intrinsics(intrinsics, intrinsics_llvmint, all_intrinsics) + intrinsics = extract_intrinsics_from_llvm(llvm_path) archs = [arch for arch in intrinsics] archs.sort() @@ -187,15 +101,13 @@ match arch {""") continue attribute = "#[expect(non_snake_case)]" if arch[0].isupper() else "" out.write("\"{}\" => {{ {} fn {}(name: &str,full_name:&str) -> &'static str {{ match name {{".format(arch, attribute, arch)) - intrinsics[arch].sort(key=lambda x: (x[0], x[2])) + intrinsics[arch].sort(key=lambda x: (x[0], x[1])) out.write(' // {}\n'.format(arch)) for entry in intrinsics[arch]: llvm_name = entry[0].removeprefix("llvm."); llvm_name = llvm_name.removeprefix(arch); llvm_name = llvm_name.removeprefix("."); - if entry[2] is True: # if it is a duplicate - out.write(' // [DUPLICATE]: "{}" => "{}",\n'.format(llvm_name, entry[1])) - elif "_round_mask" in entry[1]: + if "_round_mask" in entry[1]: out.write(' // [INVALID CONVERSION]: "{}" => "{}",\n'.format(llvm_name, entry[1])) else: out.write(' "{}" => "{}",\n'.format(llvm_name, entry[1])) @@ -224,11 +136,15 @@ def main(): "llvm-project", llvm_path, "https://github.com/llvm/llvm-project", - branch="main", - sub_paths=["llvm/include/llvm/IR", "llvm/include/llvm/CodeGen/"], + ["llvm/include/llvm/IR", "llvm/include/llvm/CodeGen/"], ) update_intrinsics(llvm_path) +# llvm-tblgen can be built with: +# +# mkdir llvm-tblgen-build && cd llvm-tblgen-build +# cmake -G Ninja -DLLVM_ENABLE_PROJECTS="llvm" -DCMAKE_BUILD_TYPE=Release ../llvm +# ninja llvm-tblgen if __name__ == "__main__": sys.exit(main())