Simplify intrinsics translation generation script

This commit is contained in:
Guillaume Gomez 2026-01-12 16:48:14 +01:00
parent 3e789ed576
commit 3dc60d0bf1

View file

@ -12,7 +12,7 @@ def run_command(command, cwd=None):
sys.exit(1)
def clone_repository(repo_name, path, repo_url, branch="master", sub_paths=None):
def clone_repository(repo_name, path, repo_url, sub_paths):
if os.path.exists(path):
while True:
choice = input("There is already a `{}` folder, do you want to update it? [y/N]".format(path))
@ -21,18 +21,15 @@ def clone_repository(repo_name, path, repo_url, branch="master", sub_paths=None)
return
elif choice.lower() == "y":
print("Updating repository...")
run_command(["git", "pull", "origin", branch], cwd=path)
run_command(["git", "pull", "origin", "main"], cwd=path)
return
else:
print("Didn't understand answer...")
print("Cloning {} repository...".format(repo_name))
if sub_paths is None:
run_command(["git", "clone", repo_url, "--depth", "1", path])
else:
run_command(["git", "clone", repo_url, "--filter=tree:0", "--no-checkout", path])
run_command(["git", "sparse-checkout", "init"], cwd=path)
run_command(["git", "sparse-checkout", "set", *sub_paths], cwd=path)
run_command(["git", "checkout"], cwd=path)
run_command(["git", "clone", repo_url, "--filter=tree:0", "--no-checkout", path])
run_command(["git", "sparse-checkout", "init"], cwd=path)
run_command(["git", "sparse-checkout", "set", *sub_paths], cwd=path)
run_command(["git", "checkout"], cwd=path)
def append_intrinsic(array, intrinsic_name, translation):
@ -45,119 +42,36 @@ def convert_to_string(content):
return content
def extract_intrinsics_from_llvm(llvm_path, intrinsics):
command = ["llvm-tblgen", "llvm/IR/Intrinsics.td"]
def extract_intrinsics_from_llvm(llvm_path):
intrinsics = {}
command = ["llvm-tblgen", "llvm/IR/Intrinsics.td", "--dump-json"]
cwd = os.path.join(llvm_path, "llvm/include")
print("=> Running command `{}` from `{}`".format(command, cwd))
p = subprocess.Popen(command, cwd=cwd, stdout=subprocess.PIPE)
output, err = p.communicate()
lines = convert_to_string(output).splitlines()
pos = 0
while pos < len(lines):
line = lines[pos]
if not line.startswith("def "):
pos += 1
content = json.loads(convert_to_string(output))
for intrinsic in content:
data = content[intrinsic]
if not isinstance(data, dict):
continue
intrinsic = line.split(" ")[1].strip()
content = line
while pos < len(lines):
line = lines[pos].split(" // ")[0].strip()
content += line
pos += 1
if line == "}":
break
entries = re.findall('string ClangBuiltinName = "(\\w+)";', content)
current_arch = re.findall('string TargetPrefix = "(\\w+)";', content)
if len(entries) == 1 and len(current_arch) == 1:
current_arch = current_arch[0]
intrinsic = intrinsic.split("_")
if len(intrinsic) < 2 or intrinsic[0] != "int":
continue
intrinsic[0] = "llvm"
intrinsic = ".".join(intrinsic)
if current_arch not in intrinsics:
intrinsics[current_arch] = []
append_intrinsic(intrinsics[current_arch], intrinsic, entries[0])
def append_translation(json_data, p, array):
it = json_data["index"][p]
content = it["docs"].split('`')
if len(content) != 5:
return
append_intrinsic(array, content[1], content[3])
def extract_intrinsics_from_llvmint(llvmint, intrinsics):
archs = [
"AMDGPU",
"aarch64",
"arm",
"cuda",
"hexagon",
"mips",
"nvvm",
"ppc",
"ptx",
"x86",
"xcore",
]
json_file = os.path.join(llvmint, "target/doc/llvmint.json")
# We need to regenerate the documentation!
run_command(
["cargo", "rustdoc", "--", "-Zunstable-options", "--output-format", "json"],
cwd=llvmint,
)
with open(json_file, "r", encoding="utf8") as f:
json_data = json.loads(f.read())
for p in json_data["paths"]:
it = json_data["paths"][p]
if it["crate_id"] != 0:
# This is from an external crate.
current_arch = data.get("TargetPrefix")
builtin_name = data.get("ClangBuiltinName")
if current_arch is None or current_arch == "" or builtin_name is None:
continue
if it["kind"] != "function":
# We're only looking for functions.
intrinsic = intrinsic.split("_")
if len(intrinsic) < 2 or intrinsic[0] != "int":
continue
# if len(it["path"]) == 2:
# # This is a "general" intrinsic, not bound to a specific arch.
# append_translation(json_data, p, general)
# continue
if len(it["path"]) != 3 or it["path"][1] not in archs:
continue
arch = it["path"][1]
if arch not in intrinsics:
intrinsics[arch] = []
append_translation(json_data, p, intrinsics[arch])
intrinsic[0] = "llvm"
intrinsic = ".".join(intrinsic)
if current_arch not in intrinsics:
intrinsics[current_arch] = []
append_intrinsic(intrinsics[current_arch], intrinsic, builtin_name)
def fill_intrinsics(intrinsics, from_intrinsics, all_intrinsics):
for arch in from_intrinsics:
if arch not in intrinsics:
intrinsics[arch] = []
for entry in from_intrinsics[arch]:
if entry[0] in all_intrinsics:
if all_intrinsics[entry[0]] == entry[1]:
# This is a "full" duplicate, both the LLVM instruction and the GCC
# translation are the same.
continue
intrinsics[arch].append((entry[0], entry[1], True))
else:
intrinsics[arch].append((entry[0], entry[1], False))
all_intrinsics[entry[0]] = entry[1]
return intrinsics
def update_intrinsics(llvm_path):
intrinsics_llvm = {}
intrinsics_llvmint = {}
all_intrinsics = {}
extract_intrinsics_from_llvm(llvm_path, intrinsics_llvm)
intrinsics = {}
# We give priority to translations from LLVM over the ones from llvmint.
fill_intrinsics(intrinsics, intrinsics_llvm, all_intrinsics)
fill_intrinsics(intrinsics, intrinsics_llvmint, all_intrinsics)
intrinsics = extract_intrinsics_from_llvm(llvm_path)
archs = [arch for arch in intrinsics]
archs.sort()
@ -187,15 +101,13 @@ match arch {""")
continue
attribute = "#[expect(non_snake_case)]" if arch[0].isupper() else ""
out.write("\"{}\" => {{ {} fn {}(name: &str,full_name:&str) -> &'static str {{ match name {{".format(arch, attribute, arch))
intrinsics[arch].sort(key=lambda x: (x[0], x[2]))
intrinsics[arch].sort(key=lambda x: (x[0], x[1]))
out.write(' // {}\n'.format(arch))
for entry in intrinsics[arch]:
llvm_name = entry[0].removeprefix("llvm.");
llvm_name = llvm_name.removeprefix(arch);
llvm_name = llvm_name.removeprefix(".");
if entry[2] is True: # if it is a duplicate
out.write(' // [DUPLICATE]: "{}" => "{}",\n'.format(llvm_name, entry[1]))
elif "_round_mask" in entry[1]:
if "_round_mask" in entry[1]:
out.write(' // [INVALID CONVERSION]: "{}" => "{}",\n'.format(llvm_name, entry[1]))
else:
out.write(' "{}" => "{}",\n'.format(llvm_name, entry[1]))
@ -224,11 +136,15 @@ def main():
"llvm-project",
llvm_path,
"https://github.com/llvm/llvm-project",
branch="main",
sub_paths=["llvm/include/llvm/IR", "llvm/include/llvm/CodeGen/"],
["llvm/include/llvm/IR", "llvm/include/llvm/CodeGen/"],
)
update_intrinsics(llvm_path)
# llvm-tblgen can be built with:
#
# mkdir llvm-tblgen-build && cd llvm-tblgen-build
# cmake -G Ninja -DLLVM_ENABLE_PROJECTS="llvm" -DCMAKE_BUILD_TYPE=Release ../llvm
# ninja llvm-tblgen
if __name__ == "__main__":
sys.exit(main())