Auto merge of #134907 - Kobzol:rustc-dev-guide-josh, r=ehuss
Turn rustc-dev-guide into a Josh subtree
Discussed on [Zulip](https://rust-lang.zulipchat.com/#narrow/channel/196385-t-compiler.2Fwg-rustc-dev-guide/topic/a.20move.20to.20main.20repo.20.28rust-lang.2Frust.29).
Accompanying rustc-dev-guide PR: https://github.com/rust-lang/rustc-dev-guide/pull/2183
I didn't create a bootstrap step for rustc-dev-guide yet, because the rustc-dev-guide version that we currently use in this repo doesn't have linkcheck enabled and that fails tests.
The subtree starts with commit [ad93c5f1c49f2aeb45f7a4954017b1e607df9f5e](ad93c5f1c4).
What I did:
```
export DIR=src/doc/rustc-dev-guide
# Remove submodule
git submodule status ${DIR}
git submodule deinit ${DIR}
git rm -r --cached ${DIR}
rm -rf ${DIR}
# Remove rustc-dev-guide from .gitmodules
git commit -m"Removed `${DIR}` submodule"
# Import history with josh
git fetch https://github.com/rust-lang/rustc-dev-guide ad93c5f1c49f2aeb45f7a4954017b1e607df9f5e
josh-filter ':prefix=src/doc/rustc-dev-guide' FETCH_HEAD
git merge --allow-unrelated FILTERED_HEAD
# A few follow-up cleanup commits
```
r? ehuss
This commit is contained in:
commit
c528b8c678
250 changed files with 348506 additions and 41 deletions
4
.gitmodules
vendored
4
.gitmodules
vendored
|
|
@ -22,10 +22,6 @@
|
|||
path = library/stdarch
|
||||
url = https://github.com/rust-lang/stdarch.git
|
||||
shallow = true
|
||||
[submodule "src/doc/rustc-dev-guide"]
|
||||
path = src/doc/rustc-dev-guide
|
||||
url = https://github.com/rust-lang/rustc-dev-guide.git
|
||||
shallow = true
|
||||
[submodule "src/doc/edition-guide"]
|
||||
path = src/doc/edition-guide
|
||||
url = https://github.com/rust-lang/edition-guide.git
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ macro_rules! book {
|
|||
}
|
||||
|
||||
// NOTE: When adding a book here, make sure to ALSO build the book by
|
||||
// adding a build step in `src/bootstrap/builder.rs`!
|
||||
// adding a build step in `src/bootstrap/src/core/builder/mod.rs`!
|
||||
// NOTE: Make sure to add the corresponding submodule when adding a new book.
|
||||
// FIXME: Make checking for a submodule automatic somehow (maybe by having a list of all submodules
|
||||
// and checking against it?).
|
||||
|
|
|
|||
|
|
@ -2525,35 +2525,6 @@ fn markdown_test(builder: &Builder<'_>, compiler: Compiler, markdown: &Path) ->
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct RustcGuide;
|
||||
|
||||
impl Step for RustcGuide {
|
||||
type Output = ();
|
||||
const DEFAULT: bool = false;
|
||||
const ONLY_HOSTS: bool = true;
|
||||
|
||||
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
|
||||
run.path("src/doc/rustc-dev-guide")
|
||||
}
|
||||
|
||||
fn make_run(run: RunConfig<'_>) {
|
||||
run.builder.ensure(RustcGuide);
|
||||
}
|
||||
|
||||
fn run(self, builder: &Builder<'_>) {
|
||||
let relative_path = "src/doc/rustc-dev-guide";
|
||||
builder.require_submodule(relative_path, None);
|
||||
|
||||
let src = builder.src.join(relative_path);
|
||||
let mut rustbook_cmd = builder.tool_cmd(Tool::Rustbook).delay_failure();
|
||||
rustbook_cmd.arg("linkcheck").arg(&src);
|
||||
let toolstate =
|
||||
if rustbook_cmd.run(builder) { ToolState::TestPass } else { ToolState::TestFail };
|
||||
builder.save_toolstate("rustc-dev-guide", toolstate);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct CrateLibrustc {
|
||||
compiler: Compiler,
|
||||
|
|
|
|||
|
|
@ -76,10 +76,7 @@ static STABLE_TOOLS: &[(&str, &str)] = &[
|
|||
// We do require that we checked whether they build or not on the tools builder,
|
||||
// though, as otherwise we will be unable to file an issue if they start
|
||||
// failing.
|
||||
static NIGHTLY_TOOLS: &[(&str, &str)] = &[
|
||||
("embedded-book", "src/doc/embedded-book"),
|
||||
// ("rustc-dev-guide", "src/doc/rustc-dev-guide"),
|
||||
];
|
||||
static NIGHTLY_TOOLS: &[(&str, &str)] = &[("embedded-book", "src/doc/embedded-book")];
|
||||
|
||||
fn print_error(tool: &str, submodule: &str) {
|
||||
eprintln!();
|
||||
|
|
|
|||
|
|
@ -951,7 +951,6 @@ impl<'a> Builder<'a> {
|
|||
test::UnstableBook,
|
||||
test::RustcBook,
|
||||
test::LintDocs,
|
||||
test::RustcGuide,
|
||||
test::EmbeddedBook,
|
||||
test::EditionGuide,
|
||||
test::Rustfmt,
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
Subproject commit ad93c5f1c49f2aeb45f7a4954017b1e607df9f5e
|
||||
10
src/doc/rustc-dev-guide/.editorconfig
Normal file
10
src/doc/rustc-dev-guide/.editorconfig
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
root = true
|
||||
|
||||
[src/*]
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
max_line_length = 100
|
||||
|
||||
[ci/*.sh]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
10
src/doc/rustc-dev-guide/.gitattributes
vendored
Normal file
10
src/doc/rustc-dev-guide/.gitattributes
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
# Set the default behavior, in case people don't have core.autocrlf set.
|
||||
* text=auto eol=lf
|
||||
|
||||
# git grep shouldn't match entries in huge minified javascript
|
||||
*.min.js binary
|
||||
|
||||
# Older git versions try to fix line endings on images, this prevents it.
|
||||
*.png binary
|
||||
*.jpg binary
|
||||
*.ico binary
|
||||
74
src/doc/rustc-dev-guide/.github/workflows/ci.yml
vendored
Normal file
74
src/doc/rustc-dev-guide/.github/workflows/ci.yml
vendored
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
schedule:
|
||||
# Run at 18:00 UTC every day
|
||||
- cron: '0 18 * * *'
|
||||
|
||||
jobs:
|
||||
ci:
|
||||
if: github.repository == 'rust-lang/rustc-dev-guide'
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
MDBOOK_VERSION: 0.4.21
|
||||
MDBOOK_LINKCHECK2_VERSION: 0.8.1
|
||||
MDBOOK_MERMAID_VERSION: 0.12.6
|
||||
MDBOOK_TOC_VERSION: 0.11.2
|
||||
DEPLOY_DIR: book/html
|
||||
BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
# linkcheck needs the base commit.
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Cache binaries
|
||||
id: mdbook-cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/bin
|
||||
key: ${{ runner.os }}-${{ env.MDBOOK_VERSION }}--${{ env.MDBOOK_LINKCHECK2_VERSION }}--${{ env.MDBOOK_TOC_VERSION }}--${{ env.MDBOOK_MERMAID_VERSION }}
|
||||
|
||||
- name: Cache linkcheck
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/book/linkcheck
|
||||
key: ${{ runner.os }}-${{ hashFiles('./book/linkcheck') }}
|
||||
|
||||
- name: Install latest nightly Rust toolchain
|
||||
if: steps.mdbook-cache.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
rustup update nightly
|
||||
rustup override set nightly
|
||||
|
||||
- name: Install Dependencies
|
||||
if: steps.mdbook-cache.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
cargo install mdbook --version ${{ env.MDBOOK_VERSION }}
|
||||
cargo install mdbook-linkcheck2 --version ${{ env.MDBOOK_LINKCHECK2_VERSION }}
|
||||
cargo install mdbook-toc --version ${{ env.MDBOOK_TOC_VERSION }}
|
||||
cargo install mdbook-mermaid --version ${{ env.MDBOOK_MERMAID_VERSION }}
|
||||
|
||||
- name: Check build
|
||||
run: ENABLE_LINKCHECK=1 mdbook build
|
||||
|
||||
- name: Deploy to gh-pages
|
||||
if: github.event_name == 'push'
|
||||
run: |
|
||||
touch "${{ env.DEPLOY_DIR }}/.nojekyll"
|
||||
cp CNAME "${{ env.DEPLOY_DIR }}"
|
||||
cd "${{ env.DEPLOY_DIR }}"
|
||||
rm -rf .git
|
||||
git init
|
||||
git config user.name "Deploy from CI"
|
||||
git config user.email ""
|
||||
git add .
|
||||
git commit -m "Deploy ${GITHUB_SHA} to gh-pages"
|
||||
git push --quiet -f "https://x-token:${{ secrets.GITHUB_TOKEN }}@github.com/${GITHUB_REPOSITORY}" HEAD:gh-pages
|
||||
48
src/doc/rustc-dev-guide/.github/workflows/date-check.yml
vendored
Normal file
48
src/doc/rustc-dev-guide/.github/workflows/date-check.yml
vendored
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
name: Date-Check
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run at noon UTC every 1st of the month
|
||||
- cron: '00 12 01 * *'
|
||||
|
||||
# Allow manually starting the workflow
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
date-check:
|
||||
if: github.repository == 'rust-lang/rustc-dev-guide'
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Ensure Rust is up-to-date
|
||||
run: |
|
||||
rustup update stable
|
||||
|
||||
- name: Run `date-check`
|
||||
working-directory: ci/date-check
|
||||
run: |
|
||||
cargo run -- ../../src/ > ../../date-check-output.txt
|
||||
|
||||
- name: Open issue
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
|
||||
const rawText = fs.readFileSync('date-check-output.txt', { encoding: 'utf8' });
|
||||
const title = rawText.split('\n')[0];
|
||||
if (title != 'empty') {
|
||||
const body = rawText.split('\n').slice(1).join('\n');
|
||||
github.rest.issues.create({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
title,
|
||||
body,
|
||||
});
|
||||
console.log('Opened issue.');
|
||||
} else {
|
||||
console.log('No dates to triage.');
|
||||
}
|
||||
6
src/doc/rustc-dev-guide/.gitignore
vendored
Normal file
6
src/doc/rustc-dev-guide/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
book
|
||||
|
||||
ci/date-check/target/
|
||||
|
||||
# Generated by check-in.sh
|
||||
pulls.json
|
||||
5
src/doc/rustc-dev-guide/.mailmap
Normal file
5
src/doc/rustc-dev-guide/.mailmap
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
Jynn Nelson <github@jyn.dev> <jyn514@gmail.com>
|
||||
Jynn Nelson <github@jyn.dev> <joshua@yottadb.com>
|
||||
Jynn Nelson <github@jyn.dev> <jyn.nelson@redjack.com>
|
||||
Jynn Nelson <github@jyn.dev> <jnelson@cloudflare.com>
|
||||
Jynn Nelson <github@jyn.dev>
|
||||
9
src/doc/rustc-dev-guide/CITATION.cff
Normal file
9
src/doc/rustc-dev-guide/CITATION.cff
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
cff-version: 1.2.0
|
||||
message: If you use this guide, please cite it using these metadata.
|
||||
title: Rust Compiler Development Guide (rustc-dev-guide)
|
||||
abstract: A guide to developing the Rust compiler (rustc)
|
||||
authors:
|
||||
- name: "The Rust Project Developers"
|
||||
date-released: "2018-01-16"
|
||||
license: "MIT OR Apache-2.0"
|
||||
repository-code: "https://github.com/rust-lang/rustc-dev-guide"
|
||||
1
src/doc/rustc-dev-guide/CNAME
Normal file
1
src/doc/rustc-dev-guide/CNAME
Normal file
|
|
@ -0,0 +1 @@
|
|||
rustc-dev-guide.rust-lang.org
|
||||
3
src/doc/rustc-dev-guide/CODE_OF_CONDUCT.md
Normal file
3
src/doc/rustc-dev-guide/CODE_OF_CONDUCT.md
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# The Rust Code of Conduct
|
||||
|
||||
This repository follows the [Code of Conduct](https://www.rust-lang.org/policies/code-of-conduct) of the Rust organization.
|
||||
201
src/doc/rustc-dev-guide/LICENSE-APACHE
Normal file
201
src/doc/rustc-dev-guide/LICENSE-APACHE
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
23
src/doc/rustc-dev-guide/LICENSE-MIT
Normal file
23
src/doc/rustc-dev-guide/LICENSE-MIT
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
115
src/doc/rustc-dev-guide/README.md
Normal file
115
src/doc/rustc-dev-guide/README.md
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
[![CI](https://github.com/rust-lang/rustc-dev-guide/actions/workflows/ci.yml/badge.svg)](https://github.com/rust-lang/rustc-dev-guide/actions/workflows/ci.yml)
|
||||
|
||||
|
||||
This is a collaborative effort to build a guide that explains how rustc
|
||||
works. The aim of the guide is to help new contributors get oriented
|
||||
to rustc, as well as to help more experienced folks in figuring out
|
||||
some new part of the compiler that they haven't worked on before.
|
||||
|
||||
[You can read the latest version of the guide here.](https://rustc-dev-guide.rust-lang.org/)
|
||||
|
||||
You may also find the rustdocs [for the compiler itself][rustdocs] useful.
|
||||
Note that these are not intended as a guide; it's recommended that you search
|
||||
for the docs you're looking for instead of reading them top to bottom.
|
||||
|
||||
[rustdocs]: https://doc.rust-lang.org/nightly/nightly-rustc
|
||||
|
||||
For documentation on developing the standard library, see
|
||||
[`std-dev-guide`](https://std-dev-guide.rust-lang.org/).
|
||||
|
||||
### Contributing to the guide
|
||||
|
||||
The guide is useful today, but it has a lot of work still to go.
|
||||
|
||||
If you'd like to help improve the guide, we'd love to have you! You can find
|
||||
plenty of issues on the [issue
|
||||
tracker](https://github.com/rust-lang/rustc-dev-guide/issues). Just post a
|
||||
comment on the issue you would like to work on to make sure that we don't
|
||||
accidentally duplicate work. If you think something is missing, please open an
|
||||
issue about it!
|
||||
|
||||
**In general, if you don't know how the compiler works, that is not a
|
||||
problem!** In that case, what we will do is to schedule a bit of time
|
||||
for you to talk with someone who **does** know the code, or who wants
|
||||
to pair with you and figure it out. Then you can work on writing up
|
||||
what you learned.
|
||||
|
||||
In general, when writing about a particular part of the compiler's code, we
|
||||
recommend that you link to the relevant parts of the [rustc
|
||||
rustdocs][rustdocs].
|
||||
|
||||
### Build Instructions
|
||||
|
||||
To build a local static HTML site, install [`mdbook`](https://github.com/rust-lang/mdBook) with:
|
||||
|
||||
```
|
||||
> cargo install mdbook mdbook-linkcheck2 mdbook-toc mdbook-mermaid
|
||||
```
|
||||
|
||||
and execute the following command in the root of the repository:
|
||||
|
||||
```
|
||||
> mdbook build --open
|
||||
```
|
||||
|
||||
The build files are found in the `book/html` directory.
|
||||
|
||||
### Link Validations
|
||||
|
||||
We use `mdbook-linkcheck2` to validate URLs included in our documentation. Link
|
||||
checking is **not** run by default locally, though it is in CI. To enable it
|
||||
locally, set the environment variable `ENABLE_LINKCHECK=1` like in the
|
||||
following example.
|
||||
|
||||
```console
|
||||
$ ENABLE_LINKCHECK=1 mdbook serve
|
||||
```
|
||||
|
||||
### Table of Contents
|
||||
|
||||
We use `mdbook-toc` to auto-generate TOCs for long sections. You can invoke the preprocessor by
|
||||
including the `<!-- toc -->` marker at the place where you want the TOC.
|
||||
|
||||
## How to fix toolstate failures
|
||||
|
||||
> [!NOTE]
|
||||
> Currently, we do not track the rustc-dev-guide toolstate due to
|
||||
> [spurious failures](https://github.com/rust-lang/rust/pull/71731),
|
||||
> but we leave these instructions for when we do it again in the future.
|
||||
|
||||
1. You will get a ping from the toolstate commit. e.g. https://github.com/rust-lang-nursery/rust-toolstate/commit/8ffa0e4c30ac9ba8546b7046e5c4ccc2b96ebdd4
|
||||
|
||||
2. The commit contains a link to the PR that caused the breakage. e.g. https://github.com/rust-lang/rust/pull/64321
|
||||
|
||||
3. If you go to that PR's thread, there is a post from bors with a link to the CI status: https://github.com/rust-lang/rust/pull/64321#issuecomment-529763807
|
||||
|
||||
4. Follow the check-actions link to get to the Actions page for that build
|
||||
|
||||
5. There will be approximately 1 billion different jobs for the build. They are for different configurations and platforms. The rustc-dev-guide build only runs on the Linux x86_64-gnu-tools job. So click on that job in the list, which is about 60% down in the list.
|
||||
|
||||
6. Click the Run build step in the job to get the console log for the step.
|
||||
|
||||
7. Click on the log and Ctrl-f to get a search box in the log
|
||||
|
||||
8. Search for rustc-dev-guide. This gets you to the place where the links are checked. It is usually ~11K lines into the log.
|
||||
|
||||
9. Look at the links reported in the log near that point.
|
||||
|
||||
10. Fix those links in the rustc-dev-guide (by making a PR in the rustc-dev-guide repo)
|
||||
|
||||
11. Make a PR on the rust-lang/rust repo to update the rustc-dev-guide git submodule in src/doc/rustc-dev-guide.
|
||||
To make a PR, the following steps are useful.
|
||||
|
||||
```bash
|
||||
# Assuming you already cloned the rust-lang/rust repo and you're in the correct directory
|
||||
git submodule update --remote src/doc/rustc-dev-guide
|
||||
git add -u
|
||||
git commit -m "Update rustc-dev-guide"
|
||||
# Note that you can use -i, which is short for --incremental, in the following command
|
||||
./x test --incremental src/doc/rustc-dev-guide # This is optional and should succeed anyway
|
||||
# Open a PR in rust-lang/rust
|
||||
```
|
||||
|
||||
12. Wait for PR to merge
|
||||
|
||||
Voilà!
|
||||
64
src/doc/rustc-dev-guide/book.toml
Normal file
64
src/doc/rustc-dev-guide/book.toml
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
[book]
|
||||
title = "Rust Compiler Development Guide"
|
||||
author = "The Rust Project Developers"
|
||||
description = "A guide to developing the Rust compiler (rustc)"
|
||||
|
||||
[build]
|
||||
create-missing = false
|
||||
|
||||
[preprocessor.toc]
|
||||
command = "mdbook-toc"
|
||||
renderer = ["html"]
|
||||
|
||||
[preprocessor.mermaid]
|
||||
command = "mdbook-mermaid"
|
||||
|
||||
[output.html]
|
||||
git-repository-url = "https://github.com/rust-lang/rustc-dev-guide"
|
||||
edit-url-template = "https://github.com/rust-lang/rustc-dev-guide/edit/master/{path}"
|
||||
additional-js = ["mermaid.min.js", "mermaid-init.js"]
|
||||
|
||||
[output.html.search]
|
||||
use-boolean-and = true
|
||||
|
||||
[output.html.fold]
|
||||
enable = true
|
||||
level = 0
|
||||
|
||||
[output.linkcheck]
|
||||
command = "ci/linkcheck.sh"
|
||||
follow-web-links = true
|
||||
exclude = [
|
||||
"crates\\.io",
|
||||
"gcc\\.godbolt\\.org",
|
||||
"youtube\\.com",
|
||||
"youtu\\.be",
|
||||
"dl\\.acm\\.org",
|
||||
"cs\\.bgu\\.ac\\.il",
|
||||
"www\\.amazon\\.com",
|
||||
"www\\.rustaceans\\.org",
|
||||
"play\\.rust-lang\\.org",
|
||||
"tomlee\\.co",
|
||||
"marketplace\\.visualstudio\\.com",
|
||||
"objects\\.githubusercontent\\.com",
|
||||
# The bug listing URL works only if a user is logged in, otherwise one gets 404.
|
||||
"github\\.com/issues\\?q=.*",
|
||||
# Similarly 500 is sometimes returned here.
|
||||
"github\\.com/rust-lang/rust/pulls\\?q=.*",
|
||||
# 401 is returned here for an unknown reason
|
||||
"github\\.com/wesleywiser/rustc-bootstrap-wpa-analysis",
|
||||
# Handle: connection closed before message completed
|
||||
"microsoft\\.com/en-us/research/publication/",
|
||||
# 500 is returned for HEAD request
|
||||
"code\\.visualstudio\\.com/docs/editor/tasks",
|
||||
]
|
||||
cache-timeout = 86400
|
||||
warning-policy = "error"
|
||||
|
||||
[output.html.redirect]
|
||||
"/compiletest.html" = "tests/compiletest.html"
|
||||
"/diagnostics/sessiondiagnostic.html" = "diagnostic-structs.html"
|
||||
"/diagnostics/diagnostic-codes.html" = "error-codes.html"
|
||||
"/miri.html" = "const-eval/interpret.html"
|
||||
"/tests/integration.html" = "ecosystem.html"
|
||||
"/tests/headers.html" = "directives.html"
|
||||
38
src/doc/rustc-dev-guide/ci/check-in.sh
Executable file
38
src/doc/rustc-dev-guide/ci/check-in.sh
Executable file
|
|
@ -0,0 +1,38 @@
|
|||
#!/bin/sh

set -eu

# This is not a very smart script.
#
# Usage: check-in.sh <since> <number-of-prs-merged>
#
# Prints a Markdown summary: merged PRs whose `merged_at` is later than
# <since>, followed by the currently open PRs. As a side effect the filtered
# list of merged PRs is written to `pulls.json` in the current directory.
if [ $# != 2 ]; then
  echo "usage: $0 <since> <number-of-prs-merged>"
  if [ $# = 0 ]; then
    echo "help: you can find the last check-in at" \
         "https://rust-lang.zulipchat.com/#narrow/stream/238009-t-compiler.2Fmeetings/search/wg-rustc-dev-guide"
  elif [ $# = 1 ] ; then
    echo "help: you can find the number of PRs merged at" \
         "https://github.com/rust-lang/rustc-dev-guide/pulls?q=is%3Apr+is%3Amerged+updated%3A%3E$1"
  fi
  exit 1
fi

# Wrap curl so every request in this script runs in silent mode.
curl() {
  command curl -s "$@"
}

# Get recently updated PRs.
# <since> is handed to jq as a variable rather than pasted into the filter
# text, so a value containing quotes cannot break or alter the jq program.
curl "https://api.github.com/repos/rust-lang/rustc-dev-guide/pulls?state=closed&per_page=$2" \
  | jq --arg since "$1" '[.[] | select(.merged_at > $since)]' > pulls.json

# Reads a JSON array of PRs on stdin and prints one Markdown bullet per PR.
show_pulls() {
  jq -r '.[] | { title, number, html_url, user: .user.login } | "- " + .title + " [#" + (.number | tostring) + "](" + .html_url + ")"'
}

echo "### Most notable changes"
echo
show_pulls < pulls.json
echo
echo "### Most notable WIPs"
echo
# If there are more than 30 PRs open at a time, you'll need to set `per_page`.
# For now this seems unlikely.
curl "https://api.github.com/repos/rust-lang/rustc-dev-guide/pulls?state=open" | show_pulls
|
||||
342
src/doc/rustc-dev-guide/ci/date-check/Cargo.lock
Normal file
342
src/doc/rustc-dev-guide/ci/date-check/Cargo.lock
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "android-tzdata"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401"
|
||||
dependencies = [
|
||||
"android-tzdata",
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-traits",
|
||||
"wasm-bindgen",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation-sys"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
|
||||
|
||||
[[package]]
|
||||
name = "date-check"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"glob",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.60"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141"
|
||||
dependencies = [
|
||||
"android_system_properties",
|
||||
"core-foundation-sys",
|
||||
"iana-time-zone-haiku",
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
"windows-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone-haiku"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
|
||||
dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.155"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.70"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.92"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"wasm-bindgen-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-backend"
|
||||
version = "0.2.92"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"log",
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.92"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"wasm-bindgen-macro-support",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro-support"
|
||||
version = "0.2.92"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-shared"
|
||||
version = "0.2.92"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
12
src/doc/rustc-dev-guide/ci/date-check/Cargo.toml
Normal file
12
src/doc/rustc-dev-guide/ci/date-check/Cargo.toml
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "date-check"
|
||||
version = "0.1.0"
|
||||
authors = ["Noah Lev <camelidcamel@gmail.com>"]
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
glob = "0.3"
|
||||
regex = "1"
|
||||
chrono = "0.4"
|
||||
356
src/doc/rustc-dev-guide/ci/date-check/src/main.rs
Normal file
356
src/doc/rustc-dev-guide/ci/date-check/src/main.rs
Normal file
|
|
@ -0,0 +1,356 @@
|
|||
use std::{
|
||||
collections::BTreeMap,
|
||||
convert::TryInto as _,
|
||||
env, fmt, fs,
|
||||
path::{Path, PathBuf},
|
||||
process,
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use chrono::{Datelike as _, Month, TimeZone as _, Utc};
|
||||
use glob::glob;
|
||||
use regex::Regex;
|
||||
|
||||
/// A calendar month: a year together with a 1-based month number.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct Date {
    year: u32,
    month: u32,
}

impl Date {
    /// Returns how many calendar months `self` lies after `other`.
    ///
    /// Returns `Some(0)` when both dates name the same month and `None` when
    /// `self` is earlier than `other`.
    ///
    /// The difference is taken directly on (year, month). Dividing the elapsed
    /// days by 30 would be wrong at both ends: February to March is only 28
    /// days (zero "months"), and multi-year spans accumulate extra months.
    fn months_since(self, other: Date) -> Option<u32> {
        // Position of a month on a continuous month axis; widened to `i64` so
        // the multiplication and the subtraction cannot overflow.
        let month_index = |date: Date| i64::from(date.year) * 12 + i64::from(date.month);
        // A negative difference does not fit in `u32` and therefore becomes `None`.
        (month_index(self) - month_index(other)).try_into().ok()
    }
}
|
||||
|
||||
impl fmt::Display for Date {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{:04}-{:02}", self.year, self.month)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the regex that recognises `date-check` annotations.
///
/// Two spellings are accepted:
///
/// * `<!-- date-check: Month YYYY -->`: the date sits inside the comment and
///   is captured as `m1` / `y1`;
/// * `<!-- date-check --> Month YYYY`: the date follows the comment and is
///   captured as `m2` / `y2`.
///
/// The month is any run of letters and the year is exactly four digits. The
/// pattern is written in `(?x)` mode, so whitespace inside the literal is
/// layout only.
fn make_date_regex() -> Regex {
    Regex::new(
        r"(?x) # insignificant whitespace mode
        (<!--\s*
          date-check:\s*
          (?P<m1>[[:alpha:]]+)\s+
          (?P<y1>\d{4})\s*-->
        )
        |
        (<!--\s*
          date-check\s*-->\s+
          (?P<m2>[[:alpha:]]+)\s+
          (?P<y2>\d{4})\b
        )
    ",
    )
    // The pattern is a fixed literal, so a failure to compile is a programming error.
    .unwrap()
}
|
||||
|
||||
fn collect_dates_from_file(date_regex: &Regex, text: &str) -> Vec<(usize, Date)> {
|
||||
let mut line = 1;
|
||||
let mut end_of_last_cap = 0;
|
||||
date_regex
|
||||
.captures_iter(text)
|
||||
.filter_map(|cap| {
|
||||
if let (Some(month), Some(year), None, None) | (None, None, Some(month), Some(year)) = (
|
||||
cap.name("m1"),
|
||||
cap.name("y1"),
|
||||
cap.name("m2"),
|
||||
cap.name("y2"),
|
||||
) {
|
||||
let year = year.as_str().parse().expect("year");
|
||||
let month = Month::from_str(month.as_str())
|
||||
.expect("month")
|
||||
.number_from_month();
|
||||
Some((cap.get(0).expect("all").range(), Date { year, month }))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.map(|(byte_range, date)| {
|
||||
line += text[end_of_last_cap..byte_range.end]
|
||||
.chars()
|
||||
.filter(|c| *c == '\n')
|
||||
.count();
|
||||
end_of_last_cap = byte_range.end;
|
||||
(line, date)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn collect_dates(paths: impl Iterator<Item = PathBuf>) -> BTreeMap<PathBuf, Vec<(usize, Date)>> {
|
||||
let date_regex = make_date_regex();
|
||||
let mut data = BTreeMap::new();
|
||||
for path in paths {
|
||||
let text = fs::read_to_string(&path).unwrap();
|
||||
let dates = collect_dates_from_file(&date_regex, &text);
|
||||
if !dates.is_empty() {
|
||||
data.insert(path, dates);
|
||||
}
|
||||
}
|
||||
data
|
||||
}
|
||||
|
||||
/// Keeps only the dates that are at least `min_months_since` months older
/// than `current_month`, dropping files that end up with no dates at all.
///
/// # Panics
///
/// The returned iterator panics when it meets a date later than
/// `current_month`.
fn filter_dates(
    current_month: Date,
    min_months_since: u32,
    dates_by_file: impl Iterator<Item = (PathBuf, Vec<(usize, Date)>)>,
) -> impl Iterator<Item = (PathBuf, Vec<(usize, Date)>)> {
    dates_by_file.filter_map(move |(path, mut dates)| {
        dates.retain(|(_, date)| {
            let age = current_month
                .months_since(*date)
                .expect("found date that is after current month");
            age >= min_months_since
        });
        if dates.is_empty() { None } else { Some((path, dates)) }
    })
}
|
||||
|
||||
fn main() {
|
||||
let mut args = env::args();
|
||||
if args.len() == 1 {
|
||||
eprintln!("error: expected root Markdown directory as CLI argument");
|
||||
process::exit(1);
|
||||
}
|
||||
let root_dir = args.nth(1).unwrap();
|
||||
let root_dir_path = Path::new(&root_dir);
|
||||
let glob_pat = format!("{}/**/*.md", root_dir);
|
||||
let today_chrono = Utc::now().date_naive();
|
||||
let current_month = Date {
|
||||
year: today_chrono.year_ce().1,
|
||||
month: today_chrono.month(),
|
||||
};
|
||||
|
||||
let dates_by_file = collect_dates(glob(&glob_pat).unwrap().map(Result::unwrap));
|
||||
let dates_by_file: BTreeMap<_, _> =
|
||||
filter_dates(current_month, 6, dates_by_file.into_iter()).collect();
|
||||
|
||||
if dates_by_file.is_empty() {
|
||||
println!("empty");
|
||||
} else {
|
||||
println!("Date Reference Triage for {}", current_month);
|
||||
println!("## Procedure");
|
||||
println!();
|
||||
println!(
|
||||
"Each of these dates should be checked to see if the docs they annotate are \
|
||||
up-to-date. Each date should be updated (in the Markdown file where it appears) to \
|
||||
use the current month ({current_month}), or removed if the docs it annotates are not \
|
||||
expected to fall out of date quickly.",
|
||||
current_month = today_chrono.format("%B %Y"),
|
||||
);
|
||||
println!();
|
||||
println!(
|
||||
"Please check off each date once a PR to update it (and, if applicable, its \
|
||||
surrounding docs) has been merged. Please also mention that you are working on a \
|
||||
particular set of dates so duplicate work is avoided."
|
||||
);
|
||||
println!();
|
||||
println!("Finally, once all the dates have been updated, please close this issue.");
|
||||
println!();
|
||||
println!("## Dates");
|
||||
println!();
|
||||
|
||||
for (path, dates) in dates_by_file {
|
||||
println!(
|
||||
"- {}",
|
||||
path.strip_prefix(&root_dir_path).unwrap_or(&path).display(),
|
||||
);
|
||||
for (line, date) in dates {
|
||||
println!(" - [ ] line {}: {}", line, date);
|
||||
}
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building the dates used in the assertions below.
    fn date(year: u32, month: u32) -> Date {
        Date { year, month }
    }

    #[test]
    fn test_months_since() {
        let earlier = date(2020, 3);
        let later = date(2021, 1);
        assert_eq!(later.months_since(earlier), Some(10));
    }

    #[test]
    fn test_date_regex() {
        let regex = make_date_regex();
        let accepted = [
            "<!-- date-check: jan 2021 -->",
            "<!-- date-check: january 2021 -->",
            "<!-- date-check: Jan 2021 -->",
            "<!-- date-check: January 2021 -->",
            "<!-- date-check --> jan 2021",
            "<!-- date-check --> january 2021",
            "<!-- date-check --> Jan 2021",
            "<!-- date-check --> January 2021",
            "<!-- date-check --> jan 2021 ",
            "<!-- date-check --> jan 2021.",
        ];
        for input in accepted {
            assert!(regex.is_match(input), "expected a match for {input:?}");
        }
    }

    #[test]
    fn test_date_regex_fail() {
        let regex = make_date_regex();
        let rejected = [
            "<!-- date-check: jan 221 -->",
            "<!-- date-check: jan 20221 -->",
            "<!-- date-check: 01 2021 -->",
            "<!-- date-check --> jan 221",
            "<!-- date-check --> jan 20222",
            "<!-- date-check --> 01 2021",
        ];
        for input in rejected {
            assert!(!regex.is_match(input), "expected no match for {input:?}");
        }
    }

    #[test]
    fn test_collect_dates_from_file() {
        // Line 1 of the fixture is the empty line right after the opening quote.
        let text = r"
        Test1
        <!-- date-check: jan 2021 -->
        Test2
        Foo<!-- date-check: february 2021
        -->
        Test3
        Test4
        Foo<!-- date-check: Mar 2021 -->Bar
        <!-- date-check:April 2021
        -->
        Test5
        Test6
        Test7
        <!-- date-check:

        may 2021 -->
        Test8
        Test1
        <!-- date-check --> jan 2021
        Test2
        Foo<!-- date-check
        --> february 2021
        Test3
        Test4
        Foo<!-- date-check --> mar 2021 Bar
        <!-- date-check
        --> apr 2021
        Test5
        Test6
        Test7
        <!-- date-check

        --> may 2021
        Test8
        <!--
        date-check
        --> june 2021.
        ";
        let expected = vec![
            (3, date(2021, 1)),
            (6, date(2021, 2)),
            (9, date(2021, 3)),
            (11, date(2021, 4)),
            (17, date(2021, 5)),
            (20, date(2021, 1)),
            (23, date(2021, 2)),
            (26, date(2021, 3)),
            (28, date(2021, 4)),
            (34, date(2021, 5)),
            (38, date(2021, 6)),
        ];
        assert_eq!(collect_dates_from_file(&make_date_regex(), text), expected);
    }
}
|
||||
48
src/doc/rustc-dev-guide/ci/linkcheck.sh
Executable file
48
src/doc/rustc-dev-guide/ci/linkcheck.sh
Executable file
|
|
@ -0,0 +1,48 @@
|
|||
#!/usr/bin/env bash

# Wrapper around the link checker binary. It picks the flags to pass based on
# where it runs (scheduled CI job, pull-request CI job, or a local checkout)
# and then replaces itself with the checker via `exec`.

set -e
set -o pipefail

LINKCHECK_BINARY=mdbook-linkcheck2

# Filters the JSON arriving on stdin, adding an `Authorization` header entry
# for github.com to the linkcheck section of the config.
# NOTE(review): the single quotes stop the shell from expanding
# `$GITHUB_TOKEN`, so the literal text `$GITHUB_TOKEN` ends up in the JSON;
# presumably the link checker substitutes the environment variable itself.
# Confirm against its documentation.
set_github_token() {
  jq '.config.output.linkcheck."http-headers"."github\\.com" = ["Authorization: Bearer $GITHUB_TOKEN"]'
}

# Link checking is opt-in: do nothing unless ENABLE_LINKCHECK is non-empty.
if [ -z "$ENABLE_LINKCHECK" ] ; then
  echo "Skipping link check."
  exit 0
fi

# https://docs.github.com/en/actions/reference/environment-variables
if [ "$GITHUB_EVENT_NAME" = "schedule" ] ; then # running in scheduled job
  # No file filter: every file is checked.
  FLAGS=""
  USE_TOKEN=1

  echo "Doing full link check."
elif [ "$GITHUB_EVENT_NAME" = "pull_request" ] ; then # running in PR CI build
  if [ -z "$BASE_SHA" ]; then
    echo "error: unexpected state: BASE_SHA must be non-empty in CI"
    exit 1
  fi

  # Files changed relative to the PR base, with the leading `src/` removed and
  # joined into one space-separated list.
  CHANGED_FILES=$(git diff --name-only $BASE_SHA... | sed 's#^src/##' | tr '\n' ' ')
  FLAGS="--no-cache -f $CHANGED_FILES"
  USE_TOKEN=1

  echo "Checking files changed since $BASE_SHA: $CHANGED_FILES"
else # running locally
  # Locally, compare against `master`; USE_TOKEN stays unset on this path.
  COMMIT_RANGE=master...
  CHANGED_FILES=$(git diff --name-only $COMMIT_RANGE | sed 's#^src/##' | tr '\n' ' ')
  FLAGS="-f $CHANGED_FILES"

  echo "Checking files changed in $COMMIT_RANGE: $CHANGED_FILES"
fi

echo "exec $LINKCHECK_BINARY $FLAGS"
# $FLAGS is deliberately left unquoted below so that it splits into separate
# arguments.
if [ "$USE_TOKEN" = 1 ]; then
  # Rewrite this script's stdin with the token header added and feed the
  # result to the checker on its stdin.
  config=$(set_github_token)
  exec $LINKCHECK_BINARY $FLAGS <<<"$config"
else
  exec $LINKCHECK_BINARY $FLAGS
fi
|
||||
11
src/doc/rustc-dev-guide/examples/README
Normal file
11
src/doc/rustc-dev-guide/examples/README
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
For each example to compile, you will need to first run the following:
|
||||
|
||||
rustup component add rustc-dev llvm-tools
|
||||
|
||||
To create an executable:
|
||||
|
||||
rustc rustc-driver-example.rs
|
||||
|
||||
To run an executable:
|
||||
|
||||
rustup run nightly ./rustc-driver-example
|
||||
92
src/doc/rustc-dev-guide/examples/rustc-driver-example.rs
Normal file
92
src/doc/rustc-dev-guide/examples/rustc-driver-example.rs
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
#![feature(rustc_private)]
|
||||
|
||||
extern crate rustc_ast;
|
||||
extern crate rustc_ast_pretty;
|
||||
extern crate rustc_data_structures;
|
||||
extern crate rustc_driver;
|
||||
extern crate rustc_error_codes;
|
||||
extern crate rustc_errors;
|
||||
extern crate rustc_hash;
|
||||
extern crate rustc_hir;
|
||||
extern crate rustc_interface;
|
||||
extern crate rustc_middle;
|
||||
extern crate rustc_session;
|
||||
extern crate rustc_span;
|
||||
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
use rustc_ast_pretty::pprust::item_to_string;
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
use rustc_driver::{Compilation, RunCompiler};
|
||||
use rustc_interface::interface::Compiler;
|
||||
use rustc_middle::ty::TyCtxt;
|
||||
|
||||
struct MyFileLoader;
|
||||
|
||||
impl rustc_span::source_map::FileLoader for MyFileLoader {
|
||||
fn file_exists(&self, path: &Path) -> bool {
|
||||
path == Path::new("main.rs")
|
||||
}
|
||||
|
||||
fn read_file(&self, path: &Path) -> io::Result<String> {
|
||||
if path == Path::new("main.rs") {
|
||||
Ok(r#"
|
||||
fn main() {
|
||||
let message = "Hello, World!";
|
||||
println!("{message}");
|
||||
}
|
||||
"#
|
||||
.to_string())
|
||||
} else {
|
||||
Err(io::Error::other("oops"))
|
||||
}
|
||||
}
|
||||
|
||||
fn read_binary_file(&self, _path: &Path) -> io::Result<Lrc<[u8]>> {
|
||||
Err(io::Error::other("oops"))
|
||||
}
|
||||
}
|
||||
|
||||
struct MyCallbacks;
|
||||
|
||||
impl rustc_driver::Callbacks for MyCallbacks {
|
||||
fn after_crate_root_parsing(
|
||||
&mut self,
|
||||
_compiler: &Compiler,
|
||||
krate: &rustc_ast::Crate,
|
||||
) -> Compilation {
|
||||
for item in &krate.items {
|
||||
println!("{}", item_to_string(&item));
|
||||
}
|
||||
|
||||
Compilation::Continue
|
||||
}
|
||||
|
||||
fn after_analysis(&mut self, _compiler: &Compiler, tcx: TyCtxt<'_>) -> Compilation {
|
||||
// Analyze the program and inspect the types of definitions.
|
||||
for id in tcx.hir().items() {
|
||||
let hir = tcx.hir();
|
||||
let item = hir.item(id);
|
||||
match item.kind {
|
||||
rustc_hir::ItemKind::Static(_, _, _) | rustc_hir::ItemKind::Fn(_, _, _) => {
|
||||
let name = item.ident;
|
||||
let ty = tcx.type_of(item.hir_id().owner.def_id);
|
||||
println!("{name:?}:\t{ty:?}")
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
Compilation::Stop
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
match RunCompiler::new(&["main.rs".to_string()], &mut MyCallbacks) {
|
||||
mut compiler => {
|
||||
compiler.set_file_loader(Some(Box::new(MyFileLoader)));
|
||||
compiler.run();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
#![feature(rustc_private)]
|
||||
|
||||
extern crate rustc_ast;
|
||||
extern crate rustc_ast_pretty;
|
||||
extern crate rustc_data_structures;
|
||||
extern crate rustc_driver;
|
||||
extern crate rustc_error_codes;
|
||||
extern crate rustc_errors;
|
||||
extern crate rustc_hash;
|
||||
extern crate rustc_hir;
|
||||
extern crate rustc_interface;
|
||||
extern crate rustc_middle;
|
||||
extern crate rustc_session;
|
||||
extern crate rustc_span;
|
||||
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
use rustc_ast_pretty::pprust::item_to_string;
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
use rustc_driver::{Compilation, RunCompiler};
|
||||
use rustc_interface::interface::Compiler;
|
||||
use rustc_middle::ty::TyCtxt;
|
||||
|
||||
struct MyFileLoader;
|
||||
|
||||
impl rustc_span::source_map::FileLoader for MyFileLoader {
|
||||
fn file_exists(&self, path: &Path) -> bool {
|
||||
path == Path::new("main.rs")
|
||||
}
|
||||
|
||||
fn read_file(&self, path: &Path) -> io::Result<String> {
|
||||
if path == Path::new("main.rs") {
|
||||
Ok(r#"
|
||||
fn main() {
|
||||
let message = "Hello, World!";
|
||||
println!("{message}");
|
||||
}
|
||||
"#
|
||||
.to_string())
|
||||
} else {
|
||||
Err(io::Error::other("oops"))
|
||||
}
|
||||
}
|
||||
|
||||
fn read_binary_file(&self, _path: &Path) -> io::Result<Lrc<[u8]>> {
|
||||
Err(io::Error::other("oops"))
|
||||
}
|
||||
}
|
||||
|
||||
struct MyCallbacks;
|
||||
|
||||
impl rustc_driver::Callbacks for MyCallbacks {
|
||||
fn after_crate_root_parsing(
|
||||
&mut self,
|
||||
_compiler: &Compiler,
|
||||
krate: &rustc_ast::Crate,
|
||||
) -> Compilation {
|
||||
for item in &krate.items {
|
||||
println!("{}", item_to_string(&item));
|
||||
}
|
||||
|
||||
Compilation::Continue
|
||||
}
|
||||
|
||||
fn after_analysis(&mut self, _compiler: &Compiler, tcx: TyCtxt<'_>) -> Compilation {
|
||||
// Every compilation contains a single crate.
|
||||
let hir_krate = tcx.hir();
|
||||
// Iterate over the top-level items in the crate, looking for the main function.
|
||||
for id in hir_krate.items() {
|
||||
let item = hir_krate.item(id);
|
||||
// Use pattern-matching to find a specific node inside the main function.
|
||||
if let rustc_hir::ItemKind::Fn(_, _, body_id) = item.kind {
|
||||
let expr = &tcx.hir().body(body_id).value;
|
||||
if let rustc_hir::ExprKind::Block(block, _) = expr.kind {
|
||||
if let rustc_hir::StmtKind::Let(let_stmt) = block.stmts[0].kind {
|
||||
if let Some(expr) = let_stmt.init {
|
||||
let hir_id = expr.hir_id; // hir_id identifies the string "Hello, world!"
|
||||
let def_id = item.hir_id().owner.def_id; // def_id identifies the main function
|
||||
let ty = tcx.typeck(def_id).node_type(hir_id);
|
||||
println!("{expr:#?}: {ty:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Compilation::Stop
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
match RunCompiler::new(&["main.rs".to_string()], &mut MyCallbacks) {
|
||||
mut compiler => {
|
||||
compiler.set_file_loader(Some(Box::new(MyFileLoader)));
|
||||
compiler.run();
|
||||
}
|
||||
}
|
||||
}
|
||||
81
src/doc/rustc-dev-guide/examples/rustc-interface-example.rs
Normal file
81
src/doc/rustc-dev-guide/examples/rustc-interface-example.rs
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#![feature(rustc_private)]
|
||||
|
||||
extern crate rustc_driver;
|
||||
extern crate rustc_error_codes;
|
||||
extern crate rustc_errors;
|
||||
extern crate rustc_hash;
|
||||
extern crate rustc_hir;
|
||||
extern crate rustc_interface;
|
||||
extern crate rustc_session;
|
||||
extern crate rustc_span;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use rustc_errors::registry;
|
||||
use rustc_hash::FxHashMap;
|
||||
use rustc_session::config;
|
||||
|
||||
// Entry point of the example: builds a `rustc_interface::Config` whose input is an in-memory
// source string, runs the compiler on it, prints the parsed crate, and then prints the name
// and type of every `static` and `fn` item found in it.
fn main() {
|
||||
let config = rustc_interface::Config {
|
||||
// Command line options
|
||||
opts: config::Options::default(),
|
||||
// cfg! configuration in addition to the default ones
|
||||
crate_cfg: Vec::new(), // Vec<_> (NOTE(review): presumably `Vec<String>` of `--cfg` specs; confirm)
|
||||
crate_check_cfg: Vec::new(), // Vec<_> (NOTE(review): presumably `Vec<String>` of `--check-cfg` specs; confirm)
|
||||
input: config::Input::Str {
|
||||
name: rustc_span::FileName::Custom("main.rs".into()),
|
||||
input: r#"
|
||||
static HELLO: &str = "Hello, world!";
|
||||
fn main() {
|
||||
println!("{HELLO}");
|
||||
}
|
||||
"#
|
||||
.into(),
|
||||
},
|
||||
output_dir: None, // Option<PathBuf>
|
||||
output_file: None, // Option<PathBuf>
|
||||
file_loader: None, // Option<Box<dyn FileLoader + Send + Sync>>
|
||||
locale_resources: rustc_driver::DEFAULT_LOCALE_RESOURCES.to_owned(),
|
||||
lint_caps: FxHashMap::default(), // FxHashMap<lint::LintId, lint::Level>
|
||||
// This is a callback from the driver that is called when [`ParseSess`] is created.
|
||||
psess_created: None, // Option<Box<dyn FnOnce(&mut ParseSess) + Send>>
|
||||
// This is a callback from the driver that is called when we're registering lints;
|
||||
// it is called during plugin registration when we have the LintStore in a non-shared state.
|
||||
//
|
||||
// Note that if you find a Some here you probably want to call that function in the new
|
||||
// function being registered.
|
||||
register_lints: None, // Option<Box<dyn Fn(&Session, &mut LintStore) + Send + Sync>>
|
||||
// This is a callback from the driver that is called just after we have populated
|
||||
// the list of queries.
|
||||
//
|
||||
// The second parameter is local providers and the third parameter is external providers.
|
||||
override_queries: None, // Option<fn(&Session, &mut ty::query::Providers<'_>, &mut ty::query::Providers<'_>)>
|
||||
// Registry of diagnostics codes.
|
||||
registry: registry::Registry::new(rustc_errors::codes::DIAGNOSTICS),
|
||||
make_codegen_backend: None,
|
||||
expanded_args: Vec::new(),
|
||||
ice_file: None,
|
||||
hash_untracked_state: None,
|
||||
using_internal_features: Arc::default(),
|
||||
};
|
||||
// Everything below runs inside the compiler session created from `config`.
rustc_interface::run_compiler(config, |compiler| {
|
||||
// Parse the program and print the syntax tree.
|
||||
let krate = rustc_interface::passes::parse(&compiler.sess);
|
||||
println!("{krate:?}");
|
||||
// Analyze the program and inspect the types of definitions.
|
||||
rustc_interface::create_and_enter_global_ctxt(&compiler, krate, |tcx| {
|
||||
for id in tcx.hir().items() {
|
||||
let hir = tcx.hir();
|
||||
let item = hir.item(id);
|
||||
// Only statics and functions are reported; every other item kind is skipped.
match item.kind {
|
||||
rustc_hir::ItemKind::Static(_, _, _) | rustc_hir::ItemKind::Fn(_, _, _) => {
|
||||
let name = item.ident;
|
||||
let ty = tcx.type_of(item.hir_id().owner.def_id);
|
||||
println!("{name:?}:\t{ty:?}")
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
#![feature(rustc_private)]
|
||||
|
||||
extern crate rustc_data_structures;
|
||||
extern crate rustc_driver;
|
||||
extern crate rustc_error_codes;
|
||||
extern crate rustc_errors;
|
||||
extern crate rustc_hash;
|
||||
extern crate rustc_hir;
|
||||
extern crate rustc_interface;
|
||||
extern crate rustc_session;
|
||||
extern crate rustc_span;
|
||||
|
||||
use rustc_errors::emitter::Emitter;
|
||||
use rustc_errors::registry::{self, Registry};
|
||||
use rustc_errors::translation::Translate;
|
||||
use rustc_errors::{DiagCtxt, DiagInner, FluentBundle};
|
||||
use rustc_session::config;
|
||||
use rustc_span::source_map::SourceMap;
|
||||
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
// Diagnostic emitter used by this example: its `Emitter` impl appends every diagnostic it
// receives to a shared buffer, which `main` prints after compilation.
struct DebugEmitter {
|
||||
// Source map handed back from `Emitter::source_map`.
source_map: Arc<SourceMap>,
|
||||
// Shared, mutex-protected list that collects the emitted diagnostics.
diagnostics: Arc<Mutex<Vec<DiagInner>>>,
|
||||
}
|
||||
|
||||
// `DebugEmitter` stores diagnostics untranslated, so it provides no Fluent bundles.
impl Translate for DebugEmitter {
|
||||
// No primary bundle is available.
fn fluent_bundle(&self) -> Option<&FluentBundle> {
|
||||
None
|
||||
}
|
||||
|
||||
// Panics if called: this emitter is not expected to translate any message.
fn fallback_fluent_bundle(&self) -> &FluentBundle {
|
||||
panic!("this emitter should not translate message")
|
||||
}
|
||||
}
|
||||
|
||||
// Collects diagnostics into the shared `diagnostics` buffer instead of rendering them.
impl Emitter for DebugEmitter {
|
||||
// Appends `diag` to the shared buffer; the registry argument is ignored.
// Panics if the mutex is poisoned (`lock().unwrap()`).
fn emit_diagnostic(&mut self, diag: DiagInner, _: &Registry) {
|
||||
self.diagnostics.lock().unwrap().push(diag);
|
||||
}
|
||||
|
||||
// Returns the source map stored in this emitter.
fn source_map(&self) -> Option<&SourceMap> {
|
||||
Some(&self.source_map)
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point of the example: compiles an in-memory program that contains a type error,
// captures the resulting diagnostics through `DebugEmitter`, and prints them.
fn main() {
|
||||
// `buffer` stays in `main` for reading at the end; `diagnostics` is a second handle to
// the same list and is captured by the `psess_created` callback below.
let buffer: Arc<Mutex<Vec<DiagInner>>> = Arc::default();
|
||||
let diagnostics = buffer.clone();
|
||||
let config = rustc_interface::Config {
|
||||
opts: config::Options::default(),
|
||||
// This program contains a type error.
|
||||
input: config::Input::Str {
|
||||
name: rustc_span::FileName::Custom("main.rs".into()),
|
||||
input: "
|
||||
fn main() {
|
||||
let x: &str = 1;
|
||||
}
|
||||
"
|
||||
.into(),
|
||||
},
|
||||
crate_cfg: Vec::new(),
|
||||
crate_check_cfg: Vec::new(),
|
||||
output_dir: None,
|
||||
output_file: None,
|
||||
file_loader: None,
|
||||
locale_resources: rustc_driver::DEFAULT_LOCALE_RESOURCES.to_owned(),
|
||||
lint_caps: rustc_hash::FxHashMap::default(),
|
||||
// Replace the session's diagnostic context with one backed by `DebugEmitter`, so
// diagnostics are pushed into the shared buffer.
psess_created: Some(Box::new(|parse_sess| {
|
||||
parse_sess.set_dcx(DiagCtxt::new(Box::new(DebugEmitter {
|
||||
source_map: parse_sess.clone_source_map(),
|
||||
diagnostics,
|
||||
})));
|
||||
})),
|
||||
register_lints: None,
|
||||
override_queries: None,
|
||||
registry: registry::Registry::new(rustc_errors::codes::DIAGNOSTICS),
|
||||
make_codegen_backend: None,
|
||||
expanded_args: Vec::new(),
|
||||
ice_file: None,
|
||||
hash_untracked_state: None,
|
||||
using_internal_features: Arc::default(),
|
||||
};
|
||||
rustc_interface::run_compiler(config, |compiler| {
|
||||
let krate = rustc_interface::passes::parse(&compiler.sess);
|
||||
rustc_interface::create_and_enter_global_ctxt(&compiler, krate, |tcx| {
|
||||
// Run the analysis phase on the local crate to trigger the type error.
|
||||
let _ = tcx.analysis(());
|
||||
});
|
||||
// If the compiler has encountered errors when this closure returns, it will abort (!) the program.
|
||||
// We avoid this by resetting the error count before returning
|
||||
compiler.sess.dcx().reset_err_count();
|
||||
});
|
||||
// Read buffered diagnostics.
|
||||
buffer.lock().unwrap().iter().for_each(|diagnostic| {
|
||||
println!("{diagnostic:#?}");
|
||||
});
|
||||
}
|
||||
1
src/doc/rustc-dev-guide/mermaid-init.js
Normal file
1
src/doc/rustc-dev-guide/mermaid-init.js
Normal file
|
|
@ -0,0 +1 @@
|
|||
mermaid.initialize({startOnLoad:true});
|
||||
4
src/doc/rustc-dev-guide/mermaid.min.js
vendored
Normal file
4
src/doc/rustc-dev-guide/mermaid.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
234
src/doc/rustc-dev-guide/src/SUMMARY.md
Normal file
234
src/doc/rustc-dev-guide/src/SUMMARY.md
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
# Summary
|
||||
|
||||
[Getting Started](./getting-started.md)
|
||||
[About this guide](./about-this-guide.md)
|
||||
|
||||
---
|
||||
|
||||
# Building and debugging `rustc`
|
||||
|
||||
- [How to build and run the compiler](./building/how-to-build-and-run.md)
|
||||
- [Quickstart](./building/quickstart.md)
|
||||
- [Prerequisites](./building/prerequisites.md)
|
||||
- [Suggested Workflows](./building/suggested.md)
|
||||
- [Distribution artifacts](./building/build-install-distribution-artifacts.md)
|
||||
- [Building Documentation](./building/compiler-documenting.md)
|
||||
- [Rustdoc overview](./rustdoc.md)
|
||||
- [Adding a new target](./building/new-target.md)
|
||||
- [Optimized build](./building/optimized-build.md)
|
||||
- [Testing the compiler](./tests/intro.md)
|
||||
- [Running tests](./tests/running.md)
|
||||
- [Testing with Docker](./tests/docker.md)
|
||||
- [Testing with CI](./tests/ci.md)
|
||||
- [Adding new tests](./tests/adding.md)
|
||||
- [Best practices](./tests/best-practices.md)
|
||||
- [Compiletest](./tests/compiletest.md)
|
||||
- [UI tests](./tests/ui.md)
|
||||
- [Test directives](./tests/directives.md)
|
||||
- [Minicore](./tests/minicore.md)
|
||||
- [Ecosystem testing](./tests/ecosystem.md)
|
||||
- [Crater](./tests/crater.md)
|
||||
- [Fuchsia](./tests/fuchsia.md)
|
||||
- [Rust for Linux](./tests/rust-for-linux.md)
|
||||
- [Performance testing](./tests/perf.md)
|
||||
- [Suggest tests tool](./tests/suggest-tests.md)
|
||||
- [Misc info](./tests/misc.md)
|
||||
- [Debugging the compiler](./compiler-debugging.md)
|
||||
- [Using the tracing/logging instrumentation](./tracing.md)
|
||||
- [Profiling the compiler](./profiling.md)
|
||||
- [with the linux perf tool](./profiling/with_perf.md)
|
||||
- [with Windows Performance Analyzer](./profiling/wpa_profiling.md)
|
||||
- [with the Rust benchmark suite](./profiling/with_rustc_perf.md)
|
||||
- [crates.io Dependencies](./crates-io.md)
|
||||
|
||||
# Contributing to Rust
|
||||
|
||||
- [Contribution Procedures](./contributing.md)
|
||||
- [About the compiler team](./compiler-team.md)
|
||||
- [Using Git](./git.md)
|
||||
- [Mastering @rustbot](./rustbot.md)
|
||||
- [Walkthrough: a typical contribution](./walkthrough.md)
|
||||
- [Implementing new language features](./implementing_new_features.md)
|
||||
- [Stability attributes](./stability.md)
|
||||
- [Stabilizing Features](./stabilization_guide.md)
|
||||
- [Feature Gates](./feature-gates.md)
|
||||
- [Coding conventions](./conventions.md)
|
||||
- [Procedures for Breaking Changes](./bug-fix-procedure.md)
|
||||
- [Using external repositories](./external-repos.md)
|
||||
- [Fuzzing](./fuzzing.md)
|
||||
- [Notification groups](notification-groups/about.md)
|
||||
- [Apple](notification-groups/apple.md)
|
||||
- [ARM](notification-groups/arm.md)
|
||||
- [Cleanup Crew](notification-groups/cleanup-crew.md)
|
||||
- [Emscripten](notification-groups/emscripten.md)
|
||||
- [LLVM](notification-groups/llvm.md)
|
||||
- [RISC-V](notification-groups/risc-v.md)
|
||||
- [WASI](notification-groups/wasi.md)
|
||||
- [WebAssembly](notification-groups/wasm.md)
|
||||
- [Windows](notification-groups/windows.md)
|
||||
- [Rust for Linux](notification-groups/rust-for-linux.md)
|
||||
- [Licenses](./licenses.md)
|
||||
- [Editions](guides/editions.md)
|
||||
|
||||
# Bootstrapping
|
||||
|
||||
- [Prologue](./building/bootstrapping/intro.md)
|
||||
- [What Bootstrapping does](./building/bootstrapping/what-bootstrapping-does.md)
|
||||
- [How Bootstrap does it](./building/bootstrapping/how-bootstrap-does-it.md)
|
||||
|
||||
# High-level Compiler Architecture
|
||||
|
||||
- [Prologue](./part-2-intro.md)
|
||||
- [Overview of the compiler](./overview.md)
|
||||
- [The compiler source code](./compiler-src.md)
|
||||
- [Queries: demand-driven compilation](./query.md)
|
||||
- [The Query Evaluation Model in Detail](./queries/query-evaluation-model-in-detail.md)
|
||||
- [Incremental compilation](./queries/incremental-compilation.md)
|
||||
- [Incremental compilation In Detail](./queries/incremental-compilation-in-detail.md)
|
||||
- [Debugging and Testing](./incrcomp-debugging.md)
|
||||
- [Salsa](./queries/salsa.md)
|
||||
- [Memory Management in Rustc](./memory.md)
|
||||
- [Serialization in Rustc](./serialization.md)
|
||||
- [Parallel Compilation](./parallel-rustc.md)
|
||||
- [Rustdoc internals](./rustdoc-internals.md)
|
||||
- [Search](./rustdoc-internals/search.md)
|
||||
|
||||
# Source Code Representation
|
||||
|
||||
- [Prologue](./part-3-intro.md)
|
||||
- [Syntax and the AST](./syntax-intro.md)
|
||||
- [Lexing and Parsing](./the-parser.md)
|
||||
- [Macro expansion](./macro-expansion.md)
|
||||
- [Name resolution](./name-resolution.md)
|
||||
- [Attributes](./attributes.md)
|
||||
- [`#[test]` Implementation](./test-implementation.md)
|
||||
- [Panic Implementation](./panic-implementation.md)
|
||||
- [AST Validation](./ast-validation.md)
|
||||
- [Feature Gate Checking](./feature-gate-ck.md)
|
||||
- [Lang Items](./lang-items.md)
|
||||
- [The HIR (High-level IR)](./hir.md)
|
||||
- [Lowering AST to HIR](./ast-lowering.md)
|
||||
- [Debugging](./hir-debugging.md)
|
||||
- [The THIR (Typed High-level IR)](./thir.md)
|
||||
- [The MIR (Mid-level IR)](./mir/index.md)
|
||||
- [MIR construction](./mir/construction.md)
|
||||
- [MIR visitor and traversal](./mir/visitor.md)
|
||||
- [MIR queries and passes: getting the MIR](./mir/passes.md)
|
||||
- [Inline assembly](./asm.md)
|
||||
|
||||
# Supporting Infrastructure
|
||||
|
||||
- [Command-line arguments](./cli.md)
|
||||
- [rustc_driver and rustc_interface](./rustc-driver/intro.md)
|
||||
- [Example: Type checking](./rustc-driver/interacting-with-the-ast.md)
|
||||
- [Example: Getting diagnostics](./rustc-driver/getting-diagnostics.md)
|
||||
- [Errors and Lints](diagnostics.md)
|
||||
- [Diagnostic and subdiagnostic structs](./diagnostics/diagnostic-structs.md)
|
||||
- [Translation](./diagnostics/translation.md)
|
||||
- [`LintStore`](./diagnostics/lintstore.md)
|
||||
- [Error codes](./diagnostics/error-codes.md)
|
||||
- [Diagnostic items](./diagnostics/diagnostic-items.md)
|
||||
- [`ErrorGuaranteed`](./diagnostics/error-guaranteed.md)
|
||||
|
||||
# Analysis
|
||||
|
||||
- [Prologue](./part-4-intro.md)
|
||||
- [Generic parameter definitions](./generic_parameters_summary.md)
|
||||
- [Implementation nuances of early/late bound parameters](./early-late-bound-params/early-late-bound-implementation-nuances.md)
|
||||
- [Interactions with turbofishing](./early-late-bound-params/turbofishing-and-early-late-bound.md)
|
||||
- [The `ty` module: representing types](./ty.md)
|
||||
- [ADTs and Generic Arguments](./ty_module/generic_arguments.md)
|
||||
- [Parameter types/consts/regions](./ty_module/param_ty_const_regions.md)
|
||||
- [`EarlyBinder` and instantiating parameters](./ty_module/early_binder.md)
|
||||
- [`Binder` and Higher ranked regions](./ty_module/binders.md)
|
||||
- [Instantiating binders](./ty_module/instantiating_binders.md)
|
||||
- [`TypeFolder` and `TypeFoldable`](./ty-fold.md)
|
||||
- [Parameter Environments](./param_env/param_env_summary.md)
|
||||
- [What is it?](./param_env/param_env_what_is_it.md)
|
||||
- [How are `ParamEnv`'s constructed internally](./param_env/param_env_construction_internals.md)
|
||||
- [Which `ParamEnv` do I use?](./param_env/param_env_acquisition.md)
|
||||
- [Type inference](./type-inference.md)
|
||||
- [Trait solving](./traits/resolution.md)
|
||||
- [Higher-ranked trait bounds](./traits/hrtb.md)
|
||||
- [Caching subtleties](./traits/caching.md)
|
||||
- [Implied bounds](./traits/implied-bounds.md)
|
||||
- [Specialization](./traits/specialization.md)
|
||||
- [Chalk-based trait solving](./traits/chalk.md)
|
||||
- [Lowering to logic](./traits/lowering-to-logic.md)
|
||||
- [Goals and clauses](./traits/goals-and-clauses.md)
|
||||
- [Canonical queries](./traits/canonical-queries.md)
|
||||
- [Canonicalization](./traits/canonicalization.md)
|
||||
- [Next-gen trait solving](./solve/trait-solving.md)
|
||||
- [Invariants of the type system](./solve/invariants.md)
|
||||
- [The solver](./solve/the-solver.md)
|
||||
- [Canonicalization](./solve/canonicalization.md)
|
||||
- [Coinduction](./solve/coinduction.md)
|
||||
- [Caching](./solve/caching.md)
|
||||
- [Proof trees](./solve/proof-trees.md)
|
||||
- [Normalization](./solve/normalization.md)
|
||||
- [Opaque types](./solve/opaque-types.md)
|
||||
- [Significant changes and quirks](./solve/significant-changes.md)
|
||||
- [`Unsize` and `CoerceUnsized` traits](./traits/unsize.md)
|
||||
- [Type checking](./type-checking.md)
|
||||
- [Method Lookup](./method-lookup.md)
|
||||
- [Variance](./variance.md)
|
||||
- [Coherence Checking](./coherence.md)
|
||||
- [Opaque Types](./opaque-types-type-alias-impl-trait.md)
|
||||
- [Inference details](./opaque-types-impl-trait-inference.md)
|
||||
- [Return Position Impl Trait In Trait](./return-position-impl-trait-in-trait.md)
|
||||
- [Region inference restrictions][opaque-infer]
|
||||
- [Effect checking](./effects.md)
|
||||
- [Pattern and Exhaustiveness Checking](./pat-exhaustive-checking.md)
|
||||
- [Unsafety Checking](./unsafety-checking.md)
|
||||
- [MIR dataflow](./mir/dataflow.md)
|
||||
- [Drop elaboration](./mir/drop-elaboration.md)
|
||||
- [The borrow checker](./borrow_check.md)
|
||||
- [Tracking moves and initialization](./borrow_check/moves_and_initialization.md)
|
||||
- [Move paths](./borrow_check/moves_and_initialization/move_paths.md)
|
||||
- [MIR type checker](./borrow_check/type_check.md)
|
||||
- [Drop check](./borrow_check/drop_check.md)
|
||||
- [Region inference](./borrow_check/region_inference.md)
|
||||
- [Constraint propagation](./borrow_check/region_inference/constraint_propagation.md)
|
||||
- [Lifetime parameters](./borrow_check/region_inference/lifetime_parameters.md)
|
||||
- [Member constraints](./borrow_check/region_inference/member_constraints.md)
|
||||
- [Placeholders and universes][pau]
|
||||
- [Closure constraints](./borrow_check/region_inference/closure_constraints.md)
|
||||
- [Error reporting](./borrow_check/region_inference/error_reporting.md)
|
||||
- [Two-phase-borrows](./borrow_check/two_phase_borrows.md)
|
||||
- [Closure capture inference](./closure.md)
|
||||
- [Async closures/"coroutine-closures"](coroutine-closures.md)
|
||||
|
||||
# MIR to Binaries
|
||||
|
||||
- [Prologue](./part-5-intro.md)
|
||||
- [MIR optimizations](./mir/optimizations.md)
|
||||
- [Debugging MIR](./mir/debugging.md)
|
||||
- [Constant evaluation](./const-eval.md)
|
||||
- [Interpreter](./const-eval/interpret.md)
|
||||
- [Monomorphization](./backend/monomorph.md)
|
||||
- [Lowering MIR](./backend/lowering-mir.md)
|
||||
- [Code Generation](./backend/codegen.md)
|
||||
- [Updating LLVM](./backend/updating-llvm.md)
|
||||
- [Debugging LLVM](./backend/debugging.md)
|
||||
- [Backend Agnostic Codegen](./backend/backend-agnostic.md)
|
||||
- [Implicit Caller Location](./backend/implicit-caller-location.md)
|
||||
- [Libraries and Metadata](./backend/libs-and-metadata.md)
|
||||
- [Profile-guided Optimization](./profile-guided-optimization.md)
|
||||
- [LLVM Source-Based Code Coverage](./llvm-coverage-instrumentation.md)
|
||||
- [Sanitizers Support](./sanitizers.md)
|
||||
- [Debugging support in the Rust compiler](./debugging-support-in-rustc.md)
|
||||
|
||||
---
|
||||
|
||||
[Appendix A: Background topics](./appendix/background.md)
|
||||
[Appendix B: Glossary](./appendix/glossary.md)
|
||||
[Appendix C: Code Index](./appendix/code-index.md)
|
||||
[Appendix D: Compiler Lecture Series](./appendix/compiler-lecture.md)
|
||||
[Appendix E: Bibliography](./appendix/bibliography.md)
|
||||
|
||||
[Appendix Z: HumorRust](./appendix/humorust.md)
|
||||
|
||||
---
|
||||
|
||||
[pau]: ./borrow_check/region_inference/placeholders_and_universes.md
|
||||
[opaque-infer]: ./borrow_check/opaque-types-region-inference-restrictions.md
|
||||
107
src/doc/rustc-dev-guide/src/about-this-guide.md
Normal file
107
src/doc/rustc-dev-guide/src/about-this-guide.md
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
# About this guide
|
||||
|
||||
This guide is meant to help document how rustc – the Rust compiler – works,
|
||||
as well as to help new contributors get involved in rustc development.
|
||||
|
||||
There are seven parts to this guide:
|
||||
|
||||
1. [Building `rustc`][p1]:
|
||||
Contains information that should be useful no matter how you are contributing,
|
||||
about building, debugging, profiling, etc.
|
||||
2. [Contributing to `rustc`][p2]:
|
||||
Contains information that should be useful no matter how you are contributing,
|
||||
about procedures for contribution, using Git and GitHub, stabilizing features, etc.
|
||||
3. [High-Level Compiler Architecture][p3]:
|
||||
Discusses the high-level architecture of the compiler and stages of the compile process.
|
||||
4. [Source Code Representation][p4]:
|
||||
Describes the process of taking raw source code from the user
|
||||
and transforming it into various forms that the compiler can work with easily.
|
||||
5. [Analysis][p5]:
|
||||
Discusses the analyses that the compiler uses to check various properties of the code
|
||||
and inform later stages of the compile process (e.g., type checking).
|
||||
6. [From MIR to Binaries][p6]: How linked executable machine code is generated.
|
||||
7. [Appendices][p7] at the end with useful reference information.
|
||||
There are a few of these with different information, including a glossary.
|
||||
|
||||
[p1]: ./building/how-to-build-and-run.md
|
||||
[p2]: ./contributing.md
|
||||
[p3]: ./part-2-intro.md
|
||||
[p4]: ./part-3-intro.md
|
||||
[p5]: ./part-4-intro.md
|
||||
[p6]: ./part-5-intro.md
|
||||
[p7]: ./appendix/background.md
|
||||
|
||||
### Constant change
|
||||
|
||||
Keep in mind that `rustc` is a real production-quality product,
|
||||
being worked upon continuously by a sizeable set of contributors.
|
||||
As such, it has its fair share of codebase churn and technical debt.
|
||||
In addition, many of the ideas discussed throughout this guide are idealized designs
|
||||
that are not fully realized yet.
|
||||
All this makes keeping this guide completely up to date on everything very hard!
|
||||
|
||||
The Guide itself is of course open-source as well,
|
||||
and the sources can be found at the [GitHub repository].
|
||||
If you find any mistakes in the guide, please file an issue about it.
|
||||
Even better, open a PR with a correction!
|
||||
|
||||
If you do contribute to the guide,
|
||||
please see the corresponding [subsection on writing documentation in this guide].
|
||||
|
||||
[subsection on writing documentation in this guide]: contributing.md#contributing-to-rustc-dev-guide
|
||||
|
||||
> “‘All conditioned things are impermanent’ —
|
||||
> when one sees this with wisdom, one turns away from suffering.”
|
||||
> _The Dhammapada, verse 277_
|
||||
|
||||
## Other places to find information
|
||||
|
||||
You might also find the following sites useful:
|
||||
|
||||
- This guide contains information about how various parts of the
|
||||
compiler work and how to contribute to the compiler.
|
||||
- [rustc API docs] -- rustdoc documentation for the compiler, devtools, and internal tools
|
||||
- [Forge] -- contains documentation about Rust infrastructure, team procedures, and more
|
||||
- [compiler-team] -- the home-base for the Rust compiler team, with description
|
||||
of the team procedures, active working groups, and the team calendar.
|
||||
- [std-dev-guide] -- a similar guide for developing the standard library.
|
||||
- [The t-compiler zulip][z]
|
||||
- `#contribute` and `#wg-rustup` on [Discord](https://discord.gg/rust-lang).
|
||||
- The [Rust Internals forum][rif], a place to ask questions and
|
||||
discuss Rust's internals
|
||||
- The [Rust reference][rr], even though it doesn't specifically talk about
|
||||
Rust's internals, is a great resource nonetheless
|
||||
- Although out of date, [Tom Lee's great blog article][tlgba] is very helpful
|
||||
- [rustaceans.org][ro] is helpful, but mostly dedicated to IRC
|
||||
- The [Rust Compiler Testing Docs][rctd]
|
||||
- For [@bors], [this cheat sheet][cheatsheet] is helpful
|
||||
- Google is always helpful when programming.
|
||||
You can [search all Rust documentation][gsearchdocs] (the standard library,
|
||||
the compiler, the books, the references, and the guides) to quickly find
|
||||
information about the language and compiler.
|
||||
- You can also use Rustdoc's built-in search feature to find documentation on
|
||||
types and functions within the crates you're looking at. You can also search
|
||||
by type signature! For example, searching for `* -> vec` should find all
|
||||
functions that return a `Vec<T>`.
|
||||
_Hint:_ Find more tips and keyboard shortcuts by typing `?` on any Rustdoc
|
||||
page!
|
||||
|
||||
|
||||
[rustc dev guide]: about-this-guide.md
|
||||
[gsearchdocs]: https://www.google.com/search?q=site:doc.rust-lang.org+your+query+here
|
||||
[stddocs]: https://doc.rust-lang.org/std
|
||||
[rif]: http://internals.rust-lang.org
|
||||
[rr]: https://doc.rust-lang.org/reference/
|
||||
[rustforge]: https://forge.rust-lang.org/
|
||||
[tlgba]: https://tomlee.co/2014/04/a-more-detailed-tour-of-the-rust-compiler/
|
||||
[ro]: https://www.rustaceans.org/
|
||||
[rctd]: tests/intro.md
|
||||
[cheatsheet]: https://bors.rust-lang.org/
|
||||
[Miri]: https://github.com/rust-lang/miri
|
||||
[@bors]: https://github.com/bors
|
||||
[GitHub repository]: https://github.com/rust-lang/rustc-dev-guide/
|
||||
[rustc API docs]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle
|
||||
[Forge]: https://forge.rust-lang.org/
|
||||
[compiler-team]: https://github.com/rust-lang/compiler-team/
|
||||
[std-dev-guide]: https://std-dev-guide.rust-lang.org/
|
||||
[z]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler
|
||||
333
src/doc/rustc-dev-guide/src/appendix/background.md
Normal file
333
src/doc/rustc-dev-guide/src/appendix/background.md
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
# Background topics
|
||||
|
||||
This section covers a number of common compiler terms that arise in
|
||||
this guide. We try to give the general definition while providing some
|
||||
Rust-specific context.
|
||||
|
||||
<a id="cfg"></a>
|
||||
|
||||
## What is a control-flow graph?
|
||||
|
||||
A control-flow graph (CFG) is a common term from compilers. If you've ever
|
||||
used a flow-chart, then the concept of a control-flow graph will be
|
||||
pretty familiar to you. It's a representation of your program that
|
||||
clearly exposes the underlying control flow.
|
||||
|
||||
A control-flow graph is structured as a set of **basic blocks**
|
||||
connected by edges. The key idea of a basic block is that it is a set
|
||||
of statements that execute "together" – that is, whenever you branch
|
||||
to a basic block, you start at the first statement and then execute
|
||||
all the remainder. Only at the end of the block is there the
|
||||
possibility of branching to more than one place (in MIR, we call that
|
||||
final statement the **terminator**):
|
||||
|
||||
```mir
|
||||
bb0: {
|
||||
statement0;
|
||||
statement1;
|
||||
statement2;
|
||||
...
|
||||
terminator;
|
||||
}
|
||||
```
|
||||
|
||||
Many expressions that you are used to in Rust compile down to multiple
|
||||
basic blocks. For example, consider an if statement:
|
||||
|
||||
```rust,ignore
|
||||
a = 1;
|
||||
if some_variable {
|
||||
b = 1;
|
||||
} else {
|
||||
c = 1;
|
||||
}
|
||||
d = 1;
|
||||
```
|
||||
|
||||
This would compile into four basic blocks in MIR. In textual form, it looks like
|
||||
this:
|
||||
|
||||
```mir
|
||||
BB0: {
|
||||
a = 1;
|
||||
if some_variable {
|
||||
goto BB1;
|
||||
} else {
|
||||
goto BB2;
|
||||
}
|
||||
}
|
||||
|
||||
BB1: {
|
||||
b = 1;
|
||||
goto BB3;
|
||||
}
|
||||
|
||||
BB2: {
|
||||
c = 1;
|
||||
goto BB3;
|
||||
}
|
||||
|
||||
BB3: {
|
||||
d = 1;
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
In graphical form, it looks like this:
|
||||
|
||||
```
|
||||
BB0
|
||||
+--------------------+
|
||||
| a = 1; |
|
||||
+--------------------+
|
||||
/ \
|
||||
if some_variable else
|
||||
/ \
|
||||
BB1 / \ BB2
|
||||
+-----------+ +-----------+
|
||||
| b = 1; | | c = 1; |
|
||||
+-----------+ +-----------+
|
||||
\ /
|
||||
\ /
|
||||
\ BB3 /
|
||||
+----------+
|
||||
| d = 1; |
|
||||
| ... |
|
||||
+----------+
|
||||
```
|
||||
|
||||
When using a control-flow graph, a loop simply appears as a cycle in
|
||||
the graph, and the `break` keyword translates into a path out of that
|
||||
cycle.
|
||||
|
||||
<a id="dataflow"></a>
|
||||
|
||||
## What is a dataflow analysis?
|
||||
|
||||
[*Static Program Analysis*](https://cs.au.dk/~amoeller/spa/) by Anders Møller
|
||||
and Michael I. Schwartzbach is an incredible resource!
|
||||
|
||||
_Dataflow analysis_ is a type of static analysis that is common in many
|
||||
compilers. It describes a general technique, rather than a particular analysis.
|
||||
|
||||
The basic idea is that we can walk over a [control-flow graph (CFG)](#cfg) and
|
||||
keep track of what some value could be. At the end of the walk, we might have
|
||||
shown that some claim is true or not necessarily true (e.g. "this variable must
|
||||
be initialized"). `rustc` tends to do dataflow analyses over the MIR, since MIR
|
||||
is already a CFG.
|
||||
|
||||
For example, suppose we want to check that `x` is initialized before it is used
|
||||
in this snippet:
|
||||
|
||||
```rust,ignore
|
||||
fn foo() {
|
||||
let mut x;
|
||||
|
||||
if some_cond {
|
||||
x = 1;
|
||||
}
|
||||
|
||||
dbg!(x);
|
||||
}
|
||||
```
|
||||
|
||||
A CFG for this code might look like this:
|
||||
|
||||
```txt
|
||||
+------+
|
||||
| Init | (A)
|
||||
+------+
|
||||
| \
|
||||
| if some_cond
|
||||
else \ +-------+
|
||||
| \| x = 1 | (B)
|
||||
| +-------+
|
||||
| /
|
||||
+---------+
|
||||
| dbg!(x) | (C)
|
||||
+---------+
|
||||
```
|
||||
|
||||
We can do the dataflow analysis as follows: we will start off with a flag `init`
|
||||
which indicates if we know `x` is initialized. As we walk the CFG, we will
|
||||
update the flag. At the end, we can check its value.
|
||||
|
||||
So first, in block (A), the variable `x` is declared but not initialized, so
|
||||
`init = false`. In block (B), we initialize the value, so we know that `x` is
|
||||
initialized. So at the end of (B), `init = true`.
|
||||
|
||||
Block (C) is where things get interesting. Notice that there are two incoming
|
||||
edges, one from (A) and one from (B), corresponding to whether `some_cond` is true or not.
|
||||
But we cannot know that! It could be the case that `some_cond` is always true,
|
||||
so that `x` is actually always initialized. It could also be the case that
|
||||
`some_cond` depends on something random (e.g. the time), so `x` may not be
|
||||
initialized. In general, we cannot know statically (due to [Rice's
|
||||
Theorem][rice]). So what should the value of `init` be in block (C)?
|
||||
|
||||
[rice]: https://en.wikipedia.org/wiki/Rice%27s_theorem
|
||||
|
||||
Generally, in dataflow analyses, if a block has multiple parents (like (C) in
|
||||
our example), its dataflow value will be some function of all its parents (and
|
||||
of course, what happens in (C)). Which function we use depends on the analysis
|
||||
we are doing.
|
||||
|
||||
In this case, we want to be able to prove definitively that `x` must be
|
||||
initialized before use. This forces us to be conservative and assume that
|
||||
`some_cond` might be false sometimes. So our "merging function" is "and". That
|
||||
is, `init = true` in (C) if `init = true` in (A) _and_ in (B) (or if `x` is
|
||||
initialized in (C)). But this is not the case; in particular, `init = false` in
|
||||
(A), and `x` is not initialized in (C). Thus, `init = false` in (C); we can
|
||||
report an error that "`x` may not be initialized before use".
|
||||
|
||||
There is definitely a lot more that can be said about dataflow analyses. There is an
|
||||
extensive body of research literature on the topic, including a lot of theory.
|
||||
We only discussed a forwards analysis, but backwards dataflow analysis is also
|
||||
useful. For example, rather than starting from block (A) and moving forwards,
|
||||
we might have started with the usage of `x` and moved backwards to try to find
|
||||
its initialization.
|
||||
|
||||
<a id="quantified"></a>
|
||||
|
||||
## What is "universally quantified"? What about "existentially quantified"?
|
||||
|
||||
In math, a predicate may be _universally quantified_ or _existentially
|
||||
quantified_:
|
||||
|
||||
- _Universal_ quantification:
|
||||
- the predicate holds if it is true for all possible inputs.
|
||||
- Traditional notation: ∀x: P(x). Read as "for all x, P(x) holds".
|
||||
- _Existential_ quantification:
|
||||
- the predicate holds if there is any input where it is true, i.e., there
|
||||
only has to be a single input.
|
||||
- Traditional notation: ∃x: P(x). Read as "there exists x such that P(x) holds".
|
||||
|
||||
In Rust, they come up in type checking and trait solving. For example,
|
||||
|
||||
```rust,ignore
|
||||
fn foo<T>()
|
||||
```
|
||||
This function claims that it is well-typed for all types `T`: `∀ T: well_typed(foo)`.
|
||||
|
||||
Another example:
|
||||
|
||||
```rust,ignore
|
||||
fn foo<'a>(_: &'a usize)
|
||||
```
|
||||
This function claims that for any lifetime `'a` (determined by the
|
||||
caller), it is well-typed: `∀ 'a: well_typed(foo)`.
|
||||
|
||||
Another example:
|
||||
|
||||
```rust,ignore
|
||||
fn foo<F>()
|
||||
where for<'a> F: Fn(&'a u8)
|
||||
```
|
||||
This function claims that it is well-typed for all types `F` such that for all
|
||||
lifetimes `'a`, `F: Fn(&'a u8)`: `∀ F: ∀ 'a: (F: Fn(&'a u8)) => well_typed(foo)`.
|
||||
|
||||
One more example:
|
||||
|
||||
```rust,ignore
|
||||
fn foo(_: dyn Debug)
|
||||
```
|
||||
This function claims that there exists some type `T` that implements `Debug`
|
||||
such that the function is well-typed: `∃ T: (T: Debug) and well_typed(foo)`.
|
||||
|
||||
<a id="variance"></a>
|
||||
|
||||
## What is a de Bruijn Index?
|
||||
|
||||
[De Bruijn indices][wikideb] are a way of representing, using only integers,
|
||||
which variables are bound in which binders. They were originally invented for
|
||||
use in lambda calculus evaluation (see [this Wikipedia article][wikideb] for
|
||||
more). In `rustc`, we use de Bruijn indices to [represent generic types][sub].
|
||||
|
||||
[wikideb]: https://en.wikipedia.org/wiki/De_Bruijn_index
|
||||
[sub]: ../ty_module/generic_arguments.md
|
||||
|
||||
|
||||
Here is a basic example of how de Bruijn indices might be used for closures (we
|
||||
don't actually do this in `rustc` though!):
|
||||
|
||||
```rust,ignore
|
||||
|x| {
|
||||
f(x) // de Bruijn index of `x` is 1 because `x` is bound 1 level up
|
||||
|
||||
|y| {
|
||||
g(x, y) // index of `x` is 2 because it is bound 2 levels up
|
||||
// index of `y` is 1 because it is bound 1 level up
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## What are co- and contra-variance?
|
||||
|
||||
Check out the subtyping chapter from the
|
||||
[Rust Nomicon](https://doc.rust-lang.org/nomicon/subtyping.html).
|
||||
|
||||
See the [variance](../variance.html) chapter of this guide for more info on how
|
||||
the type checker handles variance.
|
||||
|
||||
<a id="free-vs-bound"></a>
|
||||
|
||||
## What is a "free region" or a "free variable"? What about "bound region"?
|
||||
|
||||
Let's describe the concepts of free vs bound in terms of program
|
||||
variables, since that's the thing we're most familiar with.
|
||||
|
||||
- Consider this expression, which creates a closure: `|a, b| a + b`.
|
||||
Here, the `a` and `b` in `a + b` refer to the arguments that the closure will
|
||||
be given when it is called. We say that the `a` and `b` there are **bound** to
|
||||
the closure, and that the closure signature `|a, b|` is a **binder** for the
|
||||
names `a` and `b` (because any references to `a` or `b` within refer to the
|
||||
variables that it introduces).
|
||||
- Consider this expression: `a + b`. In this expression, `a` and `b` refer to
|
||||
local variables that are defined *outside* of the expression. We say that
|
||||
those variables **appear free** in the expression (i.e., they are **free**,
|
||||
not **bound** (tied up)).
|
||||
|
||||
So there you have it: a variable "appears free" in some
|
||||
expression/statement/whatever if it refers to something defined
|
||||
outside of that expression/statement/whatever. Equivalently, we can
|
||||
then refer to the "free variables" of an expression – which is just
|
||||
the set of variables that "appear free".
|
||||
|
||||
So what does this have to do with regions? Well, we can apply the
|
||||
analogous concept to types and regions. For example, in the type `&'a
|
||||
u32`, `'a` appears free. But in the type `for<'a> fn(&'a u32)`, it
|
||||
does not.
|
||||
|
||||
# Further Reading About Compilers
|
||||
|
||||
> Thanks to `mem`, `scottmcm`, and `Levi` on the official Discord for the
|
||||
> recommendations, and to `tinaun` for posting a link to a [twitter thread from
|
||||
> Graydon Hoare](https://web.archive.org/web/20181230012554/https://twitter.com/graydon_pub/status/1039615569132118016)
|
||||
> which had some more recommendations!
|
||||
>
|
||||
> Other sources: https://gcc.gnu.org/wiki/ListOfCompilerBooks
|
||||
>
|
||||
> If you have other suggestions, please feel free to open an issue or PR.
|
||||
|
||||
## Books
|
||||
- [Types and Programming Languages](https://www.cis.upenn.edu/~bcpierce/tapl/)
|
||||
- [Programming Language Pragmatics](https://www.cs.rochester.edu/~scott/pragmatics/)
|
||||
- [Practical Foundations for Programming Languages](https://www.cs.cmu.edu/~rwh/pfpl/)
|
||||
- [Compilers: Principles, Techniques, and Tools, 2nd Edition](https://www.pearson.com/us/higher-education/program/Aho-Compilers-Principles-Techniques-and-Tools-2nd-Edition/PGM167067.html)
|
||||
- [Garbage Collection: Algorithms for Automatic Dynamic Memory Management](https://www.cs.kent.ac.uk/people/staff/rej/gcbook/)
|
||||
- [Linkers and Loaders](https://www.amazon.com/Linkers-Kaufmann-Software-Engineering-Programming/dp/1558604960) (There are also free versions of this, but the version we had linked seems to be offline at the moment.)
|
||||
- [Advanced Compiler Design and Implementation](https://www.goodreads.com/book/show/887908.Advanced_Compiler_Design_and_Implementation)
|
||||
- [Building an Optimizing Compiler](https://www.goodreads.com/book/show/2063103.Building_an_Optimizing_Compiler)
|
||||
- [Crafting Interpreters](http://www.craftinginterpreters.com/)
|
||||
|
||||
## Courses
|
||||
- [University of Oregon Programming Languages Summer School archive](https://www.cs.uoregon.edu/research/summerschool/archives.html)
|
||||
|
||||
## Wikis
|
||||
- [Wikipedia](https://en.wikipedia.org/wiki/List_of_programming_languages_by_type)
|
||||
- [Esoteric Programming Languages](https://esolangs.org/wiki/Main_Page)
|
||||
- [Stanford Encyclopedia of Philosophy](https://plato.stanford.edu/index.html)
|
||||
- [nLab](https://ncatlab.org/nlab/show/HomePage)
|
||||
|
||||
## Misc Papers and Blog Posts
|
||||
- [Programming in Martin-Löf's Type Theory](https://www.cse.chalmers.se/research/group/logic/book/)
|
||||
- [Polymorphism, Subtyping, and Type Inference in MLsub](https://dl.acm.org/doi/10.1145/3093333.3009882)
|
||||
92
src/doc/rustc-dev-guide/src/appendix/bibliography.md
Normal file
92
src/doc/rustc-dev-guide/src/appendix/bibliography.md
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# Rust Bibliography
|
||||
|
||||
This is a reading list of material relevant to Rust. It includes prior
|
||||
research that has - at one time or another - influenced the design of
|
||||
Rust, as well as publications about Rust.
|
||||
|
||||
## Type system
|
||||
|
||||
* [Alias burying](https://dl.acm.org/doi/10.1002/spe.370) - We tried something similar and abandoned it.
|
||||
* [External uniqueness is unique enough](https://lirias.kuleuven.be/retrieve/35835)
|
||||
* [Macros that work together](https://www.cs.utah.edu/plt/publications/jfp12-draft-fcdf.pdf)
|
||||
* [Making ad-hoc polymorphism less ad hoc](https://dl.acm.org/doi/10.1145/75277.75283)
|
||||
* [Region based memory management in Cyclone](https://www.cs.umd.edu/projects/cyclone/papers/cyclone-regions.pdf)
|
||||
* [Region Based Memory Management](https://www.cs.ucla.edu/~palsberg/tba/papers/tofte-talpin-iandc97.pdf)
|
||||
* [Safe manual memory management in Cyclone](https://www.cs.umd.edu/projects/PL/cyclone/scp.pdf)
|
||||
* [Skolem Normal Form](https://en.wikipedia.org/wiki/Skolem_normal_form)
|
||||
* [Traits: composable units of behavior](http://scg.unibe.ch/archive/papers/Scha03aTraits.pdf)
|
||||
* [Uniqueness and Reference Immutability for Safe Parallelism](https://research.microsoft.com/pubs/170528/msr-tr-2012-79.pdf)
|
||||
|
||||
## Concurrency
|
||||
|
||||
* [A Java fork/join calamity](https://web.archive.org/web/20190904045322/http://www.coopsoft.com/ar/CalamityArticle.html) - critique of Java's fork/join library, particularly its application of work stealing to non-strict computation
|
||||
* [Algorithms for scalable synchronization of shared-memory multiprocessors](https://www.cs.rochester.edu/u/scott/papers/1991_TOCS_synch.pdf)
|
||||
* [Balanced work stealing for time-sharing multicores](https://web.njit.edu/~dingxn/papers/BWS.pdf)
|
||||
* [Contention aware scheduling](https://www.blagodurov.net/files/a8-blagodurov.pdf)
|
||||
* [Dynamic circular work stealing deque](https://patents.google.com/patent/US7346753B2/en) - The Chase/Lev deque
|
||||
* [Epoch-based reclamation](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf).
|
||||
* [Language support for fast and reliable message passing in singularity OS](https://research.microsoft.com/pubs/67482/singsharp.pdf)
|
||||
* [Non-blocking steal-half work queues](https://www.cs.bgu.ac.il/%7Ehendlerd/papers/p280-hendler.pdf)
|
||||
* [Reagents: expressing and composing fine-grained concurrency](https://aturon.github.io/academic/reagents.pdf)
|
||||
* [Scheduling multithreaded computations by work stealing](https://www.lri.fr/~cecile/ENSEIGNEMENT/IPAR/Exposes/cilk1.pdf)
|
||||
* [Scheduling techniques for concurrent systems](https://www.stanford.edu/~ouster/cgi-bin/papers/coscheduling.pdf)
|
||||
* [Singularity: rethinking the software stack](https://research.microsoft.com/pubs/69431/osr2007_rethinkingsoftwarestack.pdf)
|
||||
* [The data locality of work stealing](http://www.aladdin.cs.cmu.edu/papers/pdfs/y2000/locality_spaa00.pdf)
|
||||
* [Thread scheduling for multiprogramming multiprocessors](https://www.eecis.udel.edu/%7Ecavazos/cisc879-spring2008/papers/arora98thread.pdf)
|
||||
* [Three layer cake for shared-memory programming](https://dl.acm.org/doi/10.1145/1953611.1953616)
|
||||
* [Work-first and help-first scheduling policies for async-finish task parallelism](https://dl.acm.org/doi/10.1109/IPDPS.2009.5161079) - More general than fully-strict work stealing
|
||||
|
||||
## Others
|
||||
|
||||
* [Composing High-Performance Memory Allocators](https://people.cs.umass.edu/~emery/pubs/berger-pldi2001.pdf)
|
||||
* [Crash-only software](https://www.usenix.org/legacy/events/hotos03/tech/full_papers/candea/candea.pdf)
|
||||
* [Reconsidering Custom Memory Allocation](https://people.cs.umass.edu/~emery/pubs/berger-oopsla2002.pdf)
|
||||
|
||||
## Papers *about* Rust
|
||||
|
||||
* [GPU Programming in Rust: Implementing High Level Abstractions in a Systems
|
||||
Level
|
||||
Language](https://ieeexplore.ieee.org/document/6650903).
|
||||
Early GPU work by Eric Holk.
|
||||
* [Parallel closures: a new twist on an old
|
||||
idea](https://www.usenix.org/conference/hotpar12/parallel-closures-new-twist-old-idea)
|
||||
- not exactly about Rust, but by nmatsakis
|
||||
* [Patina: A Formalization of the Rust Programming
|
||||
Language](https://dada.cs.washington.edu/research/tr/2015/03/UW-CSE-15-03-02.pdf).
|
||||
Early formalization of a subset of the type system, by Eric Reed.
|
||||
* [Experience Report: Developing the Servo Web Browser Engine using
|
||||
Rust](https://arxiv.org/abs/1505.07383). By Lars Bergstrom.
|
||||
* [Implementing a Generic Radix Trie in
|
||||
Rust](https://michaelsproul.github.io/rust_radix_paper/rust-radix-sproul.pdf). Undergrad
|
||||
paper by Michael Sproul.
|
||||
* [Reenix: Implementing a Unix-Like Operating System in
|
||||
Rust](https://scialex.github.io/reenix.pdf). Undergrad paper by Alex
|
||||
Light.
|
||||
* [Evaluation of performance and productivity metrics of potential programming languages in the HPC environment](https://github.com/1wilkens/thesis-ba).
|
||||
Bachelor's thesis by Florian Wilkens. Compares C, Go and Rust.
|
||||
* [Nom, a byte oriented, streaming, zero copy, parser combinators library
|
||||
in Rust](http://spw15.langsec.org/papers/couprie-nom.pdf). By
|
||||
Geoffroy Couprie, research for VLC.
|
||||
* [Graph-Based Higher-Order Intermediate
|
||||
Representation](https://compilers.cs.uni-saarland.de/papers/lkh15_cgo.pdf). An
|
||||
experimental IR implemented in Impala, a Rust-like language.
|
||||
* [Code Refinement of Stencil
|
||||
Codes](https://compilers.cs.uni-saarland.de/papers/ppl14_web.pdf). Another
|
||||
paper using Impala.
|
||||
* [Parallelization in Rust with fork-join and
|
||||
friends](http://publications.lib.chalmers.se/records/fulltext/219016/219016.pdf). Linus
|
||||
Farnstrand's master's thesis.
|
||||
* [Session Types for
|
||||
Rust](https://munksgaard.me/papers/laumann-munksgaard-larsen.pdf). Philip
|
||||
Munksgaard's master's thesis. Research for Servo.
|
||||
* [Ownership is Theft: Experiences Building an Embedded OS in Rust - Amit Levy, et al.](https://amitlevy.com/papers/tock-plos2015.pdf)
|
||||
* [You can't spell trust without Rust](https://faultlore.com/blah/papers/thesis.pdf). Aria Beingessner's master's thesis.
|
||||
* [Rust-Bio: a fast and safe bioinformatics library](https://rust-bio.github.io/). Johannes Köster
|
||||
* [Safe, Correct, and Fast Low-Level Networking](https://octarineparrot.com/assets/msci_paper.pdf). Robert Clipsham's master's thesis.
|
||||
* [Formalizing Rust traits](https://open.library.ubc.ca/cIRcle/collections/ubctheses/24/items/1.0220521). Jonatan Milewski's master's thesis.
|
||||
* [Rust as a Language for High Performance GC Implementation](https://dl.acm.org/doi/pdf/10.1145/3241624.2926707)
|
||||
* [Simple Verification of Rust Programs via Functional Purification](https://github.com/Kha/electrolysis). Sebastian Ullrich's master's thesis.
|
||||
* [Writing parsers like it is 2017](http://spw17.langsec.org/papers/chifflier-parsing-in-2017.pdf). Pierre Chifflier and Geoffroy Couprie for the Langsec Workshop.
|
||||
* [The Case for Writing a Kernel in Rust](https://www.tockos.org/assets/papers/rust-kernel-apsys2017.pdf)
|
||||
* [RustBelt: Securing the Foundations of the Rust Programming Language](https://plv.mpi-sws.org/rustbelt/popl18/)
|
||||
* [Oxide: The Essence of Rust](https://arxiv.org/abs/1903.00982). By Aaron Weiss, Olek Gierczak, Daniel Patterson, Nicholas D. Matsakis, and Amal Ahmed.
|
||||
45
src/doc/rustc-dev-guide/src/appendix/code-index.md
Normal file
45
src/doc/rustc-dev-guide/src/appendix/code-index.md
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
# Code Index
|
||||
|
||||
rustc has a lot of important data structures. This is an attempt to give some
|
||||
guidance on where to learn more about some of the key data structures of the
|
||||
compiler.
|
||||
|
||||
Item | Kind | Short description | Chapter | Declaration
|
||||
----------------|----------|-----------------------------|--------------------|-------------------
|
||||
`BodyId` | struct | One of four types of HIR node identifiers | [Identifiers in the HIR] | [compiler/rustc_hir/src/hir.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/struct.BodyId.html)
|
||||
`Compiler` | struct | Represents a compiler session and can be used to drive a compilation. | [The Rustc Driver and Interface] | [compiler/rustc_interface/src/interface.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_interface/interface/struct.Compiler.html)
|
||||
`ast::Crate` | struct | A syntax-level representation of a parsed crate | [The parser] | [compiler/rustc_ast/src/ast.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/ast/struct.Crate.html)
|
||||
`rustc_hir::Crate` | struct | A more abstract, compiler-friendly form of a crate's AST | [The HIR] | [compiler/rustc_hir/src/hir.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/struct.Crate.html)
|
||||
`DefId` | struct | One of four types of HIR node identifiers | [Identifiers in the HIR] | [compiler/rustc_hir/src/def_id.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/def_id/struct.DefId.html)
|
||||
`Diag` | struct | A struct for a compiler diagnostic, such as an error or lint | [Emitting Diagnostics] | [compiler/rustc_errors/src/diagnostic.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_errors/struct.Diag.html)
|
||||
`DocContext` | struct | A state container used by rustdoc when crawling through a crate to gather its documentation | [Rustdoc] | [src/librustdoc/core.rs](https://github.com/rust-lang/rust/blob/master/src/librustdoc/core.rs)
|
||||
`HirId` | struct | One of four types of HIR node identifiers | [Identifiers in the HIR] | [compiler/rustc_hir/src/hir_id.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir_id/struct.HirId.html)
|
||||
`NodeId` | struct | One of four types of HIR node identifiers. Being phased out | [Identifiers in the HIR] | [compiler/rustc_ast/src/node_id.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/node_id/struct.NodeId.html)
|
||||
`P` | struct | An owned immutable smart pointer. By contrast, `&T` is not owned, and `Box<T>` is not immutable. | None | [compiler/rustc_ast/src/ptr.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/ptr/struct.P.html)
|
||||
`ParamEnv` | struct | Information about generic parameters or `Self`, useful for working with associated or generic items | [Parameter Environment] | [compiler/rustc_middle/src/ty/mod.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.ParamEnv.html)
|
||||
`ParseSess` | struct | This struct contains information about a parsing session | [The parser] | [compiler/rustc_session/src/parse.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_session/parse/struct.ParseSess.html)
|
||||
`Query` | struct | Represents the result of a query to the `Compiler` interface and allows stealing, borrowing, and returning the results of compiler passes. | [The Rustc Driver and Interface] | [compiler/rustc_interface/src/queries.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_interface/queries/struct.Query.html)
|
||||
`Rib` | struct | Represents a single scope of names | [Name resolution] | [compiler/rustc_resolve/src/late.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_resolve/late/struct.Rib.html)
|
||||
`Session` | struct | The data associated with a compilation session | [The parser], [The Rustc Driver and Interface] | [compiler/rustc_session/src/session.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_session/struct.Session.html)
|
||||
`SourceFile` | struct | Part of the `SourceMap`. Maps AST nodes to their source code for a single source file. Was previously called FileMap | [The parser] | [compiler/rustc_span/src/lib.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/struct.SourceFile.html)
|
||||
`SourceMap` | struct | Maps AST nodes to their source code. It is composed of `SourceFile`s. Was previously called CodeMap | [The parser] | [compiler/rustc_span/src/source_map.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/source_map/struct.SourceMap.html)
|
||||
`Span` | struct | A location in the user's source code, used for error reporting primarily | [Emitting Diagnostics] | [compiler/rustc_span/src/span_encoding.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/struct.Span.html)
|
||||
`StringReader` | struct | This is the lexer used during parsing. It consumes characters from the raw source code being compiled and produces a series of tokens for use by the rest of the parser | [The parser] | [compiler/rustc_parse/src/lexer/mod.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/lexer/struct.StringReader.html)
|
||||
`rustc_ast::tokenstream::TokenStream` | struct | An abstract sequence of tokens, organized into `TokenTree`s | [The parser], [Macro expansion] | [compiler/rustc_ast/src/tokenstream.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/tokenstream/struct.TokenStream.html)
|
||||
`TraitDef` | struct | This struct contains a trait's definition with type information | [The `ty` modules] | [compiler/rustc_middle/src/ty/trait_def.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/trait_def/struct.TraitDef.html)
|
||||
`TraitRef` | struct | The combination of a trait and its input types (e.g. `P0: Trait<P1...Pn>`) | [Trait Solving: Goals and Clauses] | [compiler/rustc_middle/src/ty/sty.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/type.TraitRef.html)
|
||||
`Ty<'tcx>` | struct | This is the internal representation of a type used for type checking | [Type checking] | [compiler/rustc_middle/src/ty/mod.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.Ty.html)
|
||||
`TyCtxt<'tcx>` | struct | The "typing context". This is the central data structure in the compiler. It is the context that you use to perform all manner of queries | [The `ty` modules] | [compiler/rustc_middle/src/ty/context.rs](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.TyCtxt.html)
|
||||
|
||||
[The HIR]: ../hir.html
|
||||
[Identifiers in the HIR]: ../hir.html#hir-id
|
||||
[The parser]: ../the-parser.html
|
||||
[The Rustc Driver and Interface]: ../rustc-driver/intro.html
|
||||
[Type checking]: ../type-checking.html
|
||||
[The `ty` modules]: ../ty.html
|
||||
[Rustdoc]: ../rustdoc.html
|
||||
[Emitting Diagnostics]: ../diagnostics.html
|
||||
[Macro expansion]: ../macro-expansion.html
|
||||
[Name resolution]: ../name-resolution.html
|
||||
[Parameter Environment]: ../param_env/param_env_summary.html
|
||||
[Trait Solving: Goals and Clauses]: ../traits/goals-and-clauses.html#domain-goals
|
||||
48
src/doc/rustc-dev-guide/src/appendix/compiler-lecture.md
Normal file
48
src/doc/rustc-dev-guide/src/appendix/compiler-lecture.md
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Compiler Lecture Series
|
||||
|
||||
These are videos where various experts explain different parts of the compiler:
|
||||
|
||||
## General
|
||||
- [January 2019: Tom Tromey discusses debugging support in rustc](https://www.youtube.com/watch?v=elBxMRSNYr4)
|
||||
- [June 2019: Responsive compilers - Nicholas Matsakis - PLISS 2019](https://www.youtube.com/watch?v=N6b44kMS6OM)
|
||||
- [June 2019: Things I Learned (TIL) - Nicholas Matsakis - PLISS 2019](https://www.youtube.com/watch?v=LIYkT3p5gTs)
|
||||
|
||||
## Rust Analyzer
|
||||
- [January 2019: How Salsa Works](https://www.youtube.com/watch?v=_muY4HjSqVw)
|
||||
- [January 2019: Salsa In More Depth](https://www.youtube.com/watch?v=i_IhACacPRY)
|
||||
- [January 2019: Rust analyzer guide](https://www.youtube.com/watch?v=ANKBNiSWyfc)
|
||||
- [February 2019: Rust analyzer syntax trees](https://www.youtube.com/watch?v=DGAuLWdCCAI)
|
||||
- [March 2019: rust-analyzer type-checker overview by flodiebold](https://www.youtube.com/watch?v=Lmp3P9WNL8o)
|
||||
- [March 2019: RLS 2.0, Salsa, and Name Resolution](https://www.youtube.com/watch?v=Xr-rBqLr-G4)
|
||||
|
||||
## Type System
|
||||
- [July 2015: Felix Klock - Rust: A type system you didn't know you wanted - Curry On](https://www.youtube.com/watch?v=Q7lQCgnNWU0)
|
||||
- [November 2016: Felix Klock - Subtyping in Rust and Clarke's Third Law](https://www.youtube.com/watch?v=fI4RG_uq-WU)
|
||||
- [February 2019: Universes and Lifetimes](https://www.youtube.com/watch?v=iV1Z0xYXkck)
|
||||
- [April 2019: Representing types in rustc](https://www.youtube.com/watch?v=c01TsOsr3-c)
|
||||
- [March 2019: RFC #2229 Disjoint Field Capture plan](https://www.youtube.com/watch?v=UTXOptVMuIc)
|
||||
|
||||
## Closures
|
||||
- [October 2018: closures and upvar capture](https://www.youtube.com/watch?v=fMopdkn5-Xw)
|
||||
- [October 2018: blitzerr closure upvar tys](https://www.youtube.com/watch?v=pLmVhSB-z4s)
|
||||
- [January 2019: Convert Closure Upvar Representation to Tuples with blitzerr](https://www.youtube.com/watch?v=2QCuNtISoYc)
|
||||
|
||||
## Chalk
|
||||
- [July 2018: Coherence in Chalk by Sunjay Varma - Bay Area Rust Meetup](https://www.youtube.com/watch?v=rZqS4bLPL24)
|
||||
- [March 2019: rustc-chalk integration overview](https://www.youtube.com/watch?v=MBWtbDifPeU)
|
||||
- [April 2019: How the chalk-engine crate works](https://www.youtube.com/watch?v=Ny2928cGDoM)
|
||||
- [May 2019: How the chalk-engine crate works 2](https://www.youtube.com/watch?v=hmV66tB79LM)
|
||||
|
||||
## Polonius
|
||||
- [March 2019: Polonius-rustc walkthrough](https://www.youtube.com/watch?v=i5KdU0ieb_A)
|
||||
- [May 2019: Polonius WG: Initialization and move tracking](https://www.youtube.com/watch?v=ilv9V-328HI)
|
||||
|
||||
## Miri
|
||||
- [March 2019: oli-obk on miri and constant evaluation](https://www.youtube.com/watch?v=5Pm2C1YXrvM)
|
||||
|
||||
## Async
|
||||
- [February 2019: async-await implementation plans](https://www.youtube.com/watch?v=xe2_whJWBC0)
|
||||
- [April 2019: async-await region inferencer](https://www.youtube.com/watch?v=hlOxfkUDLPQ)
|
||||
|
||||
## Code Generation
|
||||
- [January 2019: Cranelift](https://www.youtube.com/watch?v=9OIA7DTFQWU)
|
||||
104
src/doc/rustc-dev-guide/src/appendix/glossary.md
Normal file
104
src/doc/rustc-dev-guide/src/appendix/glossary.md
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
# Glossary
|
||||
|
||||
Term | Meaning
|
||||
------------------------------------------------------|--------
|
||||
<span id="arena">arena, arena allocation</span> | An _arena_ is a large memory buffer from which other memory allocations are made. This style of allocation is called _arena allocation_. See [this chapter](../memory.md) for more info.
|
||||
<span id="ast">AST</span> | The _abstract syntax tree_ produced by the `rustc_ast` crate; reflects user syntax very closely.
|
||||
<span id="apit">APIT</span> | An argument-position `impl Trait`. Also known as an anonymous type parameter. ([see the reference](https://doc.rust-lang.org/reference/types/impl-trait.html#anonymous-type-parameters)).
|
||||
<span id="binder">binder</span> | A _binder_ is a place where a variable or type is declared; for example, the `<T>` is a binder for the generic type parameter `T` in `fn foo<T>(..)`, and \|`a`\|` ...` is a binder for the parameter `a`. See [the background chapter for more](./background.md#free-vs-bound).
|
||||
<span id="body-id">`BodyId`</span> | An identifier that refers to a specific body (definition of a function or constant) in the crate. See [the HIR chapter for more](../hir.md#identifiers-in-the-hir).
|
||||
<span id="bound-var">bound variable</span> | A _bound variable_ is one that is declared within an expression/term. For example, the variable `a` is bound within the closure expression \|`a`\|` a * 2`. See [the background chapter for more](./background.md#free-vs-bound)
|
||||
<span id="codegen">codegen</span> | Short for _code generation_. The code to translate MIR into LLVM IR.
|
||||
<span id="codegen-unit">codegen unit</span> | When we produce LLVM IR, we group the Rust code into a number of codegen units (sometimes abbreviated as CGUs). Each of these units is processed by LLVM independently from one another, enabling parallelism. They are also the unit of incremental re-use. ([see more](../backend/codegen.md))
|
||||
<span id="completeness">completeness</span> | A technical term in type theory, it means that every type-safe program also type-checks. Having both soundness and completeness is very hard, and usually soundness is more important. (see "soundness").
|
||||
<span id="cfg">control-flow graph</span> | A representation of the control-flow of a program; see [the background chapter for more](./background.md#cfg)
|
||||
<span id="ctfe">CTFE</span> | Short for _compile-time function evaluation_, this is the ability of the compiler to evaluate `const fn`s at compile time. This is part of the compiler's constant evaluation system. ([see more](../const-eval.md))
|
||||
<span id="cx">`cx`</span> | We tend to use _cx_ as an abbreviation for _context_. See also `tcx`, `infcx`, etc.
|
||||
<span id="ctxt">`ctxt`</span> | We also use _ctxt_ as an abbreviation for _context_, e.g. [`TyCtxt`](#tyctxt). See also [cx](#cx) or [tcx](#tcx).
|
||||
<span id="dag">DAG</span> | A _directed acyclic graph_ is used during compilation to keep track of dependencies between queries. ([see more](../queries/incremental-compilation.md))
|
||||
<span id="data-flow">data-flow analysis</span> | A static analysis that figures out what properties are true at each point in the control-flow of a program; see [the background chapter for more](./background.md#dataflow).
|
||||
<span id="debruijn">de Bruijn index</span> | A technique for describing which binder a variable is bound by using only integers. It has the benefit that it is invariant under variable renaming. ([see more](./background.md#what-is-a-debruijn-index))
|
||||
<span id="def-id">`DefId`</span> | An index identifying a definition (see `rustc_span/src/def_id.rs`). Uniquely identifies a `DefPath`. See [the HIR chapter for more](../hir.md#identifiers-in-the-hir).
|
||||
<span id="discriminant">discriminant</span> | The underlying value associated with an enum variant or generator state to indicate it as "active" (but not to be confused with its ["variant index"](#variant-idx)). At runtime, the discriminant of the active variant is encoded in the [tag](#tag).
|
||||
<span id="double-ptr">double pointer</span> | A pointer with additional metadata. See [fat pointer](#fat-ptr) for more.
|
||||
<span id="drop-glue">drop glue</span> | (Internal) compiler-generated instructions that handle calling the destructors (`Drop`) for data types.
|
||||
<span id="dst">DST</span> | Short for *dynamically-sized type*, this is a type for which the compiler cannot statically know the size in memory (e.g. `str` or `[u8]`). Such types don't implement `Sized` and cannot be allocated on the stack. They can only occur as the last field in a struct. They can only be used behind a pointer (e.g. `&str` or `&[u8]`).
|
||||
<span id="ebl">early-bound lifetime</span> | A lifetime region that is substituted at its definition site. Bound in an item's `Generics` and substituted/instantiated using a `GenericArgs`. Contrast with **late-bound lifetime**. ([see more](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_type_ir/region_kind/enum.RegionKind.html#bound-regions))
|
||||
<span id="effect">effects</span> | Right now only means const traits and `~const` bounds. ([see more](../effects.md))
|
||||
<span id="empty-type">empty type</span> | See [uninhabited type](#ut).
|
||||
<span id="fat-ptr">fat pointer</span> | A two word value carrying the address of some value, along with some further information necessary to put the value to use. Rust includes two kinds of _fat pointers_: references to slices, and trait objects. A reference to a slice carries the starting address of the slice and its length. A trait object carries a value's address and a pointer to the trait's implementation appropriate to that value. "Fat pointers" are also known as "wide pointers", and "double pointers".
|
||||
<span id="free-var">free variable</span> | A _free variable_ is one that is not bound within an expression or term; see [the background chapter for more](./background.md#free-vs-bound)
|
||||
<span id="generics">generics</span> | The list of generic parameters defined on an item. There are three kinds of generic parameters: Type, lifetime and const parameters.
|
||||
<span id="hir">HIR</span> | The _high-level [IR](#ir)_, created by lowering and desugaring the AST. ([see more](../hir.md))
|
||||
<span id="hir-id">`HirId`</span> | Identifies a particular node in the HIR by combining a def-id with an "intra-definition offset". See [the HIR chapter for more](../hir.md#identifiers-in-the-hir).
|
||||
<span id="hir-map">HIR map</span> | The HIR map, accessible via `tcx.hir()`, allows you to quickly navigate the HIR and convert between various forms of identifiers.
|
||||
<span id="ice">ICE</span> | Short for _internal compiler error_, this is when the compiler crashes.
|
||||
<span id="ich">ICH</span> | Short for _incremental compilation hash_, these are used as fingerprints for things such as HIR and crate metadata, to check if changes have been made. This is useful in incremental compilation to see if part of a crate has changed and should be recompiled.
|
||||
<span id="infcx">`infcx`</span> | The type inference context (`InferCtxt`). (see `rustc_infer::infer`)
|
||||
<span id="inf-var">inference variable, infer var </span> | When doing type, region, const inference, an _inference variable_ is a kind of special type/region that represents what you are trying to infer. Think of X in algebra. For example, if we are trying to infer the type of a variable in a program, we create an inference variable to represent that unknown type.
|
||||
<span id="intern">intern</span> | Interning refers to storing certain frequently-used constant data, such as strings, and then referring to the data by an identifier (e.g. a `Symbol`) rather than the data itself, to reduce memory usage and number of allocations. See [this chapter](../memory.md) for more info.
|
||||
<span id="interpreter">interpreter</span> | The heart of const evaluation, running MIR code at compile time. ([see more](../const-eval/interpret.md))
|
||||
<span id="intrinsic">intrinsic</span> | Intrinsics are special functions that are implemented in the compiler itself but exposed (often unstably) to users. They do magical and dangerous things. (See [`std::intrinsics`](https://doc.rust-lang.org/std/intrinsics/index.html))
|
||||
<span id="ir">IR</span> | Short for _intermediate representation_, a general term in compilers. During compilation, the code is transformed from raw source (ASCII text) to various IRs. In Rust, these are primarily HIR, MIR, and LLVM IR. Each IR is well-suited for some set of computations. For example, MIR is well-suited for the borrow checker, and LLVM IR is well-suited for codegen because LLVM accepts it.
|
||||
<span id="irlo">IRLO, irlo</span> | Sometimes used as an abbreviation for [internals.rust-lang.org](https://internals.rust-lang.org).
|
||||
<span id="item">item</span> | A kind of "definition" in the language, such as a static, const, use statement, module, struct, etc. Concretely, this corresponds to the `Item` type.
|
||||
<span id="lang-item">lang item</span> | Items that represent concepts intrinsic to the language itself, such as special built-in traits like `Sync` and `Send`; or traits representing operations such as `Add`; or functions that are called by the compiler. ([see more](https://doc.rust-lang.org/1.9.0/book/lang-items.html))
|
||||
<span id="lbl">late-bound lifetime</span> | A lifetime region that is substituted at its call site. Bound in a HRTB and substituted by specific functions in the compiler, such as `liberate_late_bound_regions`. Contrast with **early-bound lifetime**. ([see more](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_type_ir/region_kind/enum.RegionKind.html#bound-regions))
|
||||
<span id="local-crate">local crate</span> | The crate currently being compiled. This is in contrast to "upstream crates" which refer to dependencies of the local crate.
|
||||
<span id="lto">LTO</span> | Short for *link-time optimizations*, this is a set of optimizations offered by LLVM that occur just before the final binary is linked. These include optimizations like removing functions that are never used in the final program, for example. _ThinLTO_ is a variant of LTO that aims to be a bit more scalable and efficient, but possibly sacrifices some optimizations. You may also read issues in the Rust repo about "FatLTO", which is the loving nickname given to non-Thin LTO. LLVM documentation: [here][lto] and [here][thinlto].
|
||||
<span id="llvm">[LLVM]</span> | (actually not an acronym :P) an open-source compiler backend. It accepts LLVM IR and outputs native binaries. Various languages (e.g. Rust) can then implement a compiler front-end that outputs LLVM IR and use LLVM to compile to all the platforms LLVM supports.
|
||||
<span id="memoization">memoization</span> | The process of storing the results of (pure) computations (such as pure function calls) to avoid having to repeat them in the future. This is typically a trade-off between execution speed and memory usage.
|
||||
<span id="mir">MIR</span> | The _mid-level [IR](#ir)_ that is created after type-checking for use by borrowck and codegen. ([see more](../mir/index.md))
|
||||
<span id="miri">Miri</span> | A tool to detect Undefined Behavior in (unsafe) Rust code. ([see more](https://github.com/rust-lang/miri))
|
||||
<span id="mono">monomorphization</span> | The process of taking generic implementations of types and functions and instantiating them with concrete types. For example, in the code we might have `Vec<T>`, but in the final executable, we will have a copy of the `Vec` code for every concrete type used in the program (e.g. a copy for `Vec<usize>`, a copy for `Vec<MyStruct>`, etc).
|
||||
<span id="normalize">normalize</span> | A general term for converting to a more canonical form, but in the case of rustc typically refers to [associated type normalization](../traits/goals-and-clauses.md#normalizeprojection---type).
|
||||
<span id="newtype">newtype</span> | A wrapper around some other type (e.g., `struct Foo(T)` is a "newtype" for `T`). This is commonly used in Rust to give a stronger type for indices.
|
||||
<span id="niche">niche</span> | Invalid bit patterns for a type _that can be used_ for layout optimizations. Some types cannot have certain bit patterns. For example, the `NonZero*` integers or the reference `&T` cannot be represented by a 0 bitstring. This means the compiler can perform layout optimizations by taking advantage of the invalid "niche value". An example application for this is the [*Discriminant elision on `Option`-like enums*](https://rust-lang.github.io/unsafe-code-guidelines/layout/enums.html#discriminant-elision-on-option-like-enums), which allows using a type's niche as the ["tag"](#tag) for an `enum` without requiring a separate field.
|
||||
<span id="nll">NLL</span> | Short for [non-lexical lifetimes](../borrow_check/region_inference.md), this is an extension to Rust's borrowing system to make it be based on the control-flow graph.
|
||||
<span id="node-id">node-id or `NodeId`</span> | An index identifying a particular node in the AST or HIR; gradually being phased out and replaced with `HirId`. See [the HIR chapter for more](../hir.md#identifiers-in-the-hir).
|
||||
<span id="obligation">obligation</span> | Something that must be proven by the trait system. ([see more](../traits/resolution.md))
|
||||
<span id="placeholder">placeholder</span> | **NOTE: skolemization is deprecated by placeholder** a way of handling subtyping around "for-all" types (e.g., `for<'a> fn(&'a u32)`) as well as solving higher-ranked trait bounds (e.g., `for<'a> T: Trait<'a>`). See [the chapter on placeholder and universes](../borrow_check/region_inference/placeholders_and_universes.md) for more details.
|
||||
<span id="point">point</span> | Used in the NLL analysis to refer to some particular location in the MIR; typically used to refer to a node in the control-flow graph.
|
||||
<span id="projection">projection</span> | A general term for a "relative path", e.g. `x.f` is a "field projection", and `T::Item` is an ["associated type projection"](../traits/goals-and-clauses.md#trait-ref).
|
||||
<span id="pc">promoted constants</span> | Constants extracted from a function and lifted to static scope; see [this section](../mir/index.md#promoted) for more details.
|
||||
<span id="provider">provider</span> | The function that executes a query. ([see more](../query.md))
|
||||
<span id="quantified">quantified</span> | In math or logic, existential and universal quantification are used to ask questions like "is there any type T for which this is true?" or "is this true for all types T?"; see [the background chapter for more](./background.md#quantified).
|
||||
<span id="query">query</span> | A sub-computation during compilation. Query results can be cached in the current session or to disk for incremental compilation. ([see more](../query.md))
|
||||
<span id="recovery">recovery</span> | Recovery refers to handling invalid syntax during parsing (e.g. a missing comma) and continuing to parse the AST. This avoids showing spurious errors to the user (e.g. showing 'missing field' errors when the struct definition contains errors).
|
||||
<span id="region">region</span> | Another term for "lifetime" often used in the literature and in the borrow checker.
|
||||
<span id="rib">rib</span> | A data structure in the name resolver that keeps track of a single scope for names. ([see more](../name-resolution.md))
|
||||
<span id="rpit">RPIT</span> | A return-position `impl Trait`. ([see the reference](https://doc.rust-lang.org/reference/types/impl-trait.html#abstract-return-types)).
|
||||
<span id="rpitit">RPITIT</span> | A return-position `impl Trait` in trait. Unlike RPIT, this is desugared to a generic associated type (GAT). Introduced in [RFC 3425](https://rust-lang.github.io/rfcs/3425-return-position-impl-trait-in-traits.html). ([see more](../return-position-impl-trait-in-trait.md))
|
||||
<span id="scrutinee">scrutinee</span> | A scrutinee is the expression that is matched on in `match` expressions and similar pattern matching constructs. For example, in `match x { A => 1, B => 2 }`, the expression `x` is the scrutinee.
|
||||
<span id="sess">`sess`</span> | The compiler _session_, which stores global data used throughout compilation
|
||||
<span id="side-tables">side tables</span> | Because the [AST](#ast) and HIR are immutable once created, we often carry extra information about them in the form of hashtables, indexed by the id of a particular node.
|
||||
<span id="sigil">sigil</span> | Like a keyword but composed entirely of non-alphanumeric tokens. For example, `&` is a sigil for references.
|
||||
<span id="soundness">soundness</span> | A technical term in type theory. Roughly, if a type system is sound, then a program that type-checks is type-safe. That is, one can never (in safe rust) force a value into a variable of the wrong type. (see "completeness").
|
||||
<span id="span">span</span> | A location in the user's source code, used for error reporting primarily. These are like a file-name/line-number/column tuple on steroids: they carry a start/end point, and also track macro expansions and compiler desugaring. All while being packed into a few bytes (really, it's an index into a table). See the [`Span`] datatype for more.
|
||||
<span id="subst">subst</span> | The act of _substituting_ the generic parameters inside of a type, constant expression, etc. with concrete generic arguments by supplying [substs](#substs). Nowadays referred to as _instantiating_ in the compiler.
|
||||
<span id="substs">substs</span> | The _substitutions_ for a given generic item (e.g. the `i32`, `u32` in `HashMap<i32, u32>`). Nowadays referred to as the list of _generic arguments_ in the compiler (but note that strictly speaking these two concepts differ, see the literature).
|
||||
<span id="sysroot">sysroot</span> | The directory for build artifacts that are loaded by the compiler at runtime. ([see more](../building/bootstrapping/what-bootstrapping-does.html#what-is-a-sysroot))
|
||||
<span id="tag">tag</span> | The "tag" of an enum/generator encodes the [discriminant](#discriminant) of the active variant/state. Tags can either be "direct" (simply storing the discriminant in a field) or use a ["niche"](#niche).
|
||||
<span id="tait">TAIT</span> | A type-alias `impl Trait`. Introduced in [RFC 2515](https://rust-lang.github.io/rfcs/2515-type_alias_impl_trait.html).
|
||||
<span id="tcx">`tcx`</span> | Standard variable name for the "typing context" (`TyCtxt`), main data structure of the compiler. ([see more](../ty.md))
|
||||
<span id="lifetime-tcx">`'tcx`</span> | The lifetime of the allocation arenas used by `TyCtxt`. Most data interned during a compilation session will use this lifetime with the exception of HIR data which uses the `'hir` lifetime. ([see more](../ty.md))
|
||||
<span id="token">token</span> | The smallest unit of parsing. Tokens are produced after lexing ([see more](../the-parser.md)).
|
||||
<span id="tls">[TLS]</span> | *Thread-local storage*. Variables may be defined so that each thread has its own copy (rather than all threads sharing the variable). This has some interactions with LLVM. Not all platforms support TLS.
|
||||
<span id="trait-ref">trait reference, trait ref </span> | The name of a trait along with a suitable list of generic arguments. ([see more](../traits/goals-and-clauses.md#trait-ref))
|
||||
<span id="trans">trans</span> | Short for _translation_, the code to translate MIR into LLVM IR. Renamed to [codegen](#codegen).
|
||||
<span id="ty">`Ty`</span> | The internal representation of a type. ([see more](../ty.md))
|
||||
<span id="tyctxt">`TyCtxt`</span> | The data structure often referred to as [`tcx`](#tcx) in code which provides access to session data and the query system.
|
||||
<span id="ufcs">UFCS</span> | Short for _universal function call syntax_, this is an unambiguous syntax for calling a method. **Term no longer in use!** Prefer _fully-qualified path/syntax_. ([see more](../type-checking.md), [see the reference](https://doc.rust-lang.org/reference/expressions/call-expr.html#disambiguating-function-calls))
|
||||
<span id="ut">uninhabited type</span> | A type which has _no_ values. This is not the same as a ZST, which has exactly 1 value. An example of an uninhabited type is `enum Foo {}`, which has no variants, and so, can never be created. The compiler can treat code that deals with uninhabited types as dead code, since there is no such value to be manipulated. `!` (the never type) is an uninhabited type. Uninhabited types are also called _empty types_.
|
||||
<span id="upvar">upvar</span> | A variable captured by a closure from outside the closure.
|
||||
<span id="variance">variance</span> | Determines how changes to a generic parameter affect subtyping; for example, if `T` is a subtype of `U`, then `Vec<T>` is a subtype of `Vec<U>` because `Vec` is _covariant_ in its generic parameter. See [the background chapter](./background.md#variance) for a more general explanation. See the [variance chapter](../variance.md) for an explanation of how type checking handles variance.
|
||||
<span id="variant-idx">variant index</span> | In an enum, identifies a variant by assigning them indices starting at 0. This is purely internal and not to be confused with the ["discriminant"](#discriminant) which can be overwritten by the user (e.g. `enum Bool { True = 42, False = 0 }`).
|
||||
<span id="wf">well-formedness</span> | Semantically: An expression that evaluates to a meaningful result. In type systems: A type-related construct which follows the rules of the type system.
|
||||
<span id="wide-ptr">wide pointer</span> | A pointer with additional metadata. See [fat pointer](#fat-ptr) for more.
|
||||
<span id="zst">ZST</span> | *Zero-sized type*. A type whose values have size 0 bytes. Since `2^0 = 1`, such types can have exactly one value. For example, `()` (unit) is a ZST. `struct Foo;` is also a ZST. The compiler can do some nice optimizations around ZSTs.
|
||||
|
||||
[LLVM]: https://llvm.org/
|
||||
[lto]: https://llvm.org/docs/LinkTimeOptimization.html
|
||||
[`Span`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/struct.Span.html
|
||||
[thinlto]: https://clang.llvm.org/docs/ThinLTO.html
|
||||
[TLS]: https://llvm.org/docs/LangRef.html#thread-local-storage-models
|
||||
15
src/doc/rustc-dev-guide/src/appendix/humorust.md
Normal file
15
src/doc/rustc-dev-guide/src/appendix/humorust.md
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# Humor in Rust
|
||||
|
||||
What's a project without a sense of humor? And frankly some of these are
|
||||
enlightening?
|
||||
|
||||
- [Weird exprs test](https://github.com/rust-lang/rust/blob/master/tests/ui/weird-exprs.rs)
|
||||
- [Ferris Rap](https://fitzgen.com/2018/12/13/rust-raps.html)
|
||||
- [The Genesis of Generic Germination](https://github.com/rust-lang/rust/pull/53645#issue-210543221)
|
||||
- [The Bastion of the Turbofish test](https://github.com/rust-lang/rust/blob/79d8a0fcefa5134db2a94739b1d18daa01fc6e9f/src/test/ui/bastion-of-the-turbofish.rs)
|
||||
- [Rust Koans](https://users.rust-lang.org/t/rust-koans/2408)
|
||||
- [`break rust;`](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=0ab2bd6a9d722e0f05a95e2a5dcf89cc)
|
||||
- [The Nomicon Intro](https://doc.rust-lang.org/stable/nomicon/)
|
||||
- [`rustc-ty` renaming punfest](https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/rustc-ty.20naming.20bikeshed.20.2F.20punfest.20%28was.3A.20design.20meeting.202.2E.2E.2E/near/189906455)
|
||||
- [try using their name "ferris" instead](https://github.com/rust-lang/rust/pull/91476)
|
||||
- [Forbid pineapple on pizza](https://github.com/rust-lang/rust/pull/70645)
|
||||
163
src/doc/rustc-dev-guide/src/asm.md
Normal file
163
src/doc/rustc-dev-guide/src/asm.md
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
# Inline assembly
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
## Overview
|
||||
|
||||
Inline assembly in rustc mostly revolves around taking an `asm!` macro invocation and plumbing it
|
||||
through all of the compiler layers down to LLVM codegen. Throughout the various stages, an
|
||||
`InlineAsm` generally consists of 3 components:
|
||||
|
||||
- The template string, which is stored as an array of `InlineAsmTemplatePiece`. Each piece
|
||||
represents either a literal or a placeholder for an operand (just like format strings).
|
||||
|
||||
```rust
|
||||
pub enum InlineAsmTemplatePiece {
|
||||
String(String),
|
||||
Placeholder { operand_idx: usize, modifier: Option<char>, span: Span },
|
||||
}
|
||||
```
|
||||
|
||||
- The list of operands to the `asm!` (`in`, `[late]out`, `in[late]out`, `sym`, `const`). These are
|
||||
represented differently at each stage of lowering, but follow a common pattern:
|
||||
- `in`, `out` and `inout` all have an associated register class (`reg`) or explicit register
|
||||
(`"eax"`).
|
||||
- `inout` has 2 forms: one with a single expression that is both read from and written to, and
|
||||
one with two separate expressions for the input and output parts.
|
||||
- `out` and `inout` have a `late` flag (`lateout` / `inlateout`) to indicate that the register
|
||||
allocator is allowed to reuse an input register for this output.
|
||||
- `out` and the split variant of `inout` allow `_` to be specified for an output, which means
|
||||
that the output is discarded. This is used to allocate scratch registers for assembly code.
|
||||
- `const` refers to an anonymous constant and generally works like an inline const.
|
||||
- `sym` is a bit special since it only accepts a path expression, which must point to a `static`
|
||||
or a `fn`.
|
||||
|
||||
- The options set at the end of the `asm!` macro. The only ones that are of particular interest to
|
||||
rustc are `NORETURN` which makes `asm!` return `!` instead of `()`, and `RAW` which disables format
|
||||
string parsing. The remaining options are mostly passed through to LLVM with little processing.
|
||||
|
||||
```rust
|
||||
bitflags::bitflags! {
|
||||
pub struct InlineAsmOptions: u16 {
|
||||
const PURE = 1 << 0;
|
||||
const NOMEM = 1 << 1;
|
||||
const READONLY = 1 << 2;
|
||||
const PRESERVES_FLAGS = 1 << 3;
|
||||
const NORETURN = 1 << 4;
|
||||
const NOSTACK = 1 << 5;
|
||||
const ATT_SYNTAX = 1 << 6;
|
||||
const RAW = 1 << 7;
|
||||
const MAY_UNWIND = 1 << 8;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## AST
|
||||
|
||||
`InlineAsm` is represented as an expression in the AST with the [`ast::InlineAsm` type][inline_asm_ast].
|
||||
|
||||
The `asm!` macro is implemented in `rustc_builtin_macros` and outputs an `InlineAsm` AST node. The
|
||||
template string is parsed using `rustc_parse_format`; positional and named operands are resolved to
|
||||
explicit operand indices. Since target information is not available to macro invocations,
|
||||
validation of the registers and register classes is deferred to AST lowering.
|
||||
|
||||
[inline_asm_ast]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/ast/struct.InlineAsm.html
|
||||
|
||||
## HIR
|
||||
|
||||
`InlineAsm` is represented as an expression in the HIR with the [`hir::InlineAsm` type][inline_asm_hir].
|
||||
|
||||
AST lowering is where `InlineAsmRegOrRegClass` is converted from `Symbol`s to an actual register or
|
||||
register class. If any modifiers are specified for a template string placeholder, these are
|
||||
validated against the set allowed for that operand type. Finally, explicit registers for inputs and
|
||||
outputs are checked for conflicts (same register used for different operands).
|
||||
|
||||
[inline_asm_hir]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/hir/struct.InlineAsm.html
|
||||
|
||||
## Type checking
|
||||
|
||||
Each register class has a whitelist of types that it may be used with. After the types of all
|
||||
operands have been determined, the `intrinsicck` pass will check that these types are in the
|
||||
whitelist. It also checks that split `inout` operands have compatible types and that `const`
|
||||
operands are integers or floats. Suggestions are emitted where needed if a template modifier should
|
||||
be used for an operand based on the type that was passed into it.
|
||||
|
||||
## THIR
|
||||
|
||||
`InlineAsm` is represented as an expression in the THIR with the [`InlineAsmExpr` type][inline_asm_thir].
|
||||
|
||||
The only significant change compared to HIR is that `Sym` has been lowered to either a `SymFn`
|
||||
whose `expr` is a `Literal` ZST of the `fn`, or a `SymStatic` which points to the `DefId` of a
|
||||
`static`.
|
||||
|
||||
[inline_asm_thir]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/thir/struct.InlineAsmExpr.html
|
||||
|
||||
## MIR
|
||||
|
||||
`InlineAsm` is represented as a `Terminator` in the MIR with the [`TerminatorKind::InlineAsm` variant][inline_asm_mir].
|
||||
|
||||
As part of THIR lowering, `InOut` and `SplitInOut` operands are lowered to a split form with a
|
||||
separate `in_value` and `out_place`.
|
||||
|
||||
Semantically, the `InlineAsm` terminator is similar to the `Call` terminator except that it has
|
||||
multiple output places where a `Call` only has a single return place output.
|
||||
|
||||
[inline_asm_mir]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/enum.TerminatorKind.html#variant.InlineAsm
|
||||
|
||||
## Codegen
|
||||
|
||||
Operands are lowered one more time before being passed to LLVM codegen; this is represented by the [`InlineAsmOperandRef` type][inline_asm_codegen] from `rustc_codegen_ssa`.
|
||||
|
||||
The operands are lowered to LLVM operands and constraint codes as follows:
|
||||
- `out` and the output part of `inout` operands are added first, as required by LLVM. Late output
|
||||
operands have a `=` prefix added to their constraint code, non-late output operands have a `=&`
|
||||
prefix added to their constraint code.
|
||||
- `in` operands are added normally.
|
||||
- `inout` operands are tied to the matching output operand.
|
||||
- `sym` operands are passed as function pointers or pointers, using the `"s"` constraint.
|
||||
- `const` operands are formatted to a string and directly inserted in the template string.
|
||||
|
||||
The template string is converted to LLVM form:
|
||||
- `$` characters are escaped as `$$`.
|
||||
- `const` operands are converted to strings and inserted directly.
|
||||
- Placeholders are formatted as `${X:M}` where `X` is the operand index and `M` is the modifier
|
||||
character. Modifiers are converted from the Rust form to the LLVM form.
|
||||
|
||||
The various options are converted to clobber constraints or LLVM attributes, refer to the
|
||||
[RFC](https://github.com/Amanieu/rfcs/blob/inline-asm/text/0000-inline-asm.md#mapping-to-llvm-ir)
|
||||
for more details.
|
||||
|
||||
Note that LLVM is sometimes rather picky about what types it accepts for certain constraint codes
|
||||
so we sometimes need to insert conversions to/from a supported type. See the target-specific
|
||||
ISelLowering.cpp files in LLVM for details of what types are supported for each register class.
|
||||
|
||||
[inline_asm_codegen]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/traits/enum.InlineAsmOperandRef.html
|
||||
|
||||
## Adding support for new architectures
|
||||
|
||||
Adding inline assembly support to an architecture is mostly a matter of defining the registers and
|
||||
register classes for that architecture. All the definitions for register classes are located in
|
||||
`compiler/rustc_target/src/asm/`.
|
||||
|
||||
Additionally you will need to implement lowering of these register classes to LLVM constraint codes
|
||||
in `compiler/rustc_codegen_llvm/src/asm.rs`.
|
||||
|
||||
When adding a new architecture, make sure to cross-reference with the LLVM source code:
|
||||
- LLVM has restrictions on which types can be used with a particular constraint code. Refer to the
|
||||
`getRegForInlineAsmConstraint` function in `lib/Target/${ARCH}/${ARCH}ISelLowering.cpp`.
|
||||
- LLVM reserves certain registers for its internal use, which causes them to not be saved/restored
|
||||
properly around inline assembly blocks. These registers are listed in the `getReservedRegs`
|
||||
function in `lib/Target/${ARCH}/${ARCH}RegisterInfo.cpp`. Any "conditionally" reserved register
|
||||
such as the frame/base pointer must always be treated as reserved for Rust purposes because we
|
||||
can't know ahead of time whether a function will require a frame/base pointer.
|
||||
|
||||
## Tests
|
||||
|
||||
Various tests for inline assembly are available:
|
||||
|
||||
- `tests/assembly/asm`
|
||||
- `tests/ui/asm`
|
||||
- `tests/codegen/asm-*`
|
||||
|
||||
Every architecture supported by inline assembly must have exhaustive tests in
|
||||
`tests/assembly/asm` which test all combinations of register classes and types.
|
||||
48
src/doc/rustc-dev-guide/src/ast-lowering.md
Normal file
48
src/doc/rustc-dev-guide/src/ast-lowering.md
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# AST lowering
|
||||
|
||||
The AST lowering step converts AST to [HIR](hir.html).
|
||||
This means many structures are removed if they are irrelevant
|
||||
for type analysis or similar syntax-agnostic analyses. Examples
|
||||
of such structures include but are not limited to
|
||||
|
||||
* Parentheses
|
||||
* Removed without replacement, the tree structure makes order explicit
|
||||
* `for` loops and `while (let)` loops
|
||||
* Converted to `loop` + `match` and some `let` bindings
|
||||
* `if let`
|
||||
* Converted to `match`
|
||||
* Universal `impl Trait`
|
||||
* Converted to generic arguments
|
||||
(but with some flags, to know that the user didn't write them)
|
||||
* Existential `impl Trait`
|
||||
* Converted to a virtual `existential type` declaration
|
||||
|
||||
Lowering needs to uphold several invariants in order to not trigger the
|
||||
sanity checks in `compiler/rustc_passes/src/hir_id_validator.rs`:
|
||||
|
||||
1. A `HirId` must be used if created. So if you use the `lower_node_id`,
|
||||
you *must* use the resulting `NodeId` or `HirId` (either is fine, since
|
||||
any `NodeId`s in the `HIR` are checked for existing `HirId`s)
|
||||
2. Lowering a `HirId` must be done in the scope of the *owning* item.
|
||||
This means you need to use `with_hir_id_owner` if you are creating parts
|
||||
of an item other than the one being currently lowered. This happens for
|
||||
example during the lowering of existential `impl Trait`
|
||||
3. A `NodeId` that will be placed into a HIR structure must be lowered,
|
||||
even if its `HirId` is unused. Calling
|
||||
`let _ = self.lower_node_id(node_id);` is perfectly legitimate.
|
||||
4. If you are creating new nodes that didn't exist in the `AST`, you *must*
|
||||
create new ids for them. This is done by calling the `next_id` method,
|
||||
which produces both a new `NodeId` as well as automatically lowering it
|
||||
for you so you also get the `HirId`.
|
||||
|
||||
If you are creating new `DefId`s, since each `DefId` needs to have a
|
||||
corresponding `NodeId`, it is advisable to add these `NodeId`s to the
|
||||
`AST` so you don't have to generate new ones during lowering. This has
|
||||
the advantage of creating a way to find the `DefId` of something via its
|
||||
`NodeId`. If lowering needs this `DefId` in multiple places, you can't
|
||||
generate a new `NodeId` in all those places because you'd also get a new
|
||||
`DefId` then. With a `NodeId` from the `AST` this is not an issue.
|
||||
|
||||
Having the `NodeId` also allows the `DefCollector` to generate the `DefId`s
|
||||
instead of lowering having to do it on the fly. Centralizing the `DefId`
|
||||
generation in one place makes it easier to refactor and reason about.
|
||||
31
src/doc/rustc-dev-guide/src/ast-validation.md
Normal file
31
src/doc/rustc-dev-guide/src/ast-validation.md
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# AST Validation
|
||||
|
||||
_AST validation_ is a separate AST pass that visits each
|
||||
item in the tree and performs simple checks. This pass
|
||||
doesn't perform any complex analysis, type checking or
|
||||
name resolution.
|
||||
|
||||
Before performing any validation, the compiler first expands
|
||||
the macros. Then this pass performs validations to check
|
||||
that each AST item is in the correct state. And when this pass
|
||||
is done, the compiler runs the crate resolution pass.
|
||||
|
||||
## Validations
|
||||
|
||||
Validations are defined in the `AstValidator` type, which
|
||||
itself is located in the `rustc_ast_passes` crate. This
|
||||
type implements various simple checks which emit errors
|
||||
when certain language rules are broken.
|
||||
|
||||
In addition, `AstValidator` implements the `Visitor` trait
|
||||
that defines how to visit AST items (which can be functions,
|
||||
traits, enums, etc).
|
||||
|
||||
For each item, the visitor performs specific checks. For
|
||||
example, when visiting a function declaration,
|
||||
`AstValidator` checks that the function has:
|
||||
|
||||
* no more than `u16::MAX` parameters;
|
||||
* c-variadic argument goes last in the declaration;
|
||||
* documentation comments aren't applied to function parameters;
|
||||
* and other validations.
|
||||
50
src/doc/rustc-dev-guide/src/attributes.md
Normal file
50
src/doc/rustc-dev-guide/src/attributes.md
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
# Attributes
|
||||
|
||||
Attributes come in two types: *inert* (or *built-in*) and *active* (*non-builtin*).
|
||||
|
||||
## Builtin/inert attributes
|
||||
|
||||
These attributes are defined in the compiler itself, in
|
||||
[`compiler/rustc_feature/src/builtin_attrs.rs`][builtin_attrs].
|
||||
|
||||
Examples include `#[allow]` and `#[macro_use]`.
|
||||
|
||||
[builtin_attrs]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_feature/builtin_attrs/index.html
|
||||
|
||||
These attributes have several important characteristics:
|
||||
* They are always in scope, and do not participate in typical path-based resolution.
|
||||
* They cannot be renamed. For example, `use allow as foo` will compile, but writing `#[foo]` will
|
||||
produce an error.
|
||||
* They are 'inert', meaning they are left as-is by the macro expansion code.
|
||||
As a result, any behavior comes as a result of the compiler explicitly checking for their presence.
|
||||
For example, lint-related code explicitly checks for `#[allow]`, `#[warn]`, `#[deny]`, and
|
||||
`#[forbid]`, rather than the behavior coming from the expansion of the attributes themselves.
|
||||
|
||||
## 'Non-builtin'/'active' attributes
|
||||
|
||||
These attributes are defined by a crate - either the standard library, or a proc-macro crate.
|
||||
|
||||
**Important**: Many non-builtin attributes, such as `#[derive]`, are still considered part of the
|
||||
core Rust language. However, they are **not** called 'builtin attributes', since they have a
|
||||
corresponding definition in the standard library.
|
||||
|
||||
Definitions of non-builtin attributes take two forms:
|
||||
|
||||
1. Proc-macro attributes, defined via a function annotated with `#[proc_macro_attribute]` in a
|
||||
proc-macro crate.
|
||||
2. AST-based attributes, defined in the standard library. These attributes have special 'stub'
|
||||
macros defined in places like [`library/core/src/macros/mod.rs`][core_macros].
|
||||
|
||||
[core_macros]: https://github.com/rust-lang/rust/blob/master/library/core/src/macros/mod.rs
|
||||
|
||||
These definitions exist to allow the macros to participate in typical path-based resolution - they
|
||||
can be imported, re-exported, and renamed just like any other item definition. However, the body of
|
||||
the definition is empty. Instead, the macro is annotated with the `#[rustc_builtin_macro]`
|
||||
attribute, which tells the compiler to run a corresponding function in `rustc_builtin_macros`.
|
||||
|
||||
All non-builtin attributes have the following characteristics:
|
||||
* Like all other definitions (e.g. structs), they must be brought into scope via an import.
|
||||
Many standard library attributes are included in the prelude - this is why writing `#[derive]`
|
||||
works without an import.
|
||||
* They participate in macro expansion. The implementation of the macro may leave the attribute
|
||||
target unchanged, modify the target, produce new AST nodes, or remove the target entirely.
|
||||
209
src/doc/rustc-dev-guide/src/backend/backend-agnostic.md
Normal file
209
src/doc/rustc-dev-guide/src/backend/backend-agnostic.md
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
# Backend Agnostic Codegen
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
[`rustc_codegen_ssa`]
|
||||
provides an abstract interface for all backends to implement,
|
||||
namely LLVM, [Cranelift], and [GCC].
|
||||
|
||||
[Cranelift]: https://github.com/rust-lang/rustc_codegen_cranelift
|
||||
[GCC]: https://github.com/rust-lang/rustc_codegen_gcc
|
||||
[`rustc_codegen_ssa`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/index.html
|
||||
|
||||
Below is some background information on the refactoring that created this
|
||||
abstract interface.
|
||||
|
||||
## Refactoring of `rustc_codegen_llvm`
|
||||
by Denis Merigoux, October 23rd 2018
|
||||
|
||||
### State of the code before the refactoring
|
||||
|
||||
All the code related to the compilation of MIR into LLVM IR was contained
|
||||
inside the `rustc_codegen_llvm` crate. Here is the breakdown of the most
|
||||
important elements:
|
||||
* the `back` folder (7,800 LOC) implements the mechanisms for creating the
|
||||
different object files and archive through LLVM, but also the communication
|
||||
mechanisms for parallel code generation;
|
||||
* the `debuginfo` (3,200 LOC) folder contains all code that passes debug
|
||||
information down to LLVM;
|
||||
* the `llvm` (2,200 LOC) folder defines the FFI necessary to communicate with
|
||||
LLVM using the C++ API;
|
||||
* the `mir` (4,300 LOC) folder implements the actual lowering from MIR to LLVM
|
||||
IR;
|
||||
* the `base.rs` (1,300 LOC) file contains some helper functions but also the
|
||||
high-level code that launches the code generation and distributes the work;
|
||||
* the `builder.rs` (1,200 LOC) file contains all the functions generating
|
||||
individual LLVM IR instructions inside a basic block;
|
||||
* the `common.rs` (450 LOC) contains various helper functions and all the
|
||||
functions generating LLVM static values;
|
||||
* the `type_.rs` (300 LOC) defines most of the type translations to LLVM IR.
|
||||
|
||||
The goal of this refactoring is to separate inside this crate code that is
|
||||
specific to LLVM from code that can be reused for other rustc backends. For
|
||||
instance, the `mir` folder is almost entirely backend-agnostic but it relies
|
||||
heavily on other parts of the crate. The separation of the code must not affect
|
||||
the logic of the code nor its performance.
|
||||
|
||||
For these reasons, the separation process involves two transformations that
|
||||
have to be done at the same time for the resulting code to compile:
|
||||
|
||||
1. replace all the LLVM-specific types by generics inside function signatures
|
||||
and structure definitions;
|
||||
2. encapsulate all functions calling the LLVM FFI inside a set of traits that
|
||||
will define the interface between backend-agnostic code and the backend.
|
||||
|
||||
While the LLVM-specific code will be left in `rustc_codegen_llvm`, all the new
|
||||
traits and backend-agnostic code will be moved in `rustc_codegen_ssa` (name
|
||||
suggestion by @eddyb).
|
||||
|
||||
### Generic types and structures
|
||||
|
||||
@irinagpopa started to parametrize the types of `rustc_codegen_llvm` by a
|
||||
generic `Value` type, implemented in LLVM by a reference `&'ll Value`. This
|
||||
work has been extended to all structures inside the `mir` folder and elsewhere,
|
||||
as well as for LLVM's `BasicBlock` and `Type` types.
|
||||
|
||||
The two most important structures for the LLVM codegen are `CodegenCx` and
|
||||
`Builder`. They are parametrized by multiple lifetime parameters and the type
|
||||
for `Value`.
|
||||
|
||||
```rust,ignore
|
||||
struct CodegenCx<'ll, 'tcx> {
|
||||
/* ... */
|
||||
}
|
||||
|
||||
struct Builder<'a, 'll, 'tcx> {
|
||||
cx: &'a CodegenCx<'ll, 'tcx>,
|
||||
/* ... */
|
||||
}
|
||||
```
|
||||
|
||||
`CodegenCx` is used to compile one codegen-unit that can contain multiple
|
||||
functions, whereas `Builder` is created to compile one basic block.
|
||||
|
||||
The code in `rustc_codegen_llvm` has to deal with multiple explicit lifetime
|
||||
parameters, that correspond to the following:
|
||||
* `'tcx` is the longest lifetime, that corresponds to the original `TyCtxt`
|
||||
containing the program's information;
|
||||
* `'a` is a short-lived reference of a `CodegenCx` or another object inside a
|
||||
struct;
|
||||
* `'ll` is the lifetime of references to LLVM objects such as `Value` or
|
||||
`Type`.
|
||||
|
||||
Although there are already many lifetime parameters in the code, making it
|
||||
generic uncovered situations where the borrow-checker was passing only due to
|
||||
the special nature of the LLVM objects manipulated (they are extern pointers).
|
||||
For instance, an additional lifetime parameter had to be added to
|
||||
`LocalAnalyzer` in `analyze.rs`, leading to the definition:
|
||||
|
||||
```rust,ignore
|
||||
struct LocalAnalyzer<'mir, 'a, 'tcx> {
|
||||
/* ... */
|
||||
}
|
||||
```
|
||||
|
||||
However, the two most important structures `CodegenCx` and `Builder` are not
|
||||
defined in the backend-agnostic code. Indeed, their content is highly specific
|
||||
to the backend and it makes more sense to leave their definition to the backend
|
||||
implementor than to allow just a narrow spot via a generic field for the
|
||||
backend's context.
|
||||
|
||||
### Traits and interface
|
||||
|
||||
Because they have to be defined by the backend, `CodegenCx` and `Builder` will
|
||||
be the structures implementing all the traits defining the backend's interface.
|
||||
These traits are defined in the folder `rustc_codegen_ssa/traits` and all the
|
||||
backend-agnostic code is parametrized by them. For instance, let us explain how
|
||||
a function in `base.rs` is parametrized:
|
||||
|
||||
```rust,ignore
|
||||
pub fn codegen_instance<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
||||
cx: &'a Bx::CodegenCx,
|
||||
instance: Instance<'tcx>
|
||||
) {
|
||||
/* ... */
|
||||
}
|
||||
```
|
||||
|
||||
In this signature, we have the two lifetime parameters explained earlier and
|
||||
the master type `Bx` which satisfies the trait `BuilderMethods` corresponding
|
||||
to the interface satisfied by the `Builder` struct. The `BuilderMethods`
|
||||
defines an associated type `Bx::CodegenCx` that itself satisfies the
|
||||
`CodegenMethods` trait implemented by the struct `CodegenCx`.
|
||||
|
||||
On the trait side, here is an example with part of the definition of
|
||||
`BuilderMethods` in `traits/builder.rs`:
|
||||
|
||||
```rust,ignore
|
||||
pub trait BuilderMethods<'a, 'tcx>:
|
||||
HasCodegen<'tcx>
|
||||
+ DebugInfoBuilderMethods<'tcx>
|
||||
+ ArgTypeMethods<'tcx>
|
||||
+ AbiBuilderMethods<'tcx>
|
||||
+ IntrinsicCallMethods<'tcx>
|
||||
+ AsmBuilderMethods<'tcx>
|
||||
{
|
||||
fn new_block<'b>(
|
||||
cx: &'a Self::CodegenCx,
|
||||
llfn: Self::Function,
|
||||
name: &'b str
|
||||
) -> Self;
|
||||
/* ... */
|
||||
fn cond_br(
|
||||
&mut self,
|
||||
cond: Self::Value,
|
||||
then_llbb: Self::BasicBlock,
|
||||
else_llbb: Self::BasicBlock,
|
||||
);
|
||||
/* ... */
|
||||
}
|
||||
```
|
||||
|
||||
Finally, a master structure implementing the `ExtraBackendMethods` trait is
|
||||
used for high-level codegen-driving functions like `codegen_crate` in
|
||||
`base.rs`. For LLVM, it is the empty `LlvmCodegenBackend`.
|
||||
`ExtraBackendMethods` should be implemented by the same structure that
|
||||
implements the `CodegenBackend` defined in
|
||||
`rustc_codegen_utils/codegen_backend.rs`.
|
||||
|
||||
During the traitification process, certain functions have been converted from
|
||||
methods of a local structure to methods of `CodegenCx` or `Builder` and a
|
||||
corresponding `self` parameter has been added. Indeed, LLVM stores information
|
||||
internally that it can access when called through its API. This information
|
||||
does not show up in a Rust data structure carried around when these methods are
|
||||
called. However, when implementing a Rust backend for `rustc`, these methods
|
||||
will need information from `CodegenCx`, hence the additional parameter (unused
|
||||
in the LLVM implementation of the trait).
|
||||
|
||||
### State of the code after the refactoring
|
||||
|
||||
The traits offer an API which is very similar to the API of LLVM. This is not
|
||||
the best solution since LLVM has a very special way of doing things: when
|
||||
adding another backend, the traits definition might be changed in order to
|
||||
offer more flexibility.
|
||||
|
||||
However, the current separation between backend-agnostic and LLVM-specific code
|
||||
has allowed the reuse of a significant part of the old `rustc_codegen_llvm`.
|
||||
Here is the new LOC breakdown between backend-agnostic (BA) and LLVM for the
|
||||
most important elements:
|
||||
|
||||
* `back` folder: 3,800 (BA) vs 4,100 (LLVM);
|
||||
* `mir` folder: 4,400 (BA) vs 0 (LLVM);
|
||||
* `base.rs`: 1,100 (BA) vs 250 (LLVM);
|
||||
* `builder.rs`: 1,400 (BA) vs 0 (LLVM);
|
||||
* `common.rs`: 350 (BA) vs 350 (LLVM);
|
||||
|
||||
The `debuginfo` folder has been left almost untouched by the splitting and is
|
||||
specific to LLVM. Only its high-level features have been traitified.
|
||||
|
||||
The new `traits` folder has 1500 LOC only for trait definitions. Overall, the
|
||||
27,000 LOC-sized old `rustc_codegen_llvm` code has been split into the new
|
||||
18,500 LOC-sized `rustc_codegen_llvm` and the 12,000 LOC-sized
|
||||
`rustc_codegen_ssa`. We can say that this refactoring allowed the reuse of
|
||||
approximately 10,000 LOC that would otherwise have had to be duplicated between
|
||||
the multiple backends of `rustc`.
|
||||
|
||||
The refactored version of `rustc`'s backend introduced no regression over the
|
||||
test suite nor in performance benchmarks, which is consistent with the nature
|
||||
of the refactoring that used only compile-time parametricity (no trait
|
||||
objects).
|
||||
79
src/doc/rustc-dev-guide/src/backend/codegen.md
Normal file
79
src/doc/rustc-dev-guide/src/backend/codegen.md
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
# Code generation
|
||||
|
||||
Code generation (or "codegen") is the part of the compiler
|
||||
that actually generates an executable binary.
|
||||
Usually, rustc uses LLVM for code generation,
|
||||
but there is also support for [Cranelift] and [GCC].
|
||||
The key is that rustc doesn't implement codegen itself.
|
||||
It's worth noting, though, that in the Rust source code,
|
||||
many parts of the backend have `codegen` in their names
|
||||
(there are no hard boundaries).
|
||||
|
||||
[Cranelift]: https://github.com/bytecodealliance/wasmtime/tree/main/cranelift
|
||||
[GCC]: https://github.com/rust-lang/rustc_codegen_gcc
|
||||
|
||||
> NOTE: If you are looking for hints on how to debug code generation bugs,
|
||||
> please see [this section of the debugging chapter][debugging].
|
||||
|
||||
[debugging]: ./debugging.md
|
||||
|
||||
## What is LLVM?
|
||||
|
||||
[LLVM](https://llvm.org) is "a collection of modular and reusable compiler and
|
||||
toolchain technologies". In particular, the LLVM project contains a pluggable
|
||||
compiler backend (also called "LLVM"), which is used by many compiler projects,
|
||||
including the `clang` C compiler and our beloved `rustc`.
|
||||
|
||||
LLVM takes input in the form of LLVM IR. It is basically assembly code with
|
||||
additional low-level types and annotations added. These annotations are helpful
|
||||
for doing optimizations on the LLVM IR and outputted machine code. The end
|
||||
result of all this is (at long last) something executable (e.g. an ELF object,
|
||||
an EXE, or wasm).
|
||||
|
||||
There are a few benefits to using LLVM:
|
||||
|
||||
- We don't have to write a whole compiler backend. This reduces implementation
|
||||
and maintenance burden.
|
||||
- We benefit from the large suite of advanced optimizations that the LLVM
|
||||
project has been collecting.
|
||||
- We can automatically compile Rust to any of the platforms for which LLVM has
|
||||
support. For example, as soon as LLVM added support for wasm, voila! rustc,
|
||||
clang, and a bunch of other languages were able to compile to wasm! (Well,
|
||||
there was some extra stuff to be done, but we were 90% there anyway).
|
||||
- We and other compiler projects benefit from each other. For example, when the
|
||||
[Spectre and Meltdown security vulnerabilities][spectre] were discovered,
|
||||
only LLVM needed to be patched.
|
||||
|
||||
[spectre]: https://meltdownattack.com/
|
||||
|
||||
## Running LLVM, linking, and metadata generation
|
||||
|
||||
Once LLVM IR for all of the functions and statics, etc is built, it is time to
|
||||
start running LLVM and its optimization passes. LLVM IR is grouped into
|
||||
"modules". Multiple "modules" can be codegened at the same time to aid in
|
||||
multi-core utilization. These "modules" are what we refer to as _codegen
|
||||
units_. These units were established way back during the monomorphization
|
||||
collection phase.
|
||||
|
||||
Once LLVM produces objects from these modules, these objects are passed to the
|
||||
linker along with, optionally, the metadata object and an archive or an
|
||||
executable is produced.
|
||||
|
||||
It is not necessarily the codegen phase described above that runs the
|
||||
optimizations. With certain kinds of LTO, the optimization might happen at the
|
||||
linking time instead. It is also possible for some optimizations to happen
|
||||
before objects are passed on to the linker and some to happen during the
|
||||
linking.
|
||||
|
||||
This all happens towards the very end of compilation. The code for this can be
|
||||
found in [`rustc_codegen_ssa::back`][ssaback] and
|
||||
[`rustc_codegen_llvm::back`][llvmback]. Sadly, this piece of code is not
|
||||
really well-separated into LLVM-dependent code; the [`rustc_codegen_ssa`][ssa]
|
||||
contains a fair amount of code specific to the LLVM backend.
|
||||
|
||||
Once these components are done with their work you end up with a number of
|
||||
files in your filesystem corresponding to the outputs you have requested.
|
||||
|
||||
[ssa]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/index.html
|
||||
[ssaback]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/back/index.html
|
||||
[llvmback]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_llvm/back/index.html
|
||||
258
src/doc/rustc-dev-guide/src/backend/debugging.md
Normal file
258
src/doc/rustc-dev-guide/src/backend/debugging.md
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
## Debugging LLVM
|
||||
|
||||
> NOTE: If you are looking for info about code generation, please see [this
|
||||
> chapter][codegen] instead.
|
||||
|
||||
[codegen]: ./codegen.md
|
||||
|
||||
This section is about debugging compiler bugs in code generation (e.g. why the
|
||||
compiler generated some piece of code or crashed in LLVM). LLVM is a big
|
||||
project on its own that probably needs to have its own debugging document (not
|
||||
that I could find one). But here are some tips that are important in a rustc
|
||||
context:
|
||||
|
||||
### Minimize the example
|
||||
|
||||
As a general rule, compilers generate lots of information from analyzing code.
|
||||
Thus, a useful first step is usually to find a minimal example. One way to do
|
||||
this is to
|
||||
|
||||
1. create a new crate that reproduces the issue (e.g. adding whatever crate is
|
||||
at fault as a dependency, and using it from there)
|
||||
|
||||
2. minimize the crate by removing external dependencies; that is, moving
|
||||
everything relevant to the new crate
|
||||
|
||||
3. further minimize the issue by making the code shorter (there are tools that
|
||||
help with this like `creduce`)
|
||||
|
||||
For more discussion on methodology for steps 2 and 3 above, there is an
|
||||
[epic blog post][mcve-blog] from pnkfelix specifically about Rust program minimization.
|
||||
|
||||
[mcve-blog]: https://blog.pnkfx.org/blog/2019/11/18/rust-bug-minimization-patterns/
|
||||
|
||||
### Enable LLVM internal checks
|
||||
|
||||
The official compilers (including nightlies) have LLVM assertions disabled,
|
||||
which means that LLVM assertion failures can show up as compiler crashes (not
|
||||
ICEs but "real" crashes) and other sorts of weird behavior. If you are
|
||||
encountering these, it is a good idea to try using a compiler with LLVM
|
||||
assertions enabled - either an "alt" nightly or a compiler you build yourself
|
||||
by setting `[llvm] assertions=true` in your config.toml - and see whether
|
||||
anything turns up.
|
||||
|
||||
The rustc build process builds the LLVM tools into
|
||||
`./build/<host-triple>/llvm/bin`. They can be called directly.
|
||||
These tools include:
|
||||
* [`llc`], which compiles bitcode (`.bc` files) to executable code; this can be used to
|
||||
replicate LLVM backend bugs.
|
||||
* [`opt`], a bitcode transformer that runs LLVM optimization passes.
|
||||
* [`bugpoint`], which reduces large test cases to small, useful ones.
|
||||
* and many others, some of which are referenced in the text below.
|
||||
|
||||
[`llc`]: https://llvm.org/docs/CommandGuide/llc.html
|
||||
[`opt`]: https://llvm.org/docs/CommandGuide/opt.html
|
||||
[`bugpoint`]: https://llvm.org/docs/Bugpoint.html
|
||||
|
||||
By default, the Rust build system does not check for changes to the LLVM source code or
|
||||
its build configuration settings. So, if you need to rebuild the LLVM that is linked
|
||||
into `rustc`, first delete the file `llvm-finished-building`, which should be located
|
||||
in `build/<host-triple>/llvm/`.
|
||||
|
||||
The default rustc compilation pipeline has multiple codegen units, which is
|
||||
hard to replicate manually and means that LLVM is called multiple times in
|
||||
parallel. If you can get away with it (i.e. if it doesn't make your bug
|
||||
disappear), passing `-C codegen-units=1` to rustc will make debugging easier.
|
||||
|
||||
### Get your hands on raw LLVM input
|
||||
|
||||
For rustc to generate LLVM IR, you need to pass the `--emit=llvm-ir` flag. If
|
||||
you are building via cargo, use the `RUSTFLAGS` environment variable (e.g.
|
||||
`RUSTFLAGS='--emit=llvm-ir'`). This causes rustc to spit out LLVM IR into the
|
||||
target directory.
|
||||
|
||||
`cargo llvm-ir [options] path` spits out the LLVM IR for a particular function
|
||||
at `path`. (`cargo install cargo-asm` installs `cargo asm` and `cargo
|
||||
llvm-ir`). `--build-type=debug` emits code for debug builds. There are also
|
||||
other useful options. Also, debug info in LLVM IR can clutter the output a lot:
|
||||
`RUSTFLAGS="-C debuginfo=0"` is really useful.
|
||||
|
||||
`RUSTFLAGS="-C save-temps"` outputs LLVM bitcode (not the same as IR) at
|
||||
different stages during compilation, which is sometimes useful. The output LLVM
|
||||
bitcode will be in `.bc` files in the compiler's output directory, set via the
|
||||
`--out-dir DIR` argument to `rustc`.
|
||||
|
||||
* If you are hitting an assertion failure or segmentation fault from the LLVM
|
||||
backend when invoking `rustc` itself, it is a good idea to try passing each
|
||||
of these `.bc` files to the `llc` command, and see if you get the same
|
||||
failure. (LLVM developers often prefer a bug reduced to a `.bc` file over one
|
||||
that uses a Rust crate for its minimized reproduction.)
|
||||
|
||||
* To get human readable versions of the LLVM bitcode, one just needs to convert
|
||||
the bitcode (`.bc`) files to `.ll` files using `llvm-dis`, which should be in
|
||||
the target local compilation of rustc.
|
||||
|
||||
|
||||
Note that rustc emits different IR depending on whether `-O` is enabled, even
|
||||
without LLVM's optimizations, so if you want to play with the IR rustc emits,
|
||||
you should:
|
||||
|
||||
```bash
|
||||
$ rustc +local my-file.rs --emit=llvm-ir -O -C no-prepopulate-passes \
|
||||
-C codegen-units=1
|
||||
$ OPT=./build/$TRIPLE/llvm/bin/opt
|
||||
$ $OPT -S -O2 < my-file.ll > my
|
||||
```
|
||||
|
||||
If you just want to get the LLVM IR during the LLVM pipeline, to e.g. see which
|
||||
IR causes an optimization-time assertion to fail, or to see when LLVM performs
|
||||
a particular optimization, you can pass the rustc flag `-C
|
||||
llvm-args=-print-after-all`, and possibly add `-C
|
||||
llvm-args='-filter-print-funcs=EXACT_FUNCTION_NAME'` (e.g. `-C
|
||||
llvm-args='-filter-print-funcs=_ZN11collections3str21_$LT$impl$u20$str$GT$\
|
||||
7replace17hbe10ea2e7c809b0bE'`).
|
||||
|
||||
That produces a lot of output into standard error, so you'll want to pipe that
|
||||
to some file. Also, if you are using neither `-filter-print-funcs` nor `-C
|
||||
codegen-units=1`, then, because the multiple codegen units run in parallel, the
|
||||
printouts will mix together and you won't be able to read anything.
|
||||
|
||||
* One caveat to the aforementioned methodology: the `-print` family of options
|
||||
to LLVM only prints the IR unit that the pass runs on (e.g., just a
|
||||
function), and does not include any referenced declarations, globals,
|
||||
metadata, etc. This means you cannot in general feed the output of `-print`
|
||||
into `llc` to reproduce a given problem.
|
||||
|
||||
* Within LLVM itself, calling `F.getParent()->dump()` at the beginning of
|
||||
`SafeStackLegacyPass::runOnFunction` will dump the whole module, which
|
||||
may provide a better basis for reproduction. (However, you
|
||||
should be able to get that same dump from the `.bc` files dumped by
|
||||
`-C save-temps`.)
|
||||
|
||||
If you want just the IR for a specific function (say, you want to see why it
|
||||
causes an assertion or doesn't optimize correctly), you can use `llvm-extract`,
|
||||
e.g.
|
||||
|
||||
```bash
|
||||
$ ./build/$TRIPLE/llvm/bin/llvm-extract \
|
||||
-func='_ZN11collections3str21_$LT$impl$u20$str$GT$7replace17hbe10ea2e7c809b0bE' \
|
||||
-S \
|
||||
< unextracted.ll \
|
||||
> extracted.ll
|
||||
```
|
||||
|
||||
### Investigate LLVM optimization passes
|
||||
|
||||
If you are seeing incorrect behavior due to an optimization pass, a very handy
|
||||
LLVM option is `-opt-bisect-limit`, which takes an integer denoting the index
|
||||
value of the highest pass to run. Index values for taken passes are stable
|
||||
from run to run; by coupling this with software that automates bisecting the
|
||||
search space based on the resulting program, an errant pass can be quickly
|
||||
determined. When an `-opt-bisect-limit` is specified, all runs are displayed
|
||||
to standard error, along with their index and output indicating if the
|
||||
pass was run or skipped. Setting the limit to an index of -1 (e.g.,
|
||||
`RUSTFLAGS="-C llvm-args=-opt-bisect-limit=-1"`) will show all passes and
|
||||
their corresponding index values.
|
||||
|
||||
If you want to play with the optimization pipeline, you can use the [`opt`] tool
|
||||
from `./build/<host-triple>/llvm/bin/` with the LLVM IR emitted by rustc.
|
||||
|
||||
When investigating the implementation of LLVM itself, you should be
|
||||
aware of its [internal debug infrastructure][llvm-debug].
|
||||
This is provided in LLVM Debug builds, which you enable for rustc
|
||||
LLVM builds by changing this setting in the config.toml:
|
||||
```
|
||||
[llvm]
|
||||
# Indicates whether the LLVM assertions are enabled or not
|
||||
assertions = true
|
||||
|
||||
# Indicates whether the LLVM build is a Release or Debug build
|
||||
optimize = false
|
||||
```
|
||||
The quick summary is:
|
||||
* Setting `assertions=true` enables coarse-grain debug messaging.
|
||||
* Beyond that, setting `optimize=false` enables fine-grain debug messaging.
|
||||
* `LLVM_DEBUG(dbgs() << msg)` in LLVM is like `debug!(msg)` in `rustc`.
|
||||
* The `-debug` option turns on all messaging; it is like setting the
|
||||
environment variable `RUSTC_LOG=debug` in `rustc`.
|
||||
* The `-debug-only=<pass1>,<pass2>` variant is more selective; it is like
|
||||
setting the environment variable `RUSTC_LOG=path1,path2` in `rustc`.
|
||||
|
||||
[llvm-debug]: https://llvm.org/docs/ProgrammersManual.html#the-llvm-debug-macro-and-debug-option
|
||||
|
||||
### Getting help and asking questions
|
||||
|
||||
If you have some questions, head over to the [rust-lang Zulip] and
|
||||
specifically the `#t-compiler/wg-llvm` stream.
|
||||
|
||||
[rust-lang Zulip]: https://rust-lang.zulipchat.com/
|
||||
|
||||
### Compiler options to know and love
|
||||
|
||||
The `-C help` and `-Z help` compiler switches will list out a variety
|
||||
of interesting options you may find useful. Here are a few of the most
|
||||
common that pertain to LLVM development (some of them are employed in the
|
||||
tutorial above):
|
||||
|
||||
- The `--emit llvm-ir` option emits a `<filename>.ll` file with LLVM IR in textual format
|
||||
- The `--emit llvm-bc` option emits in bytecode format (`<filename>.bc`)
|
||||
- Passing `-C llvm-args=<foo>` allows passing pretty much all the
|
||||
options that tools like llc and opt would accept;
|
||||
e.g. `-C llvm-args=-print-before-all` to print IR before every LLVM
|
||||
pass.
|
||||
- The `-C no-prepopulate-passes` will avoid pre-populating the LLVM pass
|
||||
manager with a list of passes. This will allow you to view the LLVM
|
||||
IR that rustc generates, not the LLVM IR after optimizations.
|
||||
- The `-C passes=val` option allows you to supply a space separated list of extra LLVM passes to run
|
||||
- The `-C save-temps` option saves all temporary output files during compilation
|
||||
- The `-Z print-llvm-passes` option will print out LLVM optimization passes being run
|
||||
- The `-Z time-llvm-passes` option measures the time of each LLVM pass
|
||||
- The `-Z verify-llvm-ir` option will verify the LLVM IR for correctness
|
||||
- The `-Z no-parallel-backend` will disable parallel compilation of distinct compilation units
|
||||
- The `-Z llvm-time-trace` option will output a Chrome profiler compatible JSON file
|
||||
which contains details and timings for LLVM passes.
|
||||
- The `-C llvm-args=-opt-bisect-limit=<index>` option allows for bisecting LLVM
|
||||
optimizations.
|
||||
|
||||
### Filing LLVM bug reports
|
||||
|
||||
When filing an LLVM bug report, you will probably want some sort of minimal
|
||||
working example that demonstrates the problem. The Godbolt compiler explorer is
|
||||
really helpful for this.
|
||||
|
||||
1. Once you have some LLVM IR for the problematic code (see above), you can
|
||||
create a minimal working example with Godbolt. Go to
|
||||
[llvm.godbolt.org](https://llvm.godbolt.org).
|
||||
|
||||
2. Choose `LLVM-IR` as programming language.
|
||||
|
||||
3. Use `llc` to compile the IR to a particular target as is:
|
||||
- There are some useful flags: `-mattr` enables target features, `-march=`
|
||||
selects the target, `-mcpu=` selects the CPU, etc.
|
||||
- Commands like `llc -march=help` output all architectures available, which
|
||||
is useful because sometimes the Rust arch names and the LLVM names do not
|
||||
match.
|
||||
- If you have compiled rustc yourself somewhere, in the target directory
|
||||
you have binaries for `llc`, `opt`, etc.
|
||||
|
||||
4. If you want to optimize the LLVM-IR, you can use `opt` to see how the LLVM
|
||||
optimizations transform it.
|
||||
|
||||
5. Once you have a Godbolt link demonstrating the issue, it is pretty easy to
|
||||
file an LLVM bug. Just visit their [GitHub issues page][llvm-issues].
|
||||
|
||||
[llvm-issues]: https://github.com/llvm/llvm-project/issues
|
||||
|
||||
### Porting bug fixes from LLVM
|
||||
|
||||
Once you've identified the bug as an LLVM bug, you will sometimes
|
||||
find that it has already been reported and fixed in LLVM, but we haven't
|
||||
gotten the fix yet (or perhaps you are familiar enough with LLVM to fix it yourself).
|
||||
|
||||
In that case, we can sometimes opt to port the fix for the bug
|
||||
directly to our own LLVM fork, so that rustc can use it more easily.
|
||||
Our fork of LLVM is maintained in [rust-lang/llvm-project]. Once
|
||||
you've landed the fix there, you'll also need to land a PR modifying
|
||||
our submodule commits -- ask around on Zulip for help.
|
||||
|
||||
[rust-lang/llvm-project]: https://github.com/rust-lang/llvm-project/
|
||||
281
src/doc/rustc-dev-guide/src/backend/implicit-caller-location.md
Normal file
281
src/doc/rustc-dev-guide/src/backend/implicit-caller-location.md
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
# Implicit Caller Location
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
Approved in [RFC 2091], this feature enables the accurate reporting of caller location during panics
|
||||
initiated from functions like `Option::unwrap`, `Result::expect`, and `Index::index`. This feature
|
||||
adds the [`#[track_caller]`][attr-reference] attribute for functions, the
|
||||
[`caller_location`][intrinsic] intrinsic, and the stabilization-friendly
|
||||
[`core::panic::Location::caller`][wrapper] wrapper.
|
||||
|
||||
## Motivating Example
|
||||
|
||||
Take this example program:
|
||||
|
||||
```rust
|
||||
fn main() {
|
||||
let foo: Option<()> = None;
|
||||
foo.unwrap(); // this should produce a useful panic message!
|
||||
}
|
||||
```
|
||||
|
||||
Prior to Rust 1.42, panics like this `unwrap()` printed a location in core:
|
||||
|
||||
```
|
||||
$ rustc +1.41.0 example.rs; example.exe
|
||||
thread 'main' panicked at 'called `Option::unwrap()` on a `None` value',...core\macros\mod.rs:15:40
|
||||
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace.
|
||||
```
|
||||
|
||||
As of 1.42, we get a much more helpful message:
|
||||
|
||||
```
|
||||
$ rustc +1.42.0 example.rs; example.exe
|
||||
thread 'main' panicked at 'called `Option::unwrap()` on a `None` value', example.rs:3:5
|
||||
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
|
||||
```
|
||||
|
||||
These error messages are achieved through a combination of changes to `panic!` internals to make use
|
||||
of `core::panic::Location::caller` and a number of `#[track_caller]` annotations in the standard
|
||||
library which propagate caller information.
|
||||
|
||||
## Reading Caller Location
|
||||
|
||||
Previously, `panic!` made use of the `file!()`, `line!()`, and `column!()` macros to construct a
|
||||
[`Location`] pointing to where the panic occurred. These macros couldn't be given an overridden
|
||||
location, so functions which intentionally invoked `panic!` couldn't provide their own location,
|
||||
hiding the actual source of error.
|
||||
|
||||
Internally, `panic!()` now calls [`core::panic::Location::caller()`][wrapper] to find out where it
|
||||
was expanded. This function is itself annotated with `#[track_caller]` and wraps the
|
||||
[`caller_location`][intrinsic] compiler intrinsic implemented by rustc. This intrinsic is most easily
|
||||
explained in terms of how it works in a `const` context.
|
||||
|
||||
## Caller Location in `const`
|
||||
|
||||
There are two main phases to returning the caller location in a const context: walking up the stack
|
||||
to find the right location and allocating a const value to return.
|
||||
|
||||
### Finding the right `Location`
|
||||
|
||||
In a const context we "walk up the stack" from where the intrinsic is invoked, stopping when we
|
||||
reach the first function call in the stack which does *not* have the attribute. This walk is in
|
||||
[`InterpCx::find_closest_untracked_caller_location()`][const-find-closest].
|
||||
|
||||
Starting at the bottom, we iterate up over stack [`Frame`][const-frame]s in the
|
||||
[`InterpCx::stack`][const-stack], calling
|
||||
[`InstanceKind::requires_caller_location`][requires-location] on the
|
||||
[`Instance`s from each `Frame`][frame-instance]. We stop once we find one that returns `false` and
|
||||
return the span of the *previous* frame which was the "topmost" tracked function.
|
||||
|
||||
### Allocating a static `Location`
|
||||
|
||||
Once we have a `Span`, we need to allocate static memory for the `Location`, which is performed by
|
||||
the [`TyCtxt::const_caller_location()`][const-location-query] query. Internally this calls
|
||||
[`InterpCx::alloc_caller_location()`][alloc-location] and results in a unique
|
||||
[memory kind][location-memory-kind] (`MemoryKind::CallerLocation`). The SSA codegen backend is able
|
||||
to emit code for these same values, and we use this code there as well.
|
||||
|
||||
Once our `Location` has been allocated in static memory, our intrinsic returns a reference to it.
|
||||
|
||||
## Generating code for `#[track_caller]` callees
|
||||
|
||||
To generate efficient code for a tracked function and its callers, we need to provide the same
|
||||
behavior from the intrinsic's point of view without having a stack to walk up at runtime. We invert
|
||||
the approach: as we grow the stack down we pass an additional argument to calls of tracked functions
|
||||
rather than walking up the stack when the intrinsic is called. That additional argument can be
|
||||
returned wherever the caller location is queried.
|
||||
|
||||
The argument we append is of type `&'static core::panic::Location<'static>`. A reference was chosen
|
||||
to avoid unnecessary copying because a pointer is a third the size of
|
||||
`std::mem::size_of::<core::panic::Location>() == 24` at time of writing.
|
||||
|
||||
When generating a call to a function which is tracked, we pass the location argument the value of
|
||||
[`FunctionCx::get_caller_location`][fcx-get].
|
||||
|
||||
If the calling function is tracked, `get_caller_location` returns the local in
|
||||
[`FunctionCx::caller_location`][fcx-location] which was populated by the current caller's caller.
|
||||
In these cases the intrinsic "returns" a reference which was actually provided in an argument to its
|
||||
caller.
|
||||
|
||||
If the calling function is not tracked, `get_caller_location` allocates a `Location` static from
|
||||
the current `Span` and returns a reference to that.
|
||||
|
||||
We more efficiently achieve the same behavior as a loop starting from the bottom by passing a single
|
||||
`&Location` value through the `caller_location` fields of multiple `FunctionCx`s as we grow the
|
||||
stack downward.
|
||||
|
||||
### Codegen examples
|
||||
|
||||
What does this transformation look like in practice? Take this example which uses the new feature:
|
||||
|
||||
```rust
|
||||
#![feature(track_caller)]
|
||||
use std::panic::Location;
|
||||
|
||||
#[track_caller]
|
||||
fn print_caller() {
|
||||
println!("called from {}", Location::caller());
|
||||
}
|
||||
|
||||
fn main() {
|
||||
print_caller();
|
||||
}
|
||||
```
|
||||
|
||||
Here `print_caller()` appears to take no arguments, but we compile it to something like this:
|
||||
|
||||
```rust
|
||||
#![feature(panic_internals)]
|
||||
use std::panic::Location;
|
||||
|
||||
fn print_caller(caller: &Location) {
|
||||
println!("called from {}", caller);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
print_caller(&Location::internal_constructor(file!(), line!(), column!()));
|
||||
}
|
||||
```
|
||||
|
||||
### Dynamic Dispatch
|
||||
|
||||
In codegen contexts we have to modify the callee ABI to pass this information down the stack, but
|
||||
the attribute expressly does *not* modify the type of the function. The ABI change must be
|
||||
transparent to type checking and remain sound in all uses.
|
||||
|
||||
Direct calls to tracked functions will always know the full codegen flags for the callee and can
|
||||
generate appropriate code. Indirect callers won't have this information and it's not encoded in
|
||||
the type of the function pointer they call, so we generate a [`ReifyShim`] around the function
|
||||
whenever taking a pointer to it. This shim isn't able to report the actual location of the indirect
|
||||
call (the function's definition site is reported instead), but it prevents miscompilation and is
|
||||
probably the best we can do without modifying fully-stabilized type signatures.
|
||||
|
||||
> *Note:* We always emit a [`ReifyShim`] when taking a pointer to a tracked function. While the
|
||||
> constraint here is imposed by codegen contexts, we don't know during MIR construction of the shim
|
||||
> whether we'll be called in a const context (safe to ignore shim) or in a codegen context (unsafe
|
||||
> to ignore shim). Even if we did know, the results from const and codegen contexts must agree.
|
||||
|
||||
## The Attribute
|
||||
|
||||
The `#[track_caller]` attribute is checked alongside other codegen attributes to ensure the
|
||||
function:
|
||||
|
||||
* has the `"Rust"` ABI (as opposed to e.g., `"C"`)
|
||||
* is not a closure
|
||||
* is not `#[naked]`
|
||||
|
||||
If the use is valid, we set [`CodegenFnAttrFlags::TRACK_CALLER`][attrs-flags]. This flag influences
|
||||
the return value of [`InstanceKind::requires_caller_location`][requires-location] which is in turn
|
||||
used in both const and codegen contexts to ensure correct propagation.
|
||||
|
||||
### Traits
|
||||
|
||||
When applied to trait method implementations, the attribute works as it does for regular functions.
|
||||
|
||||
When applied to a trait method prototype, the attribute applies to all implementations of the
|
||||
method. When applied to a default trait method implementation, the attribute takes effect on
|
||||
that implementation *and* any overrides.
|
||||
|
||||
Examples:
|
||||
|
||||
```rust
|
||||
#![feature(track_caller)]
|
||||
|
||||
macro_rules! assert_tracked {
|
||||
() => {{
|
||||
let location = std::panic::Location::caller();
|
||||
assert_eq!(location.file(), file!());
|
||||
assert_ne!(location.line(), line!(), "line should be outside this fn");
|
||||
println!("called at {}", location);
|
||||
}};
|
||||
}
|
||||
|
||||
trait TrackedFourWays {
|
||||
/// All implementations inherit `#[track_caller]`.
|
||||
#[track_caller]
|
||||
fn blanket_tracked();
|
||||
|
||||
/// Implementors can annotate themselves.
|
||||
fn local_tracked();
|
||||
|
||||
/// This implementation is tracked (overrides are too).
|
||||
#[track_caller]
|
||||
fn default_tracked() {
|
||||
assert_tracked!();
|
||||
}
|
||||
|
||||
/// Overrides of this implementation are tracked (it is too).
|
||||
#[track_caller]
|
||||
fn default_tracked_to_override() {
|
||||
assert_tracked!();
|
||||
}
|
||||
}
|
||||
|
||||
/// This impl uses the default impl for `default_tracked` and provides its own for
|
||||
/// `default_tracked_to_override`.
|
||||
impl TrackedFourWays for () {
|
||||
fn blanket_tracked() {
|
||||
assert_tracked!();
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn local_tracked() {
|
||||
assert_tracked!();
|
||||
}
|
||||
|
||||
fn default_tracked_to_override() {
|
||||
assert_tracked!();
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
<() as TrackedFourWays>::blanket_tracked();
|
||||
<() as TrackedFourWays>::default_tracked();
|
||||
<() as TrackedFourWays>::default_tracked_to_override();
|
||||
<() as TrackedFourWays>::local_tracked();
|
||||
}
|
||||
```
|
||||
|
||||
## Background/History
|
||||
|
||||
Broadly speaking, this feature's goal is to improve common Rust error messages without breaking
|
||||
stability guarantees, requiring modifications to end-user source, relying on platform-specific
|
||||
debug-info, or preventing user-defined types from having the same error-reporting benefits.
|
||||
|
||||
Improving the output of these panics has been a goal of proposals since at least mid-2016 (see
|
||||
[non-viable alternatives] in the approved RFC for details). It took two more years until RFC 2091
|
||||
was approved, much of its [rationale] for this feature's design having been discovered through the
|
||||
discussion around several earlier proposals.
|
||||
|
||||
The design in the original RFC limited itself to implementations that could be done inside the
|
||||
compiler at the time without significant refactoring. However in the year and a half between the
|
||||
approval of the RFC and the actual implementation work, a [revised design] was proposed and written
|
||||
up on the tracking issue. During the course of implementing that, it was also discovered that an
|
||||
implementation was possible without modifying the number of arguments in a function's MIR, which
|
||||
would simplify later stages and unlock use in traits.
|
||||
|
||||
Because the RFC's implementation strategy could not readily support traits, the semantics were not
|
||||
originally specified. They have since been implemented following the path which seemed most correct
|
||||
to the author and reviewers.
|
||||
|
||||
[RFC 2091]: https://github.com/rust-lang/rfcs/blob/master/text/2091-inline-semantic.md
|
||||
[attr-reference]: https://doc.rust-lang.org/reference/attributes/codegen.html#the-track_caller-attribute
|
||||
[intrinsic]: https://doc.rust-lang.org/nightly/core/intrinsics/fn.caller_location.html
|
||||
[wrapper]: https://doc.rust-lang.org/nightly/core/panic/struct.Location.html#method.caller
|
||||
[non-viable alternatives]: https://github.com/rust-lang/rfcs/blob/master/text/2091-inline-semantic.md#non-viable-alternatives
|
||||
[rationale]: https://github.com/rust-lang/rfcs/blob/master/text/2091-inline-semantic.md#rationale
|
||||
[revised design]: https://github.com/rust-lang/rust/issues/47809#issuecomment-443538059
|
||||
[attrs-flags]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/middle/codegen_fn_attrs/struct.CodegenFnAttrFlags.html#associatedconstant.TRACK_CALLER
|
||||
[`ReifyShim`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/enum.InstanceKind.html#variant.ReifyShim
|
||||
[`Location`]: https://doc.rust-lang.org/core/panic/struct.Location.html
|
||||
[const-find-closest]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/interpret/struct.InterpCx.html#method.find_closest_untracked_caller_location
|
||||
[requires-location]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/instance/enum.InstanceKind.html#method.requires_caller_location
|
||||
[alloc-location]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/interpret/struct.InterpCx.html#method.alloc_caller_location
|
||||
[fcx-location]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/struct.FunctionCx.html#structfield.caller_location
|
||||
[const-location-query]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.TyCtxt.html#method.const_caller_location
|
||||
[location-memory-kind]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/interpret/enum.MemoryKind.html#variant.CallerLocation
|
||||
[const-frame]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/interpret/struct.Frame.html
|
||||
[const-stack]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/interpret/struct.InterpCx.html#structfield.stack
|
||||
[fcx-get]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/struct.FunctionCx.html#method.get_caller_location
|
||||
[frame-instance]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/interpret/struct.Frame.html#structfield.instance
|
||||
5
src/doc/rustc-dev-guide/src/backend/inline-asm.md
Normal file
5
src/doc/rustc-dev-guide/src/backend/inline-asm.md
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Inline Assembly
|
||||
|
||||
**TODO**: You can find more info
|
||||
[here](https://github.com/rust-lang/rust/pull/69171#issue-375572066)
|
||||
[#1162](https://github.com/rust-lang/rustc-dev-guide/issues/1162)
|
||||
191
src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md
Normal file
191
src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
# Libraries and Metadata
|
||||
|
||||
When the compiler sees a reference to an external crate, it needs to load some
|
||||
information about that crate. This chapter gives an overview of that process,
|
||||
and the supported file formats for crate libraries.
|
||||
|
||||
## Libraries
|
||||
|
||||
A crate dependency can be loaded from an `rlib`, `dylib`, or `rmeta` file. A
|
||||
key point of these file formats is that they contain `rustc`-specific
|
||||
[*metadata*](#metadata). This metadata allows the compiler to discover enough
|
||||
information about the external crate to understand the items it contains,
|
||||
which macros it exports, and *much* more.
|
||||
|
||||
### rlib
|
||||
|
||||
An `rlib` is an [archive file], which is similar to a tar file. This file
|
||||
format is specific to `rustc`, and may change over time. This file contains:
|
||||
|
||||
* Object code, which is the result of code generation. This is used during
|
||||
regular linking. There is a separate `.o` file for each [codegen unit]. The
|
||||
codegen step can be skipped with the [`-C
|
||||
linker-plugin-lto`][linker-plugin-lto] CLI option, which means each `.o`
|
||||
file will only contain LLVM bitcode.
|
||||
* [LLVM bitcode], which is a binary representation of LLVM's intermediate
|
||||
representation, which is embedded as a section in the `.o` files. This can
|
||||
be used for [Link Time Optimization] (LTO). This can be removed with the
|
||||
[`-C embed-bitcode=no`][embed-bitcode] CLI option to improve compile times
|
||||
and reduce disk space if LTO is not needed.
|
||||
* `rustc` [metadata], in a file named `lib.rmeta`.
|
||||
* A symbol table, which is generally a list of symbols with offsets to the
|
||||
object file that contains that symbol. This is pretty standard for archive
|
||||
files.
|
||||
|
||||
[archive file]: https://en.wikipedia.org/wiki/Ar_(Unix)
|
||||
[LLVM bitcode]: https://llvm.org/docs/BitCodeFormat.html
|
||||
[Link Time Optimization]: https://llvm.org/docs/LinkTimeOptimization.html
|
||||
[codegen unit]: ../backend/codegen.md
|
||||
[embed-bitcode]: https://doc.rust-lang.org/rustc/codegen-options/index.html#embed-bitcode
|
||||
[linker-plugin-lto]: https://doc.rust-lang.org/rustc/codegen-options/index.html#linker-plugin-lto
|
||||
|
||||
### dylib
|
||||
|
||||
A `dylib` is a platform-specific shared library. It includes the `rustc`
|
||||
[metadata] in a special link section called `.rustc` in a compressed format.
|
||||
|
||||
### rmeta
|
||||
|
||||
An `rmeta` file is a custom binary format that contains the [metadata] for the
|
||||
crate. This file can be used for fast "checks" of a project by skipping all
|
||||
code generation (as is done with `cargo check`), collecting enough information
|
||||
for documentation (as is done with `cargo doc`), or for
|
||||
[pipelining](#pipelining). This file is created if the
|
||||
[`--emit=metadata`][emit] CLI option is used.
|
||||
|
||||
`rmeta` files do not support linking, since they do not contain compiled
|
||||
object files.
|
||||
|
||||
[emit]: https://doc.rust-lang.org/rustc/command-line-arguments.html#option-emit
|
||||
|
||||
## Metadata
|
||||
|
||||
The metadata contains a wide swath of different elements. This guide will not
|
||||
go into detail of every field it contains. You are encouraged to browse the
|
||||
[`CrateRoot`] definition to get a sense of the different elements it contains.
|
||||
Everything about metadata encoding and decoding is in the [`rustc_metadata`]
|
||||
package.
|
||||
|
||||
Here are a few highlights of things it contains:
|
||||
|
||||
* The version of the `rustc` compiler. The compiler will refuse to load files
|
||||
from any other version.
|
||||
* The [Strict Version Hash](#strict-version-hash) (SVH). This helps ensure the
|
||||
correct dependency is loaded.
|
||||
* The [Stable Crate Id](#stable-crate-id). This is a hash used
|
||||
to identify crates.
|
||||
* Information about all the source files in the library. This can be used for
|
||||
a variety of things, such as diagnostics pointing to sources in a
|
||||
dependency.
|
||||
* Information about exported macros, traits, types, and items. Generally,
|
||||
anything that's needed to be known when a path references something inside a
|
||||
crate dependency.
|
||||
* Encoded [MIR]. This is optional, and only encoded if needed for code
|
||||
generation. `cargo check` skips this for performance reasons.
|
||||
|
||||
[`CrateRoot`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/struct.CrateRoot.html
|
||||
[`rustc_metadata`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/index.html
|
||||
[MIR]: ../mir/index.md
|
||||
|
||||
### Strict Version Hash
|
||||
|
||||
The Strict Version Hash ([SVH], also known as the "crate hash") is a 64-bit
|
||||
hash that is used to ensure that the correct crate dependencies are loaded. It
|
||||
is possible for a directory to contain multiple copies of the same dependency
|
||||
built with different settings, or built from different sources. The crate
|
||||
loader will skip any crates that have the wrong SVH.
|
||||
|
||||
The SVH is also used for the [incremental compilation] session filename,
|
||||
though that usage is mostly historic.
|
||||
|
||||
The hash includes a variety of elements:
|
||||
|
||||
* Hashes of the HIR nodes.
|
||||
* All of the upstream crate hashes.
|
||||
* All of the source filenames.
|
||||
* Hashes of certain command-line flags (like `-C metadata` via the [Stable
|
||||
Crate Id](#stable-crate-id), and all CLI options marked with `[TRACKED]`).
|
||||
|
||||
See [`compute_hir_hash`] for where the hash is actually computed.
|
||||
|
||||
[SVH]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/svh/struct.Svh.html
|
||||
[incremental compilation]: ../queries/incremental-compilation.md
|
||||
[`compute_hir_hash`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast_lowering/struct.LoweringContext.html#method.compute_hir_hash
|
||||
|
||||
### Stable Crate Id
|
||||
|
||||
The [`StableCrateId`] is a 64-bit hash used to identify different crates with
|
||||
potentially the same name. It is a hash of the crate name and all the
|
||||
[`-C metadata`] CLI options computed in [`StableCrateId::new`]. It is
|
||||
used in a variety of places, such as symbol name mangling, crate loading, and
|
||||
much more.
|
||||
|
||||
By default, all Rust symbols are mangled and incorporate the stable crate id.
|
||||
This allows multiple versions of the same crate to be included together. Cargo
|
||||
automatically generates `-C metadata` hashes based on a variety of factors,
|
||||
like the package version, source, and the target kind (a lib and test can have
|
||||
the same crate name, so they need to be disambiguated).
|
||||
|
||||
[`StableCrateId`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/def_id/struct.StableCrateId.html
|
||||
[`StableCrateId::new`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/def_id/struct.StableCrateId.html#method.new
|
||||
[`-C metadata`]: https://doc.rust-lang.org/rustc/codegen-options/index.html#metadata
|
||||
|
||||
## Crate loading
|
||||
|
||||
Crate loading can have quite a few subtle complexities. During [name
|
||||
resolution], when an external crate is referenced (via an `extern crate` or
|
||||
path), the resolver uses the [`CrateLoader`] which is responsible for finding
|
||||
the crate libraries and loading the [metadata] for them. After the dependency
|
||||
is loaded, the `CrateLoader` will provide the information the resolver needs
|
||||
to perform its job (such as expanding macros, resolving paths, etc.).
|
||||
|
||||
To load each external crate, the `CrateLoader` uses a [`CrateLocator`] to
|
||||
actually find the correct files for one specific crate. There is some great
|
||||
documentation in the [`locator`] module that goes into detail on how loading
|
||||
works, and I strongly suggest reading it to get the full picture.
|
||||
|
||||
The location of a dependency can come from several different places. Direct
|
||||
dependencies are usually passed with `--extern` flags, and the loader can look
|
||||
at those directly. Direct dependencies often have references to their own
|
||||
dependencies, which need to be loaded, too. These are usually found by
|
||||
scanning the directories passed with the `-L` flag for any file whose metadata
|
||||
contains a matching crate name and [SVH](#strict-version-hash). The loader
|
||||
will also look at the [sysroot] to find dependencies.
|
||||
|
||||
As crates are loaded, they are kept in the [`CStore`] with the crate metadata
|
||||
wrapped in the [`CrateMetadata`] struct. After resolution and expansion, the
|
||||
`CStore` will make its way into the [`GlobalCtxt`] for the rest of
|
||||
compilation.
|
||||
|
||||
[name resolution]: ../name-resolution.md
|
||||
[`CrateLoader`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/creader/struct.CrateLoader.html
|
||||
[`CrateLocator`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/locator/struct.CrateLocator.html
|
||||
[`locator`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/locator/index.html
|
||||
[`CStore`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/creader/struct.CStore.html
|
||||
[`CrateMetadata`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/decoder/struct.CrateMetadata.html
|
||||
[`GlobalCtxt`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.GlobalCtxt.html
|
||||
[sysroot]: ../building/bootstrapping/what-bootstrapping-does.md#what-is-a-sysroot
|
||||
|
||||
## Pipelining
|
||||
|
||||
One trick to improve compile times is to start building a crate as soon as the
|
||||
metadata for its dependencies is available. For a library, there is no need to
|
||||
wait for the code generation of dependencies to finish. Cargo implements this
|
||||
technique by telling `rustc` to emit an [`rmeta`](#rmeta) file for each
|
||||
dependency as well as an [`rlib`](#rlib). As early as it can, `rustc` will
|
||||
save the `rmeta` file to disk before it continues to the code generation
|
||||
phase. The compiler sends a JSON message to let the build tool know that it
|
||||
can start building the next crate if possible.
|
||||
|
||||
The [crate loading](#crate-loading) system is smart enough to know when it
|
||||
sees an `rmeta` file to use that if the `rlib` is not there (or has only been
|
||||
partially written).
|
||||
|
||||
This pipelining isn't possible for binaries, because the linking phase will
|
||||
require the code generation of all its dependencies. In the future, it may be
|
||||
possible to further improve this scenario by splitting linking into a separate
|
||||
command (see [#64191]).
|
||||
|
||||
[#64191]: https://github.com/rust-lang/rust/issues/64191
|
||||
|
||||
[metadata]: #metadata
|
||||
57
src/doc/rustc-dev-guide/src/backend/lowering-mir.md
Normal file
57
src/doc/rustc-dev-guide/src/backend/lowering-mir.md
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Lowering MIR to a Codegen IR
|
||||
|
||||
Now that we have a list of symbols to generate from the collector, we need to
|
||||
generate some sort of codegen IR. In this chapter, we will assume LLVM IR,
|
||||
since that's what rustc usually uses. The actual monomorphization is performed
|
||||
as we go, while we do the translation.
|
||||
|
||||
Recall that the backend is started by
|
||||
[`rustc_codegen_ssa::base::codegen_crate`][codegen1]. Eventually, this reaches
|
||||
[`rustc_codegen_ssa::mir::codegen_mir`][codegen2], which does the lowering from
|
||||
MIR to LLVM IR.
|
||||
|
||||
[codegen1]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/base/fn.codegen_crate.html
|
||||
[codegen2]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/fn.codegen_mir.html
|
||||
|
||||
The code is split into modules which handle particular MIR primitives:
|
||||
|
||||
- [`rustc_codegen_ssa::mir::block`][mirblk] will deal with translating
|
||||
blocks and their terminators. The most complicated and also the most
|
||||
interesting thing this module does is generating code for function calls,
|
||||
including the necessary unwinding handling IR.
|
||||
- [`rustc_codegen_ssa::mir::statement`][mirst] translates MIR statements.
|
||||
- [`rustc_codegen_ssa::mir::operand`][mirop] translates MIR operands.
|
||||
- [`rustc_codegen_ssa::mir::place`][mirpl] translates MIR place references.
|
||||
- [`rustc_codegen_ssa::mir::rvalue`][mirrv] translates MIR r-values.
|
||||
|
||||
[mirblk]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/block/index.html
|
||||
[mirst]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/statement/index.html
|
||||
[mirop]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/operand/index.html
|
||||
[mirpl]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/place/index.html
|
||||
[mirrv]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/rvalue/index.html
|
||||
|
||||
Before a function is translated a number of simple and primitive analysis
|
||||
passes will run to help us generate simpler and more efficient LLVM IR. An
|
||||
example of such an analysis pass would be figuring out which variables are
|
||||
SSA-like, so that we can translate them to SSA directly rather than relying on
|
||||
LLVM's `mem2reg` for those variables. The analysis can be found in
|
||||
[`rustc_codegen_ssa::mir::analyze`][mirana].
|
||||
|
||||
[mirana]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/analyze/index.html
|
||||
|
||||
Usually a single MIR basic block will map to an LLVM basic block, with very few
|
||||
exceptions: intrinsic or function calls and less basic MIR statements like
|
||||
`assert` can result in multiple basic blocks. This is a perfect lede into the
|
||||
non-portable LLVM-specific part of the code generation. Intrinsic generation is
|
||||
fairly easy to understand as it involves very few abstraction levels in between
|
||||
and can be found in [`rustc_codegen_llvm::intrinsic`][llvmint].
|
||||
|
||||
[llvmint]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_llvm/intrinsic/index.html
|
||||
|
||||
Everything else will use the [builder interface][builder]. This is the code that gets
|
||||
called in the [`rustc_codegen_ssa::mir::*`][ssamir] modules discussed above.
|
||||
|
||||
[builder]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_llvm/builder/index.html
|
||||
[ssamir]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/index.html
|
||||
|
||||
> TODO: discuss how constants are generated
|
||||
81
src/doc/rustc-dev-guide/src/backend/monomorph.md
Normal file
81
src/doc/rustc-dev-guide/src/backend/monomorph.md
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
# Monomorphization
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
As you probably know, Rust has a very expressive type system that has extensive
|
||||
support for generic types. But of course, assembly is not generic, so we need
|
||||
to figure out the concrete types of all the generics before the code can
|
||||
execute.
|
||||
|
||||
Different languages handle this problem differently. For example, in some
|
||||
languages, such as Java, we may not know the most precise type of a value until
|
||||
runtime. In the case of Java, this is ok because (almost) all variables are
|
||||
reference values anyway (i.e. pointers to a heap allocated object). This
|
||||
flexibility comes at the cost of performance, since all accesses to an object
|
||||
must dereference a pointer.
|
||||
|
||||
Rust takes a different approach: it _monomorphizes_ all generic types. This
|
||||
means that the compiler stamps out a different copy of the code of a generic
|
||||
function for each concrete type needed. For example, if I use a `Vec<u64>` and
|
||||
a `Vec<String>` in my code, then the generated binary will have two copies of
|
||||
the generated code for `Vec`: one for `Vec<u64>` and another for `Vec<String>`.
|
||||
The result is fast programs, but it comes at the cost of compile time (creating
|
||||
all those copies can take a while) and binary size (all those copies might take
|
||||
a lot of space).
|
||||
|
||||
Monomorphization is the first step in the backend of the Rust compiler.
|
||||
|
||||
## Collection
|
||||
|
||||
First, we need to figure out what concrete types we need for all the generic
|
||||
things in our program. This is called _collection_, and the code that does this
|
||||
is called the _monomorphization collector_.
|
||||
|
||||
Take this example:
|
||||
|
||||
```rust
|
||||
fn banana() {
|
||||
peach::<u64>();
|
||||
}
|
||||
|
||||
fn main() {
|
||||
banana();
|
||||
}
|
||||
```
|
||||
|
||||
The monomorphization collector will give you a list of `[main, banana,
|
||||
peach::<u64>]`. These are the functions that will have machine code generated
|
||||
for them. The collector will also add things like statics to that list.
|
||||
|
||||
See [the collector rustdocs][collect] for more info.
|
||||
|
||||
[collect]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_monomorphize/collector/index.html
|
||||
|
||||
The monomorphization collector is run just before MIR lowering and codegen.
|
||||
[`rustc_codegen_ssa::base::codegen_crate`][codegen1] calls the
|
||||
[`collect_and_partition_mono_items`][mono] query, which does monomorphization
|
||||
collection and then partitions them into [codegen
|
||||
units](../appendix/glossary.md#codegen-unit).
|
||||
|
||||
## Codegen Unit (CGU) partitioning
|
||||
|
||||
For better incremental build times, the CGU partitioner creates two CGUs for each source-level
|
||||
module. One is for "stable" (i.e. non-generic) code and the other is for more volatile code, i.e.
|
||||
monomorphized/specialized instances.
|
||||
|
||||
For dependencies, consider Crate A and Crate B, such that Crate B depends on Crate A.
|
||||
The following table lists different scenarios for a function in Crate A that might be used by one
|
||||
or more modules in Crate B.
|
||||
|
||||
| Crate A function | Behavior |
|
||||
| - | - |
|
||||
| Non-generic function | Crate A function doesn't appear in any codegen units of Crate B |
|
||||
| Non-generic `#[inline]` function | Crate A function appears within a single CGU of Crate B, and exists even after the post-inlining stage |
|
||||
| Generic function | Regardless of inlining, all monomorphized (specialized) functions <br> from Crate A appear within a single codegen unit for Crate B. <br> The codegen unit exists even after the post inlining stage.|
|
||||
| Generic `#[inline]` function | - same - |
|
||||
|
||||
For more details about the partitioner read the module level [documentation].
|
||||
|
||||
[mono]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_monomorphize/partitioning/fn.collect_and_partition_mono_items.html
|
||||
[codegen1]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/base/fn.codegen_crate.html
|
||||
[documentation]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_monomorphize/partitioning/index.html
|
||||
237
src/doc/rustc-dev-guide/src/backend/updating-llvm.md
Normal file
237
src/doc/rustc-dev-guide/src/backend/updating-llvm.md
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
# Updating LLVM
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
<!-- date-check: Aug 2024 -->
|
||||
Rust supports building against multiple LLVM versions:
|
||||
|
||||
* Tip-of-tree for the current LLVM development branch is usually supported
|
||||
within a few days. PRs for such fixes are tagged with `llvm-main`.
|
||||
* The latest released major version is always supported.
|
||||
* The one or two preceding major versions are usually supported.
|
||||
|
||||
By default, Rust uses its own fork in the [rust-lang/llvm-project repository].
|
||||
This fork is based on a `release/$N.x` branch of the upstream project, where
|
||||
`$N` is either the latest released major version, or the current major version
|
||||
in release candidate phase. The fork is never based on the `main` development
|
||||
branch.
|
||||
|
||||
Our LLVM fork only accepts:
|
||||
|
||||
* Backports of changes that have already landed upstream.
|
||||
* Workarounds for build issues affecting our CI environment.
|
||||
|
||||
With the exception of one grandfathered-in patch for SGX enablement, we do not
|
||||
accept functional patches that have not been upstreamed first.
|
||||
|
||||
There are three types of LLVM updates, with different procedures:
|
||||
|
||||
* Backports while the current major LLVM version is supported.
|
||||
* Backports while the current major LLVM version is no longer supported (or
|
||||
the change is not eligible for upstream backport).
|
||||
* Update to a new major LLVM version.
|
||||
|
||||
## Backports (upstream supported)
|
||||
|
||||
While the current major LLVM version is supported upstream, fixes should be
|
||||
backported upstream first, and the release branch then merged back into the
|
||||
Rust fork.
|
||||
|
||||
1. Make sure the bugfix is in upstream LLVM.
|
||||
2. If this hasn't happened already, request a backport to the upstream release
|
||||
branch. If you have LLVM commit access, follow the [backport process].
|
||||
Otherwise, open an issue requesting the backport. Continue once the
|
||||
backport has been approved and merged.
|
||||
3. Identify the branch that rustc is currently using. The `src/llvm-project`
|
||||
submodule is always pinned to a branch of the
|
||||
[rust-lang/llvm-project repository].
|
||||
4. Fork the rust-lang/llvm-project repository.
|
||||
5. Check out the appropriate branch (typically named `rustc/a.b-yyyy-mm-dd`).
|
||||
6. Add a remote for the upstream repository using
|
||||
`git remote add upstream https://github.com/llvm/llvm-project.git` and
|
||||
fetch it using `git fetch upstream`.
|
||||
7. Merge the `upstream/release/$N.x` branch.
|
||||
8. Push this branch to your fork.
|
||||
9. Send a Pull Request to rust-lang/llvm-project to the same branch as before.
|
||||
Be sure to reference the Rust and/or LLVM issue that you're fixing in the PR
|
||||
description.
|
||||
10. Wait for the PR to be merged.
|
||||
11. Send a PR to rust-lang/rust updating the `src/llvm-project` submodule with
|
||||
your bugfix. This can be done locally with `git submodule update --remote
|
||||
src/llvm-project` typically.
|
||||
12. Wait for PR to be merged.
|
||||
|
||||
An example PR:
|
||||
[#59089](https://github.com/rust-lang/rust/pull/59089)
|
||||
|
||||
## Backports (upstream not supported)
|
||||
|
||||
Upstream LLVM releases are only supported for two to three months after the
|
||||
GA release. Once upstream backports are no longer accepted, changes should be
|
||||
cherry-picked directly to our fork.
|
||||
|
||||
1. Make sure the bugfix is in upstream LLVM.
|
||||
2. Identify the branch that rustc is currently using. The `src/llvm-project`
|
||||
submodule is always pinned to a branch of the
|
||||
[rust-lang/llvm-project repository].
|
||||
3. Fork the rust-lang/llvm-project repository.
|
||||
4. Check out the appropriate branch (typically named `rustc/a.b-yyyy-mm-dd`).
|
||||
5. Add a remote for the upstream repository using
|
||||
`git remote add upstream https://github.com/llvm/llvm-project.git` and
|
||||
fetch it using `git fetch upstream`.
|
||||
6. Cherry-pick the relevant commit(s) using `git cherry-pick -x`.
|
||||
7. Push this branch to your fork.
|
||||
8. Send a Pull Request to rust-lang/llvm-project to the same branch as before.
|
||||
Be sure to reference the Rust and/or LLVM issue that you're fixing in the PR
|
||||
description.
|
||||
9. Wait for the PR to be merged.
|
||||
10. Send a PR to rust-lang/rust updating the `src/llvm-project` submodule with
|
||||
your bugfix. This can be done locally with `git submodule update --remote
|
||||
src/llvm-project` typically.
|
||||
11. Wait for PR to be merged.
|
||||
|
||||
An example PR:
|
||||
[#59089](https://github.com/rust-lang/rust/pull/59089)
|
||||
|
||||
## New LLVM Release Updates
|
||||
|
||||
<!-- date-check: Jul 2023 -->
|
||||
|
||||
Unlike bugfixes,
|
||||
updating to a new release of LLVM typically requires a lot more work.
|
||||
This is where we can't reasonably cherry-pick commits backwards,
|
||||
so we need to do a full update.
|
||||
There's a lot of stuff to do here,
|
||||
so let's go through each in detail.
|
||||
|
||||
1. LLVM announces that its latest release version has branched.
|
||||
This will show up as a branch in the [llvm/llvm-project repository],
|
||||
typically named `release/$N.x`,
|
||||
where `$N` is the version of LLVM that's being released.
|
||||
|
||||
1. Create a new branch in the [rust-lang/llvm-project repository]
|
||||
from this `release/$N.x` branch,
|
||||
and name it `rustc/a.b-yyyy-mm-dd`,
|
||||
where `a.b` is the current version number of LLVM in-tree
|
||||
at the time of the branch,
|
||||
and the remaining part is the current date.
|
||||
|
||||
2. Apply Rust-specific patches to the llvm-project repository.
|
||||
All features and bugfixes are upstream,
|
||||
but there are often some weird build-related patches
|
||||
that don't make sense to upstream.
|
||||
These patches are typically the latest patches in the
|
||||
rust-lang/llvm-project branch that rustc is currently using.
|
||||
|
||||
3. Build the new LLVM in the `rust` repository.
|
||||
To do this,
|
||||
you'll want to update the `src/llvm-project` repository to your branch,
|
||||
and the revision you've created.
|
||||
It's also typically a good idea to update `.gitmodules` with the new
|
||||
branch name of the LLVM submodule.
|
||||
Make sure you've committed changes to
|
||||
`src/llvm-project` to ensure submodule updates aren't reverted.
|
||||
Some commands you should execute are:
|
||||
|
||||
* `./x build src/llvm-project` - test that LLVM still builds
|
||||
* `./x build` - build the rest of rustc
|
||||
|
||||
You'll likely need to update [`llvm-wrapper/*.cpp`][`llvm-wrapper`]
|
||||
to compile with updated LLVM bindings.
|
||||
Note that you should use `#ifdef` and such to ensure
|
||||
that the bindings still compile on older LLVM versions.
|
||||
|
||||
Note that `profile = "compiler"` and other defaults set by `./x setup`
|
||||
download LLVM from CI instead of building it from source.
|
||||
You should disable this temporarily to make sure your changes are being used.
|
||||
This is done by having the following setting in `config.toml`:
|
||||
|
||||
```toml
|
||||
[llvm]
|
||||
download-ci-llvm = false
|
||||
```
|
||||
|
||||
4. Test for regressions across other platforms. LLVM often has at least one bug
|
||||
for non-tier-1 architectures, so it's good to do some more testing before
|
||||
sending this to bors! If you're low on resources you can send the PR as-is
|
||||
now to bors, though, and it'll get tested anyway.
|
||||
|
||||
Ideally, build LLVM and test it on a few platforms:
|
||||
|
||||
* Linux
|
||||
* macOS
|
||||
* Windows
|
||||
|
||||
Afterwards, run some of the docker containers that CI also runs:
|
||||
|
||||
* `./src/ci/docker/run.sh wasm32`
|
||||
* `./src/ci/docker/run.sh arm-android`
|
||||
* `./src/ci/docker/run.sh dist-various-1`
|
||||
* `./src/ci/docker/run.sh dist-various-2`
|
||||
* `./src/ci/docker/run.sh armhf-gnu`
|
||||
|
||||
5. Prepare a PR to `rust-lang/rust`. Work with maintainers of
|
||||
`rust-lang/llvm-project` to get your commit in a branch of that repository,
|
||||
and then you can send a PR to `rust-lang/rust`. You'll change at least
|
||||
`src/llvm-project` and will likely also change [`llvm-wrapper`] as well.
|
||||
|
||||
<!-- date-check: Sep 2024 -->
|
||||
> For prior art, here are some previous LLVM updates:
|
||||
> - [LLVM 11](https://github.com/rust-lang/rust/pull/73526)
|
||||
> - [LLVM 12](https://github.com/rust-lang/rust/pull/81451)
|
||||
> - [LLVM 13](https://github.com/rust-lang/rust/pull/87570)
|
||||
> - [LLVM 14](https://github.com/rust-lang/rust/pull/93577)
|
||||
> - [LLVM 15](https://github.com/rust-lang/rust/pull/99464)
|
||||
> - [LLVM 16](https://github.com/rust-lang/rust/pull/109474)
|
||||
> - [LLVM 17](https://github.com/rust-lang/rust/pull/115959)
|
||||
> - [LLVM 18](https://github.com/rust-lang/rust/pull/120055)
|
||||
> - [LLVM 19](https://github.com/rust-lang/rust/pull/127513)
|
||||
|
||||
Note that sometimes it's easiest to land [`llvm-wrapper`] compatibility as a PR
|
||||
before actually updating `src/llvm-project`.
|
||||
This way,
|
||||
while you're working through LLVM issues,
|
||||
others interested in trying out the new LLVM can benefit from work you've done
|
||||
to update the C++ bindings.
|
||||
|
||||
3. Over the next few months,
|
||||
LLVM will continually push commits to its `release/a.b` branch.
|
||||
We will often want to have those bug fixes as well.
|
||||
The merge process for that is to use `git merge` itself to merge LLVM's
|
||||
`release/a.b` branch with the branch created in step 2.
|
||||
This is typically
|
||||
done multiple times when necessary while LLVM's release branch is baking.
|
||||
|
||||
4. LLVM then announces the release of version `a.b`.
|
||||
|
||||
5. After LLVM's official release,
|
||||
we follow the process of creating a new branch on the
|
||||
rust-lang/llvm-project repository again,
|
||||
this time with a new date.
|
||||
It is only then that the PR to update Rust to use that version is merged.
|
||||
|
||||
The commit history of `rust-lang/llvm-project`
|
||||
should look much cleaner as a `git rebase` is done,
|
||||
where just a few Rust-specific commits are stacked on top of stock LLVM's release branch.
|
||||
|
||||
### Caveats and gotchas
|
||||
|
||||
Ideally the above instructions are pretty smooth, but here are some caveats to
|
||||
keep in mind while going through them:
|
||||
|
||||
* LLVM bugs are hard to find, don't hesitate to ask for help!
|
||||
Bisection is definitely your friend here
|
||||
(yes LLVM takes forever to build, yet bisection is still your friend).
|
||||
Note that you can make use of [Dev Desktops],
|
||||
which is an initiative to provide the contributors with remote access to powerful hardware.
|
||||
* If you've got general questions, [wg-llvm] can help you out.
|
||||
* Creating branches is a privileged operation on GitHub, so you'll need someone
|
||||
with write access to create the branches for you most likely.
|
||||
|
||||
|
||||
[rust-lang/llvm-project repository]: https://github.com/rust-lang/llvm-project
|
||||
[llvm/llvm-project repository]: https://github.com/llvm/llvm-project
|
||||
[`llvm-wrapper`]: https://github.com/rust-lang/rust/tree/master/compiler/rustc_llvm/llvm-wrapper
|
||||
[wg-llvm]: https://rust-lang.zulipchat.com/#narrow/stream/187780-t-compiler.2Fwg-llvm
|
||||
[Dev Desktops]: https://forge.rust-lang.org/infra/docs/dev-desktop.html
|
||||
[backport process]: https://llvm.org/docs/GitHub.html#backporting-fixes-to-the-release-branches
|
||||
59
src/doc/rustc-dev-guide/src/borrow_check.md
Normal file
59
src/doc/rustc-dev-guide/src/borrow_check.md
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
# MIR borrow check
|
||||
|
||||
The borrow check is Rust's "secret sauce" – it is tasked with
|
||||
enforcing a number of properties:
|
||||
|
||||
- That all variables are initialized before they are used.
|
||||
- That you can't move the same value twice.
|
||||
- That you can't move a value while it is borrowed.
|
||||
- That you can't access a place while it is mutably borrowed (except through
|
||||
the reference).
|
||||
- That you can't mutate a place while it is immutably borrowed.
|
||||
- etc
|
||||
|
||||
The borrow checker operates on the MIR. An older implementation operated on the
|
||||
HIR. Doing borrow checking on MIR has several advantages:
|
||||
|
||||
- The MIR is *far* less complex than the HIR; the radical desugaring
|
||||
helps prevent bugs in the borrow checker. (If you're curious, you
|
||||
can see
|
||||
[a list of bugs that the MIR-based borrow checker fixes here][47366].)
|
||||
- Even more importantly, using the MIR enables ["non-lexical lifetimes"][nll],
|
||||
which are regions derived from the control-flow graph.
|
||||
|
||||
[47366]: https://github.com/rust-lang/rust/issues/47366
|
||||
[nll]: https://rust-lang.github.io/rfcs/2094-nll.html
|
||||
|
||||
### Major phases of the borrow checker
|
||||
|
||||
The borrow checker source is found in
|
||||
[the `rustc_borrowck` crate][b_c]. The main entry point is
|
||||
the [`mir_borrowck`] query.
|
||||
|
||||
[b_c]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/index.html
|
||||
[`mir_borrowck`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/fn.mir_borrowck.html
|
||||
|
||||
- We first create a **local copy** of the MIR. In the coming steps,
|
||||
we will modify this copy in place to modify the types and things to
|
||||
include references to the new regions that we are computing.
|
||||
- We then invoke [`replace_regions_in_mir`] to modify our local MIR.
|
||||
Among other things, this function will replace all of the [regions](./appendix/glossary.md#region)
|
||||
in the MIR with fresh [inference variables](./appendix/glossary.md#inf-var).
|
||||
- Next, we perform a number of
|
||||
[dataflow analyses](./appendix/background.md#dataflow) that
|
||||
compute what data is moved and when.
|
||||
- We then do a [second type check](borrow_check/type_check.md) across the MIR:
|
||||
the purpose of this type check is to determine all of the constraints between
|
||||
different regions.
|
||||
- Next, we do [region inference](borrow_check/region_inference.md), which computes
|
||||
the values of each region — basically, the points in the control-flow graph where
|
||||
each lifetime must be valid according to the constraints we collected.
|
||||
- At this point, we can compute the "borrows in scope" at each point.
|
||||
- Finally, we do a second walk over the MIR, looking at the actions it
|
||||
does and reporting errors. For example, if we see a statement like
|
||||
`*a + 1`, then we would check that the variable `a` is initialized
|
||||
and that it is not mutably borrowed, as either of those would
|
||||
require an error to be reported. Doing this check requires the results of all
|
||||
the previous analyses.
|
||||
|
||||
[`replace_regions_in_mir`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/nll/fn.replace_regions_in_mir.html
|
||||
156
src/doc/rustc-dev-guide/src/borrow_check/drop_check.md
Normal file
156
src/doc/rustc-dev-guide/src/borrow_check/drop_check.md
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
# Drop Check
|
||||
|
||||
We generally require the type of locals to be well-formed whenever the
|
||||
local is used. This includes proving the where-bounds of the local and
|
||||
also requires all regions used by it to be live.
|
||||
|
||||
The only exception to this is when implicitly dropping values when they
|
||||
go out of scope. This does not necessarily require the value to be live:
|
||||
|
||||
```rust
|
||||
fn main() {
|
||||
let x = vec![];
|
||||
{
|
||||
let y = String::from("I am temporary");
|
||||
x.push(&y);
|
||||
}
|
||||
// `x` goes out of scope here, after the reference to `y`
|
||||
// is invalidated. This means that while dropping `x` its type
|
||||
// is not well-formed as it contains regions which are not live.
|
||||
}
|
||||
```
|
||||
|
||||
This is only sound if dropping the value does not try to access any dead
|
||||
region. We check this by requiring the type of the value to be
|
||||
drop-live.
|
||||
The requirements for this are computed in `fn dropck_outlives`.
|
||||
|
||||
The rest of this section uses the following type definition for a type
|
||||
which requires its region parameter to be live:
|
||||
|
||||
```rust
|
||||
struct PrintOnDrop<'a>(&'a str);
|
||||
impl<'a> Drop for PrintOnDrop<'_> {
|
||||
fn drop(&mut self) {
|
||||
println!("{}", self.0);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## How values are dropped
|
||||
|
||||
At its core, a value of type `T` is dropped by executing its "drop
|
||||
glue". Drop glue is compiler generated and first calls `<T as
|
||||
Drop>::drop` and then recursively calls the drop glue of any recursively
|
||||
owned values.
|
||||
|
||||
- If `T` has an explicit `Drop` impl, call `<T as Drop>::drop`.
|
||||
- Regardless of whether `T` implements `Drop`, recurse into all values
|
||||
*owned* by `T`:
|
||||
- references, raw pointers, function pointers, function items, trait
|
||||
objects[^traitobj], and scalars do not own anything.
|
||||
- tuples, slices, and arrays consider their elements to be owned.
|
||||
For arrays of length zero we do not own any value of the element
|
||||
type.
|
||||
- all fields (of all variants) of ADTs are considered owned. We
|
||||
consider all variants for enums. The exception here is
|
||||
`ManuallyDrop<U>` which is not considered to own `U`.
|
||||
`PhantomData<U>` also does not own anything.
|
||||
- closures and generators own their captured upvars.
|
||||
|
||||
Whether a type has drop glue is returned by [`fn
|
||||
Ty::needs_drop`](https://github.com/rust-lang/rust/blob/320b412f9c55bf480d26276ff0ab480e4ecb29c0/compiler/rustc_middle/src/ty/util.rs#L1086-L1108).
|
||||
|
||||
### Partially dropping a local
|
||||
|
||||
For types which do not implement `Drop` themselves, we can also
|
||||
partially move parts of the value before dropping the rest. In this case
|
||||
only the drop glue for the not-yet moved values is called, e.g.
|
||||
|
||||
```rust
|
||||
fn main() {
|
||||
let mut x = (PrintOnDrop("third"), PrintOnDrop("first"));
|
||||
drop(x.1);
|
||||
println!("second")
|
||||
}
|
||||
```
|
||||
|
||||
During MIR building we assume that a local may get dropped whenever it
|
||||
goes out of scope *as long as its type needs drop*. Computing the exact
|
||||
drop glue for a variable happens **after** borrowck in the
|
||||
`ElaborateDrops` pass. This means that even if some part of the local
|
||||
has been dropped previously, dropck still requires this value to be
|
||||
live. This is the case even if we completely moved a local.
|
||||
|
||||
```rust
|
||||
fn main() {
|
||||
let mut x;
|
||||
{
|
||||
let temp = String::from("I am temporary");
|
||||
x = PrintOnDrop(&temp);
|
||||
drop(x);
|
||||
}
|
||||
} //~ ERROR `temp` does not live long enough.
|
||||
```
|
||||
|
||||
It should be possible to add some amount of drop elaboration before
|
||||
borrowck, allowing this example to compile. There is an unstable feature
|
||||
to move drop elaboration before const checking:
|
||||
[#73255](https://github.com/rust-lang/rust/issues/73255). Such a feature
|
||||
gate does not exist for doing some drop elaboration before borrowck,
|
||||
although there's a [relevant
|
||||
MCP](https://github.com/rust-lang/compiler-team/issues/558).
|
||||
|
||||
[^traitobj]: you can consider trait objects to have a builtin `Drop`
|
||||
implementation which directly uses the `drop_in_place` provided by the
|
||||
vtable. This `Drop` implementation requires all its generic parameters
|
||||
to be live.
|
||||
|
||||
### `dropck_outlives`
|
||||
|
||||
There are two distinct "liveness" computations that we perform:
|
||||
|
||||
* a value `v` is *use-live* at location `L` if it may be "used" later; a
|
||||
*use* here is basically anything that is not a *drop*
|
||||
* a value `v` is *drop-live* at location `L` if it may be dropped later
|
||||
|
||||
When things are *use-live*, their entire type must be valid at `L`. When
|
||||
they are *drop-live*, all regions that are required by dropck must be
|
||||
valid at `L`. The values dropped in the MIR are *places*.
|
||||
|
||||
The constraints computed by `dropck_outlives` for a type closely match
|
||||
the generated drop glue for that type. Unlike drop glue,
|
||||
`dropck_outlives` cares about the types of owned values, not the values
|
||||
themselves. For a value of type `T`:
|
||||
|
||||
- if `T` has an explicit `Drop`, require all generic arguments to be
|
||||
live, unless they are marked with `#[may_dangle]` in which case they
|
||||
are fully ignored
|
||||
- regardless of whether `T` has an explicit `Drop`, recurse into all
|
||||
types *owned* by `T`
|
||||
- references, raw pointers, function pointers, function items, trait
|
||||
objects[^traitobj], and scalars do not own anything.
|
||||
- tuples, slices and arrays consider their element type to be owned.
|
||||
**For arrays we currently do not check whether their length is
|
||||
zero**.
|
||||
- all fields (of all variants) of ADTs are considered owned. The
|
||||
exception here is `ManuallyDrop<U>` which is not considered to own
|
||||
`U`. **We consider `PhantomData<U>` to own `U`**.
|
||||
- closures and generators own their captured upvars.
|
||||
|
||||
The sections marked in bold are cases where `dropck_outlives` considers
|
||||
types to be owned which are ignored by `Ty::needs_drop`. We only rely on
|
||||
`dropck_outlives` if `Ty::needs_drop` for the containing local returned
|
||||
`true`. This means liveness requirements can change depending on whether
|
||||
a type is contained in a larger local. **This is inconsistent, and
|
||||
should be fixed: an example [for
|
||||
arrays](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=8b5f5f005a03971b22edb1c20c5e6cbe)
|
||||
and [for
|
||||
`PhantomData`](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=44c6e2b1fae826329fd54c347603b6c8).**[^core]
|
||||
|
||||
One possible way these inconsistencies can be fixed is by MIR building
|
||||
to be more pessimistic, probably by making `Ty::needs_drop` weaker, or
|
||||
alternatively, changing `dropck_outlives` to be more precise, requiring
|
||||
fewer regions to be live.
|
||||
|
||||
[^core]: This is the core assumption of [#110288](https://github.com/rust-lang/rust/issues/110288) and [RFC 3417](https://github.com/rust-lang/rfcs/pull/3417).
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
# Tracking moves and initialization
|
||||
|
||||
Part of the borrow checker's job is to track which variables are
|
||||
"initialized" at any given point in time -- this also requires
|
||||
figuring out where moves occur and tracking those.
|
||||
|
||||
## Initialization and moves
|
||||
|
||||
From a user's perspective, initialization -- giving a variable some
|
||||
value -- and moves -- transferring ownership to another place -- might
|
||||
seem like distinct topics. Indeed, our borrow checker error messages
|
||||
often talk about them differently. But **within the borrow checker**,
|
||||
they are not nearly as separate. Roughly speaking, the borrow checker
|
||||
tracks the set of "initialized places" at any point in the source
|
||||
code. Assigning to a previously uninitialized local variable adds it
|
||||
to that set; moving from a local variable removes it from that set.
|
||||
|
||||
Consider this example:
|
||||
|
||||
```rust,ignore
|
||||
fn foo() {
|
||||
let a: Vec<u32>;
|
||||
|
||||
// a is not initialized yet
|
||||
|
||||
a = vec![22];
|
||||
|
||||
// a is initialized here
|
||||
|
||||
std::mem::drop(a); // a is moved here
|
||||
|
||||
// a is no longer initialized here
|
||||
|
||||
let l = a.len(); //~ ERROR
|
||||
}
|
||||
```
|
||||
|
||||
Here you can see that `a` starts off as uninitialized; once it is
|
||||
assigned, it becomes initialized. But when `drop(a)` is called, that
|
||||
moves `a` into the call, and hence it becomes uninitialized again.
|
||||
|
||||
## Subsections
|
||||
|
||||
To make it easier to peruse, this section is broken into a number of
|
||||
subsections:
|
||||
|
||||
- [Move paths](./moves_and_initialization/move_paths.html): the
|
||||
*move path* concept that we use to track which local variables (or parts of
|
||||
local variables, in some cases) are initialized.
|
||||
- TODO *Rest not yet written* =)
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
# Move paths
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
In reality, it's not enough to track initialization at the granularity
|
||||
of local variables. Rust also allows us to do moves and initialization
|
||||
at the field granularity:
|
||||
|
||||
```rust,ignore
|
||||
fn foo() {
|
||||
let a: (Vec<u32>, Vec<u32>) = (vec![22], vec![44]);
|
||||
|
||||
// a.0 and a.1 are both initialized
|
||||
|
||||
let b = a.0; // moves a.0
|
||||
|
||||
// a.0 is not initialized, but a.1 still is
|
||||
|
||||
let c = a.0; // ERROR
|
||||
let d = a.1; // OK
|
||||
}
|
||||
```
|
||||
|
||||
To handle this, we track initialization at the granularity of a **move
|
||||
path**. A [`MovePath`] represents some location that the user can
|
||||
initialize, move, etc. So e.g. there is a move-path representing the
|
||||
local variable `a`, and there is a move-path representing `a.0`. Move
|
||||
paths roughly correspond to the concept of a [`Place`] from MIR, but
|
||||
they are indexed in ways that enable us to do move analysis more
|
||||
efficiently.
|
||||
|
||||
[`MovePath`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MovePath.html
|
||||
[`Place`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/struct.Place.html
|
||||
|
||||
## Move path indices
|
||||
|
||||
Although there is a [`MovePath`] data structure, move paths are never referenced
|
||||
directly. Instead, all the code passes around *indices* of type
|
||||
[`MovePathIndex`]. If you need to get information about a move path, you use
|
||||
this index with the [`move_paths` field of the `MoveData`][move_paths]. For
|
||||
example, to convert a [`MovePathIndex`] `mpi` into a MIR [`Place`], you might
|
||||
access the [`MovePath::place`] field like so:
|
||||
|
||||
```rust,ignore
|
||||
move_data.move_paths[mpi].place
|
||||
```
|
||||
|
||||
[move_paths]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MoveData.html#structfield.move_paths
|
||||
[`MovePath::place`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MovePath.html#structfield.place
|
||||
[`MovePathIndex`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MovePathIndex.html
|
||||
|
||||
## Building move paths
|
||||
|
||||
One of the first things we do in the MIR borrow check is to construct
|
||||
the set of move paths. This is done as part of the
|
||||
[`MoveData::gather_moves`] function. This function uses a MIR visitor
|
||||
called [`MoveDataBuilder`] to walk the MIR and look at how each [`Place`]
|
||||
within is accessed. For each such [`Place`], it constructs a
|
||||
corresponding [`MovePathIndex`]. It also records when/where that
|
||||
particular move path is moved/initialized, but we'll get to that in a
|
||||
later section.
|
||||
|
||||
[`MoveDataBuilder`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/builder/struct.MoveDataBuilder.html
|
||||
[`MoveData::gather_moves`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MoveData.html#method.gather_moves
|
||||
|
||||
### Illegal move paths
|
||||
|
||||
We don't actually create a move-path for **every** [`Place`] that gets
|
||||
used. In particular, if it is illegal to move from a [`Place`], then
|
||||
there is no need for a [`MovePathIndex`]. Some examples:
|
||||
|
||||
- You cannot move from a static variable, so we do not create a [`MovePathIndex`]
|
||||
for static variables.
|
||||
- You cannot move an individual element of an array, so if we have e.g. `foo: [String; 3]`,
|
||||
there would be no move-path for `foo[1]`.
|
||||
- You cannot move from inside of a borrowed reference, so if we have e.g. `foo: &String`,
|
||||
there would be no move-path for `*foo`.
|
||||
|
||||
These rules are enforced by the [`move_path_for`] function, which
|
||||
converts a [`Place`] into a [`MovePathIndex`] -- in error cases like
|
||||
those just discussed, the function returns an `Err`. This in turn
|
||||
means we don't have to bother tracking whether those places are
|
||||
initialized (which lowers overhead).
|
||||
|
||||
[`move_path_for`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/builder/struct.MoveDataBuilder.html#method.move_path_for
|
||||
|
||||
## Looking up a move-path
|
||||
|
||||
If you have a [`Place`] and you would like to convert it to a [`MovePathIndex`], you
|
||||
can do that using the [`MovePathLookup`] structure found in the [`rev_lookup`] field
|
||||
of [`MoveData`]. There are two different methods:
|
||||
|
||||
[`MoveData`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MoveData.html
|
||||
[`MovePathLookup`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MovePathLookup.html
|
||||
[`rev_lookup`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MoveData.html#structfield.rev_lookup
|
||||
|
||||
- [`find_local`], which takes a [`mir::Local`] representing a local
|
||||
variable. This is the easier method, because we **always** create a
|
||||
[`MovePathIndex`] for every local variable.
|
||||
- [`find`], which takes an arbitrary [`Place`]. This method is a bit
|
||||
more annoying to use, precisely because we don't have a
|
||||
[`MovePathIndex`] for **every** [`Place`] (as we just discussed in
|
||||
the "illegal move paths" section). Therefore, [`find`] returns a
|
||||
[`LookupResult`] indicating the closest path it was able to find
|
||||
that exists (e.g., for `foo[1]`, it might return just the path for
|
||||
`foo`).
|
||||
|
||||
[`find`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MovePathLookup.html#method.find
|
||||
[`find_local`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MovePathLookup.html#method.find_local
|
||||
[`mir::Local`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/struct.Local.html
|
||||
[`LookupResult`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/enum.LookupResult.html
|
||||
|
||||
## Cross-references
|
||||
|
||||
As we noted above, move-paths are stored in a big vector and
|
||||
referenced via their [`MovePathIndex`]. However, within this vector,
|
||||
they are also structured into a tree. So for example if you have the
|
||||
[`MovePathIndex`] for `a.b.c`, you can go to its parent move-path
|
||||
`a.b`. You can also iterate over all children paths: so, from `a.b`,
|
||||
you might iterate to find the path `a.b.c` (here you are iterating
|
||||
just over the paths that are **actually referenced** in the source,
|
||||
not all **possible** paths that could have been referenced). These
|
||||
references are used for example in the
|
||||
[`find_in_move_path_or_its_descendants`] function, which determines
|
||||
whether a move-path (e.g., `a.b`) or any child of that move-path
|
||||
(e.g., `a.b.c`) matches a given predicate.
|
||||
|
||||
[`Place`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/struct.Place.html
|
||||
[`find_in_move_path_or_its_descendants`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/move_paths/struct.MoveData.html#method.find_in_move_path_or_its_descendants
|
||||
|
|
@ -0,0 +1,264 @@
|
|||
# Opaque types region inference restrictions
|
||||
|
||||
In this chapter we discuss the various restrictions we impose on the generic arguments of
|
||||
opaque types when defining their hidden types
|
||||
`Opaque<'a, 'b, .., A, B, ..> := SomeHiddenType`.
|
||||
|
||||
These restrictions are implemented in borrow checking ([Source][source-borrowck-opaque])
|
||||
as it is the final step of opaque type inference.
|
||||
|
||||
[source-borrowck-opaque]: https://github.com/rust-lang/rust/blob/435b5255148617128f0a9b17bacd3cc10e032b23/compiler/rustc_borrowck/src/region_infer/opaque_types.rs
|
||||
|
||||
## Background: type and const generic arguments
|
||||
For type arguments, two restrictions are necessary: each type argument must be
|
||||
(1) a type parameter and
|
||||
(2) unique among the generic arguments.
|
||||
The same is applied to const arguments.
|
||||
|
||||
Example of case (1):
|
||||
```rust
|
||||
type Opaque<X> = impl Sized;
|
||||
|
||||
// `T` is a type parameter.
|
||||
// Opaque<T> := ();
|
||||
fn good<T>() -> Opaque<T> {}
|
||||
|
||||
// `()` is not a type parameter.
|
||||
// Opaque<()> := ();
|
||||
fn bad() -> Opaque<()> {} //~ ERROR
|
||||
```
|
||||
|
||||
Example of case (2):
|
||||
```rust
|
||||
type Opaque<X, Y> = impl Sized;
|
||||
|
||||
// `T` and `U` are unique in the generic args.
|
||||
// Opaque<T, U> := T;
|
||||
fn good<T, U>(t: T, _u: U) -> Opaque<T, U> { t }
|
||||
|
||||
// `T` appears twice in the generic args.
|
||||
// Opaque<T, T> := T;
|
||||
fn bad<T>(t: T) -> Opaque<T, T> { t } //~ ERROR
|
||||
```
|
||||
**Motivation:** In the first case `Opaque<()> := ()`, the hidden type is ambiguous because
|
||||
it is compatible with two different interpretations: `Opaque<X> := X` and `Opaque<X> := ()`.
|
||||
Similarly for the second case `Opaque<T, T> := T`, it is ambiguous whether it should be
|
||||
interpreted as `Opaque<X, Y> := X` or as `Opaque<X, Y> := Y`.
|
||||
Because of this ambiguity, both cases are rejected as invalid defining uses.
|
||||
|
||||
## Uniqueness restriction
|
||||
|
||||
Each lifetime argument must be unique in the arguments list and must not be `'static`.
|
||||
This is in order to avoid an ambiguity with hidden type inference similar to the case of
|
||||
type parameters.
|
||||
For example, the invalid defining use below `Opaque<'static> := Inv<'static>` is compatible with
|
||||
both `Opaque<'x> := Inv<'static>` and `Opaque<'x> := Inv<'x>`.
|
||||
|
||||
```rust
|
||||
type Opaque<'x> = impl Sized + 'x;
|
||||
type Inv<'a> = Option<*mut &'a ()>;
|
||||
|
||||
fn good<'a>() -> Opaque<'a> { Inv::<'static>::None }
|
||||
|
||||
fn bad() -> Opaque<'static> { Inv::<'static>::None }
|
||||
//~^ ERROR
|
||||
```
|
||||
|
||||
```rust
|
||||
type Opaque<'x, 'y> = impl Trait<'x, 'y>;
|
||||
|
||||
fn good<'a, 'b>() -> Opaque<'a, 'b> {}
|
||||
|
||||
fn bad<'a>() -> Opaque<'a, 'a> {}
|
||||
//~^ ERROR
|
||||
```
|
||||
|
||||
**Semantic lifetime equality:**
|
||||
One complexity with lifetimes compared to type parameters is that
|
||||
two lifetimes that are syntactically different may be semantically equal.
|
||||
Therefore, we need to be cautious when verifying that the lifetimes are unique.
|
||||
|
||||
```rust
|
||||
// This is also invalid because `'a` is *semantically* equal to `'static`.
|
||||
fn still_bad_1<'a: 'static>() -> Opaque<'a> {}
|
||||
//~^ Should error!
|
||||
|
||||
// This is also invalid because `'a` and `'b` are *semantically* equal.
|
||||
fn still_bad_2<'a: 'b, 'b: 'a>() -> Opaque<'a, 'b> {}
|
||||
//~^ Should error!
|
||||
```
|
||||
|
||||
## An exception to uniqueness rule
|
||||
|
||||
An exception to the uniqueness rule above is when the bounds at the opaque type's definition require
|
||||
a lifetime parameter to be equal to another one or to the `'static` lifetime.
|
||||
```rust
|
||||
// The definition requires `'x` to be equal to `'static`.
|
||||
type Opaque<'x: 'static> = impl Sized + 'x;
|
||||
|
||||
fn good() -> Opaque<'static> {}
|
||||
```
|
||||
|
||||
**Motivation:** an attempt to implement the uniqueness restriction for RPITs resulted in a
|
||||
[breakage found by crater](https://github.com/rust-lang/rust/pull/112842#issuecomment-1610057887).
|
||||
This can be mitigated by this exception to the rule.
|
||||
An example of the code that would otherwise break:
|
||||
```rust
|
||||
struct Type<'a>(&'a ());
|
||||
impl<'a> Type<'a> {
|
||||
// `'b == 'a`
|
||||
fn do_stuff<'b: 'a>(&'b self) -> impl Trait<'a, 'b> {}
|
||||
}
|
||||
```
|
||||
|
||||
**Why this is correct:** for such a defining use like `Opaque<'a, 'a> := &'a str`,
|
||||
it can be interpreted in either way—either as `Opaque<'x, 'y> := &'x str` or as
|
||||
`Opaque<'x, 'y> := &'y str` and it wouldn't matter because every use of `Opaque`
|
||||
will guarantee that both parameters are equal as per the well-formedness rules.
|
||||
|
||||
## Universal lifetimes restriction
|
||||
|
||||
Only universally quantified lifetimes are allowed in the opaque type arguments.
|
||||
This includes lifetime parameters and placeholders.
|
||||
|
||||
```rust
|
||||
type Opaque<'x> = impl Sized + 'x;
|
||||
|
||||
fn test<'a>() -> Opaque<'a> {
|
||||
// `Opaque<'empty> := ()`
|
||||
let _: Opaque<'_> = ();
|
||||
//~^ ERROR
|
||||
}
|
||||
```
|
||||
|
||||
**Motivation:**
|
||||
This makes the lifetime and type arguments behave consistently, but this is only a bonus.
|
||||
The real reason behind this restriction is purely technical, as the [member constraints] algorithm
|
||||
faces a fundamental limitation:
|
||||
When encountering an opaque type definition `Opaque<'?1> := &'?2 u8`,
|
||||
a member constraint `'?2 member-of ['static, '?1]` is registered.
|
||||
In order for the algorithm to pick the right choice, the *complete* set of "outlives" relationships
|
||||
between the choice regions `['static, '?1]` must already be known *before* doing the region
|
||||
inference. This can be satisfied only if each choice region is either:
|
||||
1. a universal region, i.e. `RegionKind::Re{EarlyParam,LateParam,Placeholder,Static}`,
|
||||
because the relations between universal regions are completely known, prior to region inference,
|
||||
from the explicit and implied bounds.
|
||||
1. or an existential region that is "strictly equal" to a universal region.
|
||||
Strict lifetime equality is defined below and is required here because it is the only type of
|
||||
equality that can be evaluated prior to full region inference.
|
||||
|
||||
**Strict lifetime equality:**
|
||||
We say that two lifetimes are strictly equal if there are bidirectional outlives constraints
|
||||
between them. In NLL terms, this means the lifetimes are part of the same [SCC].
|
||||
Importantly this type of equality can be evaluated prior to full region inference
|
||||
(but of course after constraint collection).
|
||||
The other type of equality is when region inference ends up giving two lifetime variables
|
||||
the same value even if they are not strictly equal.
|
||||
See [#113971] for how we used to conflate the difference.
|
||||
|
||||
[#113971]: https://github.com/rust-lang/rust/issues/113971
|
||||
[SCC]: https://en.wikipedia.org/wiki/Strongly_connected_component
|
||||
[member constraints]: ./region_inference/member_constraints.md
|
||||
|
||||
**Interaction with "once modulo regions" restriction**
|
||||
In the example above, note the opaque type in the signature is `Opaque<'a>` and the one in the
|
||||
invalid defining use is `Opaque<'empty>`.
|
||||
In the proposed MiniTAIT plan, namely the ["once modulo regions"][#116935] rule,
|
||||
we already disallow this.
|
||||
Although it might appear that the "universal lifetimes" restriction becomes redundant as it logically
|
||||
follows from "MiniTAIT" restrictions, the subsequent related discussion on lifetime equality and
|
||||
closures remains relevant.
|
||||
|
||||
[#116935]: https://github.com/rust-lang/rust/pull/116935
|
||||
|
||||
|
||||
## Closure restrictions
|
||||
|
||||
When the opaque type is defined in a closure/coroutine/inline-const body, universal lifetimes that
|
||||
are "external" to the closure are not allowed in the opaque type arguments.
|
||||
External regions are defined in [`RegionClassification::External`][source-external-region].
|
||||
|
||||
[source-external-region]: https://github.com/rust-lang/rust/blob/caf730043232affb6b10d1393895998cb4968520/compiler/rustc_borrowck/src/universal_regions.rs#L201
|
||||
|
||||
Example: (This one happens to compile in the current nightly but more practical examples are
|
||||
already rejected with confusing errors.)
|
||||
```rust
|
||||
type Opaque<'x> = impl Sized + 'x;
|
||||
|
||||
fn test<'a>() -> Opaque<'a> {
|
||||
let _ = || {
|
||||
// `'a` is external to the closure
|
||||
let _: Opaque<'a> = ();
|
||||
//~^ Should be an error!
|
||||
};
|
||||
()
|
||||
}
|
||||
```
|
||||
|
||||
**Motivation:**
|
||||
In closure bodies, external lifetimes, although being categorized as "universal" lifetimes,
|
||||
behave more like existential lifetimes in that the relations between them are not known ahead of
|
||||
time, instead their values are inferred just like existential lifetimes and the requirements are
|
||||
propagated back to the parent fn. This breaks the member constraints algorithm as described above:
|
||||
> In order for the algorithm to pick the right choice, the complete set of “outlives” relationships
|
||||
between the choice regions `['static, '?1]` must already be known before doing the region inference
|
||||
|
||||
Here is an example that details how:
|
||||
|
||||
```rust
|
||||
type Opaque<'x, 'y> = impl Sized;
|
||||
|
||||
//
|
||||
fn test<'a, 'b>(s: &'a str) -> impl FnOnce() -> Opaque<'a, 'b> {
|
||||
move || { s }
|
||||
//~^ ERROR hidden type for `Opaque<'_, '_>` captures lifetime that does not appear in bounds
|
||||
}
|
||||
|
||||
// The above closure body is desugared into something like:
|
||||
fn test::{closure#0}(_upvar: &'?8 str) -> Opaque<'?6, '?7> {
|
||||
return _upvar
|
||||
}
|
||||
|
||||
// where `['?8, '?6, '?7]` are universal lifetimes *external* to the closure.
|
||||
// There are no known relations between them *inside* the closure.
|
||||
// But in the parent fn it is known that `'?6: '?8`.
|
||||
//
|
||||
// When encountering an opaque definition `Opaque<'?6, '?7> := &'?8 str`,
|
||||
// The member constraints algorithm does not know enough to safely make `'?8 = '?6`.
|
||||
// For this reason, it errors with a sensible message:
|
||||
// "hidden type captures lifetime that does not appear in bounds".
|
||||
```
|
||||
|
||||
Without these restrictions, error messages are confusing and, more importantly, there is a risk that
|
||||
we accept code that would likely break in the future because member constraints are super broken
|
||||
in closures.
|
||||
|
||||
**Output types:**
|
||||
I believe the most common scenario where this causes issues in real-world code is with
|
||||
closure/async-block output types. It is worth noting that there is a discrepancy between closures
|
||||
and async blocks that further demonstrates this issue and is attributed to the
|
||||
[hack of `replace_opaque_types_with_inference_vars`][source-replace-opaques],
|
||||
which is applied to futures only.
|
||||
|
||||
[source-replace-opaques]: https://github.com/rust-lang/rust/blob/9cf18e98f82d85fa41141391d54485b8747da46f/compiler/rustc_hir_typeck/src/closure.rs#L743
|
||||
|
||||
```rust
|
||||
type Opaque<'x> = impl Sized + 'x;
|
||||
fn test<'a>() -> impl FnOnce() -> Opaque<'a> {
|
||||
// Output type of the closure is Opaque<'a>
|
||||
// -> hidden type definition happens *inside* the closure
|
||||
// -> rejected.
|
||||
move || {}
|
||||
//~^ ERROR expected generic lifetime parameter, found `'_`
|
||||
}
|
||||
```
|
||||
```rust
|
||||
use std::future::Future;
|
||||
type Opaque<'x> = impl Sized + 'x;
|
||||
fn test<'a>() -> impl Future<Output = Opaque<'a>> {
|
||||
// Output type of the async block is unit `()`
|
||||
// -> hidden type definition happens in the parent fn
|
||||
// -> accepted.
|
||||
async move {}
|
||||
}
|
||||
```
|
||||
236
src/doc/rustc-dev-guide/src/borrow_check/region_inference.md
Normal file
236
src/doc/rustc-dev-guide/src/borrow_check/region_inference.md
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
# Region inference (NLL)
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
The MIR-based region checking code is located in [the `rustc_borrowck`
|
||||
module][nll].
|
||||
|
||||
[nll]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/index.html
|
||||
|
||||
The MIR-based region analysis consists of two major functions:
|
||||
|
||||
- [`replace_regions_in_mir`], invoked first, has two jobs:
|
||||
- First, it finds the set of regions that appear within the
|
||||
signature of the function (e.g., `'a` in `fn foo<'a>(&'a u32) {
|
||||
... }`). These are called the "universal" or "free" regions – in
|
||||
particular, they are the regions that [appear free][fvb] in the
|
||||
function body.
|
||||
- Second, it replaces all the regions from the function body with
|
||||
fresh inference variables. This is because (presently) those
|
||||
regions are the results of lexical region inference and hence are
|
||||
not of much interest. The intention is that – eventually – they
|
||||
will be "erased regions" (i.e., no information at all), since we
|
||||
won't be doing lexical region inference at all.
|
||||
- [`compute_regions`], invoked second: this is given as argument the
|
||||
results of move analysis. It has the job of computing values for all
|
||||
the inference variables that `replace_regions_in_mir` introduced.
|
||||
- To do that, it first runs the [MIR type checker]. This is
|
||||
basically a normal type-checker but specialized to MIR, which is
|
||||
much simpler than full Rust, of course. Running the MIR type
|
||||
checker will however create various [constraints][cp] between region
|
||||
variables, indicating their potential values and relationships to
|
||||
one another.
|
||||
- After this, we perform [constraint propagation][cp] by creating a
|
||||
[`RegionInferenceContext`] and invoking its [`solve`]
|
||||
method.
|
||||
- The [NLL RFC] also includes fairly thorough (and hopefully readable)
|
||||
coverage.
|
||||
|
||||
[cp]: ./region_inference/constraint_propagation.md
|
||||
[fvb]: ../appendix/background.md#free-vs-bound
|
||||
[`replace_regions_in_mir`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/nll/fn.replace_regions_in_mir.html
|
||||
[`compute_regions`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/nll/fn.compute_regions.html
|
||||
[`RegionInferenceContext`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html
|
||||
[`solve`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#method.solve
|
||||
[NLL RFC]: https://rust-lang.github.io/rfcs/2094-nll.html
|
||||
[MIR type checker]: ./type_check.md
|
||||
|
||||
## Universal regions
|
||||
|
||||
The [`UniversalRegions`] type represents a collection of _universal_ regions
|
||||
corresponding to some MIR `DefId`. It is constructed in
|
||||
[`replace_regions_in_mir`] when we replace all regions with fresh inference
|
||||
variables. [`UniversalRegions`] contains indices for all the free regions in
|
||||
the given MIR along with any relationships that are _known_ to hold between
|
||||
them (e.g. implied bounds, where clauses, etc.).
|
||||
|
||||
For example, given the MIR for the following function:
|
||||
|
||||
```rust
|
||||
fn foo<'a>(x: &'a u32) {
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
we would create a universal region for `'a` and one for `'static`. There may
|
||||
also be some complications for handling closures, but we will ignore those for
|
||||
the moment.
|
||||
|
||||
TODO: write about _how_ these regions are computed.
|
||||
|
||||
[`UniversalRegions`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/universal_regions/struct.UniversalRegions.html
|
||||
|
||||
<a id="region-variables"></a>
|
||||
|
||||
## Region variables
|
||||
|
||||
The value of a region can be thought of as a **set**. This set contains all
|
||||
points in the MIR where the region is valid along with any regions that are
|
||||
outlived by this region (e.g. if `'a: 'b`, then `end('b)` is in the set for
|
||||
`'a`); we call the domain of this set a `RegionElement`. In the code, the value
|
||||
for all regions is maintained in [the `rustc_borrowck::region_infer` module][ri].
|
||||
For each region we maintain a set storing what elements are present in its value (to make this
|
||||
efficient, we give each kind of element an index, the `RegionElementIndex`, and
|
||||
use sparse bitsets).
|
||||
|
||||
[ri]: https://github.com/rust-lang/rust/tree/master/compiler/rustc_borrowck/src/region_infer
|
||||
|
||||
The kinds of region elements are as follows:
|
||||
|
||||
- Each **[`location`]** in the MIR control-flow graph: a location is just
|
||||
the pair of a basic block and an index. This identifies the point
|
||||
**on entry** to the statement with that index (or the terminator, if
|
||||
the index is equal to `statements.len()`).
|
||||
- There is an element `end('a)` for each universal region `'a`,
|
||||
corresponding to some portion of the caller's (or caller's caller,
|
||||
etc) control-flow graph.
|
||||
- Similarly, there is an element denoted `end('static)` corresponding
|
||||
to the remainder of program execution after this function returns.
|
||||
- There is an element `!1` for each placeholder region `!1`. This
|
||||
corresponds (intuitively) to some unknown set of other elements –
|
||||
for details on placeholders, see the section
|
||||
[placeholders and universes](region_inference/placeholders_and_universes.md).
|
||||
|
||||
## Constraints
|
||||
|
||||
Before we can infer the value of regions, we need to collect
|
||||
constraints on the regions. The full set of constraints is described
|
||||
in [the section on constraint propagation][cp], but the two most
|
||||
common sorts of constraints are:
|
||||
|
||||
1. Outlives constraints. These are constraints that one region outlives another
|
||||
(e.g. `'a: 'b`). Outlives constraints are generated by the [MIR type
|
||||
checker].
|
||||
2. Liveness constraints. Each region needs to be live at points where it can be
|
||||
used.
|
||||
|
||||
## Inference Overview
|
||||
|
||||
So how do we compute the contents of a region? This process is called _region
|
||||
inference_. The high-level idea is pretty simple, but there are some details we
|
||||
need to take care of.
|
||||
|
||||
Here is the high-level idea: we start off each region with the MIR locations we
|
||||
know must be in it from the liveness constraints. From there, we use all of the
|
||||
outlives constraints computed from the type checker to _propagate_ the
|
||||
constraints: for each region `'a`, if `'a: 'b`, then we add all elements of
|
||||
`'b` to `'a`, including `end('b)`. This all happens in
|
||||
[`propagate_constraints`].
|
||||
|
||||
Then, we will check for errors. We first check that type tests are satisfied by
|
||||
calling [`check_type_tests`]. This checks constraints like `T: 'a`. Second, we
|
||||
check that universal regions are not "too big". This is done by calling
|
||||
[`check_universal_regions`]. This checks that for each region `'a` if `'a`
|
||||
contains the element `end('b)`, then we must already know that `'a: 'b` holds
|
||||
(e.g. from a where clause). If we don't already know this, that is an error...
|
||||
well, almost. There is some special handling for closures that we will discuss
|
||||
later.
|
||||
|
||||
### Example
|
||||
|
||||
Consider the following example:
|
||||
|
||||
```rust,ignore
|
||||
fn foo<'a, 'b>(x: &'a usize) -> &'b usize {
|
||||
x
|
||||
}
|
||||
```
|
||||
|
||||
Clearly, this should not compile because we don't know if `'a` outlives `'b`
|
||||
(if it doesn't then the return value could be a dangling reference).
|
||||
|
||||
Let's back up a bit. We need to introduce some free inference variables (as is
|
||||
done in [`replace_regions_in_mir`]). This example doesn't use the exact regions
|
||||
produced, but it (hopefully) is enough to get the idea across.
|
||||
|
||||
```rust,ignore
|
||||
fn foo<'a, 'b>(x: &'a /* '#1 */ usize) -> &'b /* '#3 */ usize {
|
||||
x // '#2, location L1
|
||||
}
|
||||
```
|
||||
|
||||
Some notation: `'#1`, `'#3`, and `'#2` represent the universal regions for the
|
||||
argument, return value, and the expression `x`, respectively. Additionally, I
|
||||
will call the location of the expression `x` `L1`.
|
||||
|
||||
So now we can use the liveness constraints to get the following starting points:
|
||||
|
||||
Region | Contents
|
||||
--------|----------
|
||||
'#1 |
|
||||
'#2 | `L1`
|
||||
'#3 | `L1`
|
||||
|
||||
Now we use the outlives constraints to expand each region. Specifically, we
|
||||
know that `'#2: '#3` ...
|
||||
|
||||
Region | Contents
|
||||
--------|----------
|
||||
'#1 | `L1`
|
||||
'#2 | `L1, end('#3) // add contents of '#3 and end('#3)`
|
||||
'#3 | `L1`
|
||||
|
||||
... and `'#1: '#2`, so ...
|
||||
|
||||
Region | Contents
|
||||
--------|----------
|
||||
'#1 | `L1, end('#2), end('#3) // add contents of '#2 and end('#2)`
|
||||
'#2 | `L1, end('#3)`
|
||||
'#3 | `L1`
|
||||
|
||||
Now, we need to check that no regions were too big (we don't have any type
|
||||
tests to check in this case). Notice that `'#1` now contains `end('#3)`, but
|
||||
we have no `where` clause or implied bound to say that `'a: 'b`... that's an
|
||||
error!
|
||||
|
||||
### Some details
|
||||
|
||||
The [`RegionInferenceContext`] type contains all of the information needed to
|
||||
do inference, including the universal regions from [`replace_regions_in_mir`] and
|
||||
the constraints computed for each region. It is constructed just after we
|
||||
compute the liveness constraints.
|
||||
|
||||
Here are some of the fields of the struct:
|
||||
|
||||
- [`constraints`]: contains all the outlives constraints.
|
||||
- [`liveness_constraints`]: contains all the liveness constraints.
|
||||
- [`universal_regions`]: contains the `UniversalRegions` returned by
|
||||
[`replace_regions_in_mir`].
|
||||
- [`universal_region_relations`]: contains relations known to be true about
|
||||
universal regions. For example, if we have a where clause that `'a: 'b`, that
|
||||
relation is assumed to be true while borrow checking the implementation (it
|
||||
is checked at the caller), so `universal_region_relations` would contain `'a:
|
||||
'b`.
|
||||
- [`type_tests`]: contains some constraints on types that we must check after
|
||||
inference (e.g. `T: 'a`).
|
||||
- [`closure_bounds_mapping`]: used for propagating region constraints from
|
||||
closures back out to the creator of the closure.
|
||||
|
||||
[`constraints`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#structfield.constraints
|
||||
[`liveness_constraints`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#structfield.liveness_constraints
|
||||
[`location`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/struct.Location.html
|
||||
[`universal_regions`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#structfield.universal_regions
|
||||
[`universal_region_relations`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#structfield.universal_region_relations
|
||||
[`type_tests`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#structfield.type_tests
|
||||
[`closure_bounds_mapping`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#structfield.closure_bounds_mapping
|
||||
|
||||
TODO: should we discuss any of the other fields? What about the SCCs?
|
||||
|
||||
Ok, now that we have constructed a `RegionInferenceContext`, we can do
|
||||
inference. This is done by calling the [`solve`] method on the context. This
|
||||
is where we call [`propagate_constraints`] and then check the resulting type
|
||||
tests and universal regions, as discussed above.
|
||||
|
||||
[`propagate_constraints`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#method.propagate_constraints
|
||||
[`check_type_tests`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#method.check_type_tests
|
||||
[`check_universal_regions`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#method.check_universal_regions
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
# Propagating closure constraints
|
||||
|
||||
When we are checking the type tests and universal regions, we may come
|
||||
across a constraint that we can't prove yet if we are in a closure
|
||||
body! However, the necessary constraints may actually hold (we just
|
||||
don't know it yet). Thus, if we are inside a closure, we just collect
|
||||
all the constraints we can't prove yet and return them. Later, when we
|
||||
are borrow checking the MIR node that created the closure, we can also
|
||||
check that these constraints hold. At that time, if we can't prove
|
||||
they hold, we report an error.
|
||||
|
||||
## How this is implemented
|
||||
|
||||
While borrow-checking a closure inside of `RegionInferenceContext::solve` we separately try to propagate type-outlives and region-outlives constraints to the parent if we're unable to prove them locally.
|
||||
|
||||
### Region-outlive constraints
|
||||
|
||||
If `RegionInferenceContext::check_universal_regions` fails to prove some outlives constraint `'longer_fr: 'shorter_fr`, we try to propagate it in `fn try_propagate_universal_region_error`. Both these universal regions are either local to the closure or an external region.
|
||||
|
||||
In case `'longer_fr` is a local universal region, we search for the largest external region `'fr_minus` which is outlived by `'longer_fr`, i.e. `'longer_fr: 'fr_minus`. In case there are multiple such regions, we pick the `mutual_immediate_postdominator`: the fixpoint of repeatedly computing the GLB of all GLBs, see [TransitiveRelation::postdom_upper_bound](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/transitive_relation/struct.TransitiveRelation.html#method.postdom_upper_bound) for more details.
|
||||
|
||||
If `'fr_minus` exists we require it to outlive all non-local upper bounds of `'shorter_fr`. There will always be at least one non-local upper bound `'static`.
|
||||
|
||||
### Type-outlive constraints
|
||||
|
||||
Type-outlives constraints are proven in `check_type_tests`. This happens after computing the outlives graph, which is now immutable.
|
||||
|
||||
For all type tests we fail to prove via `fn eval_verify_bound` inside of the closure we call `try_promote_type_test`. A `TypeTest` represents a type-outlives bound `generic_kind: lower_bound` together with a `verify_bound`. If the `VerifyBound` holds for the `lower_bound`, the constraint is satisfied. `try_promote_type_test` does not care about the `verify_bound`.
|
||||
|
||||
It starts by calling `fn try_promote_type_test_subject`. This function takes the `GenericKind` and tries to transform it to a `ClosureOutlivesSubject` which no longer references anything local to the closure. This is done by replacing all free regions in that type with either `'static` or region parameters which are equal to that free region. This operation fails if the `generic_kind` contains a region which cannot be replaced.
|
||||
|
||||
We then promote the `lower_bound` into the context of the caller. If the lower bound is equal to a placeholder, we replace it with `'static`.
|
||||
|
||||
We then look at all universal regions `uv` which are required to outlive `lower_bound`, i.e. for which borrow checking added region constraints. For each of these we then emit a `ClosureOutlivesRequirement` for non-local universal regions which are known to outlive `uv`.
|
||||
|
||||
As we've already built the region graph of the closure at this point and emitted errors if that one is inconsistent, we are also able to assume that the outlive constraints `uv: lower_bound` hold.
|
||||
|
||||
So if we have a type-outlives bound we can't prove, e.g. `T: 'local_infer`, we use the region graph to go to universal variables `'a` with `'a: 'local_infer`. In case `'a` are local, we then use the assumed outlives constraints to go to non-local ones.
|
||||
|
||||
We then store the list of promoted type tests in the `BorrowCheckResults`.
|
||||
We then apply them while borrow-checking its parent in `TypeChecker::prove_closure_bounds`.
|
||||
|
||||
TODO: explain how exactly that works :3
|
||||
|
|
@ -0,0 +1,224 @@
|
|||
# Constraint propagation
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
The main work of the region inference is **constraint propagation**,
|
||||
which is done in the [`propagate_constraints`] function. There are
|
||||
three sorts of constraints that are used in NLL, and we'll explain how
|
||||
`propagate_constraints` works by "layering" those sorts of constraints
|
||||
on one at a time (each of them is fairly independent from the others):
|
||||
|
||||
- liveness constraints (`R live at E`), which arise from liveness;
|
||||
- outlives constraints (`R1: R2`), which arise from subtyping;
|
||||
- [member constraints][m_c] (`member R_m of [R_c...]`), which arise from impl Trait.
|
||||
|
||||
[`propagate_constraints`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#method.propagate_constraints
|
||||
[m_c]: ./member_constraints.md
|
||||
|
||||
In this chapter, we'll explain the "heart" of constraint propagation,
|
||||
covering both liveness and outlives constraints.
|
||||
|
||||
## Notation and high-level concepts
|
||||
|
||||
Conceptually, region inference is a "fixed-point" computation. It is
|
||||
given some set of constraints `{C}` and it computes a set of values
|
||||
`Values: R -> {E}` that maps each region `R` to a set of elements
|
||||
`{E}` (see [here][riv] for more notes on region elements):
|
||||
|
||||
- Initially, each region is mapped to an empty set, so `Values(R) =
|
||||
{}` for all regions `R`.
|
||||
- Next, we process the constraints repeatedly until a fixed-point is reached:
|
||||
- For each constraint C:
|
||||
- Update `Values` as needed to satisfy the constraint
|
||||
|
||||
[riv]: ../region_inference.md#region-variables
|
||||
|
||||
As a simple example, if we have a liveness constraint `R live at E`,
|
||||
then we can apply `Values(R) = Values(R) union {E}` to make the
|
||||
constraint be satisfied. Similarly, if we have an outlives constraint
|
||||
`R1: R2`, we can apply `Values(R1) = Values(R1) union Values(R2)`.
|
||||
(Member constraints are more complex and we discuss them [in this section][m_c].)
|
||||
|
||||
In practice, however, we are a bit more clever. Instead of applying
|
||||
the constraints in a loop, we can analyze the constraints and figure
|
||||
out the correct order to apply them, so that we only have to apply
|
||||
each constraint once in order to find the final result.
|
||||
|
||||
Similarly, in the implementation, the `Values` set is stored in the
|
||||
`scc_values` field, but they are indexed not by a *region* but by a
|
||||
*strongly connected component* (SCC). SCCs are an optimization that
|
||||
avoids a lot of redundant storage and computation. They are explained
|
||||
in the section on outlives constraints.
|
||||
|
||||
## Liveness constraints
|
||||
|
||||
A **liveness constraint** arises when some variable whose type
|
||||
includes a region R is live at some [point] P. This simply means that
|
||||
the value of R must include the point P. Liveness constraints are
|
||||
computed by the MIR type checker.
|
||||
|
||||
[point]: ../../appendix/glossary.md#point
|
||||
|
||||
A liveness constraint `R live at E` is satisfied if `E` is a member of
|
||||
`Values(R)`. So to "apply" such a constraint to `Values`, we just have
|
||||
to compute `Values(R) = Values(R) union {E}`.
|
||||
|
||||
The liveness values are computed in the type-check and passed to the
|
||||
region inference upon creation in the `liveness_constraints` argument.
|
||||
These are not represented as individual constraints like `R live at E`
|
||||
though; instead, we store a (sparse) bitset per region variable (of
|
||||
type [`LivenessValues`]). This way we only need a single bit for each
|
||||
liveness constraint.
|
||||
|
||||
[`liveness_constraints`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#structfield.liveness_constraints
|
||||
[`LivenessValues`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/values/struct.LivenessValues.html
|
||||
|
||||
One thing that is worth mentioning: All lifetime parameters are always
|
||||
considered to be live over the entire function body. This is because
|
||||
they correspond to some portion of the *caller's* execution, and that
|
||||
execution clearly includes the time spent in this function, since the
|
||||
caller is waiting for us to return.
|
||||
|
||||
## Outlives constraints
|
||||
|
||||
An outlives constraint `'a: 'b` indicates that the value of `'a` must
|
||||
be a **superset** of the value of `'b`. That is, an outlives
|
||||
constraint `R1: R2` is satisfied if `Values(R1)` is a superset of
|
||||
`Values(R2)`. So to "apply" such a constraint to `Values`, we just
|
||||
have to compute `Values(R1) = Values(R1) union Values(R2)`.
|
||||
|
||||
One observation that follows from this is that if you have `R1: R2`
|
||||
and `R2: R1`, then `R1 = R2` must be true. Similarly, if you have:
|
||||
|
||||
```txt
|
||||
R1: R2
|
||||
R2: R3
|
||||
R3: R4
|
||||
R4: R1
|
||||
```
|
||||
|
||||
then `R1 = R2 = R3 = R4` follows. We take advantage of this to make things
|
||||
much faster, as described shortly.
|
||||
|
||||
In the code, the set of outlives constraints is given to the region
|
||||
inference context on creation in a parameter of type
|
||||
[`OutlivesConstraintSet`]. The constraint set is basically just a list of `'a:
|
||||
'b` constraints.
|
||||
|
||||
### The outlives constraint graph and SCCs
|
||||
|
||||
In order to work more efficiently with outlives constraints, they are
|
||||
[converted into the form of a graph][graph-fn], where the nodes of the
|
||||
graph are region variables (`'a`, `'b`) and each constraint `'a: 'b`
|
||||
induces an edge `'a -> 'b`. This conversion happens in the
|
||||
[`RegionInferenceContext::new`] function that creates the inference
|
||||
context.
|
||||
|
||||
[`OutlivesConstraintSet`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/constraints/struct.OutlivesConstraintSet.html
|
||||
[graph-fn]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/constraints/struct.OutlivesConstraintSet.html#method.graph
|
||||
[`RegionInferenceContext::new`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#method.new
|
||||
|
||||
When using a graph representation, we can detect regions that must be equal
|
||||
by looking for cycles. That is, if you have a constraint like
|
||||
|
||||
```txt
|
||||
'a: 'b
|
||||
'b: 'c
|
||||
'c: 'd
|
||||
'd: 'a
|
||||
```
|
||||
|
||||
then this will correspond to a cycle in the graph containing the
|
||||
elements `'a...'d`.
|
||||
|
||||
Therefore, one of the first things that we do in propagating region
|
||||
values is to compute the **strongly connected components** (SCCs) in
|
||||
the constraint graph. The result is stored in the [`constraint_sccs`]
|
||||
field. You can then easily find the SCC that a region `r` is a part of
|
||||
by invoking `constraint_sccs.scc(r)`.
|
||||
|
||||
Working in terms of SCCs allows us to be more efficient: if we have a
|
||||
set of regions `'a...'d` that are part of a single SCC, we don't have
|
||||
to compute/store their values separately. We can just store one value
|
||||
**for the SCC**, since they must all be equal.
|
||||
|
||||
If you look over the region inference code, you will see that a number
|
||||
of fields are defined in terms of SCCs. For example, the
|
||||
[`scc_values`] field stores the values of each SCC. To get the value
|
||||
of a specific region `'a` then, we first figure out the SCC that the
|
||||
region is a part of, and then find the value of that SCC.
|
||||
|
||||
[`constraint_sccs`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#structfield.constraint_sccs
|
||||
[`scc_values`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#structfield.scc_values
|
||||
|
||||
When we compute SCCs, we not only figure out which regions are a
|
||||
member of each SCC, we also figure out the edges between them. So for example
|
||||
consider this set of outlives constraints:
|
||||
|
||||
```txt
|
||||
'a: 'b
|
||||
'b: 'a
|
||||
|
||||
'a: 'c
|
||||
|
||||
'c: 'd
|
||||
'd: 'c
|
||||
```
|
||||
|
||||
Here we have two SCCs: S0 contains `'a` and `'b`, and S1 contains `'c`
|
||||
and `'d`. But these SCCs are not independent: because `'a: 'c`, that
|
||||
means that `S0: S1` as well. That is -- the value of `S0` must be a
|
||||
superset of the value of `S1`. One crucial thing is that this graph of
|
||||
SCCs is always a DAG -- that is, it never has cycles. This is because
|
||||
all the cycles have been removed to form the SCCs themselves.
|
||||
|
||||
### Applying liveness constraints to SCCs
|
||||
|
||||
The liveness constraints that come in from the type-checker are
|
||||
expressed in terms of regions -- that is, we have a map like
|
||||
`Liveness: R -> {E}`. But we want our final result to be expressed
|
||||
in terms of SCCs -- we can integrate these liveness constraints very
|
||||
easily just by taking the union:
|
||||
|
||||
```txt
|
||||
for each region R:
|
||||
let S be the SCC that contains R
|
||||
Values(S) = Values(S) union Liveness(R)
|
||||
```
|
||||
|
||||
In the region inferencer, this step is done in [`RegionInferenceContext::new`].
|
||||
|
||||
### Applying outlives constraints
|
||||
|
||||
Once we have computed the DAG of SCCs, we use that to structure our
|
||||
entire computation. If we have an edge `S1 -> S2` between two SCCs,
|
||||
that means that `Values(S1) >= Values(S2)` must hold. So, to compute
|
||||
the value of `S1`, we first compute the values of each successor `S2`.
|
||||
Then we simply union all of those values together. To use a
|
||||
quasi-iterator-like notation:
|
||||
|
||||
```txt
|
||||
Values(S1) =
|
||||
s1.successors()
|
||||
.map(|s2| Values(s2))
|
||||
.union()
|
||||
```
|
||||
|
||||
In the code, this work starts in the [`propagate_constraints`]
|
||||
function, which iterates over all the SCCs. For each SCC `S1`, we
|
||||
compute its value by first computing the value of its
|
||||
successors. Since SCCs form a DAG, we don't have to be concerned about
|
||||
cycles, though we do need to keep a set around to track whether we
|
||||
have already processed a given SCC or not. For each successor `S2`, once
|
||||
we have computed `S2`'s value, we can union those elements into the
|
||||
value for `S1`. (Although we have to be careful in this process to
|
||||
properly handle [higher-ranked
|
||||
placeholders](./placeholders_and_universes.html).) Note that the value
|
||||
for `S1` already contains the liveness constraints, since they were
|
||||
added in [`RegionInferenceContext::new`].
|
||||
|
||||
Once that process is done, we now have the "minimal value" for `S1`,
|
||||
taking into account all of the liveness and outlives
|
||||
constraints. However, in order to complete the process, we must also
|
||||
consider [member constraints][m_c], which are described in [a later
|
||||
section][m_c].
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Reporting region errors
|
||||
|
||||
TODO: we should discuss how to generate errors from the results of these analyses.
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
# Universal regions
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
"Universal regions" is the name that the code uses to refer to "named
|
||||
lifetimes" -- e.g., lifetime parameters and `'static`. The name
|
||||
derives from the fact that such lifetimes are "universally quantified"
|
||||
(i.e., we must make sure the code is true for all values of those
|
||||
lifetimes). It is worth spending a bit of time discussing how lifetime
|
||||
parameters are handled during region inference. Consider this example:
|
||||
|
||||
```rust,ignore
|
||||
fn foo<'a, 'b>(x: &'a u32, y: &'b u32) -> &'b u32 {
|
||||
x
|
||||
}
|
||||
```
|
||||
|
||||
This example is intended not to compile, because we are returning `x`,
|
||||
which has type `&'a u32`, but our signature promises that we will
|
||||
return a `&'b u32` value. But how are lifetimes like `'a` and `'b`
|
||||
integrated into region inference, and how does this error wind up being
|
||||
detected?
|
||||
|
||||
## Universal regions and their relationships to one another
|
||||
|
||||
Early on in region inference, one of the first things we do is to
|
||||
construct a [`UniversalRegions`] struct. This struct tracks the
|
||||
various universal regions in scope on a particular function. We also
|
||||
create a [`UniversalRegionRelations`] struct, which tracks their
|
||||
relationships to one another. So if you have e.g. `where 'a: 'b`, then
|
||||
the [`UniversalRegionRelations`] struct would track that `'a: 'b` is
|
||||
known to hold (which could be tested with the [`outlives`] function).
|
||||
|
||||
[`UniversalRegions`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/universal_regions/struct.UniversalRegions.html
|
||||
[`UniversalRegionRelations`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/type_check/free_region_relations/struct.UniversalRegionRelations.html
|
||||
[`outlives`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/type_check/free_region_relations/struct.UniversalRegionRelations.html#method.outlives
|
||||
|
||||
## Everything is a region variable
|
||||
|
||||
One important aspect of how NLL region inference works is that **all
|
||||
lifetimes** are represented as numbered variables. This means that the
|
||||
only variant of [`region_kind::RegionKind`] that we use is the [`ReVar`]
|
||||
variant. These region variables are broken into two major categories,
|
||||
based on their index:
|
||||
|
||||
[`region_kind::RegionKind`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_type_ir/region_kind/enum.RegionKind.html
|
||||
[`ReVar`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_type_ir/region_kind/enum.RegionKind.html#variant.ReVar
|
||||
|
||||
- 0..N: universal regions -- the ones we are discussing here. In this
|
||||
case, the code must be correct with respect to any value of those
|
||||
variables that meets the declared relationships.
|
||||
- N..M: existential regions -- inference variables where the region
|
||||
inferencer is tasked with finding *some* suitable value.
|
||||
|
||||
In fact, the universal regions can be further subdivided based on
|
||||
where they were brought into scope (see the [`RegionClassification`]
|
||||
type). These subdivisions are not important for the topics discussed
|
||||
here, but become important when we consider [closure constraint
|
||||
propagation](./closure_constraints.html), so we discuss them there.
|
||||
|
||||
[`RegionClassification`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/universal_regions/enum.RegionClassification.html#variant.Local
|
||||
|
||||
## Universal lifetimes as the elements of a region's value
|
||||
|
||||
As noted previously, the value that we infer for each region is a set
|
||||
`{E}`. The elements of this set can be points in the control-flow
|
||||
graph, but they can also be an element `end('a)` corresponding to each
|
||||
universal lifetime `'a`. If the value for some region `R0` includes
|
||||
`end('a)`, then this implies that `R0` must extend until the end of `'a`
|
||||
in the caller.
|
||||
|
||||
## The "value" of a universal region
|
||||
|
||||
During region inference, we compute a value for each universal region
|
||||
in the same way as we compute values for other regions. This value
|
||||
represents, effectively, the **lower bound** on that universal region
|
||||
-- the things that it must outlive. We now describe how we use this
|
||||
value to check for errors.
|
||||
|
||||
## Liveness and universal regions
|
||||
|
||||
All universal regions have an initial liveness constraint that
|
||||
includes the entire function body. This is because lifetime parameters
|
||||
are defined in the caller and must include the entirety of the
|
||||
function call that invokes this particular function. In addition, each
|
||||
universal region `'a` includes itself (that is, `end('a)`) in its
|
||||
liveness constraint (i.e., `'a` must extend until the end of
|
||||
itself). In the code, these liveness constraints are set up in
|
||||
[`init_free_and_bound_regions`].
|
||||
|
||||
[`init_free_and_bound_regions`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#method.init_free_and_bound_regions
|
||||
|
||||
## Propagating outlives constraints for universal regions
|
||||
|
||||
So, consider the first example of this section:
|
||||
|
||||
```rust,ignore
|
||||
fn foo<'a, 'b>(x: &'a u32, y: &'b u32) -> &'b u32 {
|
||||
x
|
||||
}
|
||||
```
|
||||
|
||||
Here, returning `x` requires that `&'a u32 <: &'b u32`, which gives
|
||||
rise to an outlives constraint `'a: 'b`. Combined with our default liveness
|
||||
constraints we get:
|
||||
|
||||
```txt
|
||||
'a live at {B, end('a)} // B represents the "function body"
|
||||
'b live at {B, end('b)}
|
||||
'a: 'b
|
||||
```
|
||||
|
||||
When we process the `'a: 'b` constraint, therefore, we will add
|
||||
`end('b)` into the value for `'a`, resulting in a final value of `{B,
|
||||
end('a), end('b)}`.
|
||||
|
||||
## Detecting errors
|
||||
|
||||
Once we have finished constraint propagation, we then enforce a
|
||||
constraint that if some universal region `'a` includes an element
|
||||
`end('b)`, then `'a: 'b` must be declared in the function's bounds. If
|
||||
not, as in our example, that is an error. This check is done in the
|
||||
[`check_universal_regions`] function, which simply iterates over all
|
||||
universal regions, inspects their final value, and tests against the
|
||||
declared [`UniversalRegionRelations`].
|
||||
|
||||
[`check_universal_regions`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/region_infer/struct.RegionInferenceContext.html#method.check_universal_regions
|
||||
|
|
@ -0,0 +1,193 @@
|
|||
# Member constraints
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
A member constraint `'m member of ['c_1..'c_N]` expresses that the
|
||||
region `'m` must be *equal* to some **choice region** `'c_i` (for
|
||||
some `i`). These constraints cannot be expressed by users, but they
|
||||
arise from `impl Trait` due to its lifetime capture rules. Consider a
|
||||
function such as the following:
|
||||
|
||||
```rust,ignore
|
||||
fn make(a: &'a u32, b: &'b u32) -> impl Trait<'a, 'b> { .. }
|
||||
```
|
||||
|
||||
Here, the true return type (often called the "hidden type") is only
|
||||
permitted to capture the lifetimes `'a` or `'b`. You can kind of see
|
||||
this more clearly by desugaring that `impl Trait` return type into its
|
||||
more explicit form:
|
||||
|
||||
```rust,ignore
|
||||
type MakeReturn<'x, 'y> = impl Trait<'x, 'y>;
|
||||
fn make(a: &'a u32, b: &'b u32) -> MakeReturn<'a, 'b> { .. }
|
||||
```
|
||||
|
||||
Here, the idea is that the hidden type must be some type that could
|
||||
have been written in place of the `impl Trait<'x, 'y>` -- but clearly
|
||||
such a type can only reference the regions `'x` or `'y` (or
|
||||
`'static`!), as those are the only names in scope. This limitation is
|
||||
then translated into a restriction to only access `'a` or `'b` because
|
||||
we are returning `MakeReturn<'a, 'b>`, where `'x` and `'y` have been
|
||||
replaced with `'a` and `'b` respectively.
|
||||
|
||||
## Detailed example
|
||||
|
||||
To help us explain member constraints in more detail, let's spell out
|
||||
the `make` example in a bit more detail. First off, let's assume that
|
||||
you have some dummy trait:
|
||||
|
||||
```rust,ignore
|
||||
trait Trait<'a, 'b> { }
|
||||
impl<T> Trait<'_, '_> for T { }
|
||||
```
|
||||
|
||||
and this is the `make` function (in desugared form):
|
||||
|
||||
```rust,ignore
|
||||
type MakeReturn<'x, 'y> = impl Trait<'x, 'y>;
|
||||
fn make(a: &'a u32, b: &'b u32) -> MakeReturn<'a, 'b> {
|
||||
(a, b)
|
||||
}
|
||||
```
|
||||
|
||||
What happens in this case is that the return type will be `(&'0 u32, &'1 u32)`,
|
||||
where `'0` and `'1` are fresh region variables. We will have the following
|
||||
region constraints:
|
||||
|
||||
```txt
|
||||
'0 live at {L}
|
||||
'1 live at {L}
|
||||
'a: '0
|
||||
'b: '1
|
||||
'0 member of ['a, 'b, 'static]
|
||||
'1 member of ['a, 'b, 'static]
|
||||
```
|
||||
|
||||
Here the "liveness set" `{L}` corresponds to that subset of the body
|
||||
where `'0` and `'1` are live -- basically the point from where the
|
||||
return tuple is constructed to where it is returned (in fact, `'0` and
|
||||
`'1` might have slightly different liveness sets, but that's not very
|
||||
interesting to the point we are illustrating here).
|
||||
|
||||
The `'a: '0` and `'b: '1` constraints arise from subtyping. When we
|
||||
construct the `(a, b)` value, it will be assigned type `(&'0 u32, &'1
|
||||
u32)` -- the region variables reflect that the lifetimes of these
|
||||
references could be made smaller. For this value to be created from
|
||||
`a` and `b`, however, we do require that:
|
||||
|
||||
```txt
|
||||
(&'a u32, &'b u32) <: (&'0 u32, &'1 u32)
|
||||
```
|
||||
|
||||
which means in turn that `&'a u32 <: &'0 u32` and hence that `'a: '0`
|
||||
(and similarly that `&'b u32 <: &'1 u32`, `'b: '1`).
|
||||
|
||||
Note that if we ignore member constraints, the value of `'0` would be
|
||||
inferred to be some subset of the function body (from the liveness
|
||||
constraints, which we did not write explicitly). It would never become
|
||||
`'a`, because there is no need for it to -- we have a constraint that
|
||||
`'a: '0`, but that just puts a "cap" on how *large* `'0` can grow to
|
||||
become. Since we compute the *minimal* value that we can, we are happy
|
||||
to leave `'0` as being just equal to the liveness set. This is where
|
||||
member constraints come in.
|
||||
|
||||
## Choices are always lifetime parameters
|
||||
|
||||
At present, the "choice" regions from a member constraint are always lifetime
|
||||
parameters from the current function. As of <!-- date-check --> October 2021,
|
||||
this falls out from the placement of impl Trait, though in the future it may not
|
||||
be the case. We take some advantage of this fact, as it simplifies the current
|
||||
code. In particular, we don't have to consider a case like `'0 member of ['1,
|
||||
'static]`, in which the values of both `'0` and `'1` are being inferred and hence
|
||||
changing. See [rust-lang/rust#61773][#61773] for more information.
|
||||
|
||||
[#61773]: https://github.com/rust-lang/rust/issues/61773
|
||||
|
||||
## Applying member constraints
|
||||
|
||||
Member constraints are a bit more complex than other forms of
|
||||
constraints. This is because they have an "or" quality to them -- that
|
||||
is, they describe multiple choices that we must select from. E.g., in
|
||||
our example constraint `'0 member of ['a, 'b, 'static]`, it might be
|
||||
that `'0` is equal to `'a`, `'b`, *or* `'static`. How can we pick the
|
||||
correct one? What we currently do is to look for a *minimal choice*
|
||||
-- if we find one, then we will grow `'0` to be equal to that minimal
|
||||
choice. To find that minimal choice, we take two factors into
|
||||
consideration: lower and upper bounds.
|
||||
|
||||
### Lower bounds
|
||||
|
||||
The *lower bounds* are those lifetimes that `'0` *must outlive* --
|
||||
i.e., that `'0` must be larger than. In fact, when it comes time to
|
||||
apply member constraints, we've already *computed* the lower bounds of
|
||||
`'0` because we computed its minimal value (or at least, the lower
|
||||
bounds considering everything but member constraints).
|
||||
|
||||
Let `LB` be the current value of `'0`. We know then that `'0: LB` must
|
||||
hold, whatever the final value of `'0` is. Therefore, we can rule out
|
||||
any choice `'choice` where `'choice: LB` does not hold.
|
||||
|
||||
Unfortunately, in our example, this is not very helpful. The lower
|
||||
bound for `'0` will just be the liveness set `{L}`, and we know that
|
||||
all the lifetime parameters outlive that set. So we are left with the
|
||||
same set of choices here. (But in other examples, particularly those
|
||||
with different variance, lower bound constraints may be relevant.)
|
||||
|
||||
### Upper bounds
|
||||
|
||||
The *upper bounds* are those lifetimes that *must outlive* `'0` --
|
||||
i.e., that `'0` must be *smaller* than. In our example, this would be
|
||||
`'a`, because we have the constraint that `'a: '0`. In more complex
|
||||
examples, the chain may be more indirect.
|
||||
|
||||
We can use upper bounds to rule out members in a very similar way to
|
||||
lower bounds. If UB is some upper bound, then we know that `UB:
|
||||
'0` must hold, so we can rule out any choice `'choice` where `UB:
|
||||
'choice` does not hold.
|
||||
|
||||
In our example, we would be able to reduce our choice set from `['a,
|
||||
'b, 'static]` to just `['a]`. This is because `'0` has an upper bound
|
||||
of `'a`, and neither `'a: 'b` nor `'a: 'static` is known to hold.
|
||||
|
||||
(For notes on how we collect upper bounds in the implementation, see
|
||||
[the section below](#collecting).)
|
||||
|
||||
### Minimal choice
|
||||
|
||||
After applying lower and upper bounds, we can still sometimes have
|
||||
multiple possibilities. For example, imagine a variant of our example
|
||||
using types with the opposite variance. In that case, we would have
|
||||
the constraint `'0: 'a` instead of `'a: '0`. Hence the current value
|
||||
of `'0` would be `{L, 'a}`. Using this as a lower bound, we would be
|
||||
able to narrow down the member choices to `['a, 'static]` because `'b:
|
||||
'a` is not known to hold (but `'a: 'a` and `'static: 'a` do hold). We
|
||||
would not have any upper bounds, so that would be our final set of choices.
|
||||
|
||||
In that case, we apply the **minimal choice** rule -- basically, if
|
||||
one of our choices is smaller than the others, we can use that. In
|
||||
this case, we would opt for `'a` (and not `'static`).
|
||||
|
||||
This choice is consistent with the general 'flow' of region
|
||||
propagation, which always aims to compute a minimal value for the
|
||||
region being inferred. However, it is somewhat arbitrary.
|
||||
|
||||
<a id="collecting"></a>
|
||||
|
||||
### Collecting upper bounds in the implementation
|
||||
|
||||
In practice, computing upper bounds is a bit inconvenient, because our
|
||||
data structures are set up for the opposite. What we do is to compute
|
||||
the **reverse SCC graph** (we do this lazily and cache the result) --
|
||||
that is, a graph where `'a: 'b` induces an edge `SCC('b) ->
|
||||
SCC('a)`. Like the normal SCC graph, this is a DAG. We can then do a
|
||||
depth-first search starting from `SCC('0)` in this graph. This will
|
||||
take us to all the SCCs that must outlive `'0`.
|
||||
|
||||
One wrinkle is that, as we walk the "upper bound" SCCs, their values
|
||||
will not yet have been fully computed. However, we **have** already
|
||||
applied their liveness constraints, so we have some information about
|
||||
their value. In particular, for any regions representing lifetime
|
||||
parameters, their value will contain themselves (i.e., the initial
|
||||
value for `'a` includes `'a` and the value for `'b` contains `'b`). So
|
||||
we can collect all of the lifetime parameters that are reachable,
|
||||
which is precisely what we are interested in.
|
||||
|
|
@ -0,0 +1,442 @@
|
|||
# Placeholders and universes
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
From time to time we have to reason about regions that we can't
|
||||
concretely know. For example, consider this program:
|
||||
|
||||
```rust,ignore
|
||||
// A function that needs a static reference
|
||||
fn foo(x: &'static u32) { }
|
||||
|
||||
fn bar(f: for<'a> fn(&'a u32)) {
|
||||
// ^^^^^^^^^^^^^^^^^^^ a function that can accept **any** reference
|
||||
let x = 22;
|
||||
f(&x);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
bar(foo);
|
||||
}
|
||||
```
|
||||
|
||||
This program ought not to type-check: `foo` needs a static reference
|
||||
for its argument, and `bar` wants to be given a function that
|
||||
accepts **any** reference (so it can call it with something on its
|
||||
stack, for example). But *how* do we reject it and *why*?
|
||||
|
||||
## Subtyping and Placeholders
|
||||
|
||||
When we type-check `main`, and in particular the call `bar(foo)`, we
|
||||
are going to wind up with a subtyping relationship like this one:
|
||||
|
||||
```text
|
||||
fn(&'static u32) <: for<'a> fn(&'a u32)
|
||||
---------------- -------------------
|
||||
the type of `foo` the type `bar` expects
|
||||
```
|
||||
|
||||
We handle this sort of subtyping by taking the variables that are
|
||||
bound in the supertype and replacing them with
|
||||
[universally quantified](../../appendix/background.md#quantified)
|
||||
representatives, denoted like `!1` here. We call these regions "placeholder
|
||||
regions" – they represent, basically, "some unknown region".
|
||||
|
||||
Once we've done that replacement, we have the following relation:
|
||||
|
||||
```text
|
||||
fn(&'static u32) <: fn(&'!1 u32)
|
||||
```
|
||||
|
||||
The key idea here is that this unknown region `'!1` is not related to
|
||||
any other regions. So if we can prove that the subtyping relationship
|
||||
is true for `'!1`, then it ought to be true for any region, which is
|
||||
what we wanted.
|
||||
|
||||
So let's work through what happens next. To check if two functions are
|
||||
subtypes, we check if their arguments have the desired relationship
|
||||
(fn arguments are [contravariant](../../appendix/background.md#variance), so
|
||||
we swap the left and right here):
|
||||
|
||||
```text
|
||||
&'!1 u32 <: &'static u32
|
||||
```
|
||||
|
||||
According to the basic subtyping rules for a reference, this will be
|
||||
true if `'!1: 'static`. That is – if "some unknown region `!1`" outlives `'static`.
|
||||
Now, this *might* be true – after all, `'!1` could be `'static` –
|
||||
but we don't *know* that it's true. So this should yield up an error (eventually).
|
||||
|
||||
## What is a universe?
|
||||
|
||||
In the previous section, we introduced the idea of a placeholder
|
||||
region, and we denoted it `!1`. We call this number `1` the **universe
|
||||
index**. The idea of a "universe" is that it is a set of names that
|
||||
are in scope within some type or at some point. Universes are formed
|
||||
into a tree, where each child extends its parents with some new names.
|
||||
So the **root universe** conceptually contains global names, such as
|
||||
the lifetime `'static` or the type `i32`. In the compiler, we also
|
||||
put generic type parameters into this root universe (in this sense,
|
||||
there is not just one root universe, but one per item). So consider
|
||||
this function `bar`:
|
||||
|
||||
```rust,ignore
|
||||
struct Foo { }
|
||||
|
||||
fn bar<'a, T>(t: &'a T) {
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
Here, the root universe would consist of the lifetimes `'static` and
|
||||
`'a`. In fact, although we're focused on lifetimes here, we can apply
|
||||
the same concept to types, in which case the types `Foo` and `T` would
|
||||
be in the root universe (along with other global types, like `i32`).
|
||||
Basically, the root universe contains all the names that
|
||||
[appear free](../../appendix/background.md#free-vs-bound) in the body of `bar`.
|
||||
|
||||
Now let's extend `bar` a bit by adding a variable `x`:
|
||||
|
||||
```rust,ignore
|
||||
fn bar<'a, T>(t: &'a T) {
|
||||
let x: for<'b> fn(&'b u32) = ...;
|
||||
}
|
||||
```
|
||||
|
||||
Here, the name `'b` is not part of the root universe. Instead, when we
|
||||
"enter" into this `for<'b>` (e.g., by replacing it with a placeholder), we will create
|
||||
a child universe of the root, let's call it U1:
|
||||
|
||||
```text
|
||||
U0 (root universe)
|
||||
│
|
||||
└─ U1 (child universe)
|
||||
```
|
||||
|
||||
The idea is that this child universe U1 extends the root universe U0
|
||||
with a new name, which we are identifying by its universe number:
|
||||
`!1`.
|
||||
|
||||
Now let's extend `bar` a bit by adding one more variable, `y`:
|
||||
|
||||
```rust,ignore
|
||||
fn bar<'a, T>(t: &'a T) {
|
||||
let x: for<'b> fn(&'b u32) = ...;
|
||||
let y: for<'c> fn(&'c u32) = ...;
|
||||
}
|
||||
```
|
||||
|
||||
When we enter *this* type, we will again create a new universe, which
|
||||
we'll call `U2`. Its parent will be the root universe, and U1 will be
|
||||
its sibling:
|
||||
|
||||
```text
|
||||
U0 (root universe)
|
||||
│
|
||||
├─ U1 (child universe)
|
||||
│
|
||||
└─ U2 (child universe)
|
||||
```
|
||||
|
||||
This implies that, while in U2, we can name things from U0 or U2, but
|
||||
not U1.
|
||||
|
||||
**Giving existential variables a universe.** Now that we have this
|
||||
notion of universes, we can use it to extend our type-checker and
|
||||
things to prevent illegal names from leaking out. The idea is that we
|
||||
give each inference (existential) variable – whether it be a type or
|
||||
a lifetime – a universe. That variable's value can then only
|
||||
reference names visible from that universe. So for example if a
|
||||
lifetime variable is created in U0, then it cannot be assigned a value
|
||||
of `!1` or `!2`, because those names are not visible from the universe
|
||||
U0.
|
||||
|
||||
**Representing universes with just a counter.** You might be surprised
|
||||
to see that the compiler doesn't keep track of a full tree of
|
||||
universes. Instead, it just keeps a counter – and, to determine if
|
||||
one universe can see another one, it just checks if the index is
|
||||
greater. For example, U2 can see U0 because 2 >= 0. But U0 cannot see
|
||||
U2, because 0 >= 2 is false.
|
||||
|
||||
How can we get away with this? Doesn't this mean that we would allow
|
||||
U2 to also see U1? The answer is that, yes, we would, **if that
|
||||
question ever arose**. But because of the structure of our type
|
||||
checker etc, there is no way for that to happen. In order for
|
||||
something happening in the universe U1 to "communicate" with something
|
||||
happening in U2, they would have to have a shared inference variable X
|
||||
in common. And because everything in U1 is scoped to just U1 and its
|
||||
children, that inference variable X would have to be in U0. And since
|
||||
X is in U0, it cannot name anything from U1 (or U2). This is perhaps easiest
|
||||
to see by using a kind of generic "logic" example:
|
||||
|
||||
```text
|
||||
exists<X> {
|
||||
forall<Y> { ... /* Y is in U1 ... */ }
|
||||
forall<Z> { ... /* Z is in U2 ... */ }
|
||||
}
|
||||
```
|
||||
|
||||
Here, the only way for the two foralls to interact would be through X,
|
||||
but neither Y nor Z are in scope when X is declared, so its value
|
||||
cannot reference either of them.
|
||||
|
||||
## Universes and placeholder region elements
|
||||
|
||||
But where does that error come from? The way it happens is like this.
|
||||
When we are constructing the region inference context, we can tell
|
||||
from the type inference context how many placeholder variables exist
|
||||
(the `InferCtxt` has an internal counter). For each of those, we
|
||||
create a corresponding universal region variable `!n` and a "region
|
||||
element" `placeholder(n)`. This corresponds to "some unknown set of other
|
||||
elements". The value of `!n` is `{placeholder(n)}`.
|
||||
|
||||
At the same time, we also give each existential variable a
|
||||
**universe** (also taken from the `InferCtxt`). This universe
|
||||
determines which placeholder elements may appear in its value: For
|
||||
example, a variable in universe U3 may name `placeholder(1)`, `placeholder(2)`, and
|
||||
`placeholder(3)`, but not `placeholder(4)`. Note that the universe of an inference
|
||||
variable controls what region elements **can** appear in its value; it
|
||||
does not say which region elements **will** appear.
|
||||
|
||||
## Placeholders and outlives constraints
|
||||
|
||||
In the region inference engine, outlives constraints have the form:
|
||||
|
||||
```text
|
||||
V1: V2 @ P
|
||||
```
|
||||
|
||||
where `V1` and `V2` are region indices, and hence map to some region
|
||||
variable (which may be universally or existentially quantified). The
|
||||
`P` here is a "point" in the control-flow graph; it's not important
|
||||
for this section. Each of these variables will have a universe, so let's call
|
||||
those universes `U(V1)` and `U(V2)` respectively. (Actually, the only
|
||||
one we are going to care about is `U(V1)`.)
|
||||
|
||||
When we encounter this constraint, the ordinary procedure is to start
|
||||
a DFS from `P`. We keep walking so long as the nodes we are walking
|
||||
are present in `value(V2)` and we add those nodes to `value(V1)`. If
|
||||
we reach a return point, we add in any `end(X)` elements. That part
|
||||
remains unchanged.
|
||||
|
||||
But then *after that* we want to iterate over the placeholder `placeholder(x)`
|
||||
elements in V2 (each of those must be visible to `U(V2)`, but we
|
||||
should be able to just assume that is true, we don't have to check
|
||||
it). We have to ensure that `value(V1)` outlives each of those
|
||||
placeholder elements.
|
||||
|
||||
Now there are two ways that could happen. First, if `U(V1)` can see
|
||||
the universe `x` (i.e., `x <= U(V1)`), then we can just add `placeholder(x)`
|
||||
to `value(V1)` and be done. But if not, then we have to approximate:
|
||||
we may not know what set of elements `placeholder(x)` represents, but we
|
||||
should be able to compute some sort of **upper bound** B for it –
|
||||
some region B that outlives `placeholder(x)`. For now, we'll just use
|
||||
`'static` for that (since it outlives everything) – in the future, we
|
||||
can sometimes be smarter here (and in fact we have code for doing this
|
||||
already in other contexts). Moreover, since `'static` is in the root
|
||||
universe U0, we know that all variables can see it – so basically if
|
||||
we find that `value(V2)` contains `placeholder(x)` for some universe `x`
|
||||
that `V1` can't see, then we force `V1` to `'static`.
|
||||
|
||||
## Extending the "universal regions" check
|
||||
|
||||
After all constraints have been propagated, the NLL region inference
|
||||
has one final check, where it goes over the values that wound up being
|
||||
computed for each universal region and checks that they did not get
|
||||
'too large'. In our case, we will go through each placeholder region
|
||||
and check that it contains *only* the `placeholder(u)` element it is known to
|
||||
outlive. (Later, we might be able to know that there are relationships
|
||||
between two placeholder regions and take those into account, as we do
|
||||
for universal regions from the fn signature.)
|
||||
|
||||
Put another way, the "universal regions" check can be considered to be
|
||||
checking constraints like:
|
||||
|
||||
```text
|
||||
{placeholder(1)}: V1
|
||||
```
|
||||
|
||||
where `{placeholder(1)}` is like a constant set, and V1 is the variable we
|
||||
made to represent the `!1` region.
|
||||
|
||||
## Back to our example
|
||||
|
||||
OK, so far so good. Now let's walk through what would happen with our
|
||||
first example:
|
||||
|
||||
```text
|
||||
fn(&'static u32) <: fn(&'!1 u32) @ P // this point P is not imp't here
|
||||
```
|
||||
|
||||
The region inference engine will create a region element domain like this:
|
||||
|
||||
```text
|
||||
{ CFG; end('static); placeholder(1) }
|
||||
--- ------------ ------- from the universe `!1`
|
||||
| 'static is always in scope
|
||||
all points in the CFG; not especially relevant here
|
||||
```
|
||||
|
||||
It will also create two universal variables, one representing
|
||||
`'static` and one representing `'!1`. Let's call them Vs and V1. They
|
||||
will have initial values like so:
|
||||
|
||||
```text
|
||||
Vs = { CFG; end('static) } // it is in U0, so can't name anything else
|
||||
V1 = { placeholder(1) }
|
||||
```
|
||||
|
||||
From the subtyping constraint above, we would have an outlives constraint like
|
||||
|
||||
```text
|
||||
'!1: 'static @ P
|
||||
```
|
||||
|
||||
To process this, we would grow the value of V1 to include all of Vs:
|
||||
|
||||
```text
|
||||
Vs = { CFG; end('static) }
|
||||
V1 = { CFG; end('static), placeholder(1) }
|
||||
```
|
||||
|
||||
At that point, constraint propagation is complete, because all the
|
||||
outlives relationships are satisfied. Then we would go to the "check
|
||||
universal regions" portion of the code, which would test that no
|
||||
universal region grew too large.
|
||||
|
||||
In this case, `V1` *did* grow too large – it is not known to outlive
|
||||
`end('static)`, nor any of the CFG – so we would report an error.
|
||||
|
||||
## Another example
|
||||
|
||||
What about this subtyping relationship?
|
||||
|
||||
```text
|
||||
for<'a> fn(&'a u32, &'a u32)
|
||||
<:
|
||||
for<'b, 'c> fn(&'b u32, &'c u32)
|
||||
```
|
||||
|
||||
Here we would replace the bound regions in the supertype with placeholders, as before, yielding:
|
||||
|
||||
```text
|
||||
for<'a> fn(&'a u32, &'a u32)
|
||||
<:
|
||||
fn(&'!1 u32, &'!2 u32)
|
||||
```
|
||||
|
||||
then we instantiate the variable on the left-hand side with an
|
||||
existential in universe U2, yielding the following (`?n` is a notation
|
||||
for an existential variable):
|
||||
|
||||
```text
|
||||
fn(&'?3 u32, &'?3 u32)
|
||||
<:
|
||||
fn(&'!1 u32, &'!2 u32)
|
||||
```
|
||||
|
||||
Then we break this down further:
|
||||
|
||||
```text
|
||||
&'!1 u32 <: &'?3 u32
|
||||
&'!2 u32 <: &'?3 u32
|
||||
```
|
||||
|
||||
and even further, yield up our region constraints:
|
||||
|
||||
```text
|
||||
'!1: '?3
|
||||
'!2: '?3
|
||||
```
|
||||
|
||||
Note that, in this case, both `'!1` and `'!2` have to outlive the
|
||||
variable `'?3`, but the variable `'?3` is not forced to outlive
|
||||
anything else. Therefore, it simply starts and ends as the empty set
|
||||
of elements, and hence the type-check succeeds here.
|
||||
|
||||
(This should surprise you a little. It surprised me when I first realized it.
|
||||
We are saying that if we are a fn that **needs both of its arguments to have
|
||||
the same region**, we can accept being called with **arguments with two
|
||||
distinct regions**. That seems intuitively unsound. But in fact, it's fine, as
|
||||
I tried to explain in [this issue][ohdeargoditsallbroken] on the Rust issue
|
||||
tracker long ago. The reason is that even if we get called with arguments of
|
||||
two distinct lifetimes, those two lifetimes have some intersection (the call
|
||||
itself), and that intersection can be our value of `'a` that we use as the
|
||||
common lifetime of our arguments. -nmatsakis)
|
||||
|
||||
[ohdeargoditsallbroken]: https://github.com/rust-lang/rust/issues/32330#issuecomment-202536977
|
||||
|
||||
## Final example
|
||||
|
||||
Let's look at one last example. We'll extend the previous one to have
|
||||
a return type:
|
||||
|
||||
```text
|
||||
for<'a> fn(&'a u32, &'a u32) -> &'a u32
|
||||
<:
|
||||
for<'b, 'c> fn(&'b u32, &'c u32) -> &'b u32
|
||||
```
|
||||
|
||||
Despite seeming very similar to the previous example, this case is going to get
|
||||
an error. That's good: the problem is that we've gone from a fn that promises
|
||||
to return one of its two arguments, to a fn that is promising to return the
|
||||
first one. That is unsound. Let's see how it plays out.
|
||||
|
||||
First, we replace the bound regions in the supertype with placeholders:
|
||||
|
||||
```text
|
||||
for<'a> fn(&'a u32, &'a u32) -> &'a u32
|
||||
<:
|
||||
fn(&'!1 u32, &'!2 u32) -> &'!1 u32
|
||||
```
|
||||
|
||||
Then we instantiate the subtype with existentials (in U2):
|
||||
|
||||
```text
|
||||
fn(&'?3 u32, &'?3 u32) -> &'?3 u32
|
||||
<:
|
||||
fn(&'!1 u32, &'!2 u32) -> &'!1 u32
|
||||
```
|
||||
|
||||
And now we create the subtyping relationships:
|
||||
|
||||
```text
|
||||
&'!1 u32 <: &'?3 u32 // arg 1
|
||||
&'!2 u32 <: &'?3 u32 // arg 2
|
||||
&'?3 u32 <: &'!1 u32 // return type
|
||||
```
|
||||
|
||||
And finally the outlives relationships. Here, let V1, V2, and V3 be the
|
||||
variables we assign to `!1`, `!2`, and `?3` respectively:
|
||||
|
||||
```text
|
||||
V1: V3
|
||||
V2: V3
|
||||
V3: V1
|
||||
```
|
||||
|
||||
Those variables will have these initial values:
|
||||
|
||||
```text
|
||||
V1 in U1 = {placeholder(1)}
|
||||
V2 in U2 = {placeholder(2)}
|
||||
V3 in U2 = {}
|
||||
```
|
||||
|
||||
Now because of the `V3: V1` constraint, we have to add `placeholder(1)` into `V3` (and
|
||||
indeed it is visible from `V3`), so we get:
|
||||
|
||||
```text
|
||||
V3 in U2 = {placeholder(1)}
|
||||
```
|
||||
|
||||
then we have this constraint `V2: V3`, so we wind up having to enlarge
|
||||
`V2` to include `placeholder(1)` (which it can also see):
|
||||
|
||||
```text
|
||||
V2 in U2 = {placeholder(1), placeholder(2)}
|
||||
```
|
||||
|
||||
Now constraint propagation is done, but when we check the outlives
|
||||
relationships, we find that `V2` includes this new element `placeholder(1)`,
|
||||
so we report an error.
|
||||
100
src/doc/rustc-dev-guide/src/borrow_check/two_phase_borrows.md
Normal file
100
src/doc/rustc-dev-guide/src/borrow_check/two_phase_borrows.md
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
# Two-phase borrows
|
||||
|
||||
Two-phase borrows are a more permissive version of mutable borrows that allow
|
||||
nested method calls such as `vec.push(vec.len())`. Such borrows first act as
|
||||
shared borrows in a "reservation" phase and can later be "activated" into a
|
||||
full mutable borrow.
|
||||
|
||||
Only certain implicit mutable borrows can be two-phase; any `&mut` or `ref mut`
|
||||
in the source code is never a two-phase borrow. The cases where we generate a
|
||||
two-phase borrow are:
|
||||
|
||||
1. The autoref borrow when calling a method with a mutable reference receiver.
|
||||
2. A mutable reborrow in function arguments.
|
||||
3. The implicit mutable borrow in an overloaded compound assignment operator.
|
||||
|
||||
To give some examples:
|
||||
|
||||
```rust2018
|
||||
// In the source code
|
||||
|
||||
// Case 1:
|
||||
let mut v = Vec::new();
|
||||
v.push(v.len());
|
||||
let r = &mut Vec::new();
|
||||
r.push(r.len());
|
||||
|
||||
// Case 2:
|
||||
std::mem::replace(r, vec![1, r.len()]);
|
||||
|
||||
// Case 3:
|
||||
let mut x = std::num::Wrapping(2);
|
||||
x += x;
|
||||
```
|
||||
|
||||
Expanding these enough to show the two-phase borrows:
|
||||
|
||||
```rust,ignore
|
||||
// Case 1:
|
||||
let mut v = Vec::new();
|
||||
let temp1 = &two_phase v;
|
||||
let temp2 = v.len();
|
||||
Vec::push(temp1, temp2);
|
||||
let r = &mut Vec::new();
|
||||
let temp3 = &two_phase *r;
|
||||
let temp4 = r.len();
|
||||
Vec::push(temp3, temp4);
|
||||
|
||||
// Case 2:
|
||||
let temp5 = &two_phase *r;
|
||||
let temp6 = vec![1, r.len()];
|
||||
std::mem::replace(temp5, temp6);
|
||||
|
||||
// Case 3:
|
||||
let mut x = std::num::Wrapping(2);
|
||||
let temp7 = &two_phase x;
|
||||
let temp8 = x;
|
||||
std::ops::AddAssign::add_assign(temp7, temp8);
|
||||
```
|
||||
|
||||
Whether a borrow can be two-phase is tracked by a flag on the [`AutoBorrow`]
|
||||
after type checking, which is then [converted] to a [`BorrowKind`] during MIR
|
||||
construction.
|
||||
|
||||
Each two-phase borrow is assigned to a temporary that is only used once. As
|
||||
such we can define:
|
||||
|
||||
* The point where the temporary is assigned to is called the *reservation*
|
||||
point of the two-phase borrow.
|
||||
* The point where the temporary is used, which is effectively always a
|
||||
function call, is called the *activation* point.
|
||||
|
||||
The activation points are found using the [`GatherBorrows`] visitor. The
|
||||
[`BorrowData`] then holds both the reservation and activation points for the
|
||||
borrow.
|
||||
|
||||
[`AutoBorrow`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/adjustment/enum.AutoBorrow.html
|
||||
[converted]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_build/thir/cx/expr/trait.ToBorrowKind.html#method.to_borrow_kind
|
||||
[`BorrowKind`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/enum.BorrowKind.html
|
||||
[`GatherBorrows`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/visit/trait.Visitor.html#method.visit_local
|
||||
[`BorrowData`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/borrow_set/struct.BorrowData.html
|
||||
|
||||
## Checking two-phase borrows
|
||||
|
||||
Two-phase borrows are treated as if they were mutable borrows with the
|
||||
following exceptions:
|
||||
|
||||
1. At every location in the MIR we [check] if any two-phase borrows are
|
||||
activated at this location. If a live two-phase borrow is activated at a
|
||||
location, then we check that there are no borrows that conflict with the
|
||||
two-phase borrow.
|
||||
2. At the reservation point we error if there are conflicting live *mutable*
|
||||
borrows, and lint if there are any conflicting shared borrows.
|
||||
3. Between the reservation and the activation point, the two-phase borrow acts
|
||||
as a shared borrow. We determine (in [`is_active`]) if we're at such a point
|
||||
by using the [`Dominators`] for the MIR graph.
|
||||
4. After the activation point, the two-phase borrow acts as a mutable borrow.
|
||||
|
||||
[check]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/struct.MirBorrowckCtxt.html#method.check_activations
|
||||
[`Dominators`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/graph/dominators/struct.Dominators.html
|
||||
[`is_active`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/path_utils/fn.is_active.html
|
||||
64
src/doc/rustc-dev-guide/src/borrow_check/type_check.md
Normal file
64
src/doc/rustc-dev-guide/src/borrow_check/type_check.md
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
# The MIR type-check
|
||||
|
||||
A key component of the borrow check is the
|
||||
[MIR type-check](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_borrowck/type_check/index.html).
|
||||
This check walks the MIR and does a complete "type check" -- the same
|
||||
kind you might find in any other language. In the process of doing
|
||||
this type-check, we also uncover the region constraints that apply to
|
||||
the program.
|
||||
|
||||
TODO -- elaborate further? Maybe? :)
|
||||
|
||||
## User types
|
||||
|
||||
At the start of MIR type-check, we replace all regions in the body with new unconstrained regions.
|
||||
However, this would cause us to accept the following program:
|
||||
```rust
|
||||
fn foo<'a>(x: &'a u32) {
|
||||
let y: &'static u32 = x;
|
||||
}
|
||||
```
|
||||
By erasing the lifetimes in the type of `y` we no longer know that it is supposed to be `'static`,
|
||||
ignoring the intentions of the user.
|
||||
|
||||
To deal with this we remember all places where the user explicitly mentioned a type during
|
||||
HIR type-check as [`CanonicalUserTypeAnnotations`][annot].
|
||||
|
||||
There are two different annotations we care about:
|
||||
- explicit type ascriptions, e.g. `let y: &'static u32` results in `UserType::Ty(&'static u32)`.
|
||||
- explicit generic arguments, e.g. `x.foo::<&'a u32, Vec<String>>`
|
||||
results in `UserType::TypeOf(foo_def_id, [&'a u32, Vec<String>])`.
|
||||
|
||||
As we do not want the region inference from the HIR type-check to influence MIR typeck,
|
||||
we store the user type right after lowering it from the HIR.
|
||||
This means that it may still contain inference variables,
|
||||
which is why we are using **canonical** user type annotations.
|
||||
We replace all inference variables with existential bound variables instead.
|
||||
Something like `let x: Vec<_>` would therefore result in `exists<T> UserType::Ty(Vec<T>)`.
|
||||
|
||||
A pattern like `let Foo(x): Foo<&'a u32>` has a user type `Foo<&'a u32>` but
|
||||
the actual type of `x` should only be `&'a u32`. For this, we use a [`UserTypeProjection`][proj].
|
||||
|
||||
In the MIR, we deal with user types in two slightly different ways.
|
||||
|
||||
Given a MIR local corresponding to a variable in a pattern which has an explicit type annotation,
|
||||
we require the type of that local to be equal to the type of the [`UserTypeProjection`][proj].
|
||||
This is directly stored in the [`LocalDecl`][decl].
|
||||
|
||||
We also constrain the type of scrutinee expressions, e.g. the type of `x` in `let _: &'a u32 = x;`.
|
||||
Here the type of `x` only has to be a subtype of the user type, so we instead use
|
||||
[`StatementKind::AscribeUserType`][stmt] for that.
|
||||
|
||||
Note that we do not directly use the user type as the MIR typechecker
|
||||
doesn't really deal with type and const inference variables. We instead store the final
|
||||
[`inferred_ty`][inf] from the HIR type-checker. During MIR typeck, we then replace its regions
|
||||
with new nll inference vars and relate it with the actual `UserType` to get the correct region
|
||||
constraints again.
|
||||
|
||||
After the MIR type-check, all user type annotations get discarded, as they aren't needed anymore.
|
||||
|
||||
[annot]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.CanonicalUserTypeAnnotation.html
|
||||
[proj]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/struct.UserTypeProjection.html
|
||||
[decl]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/struct.LocalDecl.html
|
||||
[stmt]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/enum.StatementKind.html#variant.AscribeUserType
|
||||
[inf]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.CanonicalUserTypeAnnotation.html#structfield.inferred_ty
|
||||
341
src/doc/rustc-dev-guide/src/bug-fix-procedure.md
Normal file
341
src/doc/rustc-dev-guide/src/bug-fix-procedure.md
Normal file
|
|
@ -0,0 +1,341 @@
|
|||
# Procedures for Breaking Changes
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
This page defines the best practices procedure for making bug fixes or soundness
|
||||
corrections in the compiler that can cause existing code to stop compiling. This
|
||||
text is based on
|
||||
[RFC 1589](https://github.com/rust-lang/rfcs/blob/master/text/1589-rustc-bug-fix-procedure.md).
|
||||
|
||||
# Motivation
|
||||
|
||||
[motivation]: #motivation
|
||||
|
||||
From time to time, we encounter the need to make a bug fix, soundness
|
||||
correction, or other change in the compiler which will cause existing code to
|
||||
stop compiling. When this happens, it is important that we handle the change in
|
||||
a way that gives users of Rust a smooth transition. What we want to avoid is
|
||||
that existing programs suddenly stop compiling with opaque error messages: we
|
||||
would prefer to have a gradual period of warnings, with clear guidance as to
|
||||
what the problem is, how to fix it, and why the change was made. This RFC
|
||||
describes the procedure that we have been developing for handling breaking
|
||||
changes that aims to achieve that kind of smooth transition.
|
||||
|
||||
One of the key points of this policy is that (a) warnings should be issued
|
||||
initially rather than hard errors if at all possible and (b) every change that
|
||||
causes existing code to stop compiling will have an associated tracking issue.
|
||||
This issue provides a point to collect feedback on the results of that change.
|
||||
Sometimes changes have unexpectedly large consequences or there may be a way to
|
||||
avoid the change that was not considered. In those cases, we may decide to
|
||||
change course and roll back the change, or find another solution (if warnings
|
||||
are being used, this is particularly easy to do).
|
||||
|
||||
### What qualifies as a bug fix?
|
||||
|
||||
Note that this RFC does not try to define when a breaking change is permitted.
|
||||
That is already covered under [RFC 1122][]. This document assumes that the
|
||||
change being made is in accordance with those policies. Here is a summary of the
|
||||
conditions from RFC 1122:
|
||||
|
||||
- **Soundness changes:** Fixes to holes uncovered in the type system.
|
||||
- **Compiler bugs:** Places where the compiler is not implementing the specified
|
||||
semantics found in an RFC or lang-team decision.
|
||||
- **Underspecified language semantics:** Clarifications to grey areas where the
|
||||
compiler behaves inconsistently and no formal behavior had been previously
|
||||
decided.
|
||||
|
||||
Please see [the RFC][rfc 1122] for full details!
|
||||
|
||||
# Detailed design
|
||||
|
||||
[design]: #detailed-design
|
||||
|
||||
The procedure for making a breaking change is as follows (each of these steps is
|
||||
described in more detail below):
|
||||
|
||||
1. Do a **crater run** to assess the impact of the change.
|
||||
2. Make a **special tracking issue** dedicated to the change.
|
||||
3. Do not report an error right away. Instead, **issue forwards-compatibility
|
||||
lint warnings**.
|
||||
- Sometimes this is not straightforward. See the text below for suggestions
|
||||
on different techniques we have employed in the past.
|
||||
- For cases where warnings are infeasible:
|
||||
- Report errors, but make every effort to give a targeted error message
|
||||
that directs users to the tracking issue
|
||||
- Submit PRs to all known affected crates that fix the issue
|
||||
- or, at minimum, alert the owners of those crates to the problem and
|
||||
direct them to the tracking issue
|
||||
4. Once the change has been in the wild for at least one cycle, we can
|
||||
**stabilize the change**, converting those warnings into errors.
|
||||
|
||||
Finally, for changes to `rustc_ast` that will affect plugins, the general policy
|
||||
is to batch these changes. That is discussed below in more detail.
|
||||
|
||||
### Tracking issue
|
||||
|
||||
Every breaking change should be accompanied by a **dedicated tracking issue**
|
||||
for that change. The main text of this issue should describe the change being
|
||||
made, with a focus on what users must do to fix their code. The issue should be
|
||||
approachable and practical; it may make sense to direct users to an RFC or some
|
||||
other issue for the full details. The issue also serves as a place where users
|
||||
can comment with questions or other concerns.
|
||||
|
||||
A template for these breaking-change tracking issues can be found below. An
|
||||
example of how such an issue should look can be [found
|
||||
here][breaking-change-issue].
|
||||
|
||||
The issue should be tagged with (at least) `B-unstable` and `T-compiler`.
|
||||
|
||||
### Tracking issue template
|
||||
|
||||
This is a template to use for tracking issues:
|
||||
|
||||
```
|
||||
This is the **summary issue** for the `YOUR_LINT_NAME_HERE`
|
||||
future-compatibility warning and other related errors. The goal of
|
||||
this page is to describe why this change was made and how you can fix
|
||||
code that is affected by it. It also provides a place to ask questions
|
||||
or register a complaint if you feel the change should not be made. For
|
||||
more information on the policy around future-compatibility warnings,
|
||||
see our [breaking change policy guidelines][guidelines].
|
||||
|
||||
[guidelines]: LINK_TO_THIS_RFC
|
||||
|
||||
#### What is the warning for?
|
||||
|
||||
**Describe the conditions that trigger the warning and how they can be
|
||||
fixed. Also explain why the change was made.**
|
||||
|
||||
#### When will this warning become a hard error?
|
||||
|
||||
At the beginning of each 6-week release cycle, the Rust compiler team
|
||||
will review the set of outstanding future compatibility warnings and
|
||||
nominate some of them for **Final Comment Period**. Toward the end of
|
||||
the cycle, we will review any comments and make a final determination
|
||||
whether to convert the warning into a hard error or remove it
|
||||
entirely.
|
||||
```
|
||||
|
||||
### Issuing future compatibility warnings
|
||||
|
||||
The best way to handle a breaking change is to begin by issuing
|
||||
future-compatibility warnings. These are a special category of lint warning.
|
||||
Adding a new future-compatibility warning can be done as follows.
|
||||
|
||||
```rust
|
||||
// 1. Define the lint in `compiler/rustc_middle/src/lint/builtin.rs`:
|
||||
declare_lint! {
|
||||
pub YOUR_ERROR_HERE,
|
||||
Warn,
|
||||
"illegal use of foo bar baz"
|
||||
}
|
||||
|
||||
// 2. Add to the list of HardwiredLints in the same file:
|
||||
impl LintPass for HardwiredLints {
|
||||
fn get_lints(&self) -> LintArray {
|
||||
lint_array!(
|
||||
..,
|
||||
YOUR_ERROR_HERE
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Register the lint in `compiler/rustc_lint/src/lib.rs`:
|
||||
store.register_future_incompatible(sess, vec![
|
||||
...,
|
||||
FutureIncompatibleInfo {
|
||||
id: LintId::of(YOUR_ERROR_HERE),
|
||||
reference: "issue #1234", // your tracking issue here!
|
||||
},
|
||||
]);
|
||||
|
||||
// 4. Report the lint:
|
||||
tcx.lint_node(
|
||||
lint::builtin::YOUR_ERROR_HERE,
|
||||
path_id,
|
||||
binding.span,
|
||||
format!("some helper message here"));
|
||||
```
|
||||
|
||||
#### Helpful techniques
|
||||
|
||||
It can often be challenging to filter out new warnings from older, pre-existing
|
||||
errors. One technique that has been used in the past is to run the older code
|
||||
unchanged and collect the errors it would have reported. You can then issue
|
||||
warnings for any errors you would give which do not appear in that original set.
|
||||
Another option is to abort compilation after the original code completes if
|
||||
errors are reported: then you know that your new code will only execute when
|
||||
there were no errors before.
|
||||
|
||||
#### Crater and crates.io
|
||||
|
||||
[Crater] is a bot that will compile all crates.io crates and many
|
||||
public github repos with the compiler with your changes. A report will then be
|
||||
generated with crates that ceased to compile with or began to compile with your
|
||||
changes. Crater runs can take a few days to complete.
|
||||
|
||||
[Crater]: ./tests/crater.md
|
||||
|
||||
We should always do a crater run to assess impact. It is polite and considerate
|
||||
to at least notify the authors of affected crates of the breaking change. If we can
|
||||
submit PRs to fix the problem, so much the better.
|
||||
|
||||
#### Is it ever acceptable to go directly to issuing errors?
|
||||
|
||||
Changes that are believed to have negligible impact can go directly to issuing
|
||||
an error. One rule of thumb would be to check against `crates.io`: if fewer than
|
||||
10 **total** affected projects are found (**not** root errors), we can move
|
||||
straight to an error. In such cases, we should still make the "breaking change"
|
||||
page as before, and we should ensure that the error directs users to this page.
|
||||
In other words, everything should be the same except that users are getting an
|
||||
error, and not a warning. Moreover, we should submit PRs to the affected
|
||||
projects (ideally before the PR implementing the change lands in rustc).
|
||||
|
||||
If the impact is not believed to be negligible (e.g., more than 10 crates are
|
||||
affected), then warnings are required (unless the compiler team agrees to grant
|
||||
a special exemption in some particular case). If implementing warnings is not
|
||||
feasible, then we should adopt an aggressive strategy of migrating crates before
|
||||
we land the change so as to lower the number of affected crates. Here are some
|
||||
techniques for approaching this scenario:
|
||||
|
||||
1. Issue warnings for subparts of the problem, and reserve the new errors for
|
||||
the smallest set of cases you can.
|
||||
2. Try to give a very precise error message that suggests how to fix the problem
|
||||
and directs users to the tracking issue.
|
||||
3. It may also make sense to layer the fix:
|
||||
- First, add warnings where possible and let those land before proceeding to
|
||||
issue errors.
|
||||
- Work with authors of affected crates to ensure that corrected versions are
|
||||
available _before_ the fix lands, so that downstream users can use them.
|
||||
|
||||
### Stabilization
|
||||
|
||||
After a change is made, we will **stabilize** the change using the same process
|
||||
that we use for unstable features:
|
||||
|
||||
- After a new release is made, we will go through the outstanding tracking
|
||||
issues corresponding to breaking changes and nominate some of them for **final
|
||||
comment period** (FCP).
|
||||
- The FCP for such issues lasts for one cycle. In the final week or two of the
|
||||
cycle, we will review comments and make a final determination:
|
||||
|
||||
- Convert to error: the change should be made into a hard error.
|
||||
- Revert: we should remove the warning and continue to allow the older code to
|
||||
compile.
|
||||
- Defer: can't decide yet, wait longer, or try other strategies.
|
||||
|
||||
Ideally, breaking changes should have landed on the **stable branch** of the
|
||||
compiler before they are finalized.
|
||||
|
||||
<a id="guide"></a>
|
||||
|
||||
### Removing a lint
|
||||
|
||||
Once we have decided to make a "future warning" into a hard error, we need a PR
|
||||
that removes the custom lint. As an example, here are the steps required to
|
||||
remove the `overlapping_inherent_impls` compatibility lint. First, convert the
|
||||
name of the lint to uppercase (`OVERLAPPING_INHERENT_IMPLS`) and ripgrep through the
|
||||
source for that string. We will basically be converting each place where this
|
||||
lint name is mentioned (in the compiler, we use the upper-case name, and a macro
|
||||
automatically generates the lower-case string; so searching for
|
||||
`overlapping_inherent_impls` would not find much).
|
||||
|
||||
> NOTE: these exact files don't exist anymore, but the procedure is still the same.
|
||||
|
||||
#### Remove the lint.
|
||||
|
||||
The first reference you will likely find is the lint definition [in
|
||||
`rustc_session/src/lint/builtin.rs` that resembles this][defsource]:
|
||||
|
||||
[defsource]: https://github.com/rust-lang/rust/blob/085d71c3efe453863739c1fb68fd9bd1beff214f/src/librustc/lint/builtin.rs#L171-L175
|
||||
|
||||
```rust
|
||||
declare_lint! {
|
||||
pub OVERLAPPING_INHERENT_IMPLS,
|
||||
Deny, // this may also say Warn
|
||||
"two overlapping inherent impls define an item with the same name were erroneously allowed"
|
||||
}
|
||||
```
|
||||
|
||||
This `declare_lint!` macro creates the relevant data structures. Remove it. You
|
||||
will also find that there is a mention of `OVERLAPPING_INHERENT_IMPLS` later in
|
||||
the file as [part of a `lint_array!`][lintarraysource]; remove it too.
|
||||
|
||||
[lintarraysource]: https://github.com/rust-lang/rust/blob/085d71c3efe453863739c1fb68fd9bd1beff214f/src/librustc/lint/builtin.rs#L252-L290
|
||||
|
||||
Next, you see [a reference to `OVERLAPPING_INHERENT_IMPLS` in
|
||||
`rustc_lint/src/lib.rs`][futuresource]. This is defining the lint as a "future
|
||||
compatibility lint":
|
||||
|
||||
```rust
|
||||
FutureIncompatibleInfo {
|
||||
id: LintId::of(OVERLAPPING_INHERENT_IMPLS),
|
||||
reference: "issue #36889 <https://github.com/rust-lang/rust/issues/36889>",
|
||||
},
|
||||
```
|
||||
|
||||
Remove this too.
|
||||
|
||||
#### Add the lint to the list of removed lints.
|
||||
|
||||
In `compiler/rustc_lint/src/lib.rs` there is a list of "renamed and removed lints".
|
||||
You can add this lint to the list:
|
||||
|
||||
```rust
|
||||
store.register_removed("overlapping_inherent_impls", "converted into hard error, see #36889");
|
||||
```
|
||||
|
||||
where `#36889` is the tracking issue for your lint.
|
||||
|
||||
#### Update the places that issue the lint
|
||||
|
||||
Finally, the last class of references you will see are the places that actually
|
||||
**trigger** the lint itself (i.e., what causes the warnings to appear). These
|
||||
you do not want to delete. Instead, you want to convert them into errors. In
|
||||
this case, the [`add_lint` call][addlintsource] looks like this:
|
||||
|
||||
```rust
|
||||
self.tcx.sess.add_lint(lint::builtin::OVERLAPPING_INHERENT_IMPLS,
|
||||
node_id,
|
||||
self.tcx.span_of_impl(item1).unwrap(),
|
||||
msg);
|
||||
```
|
||||
|
||||
We want to convert this into an error. In some cases, there may be an
|
||||
existing error for this scenario. In others, we will need to allocate a
|
||||
fresh diagnostic code. [Instructions for allocating a fresh diagnostic
|
||||
code can be found here.](./diagnostics/error-codes.md) You may want
|
||||
to mention in the extended description that the compiler behavior
|
||||
changed on this point, and include a reference to the tracking issue for
|
||||
the change.
|
||||
|
||||
Let's say that we've adopted `E0592` as our code. Then we can change the
|
||||
`add_lint()` call above to something like:
|
||||
|
||||
```rust
|
||||
struct_span_code_err!(self.dcx(), self.tcx.span_of_impl(item1).unwrap(), E0592, msg)
|
||||
.emit();
|
||||
```
|
||||
|
||||
#### Update tests
|
||||
|
||||
Finally, run the test suite. There should be some tests that used to reference
|
||||
the `overlapping_inherent_impls` lint; those will need to be updated. In
|
||||
general, if the test used to have `#[deny(overlapping_inherent_impls)]`, that
|
||||
can just be removed.
|
||||
|
||||
```
|
||||
./x test
|
||||
```
|
||||
|
||||
#### All done!
|
||||
|
||||
Open a PR. =)
|
||||
|
||||
[addlintsource]: https://github.com/rust-lang/rust/blob/085d71c3efe453863739c1fb68fd9bd1beff214f/src/librustc_typeck/coherence/inherent.rs#L300-L303
|
||||
[futuresource]: https://github.com/rust-lang/rust/blob/085d71c3efe453863739c1fb68fd9bd1beff214f/src/librustc_lint/lib.rs#L202-L205
|
||||
|
||||
<!-- -Links--------------------------------------------------------------------- -->
|
||||
|
||||
[rfc 1122]: https://github.com/rust-lang/rfcs/blob/master/text/1122-language-semver.md
|
||||
[breaking-change-issue]: https://gist.github.com/nikomatsakis/631ec8b4af9a18b5d062d9d9b7d3d967
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# How Bootstrap does it
|
||||
|
||||
The core concept in Bootstrap is a build [`Step`]; steps are chained together
|
||||
by [`Builder::ensure`]. [`Builder::ensure`] takes a [`Step`] as input, and runs
|
||||
the [`Step`] if and only if it has not already been run. Let's take a closer
|
||||
look at [`Step`].
|
||||
|
||||
## Synopsis of [`Step`]
|
||||
|
||||
A [`Step`] represents a granular collection of actions involved in the process
|
||||
of producing some artifact. It can be thought of like a rule in Makefiles.
|
||||
The [`Step`] trait is defined as:
|
||||
|
||||
```rs,no_run
|
||||
pub trait Step: 'static + Clone + Debug + PartialEq + Eq + Hash {
|
||||
type Output: Clone;
|
||||
|
||||
const DEFAULT: bool = false;
|
||||
const ONLY_HOSTS: bool = false;
|
||||
|
||||
// Required methods
|
||||
fn run(self, builder: &Builder<'_>) -> Self::Output;
|
||||
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_>;
|
||||
|
||||
// Provided method
|
||||
fn make_run(_run: RunConfig<'_>) { ... }
|
||||
}
|
||||
```
|
||||
|
||||
- `run` is the function that is responsible for doing the work.
|
||||
[`Builder::ensure`] invokes `run`.
|
||||
- `should_run` is the command-line interface, which determines if an invocation
|
||||
such as `x build foo` should run a given [`Step`]. In a "default" context
|
||||
where no paths are provided, `make_run` is called directly.
|
||||
- `make_run` is invoked only for things directly asked via the CLI and not
|
||||
for steps which are dependencies of other steps.
|
||||
|
||||
## The entry points
|
||||
|
||||
There are a couple of preliminary steps before core Bootstrap code is reached:
|
||||
|
||||
1. Shell script or `make`: [`./x`](https://github.com/rust-lang/rust/blob/master/x) or [`./x.ps1`](https://github.com/rust-lang/rust/blob/master/x.ps1) or `make`
|
||||
2. Convenience wrapper script: [`x.py`](https://github.com/rust-lang/rust/blob/master/x.py)
|
||||
3. [`src/bootstrap/bootstrap.py`](https://github.com/rust-lang/rust/blob/master/src/bootstrap/bootstrap.py)
|
||||
4. [`src/bootstrap/src/bin/main.rs`](https://github.com/rust-lang/rust/blob/master/src/bootstrap/src/bin/main.rs)
|
||||
|
||||
See [src/bootstrap/README.md](https://github.com/rust-lang/rust/blob/master/src/bootstrap/README.md)
|
||||
for a more specific description of the implementation details.
|
||||
|
||||
[`Step`]: https://doc.rust-lang.org/nightly/nightly-rustc/bootstrap/core/builder/trait.Step.html
|
||||
[`Builder::ensure`]: https://doc.rust-lang.org/nightly/nightly-rustc/bootstrap/core/builder/struct.Builder.html#method.ensure
|
||||
21
src/doc/rustc-dev-guide/src/building/bootstrapping/intro.md
Normal file
21
src/doc/rustc-dev-guide/src/building/bootstrapping/intro.md
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
# Bootstrapping the compiler
|
||||
|
||||
[*Bootstrapping*][boot] is the process of using a compiler to compile itself.
|
||||
More accurately, it means using an older compiler to compile a newer version
|
||||
of the same compiler.
|
||||
|
||||
This raises a chicken-and-egg paradox: where did the first compiler come from?
|
||||
It must have been written in a different language. In Rust's case it was
|
||||
[written in OCaml][ocaml-compiler]. However it was abandoned long ago and the
|
||||
only way to build a modern version of rustc is with a slightly less modern
|
||||
version.
|
||||
|
||||
This is exactly how `x.py` works: it downloads the current beta release of
|
||||
rustc, then uses it to compile the new compiler.
|
||||
|
||||
In this section, we give a high-level overview of
|
||||
[what Bootstrap does](./what-bootstrapping-does.md), followed by a high-level
|
||||
introduction to [how Bootstrap does it](./how-bootstrap-does-it.md).
|
||||
|
||||
[boot]: https://en.wikipedia.org/wiki/Bootstrapping_(compilers)
|
||||
[ocaml-compiler]: https://github.com/rust-lang/rust/tree/ef75860a0a72f79f97216f8aaa5b388d98da6480/src/boot
|
||||
|
|
@ -0,0 +1,465 @@
|
|||
# What Bootstrapping does
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
[*Bootstrapping*][boot] is the process of using a compiler to compile itself.
|
||||
More accurately, it means using an older compiler to compile a newer version of
|
||||
the same compiler.
|
||||
|
||||
This raises a chicken-and-egg paradox: where did the first compiler come from?
|
||||
It must have been written in a different language. In Rust's case it was
|
||||
[written in OCaml][ocaml-compiler]. However it was abandoned long ago and the
|
||||
only way to build a modern version of `rustc` is with a slightly less modern version.
|
||||
|
||||
This is exactly how [`./x.py`] works: it downloads the current beta release of
|
||||
`rustc`, then uses it to compile the new compiler.
|
||||
|
||||
[`./x.py`]: https://github.com/rust-lang/rust/blob/master/x.py
|
||||
|
||||
Note that this documentation mostly covers user-facing information. See
|
||||
[bootstrap/README.md][bootstrap-internals] to read about bootstrap internals.
|
||||
|
||||
[bootstrap-internals]: https://github.com/rust-lang/rust/blob/master/src/bootstrap/README.md
|
||||
|
||||
## Stages of bootstrapping
|
||||
|
||||
### Overview
|
||||
|
||||
- Stage 0: the pre-compiled compiler
|
||||
- Stage 1: from current code, by an earlier compiler
|
||||
- Stage 2: the truly current compiler
|
||||
- Stage 3: the same-result test
|
||||
|
||||
Compiling `rustc` is done in stages. Here's a diagram, adapted from Jynn
|
||||
Nelson's [talk on bootstrapping][rustconf22-talk] at RustConf 2022, with
|
||||
detailed explanations below.
|
||||
|
||||
The `A`, `B`, `C`, and `D` show the ordering of the stages of bootstrapping.
|
||||
<span style="background-color: lightblue; color: black">Blue</span> nodes are
|
||||
downloaded, <span style="background-color: yellow; color: black">yellow</span>
|
||||
nodes are built with the `stage0` compiler, and <span style="background-color:
|
||||
lightgreen; color: black">green</span> nodes are built with the `stage1`
|
||||
compiler.
|
||||
|
||||
[rustconf22-talk]: https://www.youtube.com/watch?v=oUIjG-y4zaA
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
s0c["stage0 compiler (1.63)"]:::downloaded -->|A| s0l("stage0 std (1.64)"):::with-s0c;
|
||||
s0c & s0l --- stepb[ ]:::empty;
|
||||
stepb -->|B| s0ca["stage0 compiler artifacts (1.64)"]:::with-s0c;
|
||||
s0ca -->|copy| s1c["stage1 compiler (1.64)"]:::with-s0c;
|
||||
s1c -->|C| s1l("stage1 std (1.64)"):::with-s1c;
|
||||
s1c & s1l --- stepd[ ]:::empty;
|
||||
stepd -->|D| s1ca["stage1 compiler artifacts (1.64)"]:::with-s1c;
|
||||
s1ca -->|copy| s2c["stage2 compiler"]:::with-s1c;
|
||||
|
||||
classDef empty width:0px,height:0px;
|
||||
classDef downloaded fill: lightblue;
|
||||
classDef with-s0c fill: yellow;
|
||||
classDef with-s1c fill: lightgreen;
|
||||
```
|
||||
|
||||
### Stage 0: the pre-compiled compiler
|
||||
|
||||
The stage0 compiler is usually the current _beta_ `rustc` compiler and its
|
||||
associated dynamic libraries, which `./x.py` will download for you. (You can
|
||||
also configure `./x.py` to use something else.)
|
||||
|
||||
The stage0 compiler is then used only to compile [`src/bootstrap`],
|
||||
[`library/std`], and [`compiler/rustc`]. When assembling the libraries and
|
||||
binaries that will become the stage1 `rustc` compiler, the freshly compiled
|
||||
`std` and `rustc` are used. There are two concepts at play here: a compiler
|
||||
(with its set of dependencies) and its 'target' or 'object' libraries (`std` and
|
||||
`rustc`). Both are staged, but in a staggered manner.
|
||||
|
||||
[`compiler/rustc`]: https://github.com/rust-lang/rust/tree/master/compiler/rustc
|
||||
[`library/std`]: https://github.com/rust-lang/rust/tree/master/library/std
|
||||
[`src/bootstrap`]: https://github.com/rust-lang/rust/tree/master/src/bootstrap
|
||||
|
||||
### Stage 1: from current code, by an earlier compiler
|
||||
|
||||
The rustc source code is then compiled with the `stage0` compiler to produce the
|
||||
`stage1` compiler.
|
||||
|
||||
### Stage 2: the truly current compiler
|
||||
|
||||
We then rebuild our `stage1` compiler with itself to produce the `stage2`
|
||||
compiler.
|
||||
|
||||
In theory, the `stage1` compiler is functionally identical to the `stage2`
|
||||
compiler, but in practice there are subtle differences. In particular, the
|
||||
`stage1` compiler itself was built by `stage0` and hence not by the source in
|
||||
your working directory. This means that the ABI generated by the `stage0`
|
||||
compiler may not match the ABI that would have been made by the `stage1`
|
||||
compiler, which can cause problems for dynamic libraries, tests, and tools using
|
||||
`rustc_private`.
|
||||
|
||||
Note that the `proc_macro` crate avoids this issue with a `C` FFI layer called
|
||||
`proc_macro::bridge`, allowing it to be used with `stage1`.
|
||||
|
||||
The `stage2` compiler is the one distributed with `rustup` and all other install
|
||||
methods. However, it takes a very long time to build because one must first
|
||||
build the new compiler with an older compiler and then use that to build the new
|
||||
compiler with itself. For development, you usually only want the `stage1`
|
||||
compiler, which you can build with `./x build library`. See [Building the
|
||||
compiler](../how-to-build-and-run.html#building-the-compiler).
|
||||
|
||||
### Stage 3: the same-result test
|
||||
|
||||
Stage 3 is optional. To sanity check our new compiler we can build the libraries
|
||||
with the `stage2` compiler. The result ought to be identical to before, unless
|
||||
something has broken.
|
||||
|
||||
### Building the stages
|
||||
|
||||
The script [`./x`] tries to be helpful and pick the stage you most likely meant
|
||||
for each subcommand. These defaults are as follows:
|
||||
|
||||
- `check`: `--stage 0`
|
||||
- `doc`: `--stage 0`
|
||||
- `build`: `--stage 1`
|
||||
- `test`: `--stage 1`
|
||||
- `dist`: `--stage 2`
|
||||
- `install`: `--stage 2`
|
||||
- `bench`: `--stage 2`
|
||||
|
||||
You can always override the stage by passing `--stage N` explicitly.
|
||||
|
||||
For more information about stages, [see
|
||||
below](#understanding-stages-of-bootstrap).
|
||||
|
||||
[`./x`]: https://github.com/rust-lang/rust/blob/master/x
|
||||
|
||||
## Complications of bootstrapping
|
||||
|
||||
Since the build system uses the current beta compiler to build a `stage1`
|
||||
bootstrapping compiler, the compiler source code can't use some features until
|
||||
they reach beta (because otherwise the beta compiler doesn't support them). On
|
||||
the other hand, for [compiler intrinsics][intrinsics] and internal features, the
|
||||
features _have_ to be used. Additionally, the compiler makes heavy use of
|
||||
`nightly` features (`#![feature(...)]`). How can we resolve this problem?
|
||||
|
||||
There are two methods used:
|
||||
|
||||
1. The build system sets `--cfg bootstrap` when building with `stage0`, so we
|
||||
can use `cfg(not(bootstrap))` to only use features when built with `stage1`.
|
||||
Setting `--cfg bootstrap` in this way is used for features that were just
|
||||
stabilized, which require `#![feature(...)]` when built with `stage0`, but
|
||||
not for `stage1`.
|
||||
2. The build system sets `RUSTC_BOOTSTRAP=1`. This special variable means to
|
||||
_break the stability guarantees_ of Rust: allowing use of `#![feature(...)]`
|
||||
with a compiler that's not `nightly`. _Setting `RUSTC_BOOTSTRAP=1` should
|
||||
never be used except when bootstrapping the compiler._
|
||||
|
||||
[boot]: https://en.wikipedia.org/wiki/Bootstrapping_(compilers)
|
||||
[intrinsics]: ../../appendix/glossary.md#intrinsic
|
||||
[ocaml-compiler]: https://github.com/rust-lang/rust/tree/ef75860a0a72f79f97216f8aaa5b388d98da6480/src/boot
|
||||
|
||||
## Understanding stages of bootstrap
|
||||
|
||||
### Overview
|
||||
|
||||
This is a detailed look into the separate bootstrap stages.
|
||||
|
||||
The convention `./x` uses is that:
|
||||
|
||||
- A `--stage N` flag means to run the stage N compiler (`stageN/rustc`).
|
||||
- A "stage N artifact" is a build artifact that is _produced_ by the stage N
|
||||
compiler.
|
||||
- The stage N+1 compiler is assembled from stage N *artifacts*. This process is
|
||||
called _uplifting_.
|
||||
|
||||
#### Build artifacts
|
||||
|
||||
Anything you can build with `./x` is a _build artifact_. Build artifacts
|
||||
include, but are not limited to:
|
||||
|
||||
- binaries, like `stage0-rustc/rustc-main`
|
||||
- shared objects, like `stage0-sysroot/rustlib/libstd-6fae108520cf72fe.so`
|
||||
- [rlib] files, like `stage0-sysroot/rustlib/libstd-6fae108520cf72fe.rlib`
|
||||
- HTML files generated by rustdoc, like `doc/std`
|
||||
|
||||
[rlib]: ../../serialization.md
|
||||
|
||||
#### Examples
|
||||
|
||||
- `./x test tests/ui` means to build the `stage1` compiler and run `compiletest`
|
||||
on it. If you're working on the compiler, this is normally the test command
|
||||
you want.
|
||||
- `./x test --stage 0 library/std` means to run tests on the standard library
|
||||
without building `rustc` from source ('build with `stage0`, then test the
|
||||
artifacts'). If you're working on the standard library, this is normally the
|
||||
test command you want.
|
||||
- `./x build --stage 0` means to build with the beta `rustc`.
|
||||
- `./x doc --stage 0` means to document using the beta `rustdoc`.
|
||||
|
||||
#### Examples of what *not* to do
|
||||
|
||||
- `./x test --stage 0 tests/ui` is not useful: it runs tests on the _beta_
|
||||
compiler and doesn't build `rustc` from source. Use `test tests/ui` instead,
|
||||
which builds `stage1` from source.
|
||||
- `./x test --stage 0 compiler/rustc` builds the compiler but runs no tests:
|
||||
it's running `cargo test -p rustc`, but `cargo` doesn't understand Rust's
|
||||
tests. You shouldn't need to use this, use `test` instead (without arguments).
|
||||
- `./x build --stage 0 compiler/rustc` builds the compiler, but does not build
|
||||
`libstd` or even `libcore`. Most of the time, you'll want `./x build library`
|
||||
instead, which allows compiling programs without needing to define lang items.
|
||||
|
||||
### Building vs. running
|
||||
|
||||
Note that `build --stage N compiler/rustc` **does not** build the stage N
|
||||
compiler: instead it builds the stage N+1 compiler _using_ the stage N compiler.
|
||||
|
||||
In short, _stage 0 uses the `stage0` compiler to create `stage0` artifacts which
|
||||
will later be uplifted to be the stage1 compiler_.
|
||||
|
||||
In each stage, two major steps are performed:
|
||||
|
||||
1. `std` is compiled by the stage N compiler.
|
||||
2. That `std` is linked to programs built by the stage N compiler, including the
|
||||
stage N artifacts (stage N+1 compiler).
|
||||
|
||||
This is somewhat intuitive if one thinks of the stage N artifacts as "just"
|
||||
another program we are building with the stage N compiler: `build --stage N
|
||||
compiler/rustc` is linking the stage N artifacts to the `std` built by the stage
|
||||
N compiler.
|
||||
|
||||
### Stages and `std`
|
||||
|
||||
Note that there are two `std` libraries in play here:
|
||||
|
||||
1. The library _linked_ to `stageN/rustc`, which was built by stage N-1 (stage
|
||||
N-1 `std`)
|
||||
2. The library _used to compile programs_ with `stageN/rustc`, which was built
|
||||
by stage N (stage N `std`).
|
||||
|
||||
Stage N `std` is pretty much necessary for any useful work with the stage N
|
||||
compiler. Without it, you can only compile programs with `#![no_core]` -- not
|
||||
terribly useful!
|
||||
|
||||
The reason these need to be different is because they aren't necessarily
|
||||
ABI-compatible: there could be new layout optimizations, changes to `MIR`, or
|
||||
other changes to Rust metadata on `nightly` that aren't present in beta.
|
||||
|
||||
This is also where `--keep-stage 1 library/std` comes into play. Since most
|
||||
changes to the compiler don't actually change the ABI, once you've produced a
|
||||
`std` in `stage1`, you can probably just reuse it with a different compiler. If
|
||||
the ABI hasn't changed, you're good to go, no need to spend time recompiling
|
||||
that `std`. The flag `--keep-stage` simply instructs the build script to assume
|
||||
the previous compile is fine and copies those artifacts into the appropriate
|
||||
place, skipping the `cargo` invocation.
|
||||
|
||||
### Cross-compiling rustc
|
||||
|
||||
*Cross-compiling* is the process of compiling code that will run on another
|
||||
architecture. For instance, you might want to build an ARM version of rustc
|
||||
using an x86 machine. Building `stage2` `std` is different when you are
|
||||
cross-compiling.
|
||||
|
||||
This is because `./x` uses the following logic: if `HOST` and `TARGET` are the
|
||||
same, it will reuse `stage1` `std` for `stage2`! This is sound because `stage1`
|
||||
`std` was compiled with the `stage1` compiler, i.e. a compiler using the source
|
||||
code you currently have checked out. So it should be identical (and therefore
|
||||
ABI-compatible) to the `std` that `stage2/rustc` would compile.
|
||||
|
||||
However, when cross-compiling, `stage1` `std` will only run on the host. So the
|
||||
`stage2` compiler has to recompile `std` for the target.
|
||||
|
||||
(See in the table how `stage2` only builds non-host `std` targets).
|
||||
|
||||
### Why does only libstd use `cfg(bootstrap)`?
|
||||
|
||||
For docs on `cfg(bootstrap)` itself, see [Complications of
|
||||
Bootstrapping](#complications-of-bootstrapping).
|
||||
|
||||
The `rustc` generated by the `stage0` compiler is linked to the freshly-built
|
||||
`std`, which means that for the most part only `std` needs to be `cfg`-gated, so
|
||||
that `rustc` can use features added to `std` immediately after their addition,
|
||||
without need for them to get into the downloaded `beta` compiler.
|
||||
|
||||
Note this is different from any other Rust program: `stage1` `rustc` is built by
|
||||
the _beta_ compiler, but using the _master_ version of `libstd`!
|
||||
|
||||
The only time `rustc` uses `cfg(bootstrap)` is when it adds internal lints that
|
||||
use diagnostic items, or when it uses unstable library features that were
|
||||
recently changed.
|
||||
|
||||
### What is a 'sysroot'?
|
||||
|
||||
When you build a project with `cargo`, the build artifacts for dependencies are
|
||||
normally stored in `target/debug/deps`. This only contains dependencies `cargo`
|
||||
knows about; in particular, it doesn't have the standard library. Where do `std`
|
||||
or `proc_macro` come from? They come from the **sysroot**, the root of a number
|
||||
of directories where the compiler loads build artifacts at runtime. The
|
||||
`sysroot` doesn't just store the standard library, though - it includes anything
|
||||
that needs to be loaded at runtime. That includes (but is not limited to):
|
||||
|
||||
- Libraries `libstd`/`libtest`/`libproc_macro`.
|
||||
- Compiler crates themselves, when using `rustc_private`. In-tree these are
|
||||
always present; out of tree, you need to install `rustc-dev` with `rustup`.
|
||||
- Shared object file `libLLVM.so` for the LLVM project. In-tree this is either
|
||||
built from source or downloaded from CI; out-of-tree, you need to install
|
||||
`llvm-tools-preview` with `rustup`.
|
||||
|
||||
All the artifacts listed so far are *compiler* runtime dependencies. You can see
|
||||
them with `rustc --print sysroot`:
|
||||
|
||||
```
|
||||
$ ls $(rustc --print sysroot)/lib
|
||||
libchalk_derive-0685d79833dc9b2b.so libstd-25c6acf8063a3802.so
|
||||
libLLVM-11-rust-1.50.0-nightly.so libtest-57470d2aa8f7aa83.so
|
||||
librustc_driver-4f0cc9f50e53f0ba.so libtracing_attributes-e4be92c35ab2a33b.so
|
||||
librustc_macros-5f0ec4a119c6ac86.so rustlib
|
||||
```
|
||||
|
||||
There are also runtime dependencies for the standard library! These are in
|
||||
`lib/rustlib/`, not `lib/` directly.
|
||||
|
||||
```
|
||||
$ ls $(rustc --print sysroot)/lib/rustlib/x86_64-unknown-linux-gnu/lib | head -n 5
|
||||
libaddr2line-6c8e02b8fedc1e5f.rlib
|
||||
libadler-9ef2480568df55af.rlib
|
||||
liballoc-9c4002b5f79ba0e1.rlib
|
||||
libcfg_if-512eb53291f6de7e.rlib
|
||||
libcompiler_builtins-ef2408da76957905.rlib
|
||||
```
|
||||
|
||||
Directory `lib/rustlib/` includes libraries like `hashbrown` and `cfg_if`, which
|
||||
are not part of the public API of the standard library, but are used to
|
||||
implement it. Also `lib/rustlib/` is part of the search path for linkers, but
|
||||
`lib` will never be part of the search path.
|
||||
|
||||
#### `-Z force-unstable-if-unmarked`
|
||||
|
||||
Since `lib/rustlib/` is part of the search path we have to be careful about
|
||||
which crates are included in it. In particular, all crates except for the
|
||||
standard library are built with the flag `-Z force-unstable-if-unmarked`, which
|
||||
means that you have to use `#![feature(rustc_private)]` in order to load it (as
|
||||
opposed to the standard library, which is always available).
|
||||
|
||||
The `-Z force-unstable-if-unmarked` flag has a variety of purposes to help
|
||||
enforce that the correct crates are marked as `unstable`. It was introduced
|
||||
primarily to allow rustc and the standard library to link to arbitrary crates on
|
||||
crates.io which do not themselves use `staged_api`. `rustc` also relies on this
|
||||
flag to mark all of its crates as `unstable` with the `rustc_private` feature so
|
||||
that each crate does not need to be carefully marked with `unstable`.
|
||||
|
||||
This flag is automatically applied to all of `rustc` and the standard library by
|
||||
the bootstrap scripts. This is needed because the compiler and all of its
|
||||
dependencies are shipped in `sysroot` to all users.
|
||||
|
||||
This flag has the following effects:
|
||||
|
||||
- Marks the crate as "`unstable`" with the `rustc_private` feature if it is not
|
||||
itself marked as `stable` or `unstable`.
|
||||
- Allows these crates to access other forced-unstable crates without any need
|
||||
for attributes. Normally a crate would need a `#![feature(rustc_private)]`
|
||||
attribute to use other `unstable` crates. However, that would make it
|
||||
impossible for a crate from crates.io to access its own dependencies since
|
||||
that crate won't have a `feature(rustc_private)` attribute, but *everything*
|
||||
is compiled with `-Z force-unstable-if-unmarked`.
|
||||
|
||||
Code which does not use `-Z force-unstable-if-unmarked` should include the
|
||||
`#![feature(rustc_private)]` crate attribute to access these forced-unstable
|
||||
crates. This is needed for things which link `rustc` itself, such as `MIRI` or
|
||||
`clippy`.
|
||||
|
||||
You can find more discussion about sysroots in:
|
||||
- The [rustdoc PR] explaining why it uses `extern crate` for dependencies loaded
|
||||
from `sysroot`
|
||||
- [Discussions about sysroot on
|
||||
Zulip](https://rust-lang.zulipchat.com/#narrow/stream/182449-t-compiler.2Fhelp/topic/deps.20in.20sysroot/)
|
||||
- [Discussions about building rustdoc out of
|
||||
tree](https://rust-lang.zulipchat.com/#narrow/stream/182449-t-compiler.2Fhelp/topic/How.20to.20create.20an.20executable.20accessing.20.60rustc_private.60.3F)
|
||||
|
||||
[rustdoc PR]: https://github.com/rust-lang/rust/pull/76728
|
||||
|
||||
## Passing flags to commands invoked by `bootstrap`
|
||||
|
||||
Conveniently `./x` allows you to pass stage-specific flags to `rustc` and
|
||||
`cargo` when bootstrapping. The `RUSTFLAGS_BOOTSTRAP` environment variable is
|
||||
passed as `RUSTFLAGS` to the bootstrap stage (`stage0`), and
|
||||
`RUSTFLAGS_NOT_BOOTSTRAP` is passed when building artifacts for later stages.
|
||||
`RUSTFLAGS` will work, but also affects the build of `bootstrap` itself, so it
|
||||
will be rare to want to use it. Finally, `MAGIC_EXTRA_RUSTFLAGS` bypasses the
|
||||
`cargo` cache to pass flags to rustc without recompiling all dependencies.
|
||||
|
||||
- `RUSTDOCFLAGS`, `RUSTDOCFLAGS_BOOTSTRAP` and `RUSTDOCFLAGS_NOT_BOOTSTRAP` are
|
||||
analogous to `RUSTFLAGS`, but for `rustdoc`.
|
||||
- `CARGOFLAGS` will pass arguments to cargo itself (e.g. `--timings`).
|
||||
`CARGOFLAGS_BOOTSTRAP` and `CARGOFLAGS_NOT_BOOTSTRAP` work analogously to
|
||||
`RUSTFLAGS_BOOTSTRAP`.
|
||||
- `--test-args` will pass arguments through to the test runner. For `tests/ui`,
|
||||
this is `compiletest`. For unit tests and doc tests this is the `libtest`
|
||||
runner.
|
||||
|
||||
Most test runners accept `--help`, which you can use to find out the options
|
||||
accepted by the runner.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
During bootstrapping, there are a bunch of compiler-internal environment
|
||||
variables that are used. If you are trying to run an intermediate version of
|
||||
`rustc`, sometimes you may need to set some of these environment variables
|
||||
manually. Otherwise, you get an error like the following:
|
||||
|
||||
```text
|
||||
thread 'main' panicked at 'RUSTC_STAGE was not set: NotPresent', library/core/src/result.rs:1165:5
|
||||
```
|
||||
|
||||
If `./stageN/bin/rustc` gives an error about environment variables, that usually
|
||||
means something is quite wrong -- such as you're trying to compile `rustc` or
|
||||
`std` or something which depends on environment variables. In the unlikely case
|
||||
that you actually need to invoke `rustc` in such a situation, you can tell the
|
||||
bootstrap shim to print all `env` variables by adding `-vvv` to your `x`
|
||||
command.
|
||||
|
||||
Finally, bootstrap makes use of the [cc-rs crate] which has [its own
|
||||
method][env-vars] of configuring `C` compilers and `C` flags via environment
|
||||
variables.
|
||||
|
||||
[cc-rs crate]: https://github.com/rust-lang/cc-rs
|
||||
[env-vars]: https://docs.rs/cc/latest/cc/#external-configuration-via-environment-variables
|
||||
|
||||
## Clarification of build command's `stdout`
|
||||
|
||||
In this part, we will investigate the build command's `stdout` in action
|
||||
(similar to, but more detailed and complete than, the documentation in the topic above).
|
||||
When you execute `x build --dry-run` command, the build output will be something
|
||||
like the following:
|
||||
|
||||
```text
|
||||
Building stage0 library artifacts (x86_64-unknown-linux-gnu -> x86_64-unknown-linux-gnu)
|
||||
Copying stage0 library from stage0 (x86_64-unknown-linux-gnu -> x86_64-unknown-linux-gnu / x86_64-unknown-linux-gnu)
|
||||
Building stage0 compiler artifacts (x86_64-unknown-linux-gnu -> x86_64-unknown-linux-gnu)
|
||||
Copying stage0 rustc from stage0 (x86_64-unknown-linux-gnu -> x86_64-unknown-linux-gnu / x86_64-unknown-linux-gnu)
|
||||
Assembling stage1 compiler (x86_64-unknown-linux-gnu)
|
||||
Building stage1 library artifacts (x86_64-unknown-linux-gnu -> x86_64-unknown-linux-gnu)
|
||||
Copying stage1 library from stage1 (x86_64-unknown-linux-gnu -> x86_64-unknown-linux-gnu / x86_64-unknown-linux-gnu)
|
||||
Building stage1 tool rust-analyzer-proc-macro-srv (x86_64-unknown-linux-gnu)
|
||||
Building rustdoc for stage1 (x86_64-unknown-linux-gnu)
|
||||
```
|
||||
|
||||
### Building stage0 {std,compiler} artifacts
|
||||
|
||||
These steps use the provided (downloaded, usually) compiler to compile the local
|
||||
Rust source into libraries we can use.
|
||||
|
||||
### Copying stage0 \{std,rustc\}
|
||||
|
||||
This copies the library and compiler artifacts from `cargo` into
|
||||
`stage0-sysroot/lib/rustlib/{target-triple}/lib`
|
||||
|
||||
### Assembling stage1 compiler
|
||||
|
||||
This copies the libraries we built in "building `stage0` ... artifacts" into the
|
||||
`stage1` compiler's `lib/` directory. These are the host libraries that the
|
||||
compiler itself uses to run. These aren't actually used by artifacts the new
|
||||
compiler generates. This step also copies the `rustc` and `rustdoc` binaries we
|
||||
generated into `build/$HOST/stage/bin`.
|
||||
|
||||
The `stage1/bin/rustc` is a fully functional compiler, but it doesn't yet have
|
||||
any libraries to link built binaries or libraries to. The next 3 steps will
|
||||
provide those libraries for it; they are mostly equivalent to constructing the
|
||||
`stage1/bin` compiler so we don't go through them individually here.
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
# Build distribution artifacts
|
||||
|
||||
You might want to build and package up the compiler for distribution.
|
||||
You’ll want to run this command to do it:
|
||||
|
||||
```bash
|
||||
./x dist
|
||||
```
|
||||
|
||||
# Install from source
|
||||
|
||||
You might want to prefer installing Rust (and tools configured in your configuration)
|
||||
by building from source. If so, you want to run this command:
|
||||
|
||||
```bash
|
||||
./x install
|
||||
```
|
||||
|
||||
Note: If you are testing out a modification to a compiler, you might
|
||||
want to build the compiler (with `./x build`) then create a toolchain as
|
||||
discussed [here][create-rustup-toolchain].
|
||||
|
||||
For example, if the toolchain you created is called "foo", you would then
|
||||
invoke it with `rustc +foo ...` (where ... represents the rest of the arguments).
|
||||
|
||||
Instead of installing Rust (and tools in your config file) globally, you can set the `DESTDIR`
|
||||
environment variable to change the installation path. If you want to set installation paths
|
||||
more dynamically, you should prefer [install options] in your config file to achieve that.
|
||||
|
||||
[create-rustup-toolchain]: ./how-to-build-and-run.md#creating-a-rustup-toolchain
|
||||
[install options]: https://github.com/rust-lang/rust/blob/f7c8928f035370be33463bb7f1cd1aeca2c5f898/config.example.toml#L422-L442
|
||||
51
src/doc/rustc-dev-guide/src/building/compiler-documenting.md
Normal file
51
src/doc/rustc-dev-guide/src/building/compiler-documenting.md
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# Building documentation
|
||||
|
||||
This chapter describes how to build documentation of toolchain components,
|
||||
like the standard library (std) or the compiler (rustc).
|
||||
|
||||
- Document everything
|
||||
|
||||
This uses `rustdoc` from the beta toolchain,
|
||||
so will produce (slightly) different output to stage 1 rustdoc,
|
||||
as rustdoc is under active development:
|
||||
|
||||
```bash
|
||||
./x doc
|
||||
```
|
||||
|
||||
If you want to be sure the documentation looks the same as on CI:
|
||||
|
||||
```bash
|
||||
./x doc --stage 1
|
||||
```
|
||||
|
||||
This ensures that (current) rustdoc gets built,
|
||||
then that is used to document the components.
|
||||
|
||||
- Much like running individual tests or building specific components,
|
||||
you can build just the documentation you want:
|
||||
|
||||
```bash
|
||||
./x doc src/doc/book
|
||||
./x doc src/doc/nomicon
|
||||
./x doc compiler library
|
||||
```
|
||||
|
||||
See [the nightly docs index page](https://doc.rust-lang.org/nightly/) for a full list of books.
|
||||
|
||||
- Document internal rustc items
|
||||
|
||||
Compiler documentation is not built by default.
|
||||
To create it by default with `x doc`, modify `config.toml`:
|
||||
|
||||
```toml
|
||||
[build]
|
||||
compiler-docs = true
|
||||
```
|
||||
|
||||
Note that when enabled,
|
||||
documentation for internal compiler items will also be built.
|
||||
|
||||
NOTE: The documentation for the compiler is found at [this link].
|
||||
|
||||
[this link]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/
|
||||
404
src/doc/rustc-dev-guide/src/building/how-to-build-and-run.md
Normal file
404
src/doc/rustc-dev-guide/src/building/how-to-build-and-run.md
Normal file
|
|
@ -0,0 +1,404 @@
|
|||
# How to build and run the compiler
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
The compiler is built using a tool called `x.py`. You will need to
|
||||
have Python installed to run it.
|
||||
|
||||
## Quick Start
|
||||
|
||||
For a less in-depth quick-start of getting the compiler running, see [quickstart](./quickstart.md).
|
||||
|
||||
|
||||
## Get the source code
|
||||
|
||||
The main repository is [`rust-lang/rust`][repo]. This contains the compiler,
|
||||
the standard library (including `core`, `alloc`, `test`, `proc_macro`, etc),
|
||||
and a bunch of tools (e.g. `rustdoc`, the bootstrapping infrastructure, etc).
|
||||
|
||||
[repo]: https://github.com/rust-lang/rust
|
||||
|
||||
The very first step to work on `rustc` is to clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/rust-lang/rust.git
|
||||
cd rust
|
||||
```
|
||||
|
||||
### Partial clone the repository
|
||||
|
||||
Due to the size of the repository, cloning on a slower internet connection can take a long time,
|
||||
and requires disk space to store the full history of every file and directory.
|
||||
Instead, it is possible to tell git to perform a _partial clone_, which will only fully retrieve
|
||||
the current file contents, but will automatically retrieve further file contents when you, e.g.,
|
||||
jump back in the history.
|
||||
All git commands will continue to work as usual, at the price of requiring an internet connection
|
||||
to visit not-yet-loaded points in history.
|
||||
|
||||
```bash
|
||||
git clone --filter='blob:none' https://github.com/rust-lang/rust.git
|
||||
cd rust
|
||||
```
|
||||
|
||||
> **NOTE**: [This link](https://github.blog/open-source/git/get-up-to-speed-with-partial-clone-and-shallow-clone/)
|
||||
> describes this type of checkout in more detail, and also compares it to other modes, such as
|
||||
> shallow cloning.
|
||||
|
||||
### Shallow clone the repository
|
||||
|
||||
An older alternative to partial clones is to shallow clone the repository instead.
|
||||
To do so, you can use the `--depth N` option with the `git clone` command.
|
||||
This instructs `git` to perform a "shallow clone", cloning the repository but truncating it to
|
||||
the last `N` commits.
|
||||
|
||||
Passing `--depth 1` tells `git` to clone the repository but truncate the history to the latest
|
||||
commit that is on the `master` branch, which is usually fine for browsing the source code or
|
||||
building the compiler.
|
||||
|
||||
```bash
|
||||
git clone --depth 1 https://github.com/rust-lang/rust.git
|
||||
cd rust
|
||||
```
|
||||
|
||||
> **NOTE**: A shallow clone limits which `git` commands can be run.
|
||||
> If you intend to work on and contribute to the compiler, it is
|
||||
> generally recommended to fully clone the repository [as shown above](#get-the-source-code),
|
||||
> or to perform a [partial clone](#partial-clone-the-repository) instead.
|
||||
>
|
||||
> For example, `git bisect` and `git blame` require access to the commit history,
|
||||
> so they don't work if the repository was cloned with `--depth 1`.
|
||||
|
||||
## What is `x.py`?
|
||||
|
||||
`x.py` is the build tool for the `rust` repository. It can build docs, run tests, and compile the
|
||||
compiler and standard library.
|
||||
|
||||
This chapter focuses on the basics to be productive, but
|
||||
if you want to learn more about `x.py`, [read this chapter][bootstrap].
|
||||
|
||||
[bootstrap]: ./bootstrapping/intro.md
|
||||
|
||||
Also, using `x` rather than `x.py` is recommended as:
|
||||
|
||||
> `./x` is the most likely to work on every system (on Unix it runs the shell script
|
||||
> that does python version detection, on Windows it will probably run the
|
||||
> powershell script - certainly less likely to break than `./x.py` which often just
|
||||
> opens the file in an editor).[^1]
|
||||
|
||||
(You can find the platform-specific scripts next to `x.py`, such as `x.ps1`.)
|
||||
|
||||
Notice that this is not absolute. For instance, using Nushell in VSCode on Win10,
|
||||
typing `x` or `./x` still opens `x.py` in an editor rather than invoking the program. :)
|
||||
|
||||
In the rest of this guide, we use `x` rather than `x.py` directly. The following
|
||||
command:
|
||||
|
||||
```bash
|
||||
./x check
|
||||
```
|
||||
|
||||
could be replaced by:
|
||||
|
||||
```bash
|
||||
./x.py check
|
||||
```
|
||||
|
||||
### Running `x.py`
|
||||
|
||||
The `x.py` command can be run directly on most Unix systems in the following format:
|
||||
|
||||
```sh
|
||||
./x <subcommand> [flags]
|
||||
```
|
||||
|
||||
This is how the documentation and examples assume you are running `x.py`.
|
||||
Some alternative ways are:
|
||||
|
||||
```sh
|
||||
# On a Unix shell if you don't have the necessary `python3` command
|
||||
./x <subcommand> [flags]
|
||||
|
||||
# In Windows Powershell (if powershell is configured to run scripts)
|
||||
./x <subcommand> [flags]
|
||||
./x.ps1 <subcommand> [flags]
|
||||
|
||||
# On the Windows Command Prompt (if .py files are configured to run Python)
|
||||
x.py <subcommand> [flags]
|
||||
|
||||
# You can also run Python yourself, e.g.:
|
||||
python x.py <subcommand> [flags]
|
||||
```
|
||||
|
||||
On Windows, the Powershell commands may give you an error that looks like this:
|
||||
```
|
||||
PS C:\Users\vboxuser\rust> ./x
|
||||
./x : File C:\Users\vboxuser\rust\x.ps1 cannot be loaded because running scripts is disabled on this system. For more
|
||||
information, see about_Execution_Policies at https:/go.microsoft.com/fwlink/?LinkID=135170.
|
||||
At line:1 char:1
|
||||
+ ./x
|
||||
+ ~~~
|
||||
+ CategoryInfo : SecurityError: (:) [], PSSecurityException
|
||||
+ FullyQualifiedErrorId : UnauthorizedAccess
|
||||
```
|
||||
|
||||
You can avoid this error by allowing powershell to run local scripts:
|
||||
```
|
||||
Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
||||
```
|
||||
|
||||
#### Running `x.py` slightly more conveniently
|
||||
|
||||
There is a binary that wraps `x.py` called `x` in `src/tools/x`. All it does is
|
||||
run `x.py`, but it can be installed system-wide and run from any subdirectory
|
||||
of a checkout. It also looks up the appropriate version of `python` to use.
|
||||
|
||||
You can install it with `cargo install --path src/tools/x`.
|
||||
|
||||
To clarify, this is another globally installed binary utility, which is
|
||||
similar to the one declared in section [What is `x.py`](#what-is-xpy), but
|
||||
it works as an independent process to execute the `x.py` rather than calling the
|
||||
shell to run the platform related scripts.
|
||||
|
||||
## Create a `config.toml`
|
||||
|
||||
To start, run `./x setup` and select the `compiler` defaults. This will do some initialization
|
||||
and create a `config.toml` for you with reasonable defaults. If you use a different default (which
|
||||
you'll likely want to do if you want to contribute to an area of rust other than the compiler, such
|
||||
as rustdoc), make sure to read information about that default (located in `src/bootstrap/defaults`)
|
||||
as the build process may be different for other defaults.
|
||||
|
||||
Alternatively, you can write `config.toml` by hand. See `config.example.toml` for all the available
|
||||
settings and explanations of them. See `src/bootstrap/defaults` for common settings to change.
|
||||
|
||||
If you have already built `rustc` and you change settings related to LLVM, then you may have to
|
||||
execute `rm -rf build` for subsequent configuration changes to take effect. Note that `./x
|
||||
clean` will not cause a rebuild of LLVM.
|
||||
|
||||
## Common `x` commands
|
||||
|
||||
Here are the basic invocations of the `x` commands most commonly used when
|
||||
working on `rustc`, `std`, `rustdoc`, and other tools.
|
||||
|
||||
| Command | When to use it |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------ |
|
||||
| `./x check` | Quick check to see if most things compile; [rust-analyzer can run this automatically for you][rust-analyzer] |
|
||||
| `./x build` | Builds `rustc`, `std`, and `rustdoc` |
|
||||
| `./x test` | Runs all tests |
|
||||
| `./x fmt` | Formats all code |
|
||||
|
||||
As written, these commands are reasonable starting points. However, there are
|
||||
additional options and arguments for each of them that are worth learning for
|
||||
serious development work. In particular, `./x build` and `./x test`
|
||||
provide many ways to compile or test a subset of the code, which can save a lot
|
||||
of time.
|
||||
|
||||
Also, note that `x` supports all kinds of path suffixes for `compiler`, `library`,
|
||||
and `src/tools` directories. So, you can simply run `x test tidy` instead of
|
||||
`x test src/tools/tidy`. Or, `x build std` instead of `x build library/std`.
|
||||
|
||||
[rust-analyzer]: ./suggested.md#configuring-rust-analyzer-for-rustc
|
||||
|
||||
See the chapters on
|
||||
[testing](../tests/running.md) and [rustdoc](../rustdoc.md) for more details.
|
||||
|
||||
### Building the compiler
|
||||
|
||||
Note that building will require a relatively large amount of storage space.
|
||||
You may want to have upwards of 10 or 15 gigabytes available to build the compiler.
|
||||
|
||||
Once you've created a `config.toml`, you are now ready to run
|
||||
`x`. There are a lot of options here, but let's start with what is
|
||||
probably the best "go to" command for building a local compiler:
|
||||
|
||||
```bash
|
||||
./x build library
|
||||
```
|
||||
|
||||
This may *look* like it only builds the standard library, but that is not the case.
|
||||
What this command does is the following:
|
||||
|
||||
- Build `std` using the stage0 compiler
|
||||
- Build `rustc` using the stage0 compiler
|
||||
- This produces the stage1 compiler
|
||||
- Build `std` using the stage1 compiler
|
||||
|
||||
This final product (stage1 compiler + libs built using that compiler)
|
||||
is what you need to build other Rust programs (unless you use `#![no_std]` or
|
||||
`#![no_core]`).
|
||||
|
||||
You will probably find that building the stage1 `std` is a bottleneck for you,
|
||||
but fear not, there is a (hacky) workaround...
|
||||
see [the section on avoiding rebuilds for std][keep-stage].
|
||||
|
||||
[keep-stage]: ./suggested.md#faster-builds-with---keep-stage
|
||||
|
||||
Sometimes you don't need a full build. When doing some kind of
|
||||
"type-based refactoring", like renaming a method, or changing the
|
||||
signature of some function, you can use `./x check` instead for a much faster build.
|
||||
|
||||
Note that this whole command just gives you a subset of the full `rustc`
|
||||
build. The **full** `rustc` build (what you get with `./x build
|
||||
--stage 2 compiler/rustc`) has quite a few more steps:
|
||||
|
||||
- Build `rustc` with the stage1 compiler.
|
||||
- The resulting compiler here is called the "stage2" compiler.
|
||||
- Build `std` with stage2 compiler.
|
||||
- Build `librustdoc` and a bunch of other things with the stage2 compiler.
|
||||
|
||||
You almost never need to do this.
|
||||
|
||||
### Build specific components
|
||||
|
||||
If you are working on the standard library, you probably don't need to build
|
||||
the compiler unless you are planning to use a recently added nightly feature.
|
||||
Instead, you can just build using the bootstrap compiler.
|
||||
|
||||
```bash
|
||||
./x build --stage 0 library
|
||||
```
|
||||
|
||||
If you choose the `library` profile when running `x setup`, you can omit `--stage 0` (it's the
|
||||
default).
|
||||
|
||||
## Creating a rustup toolchain
|
||||
|
||||
Once you have successfully built `rustc`, you will have created a bunch
|
||||
of files in your `build` directory. In order to actually run the
|
||||
resulting `rustc`, we recommend creating rustup toolchains. The first
|
||||
one will run the stage1 compiler (which we built above). The second
|
||||
will execute the stage2 compiler (which we did not build, but which
|
||||
you will likely need to build at some point; for example, if you want
|
||||
to run the entire test suite).
|
||||
|
||||
```bash
|
||||
rustup toolchain link stage0 build/host/stage0-sysroot # beta compiler + stage0 std
|
||||
rustup toolchain link stage1 build/host/stage1
|
||||
rustup toolchain link stage2 build/host/stage2
|
||||
```
|
||||
|
||||
Now you can run the `rustc` you built. If you run with `-vV`, you
|
||||
should see a version number ending in `-dev`, indicating a build from
|
||||
your local environment:
|
||||
|
||||
```bash
|
||||
$ rustc +stage1 -vV
|
||||
rustc 1.48.0-dev
|
||||
binary: rustc
|
||||
commit-hash: unknown
|
||||
commit-date: unknown
|
||||
host: x86_64-unknown-linux-gnu
|
||||
release: 1.48.0-dev
|
||||
LLVM version: 11.0
|
||||
```
|
||||
|
||||
The rustup toolchain points to the specified toolchain compiled in your `build` directory,
|
||||
so the rustup toolchain will be updated whenever `x build` or `x test` are run for
|
||||
that toolchain/stage.
|
||||
|
||||
**Note:** the toolchain we've built does not include `cargo`. In this case, `rustup` will
|
||||
fall back to using `cargo` from the installed `nightly`, `beta`, or `stable` toolchain
|
||||
(in that order). If you need to use unstable `cargo` flags, be sure to run
|
||||
`rustup install nightly` if you haven't already. See the
|
||||
[rustup documentation on custom toolchains](https://rust-lang.github.io/rustup/concepts/toolchains.html#custom-toolchains).
|
||||
|
||||
**Note:** rust-analyzer and IntelliJ Rust plugin use a component called
|
||||
`rust-analyzer-proc-macro-srv` to work with proc macros. If you intend to use a
|
||||
custom toolchain for a project (e.g. via `rustup override set stage1`) you may
|
||||
want to build this component:
|
||||
|
||||
```bash
|
||||
./x build proc-macro-srv-cli
|
||||
```
|
||||
|
||||
## Building targets for cross-compilation
|
||||
|
||||
To produce a compiler that can cross-compile for other targets,
|
||||
pass any number of `target` flags to `x build`.
|
||||
For example, if your host platform is `x86_64-unknown-linux-gnu`
|
||||
and your cross-compilation target is `wasm32-wasip1`, you can build with:
|
||||
|
||||
```bash
|
||||
./x build --target x86_64-unknown-linux-gnu,wasm32-wasip1
|
||||
```
|
||||
|
||||
Note that if you want the resulting compiler to be able to build crates that
|
||||
involve proc macros or build scripts, you must be sure to explicitly build target support for the
|
||||
host platform (in this case, `x86_64-unknown-linux-gnu`).
|
||||
|
||||
If you want to always build for other targets without needing to pass flags to `x build`,
|
||||
you can configure this in the `[build]` section of your `config.toml` like so:
|
||||
|
||||
```toml
|
||||
[build]
|
||||
target = ["x86_64-unknown-linux-gnu", "wasm32-wasip1"]
|
||||
```
|
||||
|
||||
Note that building for some targets requires having external dependencies installed
|
||||
(e.g. building musl targets requires a local copy of musl).
|
||||
Any target-specific configuration (e.g. the path to a local copy of musl)
|
||||
will need to be provided by your `config.toml`.
|
||||
Please see `config.example.toml` for information on target-specific configuration keys.
|
||||
|
||||
For examples of the complete configuration necessary to build a target, please visit
|
||||
[the rustc book](https://doc.rust-lang.org/rustc/platform-support.html),
|
||||
select any target under the "Platform Support" heading on the left,
|
||||
and see the section related to building a compiler for that target.
|
||||
For targets without a corresponding page in the rustc book,
|
||||
it may be useful to [inspect the Dockerfiles](../tests/docker.md)
|
||||
that the Rust infrastructure itself uses to set up and configure cross-compilation.
|
||||
|
||||
If you have followed the directions from the prior section on creating a rustup toolchain,
|
||||
then once you have built your compiler you will be able to use it to cross-compile like so:
|
||||
|
||||
```bash
|
||||
cargo +stage1 build --target wasm32-wasip1
|
||||
```
|
||||
|
||||
## Other `x` commands
|
||||
|
||||
Here are a few other useful `x` commands. We'll cover some of them in detail
|
||||
in other sections:
|
||||
|
||||
- Building things:
|
||||
- `./x build` – builds everything using the stage 1 compiler,
|
||||
not just up to `std`
|
||||
- `./x build --stage 2` – builds everything with the stage 2 compiler including
|
||||
`rustdoc`
|
||||
- Running tests (see the [section on running tests](../tests/running.md) for
|
||||
more details):
|
||||
- `./x test library/std` – runs the unit tests and integration tests from `std`
|
||||
- `./x test tests/ui` – runs the `ui` test suite
|
||||
- `./x test tests/ui/const-generics` - runs all the tests in
|
||||
the `const-generics/` subdirectory of the `ui` test suite
|
||||
- `./x test tests/ui/const-generics/const-types.rs` - runs
|
||||
the single test `const-types.rs` from the `ui` test suite
|
||||
|
||||
### Cleaning out build directories
|
||||
|
||||
Sometimes you need to start fresh, but this is normally not the case.
|
||||
If you need to run this then bootstrap is most likely not acting right and
|
||||
you should file a bug as to what is going wrong. If you do need to clean
|
||||
everything up then you only need to run one command!
|
||||
|
||||
```bash
|
||||
./x clean
|
||||
```
|
||||
|
||||
`rm -rf build` works too, but then you have to rebuild LLVM, which can take
|
||||
a long time even on fast computers.
|
||||
|
||||
## Remarks on disk space
|
||||
|
||||
Building the compiler (especially if beyond stage 1) can require significant amounts of free disk
|
||||
space, possibly around 100GB. This is compounded if you have a separate build directory for
|
||||
rust-analyzer (e.g. `build-rust-analyzer`). This is easy to hit with dev-desktops which have a [set
|
||||
disk
|
||||
quota](https://github.com/rust-lang/simpleinfra/blob/8a59e4faeb75a09b072671c74a7cb70160ebef50/ansible/roles/dev-desktop/defaults/main.yml#L7)
|
||||
for each user, but this also applies to local development as well. Occasionally, you may need to:
|
||||
|
||||
- Remove `build/` directory.
|
||||
- Remove `build-rust-analyzer/` directory (if you have a separate rust-analyzer build directory).
|
||||
- Uninstall unnecessary toolchains if you use `cargo-bisect-rustc`. You can check which toolchains
|
||||
are installed with `rustup toolchain list`.
|
||||
|
||||
[^1]: issue [#1707](https://github.com/rust-lang/rustc-dev-guide/issues/1707)
|
||||
179
src/doc/rustc-dev-guide/src/building/new-target.md
Normal file
179
src/doc/rustc-dev-guide/src/building/new-target.md
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
# Adding a new target
|
||||
|
||||
These are a set of steps to add support for a new target. There are
|
||||
numerous end states and paths to get there, so not all sections may be
|
||||
relevant to your desired goal.
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
## Specifying a new LLVM
|
||||
|
||||
For very new targets, you may need to use a different fork of LLVM
|
||||
than what is currently shipped with Rust. In that case, navigate to
|
||||
the `src/llvm-project` git submodule (you might need to run `./x
|
||||
check` at least once so the submodule is updated), check out the
|
||||
appropriate commit for your fork, then commit that new submodule
|
||||
reference in the main Rust repository.
|
||||
|
||||
An example would be:
|
||||
|
||||
```
|
||||
cd src/llvm-project
|
||||
git remote add my-target-llvm some-llvm-repository
|
||||
git checkout my-target-llvm/my-branch
|
||||
cd ..
|
||||
git add llvm-project
|
||||
git commit -m 'Use my custom LLVM'
|
||||
```
|
||||
|
||||
### Using pre-built LLVM
|
||||
|
||||
If you have a local LLVM checkout that is already built, you may be
|
||||
able to configure Rust to treat your build as the system LLVM to avoid
|
||||
redundant builds.
|
||||
|
||||
You can tell Rust to use a pre-built version of LLVM using the `target` section
|
||||
of `config.toml`:
|
||||
|
||||
```toml
|
||||
[target.x86_64-unknown-linux-gnu]
|
||||
llvm-config = "/path/to/llvm/llvm-7.0.1/bin/llvm-config"
|
||||
```
|
||||
|
||||
If you are attempting to use a system LLVM, we have observed the following paths
|
||||
before, though they may be different from your system:
|
||||
|
||||
- `/usr/bin/llvm-config-8`
|
||||
- `/usr/lib/llvm-8/bin/llvm-config`
|
||||
|
||||
Note that you need to have the LLVM `FileCheck` tool installed, which is used
|
||||
for codegen tests. This tool is normally built with LLVM, but if you use your
|
||||
own preinstalled LLVM, you will need to provide `FileCheck` in some other way.
|
||||
On Debian-based systems, you can install the `llvm-N-tools` package (where `N`
|
||||
is the LLVM version number, e.g. `llvm-8-tools`). Alternately, you can specify
|
||||
the path to `FileCheck` with the `llvm-filecheck` config item in `config.toml`
|
||||
or you can disable codegen tests with the `codegen-tests` item in `config.toml`.
|
||||
|
||||
## Creating a target specification
|
||||
|
||||
You should start with a target JSON file. You can see the specification
|
||||
for an existing target using `--print target-spec-json`:
|
||||
|
||||
```
|
||||
rustc -Z unstable-options --target=wasm32-unknown-unknown --print target-spec-json
|
||||
```
|
||||
|
||||
Save that JSON to a file and modify it as appropriate for your target.
|
||||
|
||||
### Adding a target specification
|
||||
|
||||
Once you have filled out a JSON specification and been able to compile
|
||||
somewhat successfully, you can copy the specification into the
|
||||
compiler itself.
|
||||
|
||||
You will need to add a line to the big table inside of the
|
||||
`supported_targets` macro in the `rustc_target::spec` module. You
|
||||
will then add a corresponding file for your new target containing a
|
||||
`target` function.
|
||||
|
||||
Look for existing targets to use as examples.
|
||||
|
||||
After adding your target to the `rustc_target` crate you may want to add
|
||||
`core`, `std`, ... with support for your new target. In that case you will
|
||||
probably need access to some `target_*` cfg. Unfortunately when building with
|
||||
stage0 (the beta compiler), you'll get an error that the target cfg is
|
||||
unexpected because stage0 doesn't know about the new target specification and
|
||||
we pass `--check-cfg` in order to tell it to check.
|
||||
|
||||
To fix the errors you will need to manually add the unexpected value to the
|
||||
different `Cargo.toml` files in `library/{std,alloc,core}/Cargo.toml`. Here is an
|
||||
example for adding `NEW_TARGET_ARCH` as `target_arch`:
|
||||
|
||||
*`library/std/Cargo.toml`*:
|
||||
```diff
|
||||
[lints.rust.unexpected_cfgs]
|
||||
level = "warn"
|
||||
check-cfg = [
|
||||
'cfg(bootstrap)',
|
||||
- 'cfg(target_arch, values("xtensa"))',
|
||||
+ # #[cfg(bootstrap)] NEW_TARGET_ARCH
|
||||
+ 'cfg(target_arch, values("xtensa", "NEW_TARGET_ARCH"))',
|
||||
```
|
||||
|
||||
To use this target in bootstrap, we need to explicitly add the target triple to the `STAGE0_MISSING_TARGETS`
|
||||
list in `src/bootstrap/src/core/sanity.rs`. This is necessary because the default compiler that bootstrap uses does
|
||||
not recognize the new target we just added. Therefore, it should be added to `STAGE0_MISSING_TARGETS` so that the
|
||||
bootstrap is aware that this target is not yet supported by the stage0 compiler.
|
||||
|
||||
```diff
|
||||
const STAGE0_MISSING_TARGETS: &[&str] = &[
|
||||
+ "NEW_TARGET_TRIPLE"
|
||||
];
|
||||
```
|
||||
|
||||
## Patching crates
|
||||
|
||||
You may need to make changes to crates that the compiler depends on,
|
||||
such as [`libc`][] or [`cc`][]. If so, you can use Cargo's
|
||||
[`[patch]`][patch] ability. For example, if you want to use an
|
||||
unreleased version of `libc`, you can add it to the top-level
|
||||
`Cargo.toml` file:
|
||||
|
||||
```diff
|
||||
diff --git a/Cargo.toml b/Cargo.toml
|
||||
index 1e83f05e0ca..4d0172071c1 100644
|
||||
--- a/Cargo.toml
|
||||
+++ b/Cargo.toml
|
||||
@@ -113,6 +113,8 @@ cargo-util = { path = "src/tools/cargo/crates/cargo-util" }
|
||||
[patch.crates-io]
|
||||
+libc = { git = "https://github.com/rust-lang/libc", rev = "0bf7ce340699dcbacabdf5f16a242d2219a49ee0" }
|
||||
|
||||
# See comments in `src/tools/rustc-workspace-hack/README.md` for what's going on
|
||||
# here
|
||||
rustc-workspace-hack = { path = 'src/tools/rustc-workspace-hack' }
|
||||
```
|
||||
|
||||
After this, run `cargo update -p libc` to update the lockfiles.
|
||||
|
||||
Beware that if you patch to a local `path` dependency, this will enable
|
||||
warnings for that dependency. Some dependencies are not warning-free, and due
|
||||
to the `deny-warnings` setting in `config.toml`, the build may suddenly start
|
||||
to fail.
|
||||
To work around warnings, you may want to:
|
||||
- Modify the dependency to remove the warnings
|
||||
- Or, for local development purposes, suppress the warnings by setting `deny-warnings = false` in `config.toml`.
|
||||
|
||||
```toml
|
||||
# config.toml
|
||||
[rust]
|
||||
deny-warnings = false
|
||||
```
|
||||
|
||||
[`libc`]: https://crates.io/crates/libc
|
||||
[`cc`]: https://crates.io/crates/cc
|
||||
[patch]: https://doc.rust-lang.org/stable/cargo/reference/overriding-dependencies.html#the-patch-section
|
||||
|
||||
## Cross-compiling
|
||||
|
||||
Once you have a target specification in JSON and in the code, you can
|
||||
cross-compile `rustc`:
|
||||
|
||||
```
|
||||
DESTDIR=/path/to/install/in \
|
||||
./x install -i --stage 1 --host aarch64-apple-darwin.json --target aarch64-apple-darwin \
|
||||
compiler/rustc library/std
|
||||
```
|
||||
|
||||
If your target specification is already available in the bootstrap
|
||||
compiler, you can use it instead of the JSON file for both arguments.
|
||||
|
||||
## Promoting a target from tier 2 (target) to tier 2 (host)
|
||||
|
||||
There are two levels of tier 2 targets:
|
||||
a) Targets that are only cross-compiled (`rustup target add`)
|
||||
b) Targets that [have a native toolchain][tier2-native] (`rustup toolchain install`)
|
||||
|
||||
[tier2-native]: https://doc.rust-lang.org/nightly/rustc/target-tier-policy.html#tier-2-with-host-tools
|
||||
|
||||
For an example of promoting a target from cross-compiled to native,
|
||||
see [#75914](https://github.com/rust-lang/rust/pull/75914).
|
||||
129
src/doc/rustc-dev-guide/src/building/optimized-build.md
Normal file
129
src/doc/rustc-dev-guide/src/building/optimized-build.md
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
# Optimized build of the compiler
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
There are multiple additional build configuration options and techniques that can be used to compile a
|
||||
build of `rustc` that is as optimized as possible (for example when building `rustc` for a Linux
|
||||
distribution). The status of these configuration options for various Rust targets is tracked [here].
|
||||
This page describes how you can use these approaches when building `rustc` yourself.
|
||||
|
||||
[here]: https://github.com/rust-lang/rust/issues/103595
|
||||
|
||||
## Link-time optimization
|
||||
|
||||
Link-time optimization is a powerful compiler technique that can increase program performance. To
|
||||
enable (Thin-)LTO when building `rustc`, set the `rust.lto` config option to `"thin"`
|
||||
in `config.toml`:
|
||||
|
||||
```toml
|
||||
[rust]
|
||||
lto = "thin"
|
||||
```
|
||||
|
||||
> Note that LTO for `rustc` is currently supported and tested only for
|
||||
> the `x86_64-unknown-linux-gnu` target. Other targets *may* work, but no guarantees are provided.
|
||||
> Notably, LTO-optimized `rustc` currently produces [miscompilations] on Windows.
|
||||
|
||||
[miscompilations]: https://github.com/rust-lang/rust/issues/109114
|
||||
|
||||
Enabling LTO on Linux has [produced] speed-ups by up to 10%.
|
||||
|
||||
[produced]: https://github.com/rust-lang/rust/pull/101403#issuecomment-1288190019
|
||||
|
||||
## Memory allocator
|
||||
|
||||
Using a different memory allocator for `rustc` can provide significant performance benefits. If you
|
||||
want to enable the `jemalloc` allocator, you can set the `rust.jemalloc` option to `true`
|
||||
in `config.toml`:
|
||||
|
||||
```toml
|
||||
[rust]
|
||||
jemalloc = true
|
||||
```
|
||||
|
||||
> Note that this option is currently only supported for Linux and macOS targets.
|
||||
|
||||
## Codegen units
|
||||
|
||||
Reducing the number of codegen units per `rustc` crate can produce a faster build of the compiler.
|
||||
You can modify the number of codegen units for `rustc` and `libstd` in `config.toml` with the
|
||||
following options:
|
||||
|
||||
```toml
|
||||
[rust]
|
||||
codegen-units = 1
|
||||
codegen-units-std = 1
|
||||
```
|
||||
|
||||
## Instruction set
|
||||
|
||||
By default, `rustc` is compiled for a generic (and conservative) instruction set architecture
|
||||
(depending on the selected target), to make it support as many CPUs as possible. If you want to
|
||||
compile `rustc` for a specific instruction set architecture, you can set the `target_cpu` compiler
|
||||
option in `RUSTFLAGS`:
|
||||
|
||||
```bash
|
||||
RUSTFLAGS="-C target_cpu=x86-64-v3" ./x build ...
|
||||
```
|
||||
|
||||
If you also want to compile LLVM for a specific instruction set, you can set `llvm` flags
|
||||
in `config.toml`:
|
||||
|
||||
```toml
|
||||
[llvm]
|
||||
cxxflags = "-march=x86-64-v3"
|
||||
cflags = "-march=x86-64-v3"
|
||||
```
|
||||
|
||||
## Profile-guided optimization
|
||||
|
||||
Applying profile-guided optimizations (or more generally, feedback-directed optimizations) can
|
||||
produce a large increase to `rustc` performance, by up to 15% ([1], [2]). However, these techniques
|
||||
are not simply enabled by a configuration option, but rather they require a complex build workflow
|
||||
that compiles `rustc` multiple times and profiles it on selected benchmarks.
|
||||
|
||||
There is a tool called `opt-dist` that is used to optimize `rustc` with [PGO] (profile-guided
|
||||
optimizations) and [BOLT] (a post-link binary optimizer) for builds distributed to end users. You
|
||||
can examine the tool, which is located in `src/tools/opt-dist`, and build a custom PGO build
|
||||
workflow based on it, or try to use it directly. Note that the tool is currently quite hardcoded to
|
||||
the way we use it in Rust's continuous integration workflows, and it might require some custom
|
||||
changes to make it work in a different environment.
|
||||
|
||||
[1]: https://blog.rust-lang.org/inside-rust/2020/11/11/exploring-pgo-for-the-rust-compiler.html#final-numbers-and-a-benchmarking-plot-twist
|
||||
[2]: https://github.com/rust-lang/rust/pull/96978
|
||||
|
||||
[PGO]: https://doc.rust-lang.org/rustc/profile-guided-optimization.html
|
||||
|
||||
[BOLT]: https://github.com/llvm/llvm-project/blob/main/bolt/README.md
|
||||
|
||||
To use the tool, you will need to provide some external dependencies:
|
||||
|
||||
- A Python3 interpreter (for executing `x.py`).
|
||||
- Compiled LLVM toolchain, with the `llvm-profdata` binary. Optionally, if you want to use BOLT,
|
||||
the `llvm-bolt` and
|
||||
`merge-fdata` binaries have to be available in the toolchain.
|
||||
|
||||
These dependencies are provided to `opt-dist` by an implementation of the [`Environment`] struct.
|
||||
It specifies the directories where the PGO/BOLT pipeline will take place, and also external dependencies
|
||||
like Python or LLVM.
|
||||
|
||||
Here is an example of how `opt-dist` can be used locally (outside of CI):
|
||||
|
||||
1. Build the tool with the following command:
|
||||
```bash
|
||||
./x build tools/opt-dist
|
||||
```
|
||||
2. Run the tool with the `local` mode and provide necessary parameters:
|
||||
```bash
|
||||
./build/host/stage0-tools-bin/opt-dist local \
|
||||
--target-triple <target> \ # select target, e.g. "x86_64-unknown-linux-gnu"
|
||||
--checkout-dir <path> \ # path to rust checkout, e.g. "."
|
||||
--llvm-dir <path> \ # path to built LLVM toolchain, e.g. "/foo/bar/llvm/install"
|
||||
-- python3 x.py dist # pass the actual build command
|
||||
```
|
||||
You can run `--help` to see further parameters that you can modify.
|
||||
|
||||
[`Environment`]: https://github.com/rust-lang/rust/blob/ee451f8faccf3050c76cdcd82543c917b40c7962/src/tools/opt-dist/src/environment.rs#L5
|
||||
|
||||
> Note: if you want to run the actual CI pipeline, instead of running `opt-dist` locally,
|
||||
> you can execute `DEPLOY=1 src/ci/docker/run.sh dist-x86_64-linux`.
|
||||
41
src/doc/rustc-dev-guide/src/building/prerequisites.md
Normal file
41
src/doc/rustc-dev-guide/src/building/prerequisites.md
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
# Prerequisites
|
||||
|
||||
## Dependencies
|
||||
|
||||
See [the `rust-lang/rust` INSTALL](https://github.com/rust-lang/rust/blob/master/INSTALL.md#dependencies).
|
||||
|
||||
## Hardware
|
||||
|
||||
You will need an internet connection to build. The bootstrapping process
|
||||
involves updating git submodules and downloading a beta compiler. It doesn't
|
||||
need to be super fast, but that can help.
|
||||
|
||||
There are no strict hardware requirements, but building the compiler is
|
||||
computationally expensive, so a beefier machine will help, and I wouldn't
|
||||
recommend trying to build on a Raspberry Pi! We recommend the following.
|
||||
* 30GB+ of free disk space. Otherwise, you will have to keep
|
||||
clearing incremental caches. More space is better, the compiler is a bit of a
|
||||
hog; it's a problem we are aware of.
|
||||
* 8GB+ RAM
|
||||
* 2+ cores. Having more cores really helps. 10 or 20 or more is not too many!
|
||||
|
||||
Beefier machines will lead to much faster builds. If your machine is not very
|
||||
powerful, a common strategy is to only use `./x check` on your local machine
|
||||
and let the CI build test your changes when you push to a PR branch.
|
||||
|
||||
Building the compiler takes more than half an hour on my moderately powerful
|
||||
laptop. We suggest downloading LLVM from CI so you don't have to build it from source
|
||||
([see here][config]).
|
||||
|
||||
Like `cargo`, the build system will use as many cores as possible. Sometimes
|
||||
this can cause you to run low on memory. You can use `-j` to adjust the number
|
||||
of concurrent jobs. If a full build takes more than ~45 minutes to an hour, you
|
||||
are probably spending most of the time swapping memory in and out; try using
|
||||
`-j1`.
|
||||
|
||||
If you don't have too much free disk space, you may want to turn off
|
||||
incremental compilation ([see here][config]). This will make compilation take
|
||||
longer (especially after a rebase), but will save a ton of space from the
|
||||
incremental caches.
|
||||
|
||||
[config]: ./how-to-build-and-run.md#create-a-configtoml
|
||||
74
src/doc/rustc-dev-guide/src/building/quickstart.md
Normal file
74
src/doc/rustc-dev-guide/src/building/quickstart.md
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# Quickstart
|
||||
|
||||
This is a quickstart guide about getting the compiler running. For more
|
||||
information on the individual steps, see the other pages in this chapter.
|
||||
|
||||
First, clone the repository:
|
||||
|
||||
```sh
|
||||
git clone https://github.com/rust-lang/rust.git
|
||||
cd rust
|
||||
```
|
||||
|
||||
When building the compiler, we don't use `cargo` directly; instead, we use a
|
||||
wrapper called "x". It is invoked with `./x`.
|
||||
|
||||
We need to create a configuration for the build. Use `./x setup` to create a
|
||||
good default.
|
||||
|
||||
```sh
|
||||
./x setup
|
||||
```
|
||||
|
||||
Then, we can build the compiler. Use `./x build` to build the compiler, standard
|
||||
library and a few tools. You can also run `./x check` to just check it. All these
|
||||
commands can take specific components/paths as arguments, for example `./x check
|
||||
compiler` to just check the compiler.
|
||||
|
||||
```sh
|
||||
./x build
|
||||
```
|
||||
|
||||
> When making a change to the compiler that does not affect the way it compiles
|
||||
the standard library (so for example, a change to an error message), use
|
||||
`--keep-stage-std 1` to avoid recompiling it.
|
||||
|
||||
After building the compiler and standard library, you now have a working
|
||||
compiler toolchain. You can use it with rustup by linking it.
|
||||
|
||||
```sh
|
||||
rustup toolchain link stage1 build/host/stage1
|
||||
```
|
||||
|
||||
Now you have a toolchain called `stage1` linked to your build. You can use it to
|
||||
test the compiler.
|
||||
|
||||
```sh
|
||||
rustc +stage1 testfile.rs
|
||||
```
|
||||
|
||||
After making a change, you can run the compiler test suite with `./x test`.
|
||||
|
||||
`./x test` runs the full test suite, which is slow and rarely what you want.
|
||||
Usually, `./x test tests/ui` is what you want after a compiler change, testing
|
||||
all [UI tests](../tests/ui.md) that invoke the compiler on a specific test file
|
||||
and check the output.
|
||||
|
||||
```sh
|
||||
./x test tests/ui
|
||||
```
|
||||
|
||||
Use `--bless` if you've made a change and want to update the `.stderr` files
|
||||
with the new output.
|
||||
|
||||
> `./x suggest` can also be helpful for suggesting which tests to run after a
|
||||
> change.
|
||||
|
||||
Congrats, you are now ready to make a change to the compiler! If you have more
|
||||
questions, [the full chapter](./how-to-build-and-run.md) might contain the
|
||||
answers, and if it doesn't, feel free to ask for help on
|
||||
[Zulip](https://rust-lang.zulipchat.com/#narrow/stream/182449-t-compiler.2Fhelp).
|
||||
|
||||
If you use VSCode, Vim, Emacs or Helix, `./x setup` will ask you if you want to
|
||||
set up the editor config. For more information, check out [suggested
|
||||
workflows](./suggested.md).
|
||||
391
src/doc/rustc-dev-guide/src/building/suggested.md
Normal file
391
src/doc/rustc-dev-guide/src/building/suggested.md
Normal file
|
|
@ -0,0 +1,391 @@
|
|||
# Suggested Workflows
|
||||
|
||||
The full bootstrapping process takes quite a while. Here are some suggestions to
|
||||
make your life easier.
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
## Installing a pre-push hook
|
||||
|
||||
CI will automatically fail your build if it doesn't pass `tidy`, our internal
|
||||
tool for ensuring code quality. If you'd like, you can install a [Git
|
||||
hook](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks) that will
|
||||
automatically run `./x test tidy` on each push, to ensure your code is up to
|
||||
par. If the hook fails then run `./x test tidy --bless` and commit the changes.
|
||||
If you decide later that the pre-push behavior is undesirable, you can delete
|
||||
the `pre-push` file in `.git/hooks`.
|
||||
|
||||
A prebuilt git hook lives at [`src/etc/pre-push.sh`]. It can be copied into
|
||||
your `.git/hooks` folder as `pre-push` (without the `.sh` extension!).
|
||||
|
||||
You can also install the hook as a step of running `./x setup`!
|
||||
|
||||
## Configuring `rust-analyzer` for `rustc`
|
||||
|
||||
### Project-local rust-analyzer setup
|
||||
|
||||
`rust-analyzer` can help you check and format your code whenever you save a
|
||||
file. By default, `rust-analyzer` runs the `cargo check` and `rustfmt` commands,
|
||||
but you can override these commands to use more adapted versions of these tools
|
||||
when hacking on `rustc`. With custom setup, `rust-analyzer` can use `./x check`
|
||||
to check the sources, and the stage 0 rustfmt to format them.
|
||||
|
||||
The default `rust-analyzer.check.overrideCommand` command line will check all
|
||||
the crates and tools in the repository. If you are working on a specific part,
|
||||
you can override the command to only check the part you are working on to save
|
||||
checking time. For example, if you are working on the compiler, you can override
|
||||
the command to `x check compiler --json-output` to only check the compiler part.
|
||||
You can run `x check --help --verbose` to see the available parts.
|
||||
|
||||
Running `./x setup editor` will prompt you to create a project-local LSP config
|
||||
file for one of the supported editors. You can also create the config file as a
|
||||
step of running `./x setup`.
|
||||
|
||||
### Using a separate build directory for rust-analyzer
|
||||
|
||||
By default, when rust-analyzer runs a check or format command, it will share
|
||||
the same build directory as manual command-line builds. This can be inconvenient
|
||||
for two reasons:
|
||||
- Each build will lock the build directory and force the other to wait, so it
|
||||
becomes impossible to run command-line builds while rust-analyzer is running
|
||||
commands in the background.
|
||||
- There is an increased risk of one of the builds deleting previously-built
|
||||
artifacts due to conflicting compiler flags or other settings, forcing
|
||||
additional rebuilds in some cases.
|
||||
|
||||
To avoid these problems:
|
||||
- Add `--build-dir=build-rust-analyzer` to all of the custom `x` commands in
|
||||
your editor's rust-analyzer configuration.
|
||||
(Feel free to choose a different directory name if desired.)
|
||||
- Modify the `rust-analyzer.rustfmt.overrideCommand` setting so that it points
|
||||
to the copy of `rustfmt` in that other build directory.
|
||||
- Modify the `rust-analyzer.procMacro.server` setting so that it points to the
|
||||
copy of `rust-analyzer-proc-macro-srv` in that other build directory.
|
||||
|
||||
Using separate build directories for command-line builds and rust-analyzer
|
||||
requires extra disk space, and also means that running `./x clean` on the
|
||||
command-line will not clean out the separate build directory. To clean the
|
||||
separate build directory, run `./x clean --build-dir=build-rust-analyzer`
|
||||
instead.
|
||||
|
||||
### Visual Studio Code
|
||||
|
||||
Selecting `vscode` in `./x setup editor` will prompt you to create a
|
||||
`.vscode/settings.json` file which will configure Visual Studio Code. The
|
||||
recommended `rust-analyzer` settings live at
|
||||
[`src/etc/rust_analyzer_settings.json`].
|
||||
|
||||
If running `./x check` on save is inconvenient, in VS Code you can use a [Build
|
||||
Task] instead:
|
||||
|
||||
```JSON
|
||||
// .vscode/tasks.json
|
||||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "./x check",
|
||||
"command": "./x check",
|
||||
"type": "shell",
|
||||
"problemMatcher": "$rustc",
|
||||
"presentation": { "clear": true },
|
||||
"group": { "kind": "build", "isDefault": true }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
[Build Task]: https://code.visualstudio.com/docs/editor/tasks
|
||||
|
||||
|
||||
### Neovim
|
||||
|
||||
For Neovim users there are several options for configuring for rustc. The
|
||||
easiest way is by using [neoconf.nvim](https://github.com/folke/neoconf.nvim/),
|
||||
which allows for project-local configuration files with the native LSP. The
|
||||
steps for how to use it are below. Note that they require rust-analyzer to
|
||||
already be configured with Neovim. Steps for this can be [found
|
||||
here](https://rust-analyzer.github.io/manual.html#nvim-lsp).
|
||||
|
||||
1. First install the plugin. This can be done by following the steps in the
|
||||
README.
|
||||
2. Run `./x setup editor`, and select `vscode` to create a
|
||||
`.vscode/settings.json` file. `neoconf` is able to read and update
|
||||
rust-analyzer settings automatically when the project is opened when this
|
||||
file is detected.
|
||||
|
||||
If you're using `coc.nvim`, you can run `./x setup editor` and select `vim` to
|
||||
create a `.vim/coc-settings.json`. The settings can be edited with
|
||||
`:CocLocalConfig`. The recommended settings live at
|
||||
[`src/etc/rust_analyzer_settings.json`].
|
||||
|
||||
Another way is to skip the plugin and write your own logic in your
|
||||
configuration. To do this you must translate the JSON to Lua yourself. The
|
||||
translation is 1:1 and fairly straightforward. It must be put in the
|
||||
`["rust-analyzer"]` key of the setup table, which is [shown
|
||||
here](https://github.com/neovim/nvim-lspconfig/blob/master/doc/server_configurations.md#rust_analyzer).
|
||||
|
||||
If you would like to use the build task that is described above, you may either
|
||||
make your own command in your config, or you can install a plugin such as
|
||||
[overseer.nvim](https://github.com/stevearc/overseer.nvim) that can [read
|
||||
VSCode's `tasks.json`
|
||||
files](https://github.com/stevearc/overseer.nvim/blob/master/doc/guides.md#vs-code-tasks),
|
||||
and follow the same instructions as above.
|
||||
|
||||
### Emacs
|
||||
|
||||
Emacs provides support for rust-analyzer with project-local configuration
|
||||
through [Eglot](https://www.gnu.org/software/emacs/manual/html_node/eglot/).
|
||||
Steps for setting up Eglot with rust-analyzer can be [found
|
||||
here](https://rust-analyzer.github.io/manual.html#eglot).
|
||||
Having set up Emacs & Eglot for Rust development in general, you can run
|
||||
`./x setup editor` and select `emacs`, which will prompt you to create
|
||||
`.dir-locals.el` with the recommended configuration for Eglot.
|
||||
The recommended settings live at [`src/etc/rust_analyzer_eglot.el`].
|
||||
For more information on project-specific Eglot configuration, consult [the
|
||||
manual](https://www.gnu.org/software/emacs/manual/html_node/eglot/Project_002dspecific-configuration.html).
|
||||
|
||||
### Helix
|
||||
|
||||
Helix comes with built-in LSP and rust-analyzer support.
|
||||
It can be configured through `languages.toml`, as described
|
||||
[here](https://docs.helix-editor.com/languages.html).
|
||||
You can run `./x setup editor` and select `helix`, which will prompt you to
|
||||
create `languages.toml` with the recommended configuration for Helix. The
|
||||
recommended settings live at [`src/etc/rust_analyzer_helix.toml`].
|
||||
|
||||
## Check, check, and check again
|
||||
|
||||
When doing simple refactoring, it can be useful to run `./x check`
|
||||
continuously. If you set up `rust-analyzer` as described above, this will be
|
||||
done for you every time you save a file. Here you are just checking that the
|
||||
compiler can **build**, but often that is all you need (e.g., when renaming a
|
||||
method). You can then run `./x build` when you actually need to run tests.
|
||||
|
||||
In fact, it is sometimes useful to put off tests even when you are not 100% sure
|
||||
the code will work. You can then keep building up refactoring commits and only
|
||||
run the tests at some later time. You can then use `git bisect` to track down
|
||||
**precisely** which commit caused the problem. A nice side-effect of this style
|
||||
is that you are left with a fairly fine-grained set of commits at the end, all
|
||||
of which build and pass tests. This often helps reviewing.
|
||||
|
||||
## `x suggest`
|
||||
|
||||
The `x suggest` subcommand suggests (and runs) a subset of the extensive
|
||||
`rust-lang/rust` tests based on files you have changed. This is especially
|
||||
useful for new contributors who have not mastered the arcane `x` flags yet and
|
||||
more experienced contributors as a shorthand for reducing mental effort. In all
|
||||
cases it is useful not to run the full tests (which can take on the order of
|
||||
tens of minutes) and just run a subset which are relevant to your changes. For
|
||||
example, running `tidy` and `linkchecker` is useful when editing Markdown files,
|
||||
whereas UI tests are much less likely to be helpful. While `x suggest` is a
|
||||
useful tool, it does not guarantee perfect coverage (just as PR CI isn't a
|
||||
substitute for bors). See the [dedicated chapter](../tests/suggest-tests.md) for
|
||||
more information and contribution instructions.
|
||||
|
||||
Please note that `x suggest` is in a beta state currently and the tests that it
|
||||
will suggest are limited.
|
||||
|
||||
## Configuring `rustup` to use nightly
|
||||
|
||||
Some parts of the bootstrap process use pinned, nightly versions of tools like
|
||||
rustfmt. To make things like `cargo fmt` work correctly in your repo, run
|
||||
|
||||
```console
|
||||
cd <path to rustc repo>
|
||||
rustup override set nightly
|
||||
```
|
||||
|
||||
after [installing a nightly toolchain] with `rustup`. Don't forget to do this
|
||||
for all directories you have [setup a worktree for]. You may need to use the
|
||||
pinned nightly version from `src/stage0`, but often the normal `nightly` channel
|
||||
will work.
|
||||
|
||||
**Note** See [the section on vscode] for how to configure it with the real
|
||||
rustfmt that `x` uses, and [the section on rustup] for how to set up a `rustup`
|
||||
toolchain for your bootstrapped compiler.
|
||||
|
||||
**Note** This does _not_ allow you to build `rustc` with cargo directly. You
|
||||
still have to use `x` to work on the compiler or standard library, this just
|
||||
lets you use `cargo fmt`.
|
||||
|
||||
[installing a nightly toolchain]: https://rust-lang.github.io/rustup/concepts/channels.html?highlight=nightl#working-with-nightly-rust
|
||||
[setup a worktree for]: ./suggested.md#working-on-multiple-branches-at-the-same-time
|
||||
[the section on vscode]: suggested.md#configuring-rust-analyzer-for-rustc
|
||||
[the section on rustup]: how-to-build-and-run.md?highlight=rustup#creating-a-rustup-toolchain
|
||||
|
||||
## Faster builds with `--keep-stage`.
|
||||
|
||||
Sometimes just checking whether the compiler builds is not enough. A common
|
||||
example is that you need to add a `debug!` statement to inspect the value of
|
||||
some state or better understand the problem. In that case, you don't really need
|
||||
a full build. By bypassing bootstrap's cache invalidation, you can often get
|
||||
these builds to complete very fast (e.g., around 30 seconds). The only catch is
|
||||
this requires a bit of fudging and may produce compilers that don't work (but
|
||||
that is easily detected and fixed).
|
||||
|
||||
The sequence of commands you want is as follows:
|
||||
|
||||
- Initial build: `./x build library`
|
||||
- As [documented previously], this will build a functional stage1 compiler as
|
||||
part of running all stage0 commands (which include building a `std`
|
||||
compatible with the stage1 compiler) as well as the first few steps of the
|
||||
"stage 1 actions" up to "stage1 (sysroot stage1) builds std".
|
||||
- Subsequent builds: `./x build library --keep-stage 1`
|
||||
- Note that we added the `--keep-stage 1` flag here
|
||||
|
||||
[documented previously]: ./how-to-build-and-run.md#building-the-compiler
|
||||
|
||||
As mentioned, the effect of `--keep-stage 1` is that we just _assume_ that the
|
||||
old standard library can be re-used. If you are editing the compiler, this is
|
||||
almost always true: you haven't changed the standard library, after all. But
|
||||
sometimes, it's not true: for example, if you are editing the "metadata" part of
|
||||
the compiler, which controls how the compiler encodes types and other states
|
||||
into the `rlib` files, or if you are editing things that wind up in the metadata
|
||||
(such as the definition of the MIR).
|
||||
|
||||
**The TL;DR is that you might get weird behavior from a compile when using
|
||||
`--keep-stage 1`** -- for example, strange [ICEs](../appendix/glossary.html#ice)
|
||||
or other panics. In that case, you should simply remove the `--keep-stage 1`
|
||||
from the command and rebuild. That ought to fix the problem.
|
||||
|
||||
You can also use `--keep-stage 1` when running tests. Something like this:
|
||||
|
||||
- Initial test run: `./x test tests/ui`
|
||||
- Subsequent test run: `./x test tests/ui --keep-stage 1`
|
||||
|
||||
### Iterating the standard library with `--keep-stage`
|
||||
|
||||
If you are making changes to the standard library, you can use `./x build
|
||||
--keep-stage 0 library` to iteratively rebuild the standard library without
|
||||
rebuilding the compiler.
|
||||
|
||||
## Using incremental compilation
|
||||
|
||||
You can further enable the `--incremental` flag to save additional time in
|
||||
subsequent rebuilds:
|
||||
|
||||
```bash
|
||||
./x test tests/ui --incremental --test-args issue-1234
|
||||
```
|
||||
|
||||
If you don't want to include the flag with every command, you can enable it in
|
||||
the `config.toml`:
|
||||
|
||||
```toml
|
||||
[rust]
|
||||
incremental = true
|
||||
```
|
||||
|
||||
Note that incremental compilation will use more disk space than usual. If disk
|
||||
space is a concern for you, you might want to check the size of the `build`
|
||||
directory from time to time.
|
||||
|
||||
## Fine-tuning optimizations
|
||||
|
||||
Setting `optimize = false` makes the compiler too slow for tests. However, to
|
||||
improve the test cycle, you can disable optimizations selectively only for the
|
||||
crates you'll have to rebuild
|
||||
([source](https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/incremental.20compilation.20question/near/202712165)).
|
||||
For example, when working on `rustc_mir_build`, the `rustc_mir_build` and
|
||||
`rustc_driver` crates take the most time to incrementally rebuild. You could
|
||||
therefore set the following in the root `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[profile.release.package.rustc_mir_build]
|
||||
opt-level = 0
|
||||
[profile.release.package.rustc_driver]
|
||||
opt-level = 0
|
||||
```
|
||||
|
||||
## Working on multiple branches at the same time
|
||||
|
||||
Working on multiple branches in parallel can be a little annoying, since
|
||||
building the compiler on one branch will cause the old build and the incremental
|
||||
compilation cache to be overwritten. One solution would be to have multiple
|
||||
clones of the repository, but that would mean storing the Git metadata multiple
|
||||
times, and having to update each clone individually.
|
||||
|
||||
Fortunately, Git has a better solution called [worktrees]. This lets you create
|
||||
multiple "working trees", which all share the same Git database. Moreover,
|
||||
because all of the worktrees share the same object database, if you update a
|
||||
branch (e.g. master) in any of them, you can use the new commits from any of the
|
||||
worktrees. One caveat, though, is that submodules do not get shared. They will
|
||||
still be cloned multiple times.
|
||||
|
||||
[worktrees]: https://git-scm.com/docs/git-worktree
|
||||
|
||||
Given you are inside the root directory for your Rust repository, you can create
|
||||
a "linked working tree" in a new "rust2" directory by running the following
|
||||
command:
|
||||
|
||||
```bash
|
||||
git worktree add ../rust2
|
||||
```
|
||||
|
||||
Creating a new worktree for a new branch based on `master` looks like:
|
||||
|
||||
```bash
|
||||
git worktree add -b my-feature ../rust2 master
|
||||
```
|
||||
|
||||
You can then use that rust2 folder as a separate workspace for modifying and
|
||||
building `rustc`!
|
||||
|
||||
## Using nix-shell
|
||||
|
||||
If you're using nix, you can use the following nix-shell to work on Rust:
|
||||
|
||||
```nix
|
||||
{ pkgs ? import <nixpkgs> {} }:
|
||||
pkgs.mkShell {
|
||||
name = "rustc";
|
||||
nativeBuildInputs = with pkgs; [
|
||||
binutils cmake ninja pkg-config python3 git curl cacert patchelf nix
|
||||
];
|
||||
buildInputs = with pkgs; [
|
||||
openssl glibc.out glibc.static
|
||||
];
|
||||
# Avoid creating text files for ICEs.
|
||||
RUSTC_ICE = "0";
|
||||
# Provide `libstdc++.so.6` for the self-contained lld.
|
||||
LD_LIBRARY_PATH = "${with pkgs; lib.makeLibraryPath [
|
||||
stdenv.cc.cc.lib
|
||||
]}";
|
||||
}
|
||||
```
|
||||
|
||||
Note that when using nix on a non-NixOS distribution, it may be necessary to set
|
||||
**`patch-binaries-for-nix = true` in `config.toml`**. Bootstrap tries to detect
|
||||
whether it's running in nix and enable patching automatically, but this
|
||||
detection can have false negatives.
|
||||
|
||||
You can also use your nix shell to manage `config.toml`:
|
||||
|
||||
```nix
|
||||
let
|
||||
config = pkgs.writeText "rustc-config" ''
|
||||
# Your config.toml content goes here
|
||||
''
|
||||
pkgs.mkShell {
|
||||
/* ... */
|
||||
# This environment variable tells bootstrap where our config.toml is.
|
||||
RUST_BOOTSTRAP_CONFIG = config;
|
||||
}
|
||||
```
|
||||
|
||||
## Shell Completions
|
||||
|
||||
If you use Bash, Zsh, Fish or PowerShell, you can find automatically-generated shell
|
||||
completion scripts for `x.py` in
|
||||
[`src/etc/completions`](https://github.com/rust-lang/rust/tree/master/src/etc/completions).
|
||||
|
||||
You can use `source ./src/etc/completions/x.py.<extension>` to load completions
|
||||
for your shell of choice, or `& .\src\etc\completions\x.py.ps1` for PowerShell.
|
||||
Adding this to your shell's startup script (e.g. `.bashrc`) will automatically
|
||||
load this completion.
|
||||
|
||||
[`src/etc/rust_analyzer_settings.json`]: https://github.com/rust-lang/rust/blob/master/src/etc/rust_analyzer_settings.json
|
||||
[`src/etc/rust_analyzer_eglot.el`]: https://github.com/rust-lang/rust/blob/master/src/etc/rust_analyzer_eglot.el
|
||||
[`src/etc/rust_analyzer_helix.toml`]: https://github.com/rust-lang/rust/blob/master/src/etc/rust_analyzer_helix.toml
|
||||
[`src/etc/pre-push.sh`]: https://github.com/rust-lang/rust/blob/master/src/etc/pre-push.sh
|
||||
33
src/doc/rustc-dev-guide/src/cli.md
Normal file
33
src/doc/rustc-dev-guide/src/cli.md
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
# Command-line Arguments
|
||||
|
||||
Command-line flags are documented in the [rustc book][cli-docs]. All *stable*
|
||||
flags should be documented there. Unstable flags should be documented in the
|
||||
[unstable book].
|
||||
|
||||
See the [forge guide for new options] for details on the *procedure* for
|
||||
adding a new command-line argument.
|
||||
|
||||
## Guidelines
|
||||
|
||||
- Flags should be orthogonal to each other. For example, if we'd have a
|
||||
json-emitting variant of multiple actions `foo` and `bar`, an additional
|
||||
`--json` flag is better than adding `--foo-json` and `--bar-json`.
|
||||
- Avoid flags with the `no-` prefix. Instead, use the [`parse_bool`] function,
|
||||
such as `-C embed-bitcode=no`.
|
||||
- Consider the behavior if the flag is passed multiple times. In some
|
||||
situations, the values should be accumulated (in order!). In other
|
||||
situations, subsequent flags should override previous flags (for example,
|
||||
the lint-level flags). And some flags (like `-o`) should generate an error
|
||||
if it is too ambiguous what multiple flags would mean.
|
||||
- Always give options a long descriptive name, if only for more understandable
|
||||
compiler scripts.
|
||||
- The `--verbose` flag is for adding verbose information to `rustc`
|
||||
output. For example, using it with the `--version`
|
||||
flag gives information about the hashes of the compiler code.
|
||||
- Experimental flags and options must be guarded behind the `-Z
|
||||
unstable-options` flag.
|
||||
|
||||
[cli-docs]: https://doc.rust-lang.org/rustc/command-line-arguments.html
|
||||
[forge guide for new options]: https://forge.rust-lang.org/compiler/new_option.html
|
||||
[unstable book]: https://doc.rust-lang.org/nightly/unstable-book/
|
||||
[`parse_bool`]: https://github.com/rust-lang/rust/blob/e5335592e78354e33d798d20c04bcd677c1df62d/src/librustc_session/options.rs#L307-L313
|
||||
215
src/doc/rustc-dev-guide/src/closure.md
Normal file
215
src/doc/rustc-dev-guide/src/closure.md
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
# Closure Capture Inference
|
||||
|
||||
This section describes how rustc handles closures. Closures in Rust are
|
||||
effectively "desugared" into structs that contain the values they use (or
|
||||
references to the values they use) from their creator's stack frame. rustc has
|
||||
the job of figuring out which values a closure uses and how, so it can decide
|
||||
whether to capture a given variable by shared reference, mutable reference, or
|
||||
by move. rustc also has to figure out which of the closure traits ([`Fn`][fn],
|
||||
[`FnMut`][fn_mut], or [`FnOnce`][fn_once]) a closure is capable of
|
||||
implementing.
|
||||
|
||||
[fn]: https://doc.rust-lang.org/std/ops/trait.Fn.html
|
||||
[fn_mut]:https://doc.rust-lang.org/std/ops/trait.FnMut.html
|
||||
[fn_once]: https://doc.rust-lang.org/std/ops/trait.FnOnce.html
|
||||
|
||||
Let's start with a few examples:
|
||||
|
||||
### Example 1
|
||||
|
||||
To start, let's take a look at how the closure in the following example is desugared:
|
||||
|
||||
```rust
|
||||
fn closure(f: impl Fn()) {
|
||||
f();
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let x: i32 = 10;
|
||||
closure(|| println!("Hi {}", x)); // The closure just reads x.
|
||||
println!("Value of x after return {}", x);
|
||||
}
|
||||
```
|
||||
|
||||
Let's say the above is the content of a file called `immut.rs`. If we compile
|
||||
`immut.rs` using the following command, the [`-Z dump-mir=all`][dump-mir] flag will cause
|
||||
`rustc` to generate and dump the [MIR][mir] to a directory called `mir_dump`.
|
||||
```console
|
||||
> rustc +stage1 immut.rs -Z dump-mir=all
|
||||
```
|
||||
|
||||
[mir]: ./mir/index.md
|
||||
[dump-mir]: ./mir/passes.md
|
||||
|
||||
After we run this command, we will see a newly generated directory in our
|
||||
current working directory called `mir_dump`, which will contain several files.
|
||||
If we look at file `rustc.main.-------.mir_map.0.mir`, we will find, among
|
||||
other things, it also contains this line:
|
||||
|
||||
```rust,ignore
|
||||
_4 = &_1;
|
||||
_3 = [closure@immut.rs:7:13: 7:36] { x: move _4 };
|
||||
```
|
||||
|
||||
Note that in the MIR examples in this chapter, `_1` is `x`.
|
||||
|
||||
Here, in the first line `_4 = &_1;`, the `mir_dump` tells us that `x` was borrowed
|
||||
as an immutable reference. This is what we would hope as our closure just
|
||||
reads `x`.
|
||||
|
||||
### Example 2
|
||||
|
||||
Here is another example:
|
||||
|
||||
```rust
|
||||
fn closure(mut f: impl FnMut()) {
|
||||
f();
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let mut x: i32 = 10;
|
||||
closure(|| {
|
||||
x += 10; // The closure mutates the value of x
|
||||
println!("Hi {}", x)
|
||||
});
|
||||
println!("Value of x after return {}", x);
|
||||
}
|
||||
```
|
||||
|
||||
```rust,ignore
|
||||
_4 = &mut _1;
|
||||
_3 = [closure@mut.rs:7:13: 10:6] { x: move _4 };
|
||||
```
|
||||
This time around, in the line `_4 = &mut _1;`, we see that the borrow is changed to a mutable borrow.
|
||||
Fair enough! The closure increments `x` by 10.
|
||||
|
||||
### Example 3
|
||||
|
||||
One more example:
|
||||
|
||||
```rust
|
||||
fn closure(f: impl FnOnce()) {
|
||||
f();
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let x = vec![21];
|
||||
closure(|| {
|
||||
drop(x); // Makes x unusable after the fact.
|
||||
});
|
||||
// println!("Value of x after return {:?}", x);
|
||||
}
|
||||
```
|
||||
|
||||
```rust,ignore
|
||||
_6 = [closure@move.rs:7:13: 9:6] { x: move _1 }; // bb16[3]: scope 1 at move.rs:7:13: 9:6
|
||||
```
|
||||
Here, `x` is directly moved into the closure and the access to it will not be permitted after the
|
||||
closure.
|
||||
|
||||
## Inferences in the compiler
|
||||
|
||||
Now let's dive into rustc code and see how all these inferences are done by the compiler.
|
||||
|
||||
Let's start with defining a term that we will be using quite a bit in the rest of the discussion -
|
||||
*upvar*. An **upvar** is a variable that is local to the function where the closure is defined. So,
|
||||
in the above examples, **x** will be an upvar to the closure. They are also sometimes referred to as
|
||||
the *free variables* meaning they are not bound to the context of the closure.
|
||||
[`compiler/rustc_passes/src/upvars.rs`][upvars] defines a query called *upvars_mentioned*
|
||||
for this purpose.
|
||||
|
||||
[upvars]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_passes/upvars/index.html
|
||||
|
||||
Other than lazy invocation, one other thing that distinguishes a closure from a
|
||||
normal function is that it can use the upvars. It borrows these upvars from its surrounding
|
||||
context; therefore the compiler has to determine the upvar's borrow type. The compiler starts with
|
||||
assigning an immutable borrow type and lowers the restriction (that is, changes it from
|
||||
**immutable** to **mutable** to **move**) as needed, based on the usage. In Example 1 above, the
|
||||
closure only uses the variable for printing but does not modify it in any way and therefore, in the
|
||||
`mir_dump`, we find the borrow type for the upvar `x` to be immutable. In example 2, however, the
|
||||
closure modifies `x` and increments it by some value. Because of this mutation, the compiler, which
|
||||
started off assigning `x` as an immutable reference type, has to adjust it as a mutable reference.
|
||||
Likewise in the third example, the closure drops the vector and therefore this requires the variable
|
||||
`x` to be moved into the closure. Depending on the borrow kind, the closure has to implement the
|
||||
appropriate trait: `Fn` trait for immutable borrow, `FnMut` for mutable borrow,
|
||||
and `FnOnce` for move semantics.
|
||||
|
||||
Most of the code related to the closure is in the
|
||||
[`compiler/rustc_hir_typeck/src/upvar.rs`][upvar] file and the data structures are
|
||||
declared in the file [`compiler/rustc_middle/src/ty/mod.rs`][ty].
|
||||
|
||||
[upvar]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir_typeck/upvar/index.html
|
||||
[ty]:https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/index.html
|
||||
|
||||
Before we go any further, let's discuss how we can examine the flow of control through the rustc
|
||||
codebase. For closures specifically, set the `RUSTC_LOG` env variable as below and collect the
|
||||
output in a file:
|
||||
|
||||
```console
|
||||
> RUSTC_LOG=rustc_hir_typeck::upvar rustc +stage1 -Z dump-mir=all \
|
||||
<.rs file to compile> 2> <file where the output will be dumped>
|
||||
```
|
||||
|
||||
This uses the stage1 compiler and enables `debug!` logging for the
|
||||
`rustc_hir_typeck::upvar` module.
|
||||
|
||||
The other option is to step through the code using lldb or gdb.
|
||||
|
||||
1. `rust-lldb build/host/stage1/bin/rustc test.rs`
|
||||
2. In lldb:
|
||||
1. `b upvar.rs:134` // Setting the breakpoint on a certain line in the upvar.rs file
|
||||
2. `r` // Run the program until it hits the breakpoint
|
||||
|
||||
Let's start with [`upvar.rs`][upvar]. This file has something called
|
||||
the [`euv::ExprUseVisitor`] which walks the source of the closure and
|
||||
invokes a callback for each upvar that is borrowed, mutated, or moved.
|
||||
|
||||
[`euv::ExprUseVisitor`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir_typeck/expr_use_visitor/struct.ExprUseVisitor.html
|
||||
|
||||
```rust
|
||||
fn main() {
|
||||
let mut x = vec![21];
|
||||
let _cl = || {
|
||||
let y = x[0]; // 1.
|
||||
x[0] += 1; // 2.
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
In the above example, our visitor will be called twice, for the lines marked 1 and 2, once for a
|
||||
shared borrow and another one for a mutable borrow. It will also tell us what was borrowed.
|
||||
|
||||
The callbacks are defined by implementing the [`Delegate`] trait. The
|
||||
[`InferBorrowKind`][ibk] type implements `Delegate` and keeps a map that
|
||||
records for each upvar which mode of capture was required. The modes of capture
|
||||
can be `ByValue` (moved) or `ByRef` (borrowed). For `ByRef` borrows, the possible
|
||||
[`BorrowKind`]s are `ImmBorrow`, `UniqueImmBorrow`, `MutBorrow` as defined in the
|
||||
[`compiler/rustc_middle/src/ty/mod.rs`][middle_ty].
|
||||
|
||||
[`BorrowKind`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/enum.BorrowKind.html
|
||||
[middle_ty]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/index.html
|
||||
|
||||
`Delegate` defines a few different methods (the different callbacks):
|
||||
**consume** for *move* of a variable, **borrow** for a *borrow* of some kind
|
||||
(shared or mutable), and **mutate** when we see an *assignment* of something.
|
||||
|
||||
All of these callbacks have a common argument *cmt* which stands for Category,
|
||||
Mutability and Type and is defined in
|
||||
[`compiler/rustc_hir_typeck/src/expr_use_visitor.rs`][cmt]. Borrowing from the code
|
||||
comments, "`cmt` is a complete categorization of a value indicating where it
|
||||
originated and how it is located, as well as the mutability of the memory in
|
||||
which the value is stored". Based on the callback (consume, borrow etc.), we
|
||||
will call the relevant `adjust_upvar_borrow_kind_for_<something>` and pass the
|
||||
`cmt` along. Once the borrow type is adjusted, we store it in the table, which
|
||||
basically says what borrows were made for each closure.
|
||||
|
||||
```rust,ignore
|
||||
self.tables
|
||||
.borrow_mut()
|
||||
.upvar_capture_map
|
||||
.extend(delegate.adjust_upvar_captures);
|
||||
```
|
||||
|
||||
[`Delegate`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir_typeck/expr_use_visitor/trait.Delegate.html
|
||||
[ibk]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir_typeck/upvar/struct.InferBorrowKind.html
|
||||
[cmt]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir_typeck/expr_use_visitor/index.html
|
||||
95
src/doc/rustc-dev-guide/src/coherence.md
Normal file
95
src/doc/rustc-dev-guide/src/coherence.md
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
|
||||
# Coherence
|
||||
|
||||
> NOTE: this is based on [notes by @lcnr](https://github.com/rust-lang/rust/pull/121848)
|
||||
|
||||
Coherence checking is what detects both trait impls and inherent impls overlapping with others.
|
||||
(reminder: [inherent impls](https://doc.rust-lang.org/reference/items/implementations.html#inherent-implementations) are impls of concrete types like `impl MyStruct {}`)
|
||||
|
||||
Overlapping trait impls always produce an error,
|
||||
while overlapping inherent impls result in an error only if they have methods with the same name.
|
||||
|
||||
Checking for overlaps is split in two parts. First there's the [overlap check(s)](#overlap-checks),
|
||||
which finds overlaps between trait and inherent implementations that the compiler currently knows about.
|
||||
|
||||
However, coherence also results in an error if any other impls **could** exist,
|
||||
even if they are currently unknown.
|
||||
This affects impls which may get added to upstream crates in a backwards compatible way,
|
||||
and impls from downstream crates.
|
||||
This is called the Orphan check.
|
||||
|
||||
## Overlap checks
|
||||
|
||||
Overlap checks are performed for both inherent impls, and for trait impls.
|
||||
Both use the same overlap checking code, but they are really done as two separate analyses.
|
||||
Overlap checks always consider pairs of implementations, comparing them to each other.
|
||||
|
||||
Overlap checking for inherent impl blocks is done through `fn check_item` (in coherence/inherent_impls_overlap.rs),
|
||||
where you can very clearly see that (at least for small `n`), the check really performs `n^2`
|
||||
comparisons between impls.
|
||||
|
||||
In the case of traits, this check is currently done as part of building the [specialization graph](traits/specialization.md),
|
||||
to handle specializing impls overlapping with their parent, but this may change in the future.
|
||||
|
||||
In both cases, all pairs of impls are checked for overlap.
|
||||
|
||||
Overlapping is sometimes partially allowed:
|
||||
|
||||
1. for marker traits
|
||||
2. under [specialization](traits/specialization.md)
|
||||
|
||||
but normally isn't.
|
||||
|
||||
The overlap check has various modes (see [`OverlapMode`]).
|
||||
Importantly, there's the explicit negative impl check, and the implicit negative impl check.
|
||||
Both try to prove that an overlap is definitely impossible.
|
||||
|
||||
[`OverlapMode`]: https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/traits/specialization_graph/enum.OverlapMode.html
|
||||
|
||||
### The explicit negative impl check
|
||||
|
||||
This check is done in [`impl_intersection_has_negative_obligation`].
|
||||
|
||||
This check tries to find a negative trait implementation.
|
||||
For example:
|
||||
|
||||
```rust
|
||||
struct MyCustomErrorType;
|
||||
|
||||
// both in your own crate
|
||||
impl From<&str> for MyCustomErrorType {}
|
||||
impl<E> From<E> for MyCustomErrorType where E: Error {}
|
||||
```
|
||||
|
||||
In this example, we'd get:
|
||||
`MyCustomErrorType: From<&str>` and `MyCustomErrorType: From<?E>`, giving `?E = &str`.
|
||||
|
||||
And thus, these two implementations would overlap.
|
||||
However, libstd provides `&str: !Error`, and therefore guarantees that there
|
||||
will never be a positive implementation of `&str: Error`, and thus there is no overlap.
|
||||
|
||||
Note that for this kind of negative impl check, we must have explicit negative implementations provided.
|
||||
This is not currently stable.
|
||||
|
||||
[`impl_intersection_has_negative_obligation`]: https://doc.rust-lang.org/beta/nightly-rustc/rustc_trait_selection/traits/coherence/fn.impl_intersection_has_negative_obligation.html
|
||||
|
||||
### The implicit negative impl check
|
||||
|
||||
This check is done in [`impl_intersection_has_impossible_obligation`],
|
||||
and does not rely on negative trait implementations and is stable.
|
||||
|
||||
Let's say there's a
|
||||
```rust
|
||||
impl From<MyLocalType> for Box<dyn Error> {} // in your own crate
|
||||
impl<E> From<E> for Box<dyn Error> where E: Error {} // in std
|
||||
```
|
||||
|
||||
This would give: `Box<dyn Error>: From<MyLocalType>`, and `Box<dyn Error>: From<?E>`,
|
||||
giving `?E = MyLocalType`.
|
||||
|
||||
In your crate there's no `MyLocalType: Error`, and downstream crates cannot implement `Error` (a remote trait) for `MyLocalType` (a remote type).
|
||||
Therefore, these two impls do not overlap.
|
||||
Importantly, this works even if there isn't an `impl !Error for MyLocalType`.
|
||||
|
||||
[`impl_intersection_has_impossible_obligation`]: https://doc.rust-lang.org/beta/nightly-rustc/rustc_trait_selection/traits/coherence/fn.impl_intersection_has_impossible_obligation.html
|
||||
|
||||
405
src/doc/rustc-dev-guide/src/compiler-debugging.md
Normal file
405
src/doc/rustc-dev-guide/src/compiler-debugging.md
Normal file
|
|
@ -0,0 +1,405 @@
|
|||
# Debugging the compiler
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
This chapter contains a few tips to debug the compiler. These tips aim to be
|
||||
useful no matter what you are working on. Some of the other chapters have
|
||||
advice about specific parts of the compiler (e.g. the [Queries Debugging and
|
||||
Testing chapter](./incrcomp-debugging.html) or the [LLVM Debugging
|
||||
chapter](./backend/debugging.md)).
|
||||
|
||||
## Configuring the compiler
|
||||
|
||||
By default, rustc is built without most debug information. To enable debug info,
|
||||
set `debug = true` in your config.toml.
|
||||
|
||||
Setting `debug = true` turns on many different debug options (e.g., `debug-assertions`,
|
||||
`debug-logging`, etc.) which can be individually tweaked if you want to, but many people
|
||||
simply set `debug = true`.
|
||||
|
||||
If you want to use GDB to debug rustc, please set the following options in `config.toml`:
|
||||
|
||||
```toml
|
||||
[rust]
|
||||
debug = true
|
||||
debuginfo-level = 2
|
||||
```
|
||||
|
||||
> NOTE:
|
||||
> This will use a lot of disk space
|
||||
> (upwards of <!-- date-check Aug 2022 --> 35GB),
|
||||
> and will take a lot more compile time.
|
||||
> With `debuginfo-level = 1` (the default when `debug = true`),
|
||||
> you will be able to track the execution path,
|
||||
> but will lose the symbol information for debugging.
|
||||
|
||||
The default configuration will enable `symbol-mangling-version` v0.
|
||||
This requires at least GDB v10.2,
|
||||
otherwise you need to disable the new symbol mangling scheme in `config.toml`:
|
||||
|
||||
```toml
|
||||
[rust]
|
||||
new-symbol-mangling = false
|
||||
```
|
||||
|
||||
> See the comments in `config.example.toml` for more info.
|
||||
|
||||
You will need to rebuild the compiler after changing any configuration option.
|
||||
|
||||
## Suppressing the ICE file
|
||||
|
||||
By default, if rustc encounters an Internal Compiler Error (ICE) it will dump the ICE contents to an
|
||||
ICE file within the current working directory named `rustc-ice-<timestamp>-<pid>.txt`. If this is
|
||||
not desirable, you can prevent the ICE file from being created with `RUSTC_ICE=0`.
|
||||
|
||||
## Getting a backtrace
|
||||
[getting-a-backtrace]: #getting-a-backtrace
|
||||
|
||||
When you have an ICE (panic in the compiler), you can set
|
||||
`RUST_BACKTRACE=1` to get the stack trace of the `panic!` like in
|
||||
normal Rust programs. IIRC backtraces **don't work** on MinGW,
|
||||
sorry. If you have trouble or the backtraces are full of `unknown`,
|
||||
you might want to find some way to use Linux, Mac, or MSVC on Windows.
|
||||
|
||||
In the default configuration (without `debug` set to `true`), you don't have line numbers
|
||||
enabled, so the backtrace looks like this:
|
||||
|
||||
```text
|
||||
stack backtrace:
|
||||
0: std::sys::imp::backtrace::tracing::imp::unwind_backtrace
|
||||
1: std::sys_common::backtrace::_print
|
||||
2: std::panicking::default_hook::{{closure}}
|
||||
3: std::panicking::default_hook
|
||||
4: std::panicking::rust_panic_with_hook
|
||||
5: std::panicking::begin_panic
|
||||
(~~~~ LINES REMOVED BY ME FOR BREVITY ~~~~)
|
||||
32: rustc_typeck::check_crate
|
||||
33: <std::thread::local::LocalKey<T>>::with
|
||||
34: <std::thread::local::LocalKey<T>>::with
|
||||
35: rustc::ty::context::TyCtxt::create_and_enter
|
||||
36: rustc_driver::driver::compile_input
|
||||
37: rustc_driver::run_compiler
|
||||
```
|
||||
|
||||
If you set `debug = true`, you will get line numbers for the stack trace.
|
||||
Then the backtrace will look like this:
|
||||
|
||||
```text
|
||||
stack backtrace:
|
||||
(~~~~ LINES REMOVED BY ME FOR BREVITY ~~~~)
|
||||
at /home/user/rust/compiler/rustc_typeck/src/check/cast.rs:110
|
||||
7: rustc_typeck::check::cast::CastCheck::check
|
||||
at /home/user/rust/compiler/rustc_typeck/src/check/cast.rs:572
|
||||
at /home/user/rust/compiler/rustc_typeck/src/check/cast.rs:460
|
||||
at /home/user/rust/compiler/rustc_typeck/src/check/cast.rs:370
|
||||
(~~~~ LINES REMOVED BY ME FOR BREVITY ~~~~)
|
||||
33: rustc_driver::driver::compile_input
|
||||
at /home/user/rust/compiler/rustc_driver/src/driver.rs:1010
|
||||
at /home/user/rust/compiler/rustc_driver/src/driver.rs:212
|
||||
34: rustc_driver::run_compiler
|
||||
at /home/user/rust/compiler/rustc_driver/src/lib.rs:253
|
||||
```
|
||||
|
||||
## `-Z` flags
|
||||
|
||||
The compiler has a bunch of `-Z *` flags. These are unstable flags that are only
|
||||
enabled on nightly. Many of them are useful for debugging. To get a full listing
|
||||
of `-Z` flags, use `-Z help`.
|
||||
|
||||
One useful flag is `-Z verbose-internals`, which generally enables printing more
|
||||
info that could be useful for debugging.
|
||||
|
||||
Right below you can find elaborate explainers on a selected few.
|
||||
|
||||
### Getting a backtrace for errors
|
||||
[getting-a-backtrace-for-errors]: #getting-a-backtrace-for-errors
|
||||
|
||||
If you want to get a backtrace to the point where the compiler emits an
|
||||
error message, you can pass the `-Z treat-err-as-bug=n`, which will make
|
||||
the compiler panic on the `nth` error. If you leave off `=n`, the compiler will
|
||||
assume `1` for `n` and thus panic on the first error it encounters.
|
||||
|
||||
For example:
|
||||
|
||||
```bash
|
||||
$ cat error.rs
|
||||
```
|
||||
|
||||
```rust
|
||||
fn main() {
|
||||
1 + ();
|
||||
}
|
||||
```
|
||||
|
||||
```
|
||||
$ rustc +stage1 error.rs
|
||||
error[E0277]: cannot add `()` to `{integer}`
|
||||
--> error.rs:2:7
|
||||
|
|
||||
2 | 1 + ();
|
||||
| ^ no implementation for `{integer} + ()`
|
||||
|
|
||||
= help: the trait `Add<()>` is not implemented for `{integer}`
|
||||
|
||||
error: aborting due to previous error
|
||||
```
|
||||
|
||||
Now, where does the error above come from?
|
||||
|
||||
```
|
||||
$ RUST_BACKTRACE=1 rustc +stage1 error.rs -Z treat-err-as-bug
|
||||
error[E0277]: the trait bound `{integer}: std::ops::Add<()>` is not satisfied
|
||||
--> error.rs:2:7
|
||||
|
|
||||
2 | 1 + ();
|
||||
| ^ no implementation for `{integer} + ()`
|
||||
|
|
||||
= help: the trait `std::ops::Add<()>` is not implemented for `{integer}`
|
||||
|
||||
error: internal compiler error: unexpected panic
|
||||
|
||||
note: the compiler unexpectedly panicked. this is a bug.
|
||||
|
||||
note: we would appreciate a bug report: https://github.com/rust-lang/rust/blob/master/CONTRIBUTING.md#bug-reports
|
||||
|
||||
note: rustc 1.24.0-dev running on x86_64-unknown-linux-gnu
|
||||
|
||||
note: run with `RUST_BACKTRACE=1` for a backtrace
|
||||
|
||||
thread 'rustc' panicked at 'encountered error with `-Z treat_err_as_bug',
|
||||
/home/user/rust/compiler/rustc_errors/src/lib.rs:411:12
|
||||
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose
|
||||
backtrace.
|
||||
stack backtrace:
|
||||
(~~~ IRRELEVANT PART OF BACKTRACE REMOVED BY ME ~~~)
|
||||
7: rustc::traits::error_reporting::<impl rustc::infer::InferCtxt<'a, 'tcx>>
|
||||
::report_selection_error
|
||||
at /home/user/rust/compiler/rustc_middle/src/traits/error_reporting.rs:823
|
||||
8: rustc::traits::error_reporting::<impl rustc::infer::InferCtxt<'a, 'tcx>>
|
||||
::report_fulfillment_errors
|
||||
at /home/user/rust/compiler/rustc_middle/src/traits/error_reporting.rs:160
|
||||
at /home/user/rust/compiler/rustc_middle/src/traits/error_reporting.rs:112
|
||||
9: rustc_typeck::check::FnCtxt::select_obligations_where_possible
|
||||
at /home/user/rust/compiler/rustc_typeck/src/check/mod.rs:2192
|
||||
(~~~ IRRELEVANT PART OF BACKTRACE REMOVED BY ME ~~~)
|
||||
36: rustc_driver::run_compiler
|
||||
at /home/user/rust/compiler/rustc_driver/src/lib.rs:253
|
||||
```
|
||||
|
||||
Cool, now I have a backtrace for the error!
|
||||
|
||||
### Debugging delayed bugs
|
||||
|
||||
The `-Z eagerly-emit-delayed-bugs` option makes it easy to debug delayed bugs.
|
||||
It turns them into normal errors, i.e. makes them visible. This can be used in
|
||||
combination with `-Z treat-err-as-bug` to stop at a particular delayed bug and
|
||||
get a backtrace.
|
||||
|
||||
### Getting the error creation location
|
||||
|
||||
`-Z track-diagnostics` can help figure out where errors are emitted. It uses `#[track_caller]`
|
||||
for this and prints its location alongside the error:
|
||||
|
||||
```
|
||||
$ RUST_BACKTRACE=1 rustc +stage1 error.rs -Z track-diagnostics
|
||||
error[E0277]: cannot add `()` to `{integer}`
|
||||
--> src\error.rs:2:7
|
||||
|
|
||||
2 | 1 + ();
|
||||
| ^ no implementation for `{integer} + ()`
|
||||
-Ztrack-diagnostics: created at compiler/rustc_trait_selection/src/traits/error_reporting/mod.rs:638:39
|
||||
|
|
||||
= help: the trait `Add<()>` is not implemented for `{integer}`
|
||||
= help: the following other types implement trait `Add<Rhs>`:
|
||||
<&'a f32 as Add<f32>>
|
||||
<&'a f64 as Add<f64>>
|
||||
<&'a i128 as Add<i128>>
|
||||
<&'a i16 as Add<i16>>
|
||||
<&'a i32 as Add<i32>>
|
||||
<&'a i64 as Add<i64>>
|
||||
<&'a i8 as Add<i8>>
|
||||
<&'a isize as Add<isize>>
|
||||
and 48 others
|
||||
|
||||
For more information about this error, try `rustc --explain E0277`.
|
||||
```
|
||||
|
||||
This is similar to, but different from, `-Z treat-err-as-bug`:
|
||||
- it will print the locations for all errors emitted
|
||||
- it does not require a compiler built with debug symbols
|
||||
- you don't have to read through a big stack trace.
|
||||
|
||||
## Getting logging output
|
||||
|
||||
The compiler uses the [`tracing`] crate for logging.
|
||||
|
||||
[`tracing`]: https://docs.rs/tracing
|
||||
|
||||
For details see [the guide section on tracing](./tracing.md).
|
||||
|
||||
## Narrowing (Bisecting) Regressions
|
||||
|
||||
The [cargo-bisect-rustc][bisect] tool can be used as a quick and easy way to
|
||||
find exactly which PR caused a change in `rustc` behavior. It automatically
|
||||
downloads `rustc` PR artifacts and tests them against a project you provide
|
||||
until it finds the regression. You can then look at the PR to get more context
|
||||
on *why* it was changed. See [this tutorial][bisect-tutorial] on how to use
|
||||
it.
|
||||
|
||||
[bisect]: https://github.com/rust-lang/cargo-bisect-rustc
|
||||
[bisect-tutorial]: https://rust-lang.github.io/cargo-bisect-rustc/tutorial.html
|
||||
|
||||
## Downloading Artifacts from Rust's CI
|
||||
|
||||
The [rustup-toolchain-install-master][rtim] tool by kennytm can be used to
|
||||
download the artifacts produced by Rust's CI for a specific SHA1 -- this
|
||||
basically corresponds to the successful landing of some PR -- and then sets
|
||||
them up for your local use. This also works for artifacts produced by `@bors
|
||||
try`. This is helpful when you want to examine the resulting build of a PR
|
||||
without doing the build yourself.
|
||||
|
||||
[rtim]: https://github.com/kennytm/rustup-toolchain-install-master
|
||||
|
||||
## `#[rustc_*]` TEST attributes
|
||||
|
||||
The compiler defines a whole lot of internal (perma-unstable) attributes, some of which are useful
|
||||
for debugging by dumping extra compiler-internal information. These are prefixed with `rustc_` and
|
||||
are gated behind the internal feature `rustc_attrs` (enabled via e.g. `#![feature(rustc_attrs)]`).
|
||||
|
||||
For a complete and up to date list, see [`builtin_attrs`]. More specifically, the ones marked `TEST`.
|
||||
Here are some notable ones:
|
||||
|
||||
| Attribute | Description |
|
||||
|----------------|-------------|
|
||||
| `rustc_def_path` | Dumps the [`def_path_str`] of an item. |
|
||||
| `rustc_dump_def_parents` | Dumps the chain of `DefId` parents of certain definitions. |
|
||||
| `rustc_dump_item_bounds` | Dumps the [`item_bounds`] of an item. |
|
||||
| `rustc_dump_predicates` | Dumps the [`predicates_of`] an item. |
|
||||
| `rustc_dump_vtable` | |
|
||||
| `rustc_hidden_type_of_opaques` | Dumps the [hidden type of each opaque type][opaq] in the crate. |
|
||||
| `rustc_layout` | [See this section](#debugging-type-layouts). |
|
||||
| `rustc_object_lifetime_default` | Dumps the [object lifetime defaults] of an item. |
|
||||
| `rustc_outlives` | Dumps implied bounds of an item. More precisely, the [`inferred_outlives_of`] an item. |
|
||||
| `rustc_regions` | Dumps NLL closure region requirements. |
|
||||
| `rustc_symbol_name` | Dumps the mangled & demangled [`symbol_name`] of an item. |
|
||||
| `rustc_variances` | Dumps the [variances] of an item. |
|
||||
|
||||
Right below you can find elaborate explainers on a selected few.
|
||||
|
||||
[`builtin_attrs`]: https://github.com/rust-lang/rust/blob/master/compiler/rustc_feature/src/builtin_attrs.rs
|
||||
[`def_path_str`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.def_path_str
|
||||
[`inferred_outlives_of`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.inferred_outlives_of
|
||||
[`item_bounds`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.item_bounds
|
||||
[`predicates_of`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.predicates_of
|
||||
[`symbol_name`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.symbol_name
|
||||
[object lifetime defaults]: https://doc.rust-lang.org/reference/lifetime-elision.html#default-trait-object-lifetimes
|
||||
[opaq]: ./opaque-types-impl-trait-inference.md
|
||||
[variances]: ./variance.md
|
||||
|
||||
### Formatting Graphviz output (.dot files)
|
||||
[formatting-graphviz-output]: #formatting-graphviz-output
|
||||
|
||||
Some compiler options for debugging specific features yield graphviz graphs -
|
||||
e.g. the `#[rustc_mir(borrowck_graphviz_postflow="suffix.dot")]` attribute
|
||||
dumps various borrow-checker dataflow graphs.
|
||||
|
||||
These all produce `.dot` files. To view these files, install graphviz (e.g.
|
||||
`apt-get install graphviz`) and then run the following commands:
|
||||
|
||||
```bash
|
||||
$ dot -T pdf maybe_init_suffix.dot > maybe_init_suffix.pdf
|
||||
$ firefox maybe_init_suffix.pdf # Or your favorite pdf viewer
|
||||
```
|
||||
|
||||
### Debugging type layouts
|
||||
|
||||
The internal attribute `#[rustc_layout]` can be used to dump the [`Layout`] of
|
||||
the type it is attached to. For example:
|
||||
|
||||
```rust
|
||||
#![feature(rustc_attrs)]
|
||||
|
||||
#[rustc_layout(debug)]
|
||||
type T<'a> = &'a u32;
|
||||
```
|
||||
|
||||
Will emit the following:
|
||||
|
||||
```text
|
||||
error: layout_of(&'a u32) = Layout {
|
||||
fields: Primitive,
|
||||
variants: Single {
|
||||
index: 0,
|
||||
},
|
||||
abi: Scalar(
|
||||
Scalar {
|
||||
value: Pointer,
|
||||
valid_range: 1..=18446744073709551615,
|
||||
},
|
||||
),
|
||||
largest_niche: Some(
|
||||
Niche {
|
||||
offset: Size {
|
||||
raw: 0,
|
||||
},
|
||||
scalar: Scalar {
|
||||
value: Pointer,
|
||||
valid_range: 1..=18446744073709551615,
|
||||
},
|
||||
},
|
||||
),
|
||||
align: AbiAndPrefAlign {
|
||||
abi: Align {
|
||||
pow2: 3,
|
||||
},
|
||||
pref: Align {
|
||||
pow2: 3,
|
||||
},
|
||||
},
|
||||
size: Size {
|
||||
raw: 8,
|
||||
},
|
||||
}
|
||||
--> src/lib.rs:4:1
|
||||
|
|
||||
4 | type T<'a> = &'a u32;
|
||||
| ^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
error: aborting due to previous error
|
||||
```
|
||||
|
||||
[`Layout`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_target/abi/struct.Layout.html
|
||||
|
||||
|
||||
## Configuring CodeLLDB for debugging `rustc`
|
||||
|
||||
If you are using VSCode, and have edited your `config.toml` to request debugging
|
||||
level 1 or 2 for the parts of the code you're interested in, then you should be
|
||||
able to use the [CodeLLDB] extension in VSCode to debug it.
|
||||
|
||||
Here is a sample `launch.json` file, being used to run a stage 1 compiler directly
|
||||
from the directory where it is built (does not have to be "installed"):
|
||||
|
||||
```javascript
|
||||
// .vscode/launch.json
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "Launch",
|
||||
"args": [], // array of string command-line arguments to pass to compiler
|
||||
"program": "${workspaceFolder}/build/host/stage1/bin/rustc",
|
||||
"windows": { // applicable if using windows
|
||||
"program": "${workspaceFolder}/build/host/stage1/bin/rustc.exe"
|
||||
},
|
||||
"cwd": "${workspaceFolder}", // current working directory at program start
|
||||
"stopOnEntry": false,
|
||||
"sourceLanguages": ["rust"]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
[CodeLLDB]: https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb
|
||||
215
src/doc/rustc-dev-guide/src/compiler-src.md
Normal file
215
src/doc/rustc-dev-guide/src/compiler-src.md
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
# High-level overview of the compiler source
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
Now that we have [seen what the compiler does][orgch],
|
||||
let's take a look at the structure of the [`rust-lang/rust`] repository,
|
||||
where the rustc source code lives.
|
||||
|
||||
[`rust-lang/rust`]: https://github.com/rust-lang/rust
|
||||
|
||||
> You may find it helpful to read the ["Overview of the compiler"][orgch]
|
||||
> chapter, which introduces how the compiler works, before this one.
|
||||
|
||||
[orgch]: ./overview.md
|
||||
|
||||
## Workspace structure
|
||||
|
||||
The [`rust-lang/rust`] repository consists of a single large cargo workspace
|
||||
containing the compiler, the standard libraries ([`core`], [`alloc`], [`std`],
|
||||
[`proc_macro`], [`etc`]), and [`rustdoc`], along with the build system and a
|
||||
bunch of tools and submodules for building a full Rust distribution.
|
||||
|
||||
The repository consists of four main directories:
|
||||
|
||||
- [`compiler/`] contains the source code for `rustc`. It consists of many crates
|
||||
that together make up the compiler.
|
||||
|
||||
- [`library/`] contains the standard libraries ([`core`], [`alloc`], [`std`],
|
||||
[`proc_macro`], [`test`]), as well as the Rust runtime ([`backtrace`], [`rtstartup`],
|
||||
[`lang_start`]).
|
||||
|
||||
- [`tests/`] contains the compiler tests.
|
||||
|
||||
- [`src/`] contains the source code for [`rustdoc`], [`clippy`], [`cargo`], the build system,
|
||||
language docs, etc.
|
||||
|
||||
[`alloc`]: https://github.com/rust-lang/rust/tree/master/library/alloc
|
||||
[`backtrace`]: https://github.com/rust-lang/backtrace-rs/
|
||||
[`cargo`]: https://github.com/rust-lang/cargo
|
||||
[`clippy`]: https://github.com/rust-lang/rust/tree/master/src/tools/clippy
|
||||
[`compiler/`]: https://github.com/rust-lang/rust/tree/master/compiler
|
||||
[`core`]: https://github.com/rust-lang/rust/tree/master/library/core
|
||||
[`etc`]: https://github.com/rust-lang/rust/tree/master/src/etc
|
||||
[`lang_start`]: https://github.com/rust-lang/rust/blob/master/library/std/src/rt.rs
|
||||
[`library/`]: https://github.com/rust-lang/rust/tree/master/library
|
||||
[`proc_macro`]: https://github.com/rust-lang/rust/tree/master/library/proc_macro
|
||||
[`rtstartup`]: https://github.com/rust-lang/rust/tree/master/library/rtstartup
|
||||
[`rust-lang/rust`]: https://github.com/rust-lang/rust
|
||||
[`rustdoc`]: https://github.com/rust-lang/rust/tree/master/src/tools/rustdoc
|
||||
[`src/`]: https://github.com/rust-lang/rust/tree/master/src
|
||||
[`std`]: https://github.com/rust-lang/rust/tree/master/library/std
|
||||
[`test`]: https://github.com/rust-lang/rust/tree/master/library/test
|
||||
[`tests/`]: https://github.com/rust-lang/rust/tree/master/tests
|
||||
|
||||
## Compiler
|
||||
|
||||
The compiler is implemented in the various [`compiler/`] crates.
|
||||
The [`compiler/`] crates all have names starting with `rustc_*`. These are a
|
||||
collection of around 50 interdependent crates ranging in size from tiny to
|
||||
huge. There is also the `rustc` crate which is the actual binary (i.e. the
|
||||
`main` function); it doesn't actually do anything besides calling the
|
||||
[`rustc_driver`] crate, which drives the various parts of compilation in other
|
||||
crates.
|
||||
|
||||
The dependency structure of these crates is complex, but roughly it is
|
||||
something like this:
|
||||
|
||||
- `rustc` (the binary) calls [`rustc_driver::main`][main].
|
||||
- [`rustc_driver`] depends on a lot of other crates, but the main one is
|
||||
[`rustc_interface`].
|
||||
- [`rustc_interface`] depends on most of the other compiler crates. It
|
||||
is a fairly generic interface for driving the whole compilation.
|
||||
- Most of the other `rustc_*` crates depend on [`rustc_middle`],
|
||||
which defines a lot of central data structures in the compiler.
|
||||
- [`rustc_middle`] and most of the other crates depend on a
|
||||
handful of crates representing the early parts of the
|
||||
compiler (e.g. the parser), fundamental data structures (e.g.
|
||||
[`Span`]), or error reporting: [`rustc_data_structures`],
|
||||
[`rustc_span`], [`rustc_errors`], etc.
|
||||
|
||||
[`rustc_data_structures`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/index.html
|
||||
[`rustc_driver`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_driver/index.html
|
||||
[`rustc_errors`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_errors/index.html
|
||||
[`rustc_interface`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_interface/index.html
|
||||
[`rustc_middle`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/index.html
|
||||
[`rustc_span`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/index.html
|
||||
[`Span`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/struct.Span.html
|
||||
[main]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_driver/fn.main.html
|
||||
|
||||
You can see the exact dependencies by reading the [`Cargo.toml`] for the various
|
||||
crates, just like a normal Rust crate.
|
||||
|
||||
One final thing: [`src/llvm-project`] is a submodule for our fork of LLVM.
|
||||
During bootstrapping, LLVM is built and the [`compiler/rustc_llvm`] crate
|
||||
contains Rust wrappers around LLVM (which is written in C++), so that the
|
||||
compiler can interface with it.
|
||||
|
||||
Most of this book is about the compiler, so we won't have any further
|
||||
explanation of these crates here.
|
||||
|
||||
[`compiler/rustc_llvm`]: https://github.com/rust-lang/rust/tree/master/compiler/rustc_llvm
|
||||
[`src/llvm-project`]: https://github.com/rust-lang/rust/tree/master/src/
|
||||
[`Cargo.toml`]: https://github.com/rust-lang/rust/blob/master/Cargo.toml
|
||||
|
||||
### Big picture
|
||||
|
||||
The dependency structure of the compiler is influenced by two main factors:
|
||||
|
||||
1. Organization. The compiler is a _huge_ codebase; it would be an impossibly
|
||||
large crate. In part, the dependency structure reflects the code structure
|
||||
of the compiler.
|
||||
2. Compile-time. By breaking the compiler into multiple crates, we can take
|
||||
better advantage of incremental/parallel compilation using cargo. In
|
||||
particular, we try to have as few dependencies between crates as possible so
|
||||
that we don't have to rebuild as many crates if you change one.
|
||||
|
||||
At the very bottom of the dependency tree are a handful of crates that are used
|
||||
by the whole compiler (e.g. [`rustc_span`]). The very early parts of the
|
||||
compilation process (e.g. [parsing and the Abstract Syntax Tree (`AST`)][parser])
|
||||
depend on only these.
|
||||
|
||||
After the [`AST`][parser] is constructed and other early analysis is done, the
|
||||
compiler's [query system][query] gets set up. The query system is set up in a
|
||||
clever way using function pointers. This allows us to break dependencies
|
||||
between crates, allowing more parallel compilation. The query system is defined
|
||||
in [`rustc_middle`], so nearly all subsequent parts of the compiler depend on
|
||||
this crate. It is a really large crate, leading to long compile times. Some
|
||||
efforts have been made to move stuff out of it with varying success. Another
|
||||
side-effect is that sometimes related functionality gets scattered across
|
||||
different crates. For example, linting functionality is found across earlier
|
||||
parts of the crate, [`rustc_lint`], [`rustc_middle`], and other places.
|
||||
|
||||
Ideally there would be fewer, more cohesive crates, with incremental and
|
||||
parallel compilation making sure compile times stay reasonable. However,
|
||||
incremental and parallel compilation haven't gotten good enough for that yet,
|
||||
so breaking things into separate crates has been our solution so far.
|
||||
|
||||
At the top of the dependency tree are [`rustc_driver`] and [`rustc_interface`];
|
||||
the latter is an unstable wrapper around the query system helping drive various
|
||||
stages of compilation. Other consumers of the compiler may use this interface
|
||||
in different ways (e.g. [`rustdoc`] or maybe eventually `rust-analyzer`). The
|
||||
[`rustc_driver`] crate first parses command line arguments and then uses
|
||||
[`rustc_interface`] to drive the compilation to completion.
|
||||
|
||||
[parser]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_parse/index.html
|
||||
[`rustc_lint`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lint/index.html
|
||||
[query]: ./query.md
|
||||
|
||||
## rustdoc
|
||||
|
||||
The bulk of [`rustdoc`] is in [`librustdoc`]. However, the [`rustdoc`] binary
|
||||
itself is [`src/tools/rustdoc`], which does nothing except call [`rustdoc::main`].
|
||||
|
||||
There is also `JavaScript` and `CSS` for the docs in [`src/tools/rustdoc-js`]
|
||||
and [`src/tools/rustdoc-themes`].
|
||||
|
||||
You can read more about [`rustdoc`] in [this chapter][rustdoc-chapter].
|
||||
|
||||
[`librustdoc`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustdoc/index.html
|
||||
[`rustdoc::main`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustdoc/fn.main.html
|
||||
[`src/tools/rustdoc-js`]: https://github.com/rust-lang/rust/tree/master/src/tools/rustdoc-js
|
||||
[`src/tools/rustdoc-themes`]: https://github.com/rust-lang/rust/tree/master/src/tools/rustdoc-themes
|
||||
[`src/tools/rustdoc`]: https://github.com/rust-lang/rust/tree/master/src/tools/rustdoc
|
||||
[rustdoc-chapter]: ./rustdoc.md
|
||||
|
||||
## Tests
|
||||
|
||||
The test suite for all of the above is in [`tests/`]. You can read more
|
||||
about the test suite [in this chapter][testsch].
|
||||
|
||||
The test harness is in [`src/tools/compiletest/`][`compiletest/`].
|
||||
|
||||
[`tests/`]: https://github.com/rust-lang/rust/tree/master/tests
|
||||
[testsch]: ./tests/intro.md
|
||||
|
||||
## Build System
|
||||
|
||||
There are a number of tools in the repository just for building the compiler,
|
||||
standard library, [`rustdoc`], etc, along with testing, building a full Rust
|
||||
distribution, etc.
|
||||
|
||||
One of the primary tools is [`src/bootstrap/`]. You can read more about
|
||||
bootstrapping [in this chapter][bootstch]. The process may also use other tools
|
||||
from [`src/tools/`], such as [`tidy/`] or [`compiletest/`].
|
||||
|
||||
[`compiletest/`]: https://github.com/rust-lang/rust/tree/master/src/tools/compiletest
|
||||
[`src/bootstrap/`]: https://github.com/rust-lang/rust/tree/master/src/bootstrap
|
||||
[`src/tools/`]: https://github.com/rust-lang/rust/tree/master/src/tools
|
||||
[`tidy/`]: https://github.com/rust-lang/rust/tree/master/src/tools/tidy
|
||||
[bootstch]: ./building/bootstrapping/intro.md
|
||||
|
||||
## Standard library
|
||||
|
||||
This code is fairly similar to most other Rust crates except that it must be
|
||||
built in a special way because it can use unstable ([`nightly`]) features.
|
||||
The standard library is sometimes referred to as [`libstd or the "standard facade"`].
|
||||
|
||||
[`libstd or the "standard facade"`]: https://rust-lang.github.io/rfcs/0040-libstd-facade.html
|
||||
[`nightly`]: https://doc.rust-lang.org/nightly/nightly-rustc/
|
||||
|
||||
## Other
|
||||
|
||||
There are a lot of other things in the `rust-lang/rust` repo that are related
|
||||
to building a full Rust distribution. Most of the time you don't need to worry about them.
|
||||
|
||||
These include:
|
||||
- [`src/ci`]: The CI configuration. This is actually quite extensive because we
|
||||
run a lot of tests on a lot of platforms.
|
||||
- [`src/doc`]: Various documentation, including submodules for a few books.
|
||||
- [`src/etc`]: Miscellaneous utilities.
|
||||
- And more...
|
||||
|
||||
[`src/ci`]: https://github.com/rust-lang/rust/tree/master/src/ci
|
||||
[`src/doc`]: https://github.com/rust-lang/rust/tree/master/src/doc
|
||||
[`src/etc`]: https://github.com/rust-lang/rust/tree/master/src/etc
|
||||
141
src/doc/rustc-dev-guide/src/compiler-team.md
Normal file
141
src/doc/rustc-dev-guide/src/compiler-team.md
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
# About the compiler team
|
||||
|
||||
rustc is maintained by the [Rust compiler team][team]. The people who belong to
|
||||
this team collectively work to track regressions and implement new features.
|
||||
Members of the Rust compiler team are people who have made significant
|
||||
contributions to rustc and its design.
|
||||
|
||||
[team]: https://www.rust-lang.org/governance/teams/compiler
|
||||
|
||||
## Discussion
|
||||
|
||||
Currently the compiler team chats in Zulip:
|
||||
|
||||
- Team chat occurs in the [`t-compiler`][zulip-t-compiler] stream on the Zulip instance
|
||||
- There are also a number of other associated Zulip streams,
|
||||
such as [`t-compiler/help`][zulip-help], where people can ask for help
|
||||
with rustc development, or [`t-compiler/meetings`][zulip-meetings],
|
||||
where the team holds their weekly triage and steering meetings.
|
||||
|
||||
## Reviewers
|
||||
|
||||
If you're interested in figuring out who can answer questions about a
|
||||
particular part of the compiler, or you'd just like to know who works on what,
|
||||
check out [triagebot.toml's assign section][map].
|
||||
It contains a listing of the various parts of the compiler and a list of people
|
||||
who are reviewers of each part.
|
||||
|
||||
[map]: https://github.com/rust-lang/rust/blob/master/triagebot.toml
|
||||
|
||||
## Rust compiler meeting
|
||||
|
||||
The compiler team has a weekly meeting where we do triage and try to
|
||||
generally stay on top of new bugs, regressions, and discuss important
|
||||
things in general.
|
||||
They are held on [Zulip][zulip-meetings]. It works roughly as follows:
|
||||
|
||||
- **Announcements, MCPs/FCPs, and WG-check-ins:** We share some
|
||||
announcements with the rest of the team about important things we want
|
||||
everyone to be aware of. We also share the status of MCPs and FCPs and we
|
||||
use the opportunity to have a couple of WGs giving us an update about
|
||||
their work.
|
||||
- **Check for beta and stable nominations:** These are nominations of things to
|
||||
backport to beta and stable respectively.
|
||||
We then look for new cases where the compiler broke previously working
|
||||
code in the wild. Regressions are important issues to fix, so it's
|
||||
likely that they are tagged as P-critical or P-high; the major
|
||||
exception would be bug fixes (though even there we often [aim to give
|
||||
warnings first][procedure]).
|
||||
- **Review P-critical and P-high bugs:** P-critical and P-high bugs are
|
||||
those that are sufficiently important for us to actively track
|
||||
progress. P-critical and P-high bugs should ideally always have an
|
||||
assignee.
|
||||
- **Check S-waiting-on-team and I-nominated issues:** These are issues where feedback from
|
||||
the team is desired.
|
||||
- **Look over the performance triage report:** We check for PRs that made the
|
||||
performance worse and try to decide if it's worth reverting the performance regression or if
|
||||
the regression can be addressed in a future PR.
|
||||
|
||||
The meeting currently takes place on Thursdays at 10am Boston time
|
||||
(UTC-4 typically, but daylight saving time sometimes makes things
|
||||
complicated).
|
||||
|
||||
[procedure]: ./bug-fix-procedure.md
|
||||
[zulip-t-compiler]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler
|
||||
[zulip-help]: https://rust-lang.zulipchat.com/#narrow/stream/182449-t-compiler.2Fhelp
|
||||
[zulip-meetings]: https://rust-lang.zulipchat.com/#narrow/stream/238009-t-compiler.2Fmeetings
|
||||
|
||||
## Team membership
|
||||
|
||||
Membership in the Rust team is typically offered when someone has been
|
||||
making significant contributions to the compiler for some
|
||||
time. Membership is both a recognition and an obligation:
|
||||
compiler team members are generally expected to help with upkeep as
|
||||
well as doing reviews and other work.
|
||||
|
||||
If you are interested in becoming a compiler team member, the first
|
||||
thing to do is to start fixing some bugs, or get involved in a working
|
||||
group. One good way to find bugs is to look for
|
||||
[open issues tagged with E-easy](https://github.com/rust-lang/rust/issues?q=is%3Aopen+is%3Aissue+label%3AE-easy)
|
||||
or
|
||||
[E-mentor](https://github.com/rust-lang/rust/issues?q=is%3Aopen+is%3Aissue+label%3AE-mentor).
|
||||
|
||||
You can also dig through the graveyard of PRs that were
|
||||
[closed due to inactivity](https://github.com/rust-lang/rust/pulls?q=is%3Apr+label%3AS-inactive),
|
||||
some of them may contain work that is still useful - refer to the
|
||||
associated issues, if any - and only needs some finishing touches
|
||||
for which the original author didn't have time.
|
||||
|
||||
### r+ rights
|
||||
|
||||
Once you have made a number of individual PRs to rustc, we will often
|
||||
offer r+ privileges. This means that you have the right to instruct
|
||||
"bors" (the robot that manages which PRs get landed into rustc) to
|
||||
merge a PR
|
||||
([here are some instructions for how to talk to bors][homu-guide]).
|
||||
|
||||
[homu-guide]: https://bors.rust-lang.org/
|
||||
|
||||
The guidelines for reviewers are as follows:
|
||||
|
||||
- You are always welcome to review any PR, regardless of who it is
|
||||
assigned to. However, do not r+ PRs unless:
|
||||
- You are confident in that part of the code.
|
||||
- You are confident that nobody else wants to review it first.
|
||||
- For example, sometimes people will express a desire to review a
|
||||
PR before it lands, perhaps because it touches a particularly
|
||||
sensitive part of the code.
|
||||
- Always be polite when reviewing: you are a representative of the
|
||||
Rust project, so it is expected that you will go above and beyond
|
||||
when it comes to the [Code of Conduct].
|
||||
|
||||
[Code of Conduct]: https://www.rust-lang.org/policies/code-of-conduct
|
||||
|
||||
### Reviewer rotation
|
||||
|
||||
Once you have r+ rights, you can also be added to the [reviewer rotation].
|
||||
[triagebot] is the bot that [automatically assigns] incoming PRs to reviewers.
|
||||
If you are added, you will be randomly selected to review
|
||||
PRs. If you find you are assigned a PR that you don't feel comfortable
|
||||
reviewing, you can also leave a comment like `r? @so-and-so` to assign
|
||||
to someone else — if you don't know who to request, just write `r?
|
||||
@nikomatsakis for reassignment` and @nikomatsakis will pick someone
|
||||
for you.
|
||||
|
||||
[reviewer rotation]: https://github.com/rust-lang/rust/blob/36285c5de8915ecc00d91ae0baa79a87ed5858d5/triagebot.toml#L528-L577
|
||||
[triagebot]: https://github.com/rust-lang/triagebot/
|
||||
[automatically assigns]: https://forge.rust-lang.org/triagebot/pr-assignment.html
|
||||
|
||||
Getting on the reviewer rotation is much appreciated as it lowers the
|
||||
review burden for all of us! However, if you don't have time to give
|
||||
people timely feedback on their PRs, it may be better that you don't
|
||||
get on the list.
|
||||
|
||||
### Full team membership
|
||||
|
||||
Full team membership is typically extended once someone has made many
|
||||
contributions to the Rust compiler over time, ideally (but not
|
||||
necessarily) to multiple areas. Sometimes this might be implementing a
|
||||
new feature, but it is also important — perhaps more important! — to
|
||||
have time and willingness to help out with general upkeep such as
|
||||
bugfixes, tracking regressions, and other less glamorous work.
|
||||
50
src/doc/rustc-dev-guide/src/const-eval.md
Normal file
50
src/doc/rustc-dev-guide/src/const-eval.md
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
# Constant Evaluation
|
||||
|
||||
Constant evaluation is the process of computing values at compile time. For a
|
||||
specific item (constant/static/array length) this happens after the MIR for the
|
||||
item is borrow-checked and optimized. In many cases trying to const evaluate an
|
||||
item will trigger the computation of its MIR for the first time.
|
||||
|
||||
Prominent examples are:
|
||||
|
||||
* The initializer of a `static`
|
||||
* Array length
|
||||
* needs to be known to reserve stack or heap space
|
||||
* Enum variant discriminants
|
||||
* needs to be known to prevent two variants from having the same
|
||||
discriminant
|
||||
* Patterns
|
||||
* need to be known to check for overlapping patterns
|
||||
|
||||
Additionally constant evaluation can be used to reduce the workload or binary
|
||||
size at runtime by precomputing complex operations at compile time and only
|
||||
storing the result.
|
||||
|
||||
All uses of constant evaluation can either be categorized as "influencing the type system"
|
||||
(array lengths, enum variant discriminants, const generic parameters), or as solely being
|
||||
done to precompute expressions to be used at runtime.
|
||||
|
||||
Constant evaluation can be done by calling the `const_eval_*` functions of `TyCtxt`.
|
||||
They're the wrappers of the `const_eval` query.
|
||||
|
||||
* `const_eval_global_id_for_typeck` evaluates a constant to a valtree,
|
||||
so the result value can be further inspected by the compiler.
|
||||
* `const_eval_global_id` evaluates a constant to an "opaque blob" containing its final value;
|
||||
this is only useful for codegen backends and the CTFE evaluator engine itself.
|
||||
* `eval_static_initializer` specifically computes the initial values of a static.
|
||||
Statics are special; all other functions do not represent statics correctly
|
||||
and thus have assertions preventing their use on statics.
|
||||
|
||||
The `const_eval_*` functions use the [`ParamEnv`](./param_env/param_env_summary.html) of the environment
|
||||
in which the constant is evaluated (e.g. the function within which the constant is used)
|
||||
and a [`GlobalId`]. The `GlobalId` is made up of an `Instance` referring to a constant
|
||||
or static or of an `Instance` of a function and an index into the function's `Promoted` table.
|
||||
|
||||
Constant evaluation returns an [`EvalToValTreeResult`] for type system constants
|
||||
or [`EvalToConstValueResult`] with either the error, or a representation of the
|
||||
evaluated constant: a [valtree](mir/index.md#valtrees) or a [MIR constant
|
||||
value](mir/index.md#mir-constant-values), respectively.
|
||||
|
||||
[`GlobalId`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/interpret/struct.GlobalId.html
|
||||
[`EvalToConstValueResult`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/interpret/error/type.EvalToConstValueResult.html
|
||||
[`EvalToValTreeResult`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/interpret/error/type.EvalToValTreeResult.html
|
||||
238
src/doc/rustc-dev-guide/src/const-eval/interpret.md
Normal file
238
src/doc/rustc-dev-guide/src/const-eval/interpret.md
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
# Interpreter
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
The interpreter is a virtual machine for executing MIR without compiling to
|
||||
machine code. It is usually invoked via `tcx.const_eval_*` functions. The
|
||||
interpreter is shared between the compiler (for compile-time function
|
||||
evaluation, CTFE) and the tool [Miri](https://github.com/rust-lang/miri/), which
|
||||
uses the same virtual machine to detect Undefined Behavior in (unsafe) Rust
|
||||
code.
|
||||
|
||||
If you start out with a constant:
|
||||
|
||||
```rust
|
||||
const FOO: usize = 1 << 12;
|
||||
```
|
||||
|
||||
rustc doesn't actually invoke anything until the constant is either used or
|
||||
placed into metadata.
|
||||
|
||||
Once you have a use-site like:
|
||||
|
||||
```rust,ignore
|
||||
type Foo = [u8; FOO - 42];
|
||||
```
|
||||
|
||||
The compiler needs to figure out the length of the array before being able to
|
||||
create items that use the type (locals, constants, function arguments, ...).
|
||||
|
||||
To obtain the (in this case empty) parameter environment, one can call
|
||||
`let param_env = tcx.param_env(length_def_id);`. The `GlobalId` needed is
|
||||
|
||||
```rust,ignore
|
||||
let gid = GlobalId {
|
||||
promoted: None,
|
||||
instance: Instance::mono(length_def_id),
|
||||
};
|
||||
```
|
||||
|
||||
Invoking `tcx.const_eval(param_env.and(gid))` will now trigger the creation of
|
||||
the MIR of the array length expression. The MIR will look something like this:
|
||||
|
||||
```mir
|
||||
Foo::{{constant}}#0: usize = {
|
||||
let mut _0: usize;
|
||||
let mut _1: (usize, bool);
|
||||
|
||||
bb0: {
|
||||
_1 = CheckedSub(const FOO, const 42usize);
|
||||
assert(!move (_1.1: bool), "attempt to subtract with overflow") -> bb1;
|
||||
}
|
||||
|
||||
bb1: {
|
||||
_0 = move (_1.0: usize);
|
||||
return;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Before the evaluation, a virtual memory location (in this case essentially a
|
||||
`vec![u8; 4]` or `vec![u8; 8]`) is created for storing the evaluation result.
|
||||
|
||||
At the start of the evaluation, `_0` and `_1` are
|
||||
`Operand::Immediate(Immediate::Scalar(ScalarMaybeUndef::Undef))`. This is quite
|
||||
a mouthful: [`Operand`] can represent either data stored somewhere in the
|
||||
[interpreter memory](#memory) (`Operand::Indirect`), or (as an optimization)
|
||||
immediate data stored in-line. And [`Immediate`] can either be a single
|
||||
(potentially uninitialized) [scalar value][`Scalar`] (integer or thin pointer),
|
||||
or a pair of two of them. In our case, the single scalar value is *not* (yet)
|
||||
initialized.
|
||||
|
||||
When the initialization of `_1` is invoked, the value of the `FOO` constant is
|
||||
required, and triggers another call to `tcx.const_eval_*`, which will not be shown
|
||||
here. If the evaluation of `FOO` is successful, `42` will be subtracted from its
|
||||
value `4096` and the result stored in `_1` as
|
||||
`Operand::Immediate(Immediate::ScalarPair(Scalar::Raw { data: 4054, .. },
|
||||
Scalar::Raw { data: 0, .. }))`. The first part of the pair is the computed value,
|
||||
the second part is a bool that's true if an overflow happened. A `Scalar::Raw`
|
||||
also stores the size (in bytes) of this scalar value; we are eliding that here.
|
||||
|
||||
The next statement asserts that said boolean is `0`. In case the assertion
|
||||
fails, its error message is used for reporting a compile-time error.
|
||||
|
||||
Since it does not fail, `Operand::Immediate(Immediate::Scalar(Scalar::Raw {
|
||||
data: 4054, .. }))` is stored in the virtual memory that was allocated before the
|
||||
evaluation. `_0` always refers to that location directly.
|
||||
|
||||
After the evaluation is done, the return value is converted from [`Operand`] to
|
||||
[`ConstValue`] by [`op_to_const`]: the former representation is geared towards
|
||||
what is needed *during* const evaluation, while [`ConstValue`] is shaped by the
|
||||
needs of the remaining parts of the compiler that consume the results of const
|
||||
evaluation. As part of this conversion, for types with scalar values, even if
|
||||
the resulting [`Operand`] is `Indirect`, it will return an immediate
|
||||
`ConstValue::Scalar(computed_value)` (instead of the usual `ConstValue::ByRef`).
|
||||
This makes using the result much more efficient and also more convenient, as no
|
||||
further queries need to be executed in order to get at something as simple as a
|
||||
`usize`.
|
||||
|
||||
Future evaluations of the same constants will not actually invoke
|
||||
the interpreter, but just use the cached result.
|
||||
|
||||
[`Operand`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/interpret/operand/enum.Operand.html
|
||||
[`Immediate`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/interpret/enum.Immediate.html
|
||||
[`ConstValue`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/consts/enum.ConstValue.html
|
||||
[`Scalar`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/interpret/enum.Scalar.html
|
||||
[`op_to_const`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/const_eval/eval_queries/fn.op_to_const.html
|
||||
|
||||
## Datastructures
|
||||
|
||||
The interpreter's outside-facing datastructures can be found in
|
||||
[rustc_middle/src/mir/interpret](https://github.com/rust-lang/rust/blob/master/compiler/rustc_middle/src/mir/interpret).
|
||||
This is mainly the error enum and the [`ConstValue`] and [`Scalar`] types. A
|
||||
`ConstValue` can be either `Scalar` (a single `Scalar`, i.e., integer or thin
|
||||
pointer), `Slice` (to represent byte slices and strings, as needed for pattern
|
||||
matching) or `ByRef`, which is used for anything else and refers to a virtual
|
||||
allocation. These allocations can be accessed via the methods on
|
||||
`tcx.interpret_interner`. A `Scalar` is either some `Raw` integer or a pointer;
|
||||
see [the next section](#memory) for more on that.
|
||||
|
||||
If you are expecting a numeric result, you can use `eval_usize` (panics on
|
||||
anything that can't be represented as a `u64`) or `try_eval_usize` which results
|
||||
in an `Option<u64>` yielding the `Scalar` if possible.
|
||||
|
||||
## Memory
|
||||
|
||||
To support any kind of pointers, the interpreter needs to have a "virtual memory" that the
|
||||
pointers can point to. This is implemented in the [`Memory`] type. In the
|
||||
simplest model, every global variable, stack variable and every dynamic
|
||||
allocation corresponds to an [`Allocation`] in that memory. (Actually using an
|
||||
allocation for every MIR stack variable would be very inefficient; that's why we
|
||||
have `Operand::Immediate` for stack variables that are both small and never have
|
||||
their address taken. But that is purely an optimization.)
|
||||
|
||||
Such an `Allocation` is basically just a sequence of `u8` storing the value of
|
||||
each byte in this allocation. (Plus some extra data, see below.) Every
|
||||
`Allocation` has a globally unique `AllocId` assigned in `Memory`. With that, a
|
||||
[`Pointer`] consists of a pair of an `AllocId` (indicating the allocation) and
|
||||
an offset into the allocation (indicating which byte of the allocation the
|
||||
pointer points to). It may seem odd that a `Pointer` is not just an integer
|
||||
address, but remember that during const evaluation, we cannot know at which
|
||||
actual integer address the allocation will end up -- so we use `AllocId` as
|
||||
symbolic base addresses, which means we need a separate offset. (As an aside,
|
||||
it turns out that pointers at run-time are
|
||||
[more than just integers, too](https://rust-lang.github.io/unsafe-code-guidelines/glossary.html#pointer-provenance).)
|
||||
|
||||
These allocations exist so that references and raw pointers have something to
|
||||
point to. There is no global linear heap in which things are allocated, but each
|
||||
allocation (be it for a local variable, a static or a (future) heap allocation)
|
||||
gets its own little memory with exactly the required size. So if you have a
|
||||
pointer to an allocation for a local variable `a`, there is no possible (no
|
||||
matter how unsafe) operation that you can do that would ever change said pointer
|
||||
to a pointer to a different local variable `b`.
|
||||
Pointer arithmetic on `a` will only ever change its offset; the `AllocId` stays the same.
|
||||
|
||||
This, however, causes a problem when we want to store a `Pointer` into an
|
||||
`Allocation`: we cannot turn it into a sequence of `u8` of the right length!
|
||||
`AllocId` and offset together are twice as big as a pointer "seems" to be. This
|
||||
is what the `relocation` field of `Allocation` is for: the byte offset of the
|
||||
`Pointer` gets stored as a bunch of `u8`, while its `AllocId` gets stored
|
||||
out-of-band. The two are reassembled when the `Pointer` is read from memory.
|
||||
The other bit of extra data an `Allocation` needs is `undef_mask` for keeping
|
||||
track of which of its bytes are initialized.
|
||||
|
||||
### Global memory and exotic allocations
|
||||
|
||||
`Memory` exists only during evaluation; it gets destroyed when the
|
||||
final value of the constant is computed. In case that constant contains any
|
||||
pointers, those get "interned" and moved to a global "const eval memory" that is
|
||||
part of `TyCtxt`. These allocations stay around for the remaining computation
|
||||
and get serialized into the final output (so that dependent crates can use
|
||||
them).
|
||||
|
||||
Moreover, to also support function pointers, the global memory in `TyCtxt` can
|
||||
also contain "virtual allocations": instead of an `Allocation`, these contain an
|
||||
`Instance`. That allows a `Pointer` to point to either normal data or a
|
||||
function, which is needed to be able to evaluate casts from function pointers to
|
||||
raw pointers.
|
||||
|
||||
Finally, the [`GlobalAlloc`] type used in the global memory also contains a
|
||||
variant `Static` that points to a particular `const` or `static` item. This is
|
||||
needed to support circular statics, where we need to have a `Pointer` to a
|
||||
`static` for which we cannot yet have an `Allocation` as we do not know the
|
||||
bytes of its value.
|
||||
|
||||
[`Memory`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/interpret/struct.Memory.html
|
||||
[`Allocation`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/interpret/struct.Allocation.html
|
||||
[`Pointer`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/interpret/struct.Pointer.html
|
||||
[`GlobalAlloc`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/interpret/enum.GlobalAlloc.html
|
||||
|
||||
### Pointer values vs Pointer types
|
||||
|
||||
One common cause of confusion in the interpreter is that being a pointer *value* and having
|
||||
a pointer *type* are entirely independent properties. By "pointer value", we
|
||||
refer to a `Scalar::Ptr` containing a `Pointer` and thus pointing somewhere into
|
||||
the interpreter's virtual memory. This is in contrast to `Scalar::Raw`, which is just some
|
||||
concrete integer.
|
||||
|
||||
However, a variable of pointer or reference *type*, such as `*const T` or `&T`,
|
||||
does not have to have a pointer *value*: it could be obtained by casting or
|
||||
transmuting an integer to a pointer.
|
||||
And similarly, when casting or transmuting a reference to some
|
||||
actual allocation to an integer, we end up with a pointer *value*
|
||||
(`Scalar::Ptr`) at integer *type* (`usize`). This is a problem because we
|
||||
cannot meaningfully perform integer operations such as division on pointer
|
||||
values.
|
||||
|
||||
## Interpretation
|
||||
|
||||
Although the main entry point to constant evaluation is the `tcx.const_eval_*`
|
||||
functions, there are additional functions in
|
||||
[rustc_const_eval/src/const_eval](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_const_eval/index.html)
|
||||
that allow accessing the fields of a `ConstValue` (`ByRef` or otherwise). You should
|
||||
never have to access an `Allocation` directly except for translating it to the
|
||||
compilation target (at the moment just LLVM).
|
||||
|
||||
The interpreter starts by creating a virtual stack frame for the current constant that is
|
||||
being evaluated. There's essentially no difference between a constant and a
|
||||
function with no arguments, except that constants do not allow local (named)
|
||||
variables at the time of writing this guide.
|
||||
|
||||
A stack frame is defined by the `Frame` type in
|
||||
[rustc_const_eval/src/interpret/eval_context.rs](https://github.com/rust-lang/rust/blob/master/compiler/rustc_const_eval/src/interpret/eval_context.rs)
|
||||
and contains all the local
|
||||
variables' memory (`None` at the start of evaluation). Each frame refers to the
|
||||
evaluation of either the root constant or subsequent calls to `const fn`. The
|
||||
evaluation of another constant simply calls `tcx.const_eval_*`, which produces an
|
||||
entirely new and independent stack frame.
|
||||
|
||||
The frames are just a `Vec<Frame>`; there's no way to actually refer to a
|
||||
`Frame`'s memory even if horrible shenanigans are done via unsafe code. The only
|
||||
memory that can be referred to are `Allocation`s.
|
||||
|
||||
The interpreter now calls the `step` method (in
|
||||
[rustc_const_eval/src/interpret/step.rs](https://github.com/rust-lang/rust/blob/master/compiler/rustc_const_eval/src/interpret/step.rs)
|
||||
) until it either returns an error or has no further statements to execute. Each
|
||||
statement will now initialize or modify the locals or the virtual memory
|
||||
referred to by a local. This might require evaluating other constants or
|
||||
statics, which just recursively invokes `tcx.const_eval_*`.
|
||||
544
src/doc/rustc-dev-guide/src/contributing.md
Normal file
544
src/doc/rustc-dev-guide/src/contributing.md
Normal file
|
|
@ -0,0 +1,544 @@
|
|||
# Contribution Procedures
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
## Bug reports
|
||||
|
||||
While bugs are unfortunate, they're a reality in software. We can't fix what we
|
||||
don't know about, so please report liberally. If you're not sure if something
|
||||
is a bug or not, feel free to file a bug anyway.
|
||||
|
||||
**If you believe reporting your bug publicly represents a security risk to Rust users,
|
||||
please follow our [instructions for reporting security vulnerabilities][vuln]**.
|
||||
|
||||
[vuln]: https://www.rust-lang.org/policies/security
|
||||
|
||||
If you're using the nightly channel, please check if the bug exists in the
|
||||
latest toolchain before filing your bug. It might be fixed already.
|
||||
|
||||
If you have the chance, before reporting a bug, please [search existing issues],
|
||||
as it's possible that someone else has already reported your error. This doesn't
|
||||
always work, and sometimes it's hard to know what to search for, so consider this
|
||||
extra credit. We won't mind if you accidentally file a duplicate report.
|
||||
|
||||
Similarly, to help others who encountered the bug find your issue, consider
|
||||
filing an issue with a descriptive title, which contains information that might
|
||||
be unique to it. This can be the language or compiler feature used, the
|
||||
conditions that trigger the bug, or part of the error message if there is any.
|
||||
An example could be: **"impossible case reached" on lifetime inference for impl
|
||||
Trait in return position**.
|
||||
|
||||
Opening an issue is as easy as following [this
|
||||
link](https://github.com/rust-lang/rust/issues/new/choose) and filling out the fields
|
||||
in the appropriate provided template.
|
||||
|
||||
## Bug fixes or "normal" code changes
|
||||
|
||||
For most PRs, no special procedures are needed. You can just [open a PR], and it
|
||||
will be reviewed, approved, and merged. This includes most bug fixes,
|
||||
refactorings, and other user-invisible changes. The next few sections talk
|
||||
about exceptions to this rule.
|
||||
|
||||
Also, note that it is perfectly acceptable to open WIP PRs or GitHub [Draft PRs].
|
||||
Some people prefer to do this so they can get feedback along the
|
||||
way or share their code with a collaborator. Others do this so they can utilize
|
||||
the CI to build and test their PR (e.g. when developing on a slow machine).
|
||||
|
||||
[open a PR]: #pull-requests
|
||||
[Draft PRs]: https://github.blog/2019-02-14-introducing-draft-pull-requests/
|
||||
|
||||
## New features
|
||||
|
||||
Rust has strong backwards-compatibility guarantees. Thus, new features can't
|
||||
just be implemented directly in stable Rust. Instead, we have 3 release
|
||||
channels: stable, beta, and nightly.
|
||||
|
||||
- **Stable**: this is the latest stable release for general usage.
|
||||
- **Beta**: this is the next release (will be stable within 6 weeks).
|
||||
- **Nightly**: follows the `master` branch of the repo. This is the only
|
||||
channel where unstable, incomplete, or experimental features are usable with
|
||||
feature gates.
|
||||
|
||||
See [this chapter on implementing new features](./implementing_new_features.md) for more
|
||||
information.
|
||||
|
||||
### Breaking changes
|
||||
|
||||
Breaking changes have a [dedicated section][Breaking Changes] in the dev-guide.
|
||||
|
||||
### Major changes
|
||||
|
||||
The compiler team has a special process for large changes, whether or not they
|
||||
cause breakage. This process is called a Major Change Proposal (MCP). MCP is a
|
||||
relatively lightweight mechanism for getting feedback on large changes to the
|
||||
compiler (as opposed to a full RFC or a design meeting with the team).
|
||||
|
||||
Examples of things that might require MCPs include major refactorings, changes
|
||||
to important types, or important changes to how the compiler does something, or
|
||||
smaller user-facing changes.
|
||||
|
||||
**When in doubt, ask on [zulip]. It would be a shame to put a lot of work
|
||||
into a PR that ends up not getting merged!** [See this document][mcpinfo] for
|
||||
more info on MCPs.
|
||||
|
||||
[mcpinfo]: https://forge.rust-lang.org/compiler/mcp.html
|
||||
[zulip]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler
|
||||
|
||||
### Performance
|
||||
|
||||
Compiler performance is important. We have put a lot of effort over the last
|
||||
few years into [gradually improving it][perfdash].
|
||||
|
||||
[perfdash]: https://perf.rust-lang.org/dashboard.html
|
||||
|
||||
If you suspect that your change may cause a performance regression (or
|
||||
improvement), you can request a "perf run" (and your reviewer may also request one
|
||||
before approving). This is yet another bot that will compile a collection of
|
||||
benchmarks on a compiler with your changes. The numbers are reported
|
||||
[here][perf], and you can see a comparison of your changes against the latest
|
||||
master.
|
||||
|
||||
> For an introduction to the performance of Rust code in general
|
||||
> which would also be useful in rustc development, see [The Rust Performance Book].
|
||||
|
||||
[perf]: https://perf.rust-lang.org
|
||||
[The Rust Performance Book]: https://nnethercote.github.io/perf-book/
|
||||
|
||||
## Pull requests
|
||||
|
||||
Pull requests (or PRs for short) are the primary mechanism we use to change Rust.
|
||||
GitHub itself has some [great documentation][about-pull-requests] on using the
|
||||
Pull Request feature. We use the "fork and pull" model [described here][development-models],
|
||||
where contributors push changes to their personal fork and create pull requests to
|
||||
bring those changes into the source repository. We have more info about how to use git
|
||||
when contributing to Rust under [the git section](./git.md).
|
||||
|
||||
> **Advice for potentially large, complex, cross-cutting and/or very domain-specific changes**
|
||||
>
|
||||
> The compiler reviewers on rotation usually each have areas of the compiler that they know well,
|
||||
> but also have areas that they are not very familiar with. If your PR contains changes that are
|
||||
> large, complex, cross-cutting and/or highly domain-specific, it becomes very difficult to find a
|
||||
> suitable reviewer who is comfortable in reviewing all of the changes in such a PR. This is also
|
||||
> true if the changes are not only compiler-specific but also contain changes which fall under the
|
||||
> purview of reviewers from other teams, like the standard library team. [There's a bot][triagebot]
|
||||
> which notifies the relevant teams and pings people who have set up specific alerts based on the
|
||||
> files modified.
|
||||
>
|
||||
> Before making such changes, you are strongly encouraged to **discuss your proposed changes with
|
||||
> the compiler team beforehand** (and with other teams that the changes would require approval
|
||||
> from), and work with the compiler team to see if we can help you **break down a large potentially
|
||||
> unreviewable PR into a series of smaller more individually reviewable PRs**.
|
||||
>
|
||||
> You can communicate with the compiler team by creating a [#t-compiler thread on zulip][t-compiler]
|
||||
> to discuss your proposed changes.
|
||||
>
|
||||
> Communicating with the compiler team beforehand helps in several ways:
|
||||
>
|
||||
> 1. It increases the likelihood of your PRs being reviewed in a timely manner.
|
||||
> - We can help you identify suitable reviewers *before* you open actual PRs, or help find
|
||||
> advisors and liaisons to help you navigate the change procedures, or help with running
|
||||
> try-jobs, perf runs and crater runs as suitable.
|
||||
> 2. It helps the compiler team track your changes.
|
||||
> 3. The compiler team can perform vibe checks on your changes early and often, to see if the
|
||||
> direction of the changes aligns with what the compiler team prefers to see.
|
||||
> 4. It helps to avoid situations where you may have invested significant time and effort into large
|
||||
> changes that the compiler team might not be willing to accept, or finding out very late that the
|
||||
> changes are in a direction that the compiler team disagrees with.
|
||||
|
||||
[about-pull-requests]: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests
|
||||
[development-models]: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/getting-started/about-collaborative-development-models#fork-and-pull-model
|
||||
[t-compiler]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler
|
||||
[triagebot]: https://github.com/rust-lang/rust/blob/master/triagebot.toml
|
||||
|
||||
### r?
|
||||
|
||||
All pull requests are reviewed by another person. We have a bot,
|
||||
[@rustbot], that will automatically assign a random person
|
||||
to review your request based on which files you changed.
|
||||
|
||||
If you want to request that a specific person reviews your pull request, you
|
||||
can add an `r?` to the pull request description or in a comment. For example,
|
||||
if you want to request a review from @awesome-reviewer, add
|
||||
|
||||
r? @awesome-reviewer
|
||||
|
||||
to the end of the pull request description, and [@rustbot] will assign
|
||||
them instead of a random person. This is entirely optional.
|
||||
|
||||
You can also assign a random reviewer from a specific team by writing `r? rust-lang/groupname`.
|
||||
As an example,
|
||||
if you were making a diagnostics change,
|
||||
then you could get a reviewer from the diagnostics team by adding:
|
||||
|
||||
r? rust-lang/diagnostics
|
||||
|
||||
For a full list of possible `groupname`s,
|
||||
check the `adhoc_groups` section at the [triagebot.toml config file],
|
||||
or the list of teams in the [rust-lang teams database].
|
||||
|
||||
### Waiting for reviews
|
||||
|
||||
> NOTE
|
||||
>
|
||||
> Pull request reviewers are often working at capacity,
|
||||
> and many of them are contributing on a volunteer basis.
|
||||
> In order to minimize review delays,
|
||||
> pull request authors and assigned reviewers should ensure that the review label
|
||||
> (`S-waiting-on-review` and `S-waiting-on-author`) stays updated,
|
||||
> invoking these commands when appropriate:
|
||||
>
|
||||
> - `@rustbot author`:
|
||||
> the review is finished,
|
||||
> and PR author should check the comments and take action accordingly.
|
||||
>
|
||||
> - `@rustbot review`:
|
||||
> the author is ready for a review,
|
||||
> and this PR will be queued again in the reviewer's queue.
|
||||
|
||||
Please note that the reviewers are humans, who for the most part work on `rustc`
|
||||
in their free time. This means that they can take some time to respond and review
|
||||
your PR. It also means that reviewers can miss some PRs that are assigned to them.
|
||||
|
||||
To try to move PRs forward, the Triage WG regularly goes through all PRs that
|
||||
are waiting for review and haven't been discussed for at least 2 weeks. If you
|
||||
don't get a review within 2 weeks, feel free to ask the Triage WG on
|
||||
Zulip ([#t-release/triage]). They have knowledge of when to ping, who might be
|
||||
on vacation, etc.
|
||||
|
||||
The reviewer may request some changes using the GitHub code review interface.
|
||||
They may also request special procedures for some PRs.
|
||||
See [Crater] and [Breaking Changes] chapters for some examples of such procedures.
|
||||
|
||||
[r?]: https://github.com/rust-lang/rust/pull/78133#issuecomment-712692371
|
||||
[#t-release/triage]: https://rust-lang.zulipchat.com/#narrow/stream/242269-t-release.2Ftriage
|
||||
[Crater]: tests/crater.md
|
||||
|
||||
### CI
|
||||
|
||||
In addition to being reviewed by a human, pull requests are automatically tested,
|
||||
thanks to continuous integration (CI). Basically, every time you open and update
|
||||
a pull request, CI builds the compiler and tests it against the
|
||||
[compiler test suite], and also performs other tests such as checking that
|
||||
your pull request is in compliance with Rust's style guidelines.
|
||||
|
||||
Running continuous integration tests allows PR authors to catch mistakes early
|
||||
without going through a first review cycle, and also helps reviewers stay aware
|
||||
of the status of a particular pull request.
|
||||
|
||||
Rust has plenty of CI capacity, and you should never have to worry about wasting
|
||||
computational resources each time you push a change. It is also perfectly fine
|
||||
(and even encouraged!) to use the CI to test your changes if it can help your
|
||||
productivity. In particular, we don't recommend running the full `./x test` suite locally,
|
||||
since it takes a very long time to execute.
|
||||
|
||||
### r+
|
||||
|
||||
After someone has reviewed your pull request, they will leave an annotation
|
||||
on the pull request with an `r+`. It will look something like this:
|
||||
|
||||
@bors r+
|
||||
|
||||
This tells [@bors], our lovable integration bot, that your pull request has
|
||||
been approved. The PR then enters the [merge queue], where [@bors]
|
||||
will run *all* the tests on *every* platform we support. If it all works out,
|
||||
[@bors] will merge your code into `master` and close the pull request.
|
||||
|
||||
Depending on the scale of the change, you may see a slightly different form of `r+`:
|
||||
|
||||
@bors r+ rollup
|
||||
|
||||
The additional `rollup` tells [@bors] that this change should always be "rolled up".
|
||||
Changes that are rolled up are tested and merged alongside other PRs, to
|
||||
speed the process up. Typically only small changes that are expected not to conflict
|
||||
with one another are marked as "always roll up".
|
||||
|
||||
Be patient; this can take a while and the queue can sometimes be long. PRs are never merged by hand.
|
||||
|
||||
[@rustbot]: https://github.com/rustbot
|
||||
[@bors]: https://github.com/bors
|
||||
|
||||
### Opening a PR
|
||||
|
||||
You are now ready to file a pull request? Great! Here are a few points you
|
||||
should be aware of.
|
||||
|
||||
All pull requests should be filed against the `master` branch,
|
||||
unless you know for sure that you should target a different branch.
|
||||
|
||||
Make sure your pull request is in compliance with Rust's style guidelines by running
|
||||
|
||||
$ ./x test tidy --bless
|
||||
|
||||
We recommend making this check before every pull request (and every new commit
|
||||
in a pull request); you can add [git hooks]
|
||||
before every push to make sure you never forget to make this check.
|
||||
The CI will also run tidy and will fail if tidy fails.
|
||||
|
||||
Rust follows a _no merge-commit policy_, meaning, when you encounter merge
|
||||
conflicts you are expected to always rebase instead of merging. E.g. always use
|
||||
rebase when bringing the latest changes from the master branch to your feature
|
||||
branch. If your PR contains merge commits, it will get marked as `has-merge-commits`.
|
||||
Once you have removed the merge commits, e.g., through an interactive rebase, you
|
||||
should remove the label again:
|
||||
|
||||
@rustbot label -has-merge-commits
|
||||
|
||||
See [this chapter][labeling] for more details.
|
||||
|
||||
If you encounter merge conflicts or when a reviewer asks you to perform some
|
||||
changes, your PR will get marked as `S-waiting-on-author`. When you resolve
|
||||
them, you should use `@rustbot` to mark it as `S-waiting-on-review`:
|
||||
|
||||
@rustbot ready
|
||||
|
||||
GitHub allows [closing issues using keywords][closing-keywords]. This feature
|
||||
should be used to keep the issue tracker tidy. However, it is generally preferred
|
||||
to put the "closes #123" text in the PR description rather than in the commit message;
|
||||
particularly during rebasing, citing the issue number in the commit can "spam"
|
||||
the issue in question.
|
||||
|
||||
However, if your PR fixes a stable-to-beta or stable-to-stable regression and has
|
||||
been accepted for a beta and/or stable backport (i.e., it is marked `beta-accepted`
|
||||
and/or `stable-accepted`), please do *not* use any such keywords since we don't
|
||||
want the corresponding issue to get auto-closed once the fix lands on master.
|
||||
Please update the PR description while still mentioning the issue somewhere.
|
||||
For example, you could write `Fixes (after beta backport) #NNN.`.
|
||||
|
||||
As for further actions, please keep a sharp look-out for a PR whose title begins with
|
||||
`[beta]` or `[stable]` and which backports the PR in question. When that one gets
|
||||
merged, the relevant issue can be closed. The closing comment should mention all
|
||||
PRs that were involved. If you don't have the permissions to close the issue, please
|
||||
leave a comment on the original PR asking the reviewer to close it for you.
|
||||
|
||||
[labeling]: ./rustbot.md#issue-relabeling
|
||||
[closing-keywords]: https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue
|
||||
|
||||
### Reverting a PR
|
||||
|
||||
When a PR leads to a miscompilation, significant performance regressions, or other critical issues, we may
|
||||
want to revert that PR with a regression test case. You can also check out the [revert policy] on
|
||||
Forge docs (which is mainly targeted for reviewers, but contains useful info for PR authors too).
|
||||
|
||||
If the PR contains huge changes, it can be challenging to revert, making it harder to review
|
||||
incremental fixes in subsequent updates. Or if certain code in that PR is heavily depended upon by
|
||||
subsequent PRs, reverting it can become difficult.
|
||||
|
||||
In such cases, we can identify the problematic code and disable it for some input, as shown in [#128271][#128271].
|
||||
|
||||
For MIR optimizations, we can also use the `-Zunsound-mir-opt` option to gate the mir-opt, as shown
|
||||
in [#132356][#132356].
|
||||
|
||||
[revert policy]: https://forge.rust-lang.org/compiler/reviews.html?highlight=revert#reverts
|
||||
[#128271]: https://github.com/rust-lang/rust/pull/128271
|
||||
[#132356]: https://github.com/rust-lang/rust/pull/132356
|
||||
|
||||
## External dependencies
|
||||
|
||||
This section has moved to ["Using External Repositories"](./external-repos.md).
|
||||
|
||||
## Writing documentation
|
||||
|
||||
Documentation improvements are very welcome. The source of `doc.rust-lang.org`
|
||||
is located in [`src/doc`] in the tree, and standard API documentation is generated
|
||||
from the source code itself (e.g. [`library/std/src/lib.rs`][std-root]). Documentation pull requests
|
||||
function in the same way as other pull requests.
|
||||
|
||||
[`src/doc`]: https://github.com/rust-lang/rust/tree/master/src/doc
|
||||
[std-root]: https://github.com/rust-lang/rust/blob/master/library/std/src/lib.rs#L1
|
||||
|
||||
To find documentation-related issues, sort by the [A-docs label].
|
||||
|
||||
You can find documentation style guidelines in [RFC 1574].
|
||||
|
||||
To build the standard library documentation, use `x doc --stage 0 library --open`.
|
||||
To build the documentation for a book (e.g. the unstable book), use `x doc src/doc/unstable-book`.
|
||||
Results should appear in `build/host/doc`, as well as automatically open in your default browser.
|
||||
See [Building Documentation](./building/compiler-documenting.md#building-documentation) for more
|
||||
information.
|
||||
|
||||
You can also use `rustdoc` directly to check small fixes. For example,
|
||||
`rustdoc src/doc/reference.md` will render reference to `doc/reference.html`.
|
||||
The CSS might be messed up, but you can verify that the HTML is right.
|
||||
|
||||
### Contributing to rustc-dev-guide
|
||||
|
||||
Contributions to the [rustc-dev-guide] are always welcome, and can be made directly at
|
||||
[the rust-lang/rustc-dev-guide repo][rdgrepo].
|
||||
The issue tracker in that repo is also a great way to find things that need doing.
|
||||
There are issues for beginners and advanced compiler devs alike!
|
||||
|
||||
Just a few things to keep in mind:
|
||||
|
||||
- Please try to avoid overly long lines and use semantic line breaks (where you break the line after each sentence).
|
||||
There is no strict limit on line lengths; let the sentence or part of the sentence flow to its proper end on the same line.
|
||||
|
||||
- When contributing text to the guide, please contextualize the information with some time period
|
||||
and/or a reason so that the reader knows how much to trust or mistrust the information.
|
||||
Aim to provide a reasonable amount of context, possibly including but not limited to:
|
||||
|
||||
- A reason for why the data may be out of date other than "change",
|
||||
as change is a constant across the project.
|
||||
|
||||
- The date the comment was added, e.g. instead of writing _"Currently, ..."_
|
||||
or _"As of now, ..."_,
|
||||
consider adding the date, in one of the following formats:
|
||||
- Jan 2021
|
||||
- January 2021
|
||||
- jan 2021
|
||||
- january 2021
|
||||
|
||||
There is a CI action (in `~/.github/workflows/date-check.yml`)
|
||||
that generates a monthly report showing those that are over 6 months old
|
||||
([example](https://github.com/rust-lang/rustc-dev-guide/issues/2052)).
|
||||
|
||||
For the action to pick the date,
|
||||
add a special annotation before specifying the date:
|
||||
|
||||
```md
|
||||
<!-- date-check --> Sep 2024
|
||||
```
|
||||
|
||||
Example:
|
||||
|
||||
```md
|
||||
As of <!-- date-check --> Sep 2024, the foo did the bar.
|
||||
```
|
||||
|
||||
For cases where the date should not be part of the visible rendered output,
|
||||
use the following instead:
|
||||
|
||||
```md
|
||||
<!-- date-check: Sep 2024 -->
|
||||
```
|
||||
|
||||
- A link to a relevant WG, tracking issue, `rustc` rustdoc page, or similar, that may provide
|
||||
further explanation for the change process or a way to verify that the information is not
|
||||
outdated.
|
||||
|
||||
- If a text grows rather long (more than a few page scrolls) or complicated (more than four
|
||||
subsections),
|
||||
it might benefit from having a Table of Contents at the beginning,
|
||||
which you can auto-generate by including the `<!-- toc -->` marker at the top.
|
||||
|
||||
## Issue triage
|
||||
|
||||
Sometimes, an issue will stay open, even though the bug has been fixed.
|
||||
And sometimes, the original bug may go stale because something has changed in the meantime.
|
||||
|
||||
It can be helpful to go through older bug reports and make sure that they are still valid.
|
||||
Load up an older issue, double check that it's still true,
|
||||
and leave a comment letting us know if it is or is not.
|
||||
The [least recently updated sort][lru] is good for finding issues like this.
|
||||
|
||||
[Thanks to `@rustbot`][rustbot], anyone can help triage issues by adding
|
||||
appropriate labels to issues that haven't been triaged yet:
|
||||
|
||||
[lru]: https://github.com/rust-lang/rust/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-asc
|
||||
[rustbot]: ./rustbot.md
|
||||
|
||||
<style>
|
||||
.label-color {
|
||||
border-radius:0.5em;
|
||||
}
|
||||
table td:nth-child(2) {
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
</style>
|
||||
|
||||
| Labels | Color | Description |
|
||||
|--------|-------|-------------|
|
||||
| [A-] | <span class="label-color" style="background-color:#f7e101;"> </span> Yellow | The **area** of the project an issue relates to. |
|
||||
| [B-] | <span class="label-color" style="background-color:#d304cb;"> </span> Magenta | Issues which are **blockers**. |
|
||||
| [beta-] | <span class="label-color" style="background-color:#1e76d9;"> </span> Dark Blue | Tracks changes which need to be [backported to beta][beta-backport] |
|
||||
| [C-] | <span class="label-color" style="background-color:#f5f1fd;"> </span> Light Purple | The **category** of an issue. |
|
||||
| [D-] | <span class="label-color" style="background-color:#c9f7a3;"> </span> Mossy Green | Issues for **diagnostics**. |
|
||||
| [E-] | <span class="label-color" style="background-color:#02e10c;"> </span> Green | The **experience** level necessary to fix an issue. |
|
||||
| [F-] | <span class="label-color" style="background-color:#f9c0cc;"> </span> Peach | Issues for **nightly features**. |
|
||||
| [I-] | <span class="label-color" style="background-color:#e10c02;"> </span> Red | The **importance** of the issue. |
|
||||
| [I-\*-nominated] | <span class="label-color" style="background-color:#e10c02;"> </span> Red | The issue has been nominated for discussion at the next meeting of the corresponding team. |
|
||||
| [I-prioritize] | <span class="label-color" style="background-color:#e10c02;"> </span> Red | The issue has been nominated for prioritization by the team tagged with a **T**-prefixed label. |
|
||||
| [L-] | <span class="label-color" style="background-color:#64E9CF;"> </span> Teal | The relevant **lint**. |
|
||||
| [metabug] | <span class="label-color" style="background-color:#5319e7;"> </span> Purple | Bugs that collect other bugs. |
|
||||
| [O-] | <span class="label-color" style="background-color:#6e6ec0;"> </span> Purple Grey | The **operating system** or platform that the issue is specific to. |
|
||||
| [P-] | <span class="label-color" style="background-color:#eb6420;"> </span> Orange | The issue **priority**. These labels can be assigned by anyone that understand the issue and is able to prioritize it, and remove the [I-prioritize] label. |
|
||||
| [regression-] | <span class="label-color" style="background-color:#e4008a;"> </span> Pink | Tracks regressions from a stable release. |
|
||||
| [relnotes] | <span class="label-color" style="background-color:#fad8c7;"> </span> Light Orange | Changes that should be documented in the release notes of the next release. |
|
||||
| [S-] | <span class="label-color" style="background-color:#d3dddd;"> </span> Gray | Tracks the **status** of pull requests. |
|
||||
| [S-tracking-] | <span class="label-color" style="background-color:#4682b4;"> </span> Steel Blue | Tracks the **status** of [tracking issues]. |
|
||||
| [stable-] | <span class="label-color" style="background-color:#00229c;"> </span> Dark Blue | Tracks changes which need to be [backported to stable][stable-backport] in anticipation of a point release. |
|
||||
| [T-] | <span class="label-color" style="background-color:#bfd4f2;"> </span> Blue | Denotes which **team** the issue belongs to. |
|
||||
| [WG-] | <span class="label-color" style="background-color:#c2e0c6;"> </span> Green | Denotes which **working group** the issue belongs to. |
|
||||
|
||||
|
||||
[A-]: https://github.com/rust-lang/rust/labels?q=A
|
||||
[B-]: https://github.com/rust-lang/rust/labels?q=B
|
||||
[C-]: https://github.com/rust-lang/rust/labels?q=C
|
||||
[D-]: https://github.com/rust-lang/rust/labels?q=D
|
||||
[E-]: https://github.com/rust-lang/rust/labels?q=E
|
||||
[F-]: https://github.com/rust-lang/rust/labels?q=F
|
||||
[I-]: https://github.com/rust-lang/rust/labels?q=I
|
||||
[L-]: https://github.com/rust-lang/rust/labels?q=L
|
||||
[O-]: https://github.com/rust-lang/rust/labels?q=O
|
||||
[P-]: https://github.com/rust-lang/rust/labels?q=P
|
||||
[S-]: https://github.com/rust-lang/rust/labels?q=S
|
||||
[T-]: https://github.com/rust-lang/rust/labels?q=T
|
||||
[WG-]: https://github.com/rust-lang/rust/labels?q=WG
|
||||
[stable-]: https://github.com/rust-lang/rust/labels?q=stable
|
||||
[beta-]: https://github.com/rust-lang/rust/labels?q=beta
|
||||
[I-\*-nominated]: https://github.com/rust-lang/rust/labels?q=nominated
|
||||
[I-prioritize]: https://github.com/rust-lang/rust/labels/I-prioritize
|
||||
[tracking issues]: https://github.com/rust-lang/rust/labels/C-tracking-issue
|
||||
[beta-backport]: https://forge.rust-lang.org/release/backporting.html#beta-backporting-in-rust-langrust
|
||||
[stable-backport]: https://forge.rust-lang.org/release/backporting.html#stable-backporting-in-rust-langrust
|
||||
[metabug]: https://github.com/rust-lang/rust/labels/metabug
|
||||
[regression-]: https://github.com/rust-lang/rust/labels?q=regression
|
||||
[relnotes]: https://github.com/rust-lang/rust/labels/relnotes
|
||||
[S-tracking-]: https://github.com/rust-lang/rust/labels?q=s-tracking
|
||||
|
||||
### Rfcbot labels
|
||||
|
||||
[rfcbot] uses its own labels for tracking the process of coordinating
|
||||
asynchronous decisions, such as approving or rejecting a change.
|
||||
This is used for [RFCs], issues, and pull requests.
|
||||
|
||||
| Labels | Color | Description |
|
||||
|--------|-------|-------------|
|
||||
| [proposed-final-comment-period] | <span class="label-color" style="background-color:#ededed;"> </span> Gray | Currently awaiting signoff of all team members in order to enter the final comment period. |
|
||||
| [disposition-merge] | <span class="label-color" style="background-color:#008800;"> </span> Green | Indicates the intent is to merge the change. |
|
||||
| [disposition-close] | <span class="label-color" style="background-color:#dd0000;"> </span> Red | Indicates the intent is to not accept the change and close it. |
|
||||
| [disposition-postpone] | <span class="label-color" style="background-color:#ededed;"> </span> Gray | Indicates the intent is to not accept the change at this time and postpone it to a later date. |
|
||||
| [final-comment-period] | <span class="label-color" style="background-color:#1e76d9;"> </span> Blue | Currently soliciting final comments before merging or closing. |
|
||||
| [finished-final-comment-period] | <span class="label-color" style="background-color:#f9e189;"> </span> Light Yellow | The final comment period has concluded, and the issue will be merged or closed. |
|
||||
| [postponed] | <span class="label-color" style="background-color:#fbca04;"> </span> Yellow | The issue has been postponed. |
|
||||
| [closed] | <span class="label-color" style="background-color:#dd0000;"> </span> Red | The issue has been rejected. |
|
||||
| [to-announce] | <span class="label-color" style="background-color:#ededed;"> </span> Gray | Issues that have finished their final-comment-period and should be publicly announced. Note: the rust-lang/rust repository uses this label differently, to announce issues at the triage meetings. |
|
||||
|
||||
[disposition-merge]: https://github.com/rust-lang/rust/labels/disposition-merge
|
||||
[disposition-close]: https://github.com/rust-lang/rust/labels/disposition-close
|
||||
[disposition-postpone]: https://github.com/rust-lang/rust/labels/disposition-postpone
|
||||
[proposed-final-comment-period]: https://github.com/rust-lang/rust/labels/proposed-final-comment-period
|
||||
[final-comment-period]: https://github.com/rust-lang/rust/labels/final-comment-period
|
||||
[finished-final-comment-period]: https://github.com/rust-lang/rust/labels/finished-final-comment-period
|
||||
[postponed]: https://github.com/rust-lang/rfcs/labels/postponed
|
||||
[closed]: https://github.com/rust-lang/rfcs/labels/closed
|
||||
[to-announce]: https://github.com/rust-lang/rfcs/labels/to-announce
|
||||
[rfcbot]: https://github.com/anp/rfcbot-rs/
|
||||
[RFCs]: https://github.com/rust-lang/rfcs
|
||||
|
||||
## Helpful links and information
|
||||
|
||||
This section has moved to the ["About this guide"] chapter.
|
||||
|
||||
["About this guide"]: about-this-guide.md#other-places-to-find-information
|
||||
[search existing issues]: https://github.com/rust-lang/rust/issues?q=is%3Aissue
|
||||
[Breaking Changes]: bug-fix-procedure.md
|
||||
[triagebot.toml config file]: https://github.com/rust-lang/rust/blob/HEAD/triagebot.toml
|
||||
[rust-lang teams database]: https://github.com/rust-lang/team/tree/HEAD/teams
|
||||
[compiler test suite]: tests/intro.md
|
||||
[merge queue]: https://bors.rust-lang.org/queue/rust
|
||||
[git hooks]: https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks
|
||||
[A-docs label]: https://github.com/rust-lang/rust/issues?q=is%3Aopen%20is%3Aissue%20label%3AA-docs
|
||||
[RFC 1574]: https://github.com/rust-lang/rfcs/blob/master/text/1574-more-api-documentation-conventions.md#appendix-a-full-conventions-text
|
||||
[rustc-dev-guide]: https://rustc-dev-guide.rust-lang.org/
|
||||
[rdgrepo]: https://github.com/rust-lang/rustc-dev-guide
|
||||
168
src/doc/rustc-dev-guide/src/conventions.md
Normal file
168
src/doc/rustc-dev-guide/src/conventions.md
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
This file offers some tips on the coding conventions for rustc. This
|
||||
chapter covers [formatting](#formatting), [coding for correctness](#cc),
|
||||
[using crates from crates.io](#cio), and some tips on
|
||||
[structuring your PR for easy review](#er).
|
||||
|
||||
<a id="formatting"></a>
|
||||
|
||||
# Formatting and the tidy script
|
||||
|
||||
rustc is moving towards the [Rust standard coding style][fmt].
|
||||
|
||||
However, for now we don't use stable `rustfmt`; we use a pinned version with a
|
||||
special config, so this may result in different style from normal [`rustfmt`].
|
||||
Therefore, formatting this repository using `cargo fmt` is not recommended.
|
||||
|
||||
Instead, formatting should be done using `./x fmt`. It's a good habit to run
|
||||
`./x fmt` before every commit, as this reduces conflicts later.
|
||||
|
||||
Formatting is checked by the `tidy` script. It runs automatically when you do
|
||||
`./x test` and can be run in isolation with `./x fmt --check`.
|
||||
|
||||
If you want to use format-on-save in your editor, the pinned version of
|
||||
`rustfmt` is built under `build/<target>/stage0/bin/rustfmt`. You'll have to
|
||||
pass the <!-- date-check: nov 2022 --> `--edition=2021` argument yourself when calling
|
||||
`rustfmt` directly.
|
||||
|
||||
[fmt]: https://github.com/rust-dev-tools/fmt-rfcs
|
||||
[`rustfmt`]:https://github.com/rust-lang/rustfmt
|
||||
|
||||
## Formatting C++ code
|
||||
|
||||
The compiler contains some C++ code for interfacing with parts of LLVM that
|
||||
don't have a stable C API.
|
||||
When modifying that code, use this command to format it:
|
||||
|
||||
```sh
|
||||
./x test tidy --extra-checks=cpp:fmt --bless
|
||||
```
|
||||
|
||||
This uses a pinned version of `clang-format`, to avoid relying on the local
|
||||
environment.
|
||||
|
||||
<a id="copyright"></a>
|
||||
|
||||
<!-- REUSE-IgnoreStart -->
|
||||
<!-- Prevent REUSE from interpreting the heading as a copyright notice -->
|
||||
## Copyright notice
|
||||
<!-- REUSE-IgnoreEnd -->
|
||||
|
||||
In the past, files began with a copyright and license notice. Please **omit**
|
||||
this notice for new files licensed under the standard terms (dual
|
||||
MIT/Apache-2.0).
|
||||
|
||||
All of the copyright notices should be gone by now, but if you come across one
|
||||
in the rust-lang/rust repo, feel free to open a PR to remove it.
|
||||
|
||||
## Line length
|
||||
|
||||
Lines should be at most 100 characters. It's even better if you can
|
||||
keep things to 80.
|
||||
|
||||
**Ignoring the line length limit.** Sometimes – in particular for
|
||||
tests – it can be necessary to exempt yourself from this limit. In
|
||||
that case, you can add a comment towards the top of the file like so:
|
||||
|
||||
```rust
|
||||
// ignore-tidy-linelength
|
||||
```
|
||||
|
||||
## Tabs vs spaces
|
||||
|
||||
Prefer 4-space indent.
|
||||
|
||||
<a id="cc"></a>
|
||||
|
||||
# Coding for correctness
|
||||
|
||||
Beyond formatting, there are a few other tips that are worth
|
||||
following.
|
||||
|
||||
## Prefer exhaustive matches
|
||||
|
||||
Using `_` in a match is convenient, but it means that when new
|
||||
variants are added to the enum, they may not get handled correctly.
|
||||
Ask yourself: if a new variant were added to this enum, what's the
|
||||
chance that it would want to use the `_` code, versus having some
|
||||
other treatment? Unless the answer is "low", then prefer an
|
||||
exhaustive match. (The same advice applies to `if let` and `while
|
||||
let`, which are effectively tests for a single variant.)
|
||||
|
||||
## Use "TODO" comments for things you don't want to forget
|
||||
|
||||
As a useful tool to yourself, you can insert a `// TODO` comment
|
||||
for something that you want to get back to before you land your PR:
|
||||
|
||||
```rust,ignore
|
||||
fn do_something() {
|
||||
if something_else {
|
||||
unimplemented!(); // TODO write this
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The tidy script will report an error for a `// TODO` comment, so this
|
||||
code would not be able to land until the TODO is fixed (or removed).
|
||||
|
||||
This can also be useful in a PR as a way to signal from one commit that you are
|
||||
leaving a bug that a later commit will fix:
|
||||
|
||||
```rust,ignore
|
||||
if foo {
|
||||
return true; // TODO wrong, but will be fixed in a later commit
|
||||
}
|
||||
```
|
||||
|
||||
<a id="cio"></a>
|
||||
|
||||
# Using crates from crates.io
|
||||
|
||||
See the [crates.io dependencies][crates] section.
|
||||
|
||||
<a id="er"></a>
|
||||
|
||||
# How to structure your PR
|
||||
|
||||
How you prepare the commits in your PR can make a big difference for the
|
||||
reviewer. Here are some tips.
|
||||
|
||||
**Isolate "pure refactorings" into their own commit.** For example, if
|
||||
you rename a method, then put that rename into its own commit, along
|
||||
with the renames of all the uses.
|
||||
|
||||
**More commits is usually better.** If you are doing a large change,
|
||||
it's almost always better to break it up into smaller steps that can
|
||||
be independently understood. The one thing to be aware of is that if
|
||||
you introduce some code following one strategy, then change it
|
||||
dramatically (versus adding to it) in a later commit, that
|
||||
'back-and-forth' can be confusing.
|
||||
|
||||
**Format liberally.** While only the final commit of a PR must be correctly
|
||||
formatted, it is both easier to review and less noisy to format each commit
|
||||
individually using `./x fmt`.
|
||||
|
||||
**No merges.** We do not allow merge commits into our history, other
|
||||
than those by bors. If you get a merge conflict, rebase instead via a
|
||||
command like `git rebase -i rust-lang/master` (presuming you use the
|
||||
name `rust-lang` for your remote).
|
||||
|
||||
**Individual commits do not have to build (but it's nice).** We do not
|
||||
require that every intermediate commit successfully builds – we only
|
||||
expect to be able to bisect at a PR level. However, if you *can* make
|
||||
individual commits build, that is always helpful.
|
||||
|
||||
# Naming conventions
|
||||
|
||||
Apart from normal Rust style/naming conventions, there are also some specific
|
||||
to the compiler.
|
||||
|
||||
- `cx` tends to be short for "context" and is often used as a suffix. For
|
||||
example, `tcx` is a common name for the [Typing Context][tcx].
|
||||
|
||||
- [`'tcx`][tcx] is used as the lifetime name for the Typing Context.
|
||||
|
||||
- Because `crate` is a keyword, if you need a variable to represent something
|
||||
crate-related, often the spelling is changed to `krate`.
|
||||
|
||||
[tcx]: ./ty.md
|
||||
[crates]: ./crates-io.md
|
||||
293
src/doc/rustc-dev-guide/src/coroutine-closures.md
Normal file
293
src/doc/rustc-dev-guide/src/coroutine-closures.md
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
Please read [RFC 3668](https://rust-lang.github.io/rfcs/3668-async-closures.html) to understand the general motivation of the feature. This is a very technical and somewhat "vertical" chapter; ideally we'd split this and sprinkle it across all the relevant chapters, but for the purposes of understanding async closures *holistically*, I've put this together all here in one chapter.
|
||||
|
||||
# Coroutine-closures -- a technical deep dive
|
||||
|
||||
Coroutine-closures are a generalization of async closures, being special syntax for closure expressions which return a coroutine, notably one that is allowed to capture from the closure's upvars.
|
||||
|
||||
For now, the only usable kind of coroutine-closure is the async closure, and supporting async closures is the extent of this PR. We may eventually support `gen || {}`, etc., and most of the problems and curiosities described in this document apply to all coroutine-closures in general.
|
||||
|
||||
As a consequence of the code being somewhat general, this document may flip between calling them "async closures" and "coroutine-closures". The future that is returned by the async closure will generally be called the "coroutine" or the "child coroutine".
|
||||
|
||||
## HIR
|
||||
|
||||
Async closures (and in the future, other coroutine flavors such as `gen`) are represented in HIR as a `hir::Closure` whose closure-kind is `ClosureKind::CoroutineClosure(_)`[^k1], which wraps an async block, which is also represented in HIR as a `hir::Closure` and whose closure-kind is `ClosureKind::Coroutine(CoroutineKind::Desugared(_, CoroutineSource::Closure))`[^k2].
|
||||
|
||||
[^k1]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_ast_lowering/src/expr.rs#L1147>
|
||||
|
||||
[^k2]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_ast_lowering/src/expr.rs#L1117>
|
||||
|
||||
Like `async fn`, when lowering an async closure's body, we need to unconditionally move all of the closure's arguments into the body so they are captured. This is handled by `lower_coroutine_body_with_moved_arguments`[^l1]. The only notable quirk with this function is that the async block we end up generating has a capture kind of `CaptureBy::ByRef`[^l2]. We later force all of the *closure args* to be captured by-value[^l3], but we don't want the *whole* async block to act as if it were an `async move`, since that would defeat the purpose of the self-borrowing of an async closure.
|
||||
|
||||
[^l1]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_ast_lowering/src/item.rs#L1096-L1100>
|
||||
|
||||
[^l2]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_ast_lowering/src/item.rs#L1276-L1279>
|
||||
|
||||
[^l3]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_hir_typeck/src/upvar.rs#L250-L256>
|
||||
|
||||
## `rustc_middle::ty` Representation
|
||||
|
||||
For the purposes of keeping the implementation mostly future-compatible (i.e. with `gen || {}` and `async gen || {}`), most of this section calls async closures "coroutine-closures".
|
||||
|
||||
The main thing that this PR introduces is a new `TyKind` called `CoroutineClosure`[^t1] and corresponding variants on other relevant enums in typeck and borrowck (`UpvarArgs`, `DefiningTy`, `AggregateKind`).
|
||||
|
||||
[^t1]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_type_ir/src/ty_kind.rs#L163-L168>
|
||||
|
||||
We introduce a new `TyKind` instead of generalizing the existing `TyKind::Closure` due to major representational differences in the type. The major differences between `CoroutineClosure`s and regular closures can be explored by first inspecting the `CoroutineClosureArgsParts`, which is the "unpacked" representation of the coroutine-closure's generics.
|
||||
|
||||
#### Similarities to closures
|
||||
|
||||
Like a closure, we have `parent_args`, a `closure_kind_ty`, and a `tupled_upvars_ty`. These represent the same thing as their closure counterparts; namely: the generics inherited from the body that the closure is defined in, the maximum "calling capability" of the closure (i.e. must it be consumed to be called, like `FnOnce`, or can it be called by-ref), and the captured upvars of the closure itself.
|
||||
|
||||
#### The signature
|
||||
|
||||
A traditional closure has a `fn_sig_as_fn_ptr_ty` which it uses to represent the signature of the closure. In contrast, we store the signature of a coroutine closure in a somewhat "exploded" way, since coroutine-closures have *two* signatures depending on what `AsyncFn*` trait you call it with (see below sections).
|
||||
|
||||
Conceptually, the coroutine-closure may be thought of as containing several different signature types depending on whether it is being called by-ref or by-move.
|
||||
|
||||
To conveniently recreate both of these signatures, the `signature_parts_ty` stores all of the relevant parts of the coroutine returned by this coroutine-closure. This signature parts type will have the general shape of `fn(tupled_inputs, resume_ty) -> (return_ty, yield_ty)`, where `resume_ty`, `return_ty`, and `yield_ty` are the respective types for the *coroutine* returned by the coroutine-closure[^c1].
|
||||
|
||||
[^c1]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_type_ir/src/ty_kind/closure.rs#L221-L229>
|
||||
|
||||
The compiler mainly deals with the `CoroutineClosureSignature` type[^c2], which is created by extracting the relevant types out of the `fn()` ptr type described above, and which exposes methods that can be used to construct the *coroutine* that the coroutine-closure ultimately returns.
|
||||
|
||||
[^c2]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_type_ir/src/ty_kind/closure.rs#L362>
|
||||
|
||||
#### The data we need to carry along to construct a `Coroutine` return type
|
||||
|
||||
Along with the data stored in the signature, to construct a `TyKind::Coroutine` to return, we also need to store the "witness" of the coroutine.
|
||||
|
||||
So what about the upvars of the `Coroutine` that is returned? Well, for `AsyncFnOnce` (i.e. call-by-move), this is simply the same upvars that the coroutine returns. But for `AsyncFnMut`/`AsyncFn`, the coroutine that is returned from the coroutine-closure borrows data from the coroutine-closure with a given "environment" lifetime[^c3]. This corresponds to the `&self` lifetime[^c4] on the `AsyncFnMut`/`AsyncFn` call signature, and the GAT lifetime of the `ByRef`[^c5].
|
||||
|
||||
[^c3]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_type_ir/src/ty_kind/closure.rs#L447-L455>
|
||||
|
||||
[^c4]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/library/core/src/ops/async_function.rs#L36>
|
||||
|
||||
[^c5]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/library/core/src/ops/async_function.rs#L30>
|
||||
|
||||
#### Actually getting the coroutine return type(s)
|
||||
|
||||
To most easily construct the `Coroutine` that a coroutine-closure returns, you can use the `to_coroutine_given_kind_and_upvars`[^helper] helper on `CoroutineClosureSignature`, which can be acquired from the `CoroutineClosureArgs`.
|
||||
|
||||
[^helper]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_type_ir/src/ty_kind/closure.rs#L419>
|
||||
|
||||
Most of the args to that function will be components that you can get out of the `CoroutineArgs`, except for the `goal_kind: ClosureKind` which controls which flavor of coroutine to return based off of the `ClosureKind` passed in -- i.e. it will prepare the by-ref coroutine if `ClosureKind::Fn | ClosureKind::FnMut`, and the by-move coroutine if `ClosureKind::FnOnce`.
|
||||
|
||||
## Trait Hierarchy
|
||||
|
||||
We introduce a parallel hierarchy of `Fn*` traits, the `AsyncFn*` traits, that are implemented for coroutine-closures. The motivation for the introduction was covered in a blog post: [Async Closures](https://hackmd.io/@compiler-errors/async-closures).
|
||||
|
||||
All currently-stable callable types (i.e., closures, function items, function pointers, and `dyn Fn*` trait objects) automatically implement `AsyncFn*() -> T` if they implement `Fn*() -> Fut` for some output type `Fut`, and `Fut` implements `Future<Output = T>`[^tr1].
|
||||
|
||||
[^tr1]: <https://github.com/rust-lang/rust/blob/7c7bb7dc017545db732f5cffec684bbaeae0a9a0/compiler/rustc_next_trait_solver/src/solve/assembly/structural_traits.rs#L404-L409>
|
||||
|
||||
Async closures implement `AsyncFn*` as their bodies permit; i.e. if they end up using upvars in a way that is compatible (i.e. if they consume or mutate their upvars, it may affect whether they implement `AsyncFn` and `AsyncFnMut`...)
|
||||
|
||||
#### Lending
|
||||
|
||||
We may in the future move `AsyncFn*` onto a more general set of `LendingFn*` traits; however, there are some concrete technical implementation details that limit our ability to use `LendingFn` ergonomically in the compiler today. These have to do with:
|
||||
|
||||
- Closure signature inference.
|
||||
- Limitations around higher-ranked trait bounds.
|
||||
- Shortcomings with error messages.
|
||||
|
||||
These limitations, plus the fact that the underlying trait should have no effect on the user experience of async closures and async `Fn` trait bounds, lead us to `AsyncFn*` for now. To ensure we can eventually move to these more general traits, the precise `AsyncFn*` trait definitions (including the associated types) are left as an implementation detail.
|
||||
|
||||
#### When do async closures implement the regular `Fn*` traits?
|
||||
|
||||
We mention above that "regular" callable types can implement `AsyncFn*`, but the reverse question exists of "can async closures implement `Fn*` too"? The short answer is "when it's valid", i.e. when the coroutine that would have been returned from `AsyncFn`/`AsyncFnMut` does not actually have any upvars that are "lent" from the parent coroutine-closure.
|
||||
|
||||
See the "follow-up: when do..." section below for an elaborated answer. The full answer describes a pretty interesting and hopefully thorough heuristic that is used to ensure that most async closures "just work".
|
||||
|
||||
## Tale of two bodies...
|
||||
|
||||
When async closures are called with `AsyncFn`/`AsyncFnMut`, they return a coroutine that borrows from the closure. However, when they are called via `AsyncFnOnce`, we consume that closure, and cannot return a coroutine that borrows from data that is now dropped.
|
||||
|
||||
To work around this limitation, we synthesize a separate by-move MIR body for calling `AsyncFnOnce::call_once` on a coroutine-closure that can be called by-ref.
|
||||
|
||||
This body operates identically to the "normal" coroutine returned from calling the coroutine-closure, except for the fact that it has a different set of upvars, since we must *move* the captures from the parent coroutine-closure into the child coroutine.
|
||||
|
||||
#### Synthesizing the by-move body
|
||||
|
||||
When we want to access the by-move body of the coroutine returned by a coroutine-closure, we can do so via the `coroutine_by_move_body_def_id`[^b1] query.
|
||||
|
||||
[^b1]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_mir_transform/src/coroutine/by_move_body.rs#L1-L70>
|
||||
|
||||
This query synthesizes a new MIR body by copying the MIR body of the coroutine and inserting additional derefs and field projections[^b2] to preserve the semantics of the body.
|
||||
|
||||
[^b2]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_mir_transform/src/coroutine/by_move_body.rs#L131-L195>
|
||||
|
||||
Since we've synthesized a new def id, this query is also responsible for feeding a ton of other relevant queries for the MIR body. This query is `ensure()`d[^b3] during the `mir_promoted` query, since it operates on the *built* mir of the coroutine.
|
||||
|
||||
[^b3]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_mir_transform/src/lib.rs#L339-L342>
|
||||
|
||||
## Closure signature inference
|
||||
|
||||
The closure signature inference algorithm for async closures is a bit more complicated than the inference algorithm for "traditional" closures. Like closures, we iterate through all of the clauses that may be relevant (for the expectation type passed in)[^deduce1].
|
||||
|
||||
To extract a signature, we consider two situations:
|
||||
* Projection predicates with `AsyncFnOnce::Output`, which we will use to extract the inputs and output type for the closure. This corresponds to the situation that there was a `F: AsyncFn*() -> T` bound[^deduce2].
|
||||
* Projection predicates with `FnOnce::Output`, which we will use to extract the inputs. For the output, we also try to deduce an output by looking for relevant `Future::Output` projection predicates. This corresponds to the situation that there was an `F: Fn*() -> T, T: Future<Output = U>` bound.[^deduce3]
|
||||
* If there is no `Future` bound, we simply use a fresh infer var for the output. This corresponds to the case where one can pass an async closure to a combinator function like `Option::map`.[^deduce4]
|
||||
|
||||
[^deduce1]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_hir_typeck/src/closure.rs#L345-L362>
|
||||
|
||||
[^deduce2]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_hir_typeck/src/closure.rs#L486-L487>
|
||||
|
||||
[^deduce3]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_hir_typeck/src/closure.rs#L517-L534>
|
||||
|
||||
[^deduce4]: <https://github.com/rust-lang/rust/blob/5ca0e9fa9b2f92b463a0a2b0b34315e09c0b7236/compiler/rustc_hir_typeck/src/closure.rs#L575-L590>
|
||||
|
||||
We support the latter case simply to make it easier for users to simply drop-in `async || {}` syntax, even when they're calling an API that was designed before first-class `AsyncFn*` traits were available.
|
||||
|
||||
#### Calling a closure before its kind has been inferred
|
||||
|
||||
We defer[^call1] the computation of a coroutine-closure's "kind" (i.e. its maximum calling mode: `AsyncFnOnce`/`AsyncFnMut`/`AsyncFn`) until the end of typeck. However, since we want to be able to call that coroutine-closure before the end of typeck, we need to come up with the return type of the coroutine-closure before that.
|
||||
|
||||
[^call1]: <https://github.com/rust-lang/rust/blob/705cfe0e966399e061d64dd3661bfbc57553ed87/compiler/rustc_hir_typeck/src/callee.rs#L169-L210>
|
||||
|
||||
Unlike regular closures, whose return type does not change depending on what `Fn*` trait we call it with, coroutine-closures *do* end up returning different coroutine types depending on the flavor of `AsyncFn*` trait used to call it.
|
||||
|
||||
Specifically, while the def-id of the returned coroutine does not change, the upvars[^call2] (which are either borrowed or moved from the parent coroutine-closure) and the coroutine-kind[^call3] are dependent on the calling mode.
|
||||
|
||||
[^call2]: <https://github.com/rust-lang/rust/blob/705cfe0e966399e061d64dd3661bfbc57553ed87/compiler/rustc_type_ir/src/ty_kind/closure.rs#L574-L576>
|
||||
|
||||
[^call3]: <https://github.com/rust-lang/rust/blob/705cfe0e966399e061d64dd3661bfbc57553ed87/compiler/rustc_type_ir/src/ty_kind/closure.rs#L554-L563>
|
||||
|
||||
We introduce an `AsyncFnKindHelper` trait which allows us to defer the question of "does this coroutine-closure support this calling mode"[^helper1] via a trait goal, and "what are the tupled upvars of this calling mode"[^helper2] via an associated type, which can be computed by appending the input types of the coroutine-closure to either the upvars or the "by ref" upvars computed during upvar analysis.
|
||||
|
||||
[^helper1]: <https://github.com/rust-lang/rust/blob/7c7bb7dc017545db732f5cffec684bbaeae0a9a0/library/core/src/ops/async_function.rs#L135-L144>
|
||||
|
||||
[^helper2]: <https://github.com/rust-lang/rust/blob/7c7bb7dc017545db732f5cffec684bbaeae0a9a0/library/core/src/ops/async_function.rs#L146-L154>
|
||||
|
||||
#### Ok, so why?
|
||||
|
||||
This seems a bit roundabout and complex, and I admit that it is. But let's think of the "do nothing" alternative -- we could instead mark all `AsyncFn*` goals as ambiguous until upvar analysis, at which point we would know exactly what to put into the upvars of the coroutine we return. However, this is actually *very* detrimental to inference in the program, since it means that programs like this would not be valid:
|
||||
|
||||
```rust
|
||||
let c = async || -> String { .. };
|
||||
let s = c().await;
|
||||
// ^^^ If we can't project `<{c} as AsyncFn>::call()` to a coroutine, then the `IntoFuture::into_future` call inside of the `.await` stalls, and the type of `s` is left unconstrained as an infer var.
|
||||
s.as_bytes();
|
||||
// ^^^ That means we can't call any methods on the awaited return of a coroutine-closure, like... at all!
|
||||
```
|
||||
|
||||
So *instead*, we use this alias (in this case, a projection: `AsyncFnKindHelper::Upvars<'env, ...>`) to delay the computation of the *tupled upvars* and give us something to put in its place, while still allowing us to return a `TyKind::Coroutine` (which is a rigid type) and we may successfully confirm the built-in traits we need (in our case, `Future`), since the `Future` implementation doesn't depend on the upvars at all.
|
||||
|
||||
## Upvar analysis
|
||||
|
||||
By and large, the upvar analysis for coroutine-closures and their child coroutines proceeds like normal upvar analysis. However, there are several interesting bits that happen to account for async closures' special natures:
|
||||
|
||||
#### Forcing all inputs to be captured
|
||||
|
||||
Like async fn, all input arguments are captured. We explicitly force[^f1] all of these inputs to be captured by move so that the future coroutine returned by async closures does not depend on whether the input is *used* by the body or not, which would impart an interesting semver hazard.
|
||||
|
||||
[^f1]: <https://github.com/rust-lang/rust/blob/7c7bb7dc017545db732f5cffec684bbaeae0a9a0/compiler/rustc_hir_typeck/src/upvar.rs#L250-L259>
|
||||
|
||||
#### Computing the by-ref captures
|
||||
|
||||
For a coroutine-closure that supports `AsyncFn`/`AsyncFnMut`, we must also compute the relationship between the captures of the coroutine-closure and its child coroutine. Specifically, the coroutine-closure may `move` an upvar into its captures, but the coroutine may only borrow that upvar.
|
||||
|
||||
We compute the "`coroutine_captures_by_ref_ty`" by looking at all of the child coroutine's captures and comparing them to the corresponding capture of the parent coroutine-closure[^br1]. This `coroutine_captures_by_ref_ty` ends up being represented as a `for<'env> fn() -> captures...` type, with the additional binder lifetime representing the "`&self`" lifetime of calling `AsyncFn::async_call` or `AsyncFnMut::async_call_mut`. We instantiate that binder later when actually calling the methods.
|
||||
|
||||
[^br1]: <https://github.com/rust-lang/rust/blob/7c7bb7dc017545db732f5cffec684bbaeae0a9a0/compiler/rustc_hir_typeck/src/upvar.rs#L375-L471>
|
||||
|
||||
Note that not every by-ref capture from the parent coroutine-closure results in a "lending" borrow. See the **Follow-up: When do async closures implement the regular `Fn*` traits?** section below for more details, since this intimately influences whether or not the coroutine-closure is allowed to implement the `Fn*` family of traits.
|
||||
|
||||
#### By-move body + `FnOnce` quirk
|
||||
|
||||
There are several situations where the closure upvar analysis ends up inferring upvars for the coroutine-closure's child coroutine that are too relaxed, and end up resulting in borrow-checker errors. This is best illustrated via examples. For example, given:
|
||||
|
||||
```rust
|
||||
fn force_fnonce<T: async FnOnce()>(t: T) -> T { t }
|
||||
|
||||
let x = String::new();
|
||||
let c = force_fnonce(async move || {
|
||||
println!("{x}");
|
||||
});
|
||||
```
|
||||
|
||||
`x` will be moved into the coroutine-closure, but the coroutine that is returned would only borrow `&x`. However, since `force_fnonce` forces the coroutine-closure to `AsyncFnOnce`, which is not *lending*, we must force the capture to happen by-move[^bm1].
|
||||
|
||||
Similarly:
|
||||
|
||||
```rust
|
||||
let x = String::new();
|
||||
let y = String::new();
|
||||
let c = async move || {
|
||||
drop(y);
|
||||
println!("{x}");
|
||||
};
|
||||
```
|
||||
|
||||
`x` will be moved into the coroutine-closure, but the coroutine that is returned would only borrow `&x`. However, since we also capture `y` and drop it, the coroutine-closure is forced to be `AsyncFnOnce`. We must also force the capture of `x` to happen by-move. To determine this situation in particular, since unlike the last example the coroutine-kind's closure-kind has not yet been constrained, we must analyze the body of the coroutine-closure to see how all of the upvars are used, to determine if they've been used in a way that is "consuming" -- i.e. that would force it to `FnOnce`[^bm2].
|
||||
|
||||
[^bm1]: <https://github.com/rust-lang/rust/blob/7c7bb7dc017545db732f5cffec684bbaeae0a9a0/compiler/rustc_hir_typeck/src/upvar.rs#L211-L248>
|
||||
|
||||
[^bm2]: <https://github.com/rust-lang/rust/blob/7c7bb7dc017545db732f5cffec684bbaeae0a9a0/compiler/rustc_hir_typeck/src/upvar.rs#L532-L539>
|
||||
|
||||
#### Follow-up: When do async closures implement the regular `Fn*` traits?
|
||||
|
||||
Well, first of all, all async closures implement `FnOnce` since they can always be called *at least once*.
|
||||
|
||||
For `Fn`/`FnMut`, the detailed answer involves answering a related question: is the coroutine-closure lending? Because if it is, then it cannot implement the non-lending `Fn`/`FnMut` traits.
|
||||
|
||||
Determining when the coroutine-closure must *lend* its upvars is implemented in the `should_reborrow_from_env_of_parent_coroutine_closure` helper function[^u1]. Specifically, this needs to happen in two places:
|
||||
|
||||
[^u1]: <https://github.com/rust-lang/rust/blob/7c7bb7dc017545db732f5cffec684bbaeae0a9a0/compiler/rustc_hir_typeck/src/upvar.rs#L1818-L1860>
|
||||
|
||||
1. Are we borrowing data owned by the parent closure? We can determine if that is the case by checking if the parent capture is by-move, EXCEPT if we apply a deref projection, which means we're reborrowing a reference that we captured by-move.
|
||||
|
||||
```rust
|
||||
let x = &1i32; // Let's call this lifetime `'1`.
|
||||
let c = async move || {
|
||||
println!("{:?}", *x);
|
||||
// Even though the inner coroutine borrows by ref, we're only capturing `*x`,
|
||||
// not `x`, so the inner closure is allowed to reborrow the data for `'1`.
|
||||
};
|
||||
```
|
||||
|
||||
2. If a coroutine is mutably borrowing from a parent capture, then that mutable borrow cannot live for longer than either the parent *or* the borrow that we have on the original upvar. Therefore we always need to borrow the child capture with the lifetime of the parent coroutine-closure's env.
|
||||
|
||||
```rust
|
||||
let mut x = 1i32;
|
||||
let c = async || {
|
||||
x = 1;
|
||||
// The parent borrows `x` for some `&'1 mut i32`.
|
||||
// However, when we call `c()`, we implicitly autoref for the signature of
|
||||
// `AsyncFnMut::async_call_mut`. Let's call that lifetime `'call`. Since
|
||||
// the maximum that `&'call mut &'1 mut i32` can be reborrowed is `&'call mut i32`,
|
||||
// the inner coroutine should capture w/ the lifetime of the coroutine-closure.
|
||||
};
|
||||
```
|
||||
|
||||
If either of these cases applies, then we should capture the borrow with the lifetime of the parent coroutine-closure's env. Luckily, if this function is not correct, then the program is not unsound, since we still borrowck and validate the choices made from this function -- the only side-effect is that the user may receive unnecessary borrowck errors.
|
||||
|
||||
## Instance resolution
|
||||
|
||||
If a coroutine-closure has a closure-kind of `FnOnce`, then its `AsyncFnOnce::call_once` and `FnOnce::call_once` implementations resolve to the coroutine-closure's body[^res1], and the `Future::poll` of the coroutine that gets returned resolves to the body of the child closure.
|
||||
|
||||
[^res1]: <https://github.com/rust-lang/rust/blob/705cfe0e966399e061d64dd3661bfbc57553ed87/compiler/rustc_ty_utils/src/instance.rs#L351>
|
||||
|
||||
If a coroutine-closure has a closure-kind of `FnMut`/`Fn`, then the same applies to `AsyncFn` and the corresponding `Future` implementation of the coroutine that gets returned.[^res1] However, we use a MIR shim to generate the implementation of `AsyncFnOnce::call_once`/`FnOnce::call_once`[^res2], and `Fn::call`/`FnMut::call_mut` instances if they exist[^res3].
|
||||
|
||||
[^res2]: <https://github.com/rust-lang/rust/blob/705cfe0e966399e061d64dd3661bfbc57553ed87/compiler/rustc_ty_utils/src/instance.rs#L341-L349>
|
||||
|
||||
[^res3]: <https://github.com/rust-lang/rust/blob/705cfe0e966399e061d64dd3661bfbc57553ed87/compiler/rustc_ty_utils/src/instance.rs#L312-L326>
|
||||
|
||||
This is represented by the `ConstructCoroutineInClosureShim`[^i1]. The `receiver_by_ref` bool will be true if this is the instance of `Fn::call`/`FnMut::call_mut`.[^i2] The coroutine that all of these instances return corresponds to the by-move body we will have synthesized by this point.[^i3]
|
||||
|
||||
[^i1]: <https://github.com/rust-lang/rust/blob/705cfe0e966399e061d64dd3661bfbc57553ed87/compiler/rustc_middle/src/ty/instance.rs#L129-L134>
|
||||
|
||||
[^i2]: <https://github.com/rust-lang/rust/blob/705cfe0e966399e061d64dd3661bfbc57553ed87/compiler/rustc_middle/src/ty/instance.rs#L136-L141>
|
||||
|
||||
[^i3]: <https://github.com/rust-lang/rust/blob/07cbbdd69363da97075650e9be24b78af0bcdd23/compiler/rustc_middle/src/ty/instance.rs#L841>
|
||||
|
||||
## Borrow-checking
|
||||
|
||||
It turns out that borrow-checking async closures is pretty straightforward. After adding a new `DefiningTy::CoroutineClosure`[^bck1] variant, and teaching borrowck how to generate the signature of the coroutine-closure[^bck2], borrowck proceeds totally fine.
|
||||
|
||||
One thing to note is that we don't borrow-check the synthetic body we make for by-move coroutines, since by construction (and the validity of the by-ref coroutine body it was derived from) it must be valid.
|
||||
|
||||
[^bck1]: <https://github.com/rust-lang/rust/blob/705cfe0e966399e061d64dd3661bfbc57553ed87/compiler/rustc_borrowck/src/universal_regions.rs#L110-L115>
|
||||
|
||||
[^bck2]: <https://github.com/rust-lang/rust/blob/7c7bb7dc017545db732f5cffec684bbaeae0a9a0/compiler/rustc_borrowck/src/universal_regions.rs#L743-L790>
|
||||
23
src/doc/rustc-dev-guide/src/crates-io.md
Normal file
23
src/doc/rustc-dev-guide/src/crates-io.md
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# crates.io Dependencies
|
||||
|
||||
The Rust compiler supports building with some dependencies from `crates.io`.
|
||||
Examples are `log` and `env_logger`.
|
||||
|
||||
In general,
|
||||
you should avoid adding dependencies to the compiler for several reasons:
|
||||
|
||||
- The dependency may not be of high quality or well-maintained.
|
||||
- The dependency may not be using a compatible license.
|
||||
- The dependency may have transitive dependencies that have one of the above
|
||||
problems.
|
||||
|
||||
<!-- date-check: Feb 2023 -->
|
||||
Note that there is no official policy for vetting new dependencies to the compiler.
|
||||
Decisions are made on a case-by-case basis, during code review.
|
||||
|
||||
## Permitted dependencies
|
||||
|
||||
The `tidy` tool has [a list of crates that are allowed]. To add a
|
||||
dependency that is not already in the compiler, you will need to add it to the list.
|
||||
|
||||
[a list of crates that are allowed]: https://github.com/rust-lang/rust/blob/9d1b2106e23b1abd32fce1f17267604a5102f57a/src/tools/tidy/src/deps.rs#L73
|
||||
354
src/doc/rustc-dev-guide/src/debugging-support-in-rustc.md
Normal file
354
src/doc/rustc-dev-guide/src/debugging-support-in-rustc.md
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
# Debugging support in the Rust compiler
|
||||
|
||||
<!-- toc -->
|
||||
|
||||
This document explains the state of debugging tools support in the Rust compiler (rustc).
|
||||
It gives an overview of GDB, LLDB, WinDbg/CDB,
|
||||
as well as infrastructure around the Rust compiler to debug Rust code.
|
||||
If you want to learn how to debug the Rust compiler itself,
|
||||
see [Debugging the Compiler].
|
||||
|
||||
The material is gathered from the video,
|
||||
[Tom Tromey discusses debugging support in rustc].
|
||||
|
||||
## Preliminaries
|
||||
|
||||
### Debuggers
|
||||
|
||||
According to Wikipedia
|
||||
|
||||
> A [debugger or debugging tool] is a computer program that is used to test and debug
|
||||
> other programs (the "target" program).
|
||||
|
||||
Writing a debugger from scratch for a language requires a lot of work, especially if
|
||||
debuggers have to be supported on various platforms. GDB and LLDB, however, can be
|
||||
extended to support debugging a language. This is the path that Rust has chosen.
|
||||
This document's main goal is to document the support for these debuggers in the Rust compiler.
|
||||
|
||||
### DWARF
|
||||
|
||||
According to the [DWARF] standard website
|
||||
|
||||
> DWARF is a debugging file format used by many compilers and debuggers to support source level
|
||||
> debugging. It addresses the requirements of a number of procedural languages,
|
||||
> such as C, C++, and Fortran, and is designed to be extensible to other languages.
|
||||
> DWARF is architecture independent and applicable to any processor or operating system.
|
||||
> It is widely used on Unix, Linux and other operating systems,
|
||||
> as well as in stand-alone environments.
|
||||
|
||||
DWARF reader is a program that consumes the DWARF format and creates debugger compatible output.
|
||||
This program may live in the compiler itself. DWARF uses a data structure called
|
||||
Debugging Information Entry (DIE) which stores the information as "tags" to denote functions,
|
||||
variables etc., e.g., `DW_TAG_variable`, `DW_TAG_pointer_type`, `DW_TAG_subprogram` etc.
|
||||
You can also invent your own tags and attributes.
|
||||
|
||||
### CodeView/PDB
|
||||
|
||||
[PDB] (Program Database) is a file format created by Microsoft that contains debug information.
|
||||
PDBs can be consumed by debuggers such as WinDbg/CDB and other tools to display debug information.
|
||||
A PDB contains multiple streams that describe debug information about a specific binary such
|
||||
as types, symbols, and source files used to compile the given binary. CodeView is another
|
||||
format which defines the structure of [symbol records] and [type records] that appear within
|
||||
PDB streams.
|
||||
|
||||
## Supported debuggers
|
||||
|
||||
### GDB
|
||||
|
||||
#### Rust expression parser
|
||||
|
||||
To be able to show debug output, we need an expression parser.
|
||||
This (GDB) expression parser is written in [Bison],
|
||||
and can parse only a subset of Rust expressions.
|
||||
GDB parser was written from scratch and has no relation to any other parser,
|
||||
including that of rustc.
|
||||
|
||||
GDB has Rust-like value and type output. It can print values and types in a way
|
||||
that looks like Rust syntax in the output. Or when you print a type as [ptype] in GDB,
|
||||
it also looks like Rust source code. Check out the documentation in the [manual for GDB/Rust].
|
||||
|
||||
#### Parser extensions
|
||||
|
||||
Expression parser has a couple of extensions in it to facilitate features that you cannot do
|
||||
with Rust. Some limitations are listed in the [manual for GDB/Rust]. There is some special
|
||||
code in the DWARF reader in GDB to support the extensions.
|
||||
|
||||
A couple of examples of DWARF reader support needed are as follows:
|
||||
|
||||
1. Enum: Needed for support for enum types.
|
||||
The Rust compiler writes the information about enum into DWARF,
|
||||
and GDB reads the DWARF to understand where the tag field is,
|
||||
or if there is a tag field,
|
||||
or if the tag slot is shared with non-zero optimization etc.
|
||||
|
||||
2. Dissect trait objects: DWARF extension where the trait object's description in the DWARF
|
||||
also points to a stub description of the corresponding vtable which in turn points to the
|
||||
concrete type for which this trait object exists. This means that you can do a `print *object`
|
||||
for that trait object, and GDB will understand how to find the correct type of the payload in
|
||||
the trait object.
|
||||
|
||||
**TODO**: Figure out if the following should be mentioned in the GDB-Rust document rather than
|
||||
this guide page so there is no duplication. This is regarding the following comments:
|
||||
|
||||
[This comment by Tom](https://github.com/rust-lang/rustc-dev-guide/pull/316#discussion_r284027340)
|
||||
> gdb's Rust extensions and limitations are documented in the gdb manual:
|
||||
https://sourceware.org/gdb/onlinedocs/gdb/Rust.html -- however, this neglects to mention that
|
||||
gdb convenience variables and registers follow the gdb $ convention, and that the Rust parser
|
||||
implements the gdb @ extension.
|
||||
|
||||
[This question by Aman](https://github.com/rust-lang/rustc-dev-guide/pull/316#discussion_r285401353)
|
||||
> @tromey do you think we should mention this part in the GDB-Rust document rather than this
|
||||
document so there is no duplication etc.?
|
||||
|
||||
### LLDB
|
||||
|
||||
#### Rust expression parser
|
||||
|
||||
This expression parser is written in C++. It is a type of [Recursive Descent parser].
|
||||
It implements slightly less of the Rust language than GDB.
|
||||
LLDB has Rust-like value and type output.
|
||||
|
||||
#### Developer notes
|
||||
|
||||
* LLDB has a plugin architecture but that does not work for language support.
|
||||
* GDB generally works better on Linux.
|
||||
|
||||
### WinDbg/CDB
|
||||
|
||||
Microsoft provides [Windows Debugging Tools] such as the Windows Debugger (WinDbg) and
|
||||
the Console Debugger (CDB) which both support debugging programs written in Rust. These
|
||||
debuggers parse the debug info for a binary from the `PDB`, if available, to construct a
|
||||
visualization to serve up in the debugger.
|
||||
|
||||
#### Natvis
|
||||
|
||||
Both WinDbg and CDB support defining and viewing custom visualizations for any given type
|
||||
within the debugger using the Natvis framework. The Rust compiler defines a set of Natvis
|
||||
files that define custom visualizations for a subset of types in the standard libraries such
|
||||
as `std`, `core`, and `alloc`. These Natvis files are embedded into `PDBs` generated by the
|
||||
`*-pc-windows-msvc` target triples to automatically enable these custom visualizations when
|
||||
debugging. This default can be overridden by setting the `strip` rustc flag to either `debuginfo`
|
||||
or `symbols`.
|
||||
|
||||
Rust has support for embedding Natvis files for crates outside of the standard libraries by
|
||||
using the `#[debugger_visualizer]` attribute.
|
||||
For more details on how to embed debugger visualizers,
|
||||
please refer to the section on the [`debugger_visualizer` attribute].
|
||||
|
||||
## DWARF and `rustc`
|
||||
|
||||
[DWARF] is the standard way compilers generate debugging information that debuggers read.
|
||||
It is _the_ debugging format on macOS and Linux.
|
||||
It is a multi-language and extensible format,
|
||||
and is mostly good enough for Rust's purposes.
|
||||
Hence, the current implementation reuses DWARF's concepts.
|
||||
This is true even if some of the concepts in DWARF do not align with Rust semantically because,
|
||||
generally, there can be some kind of mapping between the two.
|
||||
|
||||
We have some DWARF extensions that the Rust compiler emits and the debuggers understand that
|
||||
are _not_ in the DWARF standard.
|
||||
|
||||
* Rust compiler will emit DWARF for a virtual table, and this `vtable` object will have a
|
||||
`DW_AT_containing_type` that points to the real type. This lets debuggers dissect a trait object
|
||||
pointer to correctly find the payload. E.g., here's such a DIE, from a test case in the gdb
|
||||
repository:
|
||||
|
||||
```asm
|
||||
<1><1a9>: Abbrev Number: 3 (DW_TAG_structure_type)
|
||||
<1aa> DW_AT_containing_type: <0x1b4>
|
||||
<1ae> DW_AT_name : (indirect string, offset: 0x23d): vtable
|
||||
<1b2> DW_AT_byte_size : 0
|
||||
<1b3> DW_AT_alignment : 8
|
||||
```
|
||||
|
||||
* The other extension is that the Rust compiler can emit a tagless discriminated union.
|
||||
See [DWARF feature request] for this item.
|
||||
|
||||
### Current limitations of DWARF
|
||||
|
||||
* Traits - require a bigger change than normal to DWARF, on how to represent Traits in DWARF.
|
||||
* DWARF provides no way to differentiate between Structs and Tuples. Rust compiler emits
|
||||
fields with `__0` and debuggers look for a sequence of such names to overcome this limitation.
|
||||
For example, in this case the debugger would look at a field via `x.__0` instead of `x.0`.
|
||||
This is resolved via the Rust parser in the debugger so now you can do `x.0`.
|
||||
|
||||
DWARF relies on debuggers to know some information about platform ABI.
|
||||
Rust does not do that all the time.
|
||||
|
||||
## Developer notes
|
||||
|
||||
This section is from the talk about certain aspects of development.
|
||||
|
||||
## What is missing
|
||||
|
||||
### Code signing for LLDB debug server on macOS
|
||||
|
||||
According to Wikipedia, [System Integrity Protection] is
|
||||
|
||||
> System Integrity Protection (SIP, sometimes referred to as rootless) is a security feature
|
||||
> of Apple's macOS operating system introduced in OS X El Capitan. It comprises a number of
|
||||
> mechanisms that are enforced by the kernel. A centerpiece is the protection of system-owned
|
||||
> files and directories against modifications by processes without a specific "entitlement",
|
||||
> even when executed by the root user or a user with root privileges (sudo).
|
||||
|
||||
It prevents processes using `ptrace` syscall. If a process wants to use `ptrace` it has to be
|
||||
code signed. The certificate that signs it has to be trusted on your machine.
|
||||
|
||||
See [Apple developer documentation for System Integrity Protection].
|
||||
|
||||
We may need to sign up with Apple and get the keys to do this signing. Tom has looked into it, and
|
||||
Mozilla cannot do this because it is at the maximum number of
|
||||
keys it is allowed to sign. Tom does not know if Mozilla could get more keys.
|
||||
|
||||
Alternatively, Tom suggests that maybe a Rust legal entity is needed to get the keys via Apple.
|
||||
This problem is not technical in nature. If we had such a key we could sign GDB as well and
|
||||
ship that.
|
||||
|
||||
### DWARF and Traits
|
||||
|
||||
Rust traits are not emitted into DWARF at all. The impact of this is calling a method `x.method()`
|
||||
does not work as is. The reason being that method is implemented by a trait, as opposed
|
||||
to a type. That information is not present so finding trait methods is missing.
|
||||
|
||||
DWARF has a notion of interface types (possibly added for Java). Tom's idea was to use this
|
||||
interface type as traits.
|
||||
|
||||
DWARF only deals with concrete names, not the reference types. So, a given implementation of a
|
||||
trait for a type would be one of these interfaces (`DW_TAG_interface_type`). Also, the type for
|
||||
which it is implemented would describe all the interfaces this type implements. This requires a
|
||||
DWARF extension.
|
||||
|
||||
Issue on GitHub: [https://github.com/rust-lang/rust/issues/33014]
|
||||
|
||||
## Typical process for a Debug Info change (LLVM)
|
||||
|
||||
LLVM has Debug Info (DI) builders. This is the primary thing that Rust calls into.
|
||||
This is why we need to change LLVM first because that is emitted first and not DWARF directly.
|
||||
This is a kind of metadata that you construct and hand-off to LLVM. For the Rustc/LLVM hand-off
|
||||
some LLVM DI builder methods are called to construct representation of a type.
|
||||
|
||||
The steps of this process are as follows:
|
||||
|
||||
1. LLVM needs changing.
|
||||
|
||||
LLVM does not emit Interface types at all, so this needs to be implemented in LLVM first.
|
||||
|
||||
Get sign-off from LLVM maintainers that this is a good idea.
|
||||
|
||||
2. Change the DWARF extension.
|
||||
|
||||
3. Update the debuggers.
|
||||
|
||||
Update DWARF readers, expression evaluators.
|
||||
|
||||
4. Update Rust compiler.
|
||||
|
||||
Change it to emit this new information.
|
||||
|
||||
### Procedural macro stepping
|
||||
|
||||
A deeply profound question is: how do you actually debug a procedural macro?
|
||||
What is the location you emit for a macro expansion? Consider some of the following cases -
|
||||
|
||||
* You can emit location of the invocation of the macro.
|
||||
* You can emit the location of the definition of the macro.
|
||||
* You can emit locations of the content of the macro.
|
||||
|
||||
RFC: [https://github.com/rust-lang/rfcs/pull/2117]
|
||||
|
||||
Focus is to let macros decide what to do. This can be achieved by having some kind of attribute
|
||||
that lets the macro tell the compiler where the line marker should be. This affects where you
|
||||
set the breakpoints and what happens when you step it.
|
||||
|
||||
## Source file checksums in debug info
|
||||
|
||||
Both DWARF and CodeView (PDB) support embedding a cryptographic hash of each source file that
|
||||
contributed to the associated binary.
|
||||
|
||||
The cryptographic hash can be used by a debugger to verify that the source file matches the
|
||||
executable. If the source file does not match, the debugger can provide a warning to the user.
|
||||
|
||||
The hash can also be used to prove that a given source file has not been modified since it was
|
||||
used to compile an executable. Because MD5 and SHA1 both have demonstrated vulnerabilities,
|
||||
using SHA256 is recommended for this application.
|
||||
|
||||
The Rust compiler stores the hash for each source file in the corresponding `SourceFile` in
|
||||
the `SourceMap`. The hashes of input files to external crates are stored in `rlib` metadata.
|
||||
|
||||
A default hashing algorithm is set in the target specification. This allows the target to
|
||||
specify the best hash available, since not all targets support all hash algorithms.
|
||||
|
||||
The hashing algorithm for a target can also be overridden with the `-Z source-file-checksum=`
|
||||
command-line option.
|
||||
|
||||
#### DWARF 5
|
||||
DWARF version 5 supports embedding an MD5 hash to validate the source file version in use.
|
||||
DWARF 5 - Section 6.2.4.1 opcode DW_LNCT_MD5
|
||||
|
||||
#### LLVM
|
||||
LLVM IR supports MD5 and SHA1 (and SHA256 in LLVM 11+) source file checksums in the DIFile node.
|
||||
|
||||
[LLVM DIFile documentation](https://llvm.org/docs/LangRef.html#difile)
|
||||
|
||||
#### Microsoft Visual C++ Compiler /ZH option
|
||||
The MSVC compiler supports embedding MD5, SHA1, or SHA256 hashes in the PDB using the `/ZH`
|
||||
compiler option.
|
||||
|
||||
[MSVC /ZH documentation](https://docs.microsoft.com/en-us/cpp/build/reference/zh)
|
||||
|
||||
#### Clang
|
||||
Clang always embeds an MD5 checksum, though this does not appear in documentation.
|
||||
|
||||
## Future work
|
||||
|
||||
#### Name mangling changes
|
||||
|
||||
* New demangler in `libiberty` (gcc source tree).
|
||||
* New demangler in LLVM or LLDB.
|
||||
|
||||
**TODO**: Check the location of the demangler source. [#1157](https://github.com/rust-lang/rustc-dev-guide/issues/1157)
|
||||
|
||||
#### Reuse Rust compiler for expressions
|
||||
|
||||
This is an important idea because debuggers by and large do not try to implement type
|
||||
inference. You need to be much more explicit when you type into the debugger than your
|
||||
actual source code. So, you cannot just copy and paste an expression from your source
|
||||
code to the debugger and expect the same answer, but this would be nice. This can be helped
|
||||
by using the compiler.
|
||||
|
||||
It is certainly doable but it is a large project. You certainly need a bridge to the
|
||||
debugger because the debugger alone has access to the memory. Both GDB (gcc) and LLDB (clang)
|
||||
have this feature. LLDB uses Clang to compile code to JIT and GDB can do the same with GCC.
|
||||
|
||||
Both debuggers' expression evaluators implement both a superset and a subset of Rust.
|
||||
They implement just the expression language,
|
||||
but they also add some extensions, such as GDB's convenience variables.
|
||||
Therefore, if you are taking this route,
|
||||
then you not only need to do this bridge,
|
||||
but may have to add some mode to let the compiler understand some extensions.
|
||||
|
||||
[Tom Tromey discusses debugging support in rustc]: https://www.youtube.com/watch?v=elBxMRSNYr4
|
||||
[Debugging the Compiler]: compiler-debugging.md
|
||||
[debugger or debugging tool]: https://en.wikipedia.org/wiki/Debugger
|
||||
[Bison]: https://www.gnu.org/software/bison/
|
||||
[ptype]: https://ftp.gnu.org/old-gnu/Manuals/gdb/html_node/gdb_109.html
|
||||
[rust-lang/lldb wiki page]: https://github.com/rust-lang/lldb/wiki
|
||||
[DWARF]: http://dwarfstd.org
|
||||
[manual for GDB/Rust]: https://sourceware.org/gdb/onlinedocs/gdb/Rust.html
|
||||
[GDB Bugzilla]: https://sourceware.org/bugzilla/
|
||||
[Recursive Descent parser]: https://en.wikipedia.org/wiki/Recursive_descent_parser
|
||||
[System Integrity Protection]: https://en.wikipedia.org/wiki/System_Integrity_Protection
|
||||
[https://github.com/rust-dev-tools/gdb]: https://github.com/rust-dev-tools/gdb
|
||||
[DWARF feature request]: http://dwarfstd.org/ShowIssue.php?issue=180517.2
|
||||
[https://docs.python.org/3/c-api/stable.html]: https://docs.python.org/3/c-api/stable.html
|
||||
[https://github.com/rust-lang/rfcs/pull/2117]: https://github.com/rust-lang/rfcs/pull/2117
|
||||
[https://github.com/rust-lang/rust/issues/33014]: https://github.com/rust-lang/rust/issues/33014
|
||||
[https://github.com/rust-lang/rust/issues/34457]: https://github.com/rust-lang/rust/issues/34457
|
||||
[Apple developer documentation for System Integrity Protection]: https://developer.apple.com/library/archive/releasenotes/MacOSX/WhatsNewInOSX/Articles/MacOSX10_11.html#//apple_ref/doc/uid/TP40016227-SW11
|
||||
[https://github.com/rust-lang/lldb]: https://github.com/rust-lang/lldb
|
||||
[https://github.com/rust-lang/llvm-project]: https://github.com/rust-lang/llvm-project
|
||||
[PDB]: https://llvm.org/docs/PDB/index.html
|
||||
[symbol records]: https://llvm.org/docs/PDB/CodeViewSymbols.html
|
||||
[type records]: https://llvm.org/docs/PDB/CodeViewTypes.html
|
||||
[Windows Debugging Tools]: https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/
|
||||
[`debugger_visualizer` attribute]: https://doc.rust-lang.org/nightly/reference/attributes/debugger.html#the-debugger_visualizer-attribute
|
||||
1012
src/doc/rustc-dev-guide/src/diagnostics.md
Normal file
1012
src/doc/rustc-dev-guide/src/diagnostics.md
Normal file
File diff suppressed because it is too large
Load diff
166
src/doc/rustc-dev-guide/src/diagnostics/diagnostic-items.md
Normal file
166
src/doc/rustc-dev-guide/src/diagnostics/diagnostic-items.md
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
# Diagnostic Items
|
||||
|
||||
While writing lints it's common to check for specific types, traits and
|
||||
functions. This raises the question of how to check for these. Types can be
|
||||
checked by their complete type path. However, this requires hard coding paths
|
||||
and can lead to misclassifications in some edge cases. To counteract this,
|
||||
rustc has introduced diagnostic items that are used to identify types via
|
||||
[`Symbol`]s.
|
||||
|
||||
## Finding diagnostic items
|
||||
|
||||
Diagnostic items are added to items inside `rustc`/`std`/`core`/`alloc` with the
|
||||
`rustc_diagnostic_item` attribute. The item for a specific type can be found by
|
||||
opening the source code in the documentation and looking for this attribute.
|
||||
Note that it's often added with the `cfg_attr` attribute to avoid compilation
|
||||
errors during tests. A definition often looks like this:
|
||||
|
||||
```rs
|
||||
// This is the diagnostic item for this type vvvvvvv
|
||||
#[cfg_attr(not(test), rustc_diagnostic_item = "Penguin")]
|
||||
struct Penguin;
|
||||
```
|
||||
|
||||
Diagnostic items are usually only added to traits,
|
||||
types,
|
||||
and standalone functions.
|
||||
If the goal is to check for an associated type or method,
|
||||
please use the diagnostic item of the item and reference
|
||||
[*Using Diagnostic Items*](#using-diagnostic-items).
|
||||
|
||||
## Adding diagnostic items
|
||||
|
||||
A new diagnostic item can be added with these two steps:
|
||||
|
||||
1. Find the target item inside the Rust repo. Now add the diagnostic item as a
|
||||
string via the `rustc_diagnostic_item` attribute. This can sometimes cause
|
||||
compilation errors while running tests. These errors can be avoided by using
|
||||
the `cfg_attr` attribute with the `not(test)` condition (it's fine adding
|
||||
them for all `rustc_diagnostic_item` attributes as a preventive measure). At
|
||||
the end, it should look like this:
|
||||
|
||||
```rs
|
||||
// This will be the new diagnostic item vvv
|
||||
#[cfg_attr(not(test), rustc_diagnostic_item = "Cat")]
|
||||
struct Cat;
|
||||
```
|
||||
|
||||
For the naming conventions of diagnostic items, please refer to
|
||||
[*Naming Conventions*](#naming-conventions).
|
||||
|
||||
2. <!-- date-check: Feb 2023 -->
|
||||
Diagnostic items in code are accessed via symbols in
|
||||
[`rustc_span::symbol::sym`].
|
||||
To add your newly-created diagnostic item,
|
||||
simply open the module file,
|
||||
and add the name (in this case `Cat`) at the correct point in the list.
|
||||
|
||||
Now you can create a pull request with your changes. :tada:
|
||||
|
||||
> NOTE:
|
||||
> When using diagnostic items in other projects like Clippy,
|
||||
> it might take some time until the repos get synchronized.
|
||||
|
||||
## Naming conventions
|
||||
|
||||
Diagnostic items don't have a naming convention yet.
|
||||
Following are some guidelines that should be used in future,
|
||||
but might differ from existing names:
|
||||
|
||||
* Types, traits, and enums are named using UpperCamelCase
|
||||
(Examples: `Iterator` and `HashMap`)
|
||||
* For type names that are used multiple times,
|
||||
like `Writer`,
|
||||
it's good to choose a more precise name,
|
||||
maybe by adding the module to it
|
||||
(Example: `IoWriter`)
|
||||
* Associated items should not get their own diagnostic items,
|
||||
but instead be accessed indirectly by the diagnostic item
|
||||
of the type they're originating from.
|
||||
* Freestanding functions like `std::mem::swap()` should be named using
|
||||
`snake_case` with one important (export) module as a prefix
|
||||
(Examples: `mem_swap` and `cmp_max`)
|
||||
* Modules should usually not have a diagnostic item attached to them.
|
||||
Diagnostic items were added to avoid the usage of paths,
|
||||
and using them on modules would therefore most likely be counterproductive.
|
||||
|
||||
## Using diagnostic items
|
||||
|
||||
In rustc, diagnostic items are looked up via [`Symbol`]s from inside the
|
||||
[`rustc_span::symbol::sym`] module. These can then be mapped to [`DefId`]s
|
||||
using [`TyCtxt::get_diagnostic_item()`] or checked if they match a [`DefId`]
|
||||
using [`TyCtxt::is_diagnostic_item()`]. When mapping from a diagnostic item to
|
||||
a [`DefId`], the method will return an `Option<DefId>`. This can be `None` if
|
||||
either the symbol isn't a diagnostic item or the type is not registered, for
|
||||
instance when compiling with `#[no_std]`.
|
||||
All the following examples are based on [`DefId`]s and their usage.
|
||||
|
||||
### Example: Checking for a type
|
||||
|
||||
```rust
|
||||
use rustc_span::symbol::sym;
|
||||
|
||||
/// This example checks if the given type (`ty`) has the type `HashMap` using
|
||||
/// `TyCtxt::is_diagnostic_item()`
|
||||
fn example_1(cx: &LateContext<'_>, ty: Ty<'_>) -> bool {
|
||||
match ty.kind() {
|
||||
ty::Adt(adt, _) => cx.tcx.is_diagnostic_item(sym::HashMap, adt.did()),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Example: Checking for a trait implementation
|
||||
|
||||
```rust
|
||||
/// This example checks if a given [`DefId`] from a method is part of a trait
|
||||
/// implementation defined by a diagnostic item.
|
||||
fn is_diag_trait_item(
|
||||
cx: &LateContext<'_>,
|
||||
def_id: DefId,
|
||||
diag_item: Symbol
|
||||
) -> bool {
|
||||
if let Some(trait_did) = cx.tcx.trait_of_item(def_id) {
|
||||
return cx.tcx.is_diagnostic_item(diag_item, trait_did);
|
||||
}
|
||||
false
|
||||
}
|
||||
```
|
||||
|
||||
### Associated Types
|
||||
|
||||
Associated types of diagnostic items can be accessed indirectly by first
|
||||
getting the [`DefId`] of the trait and then calling
|
||||
[`TyCtxt::associated_items()`]. This returns an [`AssocItems`] object which can
|
||||
be used for further checks. Check out
|
||||
[`clippy_utils::ty::get_iterator_item_ty()`] for an example usage of this.
|
||||
|
||||
### Usage in Clippy
|
||||
|
||||
Clippy tries to use diagnostic items where possible and has developed some
|
||||
wrapper and utility functions. Please also refer to its documentation when
|
||||
using diagnostic items in Clippy. (See [*Common tools for writing
|
||||
lints*][clippy-Common-tools-for-writing-lints].)
|
||||
|
||||
## Related issues
|
||||
|
||||
These are probably only interesting to people
|
||||
who really want to take a deep dive into the topic :)
|
||||
|
||||
* [rust#60966]: The Rust PR that introduced diagnostic items
|
||||
* [rust-clippy#5393]: Clippy's tracking issue for moving away from hard coded paths to
|
||||
diagnostic items
|
||||
|
||||
<!-- Links -->
|
||||
|
||||
[`rustc_span::symbol::sym`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/symbol/sym/index.html
|
||||
[`Symbol`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/symbol/struct.Symbol.html
|
||||
[`DefId`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/def_id/struct.DefId.html
|
||||
[`TyCtxt::get_diagnostic_item()`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.get_diagnostic_item
|
||||
[`TyCtxt::is_diagnostic_item()`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.is_diagnostic_item
|
||||
[`TyCtxt::associated_items()`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.associated_items
|
||||
[`AssocItems`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/assoc/struct.AssocItems.html
|
||||
[`clippy_utils::ty::get_iterator_item_ty()`]: https://github.com/rust-lang/rust-clippy/blob/305177342fbc622c0b3cb148467bab4b9524c934/clippy_utils/src/ty.rs#L55-L72
|
||||
[clippy-Common-tools-for-writing-lints]: https://doc.rust-lang.org/nightly/clippy/development/common_tools_writing_lints.html
|
||||
[rust#60966]: https://github.com/rust-lang/rust/pull/60966
|
||||
[rust-clippy#5393]: https://github.com/rust-lang/rust-clippy/issues/5393
|
||||
401
src/doc/rustc-dev-guide/src/diagnostics/diagnostic-structs.md
Normal file
401
src/doc/rustc-dev-guide/src/diagnostics/diagnostic-structs.md
Normal file
|
|
@ -0,0 +1,401 @@
|
|||
# Diagnostic and subdiagnostic structs
|
||||
rustc has three diagnostic traits that can be used to create diagnostics:
|
||||
`Diagnostic`, `LintDiagnostic`, and `Subdiagnostic`. For simple diagnostics,
|
||||
instead of using the `Diag` API to create and emit diagnostics,
|
||||
derived impls can be used. They are only suitable for simple diagnostics that
|
||||
don't require much logic in deciding whether or not to add additional
|
||||
subdiagnostics.
|
||||
|
||||
Such diagnostics can be translated into
|
||||
different languages and each has a slug that uniquely identifies the
|
||||
diagnostic.
|
||||
|
||||
## `#[derive(Diagnostic)]` and `#[derive(LintDiagnostic)]`
|
||||
|
||||
Consider the [definition][defn] of the "field already declared" diagnostic
|
||||
shown below:
|
||||
|
||||
```rust,ignore
|
||||
#[derive(Diagnostic)]
|
||||
#[diag(hir_analysis_field_already_declared, code = E0124)]
|
||||
pub struct FieldAlreadyDeclared {
|
||||
pub field_name: Ident,
|
||||
#[primary_span]
|
||||
#[label]
|
||||
pub span: Span,
|
||||
#[label(previous_decl_label)]
|
||||
pub prev_span: Span,
|
||||
}
|
||||
```
|
||||
|
||||
`Diagnostic` can only be derived on structs and enums.
|
||||
Attributes that are placed on the type for structs are placed on each
|
||||
variant for enums (or vice versa). Each `Diagnostic` has to have one
|
||||
attribute, `#[diag(...)]`, applied to the struct or each enum variant.
|
||||
|
||||
If an error has an error code (e.g. "E0624"), then that can be specified using
|
||||
the `code` sub-attribute. Specifying a `code` isn't mandatory, but if you are
|
||||
porting a diagnostic that uses `Diag` to use `Diagnostic`
|
||||
then you should keep the code if there was one.
|
||||
|
||||
`#[diag(..)]` must provide a slug as the first positional argument (a path to an
|
||||
item in `rustc_errors::fluent::*`). A slug uniquely identifies the diagnostic
|
||||
and is also how the compiler knows what error message to emit (in the default
|
||||
locale of the compiler, or in the locale requested by the user). See
|
||||
[translation documentation](./translation.md) to learn more about how
|
||||
translatable error messages are written and how slug items are generated.
|
||||
|
||||
In our example, the Fluent message for the "field already declared" diagnostic
|
||||
looks like this:
|
||||
|
||||
```fluent
|
||||
hir_analysis_field_already_declared =
|
||||
field `{$field_name}` is already declared
|
||||
.label = field already declared
|
||||
.previous_decl_label = `{$field_name}` first declared here
|
||||
```
|
||||
|
||||
`hir_analysis_field_already_declared` is the slug from our example and is followed
|
||||
by the diagnostic message.
|
||||
|
||||
Every field of the `Diagnostic` which does not have an annotation is
|
||||
available in Fluent messages as a variable, like `field_name` in the example
|
||||
above. Fields can be annotated `#[skip_arg]` if this is undesired.
|
||||
|
||||
Using the `#[primary_span]` attribute on a field (that has type `Span`)
|
||||
indicates the primary span of the diagnostic which will have the main message
|
||||
of the diagnostic.
|
||||
|
||||
Diagnostics are more than just their primary message, they often include
|
||||
labels, notes, help messages and suggestions, all of which can also be
|
||||
specified on a `Diagnostic`.
|
||||
|
||||
`#[label]`, `#[help]`, `#[warning]` and `#[note]` can all be applied to fields which have the
|
||||
type `Span`. Applying any of these attributes will create the corresponding
|
||||
subdiagnostic with that `Span`. These attributes will look for their
|
||||
diagnostic message in a Fluent attribute attached to the primary Fluent
|
||||
message. In our example, `#[label]` will look for
|
||||
`hir_analysis_field_already_declared.label` (which has the message "field already
|
||||
declared"). If there is more than one subdiagnostic of the same type, then
|
||||
these attributes can also take a value that is the attribute name to look for
|
||||
(e.g. `previous_decl_label` in our example).
|
||||
|
||||
Other types have special behavior when used in a `Diagnostic` derive:
|
||||
|
||||
- Any attribute applied to an `Option<T>` will only emit a
|
||||
subdiagnostic if the option is `Some(..)`.
|
||||
- Any attribute applied to a `Vec<T>` will be repeated for each element of the
|
||||
vector.
|
||||
|
||||
`#[help]`, `#[warning]` and `#[note]` can also be applied to the struct itself, in which case
|
||||
they work exactly like when applied to fields except the subdiagnostic won't
|
||||
have a `Span`. These attributes can also be applied to fields of type `()` for
|
||||
the same effect, which when combined with the `Option` type can be used to
|
||||
represent optional `#[note]`/`#[help]`/`#[warning]` subdiagnostics.
|
||||
|
||||
Suggestions can be emitted using one of four field attributes:
|
||||
|
||||
- `#[suggestion(slug, code = "...", applicability = "...")]`
|
||||
- `#[suggestion_hidden(slug, code = "...", applicability = "...")]`
|
||||
- `#[suggestion_short(slug, code = "...", applicability = "...")]`
|
||||
- `#[suggestion_verbose(slug, code = "...", applicability = "...")]`
|
||||
|
||||
Suggestions must be applied on either a `Span` field or a `(Span,
|
||||
Applicability)` field. Similarly to other field attributes, the slug
|
||||
specifies the Fluent attribute with the message and defaults to the equivalent
|
||||
of `.suggestion`. `code` specifies the code that should be suggested as a
|
||||
replacement and is a format string (e.g. `{field_name}` would be replaced by
|
||||
the value of the `field_name` field of the struct), not a Fluent identifier.
|
||||
`applicability` can be used to specify the applicability in the attribute, it
|
||||
cannot be used when the field's type contains an `Applicability`.
|
||||
|
||||
In the end, the `Diagnostic` derive will generate an implementation of
|
||||
`Diagnostic` that looks like the following:
|
||||
|
||||
```rust,ignore
|
||||
impl<'a, G: EmissionGuarantee> Diagnostic<'a> for FieldAlreadyDeclared {
|
||||
fn into_diag(self, dcx: &'a DiagCtxt, level: Level) -> Diag<'a, G> {
|
||||
let mut diag = Diag::new(dcx, level, fluent::hir_analysis_field_already_declared);
|
||||
diag.set_span(self.span);
|
||||
diag.span_label(
|
||||
self.span,
|
||||
fluent::hir_analysis_label
|
||||
);
|
||||
diag.span_label(
|
||||
self.prev_span,
|
||||
fluent::hir_analysis_previous_decl_label
|
||||
);
|
||||
diag
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Now that we've defined our diagnostic, how do we [use it][use]? It's quite
|
||||
straightforward, just create an instance of the struct and pass it to
|
||||
`emit_err` (or `emit_warning`):
|
||||
|
||||
```rust,ignore
|
||||
tcx.dcx().emit_err(FieldAlreadyDeclared {
|
||||
field_name: f.ident,
|
||||
span: f.span,
|
||||
prev_span,
|
||||
});
|
||||
```
|
||||
|
||||
### Reference
|
||||
`#[derive(Diagnostic)]` and `#[derive(LintDiagnostic)]` support the
|
||||
following attributes:
|
||||
|
||||
- `#[diag(slug, code = "...")]`
|
||||
- _Applied to struct or enum variant._
|
||||
- _Mandatory_
|
||||
- Defines the text and error code to be associated with the diagnostic.
|
||||
- Slug (_Mandatory_)
|
||||
- Uniquely identifies the diagnostic and corresponds to its Fluent message,
|
||||
mandatory.
|
||||
- A path to an item in `rustc_errors::fluent`, e.g.
|
||||
`rustc_errors::fluent::hir_analysis_field_already_declared`
|
||||
(`rustc_errors::fluent` is implicit in the attribute, so just
|
||||
`hir_analysis_field_already_declared`).
|
||||
- See [translation documentation](./translation.md).
|
||||
- `code = "..."` (_Optional_)
|
||||
- Specifies the error code.
|
||||
- `#[note]` or `#[note(slug)]` (_Optional_)
|
||||
- _Applied to struct or struct fields of type `Span`, `Option<()>` or `()`._
|
||||
- Adds a note subdiagnostic.
|
||||
- Value is a path to an item in `rustc_errors::fluent` for the note's
|
||||
message.
|
||||
- Defaults to equivalent of `.note`.
|
||||
- If applied to a `Span` field, creates a spanned note.
|
||||
- `#[help]` or `#[help(slug)]` (_Optional_)
|
||||
- _Applied to struct or struct fields of type `Span`, `Option<()>` or `()`._
|
||||
- Adds a help subdiagnostic.
|
||||
- Value is a path to an item in `rustc_errors::fluent` for the help's
|
||||
message.
|
||||
- Defaults to equivalent of `.help`.
|
||||
- If applied to a `Span` field, creates a spanned help.
|
||||
- `#[label]` or `#[label(slug)]` (_Optional_)
|
||||
- _Applied to `Span` fields._
|
||||
- Adds a label subdiagnostic.
|
||||
- Value is a path to an item in `rustc_errors::fluent` for the label's
|
||||
message.
|
||||
- Defaults to equivalent of `.label`.
|
||||
- `#[warning]` or `#[warning(slug)]` (_Optional_)
|
||||
- _Applied to struct or struct fields of type `Span`, `Option<()>` or `()`._
|
||||
- Adds a warning subdiagnostic.
|
||||
- Value is a path to an item in `rustc_errors::fluent` for the warning's
|
||||
message.
|
||||
- Defaults to equivalent of `.warn`.
|
||||
- `#[suggestion{,_hidden,_short,_verbose}(slug, code = "...", applicability = "...")]`
|
||||
(_Optional_)
|
||||
- _Applied to `(Span, Applicability)` or `Span` fields._
|
||||
- Adds a suggestion subdiagnostic.
|
||||
- Slug (_Mandatory_)
|
||||
- A path to an item in `rustc_errors::fluent`, e.g.
|
||||
`rustc_errors::fluent::hir_analysis_field_already_declared`
|
||||
(`rustc_errors::fluent` is implicit in the attribute, so just
|
||||
`hir_analysis_field_already_declared`). Fluent attributes for all messages
|
||||
exist as top-level items in that module (so `hir_analysis_message.attr` is just
|
||||
`attr`).
|
||||
- See [translation documentation](./translation.md).
|
||||
- Defaults to `rustc_errors::fluent::_subdiag::suggestion` (or
|
||||
`.suggestion` in Fluent).
|
||||
- `code = "..."`/`code("...", ...)` (_Mandatory_)
|
||||
- One or multiple format strings indicating the code to be suggested as a
|
||||
replacement. Multiple values signify multiple possible replacements.
|
||||
- `applicability = "..."` (_Optional_)
|
||||
- String which must be one of `machine-applicable`, `maybe-incorrect`,
|
||||
`has-placeholders` or `unspecified`.
|
||||
- `#[subdiagnostic]`
|
||||
- _Applied to a type that implements `Subdiagnostic` (from
|
||||
`#[derive(Subdiagnostic)]`)._
|
||||
- Adds the subdiagnostic represented by the subdiagnostic struct.
|
||||
- `#[primary_span]` (_Optional_)
|
||||
- _Applied to `Span` fields on `Subdiagnostic`s. Not used for `LintDiagnostic`s._
|
||||
- Indicates the primary span of the diagnostic.
|
||||
- `#[skip_arg]` (_Optional_)
|
||||
- _Applied to any field._
|
||||
- Prevents the field from being provided as a diagnostic argument.
|
||||
|
||||
## `#[derive(Subdiagnostic)]`
|
||||
It is common in the compiler to write a function that conditionally adds a
|
||||
specific subdiagnostic to an error if it is applicable. Oftentimes these
|
||||
subdiagnostics could be represented using a diagnostic struct even if the
|
||||
overall diagnostic could not. In this circumstance, the `Subdiagnostic`
|
||||
derive can be used to represent a partial diagnostic (e.g. a note, label, help or
|
||||
suggestion) as a struct.
|
||||
|
||||
Consider the [definition][subdiag_defn] of the "expected return type" label
|
||||
shown below:
|
||||
|
||||
```rust
|
||||
#[derive(Subdiagnostic)]
|
||||
pub enum ExpectedReturnTypeLabel<'tcx> {
|
||||
#[label(hir_analysis_expected_default_return_type)]
|
||||
Unit {
|
||||
#[primary_span]
|
||||
span: Span,
|
||||
},
|
||||
#[label(hir_analysis_expected_return_type)]
|
||||
Other {
|
||||
#[primary_span]
|
||||
span: Span,
|
||||
expected: Ty<'tcx>,
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Like `Diagnostic`, `Subdiagnostic` can be derived for structs or
|
||||
enums. Attributes that are placed on the type for structs are placed on each
|
||||
variant for enums (or vice versa). Each `Subdiagnostic` should have one
|
||||
attribute applied to the struct or each variant, one of:
|
||||
|
||||
- `#[label(..)]` for defining a label
|
||||
- `#[note(..)]` for defining a note
|
||||
- `#[help(..)]` for defining a help
|
||||
- `#[warning(..)]` for defining a warning
|
||||
- `#[suggestion{,_hidden,_short,_verbose}(..)]` for defining a suggestion
|
||||
|
||||
All of the above must provide a slug as the first positional argument (a path
|
||||
to an item in `rustc_errors::fluent::*`). A slug uniquely identifies the
|
||||
diagnostic and is also how the compiler knows what error message to emit (in
|
||||
the default locale of the compiler, or in the locale requested by the user).
|
||||
See [translation documentation](./translation.md) to learn more about how
|
||||
translatable error messages are written and how slug items are generated.
|
||||
|
||||
In our example, the Fluent message for the "expected return type" label
|
||||
looks like this:
|
||||
|
||||
```fluent
|
||||
hir_analysis_expected_default_return_type = expected `()` because of default return type
|
||||
|
||||
hir_analysis_expected_return_type = expected `{$expected}` because of return type
|
||||
```
|
||||
|
||||
Using the `#[primary_span]` attribute on a field (with type `Span`) will denote
|
||||
the primary span of the subdiagnostic. A primary span is only necessary for a
|
||||
label or suggestion, which can not be spanless.
|
||||
|
||||
Every field of the type/variant which does not have an annotation is available
|
||||
in Fluent messages as a variable. Fields can be annotated `#[skip_arg]` if this
|
||||
is undesired.
|
||||
|
||||
Like `Diagnostic`, `Subdiagnostic` supports `Option<T>` and
|
||||
`Vec<T>` fields.
|
||||
|
||||
Suggestions can be emitted using one of four attributes on the type/variant:
|
||||
|
||||
- `#[suggestion(..., code = "...", applicability = "...")]`
|
||||
- `#[suggestion_hidden(..., code = "...", applicability = "...")]`
|
||||
- `#[suggestion_short(..., code = "...", applicability = "...")]`
|
||||
- `#[suggestion_verbose(..., code = "...", applicability = "...")]`
|
||||
|
||||
Suggestions require `#[primary_span]` be set on a field and can have the
|
||||
following sub-attributes:
|
||||
|
||||
- The first positional argument specifies the path to an item in
|
||||
`rustc_errors::fluent` corresponding to the Fluent attribute with the message
|
||||
and defaults to the equivalent of `.suggestion`.
|
||||
- `code` specifies the code that should be suggested as a replacement and is a
|
||||
format string (e.g. `{field_name}` would be replaced by the value of the
|
||||
`field_name` field of the struct), not a Fluent identifier.
|
||||
- `applicability` can be used to specify the applicability in the attribute, it
|
||||
cannot be used when the field's type contains an `Applicability`.
|
||||
|
||||
Applicabilities can also be specified as a field (of type `Applicability`)
|
||||
using the `#[applicability]` attribute.
|
||||
|
||||
In the end, the `Subdiagnostic` derive will generate an implementation
|
||||
of `Subdiagnostic` that looks like the following:
|
||||
|
||||
```rust
|
||||
impl<'tcx> Subdiagnostic for ExpectedReturnTypeLabel<'tcx> {
|
||||
fn add_to_diag(self, diag: &mut rustc_errors::Diagnostic) {
|
||||
use rustc_errors::{Applicability, IntoDiagArg};
|
||||
match self {
|
||||
ExpectedReturnTypeLabel::Unit { span } => {
|
||||
diag.span_label(span, rustc_errors::fluent::hir_analysis_expected_default_return_type)
|
||||
}
|
||||
ExpectedReturnTypeLabel::Other { span, expected } => {
|
||||
diag.set_arg("expected", expected);
|
||||
diag.span_label(span, rustc_errors::fluent::hir_analysis_expected_return_type)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Once defined, a subdiagnostic can be used by passing it to the `subdiagnostic`
|
||||
function ([example][subdiag_use_1] and [example][subdiag_use_2]) on a
|
||||
diagnostic or by assigning it to a `#[subdiagnostic]`-annotated field of a
|
||||
diagnostic struct.
|
||||
|
||||
### Reference
|
||||
`#[derive(Subdiagnostic)]` supports the following attributes:
|
||||
|
||||
- `#[label(slug)]`, `#[help(slug)]`, `#[warning(slug)]` or `#[note(slug)]`
|
||||
- _Applied to struct or enum variant. Mutually exclusive with struct/enum variant attributes._
|
||||
- _Mandatory_
|
||||
- Defines the type to be representing a label, help, warning or note.
|
||||
- Slug (_Mandatory_)
|
||||
- Uniquely identifies the diagnostic and corresponds to its Fluent message,
|
||||
mandatory.
|
||||
- A path to an item in `rustc_errors::fluent`, e.g.
|
||||
`rustc_errors::fluent::hir_analysis_field_already_declared`
|
||||
(`rustc_errors::fluent` is implicit in the attribute, so just
|
||||
`hir_analysis_field_already_declared`).
|
||||
- See [translation documentation](./translation.md).
|
||||
- `#[suggestion{,_hidden,_short,_verbose}(slug, code = "...", applicability = "...")]`
|
||||
- _Applied to struct or enum variant. Mutually exclusive with struct/enum variant attributes._
|
||||
- _Mandatory_
|
||||
- Defines the type to be representing a suggestion.
|
||||
- Slug (_Mandatory_)
|
||||
- A path to an item in `rustc_errors::fluent`, e.g.
|
||||
`rustc_errors::fluent::hir_analysis_field_already_declared`
|
||||
(`rustc_errors::fluent` is implicit in the attribute, so just
|
||||
`hir_analysis_field_already_declared`). Fluent attributes for all messages
|
||||
exist as top-level items in that module (so `hir_analysis_message.attr` is just
|
||||
`attr`).
|
||||
- See [translation documentation](./translation.md).
|
||||
- Defaults to `rustc_errors::fluent::_subdiag::suggestion` (or
|
||||
`.suggestion` in Fluent).
|
||||
- `code = "..."`/`code("...", ...)` (_Mandatory_)
|
||||
- One or multiple format strings indicating the code to be suggested as a
|
||||
replacement. Multiple values signify multiple possible replacements.
|
||||
- `applicability = "..."` (_Optional_)
|
||||
- _Mutually exclusive with `#[applicability]` on a field._
|
||||
- Value is the applicability of the suggestion.
|
||||
- String which must be one of:
|
||||
- `machine-applicable`
|
||||
- `maybe-incorrect`
|
||||
- `has-placeholders`
|
||||
- `unspecified`
|
||||
- `#[multipart_suggestion{,_hidden,_short,_verbose}(slug, applicability = "...")]`
|
||||
- _Applied to struct or enum variant. Mutually exclusive with struct/enum variant attributes._
|
||||
- _Mandatory_
|
||||
- Defines the type to be representing a multipart suggestion.
|
||||
- Slug (_Mandatory_): see `#[suggestion]`
|
||||
- `applicability = "..."` (_Optional_): see `#[suggestion]`
|
||||
- `#[primary_span]` (_Mandatory_ for labels and suggestions; _optional_ otherwise; not applicable
|
||||
to multipart suggestions)
|
||||
- _Applied to `Span` fields._
|
||||
- Indicates the primary span of the subdiagnostic.
|
||||
- `#[suggestion_part(code = "...")]` (_Mandatory_; only applicable to multipart suggestions)
|
||||
- _Applied to `Span` fields._
|
||||
- Indicates the span to be one part of the multipart suggestion.
|
||||
- `code = "..."` (_Mandatory_)
|
||||
- Value is a format string indicating the code to be suggested as a
|
||||
replacement.
|
||||
- `#[applicability]` (_Optional_; only applicable to (simple and multipart) suggestions)
|
||||
- _Applied to `Applicability` fields._
|
||||
- Indicates the applicability of the suggestion.
|
||||
- `#[skip_arg]` (_Optional_)
|
||||
- _Applied to any field._
|
||||
- Prevents the field from being provided as a diagnostic argument.
|
||||
|
||||
[defn]: https://github.com/rust-lang/rust/blob/6201eabde85db854c1ebb57624be5ec699246b50/compiler/rustc_hir_analysis/src/errors.rs#L68-L77
|
||||
[use]: https://github.com/rust-lang/rust/blob/f1112099eba41abadb6f921df7edba70affe92c5/compiler/rustc_hir_analysis/src/collect.rs#L823-L827
|
||||
|
||||
[subdiag_defn]: https://github.com/rust-lang/rust/blob/f1112099eba41abadb6f921df7edba70affe92c5/compiler/rustc_hir_analysis/src/errors.rs#L221-L234
|
||||
[subdiag_use_1]: https://github.com/rust-lang/rust/blob/f1112099eba41abadb6f921df7edba70affe92c5/compiler/rustc_hir_analysis/src/check/fn_ctxt/suggestions.rs#L670-L674
|
||||
[subdiag_use_2]: https://github.com/rust-lang/rust/blob/f1112099eba41abadb6f921df7edba70affe92c5/compiler/rustc_hir_analysis/src/check/fn_ctxt/suggestions.rs#L704-L707
|
||||
104
src/doc/rustc-dev-guide/src/diagnostics/error-codes.md
Normal file
104
src/doc/rustc-dev-guide/src/diagnostics/error-codes.md
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
# Error codes
|
||||
We generally try to assign each error message a unique code like `E0123`. These
|
||||
codes are defined in the compiler in the `diagnostics.rs` files found in each
|
||||
crate, which basically consist of macros. All error codes have an associated
|
||||
explanation: new error codes must include them. Note that not all _historical_
|
||||
(no longer emitted) error codes have explanations.
|
||||
|
||||
## Error explanations
|
||||
|
||||
The explanations are written in Markdown (see the [CommonMark Spec] for
|
||||
specifics around syntax), and all of them are linked in the [`rustc_error_codes`]
|
||||
crate. Please read [RFC 1567] for details on how to format and write long error
|
||||
codes. As of <!-- date-check --> February 2023, there is an
|
||||
effort[^new-explanations] to replace this largely outdated RFC with a new more
|
||||
flexible standard.
|
||||
|
||||
Error explanations should expand on the error message and provide details about
|
||||
_why_ the error occurs. It is not helpful for users to copy-paste a quick fix;
|
||||
explanations should help users understand why their code cannot be accepted by
|
||||
the compiler. Rust prides itself on helpful error messages and long-form
|
||||
explanations are no exception. However, before error explanations are
|
||||
overhauled[^new-explanations] it is a bit open as to how exactly they should be
|
||||
written, as always: ask your reviewer or ask around on the Rust Discord or Zulip.
|
||||
|
||||
[^new-explanations]: See the draft RFC [here][new-explanations-rfc].
|
||||
|
||||
[`rustc_error_codes`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_error_codes/index.html
|
||||
[CommonMark Spec]: https://spec.commonmark.org/current/
|
||||
[RFC 1567]: https://github.com/rust-lang/rfcs/blob/master/text/1567-long-error-codes-explanation-normalization.md
|
||||
[new-explanations-rfc]: https://github.com/rust-lang/rfcs/pull/3370
|
||||
|
||||
## Allocating a fresh code
|
||||
|
||||
Error codes are stored in `compiler/rustc_error_codes`.
|
||||
|
||||
To create a new error, you first need to find the next available
|
||||
code. You can find it with `tidy`:
|
||||
|
||||
```
|
||||
./x test tidy
|
||||
```
|
||||
|
||||
This will invoke the tidy script, which generally checks that your code obeys
|
||||
our coding conventions. Some of these jobs check error codes and ensure that
|
||||
there aren't duplicates, etc (the tidy check is defined in
|
||||
`src/tools/tidy/src/error_codes.rs`). Once it is finished with that, tidy will
|
||||
print out the highest used error code:
|
||||
|
||||
```
|
||||
...
|
||||
tidy check
|
||||
Found 505 error codes
|
||||
Highest error code: `E0591`
|
||||
...
|
||||
```
|
||||
|
||||
Here we see the highest error code in use is `E0591`, so we _probably_ want
|
||||
`E0592`. To be sure, run `rg E0592` and check, you should see no references.
|
||||
|
||||
You will have to write an extended description for your error,
|
||||
which will go in `rustc_error_codes/src/error_codes/E0592.md`.
|
||||
To register the error, open `rustc_error_codes/src/error_codes.rs` and add the
|
||||
code (in its proper numerical order) into the `register_diagnostics!` macro, like
|
||||
this:
|
||||
|
||||
```rust
|
||||
register_diagnostics! {
|
||||
...
|
||||
E0592: include_str!("./error_codes/E0592.md"),
|
||||
}
|
||||
```
|
||||
|
||||
To actually issue the error, you can use the `struct_span_code_err!` macro:
|
||||
|
||||
```rust
|
||||
struct_span_code_err!(self.dcx(), // some path to the `DiagCtxt` here
|
||||
span, // whatever span in the source you want
|
||||
E0592, // your new error code
|
||||
fluent::example::an_error_message)
|
||||
.emit() // actually issue the error
|
||||
```
|
||||
|
||||
If you want to add notes or other snippets, you can invoke methods before you
|
||||
call `.emit()`:
|
||||
|
||||
```rust
|
||||
struct_span_code_err!(...)
|
||||
.span_label(another_span, fluent::example::example_label)
|
||||
.span_note(another_span, fluent::example::separate_note)
|
||||
.emit()
|
||||
```
|
||||
|
||||
For an example of a PR adding an error code, see [#76143].
|
||||
|
||||
[#76143]: https://github.com/rust-lang/rust/pull/76143
|
||||
|
||||
## Running error code doctests
|
||||
|
||||
To test the examples added in `rustc_error_codes/src/error_codes`, run the
|
||||
error index generator using:
|
||||
|
||||
```
|
||||
./x test ./src/tools/error_index_generator
|
||||
```
|
||||
33
src/doc/rustc-dev-guide/src/diagnostics/error-guaranteed.md
Normal file
33
src/doc/rustc-dev-guide/src/diagnostics/error-guaranteed.md
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
# `ErrorGuaranteed`
|
||||
The previous sections have been about the error message that a user of the
|
||||
compiler sees. But emitting an error can also have a second important side
|
||||
effect within the compiler source code: it generates an
|
||||
[`ErrorGuaranteed`][errorguar].
|
||||
|
||||
`ErrorGuaranteed` is a zero-sized type that is unconstructable outside of the
|
||||
[`rustc_errors`][rerrors] crate. It is generated whenever an error is reported
|
||||
to the user, so that if your compiler code ever encounters a value of type
|
||||
`ErrorGuaranteed`, the compilation is _statically guaranteed to fail_. This is
|
||||
useful for avoiding unsoundness bugs because you can statically check that an
|
||||
error code path leads to a failure.
|
||||
|
||||
There are some important considerations about the usage of `ErrorGuaranteed`:
|
||||
|
||||
* It does _not_ convey information about the _kind_ of error. For example, the
|
||||
error may be due (indirectly) to a delayed bug or other compiler error.
|
||||
Thus, you should not rely on
|
||||
`ErrorGuaranteed` when deciding whether to emit an error, or what kind of error
|
||||
to emit.
|
||||
* `ErrorGuaranteed` should not be used to indicate that a compilation _will
|
||||
emit_ an error in the future. It should be used to indicate that an error
|
||||
_has already been_ emitted -- that is, the [`emit()`][emit] function has
|
||||
already been called. For example, if we detect that a future part of the
|
||||
compiler will error, we _cannot_ use `ErrorGuaranteed` unless we first emit
|
||||
an error or delayed bug ourselves.
|
||||
|
||||
Thankfully, in most cases, it should be statically impossible to abuse
|
||||
`ErrorGuaranteed`.
|
||||
|
||||
[errorguar]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_errors/struct.ErrorGuaranteed.html
|
||||
[rerrors]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_errors/index.html
|
||||
[emit]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_errors/diagnostic/struct.Diag.html#method.emit
|
||||
124
src/doc/rustc-dev-guide/src/diagnostics/lintstore.md
Normal file
124
src/doc/rustc-dev-guide/src/diagnostics/lintstore.md
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
# Lints
|
||||
|
||||
This page documents some of the machinery around lint registration and how we
|
||||
run lints in the compiler.
|
||||
|
||||
The [`LintStore`] is the central piece of infrastructure, around which
|
||||
everything rotates. The `LintStore` is held as part of the [`Session`], and it
|
||||
gets populated with the list of lints shortly after the `Session` is created.
|
||||
|
||||
## Lints vs. lint passes
|
||||
|
||||
There are two parts to the linting mechanism within the compiler: lints and
|
||||
lint passes. Unfortunately, a lot of the documentation we have refers to both
|
||||
of these as just "lints."
|
||||
|
||||
First, we have the lint declarations themselves,
|
||||
and this is where the name and default lint level and other metadata come from.
|
||||
These are normally defined by way of the [`declare_lint!`] macro,
|
||||
which boils down to a static with type [`&rustc_lint_defs::Lint`]
|
||||
(although this may change in the future,
|
||||
as the macro is somewhat unwieldy to add new fields to,
|
||||
like all macros).
|
||||
|
||||
As of <!-- date-check --> Aug 2022,
|
||||
we lint against direct declarations without the use of the macro.
|
||||
|
||||
Lint declarations don't carry any "state" - they are merely global identifiers
|
||||
and descriptions of lints. We assert at runtime that they are not registered
|
||||
twice (by lint name).
|
||||
|
||||
Lint passes are the meat of any lint. Notably, there is not a one-to-one
|
||||
relationship between lints and lint passes; a lint might not have any lint pass
|
||||
that emits it, it could have many, or just one -- the compiler doesn't track
|
||||
whether a pass is in any way associated with a particular lint, and frequently
|
||||
lints are emitted as part of other work (e.g., type checking, etc.).
|
||||
|
||||
## Registration
|
||||
|
||||
### High-level overview
|
||||
|
||||
In [`rustc_interface::run_compiler`],
|
||||
the [`LintStore`] is created,
|
||||
and all lints are registered.
|
||||
|
||||
There are three 'sources' of lints:
|
||||
|
||||
* internal lints: lints only used by the rustc codebase
|
||||
* builtin lints: lints built into the compiler and not provided by some outside
|
||||
source
|
||||
* `rustc_interface::Config`[`register_lints`]: lints passed into the compiler
|
||||
during construction
|
||||
|
||||
Lints are registered via the [`LintStore::register_lint`] function. This should
|
||||
happen just once for any lint, or an ICE will occur.
|
||||
|
||||
Once the registration is complete, we "freeze" the lint store by placing it in
|
||||
an `Lrc`.
|
||||
|
||||
Lint passes are registered separately into one of the categories
|
||||
(pre-expansion, early, late, late module). Passes are registered as a closure
|
||||
-- i.e., `impl Fn() -> Box<dyn X>`, where `dyn X` is either an early or late
|
||||
lint pass trait object. When we run the lint passes, we run the closure and
|
||||
then invoke the lint pass methods. The lint pass methods take `&mut self` so
|
||||
they can keep track of state internally.
|
||||
|
||||
#### Internal lints
|
||||
|
||||
These are lints used just by the compiler or drivers like `clippy`. They can be
|
||||
found in [`rustc_lint::internal`].
|
||||
|
||||
An example of such a lint is the check that lint passes are implemented using
|
||||
the `declare_lint_pass!` macro and not by hand. This is accomplished with the
|
||||
`LINT_PASS_IMPL_WITHOUT_MACRO` lint.
|
||||
|
||||
Registration of these lints happens in the [`rustc_lint::register_internals`]
|
||||
function which is called when constructing a new lint store inside
|
||||
[`rustc_lint::new_lint_store`].
|
||||
|
||||
#### Builtin Lints
|
||||
|
||||
These are primarily described in two places,
|
||||
`rustc_lint_defs::builtin` and `rustc_lint::builtin`.
|
||||
Often the first provides the definitions for the lints themselves,
|
||||
and the latter provides the lint pass definitions (and implementations),
|
||||
but this is not always true.
|
||||
|
||||
The builtin lint registration happens in
|
||||
the [`rustc_lint::register_builtins`] function.
|
||||
Just like with internal lints,
|
||||
this happens inside of [`rustc_lint::new_lint_store`].
|
||||
|
||||
#### Driver lints
|
||||
|
||||
These are the lints provided by drivers via the `rustc_interface::Config`
|
||||
[`register_lints`] field, which is a callback. Drivers should, if finding it
|
||||
already set, call the function currently set within the callback they add. The
|
||||
best way for drivers to get access to this is by overriding the
|
||||
`Callbacks::config` function which gives them direct access to the `Config`
|
||||
structure.
|
||||
|
||||
## Compiler lint passes are combined into one pass
|
||||
|
||||
Within the compiler, for performance reasons, we usually do not register dozens
|
||||
of lint passes. Instead, we have a single lint pass of each variety (e.g.,
|
||||
`BuiltinCombinedModuleLateLintPass`) which will internally call all of the
|
||||
individual lint passes; this is because then we get the benefits of static over
|
||||
dynamic dispatch for each of the (often empty) trait methods.
|
||||
|
||||
Ideally, we'd not have to do this, since it adds to the complexity of
|
||||
understanding the code. However, with the current type-erased lint store
|
||||
approach, it is beneficial to do so for performance reasons.
|
||||
|
||||
[`LintStore`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lint/struct.LintStore.html
|
||||
[`LintStore::register_lint`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lint/struct.LintStore.html#method.register_lints
|
||||
[`rustc_lint::register_builtins`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lint/fn.register_builtins.html
|
||||
[`rustc_lint::register_internals`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lint/fn.register_internals.html
|
||||
[`rustc_lint::new_lint_store`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lint/fn.new_lint_store.html
|
||||
[`declare_lint!`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_session/macro.declare_lint.html
|
||||
[`declare_tool_lint!`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_session/macro.declare_tool_lint.html
|
||||
[`register_lints`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_interface/interface/struct.Config.html#structfield.register_lints
|
||||
[`&rustc_lint_defs::Lint`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lint_defs/struct.Lint.html
|
||||
[`Session`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_session/struct.Session.html
|
||||
[`rustc_interface::run_compiler`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_interface/index.html#reexport.run_compiler
|
||||
[`rustc_lint::internal`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lint/internal/index.html
|
||||
226
src/doc/rustc-dev-guide/src/diagnostics/translation.md
Normal file
226
src/doc/rustc-dev-guide/src/diagnostics/translation.md
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
# Translation
|
||||
|
||||
<div class="warning">
|
||||
rustc's current diagnostics translation infrastructure (as of
|
||||
<!-- date-check --> October 2024
|
||||
) unfortunately causes some friction for compiler contributors, and the current
|
||||
infrastructure is mostly pending a redesign that better addresses needs of both
|
||||
compiler contributors and translation teams. Note that there are no current
|
||||
active redesign proposals (as of
|
||||
<!-- date-check --> October 2024
|
||||
)!
|
||||
|
||||
Please see the tracking issue <https://github.com/rust-lang/rust/issues/132181>
|
||||
for status updates.
|
||||
|
||||
We have downgraded the internal lints `untranslatable_diagnostic` and
|
||||
`diagnostic_outside_of_impl`. Those internal lints previously required new code
|
||||
to use the current translation infrastructure. However, because the translation
|
||||
infra is waiting for a yet-to-be-proposed redesign and thus rework, we are not
|
||||
mandating usage of current translation infra. Use the infra if you *want to* or
|
||||
if it otherwise makes the code cleaner, but otherwise sidestep the translation infra
|
||||
if you need more flexibility.
|
||||
</div>
|
||||
|
||||
rustc's diagnostic infrastructure supports translatable diagnostics using
|
||||
[Fluent].
|
||||
|
||||
## Writing translatable diagnostics
|
||||
|
||||
There are two ways of writing translatable diagnostics:
|
||||
|
||||
1. For simple diagnostics, using a diagnostic (or subdiagnostic) derive.
|
||||
("Simple" diagnostics being those that don't require a lot of logic in
|
||||
deciding to emit subdiagnostics and can therefore be represented as
|
||||
diagnostic structs). See [the diagnostic and subdiagnostic structs
|
||||
documentation](./diagnostic-structs.md).
|
||||
2. Using typed identifiers with `Diag` APIs (in
|
||||
`Diagnostic` or `Subdiagnostic` or `LintDiagnostic` implementations).
|
||||
|
||||
When adding or changing a translatable diagnostic,
|
||||
you don't need to worry about the translations.
|
||||
Only updating the original English message is required.
|
||||
Currently,
|
||||
each crate which defines translatable diagnostics has its own Fluent resource,
|
||||
which is a file named `messages.ftl`,
|
||||
located in the root of the crate
|
||||
(such as `compiler/rustc_expand/messages.ftl`).
|
||||
|
||||
## Fluent
|
||||
|
||||
Fluent is built around the idea of "asymmetric localization", which aims to
|
||||
decouple the expressiveness of translations from the grammar of the source
|
||||
language (English in rustc's case). Prior to translation, rustc's diagnostics
|
||||
relied heavily on interpolation to build the messages shown to the users.
|
||||
Interpolated strings are hard to translate because writing a natural-sounding
|
||||
translation might require more, less, or just different interpolation than the
|
||||
English string, all of which would require changes to the compiler's source
|
||||
code to support.
|
||||
|
||||
Diagnostic messages are defined in Fluent resources. A combined set of Fluent
|
||||
resources for a given locale (e.g. `en-US`) is known as a Fluent bundle.
|
||||
|
||||
```fluent
|
||||
typeck_address_of_temporary_taken = cannot take address of a temporary
|
||||
```
|
||||
|
||||
In the above example, `typeck_address_of_temporary_taken` is the identifier for
|
||||
a Fluent message and corresponds to the diagnostic message in English. Other
|
||||
Fluent resources can be written which would correspond to a message in another
|
||||
language. Each diagnostic therefore has at least one Fluent message.
|
||||
|
||||
```fluent
|
||||
typeck_address_of_temporary_taken = cannot take address of a temporary
|
||||
.label = temporary value
|
||||
```
|
||||
|
||||
By convention, diagnostic messages for subdiagnostics are specified as
|
||||
"attributes" on Fluent messages (additional related messages, denoted by the
|
||||
`.<attribute-name>` syntax). In the above example, `label` is an attribute of
|
||||
`typeck_address_of_temporary_taken` which corresponds to the message for the
|
||||
label added to this diagnostic.
|
||||
|
||||
Diagnostic messages often interpolate additional context into the message shown
|
||||
to the user, such as the name of a type or of a variable. Additional context to
|
||||
Fluent messages is provided as an "argument" to the diagnostic.
|
||||
|
||||
```fluent
|
||||
typeck_struct_expr_non_exhaustive =
|
||||
cannot create non-exhaustive {$what} using struct expression
|
||||
```
|
||||
|
||||
In the above example, the Fluent message refers to an argument named `what`
|
||||
which is expected to exist (how arguments are provided to diagnostics is
|
||||
discussed in detail later).
|
||||
|
||||
You can consult the [Fluent] documentation for other usage examples of Fluent
|
||||
and its syntax.
|
||||
|
||||
### Guideline for message naming
|
||||
|
||||
Usually, fluent uses `-` for separating words inside a message name. However,
|
||||
`_` is accepted by fluent as well. As `_` fits Rust's use cases better, due to
|
||||
the identifiers on the Rust side using `_` as well, inside rustc, `-` is not
|
||||
allowed for separating words, and instead `_` is recommended. The only exception
|
||||
is for leading `-`s, for message names like `-passes_see_issue`.
|
||||
|
||||
### Guidelines for writing translatable messages
|
||||
|
||||
For a message to be translatable into different languages, all of the
|
||||
information required by any language must be provided to the diagnostic as an
|
||||
argument (not just the information required in the English message).
|
||||
|
||||
As the compiler team gain more experience writing diagnostics that have all of
|
||||
the information necessary to be translated into different languages, this page
|
||||
will be updated with more guidance. For now, the [Fluent] documentation has
|
||||
excellent examples of translating messages into different locales and the
|
||||
information that needs to be provided by the code to do so.
|
||||
|
||||
### Compile-time validation and typed identifiers
|
||||
|
||||
rustc's `fluent_messages` macro performs compile-time validation of Fluent
|
||||
resources and generates code to make it easier to refer to Fluent messages in
|
||||
diagnostics.
|
||||
|
||||
Compile-time validation of Fluent resources will emit any parsing errors
|
||||
from Fluent resources while building the compiler, preventing invalid Fluent
|
||||
resources from causing panics in the compiler. Compile-time validation also
|
||||
emits an error if multiple Fluent messages have the same identifier.
|
||||
|
||||
## Internals
|
||||
|
||||
Various parts of rustc's diagnostic internals are modified in order to support
|
||||
translation.
|
||||
|
||||
### Messages
|
||||
|
||||
All of rustc's traditional diagnostic APIs (e.g. `struct_span_err` or `note`)
|
||||
take any message that can be converted into a `DiagMessage` (or
|
||||
`SubdiagMessage`).
|
||||
|
||||
[`rustc_error_messages::DiagMessage`] can represent legacy non-translatable
|
||||
diagnostic messages and translatable messages. Non-translatable messages are
|
||||
just `String`s. Translatable messages are just a `&'static str` with the
|
||||
identifier of the Fluent message (sometimes with an additional `&'static str`
|
||||
with an attribute).
|
||||
|
||||
`DiagMessage` never needs to be interacted with directly:
|
||||
`DiagMessage` constants are created for each diagnostic message in a
|
||||
Fluent resource (described in more detail below), or `DiagMessage`s will
|
||||
be created in the macro-generated code of a diagnostic derive.
|
||||
|
||||
`rustc_error_messages::SubdiagMessage` is similar, it can correspond to a
|
||||
legacy non-translatable diagnostic message or the name of an attribute to a
|
||||
Fluent message. Translatable `SubdiagMessage`s must be combined with a
|
||||
`DiagMessage` (using `DiagMessage::with_subdiagnostic_message`) to
|
||||
be emitted (an attribute name on its own is meaningless without a corresponding
|
||||
message identifier, which is what `DiagMessage` provides).
|
||||
|
||||
Both `DiagMessage` and `SubdiagMessage` implement `Into` for any
|
||||
type that can be converted into a string, and convert these into
|
||||
non-translatable diagnostics - this keeps all existing diagnostic calls
|
||||
working.
|
||||
|
||||
### Arguments
|
||||
|
||||
Additional context for Fluent messages which are interpolated into message
|
||||
contents needs to be provided to translatable diagnostics.
|
||||
|
||||
Diagnostics have a `set_arg` function that can be used to provide this
|
||||
additional context to a diagnostic.
|
||||
|
||||
Arguments have both a name (e.g. "what" in the earlier example) and a value.
|
||||
Argument values are represented using the `DiagArgValue` type, which is
|
||||
just a string or a number. rustc types can implement `IntoDiagArg` with
|
||||
conversion into a string or a number, and common types like `Ty<'tcx>` already
|
||||
have such implementations.
|
||||
|
||||
`set_arg` calls are handled transparently by diagnostic derives but need to be
|
||||
added manually when using diagnostic builder APIs.
|
||||
|
||||
### Loading
|
||||
|
||||
rustc makes a distinction between the "fallback bundle" for `en-US` that is used
|
||||
by default and when another locale is missing a message; and the primary fluent
|
||||
bundle which is requested by the user.
|
||||
|
||||
Diagnostic emitters implement the `Emitter` trait which has two functions for
|
||||
accessing the fallback and primary fluent bundles (`fallback_fluent_bundle` and
|
||||
`fluent_bundle` respectively).
|
||||
|
||||
`Emitter` also has member functions with default implementations for performing
|
||||
translation of a `DiagMessage` using the results of
|
||||
`fallback_fluent_bundle` and `fluent_bundle`.
|
||||
|
||||
All of the emitters in rustc load the fallback Fluent bundle lazily, only
|
||||
reading Fluent resources and parsing them when an error message is first being
|
||||
translated (for performance reasons - it doesn't make sense to do this if no
|
||||
error is being emitted). `rustc_error_messages::fallback_fluent_bundle` returns
|
||||
a `std::lazy::Lazy<FluentBundle>` which is provided to emitters and evaluated
|
||||
in the first call to `Emitter::fallback_fluent_bundle`.
|
||||
|
||||
The primary Fluent bundle (for the user's desired locale) is expected to be
|
||||
returned by `Emitter::fluent_bundle`. This bundle is used preferentially when
|
||||
translating messages, the fallback bundle is only used if the primary bundle is
|
||||
missing a message or not provided.
|
||||
|
||||
There are no locale bundles distributed with the compiler,
|
||||
but mechanisms are implemented for loading them.
|
||||
|
||||
- `-Ztranslate-additional-ftl` can be used to load a specific resource as the
|
||||
primary bundle for testing purposes.
|
||||
- `-Ztranslate-lang` can be provided a language identifier (something like
|
||||
`en-US`) and will load any Fluent resources found in
|
||||
`$sysroot/share/locale/$locale/` directory (both the user provided
|
||||
sysroot and any sysroot candidates).
|
||||
|
||||
Primary bundles are not currently loaded lazily and if requested will be loaded
|
||||
at the start of compilation regardless of whether an error occurs. Lazily
|
||||
loading primary bundles is possible if it can be assumed that loading a bundle
|
||||
won't fail. Bundle loading can fail if a requested locale is missing, Fluent
|
||||
files are malformed, or a message is duplicated in multiple resources.
|
||||
|
||||
[Fluent]: https://projectfluent.org
|
||||
[`compiler/rustc_borrowck/messages.ftl`]: https://github.com/rust-lang/rust/blob/HEAD/compiler/rustc_borrowck/messages.ftl
|
||||
[`compiler/rustc_parse/messages.ftl`]: https://github.com/rust-lang/rust/blob/HEAD/compiler/rustc_parse/messages.ftl
|
||||
[`rustc_error_messages::DiagMessage`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_error_messages/enum.DiagMessage.html
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
# Early and Late Bound Parameter Implementation Nuances
|
||||
|
||||
> Note: this chapter makes reference to information discussed later on in the [representing types][ch_representing_types] chapter. Specifically, it uses concise notation to represent some more complex kinds of types that have not yet been discussed, such as inference variables.
|
||||
|
||||
[ch_representing_types]: ../ty.md
|
||||
|
||||
Understanding this page likely requires a rudimentary understanding of higher ranked
|
||||
trait bounds/`for<'a>` and also what types such as `dyn for<'a> Trait<'a>` and
|
||||
`for<'a> fn(&'a u32)` mean. Reading [the nomicon chapter](https://doc.rust-lang.org/nomicon/hrtb.html)
|
||||
on HRTB may be useful for understanding this syntax. The meaning of `for<'a> fn(&'a u32)`
|
||||
is incredibly similar to the meaning of `T: for<'a> Trait<'a>`.
|
||||
|
||||
## What does it mean for parameters to be early or late bound
|
||||
|
||||
All function definitions conceptually have a ZST (this is represented by `TyKind::FnDef` in rustc).
|
||||
The only generics on this ZST are the early bound parameters of the function definition. e.g.
|
||||
```rust
|
||||
fn foo<'a>(_: &'a u32) {}
|
||||
|
||||
fn main() {
|
||||
let b = foo;
|
||||
// ^ `b` has type `FnDef(foo, [])` (no args because `'a` is late bound)
|
||||
assert!(std::mem::size_of_val(&b) == 0);
|
||||
}
|
||||
```
|
||||
|
||||
In order to call `b` the late bound parameters do need to be provided, these are inferred at the
|
||||
call site instead of when we refer to `foo`.
|
||||
```rust
|
||||
fn main() {
|
||||
let b = foo;
|
||||
let a: &'static u32 = &10;
|
||||
foo(a);
|
||||
// the lifetime argument for `'a` on `foo` is inferred at the callsite
|
||||
// the generic parameter `'a` on `foo` is inferred to `'static` here
|
||||
}
|
||||
```
|
||||
|
||||
Because late bound parameters are not part of the `FnDef`'s args this allows us to prove trait
|
||||
bounds such as `F: for<'a> Fn(&'a u32)` where `F` is `foo`'s `FnDef`. e.g.
|
||||
```rust
|
||||
fn foo_early<'a, T: Trait<'a>>(_: &'a u32, _: T) {}
|
||||
fn foo_late<'a, T>(_: &'a u32, _: T) {}
|
||||
|
||||
fn accepts_hr_func<F: for<'a> Fn(&'a u32, u32)>(_: F) {}
|
||||
|
||||
fn main() {
|
||||
// doesn't work, the instantiated bound is `for<'a> FnDef<'?0>: Fn(&'a u32, u32)`
|
||||
// `foo_early` only implements `for<'a> FnDef<'a>: Fn(&'a u32, u32)`- the lifetime
|
||||
// of the borrow in the function argument must be the same as the lifetime
|
||||
// on the `FnDef`.
|
||||
accepts_hr_func(foo_early);
|
||||
|
||||
// works, the instantiated bound is `for<'a> FnDef: Fn(&'a u32, u32)`
|
||||
accepts_hr_func(foo_late);
|
||||
}
|
||||
|
||||
// the builtin `Fn` impls for `foo_early` and `foo_late` look something like:
|
||||
// `foo_early`
|
||||
impl<'a, T: Trait<'a>> Fn(&'a u32, T) for FooEarlyFnDef<'a, T> { ... }
|
||||
// `foo_late`
|
||||
impl<'a, T> Fn(&'a u32, T) for FooLateFnDef<T> { ... }
|
||||
|
||||
```
|
||||
|
||||
Early bound parameters are present on the `FnDef`. Late bound generic parameters are not present
|
||||
on the `FnDef` but are instead constrained by the builtin `Fn*` impl.
|
||||
|
||||
The same distinction applies to closures. Instead of `FnDef` we are talking about the anonymous
|
||||
closure type. Closures are [currently unsound](https://github.com/rust-lang/rust/issues/84366) in
|
||||
ways that are closely related to the distinction between early/late bound
|
||||
parameters (more on this later)
|
||||
|
||||
The early/late boundness of generic parameters is only relevant for the desugaring of
|
||||
functions/closures into types with builtin `Fn*` impls. It does not make sense to talk about it
|
||||
in other contexts.
|
||||
|
||||
The `generics_of` query in rustc only contains early bound parameters. In this way it acts more
|
||||
like `generics_of(my_func)` is the generics for the FnDef than the generics provided to the function
|
||||
body although it's not clear to the author of this section if this was the actual justification for
|
||||
making `generics_of` behave this way.
|
||||
|
||||
## What parameters are currently late bound
|
||||
|
||||
Below are the current requirements for determining if a generic parameter is late bound. It is worth
|
||||
keeping in mind that these are not necessarily set in stone and it is almost certainly possible to
|
||||
be more flexible.
|
||||
|
||||
### Must be a lifetime parameter
|
||||
|
||||
Rust can't support types such as `for<T> dyn Trait<T>` or `for<T> fn(T)`, this is a
|
||||
fundamental limitation of the language as we are required to monomorphize type/const
|
||||
parameters and cannot do so behind dynamic dispatch. (technically we could probably
|
||||
support `for<T> dyn MarkerTrait<T>` as there is nothing to monomorphize)
|
||||
|
||||
Not being able to support `for<T> dyn Trait<T>` resulted in making all type and const
|
||||
parameters early bound. Only lifetime parameters can be late bound.
|
||||
|
||||
### Must not appear in the where clauses
|
||||
|
||||
In order for a generic parameter to be late bound it must not appear in any where clauses.
|
||||
This is currently an incredibly simplistic check that causes lifetimes to be early bound even
|
||||
if the where clauses they appear in are always true, or implied by well formedness of function
|
||||
arguments. e.g.
|
||||
```rust
|
||||
fn foo1<'a: 'a>(_: &'a u32) {}
|
||||
// ^^ early bound parameter because it's in a `'a: 'a` clause
|
||||
// even though the bound obviously holds all the time
|
||||
fn foo2<'a, T: Trait<'a>>(a: T, b: &'a u32) {}
|
||||
// ^^ early bound parameter because it's used in the `T: Trait<'a>` clause
|
||||
fn foo3<'a, T: 'a>(_: &'a T) {}
|
||||
// ^^ early bound parameter because it's used in the `T: 'a` clause
|
||||
// even though that bound is implied by wellformedness of `&'a T`
|
||||
fn foo4<'a, 'b: 'a>(_: Inv<&'a ()>, _: Inv<&'b ()>) {}
|
||||
// ^^ ^^ ^^^ note:
|
||||
// ^^ ^^ `Inv` stands for `Invariant` and is used to
|
||||
// ^^ ^^ make the type parameter invariant. This
|
||||
// ^^ ^^ is necessary for demonstration purposes as
|
||||
// ^^ ^^ `for<'a, 'b> fn(&'a (), &'b ())` and
|
||||
// ^^ ^^ `for<'a> fn(&'a u32, &'a u32)` are subtypes-
|
||||
// ^^ ^^ of each other which makes the bound trivially
|
||||
// ^^ ^^ satisfiable when making the fnptr. `Inv`
|
||||
// ^^ ^^ disables this subtyping.
|
||||
// ^^ ^^
|
||||
// ^^^^^^ both early bound parameters because they are present in the
|
||||
// `'b: 'a` clause
|
||||
```
|
||||
|
||||
The reason for this requirement is that we cannot represent the `T: Trait<'a>` or `'b: 'a` clauses
|
||||
on a function pointer. `for<'a, 'b> fn(Inv<&'a ()>, Inv<&'b ()>)` is not a valid function pointer to
|
||||
represent `foo4` as it would allow calling the function without `'b: 'a` holding.
|
||||
|
||||
### Must be constrained by where clauses or function argument types
|
||||
|
||||
The builtin impls of the `Fn*` traits for closures and `FnDef`s cannot have any unconstrained
|
||||
parameters. For example the following impl is illegal:
|
||||
```rust
|
||||
impl<'a> Trait for u32 { type Assoc = &'a u32; }
|
||||
```
|
||||
We must not end up with a similar impl for the `Fn*` traits e.g.
|
||||
```rust
|
||||
impl<'a> Fn<()> for FnDef { type Assoc = &'a u32 }
|
||||
```
|
||||
|
||||
Violating this rule can trivially lead to unsoundness as seen in [#84366](https://github.com/rust-lang/rust/issues/84366).
|
||||
Additionally if we ever support late bound type params then an impl like:
|
||||
```rust
|
||||
impl<T> Fn<()> for FnDef { type Assoc = T; }
|
||||
```
|
||||
would break the compiler in various ways.
|
||||
|
||||
In order to ensure that everything functions correctly, we do not allow generic parameters to
|
||||
be late bound if it would result in a builtin impl that does not constrain all of the generic
|
||||
parameters on the builtin impl. Making a generic parameter be early bound trivially makes it be
|
||||
constrained by the builtin impl as it ends up on the self type.
|
||||
|
||||
Because of the requirement that late bound parameters must not appear in where clauses, checking
|
||||
this is simpler than the rules for checking impl headers constrain all the parameters on the impl.
|
||||
We only have to ensure that all late bound parameters appear at least once in the function argument
|
||||
types outside of an alias (e.g. an associated type).
|
||||
|
||||
The requirement that they not indirectly be in the args of an alias for it to count is the
|
||||
same as why the following code is forbidden:
|
||||
```rust
|
||||
impl<T: Trait> OtherTrait for <T as Trait>::Assoc { type Assoc = T }
|
||||
```
|
||||
There is no guarantee that `<T as Trait>::Assoc` will normalize to different types for every
|
||||
instantiation of `T`. If we were to allow this impl we could get overlapping impls and the
|
||||
same is true of the builtin `Fn*` impls.
|
||||
|
||||
## Making more generic parameters late bound
|
||||
|
||||
It is generally considered desirable for more parameters to be late bound as it makes
|
||||
the builtin `Fn*` impls more flexible. Right now many of the requirements for making
|
||||
a parameter late bound are overly restrictive as they are tied to what we can currently
|
||||
(or can ever) do with fn ptrs.
|
||||
|
||||
It would be theoretically possible to support late bound params in `where`-clauses in the
|
||||
language by introducing implication types which would allow us to express types such as:
|
||||
`for<'a, 'b: 'a> fn(Inv<&'a u32>, Inv<&'b u32>)` which would ensure `'b: 'a` is upheld when
|
||||
calling the function pointer.
|
||||
|
||||
It would also be theoretically possible to support it by making the coercion to a fn ptr
|
||||
instantiate the parameter with an infer var while still allowing the FnDef to not have the
|
||||
generic parameter present as trait impls are perfectly capable of representing the where clauses
|
||||
on the function on the impl itself. This would also allow us to support late bound type/const
|
||||
vars allowing bounds like `F: for<T> Fn(T)` to hold.
|
||||
|
||||
It is also somewhat unclear if we can change the `Fn` traits to be structured differently
|
||||
so that we never have to make a parameter early bound just to make the builtin impl have all
|
||||
generics be constrained. Of all the possible causes of a generic parameter being early bound
|
||||
this seems the most difficult to remove.
|
||||
|
||||
Whether these would be good ideas to implement is a separate question- they are only brought
|
||||
up to illustrate that the current rules are not necessarily set in stone and a result of
|
||||
"it's the only way of doing this".
|
||||
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
# Turbofishing's interactions with early/late bound parameters
|
||||
|
||||
> Note: this chapter makes reference to information discussed later on in the [representing types][ch_representing_types] chapter. Specifically, it uses concise notation to represent some more complex kinds of types that have not yet been discussed, such as inference variables.
|
||||
|
||||
[ch_representing_types]: ../ty.md
|
||||
|
||||
The early/late bound parameter distinction on functions introduces some complications
|
||||
when providing generic arguments to functions. This document discusses what those are
|
||||
and how they might interact with future changes to make more things late bound.
|
||||
|
||||
## Can't turbofish generic arguments on functions sometimes
|
||||
|
||||
When a function has any late bound lifetime parameters (be they explicitly defined or
|
||||
implicitly introduced via lifetime elision) we disallow specifying any lifetime arguments
|
||||
on the function. Sometimes this is a hard error, other times it is a future compat lint
|
||||
([`late_bound_lifetime_arguments`](https://github.com/rust-lang/rust/issues/42868)).
|
||||
|
||||
```rust
|
||||
fn early<'a: 'a>(a: &'a ()) -> &'a () { a }
|
||||
fn late<'a>(a: &'a ()) -> &'a () { a }
|
||||
|
||||
fn mixed<'a, 'b: 'b>(a: &'a (), b: &'b ()) -> &'a () { a }
|
||||
|
||||
struct Foo;
|
||||
impl Foo {
|
||||
fn late<'a>(self, a: &'a ()) -> &'a () { a }
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// fine
|
||||
let f = early::<'static>;
|
||||
|
||||
// some variation of hard errors and future compat lints
|
||||
Foo.late::<'static>(&());
|
||||
let f = late::<'static>;
|
||||
let f = mixed::<'static, 'static>;
|
||||
let f = mixed::<'static>;
|
||||
late::<'static>(&());
|
||||
}
|
||||
```
|
||||
|
||||
The justification for this is that late bound parameters are not present on the
|
||||
`FnDef` so the arguments to late bound parameters can't be present in the generic arguments
|
||||
for the type. i.e. the `late` function in the above code snippet would not have
|
||||
any generic parameters on the `FnDef` zst:
|
||||
```rust
|
||||
// example desugaring of the `late` function and its zst + builtin Fn impl
|
||||
struct LateFnDef;
|
||||
impl<'a> Fn<(&'a (),)> for LateFnDef {
|
||||
type Output = &'a ();
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
The cause for some situations giving future compat lints and others giving hard errors
|
||||
is a little arbitrary but explainable:
|
||||
- It's always a hard error for method calls
|
||||
- It's only a hard error on paths to free functions if there is no unambiguous way to
|
||||
create the generic arguments for the fndef from the lifetime arguments. (i.e. the number of
|
||||
lifetimes provided must be exactly equal to the number of early bound lifetimes or
|
||||
else it's a hard error)
|
||||
|
||||
## Back compat issues from turning early bound to late bound
|
||||
|
||||
Because of the previously mentioned restriction on turbofishing generic arguments, it
|
||||
is a breaking change to upgrade a lifetime from early bound to late bound as it can cause
|
||||
existing turbofishes to become hard errors/future compat lints.
|
||||
|
||||
Many t-types members have expressed interest in wanting more parameters to be late bound.
|
||||
We cannot do so if making something late bound is going to break code that many would
|
||||
expect to work (judging by the future compat lint issue many people do expect to be able
|
||||
to turbofish late bound parameters).
|
||||
|
||||
## Interactions with late bound type/const parameters
|
||||
|
||||
If we were to make some type/const parameters late bound we would definitely not want
|
||||
to disallow turbofishing them as it presumably(?) would break a tonne of code.
|
||||
|
||||
While lifetimes do differ from type/consts in some ways I(BoxyUwU) do not believe there
|
||||
is any justification for why it would make sense to allow turbofishing late bound
|
||||
type/const parameters but not late bound lifetimes.
|
||||
|
||||
## Removing the hard error/fcw
|
||||
|
||||
For the reasons above it seems reasonable that we may want to remove the hard error and fcw
|
||||
(removing the errors/fcw is definitely a blocker for making more things late bound).
|
||||
|
||||
example behaviour:
|
||||
```rust
|
||||
fn late<'a>(a: &'a ()) -> &'a () { a }
|
||||
|
||||
fn accepts_fn(_: impl for<'a> Fn(&'a ()) -> &'a ()) {}
|
||||
fn accepts_fn_2(_: impl Fn(&'static ()) -> &'static ()) {}
|
||||
|
||||
fn main() {
|
||||
let f = late::<'static>;
|
||||
|
||||
accepts_fn(f); //~ error: `f` doesn't implement `for<'a> Fn(&'a ()) -> &'a ()`
|
||||
accepts_fn_2(f); // works
|
||||
|
||||
accepts_fn(late) // works
|
||||
}
|
||||
```
|
||||
|
||||
One potential complication is that we would want a way to specify a generic argument
|
||||
to a function without having to specify arguments for all previous parameters. i.e.
|
||||
ideally you could write the following code somehow.
|
||||
```rust
|
||||
fn late<'a, 'b>(_: &'a (), _: &'b ()) {}
|
||||
|
||||
fn accepts_fn(_: impl for<'a> Fn(&'a (), &'static ())) {}
|
||||
|
||||
fn main() {
|
||||
// a naive implementation would have an inference variable as
|
||||
// the argument to the `'a` parameter no longer allowing the `FnDef`
|
||||
// to satisfy the bound `for<'a> Fn(&'a (), &'static ())`
|
||||
let f = late::<'_, 'static>;
|
||||
accepts_fn(f);
|
||||
}
|
||||
```
|
||||
Maybe we can just special case HIR ty lowering for `_`/`'_` arguments for late bound
|
||||
parameters somehow and have it not mean the same thing as `_` for early bound parameters.
|
||||
Regardless I think we would need a solution that would allow writing the above code even
|
||||
if it was done by some new syntax such as having to write `late::<k#no_argument, 'static>`
|
||||
(naturally `k#no_argument` would only make sense as an argument to late bound parameters).
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue