Implement date-checker

This tool looks for HTML comments like `<!-- date: 2021-01 -->` in each
Markdown source file and compiles a list of dates that are at least
six months old. It then opens an issue with that list, with checkboxes for
each file and date. Note that it only opens an issue if at least one
date is that old; it does nothing if the list is
empty.

This tool is automatically run monthly in a GitHub Actions workflow.
I have tested the tool on a private repo and confirmed that it works.
This commit is contained in:
Camelid 2021-01-23 13:38:22 -08:00 committed by Joshua Nelson
parent 9fb6690ae9
commit bc320e7800
5 changed files with 442 additions and 1 deletions

View file

@ -0,0 +1,44 @@
# Monthly job that runs the `date-check` tool over the Markdown sources and
# opens a triage issue when the tool reports date annotations to review.
name: Date-Check

on:
  schedule:
    # Run at noon UTC every 1st of the month
    - cron: '00 12 01 * *'

  # Allow manually starting the workflow
  workflow_dispatch:

jobs:
  date-check:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repo
        uses: actions/checkout@v2

      # Only the tool's stdout is captured; its first line is either the issue
      # title or the sentinel `empty`.
      - name: Run `date-check`
        working-directory: ci/date-check
        run: |
          cargo run -- ../../src/ > ../../date-check-output.txt

      - name: Open issue
        uses: actions/github-script@v3
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            const fs = require('fs');

            const rawText = fs.readFileSync('date-check-output.txt', { encoding: 'utf8' });
            // First line: issue title (or `empty`); remaining lines: issue body.
            const title = rawText.split('\n')[0];
            if (title !== 'empty') {
              const body = rawText.split('\n').slice(1).join('\n');
              // Await the request so a failed API call fails this step instead of
              // being reported as success.
              await github.issues.create({
                owner: context.repo.owner,
                repo: context.repo.repo,
                title,
                body,
              });

              console.log('Opened issue.');
            } else {
              console.log('No dates to triage.');
            }

View file

@ -1,4 +1,6 @@
book
# prevent accidentally changes
# prevent accidental changes
book.toml
ci/date-check/target/

View file

@ -0,0 +1,147 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
dependencies = [
"memchr",
]
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"winapi",
]
[[package]]
name = "date-check"
version = "0.1.0"
dependencies = [
"chrono",
"glob",
"regex",
]
[[package]]
name = "glob"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89203f3fba0a3795506acaad8ebce3c80c0af93f994d5a1d7a0b1eeb23271929"
[[package]]
name = "memchr"
version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "regex"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"thread_local",
]
[[package]]
name = "regex-syntax"
version = "0.6.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581"
[[package]]
name = "thread_local"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb9bc092d0d51e76b2b19d9d85534ffc9ec2db959a2523cdae0697e2972cd447"
dependencies = [
"lazy_static",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi",
"winapi",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View file

@ -0,0 +1,12 @@
[package]
name = "date-check"
version = "0.1.0"
authors = ["Camelid <camelidcamel@gmail.com>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
glob = "0.3"
regex = "1"
chrono = "0.4"

View file

@ -0,0 +1,236 @@
use std::{
collections::BTreeMap,
convert::TryInto as _,
env, fmt, fs,
path::{Path, PathBuf},
};
use chrono::{Datelike as _, TimeZone as _, Utc};
use glob::glob;
use regex::Regex;
/// A calendar month, as written in a `<!-- date: YYYY-MM -->` annotation.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct Date {
    /// Year component (the `2021` in `2021-01`).
    year: u32,
    /// Month component (the `1` in `2021-01`).
    month: u32,
}

impl Date {
    /// Returns how many calendar months `self` is after `other`, or `None`
    /// if `other` is later than `self`.
    ///
    /// The difference is computed directly on the (year, month) pairs. Dividing
    /// a day count by 30 would be wrong for short months (February to March
    /// would count as zero months) and would drift upward over multi-year
    /// spans.
    ///
    /// The month values are not range-checked here.
    fn months_since(self, other: Date) -> Option<u32> {
        // Signed arithmetic so that an `other` in the future yields a negative total.
        let years_apart = i64::from(self.year) - i64::from(other.year);
        let months_apart = i64::from(self.month) - i64::from(other.month);
        let total = years_apart * 12 + months_apart;

        // The conversion fails exactly when `total` is negative (or absurdly large).
        total.try_into().ok()
    }
}
impl fmt::Display for Date {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:04}-{:02}", self.year, self.month)
}
}
/// Builds the regex that recognizes `<!-- date: YYYY-MM -->` annotations.
///
/// The year is captured as group `y` (four digits) and the month as group `m`
/// (two digits). Whitespace, including newlines, is allowed after `<!--`,
/// after `date:`, and before `-->`.
fn make_date_regex() -> Regex {
    // `(?x)` makes whitespace in the pattern insignificant and enables the
    // `#` comments that label the capture groups.
    let pattern = r"(?x) # insignificant whitespace mode
<!--\s*
date:\s*
(?P<y>\d{4}) # year
-
(?P<m>\d{2}) # month
\s*-->";

    Regex::new(pattern).unwrap()
}
/// Finds every date annotation in `text` and returns `(line, date)` pairs in
/// order of appearance.
///
/// `line` is 1-based and refers to the line on which the annotation *ends*, so
/// an annotation spread over several lines is reported at its closing `-->`.
///
/// # Panics
///
/// Panics if a captured year or month cannot be parsed as a `u32`.
fn collect_dates_from_file(date_regex: &Regex, text: &str) -> Vec<(usize, Date)> {
    let mut found = Vec::new();
    // Line number of the byte at `scanned_up_to`.
    let mut current_line = 1;
    // Byte offset up to which newlines have already been counted.
    let mut scanned_up_to = 0;

    for caps in date_regex.captures_iter(text) {
        let match_end = caps.get(0).unwrap().end();
        let date = Date {
            year: caps["y"].parse().unwrap(),
            month: caps["m"].parse().unwrap(),
        };

        // Only count newlines in the stretch not yet scanned, so the whole
        // pass over `text` stays linear.
        current_line += text[scanned_up_to..match_end].matches('\n').count();
        scanned_up_to = match_end;

        found.push((current_line, date));
    }

    found
}
/// Reads each file in `paths` and gathers its date annotations, keyed by path.
///
/// Files without any annotation are left out of the returned map.
///
/// # Panics
///
/// Panics if a file cannot be read as UTF-8 text.
fn collect_dates(paths: impl Iterator<Item = PathBuf>) -> BTreeMap<PathBuf, Vec<(usize, Date)>> {
    // Compile the regex once and reuse it for every file.
    let date_regex = make_date_regex();

    let mut by_file = BTreeMap::new();
    by_file.extend(
        paths
            .map(|path| {
                let contents = fs::read_to_string(&path).unwrap();
                let dates = collect_dates_from_file(&date_regex, &contents);
                (path, dates)
            })
            .filter(|(_, dates)| !dates.is_empty()),
    );
    by_file
}
/// Keeps only the dates that are at least `min_months_since` months before
/// `current_month`, and drops files that end up with no dates at all.
///
/// The returned iterator is lazy; filtering happens as it is consumed.
///
/// # Panics
///
/// Panics (while iterating) if any date is later than `current_month`.
fn filter_dates(
    current_month: Date,
    min_months_since: u32,
    dates_by_file: impl Iterator<Item = (PathBuf, Vec<(usize, Date)>)>,
) -> impl Iterator<Item = (PathBuf, Vec<(usize, Date)>)> {
    dates_by_file.filter_map(move |(path, mut dates)| {
        dates.retain(|&(_, date)| {
            let age_in_months = current_month
                .months_since(date)
                .expect("found date that is after current month");
            age_in_months >= min_months_since
        });

        if dates.is_empty() {
            None
        } else {
            Some((path, dates))
        }
    })
}
/// Scans every `*.md` file under the directory given as the first CLI argument
/// and prints a triage report for dates that are at least six months old.
///
/// Output goes to stdout. The first line is either the report title or the
/// single word `empty` when there is nothing to triage; the remaining lines
/// are the Markdown body of the report.
///
/// # Panics
///
/// Panics if the directory argument is missing, if the glob pattern or a glob
/// entry is invalid, or if a matched path is not under the given directory.
fn main() {
    let docs_root = env::args()
        .nth(1)
        .expect("expect root Markdown directory as CLI argument");
    let docs_root_path = Path::new(&docs_root);
    let markdown_glob = format!("{}/**/*.md", docs_root);

    let today = Utc::today();
    let this_month = Date {
        year: today.year_ce().1,
        month: today.month(),
    };

    let markdown_files = glob(&markdown_glob).unwrap().map(Result::unwrap);
    let all_dates = collect_dates(markdown_files);
    let stale_dates: BTreeMap<_, _> = filter_dates(this_month, 6, all_dates.into_iter()).collect();

    // Sentinel line for the caller: nothing to report.
    if stale_dates.is_empty() {
        println!("empty");
        return;
    }

    // Title line, followed by the Markdown body.
    println!("Date Reference Triage for {}", this_month);
    println!("## Procedure");
    println!();
    println!(
        "Each of these dates should be checked to see if the docs they annotate are \
         up-to-date. Each date should be updated (in the Markdown file where it appears) to \
         use the current month ({current_month}), or removed if the docs it annotates are not \
         expected to fall out of date quickly.",
        current_month = this_month
    );
    println!();
    println!(
        "Please check off each date once a PR to update it (and, if applicable, its \
         surrounding docs) has been merged. Please also mention that you are working on a \
         particular set of dates so duplicate work is avoided."
    );
    println!();
    println!("Finally, once all the dates have been updated, please close this issue.");
    println!();
    println!("## Dates");
    println!();

    // One checkbox per file, with a nested checkbox per date found in it.
    for (path, dates) in stale_dates {
        let relative_path = path.strip_prefix(docs_root_path).unwrap();
        println!("- [ ] {}", relative_path.display());
        for (line, date) in dates {
            println!(" - [ ] line {}: {}", line, date);
        }
    }
    println!();
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a `Date` in assertions.
    fn ym(year: u32, month: u32) -> Date {
        Date { year, month }
    }

    /// Ten months lie between 2020-03 and 2021-01.
    #[test]
    fn test_months_since() {
        let earlier = ym(2020, 3);
        let later = ym(2021, 1);
        assert_eq!(later.months_since(earlier), Some(10));
    }

    /// An annotation embedded in surrounding text is recognized.
    #[test]
    fn test_date_regex() {
        assert!(make_date_regex().is_match("foo <!-- date: 2021-01 --> bar"));
    }

    /// Annotations are reported with the 1-based line on which they end,
    /// including one that spans several lines.
    #[test]
    fn test_collect_dates_from_file() {
        // One fixture line per source line, so the expected line numbers can be
        // read off directly.
        let text = concat!(
            "Test1\n",
            "<!-- date: 2021-01 -->\n",
            "Test2\n",
            "Foo<!-- date: 2021-02 -->\n",
            "Test3\n",
            "Test4\n",
            "Foo<!-- date: 2021-03 -->Bar\n",
            "<!-- date: 2021-04 -->\n",
            "Test5\n",
            "Test6\n",
            "Test7\n",
            "<!-- date: \n",
            "\n",
            "2021-05 -->\n",
            "Test8\n",
        );

        let expected = vec![
            (2, ym(2021, 1)),
            (4, ym(2021, 2)),
            (7, ym(2021, 3)),
            (8, ym(2021, 4)),
            (14, ym(2021, 5)),
        ];

        assert_eq!(collect_dates_from_file(&make_date_regex(), text), expected);
    }
}