As derived from extensive testing of `argv` in a C/C++ application. Co-Authored-By: Jane Lusby <jlusby42@gmail.com>
236 lines
9.2 KiB
Rust
236 lines
9.2 KiB
Rust
//! The Windows command line is just a string
//! <https://docs.microsoft.com/en-us/archive/blogs/larryosterman/the-windows-command-line-is-just-a-string>
//!
//! This module implements the parsing necessary to turn that string into a list of arguments.
|
|
|
|
#[cfg(test)]
|
|
mod tests;
|
|
|
|
use crate::ffi::OsString;
|
|
use crate::fmt;
|
|
use crate::marker::PhantomData;
|
|
use crate::num::NonZeroU16;
|
|
use crate::os::windows::prelude::*;
|
|
use crate::path::PathBuf;
|
|
use crate::ptr::NonNull;
|
|
use crate::sys::c;
|
|
use crate::sys::windows::os::current_exe;
|
|
use crate::vec;
|
|
|
|
use core::iter;
|
|
|
|
pub fn args() -> Args {
|
|
// SAFETY: `GetCommandLineW` returns a pointer to a null terminated UTF-16
|
|
// string so it's safe for `WStrUnits` to use.
|
|
unsafe {
|
|
let lp_cmd_line = c::GetCommandLineW();
|
|
let parsed_args_list = parse_lp_cmd_line(WStrUnits::new(lp_cmd_line), || {
|
|
current_exe().map(PathBuf::into_os_string).unwrap_or_else(|_| OsString::new())
|
|
});
|
|
|
|
Args { parsed_args_list: parsed_args_list.into_iter() }
|
|
}
|
|
}
|
|
|
|
/// Implements the Windows command-line argument parsing algorithm.
|
|
///
|
|
/// Microsoft's documentation for the Windows CLI argument format can be found at
|
|
/// <https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments>
|
|
///
|
|
/// A more in-depth explanation is here:
|
|
/// <https://daviddeley.com/autohotkey/parameters/parameters.htm#WIN>
|
|
///
|
|
/// Windows includes a function to do command line parsing in shell32.dll.
|
|
/// However, this is not used for two reasons:
|
|
///
|
|
/// 1. Linking with that DLL causes the process to be registered as a GUI application.
|
|
/// GUI applications add a bunch of overhead, even if no windows are drawn. See
|
|
/// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
|
|
///
|
|
/// 2. It does not follow the modern C/C++ argv rules outlined in the first two links above.
|
|
///
|
|
/// This function was tested for equivalence to the C/C++ parsing rules using an
|
|
/// extensive test suite available at
|
|
/// <https://github.com/ChrisDenton/winarg/tree/std>.
|
|
fn parse_lp_cmd_line<'a, F: Fn() -> OsString>(
|
|
lp_cmd_line: Option<WStrUnits<'a>>,
|
|
exe_name: F,
|
|
) -> Vec<OsString> {
|
|
const BACKSLASH: NonZeroU16 = NonZeroU16::new(b'\\' as u16).unwrap();
|
|
const QUOTE: NonZeroU16 = NonZeroU16::new(b'"' as u16).unwrap();
|
|
const TAB: NonZeroU16 = NonZeroU16::new(b'\t' as u16).unwrap();
|
|
const SPACE: NonZeroU16 = NonZeroU16::new(b' ' as u16).unwrap();
|
|
|
|
let mut ret_val = Vec::new();
|
|
// If the cmd line pointer is null or it points to an empty string then
|
|
// return the name of the executable as argv[0].
|
|
if lp_cmd_line.as_ref().and_then(|cmd| cmd.peek()).is_none() {
|
|
ret_val.push(exe_name());
|
|
return ret_val;
|
|
}
|
|
let mut code_units = lp_cmd_line.unwrap();
|
|
|
|
// The executable name at the beginning is special.
|
|
let mut in_quotes = false;
|
|
let mut cur = Vec::new();
|
|
for w in &mut code_units {
|
|
match w {
|
|
// A quote mark always toggles `in_quotes` no matter what because
|
|
// there are no escape characters when parsing the executable name.
|
|
QUOTE => in_quotes = !in_quotes,
|
|
// If not `in_quotes` then whitespace ends argv[0].
|
|
SPACE | TAB if !in_quotes => break,
|
|
// In all other cases the code unit is taken literally.
|
|
_ => cur.push(w.get()),
|
|
}
|
|
}
|
|
// Skip whitespace.
|
|
code_units.advance_while(|w| w == SPACE || w == TAB);
|
|
ret_val.push(OsString::from_wide(&cur));
|
|
|
|
// Parse the arguments according to these rules:
|
|
// * All code units are taken literally except space, tab, quote and backslash.
|
|
// * When not `in_quotes`, space and tab separate arguments. Consecutive spaces and tabs are
|
|
// treated as a single separator.
|
|
// * A space or tab `in_quotes` is taken literally.
|
|
// * A quote toggles `in_quotes` mode unless it's escaped. An escaped quote is taken literally.
|
|
// * A quote can be escaped if preceded by an odd number of backslashes.
|
|
// * If any number of backslashes is immediately followed by a quote then the number of
|
|
// backslashes is halved (rounding down).
|
|
// * Backslashes not followed by a quote are all taken literally.
|
|
// * If `in_quotes` then a quote can also be escaped using another quote
|
|
// (i.e. two consecutive quotes become one literal quote).
|
|
let mut cur = Vec::new();
|
|
let mut in_quotes = false;
|
|
while let Some(w) = code_units.next() {
|
|
match w {
|
|
// If not `in_quotes`, a space or tab ends the argument.
|
|
SPACE | TAB if !in_quotes => {
|
|
ret_val.push(OsString::from_wide(&cur[..]));
|
|
cur.truncate(0);
|
|
|
|
// Skip whitespace.
|
|
code_units.advance_while(|w| w == SPACE || w == TAB);
|
|
}
|
|
// Backslashes can escape quotes or backslashes but only if consecutive backslashes are followed by a quote.
|
|
BACKSLASH => {
|
|
let backslash_count = code_units.advance_while(|w| w == BACKSLASH) + 1;
|
|
if code_units.peek() == Some(QUOTE) {
|
|
cur.extend(iter::repeat(BACKSLASH.get()).take(backslash_count / 2));
|
|
// The quote is escaped if there are an odd number of backslashes.
|
|
if backslash_count % 2 == 1 {
|
|
code_units.next();
|
|
cur.push(QUOTE.get());
|
|
}
|
|
} else {
|
|
// If there is no quote on the end then there is no escaping.
|
|
cur.extend(iter::repeat(BACKSLASH.get()).take(backslash_count));
|
|
}
|
|
}
|
|
// If `in_quotes` and not backslash escaped (see above) then a quote either
|
|
// unsets `in_quote` or is escaped by another quote.
|
|
QUOTE if in_quotes => match code_units.peek() {
|
|
// Two consecutive quotes when `in_quotes` produces one literal quote.
|
|
Some(QUOTE) => {
|
|
cur.push(QUOTE.get());
|
|
code_units.next();
|
|
}
|
|
// Otherwise set `in_quotes`.
|
|
Some(_) => in_quotes = false,
|
|
// The end of the command line.
|
|
// Push `cur` even if empty, which we do by breaking while `in_quotes` is still set.
|
|
None => break,
|
|
},
|
|
// If not `in_quotes` and not BACKSLASH escaped (see above) then a quote sets `in_quote`.
|
|
QUOTE => in_quotes = true,
|
|
// Everything else is always taken literally.
|
|
_ => cur.push(w.get()),
|
|
}
|
|
}
|
|
// Push the final argument, if any.
|
|
if !cur.is_empty() || in_quotes {
|
|
ret_val.push(OsString::from_wide(&cur[..]));
|
|
}
|
|
ret_val
|
|
}
|
|
|
|
/// Iterator over a list of already parsed command-line arguments.
///
/// Every iterator operation is forwarded unchanged to the owned
/// `vec::IntoIter`.
pub struct Args {
    // Arguments that have not been yielded yet.
    parsed_args_list: vec::IntoIter<OsString>,
}

impl fmt::Debug for Args {
    /// Formats the arguments that are still pending as a slice.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let remaining: &[OsString] = self.parsed_args_list.as_slice();
        fmt::Debug::fmt(remaining, f)
    }
}

impl Iterator for Args {
    type Item = OsString;

    fn next(&mut self) -> Option<Self::Item> {
        self.parsed_args_list.next()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.parsed_args_list.size_hint()
    }
}

impl DoubleEndedIterator for Args {
    fn next_back(&mut self) -> Option<Self::Item> {
        self.parsed_args_list.next_back()
    }
}

impl ExactSizeIterator for Args {
    fn len(&self) -> usize {
        self.parsed_args_list.len()
    }
}
|
|
|
|
/// Cursor over the code units of an LPWSTR, i.e. a pointer to a sequence of
/// UTF-16 code units that ends at the first zero unit.
///
/// After construction it is driven entirely through safe methods.
struct WStrUnits<'a> {
    // Current position in the string. Never null.
    lpwstr: NonNull<u16>,
    // Marks the lifetime for which the pointed-to memory must remain valid.
    lifetime: PhantomData<&'a [u16]>,
}

impl WStrUnits<'_> {
    /// Wraps `lpwstr` in a cursor. Yields `None` when the pointer is null.
    ///
    /// SAFETY: `lpwstr` must point to a null-terminated wide string that lives
    /// at least as long as the lifetime of this struct.
    unsafe fn new(lpwstr: *const u16) -> Option<Self> {
        NonNull::new(lpwstr as *mut u16).map(|lpwstr| Self { lpwstr, lifetime: PhantomData })
    }

    /// Returns the code unit at the current position without consuming it,
    /// or `None` when the cursor sits on the terminating zero.
    fn peek(&self) -> Option<NonZeroU16> {
        // SAFETY: The cursor only ever advances past non-zero units (see
        // `next`), so it never leaves the string and reading the current
        // position is always in bounds.
        let unit = unsafe { *self.lpwstr.as_ptr() };
        NonZeroU16::new(unit)
    }

    /// Consumes code units for as long as `predicate` accepts them.
    /// Returns how many units were consumed.
    fn advance_while<P: FnMut(NonZeroU16) -> bool>(&mut self, mut predicate: P) -> usize {
        let mut skipped = 0;
        loop {
            match self.peek() {
                Some(unit) if predicate(unit) => {
                    self.next();
                    skipped += 1;
                }
                // Either the terminator was reached or the unit was rejected.
                _ => return skipped,
            }
        }
    }
}

impl Iterator for WStrUnits<'_> {
    // Zero is never yielded because it marks the end of the string.
    type Item = NonZeroU16;

    fn next(&mut self) -> Option<Self::Item> {
        // Bail out without moving once the terminator is reached.
        let current = self.peek()?;
        // SAFETY: `current` is non-zero, so the cursor is not on the
        // terminator and stepping forward by one stays within the string
        // (at worst landing on the terminator itself).
        self.lpwstr = unsafe { NonNull::new_unchecked(self.lpwstr.as_ptr().add(1)) };
        Some(current)
    }
}
|