parse.rs (28890B)
1 use super::{ 2 ExitCode, HashSet, convert, 3 io::{self, Write as _}, 4 }; 5 use core::{ 6 cmp::Ordering, 7 hash::{Hash, Hasher}, 8 }; 9 // We skip all lines until: "name default meaning" trimming leading spaces. 10 // We verify the next line is "---- ------- -------" trimming leading spaces. 11 // 12 // Next we grab the lints. 13 // The regex for lints is verified to be `^ *[\-0-9A-Za-z]+ +(allow|warn|deny) +.*$`. 14 // 15 // We verify all three kinds of lints are non-empty and no lint exists more than once. We verify the 16 // `warn` lints contain a lint called "warnings" which does not count towards the emptiness of 17 // `warn` lints. 18 // 19 // Next we skip all lines until: "name sub-lints" trimming leading spaces. 20 // We verify the next line is "---- ---------" trimming leading spaces. 21 // 22 // Next we grab the lint groups. 23 // The regex for lint groups is verified to be `^ +[\-0-9A-Za-z]+ +[ ,\-0-9A-Za-z]+$`. 24 // 25 // We verify lint group names are unique among themselves and the individual lints sans `"warnings"` 26 // which is allowed to exist. 27 // We verify the lint groups are non-empty except "warnings" if it is defined. 28 // We verify each lint in a lint group is unique. 29 // We verify each lint is the name of one of the lints unless `--allow-undefined-lints` was passed. 30 // We verify when there is multiple lints, each lint is separated with a single comma and space. 31 /// Error from parsing. 32 #[cfg_attr(test, derive(Debug, PartialEq))] 33 pub(crate) enum E<'a> { 34 /// Output doesn't start as expected. 35 Start, 36 /// The contained line is not the format of a lint. 37 UnexpectedLintLine(&'a [u8]), 38 /// The contained lint appeared more than once. 39 DuplicateLint(&'a [u8]), 40 /// There wasn't a `warn`-by-default lint called `"warnings"`. 41 MissingWarningLint, 42 /// There were no `allow`-by-default lints. 43 NoAllowLints, 44 /// There were no `warn`-by-default lints except for `"warnings"`. 45 NoWarnLints, 46 /// There were no `deny`-by-default lints. 47 NoDenyLints, 48 /// Output doesn't contain what it should between the lints and lint groups. 49 Middle, 50 /// The contained line is not the format of a lint group. 51 UnexpectedLintGroupLine(&'a [u8]), 52 /// The contained lint group name appeared more than once. 53 DuplicateLintGroup(&'a [u8]), 54 /// The contained lint group name is the same as the name of a lint. 55 LintSameNameAsLintGroup(&'a [u8]), 56 /// The contained lint group name contained a lint more than once. 57 LintGroupContainsDuplicateLint(&'a [u8], &'a [u8]), 58 /// The contained lint group name contained an unknown lint. 59 LintGroupContainsUnknownLint(&'a [u8], &'a [u8]), 60 /// The contained lint group name has no lints. 61 EmptyLintGroup(&'a [u8]), 62 /// There were no lint groups. 63 NoLintGroups, 64 /// Output doesn't end as expected. 65 End, 66 } 67 /// Lines before lints. 68 const START: &str = "name default meaning 69 ---- ------- -------"; 70 /// Lines between lints and lint groups. 71 const MIDDLE: &str = " 72 name sub-lints 73 ---- ---------"; 74 impl E<'_> { 75 /// Writes `self` into `stderr`. 76 pub(crate) fn into_exit_code(self) -> ExitCode { 77 let mut stderr = io::stderr().lock(); 78 match self { 79 Self::Start => writeln!(stderr, "rustc -Whelp doesn't contain '{START}' ignoring leading spaces"), 80 Self::UnexpectedLintLine(line) => writeln!( 81 stderr, 82 "rustc -Whelp contained the following line that is not the expected format of a lint: {}.", 83 String::from_utf8_lossy(line), 84 ), 85 Self::DuplicateLint(lint) => writeln!( 86 stderr, 87 "rustc -Whelp contained the lint '{}' more than once.", 88 super::as_str(lint) 89 ), 90 Self::MissingWarningLint => writeln!( 91 stderr, 92 "rustc -Whelp didn't contain a warn-by-default lint called 'warnings'." 93 ), 94 Self::NoAllowLints => writeln!( 95 stderr, 96 "rustc -Whelp didn't contain any allow-by-default lints." 97 ), 98 Self::NoWarnLints => writeln!( 99 stderr, 100 "rustc -Whelp didn't contain any warn-by-default lints except for 'warnings'." 101 ), 102 Self::NoDenyLints => writeln!( 103 stderr, 104 "rustc -Whelp didn't contain any deny-by-default lints." 105 ), 106 Self::Middle => writeln!( 107 stderr, 108 "rustc -Whelp doesn't contain '{MIDDLE}' ignoring leading spaces after the lints." 109 ), 110 Self::UnexpectedLintGroupLine(line) => writeln!( 111 stderr, 112 "rustc -Whelp contained the following line that is not the expected format of a lint group: {}.", 113 String::from_utf8_lossy(line), 114 ), 115 Self::DuplicateLintGroup(group) => { 116 writeln!( 117 stderr, 118 "rustc -Whelp contained multiple lint groups called '{}'.", 119 super::as_str(group) 120 ) 121 } 122 Self::LintSameNameAsLintGroup(group) => { 123 writeln!( 124 stderr, 125 "rustc -Whelp contained a lint and lint group both named '{}'.", 126 super::as_str(group) 127 ) 128 } 129 Self::LintGroupContainsDuplicateLint(group, lint) => writeln!( 130 stderr, 131 "rustc -Whelp contained the lint group '{}' which has the lint '{}' more than once.", 132 super::as_str(group), 133 super::as_str(lint), 134 ), 135 Self::LintGroupContainsUnknownLint(group, lint) => writeln!( 136 stderr, 137 "rustc -Whelp contained the lint group '{}' which has the unknown lint '{}'.", 138 super::as_str(group), 139 super::as_str(lint), 140 ), 141 Self::EmptyLintGroup(group) => { 142 writeln!( 143 stderr, 144 "rustc -Whelp contained the empty lint group '{}'.", 145 super::as_str(group), 146 ) 147 } 148 Self::NoLintGroups => writeln!( 149 stderr, 150 "rustc -Whelp didn't contain any lint groups." 151 ), 152 Self::End => writeln!(stderr, "rustc -Whelp did not have at least one empty line after the lint groups."), 153 }.map_or(ExitCode::FAILURE, |()| ExitCode::FAILURE) 154 } 155 } 156 /// Moves `line` to start at the first non-space. 157 #[expect( 158 clippy::arithmetic_side_effects, 159 clippy::indexing_slicing, 160 reason = "comments justifies correctness" 161 )] 162 fn skip_leading_space(line: &mut &[u8]) { 163 // The `usize` contained in the `Result` is at most `line.len()`, we indexing is fine. 164 *line = &line[line 165 .iter() 166 .try_fold(0, |idx, b| { 167 if *b == b' ' { 168 // `idx < line.len()`, so overflow is not possible. 169 Ok(idx + 1) 170 } else { 171 Err(idx) 172 } 173 }) 174 .map_or_else(convert::identity, convert::identity)..]; 175 } 176 /// [`Iterator`] of lines. 177 /// 178 /// In the event a `b'\n'` doesn't exist, `None` will be returned. This means in the event the last 179 /// "line" does not end with `b'\n'`, it won't be returned. 180 struct Lines<'a>(&'a [u8]); 181 impl<'a> Iterator for Lines<'a> { 182 type Item = &'a [u8]; 183 #[expect( 184 clippy::arithmetic_side_effects, 185 clippy::indexing_slicing, 186 reason = "comments justify correctness" 187 )] 188 fn next(&mut self) -> Option<Self::Item> { 189 self.0 190 .iter() 191 .try_fold(0, |idx, b| { 192 if *b == b'\n' { 193 Err(idx) 194 } else { 195 // `idx < self.0.len()`, so overflow is not possible. 196 Ok(idx + 1) 197 } 198 }) 199 .map_or_else( 200 |idx| { 201 // `idx <= self.0.len()`, so this won't `panic`. 202 let (val, rem) = self.0.split_at(idx); 203 // `rem` starts with a newline, so this won't `panic`. 204 self.0 = &rem[1..]; 205 Some(val) 206 }, 207 |_| None, 208 ) 209 } 210 } 211 /// Extracts the lint or lint group name from `line`. 212 /// 213 /// Returns `Some` iff a valid lint or lint group name is found ignoring leading spaces and 214 /// if there is non-spaces after. The first `slice` is the name and the second slice is 215 /// the remaining portion of `line` with leading spaces removed. 216 #[expect( 217 clippy::arithmetic_side_effects, 218 reason = "comment justifies correctness" 219 )] 220 fn get_lint_name(mut line: &[u8]) -> Option<(&[u8], &[u8])> { 221 skip_leading_space(&mut line); 222 line.iter() 223 .try_fold(0, |idx, b| { 224 if *b == b' ' { 225 Err(Some(idx)) 226 } else if *b == b'-' || b.is_ascii_alphanumeric() { 227 // `idx < line.len()`, so overflow is not possible. 228 Ok(idx + 1) 229 } else { 230 Err(None) 231 } 232 }) 233 .map_or_else( 234 |opt| { 235 opt.and_then(|idx| { 236 let (name, mut rem) = line.split_at(idx); 237 let len = rem.len(); 238 skip_leading_space(&mut rem); 239 if len == rem.len() { 240 None 241 } else { 242 Some((name, rem)) 243 } 244 }) 245 }, 246 |_| None, 247 ) 248 } 249 /// The lints. 250 /// 251 /// All `HashSet`s are non-empty with no overlap. `warn` doesn't contain `"warnings"`. 252 struct Lints<'a> { 253 /// `allow`-by-default lints. 254 allow: HashSet<&'a [u8]>, 255 /// `warn`-by-default lints. 256 warn: HashSet<&'a [u8]>, 257 /// `deny`-by-default lints. 258 deny: HashSet<&'a [u8]>, 259 } 260 /// `"warnings"`. 261 pub(crate) const WARNINGS: &str = "warnings"; 262 impl<'a> Lints<'a> { 263 /// Gets the lints from `lines` erring when there are duplicates, there is no `warn`-by-default 264 /// lint called `"warnings"`, or the lints are empty (ignoring the `"warnings"` lint). 265 #[expect( 266 clippy::arithmetic_side_effects, 267 clippy::indexing_slicing, 268 reason = "comments justify correctness" 269 )] 270 fn new(lines: &mut Lines<'a>) -> Result<Self, E<'a>> { 271 /// Get the lint from `line`. 272 /// 273 /// Returns `Some` iff a valid lint name is found after removing leading spaces. 274 /// The first `slice` is the name, and the second `slice` is the next "word". 275 fn get_lint(line: &[u8]) -> Option<(&[u8], &[u8])> { 276 get_lint_name(line).and_then(|(lint, rem)| { 277 rem.iter() 278 // `idx < rem.len()`, so overflow is not possible. 279 .try_fold(0, |idx, b| if *b == b' ' { Err(idx) } else { Ok(idx + 1) }) 280 // `idx <= rem.len()`, so this won't `panic`. 281 .map_or_else(|idx| Some((lint, &rem[..idx])), |_| None) 282 }) 283 } 284 let mut allow = HashSet::with_capacity(128); 285 let mut warn = HashSet::with_capacity(128); 286 let mut deny = HashSet::with_capacity(128); 287 lines 288 .try_fold((), |(), line| { 289 if line.is_empty() { 290 Err(None) 291 } else { 292 get_lint(line) 293 .ok_or(Some(E::UnexpectedLintLine(line))) 294 .and_then(|(lint, status)| match status { 295 b"allow" => { 296 if !allow.insert(lint) || warn.contains(lint) || deny.contains(lint) 297 { 298 Err(Some(E::DuplicateLint(lint))) 299 } else { 300 Ok(()) 301 } 302 } 303 b"warn" => { 304 if !warn.insert(lint) || allow.contains(lint) || deny.contains(lint) 305 { 306 Err(Some(E::DuplicateLint(lint))) 307 } else { 308 Ok(()) 309 } 310 } 311 b"deny" => { 312 if !deny.insert(lint) || allow.contains(lint) || warn.contains(lint) 313 { 314 Err(Some(E::DuplicateLint(lint))) 315 } else { 316 Ok(()) 317 } 318 } 319 _ => Err(Some(E::UnexpectedLintLine(line))), 320 }) 321 } 322 }) 323 .map_or_else( 324 |opt| { 325 opt.map_or_else( 326 || { 327 if warn.remove(WARNINGS.as_bytes()) { 328 if allow.is_empty() { 329 Err(E::NoAllowLints) 330 } else if warn.is_empty() { 331 Err(E::NoWarnLints) 332 } else if deny.is_empty() { 333 Err(E::NoDenyLints) 334 } else { 335 Ok(Self { allow, warn, deny }) 336 } 337 } else { 338 Err(E::MissingWarningLint) 339 } 340 }, 341 Err, 342 ) 343 }, 344 |()| Err(E::Middle), 345 ) 346 } 347 } 348 /// [`Iterator`] of values separated by commas and spaces. 349 /// 350 /// In the event a value comes after a comma, a single leading space is assumed to exist and will be removed. 351 struct Csv<'a>(&'a [u8]); 352 impl<'a> Iterator for Csv<'a> { 353 type Item = Result<&'a [u8], ()>; 354 #[expect( 355 clippy::arithmetic_side_effects, 356 reason = "comments justify correctness" 357 )] 358 fn next(&mut self) -> Option<Self::Item> { 359 /// `b", "`. 360 const COMMA_SPACE: &[u8; 2] = b", "; 361 (!self.0.is_empty()).then(|| { 362 match self.0.iter().try_fold(0, |idx, b| { 363 if *b == b',' { 364 Err(idx) 365 } else { 366 // `idx < self.0.len()`, so overflow is not possible. 367 Ok(idx + 1) 368 } 369 }) { 370 Ok(_) => { 371 let val = self.0; 372 self.0 = &[]; 373 Ok(val) 374 } 375 Err(idx) => { 376 // `idx <= self.0.len()`, so this won't `panic`. 377 let (val, rem) = self.0.split_at(idx); 378 rem.split_at_checked(COMMA_SPACE.len()) 379 .ok_or(()) 380 .and_then(|(fst, fst_rem)| { 381 if fst == COMMA_SPACE { 382 self.0 = fst_rem; 383 Ok(val) 384 } else { 385 Err(()) 386 } 387 }) 388 } 389 } 390 }) 391 } 392 } 393 /// Group of lints. 394 pub(crate) struct LintGroup<'a> { 395 /// Name of the group. 396 pub name: &'a [u8], 397 /// Lints that make up the group. 398 pub lints: HashSet<&'a [u8]>, 399 } 400 impl Eq for LintGroup<'_> {} 401 impl PartialEq for LintGroup<'_> { 402 fn eq(&self, other: &Self) -> bool { 403 self.name == other.name 404 } 405 } 406 impl Hash for LintGroup<'_> { 407 fn hash<H: Hasher>(&self, state: &mut H) { 408 self.name.hash(state); 409 } 410 } 411 impl PartialOrd for LintGroup<'_> { 412 fn partial_cmp(&self, other: &Self) -> Option<Ordering> { 413 Some(self.cmp(other)) 414 } 415 } 416 impl Ord for LintGroup<'_> { 417 fn cmp(&self, other: &Self) -> Ordering { 418 self.name.cmp(other.name) 419 } 420 } 421 /// Data from `rustc -Whelp`. 422 pub(crate) struct Data<'a> { 423 /// `allow`-by-default lints. 424 pub allow: Vec<&'a [u8]>, 425 /// `warn`-by-default lints. 426 pub warn: Vec<&'a [u8]>, 427 /// `deny`-by-default lints. 428 pub deny: Vec<&'a [u8]>, 429 /// Lint groups. 430 pub groups: Vec<LintGroup<'a>>, 431 } 432 impl<'a> Data<'a> { 433 /// Moves until the lints. 434 /// 435 /// Returns `true` iff the header was found. 436 fn move_to_lints(lines: &mut Lines<'_>) -> bool { 437 lines 438 .try_fold((), |(), mut line| { 439 skip_leading_space(&mut line); 440 if line == b"name default meaning" { 441 Err(()) 442 } else { 443 Ok(()) 444 } 445 }) 446 .map_or_else( 447 |()| { 448 lines.next().is_some_and(|mut line| { 449 skip_leading_space(&mut line); 450 line == b"---- ------- -------" 451 }) 452 }, 453 |()| false, 454 ) 455 } 456 /// Moves until the lint groups. 457 /// 458 /// Returns `true` iff the header was found. 459 fn move_to_lint_groups(lines: &mut Lines<'_>) -> bool { 460 lines 461 .try_fold((), |(), mut line| { 462 skip_leading_space(&mut line); 463 if line == b"name sub-lints" { 464 Err(()) 465 } else { 466 Ok(()) 467 } 468 }) 469 .map_or_else( 470 |()| { 471 lines.next().is_some_and(|mut line| { 472 skip_leading_space(&mut line); 473 line == b"---- ---------" 474 }) 475 }, 476 |()| false, 477 ) 478 } 479 /// Gets the lint groups from `lines` erring when there are duplicate lint group names or if a lint group 480 /// name is the name of a lint (with the exception of `"warnings"`), or if a group contains duplicate lints. 481 /// In the event `allow_undefined_lints` is `false`, every lint in a lint group must exist in `single_lints`. 482 fn get_lint_groups( 483 output: &mut Lines<'a>, 484 single_lints: &Lints<'_>, 485 allow_undefined_lints: bool, 486 ) -> Result<HashSet<LintGroup<'a>>, E<'a>> { 487 let mut groups = HashSet::with_capacity(16); 488 output 489 .try_fold((), |(), line| { 490 if line.is_empty() { 491 Err(None) 492 } else { 493 get_lint_name(line) 494 .ok_or(Some(E::UnexpectedLintGroupLine(line))) 495 .and_then(|(name, group_lints)| { 496 if name == WARNINGS.as_bytes() { 497 Ok(()) 498 } else if single_lints.allow.contains(name) 499 || single_lints.warn.contains(name) 500 || single_lints.deny.contains(name) 501 { 502 Err(Some(E::LintSameNameAsLintGroup(name))) 503 } else { 504 let mut lints = HashSet::with_capacity(32); 505 Csv(group_lints) 506 .try_fold((), |(), res| { 507 res.map_err(|()| Some(E::UnexpectedLintGroupLine(line))) 508 .and_then(|lint| { 509 if allow_undefined_lints 510 || single_lints.allow.contains(lint) 511 || single_lints.warn.contains(lint) 512 || single_lints.deny.contains(lint) 513 { 514 if lints.insert(lint) { 515 Ok(()) 516 } else { 517 Err(Some( 518 E::LintGroupContainsDuplicateLint( 519 name, lint, 520 ), 521 )) 522 } 523 } else { 524 Err(Some(E::LintGroupContainsUnknownLint( 525 name, lint, 526 ))) 527 } 528 }) 529 }) 530 .and_then(|()| { 531 if lints.is_empty() { 532 Err(Some(E::EmptyLintGroup(name))) 533 } else if groups.insert(LintGroup { name, lints }) { 534 Ok(()) 535 } else { 536 Err(Some(E::DuplicateLintGroup(name))) 537 } 538 }) 539 } 540 }) 541 } 542 }) 543 .map_or_else( 544 |opt| { 545 opt.map_or_else( 546 || { 547 if groups.is_empty() { 548 Err(E::NoLintGroups) 549 } else { 550 Ok(groups) 551 } 552 }, 553 Err, 554 ) 555 }, 556 |()| Err(E::End), 557 ) 558 } 559 /// Parses output and returns lints and lint groups. 560 pub(crate) fn new(output: &'a [u8], allow_undefined_lints: bool) -> Result<Self, E<'a>> { 561 let mut lines = Lines(output); 562 if Self::move_to_lints(&mut lines) { 563 Lints::new(&mut lines).and_then(|lints| { 564 if Self::move_to_lint_groups(&mut lines) { 565 Self::get_lint_groups(&mut lines, &lints, allow_undefined_lints).map( 566 |group_set| { 567 let mut allow = Vec::with_capacity(lints.allow.len()); 568 lints.allow.into_iter().fold((), |(), lint| { 569 allow.push(lint); 570 }); 571 allow.sort_unstable(); 572 let mut warn = Vec::with_capacity(lints.warn.len()); 573 lints.warn.into_iter().fold((), |(), lint| { 574 warn.push(lint); 575 }); 576 warn.sort_unstable(); 577 let mut deny = Vec::with_capacity(lints.deny.len()); 578 lints.deny.into_iter().fold((), |(), lint| { 579 deny.push(lint); 580 }); 581 deny.sort_unstable(); 582 let mut groups = Vec::with_capacity(group_set.len()); 583 group_set.into_iter().fold((), |(), group| { 584 groups.push(group); 585 }); 586 groups.sort_unstable(); 587 Self { 588 allow, 589 warn, 590 deny, 591 groups, 592 } 593 }, 594 ) 595 } else { 596 Err(E::Middle) 597 } 598 }) 599 } else { 600 Err(E::Start) 601 } 602 } 603 } 604 #[cfg(all(test, not(target_pointer_width = "16")))] 605 mod tests { 606 use super::{Data, E, io::Read as _}; 607 use std::fs::{self, File}; 608 #[expect( 609 clippy::assertions_on_constants, 610 reason = "want to pretty-print problematic file" 611 )] 612 #[expect(clippy::verbose_file_reads, reason = "want to lock file")] 613 #[expect(clippy::tests_outside_test_module, reason = "false positive")] 614 #[test] 615 fn outputs() { 616 let mut output = Vec::with_capacity(u16::MAX.into()); 617 assert!( 618 fs::read_dir("./outputs/").is_ok_and(|mut dir| { 619 dir.try_fold((), |(), ent_res| { 620 if ent_res.is_ok_and(|ent| { 621 File::options() 622 .read(true) 623 .open(ent.path()) 624 .is_ok_and(|mut file| { 625 file.lock_shared().is_ok_and(|()| { 626 output.clear(); 627 file.read_to_end(&mut output).is_ok_and(|_| { 628 // Release lock. 629 drop(file); 630 let file_name = ent.file_name(); 631 let file_name_bytes = file_name.as_encoded_bytes(); 632 Data::new(&output, false).map_or_else( 633 |e| match file_name_bytes { 634 b"1.34.0.txt" | b"1.34.1.txt" | b"1.34.2.txt" => { 635 assert_eq!( 636 e, 637 E::LintGroupContainsUnknownLint( 638 b"future-incompatible", 639 b"duplicate-matcher-binding-name" 640 ), 641 "1.34.0.txt, 1.34.1.txt, and 1.34.2.txt can't be parsed for a reason other than the expected reason" 642 ); 643 Data::new(&output, true).is_ok() 644 } 645 b"1.48.0.txt" => { 646 assert_eq!( 647 e, 648 E::LintGroupContainsUnknownLint( 649 b"rustdoc", 650 b"private-intra-doc-links" 651 ), 652 "1.48.0.txt can't be parsed for a reason other than the expected reason" 653 ); 654 Data::new(&output, true).is_ok() 655 } 656 _ => { 657 assert!( 658 false, 659 "{} cannot be parsed due to {e:?}.", 660 String::from_utf8_lossy(file_name_bytes), 661 ); 662 false 663 } 664 }, 665 |_| { 666 if matches!(file_name_bytes, b"1.34.0.txt" | b"1.34.1.txt" | b"1.34.2.txt" | b"1.48.0.txt") { 667 assert!(false, "{} shouldn't be parsable", String::from_utf8_lossy(file_name_bytes)); 668 false 669 } else { 670 true 671 } 672 }, 673 ) 674 }) 675 }) 676 }) 677 }) { 678 Ok(()) 679 } else { 680 Err(()) 681 } 682 }) 683 .is_ok() 684 }) 685 ); 686 } 687 }