From 48594a75e6d1c178d6be80c01810d9e9a494f1b0 Mon Sep 17 00:00:00 2001 From: Bert Peters Date: Thu, 1 Dec 2022 11:28:59 +0100 Subject: [PATCH] Make parsers more robust --- 2022/src/common.rs | 81 ++++++++++++++++++++++++++++++++++++++++++++-- 2022/src/day01.rs | 10 ++---- 2 files changed, 81 insertions(+), 10 deletions(-) diff --git a/2022/src/common.rs b/2022/src/common.rs index 636e230..6eff6c8 100644 --- a/2022/src/common.rs +++ b/2022/src/common.rs @@ -1,7 +1,11 @@ //! Common helper utilities to all days use anyhow::Result; +use nom::error::ErrorKind; +use nom::error::ParseError; use nom::Finish; +use nom::IResult; +use nom::InputLength; use nom::Parser; /// Parse input from some nom parser and return as an anyhow result @@ -12,8 +16,79 @@ pub fn parse_input<'a, O>( input: &'a [u8], mut parser: impl Parser<&'a [u8], O, nom::error::Error<&'a [u8]>>, ) -> Result { - match parser.parse(input).finish() { - Ok((_, value)) => Ok(value), - Err(err) => anyhow::bail!("Failed to parse at: {err:?}"), + let (unparsed, output) = parser.parse(input).finish().map_err(|e| { + anyhow::anyhow!( + "Parser error {:?} to parse at {}", + e.code, + String::from_utf8_lossy(e.input) + ) + })?; + + if !unparsed.is_empty() { + Err(anyhow::anyhow!( + "Not all input consumed: {}", + String::from_utf8_lossy(unparsed) + )) + } else { + Ok(output) + } +} + +/// Applies a parser iteratively and reduces the results using the given function. Fails if the +/// embedded parser doesn't return at least one result. +/// +/// # Arguments +/// - `f`: the function to apply +/// - `g`: the function that combines the result of `f` with previous results +/// +/// This implementation is based on [`nom::multi::fold_many1`] with minor differences. If +/// successful, this should probably be upstreamed. 
+pub fn reduce_many1( + mut f: F, + mut g: impl FnMut(O, O) -> O, +) -> impl FnMut(I) -> IResult +where + I: Clone + InputLength, + E: ParseError, + F: Parser, +{ + // Cannot delegate to fold_many0 because that would make the function FnOnce rather than FnMut, + // since it would transfer ownership of the embedded parser to fold_many0. + move |i: I| { + let _i = i.clone(); + match f.parse(_i) { + Err(nom::Err::Error(_)) => { + Err(nom::Err::Error(E::from_error_kind(i, ErrorKind::Many1))) + } + Err(e) => Err(e), + Ok((i1, mut acc)) => { + let mut input = i1; + + loop { + let _input = input.clone(); + let len = input.input_len(); + match f.parse(_input) { + Err(nom::Err::Error(_)) => { + break; + } + Err(e) => return Err(e), + Ok((i, o)) => { + // infinite loop check: the parser must always consume + if i.input_len() == len { + return Err(nom::Err::Failure(E::from_error_kind( + i, + ErrorKind::Many1, + ))); + } + + acc = g(acc, o); + input = i; + } + } + } + + Ok((input, acc)) + } + } } } diff --git a/2022/src/day01.rs b/2022/src/day01.rs index 98e14df..e5d59c8 100644 --- a/2022/src/day01.rs +++ b/2022/src/day01.rs @@ -3,23 +3,19 @@ use std::ops::Add; use anyhow::Result; use nom::character::complete::newline; use nom::combinator::opt; -use nom::multi::fold_many1; use nom::multi::separated_list0; use nom::sequence::terminated; use nom::IResult; use crate::common::parse_input; +use crate::common::reduce_many1; fn parse_elf(input: &[u8]) -> IResult<&[u8], i32> { - fold_many1( - terminated(nom::character::complete::i32, newline), - || 0, - Add::add, - )(input) + reduce_many1(terminated(nom::character::complete::i32, newline), Add::add)(input) } fn parse_max(input: &[u8]) -> IResult<&[u8], i32> { - fold_many1(terminated(parse_elf, opt(newline)), || 0, Ord::max)(input) + reduce_many1(terminated(parse_elf, opt(newline)), Ord::max)(input) } pub fn part1(input: &[u8]) -> Result {