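//! Reusable `nom` parser combinators (`rust-lib/src/nom/combinators.rs`).
//!
//! Provides helpers for trimming surrounding whitespace, parsing parenthesized
//! input, taking a fixed number of predicate-matching items, and requiring
//! that the whole input is consumed.
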
use nom::IResult;
use nom::bytes::complete::take_while_m_n;
use nom::character::complete::char;
use nom::character::complete::multispace0;
use nom::combinator::eof;
use nom::error::ParseError;
use nom::sequence::delimited;
use nom::sequence::terminated;
use nom::{Input, Parser};
2024-06-22 17:19:55 +02:00
/// Trim leading and trailing whitespace from the input Parser
/// - Parameters
/// - `inner`: The parser to trim
/// - Returns: A parser that trims leading and trailing whitespace from the input and then runs the value from the inner parser
pub fn trim<I, O, F, E: ParseError<I>>(inner: F) -> impl Parser<I, Output = O, Error = E>
2024-06-22 17:19:55 +02:00
where
I: Input,
F: Parser<I, Output = O, Error = E>,
<I as Input>::Item: nom::AsChar,
2024-06-22 17:19:55 +02:00
{
delimited(multispace0, inner, multispace0)
}
/// Parse a parenthesized expression. This parser will parse an expression that is surrounded by parentheses
/// and will trim the whitespace surrounding the expression.
/// - Parameters
/// - `inner`: The parser to run inside the parentheses
/// - Returns: A parser that parses a parenthesized expression
pub fn parenthesized<I, O, F, E: ParseError<I>>(inner: F) -> impl Parser<I, Output = O, Error = E>
2024-06-22 17:19:55 +02:00
where
I: Input,
F: Parser<I, Output = O, Error = E>,
<I as Input>::Item: nom::AsChar,
2024-06-22 17:19:55 +02:00
{
2024-07-17 12:48:09 +02:00
delimited(char('('), inner, char(')'))
2024-06-22 17:19:55 +02:00
}
/// Take where the predicate is true and the length is exactly `n`
/// - Parameters
/// - `n`: The length of the string to take
/// - `predicate`: The predicate to call to validate the input
/// - Returns: A parser that takes `n` characters from the input
pub fn take_where<F, I>(n: usize, predicate: F) -> impl FnMut(I) -> IResult<I, I>
2024-06-22 17:19:55 +02:00
where
I: Input,
F: Fn(<I as Input>::Item) -> bool,
2024-06-22 17:19:55 +02:00
{
take_while_m_n(n, n, predicate)
}
/// Parse the inner parser and then the end of the input.
/// Very useful for ensuring that the entire input is consumed.
/// - Parameters
/// - `inner`: The parser to run
/// - Returns: A parser that runs the inner parser and then the end of the input
/// # Example
/// ```
/// use lib::nom::combinators::exhausted;
/// use nom::bytes::complete::{tag};
/// use nom::Parser;
///
/// let input = "test";
/// let (remaining, result) = exhausted(tag::<&str, &str, nom::error::Error<&str>>("test")).parse(input).unwrap();
/// assert_eq!(remaining, "");
/// assert_eq!(result, "test");
/// ```
/// - Fails if the input is not exhausted
/// ```
/// use lib::nom::combinators::exhausted;
/// use nom::bytes::complete::{tag};
/// use nom::Parser;
///
/// let input = "test";
/// assert!(exhausted(tag::<&str, &str, nom::error::Error<&str>>("tes")).parse(input).is_err());
/// ```
pub fn exhausted<F, I, O, E: ParseError<I>>(inner: F) -> impl Parser<I, Output = O, Error = E>
2024-06-22 17:19:55 +02:00
where
I: Input,
F: Parser<I, Output = O, Error = E>,
2024-06-22 17:19:55 +02:00
{
terminated(inner, eof)
}
/// Unit tests for the combinators defined in this module.
#[cfg(test)]
mod tests {
    use super::*;
    use nom::bytes::complete::take_while;

    // --- trim ---------------------------------------------------------------

    #[test]
    fn test_trim_both_sides() {
        let input = " test ";
        let (remaining, result) = trim(take_where(4, |c: char| c.is_ascii_alphabetic()))
            .parse(input)
            .unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result, "test");
    }

    #[test]
    fn test_trim_leading() {
        let input = " test";
        let (remaining, result) = trim(take_where(4, |c: char| c.is_ascii_alphabetic()))
            .parse(input)
            .unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result, "test");
    }

    #[test]
    fn test_trim_trailing() {
        let input = "test ";
        let (remaining, result) = trim(take_where(4, |c: char| c.is_ascii_alphabetic()))
            .parse(input)
            .unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result, "test");
    }

    #[test]
    fn test_trim_no_trim() {
        let input = "test";
        let (remaining, result) = trim(take_where(4, |c: char| c.is_ascii_alphabetic()))
            .parse(input)
            .unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result, "test");
    }

    // --- parenthesized ------------------------------------------------------

    #[test]
    fn test_parenthesized() {
        let input = "(test)";
        let (remaining, result) = parenthesized(take_where(4, |c: char| c.is_ascii_alphabetic()))
            .parse(input)
            .unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result, "test");
    }

    #[test]
    fn test_parenthesized_parse_until_end() {
        // The inner parser swallows the closing parenthesis, so `)` can no longer match.
        let input = "(test)";
        assert!(
            parenthesized::<&str, &str, _, nom::error::Error<&str>>(take_while(|_| true))
                .parse(input)
                .is_err()
        );
    }

    #[test]
    fn test_parenthesized_does_not_trim() {
        // Whitespace inside the parentheses is handed to the inner parser untouched.
        let input = "( test )";
        assert!(
            parenthesized(take_where(4, |c: char| c.is_ascii_alphabetic()))
                .parse(input)
                .is_err()
        );
    }

    // --- take_where ---------------------------------------------------------

    #[test]
    fn test_take_where() {
        let input = "test";
        let (remaining, result) = take_where(4, |c: char| c.is_ascii_alphabetic())(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result, "test");
    }

    #[test]
    fn test_take_where_not_enough() {
        let input = "tes";
        assert!(take_where(4, |c: char| c.is_ascii_alphabetic())(input).is_err());
    }

    #[test]
    fn test_take_where_too_much() {
        let input = "testing";
        assert_eq!(
            take_where(4, |c: char| c.is_ascii_alphabetic()).parse(input),
            Ok(("ing", "test"))
        );
    }

    #[test]
    fn test_take_where_predicate_false() {
        let input = "test";
        assert!(
            take_where(4, |c: char| c.is_ascii_digit())
                .parse(input)
                .is_err()
        );
    }

    // --- exhausted ----------------------------------------------------------

    #[test]
    fn test_exhausted() {
        let input = "test";
        let (remaining, result) = exhausted(take_where(4, |c: char| c.is_ascii_alphabetic()))
            .parse(input)
            .unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result, "test");
    }

    #[test]
    fn test_exhausted_not_exhausted() {
        let input = "test ";
        assert!(
            exhausted(take_where(4, |c: char| c.is_ascii_alphabetic()))
                .parse(input)
                .is_err()
        );
    }

    #[test]
    fn test_exhausted_tuple() {
        let input = "test";
        let (remaining, result) = exhausted((
            take_where(3, |c: char| c.is_ascii_alphabetic()),
            take_while(|c: char| c.is_ascii_alphabetic()),
        ))
        .parse(input)
        .unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result, ("tes", "t"));
    }
}