Skip to main content

object_storage_proxy/parsers/
path.rs

1use std::{collections::HashMap, string::FromUtf8Error};
2
3use nom::{
4    IResult, Parser,
5    branch::alt,
6    bytes::complete::{tag, take_until, take_while1},
7    character::complete::char,
8    combinator::{eof, map, map_res, rest},
9    multi::separated_list0,
10    sequence::{preceded, separated_pair},
11};
12
13/// Parse an S3-style request path into `(bucket, object_path)`.
14///
15/// The expected format is `/<bucket>[/<object_path>]`.
16///
17/// * `/my-bucket` -> `("my-bucket", "/")`
18/// * `/my-bucket/prefix/key` -> `("my-bucket", "/prefix/key")`
19///
20/// # Errors
21///
22/// Returns a nom error if the input is empty or does not start with `/`.
23pub(crate) fn parse_path(input: &str) -> IResult<&str, (&str, &str)> {
24    let (_remaining, (_, bucket, rest)) = (
25        char('/'),
26        take_while1(|c| c != '/'),
27        alt((preceded(char('/'), rest), map(eof, |_| ""))),
28    )
29        .parse(input)?;
30
31    let rest_path = if rest.is_empty() {
32        "/"
33    } else {
34        // recover the slash before `rest`
35        &input[input.find(rest).expect("rest is a substring of input") - 1..]
36    };
37
38    Ok(("", (bucket, rest_path)))
39}
40
41fn decode_segment(input: &str) -> Result<String, FromUtf8Error> {
42    urlencoding::decode(input).map(|s| s.to_string())
43}
44
45fn key_value_pair(input: &str) -> IResult<&str, (String, String)> {
46    // First try the normal key=value form.
47    if input.contains('=')
48        && (input
49            .find('&')
50            .is_none_or(|a| input.find('=').is_some_and(|e| e < a)))
51    {
52        let (input, (key, value)) = (separated_pair(
53            map_res(take_until("="), decode_segment),
54            tag("="),
55            map_res(take_until_either("&"), decode_segment),
56        ))
57        .parse(input)?;
58        return Ok((input, (key, value)));
59    }
60    // Bare sub-resource key with no value (e.g. "delete", "uploads", "tagging").
61    let (input, key) = map_res(take_until_either("&"), decode_segment).parse(input)?;
62    Ok((input, (key, String::new())))
63}
64
65fn take_until_either<'a>(end: &'static str) -> impl FnMut(&'a str) -> IResult<&'a str, &'a str> {
66    move |input: &'a str| match input.find(end) {
67        Some(idx) => Ok((&input[idx..], &input[..idx])),
68        None => rest(input),
69    }
70}
71
72/// Parse a URL-encoded query string into a key/value map.
73///
74/// Keys and values are percent-decoded.  An empty input yields an empty map.
75///
76/// # Errors
77///
78/// Returns a nom error if any key/value pair cannot be parsed.
79pub fn parse_query(input: &str) -> IResult<&str, HashMap<String, String>> {
80    let (rest, pairs) = (separated_list0(char('&'), key_value_pair)).parse(input)?;
81    let map = pairs.into_iter().collect::<HashMap<_, _>>();
82    Ok((rest, map))
83}
84
#[cfg(test)]
mod tests {
    use super::*;

    /// Bucket followed by a multi-segment object path.
    #[test]
    fn test_parse_path_with_bucket_and_path() {
        assert_eq!(
            parse_path("/bucket_name/some/path"),
            Ok(("", ("bucket_name", "/some/path")))
        );
    }

    /// A bucket with no object path maps to the root path `/`.
    #[test]
    fn test_parse_path_with_bucket_only() {
        assert_eq!(parse_path("/bucket_name"), Ok(("", ("bucket_name", "/"))));
    }

    /// Empty input is rejected.
    #[test]
    fn test_parse_path_with_empty_input() {
        assert!(parse_path("").is_err());
    }

    /// The leading slash is mandatory.
    #[test]
    fn test_parse_path_with_no_leading_slash() {
        assert!(parse_path("bucket_name/some/path").is_err());
    }

    /// A trailing slash after the bucket is equivalent to no object path.
    #[test]
    fn test_parse_path_with_trailing_slash() {
        assert_eq!(parse_path("/bucket_name/"), Ok(("", ("bucket_name", "/"))));
    }

    /// Repeated slashes inside the object path are preserved verbatim.
    #[test]
    fn test_parse_path_with_multiple_slashes_in_path() {
        assert_eq!(
            parse_path("/bucket_name/some//path"),
            Ok(("", ("bucket_name", "/some//path")))
        );
    }

    /// Dashes, underscores and digits are accepted in the bucket name.
    #[test]
    fn test_parse_path_with_special_characters_in_bucket() {
        assert_eq!(
            parse_path("/bucket-name_123/some/path"),
            Ok(("", ("bucket-name_123", "/some/path")))
        );
    }

    /// Dashes, underscores and dots are accepted in the object path.
    #[test]
    fn test_parse_path_with_special_characters_in_path() {
        assert_eq!(
            parse_path("/bucket_name/some/path-with_special.chars"),
            Ok(("", ("bucket_name", "/some/path-with_special.chars")))
        );
    }

    /// Percent-encoded keys and values are decoded, including an encoded `&`.
    #[test]
    fn test_parse_query_nom_urlencoded() {
        let (_rest, map) =
            parse_query("name=John%20Doe&path=%2Fusr%2Fbin&lang=Rust%26C%2B%2B").unwrap();

        let expected = [
            ("name", "John Doe"),
            ("path", "/usr/bin"),
            ("lang", "Rust&C++"),
        ];
        for &(key, value) in &expected {
            assert_eq!(map.get(key).map(String::as_str), Some(value));
        }
    }
}