Skip to main content

object_storage_proxy/parsers/
path.rs

1use std::{collections::HashMap, string::FromUtf8Error};
2
3use nom::{
4    IResult, Parser,
5    branch::alt,
6    bytes::complete::{tag, take_until, take_while1},
7    character::complete::char,
8    combinator::{eof, map, map_res, rest},
9    multi::separated_list0,
10    sequence::{preceded, separated_pair},
11};
12
13/// Parse an S3-style request path into `(bucket, object_path)`.
14///
15/// The expected format is `/<bucket>[/<object_path>]`.
16///
17/// * `/my-bucket` → `("my-bucket", "/")`
18/// * `/my-bucket/prefix/key` → `("my-bucket", "/prefix/key")`
19///
20/// # Errors
21///
22/// Returns a nom error if the input is empty or does not start with `/`.
23pub(crate) fn parse_path(input: &str) -> IResult<&str, (&str, &str)> {
24    let (_remaining, (_, bucket, rest)) = (
25        char('/'),
26        take_while1(|c| c != '/'),
27        alt((preceded(char('/'), rest), map(eof, |_| ""))),
28    )
29        .parse(input)?;
30
31    let rest_path = if rest.is_empty() {
32        "/"
33    } else {
34        // recover the slash before `rest`
35        &input[input.find(rest).expect("rest is a substring of input") - 1..]
36    };
37
38    Ok(("", (bucket, rest_path)))
39}
40
41fn decode_segment(input: &str) -> Result<String, FromUtf8Error> {
42    urlencoding::decode(input).map(|s| s.to_string())
43}
44
45fn key_value_pair(input: &str) -> IResult<&str, (String, String)> {
46    let (input, (key, value)) = (separated_pair(
47        map_res(take_until("="), decode_segment),
48        tag("="),
49        map_res(take_until_either("&"), decode_segment),
50    ))
51    .parse(input)?;
52    Ok((input, (key, value)))
53}
54
55fn take_until_either<'a>(end: &'static str) -> impl FnMut(&'a str) -> IResult<&'a str, &'a str> {
56    move |input: &'a str| match input.find(end) {
57        Some(idx) => Ok((&input[idx..], &input[..idx])),
58        None => rest(input),
59    }
60}
61
62/// Parse a URL-encoded query string into a key/value map.
63///
64/// Keys and values are percent-decoded.  An empty input yields an empty map.
65///
66/// # Errors
67///
68/// Returns a nom error if any key/value pair cannot be parsed.
69pub fn parse_query(input: &str) -> IResult<&str, HashMap<String, String>> {
70    let (rest, pairs) = (separated_list0(char('&'), key_value_pair)).parse(input)?;
71    let map = pairs.into_iter().collect::<HashMap<_, _>>();
72    Ok((rest, map))
73}
74
#[cfg(test)]
mod tests {
    use super::*;

    // --- parse_path -------------------------------------------------------

    /// A bucket followed by a nested key yields the key with its leading slash.
    #[test]
    fn test_parse_path_with_bucket_and_path() {
        assert_eq!(
            parse_path("/bucket_name/some/path"),
            Ok(("", ("bucket_name", "/some/path")))
        );
    }

    /// A bare bucket maps to the root object path.
    #[test]
    fn test_parse_path_with_bucket_only() {
        assert_eq!(parse_path("/bucket_name"), Ok(("", ("bucket_name", "/"))));
    }

    /// Empty input is rejected.
    #[test]
    fn test_parse_path_with_empty_input() {
        assert!(parse_path("").is_err());
    }

    /// Input without the leading slash is rejected.
    #[test]
    fn test_parse_path_with_no_leading_slash() {
        assert!(parse_path("bucket_name/some/path").is_err());
    }

    /// A trailing slash after the bucket is equivalent to no object path.
    #[test]
    fn test_parse_path_with_trailing_slash() {
        assert_eq!(parse_path("/bucket_name/"), Ok(("", ("bucket_name", "/"))));
    }

    /// Repeated slashes inside the object path are preserved verbatim.
    #[test]
    fn test_parse_path_with_multiple_slashes_in_path() {
        assert_eq!(
            parse_path("/bucket_name/some//path"),
            Ok(("", ("bucket_name", "/some//path")))
        );
    }

    /// Dashes, underscores and digits are accepted in the bucket name.
    #[test]
    fn test_parse_path_with_special_characters_in_bucket() {
        assert_eq!(
            parse_path("/bucket-name_123/some/path"),
            Ok(("", ("bucket-name_123", "/some/path")))
        );
    }

    /// Dashes, underscores and dots are accepted in the object path.
    #[test]
    fn test_parse_path_with_special_characters_in_path() {
        assert_eq!(
            parse_path("/bucket_name/some/path-with_special.chars"),
            Ok(("", ("bucket_name", "/some/path-with_special.chars")))
        );
    }

    // --- parse_query ------------------------------------------------------

    /// Percent-encoded values are decoded, including encoded `&` and `+`.
    #[test]
    fn test_parse_query_nom_urlencoded() {
        let (_unparsed, params) =
            parse_query("name=John%20Doe&path=%2Fusr%2Fbin&lang=Rust%26C%2B%2B").unwrap();

        let expected = [
            ("name", "John Doe"),
            ("path", "/usr/bin"),
            ("lang", "Rust&C++"),
        ];
        for (key, value) in expected {
            assert_eq!(params.get(key).unwrap(), value);
        }
    }
}