Skip to content

Commit

Permalink
feat: high level repodata access (#560)
Browse files Browse the repository at this point in the history
This PR introduces the concept of a `Gateway` into
`rattler_repodata_gateway`.

-------
Co-authored-by: Wolf Vollprecht <w.vollprecht@gmail.com>
  • Loading branch information
baszalmstra authored May 10, 2024
1 parent 96c969a commit 50dae59
Show file tree
Hide file tree
Showing 79 changed files with 4,938 additions and 1,144 deletions.
2 changes: 1 addition & 1 deletion .cargo/config
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,4 @@ rustflags = [
"-Wfuture_incompatible",
"-Wnonstandard_style",
"-Wrust_2018_idioms",
]
]
8 changes: 4 additions & 4 deletions .github/workflows/python-bindings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
lfs: true
- uses: prefix-dev/setup-pixi@v0.6.0
with:
pixi-version: v0.13.0
pixi-version: v0.20.1
cache: true
manifest-path: py-rattler/pixi.toml
- uses: actions-rust-lang/setup-rust-toolchain@v1
Expand All @@ -44,9 +44,9 @@ jobs:
- name: Format and Lint
run: |
cd py-rattler
pixi run lint
pixi run fmt-check
pixi run -e test lint
pixi run -e test fmt-check
- name: Run tests
run: |
cd py-rattler
pixi run test
pixi run -e test test
2 changes: 1 addition & 1 deletion .github/workflows/rust-compile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ env:
RUST_BACKTRACE: 1
RUSTFLAGS: "-D warnings"
CARGO_TERM_COLOR: always
DEFAULT_FEATURES: tokio,serde,reqwest,sparse,sysinfo,resolvo
DEFAULT_FEATURES: tokio,serde,reqwest,sparse,sysinfo,resolvo,gateway

jobs:
check-rustdoc-links:
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ Cargo.lock
# pixi
.pixi/
pixi.lock

# Visual studio files
.vs/
11 changes: 10 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ clap = { version = "4.5.4", features = ["derive"] }
cmake = "0.1.50"
console = { version = "0.15.8", features = ["windows-console-colors"] }
criterion = "0.5"
dashmap = "5.5.3"
difference = "2.0.0"
digest = "0.10.7"
dirs = "5.0.1"
Expand All @@ -63,12 +64,14 @@ fslock = "0.2.1"
futures = "0.3.30"
futures-util = "0.3.30"
fxhash = "0.2.1"
generic-array = "0.14.4"
getrandom = { version = "0.2.14", default-features = false }
glob = "0.3.1"
google-cloud-auth = { version = "0.13.2", default-features = false}
google-cloud-auth = { version = "0.13.2", default-features = false }
hex = "0.4.3"
hex-literal = "0.4.1"
http = "1.1"
http-cache-semantics = "2.1.0"
humansize = "2.1.3"
humantime = "2.1.0"
indexmap = "2.2.6"
Expand All @@ -90,6 +93,7 @@ nom = "7.1.3"
num_cpus = "1.16.0"
once_cell = "1.19.0"
ouroboros = "0.18.3"
parking_lot = "0.12.1"
pathdiff = "0.2.1"
pep440_rs = { version = "0.5.0" }
pep508_rs = { version = "0.4.2" }
Expand All @@ -106,6 +110,7 @@ reqwest-middleware = "0.3.0"
reqwest-retry = "0.5.0"
resolvo = { version = "0.4.0" }
retry-policies = { version = "0.3.0", default-features = false }
rmp-serde = { version = "1.2.0" }
rstest = { version = "0.19.0" }
rstest_reuse = "0.6.0"
serde = { version = "1.0.198" }
Expand Down Expand Up @@ -141,6 +146,7 @@ tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", default-features = false }
tracing-test = { version = "0.2.4" }
trybuild = { version = "1.0.91" }
typed-path = { version = "0.8.0" }
url = { version = "2.5.0" }
uuid = { version = "1.8.0", default-features = false }
walkdir = "2.5.0"
Expand All @@ -149,3 +155,6 @@ zip = { version = "0.6.6", default-features = false }
zstd = { version = "0.13.1", default-features = false }

[patch.crates-io]

[profile.release]
debug = true
1 change: 1 addition & 0 deletions crates/file_url/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/target
18 changes: 18 additions & 0 deletions crates/file_url/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "file_url"
version = "0.1.0"
edition.workspace = true
authors = ["Bas Zalmstra <zalmstra.bas@gmail.com>"]
description = "Helper functions to work with file:// urls"
categories.workspace = true
repository.workspace = true
license.workspace = true

[dependencies]
url = { workspace = true }
percent-encoding = { workspace = true }
itertools = { workspace = true }
typed-path = { workspace = true }

[dev-dependencies]
rstest = { workspace = true }
214 changes: 214 additions & 0 deletions crates/file_url/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
//! The URL crate parses `file://` URLs differently on Windows and other operating systems.
//! This crate provides functionality that tries to parse a `file://` URL as a path on all operating
//! systems. This is useful when you want to convert a `file://` URL to a path and vice versa.

use itertools::Itertools;
use percent_encoding::{percent_decode, percent_encode, AsciiSet, CONTROLS};
use std::fmt::Write;
use std::path::PathBuf;
use std::str::FromStr;
use typed_path::{
Utf8TypedComponent, Utf8TypedPath, Utf8UnixComponent, Utf8WindowsComponent, Utf8WindowsPrefix,
};
use url::{Host, Url};

/// Recognizes a URL path segment that denotes a Windows drive letter.
///
/// Two spellings are accepted: an ASCII letter followed by a literal colon (`C:`), or an ASCII
/// letter followed by a percent-escaped colon (`C%3A` / `C%3a`). On a match the drive root is
/// returned with a trailing backslash (e.g. `C:\`); every other segment yields `None`.
fn is_windows_drive_letter_segment(segment: &str) -> Option<String> {
    // Plain form `X:` -- exactly two characters, the second being a colon.
    let plain = segment
        .chars()
        .collect_tuple()
        .and_then(|(letter, colon): (char, char)| (colon == ':').then_some(letter));

    // Escaped form `X%3A` -- exactly four characters, hex digit in either case.
    let escaped = || {
        segment.chars().collect_tuple().and_then(
            |(letter, percent, three, hex): (char, char, char, char)| {
                (percent == '%' && three == '3' && matches!(hex, 'a' | 'A')).then_some(letter)
            },
        )
    };

    // The two forms differ in length, so at most one of them can match.
    plain
        .or_else(escaped)
        .filter(char::is_ascii_alphabetic)
        .map(|letter| format!("{letter}:\\"))
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
///
/// The shape of the returned path depends on the URL:
///
/// * a host other than `localhost` produces a Windows UNC path (`\\host\segment\...`),
/// * a leading drive letter segment (`C:` or `C%3A`) produces a Windows path (`C:\...`),
/// * anything else produces a `/`-separated path.
///
/// Returns `None` if the URL does not use the `file` scheme, has no path segments, has a first
/// segment that starts with `.`, or contains a segment that does not percent-decode to valid
/// UTF-8.
pub fn url_to_path(url: &Url) -> Option<PathBuf> {
    if url.scheme() != "file" {
        return None;
    }

    // Percent-decodes a single segment; a segment that is not valid UTF-8 fails the conversion.
    let decode =
        |segment: &str| String::from_utf8(percent_decode(segment.as_bytes()).collect()).ok();

    let mut segments = url.path_segments()?;
    let host = match url.host() {
        None | Some(Host::Domain("localhost")) => None,
        Some(host) => Some(host),
    };

    // `needs_separator` records whether a separator has to be written before the next segment.
    // The UNC and drive prefixes already end in one, the Unix prefix deliberately does not so
    // that `file:///foo` becomes `/foo` instead of `/foo/`.
    let (mut path, separator, mut needs_separator) = if let Some(host) = host {
        // A host is only present for Windows UNC paths
        (format!("\\\\{host}\\"), "\\", false)
    } else {
        let first = segments.next()?;
        // NOTE(review): this also rejects absolute paths whose first segment is a dot-prefixed
        // name such as `/.cache` -- confirm that this is intended.
        if first.starts_with('.') {
            // Relative file paths are not supported
            return None;
        }

        match is_windows_drive_letter_segment(first) {
            Some(drive_root) => (drive_root, "\\", false),
            // The first segment must be decoded just like every other segment.
            None => (format!("/{}", decode(first)?), "/", true),
        }
    };

    for segment in segments {
        if needs_separator {
            path.push_str(separator);
        }
        path.push_str(&decode(segment)?);
        needs_separator = true;
    }

    Some(PathBuf::from(path))
}

// Percent-encode sets. Each set extends the previous one with additional ASCII bytes.

/// The `CONTROLS` set plus space, `"`, `<`, `>` and `` ` ``.
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
/// The `FRAGMENT` set plus `#`, `?`, `{` and `}`.
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
/// The `PATH` set plus `/` and `%`. `path_to_url` encodes every individual path component with
/// this set.
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');

/// Returns `true` when `segment` is exactly a Windows drive letter: two bytes long, consisting
/// of an ASCII letter followed by `:` or `|`.
#[inline]
pub fn is_windows_drive_letter(segment: &str) -> bool {
    // Anything longer or shorter than `X:` can never be a bare drive letter.
    if segment.len() != 2 {
        return false;
    }
    starts_with_windows_drive_letter(segment)
}

/// Returns `true` when `s` begins with a Windows drive letter.
///
/// That is an ASCII letter followed by `:` or `|`, which must either end the string or be
/// followed by one of `/`, `\`, `?` or `#`.
fn starts_with_windows_drive_letter(s: &str) -> bool {
    match s.as_bytes() {
        // Exactly `X:` / `X|`, or the same followed by a delimiter and arbitrary data.
        [letter, b':' | b'|'] | [letter, b':' | b'|', b'/' | b'\\' | b'?' | b'#', ..] => {
            letter.is_ascii_alphabetic()
        }
        _ => false,
    }
}

/// Serializes an absolute path into a `file://` URL string.
///
/// Supported roots are the Unix root directory and the Windows disk, verbatim disk, UNC and
/// verbatim UNC prefixes. Any other first component -- as well as a UNC server name that cannot
/// be parsed as a URL host -- results in `NotAnAbsolutePath`. Every remaining component is
/// percent-encoded with `PATH_SEGMENT` and appended behind a `/`.
fn path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<String, NotAnAbsolutePath> {
    let typed_path = path.into();
    let mut parts = typed_path.components();

    let mut url = String::from("file://");
    // Index just past the slash that opens the path of a URL without a host.
    let path_start = url.len() + 1;

    let first = parts.next();
    match first {
        // A Unix root adds nothing by itself; the components below bring their own slashes.
        Some(Utf8TypedComponent::Unix(Utf8UnixComponent::RootDir)) => {}
        Some(Utf8TypedComponent::Windows(Utf8WindowsComponent::Prefix(ref prefix))) => {
            match prefix.kind() {
                Utf8WindowsPrefix::Disk(drive) | Utf8WindowsPrefix::VerbatimDisk(drive) => {
                    url.push('/');
                    url.push(drive);
                    url.push(':');
                }
                Utf8WindowsPrefix::UNC(server, share)
                | Utf8WindowsPrefix::VerbatimUNC(server, share) => {
                    // The server becomes the URL host, the share the first path segment.
                    let host = Host::parse(server).map_err(|_err| NotAnAbsolutePath)?;
                    write!(url, "{host}").unwrap();
                    url.push('/');
                    url.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
                }
                _ => return Err(NotAnAbsolutePath),
            }
        }
        _ => return Err(NotAnAbsolutePath),
    }

    let mut only_prefix = true;
    for part in parts {
        let is_root_dir = matches!(
            part,
            Utf8TypedComponent::Windows(Utf8WindowsComponent::RootDir)
                | Utf8TypedComponent::Unix(Utf8UnixComponent::RootDir)
        );
        if !is_root_dir {
            only_prefix = false;
            url.push('/');
            url.extend(percent_encode(part.as_str().as_bytes(), PATH_SEGMENT));
        }
    }

    // A windows drive letter must end with a slash.
    let is_bare_drive =
        only_prefix && url.len() > path_start && is_windows_drive_letter(&url[path_start..]);
    if is_bare_drive {
        url.push('/');
    }

    Ok(url)
}

/// Error returned when a path cannot be converted into a `file://` URL.
///
/// [`file_path_to_url`] and [`directory_path_to_url`] return this error when the path does not
/// start with a Unix root directory or a supported Windows drive / UNC prefix, or when the
/// server name of a UNC path is not a valid URL host.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NotAnAbsolutePath;

impl std::fmt::Display for NotAnAbsolutePath {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("path is not an absolute path")
    }
}

// Implementing `Error` lets callers propagate this with `?` into `Box<dyn Error>` and friends.
impl std::error::Error for NotAnAbsolutePath {}

/// Converts an absolute path to a `file://` [`Url`].
///
/// # Errors
///
/// Returns [`NotAnAbsolutePath`] when the path cannot be expressed as a `file://` URL.
pub fn file_path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<Url, NotAnAbsolutePath> {
    path_to_url(path).map(|raw| Url::from_str(&raw).expect("url string must be a valid url"))
}

/// Converts an absolute directory path to a `file://` [`Url`] that always ends with a `/`.
///
/// # Errors
///
/// Returns [`NotAnAbsolutePath`] when the path cannot be expressed as a `file://` URL.
pub fn directory_path_to_url<'a>(
    path: impl Into<Utf8TypedPath<'a>>,
) -> Result<Url, NotAnAbsolutePath> {
    let raw = path_to_url(path)?;
    // Append the trailing slash only when it is not already there.
    let raw = if raw.ends_with('/') {
        raw
    } else {
        raw + "/"
    };
    Ok(Url::from_str(&raw).expect("url string must be a valid url"))
}

#[cfg(test)]
mod tests {
    use rstest::rstest;
    use std::path::PathBuf;
    use url::Url;

    /// Checks the URL -> path direction; `None` means the URL must be rejected.
    #[rstest]
    #[case("file:///home/bob/test-file.txt", Some("/home/bob/test-file.txt"))]
    #[case("file:///C:/Test/Foo.txt", Some("C:\\Test\\Foo.txt"))]
    #[case("file:///c:/temp/test-file.txt", Some("c:\\temp\\test-file.txt"))]
    #[case("file:///c:\\temp\\test-file.txt", Some("c:\\temp\\test-file.txt"))]
    // Percent encoding
    #[case("file:///foo/ba%20r", Some("/foo/ba r"))]
    #[case("file:///C%3A/Test/Foo.txt", Some("C:\\Test\\Foo.txt"))]
    // Non file URLs
    #[case("http://example.com", None)]
    fn test_url_to_path(#[case] url: &str, #[case] expected: Option<&str>) {
        let parsed = Url::parse(url).unwrap();
        assert_eq!(super::url_to_path(&parsed), expected.map(PathBuf::from));
    }

    /// Checks the path -> URL direction; `None` means the path must be rejected.
    #[rstest]
    #[case::win_drive("C:/", Some("file:///C:/"))]
    #[case::unix_path("/root", Some("file:///root"))]
    #[case::not_absolute("root", None)]
    #[case::win_share("//servername/path", Some("file://servername/path"))]
    #[case::dos_device_path("\\\\?\\C:\\Test\\Foo.txt", Some("file:///C:/Test/Foo.txt"))]
    #[case::unsupported_guid_volumes(
        "\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt",
        None
    )]
    #[case::percent_encoding("//foo/ba r", Some("file://foo/ba%20r"))]
    fn test_file_path_to_url(#[case] path: &str, #[case] expected: Option<&str>) {
        let actual = super::file_path_to_url(path)
            .ok()
            .map(|url| url.to_string());
        assert_eq!(actual, expected.map(str::to_owned));
    }
}
3 changes: 2 additions & 1 deletion crates/rattler-bin/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@ once_cell = { workspace = true }
rattler = { path="../rattler", version = "0.24.0", default-features = false }
rattler_conda_types = { path="../rattler_conda_types", version = "0.22.1", default-features = false }
rattler_networking = { path="../rattler_networking", version = "0.20.5", default-features = false }
rattler_repodata_gateway = { path="../rattler_repodata_gateway", version = "0.19.11", default-features = false, features = ["sparse"] }
rattler_repodata_gateway = { path="../rattler_repodata_gateway", version = "0.19.11", default-features = false, features = ["gateway"] }
rattler_solve = { path="../rattler_solve", version = "0.21.1", default-features = false, features = ["resolvo", "libsolv_c"] }
rattler_virtual_packages = { path="../rattler_virtual_packages", version = "0.19.9", default-features = false }
reqwest = { workspace = true }
reqwest-middleware = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
itertools = { workspace = true }

[package.metadata.release]
# Don't publish the binary
Expand Down
Loading

0 comments on commit 50dae59

Please sign in to comment.