Skip to content

Commit

Permalink
perf(rspack_core): replace HASH_PLACEHOLDER_REGEX with simple strin…
Browse files Browse the repository at this point in the history
…g parsing (#7907)

* perf(rspack_core): replace HASH_PLACEHOLDER_REGEX with simple string parsing

* feat: add replace_all_hash_pattern

* refactor: replace_all_hash_pattern

* improve

* fix

* improve

* solve conflicts
  • Loading branch information
shulaoda authored Oct 14, 2024
1 parent a5b74a0 commit 233e811
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 63 deletions.
58 changes: 32 additions & 26 deletions crates/rspack_core/src/options/filename.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ use std::sync::Arc;
use std::sync::LazyLock;
use std::{borrow::Cow, convert::Infallible, ptr};

use regex::{Captures, NoExpand, Regex};
use regex::{NoExpand, Regex};
use rspack_error::error;
use rspack_macros::MergeFrom;
use rspack_util::atom::Atom;
use rspack_util::ext::CowExt;
use rspack_util::MergeFrom;

use crate::replace_all_hash_pattern;
use crate::{parse_resource, AssetInfo, PathData, ResourceParsedData};

pub static FILE_PLACEHOLDER: LazyLock<Regex> =
Expand All @@ -35,14 +36,11 @@ pub static RUNTIME_PLACEHOLDER: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[runtime\]").expect("Should generate regex"));
pub static URL_PLACEHOLDER: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[url\]").expect("Should generate regex"));
pub static HASH_PLACEHOLDER: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[hash(:(\d*))?]").expect("Invalid regex"));
pub static CHUNK_HASH_PLACEHOLDER: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[chunkhash(:(\d*))?]").expect("Invalid regex"));
pub static CONTENT_HASH_PLACEHOLDER: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[contenthash(:(\d*))?]").expect("Invalid regex"));
pub static FULL_HASH_PLACEHOLDER: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[fullhash(:(\d*))?]").expect("Invalid regex"));

pub static HASH_PLACEHOLDER: &str = "[hash]";
pub static FULL_HASH_PLACEHOLDER: &str = "[fullhash]";
pub static CHUNK_HASH_PLACEHOLDER: &str = "[chunkhash]";
pub static CONTENT_HASH_PLACEHOLDER: &str = "[contenthash]";

static DATA_URI_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^data:([^;,]+)").expect("Invalid regex"));
Expand Down Expand Up @@ -183,17 +181,22 @@ impl<F> FromStr for Filename<F> {
}
}

fn hash_len(hash: &str, caps: &Captures) -> usize {
#[inline]
fn hash_len(hash: &str, len: Option<usize>) -> usize {
let hash_len = hash.len();
caps
.get(2)
.and_then(|m| m.as_str().parse().ok())
.unwrap_or(hash_len)
.min(hash_len)
len.unwrap_or(hash_len).min(hash_len)
}

pub fn has_hash_placeholder(template: &str) -> bool {
HASH_PLACEHOLDER.is_match(template) || FULL_HASH_PLACEHOLDER.is_match(template)
for key in [HASH_PLACEHOLDER, FULL_HASH_PLACEHOLDER] {
let offset = key.len() - 1;
if let Some(start) = template.find(&key[..offset]) {
if template[start + offset..].find(']').is_some() {
return true;
}
}
}
false
}

impl<F> Filename<F> {
Expand Down Expand Up @@ -310,27 +313,29 @@ fn render_template(
asset_info.version = content_hash.to_string();
}
t = t.map(|t| {
CONTENT_HASH_PLACEHOLDER.replace_all(t, |caps: &Captures| {
let content_hash = &content_hash[..hash_len(content_hash, caps)];
replace_all_hash_pattern(t, CONTENT_HASH_PLACEHOLDER, |len| {
let hash: &str = &content_hash[..hash_len(content_hash, len)];
if let Some(asset_info) = asset_info.as_mut() {
asset_info.set_immutable(Some(true));
asset_info.set_content_hash(content_hash.to_owned());
asset_info.set_content_hash(hash.to_owned());
}
content_hash
hash
})
.map_or(Cow::Borrowed(t), Cow::Owned)
});
}
if let Some(hash) = options.hash {
for reg in [&HASH_PLACEHOLDER, &FULL_HASH_PLACEHOLDER] {
for key in [HASH_PLACEHOLDER, FULL_HASH_PLACEHOLDER] {
t = t.map(|t| {
reg.replace_all(t, |caps: &Captures| {
let hash = &hash[..hash_len(hash, caps)];
replace_all_hash_pattern(t, key, |len| {
let hash = &hash[..hash_len(hash, len)];
if let Some(asset_info) = asset_info.as_mut() {
asset_info.set_immutable(Some(true));
asset_info.set_full_hash(hash.to_owned());
}
hash
})
.map_or(Cow::Borrowed(t), Cow::Owned)
});
}
}
Expand All @@ -345,15 +350,16 @@ fn render_template(
}
if let Some(d) = chunk.rendered_hash.as_ref() {
t = t.map(|t| {
CHUNK_HASH_PLACEHOLDER.replace_all(t, |caps: &Captures| {
let hash = &**d;
let hash = &hash[..hash_len(hash, caps)];
let hash = &**d;
replace_all_hash_pattern(t, CHUNK_HASH_PLACEHOLDER, |len| {
let hash: &str = &hash[..hash_len(hash, len)];
if let Some(asset_info) = asset_info.as_mut() {
asset_info.set_immutable(Some(true));
asset_info.set_chunk_hash(hash.to_owned());
}
hash
})
.map_or(Cow::Borrowed(t), Cow::Owned)
});
}
}
Expand Down
121 changes: 95 additions & 26 deletions crates/rspack_core/src/utils/runtime.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use std::sync::LazyLock;
use std::borrow::Cow;

use cow_utils::CowUtils;
use indexmap::IndexMap;
use regex::{Captures, Regex};
use rustc_hash::FxHashMap as HashMap;
use rustc_hash::FxHashSet as HashSet;

use crate::{merge_runtime, EntryData, EntryOptions, Filename, RuntimeSpec};
use crate::{
merge_runtime, EntryData, EntryOptions, Filename, RuntimeSpec, CHUNK_HASH_PLACEHOLDER,
CONTENT_HASH_PLACEHOLDER, FULL_HASH_PLACEHOLDER, HASH_PLACEHOLDER,
CHUNK_HASH_PLACEHOLDER, CONTENT_HASH_PLACEHOLDER, FULL_HASH_PLACEHOLDER, HASH_PLACEHOLDER,
};

pub fn get_entry_runtime(
Expand Down Expand Up @@ -48,14 +48,83 @@ pub fn get_entry_runtime(
}
}

static HASH_REPLACERS: LazyLock<Vec<(&LazyLock<Regex>, &str)>> = LazyLock::new(|| {
vec![
(&HASH_PLACEHOLDER, "[hash]"),
(&FULL_HASH_PLACEHOLDER, "[fullhash]"),
(&CHUNK_HASH_PLACEHOLDER, "[chunkhash]"),
(&CONTENT_HASH_PLACEHOLDER, "[contenthash]"),
]
});
/// A hash placeholder located inside a filename template.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractedHashPattern {
  /// The placeholder exactly as it was written in the template, e.g. `[hash:8]`.
  pub pattern: String,
  /// The length requested after the `:`, or `None` when the placeholder has
  /// no length or the length does not parse as a `usize`.
  pub len: Option<usize>,
}

/// Extract the first `[hash]` or `[hash:8]` style placeholder for `key` in the template.
///
/// `key` is the bare placeholder including both brackets (for example
/// `"[hash]"`). A candidate only counts as a placeholder when the text between
/// the key name and the closing `]` is empty or is `:` followed by ASCII
/// digits, so lookalikes such as `[hashed]` or `[hash-[name]]` are skipped and
/// the search continues with the next candidate.
///
/// Returns `None` when no such placeholder exists, or when `key` is not of the
/// form `[name]`.
pub fn extract_hash_pattern(pattern: &str, key: &str) -> Option<ExtractedHashPattern> {
  // Outer `None`: `arg` is not a valid placeholder argument.
  // Inner `None`: valid placeholder without a usable length (`[hash]`, `[hash:]`).
  fn parse_len_arg(arg: &str) -> Option<Option<usize>> {
    if arg.is_empty() {
      return Some(None);
    }
    let digits = arg.strip_prefix(':')?;
    digits
      .bytes()
      .all(|b| b.is_ascii_digit())
      .then(|| digits.parse::<usize>().ok())
  }

  // Search for the key without its closing bracket so `[hash:8]` is found too.
  // An empty prefix would match at every offset, so reject it up front.
  let prefix = key.strip_suffix(']').filter(|p| !p.is_empty())?;

  for (start, _) in pattern.match_indices(prefix) {
    let arg_start = start + prefix.len();
    // If nothing closes this candidate, nothing can close a later one either.
    let arg_len = pattern[arg_start..].find(']')?;
    let arg_end = arg_start + arg_len;

    if let Some(len) = parse_len_arg(&pattern[arg_start..arg_end]) {
      return Some(ExtractedHashPattern {
        // `arg_end` is the index of the ASCII `]`, so the inclusive slice is valid.
        pattern: pattern[start..=arg_end].to_string(),
        len,
      });
    }
  }

  None
}

/// Replace all `[hash]` or `[hash:8]` style placeholders for `key` in the pattern.
///
/// `key` is the bare placeholder including both brackets (for example
/// `"[hash]"`). For every placeholder found, `hash` is called with the length
/// written after the `:` (`None` when absent or not parseable as `usize`) and
/// its return value is spliced in place of the placeholder.
///
/// A candidate only counts as a placeholder when the text between the key name
/// and the closing `]` is empty or is `:` followed by ASCII digits. Lookalikes
/// such as `[hashed]` or `[hash-[name]]` are copied through untouched.
///
/// Returns `None` when nothing was replaced (including when `key` is not of
/// the form `[name]`), so callers can keep borrowing the original template.
pub fn replace_all_hash_pattern<'a, F, S>(
  pattern: &'a str,
  key: &'a str,
  mut hash: F,
) -> Option<String>
where
  F: FnMut(Option<usize>) -> S,
  S: AsRef<str>,
{
  // Outer `None`: `arg` is not a valid placeholder argument.
  // Inner `None`: valid placeholder without a usable length (`[hash]`, `[hash:]`).
  fn parse_len_arg(arg: &str) -> Option<Option<usize>> {
    if arg.is_empty() {
      return Some(None);
    }
    let digits = arg.strip_prefix(':')?;
    digits
      .bytes()
      .all(|b| b.is_ascii_digit())
      .then(|| digits.parse::<usize>().ok())
  }

  // Search for the key without its closing bracket so `[hash:8]` is found too.
  // An empty prefix would match at every offset, so reject it up front.
  let prefix = key.strip_suffix(']').filter(|p| !p.is_empty())?;

  // Allocated on the first real replacement, so templates without a
  // placeholder cost no allocation.
  let mut result: Option<String> = None;
  // Byte offset just past the last placeholder already written to `result`.
  let mut ending = 0;

  for (start, _) in pattern.match_indices(prefix) {
    // Defensive: never re-process text inside a placeholder already consumed.
    if start < ending {
      continue;
    }

    let arg_start = start + prefix.len();
    // If nothing closes this candidate, nothing can close a later one either.
    let Some(arg_len) = pattern[arg_start..].find(']') else {
      break;
    };
    let end = arg_start + arg_len;

    // Not a real placeholder: leave the text in place and keep scanning.
    let Some(len) = parse_len_arg(&pattern[arg_start..end]) else {
      continue;
    };

    let replacement = hash(len);
    let out = result.get_or_insert_with(|| String::with_capacity(pattern.len()));
    out.push_str(&pattern[ending..start]);
    out.push_str(replacement.as_ref());

    // Skip past the closing `]`.
    ending = end + 1;
  }

  let mut result = result?;
  result.push_str(&pattern[ending..]);
  Some(result)
}

#[test]
fn test_replace_all_hash_pattern() {
  const FULL_HASH: &str = "abcdefgh";

  // A bare placeholder is swapped for whatever the callback returns.
  assert_eq!(
    replace_all_hash_pattern("hello-[hash].js", "[hash]", |_| "abc"),
    Some(String::from("hello-abc.js"))
  );

  // `[hash]` hands the callback `None`, `[hash:5]` hands it `Some(5)`.
  let rendered = replace_all_hash_pattern("hello-[hash]-[hash:5].js", "[hash]", |len| {
    &FULL_HASH[..len.unwrap_or(8)]
  });
  assert_eq!(rendered, Some(String::from("hello-abcdefgh-abcde.js")));
}

pub fn get_filename_without_hash_length<F: Clone>(
filename: &Filename<F>,
Expand All @@ -64,19 +133,19 @@ pub fn get_filename_without_hash_length<F: Clone>(
let Some(template) = filename.template() else {
return (filename.clone(), hash_len_map);
};
let mut template = template.to_string();
for (reg, key) in HASH_REPLACERS.iter() {
template = reg
.replace_all(&template, |caps: &Captures| {
if let Some(hash_len) = match caps.get(2) {
Some(m) => m.as_str().parse().ok(),
None => None,
} {
hash_len_map.insert((*key).to_string(), hash_len);
}
key
})
.into_owned();
let mut template = Cow::Borrowed(template);
for key in [
HASH_PLACEHOLDER,
FULL_HASH_PLACEHOLDER,
CHUNK_HASH_PLACEHOLDER,
CONTENT_HASH_PLACEHOLDER,
] {
if let Some(p) = extract_hash_pattern(&template, key) {
if let Some(hash_len) = p.len {
hash_len_map.insert((*key).to_string(), hash_len);
}
template = Cow::Owned(template.cow_replace(&p.pattern, key).into_owned());
}
}
(Filename::from(template), hash_len_map)
(Filename::from(template.into_owned()), hash_len_map)
}
23 changes: 12 additions & 11 deletions crates/rspack_plugin_library/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@ version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
async-trait = { workspace = true }
regex = { workspace = true }
rspack_collections = { version = "0.1.0", path = "../rspack_collections" }
rspack_core = { version = "0.1.0", path = "../rspack_core" }
rspack_error = { version = "0.1.0", path = "../rspack_error" }
rspack_hash = { version = "0.1.0", path = "../rspack_hash" }
rspack_hook = { version = "0.1.0", path = "../rspack_hook" }
async-trait = { workspace = true }
regex = { workspace = true }
rspack_collections = { version = "0.1.0", path = "../rspack_collections" }
rspack_core = { version = "0.1.0", path = "../rspack_core" }
rspack_error = { version = "0.1.0", path = "../rspack_error" }
rspack_hash = { version = "0.1.0", path = "../rspack_hash" }
rspack_hook = { version = "0.1.0", path = "../rspack_hook" }
rspack_plugin_javascript = { version = "0.1.0", path = "../rspack_plugin_javascript" }
rspack_util = { version = "0.1.0", path = "../rspack_util" }
rustc-hash = { workspace = true }
serde_json = { workspace = true }
rspack_util = { version = "0.1.0", path = "../rspack_util" }
rustc-hash = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }

swc_core = { workspace = true, features = [
"__parser",
"__utils",
Expand All @@ -33,7 +35,6 @@ swc_core = { workspace = true, features = [
"base",
"ecma_quote",
] }
tracing = { workspace = true }

[package.metadata.cargo-shear]
ignored = ["tracing"]

2 comments on commit 233e811

@rspack-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📝 Benchmark detail: Open

Name Base (2024-10-14 d38945b) Current Change
10000_development-mode + exec 2.11 s ± 19 ms 2.12 s ± 34 ms +0.76 %
10000_development-mode_hmr + exec 662 ms ± 8.2 ms 666 ms ± 17 ms +0.50 %
10000_production-mode + exec 2.66 s ± 21 ms 2.67 s ± 49 ms +0.27 %
arco-pro_development-mode + exec 1.77 s ± 63 ms 1.8 s ± 74 ms +1.31 %
arco-pro_development-mode_hmr + exec 426 ms ± 2.6 ms 426 ms ± 0.94 ms +0.07 %
arco-pro_production-mode + exec 3.1 s ± 106 ms 3.1 s ± 76 ms +0.06 %
arco-pro_production-mode_generate-package-json-webpack-plugin + exec 3.13 s ± 85 ms 3.1 s ± 75 ms -1.00 %
threejs_development-mode_10x + exec 1.65 s ± 14 ms 1.65 s ± 22 ms +0.08 %
threejs_development-mode_10x_hmr + exec 788 ms ± 7.9 ms 792 ms ± 11 ms +0.47 %
threejs_production-mode_10x + exec 4.98 s ± 32 ms 5 s ± 24 ms +0.40 %

@rspack-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📝 Ran ecosystem CI: Open

suite result
modernjs ✅ success
_selftest ✅ success
rspress ✅ success
rslib ✅ success
rsbuild ✅ success
examples ✅ success
devserver ❌ failure

Please sign in to comment.