Upgrade stringdex to 0.0.3

This commit is contained in:
Yotam Ofek 2025-11-13 10:14:24 +02:00
parent 5dbf4069dc
commit e921e28c7a
3 changed files with 53 additions and 44 deletions

View file

@ -1289,7 +1289,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
dependencies = [
"libc",
"windows-sys 0.52.0",
"windows-sys 0.60.2",
]
[[package]]
@ -2155,7 +2155,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
dependencies = [
"cfg-if",
"windows-targets 0.52.6",
"windows-targets 0.53.3",
]
[[package]]
@ -4907,7 +4907,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.52.0",
"windows-sys 0.61.2",
]
[[package]]
@ -5275,9 +5275,9 @@ dependencies = [
[[package]]
name = "stringdex"
version = "0.0.2"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18b3bd4f10d15ef859c40291769f0d85209de6b0f1c30713ff9cdf45ac43ea36"
checksum = "556a6126952cb2f5150057c98a77cc6c771027dea2825bf7fa03d3d638b0a4f8"
dependencies = [
"stacker",
]

View file

@ -21,7 +21,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
smallvec = "1.8.1"
stringdex = "=0.0.2"
stringdex = "=0.0.3"
tempfile = "3"
threadpool = "1.8.1"
tikv-jemalloc-sys = { version = "0.6.1", optional = true, features = ['override_allocator_on_supported_platforms'] }

View file

@ -3,7 +3,9 @@ mod serde;
use std::collections::BTreeSet;
use std::collections::hash_map::Entry;
use std::io;
use std::path::Path;
use std::string::FromUtf8Error;
use ::serde::de::{self, Deserializer, Error as _};
use ::serde::ser::{SerializeSeq, Serializer};
@ -95,21 +97,22 @@ impl SerializedSearchIndex {
) -> Result<(), Error> {
let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js"));
let column_path = doc_root.join(format!("search.index/{column_name}/"));
let mut consume = |_, cell: &[u8]| {
column.push(String::from_utf8(cell.to_vec())?);
Ok::<_, FromUtf8Error>(())
};
stringdex_internals::read_data_from_disk_column(
root_path,
column_name.as_bytes(),
column_path.clone(),
&mut |_id, item| {
column.push(String::from_utf8(item.to_vec())?);
Ok(())
},
)
.map_err(
|error: stringdex_internals::ReadDataError<Box<dyn std::error::Error>>| Error {
file: column_path,
error: format!("failed to read column from disk: {error}"),
},
&mut consume,
)
.map_err(|error| Error {
file: column_path,
error: format!("failed to read column from disk: {error}"),
})
}
fn perform_read_serde(
resource_suffix: &str,
@ -119,25 +122,26 @@ impl SerializedSearchIndex {
) -> Result<(), Error> {
let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js"));
let column_path = doc_root.join(format!("search.index/{column_name}/"));
let mut consume = |_, cell: &[u8]| {
if cell.is_empty() {
column.push(None);
} else {
column.push(Some(serde_json::from_slice(cell)?));
}
Ok::<_, serde_json::Error>(())
};
stringdex_internals::read_data_from_disk_column(
root_path,
column_name.as_bytes(),
column_path.clone(),
&mut |_id, item| {
if item.is_empty() {
column.push(None);
} else {
column.push(Some(serde_json::from_slice(item)?));
}
Ok(())
},
)
.map_err(
|error: stringdex_internals::ReadDataError<Box<dyn std::error::Error>>| Error {
file: column_path,
error: format!("failed to read column from disk: {error}"),
},
&mut consume,
)
.map_err(|error| Error {
file: column_path,
error: format!("failed to read column from disk: {error}"),
})
}
fn perform_read_postings(
resource_suffix: &str,
@ -147,23 +151,28 @@ impl SerializedSearchIndex {
) -> Result<(), Error> {
let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js"));
let column_path = doc_root.join(format!("search.index/{column_name}/"));
/// Builds the per-cell callback handed to
/// `stringdex_internals::read_data_from_disk_column` for a postings column.
///
/// The returned closure ignores its first (`u32`) argument, decodes the raw
/// `cell` bytes with `encode::read_postings_from_string` into a fresh postings
/// list, and appends that list to `column`. As written it always returns
/// `Ok(())`.
// NOTE(review): this is presumably a named fn (rather than an inline closure)
// so the closure's signature and `io::Result` error type are spelled out
// explicitly instead of being inferred — confirm against the stringdex API.
fn consumer(
column: &mut Vec<Vec<Vec<u32>>>,
) -> impl FnMut(u32, &[u8]) -> io::Result<()> {
|_, cell| {
// Each cell gets its own postings list; it is filled in place by the decoder.
let mut postings = Vec::new();
encode::read_postings_from_string(&mut postings, cell);
column.push(postings);
Ok(())
}
}
stringdex_internals::read_data_from_disk_column(
root_path,
column_name.as_bytes(),
column_path.clone(),
&mut |_id, buf| {
let mut postings = Vec::new();
encode::read_postings_from_string(&mut postings, buf);
column.push(postings);
Ok(())
},
)
.map_err(
|error: stringdex_internals::ReadDataError<Box<dyn std::error::Error>>| Error {
file: column_path,
error: format!("failed to read column from disk: {error}"),
},
&mut consumer(column),
)
.map_err(|error| Error {
file: column_path,
error: format!("failed to read column from disk: {error}"),
})
}
assert_eq!(names.len(), path_data.len());
@ -1055,12 +1064,12 @@ impl Serialize for TypeData {
let mut buf = Vec::new();
encode::write_postings_to_string(&self.inverted_function_inputs_index, &mut buf);
let mut serialized_result = Vec::new();
stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result);
stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result).unwrap();
seq.serialize_element(&str::from_utf8(&serialized_result).unwrap())?;
buf.clear();
serialized_result.clear();
encode::write_postings_to_string(&self.inverted_function_output_index, &mut buf);
stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result);
stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result).unwrap();
seq.serialize_element(&str::from_utf8(&serialized_result).unwrap())?;
if self.search_unbox {
seq.serialize_element(&1)?;