Skip to content

Commit de5a090

Browse files
committed
optimize SSD performance
1 parent 38a97c5 commit de5a090

16 files changed

Lines changed: 1959 additions & 90 deletions

File tree

src/cozip/src/lib.rs

Lines changed: 529 additions & 52 deletions
Large diffs are not rendered by default.

src/cozip_deflate/src/lib.rs

Lines changed: 251 additions & 29 deletions
Large diffs are not rendered by default.

src/cozip_desktop/locales/en_US.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ settings.gpu_slot_count = "GPU slot count"
144144
settings.gpu_submit_chunks = "GPU submit chunks"
145145
settings.gpu_min_chunk_kib = "GPU min chunk"
146146
settings.gpu_decode = "GPU decode"
147+
settings.parallel_read_threads = "Parallel read threads"
147148
settings.parallel_write_threads = "Parallel write threads"
148149
settings.force_gpu = "Force GPU"
149150
settings.no_pdeflate_options = "The selected archives are ZIP only, so no extra GPU decode settings are available."

src/cozip_desktop/locales/ja_JP.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ settings.gpu_slot_count = "GPU slot 数"
144144
settings.gpu_submit_chunks = "GPU submit chunks"
145145
settings.gpu_min_chunk_kib = "GPU 最小チャンク"
146146
settings.gpu_decode = "GPU 解凍"
147+
settings.parallel_read_threads = "並列読み込みスレッド数"
147148
settings.parallel_write_threads = "並列書き込みスレッド数"
148149
settings.force_gpu = "GPU 強制"
149150
settings.no_pdeflate_options = "選択されたアーカイブは ZIP のみなので、追加の GPU 解凍設定はありません。"

src/cozip_desktop/src/app.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,27 @@ impl CozipDesktopApp {
635635
match plan.format {
636636
ArchiveFormat::Zip => {
637637
form = form
638+
.child(self.settings_row(
639+
self.t("settings.parallel_read_threads"),
640+
self.stepper_control(
641+
"zip-read-threads-dec",
642+
"zip-read-threads-inc",
643+
plan.zip_options.parallel_read_threads.to_string(),
644+
|this, _, _| {
645+
if let Some(plan) = this.compress_plan_mut() {
646+
plan.zip_options.parallel_read_threads =
647+
plan.zip_options.parallel_read_threads.saturating_sub(1).max(1);
648+
}
649+
},
650+
|this, _, _| {
651+
if let Some(plan) = this.compress_plan_mut() {
652+
plan.zip_options.parallel_read_threads =
653+
plan.zip_options.parallel_read_threads.saturating_add(1).min(128);
654+
}
655+
},
656+
cx,
657+
),
658+
))
638659
.child(self.settings_row(
639660
self.t("settings.compression_level"),
640661
self.stepper_control(
@@ -680,6 +701,27 @@ impl CozipDesktopApp {
680701
ArchiveFormat::Cozip => {
681702
let opts = &plan.pdeflate_options;
682703
form = form
704+
.child(self.settings_row(
705+
self.t("settings.parallel_read_threads"),
706+
self.stepper_control(
707+
"pdeflate-read-threads-dec",
708+
"pdeflate-read-threads-inc",
709+
opts.parallel_read_threads.to_string(),
710+
|this, _, _| {
711+
if let Some(plan) = this.compress_plan_mut() {
712+
plan.pdeflate_options.parallel_read_threads =
713+
plan.pdeflate_options.parallel_read_threads.saturating_sub(1).max(1);
714+
}
715+
},
716+
|this, _, _| {
717+
if let Some(plan) = this.compress_plan_mut() {
718+
plan.pdeflate_options.parallel_read_threads =
719+
plan.pdeflate_options.parallel_read_threads.saturating_add(1).min(128);
720+
}
721+
},
722+
cx,
723+
),
724+
))
683725
.child(self.settings_row(
684726
self.t("settings.huffman"),
685727
self.toggle_control(

src/cozip_desktop/src/main.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ fn main() {
1717
Some(InitialScreen::Compress) | Some(InitialScreen::Decompress) => {
1818
size(px(620.0), px(300.0))
1919
}
20-
Some(InitialScreen::CompressSettings) | Some(InitialScreen::DecompressSettings) => {
20+
Some(InitialScreen::DecompressSettings) => {
2121
size(px(760.0), px(520.0))
2222
}
23+
Some(InitialScreen::CompressSettings) => {
24+
size(px(760.0), px(620.0))
25+
}
2326
None => size(px(1360.0), px(920.0)),
2427
};
2528
let bounds = Bounds::centered(None, window_size, cx);

src/cozip_pdeflate/src/lib.rs

Lines changed: 140 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
use std::collections::VecDeque;
22
use std::fs::{File as StdFile, OpenOptions};
3-
use std::io::{BufReader, BufWriter, Read, Write};
3+
use std::io::{self, BufReader, BufWriter, Read, Write};
44
use std::path::{Path, PathBuf};
55
use std::sync::Arc;
66
use std::time::Instant;
77

8+
use cozip_util::{ParallelFileReader, ParallelFileReaderOptions, ParallelReadHandle};
89
use thiserror::Error;
910
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
1011

@@ -137,6 +138,124 @@ impl<R: AsyncRead + Unpin> AsyncStream<R> {
137138
}
138139
}
139140

141+
struct ParallelPrefetchReader {
142+
reader: ParallelFileReader,
143+
file_len: u64,
144+
next_submit_offset: u64,
145+
request_size: usize,
146+
max_inflight_ops: usize,
147+
max_inflight_bytes: usize,
148+
inflight_bytes: usize,
149+
inflight: VecDeque<(ParallelReadHandle, usize)>,
150+
current: Vec<u8>,
151+
current_pos: usize,
152+
}
153+
154+
impl ParallelPrefetchReader {
155+
fn new(
156+
file: StdFile,
157+
chunk_size: usize,
158+
options: ParallelFileReaderOptions,
159+
) -> Result<Self, CozipDeflateError> {
160+
let file_len = file.metadata()?.len();
161+
let request_size = chunk_size.max(1);
162+
let max_inflight_ops = if options.max_inflight_ops > 0 {
163+
options.max_inflight_ops
164+
} else {
165+
let by_bytes = options.max_backlog_bytes.max(request_size) / request_size;
166+
by_bytes.clamp(64, 4096)
167+
};
168+
let max_inflight_bytes = options.max_backlog_bytes.max(request_size);
169+
let reader =
170+
ParallelFileReader::new(file, options).map_err(|error| io::Error::other(error.to_string()))?;
171+
let mut this = Self {
172+
reader,
173+
file_len,
174+
next_submit_offset: 0,
175+
request_size,
176+
max_inflight_ops,
177+
max_inflight_bytes,
178+
inflight_bytes: 0,
179+
inflight: VecDeque::new(),
180+
current: Vec::new(),
181+
current_pos: 0,
182+
};
183+
this.fill_prefetch()?;
184+
Ok(this)
185+
}
186+
187+
fn fill_prefetch(&mut self) -> io::Result<()> {
188+
while self.inflight.len() < self.max_inflight_ops
189+
&& self.inflight_bytes < self.max_inflight_bytes
190+
&& self.next_submit_offset < self.file_len
191+
{
192+
let remaining = self.file_len.saturating_sub(self.next_submit_offset);
193+
let mut len =
194+
usize::try_from(remaining.min(self.request_size as u64)).unwrap_or(self.request_size);
195+
let available_budget = self.max_inflight_bytes.saturating_sub(self.inflight_bytes);
196+
if len > available_budget && available_budget > 0 {
197+
len = available_budget.min(len);
198+
}
199+
if len == 0 {
200+
break;
201+
}
202+
let handle = self
203+
.reader
204+
.submit(self.next_submit_offset, len)
205+
.map_err(|error| io::Error::other(error.to_string()))?;
206+
self.inflight.push_back((handle, len));
207+
self.inflight_bytes = self.inflight_bytes.saturating_add(len);
208+
self.next_submit_offset = self.next_submit_offset.saturating_add(len as u64);
209+
}
210+
Ok(())
211+
}
212+
213+
fn finish(self) -> Result<(), CozipDeflateError> {
214+
self.reader
215+
.drain()
216+
.map_err(|error| io::Error::other(error.to_string()).into())
217+
}
218+
}
219+
220+
impl Read for ParallelPrefetchReader {
221+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
222+
if buf.is_empty() {
223+
return Ok(0);
224+
}
225+
226+
let mut written = 0usize;
227+
loop {
228+
if self.current_pos >= self.current.len() {
229+
let Some((handle, len)) = self.inflight.pop_front() else {
230+
return Ok(written);
231+
};
232+
self.inflight_bytes = self.inflight_bytes.saturating_sub(len);
233+
self.current = handle
234+
.recv()
235+
.map_err(|error| io::Error::other(error.to_string()))?;
236+
self.current_pos = 0;
237+
self.fill_prefetch()?;
238+
if self.current.is_empty() {
239+
if written > 0 {
240+
return Ok(written);
241+
}
242+
continue;
243+
}
244+
}
245+
246+
let available = self.current.len().saturating_sub(self.current_pos);
247+
let take = available.min(buf.len().saturating_sub(written));
248+
buf[written..written + take]
249+
.copy_from_slice(&self.current[self.current_pos..self.current_pos + take]);
250+
self.current_pos = self.current_pos.saturating_add(take);
251+
written = written.saturating_add(take);
252+
if written == buf.len() {
253+
return Ok(written);
254+
}
255+
}
256+
}
257+
}
258+
140259
#[derive(Debug, Clone, Copy, Default)]
141260
pub struct CoZipDeflateInitStats {
142261
pub gpu_context_init_ms: f64,
@@ -200,6 +319,10 @@ impl CoZipDeflate {
200319
self.options.parallel_write_threads
201320
}
202321

322+
pub fn parallel_read_threads(&self) -> usize {
323+
self.options.parallel_read_threads
324+
}
325+
203326
pub fn compress_stream<R: Read + Send, W: Write>(
204327
&self,
205328
reader: &mut R,
@@ -431,6 +554,22 @@ impl CoZipDeflate {
431554
.map_err(map_pdeflate_error)
432555
}
433556

557+
pub fn compress_file_parallel_read_with_options(
558+
&self,
559+
input_file: StdFile,
560+
output_file: StdFile,
561+
stream_options: StreamOptions,
562+
reader_options: ParallelFileReaderOptions,
563+
) -> Result<PDeflateStats, CozipDeflateError> {
564+
let chunk_size = self.options.chunk_size.max(1);
565+
let mut reader = ParallelPrefetchReader::new(input_file, chunk_size, reader_options)?;
566+
let mut writer = BufWriter::new(output_file);
567+
let stats = self.compress_stream_with_options(&mut reader, &mut writer, stream_options)?;
568+
writer.flush()?;
569+
reader.finish()?;
570+
Ok(stats)
571+
}
572+
434573
pub fn decompress_file_from_name<PIn: AsRef<Path>, POut: AsRef<Path>>(
435574
&self,
436575
input_path: PIn,

src/cozip_pdeflate/src/pdeflate/mod.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ pub struct PDeflateOptions {
204204
pub gpu_pipelined_submit_chunks: usize,
205205
pub gpu_min_chunk_size: usize,
206206
pub gpu_tail_stop_ratio: f32,
207+
pub parallel_read_threads: usize,
207208
pub parallel_write_threads: usize,
208209
pub huffman_encode_enabled: bool,
209210
pub compression_mode: PDeflateCompressionMode,
@@ -212,7 +213,7 @@ pub struct PDeflateOptions {
212213

213214
impl Default for PDeflateOptions {
214215
fn default() -> Self {
215-
let parallel_write_threads = thread::available_parallelism()
216+
let parallel_threads = thread::available_parallelism()
216217
.map(|value| value.get())
217218
.unwrap_or(1)
218219
.max(1);
@@ -236,7 +237,8 @@ impl Default for PDeflateOptions {
236237
gpu_pipelined_submit_chunks: 4,
237238
gpu_min_chunk_size: 64 * 1024,
238239
gpu_tail_stop_ratio: 1.0,
239-
parallel_write_threads,
240+
parallel_read_threads: parallel_threads,
241+
parallel_write_threads: parallel_threads,
240242
huffman_encode_enabled: false,
241243
compression_mode: PDeflateCompressionMode::Speed,
242244
hybrid_scheduler_policy: PDeflateHybridSchedulerPolicy::GlobalQueue,
@@ -1739,6 +1741,12 @@ fn validate_options(options: &PDeflateOptions) -> Result<(), PDeflateError> {
17391741
"gpu_table_sample_stride must be > 0",
17401742
));
17411743
}
1744+
if options.parallel_read_threads == 0 {
1745+
return Err(PDeflateError::InvalidOptions(
1746+
"parallel_read_threads must be > 0",
1747+
));
1748+
}
1749+
17421750
if options.parallel_write_threads == 0 {
17431751
return Err(PDeflateError::InvalidOptions(
17441752
"parallel_write_threads must be > 0",

0 commit comments

Comments
 (0)