From cc3736727e2c2cc1fa94e62896b6b3954676fe47 Mon Sep 17 00:00:00 2001 From: wenxuanjun Date: Sun, 12 May 2024 03:10:01 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=84=20refactor:=20Replace=20`image`=20?= =?UTF-8?q?with=20`resize`=20and=20`zune-jpeg`=20to=20reduce=20package=20s?= =?UTF-8?q?ize?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🔧 chore: Update version in Cargo.toml to 0.4.1 🔧 chore: Update CHANGELOG.md with new version details 🔧 chore: Replace `jpeg-decoder` and `image` dependencies with `rgb`, `zune-jpeg`, and `resize` 🔧 chore: Update `classifier.rs` to use `zune-jpeg` and `resize` for image processing 🔧 chore: Update `lib.rs` and `main.rs` to use new `ModelChannels` and `ResizeParam` types The changes were made to reduce the package size by replacing the `image` library with `resize` and `zune-jpeg`. This also involved updating the code to use these new libraries for image processing. The version was updated in `Cargo.toml` and `CHANGELOG.md` to reflect these changes. 
--- CHANGELOG.md | 5 ++ Cargo.lock | 80 +++++++------------------------ Cargo.toml | 27 ++++++----- src/classifier.rs | 119 ++++++++++++++++++++++++++++++++-------------- src/lib.rs | 2 +- src/main.rs | 7 ++- 6 files changed, 127 insertions(+), 113 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 103f91b..98c69a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## [0.4.1] - 2024-05-12 + +- 将 `image` 替换成 `resize` 和 `zune-jpeg` 以减小包体积 + ## [0.4.0] - 2024-05-11 - 新增支持多个架构（`x86_64`、`aarch64`）（`Windows`、`Linux`、`macOS`） @@ -22,6 +26,7 @@ - Initial release - 在 Windows 和 Linux 上通过测试 +[0.4.1]: https://github.com/ShanghaitechGeekPie/net-loginer/releases/tag/v0.4.1 [0.4.0]: https://github.com/ShanghaitechGeekPie/net-loginer/releases/tag/v0.4.0 [0.3.1]: https://github.com/ShanghaitechGeekPie/net-loginer/releases/tag/v0.3.1 [0.3.0]: https://github.com/ShanghaitechGeekPie/net-loginer/releases/tag/v0.3.0 diff --git a/Cargo.lock b/Cargo.lock index 95911a9..3233da4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -180,25 +180,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam-deque" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.19" @@ -241,12 +222,6 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" -[[package]] -name = "either" -version = "1.11.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" - [[package]] name = "errno" version = "0.3.9" @@ -377,19 +352,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "image" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11" -dependencies = [ - "bytemuck", - "byteorder", - "num-traits", - "zune-core", - "zune-jpeg", -] - [[package]] name = "inout" version = "0.1.3" @@ -414,15 +376,6 @@ dependencies = [ "libc", ] -[[package]] -name = "jpeg-decoder" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" -dependencies = [ - "rayon", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -499,22 +452,23 @@ dependencies = [ [[package]] name = "net-loginer" -version = "0.1.0" +version = "0.4.1" dependencies = [ "anyhow", "dotenv", "get_if_addrs", - "image", - "jpeg-decoder", "log", "native-tls", "once_cell", "onnxruntime", + "resize", + "rgb", "serde_json", "simple_logger", "thiserror", "ureq", "url", + "zune-jpeg", ] [[package]] @@ -711,32 +665,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] -name = "rayon" -version = "1.10.0" +name = "redox_syscall" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ - "either", - "rayon-core", + "bitflags 1.3.2", ] [[package]] -name = "rayon-core" -version = "1.12.1" +name = "resize" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +checksum = "c3e29f584c07a8396c5e2eee0bd8d7aec5c8d9e0a3c2333806fd2ec1d2a5b080" dependencies = [ - "crossbeam-deque", - "crossbeam-utils", + "rgb", ] [[package]] -name = "redox_syscall" -version = "0.4.1" +name = "rgb" +version = "0.8.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" dependencies = [ - "bitflags 1.3.2", + "bytemuck", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 15ded77..0d1a977 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "net-loginer" -version = "0.1.0" +version = "0.4.1" edition = "2021" [profile.release] @@ -19,12 +19,7 @@ serde_json = "1.0.116" thiserror = "1.0.59" anyhow = "1.0.83" url = "2.5.0" -jpeg-decoder = "0.3.1" - -[dependencies.image] -version = "0.25.1" -features = ["jpeg"] -default-features = false +rgb = "0.8.37" [dependencies.native-tls] version = "0.2.8" @@ -34,14 +29,24 @@ optional = true version = "5.0.0" features = ["colors", "timestamps"] -[dependencies.onnxruntime] -git = "https://github.com/VOICEVOX/onnxruntime-rs.git" -branch = "master" - [dependencies.ureq] version = "2.9.7" default-features = false +[dependencies.zune-jpeg] +version = "0.4.11" +features = ["std"] +default-features = false + +[dependencies.resize] +version = "0.8.4" +features = ["std"] +default-features = false + +[dependencies.onnxruntime] +git = "https://github.com/VOICEVOX/onnxruntime-rs.git" +branch = "master" + [features] default = ["native-tls"] native-tls = ["ureq/native-tls", "dep:native-tls"] diff --git a/src/classifier.rs b/src/classifier.rs index 39049f6..3d3f469 100644 --- a/src/classifier.rs +++ b/src/classifier.rs @@ -1,11 +1,13 @@ use anyhow::Result; -use image::imageops::FilterType; -use image::EncodableLayout; use once_cell::sync::Lazy; use 
onnxruntime::environment::Environment; use onnxruntime::session::NdArray; use onnxruntime::{ndarray::Array, session::Session}; +use resize::Pixel::RGB8; +use resize::Type::Lanczos3; +use rgb::FromSlice; use std::sync::Mutex; +use zune_jpeg::JpegDecoder; static ENVIRONMENT: Lazy = Lazy::new(|| { Environment::builder() @@ -13,19 +15,50 @@ static ENVIRONMENT: Lazy = Lazy::new(|| { .expect("environment initialization exception!") }); +#[derive(PartialEq, Copy, Clone)] +#[repr(u8)] +pub enum ModelChannels { + Gray = 1, + RGB = 3, +} + +pub enum ResizeParam { + FixedWidth(usize), + FixedHeight(usize), + FixedSize(usize, usize), +} + +impl ResizeParam { + pub fn get_param(&self, image_info: (usize, usize)) -> (usize, usize) { + let (origin_width, origin_height) = (image_info.0 as f32, image_info.1 as f32); + + match self { + ResizeParam::FixedWidth(width) => { + let height = (origin_height * *width as f32 / origin_width).round() as usize; + (*width, height) + } + ResizeParam::FixedHeight(height) => { + let width = (origin_width * *height as f32 / origin_height).round() as usize; + (width, *height) + } + ResizeParam::FixedSize(width, height) => (*width, *height), + } + } +} + pub struct Classifier { session: Mutex>, charset: Vec, - resize_param: [i64; 2], - channels: usize, + resize_param: ResizeParam, + channels: ModelChannels, } impl Classifier { pub fn new>( model: M, charset: Vec, - resize_param: [i64; 2], - channels: usize, + resize_param: ResizeParam, + channels: ModelChannels, ) -> Result { let session = Mutex::new( ENVIRONMENT @@ -42,45 +75,37 @@ impl Classifier { } pub fn classification>(&self, image: I) -> Result { - let image = { - let image = image::load_from_memory(image.as_ref())?; - let resize_width = if self.resize_param[0] == -1 { - image.width() * self.resize_param[1] as u32 / image.height() - } else { - self.resize_param[0] as u32 - }; - image.resize( - resize_width, - self.resize_param[1] as u32, - FilterType::Lanczos3, - ) - }; + let (image, width, 
height) = self.resize_image(image)?; - let image_bytes = if self.channels == 1 { - EncodableLayout::as_bytes(image.to_luma8().as_ref()).to_vec() - } else { - image.to_rgb8().to_vec() + let image_bytes = match self.channels { + ModelChannels::Gray => { + let mut gray_image = vec![0; image.len() / 3]; + for (i, pixels) in image.chunks(3).enumerate() { + let gray = 0.2989 * pixels[0] as f32 + + 0.5870 * pixels[1] as f32 + + 0.1140 * pixels[2] as f32; + gray_image[i] = gray as u8; + } + gray_image + } + ModelChannels::RGB => image, }; - let width = image.width() as usize; - let height = image.height() as usize; - - let image_vec = Array::from_shape_vec((self.channels, height, width), image_bytes)?; - let tensor = Array::from_shape_fn( (1, self.channels as usize, height, width), |(_, c, i, j)| { - let now = image_vec[[c as usize, i, j]] as f32; - if self.channels == 1 { - ((now / 255f32) - 0.456f32) / 0.224f32 + let now = image_bytes[(i * width + j) * self.channels as usize + c] as f32; + let (mean, std) = if self.channels == ModelChannels::Gray { + (0.456f32, 0.224f32) } else { match c { - 0 => ((now / 255f32) - 0.485f32) / 0.229f32, - 1 => ((now / 255f32) - 0.456f32) / 0.224f32, - 2 => ((now / 255f32) - 0.406f32) / 0.225f32, + 0 => (0.485f32, 0.229f32), + 1 => (0.456f32, 0.224f32), + 2 => (0.406f32, 0.225f32), _ => unreachable!(), } - } + }; + ((now / 255f32) - mean) / std }, ); @@ -102,4 +127,28 @@ impl Classifier { Ok(classification) } + + fn resize_image>(&self, image: I) -> Result<(Vec, usize, usize)> { + let mut decoder = JpegDecoder::new(image.as_ref()); + let image = decoder.decode()?; + let image_info = decoder.info().unwrap(); + + let (resize_width, resize_height) = self + .resize_param + .get_param((image_info.width as usize, image_info.height as usize)); + + let mut resizer = resize::new( + image_info.width as usize, + image_info.height as usize, + resize_width, + resize_height, + RGB8, + Lanczos3, + )?; + + let mut resized_image = vec![0; resize_width * 
resize_height as usize * 3]; + resizer.resize(&image.as_rgb(), resized_image.as_rgb_mut())?; + + Ok((resized_image, resize_width, resize_height as usize)) + } } diff --git a/src/lib.rs b/src/lib.rs index 5aa130d..257c609 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,4 +2,4 @@ mod auth; mod classifier; pub use auth::Authenticator; -pub use classifier::Classifier; +pub use classifier::{Classifier, ModelChannels, ResizeParam}; diff --git a/src/main.rs b/src/main.rs index 67f4d7b..2c8eede 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,8 @@ use anyhow::Result; use dotenv::dotenv; use log::LevelFilter; -use net_loginer::{Authenticator, Classifier}; +use net_loginer::Authenticator; +use net_loginer::{Classifier, ModelChannels, ResizeParam}; use simple_logger::SimpleLogger; use std::env; @@ -21,7 +22,9 @@ fn main() -> Result<()> { let model = include_bytes!("../model/shtu_captcha.onnx"); let charset = serde_json::from_slice(include_bytes!("../model/charset.json"))?; - let classifier = Classifier::new(model, charset, [-1, 64], 1)?; + + let resize_param = ResizeParam::FixedHeight(64); + let classifier = Classifier::new(model, charset, resize_param, ModelChannels::Gray)?; let authenticator = Authenticator::new(user_id, password, classifier)?; authenticator.perform_login()?;