From da40fb8f59a6aca784f7822504ac04e118a1bad8 Mon Sep 17 00:00:00 2001 From: Mathieu Poumeyrol Date: Fri, 29 Sep 2023 09:34:39 +0200 Subject: [PATCH 1/3] leakyrelu and scan --- core/src/half.rs | 4 ++++ core/src/ops/nn/mod.rs | 5 ++++- core/src/ops/scan/mir.rs | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/core/src/half.rs b/core/src/half.rs index a9a0b68cc5..568c7582f0 100644 --- a/core/src/half.rs +++ b/core/src/half.rs @@ -4,6 +4,7 @@ use crate::ops::array::{Pad, PadMode}; use crate::ops::cnn::{ConvUnary, DeconvUnary}; use crate::ops::einsum::EinSum; use crate::ops::konst::Const; +use crate::ops::scan::Scan; use crate::ops::source::TypedSource; #[derive(Debug)] @@ -27,6 +28,9 @@ impl Translate, TypedFact, Box> for Hal bias: op.bias.as_ref().map(tensor_f32_to_f16), ..op.clone() }) + } else if let Some(op) = node.op_as::() { + let body = HalfTranslator.translate_model(&op.body)?; + Box::new(Scan { body, .. op.clone() }) } else if let Some(op) = node.op_as::() { Box::new(EinSum { operating_dt: dt_f32_to_f16(op.operating_dt), ..op.clone() }) } else if let Some(op) = node.op_as::() { diff --git a/core/src/ops/nn/mod.rs b/core/src/ops/nn/mod.rs index 1a3d52e814..777a8a370e 100644 --- a/core/src/ops/nn/mod.rs +++ b/core/src/ops/nn/mod.rs @@ -2,6 +2,8 @@ mod data_formats; mod reduce; mod softmax; +use tract_num_traits::{AsPrimitive, Zero}; + pub use self::data_formats::{BaseDataShape, DataFormat, DataShape, SymDataShape}; pub use self::reduce::{Reduce, Reducer}; pub use self::softmax::Softmax; @@ -19,5 +21,6 @@ element_wise!(hard_swish, HardSwish, ); element_wise!(leaky_relu, LeakyRelu { alpha: f32 }, - [f32] => |op, xs| { xs.iter_mut().for_each(|x| *x *= if *x < 0. { op.alpha } else { 1.0 }); Ok(()) } + [f32] => |op, xs| { xs.iter_mut().for_each(|x| *x *= if *x < 0. 
{ op.alpha } else { 1.0 }); Ok(()) }, + [f16] => |op, xs| { xs.iter_mut().for_each(|x| *x *= if *x < f16::zero() { AsPrimitive::::as_(op.alpha) } else { (1.0).as_() }); Ok(()) } ); diff --git a/core/src/ops/scan/mir.rs b/core/src/ops/scan/mir.rs index cd90ad0c9b..8ad0854c97 100644 --- a/core/src/ops/scan/mir.rs +++ b/core/src/ops/scan/mir.rs @@ -12,7 +12,7 @@ pub struct Scan { pub skip: usize, pub reset_every_turn: bool, pub body: TypedModel, - decluttered: bool, + pub decluttered: bool, pub input_mapping: Vec, pub output_mapping: Vec>, } From ad99044b8d1c661aeac1bc6103547740ecd84294 Mon Sep 17 00:00:00 2001 From: Mathieu Poumeyrol Date: Mon, 2 Oct 2023 14:40:02 +0200 Subject: [PATCH 2/3] fix feature detection --- linalg/src/arm64.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/linalg/src/arm64.rs b/linalg/src/arm64.rs index 77e0650dcc..d306a83f8f 100644 --- a/linalg/src/arm64.rs +++ b/linalg/src/arm64.rs @@ -37,9 +37,24 @@ fn max_cpuid() -> std::io::Result { Ok(max.unwrap_or("").to_string()) } +lazy_static::lazy_static! 
{ + static ref CPU_FEATURES: Vec = { + let cpu_info = std::fs::read_to_string("/proc/cpuinfo").unwrap(); + let line = cpu_info + .lines() + .filter(|line| line.starts_with("Features")) + .next().unwrap(); + line.split_once(":").unwrap().1.split_whitespace().map(|s| s.to_string()).collect() + }; + + static ref HAS_FP16: bool = { + CPU_FEATURES.iter().find(|s| &**s == "asimdhp").is_some() + }; +} + #[inline] pub fn has_fp16() -> bool { - cfg!(feature_cpu = "fp16") + *HAS_FP16 } #[derive(Debug, PartialEq, Eq, Copy, Clone)] @@ -150,6 +165,7 @@ pub fn plug(ops: &mut Ops) { #[cfg(not(feature = "no_fp16"))] if has_fp16() { if *KIND == Kind::CortexA55 { + log::info!("Cortex-A55 mmm_f16 and mmv_f16 activated"); ops.mmm_f16 = Box::new(|_, _, n| { use tract_data::internal::DimLike; if n.unwrap_or(1024).divceil(4) * 4 < n.unwrap_or(1024).divceil(8) * 8 { @@ -160,6 +176,7 @@ pub fn plug(ops: &mut Ops) { }); ops.mmv_f16 = Box::new(|_, _| arm64fp16_mmm_f16_128x1_a55::mmm()); } else { + log::info!("ARMv8.2 mmm_f16 and mmv_f16 activated"); ops.mmm_f16 = Box::new(|_, _, n| { use tract_data::internal::DimLike; if n.unwrap_or(1024).divceil(4) * 4 < n.unwrap_or(1024).divceil(8) * 8 { @@ -175,6 +192,7 @@ pub fn plug(ops: &mut Ops) { ops.tanh_f32 = Box::new(|| arm64simd_tanh_f32_4n::ew()); #[cfg(not(feature = "no_fp16"))] if has_fp16() { + log::info!("ARMv8.2 tanh_f16 and sigmoid_f16 activated"); ops.tanh_f16 = Box::new(|| arm64fp16_tanh_f16_8n::ew()); ops.sigmoid_f16 = Box::new(|| arm64fp16_sigmoid_f16_8n::ew()); } From ff39b93dd98afe861440aab18fd737c2b566dbd2 Mon Sep 17 00:00:00 2001 From: Mathieu Poumeyrol Date: Tue, 3 Oct 2023 13:47:40 +0200 Subject: [PATCH 3/3] improve logging of cpu features --- linalg/Cargo.toml | 1 + linalg/src/arm64.rs | 28 +++++++++++++++++++--------- linalg/src/frame/element_wise.rs | 1 + linalg/src/frame/mmm/tests.rs | 3 +++ linalg/src/frame/sigmoid.rs | 1 + linalg/src/frame/tanh.rs | 1 + linalg/src/lib.rs | 6 ++++++ 7 files changed, 32 insertions(+), 9 
deletions(-) diff --git a/linalg/Cargo.toml b/linalg/Cargo.toml index 2989255b96..d824a1e43b 100644 --- a/linalg/Cargo.toml +++ b/linalg/Cargo.toml @@ -36,6 +36,7 @@ walkdir.workspace = true [dev-dependencies] criterion.workspace = true +env_logger.workspace = true nu-ansi-term.workspace = true proptest.workspace = true core_affinity.workspace = true diff --git a/linalg/src/arm64.rs b/linalg/src/arm64.rs index d306a83f8f..9b62397808 100644 --- a/linalg/src/arm64.rs +++ b/linalg/src/arm64.rs @@ -13,10 +13,6 @@ use crate::Ops; use crate::frame::element_wise::ElementWiseKer; use crate::frame::mmm::kernel::MatMatMulKer; -lazy_static::lazy_static! { - static ref KIND: Kind = Kind::choose(); -} - // https://en.wikipedia.org/wiki/Comparison_of_ARMv8-A_cores const PART_A53: &str = "0xd03"; const PART_A55: &str = "0xd05"; @@ -38,13 +34,23 @@ fn max_cpuid() -> std::io::Result { } lazy_static::lazy_static! { + static ref KIND: Kind = Kind::choose(); + static ref CPU_FEATURES: Vec = { - let cpu_info = std::fs::read_to_string("/proc/cpuinfo").unwrap(); - let line = cpu_info + #[cfg(test)] crate::setup_test_logger(); + let Ok(cpu_info) = std::fs::read_to_string("/proc/cpuinfo") else { + log::warn!("Could not read /proc/cpuinfo. CPU Features detection may be impaired."); + return vec!(); + }; + if let Some(line) = cpu_info .lines() .filter(|line| line.starts_with("Features")) - .next().unwrap(); - line.split_once(":").unwrap().1.split_whitespace().map(|s| s.to_string()).collect() + .next() { + line.split_once(":").unwrap().1.split_whitespace().map(|s| s.to_string()).collect() + } else { + log::warn!("Could not find \"Features :\" lines in /proc/cpuinfo. CPU Features detection may be impaired."); + vec!() + } }; static ref HAS_FP16: bool = { @@ -54,7 +60,7 @@ lazy_static::lazy_static! 
{ #[inline] pub fn has_fp16() -> bool { - *HAS_FP16 + cfg!(feature_cpu = "fp16") || *KIND == Kind::CortexA55 || *KIND == Kind::CortexA75 || *HAS_FP16 } #[derive(Debug, PartialEq, Eq, Copy, Clone)] @@ -70,6 +76,8 @@ enum Kind { impl Kind { fn choose() -> Kind { + #[cfg(test)] + crate::setup_test_logger(); let kind = if let Ok(kind) = std::env::var("TRACT_CPU_AARCH64_KIND") { log::info!("CPU kind forced with TRACT_CPU_AARCH64_KIND: {}", kind); let kind = kind.to_lowercase(); @@ -195,6 +203,8 @@ pub fn plug(ops: &mut Ops) { log::info!("ARMv8.2 tanh_f16 and sigmoid_f16 activated"); ops.tanh_f16 = Box::new(|| arm64fp16_tanh_f16_8n::ew()); ops.sigmoid_f16 = Box::new(|| arm64fp16_sigmoid_f16_8n::ew()); + } else { + log::info!("No native fp16 support"); } #[cfg(target_os = "macos")] { diff --git a/linalg/src/frame/element_wise.rs b/linalg/src/frame/element_wise.rs index 11a5a865d1..3a09edc769 100644 --- a/linalg/src/frame/element_wise.rs +++ b/linalg/src/frame/element_wise.rs @@ -164,6 +164,7 @@ pub mod test { values: &[T], reference: F, ) -> TestCaseResult { + crate::setup_test_logger(); let op = ElementWiseImpl::::new(); let mut values = values.to_vec(); while values.len() < K::nr() { diff --git a/linalg/src/frame/mmm/tests.rs b/linalg/src/frame/mmm/tests.rs index 22ac55a984..814ee2cf24 100644 --- a/linalg/src/frame/mmm/tests.rs +++ b/linalg/src/frame/mmm/tests.rs @@ -233,6 +233,7 @@ where i32: AsPrimitive, usize: AsPrimitive, { + crate::setup_test_logger(); assert_eq!(a.datum_type(), TA::datum_type()); let op = MatMatMulImpl::::default(); unsafe { @@ -281,6 +282,7 @@ where i32: AsPrimitive, usize: AsPrimitive, { + crate::setup_test_logger(); unsafe { let op = MatMatMulImpl::::default(); let mut packed_a = @@ -327,6 +329,7 @@ where i32: AsPrimitive, usize: AsPrimitive, { + crate::setup_test_logger(); let op = MatMatMulImpl::::default(); let mut found = Tensor::zero::(&[m, n]).unwrap(); diff --git a/linalg/src/frame/sigmoid.rs b/linalg/src/frame/sigmoid.rs index 
5445d8124e..6ad34d18c1 100644 --- a/linalg/src/frame/sigmoid.rs +++ b/linalg/src/frame/sigmoid.rs @@ -86,6 +86,7 @@ pub mod test { f32: AsPrimitive, T: AsPrimitive, { + crate::setup_test_logger(); let values: Vec = values.iter().copied().map(|x| x.as_()).collect(); crate::frame::element_wise::test::test_element_wise::(&values, |x| { (1f32).as_() / (1f32.as_() + (-x).exp()) diff --git a/linalg/src/frame/tanh.rs b/linalg/src/frame/tanh.rs index 29e1c69afa..7fca3f2249 100644 --- a/linalg/src/frame/tanh.rs +++ b/linalg/src/frame/tanh.rs @@ -95,6 +95,7 @@ pub mod test { f32: AsPrimitive, T: AsPrimitive, { + crate::setup_test_logger(); let values: Vec = values.iter().copied().map(|x| x.as_()).collect(); crate::frame::element_wise::test::test_element_wise::(&values, |x| x.tanh()) } diff --git a/linalg/src/lib.rs b/linalg/src/lib.rs index 16573dab0b..9b859936cd 100644 --- a/linalg/src/lib.rs +++ b/linalg/src/lib.rs @@ -208,3 +208,9 @@ impl LADatum for i32 { any::().boxed() } } + +#[cfg(test)] +#[allow(dead_code)] +fn setup_test_logger() { + let _ = env_logger::Builder::from_env("TRACT_LOG").try_init(); +}