From cd53d8b2f457bf44bdbd51b97260fc06fd1dc980 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Tue, 14 May 2024 15:18:18 +0200 Subject: [PATCH] sw: Fix warnings in compilation (#135) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Markus Böck --- Bender.yml | 50 --- sw/apps/atax/scripts/datagen.py | 4 +- sw/apps/correlation/scripts/datagen.py | 2 +- sw/apps/covariance/scripts/datagen.py | 2 +- sw/dnn/batchnorm/scripts/datagen.py | 5 +- sw/dnn/conv2d/scripts/datagen.py | 21 +- sw/dnn/conv2d/src/conv2d.h | 12 +- sw/dnn/fusedconv/scripts/datagen.py | 16 +- sw/dnn/layernorm/src/layernorm.h | 8 + sw/dnn/maxpool/scripts/datagen.py | 3 + sw/dnn/softmax/scripts/datagen.py | 3 + sw/dnn/src/dnn.h | 1 - sw/math/.gitignore | 1 - sw/math/COPYRIGHT | 193 ----------- sw/math/Makefile | 86 ----- sw/math/README | 5 - sw/math/arch/generic/fp_arch.h | 0 sw/math/arch/riscv64/bits/alltypes.h.in | 18 - sw/math/arch/riscv64/bits/float.h | 16 - sw/math/include/alltypes.h.in | 95 ----- sw/math/include/endian.h | 80 ----- sw/math/include/features.h | 40 --- sw/math/include/float.h | 52 --- sw/math/include/math.h | 442 ------------------------ sw/math/src/include/features.h | 11 - sw/math/src/internal/libm.h | 274 --------------- sw/math/src/math/__math_divzero.c | 6 - sw/math/src/math/__math_invalid.c | 6 - sw/math/src/math/__math_invalidf.c | 6 - sw/math/src/math/__math_invalidl.c | 9 - sw/math/src/math/__math_oflow.c | 6 - sw/math/src/math/__math_oflowf.c | 6 - sw/math/src/math/__math_uflow.c | 6 - sw/math/src/math/__math_uflowf.c | 6 - sw/math/src/math/__math_xflow.c | 6 - sw/math/src/math/__math_xflowf.c | 6 - sw/math/src/math/ceil.c | 31 -- sw/math/src/math/ceilf.c | 27 -- sw/math/src/math/ceill.c | 34 -- sw/math/src/math/exp2f_data.c | 35 -- sw/math/src/math/exp2f_data.h | 23 -- sw/math/src/math/expf.c | 80 ----- sw/math/src/math/expm1.c | 201 ----------- sw/math/src/math/log2.c | 122 ------- sw/math/src/math/log2_data.c | 
201 ----------- sw/math/src/math/log2_data.h | 28 -- sw/math/src/math/log2f.c | 72 ---- sw/math/src/math/log2f_data.c | 33 -- sw/math/src/math/log2f_data.h | 19 - sw/math/src/math/sqrt.c | 158 --------- sw/math/src/math/sqrt_data.c | 19 - sw/math/src/math/sqrt_data.h | 13 - sw/math/src/math/sqrtf.c | 83 ----- sw/math/src/math/tanh.c | 45 --- sw/math/tools/mkalltypes.sed | 15 - sw/snRuntime/src/omp/kmp.c | 7 +- sw/tests/varargs_1.c | 4 +- sw/tests/varargs_2.c | 4 +- target/snitch_cluster/sw.mk | 7 +- target/snitch_cluster/sw/apps/common.mk | 3 - target/snitch_cluster/sw/math/Makefile | 8 - target/snitch_cluster/sw/tests/Makefile | 2 +- target/snitch_cluster/sw/toolchain.mk | 10 +- 63 files changed, 64 insertions(+), 2723 deletions(-) delete mode 100644 sw/math/.gitignore delete mode 100644 sw/math/COPYRIGHT delete mode 100644 sw/math/Makefile delete mode 100644 sw/math/README delete mode 100644 sw/math/arch/generic/fp_arch.h delete mode 100644 sw/math/arch/riscv64/bits/alltypes.h.in delete mode 100644 sw/math/arch/riscv64/bits/float.h delete mode 100644 sw/math/include/alltypes.h.in delete mode 100644 sw/math/include/endian.h delete mode 100644 sw/math/include/features.h delete mode 100644 sw/math/include/float.h delete mode 100644 sw/math/include/math.h delete mode 100644 sw/math/src/include/features.h delete mode 100644 sw/math/src/internal/libm.h delete mode 100644 sw/math/src/math/__math_divzero.c delete mode 100644 sw/math/src/math/__math_invalid.c delete mode 100644 sw/math/src/math/__math_invalidf.c delete mode 100644 sw/math/src/math/__math_invalidl.c delete mode 100644 sw/math/src/math/__math_oflow.c delete mode 100644 sw/math/src/math/__math_oflowf.c delete mode 100644 sw/math/src/math/__math_uflow.c delete mode 100644 sw/math/src/math/__math_uflowf.c delete mode 100644 sw/math/src/math/__math_xflow.c delete mode 100644 sw/math/src/math/__math_xflowf.c delete mode 100644 sw/math/src/math/ceil.c delete mode 100644 sw/math/src/math/ceilf.c delete mode 100644 
sw/math/src/math/ceill.c delete mode 100644 sw/math/src/math/exp2f_data.c delete mode 100644 sw/math/src/math/exp2f_data.h delete mode 100644 sw/math/src/math/expf.c delete mode 100644 sw/math/src/math/expm1.c delete mode 100644 sw/math/src/math/log2.c delete mode 100644 sw/math/src/math/log2_data.c delete mode 100644 sw/math/src/math/log2_data.h delete mode 100644 sw/math/src/math/log2f.c delete mode 100644 sw/math/src/math/log2f_data.c delete mode 100644 sw/math/src/math/log2f_data.h delete mode 100644 sw/math/src/math/sqrt.c delete mode 100644 sw/math/src/math/sqrt_data.c delete mode 100644 sw/math/src/math/sqrt_data.h delete mode 100644 sw/math/src/math/sqrtf.c delete mode 100644 sw/math/src/math/tanh.c delete mode 100644 sw/math/tools/mkalltypes.sed delete mode 100644 target/snitch_cluster/sw/math/Makefile diff --git a/Bender.yml b/Bender.yml index dc4281912..6fb7255ad 100644 --- a/Bender.yml +++ b/Bender.yml @@ -29,56 +29,6 @@ dependencies: cluster_icache: { git: https://github.com/pulp-platform/cluster_icache.git, version: 0.1.0 } idma: { git: https://github.com/pulp-platform/iDMA, version: 0.6.0 } -vendor_package: - - name: musl - target_dir: sw/math - upstream: { git: https://github.com/kraj/musl.git, rev: 7a43f6fea9081bdd53d8a11cef9e9fab0348c53d } # v1.2.4 - patch_dir: sw/deps/patches/musl - include_from_upstream: - - "COPYRIGHT" - - "Makefile" - - ".gitignore" - - "README" - - "src/math/ceil.c" - - "src/math/ceilf.c" - - "src/math/ceill.c" - - "src/math/expm1.c" - - "src/math/expf.c" - - "src/math/exp2f_data.c" - - "src/math/exp2f_data.h" - - "src/math/log2.c" - - "src/math/log2_data.c" - - "src/math/log2_data.h" - - "src/math/log2f.c" - - "src/math/log2f_data.c" - - "src/math/log2f_data.h" - - "src/math/__math_divzero.c" - - "src/math/__math_invalid.c" - - "src/math/__math_invalidf.c" - - "src/math/__math_invalidl.c" - - "src/math/__math_oflow.c" - - "src/math/__math_oflowf.c" - - "src/math/__math_uflow.c" - - "src/math/__math_uflowf.c" - - 
"src/math/__math_xflow.c" - - "src/math/__math_xflowf.c" - - "src/math/sqrt.c" - - "src/math/sqrtf.c" - - "src/math/sqrt_data.c" - - "src/math/sqrt_data.h" - - "src/math/tanh.c" - - "src/internal/libm.h" - - "src/include/features.h" - - "include/endian.h" - - "include/math.h" - - "include/features.h" - - "include/float.h" - - "include/alltypes.h.in" - - "arch/riscv64/bits/alltypes.h.in" - - "arch/riscv64/bits/float.h" - - "tools/mkalltypes.sed" - - "arch/generic/fp_arch.h" - export_include_dirs: - hw/reqrsp_interface/include - hw/mem_interface/include diff --git a/sw/apps/atax/scripts/datagen.py b/sw/apps/atax/scripts/datagen.py index b68ea79ef..4332ecc31 100755 --- a/sw/apps/atax/scripts/datagen.py +++ b/sw/apps/atax/scripts/datagen.py @@ -29,8 +29,8 @@ def emit_header(self, **kwargs): header = [super().emit_header()] M, N = kwargs['M'], kwargs['N'] - A = np.random.random_integers(-200, 100, size=(M, N))/100 - x = np.random.random_integers(-200, 100, size=(N, 1))/100 + A = np.random.randint(-200, 100, size=(M, N))/100 + x = np.random.randint(-200, 100, size=(N, 1))/100 y = self.golden_model(A, x) assert (M % 8) == 0, "M must be an integer multiple of the number of cores" diff --git a/sw/apps/correlation/scripts/datagen.py b/sw/apps/correlation/scripts/datagen.py index 594b00ba7..a42fdfe33 100755 --- a/sw/apps/correlation/scripts/datagen.py +++ b/sw/apps/correlation/scripts/datagen.py @@ -29,7 +29,7 @@ def emit_header(self, **kwargs): header = [super().emit_header()] M, N = kwargs['M'], kwargs['N'] - data = np.random.random_integers(-200, 100, size=(N, M))/100 + data = np.random.randint(-200, 100, size=(N, M))/100 corr = self.golden_model(data) data = data.flatten() diff --git a/sw/apps/covariance/scripts/datagen.py b/sw/apps/covariance/scripts/datagen.py index 2cd596a3e..e62586a6d 100755 --- a/sw/apps/covariance/scripts/datagen.py +++ b/sw/apps/covariance/scripts/datagen.py @@ -29,7 +29,7 @@ def emit_header(self, **kwargs): header = [super().emit_header()] M, N = 
kwargs['M'], kwargs['N'] - data = np.random.random_integers(-200, 100, size=(N, M)) + data = np.random.randint(-200, 100, size=(N, M)) cov = self.golden_model(data) assert (M % 8) == 0, "M must be an integer multiple of the number of cores" diff --git a/sw/dnn/batchnorm/scripts/datagen.py b/sw/dnn/batchnorm/scripts/datagen.py index 72a254bf5..53dce77b4 100755 --- a/sw/dnn/batchnorm/scripts/datagen.py +++ b/sw/dnn/batchnorm/scripts/datagen.py @@ -59,11 +59,14 @@ def emit_header(**kwargs): ofmap = ofmap.permute(0, 2, 3, 1) n, ih, iw, ci = ifmap.shape + ifmap = data_utils.flatten(ifmap) + ofmap = data_utils.flatten(ofmap) ifmap_uid = 'ifmap' ofmap_uid = 'ofmap' beta_uid = 'beta' - gamma_uid = 'gamma' + # Underscore is used to disambiguate between this and the gamma function from "math.h" + gamma_uid = 'gamma_' layer_cfg = { 'CI': ci, diff --git a/sw/dnn/conv2d/scripts/datagen.py b/sw/dnn/conv2d/scripts/datagen.py index f3682b656..95ae015d0 100755 --- a/sw/dnn/conv2d/scripts/datagen.py +++ b/sw/dnn/conv2d/scripts/datagen.py @@ -48,19 +48,24 @@ def emit_header(**kwargs): conv2d_output = golden_model(inputs, filters[0], padding=filter['padding'], stride=filter['stride']) + output_dim = conv2d_output.shape # compute checksum row-wise conv2d_checksum = np.sum(conv2d_output.numpy(), axis=1) ctype = data_utils.ctype_from_precision_t(prec) + inputs = data_utils.flatten(inputs.numpy()) + filters = data_utils.flatten(filters[0].numpy()) + conv2d_output = data_utils.flatten(conv2d_output.numpy()) + layer_cfg = { 'CO': out_channels, 'CI': in_channels, 'IH': input_dim['height'], 'IW': input_dim['width'], - 'OH': conv2d_output.shape[1], - 'OW': conv2d_output.shape[2], + 'OH': output_dim[1], + 'OW': output_dim[2], 'FH': filter['height'], 'FW': filter['width'], 'ifmap': 'conv2d_ifmap_dram', @@ -71,18 +76,18 @@ def emit_header(**kwargs): data_str = [emit_license()] data_str += [format_array_declaration(ctype, 'conv2d_ifmap_dram', - inputs.numpy().shape, BURST_ALIGNMENT)] + 
inputs.shape, BURST_ALIGNMENT)] data_str += [format_array_declaration(ctype, 'conv2d_weights_dram', - filters[0].numpy().shape, BURST_ALIGNMENT)] + filters.shape, BURST_ALIGNMENT)] data_str += [format_array_declaration(ctype, 'conv2d_ofmap_dram', - conv2d_output.numpy().shape, BURST_ALIGNMENT)] + conv2d_output.shape, BURST_ALIGNMENT)] data_str += [format_struct_definition('conv_layer', 'layer', layer_cfg)] data_str += [format_array_definition(ctype, 'conv2d_ifmap_dram', - inputs.numpy(), BURST_ALIGNMENT)] + inputs, BURST_ALIGNMENT)] data_str += [format_array_definition(ctype, 'conv2d_weights_dram', - filters[0].numpy(), BURST_ALIGNMENT)] + filters, BURST_ALIGNMENT)] data_str += [format_array_definition(ctype, 'conv2d_ofmap_dram', - conv2d_output.numpy(), BURST_ALIGNMENT)] + conv2d_output, BURST_ALIGNMENT)] data_str += [format_array_definition(ctype, 'conv2d_checksum', conv2d_checksum, BURST_ALIGNMENT)] diff --git a/sw/dnn/conv2d/src/conv2d.h b/sw/dnn/conv2d/src/conv2d.h index 8b0640aba..6dbb74ba1 100644 --- a/sw/dnn/conv2d/src/conv2d.h +++ b/sw/dnn/conv2d/src/conv2d.h @@ -330,8 +330,10 @@ void bn_relu(const float *pBuffer, const uint16_t dim_x, const uint16_t dim_y, ssr_i[0], ssr_i[1], ssr_i[2], ssr_i[3]); snrt_ssr_repeat(SNRT_SSR_DM1, 1); // Disable repeat from conv2d - snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_4D, &pBuffer[compute_id * 2]); - snrt_ssr_write(SNRT_SSR_DM1, SNRT_SSR_4D, &pBuffer[compute_id * 2]); + snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_4D, + (volatile void *)&pBuffer[compute_id * 2]); + snrt_ssr_write(SNRT_SSR_DM1, SNRT_SSR_4D, + (volatile void *)&pBuffer[compute_id * 2]); // Regular path with max unrolling is only done if dim_y // is at least n_unroll @@ -413,9 +415,11 @@ void bn_relu(const float *pBuffer, const uint16_t dim_x, const uint16_t dim_y, uint32_t h_cleanup_index = dim_y - cleanup_unroll; snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_4D, - &pBuffer[h_cleanup_index * h_stride + compute_id * 2]); + (volatile void *)&pBuffer[h_cleanup_index * h_stride + + 
compute_id * 2]); snrt_ssr_write(SNRT_SSR_DM1, SNRT_SSR_4D, - &pBuffer[h_cleanup_index * h_stride + compute_id * 2]); + (volatile void *)&pBuffer[h_cleanup_index * h_stride + + compute_id * 2]); for (uint32_t co = compute_id; co < ch / 2; co += compute_num) { volatile register v2s current_lambda = ((v2s *)lambda)[co]; diff --git a/sw/dnn/fusedconv/scripts/datagen.py b/sw/dnn/fusedconv/scripts/datagen.py index f7cd26f2b..3b06c5095 100755 --- a/sw/dnn/fusedconv/scripts/datagen.py +++ b/sw/dnn/fusedconv/scripts/datagen.py @@ -176,32 +176,36 @@ def emit_header(**kwargs): 'dtype': prec } + ifmap_padded = data_utils.flatten(ifmap_padded.numpy()) + kernel = data_utils.flatten(kernel.numpy()) + ofmap_before = data_utils.flatten(ofmap_before.numpy()) + data_str = [emit_license()] data_str += [format_array_declaration(ctype, 'fusedconv_pInBuffer_dram', - ifmap_padded.numpy().shape, BURST_ALIGNMENT)] + ifmap_padded.shape, BURST_ALIGNMENT)] data_str += [format_array_declaration(ctype, 'fusedconv_pWeight_dram', - kernel.numpy().shape, BURST_ALIGNMENT)] + kernel.shape, BURST_ALIGNMENT)] data_str += [format_array_declaration(ctype, 'fusedconv_lambda_dram', bn_l.numpy().shape, BURST_ALIGNMENT)] data_str += [format_array_declaration(ctype, 'fusedconv_kappa_dram', bn_k.numpy().shape, BURST_ALIGNMENT)] data_str += [format_array_declaration(ctype, 'fusedconv_pOutBuffer_dram', - ofmap_before.numpy().shape, BURST_ALIGNMENT)] + ofmap_before.shape, BURST_ALIGNMENT)] data_str += [format_array_declaration(ctype, 'fusedconv_pCheckOutBuffer_dram', ofmap.numpy().shape, BURST_ALIGNMENT)] data_str += [format_struct_definition('kernel_fp32', 'layer', layer_cfg)] data_str += [format_scalar_definition('uint32_t', 'dw', kwargs['depthwise'])] data_str += [format_scalar_definition('uint32_t', 'chw_layer', kwargs['chw_layer'])] data_str += [format_array_definition(ctype, 'fusedconv_pInBuffer_dram', - ifmap_padded.numpy(), BURST_ALIGNMENT)] + ifmap_padded, BURST_ALIGNMENT)] data_str += 
[format_array_definition(ctype, 'fusedconv_pWeight_dram', - kernel.numpy(), BURST_ALIGNMENT)] + kernel, BURST_ALIGNMENT)] data_str += [format_array_definition(ctype, 'fusedconv_lambda_dram', bn_l.numpy(), BURST_ALIGNMENT)] data_str += [format_array_definition(ctype, 'fusedconv_kappa_dram', bn_k.numpy(), BURST_ALIGNMENT)] data_str += [format_array_definition(ctype, 'fusedconv_pOutBuffer_dram', - ofmap_before.numpy(), BURST_ALIGNMENT)] + ofmap_before, BURST_ALIGNMENT)] data_str += [format_array_definition(ctype, 'fusedconv_pCheckOutBuffer_dram', ofmap.numpy(), BURST_ALIGNMENT)] diff --git a/sw/dnn/layernorm/src/layernorm.h b/sw/dnn/layernorm/src/layernorm.h index 08c5b27aa..3e5f82085 100644 --- a/sw/dnn/layernorm/src/layernorm.h +++ b/sw/dnn/layernorm/src/layernorm.h @@ -108,6 +108,8 @@ static inline void layernorm_layer(layernorm_layer_t l) { l.batch_size, tile_seq_len, l.embeddings, l.eps); break; + default: + break; } break; case FP16: @@ -122,6 +124,8 @@ static inline void layernorm_layer(layernorm_layer_t l) { l.batch_size, tile_seq_len, l.embeddings, l.eps); break; + default: + break; } break; case FP8: @@ -131,8 +135,12 @@ static inline void layernorm_layer(layernorm_layer_t l) { l.batch_size, tile_seq_len, l.embeddings, l.eps); break; + default: + break; } break; + default: + break; } if (snrt_is_compute_core()) snrt_mcycle(); diff --git a/sw/dnn/maxpool/scripts/datagen.py b/sw/dnn/maxpool/scripts/datagen.py index b9c1ccb2a..e83f733b8 100755 --- a/sw/dnn/maxpool/scripts/datagen.py +++ b/sw/dnn/maxpool/scripts/datagen.py @@ -54,6 +54,9 @@ def emit_header(**kwargs): n, ih, iw, ci = ifmap.shape _, oh, ow, co = ofmap.shape + ifmap = data_utils.flatten(ifmap) + ofmap = data_utils.flatten(ofmap) + ifmap_uid = 'ifmap' ofmap_uid = 'ofmap' diff --git a/sw/dnn/softmax/scripts/datagen.py b/sw/dnn/softmax/scripts/datagen.py index ac1be5fab..59c96b9f2 100755 --- a/sw/dnn/softmax/scripts/datagen.py +++ b/sw/dnn/softmax/scripts/datagen.py @@ -45,6 +45,9 @@ def 
emit_header(**kwargs): ofmap = golden_model(ifmap, reduce_dim) ofmap = ofmap.detach().numpy() + ifmap = data_utils.flatten(ifmap) + ofmap = data_utils.flatten(ofmap) + ctype = data_utils.ctype_from_precision_t(prec) ifmap_uid = 'ifmap' diff --git a/sw/dnn/src/dnn.h b/sw/dnn/src/dnn.h index 1e015d86e..ef2cfa593 100644 --- a/sw/dnn/src/dnn.h +++ b/sw/dnn/src/dnn.h @@ -36,7 +36,6 @@ typedef union { } v8s; #define M_PI 3.14159265358979323846 -#define INFINITY 0x7f800000 /** * @struct network_t_ diff --git a/sw/math/.gitignore b/sw/math/.gitignore deleted file mode 100644 index c3ae0c772..000000000 --- a/sw/math/.gitignore +++ /dev/null @@ -1 +0,0 @@ -include/bits/ diff --git a/sw/math/COPYRIGHT b/sw/math/COPYRIGHT deleted file mode 100644 index c1628e9ac..000000000 --- a/sw/math/COPYRIGHT +++ /dev/null @@ -1,193 +0,0 @@ -musl as a whole is licensed under the following standard MIT license: - ----------------------------------------------------------------------- -Copyright © 2005-2020 Rich Felker, et al. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- - -Authors/contributors include: - -A. Wilcox -Ada Worcester -Alex Dowad -Alex Suykov -Alexander Monakov -Andre McCurdy -Andrew Kelley -Anthony G. Basile -Aric Belsito -Arvid Picciani -Bartosz Brachaczek -Benjamin Peterson -Bobby Bingham -Boris Brezillon -Brent Cook -Chris Spiegel -Clément Vasseur -Daniel Micay -Daniel Sabogal -Daurnimator -David Carlier -David Edelsohn -Denys Vlasenko -Dmitry Ivanov -Dmitry V. Levin -Drew DeVault -Emil Renner Berthing -Fangrui Song -Felix Fietkau -Felix Janda -Gianluca Anzolin -Hauke Mehrtens -He X -Hiltjo Posthuma -Isaac Dunham -Jaydeep Patil -Jens Gustedt -Jeremy Huntwork -Jo-Philipp Wich -Joakim Sindholt -John Spencer -Julien Ramseier -Justin Cormack -Kaarle Ritvanen -Khem Raj -Kylie McClain -Leah Neukirchen -Luca Barbato -Luka Perkov -M Farkas-Dyck (Strake) -Mahesh Bodapati -Markus Wichmann -Masanori Ogino -Michael Clark -Michael Forney -Mikhail Kremnyov -Natanael Copa -Nicholas J. Kain -orc -Pascal Cuoq -Patrick Oppenlander -Petr Hosek -Petr Skocik -Pierre Carrier -Reini Urban -Rich Felker -Richard Pennington -Ryan Fairfax -Samuel Holland -Segev Finer -Shiz -sin -Solar Designer -Stefan Kristiansson -Stefan O'Rear -Szabolcs Nagy -Timo Teräs -Trutz Behn -Valentin Ochs -Will Dietz -William Haddon -William Pitcock - -Portions of this software are derived from third-party works licensed -under terms compatible with the above MIT license: - -The TRE regular expression implementation (src/regex/reg* and -src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed -under a 2-clause BSD license (license text in the source files). 
The -included version has been heavily modified by Rich Felker in 2012, in -the interests of size, simplicity, and namespace cleanliness. - -Much of the math library code (src/math/* and src/complex/*) is -Copyright © 1993,2004 Sun Microsystems or -Copyright © 2003-2011 David Schultz or -Copyright © 2003-2009 Steven G. Kargl or -Copyright © 2003-2009 Bruce D. Evans or -Copyright © 2008 Stephen L. Moshier or -Copyright © 2017-2018 Arm Limited -and labelled as such in comments in the individual source files. All -have been licensed under extremely permissive terms. - -The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 -The Android Open Source Project and is licensed under a two-clause BSD -license. It was taken from Bionic libc, used on Android. - -The AArch64 memcpy and memset code (src/string/aarch64/*) are -Copyright © 1999-2019, Arm Limited. - -The implementation of DES for crypt (src/crypt/crypt_des.c) is -Copyright © 1994 David Burren. It is licensed under a BSD license. - -The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was -originally written by Solar Designer and placed into the public -domain. The code also comes with a fallback permissive license for use -in jurisdictions that may not recognize the public domain. - -The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011 -Valentin Ochs and is licensed under an MIT-style license. - -The x86_64 port was written by Nicholas J. Kain and is licensed under -the standard MIT terms. - -The mips and microblaze ports were originally written by Richard -Pennington for use in the ellcc project. The original code was adapted -by Rich Felker for build system and code conventions during upstream -integration. It is licensed under the standard MIT terms. - -The mips64 port was contributed by Imagination Technologies and is -licensed under the standard MIT terms. 
- -The powerpc port was also originally written by Richard Pennington, -and later supplemented and integrated by John Spencer. It is licensed -under the standard MIT terms. - -All other files which have no copyright comments are original works -produced specifically for use as part of this library, written either -by Rich Felker, the main author of the library, or by one or more -contibutors listed above. Details on authorship of individual files -can be found in the git version control history of the project. The -omission of copyright and license comments in each file is in the -interest of source tree size. - -In addition, permission is hereby granted for all public header files -(include/* and arch/*/bits/*) and crt files intended to be linked into -applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit -the copyright notice and permission notice otherwise required by the -license, and to use these files without any requirement of -attribution. These files include substantial contributions from: - -Bobby Bingham -John Spencer -Nicholas J. Kain -Rich Felker -Richard Pennington -Stefan Kristiansson -Szabolcs Nagy - -all of whom have explicitly granted such permission. - -This file previously contained text expressing a belief that most of -the files covered by the above exception were sufficiently trivial not -to be subject to copyright, resulting in confusion over whether it -negated the permissions granted in the license. In the spirit of -permissive licensing, and of not having licensing issues being an -obstacle to adoption, that text has been removed. diff --git a/sw/math/Makefile b/sw/math/Makefile deleted file mode 100644 index afb3192d1..000000000 --- a/sw/math/Makefile +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2023 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
-# SPDX-License-Identifier: Apache-2.0 -# -# Luca Colagrande -# Viviane Potocnik, ETH Zurich - -# Usage of absolute paths is required to externally include -# this Makefile from multiple different locations -MK_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST)))) - -############### -# Directories # -############### - -BUILDDIR ?= $(abspath build) -SRC_DIR = $(MK_DIR)/src/math -BITS_DIR = $(MK_DIR)/include/bits - -################### -# Build variables # -################### - -INCDIRS += $(MK_DIR)/arch/riscv64/ -INCDIRS += $(MK_DIR)/arch/generic -INCDIRS += $(MK_DIR)/src/include -INCDIRS += $(MK_DIR)/src/internal -INCDIRS += $(MK_DIR)/include/bits -INCDIRS += $(MK_DIR)/include - -SRCS = $(abspath $(wildcard $(SRC_DIR)/*.c)) - -########### -# Outputs # -########### - -ALLTYPES_H = $(BITS_DIR)/alltypes.h - -OBJS = $(addprefix $(BUILDDIR)/,$(addsuffix .o,$(basename $(notdir $(SRCS))))) -DEPS = $(addprefix $(BUILDDIR)/,$(addsuffix .d,$(basename $(notdir $(SRCS))))) -LIB = $(BUILDDIR)/libmath.a -DUMP = $(BUILDDIR)/libmath.dump -ALL_OUTPUTS = $(LIB) $(DUMP) - -######### -# Rules # -######### - -.PHONY: all -all: $(ALL_OUTPUTS) - -.PHONY: clean -clean: - rm -rf $(BITS_DIR) - rm -f $(ALLTYPES_H) - rm -rf $(BUILDDIR) - -$(BITS_DIR): - mkdir -p $@ - -$(ALLTYPES_H): | $(BITS_DIR) - sed -f $(MK_DIR)/tools/mkalltypes.sed $(MK_DIR)/arch/riscv64/bits/alltypes.h.in $(MK_DIR)/include/alltypes.h.in > $@ - -$(DEPS): $(ALLTYPES_H) - -$(BUILDDIR): - mkdir -p $@ - -$(BUILDDIR)/%.o: $(SRC_DIR)/%.S | $(BUILDDIR) - $(RISCV_CC) $(RISCV_CFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.o: $(SRC_DIR)/%.c | $(BUILDDIR) - $(RISCV_CC) $(RISCV_CFLAGS) -c $< -o $@ - -$(BUILDDIR)/%.d: $(SRC_DIR)/%.c | $(BUILDDIR) - $(RISCV_CC) $(RISCV_CFLAGS) -MM -MT '$(@:.d=.o)' $< > $@ - -$(LIB): $(OBJS) | $(BUILDDIR) - $(RISCV_AR) $(RISCV_ARFLAGS) $@ $^ - -$(DUMP): $(LIB) | $(BUILDDIR) - $(RISCV_OBJDUMP) -D $< > $@ - -ifneq ($(MAKECMDGOALS),clean) --include $(DEPS) -endif diff --git a/sw/math/README b/sw/math/README 
deleted file mode 100644 index 2f7d512b9..000000000 --- a/sw/math/README +++ /dev/null @@ -1,5 +0,0 @@ -# Snitch math library - -The sources in this library are heavily based on the musl libc implementation. We implement only a subset of the math library functionality provided thereof. Refer to the `Bender.yml` file for the location of our patches and of the original sources. - -The original sources comply with the license specified in the `COPYRIGHT` file. All other sources, including our modifications, are released under the licensing rules of this repository. \ No newline at end of file diff --git a/sw/math/arch/generic/fp_arch.h b/sw/math/arch/generic/fp_arch.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/sw/math/arch/riscv64/bits/alltypes.h.in b/sw/math/arch/riscv64/bits/alltypes.h.in deleted file mode 100644 index 4579d1740..000000000 --- a/sw/math/arch/riscv64/bits/alltypes.h.in +++ /dev/null @@ -1,18 +0,0 @@ -#define _Addr long -#define _Int64 long -#define _Reg long - -#define __BYTE_ORDER 1234 -#define __LONG_MAX 0x7fffffffffffffffL - -#ifndef __cplusplus -TYPEDEF int wchar_t; -#endif - -TYPEDEF int blksize_t; -TYPEDEF unsigned int nlink_t; - -TYPEDEF float float_t; -TYPEDEF double double_t; - -TYPEDEF struct { long long __ll; long double __ld; } max_align_t; diff --git a/sw/math/arch/riscv64/bits/float.h b/sw/math/arch/riscv64/bits/float.h deleted file mode 100644 index 719c79085..000000000 --- a/sw/math/arch/riscv64/bits/float.h +++ /dev/null @@ -1,16 +0,0 @@ -#define FLT_EVAL_METHOD 0 - -#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L -#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L -#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L -#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L - -#define LDBL_MANT_DIG 113 -#define LDBL_MIN_EXP (-16381) -#define LDBL_MAX_EXP 16384 - -#define LDBL_DIG 33 -#define LDBL_MIN_10_EXP (-4931) -#define LDBL_MAX_10_EXP 4932 - -#define 
DECIMAL_DIG 36 diff --git a/sw/math/include/alltypes.h.in b/sw/math/include/alltypes.h.in deleted file mode 100644 index d47aeea9a..000000000 --- a/sw/math/include/alltypes.h.in +++ /dev/null @@ -1,95 +0,0 @@ -#define __LITTLE_ENDIAN 1234 -#define __BIG_ENDIAN 4321 -#define __USE_TIME_BITS64 1 - -TYPEDEF unsigned _Addr size_t; -TYPEDEF unsigned _Addr uintptr_t; -TYPEDEF _Addr ptrdiff_t; -TYPEDEF _Addr ssize_t; -TYPEDEF _Addr intptr_t; -TYPEDEF _Addr regoff_t; -TYPEDEF _Reg register_t; -TYPEDEF _Int64 time_t; -TYPEDEF _Int64 suseconds_t; - -TYPEDEF signed char int8_t; -TYPEDEF signed short int16_t; -TYPEDEF signed int int32_t; -TYPEDEF signed _Int64 int64_t; -TYPEDEF signed _Int64 intmax_t; -TYPEDEF unsigned char uint8_t; -TYPEDEF unsigned short uint16_t; -TYPEDEF unsigned int uint32_t; -TYPEDEF unsigned _Int64 uint64_t; -TYPEDEF unsigned _Int64 u_int64_t; -TYPEDEF unsigned _Int64 uintmax_t; - -TYPEDEF unsigned mode_t; -TYPEDEF unsigned _Reg nlink_t; -TYPEDEF _Int64 off_t; -TYPEDEF unsigned _Int64 ino_t; -TYPEDEF unsigned _Int64 dev_t; -TYPEDEF long blksize_t; -TYPEDEF _Int64 blkcnt_t; -TYPEDEF unsigned _Int64 fsblkcnt_t; -TYPEDEF unsigned _Int64 fsfilcnt_t; - -TYPEDEF unsigned wint_t; -TYPEDEF unsigned long wctype_t; - -TYPEDEF void * timer_t; -TYPEDEF int clockid_t; -TYPEDEF long clock_t; -STRUCT timeval { time_t tv_sec; suseconds_t tv_usec; }; -STRUCT timespec { time_t tv_sec; int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER==4321); long tv_nsec; int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER!=4321); }; - -TYPEDEF int pid_t; -TYPEDEF unsigned id_t; -TYPEDEF unsigned uid_t; -TYPEDEF unsigned gid_t; -TYPEDEF int key_t; -TYPEDEF unsigned useconds_t; - -#ifdef __cplusplus -TYPEDEF unsigned long pthread_t; -#else -TYPEDEF struct __pthread * pthread_t; -#endif -TYPEDEF int pthread_once_t; -TYPEDEF unsigned pthread_key_t; -TYPEDEF int pthread_spinlock_t; -TYPEDEF struct { unsigned __attr; } pthread_mutexattr_t; -TYPEDEF struct { unsigned __attr; } 
pthread_condattr_t; -TYPEDEF struct { unsigned __attr; } pthread_barrierattr_t; -TYPEDEF struct { unsigned __attr[2]; } pthread_rwlockattr_t; - -STRUCT _IO_FILE { char __x; }; -TYPEDEF struct _IO_FILE FILE; - -TYPEDEF __builtin_va_list va_list; -TYPEDEF __builtin_va_list __isoc_va_list; - -TYPEDEF struct __mbstate_t { unsigned __opaque1, __opaque2; } mbstate_t; - -TYPEDEF struct __locale_struct * locale_t; - -TYPEDEF struct __sigset_t { unsigned long __bits[128/sizeof(long)]; } sigset_t; - -STRUCT iovec { void *iov_base; size_t iov_len; }; - -STRUCT winsize { unsigned short ws_row, ws_col, ws_xpixel, ws_ypixel; }; - -TYPEDEF unsigned socklen_t; -TYPEDEF unsigned short sa_family_t; - -TYPEDEF struct { union { int __i[sizeof(long)==8?14:9]; volatile int __vi[sizeof(long)==8?14:9]; unsigned long __s[sizeof(long)==8?7:9]; } __u; } pthread_attr_t; -TYPEDEF struct { union { int __i[sizeof(long)==8?10:6]; volatile int __vi[sizeof(long)==8?10:6]; volatile void *volatile __p[sizeof(long)==8?5:6]; } __u; } pthread_mutex_t; -TYPEDEF struct { union { int __i[sizeof(long)==8?10:6]; volatile int __vi[sizeof(long)==8?10:6]; volatile void *volatile __p[sizeof(long)==8?5:6]; } __u; } mtx_t; -TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12*sizeof(int)/sizeof(void*)]; } __u; } pthread_cond_t; -TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12*sizeof(int)/sizeof(void*)]; } __u; } cnd_t; -TYPEDEF struct { union { int __i[sizeof(long)==8?14:8]; volatile int __vi[sizeof(long)==8?14:8]; void *__p[sizeof(long)==8?7:8]; } __u; } pthread_rwlock_t; -TYPEDEF struct { union { int __i[sizeof(long)==8?8:5]; volatile int __vi[sizeof(long)==8?8:5]; void *__p[sizeof(long)==8?4:5]; } __u; } pthread_barrier_t; - -#undef _Addr -#undef _Int64 -#undef _Reg diff --git a/sw/math/include/endian.h b/sw/math/include/endian.h deleted file mode 100644 index 172c43203..000000000 --- a/sw/math/include/endian.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _ENDIAN_H 
-#define _ENDIAN_H - -#include - -#define __NEED_uint16_t -#define __NEED_uint32_t -#define __NEED_uint64_t - -#include - -#define __PDP_ENDIAN 3412 - -#define BIG_ENDIAN __BIG_ENDIAN -#define LITTLE_ENDIAN __LITTLE_ENDIAN -#define PDP_ENDIAN __PDP_ENDIAN -#define BYTE_ORDER __BYTE_ORDER - -static __inline uint16_t __bswap16(uint16_t __x) -{ - return __x<<8 | __x>>8; -} - -static __inline uint32_t __bswap32(uint32_t __x) -{ - return __x>>24 | __x>>8&0xff00 | __x<<8&0xff0000 | __x<<24; -} - -static __inline uint64_t __bswap64(uint64_t __x) -{ - return __bswap32(__x)+0ULL<<32 | __bswap32(__x>>32); -} - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define htobe16(x) __bswap16(x) -#define be16toh(x) __bswap16(x) -#define htobe32(x) __bswap32(x) -#define be32toh(x) __bswap32(x) -#define htobe64(x) __bswap64(x) -#define be64toh(x) __bswap64(x) -#define htole16(x) (uint16_t)(x) -#define le16toh(x) (uint16_t)(x) -#define htole32(x) (uint32_t)(x) -#define le32toh(x) (uint32_t)(x) -#define htole64(x) (uint64_t)(x) -#define le64toh(x) (uint64_t)(x) -#else -#define htobe16(x) (uint16_t)(x) -#define be16toh(x) (uint16_t)(x) -#define htobe32(x) (uint32_t)(x) -#define be32toh(x) (uint32_t)(x) -#define htobe64(x) (uint64_t)(x) -#define be64toh(x) (uint64_t)(x) -#define htole16(x) __bswap16(x) -#define le16toh(x) __bswap16(x) -#define htole32(x) __bswap32(x) -#define le32toh(x) __bswap32(x) -#define htole64(x) __bswap64(x) -#define le64toh(x) __bswap64(x) -#endif - -#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define betoh16(x) __bswap16(x) -#define betoh32(x) __bswap32(x) -#define betoh64(x) __bswap64(x) -#define letoh16(x) (uint16_t)(x) -#define letoh32(x) (uint32_t)(x) -#define letoh64(x) (uint64_t)(x) -#else -#define betoh16(x) (uint16_t)(x) -#define betoh32(x) (uint32_t)(x) -#define betoh64(x) (uint64_t)(x) -#define letoh16(x) __bswap16(x) -#define letoh32(x) __bswap32(x) -#define letoh64(x) __bswap64(x) -#endif -#endif - -#endif diff --git 
a/sw/math/include/features.h b/sw/math/include/features.h deleted file mode 100644 index 85cfb72a0..000000000 --- a/sw/math/include/features.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _FEATURES_H -#define _FEATURES_H - -#if defined(_ALL_SOURCE) && !defined(_GNU_SOURCE) -#define _GNU_SOURCE 1 -#endif - -#if defined(_DEFAULT_SOURCE) && !defined(_BSD_SOURCE) -#define _BSD_SOURCE 1 -#endif - -#if !defined(_POSIX_SOURCE) && !defined(_POSIX_C_SOURCE) \ - && !defined(_XOPEN_SOURCE) && !defined(_GNU_SOURCE) \ - && !defined(_BSD_SOURCE) && !defined(__STRICT_ANSI__) -#define _BSD_SOURCE 1 -#define _XOPEN_SOURCE 700 -#endif - -#if __STDC_VERSION__ >= 199901L -#define __restrict restrict -#elif !defined(__GNUC__) -#define __restrict -#endif - -#if __STDC_VERSION__ >= 199901L || defined(__cplusplus) -#define __inline inline -#elif !defined(__GNUC__) -#define __inline -#endif - -#if __STDC_VERSION__ >= 201112L -#elif defined(__GNUC__) -#define _Noreturn __attribute__((__noreturn__)) -#else -#define _Noreturn -#endif - -#define __REDIR(x,y) __typeof__(x) x __asm__(#y) - -#endif diff --git a/sw/math/include/float.h b/sw/math/include/float.h deleted file mode 100644 index 713aadb90..000000000 --- a/sw/math/include/float.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef _FLOAT_H -#define _FLOAT_H - -#ifdef __cplusplus -extern "C" { -#endif - -int __flt_rounds(void); -#define FLT_ROUNDS (__flt_rounds()) - -#define FLT_RADIX 2 - -#define FLT_TRUE_MIN 1.40129846432481707092e-45F -#define FLT_MIN 1.17549435082228750797e-38F -#define FLT_MAX 3.40282346638528859812e+38F -#define FLT_EPSILON 1.1920928955078125e-07F - -#define FLT_MANT_DIG 24 -#define FLT_MIN_EXP (-125) -#define FLT_MAX_EXP 128 -#define FLT_HAS_SUBNORM 1 - -#define FLT_DIG 6 -#define FLT_DECIMAL_DIG 9 -#define FLT_MIN_10_EXP (-37) -#define FLT_MAX_10_EXP 38 - -#define DBL_TRUE_MIN 4.94065645841246544177e-324 -#define DBL_MIN 2.22507385850720138309e-308 -#define DBL_MAX 1.79769313486231570815e+308 -#define DBL_EPSILON 
2.22044604925031308085e-16 - -#define DBL_MANT_DIG 53 -#define DBL_MIN_EXP (-1021) -#define DBL_MAX_EXP 1024 -#define DBL_HAS_SUBNORM 1 - -#define DBL_DIG 15 -#define DBL_DECIMAL_DIG 17 -#define DBL_MIN_10_EXP (-307) -#define DBL_MAX_10_EXP 308 - -#define LDBL_HAS_SUBNORM 1 -#define LDBL_DECIMAL_DIG DECIMAL_DIG - -#include - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/sw/math/include/math.h b/sw/math/include/math.h deleted file mode 100644 index 14f28ec8c..000000000 --- a/sw/math/include/math.h +++ /dev/null @@ -1,442 +0,0 @@ -#ifndef _MATH_H -#define _MATH_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -#define __NEED_float_t -#define __NEED_double_t -#include - -#if 100*__GNUC__+__GNUC_MINOR__ >= 303 -#define NAN __builtin_nanf("") -#define INFINITY __builtin_inff() -#else -#define NAN (0.0f/0.0f) -#define INFINITY 1e5000f -#endif - -#define HUGE_VALF INFINITY -#define HUGE_VAL ((double)INFINITY) -#define HUGE_VALL ((long double)INFINITY) - -#define MATH_ERRNO 1 -#define MATH_ERREXCEPT 2 -#define math_errhandling 2 - -#define FP_ILOGBNAN (-1-0x7fffffff) -#define FP_ILOGB0 FP_ILOGBNAN - -#define FP_NAN 0 -#define FP_INFINITE 1 -#define FP_ZERO 2 -#define FP_SUBNORMAL 3 -#define FP_NORMAL 4 - -#ifdef __FP_FAST_FMA -#define FP_FAST_FMA 1 -#endif - -#ifdef __FP_FAST_FMAF -#define FP_FAST_FMAF 1 -#endif - -#ifdef __FP_FAST_FMAL -#define FP_FAST_FMAL 1 -#endif - -int __fpclassify(double); -int __fpclassifyf(float); -int __fpclassifyl(long double); - -static __inline unsigned __FLOAT_BITS(float __f) -{ - union {float __f; unsigned __i;} __u; - __u.__f = __f; - return __u.__i; -} -static __inline unsigned long long __DOUBLE_BITS(double __f) -{ - union {double __f; unsigned long long __i;} __u; - __u.__f = __f; - return __u.__i; -} - -#define fpclassify(x) ( \ - sizeof(x) == sizeof(float) ? __fpclassifyf(x) : \ - sizeof(x) == sizeof(double) ? __fpclassify(x) : \ - __fpclassifyl(x) ) - -#define isinf(x) ( \ - sizeof(x) == sizeof(float) ? 
(__FLOAT_BITS(x) & 0x7fffffff) == 0x7f800000 : \ - sizeof(x) == sizeof(double) ? (__DOUBLE_BITS(x) & -1ULL>>1) == 0x7ffULL<<52 : \ - __fpclassifyl(x) == FP_INFINITE) - -#define isnan(x) ( \ - sizeof(x) == sizeof(float) ? (__FLOAT_BITS(x) & 0x7fffffff) > 0x7f800000 : \ - sizeof(x) == sizeof(double) ? (__DOUBLE_BITS(x) & -1ULL>>1) > 0x7ffULL<<52 : \ - __fpclassifyl(x) == FP_NAN) - -#define isnormal(x) ( \ - sizeof(x) == sizeof(float) ? ((__FLOAT_BITS(x)+0x00800000) & 0x7fffffff) >= 0x01000000 : \ - sizeof(x) == sizeof(double) ? ((__DOUBLE_BITS(x)+(1ULL<<52)) & -1ULL>>1) >= 1ULL<<53 : \ - __fpclassifyl(x) == FP_NORMAL) - -#define isfinite(x) ( \ - sizeof(x) == sizeof(float) ? (__FLOAT_BITS(x) & 0x7fffffff) < 0x7f800000 : \ - sizeof(x) == sizeof(double) ? (__DOUBLE_BITS(x) & -1ULL>>1) < 0x7ffULL<<52 : \ - __fpclassifyl(x) > FP_INFINITE) - -int __signbit(double); -int __signbitf(float); -int __signbitl(long double); - -#define signbit(x) ( \ - sizeof(x) == sizeof(float) ? (int)(__FLOAT_BITS(x)>>31) : \ - sizeof(x) == sizeof(double) ? (int)(__DOUBLE_BITS(x)>>63) : \ - __signbitl(x) ) - -#define isunordered(x,y) (isnan((x)) ? ((void)(y),1) : isnan((y))) - -#define __ISREL_DEF(rel, op, type) \ -static __inline int __is##rel(type __x, type __y) \ -{ return !isunordered(__x,__y) && __x op __y; } - -__ISREL_DEF(lessf, <, float_t) -__ISREL_DEF(less, <, double_t) -__ISREL_DEF(lessl, <, long double) -__ISREL_DEF(lessequalf, <=, float_t) -__ISREL_DEF(lessequal, <=, double_t) -__ISREL_DEF(lessequall, <=, long double) -__ISREL_DEF(lessgreaterf, !=, float_t) -__ISREL_DEF(lessgreater, !=, double_t) -__ISREL_DEF(lessgreaterl, !=, long double) -__ISREL_DEF(greaterf, >, float_t) -__ISREL_DEF(greater, >, double_t) -__ISREL_DEF(greaterl, >, long double) -__ISREL_DEF(greaterequalf, >=, float_t) -__ISREL_DEF(greaterequal, >=, double_t) -__ISREL_DEF(greaterequall, >=, long double) - -#define __tg_pred_2(x, y, p) ( \ - sizeof((x)+(y)) == sizeof(float) ? 
p##f(x, y) : \ - sizeof((x)+(y)) == sizeof(double) ? p(x, y) : \ - p##l(x, y) ) - -#define isless(x, y) __tg_pred_2(x, y, __isless) -#define islessequal(x, y) __tg_pred_2(x, y, __islessequal) -#define islessgreater(x, y) __tg_pred_2(x, y, __islessgreater) -#define isgreater(x, y) __tg_pred_2(x, y, __isgreater) -#define isgreaterequal(x, y) __tg_pred_2(x, y, __isgreaterequal) - -double acos(double); -float acosf(float); -long double acosl(long double); - -double acosh(double); -float acoshf(float); -long double acoshl(long double); - -double asin(double); -float asinf(float); -long double asinl(long double); - -double asinh(double); -float asinhf(float); -long double asinhl(long double); - -double atan(double); -float atanf(float); -long double atanl(long double); - -double atan2(double, double); -float atan2f(float, float); -long double atan2l(long double, long double); - -double atanh(double); -float atanhf(float); -long double atanhl(long double); - -double cbrt(double); -float cbrtf(float); -long double cbrtl(long double); - -double ceil(double); -float ceilf(float); -long double ceill(long double); - -double copysign(double, double); -float copysignf(float, float); -long double copysignl(long double, long double); - -double cos(double); -float cosf(float); -long double cosl(long double); - -double cosh(double); -float coshf(float); -long double coshl(long double); - -double erf(double); -float erff(float); -long double erfl(long double); - -double erfc(double); -float erfcf(float); -long double erfcl(long double); - -double exp(double); -float expf(float); -long double expl(long double); - -double exp2(double); -float exp2f(float); -long double exp2l(long double); - -double expm1(double); -float expm1f(float); -long double expm1l(long double); - -double fabs(double); -float fabsf(float); -long double fabsl(long double); - -double fdim(double, double); -float fdimf(float, float); -long double fdiml(long double, long double); - -double floor(double); -float 
floorf(float); -long double floorl(long double); - -double fma(double, double, double); -float fmaf(float, float, float); -long double fmal(long double, long double, long double); - -double fmax(double, double); -float fmaxf(float, float); -long double fmaxl(long double, long double); - -double fmin(double, double); -float fminf(float, float); -long double fminl(long double, long double); - -double fmod(double, double); -float fmodf(float, float); -long double fmodl(long double, long double); - -double frexp(double, int *); -float frexpf(float, int *); -long double frexpl(long double, int *); - -double hypot(double, double); -float hypotf(float, float); -long double hypotl(long double, long double); - -int ilogb(double); -int ilogbf(float); -int ilogbl(long double); - -double ldexp(double, int); -float ldexpf(float, int); -long double ldexpl(long double, int); - -double lgamma(double); -float lgammaf(float); -long double lgammal(long double); - -long long llrint(double); -long long llrintf(float); -long long llrintl(long double); - -long long llround(double); -long long llroundf(float); -long long llroundl(long double); - -double log(double); -float logf(float); -long double logl(long double); - -double log10(double); -float log10f(float); -long double log10l(long double); - -double log1p(double); -float log1pf(float); -long double log1pl(long double); - -double log2(double); -float log2f(float); -long double log2l(long double); - -double logb(double); -float logbf(float); -long double logbl(long double); - -long lrint(double); -long lrintf(float); -long lrintl(long double); - -long lround(double); -long lroundf(float); -long lroundl(long double); - -double modf(double, double *); -float modff(float, float *); -long double modfl(long double, long double *); - -double nan(const char *); -float nanf(const char *); -long double nanl(const char *); - -double nearbyint(double); -float nearbyintf(float); -long double nearbyintl(long double); - -double nextafter(double, 
double); -float nextafterf(float, float); -long double nextafterl(long double, long double); - -double nexttoward(double, long double); -float nexttowardf(float, long double); -long double nexttowardl(long double, long double); - -double pow(double, double); -float powf(float, float); -long double powl(long double, long double); - -double remainder(double, double); -float remainderf(float, float); -long double remainderl(long double, long double); - -double remquo(double, double, int *); -float remquof(float, float, int *); -long double remquol(long double, long double, int *); - -double rint(double); -float rintf(float); -long double rintl(long double); - -double round(double); -float roundf(float); -long double roundl(long double); - -double scalbln(double, long); -float scalblnf(float, long); -long double scalblnl(long double, long); - -double scalbn(double, int); -float scalbnf(float, int); -long double scalbnl(long double, int); - -double sin(double); -float sinf(float); -long double sinl(long double); - -double sinh(double); -float sinhf(float); -long double sinhl(long double); - -double sqrt(double); -float sqrtf(float); -long double sqrtl(long double); - -double tan(double); -float tanf(float); -long double tanl(long double); - -double tanh(double); -float tanhf(float); -long double tanhl(long double); - -double tgamma(double); -float tgammaf(float); -long double tgammal(long double); - -double trunc(double); -float truncf(float); -long double truncl(long double); - - -#if defined(_XOPEN_SOURCE) || defined(_BSD_SOURCE) -#undef MAXFLOAT -#define MAXFLOAT 3.40282346638528859812e+38F -#endif - -#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) -#define M_E 2.7182818284590452354 /* e */ -#define M_LOG2E 1.4426950408889634074 /* log_2 e */ -#define M_LOG10E 0.43429448190325182765 /* log_10 e */ -#define M_LN2 0.69314718055994530942 /* log_e 2 */ -#define M_LN10 2.30258509299404568402 /* log_e 10 */ -#define M_PI 3.14159265358979323846 /* 
pi */ -#define M_PI_2 1.57079632679489661923 /* pi/2 */ -#define M_PI_4 0.78539816339744830962 /* pi/4 */ -#define M_1_PI 0.31830988618379067154 /* 1/pi */ -#define M_2_PI 0.63661977236758134308 /* 2/pi */ -#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ -#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ -#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ - -extern int signgam; - -double j0(double); -double j1(double); -double jn(int, double); - -double y0(double); -double y1(double); -double yn(int, double); -#endif - -#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) -#define HUGE 3.40282346638528859812e+38F - -double drem(double, double); -float dremf(float, float); - -int finite(double); -int finitef(float); - -double scalb(double, double); -float scalbf(float, float); - -double significand(double); -float significandf(float); - -double lgamma_r(double, int*); -float lgammaf_r(float, int*); - -float j0f(float); -float j1f(float); -float jnf(int, float); - -float y0f(float); -float y1f(float); -float ynf(int, float); -#endif - -#ifdef _GNU_SOURCE -long double lgammal_r(long double, int*); - -void sincos(double, double*, double*); -void sincosf(float, float*, float*); -void sincosl(long double, long double*, long double*); - -double exp10(double); -float exp10f(float); -long double exp10l(long double); - -double pow10(double); -float pow10f(float); -long double pow10l(long double); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/sw/math/src/include/features.h b/sw/math/src/include/features.h deleted file mode 100644 index f17bd1516..000000000 --- a/sw/math/src/include/features.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef FEATURES_H -#define FEATURES_H - -#include "../../include/features.h" - -#define weak __attribute__((__weak__)) -#define hidden __attribute__((__visibility__("hidden"))) -#define weak_alias(old, new) \ - extern __typeof(old) new __attribute__((__weak__, __alias__(#old))) - -#endif diff --git 
a/sw/math/src/internal/libm.h b/sw/math/src/internal/libm.h deleted file mode 100644 index 72ad17d8e..000000000 --- a/sw/math/src/internal/libm.h +++ /dev/null @@ -1,274 +0,0 @@ -#ifndef _LIBM_H -#define _LIBM_H - -#include -#include -#include -#include -#include "fp_arch.h" - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN -union ldshape { - long double f; - struct { - uint64_t m; - uint16_t se; - } i; -}; -#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN -/* This is the m68k variant of 80-bit long double, and this definition only works - * on archs where the alignment requirement of uint64_t is <= 4. */ -union ldshape { - long double f; - struct { - uint16_t se; - uint16_t pad; - uint64_t m; - } i; -}; -#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN -union ldshape { - long double f; - struct { - uint64_t lo; - uint32_t mid; - uint16_t top; - uint16_t se; - } i; - struct { - uint64_t lo; - uint64_t hi; - } i2; -}; -#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN -union ldshape { - long double f; - struct { - uint16_t se; - uint16_t top; - uint32_t mid; - uint64_t lo; - } i; - struct { - uint64_t hi; - uint64_t lo; - } i2; -}; -#else -#error Unsupported long double representation -#endif - -/* Support non-nearest rounding mode. */ -#define WANT_ROUNDING 1 -/* Support signaling NaNs. */ -#define WANT_SNAN 0 - -#if WANT_SNAN -#error SNaN is unsupported -#else -#define issignalingf_inline(x) 0 -#define issignaling_inline(x) 0 -#endif - -#ifndef TOINT_INTRINSICS -#define TOINT_INTRINSICS 0 -#endif - -#if TOINT_INTRINSICS -/* Round x to nearest int in all rounding modes, ties have to be rounded - consistently with converttoint so the results match. If the result - would be outside of [-2^31, 2^31-1] then the semantics is unspecified. 
*/ -static double_t roundtoint(double_t); - -/* Convert x to nearest int in all rounding modes, ties have to be rounded - consistently with roundtoint. If the result is not representible in an - int32_t then the semantics is unspecified. */ -static int32_t converttoint(double_t); -#endif - -/* Helps static branch prediction so hot path can be better optimized. */ -#ifdef __GNUC__ -#define predict_true(x) __builtin_expect(!!(x), 1) -#define predict_false(x) __builtin_expect(x, 0) -#else -#define predict_true(x) (x) -#define predict_false(x) (x) -#endif - -/* Evaluate an expression as the specified type. With standard excess - precision handling a type cast or assignment is enough (with - -ffloat-store an assignment is required, in old compilers argument - passing and return statement may not drop excess precision). */ - -static inline float eval_as_float(float x) -{ - float y = x; - return y; -} - -static inline double eval_as_double(double x) -{ - double y = x; - return y; -} - -/* fp_barrier returns its input, but limits code transformations - as if it had a side-effect (e.g. observable io) and returned - an arbitrary value. */ - -#ifndef fp_barrierf -#define fp_barrierf fp_barrierf -static inline float fp_barrierf(float x) -{ - volatile float y = x; - return y; -} -#endif - -#ifndef fp_barrier -#define fp_barrier fp_barrier -static inline double fp_barrier(double x) -{ - volatile double y = x; - return y; -} -#endif - -#ifndef fp_barrierl -#define fp_barrierl fp_barrierl -static inline long double fp_barrierl(long double x) -{ - volatile long double y = x; - return y; -} -#endif - -/* fp_force_eval ensures that the input value is computed when that's - otherwise unused. To prevent the constant folding of the input - expression, an additional fp_barrier may be needed or a compilation - mode that does so (e.g. -frounding-math in gcc). Then it can be - used to evaluate an expression for its fenv side-effects only. 
*/ - -#ifndef fp_force_evalf -#define fp_force_evalf fp_force_evalf -static inline void fp_force_evalf(float x) -{ - volatile float y; - y = x; -} -#endif - -#ifndef fp_force_eval -#define fp_force_eval fp_force_eval -static inline void fp_force_eval(double x) -{ - volatile double y; - y = x; -} -#endif - -#ifndef fp_force_evall -#define fp_force_evall fp_force_evall -static inline void fp_force_evall(long double x) -{ - volatile long double y; - y = x; -} -#endif - -#define FORCE_EVAL(x) do { \ - if (sizeof(x) == sizeof(float)) { \ - fp_force_evalf(x); \ - } else if (sizeof(x) == sizeof(double)) { \ - fp_force_eval(x); \ - } else { \ - fp_force_evall(x); \ - } \ -} while(0) - -#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i -#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f -#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i -#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f - -#define EXTRACT_WORDS(hi,lo,d) \ -do { \ - uint64_t __u = asuint64(d); \ - (hi) = __u >> 32; \ - (lo) = (uint32_t)__u; \ -} while (0) - -#define GET_HIGH_WORD(hi,d) \ -do { \ - (hi) = asuint64(d) >> 32; \ -} while (0) - -#define GET_LOW_WORD(lo,d) \ -do { \ - (lo) = (uint32_t)asuint64(d); \ -} while (0) - -#define INSERT_WORDS(d,hi,lo) \ -do { \ - (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \ -} while (0) - -#define SET_HIGH_WORD(d,hi) \ - INSERT_WORDS(d, hi, (uint32_t)asuint64(d)) - -#define SET_LOW_WORD(d,lo) \ - INSERT_WORDS(d, asuint64(d)>>32, lo) - -#define GET_FLOAT_WORD(w,d) \ -do { \ - (w) = asuint(d); \ -} while (0) - -#define SET_FLOAT_WORD(d,w) \ -do { \ - (d) = asfloat(w); \ -} while (0) - -hidden int __rem_pio2_large(double*,double*,int,int,int); - -hidden int __rem_pio2(double,double*); -hidden double __sin(double,double,int); -hidden double __cos(double,double); -hidden double __tan(double,double,int); -hidden double __expo2(double,double); - -hidden int __rem_pio2f(float,double*); -hidden float __sindf(double); -hidden float 
__cosdf(double); -hidden float __tandf(double,int); -hidden float __expo2f(float,float); - -hidden int __rem_pio2l(long double, long double *); -hidden long double __sinl(long double, long double, int); -hidden long double __cosl(long double, long double); -hidden long double __tanl(long double, long double, int); - -hidden long double __polevll(long double, const long double *, int); -hidden long double __p1evll(long double, const long double *, int); - -extern int __signgam; -hidden double __lgamma_r(double, int *); -hidden float __lgammaf_r(float, int *); - -/* error handling functions */ -hidden float __math_xflowf(uint32_t, float); -hidden float __math_uflowf(uint32_t); -hidden float __math_oflowf(uint32_t); -hidden float __math_divzerof(uint32_t); -hidden float __math_invalidf(float); -hidden double __math_xflow(uint32_t, double); -hidden double __math_uflow(uint32_t); -hidden double __math_oflow(uint32_t); -hidden double __math_divzero(uint32_t); -hidden double __math_invalid(double); -#if LDBL_MANT_DIG != DBL_MANT_DIG -hidden long double __math_invalidl(long double); -#endif - -#endif diff --git a/sw/math/src/math/__math_divzero.c b/sw/math/src/math/__math_divzero.c deleted file mode 100644 index 59d213500..000000000 --- a/sw/math/src/math/__math_divzero.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "libm.h" - -double __math_divzero(uint32_t sign) -{ - return fp_barrier(sign ? 
-1.0 : 1.0) / 0.0; -} diff --git a/sw/math/src/math/__math_invalid.c b/sw/math/src/math/__math_invalid.c deleted file mode 100644 index 177404900..000000000 --- a/sw/math/src/math/__math_invalid.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "libm.h" - -double __math_invalid(double x) -{ - return (x - x) / (x - x); -} diff --git a/sw/math/src/math/__math_invalidf.c b/sw/math/src/math/__math_invalidf.c deleted file mode 100644 index 357d4b121..000000000 --- a/sw/math/src/math/__math_invalidf.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "libm.h" - -float __math_invalidf(float x) -{ - return (x - x) / (x - x); -} diff --git a/sw/math/src/math/__math_invalidl.c b/sw/math/src/math/__math_invalidl.c deleted file mode 100644 index 1fca99de4..000000000 --- a/sw/math/src/math/__math_invalidl.c +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include "libm.h" - -#if LDBL_MANT_DIG != DBL_MANT_DIG -long double __math_invalidl(long double x) -{ - return (x - x) / (x - x); -} -#endif diff --git a/sw/math/src/math/__math_oflow.c b/sw/math/src/math/__math_oflow.c deleted file mode 100644 index c85dbf982..000000000 --- a/sw/math/src/math/__math_oflow.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "libm.h" - -double __math_oflow(uint32_t sign) -{ - return __math_xflow(sign, 0x1p769); -} diff --git a/sw/math/src/math/__math_oflowf.c b/sw/math/src/math/__math_oflowf.c deleted file mode 100644 index fa7d06208..000000000 --- a/sw/math/src/math/__math_oflowf.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "libm.h" - -float __math_oflowf(uint32_t sign) -{ - return __math_xflowf(sign, 0x1p97f); -} diff --git a/sw/math/src/math/__math_uflow.c b/sw/math/src/math/__math_uflow.c deleted file mode 100644 index b90594aee..000000000 --- a/sw/math/src/math/__math_uflow.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "libm.h" - -double __math_uflow(uint32_t sign) -{ - return __math_xflow(sign, 0x1p-767); -} diff --git a/sw/math/src/math/__math_uflowf.c b/sw/math/src/math/__math_uflowf.c deleted file mode 100644 index 
94d50f2bf..000000000 --- a/sw/math/src/math/__math_uflowf.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "libm.h" - -float __math_uflowf(uint32_t sign) -{ - return __math_xflowf(sign, 0x1p-95f); -} diff --git a/sw/math/src/math/__math_xflow.c b/sw/math/src/math/__math_xflow.c deleted file mode 100644 index 744203c4c..000000000 --- a/sw/math/src/math/__math_xflow.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "libm.h" - -double __math_xflow(uint32_t sign, double y) -{ - return eval_as_double(fp_barrier(sign ? -y : y) * y); -} diff --git a/sw/math/src/math/__math_xflowf.c b/sw/math/src/math/__math_xflowf.c deleted file mode 100644 index f2c84784f..000000000 --- a/sw/math/src/math/__math_xflowf.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "libm.h" - -float __math_xflowf(uint32_t sign, float y) -{ - return eval_as_float(fp_barrierf(sign ? -y : y) * y); -} diff --git a/sw/math/src/math/ceil.c b/sw/math/src/math/ceil.c deleted file mode 100644 index b13e6f2d6..000000000 --- a/sw/math/src/math/ceil.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "libm.h" - -#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 -#define EPS DBL_EPSILON -#elif FLT_EVAL_METHOD==2 -#define EPS LDBL_EPSILON -#endif -static const double_t toint = 1/EPS; - -double ceil(double x) -{ - union {double f; uint64_t i;} u = {x}; - int e = u.i >> 52 & 0x7ff; - double_t y; - - if (e >= 0x3ff+52 || x == 0) - return x; - /* y = int(x) - x, where int(x) is an integer neighbor of x */ - if (u.i >> 63) - y = x - toint + toint - x; - else - y = x + toint - toint - x; - /* special case because of non-nearest rounding modes */ - if (e <= 0x3ff-1) { - FORCE_EVAL(y); - return u.i >> 63 ? 
-0.0 : 1; - } - if (y < 0) - return x + y + 1; - return x + y; -} diff --git a/sw/math/src/math/ceilf.c b/sw/math/src/math/ceilf.c deleted file mode 100644 index 869835f39..000000000 --- a/sw/math/src/math/ceilf.c +++ /dev/null @@ -1,27 +0,0 @@ -#include "libm.h" - -float ceilf(float x) -{ - union {float f; uint32_t i;} u = {x}; - int e = (int)(u.i >> 23 & 0xff) - 0x7f; - uint32_t m; - - if (e >= 23) - return x; - if (e >= 0) { - m = 0x007fffff >> e; - if ((u.i & m) == 0) - return x; - FORCE_EVAL(x + 0x1p120f); - if (u.i >> 31 == 0) - u.i += m; - u.i &= ~m; - } else { - FORCE_EVAL(x + 0x1p120f); - if (u.i >> 31) - u.f = -0.0; - else if (u.i << 1) - u.f = 1.0; - } - return u.f; -} diff --git a/sw/math/src/math/ceill.c b/sw/math/src/math/ceill.c deleted file mode 100644 index 60a83020d..000000000 --- a/sw/math/src/math/ceill.c +++ /dev/null @@ -1,34 +0,0 @@ -#include "libm.h" - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -long double ceill(long double x) -{ - return ceil(x); -} -#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 - -static const long double toint = 1/LDBL_EPSILON; - -long double ceill(long double x) -{ - union ldshape u = {x}; - int e = u.i.se & 0x7fff; - long double y; - - if (e >= 0x3fff+LDBL_MANT_DIG-1 || x == 0) - return x; - /* y = int(x) - x, where int(x) is an integer neighbor of x */ - if (u.i.se >> 15) - y = x - toint + toint - x; - else - y = x + toint - toint - x; - /* special case because of non-nearest rounding modes */ - if (e <= 0x3fff-1) { - FORCE_EVAL(y); - return u.i.se >> 15 ? -0.0 : 1; - } - if (y < 0) - return x + y + 1; - return x + y; -} -#endif diff --git a/sw/math/src/math/exp2f_data.c b/sw/math/src/math/exp2f_data.c deleted file mode 100644 index be324727f..000000000 --- a/sw/math/src/math/exp2f_data.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Shared data between expf, exp2f and powf. - * - * Copyright (c) 2017-2018, Arm Limited. 
- * SPDX-License-Identifier: MIT - */ - -#include "exp2f_data.h" - -#define N (1 << EXP2F_TABLE_BITS) - -const struct exp2f_data __exp2f_data = { - /* tab[i] = uint(2^(i/N)) - (i << 52-BITS) - used for computing 2^(k/N) for an int |k| < 150 N as - double(tab[k%N] + (k << 52-BITS)) */ - .tab = { -0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, -0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, -0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, -0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, -0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, -0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, -0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, -0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, - }, - .shift_scaled = 0x1.8p+52 / N, - .poly = { - 0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1, - }, - .shift = 0x1.8p+52, - .invln2_scaled = 0x1.71547652b82fep+0 * N, - .poly_scaled = { - 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N, - }, -}; diff --git a/sw/math/src/math/exp2f_data.h b/sw/math/src/math/exp2f_data.h deleted file mode 100644 index fe744f15b..000000000 --- a/sw/math/src/math/exp2f_data.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#ifndef _EXP2F_DATA_H -#define _EXP2F_DATA_H - -#include -#include - -/* Shared between expf, exp2f and powf. 
*/ -#define EXP2F_TABLE_BITS 5 -#define EXP2F_POLY_ORDER 3 -extern hidden const struct exp2f_data { - uint64_t tab[1 << EXP2F_TABLE_BITS]; - double shift_scaled; - double poly[EXP2F_POLY_ORDER]; - double shift; - double invln2_scaled; - double poly_scaled[EXP2F_POLY_ORDER]; -} __exp2f_data; - -#endif diff --git a/sw/math/src/math/expf.c b/sw/math/src/math/expf.c deleted file mode 100644 index f9fbf8e72..000000000 --- a/sw/math/src/math/expf.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Single-precision e^x function. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include -#include -#include "libm.h" -#include "exp2f_data.h" - -/* -EXP2F_TABLE_BITS = 5 -EXP2F_POLY_ORDER = 3 - -ULP error: 0.502 (nearest rounding.) -Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.) -Wrong count: 170635 (all nearest rounding wrong results with fma.) -Non-nearest ULP error: 1 (rounded ULP error) -*/ - -#define N (1 << EXP2F_TABLE_BITS) -#define InvLn2N __exp2f_data.invln2_scaled -#define T __exp2f_data.tab -#define C __exp2f_data.poly_scaled - -static inline uint32_t top12(float x) -{ - return asuint(x) >> 20; -} - -float expf(float x) -{ - uint32_t abstop; - uint64_t ki, t; - double_t kd, xd, z, r, r2, y, s; - - xd = (double_t)x; - abstop = top12(x) & 0x7ff; - if (predict_false(abstop >= top12(88.0f))) { - /* |x| >= 88 or x is nan. */ - if (asuint(x) == asuint(-INFINITY)) - return 0.0f; - if (abstop >= top12(INFINITY)) - return x + x; - if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ - return __math_oflowf(0); - if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ - return __math_uflowf(0); - } - - /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ - z = InvLn2N * xd; - - /* Round and convert z to int, the result is in [-150*N, 128*N] and - ideally ties-to-even rule is used, otherwise the magnitude of r - can be bigger which gives larger approximation error. 
*/ -#if TOINT_INTRINSICS - kd = roundtoint(z); - ki = converttoint(z); -#else -# define SHIFT __exp2f_data.shift - kd = eval_as_double(z + SHIFT); - ki = asuint64(kd); - kd -= SHIFT; -#endif - r = z - kd; - - /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ - t = T[ki % N]; - t += ki << (52 - EXP2F_TABLE_BITS); - s = asdouble(t); - z = C[0] * r + C[1]; - r2 = r * r; - y = C[2] * r + 1; - y = z * r2 + y; - y = y * s; - return eval_as_float(y); -} diff --git a/sw/math/src/math/expm1.c b/sw/math/src/math/expm1.c deleted file mode 100644 index ac1e61e4f..000000000 --- a/sw/math/src/math/expm1.c +++ /dev/null @@ -1,201 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* expm1(x) - * Returns exp(x)-1, the exponential of x minus 1. - * - * Method - * 1. Argument reduction: - * Given x, find r and integer k such that - * - * x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658 - * - * Here a correction term c will be computed to compensate - * the error in r when rounded to a floating-point number. - * - * 2. Approximating expm1(r) by a special rational function on - * the interval [0,0.34658]: - * Since - * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ... - * we define R1(r*r) by - * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r) - * That is, - * R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r) - * = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r)) - * = 1 - r^2/60 + r^4/2520 - r^6/100800 + ... - * We use a special Remez algorithm on [0,0.347] to generate - * a polynomial of degree 5 in r*r to approximate R1. 
The - * maximum error of this polynomial approximation is bounded - * by 2**-61. In other words, - * R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 - * where Q1 = -1.6666666666666567384E-2, - * Q2 = 3.9682539681370365873E-4, - * Q3 = -9.9206344733435987357E-6, - * Q4 = 2.5051361420808517002E-7, - * Q5 = -6.2843505682382617102E-9; - * z = r*r, - * with error bounded by - * | 5 | -61 - * | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2 - * | | - * - * expm1(r) = exp(r)-1 is then computed by the following - * specific way which minimize the accumulation rounding error: - * 2 3 - * r r [ 3 - (R1 + R1*r/2) ] - * expm1(r) = r + --- + --- * [--------------------] - * 2 2 [ 6 - r*(3 - R1*r/2) ] - * - * To compensate the error in the argument reduction, we use - * expm1(r+c) = expm1(r) + c + expm1(r)*c - * ~ expm1(r) + c + r*c - * Thus c+r*c will be added in as the correction terms for - * expm1(r+c). Now rearrange the term to avoid optimization - * screw up: - * ( 2 2 ) - * ({ ( r [ R1 - (3 - R1*r/2) ] ) } r ) - * expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- ) - * ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 ) - * ( ) - * - * = r - E - * 3. Scale back to obtain expm1(x): - * From step 1, we have - * expm1(x) = either 2^k*[expm1(r)+1] - 1 - * = or 2^k*[expm1(r) + (1-2^-k)] - * 4. Implementation notes: - * (A). To save one multiplication, we scale the coefficient Qi - * to Qi*2^i, and replace z by (x^2)/2. - * (B). To achieve maximum accuracy, we compute expm1(x) by - * (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf) - * (ii) if k=0, return r-E - * (iii) if k=-1, return 0.5*(r-E)-0.5 - * (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E) - * else return 1.0+2.0*(r-E); - * (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1) - * (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else - * (vii) return 2^k(1-((E+2^-k)-r)) - * - * Special cases: - * expm1(INF) is INF, expm1(NaN) is NaN; - * expm1(-INF) is -1, and - * for finite argument, only expm1(0)=0 is exact. 
- * - * Accuracy: - * according to an error analysis, the error is always less than - * 1 ulp (unit in the last place). - * - * Misc. info. - * For IEEE double - * if x > 7.09782712893383973096e+02 then expm1(x) overflow - * - * Constants: - * The hexadecimal values are the intended ones for the following - * constants. The decimal values may be used, provided that the - * compiler will convert from decimal to binary accurately enough - * to produce the hexadecimal values shown. - */ - -#include "libm.h" - -static const double -o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ -ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ -ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ -invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ -/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */ -Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */ -Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */ -Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */ -Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */ -Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ - -double expm1(double x) -{ - double_t y,hi,lo,c,t,e,hxs,hfx,r1,twopk; - union {double f; uint64_t i;} u = {x}; - uint32_t hx = u.i>>32 & 0x7fffffff; - int k, sign = u.i>>63; - - /* filter out huge and non-finite argument */ - if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */ - if (isnan(x)) - return x; - if (sign) - return -1; - if (x > o_threshold) { - x *= 0x1p1023; - return x; - } - } - - /* argument reduction */ - if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ - if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ - if (!sign) { - hi = x - ln2_hi; - lo = ln2_lo; - k = 1; - } else { - hi = x + ln2_hi; - lo = -ln2_lo; - k = -1; - } - } else { - k = invln2*x + (sign ? 
-0.5 : 0.5); - t = k; - hi = x - t*ln2_hi; /* t*ln2_hi is exact here */ - lo = t*ln2_lo; - } - x = hi-lo; - c = (hi-x)-lo; - } else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */ - if (hx < 0x00100000) - FORCE_EVAL((float)x); - return x; - } else - k = 0; - - /* x is now in primary range */ - hfx = 0.5*x; - hxs = x*hfx; - r1 = 1.0+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))); - t = 3.0-r1*hfx; - e = hxs*((r1-t)/(6.0 - x*t)); - if (k == 0) /* c is 0 */ - return x - (x*e-hxs); - e = x*(e-c) - c; - e -= hxs; - /* exp(x) ~ 2^k (x_reduced - e + 1) */ - if (k == -1) - return 0.5*(x-e) - 0.5; - if (k == 1) { - if (x < -0.25) - return -2.0*(e-(x+0.5)); - return 1.0+2.0*(x-e); - } - u.i = (uint64_t)(0x3ff + k)<<52; /* 2^k */ - twopk = u.f; - if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */ - y = x - e + 1.0; - if (k == 1024) - y = y*2.0*0x1p1023; - else - y = y*twopk; - return y - 1.0; - } - u.i = (uint64_t)(0x3ff - k)<<52; /* 2^-k */ - if (k < 20) - y = (x-e+(1-u.f))*twopk; - else - y = (x-(e+u.f)+1)*twopk; - return y; -} diff --git a/sw/math/src/math/log2.c b/sw/math/src/math/log2.c deleted file mode 100644 index 1276ed4e3..000000000 --- a/sw/math/src/math/log2.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Double-precision log2(x) function. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include -#include -#include "libm.h" -#include "log2_data.h" - -#define T __log2_data.tab -#define T2 __log2_data.tab2 -#define B __log2_data.poly1 -#define A __log2_data.poly -#define InvLn2hi __log2_data.invln2hi -#define InvLn2lo __log2_data.invln2lo -#define N (1 << LOG2_TABLE_BITS) -#define OFF 0x3fe6000000000000 - -/* Top 16 bits of a double. 
*/ -static inline uint32_t top16(double x) -{ - return asuint64(x) >> 48; -} - -double log2(double x) -{ - double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; - uint64_t ix, iz, tmp; - uint32_t top; - int k, i; - - ix = asuint64(x); - top = top16(x); -#define LO asuint64(1.0 - 0x1.5b51p-5) -#define HI asuint64(1.0 + 0x1.6ab2p-5) - if (predict_false(ix - LO < HI - LO)) { - /* Handle close to 1.0 inputs separately. */ - /* Fix sign of zero with downward rounding when x==1. */ - if (WANT_ROUNDING && predict_false(ix == asuint64(1.0))) - return 0; - r = x - 1.0; -#if __FP_FAST_FMA - hi = r * InvLn2hi; - lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi); -#else - double_t rhi, rlo; - rhi = asdouble(asuint64(r) & -1ULL << 32); - rlo = r - rhi; - hi = rhi * InvLn2hi; - lo = rlo * InvLn2hi + r * InvLn2lo; -#endif - r2 = r * r; /* rounding error: 0x1p-62. */ - r4 = r2 * r2; - /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */ - p = r2 * (B[0] + r * B[1]); - y = hi + p; - lo += hi - y + p; - lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) + - r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9]))); - y += lo; - return eval_as_double(y); - } - if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) { - /* x < 0x1p-1022 or inf or nan. */ - if (ix * 2 == 0) - return __math_divzero(1); - if (ix == asuint64(INFINITY)) /* log(inf) == inf. */ - return x; - if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) - return __math_invalid(x); - /* x is subnormal, normalize it. */ - ix = asuint64(x * 0x1p52); - ix -= 52ULL << 52; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (52 - LOG2_TABLE_BITS)) % N; - k = (int64_t)tmp >> 52; /* arithmetic shift */ - iz = ix - (tmp & 0xfffULL << 52); - invc = T[i].invc; - logc = T[i].logc; - z = asdouble(iz); - kd = (double_t)k; - - /* log2(x) = log2(z/c) + log2(c) + k. 
*/ - /* r ~= z/c - 1, |r| < 1/(2*N). */ -#if __FP_FAST_FMA - /* rounding error: 0x1p-55/N. */ - r = __builtin_fma(z, invc, -1.0); - t1 = r * InvLn2hi; - t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1); -#else - double_t rhi, rlo; - /* rounding error: 0x1p-55/N + 0x1p-65. */ - r = (z - T2[i].chi - T2[i].clo) * invc; - rhi = asdouble(asuint64(r) & -1ULL << 32); - rlo = r - rhi; - t1 = rhi * InvLn2hi; - t2 = rlo * InvLn2hi + r * InvLn2lo; -#endif - - /* hi + lo = r/ln2 + log2(c) + k. */ - t3 = kd + logc; - hi = t3 + t1; - lo = t3 - hi + t1 + t2; - - /* log2(r+1) = r/ln2 + r^2*poly(r). */ - /* Evaluation is optimized assuming superscalar pipelined execution. */ - r2 = r * r; /* rounding error: 0x1p-54/N^2. */ - r4 = r2 * r2; - /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma). - ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */ - p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]); - y = lo + r2 * p + hi; - return eval_as_double(y); -} diff --git a/sw/math/src/math/log2_data.c b/sw/math/src/math/log2_data.c deleted file mode 100644 index 3dd1ca514..000000000 --- a/sw/math/src/math/log2_data.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Data for log2. - * - * Copyright (c) 2018, Arm Limited. 
- * SPDX-License-Identifier: MIT - */ - -#include "log2_data.h" - -#define N (1 << LOG2_TABLE_BITS) - -const struct log2_data __log2_data = { -// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0 -.invln2hi = 0x1.7154765200000p+0, -.invln2lo = 0x1.705fc2eefa200p-33, -.poly1 = { -// relative error: 0x1.2fad8188p-63 -// in -0x1.5b51p-5 0x1.6ab2p-5 --0x1.71547652b82fep-1, -0x1.ec709dc3a03f7p-2, --0x1.71547652b7c3fp-2, -0x1.2776c50f05be4p-2, --0x1.ec709dd768fe5p-3, -0x1.a61761ec4e736p-3, --0x1.7153fbc64a79bp-3, -0x1.484d154f01b4ap-3, --0x1.289e4a72c383cp-3, -0x1.0b32f285aee66p-3, -}, -.poly = { -// relative error: 0x1.a72c2bf8p-58 -// abs error: 0x1.67a552c8p-66 -// in -0x1.f45p-8 0x1.f45p-8 --0x1.71547652b8339p-1, -0x1.ec709dc3a04bep-2, --0x1.7154764702ffbp-2, -0x1.2776c50034c48p-2, --0x1.ec7b328ea92bcp-3, -0x1.a6225e117f92ep-3, -}, -/* Algorithm: - - x = 2^k z - log2(x) = k + log2(c) + log2(z/c) - log2(z/c) = poly(z/c - 1) - -where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls -into the ith one, then table entries are computed as - - tab[i].invc = 1/c - tab[i].logc = (double)log2(c) - tab2[i].chi = (double)c - tab2[i].clo = (double)(c - (double)c) - -where c is near the center of the subinterval and is chosen by trying +-2^29 -floating point invc candidates around 1/center and selecting one for which - - 1) the rounding error in 0x1.8p10 + logc is 0, - 2) the rounding error in z - chi - clo is < 0x1p-64 and - 3) the rounding error in (double)log2(c) is minimized (< 0x1p-68). - -Note: 1) ensures that k + logc can be computed without rounding error, 2) -ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a -single rounding error when there is no fast fma for z*invc - 1, 3) ensures -that logc + poly(z/c - 1) has small error, however near x == 1 when -|log2(x)| < 0x1p-4, this is not enough so that is special cased. 
*/ -.tab = { -{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1}, -{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1}, -{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1}, -{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2}, -{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2}, -{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2}, -{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2}, -{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2}, -{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2}, -{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2}, -{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2}, -{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2}, -{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2}, -{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2}, -{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2}, -{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2}, -{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2}, -{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2}, -{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2}, -{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2}, -{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3}, -{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3}, -{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3}, -{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3}, -{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3}, -{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3}, -{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3}, -{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3}, -{0x1.19453847f2200p+0, -0x1.162595afdc000p-3}, -{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4}, -{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4}, -{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4}, -{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4}, -{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4}, -{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4}, -{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5}, -{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5}, -{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6}, -{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6}, -{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8}, -{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7}, 
-{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5}, -{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5}, -{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4}, -{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4}, -{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4}, -{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3}, -{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3}, -{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3}, -{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3}, -{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3}, -{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3}, -{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2}, -{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2}, -{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2}, -{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2}, -{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2}, -{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2}, -{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2}, -{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2}, -{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2}, -{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2}, -{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2}, -{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}, -}, -#if !__FP_FAST_FMA -.tab2 = { -{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55}, -{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57}, -{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55}, -{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55}, -{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55}, -{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56}, -{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56}, -{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57}, -{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55}, -{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57}, -{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55}, -{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55}, -{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56}, -{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56}, -{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56}, -{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55}, -{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57}, -{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55}, 
-{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55}, -{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58}, -{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55}, -{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58}, -{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56}, -{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56}, -{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57}, -{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56}, -{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56}, -{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55}, -{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58}, -{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56}, -{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55}, -{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56}, -{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55}, -{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56}, -{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55}, -{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55}, -{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55}, -{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59}, -{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58}, -{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55}, -{0x1.0200004292367p+0, 0x1.b7ff365324681p-54}, -{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55}, -{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58}, -{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54}, -{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55}, -{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54}, -{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54}, -{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54}, -{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55}, -{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55}, -{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56}, -{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54}, -{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56}, -{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54}, -{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56}, -{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54}, -{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56}, -{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55}, -{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55}, 
-{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56}, -{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54}, -{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55}, -{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55}, -{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}, -}, -#endif -}; diff --git a/sw/math/src/math/log2_data.h b/sw/math/src/math/log2_data.h deleted file mode 100644 index 276a786d1..000000000 --- a/sw/math/src/math/log2_data.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#ifndef _LOG2_DATA_H -#define _LOG2_DATA_H - -#include - -#define LOG2_TABLE_BITS 6 -#define LOG2_POLY_ORDER 7 -#define LOG2_POLY1_ORDER 11 -extern hidden const struct log2_data { - double invln2hi; - double invln2lo; - double poly[LOG2_POLY_ORDER - 1]; - double poly1[LOG2_POLY1_ORDER - 1]; - struct { - double invc, logc; - } tab[1 << LOG2_TABLE_BITS]; -#if !__FP_FAST_FMA - struct { - double chi, clo; - } tab2[1 << LOG2_TABLE_BITS]; -#endif -} __log2_data; - -#endif diff --git a/sw/math/src/math/log2f.c b/sw/math/src/math/log2f.c deleted file mode 100644 index c368f88f3..000000000 --- a/sw/math/src/math/log2f.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Single-precision log2 function. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include -#include -#include "libm.h" -#include "log2f_data.h" - -/* -LOG2F_TABLE_BITS = 4 -LOG2F_POLY_ORDER = 4 - -ULP error: 0.752 (nearest rounding.) -Relative error: 1.9 * 2^-26 (before rounding.) -*/ - -#define N (1 << LOG2F_TABLE_BITS) -#define T __log2f_data.tab -#define A __log2f_data.poly -#define OFF 0x3f330000 - -float log2f(float x) -{ - double_t z, r, r2, p, y, y0, invc, logc; - uint32_t ix, iz, top, tmp; - int k, i; - - ix = asuint(x); - /* Fix sign of zero with downward rounding when x==1. */ - if (WANT_ROUNDING && predict_false(ix == 0x3f800000)) - return 0; - if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { - /* x < 0x1p-126 or inf or nan. 
*/ - if (ix * 2 == 0) - return __math_divzerof(1); - if (ix == 0x7f800000) /* log2(inf) == inf. */ - return x; - if ((ix & 0x80000000) || ix * 2 >= 0xff000000) - return __math_invalidf(x); - /* x is subnormal, normalize it. */ - ix = asuint(x * 0x1p23f); - ix -= 23 << 23; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N; - top = tmp & 0xff800000; - iz = ix - top; - k = (int32_t)tmp >> 23; /* arithmetic shift */ - invc = T[i].invc; - logc = T[i].logc; - z = (double_t)asfloat(iz); - - /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ - r = z * invc - 1; - y0 = logc + (double_t)k; - - /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ - r2 = r * r; - y = A[1] * r + A[2]; - y = A[0] * r2 + y; - p = A[3] * r + y0; - y = y * r2 + p; - return eval_as_float(y); -} diff --git a/sw/math/src/math/log2f_data.c b/sw/math/src/math/log2f_data.c deleted file mode 100644 index 24e450f1e..000000000 --- a/sw/math/src/math/log2f_data.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Data definition for log2f. - * - * Copyright (c) 2017-2018, Arm Limited. 
- * SPDX-License-Identifier: MIT - */ - -#include "log2f_data.h" - -const struct log2f_data __log2f_data = { - .tab = { - { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 }, - { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 }, - { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 }, - { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 }, - { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 }, - { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 }, - { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 }, - { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 }, - { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 }, - { 0x1p+0, 0x0p+0 }, - { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 }, - { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 }, - { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 }, - { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 }, - { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 }, - { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }, - }, - .poly = { - -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1, - 0x1.715475f35c8b8p0, - } -}; diff --git a/sw/math/src/math/log2f_data.h b/sw/math/src/math/log2f_data.h deleted file mode 100644 index 4fa489560..000000000 --- a/sw/math/src/math/log2f_data.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#ifndef _LOG2F_DATA_H -#define _LOG2F_DATA_H - -#include - -#define LOG2F_TABLE_BITS 4 -#define LOG2F_POLY_ORDER 4 -extern hidden const struct log2f_data { - struct { - double invc, logc; - } tab[1 << LOG2F_TABLE_BITS]; - double poly[LOG2F_POLY_ORDER]; -} __log2f_data; - -#endif diff --git a/sw/math/src/math/sqrt.c b/sw/math/src/math/sqrt.c deleted file mode 100644 index 5ba265596..000000000 --- a/sw/math/src/math/sqrt.c +++ /dev/null @@ -1,158 +0,0 @@ -#include -#include -#include "libm.h" -#include "sqrt_data.h" - -#define FENV_SUPPORT 1 - -/* returns a*b*2^-32 - e, with error 0 <= e < 1. 
*/ -static inline uint32_t mul32(uint32_t a, uint32_t b) -{ - return (uint64_t)a*b >> 32; -} - -/* returns a*b*2^-64 - e, with error 0 <= e < 3. */ -static inline uint64_t mul64(uint64_t a, uint64_t b) -{ - uint64_t ahi = a>>32; - uint64_t alo = a&0xffffffff; - uint64_t bhi = b>>32; - uint64_t blo = b&0xffffffff; - return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32); -} - -double sqrt(double x) -{ - uint64_t ix, top, m; - - /* special case handling. */ - ix = asuint64(x); - top = ix >> 52; - if (predict_false(top - 0x001 >= 0x7ff - 0x001)) { - /* x < 0x1p-1022 or inf or nan. */ - if (ix * 2 == 0) - return x; - if (ix == 0x7ff0000000000000) - return x; - if (ix > 0x7ff0000000000000) - return __math_invalid(x); - /* x is subnormal, normalize it. */ - ix = asuint64(x * 0x1p52); - top = ix >> 52; - top -= 52; - } - - /* argument reduction: - x = 4^e m; with integer e, and m in [1, 4) - m: fixed point representation [2.62] - 2^e is the exponent part of the result. */ - int even = top & 1; - m = (ix << 11) | 0x8000000000000000; - if (even) m >>= 1; - top = (top + 0x3ff) >> 1; - - /* approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4) - - initial estimate: - 7bit table lookup (1bit exponent and 6bit significand). - - iterative approximation: - using 2 goldschmidt iterations with 32bit int arithmetics - and a final iteration with 64bit int arithmetics. - - details: - - the relative error (e = r0 sqrt(m)-1) of a linear estimate - (r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best, - a table lookup is faster and needs one less iteration - 6 bit lookup table (128b) gives |e| < 0x1.f9p-8 - 7 bit lookup table (256b) gives |e| < 0x1.fdp-9 - for single and double prec 6bit is enough but for quad - prec 7bit is needed (or modified iterations). to avoid - one more iteration >=13bit table would be needed (16k). 
- - a newton-raphson iteration for r is - w = r*r - u = 3 - m*w - r = r*u/2 - can use a goldschmidt iteration for s at the end or - s = m*r - - first goldschmidt iteration is - s = m*r - u = 3 - s*r - r = r*u/2 - s = s*u/2 - next goldschmidt iteration is - u = 3 - s*r - r = r*u/2 - s = s*u/2 - and at the end r is not computed only s. - - they use the same amount of operations and converge at the - same quadratic rate, i.e. if - r1 sqrt(m) - 1 = e, then - r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3 - the advantage of goldschmidt is that the mul for s and r - are independent (computed in parallel), however it is not - "self synchronizing": it only uses the input m in the - first iteration so rounding errors accumulate. at the end - or when switching to larger precision arithmetics rounding - errors dominate so the first iteration should be used. - - the fixed point representations are - m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30 - and after switching to 64 bit - m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62 */ - - static const uint64_t three = 0xc0000000; - uint64_t r, s, d, u, i; - - i = (ix >> 46) % 128; - r = (uint32_t)__rsqrt_tab[i] << 16; - /* |r sqrt(m) - 1| < 0x1.fdp-9 */ - s = mul32(m>>32, r); - /* |s/sqrt(m) - 1| < 0x1.fdp-9 */ - d = mul32(s, r); - u = three - d; - r = mul32(r, u) << 1; - /* |r sqrt(m) - 1| < 0x1.7bp-16 */ - s = mul32(s, u) << 1; - /* |s/sqrt(m) - 1| < 0x1.7bp-16 */ - d = mul32(s, r); - u = three - d; - r = mul32(r, u) << 1; - /* |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case) */ - r = r << 32; - s = mul64(m, r); - d = mul64(s, r); - u = (three<<32) - d; - s = mul64(s, u); /* repr: 3.61 */ - /* -0x1p-57 < s - sqrt(m) < 0x1.8001p-61 */ - s = (s - 2) >> 9; /* repr: 12.52 */ - /* -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63 */ - - /* s < sqrt(m) < s + 0x1.09p-52, - compute nearest rounded result: - the nearest result to 52 bits is either s or s+0x1p-52, - we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m. 
*/ - uint64_t d0, d1, d2; - double y, t; - d0 = (m << 42) - s*s; - d1 = s - d0; - d2 = d1 + s + 1; - s += d1 >> 63; - s &= 0x000fffffffffffff; - s |= top << 52; - y = asdouble(s); - if (FENV_SUPPORT) { - /* handle rounding modes and inexact exception: - only (s+1)^2 == 2^42 m case is exact otherwise - add a tiny value to cause the fenv effects. */ - uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000; - tiny |= (d1^d2) & 0x8000000000000000; - t = asdouble(tiny); - y = eval_as_double(y + t); - } - return y; -} diff --git a/sw/math/src/math/sqrt_data.c b/sw/math/src/math/sqrt_data.c deleted file mode 100644 index 61bc22f43..000000000 --- a/sw/math/src/math/sqrt_data.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "sqrt_data.h" -const uint16_t __rsqrt_tab[128] = { -0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43, -0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b, -0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1, -0x99f0,0x9913,0x983a,0x9765,0x9693,0x95c4,0x94f8,0x9430, -0x936b,0x92a9,0x91ea,0x912e,0x9075,0x8fbe,0x8f0a,0x8e59, -0x8daa,0x8cfe,0x8c54,0x8bac,0x8b07,0x8a64,0x89c4,0x8925, -0x8889,0x87ee,0x8756,0x86c0,0x862b,0x8599,0x8508,0x8479, -0x83ec,0x8361,0x82d8,0x8250,0x81c9,0x8145,0x80c2,0x8040, -0xff02,0xfd0e,0xfb25,0xf947,0xf773,0xf5aa,0xf3ea,0xf234, -0xf087,0xeee3,0xed47,0xebb3,0xea27,0xe8a3,0xe727,0xe5b2, -0xe443,0xe2dc,0xe17a,0xe020,0xdecb,0xdd7d,0xdc34,0xdaf1, -0xd9b3,0xd87b,0xd748,0xd61a,0xd4f1,0xd3cd,0xd2ad,0xd192, -0xd07b,0xcf69,0xce5b,0xcd51,0xcc4a,0xcb48,0xca4a,0xc94f, -0xc858,0xc764,0xc674,0xc587,0xc49d,0xc3b7,0xc2d4,0xc1f4, -0xc116,0xc03c,0xbf65,0xbe90,0xbdbe,0xbcef,0xbc23,0xbb59, -0xba91,0xb9cc,0xb90a,0xb84a,0xb78c,0xb6d0,0xb617,0xb560, -}; diff --git a/sw/math/src/math/sqrt_data.h b/sw/math/src/math/sqrt_data.h deleted file mode 100644 index 260c7f9c2..000000000 --- a/sw/math/src/math/sqrt_data.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _SQRT_DATA_H -#define _SQRT_DATA_H - -#include -#include - -/* if x in [1,2): i = 
(int)(64*x); - if x in [2,4): i = (int)(32*x-64); - __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error: - |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */ -extern hidden const uint16_t __rsqrt_tab[128]; - -#endif diff --git a/sw/math/src/math/sqrtf.c b/sw/math/src/math/sqrtf.c deleted file mode 100644 index 740d81cba..000000000 --- a/sw/math/src/math/sqrtf.c +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include "libm.h" -#include "sqrt_data.h" - -#define FENV_SUPPORT 1 - -static inline uint32_t mul32(uint32_t a, uint32_t b) -{ - return (uint64_t)a*b >> 32; -} - -/* see sqrt.c for more detailed comments. */ - -float sqrtf(float x) -{ - uint32_t ix, m, m1, m0, even, ey; - - ix = asuint(x); - if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { - /* x < 0x1p-126 or inf or nan. */ - if (ix * 2 == 0) - return x; - if (ix == 0x7f800000) - return x; - if (ix > 0x7f800000) - return __math_invalidf(x); - /* x is subnormal, normalize it. */ - ix = asuint(x * 0x1p23f); - ix -= 23 << 23; - } - - /* x = 4^e m; with int e and m in [1, 4). */ - even = ix & 0x00800000; - m1 = (ix << 8) | 0x80000000; - m0 = (ix << 7) & 0x7fffffff; - m = even ? m0 : m1; - - /* 2^e is the exponent part of the return value. */ - ey = ix >> 1; - ey += 0x3f800000 >> 1; - ey &= 0x7f800000; - - /* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations. */ - static const uint32_t three = 0xc0000000; - uint32_t r, s, d, u, i; - i = (ix >> 17) % 128; - r = (uint32_t)__rsqrt_tab[i] << 16; - /* |r*sqrt(m) - 1| < 0x1p-8 */ - s = mul32(m, r); - /* |s/sqrt(m) - 1| < 0x1p-8 */ - d = mul32(s, r); - u = three - d; - r = mul32(r, u) << 1; - /* |r*sqrt(m) - 1| < 0x1.7bp-16 */ - s = mul32(s, u) << 1; - /* |s/sqrt(m) - 1| < 0x1.7bp-16 */ - d = mul32(s, r); - u = three - d; - s = mul32(s, u); - /* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */ - s = (s - 1)>>6; - /* s < sqrt(m) < s + 0x1.08p-23 */ - - /* compute nearest rounded result. 
*/ - uint32_t d0, d1, d2; - float y, t; - d0 = (m << 16) - s*s; - d1 = s - d0; - d2 = d1 + s + 1; - s += d1 >> 31; - s &= 0x007fffff; - s |= ey; - y = asfloat(s); - if (FENV_SUPPORT) { - /* handle rounding and inexact exception. */ - uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000; - tiny |= (d1^d2) & 0x80000000; - t = asfloat(tiny); - y = eval_as_float(y + t); - } - return y; -} diff --git a/sw/math/src/math/tanh.c b/sw/math/src/math/tanh.c deleted file mode 100644 index 20d6dbcf4..000000000 --- a/sw/math/src/math/tanh.c +++ /dev/null @@ -1,45 +0,0 @@ -#include "libm.h" - -/* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x)) - * = (exp(2*x) - 1)/(exp(2*x) - 1 + 2) - * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) - */ -double tanh(double x) -{ - union {double f; uint64_t i;} u = {.f = x}; - uint32_t w; - int sign; - double_t t; - - /* x = |x| */ - sign = u.i >> 63; - u.i &= (uint64_t)-1/2; - x = u.f; - w = u.i >> 32; - - if (w > 0x3fe193ea) { - /* |x| > log(3)/2 ~= 0.5493 or nan */ - if (w > 0x40340000) { - /* |x| > 20 or nan */ - /* note: this branch avoids raising overflow */ - t = 1 - 0/x; - } else { - t = expm1(2*x); - t = 1 - 2/(t+2); - } - } else if (w > 0x3fd058ae) { - /* |x| > log(5/3)/2 ~= 0.2554 */ - t = expm1(2*x); - t = t/(t+2); - } else if (w >= 0x00100000) { - /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */ - t = expm1(-2*x); - t = -t/(t+2); - } else { - /* |x| is subnormal */ - /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */ - FORCE_EVAL((float)x); - t = x; - } - return sign ? 
-t : t; -} diff --git a/sw/math/tools/mkalltypes.sed b/sw/math/tools/mkalltypes.sed deleted file mode 100644 index fa15efc35..000000000 --- a/sw/math/tools/mkalltypes.sed +++ /dev/null @@ -1,15 +0,0 @@ -/^TYPEDEF/s/TYPEDEF \(.*\) \([^ ]*\);$/#if defined(__NEED_\2) \&\& !defined(__DEFINED_\2)\ -typedef \1 \2;\ -#define __DEFINED_\2\ -#endif\ -/ -/^STRUCT/s/STRUCT * \([^ ]*\) \(.*\);$/#if defined(__NEED_struct_\1) \&\& !defined(__DEFINED_struct_\1)\ -struct \1 \2;\ -#define __DEFINED_struct_\1\ -#endif\ -/ -/^UNION/s/UNION * \([^ ]*\) \(.*\);$/#if defined(__NEED_union_\1) \&\& !defined(__DEFINED_union_\1)\ -union \1 \2;\ -#define __DEFINED_union_\1\ -#endif\ -/ diff --git a/sw/snRuntime/src/omp/kmp.c b/sw/snRuntime/src/omp/kmp.c index 6ce2394bd..4cab683c8 100644 --- a/sw/snRuntime/src/omp/kmp.c +++ b/sw/snRuntime/src/omp/kmp.c @@ -244,8 +244,8 @@ void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 globalUpper = *pupper; KMP_PRINTF(50, - "__kmpc_for_static_init_4 gtid %d schedtype %d plast %#x - p[%#x, " + "__kmpc_for_static_init_4 gtid %d schedtype %d plast %#x " + "p[%#x, " "%#x, %#x] incr %d chunk %d\n", gtid, sched, (uint32_t)plastiter, (uint32_t)plower, (uint32_t)pupper, (uint32_t)pstride, incr, chunk); @@ -287,8 +287,7 @@ void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, } KMP_PRINTF(10, - "__kmpc_for_static_init_4 plast %4d p[l %4d, u %4d, i %4d, str - " + "__kmpc_for_static_init_4 plast %4d p[l %4d, u %4d, i %4d, str " "%4d] chunk %d\n", *plastiter, *plower, *pupper, incr, *pstride, chunk); } diff --git a/sw/tests/varargs_1.c b/sw/tests/varargs_1.c index b88074051..caa1820fc 100644 --- a/sw/tests/varargs_1.c +++ b/sw/tests/varargs_1.c @@ -3,8 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 #include -// Use `-O1` for this function and don't inline. -int __attribute__((noinline)) __attribute__((optimize(1))) sum(int N, ...) { +// Use `-O0` for this function and don't inline. 
+int __attribute__((noinline)) __attribute__((optnone)) sum(int N, ...) { int sum = 0; va_list va; va_start(va, N); diff --git a/sw/tests/varargs_2.c b/sw/tests/varargs_2.c index 41c678443..b3f42ce33 100644 --- a/sw/tests/varargs_2.c +++ b/sw/tests/varargs_2.c @@ -3,8 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 #include -// Use `-O1` for this function and don't inline. -int __attribute__((noinline)) __attribute__((optimize(1))) +// Use `-O0` for this function and don't inline. +int __attribute__((noinline)) __attribute__((optnone)) sum(int dummy0, int dummy1, int dummy2, int dummy3, int dummy4, int dummy5, int dummy6, int N, ...) { (void)dummy0; diff --git a/target/snitch_cluster/sw.mk b/target/snitch_cluster/sw.mk index 9ab10b20d..1415fcb4e 100644 --- a/target/snitch_cluster/sw.mk +++ b/target/snitch_cluster/sw.mk @@ -57,7 +57,7 @@ APPS += sw/apps/atax APPS += sw/apps/correlation APPS += sw/apps/covariance -SUBDIRS = sw/runtime/banshee sw/runtime/rtl sw/math $(APPS) sw/tests +SUBDIRS = sw/runtime/banshee sw/runtime/rtl $(APPS) sw/tests .PHONY: sw clean-sw $(SUBDIRS) @@ -70,9 +70,6 @@ clean-sw: sw/runtime/rtl sw/runtime/banshee: $(TARGET_C_HDRS) $(MAKE) -C $@ $(MK_TARGET) -sw/math: - $(MAKE) -C $@ $(MK_TARGET) - # Apps depend on runtime libraries -$(APPS) sw/tests: $(RUNTIME) sw/math +$(APPS) sw/tests: $(RUNTIME) $(MAKE) -C $@ $(MK_TARGET) diff --git a/target/snitch_cluster/sw/apps/common.mk b/target/snitch_cluster/sw/apps/common.mk index 5752b29c6..0b7a10ebd 100644 --- a/target/snitch_cluster/sw/apps/common.mk +++ b/target/snitch_cluster/sw/apps/common.mk @@ -22,7 +22,6 @@ RISCV_CFLAGS += -DBIST else RUNTIME_DIR := $(ROOT)/target/snitch_cluster/sw/runtime/rtl endif -MATH_DIR := $(ROOT)/target/snitch_cluster/sw/math # Paths relative to the app including this Makefile APP_BUILDDIR ?= $(abspath build) @@ -39,9 +38,7 @@ INCDIRS += $(SNRT_DIR)/src INCDIRS += $(SNRT_DIR)/src/omp INCDIRS += $(ROOT)/sw/blas INCDIRS += $(ROOT)/sw/deps/riscv-opcodes -INCDIRS += 
$(ROOT)/sw/math/include -LIBS = $(MATH_DIR)/build/libmath.a LIBS += $(RUNTIME_DIR)/build/libsnRuntime.a LIBDIRS = $(dir $(LIBS)) diff --git a/target/snitch_cluster/sw/math/Makefile b/target/snitch_cluster/sw/math/Makefile deleted file mode 100644 index d0a83e86a..000000000 --- a/target/snitch_cluster/sw/math/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright 2023 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. -# SPDX-License-Identifier: Apache-2.0 -# -# Luca Colagrande - -include ../toolchain.mk -include ../../../../sw/math/Makefile diff --git a/target/snitch_cluster/sw/tests/Makefile b/target/snitch_cluster/sw/tests/Makefile index 57b26d9a0..b2d16fbac 100644 --- a/target/snitch_cluster/sw/tests/Makefile +++ b/target/snitch_cluster/sw/tests/Makefile @@ -76,7 +76,7 @@ $(BUILDDIR)/%.elf: $(SRC_DIR)/%.c $(BUILDDIR)/%.d | $(BUILDDIR) $(RISCV_CC) $(RISCV_CFLAGS) $(RISCV_LDFLAGS) $(SRC_DIR)/$*.c -o $@ $(BUILDDIR)/%.dump: $(BUILDDIR)/%.elf | $(BUILDDIR) - $(RISCV_OBJDUMP) -D -S $< > $@ + $(RISCV_OBJDUMP) $(RISCV_OBJDUMP_FLAGS) $< > $@ $(BUILDDIR)/%.dwarf: $(BUILDDIR)/%.elf | $(BUILDDIR) $(RISCV_DWARFDUMP) --all $< > $@ diff --git a/target/snitch_cluster/sw/toolchain.mk b/target/snitch_cluster/sw/toolchain.mk index 1b7013177..a57f229d2 100644 --- a/target/snitch_cluster/sw/toolchain.mk +++ b/target/snitch_cluster/sw/toolchain.mk @@ -42,22 +42,14 @@ RISCV_CFLAGS += -O3 ifeq ($(DEBUG), ON) RISCV_CFLAGS += -g endif - ifeq ($(OPENOCD_SEMIHOSTING), ON) RISCV_CFLAGS += -DOPENOCD_SEMIHOSTING endif - -# Required by math library to avoid conflict with stdint definition -RISCV_CFLAGS += -D__DEFINED_uint64_t - # Linker flags RISCV_LDFLAGS += -fuse-ld=$(RISCV_LD) RISCV_LDFLAGS += -nostartfiles -RISCV_LDFLAGS += -nostdlib -RISCV_LDFLAGS += -lc -RISCV_LDFLAGS += -L$(LLVM_BINROOT)/../lib/clang/$(LLVM_VER)/lib/ -RISCV_LDFLAGS += -lclang_rt.builtins-riscv32 +RISCV_LDFLAGS += -lm # Archiver flags RISCV_ARFLAGS = rcs