Skip to content

Commit

Permalink
🤖 apply linter changes (will not trigger CI)
Browse files Browse the repository at this point in the history
  • Loading branch information
yamaguchi1024 committed Nov 10, 2024
1 parent 838b9e9 commit ed2abbb
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 92 deletions.
34 changes: 17 additions & 17 deletions examples/quiz1/quiz1/quiz1.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,32 @@
// inp : f32[N] @DRAM,
// out : f32[N] @DRAM
// )
void vec_double( void *ctxt, int_fast32_t N, const float* inp, float* out ) {
EXO_ASSUME(N % 8 == 0);
for (int_fast32_t i = 0; i < N; i++) {
out[i] = 2.0f * inp[i];
}
void vec_double(void *ctxt, int_fast32_t N, const float *inp, float *out) {
  // The generated signature comment gives inp and out N elements each;
  // the function additionally states N % 8 == 0 through EXO_ASSUME.
  // ctxt is accepted but never read in this body.
  EXO_ASSUME(N % 8 == 0);

  // Walk both buffers front to back, writing twice each input element.
  const float *src = inp;
  float *dst = out;
  for (int_fast32_t remaining = N; remaining > 0; remaining--) {
    *dst++ = 2.0f * *src++;
  }
}

// vec_double_optimized(
// N : size,
// inp : f32[N] @DRAM,
// out : f32[N] @DRAM
// )
void vec_double_optimized( void *ctxt, int_fast32_t N, const float* inp, float* out ) {
EXO_ASSUME(N % 8 == 0);
__m256 two_vec;
two_vec = _mm256_broadcast_ss(2.0);
for (int_fast32_t io = 0; io < ((N) / (8)); io++) {
__m256 out_vec;
__m256 inp_vec;
inp_vec = _mm256_loadu_ps(&inp[8 * io]);
out_vec = _mm256_mul_ps(two_vec, inp_vec);
_mm256_storeu_ps(&out[8 * io], out_vec);
}
void vec_double_optimized(
    void *ctxt, int_fast32_t N, const float *inp, float *out) {
  // AVX version of vec_double: out[i] = 2 * inp[i], eight floats per step.
  // ctxt is accepted but never read in this body.
  //
  // N % 8 == 0 is stated through EXO_ASSUME, so the 8-wide loop below
  // visits every element and no scalar tail loop is needed.
  EXO_ASSUME(N % 8 == 0);

  // _mm256_broadcast_ss expects a `const float *`, so it cannot be given the
  // literal 2.0 directly. _mm256_set1_ps takes the value itself and fills
  // all eight lanes with it.
  const __m256 two_vec = _mm256_set1_ps(2.0f);

  for (int_fast32_t io = 0; io < N / 8; io++) {
    // The unaligned load/store variants are used, so inp and out are not
    // required to be 32-byte aligned.
    const __m256 inp_vec = _mm256_loadu_ps(&inp[8 * io]);
    const __m256 out_vec = _mm256_mul_ps(two_vec, inp_vec);
    _mm256_storeu_ps(&out[8 * io], out_vec);
  }
}


/* relying on the following instruction...
vector_assign_two(out)
{out_data} = _mm256_broadcast_ss(2.0);
Expand Down
34 changes: 15 additions & 19 deletions examples/quiz1/quiz1/quiz1.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,58 +7,54 @@
extern "C" {
#endif


#include <stdint.h>
#include <stdbool.h>
#include <stdint.h>

// Compiler feature macros adapted from Hedley (public domain)
// https://github.com/nemequ/hedley

// EXO_HAS_BUILTIN(builtin): forwards to the compiler's __has_builtin when
// that macro is defined; otherwise expands to (0), so every builtin is
// reported as unavailable and later #if tests take their fallback branch.
#if defined(__has_builtin)
# define EXO_HAS_BUILTIN(builtin) __has_builtin(builtin)
#define EXO_HAS_BUILTIN(builtin) __has_builtin(builtin)
#else
# define EXO_HAS_BUILTIN(builtin) (0)
#define EXO_HAS_BUILTIN(builtin) (0)
#endif

// EXO_ASSUME(expr): states that expr holds, picking the first form the
// compiler reports as available through EXO_HAS_BUILTIN:
//   1. __builtin_assume(expr);
//   2. an expression that reaches __builtin_unreachable() when expr is false;
//   3. a plain evaluation of expr whose value is discarded.
// Forms 2 and 3 evaluate expr, so pass only side-effect-free conditions.
#if EXO_HAS_BUILTIN(__builtin_assume)
# define EXO_ASSUME(expr) __builtin_assume(expr)
#define EXO_ASSUME(expr) __builtin_assume(expr)
#elif EXO_HAS_BUILTIN(__builtin_unreachable)
# define EXO_ASSUME(expr) \
((void)((expr) ? 1 : (__builtin_unreachable(), 1)))
#define EXO_ASSUME(expr) ((void)((expr) ? 1 : (__builtin_unreachable(), 1)))
#else
# define EXO_ASSUME(expr) ((void)(expr))
#define EXO_ASSUME(expr) ((void)(expr))
#endif


#ifndef EXO_WIN_1F32
#define EXO_WIN_1F32
struct exo_win_1f32{
float * const data;
const int_fast32_t strides[1];
// Pairs a pointer to float data with a one-element strides array.
// Both members are const-qualified, so they can only be set when the struct
// is initialized.
// NOTE(review): the name suggests a 1-D f32 "window" descriptor emitted by
// the Exo code generator; confirm against the generator.
struct exo_win_1f32 {
float *const data;  // fixed pointer; the floats it points to are writable
const int_fast32_t strides[1];  // single stride entry; units not shown here
};
#endif
#ifndef EXO_WIN_1F32C
#define EXO_WIN_1F32C
struct exo_win_1f32c{
const float * const data;
const int_fast32_t strides[1];
// Read-only counterpart of struct exo_win_1f32: the same two members, but
// data points to const float, so the elements cannot be written through it.
// Both members are const-qualified, so they can only be set when the struct
// is initialized.
// NOTE(review): the trailing "c" presumably stands for "const"; confirm
// against the Exo code generator.
struct exo_win_1f32c {
const float *const data;  // fixed pointer to read-only floats
const int_fast32_t strides[1];  // single stride entry; units not shown here
};
#endif
// vec_double(
// N : size,
// inp : f32[N] @DRAM,
// out : f32[N] @DRAM
// )
void vec_double( void *ctxt, int_fast32_t N, const float* inp, float* out );
void vec_double(void *ctxt, int_fast32_t N, const float *inp, float *out);

// vec_double_optimized(
// N : size,
// inp : f32[N] @DRAM,
// out : f32[N] @DRAM
// )
void vec_double_optimized( void *ctxt, int_fast32_t N, const float* inp, float* out );


void vec_double_optimized(
void *ctxt, int_fast32_t N, const float *inp, float *out);

#ifdef __cplusplus
}
Expand Down
85 changes: 43 additions & 42 deletions examples/quiz2/quiz2/quiz2.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
// b : f32[N] @DRAM,
// c : f32[N] @DRAM
// )
void scaled_add( void *ctxt, int_fast32_t N, const float* a, const float* b, float* c ) {
EXO_ASSUME(N % 8 == 0);
for (int_fast32_t i = 0; i < N; i++) {
c[i] = 2.0f * a[i] + 3.0f * b[i];
}
void scaled_add(
    void *ctxt, int_fast32_t N, const float *a, const float *b, float *c) {
  // Element-wise c[k] = 2*a[k] + 3*b[k] over N elements.
  // N % 8 == 0 is stated through EXO_ASSUME; ctxt is never read here.
  EXO_ASSUME(N % 8 == 0);

  int_fast32_t k = 0;
  while (k < N) {
    // Kept as one expression so the arithmetic matches the reference form.
    c[k] = 2.0f * a[k] + 3.0f * b[k];
    k++;
  }
}

// scaled_add_scheduled(
Expand All @@ -22,42 +23,42 @@ for (int_fast32_t i = 0; i < N; i++) {
// b : f32[N] @DRAM,
// c : f32[N] @DRAM
// )
void scaled_add_scheduled(
    void *ctxt, int_fast32_t N, const float *a, const float *b, float *c) {
  // Blocked form of scaled_add: c[i] = 2*a[i] + 3*b[i], processed in blocks
  // of 8 elements with each intermediate staged in its own 8-float buffer.
  // N % 8 == 0 is stated through EXO_ASSUME, so N / 8 blocks cover every
  // element. ctxt is never read here.
  EXO_ASSUME(N % 8 == 0);

  // The staging buffers are fixed-size stack arrays. Heap-allocating 32 bytes
  // six times per block would add an unchecked failure path (NULL from
  // malloc) and allocator traffic inside the loop.
  float two[8];
  float three[8];
  // The scale factors do not depend on the block index, so fill them once.
  for (int_fast32_t ii = 0; ii < 8; ii++) {
    two[ii] = 2.0f;
    three[ii] = 3.0f;
  }

  for (int_fast32_t io = 0; io < N / 8; io++) {
    float a_blk[8];
    float b_blk[8];
    float a_scaled[8];
    float b_scaled[8];

    // Stage and scale the block of a.
    for (int_fast32_t ii = 0; ii < 8; ii++) {
      a_blk[ii] = a[8 * io + ii];
    }
    for (int_fast32_t ii = 0; ii < 8; ii++) {
      a_scaled[ii] = two[ii] * a_blk[ii];
    }

    // Stage and scale the block of b.
    for (int_fast32_t ii = 0; ii < 8; ii++) {
      b_blk[ii] = b[8 * io + ii];
    }
    for (int_fast32_t ii = 0; ii < 8; ii++) {
      b_scaled[ii] = three[ii] * b_blk[ii];
    }

    // All reads of this block happen before any write to c.
    for (int_fast32_t ii = 0; ii < 8; ii++) {
      c[8 * io + ii] = a_scaled[ii] + b_scaled[ii];
    }
  }
}

24 changes: 10 additions & 14 deletions examples/quiz2/quiz2/quiz2.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,47 +7,43 @@
extern "C" {
#endif


#include <stdint.h>
#include <stdbool.h>
#include <stdint.h>

// Compiler feature macros adapted from Hedley (public domain)
// https://github.com/nemequ/hedley

// EXO_HAS_BUILTIN(builtin): forwards to the compiler's __has_builtin when
// that macro is defined; otherwise expands to (0), so every builtin is
// reported as unavailable and later #if tests take their fallback branch.
#if defined(__has_builtin)
# define EXO_HAS_BUILTIN(builtin) __has_builtin(builtin)
#define EXO_HAS_BUILTIN(builtin) __has_builtin(builtin)
#else
# define EXO_HAS_BUILTIN(builtin) (0)
#define EXO_HAS_BUILTIN(builtin) (0)
#endif

// EXO_ASSUME(expr): states that expr holds, picking the first form the
// compiler reports as available through EXO_HAS_BUILTIN:
//   1. __builtin_assume(expr);
//   2. an expression that reaches __builtin_unreachable() when expr is false;
//   3. a plain evaluation of expr whose value is discarded.
// Forms 2 and 3 evaluate expr, so pass only side-effect-free conditions.
#if EXO_HAS_BUILTIN(__builtin_assume)
# define EXO_ASSUME(expr) __builtin_assume(expr)
#define EXO_ASSUME(expr) __builtin_assume(expr)
#elif EXO_HAS_BUILTIN(__builtin_unreachable)
# define EXO_ASSUME(expr) \
((void)((expr) ? 1 : (__builtin_unreachable(), 1)))
#define EXO_ASSUME(expr) ((void)((expr) ? 1 : (__builtin_unreachable(), 1)))
#else
# define EXO_ASSUME(expr) ((void)(expr))
#define EXO_ASSUME(expr) ((void)(expr))
#endif



// scaled_add(
// N : size,
// a : f32[N] @DRAM,
// b : f32[N] @DRAM,
// c : f32[N] @DRAM
// )
void scaled_add( void *ctxt, int_fast32_t N, const float* a, const float* b, float* c );
void scaled_add(
void *ctxt, int_fast32_t N, const float *a, const float *b, float *c);

// scaled_add_scheduled(
// N : size,
// a : f32[N] @DRAM,
// b : f32[N] @DRAM,
// c : f32[N] @DRAM
// )
void scaled_add_scheduled( void *ctxt, int_fast32_t N, const float* a, const float* b, float* c );


void scaled_add_scheduled(
void *ctxt, int_fast32_t N, const float *a, const float *b, float *c);

#ifdef __cplusplus
}
Expand Down

0 comments on commit ed2abbb

Please sign in to comment.