From c7a663879dd1542ba1406e6938e77334cd74ff73 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 01/12] Trigger tests on push to devel or main branch

---
 .github/workflows/anaconda_linux.yml   | 2 +-
 .github/workflows/anaconda_windows.yml | 2 +-
 .github/workflows/intel.yml            | 2 +-
 .github/workflows/linux.yml            | 2 +-
 .github/workflows/macosx.yml           | 2 +-
 .github/workflows/pickle.yml           | 2 +-
 .github/workflows/pickle_wheel.yml     | 2 +-
 .github/workflows/windows.yml          | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml
index 5a5384e5ce..525903a54f 100644
--- a/.github/workflows/anaconda_linux.yml
+++ b/.github/workflows/anaconda_linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml
index 154a4d01e8..0f3f8a04ed 100644
--- a/.github/workflows/anaconda_windows.yml
+++ b/.github/workflows/anaconda_windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 977d5f9afd..5f340e1088 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -29,7 +29,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index ad39cee725..664ae3aa60 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   matrix_prep:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml
index 4768a64efa..f51041c0b8 100644
--- a/.github/workflows/macosx.yml
+++ b/.github/workflows/macosx.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: macos-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml
index 052028a5cb..cc3864afd2 100644
--- a/.github/workflows/pickle.yml
+++ b/.github/workflows/pickle.yml
@@ -31,7 +31,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-matrix.outputs.python_version }}
       matrix: ${{ steps.set-matrix.outputs.matrix }}
diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml
index 1dc82af503..718dc13dcc 100644
--- a/.github/workflows/pickle_wheel.yml
+++ b/.github/workflows/pickle_wheel.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 60c560ffee..827038a279 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:

From 821a1c5ea3fd29387848c137f53b9ca34194b59c Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:46:33 +0100
Subject: [PATCH 02/12] Add cuda workflow to test cuda developments on CI

---
 .github/actions/coverage_install/action.yml |  2 +-
 .github/actions/linux_install/action.yml    | 10 +--
 .github/actions/pytest_run/action.yml       |  4 +-
 .github/actions/pytest_run_cuda/action.yml  | 17 +++++
 .github/actions/python_install/action.yml   | 17 +++++
 .github/workflows/cuda.yml                  | 83 +++++++++++++++++++++
 ci_tools/bot_messages/show_tests.txt        |  1 +
 ci_tools/bot_tools/bot_funcs.py             | 12 +--
 ci_tools/devel_branch_tests.py              |  1 +
 ci_tools/json_pytest_output.py              |  2 +-
 10 files changed, 135 insertions(+), 14 deletions(-)
 create mode 100644 .github/actions/pytest_run_cuda/action.yml
 create mode 100644 .github/actions/python_install/action.yml
 create mode 100644 .github/workflows/cuda.yml

diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml
index ac5294e542..5732baee34 100644
--- a/.github/actions/coverage_install/action.yml
+++ b/.github/actions/coverage_install/action.yml
@@ -15,7 +15,7 @@ runs:
     - name: Directory Creation
       run: |
         INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])")
-        SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
+        SITE_DIR=$(dirname ${INSTALL_DIR})
         echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth
         echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc
         echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV
diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml
index 8fb5cd8505..0ef9a69b8e 100644
--- a/.github/actions/linux_install/action.yml
+++ b/.github/actions/linux_install/action.yml
@@ -9,22 +9,22 @@ runs:
       shell: bash
     - name: Install fortran
       run:
-        sudo apt-get install gfortran
+        sudo apt-get install -y gfortran
       shell: bash
     - name: Install LaPack
       run:
-        sudo apt-get install libblas-dev liblapack-dev
+        sudo apt-get install -y libblas-dev liblapack-dev
       shell: bash
     - name: Install MPI
       run: |
-        sudo apt-get install libopenmpi-dev openmpi-bin
+        sudo apt-get install -y libopenmpi-dev openmpi-bin
         echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV
       shell: bash
     - name: Install OpenMP
       run:
-        sudo apt-get install libomp-dev libomp5
+        sudo apt-get install -y libomp-dev libomp5
       shell: bash
     - name: Install Valgrind
       run:
-        sudo apt-get install valgrind
+        sudo apt-get install -y valgrind
       shell: bash
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index 0b6f0f988d..b0bdc31f16 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
new file mode 100644
index 0000000000..52092a6e02
--- /dev/null
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -0,0 +1,17 @@
+name: 'Pyccel pytest commands generating Ccuda'
+inputs:
+  shell_cmd:
+    description: 'Specifies the shell command (different for anaconda)'
+    required: false
+    default: "bash"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Ccuda tests with pytest
+      run: |
+        # Catch exit 5 (no tests found)
+        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        pyccel-clean
+      shell: ${{ inputs.shell_cmd }}
+      working-directory: ./tests
diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml
new file mode 100644
index 0000000000..f9b720e3e1
--- /dev/null
+++ b/.github/actions/python_install/action.yml
@@ -0,0 +1,17 @@
+name: 'Python installation commands'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install python
+      run:
+        sudo apt-get -y install python3-dev
+      shell: bash
+    - name: python as python3
+      run:
+        sudo apt-get -y install python-is-python3
+      shell: bash
+    - name: Install Pip
+      run:
+        sudo apt-get -y install python3-pip
+      shell: bash
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
new file mode 100644
index 0000000000..833ebf5d85
--- /dev/null
+++ b/.github/workflows/cuda.yml
@@ -0,0 +1,83 @@
+name: Cuda unit tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      python_version:
+        required: false
+        type: string
+      ref:
+        required: false
+        type: string
+      check_run_id:
+        required: false
+        type: string
+      pr_repo:
+        required: false
+        type: string
+  push:
+    branches: [devel, main]
+
+env:
+  COMMIT: ${{ inputs.ref || github.event.ref }}
+  PEM: ${{ secrets.BOT_PEM }}
+  GITHUB_RUN_ID: ${{ github.run_id }}
+  GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }}
+  PR_REPO: ${{ inputs.pr_repo || github.repository }}
+
+jobs:
+  Cuda:
+
+    runs-on: ubuntu-20.04
+    name: Unit tests
+
+    container: nvidia/cuda:11.7.1-devel-ubuntu20.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ env.COMMIT }}
+          repository: ${{ env.PR_REPO }}
+      - name: Prepare docker
+        run: |
+          apt update && apt install sudo
+          TZ=Europe/France
+          ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+          DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
+        shell: bash
+      - name: Install python (setup-python action doesn't work with containers)
+        uses: ./.github/actions/python_install
+      - name: "Setup"
+        id: token
+        run: |
+          pip install jwt requests
+          python ci_tools/setup_check_run.py cuda
+      - name: CUDA Version
+        run: nvcc --version # cuda install check
+      - name: Install dependencies
+        uses: ./.github/actions/linux_install
+      - name: Install Pyccel with tests
+        run: |
+            PATH=${PATH}:$HOME/.local/bin
+            echo "PATH=${PATH}" >> $GITHUB_ENV
+            python -m pip install --upgrade pip
+            python -m pip install --user .[test]
+        shell: bash
+      - name: Coverage install
+        uses: ./.github/actions/coverage_install
+      - name: Ccuda tests with pytest
+        id: cuda_pytest
+        uses: ./.github/actions/pytest_run_cuda
+      - name: Collect coverage information
+        continue-on-error: True
+        uses: ./.github/actions/coverage_collection
+      - name: Save code coverage report
+        uses: actions/upload-artifact@v3
+        with:
+          name: coverage-artifact
+          path: .coverage
+          retention-days: 1
+      - name: "Post completed"
+        if: always()
+        run:
+          python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }}
+
diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt
index adc07e8431..eb15492d2e 100644
--- a/ci_tools/bot_messages/show_tests.txt
+++ b/ci_tools/bot_messages/show_tests.txt
@@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol
 - **linux** : Runs the unit tests on a Linux system.
 - **windows** : Runs the unit tests on a Windows system.
 - **macosx** : Runs the unit tests on a MacOS X system.
+- **cuda** : Runs the cuda unit tests on a Linux system.
 - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests.
 - **docs** : Checks if the documentation follows the numpydoc format.
 - **pylint** : Runs pylint on files which are too big to be handled by codacy.
diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py
index 7084a01bb9..1621d1d089 100644
--- a/ci_tools/bot_tools/bot_funcs.py
+++ b/ci_tools/bot_tools/bot_funcs.py
@@ -23,7 +23,8 @@
         'pyccel_lint': '3.8',
         'pylint': '3.8',
         'spelling': '3.8',
-        'windows': '3.8'
+        'windows': '3.8',
+        'cuda': '-'
         }
 
 test_names = {
@@ -40,15 +41,16 @@
         'pyccel_lint': "Pyccel best practices",
         'pylint': "Python linting",
         'spelling': "Spelling verification",
-        'windows': "Unit tests on Windows"
+        'windows': "Unit tests on Windows",
+        'cuda': "Unit tests on Linux with cuda"
         }
 
-test_dependencies = {'coverage':['linux']}
+test_dependencies = {'coverage':['linux', 'cuda']}
 
 tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint')
 
 pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint',
-                'pyccel_lint', 'spelling')
+                'pyccel_lint', 'spelling', 'cuda')
 
 review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"]
 
@@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state):
             True if the test should be run, False otherwise.
         """
         print("Checking : ", name, key)
-        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'):
+        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'):
             has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment
                                                     and f.endswith('.py') and f != 'pyccel/version.py'
                                                     for f in diff)
diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py
index 1102ef9e92..ec67b6c49a 100644
--- a/ci_tools/devel_branch_tests.py
+++ b/ci_tools/devel_branch_tests.py
@@ -15,3 +15,4 @@
     bot.run_tests(['anaconda_linux'], '3.10', force_run = True)
     bot.run_tests(['anaconda_windows'], '3.10', force_run = True)
     bot.run_tests(['intel'], '3.9', force_run = True)
+    bot.run_tests(['cuda'], '-', force_run = True)
diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py
index 409ae76d72..b84f4a4c09 100644
--- a/ci_tools/json_pytest_output.py
+++ b/ci_tools/json_pytest_output.py
@@ -61,7 +61,7 @@ def     mini_md_summary(title, outcome, failed_tests):
     summary = ""
 
     failed_pattern = re.compile(r".*FAILED.*")
-    languages = ('c', 'fortran', 'python')
+    languages = ('c', 'fortran', 'python', 'cuda')
     pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages}
 
     for i in p_args.tests:

From 092b557cf0ead7c949731adf40f0acd6678dbe66 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 03/12] Trigger tests on push to devel or main branch

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 9111b47d52..cf52b1c624 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
   waitForWorklows:
     name: Wait for workflows
     runs-on: ubuntu-latest
-    if: github.event.workflow_run.head_branch == 'main'
+    if: github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel'
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

From 02a2360e41a3f3d09b31e271609dbe642c13ac01 Mon Sep 17 00:00:00 2001
From: bauom <40796259+bauom@users.noreply.github.com>
Date: Wed, 28 Feb 2024 18:11:50 +0100
Subject: [PATCH 04/12] [init] Adding CUDA language/compiler and CodePrinter
 (#32)

This PR aims to make the C code compilable using nvcc. The cuda language was added as well as a CudaCodePrinter.

Changes to stdlib:

Wrapped expressions using complex types in an `ifndef __NVCC__` to avoid processing them with the nvcc compiler

---------

Co-authored-by: Mouad Elalj, EmilyBourne
---
 .dict_custom.txt                           |   1 +
 .github/actions/pytest_parallel/action.yml |   4 +-
 .github/actions/pytest_run/action.yml      |   4 +-
 .github/actions/pytest_run_cuda/action.yml |  11 +-
 CHANGELOG.md                               |   6 +
 pyccel/codegen/codegen.py                  |   8 +-
 pyccel/codegen/compiling/compilers.py      |   5 +-
 pyccel/codegen/pipeline.py                 |   5 +-
 pyccel/codegen/printing/cucode.py          |  74 +++++++++++
 pyccel/commands/console.py                 |   2 +-
 pyccel/compilers/default_compilers.py      |  13 +-
 pyccel/naming/__init__.py                  |   4 +-
 pyccel/naming/cudanameclashchecker.py      |  92 ++++++++++++++
 pyccel/stdlib/numpy/numpy_c.c              |   2 +
 pyccel/stdlib/numpy/numpy_c.h              |   2 +
 pytest.ini                                 |   1 +
 tests/conftest.py                          |  11 ++
 tests/epyccel/test_base.py                 | 136 ++++++++++-----------
 18 files changed, 298 insertions(+), 83 deletions(-)
 create mode 100644 pyccel/codegen/printing/cucode.py
 create mode 100644 pyccel/naming/cudanameclashchecker.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index 82a6b10d31..ae99f31ed4 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -110,6 +110,7 @@ Valgrind
 variadic
 subclasses
 oneAPI
+Cuda
 getter
 setter
 bitwise
diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml
index c7c77d99c7..f91d84915b 100644
--- a/.github/actions/pytest_parallel/action.yml
+++ b/.github/actions/pytest_parallel/action.yml
@@ -10,8 +10,8 @@ runs:
   steps:
     - name: Test with pytest
       run: |
-        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx
-        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx
+        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx
+        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
 
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index b0bdc31f16..451fa39e92 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
index 52092a6e02..46f90552ed 100644
--- a/.github/actions/pytest_run_cuda/action.yml
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -1,4 +1,4 @@
-name: 'Pyccel pytest commands generating Ccuda'
+name: 'Pyccel pytest commands generating Cuda'
 inputs:
   shell_cmd:
     description: 'Specifies the shell command (different for anaconda)'
@@ -11,7 +11,14 @@ runs:
     - name: Ccuda tests with pytest
       run: |
         # Catch exit 5 (no tests found)
-        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
+    - name: Final step
+      if: always()
+      id: status
+      run:
+        python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out"
+                      
+      shell: ${{ inputs.shell_cmd }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4807a17474..ce9212abc6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## \[Cuda - UNRELEASED\]
+
+### Added
+
+-   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+
 ## \[UNRELEASED\]
 
 ### Added
diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py
index daf4559df4..8d4abb6bdb 100644
--- a/pyccel/codegen/codegen.py
+++ b/pyccel/codegen/codegen.py
@@ -9,16 +9,18 @@
 from pyccel.codegen.printing.fcode  import FCodePrinter
 from pyccel.codegen.printing.ccode  import CCodePrinter
 from pyccel.codegen.printing.pycode import PythonCodePrinter
+from pyccel.codegen.printing.cucode import CudaCodePrinter
 
 from pyccel.ast.core      import FunctionDef, Interface, ModuleHeader
 from pyccel.utilities.stage import PyccelStage
 
-_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py'}
-_header_extension_registry = {'fortran': None, 'c':'h',  'python':None}
+_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py', 'cuda':'cu'}
+_header_extension_registry = {'fortran': None, 'c':'h',  'python':None, 'cuda':'h'}
 printer_registry    = {
                         'fortran':FCodePrinter,
                         'c':CCodePrinter,
-                        'python':PythonCodePrinter
+                        'python':PythonCodePrinter,
+                        'cuda':CudaCodePrinter
                       }
 
 pyccel_stage = PyccelStage()
diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index fca93c5624..ef11579e49 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -441,7 +441,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh
         # Collect compile information
         exec_cmd, includes, libs_flags, libdirs_flags, m_code = \
                 self._get_compile_components(compile_obj, accelerators)
-        linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
+        if self._info['exec'] == 'nvcc':
+            linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags]
+        else:
+            linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
 
         flags.insert(0,"-shared")
 
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index c0f8634e03..1e9d0e327d 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -180,9 +180,10 @@ def handle_error(stage):
     if language is None:
         language = 'fortran'
 
-    # Choose Fortran compiler
+    # Choose Default compiler
     if compiler is None:
-        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU')
+        default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU'
+        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family)
 
     fflags = [] if fflags is None else fflags.split()
     wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split()
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
new file mode 100644
index 0000000000..86146b065b
--- /dev/null
+++ b/pyccel/codegen/printing/cucode.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Provide tools for generating and handling CUDA code.
+This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA,
+enabling the direct translation of high-level Pyccel expressions into CUDA code.
+"""
+
+from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+
+from pyccel.ast.core        import Import, Module
+
+from pyccel.errors.errors   import Errors
+
+
+errors = Errors()
+
+__all__ = ["CudaCodePrinter"]
+
+class CudaCodePrinter(CCodePrinter):
+    """
+    Print code in CUDA format.
+
+    This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code.
+    Navigation through this file utilizes _print_X functions,
+    as is common with all printers.
+
+    Parameters
+    ----------
+    filename : str
+            The name of the file being pyccelised.
+    prefix_module : str
+            A prefix to be added to the name of the module.
+    """
+    language = "cuda"
+
+    def __init__(self, filename, prefix_module = None):
+
+        errors.set_target(filename)
+
+        super().__init__(filename)
+
+    def _print_Module(self, expr):
+        self.set_scope(expr.scope)
+        self._current_module = expr.name
+        body = ''.join(self._print(i) for i in expr.body)
+
+        global_variables = ''.join(self._print(d) for d in expr.declarations)
+
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
+        c_headers_imports = ''
+        local_imports = ''
+
+        for imp in imports:
+            if imp.source in c_library_headers:
+                c_headers_imports += self._print(imp)
+            else:
+                local_imports += self._print(imp)
+
+        imports = f'{c_headers_imports}\
+                    extern "C"{{\n\
+                    {local_imports}\
+                    }}'
+
+        code = f'{imports}\n\
+                 {global_variables}\n\
+                 {body}\n'
+
+        self.exit_scope()
+        return code
diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py
index 596c440ec0..fcbec009de 100644
--- a/pyccel/commands/console.py
+++ b/pyccel/commands/console.py
@@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com
     # ... backend compiler options
     group = parser.add_argument_group('Backend compiler options')
 
-    group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language')
+    group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language')
 
     group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}')
 
diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py
index 166085d22e..d47856773c 100644
--- a/pyccel/compilers/default_compilers.py
+++ b/pyccel/compilers/default_compilers.py
@@ -185,6 +185,15 @@
                 },
             'family': 'nvidia',
             }
+#------------------------------------------------------------
+nvcc_info = {'exec'         : 'nvcc',
+             'language'     : 'cuda',
+             'debug_flags'  : ("-g",),
+             'release_flags': ("-O3",),
+             'general_flags': ('--compiler-options', '-fPIC',),
+             'family'       : 'nvidia'
+            }
+
 
 #------------------------------------------------------------
 def change_to_lib_flag(lib):
@@ -288,6 +297,7 @@ def change_to_lib_flag(lib):
 pgfortran_info.update(python_info)
 nvc_info.update(python_info)
 nvfort_info.update(python_info)
+nvcc_info.update(python_info)
 
 available_compilers = {('GNU', 'c') : gcc_info,
                        ('GNU', 'fortran') : gfort_info,
@@ -296,6 +306,7 @@ def change_to_lib_flag(lib):
                        ('PGI', 'c') : pgcc_info,
                        ('PGI', 'fortran') : pgfortran_info,
                        ('nvidia', 'c') : nvc_info,
-                       ('nvidia', 'fortran') : nvfort_info}
+                       ('nvidia', 'fortran') : nvfort_info,
+                       ('nvidia', 'cuda'): nvcc_info}
 
 vendors = ('GNU','intel','PGI','nvidia')
diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py
index 72c318d3ad..b3e4bbbe0e 100644
--- a/pyccel/naming/__init__.py
+++ b/pyccel/naming/__init__.py
@@ -10,7 +10,9 @@
 from .fortrannameclashchecker import FortranNameClashChecker
 from .cnameclashchecker import CNameClashChecker
 from .pythonnameclashchecker import PythonNameClashChecker
+from .cudanameclashchecker import CudaNameClashChecker
 
 name_clash_checkers = {'fortran':FortranNameClashChecker(),
         'c':CNameClashChecker(),
-        'python':PythonNameClashChecker()}
+        'python':PythonNameClashChecker(),
+        'cuda':CudaNameClashChecker()}
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
new file mode 100644
index 0000000000..971204e912
--- /dev/null
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Handles name clash problems in Cuda
+"""
+from .languagenameclashchecker import LanguageNameClashChecker
+
+class CudaNameClashChecker(LanguageNameClashChecker):
+    """
+    Class containing functions to help avoid problematic names in Cuda.
+
+    A class which provides functionalities to check or propose variable names and
+    verify that they do not cause name clashes. Name clashes may be due to
+    new variables, or due to the use of reserved keywords.
+    """
+    # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
+    keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
+        'continue', 'default', 'do', 'double', 'else', 'enum',
+        'extern', 'float', 'for', 'goto', 'if', 'inline', 'int',
+        'long', 'register', 'restrict', 'return', 'short', 'signed',
+        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
+        'unsigned', 'void', 'volatile', 'whie', '_Alignas',
+        '_Alignof', '_Atomic', '_Bool', '_Complex', 'Decimal128',
+        '_Decimal32', '_Decimal64', '_Generic', '_Imaginary',
+        '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray',
+        'array_create', 'new_slice', 'array_slicing', 'alias_assign',
+        'transpose_alias_assign', 'array_fill', 't_slice',
+        'GET_INDEX_EXP1', 'GET_INDEX_EXP2', 'GET_INDEX_EXP2',
+        'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5',
+        'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8',
+        'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11',
+        'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14',
+        'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS',
+        'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
+        'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
+        'get_index', 'numpy_to_ndarray_strides',
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+
+    def has_clash(self, name, symbols):
+        """
+        Indicate whether the proposed name causes any clashes.
+
+        Checks if a suggested name conflicts with predefined
+        keywords or specified symbols,returning true for a clash.
+        This method is crucial for maintaining namespace integrity and
+        preventing naming conflicts in code generation processes.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        bool
+            True if the name is a collision.
+            False if the name is collision free.
+        """
+        return any(name == k for k in self.keywords) or \
+               any(name == s for s in symbols)
+
+    def get_collisionless_name(self, name, symbols):
+        """
+        Get a valid name which doesn't collision with symbols or Cuda keywords.
+
+        Find a new name based on the suggested name which will not cause
+        conflicts with Cuda keywords, does not appear in the provided symbols,
+        and is a valid name in Cuda code.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        str
+            A new name which is collision free.
+        """
+        if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)):
+            # Ignore magic methods
+            return name
+        if name[0] == '_':
+            name = 'private'+name
+        return self._get_collisionless_name(name, symbols)
diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c
index 7c9ecbbf6b..bc56214772 100644
--- a/pyccel/stdlib/numpy/numpy_c.c
+++ b/pyccel/stdlib/numpy/numpy_c.c
@@ -17,8 +17,10 @@ double  fsign(double x)
     return SIGN(x);
 }
 
+#ifndef __NVCC__
 /* numpy.sign for complex */
 double complex csign(double complex x)
 {
     return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? -1 : 1) : 0;
 }
+#endif
diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h
index e72cf3ad57..c2a16a5516 100644
--- a/pyccel/stdlib/numpy/numpy_c.h
+++ b/pyccel/stdlib/numpy/numpy_c.h
@@ -15,6 +15,8 @@
 
 long long int isign(long long int x);
 double fsign(double x);
+#ifndef __NVCC__
 double complex csign(double complex x);
+#endif
 
 #endif
diff --git a/pytest.ini b/pytest.ini
index 42eb0d72ba..3792ab65f9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -9,3 +9,4 @@ markers =
     python: test to generate python code
     xdist_incompatible: test which compiles a file also compiled by another test
     external: test using an external dll (problematic with conda on Windows)
+    cuda: test to generate cuda code
diff --git a/tests/conftest.py b/tests/conftest.py
index 79144b6978..a5082ef6e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,6 +21,17 @@
 def language(request):
     return request.param
 
+@pytest.fixture( params=[
+        pytest.param("fortran", marks = pytest.mark.fortran),
+        pytest.param("c", marks = pytest.mark.c),
+        pytest.param("python", marks = pytest.mark.python),
+        pytest.param("cuda", marks = pytest.mark.cuda)
+    ],
+    scope = "session"
+)
+def language_with_cuda(request):
+    return request.param
+
 def move_coverage(path_dir):
     for root, _, files in os.walk(path_dir):
         for name in files:
diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py
index c22064d321..413f79eef1 100644
--- a/tests/epyccel/test_base.py
+++ b/tests/epyccel/test_base.py
@@ -7,128 +7,128 @@
 from utilities import epyccel_test
 
 
-def test_is_false(language):
-    test = epyccel_test(base.is_false, lang=language)
+def test_is_false(language_with_cuda):
+    test = epyccel_test(base.is_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_is_true(language):
-    test = epyccel_test(base.is_true, lang=language)
+def test_is_true(language_with_cuda):
+    test = epyccel_test(base.is_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_compare_is(language):
-    test = epyccel_test(base.compare_is, lang=language)
+def test_compare_is(language_with_cuda):
+    test = epyccel_test(base.compare_is, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_not(language):
-    test = epyccel_test(base.compare_is_not, lang=language)
+def test_compare_is_not(language_with_cuda):
+    test = epyccel_test(base.compare_is_not, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_int(language):
-    test = epyccel_test(base.compare_is_int, lang=language)
+def test_compare_is_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_compare_is_not_int(language):
-    test = epyccel_test(base.compare_is_not_int, lang=language)
+def test_compare_is_not_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_not_false(language):
-    test = epyccel_test(base.not_false, lang=language)
+def test_not_false(language_with_cuda):
+    test = epyccel_test(base.not_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_true(language):
-    test = epyccel_test(base.not_true, lang=language)
+def test_not_true(language_with_cuda):
+    test = epyccel_test(base.not_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_eq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_eq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_neq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_neq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not(language):
-    test = epyccel_test(base.not_val, lang=language)
+def test_not(language_with_cuda):
+    test = epyccel_test(base.not_val, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_int(language):
-    test = epyccel_test(base.not_int, lang=language)
+def test_not_int(language_with_cuda):
+    test = epyccel_test(base.not_int, lang=language_with_cuda)
     test.compare_epyccel( 0 )
     test.compare_epyccel( 4 )
 
-def test_compare_is_nil(language):
-    test = epyccel_test(base.is_nil, lang=language)
+def test_compare_is_nil(language_with_cuda):
+    test = epyccel_test(base.is_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_compare_is_not_nil(language):
-    test = epyccel_test(base.is_not_nil, lang=language)
+def test_compare_is_not_nil(language_with_cuda):
+    test = epyccel_test(base.is_not_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_cast_int(language):
-    test = epyccel_test(base.cast_int, lang=language)
+def test_cast_int(language_with_cuda):
+    test = epyccel_test(base.cast_int, lang=language_with_cuda)
     test.compare_epyccel( 4 )
-    test = epyccel_test(base.cast_float_to_int, lang=language)
+    test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
 
-def test_cast_bool(language):
-    test = epyccel_test(base.cast_bool, lang=language)
+def test_cast_bool(language_with_cuda):
+    test = epyccel_test(base.cast_bool, lang=language_with_cuda)
     test.compare_epyccel( True )
 
-def test_cast_float(language):
-    test = epyccel_test(base.cast_float, lang=language)
+def test_cast_float(language_with_cuda):
+    test = epyccel_test(base.cast_float, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
-    test = epyccel_test(base.cast_int_to_float, lang=language)
+    test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda)
     test.compare_epyccel( 4 )
 
-def test_if_0_int(language):
-    test = epyccel_test(base.if_0_int, lang=language)
+def test_if_0_int(language_with_cuda):
+    test = epyccel_test(base.if_0_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
     test.compare_epyccel( 0 )
 
-def test_if_0_real(language):
-    test = epyccel_test(base.if_0_real, lang=language)
+def test_if_0_real(language_with_cuda):
+    test = epyccel_test(base.if_0_real, lang=language_with_cuda)
     test.compare_epyccel( 22.3 )
     test.compare_epyccel( 0.0 )
 
-def test_same_int(language):
-    test = epyccel_test(base.is_same_int, lang=language)
+def test_same_int(language_with_cuda):
+    test = epyccel_test(base.is_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
-    test = epyccel_test(base.isnot_same_int, lang=language)
+    test = epyccel_test(base.isnot_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
 
-def test_same_float(language):
-    test = epyccel_test(base.is_same_float, lang=language)
+def test_same_float(language_with_cuda):
+    test = epyccel_test(base.is_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
-    test = epyccel_test(base.isnot_same_float, lang=language)
+    test = epyccel_test(base.isnot_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
 
 @pytest.mark.parametrize( 'language', [
@@ -150,28 +150,28 @@ def test_same_complex(language):
     test = epyccel_test(base.isnot_same_complex, lang=language)
     test.compare_epyccel( complex(2,3) )
 
-def test_is_types(language):
-    test = epyccel_test(base.is_types, lang=language)
+def test_is_types(language_with_cuda):
+    test = epyccel_test(base.is_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_isnot_types(language):
-    test = epyccel_test(base.isnot_types, lang=language)
+def test_isnot_types(language_with_cuda):
+    test = epyccel_test(base.isnot_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_none_is_none(language):
-    test = epyccel_test(base.none_is_none, lang=language)
+def test_none_is_none(language_with_cuda):
+    test = epyccel_test(base.none_is_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_isnot_none(language):
-    test = epyccel_test(base.none_isnot_none, lang=language)
+def test_none_isnot_none(language_with_cuda):
+    test = epyccel_test(base.none_isnot_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_pass_if(language):
-    test = epyccel_test(base.pass_if, lang=language)
+def test_pass_if(language_with_cuda):
+    test = epyccel_test(base.pass_if, lang=language_with_cuda)
     test.compare_epyccel(2)
 
-def test_pass2_if(language):
-    test = epyccel_test(base.pass2_if, lang=language)
+def test_pass2_if(language_with_cuda):
+    test = epyccel_test(base.pass2_if, lang=language_with_cuda)
     test.compare_epyccel(0.2)
     test.compare_epyccel(0.0)
 
@@ -192,15 +192,15 @@ def test_use_optional(language):
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_equality(language):
-    test = epyccel_test(base.none_equality, lang=language)
+def test_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_equality, lang=language_with_cuda)
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_none_equality(language):
-    test = epyccel_test(base.none_none_equality, lang=language)
+def test_none_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_none_equality, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_literal_equality(language):
-    test = epyccel_test(base.none_literal_equality, lang=language)
+def test_none_literal_equality(language_with_cuda):
+    test = epyccel_test(base.none_literal_equality, lang=language_with_cuda)
     test.compare_epyccel()

From bd7351493e3ae2c0947b1d2fb92605360db4de08 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 15 May 2024 12:58:50 +0100
Subject: [PATCH 05/12] Fix import handling (#49)

This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48, by implementing a tiny wrapper for CUDA and a wrapper for non-CUDA functionalities only with external 'C'.

**Commit Summary**

-    Implemented new header printer for CUDA.
-    Added CUDA wrapper assignment
-    Instead of wrapping all local headers, wrap only C functions with extern 'C'

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 AUTHORS                                     |  1 +
 CHANGELOG.md                                |  3 +-
 pyccel/codegen/printing/cucode.py           | 45 ++++++++----
 pyccel/codegen/python_wrapper.py            |  4 ++
 pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++
 tests/epyccel/modules/cuda_module.py        | 13 ++++
 tests/epyccel/test_epyccel_modules.py       | 13 ++++
 7 files changed, 143 insertions(+), 14 deletions(-)
 create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py
 create mode 100644 tests/epyccel/modules/cuda_module.py

diff --git a/AUTHORS b/AUTHORS
index 6c30ce5830..3dbaa2f249 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -31,3 +31,4 @@ Contributors
 * Farouk Ech-Charef
 * Mustapha Belbiad
 * Varadarajan Rengaraj
+* Said Mazouz
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ce9212abc6..1d99c60127 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file.
 
 ### Added
 
--   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #48 : Fix incorrect handling of imports in `cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 86146b065b..277d2a3a6a 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -52,19 +52,7 @@ def _print_Module(self, expr):
 
         # Print imports last to be sure that all additional_imports have been collected
         imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
-        c_headers_imports = ''
-        local_imports = ''
-
-        for imp in imports:
-            if imp.source in c_library_headers:
-                c_headers_imports += self._print(imp)
-            else:
-                local_imports += self._print(imp)
-
-        imports = f'{c_headers_imports}\
-                    extern "C"{{\n\
-                    {local_imports}\
-                    }}'
+        imports = ''.join(self._print(i) for i in imports)
 
         code = f'{imports}\n\
                  {global_variables}\n\
@@ -72,3 +60,34 @@ def _print_Module(self, expr):
 
         self.exit_scope()
         return code
+
+    def _print_ModuleHeader(self, expr):
+        self.set_scope(expr.module.scope)
+        self._in_header = True
+        name = expr.module.name
+
+        funcs = ""
+        cuda_headers = ""
+        for f in expr.module.funcs:
+            if not f.is_inline:
+                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                    cuda_headers += self.function_signature(f) + ';\n'
+                else:
+                    funcs += self.function_signature(f) + ';\n'
+        global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private)
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [*expr.module.imports, *self._additional_imports.values()]
+        imports = ''.join(self._print(i) for i in imports)
+
+        self._in_header = False
+        self.exit_scope()
+        function_declaration = f'{cuda_headers}\n\
+                    extern "C"{{\n\
+                    {funcs}\
+                    }}\n'
+        return '\n'.join((f"#ifndef {name.upper()}_H",
+                          f"#define {name.upper()}_H",
+                          global_variables,
+                          function_declaration,
+                          "#endif // {name.upper()}_H\n"))
+
diff --git a/pyccel/codegen/python_wrapper.py b/pyccel/codegen/python_wrapper.py
index 9437727042..62c303fa64 100644
--- a/pyccel/codegen/python_wrapper.py
+++ b/pyccel/codegen/python_wrapper.py
@@ -13,6 +13,7 @@
 from pyccel.codegen.printing.fcode               import FCodePrinter
 from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper
 from pyccel.codegen.wrapper.c_to_python_wrapper  import CToPythonWrapper
+from pyccel.codegen.wrapper.cuda_to_c_wrapper    import CudaToCWrapper
 from pyccel.codegen.utilities                    import recompile_object
 from pyccel.codegen.utilities                    import copy_internal_library
 from pyccel.codegen.utilities                    import internal_libs
@@ -144,6 +145,9 @@ def create_shared_library(codegen,
                 verbose=verbose)
         timings['Bind C wrapping'] = time.time() - start_bind_c_compiling
         c_ast = bind_c_mod
+    elif language == 'cuda':
+        wrapper = CudaToCWrapper()
+        c_ast = wrapper.wrap(codegen.ast)
     else:
         c_ast = codegen.ast
 
diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
new file mode 100644
index 0000000000..c0e24c7c09
--- /dev/null
+++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Module describing the code-wrapping class : CudaToPythonWrapper
+which creates an interface exposing Cuda code to C.
+"""
+
+from pyccel.ast.bind_c      import BindCModule
+from pyccel.errors.errors   import Errors
+from pyccel.ast.bind_c      import BindCVariable
+from .wrapper               import Wrapper
+
+errors = Errors()
+
+class CudaToCWrapper(Wrapper):
+    """
+    Class for creating a wrapper exposing Cuda code to C.
+
+    While CUDA is typically compatible with C by default.
+    this wrapper becomes necessary in scenarios where specific adaptations
+    or modifications are required to ensure seamless integration with C.
+    """
+
+    def _wrap_Module(self, expr):
+        """
+        Create a Module which is compatible with C.
+
+        Create a Module which provides an interface between C and the
+        Module described by expr.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.core.Module
+            The module to be wrapped.
+
+        Returns
+        -------
+        pyccel.ast.core.BindCModule
+            The C-compatible module.
+        """
+        init_func = expr.init_func
+        if expr.interfaces:
+            errors.report("Interface wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+        if expr.classes:
+            errors.report("Class wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+
+        variables = [self._wrap(v) for v in expr.variables]
+
+        return BindCModule(expr.name, variables, expr.funcs,
+                init_func=init_func,
+                scope = expr.scope,
+                original_module=expr)
+
+    def _wrap_Variable(self, expr):
+        """
+        Create all objects necessary to expose a module variable to C.
+
+        Create and return the objects which must be printed in the wrapping
+        module in order to expose the variable to C
+
+        Parameters
+        ----------
+        expr : pyccel.ast.variables.Variable
+            The module variable.
+
+        Returns
+        -------
+        pyccel.ast.core.BindCVariable
+            The C-compatible variable. which must be printed in
+            the wrapping module to expose the variable.
+        """
+        return expr.clone(expr.name, new_class = BindCVariable)
+
diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py
new file mode 100644
index 0000000000..bb7ae6b98a
--- /dev/null
+++ b/tests/epyccel/modules/cuda_module.py
@@ -0,0 +1,13 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import numpy as np
+
+g = np.float64(9.81)
+r0 = np.float32(1.0)
+rmin = 0.01
+rmax = 1.0
+
+skip_centre = True
+
+method = 3
+
+tiny = np.int32(4)
diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py
index ad8ae0bd75..223f741bf0 100644
--- a/tests/epyccel/test_epyccel_modules.py
+++ b/tests/epyccel/test_epyccel_modules.py
@@ -200,3 +200,16 @@ def test_awkward_names(language):
     assert mod.function() == modnew.function()
     assert mod.pure() == modnew.pure()
     assert mod.allocate(1) == modnew.allocate(1)
+
+def test_cuda_module(language_with_cuda):
+    import modules.cuda_module as mod
+
+    modnew = epyccel(mod, language=language_with_cuda)
+
+    atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre',
+            'method', 'tiny')
+    for att in atts:
+        mod_att = getattr(mod, att)
+        modnew_att = getattr(modnew, att)
+        assert mod_att == modnew_att
+        assert type(mod_att) is type(modnew_att)

From 261c152638e54caae3966e54985725a7fca505ba Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Thu, 27 Jun 2024 20:31:46 +0100
Subject: [PATCH 06/12] Add support for kernels (#42)

This pull request addresses issue #28 by implementing a new feature in
Pyccel that allows users to define custom GPU kernels. The syntax for
creating these kernels is inspired by Numba. and I also need to fix
issue #45 for testing purposes

**Commit Summary**

- Introduced KernelCall class
- Added cuda printer methods _print_KernelCall and _print_FunctionDef to
generate the corresponding CUDA representation for both kernel calls and
definitions
- Added IndexedFunctionCall  represents an indexed function call
- Added CUDA module and cuda.synchronize()
- Fixing a bug that I found in the header: it does not import the
necessary header for the used function

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
Co-authored-by: Emily Bourne <emily.bourne@epfl.ch>
---
 .dict_custom.txt                              |   1 +
 CHANGELOG.md                                  |   2 +
 docs/cuda.md                                  |  23 +++
 pyccel/ast/core.py                            |  37 ++++
 pyccel/ast/cuda.py                            |  65 +++++++
 pyccel/ast/cudaext.py                         |  42 +++++
 pyccel/ast/utilities.py                       |   4 +-
 pyccel/codegen/printing/cucode.py             |  46 ++++-
 pyccel/cuda/__init__.py                       |  10 +
 pyccel/cuda/cuda_sync_primitives.py           |  16 ++
 pyccel/decorators.py                          |  32 ++++
 pyccel/errors/messages.py                     |   8 +
 pyccel/parser/semantic.py                     |  84 ++++++++-
 pyccel/parser/syntactic.py                    |   4 +
 tests/conftest.py                             |   9 +
 tests/cuda/test_kernel_semantic.py            | 176 ++++++++++++++++++
 tests/pyccel/scripts/kernel/hello_kernel.py   |  19 ++
 .../scripts/kernel/kernel_name_collision.py   |   8 +
 tests/pyccel/test_pyccel.py                   |  22 ++-
 19 files changed, 599 insertions(+), 9 deletions(-)
 create mode 100644 docs/cuda.md
 create mode 100644 pyccel/ast/cuda.py
 create mode 100644 pyccel/ast/cudaext.py
 create mode 100644 pyccel/cuda/__init__.py
 create mode 100644 pyccel/cuda/cuda_sync_primitives.py
 create mode 100644 tests/cuda/test_kernel_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py
 create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index ae99f31ed4..5d99e21194 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -118,3 +118,4 @@ datatyping
 datatypes
 indexable
 traceback
+GPUs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d99c60127..7c1dcffc55 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #42 : Add support for custom kernel in`cuda`.
+-   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
new file mode 100644
index 0000000000..de30d52b80
--- /dev/null
+++ b/docs/cuda.md
@@ -0,0 +1,23 @@
+# Getting started GPU
+
+Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel
+
+## Cuda Decorator
+
+### kernel
+
+The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba.
+
+```python
+from pyccel.decorators import kernel
+
+@kernel
+def my_kernel():
+    pass
+
+blockspergrid = 1
+threadsperblock = 1
+# Call your kernel function
+my_kernel[blockspergrid, threadsperblock]()
+
+```
\ No newline at end of file
diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py
index 013f206dd6..f0e5cc67f1 100644
--- a/pyccel/ast/core.py
+++ b/pyccel/ast/core.py
@@ -73,6 +73,7 @@
     'If',
     'IfSection',
     'Import',
+    'IndexedFunctionCall',
     'InProgram',
     'InlineFunctionDef',
     'Interface',
@@ -2065,6 +2066,42 @@ def _ignore(cls, c):
         """
         return c is None or isinstance(c, (FunctionDef, *cls._ignored_types))
 
+class IndexedFunctionCall(FunctionCall):
+    """
+    Represents an indexed function call in the code.
+
+    Class representing indexed function calls, encapsulating all
+    relevant information for such calls within the code base.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    indexes : iterable of TypedAstNode
+        The indexes of the function call.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_indexes',)
+    _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',)
+    def __init__(self, func, args, indexes, current_function = None):
+        self._indexes = indexes
+        super().__init__(func, args, current_function)
+
+    @property
+    def indexes(self):
+        """
+        Indexes of function call.
+
+        Represents the indexes of the function call
+        """
+        return self._indexes
+
 class ConstructorCall(FunctionCall):
 
     """
diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py
new file mode 100644
index 0000000000..f1e50ef7f0
--- /dev/null
+++ b/pyccel/ast/cuda.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Module
+This module provides a collection of classes and utilities for CUDA programming.
+"""
+from pyccel.ast.core import FunctionCall
+
+__all__ = (
+    'KernelCall',
+)
+
+class KernelCall(FunctionCall):
+    """
+    Represents a kernel function call in the code.
+
+    The class serves as a representation of a kernel
+    function call within the codebase.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The definition of the function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    num_blocks : TypedAstNode
+        The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    tp_block : TypedAstNode
+        The number of threads per block. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_num_blocks','_tp_block')
+    _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block')
+
+    def __init__(self, func, args, num_blocks, tp_block, current_function = None):
+        self._num_blocks = num_blocks
+        self._tp_block = tp_block
+        super().__init__(func, args, current_function)
+
+    @property
+    def num_blocks(self):
+        """
+        The number of blocks in the kernel being called.
+
+        The number of blocks in the kernel being called.
+        """
+        return self._num_blocks
+
+    @property
+    def tp_block(self):
+        """
+        The number of threads per block.
+
+        The number of threads per block.
+        """
+        return self._tp_block
+
diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
new file mode 100644
index 0000000000..b540f20993
--- /dev/null
+++ b/pyccel/ast/cudaext.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Extension Module
+Provides CUDA functionality for code generation.
+"""
+from .internals      import PyccelFunction
+
+from .datatypes      import VoidType
+from .core           import Module, PyccelFunctionDef
+
+__all__ = (
+    'CudaSynchronize',
+)
+
+class CudaSynchronize(PyccelFunction):
+    """
+    Represents a call to Cuda.synchronize for code generation.
+
+    This class serves as a representation of the Cuda.synchronize method.
+    """
+    __slots__ = ()
+    _attribute_nodes = ()
+    _shape     = None
+    _class_type = VoidType()
+    def __init__(self):
+        super().__init__()
+
+cuda_funcs = {
+    'synchronize'       : PyccelFunctionDef('synchronize' , CudaSynchronize),
+}
+
+cuda_mod = Module('cuda',
+    variables=[],
+    funcs=cuda_funcs.values(),
+    imports=[]
+)
+
diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py
index 1e6c0422ab..e5cd77b168 100644
--- a/pyccel/ast/utilities.py
+++ b/pyccel/ast/utilities.py
@@ -25,6 +25,7 @@
 from .literals      import LiteralInteger, LiteralEllipsis, Nil
 from .mathext       import math_mod
 from .sysext        import sys_mod
+from .cudaext       import cuda_mod
 
 from .numpyext      import (NumpyEmpty, NumpyArray, numpy_mod,
                             NumpyTranspose, NumpyLinspace)
@@ -49,7 +50,8 @@
 decorators_mod = Module('decorators',(),
         funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__])
 pyccel_mod = Module('pyccel',(),(),
-        imports = [Import('decorators', decorators_mod)])
+        imports = [Import('decorators', decorators_mod),
+                    Import('cuda', cuda_mod)])
 
 # TODO add documentation
 builtin_import_registry = Module('__main__',
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 277d2a3a6a..cd26843017 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -9,11 +9,12 @@
 enabling the direct translation of high-level Pyccel expressions into CUDA code.
 """
 
-from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+from pyccel.codegen.printing.ccode  import CCodePrinter
 
-from pyccel.ast.core        import Import, Module
+from pyccel.ast.core                import Import, Module
+from pyccel.ast.literals            import Nil
 
-from pyccel.errors.errors   import Errors
+from pyccel.errors.errors           import Errors
 
 
 errors = Errors()
@@ -61,6 +62,44 @@ def _print_Module(self, expr):
         self.exit_scope()
         return code
 
+    def function_signature(self, expr, print_arg_names = True):
+        """
+        Get the Cuda representation of the function signature.
+
+        Extract from the function definition `expr` all the
+        information (name, input, output) needed to create the
+        function signature and return a string describing the
+        function.
+        This is not a declaration as the signature does not end
+        with a semi-colon.
+
+        Parameters
+        ----------
+        expr : FunctionDef
+            The function definition for which a signature is needed.
+
+        print_arg_names : bool, default : True
+            Indicates whether argument names should be printed.
+
+        Returns
+        -------
+        str
+            Signature of the function.
+        """
+        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        c_function_signature = super().function_signature(expr, print_arg_names)
+        return f'{cuda_decorater} {c_function_signature}'
+
+    def _print_KernelCall(self, expr):
+        func = expr.funcdef
+        args = [a.value or Nil() for a in expr.args]
+
+        args = ', '.join(self._print(a) for a in args)
+        return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n"
+
+    def _print_CudaSynchronize(self, expr):
+        return 'cudaDeviceSynchronize();\n'
+
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True
@@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr):
                     }}\n'
         return '\n'.join((f"#ifndef {name.upper()}_H",
                           f"#define {name.upper()}_H",
+                          imports,
                           global_variables,
                           function_declaration,
                           "#endif // {name.upper()}_H\n"))
diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py
new file mode 100644
index 0000000000..e8542ad5d5
--- /dev/null
+++ b/pyccel/cuda/__init__.py
@@ -0,0 +1,10 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+    This module is for exposing the CudaSubmodule functions.
+"""
+from .cuda_sync_primitives    import synchronize
+
+__all__ = ['synchronize']
diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py
new file mode 100644
index 0000000000..f3442fe9e2
--- /dev/null
+++ b/pyccel/cuda/cuda_sync_primitives.py
@@ -0,0 +1,16 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+This submodule contains CUDA methods for Pyccel.
+"""
+
+
+def synchronize():
+    """
+    Synchronize CUDA device execution.
+
+    Synchronize CUDA device execution.
+    """
+
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 1f640043db..77717a991f 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -19,6 +19,7 @@
     'sympy',
     'template',
     'types',
+    'kernel'
 )
 
 
@@ -109,3 +110,34 @@ def allow_negative_index(f,*args):
     def identity(f):
         return f
     return identity
+
+def kernel(f):
+    """
+    Decorator for marking a Python function as a kernel.
+
+    This class serves as a decorator to mark a Python function
+    as a kernel function, typically used for GPU computations.
+    This allows the function to be indexed with the number of blocks and threads.
+
+    Parameters
+    ----------
+    f : function
+        The function to which the decorator is applied.
+
+    Returns
+    -------
+    KernelAccessor
+        A class representing the kernel function.
+    """
+    class KernelAccessor:
+        """
+        Class representing the kernel function.
+
+        Class representing the kernel function.
+        """
+        def __init__(self, f):
+            self._f = f
+        def __getitem__(self, args):
+            return self._f
+
+    return KernelAccessor(f)
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 79eccc1df2..09966d810c 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -162,3 +162,11 @@
 WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean'
 NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown'
 NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on'
+MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified'
+INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
+INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
+INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
+
+
+
+
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index e94b9c8413..fde10d6317 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -116,6 +116,8 @@
 from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol
 from pyccel.ast.variable import DottedName, DottedVariable
 
+from pyccel.ast.cuda import     KernelCall
+
 from pyccel.errors.errors import Errors
 from pyccel.errors.errors import PyccelSemanticError
 
@@ -133,7 +135,9 @@
         PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE,
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
-        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC)
+        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
+        MISSING_KERNEL_CONFIGURATION,
+        INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
@@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
 
             return new_expr
 
+    def _handle_kernel(self, expr, func, args):
+        """
+        Create the node representing the kernel function call.
+
+        Create a FunctionCall or an instance of a PyccelInternalFunction
+        from the function information and arguments.
+
+        Parameters
+        ----------
+        expr : IndexedFunctionCall
+               Node has all the information about the function call.
+
+        func : FunctionDef | Interface | PyccelInternalFunction type
+               The function being called.
+
+        args : iterable of FunctionCallArgument
+               The arguments passed to the function.
+
+        Returns
+        -------
+        Pyccel.ast.cuda.KernelCall
+            The semantic representation of the kernel call.
+        """
+        if len(expr.indexes) != 2:
+            errors.report(INVALID_KERNEL_LAUNCH_CONFIG,
+                    symbol=expr,
+                    severity='fatal')
+        if len(func.results):
+            errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification",
+                         symbol=expr,
+                         severity='fatal')
+        if isinstance(func, FunctionDef) and len(args) != len(func.arguments):
+            errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments",
+                symbol=expr,
+                severity='fatal')
+        if not isinstance(expr.indexes[0], (LiteralInteger)):
+            if isinstance(expr.indexes[0], PyccelSymbol):
+                num_blocks = self.get_variable(expr.indexes[0])
+
+                if not isinstance(num_blocks.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+        if not isinstance(expr.indexes[1], (LiteralInteger)):
+            if isinstance(expr.indexes[1], PyccelSymbol):
+                tp_block = self.get_variable(expr.indexes[1])
+                if not isinstance(tp_block.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+        new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1])
+        return new_expr
+
     def _sort_function_call_args(self, func_args, args):
         """
         Sort and add the missing call arguments to match the arguments in the function definition.
@@ -2815,6 +2880,23 @@ def _visit_Lambda(self, expr):
                 expr = Lambda(tuple(expr.variables), expr_new)
         return expr
 
+    def _visit_IndexedFunctionCall(self, expr):
+        name     = expr.funcdef
+        name = self.scope.get_expected_name(name)
+        func     = self.scope.find(name, 'functions')
+        args = self._handle_function_args(expr.args)
+
+        if func is None:
+            return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef,
+                    bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset),
+                    severity='fatal')
+
+        func = self._annotate_the_called_function_def(func)
+        if 'kernel' in func.decorators :
+            return self._handle_kernel(expr, func, args)
+        else:
+            return errors.report("Unknown function type",
+                symbol=expr, severity='fatal')
     def _visit_FunctionCall(self, expr):
         name     = expr.funcdef
         try:
diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py
index 2967f4999b..3af7f0728a 100644
--- a/pyccel/parser/syntactic.py
+++ b/pyccel/parser/syntactic.py
@@ -64,6 +64,8 @@
 
 from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation
 
+from pyccel.ast.core import IndexedFunctionCall
+
 from pyccel.parser.base        import BasicParser
 from pyccel.parser.extend_tree import extend_tree
 from pyccel.parser.utilities   import get_default_path
@@ -1102,6 +1104,8 @@ def _visit_Call(self, stmt):
         elif isinstance(func, DottedName):
             func_attr = FunctionCall(func.name[-1], args)
             func = DottedName(*func.name[:-1], func_attr)
+        elif isinstance(func,IndexedElement):
+            func = IndexedFunctionCall(func.base, args, func.indices)
         else:
             raise NotImplementedError(f' Unknown function type {type(func)}')
 
diff --git a/tests/conftest.py b/tests/conftest.py
index a5082ef6e8..4e74d1ec7a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem):
 
 def pytest_addoption(parser):
     parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised")
+    parser.addoption("--gpu_available", action="store_true",
+                default=False, help="enable GPU tests")
+
+def pytest_generate_tests(metafunc):
+    if "gpu_available" in metafunc.fixturenames:
+        if metafunc.config.getoption("gpu_available"):
+            metafunc.parametrize("gpu_available", [True])
+        else:
+            metafunc.parametrize("gpu_available", [False])
 
 def pytest_sessionstart(session):
     # setup_stuff
diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py
new file mode 100644
index 0000000000..00b74c3bea
--- /dev/null
+++ b/tests/cuda/test_kernel_semantic.py
@@ -0,0 +1,176 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import kernel
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK,
+                                    INVALID_KERNEL_CALL_BP_GRID,
+                                    INVALID_KERNEL_LAUNCH_CONFIG)
+
+
+@pytest.mark.cuda
+def test_invalid_block_number():
+    def invalid_block_number():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1.0
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_block_number, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_BP_GRID == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_thread_per_block():
+    def invalid_thread_per_block():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1.0
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_thread_per_block, language="cuda")
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_high():
+    def invalid_launch_config_high():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        third_param = 1
+        kernel_call[blocks_per_grid, threads_per_block, third_param]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_high, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_low():
+    def invalid_launch_config_low():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        kernel_call[blocks_per_grid]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_low, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call():
+    def invalid_arguments():
+        @kernel
+        def kernel_call(arg : int):
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "0 argument types given, but function takes 1 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call_2():
+    def invalid_arguments_():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments_, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "1 argument types given, but function takes 0 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_kernel_return():
+    def kernel_return():
+        @kernel
+        def kernel_call():
+            return 7
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(kernel_return, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message
diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py
new file mode 100644
index 0000000000..b6901b25a1
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/hello_kernel.py
@@ -0,0 +1,19 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel import cuda
+
+@kernel
+def say_hello(its_morning : bool):
+    if(its_morning):
+        print("Hello and Good morning")
+    else:
+        print("Hello and Good afternoon")
+
+def f():
+    its_morning = True
+    say_hello[1,1](its_morning)
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py
new file mode 100644
index 0000000000..ac7abe25ae
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py
@@ -0,0 +1,8 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+
+@kernel
+def do():
+    pass
+
+do[1,1]()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index ec1e846549..b4757a3c31 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None):
 #------------------------------------------------------------------------------
 def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
         cwd = None, pyccel_commands = "", output_dtype = float,
-        language = None, output_dir = None):
+        language = None, output_dir = None, execute_code = True):
     """
     Run pyccel and compare the output to ensure that the results
     are equivalent
@@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
             compile_fortran(cwd, output_test_file, dependencies)
         elif language == 'c':
             compile_c(cwd, output_test_file, dependencies)
-
-    lang_out = get_lang_output(output_test_file, language)
-    compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
+    if  execute_code:
+        lang_out = get_lang_output(output_test_file, language)
+        compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
 
 #==============================================================================
 # UNIT TESTS
 #==============================================================================
+
 def test_relative_imports_in_project(language):
 
     base_dir = os.path.dirname(os.path.realpath(__file__))
@@ -728,6 +729,19 @@ def test_multiple_results(language):
 def test_elemental(language):
     pyccel_test("scripts/decorators_elemental.py", language = language)
 
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_hello_kernel(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/hello_kernel.py",
+            language="cuda", output_dtype=types , execute_code=gpu_available)
+
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_kernel_collision(gpu_available):
+    pyccel_test("scripts/kernel/kernel_name_collision.py",
+            language="cuda", execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From 4893610f3d1145f53a95220a8e2e641baf4e9d38 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 28 Jun 2024 17:11:03 +0100
Subject: [PATCH 07/12] fix: add space between '%' and PRId64 to resolve C++11
 compilation warning

---
 pyccel/codegen/printing/ccode.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index a39a442a83..b0b4cc3b71 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -288,9 +288,9 @@ class CCodePrinter(CodePrinter):
     type_to_format = {(PrimitiveFloatingPointType(),8) : '%.15lf',
                       (PrimitiveFloatingPointType(),4) : '%.6f',
                       (PrimitiveIntegerType(),4)       : '%d',
-                      (PrimitiveIntegerType(),8)       : LiteralString("%") + CMacro('PRId64'),
-                      (PrimitiveIntegerType(),2)       : LiteralString("%") + CMacro('PRId16'),
-                      (PrimitiveIntegerType(),1)       : LiteralString("%") + CMacro('PRId8'),
+                      (PrimitiveIntegerType(),8)       : LiteralString("%") + CMacro(' PRId64'),
+                      (PrimitiveIntegerType(),2)       : LiteralString("%") + CMacro(' PRId16'),
+                      (PrimitiveIntegerType(),1)       : LiteralString("%") + CMacro(' PRId8'),
                       StringType()                  : '%s',
                       }
 

From 4445107b3e317b27db6f3096b473d3142983c0a4 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 28 Jun 2024 18:13:23 +0100
Subject: [PATCH 08/12] update CHANGELOG

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7c1dcffc55..f581182e73 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ All notable changes to this project will be documented in this file.
 -   #48 : Fix incorrect handling of imports in `cuda`.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
+-   #62 : Fix Invalid Suffix on Literal with PRId64/PRId16/PRId8 in C++11
+
 
 ## \[UNRELEASED\]
 

From 1c545d9e3c77bfad21fc6c0c1028ea5d6431d402 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 28 Jun 2024 18:13:56 +0100
Subject: [PATCH 09/12] update CHANGELOG

---
 CHANGELOG.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f581182e73..376b288430 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,6 @@ All notable changes to this project will be documented in this file.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 -   #62 : Fix Invalid Suffix on Literal with PRId64/PRId16/PRId8 in C++11
 
-
 ## \[UNRELEASED\]
 
 ### Added

From 82cc8273e7eddb2dda8cd54b6e51954d69d11312 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Tue, 2 Jul 2024 15:44:24 +0100
Subject: [PATCH 10/12] update CHANGELOG

---
 CHANGELOG.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 376b288430..7c1dcffc55 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,6 @@ All notable changes to this project will be documented in this file.
 -   #48 : Fix incorrect handling of imports in `cuda`.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
--   #62 : Fix Invalid Suffix on Literal with PRId64/PRId16/PRId8 in C++11
 
 ## \[UNRELEASED\]
 

From 96fdbb376432356ce7a5f878ccd45a226bb1c7be Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:37:02 +0100
Subject: [PATCH 11/12] Updated CUDA Name Clash Checker By Added CUDA-specific
 keywords (#60)

This pull request addresses issue #59 by adding more CUDA-specific
keywords to enhance the checking of variable/function names and prevent
name clashes

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                              |  1 +
 pyccel/naming/cudanameclashchecker.py     | 36 ++++++++++++++++++++++-
 pyccel/naming/languagenameclashchecker.py |  5 ++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7c1dcffc55..b93a513351 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
index 971204e912..c7aaa4952f 100644
--- a/pyccel/naming/cudanameclashchecker.py
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker):
     verify that they do not cause name clashes. Name clashes may be due to
     new variables, or due to the use of reserved keywords.
     """
+
     # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
     keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
         'continue', 'default', 'do', 'double', 'else', 'enum',
@@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker):
         'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
         'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
         'get_index', 'numpy_to_ndarray_strides',
-        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'
+        '__global__', '__device__', '__host__','__constant__', '__shared__',
+        '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim',
+        'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset',
+        'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch',
+        'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc',
+        'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer',
+        'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset',
+        'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties',
+        'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice',
+        'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize',
+        'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord',
+        'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet',
+        'cuDeviceGetCount', 'cuDeviceGetName',
+        'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy',
+        'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload',
+        'cuModuleGetFunction', 'cuModuleGetGlobal', 'cuModuleGetTexRef',
+        'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH',
+        'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync',
+        'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32',
+        'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize',
+        'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid',
+        'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery',
+        'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime',
+        'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize',
+        'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize',
+        'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy',
+        'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D',
+        'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode',
+        'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray',
+        'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat',
+        'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor',
+        'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags'
+    ])
 
     def has_clash(self, name, symbols):
         """
diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py
index fa672a905b..d6415e6449 100644
--- a/pyccel/naming/languagenameclashchecker.py
+++ b/pyccel/naming/languagenameclashchecker.py
@@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton):
     """
     keywords = None
 
+    def __init__(self): #pylint: disable=useless-parent-delegation
+        # This __init__ function is required so the ArgumentSingleton can
+        # always detect a signature
+        super().__init__()
+
     def _get_collisionless_name(self, name, symbols):
         """
         Get a name which doesn't collision with keywords or symbols.

From 4fad96b58c403d61bf689805e8aa49e1d801dee3 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 18:04:22 +0100
Subject: [PATCH 12/12] add handle for custom device (#61)

This pull request addresses issue
https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new
feature in Pyccel that allows users to define a custom device

**Commit Summary**

- Adding handler for custom device and its code generation.
- Adding test

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
---
 CHANGELOG.md                               |  1 +
 docs/cuda.md                               | 25 ++++++++++++++++-
 pyccel/codegen/printing/cucode.py          |  7 ++---
 pyccel/decorators.py                       | 19 +++++++++++++
 pyccel/errors/messages.py                  |  2 +-
 pyccel/parser/semantic.py                  |  7 ++++-
 tests/cuda/test_device_semantic.py         | 31 ++++++++++++++++++++++
 tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++
 tests/pyccel/test_pyccel.py                |  8 ++++++
 9 files changed, 112 insertions(+), 6 deletions(-)
 create mode 100644 tests/cuda/test_device_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/device_test.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b93a513351..191b21e28e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
 -   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
+-   #41 : Add support for custom device in`cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
index de30d52b80..7643a4ac02 100644
--- a/docs/cuda.md
+++ b/docs/cuda.md
@@ -20,4 +20,27 @@ threadsperblock = 1
 # Call your kernel function
 my_kernel[blockspergrid, threadsperblock]()
 
-```
\ No newline at end of file
+```
+
+### device
+
+Device functions are similar to kernels, but are executed within the context of a kernel. They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel.
+
+```python
+from pyccel.decorators import device, kernel
+
+@device
+def add(x, y):
+    return x + y
+
+@kernel
+def my_kernel():
+    x = 1
+    y = 2
+    z = add(x, y)
+    print(z)
+
+my_kernel[1, 1]()
+
+```
+
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index cd26843017..7c01d93c47 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True):
         str
             Signature of the function.
         """
-        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        cuda_decorator = '__global__' if 'kernel' in expr.decorators else \
+        '__device__' if 'device' in expr.decorators else ''
         c_function_signature = super().function_signature(expr, print_arg_names)
-        return f'{cuda_decorater} {c_function_signature}'
+        return f'{cuda_decorator} {c_function_signature}'
 
     def _print_KernelCall(self, expr):
         func = expr.funcdef
@@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr):
         cuda_headers = ""
         for f in expr.module.funcs:
             if not f.is_inline:
-                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                if 'kernel' in f.decorators or 'device' in f.decorators:
                     cuda_headers += self.function_signature(f) + ';\n'
                 else:
                     funcs += self.function_signature(f) + ';\n'
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 77717a991f..ff413fe443 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -11,6 +11,7 @@
 __all__ = (
     'allow_negative_index',
     'bypass',
+    'device',
     'elemental',
     'inline',
     'private',
@@ -141,3 +142,21 @@ def __getitem__(self, args):
             return self._f
 
     return KernelAccessor(f)
+
+def device(f):
+    """
+    Decorator for marking a function as a GPU device function.
+
+    This decorator is used to mark a Python function as a GPU device function.
+
+    Parameters
+    ----------
+    f : Function
+        The function to be marked as a device.
+
+    Returns
+    -------
+    f
+        The function marked as a device.
+    """
+    return f
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 09966d810c..5fe622c29b 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -166,7 +166,7 @@
 INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
 INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
 INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
-
+INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.'
 
 
 
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index fde10d6317..7e8dd11bb4 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -136,9 +136,10 @@
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
         FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
-        MISSING_KERNEL_CONFIGURATION,
+        MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL,
         INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
+
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
 
@@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
         FunctionCall/PyccelFunction
             The semantic representation of the call.
         """
+
+        if isinstance(func, FunctionDef) and 'device' in func.decorators:
+            if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators:
+                errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal')
         if isinstance(func, PyccelFunctionDef):
             if use_build_functions:
                 annotation_method = '_build_' + func.cls_name.__name__
diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py
new file mode 100644
index 0000000000..5723991961
--- /dev/null
+++ b/tests/cuda/test_device_semantic.py
@@ -0,0 +1,31 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import device
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVAlID_DEVICE_CALL,)
+
+
+@pytest.mark.cuda
+def test_invalid_device_call():
+    def invalid_device_call():
+        @device
+        def device_call():
+            pass
+        def fake_kernel_call():
+            device_call()
+
+        fake_kernel_call()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_device_call, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert INVAlID_DEVICE_CALL == error_info.message
diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py
new file mode 100644
index 0000000000..a4762a6242
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/device_test.py
@@ -0,0 +1,18 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import device, kernel
+from pyccel import cuda
+
+@device
+def device_call():
+    print("Hello from device")
+
+@kernel
+def kernel_call():
+    device_call()
+
+def f():
+    kernel_call[1,1]()
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index b4757a3c31..2d55c6e1cb 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available):
     pyccel_test("scripts/kernel/kernel_name_collision.py",
             language="cuda", execute_code=gpu_available)
 
+#------------------------------------------------------------------------------
+
+@pytest.mark.cuda
+def test_device_call(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/device_test.py",
+            language="cuda", output_dtype=types, execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str