From 86f587039654ac37d01342e66049f2903ccdfadd Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sun, 25 Feb 2024 00:35:48 +0800 Subject: [PATCH 1/6] add CuYao as extension --- Project.toml | 8 + ext/CuYao.jl | 3 + ext/CuYao/.gitignore | 23 ++ ext/CuYao/LICENSE.md | 217 +++++++++++++++++++ ext/CuYao/Project.toml | 27 +++ ext/CuYao/README.md | 86 ++++++++ ext/CuYao/benchmarks/TestCuda.jl | 54 +++++ ext/CuYao/benchmarks/gates.jl | 19 ++ ext/CuYao/benchmarks/gcompile.jl | 92 ++++++++ ext/CuYao/benchmarks/paralleldot.jl | 11 + ext/CuYao/benchmarks/statexpect.jl | 13 ++ ext/CuYao/src/CUDApatch.jl | 64 ++++++ ext/CuYao/src/CuYao.jl | 39 ++++ ext/CuYao/src/instructs.jl | 312 ++++++++++++++++++++++++++++ ext/CuYao/src/register.jl | 290 ++++++++++++++++++++++++++ ext/CuYao/test/CUDApatch.jl | 35 ++++ ext/CuYao/test/extra.jl | 52 +++++ ext/CuYao/test/instructs.jl | 135 ++++++++++++ ext/CuYao/test/register.jl | 139 +++++++++++++ ext/CuYao/test/runtests.jl | 18 ++ lib/YaoPlots/src/vizcircuit.jl | 14 +- 21 files changed, 1644 insertions(+), 7 deletions(-) create mode 100644 ext/CuYao.jl create mode 100644 ext/CuYao/.gitignore create mode 100644 ext/CuYao/LICENSE.md create mode 100644 ext/CuYao/Project.toml create mode 100644 ext/CuYao/README.md create mode 100644 ext/CuYao/benchmarks/TestCuda.jl create mode 100644 ext/CuYao/benchmarks/gates.jl create mode 100644 ext/CuYao/benchmarks/gcompile.jl create mode 100644 ext/CuYao/benchmarks/paralleldot.jl create mode 100644 ext/CuYao/benchmarks/statexpect.jl create mode 100644 ext/CuYao/src/CUDApatch.jl create mode 100644 ext/CuYao/src/CuYao.jl create mode 100644 ext/CuYao/src/instructs.jl create mode 100644 ext/CuYao/src/register.jl create mode 100644 ext/CuYao/test/CUDApatch.jl create mode 100644 ext/CuYao/test/extra.jl create mode 100644 ext/CuYao/test/instructs.jl create mode 100644 ext/CuYao/test/register.jl create mode 100644 ext/CuYao/test/runtests.jl diff --git a/Project.toml b/Project.toml index 88493caf..84787249 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ version = "0.8.13" [deps] BitBasis = "50ba71b6-fa0f-514d-ae9a-0916efc90dcf" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LuxurySparse = "d05aeea4-b7d4-55ac-b691-9e7fabb07ba2" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" @@ -13,8 +14,15 @@ YaoBlocks = "418bc28f-b43b-5e0b-a6e7-61bbc1a2c1df" YaoPlots = "32cfe2d9-419e-45f2-8191-2267705d8dbc" YaoSym = "3b27209a-d3d6-11e9-3c0f-41eb92b2cb9d" +[weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + +[extensions] +CuYao = "CUDA" + [compat] BitBasis = "0.8, 0.9" +CUDA = "4, 5" LinearAlgebra = "1" LuxurySparse = "0.7" Reexport = "1" diff --git a/ext/CuYao.jl b/ext/CuYao.jl new file mode 100644 index 00000000..1421abce --- /dev/null +++ b/ext/CuYao.jl @@ -0,0 +1,3 @@ +module CuYao +include("CuYao/src/CuYao.jl") +end diff --git a/ext/CuYao/.gitignore b/ext/CuYao/.gitignore new file mode 100644 index 00000000..56b59ee8 --- /dev/null +++ b/ext/CuYao/.gitignore @@ -0,0 +1,23 @@ +*.jl.cov +*.jl.*.cov +*.jl.mem + +docs/build/ +docs/site/ +docs/src/tutorial/RegisterBasics.md +docs/src/tutorial/BlockBasics.md +docs/src/tutorial/BinaryBasics.md +docs/src/tutorial/QCBM.md + +*.ipynb_checkpoints +**/*.ipynb_checkpoints +**/**/*.ipynb_checkpoints + +_*.dat +*.swp +__pycache__/ +.vscode/ + +Manifest.toml + +_local/ diff --git a/ext/CuYao/LICENSE.md b/ext/CuYao/LICENSE.md new file mode 100644 index 00000000..8373be63 --- /dev/null +++ b/ext/CuYao/LICENSE.md @@ -0,0 +1,217 @@ +The CuYao.jl package is licensed under the Apache License, Version 2.0: + +> COPYRIGHT +> +> All contributions by Jinguo Liu +> Copyright (c) 2018: Jinguo Liu +> All rights reserved +> +> All contributions by Xiuzhe Luo +> Copyright (c) 2018: Xiuzhe Luo. +> All rights reserved. +> +> Each contributor holds copyright over their respective contributions. +> The project versioning (Git) records all such contribution source information. +> +> Apache License +> Version 2.0, January 2004 +> http://www.apache.org/licenses/ +> +> TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +> +> 1. Definitions. +> +> "License" shall mean the terms and conditions for use, reproduction, +> and distribution as defined by Sections 1 through 9 of this document. +> +> "Licensor" shall mean the copyright owner or entity authorized by +> the copyright owner that is granting the License. +> +> "Legal Entity" shall mean the union of the acting entity and all +> other entities that control, are controlled by, or are under common +> control with that entity. For the purposes of this definition, +> "control" means (i) the power, direct or indirect, to cause the +> direction or management of such entity, whether by contract or +> otherwise, or (ii) ownership of fifty percent (50%) or more of the +> outstanding shares, or (iii) beneficial ownership of such entity. +> +> "You" (or "Your") shall mean an individual or Legal Entity +> exercising permissions granted by this License. +> +> "Source" form shall mean the preferred form for making modifications, +> including but not limited to software source code, documentation +> source, and configuration files. +> +> "Object" form shall mean any form resulting from mechanical +> transformation or translation of a Source form, including but +> not limited to compiled object code, generated documentation, +> and conversions to other media types. +> +> "Work" shall mean the work of authorship, whether in Source or +> Object form, made available under the License, as indicated by a +> copyright notice that is included in or attached to the work +> (an example is provided in the Appendix below). +> +> "Derivative Works" shall mean any work, whether in Source or Object +> form, that is based on (or derived from) the Work and for which the +> editorial revisions, annotations, elaborations, or other modifications +> represent, as a whole, an original work of authorship. For the purposes +> of this License, Derivative Works shall not include works that remain +> separable from, or merely link (or bind by name) to the interfaces of, +> the Work and Derivative Works thereof. +> +> "Contribution" shall mean any work of authorship, including +> the original version of the Work and any modifications or additions +> to that Work or Derivative Works thereof, that is intentionally +> submitted to Licensor for inclusion in the Work by the copyright owner +> or by an individual or Legal Entity authorized to submit on behalf of +> the copyright owner. For the purposes of this definition, "submitted" +> means any form of electronic, verbal, or written communication sent +> to the Licensor or its representatives, including but not limited to +> communication on electronic mailing lists, source code control systems, +> and issue tracking systems that are managed by, or on behalf of, the +> Licensor for the purpose of discussing and improving the Work, but +> excluding communication that is conspicuously marked or otherwise +> designated in writing by the copyright owner as "Not a Contribution." +> +> "Contributor" shall mean Licensor and any individual or Legal Entity +> on behalf of whom a Contribution has been received by Licensor and +> subsequently incorporated within the Work. +> +> 2. Grant of Copyright License. Subject to the terms and conditions of +> this License, each Contributor hereby grants to You a perpetual, +> worldwide, non-exclusive, no-charge, royalty-free, irrevocable +> copyright license to reproduce, prepare Derivative Works of, +> publicly display, publicly perform, sublicense, and distribute the +> Work and such Derivative Works in Source or Object form. +> +> 3. Grant of Patent License. Subject to the terms and conditions of +> this License, each Contributor hereby grants to You a perpetual, +> worldwide, non-exclusive, no-charge, royalty-free, irrevocable +> (except as stated in this section) patent license to make, have made, +> use, offer to sell, sell, import, and otherwise transfer the Work, +> where such license applies only to those patent claims licensable +> by such Contributor that are necessarily infringed by their +> Contribution(s) alone or by combination of their Contribution(s) +> with the Work to which such Contribution(s) was submitted. If You +> institute patent litigation against any entity (including a +> cross-claim or counterclaim in a lawsuit) alleging that the Work +> or a Contribution incorporated within the Work constitutes direct +> or contributory patent infringement, then any patent licenses +> granted to You under this License for that Work shall terminate +> as of the date such litigation is filed. +> +> 4. Redistribution. You may reproduce and distribute copies of the +> Work or Derivative Works thereof in any medium, with or without +> modifications, and in Source or Object form, provided that You +> meet the following conditions: +> +> (a) You must give any other recipients of the Work or +> Derivative Works a copy of this License; and +> +> (b) You must cause any modified files to carry prominent notices +> stating that You changed the files; and +> +> (c) You must retain, in the Source form of any Derivative Works +> that You distribute, all copyright, patent, trademark, and +> attribution notices from the Source form of the Work, +> excluding those notices that do not pertain to any part of +> the Derivative Works; and +> +> (d) If the Work includes a "NOTICE" text file as part of its +> distribution, then any Derivative Works that You distribute must +> include a readable copy of the attribution notices contained +> within such NOTICE file, excluding those notices that do not +> pertain to any part of the Derivative Works, in at least one +> of the following places: within a NOTICE text file distributed +> as part of the Derivative Works; within the Source form or +> documentation, if provided along with the Derivative Works; or, +> within a display generated by the Derivative Works, if and +> wherever such third-party notices normally appear. The contents +> of the NOTICE file are for informational purposes only and +> do not modify the License. You may add Your own attribution +> notices within Derivative Works that You distribute, alongside +> or as an addendum to the NOTICE text from the Work, provided +> that such additional attribution notices cannot be construed +> as modifying the License. +> +> You may add Your own copyright statement to Your modifications and +> may provide additional or different license terms and conditions +> for use, reproduction, or distribution of Your modifications, or +> for any such Derivative Works as a whole, provided Your use, +> reproduction, and distribution of the Work otherwise complies with +> the conditions stated in this License. +> +> 5. Submission of Contributions. Unless You explicitly state otherwise, +> any Contribution intentionally submitted for inclusion in the Work +> by You to the Licensor shall be under the terms and conditions of +> this License, without any additional terms or conditions. +> Notwithstanding the above, nothing herein shall supersede or modify +> the terms of any separate license agreement you may have executed +> with Licensor regarding such Contributions. +> +> 6. Trademarks. This License does not grant permission to use the trade +> names, trademarks, service marks, or product names of the Licensor, +> except as required for reasonable and customary use in describing the +> origin of the Work and reproducing the content of the NOTICE file. +> +> 7. Disclaimer of Warranty. Unless required by applicable law or +> agreed to in writing, Licensor provides the Work (and each +> Contributor provides its Contributions) on an "AS IS" BASIS, +> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +> implied, including, without limitation, any warranties or conditions +> of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +> PARTICULAR PURPOSE. You are solely responsible for determining the +> appropriateness of using or redistributing the Work and assume any +> risks associated with Your exercise of permissions under this License. +> +> 8. Limitation of Liability. In no event and under no legal theory, +> whether in tort (including negligence), contract, or otherwise, +> unless required by applicable law (such as deliberate and grossly +> negligent acts) or agreed to in writing, shall any Contributor be +> liable to You for damages, including any direct, indirect, special, +> incidental, or consequential damages of any character arising as a +> result of this License or out of the use or inability to use the +> Work (including but not limited to damages for loss of goodwill, +> work stoppage, computer failure or malfunction, or any and all +> other commercial damages or losses), even if such Contributor +> has been advised of the possibility of such damages. +> +> 9. Accepting Warranty or Additional Liability. While redistributing +> the Work or Derivative Works thereof, You may choose to offer, +> and charge a fee for, acceptance of support, warranty, indemnity, +> or other liability obligations and/or rights consistent with this +> License. However, in accepting such obligations, You may act only +> on Your own behalf and on Your sole responsibility, not on behalf +> of any other Contributor, and only if You agree to indemnify, +> defend, and hold each Contributor harmless for any liability +> incurred by, or claims asserted against, such Contributor by reason +> of your accepting any such warranty or additional liability. +> +> END OF TERMS AND CONDITIONS +> +> APPENDIX: How to apply the Apache License to your work. +> +> To apply the Apache License to your work, attach the following +> boilerplate notice, with the fields enclosed by brackets "{}" +> replaced with your own identifying information. (Don't include +> the brackets!) The text should be enclosed in the appropriate +> comment syntax for the file format. We also recommend that a +> file or class name and description of purpose be included on the +> same "printed page" as the copyright notice for easier +> identification within third-party archives. +> +> Copyright [year] [fullname] +> +> Licensed under the Apache License, Version 2.0 (the "License"); +> you may not use this file except in compliance with the License. +> You may obtain a copy of the License at +> +> http://www.apache.org/licenses/LICENSE-2.0 +> +> Unless required by applicable law or agreed to in writing, software +> distributed under the License is distributed on an "AS IS" BASIS, +> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +> See the License for the specific language governing permissions and +> limitations under the License. +> diff --git a/ext/CuYao/Project.toml b/ext/CuYao/Project.toml new file mode 100644 index 00000000..88d577a0 --- /dev/null +++ b/ext/CuYao/Project.toml @@ -0,0 +1,27 @@ +name = "CuYao" +uuid = "b48ca7a8-dd42-11e8-2b8e-1b7706800275" +version = "0.3.9" + +[deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" +Yao = "5872b779-8223-5990-8dd0-5abbb0748c8c" + +[compat] +CUDA = "4, 5" +Reexport = "0.2, 1" +TupleTools = "1" +Yao = "0.8" +YaoBlocks = "0.13.10" +julia = "1" + +[extras] +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +YaoBlocks = "418bc28f-b43b-5e0b-a6e7-61bbc1a2c1df" + +[targets] +test = ["Test", "Statistics", "YaoBlocks"] diff --git a/ext/CuYao/README.md b/ext/CuYao/README.md new file mode 100644 index 00000000..4184db5c --- /dev/null +++ b/ext/CuYao/README.md @@ -0,0 +1,86 @@ +**Build status**: [![][gitlab-img]][gitlab-url] + +[gitlab-img]: https://gitlab.com/JuliaGPU/CuYao.jl/badges/master/pipeline.svg +[gitlab-url]: https://gitlab.com/JuliaGPU/CuYao.jl/pipelines + +CUDA support for [Yao.jl](https://github.com/QuantumBFS/Yao.jl). + +**Only tested locally, expect some adventures and rough edges.** + +## Installation + +

+CuYao is a   + + + Julia Language + +   package. It provides CUDA support for Yao.jl. To install CuYao, + please open + Julia's interactive session (known as REPL) and press ] key in the REPL to use the package mode, then type the following command +

+ +For stable release + +```julia +pkg> add CuYao +``` + +For current master + +```julia +pkg> add CuYao#master +``` + +You don't need to install Yao if you have `CuYao` installed. They share the same API except CUDA backend. + +## Documentation + +For documentation of [Yao](https://github.com/QuantumBFS/Yao.jl), please refer to [documentation (stable)](https://quantumbfs.github.io/Yao.jl/stable). + +`CuYao.jl` provides only two extra APIs, `reg |> cu` to upload a register to GPU, and `cureg |> cpu` to download a register to CPU. + +To start, see the following example +```julia +using CuYao + +cureg = rand_state(9; nbatch=1000) |> cu # or `curand_state(9; nbatch=1000)`. +cureg |> put(9, 2=>Z) +measure!(cureg |> append_qubits!(1) |> focus!(4,1,3)) +cureg |> relax!(4,1,3) |> cpu +``` + +Constructors `curand_state`, `cuzero_state`, `cuproduct_state`, `cuuniform_state` and `cughz_state` are tailored for GPU, +they are faster than uploading a CPU register to CPU. + +## Features +### Supported Gates + +- general U(N) gate +- general U(1) gate +- better X, Y, Z gate +- better T, S gate +- SWAP gate +- better control gates +- BP diff blocks + +### Supported Register Operations +- measure!, measure_reset!, measure_remove!, select +- append_qudits!, append_qubits! +- insert_qudit!, insert_qubits! +- focus!, relax! +- join +- density_matrix +- fidelity (not including density matrix) +- expect + +### Other Operations +- autodiff is supported when the only parameterized gates are rotation gates in a circuit. + +## The Team + +This project is an effort of QuantumBFS, an open source organization for quantum science. Yao is currently maintained by [Xiu-Zhe (Roger) Luo](https://github.com/Roger-luo) and [Jin-Guo Liu](https://github.com/GiggleLiu) with contributions from open source community. All the contributors are listed in the [contributors](https://github.com/QuantumBFS/Yao.jl/graphs/contributors). + +## License + +**CuYao** is released under the Apache 2 license. diff --git a/ext/CuYao/benchmarks/TestCuda.jl b/ext/CuYao/benchmarks/TestCuda.jl new file mode 100644 index 00000000..8c6a6cd5 --- /dev/null +++ b/ext/CuYao/benchmarks/TestCuda.jl @@ -0,0 +1,54 @@ +using CUDA +using LinearAlgebra +using BenchmarkTools + +function ms!(X::CuArray, s::Number) + function kernel(X, s) + i = (blockIdx().x-1) * blockDim().x + threadIdx().x + @inbounds X[i] *= s + return + end + @cuda blocks=length(X) kernel(X, s) + X +end + +function ms1!(X::CuArray, a, b, c) + k = f(a) + function kernel(X, a, b, c) + i = (blockIdx().x-1) * blockDim().x + threadIdx().x + k(X, i) + k(X, i) + k(X, i) + #@inbounds X[i] *= a + #@inbounds X[i] *= b + #@inbounds X[i] *= c + return + end + @cuda blocks=length(X)÷256 threads=256 kernel(X, a, b, c) + X +end + +@inline function f(s) + @inline function kernel(X, i) + X[i]*=s + end +end +function ms2!(X::CuArray, s::Number) + function kernel(X, s) + i = (blockIdx().x-1) * blockDim().x + threadIdx().x + @inbounds X[i] *= s + return + end + @cuda blocks=length(X)÷256 threads=256 kernel(X, s) + X +end + +a = randn(1<<10) +cua = cu(a) +@benchmark ms1!($cua, 1.001, 1.001, 1.001) +@benchmark ms2!(ms2!(ms2!($cua, 1.001), 1.001), 1.001) +bss = 1:length(cua) +@benchmark f(1.001).(cua, bss) + +@benchmark rmul!($a, 1.01) +@benchmark ms!($cua, 1.0) diff --git a/ext/CuYao/benchmarks/gates.jl b/ext/CuYao/benchmarks/gates.jl new file mode 100644 index 00000000..b63dbd49 --- /dev/null +++ b/ext/CuYao/benchmarks/gates.jl @@ -0,0 +1,19 @@ +using Yao, CuYao, CUDA +using BenchmarkTools + +reg = rand_state(12; nbatch=1000) +creg = reg |> cu +@benchmark CUDA.@sync creg |> put(12, 3=>Z) +@benchmark CUDA.@sync creg |> put(12, 3=>X) +@benchmark reg |> put(12, 3=>Z) +@benchmark CUDA.@sync creg |> control(12, 6, 3=>X) +@benchmark reg |> control(12, 6, 3=>X) +@benchmark CUDA.@sync creg |> put(12, 3=>rot(X, 0.3)) +@benchmark reg |> put(12, 3=>rot(X, 0.3)) + +reg = rand_state(20) +creg = reg |> cu +g = swap(20, 7, 2) +g = put(20, (7, 2)=>matblock(rand_unitary(4))) +@benchmark reg |> g +@benchmark CUDA.@sync creg |> g diff --git a/ext/CuYao/benchmarks/gcompile.jl b/ext/CuYao/benchmarks/gcompile.jl new file mode 100644 index 00000000..62a90ae9 --- /dev/null +++ b/ext/CuYao/benchmarks/gcompile.jl @@ -0,0 +1,92 @@ +using Yao, Yao.Boost, Yao.Intrinsics, StaticArrays, Yao.Blocks +using CuYao, CUDA +using BenchmarkTools, Profile + +nbit = 12 +c = chain(put(nbit, 2=>X), put(nbit, 2=>rot(X, 0.2)), control(nbit, 3, 2=>rot(X,0.3))) +#c = chain(c..., c...,c...) +cc = c |> KernelCompiled +reg = rand_state(nbit) |> cu + +@benchmark $reg |> copy |> $c[1] seconds = 2 +@benchmark $reg |> copy |> $cc seconds = 2 + + +reg = rand_state(9, 1000) +creg = reg |> cu +@benchmark focus!(reg |> copy, 1) do r + measure!(r) + r +end + +@benchmark focus!(creg|>copy, 1) do r + measure!(r) + r +end + +@benchmark focus!(creg, 1) do r + measure_reset!(r) + r +end + +@profile for i=1:10 focus!(creg |> copy, 1) do r + measure!(r) + r +end +end + +a = randn(1<<10) +ca = a |> cu + +gpu_call(ca, (x->x^2, ca)) do state, f, ca + ilin = linear_index(state) + ca[ilin] = f(ca[ilin]) + return +end + +gpu_call(1:1<<10, (x->x^2, ca)) do state, f, ca + ilin = linear_index(state) + ca[ilin] = f(ca[ilin]) + return +end + + +function xx_kernel(bits::Ints) + ctrl = controller(bits[1], 0) + mask = bmask(bits...) + function kernel(state, inds) + i = inds[1] + b = i-1 + ctrl(b) && swaprows!(piecewise(state, inds), i, flip(b, mask) + 1) + end +end + +using CuYao: x_kernel, y_kernel, z_kernel +fx = x_kernel((3,4)) +fy = y_kernel(3) +fz = z_kernel((3,4)) + +a = randn(1<<10) +ca = a |> cu +fs = (fx, fz) + +@benchmark gpu_call(ca, (fs, ca)) do state, f, ca + #ilin = linear_index(state) + ilin = @cartesianidx ca + for fi in fs + fi(ca, ilin) + end + return +end + +@device_code_warntype gpu_call(ca, ((fx, fz), ca)) do state, fs, ca + ilin = @cartesianidx ca + #for fi in fs fi(ca, ilin) end + @Base.Cartesian.@nexprs $(length(fs)) i->fs[i](ca, ilin) + #fs(ca, ilin) + return +end + +a ≈ Vector(ca) + +@edit controller(3,0) diff --git a/ext/CuYao/benchmarks/paralleldot.jl b/ext/CuYao/benchmarks/paralleldot.jl new file mode 100644 index 00000000..38e5dd6e --- /dev/null +++ b/ext/CuYao/benchmarks/paralleldot.jl @@ -0,0 +1,11 @@ +function paralleldot(matrices::CuVector, ptrA, ptrB) + @inline function kernel(ctx, matrices) + inds = @cartesianidx state + i = inds[1] + piecewise(state, inds)[i] *= anyone(i-1, mask) ? d : a + return + end + gpu_call(kernel, state, a, d, mask; elements=length(state)) + return state +end + diff --git a/ext/CuYao/benchmarks/statexpect.jl b/ext/CuYao/benchmarks/statexpect.jl new file mode 100644 index 00000000..9e3ca5dd --- /dev/null +++ b/ext/CuYao/benchmarks/statexpect.jl @@ -0,0 +1,13 @@ +using CuYao +using Yao +using BenchmarkTools + +sf(x, y) = abs(x-y) +a = randn(1024) +ca = a |> cu +b = randn(1024) +cb = b |> cu +@benchmark expect(StatFunctional{2}(sf), a, b) seconds=2 +@benchmark expect(StatFunctional{2}(sf), a) seconds=2 +@benchmark expect(StatFunctional{2}(sf), ca, cb) seconds=2 +@benchmark expect(StatFunctional{2}(sf), ca) seconds=2 diff --git a/ext/CuYao/src/CUDApatch.jl b/ext/CuYao/src/CUDApatch.jl new file mode 100644 index 00000000..aeaf7d36 --- /dev/null +++ b/ext/CuYao/src/CUDApatch.jl @@ -0,0 +1,64 @@ +# TODO +# support norm(view(reshape(A, m, n), :, 1)) +norm2(A::DenseCuArray; dims=1) = mapreduce(abs2, +, A, dims=dims) .|> CUDA.sqrt + +piecewise(state::AbstractVector, inds) = state +piecewise(state::AbstractMatrix, inds) = @inbounds view(state,:,inds[2]) + +function kron(A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1, T2} + res = CUDA.zeros(promote_type(T1,T2), (size(A).*size(B))...) + @inline function kernel(ctx, res, A, B) + state = @linearidx res + @inbounds inds = CartesianIndices(res)[state].I + inds_A = (inds.-1) .÷ size(B) .+ 1 + inds_B = (inds.-1) .% size(B) .+ 1 + @inbounds res[state] = A[inds_A...]*B[inds_B...] + return + end + + gpu_call(kernel, res, A, B) + res +end + +""" + kron!(C::CuArray, A, B) + +Computes Kronecker products in-place on the GPU. +The results are stored in 'C', overwriting the existing values of 'C'. +""" +function Yao.YaoArrayRegister.kron!(C::CuArray{T3}, A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1, T2, T3} + @boundscheck (size(C) == (size(A,1)*size(B,1), size(A,2)*size(B,2))) || throw(DimensionMismatch()) + CI = Base.CartesianIndices(C) + @inline function kernel(ctx, C, A, B) + state = @linearidx C + @inbounds inds = CI[state].I + inds_A = (inds.-1) .÷ size(B) .+ 1 + inds_B = (inds.-1) .% size(B) .+ 1 + @inbounds C[state] = A[inds_A...]*B[inds_B...] + return + end + + gpu_call(kernel, C, A, B) + C +end + +function getindex(A::DenseCuVector{T}, B::DenseCuArray{<:Integer}) where T + res = CUDA.zeros(T, size(B)...) + @inline function kernel(ctx, res, A, B) + state = @linearidx res + @inbounds res[state] = A[B[state]] + return + end + gpu_call(kernel, res, A, B) + res +end + +function getindex(A::AbstractVector, B::DenseCuArray{<:Integer}) + A[Array(B)] +end + +YaoBlocks.AD.as_scalar(x::DenseCuArray) = Array(x)[] + +# patch for ExponentialUtilities +YaoBlocks.compatible_multiplicative_operand(::CuArray, source::AbstractArray) = CuArray(source) +YaoBlocks.compatible_multiplicative_operand(::CuArray, source::CuArray) = source diff --git a/ext/CuYao/src/CuYao.jl b/ext/CuYao/src/CuYao.jl new file mode 100644 index 00000000..cc64a45e --- /dev/null +++ b/ext/CuYao/src/CuYao.jl @@ -0,0 +1,39 @@ +# module CuYao +using Yao.YaoArrayRegister.LuxurySparse, Yao.YaoArrayRegister.StaticArrays, LinearAlgebra, Base.Cartesian +using Yao.YaoArrayRegister.StatsBase, Yao.YaoArrayRegister.BitBasis +using Yao.Reexport +import Yao.YaoArrayRegister.TupleTools +using Yao.YaoArrayRegister.Random + +using Yao.YaoArrayRegister +using Yao +using CUDA +using CUDA.GPUArrays: gpu_call, @linearidx, @cartesianidx, linear_index +using Yao.YaoArrayRegister +using Yao.YaoBlocks +using Yao.ConstGate: SWAPGate +using Yao.ConstGate: S, T, Sdag, Tdag + +import Yao.YaoArrayRegister: insert_qudits!, join +import CUDA: cu +import Yao.YaoArrayRegister: _measure, measure, measure! +import Yao.YaoArrayRegister: batch_normalize! +import Yao.YaoBlocks: BlockedBasis, nblocks, subblock +import Yao: expect +import Yao.YaoArrayRegister: u1rows!, unrows!, autostatic, instruct!, swaprows!, mulrow! +import Yao.YaoArrayRegister.LinearAlgebra: norm +import Base: kron, getindex + +export cpu, cu, AbstractCuArrayReg, CuArrayReg, CuBatchedArrayReg, CuDensityMatrix, + cuzero_state, cuuniform_state, curand_state, cuproduct_state, cughz_state + +const Ints = NTuple{<:Any, Int} + +include("CUDApatch.jl") +include("register.jl") +include("instructs.jl") + +function __init__() + CUDA.allowscalar(false) +end +# end diff --git a/ext/CuYao/src/instructs.jl b/ext/CuYao/src/instructs.jl new file mode 100644 index 00000000..6ed8e35d --- /dev/null +++ b/ext/CuYao/src/instructs.jl @@ -0,0 +1,312 @@ +# get index +macro idx(shape, grididx=1, ctxsym=:ctx) + quote + x = $(esc(shape)) + i = $linear_index($(esc(ctxsym)), $(esc(grididx))) + i > $prod2(x) && return + @inbounds Base.CartesianIndices(x)[i].I + end +end +replace_first(x::NTuple{2}, v) = (v, x[2]) +replace_first(x::NTuple{1}, v) = (v,) +prod2(x::NTuple{2}) = x[1] * x[2] +prod2(x::NTuple{1}) = x[1] + +gpu_compatible(A::AbstractVecOrMat) = A |> staticize +gpu_compatible(A::StaticArray) = A + +###################### unapply! ############################ +function instruct!(::Val{2}, state::DenseCuVecOrMat, U0::AbstractMatrix, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M} + @debug "The generic U(N) matrix of size ($(size(U0))), on: GPU, locations: $(locs), controlled by: $(clocs) = $(cvals)." + nbit = log2dim1(state) + # reorder a unirary matrix. + U = gpu_compatible(all(TupleTools.diff(locs).>0) ? U0 : reorder(U0, collect(locs)|>sortperm)) + locked_bits = [clocs..., locs...] + locked_vals = [cvals..., zeros(Int, M)...] + locs_raw = gpu_compatible([i+1 for i in itercontrol(nbit, setdiff(1:nbit, locs), zeros(Int, nbit-M))]) + configs = itercontrol(nbit, locked_bits, locked_vals) + + len = length(configs) + @inline function kernel(ctx, state, locs_raw, U, configs, len) + CUDA.assume(len > 0) + sz = size(state) + CUDA.assume(length(sz) == 1 || length(sz) == 2) + inds = @idx replace_first(sz, len) + x = @inbounds configs[inds[1]] + @inbounds unrows!(piecewise(state, inds), x .+ locs_raw, U) + return + end + + @inline function kernel_single_entry_diag(ctx, state, loc, val, configs, len) + CUDA.assume(len > 0) + sz = size(state) + CUDA.assume(length(sz) == 1 || length(sz) == 2) + inds = @idx replace_first(sz, len) + x = @inbounds configs[inds[1]] + @inbounds piecewise(state, inds)[x + loc] *= val + return + end + + elements = len*size(state,2) + if U isa Diagonal && count(!isone, U.diag) == 1 + @debug "The single entry diagonal matrix, on: GPU, locations: $(locs), controlled by: $(clocs) = $(cvals)." + k = findfirst(!isone, U.diag) + loc = locs_raw[k] + val = U.diag[k] + gpu_call(kernel_single_entry_diag, state, loc, val, configs, len; elements) + else + gpu_call(kernel, state, locs_raw, U, configs, len; elements) + end + state +end +instruct!(::Val{2}, state::DenseCuVecOrMat, U0::IMatrix, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M} = state +instruct!(::Val{2}, state::DenseCuVecOrMat, U0::SDSparseMatrixCSC, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M} = instruct!(Val(2), state, U0 |> Matrix, locs, clocs, cvals) + +################## General U1 apply! ################### +function YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U1::SDSparseMatrixCSC, loc::Int) + instruct!(Val(2), state, Matrix(U1), loc, clocs, cval) +end +function YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U1::AbstractMatrix, loc::Int) + @debug "The generic U(2) matrix of size ($(size(U1))), on: GPU, locations: $(loc)." + a, c, b, d = U1 + nbit = log2dim1(state) + step = 1<<(loc-1) + configs = itercontrol(nbit, [loc], [0]) + + len = length(configs) + @inline function kernel(ctx, state, a, b, c, d, len) + inds = @idx replace_first(size(state), len) + i = @inbounds configs[inds[1]]+1 + @inbounds u1rows!(piecewise(state, inds), i, i+step, a, b, c, d) + return + end + gpu_call(kernel, state, a, b, c, d, len; elements=len*size(state,2)) + return state +end + +function YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U1::SDPermMatrix, loc::Int) + @debug "The single qubit permutation matrix of size ($(size(U1))), on: GPU, locations: $(loc)." + nbit = log2dim1(state) + b, c = U1.vals + step = 1<<(loc-1) + configs = itercontrol(nbit, [loc], [0]) + + len = length(configs) + function kernel(ctx, state, b, c, step, len, configs) + inds = @idx replace_first(size(state), len) + x = @inbounds configs[inds[1]] + 1 + @inbounds swaprows!(piecewise(state, inds), x, x+step, c, b) + return + end + gpu_call(kernel, state, b, c, step, len, configs; elements=len*size(state,2)) + return state +end + +function YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U1::SDDiagonal, loc::Int) + @debug "The single qubit diagonal matrix of size ($(size(U1))), on: GPU, locations: $(loc)." + a, d = U1.diag + nbit = log2dim1(state) + mask = bmask(loc) + @inline function kernel(ctx, state, a, d, mask) + inds = @cartesianidx state + i = inds[1] + piecewise(state, inds)[i] *= anyone(i-1, mask) ? d : a + return + end + gpu_call(kernel, state, a, d, mask; elements=length(state)) + return state +end + +YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U::IMatrix, loc::Int) = state + +################## XYZ ############# + +_instruct!(state::DenseCuVecOrMat, ::Val{:X}, locs::NTuple{L,Int}) where {L} = _instruct!(state, Val(:X), locs, (), ()) +function _instruct!(state::DenseCuVecOrMat, ::Val{:X}, locs::NTuple{L,Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {L,C} + @debug "The X gate, on: GPU, locations: $(locs), controlled by: $(clocs) = $(cvals)." + length(locs) == 0 && return state + nbit = log2dim1(state) + configs = itercontrol(nbit, [clocs..., locs[1]], [cvals..., 0]) + mask = bmask(locs...) + len = length(configs) + @inline function kernel(ctx, state, mask, len, configs) + inds = @idx replace_first(size(state), len) + b = @inbounds configs[inds[1]] + @inbounds swaprows!(piecewise(state, inds), b+1, flip(b, mask) + 1) + return + end + gpu_call(kernel, state, mask, len, configs; elements=len*size(state,2)) + return state +end + +function _instruct!(state::DenseCuVecOrMat, ::Val{:Y}, locs::NTuple{C,Int}) where C + @debug "The Y gate, on: GPU, locations: $(locs)." + length(locs) == 0 && return state + nbit = log2dim1(state) + mask = bmask(Int, locs...) + configs = itercontrol(nbit, [locs[1]], [0]) + bit_parity = length(locs)%2 == 0 ? 1 : -1 + factor = (-im)^length(locs) + len = length(configs) + @inline function kernel(ctx, state, mask, bit_parity, configs, len) + inds = @idx replace_first(size(state), len) + b = @inbounds configs[inds[1]] + i_ = flip(b, mask) + 1 + factor1 = count_ones(b&mask)%2 == 1 ? -factor : factor + factor2 = factor1*bit_parity + @inbounds swaprows!(piecewise(state, inds), b+1, i_, factor2, factor1) + return + end + gpu_call(kernel, state, mask, bit_parity, configs, len; elements=len*size(state,2)) + return state +end + +function _instruct!(state::DenseCuVecOrMat, ::Val{:Y}, locs::Tuple{Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where C + @debug "The Y gate, on: GPU, locations: $(locs), controlled by: $(clocs) = $(cvals)." + length(locs) == 0 && return state + nbit = log2dim1(state) + configs = itercontrol(nbit, [clocs..., locs...], [cvals..., 0]) + mask = bmask(locs...) + len = length(configs) + @inline function kernel(ctx, state, configs, mask, len) + inds = @idx replace_first(size(state), len) + b = @inbounds configs[inds[1]] + @inbounds swaprows!(piecewise(state, inds), b+1, flip(b, mask) + 1, im, -im) + return + end + gpu_call(kernel, state, configs, mask, len; elements=len*size(state,2)) + return state +end + +function _instruct!(state::DenseCuVecOrMat, ::Val{:Z}, locs::NTuple{C,Int}) where C + @debug "The Z gate, on: GPU, locations: $(locs)." + length(locs) == 0 && return state + nbit = log2dim1(state) + mask = bmask(locs...) + @inline function kernel(ctx, state, mask) + inds = @cartesianidx state + i = inds[1] + piecewise(state, inds)[i] *= count_ones((i-1)&mask)%2==1 ? -1 : 1 + return + end + gpu_call(kernel, state, mask; elements=length(state)) + return state +end + + +for (G, FACTOR) in zip([:Z, :S, :T, :Sdag, :Tdag], [:(-one(Int32)), :(1f0im), :($(exp(im*π/4))), :(-1f0im), :($(exp(-im*π/4)))]) + if G !== :Z + @eval function _instruct!(state::DenseCuVecOrMat, ::Val{$(QuoteNode(G))}, locs::NTuple{C,Int}) where C + @debug "The $($(QuoteNode(G))) gate, on: GPU, locations: $(locs)." + length(locs) == 0 && return state + nbit = log2dim1(state) + mask = bmask(Int32, locs...) + @inline function kernel(ctx, state, mask) + inds = @cartesianidx state + i = inds[1] + piecewise(state, inds)[i] *= $FACTOR ^ count_ones(Int32(i-1)&mask) + return + end + gpu_call(kernel, state, mask; elements=length(state)) + return state + end + end + @eval function _instruct!(state::DenseCuVecOrMat, ::Val{$(QuoteNode(G))}, locs::Tuple{Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where C + @debug "The $($(QuoteNode(G))) gate, on: GPU, locations: $(locs), controlled by: $(clocs) = $(cvals)." + ctrl = controller((clocs..., locs...), (cvals..., 1)) + @inline function kernel(ctx, state, ctrl) + inds = @cartesianidx state + i = inds[1] + piecewise(state, inds)[i] *= ctrl(i-1) ? $FACTOR : one($FACTOR) + return + end + gpu_call(kernel, state, ctrl; elements=length(state)) + return state + end +end + +for G in [:X, :Y, :Z, :S, :T, :Sdag, :Tdag] + @eval begin + function YaoArrayRegister.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::NTuple{C,Int}) where C + _instruct!(state, g, locs) + end + + function YaoArrayRegister.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::Tuple{Int}) + _instruct!(state, g, locs) + end + + function YaoArrayRegister.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::Tuple{Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where C + _instruct!(state, g, locs, clocs, cvals) + end + + function YaoArrayRegister.instruct!(::Val{2}, state::DenseCuVecOrMat, vg::Val{$(QuoteNode(G))}, locs::Tuple{Int}, cloc::Tuple{Int}, cval::Tuple{Int}) + _instruct!(state, vg, locs, cloc, cval) + end + end + +end + +function instruct!(::Val{2}, state::DenseCuVecOrMat, ::Val{:SWAP}, locs::Tuple{Int,Int}) + @debug "The SWAP gate, on: GPU, locations: $(locs)." + b1, b2 = locs + mask1 = bmask(b1) + mask2 = bmask(b2) + + configs = itercontrol(log2dim1(state), [locs...], [1,0]) + function kf(ctx, state, mask1, mask2) + inds = @idx replace_first(size(state), length(configs)) + + b = configs[inds[1]] + i = b+1 + i_ = b ⊻ (mask1|mask2) + 1 + swaprows!(piecewise(state, inds), i, i_) + nothing + end + gpu_call(kf, state, mask1, mask2; elements=length(configs)*size(state,2)) + state +end + +############## other gates ################ +# parametrized swap gate + +function instruct!(::Val{2}, state::DenseCuVecOrMat, ::Val{:PSWAP}, locs::Tuple{Int, Int}, θ::Real) + @debug "The PSWAP gate, on: GPU, locations: $(locs)." + nbit = log2dim1(state) + mask1 = bmask(locs[1]) + mask2 = bmask(locs[2]) + mask12 = mask1|mask2 + a, c, b_, d = mat(Rx(θ)) + e = exp(-im/2*θ) + configs = itercontrol(nbit, [locs...], [0,0]) + len = length(configs) + @inline function kernel(ctx, state, mask2, mask12, configs, a, b_, c, d) + inds = @idx replace_first(size(state), len) + @inbounds x = configs[inds[1]] + piecewise(state, inds)[x+1] *= e + piecewise(state, inds)[x⊻mask12+1] *= e + y = x ⊻ mask2 + @inbounds u1rows!(piecewise(state, inds), y+1, y⊻mask12+1, a, b_, c, d) + return + end + gpu_call(kernel, state, mask2, mask12, configs, a, b_, c, d; elements=len*size(state,2)) + state +end + +function YaoBlocks._apply_fallback!(r::AbstractCuArrayReg{B,T}, b::AbstractBlock) where {B,T} + YaoBlocks._check_size(r, b) + r.state .= CUDA.adapt(CuArray{T}, mat(T, b)) * r.state + return r +end + +for RG in [:Rx, :Ry, :Rz] + @eval function instruct!( + ::Val{2}, + state::DenseCuVecOrMat{T}, + ::Val{$(QuoteNode(RG))}, + locs::Tuple{Int}, + theta::Number + ) where {T} + YaoArrayRegister.instruct!(Val(2), state, Val($(QuoteNode(RG))), locs, (), (), theta) + return state + end +end diff --git a/ext/CuYao/src/register.jl b/ext/CuYao/src/register.jl new file mode 100644 index 00000000..cc493e94 --- /dev/null +++ b/ext/CuYao/src/register.jl @@ -0,0 +1,290 @@ +cu(reg::ArrayReg{D}) where D = ArrayReg{D}(CuArray(reg.state)) +cpu(reg::ArrayReg{D}) where D = ArrayReg{D}(Array(reg.state)) +cu(reg::BatchedArrayReg{D}) where D = BatchedArrayReg{D}(CuArray(reg.state), reg.nbatch) +cpu(reg::BatchedArrayReg{D}) where D = BatchedArrayReg{D}(Array(reg.state), reg.nbatch) +cu(reg::DensityMatrix{D}) where D = DensityMatrix{D}(CuArray(reg.state)) +cpu(reg::DensityMatrix{D}) where D = DensityMatrix{D}(Array(reg.state)) +const AbstractCuArrayReg{D, T, MT} = AbstractArrayReg{D, T, MT} where MT<:DenseCuArray +const CuArrayReg{D, T, MT} = ArrayReg{D, T, MT} where MT<:DenseCuArray +const CuBatchedArrayReg{D, T, MT} = BatchedArrayReg{D, T, MT} where MT<:DenseCuArray +const CuDensityMatrix{D, T, MT} = DensityMatrix{D, T, MT} where MT<:DenseCuMatrix + +function batch_normalize!(s::DenseCuArray, p::Real=2) + p!=2 && throw(ArgumentError("p must be 2!")) + s./=norm2(s, dims=1) + return s +end + +@inline function tri2ij(l::Int) + i = ceil(Int, sqrt(2*l+0.25)-0.5) + j = l-i*(i-1)÷2 + return i+1,j +end + +############### MEASURE ################## +function measure(::ComputationalBasis, reg::ArrayReg{D, T, MT} where MT<:DenseCuArray, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG, nshots::Int=1) where {D,T} + _measure(rng, basis(reg), reg |> probs |> Vector, nshots) +end + +# TODO: optimize the batch dimension using parallel sampling +function measure(::ComputationalBasis, reg::BatchedArrayReg{D, T, MT} where MT<:DenseCuArray, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG, nshots::Int=1) where {D,T} + regm = reg |> rank3 + pl = dropdims(mapreduce(abs2, +, regm, dims=2), dims=2) + return _measure(rng, basis(reg), pl |> Matrix, nshots) +end + +function measure!(::RemoveMeasured, ::ComputationalBasis, reg::AbstractCuArrayReg{D}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG) where D + regm = reg |> rank3 + B = size(regm, 3) + nregm = similar(regm, D ^ nremain(reg), B) + pl = dropdims(mapreduce(abs2, +, regm, dims=2), dims=2) + pl_cpu = pl |> Matrix + res_cpu = map(ib->_measure(rng, basis(reg), view(pl_cpu, :, ib), 1)[], 1:B) + res = CuArray(res_cpu) + CI = Base.CartesianIndices(nregm) + @inline function kernel(ctx, nregm, regm, res, pl) + state = @linearidx nregm + @inbounds i,j = CI[state].I + @inbounds r = Int(res[j])+1 + @inbounds nregm[i,j] = regm[r,i,j]/CUDA.sqrt(pl[r, j]) + return + end + gpu_call(kernel, nregm, regm, res, pl) + reg.state = reshape(nregm,1,:) + return reg isa ArrayReg ? Array(res)[] : res +end + +function measure!(::NoPostProcess, ::ComputationalBasis, reg::AbstractCuArrayReg{D, T}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG) where {D, T} + regm = reg |> rank3 + B = size(regm, 3) + pl = dropdims(mapreduce(abs2, +, regm, dims=2), dims=2) + pl_cpu = pl |> Matrix + res_cpu = map(ib->_measure(rng, basis(reg), view(pl_cpu, :, ib), 1)[], 1:B) + res = CuArray(res_cpu) + CI = Base.CartesianIndices(regm) + + @inline function kernel(ctx, regm, res, pl) + state = @linearidx regm + @inbounds k,i,j = CI[state].I + @inbounds rind = Int(res[j]) + 1 + @inbounds regm[k,i,j] = k==rind ? regm[k,i,j]/CUDA.sqrt(pl[k, j]) : T(0) + return + end + gpu_call(kernel, regm, res, pl) + return reg isa ArrayReg ? Array(res)[] : res +end + +function YaoArrayRegister.measure!( + ::NoPostProcess, + bb::BlockedBasis, + reg::AbstractCuArrayReg{D,T}, + ::AllLocs; + rng::AbstractRNG = Random.GLOBAL_RNG, +) where {D,T} + state = @inbounds (reg|>rank3)[bb.perm, :, :] # permute to make eigen values sorted + B = size(state, 3) + pl = dropdims(mapreduce(abs2, +, state, dims=2), dims=2) + pl_cpu = pl |> Matrix + pl_block = zeros(eltype(pl), nblocks(bb), B) + @inbounds for ib = 1:B + for i = 1:nblocks(bb) + for k in subblock(bb, i) + pl_block[i, ib] += pl_cpu[k, ib] + end + end + end + # perform measurements on CPU + res_cpu = Vector{Int}(undef, B) + @inbounds @views for ib = 1:B + ires = sample(rng, 1:nblocks(bb), Weights(pl_block[:, ib])) + # notice ires is `BitStr` type, can be use as indices directly. + range = subblock(bb, ires) + state[range, :, ib] ./= sqrt(pl_block[ires, ib]) + state[1:range.start-1, :, ib] .= zero(T) + state[range.stop+1:size(state, 1), :, ib] .= zero(T) + res_cpu[ib] = ires + end + # undo permute and assign back + _state = reshape(state, 1 << nactive(reg), :) + rstate = reshape(reg.state, 1 << nactive(reg), :) + @inbounds rstate[bb.perm, :] .= _state + return reg isa ArrayReg ? bb.values[res_cpu[]] : CuArray(bb.values[res_cpu]) +end + +function measure!(rst::ResetTo, ::ComputationalBasis, reg::AbstractCuArrayReg{D, T}, ::AllLocs; rng::AbstractRNG=Random.GLOBAL_RNG) where {D, T} + regm = reg |> rank3 + B = size(regm, 3) + pl = dropdims(mapreduce(abs2, +, regm, dims=2), dims=2) + pl_cpu = pl |> Matrix + res_cpu = map(ib->_measure(rng, basis(reg), view(pl_cpu, :, ib), 1)[], 1:B) + res = CuArray(res_cpu) + CI = Base.CartesianIndices(regm) + + @inline function kernel(ctx, regm, res, pl, val) + state = @linearidx regm + @inbounds k,i,j = CI[state].I + @inbounds rind = Int(res[j]) + 1 + @inbounds k==val+1 && (regm[k,i,j] = regm[rind,i,j]/CUDA.sqrt(pl[rind, j])) + CUDA.sync_threads() + @inbounds k!=val+1 && (regm[k,i,j] = 0) + return + end + + gpu_call(kernel, regm, res, pl, rst.x) + return reg isa ArrayReg ? Array(res)[] : res +end + +function YaoArrayRegister.batched_kron(A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1 ,T2} + res = CUDA.zeros(promote_type(T1,T2), size(A,1)*size(B, 1), size(A,2)*size(B,2), size(A, 3)) + CI = Base.CartesianIndices(res) + @inline function kernel(ctx, res, A, B) + state = @linearidx res + @inbounds i,j,b = CI[state].I + i_A, i_B = divrem((i-1), size(B,1)) + j_A, j_B = divrem((j-1), size(B,2)) + @inbounds res[state] = A[i_A+1, j_A+1, b]*B[i_B+1, j_B+1, b] + return + end + + gpu_call(kernel, res, A, B) + return res +end + +""" + YaoArrayRegister.batched_kron!(C::CuArray, A, B) + +Performs batched Kronecker products in-place on the GPU. +The results are stored in 'C', overwriting the existing values of 'C'. +""" +function YaoArrayRegister.batched_kron!(C::CuArray{T3, 3}, A::DenseCuArray, B::DenseCuArray) where {T3} + @boundscheck (size(C) == (size(A,1)*size(B,1), size(A,2)*size(B,2), size(A,3))) || throw(DimensionMismatch()) + @boundscheck (size(A,3) == size(B,3) == size(C,3)) || throw(DimensionMismatch()) + CI = Base.CartesianIndices(C) + @inline function kernel(ctx, C, A, B) + state = @linearidx C + @inbounds i,j,b = CI[state].I + i_A, i_B = divrem((i-1), size(B,1)) + j_A, j_B = divrem((j-1), size(B,2)) + @inbounds C[state] = A[i_A+1, j_A+1, b]*B[i_B+1, j_B+1, b] + return + end + + gpu_call(kernel, C, A, B) + return C +end + +function join(reg1::AbstractCuArrayReg{D}, reg2::AbstractCuArrayReg{D}) where {D} + @assert nbatch(reg1) == nbatch(reg2) + s1 = reg1 |> rank3 + s2 = reg2 |> rank3 + state = YaoArrayRegister.batched_kron(s1, s2) + return arrayreg(copy(reshape(state, size(state, 1), :)); nlevel=D, nbatch=nbatch(reg1)) +end + +function Yao.insert_qudits!(reg::AbstractCuArrayReg{D}, loc::Int; nqudits::Int=1) where D + na = nactive(reg) + focus!(reg, 1:loc-1) + reg2 = join(zero_state(nqudits; nbatch=nbatch(reg)) |> cu, reg) |> relax! |> focus!((1:na+nqudits)...) + reg.state = reg2.state + return reg +end + +""" + cuproduct_state([T=ComplexF64], total::Int, bit_config::Integer; nbatch=NoBatch()) + +The GPU version of [`product_state`](@ref). +""" +cuproduct_state(bit_str::BitStr; nbatch::Union{NoBatch,Int} = NoBatch()) = + cuproduct_state(ComplexF64, bit_str; nbatch = nbatch) +cuproduct_state(bit_str::AbstractVector; nbatch::Union{NoBatch,Int} = NoBatch()) = + cuproduct_state(ComplexF64, bit_str; nbatch = nbatch) +cuproduct_state(total::Int, bit_config::Integer; kwargs...) = + cuproduct_state(ComplexF64, total, bit_config; kwargs...) +cuproduct_state(::Type{T}, bit_str::BitStr{N}; kwargs...) where {T,N} = + cuproduct_state(T, N, buffer(bit_str); kwargs...) +cuproduct_state(::Type{T}, bit_configs::AbstractVector; kwargs...) where {T} = + cuproduct_state(T, bit_literal(bit_configs...); kwargs...) +function cuproduct_state( + ::Type{T}, + total::Int, + bit_config::Integer; + nbatch::Union{Int,NoBatch} = NoBatch(), + nlevel::Int=2, +) where {T} + raw = CUDA.zeros(T, nlevel ^ total, YaoArrayRegister._asint(nbatch)) + raw[Int(bit_config)+1,:] .= Ref(one(T)) + return arrayreg(raw; nbatch=nbatch, nlevel=nlevel) +end + +cuzero_state(n::Int; kwargs...) = cuzero_state(ComplexF64, n; kwargs...) +cuzero_state(::Type{T}, n::Int; kwargs...) where {T} = cuproduct_state(T, n, 0; kwargs...) + +""" + curand_state([T=ComplexF64], n::Int; nbatch=1) + +The GPU version of [`rand_state`](@ref). +""" +curand_state(n::Int; kwargs...) = curand_state(ComplexF64, n; kwargs...) + +function curand_state( + ::Type{T}, + n::Int; + nbatch::Union{Int,NoBatch} = NoBatch(), + nlevel = 2, +) where {T} + raw = CUDA.randn(T, nlevel ^ n, YaoArrayRegister._asint(nbatch)) + return normalize!(arrayreg(raw; nbatch=nbatch, nlevel=nlevel)) +end + +""" + cuuniform_state([T=ComplexF64], n::Int; nbatch=1) + +The GPU version of [`uniform_state`](@ref). +""" +cuuniform_state(n::Int; kwargs...) = cuuniform_state(ComplexF64, n; kwargs...) +function cuuniform_state(::Type{T}, n::Int; + nbatch::Union{Int,NoBatch} = NoBatch(), + nlevel::Int = 2, +) where {T} + raw = CUDA.ones(T, nlevel ^ n, YaoArrayRegister._asint(nbatch)) + return normalize!(arrayreg(raw; nbatch=nbatch, nlevel=nlevel)) +end + +""" + cughz_state([T=ComplexF64], n::Int; nbatch=1) + +The GPU version of [`ghz_state`](@ref). +""" +cughz_state(n::Int; kwargs...) = cughz_state(ComplexF64, n; kwargs...) +function cughz_state(::Type{T}, n::Int; kwargs...) where {T} + reg = cuzero_state(T, n; kwargs...) + reg.state[1:1,:] .= Ref(sqrt(T(0.5))) + reg.state[end:end,:] .= Ref(sqrt(T(0.5))) + return reg +end + +#= +for FUNC in [:measure!, :measure!] + @eval function $FUNC(rng::AbstractRNG, op::AbstractBlock, reg::AbstractCuArrayReg, al::AllLocs; kwargs...) where B + E, V = eigen!(mat(op) |> Matrix) + ei = Eigen(E|>cu, V|>cu) + $FUNC(rng::AbstractRNG, ei, reg, al; kwargs...) + end +end +=# + +function YaoBlocks.expect(op::AbstractAdd, dm::CuDensityMatrix) + sum(x->expect(x, dm), subblocks(op)) +end +function YaoBlocks.expect(op::AbstractBlock, dm::CuDensityMatrix{D}) where D + return tr(apply(ArrayReg{D}(dm.state), op).state) +end + +measure( + ::ComputationalBasis, + reg::CuDensityMatrix, + ::AllLocs; + nshots::Int = 1, + rng::AbstractRNG = Random.GLOBAL_RNG, +) = YaoArrayRegister._measure(rng, basis(reg), Array(reg |> probs), nshots) + diff --git a/ext/CuYao/test/CUDApatch.jl b/ext/CuYao/test/CUDApatch.jl new file mode 100644 index 00000000..b00adb96 --- /dev/null +++ b/ext/CuYao/test/CUDApatch.jl @@ -0,0 +1,35 @@ +using CuYao +using CUDA +using Test +using Yao.YaoBlocks + +@testset "isapprox-complex" begin + ca = CuArray(randn(ComplexF64,3,3)) + cb = copy(ca) + #@test ca ≈ cb still error! + cb[1:1, 1:1] .+= 1e-7im + @test isapprox(ca, cb, atol=1e-5) + @test !isapprox(ca, cb, atol=1e-9) +end + +@testset "view general" begin + a = randn(5,6,8) |> CuArray + @test view(a, 2:4, 4, [1,4,3]) |> size == (3, 3) +end + +@testset "permutedims vector" begin + ca = randn(ComplexF64,3,4,5,1) + @test permutedims(CuArray(ca), [2,1,4,3]) |> Array ≈ permutedims(ca, [2,1,4,3]) +end + +@testset "Complex pow" begin + for T in [ComplexF64, ComplexF32] + a = CuArray(randn(T, 4, 4)) + @test Array(a .^ Int32(3)) ≈ Array(a).^3 + @test Array(a .^ real(T)(3)) ≈ Array(a).^3 + end +end + +@testset "as_scalar" begin + @test YaoBlocks.AD.as_scalar([2.0] |> CuArray) == 2.0 +end diff --git a/ext/CuYao/test/extra.jl b/ext/CuYao/test/extra.jl new file mode 100644 index 00000000..739ee6cb --- /dev/null +++ b/ext/CuYao/test/extra.jl @@ -0,0 +1,52 @@ +using CuYao, Test, CUDA +CUDA.allowscalar(false) + +@testset "gradient" begin + reg1 = rand_state(10) + reg2 = rand_state(10) + c = EasyBuild.variational_circuit(10) + g1, g2 = fidelity'(reg1=>c, reg2) + cg1, cg2 = fidelity'(cu(reg1)=>c, cu(reg2)) + @test g1.first ≈ cpu(cg1.first) + @test g1.second ≈ cg1.second + @test g2 ≈ cpu(cg2) + + h = EasyBuild.heisenberg(10) + g1 = expect'(h, reg1=>c) + cg1 = expect'(h, cu(reg1)=>c) + @test g1.first ≈ cpu(cg1.first) + @test g1.second ≈ cg1.second +end + +@testset "apply density matrix" begin + reg = rand_state(6) + creg = cu(reg) + rho = density_matrix(reg, (3,4)) + crho = density_matrix(creg, (3,4)) + @test rho ≈ cpu(crho) + g = put(2, 1=>Rx(0.3)) + @test cpu(apply(crho, g)) ≈ apply(rho, g) + rho = density_matrix(reg) + crho = density_matrix(creg) + @test rho ≈ cpu(crho) + @test probs(crho) ≈ probs(creg) + g = put(6, 1=>Rx(0.3)) + @test cpu(apply(crho, g)) ≈ apply(rho, g) + + # channel + c = UnitaryChannel([put(6, 1=>Rx(0.3)), put(6, 2=>Z)], [0.4, 0.6]) + @test cpu(apply(crho, c)) ≈ apply(rho, c) +end + +@testset "expect on density matrix" begin + reg = rand_state(6) + rho = density_matrix(reg, (3,4,5)) + crho = cu(rho) + h = EasyBuild.heisenberg(3) + a = expect(h, rho) + b = expect(h, crho) + @test a ≈ b + # fidelity + @test fidelity(crho, crho) ≈ 1 + @test measure(crho; nshots=2) isa Vector +end \ No newline at end of file diff --git a/ext/CuYao/test/instructs.jl b/ext/CuYao/test/instructs.jl new file mode 100644 index 00000000..300e779a --- /dev/null +++ b/ext/CuYao/test/instructs.jl @@ -0,0 +1,135 @@ +using LinearAlgebra, Yao.ConstGate +using Test, Random +using CuYao +using Yao.YaoArrayRegister.StaticArrays +using Yao.ConstGate: SWAPGate +using CUDA + +@testset "gpu instruct nbit!" begin + Random.seed!(3) + nbit = 6 + N = 1<Z)), + mat(ComplexF32, put(2,2=>I2)), + mat(ComplexF32, put(2,2=>P0)) + ] + + @test instruct!(Val(2),v1 |> CuArray, UN, (3,1)) |> Vector ≈ instruct!(Val(2),v1 |> copy, UN, (3,1)) + @test instruct!(Val(2),vn |> CuArray, UN, (3,1)) |> Matrix ≈ instruct!(Val(2),vn |> copy, UN, (3,1)) + end + # sparse matrix like P0, P1 et. al. are not implemented. + for g in [mat(ComplexF32, ConstGate.T), mat(ComplexF32, ConstGate.H), mat(ComplexF32, rot(Z, 0.5))] + @test instruct!(Val(2), v1 |> CuArray, g, (3,), (4,), (1,)) |> Vector ≈ instruct!(Val(2), v1 |> copy, g, (3,), (4,), (1,)) + @test instruct!(Val(2), vn |> CuArray, g, (3,), (4,), (1,)) |> Matrix ≈ instruct!(Val(2), vn |> copy, g, (3,), (4,), (1,)) + @test instruct!(Val(2), v1 |> CuArray, g, (3,), (4,), (1,)) |> Vector ≈ instruct!(Val(2), v1 |> copy, g, (3,), (4,), (1,)) + @test instruct!(Val(2), vn |> CuArray, g, (3,), (4,), (1,)) |> Matrix ≈ instruct!(Val(2), vn |> copy, g, (3,), (4,), (1,)) + end +end + +@testset "gpu swapapply!" begin + nbit = 6 + N = 1< CuArray, Val(:SWAP), (3,5)) |> Vector ≈ instruct!(Val(2),v1 |> copy, Val(:SWAP), (3,5)) + @test instruct!(Val(2),vn |> CuArray, Val(:SWAP), (3,5)) |> Matrix ≈ instruct!(Val(2),vn |> copy, Val(:SWAP), (3,5)) +end + +@testset "gpu instruct! 1bit" begin + nbit = 6 + N = 1< CuArray, U1, (3,)) |> Vector ≈ instruct!(Val(2),v1 |> copy, U1, (3,)) + @test instruct!(Val(2),vn |> CuArray, U1, (3,)) |> Matrix ≈ instruct!(Val(2),vn |> copy, U1, (3,)) + end + # sparse matrix like P0, P1 et. al. are not implemented. +end + +@testset "gpu xyz-instruct!" begin + nbit = 6 + N = 1< CuArray, Val(G), (3,)) |> Vector ≈ instruct!(Val(2),v1 |> copy, Val(G), (3,)) + @test instruct!(Val(2),vn |> CuArray, Val(G), (3,)) |> Matrix ≈ instruct!(Val(2),vn |> copy, Val(G), (3,)) + if G != :H + @test instruct!(Val(2),v1 |> CuArray, Val(G), (1,3,4)) |> Vector ≈ instruct!(Val(2),v1 |> copy, Val(G), (1,3,4)) + @test instruct!(Val(2),vn |> CuArray, Val(G),(1,3,4)) |> Matrix ≈ instruct!(Val(2),vn |> copy, Val(G), (1,3,4)) + end + end +end + +@testset "gpu cn-xyz-instruct!" begin + nbit = 6 + N = 1< CuArray, Val(G), (3,), (4,5), (0, 1)) |> Vector ≈ instruct!(Val(2),v1 |> copy, Val(G), (3,), (4,5), (0, 1)) + @test instruct!(Val(2),vn |> CuArray, Val(G), (3,), (4,5), (0, 1)) |> Matrix ≈ instruct!(Val(2),vn |> copy, Val(G), (3,), (4,5), (0, 1)) + @test instruct!(Val(2),v1 |> CuArray, Val(G), (3,), (1,), (1,)) |> Vector ≈ instruct!(Val(2),v1 |> copy, Val(G),(3,), (1,), (1,)) + @test instruct!(Val(2),vn |> CuArray, Val(G), (3,), (1,), (1,)) |> Matrix ≈ instruct!(Val(2),vn |> copy, Val(G),(3,), (1,), (1,)) + end +end + +@testset "pswap" begin + ps = put(6, (2,4)=>rot(SWAP, π/2)) + reg = rand_state(6; nbatch=10) + @test apply!(reg |> cu, ps) |> cpu ≈ apply!(copy(reg), ps) + @test apply!(reg |> cu, ps).state isa CuArray +end + +@testset "regression test: Rx, Ry, Rz, CPHASE" begin + Random.seed!(3) + nbit = 6 + for ps in [put(6, (2,)=>Rx(π/2)), put(6, 2=>Ry(0.5)), put(6, 2=>Rz(0.4))] + reg = rand_state(6; nbatch=10) + @test apply!(reg |> cu, ps) |> cpu ≈ apply!(copy(reg), ps) + @test apply!(reg |> cu, ps).state isa CuArray + end +end + +@testset "density matrix" begin + nbit = 6 + reg = rand_state(nbit) + rho = density_matrix(reg) + c = put(6, (2,)=>Rx(π/3)) + @test apply(rho |> cu, c) |> cpu ≈ apply(rho, c) +end + +@testset "time evolve" begin + g = time_evolve(kron(10, 2=>X, 3=>X), 0.5) + reg = rand_state(10) + @test apply!(copy(reg), g) ≈ apply!(reg |> cu, g) |> cpu +end + +# fix: https://github.com/QuantumBFS/CuYao.jl/issues/81 +@testset "generic sparse (pqc circuit)" begin + # kron of Rx + pqc_circuit = subroutine(10, kron(Rx(0.4), Rx(0.5), Rx(0.6), Rx(0.8)), (1, 2, 6, 5)) + proxy_reg = zero_state(10) + @test apply!(proxy_reg |> cu, pqc_circuit) |> cpu ≈ apply(proxy_reg, pqc_circuit) + + # kron of Rz + pqc_circuit = subroutine(10, kron(Rz(0.4), Rz(0.5), Rz(0.6), Rz(0.8)), (1, 2, 6, 5)) + proxy_reg = zero_state(10) + @test apply!(proxy_reg |> cu, pqc_circuit) |> cpu ≈ apply(proxy_reg, pqc_circuit) +end \ No newline at end of file diff --git a/ext/CuYao/test/register.jl b/ext/CuYao/test/register.jl new file mode 100644 index 00000000..8c9551c0 --- /dev/null +++ b/ext/CuYao/test/register.jl @@ -0,0 +1,139 @@ +using Test +using CuYao +using CuYao: tri2ij +using LinearAlgebra +using Yao.YaoArrayRegister.BitBasis +using Statistics: mean +using Yao.YaoArrayRegister.StaticArrays +using CUDA +using Yao.YaoArrayRegister: batched_kron, batched_kron!, batch_normalize! +CUDA.allowscalar(false) + +@testset "basics" begin + a = randn(ComplexF64, 50, 20) + ca = a|>cu + batch_normalize!(ca) + batch_normalize!(a) + @test ca |> Matrix ≈ a + + for l = 1:100 + i, j = tri2ij(l) + @test (i-1)*(i-2)÷2+j == l + end +end + +@testset "constructor an measure" begin + reg = rand_state(10) + greg = reg |> cu + @test greg isa AbstractCuArrayReg + @test eltype(greg.state) == ComplexF64 + myvec(x) = Vector(x) + myvec(x::Number) = [x] + for reg in [rand_state(10, nbatch=333), rand_state(10)] + greg = reg |> cu + @test size(measure(greg |> copy, nshots=10)) == size(measure(reg, nshots=10)) + @test size(measure!(greg |> copy)) == size(measure!(reg |> copy)) + @test size(measure!(ResetTo(0), greg |> copy)) == size(measure!(ResetTo(0), reg |> copy)) + @test size(measure!(RemoveMeasured(), greg |> copy)) == size(measure!(RemoveMeasured(), reg |> copy)) + @test select(greg |> copy, 12) |> cpu ≈ select(reg, 12) + @test size(measure!(greg |> copy |> focus!(3,4,1))) == size(measure!(reg |> copy |> focus!(3,4,1))) + @test greg |> copy |> focus!(3,4,1) |> relax!(3,4,1) |> cpu ≈ reg + + if nbatch(greg) == 1 + greg1 = greg |> copy |> focus!(1,4,3) + greg0 = copy(greg1) + res = measure!(RemoveMeasured(), greg1) + @test select(greg0, res |> myvec) |> normalize! |> cpu ≈ greg1 |> cpu + end + + greg1 = greg |> copy |> focus!(1,4,3) + greg0 = copy(greg1) + res = measure!(ResetTo(3), greg1) + @test all(measure(greg1, nshots=10) .== 3) + @test greg1 |> isnormalized + @test all(select.(BatchedArrayReg(greg0 |> cpu), res |> myvec) .|> normalize! .≈ select.(BatchedArrayReg(greg1 |> cpu), 3)) + + greg1 = greg |> copy |> focus!(1,4,3) + greg0 = copy(greg1) + res = measure!(greg1) + @test all(select.(BatchedArrayReg(greg0 |> cpu), res |> myvec) .|> normalize! .≈ select.(BatchedArrayReg(greg1 |> cpu), res|>myvec)) + end + + @test join(rand_state(3) |> cu, rand_state(3) |> cu) |> nactive == 6 + @test join(rand_state(3, nbatch=10) |> cu, rand_state(3, nbatch=10) |> cu) |> nactive == 6 +end + +@testset "insert_qubits!" begin + reg = rand_state(5; nbatch=10) + res = insert_qudits!(reg |> cu, 3, 2) |> cpu + @test insert_qudits!(reg, 3, 2) ≈ res + @test append_qudits!(copy(reg) |> cu, 3) |> cpu ≈ append_qudits!(copy(reg), 3) + + reg = rand_state(5, nbatch=10) |>focus!(2,3) + res = insert_qudits!(reg |> cu, 3, 2) |> cpu + @test insert_qudits!(reg, 3, 2) ≈ res + + @test append_qudits!(copy(reg) |> cu, 3) |> cpu ≈ append_qudits!(copy(reg), 3) +end + +@testset "cuda-op-measures" begin + reg = rand_state(8; nbatch=32) |> cu + op = repeat(5, X, 1:5) + + # measure! + reg2 = reg |> copy + res = measure!(op, reg2, 2:6) + res2 = measure!(op, reg2, 2:6) + @test size(res) == (32,) + @test res2 == res + + reg = clone(ArrayReg([1,-1+0im]/sqrt(2.0)), 10) |> cu + @test measure!(X, reg) |> mean ≈ -1 + reg = clone(ArrayReg([1.0,0+0im]), 1000) + @test abs(measure!(X, reg) |> mean) < 0.1 +end + +@testset "cuda kron getindex" begin + a = randn(3,4) + b = randn(4,2) + c = zeros(12,8) + ca, cb, cc = cu(a), cu(b), cu(c) + @test kron(ca, cb) |> Array ≈ kron(a, b) + @test Yao.YaoArrayRegister.kron!(cc, ca, cb) |> Array ≈ kron(a,b) + + Yao.YaoArrayRegister.kron!(c,a,b) + @test cc |> Array ≈ c + + v = randn(100) |> cu + inds = [3,5,2,1,7,1] + @test v[inds] ≈ v[inds |> CuVector] +end + +@testset "cuda batched_kron" begin + a = randn(3,4,5) + b = randn(4,2,5) + c = zeros(12,8,5) + ca, cb, cc = cu(a), cu(b), cu(c) + + @test batched_kron(ca, cb) |> Array ≈ batched_kron(a, b) + @test batched_kron!(cc, ca, cb) |> Array ≈ batched_kron(a, b) + + batched_kron!(c, a, b) + @test cc |> Array ≈ c +end + +@testset "zero_state, et al" begin + for b = [4, NoBatch()] + reg = cuzero_state(3; nbatch=b) + @test cpu(reg) ≈ zero_state(3; nbatch=b) && reg isa AbstractCuArrayReg + reg = curand_state(3; nbatch=b) + @test reg isa AbstractCuArrayReg + reg = cuuniform_state(3; nbatch=b) + @test cpu(reg) ≈ uniform_state(3; nbatch=b) && reg isa AbstractCuArrayReg + reg = cuproduct_state(bit"110"; nbatch=b) + @test cpu(reg) ≈ product_state(bit"110"; nbatch=b) && reg isa AbstractCuArrayReg + reg = cughz_state(3; nbatch=b) + @test cpu(reg) ≈ ghz_state(3; nbatch=b) && reg isa AbstractCuArrayReg + end +end + diff --git a/ext/CuYao/test/runtests.jl b/ext/CuYao/test/runtests.jl new file mode 100644 index 00000000..77df6514 --- /dev/null +++ b/ext/CuYao/test/runtests.jl @@ -0,0 +1,18 @@ +using CUDA, CuYao, Test +CUDA.allowscalar(false) + +@testset "CUDA patch" begin + include("CUDApatch.jl") +end + +@testset "GPU reg" begin + include("register.jl") +end + +@testset "gpu applies" begin + include("instructs.jl") +end + +@testset "extra" begin + include("extra.jl") +end \ No newline at end of file diff --git a/lib/YaoPlots/src/vizcircuit.jl b/lib/YaoPlots/src/vizcircuit.jl index 9e5478f9..d4d28cb9 100644 --- a/lib/YaoPlots/src/vizcircuit.jl +++ b/lib/YaoPlots/src/vizcircuit.jl @@ -367,13 +367,13 @@ for B in [:LabelBlock, :GeneralMatrixBlock, :Add] _draw!(c, [controls..., (address, c.gatestyles.g, string(cb))]) end end -for GT in [:KronBlock, :RepeatedBlock, :CachedBlock, :Subroutine, :(YaoBlocks.AD.NoParams)] - @eval function draw!(c::CircuitGrid, p::$GT, address, controls) - barrier_style = CircuitStyles.barrier_for_chain[] - CircuitStyles.barrier_for_chain[] = false - draw!(c, YaoBlocks.Optimise.to_basictypes(p), address, controls) - CircuitStyles.barrier_for_chain[] = barrier_style - end + +# [:KronBlock, :RepeatedBlock, :CachedBlock, :Subroutine, :(YaoBlocks.AD.NoParams)] +function draw!(c::CircuitGrid, p::CompositeBlock, address, controls) + barrier_style = CircuitStyles.barrier_for_chain[] + CircuitStyles.barrier_for_chain[] = false + draw!(c, YaoBlocks.Optimise.to_basictypes(p), address, controls) + CircuitStyles.barrier_for_chain[] = barrier_style end for (GATE, SYM) in [(:XGate, :Rx), (:YGate, :Ry), (:ZGate, :Rz)] @eval get_brush_texts(c, b::RotationGate{D,T,<:$GATE}) where {D,T} = (c.gatestyles.g, "$($(SYM))($(pretty_angle(b.theta)))") From 3a93015f34739527a3227db882d08c823f73c736 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 24 Feb 2024 17:37:01 +0000 Subject: [PATCH 2/6] tests pass --- Project.toml | 1 - ext/CuYao/src/CuYao.jl | 3 +-- ext/CuYao/test/CUDApatch.jl | 2 +- ext/CuYao/{ => test}/Project.toml | 4 ---- ext/CuYao/test/extra.jl | 4 ++-- ext/CuYao/test/instructs.jl | 4 ++-- ext/CuYao/test/register.jl | 18 +++++++++--------- ext/CuYao/test/runtests.jl | 4 ++-- src/Yao.jl | 7 ++++++- 9 files changed, 23 insertions(+), 24 deletions(-) rename ext/CuYao/{ => test}/Project.toml (88%) diff --git a/Project.toml b/Project.toml index 84787249..4faa42d5 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ version = "0.8.13" [deps] BitBasis = "50ba71b6-fa0f-514d-ae9a-0916efc90dcf" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LuxurySparse = "d05aeea4-b7d4-55ac-b691-9e7fabb07ba2" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" diff --git a/ext/CuYao/src/CuYao.jl b/ext/CuYao/src/CuYao.jl index cc64a45e..a04c2417 100644 --- a/ext/CuYao/src/CuYao.jl +++ b/ext/CuYao/src/CuYao.jl @@ -24,8 +24,7 @@ import Yao.YaoArrayRegister: u1rows!, unrows!, autostatic, instruct!, swaprows!, import Yao.YaoArrayRegister.LinearAlgebra: norm import Base: kron, getindex -export cpu, cu, AbstractCuArrayReg, CuArrayReg, CuBatchedArrayReg, CuDensityMatrix, - cuzero_state, cuuniform_state, curand_state, cuproduct_state, cughz_state +import Yao: cpu, cuzero_state, cuuniform_state, curand_state, cuproduct_state, cughz_state const Ints = NTuple{<:Any, Int} diff --git a/ext/CuYao/test/CUDApatch.jl b/ext/CuYao/test/CUDApatch.jl index b00adb96..136d7aeb 100644 --- a/ext/CuYao/test/CUDApatch.jl +++ b/ext/CuYao/test/CUDApatch.jl @@ -1,4 +1,4 @@ -using CuYao +using Yao using CUDA using Test using Yao.YaoBlocks diff --git a/ext/CuYao/Project.toml b/ext/CuYao/test/Project.toml similarity index 88% rename from ext/CuYao/Project.toml rename to ext/CuYao/test/Project.toml index 88d577a0..2e5f158c 100644 --- a/ext/CuYao/Project.toml +++ b/ext/CuYao/test/Project.toml @@ -1,7 +1,3 @@ -name = "CuYao" -uuid = "b48ca7a8-dd42-11e8-2b8e-1b7706800275" -version = "0.3.9" - [deps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/ext/CuYao/test/extra.jl b/ext/CuYao/test/extra.jl index 739ee6cb..95e759ce 100644 --- a/ext/CuYao/test/extra.jl +++ b/ext/CuYao/test/extra.jl @@ -1,4 +1,4 @@ -using CuYao, Test, CUDA +using Yao, Test, CUDA CUDA.allowscalar(false) @testset "gradient" begin @@ -49,4 +49,4 @@ end # fidelity @test fidelity(crho, crho) ≈ 1 @test measure(crho; nshots=2) isa Vector -end \ No newline at end of file +end diff --git a/ext/CuYao/test/instructs.jl b/ext/CuYao/test/instructs.jl index 300e779a..48ae4c69 100644 --- a/ext/CuYao/test/instructs.jl +++ b/ext/CuYao/test/instructs.jl @@ -1,6 +1,6 @@ using LinearAlgebra, Yao.ConstGate using Test, Random -using CuYao +using Yao using Yao.YaoArrayRegister.StaticArrays using Yao.ConstGate: SWAPGate using CUDA @@ -132,4 +132,4 @@ end pqc_circuit = subroutine(10, kron(Rz(0.4), Rz(0.5), Rz(0.6), Rz(0.8)), (1, 2, 6, 5)) proxy_reg = zero_state(10) @test apply!(proxy_reg |> cu, pqc_circuit) |> cpu ≈ apply(proxy_reg, pqc_circuit) -end \ No newline at end of file +end diff --git a/ext/CuYao/test/register.jl b/ext/CuYao/test/register.jl index 8c9551c0..8c6dc1d6 100644 --- a/ext/CuYao/test/register.jl +++ b/ext/CuYao/test/register.jl @@ -1,6 +1,6 @@ using Test -using CuYao -using CuYao: tri2ij +using Yao +const CuYao = Base.get_extension(Yao, :CuYao) using LinearAlgebra using Yao.YaoArrayRegister.BitBasis using Statistics: mean @@ -17,7 +17,7 @@ CUDA.allowscalar(false) @test ca |> Matrix ≈ a for l = 1:100 - i, j = tri2ij(l) + i, j = CuYao.tri2ij(l) @test (i-1)*(i-2)÷2+j == l end end @@ -25,7 +25,7 @@ end @testset "constructor an measure" begin reg = rand_state(10) greg = reg |> cu - @test greg isa AbstractCuArrayReg + @test greg isa CuYao.AbstractCuArrayReg @test eltype(greg.state) == ComplexF64 myvec(x) = Vector(x) myvec(x::Number) = [x] @@ -125,15 +125,15 @@ end @testset "zero_state, et al" begin for b = [4, NoBatch()] reg = cuzero_state(3; nbatch=b) - @test cpu(reg) ≈ zero_state(3; nbatch=b) && reg isa AbstractCuArrayReg + @test cpu(reg) ≈ zero_state(3; nbatch=b) && reg isa CuYao.AbstractCuArrayReg reg = curand_state(3; nbatch=b) - @test reg isa AbstractCuArrayReg + @test reg isa CuYao.AbstractCuArrayReg reg = cuuniform_state(3; nbatch=b) - @test cpu(reg) ≈ uniform_state(3; nbatch=b) && reg isa AbstractCuArrayReg + @test cpu(reg) ≈ uniform_state(3; nbatch=b) && reg isa CuYao.AbstractCuArrayReg reg = cuproduct_state(bit"110"; nbatch=b) - @test cpu(reg) ≈ product_state(bit"110"; nbatch=b) && reg isa AbstractCuArrayReg + @test cpu(reg) ≈ product_state(bit"110"; nbatch=b) && reg isa CuYao.AbstractCuArrayReg reg = cughz_state(3; nbatch=b) - @test cpu(reg) ≈ ghz_state(3; nbatch=b) && reg isa AbstractCuArrayReg + @test cpu(reg) ≈ ghz_state(3; nbatch=b) && reg isa CuYao.AbstractCuArrayReg end end diff --git a/ext/CuYao/test/runtests.jl b/ext/CuYao/test/runtests.jl index 77df6514..420a735c 100644 --- a/ext/CuYao/test/runtests.jl +++ b/ext/CuYao/test/runtests.jl @@ -1,4 +1,4 @@ -using CUDA, CuYao, Test +using CUDA, Yao, Test CUDA.allowscalar(false) @testset "CUDA patch" begin @@ -15,4 +15,4 @@ end @testset "extra" begin include("extra.jl") -end \ No newline at end of file +end diff --git a/src/Yao.jl b/src/Yao.jl index a42710b0..7b1deac1 100644 --- a/src/Yao.jl +++ b/src/Yao.jl @@ -16,7 +16,6 @@ const 幺 = Yao using Reexport @reexport using YaoArrayRegister, YaoBlocks, YaoSym, YaoPlots -export EasyBuild using YaoArrayRegister.BitBasis, YaoAPI using YaoBlocks: @@ -28,6 +27,12 @@ using YaoBlocks: print_title, print_block +export EasyBuild +# CUDA APIs +for FT in [:cpu, :cuzero_state, :cuuniform_state, :curand_state, :cuproduct_state, :cughz_state] + @eval export $FT + @eval function $FT end +end include("deprecations.jl") include("EasyBuild/easybuild.jl") From 1154106bb03079365d7f03a451f7a6d351f44d8e Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sun, 25 Feb 2024 02:37:34 +0800 Subject: [PATCH 3/6] add makefile --- Makefile | 36 ++++++++++++++++++++++++++++++++++++ docs/Project.toml | 1 + 2 files changed, 37 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..93f0a1b3 --- /dev/null +++ b/Makefile @@ -0,0 +1,36 @@ +JL = julia --project + +default: init test + +init: + $(JL) -e 'using Pkg; Pkg.develop([Pkg.PackageSpec(path = joinpath("lib", pkg)) for pkg in ["YaoArrayRegister", "YaoBlocks", "YaoSym", "YaoPlots", "YaoAPI"]]); Pkg.precompile()' +init-docs: + $(JL) -e 'using Pkg; Pkg.activate("docs"); Pkg.develop([Pkg.PackageSpec(path = joinpath("lib", pkg)) for pkg in ["YaoArrayRegister", "YaoBlocks", "YaoSym", "YaoPlots", "YaoAPI"]]); Pkg.precompile()' + +update: + $(JL) -e 'using Pkg; Pkg.update(); Pkg.precompile()' +update-docs: + $(JL) -e 'using Pkg; Pkg.activate("docs"); Pkg.update(); Pkg.precompile()' + +test-Yao: + $(JL) -e 'using Pkg; Pkg.test("Yao")' +test-CuYao: + $(JL) -e 'using Pkg; Pkg.activate("ext/CuYao/test"); Pkg.develop(path="."); Pkg.update()' + $(JL) -e 'using Pkg; Pkg.activate("ext/CuYao/test"); include("ext/CuYao/test/runtests.jl")' + +test-%: + $(JL) -e 'using Pkg; Pkg.test("$*")' + +test: + $(JL) -e 'using Pkg; Pkg.test(["YaoAPI", "YaoArrayRegister", "YaoBlocks", "YaoSym", "YaoPlots", "Yao"])' + +coverage: + $(JL) -e 'using Pkg; Pkg.test(["YaoAPI", "YaoArrayRegister", "YaoBlocks", "YaoSym", "YaoPlots", "Yao"]; coverage=true)' + +servedocs: + $(JL) -e 'using Pkg; Pkg.activate("docs"); using LiveServer; servedocs(;skip_dirs=["docs/src/assets", "docs/src/generated"])' + +clean: + rm -rf docs/build + +.PHONY: init test diff --git a/docs/Project.toml b/docs/Project.toml index d6d7fde0..d0ab5651 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -7,6 +7,7 @@ FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" Latexify = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" From c9a24ba97278dfd1f7f0110bdef075a07da67215 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Sat, 24 Feb 2024 18:50:04 +0000 Subject: [PATCH 4/6] update make file --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 93f0a1b3..45e53f2d 100644 --- a/Makefile +++ b/Makefile @@ -32,5 +32,6 @@ servedocs: clean: rm -rf docs/build + find . -name "*.cov" -type f -print0 | xargs -0 /bin/rm -f .PHONY: init test From 1550f32b7c22333b508db03537e949adab40ad82 Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Mon, 26 Feb 2024 02:26:19 +0800 Subject: [PATCH 5/6] rm CuYao LICENSE --- ext/CuYao/LICENSE.md | 217 ------------------------------------------- 1 file changed, 217 deletions(-) delete mode 100644 ext/CuYao/LICENSE.md diff --git a/ext/CuYao/LICENSE.md b/ext/CuYao/LICENSE.md deleted file mode 100644 index 8373be63..00000000 --- a/ext/CuYao/LICENSE.md +++ /dev/null @@ -1,217 +0,0 @@ -The CuYao.jl package is licensed under the Apache License, Version 2.0: - -> COPYRIGHT -> -> All contributions by Jinguo Liu -> Copyright (c) 2018: Jinguo Liu -> All rights reserved -> -> All contributions by Xiuzhe Luo -> Copyright (c) 2018: Xiuzhe Luo. -> All rights reserved. -> -> Each contributor holds copyright over their respective contributions. -> The project versioning (Git) records all such contribution source information. -> -> Apache License -> Version 2.0, January 2004 -> http://www.apache.org/licenses/ -> -> TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -> -> 1. Definitions. -> -> "License" shall mean the terms and conditions for use, reproduction, -> and distribution as defined by Sections 1 through 9 of this document. -> -> "Licensor" shall mean the copyright owner or entity authorized by -> the copyright owner that is granting the License. -> -> "Legal Entity" shall mean the union of the acting entity and all -> other entities that control, are controlled by, or are under common -> control with that entity. For the purposes of this definition, -> "control" means (i) the power, direct or indirect, to cause the -> direction or management of such entity, whether by contract or -> otherwise, or (ii) ownership of fifty percent (50%) or more of the -> outstanding shares, or (iii) beneficial ownership of such entity. -> -> "You" (or "Your") shall mean an individual or Legal Entity -> exercising permissions granted by this License. -> -> "Source" form shall mean the preferred form for making modifications, -> including but not limited to software source code, documentation -> source, and configuration files. -> -> "Object" form shall mean any form resulting from mechanical -> transformation or translation of a Source form, including but -> not limited to compiled object code, generated documentation, -> and conversions to other media types. -> -> "Work" shall mean the work of authorship, whether in Source or -> Object form, made available under the License, as indicated by a -> copyright notice that is included in or attached to the work -> (an example is provided in the Appendix below). -> -> "Derivative Works" shall mean any work, whether in Source or Object -> form, that is based on (or derived from) the Work and for which the -> editorial revisions, annotations, elaborations, or other modifications -> represent, as a whole, an original work of authorship. For the purposes -> of this License, Derivative Works shall not include works that remain -> separable from, or merely link (or bind by name) to the interfaces of, -> the Work and Derivative Works thereof. -> -> "Contribution" shall mean any work of authorship, including -> the original version of the Work and any modifications or additions -> to that Work or Derivative Works thereof, that is intentionally -> submitted to Licensor for inclusion in the Work by the copyright owner -> or by an individual or Legal Entity authorized to submit on behalf of -> the copyright owner. For the purposes of this definition, "submitted" -> means any form of electronic, verbal, or written communication sent -> to the Licensor or its representatives, including but not limited to -> communication on electronic mailing lists, source code control systems, -> and issue tracking systems that are managed by, or on behalf of, the -> Licensor for the purpose of discussing and improving the Work, but -> excluding communication that is conspicuously marked or otherwise -> designated in writing by the copyright owner as "Not a Contribution." -> -> "Contributor" shall mean Licensor and any individual or Legal Entity -> on behalf of whom a Contribution has been received by Licensor and -> subsequently incorporated within the Work. -> -> 2. Grant of Copyright License. Subject to the terms and conditions of -> this License, each Contributor hereby grants to You a perpetual, -> worldwide, non-exclusive, no-charge, royalty-free, irrevocable -> copyright license to reproduce, prepare Derivative Works of, -> publicly display, publicly perform, sublicense, and distribute the -> Work and such Derivative Works in Source or Object form. -> -> 3. Grant of Patent License. Subject to the terms and conditions of -> this License, each Contributor hereby grants to You a perpetual, -> worldwide, non-exclusive, no-charge, royalty-free, irrevocable -> (except as stated in this section) patent license to make, have made, -> use, offer to sell, sell, import, and otherwise transfer the Work, -> where such license applies only to those patent claims licensable -> by such Contributor that are necessarily infringed by their -> Contribution(s) alone or by combination of their Contribution(s) -> with the Work to which such Contribution(s) was submitted. If You -> institute patent litigation against any entity (including a -> cross-claim or counterclaim in a lawsuit) alleging that the Work -> or a Contribution incorporated within the Work constitutes direct -> or contributory patent infringement, then any patent licenses -> granted to You under this License for that Work shall terminate -> as of the date such litigation is filed. -> -> 4. Redistribution. You may reproduce and distribute copies of the -> Work or Derivative Works thereof in any medium, with or without -> modifications, and in Source or Object form, provided that You -> meet the following conditions: -> -> (a) You must give any other recipients of the Work or -> Derivative Works a copy of this License; and -> -> (b) You must cause any modified files to carry prominent notices -> stating that You changed the files; and -> -> (c) You must retain, in the Source form of any Derivative Works -> that You distribute, all copyright, patent, trademark, and -> attribution notices from the Source form of the Work, -> excluding those notices that do not pertain to any part of -> the Derivative Works; and -> -> (d) If the Work includes a "NOTICE" text file as part of its -> distribution, then any Derivative Works that You distribute must -> include a readable copy of the attribution notices contained -> within such NOTICE file, excluding those notices that do not -> pertain to any part of the Derivative Works, in at least one -> of the following places: within a NOTICE text file distributed -> as part of the Derivative Works; within the Source form or -> documentation, if provided along with the Derivative Works; or, -> within a display generated by the Derivative Works, if and -> wherever such third-party notices normally appear. The contents -> of the NOTICE file are for informational purposes only and -> do not modify the License. You may add Your own attribution -> notices within Derivative Works that You distribute, alongside -> or as an addendum to the NOTICE text from the Work, provided -> that such additional attribution notices cannot be construed -> as modifying the License. -> -> You may add Your own copyright statement to Your modifications and -> may provide additional or different license terms and conditions -> for use, reproduction, or distribution of Your modifications, or -> for any such Derivative Works as a whole, provided Your use, -> reproduction, and distribution of the Work otherwise complies with -> the conditions stated in this License. -> -> 5. Submission of Contributions. Unless You explicitly state otherwise, -> any Contribution intentionally submitted for inclusion in the Work -> by You to the Licensor shall be under the terms and conditions of -> this License, without any additional terms or conditions. -> Notwithstanding the above, nothing herein shall supersede or modify -> the terms of any separate license agreement you may have executed -> with Licensor regarding such Contributions. -> -> 6. Trademarks. This License does not grant permission to use the trade -> names, trademarks, service marks, or product names of the Licensor, -> except as required for reasonable and customary use in describing the -> origin of the Work and reproducing the content of the NOTICE file. -> -> 7. Disclaimer of Warranty. Unless required by applicable law or -> agreed to in writing, Licensor provides the Work (and each -> Contributor provides its Contributions) on an "AS IS" BASIS, -> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -> implied, including, without limitation, any warranties or conditions -> of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -> PARTICULAR PURPOSE. You are solely responsible for determining the -> appropriateness of using or redistributing the Work and assume any -> risks associated with Your exercise of permissions under this License. -> -> 8. Limitation of Liability. In no event and under no legal theory, -> whether in tort (including negligence), contract, or otherwise, -> unless required by applicable law (such as deliberate and grossly -> negligent acts) or agreed to in writing, shall any Contributor be -> liable to You for damages, including any direct, indirect, special, -> incidental, or consequential damages of any character arising as a -> result of this License or out of the use or inability to use the -> Work (including but not limited to damages for loss of goodwill, -> work stoppage, computer failure or malfunction, or any and all -> other commercial damages or losses), even if such Contributor -> has been advised of the possibility of such damages. -> -> 9. Accepting Warranty or Additional Liability. While redistributing -> the Work or Derivative Works thereof, You may choose to offer, -> and charge a fee for, acceptance of support, warranty, indemnity, -> or other liability obligations and/or rights consistent with this -> License. However, in accepting such obligations, You may act only -> on Your own behalf and on Your sole responsibility, not on behalf -> of any other Contributor, and only if You agree to indemnify, -> defend, and hold each Contributor harmless for any liability -> incurred by, or claims asserted against, such Contributor by reason -> of your accepting any such warranty or additional liability. -> -> END OF TERMS AND CONDITIONS -> -> APPENDIX: How to apply the Apache License to your work. -> -> To apply the Apache License to your work, attach the following -> boilerplate notice, with the fields enclosed by brackets "{}" -> replaced with your own identifying information. (Don't include -> the brackets!) The text should be enclosed in the appropriate -> comment syntax for the file format. We also recommend that a -> file or class name and description of purpose be included on the -> same "printed page" as the copyright notice for easier -> identification within third-party archives. -> -> Copyright [year] [fullname] -> -> Licensed under the Apache License, Version 2.0 (the "License"); -> you may not use this file except in compliance with the License. -> You may obtain a copy of the License at -> -> http://www.apache.org/licenses/LICENSE-2.0 -> -> Unless required by applicable law or agreed to in writing, software -> distributed under the License is distributed on an "AS IS" BASIS, -> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -> See the License for the specific language governing permissions and -> limitations under the License. -> From ac15ea8696ee54e905cdc6fe26f0f7cfaca0883a Mon Sep 17 00:00:00 2001 From: GiggleLiu Date: Thu, 29 Feb 2024 02:30:04 +0800 Subject: [PATCH 6/6] add CuYao and YaoPlots doc --- docs/make.jl | 1 + docs/src/man/cuda.md | 71 ++++++++++++++++++++++++++++++ docs/src/man/plot.md | 54 ++++++++++++++++++++++- ext/CuYao/README.md | 90 +++------------------------------------ ext/CuYao/src/register.jl | 20 --------- src/Yao.jl | 8 ++-- src/cudainterfaces.jl | 41 ++++++++++++++++++ 7 files changed, 175 insertions(+), 110 deletions(-) create mode 100644 docs/src/man/cuda.md create mode 100644 src/cudainterfaces.jl diff --git a/docs/make.jl b/docs/make.jl index f67cad1a..9a549392 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -58,6 +58,7 @@ const PAGES = [ "man/registers.md", "man/blocks.md", "man/symbolic.md", + "man/cuda.md", "man/plot.md", "man/automatic_differentiation.md", "man/simplification.md", diff --git a/docs/src/man/cuda.md b/docs/src/man/cuda.md new file mode 100644 index 00000000..98454f1b --- /dev/null +++ b/docs/src/man/cuda.md @@ -0,0 +1,71 @@ +```@meta +CurrentModule = Yao +``` + +# CUDA extension - CuYao + +## Tutorial +`CuYao` is a CUDA extension of Yao, which allows you to run Yao circuits on GPU. The usage of `CuYao` is similar to `Yao`, but with some extra APIs to upload and download registers between CPU and GPU: +- `cu(reg)` to upload a registe `reg` to GPU, and +- `cpu(cureg)` to download a register `cureg` from GPU to CPU. + +```julia +julia> using Yao, CUDA + +# create a register on GPU +julia> cureg = rand_state(9; nbatch=1000) |> cu; # or `curand_state(9; nbatch=1000)`. + +# run a circuit on GPU +julia> cureg |> put(9, 2=>Z); + +# measure the register on GPU +julia> measure!(cureg) +1000-element CuArray{DitStr{2, 9, Int64}, 1, CUDA.Mem.DeviceBuffer}: + 110110100 ₍₂₎ + 000100001 ₍₂₎ + 111111001 ₍₂₎ + ⋮ + 010001101 ₍₂₎ + 000100110 ₍₂₎ + +# download the register to CPU +julia> reg = cureg |> cpu; +``` + + +## Features +Supported gates: + +- general U(N) gate +- general U(1) gate +- X, Y, Z gate +- T, S gate +- SWAP gate +- control gates + +Supported register operations: + +- measure!, measure_reset!, measure_remove!, select +- append_qudits!, append_qubits! +- insert_qudit!, insert_qubits! +- focus!, relax! +- join +- density_matrix +- fidelity +- expect + +Autodiff: +- autodiff is supported when the only parameterized gates are rotation gates in a circuit. + +## API +```@docs +cpu +curand_state +cuzero_state +cuproduct_state +cuuniform_state +cughz_state +``` + +!!! note + the `cu` function is not documented in this module, but it is used to upload a register to GPU. \ No newline at end of file diff --git a/docs/src/man/plot.md b/docs/src/man/plot.md index 7c0a3d6e..7e1e9160 100644 --- a/docs/src/man/plot.md +++ b/docs/src/man/plot.md @@ -11,7 +11,59 @@ end # Quantum Circuit Visualization -Quantum circuit visualization support for Yao. +`YaoPlots` is the Quantum circuit visualization component for Yao. + + +## Examples +#### Example 1: Visualize a QBIR define in Yao + +```@example plot +using Yao.EasyBuild, YaoPlots + +# show a qft circuit +vizcircuit(qft_circuit(5)) +``` + +If you are using a Pluto/Jupyter notebook, Atom/VSCode editor, you should see the above image in your plotting panel. + +#### Example 2: Visualize a single qubit +```@example plot +using YaoPlots, Yao + +reg = zero_state(1) |> Rx(π/8) |> Rx(π/8) +rho = density_matrix(ghz_state(2), 1) + +bloch_sphere("|ψ⟩"=>reg, "ρ"=>rho; show_projection_lines=true) +``` + +See more [examples](lib/YaoPlots/examples/circuits.jl). + +### Adjusting the plot attributes + +Various attributes of the visualizations can be altered. +The plot can be modified, if we change the following attributes + +- `YaoPlots.CircuitStyles.linecolor[]` for line color, default value being `"#000000"` (black color) +- `YaoPlots.CircuitStyles.gate_bgcolor[]` for background color of square blocks, the default value being `"#FFFFFF"` (white color) +- `YaoPlots.CircuitStyles.textcolor[]` for text color, default value being `"#000000` +- `YaoPlots.CircuitStyles.lw[]` for line width, default value being `1` (pt) +- `YaoPlots.CircuitStyles.textsize[]` for text size, default value being `16` (pt) +- `YaoPlots.CircuitStyles.paramtextsize[]` for parameter text size, for parameterized gates, default value being `10` (pt) + +For example, + +```@example plot +using YaoPlots, Yao +YaoPlots.CircuitStyles.linecolor[] = "pink" +YaoPlots.CircuitStyles.gate_bgcolor[] = "yellow" +YaoPlots.CircuitStyles.textcolor[] = "#000080" # the navy blue color +YaoPlots.CircuitStyles.fontfamily[] = "JuliaMono" +YaoPlots.CircuitStyles.lw[] = 2.5 +YaoPlots.CircuitStyles.textsize[] = 13 +YaoPlots.CircuitStyles.paramtextsize[] = 8 + +vizcircuit(chain(3, put(1=>X), repeat(3, H), put(2=>Y), repeat(3, Rx(π/2)))) +``` ## Circuit Visualization ```@docs diff --git a/ext/CuYao/README.md b/ext/CuYao/README.md index 4184db5c..becb9632 100644 --- a/ext/CuYao/README.md +++ b/ext/CuYao/README.md @@ -1,86 +1,8 @@ -**Build status**: [![][gitlab-img]][gitlab-url] - -[gitlab-img]: https://gitlab.com/JuliaGPU/CuYao.jl/badges/master/pipeline.svg -[gitlab-url]: https://gitlab.com/JuliaGPU/CuYao.jl/pipelines - -CUDA support for [Yao.jl](https://github.com/QuantumBFS/Yao.jl). - -**Only tested locally, expect some adventures and rough edges.** - -## Installation - -

-CuYao is a   - - - Julia Language - -   package. It provides CUDA support for Yao.jl. To install CuYao, - please open - Julia's interactive session (known as REPL) and press ] key in the REPL to use the package mode, then type the following command -

- -For stable release - -```julia -pkg> add CuYao +## How to run tests + +Open a terminal, switch to Yao folder and run the following commands: +```bash +make init-CuYao +make test-CuYao ``` -For current master - -```julia -pkg> add CuYao#master -``` - -You don't need to install Yao if you have `CuYao` installed. They share the same API except CUDA backend. - -## Documentation - -For documentation of [Yao](https://github.com/QuantumBFS/Yao.jl), please refer to [documentation (stable)](https://quantumbfs.github.io/Yao.jl/stable). - -`CuYao.jl` provides only two extra APIs, `reg |> cu` to upload a register to GPU, and `cureg |> cpu` to download a register to CPU. - -To start, see the following example -```julia -using CuYao - -cureg = rand_state(9; nbatch=1000) |> cu # or `curand_state(9; nbatch=1000)`. -cureg |> put(9, 2=>Z) -measure!(cureg |> append_qubits!(1) |> focus!(4,1,3)) -cureg |> relax!(4,1,3) |> cpu -``` - -Constructors `curand_state`, `cuzero_state`, `cuproduct_state`, `cuuniform_state` and `cughz_state` are tailored for GPU, -they are faster than uploading a CPU register to CPU. - -## Features -### Supported Gates - -- general U(N) gate -- general U(1) gate -- better X, Y, Z gate -- better T, S gate -- SWAP gate -- better control gates -- BP diff blocks - -### Supported Register Operations -- measure!, measure_reset!, measure_remove!, select -- append_qudits!, append_qubits! -- insert_qudit!, insert_qubits! -- focus!, relax! -- join -- density_matrix -- fidelity (not including density matrix) -- expect - -### Other Operations -- autodiff is supported when the only parameterized gates are rotation gates in a circuit. - -## The Team - -This project is an effort of QuantumBFS, an open source organization for quantum science. Yao is currently maintained by [Xiu-Zhe (Roger) Luo](https://github.com/Roger-luo) and [Jin-Guo Liu](https://github.com/GiggleLiu) with contributions from open source community. All the contributors are listed in the [contributors](https://github.com/QuantumBFS/Yao.jl/graphs/contributors). - -## License - -**CuYao** is released under the Apache 2 license. diff --git a/ext/CuYao/src/register.jl b/ext/CuYao/src/register.jl index cc493e94..01c83cbc 100644 --- a/ext/CuYao/src/register.jl +++ b/ext/CuYao/src/register.jl @@ -189,11 +189,6 @@ function Yao.insert_qudits!(reg::AbstractCuArrayReg{D}, loc::Int; nqudits::Int=1 return reg end -""" - cuproduct_state([T=ComplexF64], total::Int, bit_config::Integer; nbatch=NoBatch()) - -The GPU version of [`product_state`](@ref). -""" cuproduct_state(bit_str::BitStr; nbatch::Union{NoBatch,Int} = NoBatch()) = cuproduct_state(ComplexF64, bit_str; nbatch = nbatch) cuproduct_state(bit_str::AbstractVector; nbatch::Union{NoBatch,Int} = NoBatch()) = @@ -219,11 +214,6 @@ end cuzero_state(n::Int; kwargs...) = cuzero_state(ComplexF64, n; kwargs...) cuzero_state(::Type{T}, n::Int; kwargs...) where {T} = cuproduct_state(T, n, 0; kwargs...) -""" - curand_state([T=ComplexF64], n::Int; nbatch=1) - -The GPU version of [`rand_state`](@ref). -""" curand_state(n::Int; kwargs...) = curand_state(ComplexF64, n; kwargs...) function curand_state( @@ -236,11 +226,6 @@ function curand_state( return normalize!(arrayreg(raw; nbatch=nbatch, nlevel=nlevel)) end -""" - cuuniform_state([T=ComplexF64], n::Int; nbatch=1) - -The GPU version of [`uniform_state`](@ref). -""" cuuniform_state(n::Int; kwargs...) = cuuniform_state(ComplexF64, n; kwargs...) function cuuniform_state(::Type{T}, n::Int; nbatch::Union{Int,NoBatch} = NoBatch(), @@ -250,11 +235,6 @@ function cuuniform_state(::Type{T}, n::Int; return normalize!(arrayreg(raw; nbatch=nbatch, nlevel=nlevel)) end -""" - cughz_state([T=ComplexF64], n::Int; nbatch=1) - -The GPU version of [`ghz_state`](@ref). -""" cughz_state(n::Int; kwargs...) = cughz_state(ComplexF64, n; kwargs...) function cughz_state(::Type{T}, n::Int; kwargs...) where {T} reg = cuzero_state(T, n; kwargs...) diff --git a/src/Yao.jl b/src/Yao.jl index 7b1deac1..87d1ac4b 100644 --- a/src/Yao.jl +++ b/src/Yao.jl @@ -29,12 +29,10 @@ using YaoBlocks: export EasyBuild # CUDA APIs -for FT in [:cpu, :cuzero_state, :cuuniform_state, :curand_state, :cuproduct_state, :cughz_state] - @eval export $FT - @eval function $FT end -end +export cpu, cuzero_state, cuuniform_state, curand_state, cuproduct_state, cughz_state -include("deprecations.jl") +include("cudainterfaces.jl") include("EasyBuild/easybuild.jl") +include("deprecations.jl") end # module diff --git a/src/cudainterfaces.jl b/src/cudainterfaces.jl new file mode 100644 index 00000000..335ba000 --- /dev/null +++ b/src/cudainterfaces.jl @@ -0,0 +1,41 @@ +""" + cuproduct_state([T=ComplexF64], total::Int, bit_config::Integer; nbatch=NoBatch()) + +The GPU version of [`product_state`](@ref). +""" +function cuproduct_state end + +""" + curand_state([T=ComplexF64], n::Int; nbatch=1) + +The GPU version of [`rand_state`](@ref). +""" +function curand_state end + +""" + cuzero_state([T=ComplexF64], n::Int; nbatch=1) + +The GPU version of [`zero_state`](@ref). +""" +function cuzero_state end + +""" + cuuniform_state([T=ComplexF64], n::Int; nbatch=1) + +The GPU version of [`uniform_state`](@ref). +""" +function cuuniform_state end + +""" + cughz_state([T=ComplexF64], n::Int; nbatch=1) + +The GPU version of [`ghz_state`](@ref). +""" +function cughz_state end + +""" + cpu(cureg) + +Download the register state from GPU to CPU. +""" +function cpu end \ No newline at end of file