mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Merge branch 'master' into dx12
This commit is contained in:
commit
619fc8d4eb
63
Cargo.lock
generated
63
Cargo.lock
generated
|
@ -610,7 +610,19 @@ checksum = "5eb167c1febed0a496639034d0c76b3b74263636045db5489eee52143c246e73"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"jni-sys",
|
"jni-sys",
|
||||||
"ndk-sys",
|
"ndk-sys",
|
||||||
"num_enum",
|
"num_enum 0.4.3",
|
||||||
|
"thiserror",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ndk"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8794322172319b972f528bf90c6b467be0079f1fa82780ffb431088e741a73ab"
|
||||||
|
dependencies = [
|
||||||
|
"jni-sys",
|
||||||
|
"ndk-sys",
|
||||||
|
"num_enum 0.5.1",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -623,7 +635,21 @@ dependencies = [
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"libc",
|
"libc",
|
||||||
"log",
|
"log",
|
||||||
"ndk",
|
"ndk 0.2.1",
|
||||||
|
"ndk-macro",
|
||||||
|
"ndk-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ndk-glue"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c5caf0c24d51ac1c905c27d4eda4fa0635bbe0de596b8f79235e0b17a4d29385"
|
||||||
|
dependencies = [
|
||||||
|
"lazy_static",
|
||||||
|
"libc",
|
||||||
|
"log",
|
||||||
|
"ndk 0.3.0",
|
||||||
"ndk-macro",
|
"ndk-macro",
|
||||||
"ndk-sys",
|
"ndk-sys",
|
||||||
]
|
]
|
||||||
|
@ -687,7 +713,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ca565a7df06f3d4b485494f25ba05da1435950f4dc263440eda7a6fa9b8e36e4"
|
checksum = "ca565a7df06f3d4b485494f25ba05da1435950f4dc263440eda7a6fa9b8e36e4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"derivative",
|
"derivative",
|
||||||
"num_enum_derive",
|
"num_enum_derive 0.4.3",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_enum"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "226b45a5c2ac4dd696ed30fa6b94b057ad909c7b7fc2e0d0808192bced894066"
|
||||||
|
dependencies = [
|
||||||
|
"derivative",
|
||||||
|
"num_enum_derive 0.5.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -702,6 +738,18 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_enum_derive"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1c0fd9eba1d5db0994a239e09c1be402d35622277e35468ba891aa5e3188ce7e"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro-crate",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "objc"
|
name = "objc"
|
||||||
version = "0.2.7"
|
version = "0.2.7"
|
||||||
|
@ -773,11 +821,15 @@ name = "piet-gpu"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
|
"ndk 0.3.0",
|
||||||
|
"ndk-glue 0.3.0",
|
||||||
|
"ndk-sys",
|
||||||
"piet",
|
"piet",
|
||||||
"piet-gpu-hal",
|
"piet-gpu-hal",
|
||||||
"piet-gpu-types",
|
"piet-gpu-types",
|
||||||
"png",
|
"png",
|
||||||
"rand",
|
"rand",
|
||||||
|
"raw-window-handle",
|
||||||
"roxmltree",
|
"roxmltree",
|
||||||
"winit",
|
"winit",
|
||||||
]
|
]
|
||||||
|
@ -797,7 +849,6 @@ version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ash",
|
"ash",
|
||||||
"ash-window",
|
"ash-window",
|
||||||
"once_cell",
|
|
||||||
"raw-window-handle",
|
"raw-window-handle",
|
||||||
"winapi 0.3.9",
|
"winapi 0.3.9",
|
||||||
"wio",
|
"wio",
|
||||||
|
@ -1301,8 +1352,8 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"mio",
|
"mio",
|
||||||
"mio-extras",
|
"mio-extras",
|
||||||
"ndk",
|
"ndk 0.2.1",
|
||||||
"ndk-glue",
|
"ndk-glue 0.2.1",
|
||||||
"ndk-sys",
|
"ndk-sys",
|
||||||
"objc",
|
"objc",
|
||||||
"parking_lot",
|
"parking_lot",
|
||||||
|
|
21
README.md
21
README.md
|
@ -12,21 +12,15 @@ The main goal is to answer research questions about the future of 2D rendering:
|
||||||
|
|
||||||
* To what extent do "advanced" GPU features (subgroups, descriptor arrays) help?
|
* To what extent do "advanced" GPU features (subgroups, descriptor arrays) help?
|
||||||
|
|
||||||
|
* Can we improve quality and extend the imaging model in useful ways?
|
||||||
|
|
||||||
Another goal is to explore a standards-based, portable approach to GPU compute.
|
Another goal is to explore a standards-based, portable approach to GPU compute.
|
||||||
|
|
||||||
## Non-goals
|
## Blogs and other writing
|
||||||
|
|
||||||
There are a great number of concerns that need to be addressed in production:
|
Much of the research progress on piet-gpu is documented in blog entries. See [doc/blogs.md](doc/blogs.md) for pointers to those.
|
||||||
|
|
||||||
* Compatibility with older graphics hardware (including runtime detection)
|
There is a much larger and more detailed [vision](doc/vision.md) that explains the longer-term goals of the project, and how we might get there.
|
||||||
|
|
||||||
* Asynchrony
|
|
||||||
|
|
||||||
* Swapchains and presentation
|
|
||||||
|
|
||||||
## Notes
|
|
||||||
|
|
||||||
A more detailed explanation will come. But for now, a few notes. Also refer to [Fast 2D rendering on GPU] and linked blog posts for more information.
|
|
||||||
|
|
||||||
### Why not gfx-hal?
|
### Why not gfx-hal?
|
||||||
|
|
||||||
|
@ -36,7 +30,7 @@ The hal layer in this repo is strongly inspired by gfx-hal, but with some differ
|
||||||
|
|
||||||
### Why not wgpu?
|
### Why not wgpu?
|
||||||
|
|
||||||
The case for wgpu is also strong, but it's even less mature. I'd love to see it become a solid foundation, at which point I'd use it as the main integration with [druid].
|
The case for wgpu is also strong, but it's even less mature. I'd love to see it become a solid foundation, at which point I'd use it as the main integration with [Druid].
|
||||||
|
|
||||||
In short, the goal is to facilitate the research now, collect the data, and then use that to choose a best path for shipping later.
|
In short, the goal is to facilitate the research now, collect the data, and then use that to choose a best path for shipping later.
|
||||||
|
|
||||||
|
@ -56,7 +50,6 @@ Contributions are welcome by pull request. The [Rust code of conduct] applies.
|
||||||
[SPIRV-Cross]: https://github.com/KhronosGroup/SPIRV-Cross
|
[SPIRV-Cross]: https://github.com/KhronosGroup/SPIRV-Cross
|
||||||
[Shader Model 6]: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12
|
[Shader Model 6]: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12
|
||||||
[DXC]: https://github.com/microsoft/DirectXShaderCompiler
|
[DXC]: https://github.com/microsoft/DirectXShaderCompiler
|
||||||
[druid]: https://github.com/xi-editor/druid
|
[Druid]: https://github.com/xi-editor/druid
|
||||||
[Unlicense]: https://unlicense.org/
|
[Unlicense]: https://unlicense.org/
|
||||||
[Rust code of conduct]: https://www.rust-lang.org/policies/code-of-conduct
|
[Rust code of conduct]: https://www.rust-lang.org/policies/code-of-conduct
|
||||||
[Fast 2D rendering on GPU]: https://raphlinus.github.io/rust/graphics/gpu/2020/06/13/fast-2d-rendering.html
|
|
||||||
|
|
14
doc/blogs.md
Normal file
14
doc/blogs.md
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# Blogs and writing
|
||||||
|
|
||||||
|
Much of the research progress on piet-gpu is documented in blog entries. Here are the most relevant:
|
||||||
|
|
||||||
|
* [Fast 2D rendering on GPU](https://raphlinus.github.io/rust/graphics/gpu/2020/06/13/fast-2d-rendering.html), Jun 13, 2020
|
||||||
|
* [A sort-middle architecture for 2D graphics](https://raphlinus.github.io/rust/graphics/gpu/2020/06/12/sort-middle.html), Jun 12, 2020
|
||||||
|
* [piet-gpu progress report](https://raphlinus.github.io/rust/graphics/gpu/2020/06/01/piet-gpu-progress.html), Jun 1, 2020
|
||||||
|
* [2D Graphics on Modern GPU](https://raphlinus.github.io/rust/graphics/gpu/2019/05/08/modern-2d.html), May 8, 2019
|
||||||
|
|
||||||
|
There are some posts more general to GPU compute programming that might be of some interest:
|
||||||
|
|
||||||
|
* [The stack monoid](https://raphlinus.github.io/gpu/2020/09/05/stack-monoid.html), Sep 5, 2020
|
||||||
|
* [Prefix sum on Vulkan](https://raphlinus.github.io/gpu/2020/04/30/prefix-sum.html), Apr 30, 2020
|
||||||
|
* [GPU resources](https://raphlinus.github.io/gpu/2020/02/12/gpu-resources.html), Feb 12, 2020
|
183
doc/vision.md
Normal file
183
doc/vision.md
Normal file
|
@ -0,0 +1,183 @@
|
||||||
|
## The piet-gpu vision
|
||||||
|
|
||||||
|
Raph Levien, 2020-12-10
|
||||||
|
|
||||||
|
I’ve done several [blog posts](./blogs.md) about piet-gpu already, and more generally GPU compute, but this document is a little different in scope. Rather than showing off a prototype and presenting a research result, it will set forth a bold and ambitious plan for where this might go. I find this vision compelling, and it’s motivated me to spend a lot of energy mastering some difficult material. The grand vision is much more than one person can do, so I’ll do some of it myself and maybe inspire collaboration for the rest of it.
|
||||||
|
|
||||||
|
The full vision for piet-gpu is a 2D rendering engine that is considerably faster, higher quality, and more flexible than the current state of the art, and runs on a wide variety of hardware. I’ll go into some detail about why I think this goal is possible and what kind of work is needed to get there.
|
||||||
|
|
||||||
|
The current state of the piet-gpu codebase is an early stage prototype, largely to test whether the ideas are viable and to gather empirical performance data on some of the more intensive parts of the rendering problem, so far mainly antialiased vector filling and stroking.
|
||||||
|
|
||||||
|
## Compute-based 2D rendering
|
||||||
|
|
||||||
|
The central theme of piet-gpu is to do most or all of the rendering steps in compute shaders. This is quite a different philosophy to the traditional rasterization-based approach to 2D rendering, which breaks the scene (on the CPU side) into a series of draw calls, which are then sent to the GPU. This works extremely well when the mapping to draw calls is simple (which is the case for imgui-style UI made up of text and simple graphic elements), but otherwise much less so. In using GPU compute extensively, piet-gpu draws much inspiration from [Spinel].
|
||||||
|
|
||||||
|
Using compute shaders has profound effects at two particular stages in the pipeline. First, in early stages, it lets the GPU ingest a scene description that is, as much as possible, a straightforward binary encoding of the scene. That, in turn, makes the CPU-side part of the job simple and efficient, allowing higher frame rates on complex scenes without jank.
|
||||||
|
|
||||||
|
Second, in the last stage (“fine rasterization”), compositing takes place within the compute shader, using vector registers rather than texture buffers in global memory for intermediate RGBA values.
|
||||||
|
|
||||||
|
Note that the benefits depend on the scene. For a static (or mostly static) scene, the CPU-side encoding cost might not matter much because it can be done ahead of time. Similarly, if the scene doesn’t require sophisticated compositing, but is just a series of alpha-blended draws, existing rasterization pipelines can handle those very efficiently. But piet-gpu should fly with dynamic scenes with lots of masking and blending, where existing 2D engines would struggle.
|
||||||
|
|
||||||
|
The intermediate stages benefit too. The coarse rasterization step can employ sophisticated logic to enable optimizations on a per-tile granularity that would otherwise rely on brute force.
|
||||||
|
|
||||||
|
## Retained scene graph fragments
|
||||||
|
|
||||||
|
Applications vary in their degree of dynamism. At one extreme, the scene is mostly static, with perhaps a few variable elements and perhaps some animation done at compositing time (I think of this as the iPhone style of UI, as it’s so well adapted to mechanisms like Core Animation). At the other extreme, every rendered frame is completely different from the one before, so encoding needs to be done entirely from scratch every time; these applications are well adapted to an “immediate mode” approach.
|
||||||
|
|
||||||
|
I’m most interested in cases in the middle. I believe the best approach is to split the encoding process so that the static parts of the scene graph can be encoded once into a retained scene graph fragment, then these fragments can be stitched together, along with the dynamically encoded parts of the scene, with a minimum of CPU effort.
|
||||||
|
|
||||||
|
Much of the piet-gpu architecture is geared towards supporting this goal. Notably, the global affine transformation is not baked into the encoding of vector paths, so the same binary encoding of a vector path can be instanced (perhaps multiple times within a scene) with different transforms. Applying the transform is done GPU-side, early in the [pipeline][sort-middle architecture]. Thus, animating the transform should be very efficient, and the vector paths will be re-rendered at full resolution with vector crispness.
|
||||||
|
|
||||||
|
Even so, fully realizing retained scene graph fragments will be one of the more difficult parts of the vision. It requires a good API to represent retained fragments, as well as incrementally update parameters such as transformation and opacity. It also requires a sophisticated approach to resource management so that resources backing the retained fragments can be efficiently cached GPU-side without hogging relatively scarce GPU memory. As such, I will focus on immediate mode first, as that is also an important case. But make no mistake, the goal of retaining scene fragments is motivating a number of design decisions, in particular leading me away from shortcuts such as applying affine transforms CPU-side during encoding.
|
||||||
|
|
||||||
|
## Portable compute runtime
|
||||||
|
|
||||||
|
One challenge facing piet-gpu is the lack of adequate infrastructure for portable GPU compute. Most research is done on CUDA, as that is the only truly viable platform for GPU compute today, but that would make it essentially impossible to deploy the work on any hardware other than Nvidia.
|
||||||
|
|
||||||
|
I strongly believe that Vulkan is emerging as a viable low-level platform for utilizing GPU compute resources. I’m also not the only one thinking along these lines. The [VkFFT] project is an impressive demonstration that a Vulkan deployment of one math-intensive algorithm can be just as performant as the CUDA version. In addition, there are early steps toward running machine learning workloads on Vulkan, particularly TensorFlow Lite.
|
||||||
|
|
||||||
|
Of course, while it’s possible to run Vulkan on a pretty wide range of hardware, it doesn’t solve all portability problems. “Runs Vulkan” is not a binary, but rather a portal to a vast matrix of optional features and limits from the various combinations of hardware, drivers, and compatibility shims ([vulkan.gpuinfo.org] is an excellent resource). In particular, Apple forces the use of Metal. In theory, MoltenVK — or, more generally, the [Vulkan Portability Extension] — lets you run Vulkan code on Apple hardware, but in practice it doesn’t quite work (see [#42]). There are also compatibility and integration advantages to DX12 over Vulkan on Windows; older CPU generations such as Haswell and Broadwell don’t support Vulkan at all. To this end, I’ve started a portability layer (piet-gpu-hal) which should be able to run natively on these other APIs.
|
||||||
|
|
||||||
|
### Why not wgpu?
|
||||||
|
|
||||||
|
The compatibility layer has goals that overlap with those of [wgpu], and WebGPU more broadly. Why not just use that, as much of the Rust ecosystem has done?
|
||||||
|
|
||||||
|
It’s *very* tempting, but there is also some divergence of goals. The main one is that to keep the piet-gpu runtime light and startup time quick, I really want to do ahead-of-time compilation of shaders, so that the binary embeds intermediate representation for the target platform (DXIL for Windows 10, etc). Further, by using Vulkan directly, we can experiment with advanced features such as subgroups, the memory model, etc., which are not yet well supported in wgpu, though it certainly would be possible to add these features. I don’t know how much these advanced features contribute, but that’s one of the research questions to be addressed. If the gain is modest, then implementing them is a low priority. If the gain is significant, then that should increase motivation for runtimes such as wgpu to include them.
|
||||||
|
|
||||||
|
Also see the section on incremental present, below, which is another feature that is not yet well supported in wgpu, so working with lower level APIs should reduce the friction.
|
||||||
|
|
||||||
|
At the same time, wgpu continues to improve, including focus on making the runtime leaner (using the new [naga] shader compilation engine rather than spirv-cross is one such advance). My sense is this: a primary reason for piet-gpu to have its own compatibility layer is so that we can really clarify and sharpen the requirements for a more general GPU compute runtime.
|
||||||
|
|
||||||
|
### Compatibility fallback
|
||||||
|
|
||||||
|
One challenge of a compute-centric approach is that there is not (yet) an ironclad guarantee that the GPU and drivers will actually be able to handle the compute shaders and resource management patterns (the latter may actually be more of a challenge, as piet-gpu relies on [descriptor indexing] to address multiple images during fine rasterization).
|
||||||
|
|
||||||
|
There are a number of approaches to this problem, including building hybrid pipelines and otherwise doing lots of compatibility engineering, to target platforms well on their way to becoming obsolete. But I worry quite a bit about the complexity burden, as well as pressure away from the absolute best solution to a problem if it poses compatibility challenges.
|
||||||
|
|
||||||
|
I’m more inclined to fall back to CPU rendering. Projects such as [Blend2D] show that CPU rendering can be performant, though nowhere near as much as a GPU. Of course, that means coming up with CPU implementations of the algorithms.
|
||||||
|
|
||||||
|
One intriguing possibility is to automatically translate the Vulkan compute shaders to CPU runnable code. This approach has the advantage of maintaining one codebase for the pipeline, reducing friction for adding new features, and guaranteeing pixel-perfect consistency. The biggest question is whether such an approach would be adequately performant. A very good way to get preliminary answers is to use [SwiftShader] or Mesa’s [Lavapipe], which do JIT generation of CPU side code. Obviously, for reasons of startup time and binary size it would be better to ship ahead-of-time translated shaders, but that’s a practical rather than conceptual problem.
|
||||||
|
|
||||||
|
There are examples of compile time translation of shaders to CPU code. An intriguing possibility is the [spirv to ispc translator], which doesn’t seem to be actively developed, but would seem to be a path to reasonably good CPU performance from shaders. Another, actually used in production in WebRender, is [glsl-to-cxx].
|
||||||
|
|
||||||
|
A truly universal compute infrastructure with unified shader source would have implications far beyond 2D rendering. The domain most likely to invest in this area is AI (deployment to consumer hardware; for server side and in-house deployment, they’ll obviously just use CUDA and neural accelerators). I’ll also note that this problem is ostensibly within scope of OpenCL, but they have so far failed to deliver, largely because they’ve historically been entirely dependent on driver support from the GPU manufacturer. I expect *something* to happen.
|
||||||
|
|
||||||
|
There is another perfectly viable path this could take, less dependent on shader compilation infrastructure: a software renderer developed in parallel with the GPU one. Possible existing Rust code bases to draw on include [raqote] and [tiny-skia]. These make more sense as community sub-projects (see below).
|
||||||
|
|
||||||
|
## Text
|
||||||
|
|
||||||
|
An essential part of any 2D library is text rendering. This really breaks down into text layout and painting of glyphs. Both are important to get right.
|
||||||
|
|
||||||
|
The Piet of today is primarily an abstraction layer over platform 2D graphics libraries, and that’s equally true of text. We’ve lately made some really good progress in a common [rich text API] and implementations over DirectWrite and Core Text. However, it is currently lacking a Linux backend. (As a placeholder, we use the Cairo “toy text API,” but that is unsatisfying for a number of reasons.)
|
||||||
|
|
||||||
|
I think we want to move away from abstracting over platform capabilities, for several reasons. One is that it’s harder to ensure consistent results. Another is that it’s hard to add new features, such as hz-style justification (see below). Thus, we follow a trajectory similar to that of Web browsers.
|
||||||
|
|
||||||
|
As a project related to piet-gpu, I’d love to build (or mentor someone to build) a text layout engine, in Rust, suitable for most UI work. This wouldn’t be my first time; I wrote the original version of [Minikin], the text layout engine first shipped in Android Lollipop.
|
||||||
|
|
||||||
|
### Painting
|
||||||
|
|
||||||
|
Ultimately, I’d like piet-gpu to support 3 sources of glyph data for painting.
|
||||||
|
|
||||||
|
The first is bitmaps produced by the platform. These have the advantage of matching native UI, and also take maximum advantage of hinting and subpixel RGB rendering, thus improving contrast and clarity. These bitmaps would be rendered mostly CPU-side, and uploaded into a texture atlas. The actual rasterization is just texture lookups, and should be super efficient.
|
||||||
|
|
||||||
|
The second is dynamic vector rendering from glyph outlines. This source is best optimized for large text, animation (including supporting pinch-to-zoom style gestures), and possible extension into 3D, including VR and AR. The lack of hinting and RGB subpixel rendering is not a serious issue on high-dpi screens, and is not an expectation on mobile. Early measurements from piet-gpu suggest that it should be possible to maintain 60fps on text-heavy scenes on most GPUs, but power usage might not be ideal.
|
||||||
|
|
||||||
|
Thus, the third source is vector rendering through a glyph cache, something of a hybrid of the first two sources. Initially, the cache will be managed CPU-side during encoding (likely using [Guillotière], [Étagère], or something similar), but in the future we might explore GPU-side algorithms to manage the cache in parallel, reducing CPU requirements further.
|
||||||
|
|
||||||
|
### GPU-side variable fonts
|
||||||
|
|
||||||
|
A very intriguing possibility is to offload most of the work of rendering variable fonts to GPU. There are reasons to believe this would work well: [variable font technology] is fundamentally based on multiplying vectors of “deltas” with basis functions and adding those up, a task ideally suited to GPU.
|
||||||
|
|
||||||
|
A challenge is representing the coordinate data and deltas in a GPU-friendly format; the [glyf] and [gvar] table formats are designed for compact data representation and (reasonably) simple decoding by scalar CPUs, but are challenging for massively parallel algorithms. Decoding to fixed-size numbers is straightforward but might use a lot of GPU memory and bandwidth to represent the font data (especially a problem for CJK fonts). One intriguing approach is to re-encode the underlying data using a self-synchronizing variable integer encoding, which would reduce the memory requirements but preserve the ability to do processing in parallel.
|
||||||
|
|
||||||
|
The major advantages of GPU-side variable font rendering are to allow efficient animation of variable font axes, and also to open up the possibility of adjusting the axes to improve text layout, for example to improve the quality of paragraph justification as pioneered by the [hz] prototype and recently demonstrated with [amstelvar], or to support calligraphic styles and complex scripts better, for example to make more beautiful [kashida] for Arabic, all without significantly reducing performance.
|
||||||
|
|
||||||
|
## Improving rendering quality
|
||||||
|
|
||||||
|
The question of quality in GPU 2D rendering has long been complex. Many rasterization based approaches are dependent on [MSAA] in the GPU’s fixed-function pipeline, which may not always be available or perhaps only practical at lower settings (especially on mobile). Thus, GPU accelerated 2D rendering quality has gotten something of a bad name.
|
||||||
|
|
||||||
|
A compute-centric approach changes the story. All actual pixels are generated by code; the quality of the rendering is entirely up to the author of that code. The current piet-gpu codebase uses an exact-area approach to antialiasing (in the [tradition of libart]), and thus does not exhibit stepping or graininess characteristic of MSAA at low or medium settings. The quality should be the same as a good software renderer, because it *is* a software renderer, just one that happens to be running on hardware with orders of magnitude more parallelism than any reasonable CPU.
|
||||||
|
|
||||||
|
Even so, I believe it’s possible to do even better. A CPU-bound renderer has barely enough performance to get pixels to the screen, so it takes whatever shortcuts are needed to get the job done in that performance budget. A GPU typically has an order of magnitude more raw compute bandwidth, so there is headroom that can be used to improve quality.
|
||||||
|
|
||||||
|
The details of what I have in mind could be a blog post in and of itself, but I’ll sketch out the highlights.
|
||||||
|
|
||||||
|
Perhaps the most important quality problem is that of so-called “conflation artifacts,” the seams that happen when compositing antialiased elements (see [#49]). Most of the academic literature on 2D rendering on GPU addresses this question. I think it’s practical to do in the piet-gpu architecture, basically by swapping out soft-alpha compositing in the fine rasterizer with one based on supersampling. Some of the academic literature also takes the opportunity at that stage in the pipeline to apply a reconstruction filter more sophisticated than a box filter, but I am not yet convinced that the improvement is worth it, especially as physical display resolution increases.
|
||||||
|
|
||||||
|
The next major area of potential quality improvement is getting gamma right. This is a surprisingly tricky area, as a theoretically “correct” approach to gamma often yields text and hairline strokes that appear weak and spindly. Another concern is document compatibility; simply changing the gamma of the colorspace in which alpha blending happens will change the color of the result. Likely, a perfect solution to this problem will require cooperation with the application driving the renderer; if it is designed with gamma-perfect rendering in mind, there is no real problem, but otherwise it’s likely that various heuristics will need to be applied to get good results. (Note that [stem darkening] is one approach used specifically for text rendering, and among other things is a source of considerable variation between platforms.)
|
||||||
|
|
||||||
|
When driving low-dpi displays (which still exist), one opportunity to improve quality is more sophisticated RGB [subpixel rendering]. Currently, that’s basically text-only, though it could be applied to vector rendering as well. It also often doesn’t survive sophisticated compositing, as an RGBA texture with a transparent background cannot represent RGB subpixel text. One solution is to do compositing with per-channel alpha, which can be done very efficiently when compositing in a compute shader, but would be a serious performance problem if intermediate texture buffers needed to be written out to global memory.
|
||||||
|
|
||||||
|
These potential quality improvements may well provide the answer to the question, “why move to a new rendering architecture instead of incrementally improving what we’ve got now?”
|
||||||
|
|
||||||
|
## Enriching the imaging model
|
||||||
|
|
||||||
|
There is consensus on “the modern 2D imaging model,” roughly encompassing PDF, SVG, HTML Canvas, and Direct2D, but it is not set in stone, and there is considerable variation in advanced features within those systems (for example, gradient meshes are more or less unique to PDF — the feature was proposed for SVG 2 but [then removed](http://libregraphicsworld.org/blog/entry/gradient-meshes-and-hatching-to-be-removed-from-svg-2-0)).
|
||||||
|
|
||||||
|
I like this consensus 2D imaging model because I feel it is extremely well suited for UI and documents of considerable richness and complexity, and is quite designer-friendly. There is also tension pulling away from it, I think for two reasons. One is that it is not always implemented efficiently on GPU, especially with deeply nested soft clipping and other nontrivial compositing requirements. The other is that it’s possible to do things on GPU (especially using custom shaders) that are not easily possible with the standard 2D API. Shadertoy shows *many* things that are possible in shaders. One idea I’d like to explore is watercolor brush strokes (see [Computer-Generated Watercolor](https://grail.cs.washington.edu/projects/watercolor/paper_small.pdf) for inspiration). I think it would be possible to get pretty far with distance fields and procedural noise, and a simple function to go from those to paint values for paint-like compositing.
|
||||||
|
|
||||||
|
Another direction the imaging model should go is support for [HDR] (strong overlap with the gamma issue above). This will require color transformations for tone mapping in the compositing pipeline, which again can be written as shaders.
|
||||||
|
|
||||||
|
One interesting existing 2D engine with extension points is Direct2D, which lets users provide [Custom effects](https://docs.microsoft.com/en-us/windows/win32/direct2d/custom-effects) by linking in compute shaders. Of course, it is a major challenge to make such a thing portable, but I’m encouraged about building on existing GPU infrastructure efforts. In particular, over time, I think WebGPU could become a standard way to provide such an extension point portably.
|
||||||
|
|
||||||
|
Blurs are a specific case that should probably be done early, as they’re very widely used in UI. In the general case, blurring will require allocating temporary buffers for the contents being blurred, which is not exactly in the spirit of piet-gpu compositing, largely because it requires a lot of resource management and pipeline building CPU-side, but is possible. I’ve already done research on a special case, a [blurred rounded rectangle], which can be computed extremely efficiently as a fairly simple shader. The encoder would apply a peephole-like optimization during encoding time, pattern matching the blurred contents and swapping in the more efficient shader when possible.
|
||||||
|
|
||||||
|
## Incremental present
|
||||||
|
|
||||||
|
In the old days, UI tracked “dirty rectangles,” and only redrew what actually changed, as computers just weren’t fast enough to redraw the entire screen contents in a single refresh period. Games, on the other hand, need to redraw every pixel every frame, so the GPU pipeline became optimized for those, and many rendering engines got more relaxed about avoiding redrawing, as the GPU was plenty fast for that.
|
||||||
|
|
||||||
|
Today, the GPU is still plenty fast, but there are still gains to be had from incremental present, primarily in power consumption. Blinking a cursor in a text editor should not run the battery down. Also, on low-resource devices, incremental present can reduce latency and increase the chance of smooth running without dropped frames.
|
||||||
|
|
||||||
|
The tile-based architecture of piet-gpu is extremely well suited to incremental present, as the various pipeline stages are optimized to only do work within the viewport (render region). This is especially true for fine rasterization, which doesn’t touch any work outside that region.
|
||||||
|
|
||||||
|
A small challenge is support by the GPU infrastructure, which tends to be more optimized for games than UI. DirectX has long had [good support](https://docs.microsoft.com/en-us/windows/win32/api/dxgi1_2/nf-dxgi1_2-idxgiswapchain1-present1). The Vulkan world is spottier, as incremental present is available only as an extension. That extension tends to be available on Linux (largely because [Gnome can make good use of it](https://feaneron.com/2019/10/05/incremental-present-in-gtk4/)), and to some extent on Android, but in my experiments less so on desktop. And of course Metal can’t do it at all.
|
||||||
|
|
||||||
|
## Roadmap and community
|
||||||
|
|
||||||
|
This vision is *very* ambitious. There’s no way one person could do it all in a reasonable amount of time. It’s a multi-year project at best, and that’s not counting the year and a half since the first piet-metal prototype.
|
||||||
|
|
||||||
|
There are a few ways I plan to deal with this. First is to be explicit that it is a research project. That means that certain elements, especially dealing with compatibility, are a lower priority. Other projects in a similar space have sunk a lot of time and energy into working around driver bugs and dealing with the complex landscape of GPU capability diversity (especially on older devices and mobile). The initial goal is to prove that the concepts work on a reasonably modern GPU platform.
|
||||||
|
|
||||||
|
Another strategy is to split up the work so that at least some parts can be taken up by the community. There are a number of interesting subprojects. Also, it would be wonderful for the runtime work to be taken up by another project, as most of it is not specific to the needs of 2D rendering.
|
||||||
|
|
||||||
|
I’d really like to build a good open-source community around piet-gpu, and that’s already starting to happen. The #gpu stream on [xi.zulipchat.com] hosts some really interesting discussions. In addition, the [gio] project is exploring adopting the compute shaders of piet-gpu (with the CPU runtime in Go) and has made substantive contributions to the code base. There’s a lot of research potential in piet-gpu, and knowledge about GPU compute programming in general, that I think is valuable to share, so it’s my intent to keep creating blog posts and other materials to spread that knowledge. Academic papers are also within scope, and I’m open to collaboration on those.
|
||||||
|
|
||||||
|
I'm really excited to see where this goes. I think there's the potential to build something truly great, and I look forward to working with others to realize that vision.
|
||||||
|
|
||||||
|
There's been some great discussion on [/r/rust](https://www.reddit.com/r/rust/comments/kal8ac/the_pietgpu_vision/).
|
||||||
|
|
||||||
|
[hz]: https://en.wikipedia.org/wiki/Hz-program
|
||||||
|
[spirv to ispc translator]: https://software.intel.com/content/www/us/en/develop/articles/spir-v-to-ispc-convert-gpu-compute-to-the-cpu.html
|
||||||
|
[tiny-skia]: https://github.com/RazrFalcon/tiny-skia
|
||||||
|
[raqote]: https://github.com/jrmuizel/raqote
|
||||||
|
[Blend2D]: https://blend2d.com/
|
||||||
|
[amstelvar]: https://variablefonts.typenetwork.com/topics/spacing/justification
|
||||||
|
[kashida]: https://andreasmhallberg.github.io/stretchable-kashida/
|
||||||
|
[SwiftShader]: https://swiftshader.googlesource.com/SwiftShader
|
||||||
|
[Lavapipe]: https://www.phoronix.com/scan.php?page=news_item&px=Mesa-Vulkan-Lavapipe
|
||||||
|
[glsl-to-cxx]: https://github.com/servo/webrender/tree/master/glsl-to-cxx
|
||||||
|
[sort-middle architecture]: https://raphlinus.github.io/rust/graphics/gpu/2020/06/12/sort-middle.html
|
||||||
|
[vulkan.gpuinfo.org]: https://vulkan.gpuinfo.org
|
||||||
|
[Vulkan Portability Extension]: https://www.khronos.org/blog/fighting-fragmentation-vulkan-portability-extension-released-implementations-shipping
|
||||||
|
[xi.zulipchat.com]: https://xi.zulipchat.com
|
||||||
|
[glyf]: https://docs.microsoft.com/en-us/typography/opentype/spec/glyf
|
||||||
|
[gvar]: https://docs.microsoft.com/en-us/typography/opentype/spec/gvar
|
||||||
|
[VkFFT]: https://github.com/DTolm/VkFFT
|
||||||
|
[Spinel]: https://fuchsia.googlesource.com/fuchsia/+/refs/heads/master/src/graphics/lib/compute/spinel/
|
||||||
|
[wgpu]: https://github.com/gfx-rs/wgpu
|
||||||
|
[naga]: https://github.com/gfx-rs/naga
|
||||||
|
[descriptor indexing]: http://chunkstories.xyz/blog/a-note-on-descriptor-indexing/
|
||||||
|
[rich text API]: https://www.cmyr.net/blog/piet-text-work.html
|
||||||
|
[Guillotière]: https://github.com/nical/guillotiere
|
||||||
|
[Étagère]: https://crates.io/crates/etagere
|
||||||
|
[variable font technology]: https://docs.microsoft.com/en-us/typography/opentype/spec/otvaroverview
|
||||||
|
[MSAA]: https://en.wikipedia.org/wiki/Multisample_anti-aliasing
|
||||||
|
[tradition of libart]: https://people.gnome.org/~mathieu/libart/internals.html
|
||||||
|
[stem darkening]: https://www.freetype.org/freetype2/docs/text-rendering-general.html
|
||||||
|
[subpixel rendering]: https://en.wikipedia.org/wiki/Subpixel_rendering
|
||||||
|
[HDR]: https://en.wikipedia.org/wiki/High-dynamic-range_imaging
|
||||||
|
[blurred rounded rectangle]: https://raphlinus.github.io/graphics/2020/04/21/blurred-rounded-rects.html
|
||||||
|
[gio]: https://gioui.org/
|
||||||
|
[Minikin]: https://android.googlesource.com/platform/frameworks/minikin/
|
||||||
|
[#42]: https://github.com/linebender/piet-gpu/issues/42
|
||||||
|
[#49]: https://github.com/linebender/piet-gpu/issues/49
|
|
@ -73,18 +73,31 @@ fn gen_derive_def(name: &str, size: usize, def: &LayoutTypeDef) -> proc_macro2::
|
||||||
|
|
||||||
let mut args = Vec::new();
|
let mut args = Vec::new();
|
||||||
let mut field_encoders = proc_macro2::TokenStream::new();
|
let mut field_encoders = proc_macro2::TokenStream::new();
|
||||||
for (i, (offset, _ty)) in payload.iter().enumerate() {
|
let mut tag_field = None;
|
||||||
|
for (i, (offset, ty)) in payload.iter().enumerate() {
|
||||||
let field_id = format_ident!("f{}", i);
|
let field_id = format_ident!("f{}", i);
|
||||||
|
if matches!(ty.ty, GpuType::Scalar(GpuScalar::TagFlags)) {
|
||||||
|
tag_field = Some(field_id.clone());
|
||||||
|
} else {
|
||||||
let field_encoder = quote! {
|
let field_encoder = quote! {
|
||||||
#field_id.encode_to(&mut buf[#offset..]);
|
#field_id.encode_to(&mut buf[#offset..]);
|
||||||
};
|
};
|
||||||
field_encoders.extend(field_encoder);
|
field_encoders.extend(field_encoder);
|
||||||
|
}
|
||||||
args.push(field_id);
|
args.push(field_id);
|
||||||
}
|
}
|
||||||
let tag = variant_ix as u32;
|
let tag = variant_ix as u32;
|
||||||
|
let tag_encode = match tag_field {
|
||||||
|
None => quote! {
|
||||||
|
buf[0..4].copy_from_slice(&#tag.to_le_bytes());
|
||||||
|
},
|
||||||
|
Some(tag_field) => quote! {
|
||||||
|
buf[0..4].copy_from_slice(&(#tag | ((*#tag_field as u32) << 16)).to_le_bytes());
|
||||||
|
},
|
||||||
|
};
|
||||||
let case = quote! {
|
let case = quote! {
|
||||||
#name_id::#variant_id(#(#args),*) => {
|
#name_id::#variant_id(#(#args),*) => {
|
||||||
buf[0..4].copy_from_slice(&#tag.to_le_bytes());
|
#tag_encode
|
||||||
#field_encoders
|
#field_encoders
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -139,12 +152,15 @@ fn gen_derive_scalar_ty(ty: &GpuScalar) -> proc_macro2::TokenStream {
|
||||||
GpuScalar::U8 => quote!(u8),
|
GpuScalar::U8 => quote!(u8),
|
||||||
GpuScalar::U16 => quote!(u16),
|
GpuScalar::U16 => quote!(u16),
|
||||||
GpuScalar::U32 => quote!(u32),
|
GpuScalar::U32 => quote!(u32),
|
||||||
|
GpuScalar::TagFlags => quote!(u16),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_encode_field(name: &str, offset: usize, ty: &GpuType) -> proc_macro2::TokenStream {
|
fn gen_encode_field(name: &str, offset: usize, ty: &GpuType) -> proc_macro2::TokenStream {
|
||||||
let name_id = format_ident!("{}", name);
|
let name_id = format_ident!("{}", name);
|
||||||
match ty {
|
match ty {
|
||||||
|
// encoding of flags into tag word is handled elsewhere
|
||||||
|
GpuType::Scalar(GpuScalar::TagFlags) => quote! {},
|
||||||
GpuType::Scalar(s) => {
|
GpuType::Scalar(s) => {
|
||||||
let end = offset + s.size();
|
let end = offset + s.size();
|
||||||
quote! {
|
quote! {
|
||||||
|
|
|
@ -8,6 +8,11 @@ use crate::parse::{GpuScalar, GpuType};
|
||||||
|
|
||||||
pub fn gen_glsl(module: &LayoutModule) -> String {
|
pub fn gen_glsl(module: &LayoutModule) -> String {
|
||||||
let mut r = String::new();
|
let mut r = String::new();
|
||||||
|
writeln!(
|
||||||
|
&mut r,
|
||||||
|
"// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense\n"
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
writeln!(&mut r, "// Code auto-generated by piet-gpu-derive\n").unwrap();
|
writeln!(&mut r, "// Code auto-generated by piet-gpu-derive\n").unwrap();
|
||||||
// Note: GLSL needs definitions before uses. We could do a topological sort here,
|
// Note: GLSL needs definitions before uses. We could do a topological sort here,
|
||||||
// but easiest for now to just require that in spec.
|
// but easiest for now to just require that in spec.
|
||||||
|
@ -24,23 +29,25 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
|
||||||
(size, LayoutTypeDef::Enum(en)) => {
|
(size, LayoutTypeDef::Enum(en)) => {
|
||||||
gen_enum_def(&mut r, name, en);
|
gen_enum_def(&mut r, name, en);
|
||||||
gen_item_def(&mut r, name, size.size);
|
gen_item_def(&mut r, name, size.size);
|
||||||
|
gen_tag_def(&mut r, name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for name in &module.def_names {
|
for name in &module.def_names {
|
||||||
let def = module.defs.get(name).unwrap();
|
let def = module.defs.get(name).unwrap();
|
||||||
|
let is_mem = !module.name.eq(&"state") && !module.name.eq(&"scene");
|
||||||
match def {
|
match def {
|
||||||
(_size, LayoutTypeDef::Struct(fields)) => {
|
(_size, LayoutTypeDef::Struct(fields)) => {
|
||||||
gen_struct_read(&mut r, &module.name, &name, fields);
|
gen_struct_read(&mut r, &module.name, &name, is_mem, fields);
|
||||||
if module.gpu_write {
|
if module.gpu_write {
|
||||||
gen_struct_write(&mut r, &module.name, &name, fields);
|
gen_struct_write(&mut r, &module.name, &name, is_mem, fields);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(_size, LayoutTypeDef::Enum(en)) => {
|
(_size, LayoutTypeDef::Enum(en)) => {
|
||||||
gen_enum_read(&mut r, &module.name, &name, en);
|
gen_enum_read(&mut r, &module.name, &name, is_mem, en);
|
||||||
if module.gpu_write {
|
if module.gpu_write {
|
||||||
gen_enum_write(&mut r, &module.name, &name, en);
|
gen_enum_write(&mut r, &module.name, &name, is_mem, en);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -86,20 +93,36 @@ fn gen_item_def(r: &mut String, name: &str, size: usize) {
|
||||||
writeln!(r, "}}\n").unwrap();
|
writeln!(r, "}}\n").unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn gen_tag_def(r: &mut String, name: &str) {
|
||||||
|
writeln!(r, "struct {}Tag {{", name).unwrap();
|
||||||
|
writeln!(r, " uint tag;").unwrap();
|
||||||
|
writeln!(r, " uint flags;").unwrap();
|
||||||
|
writeln!(r, "}};\n").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
fn gen_struct_read(
|
fn gen_struct_read(
|
||||||
r: &mut String,
|
r: &mut String,
|
||||||
bufname: &str,
|
bufname: &str,
|
||||||
name: &str,
|
name: &str,
|
||||||
|
is_mem: bool,
|
||||||
fields: &[(String, usize, LayoutType)],
|
fields: &[(String, usize, LayoutType)],
|
||||||
) {
|
) {
|
||||||
writeln!(r, "{} {}_read({}Ref ref) {{", name, name, name).unwrap();
|
write!(r, "{} {}_read(", name, name).unwrap();
|
||||||
|
if is_mem {
|
||||||
|
write!(r, "Alloc a, ").unwrap();
|
||||||
|
}
|
||||||
|
writeln!(r, "{}Ref ref) {{", name).unwrap();
|
||||||
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
|
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
|
||||||
let coverage = crate::layout::struct_coverage(fields, false);
|
let coverage = crate::layout::struct_coverage(fields, false);
|
||||||
for (i, fields) in coverage.iter().enumerate() {
|
for (i, fields) in coverage.iter().enumerate() {
|
||||||
if !fields.is_empty() {
|
if !fields.is_empty() {
|
||||||
|
if is_mem {
|
||||||
|
writeln!(r, " uint raw{} = read_mem(a, ix + {});", i, i).unwrap();
|
||||||
|
} else {
|
||||||
writeln!(r, " uint raw{} = {}[ix + {}];", i, bufname, i).unwrap();
|
writeln!(r, " uint raw{} = {}[ix + {}];", i, bufname, i).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
writeln!(r, " {} s;", name).unwrap();
|
writeln!(r, " {} s;", name).unwrap();
|
||||||
|
|
||||||
let mut preload: bool = false;
|
let mut preload: bool = false;
|
||||||
|
@ -124,14 +147,51 @@ fn gen_enum_read(
|
||||||
r: &mut String,
|
r: &mut String,
|
||||||
bufname: &str,
|
bufname: &str,
|
||||||
name: &str,
|
name: &str,
|
||||||
|
is_mem: bool,
|
||||||
variants: &[(String, Vec<(usize, LayoutType)>)],
|
variants: &[(String, Vec<(usize, LayoutType)>)],
|
||||||
) {
|
) {
|
||||||
writeln!(r, "uint {}_tag({}Ref ref) {{", name, name).unwrap();
|
if is_mem {
|
||||||
writeln!(r, " return {}[ref.offset >> 2];", bufname).unwrap();
|
writeln!(r, "{}Tag {}_tag(Alloc a, {}Ref ref) {{", name, name, name).unwrap();
|
||||||
|
writeln!(r, " uint tag_and_flags = read_mem(a, ref.offset >> 2);").unwrap();
|
||||||
|
} else {
|
||||||
|
writeln!(r, "{}Tag {}_tag({}Ref ref) {{", name, name, name).unwrap();
|
||||||
|
writeln!(r, " uint tag_and_flags = {}[ref.offset >> 2];", bufname).unwrap();
|
||||||
|
}
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
" return {}Tag(tag_and_flags & 0xffff, tag_and_flags >> 16);",
|
||||||
|
name
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
writeln!(r, "}}\n").unwrap();
|
writeln!(r, "}}\n").unwrap();
|
||||||
for (var_name, payload) in variants {
|
for (var_name, payload) in variants {
|
||||||
if payload.len() == 1 {
|
let payload_ix = if payload.len() == 1 {
|
||||||
if let GpuType::InlineStruct(structname) = &payload[0].1.ty {
|
Some(0)
|
||||||
|
} else if payload.len() == 2 {
|
||||||
|
if matches!(payload[0].1.ty, GpuType::Scalar(GpuScalar::TagFlags)) {
|
||||||
|
Some(1)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
if let Some(payload_ix) = payload_ix {
|
||||||
|
if let GpuType::InlineStruct(structname) = &payload[payload_ix].1.ty {
|
||||||
|
if is_mem {
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
"{} {}_{}_read(Alloc a, {}Ref ref) {{",
|
||||||
|
structname, name, var_name, name
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
" return {}_read(a, {}Ref(ref.offset + {}));",
|
||||||
|
structname, structname, payload[0].0
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
} else {
|
||||||
writeln!(
|
writeln!(
|
||||||
r,
|
r,
|
||||||
"{} {}_{}_read({}Ref ref) {{",
|
"{} {}_{}_read({}Ref ref) {{",
|
||||||
|
@ -144,6 +204,7 @@ fn gen_enum_read(
|
||||||
structname, structname, payload[0].0
|
structname, structname, payload[0].0
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
}
|
||||||
writeln!(r, "}}\n").unwrap();
|
writeln!(r, "}}\n").unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -225,6 +286,7 @@ fn gen_extract_scalar(offset: usize, ty: &GpuScalar) -> String {
|
||||||
GpuScalar::F16 | GpuScalar::F32 => extract_fbits(offset, ty.size()),
|
GpuScalar::F16 | GpuScalar::F32 => extract_fbits(offset, ty.size()),
|
||||||
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => extract_ubits(offset, ty.size()),
|
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => extract_ubits(offset, ty.size()),
|
||||||
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => extract_ibits(offset, ty.size()),
|
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => extract_ibits(offset, ty.size()),
|
||||||
|
GpuScalar::TagFlags => format!("0 /* TODO */"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -297,9 +359,14 @@ fn gen_struct_write(
|
||||||
r: &mut String,
|
r: &mut String,
|
||||||
bufname: &str,
|
bufname: &str,
|
||||||
name: &str,
|
name: &str,
|
||||||
|
is_mem: bool,
|
||||||
fields: &[(String, usize, LayoutType)],
|
fields: &[(String, usize, LayoutType)],
|
||||||
) {
|
) {
|
||||||
writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap();
|
write!(r, "void {}_write(", name).unwrap();
|
||||||
|
if is_mem {
|
||||||
|
write!(r, "Alloc a, ").unwrap();
|
||||||
|
}
|
||||||
|
writeln!(r, "{}Ref ref, {} s) {{", name, name).unwrap();
|
||||||
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
|
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
|
||||||
let coverage = crate::layout::struct_coverage(fields, true);
|
let coverage = crate::layout::struct_coverage(fields, true);
|
||||||
|
|
||||||
|
@ -375,13 +442,20 @@ fn gen_struct_write(
|
||||||
}
|
}
|
||||||
|
|
||||||
if !pieces.is_empty() {
|
if !pieces.is_empty() {
|
||||||
|
if is_mem {
|
||||||
|
write!(r, " write_mem(a, ix + {}, ", i).unwrap();
|
||||||
|
} else {
|
||||||
write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
|
write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
|
||||||
|
}
|
||||||
for (j, piece) in pieces.iter().enumerate() {
|
for (j, piece) in pieces.iter().enumerate() {
|
||||||
if j != 0 {
|
if j != 0 {
|
||||||
write!(r, " | ").unwrap();
|
write!(r, " | ").unwrap();
|
||||||
}
|
}
|
||||||
write!(r, "{}", piece).unwrap();
|
write!(r, "{}", piece).unwrap();
|
||||||
}
|
}
|
||||||
|
if is_mem {
|
||||||
|
write!(r, ")").unwrap();
|
||||||
|
}
|
||||||
writeln!(r, ";").unwrap();
|
writeln!(r, ";").unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -411,6 +485,7 @@ fn gen_pack_bits_scalar(ty: &GpuScalar, offset: usize, inner: &str) -> String {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GpuScalar::I32 => format!("uint({})", inner),
|
GpuScalar::I32 => format!("uint({})", inner),
|
||||||
|
GpuScalar::TagFlags => format!("0"),
|
||||||
};
|
};
|
||||||
if shift == 0 {
|
if shift == 0 {
|
||||||
bits
|
bits
|
||||||
|
@ -423,10 +498,25 @@ fn gen_enum_write(
|
||||||
r: &mut String,
|
r: &mut String,
|
||||||
bufname: &str,
|
bufname: &str,
|
||||||
name: &str,
|
name: &str,
|
||||||
|
is_mem: bool,
|
||||||
variants: &[(String, Vec<(usize, LayoutType)>)],
|
variants: &[(String, Vec<(usize, LayoutType)>)],
|
||||||
) {
|
) {
|
||||||
for (var_name, payload) in variants {
|
for (var_name, payload) in variants {
|
||||||
if payload.is_empty() {
|
if payload.is_empty() {
|
||||||
|
if is_mem {
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
"void {}_{}_write(Alloc a, {}Ref ref) {{",
|
||||||
|
name, var_name, name
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
" write_mem(a, ref.offset >> 2, {}_{});",
|
||||||
|
name, var_name
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
} else {
|
||||||
writeln!(r, "void {}_{}_write({}Ref ref) {{", name, var_name, name).unwrap();
|
writeln!(r, "void {}_{}_write({}Ref ref) {{", name, var_name, name).unwrap();
|
||||||
writeln!(
|
writeln!(
|
||||||
r,
|
r,
|
||||||
|
@ -434,9 +524,30 @@ fn gen_enum_write(
|
||||||
bufname, name, var_name
|
bufname, name, var_name
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
}
|
||||||
writeln!(r, "}}\n").unwrap();
|
writeln!(r, "}}\n").unwrap();
|
||||||
} else if payload.len() == 1 {
|
} else if payload.len() == 1 {
|
||||||
if let GpuType::InlineStruct(structname) = &payload[0].1.ty {
|
if let GpuType::InlineStruct(structname) = &payload[0].1.ty {
|
||||||
|
if is_mem {
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
"void {}_{}_write(Alloc a, {}Ref ref, {} s) {{",
|
||||||
|
name, var_name, name, structname
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
" write_mem(a, ref.offset >> 2, {}_{});",
|
||||||
|
name, var_name
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
" {}_write(a, {}Ref(ref.offset + {}), s);",
|
||||||
|
structname, structname, payload[0].0
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
} else {
|
||||||
writeln!(
|
writeln!(
|
||||||
r,
|
r,
|
||||||
"void {}_{}_write({}Ref ref, {} s) {{",
|
"void {}_{}_write({}Ref ref, {} s) {{",
|
||||||
|
@ -455,6 +566,52 @@ fn gen_enum_write(
|
||||||
structname, structname, payload[0].0
|
structname, structname, payload[0].0
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
}
|
||||||
|
writeln!(r, "}}\n").unwrap();
|
||||||
|
}
|
||||||
|
} else if payload.len() == 2
|
||||||
|
&& matches!(payload[0].1.ty, GpuType::Scalar(GpuScalar::TagFlags))
|
||||||
|
{
|
||||||
|
if let GpuType::InlineStruct(structname) = &payload[1].1.ty {
|
||||||
|
if is_mem {
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
"void {}_{}_write(Alloc a, {}Ref ref, uint flags, {} s) {{",
|
||||||
|
name, var_name, name, structname
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
" write_mem(a, ref.offset >> 2, (flags << 16) | {}_{});",
|
||||||
|
name, var_name
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
" {}_write(a, {}Ref(ref.offset + {}), s);",
|
||||||
|
structname, structname, payload[0].0
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
} else {
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
"void {}_{}_write({}Ref ref, uint flags, {} s) {{",
|
||||||
|
name, var_name, name, structname
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
" {}[ref.offset >> 2] = (flags << 16) | {}_{};",
|
||||||
|
bufname, name, var_name
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(
|
||||||
|
r,
|
||||||
|
" {}_write({}Ref(ref.offset + {}), s);",
|
||||||
|
structname, structname, payload[0].0
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
writeln!(r, "}}\n").unwrap();
|
writeln!(r, "}}\n").unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -490,7 +647,7 @@ fn glsl_scalar(s: &GpuScalar) -> &'static str {
|
||||||
match s {
|
match s {
|
||||||
GpuScalar::F16 | GpuScalar::F32 => "float",
|
GpuScalar::F16 | GpuScalar::F32 => "float",
|
||||||
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "int",
|
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "int",
|
||||||
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uint",
|
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 | GpuScalar::TagFlags => "uint",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -498,7 +655,7 @@ fn glsl_vecname(s: &GpuScalar) -> &'static str {
|
||||||
match s {
|
match s {
|
||||||
GpuScalar::F16 | GpuScalar::F32 => "vec",
|
GpuScalar::F16 | GpuScalar::F32 => "vec",
|
||||||
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "ivec",
|
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "ivec",
|
||||||
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uvec",
|
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 | GpuScalar::TagFlags => "uvec",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -240,5 +240,5 @@ impl Size {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn align_padding(offset: usize, alignment: usize) -> usize {
|
fn align_padding(offset: usize, alignment: usize) -> usize {
|
||||||
offset.wrapping_neg() & (alignment - 1)
|
offset.wrapping_neg() & (alignment.max(1) - 1)
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@ pub enum GpuScalar {
|
||||||
U8,
|
U8,
|
||||||
U16,
|
U16,
|
||||||
U32,
|
U32,
|
||||||
|
TagFlags,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An algebraic datatype.
|
/// An algebraic datatype.
|
||||||
|
@ -59,6 +60,7 @@ impl GpuScalar {
|
||||||
"u8" => Some(GpuScalar::U8),
|
"u8" => Some(GpuScalar::U8),
|
||||||
"u16" => Some(GpuScalar::U16),
|
"u16" => Some(GpuScalar::U16),
|
||||||
"u32" => Some(GpuScalar::U32),
|
"u32" => Some(GpuScalar::U32),
|
||||||
|
"TagFlags" => Some(GpuScalar::TagFlags),
|
||||||
_ => None,
|
_ => None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -72,6 +74,7 @@ impl GpuScalar {
|
||||||
GpuScalar::F32 | GpuScalar::I32 | GpuScalar::U32 => 4,
|
GpuScalar::F32 | GpuScalar::I32 | GpuScalar::U32 => 4,
|
||||||
GpuScalar::I8 | GpuScalar::U8 => 1,
|
GpuScalar::I8 | GpuScalar::U8 => 1,
|
||||||
GpuScalar::F16 | GpuScalar::I16 | GpuScalar::U16 => 2,
|
GpuScalar::F16 | GpuScalar::I16 | GpuScalar::U16 => 2,
|
||||||
|
GpuScalar::TagFlags => 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,7 +8,6 @@ edition = "2018"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
ash = "0.31"
|
ash = "0.31"
|
||||||
once_cell = "1.3.1"
|
|
||||||
ash-window = "0.5"
|
ash-window = "0.5"
|
||||||
raw-window-handle = "0.3"
|
raw-window-handle = "0.3"
|
||||||
|
|
||||||
|
|
|
@ -14,9 +14,9 @@ fn main() {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
buffer.write(&src).unwrap();
|
buffer.write(&src).unwrap();
|
||||||
let code = include_bytes!("./shader/collatz.spv");
|
let code = include_bytes!("./shader/collatz.spv");
|
||||||
let pipeline = session.create_simple_compute_pipeline(code, 1, 0).unwrap();
|
let pipeline = session.create_simple_compute_pipeline(code, 1).unwrap();
|
||||||
let descriptor_set = session
|
let descriptor_set = session
|
||||||
.create_descriptor_set(&pipeline, &[buffer.vk_buffer()], &[])
|
.create_simple_descriptor_set(&pipeline, &[&buffer])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let query_pool = session.create_query_pool(2).unwrap();
|
let query_pool = session.create_query_pool(2).unwrap();
|
||||||
let mut cmd_buf = session.cmd_buf().unwrap();
|
let mut cmd_buf = session.cmd_buf().unwrap();
|
||||||
|
|
|
@ -8,13 +8,16 @@ use std::any::Any;
|
||||||
use std::sync::{Arc, Mutex, Weak};
|
use std::sync::{Arc, Mutex, Weak};
|
||||||
|
|
||||||
use crate::vulkan;
|
use crate::vulkan;
|
||||||
use crate::{Device, Error};
|
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
|
||||||
|
use crate::PipelineBuilder as PipelineBuilderTrait;
|
||||||
|
use crate::{Device, Error, GpuInfo, SamplerParams};
|
||||||
|
|
||||||
pub type MemFlags = <vulkan::VkDevice as Device>::MemFlags;
|
pub type MemFlags = <vulkan::VkDevice as Device>::MemFlags;
|
||||||
pub type Semaphore = <vulkan::VkDevice as Device>::Semaphore;
|
pub type Semaphore = <vulkan::VkDevice as Device>::Semaphore;
|
||||||
pub type Pipeline = <vulkan::VkDevice as Device>::Pipeline;
|
pub type Pipeline = <vulkan::VkDevice as Device>::Pipeline;
|
||||||
pub type DescriptorSet = <vulkan::VkDevice as Device>::DescriptorSet;
|
pub type DescriptorSet = <vulkan::VkDevice as Device>::DescriptorSet;
|
||||||
pub type QueryPool = <vulkan::VkDevice as Device>::QueryPool;
|
pub type QueryPool = <vulkan::VkDevice as Device>::QueryPool;
|
||||||
|
pub type Sampler = <vulkan::VkDevice as Device>::Sampler;
|
||||||
|
|
||||||
type Fence = <vulkan::VkDevice as Device>::Fence;
|
type Fence = <vulkan::VkDevice as Device>::Fence;
|
||||||
|
|
||||||
|
@ -29,6 +32,7 @@ struct SessionInner {
|
||||||
cmd_buf_pool: Mutex<Vec<(vulkan::CmdBuf, Fence)>>,
|
cmd_buf_pool: Mutex<Vec<(vulkan::CmdBuf, Fence)>>,
|
||||||
/// Command buffers that are still pending (so resources can't be freed).
|
/// Command buffers that are still pending (so resources can't be freed).
|
||||||
pending: Mutex<Vec<SubmittedCmdBufInner>>,
|
pending: Mutex<Vec<SubmittedCmdBufInner>>,
|
||||||
|
gpu_info: GpuInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct CmdBuf {
|
pub struct CmdBuf {
|
||||||
|
@ -63,10 +67,16 @@ struct BufferInner {
|
||||||
session: Weak<SessionInner>,
|
session: Weak<SessionInner>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct PipelineBuilder(vulkan::PipelineBuilder);
|
||||||
|
|
||||||
|
pub struct DescriptorSetBuilder(vulkan::DescriptorSetBuilder);
|
||||||
|
|
||||||
impl Session {
|
impl Session {
|
||||||
pub fn new(device: vulkan::VkDevice) -> Session {
|
pub fn new(device: vulkan::VkDevice) -> Session {
|
||||||
|
let gpu_info = device.query_gpu_info();
|
||||||
Session(Arc::new(SessionInner {
|
Session(Arc::new(SessionInner {
|
||||||
device,
|
device,
|
||||||
|
gpu_info,
|
||||||
cmd_buf_pool: Default::default(),
|
cmd_buf_pool: Default::default(),
|
||||||
pending: Default::default(),
|
pending: Default::default(),
|
||||||
}))
|
}))
|
||||||
|
@ -158,31 +168,28 @@ impl Session {
|
||||||
self.0.device.create_semaphore()
|
self.0.device.create_semaphore()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This creates a pipeline that runs over the buffer.
|
/// This creates a pipeline that operates on some buffers and images.
|
||||||
///
|
///
|
||||||
/// The descriptor set layout is just some number of storage buffers and storage images (this might change).
|
/// The descriptor set layout is just some number of storage buffers and storage images (this might change).
|
||||||
pub unsafe fn create_simple_compute_pipeline(
|
pub unsafe fn create_simple_compute_pipeline(
|
||||||
&self,
|
&self,
|
||||||
code: &[u8],
|
code: &[u8],
|
||||||
n_buffers: u32,
|
n_buffers: u32,
|
||||||
n_images: u32,
|
|
||||||
) -> Result<Pipeline, Error> {
|
) -> Result<Pipeline, Error> {
|
||||||
self.0
|
self.pipeline_builder()
|
||||||
.device
|
.add_buffers(n_buffers)
|
||||||
.create_simple_compute_pipeline(code, n_buffers, n_images)
|
.create_compute_pipeline(self, code)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a descriptor set for a simple pipeline that just references buffers and images.
|
/// Create a descriptor set for a simple pipeline that just references buffers.
|
||||||
///
|
pub unsafe fn create_simple_descriptor_set<'a>(
|
||||||
/// Note: when we do portability, the signature will change to not reference the Vulkan types
|
|
||||||
/// directly.
|
|
||||||
pub unsafe fn create_descriptor_set(
|
|
||||||
&self,
|
&self,
|
||||||
pipeline: &Pipeline,
|
pipeline: &Pipeline,
|
||||||
bufs: &[&vulkan::Buffer],
|
buffers: impl IntoRefs<'a, Buffer>,
|
||||||
images: &[&vulkan::Image],
|
|
||||||
) -> Result<DescriptorSet, Error> {
|
) -> Result<DescriptorSet, Error> {
|
||||||
self.0.device.create_descriptor_set(pipeline, bufs, images)
|
self.descriptor_set_builder()
|
||||||
|
.add_buffers(buffers)
|
||||||
|
.build(self, pipeline)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a query pool for timestamp queries.
|
/// Create a query pool for timestamp queries.
|
||||||
|
@ -193,6 +200,22 @@ impl Session {
|
||||||
pub unsafe fn fetch_query_pool(&self, pool: &QueryPool) -> Result<Vec<f64>, Error> {
|
pub unsafe fn fetch_query_pool(&self, pool: &QueryPool) -> Result<Vec<f64>, Error> {
|
||||||
self.0.device.fetch_query_pool(pool)
|
self.0.device.fetch_query_pool(pool)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub unsafe fn pipeline_builder(&self) -> PipelineBuilder {
|
||||||
|
PipelineBuilder(self.0.device.pipeline_builder())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn descriptor_set_builder(&self) -> DescriptorSetBuilder {
|
||||||
|
DescriptorSetBuilder(self.0.device.descriptor_set_builder())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Sampler, Error> {
|
||||||
|
self.0.device.create_sampler(params)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn gpu_info(&self) -> &GpuInfo {
|
||||||
|
&self.0.gpu_info
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CmdBuf {
|
impl CmdBuf {
|
||||||
|
@ -299,3 +322,134 @@ impl Buffer {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl PipelineBuilder {
|
||||||
|
/// Add buffers to the pipeline. Each has its own binding.
|
||||||
|
pub fn add_buffers(mut self, n_buffers: u32) -> Self {
|
||||||
|
self.0.add_buffers(n_buffers);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add storage images to the pipeline. Each has its own binding.
|
||||||
|
pub fn add_images(mut self, n_images: u32) -> Self {
|
||||||
|
self.0.add_images(n_images);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a binding with a variable-size array of textures.
|
||||||
|
pub fn add_textures(mut self, max_textures: u32) -> Self {
|
||||||
|
self.0.add_textures(max_textures);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn create_compute_pipeline(
|
||||||
|
self,
|
||||||
|
session: &Session,
|
||||||
|
code: &[u8],
|
||||||
|
) -> Result<Pipeline, Error> {
|
||||||
|
self.0.create_compute_pipeline(&session.0.device, code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DescriptorSetBuilder {
|
||||||
|
pub fn add_buffers<'a>(mut self, buffers: impl IntoRefs<'a, Buffer>) -> Self {
|
||||||
|
let vk_buffers = buffers
|
||||||
|
.into_refs()
|
||||||
|
.map(|b| b.vk_buffer())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
self.0.add_buffers(&vk_buffers);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_images<'a>(mut self, images: impl IntoRefs<'a, Image>) -> Self {
|
||||||
|
let vk_images = images.into_refs().map(|i| i.vk_image()).collect::<Vec<_>>();
|
||||||
|
self.0.add_images(&vk_images);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_textures<'a>(mut self, images: impl IntoRefs<'a, Image>) -> Self {
|
||||||
|
let vk_images = images.into_refs().map(|i| i.vk_image()).collect::<Vec<_>>();
|
||||||
|
self.0.add_textures(&vk_images);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn build(
|
||||||
|
self,
|
||||||
|
session: &Session,
|
||||||
|
pipeline: &Pipeline,
|
||||||
|
) -> Result<DescriptorSet, Error> {
|
||||||
|
self.0.build(&session.0.device, pipeline)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This lets us use either a slice or a vector. The type is clunky but it
|
||||||
|
// seems fine enough to use.
|
||||||
|
pub trait IntoRefs<'a, T: 'a> {
|
||||||
|
type Iterator: Iterator<Item = &'a T>;
|
||||||
|
|
||||||
|
fn into_refs(self) -> Self::Iterator;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> IntoRefs<'a, T> for &'a [T] {
|
||||||
|
type Iterator = std::slice::Iter<'a, T>;
|
||||||
|
fn into_refs(self) -> Self::Iterator {
|
||||||
|
self.into_iter()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T] {
|
||||||
|
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
||||||
|
fn into_refs(self) -> Self::Iterator {
|
||||||
|
self.into_iter().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: this will benefit from const generics!
|
||||||
|
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 1] {
|
||||||
|
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
||||||
|
fn into_refs(self) -> Self::Iterator {
|
||||||
|
self.into_iter().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 2] {
|
||||||
|
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
||||||
|
fn into_refs(self) -> Self::Iterator {
|
||||||
|
self.into_iter().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 3] {
|
||||||
|
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
||||||
|
fn into_refs(self) -> Self::Iterator {
|
||||||
|
self.into_iter().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 4] {
|
||||||
|
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
||||||
|
fn into_refs(self) -> Self::Iterator {
|
||||||
|
self.into_iter().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 5] {
|
||||||
|
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
||||||
|
fn into_refs(self) -> Self::Iterator {
|
||||||
|
self.into_iter().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 6] {
|
||||||
|
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
||||||
|
fn into_refs(self) -> Self::Iterator {
|
||||||
|
self.into_iter().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> IntoRefs<'a, T> for Vec<&'a T> {
|
||||||
|
type Iterator = std::vec::IntoIter<&'a T>;
|
||||||
|
fn into_refs(self) -> Self::Iterator {
|
||||||
|
self.into_iter()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -18,10 +18,44 @@ pub enum ImageLayout {
|
||||||
BlitSrc,
|
BlitSrc,
|
||||||
BlitDst,
|
BlitDst,
|
||||||
General,
|
General,
|
||||||
|
ShaderRead,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The type of sampling for image lookup.
|
||||||
|
///
|
||||||
|
/// This could take a lot more params, such as filtering, repeat, behavior
|
||||||
|
/// at edges, etc., but for now we'll keep it simple.
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
pub enum SamplerParams {
|
||||||
|
Nearest,
|
||||||
|
Linear,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
/// Information about the GPU.
|
||||||
|
pub struct GpuInfo {
|
||||||
|
/// The GPU supports descriptor indexing.
|
||||||
|
pub has_descriptor_indexing: bool,
|
||||||
|
/// The GPU supports subgroups.
|
||||||
|
///
|
||||||
|
/// Right now, this just checks for basic subgroup capability (as
|
||||||
|
/// required in Vulkan 1.1), and we should have finer grained
|
||||||
|
/// queries for shuffles, etc.
|
||||||
|
pub has_subgroups: bool,
|
||||||
|
/// Info about subgroup size control, if available.
|
||||||
|
pub subgroup_size: Option<SubgroupSize>,
|
||||||
|
/// The GPU supports a real, grown-ass memory model.
|
||||||
|
pub has_memory_model: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct SubgroupSize {
|
||||||
|
min: u32,
|
||||||
|
max: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait Device: Sized {
|
pub trait Device: Sized {
|
||||||
type Buffer;
|
type Buffer: 'static;
|
||||||
type Image;
|
type Image;
|
||||||
type MemFlags: MemFlags;
|
type MemFlags: MemFlags;
|
||||||
type Pipeline;
|
type Pipeline;
|
||||||
|
@ -30,6 +64,15 @@ pub trait Device: Sized {
|
||||||
type CmdBuf: CmdBuf<Self>;
|
type CmdBuf: CmdBuf<Self>;
|
||||||
type Fence;
|
type Fence;
|
||||||
type Semaphore;
|
type Semaphore;
|
||||||
|
type PipelineBuilder: PipelineBuilder<Self>;
|
||||||
|
type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
|
||||||
|
type Sampler;
|
||||||
|
|
||||||
|
/// Query the GPU info.
|
||||||
|
///
|
||||||
|
/// This method may be expensive, so the hub should call it once and retain
|
||||||
|
/// the info.
|
||||||
|
fn query_gpu_info(&self) -> GpuInfo;
|
||||||
|
|
||||||
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
|
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
|
||||||
|
|
||||||
|
@ -58,19 +101,48 @@ pub trait Device: Sized {
|
||||||
/// Maybe doesn't need result return?
|
/// Maybe doesn't need result return?
|
||||||
unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error>;
|
unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error>;
|
||||||
|
|
||||||
|
/// Start building a pipeline.
|
||||||
|
///
|
||||||
|
/// A pipeline is a bit of shader IR plus a signature for what kinds of resources
|
||||||
|
/// it expects.
|
||||||
|
unsafe fn pipeline_builder(&self) -> Self::PipelineBuilder;
|
||||||
|
|
||||||
|
/// Start building a descriptor set.
|
||||||
|
///
|
||||||
|
/// A descriptor set is a binding of resources for a given pipeline.
|
||||||
|
unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder;
|
||||||
|
|
||||||
|
/// Create a simple compute pipeline that operates on buffers and storage images.
|
||||||
|
///
|
||||||
|
/// This is provided as a convenience but will probably go away, as the functionality
|
||||||
|
/// is subsumed by the builder.
|
||||||
unsafe fn create_simple_compute_pipeline(
|
unsafe fn create_simple_compute_pipeline(
|
||||||
&self,
|
&self,
|
||||||
code: &[u8],
|
code: &[u8],
|
||||||
n_buffers: u32,
|
n_buffers: u32,
|
||||||
n_images: u32,
|
n_images: u32,
|
||||||
) -> Result<Self::Pipeline, Error>;
|
) -> Result<Self::Pipeline, Error> {
|
||||||
|
let mut builder = self.pipeline_builder();
|
||||||
|
builder.add_buffers(n_buffers);
|
||||||
|
builder.add_images(n_images);
|
||||||
|
builder.create_compute_pipeline(self, code)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a descriptor set for a given pipeline, binding buffers and images.
|
||||||
|
///
|
||||||
|
/// This is provided as a convenience but will probably go away, as the functionality
|
||||||
|
/// is subsumed by the builder.
|
||||||
unsafe fn create_descriptor_set(
|
unsafe fn create_descriptor_set(
|
||||||
&self,
|
&self,
|
||||||
pipeline: &Self::Pipeline,
|
pipeline: &Self::Pipeline,
|
||||||
bufs: &[&Self::Buffer],
|
bufs: &[&Self::Buffer],
|
||||||
images: &[&Self::Image],
|
images: &[&Self::Image],
|
||||||
) -> Result<Self::DescriptorSet, Error>;
|
) -> Result<Self::DescriptorSet, Error> {
|
||||||
|
let mut builder = self.descriptor_set_builder();
|
||||||
|
builder.add_buffers(bufs);
|
||||||
|
builder.add_images(images);
|
||||||
|
builder.build(self, pipeline)
|
||||||
|
}
|
||||||
|
|
||||||
fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;
|
fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;
|
||||||
|
|
||||||
|
@ -109,6 +181,8 @@ pub trait Device: Sized {
|
||||||
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
|
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
|
||||||
unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error>;
|
unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error>;
|
||||||
unsafe fn get_fence_status(&self, fence: Self::Fence) -> Result<bool, Error>;
|
unsafe fn get_fence_status(&self, fence: Self::Fence) -> Result<bool, Error>;
|
||||||
|
|
||||||
|
unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Self::Sampler, Error>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait CmdBuf<D: Device> {
|
pub trait CmdBuf<D: Device> {
|
||||||
|
@ -150,7 +224,7 @@ pub trait CmdBuf<D: Device> {
|
||||||
/// This is readily supported in Vulkan, but for portability it is remarkably
|
/// This is readily supported in Vulkan, but for portability it is remarkably
|
||||||
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
|
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
|
||||||
/// kernel, or organize the code not to need it.
|
/// kernel, or organize the code not to need it.
|
||||||
unsafe fn clear_buffer(&self, buffer: &D::Buffer);
|
unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option<u64>);
|
||||||
|
|
||||||
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
|
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
|
||||||
|
|
||||||
|
@ -176,3 +250,34 @@ pub trait MemFlags: Sized + Clone + Copy {
|
||||||
|
|
||||||
fn host_coherent() -> Self;
|
fn host_coherent() -> Self;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A builder for pipelines with more complex layouts.
|
||||||
|
pub trait PipelineBuilder<D: Device> {
|
||||||
|
/// Add buffers to the pipeline. Each has its own binding.
|
||||||
|
fn add_buffers(&mut self, n_buffers: u32);
|
||||||
|
/// Add storage images to the pipeline. Each has its own binding.
|
||||||
|
fn add_images(&mut self, n_images: u32);
|
||||||
|
/// Add a binding with a variable-size array of textures.
|
||||||
|
fn add_textures(&mut self, max_textures: u32);
|
||||||
|
unsafe fn create_compute_pipeline(self, device: &D, code: &[u8]) -> Result<D::Pipeline, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A builder for descriptor sets with more complex layouts.
|
||||||
|
///
|
||||||
|
/// Note: the order needs to match the pipeline building, and it also needs to
|
||||||
|
/// be buffers, then images, then textures.
|
||||||
|
pub trait DescriptorSetBuilder<D: Device> {
|
||||||
|
fn add_buffers(&mut self, buffers: &[&D::Buffer]);
|
||||||
|
/// Add an array of storage images.
|
||||||
|
///
|
||||||
|
/// The images need to be in `ImageLayout::General` layout.
|
||||||
|
fn add_images(&mut self, images: &[&D::Image]);
|
||||||
|
/// Add an array of textures.
|
||||||
|
///
|
||||||
|
/// The images need to be in `ImageLayout::ShaderRead` layout.
|
||||||
|
///
|
||||||
|
/// The same sampler is used for all textures, which is not very sophisticated;
|
||||||
|
/// we should have a way to vary the sampler.
|
||||||
|
fn add_textures(&mut self, images: &[&D::Image]);
|
||||||
|
unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result<D::DescriptorSet, Error>;
|
||||||
|
}
|
||||||
|
|
|
@ -2,20 +2,22 @@
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::ffi::{CStr, CString};
|
use std::ffi::{CStr, CString};
|
||||||
|
use std::os::raw::c_char;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use ash::extensions::{ext::DebugUtils, khr};
|
use ash::extensions::{ext::DebugUtils, khr};
|
||||||
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0};
|
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1};
|
||||||
use ash::{vk, Device, Entry, Instance};
|
use ash::{vk, Device, Entry, Instance};
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
|
|
||||||
use crate::{Device as DeviceTrait, Error, ImageLayout};
|
use crate::{Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize};
|
||||||
|
|
||||||
pub struct VkInstance {
|
pub struct VkInstance {
|
||||||
/// Retain the dynamic lib.
|
/// Retain the dynamic lib.
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
entry: Entry,
|
entry: Entry,
|
||||||
instance: Instance,
|
instance: Instance,
|
||||||
|
get_phys_dev_props: Option<vk::KhrGetPhysicalDeviceProperties2Fn>,
|
||||||
|
vk_version: u32,
|
||||||
_dbg_loader: Option<DebugUtils>,
|
_dbg_loader: Option<DebugUtils>,
|
||||||
_dbg_callbk: Option<vk::DebugUtilsMessengerEXT>,
|
_dbg_callbk: Option<vk::DebugUtilsMessengerEXT>,
|
||||||
}
|
}
|
||||||
|
@ -27,6 +29,7 @@ pub struct VkDevice {
|
||||||
queue: vk::Queue,
|
queue: vk::Queue,
|
||||||
qfi: u32,
|
qfi: u32,
|
||||||
timestamp_period: f32,
|
timestamp_period: f32,
|
||||||
|
gpu_info: GpuInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct RawDevice {
|
struct RawDevice {
|
||||||
|
@ -71,6 +74,7 @@ pub struct Pipeline {
|
||||||
pipeline: vk::Pipeline,
|
pipeline: vk::Pipeline,
|
||||||
descriptor_set_layout: vk::DescriptorSetLayout,
|
descriptor_set_layout: vk::DescriptorSetLayout,
|
||||||
pipeline_layout: vk::PipelineLayout,
|
pipeline_layout: vk::PipelineLayout,
|
||||||
|
max_textures: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DescriptorSet {
|
pub struct DescriptorSet {
|
||||||
|
@ -90,6 +94,30 @@ pub struct QueryPool {
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub struct MemFlags(vk::MemoryPropertyFlags);
|
pub struct MemFlags(vk::MemoryPropertyFlags);
|
||||||
|
|
||||||
|
pub struct PipelineBuilder {
|
||||||
|
bindings: Vec<vk::DescriptorSetLayoutBinding>,
|
||||||
|
binding_flags: Vec<vk::DescriptorBindingFlags>,
|
||||||
|
max_textures: u32,
|
||||||
|
has_descriptor_indexing: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DescriptorSetBuilder {
|
||||||
|
buffers: Vec<vk::Buffer>,
|
||||||
|
images: Vec<vk::ImageView>,
|
||||||
|
textures: Vec<vk::ImageView>,
|
||||||
|
sampler: vk::Sampler,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Extensions {
|
||||||
|
exts: Vec<*const c_char>,
|
||||||
|
exist_exts: Vec<vk::ExtensionProperties>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Layers {
|
||||||
|
layers: Vec<*const c_char>,
|
||||||
|
exist_layers: Vec<vk::LayerProperties>,
|
||||||
|
}
|
||||||
|
|
||||||
unsafe extern "system" fn vulkan_debug_callback(
|
unsafe extern "system" fn vulkan_debug_callback(
|
||||||
message_severity: vk::DebugUtilsMessageSeverityFlagsEXT,
|
message_severity: vk::DebugUtilsMessageSeverityFlagsEXT,
|
||||||
message_type: vk::DebugUtilsMessageTypeFlagsEXT,
|
message_type: vk::DebugUtilsMessageTypeFlagsEXT,
|
||||||
|
@ -119,22 +147,6 @@ unsafe extern "system" fn vulkan_debug_callback(
|
||||||
vk::FALSE
|
vk::FALSE
|
||||||
}
|
}
|
||||||
|
|
||||||
static LAYERS: Lazy<Vec<&'static CStr>> = Lazy::new(|| {
|
|
||||||
let mut layers: Vec<&'static CStr> = vec![];
|
|
||||||
if cfg!(debug_assertions) {
|
|
||||||
layers.push(CStr::from_bytes_with_nul(b"VK_LAYER_KHRONOS_validation\0").unwrap());
|
|
||||||
}
|
|
||||||
layers
|
|
||||||
});
|
|
||||||
|
|
||||||
static EXTS: Lazy<Vec<&'static CStr>> = Lazy::new(|| {
|
|
||||||
let mut exts: Vec<&'static CStr> = vec![];
|
|
||||||
if cfg!(debug_assertions) {
|
|
||||||
exts.push(DebugUtils::name());
|
|
||||||
}
|
|
||||||
exts
|
|
||||||
});
|
|
||||||
|
|
||||||
impl VkInstance {
|
impl VkInstance {
|
||||||
/// Create a new instance.
|
/// Create a new instance.
|
||||||
///
|
///
|
||||||
|
@ -150,50 +162,35 @@ impl VkInstance {
|
||||||
let app_name = CString::new("VkToy").unwrap();
|
let app_name = CString::new("VkToy").unwrap();
|
||||||
let entry = Entry::new()?;
|
let entry = Entry::new()?;
|
||||||
|
|
||||||
let exist_layers = entry.enumerate_instance_layer_properties()?;
|
let mut layers = Layers::new(entry.enumerate_instance_layer_properties()?);
|
||||||
let layers = LAYERS
|
if cfg!(debug_assertions) {
|
||||||
.iter()
|
layers
|
||||||
.filter_map(|&lyr| {
|
.try_add(CStr::from_bytes_with_nul(b"VK_LAYER_KHRONOS_validation\0").unwrap());
|
||||||
exist_layers
|
|
||||||
.iter()
|
|
||||||
.find(|x| CStr::from_ptr(x.layer_name.as_ptr()) == lyr)
|
|
||||||
.map(|_| lyr.as_ptr())
|
|
||||||
.or_else(|| {
|
|
||||||
println!(
|
|
||||||
"Unable to find layer: {}, have you installed the Vulkan SDK?",
|
|
||||||
lyr.to_string_lossy()
|
|
||||||
);
|
|
||||||
None
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let exist_exts = entry.enumerate_instance_extension_properties()?;
|
|
||||||
let mut exts = EXTS
|
|
||||||
.iter()
|
|
||||||
.filter_map(|&ext| {
|
|
||||||
exist_exts
|
|
||||||
.iter()
|
|
||||||
.find(|x| CStr::from_ptr(x.extension_name.as_ptr()) == ext)
|
|
||||||
.map(|_| ext.as_ptr())
|
|
||||||
.or_else(|| {
|
|
||||||
println!(
|
|
||||||
"Unable to find extension: {}, have you installed the Vulkan SDK?",
|
|
||||||
ext.to_string_lossy()
|
|
||||||
);
|
|
||||||
None
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let surface_extensions = match window_handle {
|
|
||||||
Some(ref handle) => ash_window::enumerate_required_extensions(*handle)?,
|
|
||||||
None => vec![],
|
|
||||||
};
|
|
||||||
for extension in surface_extensions {
|
|
||||||
exts.push(extension.as_ptr());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut exts = Extensions::new(entry.enumerate_instance_extension_properties()?);
|
||||||
|
let mut has_debug_ext = false;
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
has_debug_ext = exts.try_add(DebugUtils::name());
|
||||||
|
}
|
||||||
|
// We'll need this to do runtime query of descriptor indexing.
|
||||||
|
let has_phys_dev_props = exts.try_add(vk::KhrGetPhysicalDeviceProperties2Fn::name());
|
||||||
|
if let Some(ref handle) = window_handle {
|
||||||
|
for ext in ash_window::enumerate_required_extensions(*handle)? {
|
||||||
|
exts.try_add(ext);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let supported_version = entry
|
||||||
|
.try_enumerate_instance_version()?
|
||||||
|
.unwrap_or(vk::make_version(1, 0, 0));
|
||||||
|
let vk_version = if supported_version >= vk::make_version(1, 1, 0) {
|
||||||
|
// We need Vulkan 1.1 to do subgroups; most other things can be extensions.
|
||||||
|
vk::make_version(1, 1, 0)
|
||||||
|
} else {
|
||||||
|
vk::make_version(1, 0, 0)
|
||||||
|
};
|
||||||
|
|
||||||
let instance = entry.create_instance(
|
let instance = entry.create_instance(
|
||||||
&vk::InstanceCreateInfo::builder()
|
&vk::InstanceCreateInfo::builder()
|
||||||
.application_info(
|
.application_info(
|
||||||
|
@ -201,14 +198,14 @@ impl VkInstance {
|
||||||
.application_name(&app_name)
|
.application_name(&app_name)
|
||||||
.application_version(0)
|
.application_version(0)
|
||||||
.engine_name(&app_name)
|
.engine_name(&app_name)
|
||||||
.api_version(vk::make_version(1, 0, 0)),
|
.api_version(vk_version),
|
||||||
)
|
)
|
||||||
.enabled_layer_names(&layers)
|
.enabled_layer_names(layers.as_ptrs())
|
||||||
.enabled_extension_names(&exts),
|
.enabled_extension_names(exts.as_ptrs()),
|
||||||
None,
|
None,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let (_dbg_loader, _dbg_callbk) = if cfg!(debug_assertions) {
|
let (_dbg_loader, _dbg_callbk) = if has_debug_ext {
|
||||||
let dbg_info = vk::DebugUtilsMessengerCreateInfoEXT::builder()
|
let dbg_info = vk::DebugUtilsMessengerCreateInfoEXT::builder()
|
||||||
.message_severity(
|
.message_severity(
|
||||||
vk::DebugUtilsMessageSeverityFlagsEXT::ERROR
|
vk::DebugUtilsMessageSeverityFlagsEXT::ERROR
|
||||||
|
@ -233,9 +230,21 @@ impl VkInstance {
|
||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let get_phys_dev_props = if has_phys_dev_props {
|
||||||
|
Some(vk::KhrGetPhysicalDeviceProperties2Fn::load(|name| {
|
||||||
|
std::mem::transmute(
|
||||||
|
entry.get_instance_proc_addr(instance.handle(), name.as_ptr()),
|
||||||
|
)
|
||||||
|
}))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
let vk_instance = VkInstance {
|
let vk_instance = VkInstance {
|
||||||
entry,
|
entry,
|
||||||
instance,
|
instance,
|
||||||
|
get_phys_dev_props,
|
||||||
|
vk_version,
|
||||||
_dbg_loader,
|
_dbg_loader,
|
||||||
_dbg_callbk,
|
_dbg_callbk,
|
||||||
};
|
};
|
||||||
|
@ -256,19 +265,55 @@ impl VkInstance {
|
||||||
let (pdevice, qfi) =
|
let (pdevice, qfi) =
|
||||||
choose_compute_device(&self.instance, &devices, surface).ok_or("no suitable device")?;
|
choose_compute_device(&self.instance, &devices, surface).ok_or("no suitable device")?;
|
||||||
|
|
||||||
|
let mut has_descriptor_indexing = false;
|
||||||
|
if let Some(ref get_phys_dev_props) = self.get_phys_dev_props {
|
||||||
|
let mut descriptor_indexing_features =
|
||||||
|
vk::PhysicalDeviceDescriptorIndexingFeatures::builder();
|
||||||
|
// See https://github.com/MaikKlein/ash/issues/325 for why we do this workaround.
|
||||||
|
let mut features_v2 = vk::PhysicalDeviceFeatures2::default();
|
||||||
|
features_v2.p_next =
|
||||||
|
&mut descriptor_indexing_features as *mut _ as *mut std::ffi::c_void;
|
||||||
|
get_phys_dev_props.get_physical_device_features2_khr(pdevice, &mut features_v2);
|
||||||
|
has_descriptor_indexing = descriptor_indexing_features
|
||||||
|
.shader_storage_image_array_non_uniform_indexing
|
||||||
|
== vk::TRUE
|
||||||
|
&& descriptor_indexing_features.descriptor_binding_variable_descriptor_count
|
||||||
|
== vk::TRUE
|
||||||
|
&& descriptor_indexing_features.runtime_descriptor_array == vk::TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
let queue_priorities = [1.0];
|
let queue_priorities = [1.0];
|
||||||
let queue_create_infos = [vk::DeviceQueueCreateInfo::builder()
|
let queue_create_infos = [vk::DeviceQueueCreateInfo::builder()
|
||||||
.queue_family_index(qfi)
|
.queue_family_index(qfi)
|
||||||
.queue_priorities(&queue_priorities)
|
.queue_priorities(&queue_priorities)
|
||||||
.build()];
|
.build()];
|
||||||
let extensions = match surface {
|
|
||||||
Some(_) => vec![khr::Swapchain::name().as_ptr()],
|
let mut descriptor_indexing = vk::PhysicalDeviceDescriptorIndexingFeatures::builder()
|
||||||
None => vec![],
|
.shader_storage_image_array_non_uniform_indexing(true)
|
||||||
};
|
.descriptor_binding_variable_descriptor_count(true)
|
||||||
let create_info = vk::DeviceCreateInfo::builder()
|
.runtime_descriptor_array(true);
|
||||||
|
|
||||||
|
let mut extensions = Extensions::new(
|
||||||
|
self.instance
|
||||||
|
.enumerate_device_extension_properties(pdevice)?,
|
||||||
|
);
|
||||||
|
if surface.is_some() {
|
||||||
|
extensions.try_add(khr::Swapchain::name());
|
||||||
|
}
|
||||||
|
if has_descriptor_indexing {
|
||||||
|
extensions.try_add(vk::KhrMaintenance3Fn::name());
|
||||||
|
extensions.try_add(vk::ExtDescriptorIndexingFn::name());
|
||||||
|
}
|
||||||
|
let has_subgroup_size = self.vk_version >= vk::make_version(1, 1, 0)
|
||||||
|
&& extensions.try_add(vk::ExtSubgroupSizeControlFn::name());
|
||||||
|
let has_memory_model = self.vk_version >= vk::make_version(1, 1, 0)
|
||||||
|
&& extensions.try_add(vk::KhrVulkanMemoryModelFn::name());
|
||||||
|
let mut create_info = vk::DeviceCreateInfo::builder()
|
||||||
.queue_create_infos(&queue_create_infos)
|
.queue_create_infos(&queue_create_infos)
|
||||||
.enabled_extension_names(&extensions)
|
.enabled_extension_names(extensions.as_ptrs());
|
||||||
.build();
|
if has_descriptor_indexing {
|
||||||
|
create_info = create_info.push_next(&mut descriptor_indexing);
|
||||||
|
}
|
||||||
let device = self.instance.create_device(pdevice, &create_info, None)?;
|
let device = self.instance.create_device(pdevice, &create_info, None)?;
|
||||||
|
|
||||||
let device_mem_props = self.instance.get_physical_device_memory_properties(pdevice);
|
let device_mem_props = self.instance.get_physical_device_memory_properties(pdevice);
|
||||||
|
@ -280,6 +325,28 @@ impl VkInstance {
|
||||||
|
|
||||||
let props = self.instance.get_physical_device_properties(pdevice);
|
let props = self.instance.get_physical_device_properties(pdevice);
|
||||||
let timestamp_period = props.limits.timestamp_period;
|
let timestamp_period = props.limits.timestamp_period;
|
||||||
|
let subgroup_size = if has_subgroup_size {
|
||||||
|
let mut subgroup_props = vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT::default();
|
||||||
|
let mut properties =
|
||||||
|
vk::PhysicalDeviceProperties2::builder().push_next(&mut subgroup_props);
|
||||||
|
self.instance
|
||||||
|
.get_physical_device_properties2(pdevice, &mut properties);
|
||||||
|
Some(SubgroupSize {
|
||||||
|
min: subgroup_props.min_subgroup_size,
|
||||||
|
max: subgroup_props.max_subgroup_size,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: finer grained query of specific subgroup info.
|
||||||
|
let has_subgroups = self.vk_version >= vk::make_version(1, 1, 0);
|
||||||
|
let gpu_info = GpuInfo {
|
||||||
|
has_descriptor_indexing,
|
||||||
|
has_subgroups,
|
||||||
|
subgroup_size,
|
||||||
|
has_memory_model,
|
||||||
|
};
|
||||||
|
|
||||||
Ok(VkDevice {
|
Ok(VkDevice {
|
||||||
device,
|
device,
|
||||||
|
@ -288,11 +355,14 @@ impl VkInstance {
|
||||||
qfi,
|
qfi,
|
||||||
queue,
|
queue,
|
||||||
timestamp_period,
|
timestamp_period,
|
||||||
|
gpu_info,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn swapchain(
|
pub unsafe fn swapchain(
|
||||||
&self,
|
&self,
|
||||||
|
width: usize,
|
||||||
|
height: usize,
|
||||||
device: &VkDevice,
|
device: &VkDevice,
|
||||||
surface: &VkSurface,
|
surface: &VkSurface,
|
||||||
) -> Result<VkSwapchain, Error> {
|
) -> Result<VkSwapchain, Error> {
|
||||||
|
@ -326,8 +396,13 @@ impl VkInstance {
|
||||||
.find(|mode| mode == &vk::PresentModeKHR::MAILBOX)
|
.find(|mode| mode == &vk::PresentModeKHR::MAILBOX)
|
||||||
.unwrap_or(vk::PresentModeKHR::FIFO);
|
.unwrap_or(vk::PresentModeKHR::FIFO);
|
||||||
|
|
||||||
let image_count = 2; // TODO
|
let image_count = capabilities.min_image_count;
|
||||||
let extent = capabilities.current_extent; // TODO: wayland for example will complain here ..
|
let mut extent = capabilities.current_extent;
|
||||||
|
if extent.width == u32::MAX || extent.height == u32::MAX {
|
||||||
|
// We're deciding the size.
|
||||||
|
extent.width = width as u32;
|
||||||
|
extent.height = height as u32;
|
||||||
|
}
|
||||||
|
|
||||||
let create_info = vk::SwapchainCreateInfoKHR::builder()
|
let create_info = vk::SwapchainCreateInfoKHR::builder()
|
||||||
.surface(surface.surface)
|
.surface(surface.surface)
|
||||||
|
@ -375,6 +450,13 @@ impl crate::Device for VkDevice {
|
||||||
type MemFlags = MemFlags;
|
type MemFlags = MemFlags;
|
||||||
type Fence = vk::Fence;
|
type Fence = vk::Fence;
|
||||||
type Semaphore = vk::Semaphore;
|
type Semaphore = vk::Semaphore;
|
||||||
|
type PipelineBuilder = PipelineBuilder;
|
||||||
|
type DescriptorSetBuilder = DescriptorSetBuilder;
|
||||||
|
type Sampler = vk::Sampler;
|
||||||
|
|
||||||
|
fn query_gpu_info(&self) -> GpuInfo {
|
||||||
|
self.gpu_info.clone()
|
||||||
|
}
|
||||||
|
|
||||||
fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
|
fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
|
||||||
unsafe {
|
unsafe {
|
||||||
|
@ -527,151 +609,22 @@ impl crate::Device for VkDevice {
|
||||||
Ok(device.get_fence_status(fence)?)
|
Ok(device.get_fence_status(fence)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This creates a pipeline that runs over the buffer.
|
unsafe fn pipeline_builder(&self) -> PipelineBuilder {
|
||||||
///
|
PipelineBuilder {
|
||||||
/// The descriptor set layout is just some number of storage buffers and storage images (this might change).
|
bindings: Vec::new(),
|
||||||
unsafe fn create_simple_compute_pipeline(
|
binding_flags: Vec::new(),
|
||||||
&self,
|
max_textures: 0,
|
||||||
code: &[u8],
|
has_descriptor_indexing: self.gpu_info.has_descriptor_indexing,
|
||||||
n_buffers: u32,
|
|
||||||
n_images: u32,
|
|
||||||
) -> Result<Pipeline, Error> {
|
|
||||||
let device = &self.device.device;
|
|
||||||
let mut bindings = Vec::new();
|
|
||||||
for i in 0..n_buffers {
|
|
||||||
bindings.push(
|
|
||||||
vk::DescriptorSetLayoutBinding::builder()
|
|
||||||
.binding(i)
|
|
||||||
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
|
|
||||||
.descriptor_count(1)
|
|
||||||
.stage_flags(vk::ShaderStageFlags::COMPUTE)
|
|
||||||
.build(),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
for i in n_buffers..n_buffers + n_images {
|
|
||||||
bindings.push(
|
|
||||||
vk::DescriptorSetLayoutBinding::builder()
|
|
||||||
.binding(i)
|
|
||||||
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
|
|
||||||
.descriptor_count(1)
|
|
||||||
.stage_flags(vk::ShaderStageFlags::COMPUTE)
|
|
||||||
.build(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
let descriptor_set_layout = device.create_descriptor_set_layout(
|
|
||||||
&vk::DescriptorSetLayoutCreateInfo::builder().bindings(&bindings),
|
|
||||||
None,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let descriptor_set_layouts = [descriptor_set_layout];
|
|
||||||
|
|
||||||
// Create compute pipeline.
|
|
||||||
let code_u32 = convert_u32_vec(code);
|
|
||||||
let compute_shader_module = device
|
|
||||||
.create_shader_module(&vk::ShaderModuleCreateInfo::builder().code(&code_u32), None)?;
|
|
||||||
let entry_name = CString::new("main").unwrap();
|
|
||||||
let pipeline_layout = device.create_pipeline_layout(
|
|
||||||
&vk::PipelineLayoutCreateInfo::builder().set_layouts(&descriptor_set_layouts),
|
|
||||||
None,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let pipeline = device
|
|
||||||
.create_compute_pipelines(
|
|
||||||
vk::PipelineCache::null(),
|
|
||||||
&[vk::ComputePipelineCreateInfo::builder()
|
|
||||||
.stage(
|
|
||||||
vk::PipelineShaderStageCreateInfo::builder()
|
|
||||||
.stage(vk::ShaderStageFlags::COMPUTE)
|
|
||||||
.module(compute_shader_module)
|
|
||||||
.name(&entry_name)
|
|
||||||
.build(),
|
|
||||||
)
|
|
||||||
.layout(pipeline_layout)
|
|
||||||
.build()],
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
.map_err(|(_pipeline, err)| err)?[0];
|
|
||||||
Ok(Pipeline {
|
|
||||||
pipeline,
|
|
||||||
pipeline_layout,
|
|
||||||
descriptor_set_layout,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn create_descriptor_set(
|
unsafe fn descriptor_set_builder(&self) -> DescriptorSetBuilder {
|
||||||
&self,
|
DescriptorSetBuilder {
|
||||||
pipeline: &Pipeline,
|
buffers: Vec::new(),
|
||||||
bufs: &[&Buffer],
|
images: Vec::new(),
|
||||||
images: &[&Image],
|
textures: Vec::new(),
|
||||||
) -> Result<DescriptorSet, Error> {
|
sampler: vk::Sampler::null(),
|
||||||
let device = &self.device.device;
|
|
||||||
let mut descriptor_pool_sizes = Vec::new();
|
|
||||||
if !bufs.is_empty() {
|
|
||||||
descriptor_pool_sizes.push(
|
|
||||||
vk::DescriptorPoolSize::builder()
|
|
||||||
.ty(vk::DescriptorType::STORAGE_BUFFER)
|
|
||||||
.descriptor_count(bufs.len() as u32)
|
|
||||||
.build(),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
if !images.is_empty() {
|
|
||||||
descriptor_pool_sizes.push(
|
|
||||||
vk::DescriptorPoolSize::builder()
|
|
||||||
.ty(vk::DescriptorType::STORAGE_IMAGE)
|
|
||||||
.descriptor_count(images.len() as u32)
|
|
||||||
.build(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
let descriptor_pool = device.create_descriptor_pool(
|
|
||||||
&vk::DescriptorPoolCreateInfo::builder()
|
|
||||||
.pool_sizes(&descriptor_pool_sizes)
|
|
||||||
.max_sets(1),
|
|
||||||
None,
|
|
||||||
)?;
|
|
||||||
let descriptor_set_layouts = [pipeline.descriptor_set_layout];
|
|
||||||
let descriptor_sets = device
|
|
||||||
.allocate_descriptor_sets(
|
|
||||||
&vk::DescriptorSetAllocateInfo::builder()
|
|
||||||
.descriptor_pool(descriptor_pool)
|
|
||||||
.set_layouts(&descriptor_set_layouts),
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
for (i, buf) in bufs.iter().enumerate() {
|
|
||||||
let buf_info = vk::DescriptorBufferInfo::builder()
|
|
||||||
.buffer(buf.buffer)
|
|
||||||
.offset(0)
|
|
||||||
.range(vk::WHOLE_SIZE)
|
|
||||||
.build();
|
|
||||||
device.update_descriptor_sets(
|
|
||||||
&[vk::WriteDescriptorSet::builder()
|
|
||||||
.dst_set(descriptor_sets[0])
|
|
||||||
.dst_binding(i as u32)
|
|
||||||
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
|
|
||||||
.buffer_info(&[buf_info])
|
|
||||||
.build()],
|
|
||||||
&[],
|
|
||||||
);
|
|
||||||
}
|
|
||||||
for (i, image) in images.iter().enumerate() {
|
|
||||||
let binding = i + bufs.len();
|
|
||||||
let image_info = vk::DescriptorImageInfo::builder()
|
|
||||||
.sampler(vk::Sampler::null())
|
|
||||||
.image_view(image.image_view)
|
|
||||||
.image_layout(vk::ImageLayout::GENERAL)
|
|
||||||
.build();
|
|
||||||
device.update_descriptor_sets(
|
|
||||||
&[vk::WriteDescriptorSet::builder()
|
|
||||||
.dst_set(descriptor_sets[0])
|
|
||||||
.dst_binding(binding as u32)
|
|
||||||
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
|
|
||||||
.image_info(&[image_info])
|
|
||||||
.build()],
|
|
||||||
&[],
|
|
||||||
);
|
|
||||||
}
|
|
||||||
Ok(DescriptorSet {
|
|
||||||
descriptor_set: descriptor_sets[0],
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_cmd_buf(&self) -> Result<CmdBuf, Error> {
|
fn create_cmd_buf(&self) -> Result<CmdBuf, Error> {
|
||||||
|
@ -797,6 +750,32 @@ impl crate::Device for VkDevice {
|
||||||
device.unmap_memory(buffer.buffer_memory);
|
device.unmap_memory(buffer.buffer_memory);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Self::Sampler, Error> {
|
||||||
|
let device = &self.device.device;
|
||||||
|
let filter = match params {
|
||||||
|
SamplerParams::Linear => vk::Filter::LINEAR,
|
||||||
|
SamplerParams::Nearest => vk::Filter::NEAREST,
|
||||||
|
};
|
||||||
|
let sampler = device.create_sampler(
|
||||||
|
&vk::SamplerCreateInfo::builder()
|
||||||
|
.mag_filter(filter)
|
||||||
|
.min_filter(filter)
|
||||||
|
.mipmap_mode(vk::SamplerMipmapMode::LINEAR)
|
||||||
|
.address_mode_u(vk::SamplerAddressMode::CLAMP_TO_BORDER)
|
||||||
|
.address_mode_v(vk::SamplerAddressMode::CLAMP_TO_BORDER)
|
||||||
|
.address_mode_w(vk::SamplerAddressMode::CLAMP_TO_BORDER)
|
||||||
|
.mip_lod_bias(0.0)
|
||||||
|
.compare_op(vk::CompareOp::NEVER)
|
||||||
|
.min_lod(0.0)
|
||||||
|
.max_lod(0.0)
|
||||||
|
.border_color(vk::BorderColor::FLOAT_TRANSPARENT_BLACK)
|
||||||
|
.max_anisotropy(1.0)
|
||||||
|
.anisotropy_enable(false),
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
Ok(sampler)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl crate::CmdBuf<VkDevice> for CmdBuf {
|
impl crate::CmdBuf<VkDevice> for CmdBuf {
|
||||||
|
@ -902,9 +881,10 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn clear_buffer(&self, buffer: &Buffer) {
|
unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
|
||||||
let device = &self.device.device;
|
let device = &self.device.device;
|
||||||
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0);
|
let size = size.unwrap_or(vk::WHOLE_SIZE);
|
||||||
|
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, size, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
|
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
|
||||||
|
@ -1032,6 +1012,234 @@ impl crate::MemFlags for MemFlags {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl crate::PipelineBuilder<VkDevice> for PipelineBuilder {
|
||||||
|
fn add_buffers(&mut self, n_buffers: u32) {
|
||||||
|
let start = self.bindings.len() as u32;
|
||||||
|
for i in 0..n_buffers {
|
||||||
|
self.bindings.push(
|
||||||
|
vk::DescriptorSetLayoutBinding::builder()
|
||||||
|
.binding(start + i)
|
||||||
|
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
|
||||||
|
.descriptor_count(1)
|
||||||
|
.stage_flags(vk::ShaderStageFlags::COMPUTE)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
self.binding_flags
|
||||||
|
.push(vk::DescriptorBindingFlags::default());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_images(&mut self, n_images: u32) {
|
||||||
|
let start = self.bindings.len() as u32;
|
||||||
|
for i in 0..n_images {
|
||||||
|
self.bindings.push(
|
||||||
|
vk::DescriptorSetLayoutBinding::builder()
|
||||||
|
.binding(start + i)
|
||||||
|
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
|
||||||
|
.descriptor_count(1)
|
||||||
|
.stage_flags(vk::ShaderStageFlags::COMPUTE)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
self.binding_flags
|
||||||
|
.push(vk::DescriptorBindingFlags::default());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_textures(&mut self, max_textures: u32) {
|
||||||
|
let start = self.bindings.len() as u32;
|
||||||
|
self.bindings.push(
|
||||||
|
vk::DescriptorSetLayoutBinding::builder()
|
||||||
|
.binding(start)
|
||||||
|
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
|
||||||
|
.descriptor_count(max_textures)
|
||||||
|
.stage_flags(vk::ShaderStageFlags::COMPUTE)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
let flags = if self.has_descriptor_indexing {
|
||||||
|
vk::DescriptorBindingFlags::VARIABLE_DESCRIPTOR_COUNT
|
||||||
|
} else {
|
||||||
|
Default::default()
|
||||||
|
};
|
||||||
|
self.binding_flags.push(flags);
|
||||||
|
self.max_textures += max_textures;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn create_compute_pipeline(
|
||||||
|
self,
|
||||||
|
device: &VkDevice,
|
||||||
|
code: &[u8],
|
||||||
|
) -> Result<Pipeline, Error> {
|
||||||
|
let device = &device.device.device;
|
||||||
|
let descriptor_set_layout = device.create_descriptor_set_layout(
|
||||||
|
&vk::DescriptorSetLayoutCreateInfo::builder()
|
||||||
|
.bindings(&self.bindings)
|
||||||
|
// It might be a slight optimization not to push this if max_textures = 0
|
||||||
|
.push_next(
|
||||||
|
&mut vk::DescriptorSetLayoutBindingFlagsCreateInfo::builder()
|
||||||
|
.binding_flags(&self.binding_flags)
|
||||||
|
.build(),
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
let descriptor_set_layouts = [descriptor_set_layout];
|
||||||
|
|
||||||
|
// Create compute pipeline.
|
||||||
|
let code_u32 = convert_u32_vec(code);
|
||||||
|
let compute_shader_module = device
|
||||||
|
.create_shader_module(&vk::ShaderModuleCreateInfo::builder().code(&code_u32), None)?;
|
||||||
|
let entry_name = CString::new("main").unwrap();
|
||||||
|
let pipeline_layout = device.create_pipeline_layout(
|
||||||
|
&vk::PipelineLayoutCreateInfo::builder().set_layouts(&descriptor_set_layouts),
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let pipeline = device
|
||||||
|
.create_compute_pipelines(
|
||||||
|
vk::PipelineCache::null(),
|
||||||
|
&[vk::ComputePipelineCreateInfo::builder()
|
||||||
|
.stage(
|
||||||
|
vk::PipelineShaderStageCreateInfo::builder()
|
||||||
|
.stage(vk::ShaderStageFlags::COMPUTE)
|
||||||
|
.module(compute_shader_module)
|
||||||
|
.name(&entry_name)
|
||||||
|
.build(),
|
||||||
|
)
|
||||||
|
.layout(pipeline_layout)
|
||||||
|
.build()],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.map_err(|(_pipeline, err)| err)?[0];
|
||||||
|
Ok(Pipeline {
|
||||||
|
pipeline,
|
||||||
|
pipeline_layout,
|
||||||
|
descriptor_set_layout,
|
||||||
|
max_textures: self.max_textures,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl crate::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
|
||||||
|
fn add_buffers(&mut self, buffers: &[&Buffer]) {
|
||||||
|
self.buffers.extend(buffers.iter().map(|b| b.buffer));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_images(&mut self, images: &[&Image]) {
|
||||||
|
self.images.extend(images.iter().map(|i| i.image_view));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_textures(&mut self, images: &[&Image]) {
|
||||||
|
self.textures.extend(images.iter().map(|i| i.image_view));
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn build(self, device: &VkDevice, pipeline: &Pipeline) -> Result<DescriptorSet, Error> {
|
||||||
|
let device = &device.device.device;
|
||||||
|
let mut descriptor_pool_sizes = Vec::new();
|
||||||
|
if !self.buffers.is_empty() {
|
||||||
|
descriptor_pool_sizes.push(
|
||||||
|
vk::DescriptorPoolSize::builder()
|
||||||
|
.ty(vk::DescriptorType::STORAGE_BUFFER)
|
||||||
|
.descriptor_count(self.buffers.len() as u32)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if !self.images.is_empty() {
|
||||||
|
descriptor_pool_sizes.push(
|
||||||
|
vk::DescriptorPoolSize::builder()
|
||||||
|
.ty(vk::DescriptorType::STORAGE_IMAGE)
|
||||||
|
.descriptor_count(self.images.len() as u32)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if pipeline.max_textures > 0 {
|
||||||
|
descriptor_pool_sizes.push(
|
||||||
|
vk::DescriptorPoolSize::builder()
|
||||||
|
.ty(vk::DescriptorType::STORAGE_IMAGE)
|
||||||
|
.descriptor_count(pipeline.max_textures)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let descriptor_pool = device.create_descriptor_pool(
|
||||||
|
&vk::DescriptorPoolCreateInfo::builder()
|
||||||
|
.pool_sizes(&descriptor_pool_sizes)
|
||||||
|
.max_sets(1),
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
let descriptor_set_layouts = [pipeline.descriptor_set_layout];
|
||||||
|
|
||||||
|
let counts = &[pipeline.max_textures];
|
||||||
|
let variable_info = vk::DescriptorSetVariableDescriptorCountAllocateInfo::builder()
|
||||||
|
.descriptor_counts(counts);
|
||||||
|
let descriptor_sets = device
|
||||||
|
.allocate_descriptor_sets(
|
||||||
|
&vk::DescriptorSetAllocateInfo::builder()
|
||||||
|
.descriptor_pool(descriptor_pool)
|
||||||
|
.set_layouts(&descriptor_set_layouts)
|
||||||
|
.push_next(&mut variable_info.build()),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let mut binding = 0;
|
||||||
|
// Maybe one call to update_descriptor_sets with an array of descriptor_writes?
|
||||||
|
for buf in &self.buffers {
|
||||||
|
device.update_descriptor_sets(
|
||||||
|
&[vk::WriteDescriptorSet::builder()
|
||||||
|
.dst_set(descriptor_sets[0])
|
||||||
|
.dst_binding(binding)
|
||||||
|
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
|
||||||
|
.buffer_info(&[vk::DescriptorBufferInfo::builder()
|
||||||
|
.buffer(*buf)
|
||||||
|
.offset(0)
|
||||||
|
.range(vk::WHOLE_SIZE)
|
||||||
|
.build()])
|
||||||
|
.build()],
|
||||||
|
&[],
|
||||||
|
);
|
||||||
|
binding += 1;
|
||||||
|
}
|
||||||
|
for image in &self.images {
|
||||||
|
device.update_descriptor_sets(
|
||||||
|
&[vk::WriteDescriptorSet::builder()
|
||||||
|
.dst_set(descriptor_sets[0])
|
||||||
|
.dst_binding(binding)
|
||||||
|
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
|
||||||
|
.image_info(&[vk::DescriptorImageInfo::builder()
|
||||||
|
.sampler(vk::Sampler::null())
|
||||||
|
.image_view(*image)
|
||||||
|
.image_layout(vk::ImageLayout::GENERAL)
|
||||||
|
.build()])
|
||||||
|
.build()],
|
||||||
|
&[],
|
||||||
|
);
|
||||||
|
binding += 1;
|
||||||
|
}
|
||||||
|
if !self.textures.is_empty() {
|
||||||
|
let infos = self
|
||||||
|
.textures
|
||||||
|
.iter()
|
||||||
|
.map(|texture| {
|
||||||
|
vk::DescriptorImageInfo::builder()
|
||||||
|
.sampler(self.sampler)
|
||||||
|
.image_view(*texture)
|
||||||
|
.image_layout(vk::ImageLayout::GENERAL)
|
||||||
|
.build()
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
device.update_descriptor_sets(
|
||||||
|
&[vk::WriteDescriptorSet::builder()
|
||||||
|
.dst_set(descriptor_sets[0])
|
||||||
|
.dst_binding(binding)
|
||||||
|
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
|
||||||
|
.image_info(&infos)
|
||||||
|
.build()],
|
||||||
|
&[],
|
||||||
|
);
|
||||||
|
//binding += 1;
|
||||||
|
}
|
||||||
|
Ok(DescriptorSet {
|
||||||
|
descriptor_set: descriptor_sets[0],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl VkSwapchain {
|
impl VkSwapchain {
|
||||||
pub unsafe fn next(&mut self) -> Result<(usize, vk::Semaphore), Error> {
|
pub unsafe fn next(&mut self) -> Result<(usize, vk::Semaphore), Error> {
|
||||||
let acquisition_semaphore = self.acquisition_semaphores[self.acquisition_idx];
|
let acquisition_semaphore = self.acquisition_semaphores[self.acquisition_idx];
|
||||||
|
@ -1075,6 +1283,64 @@ impl VkSwapchain {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Extensions {
|
||||||
|
fn new(exist_exts: Vec<vk::ExtensionProperties>) -> Extensions {
|
||||||
|
Extensions {
|
||||||
|
exist_exts,
|
||||||
|
exts: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_add(&mut self, ext: &'static CStr) -> bool {
|
||||||
|
unsafe {
|
||||||
|
if self
|
||||||
|
.exist_exts
|
||||||
|
.iter()
|
||||||
|
.find(|x| CStr::from_ptr(x.extension_name.as_ptr()) == ext)
|
||||||
|
.is_some()
|
||||||
|
{
|
||||||
|
self.exts.push(ext.as_ptr());
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_ptrs(&self) -> &[*const c_char] {
|
||||||
|
&self.exts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Layers {
|
||||||
|
fn new(exist_layers: Vec<vk::LayerProperties>) -> Layers {
|
||||||
|
Layers {
|
||||||
|
exist_layers,
|
||||||
|
layers: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_add(&mut self, ext: &'static CStr) -> bool {
|
||||||
|
unsafe {
|
||||||
|
if self
|
||||||
|
.exist_layers
|
||||||
|
.iter()
|
||||||
|
.find(|x| CStr::from_ptr(x.layer_name.as_ptr()) == ext)
|
||||||
|
.is_some()
|
||||||
|
{
|
||||||
|
self.layers.push(ext.as_ptr());
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_ptrs(&self) -> &[*const c_char] {
|
||||||
|
&self.layers
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unsafe fn choose_compute_device(
|
unsafe fn choose_compute_device(
|
||||||
instance: &Instance,
|
instance: &Instance,
|
||||||
devices: &[vk::PhysicalDevice],
|
devices: &[vk::PhysicalDevice],
|
||||||
|
@ -1136,5 +1402,6 @@ fn map_image_layout(layout: ImageLayout) -> vk::ImageLayout {
|
||||||
ImageLayout::BlitSrc => vk::ImageLayout::TRANSFER_SRC_OPTIMAL,
|
ImageLayout::BlitSrc => vk::ImageLayout::TRANSFER_SRC_OPTIMAL,
|
||||||
ImageLayout::BlitDst => vk::ImageLayout::TRANSFER_DST_OPTIMAL,
|
ImageLayout::BlitDst => vk::ImageLayout::TRANSFER_DST_OPTIMAL,
|
||||||
ImageLayout::General => vk::ImageLayout::GENERAL,
|
ImageLayout::General => vk::ImageLayout::GENERAL,
|
||||||
|
ImageLayout::ShaderRead => vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,34 +3,33 @@ use piet_gpu_derive::piet_gpu;
|
||||||
piet_gpu! {
|
piet_gpu! {
|
||||||
#[gpu_write]
|
#[gpu_write]
|
||||||
mod annotated {
|
mod annotated {
|
||||||
struct AnnoFill {
|
struct AnnoImage {
|
||||||
// The bbox is always first, as we take advantage of common
|
|
||||||
// layout when binning.
|
|
||||||
bbox: [f32; 4],
|
bbox: [f32; 4],
|
||||||
rgba_color: u32,
|
linewidth: f32,
|
||||||
|
index: u32,
|
||||||
|
offset: [i16; 2],
|
||||||
}
|
}
|
||||||
struct AnnoFillMask {
|
struct AnnoColor {
|
||||||
bbox: [f32; 4],
|
bbox: [f32; 4],
|
||||||
mask: f32,
|
// For stroked fills.
|
||||||
}
|
|
||||||
struct AnnoStroke {
|
|
||||||
bbox: [f32; 4],
|
|
||||||
rgba_color: u32,
|
|
||||||
// For the nonuniform scale case, this needs to be a 2x2 matrix.
|
// For the nonuniform scale case, this needs to be a 2x2 matrix.
|
||||||
// That's expected to be uncommon, so we could special-case it.
|
// That's expected to be uncommon, so we could special-case it.
|
||||||
linewidth: f32,
|
linewidth: f32,
|
||||||
|
rgba_color: u32,
|
||||||
}
|
}
|
||||||
struct AnnoClip {
|
struct AnnoBeginClip {
|
||||||
|
bbox: [f32; 4],
|
||||||
|
linewidth: f32,
|
||||||
|
}
|
||||||
|
struct AnnoEndClip {
|
||||||
bbox: [f32; 4],
|
bbox: [f32; 4],
|
||||||
}
|
}
|
||||||
enum Annotated {
|
enum Annotated {
|
||||||
Nop,
|
Nop,
|
||||||
Stroke(AnnoStroke),
|
Color(TagFlags, AnnoColor),
|
||||||
Fill(AnnoFill),
|
Image(TagFlags, AnnoImage),
|
||||||
FillMask(AnnoFillMask),
|
BeginClip(TagFlags, AnnoBeginClip),
|
||||||
FillMaskInv(AnnoFillMask),
|
EndClip(AnnoEndClip),
|
||||||
BeginClip(AnnoClip),
|
|
||||||
EndClip(AnnoClip),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,12 +8,5 @@ piet_gpu! {
|
||||||
struct BinInstance {
|
struct BinInstance {
|
||||||
element_ix: u32,
|
element_ix: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct BinChunk {
|
|
||||||
// First chunk can have n = 0, subsequent ones not.
|
|
||||||
n: u32,
|
|
||||||
next: Ref<BinChunk>,
|
|
||||||
// Instances follow
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,65 +3,20 @@ use piet_gpu_derive::piet_gpu;
|
||||||
piet_gpu! {
|
piet_gpu! {
|
||||||
#[gpu_write]
|
#[gpu_write]
|
||||||
mod pathseg {
|
mod pathseg {
|
||||||
struct PathFillLine {
|
|
||||||
p0: [f32; 2],
|
|
||||||
p1: [f32; 2],
|
|
||||||
path_ix: u32,
|
|
||||||
// A note: the layout of this struct is shared with
|
|
||||||
// PathStrokeLine. In that case, we actually write
|
|
||||||
// [0.0, 0.0] as the stroke field, to minimize divergence.
|
|
||||||
}
|
|
||||||
struct PathStrokeLine {
|
|
||||||
p0: [f32; 2],
|
|
||||||
p1: [f32; 2],
|
|
||||||
path_ix: u32,
|
|
||||||
// halfwidth in both x and y for binning
|
|
||||||
stroke: [f32; 2],
|
|
||||||
}
|
|
||||||
struct PathFillCubic {
|
|
||||||
p0: [f32; 2],
|
|
||||||
p1: [f32; 2],
|
|
||||||
p2: [f32; 2],
|
|
||||||
p3: [f32; 2],
|
|
||||||
path_ix: u32,
|
|
||||||
// A note: the layout of this struct is shared with
|
|
||||||
// PathStrokeCubic. In that case, we actually write
|
|
||||||
// [0.0, 0.0] as the stroke field, to minimize divergence.
|
|
||||||
}
|
|
||||||
struct PathStrokeCubic {
|
|
||||||
p0: [f32; 2],
|
|
||||||
p1: [f32; 2],
|
|
||||||
p2: [f32; 2],
|
|
||||||
p3: [f32; 2],
|
|
||||||
path_ix: u32,
|
|
||||||
// halfwidth in both x and y for binning
|
|
||||||
stroke: [f32; 2],
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
struct PathQuad {
|
|
||||||
p0: [f32; 2],
|
|
||||||
p1: [f32; 2],
|
|
||||||
p2: [f32; 2],
|
|
||||||
stroke: [f32; 2],
|
|
||||||
}
|
|
||||||
struct PathCubic {
|
struct PathCubic {
|
||||||
p0: [f32; 2],
|
p0: [f32; 2],
|
||||||
p1: [f32; 2],
|
p1: [f32; 2],
|
||||||
p2: [f32; 2],
|
p2: [f32; 2],
|
||||||
p3: [f32; 2],
|
p3: [f32; 2],
|
||||||
|
path_ix: u32,
|
||||||
|
// trans_ix is the transform index. It is 1-based, 0 means no transformation.
|
||||||
|
trans_ix: u32,
|
||||||
|
// Halfwidth in both x and y for binning. For strokes only.
|
||||||
stroke: [f32; 2],
|
stroke: [f32; 2],
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
enum PathSeg {
|
enum PathSeg {
|
||||||
Nop,
|
Nop,
|
||||||
FillLine(PathFillLine),
|
Cubic(TagFlags, PathCubic),
|
||||||
StrokeLine(PathStrokeLine),
|
|
||||||
FillCubic(PathFillCubic),
|
|
||||||
StrokeCubic(PathStrokeCubic),
|
|
||||||
/*
|
|
||||||
Quad(AnnoQuadSeg),
|
|
||||||
Cubic(AnnoCubicSeg),
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,88 +3,41 @@ use piet_gpu_derive::piet_gpu;
|
||||||
piet_gpu! {
|
piet_gpu! {
|
||||||
#[gpu_write]
|
#[gpu_write]
|
||||||
mod ptcl {
|
mod ptcl {
|
||||||
struct CmdCircle {
|
|
||||||
center: [f32; 2],
|
|
||||||
radius: f32,
|
|
||||||
rgba_color: u32,
|
|
||||||
}
|
|
||||||
struct CmdLine {
|
|
||||||
start: [f32; 2],
|
|
||||||
end: [f32; 2],
|
|
||||||
}
|
|
||||||
struct CmdStroke {
|
struct CmdStroke {
|
||||||
// This is really a Ref<Tile>, but we don't have cross-module
|
// This is really a Ref<Tile>, but we don't have cross-module
|
||||||
// references.
|
// references.
|
||||||
tile_ref: u32,
|
tile_ref: u32,
|
||||||
half_width: f32,
|
half_width: f32,
|
||||||
rgba_color: u32,
|
|
||||||
}
|
}
|
||||||
struct CmdFill {
|
struct CmdFill {
|
||||||
// As above, really Ref<Tile>
|
// As above, really Ref<Tile>
|
||||||
tile_ref: u32,
|
tile_ref: u32,
|
||||||
backdrop: i32,
|
backdrop: i32,
|
||||||
|
}
|
||||||
|
struct CmdColor {
|
||||||
rgba_color: u32,
|
rgba_color: u32,
|
||||||
}
|
}
|
||||||
struct CmdFillMask {
|
struct CmdImage {
|
||||||
tile_ref: u32,
|
index: u32,
|
||||||
backdrop: i32,
|
offset: [i16; 2],
|
||||||
mask: f32,
|
|
||||||
}
|
}
|
||||||
struct CmdBeginClip {
|
struct CmdAlpha {
|
||||||
tile_ref: u32,
|
|
||||||
backdrop: i32,
|
|
||||||
}
|
|
||||||
// This is mostly here for expedience and can always be optimized
|
|
||||||
// out for pure clips, but will be useful for blend groups.
|
|
||||||
struct CmdBeginSolidClip {
|
|
||||||
alpha: f32,
|
alpha: f32,
|
||||||
}
|
}
|
||||||
struct CmdEndClip {
|
|
||||||
// This will be 1.0 for clips, but we can imagine blend groups.
|
|
||||||
alpha: f32,
|
|
||||||
}
|
|
||||||
struct CmdSolid {
|
|
||||||
rgba_color: u32,
|
|
||||||
}
|
|
||||||
struct CmdSolidMask {
|
|
||||||
mask: f32,
|
|
||||||
}
|
|
||||||
struct CmdJump {
|
struct CmdJump {
|
||||||
new_ref: u32,
|
new_ref: u32,
|
||||||
}
|
}
|
||||||
enum Cmd {
|
enum Cmd {
|
||||||
End,
|
End,
|
||||||
Circle(CmdCircle),
|
|
||||||
Line(CmdLine),
|
|
||||||
Fill(CmdFill),
|
Fill(CmdFill),
|
||||||
FillMask(CmdFillMask),
|
|
||||||
FillMaskInv(CmdFillMask),
|
|
||||||
BeginClip(CmdBeginClip),
|
|
||||||
BeginSolidClip(CmdBeginSolidClip),
|
|
||||||
EndClip(CmdEndClip),
|
|
||||||
Stroke(CmdStroke),
|
Stroke(CmdStroke),
|
||||||
Solid(CmdSolid),
|
Solid,
|
||||||
SolidMask(CmdSolidMask),
|
Alpha(CmdAlpha),
|
||||||
|
Color(CmdColor),
|
||||||
|
Image(CmdImage),
|
||||||
|
BeginClip,
|
||||||
|
EndClip,
|
||||||
Jump(CmdJump),
|
Jump(CmdJump),
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: strongly consider using f16. If so, these would be
|
|
||||||
// relative to the tile. We're doing f32 for now to minimize
|
|
||||||
// divergence from piet-metal originals.
|
|
||||||
struct Segment {
|
|
||||||
start: [f32; 2],
|
|
||||||
end: [f32; 2],
|
|
||||||
|
|
||||||
// This is used for fills only, but we're including it in
|
|
||||||
// the general structure for simplicity.
|
|
||||||
y_edge: f32,
|
|
||||||
}
|
|
||||||
|
|
||||||
struct SegChunk {
|
|
||||||
n: u32,
|
|
||||||
next: Ref<SegChunk>,
|
|
||||||
// Actually a reference to a variable-sized slice.
|
|
||||||
segs: Ref<Segment>,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use piet_gpu_derive::piet_gpu;
|
use piet_gpu_derive::piet_gpu;
|
||||||
|
|
||||||
pub use self::scene::{
|
pub use self::scene::{
|
||||||
Clip, CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke, Transform,
|
Clip, CubicSeg, Element, FillColor, LineSeg, QuadSeg, SetFillMode, SetLineWidth, Transform,
|
||||||
};
|
};
|
||||||
|
|
||||||
piet_gpu! {
|
piet_gpu! {
|
||||||
|
@ -22,14 +22,12 @@ piet_gpu! {
|
||||||
p2: [f32; 2],
|
p2: [f32; 2],
|
||||||
p3: [f32; 2],
|
p3: [f32; 2],
|
||||||
}
|
}
|
||||||
struct Fill {
|
struct FillColor {
|
||||||
rgba_color: u32,
|
rgba_color: u32,
|
||||||
}
|
}
|
||||||
struct FillMask {
|
struct FillImage {
|
||||||
mask: f32,
|
index: u32,
|
||||||
}
|
offset: [i16; 2],
|
||||||
struct Stroke {
|
|
||||||
rgba_color: u32,
|
|
||||||
}
|
}
|
||||||
struct SetLineWidth {
|
struct SetLineWidth {
|
||||||
width: f32,
|
width: f32,
|
||||||
|
@ -42,27 +40,23 @@ piet_gpu! {
|
||||||
bbox: [f32; 4],
|
bbox: [f32; 4],
|
||||||
// TODO: add alpha?
|
// TODO: add alpha?
|
||||||
}
|
}
|
||||||
|
struct SetFillMode {
|
||||||
|
fill_mode: u32,
|
||||||
|
}
|
||||||
enum Element {
|
enum Element {
|
||||||
Nop,
|
Nop,
|
||||||
// Another approach to encoding would be to use a single
|
|
||||||
// variant but have a bool for fill/stroke. This could be
|
|
||||||
// packed into the tag, so the on-the-wire representation
|
|
||||||
// would be very similar to what's here.
|
|
||||||
StrokeLine(LineSeg),
|
|
||||||
FillLine(LineSeg),
|
|
||||||
|
|
||||||
StrokeQuad(QuadSeg),
|
Line(LineSeg),
|
||||||
FillQuad(QuadSeg),
|
Quad(QuadSeg),
|
||||||
StrokeCubic(CubicSeg),
|
Cubic(CubicSeg),
|
||||||
FillCubic(CubicSeg),
|
|
||||||
Stroke(Stroke),
|
FillColor(FillColor),
|
||||||
Fill(Fill),
|
|
||||||
SetLineWidth(SetLineWidth),
|
SetLineWidth(SetLineWidth),
|
||||||
Transform(Transform),
|
Transform(Transform),
|
||||||
FillMask(FillMask),
|
|
||||||
FillMaskInv(FillMask),
|
|
||||||
BeginClip(Clip),
|
BeginClip(Clip),
|
||||||
EndClip(Clip),
|
EndClip(Clip),
|
||||||
|
FillImage(FillImage),
|
||||||
|
SetFillMode(SetFillMode),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,7 @@ piet_gpu! {
|
||||||
flags: u32,
|
flags: u32,
|
||||||
path_count: u32,
|
path_count: u32,
|
||||||
pathseg_count: u32,
|
pathseg_count: u32,
|
||||||
|
trans_count: u32,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,10 +13,14 @@ piet_gpu! {
|
||||||
}
|
}
|
||||||
// Segments within a tile are represented as a linked list.
|
// Segments within a tile are represented as a linked list.
|
||||||
struct TileSeg {
|
struct TileSeg {
|
||||||
start: [f32; 2],
|
origin: [f32; 2],
|
||||||
end: [f32; 2],
|
vector: [f32; 2],
|
||||||
y_edge: f32,
|
y_edge: f32,
|
||||||
next: Ref<TileSeg>,
|
next: Ref<TileSeg>,
|
||||||
}
|
}
|
||||||
|
struct TransformSeg {
|
||||||
|
mat: [f32; 4],
|
||||||
|
translate: [f32; 2],
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,11 @@ path = "bin/cli.rs"
|
||||||
name = "winit"
|
name = "winit"
|
||||||
path = "bin/winit.rs"
|
path = "bin/winit.rs"
|
||||||
|
|
||||||
|
[[example]]
|
||||||
|
name = "android"
|
||||||
|
path = "bin/android.rs"
|
||||||
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[dependencies.piet-gpu-hal]
|
[dependencies.piet-gpu-hal]
|
||||||
path = "../piet-gpu-hal"
|
path = "../piet-gpu-hal"
|
||||||
|
|
||||||
|
@ -27,3 +32,9 @@ rand = "0.7.3"
|
||||||
roxmltree = "0.13"
|
roxmltree = "0.13"
|
||||||
winit = "0.23"
|
winit = "0.23"
|
||||||
clap = "2.33"
|
clap = "2.33"
|
||||||
|
|
||||||
|
[target.'cfg(target_os = "android")'.dependencies]
|
||||||
|
ndk = "0.3"
|
||||||
|
ndk-sys = "0.2.0"
|
||||||
|
ndk-glue = "0.3"
|
||||||
|
raw-window-handle = "0.3"
|
||||||
|
|
176
piet-gpu/bin/android.rs
Normal file
176
piet-gpu/bin/android.rs
Normal file
|
@ -0,0 +1,176 @@
|
||||||
|
//! Android example
|
||||||
|
//!
|
||||||
|
//! Run using `cargo apk run --example android`
|
||||||
|
//!
|
||||||
|
//! Requires the [cargo-apk] tool.
|
||||||
|
//! [cargo-apk]: https://crates.io/crates/cargo-apk
|
||||||
|
|
||||||
|
use raw_window_handle::android::AndroidHandle;
|
||||||
|
use raw_window_handle::{HasRawWindowHandle, RawWindowHandle};
|
||||||
|
|
||||||
|
use ndk::native_window::NativeWindow;
|
||||||
|
use ndk_glue::Event;
|
||||||
|
|
||||||
|
use piet_gpu_hal::hub;
|
||||||
|
use piet_gpu_hal::vulkan::{QueryPool, VkInstance, VkSurface, VkSwapchain};
|
||||||
|
use piet_gpu_hal::{CmdBuf, Error, ImageLayout};
|
||||||
|
|
||||||
|
use piet_gpu::{render_scene, PietGpuRenderContext, Renderer};
|
||||||
|
|
||||||
|
// Entry point. When building for Android the `ndk_glue::main` attribute is
// applied with backtraces turned on; on other targets this is a plain `main`.
// Panics if `my_main` returns an error.
#[cfg_attr(target_os = "android", ndk_glue::main(backtrace = "on"))]
fn main() {
    my_main().unwrap();
}
|
||||||
|
|
||||||
|
// Thin wrapper around an `AndroidHandle` so that `HasRawWindowHandle` can be
// implemented for it in this file.
struct MyHandle {
    // Raw Android window handle; filled in by `get_handle`.
    handle: AndroidHandle,
}
|
||||||
|
|
||||||
|
// State required to render and present the contents
struct GfxState {
    session: hub::Session,
    renderer: Renderer,
    swapchain: VkSwapchain,
    // Number of frames submitted so far; reduced modulo `NUM_FRAMES` to pick
    // the per-frame query pool and present semaphore.
    current_frame: usize,
    // Slot index (`current_frame % NUM_FRAMES`) of the most recently submitted
    // frame; used to read back that frame's query pool.
    last_frame_idx: usize,
    // Command buffer submission still in flight, if any; waited on at the
    // start of the next redraw.
    submitted: Option<hub::SubmittedCmdBuf>,
    // One query pool per frame slot.
    query_pools: Vec<QueryPool>,
    // One present semaphore per frame slot.
    present_semaphores: Vec<hub::Semaphore>,
}
|
||||||
|
|
||||||
|
const WIDTH: usize = 1080;
|
||||||
|
const HEIGHT: usize = 2280;
|
||||||
|
const NUM_FRAMES: usize = 2;
|
||||||
|
|
||||||
|
fn my_main() -> Result<(), Error> {
|
||||||
|
let mut gfx_state = None;
|
||||||
|
loop {
|
||||||
|
for event in ndk_glue::poll_events() {
|
||||||
|
println!("got event {:?}", event);
|
||||||
|
match event {
|
||||||
|
Event::WindowCreated => {
|
||||||
|
let window = ndk_glue::native_window();
|
||||||
|
if let Some(window) = &*window {
|
||||||
|
let handle = get_handle(window);
|
||||||
|
let (instance, surface) = VkInstance::new(Some(&handle))?;
|
||||||
|
gfx_state = Some(GfxState::new(&instance, surface.as_ref())?);
|
||||||
|
} else {
|
||||||
|
println!("native window is sadly none");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Event::WindowRedrawNeeded => {
|
||||||
|
if let Some(gfx_state) = gfx_state.as_mut() {
|
||||||
|
for _ in 0..10 {
|
||||||
|
gfx_state.redraw();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_handle(window: &NativeWindow) -> MyHandle {
|
||||||
|
println!(
|
||||||
|
"window = {:?}, {}x{}",
|
||||||
|
window.ptr(),
|
||||||
|
window.width(),
|
||||||
|
window.height()
|
||||||
|
);
|
||||||
|
let mut handle = AndroidHandle::empty();
|
||||||
|
handle.a_native_window = window.ptr().as_ptr() as *mut std::ffi::c_void;
|
||||||
|
MyHandle { handle }
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE(review): `HasRawWindowHandle` is an unsafe trait; this impl presumably
// relies on the wrapped native window pointer staying valid for as long as the
// handle is in use — confirm against the caller.
unsafe impl HasRawWindowHandle for MyHandle {
    // Returns the stored handle wrapped in the `Android` variant.
    fn raw_window_handle(&self) -> RawWindowHandle {
        RawWindowHandle::Android(self.handle)
    }
}
|
||||||
|
|
||||||
|
impl GfxState {
|
||||||
|
fn new(instance: &VkInstance, surface: Option<&VkSurface>) -> Result<GfxState, Error> {
|
||||||
|
unsafe {
|
||||||
|
let device = instance.device(surface)?;
|
||||||
|
let mut swapchain =
|
||||||
|
instance.swapchain(WIDTH / 2, HEIGHT / 2, &device, surface.unwrap())?;
|
||||||
|
let session = hub::Session::new(device);
|
||||||
|
let mut current_frame = 0;
|
||||||
|
let present_semaphores = (0..NUM_FRAMES)
|
||||||
|
.map(|_| session.create_semaphore())
|
||||||
|
.collect::<Result<Vec<_>, Error>>()?;
|
||||||
|
let query_pools = (0..NUM_FRAMES)
|
||||||
|
.map(|_| session.create_query_pool(8))
|
||||||
|
.collect::<Result<Vec<_>, Error>>()?;
|
||||||
|
|
||||||
|
let mut ctx = PietGpuRenderContext::new();
|
||||||
|
render_scene(&mut ctx);
|
||||||
|
let n_paths = ctx.path_count();
|
||||||
|
let n_pathseg = ctx.pathseg_count();
|
||||||
|
let n_trans = ctx.pathseg_count();
|
||||||
|
let scene = ctx.get_scene_buf();
|
||||||
|
|
||||||
|
let renderer = Renderer::new(&session, scene, n_paths, n_pathseg, n_trans)?;
|
||||||
|
|
||||||
|
let submitted: Option<hub::SubmittedCmdBuf> = None;
|
||||||
|
let current_frame = 0;
|
||||||
|
let last_frame_idx = 0;
|
||||||
|
Ok(GfxState {
|
||||||
|
session,
|
||||||
|
renderer,
|
||||||
|
swapchain,
|
||||||
|
current_frame,
|
||||||
|
last_frame_idx,
|
||||||
|
submitted,
|
||||||
|
query_pools,
|
||||||
|
present_semaphores,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn redraw(&mut self) {
|
||||||
|
println!("redraw");
|
||||||
|
unsafe {
|
||||||
|
if let Some(submitted) = self.submitted.take() {
|
||||||
|
submitted.wait().unwrap();
|
||||||
|
|
||||||
|
let ts = self
|
||||||
|
.session
|
||||||
|
.fetch_query_pool(&self.query_pools[self.last_frame_idx])
|
||||||
|
.unwrap();
|
||||||
|
println!("render time: {:?}", ts);
|
||||||
|
}
|
||||||
|
let frame_idx = self.current_frame % NUM_FRAMES;
|
||||||
|
let (image_idx, acquisition_semaphore) = self.swapchain.next().unwrap();
|
||||||
|
let swap_image = self.swapchain.image(image_idx);
|
||||||
|
let query_pool = &self.query_pools[frame_idx];
|
||||||
|
let mut cmd_buf = self.session.cmd_buf().unwrap();
|
||||||
|
cmd_buf.begin();
|
||||||
|
self.renderer.record(&mut cmd_buf, &query_pool);
|
||||||
|
|
||||||
|
// Image -> Swapchain
|
||||||
|
cmd_buf.image_barrier(&swap_image, ImageLayout::Undefined, ImageLayout::BlitDst);
|
||||||
|
cmd_buf.blit_image(self.renderer.image_dev.vk_image(), &swap_image);
|
||||||
|
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
|
||||||
|
cmd_buf.finish();
|
||||||
|
|
||||||
|
self.submitted = Some(
|
||||||
|
self.session
|
||||||
|
.run_cmd_buf(
|
||||||
|
cmd_buf,
|
||||||
|
&[acquisition_semaphore],
|
||||||
|
&[self.present_semaphores[frame_idx]],
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
);
|
||||||
|
self.last_frame_idx = frame_idx;
|
||||||
|
|
||||||
|
self.swapchain
|
||||||
|
.present(image_idx, &[self.present_semaphores[frame_idx]])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
self.current_frame += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -248,10 +248,11 @@ fn main() -> Result<(), Error> {
|
||||||
}
|
}
|
||||||
let n_paths = ctx.path_count();
|
let n_paths = ctx.path_count();
|
||||||
let n_pathseg = ctx.pathseg_count();
|
let n_pathseg = ctx.pathseg_count();
|
||||||
|
let n_trans = ctx.trans_count();
|
||||||
let scene = ctx.get_scene_buf();
|
let scene = ctx.get_scene_buf();
|
||||||
//dump_scene(&scene);
|
//dump_scene(&scene);
|
||||||
|
|
||||||
let renderer = Renderer::new(&session, scene, n_paths, n_pathseg)?;
|
let renderer = Renderer::new(&session, scene, n_paths, n_pathseg, n_trans)?;
|
||||||
let image_buf =
|
let image_buf =
|
||||||
session.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?;
|
session.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?;
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,8 @@ fn main() -> Result<(), Error> {
|
||||||
let (instance, surface) = VkInstance::new(Some(&window))?;
|
let (instance, surface) = VkInstance::new(Some(&window))?;
|
||||||
unsafe {
|
unsafe {
|
||||||
let device = instance.device(surface.as_ref())?;
|
let device = instance.device(surface.as_ref())?;
|
||||||
let mut swapchain = instance.swapchain(&device, surface.as_ref().unwrap())?;
|
let mut swapchain =
|
||||||
|
instance.swapchain(WIDTH / 2, HEIGHT / 2, &device, surface.as_ref().unwrap())?;
|
||||||
let session = hub::Session::new(device);
|
let session = hub::Session::new(device);
|
||||||
|
|
||||||
let mut current_frame = 0;
|
let mut current_frame = 0;
|
||||||
|
@ -40,11 +41,13 @@ fn main() -> Result<(), Error> {
|
||||||
render_scene(&mut ctx);
|
render_scene(&mut ctx);
|
||||||
let n_paths = ctx.path_count();
|
let n_paths = ctx.path_count();
|
||||||
let n_pathseg = ctx.pathseg_count();
|
let n_pathseg = ctx.pathseg_count();
|
||||||
|
let n_trans = ctx.trans_count();
|
||||||
let scene = ctx.get_scene_buf();
|
let scene = ctx.get_scene_buf();
|
||||||
|
|
||||||
let renderer = Renderer::new(&session, scene, n_paths, n_pathseg)?;
|
let renderer = Renderer::new(&session, scene, n_paths, n_pathseg, n_trans)?;
|
||||||
|
|
||||||
let mut submitted: Option<hub::SubmittedCmdBuf> = None;
|
let mut submitted: Option<hub::SubmittedCmdBuf> = None;
|
||||||
|
let mut last_frame_idx = 0;
|
||||||
|
|
||||||
event_loop.run(move |event, _, control_flow| {
|
event_loop.run(move |event, _, control_flow| {
|
||||||
*control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event
|
*control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event
|
||||||
|
@ -63,12 +66,16 @@ fn main() -> Result<(), Error> {
|
||||||
}
|
}
|
||||||
Event::RedrawRequested(window_id) if window_id == window.id() => {
|
Event::RedrawRequested(window_id) if window_id == window.id() => {
|
||||||
let frame_idx = current_frame % NUM_FRAMES;
|
let frame_idx = current_frame % NUM_FRAMES;
|
||||||
let query_pool = &query_pools[frame_idx];
|
|
||||||
|
|
||||||
|
// Note: this logic is a little strange. We have two sets of renderer
|
||||||
|
// resources, so we could have two frames in flight (submit two, wait on
|
||||||
|
// the first), but we actually just wait on the last submitted.
|
||||||
|
//
|
||||||
|
// Getting this right will take some thought.
|
||||||
if let Some(submitted) = submitted.take() {
|
if let Some(submitted) = submitted.take() {
|
||||||
submitted.wait().unwrap();
|
submitted.wait().unwrap();
|
||||||
|
|
||||||
let ts = session.fetch_query_pool(query_pool).unwrap();
|
let ts = session.fetch_query_pool(&query_pools[last_frame_idx]).unwrap();
|
||||||
window.set_title(&format!(
|
window.set_title(&format!(
|
||||||
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
|
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
|
||||||
ts[6] * 1e3,
|
ts[6] * 1e3,
|
||||||
|
@ -82,8 +89,10 @@ fn main() -> Result<(), Error> {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
|
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
|
||||||
let swap_image = swapchain.image(image_idx);
|
let swap_image = swapchain.image(image_idx);
|
||||||
|
let query_pool = &query_pools[frame_idx];
|
||||||
let mut cmd_buf = session.cmd_buf().unwrap();
|
let mut cmd_buf = session.cmd_buf().unwrap();
|
||||||
cmd_buf.begin();
|
cmd_buf.begin();
|
||||||
renderer.record(&mut cmd_buf, &query_pool);
|
renderer.record(&mut cmd_buf, &query_pool);
|
||||||
|
@ -105,6 +114,7 @@ fn main() -> Result<(), Error> {
|
||||||
&[present_semaphores[frame_idx]],
|
&[present_semaphores[frame_idx]],
|
||||||
)
|
)
|
||||||
.unwrap());
|
.unwrap());
|
||||||
|
last_frame_idx = frame_idx;
|
||||||
|
|
||||||
swapchain
|
swapchain
|
||||||
.present(image_idx, &[present_semaphores[frame_idx]])
|
.present(image_idx, &[present_semaphores[frame_idx]])
|
||||||
|
|
|
@ -1,18 +1,20 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Code auto-generated by piet-gpu-derive
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
struct AnnoFillRef {
|
struct AnnoImageRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AnnoFillMaskRef {
|
struct AnnoColorRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AnnoStrokeRef {
|
struct AnnoBeginClipRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AnnoClipRef {
|
struct AnnoEndClipRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -20,210 +22,204 @@ struct AnnotatedRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AnnoFill {
|
struct AnnoImage {
|
||||||
vec4 bbox;
|
vec4 bbox;
|
||||||
|
float linewidth;
|
||||||
|
uint index;
|
||||||
|
ivec2 offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define AnnoImage_size 28
|
||||||
|
|
||||||
|
AnnoImageRef AnnoImage_index(AnnoImageRef ref, uint index) {
|
||||||
|
return AnnoImageRef(ref.offset + index * AnnoImage_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct AnnoColor {
|
||||||
|
vec4 bbox;
|
||||||
|
float linewidth;
|
||||||
uint rgba_color;
|
uint rgba_color;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define AnnoFill_size 20
|
#define AnnoColor_size 24
|
||||||
|
|
||||||
AnnoFillRef AnnoFill_index(AnnoFillRef ref, uint index) {
|
AnnoColorRef AnnoColor_index(AnnoColorRef ref, uint index) {
|
||||||
return AnnoFillRef(ref.offset + index * AnnoFill_size);
|
return AnnoColorRef(ref.offset + index * AnnoColor_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct AnnoFillMask {
|
struct AnnoBeginClip {
|
||||||
vec4 bbox;
|
vec4 bbox;
|
||||||
float mask;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define AnnoFillMask_size 20
|
|
||||||
|
|
||||||
AnnoFillMaskRef AnnoFillMask_index(AnnoFillMaskRef ref, uint index) {
|
|
||||||
return AnnoFillMaskRef(ref.offset + index * AnnoFillMask_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct AnnoStroke {
|
|
||||||
vec4 bbox;
|
|
||||||
uint rgba_color;
|
|
||||||
float linewidth;
|
float linewidth;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define AnnoStroke_size 24
|
#define AnnoBeginClip_size 20
|
||||||
|
|
||||||
AnnoStrokeRef AnnoStroke_index(AnnoStrokeRef ref, uint index) {
|
AnnoBeginClipRef AnnoBeginClip_index(AnnoBeginClipRef ref, uint index) {
|
||||||
return AnnoStrokeRef(ref.offset + index * AnnoStroke_size);
|
return AnnoBeginClipRef(ref.offset + index * AnnoBeginClip_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct AnnoClip {
|
struct AnnoEndClip {
|
||||||
vec4 bbox;
|
vec4 bbox;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define AnnoClip_size 16
|
#define AnnoEndClip_size 16
|
||||||
|
|
||||||
AnnoClipRef AnnoClip_index(AnnoClipRef ref, uint index) {
|
AnnoEndClipRef AnnoEndClip_index(AnnoEndClipRef ref, uint index) {
|
||||||
return AnnoClipRef(ref.offset + index * AnnoClip_size);
|
return AnnoEndClipRef(ref.offset + index * AnnoEndClip_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define Annotated_Nop 0
|
#define Annotated_Nop 0
|
||||||
#define Annotated_Stroke 1
|
#define Annotated_Color 1
|
||||||
#define Annotated_Fill 2
|
#define Annotated_Image 2
|
||||||
#define Annotated_FillMask 3
|
#define Annotated_BeginClip 3
|
||||||
#define Annotated_FillMaskInv 4
|
#define Annotated_EndClip 4
|
||||||
#define Annotated_BeginClip 5
|
#define Annotated_size 32
|
||||||
#define Annotated_EndClip 6
|
|
||||||
#define Annotated_size 28
|
|
||||||
|
|
||||||
AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) {
|
AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) {
|
||||||
return AnnotatedRef(ref.offset + index * Annotated_size);
|
return AnnotatedRef(ref.offset + index * Annotated_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoFill AnnoFill_read(AnnoFillRef ref) {
|
struct AnnotatedTag {
|
||||||
|
uint tag;
|
||||||
|
uint flags;
|
||||||
|
};
|
||||||
|
|
||||||
|
AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = annotated[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = annotated[ix + 1];
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
uint raw2 = annotated[ix + 2];
|
uint raw2 = read_mem(a, ix + 2);
|
||||||
uint raw3 = annotated[ix + 3];
|
uint raw3 = read_mem(a, ix + 3);
|
||||||
uint raw4 = annotated[ix + 4];
|
uint raw4 = read_mem(a, ix + 4);
|
||||||
AnnoFill s;
|
uint raw5 = read_mem(a, ix + 5);
|
||||||
|
uint raw6 = read_mem(a, ix + 6);
|
||||||
|
AnnoImage s;
|
||||||
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
s.rgba_color = raw4;
|
s.linewidth = uintBitsToFloat(raw4);
|
||||||
|
s.index = raw5;
|
||||||
|
s.offset = ivec2(int(raw6 << 16) >> 16, int(raw6) >> 16);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AnnoFill_write(AnnoFillRef ref, AnnoFill s) {
|
void AnnoImage_write(Alloc a, AnnoImageRef ref, AnnoImage s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
annotated[ix + 0] = floatBitsToUint(s.bbox.x);
|
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
|
||||||
annotated[ix + 1] = floatBitsToUint(s.bbox.y);
|
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
|
||||||
annotated[ix + 2] = floatBitsToUint(s.bbox.z);
|
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
|
||||||
annotated[ix + 3] = floatBitsToUint(s.bbox.w);
|
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
|
||||||
annotated[ix + 4] = s.rgba_color;
|
write_mem(a, ix + 4, floatBitsToUint(s.linewidth));
|
||||||
|
write_mem(a, ix + 5, s.index);
|
||||||
|
write_mem(a, ix + 6, (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16));
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoFillMask AnnoFillMask_read(AnnoFillMaskRef ref) {
|
AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = annotated[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = annotated[ix + 1];
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
uint raw2 = annotated[ix + 2];
|
uint raw2 = read_mem(a, ix + 2);
|
||||||
uint raw3 = annotated[ix + 3];
|
uint raw3 = read_mem(a, ix + 3);
|
||||||
uint raw4 = annotated[ix + 4];
|
uint raw4 = read_mem(a, ix + 4);
|
||||||
AnnoFillMask s;
|
uint raw5 = read_mem(a, ix + 5);
|
||||||
|
AnnoColor s;
|
||||||
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
s.mask = uintBitsToFloat(raw4);
|
s.linewidth = uintBitsToFloat(raw4);
|
||||||
|
s.rgba_color = raw5;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AnnoFillMask_write(AnnoFillMaskRef ref, AnnoFillMask s) {
|
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
annotated[ix + 0] = floatBitsToUint(s.bbox.x);
|
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
|
||||||
annotated[ix + 1] = floatBitsToUint(s.bbox.y);
|
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
|
||||||
annotated[ix + 2] = floatBitsToUint(s.bbox.z);
|
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
|
||||||
annotated[ix + 3] = floatBitsToUint(s.bbox.w);
|
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
|
||||||
annotated[ix + 4] = floatBitsToUint(s.mask);
|
write_mem(a, ix + 4, floatBitsToUint(s.linewidth));
|
||||||
|
write_mem(a, ix + 5, s.rgba_color);
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) {
|
AnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = annotated[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = annotated[ix + 1];
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
uint raw2 = annotated[ix + 2];
|
uint raw2 = read_mem(a, ix + 2);
|
||||||
uint raw3 = annotated[ix + 3];
|
uint raw3 = read_mem(a, ix + 3);
|
||||||
uint raw4 = annotated[ix + 4];
|
uint raw4 = read_mem(a, ix + 4);
|
||||||
uint raw5 = annotated[ix + 5];
|
AnnoBeginClip s;
|
||||||
AnnoStroke s;
|
|
||||||
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
s.rgba_color = raw4;
|
s.linewidth = uintBitsToFloat(raw4);
|
||||||
s.linewidth = uintBitsToFloat(raw5);
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AnnoStroke_write(AnnoStrokeRef ref, AnnoStroke s) {
|
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
annotated[ix + 0] = floatBitsToUint(s.bbox.x);
|
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
|
||||||
annotated[ix + 1] = floatBitsToUint(s.bbox.y);
|
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
|
||||||
annotated[ix + 2] = floatBitsToUint(s.bbox.z);
|
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
|
||||||
annotated[ix + 3] = floatBitsToUint(s.bbox.w);
|
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
|
||||||
annotated[ix + 4] = s.rgba_color;
|
write_mem(a, ix + 4, floatBitsToUint(s.linewidth));
|
||||||
annotated[ix + 5] = floatBitsToUint(s.linewidth);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoClip AnnoClip_read(AnnoClipRef ref) {
|
AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = annotated[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = annotated[ix + 1];
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
uint raw2 = annotated[ix + 2];
|
uint raw2 = read_mem(a, ix + 2);
|
||||||
uint raw3 = annotated[ix + 3];
|
uint raw3 = read_mem(a, ix + 3);
|
||||||
AnnoClip s;
|
AnnoEndClip s;
|
||||||
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AnnoClip_write(AnnoClipRef ref, AnnoClip s) {
|
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
annotated[ix + 0] = floatBitsToUint(s.bbox.x);
|
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
|
||||||
annotated[ix + 1] = floatBitsToUint(s.bbox.y);
|
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
|
||||||
annotated[ix + 2] = floatBitsToUint(s.bbox.z);
|
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
|
||||||
annotated[ix + 3] = floatBitsToUint(s.bbox.w);
|
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
|
||||||
}
|
}
|
||||||
|
|
||||||
uint Annotated_tag(AnnotatedRef ref) {
|
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) {
|
||||||
return annotated[ref.offset >> 2];
|
uint tag_and_flags = read_mem(a, ref.offset >> 2);
|
||||||
|
return AnnotatedTag(tag_and_flags & 0xffff, tag_and_flags >> 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoStroke Annotated_Stroke_read(AnnotatedRef ref) {
|
AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref) {
|
||||||
return AnnoStroke_read(AnnoStrokeRef(ref.offset + 4));
|
return AnnoColor_read(a, AnnoColorRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoFill Annotated_Fill_read(AnnotatedRef ref) {
|
AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref) {
|
||||||
return AnnoFill_read(AnnoFillRef(ref.offset + 4));
|
return AnnoImage_read(a, AnnoImageRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoFillMask Annotated_FillMask_read(AnnotatedRef ref) {
|
AnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref) {
|
||||||
return AnnoFillMask_read(AnnoFillMaskRef(ref.offset + 4));
|
return AnnoBeginClip_read(a, AnnoBeginClipRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoFillMask Annotated_FillMaskInv_read(AnnotatedRef ref) {
|
AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) {
|
||||||
return AnnoFillMask_read(AnnoFillMaskRef(ref.offset + 4));
|
return AnnoEndClip_read(a, AnnoEndClipRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoClip Annotated_BeginClip_read(AnnotatedRef ref) {
|
void Annotated_Nop_write(Alloc a, AnnotatedRef ref) {
|
||||||
return AnnoClip_read(AnnoClipRef(ref.offset + 4));
|
write_mem(a, ref.offset >> 2, Annotated_Nop);
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoClip Annotated_EndClip_read(AnnotatedRef ref) {
|
void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) {
|
||||||
return AnnoClip_read(AnnoClipRef(ref.offset + 4));
|
write_mem(a, ref.offset >> 2, (flags << 16) | Annotated_Color);
|
||||||
|
AnnoColor_write(a, AnnoColorRef(ref.offset + 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Annotated_Nop_write(AnnotatedRef ref) {
|
void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s) {
|
||||||
annotated[ref.offset >> 2] = Annotated_Nop;
|
write_mem(a, ref.offset >> 2, (flags << 16) | Annotated_Image);
|
||||||
|
AnnoImage_write(a, AnnoImageRef(ref.offset + 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Annotated_Stroke_write(AnnotatedRef ref, AnnoStroke s) {
|
void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginClip s) {
|
||||||
annotated[ref.offset >> 2] = Annotated_Stroke;
|
write_mem(a, ref.offset >> 2, (flags << 16) | Annotated_BeginClip);
|
||||||
AnnoStroke_write(AnnoStrokeRef(ref.offset + 4), s);
|
AnnoBeginClip_write(a, AnnoBeginClipRef(ref.offset + 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Annotated_Fill_write(AnnotatedRef ref, AnnoFill s) {
|
void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) {
|
||||||
annotated[ref.offset >> 2] = Annotated_Fill;
|
write_mem(a, ref.offset >> 2, Annotated_EndClip);
|
||||||
AnnoFill_write(AnnoFillRef(ref.offset + 4), s);
|
AnnoEndClip_write(a, AnnoEndClipRef(ref.offset + 4), s);
|
||||||
}
|
|
||||||
|
|
||||||
void Annotated_FillMask_write(AnnotatedRef ref, AnnoFillMask s) {
|
|
||||||
annotated[ref.offset >> 2] = Annotated_FillMask;
|
|
||||||
AnnoFillMask_write(AnnoFillMaskRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Annotated_FillMaskInv_write(AnnotatedRef ref, AnnoFillMask s) {
|
|
||||||
annotated[ref.offset >> 2] = Annotated_FillMaskInv;
|
|
||||||
AnnoFillMask_write(AnnoFillMaskRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Annotated_BeginClip_write(AnnotatedRef ref, AnnoClip s) {
|
|
||||||
annotated[ref.offset >> 2] = Annotated_BeginClip;
|
|
||||||
AnnoClip_write(AnnoClipRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Annotated_EndClip_write(AnnotatedRef ref, AnnoClip s) {
|
|
||||||
annotated[ref.offset >> 2] = Annotated_EndClip;
|
|
||||||
AnnoClip_write(AnnoClipRef(ref.offset + 4), s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Propagation of tile backdrop for filling.
|
// Propagation of tile backdrop for filling.
|
||||||
//
|
//
|
||||||
// Each thread reads one path element and calculates the number of spanned tiles
|
// Each thread reads one path element and calculates the number of spanned tiles
|
||||||
|
@ -13,6 +15,7 @@
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "mem.h"
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
|
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
|
||||||
|
@ -20,54 +23,49 @@
|
||||||
|
|
||||||
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
|
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
|
||||||
|
|
||||||
layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||||
uint[] annotated;
|
Config conf;
|
||||||
};
|
|
||||||
|
|
||||||
// This is really only used for n_elements; maybe we can handle that
|
|
||||||
// a different way, but it's convenient to have the same signature as
|
|
||||||
// tile allocation.
|
|
||||||
layout(set = 0, binding = 1) readonly buffer AllocBuf {
|
|
||||||
uint n_elements; // paths
|
|
||||||
uint n_pathseg;
|
|
||||||
uint alloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer TileBuf {
|
|
||||||
uint[] tile;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "annotated.h"
|
#include "annotated.h"
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
|
|
||||||
shared uint sh_row_count[BACKDROP_WG];
|
shared uint sh_row_count[BACKDROP_WG];
|
||||||
shared uint sh_row_base[BACKDROP_WG];
|
shared Alloc sh_row_alloc[BACKDROP_WG];
|
||||||
shared uint sh_row_width[BACKDROP_WG];
|
shared uint sh_row_width[BACKDROP_WG];
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
|
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||||
|
|
||||||
// Work assignment: 1 thread : 1 path element
|
// Work assignment: 1 thread : 1 path element
|
||||||
uint row_count = 0;
|
uint row_count = 0;
|
||||||
if (element_ix < n_elements) {
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
uint tag = Annotated_tag(ref);
|
if (element_ix < conf.n_elements) {
|
||||||
switch (tag) {
|
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
|
||||||
case Annotated_Fill:
|
switch (tag.tag) {
|
||||||
case Annotated_FillMask:
|
case Annotated_Image:
|
||||||
case Annotated_FillMaskInv:
|
|
||||||
case Annotated_BeginClip:
|
case Annotated_BeginClip:
|
||||||
PathRef path_ref = PathRef(element_ix * Path_size);
|
case Annotated_Color:
|
||||||
Path path = Path_read(path_ref);
|
if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Fall through.
|
||||||
|
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
|
||||||
|
Path path = Path_read(conf.tile_alloc, path_ref);
|
||||||
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
|
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
|
||||||
row_count = path.bbox.w - path.bbox.y;
|
row_count = path.bbox.w - path.bbox.y;
|
||||||
if (row_count == 1) {
|
// Paths that don't cross tile top edges don't have backdrops.
|
||||||
|
// Don't apply the optimization to paths that may cross the y = 0
|
||||||
|
// top edge, but clipped to 1 row.
|
||||||
|
if (row_count == 1 && path.bbox.y > 0) {
|
||||||
// Note: this can probably be expanded to width = 2 as
|
// Note: this can probably be expanded to width = 2 as
|
||||||
// long as it doesn't cross the left edge.
|
// long as it doesn't cross the left edge.
|
||||||
row_count = 0;
|
row_count = 0;
|
||||||
}
|
}
|
||||||
sh_row_base[th_ix] = (path.tiles.offset >> 2) + 1;
|
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||||
|
sh_row_alloc[th_ix] = path_alloc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,16 +91,19 @@ void main() {
|
||||||
el_ix = probe;
|
el_ix = probe;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
|
|
||||||
uint width = sh_row_width[el_ix];
|
uint width = sh_row_width[el_ix];
|
||||||
|
if (width > 0 && mem_ok) {
|
||||||
// Process one row sequentially
|
// Process one row sequentially
|
||||||
// Read backdrop value per tile and prefix sum it
|
// Read backdrop value per tile and prefix sum it
|
||||||
uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width;
|
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
||||||
uint sum = tile[tile_el_ix];
|
uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
|
||||||
|
uint tile_el_ix = (tiles_alloc.offset >> 2) + 1 + seq_ix * 2 * width;
|
||||||
|
uint sum = read_mem(tiles_alloc, tile_el_ix);
|
||||||
for (uint x = 1; x < width; x++) {
|
for (uint x = 1; x < width; x++) {
|
||||||
tile_el_ix += 2;
|
tile_el_ix += 2;
|
||||||
sum += tile[tile_el_ix];
|
sum += read_mem(tiles_alloc, tile_el_ix);
|
||||||
tile[tile_el_ix] = sum;
|
write_mem(tiles_alloc, tile_el_ix, sum);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// The binning stage of the pipeline.
|
// The binning stage of the pipeline.
|
||||||
//
|
//
|
||||||
// Each workgroup processes N_TILE paths.
|
// Each workgroup processes N_TILE paths.
|
||||||
|
@ -7,21 +9,13 @@
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "mem.h"
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
layout(local_size_x = N_TILE, local_size_y = 1) in;
|
layout(local_size_x = N_TILE, local_size_y = 1) in;
|
||||||
|
|
||||||
layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||||
uint[] annotated;
|
Config conf;
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 1) buffer AllocBuf {
|
|
||||||
uint n_elements; // paths
|
|
||||||
uint alloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer BinsBuf {
|
|
||||||
uint[] bins;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "annotated.h"
|
#include "annotated.h"
|
||||||
|
@ -38,39 +32,41 @@ layout(set = 0, binding = 2) buffer BinsBuf {
|
||||||
// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps)
|
// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps)
|
||||||
shared uint bitmaps[N_SLICE][N_TILE];
|
shared uint bitmaps[N_SLICE][N_TILE];
|
||||||
shared uint count[N_SLICE][N_TILE];
|
shared uint count[N_SLICE][N_TILE];
|
||||||
shared uint sh_chunk_start[N_TILE];
|
shared Alloc sh_chunk_alloc[N_TILE];
|
||||||
|
shared bool sh_alloc_failed;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint my_n_elements = n_elements;
|
uint my_n_elements = conf.n_elements;
|
||||||
uint my_partition = gl_WorkGroupID.x;
|
uint my_partition = gl_WorkGroupID.x;
|
||||||
|
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
for (uint i = 0; i < N_SLICE; i++) {
|
||||||
bitmaps[i][gl_LocalInvocationID.x] = 0;
|
bitmaps[i][gl_LocalInvocationID.x] = 0;
|
||||||
}
|
}
|
||||||
|
if (gl_LocalInvocationID.x == 0) {
|
||||||
|
sh_alloc_failed = false;
|
||||||
|
}
|
||||||
barrier();
|
barrier();
|
||||||
|
|
||||||
// Read inputs and determine coverage of bins
|
// Read inputs and determine coverage of bins
|
||||||
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
|
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
|
||||||
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
|
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||||
uint tag = Annotated_Nop;
|
uint tag = Annotated_Nop;
|
||||||
if (element_ix < my_n_elements) {
|
if (element_ix < my_n_elements) {
|
||||||
tag = Annotated_tag(ref);
|
tag = Annotated_tag(conf.anno_alloc, ref).tag;
|
||||||
}
|
}
|
||||||
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Annotated_Fill:
|
case Annotated_Color:
|
||||||
case Annotated_FillMask:
|
case Annotated_Image:
|
||||||
case Annotated_FillMaskInv:
|
|
||||||
case Annotated_Stroke:
|
|
||||||
case Annotated_BeginClip:
|
case Annotated_BeginClip:
|
||||||
case Annotated_EndClip:
|
case Annotated_EndClip:
|
||||||
// Note: we take advantage of the fact that these drawing elements
|
// Note: we take advantage of the fact that these drawing elements
|
||||||
// have the bbox at the same place in their layout.
|
// have the bbox at the same place in their layout.
|
||||||
AnnoFill fill = Annotated_Fill_read(ref);
|
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
|
||||||
x0 = int(floor(fill.bbox.x * SX));
|
x0 = int(floor(clip.bbox.x * SX));
|
||||||
y0 = int(floor(fill.bbox.y * SY));
|
y0 = int(floor(clip.bbox.y * SY));
|
||||||
x1 = int(ceil(fill.bbox.z * SX));
|
x1 = int(ceil(clip.bbox.z * SX));
|
||||||
y1 = int(ceil(fill.bbox.w * SY));
|
y1 = int(ceil(clip.bbox.w * SY));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,16 +74,18 @@ void main() {
|
||||||
// trying to keep divergence low.
|
// trying to keep divergence low.
|
||||||
// Right now, it's just a bbox, but we'll get finer with
|
// Right now, it's just a bbox, but we'll get finer with
|
||||||
// segments.
|
// segments.
|
||||||
x0 = clamp(x0, 0, N_TILE_X);
|
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
|
||||||
x1 = clamp(x1, x0, N_TILE_X);
|
uint height_in_bins = (conf.height_in_tiles + N_TILE_Y - 1)/N_TILE_Y;
|
||||||
y0 = clamp(y0, 0, N_TILE_Y);
|
x0 = clamp(x0, 0, int(width_in_bins));
|
||||||
y1 = clamp(y1, y0, N_TILE_Y);
|
x1 = clamp(x1, x0, int(width_in_bins));
|
||||||
|
y0 = clamp(y0, 0, int(height_in_bins));
|
||||||
|
y1 = clamp(y1, y0, int(height_in_bins));
|
||||||
if (x0 == x1) y1 = y0;
|
if (x0 == x1) y1 = y0;
|
||||||
int x = x0, y = y0;
|
int x = x0, y = y0;
|
||||||
uint my_slice = gl_LocalInvocationID.x / 32;
|
uint my_slice = gl_LocalInvocationID.x / 32;
|
||||||
uint my_mask = 1 << (gl_LocalInvocationID.x & 31);
|
uint my_mask = 1 << (gl_LocalInvocationID.x & 31);
|
||||||
while (y < y1) {
|
while (y < y1) {
|
||||||
atomicOr(bitmaps[my_slice][y * N_TILE_X + x], my_mask);
|
atomicOr(bitmaps[my_slice][y * width_in_bins + x], my_mask);
|
||||||
x++;
|
x++;
|
||||||
if (x == x1) {
|
if (x == x1) {
|
||||||
x = x0;
|
x = x0;
|
||||||
|
@ -103,33 +101,42 @@ void main() {
|
||||||
count[i][gl_LocalInvocationID.x] = element_count;
|
count[i][gl_LocalInvocationID.x] = element_count;
|
||||||
}
|
}
|
||||||
// element_count is number of elements covering bin for this invocation.
|
// element_count is number of elements covering bin for this invocation.
|
||||||
uint chunk_start = 0;
|
Alloc chunk_alloc = new_alloc(0, 0, true);
|
||||||
if (element_count != 0) {
|
if (element_count != 0) {
|
||||||
// TODO: aggregate atomic adds (subgroup is probably fastest)
|
// TODO: aggregate atomic adds (subgroup is probably fastest)
|
||||||
chunk_start = atomicAdd(alloc, element_count * BinInstance_size);
|
MallocResult chunk = malloc(element_count * BinInstance_size);
|
||||||
sh_chunk_start[gl_LocalInvocationID.x] = chunk_start;
|
chunk_alloc = chunk.alloc;
|
||||||
|
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||||
|
if (chunk.failed) {
|
||||||
|
sh_alloc_failed = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Note: it might be more efficient for reading to do this in the
|
// Note: it might be more efficient for reading to do this in the
|
||||||
// other order (each bin is a contiguous sequence of partitions)
|
// other order (each bin is a contiguous sequence of partitions)
|
||||||
uint out_ix = (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
|
uint out_ix = (conf.bin_alloc.offset >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
|
||||||
bins[out_ix] = element_count;
|
write_mem(conf.bin_alloc, out_ix, element_count);
|
||||||
bins[out_ix + 1] = chunk_start;
|
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
|
||||||
|
|
||||||
barrier();
|
barrier();
|
||||||
|
if (sh_alloc_failed || mem_error != NO_ERROR) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Use similar strategy as Laine & Karras paper; loop over bbox of bins
|
// Use similar strategy as Laine & Karras paper; loop over bbox of bins
|
||||||
// touched by this element
|
// touched by this element
|
||||||
x = x0;
|
x = x0;
|
||||||
y = y0;
|
y = y0;
|
||||||
while (y < y1) {
|
while (y < y1) {
|
||||||
uint bin_ix = y * N_TILE_X + x;
|
uint bin_ix = y * width_in_bins + x;
|
||||||
uint out_mask = bitmaps[my_slice][bin_ix];
|
uint out_mask = bitmaps[my_slice][bin_ix];
|
||||||
if ((out_mask & my_mask) != 0) {
|
if ((out_mask & my_mask) != 0) {
|
||||||
uint idx = bitCount(out_mask & (my_mask - 1));
|
uint idx = bitCount(out_mask & (my_mask - 1));
|
||||||
if (my_slice > 0) {
|
if (my_slice > 0) {
|
||||||
idx += count[my_slice - 1][bin_ix];
|
idx += count[my_slice - 1][bin_ix];
|
||||||
}
|
}
|
||||||
uint out_offset = sh_chunk_start[bin_ix] + idx * BinInstance_size;
|
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
||||||
BinInstance_write(BinInstanceRef(out_offset), BinInstance(element_ix));
|
uint out_offset = out_alloc.offset + idx * BinInstance_size;
|
||||||
|
BinInstance_write(out_alloc, BinInstanceRef(out_offset), BinInstance(element_ix));
|
||||||
}
|
}
|
||||||
x++;
|
x++;
|
||||||
if (x == x1) {
|
if (x == x1) {
|
||||||
|
|
Binary file not shown.
|
@ -1,13 +1,11 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Code auto-generated by piet-gpu-derive
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
struct BinInstanceRef {
|
struct BinInstanceRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BinChunkRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct BinInstance {
|
struct BinInstance {
|
||||||
uint element_ix;
|
uint element_ix;
|
||||||
};
|
};
|
||||||
|
@ -18,43 +16,16 @@ BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) {
|
||||||
return BinInstanceRef(ref.offset + index * BinInstance_size);
|
return BinInstanceRef(ref.offset + index * BinInstance_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct BinChunk {
|
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) {
|
||||||
uint n;
|
|
||||||
BinChunkRef next;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define BinChunk_size 8
|
|
||||||
|
|
||||||
BinChunkRef BinChunk_index(BinChunkRef ref, uint index) {
|
|
||||||
return BinChunkRef(ref.offset + index * BinChunk_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
BinInstance BinInstance_read(BinInstanceRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = bins[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
BinInstance s;
|
BinInstance s;
|
||||||
s.element_ix = raw0;
|
s.element_ix = raw0;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BinInstance_write(BinInstanceRef ref, BinInstance s) {
|
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
bins[ix + 0] = s.element_ix;
|
write_mem(a, ix + 0, s.element_ix);
|
||||||
}
|
|
||||||
|
|
||||||
BinChunk BinChunk_read(BinChunkRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = bins[ix + 0];
|
|
||||||
uint raw1 = bins[ix + 1];
|
|
||||||
BinChunk s;
|
|
||||||
s.n = raw0;
|
|
||||||
s.next = BinChunkRef(raw1);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void BinChunk_write(BinChunkRef ref, BinChunk s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
bins[ix + 0] = s.n;
|
|
||||||
bins[ix + 1] = s.next.offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
glslang_validator = glslangValidator
|
glslang_validator = glslangValidator
|
||||||
|
|
||||||
rule glsl
|
rule glsl
|
||||||
command = $glslang_validator -V -o $out $in
|
command = $glslang_validator $flags -V -o $out $in
|
||||||
|
|
||||||
|
|
||||||
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
|
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
|
||||||
|
@ -21,3 +21,6 @@ build backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h
|
||||||
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
|
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
|
||||||
|
|
||||||
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
|
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
|
||||||
|
|
||||||
|
build kernel4_idx.spv: glsl kernel4.comp | ptcl.h setup.h
|
||||||
|
flags = -DENABLE_IMAGE_INDICES
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// The coarse rasterizer stage of the pipeline.
|
// The coarse rasterizer stage of the pipeline.
|
||||||
//
|
//
|
||||||
// As input we have the ordered partitions of paths from the binning phase and
|
// As input we have the ordered partitions of paths from the binning phase and
|
||||||
|
@ -11,29 +13,13 @@
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "mem.h"
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
layout(local_size_x = N_TILE, local_size_y = 1) in;
|
layout(local_size_x = N_TILE, local_size_y = 1) in;
|
||||||
|
|
||||||
layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||||
uint[] annotated;
|
Config conf;
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 1) buffer BinsBuf {
|
|
||||||
uint[] bins;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer TileBuf {
|
|
||||||
uint[] tile;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 3) buffer AllocBuf {
|
|
||||||
uint n_elements;
|
|
||||||
uint alloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 4) buffer PtclBuf {
|
|
||||||
uint[] ptcl;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "annotated.h"
|
#include "annotated.h"
|
||||||
|
@ -48,7 +34,7 @@ shared uint sh_elements[N_TILE];
|
||||||
|
|
||||||
// Number of elements in the partition; prefix sum.
|
// Number of elements in the partition; prefix sum.
|
||||||
shared uint sh_part_count[N_PART_READ];
|
shared uint sh_part_count[N_PART_READ];
|
||||||
shared uint sh_part_elements[N_PART_READ];
|
shared Alloc sh_part_elements[N_PART_READ];
|
||||||
|
|
||||||
shared uint sh_bitmaps[N_SLICE][N_TILE];
|
shared uint sh_bitmaps[N_SLICE][N_TILE];
|
||||||
|
|
||||||
|
@ -62,33 +48,96 @@ shared uint sh_tile_y0[N_TILE];
|
||||||
shared uint sh_tile_base[N_TILE];
|
shared uint sh_tile_base[N_TILE];
|
||||||
shared uint sh_tile_stride[N_TILE];
|
shared uint sh_tile_stride[N_TILE];
|
||||||
|
|
||||||
// Perhaps cmd_limit should be a global? This is a style question.
|
#ifdef MEM_DEBUG
|
||||||
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
|
// Store allocs only when MEM_DEBUG to save shared memory traffic.
|
||||||
if (cmd_ref.offset > cmd_limit) {
|
shared Alloc sh_tile_alloc[N_TILE];
|
||||||
uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
|
|
||||||
CmdJump jump = CmdJump(new_cmd);
|
void write_tile_alloc(uint el_ix, Alloc a) {
|
||||||
Cmd_Jump_write(cmd_ref, jump);
|
sh_tile_alloc[el_ix] = a;
|
||||||
cmd_ref = CmdRef(new_cmd);
|
}
|
||||||
cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
|
||||||
|
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
||||||
|
return sh_tile_alloc[el_ix];
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
void write_tile_alloc(uint el_ix, Alloc a) {
|
||||||
|
// No-op
|
||||||
|
}
|
||||||
|
|
||||||
|
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
||||||
|
// All memory.
|
||||||
|
return new_alloc(0, memory.length()*4, mem_ok);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// The maximum number of commands per annotated element.
|
||||||
|
#define ANNO_COMMANDS 2
|
||||||
|
|
||||||
|
// Perhaps cmd_alloc should be a global? This is a style question.
|
||||||
|
bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) {
|
||||||
|
if (cmd_ref.offset < cmd_limit) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
MallocResult new_cmd = malloc(PTCL_INITIAL_ALLOC);
|
||||||
|
if (new_cmd.failed) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
CmdJump jump = CmdJump(new_cmd.alloc.offset);
|
||||||
|
Cmd_Jump_write(cmd_alloc, cmd_ref, jump);
|
||||||
|
cmd_alloc = new_cmd.alloc;
|
||||||
|
cmd_ref = CmdRef(cmd_alloc.offset);
|
||||||
|
// Reserve space for the maximum number of commands and a potential jump.
|
||||||
|
cmd_limit = cmd_alloc.offset + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth) {
|
||||||
|
if (fill_mode_from_flags(flags) == MODE_NONZERO) {
|
||||||
|
if (tile.tile.offset != 0) {
|
||||||
|
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
|
||||||
|
Cmd_Fill_write(alloc, cmd_ref, cmd_fill);
|
||||||
|
cmd_ref.offset += 4 + CmdFill_size;
|
||||||
|
} else {
|
||||||
|
Cmd_Solid_write(alloc, cmd_ref);
|
||||||
|
cmd_ref.offset += 4;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * linewidth);
|
||||||
|
Cmd_Stroke_write(alloc, cmd_ref, cmd_stroke);
|
||||||
|
cmd_ref.offset += 4 + CmdStroke_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
// Could use either linear or 2d layouts for both dispatch and
|
// Could use either linear or 2d layouts for both dispatch and
|
||||||
// invocations within the workgroup. We'll use variables to abstract.
|
// invocations within the workgroup. We'll use variables to abstract.
|
||||||
uint bin_ix = N_TILE_X * gl_WorkGroupID.y + gl_WorkGroupID.x;
|
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
|
||||||
|
uint bin_ix = width_in_bins * gl_WorkGroupID.y + gl_WorkGroupID.x;
|
||||||
uint partition_ix = 0;
|
uint partition_ix = 0;
|
||||||
uint n_partitions = (n_elements + N_TILE - 1) / N_TILE;
|
uint n_partitions = (conf.n_elements + N_TILE - 1) / N_TILE;
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
|
|
||||||
// Coordinates of top left of bin, in tiles.
|
// Coordinates of top left of bin, in tiles.
|
||||||
uint bin_tile_x = N_TILE_X * gl_WorkGroupID.x;
|
uint bin_tile_x = N_TILE_X * gl_WorkGroupID.x;
|
||||||
uint bin_tile_y = N_TILE_Y * gl_WorkGroupID.y;
|
uint bin_tile_y = N_TILE_Y * gl_WorkGroupID.y;
|
||||||
|
|
||||||
|
// Per-tile state
|
||||||
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
|
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
|
||||||
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
|
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
|
||||||
uint this_tile_ix = (bin_tile_y + tile_y) * WIDTH_IN_TILES + bin_tile_x + tile_x;
|
uint this_tile_ix = (bin_tile_y + tile_y) * conf.width_in_tiles + bin_tile_x + tile_x;
|
||||||
CmdRef cmd_ref = CmdRef(this_tile_ix * PTCL_INITIAL_ALLOC);
|
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, this_tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
|
||||||
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
|
||||||
|
// Reserve space for the maximum number of commands and a potential jump.
|
||||||
|
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
|
||||||
|
// The nesting depth of the clip stack
|
||||||
|
uint clip_depth = 0;
|
||||||
|
// State for the "clip zero" optimization. If it's nonzero, then we are
|
||||||
|
// currently in a clip for which the entire tile has an alpha of zero, and
|
||||||
|
// the value is the depth after the "begin clip" of that element.
|
||||||
|
uint clip_zero_depth = 0;
|
||||||
|
// State for the "clip one" optimization. If bit `i` is set, then that means
|
||||||
|
// that the clip pushed at depth `i` has an alpha of all one.
|
||||||
|
uint clip_one_mask = 0;
|
||||||
|
|
||||||
// I'm sure we can figure out how to do this with at least one fewer register...
|
// I'm sure we can figure out how to do this with at least one fewer register...
|
||||||
// Items up to rd_ix have been read from sh_elements
|
// Items up to rd_ix have been read from sh_elements
|
||||||
|
@ -98,6 +147,14 @@ void main() {
|
||||||
// Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
|
// Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
|
||||||
uint part_start_ix = 0;
|
uint part_start_ix = 0;
|
||||||
uint ready_ix = 0;
|
uint ready_ix = 0;
|
||||||
|
|
||||||
|
// Leave room for the fine rasterizer scratch allocation.
|
||||||
|
Alloc scratch_alloc = slice_mem(cmd_alloc, 0, Alloc_size);
|
||||||
|
cmd_ref.offset += Alloc_size;
|
||||||
|
|
||||||
|
uint num_begin_slots = 0;
|
||||||
|
uint begin_slot = 0;
|
||||||
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
while (true) {
|
while (true) {
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
for (uint i = 0; i < N_SLICE; i++) {
|
||||||
sh_bitmaps[i][th_ix] = 0;
|
sh_bitmaps[i][th_ix] = 0;
|
||||||
|
@ -109,9 +166,10 @@ void main() {
|
||||||
part_start_ix = ready_ix;
|
part_start_ix = ready_ix;
|
||||||
uint count = 0;
|
uint count = 0;
|
||||||
if (th_ix < N_PART_READ && partition_ix + th_ix < n_partitions) {
|
if (th_ix < N_PART_READ && partition_ix + th_ix < n_partitions) {
|
||||||
uint in_ix = ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
|
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
|
||||||
count = bins[in_ix];
|
count = read_mem(conf.bin_alloc, in_ix);
|
||||||
sh_part_elements[th_ix] = bins[in_ix + 1];
|
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
|
||||||
|
sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size, mem_ok);
|
||||||
}
|
}
|
||||||
// prefix sum of counts
|
// prefix sum of counts
|
||||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||||
|
@ -135,7 +193,7 @@ void main() {
|
||||||
}
|
}
|
||||||
// use binary search to find element to read
|
// use binary search to find element to read
|
||||||
uint ix = rd_ix + th_ix;
|
uint ix = rd_ix + th_ix;
|
||||||
if (ix >= wr_ix && ix < ready_ix) {
|
if (ix >= wr_ix && ix < ready_ix && mem_ok) {
|
||||||
uint part_ix = 0;
|
uint part_ix = 0;
|
||||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||||
uint probe = part_ix + ((N_PART_READ / 2) >> i);
|
uint probe = part_ix + ((N_PART_READ / 2) >> i);
|
||||||
|
@ -144,8 +202,9 @@ void main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ix -= part_ix > 0 ? sh_part_count[part_ix - 1] : part_start_ix;
|
ix -= part_ix > 0 ? sh_part_count[part_ix - 1] : part_start_ix;
|
||||||
BinInstanceRef inst_ref = BinInstanceRef(sh_part_elements[part_ix]);
|
Alloc bin_alloc = sh_part_elements[part_ix];
|
||||||
BinInstance inst = BinInstance_read(BinInstance_index(inst_ref, ix));
|
BinInstanceRef inst_ref = BinInstanceRef(bin_alloc.offset);
|
||||||
|
BinInstance inst = BinInstance_read(bin_alloc, BinInstance_index(inst_ref, ix));
|
||||||
sh_elements[th_ix] = inst.element_ix;
|
sh_elements[th_ix] = inst.element_ix;
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
|
@ -161,23 +220,21 @@ void main() {
|
||||||
AnnotatedRef ref;
|
AnnotatedRef ref;
|
||||||
if (th_ix + rd_ix < wr_ix) {
|
if (th_ix + rd_ix < wr_ix) {
|
||||||
element_ix = sh_elements[th_ix];
|
element_ix = sh_elements[th_ix];
|
||||||
ref = AnnotatedRef(element_ix * Annotated_size);
|
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||||
tag = Annotated_tag(ref);
|
tag = Annotated_tag(conf.anno_alloc, ref).tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bounding box of element in pixel coordinates.
|
// Bounding box of element in pixel coordinates.
|
||||||
uint tile_count;
|
uint tile_count;
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Annotated_Fill:
|
case Annotated_Color:
|
||||||
case Annotated_FillMask:
|
case Annotated_Image:
|
||||||
case Annotated_FillMaskInv:
|
|
||||||
case Annotated_Stroke:
|
|
||||||
case Annotated_BeginClip:
|
case Annotated_BeginClip:
|
||||||
case Annotated_EndClip:
|
case Annotated_EndClip:
|
||||||
// We have one "path" for each element, even if the element isn't
|
// We have one "path" for each element, even if the element isn't
|
||||||
// actually a path (currently EndClip, but images etc in the future).
|
// actually a path (currently EndClip, but images etc in the future).
|
||||||
uint path_ix = element_ix;
|
uint path_ix = element_ix;
|
||||||
Path path = Path_read(PathRef(path_ix * Path_size));
|
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
||||||
uint stride = path.bbox.z - path.bbox.x;
|
uint stride = path.bbox.z - path.bbox.x;
|
||||||
sh_tile_stride[th_ix] = stride;
|
sh_tile_stride[th_ix] = stride;
|
||||||
int dx = int(path.bbox.x) - int(bin_tile_x);
|
int dx = int(path.bbox.x) - int(bin_tile_x);
|
||||||
|
@ -193,6 +250,8 @@ void main() {
|
||||||
// base relative to bin
|
// base relative to bin
|
||||||
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
||||||
sh_tile_base[th_ix] = base;
|
sh_tile_base[th_ix] = base;
|
||||||
|
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||||
|
write_tile_alloc(th_ix, path_alloc);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
tile_count = 0;
|
tile_count = 0;
|
||||||
|
@ -220,23 +279,21 @@ void main() {
|
||||||
el_ix = probe;
|
el_ix = probe;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
AnnotatedRef ref = AnnotatedRef(sh_elements[el_ix] * Annotated_size);
|
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + sh_elements[el_ix] * Annotated_size);
|
||||||
uint tag = Annotated_tag(ref);
|
uint tag = Annotated_tag(conf.anno_alloc, ref).tag;
|
||||||
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
|
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
|
||||||
uint width = sh_tile_width[el_ix];
|
uint width = sh_tile_width[el_ix];
|
||||||
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||||
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||||
bool include_tile;
|
bool include_tile = false;
|
||||||
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
|
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
|
||||||
include_tile = true;
|
include_tile = true;
|
||||||
} else {
|
} else if (mem_ok) {
|
||||||
Tile tile = Tile_read(TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
||||||
// Include the path in the tile if
|
// Include the path in the tile if
|
||||||
// - the tile contains at least a segment (tile offset non-zero)
|
// - the tile contains at least a segment (tile offset non-zero)
|
||||||
// - the tile is completely covered (backdrop non-zero)
|
// - the tile is completely covered (backdrop non-zero)
|
||||||
bool inside = tile.backdrop != 0;
|
include_tile = tile.tile.offset != 0 || tile.backdrop != 0;
|
||||||
bool fill = tag != Annotated_FillMaskInv;
|
|
||||||
include_tile = tile.tile.offset != 0 || inside == fill;
|
|
||||||
}
|
}
|
||||||
if (include_tile) {
|
if (include_tile) {
|
||||||
uint el_slice = el_ix / 32;
|
uint el_slice = el_ix / 32;
|
||||||
|
@ -251,7 +308,7 @@ void main() {
|
||||||
// through the non-segment elements.
|
// through the non-segment elements.
|
||||||
uint slice_ix = 0;
|
uint slice_ix = 0;
|
||||||
uint bitmap = sh_bitmaps[0][th_ix];
|
uint bitmap = sh_bitmaps[0][th_ix];
|
||||||
while (true) {
|
while (mem_ok) {
|
||||||
if (bitmap == 0) {
|
if (bitmap == 0) {
|
||||||
slice_ix++;
|
slice_ix++;
|
||||||
if (slice_ix == N_SLICE) {
|
if (slice_ix == N_SLICE) {
|
||||||
|
@ -271,86 +328,99 @@ void main() {
|
||||||
// At this point, we read the element again from global memory.
|
// At this point, we read the element again from global memory.
|
||||||
// If that turns out to be expensive, maybe we can pack it into
|
// If that turns out to be expensive, maybe we can pack it into
|
||||||
// shared memory (or perhaps just the tag).
|
// shared memory (or perhaps just the tag).
|
||||||
ref = AnnotatedRef(element_ix * Annotated_size);
|
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||||
tag = Annotated_tag(ref);
|
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
|
||||||
|
|
||||||
switch (tag) {
|
if (clip_zero_depth == 0) {
|
||||||
case Annotated_Fill:
|
switch (tag.tag) {
|
||||||
Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
case Annotated_Color:
|
||||||
|
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
AnnoFill fill = Annotated_Fill_read(ref);
|
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
if (tile.tile.offset != 0) {
|
break;
|
||||||
CmdFill cmd_fill;
|
|
||||||
cmd_fill.tile_ref = tile.tile.offset;
|
|
||||||
cmd_fill.backdrop = tile.backdrop;
|
|
||||||
cmd_fill.rgba_color = fill.rgba_color;
|
|
||||||
Cmd_Fill_write(cmd_ref, cmd_fill);
|
|
||||||
} else {
|
|
||||||
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
|
||||||
}
|
}
|
||||||
cmd_ref.offset += Cmd_size;
|
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth);
|
||||||
|
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color));
|
||||||
|
cmd_ref.offset += 4 + CmdColor_size;
|
||||||
|
break;
|
||||||
|
case Annotated_Image:
|
||||||
|
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||||
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
|
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
|
||||||
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill_img.linewidth);
|
||||||
|
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset));
|
||||||
|
cmd_ref.offset += 4 + CmdImage_size;
|
||||||
break;
|
break;
|
||||||
case Annotated_BeginClip:
|
case Annotated_BeginClip:
|
||||||
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
||||||
if (tile.tile.offset != 0) {
|
clip_zero_depth = clip_depth + 1;
|
||||||
CmdBeginClip cmd_begin_clip;
|
} else if (tile.tile.offset == 0 && clip_depth < 32) {
|
||||||
cmd_begin_clip.tile_ref = tile.tile.offset;
|
clip_one_mask |= (1 << clip_depth);
|
||||||
cmd_begin_clip.backdrop = tile.backdrop;
|
|
||||||
Cmd_BeginClip_write(cmd_ref, cmd_begin_clip);
|
|
||||||
} else {
|
} else {
|
||||||
// TODO: here is where a bunch of optimization magic should happen
|
AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref);
|
||||||
float alpha = tile.backdrop == 0 ? 0.0 : 1.0;
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
Cmd_BeginSolidClip_write(cmd_ref, CmdBeginSolidClip(alpha));
|
break;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += Cmd_size;
|
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth);
|
||||||
|
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
|
||||||
|
cmd_ref.offset += 4;
|
||||||
|
if (clip_depth < 32) {
|
||||||
|
clip_one_mask &= ~(1 << clip_depth);
|
||||||
|
}
|
||||||
|
begin_slot++;
|
||||||
|
num_begin_slots = max(num_begin_slots, begin_slot);
|
||||||
|
}
|
||||||
|
clip_depth++;
|
||||||
break;
|
break;
|
||||||
case Annotated_EndClip:
|
case Annotated_EndClip:
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
clip_depth--;
|
||||||
Cmd_EndClip_write(cmd_ref, CmdEndClip(1.0));
|
if (clip_depth >= 32 || (clip_one_mask & (1 << clip_depth)) == 0) {
|
||||||
cmd_ref.offset += Cmd_size;
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Cmd_Solid_write(cmd_alloc, cmd_ref);
|
||||||
|
cmd_ref.offset += 4;
|
||||||
|
begin_slot--;
|
||||||
|
Cmd_EndClip_write(cmd_alloc, cmd_ref);
|
||||||
|
cmd_ref.offset += 4;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case Annotated_FillMask:
|
|
||||||
case Annotated_FillMaskInv:
|
|
||||||
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
|
||||||
AnnoFillMask fill_mask = Annotated_FillMask_read(ref);
|
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
if (tile.tile.offset != 0) {
|
|
||||||
CmdFillMask cmd_fill;
|
|
||||||
cmd_fill.tile_ref = tile.tile.offset;
|
|
||||||
cmd_fill.backdrop = tile.backdrop;
|
|
||||||
cmd_fill.mask = fill_mask.mask;
|
|
||||||
if (tag == Annotated_FillMask) {
|
|
||||||
Cmd_FillMask_write(cmd_ref, cmd_fill);
|
|
||||||
} else {
|
|
||||||
Cmd_FillMaskInv_write(cmd_ref, cmd_fill);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Cmd_SolidMask_write(cmd_ref, CmdSolidMask(fill_mask.mask));
|
// In "clip zero" state, suppress all drawing
|
||||||
|
switch (tag.tag) {
|
||||||
|
case Annotated_BeginClip:
|
||||||
|
clip_depth++;
|
||||||
|
break;
|
||||||
|
case Annotated_EndClip:
|
||||||
|
if (clip_depth == clip_zero_depth) {
|
||||||
|
clip_zero_depth = 0;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += Cmd_size;
|
clip_depth--;
|
||||||
break;
|
|
||||||
case Annotated_Stroke:
|
|
||||||
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
|
||||||
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
|
||||||
CmdStroke cmd_stroke;
|
|
||||||
cmd_stroke.tile_ref = tile.tile.offset;
|
|
||||||
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
|
||||||
cmd_stroke.rgba_color = stroke.rgba_color;
|
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
|
||||||
cmd_ref.offset += Cmd_size;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
barrier();
|
barrier();
|
||||||
|
|
||||||
rd_ix += N_TILE;
|
rd_ix += N_TILE;
|
||||||
if (rd_ix >= ready_ix && partition_ix >= n_partitions) break;
|
if (rd_ix >= ready_ix && partition_ix >= n_partitions) break;
|
||||||
}
|
}
|
||||||
Cmd_End_write(cmd_ref);
|
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
|
||||||
|
Cmd_End_write(cmd_alloc, cmd_ref);
|
||||||
|
if (num_begin_slots > 0) {
|
||||||
|
// Write scratch allocation: one state per BeginClip per rasterizer chunk.
|
||||||
|
uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
|
||||||
|
MallocResult scratch = malloc(scratch_size);
|
||||||
|
// Ignore scratch.failed; we don't use the allocation and kernel4
|
||||||
|
// checks for memory overflow before using it.
|
||||||
|
alloc_write(scratch_alloc, scratch_alloc.offset, scratch.alloc);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// The element processing stage, first in the pipeline.
|
// The element processing stage, first in the pipeline.
|
||||||
//
|
//
|
||||||
// This stage is primarily about applying transforms and computing bounding
|
// This stage is primarily about applying transforms and computing bounding
|
||||||
|
@ -7,6 +9,9 @@
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "mem.h"
|
||||||
|
#include "setup.h"
|
||||||
|
|
||||||
#define N_ROWS 4
|
#define N_ROWS 4
|
||||||
#define WG_SIZE 32
|
#define WG_SIZE 32
|
||||||
#define LG_WG_SIZE 5
|
#define LG_WG_SIZE 5
|
||||||
|
@ -14,44 +19,40 @@
|
||||||
|
|
||||||
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
|
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
|
||||||
|
|
||||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||||
|
Config conf;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(set = 0, binding = 2) readonly buffer SceneBuf {
|
||||||
uint[] scene;
|
uint[] scene;
|
||||||
};
|
};
|
||||||
|
|
||||||
// It would be better to use the Vulkan memory model than
|
// It would be better to use the Vulkan memory model than
|
||||||
// "volatile" but shooting for compatibility here rather
|
// "volatile" but shooting for compatibility here rather
|
||||||
// than doing things right.
|
// than doing things right.
|
||||||
layout(set = 0, binding = 1) volatile buffer StateBuf {
|
layout(set = 0, binding = 3) volatile buffer StateBuf {
|
||||||
|
uint part_counter;
|
||||||
uint[] state;
|
uint[] state;
|
||||||
};
|
};
|
||||||
|
|
||||||
// The annotated results are stored here.
|
|
||||||
layout(set = 0, binding = 2) buffer AnnotatedBuf {
|
|
||||||
uint[] annotated;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Path segments are stored here.
|
|
||||||
layout(set = 0, binding = 3) buffer PathSegBuf {
|
|
||||||
uint[] pathseg;
|
|
||||||
};
|
|
||||||
|
|
||||||
#include "scene.h"
|
#include "scene.h"
|
||||||
#include "state.h"
|
#include "state.h"
|
||||||
#include "annotated.h"
|
#include "annotated.h"
|
||||||
#include "pathseg.h"
|
#include "pathseg.h"
|
||||||
|
#include "tile.h"
|
||||||
|
|
||||||
#define StateBuf_stride (8 + 2 * State_size)
|
#define StateBuf_stride (4 + 2 * State_size)
|
||||||
|
|
||||||
StateRef state_aggregate_ref(uint partition_ix) {
|
StateRef state_aggregate_ref(uint partition_ix) {
|
||||||
return StateRef(12 + partition_ix * StateBuf_stride);
|
return StateRef(4 + partition_ix * StateBuf_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
StateRef state_prefix_ref(uint partition_ix) {
|
StateRef state_prefix_ref(uint partition_ix) {
|
||||||
return StateRef(12 + partition_ix * StateBuf_stride + State_size);
|
return StateRef(4 + partition_ix * StateBuf_stride + State_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint state_flag_index(uint partition_ix) {
|
uint state_flag_index(uint partition_ix) {
|
||||||
return 1 + partition_ix * (StateBuf_stride / 4);
|
return partition_ix * (StateBuf_stride / 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
// These correspond to X, A, P respectively in the prefix sum paper.
|
// These correspond to X, A, P respectively in the prefix sum paper.
|
||||||
|
@ -62,6 +63,11 @@ uint state_flag_index(uint partition_ix) {
|
||||||
#define FLAG_SET_LINEWIDTH 1
|
#define FLAG_SET_LINEWIDTH 1
|
||||||
#define FLAG_SET_BBOX 2
|
#define FLAG_SET_BBOX 2
|
||||||
#define FLAG_RESET_BBOX 4
|
#define FLAG_RESET_BBOX 4
|
||||||
|
#define FLAG_SET_FILL_MODE 8
|
||||||
|
// Fill modes take up the next bit. Non-zero fill is 0, stroke is 1.
|
||||||
|
#define LG_FILL_MODE 4
|
||||||
|
#define FILL_MODE_BITS 1
|
||||||
|
#define FILL_MODE_MASK (FILL_MODE_BITS << LG_FILL_MODE)
|
||||||
|
|
||||||
// This is almost like a monoid (the interaction between transformation and
|
// This is almost like a monoid (the interaction between transformation and
|
||||||
// bounding boxes is approximate)
|
// bounding boxes is approximate)
|
||||||
|
@ -87,17 +93,21 @@ State combine_state(State a, State b) {
|
||||||
c.translate.x = a.mat.x * b.translate.x + a.mat.z * b.translate.y + a.translate.x;
|
c.translate.x = a.mat.x * b.translate.x + a.mat.z * b.translate.y + a.translate.x;
|
||||||
c.translate.y = a.mat.y * b.translate.x + a.mat.w * b.translate.y + a.translate.y;
|
c.translate.y = a.mat.y * b.translate.x + a.mat.w * b.translate.y + a.translate.y;
|
||||||
c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth;
|
c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth;
|
||||||
c.flags = (a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX)) | b.flags;
|
c.flags = (a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX | FLAG_SET_FILL_MODE)) | b.flags;
|
||||||
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
|
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
|
||||||
|
uint fill_mode = (b.flags & FLAG_SET_FILL_MODE) == 0 ? a.flags : b.flags;
|
||||||
|
fill_mode &= FILL_MODE_MASK;
|
||||||
|
c.flags = (c.flags & ~FILL_MODE_MASK) | fill_mode;
|
||||||
c.path_count = a.path_count + b.path_count;
|
c.path_count = a.path_count + b.path_count;
|
||||||
c.pathseg_count = a.pathseg_count + b.pathseg_count;
|
c.pathseg_count = a.pathseg_count + b.pathseg_count;
|
||||||
|
c.trans_count = a.trans_count + b.trans_count;
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
State map_element(ElementRef ref) {
|
State map_element(ElementRef ref) {
|
||||||
// TODO: it would *probably* be more efficient to make the memory read patterns less
|
// TODO: it would *probably* be more efficient to make the memory read patterns less
|
||||||
// divergent, though it would be more wasted memory.
|
// divergent, though it would be more wasted memory.
|
||||||
uint tag = Element_tag(ref);
|
uint tag = Element_tag(ref).tag;
|
||||||
State c;
|
State c;
|
||||||
c.bbox = vec4(0.0, 0.0, 0.0, 0.0);
|
c.bbox = vec4(0.0, 0.0, 0.0, 0.0);
|
||||||
c.mat = vec4(1.0, 0.0, 0.0, 1.0);
|
c.mat = vec4(1.0, 0.0, 0.0, 1.0);
|
||||||
|
@ -106,32 +116,28 @@ State map_element(ElementRef ref) {
|
||||||
c.flags = 0;
|
c.flags = 0;
|
||||||
c.path_count = 0;
|
c.path_count = 0;
|
||||||
c.pathseg_count = 0;
|
c.pathseg_count = 0;
|
||||||
|
c.trans_count = 0;
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Element_FillLine:
|
case Element_Line:
|
||||||
case Element_StrokeLine:
|
LineSeg line = Element_Line_read(ref);
|
||||||
LineSeg line = Element_FillLine_read(ref);
|
|
||||||
c.bbox.xy = min(line.p0, line.p1);
|
c.bbox.xy = min(line.p0, line.p1);
|
||||||
c.bbox.zw = max(line.p0, line.p1);
|
c.bbox.zw = max(line.p0, line.p1);
|
||||||
c.pathseg_count = 1;
|
c.pathseg_count = 1;
|
||||||
break;
|
break;
|
||||||
case Element_FillQuad:
|
case Element_Quad:
|
||||||
case Element_StrokeQuad:
|
QuadSeg quad = Element_Quad_read(ref);
|
||||||
QuadSeg quad = Element_FillQuad_read(ref);
|
|
||||||
c.bbox.xy = min(min(quad.p0, quad.p1), quad.p2);
|
c.bbox.xy = min(min(quad.p0, quad.p1), quad.p2);
|
||||||
c.bbox.zw = max(max(quad.p0, quad.p1), quad.p2);
|
c.bbox.zw = max(max(quad.p0, quad.p1), quad.p2);
|
||||||
c.pathseg_count = 1;
|
c.pathseg_count = 1;
|
||||||
break;
|
break;
|
||||||
case Element_FillCubic:
|
case Element_Cubic:
|
||||||
case Element_StrokeCubic:
|
CubicSeg cubic = Element_Cubic_read(ref);
|
||||||
CubicSeg cubic = Element_FillCubic_read(ref);
|
|
||||||
c.bbox.xy = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3));
|
c.bbox.xy = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3));
|
||||||
c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));
|
c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));
|
||||||
c.pathseg_count = 1;
|
c.pathseg_count = 1;
|
||||||
break;
|
break;
|
||||||
case Element_Fill:
|
case Element_FillColor:
|
||||||
case Element_FillMask:
|
case Element_FillImage:
|
||||||
case Element_FillMaskInv:
|
|
||||||
case Element_Stroke:
|
|
||||||
case Element_BeginClip:
|
case Element_BeginClip:
|
||||||
c.flags = FLAG_RESET_BBOX;
|
c.flags = FLAG_RESET_BBOX;
|
||||||
c.path_count = 1;
|
c.path_count = 1;
|
||||||
|
@ -148,6 +154,11 @@ State map_element(ElementRef ref) {
|
||||||
Transform t = Element_Transform_read(ref);
|
Transform t = Element_Transform_read(ref);
|
||||||
c.mat = t.mat;
|
c.mat = t.mat;
|
||||||
c.translate = t.translate;
|
c.translate = t.translate;
|
||||||
|
c.trans_count = 1;
|
||||||
|
break;
|
||||||
|
case Element_SetFillMode:
|
||||||
|
SetFillMode fm = Element_SetFillMode_read(ref);
|
||||||
|
c.flags = FLAG_SET_FILL_MODE | (fm.fill_mode << LG_FILL_MODE);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return c;
|
return c;
|
||||||
|
@ -159,16 +170,7 @@ vec2 get_linewidth(State st) {
|
||||||
return 0.5 * st.linewidth * vec2(length(st.mat.xz), length(st.mat.yw));
|
return 0.5 * st.linewidth * vec2(length(st.mat.xz), length(st.mat.yw));
|
||||||
}
|
}
|
||||||
|
|
||||||
// We should be able to use an array of structs but the NV shader compiler
|
shared State sh_state[WG_SIZE];
|
||||||
// doesn't seem to like it :/
|
|
||||||
//shared State sh_state[WG_SIZE];
|
|
||||||
shared vec4 sh_mat[WG_SIZE];
|
|
||||||
shared vec2 sh_translate[WG_SIZE];
|
|
||||||
shared vec4 sh_bbox[WG_SIZE];
|
|
||||||
shared float sh_width[WG_SIZE];
|
|
||||||
shared uint sh_flags[WG_SIZE];
|
|
||||||
shared uint sh_path_count[WG_SIZE];
|
|
||||||
shared uint sh_pathseg_count[WG_SIZE];
|
|
||||||
|
|
||||||
shared uint sh_part_ix;
|
shared uint sh_part_ix;
|
||||||
shared State sh_prefix;
|
shared State sh_prefix;
|
||||||
|
@ -178,7 +180,7 @@ void main() {
|
||||||
// Determine partition to process by atomic counter (described in Section
|
// Determine partition to process by atomic counter (described in Section
|
||||||
// 4.4 of prefix sum paper).
|
// 4.4 of prefix sum paper).
|
||||||
if (gl_LocalInvocationID.x == 0) {
|
if (gl_LocalInvocationID.x == 0) {
|
||||||
sh_part_ix = atomicAdd(state[0], 1);
|
sh_part_ix = atomicAdd(part_counter, 1);
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
uint part_ix = sh_part_ix;
|
uint part_ix = sh_part_ix;
|
||||||
|
@ -193,35 +195,15 @@ void main() {
|
||||||
th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
|
th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
|
||||||
}
|
}
|
||||||
State agg = th_state[N_ROWS - 1];
|
State agg = th_state[N_ROWS - 1];
|
||||||
sh_mat[gl_LocalInvocationID.x] = agg.mat;
|
sh_state[gl_LocalInvocationID.x] = agg;
|
||||||
sh_translate[gl_LocalInvocationID.x] = agg.translate;
|
|
||||||
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
|
|
||||||
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
|
|
||||||
sh_flags[gl_LocalInvocationID.x] = agg.flags;
|
|
||||||
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
|
|
||||||
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
|
|
||||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
barrier();
|
barrier();
|
||||||
if (gl_LocalInvocationID.x >= (1 << i)) {
|
if (gl_LocalInvocationID.x >= (1 << i)) {
|
||||||
State other;
|
State other = sh_state[gl_LocalInvocationID.x - (1 << i)];
|
||||||
uint ix = gl_LocalInvocationID.x - (1 << i);
|
|
||||||
other.mat = sh_mat[ix];
|
|
||||||
other.translate = sh_translate[ix];
|
|
||||||
other.bbox = sh_bbox[ix];
|
|
||||||
other.linewidth = sh_width[ix];
|
|
||||||
other.flags = sh_flags[ix];
|
|
||||||
other.path_count = sh_path_count[ix];
|
|
||||||
other.pathseg_count = sh_pathseg_count[ix];
|
|
||||||
agg = combine_state(other, agg);
|
agg = combine_state(other, agg);
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
sh_mat[gl_LocalInvocationID.x] = agg.mat;
|
sh_state[gl_LocalInvocationID.x] = agg;
|
||||||
sh_translate[gl_LocalInvocationID.x] = agg.translate;
|
|
||||||
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
|
|
||||||
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
|
|
||||||
sh_flags[gl_LocalInvocationID.x] = agg.flags;
|
|
||||||
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
|
|
||||||
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
State exclusive;
|
State exclusive;
|
||||||
|
@ -232,6 +214,7 @@ void main() {
|
||||||
exclusive.flags = 0;
|
exclusive.flags = 0;
|
||||||
exclusive.path_count = 0;
|
exclusive.path_count = 0;
|
||||||
exclusive.pathseg_count = 0;
|
exclusive.pathseg_count = 0;
|
||||||
|
exclusive.trans_count = 0;
|
||||||
|
|
||||||
// Publish aggregate for this partition
|
// Publish aggregate for this partition
|
||||||
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
|
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
|
||||||
|
@ -302,15 +285,7 @@ void main() {
|
||||||
|
|
||||||
State row = exclusive;
|
State row = exclusive;
|
||||||
if (gl_LocalInvocationID.x > 0) {
|
if (gl_LocalInvocationID.x > 0) {
|
||||||
uint ix = gl_LocalInvocationID.x - 1;
|
State other = sh_state[gl_LocalInvocationID.x - 1];
|
||||||
State other;
|
|
||||||
other.mat = sh_mat[ix];
|
|
||||||
other.translate = sh_translate[ix];
|
|
||||||
other.bbox = sh_bbox[ix];
|
|
||||||
other.linewidth = sh_width[ix];
|
|
||||||
other.flags = sh_flags[ix];
|
|
||||||
other.path_count = sh_path_count[ix];
|
|
||||||
other.pathseg_count = sh_pathseg_count[ix];
|
|
||||||
row = combine_state(row, other);
|
row = combine_state(row, other);
|
||||||
}
|
}
|
||||||
for (uint i = 0; i < N_ROWS; i++) {
|
for (uint i = 0; i < N_ROWS; i++) {
|
||||||
|
@ -320,125 +295,115 @@ void main() {
|
||||||
// gains to be had from stashing in shared memory or possibly
|
// gains to be had from stashing in shared memory or possibly
|
||||||
// registers (though register pressure is an issue).
|
// registers (though register pressure is an issue).
|
||||||
ElementRef this_ref = Element_index(ref, i);
|
ElementRef this_ref = Element_index(ref, i);
|
||||||
uint tag = Element_tag(this_ref);
|
ElementTag tag = Element_tag(this_ref);
|
||||||
switch (tag) {
|
uint fill_mode = fill_mode_from_flags(st.flags >> LG_FILL_MODE);
|
||||||
case Element_FillLine:
|
bool is_stroke = fill_mode == MODE_STROKE;
|
||||||
case Element_StrokeLine:
|
switch (tag.tag) {
|
||||||
LineSeg line = Element_StrokeLine_read(this_ref);
|
case Element_Line:
|
||||||
vec2 p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
|
LineSeg line = Element_Line_read(this_ref);
|
||||||
vec2 p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
|
PathCubic path_cubic;
|
||||||
PathStrokeCubic path_cubic;
|
path_cubic.p0 = line.p0;
|
||||||
path_cubic.p0 = p0;
|
path_cubic.p1 = mix(line.p0, line.p1, 1.0 / 3.0);
|
||||||
path_cubic.p1 = mix(p0, p1, 1.0 / 3.0);
|
path_cubic.p2 = mix(line.p1, line.p0, 1.0 / 3.0);
|
||||||
path_cubic.p2 = mix(p1, p0, 1.0 / 3.0);
|
path_cubic.p3 = line.p1;
|
||||||
path_cubic.p3 = p1;
|
|
||||||
path_cubic.path_ix = st.path_count;
|
path_cubic.path_ix = st.path_count;
|
||||||
if (tag == Element_StrokeLine) {
|
path_cubic.trans_ix = st.trans_count;
|
||||||
|
if (is_stroke) {
|
||||||
path_cubic.stroke = get_linewidth(st);
|
path_cubic.stroke = get_linewidth(st);
|
||||||
} else {
|
} else {
|
||||||
path_cubic.stroke = vec2(0.0);
|
path_cubic.stroke = vec2(0.0);
|
||||||
}
|
}
|
||||||
// We do encoding a bit by hand to minimize divergence. Another approach
|
PathSegRef path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
|
||||||
// would be to have a fill/stroke bool.
|
PathSeg_Cubic_write(conf.pathseg_alloc, path_out_ref, fill_mode, path_cubic);
|
||||||
PathSegRef path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size);
|
|
||||||
uint out_tag = tag == Element_FillLine ? PathSeg_FillCubic : PathSeg_StrokeCubic;
|
|
||||||
pathseg[path_out_ref.offset >> 2] = out_tag;
|
|
||||||
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
|
|
||||||
break;
|
break;
|
||||||
case Element_FillQuad:
|
case Element_Quad:
|
||||||
case Element_StrokeQuad:
|
QuadSeg quad = Element_Quad_read(this_ref);
|
||||||
QuadSeg quad = Element_StrokeQuad_read(this_ref);
|
path_cubic.p0 = quad.p0;
|
||||||
p0 = st.mat.xy * quad.p0.x + st.mat.zw * quad.p0.y + st.translate;
|
path_cubic.p1 = mix(quad.p1, quad.p0, 1.0 / 3.0);
|
||||||
p1 = st.mat.xy * quad.p1.x + st.mat.zw * quad.p1.y + st.translate;
|
path_cubic.p2 = mix(quad.p1, quad.p2, 1.0 / 3.0);
|
||||||
vec2 p2 = st.mat.xy * quad.p2.x + st.mat.zw * quad.p2.y + st.translate;
|
path_cubic.p3 = quad.p2;
|
||||||
path_cubic;
|
|
||||||
path_cubic.p0 = p0;
|
|
||||||
path_cubic.p1 = mix(p1, p0, 1.0 / 3.0);
|
|
||||||
path_cubic.p2 = mix(p1, p2, 1.0 / 3.0);
|
|
||||||
path_cubic.p3 = p2;
|
|
||||||
path_cubic.path_ix = st.path_count;
|
path_cubic.path_ix = st.path_count;
|
||||||
if (tag == Element_StrokeQuad) {
|
path_cubic.trans_ix = st.trans_count;
|
||||||
|
if (is_stroke) {
|
||||||
path_cubic.stroke = get_linewidth(st);
|
path_cubic.stroke = get_linewidth(st);
|
||||||
} else {
|
} else {
|
||||||
path_cubic.stroke = vec2(0.0);
|
path_cubic.stroke = vec2(0.0);
|
||||||
}
|
}
|
||||||
// We do encoding a bit by hand to minimize divergence. Another approach
|
path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
|
||||||
// would be to have a fill/stroke bool.
|
PathSeg_Cubic_write(conf.pathseg_alloc, path_out_ref, fill_mode, path_cubic);
|
||||||
path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size);
|
|
||||||
out_tag = tag == Element_FillQuad ? PathSeg_FillCubic : PathSeg_StrokeCubic;
|
|
||||||
pathseg[path_out_ref.offset >> 2] = out_tag;
|
|
||||||
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
|
|
||||||
break;
|
break;
|
||||||
case Element_FillCubic:
|
case Element_Cubic:
|
||||||
case Element_StrokeCubic:
|
CubicSeg cubic = Element_Cubic_read(this_ref);
|
||||||
CubicSeg cubic = Element_StrokeCubic_read(this_ref);
|
path_cubic.p0 = cubic.p0;
|
||||||
path_cubic;
|
path_cubic.p1 = cubic.p1;
|
||||||
path_cubic.p0 = st.mat.xy * cubic.p0.x + st.mat.zw * cubic.p0.y + st.translate;
|
path_cubic.p2 = cubic.p2;
|
||||||
path_cubic.p1 = st.mat.xy * cubic.p1.x + st.mat.zw * cubic.p1.y + st.translate;
|
path_cubic.p3 = cubic.p3;
|
||||||
path_cubic.p2 = st.mat.xy * cubic.p2.x + st.mat.zw * cubic.p2.y + st.translate;
|
|
||||||
path_cubic.p3 = st.mat.xy * cubic.p3.x + st.mat.zw * cubic.p3.y + st.translate;
|
|
||||||
path_cubic.path_ix = st.path_count;
|
path_cubic.path_ix = st.path_count;
|
||||||
if (tag == Element_StrokeCubic) {
|
path_cubic.trans_ix = st.trans_count;
|
||||||
|
if (is_stroke) {
|
||||||
path_cubic.stroke = get_linewidth(st);
|
path_cubic.stroke = get_linewidth(st);
|
||||||
} else {
|
} else {
|
||||||
path_cubic.stroke = vec2(0.0);
|
path_cubic.stroke = vec2(0.0);
|
||||||
}
|
}
|
||||||
// We do encoding a bit by hand to minimize divergence. Another approach
|
path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
|
||||||
// would be to have a fill/stroke bool.
|
PathSeg_Cubic_write(conf.pathseg_alloc, path_out_ref, fill_mode, path_cubic);
|
||||||
path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size);
|
|
||||||
out_tag = tag == Element_FillCubic ? PathSeg_FillCubic : PathSeg_StrokeCubic;
|
|
||||||
pathseg[path_out_ref.offset >> 2] = out_tag;
|
|
||||||
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
|
|
||||||
break;
|
break;
|
||||||
case Element_Stroke:
|
case Element_FillColor:
|
||||||
Stroke stroke = Element_Stroke_read(this_ref);
|
FillColor fill = Element_FillColor_read(this_ref);
|
||||||
AnnoStroke anno_stroke;
|
AnnoColor anno_fill;
|
||||||
anno_stroke.rgba_color = stroke.rgba_color;
|
|
||||||
vec2 lw = get_linewidth(st);
|
|
||||||
anno_stroke.bbox = st.bbox + vec4(-lw, lw);
|
|
||||||
anno_stroke.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
|
|
||||||
AnnotatedRef out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
|
|
||||||
Annotated_Stroke_write(out_ref, anno_stroke);
|
|
||||||
break;
|
|
||||||
case Element_Fill:
|
|
||||||
Fill fill = Element_Fill_read(this_ref);
|
|
||||||
AnnoFill anno_fill;
|
|
||||||
anno_fill.rgba_color = fill.rgba_color;
|
anno_fill.rgba_color = fill.rgba_color;
|
||||||
|
if (is_stroke) {
|
||||||
|
vec2 lw = get_linewidth(st);
|
||||||
|
anno_fill.bbox = st.bbox + vec4(-lw, lw);
|
||||||
|
anno_fill.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
|
||||||
|
} else {
|
||||||
anno_fill.bbox = st.bbox;
|
anno_fill.bbox = st.bbox;
|
||||||
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
|
anno_fill.linewidth = 0.0;
|
||||||
Annotated_Fill_write(out_ref, anno_fill);
|
}
|
||||||
|
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
|
||||||
|
Annotated_Color_write(conf.anno_alloc, out_ref, fill_mode, anno_fill);
|
||||||
break;
|
break;
|
||||||
case Element_FillMask:
|
case Element_FillImage:
|
||||||
FillMask fill_mask = Element_FillMask_read(this_ref);
|
FillImage fill_img = Element_FillImage_read(this_ref);
|
||||||
AnnoFillMask anno_fill_mask;
|
AnnoImage anno_img;
|
||||||
anno_fill_mask.mask = fill_mask.mask;
|
anno_img.index = fill_img.index;
|
||||||
anno_fill_mask.bbox = st.bbox;
|
anno_img.offset = fill_img.offset;
|
||||||
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
|
if (is_stroke) {
|
||||||
Annotated_FillMask_write(out_ref, anno_fill_mask);
|
vec2 lw = get_linewidth(st);
|
||||||
break;
|
anno_img.bbox = st.bbox + vec4(-lw, lw);
|
||||||
case Element_FillMaskInv:
|
anno_img.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
|
||||||
fill_mask = Element_FillMaskInv_read(this_ref);
|
} else {
|
||||||
anno_fill_mask.mask = fill_mask.mask;
|
anno_img.bbox = st.bbox;
|
||||||
// The inverse fill conceptually takes up the entire screen.
|
anno_img.linewidth = 0.0;
|
||||||
// TODO: Tighten bounds to contain only affected paths.
|
}
|
||||||
anno_fill_mask.bbox = vec4(0, 0, 1e9, 1e9);
|
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
|
||||||
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
|
Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
|
||||||
Annotated_FillMaskInv_write(out_ref, anno_fill_mask);
|
|
||||||
break;
|
break;
|
||||||
case Element_BeginClip:
|
case Element_BeginClip:
|
||||||
Clip begin_clip = Element_BeginClip_read(this_ref);
|
Clip begin_clip = Element_BeginClip_read(this_ref);
|
||||||
AnnoClip anno_begin_clip = AnnoClip(begin_clip.bbox);
|
AnnoBeginClip anno_begin_clip;
|
||||||
// This is the absolute bbox, it's been transformed during encoding.
|
// This is the absolute bbox, it's been transformed during encoding.
|
||||||
anno_begin_clip.bbox = begin_clip.bbox;
|
anno_begin_clip.bbox = begin_clip.bbox;
|
||||||
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
|
if (is_stroke) {
|
||||||
Annotated_BeginClip_write(out_ref, anno_begin_clip);
|
vec2 lw = get_linewidth(st);
|
||||||
|
anno_begin_clip.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
|
||||||
|
} else {
|
||||||
|
// Non-stroke clip: zero the line width on this clip's own annotation
// (anno_begin_clip is what gets written out below, not anno_fill).
anno_begin_clip.linewidth = 0.0;
|
||||||
|
}
|
||||||
|
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
|
||||||
|
Annotated_BeginClip_write(conf.anno_alloc, out_ref, fill_mode, anno_begin_clip);
|
||||||
break;
|
break;
|
||||||
case Element_EndClip:
|
case Element_EndClip:
|
||||||
Clip end_clip = Element_EndClip_read(this_ref);
|
Clip end_clip = Element_EndClip_read(this_ref);
|
||||||
// This bbox is expected to be the same as the begin one.
|
// This bbox is expected to be the same as the begin one.
|
||||||
AnnoClip anno_end_clip = AnnoClip(end_clip.bbox);
|
AnnoEndClip anno_end_clip = AnnoEndClip(end_clip.bbox);
|
||||||
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
|
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
|
||||||
Annotated_EndClip_write(out_ref, anno_end_clip);
|
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
|
||||||
|
break;
|
||||||
|
case Element_Transform:
|
||||||
|
TransformSeg transform = TransformSeg(st.mat, st.translate);
|
||||||
|
TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (st.trans_count - 1) * TransformSeg_size);
|
||||||
|
TransformSeg_write(conf.trans_alloc, trans_ref, transform);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// This is "kernel 4" in a 4-kernel pipeline. It renders the commands
|
// This is "kernel 4" in a 4-kernel pipeline. It renders the commands
|
||||||
// in the per-tile command list to an image.
|
// in the per-tile command list to an image.
|
||||||
|
|
||||||
|
@ -6,44 +8,156 @@
|
||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
#ifdef ENABLE_IMAGE_INDICES
|
||||||
|
#extension GL_EXT_nonuniform_qualifier : enable
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "mem.h"
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define CHUNK 8
|
#define CHUNK_X 2
|
||||||
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK)
|
#define CHUNK_Y 4
|
||||||
layout(local_size_x = TILE_WIDTH_PX, local_size_y = CHUNK_DY) in;
|
// Number of pixels handled per invocation. Parenthesized so the macro
// expands as a single value when used inside a larger expression.
#define CHUNK (CHUNK_X * CHUNK_Y)
|
||||||
|
#define CHUNK_DX (TILE_WIDTH_PX / CHUNK_X)
|
||||||
|
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
|
||||||
|
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
|
||||||
|
|
||||||
// Same concern that this should be readonly as in kernel 3.
|
layout(set = 0, binding = 1) restrict readonly buffer ConfigBuf {
|
||||||
layout(set = 0, binding = 0) buffer PtclBuf {
|
Config conf;
|
||||||
uint[] ptcl;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(set = 0, binding = 1) buffer TileBuf {
|
layout(rgba8, set = 0, binding = 2) uniform restrict writeonly image2D image;
|
||||||
uint[] tile;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
|
#ifdef ENABLE_IMAGE_INDICES
|
||||||
|
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[];
|
||||||
|
#else
|
||||||
|
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[1];
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "ptcl.h"
|
#include "ptcl.h"
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
|
|
||||||
#define BLEND_STACK_SIZE 4
|
mediump vec3 tosRGB(mediump vec3 rgb) {
|
||||||
|
bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308));
|
||||||
|
mediump vec3 below = vec3(12.92)*rgb;
|
||||||
|
mediump vec3 above = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
|
||||||
|
return mix(below, above, cutoff);
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate coverage based on backdrop + coverage of each line segment
|
mediump vec3 fromsRGB(mediump vec3 srgb) {
|
||||||
float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
|
// Formula from EXT_sRGB.
|
||||||
// Probably better to store as float, but conversion is no doubt cheap.
|
bvec3 cutoff = greaterThanEqual(srgb, vec3(0.04045));
|
||||||
float area[CHUNK];
|
mediump vec3 below = srgb/vec3(12.92);
|
||||||
for (uint k = 0; k < CHUNK; k++) area[k] = float(backdrop);
|
mediump vec3 above = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4));
|
||||||
TileSegRef tile_seg_ref = TileSegRef(tile_ref);
|
return mix(below, above, cutoff);
|
||||||
|
}
|
||||||
|
|
||||||
|
// unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color
|
||||||
|
// space.
|
||||||
|
mediump vec4 unpacksRGB(uint srgba) {
|
||||||
|
mediump vec4 color = unpackUnorm4x8(srgba).wzyx;
|
||||||
|
return vec4(fromsRGB(color.rgb), color.a);
|
||||||
|
}
|
||||||
|
|
||||||
|
// packsRGB packs a color in the linear color space into its 8-bit sRGB equivalent.
|
||||||
|
uint packsRGB(mediump vec4 rgba) {
|
||||||
|
rgba = vec4(tosRGB(rgba.rgb), rgba.a);
|
||||||
|
return packUnorm4x8(rgba.wzyx);
|
||||||
|
}
|
||||||
|
|
||||||
|
uvec2 chunk_offset(uint i) {
|
||||||
|
return uvec2(i % CHUNK_X * CHUNK_DX, i / CHUNK_X * CHUNK_DY);
|
||||||
|
}
|
||||||
|
|
||||||
|
mediump vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
|
||||||
|
mediump vec4 rgba[CHUNK];
|
||||||
|
for (uint i = 0; i < CHUNK; i++) {
|
||||||
|
ivec2 uv = ivec2(xy + chunk_offset(i)) + cmd_img.offset;
|
||||||
|
mediump vec4 fg_rgba;
|
||||||
|
#ifdef ENABLE_IMAGE_INDICES
|
||||||
|
fg_rgba = imageLoad(images[cmd_img.index], uv);
|
||||||
|
#else
|
||||||
|
fg_rgba = imageLoad(images[0], uv);
|
||||||
|
#endif
|
||||||
|
fg_rgba.rgb = fromsRGB(fg_rgba.rgb);
|
||||||
|
rgba[i] = fg_rgba;
|
||||||
|
}
|
||||||
|
return rgba;
|
||||||
|
}
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
|
||||||
|
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
|
||||||
|
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
|
||||||
|
|
||||||
|
// Read scratch space allocation, written first in the command list.
|
||||||
|
Alloc scratch_alloc = alloc_read(cmd_alloc, cmd_ref.offset);
|
||||||
|
cmd_ref.offset += Alloc_size;
|
||||||
|
|
||||||
|
uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
||||||
|
vec2 xy = vec2(xy_uint);
|
||||||
|
mediump vec4 rgba[CHUNK];
|
||||||
|
for (uint i = 0; i < CHUNK; i++) {
|
||||||
|
rgba[i] = vec4(0.0);
|
||||||
|
// TODO: remove this debug image support when the actual image method is plumbed.
|
||||||
|
#ifdef DEBUG_IMAGES
|
||||||
|
#ifdef ENABLE_IMAGE_INDICES
|
||||||
|
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
|
||||||
|
rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint + chunk_offset(i))/4);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
|
||||||
|
// Store the full RGBA sample in `rgba`, the only accumulator declared in
// main(); this mirrors the ENABLE_IMAGE_INDICES branch.
rgba[i] = imageLoad(images[0], ivec2(xy_uint + chunk_offset(i))/4);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
mediump float area[CHUNK];
|
||||||
|
uint clip_depth = 0;
|
||||||
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
|
while (mem_ok) {
|
||||||
|
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
|
||||||
|
if (tag == Cmd_End) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch (tag) {
|
||||||
|
case Cmd_Stroke:
|
||||||
|
// Calculate distance field from all the line segments in this tile.
|
||||||
|
CmdStroke stroke = Cmd_Stroke_read(cmd_alloc, cmd_ref);
|
||||||
|
mediump float df[CHUNK];
|
||||||
|
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
||||||
|
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
||||||
do {
|
do {
|
||||||
TileSeg seg = TileSeg_read(tile_seg_ref);
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
||||||
|
vec2 line_vec = seg.vector;
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
|
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
||||||
vec2 start = seg.start - my_xy;
|
dpos += vec2(chunk_offset(k));
|
||||||
vec2 end = seg.end - my_xy;
|
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||||
|
df[k] = min(df[k], length(line_vec * t - dpos));
|
||||||
|
}
|
||||||
|
tile_seg_ref = seg.next;
|
||||||
|
} while (tile_seg_ref.offset != 0);
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
area[k] = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
|
||||||
|
}
|
||||||
|
cmd_ref.offset += 4 + CmdStroke_size;
|
||||||
|
break;
|
||||||
|
case Cmd_Fill:
|
||||||
|
CmdFill fill = Cmd_Fill_read(cmd_alloc, cmd_ref);
|
||||||
|
for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
|
||||||
|
tile_seg_ref = TileSegRef(fill.tile_ref);
|
||||||
|
// Calculate coverage based on backdrop + coverage of each line segment
|
||||||
|
do {
|
||||||
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
vec2 my_xy = xy + vec2(chunk_offset(k));
|
||||||
|
vec2 start = seg.origin - my_xy;
|
||||||
|
vec2 end = start + seg.vector;
|
||||||
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
||||||
if (window.x != window.y) {
|
if (window.x != window.y) {
|
||||||
vec2 t = (window - start.y) / (end.y - start.y);
|
vec2 t = (window - start.y) / seg.vector.y;
|
||||||
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
|
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
|
||||||
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
|
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
|
||||||
float xmax = max(xs.x, xs.y);
|
float xmax = max(xs.x, xs.y);
|
||||||
|
@ -53,140 +167,82 @@ float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
|
||||||
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
||||||
area[k] += a * (window.x - window.y);
|
area[k] += a * (window.x - window.y);
|
||||||
}
|
}
|
||||||
area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
area[k] += sign(seg.vector.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
||||||
}
|
}
|
||||||
tile_seg_ref = seg.next;
|
tile_seg_ref = seg.next;
|
||||||
} while (tile_seg_ref.offset != 0);
|
} while (tile_seg_ref.offset != 0);
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
area[k] = min(abs(area[k]), 1.0);
|
area[k] = min(abs(area[k]), 1.0);
|
||||||
}
|
}
|
||||||
return area;
|
cmd_ref.offset += 4 + CmdFill_size;
|
||||||
}
|
|
||||||
|
|
||||||
void main() {
|
|
||||||
uint tile_ix = gl_WorkGroupID.y * WIDTH_IN_TILES + gl_WorkGroupID.x;
|
|
||||||
CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
|
|
||||||
|
|
||||||
uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
|
||||||
vec2 xy = vec2(xy_uint);
|
|
||||||
vec3 rgb[CHUNK];
|
|
||||||
float mask[CHUNK];
|
|
||||||
uint blend_stack[BLEND_STACK_SIZE][CHUNK];
|
|
||||||
uint blend_sp = 0;
|
|
||||||
for (uint i = 0; i < CHUNK; i++) {
|
|
||||||
rgb[i] = vec3(0.5);
|
|
||||||
mask[i] = 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
uint tag = Cmd_tag(cmd_ref);
|
|
||||||
if (tag == Cmd_End) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
switch (tag) {
|
|
||||||
case Cmd_Circle:
|
|
||||||
CmdCircle circle = Cmd_Circle_read(cmd_ref);
|
|
||||||
vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color).wzyx;
|
|
||||||
for (uint i = 0; i < CHUNK; i++) {
|
|
||||||
float dy = float(i * CHUNK_DY);
|
|
||||||
float r = length(vec2(xy.x, xy.y + dy) + vec2(0.5, 0.5) - circle.center.xy);
|
|
||||||
float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0);
|
|
||||||
// TODO: sRGB
|
|
||||||
rgb[i] = mix(rgb[i], fg_rgba.rgb, mask[i] * alpha * fg_rgba.a);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Cmd_Stroke:
|
|
||||||
// Calculate distance field from all the line segments in this tile.
|
|
||||||
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
|
|
||||||
float df[CHUNK];
|
|
||||||
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
|
||||||
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
|
||||||
do {
|
|
||||||
TileSeg seg = TileSeg_read(tile_seg_ref);
|
|
||||||
vec2 line_vec = seg.end - seg.start;
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
|
||||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
|
||||||
dpos.y += float(k * CHUNK_DY);
|
|
||||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
|
||||||
df[k] = min(df[k], length(line_vec * t - dpos));
|
|
||||||
}
|
|
||||||
tile_seg_ref = seg.next;
|
|
||||||
} while (tile_seg_ref.offset != 0);
|
|
||||||
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
|
||||||
float alpha = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
|
|
||||||
rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * alpha * fg_rgba.a);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Cmd_Fill:
|
|
||||||
CmdFill fill = Cmd_Fill_read(cmd_ref);
|
|
||||||
float area[CHUNK];
|
|
||||||
area = computeArea(xy, fill.backdrop, fill.tile_ref);
|
|
||||||
fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
|
||||||
rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * area[k] * fg_rgba.a);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Cmd_FillMask:
|
|
||||||
CmdFillMask fill_mask = Cmd_FillMask_read(cmd_ref);
|
|
||||||
area = computeArea(xy, fill_mask.backdrop, fill_mask.tile_ref);
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
|
||||||
mask[k] = mix(mask[k], fill_mask.mask, area[k]);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Cmd_FillMaskInv:
|
|
||||||
fill_mask = Cmd_FillMask_read(cmd_ref);
|
|
||||||
area = computeArea(xy, fill_mask.backdrop, fill_mask.tile_ref);
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
|
||||||
mask[k] = mix(mask[k], fill_mask.mask, 1.0 - area[k]);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Cmd_BeginClip:
|
|
||||||
CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_ref);
|
|
||||||
area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref);
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
|
||||||
blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
|
|
||||||
}
|
|
||||||
blend_sp++;
|
|
||||||
break;
|
|
||||||
case Cmd_BeginSolidClip:
|
|
||||||
CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_ref);
|
|
||||||
float solid_alpha = begin_solid_clip.alpha;
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
|
||||||
blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], solid_alpha));
|
|
||||||
}
|
|
||||||
blend_sp++;
|
|
||||||
break;
|
|
||||||
case Cmd_EndClip:
|
|
||||||
CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref);
|
|
||||||
blend_sp--;
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
|
||||||
vec4 rgba = unpackUnorm4x8(blend_stack[blend_sp][k]);
|
|
||||||
rgb[k] = mix(rgba.rgb, rgb[k], end_clip.alpha * rgba.a);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case Cmd_Solid:
|
case Cmd_Solid:
|
||||||
CmdSolid solid = Cmd_Solid_read(cmd_ref);
|
|
||||||
fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx;
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * fg_rgba.a);
|
area[k] = 1.0;
|
||||||
}
|
}
|
||||||
|
cmd_ref.offset += 4;
|
||||||
break;
|
break;
|
||||||
case Cmd_SolidMask:
|
case Cmd_Alpha:
|
||||||
CmdSolidMask solid_mask = Cmd_SolidMask_read(cmd_ref);
|
CmdAlpha alpha = Cmd_Alpha_read(cmd_alloc, cmd_ref);
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
mask[k] = solid_mask.mask;
|
area[k] = alpha.alpha;
|
||||||
}
|
}
|
||||||
|
cmd_ref.offset += 4 + CmdAlpha_size;
|
||||||
|
break;
|
||||||
|
case Cmd_Color:
|
||||||
|
CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
|
||||||
|
mediump vec4 fg = unpacksRGB(color.rgba_color);
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
mediump vec4 fg_k = fg * area[k];
|
||||||
|
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
||||||
|
}
|
||||||
|
cmd_ref.offset += 4 + CmdColor_size;
|
||||||
|
break;
|
||||||
|
case Cmd_Image:
|
||||||
|
CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
|
||||||
|
mediump vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
mediump vec4 fg_k = img[k] * area[k];
|
||||||
|
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
||||||
|
}
|
||||||
|
cmd_ref.offset += 4 + CmdImage_size;
|
||||||
|
break;
|
||||||
|
case Cmd_BeginClip:
|
||||||
|
uint base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
|
||||||
|
gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
uvec2 offset = chunk_offset(k);
|
||||||
|
uint srgb = packsRGB(vec4(rgba[k]));
|
||||||
|
mediump float alpha = clamp(abs(area[k]), 0.0, 1.0);
|
||||||
|
write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb);
|
||||||
|
write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha));
|
||||||
|
rgba[k] = vec4(0.0);
|
||||||
|
}
|
||||||
|
clip_depth++;
|
||||||
|
cmd_ref.offset += 4;
|
||||||
|
break;
|
||||||
|
case Cmd_EndClip:
|
||||||
|
clip_depth--;
|
||||||
|
base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
|
||||||
|
gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
uvec2 offset = chunk_offset(k);
|
||||||
|
uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
|
||||||
|
uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
|
||||||
|
mediump vec4 bg = unpacksRGB(srgb);
|
||||||
|
mediump vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha);
|
||||||
|
rgba[k] = bg * (1.0 - fg.a) + fg;
|
||||||
|
}
|
||||||
|
cmd_ref.offset += 4;
|
||||||
break;
|
break;
|
||||||
case Cmd_Jump:
|
case Cmd_Jump:
|
||||||
cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref);
|
cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref);
|
||||||
continue;
|
cmd_alloc.offset = cmd_ref.offset;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += Cmd_size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: sRGB
|
|
||||||
for (uint i = 0; i < CHUNK; i++) {
|
for (uint i = 0; i < CHUNK; i++) {
|
||||||
imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(rgb[i], 1.0));
|
imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(tosRGB(rgba[i].rgb), rgba[i].a));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
BIN
piet-gpu/shader/kernel4_idx.spv
Normal file
BIN
piet-gpu/shader/kernel4_idx.spv
Normal file
Binary file not shown.
147
piet-gpu/shader/mem.h
Normal file
147
piet-gpu/shader/mem.h
Normal file
|
@ -0,0 +1,147 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
|
// Memory is the storage buffer bound at set 0, binding 0. The helpers below
// (malloc, touch_mem, read_mem, write_mem, ...) all operate on it.
layout(set = 0, binding = 0) buffer Memory {
|
||||||
|
// offset into memory of the next allocation, initialized by the user.
// It is a byte offset: malloc advances it by the requested size in bytes.
|
||||||
|
uint mem_offset;
|
||||||
|
// mem_error tracks the status of memory accesses, initialized to NO_ERROR
|
||||||
|
// by the user. ERR_MALLOC_FAILED is reported for insufficient memory.
|
||||||
|
// If MEM_DEBUG is defined the following errors are reported:
|
||||||
|
// - ERR_OUT_OF_BOUNDS is reported for out of bounds accesses (touch_mem,
//   which reports it, is called by both read_mem and write_mem).
|
||||||
|
// - ERR_UNALIGNED_ACCESS for memory access not aligned to 32-bit words.
|
||||||
|
uint mem_error;
|
||||||
|
// memory is the backing store, indexed in 32-bit words.
uint[] memory;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Uncomment this line to add the size field to Alloc and enable memory checks.
|
||||||
|
// Note that the Config struct in setup.h grows size fields as well.
|
||||||
|
//#define MEM_DEBUG
|
||||||
|
|
||||||
|
// Status codes stored in mem_error. Errors are recorded with atomicMax, so if
// several are raised the numerically largest code is the one that remains.
#define NO_ERROR 0
|
||||||
|
#define ERR_MALLOC_FAILED 1
|
||||||
|
#define ERR_OUT_OF_BOUNDS 2
|
||||||
|
#define ERR_UNALIGNED_ACCESS 3
|
||||||
|
|
||||||
|
#ifdef MEM_DEBUG
|
||||||
|
#define Alloc_size 16
|
||||||
|
#else
|
||||||
|
#define Alloc_size 8
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Alloc represents a memory allocation.
// Without MEM_DEBUG it is just a byte offset; with MEM_DEBUG it also carries
// the allocation size, which touch_mem uses for bounds checking.
|
||||||
|
struct Alloc {
|
||||||
|
// offset in bytes into memory.
|
||||||
|
uint offset;
|
||||||
|
#ifdef MEM_DEBUG
|
||||||
|
// size in bytes of the allocation.
|
||||||
|
uint size;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
// MallocResult is the value returned by malloc.
struct MallocResult {
|
||||||
|
// alloc is the allocation that was handed out. It is filled in even when
// failed is true, so callers must check failed before using it.
Alloc alloc;
|
||||||
|
// failed is true if the allocation overflowed memory.
// With MEM_DEBUG, malloc also sets it when the requested size is not a
// multiple of 4 bytes.
|
||||||
|
bool failed;
|
||||||
|
};
|
||||||
|
|
||||||
|
// new_alloc synthesizes an Alloc from an offset and size.
// offset and size are in bytes. mem_ok only matters when MEM_DEBUG is defined:
// if it is false the Alloc is given size 0, so every touch_mem check against
// it fails. Without MEM_DEBUG, size and mem_ok are ignored.
|
||||||
|
Alloc new_alloc(uint offset, uint size, bool mem_ok) {
|
||||||
|
Alloc a;
|
||||||
|
a.offset = offset;
|
||||||
|
#ifdef MEM_DEBUG
|
||||||
|
if (mem_ok) {
|
||||||
|
a.size = size;
|
||||||
|
} else {
|
||||||
|
// Zero-sized allocation: no word offset passes the range test in touch_mem.
a.size = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
// malloc allocates size bytes of memory.
// The allocation is a single atomicAdd on mem_offset; the value before the add
// becomes the allocation's byte offset. If the end of the allocation lies past
// the end of the buffer, the result has failed set and ERR_MALLOC_FAILED is
// recorded in mem_error. Note that mem_offset has already been advanced by
// then; it is not rolled back on failure.
// With MEM_DEBUG, a size that is not a multiple of 4 also fails, recording
// ERR_UNALIGNED_ACCESS.
|
||||||
|
MallocResult malloc(uint size) {
|
||||||
|
MallocResult r;
|
||||||
|
uint offset = atomicAdd(mem_offset, size);
|
||||||
|
// memory.length() counts 32-bit words; * 4 converts it to bytes.
r.failed = offset + size > memory.length() * 4;
|
||||||
|
// With MEM_DEBUG a failed allocation gets size 0 (mem_ok is false).
r.alloc = new_alloc(offset, size, !r.failed);
|
||||||
|
if (r.failed) {
|
||||||
|
atomicMax(mem_error, ERR_MALLOC_FAILED);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
#ifdef MEM_DEBUG
|
||||||
|
// Allocation sizes must be whole 32-bit words.
if ((size & 3) != 0) {
|
||||||
|
r.failed = true;
|
||||||
|
atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// touch_mem checks whether access to the memory word at offset is valid.
|
||||||
|
// If MEM_DEBUG is defined, touch_mem returns false if offset is out of bounds.
// In that case it also records ERR_OUT_OF_BOUNDS in mem_error.
// Without MEM_DEBUG it performs no check and always returns true.
|
||||||
|
// Offset is in words.
|
||||||
|
bool touch_mem(Alloc alloc, uint offset) {
|
||||||
|
#ifdef MEM_DEBUG
|
||||||
|
// alloc.offset and alloc.size are in bytes; / 4 converts them to word indices.
if (offset < alloc.offset/4 || offset >= (alloc.offset + alloc.size)/4) {
|
||||||
|
atomicMax(mem_error, ERR_OUT_OF_BOUNDS);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write_mem writes val to memory at offset.
// The write is silently skipped when touch_mem rejects the offset.
|
||||||
|
// Offset is in words.
// It indexes memory directly (it is not relative to alloc.offset); alloc is
// only used for the touch_mem bounds check.
|
||||||
|
void write_mem(Alloc alloc, uint offset, uint val) {
|
||||||
|
if (!touch_mem(alloc, offset)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
memory[offset] = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// read_mem reads the value from memory at offset.
// Returns 0 when touch_mem rejects the offset.
|
||||||
|
// Offset is in words.
// It indexes memory directly (it is not relative to alloc.offset); alloc is
// only used for the touch_mem bounds check.
|
||||||
|
uint read_mem(Alloc alloc, uint offset) {
|
||||||
|
if (!touch_mem(alloc, offset)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
uint v = memory[offset];
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
// slice_mem returns a sub-allocation inside another. Offset and size are in
|
||||||
|
// bytes, relative to a.offset.
// With MEM_DEBUG, an offset or size that is not a multiple of 4 records
// ERR_UNALIGNED_ACCESS and yields the empty Alloc(0, 0); a slice that extends
// past a.size yields Alloc(0, 0) without recording an error.
// Without MEM_DEBUG no checks are made and size is unused.
|
||||||
|
Alloc slice_mem(Alloc a, uint offset, uint size) {
|
||||||
|
#ifdef MEM_DEBUG
|
||||||
|
if ((offset & 3) != 0 || (size & 3) != 0) {
|
||||||
|
atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
|
||||||
|
return Alloc(0, 0);
|
||||||
|
}
|
||||||
|
if (offset + size > a.size) {
|
||||||
|
// slice_mem is sometimes used for slices outside bounds,
|
||||||
|
// but never written.
|
||||||
|
return Alloc(0, 0);
|
||||||
|
}
|
||||||
|
return Alloc(a.offset + offset, size);
|
||||||
|
#else
|
||||||
|
return Alloc(a.offset + offset);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// alloc_write writes alloc to memory at offset bytes.
// offset is converted to a word index with >> 2 and passed straight to
// write_mem, so it addresses memory directly; a is the allocation the access
// is checked against. The size word is only stored when MEM_DEBUG is defined.
|
||||||
|
void alloc_write(Alloc a, uint offset, Alloc alloc) {
|
||||||
|
write_mem(a, offset >> 2, alloc.offset);
|
||||||
|
#ifdef MEM_DEBUG
|
||||||
|
write_mem(a, (offset >> 2) + 1, alloc.size);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// alloc_read reads an Alloc from memory at offset bytes.
// offset is converted to a word index with >> 2 and passed straight to
// read_mem, so it addresses memory directly; a is the allocation the access
// is checked against. The size word is only read when MEM_DEBUG is defined.
// Because read_mem returns 0 for a rejected access, a rejected read yields
// zeroed fields.
|
||||||
|
Alloc alloc_read(Alloc a, uint offset) {
|
||||||
|
Alloc alloc;
|
||||||
|
alloc.offset = read_mem(a, offset >> 2);
|
||||||
|
#ifdef MEM_DEBUG
|
||||||
|
alloc.size = read_mem(a, (offset >> 2) + 1);
|
||||||
|
#endif
|
||||||
|
return alloc;
|
||||||
|
}
|
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Coarse rasterization of path segments.
|
// Coarse rasterization of path segments.
|
||||||
|
|
||||||
// Allocation and initialization of tiles for paths.
|
// Allocation and initialization of tiles for paths.
|
||||||
|
@ -5,6 +7,7 @@
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "mem.h"
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define LG_COARSE_WG 5
|
#define LG_COARSE_WG 5
|
||||||
|
@ -12,18 +15,8 @@
|
||||||
|
|
||||||
layout(local_size_x = COARSE_WG, local_size_y = 1) in;
|
layout(local_size_x = COARSE_WG, local_size_y = 1) in;
|
||||||
|
|
||||||
layout(set = 0, binding = 0) buffer PathSegBuf {
|
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||||
uint[] pathseg;
|
Config conf;
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 1) buffer AllocBuf {
|
|
||||||
uint n_paths;
|
|
||||||
uint n_pathseg;
|
|
||||||
uint alloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer TileBuf {
|
|
||||||
uint[] tile;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "pathseg.h"
|
#include "pathseg.h"
|
||||||
|
@ -95,22 +88,27 @@ SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
PathSegRef ref = PathSegRef(element_ix * PathSeg_size);
|
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
|
||||||
|
|
||||||
uint tag = PathSeg_Nop;
|
PathSegTag tag = PathSegTag(PathSeg_Nop, 0);
|
||||||
if (element_ix < n_pathseg) {
|
if (element_ix < conf.n_pathseg) {
|
||||||
tag = PathSeg_tag(ref);
|
tag = PathSeg_tag(conf.pathseg_alloc, ref);
|
||||||
}
|
}
|
||||||
// Setup for coverage algorithm.
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
float a, b, c;
|
switch (tag.tag) {
|
||||||
// Bounding box of element in pixel coordinates.
|
case PathSeg_Cubic:
|
||||||
float xmin, xmax, ymin, ymax;
|
PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
|
||||||
PathStrokeLine line;
|
|
||||||
float dx;
|
uint trans_ix = cubic.trans_ix;
|
||||||
switch (tag) {
|
if (trans_ix > 0) {
|
||||||
case PathSeg_FillCubic:
|
TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size);
|
||||||
case PathSeg_StrokeCubic:
|
TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref);
|
||||||
PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref);
|
cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate;
|
||||||
|
cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate;
|
||||||
|
cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate;
|
||||||
|
cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate;
|
||||||
|
}
|
||||||
|
|
||||||
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
|
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
|
||||||
float err = err_v.x * err_v.x + err_v.y * err_v.y;
|
float err = err_v.x * err_v.x + err_v.y * err_v.y;
|
||||||
// The number of quadratics.
|
// The number of quadratics.
|
||||||
|
@ -131,8 +129,10 @@ void main() {
|
||||||
}
|
}
|
||||||
uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1);
|
uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1);
|
||||||
|
|
||||||
|
bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
|
||||||
uint path_ix = cubic.path_ix;
|
uint path_ix = cubic.path_ix;
|
||||||
Path path = Path_read(PathRef(path_ix * Path_size));
|
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
||||||
|
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||||
ivec4 bbox = ivec4(path.bbox);
|
ivec4 bbox = ivec4(path.bbox);
|
||||||
vec2 p0 = cubic.p0;
|
vec2 p0 = cubic.p0;
|
||||||
qp0 = cubic.p0;
|
qp0 = cubic.p0;
|
||||||
|
@ -162,22 +162,24 @@ void main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output line segment
|
// Output line segment
|
||||||
xmin = min(p0.x, p1.x) - cubic.stroke.x;
|
|
||||||
xmax = max(p0.x, p1.x) + cubic.stroke.x;
|
// Bounding box of element in pixel coordinates.
|
||||||
ymin = min(p0.y, p1.y) - cubic.stroke.y;
|
float xmin = min(p0.x, p1.x) - cubic.stroke.x;
|
||||||
ymax = max(p0.y, p1.y) + cubic.stroke.y;
|
float xmax = max(p0.x, p1.x) + cubic.stroke.x;
|
||||||
|
float ymin = min(p0.y, p1.y) - cubic.stroke.y;
|
||||||
|
float ymax = max(p0.y, p1.y) + cubic.stroke.y;
|
||||||
float dx = p1.x - p0.x;
|
float dx = p1.x - p0.x;
|
||||||
float dy = p1.y - p0.y;
|
float dy = p1.y - p0.y;
|
||||||
// Set up for per-scanline coverage formula, below.
|
// Set up for per-scanline coverage formula, below.
|
||||||
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
||||||
c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
|
float c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
|
||||||
b = invslope; // Note: assumes square tiles, otherwise scale.
|
float b = invslope; // Note: assumes square tiles, otherwise scale.
|
||||||
a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
|
float a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
|
||||||
|
|
||||||
int x0 = int(floor((xmin) * SX));
|
int x0 = int(floor(xmin * SX));
|
||||||
int x1 = int(ceil((xmax) * SX));
|
int x1 = int(floor(xmax * SX) + 1);
|
||||||
int y0 = int(floor((ymin) * SY));
|
int y0 = int(floor(ymin * SY));
|
||||||
int y1 = int(ceil((ymax) * SY));
|
int y1 = int(floor(ymax * SY) + 1);
|
||||||
|
|
||||||
x0 = clamp(x0, bbox.x, bbox.z);
|
x0 = clamp(x0, bbox.x, bbox.z);
|
||||||
y0 = clamp(y0, bbox.y, bbox.w);
|
y0 = clamp(y0, bbox.y, bbox.w);
|
||||||
|
@ -189,48 +191,89 @@ void main() {
|
||||||
// TODO: can be tighter, use c to bound width
|
// TODO: can be tighter, use c to bound width
|
||||||
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
||||||
// Consider using subgroups to aggregate atomic add.
|
// Consider using subgroups to aggregate atomic add.
|
||||||
uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size);
|
MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
|
||||||
|
if (tile_alloc.failed || !mem_ok) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
uint tile_offset = tile_alloc.alloc.offset;
|
||||||
|
|
||||||
TileSeg tile_seg;
|
TileSeg tile_seg;
|
||||||
|
|
||||||
|
int xray = int(floor(p0.x*SX));
|
||||||
|
int last_xray = int(floor(p1.x*SX));
|
||||||
|
if (p0.y > p1.y) {
|
||||||
|
int tmp = xray;
|
||||||
|
xray = last_xray;
|
||||||
|
last_xray = tmp;
|
||||||
|
}
|
||||||
for (int y = y0; y < y1; y++) {
|
for (int y = y0; y < y1; y++) {
|
||||||
float tile_y0 = float(y * TILE_HEIGHT_PX);
|
float tile_y0 = float(y * TILE_HEIGHT_PX);
|
||||||
if (tag == PathSeg_FillCubic && min(p0.y, p1.y) <= tile_y0) {
|
int xbackdrop = max(xray + 1, bbox.x);
|
||||||
int xray = max(int(ceil(xc - 0.5 * b)), bbox.x);
|
if (!is_stroke && min(p0.y, p1.y) < tile_y0 && xbackdrop < bbox.z) {
|
||||||
if (xray < bbox.z) {
|
|
||||||
int backdrop = p1.y < p0.y ? 1 : -1;
|
int backdrop = p1.y < p0.y ? 1 : -1;
|
||||||
TileRef tile_ref = Tile_index(path.tiles, uint(base + xray));
|
TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
|
||||||
uint tile_el = tile_ref.offset >> 2;
|
uint tile_el = tile_ref.offset >> 2;
|
||||||
atomicAdd(tile[tile_el + 1], backdrop);
|
if (touch_mem(path_alloc, tile_el + 1)) {
|
||||||
|
atomicAdd(memory[tile_el + 1], backdrop);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int xx0 = clamp(int(floor(xc - c)), x0, x1);
|
|
||||||
int xx1 = clamp(int(ceil(xc + c)), x0, x1);
|
// next_xray is the xray for the next scanline; the line segment intersects
|
||||||
|
// all tiles between xray and next_xray.
|
||||||
|
int next_xray = last_xray;
|
||||||
|
if (y < y1 - 1) {
|
||||||
|
float tile_y1 = float((y + 1) * TILE_HEIGHT_PX);
|
||||||
|
float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy);
|
||||||
|
next_xray = int(floor(x_edge*SX));
|
||||||
|
}
|
||||||
|
|
||||||
|
int min_xray = min(xray, next_xray);
|
||||||
|
int max_xray = max(xray, next_xray);
|
||||||
|
int xx0 = min(int(floor(xc - c)), min_xray);
|
||||||
|
int xx1 = max(int(ceil(xc + c)), max_xray + 1);
|
||||||
|
xx0 = clamp(xx0, x0, x1);
|
||||||
|
xx1 = clamp(xx1, x0, x1);
|
||||||
|
|
||||||
for (int x = xx0; x < xx1; x++) {
|
for (int x = xx0; x < xx1; x++) {
|
||||||
float tile_x0 = float(x * TILE_WIDTH_PX);
|
float tile_x0 = float(x * TILE_WIDTH_PX);
|
||||||
TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
|
TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
|
||||||
uint tile_el = tile_ref.offset >> 2;
|
uint tile_el = tile_ref.offset >> 2;
|
||||||
uint old = atomicExchange(tile[tile_el], tile_offset);
|
uint old = 0;
|
||||||
tile_seg.start = p0;
|
if (touch_mem(path_alloc, tile_el)) {
|
||||||
tile_seg.end = p1;
|
old = atomicExchange(memory[tile_el], tile_offset);
|
||||||
float y_edge = 0.0;
|
|
||||||
if (tag == PathSeg_FillCubic) {
|
|
||||||
y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
|
|
||||||
if (min(p0.x, p1.x) < tile_x0 && y_edge >= tile_y0 && y_edge < tile_y0 + TILE_HEIGHT_PX) {
|
|
||||||
if (p0.x > p1.x) {
|
|
||||||
tile_seg.end = vec2(tile_x0, y_edge);
|
|
||||||
} else {
|
|
||||||
tile_seg.start = vec2(tile_x0, y_edge);
|
|
||||||
}
|
}
|
||||||
|
tile_seg.origin = p0;
|
||||||
|
tile_seg.vector = p1 - p0;
|
||||||
|
float y_edge = 0.0;
|
||||||
|
if (!is_stroke) {
|
||||||
|
y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
|
||||||
|
if (min(p0.x, p1.x) < tile_x0) {
|
||||||
|
vec2 p = vec2(tile_x0, y_edge);
|
||||||
|
if (p0.x > p1.x) {
|
||||||
|
tile_seg.vector = p - p0;
|
||||||
} else {
|
} else {
|
||||||
|
tile_seg.origin = p;
|
||||||
|
tile_seg.vector = p1 - p;
|
||||||
|
}
|
||||||
|
// kernel4 uses sign(vector.x) for the sign of the intersection backdrop.
|
||||||
|
// Nudge zeroes towards the intended sign.
|
||||||
|
if (tile_seg.vector.x == 0) {
|
||||||
|
tile_seg.vector.x = sign(p1.x - p0.x)*1e-9;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (x <= min_xray || max_xray < x) {
|
||||||
|
// Reject inconsistent intersections.
|
||||||
y_edge = 1e9;
|
y_edge = 1e9;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tile_seg.y_edge = y_edge;
|
tile_seg.y_edge = y_edge;
|
||||||
tile_seg.next.offset = old;
|
tile_seg.next.offset = old;
|
||||||
TileSeg_write(TileSegRef(tile_offset), tile_seg);
|
TileSeg_write(tile_alloc.alloc, TileSegRef(tile_offset), tile_seg);
|
||||||
tile_offset += TileSeg_size;
|
tile_offset += TileSeg_size;
|
||||||
}
|
}
|
||||||
xc += b;
|
xc += b;
|
||||||
base += stride;
|
base += stride;
|
||||||
|
xray = next_xray;
|
||||||
}
|
}
|
||||||
|
|
||||||
n_out += 1;
|
n_out += 1;
|
||||||
|
|
Binary file not shown.
|
@ -1,18 +1,8 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Code auto-generated by piet-gpu-derive
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
struct PathFillLineRef {
|
struct PathCubicRef {
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct PathStrokeLineRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct PathFillCubicRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct PathStrokeCubicRef {
|
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -20,234 +10,91 @@ struct PathSegRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PathFillLine {
|
struct PathCubic {
|
||||||
vec2 p0;
|
|
||||||
vec2 p1;
|
|
||||||
uint path_ix;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define PathFillLine_size 20
|
|
||||||
|
|
||||||
PathFillLineRef PathFillLine_index(PathFillLineRef ref, uint index) {
|
|
||||||
return PathFillLineRef(ref.offset + index * PathFillLine_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct PathStrokeLine {
|
|
||||||
vec2 p0;
|
|
||||||
vec2 p1;
|
|
||||||
uint path_ix;
|
|
||||||
vec2 stroke;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define PathStrokeLine_size 28
|
|
||||||
|
|
||||||
PathStrokeLineRef PathStrokeLine_index(PathStrokeLineRef ref, uint index) {
|
|
||||||
return PathStrokeLineRef(ref.offset + index * PathStrokeLine_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct PathFillCubic {
|
|
||||||
vec2 p0;
|
|
||||||
vec2 p1;
|
|
||||||
vec2 p2;
|
|
||||||
vec2 p3;
|
|
||||||
uint path_ix;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define PathFillCubic_size 36
|
|
||||||
|
|
||||||
PathFillCubicRef PathFillCubic_index(PathFillCubicRef ref, uint index) {
|
|
||||||
return PathFillCubicRef(ref.offset + index * PathFillCubic_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct PathStrokeCubic {
|
|
||||||
vec2 p0;
|
vec2 p0;
|
||||||
vec2 p1;
|
vec2 p1;
|
||||||
vec2 p2;
|
vec2 p2;
|
||||||
vec2 p3;
|
vec2 p3;
|
||||||
uint path_ix;
|
uint path_ix;
|
||||||
|
uint trans_ix;
|
||||||
vec2 stroke;
|
vec2 stroke;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define PathStrokeCubic_size 44
|
#define PathCubic_size 48
|
||||||
|
|
||||||
PathStrokeCubicRef PathStrokeCubic_index(PathStrokeCubicRef ref, uint index) {
|
PathCubicRef PathCubic_index(PathCubicRef ref, uint index) {
|
||||||
return PathStrokeCubicRef(ref.offset + index * PathStrokeCubic_size);
|
return PathCubicRef(ref.offset + index * PathCubic_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PathSeg_Nop 0
|
#define PathSeg_Nop 0
|
||||||
#define PathSeg_FillLine 1
|
#define PathSeg_Cubic 1
|
||||||
#define PathSeg_StrokeLine 2
|
#define PathSeg_size 52
|
||||||
#define PathSeg_FillCubic 3
|
|
||||||
#define PathSeg_StrokeCubic 4
|
|
||||||
#define PathSeg_size 48
|
|
||||||
|
|
||||||
PathSegRef PathSeg_index(PathSegRef ref, uint index) {
|
PathSegRef PathSeg_index(PathSegRef ref, uint index) {
|
||||||
return PathSegRef(ref.offset + index * PathSeg_size);
|
return PathSegRef(ref.offset + index * PathSeg_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
PathFillLine PathFillLine_read(PathFillLineRef ref) {
|
struct PathSegTag {
|
||||||
uint ix = ref.offset >> 2;
|
uint tag;
|
||||||
uint raw0 = pathseg[ix + 0];
|
uint flags;
|
||||||
uint raw1 = pathseg[ix + 1];
|
};
|
||||||
uint raw2 = pathseg[ix + 2];
|
|
||||||
uint raw3 = pathseg[ix + 3];
|
|
||||||
uint raw4 = pathseg[ix + 4];
|
|
||||||
PathFillLine s;
|
|
||||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
|
||||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
|
||||||
s.path_ix = raw4;
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void PathFillLine_write(PathFillLineRef ref, PathFillLine s) {
|
PathCubic PathCubic_read(Alloc a, PathCubicRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
|
uint raw2 = read_mem(a, ix + 2);
|
||||||
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
|
uint raw3 = read_mem(a, ix + 3);
|
||||||
pathseg[ix + 4] = s.path_ix;
|
uint raw4 = read_mem(a, ix + 4);
|
||||||
}
|
uint raw5 = read_mem(a, ix + 5);
|
||||||
|
uint raw6 = read_mem(a, ix + 6);
|
||||||
PathStrokeLine PathStrokeLine_read(PathStrokeLineRef ref) {
|
uint raw7 = read_mem(a, ix + 7);
|
||||||
uint ix = ref.offset >> 2;
|
uint raw8 = read_mem(a, ix + 8);
|
||||||
uint raw0 = pathseg[ix + 0];
|
uint raw9 = read_mem(a, ix + 9);
|
||||||
uint raw1 = pathseg[ix + 1];
|
uint raw10 = read_mem(a, ix + 10);
|
||||||
uint raw2 = pathseg[ix + 2];
|
uint raw11 = read_mem(a, ix + 11);
|
||||||
uint raw3 = pathseg[ix + 3];
|
PathCubic s;
|
||||||
uint raw4 = pathseg[ix + 4];
|
|
||||||
uint raw5 = pathseg[ix + 5];
|
|
||||||
uint raw6 = pathseg[ix + 6];
|
|
||||||
PathStrokeLine s;
|
|
||||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
|
||||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
|
||||||
s.path_ix = raw4;
|
|
||||||
s.stroke = vec2(uintBitsToFloat(raw5), uintBitsToFloat(raw6));
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void PathStrokeLine_write(PathStrokeLineRef ref, PathStrokeLine s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
|
|
||||||
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
|
|
||||||
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
|
|
||||||
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
|
|
||||||
pathseg[ix + 4] = s.path_ix;
|
|
||||||
pathseg[ix + 5] = floatBitsToUint(s.stroke.x);
|
|
||||||
pathseg[ix + 6] = floatBitsToUint(s.stroke.y);
|
|
||||||
}
|
|
||||||
|
|
||||||
PathFillCubic PathFillCubic_read(PathFillCubicRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = pathseg[ix + 0];
|
|
||||||
uint raw1 = pathseg[ix + 1];
|
|
||||||
uint raw2 = pathseg[ix + 2];
|
|
||||||
uint raw3 = pathseg[ix + 3];
|
|
||||||
uint raw4 = pathseg[ix + 4];
|
|
||||||
uint raw5 = pathseg[ix + 5];
|
|
||||||
uint raw6 = pathseg[ix + 6];
|
|
||||||
uint raw7 = pathseg[ix + 7];
|
|
||||||
uint raw8 = pathseg[ix + 8];
|
|
||||||
PathFillCubic s;
|
|
||||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||||
s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
|
s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
|
||||||
s.path_ix = raw8;
|
s.path_ix = raw8;
|
||||||
|
s.trans_ix = raw9;
|
||||||
|
s.stroke = vec2(uintBitsToFloat(raw10), uintBitsToFloat(raw11));
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PathFillCubic_write(PathFillCubicRef ref, PathFillCubic s) {
|
void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
|
write_mem(a, ix + 0, floatBitsToUint(s.p0.x));
|
||||||
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
|
write_mem(a, ix + 1, floatBitsToUint(s.p0.y));
|
||||||
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
|
write_mem(a, ix + 2, floatBitsToUint(s.p1.x));
|
||||||
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
|
write_mem(a, ix + 3, floatBitsToUint(s.p1.y));
|
||||||
pathseg[ix + 4] = floatBitsToUint(s.p2.x);
|
write_mem(a, ix + 4, floatBitsToUint(s.p2.x));
|
||||||
pathseg[ix + 5] = floatBitsToUint(s.p2.y);
|
write_mem(a, ix + 5, floatBitsToUint(s.p2.y));
|
||||||
pathseg[ix + 6] = floatBitsToUint(s.p3.x);
|
write_mem(a, ix + 6, floatBitsToUint(s.p3.x));
|
||||||
pathseg[ix + 7] = floatBitsToUint(s.p3.y);
|
write_mem(a, ix + 7, floatBitsToUint(s.p3.y));
|
||||||
pathseg[ix + 8] = s.path_ix;
|
write_mem(a, ix + 8, s.path_ix);
|
||||||
|
write_mem(a, ix + 9, s.trans_ix);
|
||||||
|
write_mem(a, ix + 10, floatBitsToUint(s.stroke.x));
|
||||||
|
write_mem(a, ix + 11, floatBitsToUint(s.stroke.y));
|
||||||
}
|
}
|
||||||
|
|
||||||
PathStrokeCubic PathStrokeCubic_read(PathStrokeCubicRef ref) {
|
PathSegTag PathSeg_tag(Alloc a, PathSegRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint tag_and_flags = read_mem(a, ref.offset >> 2);
|
||||||
uint raw0 = pathseg[ix + 0];
|
return PathSegTag(tag_and_flags & 0xffff, tag_and_flags >> 16);
|
||||||
uint raw1 = pathseg[ix + 1];
|
|
||||||
uint raw2 = pathseg[ix + 2];
|
|
||||||
uint raw3 = pathseg[ix + 3];
|
|
||||||
uint raw4 = pathseg[ix + 4];
|
|
||||||
uint raw5 = pathseg[ix + 5];
|
|
||||||
uint raw6 = pathseg[ix + 6];
|
|
||||||
uint raw7 = pathseg[ix + 7];
|
|
||||||
uint raw8 = pathseg[ix + 8];
|
|
||||||
uint raw9 = pathseg[ix + 9];
|
|
||||||
uint raw10 = pathseg[ix + 10];
|
|
||||||
PathStrokeCubic s;
|
|
||||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
|
||||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
|
||||||
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
|
||||||
s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
|
|
||||||
s.path_ix = raw8;
|
|
||||||
s.stroke = vec2(uintBitsToFloat(raw9), uintBitsToFloat(raw10));
|
|
||||||
return s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PathStrokeCubic_write(PathStrokeCubicRef ref, PathStrokeCubic s) {
|
PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
return PathCubic_read(a, PathCubicRef(ref.offset + 4));
|
||||||
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
|
|
||||||
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
|
|
||||||
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
|
|
||||||
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
|
|
||||||
pathseg[ix + 4] = floatBitsToUint(s.p2.x);
|
|
||||||
pathseg[ix + 5] = floatBitsToUint(s.p2.y);
|
|
||||||
pathseg[ix + 6] = floatBitsToUint(s.p3.x);
|
|
||||||
pathseg[ix + 7] = floatBitsToUint(s.p3.y);
|
|
||||||
pathseg[ix + 8] = s.path_ix;
|
|
||||||
pathseg[ix + 9] = floatBitsToUint(s.stroke.x);
|
|
||||||
pathseg[ix + 10] = floatBitsToUint(s.stroke.y);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint PathSeg_tag(PathSegRef ref) {
|
void PathSeg_Nop_write(Alloc a, PathSegRef ref) {
|
||||||
return pathseg[ref.offset >> 2];
|
write_mem(a, ref.offset >> 2, PathSeg_Nop);
|
||||||
}
|
}
|
||||||
|
|
||||||
PathFillLine PathSeg_FillLine_read(PathSegRef ref) {
|
void PathSeg_Cubic_write(Alloc a, PathSegRef ref, uint flags, PathCubic s) {
|
||||||
return PathFillLine_read(PathFillLineRef(ref.offset + 4));
|
write_mem(a, ref.offset >> 2, (flags << 16) | PathSeg_Cubic);
|
||||||
}
|
PathCubic_write(a, PathCubicRef(ref.offset + 4), s);
|
||||||
|
|
||||||
PathStrokeLine PathSeg_StrokeLine_read(PathSegRef ref) {
|
|
||||||
return PathStrokeLine_read(PathStrokeLineRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
PathFillCubic PathSeg_FillCubic_read(PathSegRef ref) {
|
|
||||||
return PathFillCubic_read(PathFillCubicRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
PathStrokeCubic PathSeg_StrokeCubic_read(PathSegRef ref) {
|
|
||||||
return PathStrokeCubic_read(PathStrokeCubicRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
void PathSeg_Nop_write(PathSegRef ref) {
|
|
||||||
pathseg[ref.offset >> 2] = PathSeg_Nop;
|
|
||||||
}
|
|
||||||
|
|
||||||
void PathSeg_FillLine_write(PathSegRef ref, PathFillLine s) {
|
|
||||||
pathseg[ref.offset >> 2] = PathSeg_FillLine;
|
|
||||||
PathFillLine_write(PathFillLineRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void PathSeg_StrokeLine_write(PathSegRef ref, PathStrokeLine s) {
|
|
||||||
pathseg[ref.offset >> 2] = PathSeg_StrokeLine;
|
|
||||||
PathStrokeLine_write(PathStrokeLineRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void PathSeg_FillCubic_write(PathSegRef ref, PathFillCubic s) {
|
|
||||||
pathseg[ref.offset >> 2] = PathSeg_FillCubic;
|
|
||||||
PathFillCubic_write(PathFillCubicRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void PathSeg_StrokeCubic_write(PathSegRef ref, PathStrokeCubic s) {
|
|
||||||
pathseg[ref.offset >> 2] = PathSeg_StrokeCubic;
|
|
||||||
PathStrokeCubic_write(PathStrokeCubicRef(ref.offset + 4), s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,7 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Code auto-generated by piet-gpu-derive
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
struct CmdCircleRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CmdLineRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CmdStrokeRef {
|
struct CmdStrokeRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
@ -16,27 +10,15 @@ struct CmdFillRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CmdFillMaskRef {
|
struct CmdColorRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CmdBeginClipRef {
|
struct CmdImageRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CmdBeginSolidClipRef {
|
struct CmdAlphaRef {
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CmdEndClipRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CmdSolidRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CmdSolidMaskRef {
|
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -48,44 +30,12 @@ struct CmdRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SegmentRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SegChunkRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CmdCircle {
|
|
||||||
vec2 center;
|
|
||||||
float radius;
|
|
||||||
uint rgba_color;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define CmdCircle_size 16
|
|
||||||
|
|
||||||
CmdCircleRef CmdCircle_index(CmdCircleRef ref, uint index) {
|
|
||||||
return CmdCircleRef(ref.offset + index * CmdCircle_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct CmdLine {
|
|
||||||
vec2 start;
|
|
||||||
vec2 end;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define CmdLine_size 16
|
|
||||||
|
|
||||||
CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
|
|
||||||
return CmdLineRef(ref.offset + index * CmdLine_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct CmdStroke {
|
struct CmdStroke {
|
||||||
uint tile_ref;
|
uint tile_ref;
|
||||||
float half_width;
|
float half_width;
|
||||||
uint rgba_color;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CmdStroke_size 12
|
#define CmdStroke_size 8
|
||||||
|
|
||||||
CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
|
CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
|
||||||
return CmdStrokeRef(ref.offset + index * CmdStroke_size);
|
return CmdStrokeRef(ref.offset + index * CmdStroke_size);
|
||||||
|
@ -94,76 +44,43 @@ CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
|
||||||
struct CmdFill {
|
struct CmdFill {
|
||||||
uint tile_ref;
|
uint tile_ref;
|
||||||
int backdrop;
|
int backdrop;
|
||||||
uint rgba_color;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CmdFill_size 12
|
#define CmdFill_size 8
|
||||||
|
|
||||||
CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
|
CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
|
||||||
return CmdFillRef(ref.offset + index * CmdFill_size);
|
return CmdFillRef(ref.offset + index * CmdFill_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct CmdFillMask {
|
struct CmdColor {
|
||||||
uint tile_ref;
|
|
||||||
int backdrop;
|
|
||||||
float mask;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define CmdFillMask_size 12
|
|
||||||
|
|
||||||
CmdFillMaskRef CmdFillMask_index(CmdFillMaskRef ref, uint index) {
|
|
||||||
return CmdFillMaskRef(ref.offset + index * CmdFillMask_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct CmdBeginClip {
|
|
||||||
uint tile_ref;
|
|
||||||
int backdrop;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define CmdBeginClip_size 8
|
|
||||||
|
|
||||||
CmdBeginClipRef CmdBeginClip_index(CmdBeginClipRef ref, uint index) {
|
|
||||||
return CmdBeginClipRef(ref.offset + index * CmdBeginClip_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct CmdBeginSolidClip {
|
|
||||||
float alpha;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define CmdBeginSolidClip_size 4
|
|
||||||
|
|
||||||
CmdBeginSolidClipRef CmdBeginSolidClip_index(CmdBeginSolidClipRef ref, uint index) {
|
|
||||||
return CmdBeginSolidClipRef(ref.offset + index * CmdBeginSolidClip_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct CmdEndClip {
|
|
||||||
float alpha;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define CmdEndClip_size 4
|
|
||||||
|
|
||||||
CmdEndClipRef CmdEndClip_index(CmdEndClipRef ref, uint index) {
|
|
||||||
return CmdEndClipRef(ref.offset + index * CmdEndClip_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct CmdSolid {
|
|
||||||
uint rgba_color;
|
uint rgba_color;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CmdSolid_size 4
|
#define CmdColor_size 4
|
||||||
|
|
||||||
CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
|
CmdColorRef CmdColor_index(CmdColorRef ref, uint index) {
|
||||||
return CmdSolidRef(ref.offset + index * CmdSolid_size);
|
return CmdColorRef(ref.offset + index * CmdColor_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct CmdSolidMask {
|
struct CmdImage {
|
||||||
float mask;
|
uint index;
|
||||||
|
ivec2 offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CmdSolidMask_size 4
|
#define CmdImage_size 8
|
||||||
|
|
||||||
CmdSolidMaskRef CmdSolidMask_index(CmdSolidMaskRef ref, uint index) {
|
CmdImageRef CmdImage_index(CmdImageRef ref, uint index) {
|
||||||
return CmdSolidMaskRef(ref.offset + index * CmdSolidMask_size);
|
return CmdImageRef(ref.offset + index * CmdImage_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdAlpha {
|
||||||
|
float alpha;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CmdAlpha_size 4
|
||||||
|
|
||||||
|
CmdAlphaRef CmdAlpha_index(CmdAlphaRef ref, uint index) {
|
||||||
|
return CmdAlphaRef(ref.offset + index * CmdAlpha_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct CmdJump {
|
struct CmdJump {
|
||||||
|
@ -177,382 +94,185 @@ CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#define Cmd_End 0
|
#define Cmd_End 0
|
||||||
#define Cmd_Circle 1
|
#define Cmd_Fill 1
|
||||||
#define Cmd_Line 2
|
#define Cmd_Stroke 2
|
||||||
#define Cmd_Fill 3
|
#define Cmd_Solid 3
|
||||||
#define Cmd_FillMask 4
|
#define Cmd_Alpha 4
|
||||||
#define Cmd_FillMaskInv 5
|
#define Cmd_Color 5
|
||||||
#define Cmd_BeginClip 6
|
#define Cmd_Image 6
|
||||||
#define Cmd_BeginSolidClip 7
|
#define Cmd_BeginClip 7
|
||||||
#define Cmd_EndClip 8
|
#define Cmd_EndClip 8
|
||||||
#define Cmd_Stroke 9
|
#define Cmd_Jump 9
|
||||||
#define Cmd_Solid 10
|
#define Cmd_size 12
|
||||||
#define Cmd_SolidMask 11
|
|
||||||
#define Cmd_Jump 12
|
|
||||||
#define Cmd_size 20
|
|
||||||
|
|
||||||
CmdRef Cmd_index(CmdRef ref, uint index) {
|
CmdRef Cmd_index(CmdRef ref, uint index) {
|
||||||
return CmdRef(ref.offset + index * Cmd_size);
|
return CmdRef(ref.offset + index * Cmd_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Segment {
|
struct CmdTag {
|
||||||
vec2 start;
|
uint tag;
|
||||||
vec2 end;
|
uint flags;
|
||||||
float y_edge;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define Segment_size 20
|
CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) {
|
||||||
|
|
||||||
SegmentRef Segment_index(SegmentRef ref, uint index) {
|
|
||||||
return SegmentRef(ref.offset + index * Segment_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct SegChunk {
|
|
||||||
uint n;
|
|
||||||
SegChunkRef next;
|
|
||||||
SegmentRef segs;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define SegChunk_size 12
|
|
||||||
|
|
||||||
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
|
|
||||||
return SegChunkRef(ref.offset + index * SegChunk_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
CmdCircle CmdCircle_read(CmdCircleRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = ptcl[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = ptcl[ix + 1];
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
uint raw2 = ptcl[ix + 2];
|
|
||||||
uint raw3 = ptcl[ix + 3];
|
|
||||||
CmdCircle s;
|
|
||||||
s.center = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
|
||||||
s.radius = uintBitsToFloat(raw2);
|
|
||||||
s.rgba_color = raw3;
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void CmdCircle_write(CmdCircleRef ref, CmdCircle s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
ptcl[ix + 0] = floatBitsToUint(s.center.x);
|
|
||||||
ptcl[ix + 1] = floatBitsToUint(s.center.y);
|
|
||||||
ptcl[ix + 2] = floatBitsToUint(s.radius);
|
|
||||||
ptcl[ix + 3] = s.rgba_color;
|
|
||||||
}
|
|
||||||
|
|
||||||
CmdLine CmdLine_read(CmdLineRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = ptcl[ix + 0];
|
|
||||||
uint raw1 = ptcl[ix + 1];
|
|
||||||
uint raw2 = ptcl[ix + 2];
|
|
||||||
uint raw3 = ptcl[ix + 3];
|
|
||||||
CmdLine s;
|
|
||||||
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
|
||||||
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void CmdLine_write(CmdLineRef ref, CmdLine s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
ptcl[ix + 0] = floatBitsToUint(s.start.x);
|
|
||||||
ptcl[ix + 1] = floatBitsToUint(s.start.y);
|
|
||||||
ptcl[ix + 2] = floatBitsToUint(s.end.x);
|
|
||||||
ptcl[ix + 3] = floatBitsToUint(s.end.y);
|
|
||||||
}
|
|
||||||
|
|
||||||
CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = ptcl[ix + 0];
|
|
||||||
uint raw1 = ptcl[ix + 1];
|
|
||||||
uint raw2 = ptcl[ix + 2];
|
|
||||||
CmdStroke s;
|
CmdStroke s;
|
||||||
s.tile_ref = raw0;
|
s.tile_ref = raw0;
|
||||||
s.half_width = uintBitsToFloat(raw1);
|
s.half_width = uintBitsToFloat(raw1);
|
||||||
s.rgba_color = raw2;
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
|
void CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
ptcl[ix + 0] = s.tile_ref;
|
write_mem(a, ix + 0, s.tile_ref);
|
||||||
ptcl[ix + 1] = floatBitsToUint(s.half_width);
|
write_mem(a, ix + 1, floatBitsToUint(s.half_width));
|
||||||
ptcl[ix + 2] = s.rgba_color;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdFill CmdFill_read(CmdFillRef ref) {
|
CmdFill CmdFill_read(Alloc a, CmdFillRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = ptcl[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = ptcl[ix + 1];
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
uint raw2 = ptcl[ix + 2];
|
|
||||||
CmdFill s;
|
CmdFill s;
|
||||||
s.tile_ref = raw0;
|
s.tile_ref = raw0;
|
||||||
s.backdrop = int(raw1);
|
s.backdrop = int(raw1);
|
||||||
s.rgba_color = raw2;
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CmdFill_write(CmdFillRef ref, CmdFill s) {
|
void CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
ptcl[ix + 0] = s.tile_ref;
|
write_mem(a, ix + 0, s.tile_ref);
|
||||||
ptcl[ix + 1] = uint(s.backdrop);
|
write_mem(a, ix + 1, uint(s.backdrop));
|
||||||
ptcl[ix + 2] = s.rgba_color;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdFillMask CmdFillMask_read(CmdFillMaskRef ref) {
|
CmdColor CmdColor_read(Alloc a, CmdColorRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = ptcl[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = ptcl[ix + 1];
|
CmdColor s;
|
||||||
uint raw2 = ptcl[ix + 2];
|
|
||||||
CmdFillMask s;
|
|
||||||
s.tile_ref = raw0;
|
|
||||||
s.backdrop = int(raw1);
|
|
||||||
s.mask = uintBitsToFloat(raw2);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void CmdFillMask_write(CmdFillMaskRef ref, CmdFillMask s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
ptcl[ix + 0] = s.tile_ref;
|
|
||||||
ptcl[ix + 1] = uint(s.backdrop);
|
|
||||||
ptcl[ix + 2] = floatBitsToUint(s.mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
CmdBeginClip CmdBeginClip_read(CmdBeginClipRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = ptcl[ix + 0];
|
|
||||||
uint raw1 = ptcl[ix + 1];
|
|
||||||
CmdBeginClip s;
|
|
||||||
s.tile_ref = raw0;
|
|
||||||
s.backdrop = int(raw1);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void CmdBeginClip_write(CmdBeginClipRef ref, CmdBeginClip s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
ptcl[ix + 0] = s.tile_ref;
|
|
||||||
ptcl[ix + 1] = uint(s.backdrop);
|
|
||||||
}
|
|
||||||
|
|
||||||
CmdBeginSolidClip CmdBeginSolidClip_read(CmdBeginSolidClipRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = ptcl[ix + 0];
|
|
||||||
CmdBeginSolidClip s;
|
|
||||||
s.alpha = uintBitsToFloat(raw0);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void CmdBeginSolidClip_write(CmdBeginSolidClipRef ref, CmdBeginSolidClip s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
ptcl[ix + 0] = floatBitsToUint(s.alpha);
|
|
||||||
}
|
|
||||||
|
|
||||||
CmdEndClip CmdEndClip_read(CmdEndClipRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = ptcl[ix + 0];
|
|
||||||
CmdEndClip s;
|
|
||||||
s.alpha = uintBitsToFloat(raw0);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void CmdEndClip_write(CmdEndClipRef ref, CmdEndClip s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
ptcl[ix + 0] = floatBitsToUint(s.alpha);
|
|
||||||
}
|
|
||||||
|
|
||||||
CmdSolid CmdSolid_read(CmdSolidRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = ptcl[ix + 0];
|
|
||||||
CmdSolid s;
|
|
||||||
s.rgba_color = raw0;
|
s.rgba_color = raw0;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CmdSolid_write(CmdSolidRef ref, CmdSolid s) {
|
void CmdColor_write(Alloc a, CmdColorRef ref, CmdColor s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
ptcl[ix + 0] = s.rgba_color;
|
write_mem(a, ix + 0, s.rgba_color);
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdSolidMask CmdSolidMask_read(CmdSolidMaskRef ref) {
|
CmdImage CmdImage_read(Alloc a, CmdImageRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = ptcl[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
CmdSolidMask s;
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
s.mask = uintBitsToFloat(raw0);
|
CmdImage s;
|
||||||
|
s.index = raw0;
|
||||||
|
s.offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CmdSolidMask_write(CmdSolidMaskRef ref, CmdSolidMask s) {
|
void CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
ptcl[ix + 0] = floatBitsToUint(s.mask);
|
write_mem(a, ix + 0, s.index);
|
||||||
|
write_mem(a, ix + 1, (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16));
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdJump CmdJump_read(CmdJumpRef ref) {
|
CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = ptcl[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
|
CmdAlpha s;
|
||||||
|
s.alpha = uintBitsToFloat(raw0);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CmdAlpha_write(Alloc a, CmdAlphaRef ref, CmdAlpha s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
write_mem(a, ix + 0, floatBitsToUint(s.alpha));
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
CmdJump s;
|
CmdJump s;
|
||||||
s.new_ref = raw0;
|
s.new_ref = raw0;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CmdJump_write(CmdJumpRef ref, CmdJump s) {
|
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
ptcl[ix + 0] = s.new_ref;
|
write_mem(a, ix + 0, s.new_ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint Cmd_tag(CmdRef ref) {
|
CmdTag Cmd_tag(Alloc a, CmdRef ref) {
|
||||||
return ptcl[ref.offset >> 2];
|
uint tag_and_flags = read_mem(a, ref.offset >> 2);
|
||||||
|
return CmdTag(tag_and_flags & 0xffff, tag_and_flags >> 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdCircle Cmd_Circle_read(CmdRef ref) {
|
CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) {
|
||||||
return CmdCircle_read(CmdCircleRef(ref.offset + 4));
|
return CmdFill_read(a, CmdFillRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdLine Cmd_Line_read(CmdRef ref) {
|
CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) {
|
||||||
return CmdLine_read(CmdLineRef(ref.offset + 4));
|
return CmdStroke_read(a, CmdStrokeRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdFill Cmd_Fill_read(CmdRef ref) {
|
CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) {
|
||||||
return CmdFill_read(CmdFillRef(ref.offset + 4));
|
return CmdAlpha_read(a, CmdAlphaRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdFillMask Cmd_FillMask_read(CmdRef ref) {
|
CmdColor Cmd_Color_read(Alloc a, CmdRef ref) {
|
||||||
return CmdFillMask_read(CmdFillMaskRef(ref.offset + 4));
|
return CmdColor_read(a, CmdColorRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdFillMask Cmd_FillMaskInv_read(CmdRef ref) {
|
CmdImage Cmd_Image_read(Alloc a, CmdRef ref) {
|
||||||
return CmdFillMask_read(CmdFillMaskRef(ref.offset + 4));
|
return CmdImage_read(a, CmdImageRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdBeginClip Cmd_BeginClip_read(CmdRef ref) {
|
CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) {
|
||||||
return CmdBeginClip_read(CmdBeginClipRef(ref.offset + 4));
|
return CmdJump_read(a, CmdJumpRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdBeginSolidClip Cmd_BeginSolidClip_read(CmdRef ref) {
|
void Cmd_End_write(Alloc a, CmdRef ref) {
|
||||||
return CmdBeginSolidClip_read(CmdBeginSolidClipRef(ref.offset + 4));
|
write_mem(a, ref.offset >> 2, Cmd_End);
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdEndClip Cmd_EndClip_read(CmdRef ref) {
|
void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) {
|
||||||
return CmdEndClip_read(CmdEndClipRef(ref.offset + 4));
|
write_mem(a, ref.offset >> 2, Cmd_Fill);
|
||||||
|
CmdFill_write(a, CmdFillRef(ref.offset + 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdStroke Cmd_Stroke_read(CmdRef ref) {
|
void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) {
|
||||||
return CmdStroke_read(CmdStrokeRef(ref.offset + 4));
|
write_mem(a, ref.offset >> 2, Cmd_Stroke);
|
||||||
|
CmdStroke_write(a, CmdStrokeRef(ref.offset + 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdSolid Cmd_Solid_read(CmdRef ref) {
|
void Cmd_Solid_write(Alloc a, CmdRef ref) {
|
||||||
return CmdSolid_read(CmdSolidRef(ref.offset + 4));
|
write_mem(a, ref.offset >> 2, Cmd_Solid);
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdSolidMask Cmd_SolidMask_read(CmdRef ref) {
|
void Cmd_Alpha_write(Alloc a, CmdRef ref, CmdAlpha s) {
|
||||||
return CmdSolidMask_read(CmdSolidMaskRef(ref.offset + 4));
|
write_mem(a, ref.offset >> 2, Cmd_Alpha);
|
||||||
|
CmdAlpha_write(a, CmdAlphaRef(ref.offset + 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdJump Cmd_Jump_read(CmdRef ref) {
|
void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) {
|
||||||
return CmdJump_read(CmdJumpRef(ref.offset + 4));
|
write_mem(a, ref.offset >> 2, Cmd_Color);
|
||||||
|
CmdColor_write(a, CmdColorRef(ref.offset + 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Cmd_End_write(CmdRef ref) {
|
void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) {
|
||||||
ptcl[ref.offset >> 2] = Cmd_End;
|
write_mem(a, ref.offset >> 2, Cmd_Image);
|
||||||
|
CmdImage_write(a, CmdImageRef(ref.offset + 4), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Cmd_Circle_write(CmdRef ref, CmdCircle s) {
|
void Cmd_BeginClip_write(Alloc a, CmdRef ref) {
|
||||||
ptcl[ref.offset >> 2] = Cmd_Circle;
|
write_mem(a, ref.offset >> 2, Cmd_BeginClip);
|
||||||
CmdCircle_write(CmdCircleRef(ref.offset + 4), s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Cmd_Line_write(CmdRef ref, CmdLine s) {
|
void Cmd_EndClip_write(Alloc a, CmdRef ref) {
|
||||||
ptcl[ref.offset >> 2] = Cmd_Line;
|
write_mem(a, ref.offset >> 2, Cmd_EndClip);
|
||||||
CmdLine_write(CmdLineRef(ref.offset + 4), s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Cmd_Fill_write(CmdRef ref, CmdFill s) {
|
void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) {
|
||||||
ptcl[ref.offset >> 2] = Cmd_Fill;
|
write_mem(a, ref.offset >> 2, Cmd_Jump);
|
||||||
CmdFill_write(CmdFillRef(ref.offset + 4), s);
|
CmdJump_write(a, CmdJumpRef(ref.offset + 4), s);
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_FillMask_write(CmdRef ref, CmdFillMask s) {
|
|
||||||
ptcl[ref.offset >> 2] = Cmd_FillMask;
|
|
||||||
CmdFillMask_write(CmdFillMaskRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_FillMaskInv_write(CmdRef ref, CmdFillMask s) {
|
|
||||||
ptcl[ref.offset >> 2] = Cmd_FillMaskInv;
|
|
||||||
CmdFillMask_write(CmdFillMaskRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_BeginClip_write(CmdRef ref, CmdBeginClip s) {
|
|
||||||
ptcl[ref.offset >> 2] = Cmd_BeginClip;
|
|
||||||
CmdBeginClip_write(CmdBeginClipRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_BeginSolidClip_write(CmdRef ref, CmdBeginSolidClip s) {
|
|
||||||
ptcl[ref.offset >> 2] = Cmd_BeginSolidClip;
|
|
||||||
CmdBeginSolidClip_write(CmdBeginSolidClipRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_EndClip_write(CmdRef ref, CmdEndClip s) {
|
|
||||||
ptcl[ref.offset >> 2] = Cmd_EndClip;
|
|
||||||
CmdEndClip_write(CmdEndClipRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_Stroke_write(CmdRef ref, CmdStroke s) {
|
|
||||||
ptcl[ref.offset >> 2] = Cmd_Stroke;
|
|
||||||
CmdStroke_write(CmdStrokeRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_Solid_write(CmdRef ref, CmdSolid s) {
|
|
||||||
ptcl[ref.offset >> 2] = Cmd_Solid;
|
|
||||||
CmdSolid_write(CmdSolidRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_SolidMask_write(CmdRef ref, CmdSolidMask s) {
|
|
||||||
ptcl[ref.offset >> 2] = Cmd_SolidMask;
|
|
||||||
CmdSolidMask_write(CmdSolidMaskRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_Jump_write(CmdRef ref, CmdJump s) {
|
|
||||||
ptcl[ref.offset >> 2] = Cmd_Jump;
|
|
||||||
CmdJump_write(CmdJumpRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
Segment Segment_read(SegmentRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = ptcl[ix + 0];
|
|
||||||
uint raw1 = ptcl[ix + 1];
|
|
||||||
uint raw2 = ptcl[ix + 2];
|
|
||||||
uint raw3 = ptcl[ix + 3];
|
|
||||||
uint raw4 = ptcl[ix + 4];
|
|
||||||
Segment s;
|
|
||||||
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
|
||||||
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
|
||||||
s.y_edge = uintBitsToFloat(raw4);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Segment_write(SegmentRef ref, Segment s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
ptcl[ix + 0] = floatBitsToUint(s.start.x);
|
|
||||||
ptcl[ix + 1] = floatBitsToUint(s.start.y);
|
|
||||||
ptcl[ix + 2] = floatBitsToUint(s.end.x);
|
|
||||||
ptcl[ix + 3] = floatBitsToUint(s.end.y);
|
|
||||||
ptcl[ix + 4] = floatBitsToUint(s.y_edge);
|
|
||||||
}
|
|
||||||
|
|
||||||
SegChunk SegChunk_read(SegChunkRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = ptcl[ix + 0];
|
|
||||||
uint raw1 = ptcl[ix + 1];
|
|
||||||
uint raw2 = ptcl[ix + 2];
|
|
||||||
SegChunk s;
|
|
||||||
s.n = raw0;
|
|
||||||
s.next = SegChunkRef(raw1);
|
|
||||||
s.segs = SegmentRef(raw2);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SegChunk_write(SegChunkRef ref, SegChunk s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
ptcl[ix + 0] = s.n;
|
|
||||||
ptcl[ix + 1] = s.next.offset;
|
|
||||||
ptcl[ix + 2] = s.segs.offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Code auto-generated by piet-gpu-derive
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
struct LineSegRef {
|
struct LineSegRef {
|
||||||
|
@ -12,15 +14,11 @@ struct CubicSegRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FillRef {
|
struct FillColorRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FillMaskRef {
|
struct FillImageRef {
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct StrokeRef {
|
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -36,6 +34,10 @@ struct ClipRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct SetFillModeRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
struct ElementRef {
|
struct ElementRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
@ -76,34 +78,25 @@ CubicSegRef CubicSeg_index(CubicSegRef ref, uint index) {
|
||||||
return CubicSegRef(ref.offset + index * CubicSeg_size);
|
return CubicSegRef(ref.offset + index * CubicSeg_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Fill {
|
struct FillColor {
|
||||||
uint rgba_color;
|
uint rgba_color;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define Fill_size 4
|
#define FillColor_size 4
|
||||||
|
|
||||||
FillRef Fill_index(FillRef ref, uint index) {
|
FillColorRef FillColor_index(FillColorRef ref, uint index) {
|
||||||
return FillRef(ref.offset + index * Fill_size);
|
return FillColorRef(ref.offset + index * FillColor_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct FillMask {
|
struct FillImage {
|
||||||
float mask;
|
uint index;
|
||||||
|
ivec2 offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define FillMask_size 4
|
#define FillImage_size 8
|
||||||
|
|
||||||
FillMaskRef FillMask_index(FillMaskRef ref, uint index) {
|
FillImageRef FillImage_index(FillImageRef ref, uint index) {
|
||||||
return FillMaskRef(ref.offset + index * FillMask_size);
|
return FillImageRef(ref.offset + index * FillImage_size);
|
||||||
}
|
|
||||||
|
|
||||||
struct Stroke {
|
|
||||||
uint rgba_color;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define Stroke_size 4
|
|
||||||
|
|
||||||
StrokeRef Stroke_index(StrokeRef ref, uint index) {
|
|
||||||
return StrokeRef(ref.offset + index * Stroke_size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct SetLineWidth {
|
struct SetLineWidth {
|
||||||
|
@ -137,27 +130,38 @@ ClipRef Clip_index(ClipRef ref, uint index) {
|
||||||
return ClipRef(ref.offset + index * Clip_size);
|
return ClipRef(ref.offset + index * Clip_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct SetFillMode {
|
||||||
|
uint fill_mode;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define SetFillMode_size 4
|
||||||
|
|
||||||
|
SetFillModeRef SetFillMode_index(SetFillModeRef ref, uint index) {
|
||||||
|
return SetFillModeRef(ref.offset + index * SetFillMode_size);
|
||||||
|
}
|
||||||
|
|
||||||
#define Element_Nop 0
|
#define Element_Nop 0
|
||||||
#define Element_StrokeLine 1
|
#define Element_Line 1
|
||||||
#define Element_FillLine 2
|
#define Element_Quad 2
|
||||||
#define Element_StrokeQuad 3
|
#define Element_Cubic 3
|
||||||
#define Element_FillQuad 4
|
#define Element_FillColor 4
|
||||||
#define Element_StrokeCubic 5
|
#define Element_SetLineWidth 5
|
||||||
#define Element_FillCubic 6
|
#define Element_Transform 6
|
||||||
#define Element_Stroke 7
|
#define Element_BeginClip 7
|
||||||
#define Element_Fill 8
|
#define Element_EndClip 8
|
||||||
#define Element_SetLineWidth 9
|
#define Element_FillImage 9
|
||||||
#define Element_Transform 10
|
#define Element_SetFillMode 10
|
||||||
#define Element_FillMask 11
|
|
||||||
#define Element_FillMaskInv 12
|
|
||||||
#define Element_BeginClip 13
|
|
||||||
#define Element_EndClip 14
|
|
||||||
#define Element_size 36
|
#define Element_size 36
|
||||||
|
|
||||||
ElementRef Element_index(ElementRef ref, uint index) {
|
ElementRef Element_index(ElementRef ref, uint index) {
|
||||||
return ElementRef(ref.offset + index * Element_size);
|
return ElementRef(ref.offset + index * Element_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ElementTag {
|
||||||
|
uint tag;
|
||||||
|
uint flags;
|
||||||
|
};
|
||||||
|
|
||||||
LineSeg LineSeg_read(LineSegRef ref) {
|
LineSeg LineSeg_read(LineSegRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = scene[ix + 0];
|
uint raw0 = scene[ix + 0];
|
||||||
|
@ -203,27 +207,21 @@ CubicSeg CubicSeg_read(CubicSegRef ref) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
Fill Fill_read(FillRef ref) {
|
FillColor FillColor_read(FillColorRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = scene[ix + 0];
|
uint raw0 = scene[ix + 0];
|
||||||
Fill s;
|
FillColor s;
|
||||||
s.rgba_color = raw0;
|
s.rgba_color = raw0;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
FillMask FillMask_read(FillMaskRef ref) {
|
FillImage FillImage_read(FillImageRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = scene[ix + 0];
|
uint raw0 = scene[ix + 0];
|
||||||
FillMask s;
|
uint raw1 = scene[ix + 1];
|
||||||
s.mask = uintBitsToFloat(raw0);
|
FillImage s;
|
||||||
return s;
|
s.index = raw0;
|
||||||
}
|
s.offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16);
|
||||||
|
|
||||||
Stroke Stroke_read(StrokeRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = scene[ix + 0];
|
|
||||||
Stroke s;
|
|
||||||
s.rgba_color = raw0;
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -260,40 +258,33 @@ Clip Clip_read(ClipRef ref) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint Element_tag(ElementRef ref) {
|
SetFillMode SetFillMode_read(SetFillModeRef ref) {
|
||||||
return scene[ref.offset >> 2];
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = scene[ix + 0];
|
||||||
|
SetFillMode s;
|
||||||
|
s.fill_mode = raw0;
|
||||||
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
LineSeg Element_StrokeLine_read(ElementRef ref) {
|
ElementTag Element_tag(ElementRef ref) {
|
||||||
|
uint tag_and_flags = scene[ref.offset >> 2];
|
||||||
|
return ElementTag(tag_and_flags & 0xffff, tag_and_flags >> 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
LineSeg Element_Line_read(ElementRef ref) {
|
||||||
return LineSeg_read(LineSegRef(ref.offset + 4));
|
return LineSeg_read(LineSegRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
LineSeg Element_FillLine_read(ElementRef ref) {
|
QuadSeg Element_Quad_read(ElementRef ref) {
|
||||||
return LineSeg_read(LineSegRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
QuadSeg Element_StrokeQuad_read(ElementRef ref) {
|
|
||||||
return QuadSeg_read(QuadSegRef(ref.offset + 4));
|
return QuadSeg_read(QuadSegRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
QuadSeg Element_FillQuad_read(ElementRef ref) {
|
CubicSeg Element_Cubic_read(ElementRef ref) {
|
||||||
return QuadSeg_read(QuadSegRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
CubicSeg Element_StrokeCubic_read(ElementRef ref) {
|
|
||||||
return CubicSeg_read(CubicSegRef(ref.offset + 4));
|
return CubicSeg_read(CubicSegRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
CubicSeg Element_FillCubic_read(ElementRef ref) {
|
FillColor Element_FillColor_read(ElementRef ref) {
|
||||||
return CubicSeg_read(CubicSegRef(ref.offset + 4));
|
return FillColor_read(FillColorRef(ref.offset + 4));
|
||||||
}
|
|
||||||
|
|
||||||
Stroke Element_Stroke_read(ElementRef ref) {
|
|
||||||
return Stroke_read(StrokeRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
Fill Element_Fill_read(ElementRef ref) {
|
|
||||||
return Fill_read(FillRef(ref.offset + 4));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SetLineWidth Element_SetLineWidth_read(ElementRef ref) {
|
SetLineWidth Element_SetLineWidth_read(ElementRef ref) {
|
||||||
|
@ -304,14 +295,6 @@ Transform Element_Transform_read(ElementRef ref) {
|
||||||
return Transform_read(TransformRef(ref.offset + 4));
|
return Transform_read(TransformRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
FillMask Element_FillMask_read(ElementRef ref) {
|
|
||||||
return FillMask_read(FillMaskRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
FillMask Element_FillMaskInv_read(ElementRef ref) {
|
|
||||||
return FillMask_read(FillMaskRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
Clip Element_BeginClip_read(ElementRef ref) {
|
Clip Element_BeginClip_read(ElementRef ref) {
|
||||||
return Clip_read(ClipRef(ref.offset + 4));
|
return Clip_read(ClipRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
@ -320,3 +303,11 @@ Clip Element_EndClip_read(ElementRef ref) {
|
||||||
return Clip_read(ClipRef(ref.offset + 4));
|
return Clip_read(ClipRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FillImage Element_FillImage_read(ElementRef ref) {
|
||||||
|
return FillImage_read(FillImageRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
SetFillMode Element_SetFillMode_read(ElementRef ref) {
|
||||||
|
return SetFillMode_read(SetFillModeRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Various constants for the sizes of groups and tiles.
|
// Various constants for the sizes of groups and tiles.
|
||||||
|
|
||||||
// Much of this will be made dynamic in various ways, but for now it's easiest
|
// Much of this will be made dynamic in various ways, but for now it's easiest
|
||||||
|
@ -8,15 +10,14 @@
|
||||||
#define LG_WG_FACTOR 1
|
#define LG_WG_FACTOR 1
|
||||||
#define WG_FACTOR (1<<LG_WG_FACTOR)
|
#define WG_FACTOR (1<<LG_WG_FACTOR)
|
||||||
|
|
||||||
// TODO: compute all these
|
|
||||||
|
|
||||||
#define WIDTH_IN_TILES 128
|
|
||||||
#define HEIGHT_IN_TILES 96
|
|
||||||
#define TILE_WIDTH_PX 16
|
#define TILE_WIDTH_PX 16
|
||||||
#define TILE_HEIGHT_PX 16
|
#define TILE_HEIGHT_PX 16
|
||||||
|
|
||||||
#define PTCL_INITIAL_ALLOC 1024
|
#define PTCL_INITIAL_ALLOC 1024
|
||||||
|
|
||||||
|
// This is now set in the ninja file during compilation
|
||||||
|
//#define ENABLE_IMAGE_INDICES
|
||||||
|
|
||||||
// These should probably be renamed and/or reworked. In the binning
|
// These should probably be renamed and/or reworked. In the binning
|
||||||
// kernel, they represent the number of bins. Also, the workgroup size
|
// kernel, they represent the number of bins. Also, the workgroup size
|
||||||
// of that kernel is equal to the number of bins, but should probably
|
// of that kernel is equal to the number of bins, but should probably
|
||||||
|
@ -26,3 +27,28 @@
|
||||||
#define N_TILE (N_TILE_X * N_TILE_Y)
|
#define N_TILE (N_TILE_X * N_TILE_Y)
|
||||||
#define LG_N_TILE (7 + LG_WG_FACTOR)
|
#define LG_N_TILE (7 + LG_WG_FACTOR)
|
||||||
#define N_SLICE (N_TILE / 32)
|
#define N_SLICE (N_TILE / 32)
|
||||||
|
|
||||||
|
struct Config {
|
||||||
|
uint n_elements; // paths
|
||||||
|
uint n_pathseg;
|
||||||
|
uint width_in_tiles;
|
||||||
|
uint height_in_tiles;
|
||||||
|
Alloc tile_alloc;
|
||||||
|
Alloc bin_alloc;
|
||||||
|
Alloc ptcl_alloc;
|
||||||
|
Alloc pathseg_alloc;
|
||||||
|
Alloc anno_alloc;
|
||||||
|
Alloc trans_alloc;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fill modes.
|
||||||
|
#define MODE_NONZERO 0
|
||||||
|
#define MODE_STROKE 1
|
||||||
|
|
||||||
|
// Size of kernel4 clip state, in words.
|
||||||
|
#define CLIP_STATE_SIZE 2
|
||||||
|
|
||||||
|
// fill_mode_from_flags extracts the fill mode from tag flags.
|
||||||
|
uint fill_mode_from_flags(uint flags) {
|
||||||
|
return flags & 0x1;
|
||||||
|
}
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Code auto-generated by piet-gpu-derive
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
struct StateRef {
|
struct StateRef {
|
||||||
|
@ -12,9 +14,10 @@ struct State {
|
||||||
uint flags;
|
uint flags;
|
||||||
uint path_count;
|
uint path_count;
|
||||||
uint pathseg_count;
|
uint pathseg_count;
|
||||||
|
uint trans_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define State_size 56
|
#define State_size 60
|
||||||
|
|
||||||
StateRef State_index(StateRef ref, uint index) {
|
StateRef State_index(StateRef ref, uint index) {
|
||||||
return StateRef(ref.offset + index * State_size);
|
return StateRef(ref.offset + index * State_size);
|
||||||
|
@ -36,6 +39,7 @@ State State_read(StateRef ref) {
|
||||||
uint raw11 = state[ix + 11];
|
uint raw11 = state[ix + 11];
|
||||||
uint raw12 = state[ix + 12];
|
uint raw12 = state[ix + 12];
|
||||||
uint raw13 = state[ix + 13];
|
uint raw13 = state[ix + 13];
|
||||||
|
uint raw14 = state[ix + 14];
|
||||||
State s;
|
State s;
|
||||||
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||||
|
@ -44,6 +48,7 @@ State State_read(StateRef ref) {
|
||||||
s.flags = raw11;
|
s.flags = raw11;
|
||||||
s.path_count = raw12;
|
s.path_count = raw12;
|
||||||
s.pathseg_count = raw13;
|
s.pathseg_count = raw13;
|
||||||
|
s.trans_count = raw14;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,5 +68,6 @@ void State_write(StateRef ref, State s) {
|
||||||
state[ix + 11] = s.flags;
|
state[ix + 11] = s.flags;
|
||||||
state[ix + 12] = s.path_count;
|
state[ix + 12] = s.path_count;
|
||||||
state[ix + 13] = s.pathseg_count;
|
state[ix + 13] = s.pathseg_count;
|
||||||
|
state[ix + 14] = s.trans_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Code auto-generated by piet-gpu-derive
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
struct PathRef {
|
struct PathRef {
|
||||||
|
@ -12,6 +14,10 @@ struct TileSegRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct TransformSegRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
struct Path {
|
struct Path {
|
||||||
uvec4 bbox;
|
uvec4 bbox;
|
||||||
TileRef tiles;
|
TileRef tiles;
|
||||||
|
@ -35,8 +41,8 @@ TileRef Tile_index(TileRef ref, uint index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct TileSeg {
|
struct TileSeg {
|
||||||
vec2 start;
|
vec2 origin;
|
||||||
vec2 end;
|
vec2 vector;
|
||||||
float y_edge;
|
float y_edge;
|
||||||
TileSegRef next;
|
TileSegRef next;
|
||||||
};
|
};
|
||||||
|
@ -47,63 +53,98 @@ TileSegRef TileSeg_index(TileSegRef ref, uint index) {
|
||||||
return TileSegRef(ref.offset + index * TileSeg_size);
|
return TileSegRef(ref.offset + index * TileSeg_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
Path Path_read(PathRef ref) {
|
struct TransformSeg {
|
||||||
|
vec4 mat;
|
||||||
|
vec2 translate;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define TransformSeg_size 24
|
||||||
|
|
||||||
|
TransformSegRef TransformSeg_index(TransformSegRef ref, uint index) {
|
||||||
|
return TransformSegRef(ref.offset + index * TransformSeg_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
Path Path_read(Alloc a, PathRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = tile[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = tile[ix + 1];
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
uint raw2 = tile[ix + 2];
|
uint raw2 = read_mem(a, ix + 2);
|
||||||
Path s;
|
Path s;
|
||||||
s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16);
|
s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16);
|
||||||
s.tiles = TileRef(raw2);
|
s.tiles = TileRef(raw2);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Path_write(PathRef ref, Path s) {
|
void Path_write(Alloc a, PathRef ref, Path s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
tile[ix + 0] = s.bbox.x | (s.bbox.y << 16);
|
write_mem(a, ix + 0, s.bbox.x | (s.bbox.y << 16));
|
||||||
tile[ix + 1] = s.bbox.z | (s.bbox.w << 16);
|
write_mem(a, ix + 1, s.bbox.z | (s.bbox.w << 16));
|
||||||
tile[ix + 2] = s.tiles.offset;
|
write_mem(a, ix + 2, s.tiles.offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
Tile Tile_read(TileRef ref) {
|
Tile Tile_read(Alloc a, TileRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = tile[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = tile[ix + 1];
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
Tile s;
|
Tile s;
|
||||||
s.tile = TileSegRef(raw0);
|
s.tile = TileSegRef(raw0);
|
||||||
s.backdrop = int(raw1);
|
s.backdrop = int(raw1);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Tile_write(TileRef ref, Tile s) {
|
void Tile_write(Alloc a, TileRef ref, Tile s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
tile[ix + 0] = s.tile.offset;
|
write_mem(a, ix + 0, s.tile.offset);
|
||||||
tile[ix + 1] = uint(s.backdrop);
|
write_mem(a, ix + 1, uint(s.backdrop));
|
||||||
}
|
}
|
||||||
|
|
||||||
TileSeg TileSeg_read(TileSegRef ref) {
|
TileSeg TileSeg_read(Alloc a, TileSegRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = tile[ix + 0];
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
uint raw1 = tile[ix + 1];
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
uint raw2 = tile[ix + 2];
|
uint raw2 = read_mem(a, ix + 2);
|
||||||
uint raw3 = tile[ix + 3];
|
uint raw3 = read_mem(a, ix + 3);
|
||||||
uint raw4 = tile[ix + 4];
|
uint raw4 = read_mem(a, ix + 4);
|
||||||
uint raw5 = tile[ix + 5];
|
uint raw5 = read_mem(a, ix + 5);
|
||||||
TileSeg s;
|
TileSeg s;
|
||||||
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
s.y_edge = uintBitsToFloat(raw4);
|
s.y_edge = uintBitsToFloat(raw4);
|
||||||
s.next = TileSegRef(raw5);
|
s.next = TileSegRef(raw5);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void TileSeg_write(TileSegRef ref, TileSeg s) {
|
void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
tile[ix + 0] = floatBitsToUint(s.start.x);
|
write_mem(a, ix + 0, floatBitsToUint(s.origin.x));
|
||||||
tile[ix + 1] = floatBitsToUint(s.start.y);
|
write_mem(a, ix + 1, floatBitsToUint(s.origin.y));
|
||||||
tile[ix + 2] = floatBitsToUint(s.end.x);
|
write_mem(a, ix + 2, floatBitsToUint(s.vector.x));
|
||||||
tile[ix + 3] = floatBitsToUint(s.end.y);
|
write_mem(a, ix + 3, floatBitsToUint(s.vector.y));
|
||||||
tile[ix + 4] = floatBitsToUint(s.y_edge);
|
write_mem(a, ix + 4, floatBitsToUint(s.y_edge));
|
||||||
tile[ix + 5] = s.next.offset;
|
write_mem(a, ix + 5, s.next.offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = read_mem(a, ix + 0);
|
||||||
|
uint raw1 = read_mem(a, ix + 1);
|
||||||
|
uint raw2 = read_mem(a, ix + 2);
|
||||||
|
uint raw3 = read_mem(a, ix + 3);
|
||||||
|
uint raw4 = read_mem(a, ix + 4);
|
||||||
|
uint raw5 = read_mem(a, ix + 5);
|
||||||
|
TransformSeg s;
|
||||||
|
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
write_mem(a, ix + 0, floatBitsToUint(s.mat.x));
|
||||||
|
write_mem(a, ix + 1, floatBitsToUint(s.mat.y));
|
||||||
|
write_mem(a, ix + 2, floatBitsToUint(s.mat.z));
|
||||||
|
write_mem(a, ix + 3, floatBitsToUint(s.mat.w));
|
||||||
|
write_mem(a, ix + 4, floatBitsToUint(s.translate.x));
|
||||||
|
write_mem(a, ix + 5, floatBitsToUint(s.translate.y));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
// Allocation and initialization of tiles for paths.
|
// Allocation and initialization of tiles for paths.
|
||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "mem.h"
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
|
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
|
||||||
|
@ -10,18 +13,8 @@
|
||||||
|
|
||||||
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
||||||
|
|
||||||
layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||||
uint[] annotated;
|
Config conf;
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 1) buffer AllocBuf {
|
|
||||||
uint n_elements;
|
|
||||||
uint n_pathseg;
|
|
||||||
uint alloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer TileBuf {
|
|
||||||
uint[] tile;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "annotated.h"
|
#include "annotated.h"
|
||||||
|
@ -32,39 +25,37 @@ layout(set = 0, binding = 2) buffer TileBuf {
|
||||||
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
||||||
|
|
||||||
shared uint sh_tile_count[TILE_ALLOC_WG];
|
shared uint sh_tile_count[TILE_ALLOC_WG];
|
||||||
shared uint sh_tile_alloc;
|
shared MallocResult sh_tile_alloc;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
PathRef path_ref = PathRef(element_ix * Path_size);
|
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
|
||||||
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
|
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||||
|
|
||||||
uint tag = Annotated_Nop;
|
uint tag = Annotated_Nop;
|
||||||
if (element_ix < n_elements) {
|
if (element_ix < conf.n_elements) {
|
||||||
tag = Annotated_tag(ref);
|
tag = Annotated_tag(conf.anno_alloc, ref).tag;
|
||||||
}
|
}
|
||||||
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Annotated_Fill:
|
case Annotated_Color:
|
||||||
case Annotated_FillMask:
|
case Annotated_Image:
|
||||||
case Annotated_FillMaskInv:
|
|
||||||
case Annotated_Stroke:
|
|
||||||
case Annotated_BeginClip:
|
case Annotated_BeginClip:
|
||||||
case Annotated_EndClip:
|
case Annotated_EndClip:
|
||||||
// Note: we take advantage of the fact that fills, strokes, and
|
// Note: we take advantage of the fact that fills, strokes, and
|
||||||
// clips have compatible layout.
|
// clips have compatible layout.
|
||||||
AnnoFill fill = Annotated_Fill_read(ref);
|
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
|
||||||
x0 = int(floor(fill.bbox.x * SX));
|
x0 = int(floor(clip.bbox.x * SX));
|
||||||
y0 = int(floor(fill.bbox.y * SY));
|
y0 = int(floor(clip.bbox.y * SY));
|
||||||
x1 = int(ceil(fill.bbox.z * SX));
|
x1 = int(ceil(clip.bbox.z * SX));
|
||||||
y1 = int(ceil(fill.bbox.w * SY));
|
y1 = int(ceil(clip.bbox.w * SY));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
x0 = clamp(x0, 0, WIDTH_IN_TILES);
|
x0 = clamp(x0, 0, int(conf.width_in_tiles));
|
||||||
y0 = clamp(y0, 0, HEIGHT_IN_TILES);
|
y0 = clamp(y0, 0, int(conf.height_in_tiles));
|
||||||
x1 = clamp(x1, 0, WIDTH_IN_TILES);
|
x1 = clamp(x1, 0, int(conf.width_in_tiles));
|
||||||
y1 = clamp(y1, 0, HEIGHT_IN_TILES);
|
y1 = clamp(y1, 0, int(conf.height_in_tiles));
|
||||||
|
|
||||||
Path path;
|
Path path;
|
||||||
path.bbox = uvec4(x0, y0, x1, y1);
|
path.bbox = uvec4(x0, y0, x1, y1);
|
||||||
|
@ -76,33 +67,38 @@ void main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
sh_tile_count[th_ix] = tile_count;
|
sh_tile_count[th_ix] = tile_count;
|
||||||
|
uint total_tile_count = tile_count;
|
||||||
// Prefix sum of sh_tile_count
|
// Prefix sum of sh_tile_count
|
||||||
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
|
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
|
||||||
barrier();
|
barrier();
|
||||||
if (th_ix >= (1 << i)) {
|
if (th_ix >= (1 << i)) {
|
||||||
tile_count += sh_tile_count[th_ix - (1 << i)];
|
total_tile_count += sh_tile_count[th_ix - (1 << i)];
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
sh_tile_count[th_ix] = tile_count;
|
sh_tile_count[th_ix] = total_tile_count;
|
||||||
}
|
}
|
||||||
if (th_ix == TILE_ALLOC_WG - 1) {
|
if (th_ix == TILE_ALLOC_WG - 1) {
|
||||||
sh_tile_alloc = atomicAdd(alloc, tile_count * Tile_size);
|
sh_tile_alloc = malloc(total_tile_count * Tile_size);
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
uint alloc_start = sh_tile_alloc;
|
MallocResult alloc_start = sh_tile_alloc;
|
||||||
|
if (alloc_start.failed || mem_error != NO_ERROR) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (element_ix < n_elements) {
|
if (element_ix < conf.n_elements) {
|
||||||
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
|
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
|
||||||
path.tiles = TileRef(alloc_start + Tile_size * tile_subix);
|
Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
|
||||||
Path_write(path_ref, path);
|
path.tiles = TileRef(tiles_alloc.offset);
|
||||||
|
Path_write(conf.tile_alloc, path_ref, path);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Zero out allocated tiles efficiently
|
// Zero out allocated tiles efficiently
|
||||||
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
|
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
|
||||||
uint start_ix = alloc_start >> 2;
|
uint start_ix = alloc_start.alloc.offset >> 2;
|
||||||
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
|
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
|
||||||
// Note: this interleaving is faster than using Tile_write
|
// Note: this interleaving is faster than using Tile_write
|
||||||
// by a significant amount.
|
// by a significant amount.
|
||||||
tile[start_ix + i] = 0;
|
write_mem(alloc_start.alloc, start_ix + i, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
|
@ -1,132 +0,0 @@
|
||||||
// Code auto-generated by piet-gpu-derive
|
|
||||||
|
|
||||||
struct InstanceRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct JumpRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ChunkRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct TileGroupRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Instance {
|
|
||||||
uint item_ref;
|
|
||||||
vec2 offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define Instance_size 12
|
|
||||||
|
|
||||||
InstanceRef Instance_index(InstanceRef ref, uint index) {
|
|
||||||
return InstanceRef(ref.offset + index * Instance_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Jump {
|
|
||||||
TileGroupRef new_ref;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define Jump_size 4
|
|
||||||
|
|
||||||
JumpRef Jump_index(JumpRef ref, uint index) {
|
|
||||||
return JumpRef(ref.offset + index * Jump_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Chunk {
|
|
||||||
uint chunk_n;
|
|
||||||
ChunkRef next;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define Chunk_size 8
|
|
||||||
|
|
||||||
ChunkRef Chunk_index(ChunkRef ref, uint index) {
|
|
||||||
return ChunkRef(ref.offset + index * Chunk_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define TileGroup_Instance 0
|
|
||||||
#define TileGroup_Jump 1
|
|
||||||
#define TileGroup_End 2
|
|
||||||
#define TileGroup_size 16
|
|
||||||
|
|
||||||
TileGroupRef TileGroup_index(TileGroupRef ref, uint index) {
|
|
||||||
return TileGroupRef(ref.offset + index * TileGroup_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
Instance Instance_read(InstanceRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = tilegroup[ix + 0];
|
|
||||||
uint raw1 = tilegroup[ix + 1];
|
|
||||||
uint raw2 = tilegroup[ix + 2];
|
|
||||||
Instance s;
|
|
||||||
s.item_ref = raw0;
|
|
||||||
s.offset = vec2(uintBitsToFloat(raw1), uintBitsToFloat(raw2));
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Instance_write(InstanceRef ref, Instance s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
tilegroup[ix + 0] = s.item_ref;
|
|
||||||
tilegroup[ix + 1] = floatBitsToUint(s.offset.x);
|
|
||||||
tilegroup[ix + 2] = floatBitsToUint(s.offset.y);
|
|
||||||
}
|
|
||||||
|
|
||||||
Jump Jump_read(JumpRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = tilegroup[ix + 0];
|
|
||||||
Jump s;
|
|
||||||
s.new_ref = TileGroupRef(raw0);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Jump_write(JumpRef ref, Jump s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
tilegroup[ix + 0] = s.new_ref.offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
Chunk Chunk_read(ChunkRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = tilegroup[ix + 0];
|
|
||||||
uint raw1 = tilegroup[ix + 1];
|
|
||||||
Chunk s;
|
|
||||||
s.chunk_n = raw0;
|
|
||||||
s.next = ChunkRef(raw1);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Chunk_write(ChunkRef ref, Chunk s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
tilegroup[ix + 0] = s.chunk_n;
|
|
||||||
tilegroup[ix + 1] = s.next.offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint TileGroup_tag(TileGroupRef ref) {
|
|
||||||
return tilegroup[ref.offset >> 2];
|
|
||||||
}
|
|
||||||
|
|
||||||
Instance TileGroup_Instance_read(TileGroupRef ref) {
|
|
||||||
return Instance_read(InstanceRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
Jump TileGroup_Jump_read(TileGroupRef ref) {
|
|
||||||
return Jump_read(JumpRef(ref.offset + 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
void TileGroup_Instance_write(TileGroupRef ref, Instance s) {
|
|
||||||
tilegroup[ref.offset >> 2] = TileGroup_Instance;
|
|
||||||
Instance_write(InstanceRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void TileGroup_Jump_write(TileGroupRef ref, Jump s) {
|
|
||||||
tilegroup[ref.offset >> 2] = TileGroup_Jump;
|
|
||||||
Jump_write(JumpRef(ref.offset + 4), s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void TileGroup_End_write(TileGroupRef ref) {
|
|
||||||
tilegroup[ref.offset >> 2] = TileGroup_End;
|
|
||||||
}
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ pub use render_ctx::PietGpuRenderContext;
|
||||||
|
|
||||||
use rand::{Rng, RngCore};
|
use rand::{Rng, RngCore};
|
||||||
|
|
||||||
use piet::kurbo::{BezPath, Circle, Point, Vec2};
|
use piet::kurbo::{BezPath, Circle, Point, Shape, Vec2};
|
||||||
use piet::{Color, ImageFormat, RenderContext};
|
use piet::{Color, ImageFormat, RenderContext};
|
||||||
|
|
||||||
use piet_gpu_types::encoder::Encode;
|
use piet_gpu_types::encoder::Encode;
|
||||||
|
@ -73,7 +73,9 @@ pub fn render_scene(rc: &mut impl RenderContext) {
|
||||||
5.0,
|
5.0,
|
||||||
);
|
);
|
||||||
//render_cardioid(rc);
|
//render_cardioid(rc);
|
||||||
render_tiger(rc);
|
render_clip_test(rc);
|
||||||
|
render_alpha_test(rc);
|
||||||
|
//render_tiger(rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
|
@ -94,6 +96,67 @@ fn render_cardioid(rc: &mut impl RenderContext) {
|
||||||
rc.stroke(&path, &Color::BLACK, 2.0);
|
rc.stroke(&path, &Color::BLACK, 2.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
|
fn render_clip_test(rc: &mut impl RenderContext) {
|
||||||
|
const N: usize = 16;
|
||||||
|
const X0: f64 = 50.0;
|
||||||
|
const Y0: f64 = 450.0;
|
||||||
|
// Note: if it gets much larger, it will exceed the 1MB scratch buffer.
|
||||||
|
// But this is a pretty demanding test.
|
||||||
|
const X1: f64 = 550.0;
|
||||||
|
const Y1: f64 = 950.0;
|
||||||
|
let step = 1.0 / ((N + 1) as f64);
|
||||||
|
for i in 0..N {
|
||||||
|
let t = ((i + 1) as f64) * step;
|
||||||
|
rc.save();
|
||||||
|
let mut path = BezPath::new();
|
||||||
|
path.move_to((X0, Y0));
|
||||||
|
path.line_to((X1, Y0));
|
||||||
|
path.line_to((X1, Y0 + t * (Y1 - Y0)));
|
||||||
|
path.line_to((X1 + t * (X0 - X1), Y1));
|
||||||
|
path.line_to((X0, Y1));
|
||||||
|
path.close_path();
|
||||||
|
rc.clip(path);
|
||||||
|
}
|
||||||
|
let rect = piet::kurbo::Rect::new(X0, Y0, X1, Y1);
|
||||||
|
rc.fill(rect, &Color::BLACK);
|
||||||
|
for _ in 0..N {
|
||||||
|
rc.restore();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
|
fn render_alpha_test(rc: &mut impl RenderContext) {
|
||||||
|
// Alpha compositing tests.
|
||||||
|
rc.fill(
|
||||||
|
diamond(Point::new(1024.0, 100.0)),
|
||||||
|
&Color::Rgba32(0xff0000ff),
|
||||||
|
);
|
||||||
|
rc.fill(
|
||||||
|
diamond(Point::new(1024.0, 125.0)),
|
||||||
|
&Color::Rgba32(0x00ff0080),
|
||||||
|
);
|
||||||
|
rc.save();
|
||||||
|
rc.clip(diamond(Point::new(1024.0, 150.0)));
|
||||||
|
rc.fill(
|
||||||
|
diamond(Point::new(1024.0, 175.0)),
|
||||||
|
&Color::Rgba32(0x0000ff80),
|
||||||
|
);
|
||||||
|
rc.restore();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diamond(origin: Point) -> impl Shape {
|
||||||
|
let mut path = BezPath::new();
|
||||||
|
const SIZE: f64 = 50.0;
|
||||||
|
path.move_to((origin.x, origin.y - SIZE));
|
||||||
|
path.line_to((origin.x + SIZE, origin.y));
|
||||||
|
path.line_to((origin.x, origin.y + SIZE));
|
||||||
|
path.line_to((origin.x - SIZE, origin.y));
|
||||||
|
path.close_path();
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
fn render_tiger(rc: &mut impl RenderContext) {
|
fn render_tiger(rc: &mut impl RenderContext) {
|
||||||
let xml_str = std::str::from_utf8(include_bytes!("../Ghostscript_Tiger.svg")).unwrap();
|
let xml_str = std::str::from_utf8(include_bytes!("../Ghostscript_Tiger.svg")).unwrap();
|
||||||
let start = std::time::Instant::now();
|
let start = std::time::Instant::now();
|
||||||
|
@ -126,15 +189,16 @@ pub fn dump_k1_data(k1_buf: &[u32]) {
|
||||||
pub struct Renderer {
|
pub struct Renderer {
|
||||||
pub image_dev: hub::Image, // resulting image
|
pub image_dev: hub::Image, // resulting image
|
||||||
|
|
||||||
scene_buf: hub::Buffer,
|
scene_buf_host: hub::Buffer,
|
||||||
scene_dev: hub::Buffer,
|
scene_buf_dev: hub::Buffer,
|
||||||
|
|
||||||
pub state_buf: hub::Buffer,
|
memory_buf_host: hub::Buffer,
|
||||||
pub anno_buf: hub::Buffer,
|
memory_buf_dev: hub::Buffer,
|
||||||
pub pathseg_buf: hub::Buffer,
|
|
||||||
pub tile_buf: hub::Buffer,
|
state_buf: hub::Buffer,
|
||||||
pub bin_buf: hub::Buffer,
|
|
||||||
pub ptcl_buf: hub::Buffer,
|
config_buf_host: hub::Buffer,
|
||||||
|
config_buf_dev: hub::Buffer,
|
||||||
|
|
||||||
el_pipeline: hub::Pipeline,
|
el_pipeline: hub::Pipeline,
|
||||||
el_ds: hub::DescriptorSet,
|
el_ds: hub::DescriptorSet,
|
||||||
|
@ -148,27 +212,21 @@ pub struct Renderer {
|
||||||
backdrop_pipeline: hub::Pipeline,
|
backdrop_pipeline: hub::Pipeline,
|
||||||
backdrop_ds: hub::DescriptorSet,
|
backdrop_ds: hub::DescriptorSet,
|
||||||
|
|
||||||
tile_alloc_buf_host: hub::Buffer,
|
|
||||||
tile_alloc_buf_dev: hub::Buffer,
|
|
||||||
|
|
||||||
bin_pipeline: hub::Pipeline,
|
bin_pipeline: hub::Pipeline,
|
||||||
bin_ds: hub::DescriptorSet,
|
bin_ds: hub::DescriptorSet,
|
||||||
|
|
||||||
bin_alloc_buf_host: hub::Buffer,
|
|
||||||
bin_alloc_buf_dev: hub::Buffer,
|
|
||||||
|
|
||||||
coarse_pipeline: hub::Pipeline,
|
coarse_pipeline: hub::Pipeline,
|
||||||
coarse_ds: hub::DescriptorSet,
|
coarse_ds: hub::DescriptorSet,
|
||||||
|
|
||||||
coarse_alloc_buf_host: hub::Buffer,
|
|
||||||
coarse_alloc_buf_dev: hub::Buffer,
|
|
||||||
|
|
||||||
k4_pipeline: hub::Pipeline,
|
k4_pipeline: hub::Pipeline,
|
||||||
k4_ds: hub::DescriptorSet,
|
k4_ds: hub::DescriptorSet,
|
||||||
|
|
||||||
n_elements: usize,
|
n_elements: usize,
|
||||||
n_paths: usize,
|
n_paths: usize,
|
||||||
n_pathseg: usize,
|
n_pathseg: usize,
|
||||||
|
|
||||||
|
// Keep a reference to the image so that it is not destroyed.
|
||||||
|
_bg_image: hub::Image,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Renderer {
|
impl Renderer {
|
||||||
|
@ -177,137 +235,142 @@ impl Renderer {
|
||||||
scene: &[u8],
|
scene: &[u8],
|
||||||
n_paths: usize,
|
n_paths: usize,
|
||||||
n_pathseg: usize,
|
n_pathseg: usize,
|
||||||
|
n_trans: usize,
|
||||||
) -> Result<Self, Error> {
|
) -> Result<Self, Error> {
|
||||||
let host = MemFlags::host_coherent();
|
let host = MemFlags::host_coherent();
|
||||||
let dev = MemFlags::device_local();
|
let dev = MemFlags::device_local();
|
||||||
|
|
||||||
let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
|
let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
|
||||||
println!(
|
println!(
|
||||||
"scene: {} elements, {} paths, {} path_segments",
|
"scene: {} elements, {} paths, {} path_segments, {} transforms",
|
||||||
n_elements, n_paths, n_pathseg
|
n_elements, n_paths, n_pathseg, n_trans
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut scene_buf = session
|
let mut scene_buf_host = session
|
||||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
|
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let scene_dev = session
|
let scene_buf_dev = session
|
||||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
|
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
scene_buf.write(&scene)?;
|
scene_buf_host.write(&scene)?;
|
||||||
|
|
||||||
let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
|
let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
|
||||||
let anno_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
|
|
||||||
let pathseg_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
|
|
||||||
let tile_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
|
|
||||||
let bin_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
|
|
||||||
let ptcl_buf = session.create_buffer(48 * 1024 * 1024, dev)?;
|
|
||||||
let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
|
let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
|
||||||
|
|
||||||
let el_code = include_bytes!("../shader/elements.spv");
|
const CONFIG_SIZE: u64 = 10 * 4; // Size of Config in setup.h.
|
||||||
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4, 0)?;
|
let mut config_buf_host = session.create_buffer(CONFIG_SIZE, host)?;
|
||||||
let el_ds = session.create_descriptor_set(
|
let config_buf_dev = session.create_buffer(CONFIG_SIZE, dev)?;
|
||||||
&el_pipeline,
|
|
||||||
&[
|
|
||||||
scene_dev.vk_buffer(),
|
|
||||||
state_buf.vk_buffer(),
|
|
||||||
anno_buf.vk_buffer(),
|
|
||||||
pathseg_buf.vk_buffer(),
|
|
||||||
],
|
|
||||||
&[],
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let mut tile_alloc_buf_host = session.create_buffer(12, host)?;
|
|
||||||
let tile_alloc_buf_dev = session.create_buffer(12, dev)?;
|
|
||||||
|
|
||||||
// TODO: constants
|
// TODO: constants
|
||||||
const PATH_SIZE: usize = 12;
|
const PATH_SIZE: usize = 12;
|
||||||
let tile_alloc_start = ((n_paths + 31) & !31) * PATH_SIZE;
|
const BIN_SIZE: usize = 8;
|
||||||
tile_alloc_buf_host.write(&[n_paths as u32, n_pathseg as u32, tile_alloc_start as u32])?;
|
const PATHSEG_SIZE: usize = 52;
|
||||||
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
|
const ANNO_SIZE: usize = 32;
|
||||||
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 3, 0)?;
|
const TRANS_SIZE: usize = 24;
|
||||||
let tile_ds = session.create_descriptor_set(
|
let mut alloc = 0;
|
||||||
&tile_pipeline,
|
let tile_base = alloc;
|
||||||
&[
|
alloc += ((n_paths + 3) & !3) * PATH_SIZE;
|
||||||
anno_buf.vk_buffer(),
|
let bin_base = alloc;
|
||||||
tile_alloc_buf_dev.vk_buffer(),
|
alloc += ((n_paths + 255) & !255) * BIN_SIZE;
|
||||||
tile_buf.vk_buffer(),
|
let ptcl_base = alloc;
|
||||||
],
|
alloc += WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
|
||||||
&[],
|
let pathseg_base = alloc;
|
||||||
|
alloc += (n_pathseg * PATHSEG_SIZE + 3) & !3;
|
||||||
|
let anno_base = alloc;
|
||||||
|
alloc += (n_paths * ANNO_SIZE + 3) & !3;
|
||||||
|
let trans_base = alloc;
|
||||||
|
alloc += (n_trans * TRANS_SIZE + 3) & !3;
|
||||||
|
config_buf_host.write(&[
|
||||||
|
n_paths as u32,
|
||||||
|
n_pathseg as u32,
|
||||||
|
WIDTH_IN_TILES as u32,
|
||||||
|
HEIGHT_IN_TILES as u32,
|
||||||
|
tile_base as u32,
|
||||||
|
bin_base as u32,
|
||||||
|
ptcl_base as u32,
|
||||||
|
pathseg_base as u32,
|
||||||
|
anno_base as u32,
|
||||||
|
trans_base as u32,
|
||||||
|
])?;
|
||||||
|
|
||||||
|
let mut memory_buf_host = session.create_buffer(2 * 4, host)?;
|
||||||
|
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
|
||||||
|
memory_buf_host.write(&[alloc as u32, 0 /* Overflow flag */])?;
|
||||||
|
|
||||||
|
let el_code = include_bytes!("../shader/elements.spv");
|
||||||
|
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
|
||||||
|
let el_ds = session.create_simple_descriptor_set(
|
||||||
|
&el_pipeline,
|
||||||
|
&[&memory_buf_dev, &config_buf_dev, &scene_buf_dev, &state_buf],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
|
||||||
|
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
|
||||||
|
let tile_ds = session
|
||||||
|
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
|
||||||
|
|
||||||
let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
|
let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
|
||||||
let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 3, 0)?;
|
let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 2)?;
|
||||||
let path_ds = session.create_descriptor_set(
|
let path_ds = session
|
||||||
&path_pipeline,
|
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
|
||||||
&[
|
|
||||||
pathseg_buf.vk_buffer(),
|
|
||||||
tile_alloc_buf_dev.vk_buffer(),
|
|
||||||
tile_buf.vk_buffer(),
|
|
||||||
],
|
|
||||||
&[],
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
|
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
|
||||||
let backdrop_pipeline =
|
let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_alloc_code, 2)?;
|
||||||
session.create_simple_compute_pipeline(backdrop_alloc_code, 3, 0)?;
|
let backdrop_ds = session.create_simple_descriptor_set(
|
||||||
let backdrop_ds = session.create_descriptor_set(
|
|
||||||
&backdrop_pipeline,
|
&backdrop_pipeline,
|
||||||
&[
|
&[&memory_buf_dev, &config_buf_dev],
|
||||||
anno_buf.vk_buffer(),
|
|
||||||
tile_alloc_buf_dev.vk_buffer(),
|
|
||||||
tile_buf.vk_buffer(),
|
|
||||||
],
|
|
||||||
&[],
|
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let mut bin_alloc_buf_host = session.create_buffer(8, host)?;
|
|
||||||
let bin_alloc_buf_dev = session.create_buffer(8, dev)?;
|
|
||||||
|
|
||||||
// TODO: constants
|
// TODO: constants
|
||||||
let bin_alloc_start = ((n_paths + 255) & !255) * 8;
|
|
||||||
bin_alloc_buf_host.write(&[n_paths as u32, bin_alloc_start as u32])?;
|
|
||||||
let bin_code = include_bytes!("../shader/binning.spv");
|
let bin_code = include_bytes!("../shader/binning.spv");
|
||||||
let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 3, 0)?;
|
let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 2)?;
|
||||||
let bin_ds = session.create_descriptor_set(
|
let bin_ds = session
|
||||||
&bin_pipeline,
|
.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
|
||||||
&[
|
|
||||||
anno_buf.vk_buffer(),
|
|
||||||
bin_alloc_buf_dev.vk_buffer(),
|
|
||||||
bin_buf.vk_buffer(),
|
|
||||||
],
|
|
||||||
&[],
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let mut coarse_alloc_buf_host = session.create_buffer(8, host)?;
|
|
||||||
let coarse_alloc_buf_dev = session.create_buffer(8, dev)?;
|
|
||||||
|
|
||||||
let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
|
|
||||||
coarse_alloc_buf_host.write(&[n_paths as u32, coarse_alloc_start as u32])?;
|
|
||||||
let coarse_code = include_bytes!("../shader/coarse.spv");
|
let coarse_code = include_bytes!("../shader/coarse.spv");
|
||||||
let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 5, 0)?;
|
let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 2)?;
|
||||||
let coarse_ds = session.create_descriptor_set(
|
let coarse_ds = session
|
||||||
&coarse_pipeline,
|
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
|
||||||
&[
|
|
||||||
anno_buf.vk_buffer(),
|
|
||||||
bin_buf.vk_buffer(),
|
|
||||||
tile_buf.vk_buffer(),
|
|
||||||
coarse_alloc_buf_dev.vk_buffer(),
|
|
||||||
ptcl_buf.vk_buffer(),
|
|
||||||
],
|
|
||||||
&[],
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let k4_code = include_bytes!("../shader/kernel4.spv");
|
let bg_image = Self::make_test_bg_image(&session);
|
||||||
let k4_pipeline = session.create_simple_compute_pipeline(k4_code, 2, 1)?;
|
|
||||||
let k4_ds = session.create_descriptor_set(
|
let k4_code = if session.gpu_info().has_descriptor_indexing {
|
||||||
&k4_pipeline,
|
&include_bytes!("../shader/kernel4_idx.spv")[..]
|
||||||
&[ptcl_buf.vk_buffer(), tile_buf.vk_buffer()],
|
} else {
|
||||||
&[image_dev.vk_image()],
|
println!("doing non-indexed k4");
|
||||||
)?;
|
&include_bytes!("../shader/kernel4.spv")[..]
|
||||||
|
};
|
||||||
|
// This is an arbitrary limit on the number of textures that can be referenced by
|
||||||
|
// the fine rasterizer. To set it for real, we probably want to pay attention both
|
||||||
|
// to the device limit (maxDescriptorSetSampledImages) but also to the number of
|
||||||
|
// images encoded (I believe there's a cost when allocating descriptor pools). If
|
||||||
|
// it can't be satisfied, then for compatibility we'll probably want to fall back
|
||||||
|
// to an atlasing approach.
|
||||||
|
//
|
||||||
|
// However, we're adding only one texture for now. Avoid a harmless Vulkan validation
|
||||||
|
// error by using a tight bound.
|
||||||
|
let max_textures = 1;
|
||||||
|
let k4_pipeline = session
|
||||||
|
.pipeline_builder()
|
||||||
|
.add_buffers(2)
|
||||||
|
.add_images(1)
|
||||||
|
.add_textures(max_textures)
|
||||||
|
.create_compute_pipeline(&session, k4_code)?;
|
||||||
|
let k4_ds = session
|
||||||
|
.descriptor_set_builder()
|
||||||
|
.add_buffers(&[&memory_buf_dev, &config_buf_dev])
|
||||||
|
.add_images(&[&image_dev])
|
||||||
|
.add_textures(&[&bg_image])
|
||||||
|
.build(&session, &k4_pipeline)?;
|
||||||
|
|
||||||
Ok(Renderer {
|
Ok(Renderer {
|
||||||
scene_buf,
|
scene_buf_host,
|
||||||
scene_dev,
|
scene_buf_dev,
|
||||||
|
memory_buf_host,
|
||||||
|
memory_buf_dev,
|
||||||
|
state_buf,
|
||||||
|
config_buf_host,
|
||||||
|
config_buf_dev,
|
||||||
image_dev,
|
image_dev,
|
||||||
el_pipeline,
|
el_pipeline,
|
||||||
el_ds,
|
el_ds,
|
||||||
|
@ -323,39 +386,27 @@ impl Renderer {
|
||||||
coarse_ds,
|
coarse_ds,
|
||||||
k4_pipeline,
|
k4_pipeline,
|
||||||
k4_ds,
|
k4_ds,
|
||||||
state_buf,
|
|
||||||
anno_buf,
|
|
||||||
pathseg_buf,
|
|
||||||
tile_buf,
|
|
||||||
bin_buf,
|
|
||||||
ptcl_buf,
|
|
||||||
tile_alloc_buf_host,
|
|
||||||
tile_alloc_buf_dev,
|
|
||||||
bin_alloc_buf_host,
|
|
||||||
bin_alloc_buf_dev,
|
|
||||||
coarse_alloc_buf_host,
|
|
||||||
coarse_alloc_buf_dev,
|
|
||||||
n_elements,
|
n_elements,
|
||||||
n_paths,
|
n_paths,
|
||||||
n_pathseg,
|
n_pathseg,
|
||||||
|
_bg_image: bg_image,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) {
|
pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) {
|
||||||
cmd_buf.copy_buffer(self.scene_buf.vk_buffer(), self.scene_dev.vk_buffer());
|
|
||||||
cmd_buf.copy_buffer(
|
cmd_buf.copy_buffer(
|
||||||
self.tile_alloc_buf_host.vk_buffer(),
|
self.scene_buf_host.vk_buffer(),
|
||||||
self.tile_alloc_buf_dev.vk_buffer(),
|
self.scene_buf_dev.vk_buffer(),
|
||||||
);
|
);
|
||||||
cmd_buf.copy_buffer(
|
cmd_buf.copy_buffer(
|
||||||
self.bin_alloc_buf_host.vk_buffer(),
|
self.config_buf_host.vk_buffer(),
|
||||||
self.bin_alloc_buf_dev.vk_buffer(),
|
self.config_buf_dev.vk_buffer(),
|
||||||
);
|
);
|
||||||
cmd_buf.copy_buffer(
|
cmd_buf.copy_buffer(
|
||||||
self.coarse_alloc_buf_host.vk_buffer(),
|
self.memory_buf_host.vk_buffer(),
|
||||||
self.coarse_alloc_buf_dev.vk_buffer(),
|
self.memory_buf_dev.vk_buffer(),
|
||||||
);
|
);
|
||||||
cmd_buf.clear_buffer(self.state_buf.vk_buffer());
|
cmd_buf.clear_buffer(self.state_buf.vk_buffer(), None);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.image_barrier(
|
cmd_buf.image_barrier(
|
||||||
self.image_dev.vk_image(),
|
self.image_dev.vk_image(),
|
||||||
|
@ -405,7 +456,7 @@ impl Renderer {
|
||||||
cmd_buf.dispatch(
|
cmd_buf.dispatch(
|
||||||
&self.coarse_pipeline,
|
&self.coarse_pipeline,
|
||||||
&self.coarse_ds,
|
&self.coarse_ds,
|
||||||
(WIDTH as u32 / 256, HEIGHT as u32 / 256, 1),
|
((WIDTH as u32 + 255) / 256, (HEIGHT as u32 + 255) / 256, 1),
|
||||||
);
|
);
|
||||||
cmd_buf.write_timestamp(&query_pool, 6);
|
cmd_buf.write_timestamp(&query_pool, 6);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
|
@ -448,7 +499,6 @@ impl Renderer {
|
||||||
ImageLayout::BlitDst,
|
ImageLayout::BlitDst,
|
||||||
);
|
);
|
||||||
cmd_buf.copy_buffer_to_image(buffer.vk_buffer(), image.vk_image());
|
cmd_buf.copy_buffer_to_image(buffer.vk_buffer(), image.vk_image());
|
||||||
// TODO: instead of General, we might want ShaderReadOnly
|
|
||||||
cmd_buf.image_barrier(image.vk_image(), ImageLayout::BlitDst, ImageLayout::General);
|
cmd_buf.image_barrier(image.vk_image(), ImageLayout::BlitDst, ImageLayout::General);
|
||||||
cmd_buf.finish();
|
cmd_buf.finish();
|
||||||
// Make sure not to drop the buffer and image until the command buffer completes.
|
// Make sure not to drop the buffer and image until the command buffer completes.
|
||||||
|
@ -459,4 +509,22 @@ impl Renderer {
|
||||||
Ok(image)
|
Ok(image)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Make a test image.
|
||||||
|
fn make_test_bg_image(session: &hub::Session) -> hub::Image {
|
||||||
|
const WIDTH: usize = 256;
|
||||||
|
const HEIGHT: usize = 256;
|
||||||
|
let mut buf = vec![255u8; WIDTH * HEIGHT * 4];
|
||||||
|
for y in 0..HEIGHT {
|
||||||
|
for x in 0..WIDTH {
|
||||||
|
let r = x as u8;
|
||||||
|
let g = y as u8;
|
||||||
|
let b = r ^ g;
|
||||||
|
buf[(y * WIDTH + x) * 4] = r;
|
||||||
|
buf[(y * WIDTH + x) * 4 + 1] = g;
|
||||||
|
buf[(y * WIDTH + x) * 4 + 2] = b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Self::make_image(session, WIDTH, HEIGHT, &buf, ImageFormat::RgbaPremul).unwrap()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,21 +1,19 @@
|
||||||
use std::{borrow::Cow, ops::RangeBounds};
|
use std::{borrow::Cow, ops::RangeBounds};
|
||||||
|
|
||||||
use piet_gpu_types::encoder::{Encode, Encoder};
|
|
||||||
|
|
||||||
use piet_gpu_types::scene::{
|
|
||||||
Clip, CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke, Transform,
|
|
||||||
};
|
|
||||||
|
|
||||||
use piet::{
|
use piet::{
|
||||||
kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size},
|
kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size},
|
||||||
HitTestPosition, TextAttribute, TextStorage,
|
HitTestPosition, TextAttribute, TextStorage,
|
||||||
};
|
};
|
||||||
|
|
||||||
use piet::{
|
use piet::{
|
||||||
Color, Error, FixedGradient, FontFamily, HitTestPoint, ImageFormat, InterpolationMode,
|
Color, Error, FixedGradient, FontFamily, HitTestPoint, ImageFormat, InterpolationMode,
|
||||||
IntoBrush, LineMetric, RenderContext, StrokeStyle, Text, TextLayout, TextLayoutBuilder,
|
IntoBrush, LineMetric, RenderContext, StrokeStyle, Text, TextLayout, TextLayoutBuilder,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use piet_gpu_types::encoder::{Encode, Encoder};
|
||||||
|
use piet_gpu_types::scene::{
|
||||||
|
Clip, CubicSeg, Element, FillColor, LineSeg, QuadSeg, SetFillMode, SetLineWidth, Transform,
|
||||||
|
};
|
||||||
|
|
||||||
pub struct PietGpuImage;
|
pub struct PietGpuImage;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -32,12 +30,15 @@ pub struct PietGpuRenderContext {
|
||||||
// Will probably need direct accesss to hal Device to create images etc.
|
// Will probably need direct accesss to hal Device to create images etc.
|
||||||
inner_text: PietGpuText,
|
inner_text: PietGpuText,
|
||||||
stroke_width: f32,
|
stroke_width: f32,
|
||||||
|
fill_mode: FillMode,
|
||||||
// We're tallying these cpu-side for expedience, but will probably
|
// We're tallying these cpu-side for expedience, but will probably
|
||||||
// move this to some kind of readback from element processing.
|
// move this to some kind of readback from element processing.
|
||||||
/// The count of elements that make it through to coarse rasterization.
|
/// The count of elements that make it through to coarse rasterization.
|
||||||
path_count: usize,
|
path_count: usize,
|
||||||
/// The count of path segment elements.
|
/// The count of path segment elements.
|
||||||
pathseg_count: usize,
|
pathseg_count: usize,
|
||||||
|
/// The count of transform elements.
|
||||||
|
trans_count: usize,
|
||||||
|
|
||||||
cur_transform: Affine,
|
cur_transform: Affine,
|
||||||
state_stack: Vec<State>,
|
state_stack: Vec<State>,
|
||||||
|
@ -67,6 +68,14 @@ struct ClipElement {
|
||||||
bbox: Option<Rect>,
|
bbox: Option<Rect>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Selects how subsequently encoded paths are filled.
///
/// The discriminants are spelled out because the value is cast to `u32` when
/// it is written into a `SetFillMode` scene element.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum FillMode {
    /// Fill path according to the non-zero winding rule.
    Nonzero = 0,
    /// Fill stroked path.
    Stroke = 1,
}
|
||||||
|
|
||||||
const TOLERANCE: f64 = 0.25;
|
const TOLERANCE: f64 = 0.25;
|
||||||
|
|
||||||
impl PietGpuRenderContext {
|
impl PietGpuRenderContext {
|
||||||
|
@ -80,8 +89,10 @@ impl PietGpuRenderContext {
|
||||||
elements,
|
elements,
|
||||||
inner_text,
|
inner_text,
|
||||||
stroke_width,
|
stroke_width,
|
||||||
|
fill_mode: FillMode::Nonzero,
|
||||||
path_count: 0,
|
path_count: 0,
|
||||||
pathseg_count: 0,
|
pathseg_count: 0,
|
||||||
|
trans_count: 0,
|
||||||
cur_transform: Affine::default(),
|
cur_transform: Affine::default(),
|
||||||
state_stack: Vec::new(),
|
state_stack: Vec::new(),
|
||||||
clip_stack: Vec::new(),
|
clip_stack: Vec::new(),
|
||||||
|
@ -100,6 +111,19 @@ impl PietGpuRenderContext {
|
||||||
pub fn pathseg_count(&self) -> usize {
|
pub fn pathseg_count(&self) -> usize {
|
||||||
self.pathseg_count
|
self.pathseg_count
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn trans_count(&self) -> usize {
|
||||||
|
self.trans_count
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_fill_mode(ctx: &mut PietGpuRenderContext, fill_mode: FillMode) {
|
||||||
|
if ctx.fill_mode != fill_mode {
|
||||||
|
ctx.elements.push(Element::SetFillMode(SetFillMode {
|
||||||
|
fill_mode: fill_mode as u32,
|
||||||
|
}));
|
||||||
|
ctx.fill_mode = fill_mode;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RenderContext for PietGpuRenderContext {
|
impl RenderContext for PietGpuRenderContext {
|
||||||
|
@ -113,7 +137,19 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn solid_brush(&mut self, color: Color) -> Self::Brush {
|
fn solid_brush(&mut self, color: Color) -> Self::Brush {
|
||||||
PietGpuBrush::Solid(color.as_rgba_u32())
|
// kernel4 expects colors encoded in alpha-premultiplied sRGB:
|
||||||
|
//
|
||||||
|
// [α,sRGB(α⋅R),sRGB(α⋅G),sRGB(α⋅B)]
|
||||||
|
//
|
||||||
|
// See also http://ssp.impulsetrain.com/gamma-premult.html.
|
||||||
|
let (r, g, b, a) = color.as_rgba();
|
||||||
|
let premul = Color::rgba(
|
||||||
|
to_srgb(from_srgb(r) * a),
|
||||||
|
to_srgb(from_srgb(g) * a),
|
||||||
|
to_srgb(from_srgb(b) * a),
|
||||||
|
a,
|
||||||
|
);
|
||||||
|
PietGpuBrush::Solid(premul.as_rgba_u32())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gradient(&mut self, _gradient: impl Into<FixedGradient>) -> Result<Self::Brush, Error> {
|
fn gradient(&mut self, _gradient: impl Into<FixedGradient>) -> Result<Self::Brush, Error> {
|
||||||
|
@ -129,6 +165,7 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
.push(Element::SetLineWidth(SetLineWidth { width: width_f32 }));
|
.push(Element::SetLineWidth(SetLineWidth { width: width_f32 }));
|
||||||
self.stroke_width = width_f32;
|
self.stroke_width = width_f32;
|
||||||
}
|
}
|
||||||
|
set_fill_mode(self, FillMode::Stroke);
|
||||||
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
|
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
|
||||||
match brush {
|
match brush {
|
||||||
PietGpuBrush::Solid(rgba_color) => {
|
PietGpuBrush::Solid(rgba_color) => {
|
||||||
|
@ -136,8 +173,8 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
self.accumulate_bbox(|| shape.bounding_box() + Insets::uniform(width * 0.5));
|
self.accumulate_bbox(|| shape.bounding_box() + Insets::uniform(width * 0.5));
|
||||||
let path = shape.path_elements(TOLERANCE);
|
let path = shape.path_elements(TOLERANCE);
|
||||||
self.encode_path(path, false);
|
self.encode_path(path, false);
|
||||||
let stroke = Stroke { rgba_color };
|
let stroke = FillColor { rgba_color };
|
||||||
self.elements.push(Element::Stroke(stroke));
|
self.elements.push(Element::FillColor(stroke));
|
||||||
self.path_count += 1;
|
self.path_count += 1;
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
|
@ -160,9 +197,10 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
// Perhaps that should be added to kurbo.
|
// Perhaps that should be added to kurbo.
|
||||||
self.accumulate_bbox(|| shape.bounding_box());
|
self.accumulate_bbox(|| shape.bounding_box());
|
||||||
let path = shape.path_elements(TOLERANCE);
|
let path = shape.path_elements(TOLERANCE);
|
||||||
|
set_fill_mode(self, FillMode::Nonzero);
|
||||||
self.encode_path(path, true);
|
self.encode_path(path, true);
|
||||||
let fill = Fill { rgba_color };
|
let fill = FillColor { rgba_color };
|
||||||
self.elements.push(Element::Fill(fill));
|
self.elements.push(Element::FillColor(fill));
|
||||||
self.path_count += 1;
|
self.path_count += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -170,6 +208,7 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
fn fill_even_odd(&mut self, _shape: impl Shape, _brush: &impl IntoBrush<Self>) {}
|
fn fill_even_odd(&mut self, _shape: impl Shape, _brush: &impl IntoBrush<Self>) {}
|
||||||
|
|
||||||
fn clip(&mut self, shape: impl Shape) {
|
fn clip(&mut self, shape: impl Shape) {
|
||||||
|
set_fill_mode(self, FillMode::Nonzero);
|
||||||
let path = shape.path_elements(TOLERANCE);
|
let path = shape.path_elements(TOLERANCE);
|
||||||
self.encode_path(path, true);
|
self.encode_path(path, true);
|
||||||
let begin_ix = self.elements.len();
|
let begin_ix = self.elements.len();
|
||||||
|
@ -207,6 +246,7 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
let a_inv = state.rel_transform.inverse();
|
let a_inv = state.rel_transform.inverse();
|
||||||
self.elements
|
self.elements
|
||||||
.push(Element::Transform(to_scene_transform(a_inv)));
|
.push(Element::Transform(to_scene_transform(a_inv)));
|
||||||
|
self.trans_count += 1;
|
||||||
}
|
}
|
||||||
self.cur_transform = state.transform;
|
self.cur_transform = state.transform;
|
||||||
for _ in 0..state.n_clip {
|
for _ in 0..state.n_clip {
|
||||||
|
@ -228,6 +268,7 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
fn transform(&mut self, transform: Affine) {
|
fn transform(&mut self, transform: Affine) {
|
||||||
self.elements
|
self.elements
|
||||||
.push(Element::Transform(to_scene_transform(transform)));
|
.push(Element::Transform(to_scene_transform(transform)));
|
||||||
|
self.trans_count += 1;
|
||||||
if let Some(tos) = self.state_stack.last_mut() {
|
if let Some(tos) = self.state_stack.last_mut() {
|
||||||
tos.rel_transform *= transform;
|
tos.rel_transform *= transform;
|
||||||
}
|
}
|
||||||
|
@ -275,34 +316,40 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PietGpuRenderContext {
|
impl PietGpuRenderContext {
|
||||||
fn encode_line_seg(&mut self, seg: LineSeg, is_fill: bool) {
|
fn encode_line_seg(&mut self, seg: LineSeg) {
|
||||||
if is_fill {
|
self.elements.push(Element::Line(seg));
|
||||||
self.elements.push(Element::FillLine(seg));
|
|
||||||
} else {
|
|
||||||
self.elements.push(Element::StrokeLine(seg));
|
|
||||||
}
|
|
||||||
self.pathseg_count += 1;
|
self.pathseg_count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_quad_seg(&mut self, seg: QuadSeg, is_fill: bool) {
|
fn encode_quad_seg(&mut self, seg: QuadSeg) {
|
||||||
if is_fill {
|
self.elements.push(Element::Quad(seg));
|
||||||
self.elements.push(Element::FillQuad(seg));
|
|
||||||
} else {
|
|
||||||
self.elements.push(Element::StrokeQuad(seg));
|
|
||||||
}
|
|
||||||
self.pathseg_count += 1;
|
self.pathseg_count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_cubic_seg(&mut self, seg: CubicSeg, is_fill: bool) {
|
fn encode_cubic_seg(&mut self, seg: CubicSeg) {
|
||||||
if is_fill {
|
self.elements.push(Element::Cubic(seg));
|
||||||
self.elements.push(Element::FillCubic(seg));
|
|
||||||
} else {
|
|
||||||
self.elements.push(Element::StrokeCubic(seg));
|
|
||||||
}
|
|
||||||
self.pathseg_count += 1;
|
self.pathseg_count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) {
|
fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) {
|
||||||
|
if is_fill {
|
||||||
|
self.encode_path_inner(
|
||||||
|
path.flat_map(|el| {
|
||||||
|
match el {
|
||||||
|
PathEl::MoveTo(..) => Some(PathEl::ClosePath),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
.into_iter()
|
||||||
|
.chain(Some(el))
|
||||||
|
})
|
||||||
|
.chain(Some(PathEl::ClosePath)),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
self.encode_path_inner(path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_path_inner(&mut self, path: impl Iterator<Item = PathEl>) {
|
||||||
let flatten = false;
|
let flatten = false;
|
||||||
if flatten {
|
if flatten {
|
||||||
let mut start_pt = None;
|
let mut start_pt = None;
|
||||||
|
@ -320,7 +367,7 @@ impl PietGpuRenderContext {
|
||||||
p0: last_pt.unwrap(),
|
p0: last_pt.unwrap(),
|
||||||
p1: scene_pt,
|
p1: scene_pt,
|
||||||
};
|
};
|
||||||
self.encode_line_seg(seg, is_fill);
|
self.encode_line_seg(seg);
|
||||||
last_pt = Some(scene_pt);
|
last_pt = Some(scene_pt);
|
||||||
}
|
}
|
||||||
PathEl::ClosePath => {
|
PathEl::ClosePath => {
|
||||||
|
@ -330,7 +377,7 @@ impl PietGpuRenderContext {
|
||||||
p0: last,
|
p0: last,
|
||||||
p1: start,
|
p1: start,
|
||||||
};
|
};
|
||||||
self.encode_line_seg(seg, is_fill);
|
self.encode_line_seg(seg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -354,7 +401,7 @@ impl PietGpuRenderContext {
|
||||||
p0: last_pt.unwrap(),
|
p0: last_pt.unwrap(),
|
||||||
p1: scene_pt,
|
p1: scene_pt,
|
||||||
};
|
};
|
||||||
self.encode_line_seg(seg, is_fill);
|
self.encode_line_seg(seg);
|
||||||
last_pt = Some(scene_pt);
|
last_pt = Some(scene_pt);
|
||||||
}
|
}
|
||||||
PathEl::QuadTo(p1, p2) => {
|
PathEl::QuadTo(p1, p2) => {
|
||||||
|
@ -365,7 +412,7 @@ impl PietGpuRenderContext {
|
||||||
p1: scene_p1,
|
p1: scene_p1,
|
||||||
p2: scene_p2,
|
p2: scene_p2,
|
||||||
};
|
};
|
||||||
self.encode_quad_seg(seg, is_fill);
|
self.encode_quad_seg(seg);
|
||||||
last_pt = Some(scene_p2);
|
last_pt = Some(scene_p2);
|
||||||
}
|
}
|
||||||
PathEl::CurveTo(p1, p2, p3) => {
|
PathEl::CurveTo(p1, p2, p3) => {
|
||||||
|
@ -378,7 +425,7 @@ impl PietGpuRenderContext {
|
||||||
p2: scene_p2,
|
p2: scene_p2,
|
||||||
p3: scene_p3,
|
p3: scene_p3,
|
||||||
};
|
};
|
||||||
self.encode_cubic_seg(seg, is_fill);
|
self.encode_cubic_seg(seg);
|
||||||
last_pt = Some(scene_p3);
|
last_pt = Some(scene_p3);
|
||||||
}
|
}
|
||||||
PathEl::ClosePath => {
|
PathEl::ClosePath => {
|
||||||
|
@ -388,7 +435,7 @@ impl PietGpuRenderContext {
|
||||||
p0: last,
|
p0: last,
|
||||||
p1: start,
|
p1: start,
|
||||||
};
|
};
|
||||||
self.encode_line_seg(seg, is_fill);
|
self.encode_line_seg(seg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -550,3 +597,21 @@ fn to_scene_transform(transform: Affine) -> Transform {
|
||||||
translate: [c[4] as f32, c[5] as f32],
|
translate: [c[4] as f32, c[5] as f32],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Applies the sRGB encoding curve to a channel value.
///
/// Inputs at or below 0.0031308 are scaled by 12.92; larger inputs map to
/// `1.055 * f^(1 / 2.4) - 0.055`.
fn to_srgb(f: f64) -> f64 {
    const LINEAR_CUTOFF: f64 = 0.0031308;
    const OFFSET: f64 = 0.055;

    if f <= LINEAR_CUTOFF {
        return f * 12.92;
    }
    let exponent = f64::recip(2.4);
    (1. + OFFSET) * f.powf(exponent) - OFFSET
}
|
||||||
|
|
||||||
|
/// Applies the inverse of the sRGB encoding curve to a channel value.
///
/// Inputs at or below 0.04045 are divided by 12.92; larger inputs map to
/// `((f + 0.055) * (1 / 1.055))^2.4`.
fn from_srgb(f: f64) -> f64 {
    const ENCODED_CUTOFF: f64 = 0.04045;
    const OFFSET: f64 = 0.055;

    if f <= ENCODED_CUTOFF {
        return f / 12.92;
    }
    let normalized = (f + OFFSET) * f64::recip(1. + OFFSET);
    normalized.powf(2.4)
}
|
||||||
|
|
Loading…
Reference in a new issue