2020-12-12 01:01:48 +11:00
|
|
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
|
2020-04-22 12:30:14 +10:00
|
|
|
// This is "kernel 4" in a 4-kernel pipeline. It renders the commands
|
|
|
|
// in the per-tile command list to an image.
|
|
|
|
|
|
|
|
// Right now, this kernel stores the image in a buffer, but a better
|
|
|
|
// plan is to use a texture. This is because of limited support.
|
|
|
|
|
|
|
|
#version 450
|
|
|
|
#extension GL_GOOGLE_include_directive : enable
|
2021-04-03 12:59:07 +11:00
|
|
|
#ifdef ENABLE_IMAGE_INDICES
|
2020-11-19 10:54:11 +11:00
|
|
|
#extension GL_EXT_nonuniform_qualifier : enable
|
2021-04-03 12:59:07 +11:00
|
|
|
#endif
|
2020-04-22 12:30:14 +10:00
|
|
|
|
2020-12-12 04:30:20 +11:00
|
|
|
#include "mem.h"
|
2020-12-24 22:00:53 +11:00
|
|
|
#include "setup.h"
|
2020-06-15 07:32:59 +10:00
|
|
|
|
2021-03-23 22:54:49 +11:00
|
|
|
// Each invocation handles a CHUNK_X x CHUNK_Y grid of pixels within its tile,
// i.e. CHUNK pixels in total. The pixels of one invocation are spaced
// CHUNK_DX apart horizontally and CHUNK_DY apart vertically (see
// chunk_offset), and CHUNK_DX x CHUNK_DY is also the workgroup size.
#define CHUNK_X 2
#define CHUNK_Y 4
// Parenthesized so the macro stays a single operand wherever it is expanded
// (e.g. `n / CHUNK` or `n % CHUNK`).
#define CHUNK (CHUNK_X * CHUNK_Y)
#define CHUNK_DX (TILE_WIDTH_PX / CHUNK_X)
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
|
|
|
|
// Workgroup dimensions: CHUNK_DX x CHUNK_DY invocations. Each invocation
// covers CHUNK pixels (see chunk_offset), so one workgroup spans
// TILE_WIDTH_PX x TILE_HEIGHT_PX pixels.
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
|
2020-04-22 12:30:14 +10:00
|
|
|
|
2020-12-12 04:30:20 +11:00
|
|
|
// Read-only configuration buffer. main() reads conf.width_in_tiles and
// conf.ptcl_alloc from it to locate the command list of the current tile.
// NOTE(review): the Config type is not declared in this file; presumably it
// comes from setup.h - confirm.
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
|
|
|
Config conf;
|
2020-04-22 12:30:14 +10:00
|
|
|
};
|
|
|
|
|
2020-12-12 04:30:20 +11:00
|
|
|
// Write-only rgba8 image at binding 2.
// NOTE(review): presumably the render target of this kernel; the store is not
// in the part of the file visible here - confirm.
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
|
2020-04-22 12:30:14 +10:00
|
|
|
|
2021-04-03 12:59:07 +11:00
|
|
|
#ifdef ENABLE_IMAGE_INDICES
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling need more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
Only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
// Read-only source images sampled with imageLoad (see fillImage).
// With ENABLE_IMAGE_INDICES the array is unsized and fillImage selects an
// element with CmdImage.index.
layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[];
|
|
|
|
#else
|
|
|
|
// Without ENABLE_IMAGE_INDICES there is exactly one image; it is always
// addressed as images[0].
layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1];
|
|
|
|
#endif
|
2020-11-19 10:54:11 +11:00
|
|
|
|
2020-04-22 12:30:14 +10:00
|
|
|
#include "ptcl.h"
|
2020-06-04 02:28:43 +10:00
|
|
|
#include "tile.h"
|
2020-04-22 12:30:14 +10:00
|
|
|
|
2020-11-20 06:53:59 +11:00
|
|
|
// Number of blend-stack entries main() keeps in its local blend_stack array.
// When a BeginClip would exceed this depth, the slot about to be reused is
// first written out to a scratch buffer obtained from alloc_clip_buf.
#define BLEND_STACK_SIZE 4
|
|
|
|
|
2020-12-12 04:30:20 +11:00
|
|
|
// Layout of a clip scratch frame:
|
2020-11-22 06:39:23 +11:00
|
|
|
// Each frame is WIDTH * HEIGHT 32-bit words, then a link reference.
|
|
|
|
|
2020-12-12 04:30:20 +11:00
|
|
|
// Link offset and frame size in 32-bit words.
|
2020-11-22 06:39:23 +11:00
|
|
|
// Word index of the link within a frame: it follows the
// TILE_WIDTH_PX * TILE_HEIGHT_PX per-pixel words.
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX)
|
|
|
|
// Frame size in 32-bit words (pixel words plus the link word).
// alloc_clip_buf multiplies this by 4 when calling malloc - presumably to
// convert to bytes; confirm against mem.h.
#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1)
|
|
|
|
|
2020-12-24 22:00:53 +11:00
|
|
|
// Workgroup-shared slot through which alloc_clip_buf hands the allocation
// made by invocation (0, 0) to every other invocation in the workgroup.
shared MallocResult sh_clip_alloc;
|
2020-11-22 06:39:23 +11:00
|
|
|
|
2020-12-12 04:30:20 +11:00
|
|
|
// Allocate a scratch buffer for clipping.
//
// Invocation (0, 0) of the workgroup allocates one clip frame
// (CLIP_BUF_SIZE words) and, on success, stores `link` in the frame's link
// word. The result is then broadcast to the whole workgroup through
// sh_clip_alloc.
//
// link: value written to the new frame's link word (main() passes the offset
//       of the current top-of-stack frame).
// Returns the MallocResult of the allocation; callers must check `failed`.
//
// Contains barrier() calls, so every invocation of the workgroup must call
// this function from uniform control flow.
MallocResult alloc_clip_buf(uint link) {
    if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {
        MallocResult m = malloc(CLIP_BUF_SIZE * 4);
        if (!m.failed) {
            write_mem(m.alloc, (m.alloc.offset >> 2) + CLIP_LINK_OFFSET, link);
        }
        sh_clip_alloc = m;
    }
    // Make the write by invocation (0, 0) visible to the rest of the workgroup.
    barrier();
    MallocResult result = sh_clip_alloc;
    // Ensure every invocation has taken its copy before invocation (0, 0) can
    // overwrite sh_clip_alloc in a subsequent call.
    barrier();
    return result;
}
|
|
|
|
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling need more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
Only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
// Encodes a linear-light RGB triple with the sRGB transfer curve.
// Components below 0.0031308 take the linear segment (12.92 * c); all other
// components take the power segment (1.055 * c^0.41666 - 0.055).
vec3 tosRGB(vec3 rgb) {
    vec3 low_branch = rgb * vec3(12.92);
    vec3 high_branch = pow(rgb, vec3(0.41666)) * vec3(1.055) - vec3(0.055);
    // Per-component selector: true picks high_branch, false picks low_branch.
    bvec3 use_high = greaterThanEqual(rgb, vec3(0.0031308));
    return mix(low_branch, high_branch, use_high);
}
|
|
|
|
|
|
|
|
// Decodes an sRGB-encoded RGB triple to linear light.
// Formula from EXT_sRGB: components below 0.04045 are divided by 12.92, all
// other components go through ((c + 0.055) / 1.055)^2.4.
vec3 fromsRGB(vec3 srgb) {
    vec3 low_branch = srgb / vec3(12.92);
    vec3 shifted = (srgb + vec3(0.055)) / vec3(1.055);
    vec3 high_branch = pow(shifted, vec3(2.4));
    // Per-component selector: true picks high_branch, false picks low_branch.
    bvec3 use_high = greaterThanEqual(srgb, vec3(0.04045));
    return mix(low_branch, high_branch, use_high);
}
|
|
|
|
|
|
|
|
// unpacksRGB turns a packed 8-bit-per-channel sRGB color into a vec4 in the
// linear color space. The packed word is unpacked with unpackUnorm4x8 and its
// components are reversed, so red comes from the last unpacked component and
// alpha from the first. Alpha is passed through without conversion.
vec4 unpacksRGB(uint srgba) {
    vec4 abgr = unpackUnorm4x8(srgba);
    vec3 linear_rgb = fromsRGB(abgr.wzy);
    return vec4(linear_rgb, abgr.x);
}
|
|
|
|
|
|
|
|
// packsRGB converts a linear-space color to sRGB and packs it into a single
// uint with 8 bits per channel. Alpha is not converted. The components are
// reversed before packing, mirroring unpacksRGB.
uint packsRGB(vec4 rgba) {
    vec3 encoded = tosRGB(rgba.rgb);
    vec4 abgr = vec4(rgba.a, encoded.bgr);
    return packUnorm4x8(abgr);
}
|
|
|
|
|
2021-03-23 22:54:49 +11:00
|
|
|
// Returns the pixel offset, relative to the invocation's first pixel, of the
// i-th pixel handled by that invocation. Index i walks a CHUNK_X-wide grid
// row by row; grid cells are CHUNK_DX pixels apart in x and CHUNK_DY in y.
uvec2 chunk_offset(uint i) {
    uint col = i % CHUNK_X;
    uint row = i / CHUNK_X;
    return uvec2(col * CHUNK_DX, row * CHUNK_DY);
}
|
|
|
|
|
2021-03-18 22:47:14 +11:00
|
|
|
// Loads the source-image texels for all CHUNK pixels of this invocation.
//
// xy:      pixel coordinate of the invocation's first pixel.
// cmd_img: image command; `offset` is added to each pixel coordinate and,
//          when ENABLE_IMAGE_INDICES is defined, `index` selects the image.
// Returns one color per chunk pixel, with rgb converted from sRGB to linear
// and alpha left as loaded.
vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
    vec4 texels[CHUNK];
    for (uint k = 0; k < CHUNK; k++) {
        ivec2 coord = ivec2(xy + chunk_offset(k)) + cmd_img.offset;
#ifdef ENABLE_IMAGE_INDICES
        vec4 texel = imageLoad(images[cmd_img.index], coord);
#else
        vec4 texel = imageLoad(images[0], coord);
#endif
        texels[k] = vec4(fromsRGB(texel.rgb), texel.a);
    }
    return texels;
}
|
|
|
|
|
2020-04-22 12:30:14 +10:00
|
|
|
void main() {
|
2020-12-24 22:00:53 +11:00
|
|
|
if (mem_error != NO_ERROR) {
|
2020-12-12 04:30:20 +11:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-12-18 10:55:21 +11:00
|
|
|
uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
|
2020-12-24 22:00:53 +11:00
|
|
|
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
|
|
|
|
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
|
2020-04-22 12:30:14 +10:00
|
|
|
|
2021-03-23 22:54:49 +11:00
|
|
|
uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
2020-04-22 12:30:14 +10:00
|
|
|
vec2 xy = vec2(xy_uint);
|
2020-05-26 08:45:06 +10:00
|
|
|
vec3 rgb[CHUNK];
|
2020-11-20 06:53:59 +11:00
|
|
|
uint blend_stack[BLEND_STACK_SIZE][CHUNK];
|
2020-11-22 06:39:23 +11:00
|
|
|
uint blend_spill = 0;
|
2020-11-20 06:53:59 +11:00
|
|
|
uint blend_sp = 0;
|
2020-12-24 22:00:53 +11:00
|
|
|
Alloc clip_tos = new_alloc(0, 0);
|
2020-05-26 08:45:06 +10:00
|
|
|
for (uint i = 0; i < CHUNK; i++) {
|
|
|
|
rgb[i] = vec3(0.5);
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling need more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
Only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
#ifdef ENABLE_IMAGE_INDICES
|
2020-11-26 07:43:42 +11:00
|
|
|
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
|
2021-03-23 22:54:49 +11:00
|
|
|
rgb[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint + chunk_offset(i))/4).rgb;
|
2020-11-19 10:54:11 +11:00
|
|
|
}
|
2021-04-03 12:59:07 +11:00
|
|
|
#else
|
|
|
|
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
|
|
|
|
rgb[i] = imageLoad(images[0], ivec2(xy_uint + chunk_offset(i))/4).rgb;
|
|
|
|
}
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling need more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
Only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
#endif
|
2020-05-26 08:45:06 +10:00
|
|
|
}
|
2020-04-22 12:30:14 +10:00
|
|
|
|
2021-03-18 22:47:14 +11:00
|
|
|
float area[CHUNK];
|
2020-04-22 12:30:14 +10:00
|
|
|
while (true) {
|
2021-03-17 20:51:38 +11:00
|
|
|
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
|
2020-04-22 12:30:14 +10:00
|
|
|
if (tag == Cmd_End) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
switch (tag) {
|
2020-04-29 04:02:19 +10:00
|
|
|
case Cmd_Stroke:
|
2020-06-28 23:37:27 +10:00
|
|
|
// Calculate distance field from all the line segments in this tile.
|
2020-12-24 22:00:53 +11:00
|
|
|
CmdStroke stroke = Cmd_Stroke_read(cmd_alloc, cmd_ref);
|
2020-05-26 08:45:06 +10:00
|
|
|
float df[CHUNK];
|
|
|
|
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
2020-06-04 02:28:43 +10:00
|
|
|
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
2020-04-29 15:25:57 +10:00
|
|
|
do {
|
2020-12-24 22:00:53 +11:00
|
|
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
|
2020-12-02 04:06:09 +11:00
|
|
|
vec2 line_vec = seg.vector;
|
2020-06-04 02:28:43 +10:00
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
2020-12-02 04:06:09 +11:00
|
|
|
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
2021-03-23 22:54:49 +11:00
|
|
|
dpos += vec2(chunk_offset(k));
|
2020-06-04 02:28:43 +10:00
|
|
|
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
|
|
|
df[k] = min(df[k], length(line_vec * t - dpos));
|
2020-05-06 02:13:07 +10:00
|
|
|
}
|
2020-06-04 02:28:43 +10:00
|
|
|
tile_seg_ref = seg.next;
|
|
|
|
} while (tile_seg_ref.offset != 0);
|
2020-05-26 08:45:06 +10:00
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
2021-03-18 22:47:14 +11:00
|
|
|
area[k] = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
|
2020-05-26 08:45:06 +10:00
|
|
|
}
|
2021-03-19 06:17:04 +11:00
|
|
|
cmd_ref.offset += 4 + CmdStroke_size;
|
2020-04-29 04:02:19 +10:00
|
|
|
break;
|
2020-05-01 10:06:01 +10:00
|
|
|
case Cmd_Fill:
|
2020-12-24 22:00:53 +11:00
|
|
|
CmdFill fill = Cmd_Fill_read(cmd_alloc, cmd_ref);
|
2021-03-18 22:47:14 +11:00
|
|
|
for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
|
|
|
|
tile_seg_ref = TileSegRef(fill.tile_ref);
|
|
|
|
// Calculate coverage based on backdrop + coverage of each line segment
|
|
|
|
do {
|
|
|
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
|
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
2021-03-23 22:54:49 +11:00
|
|
|
vec2 my_xy = xy + vec2(chunk_offset(k));
|
2021-03-18 22:47:14 +11:00
|
|
|
vec2 start = seg.origin - my_xy;
|
|
|
|
vec2 end = start + seg.vector;
|
|
|
|
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
|
|
|
if (window.x != window.y) {
|
|
|
|
vec2 t = (window - start.y) / seg.vector.y;
|
|
|
|
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
|
|
|
|
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
|
|
|
|
float xmax = max(xs.x, xs.y);
|
|
|
|
float b = min(xmax, 1.0);
|
|
|
|
float c = max(b, 0.0);
|
|
|
|
float d = max(xmin, 0.0);
|
|
|
|
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
|
|
|
area[k] += a * (window.x - window.y);
|
|
|
|
}
|
|
|
|
area[k] += sign(seg.vector.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
|
|
|
}
|
|
|
|
tile_seg_ref = seg.next;
|
|
|
|
} while (tile_seg_ref.offset != 0);
|
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
|
|
|
area[k] = min(abs(area[k]), 1.0);
|
|
|
|
}
|
2021-03-19 06:17:04 +11:00
|
|
|
cmd_ref.offset += 4 + CmdFill_size;
|
2021-03-18 22:47:14 +11:00
|
|
|
break;
|
|
|
|
case Cmd_Solid:
|
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
|
|
|
area[k] = 1.0;
|
|
|
|
}
|
2021-03-19 06:17:04 +11:00
|
|
|
cmd_ref.offset += 4;
|
2021-03-18 22:47:14 +11:00
|
|
|
break;
|
|
|
|
case Cmd_Alpha:
|
|
|
|
CmdAlpha alpha = Cmd_Alpha_read(cmd_alloc, cmd_ref);
|
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
|
|
|
area[k] = alpha.alpha;
|
|
|
|
}
|
2021-03-19 06:17:04 +11:00
|
|
|
cmd_ref.offset += 4 + CmdAlpha_size;
|
2021-03-18 22:47:14 +11:00
|
|
|
break;
|
|
|
|
case Cmd_Color:
|
|
|
|
CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
|
2021-03-23 00:12:05 +11:00
|
|
|
vec4 fg = unpacksRGB(color.rgba_color);
|
2020-05-26 08:45:06 +10:00
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
2021-03-23 00:12:05 +11:00
|
|
|
vec4 fg_k = fg * area[k];
|
|
|
|
rgb[k] = rgb[k] * (1.0 - fg_k.a) + fg_k.rgb;
|
2020-10-09 21:43:29 +11:00
|
|
|
}
|
2021-03-19 06:17:04 +11:00
|
|
|
cmd_ref.offset += 4 + CmdColor_size;
|
2020-10-09 21:43:29 +11:00
|
|
|
break;
|
2021-03-18 22:47:14 +11:00
|
|
|
case Cmd_Image:
|
|
|
|
CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
|
2021-03-23 00:12:05 +11:00
|
|
|
vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling need more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
Only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
2021-03-23 00:12:05 +11:00
|
|
|
vec4 fg_k = img[k] * area[k];
|
|
|
|
rgb[k] = rgb[k] * (1.0 - fg_k.a) + fg_k.rgb;
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling need more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
Only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
}
|
2021-03-19 06:17:04 +11:00
|
|
|
cmd_ref.offset += 4 + CmdImage_size;
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling need more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
Only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
break;
|
2020-11-20 06:53:59 +11:00
|
|
|
case Cmd_BeginClip:
|
2020-11-22 06:39:23 +11:00
|
|
|
uint blend_slot = blend_sp % BLEND_STACK_SIZE;
|
|
|
|
if (blend_sp == blend_spill + BLEND_STACK_SIZE) {
|
|
|
|
// spill to scratch buffer
|
2020-12-24 22:00:53 +11:00
|
|
|
MallocResult m = alloc_clip_buf(clip_tos.offset);
|
|
|
|
if (m.failed) {
|
2020-12-12 04:30:20 +11:00
|
|
|
return;
|
|
|
|
}
|
2020-12-24 22:00:53 +11:00
|
|
|
clip_tos = m.alloc;
|
|
|
|
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
|
2020-11-22 06:39:23 +11:00
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
2021-03-23 22:54:49 +11:00
|
|
|
uvec2 offset = chunk_offset(k);
|
|
|
|
write_mem(clip_tos, base_ix + offset.x + offset.y * TILE_WIDTH_PX, blend_stack[blend_slot][k]);
|
2020-11-22 06:39:23 +11:00
|
|
|
}
|
|
|
|
blend_spill++;
|
|
|
|
}
|
2021-03-18 22:47:14 +11:00
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
|
|
|
blend_stack[blend_slot][k] = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
|
2020-11-21 04:26:02 +11:00
|
|
|
}
|
|
|
|
blend_sp++;
|
2021-03-19 06:17:04 +11:00
|
|
|
cmd_ref.offset += 4;
|
2020-11-21 04:26:02 +11:00
|
|
|
break;
|
2020-11-20 06:53:59 +11:00
|
|
|
case Cmd_EndClip:
|
2020-11-22 06:39:23 +11:00
|
|
|
blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE;
|
|
|
|
if (blend_sp == blend_spill) {
|
2020-12-24 22:00:53 +11:00
|
|
|
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
|
2020-11-22 06:39:23 +11:00
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
2021-03-23 22:54:49 +11:00
|
|
|
uvec2 offset = chunk_offset(k);
|
|
|
|
blend_stack[blend_slot][k] = read_mem(clip_tos, base_ix + offset.x + offset.y * TILE_WIDTH_PX);
|
2020-11-22 06:39:23 +11:00
|
|
|
}
|
2020-12-24 22:00:53 +11:00
|
|
|
clip_tos.offset = read_mem(clip_tos, (clip_tos.offset >> 2) + CLIP_LINK_OFFSET);
|
2020-11-22 06:39:23 +11:00
|
|
|
blend_spill--;
|
|
|
|
}
|
2020-11-20 06:53:59 +11:00
|
|
|
blend_sp--;
|
|
|
|
for (uint k = 0; k < CHUNK; k++) {
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling needs more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
vec4 rgba = unpacksRGB(blend_stack[blend_slot][k]);
|
2021-03-18 22:47:14 +11:00
|
|
|
rgb[k] = mix(rgba.rgb, rgb[k], area[k] * rgba.a);
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling needs more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
}
|
2021-03-19 06:17:04 +11:00
|
|
|
cmd_ref.offset += 4;
|
implement FillImage command and sRGB support
FillImage is like Fill, except that it takes its color from one or
more image atlases.
kernel4 uses a single image for non-Vulkan hosts, and the dynamic sized array
of image descriptors on Vulkan.
A previous version of this commit used textures. I think images are a better
choice for piet-gpu, for several reasons:
- Texture sampling, in particular textureGrad, is slow on lower spec devices
such as Google Pixel. Texture sampling is particularly slow and difficult to
implement for CPU fallbacks.
- Texture sampling needs more parameters, in particular the full u,v
transformation matrix, leading to a large increase in the command size. Since
all commands use the same size, that memory penalty is paid by all scenes, not
just scenes with textures.
- It is unlikely that piet-gpu will support every kind of fill for every
client, because each kind must be added to kernel4.
With FillImage, a client will prepare the image(s) in separate shader stages,
sampling and applying transformations and special effects as needed. Textures
that align with the output pixel grid can be used directly, without
pre-processing.
Note that the pre-processing step can run concurrently with the piet-gpu pipeline;
only the last stage, kernel4, needs the images.
Pre-processing most likely uses fixed function vertex/fragment programs,
which on some GPUs may run in parallel with piet-gpu's compute programs.
While here, fix a few validation errors:
- Explicitly enable EXT_descriptor_indexing, KHR_maintenance3,
KHR_get_physical_device_properties2.
- Specify a vkDescriptorSetVariableDescriptorCountAllocateInfo for
vkAllocateDescriptorSets. Otherwise, variable image2D arrays won't work (but
sampler2D arrays do, at least on my setup).
Updates #38
Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-29 08:02:39 +11:00
|
|
|
break;
|
2020-04-26 03:15:22 +10:00
|
|
|
case Cmd_Jump:
|
2020-12-24 22:00:53 +11:00
|
|
|
cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref);
|
|
|
|
cmd_alloc.offset = cmd_ref.offset;
|
2020-04-26 03:15:22 +10:00
|
|
|
continue;
|
2020-04-22 12:30:14 +10:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-26 08:45:06 +10:00
|
|
|
for (uint i = 0; i < CHUNK; i++) {
|
2021-03-23 22:54:49 +11:00
|
|
|
imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(tosRGB(rgb[i]), 1.0));
|
2020-05-26 08:45:06 +10:00
|
|
|
}
|
2020-04-22 12:30:14 +10:00
|
|
|
}
|