diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 90f15d566..a383b1902 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -47,6 +47,7 @@ static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8; static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D16; static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32F; static constexpr GPUTexture::Format VRAM_DS_COLOR_FORMAT = GPUTexture::Format::R32F; +static constexpr GPUTexture::Format VRAM_TRANSFER_FORMAT = GPUTexture::Format::R16U; #if defined(_DEBUG) || defined(_DEVEL) @@ -927,8 +928,8 @@ bool GPU_HW::CreateBuffers(Error* error) g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, GPUTexture::Type::Texture, VRAM_RT_FORMAT, read_texture_flags, nullptr, 0, error)) || !(m_vram_readback_texture = - g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Type::RenderTarget, - VRAM_RT_FORMAT, GPUTexture::Flags::None, nullptr, 0, error))) + g_gpu_device->FetchTexture(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Type::RenderTarget, + VRAM_TRANSFER_FORMAT, GPUTexture::Flags::None, nullptr, 0, error))) { Error::AddPrefix(error, "Failed to create VRAM textures: "); return false; @@ -1773,7 +1774,7 @@ bool GPU_HW::CompileResolutionDependentPipelines(Error* error) plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get(); - plconfig.SetTargetFormats(VRAM_RT_FORMAT); + plconfig.SetTargetFormats(VRAM_TRANSFER_FORMAT); // VRAM read { @@ -1792,6 +1793,7 @@ bool GPU_HW::CompileResolutionDependentPipelines(Error* error) } // Display + plconfig.SetTargetFormats(VRAM_RT_FORMAT); { for (u8 shader = 0; shader < 3; shader++) { @@ -3386,26 +3388,16 @@ void GPU_HW::DownloadVRAMFromGPU(u32 x, u32 y, u32 width, u32 height) // TODO: Only read if it's in the drawn area // Get bounds with 
wrap-around handled. - GSVector4i copy_rect = GetVRAMTransferBounds(x, y, width, height); - - // Has to be aligned to an even pixel for the download, due to 32-bit packing. - if (copy_rect.left & 1) - copy_rect.left--; - if (copy_rect.right & 1) - copy_rect.right++; - - DebugAssert((copy_rect.left % 2) == 0 && (copy_rect.width() % 2) == 0); - const u32 encoded_left = copy_rect.left / 2; - const u32 encoded_top = copy_rect.top; - const u32 encoded_width = copy_rect.width() / 2; - const u32 encoded_height = copy_rect.height(); + const GSVector4i copy_rect = GetVRAMTransferBounds(x, y, width, height); + const u32 copy_width = static_cast<u32>(copy_rect.width()); + const u32 copy_height = static_cast<u32>(copy_rect.height()); // Encode the 24-bit texture as 16-bit. const s32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.width(), copy_rect.height()}; g_gpu_device->SetRenderTarget(m_vram_readback_texture.get()); g_gpu_device->SetPipeline(m_vram_readback_pipeline.get()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetViewportAndScissor(0, 0, encoded_width, encoded_height); + g_gpu_device->SetViewportAndScissor(0, 0, copy_width, copy_height); g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); @@ -3413,18 +3405,17 @@ void GPU_HW::DownloadVRAMFromGPU(u32 x, u32 y, u32 width, u32 height) if (m_vram_readback_download_texture->IsImported()) { // Fast path, read directly. - m_vram_readback_download_texture->CopyFromTexture(encoded_left, encoded_top, m_vram_readback_texture.get(), 0, 0, - encoded_width, encoded_height, 0, 0, false); + m_vram_readback_download_texture->CopyFromTexture(copy_rect.x, copy_rect.y, m_vram_readback_texture.get(), 0, 0, + copy_width, copy_height, 0, 0, false); m_vram_readback_download_texture->Flush(); } else { // Copy to staging buffer, then to VRAM. 
- m_vram_readback_download_texture->CopyFromTexture(0, 0, m_vram_readback_texture.get(), 0, 0, encoded_width, - encoded_height, 0, 0, true); - m_vram_readback_download_texture->ReadTexels(0, 0, encoded_width, encoded_height, - &g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left], - VRAM_WIDTH * sizeof(u16)); + m_vram_readback_download_texture->CopyFromTexture(0, 0, m_vram_readback_texture.get(), 0, 0, copy_width, + copy_height, 0, 0, true); + m_vram_readback_download_texture->ReadTexels( + 0, 0, copy_width, copy_height, &g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left], VRAM_WIDTH * sizeof(u16)); } RestoreDeviceContext(); @@ -3477,7 +3468,7 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da { map_index = 0; upload_texture = - g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::R16U, + g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, VRAM_TRANSFER_FORMAT, GPUTexture::Flags::None, data, data_pitch); if (!upload_texture) { diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 500d605ba..b0fb02a98 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1395,13 +1395,13 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) if (uv_limits) { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "float4 v_uv_limits"}}, true, num_fragment_outputs, - use_dual_source, write_mask_as_depth, msaa, per_sample_shading, false, + false, use_dual_source, write_mask_as_depth, msaa, per_sample_shading, false, disable_color_perspective, shader_blending && !use_rov, use_rov); } else { - DeclareFragmentEntryPoint(ss, 1, 1, {}, true, num_fragment_outputs, use_dual_source, write_mask_as_depth, msaa, - per_sample_shading, false, disable_color_perspective, shader_blending && !use_rov, + DeclareFragmentEntryPoint(ss, 1, 1, {}, true, num_fragment_outputs, false, use_dual_source, write_mask_as_depth, 
+ msaa, per_sample_shading, false, disable_color_perspective, shader_blending && !use_rov, use_rov); } } @@ -1415,21 +1415,22 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, - true, num_fragment_outputs, use_dual_source, write_mask_as_depth, msaa, + true, num_fragment_outputs, false, use_dual_source, write_mask_as_depth, msaa, per_sample_shading, false, disable_color_perspective, shader_blending && !use_rov, use_rov); } else { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}}, true, - num_fragment_outputs, use_dual_source, write_mask_as_depth, msaa, per_sample_shading, - false, disable_color_perspective, shader_blending && !use_rov, use_rov); + num_fragment_outputs, false, use_dual_source, write_mask_as_depth, msaa, + per_sample_shading, false, disable_color_perspective, shader_blending && !use_rov, + use_rov); } } else { - DeclareFragmentEntryPoint(ss, 1, 0, {}, true, num_fragment_outputs, use_dual_source, write_mask_as_depth, msaa, - per_sample_shading, false, disable_color_perspective, shader_blending && !use_rov, + DeclareFragmentEntryPoint(ss, 1, 0, {}, true, num_fragment_outputs, false, use_dual_source, write_mask_as_depth, + msaa, per_sample_shading, false, disable_color_perspective, shader_blending && !use_rov, use_rov); } @@ -1933,19 +1934,12 @@ uint SampleVRAM(uint2 coords) } )"; - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); ss << R"( { - uint2 sample_coords = uint2(uint(v_pos.x) * 2u, uint(v_pos.y)); - sample_coords += u_base_coords; - - // We're encoding as 32-bit, so the output width is halved and we pack two 16-bit pixels in one 32-bit pixel. 
- uint left = SampleVRAM(sample_coords); - uint right = SampleVRAM(uint2(sample_coords.x + 1u, sample_coords.y)); - - o_col0 = float4(float(left & 0xFFu), float((left >> 8) & 0xFFu), - float(right & 0xFFu), float((right >> 8) & 0xFFu)) - / float4(255.0, 255.0, 255.0, 255.0); + uint2 sample_coords = uint2(v_pos.xy) + u_base_coords; + uint value = SampleVRAM(sample_coords); + o_col0 = uint4(value, 0u, 0u, 0u); })"; return std::move(ss).str(); @@ -1997,7 +1991,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b ss << "#define GET_VALUE(buffer_offset) (LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r)\n\n"; } - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1 + BoolToUInt32(write_depth_as_rt), false, write_mask_as_depth); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1 + BoolToUInt32(write_depth_as_rt), false, false, write_mask_as_depth); ss << R"( { float2 coords = floor(v_pos.xy / u_resolution_scale); @@ -2051,8 +2045,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader(bool write_mask_as_ true); DeclareTexture(ss, "samp0", 0, msaa); - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1 + BoolToUInt32(write_depth_as_rt), false, write_mask_as_depth, false, - false, msaa); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1 + BoolToUInt32(write_depth_as_rt), false, false, write_mask_as_depth, + false, false, msaa); ss << R"( { float2 dst_coords = floor(v_pos.xy); @@ -2105,7 +2099,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool DeclareUniformBuffer( ss, {"uint2 u_dst_coords", "uint2 u_end_coords", "float4 u_fill_color", "uint u_interlaced_displayed_field"}, true); - DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1 + BoolToUInt32(write_depth_as_rt), false, + DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1 + BoolToUInt32(write_depth_as_rt), false, false, write_mask_as_depth, false, false, false); ss << R"( { @@ -2144,7 +2138,7 @@ std::string 
GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader(bool msaa) c WriteHeader(ss); DefineMacro(ss, "MULTISAMPLING", msaa); DeclareTexture(ss, "samp0", 0, msaa); - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 0, false, true, false, false, msaa); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 0, false, false, true, false, false, msaa); ss << R"( { @@ -2165,7 +2159,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyDepthFragmentShader(bool msaa) con WriteHeader(ss); DefineMacro(ss, "MULTISAMPLED", msaa); DeclareTexture(ss, "samp0", 0, msaa); - DeclareFragmentEntryPoint(ss, 0, 1, {}, msaa, 1, false, false, msaa, msaa, msaa); + DeclareFragmentEntryPoint(ss, 0, 1, {}, msaa, 1, false, false, false, msaa, msaa, msaa); ss << R"( { @@ -2185,7 +2179,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMClearDepthFragmentShader(bool write_de std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "WRITE_DEPTH_AS_RT", write_depth_as_rt); - DeclareFragmentEntryPoint(ss, 0, 1, {}, false, BoolToUInt32(write_depth_as_rt), false, false, false, false, false); + DeclareFragmentEntryPoint(ss, 0, 1, {}, false, BoolToUInt32(write_depth_as_rt), false, false, false, false, false, + false); ss << R"( { diff --git a/src/util/shadergen.cpp b/src/util/shadergen.cpp index 5b8497a67..e46350e1c 100644 --- a/src/util/shadergen.cpp +++ b/src/util/shadergen.cpp @@ -625,10 +625,10 @@ void ShaderGen::DeclareVertexEntryPoint( void ShaderGen::DeclareFragmentEntryPoint( std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs, const std::initializer_list>& additional_inputs /* = */, - bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool dual_source_output /* = false */, - bool depth_output /* = false */, bool msaa /* = false */, bool ssaa /* = false */, - bool declare_sample_id /* = false */, bool noperspective_color /* = false */, bool feedback_loop /* = false */, - bool rov /* = false */) const + bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool 
integer_color_output /* = false */, + bool dual_source_output /* = false */, bool depth_output /* = false */, bool msaa /* = false */, + bool ssaa /* = false */, bool declare_sample_id /* = false */, bool noperspective_color /* = false */, + bool feedback_loop /* = false */, bool rov /* = false */) const { if (m_glsl) { @@ -761,23 +761,26 @@ void ShaderGen::DeclareFragmentEntryPoint( { for (u32 i = 0; i < num_color_outputs; i++) { - ss << "layout(location = 0, index = " << i << ") " << ((i == 0) ? target_0_qualifier : "out") - << " float4 o_col" << i << ";\n"; + ss << "layout(location = 0, index = " << i << ") " << ((i == 0) ? target_0_qualifier : "out") << " " + << (integer_color_output ? "uint4" : "float4") << " o_col" << i << ";\n"; } } else { for (u32 i = 0; i < num_color_outputs; i++) { - ss << "layout(location = " << i << ") " << ((i == 0) ? target_0_qualifier : "out") << " float4 o_col" << i - << ";\n"; + ss << "layout(location = " << i << ") " << ((i == 0) ? target_0_qualifier : "out") << " " + << (integer_color_output ? "uint4" : "float4") << " o_col" << i << ";\n"; } } } else { for (u32 i = 0; i < num_color_outputs; i++) - ss << ((i == 0) ? target_0_qualifier : "out") << " float4 o_col" << i << ";\n"; + { + ss << ((i == 0) ? target_0_qualifier : "out") << " " << (integer_color_output ? "uint4" : "float4") << " o_col" + << i << ";\n"; + } } ss << "\n"; @@ -839,7 +842,8 @@ void ShaderGen::DeclareFragmentEntryPoint( } for (u32 i = 0; i < num_color_outputs; i++) { - ss << (first ? "" : ",\n") << " out float4 o_col" << i << " : SV_Target" << i; + ss << (first ? "" : ",\n") << " out " << (integer_color_output ? 
"uint4" : "float4") << " o_col" << i + << " : SV_Target" << i; first = false; } diff --git a/src/util/shadergen.h b/src/util/shadergen.h index 3c432f738..213371d8c 100644 --- a/src/util/shadergen.h +++ b/src/util/shadergen.h @@ -61,7 +61,8 @@ public: void DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs, const std::initializer_list>& additional_inputs = {}, - bool declare_fragcoord = false, u32 num_color_outputs = 1, bool dual_source_output = false, + bool declare_fragcoord = false, u32 num_color_outputs = 1, + bool integer_color_output = false, bool dual_source_output = false, bool depth_output = false, bool msaa = false, bool ssaa = false, bool declare_sample_id = false, bool noperspective_color = false, bool feedback_loop = false, bool rov = false) const;