Skip to content

[Nvidia Driver Bugs] Fix Broken VSync on Nvidia + Sync Enhancements (Experimental) #593

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public void copyBuffer(Buffer src, long srcOffset, Buffer dst, long dstOffset, l
public void syncUploads() {
submitUploads();

Synchronization.INSTANCE.waitFences();
Synchronization.INSTANCE.recycleCmdBuffers();
}

private void beginCommands() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public void submitCommands() {
return;
}

long fence = queue.submitCommands(this.currentCmdBuffer);
queue.submitCommands(this.currentCmdBuffer);
Synchronization.INSTANCE.addCommandBuffer(this.currentCmdBuffer);

this.currentCmdBuffer = null;
Expand Down
184 changes: 119 additions & 65 deletions src/main/java/net/vulkanmod/vulkan/Renderer.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import net.vulkanmod.vulkan.memory.MemoryManager;
import net.vulkanmod.vulkan.pass.DefaultMainPass;
import net.vulkanmod.vulkan.pass.MainPass;
import net.vulkanmod.vulkan.queue.Queue;
import net.vulkanmod.vulkan.shader.GraphicsPipeline;
import net.vulkanmod.vulkan.shader.Pipeline;
import net.vulkanmod.vulkan.shader.PipelineState;
Expand Down Expand Up @@ -54,6 +55,7 @@ public class Renderer {

private static boolean swapChainUpdate = false;
public static boolean skipRendering = false;
private static final boolean sync2 = DeviceManager.checkExt(KHRSynchronization2.VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);

public static void initRenderer() {
INSTANCE = new Renderer();
Expand Down Expand Up @@ -88,7 +90,7 @@ public static int getCurrentImage() {
private List<VkCommandBuffer> commandBuffers;
private ArrayList<Long> imageAvailableSemaphores;
private ArrayList<Long> renderFinishedSemaphores;
private ArrayList<Long> inFlightFences;
private ArrayList<Long> inFlightSubmits;

private Framebuffer boundFramebuffer;
private RenderPass boundRenderPass;
Expand Down Expand Up @@ -164,33 +166,27 @@ private void allocateCommandBuffers() {
private void createSyncObjects() {
imageAvailableSemaphores = new ArrayList<>(framesNum);
renderFinishedSemaphores = new ArrayList<>(framesNum);
inFlightFences = new ArrayList<>(framesNum);
inFlightSubmits = new ArrayList<>(framesNum);

try (MemoryStack stack = stackPush()) {

VkSemaphoreCreateInfo semaphoreInfo = VkSemaphoreCreateInfo.calloc(stack);
semaphoreInfo.sType(VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);

VkFenceCreateInfo fenceInfo = VkFenceCreateInfo.calloc(stack);
fenceInfo.sType(VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
fenceInfo.flags(VK_FENCE_CREATE_SIGNALED_BIT);

LongBuffer pImageAvailableSemaphore = stack.mallocLong(1);
LongBuffer pRenderFinishedSemaphore = stack.mallocLong(1);
LongBuffer pFence = stack.mallocLong(1);

for (int i = 0; i < framesNum; i++) {

if (vkCreateSemaphore(device, semaphoreInfo, null, pImageAvailableSemaphore) != VK_SUCCESS
|| vkCreateSemaphore(device, semaphoreInfo, null, pRenderFinishedSemaphore) != VK_SUCCESS
|| vkCreateFence(device, fenceInfo, null, pFence) != VK_SUCCESS) {
|| vkCreateSemaphore(device, semaphoreInfo, null, pRenderFinishedSemaphore) != VK_SUCCESS) {

throw new RuntimeException("Failed to create synchronization objects for the frame: " + i);
}

imageAvailableSemaphores.add(pImageAvailableSemaphore.get(0));
renderFinishedSemaphores.add(pRenderFinishedSemaphore.get(0));
inFlightFences.add(pFence.get(0));
inFlightSubmits.add(0L);

}

Expand All @@ -206,7 +202,7 @@ public void preInitFrame() {
// runTick might be called recursively,
// this check forces sync to avoid upload corruption
if (lastReset == currentFrame) {
waitFences();
submitPending();
}
lastReset = currentFrame;

Expand Down Expand Up @@ -238,20 +234,22 @@ public void beginFrame() {
if (skipRendering || recordingCmds)
return;

vkWaitForFences(device, inFlightFences.get(currentFrame), true, VUtil.UINT64_MAX);
try (MemoryStack stack = stackPush()) {

p.pop();
p.push("Begin_rendering");
DeviceManager.getGraphicsQueue().waitSubmits(stack, inFlightSubmits.get(currentFrame));
//Uses Graphics Timeline as a substitute for inFlightFences
//Aggregates frame fences and Graphics Queue fences together as one

MemoryManager.getInstance().initFrame(currentFrame);
drawer.setCurrentFrame(currentFrame);

resetDescriptors();
p.pop();
p.push("Begin_rendering");

currentCmdBuffer = commandBuffers.get(currentFrame);
vkResetCommandBuffer(currentCmdBuffer, 0);
MemoryManager.getInstance().initFrame(currentFrame);
drawer.setCurrentFrame(currentFrame);

try (MemoryStack stack = stackPush()) {
resetDescriptors();

currentCmdBuffer = commandBuffers.get(currentFrame);

IntBuffer pImageIndex = stack.mallocInt(1);

Expand Down Expand Up @@ -303,7 +301,7 @@ public void endFrame() {

mainPass.end(currentCmdBuffer);

waitFences();
submitPending();

submitFrame();
recordingCmds = false;
Expand All @@ -317,23 +315,12 @@ private void submitFrame() {
return;

try (MemoryStack stack = stackPush()) {
int vkResult;

VkSubmitInfo submitInfo = VkSubmitInfo.calloc(stack);
submitInfo.sType(VK_STRUCTURE_TYPE_SUBMIT_INFO);
//Wait Async Transfers on host to avoid invalid frees (Destroy Buffer during use)
DeviceManager.getTransferQueue().waitSubmits(stack);

submitInfo.waitSemaphoreCount(1);
submitInfo.pWaitSemaphores(stack.longs(imageAvailableSemaphores.get(currentFrame)));
submitInfo.pWaitDstStageMask(stack.ints(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT));
submitInfo.pSignalSemaphores(stack.longs(renderFinishedSemaphores.get(currentFrame)));
submitInfo.pCommandBuffers(stack.pointers(currentCmdBuffer));

vkResetFences(device, inFlightFences.get(currentFrame));

if ((vkResult = vkQueueSubmit(DeviceManager.getGraphicsQueue().queue(), submitInfo, inFlightFences.get(currentFrame))) != VK_SUCCESS) {
vkResetFences(device, inFlightFences.get(currentFrame));
throw new RuntimeException("Failed to submit draw command buffer: %s".formatted(VkResult.decode(vkResult)));
}
final long submitId = sync2 ? getSubmitId2(stack) : getSubmitId(stack);

VkPresentInfoKHR presentInfo = VkPresentInfoKHR.calloc(stack);
presentInfo.sType(VK_STRUCTURE_TYPE_PRESENT_INFO_KHR);
Expand All @@ -345,7 +332,7 @@ private void submitFrame() {

presentInfo.pImageIndices(stack.ints(imageIndex));

vkResult = vkQueuePresentKHR(DeviceManager.getPresentQueue().queue(), presentInfo);
final int vkResult = vkQueuePresentKHR(DeviceManager.getPresentQueue().queue(), presentInfo);

if (vkResult == VK_ERROR_OUT_OF_DATE_KHR || vkResult == VK_SUBOPTIMAL_KHR || swapChainUpdate) {
swapChainUpdate = true;
Expand All @@ -355,9 +342,85 @@ private void submitFrame() {
}

currentFrame = (currentFrame + 1) % framesNum;

inFlightSubmits.set(currentFrame, submitId);

}
}

/**
* Fallback if Sync2 is unsupported (macOS and some EOL Devices)
* <p>
* Submits {@code currentCmdBuffer} to the graphics queue with a plain {@code vkQueueSubmit},
* chaining a {@code VkTimelineSemaphoreSubmitInfo} through {@code pNext} so the queue's
* timeline semaphore can be waited on and signalled alongside the per-frame semaphores.
* No fence is passed to the submit (the fence argument is 0).
*
* @param stack memory stack that all structs and buffers of this submission are allocated from
* @return the value of {@code graphicsQueue.submitCount()} read before {@code submitCountAdd()} is called
* @throws RuntimeException if {@code vkQueueSubmit} returns anything other than {@code VK_SUCCESS}
*/
private long getSubmitId(MemoryStack stack) {
Queue graphicsQueue = DeviceManager.getGraphicsQueue();

// Captured before submitCountAdd() runs below, so this is the pre-submit counter value.
// NOTE(review): presumably submitCountAdd() advances the counter — confirm in Queue whether
// the returned id matches the value this submission signals or the one before it.
final long submitId = graphicsQueue.submitCount();

// Value arrays are positional: slot 0 lines up with the per-frame semaphore (value 0),
// slot 1 with the queue's timeline semaphore.
VkTimelineSemaphoreSubmitInfo mainSemaphoreSubmitInfo = VkTimelineSemaphoreSubmitInfo.calloc(stack)
.sType$Default()
.pWaitSemaphoreValues(stack.longs(0, graphicsQueue.submitCount()))
.pSignalSemaphoreValues(stack.longs(0, graphicsQueue.submitCountAdd()));

VkSubmitInfo submitInfo = VkSubmitInfo.calloc(stack);
submitInfo.sType(VK_STRUCTURE_TYPE_SUBMIT_INFO);
submitInfo.pNext(mainSemaphoreSubmitInfo);
// Two waits: image-available semaphore of the current frame, then the timeline semaphore.
submitInfo.waitSemaphoreCount(2);
submitInfo.pWaitSemaphores(stack.longs(imageAvailableSemaphores.get(currentFrame), graphicsQueue.getTmSemaphore()));
submitInfo.pWaitDstStageMask(stack.ints(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT)); //Vertex Bit used for LightMap Sampler Transitions
// Two signals, in the same slot order as the signal values above.
submitInfo.pSignalSemaphores(stack.longs(renderFinishedSemaphores.get(currentFrame), graphicsQueue.getTmSemaphore()));
submitInfo.pCommandBuffers(stack.pointers(currentCmdBuffer));

final int vkResult;
if ((vkResult = vkQueueSubmit(graphicsQueue.queue(), submitInfo, 0)) != VK_SUCCESS) {
throw new RuntimeException("Failed to submit draw command buffer: %s".formatted(VkResult.decode(vkResult)));
}
return submitId;
}

/**
* Submits {@code currentCmdBuffer} to the graphics queue through
* {@code KHRSynchronization2.vkQueueSubmit2KHR}. Waits on the current frame's image-available
* semaphore and the queue's timeline semaphore, and signals the current frame's
* render-finished semaphore and the same timeline semaphore. No fence is passed to the
* submit (the fence argument is 0).
*
* @param stack memory stack that all structs of this submission are allocated from
* @return the value of {@code graphicsQueue.submitCount()} read before {@code submitCountAdd()} is called
* @throws RuntimeException if {@code vkQueueSubmit2KHR} returns anything other than {@code VK_SUCCESS}
*/
private long getSubmitId2(MemoryStack stack) {

Queue graphicsQueue = DeviceManager.getGraphicsQueue();
// Captured before submitCountAdd() runs below, so this is the pre-submit counter value.
// NOTE(review): presumably submitCountAdd() advances the counter — confirm in Queue.
final long submitId = graphicsQueue.submitCount();

VkCommandBufferSubmitInfo.Buffer commandBufferSubmitInfo = VkCommandBufferSubmitInfo.calloc(1, stack)
.sType$Default()
.commandBuffer(currentCmdBuffer);

// Wait slot 0: image-available semaphore of the current frame (value 0).
// NOTE(review): this path waits at TOP_OF_PIPE while getSubmitId uses
// COLOR_ATTACHMENT_OUTPUT for the same semaphore — confirm the difference is intended.
VkSemaphoreSubmitInfo.Buffer waitSemaphoreSubmitInfo = VkSemaphoreSubmitInfo.calloc(2, stack);
waitSemaphoreSubmitInfo.get(0).sType$Default()
.semaphore(imageAvailableSemaphores.get(currentFrame))
.stageMask(VK13.VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)
.value(0);

// Wait slot 1: the queue's timeline semaphore at the current submit count.
waitSemaphoreSubmitInfo.get(1).sType$Default()
.semaphore(graphicsQueue.getTmSemaphore())
.stageMask(VK13.VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT) //LightMap Sampler Transitions
.value(graphicsQueue.submitCount());

// Signal slot 0: render-finished semaphore of the current frame (value 0).
VkSemaphoreSubmitInfo.Buffer mainSemaphoreSubmitInfo = VkSemaphoreSubmitInfo.calloc(2, stack);
mainSemaphoreSubmitInfo.get(0).sType$Default()
.semaphore(renderFinishedSemaphores.get(currentFrame))
.stageMask(VK13.VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT)
.value(0);

// Signal slot 1: the queue's timeline semaphore, with the value returned by submitCountAdd().
mainSemaphoreSubmitInfo.get(1).sType$Default()
.semaphore(graphicsQueue.getTmSemaphore())
.stageMask(VK13.VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT)
.value(graphicsQueue.submitCountAdd());

VkSubmitInfo2.Buffer submitInfo = VkSubmitInfo2.calloc(1, stack)
.sType$Default()
.pWaitSemaphoreInfos(waitSemaphoreSubmitInfo)
.pSignalSemaphoreInfos(mainSemaphoreSubmitInfo)
.pCommandBufferInfos(commandBufferSubmitInfo);

final int vkResult;
if ((vkResult = KHRSynchronization2.vkQueueSubmit2KHR(graphicsQueue.queue(), submitInfo, 0)) != VK_SUCCESS) {
throw new RuntimeException("Failed to submit draw command buffer: %s".formatted(VkResult.decode(vkResult)));
}
return submitId;
}

/**
* Called in case draw results are needed before the end of the frame
*/
Expand All @@ -371,21 +434,30 @@ public void flushCmds() {
this.endRenderPass(currentCmdBuffer);
vkEndCommandBuffer(currentCmdBuffer);

DeviceManager.getTransferQueue().waitSubmits(stack);

Queue graphicsQueue = DeviceManager.getGraphicsQueue();

VkTimelineSemaphoreSubmitInfo mainSemaphoreSubmitInfo = VkTimelineSemaphoreSubmitInfo.calloc(stack)
.sType$Default()
.pWaitSemaphoreValues(stack.longs(graphicsQueue.submitCount()))
.pSignalSemaphoreValues(stack.longs(graphicsQueue.submitCountAdd()));

VkSubmitInfo submitInfo = VkSubmitInfo.calloc(stack);
submitInfo.sType(VK_STRUCTURE_TYPE_SUBMIT_INFO);

submitInfo.pNext(mainSemaphoreSubmitInfo);
submitInfo.waitSemaphoreCount(1);
submitInfo.pWaitSemaphores(stack.longs(graphicsQueue.getTmSemaphore()));
submitInfo.pSignalSemaphores(stack.longs(graphicsQueue.getTmSemaphore()));
submitInfo.pCommandBuffers(stack.pointers(currentCmdBuffer));

vkResetFences(device, inFlightFences.get(currentFrame));

waitFences();
submitPending();

if ((vkResult = vkQueueSubmit(DeviceManager.getGraphicsQueue().queue(), submitInfo, inFlightFences.get(currentFrame))) != VK_SUCCESS) {
vkResetFences(device, inFlightFences.get(currentFrame));
if ((vkResult = vkQueueSubmit(DeviceManager.getGraphicsQueue().queue(), submitInfo, 0)) != VK_SUCCESS) {
throw new RuntimeException("Failed to submit draw command buffer: %s".formatted(VkResult.decode(vkResult)));
}

vkWaitForFences(device, inFlightFences.get(currentFrame), true, VUtil.UINT64_MAX);
graphicsQueue.waitSubmits(stack);

this.beginRenderPass(stack);
}
Expand Down Expand Up @@ -433,11 +505,10 @@ public void addUsedPipeline(Pipeline pipeline) {
/**
* Removes the given pipeline from {@code usedPipelines}.
*
* @param pipeline the pipeline to remove
*/
public void removeUsedPipeline(Pipeline pipeline) {
usedPipelines.remove(pipeline);
}

private void waitFences() {
// Make sure there are no uploads/transitions scheduled
//Synchronization fences are merged into vkQueueSubmit2 submit Barrier, reducing sync overhead and improving frametime
private void submitPending() {
ImageUploadHelper.INSTANCE.submitCommands();
Synchronization.INSTANCE.waitFences();
Synchronization.INSTANCE.recycleCmdBuffers();
Vulkan.getStagingBuffer().reset();
}

Expand All @@ -451,25 +522,9 @@ private void resetDescriptors() {
boundPipelineHandle = 0;
}

/**
* Resets the current frame's in-flight fence, submits a {@code VkSubmitInfo} that carries no
* command buffers to the graphics queue with that fence, and then waits on the fence.
* The submit lists the current frame's image-available semaphore as a wait semaphore.
* The return codes of {@code vkQueueSubmit} and {@code vkWaitForFences} are not checked.
*/
void waitForSwapChain() {
vkResetFences(device, inFlightFences.get(currentFrame));

// constexpr VkPipelineStageFlags t=VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
try (MemoryStack stack = MemoryStack.stackPush()) {
//Empty Submit
// NOTE(review): waitSemaphoreCount is never set here, while the other VkSubmitInfo
// submits in this file set it explicitly — confirm whether the binding derives the
// count from pWaitSemaphores; if not, the semaphore wait is ignored.
VkSubmitInfo info = VkSubmitInfo.calloc(stack)
.sType$Default()
.pWaitSemaphores(stack.longs(imageAvailableSemaphores.get(currentFrame)))
.pWaitDstStageMask(stack.ints(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT));

vkQueueSubmit(DeviceManager.getGraphicsQueue().queue(), info, inFlightFences.get(currentFrame));
// Timeout -1 has all 64 bits set; presumably meant as "no timeout", like VUtil.UINT64_MAX elsewhere.
vkWaitForFences(device, inFlightFences.get(currentFrame), true, -1);
}
}

@SuppressWarnings("UnreachableCode")
private void recreateSwapChain() {
waitFences();
submitPending();
Vulkan.waitIdle();

commandBuffers.forEach(commandBuffer -> vkResetCommandBuffer(commandBuffer, 0));
Expand Down Expand Up @@ -518,7 +573,6 @@ public void cleanUpResources() {

private void destroySyncObjects() {
for (int i = 0; i < framesNum; ++i) {
vkDestroyFence(device, inFlightFences.get(i), null);
vkDestroySemaphore(device, imageAvailableSemaphores.get(i), null);
vkDestroySemaphore(device, renderFinishedSemaphores.get(i), null);
}
Expand Down
37 changes: 3 additions & 34 deletions src/main/java/net/vulkanmod/vulkan/Synchronization.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,55 +3,24 @@
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import net.vulkanmod.vulkan.queue.CommandPool;
import net.vulkanmod.vulkan.util.VUtil;
import org.lwjgl.system.MemoryUtil;
import org.lwjgl.vulkan.VkDevice;

import java.nio.LongBuffer;

import static org.lwjgl.vulkan.VK10.*;

public class Synchronization {
private static final int ALLOCATION_SIZE = 50;

public static final Synchronization INSTANCE = new Synchronization(ALLOCATION_SIZE);

private final LongBuffer fences;
private int idx = 0;
public static final Synchronization INSTANCE = new Synchronization();

private ObjectArrayList<CommandPool.CommandBuffer> commandBuffers = new ObjectArrayList<>();
private final ObjectArrayList<CommandPool.CommandBuffer> commandBuffers = new ObjectArrayList<>();

Synchronization(int allocSize) {
this.fences = MemoryUtil.memAllocLong(allocSize);
}

public synchronized void addCommandBuffer(CommandPool.CommandBuffer commandBuffer) {
this.addFence(commandBuffer.getFence());
this.commandBuffers.add(commandBuffer);
}

public synchronized void addFence(long fence) {
if (idx == ALLOCATION_SIZE)
waitFences();

fences.put(idx, fence);
idx++;
}

public synchronized void waitFences() {
if (idx == 0)
return;

VkDevice device = Vulkan.getVkDevice();

fences.limit(idx);

vkWaitForFences(device, fences, true, VUtil.UINT64_MAX);

public synchronized void recycleCmdBuffers() {
this.commandBuffers.forEach(CommandPool.CommandBuffer::reset);
this.commandBuffers.clear();

fences.limit(ALLOCATION_SIZE);
idx = 0;
}

public static void waitFence(long fence) {
Expand Down
Loading