perf: replace dirty bitset with a flag on each row

This is much faster for most operations, since the row is typically
already loaded at the point where it has to be marked dirty, so the flag
can be set in place instead of updating a separate per-page bitset.
Author: Qwerasd
Date:   2025-11-18 12:10:47 -07:00
Parent: 212598ed66
Commit: 30472c0077
5 changed files with 74 additions and 138 deletions
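
In rough terms, the change replaces the per-page dirty bitset (indexed by row) with a dirty flag stored directly on each Row. Below is a minimal sketch of the before/after marking pattern; it is not part of the commit and uses simplified stand-in types rather than Ghostty's real Page/Row layout.

const std = @import("std");

// Stand-in Row: the real Row is a packed struct(u64) with many more fields.
const Row = packed struct(u64) {
    dirty: bool = false,
    _padding: u63 = 0,
};

// Old pattern: every mark went through a separate bitset owned by the page,
// i.e. a lookup into a different region of the page's memory.
fn markRangeDirtyOld(dirty_set: *std.DynamicBitSetUnmanaged, start: usize, end: usize) void {
    dirty_set.setRangeValue(.{ .start = start, .end = end }, true);
}

// New pattern: the caller usually already has the rows in hand when it
// modifies them, so marking them dirty is a store into memory that was
// just written anyway.
fn markRangeDirtyNew(rows: []Row) void {
    for (rows) |*row| row.dirty = true;
}

test "per-row dirty flag" {
    var rows = [_]Row{.{}} ** 4;
    markRangeDirtyNew(rows[1..3]);
    try std.testing.expect(!rows[0].dirty and rows[1].dirty and rows[2].dirty and !rows[3].dirty);
}

Note also that the renderer's dirty-clearing pass below switches its page iterator from .screen to .viewport, presumably because clearing is now a per-row walk rather than a single unsetAll on a bitset, so it pays to visit only the pages that can actually be drawn.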

View File

@@ -1191,12 +1191,13 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
{
var it = state.terminal.screens.active.pages.pageIterator(
.right_down,
.{ .screen = .{} },
.{ .viewport = .{} },
null,
);
while (it.next()) |chunk| {
var dirty_set = chunk.node.data.dirtyBitSet();
dirty_set.unsetAll();
for (chunk.rows()) |*row| {
row.dirty = false;
}
}
}

View File

@@ -2683,10 +2683,9 @@ pub fn eraseRow(
// If we have a pinned viewport, we need to adjust for active area.
self.fixupViewport(1);
{
// Set all the rows as dirty in this page
var dirty = node.data.dirtyBitSet();
dirty.setRangeValue(.{ .start = pn.y, .end = node.data.size.rows }, true);
// Set all the rows as dirty in this page, starting at the erased row.
for (rows[pn.y..node.data.size.rows]) |*row| {
row.dirty = true;
}
// We iterate through all of the following pages in order to move their
@@ -2721,8 +2720,9 @@ pub fn eraseRow(
fastmem.rotateOnce(Row, rows[0..node.data.size.rows]);
// Set all the rows as dirty
var dirty = node.data.dirtyBitSet();
dirty.setRangeValue(.{ .start = 0, .end = node.data.size.rows }, true);
for (rows[0..node.data.size.rows]) |*row| {
row.dirty = true;
}
// Our tracked pins for this page need to be updated.
// If the pin is in row 0 that means the corresponding row has
@@ -2774,8 +2774,9 @@ pub fn eraseRowBounded(
fastmem.rotateOnce(Row, rows[pn.y..][0 .. limit + 1]);
// Set all the rows as dirty
var dirty = node.data.dirtyBitSet();
dirty.setRangeValue(.{ .start = pn.y, .end = pn.y + limit }, true);
for (rows[pn.y..][0..limit]) |*row| {
row.dirty = true;
}
// If our viewport is a pin and our pin is within the erased
// region we need to maybe shift our cache up. We do this here instead
@@ -2813,9 +2814,8 @@ pub fn eraseRowBounded(
fastmem.rotateOnce(Row, rows[pn.y..node.data.size.rows]);
// All the rows in the page are dirty below the erased row.
{
var dirty = node.data.dirtyBitSet();
dirty.setRangeValue(.{ .start = pn.y, .end = node.data.size.rows }, true);
for (rows[pn.y..node.data.size.rows]) |*row| {
row.dirty = true;
}
// We need to keep track of how many rows we've shifted so that we can
@@ -2872,8 +2872,9 @@ pub fn eraseRowBounded(
fastmem.rotateOnce(Row, rows[0 .. shifted_limit + 1]);
// Set all the rows as dirty
var dirty = node.data.dirtyBitSet();
dirty.setRangeValue(.{ .start = 0, .end = shifted_limit }, true);
for (rows[0..shifted_limit]) |*row| {
row.dirty = true;
}
// See the other places we do something similar in this function
// for a detailed explanation.
@@ -2904,8 +2905,9 @@ pub fn eraseRowBounded(
fastmem.rotateOnce(Row, rows[0..node.data.size.rows]);
// Set all the rows as dirty
var dirty = node.data.dirtyBitSet();
dirty.setRangeValue(.{ .start = 0, .end = node.data.size.rows }, true);
for (rows[0..node.data.size.rows]) |*row| {
row.dirty = true;
}
// Account for the rows shifted in this node.
shifted += node.data.size.rows;
@@ -2993,6 +2995,9 @@ pub fn eraseRows(
const old_dst = dst.*;
dst.* = src.*;
src.* = old_dst;
// Mark the moved row as dirty.
dst.dirty = true;
}
// Clear our remaining cells that we didn't shift or swapped
@@ -3022,10 +3027,6 @@ pub fn eraseRows(
// Our new size is the amount we scrolled
chunk.node.data.size.rows = @intCast(scroll_amount);
erased += chunk.end;
// Set all the rows as dirty
var dirty = chunk.node.data.dirtyBitSet();
dirty.setRangeValue(.{ .start = 0, .end = chunk.node.data.size.rows }, true);
}
// Update our total row count
@@ -3881,10 +3882,10 @@ fn growRows(self: *PageList, n: usize) !void {
/// traverses the entire list of pages. This is used for testing/debugging.
pub fn clearDirty(self: *PageList) void {
var page = self.pages.first;
while (page) |p| {
var set = p.data.dirtyBitSet();
set.unsetAll();
page = p.next;
while (page) |p| : (page = p.next) {
for (p.data.rows.ptr(p.data.memory)[0..p.data.size.rows]) |*row| {
row.dirty = false;
}
}
}
@@ -3965,13 +3966,12 @@ pub const Pin = struct {
/// Check if this pin is dirty.
pub inline fn isDirty(self: Pin) bool {
return self.node.data.isRowDirty(self.y);
return self.rowAndCell().row.dirty;
}
/// Mark this pin location as dirty.
pub inline fn markDirty(self: Pin) void {
var set = self.node.data.dirtyBitSet();
set.set(self.y);
self.rowAndCell().row.dirty = true;
}
/// Returns true if the row of this pin should never have its background
@@ -4375,7 +4375,7 @@ const Cell = struct {
/// This is not very performant this is primarily used for assertions
/// and testing.
pub fn isDirty(self: Cell) bool {
return self.node.data.isRowDirty(self.row_idx);
return self.row.dirty;
}
/// Get the cell style.

View File

@@ -786,9 +786,7 @@ pub fn cursorDownScroll(self: *Screen) !void {
self.cursor.page_row,
page.getCells(self.cursor.page_row),
);
var dirty = page.dirtyBitSet();
dirty.set(0);
self.cursorMarkDirty();
} else {
// The call to `eraseRow` will move the tracked cursor pin up by one
// row, but we don't actually want that, so we keep the old pin and
@@ -880,7 +878,7 @@ pub fn cursorScrollAbove(self: *Screen) !void {
// the cursor always changes page rows inside this function, and
// when that happens it can mean the text in the old row needs to
// be re-shaped because the cursor splits runs to break ligatures.
self.cursor.page_pin.markDirty();
self.cursorMarkDirty();
// If the cursor is on the bottom of the screen, its faster to use
// our specialized function for that case.
@@ -926,8 +924,9 @@ pub fn cursorScrollAbove(self: *Screen) !void {
fastmem.rotateOnceR(Row, rows[pin.y..page.size.rows]);
// Mark all our rotated rows as dirty.
var dirty = page.dirtyBitSet();
dirty.setRangeValue(.{ .start = pin.y, .end = page.size.rows }, true);
for (rows[pin.y..page.size.rows]) |*row| {
row.dirty = true;
}
// Setup our cursor caches after the rotation so it points to the
// correct data
@@ -993,8 +992,9 @@ fn cursorScrollAboveRotate(self: *Screen) !void {
);
// All rows we rotated are dirty
var dirty = cur_page.dirtyBitSet();
dirty.setRangeValue(.{ .start = 0, .end = cur_page.size.rows }, true);
for (cur_rows[0..cur_page.size.rows]) |*row| {
row.dirty = true;
}
}
// Our current is our cursor page, we need to rotate down from
@@ -1010,11 +1010,9 @@ fn cursorScrollAboveRotate(self: *Screen) !void {
);
// Set all the rows we rotated and cleared dirty
var dirty = cur_page.dirtyBitSet();
dirty.setRangeValue(
.{ .start = self.cursor.page_pin.y, .end = cur_page.size.rows },
true,
);
for (cur_rows[self.cursor.page_pin.y..cur_page.size.rows]) |*row| {
row.dirty = true;
}
// Setup cursor cache data after all the rotations so our
// row is valid.
@@ -1105,7 +1103,7 @@ inline fn cursorChangePin(self: *Screen, new: Pin) void {
// we must mark the old and new page dirty. We do this as long
// as the pins are not equal
if (!self.cursor.page_pin.eql(new)) {
self.cursor.page_pin.markDirty();
self.cursorMarkDirty();
new.markDirty();
}
@@ -1175,7 +1173,7 @@ inline fn cursorChangePin(self: *Screen, new: Pin) void {
/// Mark the cursor position as dirty.
/// TODO: test
pub inline fn cursorMarkDirty(self: *Screen) void {
self.cursor.page_pin.markDirty();
self.cursor.page_row.dirty = true;
}
/// Reset the cursor row's soft-wrap state and the cursor's pending wrap.
@@ -1303,10 +1301,6 @@ pub fn clearRows(
var it = self.pages.pageIterator(.right_down, tl, bl);
while (it.next()) |chunk| {
// Mark everything in this chunk as dirty
var dirty = chunk.node.data.dirtyBitSet();
dirty.setRangeValue(.{ .start = chunk.start, .end = chunk.end }, true);
for (chunk.rows()) |*row| {
const cells_offset = row.cells;
const cells_multi: [*]Cell = row.cells.ptr(chunk.node.data.memory);
@@ -1322,6 +1316,8 @@ pub fn clearRows(
self.clearCells(&chunk.node.data, row, cells);
row.* = .{ .cells = cells_offset };
}
row.dirty = true;
}
}
}

View File

@@ -1672,6 +1672,9 @@ pub fn insertLines(self: *Terminal, count: usize) void {
dst_row.* = src_row.*;
src_row.* = dst;
// Make sure the row is marked as dirty though.
dst_row.dirty = true;
// Ensure what we did didn't corrupt the page
cur_p.node.data.assertIntegrity();
} else {
@@ -1867,6 +1870,9 @@ pub fn deleteLines(self: *Terminal, count: usize) void {
dst_row.* = src_row.*;
src_row.* = dst;
// Make sure the row is marked as dirty though.
dst_row.dirty = true;
// Ensure what we did didn't corrupt the page
cur_p.node.data.assertIntegrity();
} else {

View File

@@ -136,44 +136,6 @@ pub const Page = struct {
hyperlink_map: hyperlink.Map,
hyperlink_set: hyperlink.Set,
/// The offset to the first mask of dirty bits in the page.
///
/// The dirty bits is a contiguous array of usize where each bit represents
/// a row in the page, in order. If the bit is set, then the row is dirty
/// and requires a redraw. Dirty status is only ever meant to convey that
/// a cell has changed visually. A cell which changes in a way that doesn't
/// affect the visual representation may not be marked as dirty.
///
/// Dirty tracking may have false positives but should never have false
/// negatives. A false negative would result in a visual artifact on the
/// screen.
///
/// Dirty bits are only ever unset by consumers of a page. The page
/// structure itself does not unset dirty bits since the page does not
/// know when a cell has been redrawn.
///
/// As implementation background: it may seem that dirty bits should be
/// stored elsewhere and not on the page itself, because the only data
/// that could possibly change is in the active area of a terminal
/// historically and that area is small compared to the typical scrollback.
/// My original thinking was to put the dirty bits on Screen instead and
/// have them only track the active area. However, I decided to put them
/// into the page directly for a few reasons:
///
/// 1. It's simpler. The page is a self-contained unit and it's nice
/// to have all the data for a page in one place.
///
/// 2. It's cheap. Even a very large page might have 1000 rows and
/// that's only ~128 bytes of 64-bit integers to track all the dirty
/// bits. Compared to the hundreds of kilobytes a typical page
/// consumes, this is nothing.
///
/// 3. It's more flexible. If we ever want to implement new terminal
/// features that allow non-active area to be dirty, we can do that
/// with minimal dirty-tracking work.
///
dirty: Offset(usize),
/// The current dimensions of the page. The capacity may be larger
/// than this. This allows us to allocate a larger page than necessary
/// and also to resize a page smaller without reallocating.
@@ -238,7 +200,6 @@ pub const Page = struct {
.memory = @alignCast(buf.start()[0..l.total_size]),
.rows = rows,
.cells = cells,
.dirty = buf.member(usize, l.dirty_start),
.styles = StyleSet.init(
buf.add(l.styles_start),
l.styles_layout,
@@ -686,11 +647,8 @@ pub const Page = struct {
const other_rows = other.rows.ptr(other.memory)[y_start..y_end];
const rows = self.rows.ptr(self.memory)[0 .. y_end - y_start];
const other_dirty_set = other.dirtyBitSet();
var dirty_set = self.dirtyBitSet();
for (rows, 0.., other_rows, y_start..) |*dst_row, dst_y, *src_row, src_y| {
for (rows, other_rows) |*dst_row, *src_row| {
try self.cloneRowFrom(other, dst_row, src_row);
if (other_dirty_set.isSet(src_y)) dirty_set.set(dst_y);
}
// We should remain consistent
@@ -752,6 +710,7 @@ pub const Page = struct {
copy.grapheme = dst_row.grapheme;
copy.hyperlink = dst_row.hyperlink;
copy.styled = dst_row.styled;
copy.dirty |= dst_row.dirty;
}
// Our cell offset remains the same
@@ -1501,30 +1460,12 @@ pub const Page = struct {
return self.grapheme_map.map(self.memory).capacity();
}
/// Returns the bitset for the dirty bits on this page.
///
/// The returned value is a DynamicBitSetUnmanaged but it is NOT
/// actually dynamic; do NOT call resize on this. It is safe to
/// read and write but do not resize it.
pub inline fn dirtyBitSet(self: *const Page) std.DynamicBitSetUnmanaged {
return .{
.bit_length = self.capacity.rows,
.masks = self.dirty.ptr(self.memory),
};
}
/// Returns true if the given row is dirty. This is NOT very
/// efficient if you're checking many rows and you should use
/// dirtyBitSet directly instead.
pub inline fn isRowDirty(self: *const Page, y: usize) bool {
return self.dirtyBitSet().isSet(y);
}
/// Returns true if this page is dirty at all. If you plan on
/// checking any additional rows, you should use dirtyBitSet and
/// check this on your own so you have the set available.
/// Returns true if this page is dirty at all.
pub inline fn isDirty(self: *const Page) bool {
return self.dirtyBitSet().findFirstSet() != null;
for (self.rows.ptr(self.memory)[0..self.size.rows]) |row| {
if (row.dirty) return true;
}
return false;
}
pub const Layout = struct {
@@ -1533,8 +1474,6 @@ pub const Page = struct {
rows_size: usize,
cells_start: usize,
cells_size: usize,
dirty_start: usize,
dirty_size: usize,
styles_start: usize,
styles_layout: StyleSet.Layout,
grapheme_alloc_start: usize,
@@ -1561,19 +1500,8 @@ pub const Page = struct {
const cells_start = alignForward(usize, rows_end, @alignOf(Cell));
const cells_end = cells_start + (cells_count * @sizeOf(Cell));
// The division below cannot fail because our row count cannot
// exceed the maximum value of usize.
const dirty_bit_length: usize = rows_count;
const dirty_usize_length: usize = std.math.divCeil(
usize,
dirty_bit_length,
@bitSizeOf(usize),
) catch unreachable;
const dirty_start = alignForward(usize, cells_end, @alignOf(usize));
const dirty_end: usize = dirty_start + (dirty_usize_length * @sizeOf(usize));
const styles_layout: StyleSet.Layout = .init(cap.styles);
const styles_start = alignForward(usize, dirty_end, StyleSet.base_align.toByteUnits());
const styles_start = alignForward(usize, cells_end, StyleSet.base_align.toByteUnits());
const styles_end = styles_start + styles_layout.total_size;
const grapheme_alloc_layout = GraphemeAlloc.layout(cap.grapheme_bytes);
@@ -1614,8 +1542,6 @@ pub const Page = struct {
.rows_size = rows_end - rows_start,
.cells_start = cells_start,
.cells_size = cells_end - cells_start,
.dirty_start = dirty_start,
.dirty_size = dirty_end - dirty_start,
.styles_start = styles_start,
.styles_layout = styles_layout,
.grapheme_alloc_start = grapheme_alloc_start,
@@ -1707,11 +1633,9 @@ pub const Capacity = struct {
// The size per row is:
// - The row metadata itself
// - The cells per row (n=cols)
// - 1 bit for dirty tracking
const bits_per_row: usize = size: {
var bits: usize = @bitSizeOf(Row); // Row metadata
bits += @bitSizeOf(Cell) * @as(usize, @intCast(cols)); // Cells (n=cols)
bits += 1; // The dirty bit
break :size bits;
};
const available_bits: usize = styles_start * 8;
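As a worked example of the per-row accounting above (assuming Cell, like Row, is a 64-bit packed struct, which this diff does not state): for cols = 80 the old cost was bitSizeOf(Row) + 80 * bitSizeOf(Cell) + 1 = 64 + 5120 + 1 = 5185 bits per row, and the new cost is 5184 bits. Only a single bit is saved per row, because the flag moves into bits that were previously Row padding rather than into new storage.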
@@ -1775,7 +1699,20 @@ pub const Row = packed struct(u64) {
// everything throughout the same.
kitty_virtual_placeholder: bool = false,
_padding: u23 = 0,
/// True if this row is dirty and requires a redraw. This is set to true
/// by any operation that modifies the row's contents or position, and
/// consumers of the page are expected to clear it when they redraw.
///
/// Dirty status is only ever meant to convey that one or more cells in
/// the row have changed visually. A cell which changes in a way that
/// doesn't affect the visual representation may not be marked as dirty.
///
/// Dirty tracking may have false positives but should never have false
/// negatives. A false negative would result in a visual artifact on the
/// screen.
dirty: bool = false,
_padding: u22 = 0,
/// Semantic prompt type.
pub const SemanticPrompt = enum(u3) {
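A minimal sketch of the bit accounting for the new field, using an assumed, reduced set of fields rather than Ghostty's full Row definition: the flag takes one bit from the padding (u23 becomes u22), so the row remains exactly 64 bits.

const std = @import("std");

// Hypothetical, reduced Row layout purely to illustrate the bit accounting.
const RowSketch = packed struct(u64) {
    cells: u32 = 0, // stand-in for the fields that precede the padding
    semantic_prompt: u3 = 0,
    kitty_virtual_placeholder: bool = false,
    dirty: bool = false, // carved out of the former padding
    _padding: u27 = 0, // one bit narrower than it would be without `dirty`
};

comptime {
    // Adding the flag costs no space: the row is still exactly 64 bits.
    std.debug.assert(@bitSizeOf(RowSketch) == 64);
}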
@@ -2079,10 +2016,6 @@ test "Page init" {
.styles = 32,
});
defer page.deinit();
// Dirty set should be empty
const dirty = page.dirtyBitSet();
try std.testing.expectEqual(@as(usize, 0), dirty.count());
}
test "Page read and write cells" {