[Mesa-dev] [PATCH 3/9] radeonsi: don't use an indirect table for state atoms

Tue Apr 17 00:42:05 UTC 2018

From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_blit.c        |  12 +--
 src/gallium/drivers/radeonsi/si_clear.c       |  14 +--
 src/gallium/drivers/radeonsi/si_compute.c     |   8 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c      |   2 +-
 src/gallium/drivers/radeonsi/si_descriptors.c |  12 +--
 src/gallium/drivers/radeonsi/si_gfx_cs.c      |  34 +++---
 src/gallium/drivers/radeonsi/si_pipe.h        |  17 ---
 src/gallium/drivers/radeonsi/si_query.c       |   4 +-
 src/gallium/drivers/radeonsi/si_state.c       | 100 +++++++++---------
 src/gallium/drivers/radeonsi/si_state.h       |  43 ++++----
 src/gallium/drivers/radeonsi/si_state_draw.c  |  14 +--
 .../drivers/radeonsi/si_state_shaders.c       |  30 +++---
 .../drivers/radeonsi/si_state_streamout.c     |  12 +--
 .../drivers/radeonsi/si_state_viewport.c      |  18 ++--
 14 files changed, 148 insertions(+), 172 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index d6eab58b3a8..bbdb5e8f271 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -78,21 +78,21 @@ void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op)
 }
 
 void si_blitter_end(struct si_context *sctx)
 {
 	sctx->render_cond_force_off = false;
 
 	/* Restore shader pointers because the VS blit shader changed all
 	 * non-global VS user SGPRs. */
 	sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
 	sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
-	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
 }
 
 static unsigned u_max_sample(struct pipe_resource *r)
 {
 	return r->nr_samples ? r->nr_samples - 1 : 0;
 }
 
 static unsigned
 si_blit_dbcb_copy(struct si_context *sctx,
 		  struct r600_texture *src,
@@ -102,21 +102,21 @@ si_blit_dbcb_copy(struct si_context *sctx,
 		  unsigned first_sample, unsigned last_sample)
 {
 	struct pipe_surface surf_tmpl = {{0}};
 	unsigned layer, sample, checked_last_layer, max_layer;
 	unsigned fully_copied_levels = 0;
 
 	if (planes & PIPE_MASK_Z)
 		sctx->dbcb_depth_copy_enabled = true;
 	if (planes & PIPE_MASK_S)
 		sctx->dbcb_stencil_copy_enabled = true;
-	si_mark_atom_dirty(sctx, &sctx->db_render_state);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
 	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
 
 	sctx->decompression_enabled = true;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
 
 		/* The smaller the mipmap level, the less layers there are
 		 * as far as 3D textures are concerned. */
@@ -133,42 +133,42 @@ si_blit_dbcb_copy(struct si_context *sctx,
 			surf_tmpl.u.tex.last_layer = layer;
 
 			zsurf = sctx->b.create_surface(&sctx->b, &src->resource.b.b, &surf_tmpl);
 
 			surf_tmpl.format = dst->resource.b.b.format;
 			cbsurf = sctx->b.create_surface(&sctx->b, &dst->resource.b.b, &surf_tmpl);
 
 			for (sample = first_sample; sample <= last_sample; sample++) {
 				if (sample != sctx->dbcb_copy_sample) {
 					sctx->dbcb_copy_sample = sample;
-					si_mark_atom_dirty(sctx, &sctx->db_render_state);
+					si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 				}
 
 				si_blitter_begin(sctx, SI_DECOMPRESS);
 				util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
 								  sctx->custom_dsa_flush, 1.0f);
 				si_blitter_end(sctx);
 			}
 
 			pipe_surface_reference(&zsurf, NULL);
 			pipe_surface_reference(&cbsurf, NULL);
 		}
 
 		if (first_layer == 0 && last_layer >= max_layer &&
 		    first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
 			fully_copied_levels |= 1u << level;
 	}
 
 	sctx->decompression_enabled = false;
 	sctx->dbcb_depth_copy_enabled = false;
 	sctx->dbcb_stencil_copy_enabled = false;
-	si_mark_atom_dirty(sctx, &sctx->db_render_state);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
 	return fully_copied_levels;
 }
 
 void si_blit_decompress_depth(struct pipe_context *ctx,
 			      struct r600_texture *texture,
 			      struct r600_texture *staging,
 			      unsigned first_level, unsigned last_level,
 			      unsigned first_layer, unsigned last_layer,
 			      unsigned first_sample, unsigned last_sample)
@@ -203,21 +203,21 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 	unsigned layer, max_layer, checked_last_layer;
 	unsigned fully_decompressed_mask = 0;
 
 	if (!level_mask)
 		return;
 
 	if (planes & PIPE_MASK_S)
 		sctx->db_flush_stencil_inplace = true;
 	if (planes & PIPE_MASK_Z)
 		sctx->db_flush_depth_inplace = true;
-	si_mark_atom_dirty(sctx, &sctx->db_render_state);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
 	surf_tmpl.format = texture->resource.b.b.format;
 
 	sctx->decompression_enabled = true;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
 
 		surf_tmpl.u.tex.level = level;
 
@@ -249,21 +249,21 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 	}
 
 	if (planes & PIPE_MASK_Z)
 		texture->dirty_level_mask &= ~fully_decompressed_mask;
 	if (planes & PIPE_MASK_S)
 		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
 
 	sctx->decompression_enabled = false;
 	sctx->db_flush_depth_inplace = false;
 	sctx->db_flush_stencil_inplace = false;
-	si_mark_atom_dirty(sctx, &sctx->db_render_state);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 }
 
 /* Helper function of si_flush_depth_texture: decompress the given levels
  * of Z and/or S planes in place.
  */
 static void
 si_blit_decompress_zs_in_place(struct si_context *sctx,
 			       struct r600_texture *texture,
 			       unsigned levels_z, unsigned levels_s,
 			       unsigned first_layer, unsigned last_layer)
diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c
index 03cb08502ef..b8047c4d532 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -541,21 +541,21 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 			tex->dirty_level_mask |= 1 << level;
 			p_atomic_inc(&sctx->screen->compressed_colortex_counter);
 		}
 
 		/* We can change the micro tile mode before a full clear. */
 		si_set_optimal_micro_tile_mode(sctx->screen, tex);
 
 		si_set_clear_color(tex, fb->cbufs[i]->format, color);
 
 		sctx->framebuffer.dirty_cbufs |= 1 << i;
-		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
 		*buffers &= ~clear_bit;
 	}
 }
 
 static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		     const union pipe_color_union *color,
 		     double depth, unsigned stencil)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
@@ -591,41 +591,41 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		    (!zstex->tc_compatible_htile ||
 		     depth == 0 || depth == 1)) {
 			/* Need to disable EXPCLEAR temporarily if clearing
 			 * to a new value. */
 			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
 				sctx->db_depth_disable_expclear = true;
 			}
 
 			zstex->depth_clear_value = depth;
 			sctx->framebuffer.dirty_zsbuf = true;
-			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); /* updates DB_DEPTH_CLEAR */
 			sctx->db_depth_clear = true;
-			si_mark_atom_dirty(sctx, &sctx->db_render_state);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 		}
 
 		/* TC-compatible HTILE only supports stencil clears to 0. */
 		if (buffers & PIPE_CLEAR_STENCIL &&
 		    (!zstex->tc_compatible_htile || stencil == 0)) {
 			stencil &= 0xff;
 
 			/* Need to disable EXPCLEAR temporarily if clearing
 			 * to a new value. */
 			if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
 				sctx->db_stencil_disable_expclear = true;
 			}
 
 			zstex->stencil_clear_value = stencil;
 			sctx->framebuffer.dirty_zsbuf = true;
-			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); /* updates DB_STENCIL_CLEAR */
 			sctx->db_stencil_clear = true;
-			si_mark_atom_dirty(sctx, &sctx->db_render_state);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 		}
 
 		/* TODO: Find out what's wrong here. Fast depth clear leads to
 		 * corruption in ARK: Survival Evolved, but that may just be
 		 * a coincidence and the root cause is elsewhere.
 		 *
 		 * The corruption can be fixed by putting the DB flush before
 		 * or after the depth clear. (surprisingly)
 		 *
 		 * https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
@@ -640,28 +640,28 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 	si_blitter_begin(sctx, SI_CLEAR);
 	util_blitter_clear(sctx->blitter, fb->width, fb->height,
 			   util_framebuffer_get_num_layers(fb),
 			   buffers, color, depth, stencil);
 	si_blitter_end(sctx);
 
 	if (sctx->db_depth_clear) {
 		sctx->db_depth_clear = false;
 		sctx->db_depth_disable_expclear = false;
 		zstex->depth_cleared = true;
-		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 	}
 
 	if (sctx->db_stencil_clear) {
 		sctx->db_stencil_clear = false;
 		sctx->db_stencil_disable_expclear = false;
 		zstex->stencil_cleared = true;
-		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 	}
 }
 
 static void si_clear_render_target(struct pipe_context *ctx,
 				   struct pipe_surface *dst,
 				   const union pipe_color_union *color,
 				   unsigned dstx, unsigned dsty,
 				   unsigned width, unsigned height,
 				   bool render_condition_enabled)
 {
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 8ac5b262c40..e7867e7fe4f 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -856,24 +856,24 @@ static void si_launch_grid(
 	if (sctx->flags)
 		si_emit_cache_flush(sctx);
 
 	if (!si_switch_compute_shader(sctx, program, &program->shader,
 					code_object, info->pc))
 		return;
 
 	si_upload_compute_shader_descriptors(sctx);
 	si_emit_compute_shader_pointers(sctx);
 
-	if (si_is_atom_dirty(sctx, sctx->atoms.s.render_cond)) {
-		sctx->atoms.s.render_cond->emit(sctx,
-		                                sctx->atoms.s.render_cond);
-		si_set_atom_dirty(sctx, sctx->atoms.s.render_cond, false);
+	if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
+		sctx->atoms.s.render_cond.emit(sctx,
+					       &sctx->atoms.s.render_cond);
+		si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false);
 	}
 
 	if ((program->input_size ||
             program->ir_type == PIPE_SHADER_IR_NATIVE) &&
            unlikely(!si_upload_compute_input(sctx, code_object, info))) {
 		return;
 	}
 
 	/* Global buffers */
 	for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index b316637d94b..db9cb0b5346 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -381,21 +381,21 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
 	    sctx->scratch_buffer->b.b.width0 < scratch_size) {
 		r600_resource_reference(&sctx->scratch_buffer, NULL);
 		sctx->scratch_buffer = (struct r600_resource*)
 			si_aligned_buffer_create(&sctx->screen->b,
 						   SI_RESOURCE_FLAG_UNMAPPABLE,
 						   PIPE_USAGE_DEFAULT,
 						   scratch_size, 256);
 		if (!sctx->scratch_buffer)
 			return;
 
-		si_mark_atom_dirty(sctx, &sctx->scratch_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.scratch_state);
 	}
 
 	si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
 			  &sctx->scratch_buffer->b.b, size, size, user_flags,
 			  is_first, &dma_flags);
 
 	va = sctx->scratch_buffer->gpu_address;
 	si_emit_cp_dma(sctx, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags,
 		       SI_COHERENCY_SHADER);
 }
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 8bd7c77c8c6..4efae9704bc 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -155,21 +155,21 @@ static bool si_upload_descriptors(struct si_context *sctx,
 	/* If there is just one active descriptor, bind it directly. */
 	if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
 	    desc->num_active_slots == 1) {
 		uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly *
 						   desc->element_dw_size];
 
 		/* The buffer is already in the buffer list. */
 		r600_resource_reference(&desc->buffer, NULL);
 		desc->gpu_list = NULL;
 		desc->gpu_address = si_desc_extract_buffer_address(descriptor);
-		si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
 		return true;
 	}
 
 	uint32_t *ptr;
 	unsigned buffer_offset;
 	u_upload_alloc(sctx->b.const_uploader, first_slot_offset, upload_size,
 		       si_optimal_tcc_alignment(sctx, upload_size),
 		       &buffer_offset, (struct pipe_resource**)&desc->buffer,
 		       (void**)&ptr);
 	if (!desc->buffer) {
@@ -185,21 +185,21 @@ static bool si_upload_descriptors(struct si_context *sctx,
                             RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 
 	/* The shader pointer should point to slot 0. */
 	buffer_offset -= first_slot_offset;
 	desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
 
 	assert(desc->buffer->flags & RADEON_FLAG_32BIT);
 	assert((desc->buffer->gpu_address >> 32) == sctx->screen->info.address32_hi);
 	assert((desc->gpu_address >> 32) == sctx->screen->info.address32_hi);
 
-	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
 	return true;
 }
 
 static void
 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
 {
 	if (!desc->buffer)
 		return;
 
 	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, desc->buffer,
@@ -1165,21 +1165,21 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 					      (struct r600_resource*)vb->buffer.resource,
 					      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
 		}
 	}
 
 	/* Don't flush the const cache. It would have a very negative effect
 	 * on performance (confirmed by testing). New descriptors are always
 	 * uploaded to a fresh new buffer, so I don't think flushing the const
 	 * cache is needed. */
-	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
 	sctx->vertex_buffers_dirty = false;
 	sctx->vertex_buffer_pointer_dirty = true;
 	sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
 	return true;
 }
 
 
 /* CONSTANT BUFFERS */
 
 static unsigned
@@ -1978,28 +1978,28 @@ void si_update_all_texture_descriptors(struct si_context *sctx)
 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
 					  unsigned shader)
 {
 	sctx->shader_pointers_dirty |=
 		u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
 				  SI_NUM_SHADER_DESCS);
 
 	if (shader == PIPE_SHADER_VERTEX)
 		sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
 
-	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
 }
 
 static void si_shader_pointers_begin_new_cs(struct si_context *sctx)
 {
 	sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 	sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
-	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
 	sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
 	sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
 }
 
 /* Set a base register address for user data constants in the given shader.
  * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
  */
 static void si_set_user_data_base(struct si_context *sctx,
 				  unsigned shader, uint32_t new_base)
 {
@@ -2755,21 +2755,21 @@ void si_init_all_descriptors(struct si_context *sctx)
 	sctx->b.set_shader_buffers = si_set_shader_buffers;
 	sctx->b.set_sampler_views = si_set_sampler_views;
 	sctx->b.create_texture_handle = si_create_texture_handle;
 	sctx->b.delete_texture_handle = si_delete_texture_handle;
 	sctx->b.make_texture_handle_resident = si_make_texture_handle_resident;
 	sctx->b.create_image_handle = si_create_image_handle;
 	sctx->b.delete_image_handle = si_delete_image_handle;
 	sctx->b.make_image_handle_resident = si_make_image_handle_resident;
 
 	/* Shader user data. */
-	si_init_atom(sctx, &sctx->shader_pointers.atom, &sctx->atoms.s.shader_pointers,
+	si_init_atom(sctx, &sctx->atoms.s.shader_pointers,
 		     si_emit_graphics_shader_pointers);
 
 	/* Set default and immutable mappings. */
 	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
 
 	if (sctx->chip_class >= GFX9) {
 		si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
 				      R_00B430_SPI_SHADER_USER_DATA_LS_0);
 		si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
 				      R_00B330_SPI_SHADER_USER_DATA_ES_0);
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index ddfdb497364..e864b627a96 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -250,53 +250,53 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 		ctx->framebuffer.dirty_cbufs =
 			 u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs);
 		/* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */
 		ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL;
 	} else {
 		ctx->framebuffer.dirty_cbufs = u_bit_consecutive(0, 8);
 		ctx->framebuffer.dirty_zsbuf = true;
 	}
 	/* This should always be marked as dirty to set the framebuffer scissor
 	 * at least. */
-	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.framebuffer);
 
-	si_mark_atom_dirty(ctx, &ctx->clip_regs);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs);
 	/* CLEAR_STATE sets zeros. */
 	if (!has_clear_state || ctx->clip_state.any_nonzeros)
-		si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
+		si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_state);
 	ctx->msaa_sample_locs.nr_samples = 0;
-	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom);
-	si_mark_atom_dirty(ctx, &ctx->msaa_config);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_sample_locs);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_config);
 	/* CLEAR_STATE sets 0xffff. */
 	if (!has_clear_state || ctx->sample_mask.sample_mask != 0xffff)
-		si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
-	si_mark_atom_dirty(ctx, &ctx->cb_render_state);
+		si_mark_atom_dirty(ctx, &ctx->atoms.s.sample_mask);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.cb_render_state);
 	/* CLEAR_STATE sets zeros. */
 	if (!has_clear_state || ctx->blend_color.any_nonzeros)
-		si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
-	si_mark_atom_dirty(ctx, &ctx->db_render_state);
+		si_mark_atom_dirty(ctx, &ctx->atoms.s.blend_color);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
 	if (ctx->chip_class >= GFX9)
-		si_mark_atom_dirty(ctx, &ctx->dpbb_state);
-	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
-	si_mark_atom_dirty(ctx, &ctx->spi_map);
-	si_mark_atom_dirty(ctx, &ctx->streamout.enable_atom);
-	si_mark_atom_dirty(ctx, &ctx->render_cond_atom);
+		si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
 	si_all_descriptors_begin_new_cs(ctx);
 	si_all_resident_buffers_begin_new_cs(ctx);
 
 	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
-	si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 
-	si_mark_atom_dirty(ctx, &ctx->scratch_state);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
 	if (ctx->scratch_buffer) {
 		si_context_add_resource_size(ctx, &ctx->scratch_buffer->b.b);
 	}
 
 	if (ctx->streamout.suspended) {
 		ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
 		si_streamout_buffers_dirty(ctx);
 	}
 
 	if (!LIST_IS_EMPTY(&ctx->active_queries))
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a76d52f7ea0..37ff05082cf 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -532,21 +532,20 @@ struct si_screen {
 	 * the number of cores. */
 	LLVMTargetMachineRef		tm[3]; /* used by the queue only */
 
 	struct util_queue		shader_compiler_queue_low_priority;
 	/* Use at most 2 low priority threads on quadcore and better.
 	 * We want to minimize the impact on multithreaded Mesa. */
 	LLVMTargetMachineRef		tm_low_priority[2]; /* at most 2 threads */
 };
 
 struct si_blend_color {
-	struct si_atom			atom;
 	struct pipe_blend_color		state;
 	bool				any_nonzeros;
 };
 
 struct si_sampler_view {
 	struct pipe_sampler_view	base;
         /* [0..7] = image descriptor
          * [4..7] = buffer descriptor */
 	uint32_t			state[8];
 	uint32_t			fmask_state[8];
@@ -587,21 +586,20 @@ struct si_samplers {
 	uint32_t			needs_color_decompress_mask;
 };
 
 struct si_images {
 	struct pipe_image_view		views[SI_NUM_IMAGES];
 	uint32_t			needs_color_decompress_mask;
 	unsigned			enabled_mask;
 };
 
 struct si_framebuffer {
-	struct si_atom			atom;
 	struct pipe_framebuffer_state	state;
 	unsigned			colorbuf_enabled_4bit;
 	unsigned			spi_shader_col_format;
 	unsigned			spi_shader_col_format_alpha;
 	unsigned			spi_shader_col_format_blend;
 	unsigned			spi_shader_col_format_blend_alpha;
 	ubyte				nr_samples:5; /* at most 16xAA */
 	ubyte				log_samples:3; /* at most 4 = 16xAA */
 	ubyte				compressed_cb_mask;
 	ubyte				uncompressed_cb_mask;
@@ -615,81 +613,74 @@ struct si_framebuffer {
 };
 
 struct si_signed_scissor {
 	int minx;
 	int miny;
 	int maxx;
 	int maxy;
 };
 
 struct si_scissors {
-	struct si_atom			atom;
 	unsigned			dirty_mask;
 	struct pipe_scissor_state	states[SI_MAX_VIEWPORTS];
 };
 
 struct si_viewports {
-	struct si_atom			atom;
 	unsigned			dirty_mask;
 	unsigned			depth_range_dirty_mask;
 	struct pipe_viewport_state	states[SI_MAX_VIEWPORTS];
 	struct si_signed_scissor	as_scissor[SI_MAX_VIEWPORTS];
 };
 
 struct si_clip_state {
-	struct si_atom			atom;
 	struct pipe_clip_state		state;
 	bool				any_nonzeros;
 };
 
 struct si_sample_locs {
-	struct si_atom		atom;
 	unsigned		nr_samples;
 };
 
 struct si_sample_mask {
-	struct si_atom		atom;
 	uint16_t		sample_mask;
 };
 
 struct si_streamout_target {
 	struct pipe_stream_output_target b;
 
 	/* The buffer where BUFFER_FILLED_SIZE is stored. */
 	struct r600_resource	*buf_filled_size;
 	unsigned		buf_filled_size_offset;
 	bool			buf_filled_size_valid;
 
 	unsigned		stride_in_dw;
 };
 
 struct si_streamout {
-	struct si_atom			begin_atom;
 	bool				begin_emitted;
 
 	unsigned			enabled_mask;
 	unsigned			num_targets;
 	struct si_streamout_target	*targets[PIPE_MAX_SO_BUFFERS];
 
 	unsigned			append_bitmask;
 	bool				suspended;
 
 	/* External state which comes from the vertex shader,
 	 * it must be set explicitly when binding a shader. */
 	uint16_t			*stride_in_dw;
 	unsigned			enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
 
 	/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
 	unsigned			hw_enabled_mask;
 
 	/* The state of VGT_STRMOUT_(CONFIG|EN). */
-	struct si_atom			enable_atom;
 	bool				streamout_enabled;
 	bool				prims_gen_query_enabled;
 	int				num_prims_gen_queries;
 };
 
 /* A shader state consists of the shader selector, which is a constant state
  * object shared by multiple contexts and shouldn't be modified, and
  * the current shader variant selected for this context.
  */
 struct si_shader_ctx_state {
@@ -818,32 +809,26 @@ struct si_context {
 	union si_state_atoms		atoms;
 	unsigned			dirty_atoms; /* mask */
 	/* PM4 states (precomputed immutable states) */
 	unsigned			dirty_states;
 	union si_state			queued;
 	union si_state			emitted;
 
 	/* Atom declarations. */
 	struct si_framebuffer		framebuffer;
 	struct si_sample_locs		msaa_sample_locs;
-	struct si_atom			db_render_state;
-	struct si_atom			dpbb_state;
-	struct si_atom			msaa_config;
 	struct si_sample_mask		sample_mask;
-	struct si_atom			cb_render_state;
 	unsigned			last_cb_target_mask;
 	struct si_blend_color		blend_color;
-	struct si_atom			clip_regs;
 	struct si_clip_state		clip_state;
 	struct si_shader_data		shader_pointers;
 	struct si_stencil_ref		stencil_ref;
-	struct si_atom			spi_map;
 	struct si_scissors		scissors;
 	struct si_streamout		streamout;
 	struct si_viewports		viewports;
 
 	/* Precomputed states. */
 	struct si_pm4_state		*init_config;
 	struct si_pm4_state		*init_config_gs_rings;
 	bool				init_config_has_vgt_flush;
 	struct si_pm4_state		*vgt_shader_config[4];
 
@@ -925,21 +910,20 @@ struct si_context {
 	int			last_gs_out_prim;
 	int			last_prim;
 	int			last_multi_vgt_param;
 	int			last_rast_prim;
 	unsigned		last_sc_line_stipple;
 	unsigned		current_vs_state;
 	unsigned		last_vs_state;
 	enum pipe_prim_type	current_rast_prim; /* primitive type after TES, GS */
 
 	/* Scratch buffer */
-	struct si_atom		scratch_state;
 	struct r600_resource	*scratch_buffer;
 	unsigned		scratch_waves;
 	unsigned		spi_tmpring_size;
 
 	struct r600_resource	*compute_scratch_buffer;
 
 	/* Emitted derived tessellation state. */
 	/* Local shader (VS), or HS if LS-HS are merged. */
 	struct si_shader	*last_ls;
 	struct si_shader_selector *last_tcs;
@@ -1021,21 +1005,20 @@ struct si_context {
 	unsigned			last_tex_ps_draw_ratio; /* for query */
 
 	/* Queries. */
 	/* Maintain the list of active queries for pausing between IBs. */
 	int				num_occlusion_queries;
 	int				num_perfect_occlusion_queries;
 	struct list_head		active_queries;
 	unsigned			num_cs_dw_queries_suspend;
 
 	/* Render condition. */
-	struct si_atom			render_cond_atom;
 	struct pipe_query		*render_cond;
 	unsigned			render_cond_mode;
 	bool				render_cond_invert;
 	bool				render_cond_force_off; /* for u_blitter */
 
 	/* Statistics gathering for the DCC enablement heuristic. It can't be
 	 * in r600_texture because r600_texture can be shared by multiple
 	 * contexts. This is for back buffers only. We shouldn't get too many
 	 * of those.
 	 *
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index 7d94194f300..d1b519e5508 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -1769,21 +1769,21 @@ static void si_query_hw_get_result_resource(struct si_context *sctx,
 	pipe_resource_reference(&tmp_buffer, NULL);
 }
 
 static void si_render_condition(struct pipe_context *ctx,
 				struct pipe_query *query,
 				boolean condition,
 				enum pipe_render_cond_flag mode)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_query_hw *rquery = (struct si_query_hw *)query;
-	struct si_atom *atom = &sctx->render_cond_atom;
+	struct si_atom *atom = &sctx->atoms.s.render_cond;
 
 	if (query) {
 		bool needs_workaround = false;
 
 		/* There was a firmware regression in VI which causes successive
 		 * SET_PREDICATION packets to give the wrong answer for
 		 * non-inverted stream overflow predication.
 		 */
 		if (((sctx->chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) ||
 		     (sctx->chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) &&
@@ -2046,21 +2046,21 @@ static int si_get_driver_query_group_info(struct pipe_screen *screen,
 
 void si_init_query_functions(struct si_context *sctx)
 {
 	sctx->b.create_query = si_create_query;
 	sctx->b.create_batch_query = si_create_batch_query;
 	sctx->b.destroy_query = si_destroy_query;
 	sctx->b.begin_query = si_begin_query;
 	sctx->b.end_query = si_end_query;
 	sctx->b.get_query_result = si_get_query_result;
 	sctx->b.get_query_result_resource = si_get_query_result_resource;
-	sctx->render_cond_atom.emit = si_emit_query_predication;
+	sctx->atoms.s.render_cond.emit = si_emit_query_predication;
 
 	if (((struct si_screen*)sctx->b.screen)->info.num_render_backends > 0)
 	    sctx->b.render_condition = si_render_condition;
 
 	LIST_INITHEAD(&sctx->active_queries);
 }
 
 void si_init_screen_query_functions(struct si_screen *sscreen)
 {
 	sscreen->b.get_driver_query_info = si_get_driver_query_info;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 5cb11311a97..8841077cca1 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -28,35 +28,31 @@
 
 #include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
 #include "util/u_resource.h"
 #include "util/u_upload_mgr.h"
 
 /* Initialize an external atom (owned by ../radeon). */
 static void
-si_init_external_atom(struct si_context *sctx, struct si_atom *atom,
-		      struct si_atom **list_elem)
+si_init_external_atom(struct si_context *sctx, struct si_atom *atom)
 {
-	atom->id = list_elem - sctx->atoms.array;
-	*list_elem = atom;
+	atom->id = atom - sctx->atoms.array;
 }
 
 /* Initialize an atom owned by radeonsi.  */
 void si_init_atom(struct si_context *sctx, struct si_atom *atom,
-		  struct si_atom **list_elem,
 		  void (*emit_func)(struct si_context *ctx, struct si_atom *state))
 {
 	atom->emit = emit_func;
-	atom->id = list_elem - sctx->atoms.array;
-	*list_elem = atom;
+	atom->id = atom - sctx->atoms.array;
 }
 
 static unsigned si_map_swizzle(unsigned swizzle)
 {
 	switch (swizzle) {
 	case PIPE_SWIZZLE_Y:
 		return V_008F0C_SQ_SEL_Y;
 	case PIPE_SWIZZLE_Z:
 		return V_008F0C_SQ_SEL_Z;
 	case PIPE_SWIZZLE_W:
@@ -665,62 +661,62 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
 		return;
 
 	si_pm4_bind_state(sctx, blend, state);
 
 	if (!old_blend ||
 	    old_blend->cb_target_mask != blend->cb_target_mask ||
 	    old_blend->dual_src_blend != blend->dual_src_blend ||
 	    (old_blend->blend_enable_4bit != blend->blend_enable_4bit &&
 	     sctx->framebuffer.nr_samples >= 2 &&
 	     sctx->screen->dcc_msaa_allowed))
-		si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
 
 	if (!old_blend ||
 	    old_blend->cb_target_mask != blend->cb_target_mask ||
 	    old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
 	    old_blend->alpha_to_one != blend->alpha_to_one ||
 	    old_blend->dual_src_blend != blend->dual_src_blend ||
 	    old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
 	    old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
 		sctx->do_update_shaders = true;
 
 	if (sctx->screen->dpbb_allowed &&
 	    (!old_blend ||
 	     old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
 	     old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
 	     old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit))
-		si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
 
 	if (sctx->screen->has_out_of_order_rast &&
 	    (!old_blend ||
 	     (old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
 	      old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
 	      old_blend->commutative_4bit != blend->commutative_4bit ||
 	      old_blend->logicop_enable != blend->logicop_enable)))
-		si_mark_atom_dirty(sctx, &sctx->msaa_config);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 }
 
 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
 }
 
 static void si_set_blend_color(struct pipe_context *ctx,
 			       const struct pipe_blend_color *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	static const struct pipe_blend_color zeros;
 
 	sctx->blend_color.state = *state;
 	sctx->blend_color.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
-	si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color);
 }
 
 static void si_emit_blend_color(struct si_context *sctx, struct si_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
 	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
 }
 
@@ -733,21 +729,21 @@ static void si_set_clip_state(struct pipe_context *ctx,
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct pipe_constant_buffer cb;
 	static const struct pipe_clip_state zeros;
 
 	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
 		return;
 
 	sctx->clip_state.state = *state;
 	sctx->clip_state.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
-	si_mark_atom_dirty(sctx, &sctx->clip_state.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state);
 
 	cb.buffer = NULL;
 	cb.user_buffer = state->ucp;
 	cb.buffer_offset = 0;
 	cb.buffer_size = 4*4*8;
 	si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
 	pipe_resource_reference(&cb.buffer, NULL);
 }
 
 static void si_emit_clip_state(struct si_context *sctx, struct si_atom *atom)
@@ -999,52 +995,52 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_state_rasterizer *old_rs =
 		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
 	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
 
 	if (!state)
 		return;
 
 	if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) {
-		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
 		/* Update the small primitive filter workaround if necessary. */
 		if (sctx->screen->has_msaa_sample_loc_bug &&
 		    sctx->framebuffer.nr_samples > 1)
-			si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
 	}
 
 	sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
 	sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
 
 	si_pm4_bind_state(sctx, rasterizer, rs);
 	si_update_poly_offset_state(sctx);
 
 	if (!old_rs ||
 	    (old_rs->scissor_enable != rs->scissor_enable ||
 	     old_rs->line_width != rs->line_width ||
 	     old_rs->max_point_size != rs->max_point_size)) {
 		sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-		si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
 	}
 
 	if (!old_rs ||
 	    old_rs->clip_halfz != rs->clip_halfz) {
 		sctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-		si_mark_atom_dirty(sctx, &sctx->viewports.atom);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports);
 	}
 
 	if (!old_rs ||
 	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
 	    old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
-		si_mark_atom_dirty(sctx, &sctx->clip_regs);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
 
 	sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
 		rs->line_stipple_enable;
 
 	if (!old_rs ||
 	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
 	    old_rs->rasterizer_discard != rs->rasterizer_discard ||
 	    old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
 	    old_rs->flatshade != rs->flatshade ||
 	    old_rs->two_side != rs->two_side ||
@@ -1091,21 +1087,21 @@ static void si_emit_stencil_ref(struct si_context *sctx, struct si_atom *atom)
 
 static void si_set_stencil_ref(struct pipe_context *ctx,
 			       const struct pipe_stencil_ref *state)
 {
         struct si_context *sctx = (struct si_context *)ctx;
 
 	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
 		return;
 
 	sctx->stencil_ref.state = *state;
-	si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
 }
 
 
 /*
  * DSA
  */
 
 static uint32_t si_translate_stencil_op(int s_op)
 {
 	switch (s_op) {
@@ -1279,38 +1275,38 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
         struct si_state_dsa *dsa = state;
 
         if (!state)
                 return;
 
 	si_pm4_bind_state(sctx, dsa, dsa);
 
 	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
 		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
 		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
-		si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
 	}
 
 	if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
 		sctx->do_update_shaders = true;
 
 	if (sctx->screen->dpbb_allowed &&
 	    (!old_dsa ||
 	     (old_dsa->depth_enabled != dsa->depth_enabled ||
 	      old_dsa->stencil_enabled != dsa->stencil_enabled ||
 	      old_dsa->db_can_write != dsa->db_can_write)))
-		si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
 
 	if (sctx->screen->has_out_of_order_rast &&
 	    (!old_dsa ||
 	     memcmp(old_dsa->order_invariance, dsa->order_invariance,
 		    sizeof(old_dsa->order_invariance))))
-		si_mark_atom_dirty(sctx, &sctx->msaa_config);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 }
 
 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
 }
 
 static void *si_create_db_flush_dsa(struct si_context *sctx)
 {
@@ -1330,33 +1326,33 @@ static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
 		sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
 		sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
 	} else {
 		sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
 		sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
 	}
 
 	/* Occlusion queries. */
 	if (sctx->occlusion_queries_disabled != !enable) {
 		sctx->occlusion_queries_disabled = !enable;
-		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 	}
 }
 
 void si_set_occlusion_query_state(struct si_context *sctx,
 				  bool old_perfect_enable)
 {
-	si_mark_atom_dirty(sctx, &sctx->db_render_state);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
 	bool perfect_enable = sctx->num_perfect_occlusion_queries != 0;
 
 	if (perfect_enable != old_perfect_enable)
-		si_mark_atom_dirty(sctx, &sctx->msaa_config);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 }
 
 void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
 {
 	st->saved_compute = sctx->cs_shader_state.program;
 
 	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
 	si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
 }
 
@@ -2888,38 +2884,38 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		}
 
 		if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level))
 			sctx->framebuffer.DB_has_shader_readable_metadata = true;
 
 		si_context_add_resource_size(sctx, surf->base.texture);
 	}
 
 	si_update_ps_colorbuf0_slot(sctx);
 	si_update_poly_offset_state(sctx);
-	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
-	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
 
 	if (sctx->screen->dpbb_allowed)
-		si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
 
 	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
-		si_mark_atom_dirty(sctx, &sctx->msaa_config);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 
 	if (sctx->screen->has_out_of_order_rast &&
 	    (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit ||
 	     !!sctx->framebuffer.state.zsbuf != old_has_zsbuf ||
 	     (zstex && zstex->surface.has_stencil != old_has_stencil)))
-		si_mark_atom_dirty(sctx, &sctx->msaa_config);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 
 	if (sctx->framebuffer.nr_samples != old_nr_samples) {
-		si_mark_atom_dirty(sctx, &sctx->msaa_config);
-		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
 		/* Set sample locations as fragment shader constants. */
 		switch (sctx->framebuffer.nr_samples) {
 		case 1:
 			constbuf.user_buffer = sctx->sample_locations_1x;
 			break;
 		case 2:
 			constbuf.user_buffer = sctx->sample_locations_2x;
 			break;
 		case 4:
@@ -2932,21 +2928,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 			constbuf.user_buffer = sctx->sample_locations_16x;
 			break;
 		default:
 			PRINT_ERR("Requested an invalid number of samples %i.\n",
 				 sctx->framebuffer.nr_samples);
 			assert(0);
 		}
 		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
 		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
 
-		si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
 	}
 
 	sctx->do_update_shaders = true;
 
 	if (!sctx->decompression_enabled) {
 		/* Prevent textures decompression when the framebuffer state
 		 * changes come from the decompression passes themselves.
 		 */
 		sctx->need_check_render_feedback = true;
 	}
@@ -3409,23 +3405,23 @@ static void si_emit_msaa_config(struct si_context *sctx, struct si_atom *atom)
 	/* GFX9: Flush DFSM when the AA mode changes. */
 	if (sctx->screen->dfsm_allowed) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
 	}
 }
 
 void si_update_ps_iter_samples(struct si_context *sctx)
 {
 	if (sctx->framebuffer.nr_samples > 1)
-		si_mark_atom_dirty(sctx, &sctx->msaa_config);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 	if (sctx->screen->dpbb_allowed)
-		si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
 }
 
 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	if (sctx->ps_iter_samples == min_samples)
 		return;
 
 	sctx->ps_iter_samples = min_samples;
@@ -4167,21 +4163,21 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 }
 
 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
 		return;
 
 	sctx->sample_mask.sample_mask = sample_mask;
-	si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask);
 }
 
 static void si_emit_sample_mask(struct si_context *sctx, struct si_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned mask = sctx->sample_mask.sample_mask;
 
 	/* Needed for line and polygon smoothing as well as for the Polaris
 	 * small primitive filter. We expect the state tracker to take care of
 	 * this for us.
@@ -4523,37 +4519,37 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 	memset(&blend, 0, sizeof(blend));
 	blend.independent_blend_enable = true;
 	blend.rt[0].colormask = 0xf;
 	return si_create_blend_state_mode(&sctx->b, &blend, mode);
 }
 
 static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
-	si_init_external_atom(sctx, &sctx->render_cond_atom, &sctx->atoms.s.render_cond);
-	si_init_external_atom(sctx, &sctx->streamout.begin_atom, &sctx->atoms.s.streamout_begin);
-	si_init_external_atom(sctx, &sctx->streamout.enable_atom, &sctx->atoms.s.streamout_enable);
-	si_init_external_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors);
-	si_init_external_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports);
-
-	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
-	si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
-	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
-	si_init_atom(sctx, &sctx->dpbb_state, &sctx->atoms.s.dpbb_state, si_emit_dpbb_state);
-	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
-	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
-	si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
-	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
-	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
-	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
-	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
+	si_init_external_atom(sctx, &sctx->atoms.s.render_cond);
+	si_init_external_atom(sctx, &sctx->atoms.s.streamout_begin);
+	si_init_external_atom(sctx, &sctx->atoms.s.streamout_enable);
+	si_init_external_atom(sctx, &sctx->atoms.s.scissors);
+	si_init_external_atom(sctx, &sctx->atoms.s.viewports);
+
+	si_init_atom(sctx, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
+	si_init_atom(sctx, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
+	si_init_atom(sctx, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
+	si_init_atom(sctx, &sctx->atoms.s.dpbb_state, si_emit_dpbb_state);
+	si_init_atom(sctx, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
+	si_init_atom(sctx, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
+	si_init_atom(sctx, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
+	si_init_atom(sctx, &sctx->atoms.s.blend_color, si_emit_blend_color);
+	si_init_atom(sctx, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
+	si_init_atom(sctx, &sctx->atoms.s.clip_state, si_emit_clip_state);
+	si_init_atom(sctx, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
 
 	sctx->b.create_blend_state = si_create_blend_state;
 	sctx->b.bind_blend_state = si_bind_blend_state;
 	sctx->b.delete_blend_state = si_delete_blend_state;
 	sctx->b.set_blend_color = si_set_blend_color;
 
 	sctx->b.create_rasterizer_state = si_create_rs_state;
 	sctx->b.bind_rasterizer_state = si_bind_rs_state;
 	sctx->b.delete_rasterizer_state = si_delete_rs_state;
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index ebdb44694e4..6c9899d9468 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -125,21 +125,20 @@ struct si_state_dsa {
 	ubyte				alpha_func:3;
 	bool				depth_enabled:1;
 	bool				depth_write_enabled:1;
 	bool				stencil_enabled:1;
 	bool				stencil_write_enabled:1;
 	bool				db_can_write:1;
 
 };
 
 struct si_stencil_ref {
-	struct si_atom			atom;
 	struct pipe_stencil_ref		state;
 	struct si_dsa_stencil_ref_part	dsa_part;
 };
 
 struct si_vertex_elements
 {
 	uint32_t			instance_divisors[SI_MAX_ATTRIBS];
 	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
 	uint16_t			src_offset[SI_MAX_ATTRIBS];
 	uint8_t				fix_fetch[SI_MAX_ATTRIBS];
@@ -171,47 +170,46 @@ union si_state {
 		struct si_pm4_state		*ps;
 	} named;
 	struct si_pm4_state	*array[0];
 };
 
 #define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
 
 union si_state_atoms {
 	struct {
 		/* The order matters. */
-		struct si_atom *render_cond;
-		struct si_atom *streamout_begin;
-		struct si_atom *streamout_enable; /* must be after streamout_begin */
-		struct si_atom *framebuffer;
-		struct si_atom *msaa_sample_locs;
-		struct si_atom *db_render_state;
-		struct si_atom *dpbb_state;
-		struct si_atom *msaa_config;
-		struct si_atom *sample_mask;
-		struct si_atom *cb_render_state;
-		struct si_atom *blend_color;
-		struct si_atom *clip_regs;
-		struct si_atom *clip_state;
-		struct si_atom *shader_pointers;
-		struct si_atom *scissors;
-		struct si_atom *viewports;
-		struct si_atom *stencil_ref;
-		struct si_atom *spi_map;
-		struct si_atom *scratch_state;
+		struct si_atom render_cond;
+		struct si_atom streamout_begin;
+		struct si_atom streamout_enable; /* must be after streamout_begin */
+		struct si_atom framebuffer;
+		struct si_atom msaa_sample_locs;
+		struct si_atom db_render_state;
+		struct si_atom dpbb_state;
+		struct si_atom msaa_config;
+		struct si_atom sample_mask;
+		struct si_atom cb_render_state;
+		struct si_atom blend_color;
+		struct si_atom clip_regs;
+		struct si_atom clip_state;
+		struct si_atom shader_pointers;
+		struct si_atom scissors;
+		struct si_atom viewports;
+		struct si_atom stencil_ref;
+		struct si_atom spi_map;
+		struct si_atom scratch_state;
 	} s;
-	struct si_atom *array[0];
+	struct si_atom array[0];
 };
 
 #define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct si_atom*))
 
 struct si_shader_data {
-	struct si_atom		atom;
 	uint32_t		sh_base[SI_NUM_SHADERS];
 };
 
 /* Private read-write buffer slots. */
 enum {
 	SI_ES_RING_ESGS,
 	SI_GS_RING_ESGS,
 
 	SI_RING_GSVS,
 
@@ -377,21 +375,20 @@ bool si_bindless_descriptor_can_reclaim_slab(void *priv,
 struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
 						  unsigned entry_size,
 						  unsigned group_index);
 void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);
 void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
 		      uint64_t old_va);
 /* si_state.c */
 struct si_shader_selector;
 
 void si_init_atom(struct si_context *sctx, struct si_atom *atom,
-		  struct si_atom **list_elem,
 		  void (*emit_func)(struct si_context *ctx, struct si_atom *state));
 void si_init_state_functions(struct si_context *sctx);
 void si_init_screen_state_functions(struct si_screen *sscreen);
 void
 si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
 			  enum pipe_format format,
 			  unsigned offset, unsigned size,
 			  uint32_t *state);
 void
 si_make_texture_descriptor(struct si_screen *screen,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 852b6b0e977..531ed106d6a 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1174,21 +1174,21 @@ static void si_get_draw_start_count(struct si_context *sctx,
 		*count = info->count;
 	}
 }
 
 static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
 			       unsigned skip_atom_mask)
 {
 	/* Emit state atoms. */
 	unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
 	while (mask) {
-		struct si_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
+		struct si_atom *atom = &sctx->atoms.array[u_bit_scan(&mask)];
 
 		atom->emit(sctx, atom);
 	}
 	sctx->dirty_atoms &= skip_atom_mask;
 
 	/* Emit states. */
 	mask = sctx->dirty_states;
 	while (mask) {
 		unsigned i = u_bit_scan(&mask);
 		struct si_pm4_state *state = sctx->queued.array[i];
@@ -1248,21 +1248,21 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		return;
 	}
 
 	/* Recompute and re-emit the texture resource states if needed. */
 	dirty_tex_counter = p_atomic_read(&sctx->screen->dirty_tex_counter);
 	if (unlikely(dirty_tex_counter != sctx->last_dirty_tex_counter)) {
 		sctx->last_dirty_tex_counter = dirty_tex_counter;
 		sctx->framebuffer.dirty_cbufs |=
 			((1 << sctx->framebuffer.state.nr_cbufs) - 1);
 		sctx->framebuffer.dirty_zsbuf = true;
-		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
 		si_update_all_texture_descriptors(sctx);
 	}
 
 	si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
 
 	/* Set the rasterization primitive type.
 	 *
 	 * This must be done after si_decompress_textures, which can call
 	 * draw_vbo recursively, and before si_update_shaders, which uses
 	 * current_rast_prim for this draw_vbo call. */
@@ -1274,21 +1274,21 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		else
 			rast_prim = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
 	} else
 		rast_prim = info->mode;
 
 	if (rast_prim != sctx->current_rast_prim) {
 		bool old_is_poly = sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES;
 		bool new_is_poly = rast_prim >= PIPE_PRIM_TRIANGLES;
 		if (old_is_poly != new_is_poly) {
 			sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-			si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
 		}
 
 		sctx->current_rast_prim = rast_prim;
 		sctx->do_update_shaders = true;
 	}
 
 	if (sctx->tes_shader.cso &&
 	    sctx->screen->has_ls_vgpr_init_bug) {
 		/* Determine whether the LS VGPR fix should be applied.
 		 *
@@ -1410,50 +1410,50 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	 * need_cs_space flush before we add buffers to the buffer list.
 	 */
 	if (!si_upload_vertex_buffer_descriptors(sctx))
 		return;
 
 	/* Vega10/Raven scissor bug workaround. This must be done before VPORT
 	 * scissor registers are changed. There is also a more efficient but
 	 * more involved alternative workaround.
 	 */
 	if ((sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
-	    si_is_atom_dirty(sctx, &sctx->scissors.atom)) {
+	    si_is_atom_dirty(sctx, &sctx->atoms.s.scissors)) {
 		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
 		si_emit_cache_flush(sctx);
 	}
 
 	/* Use optimal packet order based on whether we need to sync the pipeline. */
 	if (unlikely(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
 				      SI_CONTEXT_FLUSH_AND_INV_DB |
 				      SI_CONTEXT_PS_PARTIAL_FLUSH |
 				      SI_CONTEXT_CS_PARTIAL_FLUSH))) {
 		/* If we have to wait for idle, set all states first, so that all
 		 * SET packets are processed in parallel with previous draw calls.
 		 * Then draw and prefetch at the end. This ensures that the time
 		 * the CUs are idle is very short.
 		 */
 		unsigned masked_atoms = 0;
 
 		if (unlikely(sctx->flags & SI_CONTEXT_FLUSH_FOR_RENDER_COND))
-			masked_atoms |= 1u << sctx->render_cond_atom.id;
+			masked_atoms |= 1u << sctx->atoms.s.render_cond.id;
 
 		if (!si_upload_graphics_shader_descriptors(sctx))
 			return;
 
 		/* Emit all states except possibly render condition. */
 		si_emit_all_states(sctx, info, masked_atoms);
 		si_emit_cache_flush(sctx);
 		/* <-- CUs are idle here. */
 
-		if (si_is_atom_dirty(sctx, &sctx->render_cond_atom))
-			sctx->render_cond_atom.emit(sctx, NULL);
+		if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond))
+			sctx->atoms.s.render_cond.emit(sctx, NULL);
 		sctx->dirty_atoms = 0;
 
 		si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
 		/* <-- CUs are busy here. */
 
 		/* Start prefetches after the draw has been started. Both will run
 		 * in parallel, but starting the draw first is more important.
 		 */
 		if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
 			cik_emit_prefetch_L2(sctx, false);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index db44a4967ba..747a2abc7e1 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2265,21 +2265,21 @@ static void si_update_clip_regs(struct si_context *sctx,
 	    (!old_hw_vs ||
 	     old_hw_vs->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] !=
 	     next_hw_vs->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] ||
 	     old_hw_vs->pa_cl_vs_out_cntl != next_hw_vs->pa_cl_vs_out_cntl ||
 	     old_hw_vs->clipdist_mask != next_hw_vs->clipdist_mask ||
 	     old_hw_vs->culldist_mask != next_hw_vs->culldist_mask ||
 	     !old_hw_vs_variant ||
 	     !next_hw_vs_variant ||
 	     old_hw_vs_variant->key.opt.clip_disable !=
 	     next_hw_vs_variant->key.opt.clip_disable))
-		si_mark_atom_dirty(sctx, &sctx->clip_regs);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
 }
 
 static void si_update_common_shader_state(struct si_context *sctx)
 {
 	sctx->uses_bindless_samplers =
 		si_shader_uses_bindless_samplers(sctx->vs_shader.cso)  ||
 		si_shader_uses_bindless_samplers(sctx->gs_shader.cso)  ||
 		si_shader_uses_bindless_samplers(sctx->ps_shader.cso)  ||
 		si_shader_uses_bindless_samplers(sctx->tcs_shader.cso) ||
 		si_shader_uses_bindless_samplers(sctx->tes_shader.cso);
@@ -2421,28 +2421,28 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	sctx->ps_shader.cso = sel;
 	sctx->ps_shader.current = sel ? sel->first_variant : NULL;
 
 	si_update_common_shader_state(sctx);
 	if (sel) {
 		if (sctx->ia_multi_vgt_param_key.u.uses_tess)
 			si_update_tess_uses_prim_id(sctx);
 
 		if (!old_sel ||
 		    old_sel->info.colors_written != sel->info.colors_written)
-			si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
 
 		if (sctx->screen->has_out_of_order_rast &&
 		    (!old_sel ||
 		     old_sel->info.writes_memory != sel->info.writes_memory ||
 		     old_sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] !=
 		     sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]))
-			si_mark_atom_dirty(sctx, &sctx->msaa_config);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 	}
 	si_set_active_descriptors_for_shader(sctx, sel);
 	si_update_ps_colorbuf0_slot(sctx);
 }
 
 static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
 {
 	if (shader->is_optimized) {
 		util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
 				    &shader->ready);
@@ -2973,38 +2973,38 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
 			r600_resource_reference(&sctx->scratch_buffer, NULL);
 
 			sctx->scratch_buffer = (struct r600_resource*)
 				si_aligned_buffer_create(&sctx->screen->b,
 							   SI_RESOURCE_FLAG_UNMAPPABLE,
 							   PIPE_USAGE_DEFAULT,
 							   scratch_needed_size, 256);
 			if (!sctx->scratch_buffer)
 				return false;
 
-			si_mark_atom_dirty(sctx, &sctx->scratch_state);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.scratch_state);
 			si_context_add_resource_size(sctx,
 						     &sctx->scratch_buffer->b.b);
 		}
 
 		if (!si_update_scratch_relocs(sctx))
 			return false;
 	}
 
 	/* The LLVM shader backend should be reporting aligned scratch_sizes. */
 	assert((scratch_needed_size & ~0x3FF) == scratch_needed_size &&
 		"scratch size should already be aligned correctly.");
 
 	spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
 			   S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
 	if (spi_tmpring_size != sctx->spi_tmpring_size) {
 		sctx->spi_tmpring_size = spi_tmpring_size;
-		si_mark_atom_dirty(sctx, &sctx->scratch_state);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.scratch_state);
 	}
 	return true;
 }
 
 static void si_init_tess_factor_ring(struct si_context *sctx)
 {
 	assert(!sctx->tess_rings);
 
 	/* The address must be aligned to 2^19, because the shader only
 	 * receives the high 13 bits.
@@ -3227,65 +3227,65 @@ bool si_update_shaders(struct si_context *sctx)
 			return false;
 	} else {
 		si_pm4_bind_state(sctx, gs, NULL);
 		if (sctx->chip_class <= VI)
 			si_pm4_bind_state(sctx, es, NULL);
 	}
 
 	si_update_vgt_shader_config(sctx);
 
 	if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)
-		si_mark_atom_dirty(sctx, &sctx->clip_regs);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
 
 	if (sctx->ps_shader.cso) {
 		unsigned db_shader_control;
 
 		r = si_shader_select(ctx, &sctx->ps_shader, &compiler_state);
 		if (r)
 			return false;
 		si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
 
 		db_shader_control =
 			sctx->ps_shader.cso->db_shader_control |
 			S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
 
 		if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
 		    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
 		    sctx->flatshade != rs->flatshade) {
 			sctx->sprite_coord_enable = rs->sprite_coord_enable;
 			sctx->flatshade = rs->flatshade;
-			si_mark_atom_dirty(sctx, &sctx->spi_map);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.spi_map);
 		}
 
 		if (sctx->screen->rbplus_allowed &&
 		    si_pm4_state_changed(sctx, ps) &&
 		    (!old_ps ||
 		     old_spi_shader_col_format !=
 		     sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format))
-			si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
 
 		if (sctx->ps_db_shader_control != db_shader_control) {
 			sctx->ps_db_shader_control = db_shader_control;
-			si_mark_atom_dirty(sctx, &sctx->db_render_state);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 			if (sctx->screen->dpbb_allowed)
-				si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+				si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
 		}
 
 		if (sctx->smoothing_enabled != sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) {
 			sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
-			si_mark_atom_dirty(sctx, &sctx->msaa_config);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 
 			if (sctx->chip_class == SI)
-				si_mark_atom_dirty(sctx, &sctx->db_render_state);
+				si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
 			if (sctx->framebuffer.nr_samples <= 1)
-				si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
+				si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
 		}
 	}
 
 	if (si_pm4_state_enabled_and_changed(sctx, ls) ||
 	    si_pm4_state_enabled_and_changed(sctx, hs) ||
 	    si_pm4_state_enabled_and_changed(sctx, es) ||
 	    si_pm4_state_enabled_and_changed(sctx, gs) ||
 	    si_pm4_state_enabled_and_changed(sctx, vs) ||
 	    si_pm4_state_enabled_and_changed(sctx, ps)) {
 		if (!si_update_spi_tmpring_size(sctx))
@@ -3402,22 +3402,22 @@ void *si_get_blit_vs(struct si_context *sctx, enum blitter_attrib_type type,
 			 ureg_scalar(instance_id, TGSI_SWIZZLE_X));
 	}
 	ureg_END(ureg);
 
 	*vs = ureg_create_shader_and_destroy(ureg, &sctx->b);
 	return *vs;
 }
 
 void si_init_shader_functions(struct si_context *sctx)
 {
-	si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
-	si_init_atom(sctx, &sctx->scratch_state, &sctx->atoms.s.scratch_state,
+	si_init_atom(sctx, &sctx->atoms.s.spi_map, si_emit_spi_map);
+	si_init_atom(sctx, &sctx->atoms.s.scratch_state,
 		     si_emit_scratch_state);
 
 	sctx->b.create_vs_state = si_create_shader_selector;
 	sctx->b.create_tcs_state = si_create_shader_selector;
 	sctx->b.create_tes_state = si_create_shader_selector;
 	sctx->b.create_gs_state = si_create_shader_selector;
 	sctx->b.create_fs_state = si_create_shader_selector;
 
 	sctx->b.bind_vs_state = si_bind_vs_shader;
 	sctx->b.bind_tcs_state = si_bind_tcs_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 482946eba2b..328a0cc7a7f 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -76,21 +76,21 @@ static void si_so_target_destroy(struct pipe_context *ctx,
 	pipe_resource_reference(&t->b.buffer, NULL);
 	r600_resource_reference(&t->buf_filled_size, NULL);
 	FREE(t);
 }
 
 void si_streamout_buffers_dirty(struct si_context *sctx)
 {
 	if (!sctx->streamout.enabled_mask)
 		return;
 
-	si_mark_atom_dirty(sctx, &sctx->streamout.begin_atom);
+	si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_begin);
 	si_set_streamout_enable(sctx, true);
 }
 
 static void si_set_streamout_targets(struct pipe_context *ctx,
 				     unsigned num_targets,
 				     struct pipe_stream_output_target **targets,
 				     const unsigned *offsets)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_buffer_resources *buffers = &sctx->rw_buffers;
@@ -162,21 +162,21 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 		si_so_target_reference(&sctx->streamout.targets[i], NULL);
 
 	sctx->streamout.enabled_mask = enabled_mask;
 	sctx->streamout.num_targets = num_targets;
 	sctx->streamout.append_bitmask = append_bitmask;
 
 	/* Update dirty state bits. */
 	if (num_targets) {
 		si_streamout_buffers_dirty(sctx);
 	} else {
-		si_set_atom_dirty(sctx, &sctx->streamout.begin_atom, false);
+		si_set_atom_dirty(sctx, &sctx->atoms.s.streamout_begin, false);
 		si_set_streamout_enable(sctx, false);
 	}
 
 	/* Set the shader resources.*/
 	for (i = 0; i < num_targets; i++) {
 		bufidx = SI_VS_STREAMOUT_BUF0 + i;
 
 		if (targets[i]) {
 			struct pipe_resource *buffer = targets[i]->buffer;
 			uint64_t va = r600_resource(buffer)->gpu_address;
@@ -378,38 +378,38 @@ static void si_set_streamout_enable(struct si_context *sctx, bool enable)
 
 	sctx->streamout.streamout_enabled = enable;
 
 	sctx->streamout.hw_enabled_mask = sctx->streamout.enabled_mask |
 					  (sctx->streamout.enabled_mask << 4) |
 					  (sctx->streamout.enabled_mask << 8) |
 					  (sctx->streamout.enabled_mask << 12);
 
 	if ((old_strmout_en != si_get_strmout_en(sctx)) ||
             (old_hw_enabled_mask != sctx->streamout.hw_enabled_mask))
-		si_mark_atom_dirty(sctx, &sctx->streamout.enable_atom);
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_enable);
 }
 
 void si_update_prims_generated_query_state(struct si_context *sctx,
 					   unsigned type, int diff)
 {
 	if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
 		bool old_strmout_en = si_get_strmout_en(sctx);
 
 		sctx->streamout.num_prims_gen_queries += diff;
 		assert(sctx->streamout.num_prims_gen_queries >= 0);
 
 		sctx->streamout.prims_gen_query_enabled =
 			sctx->streamout.num_prims_gen_queries != 0;
 
 		if (old_strmout_en != si_get_strmout_en(sctx))
-			si_mark_atom_dirty(sctx, &sctx->streamout.enable_atom);
+			si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_enable);
 	}
 }
 
 void si_init_streamout_functions(struct si_context *sctx)
 {
 	sctx->b.create_stream_output_target = si_create_so_target;
 	sctx->b.stream_output_target_destroy = si_so_target_destroy;
 	sctx->b.set_stream_output_targets = si_set_streamout_targets;
-	sctx->streamout.begin_atom.emit = si_emit_streamout_begin;
-	sctx->streamout.enable_atom.emit = si_emit_streamout_enable;
+	sctx->atoms.s.streamout_begin.emit = si_emit_streamout_begin;
+	sctx->atoms.s.streamout_enable.emit = si_emit_streamout_enable;
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index 5fc57647245..a84f0e1f9f2 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -37,21 +37,21 @@ static void si_set_scissor_states(struct pipe_context *pctx,
 	int i;
 
 	for (i = 0; i < num_scissors; i++)
 		ctx->scissors.states[start_slot + i] = state[i];
 
 	if (!ctx->queued.named.rasterizer ||
 	    !ctx->queued.named.rasterizer->scissor_enable)
 		return;
 
 	ctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
-	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 }
 
 /* Since the guard band disables clipping, we have to clip per-pixel
  * using a scissor.
  */
 static void si_get_scissor_from_viewport(struct si_context *ctx,
 					 const struct pipe_viewport_state *vp,
 					 struct si_signed_scissor *scissor)
 {
 	float tmp, minx, miny, maxx, maxy;
@@ -272,22 +272,22 @@ static void si_set_viewport_states(struct pipe_context *pctx,
 
 		ctx->viewports.states[index] = state[i];
 		si_get_scissor_from_viewport(ctx, &state[i],
 					     &ctx->viewports.as_scissor[index]);
 	}
 
 	mask = ((1 << num_viewports) - 1) << start_slot;
 	ctx->viewports.dirty_mask |= mask;
 	ctx->viewports.depth_range_dirty_mask |= mask;
 	ctx->scissors.dirty_mask |= mask;
-	si_mark_atom_dirty(ctx, &ctx->viewports.atom);
-	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
+	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 }
 
 static void si_emit_one_viewport(struct si_context *ctx,
 				 struct pipe_viewport_state *state)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 
 	radeon_emit(cs, fui(state->scale[0]));
 	radeon_emit(cs, fui(state->translate[0]));
 	radeon_emit(cs, fui(state->scale[1]));
@@ -408,35 +408,35 @@ void si_update_vs_viewport_state(struct si_context *ctx)
 		return;
 
 	/* When the VS disables clipping and viewport transformation. */
 	vs_window_space =
 		info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
 
 	if (ctx->vs_disables_clipping_viewport != vs_window_space) {
 		ctx->vs_disables_clipping_viewport = vs_window_space;
 		ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 		ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-		si_mark_atom_dirty(ctx, &ctx->scissors.atom);
-		si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+		si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
+		si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 	}
 
 	/* Viewport index handling. */
 	ctx->vs_writes_viewport_index = info->writes_viewport_index;
 	if (!ctx->vs_writes_viewport_index)
 		return;
 
 	if (ctx->scissors.dirty_mask)
-	    si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+	    si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 
 	if (ctx->viewports.dirty_mask ||
 	    ctx->viewports.depth_range_dirty_mask)
-	    si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+	    si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 }
 
 void si_init_viewport_functions(struct si_context *ctx)
 {
-	ctx->scissors.atom.emit = si_emit_scissors;
-	ctx->viewports.atom.emit = si_emit_viewport_states;
+	ctx->atoms.s.scissors.emit = si_emit_scissors;
+	ctx->atoms.s.viewports.emit = si_emit_viewport_states;
 
 	ctx->b.set_scissor_states = si_set_scissor_states;
 	ctx->b.set_viewport_states = si_set_viewport_states;
 }
-- 
2.17.0