merging with MIB and OpenMandriva, osmesa enabled

This commit is contained in:
Alexander Khryukin 2013-01-14 18:11:59 +04:00
parent 440983d4f7
commit ab42a9892b
10 changed files with 2301 additions and 319 deletions

14
0900-Mips-support.patch Normal file
View file

@ -0,0 +1,14 @@
Index: Mesa-6.5.2/include/GL/internal/sarea.h
===================================================================
--- Mesa-6.5.2.orig/include/GL/internal/sarea.h
+++ Mesa-6.5.2/include/GL/internal/sarea.h
@@ -46,6 +46,8 @@
#define SAREA_MAX 0x2000
#elif defined(__ia64__)
#define SAREA_MAX 0x10000 /* 64kB */
+#elif defined(__mips__)
+#define SAREA_MAX getpagesize()
#else
/* Intel 830M driver needs at least 8k SAREA */
#define SAREA_MAX 0x2000

6
README.xvmc Normal file
View file

@ -0,0 +1,6 @@
Starting with version 8.0.2 of lib64dri-drivers and libdri-drivers, the
backends supporting XvMC for some Gallium drivers (softpipe, r300, r600 and
nouveau) are installed alongside the regular DRI drivers.
To actually enable that support, however, you have to manually edit the
XvMCConfig file (found in the /etc/X11/ directory) and uncomment the
line corresponding to the video hardware you are using.

24
XvMCConfig Normal file
View file

@ -0,0 +1,24 @@
# To enable the experimental XvMC support for Gallium drivers, you will have to
# tell the Mesa core where to find the needed libraries.
# Uncomment one line below, according to the driver you are actually using and
# the bitness of your Mandriva setup.
# Software driver - 64 bit
# /usr/lib64/libXvMCsoftpipe.so.1.0
# Software driver - 32 bit
# /usr/lib/libXvMCsoftpipe.so.1.0
# Driver for Radeon r300-r500 video cards - 64 bit
# /usr/lib64/libXvMCr300.so.1.0
# Driver for Radeon r300-r500 video cards - 32 bit
# /usr/lib/libXvMCr300.so.1.0
# Driver for Radeon r600+ video cards - 64 bit
# /usr/lib64/libXvMCr600.so.1.0
# Driver for Radeon r600+ video cards - 32 bit
# /usr/lib/libXvMCr600.so.1.0
# Driver for Nvidia (nouveau) - 64 bit
# /usr/lib64/libXvMCnouveau.so.1.0
# Driver for Nvidia (nouveau) - 32 bit
# /usr/lib/libXvMCnouveau.so.1.0

View file

@ -0,0 +1,28 @@
From 63c3a051cd8c9de665fd2d1af61eee89e4c66537 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 24 Sep 2012 21:30:21 +0000
Subject: build: Order src/Makefile correctly
---
diff --git a/configure.ac b/configure.ac
index 0e66823..f96cffc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1963,6 +1963,7 @@ CXXFLAGS="$CXXFLAGS $USER_CXXFLAGS"
dnl Substitute the config
AC_CONFIG_FILES([configs/current
Makefile
+ src/Makefile
src/egl/Makefile
src/egl/drivers/Makefile
src/egl/drivers/dri2/Makefile
@@ -1988,7 +1989,6 @@ AC_CONFIG_FILES([configs/current
src/glx/Makefile
src/glx/tests/Makefile
src/gtest/Makefile
- src/Makefile
src/mapi/es1api/Makefile
src/mapi/es1api/glesv1_cm.pc
src/mapi/es2api/Makefile
--
cgit v0.9.0.2-2-gbebe

1050
mesa.spec

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,70 @@
diff -rupN Mesa-9.0.old/src/gallium/drivers/r600/r600_buffer.c Mesa-9.0/src/gallium/drivers/r600/r600_buffer.c
--- Mesa-9.0.old/src/gallium/drivers/r600/r600_buffer.c 2012-09-01 01:36:09.000000000 +0200
+++ Mesa-9.0/src/gallium/drivers/r600/r600_buffer.c 2012-11-01 17:11:47.000000000 +0100
@@ -201,29 +201,31 @@ bool r600_init_resource(struct r600_scre
{
uint32_t initial_domain, domains;
- /* Staging resources particpate in transfers and blits only
- * and are used for uploads and downloads from regular
- * resources. We generate them internally for some transfers.
- */
- if (usage == PIPE_USAGE_STAGING) {
+ switch(usage) {
+ case PIPE_USAGE_STAGING:
+ /* Staging resources participate in transfers, i.e. are used
+ * for uploads and downloads from regular resources.
+ * We generate them internally for some transfers.
+ */
+ initial_domain = RADEON_DOMAIN_GTT;
domains = RADEON_DOMAIN_GTT;
+ break;
+ case PIPE_USAGE_DYNAMIC:
+ case PIPE_USAGE_STREAM:
+ /* Default to GTT, but allow the memory manager to move it to VRAM. */
initial_domain = RADEON_DOMAIN_GTT;
- } else {
domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
-
- switch(usage) {
- case PIPE_USAGE_DYNAMIC:
- case PIPE_USAGE_STREAM:
- case PIPE_USAGE_STAGING:
- initial_domain = RADEON_DOMAIN_GTT;
- break;
- case PIPE_USAGE_DEFAULT:
- case PIPE_USAGE_STATIC:
- case PIPE_USAGE_IMMUTABLE:
- default:
- initial_domain = RADEON_DOMAIN_VRAM;
- break;
- }
+ break;
+ case PIPE_USAGE_DEFAULT:
+ case PIPE_USAGE_STATIC:
+ case PIPE_USAGE_IMMUTABLE:
+ default:
+ /* Don't list GTT here, because the memory manager would put some
+ * resources to GTT no matter what the initial domain is.
+ * Not listing GTT in the domains improves performance a lot. */
+ initial_domain = RADEON_DOMAIN_VRAM;
+ domains = RADEON_DOMAIN_VRAM;
+ break;
}
res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, bind, initial_domain);
diff -rupN Mesa-9.0.old/src/gallium/drivers/r600/r600_texture.c Mesa-9.0/src/gallium/drivers/r600/r600_texture.c
--- Mesa-9.0.old/src/gallium/drivers/r600/r600_texture.c 2012-09-28 18:11:04.000000000 +0200
+++ Mesa-9.0/src/gallium/drivers/r600/r600_texture.c 2012-11-01 17:13:10.000000000 +0100
@@ -431,9 +431,10 @@ r600_texture_create_object(struct pipe_s
return NULL;
}
} else if (buf) {
+ /* This is usually the window framebuffer. We want it in VRAM, always. */
resource->buf = buf;
resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
- resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
+ resource->domains = RADEON_DOMAIN_VRAM;
}
if (rtex->cmask_size) {

View file

@ -0,0 +1,19 @@
diff -rupN Mesa-9.0.old/src/gallium/drivers/r600/r600_buffer.c Mesa-9.0/src/gallium/drivers/r600/r600_buffer.c
--- Mesa-9.0.old/src/gallium/drivers/r600/r600_buffer.c 2012-11-07 22:57:33.000000000 +0100
+++ Mesa-9.0/src/gallium/drivers/r600/r600_buffer.c 2012-11-07 22:59:08.000000000 +0100
@@ -130,7 +130,6 @@ static void *r600_buffer_transfer_map(st
r600_set_constants_dirty_if_bound(rctx, &rctx->ps_constbuf_state, rbuffer);
}
}
-#if 0 /* this is broken (see Bug 53130) */
else if ((transfer->usage & PIPE_TRANSFER_DISCARD_RANGE) &&
!(transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
rctx->screen->has_streamout &&
@@ -150,7 +149,6 @@ static void *r600_buffer_transfer_map(st
return rctx->ws->buffer_map(rtransfer->staging->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
}
}
-#endif
data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, transfer->usage);
if (!data)

View file

@ -0,0 +1,603 @@
From eca3a66a1c96e7206fe443e548b7ef63a8e8acf5 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Wed, 12 Sep 2012 14:37:08 -0400
Subject: [PATCH] r600g: add htile support v11
htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.
v2 really use fast clear; still random issues with some tiles,
need to try more flush combinations; fix depth/stencil
texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
decompression, the custom blitter with custom dsa is no longer
needed.
v6 Reorganize render control/override update mechanism, fixing more
issues in the process.
v7 Add nop after depth surface base update to work around some htile
flushing issues. Force htile to 8x8 on r6xx/r7xx as other combinations
have issues. Do not enable hyperz when flushing/uncompressing
depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
and prefetch on htile surface clear like fglrx. Record depth
clear value per level. Support several level for the htile
surface. First depth clear can't be a fast clear.
v9 Fix comments, properly account new register in emit function,
disable fast zclear if clearing different layer of texture
array to different value
v10 Disable hyperz for texture arrays, making the test simpler. Force
db_misc_state update when no depth buffer is bound. Remove
unused variable, rename depth_clearstencil to depth_clear.
Don't allocate htile surface for flushed depth. Something
broke the cliprect change; this needs to be investigated.
v11 Rebase on top of newer mesa
Signed-off-by: Pierre-Eric Pelloux-Prayer <pelloux@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
src/gallium/drivers/r600/evergreen_state.c | 95 ++++++++++++++++++++++++++----
src/gallium/drivers/r600/evergreend.h | 2 +
src/gallium/drivers/r600/r600_blit.c | 28 +++++++++
src/gallium/drivers/r600/r600_hw_context.c | 1 +
src/gallium/drivers/r600/r600_pipe.c | 9 +++
src/gallium/drivers/r600/r600_pipe.h | 22 ++++---
src/gallium/drivers/r600/r600_resource.h | 8 +++
src/gallium/drivers/r600/r600_state.c | 63 ++++++++++++++++++--
src/gallium/drivers/r600/r600_texture.c | 46 +++++++++++++++
src/gallium/drivers/r600/r600d.h | 1 +
10 files changed, 251 insertions(+), 24 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index f895906..c4030b5 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1446,6 +1446,37 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
surf->db_stencil_info = 1;
}
+ surf->htile_enabled = 0;
+ if (rtex->htile) {
+ unsigned preload_x, preload_y;
+
+ surf->htile_enabled = 1;
+ surf->db_htile_data_base = rtex->htile->surface.level[level].offset;
+ surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
+ S_028ABC_HTILE_HEIGHT(1) |
+ S_028ABC_LINEAR(1) |
+ S_028ABC_FULL_CACHE(1);
+ if (rtex->surface.level[level].nblk_x <= 512) {
+ surf->db_htile_surface |= S_028ABC_HTILE_USES_PRELOAD_WIN(1);
+ surf->db_htile_surface |= S_028ABC_PREFETCH_WIDTH(16);
+ surf->db_htile_surface |= S_028ABC_PREFETCH_HEIGHT(4);
+ } else if (rtex->surface.level[level].nblk_x <= 1024) {
+ surf->db_htile_surface |= S_028ABC_HTILE_USES_PRELOAD_WIN(1);
+ surf->db_htile_surface |= S_028ABC_PREFETCH_WIDTH(16);
+ surf->db_htile_surface |= S_028ABC_PREFETCH_HEIGHT(2);
+ } else {
+ surf->db_htile_surface |= S_028ABC_HTILE_USES_PRELOAD_WIN(1);
+ surf->db_htile_surface |= S_028ABC_PREFETCH_WIDTH(16);
+ surf->db_htile_surface |= S_028ABC_PREFETCH_HEIGHT(0);
+ }
+ /* just safe default clear value */
+ surf->depth_clear = 1.0f;
+ surf->db_depth_info |= S_028040_TILE_SURFACE_ENABLE(1);
+ preload_x = align(rtex->surface.level[level].nblk_x, 32) >> 5;
+ preload_y = align(rtex->surface.level[level].nblk_y, 32) >> 5;
+ surf->db_preload_control = S_028AC8_MAX_X(preload_x) | S_028AC8_MAX_Y(preload_y);
+ }
+
surf->depth_initialized = true;
}
@@ -1726,6 +1757,12 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
evergreen_init_depth_surface(rctx, surf);
}
+ if (rctx->db_state.rsurf != surf) {
+ rctx->db_state.rsurf = surf;
+ r600_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+ }
+
r600_pipe_state_add_reg_bo(rstate, R_028048_DB_Z_READ_BASE, surf->db_depth_base,
res, RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg_bo(rstate, R_028050_DB_Z_WRITE_BASE, surf->db_depth_base,
@@ -1743,6 +1780,10 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
res, RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, surf->db_depth_size);
r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE, surf->db_depth_slice);
+ } else if (rctx->db_state.rsurf) {
+ rctx->db_state.rsurf = NULL;
+ r600_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
/* Framebuffer dimensions. */
@@ -1844,6 +1885,28 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
r600_write_value(cs, 0xf | (a->dual_src_blend ? ps_colormask : 0) | fb_colormask); /* R_02823C_CB_SHADER_MASK */
}
+static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ struct r600_db_state *a = (struct r600_db_state*)atom;
+
+ if (a->rsurf && a->rsurf->htile_enabled) {
+ struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
+ unsigned reloc_idx;
+
+ r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(a->rsurf->depth_clear));
+ r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+ r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
+ r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+ reloc_idx = r600_context_bo_reloc(rctx, (struct r600_resource*)rtex->htile, RADEON_USAGE_READWRITE);
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = reloc_idx;
+ } else {
+ r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
+ r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
+ }
+}
+
static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->cs;
@@ -1851,7 +1914,6 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
unsigned db_render_control = 0;
unsigned db_count_control = 0;
unsigned db_render_override =
- S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
@@ -1870,6 +1932,19 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) |
S_028000_COPY_CENTROID(1) |
S_028000_COPY_SAMPLE(a->copy_sample);
+ db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE);
+ } else {
+ if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) {
+ /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+ db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
+ } else {
+ db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE);
+ }
+
+ if (a->htile_clear) {
+ /* FIXME we might want to disable cliprect here */
+ db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1);
+ }
}
r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
@@ -2162,6 +2237,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, evergreen_emit_cb_misc_state, 4);
r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 6);
r600_init_atom(rctx, &rctx->clip_state.atom, id++, evergreen_emit_clip_state, 26);
+ r600_init_atom(rctx, &rctx->db_state.atom, id++, evergreen_emit_db_state, 14);
r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 7);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8);
@@ -2274,16 +2350,13 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
- r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
- r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
- r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+ r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
- r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
+ r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 2);
r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
- r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
@@ -2522,10 +2595,9 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
- r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
+ r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 2);
r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
- r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
@@ -2815,9 +2887,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
- r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
- r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
- r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+ r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
@@ -2826,10 +2896,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
- r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
+ r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 2);
r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
- r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
r600_store_context_reg_seq(cb, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
r600_store_value(cb, 0x3F800000); /* R_028C0C_PA_CL_GB_VERT_CLIP_ADJ */
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index cb89199..7e2b687 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1873,6 +1873,8 @@
#define R_028AC0_DB_SRESULTS_COMPARE_STATE0 0x00028AC0
#define R_028AC4_DB_SRESULTS_COMPARE_STATE1 0x00028AC4
#define R_028AC8_DB_PRELOAD_CONTROL 0x00028AC8
+#define S_028AC8_MAX_X(x) (((x) & 0xff) << 16)
+#define S_028AC8_MAX_Y(x) (((x) & 0xff) << 24)
#define R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 0x028AD0
#define R_028AD4_VGT_STRMOUT_VTX_STRIDE_0 0x028AD4
#define R_028AD8_VGT_STRMOUT_BUFFER_BASE_0 0x028AD8
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 58655fa..945fe1d 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -494,11 +494,39 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
struct r600_context *rctx = (struct r600_context *)ctx;
struct pipe_framebuffer_state *fb = &rctx->framebuffer;
+ /* if hyperz enabled just clear hyperz */
+ if (fb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
+ struct r600_texture *rtex;
+ unsigned level = fb->zsbuf->u.tex.level;
+
+ rtex = (struct r600_texture*)fb->zsbuf->texture;
+
+ /* We can't use hyperz fast clear if the slices of a texture
+ * array are cleared to different values. To simplify the code, just
+ * disable fast clear for texture arrays.
+ */
+ if (rtex->htile && rtex->surface.array_size == 1) {
+ rctx->db_state.rsurf->depth_clear = depth;
+ if (rtex->htile_initialized[level]) {
+ rctx->db_misc_state.htile_clear = true;
+ } else {
+ rtex->htile_initialized[level] = true;
+ }
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+ }
+ }
+
r600_blitter_begin(ctx, R600_CLEAR);
util_blitter_clear(rctx->blitter, fb->width, fb->height,
fb->nr_cbufs, buffers, fb->nr_cbufs ? fb->cbufs[0]->format : PIPE_FORMAT_NONE,
color, depth, stencil);
r600_blitter_end(ctx);
+
+ /* disable fast clear */
+ if (rctx->db_misc_state.htile_clear) {
+ rctx->db_misc_state.htile_clear = false;
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+ }
}
static void r600_clear_render_target(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 122f878..e5730aa 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1005,6 +1005,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
r600_atom_dirty(ctx, &ctx->clip_misc_state.atom);
r600_atom_dirty(ctx, &ctx->clip_state.atom);
+ r600_atom_dirty(ctx, &ctx->db_state.atom);
r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
r600_atom_dirty(ctx, &ctx->sample_mask.atom);
r600_atom_dirty(ctx, &ctx->stencil_ref.atom);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 658e9a9..ce864d7 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -977,6 +977,15 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
LIST_INITHEAD(&rscreen->fences.blocks);
pipe_mutex_init(rscreen->fences.mutex);
+ /* Hyperz leads to lockups on r6xx/r7xx and evergreen; due to these instabilities,
+ * don't enable it by default until we can figure out how to do it properly.
+ *
+ * You can trigger a lockup easily with:
+ * piglit/bin/depthstencil-render-miplevels 1024 d=s=z24_s8
+ * Run it in a loop; it will often lock up on the first run.
+ */
+ rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", FALSE);
+
rscreen->global_pool = compute_memory_pool_new(rscreen);
return &rscreen->screen;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index ee512c7..2c58255 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -35,7 +35,7 @@
#include "r600_resource.h"
#include "evergreen_compute.h"
-#define R600_NUM_ATOMS 25
+#define R600_NUM_ATOMS 26
#define R600_MAX_CONST_BUFFERS 2
#define R600_MAX_CONST_BUFFER_SIZE 4096
@@ -70,13 +70,19 @@ struct r600_surface_sync_cmd {
unsigned flush_flags; /* CP_COHER_CNTL */
};
+struct r600_db_state {
+ struct r600_atom atom;
+ struct r600_surface *rsurf;
+};
+
struct r600_db_misc_state {
- struct r600_atom atom;
- bool occlusion_query_enabled;
- bool flush_depthstencil_through_cb;
- bool copy_depth, copy_stencil;
- unsigned copy_sample;
- unsigned log_samples;
+ struct r600_atom atom;
+ unsigned copy_sample;
+ unsigned log_samples;
+ bool occlusion_query_enabled;
+ bool flush_depthstencil_through_cb;
+ bool copy_depth, copy_stencil;
+ bool htile_clear;
};
struct r600_cb_misc_state {
@@ -179,6 +185,7 @@ struct r600_screen {
bool has_streamout;
struct r600_tiling_info tiling_info;
struct r600_pipe_fences fences;
+ bool use_hyperz;
/*for compute global memory binding, we allocate stuff here, instead of
* buffers.
@@ -396,6 +403,7 @@ struct r600_context {
struct r600_clip_misc_state clip_misc_state;
struct r600_clip_state clip_state;
struct r600_db_misc_state db_misc_state;
+ struct r600_db_state db_state;
struct r600_seamless_cube_map seamless_cube_map;
struct r600_stencil_ref_state stencil_ref;
struct r600_sample_mask sample_mask;
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index a5a5404..43d7ace 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -60,6 +60,9 @@ struct r600_texture {
* MSAA textures cannot have mipmaps. */
unsigned fmask_offset, fmask_size, fmask_bank_height;
unsigned cmask_offset, cmask_size, cmask_slice_tile_max;
+
+ struct r600_texture *htile;
+ bool htile_initialized[PIPE_MAX_TEXTURE_LEVELS];
};
#define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED)
@@ -112,6 +115,11 @@ struct r600_surface {
unsigned db_stencil_base; /* EG only */
unsigned db_stencil_info; /* EG only */
unsigned db_prefetch_limit; /* R600 only */
+ unsigned htile_enabled;
+ unsigned db_htile_surface;
+ unsigned db_htile_data_base;
+ unsigned db_preload_control;
+ float depth_clear;
};
void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 4a10261..7130a48 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1448,6 +1448,20 @@ static void r600_init_depth_surface(struct r600_context *rctx,
surf->db_depth_size = S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice);
surf->db_prefetch_limit = (rtex->surface.level[level].nblk_y / 8) - 1;
+ surf->htile_enabled = 0;
+ if (rtex->htile) {
+ surf->htile_enabled = 1;
+ surf->db_htile_data_base = rtex->htile->surface.level[level].offset;
+ surf->db_htile_surface = S_028D24_HTILE_WIDTH(1) |
+ S_028D24_HTILE_HEIGHT(1) |
+ S_028D24_LINEAR(1) |
+ S_028D24_FULL_CACHE(1);
+ /* preload is not working properly on r6xx/r7xx */
+ /* just safe default clear value */
+ surf->depth_clear = 1.0f;
+ surf->db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1);
+ }
+
surf->depth_initialized = true;
}
@@ -1627,6 +1641,12 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
r600_init_depth_surface(rctx, surf);
}
+ if (rctx->db_state.rsurf != surf) {
+ rctx->db_state.rsurf = surf;
+ r600_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+ }
+
r600_pipe_state_add_reg_bo(rstate, R_02800C_DB_DEPTH_BASE, surf->db_depth_base,
res, RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, surf->db_depth_size);
@@ -1634,6 +1654,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
r600_pipe_state_add_reg_bo(rstate, R_028010_DB_DEPTH_INFO, surf->db_depth_info,
res, RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, surf->db_prefetch_limit);
+ } else if (rctx->db_state.rsurf) {
+ rctx->db_state.rsurf = NULL;
+ r600_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
/* Framebuffer dimensions. */
@@ -1729,13 +1753,32 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
}
}
+static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ struct r600_db_state *a = (struct r600_db_state*)atom;
+
+ if (a->rsurf && a->rsurf->htile_enabled) {
+ struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
+ unsigned reloc_idx;
+
+ r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(a->rsurf->depth_clear));
+ r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+ r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+ reloc_idx = r600_context_bo_reloc(rctx, (struct r600_resource*)rtex->htile, RADEON_USAGE_READWRITE);
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = reloc_idx;
+ } else {
+ r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
+ }
+}
+
static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->cs;
struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
unsigned db_render_control = 0;
unsigned db_render_override =
- S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
@@ -1752,6 +1795,19 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
S_028D0C_STENCIL_COPY_ENABLE(a->copy_stencil) |
S_028D0C_COPY_CENTROID(1) |
S_028D0C_COPY_SAMPLE(a->copy_sample);
+ db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
+ } else {
+ if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) {
+ /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+ db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF);
+ } else {
+ db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
+ }
+
+ if (a->htile_clear) {
+ /* FIXME we might want to disable cliprect here */
+ db_render_control |= S_028D0C_DEPTH_CLEAR_ENABLE(1);
+ }
}
r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
@@ -2043,6 +2099,7 @@ void r600_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, r600_emit_cb_misc_state, 7);
r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 6);
r600_init_atom(rctx, &rctx->clip_state.atom, id++, r600_emit_clip_state, 26);
+ r600_init_atom(rctx, &rctx->db_state.atom, id++, r600_emit_db_state, 14);
r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, r600_emit_db_misc_state, 4);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8);
@@ -2347,9 +2404,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
- r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
- r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
- r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+ r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
r600_store_context_reg_seq(cb, R_0286DC_SPI_FOG_CNTL, 3);
r600_store_value(cb, 0); /* R_0286DC_SPI_FOG_CNTL */
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 6de3d6a..5bffe1a 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -421,6 +421,52 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
+ rtex->htile = NULL;
+ if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER | R600_RESOURCE_FLAG_FLUSHED_DEPTH)) &&
+ util_format_is_depth_or_stencil(base->format) &&
+ rscreen->use_hyperz &&
+ rscreen->info.drm_minor >= 14 &&
+ base->target == PIPE_TEXTURE_2D) {
+ struct pipe_resource hyperz;
+ struct radeon_surface hsurface;
+ char *ptr;
+
+ /* Allocate the hyperz buffer. */
+ hyperz = *base;
+ hyperz.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+ hsurface = *surface;
+ hsurface.npix_x = rtex->surface.level[0].nblk_x * rtex->surface.blk_w;
+ hsurface.npix_y = rtex->surface.level[0].nblk_y * rtex->surface.blk_h;
+ hsurface.blk_w = 1;
+ hsurface.blk_h = 1;
+ hsurface.bpe = 4;
+ hsurface.flags = RADEON_SURF_CLR(hsurface.flags, MODE);
+ hsurface.npix_x = align(hsurface.npix_x, 64);
+ hsurface.npix_y = align(hsurface.npix_y, 32);
+ hyperz.width0 = hsurface.npix_x;
+ hyperz.height0 = hsurface.npix_y;
+ hyperz.last_level = base->last_level;
+ hyperz.array_size = rtex->surface.array_size;
+ hyperz.bind = PIPE_BIND_RENDER_TARGET;
+ hyperz.flags = 0;
+
+ memset(rtex->htile_initialized, 0, PIPE_MAX_TEXTURE_LEVELS);
+
+ rtex->htile = r600_texture_create_object(screen, &hyperz, 0,
+ NULL, TRUE, &hsurface);
+ if (!rtex->htile) {
+ FREE(rtex);
+ return NULL;
+ }
+
+#if 0
+ /* Initialize the htile to 0 */
+ ptr = rscreen->ws->buffer_map(rtex->htile->resource.cs_buf, NULL, PIPE_TRANSFER_WRITE);
+ memset(ptr, 0, rtex->htile->size);
+ rscreen->ws->buffer_unmap(rtex->htile->resource.cs_buf);
+#endif
+ }
+
/* Now create the backing buffer. */
if (!buf && alloc_bo) {
unsigned base_align = rtex->surface.bo_alignment;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 0ec0586..6f9f8c2 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -528,6 +528,7 @@
#define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31)
#define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1)
#define C_028010_ZRANGE_PRECISION 0x7FFFFFFF
+#define R_028014_DB_HTILE_DATA_BASE 0x00028014
#define R_028414_CB_BLEND_RED 0x028414
#define S_028414_BLEND_RED(x) (((x) & 0xFFFFFFFF) << 0)
#define G_028414_BLEND_RED(x) (((x) >> 0) & 0xFFFFFFFF)
--
1.7.11.4

View file

@ -0,0 +1,348 @@
From 0a364f8fd53327be49b8f0b63055986333a6a9fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 11 Sep 2012 01:16:32 +0200
Subject: [PATCH] r600g: convert the remnants of VGT state into immediate
register writes v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
v2: Group the VGT registers together to avoid lockups
v3: Split the multi-primitive registers and the index bias register
Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
src/gallium/drivers/r600/evergreen_hw_context.c | 16 --------
src/gallium/drivers/r600/evergreen_state.c | 3 ++
src/gallium/drivers/r600/r600.h | 7 ----
src/gallium/drivers/r600/r600_hw_context.c | 16 ++------
src/gallium/drivers/r600/r600_hw_context_priv.h | 2 +-
src/gallium/drivers/r600/r600_pipe.h | 24 +++++++++---
src/gallium/drivers/r600/r600_state.c | 3 ++
src/gallium/drivers/r600/r600_state_common.c | 51 +++++++++++++++++--------
8 files changed, 65 insertions(+), 57 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 483021f..0c2159a 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -32,10 +32,6 @@ static const struct r600_reg cayman_config_reg_list[] = {
{R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0},
};
-static const struct r600_reg evergreen_ctl_const_list[] = {
- {R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0},
-};
-
static const struct r600_reg evergreen_context_reg_list[] = {
{R_028008_DB_DEPTH_VIEW, 0, 0},
{R_028010_DB_RENDER_OVERRIDE2, 0, 0},
@@ -63,10 +59,6 @@ static const struct r600_reg evergreen_context_reg_list[] = {
{R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0},
{R_028350_SX_MISC, 0, 0},
{GROUP_FORCE_NEW_BLOCK, 0, 0},
- {R_028408_VGT_INDX_OFFSET, 0, 0},
- {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0},
- {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0},
- {GROUP_FORCE_NEW_BLOCK, 0, 0},
{R_02861C_SPI_VS_OUT_ID_0, 0, 0},
{R_028620_SPI_VS_OUT_ID_1, 0, 0},
{R_028624_SPI_VS_OUT_ID_2, 0, 0},
@@ -353,10 +345,6 @@ static const struct r600_reg cayman_context_reg_list[] = {
{R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0},
{R_028350_SX_MISC, 0, 0},
{GROUP_FORCE_NEW_BLOCK, 0, 0},
- {R_028408_VGT_INDX_OFFSET, 0, 0},
- {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0},
- {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0},
- {GROUP_FORCE_NEW_BLOCK, 0, 0},
{R_02861C_SPI_VS_OUT_ID_0, 0, 0},
{R_028620_SPI_VS_OUT_ID_1, 0, 0},
{R_028624_SPI_VS_OUT_ID_2, 0, 0},
@@ -664,10 +652,6 @@ int evergreen_context_init(struct r600_context *ctx)
Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
if (r)
goto out_err;
- r = r600_context_add_block(ctx, evergreen_ctl_const_list,
- Elements(evergreen_ctl_const_list), PKT3_SET_CTL_CONST, EVERGREEN_CTL_CONST_OFFSET);
- if (r)
- goto out_err;
/* PS loop const */
evergreen_loop_const_init(ctx, 0);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index f895906..da6907f 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2150,6 +2150,9 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0);
+ r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 6);
+ r600_init_atom(rctx, &rctx->vgt2_state.atom, id++, r600_emit_vgt2_state, 3);
+
if (rctx->chip_class == EVERGREEN) {
r600_init_atom(rctx, &rctx->sample_mask.atom, id++, evergreen_emit_sample_mask, 3);
} else {
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 6363a03..83d21a4 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -228,11 +228,4 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx,
#define r600_pipe_state_add_reg_bo(state, offset, value, bo, usage) _r600_pipe_state_add_reg_bo(rctx, state, offset, value, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo, usage)
#define r600_pipe_state_add_reg(state, offset, value) _r600_pipe_state_add_reg(rctx, state, offset, value, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset))
-static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state,
- uint32_t value)
-{
- state->regs[state->nregs].value = value;
- state->nregs++;
-}
-
#endif
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 57dcc7e..e9369de 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -233,10 +233,6 @@ static const struct r600_reg r600_config_reg_list[] = {
{R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0},
};
-static const struct r600_reg r600_ctl_const_list[] = {
- {R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0},
-};
-
static const struct r600_reg r600_context_reg_list[] = {
{R_028A4C_PA_SC_MODE_CNTL, 0, 0},
{GROUP_FORCE_NEW_BLOCK, 0, 0},
@@ -461,9 +457,6 @@ static const struct r600_reg r600_context_reg_list[] = {
{GROUP_FORCE_NEW_BLOCK, 0, 0},
{R_028850_SQ_PGM_RESOURCES_PS, 0, 0},
{R_028854_SQ_PGM_EXPORTS_PS, 0, 0},
- {R_028408_VGT_INDX_OFFSET, 0, 0},
- {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0},
- {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0},
{R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 0, 0},
{R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, 0, 0},
};
@@ -555,10 +548,6 @@ int r600_context_init(struct r600_context *ctx)
Elements(r600_context_reg_list), PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET);
if (r)
goto out_err;
- r = r600_context_add_block(ctx, r600_ctl_const_list,
- Elements(r600_ctl_const_list), PKT3_SET_CTL_CONST, R600_CTL_CONST_OFFSET);
- if (r)
- goto out_err;
/* PS loop const */
r600_loop_const_init(ctx, 0);
@@ -1017,6 +1006,8 @@ void r600_begin_new_cs(struct r600_context *ctx)
r600_atom_dirty(ctx, &ctx->clip_misc_state.atom);
r600_atom_dirty(ctx, &ctx->clip_state.atom);
r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
+ r600_atom_dirty(ctx, &ctx->vgt_state.atom);
+ r600_atom_dirty(ctx, &ctx->vgt2_state.atom);
r600_atom_dirty(ctx, &ctx->sample_mask.atom);
r600_atom_dirty(ctx, &ctx->stencil_ref.atom);
r600_atom_dirty(ctx, &ctx->viewport.atom);
@@ -1067,8 +1058,9 @@ void r600_begin_new_cs(struct r600_context *ctx)
enable_block->nreg_dirty = enable_block->nreg;
}
- /* Re-emit the primitive type. */
+ /* Re-emit the draw state. */
ctx->last_primitive_type = -1;
+ ctx->last_start_instance = -1;
}
void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h
index 1a039c2..5104698 100644
--- a/src/gallium/drivers/r600/r600_hw_context_priv.h
+++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
@@ -30,7 +30,7 @@
/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 44
-#define R600_MAX_DRAW_CS_DWORDS 22
+#define R600_MAX_DRAW_CS_DWORDS 34
/* these flags are used in register flags and added into block flags */
#define REG_FLAG_NEED_BO 1
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 104ae21..0e2c0cc 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -35,7 +35,7 @@
#include "r600_resource.h"
#include "evergreen_compute.h"
-#define R600_NUM_ATOMS 25
+#define R600_NUM_ATOMS 26
#define R600_MAX_CONST_BUFFERS 2
#define R600_MAX_CONST_BUFFER_SIZE 4096
@@ -105,6 +105,17 @@ struct r600_alphatest_state {
bool cb0_export_16bpc; /* from set_framebuffer_state */
};
+struct r600_vgt_state {
+ struct r600_atom atom;
+ uint32_t vgt_multi_prim_ib_reset_en;
+ uint32_t vgt_multi_prim_ib_reset_indx;
+};
+
+struct r600_vgt2_state {
+ struct r600_atom atom;
+ uint32_t vgt_indx_offset;
+};
+
struct r600_blend_color {
struct r600_atom atom;
struct pipe_blend_color state;
@@ -147,7 +158,6 @@ enum r600_pipe_state_id {
R600_PIPE_STATE_BLEND = 0,
R600_PIPE_STATE_SCISSOR,
R600_PIPE_STATE_RASTERIZER,
- R600_PIPE_STATE_VGT,
R600_PIPE_STATE_FRAMEBUFFER,
R600_PIPE_STATE_DSA,
R600_PIPE_STATE_POLYGON_OFFSET,
@@ -361,7 +371,6 @@ struct r600_context {
struct r600_pipe_shader_selector *ps_shader;
struct r600_pipe_shader_selector *vs_shader;
struct r600_pipe_rasterizer *rasterizer;
- struct r600_pipe_state vgt;
struct r600_pipe_state spi;
struct pipe_query *current_render_cond;
unsigned current_render_cond_mode;
@@ -400,6 +409,8 @@ struct r600_context {
struct r600_db_misc_state db_misc_state;
struct r600_seamless_cube_map seamless_cube_map;
struct r600_stencil_ref_state stencil_ref;
+ struct r600_vgt_state vgt_state;
+ struct r600_vgt2_state vgt2_state;
struct r600_sample_mask sample_mask;
struct r600_viewport_state viewport;
/* Shaders and shader resources. */
@@ -481,8 +492,9 @@ struct r600_context {
struct r600_resource *dummy_fmask;
struct r600_resource *dummy_cmask;
- /* Last primitive type used in draw_vbo. */
- int last_primitive_type;
+ /* Last draw state (-1 = unset). */
+ int last_primitive_type; /* Last primitive type used in draw_vbo. */
+ int last_start_instance;
};
static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
@@ -606,6 +618,8 @@ void r600_translate_index_buffer(struct r600_context *r600,
void r600_init_common_state_functions(struct r600_context *rctx);
void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom);
+void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom);
+void r600_emit_vgt2_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 4a10261..9b90d45 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2034,6 +2034,9 @@ void r600_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, r600_emit_ps_sampler_views, 0);
r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, r600_emit_vertex_buffers, 0);
+ r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 6);
+ r600_init_atom(rctx, &rctx->vgt2_state.atom, id++, r600_emit_vgt2_state, 3);
+
r600_init_atom(rctx, &rctx->seamless_cube_map.atom, id++, r600_emit_seamless_cube_map, 3);
r600_init_atom(rctx, &rctx->sample_mask.atom, id++, r600_emit_sample_mask, 3);
rctx->sample_mask.sample_mask = ~0;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 8ff0cdf..635804c 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -188,6 +188,23 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
r600_write_value(cs, fui(state->color[3])); /* R_028420_CB_BLEND_ALPHA */
}
+void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
+
+ r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
+ r600_write_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, a->vgt_multi_prim_ib_reset_indx);
+}
+
+void r600_emit_vgt2_state(struct r600_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ struct r600_vgt2_state *a = (struct r600_vgt2_state *)atom;
+
+ r600_write_context_reg(cs, R_028408_VGT_INDX_OFFSET, a->vgt_indx_offset);
+}
+
static void r600_set_clip_state(struct pipe_context *ctx,
const struct pipe_clip_state *state)
{
@@ -1197,28 +1214,24 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
info.index_bias = info.start;
}
- if (rctx->vgt.id != R600_PIPE_STATE_VGT) {
- rctx->vgt.id = R600_PIPE_STATE_VGT;
- rctx->vgt.nregs = 0;
- r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, info.index_bias);
- r600_pipe_state_add_reg(&rctx->vgt, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, info.restart_index);
- r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info.primitive_restart);
- r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
- }
-
- rctx->vgt.nregs = 0;
- r600_pipe_state_mod_reg(&rctx->vgt, info.index_bias);
- r600_pipe_state_mod_reg(&rctx->vgt, info.restart_index);
- r600_pipe_state_mod_reg(&rctx->vgt, info.primitive_restart);
- r600_pipe_state_mod_reg(&rctx->vgt, info.start_instance);
- r600_context_pipe_state_set(rctx, &rctx->vgt);
-
/* Enable stream out if needed. */
if (rctx->streamout_start) {
r600_context_streamout_begin(rctx);
rctx->streamout_start = FALSE;
}
+ /* Set the index offset and multi primitive */
+ if (rctx->vgt2_state.vgt_indx_offset != info.index_bias) {
+ rctx->vgt2_state.vgt_indx_offset = info.index_bias;
+ r600_atom_dirty(rctx, &rctx->vgt2_state.atom);
+ }
+ if (rctx->vgt_state.vgt_multi_prim_ib_reset_en != info.primitive_restart ||
+ rctx->vgt_state.vgt_multi_prim_ib_reset_indx != info.restart_index) {
+ rctx->vgt_state.vgt_multi_prim_ib_reset_en = info.primitive_restart;
+ rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info.restart_index;
+ r600_atom_dirty(rctx, &rctx->vgt_state.atom);
+ }
+
/* Emit states (the function expects that we emit at most 17 dwords here). */
r600_need_cs_space(rctx, 0, TRUE);
r600_flush_emit(rctx);
@@ -1234,6 +1247,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
}
rctx->pm4_dirty_cdwords = 0;
+ /* Update start instance. */
+ if (rctx->last_start_instance != info.start_instance) {
+ r600_write_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
+ rctx->last_start_instance = info.start_instance;
+ }
+
/* Update the primitive type. */
if (rctx->last_primitive_type != info.mode) {
unsigned ls_mask = 0;
--
1.7.11.4

View file

@ -0,0 +1,458 @@
From e0dbf03d527b781e9128957a59135527fbd71c6e Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Thu, 1 Nov 2012 16:09:40 -0400
Subject: [PATCH] r600g: rework flushing and synchronization pattern
This brings r600g almost in line with the closed source driver when
it comes to flushing and synchronization patterns. This also gives
a performance improvement of up to 50% or more, depending on the GPU
and use case considered.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
src/gallium/drivers/r600/evergreen_compute.c | 8 +-
.../drivers/r600/evergreen_compute_internal.c | 4 +-
src/gallium/drivers/r600/evergreen_state.c | 4 +-
src/gallium/drivers/r600/r600.h | 16 +--
src/gallium/drivers/r600/r600_hw_context.c | 145 +++++----------------
src/gallium/drivers/r600/r600_state.c | 16 ++-
src/gallium/drivers/r600/r600_state_common.c | 12 +-
7 files changed, 64 insertions(+), 141 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 44831a7..33a5910 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -98,7 +98,7 @@ static void evergreen_cs_set_vertex_buffer(
/* The vertex instructions in the compute shaders use the texture cache,
* so we need to invalidate it. */
- rctx->flags |= R600_CONTEXT_TEX_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH;
state->enabled_mask |= 1 << vb_index;
state->dirty_mask |= 1 << vb_index;
state->atom.dirty = true;
@@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
*/
r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd);
- ctx->flags |= R600_CONTEXT_CB_FLUSH;
+ ctx->flags |= R600_CONTEXT_FLUSH;
r600_flush_emit(ctx);
/* Emit colorbuffers. */
@@ -409,7 +409,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
*/
- ctx->flags |= R600_CONTEXT_CB_FLUSH;
+ ctx->flags |= R600_CONTEXT_FLUSH;
r600_flush_emit(ctx);
#if 0
@@ -468,7 +468,7 @@ void evergreen_emit_cs_shader(
r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo,
RADEON_USAGE_READ));
- rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH;
}
static void evergreen_launch_grid(
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
index 3b1e581..5c5d778 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -538,7 +538,7 @@ void evergreen_set_tex_resource(
util_format_get_blockwidth(tmp->resource.b.b.format) *
view->base.texture->width0*height*depth;
- pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH;
+ pipe->ctx->flags |= R600_CONTEXT_FLUSH;
evergreen_emit_force_reloc(res);
evergreen_emit_force_reloc(res);
@@ -597,7 +597,7 @@ void evergreen_set_const_cache(
res->usage = RADEON_USAGE_READ;
res->coher_bo_size = size;
- pipe->ctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+ pipe->ctx->flags |= R600_CONTEXT_FLUSH;
}
struct r600_resource* r600_compute_buffer_alloc_vram(
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index cf3c60f..b5b0abe 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1558,14 +1558,14 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
uint32_t i, log_samples;
if (rctx->framebuffer.state.nr_cbufs) {
- rctx->flags |= R600_CONTEXT_CB_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH;
if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) {
rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
}
}
if (rctx->framebuffer.state.zsbuf) {
- rctx->flags |= R600_CONTEXT_DB_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH;
}
util_copy_framebuffer_state(&rctx->framebuffer.state, state);
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 7d43416..4060672 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -180,17 +180,11 @@ struct r600_so_target {
unsigned so_index;
};
-#define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 0)
-#define R600_CONTEXT_CB_FLUSH (1 << 1)
-#define R600_CONTEXT_DB_FLUSH (1 << 2)
-#define R600_CONTEXT_SHADERCONST_FLUSH (1 << 3)
-#define R600_CONTEXT_TEX_FLUSH (1 << 4)
-#define R600_CONTEXT_VTX_FLUSH (1 << 5)
-#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 6)
-#define R600_CONTEXT_WAIT_IDLE (1 << 7)
-#define R600_CONTEXT_FLUSH_AND_INV (1 << 8)
-#define R600_CONTEXT_HTILE_ERRATA (1 << 9)
-#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 10)
+#define R600_CONTEXT_FLUSH (1 << 0)
+#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 1)
+#define R600_CONTEXT_WAIT_IDLE (1 << 2)
+#define R600_CONTEXT_FLUSH_AND_INV (1 << 3)
+#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 4)
struct r600_context;
struct r600_screen;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 97c7d6d..4da7426 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -424,7 +424,7 @@ void r600_context_dirty_block(struct r600_context *ctx,
LIST_ADDTAIL(&block->list,&ctx->dirty);
if (block->flags & REG_FLAG_FLUSH_CHANGE) {
- ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
+ ctx->flags |= R600_CONTEXT_WAIT_IDLE;
}
}
}
@@ -595,14 +595,16 @@ out:
void r600_flush_emit(struct r600_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->cs;
+ unsigned cp_coher_cntl = 0;
if (!rctx->flags) {
return;
}
- if (rctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) {
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
+ if (rctx->chip_class >= EVERGREEN) {
+ cp_coher_cntl = 0x99900000;
+ } else {
+ cp_coher_cntl = 0x09900000;
}
if (rctx->chip_class >= R700 &&
@@ -614,110 +616,33 @@ void r600_flush_emit(struct r600_context *rctx)
if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
-
- /* DB flushes are special due to errata with hyperz, we need to
- * insert a no-op, so that the cache has time to really flush.
- */
- if (rctx->chip_class <= R700 &&
- rctx->flags & R600_CONTEXT_HTILE_ERRATA) {
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0);
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
- cs->buf[cs->cdw++] = 0xdeadcafe;
+ if (rctx->chip_class >= EVERGREEN) {
+ cp_coher_cntl = 0x1e97ffc0;
+ } else {
+ cp_coher_cntl = 0x19900000;
}
}
- if (rctx->flags & (R600_CONTEXT_CB_FLUSH |
- R600_CONTEXT_DB_FLUSH |
- R600_CONTEXT_SHADERCONST_FLUSH |
- R600_CONTEXT_TEX_FLUSH |
- R600_CONTEXT_VTX_FLUSH |
- R600_CONTEXT_STREAMOUT_FLUSH)) {
- /* anything left (cb, vtx, shader, streamout) can be flushed
- * using the surface sync packet
- */
- unsigned flags = 0;
-
- if (rctx->flags & R600_CONTEXT_CB_FLUSH) {
- flags |= S_0085F0_CB_ACTION_ENA(1) |
- S_0085F0_CB0_DEST_BASE_ENA(1) |
- S_0085F0_CB1_DEST_BASE_ENA(1) |
- S_0085F0_CB2_DEST_BASE_ENA(1) |
- S_0085F0_CB3_DEST_BASE_ENA(1) |
- S_0085F0_CB4_DEST_BASE_ENA(1) |
- S_0085F0_CB5_DEST_BASE_ENA(1) |
- S_0085F0_CB6_DEST_BASE_ENA(1) |
- S_0085F0_CB7_DEST_BASE_ENA(1);
-
- if (rctx->chip_class >= EVERGREEN) {
- flags |= S_0085F0_CB8_DEST_BASE_ENA(1) |
- S_0085F0_CB9_DEST_BASE_ENA(1) |
- S_0085F0_CB10_DEST_BASE_ENA(1) |
- S_0085F0_CB11_DEST_BASE_ENA(1);
- }
-
- /* RV670 errata
- * (CB1_DEST_BASE_ENA is also required, which is
- * included unconditionally above). */
- if (rctx->family == CHIP_RV670 ||
- rctx->family == CHIP_RS780 ||
- rctx->family == CHIP_RS880) {
- flags |= S_0085F0_DEST_BASE_0_ENA(1);
- }
- }
-
- if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
- flags |= S_0085F0_SO0_DEST_BASE_ENA(1) |
- S_0085F0_SO1_DEST_BASE_ENA(1) |
- S_0085F0_SO2_DEST_BASE_ENA(1) |
- S_0085F0_SO3_DEST_BASE_ENA(1) |
- S_0085F0_SMX_ACTION_ENA(1);
-
- /* RV670 errata */
- if (rctx->family == CHIP_RV670 ||
- rctx->family == CHIP_RS780 ||
- rctx->family == CHIP_RS880) {
- flags |= S_0085F0_DEST_BASE_0_ENA(1);
- }
+ if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
+ cp_coher_cntl |= S_0085F0_SO0_DEST_BASE_ENA(1) |
+ S_0085F0_SO1_DEST_BASE_ENA(1) |
+ S_0085F0_SO2_DEST_BASE_ENA(1) |
+ S_0085F0_SO3_DEST_BASE_ENA(1) |
+ S_0085F0_SMX_ACTION_ENA(1);
+ rctx->flags |= R600_CONTEXT_FLUSH;
+#if 0
+ /* RV670 errata */
+ if (rctx->family == CHIP_RV670 ||
+ rctx->family == CHIP_RS780 ||
+ rctx->family == CHIP_RS880) {
+ cp_coher_cntl |= S_0085F0_DEST_BASE_0_ENA(1);
}
+#endif
+ }
- flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? S_0085F0_DB_ACTION_ENA(1) |
- S_0085F0_DB_DEST_BASE_ENA(1): 0;
- flags |= (rctx->flags & R600_CONTEXT_SHADERCONST_FLUSH) ? S_0085F0_SH_ACTION_ENA(1) : 0;
- flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? S_0085F0_TC_ACTION_ENA(1) : 0;
- flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? S_0085F0_VC_ACTION_ENA(1) : 0;
-
+ if (rctx->flags & R600_CONTEXT_FLUSH) {
cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
- cs->buf[cs->cdw++] = flags; /* CP_COHER_CNTL */
+ cs->buf[cs->cdw++] = cp_coher_cntl; /* CP_COHER_CNTL */
cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */
cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */
cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */
@@ -758,11 +683,8 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
ctx->streamout_suspended = true;
}
- /* partial flush is needed to avoid lockups on some chips with user fences */
- ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
-
- /* flush the framebuffer */
- ctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_DB_FLUSH;
+ /* flush is needed to avoid lockups on some chips with user fences */
+ ctx->flags |= R600_CONTEXT_FLUSH | R600_CONTEXT_WAIT_IDLE;
/* R6xx errata */
if (ctx->chip_class == R600) {
@@ -884,9 +806,8 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
va = va + (offset << 2);
- ctx->flags &= ~R600_CONTEXT_PS_PARTIAL_FLUSH;
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
+ ctx->flags &= ~R600_CONTEXT_FLUSH;
+ r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
@@ -955,7 +876,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
ctx->family <= CHIP_RV740 ? util_bitcount(buffer_en) * 5 : 0) + /* STRMOUT_BASE_UPDATE */
util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + /* STRMOUT_BUFFER_UPDATE */
util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + /* STRMOUT_BUFFER_UPDATE */
- (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780 ? 2 : 0) + /* SURFACE_BASE_UPDATE */
+ (ctx->family > CHIP_R600 && ctx->family < CHIP_CEDAR ? 2 : 0) + /* SURFACE_BASE_UPDATE */
ctx->num_cs_dw_streamout_end, TRUE);
if (ctx->chip_class >= EVERGREEN) {
@@ -1028,7 +949,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
}
}
- if (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780) {
+ if (ctx->family > CHIP_R600 && ctx->family < CHIP_CEDAR) {
cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0);
cs->buf[cs->cdw++] = update_flags;
}
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 5c52f3d..9c589ae 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1451,7 +1451,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
unsigned i;
if (rctx->framebuffer.state.nr_cbufs) {
- rctx->flags |= R600_CONTEXT_CB_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH;
if (rctx->chip_class >= R700 &&
rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) {
@@ -1459,7 +1459,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
}
}
if (rctx->framebuffer.state.zsbuf) {
- rctx->flags |= R600_CONTEXT_DB_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH;
}
/* R6xx errata */
if (rctx->chip_class == R600) {
@@ -1741,6 +1741,13 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
sbu |= SURFACE_BASE_UPDATE_COLOR_NUM(nr_cbufs);
}
+ /* SURFACE_BASE_UPDATE */
+ if (rctx->family > CHIP_R600 && sbu) {
+ r600_write_value(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
+ r600_write_value(cs, sbu);
+ sbu = 0;
+ }
+
/* Zbuffer. */
if (state->zsbuf) {
struct r600_surface *surf = (struct r600_surface*)state->zsbuf;
@@ -1771,9 +1778,10 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
}
/* SURFACE_BASE_UPDATE */
- if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770 && sbu) {
+ if (rctx->family > CHIP_R600 && sbu) {
r600_write_value(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
r600_write_value(cs, sbu);
+ sbu = 0;
}
/* Framebuffer dimensions. */
@@ -2238,7 +2246,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) {
rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
rctx->config_state.atom.dirty = true;
- rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
+ rctx->flags |= R600_CONTEXT_WAIT_IDLE;
}
return true;
}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 7cd84bc..11ea9cf 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -87,7 +87,7 @@ static void r600_texture_barrier(struct pipe_context *ctx)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- rctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_TEX_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH | R600_CONTEXT_WAIT_IDLE;
/* R6xx errata */
if (rctx->chip_class == R600) {
@@ -359,7 +359,7 @@ void r600_sampler_states_dirty(struct r600_context *rctx,
{
if (state->dirty_mask) {
if (state->dirty_mask & state->has_bordercolor_mask) {
- rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
+ rctx->flags |= R600_CONTEXT_WAIT_IDLE;
}
state->atom.num_dw =
util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
@@ -422,7 +422,7 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
seamless_cube_map != -1 &&
seamless_cube_map != rctx->seamless_cube_map.enabled) {
/* change in TA_CNTL_AUX need a pipeline flush */
- rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
+ rctx->flags |= R600_CONTEXT_WAIT_IDLE;
rctx->seamless_cube_map.enabled = seamless_cube_map;
rctx->seamless_cube_map.atom.dirty = true;
}
@@ -488,7 +488,7 @@ static void r600_set_index_buffer(struct pipe_context *ctx,
void r600_vertex_buffers_dirty(struct r600_context *rctx)
{
if (rctx->vertex_buffer_state.dirty_mask) {
- rctx->flags |= rctx->has_vertex_cache ? R600_CONTEXT_VTX_FLUSH : R600_CONTEXT_TEX_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH;
rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
util_bitcount(rctx->vertex_buffer_state.dirty_mask);
rctx->vertex_buffer_state.atom.dirty = true;
@@ -544,7 +544,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
struct r600_samplerview_state *state)
{
if (state->dirty_mask) {
- rctx->flags |= R600_CONTEXT_TEX_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH;
state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 14 : 13) *
util_bitcount(state->dirty_mask);
state->atom.dirty = true;
@@ -886,7 +886,7 @@ static void r600_delete_vs_state(struct pipe_context *ctx, void *state)
void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
{
if (state->dirty_mask) {
- rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+ rctx->flags |= R600_CONTEXT_FLUSH;
state->atom.num_dw = rctx->chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
: util_bitcount(state->dirty_mask)*19;
state->atom.dirty = true;
--
1.7.11.7