From 853165b41a4e53d1e5a82cde6dab06117c70bf70 Mon Sep 17 00:00:00 2001 From: PktSurf Date: Sat, 9 Nov 2024 16:55:47 +0530 Subject: [PATCH] Fixed build options in base/alsa-plugins,alsa-utils Upgraded xorg/libtiff to 4.7.0 Upgraded xorg/xf86-video-intel to 20201215 Updated xorg build list --- base/alsa-plugins/smbuild | 2 +- base/alsa-utils/smbuild | 6 +- xorg/.buildlist.xorg | 1 + xorg/libtiff/smbuild | 6 +- xorg/xf86-video-intel/git.patch | 43164 ------------------------------ xorg/xf86-video-intel/smbuild | 38 + 6 files changed, 46 insertions(+), 43171 deletions(-) delete mode 100644 xorg/xf86-video-intel/git.patch create mode 100755 xorg/xf86-video-intel/smbuild diff --git a/base/alsa-plugins/smbuild b/base/alsa-plugins/smbuild index 65df623..456d54e 100644 --- a/base/alsa-plugins/smbuild +++ b/base/alsa-plugins/smbuild @@ -18,7 +18,7 @@ prepbuilddir() { build() { ./configure \ - --prefix= + --prefix=/usr make make install DESTDIR=$pkg diff --git a/base/alsa-utils/smbuild b/base/alsa-utils/smbuild index b36194b..4a1cdca 100644 --- a/base/alsa-utils/smbuild +++ b/base/alsa-utils/smbuild @@ -18,9 +18,9 @@ prepbuilddir() { build() { ./configure \ - --prefix= \ - --sbindir=/bin \ - --with-udev-rules-dir=/lib/udev/rules.d + --prefix=/usr \ + --sbindir=/usr/bin \ + --with-udev-rules-dir=/usr/lib/udev/rules.d make make install DESTDIR=$pkg diff --git a/xorg/.buildlist.xorg b/xorg/.buildlist.xorg index 5779c3f..ab92c95 100644 --- a/xorg/.buildlist.xorg +++ b/xorg/.buildlist.xorg @@ -62,6 +62,7 @@ xf86-video-fbdev xf86-video-vesa xf86-video-fbturbo xf86-video-ati +xf86-video-intel xf86-video-dummy bdftopcf imake diff --git a/xorg/libtiff/smbuild b/xorg/libtiff/smbuild index 0741c84..0ca6a71 100644 --- a/xorg/libtiff/smbuild +++ b/xorg/libtiff/smbuild @@ -1,6 +1,6 @@ # Maintainer: PktSurf app=libtiff -version=4.0.10 +version=4.7.0 build=1sml homepage="http://www.simplesystems.org/libtiff/" download="https://download.osgeo.org/libtiff/tiff-$version.tar.gz" @@ -26,11 +26,11 
@@ build() { make make install DESTDIR=$pkg - cp COPYRIGHT $pkgdocs/ + cp LICENSE.md $pkgdocs/ mkfinalpkg } sha512sums=" -2ea696ac44738d32d40c05efcc033eac6cc39c289ee9dc312e9846397f28623823755aea0ef414fc6538c4b68c311c39e409b83068ffb61b61e5612bcd1970ff tiff-4.0.10.tar.lz +50ab0c2d4a25c845d39e6e98fdf4cf600c93bf6be0692640f43f929c9fb57403fd3fc9009457919bc88bd2613815383a4d611959574d1333eb9e5148659d0088 tiff-4.7.0.tar.lz " diff --git a/xorg/xf86-video-intel/git.patch b/xorg/xf86-video-intel/git.patch deleted file mode 100644 index 2008442..0000000 --- a/xorg/xf86-video-intel/git.patch +++ /dev/null @@ -1,43164 +0,0 @@ -diff --git a/Makefile.am b/Makefile.am -index 418fdc92..de5fbe12 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -18,14 +18,16 @@ - # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - --ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4 -+#Having problems passing through user flags as libtool complains -+#ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4 -+ACLOCAL_AMFLAGS = -I m4 - - SUBDIRS = man libobj xvmc src tools - - MAINTAINERCLEANFILES = ChangeLog INSTALL - - if HAVE_X11 --SUBDIRS += test -+SUBDIRS += test benchmarks - endif - - .PHONY: ChangeLog INSTALL -diff --git a/NEWS b/NEWS -index 604b9cce..0e200332 100644 ---- a/NEWS -+++ b/NEWS -@@ -21,7 +21,7 @@ should make one more snapshot before an imminent release. - Before kernel 3.19, O_NONBLOCK support is broken and so we must avoid - reading if we are not expecting an event. - -- * Backwards compatibilty fix for fake triple buffering with PRIME and -+ * Backwards compatibility fix for fake triple buffering with PRIME and - Xorg-1.15 - https://bugs.freedesktop.org/show_bug.cgi?id=85144#c12 - -@@ -51,7 +51,7 @@ should make one more snapshot before an imminent release. 
- Snapshot 2.99.916 (2014-09-08) - ============================== - Quick update for MST in UXA - we need to hook up the RandR outputs for --dynamicaly added connectors. -+dynamically added connectors. - - - Snapshot 2.99.915 (2014-09-08) -@@ -503,7 +503,7 @@ release. - backlight property is queried whilst the connector is disabled - https://bugs.freedesktop.org/show_bug.cgi?id=70406 - -- * Pad GETCONNECTOR ioctl for compatability between 32/64-bit userspace -+ * Pad GETCONNECTOR ioctl for compatibility between 32/64-bit userspace - and kernel - - * Handle long glyph runs correctly -@@ -523,7 +523,7 @@ snapshot beforehand to push out the bug fixes from the last week. - - * Fix video output using sprites when changing the image size - -- * Apply more restrictive tile constaints for 915g class devices -+ * Apply more restrictive tile constraints for 915g class devices - https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1232546 - - * Ensure all overlapping rectangles are drawn for XRenderFillRectangles -@@ -1132,7 +1132,7 @@ operation. - * Explicitly prevent ring-switching for synchronized rendering to - scanouts (for vsync). - -- * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusuable) -+ * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusable) - https://bugs.freedesktop.org/show_bug.cgi?id=59539 - - -@@ -1226,7 +1226,7 @@ Release 2.20.15 (2012-12-03) - ============================ - And lo, enabling more of the common acceleration paths for gen4 revealed - another lurking bug - something is wrong with how we prepare Y-tiling --surfaces for rendering. For the time being, we can surreptiously disable -+surfaces for rendering. For the time being, we can surreptitiously disable - them for gen4 and avoid hitting GPU hangs. - - * Avoid clobbering the render state after failing to convert the -@@ -1515,7 +1515,7 @@ Release 2.20.5 (2012-08-26) - Another silly bug found, another small bugfix release. 
The goal was for - the driver to bind to all Intel devices supported by the kernel. - Unfortunately we were too successful and started claiming Pouslbo, --Medfield and Cedarview devices which are still encumbered by propietary -+Medfield and Cedarview devices which are still encumbered by proprietary - IP and not supported by this driver. - - Bugs fixed since 2.20.4: -diff --git a/README b/README -index cf4d88d8..348983b4 100644 ---- a/README -+++ b/README -@@ -15,9 +15,9 @@ Intel graphics chipsets including: - G/Q33,G/Q35,G41,G/Q43,G/GM/Q45 - PineView-M (Atom N400 series) - PineView-D (Atom D400/D500 series) -- Intel(R) HD Graphics: 2000-6000, -- Intel(R) Iris(TM) Graphics: 5100/6100, and -- Intel(R) Iris(TM) Pro Graphics: 5200/6200/P6300. -+ Intel(R) HD Graphics, -+ Intel(R) Iris(TM) Graphics, -+ Intel(R) Iris(TM) Pro Graphics. - - Where to get more information about the driver - ---------------------------------------------- -diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore -new file mode 100644 -index 00000000..301c0129 ---- /dev/null -+++ b/benchmarks/.gitignore -@@ -0,0 +1,2 @@ -+dri2-swap -+dri3-swap -diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am -new file mode 100644 -index 00000000..4976e8a3 ---- /dev/null -+++ b/benchmarks/Makefile.am -@@ -0,0 +1,14 @@ -+AM_CFLAGS = @CWARNFLAGS@ $(X11_CFLAGS) $(DRM_CFLAGS) -+LDADD = $(X11_LIBS) $(DRM_LIBS) $(CLOCK_GETTIME_LIBS) -+ -+check_PROGRAMS = -+ -+if DRI2 -+check_PROGRAMS += dri2-swap -+endif -+ -+if DRI3 -+check_PROGRAMS += dri3-swap -+AM_CFLAGS += $(X11_DRI3_CFLAGS) -+LDADD += $(X11_DRI3_LIBS) -+endif -diff --git a/benchmarks/dri2-swap.c b/benchmarks/dri2-swap.c -new file mode 100644 -index 00000000..3d9d30aa ---- /dev/null -+++ b/benchmarks/dri2-swap.c -@@ -0,0 +1,588 @@ -+/* -+ * Copyright (c) 2015 Intel Corporation -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to 
deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static char dri2ExtensionName[] = DRI2_NAME; -+static XExtensionInfo *dri2Info; -+static XEXT_GENERATE_CLOSE_DISPLAY (DRI2CloseDisplay, dri2Info) -+ -+static Bool -+DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire); -+static Status -+DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire); -+static int -+DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code); -+ -+static /* const */ XExtensionHooks dri2ExtensionHooks = { -+ NULL, /* create_gc */ -+ NULL, /* copy_gc */ -+ NULL, /* flush_gc */ -+ NULL, /* free_gc */ -+ NULL, /* create_font */ -+ NULL, /* free_font */ -+ DRI2CloseDisplay, /* close_display */ 
-+ DRI2WireToEvent, /* wire_to_event */ -+ DRI2EventToWire, /* event_to_wire */ -+ DRI2Error, /* error */ -+ NULL, /* error_string */ -+}; -+ -+static XEXT_GENERATE_FIND_DISPLAY (DRI2FindDisplay, -+ dri2Info, -+ dri2ExtensionName, -+ &dri2ExtensionHooks, -+ 0, NULL) -+ -+static Bool -+DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) -+{ -+ XExtDisplayInfo *info = DRI2FindDisplay(dpy); -+ -+ XextCheckExtension(dpy, info, dri2ExtensionName, False); -+ -+ switch ((wire->u.u.type & 0x7f) - info->codes->first_event) { -+#ifdef X_DRI2SwapBuffers -+ case DRI2_BufferSwapComplete: -+ return False; -+#endif -+#ifdef DRI2_InvalidateBuffers -+ case DRI2_InvalidateBuffers: -+ return False; -+#endif -+ default: -+ /* client doesn't support server event */ -+ break; -+ } -+ -+ return False; -+} -+ -+/* We don't actually support this. It doesn't make sense for clients to -+ * send each other DRI2 events. -+ */ -+static Status -+DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire) -+{ -+ XExtDisplayInfo *info = DRI2FindDisplay(dpy); -+ -+ XextCheckExtension(dpy, info, dri2ExtensionName, False); -+ -+ switch (event->type) { -+ default: -+ /* client doesn't support server event */ -+ break; -+ } -+ -+ return Success; -+} -+ -+static int -+DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code) -+{ -+ if (err->majorCode == codes->major_opcode && -+ err->errorCode == BadDrawable && -+ err->minorCode == X_DRI2CopyRegion) -+ return True; -+ -+ /* If the X drawable was destroyed before the GLX drawable, the -+ * DRI2 drawble will be gone by the time we call -+ * DRI2DestroyDrawable. So just ignore BadDrawable here. */ -+ if (err->majorCode == codes->major_opcode && -+ err->errorCode == BadDrawable && -+ err->minorCode == X_DRI2DestroyDrawable) -+ return True; -+ -+ /* If the server is non-local DRI2Connect will raise BadRequest. 
-+ * Swallow this so that DRI2Connect can signal this in its return code */ -+ if (err->majorCode == codes->major_opcode && -+ err->minorCode == X_DRI2Connect && -+ err->errorCode == BadRequest) { -+ *ret_code = False; -+ return True; -+ } -+ -+ return False; -+} -+ -+static Bool -+DRI2QueryExtension(Display * dpy, int *eventBase, int *errorBase) -+{ -+ XExtDisplayInfo *info = DRI2FindDisplay(dpy); -+ -+ if (XextHasExtension(info)) { -+ *eventBase = info->codes->first_event; -+ *errorBase = info->codes->first_error; -+ return True; -+ } -+ -+ return False; -+} -+ -+static Bool -+DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName) -+{ -+ XExtDisplayInfo *info = DRI2FindDisplay(dpy); -+ xDRI2ConnectReply rep; -+ xDRI2ConnectReq *req; -+ -+ XextCheckExtension(dpy, info, dri2ExtensionName, False); -+ -+ LockDisplay(dpy); -+ GetReq(DRI2Connect, req); -+ req->reqType = info->codes->major_opcode; -+ req->dri2ReqType = X_DRI2Connect; -+ req->window = window; -+ req->driverType = DRI2DriverDRI; -+ if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) { -+ UnlockDisplay(dpy); -+ SyncHandle(); -+ return False; -+ } -+ -+ if (rep.driverNameLength == 0 && rep.deviceNameLength == 0) { -+ UnlockDisplay(dpy); -+ SyncHandle(); -+ return False; -+ } -+ -+ *driverName = Xmalloc(rep.driverNameLength + 1); -+ if (*driverName == NULL) { -+ _XEatData(dpy, -+ ((rep.driverNameLength + 3) & ~3) + -+ ((rep.deviceNameLength + 3) & ~3)); -+ UnlockDisplay(dpy); -+ SyncHandle(); -+ return False; -+ } -+ _XReadPad(dpy, *driverName, rep.driverNameLength); -+ (*driverName)[rep.driverNameLength] = '\0'; -+ -+ *deviceName = Xmalloc(rep.deviceNameLength + 1); -+ if (*deviceName == NULL) { -+ Xfree(*driverName); -+ _XEatData(dpy, ((rep.deviceNameLength + 3) & ~3)); -+ UnlockDisplay(dpy); -+ SyncHandle(); -+ return False; -+ } -+ _XReadPad(dpy, *deviceName, rep.deviceNameLength); -+ (*deviceName)[rep.deviceNameLength] = '\0'; -+ -+ UnlockDisplay(dpy); -+ SyncHandle(); -+ -+ return 
True; -+} -+ -+static Bool -+DRI2Authenticate(Display * dpy, XID window, unsigned int magic) -+{ -+ XExtDisplayInfo *info = DRI2FindDisplay(dpy); -+ xDRI2AuthenticateReq *req; -+ xDRI2AuthenticateReply rep; -+ -+ XextCheckExtension(dpy, info, dri2ExtensionName, False); -+ -+ LockDisplay(dpy); -+ GetReq(DRI2Authenticate, req); -+ req->reqType = info->codes->major_opcode; -+ req->dri2ReqType = X_DRI2Authenticate; -+ req->window = window; -+ req->magic = magic; -+ -+ if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) { -+ UnlockDisplay(dpy); -+ SyncHandle(); -+ return False; -+ } -+ -+ UnlockDisplay(dpy); -+ SyncHandle(); -+ -+ return rep.authenticated; -+} -+ -+static void -+DRI2CreateDrawable(Display * dpy, XID drawable) -+{ -+ XExtDisplayInfo *info = DRI2FindDisplay(dpy); -+ xDRI2CreateDrawableReq *req; -+ -+ XextSimpleCheckExtension(dpy, info, dri2ExtensionName); -+ -+ LockDisplay(dpy); -+ GetReq(DRI2CreateDrawable, req); -+ req->reqType = info->codes->major_opcode; -+ req->dri2ReqType = X_DRI2CreateDrawable; -+ req->drawable = drawable; -+ UnlockDisplay(dpy); -+ SyncHandle(); -+} -+ -+static void DRI2SwapInterval(Display *dpy, XID drawable, int interval) -+{ -+ XExtDisplayInfo *info = DRI2FindDisplay(dpy); -+ xDRI2SwapIntervalReq *req; -+ -+ XextSimpleCheckExtension (dpy, info, dri2ExtensionName); -+ -+ LockDisplay(dpy); -+ GetReq(DRI2SwapInterval, req); -+ req->reqType = info->codes->major_opcode; -+ req->dri2ReqType = X_DRI2SwapInterval; -+ req->drawable = drawable; -+ req->interval = interval; -+ UnlockDisplay(dpy); -+ SyncHandle(); -+} -+ -+static int _x_error_occurred; -+ -+static int -+_check_error_handler(Display *display, -+ XErrorEvent *event) -+{ -+ fprintf(stderr, -+ "X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", -+ DisplayString(display), -+ event->serial, -+ event->error_code, -+ event->request_code, -+ event->minor_code); -+ _x_error_occurred++; -+ return False; /* ignored */ -+} -+ -+static double elapsed(const struct timespec *start, 
-+ const struct timespec *end) -+{ -+ return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; -+} -+ -+static void run(Display *dpy, Window win) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ struct timespec start, end; -+ int n, completed = 0; -+ -+ clock_gettime(CLOCK_MONOTONIC, &start); -+ do { -+ for (n = 0; n < 1000; n++) { -+ unsigned int attachments[] = { DRI2BufferBackLeft }; -+ unsigned int seq[2]; -+ -+ seq[0] = xcb_dri2_swap_buffers_unchecked(c, win, -+ 0, 0, 0, 0, 0, 0).sequence; -+ -+ -+ seq[1] = xcb_dri2_get_buffers_unchecked(c, win, -+ 1, 1, attachments).sequence; -+ -+ xcb_flush(c); -+ xcb_discard_reply(c, seq[0]); -+ xcb_discard_reply(c, seq[1]); -+ completed++; -+ } -+ clock_gettime(CLOCK_MONOTONIC, &end); -+ } while (end.tv_sec < start.tv_sec + 10); -+ -+ printf("%f\n", completed / (elapsed(&start, &end) / 1000000)); -+} -+ -+static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) -+{ -+ XRRScreenResources *res; -+ -+ res = XRRGetScreenResourcesCurrent(dpy, window); -+ if (res == NULL) -+ res = XRRGetScreenResources(dpy, window); -+ -+ return res; -+} -+ -+static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) -+{ -+ int i; -+ -+ for (i = 0; i < res->nmode; i++) { -+ if (res->modes[i].id == id) -+ return &res->modes[i]; -+ } -+ -+ return NULL; -+} -+ -+static int dri2_open(Display *dpy) -+{ -+ drm_auth_t auth; -+ char *driver, *device; -+ int fd; -+ -+ if (!DRI2QueryExtension(dpy, &fd, &fd)) -+ return -1; -+ -+ if (!DRI2Connect(dpy, DefaultRootWindow(dpy), &driver, &device)) -+ return -1; -+ -+ fd = open(device, O_RDWR); -+ if (fd < 0) -+ return -1; -+ -+ if (drmIoctl(fd, DRM_IOCTL_GET_MAGIC, &auth)) -+ return -1; -+ -+ if (!DRI2Authenticate(dpy, DefaultRootWindow(dpy), auth.magic)) -+ return -1; -+ -+ return fd; -+} -+ -+static void fullscreen(Display *dpy, Window win) -+{ -+ Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); -+ XChangeProperty(dpy, 
win, -+ XInternAtom(dpy, "_NET_WM_STATE", False), -+ XA_ATOM, 32, PropModeReplace, -+ (unsigned char *)&atom, 1); -+} -+ -+static int has_composite(Display *dpy) -+{ -+ int event, error; -+ int major, minor; -+ -+ if (!XDamageQueryExtension (dpy, &event, &error)) -+ return 0; -+ -+ if (!XCompositeQueryExtension(dpy, &event, &error)) -+ return 0; -+ -+ XCompositeQueryVersion(dpy, &major, &minor); -+ -+ return major > 0 || minor >= 4; -+} -+ -+int main(int argc, char **argv) -+{ -+ Display *dpy; -+ Window root, win; -+ XRRScreenResources *res; -+ XRRCrtcInfo **original_crtc; -+ XSetWindowAttributes attr; -+ enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN; -+ enum visible {REDIRECTED, NORMAL } v = NORMAL; -+ enum display { OFF, ON } d = OFF; -+ int width, height; -+ int i, fd; -+ int c; -+ -+ while ((c = getopt(argc, argv, "d:v:w:")) != -1) { -+ switch (c) { -+ case 'd': -+ if (strcmp(optarg, "off") == 0) -+ d = OFF; -+ else if (strcmp(optarg, "on") == 0) -+ d = ON; -+ else -+ abort(); -+ break; -+ -+ case 'v': -+ if (strcmp(optarg, "redirected") == 0) -+ v = REDIRECTED; -+ else if (strcmp(optarg, "normal") == 0) -+ v = NORMAL; -+ else -+ abort(); -+ break; -+ -+ case 'w': -+ if (strcmp(optarg, "fullscreen") == 0) -+ w = FULLSCREEN; -+ else if (strcmp(optarg, "window") == 0) -+ w = WINDOW; -+ else if (strcmp(optarg, "root") == 0) -+ w = ROOT; -+ else -+ abort(); -+ break; -+ } -+ } -+ -+ attr.override_redirect = 1; -+ -+ dpy = XOpenDisplay(NULL); -+ if (dpy == NULL) -+ return 77; -+ -+ width = DisplayWidth(dpy, DefaultScreen(dpy)); -+ height = DisplayHeight(dpy, DefaultScreen(dpy)); -+ -+ fd = dri2_open(dpy); -+ if (fd < 0) -+ return 77; -+ -+ if (DPMSQueryExtension(dpy, &i, &i)) -+ DPMSDisable(dpy); -+ -+ root = DefaultRootWindow(dpy); -+ -+ signal(SIGALRM, SIG_IGN); -+ XSetErrorHandler(_check_error_handler); -+ -+ res = NULL; -+ if (XRRQueryVersion(dpy, &i, &i)) -+ res = _XRRGetScreenResourcesCurrent(dpy, root); -+ if (res == NULL) -+ return 77; -+ -+ if (v 
== REDIRECTED && !has_composite(dpy)) -+ return 77; -+ -+ original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); -+ for (i = 0; i < res->ncrtc; i++) -+ original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); -+ -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ 0, 0, None, RR_Rotate_0, NULL, 0); -+ -+ DRI2CreateDrawable(dpy, root); -+ DRI2SwapInterval(dpy, root, 0); -+ -+ if (d != OFF) { -+ for (i = 0; i < res->noutput; i++) { -+ XRROutputInfo *output; -+ XRRModeInfo *mode; -+ -+ output = XRRGetOutputInfo(dpy, res, res->outputs[i]); -+ if (output == NULL) -+ continue; -+ -+ mode = NULL; -+ if (res->nmode) -+ mode = lookup_mode(res, output->modes[0]); -+ if (mode == NULL) -+ continue; -+ -+ XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime, -+ 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); -+ width = mode->width; -+ height = mode->height; -+ break; -+ } -+ if (i == res->noutput) { -+ _x_error_occurred = 77; -+ goto restore; -+ } -+ } -+ -+ if (w == ROOT) { -+ run(dpy, root); -+ } else if (w == FULLSCREEN) { -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ DRI2CreateDrawable(dpy, win); -+ DRI2SwapInterval(dpy, win, 0); -+ if (v == REDIRECTED) { -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XDamageCreate(dpy, win, XDamageReportRawRectangles); -+ } else -+ fullscreen(dpy, win); -+ XMapWindow(dpy, win); -+ run(dpy, win); -+ } else if (w == WINDOW) { -+ win = XCreateWindow(dpy, root, -+ 0, 0, width/2, height/2, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ DRI2CreateDrawable(dpy, win); -+ DRI2SwapInterval(dpy, win, 0); -+ if (v == REDIRECTED) { -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XDamageCreate(dpy, win, 
XDamageReportRawRectangles); -+ } -+ XMapWindow(dpy, win); -+ run(dpy, win); -+ } -+ -+restore: -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ 0, 0, None, RR_Rotate_0, NULL, 0); -+ -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ original_crtc[i]->x, -+ original_crtc[i]->y, -+ original_crtc[i]->mode, -+ original_crtc[i]->rotation, -+ original_crtc[i]->outputs, -+ original_crtc[i]->noutput); -+ -+ if (DPMSQueryExtension(dpy, &i, &i)) -+ DPMSEnable(dpy); -+ -+ XSync(dpy, True); -+ return _x_error_occurred; -+} -diff --git a/benchmarks/dri3-swap.c b/benchmarks/dri3-swap.c -new file mode 100644 -index 00000000..4dd423b3 ---- /dev/null -+++ b/benchmarks/dri3-swap.c -@@ -0,0 +1,595 @@ -+/* -+ * Copyright (c) 2015 Intel Corporation -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. 
-+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+struct dri3_fence { -+ XID xid; -+ void *addr; -+}; -+ -+static int _x_error_occurred; -+static uint32_t stamp; -+ -+struct list { -+ struct list *next, *prev; -+}; -+ -+static void -+list_init(struct list *list) -+{ -+ list->next = list->prev = list; -+} -+ -+static inline void -+__list_add(struct list *entry, -+ struct list *prev, -+ struct list *next) -+{ -+ next->prev = entry; -+ entry->next = next; -+ entry->prev = prev; -+ prev->next = entry; -+} -+ -+static inline void -+list_add(struct list *entry, struct list *head) -+{ -+ __list_add(entry, head, head->next); -+} -+ -+static inline void -+__list_del(struct list *prev, struct list *next) -+{ -+ next->prev = prev; -+ prev->next = next; -+} -+ -+static inline void -+_list_del(struct list *entry) -+{ -+ __list_del(entry->prev, entry->next); -+} -+ -+static inline void -+list_move(struct list *list, struct list *head) -+{ -+ if (list->prev != head) { -+ _list_del(list); -+ list_add(list, head); -+ } -+} -+ -+#define __container_of(ptr, sample, member) \ -+ (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample))) -+ -+#define list_for_each_entry(pos, head, member) \ -+ for (pos = __container_of((head)->next, pos, member); \ -+ &pos->member != (head); \ -+ pos = __container_of(pos->member.next, pos, member)) -+ -+static int -+_check_error_handler(Display *display, -+ XErrorEvent *event) -+{ -+ printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", -+ DisplayString(display), -+ event->serial, -+ event->error_code, -+ event->request_code, -+ event->minor_code); -+ _x_error_occurred++; -+ return False; /* ignored */ -+} -+ -+static int 
dri3_create_fence(Display *dpy, -+ Pixmap pixmap, -+ struct dri3_fence *fence) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ struct dri3_fence f; -+ int fd; -+ -+ fd = xshmfence_alloc_shm(); -+ if (fd < 0) -+ return -1; -+ -+ f.addr = xshmfence_map_shm(fd); -+ if (f.addr == NULL) { -+ close(fd); -+ return -1; -+ } -+ -+ f.xid = xcb_generate_id(c); -+ xcb_dri3_fence_from_fd(c, pixmap, f.xid, 0, fd); -+ -+ *fence = f; -+ return 0; -+} -+ -+static double elapsed(const struct timespec *start, -+ const struct timespec *end) -+{ -+ return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; -+} -+ -+struct buffer { -+ struct list link; -+ Pixmap pixmap; -+ struct dri3_fence fence; -+ int fd; -+ int busy; -+}; -+ -+static void run(Display *dpy, Window win) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ struct timespec start, end; -+#define N_BACK 8 -+ struct buffer buffer[N_BACK]; -+ struct list mru; -+ Window root; -+ unsigned int width, height; -+ unsigned border, depth; -+ unsigned present_flags = XCB_PRESENT_OPTION_ASYNC; -+ xcb_xfixes_region_t update = 0; -+ int completed = 0; -+ int queued = 0; -+ uint32_t eid; -+ void *Q; -+ int i, n; -+ -+ list_init(&mru); -+ -+ XGetGeometry(dpy, win, -+ &root, &i, &n, &width, &height, &border, &depth); -+ -+ _x_error_occurred = 0; -+ -+ for (n = 0; n < N_BACK; n++) { -+ xcb_dri3_buffer_from_pixmap_reply_t *reply; -+ int *fds; -+ -+ buffer[n].pixmap = -+ XCreatePixmap(dpy, win, width, height, depth); -+ buffer[n].fence.xid = 0; -+ buffer[n].fd = -1; -+ -+ if (dri3_create_fence(dpy, win, &buffer[n].fence)) -+ return; -+ -+ reply = xcb_dri3_buffer_from_pixmap_reply (c, -+ xcb_dri3_buffer_from_pixmap(c, buffer[n].pixmap), -+ NULL); -+ if (reply == NULL) -+ return; -+ -+ fds = xcb_dri3_buffer_from_pixmap_reply_fds (c, reply); -+ buffer[n].fd = fds[0]; -+ free(reply); -+ -+ /* start idle */ -+ xshmfence_trigger(buffer[n].fence.addr); -+ buffer[n].busy = 0; -+ list_add(&buffer[n].link, &mru); -+ } 
-+ -+ eid = xcb_generate_id(c); -+ xcb_present_select_input(c, eid, win, -+ XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY | -+ XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY); -+ Q = xcb_register_for_special_xge(c, &xcb_present_id, eid, &stamp); -+ -+ clock_gettime(CLOCK_MONOTONIC, &start); -+ do { -+ for (n = 0; n < 1000; n++) { -+ struct buffer *tmp, *b = NULL; -+ list_for_each_entry(tmp, &mru, link) { -+ if (!tmp->busy) { -+ b = tmp; -+ break; -+ } -+ } -+ while (b == NULL) { -+ xcb_present_generic_event_t *ev; -+ -+ ev = (xcb_present_generic_event_t *) -+ xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ abort(); -+ -+ do { -+ switch (ev->evtype) { -+ case XCB_PRESENT_COMPLETE_NOTIFY: -+ completed++; -+ queued--; -+ break; -+ -+ case XCB_PRESENT_EVENT_IDLE_NOTIFY: -+ { -+ xcb_present_idle_notify_event_t *ie = (xcb_present_idle_notify_event_t *)ev; -+ assert(ie->serial < N_BACK); -+ buffer[ie->serial].busy = 0; -+ if (b == NULL) -+ b = &buffer[ie->serial]; -+ break; -+ } -+ } -+ free(ev); -+ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); -+ } -+ -+ b->busy = 1; -+ if (b->fence.xid) { -+ xshmfence_await(b->fence.addr); -+ xshmfence_reset(b->fence.addr); -+ } -+ xcb_present_pixmap(c, win, b->pixmap, b - buffer, -+ 0, /* valid */ -+ update, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ b->fence.xid, -+ present_flags, -+ 0, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ list_move(&b->link, &mru); -+ queued++; -+ xcb_flush(c); -+ } -+ clock_gettime(CLOCK_MONOTONIC, &end); -+ } while (end.tv_sec < start.tv_sec + 10); -+ -+ while (queued) { -+ xcb_present_generic_event_t *ev; -+ -+ ev = (xcb_present_generic_event_t *) -+ xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ abort(); -+ -+ do { -+ switch (ev->evtype) { -+ case XCB_PRESENT_COMPLETE_NOTIFY: -+ completed++; -+ queued--; -+ break; -+ -+ case XCB_PRESENT_EVENT_IDLE_NOTIFY: -+ break; -+ } -+ free(ev); -+ } while ((ev 
= (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); -+ } -+ clock_gettime(CLOCK_MONOTONIC, &end); -+ -+ printf("%f\n", completed / (elapsed(&start, &end) / 1000000)); -+} -+ -+static int has_present(Display *dpy) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ xcb_generic_error_t *error = NULL; -+ void *reply; -+ -+ reply = xcb_present_query_version_reply(c, -+ xcb_present_query_version(c, -+ XCB_PRESENT_MAJOR_VERSION, -+ XCB_PRESENT_MINOR_VERSION), -+ &error); -+ -+ free(reply); -+ free(error); -+ if (reply == NULL) { -+ fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy)); -+ return 0; -+ } -+ -+ return 1; -+} -+ -+static int has_composite(Display *dpy) -+{ -+ int event, error; -+ int major, minor; -+ -+ if (!XDamageQueryExtension (dpy, &event, &error)) -+ return 0; -+ -+ if (!XCompositeQueryExtension(dpy, &event, &error)) -+ return 0; -+ -+ XCompositeQueryVersion(dpy, &major, &minor); -+ -+ return major > 0 || minor >= 4; -+} -+ -+static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) -+{ -+ XRRScreenResources *res; -+ -+ res = XRRGetScreenResourcesCurrent(dpy, window); -+ if (res == NULL) -+ res = XRRGetScreenResources(dpy, window); -+ -+ return res; -+} -+ -+static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) -+{ -+ int i; -+ -+ for (i = 0; i < res->nmode; i++) { -+ if (res->modes[i].id == id) -+ return &res->modes[i]; -+ } -+ -+ return NULL; -+} -+ -+static void fullscreen(Display *dpy, Window win) -+{ -+ Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); -+ XChangeProperty(dpy, win, -+ XInternAtom(dpy, "_NET_WM_STATE", False), -+ XA_ATOM, 32, PropModeReplace, -+ (unsigned char *)&atom, 1); -+} -+ -+static int dri3_query_version(Display *dpy, int *major, int *minor) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ xcb_dri3_query_version_reply_t *reply; -+ xcb_generic_error_t *error; -+ -+ *major = *minor = -1; -+ -+ reply = 
xcb_dri3_query_version_reply(c, -+ xcb_dri3_query_version(c, -+ XCB_DRI3_MAJOR_VERSION, -+ XCB_DRI3_MINOR_VERSION), -+ &error); -+ free(error); -+ if (reply == NULL) -+ return -1; -+ -+ *major = reply->major_version; -+ *minor = reply->minor_version; -+ free(reply); -+ -+ return 0; -+} -+ -+static int has_dri3(Display *dpy) -+{ -+ const xcb_query_extension_reply_t *ext; -+ int major, minor; -+ -+ ext = xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id); -+ if (ext == NULL || !ext->present) -+ return 0; -+ -+ if (dri3_query_version(dpy, &major, &minor) < 0) -+ return 0; -+ -+ return major >= 0; -+} -+ -+int main(int argc, char **argv) -+{ -+ Display *dpy; -+ Window root, win; -+ XRRScreenResources *res; -+ XRRCrtcInfo **original_crtc; -+ XSetWindowAttributes attr; -+ enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN; -+ enum visible {REDIRECTED, NORMAL } v = NORMAL; -+ enum display { OFF, ON } d = OFF; -+ int width, height; -+ int i; -+ -+ while ((i = getopt(argc, argv, "d:v:w:")) != -1) { -+ switch (i) { -+ case 'd': -+ if (strcmp(optarg, "off") == 0) -+ d = OFF; -+ else if (strcmp(optarg, "on") == 0) -+ d = ON; -+ else -+ abort(); -+ break; -+ -+ case 'v': -+ if (strcmp(optarg, "redirected") == 0) -+ v = REDIRECTED; -+ else if (strcmp(optarg, "normal") == 0) -+ v = NORMAL; -+ else -+ abort(); -+ break; -+ -+ case 'w': -+ if (strcmp(optarg, "fullscreen") == 0) -+ w = FULLSCREEN; -+ else if (strcmp(optarg, "window") == 0) -+ w = WINDOW; -+ else if (strcmp(optarg, "root") == 0) -+ w = ROOT; -+ else -+ abort(); -+ break; -+ } -+ } -+ -+ attr.override_redirect = 1; -+ -+ dpy = XOpenDisplay(NULL); -+ if (dpy == NULL) -+ return 77; -+ -+ width = DisplayWidth(dpy, DefaultScreen(dpy)); -+ height = DisplayHeight(dpy, DefaultScreen(dpy)); -+ -+ if (!has_present(dpy)) -+ return 77; -+ -+ if (!has_dri3(dpy)) -+ return 77; -+ -+ if (DPMSQueryExtension(dpy, &i, &i)) -+ DPMSDisable(dpy); -+ -+ root = DefaultRootWindow(dpy); -+ -+ signal(SIGALRM, SIG_IGN); -+ 
XSetErrorHandler(_check_error_handler); -+ -+ res = NULL; -+ if (XRRQueryVersion(dpy, &i, &i)) -+ res = _XRRGetScreenResourcesCurrent(dpy, root); -+ if (res == NULL) -+ return 77; -+ -+ if (v == REDIRECTED && !has_composite(dpy)) -+ return 77; -+ -+ original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); -+ for (i = 0; i < res->ncrtc; i++) -+ original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); -+ -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ 0, 0, None, RR_Rotate_0, NULL, 0); -+ -+ if (d != OFF) { -+ for (i = 0; i < res->noutput; i++) { -+ XRROutputInfo *output; -+ XRRModeInfo *mode; -+ -+ output = XRRGetOutputInfo(dpy, res, res->outputs[i]); -+ if (output == NULL) -+ continue; -+ -+ mode = NULL; -+ if (res->nmode) -+ mode = lookup_mode(res, output->modes[0]); -+ if (mode == NULL) -+ continue; -+ -+ XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime, -+ 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); -+ width = mode->width; -+ height = mode->height; -+ break; -+ } -+ if (i == res->noutput) { -+ _x_error_occurred = 77; -+ goto restore; -+ } -+ } -+ -+ if (w == ROOT) { -+ run(dpy, root); -+ } else if (w == FULLSCREEN) { -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (v == REDIRECTED) { -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XDamageCreate(dpy, win, XDamageReportRawRectangles); -+ } else -+ fullscreen(dpy, win); -+ XMapWindow(dpy, win); -+ run(dpy, win); -+ } else if (w == WINDOW) { -+ win = XCreateWindow(dpy, root, -+ 0, 0, width/2, height/2, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (v == REDIRECTED) { -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XDamageCreate(dpy, win, 
XDamageReportRawRectangles); -+ } -+ XMapWindow(dpy, win); -+ run(dpy, win); -+ } -+ -+restore: -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ 0, 0, None, RR_Rotate_0, NULL, 0); -+ -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ original_crtc[i]->x, -+ original_crtc[i]->y, -+ original_crtc[i]->mode, -+ original_crtc[i]->rotation, -+ original_crtc[i]->outputs, -+ original_crtc[i]->noutput); -+ -+ if (DPMSQueryExtension(dpy, &i, &i)) -+ DPMSEnable(dpy); -+ -+ XSync(dpy, True); -+ return _x_error_occurred; -+} -diff --git a/configure.ac b/configure.ac -index 61bea435..d13917ec 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -195,18 +195,24 @@ AC_ARG_ENABLE(udev, - [UDEV="$enableval"], - [UDEV=auto]) - -+udev_msg=" disabled" - if test "x$UDEV" != "xno"; then - PKG_CHECK_MODULES(UDEV, [libudev], [udev="yes"], [udev="no"]) -+ AC_CHECK_HEADERS([sys/stat.h], [], [udev="no"]) - if test "x$UDEV" = "xyes" -a "x$udev" != "xyes"; then - AC_MSG_ERROR([udev support requested but not found (libudev)]) - fi - if test "x$udev" = "xyes"; then - AC_DEFINE(HAVE_UDEV,1,[Enable udev-based monitor hotplug detection]) -+ udev_msg=" yes" -+ else -+ udev_msg=" no" - fi - fi - --PKG_CHECK_MODULES(X11, [x11 xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], [x11="yes"], [x11="no"]) -+PKG_CHECK_MODULES(X11, [x11 x11-xcb xcb-dri2 xcomposite xdamage xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], [x11="yes"], [x11="no"]) - AM_CONDITIONAL(HAVE_X11, test "x$x11" = "xyes") -+echo X11_CLFAGS="$X11_CLFAGS" X11_LIBS="$X11_LIBS" - - cpuid="yes" - AC_TRY_LINK([ -@@ -270,10 +276,13 @@ if test "x$shm" = "xyes"; then - AC_DEFINE([HAVE_MIT_SHM], 1, [Define to 1 if MIT-SHM is available]) - fi - --PKG_CHECK_MODULES(X11_DRI3, [xcb-dri3 xcb-sync xcb-present x11-xcb xshmfence x11 xrender xext libdrm], [x11_dri3="yes"], [x11_dri3="no"]) -+PKG_CHECK_MODULES(X11_DRI3, 
[xcb-dri3 xcb-sync xcb-xfixes xcb-present x11-xcb xshmfence x11 xcomposite xdamage xrender xrandr xxf86vm xext libdrm], [x11_dri3="yes"], [x11_dri3="no"]) - AM_CONDITIONAL(X11_DRI3, test "x$x11_dri3" = "xyes" -a "x$shm" = "xyes") - AM_CONDITIONAL(X11_SHM, test "x$shm" = "xyes") - -+PKG_CHECK_MODULES(X11_VM, [xxf86vm], [x11_vm="yes"], [x11_vm="no"]) -+AM_CONDITIONAL(X11_VM, test "x$x11_vm" = "xyes") -+ - AC_ARG_ENABLE(tools, - AS_HELP_STRING([--disable-tools], - [Enable building and installing the miscellaneous tools [default=auto]]), -@@ -285,7 +294,7 @@ if test "x$shm" != "xyes"; then - tools="no" - fi - if test "x$tools" != "xno"; then -- ivo_requires="xrandr xdamage xfixes xcursor xtst xrender xext x11 pixman-1" -+ ivo_requires="xrandr xdamage xfixes xcursor xtst xrender xscrnsaver xext x11 pixman-1" - extra_cflags="" - - ignore="xinerama" -@@ -307,6 +316,8 @@ if test "x$tools" != "xno"; then - tools="no" - fi - -+ PKG_CHECK_MODULES(TOOL_CURSOR, [xfixes x11 libpng], [cursor="yes"], [ivo="no"]) -+ - IVO_CFLAGS="$IVO_CFLAGS $extra_cflags" - fi - if test "x$tools" != "xno"; then -@@ -315,6 +326,7 @@ fi - AC_MSG_CHECKING([whether to build additional tools]) - AC_MSG_RESULT([$tools]) - AM_CONDITIONAL(BUILD_TOOLS, test "x$tools" != "xno") -+AM_CONDITIONAL(BUILD_TOOL_CURSOR, test "x$cursor" = "xyes") - - # Define a configure option for an alternate module directory - AC_ARG_WITH(xorg-module-dir, -@@ -339,10 +351,20 @@ AC_ARG_ENABLE(dri2, - [DRI2=$enableval], - [DRI2=yes]) - AC_ARG_ENABLE(dri3, -- AS_HELP_STRING([--enable-dri3], -- [Enable DRI3 support [[default=no]]]), -+ AS_HELP_STRING([--disable-dri3], -+ [Disable DRI3 support [[default=yes]]]), - [DRI3=$enableval], -- [DRI3=no]) -+ [DRI3=yes]) -+AC_ARG_WITH(default-dri, -+ AS_HELP_STRING([--with-default-dri], -+ [Select the default maximum DRI level [default 2]]), -+ [DRI_DEFAULT=$withval], -+ [DRI_DEFAULT=2]) -+if test "x$DRI_DEFAULT" = "x0"; then -+ AC_DEFINE(DEFAULT_DRI_LEVEL, 0,[Default DRI level]) -+else -+ 
AC_DEFINE(DEFAULT_DRI_LEVEL, ~0, [Default DRI level]) -+fi - - AC_ARG_ENABLE(xvmc, AS_HELP_STRING([--disable-xvmc], - [Disable XvMC support [[default=yes]]]), -@@ -375,14 +397,12 @@ AC_ARG_ENABLE(ums-only, - required_xorg_server_version=1.6 - required_pixman_version=0.16 - --if pkg-config --exists 'pixman-1 >= 0.27.1'; then -- AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache]) --fi -- --if pkg-config --exists 'pixman-1 >= 0.24.0'; then -- AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation]) --fi -- -+PKG_CHECK_EXISTS([pixman-1 >= 0.24.0], -+ AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation]) -+ []) -+PKG_CHECK_EXISTS([pixman-1 >= 0.27.1], -+ [AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache])], -+ []) - # Store the list of server defined optional extensions in REQUIRED_MODULES - XORG_DRIVER_CHECK_EXT(RANDR, randrproto) - XORG_DRIVER_CHECK_EXT(RENDER, renderproto) -@@ -398,24 +418,25 @@ AC_ARG_ENABLE(sna, - [SNA="$enableval"], - [SNA=auto]) - -+AC_CHECK_HEADERS([dev/wscons/wsconsio.h]) -+AC_FUNC_ALLOCA -+AC_HEADER_MAJOR -+ - if test "x$SNA" != "xno"; then - AC_DEFINE(USE_SNA, 1, [Enable SNA support]) - AC_CHECK_HEADERS([sys/sysinfo.h], AC_CHECK_MEMBERS([struct sysinfo.totalram], [], [], [[#include ]])) - fi - - uxa_requires_libdrm=2.4.52 -+uxa_requires_pixman=0.24.0 -+ - AC_ARG_ENABLE(uxa, - AS_HELP_STRING([--enable-uxa], - [Enable Unified Acceleration Architecture (UXA) [default=auto]]), - [UXA="$enableval"], - [UXA=auto]) - if test "x$UXA" = "xauto"; then -- if ! pkg-config --exists "libdrm_intel >= $uxa_requires_libdrm"; then -- UXA=no -- fi -- if ! 
pkg-config --exists 'pixman-1 >= 0.24.0'; then -- UXA=no -- fi -+ PKG_CHECK_EXISTS([libdrm_intel >= $uxa_requires_libdrm pixman-1 >= $uxa_requires_pixman], [], [UXA=no]) - fi - if test "x$UXA" != "xno"; then - AC_DEFINE(USE_UXA, 1, [Enable UXA support]) -@@ -424,8 +445,10 @@ if test "x$UXA" != "xno"; then - UXA=yes - fi - --PKG_CHECK_MODULES(XORG, [xorg-server >= $required_xorg_server_version xproto fontsproto pixman-1 >= $required_pixman_version $REQUIRED_MODULES]) -+PKG_CHECK_MODULES(XORG, [xorg-server >= $required_xorg_server_version xproto fontsproto damageproto pixman-1 >= $required_pixman_version $REQUIRED_MODULES]) - ABI_VERSION=`$PKG_CONFIG --variable=abi_videodrv xorg-server` -+XSERVER_VERSION=`$PKG_CONFIG --modversion xorg-server` -+PIXMAN_VERSION=`$PKG_CONFIG --modversion pixman-1` - - if test "x$ONLY_UMS" = "xyes"; then - UMS="yes" -@@ -519,7 +542,12 @@ AC_MSG_RESULT([$have_dri1]) - AM_CONDITIONAL(DRI1, test "x$have_dri1" != "xno") - if test "x$have_dri1" != "xno"; then - AC_DEFINE(HAVE_DRI1,1,[Enable DRI1 driver support]) -- dri_msg="$dri_msg DRI1" -+ str="DRI1" -+ if test "x$DRI_DEFAULT" = "x1"; then -+ AC_DEFINE(DEFAULT_DRI_LEVEL,1,[Default DRI level]) -+ str="*$str" -+ fi -+ dri_msg="$dri_msg $str" - else - DRI1_CFLAGS="" - DRI1_LIBS="" -@@ -576,7 +604,12 @@ AM_CONDITIONAL(DRI2, test "x$have_dri2" != "xno") - AC_MSG_RESULT([$have_dri2]) - if test "x$have_dri2" != "xno"; then - AC_DEFINE(HAVE_DRI2,1,[Enable DRI2 driver support]) -- dri_msg="$dri_msg DRI2" -+ str="DRI2" -+ if test "x$DRI_DEFAULT" = "x2"; then -+ AC_DEFINE(DEFAULT_DRI_LEVEL,2,[Default DRI level]) -+ str="*$str" -+ fi -+ dri_msg="$dri_msg $str" - else - if test "x$DRI" = "xyes" -a "x$DRI2" != "xno" -a "x$KMS" = "xyes"; then - AC_MSG_ERROR([DRI2 requested but prerequisites not found]) -@@ -591,13 +624,21 @@ AM_CONDITIONAL(DRI3, test "x$have_dri3" != "xno") - AC_MSG_RESULT([$have_dri3]) - if test "x$have_dri3" != "xno"; then - AC_DEFINE(HAVE_DRI3,1,[Enable DRI3 driver support]) -- 
dri_msg="$dri_msg DRI3" -+ str="DRI3" -+ if test "x$DRI_DEFAULT" = "x3"; then -+ AC_DEFINE(DEFAULT_DRI_LEVEL,3,[Default DRI level]) -+ str="*$str" -+ fi -+ dri_msg="$dri_msg $str" - else - if test "x$DRI" = "xyes" -a "x$DRI3" != "xno" -a "x$KMS" = "xyes"; then - AC_MSG_ERROR([DRI3 requested but prerequisites not found]) - fi - fi - -+AC_MSG_CHECKING([default DRI support]) -+AC_MSG_RESULT([$DEFAULT_DRI_DEFAULT]) -+ - AC_CHECK_HEADERS([X11/extensions/dpmsconst.h]) - - PRESENT="no" -@@ -711,27 +752,6 @@ if test "x$TEARFREE" = "xyes"; then - xp_msg="$xp_msg TearFree" - fi - --AC_ARG_ENABLE(rendernode, -- AS_HELP_STRING([--enable-rendernode], -- [Enable use of render nodes (experimental) [default=no]]), -- [RENDERNODE="$enableval"], -- [RENDERNODE="no"]) --AM_CONDITIONAL(USE_RENDERNODE, test "x$RENDERNODE" = "xyes") --if test "x$RENDERNODE" = "xyes"; then -- AC_DEFINE(USE_RENDERNODE,1,[Assume "rendernode" support]) -- xp_msg="$xp_msg rendernode" --fi -- --AC_ARG_ENABLE(wc-mmap, -- AS_HELP_STRING([--enable-wc-mmap], -- [Enable use of WriteCombining mmaps [default=no]]), -- [WC_MMAP="$enableval"], -- [WC_MMAP="no"]) --if test "x$WC_MMAP" = "xyes"; then -- AC_DEFINE(USE_WC_MMAP,1,[Enable use of WriteCombining mmaps]) -- xp_msg="$xp_msg mmap(wc)" --fi -- - AC_ARG_ENABLE(create2, - AS_HELP_STRING([--enable-create2], - [Enable use of create2 ioctl (experimental) [default=no]]), -@@ -848,6 +868,7 @@ AC_CONFIG_FILES([ - xvmc/shader/mc/Makefile - xvmc/shader/vld/Makefile - test/Makefile -+ benchmarks/Makefile - tools/Makefile - tools/org.x.xf86-video-intel.backlight-helper.policy - ]) -@@ -855,7 +876,7 @@ AC_OUTPUT - - echo "" - echo "" --test -e `pwd $0`/README && cat `pwd $0`/README -+cat $srcdir/README - - accel_msg="" - if test "x$SNA" != "xno"; then -@@ -895,13 +916,15 @@ fi - - echo "" - echo "AC_PACKAGE_STRING will be compiled with:" --echo " Xorg Video ABI version: $ABI_VERSION" -+echo " Xorg Video ABI version: $ABI_VERSION (xorg-server-$XSERVER_VERSION)" -+echo " pixman 
version: pixman-1-$PIXMAN_VERSION" - echo " Acceleration backends:$accel_msg" - echo " Additional debugging support?$debug_msg" - echo " Support for Kernel Mode Setting? $KMS" - echo " Support for legacy User Mode Setting (for i810)? $UMS" - echo " Support for Direct Rendering Infrastructure:$dri_msg" - echo " Support for Xv motion compensation (XvMC and libXvMC):$xvmc_msg" -+echo " Support for display hotplug notifications (udev):$udev_msg" - echo " Build additional tools and utilities?$tools_msg" - if test -n "$xp_msg"; then - echo " Experimental support:$xp_msg" -diff --git a/libobj/alloca.c b/libobj/alloca.c -new file mode 100644 -index 00000000..883e1e9f ---- /dev/null -+++ b/libobj/alloca.c -@@ -0,0 +1,4 @@ -+void *alloca(size_t sz) -+{ -+ return NULL; -+} -diff --git a/man/intel.man b/man/intel.man -index 17515206..be398fbe 100644 ---- a/man/intel.man -+++ b/man/intel.man -@@ -27,9 +27,9 @@ supports the i810, i810-DC100, i810e, i815, i830M, 845G, 852GM, 855GM, - 865G, 915G, 915GM, 945G, 945GM, 965G, 965Q, 946GZ, 965GM, 945GME, - G33, Q33, Q35, G35, GM45, G45, Q45, G43, G41 chipsets, Pineview-M in - Atom N400 series, Pineview-D in Atom D400/D500 series, --Intel(R) HD Graphics: 2000-6000, --Intel(R) Iris(TM) Graphics: 5100/6100, and --Intel(R) Iris(TM) Pro Graphics: 5200/6200/P6300. -+Intel(R) HD Graphics, -+Intel(R) Iris(TM) Graphics, -+Intel(R) Iris(TM) Pro Graphics. - - .SH CONFIGURATION DETAILS - Please refer to __xconfigfile__(__filemansuffix__) for general configuration -@@ -112,8 +112,8 @@ The default is 8192 if AGP allocable memory is < 128 MB, 16384 if < 192 MB, - 24576 if higher. DRI require at least a value of 16384. Higher values may give - better 3D performance, at expense of available system memory. - .TP --.BI "Option \*qNoAccel\*q \*q" boolean \*q --Disable or enable acceleration. -+.BI "Option \*qAccel\*q \*q" boolean \*q -+Enable or disable acceleration. - .IP - Default: acceleration is enabled. 
- -@@ -122,8 +122,8 @@ The following driver - .B Options - are supported for the 830M and later chipsets: - .TP --.BI "Option \*qNoAccel\*q \*q" boolean \*q --Disable or enable acceleration. -+.BI "Option \*qAccel\*q \*q" boolean \*q -+Enable or disable acceleration. - .IP - Default: acceleration is enabled. - .TP -@@ -201,6 +201,16 @@ that choice by specifying the entry under /sys/class/backlight to use. - .IP - Default: Automatic selection. - .TP -+.BI "Option \*qCustomEDID\*q \*q" string \*q -+Override the probed EDID on particular outputs. Sometimes the manufacturer -+supplied EDID is corrupt or lacking a few usable modes and supplying a -+corrected EDID may be easier than specifying every modeline. This option -+allows to pass the path to load an EDID from per output. The format is a -+comma separated string of output:path pairs, e.g. -+DP1:/path/to/dp1.edid,DP2:/path/to/dp2.edid -+.IP -+Default: No override, use manufacturer supplied EDIDs. -+.TP - .BI "Option \*qFallbackDebug\*q \*q" boolean \*q - Enable printing of debugging information on acceleration fallbacks to the - server log. -@@ -225,6 +235,15 @@ i.e. perform synchronous rendering. - .IP - Default: Disabled - .TP -+.BI "Option \*qHWRotation\*q \*q" boolean \*q -+Override the use of native hardware rotation and force the use of software, -+but GPU accelerated where possible, rotation. On some platforms the hardware -+can scanout directly into a rotated output bypassing the intermediate rendering -+and extra allocations required for software implemented rotation (i.e. native -+rotation uses less resources, is quicker and uses less power). This allows you -+to disable the native rotation in case of errors. 
-+.IP -+Default: Enabled (use hardware rotation) - .TP - .BI "Option \*qVSync\*q \*q" boolean \*q - This option controls the use of commands to synchronise rendering with the -@@ -324,13 +343,29 @@ Default: 0 - .BI "Option \*qZaphodHeads\*q \*q" string \*q - .IP - Specify the randr output(s) to use with zaphod mode for a particular driver --instance. If you this option you must use it with all instances of the --driver -+instance. If you set this option you must use it with all instances of the -+driver. By default, each head is assigned only one CRTC (which limits -+using multiple outputs with that head to cloned mode). CRTC can be manually -+assigned to individual heads by preceding the output names with a comma -+delimited list of pipe numbers followed by a colon. Note that different pipes -+may be limited in their functionality and some outputs may only work with -+different pipes. - .br - For example: -+ -+.RS - .B - Option \*qZaphodHeads\*q \*qLVDS1,VGA1\*q --will assign xrandr outputs LVDS1 and VGA0 to this instance of the driver. -+ -+will assign xrandr outputs LVDS1 and VGA1 to this instance of the driver. -+.RE -+ -+.RS -+.B -+Option \*qZaphodHeads\*q \*q0,2:HDMI1,DP2\*q -+ -+will assign xrandr outputs HDMI1 and DP2 and CRTCs 0 and 2 to this instance of the driver. -+.RE - - .SH OUTPUT CONFIGURATION - On 830M and better chipsets, the driver supports runtime configuration of -@@ -431,11 +466,11 @@ First DVI SDVO output - Second DVI SDVO output - - .SS "TMDS-1", "TMDS-2", "HDMI-1", "HDMI-2" --DVI/HDMI outputs. Avaliable common properties include: -+DVI/HDMI outputs. Available common properties include: - .TP - \fBBROADCAST_RGB\fP - method used to set RGB color range - Adjusting this property allows you to set RGB color range on each --channel in order to match HDTV requirment(default 0 for full -+channel in order to match HDTV requirement(default 0 for full - range). 
Setting 1 means RGB color range is 16-235, 0 means RGB color - range is 0-255 on each channel. (Full range is 0-255, not 16-235) - -diff --git a/src/backlight.c b/src/backlight.c -index 9f239867..fcbb279f 100644 ---- a/src/backlight.c -+++ b/src/backlight.c -@@ -34,6 +34,12 @@ - #include - #include - -+#if MAJOR_IN_MKDEV -+#include -+#elif MAJOR_IN_SYSMACROS -+#include -+#endif -+ - #include - #include - #include -@@ -42,6 +48,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -84,7 +91,7 @@ void backlight_init(struct backlight *b) - b->has_power = 0; - } - --#ifdef __OpenBSD__ -+#ifdef HAVE_DEV_WSCONS_WSCONSIO_H - - #include - #include -@@ -122,6 +129,11 @@ int backlight_get(struct backlight *b) - return param.curval; - } - -+char *backlight_find_for_device(struct pci_device *pci) -+{ -+ return NULL; -+} -+ - int backlight_open(struct backlight *b, char *iface) - { - struct wsdisplay_param param; -@@ -146,12 +158,9 @@ int backlight_open(struct backlight *b, char *iface) - return param.curval; - } - --enum backlight_type backlight_exists(const char *iface) -+int backlight_exists(const char *iface) - { -- if (iface != NULL) -- return BL_NONE; -- -- return BL_PLATFORM; -+ return iface == NULL; - } - - int backlight_on(struct backlight *b) -@@ -163,6 +172,7 @@ int backlight_off(struct backlight *b) - { - return 0; - } -+ - #else - - static int -@@ -213,6 +223,24 @@ __backlight_read(const char *iface, const char *file) - } - - static int -+writen(int fd, const char *value, int len) -+{ -+ int ret; -+ -+ do { -+ ret = write(fd, value, len); -+ if (ret < 0) { -+ if (errno == EAGAIN || errno == EINTR) -+ continue; -+ -+ return ret; -+ } -+ } while (value += ret, len -= ret); -+ -+ return 0; -+} -+ -+static int - __backlight_write(const char *iface, const char *file, const char *value) - { - int fd, ret; -@@ -221,7 +249,7 @@ __backlight_write(const char *iface, const char *file, const char *value) - if (fd < 0) - return -1; - -- ret = write(fd, 
value, strlen(value)+1); -+ ret = writen(fd, value, strlen(value)+1); - close(fd); - - return ret; -@@ -244,10 +272,10 @@ static const char *known_interfaces[] = { - "intel_backlight", - }; - --static enum backlight_type __backlight_type(const char *iface) -+static int __backlight_type(const char *iface) - { - char buf[1024]; -- int fd, v; -+ int fd, v, i; - - v = -1; - fd = __backlight_open(iface, "type", O_RDONLY); -@@ -261,39 +289,41 @@ static enum backlight_type __backlight_type(const char *iface) - buf[v] = '\0'; - - if (strcmp(buf, "raw") == 0) -- v = BL_RAW; -+ v = BL_RAW << 8; - else if (strcmp(buf, "platform") == 0) -- v = BL_PLATFORM; -+ v = BL_PLATFORM << 8; - else if (strcmp(buf, "firmware") == 0) -- v = BL_FIRMWARE; -+ v = BL_FIRMWARE << 8; - else -- v = BL_NAMED; -+ v = BL_NAMED << 8; - } else -- v = BL_NAMED; -+ v = BL_NAMED << 8; - -- if (v == BL_NAMED) { -- int i; -- for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) { -- if (strcmp(iface, known_interfaces[i]) == 0) -- break; -- } -- v += i; -+ for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) { -+ if (strcmp(iface, known_interfaces[i]) == 0) -+ break; - } -+ v += i; - - return v; - } - --enum backlight_type backlight_exists(const char *iface) -+static int __backlight_exists(const char *iface) - { - if (__backlight_read(iface, "brightness") < 0) -- return BL_NONE; -+ return -1; - - if (__backlight_read(iface, "max_brightness") <= 0) -- return BL_NONE; -+ return -1; - - return __backlight_type(iface); - } - -+int backlight_exists(const char *iface) -+{ -+ return __backlight_exists(iface) != -1; -+} -+ - static int __backlight_init(struct backlight *b, char *iface, int fd) - { - b->fd = fd_move_cloexec(fd_set_nonblock(fd)); -@@ -399,7 +429,50 @@ __backlight_find(void) - continue; - - /* Fallback to priority list of known iface for old kernels */ -- v = backlight_exists(de->d_name); -+ v = __backlight_exists(de->d_name); -+ if (v < 0) -+ continue; -+ -+ if (v < best_type) { -+ char *copy = 
strdup(de->d_name); -+ if (copy) { -+ free(best_iface); -+ best_iface = copy; -+ best_type = v; -+ } -+ } -+ } -+ closedir(dir); -+ -+ return best_iface; -+} -+ -+char *backlight_find_for_device(struct pci_device *pci) -+{ -+ char path[200]; -+ unsigned best_type = INT_MAX; -+ char *best_iface = NULL; -+ DIR *dir; -+ struct dirent *de; -+ -+ snprintf(path, sizeof(path), -+ "/sys/bus/pci/devices/%04x:%02x:%02x.%d/backlight", -+ pci->domain, pci->bus, pci->dev, pci->func); -+ -+ dir = opendir(path); -+ if (dir == NULL) -+ return NULL; -+ -+ while ((de = readdir(dir))) { -+ int v; -+ -+ if (*de->d_name == '.') -+ continue; -+ -+ v = __backlight_exists(de->d_name); -+ if (v < 0) -+ continue; -+ - if (v < best_type) { - char *copy = strdup(de->d_name); - if (copy) { -@@ -416,14 +489,17 @@ __backlight_find(void) - - int backlight_open(struct backlight *b, char *iface) - { -- int level; -+ int level, type; - - if (iface == NULL) - iface = __backlight_find(); - if (iface == NULL) - goto err; - -- b->type = __backlight_type(iface); -+ type = __backlight_type(iface); -+ if (type < 0) -+ goto err; -+ b->type = type >> 8; - - b->max = __backlight_read(iface, "max_brightness"); - if (b->max <= 0) -@@ -447,7 +523,7 @@ err: - int backlight_set(struct backlight *b, int level) - { - char val[BACKLIGHT_VALUE_LEN]; -- int len, ret = 0; -+ int len; - - if (b->iface == NULL) - return 0; -@@ -456,10 +532,7 @@ int backlight_set(struct backlight *b, int level) - level = b->max; - - len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level); -- if (write(b->fd, val, len) != len) -- ret = -1; -- -- return ret; -+ return writen(b->fd, val, len); - } - - int backlight_get(struct backlight *b) -@@ -517,43 +590,6 @@ void backlight_disable(struct backlight *b) - void backlight_close(struct backlight *b) - { - backlight_disable(b); -- if (b->pid) -+ if (b->pid > 0) - waitpid(b->pid, NULL, 0); - } -- --char *backlight_find_for_device(struct pci_device *pci) --{ -- char path[200]; -- unsigned 
best_type = INT_MAX; -- char *best_iface = NULL; -- DIR *dir; -- struct dirent *de; -- -- snprintf(path, sizeof(path), -- "/sys/bus/pci/devices/%04x:%02x:%02x.%d/backlight", -- pci->domain, pci->bus, pci->dev, pci->func); -- -- dir = opendir(path); -- if (dir == NULL) -- return NULL; -- -- while ((de = readdir(dir))) { -- int v; -- -- if (*de->d_name == '.') -- continue; -- -- v = backlight_exists(de->d_name); -- if (v < best_type) { -- char *copy = strdup(de->d_name); -- if (copy) { -- free(best_iface); -- best_iface = copy; -- best_type = v; -- } -- } -- } -- closedir(dir); -- -- return best_iface; --} -diff --git a/src/backlight.h b/src/backlight.h -index bb0e28bc..ba17755b 100644 ---- a/src/backlight.h -+++ b/src/backlight.h -@@ -43,7 +43,7 @@ struct backlight { - int pid, fd; - }; - --enum backlight_type backlight_exists(const char *iface); -+int backlight_exists(const char *iface); - - void backlight_init(struct backlight *backlight); - int backlight_open(struct backlight *backlight, char *iface); -diff --git a/src/compat-api.h b/src/compat-api.h -index d09e1fb3..05797a08 100644 ---- a/src/compat-api.h -+++ b/src/compat-api.h -@@ -30,6 +30,7 @@ - - #include - #include -+#include - - #include - #ifndef GLYPH_HAS_GLYPH_PICTURE_ACCESSOR -@@ -39,7 +40,17 @@ - - #ifndef XF86_HAS_SCRN_CONV - #define xf86ScreenToScrn(s) xf86Screens[(s)->myNum] -+#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,1,0,0,0) - #define xf86ScrnToScreen(s) screenInfo.screens[(s)->scrnIndex] -+#else -+#define xf86ScrnToScreen(s) ((s)->pScreen) -+#endif -+#else -+#define xf86ScrnToScreen(s) ((s)->pScreen) -+#endif -+ -+#if GET_ABI_MAJOR(ABI_VIDEODRV_VERSION) >= 22 -+#define HAVE_NOTIFY_FD 1 - #endif - - #ifndef XF86_SCRN_INTERFACE -@@ -131,6 +142,17 @@ region_rects(const RegionRec *r) - return r->data ? 
(const BoxRec *)(r->data + 1) : &r->extents; - } - -+inline static void -+region_get_boxes(const RegionRec *r, const BoxRec **s, const BoxRec **e) -+{ -+ int n; -+ if (r->data) -+ *s = region_boxptr(r), n = r->data->numRects; -+ else -+ *s = &r->extents, n = 1; -+ *e = *s + n; -+} -+ - #ifndef INCLUDE_LEGACY_REGION_DEFINES - #define RegionCreate(r, s) REGION_CREATE(NULL, r, s) - #define RegionBreak(r) REGION_BREAK(NULL, r) -@@ -223,4 +245,19 @@ static inline void FreePixmap(PixmapPtr pixmap) - dstx, dsty) - #endif - -+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0) -+#define isGPU(S) (S)->is_gpu -+#else -+#define isGPU(S) 0 -+#endif -+ -+#if HAS_DIRTYTRACKING_ROTATION -+#define PixmapSyncDirtyHelper(d, dd) PixmapSyncDirtyHelper(d) -+#endif -+ -+#if !HAVE_NOTIFY_FD -+#define SetNotifyFd(fd, cb, mode, data) AddGeneralSocket(fd); -+#define RemoveNotifyFd(fd) RemoveGeneralSocket(fd) -+#endif -+ - #endif -diff --git a/src/i915_pciids.h b/src/i915_pciids.h -index 180ad0e6..466c7159 100644 ---- a/src/i915_pciids.h -+++ b/src/i915_pciids.h -@@ -134,7 +134,7 @@ - #define INTEL_IVB_Q_IDS(info) \ - INTEL_QUANTA_VGA_DEVICE(info) /* Quanta transcode */ - --#define INTEL_HSW_D_IDS(info) \ -+#define INTEL_HSW_IDS(info) \ - INTEL_VGA_DEVICE(0x0402, info), /* GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \ - INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \ -@@ -179,9 +179,7 @@ - INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \ - INTEL_VGA_DEVICE(0x0D0E, info), /* CRW GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \ -- INTEL_VGA_DEVICE(0x0D2E, info) /* CRW GT3 reserved */ \ -- --#define INTEL_HSW_M_IDS(info) \ -+ INTEL_VGA_DEVICE(0x0D2E, info), /* CRW GT3 reserved */ \ - INTEL_VGA_DEVICE(0x0406, info), /* GT1 mobile */ \ - INTEL_VGA_DEVICE(0x0416, info), /* GT2 mobile */ \ - INTEL_VGA_DEVICE(0x0426, info), /* GT2 mobile */ \ -@@ -198,60 +196,48 @@ - INTEL_VGA_DEVICE(0x0D16, info), /* CRW GT2 mobile */ \ - 
INTEL_VGA_DEVICE(0x0D26, info) /* CRW GT3 mobile */ - --#define INTEL_VLV_M_IDS(info) \ -+#define INTEL_VLV_IDS(info) \ - INTEL_VGA_DEVICE(0x0f30, info), \ - INTEL_VGA_DEVICE(0x0f31, info), \ - INTEL_VGA_DEVICE(0x0f32, info), \ - INTEL_VGA_DEVICE(0x0f33, info), \ -- INTEL_VGA_DEVICE(0x0157, info) -- --#define INTEL_VLV_D_IDS(info) \ -+ INTEL_VGA_DEVICE(0x0157, info), \ - INTEL_VGA_DEVICE(0x0155, info) - --#define _INTEL_BDW_M(gt, id, info) \ -- INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info) --#define _INTEL_BDW_D(gt, id, info) \ -- INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info) -- --#define _INTEL_BDW_M_IDS(gt, info) \ -- _INTEL_BDW_M(gt, 0x1602, info), /* ULT */ \ -- _INTEL_BDW_M(gt, 0x1606, info), /* ULT */ \ -- _INTEL_BDW_M(gt, 0x160B, info), /* Iris */ \ -- _INTEL_BDW_M(gt, 0x160E, info) /* ULX */ -- --#define _INTEL_BDW_D_IDS(gt, info) \ -- _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ -- _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ -- --#define INTEL_BDW_GT12M_IDS(info) \ -- _INTEL_BDW_M_IDS(1, info), \ -- _INTEL_BDW_M_IDS(2, info) -- --#define INTEL_BDW_GT12D_IDS(info) \ -- _INTEL_BDW_D_IDS(1, info), \ -- _INTEL_BDW_D_IDS(2, info) -- --#define INTEL_BDW_GT3M_IDS(info) \ -- _INTEL_BDW_M_IDS(3, info) -- --#define INTEL_BDW_GT3D_IDS(info) \ -- _INTEL_BDW_D_IDS(3, info) -- --#define INTEL_BDW_RSVDM_IDS(info) \ -- _INTEL_BDW_M_IDS(4, info) -- --#define INTEL_BDW_RSVDD_IDS(info) \ -- _INTEL_BDW_D_IDS(4, info) -- --#define INTEL_BDW_M_IDS(info) \ -- INTEL_BDW_GT12M_IDS(info), \ -- INTEL_BDW_GT3M_IDS(info), \ -- INTEL_BDW_RSVDM_IDS(info) -- --#define INTEL_BDW_D_IDS(info) \ -- INTEL_BDW_GT12D_IDS(info), \ -- INTEL_BDW_GT3D_IDS(info), \ -- INTEL_BDW_RSVDD_IDS(info) -+#define INTEL_BDW_GT12_IDS(info) \ -+ INTEL_VGA_DEVICE(0x1602, info), /* GT1 ULT */ \ -+ INTEL_VGA_DEVICE(0x1606, info), /* GT1 ULT */ \ -+ INTEL_VGA_DEVICE(0x160B, info), /* GT1 Iris */ \ -+ INTEL_VGA_DEVICE(0x160E, info), /* GT1 ULX */ \ -+ INTEL_VGA_DEVICE(0x1612, info), /* GT2 Halo */ \ -+ 
INTEL_VGA_DEVICE(0x1616, info), /* GT2 ULT */ \ -+ INTEL_VGA_DEVICE(0x161B, info), /* GT2 ULT */ \ -+ INTEL_VGA_DEVICE(0x161E, info), /* GT2 ULX */ \ -+ INTEL_VGA_DEVICE(0x160A, info), /* GT1 Server */ \ -+ INTEL_VGA_DEVICE(0x160D, info), /* GT1 Workstation */ \ -+ INTEL_VGA_DEVICE(0x161A, info), /* GT2 Server */ \ -+ INTEL_VGA_DEVICE(0x161D, info) /* GT2 Workstation */ -+ -+#define INTEL_BDW_GT3_IDS(info) \ -+ INTEL_VGA_DEVICE(0x1622, info), /* ULT */ \ -+ INTEL_VGA_DEVICE(0x1626, info), /* ULT */ \ -+ INTEL_VGA_DEVICE(0x162B, info), /* Iris */ \ -+ INTEL_VGA_DEVICE(0x162E, info), /* ULX */\ -+ INTEL_VGA_DEVICE(0x162A, info), /* Server */ \ -+ INTEL_VGA_DEVICE(0x162D, info) /* Workstation */ -+ -+#define INTEL_BDW_RSVD_IDS(info) \ -+ INTEL_VGA_DEVICE(0x1632, info), /* ULT */ \ -+ INTEL_VGA_DEVICE(0x1636, info), /* ULT */ \ -+ INTEL_VGA_DEVICE(0x163B, info), /* Iris */ \ -+ INTEL_VGA_DEVICE(0x163E, info), /* ULX */ \ -+ INTEL_VGA_DEVICE(0x163A, info), /* Server */ \ -+ INTEL_VGA_DEVICE(0x163D, info) /* Workstation */ -+ -+#define INTEL_BDW_IDS(info) \ -+ INTEL_BDW_GT12_IDS(info), \ -+ INTEL_BDW_GT3_IDS(info), \ -+ INTEL_BDW_RSVD_IDS(info) - - #define INTEL_CHV_IDS(info) \ - INTEL_VGA_DEVICE(0x22b0, info), \ -@@ -259,21 +245,85 @@ - INTEL_VGA_DEVICE(0x22b2, info), \ - INTEL_VGA_DEVICE(0x22b3, info) - --#define INTEL_SKL_IDS(info) \ -- INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \ -+#define INTEL_SKL_GT1_IDS(info) \ - INTEL_VGA_DEVICE(0x1906, info), /* ULT GT1 */ \ -- INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \ -- INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \ - INTEL_VGA_DEVICE(0x190E, info), /* ULX GT1 */ \ -+ INTEL_VGA_DEVICE(0x1902, info), /* DT GT1 */ \ -+ INTEL_VGA_DEVICE(0x190B, info), /* Halo GT1 */ \ -+ INTEL_VGA_DEVICE(0x190A, info) /* SRV GT1 */ -+ -+#define INTEL_SKL_GT2_IDS(info) \ -+ INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \ -+ INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \ - INTEL_VGA_DEVICE(0x191E, info), /* ULX GT2 */ \ - 
INTEL_VGA_DEVICE(0x1912, info), /* DT GT2 */ \ -- INTEL_VGA_DEVICE(0x1902, info), /* DT GT1 */ \ - INTEL_VGA_DEVICE(0x191B, info), /* Halo GT2 */ \ -- INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \ -- INTEL_VGA_DEVICE(0x190B, info), /* Halo GT1 */ \ - INTEL_VGA_DEVICE(0x191A, info), /* SRV GT2 */ \ -- INTEL_VGA_DEVICE(0x192A, info), /* SRV GT3 */ \ -- INTEL_VGA_DEVICE(0x190A, info), /* SRV GT1 */ \ - INTEL_VGA_DEVICE(0x191D, info) /* WKS GT2 */ - -+#define INTEL_SKL_GT3_IDS(info) \ -+ INTEL_VGA_DEVICE(0x1923, info), /* ULT GT3 */ \ -+ INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \ -+ INTEL_VGA_DEVICE(0x1927, info), /* ULT GT3 */ \ -+ INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \ -+ INTEL_VGA_DEVICE(0x192D, info) /* SRV GT3 */ -+ -+#define INTEL_SKL_GT4_IDS(info) \ -+ INTEL_VGA_DEVICE(0x1932, info), /* DT GT4 */ \ -+ INTEL_VGA_DEVICE(0x193B, info), /* Halo GT4 */ \ -+ INTEL_VGA_DEVICE(0x193D, info), /* WKS GT4 */ \ -+ INTEL_VGA_DEVICE(0x192A, info), /* SRV GT4 */ \ -+ INTEL_VGA_DEVICE(0x193A, info) /* SRV GT4e */ -+ -+#define INTEL_SKL_IDS(info) \ -+ INTEL_SKL_GT1_IDS(info), \ -+ INTEL_SKL_GT2_IDS(info), \ -+ INTEL_SKL_GT3_IDS(info), \ -+ INTEL_SKL_GT4_IDS(info) -+ -+#define INTEL_BXT_IDS(info) \ -+ INTEL_VGA_DEVICE(0x0A84, info), \ -+ INTEL_VGA_DEVICE(0x1A84, info), \ -+ INTEL_VGA_DEVICE(0x1A85, info), \ -+ INTEL_VGA_DEVICE(0x5A84, info), /* APL HD Graphics 505 */ \ -+ INTEL_VGA_DEVICE(0x5A85, info) /* APL HD Graphics 500 */ -+ -+#define INTEL_GLK_IDS(info) \ -+ INTEL_VGA_DEVICE(0x3184, info), \ -+ INTEL_VGA_DEVICE(0x3185, info) -+ -+#define INTEL_KBL_GT1_IDS(info) \ -+ INTEL_VGA_DEVICE(0x5913, info), /* ULT GT1.5 */ \ -+ INTEL_VGA_DEVICE(0x5915, info), /* ULX GT1.5 */ \ -+ INTEL_VGA_DEVICE(0x5917, info), /* DT GT1.5 */ \ -+ INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \ -+ INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \ -+ INTEL_VGA_DEVICE(0x5902, info), /* DT GT1 */ \ -+ INTEL_VGA_DEVICE(0x5908, info), /* Halo GT1 */ \ -+ INTEL_VGA_DEVICE(0x590B, info), /* 
Halo GT1 */ \ -+ INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */ -+ -+#define INTEL_KBL_GT2_IDS(info) \ -+ INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \ -+ INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \ -+ INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \ -+ INTEL_VGA_DEVICE(0x5912, info), /* DT GT2 */ \ -+ INTEL_VGA_DEVICE(0x591B, info), /* Halo GT2 */ \ -+ INTEL_VGA_DEVICE(0x591A, info), /* SRV GT2 */ \ -+ INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */ -+ -+#define INTEL_KBL_GT3_IDS(info) \ -+ INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \ -+ INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \ -+ INTEL_VGA_DEVICE(0x5927, info) /* ULT GT3 */ -+ -+#define INTEL_KBL_GT4_IDS(info) \ -+ INTEL_VGA_DEVICE(0x593B, info) /* Halo GT4 */ -+ -+#define INTEL_KBL_IDS(info) \ -+ INTEL_KBL_GT1_IDS(info), \ -+ INTEL_KBL_GT2_IDS(info), \ -+ INTEL_KBL_GT3_IDS(info), \ -+ INTEL_KBL_GT4_IDS(info) -+ - #endif /* _I915_PCIIDS_H */ -diff --git a/src/intel_device.c b/src/intel_device.c -index 140e1536..c4910cd8 100644 ---- a/src/intel_device.c -+++ b/src/intel_device.c -@@ -38,6 +38,12 @@ - #include - #include - -+#if MAJOR_IN_MKDEV -+#include -+#elif MAJOR_IN_SYSMACROS -+#include -+#endif -+ - #include - - #include -@@ -197,9 +203,15 @@ static inline struct intel_device *intel_device(ScrnInfoPtr scrn) - return xf86GetEntityPrivate(scrn->entityList[0], intel_device_key)->ptr; - } - -+static const char *kernel_module_names[] ={ -+ "i915", -+ NULL, -+}; -+ - static int is_i915_device(int fd) - { - drm_version_t version; -+ const char **kn; - char name[5] = ""; - - memset(&version, 0, sizeof(version)); -@@ -209,7 +221,22 @@ static int is_i915_device(int fd) - if (drmIoctl(fd, DRM_IOCTL_VERSION, &version)) - return 0; - -- return strcmp("i915", name) == 0; -+ for (kn = kernel_module_names; *kn; kn++) -+ if (strcmp(*kn, name) == 0) -+ return 1; -+ -+ return 0; -+} -+ -+static int load_i915_kernel_module(void) -+{ -+ const char **kn; -+ -+ for (kn = kernel_module_names; *kn; kn++) -+ if 
(xf86LoadKernelModule(*kn)) -+ return 0; -+ -+ return -1; - } - - static int is_i915_gem(int fd) -@@ -336,7 +363,7 @@ static int __intel_open_device__pci(const struct pci_device *pci) - - sprintf(path + base, "driver"); - if (stat(path, &st)) { -- if (xf86LoadKernelModule("i915")) -+ if (load_i915_kernel_module()) - return -1; - (void)xf86LoadKernelModule("fbcon"); - } -@@ -399,7 +426,7 @@ static int __intel_open_device__legacy(const struct pci_device *pci) - - ret = drmCheckModesettingSupported(id); - if (ret) { -- if (xf86LoadKernelModule("i915")) -+ if (load_i915_kernel_module() == 0) - ret = drmCheckModesettingSupported(id); - if (ret) - return -1; -@@ -461,9 +488,9 @@ static int is_render_node(int fd, struct stat *st) - - static char *find_render_node(int fd) - { --#if defined(USE_RENDERNODE) - struct stat master, render; - char buf[128]; -+ int i; - - /* Are we a render-node ourselves? */ - if (is_render_node(fd, &master)) -@@ -472,9 +499,17 @@ static char *find_render_node(int fd) - sprintf(buf, "/dev/dri/renderD%d", (int)((master.st_rdev | 0x80) & 0xbf)); - if (stat(buf, &render) == 0 && - master.st_mode == render.st_mode && -- render.st_rdev == ((master.st_rdev | 0x80) & 0xbf)) -+ render.st_rdev == (master.st_rdev | 0x80)) - return strdup(buf); --#endif -+ -+ /* Misaligned card <-> renderD, do a full search */ -+ for (i = 0; i < 16; i++) { -+ sprintf(buf, "/dev/dri/renderD%d", i + 128); -+ if (stat(buf, &render) == 0 && -+ master.st_mode == render.st_mode && -+ render.st_rdev == (master.st_rdev | 0x80)) -+ return strdup(buf); -+ } - - return NULL; - } -@@ -608,6 +643,27 @@ err_path: - return -1; - } - -+void intel_close_device(int entity_num) -+{ -+ struct intel_device *dev; -+ -+ if (intel_device_key == -1) -+ return; -+ -+ dev = xf86GetEntityPrivate(entity_num, intel_device_key)->ptr; -+ xf86GetEntityPrivate(entity_num, intel_device_key)->ptr = NULL; -+ if (!dev) -+ return; -+ -+ if (dev->master_count == 0) /* Don't close server-fds */ -+ close(dev->fd); 
-+ -+ if (dev->render_node != dev->master_node) -+ free(dev->render_node); -+ free(dev->master_node); -+ free(dev); -+} -+ - int __intel_peek_fd(ScrnInfoPtr scrn) - { - struct intel_device *dev; -@@ -672,6 +728,12 @@ struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd) - return dev; - } - -+const char *intel_get_master_name(struct intel_device *dev) -+{ -+ assert(dev && dev->master_node); -+ return dev->master_node; -+} -+ - const char *intel_get_client_name(struct intel_device *dev) - { - assert(dev && dev->render_node); -diff --git a/src/intel_driver.h b/src/intel_driver.h -index 28ed1a0e..bece88a0 100644 ---- a/src/intel_driver.h -+++ b/src/intel_driver.h -@@ -124,9 +124,11 @@ int intel_entity_get_devid(int index); - int intel_open_device(int entity_num, - const struct pci_device *pci, - struct xf86_platform_device *dev); -+void intel_close_device(int entity_num); - int __intel_peek_fd(ScrnInfoPtr scrn); - struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd); - int intel_has_render_node(struct intel_device *dev); -+const char *intel_get_master_name(struct intel_device *dev); - const char *intel_get_client_name(struct intel_device *dev); - int intel_get_client_fd(struct intel_device *dev); - int intel_get_device_id(struct intel_device *dev); -diff --git a/src/intel_list.h b/src/intel_list.h -index 51af825d..c8a3187a 100644 ---- a/src/intel_list.h -+++ b/src/intel_list.h -@@ -306,8 +306,7 @@ list_is_empty(const struct list *head) - list_entry((ptr)->prev, type, member) - - #define __container_of(ptr, sample, member) \ -- (void *)((char *)(ptr) \ -- - ((char *)&(sample)->member - (char *)(sample))) -+ (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample))) - /** - * Loop through the list given by head and set pos to struct in the list. 
- * -@@ -392,17 +391,50 @@ static inline void list_move_tail(struct list *list, struct list *head) - #define list_last_entry(ptr, type, member) \ - list_entry((ptr)->prev, type, member) - --#define list_for_each_entry_reverse(pos, head, member) \ -+#define list_for_each_entry_reverse(pos, head, member) \ - for (pos = __container_of((head)->prev, pos, member); \ - &pos->member != (head); \ - pos = __container_of(pos->member.prev, pos, member)) - - #endif - -+#define list_for_each_entry_safe_from(pos, tmp, head, member) \ -+ for (tmp = __container_of(pos->member.next, pos, member); \ -+ &pos->member != (head); \ -+ pos = tmp, tmp = __container_of(tmp->member.next, tmp, member)) -+ - #undef container_of - #define container_of(ptr, type, member) \ - ((type *)((char *)(ptr) - (char *) &((type *)0)->member)) - -+static inline void __list_splice(const struct list *list, -+ struct list *prev, -+ struct list *next) -+{ -+ struct list *first = list->next; -+ struct list *last = list->prev; -+ -+ first->prev = prev; -+ prev->next = first; -+ -+ last->next = next; -+ next->prev = last; -+} -+ -+static inline void list_splice(const struct list *list, -+ struct list *head) -+{ -+ if (!list_is_empty(list)) -+ __list_splice(list, head, head->next); -+} -+ -+static inline void list_splice_tail(const struct list *list, -+ struct list *head) -+{ -+ if (!list_is_empty(list)) -+ __list_splice(list, head->prev, head); -+} -+ - static inline int list_is_singular(const struct list *list) - { - return list->next == list->prev; -diff --git a/src/intel_module.c b/src/intel_module.c -index 102d52aa..2e97b5ea 100644 ---- a/src/intel_module.c -+++ b/src/intel_module.c -@@ -126,6 +126,17 @@ static const struct intel_device_info intel_skylake_info = { - .gen = 0110, - }; - -+static const struct intel_device_info intel_broxton_info = { -+ .gen = 0111, -+}; -+ -+static const struct intel_device_info intel_kabylake_info = { -+ .gen = 0112, -+}; -+ -+static const struct intel_device_info 
intel_geminilake_info = { -+ .gen = 0113, -+}; - - static const SymTabRec intel_chipsets[] = { - {PCI_CHIP_I810, "i810"}, -@@ -234,30 +245,63 @@ static const SymTabRec intel_chipsets[] = { - {0x0157, "HD Graphics"}, - - /* Broadwell Marketing names */ -- {0x1602, "HD graphics"}, -- {0x1606, "HD graphics"}, -- {0x160B, "HD graphics"}, -- {0x160A, "HD graphics"}, -- {0x160D, "HD graphics"}, -- {0x160E, "HD graphics"}, -- {0x1612, "HD graphics 5600"}, -- {0x1616, "HD graphics 5500"}, -- {0x161B, "HD graphics"}, -- {0x161A, "HD graphics"}, -- {0x161D, "HD graphics"}, -- {0x161E, "HD graphics 5300"}, -- {0x1622, "Iris Pro graphics 6200"}, -- {0x1626, "HD graphics 6000"}, -- {0x162B, "Iris graphics 6100"}, -- {0x162A, "Iris Pro graphics P6300"}, -- {0x162D, "HD graphics"}, -- {0x162E, "HD graphics"}, -- {0x1632, "HD graphics"}, -- {0x1636, "HD graphics"}, -- {0x163B, "HD graphics"}, -- {0x163A, "HD graphics"}, -- {0x163D, "HD graphics"}, -- {0x163E, "HD graphics"}, -+ {0x1602, "HD Graphics"}, -+ {0x1606, "HD Graphics"}, -+ {0x160B, "HD Graphics"}, -+ {0x160A, "HD Graphics"}, -+ {0x160D, "HD Graphics"}, -+ {0x160E, "HD Graphics"}, -+ {0x1612, "HD Graphics 5600"}, -+ {0x1616, "HD Graphics 5500"}, -+ {0x161B, "HD Graphics"}, -+ {0x161A, "HD Graphics"}, -+ {0x161D, "HD Graphics"}, -+ {0x161E, "HD Graphics 5300"}, -+ {0x1622, "Iris Pro Graphics 6200"}, -+ {0x1626, "HD Graphics 6000"}, -+ {0x162B, "Iris Graphics 6100"}, -+ {0x162A, "Iris Pro Graphics P6300"}, -+ {0x162D, "HD Graphics"}, -+ {0x162E, "HD Graphics"}, -+ {0x1632, "HD Graphics"}, -+ {0x1636, "HD Graphics"}, -+ {0x163B, "HD Graphics"}, -+ {0x163A, "HD Graphics"}, -+ {0x163D, "HD Graphics"}, -+ {0x163E, "HD Graphics"}, -+ -+ /* Cherryview (Cherrytrail/Braswell) */ -+ {0x22b0, "HD Graphics"}, -+ {0x22b1, "HD Graphics"}, -+ {0x22b2, "HD Graphics"}, -+ {0x22b3, "HD Graphics"}, -+ -+ /* Skylake */ -+ {0x1902, "HD Graphics 510"}, -+ {0x1906, "HD Graphics 510"}, -+ {0x190B, "HD Graphics 510"}, -+ {0x1912, "HD Graphics 
530"}, -+ {0x1916, "HD Graphics 520"}, -+ {0x191B, "HD Graphics 530"}, -+ {0x191D, "HD Graphics P530"}, -+ {0x191E, "HD Graphics 515"}, -+ {0x1921, "HD Graphics 520"}, -+ {0x1926, "Iris Graphics 540"}, -+ {0x1927, "Iris Graphics 550"}, -+ {0x192B, "Iris Graphics 555"}, -+ {0x192D, "Iris Graphics P555"}, -+ {0x1932, "Iris Pro Graphics 580"}, -+ {0x193A, "Iris Pro Graphics P580"}, -+ {0x193B, "Iris Pro Graphics 580"}, -+ {0x193D, "Iris Pro Graphics P580"}, -+ -+ /* Broxton (Apollolake) */ -+ {0x5A84, "HD Graphics 505"}, -+ {0x5A85, "HD Graphics 500"}, -+ -+ /* Kabylake */ -+ {0x5916, "HD Graphics 620"}, -+ {0x591E, "HD Graphics 615"}, - - /* When adding new identifiers, also update: - * 1. intel_identify() -@@ -305,18 +349,14 @@ static const struct pci_id_match intel_device_match[] = { - INTEL_IVB_D_IDS(&intel_ivybridge_info), - INTEL_IVB_M_IDS(&intel_ivybridge_info), - -- INTEL_HSW_D_IDS(&intel_haswell_info), -- INTEL_HSW_M_IDS(&intel_haswell_info), -- -- INTEL_VLV_D_IDS(&intel_valleyview_info), -- INTEL_VLV_M_IDS(&intel_valleyview_info), -- -- INTEL_BDW_D_IDS(&intel_broadwell_info), -- INTEL_BDW_M_IDS(&intel_broadwell_info), -- -+ INTEL_HSW_IDS(&intel_haswell_info), -+ INTEL_VLV_IDS(&intel_valleyview_info), -+ INTEL_BDW_IDS(&intel_broadwell_info), - INTEL_CHV_IDS(&intel_cherryview_info), -- - INTEL_SKL_IDS(&intel_skylake_info), -+ INTEL_BXT_IDS(&intel_broxton_info), -+ INTEL_KBL_IDS(&intel_kabylake_info), -+ INTEL_GLK_IDS(&intel_geminilake_info), - - INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info), - #endif -@@ -448,9 +488,9 @@ static void intel_identify(int flags) - if (unique != stack) - free(unique); - -- xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) HD Graphics: 2000-6000\n"); -- xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Graphics: 5100, 6100\n"); -- xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Pro Graphics: 5200, 6200, P6300\n"); -+ xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) HD Graphics\n"); -+ xf86Msg(X_INFO, 
INTEL_NAME ": Driver for Intel(R) Iris(TM) Graphics\n"); -+ xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Pro Graphics\n"); - } - - static Bool intel_driver_func(ScrnInfoPtr pScrn, -@@ -508,6 +548,9 @@ static enum accel_method { NOACCEL, SNA, UXA } get_accel_method(void) - if (hosted()) - return SNA; - -+ if (xf86configptr == NULL) /* X -configure */ -+ return SNA; -+ - dev = _xf86findDriver("intel", xf86configptr->conf_device_lst); - if (dev && dev->dev_option_lst) { - const char *s; -@@ -582,10 +625,17 @@ intel_scrn_create(DriverPtr driver, - case NOACCEL: - #endif - case UXA: -- return intel_init_scrn(scrn); -+ return intel_init_scrn(scrn); - #endif - -- default: break; -+ default: -+#if USE_SNA -+ return sna_init_scrn(scrn, entity_num); -+#elif USE_UXA -+ return intel_init_scrn(scrn); -+#else -+ break; -+#endif - } - #endif - -@@ -604,6 +654,8 @@ static Bool intel_pci_probe(DriverPtr driver, - struct pci_device *pci, - intptr_t match_data) - { -+ Bool ret; -+ - if (intel_open_device(entity_num, pci, NULL) == -1) { - #if UMS - switch (pci->device_id) { -@@ -621,7 +673,11 @@ static Bool intel_pci_probe(DriverPtr driver, - #endif - } - -- return intel_scrn_create(driver, entity_num, match_data, 0); -+ ret = intel_scrn_create(driver, entity_num, match_data, 0); -+ if (!ret) -+ intel_close_device(entity_num); -+ -+ return ret; - } - - #ifdef XSERVER_PLATFORM_BUS -@@ -644,9 +700,16 @@ intel_platform_probe(DriverPtr driver, - - /* if we get any flags we don't understand fail to probe for now */ - if (flags) -- return FALSE; -+ goto err; -+ -+ if (!intel_scrn_create(driver, entity_num, match_data, scrn_flags)) -+ goto err; - -- return intel_scrn_create(driver, entity_num, match_data, scrn_flags); -+ return TRUE; -+ -+err: -+ intel_close_device(entity_num); -+ return FALSE; - } - #endif - -diff --git a/src/intel_options.c b/src/intel_options.c -index ff8541a4..7f253ac1 100644 ---- a/src/intel_options.c -+++ b/src/intel_options.c -@@ -2,18 +2,24 @@ - 
#include "config.h" - #endif - -+#include -+#include -+#include -+ - #include "intel_options.h" - - const OptionInfoRec intel_options[] = { -- {OPTION_ACCEL_DISABLE, "NoAccel", OPTV_BOOLEAN, {0}, 0}, -+ {OPTION_ACCEL_ENABLE, "Accel", OPTV_BOOLEAN, {0}, 0}, - {OPTION_ACCEL_METHOD, "AccelMethod", OPTV_STRING, {0}, 0}, - {OPTION_BACKLIGHT, "Backlight", OPTV_STRING, {0}, 0}, -+ {OPTION_EDID, "CustomEDID", OPTV_STRING, {0}, 0}, - {OPTION_DRI, "DRI", OPTV_STRING, {0}, 0}, - {OPTION_PRESENT, "Present", OPTV_BOOLEAN, {0}, 1}, - {OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, 0}, - {OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, 0}, - {OPTION_TILING_2D, "Tiling", OPTV_BOOLEAN, {0}, 1}, - {OPTION_TILING_FB, "LinearFramebuffer", OPTV_BOOLEAN, {0}, 0}, -+ {OPTION_ROTATION, "HWRotation", OPTV_BOOLEAN, {0}, 1}, - {OPTION_VSYNC, "VSync", OPTV_BOOLEAN, {0}, 1}, - {OPTION_PAGEFLIP, "PageFlip", OPTV_BOOLEAN, {0}, 1}, - {OPTION_SWAPBUFFERS_WAIT, "SwapbuffersWait", OPTV_BOOLEAN, {0}, 1}, -@@ -21,7 +27,6 @@ const OptionInfoRec intel_options[] = { - {OPTION_PREFER_OVERLAY, "XvPreferOverlay", OPTV_BOOLEAN, {0}, 0}, - {OPTION_HOTPLUG, "HotPlug", OPTV_BOOLEAN, {0}, 1}, - {OPTION_REPROBE, "ReprobeOutputs", OPTV_BOOLEAN, {0}, 0}, -- {OPTION_DELETE_DP12, "DeleteUnusedDP12Displays", OPTV_BOOLEAN, {0}, 0}, - #ifdef INTEL_XVMC - {OPTION_XVMC, "XvMC", OPTV_BOOLEAN, {0}, 1}, - #endif -@@ -54,3 +59,85 @@ OptionInfoPtr intel_options_get(ScrnInfoPtr scrn) - - return options; - } -+ -+Bool intel_option_cast_to_bool(OptionInfoPtr options, int id, Bool val) -+{ -+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) -+ xf86getBoolValue(&val, xf86GetOptValString(options, id)); -+#endif -+ return val; -+} -+ -+static int -+namecmp(const char *s1, const char *s2) -+{ -+ char c1, c2; -+ -+ if (!s1 || *s1 == 0) { -+ if (!s2 || *s2 == 0) -+ return 0; -+ else -+ return 1; -+ } -+ -+ while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') -+ s1++; -+ -+ while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') -+ 
s2++; -+ -+ c1 = isupper(*s1) ? tolower(*s1) : *s1; -+ c2 = isupper(*s2) ? tolower(*s2) : *s2; -+ while (c1 == c2) { -+ if (c1 == '\0') -+ return 0; -+ -+ s1++; -+ while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') -+ s1++; -+ -+ s2++; -+ while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') -+ s2++; -+ -+ c1 = isupper(*s1) ? tolower(*s1) : *s1; -+ c2 = isupper(*s2) ? tolower(*s2) : *s2; -+ } -+ -+ return c1 - c2; -+} -+ -+unsigned intel_option_cast_to_unsigned(OptionInfoPtr options, int id, unsigned val) -+{ -+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) -+ const char *str = xf86GetOptValString(options, id); -+#else -+ const char *str = NULL; -+#endif -+ unsigned v; -+ -+ if (str == NULL || *str == '\0') -+ return val; -+ -+ if (namecmp(str, "on") == 0) -+ return val; -+ if (namecmp(str, "true") == 0) -+ return val; -+ if (namecmp(str, "yes") == 0) -+ return val; -+ -+ if (namecmp(str, "0") == 0) -+ return 0; -+ if (namecmp(str, "off") == 0) -+ return 0; -+ if (namecmp(str, "false") == 0) -+ return 0; -+ if (namecmp(str, "no") == 0) -+ return 0; -+ -+ v = atoi(str); -+ if (v) -+ return v; -+ -+ return val; -+} -diff --git a/src/intel_options.h b/src/intel_options.h -index 7e2cbd9b..43635f1f 100644 ---- a/src/intel_options.h -+++ b/src/intel_options.h -@@ -12,15 +12,17 @@ - */ - - enum intel_options { -- OPTION_ACCEL_DISABLE, -+ OPTION_ACCEL_ENABLE, - OPTION_ACCEL_METHOD, - OPTION_BACKLIGHT, -+ OPTION_EDID, - OPTION_DRI, - OPTION_PRESENT, - OPTION_VIDEO_KEY, - OPTION_COLOR_KEY, - OPTION_TILING_2D, - OPTION_TILING_FB, -+ OPTION_ROTATION, - OPTION_VSYNC, - OPTION_PAGEFLIP, - OPTION_SWAPBUFFERS_WAIT, -@@ -28,7 +30,6 @@ enum intel_options { - OPTION_PREFER_OVERLAY, - OPTION_HOTPLUG, - OPTION_REPROBE, -- OPTION_DELETE_DP12, - #if defined(XvMCExtension) && defined(ENABLE_XVMC) - OPTION_XVMC, - #define INTEL_XVMC 1 -@@ -51,5 +52,7 @@ enum intel_options { - - extern const OptionInfoRec intel_options[]; - OptionInfoPtr intel_options_get(ScrnInfoPtr scrn); 
-+unsigned intel_option_cast_to_unsigned(OptionInfoPtr, int id, unsigned val); -+Bool intel_option_cast_to_bool(OptionInfoPtr, int id, Bool val); - - #endif /* INTEL_OPTIONS_H */ -diff --git a/src/legacy/i810/i810_common.h b/src/legacy/i810/i810_common.h -index 4cc10e8b..8355708c 100644 ---- a/src/legacy/i810/i810_common.h -+++ b/src/legacy/i810/i810_common.h -@@ -52,7 +52,7 @@ - - #define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1)) - --/* Using usleep() makes things noticably slow. */ -+/* Using usleep() makes things noticeably slow. */ - #if 0 - #define DELAY(x) usleep(x) - #else -@@ -185,7 +185,7 @@ enum { - * - zbuffer linear offset and pitch -- also invarient - * - drawing origin in back and depth buffers. - * -- * Keep the depth/back buffer state here to acommodate private buffers -+ * Keep the depth/back buffer state here to accommodate private buffers - * in the future. - */ - #define I810_DESTREG_DI0 0 /* CMD_OP_DESTBUFFER_INFO (2 dwords) */ -diff --git a/src/legacy/i810/i810_hwmc.c b/src/legacy/i810/i810_hwmc.c -index 7cb9c1ab..58661b0a 100644 ---- a/src/legacy/i810/i810_hwmc.c -+++ b/src/legacy/i810/i810_hwmc.c -@@ -171,7 +171,7 @@ static XF86MCAdaptorPtr ppAdapt[1] = - * - * I810InitMC - * -- * Initialize the hardware motion compenstation extention for this -+ * Initialize the hardware motion compensation extension for this - * hardware. The initialization routines want the address of the pointers - * to the structures, not the address of the structures. This means we - * allocate (or create static?) 
the pointer memory and pass that -diff --git a/src/legacy/i810/i810_memory.c b/src/legacy/i810/i810_memory.c -index c3de2777..6f274836 100644 ---- a/src/legacy/i810/i810_memory.c -+++ b/src/legacy/i810/i810_memory.c -@@ -76,7 +76,7 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn) - unsigned long size = pScrn->videoRam * 1024UL; - I810Ptr pI810 = I810PTR(pScrn); - int key; -- long tom = 0; -+ unsigned long tom = 0; - unsigned long physical; - - if (!xf86AgpGARTSupported() || !xf86AcquireGART(pScrn->scrnIndex)) { -@@ -132,8 +132,8 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn) - * Keep it 512K aligned for the sake of tiled regions. - */ - -- tom += 0x7ffff; -- tom &= ~0x7ffff; -+ tom += 0x7ffffUL; -+ tom &= ~0x7ffffUL; - - if ((key = xf86AllocateGARTMemory(pScrn->scrnIndex, size, 1, NULL)) != -1) { - pI810->DcacheOffset = tom; -diff --git a/src/legacy/i810/i810_reg.h b/src/legacy/i810/i810_reg.h -index 54faeb3d..fa091c5b 100644 ---- a/src/legacy/i810/i810_reg.h -+++ b/src/legacy/i810/i810_reg.h -@@ -245,7 +245,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * not sure they refer to local (graphics) memory. - * - * These details are for the local memory control registers, -- * (pp301-310). The test machines are not equiped with local memory, -+ * (pp301-310). The test machines are not equipped with local memory, - * so nothing is tested. Only a single row seems to be supported. 
- */ - #define DRAM_ROW_TYPE 0x3000 -diff --git a/src/legacy/i810/i810_video.c b/src/legacy/i810/i810_video.c -index be49b91d..af683c81 100644 ---- a/src/legacy/i810/i810_video.c -+++ b/src/legacy/i810/i810_video.c -@@ -77,7 +77,11 @@ static int I810PutImage( ScrnInfoPtr, - static int I810QueryImageAttributes(ScrnInfoPtr, - int, unsigned short *, unsigned short *, int *, int *); - -+#if !HAVE_NOTIFY_FD - static void I810BlockHandler(BLOCKHANDLER_ARGS_DECL); -+#else -+static void I810BlockHandler(void *data, void *_timeout); -+#endif - - #define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE) - -@@ -418,8 +422,14 @@ I810SetupImageVideo(ScreenPtr screen) - - pI810->adaptor = adapt; - -+#if !HAVE_NOTIFY_FD - pI810->BlockHandler = screen->BlockHandler; - screen->BlockHandler = I810BlockHandler; -+#else -+ RegisterBlockAndWakeupHandlers(I810BlockHandler, -+ (ServerWakeupHandlerProcPtr)NoopDDA, -+ pScrn); -+#endif - - xvBrightness = MAKE_ATOM("XV_BRIGHTNESS"); - xvContrast = MAKE_ATOM("XV_CONTRAST"); -@@ -1135,6 +1145,7 @@ I810QueryImageAttributes( - return size; - } - -+#if !HAVE_NOTIFY_FD - static void - I810BlockHandler (BLOCKHANDLER_ARGS_DECL) - { -@@ -1172,6 +1183,38 @@ I810BlockHandler (BLOCKHANDLER_ARGS_DECL) - } - } - } -+#else -+static void -+I810BlockHandler(void *data, void *_timeout) -+{ -+ ScrnInfoPtr pScrn = data; -+ I810Ptr pI810 = I810PTR(pScrn); -+ I810PortPrivPtr pPriv = GET_PORT_PRIVATE(pScrn); -+ I810OverlayRegPtr overlay = (I810OverlayRegPtr) (pI810->FbBase + pI810->OverlayStart); -+ -+ if(pPriv->videoStatus & TIMER_MASK) { -+ UpdateCurrentTime(); -+ if(pPriv->videoStatus & OFF_TIMER) { -+ if(pPriv->offTime < currentTime.milliseconds) { -+ /* Turn off the overlay */ -+ overlay->OV0CMD &= 0xFFFFFFFE; -+ OVERLAY_UPDATE(pI810->OverlayPhysical); -+ -+ pPriv->videoStatus = FREE_TIMER; -+ pPriv->freeTime = currentTime.milliseconds + FREE_DELAY; -+ } -+ } else { /* FREE_TIMER */ -+ if(pPriv->freeTime < currentTime.milliseconds) { -+ if(pPriv->linear) { -+ 
xf86FreeOffscreenLinear(pPriv->linear); -+ pPriv->linear = NULL; -+ } -+ pPriv->videoStatus = 0; -+ } -+ } -+ } -+} -+#endif - - - /*************************************************************************** -@@ -1373,7 +1416,6 @@ I810DisplaySurface( - UpdateCurrentTime(); - pI810Priv->videoStatus = FREE_TIMER; - pI810Priv->freeTime = currentTime.milliseconds + FREE_DELAY; -- pScrn->pScreen->BlockHandler = I810BlockHandler; - } - - return Success; -diff --git a/src/legacy/i810/xvmc/I810XvMC.c b/src/legacy/i810/xvmc/I810XvMC.c -index e6b63d30..a538e999 100644 ---- a/src/legacy/i810/xvmc/I810XvMC.c -+++ b/src/legacy/i810/xvmc/I810XvMC.c -@@ -61,7 +61,7 @@ static int event_base; - // Arguments: pI810XvMC private data structure from the current context. - // Notes: We faked the drmMapBufs for the i810's security so now we have - // to insert an allocated page into the correct spot in the faked --// list to keep up appearences. -+// list to keep up appearances. - // Concept for this function was taken from Mesa sources. - // Returns: drmBufPtr containing the information about the allocated page. - ***************************************************************************/ -@@ -188,7 +188,7 @@ _X_EXPORT Status XvMCCreateContext(Display *display, XvPortID port, - - /* Check for drm */ - if(! drmAvailable()) { -- printf("Direct Rendering is not avilable on this system!\n"); -+ printf("Direct Rendering is not available on this system!\n"); - return BadAlloc; - } - -@@ -3279,7 +3279,7 @@ _X_EXPORT Status XvMCSyncSurface(Display *display,XvMCSurface *surface) { - // display - Connection to X server - // surface - Surface to flush - // Info: --// This command is a noop for i810 becuase we always dispatch buffers in -+// This command is a noop for i810 because we always dispatch buffers in - // render. There is little gain to be had with 4k buffers. 
- // Returns: Status - ***************************************************************************/ -diff --git a/src/render_program/exa_wm.g4i b/src/render_program/exa_wm.g4i -index 5d3d45b1..587b581c 100644 ---- a/src/render_program/exa_wm.g4i -+++ b/src/render_program/exa_wm.g4i -@@ -57,7 +57,7 @@ define(`mask_dw_dy', `g6.4<0,1,0>F') - define(`mask_wo', `g6.12<0,1,0>F') - - /* -- * Local variables. Pairs must be aligned on even reg boundry -+ * Local variables. Pairs must be aligned on even reg boundary - */ - - /* this holds the X dest coordinates */ -diff --git a/src/render_program/exa_wm_yuv_rgb.g8a b/src/render_program/exa_wm_yuv_rgb.g8a -index 7def0930..34973ba8 100644 ---- a/src/render_program/exa_wm_yuv_rgb.g8a -+++ b/src/render_program/exa_wm_yuv_rgb.g8a -@@ -76,7 +76,7 @@ add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; - /* - * R = Y + Cr * 1.596 - */ --mov (8) acc0<1>F Yn<8,8,1>F { compr align1 }; -+mov (8) acc0<1>F Yn_01<8,8,1>F { compr align1 }; - mac.sat(8) src_sample_r_01<1>F Crn_01<8,8,1>F 1.596F { compr align1 }; - - mov (8) acc0<1>F Yn_23<8,8,1>F { compr align1 }; -@@ -84,7 +84,7 @@ mac.sat(8) src_sample_r_23<1>F Crn_23<8,8,1>F 1.596F { compr align1 }; - /* - * G = Crn * -0.813 + Cbn * -0.392 + Y - */ --mov (8) acc0<1>F Yn_23<8,8,1>F { compr align1 }; -+mov (8) acc0<1>F Yn_01<8,8,1>F { compr align1 }; - mac (8) acc0<1>F Crn_01<8,8,1>F -0.813F { compr align1 }; - mac.sat(8) src_sample_g_01<1>F Cbn_01<8,8,1>F -0.392F { compr align1 }; - -diff --git a/src/render_program/exa_wm_yuv_rgb.g8b b/src/render_program/exa_wm_yuv_rgb.g8b -index 44949538..2cd6fc44 100644 ---- a/src/render_program/exa_wm_yuv_rgb.g8b -+++ b/src/render_program/exa_wm_yuv_rgb.g8b -@@ -6,7 +6,7 @@ - { 0x80600048, 0x21c03ae8, 0x3e8d02c0, 0x3fcc49ba }, - { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 }, - { 0x80600048, 0x21e03ae8, 0x3e8d02e0, 0x3fcc49ba }, -- { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 }, -+ { 0x00600001, 0x24003ae0, 0x008d0300, 0x00000000 }, - { 
0x00600048, 0x24003ae0, 0x3e8d02c0, 0xbf5020c5 }, - { 0x80600048, 0x22003ae8, 0x3e8d0340, 0xbec8b439 }, - { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 }, -diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am -index e09a8d49..adf13963 100644 ---- a/src/sna/Makefile.am -+++ b/src/sna/Makefile.am -@@ -107,6 +107,8 @@ libsna_la_SOURCES = \ - gen8_render.h \ - gen8_vertex.c \ - gen8_vertex.h \ -+ gen9_render.c \ -+ gen9_render.h \ - xassert.h \ - $(NULL) - -diff --git a/src/sna/blt.c b/src/sna/blt.c -index b5bfee69..cb90437a 100644 ---- a/src/sna/blt.c -+++ b/src/sna/blt.c -@@ -30,112 +30,608 @@ - #endif - - #include "sna.h" -+#include - --#if __x86_64__ --#define USE_SSE2 1 --#endif -- --#if USE_SSE2 -+#if defined(sse2) -+#pragma GCC push_options -+#pragma GCC target("sse2,inline-all-stringops,fpmath=sse") -+#pragma GCC optimize("Ofast") - #include - - #if __x86_64__ - #define have_sse2() 1 - #else --enum { -- MMX = 0x1, -- MMX_EXTENSIONS = 0x2, -- SSE = 0x6, -- SSE2 = 0x8, -- CMOV = 0x10 --}; -- --#ifdef __GNUC__ --static unsigned int --detect_cpu_features(void) --{ -- unsigned int features; -- unsigned int result = 0; -- -- char vendor[13]; -- vendor[0] = 0; -- vendor[12] = 0; -- -- asm ( -- "pushf\n" -- "pop %%eax\n" -- "mov %%eax, %%ecx\n" -- "xor $0x00200000, %%eax\n" -- "push %%eax\n" -- "popf\n" -- "pushf\n" -- "pop %%eax\n" -- "mov $0x0, %%edx\n" -- "xor %%ecx, %%eax\n" -- "jz 1f\n" -- -- "mov $0x00000000, %%eax\n" -- "push %%ebx\n" -- "cpuid\n" -- "mov %%ebx, %%eax\n" -- "pop %%ebx\n" -- "mov %%eax, %1\n" -- "mov %%edx, %2\n" -- "mov %%ecx, %3\n" -- "mov $0x00000001, %%eax\n" -- "push %%ebx\n" -- "cpuid\n" -- "pop %%ebx\n" -- "1:\n" -- "mov %%edx, %0\n" -- : "=r" (result), "=m" (vendor[0]), "=m" (vendor[4]), "=m" (vendor[8]) -- :: "%eax", "%ecx", "%edx"); -- -- features = 0; -- if (result) { -- /* result now contains the standard feature bits */ -- if (result & (1 << 15)) -- features |= CMOV; -- if (result & (1 << 23)) -- features |= MMX; -- if (result & 
(1 << 25)) -- features |= SSE; -- if (result & (1 << 26)) -- features |= SSE2; -- } -- return features; --} --#else --static unsigned int detect_cpu_features(void) { return 0; } --#endif -- - static bool have_sse2(void) - { - static int sse2_present = -1; - - if (sse2_present == -1) -- sse2_present = detect_cpu_features() & SSE2; -+ sse2_present = sna_cpu_detect() & SSE2; - - return sse2_present; - } - #endif - --static inline __m128i -+static force_inline __m128i - xmm_create_mask_32(uint32_t mask) - { - return _mm_set_epi32(mask, mask, mask, mask); - } - --static inline __m128i -+static force_inline __m128i -+xmm_load_128(const __m128i *src) -+{ -+ return _mm_load_si128(src); -+} -+ -+static force_inline __m128i - xmm_load_128u(const __m128i *src) - { - return _mm_loadu_si128(src); - } - --static inline void -+static force_inline void - xmm_save_128(__m128i *dst, __m128i data) - { - _mm_store_si128(dst, data); - } -+ -+static force_inline void -+xmm_save_128u(__m128i *dst, __m128i data) -+{ -+ _mm_storeu_si128(dst, data); -+} -+ -+static force_inline void -+to_sse128xN(uint8_t *dst, const uint8_t *src, int bytes) -+{ -+ int i; -+ -+ for (i = 0; i < bytes / 128; i++) { -+ __m128i xmm0, xmm1, xmm2, xmm3; -+ __m128i xmm4, xmm5, xmm6, xmm7; -+ -+ xmm0 = xmm_load_128u((const __m128i*)src + 0); -+ xmm1 = xmm_load_128u((const __m128i*)src + 1); -+ xmm2 = xmm_load_128u((const __m128i*)src + 2); -+ xmm3 = xmm_load_128u((const __m128i*)src + 3); -+ xmm4 = xmm_load_128u((const __m128i*)src + 4); -+ xmm5 = xmm_load_128u((const __m128i*)src + 5); -+ xmm6 = xmm_load_128u((const __m128i*)src + 6); -+ xmm7 = xmm_load_128u((const __m128i*)src + 7); -+ -+ xmm_save_128((__m128i*)dst + 0, xmm0); -+ xmm_save_128((__m128i*)dst + 1, xmm1); -+ xmm_save_128((__m128i*)dst + 2, xmm2); -+ xmm_save_128((__m128i*)dst + 3, xmm3); -+ xmm_save_128((__m128i*)dst + 4, xmm4); -+ xmm_save_128((__m128i*)dst + 5, xmm5); -+ xmm_save_128((__m128i*)dst + 6, xmm6); -+ xmm_save_128((__m128i*)dst + 7, 
xmm7); -+ -+ dst += 128; -+ src += 128; -+ } -+} -+ -+static force_inline void -+to_sse64(uint8_t *dst, const uint8_t *src) -+{ -+ __m128i xmm1, xmm2, xmm3, xmm4; -+ -+ xmm1 = xmm_load_128u((const __m128i*)src + 0); -+ xmm2 = xmm_load_128u((const __m128i*)src + 1); -+ xmm3 = xmm_load_128u((const __m128i*)src + 2); -+ xmm4 = xmm_load_128u((const __m128i*)src + 3); -+ -+ xmm_save_128((__m128i*)dst + 0, xmm1); -+ xmm_save_128((__m128i*)dst + 1, xmm2); -+ xmm_save_128((__m128i*)dst + 2, xmm3); -+ xmm_save_128((__m128i*)dst + 3, xmm4); -+} -+ -+static force_inline void -+to_sse32(uint8_t *dst, const uint8_t *src) -+{ -+ __m128i xmm1, xmm2; -+ -+ xmm1 = xmm_load_128u((const __m128i*)src + 0); -+ xmm2 = xmm_load_128u((const __m128i*)src + 1); -+ -+ xmm_save_128((__m128i*)dst + 0, xmm1); -+ xmm_save_128((__m128i*)dst + 1, xmm2); -+} -+ -+static force_inline void -+to_sse16(uint8_t *dst, const uint8_t *src) -+{ -+ xmm_save_128((__m128i*)dst, xmm_load_128u((const __m128i*)src)); -+} -+ -+static void to_memcpy(uint8_t *dst, const uint8_t *src, unsigned len) -+{ -+ assert(len); -+ if ((uintptr_t)dst & 15) { -+ if (len <= 16 - ((uintptr_t)dst & 15)) { -+ memcpy(dst, src, len); -+ return; -+ } -+ -+ if ((uintptr_t)dst & 1) { -+ assert(len >= 1); -+ *dst++ = *src++; -+ len--; -+ } -+ if ((uintptr_t)dst & 2) { -+ assert(((uintptr_t)dst & 1) == 0); -+ assert(len >= 2); -+ *(uint16_t *)dst = *(const uint16_t *)src; -+ dst += 2; -+ src += 2; -+ len -= 2; -+ } -+ if ((uintptr_t)dst & 4) { -+ assert(((uintptr_t)dst & 3) == 0); -+ assert(len >= 4); -+ *(uint32_t *)dst = *(const uint32_t *)src; -+ dst += 4; -+ src += 4; -+ len -= 4; -+ } -+ if ((uintptr_t)dst & 8) { -+ assert(((uintptr_t)dst & 7) == 0); -+ assert(len >= 8); -+ *(uint64_t *)dst = *(const uint64_t *)src; -+ dst += 8; -+ src += 8; -+ len -= 8; -+ } -+ } -+ -+ assert(((uintptr_t)dst & 15) == 0); -+ while (len >= 64) { -+ to_sse64(dst, src); -+ dst += 64; -+ src += 64; -+ len -= 64; -+ } -+ if (len == 0) -+ return; -+ -+ if 
(len & 32) { -+ to_sse32(dst, src); -+ dst += 32; -+ src += 32; -+ } -+ if (len & 16) { -+ to_sse16(dst, src); -+ dst += 16; -+ src += 16; -+ } -+ if (len & 8) { -+ *(uint64_t *)dst = *(uint64_t *)src; -+ dst += 8; -+ src += 8; -+ } -+ if (len & 4) { -+ *(uint32_t *)dst = *(uint32_t *)src; -+ dst += 4; -+ src += 4; -+ } -+ memcpy(dst, src, len & 3); -+} -+ -+static void -+memcpy_to_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, -+ int32_t src_stride, int32_t dst_stride, -+ int16_t src_x, int16_t src_y, -+ int16_t dst_x, int16_t dst_y, -+ uint16_t width, uint16_t height) -+{ -+ const unsigned tile_width = 512; -+ const unsigned tile_height = 8; -+ const unsigned tile_size = 4096; -+ -+ const unsigned cpp = bpp / 8; -+ const unsigned tile_pixels = tile_width / cpp; -+ const unsigned tile_shift = ffs(tile_pixels) - 1; -+ const unsigned tile_mask = tile_pixels - 1; -+ -+ unsigned offset_x, length_x; -+ -+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", -+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); -+ assert(src != dst); -+ -+ if (src_x | src_y) -+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; -+ width *= cpp; -+ assert(src_stride >= width); -+ -+ if (dst_x & tile_mask) { -+ offset_x = (dst_x & tile_mask) * cpp; -+ length_x = min(tile_width - offset_x, width); -+ } else -+ length_x = 0; -+ dst = (uint8_t *)dst + (dst_x >> tile_shift) * tile_size; -+ -+ while (height--) { -+ unsigned w = width; -+ const uint8_t *src_row = src; -+ uint8_t *tile_row = dst; -+ -+ src = (const uint8_t *)src + src_stride; -+ -+ tile_row += dst_y / tile_height * dst_stride * tile_height; -+ tile_row += (dst_y & (tile_height-1)) * tile_width; -+ dst_y++; -+ -+ if (length_x) { -+ to_memcpy(tile_row + offset_x, src_row, length_x); -+ -+ tile_row += tile_size; -+ src_row = (const uint8_t *)src_row + length_x; -+ w -= length_x; -+ } -+ while (w >= tile_width) { -+ assert(((uintptr_t)tile_row & 
(tile_width - 1)) == 0); -+ to_sse128xN(assume_aligned(tile_row, tile_width), -+ src_row, tile_width); -+ tile_row += tile_size; -+ src_row = (const uint8_t *)src_row + tile_width; -+ w -= tile_width; -+ } -+ if (w) { -+ assert(((uintptr_t)tile_row & (tile_width - 1)) == 0); -+ to_memcpy(assume_aligned(tile_row, tile_width), -+ src_row, w); -+ } -+ } -+} -+ -+static force_inline void -+from_sse128xNu(uint8_t *dst, const uint8_t *src, int bytes) -+{ -+ int i; -+ -+ assert(((uintptr_t)src & 15) == 0); -+ -+ for (i = 0; i < bytes / 128; i++) { -+ __m128i xmm0, xmm1, xmm2, xmm3; -+ __m128i xmm4, xmm5, xmm6, xmm7; -+ -+ xmm0 = xmm_load_128((const __m128i*)src + 0); -+ xmm1 = xmm_load_128((const __m128i*)src + 1); -+ xmm2 = xmm_load_128((const __m128i*)src + 2); -+ xmm3 = xmm_load_128((const __m128i*)src + 3); -+ xmm4 = xmm_load_128((const __m128i*)src + 4); -+ xmm5 = xmm_load_128((const __m128i*)src + 5); -+ xmm6 = xmm_load_128((const __m128i*)src + 6); -+ xmm7 = xmm_load_128((const __m128i*)src + 7); -+ -+ xmm_save_128u((__m128i*)dst + 0, xmm0); -+ xmm_save_128u((__m128i*)dst + 1, xmm1); -+ xmm_save_128u((__m128i*)dst + 2, xmm2); -+ xmm_save_128u((__m128i*)dst + 3, xmm3); -+ xmm_save_128u((__m128i*)dst + 4, xmm4); -+ xmm_save_128u((__m128i*)dst + 5, xmm5); -+ xmm_save_128u((__m128i*)dst + 6, xmm6); -+ xmm_save_128u((__m128i*)dst + 7, xmm7); -+ -+ dst += 128; -+ src += 128; -+ } -+} -+ -+static force_inline void -+from_sse128xNa(uint8_t *dst, const uint8_t *src, int bytes) -+{ -+ int i; -+ -+ assert(((uintptr_t)dst & 15) == 0); -+ assert(((uintptr_t)src & 15) == 0); -+ -+ for (i = 0; i < bytes / 128; i++) { -+ __m128i xmm0, xmm1, xmm2, xmm3; -+ __m128i xmm4, xmm5, xmm6, xmm7; -+ -+ xmm0 = xmm_load_128((const __m128i*)src + 0); -+ xmm1 = xmm_load_128((const __m128i*)src + 1); -+ xmm2 = xmm_load_128((const __m128i*)src + 2); -+ xmm3 = xmm_load_128((const __m128i*)src + 3); -+ xmm4 = xmm_load_128((const __m128i*)src + 4); -+ xmm5 = xmm_load_128((const __m128i*)src + 5); -+ 
xmm6 = xmm_load_128((const __m128i*)src + 6); -+ xmm7 = xmm_load_128((const __m128i*)src + 7); -+ -+ xmm_save_128((__m128i*)dst + 0, xmm0); -+ xmm_save_128((__m128i*)dst + 1, xmm1); -+ xmm_save_128((__m128i*)dst + 2, xmm2); -+ xmm_save_128((__m128i*)dst + 3, xmm3); -+ xmm_save_128((__m128i*)dst + 4, xmm4); -+ xmm_save_128((__m128i*)dst + 5, xmm5); -+ xmm_save_128((__m128i*)dst + 6, xmm6); -+ xmm_save_128((__m128i*)dst + 7, xmm7); -+ -+ dst += 128; -+ src += 128; -+ } -+} -+ -+static force_inline void -+from_sse64u(uint8_t *dst, const uint8_t *src) -+{ -+ __m128i xmm1, xmm2, xmm3, xmm4; -+ -+ assert(((uintptr_t)src & 15) == 0); -+ -+ xmm1 = xmm_load_128((const __m128i*)src + 0); -+ xmm2 = xmm_load_128((const __m128i*)src + 1); -+ xmm3 = xmm_load_128((const __m128i*)src + 2); -+ xmm4 = xmm_load_128((const __m128i*)src + 3); -+ -+ xmm_save_128u((__m128i*)dst + 0, xmm1); -+ xmm_save_128u((__m128i*)dst + 1, xmm2); -+ xmm_save_128u((__m128i*)dst + 2, xmm3); -+ xmm_save_128u((__m128i*)dst + 3, xmm4); -+} -+ -+static force_inline void -+from_sse64a(uint8_t *dst, const uint8_t *src) -+{ -+ __m128i xmm1, xmm2, xmm3, xmm4; -+ -+ assert(((uintptr_t)dst & 15) == 0); -+ assert(((uintptr_t)src & 15) == 0); -+ -+ xmm1 = xmm_load_128((const __m128i*)src + 0); -+ xmm2 = xmm_load_128((const __m128i*)src + 1); -+ xmm3 = xmm_load_128((const __m128i*)src + 2); -+ xmm4 = xmm_load_128((const __m128i*)src + 3); -+ -+ xmm_save_128((__m128i*)dst + 0, xmm1); -+ xmm_save_128((__m128i*)dst + 1, xmm2); -+ xmm_save_128((__m128i*)dst + 2, xmm3); -+ xmm_save_128((__m128i*)dst + 3, xmm4); -+} -+ -+static force_inline void -+from_sse32u(uint8_t *dst, const uint8_t *src) -+{ -+ __m128i xmm1, xmm2; -+ -+ xmm1 = xmm_load_128((const __m128i*)src + 0); -+ xmm2 = xmm_load_128((const __m128i*)src + 1); -+ -+ xmm_save_128u((__m128i*)dst + 0, xmm1); -+ xmm_save_128u((__m128i*)dst + 1, xmm2); -+} -+ -+static force_inline void -+from_sse32a(uint8_t *dst, const uint8_t *src) -+{ -+ __m128i xmm1, xmm2; -+ -+ 
assert(((uintptr_t)dst & 15) == 0); -+ assert(((uintptr_t)src & 15) == 0); -+ -+ xmm1 = xmm_load_128((const __m128i*)src + 0); -+ xmm2 = xmm_load_128((const __m128i*)src + 1); -+ -+ xmm_save_128((__m128i*)dst + 0, xmm1); -+ xmm_save_128((__m128i*)dst + 1, xmm2); -+} -+ -+static force_inline void -+from_sse16u(uint8_t *dst, const uint8_t *src) -+{ -+ assert(((uintptr_t)src & 15) == 0); -+ -+ xmm_save_128u((__m128i*)dst, xmm_load_128((const __m128i*)src)); -+} -+ -+static force_inline void -+from_sse16a(uint8_t *dst, const uint8_t *src) -+{ -+ assert(((uintptr_t)dst & 15) == 0); -+ assert(((uintptr_t)src & 15) == 0); -+ -+ xmm_save_128((__m128i*)dst, xmm_load_128((const __m128i*)src)); -+} -+ -+static void -+memcpy_from_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, -+ int32_t src_stride, int32_t dst_stride, -+ int16_t src_x, int16_t src_y, -+ int16_t dst_x, int16_t dst_y, -+ uint16_t width, uint16_t height) -+{ -+ const unsigned tile_width = 512; -+ const unsigned tile_height = 8; -+ const unsigned tile_size = 4096; -+ -+ const unsigned cpp = bpp / 8; -+ const unsigned tile_pixels = tile_width / cpp; -+ const unsigned tile_shift = ffs(tile_pixels) - 1; -+ const unsigned tile_mask = tile_pixels - 1; -+ -+ unsigned length_x, offset_x; -+ -+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", -+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); -+ assert(src != dst); -+ -+ if (dst_x | dst_y) -+ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; -+ width *= cpp; -+ assert(dst_stride >= width); -+ if (src_x & tile_mask) { -+ offset_x = (src_x & tile_mask) * cpp; -+ length_x = min(tile_width - offset_x, width); -+ dst_stride -= width; -+ dst_stride += (width - length_x) & 15; -+ } else { -+ offset_x = 0; -+ dst_stride -= width & ~15; -+ } -+ assert(dst_stride >= 0); -+ src = (const uint8_t *)src + (src_x >> tile_shift) * tile_size; -+ -+ while (height--) { -+ unsigned w = width; -+ const 
uint8_t *tile_row = src; -+ -+ tile_row += src_y / tile_height * src_stride * tile_height; -+ tile_row += (src_y & (tile_height-1)) * tile_width; -+ src_y++; -+ -+ if (offset_x) { -+ memcpy(dst, tile_row + offset_x, length_x); -+ tile_row += tile_size; -+ dst = (uint8_t *)dst + length_x; -+ w -= length_x; -+ } -+ -+ if ((uintptr_t)dst & 15) { -+ while (w >= tile_width) { -+ from_sse128xNu(dst, -+ assume_aligned(tile_row, tile_width), -+ tile_width); -+ tile_row += tile_size; -+ dst = (uint8_t *)dst + tile_width; -+ w -= tile_width; -+ } -+ while (w >= 64) { -+ from_sse64u(dst, tile_row); -+ tile_row += 64; -+ dst = (uint8_t *)dst + 64; -+ w -= 64; -+ } -+ if (w & 32) { -+ from_sse32u(dst, tile_row); -+ tile_row += 32; -+ dst = (uint8_t *)dst + 32; -+ } -+ if (w & 16) { -+ from_sse16u(dst, tile_row); -+ tile_row += 16; -+ dst = (uint8_t *)dst + 16; -+ } -+ memcpy(dst, assume_aligned(tile_row, 16), w & 15); -+ } else { -+ while (w >= tile_width) { -+ from_sse128xNa(assume_aligned(dst, 16), -+ assume_aligned(tile_row, tile_width), -+ tile_width); -+ tile_row += tile_size; -+ dst = (uint8_t *)dst + tile_width; -+ w -= tile_width; -+ } -+ while (w >= 64) { -+ from_sse64a(dst, tile_row); -+ tile_row += 64; -+ dst = (uint8_t *)dst + 64; -+ w -= 64; -+ } -+ if (w & 32) { -+ from_sse32a(dst, tile_row); -+ tile_row += 32; -+ dst = (uint8_t *)dst + 32; -+ } -+ if (w & 16) { -+ from_sse16a(dst, tile_row); -+ tile_row += 16; -+ dst = (uint8_t *)dst + 16; -+ } -+ memcpy(assume_aligned(dst, 16), -+ assume_aligned(tile_row, 16), -+ w & 15); -+ } -+ dst = (uint8_t *)dst + dst_stride; -+ } -+} -+ -+static void -+memcpy_between_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, -+ int32_t src_stride, int32_t dst_stride, -+ int16_t src_x, int16_t src_y, -+ int16_t dst_x, int16_t dst_y, -+ uint16_t width, uint16_t height) -+{ -+ const unsigned tile_width = 512; -+ const unsigned tile_height = 8; -+ const unsigned tile_size = 4096; -+ -+ const unsigned cpp = bpp / 8; -+ const 
unsigned tile_pixels = tile_width / cpp; -+ const unsigned tile_shift = ffs(tile_pixels) - 1; -+ const unsigned tile_mask = tile_pixels - 1; -+ -+ unsigned ox, lx; -+ -+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", -+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); -+ assert(src != dst); -+ -+ width *= cpp; -+ dst_stride *= tile_height; -+ src_stride *= tile_height; -+ -+ assert((dst_x & tile_mask) == (src_x & tile_mask)); -+ if (dst_x & tile_mask) { -+ ox = (dst_x & tile_mask) * cpp; -+ lx = min(tile_width - ox, width); -+ assert(lx != 0); -+ } else -+ lx = 0; -+ -+ if (dst_x) -+ dst = (uint8_t *)dst + (dst_x >> tile_shift) * tile_size; -+ if (src_x) -+ src = (const uint8_t *)src + (src_x >> tile_shift) * tile_size; -+ -+ while (height--) { -+ const uint8_t *src_row; -+ uint8_t *dst_row; -+ unsigned w = width; -+ -+ dst_row = dst; -+ dst_row += dst_y / tile_height * dst_stride; -+ dst_row += (dst_y & (tile_height-1)) * tile_width; -+ dst_y++; -+ -+ src_row = src; -+ src_row += src_y / tile_height * src_stride; -+ src_row += (src_y & (tile_height-1)) * tile_width; -+ src_y++; -+ -+ if (lx) { -+ to_memcpy(dst_row + ox, src_row + ox, lx); -+ dst_row += tile_size; -+ src_row += tile_size; -+ w -= lx; -+ } -+ while (w >= tile_width) { -+ assert(((uintptr_t)dst_row & (tile_width - 1)) == 0); -+ assert(((uintptr_t)src_row & (tile_width - 1)) == 0); -+ to_sse128xN(assume_aligned(dst_row, tile_width), -+ assume_aligned(src_row, tile_width), -+ tile_width); -+ dst_row += tile_size; -+ src_row += tile_size; -+ w -= tile_width; -+ } -+ if (w) { -+ assert(((uintptr_t)dst_row & (tile_width - 1)) == 0); -+ assert(((uintptr_t)src_row & (tile_width - 1)) == 0); -+ to_memcpy(assume_aligned(dst_row, tile_width), -+ assume_aligned(src_row, tile_width), -+ w); -+ } -+ } -+} -+ -+#pragma GCC push_options - #endif - - fast void -@@ -257,7 +753,8 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp, - if 
(dst_x & tile_mask) { - const unsigned x = (dst_x & tile_mask) * cpp; - const unsigned len = min(tile_width - x, w); -- memcpy(tile_row + x, src, len); -+ memcpy(assume_misaligned(tile_row + x, tile_width, x), -+ src, len); - - tile_row += tile_size; - src = (const uint8_t *)src + len; -@@ -265,13 +762,13 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp, - } - } - while (w >= tile_width) { -- memcpy(tile_row, src, tile_width); -- -+ memcpy(assume_aligned(tile_row, tile_width), -+ src, tile_width); - tile_row += tile_size; - src = (const uint8_t *)src + tile_width; - w -= tile_width; - } -- memcpy(tile_row, src, w); -+ memcpy(assume_aligned(tile_row, tile_width), src, w); - src = (const uint8_t *)src + src_stride + w; - dst_y++; - } -@@ -313,7 +810,7 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp, - if (src_x & tile_mask) { - const unsigned x = (src_x & tile_mask) * cpp; - const unsigned len = min(tile_width - x, w); -- memcpy(dst, tile_row + x, len); -+ memcpy(dst, assume_misaligned(tile_row + x, tile_width, x), len); - - tile_row += tile_size; - dst = (uint8_t *)dst + len; -@@ -321,440 +818,371 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp, - } - } - while (w >= tile_width) { -- memcpy(dst, tile_row, tile_width); -+ memcpy(dst, -+ assume_aligned(tile_row, tile_width), -+ tile_width); - - tile_row += tile_size; - dst = (uint8_t *)dst + tile_width; - w -= tile_width; - } -- memcpy(dst, tile_row, w); -+ memcpy(dst, assume_aligned(tile_row, tile_width), w); - dst = (uint8_t *)dst + dst_stride + w; - src_y++; - } - } - --fast_memcpy static void --memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp, -- int32_t src_stride, int32_t dst_stride, -- int16_t src_x, int16_t src_y, -- int16_t dst_x, int16_t dst_y, -- uint16_t width, uint16_t height) -+static fast_memcpy void -+memcpy_between_tiled_x__swizzle_0(const void *src, void *dst, int bpp, -+ int32_t src_stride, int32_t dst_stride, -+ int16_t 
src_x, int16_t src_y, -+ int16_t dst_x, int16_t dst_y, -+ uint16_t width, uint16_t height) - { - const unsigned tile_width = 512; - const unsigned tile_height = 8; - const unsigned tile_size = 4096; - - const unsigned cpp = bpp / 8; -- const unsigned stride_tiles = dst_stride / tile_width; -- const unsigned swizzle_pixels = 64 / cpp; -- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; -- const unsigned tile_mask = (1 << tile_pixels) - 1; -- -- unsigned x, y; -+ const unsigned tile_pixels = tile_width / cpp; -+ const unsigned tile_shift = ffs(tile_pixels) - 1; -+ const unsigned tile_mask = tile_pixels - 1; - - DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", - __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); -+ assert(src != dst); -+ assert((dst_x & tile_mask) == (src_x & tile_mask)); - -- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; -- -- for (y = 0; y < height; ++y) { -- const uint32_t dy = y + dst_y; -- const uint32_t tile_row = -- (dy / tile_height * stride_tiles * tile_size + -- (dy & (tile_height-1)) * tile_width); -- const uint8_t *src_row = (const uint8_t *)src + src_stride * y; -- uint32_t dx = dst_x, offset; -- -- x = width * cpp; -- if (dx & (swizzle_pixels - 1)) { -- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); -- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; -- offset = tile_row + -- (dx >> tile_pixels) * tile_size + -- (dx & tile_mask) * cpp; -- offset ^= (offset >> 3) & 64; -- -- memcpy((char *)dst + offset, src_row, length * cpp); -- -- src_row += length * cpp; -- x -= length * cpp; -- dx += length; -- } -- while (x >= 64) { -- offset = tile_row + -- (dx >> tile_pixels) * tile_size + -- (dx & tile_mask) * cpp; -- offset ^= (offset >> 3) & 64; -- -- memcpy((char *)dst + offset, src_row, 64); -- -- src_row += 64; -- x -= 64; -- dx += swizzle_pixels; -- } -- if (x) { -- offset = tile_row + -- (dx >> tile_pixels) * 
tile_size + -- (dx & tile_mask) * cpp; -- offset ^= (offset >> 3) & 64; -- memcpy((char *)dst + offset, src_row, x); -- } -- } --} -+ while (height--) { -+ unsigned w = width * cpp; -+ uint8_t *dst_row = dst; -+ const uint8_t *src_row = src; - --fast_memcpy static void --memcpy_from_tiled_x__swizzle_9(const void *src, void *dst, int bpp, -- int32_t src_stride, int32_t dst_stride, -- int16_t src_x, int16_t src_y, -- int16_t dst_x, int16_t dst_y, -- uint16_t width, uint16_t height) --{ -- const unsigned tile_width = 512; -- const unsigned tile_height = 8; -- const unsigned tile_size = 4096; -+ dst_row += dst_y / tile_height * dst_stride * tile_height; -+ dst_row += (dst_y & (tile_height-1)) * tile_width; -+ if (dst_x) -+ dst_row += (dst_x >> tile_shift) * tile_size; -+ dst_y++; - -- const unsigned cpp = bpp / 8; -- const unsigned stride_tiles = src_stride / tile_width; -- const unsigned swizzle_pixels = 64 / cpp; -- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; -- const unsigned tile_mask = (1 << tile_pixels) - 1; -+ src_row += src_y / tile_height * src_stride * tile_height; -+ src_row += (src_y & (tile_height-1)) * tile_width; -+ if (src_x) -+ src_row += (src_x >> tile_shift) * tile_size; -+ src_y++; - -- unsigned x, y; -+ if (dst_x & tile_mask) { -+ const unsigned x = (dst_x & tile_mask) * cpp; -+ const unsigned len = min(tile_width - x, w); - -- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", -- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); -+ memcpy(assume_misaligned(dst_row + x, tile_width, x), -+ assume_misaligned(src_row + x, tile_width, x), -+ len); - -- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; -- -- for (y = 0; y < height; ++y) { -- const uint32_t sy = y + src_y; -- const uint32_t tile_row = -- (sy / tile_height * stride_tiles * tile_size + -- (sy & (tile_height-1)) * tile_width); -- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; -- uint32_t sx = src_x, offset; 
-- -- x = width * cpp; -- if (sx & (swizzle_pixels - 1)) { -- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); -- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; -- offset = tile_row + -- (sx >> tile_pixels) * tile_size + -- (sx & tile_mask) * cpp; -- offset ^= (offset >> 3) & 64; -- -- memcpy(dst_row, (const char *)src + offset, length * cpp); -- -- dst_row += length * cpp; -- x -= length * cpp; -- sx += length; -+ dst_row += tile_size; -+ src_row += tile_size; -+ w -= len; - } -- while (x >= 64) { -- offset = tile_row + -- (sx >> tile_pixels) * tile_size + -- (sx & tile_mask) * cpp; -- offset ^= (offset >> 3) & 64; - -- memcpy(dst_row, (const char *)src + offset, 64); -- -- dst_row += 64; -- x -= 64; -- sx += swizzle_pixels; -- } -- if (x) { -- offset = tile_row + -- (sx >> tile_pixels) * tile_size + -- (sx & tile_mask) * cpp; -- offset ^= (offset >> 3) & 64; -- memcpy(dst_row, (const char *)src + offset, x); -+ while (w >= tile_width) { -+ memcpy(assume_aligned(dst_row, tile_width), -+ assume_aligned(src_row, tile_width), -+ tile_width); -+ dst_row += tile_size; -+ src_row += tile_size; -+ w -= tile_width; - } -+ memcpy(assume_aligned(dst_row, tile_width), -+ assume_aligned(src_row, tile_width), -+ w); - } - } - --fast_memcpy static void --memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp, -- int32_t src_stride, int32_t dst_stride, -- int16_t src_x, int16_t src_y, -- int16_t dst_x, int16_t dst_y, -- uint16_t width, uint16_t height) --{ -- const unsigned tile_width = 512; -- const unsigned tile_height = 8; -- const unsigned tile_size = 4096; -- -- const unsigned cpp = bpp / 8; -- const unsigned stride_tiles = dst_stride / tile_width; -- const unsigned swizzle_pixels = 64 / cpp; -- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; -- const unsigned tile_mask = (1 << tile_pixels) - 1; -+#define memcpy_to_tiled_x(swizzle) \ -+fast_memcpy static void \ -+memcpy_to_tiled_x__##swizzle (const void *src, 
void *dst, int bpp, \ -+ int32_t src_stride, int32_t dst_stride, \ -+ int16_t src_x, int16_t src_y, \ -+ int16_t dst_x, int16_t dst_y, \ -+ uint16_t width, uint16_t height) \ -+{ \ -+ const unsigned tile_width = 512; \ -+ const unsigned tile_height = 8; \ -+ const unsigned tile_size = 4096; \ -+ const unsigned cpp = bpp / 8; \ -+ const unsigned stride_tiles = dst_stride / tile_width; \ -+ const unsigned swizzle_pixels = 64 / cpp; \ -+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \ -+ const unsigned tile_mask = (1 << tile_pixels) - 1; \ -+ unsigned x, y; \ -+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \ -+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \ -+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; \ -+ for (y = 0; y < height; ++y) { \ -+ const uint32_t dy = y + dst_y; \ -+ const uint32_t tile_row = \ -+ (dy / tile_height * stride_tiles * tile_size + \ -+ (dy & (tile_height-1)) * tile_width); \ -+ const uint8_t *src_row = (const uint8_t *)src + src_stride * y; \ -+ uint32_t dx = dst_x; \ -+ x = width * cpp; \ -+ if (dx & (swizzle_pixels - 1)) { \ -+ const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); \ -+ const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; \ -+ uint32_t offset = \ -+ tile_row + \ -+ (dx >> tile_pixels) * tile_size + \ -+ (dx & tile_mask) * cpp; \ -+ memcpy((char *)dst + swizzle(offset), src_row, length * cpp); \ -+ src_row += length * cpp; \ -+ x -= length * cpp; \ -+ dx += length; \ -+ } \ -+ while (x >= 64) { \ -+ uint32_t offset = \ -+ tile_row + \ -+ (dx >> tile_pixels) * tile_size + \ -+ (dx & tile_mask) * cpp; \ -+ memcpy(assume_aligned((char *)dst+swizzle(offset),64), \ -+ src_row, 64); \ -+ src_row += 64; \ -+ x -= 64; \ -+ dx += swizzle_pixels; \ -+ } \ -+ if (x) { \ -+ uint32_t offset = \ -+ tile_row + \ -+ (dx >> tile_pixels) * tile_size + \ -+ (dx & tile_mask) * cpp; \ -+ memcpy(assume_aligned((char 
*)dst + swizzle(offset), 64), src_row, x); \ -+ } \ -+ } \ -+} - -- unsigned x, y; -+#define memcpy_from_tiled_x(swizzle) \ -+fast_memcpy static void \ -+memcpy_from_tiled_x__##swizzle (const void *src, void *dst, int bpp, \ -+ int32_t src_stride, int32_t dst_stride, \ -+ int16_t src_x, int16_t src_y, \ -+ int16_t dst_x, int16_t dst_y, \ -+ uint16_t width, uint16_t height) \ -+{ \ -+ const unsigned tile_width = 512; \ -+ const unsigned tile_height = 8; \ -+ const unsigned tile_size = 4096; \ -+ const unsigned cpp = bpp / 8; \ -+ const unsigned stride_tiles = src_stride / tile_width; \ -+ const unsigned swizzle_pixels = 64 / cpp; \ -+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \ -+ const unsigned tile_mask = (1 << tile_pixels) - 1; \ -+ unsigned x, y; \ -+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \ -+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \ -+ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; \ -+ for (y = 0; y < height; ++y) { \ -+ const uint32_t sy = y + src_y; \ -+ const uint32_t tile_row = \ -+ (sy / tile_height * stride_tiles * tile_size + \ -+ (sy & (tile_height-1)) * tile_width); \ -+ uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; \ -+ uint32_t sx = src_x; \ -+ x = width * cpp; \ -+ if (sx & (swizzle_pixels - 1)) { \ -+ const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); \ -+ const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; \ -+ uint32_t offset = \ -+ tile_row + \ -+ (sx >> tile_pixels) * tile_size + \ -+ (sx & tile_mask) * cpp; \ -+ memcpy(dst_row, (const char *)src + swizzle(offset), length * cpp); \ -+ dst_row += length * cpp; \ -+ x -= length * cpp; \ -+ sx += length; \ -+ } \ -+ while (x >= 64) { \ -+ uint32_t offset = \ -+ tile_row + \ -+ (sx >> tile_pixels) * tile_size + \ -+ (sx & tile_mask) * cpp; \ -+ memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), 64); \ -+ dst_row += 64; \ -+ x 
-= 64; \ -+ sx += swizzle_pixels; \ -+ } \ -+ if (x) { \ -+ uint32_t offset = \ -+ tile_row + \ -+ (sx >> tile_pixels) * tile_size + \ -+ (sx & tile_mask) * cpp; \ -+ memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), x); \ -+ } \ -+ } \ -+} - -- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", -- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); -+#define swizzle_9(X) ((X) ^ (((X) >> 3) & 64)) -+memcpy_to_tiled_x(swizzle_9) -+memcpy_from_tiled_x(swizzle_9) -+#undef swizzle_9 - -- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; -- -- for (y = 0; y < height; ++y) { -- const uint32_t dy = y + dst_y; -- const uint32_t tile_row = -- (dy / tile_height * stride_tiles * tile_size + -- (dy & (tile_height-1)) * tile_width); -- const uint8_t *src_row = (const uint8_t *)src + src_stride * y; -- uint32_t dx = dst_x, offset; -- -- x = width * cpp; -- if (dx & (swizzle_pixels - 1)) { -- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); -- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; -- offset = tile_row + -- (dx >> tile_pixels) * tile_size + -- (dx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; -- -- memcpy((char *)dst + offset, src_row, length * cpp); -- -- src_row += length * cpp; -- x -= length * cpp; -- dx += length; -- } -- while (x >= 64) { -- offset = tile_row + -- (dx >> tile_pixels) * tile_size + -- (dx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; -+#define swizzle_9_10(X) ((X) ^ ((((X) ^ ((X) >> 1)) >> 3) & 64)) -+memcpy_to_tiled_x(swizzle_9_10) -+memcpy_from_tiled_x(swizzle_9_10) -+#undef swizzle_9_10 - -- memcpy((char *)dst + offset, src_row, 64); -+#define swizzle_9_11(X) ((X) ^ ((((X) ^ ((X) >> 2)) >> 3) & 64)) -+memcpy_to_tiled_x(swizzle_9_11) -+memcpy_from_tiled_x(swizzle_9_11) -+#undef swizzle_9_11 - -- src_row += 64; -- x -= 64; -- dx += swizzle_pixels; -- } -- if (x) { 
-- offset = tile_row + -- (dx >> tile_pixels) * tile_size + -- (dx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; -- memcpy((char *)dst + offset, src_row, x); -- } -- } --} -+#define swizzle_9_10_11(X) ((X) ^ ((((X) ^ ((X) >> 1) ^ ((X) >> 2)) >> 3) & 64)) -+memcpy_to_tiled_x(swizzle_9_10_11) -+memcpy_from_tiled_x(swizzle_9_10_11) -+#undef swizzle_9_10_11 - --fast_memcpy static void --memcpy_from_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp, -- int32_t src_stride, int32_t dst_stride, -- int16_t src_x, int16_t src_y, -- int16_t dst_x, int16_t dst_y, -- uint16_t width, uint16_t height) -+static fast_memcpy void -+memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp, -+ int32_t src_stride, int32_t dst_stride, -+ int16_t src_x, int16_t src_y, -+ int16_t dst_x, int16_t dst_y, -+ uint16_t width, uint16_t height) - { -- const unsigned tile_width = 512; -- const unsigned tile_height = 8; -- const unsigned tile_size = 4096; -+ const unsigned tile_width = 128; -+ const unsigned tile_height = 16; -+ const unsigned tile_size = 2048; - - const unsigned cpp = bpp / 8; -- const unsigned stride_tiles = src_stride / tile_width; -- const unsigned swizzle_pixels = 64 / cpp; -- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; -- const unsigned tile_mask = (1 << tile_pixels) - 1; -- -- unsigned x, y; -+ const unsigned tile_pixels = tile_width / cpp; -+ const unsigned tile_shift = ffs(tile_pixels) - 1; -+ const unsigned tile_mask = tile_pixels - 1; - - DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", - __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); -+ assert(src != dst); - -- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; -- -- for (y = 0; y < height; ++y) { -- const uint32_t sy = y + src_y; -- const uint32_t tile_row = -- (sy / tile_height * stride_tiles * tile_size + -- (sy & (tile_height-1)) * tile_width); -- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; 
-- uint32_t sx = src_x, offset; -- -- x = width * cpp; -- if (sx & (swizzle_pixels - 1)) { -- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); -- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; -- offset = tile_row + -- (sx >> tile_pixels) * tile_size + -- (sx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; -- -- memcpy(dst_row, (const char *)src + offset, length * cpp); -- -- dst_row += length * cpp; -- x -= length * cpp; -- sx += length; -- } -- while (x >= 64) { -- offset = tile_row + -- (sx >> tile_pixels) * tile_size + -- (sx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; -- -- memcpy(dst_row, (const char *)src + offset, 64); -- -- dst_row += 64; -- x -= 64; -- sx += swizzle_pixels; -- } -- if (x) { -- offset = tile_row + -- (sx >> tile_pixels) * tile_size + -- (sx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; -- memcpy(dst_row, (const char *)src + offset, x); -- } -- } --} -- --fast_memcpy static void --memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp, -- int32_t src_stride, int32_t dst_stride, -- int16_t src_x, int16_t src_y, -- int16_t dst_x, int16_t dst_y, -- uint16_t width, uint16_t height) --{ -- const unsigned tile_width = 512; -- const unsigned tile_height = 8; -- const unsigned tile_size = 4096; -- -- const unsigned cpp = bpp / 8; -- const unsigned stride_tiles = dst_stride / tile_width; -- const unsigned swizzle_pixels = 64 / cpp; -- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; -- const unsigned tile_mask = (1 << tile_pixels) - 1; -+ if (src_x | src_y) -+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; -+ assert(src_stride >= width * cpp); -+ src_stride -= width * cpp; - -- unsigned x, y; -+ while (height--) { -+ unsigned w = width * cpp; -+ uint8_t *tile_row = dst; - -- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", -- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, 
width, height, src_stride, dst_stride)); -+ tile_row += dst_y / tile_height * dst_stride * tile_height; -+ tile_row += (dst_y & (tile_height-1)) * tile_width; -+ if (dst_x) { -+ tile_row += (dst_x >> tile_shift) * tile_size; -+ if (dst_x & tile_mask) { -+ const unsigned x = (dst_x & tile_mask) * cpp; -+ const unsigned len = min(tile_width - x, w); -+ memcpy(assume_misaligned(tile_row + x, tile_width, x), src, len); - -- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; -- -- for (y = 0; y < height; ++y) { -- const uint32_t dy = y + dst_y; -- const uint32_t tile_row = -- (dy / tile_height * stride_tiles * tile_size + -- (dy & (tile_height-1)) * tile_width); -- const uint8_t *src_row = (const uint8_t *)src + src_stride * y; -- uint32_t dx = dst_x, offset; -- -- x = width * cpp; -- if (dx & (swizzle_pixels - 1)) { -- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); -- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; -- offset = tile_row + -- (dx >> tile_pixels) * tile_size + -- (dx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; -- memcpy((char *)dst + offset, src_row, length * cpp); -- -- src_row += length * cpp; -- x -= length * cpp; -- dx += length; -+ tile_row += tile_size; -+ src = (const uint8_t *)src + len; -+ w -= len; -+ } - } -- while (x >= 64) { -- offset = tile_row + -- (dx >> tile_pixels) * tile_size + -- (dx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; -- -- memcpy((char *)dst + offset, src_row, 64); -+ while (w >= tile_width) { -+ memcpy(assume_aligned(tile_row, tile_width), -+ src, tile_width); - -- src_row += 64; -- x -= 64; -- dx += swizzle_pixels; -- } -- if (x) { -- offset = tile_row + -- (dx >> tile_pixels) * tile_size + -- (dx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; -- memcpy((char *)dst + offset, src_row, x); -+ tile_row += tile_size; -+ src = (const uint8_t *)src + tile_width; -+ w -= tile_width; - } -+ 
memcpy(assume_aligned(tile_row, tile_width), src, w); -+ src = (const uint8_t *)src + src_stride + w; -+ dst_y++; - } - } - --fast_memcpy static void --memcpy_from_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp, -- int32_t src_stride, int32_t dst_stride, -- int16_t src_x, int16_t src_y, -- int16_t dst_x, int16_t dst_y, -- uint16_t width, uint16_t height) -+static fast_memcpy void -+memcpy_from_tiled_x__gen2(const void *src, void *dst, int bpp, -+ int32_t src_stride, int32_t dst_stride, -+ int16_t src_x, int16_t src_y, -+ int16_t dst_x, int16_t dst_y, -+ uint16_t width, uint16_t height) - { -- const unsigned tile_width = 512; -- const unsigned tile_height = 8; -- const unsigned tile_size = 4096; -+ const unsigned tile_width = 128; -+ const unsigned tile_height = 16; -+ const unsigned tile_size = 2048; - - const unsigned cpp = bpp / 8; -- const unsigned stride_tiles = src_stride / tile_width; -- const unsigned swizzle_pixels = 64 / cpp; -- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; -- const unsigned tile_mask = (1 << tile_pixels) - 1; -- -- unsigned x, y; -+ const unsigned tile_pixels = tile_width / cpp; -+ const unsigned tile_shift = ffs(tile_pixels) - 1; -+ const unsigned tile_mask = tile_pixels - 1; - - DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", - __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); -+ assert(src != dst); - -- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; -- -- for (y = 0; y < height; ++y) { -- const uint32_t sy = y + src_y; -- const uint32_t tile_row = -- (sy / tile_height * stride_tiles * tile_size + -- (sy & (tile_height-1)) * tile_width); -- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; -- uint32_t sx = src_x, offset; -- -- x = width * cpp; -- if (sx & (swizzle_pixels - 1)) { -- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); -- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; -- offset = 
tile_row + -- (sx >> tile_pixels) * tile_size + -- (sx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; -- memcpy(dst_row, (const char *)src + offset, length * cpp); -- -- dst_row += length * cpp; -- x -= length * cpp; -- sx += length; -- } -- while (x >= 64) { -- offset = tile_row + -- (sx >> tile_pixels) * tile_size + -- (sx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; -+ if (dst_x | dst_y) -+ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; -+ assert(dst_stride >= width * cpp); -+ dst_stride -= width * cpp; -+ -+ while (height--) { -+ unsigned w = width * cpp; -+ const uint8_t *tile_row = src; - -- memcpy(dst_row, (const char *)src + offset, 64); -+ tile_row += src_y / tile_height * src_stride * tile_height; -+ tile_row += (src_y & (tile_height-1)) * tile_width; -+ if (src_x) { -+ tile_row += (src_x >> tile_shift) * tile_size; -+ if (src_x & tile_mask) { -+ const unsigned x = (src_x & tile_mask) * cpp; -+ const unsigned len = min(tile_width - x, w); -+ memcpy(dst, assume_misaligned(tile_row + x, tile_width, x), len); - -- dst_row += 64; -- x -= 64; -- sx += swizzle_pixels; -+ tile_row += tile_size; -+ dst = (uint8_t *)dst + len; -+ w -= len; -+ } - } -- if (x) { -- offset = tile_row + -- (sx >> tile_pixels) * tile_size + -- (sx & tile_mask) * cpp; -- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; -- memcpy(dst_row, (const char *)src + offset, x); -+ while (w >= tile_width) { -+ memcpy(dst, -+ assume_aligned(tile_row, tile_width), -+ tile_width); -+ -+ tile_row += tile_size; -+ dst = (uint8_t *)dst + tile_width; -+ w -= tile_width; - } -+ memcpy(dst, assume_aligned(tile_row, tile_width), w); -+ dst = (uint8_t *)dst + dst_stride + w; -+ src_y++; - } - } - --void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling) -+void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling, unsigned cpu) - { -+ if (kgem->gen < 030) { -+ if (swizzling == I915_BIT_6_SWIZZLE_NONE) { -+ DBG(("%s: gen2, no swizzling\n", 
__FUNCTION__)); -+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__gen2; -+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__gen2; -+ } else -+ DBG(("%s: no detiling with swizzle functions for gen2\n", __FUNCTION__)); -+ return; -+ } -+ - switch (swizzling) { - default: - DBG(("%s: unknown swizzling, %d\n", __FUNCTION__, swizzling)); - break; - case I915_BIT_6_SWIZZLE_NONE: - DBG(("%s: no swizzling\n", __FUNCTION__)); -- kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0; -- kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0; -+#if defined(sse2) -+ if (cpu & SSE2) { -+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0__sse2; -+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0__sse2; -+ kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0__sse2; -+ } else -+#endif -+ { -+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0; -+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0; -+ kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0; -+ } - break; - case I915_BIT_6_SWIZZLE_9: - DBG(("%s: 6^9 swizzling\n", __FUNCTION__)); -@@ -771,6 +1199,11 @@ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling) - kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11; - kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_11; - break; -+ case I915_BIT_6_SWIZZLE_9_10_11: -+ DBG(("%s: 6^9^10^11 swizzling\n", __FUNCTION__)); -+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10_11; -+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_10_11; -+ break; - } - } - -@@ -995,7 +1428,7 @@ memcpy_xor(const void *src, void *dst, int bpp, - height = 1; - } - --#if USE_SSE2 -+#if defined(sse2) && __x86_64__ - if (have_sse2()) { - do { - uint32_t *d = (uint32_t *)dst_bytes; -@@ -1118,3 +1551,241 @@ memcpy_xor(const void *src, void *dst, int bpp, - } - } - } -+ -+#define BILINEAR_INTERPOLATION_BITS 4 -+static inline int -+bilinear_weight(pixman_fixed_t x) -+{ -+ return (x >> (16 - 
BILINEAR_INTERPOLATION_BITS)) & -+ ((1 << BILINEAR_INTERPOLATION_BITS) - 1); -+} -+ -+#if BILINEAR_INTERPOLATION_BITS <= 4 -+/* Inspired by Filter_32_opaque from Skia */ -+static inline uint32_t -+bilinear_interpolation(uint32_t tl, uint32_t tr, -+ uint32_t bl, uint32_t br, -+ int distx, int disty) -+{ -+ int distxy, distxiy, distixy, distixiy; -+ uint32_t lo, hi; -+ -+ distx <<= (4 - BILINEAR_INTERPOLATION_BITS); -+ disty <<= (4 - BILINEAR_INTERPOLATION_BITS); -+ -+ distxy = distx * disty; -+ distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ -+ distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ -+ distixiy = -+ 16 * 16 - (disty << 4) - -+ (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ -+ -+ lo = (tl & 0xff00ff) * distixiy; -+ hi = ((tl >> 8) & 0xff00ff) * distixiy; -+ -+ lo += (tr & 0xff00ff) * distxiy; -+ hi += ((tr >> 8) & 0xff00ff) * distxiy; -+ -+ lo += (bl & 0xff00ff) * distixy; -+ hi += ((bl >> 8) & 0xff00ff) * distixy; -+ -+ lo += (br & 0xff00ff) * distxy; -+ hi += ((br >> 8) & 0xff00ff) * distxy; -+ -+ return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); -+} -+#elif SIZEOF_LONG > 4 -+static inline uint32_t -+bilinear_interpolation(uint32_t tl, uint32_t tr, -+ uint32_t bl, uint32_t br, -+ int distx, int disty) -+{ -+ uint64_t distxy, distxiy, distixy, distixiy; -+ uint64_t tl64, tr64, bl64, br64; -+ uint64_t f, r; -+ -+ distx <<= (8 - BILINEAR_INTERPOLATION_BITS); -+ disty <<= (8 - BILINEAR_INTERPOLATION_BITS); -+ -+ distxy = distx * disty; -+ distxiy = distx * (256 - disty); -+ distixy = (256 - distx) * disty; -+ distixiy = (256 - distx) * (256 - disty); -+ -+ /* Alpha and Blue */ -+ tl64 = tl & 0xff0000ff; -+ tr64 = tr & 0xff0000ff; -+ bl64 = bl & 0xff0000ff; -+ br64 = br & 0xff0000ff; -+ -+ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; -+ r = f & 0x0000ff0000ff0000ull; -+ -+ /* Red and Green */ -+ tl64 = tl; -+ tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); -+ -+ tr64 = tr; -+ 
tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); -+ -+ bl64 = bl; -+ bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); -+ -+ br64 = br; -+ br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); -+ -+ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; -+ r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); -+ -+ return (uint32_t)(r >> 16); -+} -+#else -+static inline uint32_t -+bilinear_interpolation(uint32_t tl, uint32_t tr, -+ uint32_t bl, uint32_t br, -+ int distx, int disty) -+{ -+ int distxy, distxiy, distixy, distixiy; -+ uint32_t f, r; -+ -+ distx <<= (8 - BILINEAR_INTERPOLATION_BITS); -+ disty <<= (8 - BILINEAR_INTERPOLATION_BITS); -+ -+ distxy = distx * disty; -+ distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ -+ distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ -+ distixiy = -+ 256 * 256 - (disty << 8) - -+ (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ -+ -+ /* Blue */ -+ r = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + -+ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy); -+ -+ /* Green */ -+ f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + -+ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy); -+ r |= f & 0xff000000; -+ -+ tl >>= 16; -+ tr >>= 16; -+ bl >>= 16; -+ br >>= 16; -+ r >>= 16; -+ -+ /* Red */ -+ f = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + -+ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy); -+ r |= f & 0x00ff0000; -+ -+ /* Alpha */ -+ f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + -+ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy); -+ r |= f & 0xff000000; -+ -+ return r; -+} -+#endif -+ -+static inline uint32_t convert_pixel(const uint8_t *p, int x) -+{ -+ return ((uint32_t *)p)[x]; -+} -+ -+fast void -+affine_blt(const void *src, void *dst, int bpp, -+ int16_t src_x, int16_t src_y, -+ int16_t src_width, 
int16_t src_height, -+ int32_t src_stride, -+ int16_t dst_x, int16_t dst_y, -+ uint16_t dst_width, uint16_t dst_height, -+ int32_t dst_stride, -+ const struct pixman_f_transform *t) -+{ -+ static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; -+ const pixman_fixed_t ux = pixman_double_to_fixed(t->m[0][0]); -+ const pixman_fixed_t uy = pixman_double_to_fixed(t->m[1][0]); -+ int i, j; -+ -+ assert(bpp == 32); -+ -+ for (j = 0; j < dst_height; j++) { -+ pixman_fixed_t x, y; -+ struct pixman_f_vector v; -+ uint32_t *b; -+ -+ /* reference point is the center of the pixel */ -+ v.v[0] = dst_x + 0.5; -+ v.v[1] = dst_y + j + 0.5; -+ v.v[2] = 1.0; -+ -+ pixman_f_transform_point_3d(t, &v); -+ -+ x = pixman_double_to_fixed(v.v[0]); -+ x += pixman_int_to_fixed(src_x - dst_x); -+ y = pixman_double_to_fixed(v.v[1]); -+ y += pixman_int_to_fixed(src_y - dst_y); -+ -+ b = (uint32_t*)((uint8_t *)dst + (dst_y + j) * dst_stride + dst_x * bpp / 8); -+ for (i = 0; i < dst_width; i++) { -+ const uint8_t *row1; -+ const uint8_t *row2; -+ int x1, y1, x2, y2; -+ uint32_t tl, tr, bl, br; -+ int32_t fx, fy; -+ -+ x1 = x - pixman_fixed_1/2; -+ y1 = y - pixman_fixed_1/2; -+ -+ fx = bilinear_weight(x1); -+ fy = bilinear_weight(y1); -+ -+ x1 = pixman_fixed_to_int(x1); -+ x2 = x1 + 1; -+ y1 = pixman_fixed_to_int(y1); -+ y2 = y1 + 1; -+ -+ if (x1 >= src_width || x2 < 0 || -+ y1 >= src_height || y2 < 0) { -+ b[i] = 0; -+ goto next; -+ } -+ -+ if (y2 == 0) { -+ row1 = zero; -+ } else { -+ row1 = (uint8_t *)src + src_stride * y1; -+ row1 += bpp / 8 * x1; -+ } -+ -+ if (y1 == src_height - 1) { -+ row2 = zero; -+ } else { -+ row2 = (uint8_t *)src + src_stride * y2; -+ row2 += bpp / 8 * x1; -+ } -+ -+ if (x2 == 0) { -+ tl = 0; -+ bl = 0; -+ } else { -+ tl = convert_pixel(row1, 0); -+ bl = convert_pixel(row2, 0); -+ } -+ -+ if (x1 == src_width - 1) { -+ tr = 0; -+ br = 0; -+ } else { -+ tr = convert_pixel(row1, 1); -+ br = convert_pixel(row2, 1); -+ } -+ -+ b[i] = bilinear_interpolation(tl, tr, bl, 
br, fx, fy); -+ -+next: -+ x += ux; -+ y += uy; -+ } -+ } -+} -diff --git a/src/sna/brw/brw_eu_emit.c b/src/sna/brw/brw_eu_emit.c -index 00c984d9..154f939a 100644 ---- a/src/sna/brw/brw_eu_emit.c -+++ b/src/sna/brw/brw_eu_emit.c -@@ -178,7 +178,7 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) - } - - if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && -- reg.file == BRW_ARF_NULL) -+ reg.nr == BRW_ARF_NULL) - return; - - assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg)); -@@ -700,7 +700,7 @@ push_if_stack(struct brw_compile *p, struct brw_instruction *inst) - * - * When the matching 'else' instruction is reached (presumably by - * countdown of the instruction count patched in by our ELSE/ENDIF -- * functions), the relevent flags are inverted. -+ * functions), the relevant flags are inverted. - * - * When the matching 'endif' instruction is reached, the flags are - * popped off. If the stack is now empty, normal execution resumes. -diff --git a/src/sna/compiler.h b/src/sna/compiler.h -index ff412179..0f3775ec 100644 ---- a/src/sna/compiler.h -+++ b/src/sna/compiler.h -@@ -39,6 +39,7 @@ - #define pure __attribute__((pure)) - #define tightly_packed __attribute__((__packed__)) - #define flatten __attribute__((flatten)) -+#define nonnull __attribute__((nonnull)) - #define page_aligned __attribute__((aligned(4096))) - #else - #define likely(expr) (expr) -@@ -51,18 +52,15 @@ - #define pure - #define tighly_packed - #define flatten -+#define nonnull - #define page_aligned - #endif - - #define HAS_GCC(major, minor) defined(__GNUC__) && (__GNUC__ > (major) || __GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) - - #if HAS_GCC(4, 5) --#define sse2 __attribute__((target("sse2,fpmath=sse"))) --#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse"))) --#endif -- --#if HAS_GCC(4, 7) --#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse"))) -+#define sse2 fast __attribute__((target("sse2,fpmath=sse"))) -+#define sse4_2 fast 
__attribute__((target("sse4.2,sse2,fpmath=sse"))) - #endif - - #if HAS_GCC(4, 6) && defined(__OPTIMIZE__) -@@ -71,10 +69,17 @@ - #define fast - #endif - --#if HAS_GCC(4, 6) && defined(__OPTIMIZE__) --#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops"))) --#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__) --#define fast_memcpy __attribute__((target("inline-all-stringops"))) -+#if HAS_GCC(4, 7) -+#define avx2 fast __attribute__((target("avx2,avx,sse4.2,sse2,fpmath=sse"))) -+#define assume_aligned(ptr, align) __builtin_assume_aligned((ptr), (align)) -+#define assume_misaligned(ptr, align, offset) __builtin_assume_aligned((ptr), (align), (offset)) -+#else -+#define assume_aligned(ptr, align) (ptr) -+#define assume_misaligned(ptr, align, offset) (ptr) -+#endif -+ -+#if HAS_GCC(4, 5) && defined(__OPTIMIZE__) -+#define fast_memcpy fast __attribute__((target("inline-all-stringops"))) - #else - #define fast_memcpy - #endif -diff --git a/src/sna/fb/fb.h b/src/sna/fb/fb.h -index 8bf9008a..90431747 100644 ---- a/src/sna/fb/fb.h -+++ b/src/sna/fb/fb.h -@@ -24,10 +24,6 @@ - #ifndef FB_H - #define FB_H - --#ifdef HAVE_CONFIG_H --#include "config.h" --#endif -- - #include - #include - #include -diff --git a/src/sna/fb/fbimage.c b/src/sna/fb/fbimage.c -index 5af23890..cc81c85b 100644 ---- a/src/sna/fb/fbimage.c -+++ b/src/sna/fb/fbimage.c -@@ -229,13 +229,19 @@ fbGetImage(DrawablePtr drawable, - FbBits pm; - - pm = fbReplicatePixel(planeMask, srcBpp); -+ - dstStride = PixmapBytePad(w, drawable->depth); -- if (pm != FB_ALLONES) -- memset(d, 0, dstStride * h); - dstStride /= sizeof(FbStip); -+ - fbBltStip((FbStip *)(src + (y + srcYoff) * srcStride), srcStride, - (x + srcXoff) * srcBpp, -- dst, dstStride, 0, w * srcBpp, h, GXcopy, pm, srcBpp); -+ dst, dstStride, 0, w * srcBpp, h, GXcopy, FB_ALLONES, srcBpp); -+ -+ if (pm != FB_ALLONES) { -+ int i = dstStride * h; -+ while (i--) -+ *dst++ &= pm; -+ } - } else { - dstStride = BitmapBytePad(w) 
/ sizeof(FbStip); - fbBltPlane(src + (y + srcYoff) * srcStride, -diff --git a/src/sna/fb/fbpict.h b/src/sna/fb/fbpict.h -index 932032f9..20877777 100644 ---- a/src/sna/fb/fbpict.h -+++ b/src/sna/fb/fbpict.h -@@ -24,10 +24,6 @@ - #ifndef FBPICT_H - #define FBPICT_H - --#ifdef HAVE_CONFIG_H --#include "config.h" --#endif -- - #include - #include - -diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c -index 1104f462..49ad16a3 100644 ---- a/src/sna/gen2_render.c -+++ b/src/sna/gen2_render.c -@@ -35,6 +35,7 @@ - #include "sna_reg.h" - #include "sna_render.h" - #include "sna_render_inline.h" -+#include "sna_video.h" - - #include "gen2_render.h" - -@@ -48,6 +49,7 @@ - - #define MAX_3D_SIZE 2048 - #define MAX_3D_PITCH 8192 -+#define MAX_INLINE (1 << 18) - - #define BATCH(v) batch_emit(sna, v) - #define BATCH_F(v) batch_emit_float(sna, v) -@@ -596,39 +598,43 @@ gen2_get_batch(struct sna *sna, const struct sna_composite_op *op) - gen2_emit_invariant(sna); - } - --static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op) -+static void gen2_emit_target(struct sna *sna, -+ struct kgem_bo *bo, -+ int width, -+ int height, -+ int format) - { -- assert(!too_large(op->dst.width, op->dst.height)); -- assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH); -+ assert(!too_large(width, height)); -+ assert(bo->pitch >= 8 && bo->pitch <= MAX_3D_PITCH); - assert(sna->render.vertex_offset == 0); - -- assert(op->dst.bo->unique_id); -- if (sna->render_state.gen2.target == op->dst.bo->unique_id) { -- kgem_bo_mark_dirty(op->dst.bo); -+ assert(bo->unique_id); -+ if (sna->render_state.gen2.target == bo->unique_id) { -+ kgem_bo_mark_dirty(bo); - return; - } - - BATCH(_3DSTATE_BUF_INFO_CMD); - BATCH(BUF_3D_ID_COLOR_BACK | -- gen2_buf_tiling(op->dst.bo->tiling) | -- BUF_3D_PITCH(op->dst.bo->pitch)); -+ gen2_buf_tiling(bo->tiling) | -+ BUF_3D_PITCH(bo->pitch)); - BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, -- op->dst.bo, -+ bo, - 
I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER, - 0)); - - BATCH(_3DSTATE_DST_BUF_VARS_CMD); -- BATCH(gen2_get_dst_format(op->dst.format)); -+ BATCH(gen2_get_dst_format(format)); - - BATCH(_3DSTATE_DRAW_RECT_CMD); - BATCH(0); - BATCH(0); /* ymin, xmin */ -- BATCH(DRAW_YMAX(op->dst.height - 1) | -- DRAW_XMAX(op->dst.width - 1)); -+ BATCH(DRAW_YMAX(height - 1) | -+ DRAW_XMAX(width - 1)); - BATCH(0); /* yorig, xorig */ - -- sna->render_state.gen2.target = op->dst.bo->unique_id; -+ sna->render_state.gen2.target = bo->unique_id; - } - - static void gen2_disable_logic_op(struct sna *sna) -@@ -701,7 +707,11 @@ static void gen2_emit_composite_state(struct sna *sna, - kgem_clear_dirty(&sna->kgem); - } - -- gen2_emit_target(sna, op); -+ gen2_emit_target(sna, -+ op->dst.bo, -+ op->dst.width, -+ op->dst.height, -+ op->dst.format); - - unwind = sna->kgem.nbatch; - BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | -@@ -1190,7 +1200,13 @@ inline static int gen2_get_rectangles(struct sna *sna, - sna->render.vertex_offset = sna->kgem.nbatch; - BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST); - } -- } -+ -+ need = 0; -+ } else -+ need = sna->kgem.nbatch - sna->render.vertex_offset; -+ -+ if (rem > MAX_INLINE - need) -+ rem = MAX_INLINE -need; - - if (want > 1 && want * size > rem) - want = rem / size; -@@ -1572,12 +1588,12 @@ gen2_composite_picture(struct sna *sna, - if (channel->repeat && - (x >= 0 && - y >= 0 && -- x + w < pixmap->drawable.width && -- y + h < pixmap->drawable.height)) { -+ x + w <= pixmap->drawable.width && -+ y + h <= pixmap->drawable.height)) { - struct sna_pixmap *priv = sna_pixmap(pixmap); - if (priv && priv->clear) { - DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); -- return gen2_composite_solid_init(sna, channel, priv->clear_color); -+ return gen2_composite_solid_init(sna, channel, solid_color(picture->format, priv->clear_color)); - } - } - } else -@@ -1619,7 +1635,9 @@ gen2_composite_set_target(struct sna *sna, - } 
else - sna_render_picture_extents(dst, &box); - -- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; -+ hint = PREFER_GPU | RENDER_GPU; -+ if (!need_tiling(sna, op->dst.width, op->dst.height)) -+ hint |= FORCE_GPU; - if (!partial) { - hint |= IGNORE_DAMAGE; - if (w == op->dst.width && h == op->dst.height) -@@ -2423,7 +2441,11 @@ static void gen2_emit_composite_spans_state(struct sna *sna, - uint32_t unwind; - - gen2_get_batch(sna, &op->base); -- gen2_emit_target(sna, &op->base); -+ gen2_emit_target(sna, -+ op->base.dst.bo, -+ op->base.dst.width, -+ op->base.dst.height, -+ op->base.dst.format); - - unwind = sna->kgem.nbatch; - BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | -@@ -2706,7 +2728,11 @@ static void gen2_emit_fill_composite_state(struct sna *sna, - uint32_t ls1; - - gen2_get_batch(sna, op); -- gen2_emit_target(sna, op); -+ gen2_emit_target(sna, -+ op->dst.bo, -+ op->dst.width, -+ op->dst.height, -+ op->dst.format); - - ls1 = sna->kgem.nbatch; - BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | -@@ -2868,7 +2894,11 @@ static void gen2_emit_fill_state(struct sna *sna, - uint32_t ls1; - - gen2_get_batch(sna, op); -- gen2_emit_target(sna, op); -+ gen2_emit_target(sna, -+ op->dst.bo, -+ op->dst.width, -+ op->dst.height, -+ op->dst.format); - - ls1 = sna->kgem.nbatch; - BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | -@@ -3102,6 +3132,276 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, - } - - static void -+gen2_emit_video_state(struct sna *sna, -+ struct sna_video *video, -+ struct sna_video_frame *frame, -+ PixmapPtr pixmap, -+ struct kgem_bo *dst_bo, -+ int width, int height, -+ bool bilinear) -+{ -+ uint32_t ms1, v, unwind; -+ -+ gen2_emit_target(sna, dst_bo, width, height, -+ sna_format_for_depth(pixmap->drawable.depth)); -+ -+ unwind = sna->kgem.nbatch; -+ BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | -+ I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); -+ BATCH(1 << 12); -+ BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); -+ BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); -+ if 
(memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, -+ sna->kgem.batch + unwind + 1, -+ 3 * sizeof(uint32_t)) == 0) -+ sna->kgem.nbatch = unwind; -+ else -+ sna->render_state.gen2.ls1 = unwind; -+ -+ gen2_disable_logic_op(sna); -+ -+ unwind = sna->kgem.nbatch; -+ BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | -+ LOAD_TEXTURE_BLEND_STAGE(0) | 1); -+ BATCH(TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT | -+ TB0C_OP_ARG1 | TB0C_ARG1_SEL_TEXEL0); -+ BATCH(TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT | -+ TB0A_OP_ARG1 | TB0A_ARG1_SEL_ONE); -+ if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, -+ sna->kgem.batch + unwind + 1, -+ 2 * sizeof(uint32_t)) == 0) -+ sna->kgem.nbatch = unwind; -+ else -+ sna->render_state.gen2.ls2 = unwind; -+ -+ BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(0) | 4); -+ BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, -+ frame->bo, -+ I915_GEM_DOMAIN_SAMPLER << 16, -+ 0)); -+ ms1 = MAPSURF_422 | TM0S1_COLORSPACE_CONVERSION; -+ switch (frame->id) { -+ case FOURCC_YUY2: -+ ms1 |= MT_422_YCRCB_NORMAL; -+ break; -+ case FOURCC_UYVY: -+ ms1 |= MT_422_YCRCB_SWAPY; -+ break; -+ } -+ BATCH(((frame->height - 1) << TM0S1_HEIGHT_SHIFT) | -+ ((frame->width - 1) << TM0S1_WIDTH_SHIFT) | -+ ms1 | -+ gen2_sampler_tiling_bits(frame->bo->tiling)); -+ BATCH((frame->pitch[0] / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); -+ if (bilinear) -+ BATCH(FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT | -+ FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT | -+ MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); -+ else -+ BATCH(FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT | -+ FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT | -+ MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); -+ BATCH(0); /* default color */ -+ -+ BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(0) | -+ ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | TEXCOORDTYPE_CARTESIAN | -+ ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP) | -+ ENABLE_ADDR_U_CNTL | 
TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP)); -+ -+ v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D; -+ if (sna->render_state.gen2.vft != v) { -+ BATCH(v); -+ sna->render_state.gen2.vft = v; -+ } -+} -+ -+static void -+gen2_video_get_batch(struct sna *sna, struct kgem_bo *bo) -+{ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); -+ -+ if (!kgem_check_batch(&sna->kgem, 120) || -+ !kgem_check_reloc(&sna->kgem, 4) || -+ !kgem_check_exec(&sna->kgem, 2)) { -+ _kgem_submit(&sna->kgem); -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ if (sna->render_state.gen2.need_invariant) -+ gen2_emit_invariant(sna); -+} -+ -+static int -+gen2_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex) -+{ -+ int size = floats_per_vertex * 3; -+ int rem = batch_space(sna) - 1; -+ -+ if (rem > MAX_INLINE) -+ rem = MAX_INLINE; -+ -+ if (size * want > rem) -+ want = rem / size; -+ -+ return want; -+} -+ -+static bool -+gen2_render_video(struct sna *sna, -+ struct sna_video *video, -+ struct sna_video_frame *frame, -+ RegionPtr dstRegion, -+ PixmapPtr pixmap) -+{ -+ struct sna_pixmap *priv = sna_pixmap(pixmap); -+ const BoxRec *pbox = region_rects(dstRegion); -+ int nbox = region_num_rects(dstRegion); -+ int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; -+ int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; -+ int src_width = frame->src.x2 - frame->src.x1; -+ int src_height = frame->src.y2 - frame->src.y1; -+ float src_offset_x, src_offset_y; -+ float src_scale_x, src_scale_y; -+ int pix_xoff, pix_yoff; -+ struct kgem_bo *dst_bo; -+ bool bilinear; -+ int copy = 0; -+ -+ DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__, -+ src_width, src_height, frame->width, frame->height, dst_width, dst_height)); -+ -+ assert(priv->gpu_bo); -+ dst_bo = priv->gpu_bo; -+ -+ bilinear = src_width != dst_width || src_height != dst_height; -+ -+ src_scale_x = (float)src_width / dst_width / frame->width; -+ src_offset_x = (float)frame->src.x1 / frame->width 
- dstRegion->extents.x1 * src_scale_x; -+ -+ src_scale_y = (float)src_height / dst_height / frame->height; -+ src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; -+ DBG(("%s: src offset (%f, %f), scale (%f, %f)\n", -+ __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y)); -+ -+ if (too_large(pixmap->drawable.width, pixmap->drawable.height) || -+ dst_bo->pitch > MAX_3D_PITCH) { -+ int bpp = pixmap->drawable.bitsPerPixel; -+ -+ if (too_large(dst_width, dst_height)) -+ return false; -+ -+ dst_bo = kgem_create_2d(&sna->kgem, -+ dst_width, dst_height, bpp, -+ kgem_choose_tiling(&sna->kgem, -+ I915_TILING_X, -+ dst_width, dst_height, bpp), -+ 0); -+ if (!dst_bo) -+ return false; -+ -+ pix_xoff = -dstRegion->extents.x1; -+ pix_yoff = -dstRegion->extents.y1; -+ copy = 1; -+ } else { -+ /* Set up the offset for translating from the given region -+ * (in screen coordinates) to the backing pixmap. -+ */ -+#ifdef COMPOSITE -+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x; -+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -+#else -+ pix_xoff = 0; -+ pix_yoff = 0; -+#endif -+ -+ dst_width = pixmap->drawable.width; -+ dst_height = pixmap->drawable.height; -+ } -+ -+ gen2_video_get_batch(sna, dst_bo); -+ gen2_emit_video_state(sna, video, frame, pixmap, -+ dst_bo, dst_width, dst_height, bilinear); -+ do { -+ int nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4); -+ if (nbox_this_time == 0) { -+ gen2_video_get_batch(sna, dst_bo); -+ gen2_emit_video_state(sna, video, frame, pixmap, -+ dst_bo, dst_width, dst_height, bilinear); -+ nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4); -+ assert(nbox_this_time); -+ } -+ nbox -= nbox_this_time; -+ -+ BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | -+ ((12 * nbox_this_time) - 1)); -+ do { -+ int box_x1 = pbox->x1; -+ int box_y1 = pbox->y1; -+ int box_x2 = pbox->x2; -+ int box_y2 = pbox->y2; -+ -+ pbox++; -+ -+ DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, 
%f)\n", -+ __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff, -+ box_x1 * src_scale_x + src_offset_x, -+ box_y1 * src_scale_y + src_offset_y, -+ box_x2 * src_scale_x + src_offset_x, -+ box_y2 * src_scale_y + src_offset_y)); -+ -+ /* bottom right */ -+ BATCH_F(box_x2 + pix_xoff); -+ BATCH_F(box_y2 + pix_yoff); -+ BATCH_F(box_x2 * src_scale_x + src_offset_x); -+ BATCH_F(box_y2 * src_scale_y + src_offset_y); -+ -+ /* bottom left */ -+ BATCH_F(box_x1 + pix_xoff); -+ BATCH_F(box_y2 + pix_yoff); -+ BATCH_F(box_x1 * src_scale_x + src_offset_x); -+ BATCH_F(box_y2 * src_scale_y + src_offset_y); -+ -+ /* top left */ -+ BATCH_F(box_x1 + pix_xoff); -+ BATCH_F(box_y1 + pix_yoff); -+ BATCH_F(box_x1 * src_scale_x + src_offset_x); -+ BATCH_F(box_y1 * src_scale_y + src_offset_y); -+ } while (--nbox_this_time); -+ } while (nbox); -+ -+ if (copy) { -+#ifdef COMPOSITE -+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x; -+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -+#else -+ pix_xoff = 0; -+ pix_yoff = 0; -+#endif -+ sna_blt_copy_boxes(sna, GXcopy, -+ dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, -+ priv->gpu_bo, pix_xoff, pix_yoff, -+ pixmap->drawable.bitsPerPixel, -+ region_rects(dstRegion), -+ region_num_rects(dstRegion)); -+ -+ kgem_bo_destroy(&sna->kgem, dst_bo); -+ } -+ -+ if (!DAMAGE_IS_ALL(priv->gpu_damage)) { -+ if ((pix_xoff | pix_yoff) == 0) { -+ sna_damage_add(&priv->gpu_damage, dstRegion); -+ } else { -+ sna_damage_add_boxes(&priv->gpu_damage, -+ region_rects(dstRegion), -+ region_num_rects(dstRegion), -+ pix_xoff, pix_yoff); -+ } -+ } -+ -+ return true; -+} -+ -+static void - gen2_render_copy_setup_source(struct sna_composite_channel *channel, - const DrawableRec *draw, - struct kgem_bo *bo) -@@ -3176,7 +3476,11 @@ static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op - PIPELINE_FLUSH_TEXTURE_CACHE); - kgem_clear_dirty(&sna->kgem); - } -- gen2_emit_target(sna, op); -+ gen2_emit_target(sna, -+ op->dst.bo, -+ 
op->dst.width, -+ op->dst.height, -+ op->dst.format); - - ls1 = sna->kgem.nbatch; - BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | -@@ -3511,7 +3815,7 @@ const char *gen2_render_init(struct sna *sna, const char *backend) - render->copy = gen2_render_copy; - render->copy_boxes = gen2_render_copy_boxes; - -- /* XXX YUV color space conversion for video? */ -+ render->video = gen2_render_video; - - render->reset = gen2_render_reset; - render->flush = gen2_render_flush; -diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c -index 78289f00..4459a562 100644 ---- a/src/sna/gen3_render.c -+++ b/src/sna/gen3_render.c -@@ -448,14 +448,14 @@ gen3_emit_composite_boxes_constant(const struct sna_composite_op *op, - float *v) - { - do { -- v[0] = box->x2; -- v[1] = box->y2; -+ v[0] = box->x2 + op->dst.x; -+ v[1] = box->y2 + op->dst.y; - -- v[2] = box->x1; -- v[3] = box->y2; -+ v[2] = box->x1 + op->dst.x; -+ v[3] = box->y2 + op->dst.y; - -- v[4] = box->x1; -- v[5] = box->y1; -+ v[4] = box->x1 + op->dst.x; -+ v[5] = box->y1 + op->dst.y; - - box++; - v += 6; -@@ -494,18 +494,18 @@ gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op, - float *v) - { - do { -- v[0] = box->x2; -- v[1] = box->y2; -+ v[0] = box->x2 + op->dst.x; -+ v[1] = box->y2 + op->dst.y; - v[2] = box->x2 + op->src.offset[0]; - v[3] = box->y2 + op->src.offset[1]; - -- v[4] = box->x1; -- v[5] = box->y2; -+ v[4] = box->x1 + op->dst.x; -+ v[5] = box->y2 + op->dst.y; - v[6] = box->x1 + op->src.offset[0]; - v[7] = box->y2 + op->src.offset[1]; - -- v[8] = box->x1; -- v[9] = box->y1; -+ v[8] = box->x1 + op->dst.x; -+ v[9] = box->y1 + op->dst.y; - v[10] = box->x1 + op->src.offset[0]; - v[11] = box->y1 + op->src.offset[1]; - -@@ -531,6 +531,7 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dst_x + r->width; - v[1] = dst_y + 
r->height; -@@ -559,22 +560,22 @@ gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op, - const PictTransform *transform = op->src.transform; - - do { -- v[0] = box->x2; -- v[1] = box->y2; -+ v[0] = box->x2 + op->dst.x; -+ v[1] = box->y2 + op->dst.y; - _sna_get_transformed_scaled(box->x2 + op->src.offset[0], - box->y2 + op->src.offset[1], - transform, op->src.scale, - &v[2], &v[3]); - -- v[4] = box->x1; -- v[5] = box->y2; -+ v[4] = box->x1 + op->dst.x; -+ v[5] = box->y2 + op->dst.y; - _sna_get_transformed_scaled(box->x1 + op->src.offset[0], - box->y2 + op->src.offset[1], - transform, op->src.scale, - &v[6], &v[7]); - -- v[8] = box->x1; -- v[9] = box->y1; -+ v[8] = box->x1 + op->dst.x; -+ v[9] = box->y1 + op->dst.y; - _sna_get_transformed_scaled(box->x1 + op->src.offset[0], - box->y1 + op->src.offset[1], - transform, op->src.scale, -@@ -596,6 +597,7 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[8] = v[4] = r->dst.x + op->dst.x; - v[0] = v[4] + w; -@@ -643,6 +645,7 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[8] = v[4] = r->dst.x; - v[9] = r->dst.y; -@@ -693,6 +696,7 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dst_x + r->width; - v[5] = v[1] = dst_y + r->height; -@@ -720,10 +724,10 @@ gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op, - const PictTransform *transform = op->src.transform; - - do { -- v[0] = box->x2; -- v[5] = v[1] = box->y2; -- v[8] = v[4] = box->x1; -- v[9] = 
box->y1; -+ v[0] = box->x2 + op->dst.x; -+ v[5] = v[1] = box->y2 + op->dst.y; -+ v[8] = v[4] = box->x1 + op->dst.x; -+ v[9] = box->y1 + op->dst.y; - - _sna_get_transformed_scaled(box->x2 + op->src.offset[0], - box->y2 + op->src.offset[1], -@@ -756,6 +760,7 @@ gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[8] = v[4] = r->dst.x + op->dst.x; - v[0] = v[4] + w; -@@ -781,6 +786,7 @@ gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[8] = v[4] = r->dst.x; - v[9] = r->dst.y; -@@ -817,6 +823,7 @@ gen3_emit_composite_primitive_identity_source_mask(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 18; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dst_x + w; - v[1] = dst_y + h; -@@ -862,6 +869,7 @@ gen3_emit_composite_primitive_affine_source_mask(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 18; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dst_x + w; - v[1] = dst_y + h; -@@ -978,6 +986,7 @@ gen3_emit_composite_primitive_constant__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 6; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[4] = v[2] = r->dst.x + op->dst.x; - v[5] = r->dst.y + op->dst.y; -@@ -993,10 +1002,10 @@ gen3_emit_composite_boxes_constant__sse2(const struct sna_composite_op *op, - float *v) - { - do { -- v[0] = box->x2; -- v[3] = v[1] = box->y2; -- v[4] = v[2] = box->x1; -- v[5] = box->y1; -+ v[0] = box->x2 + op->dst.x; -+ v[3] = v[1] = box->y2 + op->dst.y; -+ v[4] 
= v[2] = box->x1 + op->dst.x; -+ v[5] = box->y1 + op->dst.y; - - box++; - v += 6; -@@ -1013,6 +1022,7 @@ gen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - x = r->dst.x + op->dst.x; - y = r->dst.y + op->dst.y; -@@ -1035,10 +1045,10 @@ gen3_emit_composite_boxes_identity_gradient__sse2(const struct sna_composite_op - float *v) - { - do { -- v[0] = box->x2; -- v[5] = v[1] = box->y2; -- v[8] = v[4] = box->x1; -- v[9] = box->y1; -+ v[0] = box->x2 + op->dst.x; -+ v[5] = v[1] = box->y2 + op->dst.y; -+ v[8] = v[4] = box->x1 + op->dst.x; -+ v[9] = box->y1 + op->dst.y; - - v[2] = box->x2 + op->src.offset[0]; - v[7] = v[3] = box->y2 + op->src.offset[1]; -@@ -1067,6 +1077,7 @@ gen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dst_x + r->width; - v[1] = dst_y + r->height; -@@ -1095,22 +1106,22 @@ gen3_emit_composite_boxes_affine_gradient__sse2(const struct sna_composite_op *o - const PictTransform *transform = op->src.transform; - - do { -- v[0] = box->x2; -- v[1] = box->y2; -+ v[0] = box->x2 + op->dst.x; -+ v[1] = box->y2 + op->dst.y; - _sna_get_transformed_scaled(box->x2 + op->src.offset[0], - box->y2 + op->src.offset[1], - transform, op->src.scale, - &v[2], &v[3]); - -- v[4] = box->x1; -- v[5] = box->y2; -+ v[4] = box->x1 + op->dst.x; -+ v[5] = box->y2 + op->dst.y; - _sna_get_transformed_scaled(box->x1 + op->src.offset[0], - box->y2 + op->src.offset[1], - transform, op->src.scale, - &v[6], &v[7]); - -- v[8] = box->x1; -- v[9] = box->y1; -+ v[8] = box->x1 + op->dst.x; -+ v[9] = box->y1 + op->dst.y; - _sna_get_transformed_scaled(box->x1 + op->src.offset[0], - box->y1 + op->src.offset[1], - transform, op->src.scale, -@@ 
-1132,6 +1143,7 @@ gen3_emit_composite_primitive_identity_source__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[8] = v[4] = r->dst.x + op->dst.x; - v[0] = v[4] + w; -@@ -1179,6 +1191,7 @@ gen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[8] = v[4] = r->dst.x; - v[9] = r->dst.y; -@@ -1227,8 +1240,12 @@ gen3_emit_composite_primitive_affine_source__sse2(struct sna *sna, - int src_y = r->src.y + (int)op->src.offset[1]; - float *v; - -+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d\n", -+ __FUNCTION__, src_x, src_y, dst_x, dst_y, r->width, r->height)); -+ - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dst_x + r->width; - v[5] = v[1] = dst_y + r->height; -@@ -1256,10 +1273,13 @@ gen3_emit_composite_boxes_affine_source__sse2(const struct sna_composite_op *op, - const PictTransform *transform = op->src.transform; - - do { -- v[0] = box->x2; -- v[5] = v[1] = box->y2; -- v[8] = v[4] = box->x1; -- v[9] = box->y1; -+ DBG(("%s: box=(%d, %d), (%d, %d), src.offset=(%d, %d)\n", -+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2, op->src.offset[0], op->src.offset[1])); -+ -+ v[0] = box->x2 + op->dst.x; -+ v[5] = v[1] = box->y2 + op->dst.y; -+ v[8] = v[4] = box->x1 + op->dst.x; -+ v[9] = box->y1 + op->dst.y; - - _sna_get_transformed_scaled(box->x2 + op->src.offset[0], - box->y2 + op->src.offset[1], -@@ -1292,6 +1312,7 @@ gen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - 
v[8] = v[4] = r->dst.x + op->dst.x; - v[0] = v[4] + w; -@@ -1317,6 +1338,7 @@ gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[8] = v[4] = r->dst.x; - v[9] = r->dst.y; -@@ -1353,6 +1375,7 @@ gen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 18; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dst_x + w; - v[1] = dst_y + h; -@@ -1398,6 +1421,7 @@ gen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 18; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dst_x + w; - v[1] = dst_y + h; -@@ -2233,6 +2257,7 @@ static void gen3_vertex_flush(struct sna *sna) - static int gen3_vertex_finish(struct sna *sna) - { - struct kgem_bo *bo; -+ unsigned hint, size; - - DBG(("%s: used=%d/%d, vbo active? 
%d\n", - __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size, -@@ -2243,6 +2268,7 @@ static int gen3_vertex_finish(struct sna *sna) - - sna_vertex_wait__locked(&sna->render); - -+ hint = CREATE_GTT_MAP; - bo = sna->render.vbo; - if (bo) { - DBG(("%s: reloc = %d\n", __FUNCTION__, -@@ -2251,7 +2277,7 @@ static int gen3_vertex_finish(struct sna *sna) - if (sna->render.vertex_reloc[0]) { - sna->kgem.batch[sna->render.vertex_reloc[0]] = - kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], -- bo, I915_GEM_DOMAIN_VERTEX << 16, 0); -+ bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, 0); - - sna->render.vertex_reloc[0] = 0; - } -@@ -2260,17 +2286,29 @@ static int gen3_vertex_finish(struct sna *sna) - sna->render.vbo = NULL; - - kgem_bo_destroy(&sna->kgem, bo); -+ hint |= CREATE_CACHED | CREATE_NO_THROTTLE; - } - -+ size = 256*1024; - sna->render.vertices = NULL; -- sna->render.vbo = kgem_create_linear(&sna->kgem, -- 256*1024, CREATE_GTT_MAP); -- if (sna->render.vbo) -+ sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); -+ while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) { -+ size /= 2; -+ sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); -+ } -+ if (sna->render.vbo == NULL) -+ sna->render.vbo = kgem_create_linear(&sna->kgem, -+ 256*1024, CREATE_GTT_MAP); -+ if (sna->render.vbo && -+ kgem_check_bo(&sna->kgem, sna->render.vbo, NULL)) - sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); - if (sna->render.vertices == NULL) { -- if (sna->render.vbo) -+ if (sna->render.vbo) { - kgem_bo_destroy(&sna->kgem, sna->render.vbo); -- sna->render.vbo = NULL; -+ sna->render.vbo = NULL; -+ } -+ sna->render.vertices = sna->render.vertex_data; -+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - return 0; - } - assert(sna->render.vbo->snoop == false); -@@ -2280,8 +2318,14 @@ static int gen3_vertex_finish(struct sna *sna) - sna->render.vertex_data, - sizeof(float)*sna->render.vertex_used); - } -- 
sna->render.vertex_size = 64 * 1024 - 1; -- return sna->render.vertex_size - sna->render.vertex_used; -+ -+ size = __kgem_bo_size(sna->render.vbo)/4; -+ if (size >= UINT16_MAX) -+ size = UINT16_MAX - 1; -+ assert(size > sna->render.vertex_used); -+ -+ sna->render.vertex_size = size; -+ return size - sna->render.vertex_used; - } - - static void gen3_vertex_close(struct sna *sna) -@@ -2345,7 +2389,7 @@ static void gen3_vertex_close(struct sna *sna) - DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0])); - sna->kgem.batch[sna->render.vertex_reloc[0]] = - kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], -- bo, I915_GEM_DOMAIN_VERTEX << 16, delta); -+ bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, delta); - sna->render.vertex_reloc[0] = 0; - - if (sna->render.vbo == NULL) { -@@ -2580,6 +2624,7 @@ gen3_render_composite_boxes(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += nbox_this_time * op->floats_per_rect; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - op->emit_boxes(op, box, nbox_this_time, v); - box += nbox_this_time; -@@ -2604,6 +2649,7 @@ gen3_render_composite_boxes__thread(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += nbox_this_time * op->floats_per_rect; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - sna_vertex_acquire__locked(&sna->render); - sna_vertex_unlock(&sna->render); -@@ -3065,7 +3111,7 @@ gen3_composite_picture(struct sna *sna, - - if (sna_picture_is_clear(picture, x, y, w, h, &color)) { - DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color)); -- return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8)); -+ return gen3_init_solid(channel, solid_color(picture->format, color)); - } - - if (!gen3_check_repeat(picture)) -@@ -3097,12 +3143,12 @@ gen3_composite_picture(struct sna *sna, - if (channel->repeat || - (x >= 0 && - y >= 0 && -- x + w < 
pixmap->drawable.width && -- y + h < pixmap->drawable.height)) { -+ x + w <= pixmap->drawable.width && -+ y + h <= pixmap->drawable.height)) { - struct sna_pixmap *priv = sna_pixmap(pixmap); - if (priv && priv->clear) { - DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); -- return gen3_init_solid(channel, priv->clear_color); -+ return gen3_init_solid(channel, solid_color(picture->format, priv->clear_color)); - } - } - } else { -@@ -3182,7 +3228,9 @@ gen3_composite_set_target(struct sna *sna, - } else - sna_render_picture_extents(dst, &box); - -- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; -+ hint = PREFER_GPU | RENDER_GPU; -+ if (!need_tiling(sna, op->dst.width, op->dst.height)) -+ hint |= FORCE_GPU; - if (!partial) { - hint |= IGNORE_DAMAGE; - if (w == op->dst.width && h == op->dst.height) -@@ -3645,8 +3693,11 @@ gen3_render_composite(struct sna *sna, - } - } - } -- DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__, -+ DBG(("%s: final src/mask type=%d/%d [constant? %d/%d], transform? 
%d/%d, affine=%d/%d\n", __FUNCTION__, - tmp->src.u.gen3.type, tmp->mask.u.gen3.type, -+ is_constant_ps(tmp->src.u.gen3.type), -+ is_constant_ps(tmp->mask.u.gen3.type), -+ !!tmp->src.transform, !!tmp->mask.transform, - tmp->src.is_affine, tmp->mask.is_affine)); - - tmp->prim_emit = gen3_emit_composite_primitive; -@@ -3862,6 +3913,7 @@ gen3_emit_composite_spans_primitive_zero(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 6; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[1] = op->base.dst.y + box->y2; -@@ -3901,6 +3953,7 @@ gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 6; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = box->x2; - v[3] = v[1] = box->y2; -@@ -3932,6 +3985,7 @@ gen3_emit_composite_spans_primitive_constant(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[6] = v[3] = op->base.dst.x + box->x1; -@@ -3966,6 +4020,7 @@ gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = box->x2; - v[6] = v[3] = box->x1; -@@ -3999,6 +4054,7 @@ gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[1] = op->base.dst.y + box->y2; -@@ -4060,6 +4116,7 @@ gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, - - v = sna->render.vertices + 
sna->render.vertex_used; - sna->render.vertex_used += 15; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[6] = v[1] = op->base.dst.y + box->y2; -@@ -4125,6 +4182,7 @@ gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[1] = op->base.dst.y + box->y2; -@@ -4184,6 +4242,7 @@ gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[6] = v[3] = op->base.dst.x + box->x1; -@@ -4229,6 +4288,7 @@ gen3_render_composite_spans_constant_box__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = box->x2; - v[6] = v[3] = box->x1; -@@ -4259,6 +4319,7 @@ gen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += nbox_this_time * 9; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - sna_vertex_acquire__locked(&sna->render); - sna_vertex_unlock(&sna->render); -@@ -4287,6 +4348,7 @@ gen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = box->x2; - v[6] = v[3] = box->x1; -@@ -4320,6 +4382,7 @@ gen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; -+ 
assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[1] = op->base.dst.y + box->y2; -@@ -4380,6 +4443,7 @@ gen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[6] = v[1] = op->base.dst.y + box->y2; -@@ -4445,6 +4509,7 @@ gen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna, - { - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[1] = op->base.dst.y + box->y2; -@@ -4504,6 +4569,7 @@ gen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna, - PictTransform *transform = op->base.src.transform; - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[1] = op->base.dst.y + box->y2; -@@ -4577,6 +4643,7 @@ gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, - PictTransform *transform = op->base.src.transform; - float *v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = op->base.dst.x + box->x2; - v[1] = op->base.dst.y + box->y2; -@@ -4676,6 +4743,7 @@ gen3_render_composite_spans_constant_box(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = box->x2; - v[6] = v[3] = box->x1; -@@ -4706,6 +4774,7 @@ gen3_render_composite_spans_constant_thread_boxes(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 
nbox_this_time * 9; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - sna_vertex_acquire__locked(&sna->render); - sna_vertex_unlock(&sna->render); -@@ -4795,6 +4864,7 @@ gen3_render_composite_spans_boxes__thread(struct sna *sna, - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); - - sna_vertex_acquire__locked(&sna->render); - sna_vertex_unlock(&sna->render); -@@ -5436,17 +5506,7 @@ gen3_render_video(struct sna *sna, - pix_yoff = -dstRegion->extents.y1; - copy = 1; - } else { -- /* Set up the offset for translating from the given region -- * (in screen coordinates) to the backing pixmap. -- */ --#ifdef COMPOSITE -- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; -- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; --#else -- pix_xoff = 0; -- pix_yoff = 0; --#endif -- -+ pix_xoff = pix_yoff = 0; - dst_width = pixmap->drawable.width; - dst_height = pixmap->drawable.height; - } -@@ -5502,16 +5562,9 @@ gen3_render_video(struct sna *sna, - } while (nbox); - - if (copy) { --#ifdef COMPOSITE -- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; -- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; --#else -- pix_xoff = 0; -- pix_yoff = 0; --#endif - sna_blt_copy_boxes(sna, GXcopy, - dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, -- priv->gpu_bo, pix_xoff, pix_yoff, -+ priv->gpu_bo, 0, 0, - pixmap->drawable.bitsPerPixel, - region_rects(dstRegion), - region_num_rects(dstRegion)); -@@ -5519,21 +5572,8 @@ gen3_render_video(struct sna *sna, - kgem_bo_destroy(&sna->kgem, dst_bo); - } - -- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { -- if ((pix_xoff | pix_yoff) == 0) { -- sna_damage_add(&priv->gpu_damage, dstRegion); -- sna_damage_subtract(&priv->cpu_damage, dstRegion); -- } else { -- sna_damage_add_boxes(&priv->gpu_damage, -- region_rects(dstRegion), -- region_num_rects(dstRegion), -- pix_xoff, pix_yoff); -- 
sna_damage_subtract_boxes(&priv->cpu_damage, -- region_rects(dstRegion), -- region_num_rects(dstRegion), -- pix_xoff, pix_yoff); -- } -- } -+ if (!DAMAGE_IS_ALL(priv->gpu_damage)) -+ sna_damage_add(&priv->gpu_damage, dstRegion); - - return true; - } -diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c -index 6c2d3808..72a98aee 100644 ---- a/src/sna/gen4_render.c -+++ b/src/sna/gen4_render.c -@@ -1405,8 +1405,8 @@ gen4_render_video(struct sna *sna, - int src_height = frame->src.y2 - frame->src.y1; - float src_offset_x, src_offset_y; - float src_scale_x, src_scale_y; -- int nbox, pix_xoff, pix_yoff; - const BoxRec *box; -+ int nbox; - - DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, - src_width, src_height, dst_width, dst_height)); -@@ -1445,17 +1445,6 @@ gen4_render_video(struct sna *sna, - gen4_align_vertex(sna, &tmp); - gen4_video_bind_surfaces(sna, &tmp); - -- /* Set up the offset for translating from the given region (in screen -- * coordinates) to the backing pixmap. -- */ --#ifdef COMPOSITE -- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; -- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; --#else -- pix_xoff = 0; -- pix_yoff = 0; --#endif -- - src_scale_x = (float)src_width / dst_width / frame->width; - src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; - -@@ -1473,34 +1462,26 @@ gen4_render_video(struct sna *sna, - nbox -= n; - - do { -- BoxRec r; -- -- r.x1 = box->x1 + pix_xoff; -- r.x2 = box->x2 + pix_xoff; -- r.y1 = box->y1 + pix_yoff; -- r.y2 = box->y2 + pix_yoff; -- -- OUT_VERTEX(r.x2, r.y2); -+ OUT_VERTEX(box->x2, box->y2); - OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y2); -+ OUT_VERTEX(box->x1, box->y2); - OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y1); -+ OUT_VERTEX(box->x1, box->y1); - OUT_VERTEX_F(box->x1 * src_scale_x + 
src_offset_x); - OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - -- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { -- sna_damage_add_box(&priv->gpu_damage, &r); -- sna_damage_subtract_box(&priv->cpu_damage, &r); -- } - box++; - } while (--n); - } while (nbox); - gen4_vertex_flush(sna); - -+ if (!DAMAGE_IS_ALL(priv->gpu_damage)) -+ sna_damage_add(&priv->gpu_damage, dstRegion); -+ - return true; - } - -@@ -1585,12 +1566,14 @@ gen4_composite_picture(struct sna *sna, - if (channel->repeat && - (x >= 0 && - y >= 0 && -- x + w < pixmap->drawable.width && -- y + h < pixmap->drawable.height)) { -+ x + w <= pixmap->drawable.width && -+ y + h <= pixmap->drawable.height)) { - struct sna_pixmap *priv = sna_pixmap(pixmap); - if (priv && priv->clear) { - DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); -- return gen4_channel_init_solid(sna, channel, priv->clear_color); -+ return gen4_channel_init_solid(sna, channel, -+ solid_color(picture->format, -+ priv->clear_color)); - } - } - } else -@@ -1664,7 +1647,9 @@ gen4_composite_set_target(struct sna *sna, - } else - sna_render_picture_extents(dst, &box); - -- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; -+ hint = PREFER_GPU | RENDER_GPU; -+ if (!need_tiling(sna, op->dst.width, op->dst.height)) -+ hint |= FORCE_GPU; - if (!partial) { - hint |= IGNORE_DAMAGE; - if (w == op->dst.width && h == op->dst.height) -@@ -2738,6 +2723,20 @@ gen4_render_fill_boxes(struct sna *sna, - tmp.dst.format = format; - tmp.dst.bo = dst_bo; - -+ sna_render_composite_redirect_init(&tmp); -+ if (too_large(dst->width, dst->height)) { -+ BoxRec extents; -+ -+ boxes_extents(box, n, &extents); -+ if (!sna_render_composite_redirect(sna, &tmp, -+ extents.x1, extents.y1, -+ extents.x2 - extents.x1, -+ extents.y2 - extents.y1, -+ n > 1)) -+ return sna_tiling_fill_boxes(sna, op, format, color, -+ dst, dst_bo, box, n); -+ } -+ - gen4_channel_init_solid(sna, &tmp.src, pixel); - - tmp.is_affine = true; -@@ -2748,8 +2747,10 
@@ gen4_render_fill_boxes(struct sna *sna, - - if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { - kgem_submit(&sna->kgem); -- if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) -+ if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { -+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); - return false; -+ } - } - - gen4_align_vertex(sna, &tmp); -@@ -2765,6 +2766,7 @@ gen4_render_fill_boxes(struct sna *sna, - - gen4_vertex_flush(sna); - kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ sna_render_composite_redirect_done(sna, &tmp); - return true; - } - -diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c -index 37cf1ff9..fb3e79bf 100644 ---- a/src/sna/gen5_render.c -+++ b/src/sna/gen5_render.c -@@ -1355,8 +1355,8 @@ gen5_render_video(struct sna *sna, - int src_height = frame->src.y2 - frame->src.y1; - float src_offset_x, src_offset_y; - float src_scale_x, src_scale_y; -- int nbox, pix_xoff, pix_yoff; - const BoxRec *box; -+ int nbox; - - DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, - src_width, src_height, dst_width, dst_height)); -@@ -1395,17 +1395,6 @@ gen5_render_video(struct sna *sna, - gen5_align_vertex(sna, &tmp); - gen5_video_bind_surfaces(sna, &tmp); - -- /* Set up the offset for translating from the given region (in screen -- * coordinates) to the backing pixmap. 
-- */ --#ifdef COMPOSITE -- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; -- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; --#else -- pix_xoff = 0; -- pix_yoff = 0; --#endif -- - src_scale_x = (float)src_width / dst_width / frame->width; - src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; - -@@ -1415,35 +1404,27 @@ gen5_render_video(struct sna *sna, - box = region_rects(dstRegion); - nbox = region_num_rects(dstRegion); - while (nbox--) { -- BoxRec r; -- -- r.x1 = box->x1 + pix_xoff; -- r.x2 = box->x2 + pix_xoff; -- r.y1 = box->y1 + pix_yoff; -- r.y2 = box->y2 + pix_yoff; -- - gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces); - -- OUT_VERTEX(r.x2, r.y2); -+ OUT_VERTEX(box->x2, box->y2); - OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y2); -+ OUT_VERTEX(box->x1, box->y2); - OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y1); -+ OUT_VERTEX(box->x1, box->y1); - OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - -- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { -- sna_damage_add_box(&priv->gpu_damage, &r); -- sna_damage_subtract_box(&priv->cpu_damage, &r); -- } - box++; - } -- - gen4_vertex_flush(sna); -+ -+ if (!DAMAGE_IS_ALL(priv->gpu_damage)) -+ sna_damage_add(&priv->gpu_damage, dstRegion); -+ - return true; - } - -@@ -1524,12 +1505,12 @@ gen5_composite_picture(struct sna *sna, - if (channel->repeat || - (x >= 0 && - y >= 0 && -- x + w < pixmap->drawable.width && -- y + h < pixmap->drawable.height)) { -+ x + w <= pixmap->drawable.width && -+ y + h <= pixmap->drawable.height)) { - struct sna_pixmap *priv = sna_pixmap(pixmap); - if (priv && priv->clear) { - DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); -- return gen4_channel_init_solid(sna, 
channel, priv->clear_color); -+ return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); - } - } - } else -@@ -1618,7 +1599,9 @@ gen5_composite_set_target(struct sna *sna, - } else - sna_render_picture_extents(dst, &box); - -- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; -+ hint = PREFER_GPU | RENDER_GPU; -+ if (!need_tiling(sna, op->dst.width, op->dst.height)) -+ hint |= FORCE_GPU; - if (!partial) { - hint |= IGNORE_DAMAGE; - if (w == op->dst.width && h == op->dst.height) -@@ -2734,6 +2717,19 @@ gen5_render_fill_boxes(struct sna *sna, - tmp.dst.format = format; - tmp.dst.bo = dst_bo; - -+ if (too_large(dst->width, dst->height)) { -+ BoxRec extents; -+ -+ boxes_extents(box, n, &extents); -+ if (!sna_render_composite_redirect(sna, &tmp, -+ extents.x1, extents.y1, -+ extents.x2 - extents.x1, -+ extents.y2 - extents.y1, -+ n > 1)) -+ return sna_tiling_fill_boxes(sna, op, format, color, -+ dst, dst_bo, box, n); -+ } -+ - tmp.src.bo = sna_render_get_solid(sna, pixel); - tmp.src.filter = SAMPLER_FILTER_NEAREST; - tmp.src.repeat = SAMPLER_EXTEND_REPEAT; -@@ -2780,6 +2776,7 @@ gen5_render_fill_boxes(struct sna *sna, - - gen4_vertex_flush(sna); - kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ sna_render_composite_redirect_done(sna, &tmp); - return true; - } - -diff --git a/src/sna/gen6_common.h b/src/sna/gen6_common.h -index 6668620b..b53ec0c9 100644 ---- a/src/sna/gen6_common.h -+++ b/src/sna/gen6_common.h -@@ -30,8 +30,8 @@ - - #include "sna.h" - --#define NO_RING_SWITCH 0 --#define PREFER_RENDER 0 -+#define NO_RING_SWITCH(sna) (!(sna)->kgem.has_semaphores) -+#define PREFER_RENDER 0 /* -1 -> BLT, 1 -> RENDER */ - - static inline bool is_uncached(struct sna *sna, - struct kgem_bo *bo) -@@ -46,40 +46,28 @@ inline static bool can_switch_to_blt(struct sna *sna, - if (sna->kgem.ring != KGEM_RENDER) - return true; - -- if (NO_RING_SWITCH) -- return false; -- -- if (!sna->kgem.has_semaphores) -- return false; -- -- if (flags & COPY_LAST) -- 
return true; -- - if (bo && RQ_IS_BLT(bo->rq)) - return true; - -- if (sna->render_state.gt < 2) -- return true; -+ if (bo && bo->tiling == I915_TILING_Y) -+ return false; - -- return kgem_ring_is_idle(&sna->kgem, KGEM_BLT); --} -+ if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) -+ return false; - --inline static bool can_switch_to_render(struct sna *sna, -- struct kgem_bo *bo) --{ -- if (sna->kgem.ring == KGEM_RENDER) -+ if (sna->render_state.gt < 2) - return true; - -- if (NO_RING_SWITCH) -+ if (bo && RQ_IS_RENDER(bo->rq)) - return false; - -- if (!sna->kgem.has_semaphores) -+ if (NO_RING_SWITCH(sna)) - return false; - -- if (bo && !RQ_IS_BLT(bo->rq) && !is_uncached(sna, bo)) -+ if (flags & COPY_LAST) - return true; - -- return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER); -+ return kgem_ring_is_idle(&sna->kgem, KGEM_BLT); - } - - static inline bool untiled_tlb_miss(struct kgem_bo *bo) -@@ -90,57 +78,95 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo) - return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; - } - --static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) -+static int prefer_blt_bo(struct sna *sna, -+ struct kgem_bo *src, -+ struct kgem_bo *dst) - { -+ assert(dst != NULL); -+ - if (PREFER_RENDER) - return PREFER_RENDER < 0; - -- if (bo->rq) -- return RQ_IS_BLT(bo->rq); -+ if (dst->rq) -+ return RQ_IS_BLT(dst->rq); - - if (sna->flags & SNA_POWERSAVE) - return true; - -- return bo->tiling == I915_TILING_NONE || is_uncached(sna, bo); --} -+ if (src) { -+ if (sna->render_state.gt > 1) -+ return false; - --inline static bool force_blt_ring(struct sna *sna) --{ -- if (sna->flags & SNA_POWERSAVE) -+ if (src->rq) -+ return RQ_IS_BLT(src->rq); -+ -+ if (src->tiling == I915_TILING_Y) -+ return false; -+ } else { -+ if (sna->render_state.gt > 2) -+ return false; -+ } -+ -+ if (sna->render_state.gt < 2) - return true; - -+ return dst->tiling == I915_TILING_NONE || is_uncached(sna, dst); -+} -+ -+inline static bool force_blt_ring(struct sna 
*sna, struct kgem_bo *bo) -+{ - if (sna->kgem.mode == KGEM_RENDER) - return false; - -+ if (NO_RING_SWITCH(sna)) -+ return sna->kgem.ring == KGEM_BLT; -+ -+ if (bo->tiling == I915_TILING_Y) -+ return false; -+ -+ if (sna->flags & SNA_POWERSAVE) -+ return true; -+ - if (sna->render_state.gt < 2) - return true; - - return false; - } - --inline static bool prefer_blt_ring(struct sna *sna, -- struct kgem_bo *bo, -- unsigned flags) -+nonnull inline static bool -+prefer_blt_ring(struct sna *sna, struct kgem_bo *bo, unsigned flags) - { - if (PREFER_RENDER) - return PREFER_RENDER < 0; - -- assert(!force_blt_ring(sna)); -- assert(!kgem_bo_is_render(bo)); -+ assert(!force_blt_ring(sna, bo)); -+ assert(!kgem_bo_is_render(bo) || NO_RING_SWITCH(sna)); -+ -+ if (kgem_bo_is_blt(bo)) -+ return true; - - return can_switch_to_blt(sna, bo, flags); - } - --inline static bool prefer_render_ring(struct sna *sna, -- struct kgem_bo *bo) -+nonnull inline static bool -+prefer_render_ring(struct sna *sna, struct kgem_bo *bo) - { -+ if (sna->kgem.ring == KGEM_RENDER) -+ return true; -+ -+ if (sna->kgem.ring != KGEM_NONE && NO_RING_SWITCH(sna)) -+ return false; -+ -+ if (kgem_bo_is_render(bo)) -+ return true; -+ - if (sna->flags & SNA_POWERSAVE) - return false; - -- if (sna->render_state.gt < 2) -- return false; -+ if (!prefer_blt_bo(sna, NULL, bo)) -+ return true; - -- return can_switch_to_render(sna, bo); -+ return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER); - } - - inline static bool -@@ -153,25 +179,20 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) - untiled_tlb_miss(tmp->src.bo)) - return true; - -- if (force_blt_ring(sna)) -+ if (force_blt_ring(sna, tmp->dst.bo)) - return true; - -- if (kgem_bo_is_render(tmp->dst.bo) || -- kgem_bo_is_render(tmp->src.bo)) -- return false; -- - if (prefer_render_ring(sna, tmp->dst.bo)) - return false; - - if (!prefer_blt_ring(sna, tmp->dst.bo, 0)) - return false; - -- return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, 
tmp->src.bo); -+ return prefer_blt_bo(sna, tmp->src.bo, tmp->dst.bo); - } - --static inline bool prefer_blt_fill(struct sna *sna, -- struct kgem_bo *bo, -- unsigned flags) -+nonnull static inline bool -+prefer_blt_fill(struct sna *sna, struct kgem_bo *bo, unsigned flags) - { - if (PREFER_RENDER) - return PREFER_RENDER < 0; -@@ -179,24 +200,21 @@ static inline bool prefer_blt_fill(struct sna *sna, - if (untiled_tlb_miss(bo)) - return true; - -- if (force_blt_ring(sna)) -+ if (force_blt_ring(sna, bo)) - return true; - - if ((flags & (FILL_POINTS | FILL_SPANS)) == 0) { -- if (kgem_bo_is_render(bo)) -- return false; -- - if (prefer_render_ring(sna, bo)) - return false; - - if (!prefer_blt_ring(sna, bo, 0)) - return false; - } else { -- if (can_switch_to_blt(sna, bo, 0)) -+ if (can_switch_to_blt(sna, bo, COPY_LAST)) - return true; - } - -- return prefer_blt_bo(sna, bo); -+ return prefer_blt_bo(sna, NULL, bo); - } - - void gen6_render_context_switch(struct kgem *kgem, int new_mode); -diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c -index 25044685..6b69f216 100644 ---- a/src/sna/gen6_render.c -+++ b/src/sna/gen6_render.c -@@ -1633,9 +1633,9 @@ gen6_render_video(struct sna *sna, - int src_height = frame->src.y2 - frame->src.y1; - float src_offset_x, src_offset_y; - float src_scale_x, src_scale_y; -- int nbox, pix_xoff, pix_yoff; - unsigned filter; - const BoxRec *box; -+ int nbox; - - DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", - __FUNCTION__, -@@ -1686,17 +1686,6 @@ gen6_render_video(struct sna *sna, - gen6_align_vertex(sna, &tmp); - gen6_emit_video_state(sna, &tmp); - -- /* Set up the offset for translating from the given region (in screen -- * coordinates) to the backing pixmap. 
-- */ --#ifdef COMPOSITE -- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; -- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; --#else -- pix_xoff = 0; -- pix_yoff = 0; --#endif -- - src_scale_x = (float)src_width / dst_width / frame->width; - src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; - -@@ -1706,35 +1695,27 @@ gen6_render_video(struct sna *sna, - box = region_rects(dstRegion); - nbox = region_num_rects(dstRegion); - while (nbox--) { -- BoxRec r; -- -- r.x1 = box->x1 + pix_xoff; -- r.x2 = box->x2 + pix_xoff; -- r.y1 = box->y1 + pix_yoff; -- r.y2 = box->y2 + pix_yoff; -- - gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); - -- OUT_VERTEX(r.x2, r.y2); -+ OUT_VERTEX(box->x2, box->y2); - OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y2); -+ OUT_VERTEX(box->x1, box->y2); - OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y1); -+ OUT_VERTEX(box->x1, box->y1); - OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - -- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { -- sna_damage_add_box(&priv->gpu_damage, &r); -- sna_damage_subtract_box(&priv->cpu_damage, &r); -- } - box++; - } -- - gen4_vertex_flush(sna); -+ -+ if (!DAMAGE_IS_ALL(priv->gpu_damage)) -+ sna_damage_add(&priv->gpu_damage, dstRegion); -+ - return true; - } - -@@ -1815,12 +1796,12 @@ gen6_composite_picture(struct sna *sna, - if (channel->repeat && - (x >= 0 && - y >= 0 && -- x + w < pixmap->drawable.width && -- y + h < pixmap->drawable.height)) { -+ x + w <= pixmap->drawable.width && -+ y + h <= pixmap->drawable.height)) { - struct sna_pixmap *priv = sna_pixmap(pixmap); - if (priv && priv->clear) { - DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); -- return gen4_channel_init_solid(sna, 
channel, priv->clear_color); -+ return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); - } - } - } else -@@ -1927,7 +1908,9 @@ gen6_composite_set_target(struct sna *sna, - } else - sna_render_picture_extents(dst, &box); - -- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; -+ hint = PREFER_GPU | RENDER_GPU; -+ if (!need_tiling(sna, op->dst.width, op->dst.height)) -+ hint |= FORCE_GPU; - if (!partial) { - hint |= IGNORE_DAMAGE; - if (w == op->dst.width && h == op->dst.height) -@@ -1965,46 +1948,77 @@ gen6_composite_set_target(struct sna *sna, - - static bool - try_blt(struct sna *sna, -- PicturePtr dst, PicturePtr src, -- int width, int height) -+ uint8_t op, -+ PicturePtr src, -+ PicturePtr mask, -+ PicturePtr dst, -+ int16_t src_x, int16_t src_y, -+ int16_t msk_x, int16_t msk_y, -+ int16_t dst_x, int16_t dst_y, -+ int16_t width, int16_t height, -+ unsigned flags, -+ struct sna_composite_op *tmp) - { - struct kgem_bo *bo; - - if (sna->kgem.mode == KGEM_BLT) { - DBG(("%s: already performing BLT\n", __FUNCTION__)); -- return true; -+ goto execute; - } - - if (too_large(width, height)) { - DBG(("%s: operation too large for 3D pipe (%d, %d)\n", - __FUNCTION__, width, height)); -- return true; -+ goto execute; - } - - bo = __sna_drawable_peek_bo(dst->pDrawable); - if (bo == NULL) -- return true; -- if (bo->rq) -- return RQ_IS_BLT(bo->rq); -+ goto execute; -+ -+ if (untiled_tlb_miss(bo)) -+ goto execute; -+ -+ if (bo->rq) { -+ if (RQ_IS_BLT(bo->rq)) -+ goto execute; -+ -+ return false; -+ } -+ -+ if (bo->tiling == I915_TILING_Y) -+ goto upload; -+ -+ if (src->pDrawable == dst->pDrawable && -+ can_switch_to_blt(sna, bo, 0)) -+ goto execute; - - if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) -- return true; -+ goto execute; - - if (src->pDrawable) { -- bo = __sna_drawable_peek_bo(src->pDrawable); -- if (bo == NULL) -- return true; -+ struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); -+ if (s == NULL) -+ 
goto execute; - -- if (prefer_blt_bo(sna, bo)) -- return true; -+ if (prefer_blt_bo(sna, s, bo)) -+ goto execute; - } - - if (sna->kgem.ring == KGEM_BLT) { - DBG(("%s: already performing BLT\n", __FUNCTION__)); -- return true; -+ goto execute; - } - -- return false; -+upload: -+ flags |= COMPOSITE_UPLOAD; -+execute: -+ return sna_blt_composite(sna, op, -+ src, dst, -+ src_x, src_y, -+ dst_x, dst_y, -+ width, height, -+ flags, tmp); - } - - static bool -@@ -2234,13 +2248,13 @@ gen6_render_composite(struct sna *sna, - width, height, sna->kgem.ring)); - - if (mask == NULL && -- try_blt(sna, dst, src, width, height) && -- sna_blt_composite(sna, op, -- src, dst, -- src_x, src_y, -- dst_x, dst_y, -- width, height, -- flags, tmp)) -+ try_blt(sna, op, -+ src, mask, dst, -+ src_x, src_y, -+ msk_x, msk_y, -+ dst_x, dst_y, -+ width, height, -+ flags, tmp)) - return true; - - if (gen6_composite_fallback(sna, src, mask, dst)) -@@ -2676,27 +2690,35 @@ static inline bool prefer_blt_copy(struct sna *sna, - if (sna->kgem.ring == KGEM_BLT) - return true; - -- if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) -+ if (flags & COPY_DRI && !sna->kgem.has_semaphores) -+ return false; -+ -+ if ((flags & COPY_SMALL || src_bo == dst_bo) && -+ can_switch_to_blt(sna, dst_bo, flags)) - return true; - - if (untiled_tlb_miss(src_bo) || - untiled_tlb_miss(dst_bo)) - return true; - -- if (force_blt_ring(sna)) -+ if (force_blt_ring(sna, dst_bo)) - return true; - - if (kgem_bo_is_render(dst_bo) || - kgem_bo_is_render(src_bo)) - return false; - -+ if (flags & COPY_LAST && -+ can_switch_to_blt(sna, dst_bo, flags)) -+ return true; -+ - if (prefer_render_ring(sna, dst_bo)) - return false; - - if (!prefer_blt_ring(sna, dst_bo, flags)) - return false; - -- return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); -+ return prefer_blt_bo(sna, src_bo, dst_bo); - } - - static bool -@@ -2758,8 +2780,7 @@ fallback_blt: - assert(src->depth == dst->depth); - assert(src->width == dst->width); - 
assert(src->height == dst->height); -- return sna_render_copy_boxes__overlap(sna, alu, -- src, src_bo, -+ return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, - src_dx, src_dy, - dst_dx, dst_dy, - box, n, &extents); -diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c -index 2ecfd641..aabb8693 100644 ---- a/src/sna/gen7_render.c -+++ b/src/sna/gen7_render.c -@@ -60,8 +60,6 @@ - #define NO_FILL_ONE 0 - #define NO_FILL_CLEAR 0 - --#define NO_RING_SWITCH 0 -- - #define USE_8_PIXEL_DISPATCH 1 - #define USE_16_PIXEL_DISPATCH 1 - #define USE_32_PIXEL_DISPATCH 0 -@@ -149,7 +147,7 @@ static const struct gt_info hsw_gt1_info = { - .max_vs_threads = 70, - .max_gs_threads = 70, - .max_wm_threads = -- (102 - 1) << HSW_PS_MAX_THREADS_SHIFT | -+ (70 - 1) << HSW_PS_MAX_THREADS_SHIFT | - 1 << HSW_PS_SAMPLE_MASK_SHIFT, - .urb = { 128, 640, 256, 8 }, - .gt = 1, -@@ -209,6 +207,12 @@ static const uint32_t ps_kernel_planar[][4] = { - #include "exa_wm_write.g7b" - }; - -+static const uint32_t ps_kernel_rgb[][4] = { -+#include "exa_wm_src_affine.g7b" -+#include "exa_wm_src_sample_argb.g7b" -+#include "exa_wm_write.g7b" -+}; -+ - #define KERNEL(kernel_enum, kernel, num_surfaces) \ - [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} - #define NOKERNEL(kernel_enum, func, num_surfaces) \ -@@ -218,7 +222,7 @@ static const struct wm_kernel_info { - const void *data; - unsigned int size; - int num_surfaces; --} wm_kernels[] = { -+} wm_kernels[GEN7_WM_KERNEL_COUNT] = { - NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), - NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), - -@@ -236,6 +240,7 @@ static const struct wm_kernel_info { - - KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), - KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), -+ KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), - }; - #undef KERNEL - -@@ -810,7 +815,7 @@ gen7_emit_cc(struct sna *sna, uint32_t blend_offset) - - DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset)); - -- /* XXX can have upto 8 blend 
states preload, selectable via -+ /* XXX can have up to 8 blend states preload, selectable via - * Render Target Index. What other side-effects of Render Target Index? - */ - -@@ -1792,7 +1797,9 @@ static void gen7_emit_video_state(struct sna *sna, - frame->pitch[0]; - n_src = 6; - } else { -- if (frame->id == FOURCC_UYVY) -+ if (frame->id == FOURCC_RGB888) -+ src_surf_format = GEN7_SURFACEFORMAT_B8G8R8X8_UNORM; -+ else if (frame->id == FOURCC_UYVY) - src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY; - else - src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL; -@@ -1826,6 +1833,23 @@ static void gen7_emit_video_state(struct sna *sna, - gen7_emit_state(sna, op, offset | dirty); - } - -+static unsigned select_video_kernel(const struct sna_video_frame *frame) -+{ -+ switch (frame->id) { -+ case FOURCC_YV12: -+ case FOURCC_I420: -+ case FOURCC_XVMC: -+ return GEN7_WM_KERNEL_VIDEO_PLANAR; -+ -+ case FOURCC_RGB888: -+ case FOURCC_RGB565: -+ return GEN7_WM_KERNEL_VIDEO_RGB; -+ -+ default: -+ return GEN7_WM_KERNEL_VIDEO_PACKED; -+ } -+} -+ - static bool - gen7_render_video(struct sna *sna, - struct sna_video *video, -@@ -1841,9 +1865,9 @@ gen7_render_video(struct sna *sna, - int src_height = frame->src.y2 - frame->src.y1; - float src_offset_x, src_offset_y; - float src_scale_x, src_scale_y; -- int nbox, pix_xoff, pix_yoff; - unsigned filter; - const BoxRec *box; -+ int nbox; - - DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", - __FUNCTION__, -@@ -1878,9 +1902,7 @@ gen7_render_video(struct sna *sna, - GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, - SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), - NO_BLEND, -- is_planar_fourcc(frame->id) ? 
-- GEN7_WM_KERNEL_VIDEO_PLANAR : -- GEN7_WM_KERNEL_VIDEO_PACKED, -+ select_video_kernel(frame), - 2); - tmp.priv = frame; - -@@ -1896,17 +1918,6 @@ gen7_render_video(struct sna *sna, - gen7_align_vertex(sna, &tmp); - gen7_emit_video_state(sna, &tmp); - -- /* Set up the offset for translating from the given region (in screen -- * coordinates) to the backing pixmap. -- */ --#ifdef COMPOSITE -- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; -- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; --#else -- pix_xoff = 0; -- pix_yoff = 0; --#endif -- - DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", - __FUNCTION__, - frame->src.x1, frame->src.y1, -@@ -1928,45 +1939,36 @@ gen7_render_video(struct sna *sna, - box = region_rects(dstRegion); - nbox = region_num_rects(dstRegion); - while (nbox--) { -- BoxRec r; -- -- DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", -+ DBG(("%s: dst=(%d, %d), (%d, %d); src=(%f, %f), (%f, %f)\n", - __FUNCTION__, - box->x1, box->y1, - box->x2, box->y2, -- pix_xoff, pix_yoff, - box->x1 * src_scale_x + src_offset_x, - box->y1 * src_scale_y + src_offset_y, - box->x2 * src_scale_x + src_offset_x, - box->y2 * src_scale_y + src_offset_y)); - -- r.x1 = box->x1 + pix_xoff; -- r.x2 = box->x2 + pix_xoff; -- r.y1 = box->y1 + pix_yoff; -- r.y2 = box->y2 + pix_yoff; -- - gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state); - -- OUT_VERTEX(r.x2, r.y2); -+ OUT_VERTEX(box->x2, box->y2); - OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y2); -+ OUT_VERTEX(box->x1, box->y2); - OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y1); -+ OUT_VERTEX(box->x1, box->y1); - OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - -- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { -- sna_damage_add_box(&priv->gpu_damage, &r); 
-- sna_damage_subtract_box(&priv->cpu_damage, &r); -- } - box++; - } -- - gen4_vertex_flush(sna); -+ -+ if (!DAMAGE_IS_ALL(priv->gpu_damage)) -+ sna_damage_add(&priv->gpu_damage, dstRegion); -+ - return true; - } - -@@ -2048,12 +2050,13 @@ gen7_composite_picture(struct sna *sna, - if (channel->repeat || - (x >= 0 && - y >= 0 && -- x + w < pixmap->drawable.width && -- y + h < pixmap->drawable.height)) { -+ x + w <= pixmap->drawable.width && -+ y + h <= pixmap->drawable.height)) { - struct sna_pixmap *priv = sna_pixmap(pixmap); - if (priv && priv->clear) { - DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); -- return gen4_channel_init_solid(sna, channel, priv->clear_color); -+ return gen4_channel_init_solid(sna, channel, -+ solid_color(picture->format, priv->clear_color)); - } - } - } else -@@ -2147,7 +2150,9 @@ gen7_composite_set_target(struct sna *sna, - } else - sna_render_picture_extents(dst, &box); - -- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; -+ hint = PREFER_GPU | RENDER_GPU; -+ if (!need_tiling(sna, op->dst.width, op->dst.height)) -+ hint |= FORCE_GPU; - if (!partial) { - hint |= IGNORE_DAMAGE; - if (w == op->dst.width && h == op->dst.height) -@@ -2185,46 +2190,78 @@ gen7_composite_set_target(struct sna *sna, - - static bool - try_blt(struct sna *sna, -- PicturePtr dst, PicturePtr src, -- int width, int height) -+ uint8_t op, -+ PicturePtr src, -+ PicturePtr mask, -+ PicturePtr dst, -+ int16_t src_x, int16_t src_y, -+ int16_t msk_x, int16_t msk_y, -+ int16_t dst_x, int16_t dst_y, -+ int16_t width, int16_t height, -+ unsigned flags, -+ struct sna_composite_op *tmp) - { - struct kgem_bo *bo; - - if (sna->kgem.mode == KGEM_BLT) { - DBG(("%s: already performing BLT\n", __FUNCTION__)); -- return true; -+ goto execute; - } - - if (too_large(width, height)) { - DBG(("%s: operation too large for 3D pipe (%d, %d)\n", - __FUNCTION__, width, height)); -- return true; -+ goto execute; - } - - bo = 
__sna_drawable_peek_bo(dst->pDrawable); - if (bo == NULL) -- return true; -- if (bo->rq) -- return RQ_IS_BLT(bo->rq); -+ goto execute; -+ -+ if (untiled_tlb_miss(bo)) -+ goto execute; -+ -+ if (bo->rq) { -+ if (RQ_IS_BLT(bo->rq)) -+ goto execute; -+ -+ return false; -+ } -+ -+ if (bo->tiling == I915_TILING_Y) -+ goto upload; -+ -+ if (src->pDrawable == dst->pDrawable && -+ (sna->render_state.gt < 3 || width*height < 1024) && -+ can_switch_to_blt(sna, bo, 0)) -+ goto execute; - - if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) -- return true; -+ goto execute; - - if (src->pDrawable) { -- bo = __sna_drawable_peek_bo(src->pDrawable); -- if (bo == NULL) -- return true; -+ struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); -+ if (s == NULL) -+ goto upload; - -- if (prefer_blt_bo(sna, bo)) -- return true; -+ if (prefer_blt_bo(sna, s, bo)) -+ goto execute; - } - - if (sna->kgem.ring == KGEM_BLT) { - DBG(("%s: already performing BLT\n", __FUNCTION__)); -- return true; -+ goto execute; - } - -- return false; -+upload: -+ flags |= COMPOSITE_UPLOAD; -+execute: -+ return sna_blt_composite(sna, op, -+ src, dst, -+ src_x, src_y, -+ dst_x, dst_y, -+ width, height, -+ flags, tmp); - } - - static bool -@@ -2454,13 +2491,13 @@ gen7_render_composite(struct sna *sna, - width, height, sna->kgem.mode, sna->kgem.ring)); - - if (mask == NULL && -- try_blt(sna, dst, src, width, height) && -- sna_blt_composite(sna, op, -- src, dst, -- src_x, src_y, -- dst_x, dst_y, -- width, height, -- flags, tmp)) -+ try_blt(sna, op, -+ src, mask, dst, -+ src_x, src_y, -+ msk_x, msk_y, -+ dst_x, dst_y, -+ width, height, -+ flags, tmp)) - return true; - - if (gen7_composite_fallback(sna, src, mask, dst)) -@@ -2878,27 +2915,37 @@ prefer_blt_copy(struct sna *sna, - - assert((flags & COPY_SYNC) == 0); - -- if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) -- return true; -- - if (untiled_tlb_miss(src_bo) || - untiled_tlb_miss(dst_bo)) - return true; - -- if 
(force_blt_ring(sna)) -+ if (flags & COPY_DRI && !sna->kgem.has_semaphores) -+ return false; -+ -+ if (force_blt_ring(sna, dst_bo)) -+ return true; -+ -+ if ((flags & COPY_SMALL || -+ (sna->render_state.gt < 3 && src_bo == dst_bo)) && -+ can_switch_to_blt(sna, dst_bo, flags)) - return true; - - if (kgem_bo_is_render(dst_bo) || - kgem_bo_is_render(src_bo)) - return false; - -+ if (flags & COPY_LAST && -+ sna->render_state.gt < 3 && -+ can_switch_to_blt(sna, dst_bo, flags)) -+ return true; -+ - if (prefer_render_ring(sna, dst_bo)) - return false; - - if (!prefer_blt_ring(sna, dst_bo, flags)) - return false; - -- return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); -+ return prefer_blt_bo(sna, src_bo, dst_bo); - } - - static bool -@@ -2946,7 +2993,7 @@ fallback_blt: - &extents)) { - bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); - -- if ((big || can_switch_to_blt(sna, dst_bo, flags)) && -+ if ((big || !prefer_render_ring(sna, dst_bo)) && - sna_blt_copy_boxes(sna, alu, - src_bo, src_dx, src_dy, - dst_bo, dst_dx, dst_dy, -@@ -2961,8 +3008,7 @@ fallback_blt: - assert(src->depth == dst->depth); - assert(src->width == dst->width); - assert(src->height == dst->height); -- return sna_render_copy_boxes__overlap(sna, alu, -- src, src_bo, -+ return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, - src_dx, src_dy, - dst_dx, dst_dy, - box, n, &extents); -diff --git a/src/sna/gen8_render.c b/src/sna/gen8_render.c -index 6eb11452..445983b1 100644 ---- a/src/sna/gen8_render.c -+++ b/src/sna/gen8_render.c -@@ -106,6 +106,12 @@ static const uint32_t ps_kernel_planar[][4] = { - #include "exa_wm_yuv_rgb.g8b" - #include "exa_wm_write.g8b" - }; -+ -+static const uint32_t ps_kernel_rgb[][4] = { -+#include "exa_wm_src_affine.g8b" -+#include "exa_wm_src_sample_argb.g8b" -+#include "exa_wm_write.g8b" -+}; - #endif - - #define SURFACE_DW (64 / sizeof(uint32_t)); -@@ -119,7 +125,7 @@ static const struct wm_kernel_info { - const void *data; - unsigned int 
size; - int num_surfaces; --} wm_kernels[] = { -+} wm_kernels[GEN8_WM_KERNEL_COUNT] = { - NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2), - NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2), - -@@ -138,6 +144,7 @@ static const struct wm_kernel_info { - #if !NO_VIDEO - KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), - KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), -+ KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), - #endif - }; - #undef KERNEL -@@ -205,6 +212,33 @@ static const struct blendinfo { - #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) - #define OUT_VERTEX_F(v) vertex_emit(sna, v) - -+struct gt_info { -+ const char *name; -+ struct { -+ int max_vs_entries; -+ } urb; -+}; -+ -+static const struct gt_info bdw_gt_info = { -+ .name = "Broadwell (gen8)", -+ .urb = { .max_vs_entries = 960 }, -+}; -+ -+static bool is_bdw(struct sna *sna) -+{ -+ return sna->kgem.gen == 0100; -+} -+ -+static const struct gt_info chv_gt_info = { -+ .name = "Cherryview (gen8)", -+ .urb = { .max_vs_entries = 640 }, -+}; -+ -+static bool is_chv(struct sna *sna) -+{ -+ return sna->kgem.gen == 0101; -+} -+ - static inline bool too_large(int width, int height) - { - return width > GEN8_MAX_SIZE || height > GEN8_MAX_SIZE; -@@ -462,7 +496,7 @@ gen8_emit_urb(struct sna *sna) - { - /* num of VS entries must be divisible by 8 if size < 9 */ - OUT_BATCH(GEN8_3DSTATE_URB_VS | (2 - 2)); -- OUT_BATCH(960 << URB_ENTRY_NUMBER_SHIFT | -+ OUT_BATCH(sna->render_state.gen8.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT | - (2 - 1) << URB_ENTRY_SIZE_SHIFT | - 4 << URB_STARTING_ADDRESS_SHIFT); - -@@ -873,7 +907,7 @@ gen8_emit_cc(struct sna *sna, uint32_t blend) - assert(blend / GEN8_BLENDFACTOR_COUNT > 0); - assert(blend % GEN8_BLENDFACTOR_COUNT > 0); - -- /* XXX can have upto 8 blend states preload, selectable via -+ /* XXX can have up to 8 blend states preload, selectable via - * Render Target Index. What other side-effects of Render Target Index? 
- */ - -@@ -1167,6 +1201,7 @@ gen8_emit_pipe_stall(struct sna *sna) - { - OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2)); - OUT_BATCH(PIPE_CONTROL_CS_STALL | -+ PIPE_CONTROL_FLUSH | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - OUT_BATCH64(0); - OUT_BATCH64(0); -@@ -1876,12 +1911,12 @@ gen8_composite_picture(struct sna *sna, - if (channel->repeat || - (x >= 0 && - y >= 0 && -- x + w < pixmap->drawable.width && -- y + h < pixmap->drawable.height)) { -+ x + w <= pixmap->drawable.width && -+ y + h <= pixmap->drawable.height)) { - struct sna_pixmap *priv = sna_pixmap(pixmap); - if (priv && priv->clear) { - DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); -- return gen4_channel_init_solid(sna, channel, priv->clear_color); -+ return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); - } - } - } else -@@ -1961,7 +1996,9 @@ gen8_composite_set_target(struct sna *sna, - } else - sna_render_picture_extents(dst, &box); - -- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; -+ hint = PREFER_GPU | RENDER_GPU; -+ if (!need_tiling(sna, op->dst.width, op->dst.height)) -+ hint |= FORCE_GPU; - if (!partial) { - hint |= IGNORE_DAMAGE; - if (w == op->dst.width && h == op->dst.height) -@@ -2002,46 +2039,78 @@ gen8_composite_set_target(struct sna *sna, - - static bool - try_blt(struct sna *sna, -- PicturePtr dst, PicturePtr src, -- int width, int height) -+ uint8_t op, -+ PicturePtr src, -+ PicturePtr mask, -+ PicturePtr dst, -+ int16_t src_x, int16_t src_y, -+ int16_t msk_x, int16_t msk_y, -+ int16_t dst_x, int16_t dst_y, -+ int16_t width, int16_t height, -+ unsigned flags, -+ struct sna_composite_op *tmp) - { - struct kgem_bo *bo; - - if (sna->kgem.mode == KGEM_BLT) { - DBG(("%s: already performing BLT\n", __FUNCTION__)); -- return true; -+ goto execute; - } - - if (too_large(width, height)) { - DBG(("%s: operation too large for 3D pipe (%d, %d)\n", - __FUNCTION__, width, height)); -- return true; -+ goto execute; - } - - 
bo = __sna_drawable_peek_bo(dst->pDrawable); - if (bo == NULL) -- return true; -- if (bo->rq) -- return RQ_IS_BLT(bo->rq); -+ goto execute; -+ -+ if (untiled_tlb_miss(bo)) -+ goto execute; -+ -+ if (bo->rq) { -+ if (RQ_IS_BLT(bo->rq)) -+ goto execute; -+ -+ return false; -+ } -+ -+ if (bo->tiling == I915_TILING_Y) -+ goto upload; - - if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) -- return true; -+ goto execute; -+ -+ if (src->pDrawable == dst->pDrawable && -+ (sna->render_state.gt < 3 || width*height < 1024) && -+ can_switch_to_blt(sna, bo, 0)) -+ goto execute; - - if (src->pDrawable) { -- bo = __sna_drawable_peek_bo(src->pDrawable); -- if (bo == NULL) -- return true; -+ struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); -+ if (s == NULL) -+ goto upload; - -- if (prefer_blt_bo(sna, bo)) -- return RQ_IS_BLT(bo->rq); -+ if (prefer_blt_bo(sna, s, bo)) -+ goto execute; - } - - if (sna->kgem.ring == KGEM_BLT) { - DBG(("%s: already performing BLT\n", __FUNCTION__)); -- return true; -+ goto execute; - } - -- return false; -+upload: -+ flags |= COMPOSITE_UPLOAD; -+execute: -+ return sna_blt_composite(sna, op, -+ src, dst, -+ src_x, src_y, -+ dst_x, dst_y, -+ width, height, -+ flags, tmp); - } - - static bool -@@ -2271,13 +2340,13 @@ gen8_render_composite(struct sna *sna, - width, height, sna->kgem.mode, sna->kgem.ring)); - - if (mask == NULL && -- try_blt(sna, dst, src, width, height) && -- sna_blt_composite(sna, op, -- src, dst, -- src_x, src_y, -- dst_x, dst_y, -- width, height, -- flags, tmp)) -+ try_blt(sna, op, -+ src, mask, dst, -+ src_x, src_y, -+ msk_x, msk_y, -+ dst_x, dst_y, -+ width, height, -+ flags, tmp)) - return true; - - if (gen8_composite_fallback(sna, src, mask, dst)) -@@ -2700,27 +2769,37 @@ prefer_blt_copy(struct sna *sna, - - assert((flags & COPY_SYNC) == 0); - -- if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) -- return true; -- - if (untiled_tlb_miss(src_bo) || - untiled_tlb_miss(dst_bo)) - return true; 
- -- if (force_blt_ring(sna)) -+ if (flags & COPY_DRI && !sna->kgem.has_semaphores) -+ return false; -+ -+ if (force_blt_ring(sna, dst_bo)) -+ return true; -+ -+ if ((flags & COPY_SMALL || -+ (sna->render_state.gt < 3 && src_bo == dst_bo)) && -+ can_switch_to_blt(sna, dst_bo, flags)) - return true; - - if (kgem_bo_is_render(dst_bo) || - kgem_bo_is_render(src_bo)) - return false; - -+ if (flags & COPY_LAST && -+ sna->render_state.gt < 3 && -+ can_switch_to_blt(sna, dst_bo, flags)) -+ return true; -+ - if (prefer_render_ring(sna, dst_bo)) - return false; - - if (!prefer_blt_ring(sna, dst_bo, flags)) - return false; - -- return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); -+ return prefer_blt_bo(sna, src_bo, dst_bo); - } - - static bool -@@ -2770,7 +2849,7 @@ fallback_blt: - &extents)) { - bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); - -- if ((big || can_switch_to_blt(sna, dst_bo, flags)) && -+ if ((big || !prefer_render_ring(sna, dst_bo)) && - sna_blt_copy_boxes(sna, alu, - src_bo, src_dx, src_dy, - dst_bo, dst_dx, dst_dy, -@@ -2785,8 +2864,7 @@ fallback_blt: - assert(src->depth == dst->depth); - assert(src->width == dst->width); - assert(src->height == dst->height); -- return sna_render_copy_boxes__overlap(sna, alu, -- src, src_bo, -+ return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, - src_dx, src_dy, - dst_dx, dst_dy, - box, n, &extents); -@@ -3665,7 +3743,9 @@ static void gen8_emit_video_state(struct sna *sna, - frame->pitch[0]; - n_src = 6; - } else { -- if (frame->id == FOURCC_UYVY) -+ if (frame->id == FOURCC_RGB888) -+ src_surf_format = SURFACEFORMAT_B8G8R8X8_UNORM; -+ else if (frame->id == FOURCC_UYVY) - src_surf_format = SURFACEFORMAT_YCRCB_SWAPY; - else - src_surf_format = SURFACEFORMAT_YCRCB_NORMAL; -@@ -3697,6 +3777,23 @@ static void gen8_emit_video_state(struct sna *sna, - gen8_emit_state(sna, op, offset); - } - -+static unsigned select_video_kernel(const struct sna_video_frame *frame) -+{ -+ switch 
(frame->id) { -+ case FOURCC_YV12: -+ case FOURCC_I420: -+ case FOURCC_XVMC: -+ return GEN8_WM_KERNEL_VIDEO_PLANAR; -+ -+ case FOURCC_RGB888: -+ case FOURCC_RGB565: -+ return GEN8_WM_KERNEL_VIDEO_RGB; -+ -+ default: -+ return GEN8_WM_KERNEL_VIDEO_PACKED; -+ } -+} -+ - static bool - gen8_render_video(struct sna *sna, - struct sna_video *video, -@@ -3712,9 +3809,9 @@ gen8_render_video(struct sna *sna, - int src_height = frame->src.y2 - frame->src.y1; - float src_offset_x, src_offset_y; - float src_scale_x, src_scale_y; -- int nbox, pix_xoff, pix_yoff; - unsigned filter; - const BoxRec *box; -+ int nbox; - - DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", - __FUNCTION__, -@@ -3743,6 +3840,11 @@ gen8_render_video(struct sna *sna, - tmp.floats_per_vertex = 3; - tmp.floats_per_rect = 9; - -+ DBG(("%s: scaling?=%d, planar?=%d [%x]\n", -+ __FUNCTION__, -+ src_width != dst_width || src_height != dst_height, -+ is_planar_fourcc(frame->id), frame->id)); -+ - if (src_width == dst_width && src_height == dst_height) - filter = SAMPLER_FILTER_NEAREST; - else -@@ -3752,9 +3854,7 @@ gen8_render_video(struct sna *sna, - GEN8_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, - SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), - NO_BLEND, -- is_planar_fourcc(frame->id) ? -- GEN8_WM_KERNEL_VIDEO_PLANAR : -- GEN8_WM_KERNEL_VIDEO_PACKED, -+ select_video_kernel(frame), - 2); - tmp.priv = frame; - -@@ -3770,17 +3870,6 @@ gen8_render_video(struct sna *sna, - gen8_align_vertex(sna, &tmp); - gen8_emit_video_state(sna, &tmp); - -- /* Set up the offset for translating from the given region (in screen -- * coordinates) to the backing pixmap. 
-- */ --#ifdef COMPOSITE -- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; -- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; --#else -- pix_xoff = 0; -- pix_yoff = 0; --#endif -- - DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", - __FUNCTION__, - frame->src.x1, frame->src.y1, -@@ -3802,45 +3891,36 @@ gen8_render_video(struct sna *sna, - box = region_rects(dstRegion); - nbox = region_num_rects(dstRegion); - while (nbox--) { -- BoxRec r; -- - DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", - __FUNCTION__, - box->x1, box->y1, - box->x2, box->y2, -- pix_xoff, pix_yoff, - box->x1 * src_scale_x + src_offset_x, - box->y1 * src_scale_y + src_offset_y, - box->x2 * src_scale_x + src_offset_x, - box->y2 * src_scale_y + src_offset_y)); - -- r.x1 = box->x1 + pix_xoff; -- r.x2 = box->x2 + pix_xoff; -- r.y1 = box->y1 + pix_yoff; -- r.y2 = box->y2 + pix_yoff; -- - gen8_get_rectangles(sna, &tmp, 1, gen8_emit_video_state); - -- OUT_VERTEX(r.x2, r.y2); -+ OUT_VERTEX(box->x2, box->y2); - OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y2); -+ OUT_VERTEX(box->x1, box->y2); - OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - -- OUT_VERTEX(r.x1, r.y1); -+ OUT_VERTEX(box->x1, box->y1); - OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); - OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - -- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { -- sna_damage_add_box(&priv->gpu_damage, &r); -- sna_damage_subtract_box(&priv->cpu_damage, &r); -- } - box++; - } -- - gen8_vertex_flush(sna); -+ -+ if (!DAMAGE_IS_ALL(priv->gpu_damage)) -+ sna_damage_add(&priv->gpu_damage, dstRegion); -+ - return true; - } - #endif -@@ -3896,6 +3976,13 @@ static bool gen8_render_setup(struct sna *sna) - state->gt = ((devid >> 4) & 0xf) + 1; - DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); - -+ if (is_bdw(sna)) -+ state->info = 
&bdw_gt_info; -+ else if (is_chv(sna)) -+ state->info = &chv_gt_info; -+ else -+ return false; -+ - sna_static_stream_init(&general); - - /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer -@@ -4007,5 +4094,5 @@ const char *gen8_render_init(struct sna *sna, const char *backend) - - sna->render.max_3d_size = GEN8_MAX_SIZE; - sna->render.max_3d_pitch = 1 << 18; -- return "Broadwell"; -+ return sna->render_state.gen8.info->name; - } -diff --git a/src/sna/gen8_render.h b/src/sna/gen8_render.h -index eb4928e7..e6a8dc55 100644 ---- a/src/sna/gen8_render.h -+++ b/src/sna/gen8_render.h -@@ -335,6 +335,7 @@ - #define PIPE_CONTROL_IS_FLUSH (1 << 11) - #define PIPE_CONTROL_TC_FLUSH (1 << 10) - #define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) -+#define PIPE_CONTROL_FLUSH (1 << 7) - #define PIPE_CONTROL_GLOBAL_GTT (1 << 2) - #define PIPE_CONTROL_LOCAL_PGTT (0 << 2) - #define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) -diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c -new file mode 100644 -index 00000000..e5f12c72 ---- /dev/null -+++ b/src/sna/gen9_render.c -@@ -0,0 +1,4156 @@ -+/* -+ * Copyright © 2012,2013 Intel Corporation -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. 
-+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ * Authors: -+ * Chris Wilson -+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include "sna.h" -+#include "sna_reg.h" -+#include "sna_render.h" -+#include "sna_render_inline.h" -+#include "sna_video.h" -+ -+#include "gen9_render.h" -+#include "gen8_eu.h" -+#include "gen4_common.h" -+#include "gen4_source.h" -+#include "gen4_vertex.h" -+#include "gen6_common.h" -+#include "gen8_vertex.h" -+ -+#define SIM 1 -+ -+#define ALWAYS_INVALIDATE 0 -+#define ALWAYS_FLUSH 0 -+#define ALWAYS_STALL 0 -+ -+#define NO_COMPOSITE 0 -+#define NO_COMPOSITE_SPANS 0 -+#define NO_COPY 0 -+#define NO_COPY_BOXES 0 -+#define NO_FILL 0 -+#define NO_FILL_BOXES 0 -+#define NO_FILL_ONE 0 -+#define NO_FILL_CLEAR 0 -+#define NO_VIDEO 0 -+ -+#define USE_8_PIXEL_DISPATCH 1 -+#define USE_16_PIXEL_DISPATCH 1 -+#define USE_32_PIXEL_DISPATCH 0 -+ -+#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH -+#error "Must select at least 8, 16 or 32 pixel dispatch" -+#endif -+ -+#define GEN9_MAX_SIZE 16384 -+#define GEN9_GT_BIAS 1 /* Each GT is bigger than previous gen */ -+ -+/* XXX Todo -+ * -+ * STR (software tiled rendering) mode. No, really. -+ * 64x32 pixel blocks align with the rendering cache. Worth considering. -+ */ -+ -+#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) -+ -+/* Pipeline stages: -+ * 1. Command Streamer (CS) -+ * 2. Vertex Fetch (VF) -+ * 3. Vertex Shader (VS) -+ * 4. Hull Shader (HS) -+ * 5. Tesselation Engine (TE) -+ * 6. 
Domain Shader (DS) -+ * 7. Geometry Shader (GS) -+ * 8. Stream Output Logic (SOL) -+ * 9. Clipper (CLIP) -+ * 10. Strip/Fan (SF) -+ * 11. Windower/Masker (WM) -+ * 12. Color Calculator (CC) -+ */ -+ -+#if !NO_VIDEO -+static const uint32_t ps_kernel_packed[][4] = { -+#include "exa_wm_src_affine.g8b" -+#include "exa_wm_src_sample_argb.g8b" -+#include "exa_wm_yuv_rgb.g8b" -+#include "exa_wm_write.g8b" -+}; -+ -+static const uint32_t ps_kernel_planar[][4] = { -+#include "exa_wm_src_affine.g8b" -+#include "exa_wm_src_sample_planar.g8b" -+#include "exa_wm_yuv_rgb.g8b" -+#include "exa_wm_write.g8b" -+}; -+ -+static const uint32_t ps_kernel_rgb[][4] = { -+#include "exa_wm_src_affine.g8b" -+#include "exa_wm_src_sample_argb.g8b" -+#include "exa_wm_write.g8b" -+}; -+#endif -+ -+#define SURFACE_DW (64 / sizeof(uint32_t)); -+ -+#define KERNEL(kernel_enum, kernel, num_surfaces) \ -+ [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} -+#define NOKERNEL(kernel_enum, func, num_surfaces) \ -+ [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} -+static const struct wm_kernel_info { -+ const char *name; -+ const void *data; -+ unsigned int size; -+ int num_surfaces; -+} wm_kernels[] = { -+ NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2), -+ NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2), -+ -+ NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3), -+ NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3), -+ -+ NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3), -+ NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3), -+ -+ NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3), -+ NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3), -+ -+ NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2), -+ NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2), -+ -+#if !NO_VIDEO -+ KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), -+ KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), -+ KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), -+#endif -+}; 
-+#undef KERNEL -+ -+static const struct blendinfo { -+ uint8_t src_alpha; -+ uint8_t src_blend; -+ uint8_t dst_blend; -+} gen9_blend_op[] = { -+ /* Clear */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, -+ /* Src */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, -+ /* Dst */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, -+ /* Over */ {1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, -+ /* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, -+ /* In */ {0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, -+ /* InReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, -+ /* Out */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, -+ /* OutReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, -+ /* Atop */ {1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, -+ /* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, -+ /* Xor */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, -+ /* Add */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, -+}; -+ -+/** -+ * Highest-valued BLENDFACTOR used in gen9_blend_op. -+ * -+ * This leaves out GEN9_BLENDFACTOR_INV_DST_COLOR, -+ * GEN9_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, -+ * GEN9_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} -+ */ -+#define GEN9_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1) -+ -+#define GEN9_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen9_blend_state), 64) -+ -+#define BLEND_OFFSET(s, d) \ -+ ((d != BLENDFACTOR_ZERO) << 15 | ((s) * GEN9_BLENDFACTOR_COUNT + (d)) << 4) -+ -+#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO) -+#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO) -+ -+#define SAMPLER_OFFSET(sf, se, mf, me) \ -+ (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) -+ -+#define VERTEX_2s2s 0 -+ -+#define COPY_SAMPLER 0 -+#define COPY_VERTEX VERTEX_2s2s -+#define COPY_FLAGS(a) GEN9_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? 
NO_BLEND : CLEAR, GEN9_WM_KERNEL_NOMASK, COPY_VERTEX) -+ -+#define FILL_SAMPLER 1 -+#define FILL_VERTEX VERTEX_2s2s -+#define FILL_FLAGS(op, format) GEN9_SET_FLAGS(FILL_SAMPLER, gen9_get_blend((op), false, (format)), GEN9_WM_KERNEL_NOMASK, FILL_VERTEX) -+#define FILL_FLAGS_NOBLEND GEN9_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN9_WM_KERNEL_NOMASK, FILL_VERTEX) -+ -+#define GEN9_SAMPLER(f) (((f) >> 20) & 0xfff) -+#define GEN9_BLEND(f) (((f) >> 4) & 0x7ff) -+#define GEN9_READS_DST(f) (((f) >> 15) & 1) -+#define GEN9_KERNEL(f) (((f) >> 16) & 0xf) -+#define GEN9_VERTEX(f) (((f) >> 0) & 0xf) -+#define GEN9_SET_FLAGS(S, B, K, V) ((S) << 20 | (K) << 16 | (B) | (V)) -+ -+#define OUT_BATCH(v) batch_emit(sna, v) -+#define OUT_BATCH64(v) batch_emit64(sna, v) -+#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) -+#define OUT_VERTEX_F(v) vertex_emit(sna, v) -+ -+struct gt_info { -+ const char *name; -+ struct { -+ int max_vs_entries; -+ } urb; -+}; -+ -+static const struct gt_info min_gt_info = { -+ .name = "Skylake (gen9)", -+ .urb = { .max_vs_entries = 240 }, -+}; -+ -+static const struct gt_info skl_gt_info = { -+ .name = "Skylake (gen9)", -+ .urb = { .max_vs_entries = 960 }, -+}; -+ -+static const struct gt_info bxt_gt_info = { -+ .name = "Broxton (gen9)", -+ .urb = { .max_vs_entries = 320 }, -+}; -+ -+static const struct gt_info kbl_gt_info = { -+ .name = "Kabylake (gen9)", -+ .urb = { .max_vs_entries = 960 }, -+}; -+ -+static const struct gt_info glk_gt_info = { -+ .name = "Geminilake (gen9)", -+ .urb = { .max_vs_entries = 320 }, -+}; -+ -+static bool is_skl(struct sna *sna) -+{ -+ return sna->kgem.gen == 0110; -+} -+ -+static bool is_bxt(struct sna *sna) -+{ -+ return sna->kgem.gen == 0111; -+} -+ -+static bool is_kbl(struct sna *sna) -+{ -+ return sna->kgem.gen == 0112; -+} -+ -+static bool is_glk(struct sna *sna) -+{ -+ return sna->kgem.gen == 0113; -+} -+ -+ -+static inline bool too_large(int width, int height) -+{ -+ return width > GEN9_MAX_SIZE || height > GEN9_MAX_SIZE; 
-+} -+ -+static inline bool unaligned(struct kgem_bo *bo, int bpp) -+{ -+ /* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */ -+#if 0 -+ int x, y; -+ -+ if (bo->proxy == NULL) -+ return false; -+ -+ /* Assume that all tiled proxies are constructed correctly. */ -+ if (bo->tiling) -+ return false; -+ -+ DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n", -+ __FUNCTION__, bo->delta, bo->pitch, bpp, -+ 8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch)); -+ -+ /* This may be a random userptr map, check that it meets the -+ * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4. -+ */ -+ y = bo->delta / bo->pitch; -+ if (y & 3) -+ return true; -+ -+ x = 8 * (bo->delta - y * bo->pitch); -+ if (x & (4*bpp - 1)) -+ return true; -+ -+ return false; -+#else -+ return false; -+#endif -+} -+ -+static uint32_t gen9_get_blend(int op, -+ bool has_component_alpha, -+ uint32_t dst_format) -+{ -+ uint32_t src, dst; -+ -+ COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN9_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff); -+ -+ src = gen9_blend_op[op].src_blend; -+ dst = gen9_blend_op[op].dst_blend; -+ -+ /* If there's no dst alpha channel, adjust the blend op so that -+ * we'll treat it always as 1. -+ */ -+ if (PICT_FORMAT_A(dst_format) == 0) { -+ if (src == BLENDFACTOR_DST_ALPHA) -+ src = BLENDFACTOR_ONE; -+ else if (src == BLENDFACTOR_INV_DST_ALPHA) -+ src = BLENDFACTOR_ZERO; -+ } -+ -+ /* If the source alpha is being used, then we should only be in a -+ * case where the source blend factor is 0, and the source blend -+ * value is the mask channels multiplied by the source picture's alpha. 
-+ */ -+ if (has_component_alpha && gen9_blend_op[op].src_alpha) { -+ if (dst == BLENDFACTOR_SRC_ALPHA) -+ dst = BLENDFACTOR_SRC_COLOR; -+ else if (dst == BLENDFACTOR_INV_SRC_ALPHA) -+ dst = BLENDFACTOR_INV_SRC_COLOR; -+ } -+ -+ DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", -+ op, dst_format, PICT_FORMAT_A(dst_format), -+ src, dst, (int)(BLEND_OFFSET(src, dst)>>4))); -+ assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff); -+ return BLEND_OFFSET(src, dst); -+} -+ -+static uint32_t gen9_get_card_format(PictFormat format) -+{ -+ switch (format) { -+ default: -+ return -1; -+ case PICT_a8r8g8b8: -+ return SURFACEFORMAT_B8G8R8A8_UNORM; -+ case PICT_x8r8g8b8: -+ return SURFACEFORMAT_B8G8R8X8_UNORM; -+ case PICT_a8b8g8r8: -+ return SURFACEFORMAT_R8G8B8A8_UNORM; -+ case PICT_x8b8g8r8: -+ return SURFACEFORMAT_R8G8B8X8_UNORM; -+#ifdef PICT_a2r10g10b10 -+ case PICT_a2r10g10b10: -+ return SURFACEFORMAT_B10G10R10A2_UNORM; -+ case PICT_x2r10g10b10: -+ return SURFACEFORMAT_B10G10R10X2_UNORM; -+#endif -+ case PICT_r8g8b8: -+ return SURFACEFORMAT_R8G8B8_UNORM; -+ case PICT_r5g6b5: -+ return SURFACEFORMAT_B5G6R5_UNORM; -+ case PICT_a1r5g5b5: -+ return SURFACEFORMAT_B5G5R5A1_UNORM; -+ case PICT_a8: -+ return SURFACEFORMAT_A8_UNORM; -+ case PICT_a4r4g4b4: -+ return SURFACEFORMAT_B4G4R4A4_UNORM; -+ } -+} -+ -+static uint32_t gen9_get_dest_format(PictFormat format) -+{ -+ switch (format) { -+ default: -+ return -1; -+ case PICT_a8r8g8b8: -+ case PICT_x8r8g8b8: -+ return SURFACEFORMAT_B8G8R8A8_UNORM; -+ case PICT_a8b8g8r8: -+ case PICT_x8b8g8r8: -+ return SURFACEFORMAT_R8G8B8A8_UNORM; -+#ifdef PICT_a2r10g10b10 -+ case PICT_a2r10g10b10: -+ case PICT_x2r10g10b10: -+ return SURFACEFORMAT_B10G10R10A2_UNORM; -+#endif -+ case PICT_r5g6b5: -+ return SURFACEFORMAT_B5G6R5_UNORM; -+ case PICT_x1r5g5b5: -+ case PICT_a1r5g5b5: -+ return SURFACEFORMAT_B5G5R5A1_UNORM; -+ case PICT_a8: -+ return SURFACEFORMAT_A8_UNORM; -+ case PICT_a4r4g4b4: -+ case PICT_x4r4g4b4: -+ return 
SURFACEFORMAT_B4G4R4A4_UNORM; -+ } -+} -+ -+static bool gen9_check_dst_format(PictFormat format) -+{ -+ if (gen9_get_dest_format(format) != -1) -+ return true; -+ -+ DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); -+ return false; -+} -+ -+static bool gen9_check_format(uint32_t format) -+{ -+ if (gen9_get_card_format(format) != -1) -+ return true; -+ -+ DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); -+ return false; -+} -+ -+static uint32_t gen9_filter(uint32_t filter) -+{ -+ switch (filter) { -+ default: -+ assert(0); -+ case PictFilterNearest: -+ return SAMPLER_FILTER_NEAREST; -+ case PictFilterBilinear: -+ return SAMPLER_FILTER_BILINEAR; -+ } -+} -+ -+static uint32_t gen9_check_filter(PicturePtr picture) -+{ -+ switch (picture->filter) { -+ case PictFilterNearest: -+ case PictFilterBilinear: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static uint32_t gen9_repeat(uint32_t repeat) -+{ -+ switch (repeat) { -+ default: -+ assert(0); -+ case RepeatNone: -+ return SAMPLER_EXTEND_NONE; -+ case RepeatNormal: -+ return SAMPLER_EXTEND_REPEAT; -+ case RepeatPad: -+ return SAMPLER_EXTEND_PAD; -+ case RepeatReflect: -+ return SAMPLER_EXTEND_REFLECT; -+ } -+} -+ -+static bool gen9_check_repeat(PicturePtr picture) -+{ -+ if (!picture->repeat) -+ return true; -+ -+ switch (picture->repeatType) { -+ case RepeatNone: -+ case RepeatNormal: -+ case RepeatPad: -+ case RepeatReflect: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static int -+gen9_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) -+{ -+ int base; -+ -+ if (has_mask) { -+ if (is_ca) { -+ if (gen9_blend_op[op].src_alpha) -+ base = GEN9_WM_KERNEL_MASKSA; -+ else -+ base = GEN9_WM_KERNEL_MASKCA; -+ } else -+ base = GEN9_WM_KERNEL_MASK; -+ } else -+ base = GEN9_WM_KERNEL_NOMASK; -+ -+ return base + !is_affine; -+} -+ -+static void -+gen9_emit_push_constants(struct sna *sna) -+{ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS | 
(2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); -+ OUT_BATCH(0); -+#endif -+} -+ -+static void -+gen9_emit_urb(struct sna *sna) -+{ -+ /* num of VS entries must be divisible by 8 if size < 9 */ -+ OUT_BATCH(GEN9_3DSTATE_URB_VS | (2 - 2)); -+ OUT_BATCH(sna->render_state.gen9.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT | -+ (2 - 1) << URB_ENTRY_SIZE_SHIFT | -+ 4 << URB_STARTING_ADDRESS_SHIFT); -+ -+ OUT_BATCH(GEN9_3DSTATE_URB_HS | (2 - 2)); -+ OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | -+ 4 << URB_STARTING_ADDRESS_SHIFT); -+ -+ OUT_BATCH(GEN9_3DSTATE_URB_DS | (2 - 2)); -+ OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | -+ 4 << URB_STARTING_ADDRESS_SHIFT); -+ -+ OUT_BATCH(GEN9_3DSTATE_URB_GS | (2 - 2)); -+ OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | -+ 4 << URB_STARTING_ADDRESS_SHIFT); -+} -+ -+static void -+gen9_emit_state_base_address(struct sna *sna) -+{ -+ uint32_t num_pages; -+ -+ assert(sna->kgem.surface - sna->kgem.nbatch <= 16384); -+ -+ /* WaBindlessSurfaceStateModifyEnable:skl,bxt */ -+ OUT_BATCH(GEN9_STATE_BASE_ADDRESS | (19 - 1 - 2)); -+ OUT_BATCH64(0); /* general */ -+ OUT_BATCH(0); /* stateless dataport */ -+ OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */ -+ sna->kgem.nbatch, -+ NULL, -+ I915_GEM_DOMAIN_INSTRUCTION << 16, -+ BASE_ADDRESS_MODIFY)); -+ OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */ -+ sna->kgem.nbatch, -+ sna->render_state.gen9.general_bo, -+ I915_GEM_DOMAIN_INSTRUCTION << 16, -+ BASE_ADDRESS_MODIFY)); -+ OUT_BATCH64(0); /* indirect */ -+ OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */ -+ sna->kgem.nbatch, -+ sna->render_state.gen9.general_bo, -+ I915_GEM_DOMAIN_INSTRUCTION << 16, -+ BASE_ADDRESS_MODIFY)); -+ /* upper bounds */ -+ num_pages = 
sna->render_state.gen9.general_bo->size.pages.count; -+ OUT_BATCH(0); /* general */ -+ OUT_BATCH(num_pages << 12 | 1); /* dynamic */ -+ OUT_BATCH(0); /* indirect */ -+ OUT_BATCH(num_pages << 12 | 1); /* instruction */ -+ -+ /* Bindless */ -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+} -+ -+static void -+gen9_emit_vs_invariant(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_3DSTATE_VS | (9 - 2)); -+ OUT_BATCH64(0); /* no VS kernel */ -+ OUT_BATCH(0); -+ OUT_BATCH64(0); /* scratch */ -+ OUT_BATCH(0); -+ OUT_BATCH(1 << 1); /* pass-through */ -+ OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */ -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_CONSTANT_VS | (11 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); -+ OUT_BATCH(0); -+#endif -+} -+ -+static void -+gen9_emit_hs_invariant(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_3DSTATE_HS | (9 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH64(0); /* no HS kernel */ -+ OUT_BATCH64(0); /* scratch */ -+ OUT_BATCH(0); -+ OUT_BATCH(0); /* pass-through */ -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_CONSTANT_HS | (11 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ -+#if 1 -+ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); -+ OUT_BATCH(0); -+#endif -+#endif -+} -+ -+static void -+gen9_emit_te_invariant(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_3DSTATE_TE | (4 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+} -+ -+static void -+gen9_emit_ds_invariant(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_3DSTATE_DS | (11 - 2)); -+ OUT_BATCH64(0); /* no kernel */ -+ OUT_BATCH(0); -+ OUT_BATCH64(0); /* scratch */ -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ 
OUT_BATCH(0); -+ OUT_BATCH(0); -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_CONSTANT_DS | (11 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ -+#if 1 -+ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); -+ OUT_BATCH(0); -+#endif -+#endif -+} -+ -+static void -+gen9_emit_gs_invariant(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_3DSTATE_GS | (10 - 2)); -+ OUT_BATCH64(0); /* no GS kernel */ -+ OUT_BATCH(0); -+ OUT_BATCH64(0); /* scratch */ -+ OUT_BATCH(0); -+ OUT_BATCH(0); /* pass-through */ -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_CONSTANT_GS | (11 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ -+#if 1 -+ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); -+ OUT_BATCH(0); -+#endif -+#endif -+} -+ -+static void -+gen9_emit_sol_invariant(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_3DSTATE_STREAMOUT | (5 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+} -+ -+static void -+gen9_emit_sf_invariant(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_3DSTATE_SF | (4 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+} -+ -+static void -+gen9_emit_clip_invariant(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_3DSTATE_CLIP | (4 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); /* pass-through */ -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); -+ OUT_BATCH(0); -+} -+ -+static void -+gen9_emit_null_depth_buffer(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_3DSTATE_DEPTH_BUFFER | (8 - 2)); -+#if 1 -+ OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT | -+ DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT); 
-+#else -+ OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT | -+ DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT); -+#endif -+ OUT_BATCH64(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH64(0); -+ OUT_BATCH(0); -+#endif -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_STENCIL_BUFFER | (5 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH64(0); -+ OUT_BATCH(0); -+#endif -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_WM_DEPTH_STENCIL | (4 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+#endif -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_CLEAR_PARAMS | (3 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+#endif -+} -+ -+static void -+gen9_emit_wm_invariant(struct sna *sna) -+{ -+ gen9_emit_null_depth_buffer(sna); -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2)); -+ OUT_BATCH(0); -+#endif -+ -+ OUT_BATCH(GEN9_3DSTATE_WM | (2 - 2)); -+ //OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */ -+ OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC); -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_WM_CHROMAKEY | (2 - 2)); -+ OUT_BATCH(0); -+#endif -+ -+#if 0 -+ OUT_BATCH(GEN9_3DSTATE_WM_HZ_OP | (5 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+#endif -+ -+ OUT_BATCH(GEN9_3DSTATE_PS_EXTRA | (2 - 2)); -+ OUT_BATCH(PSX_PIXEL_SHADER_VALID | -+ PSX_ATTRIBUTE_ENABLE); -+ -+ OUT_BATCH(GEN9_3DSTATE_RASTER | (5 - 2)); -+ OUT_BATCH(RASTER_FRONT_WINDING_CCW | -+ RASTER_CULL_NONE); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_SBE_SWIZ | (11 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_CONSTANT_PS | (11 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+#endif -+} -+ -+static void 
-+gen9_emit_cc_invariant(struct sna *sna) -+{ -+} -+ -+static void -+gen9_emit_vf_invariant(struct sna *sna) -+{ -+ int n; -+ -+#if 1 -+ OUT_BATCH(GEN9_3DSTATE_VF | (2 - 2)); -+ OUT_BATCH(0); -+#endif -+ -+ OUT_BATCH(GEN9_3DSTATE_VF_SGVS | (2 - 2)); -+ OUT_BATCH(0); -+ -+ OUT_BATCH(GEN9_3DSTATE_VF_TOPOLOGY | (2 - 2)); -+ OUT_BATCH(RECTLIST); -+ -+ OUT_BATCH(GEN9_3DSTATE_VF_STATISTICS | 0); -+ -+ for (n = 1; n <= 3; n++) { -+ OUT_BATCH(GEN9_3DSTATE_VF_INSTANCING | (3 - 2)); -+ OUT_BATCH(n); -+ OUT_BATCH(0); -+ } -+} -+ -+static void -+gen9_emit_invariant(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_PIPELINE_SELECT | -+ PIPELINE_SELECTION_MASK | -+ PIPELINE_SELECT_3D); -+ -+#if SIM -+ OUT_BATCH(GEN9_STATE_SIP | (3 - 2)); -+ OUT_BATCH64(0); -+#endif -+ -+ OUT_BATCH(GEN9_3DSTATE_MULTISAMPLE | (2 - 2)); -+ OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER | -+ MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ -+ -+ OUT_BATCH(GEN9_3DSTATE_SAMPLE_MASK | (2 - 2)); -+ OUT_BATCH(1); -+ -+#if SIM -+ OUT_BATCH(GEN9_3DSTATE_SAMPLE_PATTERN | (5 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ //OUT_BATCH(8<<20 | 8<<16); -+ OUT_BATCH(0); -+#endif -+ -+ gen9_emit_push_constants(sna); -+ gen9_emit_urb(sna); -+ -+ gen9_emit_state_base_address(sna); -+ -+ gen9_emit_vf_invariant(sna); -+ gen9_emit_vs_invariant(sna); -+ gen9_emit_hs_invariant(sna); -+ gen9_emit_te_invariant(sna); -+ gen9_emit_ds_invariant(sna); -+ gen9_emit_gs_invariant(sna); -+ gen9_emit_sol_invariant(sna); -+ gen9_emit_clip_invariant(sna); -+ gen9_emit_sf_invariant(sna); -+ gen9_emit_wm_invariant(sna); -+ gen9_emit_cc_invariant(sna); -+ -+ sna->render_state.gen9.needs_invariant = false; -+} -+ -+static void -+gen9_emit_cc(struct sna *sna, uint32_t blend) -+{ -+ struct gen9_render_state *render = &sna->render_state.gen9; -+ -+ if (render->blend == blend) -+ return; -+ -+ DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n", -+ __FUNCTION__, blend, render->blend, -+ blend / GEN9_BLENDFACTOR_COUNT, -+ blend % 
GEN9_BLENDFACTOR_COUNT)); -+ -+ assert(blend < GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT); -+ assert(blend / GEN9_BLENDFACTOR_COUNT > 0); -+ assert(blend % GEN9_BLENDFACTOR_COUNT > 0); -+ -+ /* XXX can have up to 8 blend states preload, selectable via -+ * Render Target Index. What other side-effects of Render Target Index? -+ */ -+ -+ OUT_BATCH(GEN9_3DSTATE_PS_BLEND | (2 - 2)); -+ if (blend != GEN9_BLEND(NO_BLEND)) { -+ uint32_t src = blend / GEN9_BLENDFACTOR_COUNT; -+ uint32_t dst = blend % GEN9_BLENDFACTOR_COUNT; -+ OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT | -+ PS_BLEND_COLOR_BLEND_ENABLE | -+ src << PS_BLEND_SRC_ALPHA_SHIFT | -+ dst << PS_BLEND_DST_ALPHA_SHIFT | -+ src << PS_BLEND_SRC_SHIFT | -+ dst << PS_BLEND_DST_SHIFT); -+ } else -+ OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT); -+ -+ assert(is_aligned(render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE, 64)); -+ OUT_BATCH(GEN9_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); -+ OUT_BATCH((render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE) | 1); -+ -+ /* Force a CC_STATE pointer change to improve blend performance */ -+ OUT_BATCH(GEN9_3DSTATE_CC_STATE_POINTERS | (2 - 2)); -+ OUT_BATCH(0); -+ -+ render->blend = blend; -+} -+ -+static void -+gen9_emit_sampler(struct sna *sna, uint32_t state) -+{ -+ if (sna->render_state.gen9.samplers == state) -+ return; -+ -+ sna->render_state.gen9.samplers = state; -+ -+ DBG(("%s: sampler = %x\n", __FUNCTION__, state)); -+ -+ assert(2 * sizeof(struct gen9_sampler_state) == 32); -+ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); -+ OUT_BATCH(sna->render_state.gen9.wm_state + state * 2 * sizeof(struct gen9_sampler_state)); -+} -+ -+static void -+gen9_emit_sf(struct sna *sna, bool has_mask) -+{ -+ int num_sf_outputs = has_mask ? 
2 : 1; -+ -+ if (sna->render_state.gen9.num_sf_outputs == num_sf_outputs) -+ return; -+ -+ DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs)); -+ -+ sna->render_state.gen9.num_sf_outputs = num_sf_outputs; -+ -+ OUT_BATCH(GEN9_3DSTATE_SBE | (6 - 2)); -+ OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT | -+ SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */ -+ SBE_FORCE_VERTEX_URB_READ_OFFSET | -+ 1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT | -+ 1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(SBE_ACTIVE_COMPONENT_XYZW << 0 | -+ SBE_ACTIVE_COMPONENT_XYZW << 1); -+ OUT_BATCH(0); -+} -+ -+static void -+gen9_emit_wm(struct sna *sna, int kernel) -+{ -+ const uint32_t *kernels; -+ -+ assert(kernel < ARRAY_SIZE(wm_kernels)); -+ if (sna->render_state.gen9.kernel == kernel) -+ return; -+ -+ sna->render_state.gen9.kernel = kernel; -+ kernels = sna->render_state.gen9.wm_kernel[kernel]; -+ -+ DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n", -+ __FUNCTION__, -+ wm_kernels[kernel].name, -+ wm_kernels[kernel].num_surfaces, -+ kernels[0], kernels[1], kernels[2])); -+ assert(is_aligned(kernels[0], 64)); -+ assert(is_aligned(kernels[1], 64)); -+ assert(is_aligned(kernels[2], 64)); -+ -+ OUT_BATCH(GEN9_3DSTATE_PS | (12 - 2)); -+ OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]); -+ OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT | -+ PS_VECTOR_MASK_ENABLE | -+ wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); -+ OUT_BATCH64(0); /* scratch address */ -+ OUT_BATCH(PS_MAX_THREADS | -+ (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) | -+ (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) | -+ (kernels[2] ? PS_32_DISPATCH_ENABLE : 0)); -+ OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 
6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 | -+ 8 << PS_DISPATCH_START_GRF_SHIFT_1 | -+ 6 << PS_DISPATCH_START_GRF_SHIFT_2); -+ OUT_BATCH64(kernels[2]); -+ OUT_BATCH64(kernels[1]); -+} -+ -+static bool -+gen9_emit_binding_table(struct sna *sna, uint16_t offset) -+{ -+ if (sna->render_state.gen9.surface_table == offset) -+ return false; -+ -+ /* Binding table pointers */ -+ assert(is_aligned(4*offset, 32)); -+ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); -+ OUT_BATCH(offset*4); -+ -+ sna->render_state.gen9.surface_table = offset; -+ return true; -+} -+ -+static bool -+gen9_emit_drawing_rectangle(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); -+ uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; -+ -+ assert(!too_large(abs(op->dst.x), abs(op->dst.y))); -+ assert(!too_large(op->dst.width, op->dst.height)); -+ -+ if (sna->render_state.gen9.drawrect_limit == limit && -+ sna->render_state.gen9.drawrect_offset == offset) -+ return true; -+ -+ sna->render_state.gen9.drawrect_offset = offset; -+ sna->render_state.gen9.drawrect_limit = limit; -+ -+ OUT_BATCH(GEN9_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); -+ OUT_BATCH(0); -+ OUT_BATCH(limit); -+ OUT_BATCH(offset); -+ return false; -+} -+ -+static void -+gen9_emit_vertex_elements(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ /* -+ * vertex data in vertex buffer -+ * position: (x, y) -+ * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) -+ * texture coordinate 1 if (has_mask is true): same as above -+ */ -+ struct gen9_render_state *render = &sna->render_state.gen9; -+ uint32_t src_format, dw; -+ int id = GEN9_VERTEX(op->u.gen9.flags); -+ bool has_mask; -+ -+ DBG(("%s: setup id=%d\n", __FUNCTION__, id)); -+ -+ if (render->ve_id == id) -+ return; -+ render->ve_id = id; -+ -+ if (render->ve_dirty) { -+ /* dummy primitive to flush vertex before change? 
*/ -+ OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); -+ OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(1); /* single instance */ -+ OUT_BATCH(0); /* start instance location */ -+ OUT_BATCH(0); /* index buffer offset, ignored */ -+ } -+ -+ /* The VUE layout -+ * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) -+ * dword 4-7: position (x, y, 1.0, 1.0), -+ * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) -+ * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) -+ * -+ * dword 4-15 are fetched from vertex buffer -+ */ -+ has_mask = (id >> 2) != 0; -+ OUT_BATCH(GEN9_3DSTATE_VERTEX_ELEMENTS | -+ ((2 * (3 + has_mask)) + 1 - 2)); -+ -+ OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | -+ SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT | -+ 0 << VE_OFFSET_SHIFT); -+ OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT | -+ COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT | -+ COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | -+ COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT); -+ -+ /* x,y */ -+ OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | -+ SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT | -+ 0 << VE_OFFSET_SHIFT); -+ OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT | -+ COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT | -+ COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | -+ COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT); -+ -+ /* u0, v0, w0 */ -+ DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3)); -+ dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; -+ switch (id & 3) { -+ default: -+ assert(0); -+ case 0: -+ src_format = SURFACEFORMAT_R16G16_SSCALED; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; -+ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; -+ break; -+ case 1: -+ src_format = SURFACEFORMAT_R32_FLOAT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; -+ dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; -+ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; -+ break; -+ case 2: -+ 
src_format = SURFACEFORMAT_R32G32_FLOAT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; -+ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; -+ break; -+ case 3: -+ src_format = SURFACEFORMAT_R32G32B32_FLOAT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; -+ break; -+ } -+ OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | -+ src_format << VE_FORMAT_SHIFT | -+ 4 << VE_OFFSET_SHIFT); -+ OUT_BATCH(dw); -+ -+ /* u1, v1, w1 */ -+ if (has_mask) { -+ unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); -+ DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); -+ dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; -+ switch (id >> 2) { -+ case 1: -+ src_format = SURFACEFORMAT_R32_FLOAT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; -+ dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; -+ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; -+ break; -+ default: -+ assert(0); -+ case 2: -+ src_format = SURFACEFORMAT_R32G32_FLOAT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; -+ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; -+ break; -+ case 3: -+ src_format = SURFACEFORMAT_R32G32B32_FLOAT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; -+ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; -+ break; -+ } -+ OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | -+ src_format << VE_FORMAT_SHIFT | -+ offset << VE_OFFSET_SHIFT); -+ OUT_BATCH(dw); -+ } -+ -+ render->ve_dirty = true; -+} -+ -+inline static void -+gen9_emit_pipe_invalidate(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); -+ OUT_BATCH(PIPE_CONTROL_WC_FLUSH | -+ PIPE_CONTROL_TC_FLUSH | -+ PIPE_CONTROL_CS_STALL); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+} -+ -+inline static void 
-+gen9_emit_pipe_flush(struct sna *sna, bool need_stall) -+{ -+ unsigned stall; -+ -+ stall = 0; -+ if (need_stall) -+ stall = (PIPE_CONTROL_CS_STALL | -+ PIPE_CONTROL_STALL_AT_SCOREBOARD); -+ -+ OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); -+ OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+} -+ -+inline static void -+gen9_emit_pipe_stall(struct sna *sna) -+{ -+ OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); -+ OUT_BATCH(PIPE_CONTROL_CS_STALL | -+ PIPE_CONTROL_FLUSH | -+ PIPE_CONTROL_STALL_AT_SCOREBOARD); -+ OUT_BATCH64(0); -+ OUT_BATCH64(0); -+} -+ -+static void -+gen9_emit_state(struct sna *sna, -+ const struct sna_composite_op *op, -+ uint16_t wm_binding_table) -+{ -+ bool need_invalidate; -+ bool need_flush; -+ bool need_stall; -+ -+ assert(op->dst.bo->exec); -+ -+ need_flush = wm_binding_table & 1 || -+ (sna->render_state.gen9.emit_flush && GEN9_READS_DST(op->u.gen9.flags)); -+ if (ALWAYS_FLUSH) -+ need_flush = true; -+ -+ wm_binding_table &= ~1; -+ -+ need_stall = sna->render_state.gen9.surface_table != wm_binding_table; -+ -+ need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); -+ if (ALWAYS_INVALIDATE) -+ need_invalidate = true; -+ -+ need_stall &= gen9_emit_drawing_rectangle(sna, op); -+ if (ALWAYS_STALL) -+ need_stall = true; -+ -+ if (need_invalidate) { -+ gen9_emit_pipe_invalidate(sna); -+ kgem_clear_dirty(&sna->kgem); -+ assert(op->dst.bo->exec); -+ kgem_bo_mark_dirty(op->dst.bo); -+ -+ need_flush = false; -+ need_stall = false; -+ } -+ if (need_flush) { -+ gen9_emit_pipe_flush(sna, need_stall); -+ need_stall = false; -+ } -+ if (need_stall) -+ gen9_emit_pipe_stall(sna); -+ -+ gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); -+ gen9_emit_sampler(sna, GEN9_SAMPLER(op->u.gen9.flags)); -+ gen9_emit_sf(sna, GEN9_VERTEX(op->u.gen9.flags) >> 2); -+ gen9_emit_wm(sna, GEN9_KERNEL(op->u.gen9.flags)); -+ gen9_emit_vertex_elements(sna, op); -+ gen9_emit_binding_table(sna, wm_binding_table); -+ -+ 
sna->render_state.gen9.emit_flush = GEN9_READS_DST(op->u.gen9.flags); -+} -+ -+static bool gen9_magic_ca_pass(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ struct gen9_render_state *state = &sna->render_state.gen9; -+ -+ if (!op->need_magic_ca_pass) -+ return false; -+ -+ DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, -+ sna->render.vertex_start, sna->render.vertex_index)); -+ -+ gen9_emit_pipe_stall(sna); -+ -+ gen9_emit_cc(sna, -+ GEN9_BLEND(gen9_get_blend(PictOpAdd, true, -+ op->dst.format))); -+ gen9_emit_wm(sna, -+ gen9_choose_composite_kernel(PictOpAdd, -+ true, true, -+ op->is_affine)); -+ -+ OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); -+ OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ -+ OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); -+ OUT_BATCH(sna->render.vertex_start); -+ OUT_BATCH(1); /* single instance */ -+ OUT_BATCH(0); /* start instance location */ -+ OUT_BATCH(0); /* index buffer offset, ignored */ -+ -+ state->last_primitive = sna->kgem.nbatch; -+ state->ve_dirty = false; -+ return true; -+} -+ -+static void null_create(struct sna_static_stream *stream) -+{ -+ /* A bunch of zeros useful for legacy border color and depth-stencil */ -+ sna_static_stream_map(stream, 64, 64); -+} -+ -+static void -+sampler_state_init(struct gen9_sampler_state *sampler_state, -+ sampler_filter_t filter, -+ sampler_extend_t extend) -+{ -+ COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t)); -+ -+ sampler_state->ss0.lod_preclamp = 2; /* GL mode */ -+ sampler_state->ss0.default_color_mode = 1; -+ -+ switch (filter) { -+ default: -+ case SAMPLER_FILTER_NEAREST: -+ sampler_state->ss0.min_filter = MAPFILTER_NEAREST; -+ sampler_state->ss0.mag_filter = MAPFILTER_NEAREST; -+ break; -+ case SAMPLER_FILTER_BILINEAR: -+ sampler_state->ss0.min_filter = MAPFILTER_LINEAR; -+ sampler_state->ss0.mag_filter = MAPFILTER_LINEAR; -+ break; -+ } -+ -+ /* XXX bicubic filter using MAPFILTER_FLEXIBLE */ -+ -+ switch (extend) { -+ default: -+ case 
SAMPLER_EXTEND_NONE: -+ sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; -+ sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; -+ sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; -+ break; -+ case SAMPLER_EXTEND_REPEAT: -+ sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP; -+ sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP; -+ sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP; -+ break; -+ case SAMPLER_EXTEND_PAD: -+ sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP; -+ sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP; -+ sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP; -+ break; -+ case SAMPLER_EXTEND_REFLECT: -+ sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR; -+ sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR; -+ sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR; -+ break; -+ } -+} -+ -+static void -+sampler_copy_init(struct gen9_sampler_state *ss) -+{ -+ sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); -+ ss->ss3.non_normalized_coord = 1; -+ -+ sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); -+} -+ -+static void -+sampler_fill_init(struct gen9_sampler_state *ss) -+{ -+ sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); -+ ss->ss3.non_normalized_coord = 1; -+ -+ sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); -+} -+ -+static uint32_t -+gen9_tiling_bits(uint32_t tiling) -+{ -+ switch (tiling) { -+ default: assert(0); -+ case I915_TILING_NONE: return 0; -+ case I915_TILING_X: return SURFACE_TILED; -+ case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y; -+ } -+} -+ -+#define MOCS_PTE (1 << 1) -+#define MOCS_WB (2 << 1) -+ -+/** -+ * Sets up the common fields for a surface state buffer for the given -+ * picture in the given surface state buffer. 
-+ */ -+static uint32_t -+gen9_bind_bo(struct sna *sna, -+ struct kgem_bo *bo, -+ uint32_t width, -+ uint32_t height, -+ uint32_t format, -+ bool is_dst) -+{ -+ uint32_t *ss; -+ uint32_t domains; -+ int offset; -+ uint32_t is_scanout = is_dst && bo->scanout; -+ -+ /* After the first bind, we manage the cache domains within the batch */ -+ offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); -+ if (offset) { -+ if (is_dst) -+ kgem_bo_mark_dirty(bo); -+ assert(offset >= sna->kgem.surface); -+ return offset * sizeof(uint32_t); -+ } -+ -+ offset = sna->kgem.surface -= SURFACE_DW; -+ ss = sna->kgem.batch + offset; -+ ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | -+ gen9_tiling_bits(bo->tiling) | -+ format << SURFACE_FORMAT_SHIFT | -+ SURFACE_VALIGN_4 | SURFACE_HALIGN_4); -+ if (is_dst) { -+ ss[0] |= SURFACE_RC_READ_WRITE; -+ domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; -+ } else -+ domains = I915_GEM_DOMAIN_SAMPLER << 16; -+ ss[1] = (is_scanout || (is_dst && is_uncached(sna, bo))) ? MOCS_PTE << 24 : MOCS_WB << 24; -+ ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | -+ (height - 1) << SURFACE_HEIGHT_SHIFT); -+ ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT; -+ ss[4] = 0; -+ ss[5] = 0; -+ ss[6] = 0; -+ ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); -+ *(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0); -+ ss[10] = 0; -+ ss[11] = 0; -+ ss[12] = 0; -+ ss[13] = 0; -+ ss[14] = 0; -+ ss[15] = 0; -+ -+ kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); -+ -+ DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", -+ offset, bo->handle, *(uint64_t *)(ss+8), -+ format, width, height, bo->pitch, bo->tiling, -+ domains & 0xffff ? 
"render" : "sampler")); -+ -+ return offset * sizeof(uint32_t); -+} -+ -+static void gen9_emit_vertex_buffer(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ int id = GEN9_VERTEX(op->u.gen9.flags); -+ -+ OUT_BATCH(GEN9_3DSTATE_VERTEX_BUFFERS | (5 - 2)); -+ OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE | -+ 4*op->floats_per_vertex); -+ sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; -+ OUT_BATCH64(0); -+ OUT_BATCH(~0); /* buffer size: disabled */ -+ -+ sna->render.vb_id |= 1 << id; -+} -+ -+static void gen9_emit_primitive(struct sna *sna) -+{ -+ if (sna->kgem.nbatch == sna->render_state.gen9.last_primitive) { -+ sna->render.vertex_offset = sna->kgem.nbatch - 5; -+ return; -+ } -+ -+ OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); -+ OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ -+ sna->render.vertex_offset = sna->kgem.nbatch; -+ OUT_BATCH(0); /* vertex count, to be filled in later */ -+ OUT_BATCH(sna->render.vertex_index); -+ OUT_BATCH(1); /* single instance */ -+ OUT_BATCH(0); /* start instance location */ -+ OUT_BATCH(0); /* index buffer offset, ignored */ -+ sna->render.vertex_start = sna->render.vertex_index; -+ -+ sna->render_state.gen9.last_primitive = sna->kgem.nbatch; -+ sna->render_state.gen9.ve_dirty = false; -+} -+ -+static bool gen9_rectangle_begin(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ int id = 1 << GEN9_VERTEX(op->u.gen9.flags); -+ int ndwords; -+ -+ if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) -+ return true; -+ -+ ndwords = op->need_magic_ca_pass ? 
60 : 6; -+ if ((sna->render.vb_id & id) == 0) -+ ndwords += 5; -+ if (!kgem_check_batch(&sna->kgem, ndwords)) -+ return false; -+ -+ if ((sna->render.vb_id & id) == 0) -+ gen9_emit_vertex_buffer(sna, op); -+ -+ gen9_emit_primitive(sna); -+ return true; -+} -+ -+static int gen9_get_rectangles__flush(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ /* Preventing discarding new vbo after lock contention */ -+ if (sna_vertex_wait__locked(&sna->render)) { -+ int rem = vertex_space(sna); -+ if (rem > op->floats_per_rect) -+ return rem; -+ } -+ -+ if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6)) -+ return 0; -+ if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) -+ return 0; -+ -+ if (sna->render.vertex_offset) { -+ gen8_vertex_flush(sna); -+ if (gen9_magic_ca_pass(sna, op)) { -+ gen9_emit_pipe_invalidate(sna); -+ gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); -+ gen9_emit_wm(sna, GEN9_KERNEL(op->u.gen9.flags)); -+ } -+ } -+ -+ return gen8_vertex_finish(sna); -+} -+ -+inline static int gen9_get_rectangles(struct sna *sna, -+ const struct sna_composite_op *op, -+ int want, -+ void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) -+{ -+ int rem; -+ -+ assert(want); -+ -+start: -+ rem = vertex_space(sna); -+ if (unlikely(rem < op->floats_per_rect)) { -+ DBG(("flushing vbo for %s: %d < %d\n", -+ __FUNCTION__, rem, op->floats_per_rect)); -+ rem = gen9_get_rectangles__flush(sna, op); -+ if (unlikely(rem == 0)) -+ goto flush; -+ } -+ -+ if (unlikely(sna->render.vertex_offset == 0)) { -+ if (!gen9_rectangle_begin(sna, op)) -+ goto flush; -+ else -+ goto start; -+ } -+ -+ assert(rem <= vertex_space(sna)); -+ assert(op->floats_per_rect <= rem); -+ if (want > 1 && want * op->floats_per_rect > rem) -+ want = rem / op->floats_per_rect; -+ -+ assert(want > 0); -+ sna->render.vertex_index += 3*want; -+ return want; -+ -+flush: -+ if (sna->render.vertex_offset) { -+ gen8_vertex_flush(sna); -+ gen9_magic_ca_pass(sna, op); -+ } -+ 
sna_vertex_wait__locked(&sna->render); -+ _kgem_submit(&sna->kgem); -+ emit_state(sna, op); -+ goto start; -+} -+ -+inline static uint32_t *gen9_composite_get_binding_table(struct sna *sna, -+ uint16_t *offset) -+{ -+ uint32_t *table; -+ -+ assert(sna->kgem.surface <= 16384); -+ sna->kgem.surface -= SURFACE_DW; -+ /* Clear all surplus entries to zero in case of prefetch */ -+ table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64); -+ -+ DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); -+ -+ *offset = sna->kgem.surface; -+ return table; -+} -+ -+static void -+gen9_get_batch(struct sna *sna, const struct sna_composite_op *op) -+{ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); -+ -+ if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) { -+ DBG(("%s: flushing batch: %d < %d+%d\n", -+ __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, -+ 150, 4*8*2)); -+ _kgem_submit(&sna->kgem); -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ assert(sna->kgem.mode == KGEM_RENDER); -+ assert(sna->kgem.ring == KGEM_RENDER); -+ -+ if (sna->render_state.gen9.needs_invariant) -+ gen9_emit_invariant(sna); -+} -+ -+static void gen9_emit_composite_state(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ uint32_t *binding_table; -+ uint16_t offset, dirty; -+ -+ gen9_get_batch(sna, op); -+ -+ binding_table = gen9_composite_get_binding_table(sna, &offset); -+ -+ dirty = kgem_bo_is_dirty(op->dst.bo); -+ -+ binding_table[0] = -+ gen9_bind_bo(sna, -+ op->dst.bo, op->dst.width, op->dst.height, -+ gen9_get_dest_format(op->dst.format), -+ true); -+ binding_table[1] = -+ gen9_bind_bo(sna, -+ op->src.bo, op->src.width, op->src.height, -+ op->src.card_format, -+ false); -+ if (op->mask.bo) { -+ binding_table[2] = -+ gen9_bind_bo(sna, -+ op->mask.bo, -+ op->mask.width, -+ op->mask.height, -+ op->mask.card_format, -+ false); -+ } -+ -+ if (sna->kgem.surface == offset && -+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == 
*(uint64_t*)binding_table && -+ (op->mask.bo == NULL || -+ sna->kgem.batch[sna->render_state.gen9.surface_table+2] == binding_table[2])) { -+ sna->kgem.surface += SURFACE_DW; -+ offset = sna->render_state.gen9.surface_table; -+ } -+ -+ if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) -+ dirty = 0; -+ -+ gen9_emit_state(sna, op, offset | dirty); -+} -+ -+static void -+gen9_align_vertex(struct sna *sna, const struct sna_composite_op *op) -+{ -+ if (op->floats_per_vertex != sna->render_state.gen9.floats_per_vertex) { -+ DBG(("aligning vertex: was %d, now %d floats per vertex\n", -+ sna->render_state.gen9.floats_per_vertex, op->floats_per_vertex)); -+ gen8_vertex_align(sna, op); -+ sna->render_state.gen9.floats_per_vertex = op->floats_per_vertex; -+ } -+} -+ -+fastcall static void -+gen9_render_composite_blt(struct sna *sna, -+ const struct sna_composite_op *op, -+ const struct sna_composite_rectangles *r) -+{ -+ gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); -+ op->prim_emit(sna, op, r); -+} -+ -+fastcall static void -+gen9_render_composite_box(struct sna *sna, -+ const struct sna_composite_op *op, -+ const BoxRec *box) -+{ -+ struct sna_composite_rectangles r; -+ -+ gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); -+ -+ DBG((" %s: (%d, %d), (%d, %d)\n", -+ __FUNCTION__, -+ box->x1, box->y1, box->x2, box->y2)); -+ -+ r.dst.x = box->x1; -+ r.dst.y = box->y1; -+ r.width = box->x2 - box->x1; -+ r.height = box->y2 - box->y1; -+ r.src = r.mask = r.dst; -+ -+ op->prim_emit(sna, op, &r); -+} -+ -+static void -+gen9_render_composite_boxes__blt(struct sna *sna, -+ const struct sna_composite_op *op, -+ const BoxRec *box, int nbox) -+{ -+ DBG(("composite_boxes(%d)\n", nbox)); -+ -+ do { -+ int nbox_this_time; -+ -+ nbox_this_time = gen9_get_rectangles(sna, op, nbox, -+ gen9_emit_composite_state); -+ nbox -= nbox_this_time; -+ -+ do { -+ struct sna_composite_rectangles r; -+ -+ DBG((" %s: (%d, %d), (%d, %d)\n", -+ 
__FUNCTION__, -+ box->x1, box->y1, box->x2, box->y2)); -+ -+ r.dst.x = box->x1; -+ r.dst.y = box->y1; -+ r.width = box->x2 - box->x1; -+ r.height = box->y2 - box->y1; -+ r.src = r.mask = r.dst; -+ -+ op->prim_emit(sna, op, &r); -+ box++; -+ } while (--nbox_this_time); -+ } while (nbox); -+} -+ -+static void -+gen9_render_composite_boxes(struct sna *sna, -+ const struct sna_composite_op *op, -+ const BoxRec *box, int nbox) -+{ -+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); -+ -+ do { -+ int nbox_this_time; -+ float *v; -+ -+ nbox_this_time = gen9_get_rectangles(sna, op, nbox, -+ gen9_emit_composite_state); -+ assert(nbox_this_time); -+ nbox -= nbox_this_time; -+ -+ v = sna->render.vertices + sna->render.vertex_used; -+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect; -+ -+ op->emit_boxes(op, box, nbox_this_time, v); -+ box += nbox_this_time; -+ } while (nbox); -+} -+ -+static void -+gen9_render_composite_boxes__thread(struct sna *sna, -+ const struct sna_composite_op *op, -+ const BoxRec *box, int nbox) -+{ -+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); -+ -+ sna_vertex_lock(&sna->render); -+ do { -+ int nbox_this_time; -+ float *v; -+ -+ nbox_this_time = gen9_get_rectangles(sna, op, nbox, -+ gen9_emit_composite_state); -+ assert(nbox_this_time); -+ nbox -= nbox_this_time; -+ -+ v = sna->render.vertices + sna->render.vertex_used; -+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect; -+ -+ sna_vertex_acquire__locked(&sna->render); -+ sna_vertex_unlock(&sna->render); -+ -+ op->emit_boxes(op, box, nbox_this_time, v); -+ box += nbox_this_time; -+ -+ sna_vertex_lock(&sna->render); -+ sna_vertex_release__locked(&sna->render); -+ } while (nbox); -+ sna_vertex_unlock(&sna->render); -+} -+ -+static uint32_t -+gen9_create_blend_state(struct sna_static_stream *stream) -+{ -+ char *base, *ptr; -+ int src, dst; -+ -+ COMPILE_TIME_ASSERT(((GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0); -+ -+ base = 
sna_static_stream_map(stream, -+ GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT * GEN9_BLEND_STATE_PADDED_SIZE, -+ 64); -+ -+ ptr = base; -+ for (src = 0; src < GEN9_BLENDFACTOR_COUNT; src++) { -+ for (dst = 0; dst < GEN9_BLENDFACTOR_COUNT; dst++) { -+ struct gen9_blend_state *blend = -+ (struct gen9_blend_state *)ptr; -+ -+ assert(((ptr - base) & 63) == 0); -+ COMPILE_TIME_ASSERT(sizeof(blend->common) == 4); -+ COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8); -+ COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4); -+ -+ blend->rt.post_blend_clamp = 1; -+ blend->rt.pre_blend_clamp = 1; -+ -+ blend->rt.color_blend = -+ !(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE); -+ blend->rt.dest_blend_factor = dst; -+ blend->rt.source_blend_factor = src; -+ blend->rt.color_blend_function = BLENDFUNCTION_ADD; -+ -+ blend->rt.dest_alpha_blend_factor = dst; -+ blend->rt.source_alpha_blend_factor = src; -+ blend->rt.alpha_blend_function = BLENDFUNCTION_ADD; -+ -+ ptr += GEN9_BLEND_STATE_PADDED_SIZE; -+ } -+ } -+ -+ return sna_static_stream_offsetof(stream, base); -+} -+ -+static int -+gen9_composite_picture(struct sna *sna, -+ PicturePtr picture, -+ struct sna_composite_channel *channel, -+ int x, int y, -+ int w, int h, -+ int dst_x, int dst_y, -+ bool precise) -+{ -+ PixmapPtr pixmap; -+ uint32_t color; -+ int16_t dx, dy; -+ -+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", -+ __FUNCTION__, x, y, w, h, dst_x, dst_y)); -+ -+ channel->is_solid = false; -+ channel->card_format = -1; -+ -+ if (sna_picture_is_solid(picture, &color)) -+ return gen4_channel_init_solid(sna, channel, color); -+ -+ if (picture->pDrawable == NULL) { -+ int ret; -+ -+ if (picture->pSourcePict->type == SourcePictTypeLinear) -+ return gen4_channel_init_linear(sna, picture, channel, -+ x, y, -+ w, h, -+ dst_x, dst_y); -+ -+ DBG(("%s -- fixup, gradient\n", __FUNCTION__)); -+ ret = -1; -+ if (!precise) -+ ret = sna_render_picture_approximate_gradient(sna, picture, channel, -+ x, y, w, h, dst_x, dst_y); 
-+ if (ret == -1) -+ ret = sna_render_picture_fixup(sna, picture, channel, -+ x, y, w, h, dst_x, dst_y); -+ return ret; -+ } -+ -+ if (picture->alphaMap) { -+ DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); -+ return sna_render_picture_fixup(sna, picture, channel, -+ x, y, w, h, dst_x, dst_y); -+ } -+ -+ if (!gen9_check_repeat(picture)) -+ return sna_render_picture_fixup(sna, picture, channel, -+ x, y, w, h, dst_x, dst_y); -+ -+ if (!gen9_check_filter(picture)) -+ return sna_render_picture_fixup(sna, picture, channel, -+ x, y, w, h, dst_x, dst_y); -+ -+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; -+ channel->filter = picture->filter; -+ -+ pixmap = get_drawable_pixmap(picture->pDrawable); -+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); -+ -+ x += dx + picture->pDrawable->x; -+ y += dy + picture->pDrawable->y; -+ -+ channel->is_affine = sna_transform_is_affine(picture->transform); -+ if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { -+ DBG(("%s: integer translation (%d, %d), removing\n", -+ __FUNCTION__, dx, dy)); -+ x += dx; -+ y += dy; -+ channel->transform = NULL; -+ channel->filter = PictFilterNearest; -+ -+ if (channel->repeat || -+ (x >= 0 && -+ y >= 0 && -+ x + w <= pixmap->drawable.width && -+ y + h <= pixmap->drawable.height)) { -+ struct sna_pixmap *priv = sna_pixmap(pixmap); -+ if (priv && priv->clear) { -+ DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); -+ return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); -+ } -+ } -+ } else -+ channel->transform = picture->transform; -+ -+ channel->pict_format = picture->format; -+ channel->card_format = gen9_get_card_format(picture->format); -+ if (channel->card_format == (unsigned)-1) -+ return sna_render_picture_convert(sna, picture, channel, pixmap, -+ x, y, w, h, dst_x, dst_y, -+ false); -+ -+ if 
(too_large(pixmap->drawable.width, pixmap->drawable.height)) { -+ DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, -+ pixmap->drawable.width, pixmap->drawable.height)); -+ return sna_render_picture_extract(sna, picture, channel, -+ x, y, w, h, dst_x, dst_y); -+ } -+ -+ return sna_render_pixmap_bo(sna, channel, pixmap, -+ x, y, w, h, dst_x, dst_y); -+} -+ -+inline static bool gen9_composite_channel_convert(struct sna_composite_channel *channel) -+{ -+ if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format))) -+ return false; -+ -+ channel->repeat = gen9_repeat(channel->repeat); -+ channel->filter = gen9_filter(channel->filter); -+ if (channel->card_format == (unsigned)-1) -+ channel->card_format = gen9_get_card_format(channel->pict_format); -+ assert(channel->card_format != (unsigned)-1); -+ -+ return true; -+} -+ -+static void gen9_render_composite_done(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ if (sna->render.vertex_offset) { -+ gen8_vertex_flush(sna); -+ gen9_magic_ca_pass(sna, op); -+ } -+ -+ if (op->mask.bo) -+ kgem_bo_destroy(&sna->kgem, op->mask.bo); -+ if (op->src.bo) -+ kgem_bo_destroy(&sna->kgem, op->src.bo); -+ -+ sna_render_composite_redirect_done(sna, op); -+} -+ -+inline static bool -+gen9_composite_set_target(struct sna *sna, -+ struct sna_composite_op *op, -+ PicturePtr dst, -+ int x, int y, int w, int h, -+ bool partial) -+{ -+ BoxRec box; -+ unsigned int hint; -+ -+ DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); -+ -+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); -+ op->dst.format = dst->format; -+ op->dst.width = op->dst.pixmap->drawable.width; -+ op->dst.height = op->dst.pixmap->drawable.height; -+ -+ if (w | h) { -+ assert(w && h); -+ box.x1 = x; -+ box.y1 = y; -+ box.x2 = x + w; -+ box.y2 = y + h; -+ } else -+ sna_render_picture_extents(dst, &box); -+ -+ hint = PREFER_GPU | RENDER_GPU; -+ if (!need_tiling(sna, op->dst.width, op->dst.height)) -+ hint |= FORCE_GPU; -+ 
if (!partial) { -+ hint |= IGNORE_DAMAGE; -+ if (w == op->dst.width && h == op->dst.height) -+ hint |= REPLACES; -+ } -+ -+ op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); -+ if (op->dst.bo == NULL) -+ return false; -+ -+ assert(!op->damage || !DAMAGE_IS_ALL(*op->damage)); -+ -+ if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel)) -+ return false; -+ -+ if (hint & REPLACES) { -+ struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); -+ kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); -+ } -+ -+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap, -+ &op->dst.x, &op->dst.y); -+ -+ DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", -+ __FUNCTION__, -+ op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, -+ op->dst.width, op->dst.height, -+ op->dst.bo->pitch, -+ op->dst.x, op->dst.y, -+ op->damage ? *op->damage : (void *)-1)); -+ -+ assert(op->dst.bo->proxy == NULL); -+ -+ if (too_large(op->dst.width, op->dst.height) && -+ !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) -+ return false; -+ -+ return true; -+} -+ -+static bool -+try_blt(struct sna *sna, -+ uint8_t op, -+ PicturePtr src, -+ PicturePtr mask, -+ PicturePtr dst, -+ int16_t src_x, int16_t src_y, -+ int16_t msk_x, int16_t msk_y, -+ int16_t dst_x, int16_t dst_y, -+ int16_t width, int16_t height, -+ unsigned flags, -+ struct sna_composite_op *tmp) -+{ -+ struct kgem_bo *bo; -+ -+ if (sna->kgem.mode == KGEM_BLT) { -+ DBG(("%s: already performing BLT\n", __FUNCTION__)); -+ goto execute; -+ } -+ -+ if (too_large(width, height)) { -+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n", -+ __FUNCTION__, width, height)); -+ goto execute; -+ } -+ -+ bo = __sna_drawable_peek_bo(dst->pDrawable); -+ if (bo == NULL) -+ goto execute; -+ -+ if (untiled_tlb_miss(bo)) -+ goto execute; -+ -+ if (bo->rq) { -+ if (RQ_IS_BLT(bo->rq)) -+ goto execute; -+ -+ return false; -+ } -+ -+ if (bo->tiling == I915_TILING_Y) -+ goto upload; -+ -+ 
if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) -+ goto execute; -+ -+ if (src->pDrawable == dst->pDrawable && -+ (sna->render_state.gt < 3 || width*height < 1024) && -+ can_switch_to_blt(sna, bo, 0)) -+ goto execute; -+ -+ if (src->pDrawable) { -+ struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); -+ if (s == NULL) -+ goto upload; -+ -+ if (prefer_blt_bo(sna, s, bo)) -+ goto execute; -+ } -+ -+ if (sna->kgem.ring == KGEM_BLT) { -+ DBG(("%s: already performing BLT\n", __FUNCTION__)); -+ goto execute; -+ } -+ -+upload: -+ flags |= COMPOSITE_UPLOAD; -+execute: -+ return sna_blt_composite(sna, op, -+ src, dst, -+ src_x, src_y, -+ dst_x, dst_y, -+ width, height, -+ flags, tmp); -+} -+ -+static bool -+check_gradient(PicturePtr picture, bool precise) -+{ -+ if (picture->pDrawable) -+ return false; -+ -+ switch (picture->pSourcePict->type) { -+ case SourcePictTypeSolidFill: -+ case SourcePictTypeLinear: -+ return false; -+ default: -+ return precise; -+ } -+} -+ -+static bool -+has_alphamap(PicturePtr p) -+{ -+ return p->alphaMap != NULL; -+} -+ -+static bool -+need_upload(PicturePtr p) -+{ -+ return p->pDrawable && unattached(p->pDrawable) && untransformed(p); -+} -+ -+static bool -+source_is_busy(PixmapPtr pixmap) -+{ -+ struct sna_pixmap *priv = sna_pixmap(pixmap); -+ if (priv == NULL || priv->clear) -+ return false; -+ -+ if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) -+ return true; -+ -+ if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) -+ return true; -+ -+ return priv->gpu_damage && !priv->cpu_damage; -+} -+ -+static bool -+source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) -+{ -+ if (sna_picture_is_solid(p, NULL)) -+ return false; -+ -+ if (p->pSourcePict) -+ return check_gradient(p, precise); -+ -+ if (!gen9_check_repeat(p) || !gen9_check_format(p->format)) -+ return true; -+ -+ if (pixmap && source_is_busy(pixmap)) -+ return false; -+ -+ return has_alphamap(p) || !gen9_check_filter(p) || need_upload(p); -+} -+ 
-+static bool -+gen9_composite_fallback(struct sna *sna, -+ PicturePtr src, -+ PicturePtr mask, -+ PicturePtr dst) -+{ -+ PixmapPtr src_pixmap; -+ PixmapPtr mask_pixmap; -+ PixmapPtr dst_pixmap; -+ bool src_fallback, mask_fallback; -+ -+ if (!gen9_check_dst_format(dst->format)) { -+ DBG(("%s: unknown destination format: %d\n", -+ __FUNCTION__, dst->format)); -+ return true; -+ } -+ -+ dst_pixmap = get_drawable_pixmap(dst->pDrawable); -+ -+ src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; -+ src_fallback = source_fallback(src, src_pixmap, -+ dst->polyMode == PolyModePrecise); -+ -+ if (mask) { -+ mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; -+ mask_fallback = source_fallback(mask, mask_pixmap, -+ dst->polyMode == PolyModePrecise); -+ } else { -+ mask_pixmap = NULL; -+ mask_fallback = false; -+ } -+ -+ /* If we are using the destination as a source and need to -+ * readback in order to upload the source, do it all -+ * on the cpu. -+ */ -+ if (src_pixmap == dst_pixmap && src_fallback) { -+ DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); -+ return true; -+ } -+ if (mask_pixmap == dst_pixmap && mask_fallback) { -+ DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); -+ return true; -+ } -+ -+ /* If anything is on the GPU, push everything out to the GPU */ -+ if (dst_use_gpu(dst_pixmap)) { -+ DBG(("%s: dst is already on the GPU, try to use GPU\n", -+ __FUNCTION__)); -+ return false; -+ } -+ -+ if (src_pixmap && !src_fallback) { -+ DBG(("%s: src is already on the GPU, try to use GPU\n", -+ __FUNCTION__)); -+ return false; -+ } -+ if (mask_pixmap && !mask_fallback) { -+ DBG(("%s: mask is already on the GPU, try to use GPU\n", -+ __FUNCTION__)); -+ return false; -+ } -+ -+ /* However if the dst is not on the GPU and we need to -+ * render one of the sources using the CPU, we may -+ * as well do the entire operation in place onthe CPU. 
-+ */ -+ if (src_fallback) { -+ DBG(("%s: dst is on the CPU and src will fallback\n", -+ __FUNCTION__)); -+ return true; -+ } -+ -+ if (mask && mask_fallback) { -+ DBG(("%s: dst is on the CPU and mask will fallback\n", -+ __FUNCTION__)); -+ return true; -+ } -+ -+ if (too_large(dst_pixmap->drawable.width, -+ dst_pixmap->drawable.height) && -+ dst_is_cpu(dst_pixmap)) { -+ DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); -+ return true; -+ } -+ -+ DBG(("%s: dst is not on the GPU and the operation should not fallback\n", -+ __FUNCTION__)); -+ return dst_use_cpu(dst_pixmap); -+} -+ -+static int -+reuse_source(struct sna *sna, -+ PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, -+ PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) -+{ -+ uint32_t color; -+ -+ if (src_x != msk_x || src_y != msk_y) -+ return false; -+ -+ if (src == mask) { -+ DBG(("%s: mask is source\n", __FUNCTION__)); -+ *mc = *sc; -+ mc->bo = kgem_bo_reference(mc->bo); -+ return true; -+ } -+ -+ if (sna_picture_is_solid(mask, &color)) -+ return gen4_channel_init_solid(sna, mc, color); -+ -+ if (sc->is_solid) -+ return false; -+ -+ if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) -+ return false; -+ -+ DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); -+ -+ if (!sna_transform_equal(src->transform, mask->transform)) -+ return false; -+ -+ if (!sna_picture_alphamap_equal(src, mask)) -+ return false; -+ -+ if (!gen9_check_repeat(mask)) -+ return false; -+ -+ if (!gen9_check_filter(mask)) -+ return false; -+ -+ if (!gen9_check_format(mask->format)) -+ return false; -+ -+ DBG(("%s: reusing source channel for mask with a twist\n", -+ __FUNCTION__)); -+ -+ *mc = *sc; -+ mc->repeat = gen9_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); -+ mc->filter = gen9_filter(mask->filter); -+ mc->pict_format = mask->format; -+ mc->card_format = gen9_get_card_format(mask->format); -+ mc->bo = kgem_bo_reference(mc->bo); -+ return true; -+} -+ -+static bool -+gen9_render_composite(struct sna *sna, -+ uint8_t op, -+ PicturePtr src, -+ PicturePtr mask, -+ PicturePtr dst, -+ int16_t src_x, int16_t src_y, -+ int16_t msk_x, int16_t msk_y, -+ int16_t dst_x, int16_t dst_y, -+ int16_t width, int16_t height, -+ unsigned flags, -+ struct sna_composite_op *tmp) -+{ -+ if (op >= ARRAY_SIZE(gen9_blend_op)) -+ return false; -+ -+ DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__, -+ width, height, sna->kgem.mode, sna->kgem.ring)); -+ -+ if (mask == NULL && -+ try_blt(sna, op, -+ src, mask, dst, -+ src_x, src_y, -+ msk_x, msk_y, -+ dst_x, dst_y, -+ width, height, -+ flags, tmp)) -+ return true; -+ -+ if (gen9_composite_fallback(sna, src, mask, dst)) -+ goto fallback; -+ -+ if (need_tiling(sna, width, height)) -+ return sna_tiling_composite(op, src, mask, dst, -+ src_x, src_y, -+ msk_x, msk_y, -+ dst_x, dst_y, -+ width, height, -+ tmp); -+ -+ if (op == PictOpClear && src == sna->clear) -+ op = PictOpSrc; -+ tmp->op = op; -+ if (!gen9_composite_set_target(sna, tmp, dst, -+ dst_x, dst_y, width, height, -+ flags & COMPOSITE_PARTIAL || op > PictOpSrc)) -+ goto fallback; -+ -+ switch (gen9_composite_picture(sna, src, &tmp->src, -+ src_x, src_y, -+ width, height, -+ dst_x, dst_y, -+ dst->polyMode == PolyModePrecise)) { -+ case -1: -+ goto cleanup_dst; -+ case 0: -+ if (!gen4_channel_init_solid(sna, &tmp->src, 0)) -+ goto cleanup_dst; -+ /* fall through to fixup */ -+ case 1: -+ /* Did we just switch rings to prepare the source? 
*/ -+ if (mask == NULL && -+ (prefer_blt_composite(sna, tmp) || -+ unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) && -+ sna_blt_composite__convert(sna, -+ dst_x, dst_y, width, height, -+ tmp)) -+ return true; -+ -+ if (!gen9_composite_channel_convert(&tmp->src)) -+ goto cleanup_src; -+ -+ break; -+ } -+ -+ tmp->is_affine = tmp->src.is_affine; -+ tmp->has_component_alpha = false; -+ tmp->need_magic_ca_pass = false; -+ -+ tmp->mask.bo = NULL; -+ tmp->mask.filter = SAMPLER_FILTER_NEAREST; -+ tmp->mask.repeat = SAMPLER_EXTEND_NONE; -+ -+ if (mask) { -+ if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { -+ tmp->has_component_alpha = true; -+ -+ /* Check if it's component alpha that relies on a source alpha and on -+ * the source value. We can only get one of those into the single -+ * source value that we get to blend with. -+ */ -+ if (gen9_blend_op[op].src_alpha && -+ (gen9_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { -+ if (op != PictOpOver) -+ goto cleanup_src; -+ -+ tmp->need_magic_ca_pass = true; -+ tmp->op = PictOpOutReverse; -+ } -+ } -+ -+ if (!reuse_source(sna, -+ src, &tmp->src, src_x, src_y, -+ mask, &tmp->mask, msk_x, msk_y)) { -+ switch (gen9_composite_picture(sna, mask, &tmp->mask, -+ msk_x, msk_y, -+ width, height, -+ dst_x, dst_y, -+ dst->polyMode == PolyModePrecise)) { -+ case -1: -+ goto cleanup_src; -+ case 0: -+ if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) -+ goto cleanup_src; -+ /* fall through to fixup */ -+ case 1: -+ if (!gen9_composite_channel_convert(&tmp->mask)) -+ goto cleanup_mask; -+ break; -+ } -+ } -+ -+ tmp->is_affine &= tmp->mask.is_affine; -+ } -+ -+ tmp->u.gen9.flags = -+ GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, -+ tmp->src.repeat, -+ tmp->mask.filter, -+ tmp->mask.repeat), -+ gen9_get_blend(tmp->op, -+ tmp->has_component_alpha, -+ tmp->dst.format), -+ gen9_choose_composite_kernel(tmp->op, -+ tmp->mask.bo != NULL, -+ tmp->has_component_alpha, -+ tmp->is_affine), -+ 
gen4_choose_composite_emitter(sna, tmp)); -+ -+ tmp->blt = gen9_render_composite_blt; -+ tmp->box = gen9_render_composite_box; -+ tmp->boxes = gen9_render_composite_boxes__blt; -+ if (tmp->emit_boxes){ -+ tmp->boxes = gen9_render_composite_boxes; -+ tmp->thread_boxes = gen9_render_composite_boxes__thread; -+ } -+ tmp->done = gen9_render_composite_done; -+ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); -+ if (!kgem_check_bo(&sna->kgem, -+ tmp->dst.bo, tmp->src.bo, tmp->mask.bo, -+ NULL)) { -+ kgem_submit(&sna->kgem); -+ if (!kgem_check_bo(&sna->kgem, -+ tmp->dst.bo, tmp->src.bo, tmp->mask.bo, -+ NULL)) -+ goto cleanup_mask; -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ gen9_align_vertex(sna, tmp); -+ gen9_emit_composite_state(sna, tmp); -+ return true; -+ -+cleanup_mask: -+ if (tmp->mask.bo) { -+ kgem_bo_destroy(&sna->kgem, tmp->mask.bo); -+ tmp->mask.bo = NULL; -+ } -+cleanup_src: -+ if (tmp->src.bo) { -+ kgem_bo_destroy(&sna->kgem, tmp->src.bo); -+ tmp->src.bo = NULL; -+ } -+cleanup_dst: -+ if (tmp->redirect.real_bo) { -+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo); -+ tmp->redirect.real_bo = NULL; -+ } -+fallback: -+ return (mask == NULL && -+ sna_blt_composite(sna, op, -+ src, dst, -+ src_x, src_y, -+ dst_x, dst_y, -+ width, height, -+ flags | COMPOSITE_FALLBACK, tmp)); -+} -+ -+#if !NO_COMPOSITE_SPANS -+fastcall static void -+gen9_render_composite_spans_box(struct sna *sna, -+ const struct sna_composite_spans_op *op, -+ const BoxRec *box, float opacity) -+{ -+ DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", -+ __FUNCTION__, -+ op->base.src.offset[0], op->base.src.offset[1], -+ opacity, -+ op->base.dst.x, op->base.dst.y, -+ box->x1, box->y1, -+ box->x2 - box->x1, -+ box->y2 - box->y1)); -+ -+ gen9_get_rectangles(sna, &op->base, 1, gen9_emit_composite_state); -+ op->prim_emit(sna, op, box, opacity); -+} -+ -+static void -+gen9_render_composite_spans_boxes(struct sna *sna, -+ const struct sna_composite_spans_op *op, 
-+ const BoxRec *box, int nbox, -+ float opacity) -+{ -+ DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", -+ __FUNCTION__, nbox, -+ op->base.src.offset[0], op->base.src.offset[1], -+ opacity, -+ op->base.dst.x, op->base.dst.y)); -+ -+ do { -+ int nbox_this_time; -+ -+ nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, -+ gen9_emit_composite_state); -+ nbox -= nbox_this_time; -+ -+ do { -+ DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, -+ box->x1, box->y1, -+ box->x2 - box->x1, -+ box->y2 - box->y1)); -+ -+ op->prim_emit(sna, op, box++, opacity); -+ } while (--nbox_this_time); -+ } while (nbox); -+} -+ -+fastcall static void -+gen9_render_composite_spans_boxes__thread(struct sna *sna, -+ const struct sna_composite_spans_op *op, -+ const struct sna_opacity_box *box, -+ int nbox) -+{ -+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", -+ __FUNCTION__, nbox, -+ op->base.src.offset[0], op->base.src.offset[1], -+ op->base.dst.x, op->base.dst.y)); -+ -+ sna_vertex_lock(&sna->render); -+ do { -+ int nbox_this_time; -+ float *v; -+ -+ nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, -+ gen9_emit_composite_state); -+ assert(nbox_this_time); -+ nbox -= nbox_this_time; -+ -+ v = sna->render.vertices + sna->render.vertex_used; -+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; -+ -+ sna_vertex_acquire__locked(&sna->render); -+ sna_vertex_unlock(&sna->render); -+ -+ op->emit_boxes(op, box, nbox_this_time, v); -+ box += nbox_this_time; -+ -+ sna_vertex_lock(&sna->render); -+ sna_vertex_release__locked(&sna->render); -+ } while (nbox); -+ sna_vertex_unlock(&sna->render); -+} -+ -+fastcall static void -+gen9_render_composite_spans_done(struct sna *sna, -+ const struct sna_composite_spans_op *op) -+{ -+ if (sna->render.vertex_offset) -+ gen8_vertex_flush(sna); -+ -+ DBG(("%s()\n", __FUNCTION__)); -+ -+ if (op->base.src.bo) -+ kgem_bo_destroy(&sna->kgem, op->base.src.bo); -+ -+ sna_render_composite_redirect_done(sna, 
&op->base); -+} -+ -+static bool -+gen9_check_composite_spans(struct sna *sna, -+ uint8_t op, PicturePtr src, PicturePtr dst, -+ int16_t width, int16_t height, unsigned flags) -+{ -+ if (op >= ARRAY_SIZE(gen9_blend_op)) -+ return false; -+ -+ if (gen9_composite_fallback(sna, src, NULL, dst)) -+ return false; -+ -+ if (need_tiling(sna, width, height) && -+ !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { -+ DBG(("%s: fallback, tiled operation not on GPU\n", -+ __FUNCTION__)); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool -+gen9_render_composite_spans(struct sna *sna, -+ uint8_t op, -+ PicturePtr src, -+ PicturePtr dst, -+ int16_t src_x, int16_t src_y, -+ int16_t dst_x, int16_t dst_y, -+ int16_t width, int16_t height, -+ unsigned flags, -+ struct sna_composite_spans_op *tmp) -+{ -+ DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, -+ width, height, flags, sna->kgem.ring)); -+ -+ assert(gen9_check_composite_spans(sna, op, src, dst, width, height, flags)); -+ -+ if (need_tiling(sna, width, height)) { -+ DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", -+ __FUNCTION__, width, height)); -+ return sna_tiling_composite_spans(op, src, dst, -+ src_x, src_y, dst_x, dst_y, -+ width, height, flags, tmp); -+ } -+ -+ tmp->base.op = op; -+ if (!gen9_composite_set_target(sna, &tmp->base, dst, -+ dst_x, dst_y, width, height, true)) -+ return false; -+ -+ switch (gen9_composite_picture(sna, src, &tmp->base.src, -+ src_x, src_y, -+ width, height, -+ dst_x, dst_y, -+ dst->polyMode == PolyModePrecise)) { -+ case -1: -+ goto cleanup_dst; -+ case 0: -+ if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) -+ goto cleanup_dst; -+ /* fall through to fixup */ -+ case 1: -+ if (!gen9_composite_channel_convert(&tmp->base.src)) -+ goto cleanup_src; -+ break; -+ } -+ tmp->base.mask.bo = NULL; -+ -+ tmp->base.is_affine = tmp->base.src.is_affine; -+ tmp->base.need_magic_ca_pass = false; -+ -+ tmp->base.u.gen9.flags = -+ 
GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, -+ tmp->base.src.repeat, -+ SAMPLER_FILTER_NEAREST, -+ SAMPLER_EXTEND_PAD), -+ gen9_get_blend(tmp->base.op, false, tmp->base.dst.format), -+ GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine, -+ gen4_choose_spans_emitter(sna, tmp)); -+ -+ tmp->box = gen9_render_composite_spans_box; -+ tmp->boxes = gen9_render_composite_spans_boxes; -+ if (tmp->emit_boxes) -+ tmp->thread_boxes = gen9_render_composite_spans_boxes__thread; -+ tmp->done = gen9_render_composite_spans_done; -+ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); -+ if (!kgem_check_bo(&sna->kgem, -+ tmp->base.dst.bo, tmp->base.src.bo, -+ NULL)) { -+ kgem_submit(&sna->kgem); -+ if (!kgem_check_bo(&sna->kgem, -+ tmp->base.dst.bo, tmp->base.src.bo, -+ NULL)) -+ goto cleanup_src; -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ gen9_align_vertex(sna, &tmp->base); -+ gen9_emit_composite_state(sna, &tmp->base); -+ return true; -+ -+cleanup_src: -+ if (tmp->base.src.bo) -+ kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); -+cleanup_dst: -+ if (tmp->base.redirect.real_bo) -+ kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); -+ return false; -+} -+#endif -+ -+static void -+gen9_emit_copy_state(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ uint32_t *binding_table; -+ uint16_t offset, dirty; -+ -+ gen9_get_batch(sna, op); -+ -+ binding_table = gen9_composite_get_binding_table(sna, &offset); -+ -+ dirty = kgem_bo_is_dirty(op->dst.bo); -+ -+ binding_table[0] = -+ gen9_bind_bo(sna, -+ op->dst.bo, op->dst.width, op->dst.height, -+ gen9_get_dest_format(op->dst.format), -+ true); -+ binding_table[1] = -+ gen9_bind_bo(sna, -+ op->src.bo, op->src.width, op->src.height, -+ op->src.card_format, -+ false); -+ -+ if (sna->kgem.surface == offset && -+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) { -+ sna->kgem.surface += SURFACE_DW; -+ offset = sna->render_state.gen9.surface_table; -+ } -+ -+ if 
(sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) -+ dirty = 0; -+ -+ assert(!GEN9_READS_DST(op->u.gen9.flags)); -+ gen9_emit_state(sna, op, offset | dirty); -+} -+ -+static inline bool -+prefer_blt_copy(struct sna *sna, -+ struct kgem_bo *src_bo, -+ struct kgem_bo *dst_bo, -+ unsigned flags) -+{ -+ if (sna->kgem.mode == KGEM_BLT) -+ return true; -+ -+ assert((flags & COPY_SYNC) == 0); -+ -+ if (untiled_tlb_miss(src_bo) || -+ untiled_tlb_miss(dst_bo)) -+ return true; -+ -+ if (flags & COPY_DRI && !sna->kgem.has_semaphores) -+ return false; -+ -+ if (force_blt_ring(sna, dst_bo)) -+ return true; -+ -+ if ((flags & COPY_SMALL || -+ (sna->render_state.gt < 3 && src_bo == dst_bo)) && -+ can_switch_to_blt(sna, dst_bo, flags)) -+ return true; -+ -+ if (kgem_bo_is_render(dst_bo) || -+ kgem_bo_is_render(src_bo)) -+ return false; -+ -+ if (flags & COPY_LAST && -+ sna->render_state.gt < 3 && -+ can_switch_to_blt(sna, dst_bo, flags)) -+ return true; -+ -+ if (prefer_render_ring(sna, dst_bo)) -+ return false; -+ -+ if (!prefer_blt_ring(sna, dst_bo, flags)) -+ return false; -+ -+ return prefer_blt_bo(sna, src_bo, dst_bo); -+} -+ -+static bool -+gen9_render_copy_boxes(struct sna *sna, uint8_t alu, -+ const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, -+ const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, -+ const BoxRec *box, int n, unsigned flags) -+{ -+ struct sna_composite_op tmp; -+ BoxRec extents; -+ -+ DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? 
%d\n", -+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, -+ src_bo == dst_bo, -+ overlaps(sna, -+ src_bo, src_dx, src_dy, -+ dst_bo, dst_dx, dst_dy, -+ box, n, flags, &extents))); -+ -+ if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && -+ sna_blt_compare_depth(src, dst) && -+ sna_blt_copy_boxes(sna, alu, -+ src_bo, src_dx, src_dy, -+ dst_bo, dst_dx, dst_dy, -+ dst->bitsPerPixel, -+ box, n)) -+ return true; -+ -+ if (!(alu == GXcopy || alu == GXclear) || -+ unaligned(src_bo, src->bitsPerPixel) || -+ unaligned(dst_bo, dst->bitsPerPixel)) { -+fallback_blt: -+ DBG(("%s: fallback blt\n", __FUNCTION__)); -+ if (!sna_blt_compare_depth(src, dst)) -+ return false; -+ -+ return sna_blt_copy_boxes_fallback(sna, alu, -+ src, src_bo, src_dx, src_dy, -+ dst, dst_bo, dst_dx, dst_dy, -+ box, n); -+ } -+ -+ if (overlaps(sna, -+ src_bo, src_dx, src_dy, -+ dst_bo, dst_dx, dst_dy, -+ box, n, flags, -+ &extents)) { -+ bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); -+ -+ if ((big || !prefer_render_ring(sna, dst_bo)) && -+ sna_blt_copy_boxes(sna, alu, -+ src_bo, src_dx, src_dy, -+ dst_bo, dst_dx, dst_dy, -+ dst->bitsPerPixel, -+ box, n)) -+ return true; -+ -+ if (big) -+ goto fallback_blt; -+ -+ assert(src_bo == dst_bo); -+ assert(src->depth == dst->depth); -+ assert(src->width == dst->width); -+ assert(src->height == dst->height); -+ return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, -+ src_dx, src_dy, -+ dst_dx, dst_dy, -+ box, n, &extents); -+ } -+ -+ if (dst->depth == src->depth) { -+ tmp.dst.format = sna_render_format_for_depth(dst->depth); -+ tmp.src.pict_format = tmp.dst.format; -+ } else { -+ tmp.dst.format = sna_format_for_depth(dst->depth); -+ tmp.src.pict_format = sna_format_for_depth(src->depth); -+ } -+ if (!gen9_check_format(tmp.src.pict_format)) -+ goto fallback_blt; -+ -+ tmp.dst.pixmap = (PixmapPtr)dst; -+ tmp.dst.width = dst->width; -+ tmp.dst.height = dst->height; -+ tmp.dst.bo = dst_bo; -+ tmp.dst.x = tmp.dst.y = 0; -+ 
tmp.damage = NULL; -+ -+ sna_render_composite_redirect_init(&tmp); -+ if (too_large(tmp.dst.width, tmp.dst.height)) { -+ int i; -+ -+ extents = box[0]; -+ for (i = 1; i < n; i++) { -+ if (box[i].x1 < extents.x1) -+ extents.x1 = box[i].x1; -+ if (box[i].y1 < extents.y1) -+ extents.y1 = box[i].y1; -+ -+ if (box[i].x2 > extents.x2) -+ extents.x2 = box[i].x2; -+ if (box[i].y2 > extents.y2) -+ extents.y2 = box[i].y2; -+ } -+ -+ if (!sna_render_composite_redirect(sna, &tmp, -+ extents.x1 + dst_dx, -+ extents.y1 + dst_dy, -+ extents.x2 - extents.x1, -+ extents.y2 - extents.y1, -+ n > 1)) -+ goto fallback_tiled; -+ } -+ -+ tmp.src.card_format = gen9_get_card_format(tmp.src.pict_format); -+ if (too_large(src->width, src->height)) { -+ int i; -+ -+ extents = box[0]; -+ for (i = 1; i < n; i++) { -+ if (box[i].x1 < extents.x1) -+ extents.x1 = box[i].x1; -+ if (box[i].y1 < extents.y1) -+ extents.y1 = box[i].y1; -+ -+ if (box[i].x2 > extents.x2) -+ extents.x2 = box[i].x2; -+ if (box[i].y2 > extents.y2) -+ extents.y2 = box[i].y2; -+ } -+ -+ if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, -+ extents.x1 + src_dx, -+ extents.y1 + src_dy, -+ extents.x2 - extents.x1, -+ extents.y2 - extents.y1)) -+ goto fallback_tiled_dst; -+ } else { -+ tmp.src.bo = src_bo; -+ tmp.src.width = src->width; -+ tmp.src.height = src->height; -+ tmp.src.offset[0] = tmp.src.offset[1] = 0; -+ } -+ -+ tmp.mask.bo = NULL; -+ -+ tmp.floats_per_vertex = 2; -+ tmp.floats_per_rect = 6; -+ tmp.need_magic_ca_pass = 0; -+ -+ tmp.u.gen9.flags = COPY_FLAGS(alu); -+ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); -+ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { -+ kgem_submit(&sna->kgem); -+ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { -+ if (tmp.src.bo != src_bo) -+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ if (tmp.redirect.real_bo) -+ kgem_bo_destroy(&sna->kgem, tmp.dst.bo); -+ goto fallback_blt; -+ } -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ src_dx 
+= tmp.src.offset[0]; -+ src_dy += tmp.src.offset[1]; -+ -+ dst_dx += tmp.dst.x; -+ dst_dy += tmp.dst.y; -+ -+ tmp.dst.x = tmp.dst.y = 0; -+ -+ gen9_align_vertex(sna, &tmp); -+ gen9_emit_copy_state(sna, &tmp); -+ -+ do { -+ int16_t *v; -+ int n_this_time; -+ -+ n_this_time = gen9_get_rectangles(sna, &tmp, n, -+ gen9_emit_copy_state); -+ n -= n_this_time; -+ -+ v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); -+ sna->render.vertex_used += 6 * n_this_time; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); -+ do { -+ -+ DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", -+ box->x1 + src_dx, box->y1 + src_dy, -+ box->x1 + dst_dx, box->y1 + dst_dy, -+ box->x2 - box->x1, box->y2 - box->y1)); -+ v[0] = box->x2 + dst_dx; -+ v[2] = box->x2 + src_dx; -+ v[1] = v[5] = box->y2 + dst_dy; -+ v[3] = v[7] = box->y2 + src_dy; -+ v[8] = v[4] = box->x1 + dst_dx; -+ v[10] = v[6] = box->x1 + src_dx; -+ v[9] = box->y1 + dst_dy; -+ v[11] = box->y1 + src_dy; -+ v += 12; box++; -+ } while (--n_this_time); -+ } while (n); -+ -+ gen8_vertex_flush(sna); -+ sna_render_composite_redirect_done(sna, &tmp); -+ if (tmp.src.bo != src_bo) -+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ return true; -+ -+fallback_tiled_dst: -+ if (tmp.redirect.real_bo) -+ kgem_bo_destroy(&sna->kgem, tmp.dst.bo); -+fallback_tiled: -+ DBG(("%s: fallback tiled\n", __FUNCTION__)); -+ if (sna_blt_compare_depth(src, dst) && -+ sna_blt_copy_boxes(sna, alu, -+ src_bo, src_dx, src_dy, -+ dst_bo, dst_dx, dst_dy, -+ dst->bitsPerPixel, -+ box, n)) -+ return true; -+ -+ return sna_tiling_copy_boxes(sna, alu, -+ src, src_bo, src_dx, src_dy, -+ dst, dst_bo, dst_dx, dst_dy, -+ box, n); -+} -+ -+static void -+gen9_render_copy_blt(struct sna *sna, -+ const struct sna_copy_op *op, -+ int16_t sx, int16_t sy, -+ int16_t w, int16_t h, -+ int16_t dx, int16_t dy) -+{ -+ int16_t *v; -+ -+ gen9_get_rectangles(sna, &op->base, 1, gen9_emit_copy_state); -+ -+ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; -+ 
sna->render.vertex_used += 6; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); -+ -+ v[0] = dx+w; v[1] = dy+h; -+ v[2] = sx+w; v[3] = sy+h; -+ v[4] = dx; v[5] = dy+h; -+ v[6] = sx; v[7] = sy+h; -+ v[8] = dx; v[9] = dy; -+ v[10] = sx; v[11] = sy; -+} -+ -+static void -+gen9_render_copy_done(struct sna *sna, const struct sna_copy_op *op) -+{ -+ if (sna->render.vertex_offset) -+ gen8_vertex_flush(sna); -+} -+ -+static bool -+gen9_render_copy(struct sna *sna, uint8_t alu, -+ PixmapPtr src, struct kgem_bo *src_bo, -+ PixmapPtr dst, struct kgem_bo *dst_bo, -+ struct sna_copy_op *op) -+{ -+ DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", -+ __FUNCTION__, alu, -+ src->drawable.width, src->drawable.height, -+ dst->drawable.width, dst->drawable.height)); -+ -+ if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && -+ sna_blt_compare_depth(&src->drawable, &dst->drawable) && -+ sna_blt_copy(sna, alu, -+ src_bo, dst_bo, -+ dst->drawable.bitsPerPixel, -+ op)) -+ return true; -+ -+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || -+ too_large(src->drawable.width, src->drawable.height) || -+ too_large(dst->drawable.width, dst->drawable.height) || -+ unaligned(src_bo, src->drawable.bitsPerPixel) || -+ unaligned(dst_bo, dst->drawable.bitsPerPixel)) { -+fallback: -+ if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) -+ return false; -+ -+ return sna_blt_copy(sna, alu, src_bo, dst_bo, -+ dst->drawable.bitsPerPixel, -+ op); -+ } -+ -+ if (dst->drawable.depth == src->drawable.depth) { -+ op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); -+ op->base.src.pict_format = op->base.dst.format; -+ } else { -+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth); -+ op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); -+ } -+ if (!gen9_check_format(op->base.src.pict_format)) -+ goto fallback; -+ -+ op->base.dst.pixmap = dst; -+ op->base.dst.width = dst->drawable.width; -+ op->base.dst.height = dst->drawable.height; -+ 
op->base.dst.bo = dst_bo; -+ -+ op->base.src.bo = src_bo; -+ op->base.src.card_format = -+ gen9_get_card_format(op->base.src.pict_format); -+ op->base.src.width = src->drawable.width; -+ op->base.src.height = src->drawable.height; -+ -+ op->base.mask.bo = NULL; -+ -+ op->base.floats_per_vertex = 2; -+ op->base.floats_per_rect = 6; -+ -+ op->base.u.gen9.flags = COPY_FLAGS(alu); -+ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); -+ if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { -+ kgem_submit(&sna->kgem); -+ if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) -+ goto fallback; -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ gen9_align_vertex(sna, &op->base); -+ gen9_emit_copy_state(sna, &op->base); -+ -+ op->blt = gen9_render_copy_blt; -+ op->done = gen9_render_copy_done; -+ return true; -+} -+ -+static void -+gen9_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) -+{ -+ uint32_t *binding_table; -+ uint16_t offset, dirty; -+ -+ /* XXX Render Target Fast Clear -+ * Set RTFC Enable in PS and render a rectangle. -+ * Limited to a clearing the full MSC surface only with a -+ * specific kernel. 
-+ */ -+ -+ gen9_get_batch(sna, op); -+ -+ binding_table = gen9_composite_get_binding_table(sna, &offset); -+ -+ dirty = kgem_bo_is_dirty(op->dst.bo); -+ -+ binding_table[0] = -+ gen9_bind_bo(sna, -+ op->dst.bo, op->dst.width, op->dst.height, -+ gen9_get_dest_format(op->dst.format), -+ true); -+ binding_table[1] = -+ gen9_bind_bo(sna, -+ op->src.bo, 1, 1, -+ SURFACEFORMAT_B8G8R8A8_UNORM, -+ false); -+ -+ if (sna->kgem.surface == offset && -+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) { -+ sna->kgem.surface += SURFACE_DW; -+ offset = sna->render_state.gen9.surface_table; -+ } -+ -+ if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) -+ dirty = 0; -+ -+ gen9_emit_state(sna, op, offset | dirty); -+} -+ -+static bool -+gen9_render_fill_boxes(struct sna *sna, -+ CARD8 op, -+ PictFormat format, -+ const xRenderColor *color, -+ const DrawableRec *dst, struct kgem_bo *dst_bo, -+ const BoxRec *box, int n) -+{ -+ struct sna_composite_op tmp; -+ uint32_t pixel; -+ -+ DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", -+ __FUNCTION__, op, -+ color->red, color->green, color->blue, color->alpha, (int)format)); -+ -+ if (op >= ARRAY_SIZE(gen9_blend_op)) { -+ DBG(("%s: fallback due to unhandled blend op: %d\n", -+ __FUNCTION__, op)); -+ return false; -+ } -+ -+ if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || -+ !gen9_check_dst_format(format) || -+ unaligned(dst_bo, PICT_FORMAT_BPP(format))) { -+ uint8_t alu = GXinvalid; -+ -+ if (op <= PictOpSrc) { -+ pixel = 0; -+ if (op == PictOpClear) -+ alu = GXclear; -+ else if (sna_get_pixel_from_rgba(&pixel, -+ color->red, -+ color->green, -+ color->blue, -+ color->alpha, -+ format)) -+ alu = GXcopy; -+ } -+ -+ if (alu != GXinvalid && -+ sna_blt_fill_boxes(sna, alu, -+ dst_bo, dst->bitsPerPixel, -+ pixel, box, n)) -+ return true; -+ -+ if (!gen9_check_dst_format(format)) -+ return false; -+ } -+ -+ if (op == PictOpClear) { -+ pixel = 0; -+ op = 
PictOpSrc; -+ } else if (!sna_get_pixel_from_rgba(&pixel, -+ color->red, -+ color->green, -+ color->blue, -+ color->alpha, -+ PICT_a8r8g8b8)) -+ return false; -+ -+ DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", -+ __FUNCTION__, pixel, n, -+ box[0].x1, box[0].y1, box[0].x2, box[0].y2)); -+ -+ tmp.dst.pixmap = (PixmapPtr)dst; -+ tmp.dst.width = dst->width; -+ tmp.dst.height = dst->height; -+ tmp.dst.format = format; -+ tmp.dst.bo = dst_bo; -+ tmp.dst.x = tmp.dst.y = 0; -+ tmp.damage = NULL; -+ -+ sna_render_composite_redirect_init(&tmp); -+ if (too_large(dst->width, dst->height)) { -+ BoxRec extents; -+ -+ boxes_extents(box, n, &extents); -+ if (!sna_render_composite_redirect(sna, &tmp, -+ extents.x1, extents.y1, -+ extents.x2 - extents.x1, -+ extents.y2 - extents.y1, -+ n > 1)) -+ return sna_tiling_fill_boxes(sna, op, format, color, -+ dst, dst_bo, box, n); -+ } -+ -+ tmp.src.bo = sna_render_get_solid(sna, pixel); -+ tmp.mask.bo = NULL; -+ -+ tmp.floats_per_vertex = 2; -+ tmp.floats_per_rect = 6; -+ tmp.need_magic_ca_pass = false; -+ -+ tmp.u.gen9.flags = FILL_FLAGS(op, format); -+ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); -+ if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { -+ kgem_submit(&sna->kgem); -+ if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { -+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ tmp.src.bo = NULL; -+ -+ if (tmp.redirect.real_bo) { -+ kgem_bo_destroy(&sna->kgem, tmp.dst.bo); -+ tmp.redirect.real_bo = NULL; -+ } -+ -+ return false; -+ } -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ gen9_align_vertex(sna, &tmp); -+ gen9_emit_fill_state(sna, &tmp); -+ -+ do { -+ int n_this_time; -+ int16_t *v; -+ -+ n_this_time = gen9_get_rectangles(sna, &tmp, n, -+ gen9_emit_fill_state); -+ n -= n_this_time; -+ -+ v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); -+ sna->render.vertex_used += 6 * n_this_time; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); -+ do { -+ DBG((" (%d, %d), (%d, %d)\n", -+ box->x1, box->y1, 
box->x2, box->y2)); -+ -+ v[0] = box->x2; -+ v[5] = v[1] = box->y2; -+ v[8] = v[4] = box->x1; -+ v[9] = box->y1; -+ v[2] = v[3] = v[7] = 1; -+ v[6] = v[10] = v[11] = 0; -+ v += 12; box++; -+ } while (--n_this_time); -+ } while (n); -+ -+ gen8_vertex_flush(sna); -+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ sna_render_composite_redirect_done(sna, &tmp); -+ return true; -+} -+ -+static void -+gen9_render_fill_op_blt(struct sna *sna, -+ const struct sna_fill_op *op, -+ int16_t x, int16_t y, int16_t w, int16_t h) -+{ -+ int16_t *v; -+ -+ DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); -+ -+ gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state); -+ -+ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; -+ sna->render.vertex_used += 6; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); -+ -+ v[0] = x+w; -+ v[4] = v[8] = x; -+ v[1] = v[5] = y+h; -+ v[9] = y; -+ -+ v[2] = v[3] = v[7] = 1; -+ v[6] = v[10] = v[11] = 0; -+} -+ -+fastcall static void -+gen9_render_fill_op_box(struct sna *sna, -+ const struct sna_fill_op *op, -+ const BoxRec *box) -+{ -+ int16_t *v; -+ -+ DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, -+ box->x1, box->y1, box->x2, box->y2)); -+ -+ gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state); -+ -+ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; -+ sna->render.vertex_used += 6; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); -+ -+ v[0] = box->x2; -+ v[8] = v[4] = box->x1; -+ v[5] = v[1] = box->y2; -+ v[9] = box->y1; -+ -+ v[7] = v[2] = v[3] = 1; -+ v[6] = v[10] = v[11] = 0; -+} -+ -+fastcall static void -+gen9_render_fill_op_boxes(struct sna *sna, -+ const struct sna_fill_op *op, -+ const BoxRec *box, -+ int nbox) -+{ -+ DBG(("%s: (%d, %d),(%d, %d)... 
x %d\n", __FUNCTION__, -+ box->x1, box->y1, box->x2, box->y2, nbox)); -+ -+ do { -+ int nbox_this_time; -+ int16_t *v; -+ -+ nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, -+ gen9_emit_fill_state); -+ nbox -= nbox_this_time; -+ -+ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; -+ sna->render.vertex_used += 6 * nbox_this_time; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); -+ -+ do { -+ v[0] = box->x2; -+ v[8] = v[4] = box->x1; -+ v[5] = v[1] = box->y2; -+ v[9] = box->y1; -+ v[7] = v[2] = v[3] = 1; -+ v[6] = v[10] = v[11] = 0; -+ box++; v += 12; -+ } while (--nbox_this_time); -+ } while (nbox); -+} -+ -+static void -+gen9_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) -+{ -+ if (sna->render.vertex_offset) -+ gen8_vertex_flush(sna); -+ kgem_bo_destroy(&sna->kgem, op->base.src.bo); -+} -+ -+static bool -+gen9_render_fill(struct sna *sna, uint8_t alu, -+ PixmapPtr dst, struct kgem_bo *dst_bo, -+ uint32_t color, unsigned flags, -+ struct sna_fill_op *op) -+{ -+ DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); -+ -+ if (prefer_blt_fill(sna, dst_bo, flags) && -+ sna_blt_fill(sna, alu, -+ dst_bo, dst->drawable.bitsPerPixel, -+ color, -+ op)) -+ return true; -+ -+ if (!(alu == GXcopy || alu == GXclear) || -+ too_large(dst->drawable.width, dst->drawable.height) || -+ unaligned(dst_bo, dst->drawable.bitsPerPixel)) -+ return sna_blt_fill(sna, alu, -+ dst_bo, dst->drawable.bitsPerPixel, -+ color, -+ op); -+ -+ if (alu == GXclear) -+ color = 0; -+ -+ op->base.dst.pixmap = dst; -+ op->base.dst.width = dst->drawable.width; -+ op->base.dst.height = dst->drawable.height; -+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth); -+ op->base.dst.bo = dst_bo; -+ op->base.dst.x = op->base.dst.y = 0; -+ -+ op->base.src.bo = -+ sna_render_get_solid(sna, -+ sna_rgba_for_color(color, -+ dst->drawable.depth)); -+ op->base.mask.bo = NULL; -+ -+ op->base.need_magic_ca_pass = false; -+ 
op->base.floats_per_vertex = 2; -+ op->base.floats_per_rect = 6; -+ -+ op->base.u.gen9.flags = FILL_FLAGS_NOBLEND; -+ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); -+ if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { -+ kgem_submit(&sna->kgem); -+ if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { -+ kgem_bo_destroy(&sna->kgem, op->base.src.bo); -+ return false; -+ } -+ -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ gen9_align_vertex(sna, &op->base); -+ gen9_emit_fill_state(sna, &op->base); -+ -+ op->blt = gen9_render_fill_op_blt; -+ op->box = gen9_render_fill_op_box; -+ op->boxes = gen9_render_fill_op_boxes; -+ op->points = NULL; -+ op->done = gen9_render_fill_op_done; -+ return true; -+} -+ -+static bool -+gen9_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, -+ uint32_t color, -+ int16_t x1, int16_t y1, int16_t x2, int16_t y2, -+ uint8_t alu) -+{ -+ BoxRec box; -+ -+ box.x1 = x1; -+ box.y1 = y1; -+ box.x2 = x2; -+ box.y2 = y2; -+ -+ return sna_blt_fill_boxes(sna, alu, -+ bo, dst->drawable.bitsPerPixel, -+ color, &box, 1); -+} -+ -+static bool -+gen9_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, -+ uint32_t color, -+ int16_t x1, int16_t y1, -+ int16_t x2, int16_t y2, -+ uint8_t alu) -+{ -+ struct sna_composite_op tmp; -+ int16_t *v; -+ -+ /* Prefer to use the BLT if already engaged */ -+ if (prefer_blt_fill(sna, bo, FILL_BOXES) && -+ gen9_render_fill_one_try_blt(sna, dst, bo, color, -+ x1, y1, x2, y2, alu)) -+ return true; -+ -+ /* Must use the BLT if we can't RENDER... 
*/ -+ if (!(alu == GXcopy || alu == GXclear) || -+ too_large(dst->drawable.width, dst->drawable.height) || -+ unaligned(bo, dst->drawable.bitsPerPixel)) -+ return gen9_render_fill_one_try_blt(sna, dst, bo, color, -+ x1, y1, x2, y2, alu); -+ -+ if (alu == GXclear) -+ color = 0; -+ -+ tmp.dst.pixmap = dst; -+ tmp.dst.width = dst->drawable.width; -+ tmp.dst.height = dst->drawable.height; -+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth); -+ tmp.dst.bo = bo; -+ tmp.dst.x = tmp.dst.y = 0; -+ -+ tmp.src.bo = -+ sna_render_get_solid(sna, -+ sna_rgba_for_color(color, -+ dst->drawable.depth)); -+ tmp.mask.bo = NULL; -+ -+ tmp.floats_per_vertex = 2; -+ tmp.floats_per_rect = 6; -+ tmp.need_magic_ca_pass = false; -+ -+ tmp.u.gen9.flags = FILL_FLAGS_NOBLEND; -+ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); -+ if (!kgem_check_bo(&sna->kgem, bo, NULL)) { -+ kgem_submit(&sna->kgem); -+ if (kgem_check_bo(&sna->kgem, bo, NULL)) { -+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ return false; -+ } -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ gen9_align_vertex(sna, &tmp); -+ gen9_emit_fill_state(sna, &tmp); -+ -+ gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state); -+ -+ DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); -+ -+ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; -+ sna->render.vertex_used += 6; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); -+ -+ v[0] = x2; -+ v[8] = v[4] = x1; -+ v[5] = v[1] = y2; -+ v[9] = y1; -+ v[7] = v[2] = v[3] = 1; -+ v[6] = v[10] = v[11] = 0; -+ -+ gen8_vertex_flush(sna); -+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ -+ return true; -+} -+ -+static bool -+gen9_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) -+{ -+ BoxRec box; -+ -+ box.x1 = 0; -+ box.y1 = 0; -+ box.x2 = dst->drawable.width; -+ box.y2 = dst->drawable.height; -+ -+ return sna_blt_fill_boxes(sna, GXclear, -+ bo, dst->drawable.bitsPerPixel, -+ 0, &box, 1); -+} -+ -+static bool -+gen9_render_clear(struct sna 
*sna, PixmapPtr dst, struct kgem_bo *bo) -+{ -+ struct sna_composite_op tmp; -+ int16_t *v; -+ -+ DBG(("%s: %dx%d\n", -+ __FUNCTION__, -+ dst->drawable.width, -+ dst->drawable.height)); -+ -+ /* Prefer to use the BLT if already engaged */ -+ if (sna->kgem.mode == KGEM_BLT && -+ gen9_render_clear_try_blt(sna, dst, bo)) -+ return true; -+ -+ /* Must use the BLT if we can't RENDER... */ -+ if (too_large(dst->drawable.width, dst->drawable.height) || -+ unaligned(bo, dst->drawable.bitsPerPixel)) -+ return gen9_render_clear_try_blt(sna, dst, bo); -+ -+ tmp.dst.pixmap = dst; -+ tmp.dst.width = dst->drawable.width; -+ tmp.dst.height = dst->drawable.height; -+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth); -+ tmp.dst.bo = bo; -+ tmp.dst.x = tmp.dst.y = 0; -+ -+ tmp.src.bo = sna_render_get_solid(sna, 0); -+ tmp.mask.bo = NULL; -+ -+ tmp.floats_per_vertex = 2; -+ tmp.floats_per_rect = 6; -+ tmp.need_magic_ca_pass = false; -+ -+ tmp.u.gen9.flags = FILL_FLAGS_NOBLEND; -+ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); -+ if (!kgem_check_bo(&sna->kgem, bo, NULL)) { -+ kgem_submit(&sna->kgem); -+ if (!kgem_check_bo(&sna->kgem, bo, NULL)) { -+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ return false; -+ } -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ gen9_align_vertex(sna, &tmp); -+ gen9_emit_fill_state(sna, &tmp); -+ -+ gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state); -+ -+ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; -+ sna->render.vertex_used += 6; -+ assert(sna->render.vertex_used <= sna->render.vertex_size); -+ -+ v[0] = dst->drawable.width; -+ v[5] = v[1] = dst->drawable.height; -+ v[8] = v[4] = 0; -+ v[9] = 0; -+ -+ v[7] = v[2] = v[3] = 1; -+ v[6] = v[10] = v[11] = 0; -+ -+ gen8_vertex_flush(sna); -+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); -+ -+ return true; -+} -+ -+#if !NO_VIDEO -+static uint32_t gen9_bind_video_source(struct sna *sna, -+ struct kgem_bo *bo, -+ uint32_t delta, -+ int width, -+ int height, -+ int pitch, -+ 
uint32_t format) -+{ -+ uint32_t *ss; -+ int offset; -+ -+ offset = sna->kgem.surface -= SURFACE_DW; -+ ss = sna->kgem.batch + offset; -+ ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | -+ gen9_tiling_bits(bo->tiling) | -+ format << SURFACE_FORMAT_SHIFT | -+ SURFACE_VALIGN_4 | SURFACE_HALIGN_4); -+ ss[1] = 0; -+ ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | -+ (height - 1) << SURFACE_HEIGHT_SHIFT); -+ ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT; -+ ss[4] = 0; -+ ss[5] = 0; -+ ss[6] = 0; -+ ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); -+ *(uint64_t *)(ss+8) = -+ kgem_add_reloc64(&sna->kgem, offset + 8, bo, -+ I915_GEM_DOMAIN_SAMPLER << 16, -+ delta); -+ ss[10] = 0; -+ ss[11] = 0; -+ ss[12] = 0; -+ ss[13] = 0; -+ ss[14] = 0; -+ ss[15] = 0; -+ -+ DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n", -+ offset, bo->handle, ss[1], -+ format, width, height, bo->pitch, bo->tiling)); -+ -+ return offset * sizeof(uint32_t); -+} -+ -+static void gen9_emit_video_state(struct sna *sna, -+ const struct sna_composite_op *op) -+{ -+ struct sna_video_frame *frame = op->priv; -+ uint32_t src_surf_format; -+ uint32_t src_surf_base[6]; -+ int src_width[6]; -+ int src_height[6]; -+ int src_pitch[6]; -+ uint32_t *binding_table; -+ uint16_t offset; -+ int n_src, n; -+ -+ /* XXX VeBox, bicubic */ -+ -+ gen9_get_batch(sna, op); -+ -+ src_surf_base[0] = 0; -+ src_surf_base[1] = 0; -+ src_surf_base[2] = frame->VBufOffset; -+ src_surf_base[3] = frame->VBufOffset; -+ src_surf_base[4] = frame->UBufOffset; -+ src_surf_base[5] = frame->UBufOffset; -+ -+ if (is_planar_fourcc(frame->id)) { -+ src_surf_format = SURFACEFORMAT_R8_UNORM; -+ src_width[1] = src_width[0] = frame->width; -+ src_height[1] = src_height[0] = frame->height; -+ src_pitch[1] = src_pitch[0] = frame->pitch[1]; -+ src_width[4] = src_width[5] = src_width[2] = src_width[3] = -+ frame->width / 2; -+ src_height[4] = src_height[5] = src_height[2] = src_height[3] = -+ frame->height / 
2; -+ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = -+ frame->pitch[0]; -+ n_src = 6; -+ } else { -+ if (frame->id == FOURCC_RGB888) -+ src_surf_format = SURFACEFORMAT_B8G8R8X8_UNORM; -+ else if (frame->id == FOURCC_UYVY) -+ src_surf_format = SURFACEFORMAT_YCRCB_SWAPY; -+ else -+ src_surf_format = SURFACEFORMAT_YCRCB_NORMAL; -+ -+ src_width[0] = frame->width; -+ src_height[0] = frame->height; -+ src_pitch[0] = frame->pitch[0]; -+ n_src = 1; -+ } -+ -+ binding_table = gen9_composite_get_binding_table(sna, &offset); -+ -+ binding_table[0] = -+ gen9_bind_bo(sna, -+ op->dst.bo, op->dst.width, op->dst.height, -+ gen9_get_dest_format(op->dst.format), -+ true); -+ for (n = 0; n < n_src; n++) { -+ binding_table[1+n] = -+ gen9_bind_video_source(sna, -+ frame->bo, -+ src_surf_base[n], -+ src_width[n], -+ src_height[n], -+ src_pitch[n], -+ src_surf_format); -+ } -+ -+ gen9_emit_state(sna, op, offset); -+} -+ -+static unsigned select_video_kernel(const struct sna_video_frame *frame) -+{ -+ switch (frame->id) { -+ case FOURCC_YV12: -+ case FOURCC_I420: -+ case FOURCC_XVMC: -+ return GEN9_WM_KERNEL_VIDEO_PLANAR; -+ -+ case FOURCC_RGB888: -+ case FOURCC_RGB565: -+ return GEN9_WM_KERNEL_VIDEO_RGB; -+ -+ default: -+ return GEN9_WM_KERNEL_VIDEO_PACKED; -+ } -+} -+ -+static bool -+gen9_render_video(struct sna *sna, -+ struct sna_video *video, -+ struct sna_video_frame *frame, -+ RegionPtr dstRegion, -+ PixmapPtr pixmap) -+{ -+ struct sna_composite_op tmp; -+ struct sna_pixmap *priv = sna_pixmap(pixmap); -+ int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; -+ int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; -+ int src_width = frame->src.x2 - frame->src.x1; -+ int src_height = frame->src.y2 - frame->src.y1; -+ float src_offset_x, src_offset_y; -+ float src_scale_x, src_scale_y; -+ unsigned filter; -+ const BoxRec *box; -+ int nbox; -+ -+ DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", -+ __FUNCTION__, -+ src_width, 
src_height, dst_width, dst_height, -+ region_num_rects(dstRegion), -+ REGION_EXTENTS(NULL, dstRegion)->x1, -+ REGION_EXTENTS(NULL, dstRegion)->y1, -+ REGION_EXTENTS(NULL, dstRegion)->x2, -+ REGION_EXTENTS(NULL, dstRegion)->y2)); -+ -+ assert(priv->gpu_bo); -+ assert(!too_large(pixmap->drawable.width, pixmap->drawable.height)); -+ assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel)); -+ -+ memset(&tmp, 0, sizeof(tmp)); -+ -+ tmp.dst.pixmap = pixmap; -+ tmp.dst.width = pixmap->drawable.width; -+ tmp.dst.height = pixmap->drawable.height; -+ tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); -+ tmp.dst.bo = priv->gpu_bo; -+ -+ tmp.src.bo = frame->bo; -+ tmp.mask.bo = NULL; -+ -+ tmp.floats_per_vertex = 3; -+ tmp.floats_per_rect = 9; -+ -+ DBG(("%s: scaling?=%d, planar?=%d [%x]\n", -+ __FUNCTION__, -+ src_width != dst_width || src_height != dst_height, -+ is_planar_fourcc(frame->id), frame->id)); -+ -+ if (src_width == dst_width && src_height == dst_height) -+ filter = SAMPLER_FILTER_NEAREST; -+ else -+ filter = SAMPLER_FILTER_BILINEAR; -+ -+ tmp.u.gen9.flags = -+ GEN9_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, -+ SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), -+ NO_BLEND, -+ select_video_kernel(frame), -+ 2); -+ tmp.priv = frame; -+ -+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); -+ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { -+ kgem_submit(&sna->kgem); -+ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) -+ return false; -+ -+ _kgem_set_mode(&sna->kgem, KGEM_RENDER); -+ } -+ -+ gen9_align_vertex(sna, &tmp); -+ gen9_emit_video_state(sna, &tmp); -+ -+ DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", -+ __FUNCTION__, -+ frame->src.x1, frame->src.y1, -+ src_width, src_height, -+ dst_width, dst_height, -+ frame->width, frame->height)); -+ -+ src_scale_x = (float)src_width / dst_width / frame->width; -+ src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * 
src_scale_x; -+ -+ src_scale_y = (float)src_height / dst_height / frame->height; -+ src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; -+ -+ DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", -+ __FUNCTION__, -+ src_scale_x, src_scale_y, -+ src_offset_x, src_offset_y)); -+ -+ box = region_rects(dstRegion); -+ nbox = region_num_rects(dstRegion); -+ while (nbox--) { -+ DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", -+ __FUNCTION__, -+ box->x1, box->y1, -+ box->x2, box->y2, -+ box->x1 * src_scale_x + src_offset_x, -+ box->y1 * src_scale_y + src_offset_y, -+ box->x2 * src_scale_x + src_offset_x, -+ box->y2 * src_scale_y + src_offset_y)); -+ -+ gen9_get_rectangles(sna, &tmp, 1, gen9_emit_video_state); -+ -+ OUT_VERTEX(box->x2, box->y2); -+ OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); -+ OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); -+ -+ OUT_VERTEX(box->x1, box->y2); -+ OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); -+ OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); -+ -+ OUT_VERTEX(box->x1, box->y1); -+ OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); -+ OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); -+ -+ box++; -+ } -+ gen8_vertex_flush(sna); -+ -+ if (!DAMAGE_IS_ALL(priv->gpu_damage)) -+ sna_damage_add(&priv->gpu_damage, dstRegion); -+ -+ return true; -+} -+#endif -+ -+static void gen9_render_flush(struct sna *sna) -+{ -+ gen8_vertex_close(sna); -+ -+ assert(sna->render.vb_id == 0); -+ assert(sna->render.vertex_offset == 0); -+} -+ -+static void gen9_render_reset(struct sna *sna) -+{ -+ sna->render_state.gen9.emit_flush = false; -+ sna->render_state.gen9.needs_invariant = true; -+ sna->render_state.gen9.ve_id = 3 << 2; -+ sna->render_state.gen9.ve_dirty = false; -+ sna->render_state.gen9.last_primitive = -1; -+ -+ sna->render_state.gen9.num_sf_outputs = 0; -+ sna->render_state.gen9.samplers = -1; -+ sna->render_state.gen9.blend = -1; -+ sna->render_state.gen9.kernel = -1; -+ 
sna->render_state.gen9.drawrect_offset = -1; -+ sna->render_state.gen9.drawrect_limit = -1; -+ sna->render_state.gen9.surface_table = 0; -+ -+ if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { -+ DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); -+ discard_vbo(sna); -+ } -+ -+ sna->render.vertex_offset = 0; -+ sna->render.nvertex_reloc = 0; -+ sna->render.vb_id = 0; -+} -+ -+static void gen9_render_fini(struct sna *sna) -+{ -+ kgem_bo_destroy(&sna->kgem, sna->render_state.gen9.general_bo); -+} -+ -+static bool gen9_render_setup(struct sna *sna) -+{ -+ struct gen9_render_state *state = &sna->render_state.gen9; -+ struct sna_static_stream general; -+ struct gen9_sampler_state *ss; -+ int i, j, k, l, m; -+ uint32_t devid; -+ -+ devid = intel_get_device_id(sna->dev); -+ if (devid & 0xf) -+ state->gt = GEN9_GT_BIAS + ((devid >> 4) & 0xf) + 1; -+ DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); -+ -+ state->info = &min_gt_info; -+ if (is_skl(sna)) -+ state->info = &skl_gt_info; -+ if (is_bxt(sna)) -+ state->info = &bxt_gt_info; -+ if (is_kbl(sna)) -+ state->info = &kbl_gt_info; -+ if (is_glk(sna)) -+ state->info = &glk_gt_info; -+ -+ sna_static_stream_init(&general); -+ -+ /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer -+ * dumps, you know it points to zero. 
-+ */ -+ null_create(&general); -+ -+ for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) { -+ if (wm_kernels[m].size) { -+ state->wm_kernel[m][1] = -+ sna_static_stream_add(&general, -+ wm_kernels[m].data, -+ wm_kernels[m].size, -+ 64); -+ } else { -+ if (USE_8_PIXEL_DISPATCH) { -+ state->wm_kernel[m][0] = -+ sna_static_stream_compile_wm(sna, &general, -+ wm_kernels[m].data, 8); -+ } -+ -+ if (USE_16_PIXEL_DISPATCH) { -+ state->wm_kernel[m][1] = -+ sna_static_stream_compile_wm(sna, &general, -+ wm_kernels[m].data, 16); -+ } -+ -+ if (USE_32_PIXEL_DISPATCH) { -+ state->wm_kernel[m][2] = -+ sna_static_stream_compile_wm(sna, &general, -+ wm_kernels[m].data, 32); -+ } -+ } -+ assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); -+ } -+ -+ COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff); -+ ss = sna_static_stream_map(&general, -+ 2 * sizeof(*ss) * -+ (2 + -+ FILTER_COUNT * EXTEND_COUNT * -+ FILTER_COUNT * EXTEND_COUNT), -+ 32); -+ state->wm_state = sna_static_stream_offsetof(&general, ss); -+ sampler_copy_init(ss); ss += 2; -+ sampler_fill_init(ss); ss += 2; -+ for (i = 0; i < FILTER_COUNT; i++) { -+ for (j = 0; j < EXTEND_COUNT; j++) { -+ for (k = 0; k < FILTER_COUNT; k++) { -+ for (l = 0; l < EXTEND_COUNT; l++) { -+ sampler_state_init(ss++, i, j); -+ sampler_state_init(ss++, k, l); -+ } -+ } -+ } -+ } -+ -+ state->cc_blend = gen9_create_blend_state(&general); -+ -+ state->general_bo = sna_static_stream_fini(sna, &general); -+ return state->general_bo != NULL; -+} -+ -+const char *gen9_render_init(struct sna *sna, const char *backend) -+{ -+ if (!gen9_render_setup(sna)) -+ return backend; -+ -+ sna->kgem.context_switch = gen6_render_context_switch; -+ sna->kgem.retire = gen6_render_retire; -+ sna->kgem.expire = gen4_render_expire; -+ -+#if !NO_COMPOSITE -+ sna->render.composite = gen9_render_composite; -+ sna->render.prefer_gpu |= PREFER_GPU_RENDER; -+#endif -+#if !NO_COMPOSITE_SPANS -+ 
sna->render.check_composite_spans = gen9_check_composite_spans; -+ sna->render.composite_spans = gen9_render_composite_spans; -+ sna->render.prefer_gpu |= PREFER_GPU_SPANS; -+#endif -+#if !NO_VIDEO -+ sna->render.video = gen9_render_video; -+#endif -+ -+#if !NO_COPY_BOXES -+ sna->render.copy_boxes = gen9_render_copy_boxes; -+#endif -+#if !NO_COPY -+ sna->render.copy = gen9_render_copy; -+#endif -+ -+#if !NO_FILL_BOXES -+ sna->render.fill_boxes = gen9_render_fill_boxes; -+#endif -+#if !NO_FILL -+ sna->render.fill = gen9_render_fill; -+#endif -+#if !NO_FILL_ONE -+ sna->render.fill_one = gen9_render_fill_one; -+#endif -+#if !NO_FILL_CLEAR -+ sna->render.clear = gen9_render_clear; -+#endif -+ -+ sna->render.flush = gen9_render_flush; -+ sna->render.reset = gen9_render_reset; -+ sna->render.fini = gen9_render_fini; -+ -+ sna->render.max_3d_size = GEN9_MAX_SIZE; -+ sna->render.max_3d_pitch = 1 << 18; -+ return sna->render_state.gen9.info->name; -+} -diff --git a/src/sna/gen9_render.h b/src/sna/gen9_render.h -new file mode 100644 -index 00000000..e3cb3f93 ---- /dev/null -+++ b/src/sna/gen9_render.h -@@ -0,0 +1,1130 @@ -+#ifndef GEN9_RENDER_H -+#define GEN9_RENDER_H -+ -+#define INTEL_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low)) -+ -+#define GEN9_3D(pipeline,op,sub) \ -+ ((3 << 29) | ((pipeline) << 27) | ((op) << 24) | ((sub) << 16)) -+ -+#define GEN9_STATE_BASE_ADDRESS GEN9_3D(0, 1, 1) -+# define BASE_ADDRESS_MODIFY (1 << 0) -+ -+#define GEN9_STATE_SIP GEN9_3D(0, 1, 2) -+ -+#define GEN9_3DSTATE_VF_STATISTICS GEN9_3D(1, 0, 0xb) -+#define GEN9_PIPELINE_SELECT GEN9_3D(1, 1, 4) -+# define PIPELINE_SELECT_3D 0 -+# define PIPELINE_SELECT_MEDIA 1 -+#define PIPELINE_SELECTION_MASK (3 << 8) -+ -+#define GEN9_MEDIA_STATE_POINTERS GEN9_3D(2, 0, 0) -+#define GEN9_MEDIA_OBJECT GEN9_3D(2, 1, 0) -+ -+#define GEN9_3DSTATE_CLEAR_PARAMS GEN9_3D(3, 0, 0x04) -+#define GEN9_3DSTATE_DEPTH_BUFFER GEN9_3D(3, 0, 0x05) -+# define DEPTH_BUFFER_TYPE_SHIFT 29 -+# define 
DEPTH_BUFFER_FORMAT_SHIFT 18 -+ -+#define GEN9_3DSTATE_STENCIL_BUFFER GEN9_3D(3, 0, 0x06) -+#define GEN9_3DSTATE_HIER_DEPTH_BUFFER GEN9_3D(3, 0, 0x07) -+#define GEN9_3DSTATE_VERTEX_BUFFERS GEN9_3D(3, 0, 0x08) -+# define VB_INDEX_SHIFT 26 -+# define VB_MODIFY_ENABLE (1 << 14) -+#define GEN9_3DSTATE_VERTEX_ELEMENTS GEN9_3D(3, 0, 0x09) -+# define VE_INDEX_SHIFT 26 -+# define VE_VALID (1 << 25) -+# define VE_FORMAT_SHIFT 16 -+# define VE_OFFSET_SHIFT 0 -+# define VE_COMPONENT_0_SHIFT 28 -+# define VE_COMPONENT_1_SHIFT 24 -+# define VE_COMPONENT_2_SHIFT 20 -+# define VE_COMPONENT_3_SHIFT 16 -+#define GEN9_3DSTATE_INDEX_BUFFER GEN9_3D(3, 0, 0x0a) -+#define GEN9_3DSTATE_VF GEN9_3D(3, 0, 0x0c) -+ -+#define GEN9_3DSTATE_MULTISAMPLE GEN9_3D(3, 0, 0x0d) -+/* DW1 */ -+# define MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) -+# define MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) -+# define MULTISAMPLE_NUMSAMPLES_1 (0 << 1) -+# define MULTISAMPLE_NUMSAMPLES_4 (2 << 1) -+# define MULTISAMPLE_NUMSAMPLES_8 (3 << 1) -+ -+#define GEN9_3DSTATE_CC_STATE_POINTERS GEN9_3D(3, 0, 0x0e) -+#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS GEN9_3D(3, 0, 0x0f) -+ -+#define GEN9_3DSTATE_VS GEN9_3D(3, 0, 0x10) -+#define GEN9_3DSTATE_GS GEN9_3D(3, 0, 0x11) -+#define GEN9_3DSTATE_CLIP GEN9_3D(3, 0, 0x12) -+#define GEN9_3DSTATE_SF GEN9_3D(3, 0, 0x13) -+# define SF_TRI_PROVOKE_SHIFT 29 -+# define SF_LINE_PROVOKE_SHIFT 27 -+# define SF_FAN_PROVOKE_SHIFT 25 -+ -+#define GEN9_3DSTATE_WM GEN9_3D(3, 0, 0x14) -+/* DW1 */ -+# define WM_STATISTICS_ENABLE (1 << 31) -+# define WM_DEPTH_CLEAR (1 << 30) -+# define WM_DEPTH_RESOLVE (1 << 28) -+# define WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) -+# define WM_KILL_ENABLE (1 << 25) -+# define WM_POSITION_ZW_PIXEL (0 << 17) -+# define WM_POSITION_ZW_CENTROID (2 << 17) -+# define WM_POSITION_ZW_SAMPLE (3 << 17) -+# define WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 16) -+# define WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 15) -+# define WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 
<< 14) -+# define WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 13) -+# define WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 12) -+# define WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11) -+# define WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) -+# define WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) -+# define WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) -+# define WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) -+# define WM_LINE_AA_WIDTH_0_5 (0 << 6) -+# define WM_LINE_AA_WIDTH_1_0 (1 << 6) -+# define WM_LINE_AA_WIDTH_2_0 (2 << 6) -+# define WM_LINE_AA_WIDTH_4_0 (3 << 6) -+# define WM_POLYGON_STIPPLE_ENABLE (1 << 4) -+# define WM_LINE_STIPPLE_ENABLE (1 << 3) -+# define WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) -+# define WM_MSRAST_OFF_PIXEL (0 << 0) -+# define WM_MSRAST_OFF_PATTERN (1 << 0) -+# define WM_MSRAST_ON_PIXEL (2 << 0) -+# define WM_MSRAST_ON_PATTERN (3 << 0) -+ -+#define GEN9_3DSTATE_CONSTANT_VS GEN9_3D(3, 0, 0x15) -+#define GEN9_3DSTATE_CONSTANT_GS GEN9_3D(3, 0, 0x16) -+#define GEN9_3DSTATE_CONSTANT_PS GEN9_3D(3, 0, 0x17) -+ -+#define GEN9_3DSTATE_SAMPLE_MASK GEN9_3D(3, 0, 0x18) -+ -+#define GEN9_3DSTATE_CONSTANT_HS GEN9_3D(3, 0, 0x19) -+#define GEN9_3DSTATE_CONSTANT_DS GEN9_3D(3, 0, 0x1a) -+ -+#define GEN9_3DSTATE_HS GEN9_3D(3, 0, 0x1b) -+#define GEN9_3DSTATE_TE GEN9_3D(3, 0, 0x1c) -+#define GEN9_3DSTATE_DS GEN9_3D(3, 0, 0x1d) -+#define GEN9_3DSTATE_STREAMOUT GEN9_3D(3, 0, 0x1e) -+ -+#define GEN9_3DSTATE_SBE GEN9_3D(3, 0, 0x1f) -+/* DW1 */ -+# define SBE_FORCE_VERTEX_URB_READ_LENGTH (1<<29) -+# define SBE_FORCE_VERTEX_URB_READ_OFFSET (1<<28) -+# define SBE_NUM_OUTPUTS_SHIFT 22 -+# define SBE_SWIZZLE_ENABLE (1 << 21) -+# define SBE_POINT_SPRITE_LOWERLEFT (1 << 20) -+# define SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 -+# define SBE_URB_ENTRY_READ_OFFSET_SHIFT 5 -+#define SBE_ACTIVE_COMPONENT_NONE 0 -+#define SBE_ACTIVE_COMPONENT_XY 1 -+#define SBE_ACTIVE_COMPONENT_XYZ 2 -+#define SBE_ACTIVE_COMPONENT_XYZW 3 -+ -+ -+#define GEN9_3DSTATE_PS GEN9_3D(3, 0, 0x20) -+/* DW1:DW2 kernel pointer */ -+/* DW3 */ -+# 
define PS_SPF_MODE (1 << 31) -+# define PS_VECTOR_MASK_ENABLE (1 << 30) -+# define PS_SAMPLER_COUNT_SHIFT 27 -+# define PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -+# define PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -+# define PS_FLOATING_POINT_MODE_ALT (1 << 16) -+/* DW4:DW5: scratch space */ -+/* DW6 */ -+# define PS_MAX_THREADS_SHIFT 23 -+# define PS_MAX_THREADS (63 << PS_MAX_THREADS_SHIFT) -+# define PS_PUSH_CONSTANT_ENABLE (1 << 11) -+# define PS_RENDER_TARGET_CLEAR (1 << 8) -+# define PS_RENDER_TARGET_RESOLVE (1 << 6) -+# define PS_POSOFFSET_NONE (0 << 3) -+# define PS_POSOFFSET_CENTROID (2 << 3) -+# define PS_POSOFFSET_SAMPLE (3 << 3) -+# define PS_32_DISPATCH_ENABLE (1 << 2) -+# define PS_16_DISPATCH_ENABLE (1 << 1) -+# define PS_8_DISPATCH_ENABLE (1 << 0) -+/* DW7 */ -+# define PS_DISPATCH_START_GRF_SHIFT_0 16 -+# define PS_DISPATCH_START_GRF_SHIFT_1 8 -+# define PS_DISPATCH_START_GRF_SHIFT_2 0 -+/* DW8:D9: kernel 1 pointer */ -+/* DW10:D11: kernel 2 pointer */ -+ -+#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP GEN9_3D(3, 0, 0x21) -+#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC GEN9_3D(3, 0, 0x23) -+ -+#define GEN9_3DSTATE_BLEND_STATE_POINTERS GEN9_3D(3, 0, 0x24) -+ -+#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS GEN9_3D(3, 0, 0x26) -+#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS GEN9_3D(3, 0, 0x27) -+#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS GEN9_3D(3, 0, 0x28) -+#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS GEN9_3D(3, 0, 0x29) -+#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS GEN9_3D(3, 0, 0x2a) -+ -+#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS GEN9_3D(3, 0, 0x2b) -+#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS GEN9_3D(3, 0, 0x2c) -+#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS GEN9_3D(3, 0, 0x2d) -+#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS GEN9_3D(3, 0, 0x2e) -+#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS GEN9_3D(3, 0, 0x2f) -+ -+#define GEN9_3DSTATE_URB_VS GEN9_3D(3, 0, 0x30) -+#define GEN9_3DSTATE_URB_HS GEN9_3D(3, 0, 0x31) 
-+#define GEN9_3DSTATE_URB_DS GEN9_3D(3, 0, 0x32) -+#define GEN9_3DSTATE_URB_GS GEN9_3D(3, 0, 0x33) -+/* DW1 */ -+# define URB_ENTRY_NUMBER_SHIFT 0 -+# define URB_ENTRY_SIZE_SHIFT 16 -+# define URB_STARTING_ADDRESS_SHIFT 25 -+ -+#define GEN9_3DSTATE_GATHER_CONSTANT_VS GEN9_3D(3, 0, 0x34) -+#define GEN9_3DSTATE_GATHER_CONSTANT_GS GEN9_3D(3, 0, 0x35) -+#define GEN9_3DSTATE_GATHER_CONSTANT_HS GEN9_3D(3, 0, 0x36) -+#define GEN9_3DSTATE_GATHER_CONSTANT_DS GEN9_3D(3, 0, 0x37) -+#define GEN9_3DSTATE_GATHER_CONSTANT_PS GEN9_3D(3, 0, 0x38) -+ -+#define GEN9_3DSTATE_DX9_CONSTANTF_VS GEN9_3D(3, 0, 0x39) -+#define GEN9_3DSTATE_DX9_CONSTANTF_PS GEN9_3D(3, 0, 0x3a) -+#define GEN9_3DSTATE_DX9_CONSTANTI_VS GEN9_3D(3, 0, 0x3b) -+#define GEN9_3DSTATE_DX9_CONSTANTI_PS GEN9_3D(3, 0, 0x3c) -+#define GEN9_3DSTATE_DX9_CONSTANTB_VS GEN9_3D(3, 0, 0x3d) -+#define GEN9_3DSTATE_DX9_CONSTANTB_PS GEN9_3D(3, 0, 0x3e) -+#define GEN9_3DSTATE_DX9_LOCAL_VALID_VS GEN9_3D(3, 0, 0x3f) -+#define GEN9_3DSTATE_DX9_LOCAL_VALID_PS GEN9_3D(3, 0, 0x40) -+#define GEN9_3DSTATE_DX9_GENERATE_ACTIVE_VS GEN9_3D(3, 0, 0x41) -+#define GEN9_3DSTATE_DX9_GENERATE_ACTIVE_PS GEN9_3D(3, 0, 0x42) -+ -+#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS GEN9_3D(3, 0, 0x43) -+#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS GEN9_3D(3, 0, 0x44) -+#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS GEN9_3D(3, 0, 0x45) -+#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS GEN9_3D(3, 0, 0x46) -+#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS GEN9_3D(3, 0, 0x47) -+ -+#define GEN9_3DSTATE_VF_INSTANCING GEN9_3D(3, 0, 0x49) -+#define GEN9_3DSTATE_VF_SGVS GEN9_3D(3, 0, 0x4a) -+# define SGVS_ENABLE_INSTANCE_ID (1 << 31) -+# define SGVS_INSTANCE_ID_COMPONENT_SHIFT 29 -+# define SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT 16 -+# define SGVS_ENABLE_VERTEX_ID (1 << 15) -+# define SGVS_VERTEX_ID_COMPONENT_SHIFT 13 -+# define SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT 0 -+#define GEN9_3DSTATE_VF_TOPOLOGY GEN9_3D(3, 0, 0x4b) -+# define POINTLIST 0x01 -+# define LINELIST 0x02 -+# define 
LINESTRIP 0x03 -+# define TRILIST 0x04 -+# define TRISTRIP 0x05 -+# define TRIFAN 0x06 -+# define QUADLIST 0x07 -+# define QUADSTRIP 0x08 -+# define LINELIST_ADJ 0x09 -+# define LINESTRIP_ADJ 0x0A -+# define TRILIST_ADJ 0x0B -+# define TRISTRIP_ADJ 0x0C -+# define TRISTRIP_REVERSE 0x0D -+# define POLYGON 0x0E -+# define RECTLIST 0x0F -+# define LINELOOP 0x10 -+# define POINTLIST_BF 0x11 -+# define LINESTRIP_CONT 0x12 -+# define LINESTRIP_BF 0x13 -+# define LINESTRIP_CONT_BF 0x14 -+# define TRIFAN_NOSTIPPLE 0x15 -+ -+#define GEN9_3DSTATE_WM_CHROMAKEY GEN9_3D(3, 0, 0x4c) -+ -+#define GEN9_3DSTATE_PS_BLEND GEN9_3D(3, 0, 0x4d) -+# define PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) -+# define PS_BLEND_HAS_WRITEABLE_RT (1 << 30) -+# define PS_BLEND_COLOR_BLEND_ENABLE (1 << 29) -+# define PS_BLEND_SRC_ALPHA_SHIFT 24 -+# define PS_BLEND_DST_ALPHA_SHIFT 19 -+# define PS_BLEND_SRC_SHIFT 14 -+# define PS_BLEND_DST_SHIFT 9 -+# define PS_BLEND_ALPHA_TEST_ENABLE (1 << 8) -+# define PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7) -+ -+#define GEN9_3DSTATE_WM_DEPTH_STENCIL GEN9_3D(3, 0, 0x4e) -+/* DW1 */ -+# define WM_DS_STENCIL_TEST_MASK_MASK INTEL_MASK(31, 24) -+# define WM_DS_STENCIL_TEST_MASK_SHIFT 24 -+# define WM_DS_STENCIL_WRITE_MASK_MASK INTEL_MASK(23, 16) -+# define WM_DS_STENCIL_WRITE_MASK_SHIFT 16 -+# define WM_DS_BF_STENCIL_TEST_MASK_MASK INTEL_MASK(15, 8) -+# define WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8 -+# define WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0) -+# define WM_DS_DEPTH_FUNC_SHIFT 5 -+# define WM_DS_DOUBLE_SIDED_STENCIL_ENABLE (1 << 4) -+# define WM_DS_STENCIL_TEST_ENABLE (1 << 3) -+# define WM_DS_STENCIL_BUFFER_WRITE_ENABLE (1 << 2) -+# define WM_DS_DEPTH_TEST_ENABLE (1 << 1) -+# define WM_DS_DEPTH_BUFFER_WRITE_ENABLE (1 << 0) -+/* DW2 */ -+# define WM_DS_STENCIL_TEST_MASK_MASK INTEL_MASK(31, 24) -+# define WM_DS_STENCIL_TEST_MASK_SHIFT 24 -+# define WM_DS_STENCIL_WRITE_MASK_MASK INTEL_MASK(23, 16) -+# define WM_DS_STENCIL_WRITE_MASK_SHIFT 16 -+# define 
WM_DS_BF_STENCIL_TEST_MASK_MASK INTEL_MASK(15, 8) -+# define WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8 -+# define WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0) -+# define WM_DS_BF_STENCIL_WRITE_MASK_SHIFT 0 -+ -+#define GEN9_3DSTATE_PS_EXTRA GEN9_3D(3, 0, 0x4f) -+# define PSX_PIXEL_SHADER_VALID (1 << 31) -+# define PSX_PIXEL_SHADER_NO_RT_WRITE (1 << 30) -+# define PSX_OMASK_TO_RENDER_TARGET (1 << 29) -+# define PSX_KILL_ENABLE (1 << 28) -+# define PSX_PSCDEPTH_OFF (0 << 26) -+# define PSX_PSCDEPTH_ON (1 << 26) -+# define PSX_PSCDEPTH_ON_GE (2 << 26) -+# define PSX_PSCDEPTH_ON_LE (3 << 26) -+# define PSX_FORCE_COMPUTED_DEPTH (1 << 25) -+# define PSX_USES_SOURCE_DEPTH (1 << 24) -+# define PSX_USES_SOURCE_W (1 << 23) -+# define PSX_ATTRIBUTE_ENABLE (1 << 8) -+# define PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7) -+# define PSX_SHADER_IS_PER_SAMPLE (1 << 6) -+# define PSX_SHADER_HAS_UAV (1 << 2) -+# define PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1) -+ -+#define GEN9_3DSTATE_RASTER GEN9_3D(3, 0, 0x50) -+/* DW1 */ -+# define RASTER_FRONT_WINDING_CCW (1 << 21) -+# define RASTER_CULL_BOTH (0 << 16) -+# define RASTER_CULL_NONE (1 << 16) -+# define RASTER_CULL_FRONT (2 << 16) -+# define RASTER_CULL_BACK (3 << 16) -+# define RASTER_SMOOTH_POINT_ENABLE (1 << 13) -+# define RASTER_LINE_AA_ENABLE (1 << 2) -+# define RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE (1 << 0) -+ -+#define GEN9_3DSTATE_SBE_SWIZ GEN9_3D(3, 0, 0x51) -+#define GEN9_3DSTATE_WM_HZ_OP GEN9_3D(3, 0, 0x52) -+ -+#define GEN9_3DSTATE_COMPONENT_PACKING GEN6_3D(3, 0, 0x55) -+ -+ -+ -+#define GEN9_3DSTATE_DRAWING_RECTANGLE GEN9_3D(3, 1, 0x00) -+#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD GEN9_3D(3, 1, 0x02) -+#define GEN9_3DSTATE_CHROMA_KEY GEN9_3D(3, 1, 0x04) -+ -+#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET GEN9_3D(3, 1, 0x06) -+#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN GEN9_3D(3, 1, 0x07) -+#define GEN9_3DSTATE_LINE_STIPPLE GEN9_3D(3, 1, 0x08) -+#define GEN9_3DSTATE_AA_LINE_PARAMS GEN9_3D(3, 1, 0x0a) -+#define 
GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1 GEN9_3D(3, 1, 0x0c) -+#define GEN9_3DSTATE_MONOFILTER_SIZE GEN9_3D(3, 1, 0x11) -+#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS GEN9_3D(3, 1, 0x12) -+#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS GEN9_3D(3, 1, 0x13) -+#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS GEN9_3D(3, 1, 0x14) -+#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS GEN9_3D(3, 1, 0x15) -+#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS GEN9_3D(3, 1, 0x16) -+/* DW1 */ -+# define PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 -+# define PUSH_CONSTANT_BUFFER_SIZE_SHIFT 0 -+ -+#define GEN9_3DSTATE_SO_DECL_LIST GEN9_3D(3, 1, 0x17) -+#define GEN9_3DSTATE_SO_BUFFER GEN9_3D(3, 1, 0x18) -+#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC GEN9_3D(3, 1, 0x19) -+#define GEN9_3DSTATE_GATHER_BUFFER_POOL_ALLOC GEN9_3D(3, 1, 0x1a) -+#define GEN9_3DSTATE_DX9_CONSTANT_BUFFER_POOL_ALLOC GEN9_3D(3, 1, 0x1b) -+#define GEN9_3DSTATE_SAMPLE_PATTERN GEN9_3D(3, 1, 0x1c) -+ -+ -+/* for GEN9_PIPE_CONTROL */ -+#define GEN9_PIPE_CONTROL GEN9_3D(3, 2, 0) -+#define PIPE_CONTROL_CS_STALL (1 << 20) -+#define PIPE_CONTROL_NOWRITE (0 << 14) -+#define PIPE_CONTROL_WRITE_QWORD (1 << 14) -+#define PIPE_CONTROL_WRITE_DEPTH (2 << 14) -+#define PIPE_CONTROL_WRITE_TIME (3 << 14) -+#define PIPE_CONTROL_DEPTH_STALL (1 << 13) -+#define PIPE_CONTROL_WC_FLUSH (1 << 12) -+#define PIPE_CONTROL_IS_FLUSH (1 << 11) -+#define PIPE_CONTROL_TC_FLUSH (1 << 10) -+#define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) -+#define PIPE_CONTROL_FLUSH (1 << 7) -+#define PIPE_CONTROL_GLOBAL_GTT (1 << 2) -+#define PIPE_CONTROL_LOCAL_PGTT (0 << 2) -+#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) -+#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) -+ -+ -+#define GEN9_3DPRIMITIVE GEN9_3D(3, 3, 0) -+ -+/* 3DPRIMITIVE bits */ -+#define VERTEX_SEQUENTIAL (0 << 15) -+#define VERTEX_RANDOM (1 << 15) -+ -+#define ANISORATIO_2 0 -+#define ANISORATIO_4 1 -+#define ANISORATIO_6 2 -+#define ANISORATIO_8 3 -+#define ANISORATIO_10 4 -+#define ANISORATIO_12 5 -+#define ANISORATIO_14 
6 -+#define ANISORATIO_16 7 -+ -+#define BLENDFACTOR_ONE 0x1 -+#define BLENDFACTOR_SRC_COLOR 0x2 -+#define BLENDFACTOR_SRC_ALPHA 0x3 -+#define BLENDFACTOR_DST_ALPHA 0x4 -+#define BLENDFACTOR_DST_COLOR 0x5 -+#define BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 -+#define BLENDFACTOR_CONST_COLOR 0x7 -+#define BLENDFACTOR_CONST_ALPHA 0x8 -+#define BLENDFACTOR_SRC1_COLOR 0x9 -+#define BLENDFACTOR_SRC1_ALPHA 0x0A -+#define BLENDFACTOR_ZERO 0x11 -+#define BLENDFACTOR_INV_SRC_COLOR 0x12 -+#define BLENDFACTOR_INV_SRC_ALPHA 0x13 -+#define BLENDFACTOR_INV_DST_ALPHA 0x14 -+#define BLENDFACTOR_INV_DST_COLOR 0x15 -+#define BLENDFACTOR_INV_CONST_COLOR 0x17 -+#define BLENDFACTOR_INV_CONST_ALPHA 0x18 -+#define BLENDFACTOR_INV_SRC1_COLOR 0x19 -+#define BLENDFACTOR_INV_SRC1_ALPHA 0x1A -+ -+#define BLENDFUNCTION_ADD 0 -+#define BLENDFUNCTION_SUBTRACT 1 -+#define BLENDFUNCTION_REVERSE_SUBTRACT 2 -+#define GEN9_BLENDFUNCTION_MIN 3 -+#define BLENDFUNCTION_MAX 4 -+ -+#define ALPHATEST_FORMAT_UNORM8 0 -+#define ALPHATEST_FORMAT_FLOAT32 1 -+ -+#define CHROMAKEY_KILL_ON_ANY_MATCH 0 -+#define CHROMAKEY_REPLACE_BLACK 1 -+ -+#define CLIP_API_OGL 0 -+#define CLIP_API_DX 1 -+ -+#define CLIPMODE_NORMAL 0 -+#define CLIPMODE_CLIP_ALL 1 -+#define CLIPMODE_CLIP_NON_REJECTED 2 -+#define CLIPMODE_REJECT_ALL 3 -+#define CLIPMODE_ACCEPT_ALL 4 -+ -+#define CLIP_NDCSPACE 0 -+#define CLIP_SCREENSPACE 1 -+ -+#define COMPAREFUNCTION_ALWAYS 0 -+#define COMPAREFUNCTION_NEVER 1 -+#define COMPAREFUNCTION_LESS 2 -+#define COMPAREFUNCTION_EQUAL 3 -+#define COMPAREFUNCTION_LEQUAL 4 -+#define COMPAREFUNCTION_GREATER 5 -+#define COMPAREFUNCTION_NOTEQUAL 6 -+#define COMPAREFUNCTION_GEQUAL 7 -+ -+#define COVERAGE_PIXELS_HALF 0 -+#define COVERAGE_PIXELS_1 1 -+#define COVERAGE_PIXELS_2 2 -+#define COVERAGE_PIXELS_4 3 -+ -+#define DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 -+#define DEPTHFORMAT_D32_FLOAT 1 -+#define DEPTHFORMAT_D24_UNORM_S8_UINT 2 -+#define DEPTHFORMAT_D16_UNORM 5 -+ -+#define FLOATING_POINT_IEEE_754 0 -+#define 
FLOATING_POINT_NON_IEEE_754 1 -+ -+#define INDEX_BYTE 0 -+#define INDEX_WORD 1 -+#define INDEX_DWORD 2 -+ -+#define LOGICOPFUNCTION_CLEAR 0 -+#define LOGICOPFUNCTION_NOR 1 -+#define LOGICOPFUNCTION_AND_INVERTED 2 -+#define LOGICOPFUNCTION_COPY_INVERTED 3 -+#define LOGICOPFUNCTION_AND_REVERSE 4 -+#define LOGICOPFUNCTION_INVERT 5 -+#define LOGICOPFUNCTION_XOR 6 -+#define LOGICOPFUNCTION_NAND 7 -+#define LOGICOPFUNCTION_AND 8 -+#define LOGICOPFUNCTION_EQUIV 9 -+#define LOGICOPFUNCTION_NOOP 10 -+#define LOGICOPFUNCTION_OR_INVERTED 11 -+#define LOGICOPFUNCTION_COPY 12 -+#define LOGICOPFUNCTION_OR_REVERSE 13 -+#define LOGICOPFUNCTION_OR 14 -+#define LOGICOPFUNCTION_SET 15 -+ -+#define MAPFILTER_NEAREST 0x0 -+#define MAPFILTER_LINEAR 0x1 -+#define MAPFILTER_ANISOTROPIC 0x2 -+#define MAPFILTER_FLEXIBLE 0x3 -+#define MAPFILTER_MONO 0x6 -+ -+#define MIPFILTER_NONE 0 -+#define MIPFILTER_NEAREST 1 -+#define MIPFILTER_LINEAR 3 -+ -+#define POLYGON_FRONT_FACING 0 -+#define POLYGON_BACK_FACING 1 -+ -+#define PREFILTER_ALWAYS 0x0 -+#define PREFILTER_NEVER 0x1 -+#define PREFILTER_LESS 0x2 -+#define PREFILTER_EQUAL 0x3 -+#define PREFILTER_LEQUAL 0x4 -+#define PREFILTER_GREATER 0x5 -+#define PREFILTER_NOTEQUAL 0x6 -+#define PREFILTER_GEQUAL 0x7 -+ -+#define RASTRULE_UPPER_LEFT 0 -+#define RASTRULE_UPPER_RIGHT 1 -+ -+#define STENCILOP_KEEP 0 -+#define STENCILOP_ZERO 1 -+#define STENCILOP_REPLACE 2 -+#define STENCILOP_INCRSAT 3 -+#define STENCILOP_DECRSAT 4 -+#define STENCILOP_INCR 5 -+#define STENCILOP_DECR 6 -+#define STENCILOP_INVERT 7 -+ -+#define SURFACE_MIPMAPLAYOUT_BELOW 0 -+#define SURFACE_MIPMAPLAYOUT_RIGHT 1 -+ -+#define SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 -+#define SURFACEFORMAT_R32G32B32A32_SINT 0x001 -+#define SURFACEFORMAT_R32G32B32A32_UINT 0x002 -+#define SURFACEFORMAT_R32G32B32A32_UNORM 0x003 -+#define SURFACEFORMAT_R32G32B32A32_SNORM 0x004 -+#define SURFACEFORMAT_R64G64_FLOAT 0x005 -+#define SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 -+#define 
SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 -+#define SURFACEFORMAT_R32G32B32A32_USCALED 0x008 -+#define SURFACEFORMAT_R32G32B32_FLOAT 0x040 -+#define SURFACEFORMAT_R32G32B32_SINT 0x041 -+#define SURFACEFORMAT_R32G32B32_UINT 0x042 -+#define SURFACEFORMAT_R32G32B32_UNORM 0x043 -+#define SURFACEFORMAT_R32G32B32_SNORM 0x044 -+#define SURFACEFORMAT_R32G32B32_SSCALED 0x045 -+#define SURFACEFORMAT_R32G32B32_USCALED 0x046 -+#define SURFACEFORMAT_R16G16B16A16_UNORM 0x080 -+#define SURFACEFORMAT_R16G16B16A16_SNORM 0x081 -+#define SURFACEFORMAT_R16G16B16A16_SINT 0x082 -+#define SURFACEFORMAT_R16G16B16A16_UINT 0x083 -+#define SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 -+#define SURFACEFORMAT_R32G32_FLOAT 0x085 -+#define SURFACEFORMAT_R32G32_SINT 0x086 -+#define SURFACEFORMAT_R32G32_UINT 0x087 -+#define SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 -+#define SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 -+#define SURFACEFORMAT_L32A32_FLOAT 0x08A -+#define SURFACEFORMAT_R32G32_UNORM 0x08B -+#define SURFACEFORMAT_R32G32_SNORM 0x08C -+#define SURFACEFORMAT_R64_FLOAT 0x08D -+#define SURFACEFORMAT_R16G16B16X16_UNORM 0x08E -+#define SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F -+#define SURFACEFORMAT_A32X32_FLOAT 0x090 -+#define SURFACEFORMAT_L32X32_FLOAT 0x091 -+#define SURFACEFORMAT_I32X32_FLOAT 0x092 -+#define SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 -+#define SURFACEFORMAT_R16G16B16A16_USCALED 0x094 -+#define SURFACEFORMAT_R32G32_SSCALED 0x095 -+#define SURFACEFORMAT_R32G32_USCALED 0x096 -+#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 -+#define SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 -+#define SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 -+#define SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 -+#define SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 -+#define SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 -+#define SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 -+#define SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 -+#define SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 -+#define SURFACEFORMAT_R8G8B8A8_SINT 0x0CA -+#define 
SURFACEFORMAT_R8G8B8A8_UINT 0x0CB -+#define SURFACEFORMAT_R16G16_UNORM 0x0CC -+#define SURFACEFORMAT_R16G16_SNORM 0x0CD -+#define SURFACEFORMAT_R16G16_SINT 0x0CE -+#define SURFACEFORMAT_R16G16_UINT 0x0CF -+#define SURFACEFORMAT_R16G16_FLOAT 0x0D0 -+#define SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 -+#define SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 -+#define SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 -+#define SURFACEFORMAT_R32_SINT 0x0D6 -+#define SURFACEFORMAT_R32_UINT 0x0D7 -+#define SURFACEFORMAT_R32_FLOAT 0x0D8 -+#define SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 -+#define SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA -+#define SURFACEFORMAT_L16A16_UNORM 0x0DF -+#define SURFACEFORMAT_I24X8_UNORM 0x0E0 -+#define SURFACEFORMAT_L24X8_UNORM 0x0E1 -+#define SURFACEFORMAT_A24X8_UNORM 0x0E2 -+#define SURFACEFORMAT_I32_FLOAT 0x0E3 -+#define SURFACEFORMAT_L32_FLOAT 0x0E4 -+#define SURFACEFORMAT_A32_FLOAT 0x0E5 -+#define SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 -+#define SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA -+#define SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB -+#define SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC -+#define SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED -+#define SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE -+#define SURFACEFORMAT_L16A16_FLOAT 0x0F0 -+#define SURFACEFORMAT_R32_UNORM 0x0F1 -+#define SURFACEFORMAT_R32_SNORM 0x0F2 -+#define SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 -+#define SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 -+#define SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 -+#define SURFACEFORMAT_R16G16_SSCALED 0x0F6 -+#define SURFACEFORMAT_R16G16_USCALED 0x0F7 -+#define SURFACEFORMAT_R32_SSCALED 0x0F8 -+#define SURFACEFORMAT_R32_USCALED 0x0F9 -+#define SURFACEFORMAT_B5G6R5_UNORM 0x100 -+#define SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 -+#define SURFACEFORMAT_B5G5R5A1_UNORM 0x102 -+#define SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 -+#define SURFACEFORMAT_B4G4R4A4_UNORM 0x104 -+#define SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 -+#define SURFACEFORMAT_R8G8_UNORM 0x106 -+#define SURFACEFORMAT_R8G8_SNORM 0x107 
-+#define SURFACEFORMAT_R8G8_SINT 0x108 -+#define SURFACEFORMAT_R8G8_UINT 0x109 -+#define SURFACEFORMAT_R16_UNORM 0x10A -+#define SURFACEFORMAT_R16_SNORM 0x10B -+#define SURFACEFORMAT_R16_SINT 0x10C -+#define SURFACEFORMAT_R16_UINT 0x10D -+#define SURFACEFORMAT_R16_FLOAT 0x10E -+#define SURFACEFORMAT_I16_UNORM 0x111 -+#define SURFACEFORMAT_L16_UNORM 0x112 -+#define SURFACEFORMAT_A16_UNORM 0x113 -+#define SURFACEFORMAT_L8A8_UNORM 0x114 -+#define SURFACEFORMAT_I16_FLOAT 0x115 -+#define SURFACEFORMAT_L16_FLOAT 0x116 -+#define SURFACEFORMAT_A16_FLOAT 0x117 -+#define SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 -+#define SURFACEFORMAT_B5G5R5X1_UNORM 0x11A -+#define SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B -+#define SURFACEFORMAT_R8G8_SSCALED 0x11C -+#define SURFACEFORMAT_R8G8_USCALED 0x11D -+#define SURFACEFORMAT_R16_SSCALED 0x11E -+#define SURFACEFORMAT_R16_USCALED 0x11F -+#define SURFACEFORMAT_R8_UNORM 0x140 -+#define SURFACEFORMAT_R8_SNORM 0x141 -+#define SURFACEFORMAT_R8_SINT 0x142 -+#define SURFACEFORMAT_R8_UINT 0x143 -+#define SURFACEFORMAT_A8_UNORM 0x144 -+#define SURFACEFORMAT_I8_UNORM 0x145 -+#define SURFACEFORMAT_L8_UNORM 0x146 -+#define SURFACEFORMAT_P4A4_UNORM 0x147 -+#define SURFACEFORMAT_A4P4_UNORM 0x148 -+#define SURFACEFORMAT_R8_SSCALED 0x149 -+#define SURFACEFORMAT_R8_USCALED 0x14A -+#define SURFACEFORMAT_R1_UINT 0x181 -+#define SURFACEFORMAT_YCRCB_NORMAL 0x182 -+#define SURFACEFORMAT_YCRCB_SWAPUVY 0x183 -+#define SURFACEFORMAT_BC1_UNORM 0x186 -+#define SURFACEFORMAT_BC2_UNORM 0x187 -+#define SURFACEFORMAT_BC3_UNORM 0x188 -+#define SURFACEFORMAT_BC4_UNORM 0x189 -+#define SURFACEFORMAT_BC5_UNORM 0x18A -+#define SURFACEFORMAT_BC1_UNORM_SRGB 0x18B -+#define SURFACEFORMAT_BC2_UNORM_SRGB 0x18C -+#define SURFACEFORMAT_BC3_UNORM_SRGB 0x18D -+#define SURFACEFORMAT_MONO8 0x18E -+#define SURFACEFORMAT_YCRCB_SWAPUV 0x18F -+#define SURFACEFORMAT_YCRCB_SWAPY 0x190 -+#define SURFACEFORMAT_DXT1_RGB 0x191 -+#define SURFACEFORMAT_FXT1 0x192 -+#define 
SURFACEFORMAT_R8G8B8_UNORM 0x193 -+#define SURFACEFORMAT_R8G8B8_SNORM 0x194 -+#define SURFACEFORMAT_R8G8B8_SSCALED 0x195 -+#define SURFACEFORMAT_R8G8B8_USCALED 0x196 -+#define SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 -+#define SURFACEFORMAT_R64G64B64_FLOAT 0x198 -+#define SURFACEFORMAT_BC4_SNORM 0x199 -+#define SURFACEFORMAT_BC5_SNORM 0x19A -+#define SURFACEFORMAT_R16G16B16_UNORM 0x19C -+#define SURFACEFORMAT_R16G16B16_SNORM 0x19D -+#define SURFACEFORMAT_R16G16B16_SSCALED 0x19E -+#define SURFACEFORMAT_R16G16B16_USCALED 0x19F -+ -+#define SURFACE_1D 0 -+#define SURFACE_2D 1 -+#define SURFACE_3D 2 -+#define SURFACE_CUBE 3 -+#define SURFACE_BUFFER 4 -+#define SURFACE_NULL 7 -+ -+#define TEXCOORDMODE_WRAP 0 -+#define TEXCOORDMODE_MIRROR 1 -+#define TEXCOORDMODE_CLAMP 2 -+#define TEXCOORDMODE_CUBE 3 -+#define TEXCOORDMODE_CLAMP_BORDER 4 -+#define TEXCOORDMODE_MIRROR_ONCE 5 -+ -+#define THREAD_PRIORITY_NORMAL 0 -+#define THREAD_PRIORITY_HIGH 1 -+ -+#define VERTEX_SUBPIXEL_PRECISION_8BITS 0 -+#define VERTEX_SUBPIXEL_PRECISION_4BITS 1 -+ -+#define COMPONENT_NOSTORE 0 -+#define COMPONENT_STORE_SRC 1 -+#define COMPONENT_STORE_0 2 -+#define COMPONENT_STORE_1_FLT 3 -+#define COMPONENT_STORE_1_INT 4 -+#define COMPONENT_STORE_VID 5 -+#define COMPONENT_STORE_IID 6 -+#define COMPONENT_STORE_PID 7 -+ -+/* Execution Unit (EU) defines -+ */ -+ -+#define GEN9_ALIGN_1 0 -+#define GEN9_ALIGN_16 1 -+ -+#define GEN9_ADDRESS_DIRECT 0 -+#define GEN9_ADDRESS_REGISTER_INDIRECT_REGISTER 1 -+ -+#define GEN9_CHANNEL_X 0 -+#define GEN9_CHANNEL_Y 1 -+#define GEN9_CHANNEL_Z 2 -+#define GEN9_CHANNEL_W 3 -+ -+#define GEN9_COMPRESSION_NONE 0 -+#define GEN9_COMPRESSION_2NDHALF 1 -+#define GEN9_COMPRESSION_COMPRESSED 2 -+ -+#define GEN9_CONDITIONAL_NONE 0 -+#define GEN9_CONDITIONAL_Z 1 -+#define GEN9_CONDITIONAL_NZ 2 -+#define GEN9_CONDITIONAL_EQ 1 /* Z */ -+#define GEN9_CONDITIONAL_NEQ 2 /* NZ */ -+#define GEN9_CONDITIONAL_G 3 -+#define GEN9_CONDITIONAL_GE 4 -+#define GEN9_CONDITIONAL_L 5 -+#define 
GEN9_CONDITIONAL_LE 6 -+#define GEN9_CONDITIONAL_C 7 -+#define GEN9_CONDITIONAL_O 8 -+ -+#define GEN9_DEBUG_NONE 0 -+#define GEN9_DEBUG_BREAKPOINT 1 -+ -+#define GEN9_DEPENDENCY_NORMAL 0 -+#define GEN9_DEPENDENCY_NOTCLEARED 1 -+#define GEN9_DEPENDENCY_NOTCHECKED 2 -+#define GEN9_DEPENDENCY_DISABLE 3 -+ -+#define GEN9_EXECUTE_1 0 -+#define GEN9_EXECUTE_2 1 -+#define GEN9_EXECUTE_4 2 -+#define GEN9_EXECUTE_8 3 -+#define GEN9_EXECUTE_16 4 -+#define GEN9_EXECUTE_32 5 -+ -+#define GEN9_HORIZONTAL_STRIDE_0 0 -+#define GEN9_HORIZONTAL_STRIDE_1 1 -+#define GEN9_HORIZONTAL_STRIDE_2 2 -+#define GEN9_HORIZONTAL_STRIDE_4 3 -+ -+#define GEN9_INSTRUCTION_NORMAL 0 -+#define GEN9_INSTRUCTION_SATURATE 1 -+ -+#define GEN9_OPCODE_MOV 1 -+#define GEN9_OPCODE_SEL 2 -+#define GEN9_OPCODE_NOT 4 -+#define GEN9_OPCODE_AND 5 -+#define GEN9_OPCODE_OR 6 -+#define GEN9_OPCODE_XOR 7 -+#define GEN9_OPCODE_SHR 8 -+#define GEN9_OPCODE_SHL 9 -+#define GEN9_OPCODE_RSR 10 -+#define GEN9_OPCODE_RSL 11 -+#define GEN9_OPCODE_ASR 12 -+#define GEN9_OPCODE_CMP 16 -+#define GEN9_OPCODE_JMPI 32 -+#define GEN9_OPCODE_IF 34 -+#define GEN9_OPCODE_IFF 35 -+#define GEN9_OPCODE_ELSE 36 -+#define GEN9_OPCODE_ENDIF 37 -+#define GEN9_OPCODE_DO 38 -+#define GEN9_OPCODE_WHILE 39 -+#define GEN9_OPCODE_BREAK 40 -+#define GEN9_OPCODE_CONTINUE 41 -+#define GEN9_OPCODE_HALT 42 -+#define GEN9_OPCODE_MSAVE 44 -+#define GEN9_OPCODE_MRESTORE 45 -+#define GEN9_OPCODE_PUSH 46 -+#define GEN9_OPCODE_POP 47 -+#define GEN9_OPCODE_WAIT 48 -+#define GEN9_OPCODE_SEND 49 -+#define GEN9_OPCODE_ADD 64 -+#define GEN9_OPCODE_MUL 65 -+#define GEN9_OPCODE_AVG 66 -+#define GEN9_OPCODE_FRC 67 -+#define GEN9_OPCODE_RNDU 68 -+#define GEN9_OPCODE_RNDD 69 -+#define GEN9_OPCODE_RNDE 70 -+#define GEN9_OPCODE_RNDZ 71 -+#define GEN9_OPCODE_MAC 72 -+#define GEN9_OPCODE_MACH 73 -+#define GEN9_OPCODE_LZD 74 -+#define GEN9_OPCODE_SAD2 80 -+#define GEN9_OPCODE_SADA2 81 -+#define GEN9_OPCODE_DP4 84 -+#define GEN9_OPCODE_DPH 85 -+#define GEN9_OPCODE_DP3 86 
-+#define GEN9_OPCODE_DP2 87 -+#define GEN9_OPCODE_DPA2 88 -+#define GEN9_OPCODE_LINE 89 -+#define GEN9_OPCODE_NOP 126 -+ -+#define GEN9_PREDICATE_NONE 0 -+#define GEN9_PREDICATE_NORMAL 1 -+#define GEN9_PREDICATE_ALIGN1_ANYV 2 -+#define GEN9_PREDICATE_ALIGN1_ALLV 3 -+#define GEN9_PREDICATE_ALIGN1_ANY2H 4 -+#define GEN9_PREDICATE_ALIGN1_ALL2H 5 -+#define GEN9_PREDICATE_ALIGN1_ANY4H 6 -+#define GEN9_PREDICATE_ALIGN1_ALL4H 7 -+#define GEN9_PREDICATE_ALIGN1_ANY8H 8 -+#define GEN9_PREDICATE_ALIGN1_ALL8H 9 -+#define GEN9_PREDICATE_ALIGN1_ANY16H 10 -+#define GEN9_PREDICATE_ALIGN1_ALL16H 11 -+#define GEN9_PREDICATE_ALIGN16_REPLICATE_X 2 -+#define GEN9_PREDICATE_ALIGN16_REPLICATE_Y 3 -+#define GEN9_PREDICATE_ALIGN16_REPLICATE_Z 4 -+#define GEN9_PREDICATE_ALIGN16_REPLICATE_W 5 -+#define GEN9_PREDICATE_ALIGN16_ANY4H 6 -+#define GEN9_PREDICATE_ALIGN16_ALL4H 7 -+ -+#define GEN9_ARCHITECTURE_REGISTER_FILE 0 -+#define GEN9_GENERAL_REGISTER_FILE 1 -+#define GEN9_MESSAGE_REGISTER_FILE 2 -+#define GEN9_IMMEDIATE_VALUE 3 -+ -+#define GEN9_REGISTER_TYPE_UD 0 -+#define GEN9_REGISTER_TYPE_D 1 -+#define GEN9_REGISTER_TYPE_UW 2 -+#define GEN9_REGISTER_TYPE_W 3 -+#define GEN9_REGISTER_TYPE_UB 4 -+#define GEN9_REGISTER_TYPE_B 5 -+#define GEN9_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? 
*/ -+#define GEN9_REGISTER_TYPE_HF 6 -+#define GEN9_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ -+#define GEN9_REGISTER_TYPE_F 7 -+ -+#define GEN9_ARF_NULL 0x00 -+#define GEN9_ARF_ADDRESS 0x10 -+#define GEN9_ARF_ACCUMULATOR 0x20 -+#define GEN9_ARF_FLAG 0x30 -+#define GEN9_ARF_MASK 0x40 -+#define GEN9_ARF_MASK_STACK 0x50 -+#define GEN9_ARF_MASK_STACK_DEPTH 0x60 -+#define GEN9_ARF_STATE 0x70 -+#define GEN9_ARF_CONTROL 0x80 -+#define GEN9_ARF_NOTIFICATION_COUNT 0x90 -+#define GEN9_ARF_IP 0xA0 -+ -+#define GEN9_AMASK 0 -+#define GEN9_IMASK 1 -+#define GEN9_LMASK 2 -+#define GEN9_CMASK 3 -+ -+#define GEN9_THREAD_NORMAL 0 -+#define GEN9_THREAD_ATOMIC 1 -+#define GEN9_THREAD_SWITCH 2 -+ -+#define GEN9_VERTICAL_STRIDE_0 0 -+#define GEN9_VERTICAL_STRIDE_1 1 -+#define GEN9_VERTICAL_STRIDE_2 2 -+#define GEN9_VERTICAL_STRIDE_4 3 -+#define GEN9_VERTICAL_STRIDE_8 4 -+#define GEN9_VERTICAL_STRIDE_16 5 -+#define GEN9_VERTICAL_STRIDE_32 6 -+#define GEN9_VERTICAL_STRIDE_64 7 -+#define GEN9_VERTICAL_STRIDE_128 8 -+#define GEN9_VERTICAL_STRIDE_256 9 -+#define GEN9_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF -+ -+#define GEN9_WIDTH_1 0 -+#define GEN9_WIDTH_2 1 -+#define GEN9_WIDTH_4 2 -+#define GEN9_WIDTH_8 3 -+#define GEN9_WIDTH_16 4 -+ -+#define GEN9_STATELESS_BUFFER_BOUNDARY_1K 0 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_2K 1 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_4K 2 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_8K 3 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_16K 4 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_32K 5 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_64K 6 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_128K 7 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_256K 8 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_512K 9 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_1M 10 -+#define GEN9_STATELESS_BUFFER_BOUNDARY_2M 11 -+ -+#define GEN9_POLYGON_FACING_FRONT 0 -+#define GEN9_POLYGON_FACING_BACK 1 -+ -+#define GEN9_MESSAGE_TARGET_NULL 0 -+#define GEN9_MESSAGE_TARGET_MATH 1 -+#define 
GEN9_MESSAGE_TARGET_SAMPLER 2 -+#define GEN9_MESSAGE_TARGET_GATEWAY 3 -+#define GEN9_MESSAGE_TARGET_DATAPORT_READ 4 -+#define GEN9_MESSAGE_TARGET_DATAPORT_WRITE 5 -+#define GEN9_MESSAGE_TARGET_URB 6 -+#define GEN9_MESSAGE_TARGET_THREAD_SPAWNER 7 -+ -+#define GEN9_SAMPLER_RETURN_FORMAT_FLOAT32 0 -+#define GEN9_SAMPLER_RETURN_FORMAT_UINT32 2 -+#define GEN9_SAMPLER_RETURN_FORMAT_SINT32 3 -+ -+#define GEN9_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 -+#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 -+#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 -+#define GEN9_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 -+#define GEN9_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 -+#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 -+#define GEN9_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 -+#define GEN9_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 -+#define GEN9_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 -+#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 -+#define GEN9_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 -+#define GEN9_SAMPLER_MESSAGE_SIMD8_RESINFO 2 -+#define GEN9_SAMPLER_MESSAGE_SIMD16_RESINFO 2 -+#define GEN9_SAMPLER_MESSAGE_SIMD4X2_LD 3 -+#define GEN9_SAMPLER_MESSAGE_SIMD8_LD 3 -+#define GEN9_SAMPLER_MESSAGE_SIMD16_LD 3 -+ -+#define GEN9_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 -+#define GEN9_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 -+#define GEN9_DATAPORT_OWORD_BLOCK_2_OWORDS 2 -+#define GEN9_DATAPORT_OWORD_BLOCK_4_OWORDS 3 -+#define GEN9_DATAPORT_OWORD_BLOCK_8_OWORDS 4 -+ -+#define GEN9_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 -+#define GEN9_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 -+ -+#define GEN9_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 -+#define GEN9_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 -+ -+#define GEN9_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 -+#define GEN9_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 -+#define GEN9_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 -+#define GEN9_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 -+ -+#define GEN9_DATAPORT_READ_TARGET_DATA_CACHE 0 -+#define GEN9_DATAPORT_READ_TARGET_RENDER_CACHE 1 -+#define 
GEN9_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 -+ -+#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 -+#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 -+#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 -+#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 -+#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 -+ -+#define GEN9_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 -+#define GEN9_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 -+#define GEN9_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 -+#define GEN9_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 -+#define GEN9_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 -+#define GEN9_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 -+#define GEN9_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 -+ -+#define GEN9_MATH_FUNCTION_INV 1 -+#define GEN9_MATH_FUNCTION_LOG 2 -+#define GEN9_MATH_FUNCTION_EXP 3 -+#define GEN9_MATH_FUNCTION_SQRT 4 -+#define GEN9_MATH_FUNCTION_RSQ 5 -+#define GEN9_MATH_FUNCTION_SIN 6 /* was 7 */ -+#define GEN9_MATH_FUNCTION_COS 7 /* was 8 */ -+#define GEN9_MATH_FUNCTION_SINCOS 8 /* was 6 */ -+#define GEN9_MATH_FUNCTION_TAN 9 -+#define GEN9_MATH_FUNCTION_POW 10 -+#define GEN9_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 -+#define GEN9_MATH_FUNCTION_INT_DIV_QUOTIENT 12 -+#define GEN9_MATH_FUNCTION_INT_DIV_REMAINDER 13 -+ -+#define GEN9_MATH_INTEGER_UNSIGNED 0 -+#define GEN9_MATH_INTEGER_SIGNED 1 -+ -+#define GEN9_MATH_PRECISION_FULL 0 -+#define GEN9_MATH_PRECISION_PARTIAL 1 -+ -+#define GEN9_MATH_SATURATE_NONE 0 -+#define GEN9_MATH_SATURATE_SATURATE 1 -+ -+#define GEN9_MATH_DATA_VECTOR 0 -+#define GEN9_MATH_DATA_SCALAR 1 -+ -+#define GEN9_URB_OPCODE_WRITE 0 -+ -+#define GEN9_URB_SWIZZLE_NONE 0 -+#define GEN9_URB_SWIZZLE_INTERLEAVE 1 -+#define GEN9_URB_SWIZZLE_TRANSPOSE 2 -+ -+#define GEN9_SCRATCH_SPACE_SIZE_1K 0 -+#define GEN9_SCRATCH_SPACE_SIZE_2K 1 -+#define GEN9_SCRATCH_SPACE_SIZE_4K 2 -+#define 
GEN9_SCRATCH_SPACE_SIZE_8K 3 -+#define GEN9_SCRATCH_SPACE_SIZE_16K 4 -+#define GEN9_SCRATCH_SPACE_SIZE_32K 5 -+#define GEN9_SCRATCH_SPACE_SIZE_64K 6 -+#define GEN9_SCRATCH_SPACE_SIZE_128K 7 -+#define GEN9_SCRATCH_SPACE_SIZE_256K 8 -+#define GEN9_SCRATCH_SPACE_SIZE_512K 9 -+#define GEN9_SCRATCH_SPACE_SIZE_1M 10 -+#define GEN9_SCRATCH_SPACE_SIZE_2M 11 -+ -+struct gen9_blend_state { -+ struct { -+ /* 00 */ uint32_t pad:19; -+ /* 19 */ uint32_t y_dither_offset:2; -+ /* 21 */ uint32_t x_dither_offset:2; -+ /* 23 */ uint32_t color_dither_enable:1; -+ /* 24 */ uint32_t alpha_test_function:3; -+ /* 27 */ uint32_t alpha_test:1; -+ /* 28 */ uint32_t alpha_to_coverage_dither:1; -+ /* 29 */ uint32_t alpha_to_one:1; -+ /* 30 */ uint32_t ia_blend:1; -+ /* 31 */ uint32_t alpha_to_coverage:1; -+ } common; -+ -+ struct { -+ /* 00 */ uint32_t write_disable_blue:1; -+ /* 01 */ uint32_t write_disable_green:1; -+ /* 02 */ uint32_t write_disable_red:1; -+ /* 03 */ uint32_t write_disable_alpha:1; -+ /* 04 */ uint32_t pad0:1; -+ /* 05 */ uint32_t alpha_blend_function:3; -+ /* 08 */ uint32_t dest_alpha_blend_factor:5; -+ /* 13 */ uint32_t source_alpha_blend_factor:5; -+ /* 18 */ uint32_t color_blend_function:3; -+ /* 21 */ uint32_t dest_blend_factor:5; -+ /* 26 */ uint32_t source_blend_factor:5; -+ /* 31 */ uint32_t color_blend:1; -+ /* 32 */ uint32_t post_blend_clamp:1; -+ /* 33 */ uint32_t pre_blend_clamp:1; -+ /* 34 */ uint32_t color_clamp_range:2; -+ /* 36 */ uint32_t pre_blend_source_only_clamp:1; -+ /* 37 */ uint32_t pad1:22; -+ /* 59 */ uint32_t logic_op_function:4; -+ /* 63 */ uint32_t logic_op:1; -+ } rt; -+}; -+ -+struct gen9_color_calc_state { -+ struct { -+ /* 00 */ uint32_t alpha_test_format:1; -+ /* 01 */ uint32_t pad0:14; -+ /* 15 */ uint32_t round_disable:1; -+ /* 16 */ uint32_t bf_stencil_ref:8; -+ /* 24 */ uint32_t stencil_ref:8; -+ } cc0; -+ -+ union { -+ float alpha_ref_f; -+ struct { -+ uint32_t ui:8; -+ uint32_t pad0:24; -+ } alpha_ref_fi; -+ } cc1; -+ -+ float 
constant_r; -+ float constant_g; -+ float constant_b; -+ float constant_a; -+}; -+ -+struct gen9_sampler_state { -+ struct { -+ /* 00 */ unsigned int aniso_algorithm:1; -+ /* 01 */ unsigned int lod_bias:13; -+ /* 14 */ unsigned int min_filter:3; -+ /* 17 */ unsigned int mag_filter:3; -+ /* 20 */ unsigned int mip_filter:2; -+ /* 22 */ unsigned int base_level:5; -+ /* 27 */ unsigned int lod_preclamp:2; -+ /* 29 */ unsigned int default_color_mode:1; -+ /* 30 */ unsigned int flexible_filter_clamp:1; -+ /* 31 */ unsigned int disable:1; -+ } ss0; -+ -+ struct { -+ /* 00 */ unsigned int cube_control_mode:1; -+ /* 01 */ unsigned int shadow_function:3; -+ /* 04 */ unsigned int chroma_key_mode:1; -+ /* 05 */ unsigned int chroma_key_index:2; -+ /* 07 */ unsigned int chroma_key_enable:1; -+ /* 08 */ unsigned int max_lod:12; -+ /* 20 */ unsigned int min_lod:12; -+ } ss1; -+ -+ struct { -+ unsigned int pad:6; -+ unsigned int default_color_pointer:26; -+ } ss2; -+ -+ struct { -+ /* 00 */ unsigned int r_wrap_mode:3; -+ /* 03 */ unsigned int t_wrap_mode:3; -+ /* 06 */ unsigned int s_wrap_mode:3; -+ /* 09 */ unsigned int pad:1; -+ /* 10 */ unsigned int non_normalized_coord:1; -+ /* 11 */ unsigned int trilinear_quality:2; -+ /* 13 */ unsigned int address_round:6; -+ /* 19 */ unsigned int max_aniso:3; -+ /* 22 */ unsigned int pad0:2; -+ /* 24 */ unsigned int non_separable_filter:8; -+ } ss3; -+}; -+ -+/* Surface state DW0 */ -+#define SURFACE_RC_READ_WRITE (1 << 8) -+#define SURFACE_TILED (1 << 13) -+#define SURFACE_TILED_Y (1 << 12) -+#define SURFACE_FORMAT_SHIFT 18 -+#define SURFACE_VALIGN_1 (0 << 16) /* reserved! */ -+#define SURFACE_VALIGN_4 (1 << 16) -+#define SURFACE_VALIGN_8 (2 << 16) -+#define SURFACE_VALIGN_16 (3 << 16) -+#define SURFACE_HALIGN_1 (0 << 14) /* reserved! 
*/ -+#define SURFACE_HALIGN_4 (1 << 14) -+#define SURFACE_HALIGN_8 (2 << 14) -+#define SURFACE_HALIGN_16 (3 << 14) -+#define SURFACE_TYPE_SHIFT 29 -+ -+/* Surface state DW2 */ -+#define SURFACE_HEIGHT_SHIFT 16 -+#define SURFACE_WIDTH_SHIFT 0 -+ -+/* Surface state DW3 */ -+#define SURFACE_DEPTH_SHIFT 21 -+#define SURFACE_PITCH_SHIFT 0 -+ -+#define SWIZZLE_ZERO 0 -+#define SWIZZLE_ONE 1 -+#define SWIZZLE_RED 4 -+#define SWIZZLE_GREEN 5 -+#define SWIZZLE_BLUE 6 -+#define SWIZZLE_ALPHA 7 -+#define __SURFACE_SWIZZLE(r,g,b,a) \ -+ ((a) << 16 | (b) << 19 | (g) << 22 | (r) << 25) -+#define SURFACE_SWIZZLE(r,g,b,a) \ -+ __SURFACE_SWIZZLE(SWIZZLE_##r, SWIZZLE_##g, SWIZZLE_##b, SWIZZLE_##a) -+ -+typedef enum { -+ SAMPLER_FILTER_NEAREST = 0, -+ SAMPLER_FILTER_BILINEAR, -+ FILTER_COUNT -+} sampler_filter_t; -+ -+typedef enum { -+ SAMPLER_EXTEND_NONE = 0, -+ SAMPLER_EXTEND_REPEAT, -+ SAMPLER_EXTEND_PAD, -+ SAMPLER_EXTEND_REFLECT, -+ EXTEND_COUNT -+} sampler_extend_t; -+ -+#endif -diff --git a/src/sna/kgem.c b/src/sna/kgem.c -index 78ed5407..f0d171ac 100644 ---- a/src/sna/kgem.c -+++ b/src/sna/kgem.c -@@ -84,6 +84,10 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); - #define DBG_NO_HANDLE_LUT 0 - #define DBG_NO_WT 0 - #define DBG_NO_WC_MMAP 0 -+#define DBG_NO_BLT_Y 0 -+#define DBG_NO_SCANOUT_Y 0 -+#define DBG_NO_DIRTYFB 0 -+#define DBG_NO_DETILING 0 - #define DBG_DUMP 0 - #define DBG_NO_MALLOC_CACHE 0 - -@@ -96,11 +100,6 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); - #define SHOW_BATCH_BEFORE 0 - #define SHOW_BATCH_AFTER 0 - --#if !USE_WC_MMAP --#undef DBG_NO_WC_MMAP --#define DBG_NO_WC_MMAP 1 --#endif -- - #if 0 - #define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__)) - #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__)) -@@ -187,6 +186,15 @@ struct local_i915_gem_caching { - #define LOCAL_IOCTL_I915_GEM_SET_CACHING 
DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching) - #define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching) - -+struct local_i915_gem_mmap { -+ uint32_t handle; -+ uint32_t pad; -+ uint64_t offset; -+ uint64_t size; -+ uint64_t addr_ptr; -+}; -+#define LOCAL_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap) -+ - struct local_i915_gem_mmap2 { - uint32_t handle; - uint32_t pad; -@@ -216,6 +224,12 @@ static struct kgem_bo *__kgem_freed_bo; - static struct kgem_request *__kgem_freed_request; - static struct drm_i915_gem_exec_object2 _kgem_dummy_exec; - -+static inline struct sna *__to_sna(struct kgem *kgem) -+{ -+ /* minor layering violations */ -+ return container_of(kgem, struct sna, kgem); -+} -+ - static inline int bytes(struct kgem_bo *bo) - { - return __kgem_bo_size(bo); -@@ -224,25 +238,31 @@ static inline int bytes(struct kgem_bo *bo) - #define bucket(B) (B)->size.pages.bucket - #define num_pages(B) (B)->size.pages.count - --static int do_ioctl(int fd, unsigned long req, void *arg) -+static int __do_ioctl(int fd, unsigned long req, void *arg) - { -- int err; -- --restart: -- if (ioctl(fd, req, arg) == 0) -- return 0; -+ do { -+ int err; - -- err = errno; -+ switch ((err = errno)) { -+ case EAGAIN: -+ sched_yield(); -+ case EINTR: -+ break; -+ default: -+ return -err; -+ } - -- if (err == EINTR) -- goto restart; -+ if (likely(ioctl(fd, req, arg) == 0)) -+ return 0; -+ } while (1); -+} - -- if (err == EAGAIN) { -- sched_yield(); -- goto restart; -- } -+inline static int do_ioctl(int fd, unsigned long req, void *arg) -+{ -+ if (likely(ioctl(fd, req, arg) == 0)) -+ return 0; - -- return -err; -+ return __do_ioctl(fd, req, arg); - } - - #ifdef DEBUG_MEMORY -@@ -266,6 +286,9 @@ static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) - - assert(bo); - -+ if (!kgem->can_fence && kgem->gen >= 040 && bo->tiling) 
-+ return; /* lies */ -+ - VG_CLEAR(tiling); - tiling.handle = bo->handle; - tiling.tiling_mode = bo->tiling; -@@ -273,7 +296,7 @@ static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) - assert(tiling.tiling_mode == bo->tiling); - } - --static void assert_cacheing(struct kgem *kgem, struct kgem_bo *bo) -+static void assert_caching(struct kgem *kgem, struct kgem_bo *bo) - { - struct local_i915_gem_caching arg; - int expect = kgem->has_llc ? SNOOPED : UNCACHED; -@@ -294,24 +317,117 @@ static void assert_bo_retired(struct kgem_bo *bo) - assert(bo->refcnt); - assert(bo->rq == NULL); - assert(bo->exec == NULL); -+ assert(!bo->needs_flush); - assert(list_is_empty(&bo->request)); - } - #else - #define assert_tiling(kgem, bo) --#define assert_cacheing(kgem, bo) -+#define assert_caching(kgem, bo) - #define assert_bo_retired(bo) - #endif - -+static int __find_debugfs(struct kgem *kgem) -+{ -+ int i; -+ -+ for (i = 0; i < DRM_MAX_MINOR; i++) { -+ char path[80]; -+ -+ sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i); -+ if (access(path, R_OK) == 0) -+ return i; -+ -+ sprintf(path, "/debug/dri/%d/i915_wedged", i); -+ if (access(path, R_OK) == 0) -+ return i; -+ } -+ -+ return -1; -+} -+ -+static int kgem_get_minor(struct kgem *kgem) -+{ -+ struct stat st; -+ -+ if (fstat(kgem->fd, &st)) -+ return __find_debugfs(kgem); -+ -+ if (!S_ISCHR(st.st_mode)) -+ return __find_debugfs(kgem); -+ -+ return st.st_rdev & 0x63; -+} -+ -+static bool find_hang_state(struct kgem *kgem, char *path, int maxlen) -+{ -+ int minor = kgem_get_minor(kgem); -+ -+ /* Search for our hang state in a few canonical locations. -+ * In the unlikely event of having multiple devices, we -+ * will need to check which minor actually corresponds to ours. 
-+ */ -+ -+ snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor); -+ if (access(path, R_OK) == 0) -+ return true; -+ -+ snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor); -+ if (access(path, R_OK) == 0) -+ return true; -+ -+ snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor); -+ if (access(path, R_OK) == 0) -+ return true; -+ -+ path[0] = '\0'; -+ return false; -+} -+ -+static bool has_error_state(struct kgem *kgem, char *path) -+{ -+ bool ret = false; -+ char no; -+ int fd; -+ -+ fd = open(path, O_RDONLY); -+ if (fd >= 0) { -+ ret = read(fd, &no, 1) == 1 && no != 'N'; -+ close(fd); -+ } -+ -+ return ret; -+} -+ -+static int kgem_get_screen_index(struct kgem *kgem) -+{ -+ return __to_sna(kgem)->scrn->scrnIndex; -+} -+ - static void - __kgem_set_wedged(struct kgem *kgem) - { -+ static int once; -+ char path[256]; -+ -+ if (kgem->wedged) -+ return; -+ -+ if (!once && -+ find_hang_state(kgem, path, sizeof(path)) && -+ has_error_state(kgem, path)) { -+ xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, -+ "When reporting this, please include %s and the full dmesg.\n", -+ path); -+ once = 1; -+ } -+ - kgem->wedged = true; -- sna_render_mark_wedged(container_of(kgem, struct sna, kgem)); -+ sna_render_mark_wedged(__to_sna(kgem)); - } - - static void kgem_sna_reset(struct kgem *kgem) - { -- struct sna *sna = container_of(kgem, struct sna, kgem); -+ struct sna *sna = __to_sna(kgem); - - sna->render.reset(sna); - sna->blt_state.fill_bo = 0; -@@ -319,7 +435,7 @@ static void kgem_sna_reset(struct kgem *kgem) - - static void kgem_sna_flush(struct kgem *kgem) - { -- struct sna *sna = container_of(kgem, struct sna, kgem); -+ struct sna *sna = __to_sna(kgem); - - sna->render.flush(sna); - -@@ -327,22 +443,53 @@ static void kgem_sna_flush(struct kgem *kgem) - sna_render_flush_solid(sna); - } - --static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) -+static bool kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo) -+{ 
-+ if (bo->scanout && bo->delta) { -+ DBG(("%s: releasing fb=%d for handle=%d\n", -+ __FUNCTION__, bo->delta, bo->handle)); -+ /* XXX will leak if we are not DRM_MASTER. *shrug* */ -+ do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta); -+ bo->delta = 0; -+ return true; -+ } else -+ return false; -+} -+ -+static bool kgem_set_tiling(struct kgem *kgem, struct kgem_bo *bo, -+ int tiling, int stride) - { - struct drm_i915_gem_set_tiling set_tiling; - int err; - -+ if (tiling == bo->tiling) { -+ if (tiling == I915_TILING_NONE) { -+ bo->pitch = stride; -+ return true; -+ } -+ if (stride == bo->pitch) -+ return true; -+ } -+ - if (DBG_NO_TILING) - return false; - - VG_CLEAR(set_tiling); - restart: -- set_tiling.handle = handle; -+ set_tiling.handle = bo->handle; - set_tiling.tiling_mode = tiling; -- set_tiling.stride = stride; -+ set_tiling.stride = tiling ? stride : 0; - -- if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) -- return true; -+ if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) { -+ bo->tiling = set_tiling.tiling_mode; -+ bo->pitch = set_tiling.tiling_mode ? 
set_tiling.stride : stride; -+ DBG(("%s: handle=%d, tiling=%d [%d], pitch=%d [%d]: %d\n", -+ __FUNCTION__, bo->handle, -+ bo->tiling, tiling, -+ bo->pitch, stride, -+ set_tiling.tiling_mode == tiling)); -+ return set_tiling.tiling_mode == tiling; -+ } - - err = errno; - if (err == EINTR) -@@ -353,6 +500,11 @@ restart: - goto restart; - } - -+ if (err == EBUSY && kgem_bo_rmfb(kgem, bo)) -+ goto restart; -+ -+ ERR(("%s: failed to set-tiling(tiling=%d, pitch=%d) for handle=%d: %d\n", -+ __FUNCTION__, tiling, stride, bo->handle, err)); - return false; - } - -@@ -437,10 +589,15 @@ static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) - DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, - bo->handle, bytes(bo))); - -+ if (bo->tiling && !kgem->can_fence) -+ return NULL; -+ - VG_CLEAR(gtt); - retry_gtt: - gtt.handle = bo->handle; - if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, >t))) { -+ DBG(("%s: failed %d, throttling/cleaning caches\n", -+ __FUNCTION__, err)); - assert(err != EINVAL); - - (void)__kgem_throttle_retire(kgem, 0); -@@ -460,6 +617,8 @@ retry_mmap: - kgem->fd, gtt.offset); - if (ptr == MAP_FAILED) { - err = errno; -+ DBG(("%s: failed %d, throttling/cleaning caches\n", -+ __FUNCTION__, err)); - assert(err != EINVAL); - - if (__kgem_throttle_retire(kgem, 0)) -@@ -498,6 +657,8 @@ retry_wc: - wc.size = bytes(bo); - wc.flags = I915_MMAP_WC; - if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) { -+ DBG(("%s: failed %d, throttling/cleaning caches\n", -+ __FUNCTION__, err)); - assert(err != EINVAL); - - if (__kgem_throttle_retire(kgem, 0)) -@@ -519,16 +680,19 @@ retry_wc: - - static void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) - { -- struct drm_i915_gem_mmap mmap_arg; -+ struct local_i915_gem_mmap arg; - int err; - -+ VG_CLEAR(arg); -+ arg.offset = 0; -+ - retry: -- VG_CLEAR(mmap_arg); -- mmap_arg.handle = bo->handle; -- mmap_arg.offset = 0; -- mmap_arg.size = bytes(bo); -- if ((err = do_ioctl(kgem->fd, 
DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) { -- assert(err != EINVAL); -+ arg.handle = bo->handle; -+ arg.size = bytes(bo); -+ if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP, &arg))) { -+ DBG(("%s: failed %d, throttling/cleaning caches\n", -+ __FUNCTION__, err)); -+ assert(err != -EINVAL || bo->prime); - - if (__kgem_throttle_retire(kgem, 0)) - goto retry; -@@ -536,15 +700,16 @@ retry: - if (kgem_cleanup_cache(kgem)) - goto retry; - -- ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n", -- __FUNCTION__, bo->handle, bytes(bo), -err)); -+ ERR(("%s: failed to mmap handle=%d (prime? %d), %d bytes, into CPU domain: %d\n", -+ __FUNCTION__, bo->handle, bo->prime, bytes(bo), -err)); -+ bo->purged = 1; - return NULL; - } - -- VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); -+ VG(VALGRIND_MAKE_MEM_DEFINED(arg.addr_ptr, bytes(bo))); - - DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); -- return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr; -+ return bo->map__cpu = (void *)(uintptr_t)arg.addr_ptr; - } - - static int gem_write(int fd, uint32_t handle, -@@ -634,16 +799,10 @@ static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo) - assert(bo->exec == NULL); - assert(list_is_empty(&bo->vma)); - -- if (bo->rq) { -- __kgem_bo_clear_busy(bo); -- kgem_retire(kgem); -- assert_bo_retired(bo); -- } else { -- assert(bo->exec == NULL); -- assert(list_is_empty(&bo->request)); -- assert(!bo->needs_flush); -- ASSERT_IDLE(kgem, bo->handle); -- } -+ if (bo->rq) -+ __kgem_retire_requests_upto(kgem, bo); -+ ASSERT_IDLE(kgem, bo->handle); -+ assert_bo_retired(bo); - } - - static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo) -@@ -655,10 +814,8 @@ static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo) - assert(list_is_empty(&bo->vma)); - - if (bo->rq) { -- if (!__kgem_busy(kgem, bo->handle)) { -- __kgem_bo_clear_busy(bo); -- kgem_retire(kgem); -- } -+ if (!__kgem_busy(kgem, bo->handle)) -+ 
__kgem_retire_requests_upto(kgem, bo); - } else { - assert(!bo->needs_flush); - ASSERT_IDLE(kgem, bo->handle); -@@ -694,6 +851,8 @@ retry: - } - - if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) { -+ DBG(("%s: failed %d, throttling/cleaning caches\n", -+ __FUNCTION__, err)); - assert(err != EINVAL); - - (void)__kgem_throttle_retire(kgem, 0); -@@ -728,27 +887,21 @@ static uint32_t gem_create(int fd, int num_pages) - return create.handle; - } - --static bool -+static void - kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo) - { --#if DBG_NO_MADV -- return true; --#else -+#if !DBG_NO_MADV - struct drm_i915_gem_madvise madv; - - assert(bo->exec == NULL); -- assert(!bo->purged); - - VG_CLEAR(madv); - madv.handle = bo->handle; - madv.madv = I915_MADV_DONTNEED; - if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { -- bo->purged = 1; -- kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; -- return madv.retained; -+ bo->purged = true; -+ kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU; - } -- -- return true; - #endif - } - -@@ -788,7 +941,7 @@ kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo) - madv.madv = I915_MADV_WILLNEED; - if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { - bo->purged = !madv.retained; -- kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; -+ kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU; - return madv.retained; - } - -@@ -869,13 +1022,17 @@ static struct kgem_request *__kgem_request_alloc(struct kgem *kgem) - { - struct kgem_request *rq; - -- rq = __kgem_freed_request; -- if (rq) { -- __kgem_freed_request = *(struct kgem_request **)rq; -+ if (unlikely(kgem->wedged)) { -+ rq = &kgem->static_request; - } else { -- rq = malloc(sizeof(*rq)); -- if (rq == NULL) -- rq = &kgem->static_request; -+ rq = __kgem_freed_request; -+ if (rq) { -+ __kgem_freed_request = *(struct kgem_request **)rq; -+ } else { -+ rq = malloc(sizeof(*rq)); -+ if 
(rq == NULL) -+ rq = &kgem->static_request; -+ } - } - - list_init(&rq->buffers); -@@ -925,11 +1082,11 @@ total_ram_size(void) - #ifdef HAVE_STRUCT_SYSINFO_TOTALRAM - struct sysinfo info; - if (sysinfo(&info) == 0) -- return info.totalram * info.mem_unit; -+ return (size_t)info.totalram * info.mem_unit; - #endif - - #ifdef _SC_PHYS_PAGES -- return sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); -+ return (size_t)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); - #endif - - return 0; -@@ -1150,6 +1307,10 @@ static bool test_has_wc_mmap(struct kgem *kgem) - if (DBG_NO_WC_MMAP) - return false; - -+ /* XXX See https://bugs.freedesktop.org/show_bug.cgi?id=90841 */ -+ if (kgem->gen < 033) -+ return false; -+ - if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1) - return false; - -@@ -1187,7 +1348,7 @@ static bool test_has_caching(struct kgem *kgem) - - static bool test_has_userptr(struct kgem *kgem) - { -- uint32_t handle; -+ struct local_i915_gem_userptr arg; - void *ptr; - - if (DBG_NO_USERPTR) -@@ -1200,11 +1361,23 @@ static bool test_has_userptr(struct kgem *kgem) - if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE)) - return false; - -- handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false); -- gem_close(kgem->fd, handle); -- free(ptr); -+ VG_CLEAR(arg); -+ arg.user_ptr = (uintptr_t)ptr; -+ arg.user_size = PAGE_SIZE; -+ arg.flags = I915_USERPTR_UNSYNCHRONIZED; - -- return handle != 0; -+ if (DBG_NO_UNSYNCHRONIZED_USERPTR || -+ do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { -+ arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED; -+ if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) -+ arg.handle = 0; -+ /* Leak the userptr bo to keep the mmu_notifier alive */ -+ } else { -+ gem_close(kgem->fd, arg.handle); -+ free(ptr); -+ } -+ -+ return arg.handle != 0; - } - - static bool test_has_create2(struct kgem *kgem) -@@ -1227,67 +1400,187 @@ static bool test_has_create2(struct kgem *kgem) - #endif - } - --static bool test_has_secure_batches(struct kgem *kgem) 
-+static bool test_can_blt_y(struct kgem *kgem) - { -- if (DBG_NO_SECURE_BATCHES) -+ struct drm_i915_gem_exec_object2 object; -+ uint32_t batch[] = { -+#define MI_LOAD_REGISTER_IMM (0x22<<23 | (3-2)) -+#define BCS_SWCTRL 0x22200 -+#define BCS_SRC_Y (1 << 0) -+#define BCS_DST_Y (1 << 1) -+ MI_LOAD_REGISTER_IMM, -+ BCS_SWCTRL, -+ (BCS_SRC_Y | BCS_DST_Y) << 16 | (BCS_SRC_Y | BCS_DST_Y), -+ -+ MI_LOAD_REGISTER_IMM, -+ BCS_SWCTRL, -+ (BCS_SRC_Y | BCS_DST_Y) << 16, -+ -+ MI_BATCH_BUFFER_END, -+ 0, -+ }; -+ int ret; -+ -+ if (DBG_NO_BLT_Y) - return false; - -- return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; -+ if (kgem->gen < 060) -+ return false; -+ -+ memset(&object, 0, sizeof(object)); -+ object.handle = gem_create(kgem->fd, 1); -+ -+ ret = gem_write(kgem->fd, object.handle, 0, sizeof(batch), batch); -+ if (ret == 0) { -+ struct drm_i915_gem_execbuffer2 execbuf; -+ -+ memset(&execbuf, 0, sizeof(execbuf)); -+ execbuf.buffers_ptr = (uintptr_t)&object; -+ execbuf.buffer_count = 1; -+ execbuf.flags = KGEM_BLT; -+ -+ ret = do_ioctl(kgem->fd, -+ DRM_IOCTL_I915_GEM_EXECBUFFER2, -+ &execbuf); -+ } -+ gem_close(kgem->fd, object.handle); -+ -+ return ret == 0; - } - --static bool test_has_pinned_batches(struct kgem *kgem) -+static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) - { -- if (DBG_NO_PINNED_BATCHES) -+ struct drm_i915_gem_set_tiling set_tiling; -+ -+ if (DBG_NO_TILING) - return false; - -- return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; -+ VG_CLEAR(set_tiling); -+ set_tiling.handle = handle; -+ set_tiling.tiling_mode = tiling; -+ set_tiling.stride = stride; -+ -+ if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) -+ return set_tiling.tiling_mode == tiling; -+ -+ return false; - } - --static int kgem_get_screen_index(struct kgem *kgem) -+static bool test_can_scanout_y(struct kgem *kgem) - { -- struct sna *sna = container_of(kgem, struct sna, kgem); -- return sna->scrn->scrnIndex; -+ struct 
drm_mode_fb_cmd arg; -+ bool ret = false; -+ -+ if (DBG_NO_SCANOUT_Y) -+ return false; -+ -+ VG_CLEAR(arg); -+ arg.width = 32; -+ arg.height = 32; -+ arg.pitch = 4*32; -+ arg.bpp = 32; -+ arg.depth = 24; -+ arg.handle = gem_create(kgem->fd, 1); -+ -+ if (gem_set_tiling(kgem->fd, arg.handle, I915_TILING_Y, arg.pitch)) -+ ret = do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0; -+ if (!ret) { -+ struct local_mode_fb_cmd2 { -+ uint32_t fb_id; -+ uint32_t width, height; -+ uint32_t pixel_format; -+ uint32_t flags; -+ -+ uint32_t handles[4]; -+ uint32_t pitches[4]; -+ uint32_t offsets[4]; -+ uint64_t modifiers[4]; -+ } f; -+#define LOCAL_IOCTL_MODE_ADDFB2 DRM_IOWR(0xb8, struct local_mode_fb_cmd2) -+ memset(&f, 0, sizeof(f)); -+ f.width = arg.width; -+ f.height = arg.height; -+ f.handles[0] = arg.handle; -+ f.pitches[0] = arg.pitch; -+ f.modifiers[0] = (uint64_t)1 << 56 | 2; /* MOD_Y_TILED */ -+ f.pixel_format = 'X' | 'R' << 8 | '2' << 16 | '4' << 24; /* XRGB8888 */ -+ f.flags = 1 << 1; /* + modifier */ -+ if (drmIoctl(kgem->fd, LOCAL_IOCTL_MODE_ADDFB2, &f) == 0) { -+ ret = true; -+ arg.fb_id = f.fb_id; -+ } -+ } -+ do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &arg.fb_id); -+ gem_close(kgem->fd, arg.handle); -+ -+ return ret; - } - --static int __find_debugfs(struct kgem *kgem) -+static bool test_has_dirtyfb(struct kgem *kgem) - { -- int i; -+ struct drm_mode_fb_cmd create; -+ bool ret = false; - -- for (i = 0; i < DRM_MAX_MINOR; i++) { -- char path[80]; -+ if (DBG_NO_DIRTYFB) -+ return false; - -- sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i); -- if (access(path, R_OK) == 0) -- return i; -+ VG_CLEAR(create); -+ create.width = 32; -+ create.height = 32; -+ create.pitch = 4*32; -+ create.bpp = 32; -+ create.depth = 32; -+ create.handle = gem_create(kgem->fd, 1); -+ if (create.handle == 0) -+ return false; - -- sprintf(path, "/debug/dri/%d/i915_wedged", i); -- if (access(path, R_OK) == 0) -- return i; -+ if (drmIoctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &create) == 0) { 
-+ struct drm_mode_fb_dirty_cmd dirty; -+ -+ memset(&dirty, 0, sizeof(dirty)); -+ dirty.fb_id = create.fb_id; -+ ret = drmIoctl(kgem->fd, -+ DRM_IOCTL_MODE_DIRTYFB, -+ &dirty) == 0; -+ -+ /* XXX There may be multiple levels of DIRTYFB, depending on -+ * whether the kernel thinks tracking dirty regions is -+ * beneficial vs flagging the whole fb as dirty. -+ */ -+ -+ drmIoctl(kgem->fd, -+ DRM_IOCTL_MODE_RMFB, -+ &create.fb_id); - } -+ gem_close(kgem->fd, create.handle); - -- return -1; -+ return ret; - } - --static int kgem_get_minor(struct kgem *kgem) -+static bool test_has_secure_batches(struct kgem *kgem) - { -- struct stat st; -+ if (DBG_NO_SECURE_BATCHES) -+ return false; - -- if (fstat(kgem->fd, &st)) -- return __find_debugfs(kgem); -+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; -+} - -- if (!S_ISCHR(st.st_mode)) -- return __find_debugfs(kgem); -+static bool test_has_pinned_batches(struct kgem *kgem) -+{ -+ if (DBG_NO_PINNED_BATCHES) -+ return false; - -- return st.st_rdev & 0x63; -+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; - } - - static bool kgem_init_pinned_batches(struct kgem *kgem) - { - int count[2] = { 16, 4 }; - int size[2] = { 1, 4 }; -+ int ret = 0; - int n, i; - -- if (kgem->wedged) -+ if (unlikely(kgem->wedged)) - return true; - - for (n = 0; n < ARRAY_SIZE(count); n++) { -@@ -1311,7 +1604,8 @@ static bool kgem_init_pinned_batches(struct kgem *kgem) - } - - pin.alignment = 0; -- if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) { -+ ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin); -+ if (ret) { - gem_close(kgem->fd, pin.handle); - free(bo); - goto err; -@@ -1333,6 +1627,16 @@ err: - } - } - -+ /* If we fail to pin some memory for 830gm/845g, we need to disable -+ * acceleration as otherwise the machine will eventually fail. However, -+ * the kernel started arbitrarily rejecting PIN, so hope for the best -+ * if the ioctl no longer works. 
-+ */ -+ if (ret != -ENODEV && kgem->gen == 020) -+ return false; -+ -+ kgem->has_pinned_batches = false; -+ - /* For simplicity populate the lists with a single unpinned bo */ - for (n = 0; n < ARRAY_SIZE(count); n++) { - struct kgem_bo *bo; -@@ -1340,18 +1644,18 @@ err: - - handle = gem_create(kgem->fd, size[n]); - if (handle == 0) -- break; -+ return false; - - bo = __kgem_bo_alloc(handle, size[n]); - if (bo == NULL) { - gem_close(kgem->fd, handle); -- break; -+ return false; - } - - debug_alloc__bo(kgem, bo); - list_add(&bo->list, &kgem->pinned_batches[n]); - } -- return false; -+ return true; - } - - static void kgem_init_swizzling(struct kgem *kgem) -@@ -1364,7 +1668,7 @@ static void kgem_init_swizzling(struct kgem *kgem) - } tiling; - #define LOCAL_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct local_i915_gem_get_tiling_v2) - -- VG_CLEAR(tiling); -+ memset(&tiling, 0, sizeof(tiling)); - tiling.handle = gem_create(kgem->fd, 1); - if (!tiling.handle) - return; -@@ -1375,12 +1679,23 @@ static void kgem_init_swizzling(struct kgem *kgem) - if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_TILING, &tiling)) - goto out; - -- if (kgem->gen < 50 && tiling.phys_swizzle_mode != tiling.swizzle_mode) -+ DBG(("%s: swizzle_mode=%d, phys_swizzle_mode=%d\n", -+ __FUNCTION__, tiling.swizzle_mode, tiling.phys_swizzle_mode)); -+ -+ kgem->can_fence = -+ !DBG_NO_TILING && -+ tiling.swizzle_mode != I915_BIT_6_SWIZZLE_UNKNOWN; -+ -+ if (kgem->gen < 050 && tiling.phys_swizzle_mode != tiling.swizzle_mode) - goto out; - -- choose_memcpy_tiled_x(kgem, tiling.swizzle_mode); -+ if (!DBG_NO_DETILING) -+ choose_memcpy_tiled_x(kgem, -+ tiling.swizzle_mode, -+ __to_sna(kgem)->cpu_features); - out: - gem_close(kgem->fd, tiling.handle); -+ DBG(("%s: can fence?=%d\n", __FUNCTION__, kgem->can_fence)); - } - - static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) -@@ -1399,6 +1714,7 @@ static void kgem_fixup_relocs(struct kgem 
*kgem, struct kgem_bo *bo, int shrink) - bo->handle, (long long)bo->presumed_offset)); - for (n = 0; n < kgem->nreloc__self; n++) { - int i = kgem->reloc__self[n]; -+ uint64_t addr; - - assert(kgem->reloc[i].target_handle == ~0U); - kgem->reloc[i].target_handle = bo->target_handle; -@@ -1412,13 +1728,17 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) - - kgem->reloc[i].delta -= shrink; - } -- kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] = -- kgem->reloc[i].delta + bo->presumed_offset; -+ addr = (int)kgem->reloc[i].delta + bo->presumed_offset; -+ kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] = addr; -+ if (kgem->gen >= 0100) -+ kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t) + 1] = addr >> 32; - } - - if (n == 256) { - for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) { - if (kgem->reloc[n].target_handle == ~0U) { -+ uint64_t addr; -+ - kgem->reloc[n].target_handle = bo->target_handle; - kgem->reloc[n].presumed_offset = bo->presumed_offset; - -@@ -1429,8 +1749,11 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) - kgem->reloc[n].delta - shrink)); - kgem->reloc[n].delta -= shrink; - } -- kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] = -- kgem->reloc[n].delta + bo->presumed_offset; -+ -+ addr = (int)kgem->reloc[n].delta + bo->presumed_offset; -+ kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] = addr; -+ if (kgem->gen >= 0100) -+ kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t) + 1] = addr >> 32; - } - } - } -@@ -1444,6 +1767,44 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) - } - } - -+static int kgem_bo_wait(struct kgem *kgem, struct kgem_bo *bo) -+{ -+ struct local_i915_gem_wait { -+ uint32_t handle; -+ uint32_t flags; -+ int64_t timeout; -+ } wait; -+#define LOCAL_I915_GEM_WAIT 0x2c -+#define LOCAL_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + LOCAL_I915_GEM_WAIT, struct local_i915_gem_wait) -+ int ret; -+ -+ DBG(("%s: 
waiting for handle=%d\n", __FUNCTION__, bo->handle)); -+ if (bo->rq == NULL) -+ return 0; -+ -+ VG_CLEAR(wait); -+ wait.handle = bo->handle; -+ wait.flags = 0; -+ wait.timeout = -1; -+ ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_WAIT, &wait); -+ if (ret) { -+ struct drm_i915_gem_set_domain set_domain; -+ -+ VG_CLEAR(set_domain); -+ set_domain.handle = bo->handle; -+ set_domain.read_domains = I915_GEM_DOMAIN_GTT; -+ set_domain.write_domain = I915_GEM_DOMAIN_GTT; -+ ret = do_ioctl(kgem->fd, -+ DRM_IOCTL_I915_GEM_SET_DOMAIN, -+ &set_domain); -+ } -+ -+ if (ret == 0) -+ __kgem_retire_requests_upto(kgem, bo); -+ -+ return ret; -+} -+ - static struct kgem_bo *kgem_new_batch(struct kgem *kgem) - { - struct kgem_bo *last; -@@ -1464,20 +1825,41 @@ static struct kgem_bo *kgem_new_batch(struct kgem *kgem) - if (!kgem->has_llc) - flags |= CREATE_UNCACHED; - -+restart: - kgem->batch_bo = kgem_create_linear(kgem, - sizeof(uint32_t)*kgem->batch_size, - flags); - if (kgem->batch_bo) - kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo); - if (kgem->batch == NULL) { -- DBG(("%s: unable to map batch bo, mallocing(size=%d)\n", -- __FUNCTION__, -- sizeof(uint32_t)*kgem->batch_size)); -+ int ring = kgem->ring == KGEM_BLT; -+ assert(ring < ARRAY_SIZE(kgem->requests)); -+ - if (kgem->batch_bo) { - kgem_bo_destroy(kgem, kgem->batch_bo); - kgem->batch_bo = NULL; - } - -+ if (!list_is_empty(&kgem->requests[ring])) { -+ struct kgem_request *rq; -+ -+ rq = list_first_entry(&kgem->requests[ring], -+ struct kgem_request, list); -+ assert(rq->ring == ring); -+ assert(rq->bo); -+ assert(RQ(rq->bo->rq) == rq); -+ if (kgem_bo_wait(kgem, rq->bo) == 0) -+ goto restart; -+ } -+ -+ if (flags & CREATE_NO_THROTTLE) { -+ flags &= ~CREATE_NO_THROTTLE; -+ if (kgem_cleanup_cache(kgem)) -+ goto restart; -+ } -+ -+ DBG(("%s: unable to map batch bo, mallocing(size=%d)\n", -+ __FUNCTION__, sizeof(uint32_t)*kgem->batch_size)); - if (posix_memalign((void **)&kgem->batch, PAGE_SIZE, - ALIGN(sizeof(uint32_t) * 
kgem->batch_size, PAGE_SIZE))) { - ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__)); -@@ -1495,18 +1877,79 @@ static struct kgem_bo *kgem_new_batch(struct kgem *kgem) - return last; - } - --void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) -+static void -+no_retire(struct kgem *kgem) -+{ -+ (void)kgem; -+} -+ -+static void -+no_expire(struct kgem *kgem) -+{ -+ (void)kgem; -+} -+ -+static void -+no_context_switch(struct kgem *kgem, int new_mode) -+{ -+ (void)kgem; -+ (void)new_mode; -+} -+ -+static uint64_t get_gtt_size(int fd) - { - struct drm_i915_gem_get_aperture aperture; -+ struct local_i915_gem_context_param { -+ uint32_t context; -+ uint32_t size; -+ uint64_t param; -+#define LOCAL_CONTEXT_PARAM_BAN_PERIOD 0x1 -+#define LOCAL_CONTEXT_PARAM_NO_ZEROMAP 0x2 -+#define LOCAL_CONTEXT_PARAM_GTT_SIZE 0x3 -+ uint64_t value; -+ } p; -+#define LOCAL_I915_GEM_CONTEXT_GETPARAM 0x34 -+#define LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CONTEXT_GETPARAM, struct local_i915_gem_context_param) -+ -+ memset(&aperture, 0, sizeof(aperture)); -+ -+ memset(&p, 0, sizeof(p)); -+ p.param = LOCAL_CONTEXT_PARAM_GTT_SIZE; -+ if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p) == 0) -+ aperture.aper_size = p.value; -+ if (aperture.aper_size == 0) -+ (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); -+ if (aperture.aper_size == 0) -+ aperture.aper_size = 64*1024*1024; -+ -+ DBG(("%s: aperture size %lld, available now %lld\n", -+ __FUNCTION__, -+ (long long)aperture.aper_size, -+ (long long)aperture.aper_available_size)); -+ -+ /* clamp aperture to uint32_t for simplicity */ -+ if (aperture.aper_size > 0xc0000000) -+ aperture.aper_size = 0xc0000000; -+ -+ return aperture.aper_size; -+} -+ -+void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) -+{ - size_t totalram; - unsigned half_gpu_max; - unsigned int i, j; -+ uint64_t gtt_size; - - DBG(("%s: 
fd=%d, gen=%d\n", __FUNCTION__, fd, gen)); - - kgem->fd = fd; - kgem->gen = gen; - -+ kgem->retire = no_retire; -+ kgem->expire = no_expire; -+ kgem->context_switch = no_context_switch; -+ - list_init(&kgem->requests[0]); - list_init(&kgem->requests[1]); - list_init(&kgem->batch_buffers); -@@ -1586,10 +2029,21 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) - DBG(("%s: can blt to cpu? %d\n", __FUNCTION__, - kgem->can_blt_cpu)); - -+ kgem->can_blt_y = test_can_blt_y(kgem); -+ DBG(("%s: can blit to Y-tiled surfaces? %d\n", __FUNCTION__, -+ kgem->can_blt_y)); -+ - kgem->can_render_y = gen != 021 && (gen >> 3) != 4; - DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__, - kgem->can_render_y)); - -+ kgem->can_scanout_y = test_can_scanout_y(kgem); -+ DBG(("%s: can scanout Y-tiled surfaces? %d\n", __FUNCTION__, -+ kgem->can_scanout_y)); -+ -+ kgem->has_dirtyfb = test_has_dirtyfb(kgem); -+ DBG(("%s: has dirty fb? %d\n", __FUNCTION__, kgem->has_dirtyfb)); -+ - kgem->has_secure_batches = test_has_secure_batches(kgem); - DBG(("%s: can use privileged batchbuffers? 
%d\n", __FUNCTION__, - kgem->has_secure_batches)); -@@ -1620,7 +2074,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) - if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) - kgem->batch_size = 4*1024; - -- if (!kgem_init_pinned_batches(kgem) && gen == 020) { -+ if (!kgem_init_pinned_batches(kgem)) { - xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, - "Unable to reserve memory for GPU, disabling acceleration.\n"); - __kgem_set_wedged(kgem); -@@ -1640,35 +2094,24 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) - !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching), - kgem->has_llc, kgem->has_caching, kgem->has_userptr)); - -- VG_CLEAR(aperture); -- aperture.aper_size = 0; -- (void)do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); -- if (aperture.aper_size == 0) -- aperture.aper_size = 64*1024*1024; -- -- DBG(("%s: aperture size %lld, available now %lld\n", -- __FUNCTION__, -- (long long)aperture.aper_size, -- (long long)aperture.aper_available_size)); -- -- kgem->aperture_total = aperture.aper_size; -- kgem->aperture_high = aperture.aper_size * 3/4; -- kgem->aperture_low = aperture.aper_size * 1/3; -+ gtt_size = get_gtt_size(fd); -+ kgem->aperture_total = gtt_size; -+ kgem->aperture_high = gtt_size * 3/4; -+ kgem->aperture_low = gtt_size * 1/3; - if (gen < 033) { - /* Severe alignment penalties */ - kgem->aperture_high /= 2; - kgem->aperture_low /= 2; - } -- DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__, -+ DBG(("%s: aperture low=%u [%u], high=%u [%u]\n", __FUNCTION__, - kgem->aperture_low, kgem->aperture_low / (1024*1024), - kgem->aperture_high, kgem->aperture_high / (1024*1024))); - - kgem->aperture_mappable = 256 * 1024 * 1024; - if (dev != NULL) - kgem->aperture_mappable = agp_aperture_size(dev, gen); -- if (kgem->aperture_mappable == 0 || -- kgem->aperture_mappable > aperture.aper_size) -- kgem->aperture_mappable = aperture.aper_size; -+ if 
(kgem->aperture_mappable == 0 || kgem->aperture_mappable > gtt_size) -+ kgem->aperture_mappable = gtt_size; - DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__, - kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024))); - -@@ -1697,7 +2140,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) - __FUNCTION__)); - totalram = kgem->aperture_total; - } -- DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram)); -+ DBG(("%s: total ram=%lld\n", __FUNCTION__, (long long)totalram)); - if (kgem->max_object_size > totalram / 2) - kgem->max_object_size = totalram / 2; - if (kgem->max_gpu_size > totalram / 4) -@@ -1749,11 +2192,11 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) - if (DBG_NO_CPU) - kgem->max_cpu_size = 0; - -- DBG(("%s: maximum object size=%d\n", -+ DBG(("%s: maximum object size=%u\n", - __FUNCTION__, kgem->max_object_size)); -- DBG(("%s: large object thresold=%d\n", -+ DBG(("%s: large object thresold=%u\n", - __FUNCTION__, kgem->large_object_size)); -- DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n", -+ DBG(("%s: max object sizes (gpu=%u, cpu=%u, tile upload=%u, copy=%u)\n", - __FUNCTION__, - kgem->max_gpu_size, kgem->max_cpu_size, - kgem->max_upload_tile_size, kgem->max_copy_tile_size)); -@@ -2043,8 +2486,34 @@ static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) - kgem->flush |= bo->flush; - } - -+static void kgem_clear_swctrl(struct kgem *kgem) -+{ -+ uint32_t *b; -+ -+ if (kgem->bcs_state == 0) -+ return; -+ -+ DBG(("%s: clearin SWCTRL LRI from %x\n", -+ __FUNCTION__, kgem->bcs_state)); -+ -+ b = kgem->batch + kgem->nbatch; -+ kgem->nbatch += 7; -+ -+ *b++ = MI_FLUSH_DW; -+ *b++ = 0; -+ *b++ = 0; -+ *b++ = 0; -+ -+ *b++ = MI_LOAD_REGISTER_IMM; -+ *b++ = BCS_SWCTRL; -+ *b++ = (BCS_SRC_Y | BCS_DST_Y) << 16; -+ -+ kgem->bcs_state = 0; -+} -+ - static uint32_t kgem_end_batch(struct kgem *kgem) - { -+ kgem_clear_swctrl(kgem); - 
kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END; - if (kgem->nbatch & 1) - kgem->batch[kgem->nbatch++] = MI_NOOP; -@@ -2064,17 +2533,6 @@ static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo) - } - } - --static void kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo) --{ -- if (bo->scanout && bo->delta) { -- DBG(("%s: releasing fb=%d for handle=%d\n", -- __FUNCTION__, bo->delta, bo->handle)); -- /* XXX will leak if we are not DRM_MASTER. *shrug* */ -- do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta); -- bo->delta = 0; -- } --} -- - static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) - { - DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo))); -@@ -2150,13 +2608,16 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, - assert(!bo->snoop); - assert(!bo->flush); - assert(!bo->needs_flush); -+ assert(!bo->delta); - assert(list_is_empty(&bo->vma)); - assert_tiling(kgem, bo); -- assert_cacheing(kgem, bo); -+ assert_caching(kgem, bo); - ASSERT_IDLE(kgem, bo->handle); - - if (bucket(bo) >= NUM_CACHE_BUCKETS) { - if (bo->map__gtt) { -+ DBG(("%s: relinquishing large GTT mapping for handle=%d\n", -+ __FUNCTION__, bo->handle)); - munmap(bo->map__gtt, bytes(bo)); - bo->map__gtt = NULL; - } -@@ -2167,6 +2628,8 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, - assert(list_is_empty(&bo->vma)); - list_move(&bo->list, &kgem->inactive[bucket(bo)]); - if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) { -+ DBG(("%s: relinquishing old GTT mapping for handle=%d\n", -+ __FUNCTION__, bo->handle)); - munmap(bo->map__gtt, bytes(bo)); - bo->map__gtt = NULL; - } -@@ -2191,6 +2654,10 @@ static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo) - return bo; - - assert(!bo->snoop); -+ assert(!bo->purged); -+ assert(!bo->scanout); -+ assert(!bo->delta); -+ - if (__kgem_freed_bo) { - base = __kgem_freed_bo; - __kgem_freed_bo = *(struct kgem_bo **)base; -@@ -2221,6 +2688,7 @@ inline static void 
kgem_bo_remove_from_inactive(struct kgem *kgem, - list_del(&bo->list); - assert(bo->rq == NULL); - assert(bo->exec == NULL); -+ assert(!bo->purged); - if (!list_is_empty(&bo->vma)) { - assert(bo->map__gtt || bo->map__wc || bo->map__cpu); - list_del(&bo->vma); -@@ -2305,7 +2773,6 @@ static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo) - list_move(&bo->list, &kgem->scanout); - - kgem->need_expire = true; -- - } - - static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) -@@ -2316,6 +2783,8 @@ static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) - assert(!bo->needs_flush); - assert(bo->refcnt == 0); - assert(bo->exec == NULL); -+ assert(!bo->purged); -+ assert(!bo->delta); - - if (DBG_NO_SNOOP_CACHE) { - kgem_bo_free(kgem, bo); -@@ -2351,8 +2820,7 @@ static bool kgem_bo_move_to_cache(struct kgem *kgem, struct kgem_bo *bo) - kgem_bo_move_to_snoop(kgem, bo); - } else if (bo->scanout) { - kgem_bo_move_to_scanout(kgem, bo); -- } else if ((bo = kgem_bo_replace_io(bo))->reusable && -- kgem_bo_set_purgeable(kgem, bo)) { -+ } else if ((bo = kgem_bo_replace_io(bo))->reusable) { - kgem_bo_move_to_inactive(kgem, bo); - retired = true; - } else -@@ -2429,7 +2897,7 @@ void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo) - DBG(("%s: only handle in batch, discarding last operations for handle=%d\n", - __FUNCTION__, bo->handle)); - -- assert(bo->exec == &kgem->exec[0]); -+ assert(bo->exec == &_kgem_dummy_exec || bo->exec == &kgem->exec[0]); - assert(kgem->exec[0].handle == bo->handle); - assert(RQ(bo->rq) == kgem->next_request); - -@@ -2457,16 +2925,23 @@ void kgem_bo_pair_undo(struct kgem *kgem, struct kgem_bo *a, struct kgem_bo *b) - - if (a == NULL || b == NULL) - return; -+ assert(a != b); - if (a->exec == NULL || b->exec == NULL) - return; - -- DBG(("%s: only handles in batch, discarding last operations for handle=%d and handle=%d\n", -- __FUNCTION__, a->handle, b->handle)); -+ DBG(("%s: only handles in batch, 
discarding last operations for handle=%d (index=%d) and handle=%d (index=%d)\n", -+ __FUNCTION__, -+ a->handle, a->proxy ? -1 : a->exec - kgem->exec, -+ b->handle, b->proxy ? -1 : b->exec - kgem->exec)); - -- assert(a->exec == &kgem->exec[0] || a->exec == &kgem->exec[1]); -+ assert(a->exec == &_kgem_dummy_exec || -+ a->exec == &kgem->exec[0] || -+ a->exec == &kgem->exec[1]); - assert(a->handle == kgem->exec[0].handle || a->handle == kgem->exec[1].handle); - assert(RQ(a->rq) == kgem->next_request); -- assert(b->exec == &kgem->exec[0] || b->exec == &kgem->exec[1]); -+ assert(b->exec == &_kgem_dummy_exec || -+ b->exec == &kgem->exec[0] || -+ b->exec == &kgem->exec[1]); - assert(b->handle == kgem->exec[0].handle || b->handle == kgem->exec[1].handle); - assert(RQ(b->rq) == kgem->next_request); - -@@ -2487,6 +2962,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) - DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo))); - - assert(list_is_empty(&bo->list)); -+ assert(list_is_empty(&bo->vma)); - assert(bo->refcnt == 0); - assert(bo->proxy == NULL); - assert(bo->active_scanout == 0); -@@ -2532,7 +3008,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) - assert(bo->snoop == false); - assert(bo->io == false); - assert(bo->scanout == false); -- assert_cacheing(kgem, bo); -+ assert_caching(kgem, bo); - - kgem_bo_undo(kgem, bo); - assert(bo->refcnt == 0); -@@ -2556,9 +3032,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) - assert(list_is_empty(&bo->request)); - - if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) { -- if (!kgem_bo_set_purgeable(kgem, bo)) -- goto destroy; -- - if (!kgem->has_llc && bo->domain == DOMAIN_CPU) - goto destroy; - -@@ -2647,7 +3120,7 @@ static bool kgem_retire__flushing(struct kgem *kgem) - int count = 0; - list_for_each_entry(bo, &kgem->flushing, request) - count++; -- DBG(("%s: %d bo on flushing list\n", __FUNCTION__, count)); -+ DBG(("%s: %d bo on 
flushing list, retired? %d\n", __FUNCTION__, count, retired)); - } - #endif - -@@ -2656,6 +3129,34 @@ static bool kgem_retire__flushing(struct kgem *kgem) - return retired; - } - -+static bool __kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo) -+{ -+ struct drm_i915_gem_busy busy; -+ -+ if (!bo->needs_flush) -+ return false; -+ -+ bo->needs_flush = false; -+ -+ VG_CLEAR(busy); -+ busy.handle = bo->handle; -+ busy.busy = !kgem->wedged; -+ (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); -+ DBG(("%s: handle=%d, busy=%d, wedged=%d\n", -+ __FUNCTION__, bo->handle, busy.busy, kgem->wedged)); -+ -+ if (busy.busy == 0) -+ return false; -+ -+ DBG(("%s: moving %d to flushing\n", -+ __FUNCTION__, bo->handle)); -+ list_add(&bo->request, &kgem->flushing); -+ bo->rq = MAKE_REQUEST(kgem, !!(busy.busy & ~0x1ffff)); -+ bo->needs_flush = busy.busy & 0xffff; -+ kgem->need_retire = true; -+ return true; -+} -+ - static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) - { - bool retired = false; -@@ -2663,6 +3164,8 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) - DBG(("%s: request %d complete\n", - __FUNCTION__, rq->bo->handle)); - assert(RQ(rq->bo->rq) == rq); -+ assert(rq != (struct kgem_request *)kgem); -+ assert(rq != &kgem->static_request); - - if (rq == kgem->fence[rq->ring]) - kgem->fence[rq->ring] = NULL; -@@ -2680,19 +3183,14 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) - - list_del(&bo->request); - -- if (bo->needs_flush) -- bo->needs_flush = __kgem_busy(kgem, bo->handle); -- if (bo->needs_flush) { -- DBG(("%s: moving %d to flushing\n", -+ if (unlikely(__kgem_bo_flush(kgem, bo))) { -+ assert(bo != rq->bo); -+ DBG(("%s: movied %d to flushing\n", - __FUNCTION__, bo->handle)); -- list_add(&bo->request, &kgem->flushing); -- bo->rq = MAKE_REQUEST(kgem, RQ_RING(bo->rq)); -- kgem->need_retire = true; - continue; - } - - bo->domain = DOMAIN_NONE; -- bo->gtt_dirty = false; - bo->rq = NULL; - if 
(bo->refcnt) - continue; -@@ -2706,14 +3204,8 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) - assert(rq->bo->refcnt > 0); - - if (--rq->bo->refcnt == 0) { -- if (kgem_bo_set_purgeable(kgem, rq->bo)) { -- kgem_bo_move_to_inactive(kgem, rq->bo); -- retired = true; -- } else { -- DBG(("%s: closing %d\n", -- __FUNCTION__, rq->bo->handle)); -- kgem_bo_free(kgem, rq->bo); -- } -+ kgem_bo_move_to_inactive(kgem, rq->bo); -+ retired = true; - } - - __kgem_request_free(rq); -@@ -2724,13 +3216,18 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring) - { - bool retired = false; - -+ assert(ring < ARRAY_SIZE(kgem->requests)); - while (!list_is_empty(&kgem->requests[ring])) { - struct kgem_request *rq; - -+ DBG(("%s: retiring ring %d\n", __FUNCTION__, ring)); -+ - rq = list_first_entry(&kgem->requests[ring], - struct kgem_request, - list); - assert(rq->ring == ring); -+ assert(rq->bo); -+ assert(RQ(rq->bo->rq) == rq); - if (__kgem_busy(kgem, rq->bo->handle)) - break; - -@@ -2751,8 +3248,8 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring) - struct kgem_request, - list)->bo; - -- DBG(("%s: ring=%d, %d outstanding requests, oldest=%d\n", -- __FUNCTION__, ring, count, bo ? bo->handle : 0)); -+ DBG(("%s: ring=%d, %d outstanding requests, oldest=%d, retired? %d\n", -+ __FUNCTION__, ring, count, bo ? 
bo->handle : 0, retired)); - } - #endif - -@@ -2824,6 +3321,8 @@ bool __kgem_ring_is_idle(struct kgem *kgem, int ring) - rq = list_last_entry(&kgem->requests[ring], - struct kgem_request, list); - assert(rq->ring == ring); -+ assert(rq->bo); -+ assert(RQ(rq->bo->rq) == rq); - if (__kgem_busy(kgem, rq->bo->handle)) { - DBG(("%s: last requests handle=%d still busy\n", - __FUNCTION__, rq->bo->handle)); -@@ -2845,23 +3344,30 @@ bool __kgem_ring_is_idle(struct kgem *kgem, int ring) - return true; - } - --void __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo) -+bool __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo) - { -- struct kgem_request *rq = bo->rq, *tmp; -- struct list *requests = &kgem->requests[RQ_RING(rq) == I915_EXEC_BLT]; -+ struct kgem_request * const rq = RQ(bo->rq), *tmp; -+ struct list *requests = &kgem->requests[rq->ring]; -+ -+ DBG(("%s(handle=%d, ring=%d)\n", __FUNCTION__, bo->handle, rq->ring)); - -- rq = RQ(rq); - assert(rq != &kgem->static_request); - if (rq == (struct kgem_request *)kgem) { - __kgem_bo_clear_busy(bo); -- return; -+ return false; - } - -+ assert(rq->ring < ARRAY_SIZE(kgem->requests)); - do { - tmp = list_first_entry(requests, struct kgem_request, list); - assert(tmp->ring == rq->ring); - __kgem_retire_rq(kgem, tmp); - } while (tmp != rq); -+ -+ assert(bo->needs_flush || bo->rq == NULL); -+ assert(bo->needs_flush || list_is_empty(&bo->request)); -+ assert(bo->needs_flush || bo->domain == DOMAIN_NONE); -+ return bo->rq; - } - - #if 0 -@@ -2932,6 +3438,7 @@ static void kgem_commit(struct kgem *kgem) - bo->binding.offset = 0; - bo->domain = DOMAIN_GPU; - bo->gpu_dirty = false; -+ bo->gtt_dirty = false; - - if (bo->proxy) { - /* proxies are not used for domain tracking */ -@@ -2955,6 +3462,23 @@ static void kgem_commit(struct kgem *kgem) - kgem_throttle(kgem); - } - -+ while (!list_is_empty(&rq->buffers)) { -+ bo = list_first_entry(&rq->buffers, -+ struct kgem_bo, -+ request); -+ -+ assert(RQ(bo->rq) == 
rq); -+ assert(bo->exec == NULL); -+ assert(bo->domain == DOMAIN_GPU); -+ -+ list_del(&bo->request); -+ bo->domain = DOMAIN_NONE; -+ bo->rq = NULL; -+ -+ if (bo->refcnt == 0) -+ _kgem_bo_destroy(kgem, bo); -+ } -+ - kgem_retire(kgem); - assert(list_is_empty(&rq->buffers)); - -@@ -2964,7 +3488,9 @@ static void kgem_commit(struct kgem *kgem) - gem_close(kgem->fd, rq->bo->handle); - kgem_cleanup_cache(kgem); - } else { -+ assert(rq != (struct kgem_request *)kgem); - assert(rq->ring < ARRAY_SIZE(kgem->requests)); -+ assert(rq->bo); - list_add_tail(&rq->list, &kgem->requests[rq->ring]); - kgem->need_throttle = kgem->need_retire = 1; - -@@ -2988,8 +3514,10 @@ static void kgem_close_inactive(struct kgem *kgem) - { - unsigned int i; - -- for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) -+ for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { - kgem_close_list(kgem, &kgem->inactive[i]); -+ assert(list_is_empty(&kgem->inactive[i])); -+ } - } - - static void kgem_finish_buffers(struct kgem *kgem) -@@ -3079,10 +3607,13 @@ static void kgem_finish_buffers(struct kgem *kgem) - kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; - for (n = 0; n < kgem->nreloc; n++) { - if (kgem->reloc[n].target_handle == bo->base.target_handle) { -+ uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset; -+ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr; -+ if (kgem->gen >= 0100) -+ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32; -+ - kgem->reloc[n].target_handle = shrink->target_handle; - kgem->reloc[n].presumed_offset = shrink->presumed_offset; -- kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = -- kgem->reloc[n].delta + shrink->presumed_offset; - } - } - -@@ -3124,10 +3655,13 @@ static void kgem_finish_buffers(struct kgem *kgem) - kgem->has_handle_lut ? 
bo->base.target_handle : shrink->handle; - for (n = 0; n < kgem->nreloc; n++) { - if (kgem->reloc[n].target_handle == bo->base.target_handle) { -+ uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset; -+ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr; -+ if (kgem->gen >= 0100) -+ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32; -+ - kgem->reloc[n].target_handle = shrink->target_handle; - kgem->reloc[n].presumed_offset = shrink->presumed_offset; -- kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = -- kgem->reloc[n].delta + shrink->presumed_offset; - } - } - -@@ -3195,6 +3729,9 @@ static void kgem_cleanup(struct kgem *kgem) - kgem_bo_free(kgem, bo); - } - -+ if (--rq->bo->refcnt == 0) -+ kgem_bo_free(kgem, rq->bo); -+ - __kgem_request_free(rq); - } - } -@@ -3210,7 +3747,9 @@ kgem_batch_write(struct kgem *kgem, - char *ptr; - int ret; - -- ASSERT_IDLE(kgem, bo->handle); -+ assert(bo->exec == NULL); -+ assert(bo->rq == NULL); -+ assert(!__kgem_busy(kgem, bo->handle)); - - #if DBG_NO_EXEC - { -@@ -3371,55 +3910,54 @@ static int compact_batch_surface(struct kgem *kgem, int *shrink) - return size * sizeof(uint32_t); - } - -+static struct kgem_bo *first_available(struct kgem *kgem, struct list *list) -+{ -+ struct kgem_bo *bo; -+ -+ list_for_each_entry(bo, list, list) { -+ assert(bo->refcnt > 0); -+ -+ if (bo->rq) { -+ assert(RQ(bo->rq)->bo == bo); -+ if (__kgem_busy(kgem, bo->handle)) -+ break; -+ -+ __kgem_retire_rq(kgem, RQ(bo->rq)); -+ assert(bo->rq == NULL); -+ } -+ -+ if (bo->refcnt > 1) -+ continue; -+ -+ list_move_tail(&bo->list, list); -+ return kgem_bo_reference(bo); -+ } -+ -+ return NULL; -+} -+ - static struct kgem_bo * - kgem_create_batch(struct kgem *kgem) - { --#if !DBG_NO_SHRINK_BATCHES -- struct drm_i915_gem_set_domain set_domain; - struct kgem_bo *bo; -- int shrink = 0; -- int size; -+ int size, shrink = 0; - -+#if !DBG_NO_SHRINK_BATCHES - if (kgem->surface != kgem->batch_size) - size 
= compact_batch_surface(kgem, &shrink); - else - size = kgem->nbatch * sizeof(uint32_t); - - if (size <= 4096) { -- bo = list_first_entry(&kgem->pinned_batches[0], -- struct kgem_bo, -- list); -- if (!bo->rq) { --out_4096: -- assert(bo->refcnt > 0); -- list_move_tail(&bo->list, &kgem->pinned_batches[0]); -- bo = kgem_bo_reference(bo); -+ bo = first_available(kgem, &kgem->pinned_batches[0]); -+ if (bo) - goto write; -- } -- -- if (!__kgem_busy(kgem, bo->handle)) { -- assert(RQ(bo->rq)->bo == bo); -- __kgem_retire_rq(kgem, RQ(bo->rq)); -- goto out_4096; -- } - } - -- if (size <= 16384) { -- bo = list_first_entry(&kgem->pinned_batches[1], -- struct kgem_bo, -- list); -- if (!bo->rq) { --out_16384: -- assert(bo->refcnt > 0); -- list_move_tail(&bo->list, &kgem->pinned_batches[1]); -- bo = kgem_bo_reference(bo); -- goto write; -- } -- -- if (!__kgem_busy(kgem, bo->handle)) { -- __kgem_retire_rq(kgem, RQ(bo->rq)); -- goto out_16384; -- } -+ if (size <= 16384) { -+ bo = first_available(kgem, &kgem->pinned_batches[1]); -+ if (bo) -+ goto write; - } - - if (kgem->gen == 020) { -@@ -3443,16 +3981,8 @@ out_16384: - list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]); - - DBG(("%s: syncing due to busy batches\n", __FUNCTION__)); -- -- VG_CLEAR(set_domain); -- set_domain.handle = bo->handle; -- set_domain.read_domains = I915_GEM_DOMAIN_GTT; -- set_domain.write_domain = I915_GEM_DOMAIN_GTT; -- if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { -- DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); -- kgem_throttle(kgem); -+ if (kgem_bo_wait(kgem, bo)) - return NULL; -- } - - kgem_retire(kgem); - assert(bo->rq == NULL); -@@ -3460,9 +3990,14 @@ out_16384: - goto write; - } - } -+#else -+ if (kgem->surface != kgem->batch_size) -+ size = kgem->batch_size * sizeof(uint32_t); -+ else -+ size = kgem->nbatch * sizeof(uint32_t); -+#endif - -- bo = NULL; -- if (!kgem->has_llc) { -+ if (!kgem->batch_bo || !kgem->has_llc) { - bo = kgem_create_linear(kgem, size, 
CREATE_NO_THROTTLE); - if (bo) { - write: -@@ -3471,14 +4006,11 @@ write: - kgem_bo_destroy(kgem, bo); - return NULL; - } -+ return bo; - } - } -- if (bo == NULL) -- bo = kgem_new_batch(kgem); -- return bo; --#else -+ - return kgem_new_batch(kgem); --#endif - } - - #if !NDEBUG -@@ -3530,7 +4062,7 @@ static void dump_fence_regs(struct kgem *kgem) - - static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf) - { -- int ret, err; -+ int ret; - - retry: - ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); -@@ -3547,26 +4079,25 @@ retry: - - /* last gasp */ - ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); -- if (ret == 0) -- return 0; -+ if (ret != -ENOSPC) -+ return ret; -+ -+ /* One final trick up our sleeve for when we run out of space. -+ * We turn everything off to free up our pinned framebuffers, -+ * sprites and cursors, and try just one more time. -+ */ - - xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, - "Failed to submit rendering commands, trying again with outputs disabled.\n"); - -- /* One last trick up our sleeve for when we run out of space. -- * We turn everything off to free up our pinned framebuffers, -- * sprites and cursors, and try one last time. 
-- */ -- err = errno; -- if (sna_mode_disable(container_of(kgem, struct sna, kgem))) { -+ if (sna_mode_disable(__to_sna(kgem))) { - kgem_cleanup_cache(kgem); - ret = do_ioctl(kgem->fd, - DRM_IOCTL_I915_GEM_EXECBUFFER2, - execbuf); - DBG(("%s: last_gasp ret=%d\n", __FUNCTION__, ret)); -- sna_mode_enable(container_of(kgem, struct sna, kgem)); -+ sna_mode_enable(__to_sna(kgem)); - } -- errno = err; - - return ret; - } -@@ -3575,6 +4106,7 @@ void _kgem_submit(struct kgem *kgem) - { - struct kgem_request *rq; - uint32_t batch_end; -+ int i, ret; - - assert(!DBG_NO_HW); - assert(!kgem->wedged); -@@ -3609,7 +4141,6 @@ void _kgem_submit(struct kgem *kgem) - rq->bo = kgem_create_batch(kgem); - if (rq->bo) { - struct drm_i915_gem_execbuffer2 execbuf; -- int i, ret; - - assert(!rq->bo->needs_flush); - -@@ -3619,7 +4150,8 @@ void _kgem_submit(struct kgem *kgem) - kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc; - kgem->exec[i].alignment = 0; - kgem->exec[i].offset = rq->bo->presumed_offset; -- kgem->exec[i].flags = 0; -+ /* Make sure the kernel releases any fence, ignored if gen4+ */ -+ kgem->exec[i].flags = EXEC_OBJECT_NEEDS_FENCE; - kgem->exec[i].rsvd1 = 0; - kgem->exec[i].rsvd2 = 0; - -@@ -3631,7 +4163,8 @@ void _kgem_submit(struct kgem *kgem) - memset(&execbuf, 0, sizeof(execbuf)); - execbuf.buffers_ptr = (uintptr_t)kgem->exec; - execbuf.buffer_count = kgem->nexec; -- execbuf.batch_len = batch_end*sizeof(uint32_t); -+ if (kgem->gen < 030) -+ execbuf.batch_len = batch_end*sizeof(uint32_t); - execbuf.flags = kgem->ring | kgem->batch_flags; - - if (DBG_DUMP) { -@@ -3645,91 +4178,98 @@ void _kgem_submit(struct kgem *kgem) - } - - ret = do_execbuf(kgem, &execbuf); -- if (DEBUG_SYNC && ret == 0) { -- struct drm_i915_gem_set_domain set_domain; -- -- VG_CLEAR(set_domain); -- set_domain.handle = rq->bo->handle; -- set_domain.read_domains = I915_GEM_DOMAIN_GTT; -- set_domain.write_domain = I915_GEM_DOMAIN_GTT; -+ } else -+ ret = -ENOMEM; - -- ret = do_ioctl(kgem->fd, 
DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); -+ if (ret < 0) { -+ kgem_throttle(kgem); -+ if (!kgem->wedged) { -+ xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, -+ "Failed to submit rendering commands (%s), disabling acceleration.\n", -+ strerror(-ret)); -+ __kgem_set_wedged(kgem); - } -- if (ret < 0) { -- kgem_throttle(kgem); -- if (!kgem->wedged) { -- xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, -- "Failed to submit rendering commands, disabling acceleration.\n"); -- __kgem_set_wedged(kgem); -- } - - #if !NDEBUG -- ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n", -- kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, -- kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret); -+ ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n", -+ kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, -+ kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret); - -- for (i = 0; i < kgem->nexec; i++) { -- struct kgem_bo *bo, *found = NULL; -+ for (i = 0; i < kgem->nexec; i++) { -+ struct kgem_bo *bo, *found = NULL; - -- list_for_each_entry(bo, &kgem->next_request->buffers, request) { -- if (bo->handle == kgem->exec[i].handle) { -- found = bo; -- break; -- } -+ list_for_each_entry(bo, &kgem->next_request->buffers, request) { -+ if (bo->handle == kgem->exec[i].handle) { -+ found = bo; -+ break; - } -- ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n", -- i, -- kgem->exec[i].handle, -- (int)kgem->exec[i].offset, -- found ? kgem_bo_size(found) : -1, -- found ? found->tiling : -1, -- (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), -- found ? found->snoop : -1, -- found ? 
found->purged : -1); - } -- for (i = 0; i < kgem->nreloc; i++) { -- ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n", -- i, -- (int)kgem->reloc[i].offset, -- kgem->reloc[i].target_handle, -- kgem->reloc[i].delta, -- kgem->reloc[i].read_domains, -- kgem->reloc[i].write_domain, -- (int)kgem->reloc[i].presumed_offset); -+ ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n", -+ i, -+ kgem->exec[i].handle, -+ (int)kgem->exec[i].offset, -+ found ? kgem_bo_size(found) : -1, -+ found ? found->tiling : -1, -+ (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), -+ found ? found->snoop : -1, -+ found ? found->purged : -1); -+ } -+ for (i = 0; i < kgem->nreloc; i++) { -+ ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n", -+ i, -+ (int)kgem->reloc[i].offset, -+ kgem->reloc[i].target_handle, -+ kgem->reloc[i].delta, -+ kgem->reloc[i].read_domains, -+ kgem->reloc[i].write_domain, -+ (int)kgem->reloc[i].presumed_offset); -+ } -+ -+ { -+ struct drm_i915_gem_get_aperture aperture; -+ if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0) -+ ErrorF("Aperture size %lld, available %lld\n", -+ (long long)aperture.aper_size, -+ (long long)aperture.aper_available_size); -+ } -+ -+ if (ret == -ENOSPC) -+ dump_gtt_info(kgem); -+ if (ret == -EDEADLK) -+ dump_fence_regs(kgem); -+ -+ if (DEBUG_SYNC) { -+ int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); -+ if (fd != -1) { -+ int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); -+ assert(ignored == batch_end*sizeof(uint32_t)); -+ close(fd); - } - -- { -- struct drm_i915_gem_get_aperture aperture; -- if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0) -- ErrorF("Aperture size %lld, available %lld\n", -- (long long)aperture.aper_size, -- (long long)aperture.aper_available_size); -- } -+ FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret); 
-+ } -+#endif -+ } else { -+ if (DEBUG_SYNC) { -+ struct drm_i915_gem_set_domain set_domain; - -- if (ret == -ENOSPC) -- dump_gtt_info(kgem); -- if (ret == -EDEADLK) -- dump_fence_regs(kgem); -- -- if (DEBUG_SYNC) { -- int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); -- if (fd != -1) { -- int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); -- assert(ignored == batch_end*sizeof(uint32_t)); -- close(fd); -- } -+ VG_CLEAR(set_domain); -+ set_domain.handle = rq->bo->handle; -+ set_domain.read_domains = I915_GEM_DOMAIN_GTT; -+ set_domain.write_domain = I915_GEM_DOMAIN_GTT; - -- FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret); -- } --#endif -+ ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); - } -- } -+ - #if SHOW_BATCH_AFTER -- if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0) -- __kgem_batch_debug(kgem, batch_end); -+ if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0) -+ __kgem_batch_debug(kgem, batch_end); - #endif -- kgem_commit(kgem); -- if (kgem->wedged) -+ -+ kgem_commit(kgem); -+ } -+ -+ if (unlikely(kgem->wedged)) - kgem_cleanup(kgem); - - kgem_reset(kgem); -@@ -3737,49 +4277,14 @@ void _kgem_submit(struct kgem *kgem) - assert(kgem->next_request != NULL); - } - --static bool find_hang_state(struct kgem *kgem, char *path, int maxlen) --{ -- int minor = kgem_get_minor(kgem); -- -- /* Search for our hang state in a few canonical locations. -- * In the unlikely event of having multiple devices, we -- * will need to check which minor actually corresponds to ours. 
-- */ -- -- snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor); -- if (access(path, R_OK) == 0) -- return true; -- -- snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor); -- if (access(path, R_OK) == 0) -- return true; -- -- snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor); -- if (access(path, R_OK) == 0) -- return true; -- -- path[0] = '\0'; -- return false; --} -- - void kgem_throttle(struct kgem *kgem) - { -- if (kgem->wedged) -+ if (unlikely(kgem->wedged)) - return; - - if (__kgem_throttle(kgem, true)) { -- static int once; -- char path[128]; -- - xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, - "Detected a hung GPU, disabling acceleration.\n"); -- if (!once && find_hang_state(kgem, path, sizeof(path))) { -- xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, -- "When reporting this, please include %s and the full dmesg.\n", -- path); -- once = 1; -- } -- - __kgem_set_wedged(kgem); - kgem->need_throttle = false; - } -@@ -3860,7 +4365,8 @@ bool kgem_expire_cache(struct kgem *kgem) - bool idle; - unsigned int i; - -- time(&now); -+ if (!time(&now)) -+ return false; - - while (__kgem_freed_bo) { - bo = __kgem_freed_bo; -@@ -3875,7 +4381,7 @@ bool kgem_expire_cache(struct kgem *kgem) - } - - kgem_clean_large_cache(kgem); -- if (container_of(kgem, struct sna, kgem)->scrn->vtSema) -+ if (__to_sna(kgem)->scrn->vtSema) - kgem_clean_scanout_cache(kgem); - - expire = 0; -@@ -3885,6 +4391,7 @@ bool kgem_expire_cache(struct kgem *kgem) - break; - } - -+ assert(now); - bo->delta = now; - } - if (expire) { -@@ -3909,7 +4416,7 @@ bool kgem_expire_cache(struct kgem *kgem) - #endif - - kgem_retire(kgem); -- if (kgem->wedged) -+ if (unlikely(kgem->wedged)) - kgem_cleanup(kgem); - - kgem->expire(kgem); -@@ -3930,6 +4437,8 @@ bool kgem_expire_cache(struct kgem *kgem) - break; - } - -+ assert(now); -+ kgem_bo_set_purgeable(kgem, bo); - bo->delta = now; - } - } -@@ -3960,16 +4469,11 @@ bool kgem_expire_cache(struct kgem *kgem) - 
count++; - size += bytes(bo); - kgem_bo_free(kgem, bo); -- DBG(("%s: expiring %d\n", -+ DBG(("%s: expiring handle=%d\n", - __FUNCTION__, bo->handle)); - } - } -- if (!list_is_empty(&preserve)) { -- preserve.prev->next = kgem->inactive[i].next; -- kgem->inactive[i].next->prev = preserve.prev; -- kgem->inactive[i].next = preserve.next; -- preserve.next->prev = &kgem->inactive[i]; -- } -+ list_splice_tail(&preserve, &kgem->inactive[i]); - } - - #ifdef DEBUG_MEMORY -@@ -3998,31 +4502,30 @@ bool kgem_cleanup_cache(struct kgem *kgem) - unsigned int i; - int n; - -+ DBG(("%s\n", __FUNCTION__)); -+ - /* sync to the most recent request */ - for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { - if (!list_is_empty(&kgem->requests[n])) { - struct kgem_request *rq; -- struct drm_i915_gem_set_domain set_domain; - -- rq = list_first_entry(&kgem->requests[n], -- struct kgem_request, -- list); -+ rq = list_last_entry(&kgem->requests[n], -+ struct kgem_request, -+ list); - - DBG(("%s: sync on cleanup\n", __FUNCTION__)); -- -- VG_CLEAR(set_domain); -- set_domain.handle = rq->bo->handle; -- set_domain.read_domains = I915_GEM_DOMAIN_GTT; -- set_domain.write_domain = I915_GEM_DOMAIN_GTT; -- (void)do_ioctl(kgem->fd, -- DRM_IOCTL_I915_GEM_SET_DOMAIN, -- &set_domain); -+ assert(rq->ring == n); -+ assert(rq->bo); -+ assert(RQ(rq->bo->rq) == rq); -+ kgem_bo_wait(kgem, rq->bo); - } -+ assert(list_is_empty(&kgem->requests[n])); - } - - kgem_retire(kgem); - kgem_cleanup(kgem); - -+ DBG(("%s: need_expire?=%d\n", __FUNCTION__, kgem->need_expire)); - if (!kgem->need_expire) - return false; - -@@ -4049,6 +4552,8 @@ bool kgem_cleanup_cache(struct kgem *kgem) - - kgem->need_purge = false; - kgem->need_expire = false; -+ -+ DBG(("%s: complete\n", __FUNCTION__)); - return true; - } - -@@ -4079,16 +4584,15 @@ retry_large: - goto discard; - - if (bo->tiling != I915_TILING_NONE) { -- if (use_active) -+ if (use_active && kgem->gen < 040) - goto discard; - -- if (!gem_set_tiling(kgem->fd, bo->handle, -+ if 
(!kgem_set_tiling(kgem, bo, - I915_TILING_NONE, 0)) - goto discard; -- -- bo->tiling = I915_TILING_NONE; -- bo->pitch = 0; - } -+ assert(bo->tiling == I915_TILING_NONE); -+ bo->pitch = 0; - - if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) - goto discard; -@@ -4169,17 +4673,17 @@ discard: - break; - } - -- if (I915_TILING_NONE != bo->tiling && -- !gem_set_tiling(kgem->fd, bo->handle, -- I915_TILING_NONE, 0)) -- continue; -+ if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0)) { -+ kgem_bo_free(kgem, bo); -+ break; -+ } - - kgem_bo_remove_from_inactive(kgem, bo); - assert(list_is_empty(&bo->vma)); - assert(list_is_empty(&bo->list)); - -- bo->tiling = I915_TILING_NONE; -- bo->pitch = 0; -+ assert(bo->tiling == I915_TILING_NONE); -+ assert(bo->pitch == 0); - bo->delta = 0; - DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n", - __FUNCTION__, bo->handle, num_pages(bo))); -@@ -4225,13 +4729,13 @@ discard: - if (first) - continue; - -- if (!gem_set_tiling(kgem->fd, bo->handle, -- I915_TILING_NONE, 0)) -- continue; -- -- bo->tiling = I915_TILING_NONE; -- bo->pitch = 0; -+ if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0)) { -+ kgem_bo_free(kgem, bo); -+ break; -+ } - } -+ assert(bo->tiling == I915_TILING_NONE); -+ bo->pitch = 0; - - if (bo->map__gtt || bo->map__wc || bo->map__cpu) { - if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { -@@ -4269,7 +4773,7 @@ discard: - kgem_bo_remove_from_inactive(kgem, bo); - - assert(bo->tiling == I915_TILING_NONE); -- bo->pitch = 0; -+ assert(bo->pitch == 0); - bo->delta = 0; - DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n", - __FUNCTION__, bo->handle, num_pages(bo), -@@ -4340,9 +4844,9 @@ struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name) - - bo->unique_id = kgem_get_unique_id(kgem); - bo->tiling = tiling.tiling_mode; -- bo->reusable = false; - bo->prime = true; -- bo->purged = true; /* no coherency guarantees */ -+ bo->reusable = false; -+ kgem_bo_unclean(kgem, bo); - - 
debug_alloc__bo(kgem, bo); - return bo; -@@ -4448,6 +4952,8 @@ int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo) - #if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC) - struct drm_prime_handle args; - -+ assert(kgem_bo_is_fenced(kgem, bo)); -+ - VG_CLEAR(args); - args.handle = bo->handle; - args.flags = O_CLOEXEC; -@@ -4479,6 +4985,8 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) - if ((flags & CREATE_UNCACHED) == 0) { - bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags); - if (bo) { -+ assert(!bo->purged); -+ assert(!bo->delta); - assert(bo->domain != DOMAIN_GPU); - ASSERT_IDLE(kgem, bo->handle); - bo->refcnt = 1; -@@ -4760,8 +5268,7 @@ static void __kgem_bo_make_scanout(struct kgem *kgem, - struct kgem_bo *bo, - int width, int height) - { -- ScrnInfoPtr scrn = -- container_of(kgem, struct sna, kgem)->scrn; -+ ScrnInfoPtr scrn = __to_sna(kgem)->scrn; - struct drm_mode_fb_cmd arg; - - assert(bo->proxy == NULL); -@@ -4809,6 +5316,48 @@ static void __kgem_bo_make_scanout(struct kgem *kgem, - } - } - -+static bool tiling_changed(struct kgem_bo *bo, int tiling, int pitch) -+{ -+ if (tiling != bo->tiling) -+ return true; -+ -+ return tiling != I915_TILING_NONE && pitch != bo->pitch; -+} -+ -+static void set_gpu_tiling(struct kgem *kgem, -+ struct kgem_bo *bo, -+ int tiling, int pitch) -+{ -+ DBG(("%s: handle=%d, tiling=%d, pitch=%d\n", -+ __FUNCTION__, bo->handle, tiling, pitch)); -+ -+ if (tiling_changed(bo, tiling, pitch) && bo->map__gtt) { -+ if (!list_is_empty(&bo->vma)) { -+ list_del(&bo->vma); -+ kgem->vma[0].count--; -+ } -+ munmap(bo->map__gtt, bytes(bo)); -+ bo->map__gtt = NULL; -+ } -+ -+ bo->tiling = tiling; -+ bo->pitch = pitch; -+} -+ -+bool kgem_bo_is_fenced(struct kgem *kgem, struct kgem_bo *bo) -+{ -+ struct drm_i915_gem_get_tiling tiling; -+ -+ assert(kgem); -+ assert(bo); -+ -+ VG_CLEAR(tiling); -+ tiling.handle = bo->handle; -+ tiling.tiling_mode = bo->tiling; -+ 
(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling); -+ return tiling.tiling_mode == bo->tiling; /* assume pitch is fine! */ -+} -+ - struct kgem_bo *kgem_create_2d(struct kgem *kgem, - int width, - int height, -@@ -4892,8 +5441,8 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, - return last; - } - -- if (container_of(kgem, struct sna, kgem)->scrn->vtSema) { -- ScrnInfoPtr scrn = container_of(kgem, struct sna, kgem)->scrn; -+ if (__to_sna(kgem)->scrn->vtSema) { -+ ScrnInfoPtr scrn = __to_sna(kgem)->scrn; - - list_for_each_entry_reverse(bo, &kgem->scanout, list) { - struct drm_mode_fb_cmd arg; -@@ -4915,11 +5464,8 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, - bo->delta = 0; - } - -- if (gem_set_tiling(kgem->fd, bo->handle, -- tiling, pitch)) { -- bo->tiling = tiling; -- bo->pitch = pitch; -- } else { -+ if (!kgem_set_tiling(kgem, bo, -+ tiling, pitch)) { - kgem_bo_free(kgem, bo); - break; - } -@@ -4950,6 +5496,9 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, - } - } - -+ if (flags & CREATE_CACHED) -+ return NULL; -+ - bo = __kgem_bo_create_as_display(kgem, size, tiling, pitch); - if (bo) - return bo; -@@ -4987,14 +5536,9 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, - if (num_pages(bo) < size) - continue; - -- if (bo->pitch != pitch || bo->tiling != tiling) { -- if (!gem_set_tiling(kgem->fd, bo->handle, -- tiling, pitch)) -- continue; -- -- bo->pitch = pitch; -- bo->tiling = tiling; -- } -+ if (!kgem_set_tiling(kgem, bo, tiling, pitch) && -+ !exact) -+ set_gpu_tiling(kgem, bo, tiling, pitch); - } - - kgem_bo_remove_from_active(kgem, bo); -@@ -5020,14 +5564,11 @@ large_inactive: - if (size > num_pages(bo)) - continue; - -- if (bo->tiling != tiling || -- (tiling != I915_TILING_NONE && bo->pitch != pitch)) { -- if (!gem_set_tiling(kgem->fd, bo->handle, -- tiling, pitch)) -+ if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { -+ if (kgem->gen >= 040 && !exact) -+ set_gpu_tiling(kgem, bo, tiling, pitch); -+ else - continue; -- -- 
bo->tiling = tiling; -- bo->pitch = pitch; - } - - if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { -@@ -5039,7 +5580,6 @@ large_inactive: - - assert(bo->domain != DOMAIN_GPU); - bo->unique_id = kgem_get_unique_id(kgem); -- bo->pitch = pitch; - bo->delta = 0; - DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n", - bo->pitch, bo->tiling, bo->handle, bo->unique_id)); -@@ -5088,14 +5628,13 @@ large_inactive: - if (bo->tiling != tiling || - (tiling != I915_TILING_NONE && bo->pitch != pitch)) { - if (bo->map__gtt || -- !gem_set_tiling(kgem->fd, bo->handle, -- tiling, pitch)) { -+ !kgem_set_tiling(kgem, bo, -+ tiling, pitch)) { - DBG(("inactive GTT vma with wrong tiling: %d < %d\n", - bo->tiling, tiling)); -- continue; -+ kgem_bo_free(kgem, bo); -+ break; - } -- bo->tiling = tiling; -- bo->pitch = pitch; - } - - if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { -@@ -5103,8 +5642,11 @@ large_inactive: - break; - } - -+ if (tiling == I915_TILING_NONE) -+ bo->pitch = pitch; -+ - assert(bo->tiling == tiling); -- bo->pitch = pitch; -+ assert(bo->pitch >= pitch); - bo->delta = 0; - bo->unique_id = kgem_get_unique_id(kgem); - -@@ -5170,15 +5712,12 @@ search_active: - if (num_pages(bo) < size) - continue; - -- if (bo->pitch != pitch) { -- if (!gem_set_tiling(kgem->fd, -- bo->handle, -- tiling, pitch)) -- continue; -- -- bo->pitch = pitch; -- } -+ if (!kgem_set_tiling(kgem, bo, tiling, pitch) && -+ !exact) -+ set_gpu_tiling(kgem, bo, tiling, pitch); - } -+ assert(bo->tiling == tiling); -+ assert(bo->pitch >= pitch); - - kgem_bo_remove_from_active(kgem, bo); - -@@ -5233,19 +5772,21 @@ search_active: - if (num_pages(bo) < size) - continue; - -- if (bo->tiling != tiling || -- (tiling != I915_TILING_NONE && bo->pitch != pitch)) { -- if (!gem_set_tiling(kgem->fd, -- bo->handle, -- tiling, pitch)) -- continue; -+ if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { -+ if (kgem->gen >= 040 && !exact) { -+ set_gpu_tiling(kgem, bo, -+ tiling, pitch); -+ } else 
{ -+ kgem_bo_free(kgem, bo); -+ break; -+ } - } -+ assert(bo->tiling == tiling); -+ assert(bo->pitch >= pitch); - - kgem_bo_remove_from_active(kgem, bo); - - bo->unique_id = kgem_get_unique_id(kgem); -- bo->pitch = pitch; -- bo->tiling = tiling; - bo->delta = 0; - DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", - bo->pitch, bo->tiling, bo->handle, bo->unique_id)); -@@ -5323,11 +5864,13 @@ search_inactive: - continue; - } - -- if (bo->tiling != tiling || -- (tiling != I915_TILING_NONE && bo->pitch != pitch)) { -- if (!gem_set_tiling(kgem->fd, bo->handle, -- tiling, pitch)) -- continue; -+ if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { -+ if (kgem->gen >= 040 && !exact) { -+ set_gpu_tiling(kgem, bo, tiling, pitch); -+ } else { -+ kgem_bo_free(kgem, bo); -+ break; -+ } - } - - if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { -@@ -5338,9 +5881,8 @@ search_inactive: - kgem_bo_remove_from_inactive(kgem, bo); - assert(list_is_empty(&bo->list)); - assert(list_is_empty(&bo->vma)); -- -- bo->pitch = pitch; -- bo->tiling = tiling; -+ assert(bo->tiling == tiling); -+ assert(bo->pitch >= pitch); - - bo->delta = 0; - bo->unique_id = kgem_get_unique_id(kgem); -@@ -5388,14 +5930,17 @@ search_inactive: - kgem_bo_remove_from_active(kgem, bo); - __kgem_bo_clear_busy(bo); - -- if (tiling != I915_TILING_NONE && bo->pitch != pitch) { -- if (!gem_set_tiling(kgem->fd, bo->handle, tiling, pitch)) { -+ if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { -+ if (kgem->gen >= 040 && !exact) { -+ set_gpu_tiling(kgem, bo, tiling, pitch); -+ } else { - kgem_bo_free(kgem, bo); - goto no_retire; - } - } -+ assert(bo->tiling == tiling); -+ assert(bo->pitch >= pitch); - -- bo->pitch = pitch; - bo->unique_id = kgem_get_unique_id(kgem); - bo->delta = 0; - DBG((" 2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", -@@ -5440,18 +5985,21 @@ create: - } - - bo->unique_id = kgem_get_unique_id(kgem); -- if (tiling == I915_TILING_NONE || -- gem_set_tiling(kgem->fd, handle, tiling, 
pitch)) { -- bo->tiling = tiling; -- bo->pitch = pitch; -+ if (kgem_set_tiling(kgem, bo, tiling, pitch)) { - if (flags & CREATE_SCANOUT) - __kgem_bo_make_scanout(kgem, bo, width, height); - } else { -- if (flags & CREATE_EXACT) { -- DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__)); -- gem_close(kgem->fd, handle); -- free(bo); -- return NULL; -+ if (kgem->gen >= 040) { -+ assert(!kgem->can_fence); -+ bo->tiling = tiling; -+ bo->pitch = pitch; -+ } else { -+ if (flags & CREATE_EXACT) { -+ DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__)); -+ gem_close(kgem->fd, handle); -+ free(bo); -+ return NULL; -+ } - } - } - -@@ -5608,7 +6156,7 @@ static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) - - void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo) - { -- if (!bo->needs_flush) -+ if (!bo->needs_flush && !bo->gtt_dirty) - return; - - kgem_bo_submit(kgem, bo); -@@ -5621,18 +6169,24 @@ void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo) - if (bo->rq) - __kgem_flush(kgem, bo); - -+ if (bo->scanout && kgem->needs_dirtyfb) { -+ struct drm_mode_fb_dirty_cmd cmd; -+ memset(&cmd, 0, sizeof(cmd)); -+ cmd.fb_id = bo->delta; -+ (void)drmIoctl(kgem->fd, DRM_IOCTL_MODE_DIRTYFB, &cmd); -+ } -+ - /* Whatever actually happens, we can regard the GTT write domain - * as being flushed. 
- */ -- bo->gtt_dirty = false; -- bo->needs_flush = false; -- bo->domain = DOMAIN_NONE; -+ __kgem_bo_clear_dirty(bo); - } - - inline static bool nearly_idle(struct kgem *kgem) - { - int ring = kgem->ring == KGEM_BLT; - -+ assert(ring < ARRAY_SIZE(kgem->requests)); - if (list_is_singular(&kgem->requests[ring])) - return true; - -@@ -5720,7 +6274,7 @@ static inline bool kgem_flush(struct kgem *kgem, bool flush) - if (kgem->nreloc == 0) - return true; - -- if (container_of(kgem, struct sna, kgem)->flags & SNA_POWERSAVE) -+ if (__to_sna(kgem)->flags & SNA_POWERSAVE) - return true; - - if (kgem->flush == flush && kgem->aperture < kgem->aperture_low) -@@ -5982,6 +6536,55 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) - return kgem_flush(kgem, flush); - } - -+void __kgem_bcs_set_tiling(struct kgem *kgem, -+ struct kgem_bo *src, -+ struct kgem_bo *dst) -+{ -+ uint32_t state, *b; -+ -+ DBG(("%s: src handle=%d:tiling=%d, dst handle=%d:tiling=%d\n", -+ __FUNCTION__, -+ src ? src->handle : 0, src ? src->tiling : 0, -+ dst ? dst->handle : 0, dst ? 
dst->tiling : 0)); -+ assert(kgem->mode == KGEM_BLT); -+ assert(dst == NULL || kgem_bo_can_blt(kgem, dst)); -+ assert(src == NULL || kgem_bo_can_blt(kgem, src)); -+ -+ state = 0; -+ if (dst && dst->tiling == I915_TILING_Y) -+ state |= BCS_DST_Y; -+ if (src && src->tiling == I915_TILING_Y) -+ state |= BCS_SRC_Y; -+ -+ if (kgem->bcs_state == state) -+ return; -+ -+ DBG(("%s: updating SWCTRL %x -> %x\n", __FUNCTION__, -+ kgem->bcs_state, state)); -+ -+ /* Over-estimate space in case we need to re-emit the cmd packet */ -+ if (!kgem_check_batch(kgem, 24)) { -+ _kgem_submit(kgem); -+ _kgem_set_mode(kgem, KGEM_BLT); -+ if (state == 0) -+ return; -+ } -+ -+ b = kgem->batch + kgem->nbatch; -+ if (kgem->nbatch) { -+ *b++ = MI_FLUSH_DW; -+ *b++ = 0; -+ *b++ = 0; -+ *b++ = 0; -+ } -+ *b++ = MI_LOAD_REGISTER_IMM; -+ *b++ = BCS_SWCTRL; -+ *b++ = (BCS_SRC_Y | BCS_DST_Y) << 16 | state; -+ kgem->nbatch = b - kgem->batch; -+ -+ kgem->bcs_state = state; -+} -+ - uint32_t kgem_add_reloc(struct kgem *kgem, - uint32_t pos, - struct kgem_bo *bo, -@@ -6195,12 +6798,6 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) - - list_del(&bo->vma); - kgem->vma[type].count--; -- -- if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) { -- DBG(("%s: freeing unpurgeable old mapping\n", -- __FUNCTION__)); -- kgem_bo_free(kgem, bo); -- } - } - } - -@@ -6216,8 +6813,8 @@ static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo) - kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); - - if (bo->tiling || !kgem->has_wc_mmap) { -- assert(num_pages(bo) <= kgem->aperture_mappable / 2); - assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y); -+ warn_unless(num_pages(bo) <= kgem->aperture_mappable / 2); - - ptr = bo->map__gtt; - if (ptr == NULL) -@@ -6291,6 +6888,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) - DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); - kgem_throttle(kgem); - } -+ bo->needs_flush = false; - kgem_bo_retire(kgem, bo); - 
bo->domain = DOMAIN_GTT; - bo->gtt_dirty = true; -@@ -6319,14 +6917,16 @@ void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo) - bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); - - assert(bo->proxy == NULL); -- assert(bo->exec == NULL); - assert(list_is_empty(&bo->list)); - assert_tiling(kgem, bo); - assert(!bo->purged || bo->reusable); - - if (bo->map__wc) - return bo->map__wc; -+ if (!kgem->has_wc_mmap) -+ return NULL; - -+ kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); - return __kgem_bo_map__wc(kgem, bo); - } - -@@ -6373,6 +6973,8 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) - { - struct drm_gem_flink flink; - -+ assert(kgem_bo_is_fenced(kgem, bo)); -+ - VG_CLEAR(flink); - flink.handle = bo->handle; - if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink)) -@@ -6387,7 +6989,6 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) - * party, we track the lifetime accurately. - */ - bo->reusable = false; -- - kgem_bo_unclean(kgem, bo); - - return flink.name; -@@ -6411,16 +7012,34 @@ struct kgem_bo *kgem_create_map(struct kgem *kgem, - first_page = (uintptr_t)ptr; - last_page = first_page + size + PAGE_SIZE - 1; - -- first_page &= ~(PAGE_SIZE-1); -- last_page &= ~(PAGE_SIZE-1); -+ first_page &= ~(uintptr_t)(PAGE_SIZE-1); -+ last_page &= ~(uintptr_t)(PAGE_SIZE-1); - assert(last_page > first_page); - - handle = gem_userptr(kgem->fd, - (void *)first_page, last_page-first_page, - read_only); - if (handle == 0) { -- DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno)); -- return NULL; -+ if (read_only && kgem->has_wc_mmap) { -+ struct drm_i915_gem_set_domain set_domain; -+ -+ handle = gem_userptr(kgem->fd, -+ (void *)first_page, last_page-first_page, -+ false); -+ -+ VG_CLEAR(set_domain); -+ set_domain.handle = handle; -+ set_domain.read_domains = I915_GEM_DOMAIN_GTT; -+ set_domain.write_domain = 0; -+ if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { -+ 
gem_close(kgem->fd, handle); -+ handle = 0; -+ } -+ } -+ if (handle == 0) { -+ DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno)); -+ return NULL; -+ } - } - - bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE); -@@ -6483,8 +7102,10 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) - DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); - kgem_throttle(kgem); - } -+ bo->needs_flush = false; - kgem_bo_retire(kgem, bo); - bo->domain = DOMAIN_CPU; -+ bo->gtt_dirty = true; - } - } - -@@ -6505,6 +7126,9 @@ void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write) - assert(bo->refcnt); - assert(!bo->purged); - -+ if (bo->rq == NULL && (kgem->has_llc || bo->snoop) && !write) -+ return; -+ - if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) { - struct drm_i915_gem_set_domain set_domain; - -@@ -6522,9 +7146,11 @@ void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write) - DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); - kgem_throttle(kgem); - } -+ bo->needs_flush = false; - if (write) { - kgem_bo_retire(kgem, bo); - bo->domain = DOMAIN_CPU; -+ bo->gtt_dirty = true; - } else { - if (bo->exec == NULL) - kgem_bo_maybe_retire(kgem, bo); -@@ -6539,6 +7165,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) - assert(bo->refcnt); - assert(bo->proxy == NULL); - assert_tiling(kgem, bo); -+ assert(!bo->snoop); - - kgem_bo_submit(kgem, bo); - -@@ -6559,6 +7186,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) - DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); - kgem_throttle(kgem); - } -+ bo->needs_flush = false; - kgem_bo_retire(kgem, bo); - bo->domain = DOMAIN_GTT; - bo->gtt_dirty = true; -@@ -7485,6 +8113,7 @@ kgem_replace_bo(struct kgem *kgem, - } - _kgem_set_mode(kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(kgem, src, dst); - - br00 = XY_SRC_COPY_BLT_CMD; - br13 = pitch; -@@ -7553,6 +8182,9 @@ bool kgem_bo_convert_to_gpu(struct kgem 
*kgem, - __FUNCTION__, bo->handle, flags, __kgem_bo_is_busy(kgem, bo))); - assert(bo->tiling == I915_TILING_NONE); - -+ if (flags & (__MOVE_PRIME | __MOVE_SCANOUT)) -+ return false; -+ - if (kgem->has_llc) - return true; - -diff --git a/src/sna/kgem.h b/src/sna/kgem.h -index 2267bacf..08b4eb20 100644 ---- a/src/sna/kgem.h -+++ b/src/sna/kgem.h -@@ -42,6 +42,7 @@ struct kgem_bo { - #define RQ(rq) ((struct kgem_request *)((uintptr_t)(rq) & ~3)) - #define RQ_RING(rq) ((uintptr_t)(rq) & 3) - #define RQ_IS_BLT(rq) (RQ_RING(rq) == KGEM_BLT) -+#define RQ_IS_RENDER(rq) (RQ_RING(rq) == KGEM_RENDER) - #define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring))) - - struct drm_i915_gem_exec_object2 *exec; -@@ -103,7 +104,7 @@ struct kgem_request { - struct list list; - struct kgem_bo *bo; - struct list buffers; -- int ring; -+ unsigned ring; - }; - - enum { -@@ -112,6 +113,12 @@ enum { - NUM_MAP_TYPES, - }; - -+typedef void (*memcpy_box_func)(const void *src, void *dst, int bpp, -+ int32_t src_stride, int32_t dst_stride, -+ int16_t src_x, int16_t src_y, -+ int16_t dst_x, int16_t dst_y, -+ uint16_t width, uint16_t height); -+ - struct kgem { - unsigned wedged; - int fd; -@@ -157,6 +164,8 @@ struct kgem { - int16_t count; - } vma[NUM_MAP_TYPES]; - -+ uint32_t bcs_state; -+ - uint32_t batch_flags; - uint32_t batch_flags_base; - #define I915_EXEC_SECURE (1<<9) -@@ -186,9 +195,15 @@ struct kgem { - uint32_t has_no_reloc :1; - uint32_t has_handle_lut :1; - uint32_t has_wc_mmap :1; -+ uint32_t has_dirtyfb :1; - -+ uint32_t can_fence :1; - uint32_t can_blt_cpu :1; -+ uint32_t can_blt_y :1; - uint32_t can_render_y :1; -+ uint32_t can_scanout_y :1; -+ -+ uint32_t needs_dirtyfb :1; - - uint16_t fence_max; - uint16_t half_cpu_cache_pages; -@@ -203,16 +218,9 @@ struct kgem { - void (*retire)(struct kgem *kgem); - void (*expire)(struct kgem *kgem); - -- void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp, -- int32_t src_stride, int32_t dst_stride, -- int16_t 
src_x, int16_t src_y, -- int16_t dst_x, int16_t dst_y, -- uint16_t width, uint16_t height); -- void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp, -- int32_t src_stride, int32_t dst_stride, -- int16_t src_x, int16_t src_y, -- int16_t dst_x, int16_t dst_y, -- uint16_t width, uint16_t height); -+ memcpy_box_func memcpy_to_tiled_x; -+ memcpy_box_func memcpy_from_tiled_x; -+ memcpy_box_func memcpy_between_tiled_x; - - struct kgem_bo *batch_bo; - -@@ -230,7 +238,7 @@ struct kgem { - - #define KGEM_MAX_DEFERRED_VBO 16 - --#define KGEM_BATCH_RESERVED 1 -+#define KGEM_BATCH_RESERVED 8 /* LRI(SWCTRL) + END */ - #define KGEM_RELOC_RESERVED (KGEM_MAX_DEFERRED_VBO) - #define KGEM_EXEC_RESERVED (1+KGEM_MAX_DEFERRED_VBO) - -@@ -317,6 +325,7 @@ bool kgem_bo_convert_to_gpu(struct kgem *kgem, - struct kgem_bo *bo, - unsigned flags); - -+bool kgem_bo_is_fenced(struct kgem *kgem, struct kgem_bo *bo); - uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); - void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); - -@@ -342,6 +351,11 @@ static inline bool kgem_ring_is_idle(struct kgem *kgem, int ring) - { - ring = ring == KGEM_BLT; - -+ if (kgem->needs_semaphore && -+ !list_is_empty(&kgem->requests[!ring]) && -+ !__kgem_ring_is_idle(kgem, !ring)) -+ return false; -+ - if (list_is_empty(&kgem->requests[ring])) - return true; - -@@ -390,6 +404,7 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo); - static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) - { - assert(bo->refcnt); -+ assert(bo->refcnt > bo->active_scanout); - if (--bo->refcnt == 0) - _kgem_bo_destroy(kgem, bo); - } -@@ -400,13 +415,13 @@ static inline void kgem_set_mode(struct kgem *kgem, - enum kgem_mode mode, - struct kgem_bo *bo) - { -- assert(!kgem->wedged); -+ warn_unless(!kgem->wedged); - - #if DEBUG_FLUSH_BATCH - kgem_submit(kgem); - #endif - -- if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring)) { -+ if 
(kgem->nreloc && bo->rq == NULL && kgem_ring_is_idle(kgem, kgem->ring)) { - DBG(("%s: flushing before new bo\n", __FUNCTION__)); - _kgem_submit(kgem); - } -@@ -422,7 +437,7 @@ static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) - { - assert(kgem->mode == KGEM_NONE); - assert(kgem->nbatch == 0); -- assert(!kgem->wedged); -+ warn_unless(!kgem->wedged); - kgem->context_switch(kgem, mode); - kgem->mode = mode; - } -@@ -566,7 +581,7 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem, - { - assert(bo->refcnt); - -- if (bo->tiling == I915_TILING_Y) { -+ if (bo->tiling == I915_TILING_Y && !kgem->can_blt_y) { - DBG(("%s: can not blt to handle=%d, tiling=Y\n", - __FUNCTION__, bo->handle)); - return false; -@@ -581,6 +596,22 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem, - return kgem_bo_blt_pitch_is_ok(kgem, bo); - } - -+void __kgem_bcs_set_tiling(struct kgem *kgem, -+ struct kgem_bo *src, -+ struct kgem_bo *dst); -+ -+inline static void kgem_bcs_set_tiling(struct kgem *kgem, -+ struct kgem_bo *src, -+ struct kgem_bo *dst) -+{ -+ assert(kgem->mode == KGEM_BLT); -+ -+ if (!kgem->can_blt_y) -+ return; -+ -+ __kgem_bcs_set_tiling(kgem, src, dst); -+} -+ - static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) - { - assert(bo->refcnt); -@@ -607,17 +638,24 @@ static inline void kgem_bo_mark_busy(struct kgem *kgem, struct kgem_bo *bo, int - } - } - --inline static void __kgem_bo_clear_busy(struct kgem_bo *bo) -+static inline void __kgem_bo_clear_dirty(struct kgem_bo *bo) - { - DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); -- bo->rq = NULL; -- list_del(&bo->request); - - bo->domain = DOMAIN_NONE; - bo->needs_flush = false; - bo->gtt_dirty = false; - } - -+inline static void __kgem_bo_clear_busy(struct kgem_bo *bo) -+{ -+ DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); -+ bo->rq = NULL; -+ list_del(&bo->request); -+ -+ __kgem_bo_clear_dirty(bo); -+} -+ - static inline bool kgem_bo_is_busy(struct kgem_bo *bo) - { - DBG(("%s: handle=%d, 
domain: %d exec? %d, rq? %d\n", __FUNCTION__, -@@ -626,7 +664,7 @@ static inline bool kgem_bo_is_busy(struct kgem_bo *bo) - return bo->rq; - } - --void __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo); -+bool __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo); - static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) - { - DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, -@@ -636,14 +674,13 @@ static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) - if (bo->exec) - return true; - -- if (bo->rq && !__kgem_busy(kgem, bo->handle)) { -- __kgem_retire_requests_upto(kgem, bo); -- assert(list_is_empty(&bo->request)); -- assert(bo->rq == NULL); -- assert(bo->domain == DOMAIN_NONE); -- } -+ if (bo->rq == NULL) -+ return false; -+ -+ if (__kgem_busy(kgem, bo->handle)) -+ return true; - -- return kgem_bo_is_busy(bo); -+ return __kgem_retire_requests_upto(kgem, bo); - } - - static inline bool kgem_bo_is_render(struct kgem_bo *bo) -@@ -651,7 +688,15 @@ static inline bool kgem_bo_is_render(struct kgem_bo *bo) - DBG(("%s: handle=%d, rq? %d [%d]\n", __FUNCTION__, - bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq))); - assert(bo->refcnt); -- return bo->rq && RQ_RING(bo->rq) == I915_EXEC_RENDER; -+ return bo->rq && RQ_RING(bo->rq) != KGEM_BLT; -+} -+ -+static inline bool kgem_bo_is_blt(struct kgem_bo *bo) -+{ -+ DBG(("%s: handle=%d, rq? 
%d\n", __FUNCTION__, -+ bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq))); -+ assert(bo->refcnt); -+ return RQ_RING(bo->rq) == KGEM_BLT; - } - - static inline void kgem_bo_mark_unreusable(struct kgem_bo *bo) -@@ -852,6 +897,6 @@ memcpy_from_tiled_x(struct kgem *kgem, - width, height); - } - --void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling); -+void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling, unsigned cpu); - - #endif /* KGEM_H */ -diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c -index 9b80dc88..8e6e47b6 100644 ---- a/src/sna/kgem_debug_gen4.c -+++ b/src/sna/kgem_debug_gen4.c -@@ -598,7 +598,7 @@ int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset) - assert(len == 7); - kgem_debug_print(data, offset, 0, - "3DSTATE_DEPTH_BUFFER\n"); -- kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", -+ kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", - get_965_surfacetype(data[1] >> 29), - get_965_depthformat((data[1] >> 18) & 0x7), - (data[1] & 0x0001ffff) + 1, -diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c -index 8b55dd91..f1b1275f 100644 ---- a/src/sna/kgem_debug_gen5.c -+++ b/src/sna/kgem_debug_gen5.c -@@ -573,7 +573,7 @@ int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset) - assert(len == 7); - kgem_debug_print(data, offset, 0, - "3DSTATE_DEPTH_BUFFER\n"); -- kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", -+ kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", - get_965_surfacetype(data[1] >> 29), - get_965_depthformat((data[1] >> 18) & 0x7), - (data[1] & 0x0001ffff) + 1, -diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c -index 7ef55d38..579c5d54 100644 ---- a/src/sna/kgem_debug_gen6.c -+++ b/src/sna/kgem_debug_gen6.c -@@ -985,7 +985,7 @@ int kgem_gen6_decode_3d(struct kgem 
*kgem, uint32_t offset) - assert(len == 7); - kgem_debug_print(data, offset, 0, - "3DSTATE_DEPTH_BUFFER\n"); -- kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", -+ kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", - get_965_surfacetype(data[1] >> 29), - get_965_depthformat((data[1] >> 18) & 0x7), - (data[1] & 0x0001ffff) + 1, -diff --git a/src/sna/sna.h b/src/sna/sna.h -index 18425e30..7861110a 100644 ---- a/src/sna/sna.h -+++ b/src/sna/sna.h -@@ -154,6 +154,8 @@ struct sna_pixmap { - #define MAPPED_GTT 1 - #define MAPPED_CPU 2 - uint8_t flush :2; -+#define FLUSH_READ 1 -+#define FLUSH_WRITE 2 - uint8_t shm :1; - uint8_t clear :1; - uint8_t header :1; -@@ -179,18 +181,31 @@ static inline WindowPtr get_root_window(ScreenPtr screen) - #endif - } - -+#if !NDEBUG -+static PixmapPtr check_pixmap(PixmapPtr pixmap) -+{ -+ if (pixmap != NULL) { -+ assert(pixmap->refcnt >= 1); -+ assert(pixmap->devKind != 0xdeadbeef); -+ } -+ return pixmap; -+} -+#else -+#define check_pixmap(p) p -+#endif -+ - static inline PixmapPtr get_window_pixmap(WindowPtr window) - { - assert(window); - assert(window->drawable.type != DRAWABLE_PIXMAP); -- return fbGetWindowPixmap(window); -+ return check_pixmap(fbGetWindowPixmap(window)); - } - - static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable) - { - assert(drawable); - if (drawable->type == DRAWABLE_PIXMAP) -- return (PixmapPtr)drawable; -+ return check_pixmap((PixmapPtr)drawable); - else - return get_window_pixmap((WindowPtr)drawable); - } -@@ -244,11 +259,12 @@ struct sna { - #define SNA_NO_VSYNC 0x40 - #define SNA_TRIPLE_BUFFER 0x80 - #define SNA_TEAR_FREE 0x100 --#define SNA_FORCE_SHADOW 0x200 --#define SNA_FLUSH_GTT 0x400 -+#define SNA_WANT_TEAR_FREE 0x200 -+#define SNA_FORCE_SHADOW 0x400 -+#define SNA_FLUSH_GTT 0x800 - #define SNA_PERFORMANCE 0x1000 - #define SNA_POWERSAVE 0x2000 --#define SNA_REMOVE_OUTPUTS 0x4000 -+#define 
SNA_NO_DPMS 0x4000 - #define SNA_HAS_FLIP 0x10000 - #define SNA_HAS_ASYNC_FLIP 0x20000 - #define SNA_LINEAR_FB 0x40000 -@@ -265,7 +281,13 @@ struct sna { - #define AVX 0x80 - #define AVX2 0x100 - -- unsigned watch_flush; -+ bool ignore_copy_area : 1; -+ -+ unsigned watch_shm_flush; -+ unsigned watch_dri_flush; -+ unsigned damage_event; -+ bool needs_shm_flush; -+ bool needs_dri_flush; - - struct timeval timer_tv; - uint32_t timer_expire[NUM_TIMERS]; -@@ -284,9 +306,17 @@ struct sna { - struct kgem_bo *shadow; - unsigned front_active; - unsigned shadow_active; -+ unsigned rr_active; - unsigned flip_active; -+ unsigned hidden; -+ bool shadow_enabled; -+ bool shadow_wait; - bool dirty; - -+ struct drm_event_vblank *shadow_events; -+ int shadow_nevent; -+ int shadow_size; -+ - int max_crtc_width, max_crtc_height; - RegionRec shadow_region; - RegionRec shadow_cancel; -@@ -318,7 +348,8 @@ struct sna { - uint32_t fg, bg; - int size; - -- int active; -+ bool disable; -+ bool active; - int last_x; - int last_y; - -@@ -331,8 +362,9 @@ struct sna { - } cursor; - - struct sna_dri2 { -- bool available; -- bool open; -+ bool available : 1; -+ bool enable : 1; -+ bool open : 1; - - #if HAVE_DRI2 - void *flip_pending; -@@ -341,8 +373,11 @@ struct sna { - } dri2; - - struct sna_dri3 { -- bool available; -- bool open; -+ bool available :1; -+ bool override : 1; -+ bool enable : 1; -+ bool open :1; -+ - #if HAVE_DRI3 - SyncScreenCreateFenceFunc create_fence; - struct list pixmaps; -@@ -353,6 +388,9 @@ struct sna { - bool available; - bool open; - #if HAVE_PRESENT -+ struct list vblank_queue; -+ uint64_t unflip; -+ void *freed_info; - #endif - } present; - -@@ -364,8 +402,10 @@ struct sna { - EntityInfoPtr pEnt; - const struct intel_device_info *info; - -+#if !HAVE_NOTIFY_FD - ScreenBlockHandlerProcPtr BlockHandler; - ScreenWakeupHandlerProcPtr WakeupHandler; -+#endif - CloseScreenProcPtr CloseScreen; - - PicturePtr clear; -@@ -383,6 +423,7 @@ struct sna { - struct gen6_render_state 
gen6; - struct gen7_render_state gen7; - struct gen8_render_state gen8; -+ struct gen9_render_state gen9; - } render_state; - - /* Broken-out options. */ -@@ -420,7 +461,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna); - bool sna_mode_fake_init(struct sna *sna, int num_fake); - bool sna_mode_wants_tear_free(struct sna *sna); - void sna_mode_adjust_frame(struct sna *sna, int x, int y); --extern void sna_mode_discover(struct sna *sna); -+extern void sna_mode_discover(struct sna *sna, bool tell); - extern void sna_mode_check(struct sna *sna); - extern bool sna_mode_disable(struct sna *sna); - extern void sna_mode_enable(struct sna *sna); -@@ -434,6 +475,7 @@ extern void sna_shadow_unset_crtc(struct sna *sna, xf86CrtcPtr crtc); - extern bool sna_pixmap_discard_shadow_damage(struct sna_pixmap *priv, - const RegionRec *region); - extern void sna_mode_set_primary(struct sna *sna); -+extern bool sna_mode_find_hotplug_connector(struct sna *sna, unsigned id); - extern void sna_mode_close(struct sna *sna); - extern void sna_mode_fini(struct sna *sna); - -@@ -444,6 +486,7 @@ extern bool sna_cursors_init(ScreenPtr screen, struct sna *sna); - typedef void (*sna_flip_handler_t)(struct drm_event_vblank *e, - void *data); - -+extern bool sna_needs_page_flip(struct sna *sna, struct kgem_bo *bo); - extern int sna_page_flip(struct sna *sna, - struct kgem_bo *bo, - sna_flip_handler_t handler, -@@ -461,6 +504,11 @@ to_sna_from_screen(ScreenPtr screen) - return to_sna(xf86ScreenToScrn(screen)); - } - -+pure static inline ScreenPtr to_screen_from_sna(struct sna *sna) -+{ -+ return xf86ScrnToScreen(sna->scrn); -+} -+ - pure static inline struct sna * - to_sna_from_pixmap(PixmapPtr pixmap) - { -@@ -498,12 +546,11 @@ to_sna_from_kgem(struct kgem *kgem) - extern xf86CrtcPtr sna_covering_crtc(struct sna *sna, - const BoxRec *box, - xf86CrtcPtr desired); -+extern xf86CrtcPtr sna_primary_crtc(struct sna *sna); - - extern bool sna_wait_for_scanline(struct sna *sna, PixmapPtr 
pixmap, - xf86CrtcPtr crtc, const BoxRec *clip); - --xf86CrtcPtr sna_mode_first_crtc(struct sna *sna); -- - const struct ust_msc { - uint64_t msc; - int tv_sec; -@@ -536,6 +583,11 @@ static inline uint64_t ust64(int tv_sec, int tv_usec) - return (uint64_t)tv_sec * 1000000 + tv_usec; - } - -+static inline uint64_t swap_ust(const struct ust_msc *swap) -+{ -+ return ust64(swap->tv_sec, swap->tv_usec); -+} -+ - #if HAVE_DRI2 - bool sna_dri2_open(struct sna *sna, ScreenPtr pScreen); - void sna_dri2_page_flip_handler(struct sna *sna, struct drm_event_vblank *event); -@@ -567,20 +619,59 @@ bool sna_present_open(struct sna *sna, ScreenPtr pScreen); - void sna_present_update(struct sna *sna); - void sna_present_close(struct sna *sna, ScreenPtr pScreen); - void sna_present_vblank_handler(struct drm_event_vblank *event); -+void sna_present_cancel_flip(struct sna *sna); - #else - static inline bool sna_present_open(struct sna *sna, ScreenPtr pScreen) { return false; } - static inline void sna_present_update(struct sna *sna) { } - static inline void sna_present_close(struct sna *sna, ScreenPtr pScreen) { } - static inline void sna_present_vblank_handler(struct drm_event_vblank *event) { } -+static inline void sna_present_cancel_flip(struct sna *sna) { } - #endif - --extern bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, uint32_t rotation); --extern int sna_crtc_to_pipe(xf86CrtcPtr crtc); --extern uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc); --extern uint32_t sna_crtc_id(xf86CrtcPtr crtc); --extern bool sna_crtc_is_on(xf86CrtcPtr crtc); -+extern unsigned sna_crtc_count_sprites(xf86CrtcPtr crtc); -+extern bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, unsigned idx, uint32_t rotation); -+extern uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc, unsigned idx); - extern bool sna_crtc_is_transformed(xf86CrtcPtr crtc); - -+#define CRTC_VBLANK 0x3 -+#define CRTC_ON 0x80000000 -+ -+uint32_t sna_crtc_id(xf86CrtcPtr crtc); -+ -+static inline unsigned long *sna_crtc_flags(xf86CrtcPtr 
crtc) -+{ -+ unsigned long *flags = crtc->driver_private; -+ assert(flags); -+ return flags; -+} -+ -+static inline unsigned sna_crtc_pipe(xf86CrtcPtr crtc) -+{ -+ return *sna_crtc_flags(crtc) >> 8 & 0xff; -+} -+ -+static inline bool sna_crtc_is_on(xf86CrtcPtr crtc) -+{ -+ return *sna_crtc_flags(crtc) & CRTC_ON; -+} -+ -+static inline void sna_crtc_set_vblank(xf86CrtcPtr crtc) -+{ -+ assert((*sna_crtc_flags(crtc) & CRTC_VBLANK) < 3); -+ ++*sna_crtc_flags(crtc); -+} -+ -+static inline void sna_crtc_clear_vblank(xf86CrtcPtr crtc) -+{ -+ assert(*sna_crtc_flags(crtc) & CRTC_VBLANK); -+ --*sna_crtc_flags(crtc); -+} -+ -+static inline bool sna_crtc_has_vblank(xf86CrtcPtr crtc) -+{ -+ return *sna_crtc_flags(crtc) & CRTC_VBLANK; -+} -+ - CARD32 sna_format_for_depth(int depth); - CARD32 sna_render_format_for_depth(int depth); - -@@ -998,15 +1089,14 @@ static inline uint32_t pixmap_size(PixmapPtr pixmap) - - bool sna_accel_init(ScreenPtr sreen, struct sna *sna); - void sna_accel_create(struct sna *sna); --void sna_accel_block_handler(struct sna *sna, struct timeval **tv); --void sna_accel_wakeup_handler(struct sna *sna); --void sna_accel_watch_flush(struct sna *sna, int enable); -+void sna_accel_block(struct sna *sna, struct timeval **tv); - void sna_accel_flush(struct sna *sna); - void sna_accel_enter(struct sna *sna); - void sna_accel_leave(struct sna *sna); - void sna_accel_close(struct sna *sna); - void sna_accel_free(struct sna *sna); - -+void sna_watch_flush(struct sna *sna, int enable); - void sna_copy_fbcon(struct sna *sna); - - bool sna_composite_create(struct sna *sna); -@@ -1127,6 +1217,16 @@ memcpy_blt(const void *src, void *dst, int bpp, - uint16_t width, uint16_t height); - - void -+affine_blt(const void *src, void *dst, int bpp, -+ int16_t src_x, int16_t src_y, -+ int16_t src_width, int16_t src_height, -+ int32_t src_stride, -+ int16_t dst_x, int16_t dst_y, -+ uint16_t dst_width, uint16_t dst_height, -+ int32_t dst_stride, -+ const struct pixman_f_transform 
*t); -+ -+void - memmove_box(const void *src, void *dst, - int bpp, int32_t stride, - const BoxRec *box, -@@ -1182,6 +1282,31 @@ box_intersect(BoxPtr a, const BoxRec *b) - return true; - } - -+const BoxRec * -+__find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y); -+inline static const BoxRec * -+find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y) -+{ -+ /* Special case for incremental trapezoid clipping */ -+ if (begin == end) -+ return end; -+ -+ /* Quick test if scanline is within range of clip boxes */ -+ if (begin->y2 > y) { -+ assert(end == begin + 1 || -+ __find_clip_box_for_y(begin, end, y) == begin); -+ return begin; -+ } -+ if (y >= end[-1].y2) { -+ assert(end == begin + 1 || -+ __find_clip_box_for_y(begin, end, y) == end); -+ return end; -+ } -+ -+ /* Otherwise bisect to find the first box crossing y */ -+ return __find_clip_box_for_y(begin, end, y); -+} -+ - unsigned sna_cpu_detect(void); - char *sna_cpu_features_to_string(unsigned features, char *line); - -@@ -1237,4 +1362,17 @@ static inline void sigtrap_put(void) - extern int getline(char **line, size_t *len, FILE *file); - #endif - -+static inline void add_shm_flush(struct sna *sna, struct sna_pixmap *priv) -+{ -+ if (!priv->shm) -+ return; -+ -+ DBG(("%s: marking handle=%d for SHM flush\n", -+ __FUNCTION__, priv->cpu_bo->handle)); -+ -+ assert(!priv->flush); -+ sna_add_flush_pixmap(sna, priv, priv->cpu_bo); -+ sna->needs_shm_flush = true; -+} -+ - #endif /* _SNA_H */ -diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c -index baf5f609..25a075cf 100644 ---- a/src/sna/sna_accel.c -+++ b/src/sna/sna_accel.c -@@ -50,8 +50,11 @@ - #endif - #include - -+#include -+ - #include - #include -+#include - #include - - #ifdef HAVE_VALGRIND -@@ -66,7 +69,8 @@ - #define FORCE_FLUSH 0 - #define FORCE_FULL_SYNC 0 /* https://bugs.freedesktop.org/show_bug.cgi?id=61628 */ - --#define DEFAULT_TILING I915_TILING_X -+#define DEFAULT_PIXMAP_TILING I915_TILING_X -+#define 
DEFAULT_SCANOUT_TILING I915_TILING_X - - #define USE_INPLACE 1 - #define USE_SPANS 0 /* -1 force CPU, 1 force GPU */ -@@ -115,6 +119,11 @@ - #define RECTILINEAR 0x4 - #define OVERWRITES 0x8 - -+#if XFONT2_CLIENT_FUNCS_VERSION >= 1 -+#define AllocateFontPrivateIndex() xfont2_allocate_font_private_index() -+#define FontSetPrivate(font, idx, data) xfont2_font_set_private(font, idx, data) -+#endif -+ - #if 0 - static void __sna_fallback_flush(DrawablePtr d) - { -@@ -213,6 +222,7 @@ static GCOps sna_gc_ops__tmp; - static const GCFuncs sna_gc_funcs; - static const GCFuncs sna_gc_funcs__cpu; - -+static void sna_shm_watch_flush(struct sna *sna, int enable); - static void - sna_poly_fill_rect__gpu(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect); - -@@ -527,10 +537,10 @@ sna_pixmap_alloc_cpu(struct sna *sna, - DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__, - pixmap->drawable.width, pixmap->drawable.height)); - -- hint = 0; -- if ((flags & MOVE_ASYNC_HINT) == 0 && -- ((flags & MOVE_READ) == 0 || (priv->gpu_damage && !priv->clear && !sna->kgem.has_llc))) -- hint = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE; -+ hint = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE; -+ if ((flags & MOVE_ASYNC_HINT) || -+ (priv->gpu_damage && !priv->clear && kgem_bo_is_busy(priv->gpu_bo) && sna->kgem.can_blt_cpu)) -+ hint = 0; - - priv->cpu_bo = kgem_create_cpu_2d(&sna->kgem, - pixmap->drawable.width, -@@ -580,7 +590,7 @@ static void __sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv) - if (priv->cpu_bo->flush) { - assert(!priv->cpu_bo->reusable); - kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo); -- sna_accel_watch_flush(sna, -1); -+ sna_shm_watch_flush(sna, -1); - } - kgem_bo_destroy(&sna->kgem, priv->cpu_bo); - } else if (!IS_STATIC_PTR(priv->ptr)) -@@ -612,9 +622,9 @@ static bool sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv, bool a - - static inline uint32_t default_tiling(struct sna *sna, PixmapPtr pixmap) - { --#if DEFAULT_TILING == 
I915_TILING_NONE -+#if DEFAULT_PIXMAP_TILING == I915_TILING_NONE - return I915_TILING_NONE; --#elif DEFAULT_TILING == I915_TILING_X -+#elif DEFAULT_PIXMAP_TILING == I915_TILING_X - return I915_TILING_X; - #else - /* Try to avoid hitting the Y-tiling GTT mapping bug on 855GM */ -@@ -630,15 +640,6 @@ static inline uint32_t default_tiling(struct sna *sna, PixmapPtr pixmap) - pixmap->drawable.height > sna->render.max_3d_size)) - return I915_TILING_X; - -- if (sna_damage_is_all(&sna_pixmap(pixmap)->cpu_damage, -- pixmap->drawable.width, -- pixmap->drawable.height)) { -- DBG(("%s: entire source is damaged, using Y-tiling\n", -- __FUNCTION__)); -- sna_damage_destroy(&sna_pixmap(priv)->gpu_damage); -- return I915_TILING_Y; -- } -- - return I915_TILING_Y; - #endif - } -@@ -666,6 +667,7 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling) - __FUNCTION__, priv->gpu_bo->tiling, tiling, - pixmap->drawable.width, pixmap->drawable.height)); - assert(priv->gpu_damage == NULL || priv->gpu_bo); -+ assert(priv->gpu_bo->tiling != tiling); - - if (priv->pinned) { - DBG(("%s: can't convert pinned bo\n", __FUNCTION__)); -@@ -690,6 +692,12 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling) - return NULL; - } - -+ if (bo->tiling == priv->gpu_bo->tiling) { -+ DBG(("%s: tiling request failed\n", __FUNCTION__)); -+ kgem_bo_destroy(&sna->kgem, bo); -+ return NULL; -+ } -+ - box.x1 = box.y1 = 0; - box.x2 = pixmap->drawable.width; - box.y2 = pixmap->drawable.height; -@@ -824,8 +832,8 @@ create_pixmap(struct sna *sna, ScreenPtr screen, - datasize += adjust; - } - -- DBG(("%s: allocating pixmap %dx%d, depth=%d, size=%ld\n", -- __FUNCTION__, width, height, depth, (long)datasize)); -+ DBG(("%s: allocating pixmap %dx%d, depth=%d/%d, size=%ld\n", -+ __FUNCTION__, width, height, depth, bpp, (long)datasize)); - pixmap = AllocatePixmap(screen, datasize); - if (!pixmap) - return NullPixmap; -@@ -878,7 +886,11 @@ __pop_freed_pixmap(struct sna *sna) - 
pixmap = sna->freed_pixmap; - sna->freed_pixmap = pixmap->devPrivate.ptr; - -+ DBG(("%s: reusing freed pixmap=%ld header\n", -+ __FUNCTION__, pixmap->drawable.serialNumber)); -+ - assert(pixmap->refcnt == 0); -+ assert(pixmap->devKind = 0xdeadbeef); - assert(sna_pixmap(pixmap)); - assert(sna_pixmap(pixmap)->header); - -@@ -990,7 +1002,7 @@ fallback: - } - priv->cpu_bo->pitch = pitch; - kgem_bo_mark_unreusable(priv->cpu_bo); -- sna_accel_watch_flush(sna, 1); -+ sna_shm_watch_flush(sna, 1); - #ifdef DEBUG_MEMORY - sna->debug_memory.cpu_bo_allocs++; - sna->debug_memory.cpu_bo_bytes += kgem_bo_size(priv->cpu_bo); -@@ -1081,6 +1093,18 @@ sna_pixmap_create_scratch(ScreenPtr screen, - return pixmap; - } - -+static unsigned small_copy(const RegionRec *region) -+{ -+ if ((region->extents.x2 - region->extents.x1)*(region->extents.y2 - region->extents.y1) < 1024) { -+ DBG(("%s: region:%dx%d\n", __FUNCTION__, -+ (region->extents.x2 - region->extents.x1), -+ (region->extents.y2 - region->extents.y1))); -+ return COPY_SMALL; -+ } -+ -+ return 0; -+} -+ - #ifdef CREATE_PIXMAP_USAGE_SHARED - static Bool - sna_share_pixmap_backing(PixmapPtr pixmap, ScreenPtr slave, void **fd_handle) -@@ -1124,7 +1148,7 @@ sna_share_pixmap_backing(PixmapPtr pixmap, ScreenPtr slave, void **fd_handle) - pixmap->drawable.height, - pixmap->drawable.bitsPerPixel, - I915_TILING_NONE, -- CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT); -+ CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT); - if (bo == NULL) { - DBG(("%s: allocation failed\n", __FUNCTION__)); - return FALSE; -@@ -1243,7 +1267,7 @@ sna_create_pixmap_shared(struct sna *sna, ScreenPtr screen, - width, height, - pixmap->drawable.bitsPerPixel, - I915_TILING_NONE, -- CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT); -+ CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT); - if (priv->gpu_bo == NULL) { - free(priv); - FreePixmap(pixmap); -@@ -1311,7 +1335,7 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, - - if 
(unlikely((sna->render.prefer_gpu & PREFER_GPU_RENDER) == 0)) - flags &= ~KGEM_CAN_CREATE_GPU; -- if (wedged(sna)) -+ if (wedged(sna) && usage != SNA_CREATE_FB) - flags &= ~KGEM_CAN_CREATE_GTT; - - DBG(("%s: usage=%d, flags=%x\n", __FUNCTION__, usage, flags)); -@@ -1417,10 +1441,13 @@ static void __sna_free_pixmap(struct sna *sna, - __sna_pixmap_free_cpu(sna, priv); - - if (priv->flush) -- sna_accel_watch_flush(sna, -1); -+ sna_watch_flush(sna, -1); - -+#if !NDEBUG -+ pixmap->devKind = 0xdeadbeef; -+#endif - if (priv->header) { -- assert(pixmap->drawable.pScreen == sna->scrn->pScreen); -+ assert(pixmap->drawable.pScreen == to_screen_from_sna(sna)); - assert(!priv->shm); - pixmap->devPrivate.ptr = sna->freed_pixmap; - sna->freed_pixmap = pixmap; -@@ -1485,7 +1512,7 @@ static Bool sna_destroy_pixmap(PixmapPtr pixmap) - if (priv->shm && kgem_bo_is_busy(priv->cpu_bo)) { - DBG(("%s: deferring release of active SHM pixmap=%ld\n", - __FUNCTION__, pixmap->drawable.serialNumber)); -- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); -+ add_shm_flush(sna, priv); - kgem_bo_submit(&sna->kgem, priv->cpu_bo); /* XXX ShmDetach */ - } else - __sna_free_pixmap(sna, pixmap, priv); -@@ -1529,7 +1556,7 @@ static inline bool has_coherent_ptr(struct sna *sna, struct sna_pixmap *priv, un - if (!priv->cpu_bo) - return true; - -- assert(!priv->cpu_bo->needs_flush); -+ assert(!priv->cpu_bo->needs_flush || (flags & MOVE_WRITE) == 0); - assert(priv->pixmap->devKind == priv->cpu_bo->pitch); - return priv->pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu); - } -@@ -1557,6 +1584,11 @@ static inline bool has_coherent_ptr(struct sna *sna, struct sna_pixmap *priv, un - return true; - } - -+ if (priv->pixmap->devPrivate.ptr == MAP(priv->gpu_bo->map__wc)) { -+ assert(priv->mapped == MAPPED_GTT); -+ return true; -+ } -+ - return false; - } - -@@ -1577,6 +1609,16 @@ static inline bool pixmap_inplace(struct sna *sna, - return false; - - if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) { -+ if 
(priv->clear) { -+ DBG(("%s: no, clear GPU bo is busy\n", __FUNCTION__)); -+ return false; -+ } -+ -+ if (flags & MOVE_ASYNC_HINT) { -+ DBG(("%s: no, async hint and GPU bo is busy\n", __FUNCTION__)); -+ return false; -+ } -+ - if ((flags & (MOVE_WRITE | MOVE_READ)) == (MOVE_WRITE | MOVE_READ)) { - DBG(("%s: no, GPU bo is busy\n", __FUNCTION__)); - return false; -@@ -1624,7 +1666,7 @@ static bool sna_pixmap_alloc_gpu(struct sna *sna, - if (pixmap->usage_hint == SNA_CREATE_FB && (sna->flags & SNA_LINEAR_FB) == 0) { - flags |= CREATE_SCANOUT; - tiling = kgem_choose_tiling(&sna->kgem, -- -I915_TILING_X, -+ -DEFAULT_SCANOUT_TILING, - pixmap->drawable.width, - pixmap->drawable.height, - pixmap->drawable.bitsPerPixel); -@@ -1861,7 +1903,9 @@ sna_pixmap_undo_cow(struct sna *sna, struct sna_pixmap *priv, unsigned flags) - assert(priv->gpu_bo == cow->bo); - assert(cow->refcnt); - -- if (flags && (flags & MOVE_WRITE) == 0 && IS_COW_OWNER(priv->cow)) -+ if (flags && /* flags == 0 => force decouple */ -+ (flags & MOVE_WRITE) == 0 && -+ (((flags & __MOVE_FORCE) == 0) || IS_COW_OWNER(priv->cow))) - return true; - - if (!IS_COW_OWNER(priv->cow)) -@@ -1933,7 +1977,7 @@ sna_pixmap_undo_cow(struct sna *sna, struct sna_pixmap *priv, unsigned flags) - box.y2 = pixmap->drawable.height; - - if (flags & __MOVE_PRIME) { -- create = CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT; -+ create = CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT; - tiling = I915_TILING_NONE; - } else { - create = 0; -@@ -2021,6 +2065,10 @@ sna_pixmap_make_cow(struct sna *sna, - cow->bo->handle)); - - src_priv->cow = MAKE_COW_OWNER(cow); -+ if (src_priv->flush & FLUSH_WRITE) { -+ assert(!src_priv->shm); -+ sna_add_flush_pixmap(sna, src_priv, src_priv->gpu_bo); -+ } - } - - if (cow == COW(dst_priv->cow)) { -@@ -2267,6 +2315,7 @@ skip_inplace_map: - (flags & MOVE_WRITE ? 
(void *)priv->gpu_bo : (void *)priv->gpu_damage) && priv->cpu_damage == NULL && - priv->gpu_bo->tiling == I915_TILING_NONE && - (flags & MOVE_READ || kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, flags & MOVE_WRITE)) && -+ (!priv->clear || !kgem_bo_is_busy(priv->gpu_bo)) && - ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == 0 || - (!priv->cow && !priv->move_to_gpu && !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)))) { - void *ptr; -@@ -2330,7 +2379,9 @@ skip_inplace_map: - pixmap->devKind, pixmap->devKind * pixmap->drawable.height)); - - if (priv->cpu_bo) { -+ kgem_bo_undo(&sna->kgem, priv->cpu_bo); - if ((flags & MOVE_ASYNC_HINT || priv->cpu_bo->exec) && -+ sna->kgem.can_blt_cpu && - sna->render.fill_one(sna, - pixmap, priv->cpu_bo, priv->clear_color, - 0, 0, -@@ -2344,21 +2395,26 @@ skip_inplace_map: - assert(pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu)); - } - -- assert(pixmap->devKind); -- if (priv->clear_color == 0 || -- pixmap->drawable.bitsPerPixel == 8 || -- priv->clear_color == (1 << pixmap->drawable.depth) - 1) { -- memset(pixmap->devPrivate.ptr, priv->clear_color, -- (size_t)pixmap->devKind * pixmap->drawable.height); -- } else { -- pixman_fill(pixmap->devPrivate.ptr, -- pixmap->devKind/sizeof(uint32_t), -- pixmap->drawable.bitsPerPixel, -- 0, 0, -- pixmap->drawable.width, -- pixmap->drawable.height, -- priv->clear_color); -- } -+ if (sigtrap_get() == 0) { -+ assert(pixmap->devKind); -+ sigtrap_assert_active(); -+ if (priv->clear_color == 0 || -+ pixmap->drawable.bitsPerPixel == 8 || -+ priv->clear_color == (1 << pixmap->drawable.depth) - 1) { -+ memset(pixmap->devPrivate.ptr, priv->clear_color, -+ (size_t)pixmap->devKind * pixmap->drawable.height); -+ } else { -+ pixman_fill(pixmap->devPrivate.ptr, -+ pixmap->devKind/sizeof(uint32_t), -+ pixmap->drawable.bitsPerPixel, -+ 0, 0, -+ pixmap->drawable.width, -+ pixmap->drawable.height, -+ priv->clear_color); -+ } -+ sigtrap_put(); -+ } else -+ return false; - - clear_done: - 
sna_damage_all(&priv->cpu_damage, pixmap); -@@ -2414,6 +2470,10 @@ done: - DBG(("%s: discarding idle GPU bo\n", __FUNCTION__)); - sna_pixmap_free_gpu(sna, priv); - } -+ if (priv->flush) { -+ assert(!priv->shm); -+ sna_add_flush_pixmap(sna, priv, priv->gpu_bo); -+ } - priv->source_count = SOURCE_BIAS; - } - -@@ -2531,6 +2591,9 @@ static bool cpu_clear_boxes(struct sna *sna, - { - struct sna_fill_op fill; - -+ if (!sna->kgem.can_blt_cpu) -+ return false; -+ - if (!sna_fill_init_blt(&fill, sna, - pixmap, priv->cpu_bo, - GXcopy, priv->clear_color, -@@ -2659,6 +2722,10 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, - } - } - sna_damage_add_to_pixmap(&priv->cpu_damage, region, pixmap); -+ if (priv->flush) { -+ assert(!priv->shm); -+ sna_add_flush_pixmap(sna, priv, priv->gpu_bo); -+ } - - if (dx | dy) - RegionTranslate(region, -dx, -dy); -@@ -2904,17 +2971,22 @@ move_to_cpu: - assert(pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu)); - } - -- assert(pixmap->devKind); -- do { -- pixman_fill(pixmap->devPrivate.ptr, -- pixmap->devKind/sizeof(uint32_t), -- pixmap->drawable.bitsPerPixel, -- box->x1, box->y1, -- box->x2 - box->x1, -- box->y2 - box->y1, -- priv->clear_color); -- box++; -- } while (--n); -+ if (sigtrap_get() == 0) { -+ assert(pixmap->devKind); -+ sigtrap_assert_active(); -+ do { -+ pixman_fill(pixmap->devPrivate.ptr, -+ pixmap->devKind/sizeof(uint32_t), -+ pixmap->drawable.bitsPerPixel, -+ box->x1, box->y1, -+ box->x2 - box->x1, -+ box->y2 - box->y1, -+ priv->clear_color); -+ box++; -+ } while (--n); -+ sigtrap_put(); -+ } else -+ return false; - - clear_done: - if (flags & MOVE_WRITE || -@@ -3209,13 +3281,14 @@ __sna_pixmap_for_gpu(struct sna *sna, PixmapPtr pixmap, unsigned flags) - { - struct sna_pixmap *priv; - -+ assert(flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE)); - if ((flags & __MOVE_FORCE) == 0 && wedged(sna)) - return NULL; - - priv = sna_pixmap(pixmap); - if (priv == NULL) { - DBG(("%s: not attached\n", __FUNCTION__)); -- if ((flags 
& __MOVE_DRI) == 0) -+ if ((flags & (__MOVE_DRI | __MOVE_SCANOUT)) == 0) - return NULL; - - if (pixmap->usage_hint == -1) { -@@ -3238,6 +3311,44 @@ __sna_pixmap_for_gpu(struct sna *sna, PixmapPtr pixmap, unsigned flags) - return priv; - } - -+inline static void sna_pixmap_unclean(struct sna *sna, -+ struct sna_pixmap *priv, -+ unsigned flags) -+{ -+ struct drm_i915_gem_busy busy; -+ -+ assert(DAMAGE_IS_ALL(priv->gpu_damage)); -+ assert(priv->gpu_bo); -+ assert(priv->gpu_bo->proxy == NULL); -+ assert_pixmap_map(priv->pixmap, priv); -+ -+ sna_damage_destroy(&priv->cpu_damage); -+ list_del(&priv->flush_list); -+ -+ if (flags & (__MOVE_DRI | __MOVE_SCANOUT)) -+ return; -+ -+ if (!priv->flush || priv->gpu_bo->exec) -+ return; -+ -+ busy.handle = priv->gpu_bo->handle; -+ busy.busy = 0; -+ ioctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy); -+ -+ DBG(("%s(pixmap=%ld): cleaning foreign bo handle=%u, busy=%x [ring=%d]\n", -+ __FUNCTION__, -+ priv->pixmap->drawable.serialNumber, -+ busy.handle, busy.busy, !!(busy.busy & (0xfffe << 16)))); -+ -+ if (busy.busy) { -+ unsigned mode = KGEM_RENDER; -+ if (busy.busy & (0xfffe << 16)) -+ mode = KGEM_BLT; -+ kgem_bo_mark_busy(&sna->kgem, priv->gpu_bo, mode); -+ } else -+ __kgem_bo_clear_busy(priv->gpu_bo); -+} -+ - struct sna_pixmap * - sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int flags) - { -@@ -3287,12 +3398,14 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl - if (priv->cow) { - unsigned cow = flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE); - -+ assert(cow); -+ - if ((flags & MOVE_READ) == 0) { - if (priv->gpu_damage) { - r.extents = *box; - r.data = NULL; - if (!region_subsumes_damage(&r, priv->gpu_damage)) -- cow |= MOVE_READ; -+ cow |= MOVE_READ | __MOVE_FORCE; - } - } else { - if (priv->cpu_damage) { -@@ -3303,22 +3416,18 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl - } - } - -- if (cow) { -- if 
(!sna_pixmap_undo_cow(sna, priv, cow)) -- return NULL; -+ if (!sna_pixmap_undo_cow(sna, priv, cow)) -+ return NULL; - -- if (priv->gpu_bo == NULL) -- sna_damage_destroy(&priv->gpu_damage); -- } -+ if (priv->gpu_bo == NULL) -+ sna_damage_destroy(&priv->gpu_damage); - } - - if (sna_damage_is_all(&priv->gpu_damage, - pixmap->drawable.width, - pixmap->drawable.height)) { -- assert(priv->gpu_bo); -- assert(priv->gpu_bo->proxy == NULL); -- sna_damage_destroy(&priv->cpu_damage); -- list_del(&priv->flush_list); -+ DBG(("%s: already all-damaged\n", __FUNCTION__)); -+ sna_pixmap_unclean(sna, priv, flags); - goto done; - } - -@@ -3360,10 +3469,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl - return priv; - } - -- if (priv->shm) { -- assert(!priv->flush); -- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); -- } -+ add_shm_flush(sna, priv); - - assert(priv->cpu_damage); - region_set(&r, box); -@@ -3527,7 +3633,8 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box, - } - - if (priv->cow) { -- unsigned cow = MOVE_WRITE | MOVE_READ; -+ unsigned cow = MOVE_WRITE | MOVE_READ | __MOVE_FORCE; -+ assert(cow); - - if (flags & IGNORE_DAMAGE) { - if (priv->gpu_damage) { -@@ -3717,8 +3824,11 @@ create_gpu_bo: - else - move = MOVE_WRITE | MOVE_READ | MOVE_ASYNC_HINT; - -- if (sna_pixmap_move_to_gpu(pixmap, move)) -+ if (sna_pixmap_move_to_gpu(pixmap, move)) { -+ sna_damage_all(&priv->gpu_damage, -+ pixmap); - goto use_gpu_bo; -+ } - } - - if (DAMAGE_IS_ALL(priv->gpu_damage) || -@@ -3934,26 +4044,28 @@ prefer_gpu_bo: - goto move_to_gpu; - } - -- if ((priv->cpu_damage == NULL || flags & IGNORE_DAMAGE)) { -- if (priv->gpu_bo && priv->gpu_bo->tiling) { -- DBG(("%s: prefer to use GPU bo for rendering large pixmaps\n", __FUNCTION__)); -- goto prefer_gpu_bo; -+ if (!priv->shm) { -+ if ((priv->cpu_damage == NULL || flags & IGNORE_DAMAGE)) { -+ if (priv->gpu_bo && priv->gpu_bo->tiling) { -+ DBG(("%s: prefer to use GPU bo for rendering 
large pixmaps\n", __FUNCTION__)); -+ goto prefer_gpu_bo; -+ } -+ -+ if (priv->cpu_bo->pitch >= 4096) { -+ DBG(("%s: prefer to use GPU bo for rendering wide pixmaps\n", __FUNCTION__)); -+ goto prefer_gpu_bo; -+ } - } - -- if (priv->cpu_bo->pitch >= 4096) { -- DBG(("%s: prefer to use GPU bo for rendering wide pixmaps\n", __FUNCTION__)); -+ if ((flags & IGNORE_DAMAGE) == 0 && priv->cpu_bo->snoop) { -+ DBG(("%s: prefer to use GPU bo for reading from snooped target bo\n", __FUNCTION__)); - goto prefer_gpu_bo; - } -- } -- -- if ((flags & IGNORE_DAMAGE) == 0 && priv->cpu_bo->snoop) { -- DBG(("%s: prefer to use GPU bo for reading from snooped target bo\n", __FUNCTION__)); -- goto prefer_gpu_bo; -- } - -- if (!sna->kgem.can_blt_cpu) { -- DBG(("%s: can't render to CPU bo, try to use GPU bo\n", __FUNCTION__)); -- goto prefer_gpu_bo; -+ if (!sna->kgem.can_blt_cpu) { -+ DBG(("%s: can't render to CPU bo, try to use GPU bo\n", __FUNCTION__)); -+ goto prefer_gpu_bo; -+ } - } - } - -@@ -3967,9 +4079,7 @@ prefer_gpu_bo: - } - - if (priv->shm) { -- assert(!priv->flush); -- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); -- -+ add_shm_flush(sna, priv); - /* As we may have flushed and retired,, recheck for busy bo */ - if ((flags & FORCE_GPU) == 0 && !kgem_bo_is_busy(priv->cpu_bo)) - return NULL; -@@ -4019,7 +4129,7 @@ sna_pixmap_create_upload(ScreenPtr screen, - assert(width); - assert(height); - -- if (depth == 1) -+ if (depth < 8) - return create_pixmap(sna, screen, width, height, depth, - CREATE_PIXMAP_USAGE_SCRATCH); - -@@ -4121,27 +4231,21 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) - - if (priv->cow) { - unsigned cow = flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE); -+ assert(cow); - if (flags & MOVE_READ && priv->cpu_damage) - cow |= MOVE_WRITE; -- if (cow) { -- if (!sna_pixmap_undo_cow(sna, priv, cow)) -- return NULL; -+ if (!sna_pixmap_undo_cow(sna, priv, cow)) -+ return NULL; - -- if (priv->gpu_bo == NULL) -- sna_damage_destroy(&priv->gpu_damage); -- } -+ if 
(priv->gpu_bo == NULL) -+ sna_damage_destroy(&priv->gpu_damage); - } - - if (sna_damage_is_all(&priv->gpu_damage, - pixmap->drawable.width, - pixmap->drawable.height)) { - DBG(("%s: already all-damaged\n", __FUNCTION__)); -- assert(DAMAGE_IS_ALL(priv->gpu_damage)); -- assert(priv->gpu_bo); -- assert(priv->gpu_bo->proxy == NULL); -- assert_pixmap_map(pixmap, priv); -- sna_damage_destroy(&priv->cpu_damage); -- list_del(&priv->flush_list); -+ sna_pixmap_unclean(sna, priv, flags); - goto active; - } - -@@ -4206,7 +4310,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) - if (flags & MOVE_INPLACE_HINT || (priv->cpu_damage && priv->cpu_bo == NULL)) - create = CREATE_GTT_MAP | CREATE_INACTIVE; - if (flags & __MOVE_PRIME) -- create |= CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT; -+ create |= CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT; - - sna_pixmap_alloc_gpu(sna, pixmap, priv, create); - } -@@ -4282,10 +4386,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) - goto done; - } - -- if (priv->shm) { -- assert(!priv->flush); -- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); -- } -+ add_shm_flush(sna, priv); - - n = sna_damage_get_boxes(priv->cpu_damage, &box); - assert(n); -@@ -4534,7 +4635,7 @@ static inline bool box32_trim_and_translate(Box32Rec *box, DrawablePtr d, GCPtr - return box32_clip(box, gc); - } - --static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y) -+static inline void box_add_xy(BoxPtr box, int16_t x, int16_t y) - { - if (box->x1 > x) - box->x1 = x; -@@ -4547,6 +4648,11 @@ static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y) - box->y2 = y; - } - -+static inline void box_add_pt(BoxPtr box, const DDXPointRec *pt) -+{ -+ box_add_xy(box, pt->x, pt->y); -+} -+ - static inline bool box32_to_box16(const Box32Rec *b32, BoxRec *b16) - { - b16->x1 = b32->x1; -@@ -4864,6 +4970,7 @@ try_upload__inplace(PixmapPtr pixmap, RegionRec *region, - pixmap->devPrivate.ptr = dst; - pixmap->devKind = 
priv->gpu_bo->pitch; - priv->mapped = dst == MAP(priv->gpu_bo->map__cpu) ? MAPPED_CPU : MAPPED_GTT; -+ priv->cpu &= priv->mapped == MAPPED_CPU; - assert(has_coherent_ptr(sna, priv, MOVE_WRITE)); - - box = region_rects(region); -@@ -4923,8 +5030,7 @@ done: - sna_damage_all(&priv->gpu_damage, pixmap); - } - -- if (priv->shm) -- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); -+ add_shm_flush(sna, priv); - } - - assert(!priv->clear); -@@ -5172,6 +5278,16 @@ static inline uint8_t blt_depth(int depth) - } - } - -+inline static void blt_done(struct sna *sna) -+{ -+ sna->blt_state.fill_bo = 0; -+ if (sna->kgem.nbatch && __kgem_ring_empty(&sna->kgem)) { -+ DBG(("%s: flushing BLT operation on empty ring\n", -+ __FUNCTION__)); -+ _kgem_submit(&sna->kgem); -+ } -+} -+ - static bool - sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, - int x, int y, int w, int h, char *bits) -@@ -5217,6 +5333,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, - - kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - assert(kgem_bo_can_blt(&sna->kgem, bo)); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - /* Region is pre-clipped and translated into pixmap space */ - box = region_rects(region); -@@ -5238,6 +5355,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - upload = kgem_create_buffer(&sna->kgem, - bstride*bh, -@@ -5331,7 +5449,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, - box++; - } while (--n); - -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -5381,6 +5499,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, - - kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - assert(kgem_bo_can_blt(&sna->kgem, bo)); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - skip = h * BitmapBytePad(w + left); - for (i = 1 << (gc->depth-1); i; i >>= 1, bits += skip) { -@@ 
-5408,6 +5527,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - upload = kgem_create_buffer(&sna->kgem, - bstride*bh, -@@ -5509,7 +5629,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, - } while (--n); - } - -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -5837,7 +5957,7 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, - if (!sna->render.copy_boxes(sna, alu, - &pixmap->drawable, priv->gpu_bo, sx, sy, - &pixmap->drawable, priv->gpu_bo, tx, ty, -- box, n, 0)) { -+ box, n, small_copy(region))) { - DBG(("%s: fallback - accelerated copy boxes failed\n", - __FUNCTION__)); - goto fallback; -@@ -6098,6 +6218,9 @@ sna_copy_boxes__inplace(struct sna *sna, RegionPtr region, int alu, - - kgem_bo_sync__cpu_full(&sna->kgem, src_priv->gpu_bo, FORCE_FULL_SYNC); - -+ if (sigtrap_get()) -+ return false; -+ - box = region_rects(region); - n = region_num_rects(region); - if (src_priv->gpu_bo->tiling) { -@@ -6137,6 +6260,8 @@ sna_copy_boxes__inplace(struct sna *sna, RegionPtr region, int alu, - } - } - -+ sigtrap_put(); -+ - return true; - - upload_inplace: -@@ -6234,6 +6359,9 @@ upload_inplace: - - assert(has_coherent_ptr(sna, src_priv, MOVE_READ)); - -+ if (sigtrap_get()) -+ return false; -+ - box = region_rects(region); - n = region_num_rects(region); - if (dst_priv->gpu_bo->tiling) { -@@ -6265,15 +6393,19 @@ upload_inplace: - } while (--n); - - if (!dst_priv->shm) { -- assert(ptr == MAP(dst_priv->gpu_bo->map__cpu)); - dst_pixmap->devPrivate.ptr = ptr; - dst_pixmap->devKind = dst_priv->gpu_bo->pitch; -- dst_priv->mapped = MAPPED_CPU; -+ if (ptr == MAP(dst_priv->gpu_bo->map__cpu)) { -+ dst_priv->mapped = MAPPED_CPU; -+ dst_priv->cpu = true; -+ } else -+ dst_priv->mapped = MAPPED_GTT; - assert_pixmap_map(dst_pixmap, dst_priv); -- dst_priv->cpu = true; - } - } - -+ sigtrap_put(); -+ 
- return true; - } - -@@ -6326,6 +6458,16 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, - - assert(region_num_rects(region)); - -+ if (src_priv && -+ src_priv->gpu_bo == NULL && -+ src_priv->cpu_bo == NULL && -+ src_priv->ptr == NULL) { -+ /* Rare but still happens, nothing to copy */ -+ DBG(("%s: src pixmap=%ld is empty\n", -+ __FUNCTION__, src_pixmap->drawable.serialNumber)); -+ return; -+ } -+ - if (src_pixmap == dst_pixmap) - return sna_self_copy_boxes(src, dst, gc, - region, dx, dy, -@@ -6491,15 +6633,14 @@ discard_cow: - sna_damage_all(&dst_priv->gpu_damage, dst_pixmap); - sna_damage_destroy(&dst_priv->cpu_damage); - list_del(&dst_priv->flush_list); -- if (dst_priv->shm) -- sna_add_flush_pixmap(sna, dst_priv, dst_priv->cpu_bo); -+ add_shm_flush(sna, dst_priv); - return; - } - } - if (!sna->render.copy_boxes(sna, alu, - &src_pixmap->drawable, src_priv->gpu_bo, src_dx, src_dy, - &dst_pixmap->drawable, bo, 0, 0, -- box, n, 0)) { -+ box, n, small_copy(region))) { - DBG(("%s: fallback - accelerated copy boxes failed\n", - __FUNCTION__)); - goto fallback; -@@ -6536,7 +6677,7 @@ discard_cow: - if (!sna->render.copy_boxes(sna, alu, - &src_pixmap->drawable, src_priv->gpu_bo, src_dx, src_dy, - &dst_pixmap->drawable, bo, 0, 0, -- box, n, 0)) { -+ box, n, small_copy(region))) { - DBG(("%s: fallback - accelerated copy boxes failed\n", - __FUNCTION__)); - goto fallback; -@@ -6571,15 +6712,12 @@ discard_cow: - if (replaces && UNDO) - kgem_bo_pair_undo(&sna->kgem, dst_priv->gpu_bo, dst_priv->cpu_bo); - -- if (src_priv->shm) { -- assert(!src_priv->flush); -- sna_add_flush_pixmap(sna, src_priv, src_priv->cpu_bo); -- } -+ add_shm_flush(sna, src_priv); - - if (!sna->render.copy_boxes(sna, alu, - &src_pixmap->drawable, src_priv->cpu_bo, src_dx, src_dy, - &dst_pixmap->drawable, bo, 0, 0, -- box, n, src_priv->shm ? COPY_LAST : 0)) { -+ box, n, small_copy(region) | (src_priv->shm ? 
COPY_LAST : 0))) { - DBG(("%s: fallback - accelerated copy boxes failed\n", - __FUNCTION__)); - goto fallback; -@@ -6631,8 +6769,7 @@ discard_cow: - ok = sna->render.copy_boxes(sna, alu, - &src_pixmap->drawable, src_bo, src_dx, src_dy, - &dst_pixmap->drawable, bo, 0, 0, -- box, n, COPY_LAST); -- -+ box, n, small_copy(region) | COPY_LAST); - kgem_bo_sync__cpu(&sna->kgem, src_bo); - assert(src_bo->rq == NULL); - kgem_bo_destroy(&sna->kgem, src_bo); -@@ -6780,18 +6917,22 @@ fallback: - return; - } - -- assert(dst_pixmap->devPrivate.ptr); -- assert(dst_pixmap->devKind); -- do { -- pixman_fill(dst_pixmap->devPrivate.ptr, -- dst_pixmap->devKind/sizeof(uint32_t), -- dst_pixmap->drawable.bitsPerPixel, -- box->x1, box->y1, -- box->x2 - box->x1, -- box->y2 - box->y1, -- src_priv->clear_color); -- box++; -- } while (--n); -+ if (sigtrap_get() == 0) { -+ assert(dst_pixmap->devPrivate.ptr); -+ assert(dst_pixmap->devKind); -+ sigtrap_assert_active(); -+ do { -+ pixman_fill(dst_pixmap->devPrivate.ptr, -+ dst_pixmap->devKind/sizeof(uint32_t), -+ dst_pixmap->drawable.bitsPerPixel, -+ box->x1, box->y1, -+ box->x2 - box->x1, -+ box->y2 - box->y1, -+ src_priv->clear_color); -+ box++; -+ } while (--n); -+ sigtrap_put(); -+ } - } else if (!sna_copy_boxes__inplace(sna, region, alu, - src_pixmap, src_priv, - src_dx, src_dy, -@@ -6848,36 +6989,39 @@ fallback: - ((char *)src_pixmap->devPrivate.ptr + - src_dy * src_stride + src_dx * bpp / 8); - -- do { -- DBG(("%s: memcpy_blt(box=(%d, %d), (%d, %d), src=(%d, %d), pitches=(%d, %d))\n", -- __FUNCTION__, -- box->x1, box->y1, -- box->x2 - box->x1, -- box->y2 - box->y1, -- src_dx, src_dy, -- src_stride, dst_stride)); -- -- assert(box->x1 >= 0); -- assert(box->y1 >= 0); -- assert(box->x2 <= dst_pixmap->drawable.width); -- assert(box->y2 <= dst_pixmap->drawable.height); -- -- assert(box->x1 + src_dx >= 0); -- assert(box->y1 + src_dy >= 0); -- assert(box->x2 + src_dx <= src_pixmap->drawable.width); -- assert(box->y2 + src_dy <= 
src_pixmap->drawable.height); -- assert(has_coherent_ptr(sna, src_priv, MOVE_READ)); -- assert(has_coherent_ptr(sna, dst_priv, MOVE_WRITE)); -- assert(src_stride); -- assert(dst_stride); -- memcpy_blt(src_bits, dst_bits, bpp, -- src_stride, dst_stride, -- box->x1, box->y1, -- box->x1, box->y1, -- box->x2 - box->x1, -- box->y2 - box->y1); -- box++; -- } while (--n); -+ if (sigtrap_get() == 0) { -+ do { -+ DBG(("%s: memcpy_blt(box=(%d, %d), (%d, %d), src=(%d, %d), pitches=(%d, %d))\n", -+ __FUNCTION__, -+ box->x1, box->y1, -+ box->x2 - box->x1, -+ box->y2 - box->y1, -+ src_dx, src_dy, -+ src_stride, dst_stride)); -+ -+ assert(box->x1 >= 0); -+ assert(box->y1 >= 0); -+ assert(box->x2 <= dst_pixmap->drawable.width); -+ assert(box->y2 <= dst_pixmap->drawable.height); -+ -+ assert(box->x1 + src_dx >= 0); -+ assert(box->y1 + src_dy >= 0); -+ assert(box->x2 + src_dx <= src_pixmap->drawable.width); -+ assert(box->y2 + src_dy <= src_pixmap->drawable.height); -+ assert(has_coherent_ptr(sna, src_priv, MOVE_READ)); -+ assert(has_coherent_ptr(sna, dst_priv, MOVE_WRITE)); -+ assert(src_stride); -+ assert(dst_stride); -+ memcpy_blt(src_bits, dst_bits, bpp, -+ src_stride, dst_stride, -+ box->x1, box->y1, -+ box->x1, box->y1, -+ box->x2 - box->x1, -+ box->y2 - box->y1); -+ box++; -+ } while (--n); -+ sigtrap_put(); -+ } - } else { - DBG(("%s: fallback -- miCopyRegion\n", __FUNCTION__)); - -@@ -6931,7 +7075,8 @@ sna_do_copy(DrawablePtr src, DrawablePtr dst, GCPtr gc, - - /* Short cut for unmapped windows */ - if (dst->type == DRAWABLE_WINDOW && !((WindowPtr)dst)->realized) { -- DBG(("%s: unmapped\n", __FUNCTION__)); -+ DBG(("%s: unmapped/unrealized dst (pixmap=%ld)\n", -+ __FUNCTION__, get_window_pixmap((WindowPtr)dst))); - return NULL; - } - -@@ -7115,19 +7260,28 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc, - if (gc->planemask == 0) - return NULL; - -- DBG(("%s: src=(%d, %d)x(%d, %d)+(%d, %d) -> dst=(%d, %d)+(%d, %d); alu=%d, pm=%lx, depth=%d\n", -+ if 
(sna->ignore_copy_area) -+ return NULL; -+ -+ DBG(("%s: src=pixmap=%ld:(%d, %d)x(%d, %d)+(%d, %d) -> dst=pixmap=%ld:(%d, %d)+(%d, %d); alu=%d, pm=%lx, depth=%d\n", - __FUNCTION__, -+ get_drawable_pixmap(src)->drawable.serialNumber, - src_x, src_y, width, height, src->x, src->y, -+ get_drawable_pixmap(dst)->drawable.serialNumber, - dst_x, dst_y, dst->x, dst->y, - gc->alu, gc->planemask, gc->depth)); - - if (FORCE_FALLBACK || !ACCEL_COPY_AREA || wedged(sna) || -- !PM_IS_SOLID(dst, gc->planemask) || gc->depth < 8) -+ !PM_IS_SOLID(dst, gc->planemask) || gc->depth < 8) { -+ DBG(("%s: fallback copy\n", __FUNCTION__)); - copy = sna_fallback_copy_boxes; -- else if (src == dst) -+ } else if (src == dst) { -+ DBG(("%s: self copy\n", __FUNCTION__)); - copy = sna_self_copy_boxes; -- else -+ } else { -+ DBG(("%s: normal copy\n", __FUNCTION__)); - copy = sna_copy_boxes; -+ } - - return sna_do_copy(src, dst, gc, - src_x, src_y, -@@ -7136,30 +7290,21 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc, - copy, 0, NULL); - } - --static const BoxRec * --find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y) -+const BoxRec * -+__find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y) - { -- const BoxRec *mid; -- -- if (end == begin) -- return end; -- -- if (end - begin == 1) { -+ assert(end - begin > 1); -+ do { -+ const BoxRec *mid = begin + (end - begin) / 2; -+ if (mid->y2 > y) -+ end = mid; -+ else -+ begin = mid; -+ } while (end > begin + 1); - if (begin->y2 > y) -- return begin; -+ return begin; - else -- return end; -- } -- -- mid = begin + (end - begin) / 2; -- if (mid->y2 > y) -- /* If no box is found in [begin, mid], the function -- * will return @mid, which is then known to be the -- * correct answer. 
-- */ -- return find_clip_box_for_y(begin, mid, y); -- else -- return find_clip_box_for_y(mid, end, y); -+ return end; - } - - struct sna_fill_spans { -@@ -8223,6 +8368,8 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, - } - br13 |= blt_depth(drawable->depth) << 24; - br13 |= copy_ROP[gc->alu] << 16; -+ DBG(("%s: target-depth=%d, alu=%d, bg=%08x, fg=%08x\n", -+ __FUNCTION__, drawable->depth, gc->alu, gc->bgPixel, gc->fgPixel)); - - kgem_set_mode(&sna->kgem, KGEM_BLT, arg->bo); - assert(kgem_bo_can_blt(&sna->kgem, arg->bo)); -@@ -8255,6 +8402,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, - return; /* XXX fallback? */ - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, arg->bo); - - assert(sna->kgem.mode == KGEM_BLT); - if (sna->kgem.gen >= 0100) { -@@ -8270,8 +8418,8 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); -- b[5] = gc->bgPixel; -- b[6] = gc->fgPixel; -+ b[6] = gc->bgPixel; -+ b[7] = gc->fgPixel; - - dst = (uint8_t *)&b[8]; - sna->kgem.nbatch += 8 + src_stride; -@@ -8322,6 +8470,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, - return; /* XXX fallback? */ - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, arg->bo); - - upload = kgem_create_buffer(&sna->kgem, - bstride*bh, -@@ -8408,7 +8557,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, - sna_damage_add_to_pixmap(arg->damage, region, pixmap); - } - assert_pixmap_damage(pixmap); -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - } - - static void -@@ -8472,6 +8621,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, - return; /* XXX fallback? 
*/ - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, arg->bo); - - upload = kgem_create_buffer(&sna->kgem, - bstride*bh, -@@ -8588,6 +8738,8 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, - } - } - -+ kgem_bcs_set_tiling(&sna->kgem, upload, arg->bo); -+ - assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; - if (sna->kgem.gen >= 0100) { -@@ -8641,7 +8793,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, - sna_damage_add_to_pixmap(arg->damage, region, dst_pixmap); - } - assert_pixmap_damage(dst_pixmap); -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - } - - static RegionPtr -@@ -8895,36 +9047,11 @@ sna_poly_point_extents(DrawablePtr drawable, GCPtr gc, - last.x += pt->x; - last.y += pt->y; - pt++; -- box_add_pt(&box, last.x, last.y); -+ box_add_xy(&box, last.x, last.y); - } - } else { -- --n; ++pt; -- while (n >= 8) { -- box_add_pt(&box, pt[0].x, pt[0].y); -- box_add_pt(&box, pt[1].x, pt[1].y); -- box_add_pt(&box, pt[2].x, pt[2].y); -- box_add_pt(&box, pt[3].x, pt[3].y); -- box_add_pt(&box, pt[4].x, pt[4].y); -- box_add_pt(&box, pt[5].x, pt[5].y); -- box_add_pt(&box, pt[6].x, pt[6].y); -- box_add_pt(&box, pt[7].x, pt[7].y); -- pt += 8; -- n -= 8; -- } -- if (n & 4) { -- box_add_pt(&box, pt[0].x, pt[0].y); -- box_add_pt(&box, pt[1].x, pt[1].y); -- box_add_pt(&box, pt[2].x, pt[2].y); -- box_add_pt(&box, pt[3].x, pt[3].y); -- pt += 4; -- } -- if (n & 2) { -- box_add_pt(&box, pt[0].x, pt[0].y); -- box_add_pt(&box, pt[1].x, pt[1].y); -- pt += 2; -- } -- if (n & 1) -- box_add_pt(&box, pt[0].x, pt[0].y); -+ while (--n) -+ box_add_pt(&box, ++pt); - } - box.x2++; - box.y2++; -@@ -9636,7 +9763,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc, - y += pt->y; - if (blt) - blt &= pt->x == 0 || pt->y == 0; -- box_add_pt(&box, x, y); -+ box_add_xy(&box, x, y); - } - } else { - int x = box.x1; -@@ -9648,7 +9775,7 @@ sna_poly_line_extents(DrawablePtr drawable, 
GCPtr gc, - x = pt->x; - y = pt->y; - } -- box_add_pt(&box, pt->x, pt->y); -+ box_add_pt(&box, pt); - } - } - box.x2++; -@@ -10037,7 +10164,7 @@ out: - RegionUninit(&data.region); - } - --static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) -+static inline bool box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) - { - if (seg->x1 == seg->x2) { - if (seg->y1 > seg->y2) { -@@ -10051,6 +10178,9 @@ static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) - if (gc->capStyle != CapNotLast) - b->y2++; - } -+ if (b->y1 >= b->y2) -+ return false; -+ - b->x1 = seg->x1; - b->x2 = seg->x1 + 1; - } else { -@@ -10065,6 +10195,9 @@ static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) - if (gc->capStyle != CapNotLast) - b->x2++; - } -+ if (b->x1 >= b->x2) -+ return false; -+ - b->y1 = seg->y1; - b->y2 = seg->y1 + 1; - } -@@ -10073,6 +10206,7 @@ static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) - __FUNCTION__, - seg->x1, seg->y1, seg->x2, seg->y2, - b->x1, b->y1, b->x2, b->y2)); -+ return true; - } - - static bool -@@ -10107,12 +10241,13 @@ sna_poly_segment_blt(DrawablePtr drawable, - nbox = ARRAY_SIZE(boxes); - n -= nbox; - do { -- box_from_seg(b, seg++, gc); -- if (b->y2 > b->y1 && b->x2 > b->x1) { -+ if (box_from_seg(b, seg++, gc)) { -+ assert(!box_empty(b)); - b->x1 += dx; - b->x2 += dx; - b->y1 += dy; - b->y2 += dy; -+ assert(!box_empty(b)); - b++; - } - } while (--nbox); -@@ -10131,7 +10266,10 @@ sna_poly_segment_blt(DrawablePtr drawable, - nbox = ARRAY_SIZE(boxes); - n -= nbox; - do { -- box_from_seg(b++, seg++, gc); -+ if (box_from_seg(b, seg++, gc)) { -+ assert(!box_empty(b)); -+ b++; -+ } - } while (--nbox); - - if (b != boxes) { -@@ -10156,7 +10294,10 @@ sna_poly_segment_blt(DrawablePtr drawable, - do { - BoxRec box; - -- box_from_seg(&box, seg++, gc); -+ if (!box_from_seg(&box, seg++, gc)) -+ continue; -+ -+ assert(!box_empty(&box)); - box.x1 += drawable->x; - box.x2 += drawable->x; - 
box.y1 += drawable->y; -@@ -10174,6 +10315,7 @@ sna_poly_segment_blt(DrawablePtr drawable, - b->x2 += dx; - b->y1 += dy; - b->y2 += dy; -+ assert(!box_empty(b)); - if (++b == last_box) { - fill.boxes(sna, &fill, boxes, last_box-boxes); - if (damage) -@@ -10185,7 +10327,10 @@ sna_poly_segment_blt(DrawablePtr drawable, - } while (--n); - } else { - do { -- box_from_seg(b, seg++, gc); -+ if (!box_from_seg(b, seg++, gc)) -+ continue; -+ -+ assert(!box_empty(b)); - b->x1 += drawable->x; - b->x2 += drawable->x; - b->y1 += drawable->y; -@@ -10195,6 +10340,7 @@ sna_poly_segment_blt(DrawablePtr drawable, - b->x2 += dx; - b->y1 += dy; - b->y2 += dy; -+ assert(!box_empty(b)); - if (++b == last_box) { - fill.boxes(sna, &fill, boxes, last_box-boxes); - if (damage) -@@ -10319,8 +10465,11 @@ sna_poly_zero_segment_blt(DrawablePtr drawable, - } - b->x2++; - b->y2++; -- if (oc1 | oc2) -- box_intersect(b, extents); -+ -+ if ((oc1 | oc2) && !box_intersect(b, extents)) -+ continue; -+ -+ assert(!box_empty(b)); - if (++b == last_box) { - ret = &&rectangle_continue; - goto *jump; -@@ -10383,6 +10532,7 @@ rectangle_continue: - __FUNCTION__, x1, y1, - b->x1, b->y1, b->x2, b->y2)); - -+ assert(!box_empty(b)); - if (++b == last_box) { - ret = &&X_continue; - goto *jump; -@@ -10407,6 +10557,7 @@ X_continue: - b->x2 = x1 + 1; - b->y2 = b->y1 + 1; - -+ assert(!box_empty(b)); - if (++b == last_box) { - ret = &&X2_continue; - goto *jump; -@@ -10468,6 +10619,7 @@ X2_continue: - b->y2 = y1 + 1; - b->x2 = x1 + 1; - -+ assert(!box_empty(b)); - if (++b == last_box) { - ret = &&Y_continue; - goto *jump; -@@ -10491,6 +10643,7 @@ Y_continue: - b->y2 = y1 + 1; - b->x2 = x1 + 1; - -+ assert(!box_empty(b)); - if (++b == last_box) { - ret = &&Y2_continue; - goto *jump; -@@ -11785,14 +11938,29 @@ sna_poly_fill_rect_blt(DrawablePtr drawable, - if (nbox > ARRAY_SIZE(boxes)) - nbox = ARRAY_SIZE(boxes); - n -= nbox; -- do { -+ while (nbox >= 2) { -+ b[0].x1 = rect[0].x + dx; -+ b[0].y1 = rect[0].y + dy; -+ 
b[0].x2 = b[0].x1 + rect[0].width; -+ b[0].y2 = b[0].y1 + rect[0].height; -+ -+ b[1].x1 = rect[1].x + dx; -+ b[1].y1 = rect[1].y + dy; -+ b[1].x2 = b[1].x1 + rect[1].width; -+ b[1].y2 = b[1].y1 + rect[1].height; -+ -+ b += 2; -+ rect += 2; -+ nbox -= 2; -+ } -+ if (nbox) { - b->x1 = rect->x + dx; - b->y1 = rect->y + dy; - b->x2 = b->x1 + rect->width; - b->y2 = b->y1 + rect->height; - b++; - rect++; -- } while (--nbox); -+ } - fill.boxes(sna, &fill, boxes, b-boxes); - b = boxes; - } while (n); -@@ -11802,14 +11970,29 @@ sna_poly_fill_rect_blt(DrawablePtr drawable, - if (nbox > ARRAY_SIZE(boxes)) - nbox = ARRAY_SIZE(boxes); - n -= nbox; -- do { -+ while (nbox >= 2) { -+ b[0].x1 = rect[0].x; -+ b[0].y1 = rect[0].y; -+ b[0].x2 = b[0].x1 + rect[0].width; -+ b[0].y2 = b[0].y1 + rect[0].height; -+ -+ b[1].x1 = rect[1].x; -+ b[1].y1 = rect[1].y; -+ b[1].x2 = b[1].x1 + rect[1].width; -+ b[1].y2 = b[1].y1 + rect[1].height; -+ -+ b += 2; -+ rect += 2; -+ nbox -= 2; -+ } -+ if (nbox) { - b->x1 = rect->x; - b->y1 = rect->y; - b->x2 = b->x1 + rect->width; - b->y2 = b->y1 + rect->height; - b++; - rect++; -- } while (--nbox); -+ } - fill.boxes(sna, &fill, boxes, b-boxes); - b = boxes; - } while (n); -@@ -12192,6 +12375,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); - - get_drawable_deltas(drawable, pixmap, &dx, &dy); - assert(extents->x1 + dx >= 0); -@@ -12335,6 +12519,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, - - _kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); - } while (1); - } else { - RegionRec clip; -@@ -12403,6 +12588,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, - if (!kgem_check_batch(&sna->kgem, 3)) { - _kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); - - unwind_batch = sna->kgem.nbatch; - 
unwind_reloc = sna->kgem.nreloc; -@@ -12499,6 +12685,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, - DBG(("%s: emitting split batch\n", __FUNCTION__)); - _kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); - - unwind_batch = sna->kgem.nbatch; - unwind_reloc = sna->kgem.nreloc; -@@ -12572,7 +12759,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, - } - done: - assert_pixmap_damage(pixmap); -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -13128,6 +13315,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - if (!clipped) { - dx += drawable->x; -@@ -13240,6 +13428,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, - - _kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - } while (1); - } else { - RegionRec clip; -@@ -13297,6 +13486,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, - if (!kgem_check_batch(&sna->kgem, 3)) { - _kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; -@@ -13369,6 +13559,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, - if (!kgem_check_batch(&sna->kgem, 3)) { - _kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; -@@ -13419,7 +13610,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, - } - - assert_pixmap_damage(pixmap); -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -13499,6 +13690,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, - get_drawable_deltas(drawable, pixmap, &dx, &dy); - 
kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - assert(kgem_bo_can_blt(&sna->kgem, bo)); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - br00 = 3 << 20; - br13 = bo->pitch; -@@ -13543,6 +13735,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; -@@ -13606,6 +13799,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - upload = kgem_create_buffer(&sna->kgem, - bstride*bh, -@@ -13736,6 +13930,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; -@@ -13797,6 +13992,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - upload = kgem_create_buffer(&sna->kgem, - bstride*bh, -@@ -13927,6 +14123,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; -@@ -13987,6 +14184,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - upload = kgem_create_buffer(&sna->kgem, - bstride*bh, -@@ -14064,7 +14262,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, - } - } - -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -14126,6 +14324,7 @@ sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna, - return; /* XXX fallback? 
*/ - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; -@@ -14251,6 +14450,7 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna, - return; /* XXX fallback? */ - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; -@@ -14414,6 +14614,7 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable, - get_drawable_deltas(drawable, pixmap, &dx, &dy); - kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - assert(kgem_bo_can_blt(&sna->kgem, bo)); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - br00 = XY_MONO_SRC_COPY_IMM | 3 << 20; - br13 = bo->pitch; -@@ -14526,7 +14727,7 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable, - } - - assert_pixmap_damage(pixmap); -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -14559,6 +14760,7 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable, - get_drawable_deltas(drawable, pixmap, &dx, &dy); - kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - assert(kgem_bo_can_blt(&sna->kgem, bo)); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - br00 = XY_MONO_SRC_COPY | 3 << 20; - br13 = bo->pitch; -@@ -14673,7 +14875,7 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable, - assert_pixmap_damage(pixmap); - if (tile) - kgem_bo_destroy(&sna->kgem, tile); -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -15281,6 +15483,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, - } - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - DBG(("%s: glyph clip box (%d, %d), (%d, %d)\n", - __FUNCTION__, -@@ -15368,6 +15571,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, - if (!kgem_check_batch(&sna->kgem, 3+len)) { - _kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - 
- DBG(("%s: new batch, glyph clip box (%d, %d), (%d, %d)\n", - __FUNCTION__, -@@ -15479,7 +15683,7 @@ skip: - } - - assert_pixmap_damage(pixmap); -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -16002,6 +16206,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, - } - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - unwind_batch = sna->kgem.nbatch; - unwind_reloc = sna->kgem.nreloc; -@@ -16111,6 +16316,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, - if (!kgem_check_batch(&sna->kgem, 3+len)) { - _kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - unwind_batch = sna->kgem.nbatch; - unwind_reloc = sna->kgem.nreloc; -@@ -16229,7 +16435,7 @@ skip: - } - - assert_pixmap_damage(pixmap); -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -16450,6 +16656,7 @@ sna_push_pixels_solid_blt(GCPtr gc, - - kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - assert(kgem_bo_can_blt(&sna->kgem, bo)); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - /* Region is pre-clipped and translated into pixmap space */ - box = region_rects(region); -@@ -16471,6 +16678,7 @@ sna_push_pixels_solid_blt(GCPtr gc, - return false; - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - upload = kgem_create_buffer(&sna->kgem, - bstride*bh, -@@ -16564,7 +16772,7 @@ sna_push_pixels_solid_blt(GCPtr gc, - box++; - } while (--n); - -- sna->blt_state.fill_bo = 0; -+ blt_done(sna); - return true; - } - -@@ -16754,7 +16962,9 @@ static int sna_create_gc(GCPtr gc) - - gc->freeCompClip = 0; - gc->pCompositeClip = 0; -+#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,19,99,1,0) - gc->pRotatedPixmap = 0; -+#endif - - fb_gc(gc)->bpp = bits_per_pixel(gc->depth); - -@@ -16789,7 +16999,8 @@ sna_get_image__inplace(PixmapPtr pixmap, - break; - } - -- if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, 
FORCE_FULL_SYNC)) -+ if ((flags & MOVE_INPLACE_HINT) == 0 && -+ !kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC)) - return false; - - if (idle && __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) -@@ -16801,11 +17012,19 @@ sna_get_image__inplace(PixmapPtr pixmap, - assert(sna_damage_contains_box(&priv->gpu_damage, ®ion->extents) == PIXMAN_REGION_IN); - assert(sna_damage_contains_box(&priv->cpu_damage, ®ion->extents) == PIXMAN_REGION_OUT); - -- src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); -- if (src == NULL) -- return false; -+ if (kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC)) { -+ src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); -+ if (src == NULL) -+ return false; - -- kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); -+ kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); -+ } else { -+ src = kgem_bo_map__wc(&sna->kgem, priv->gpu_bo); -+ if (src == NULL) -+ return false; -+ -+ kgem_bo_sync__gtt(&sna->kgem, priv->gpu_bo); -+ } - - if (sigtrap_get()) - return false; -@@ -16833,12 +17052,11 @@ sna_get_image__inplace(PixmapPtr pixmap, - region->extents.x2 - region->extents.x1, - region->extents.y2 - region->extents.y1); - if (!priv->shm) { -- assert(src == MAP(priv->gpu_bo->map__cpu)); - pixmap->devPrivate.ptr = src; - pixmap->devKind = priv->gpu_bo->pitch; -- priv->mapped = MAPPED_CPU; -+ priv->mapped = src == MAP(priv->gpu_bo->map__cpu) ? 
MAPPED_CPU : MAPPED_GTT; - assert_pixmap_map(pixmap, priv); -- priv->cpu = true; -+ priv->cpu &= priv->mapped == MAPPED_CPU; - } - } - -@@ -16930,7 +17148,7 @@ sna_get_image__fast(PixmapPtr pixmap, - if (priv == NULL || priv->gpu_damage == NULL) - return false; - -- if (priv->clear) { -+ if (priv->clear && sigtrap_get() == 0) { - int w = region->extents.x2 - region->extents.x1; - int h = region->extents.y2 - region->extents.y1; - int pitch = PixmapBytePad(w, pixmap->drawable.depth); -@@ -16939,6 +17157,7 @@ sna_get_image__fast(PixmapPtr pixmap, - __FUNCTION__, priv->clear_color)); - assert(DAMAGE_IS_ALL(priv->gpu_damage)); - assert(priv->cpu_damage == NULL); -+ sigtrap_assert_active(); - - if (priv->clear_color == 0 || - pixmap->drawable.bitsPerPixel == 8 || -@@ -16955,6 +17174,7 @@ sna_get_image__fast(PixmapPtr pixmap, - priv->clear_color); - } - -+ sigtrap_put(); - return true; - } - -@@ -17001,8 +17221,7 @@ sna_get_image(DrawablePtr drawable, - if (ACCEL_GET_IMAGE && - !FORCE_FALLBACK && - format == ZPixmap && -- drawable->bitsPerPixel >= 8 && -- PM_IS_SOLID(drawable, mask)) { -+ drawable->bitsPerPixel >= 8) { - PixmapPtr pixmap = get_drawable_pixmap(drawable); - int16_t dx, dy; - -@@ -17014,7 +17233,7 @@ sna_get_image(DrawablePtr drawable, - region.data = NULL; - - if (sna_get_image__fast(pixmap, ®ion, dst, flags)) -- return; -+ goto apply_planemask; - - if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, - ®ion, flags)) -@@ -17032,6 +17251,16 @@ sna_get_image(DrawablePtr drawable, - region.extents.x1, region.extents.y1, 0, 0, w, h); - sigtrap_put(); - } -+ -+apply_planemask: -+ if (!PM_IS_SOLID(drawable, mask)) { -+ FbStip pm = fbReplicatePixel(mask, drawable->bitsPerPixel); -+ FbStip *d = (FbStip *)dst; -+ int i, n = PixmapBytePad(w, drawable->depth) / sizeof(FbStip) * h; -+ -+ for (i = 0; i < n; i++) -+ d[i] &= pm; -+ } - } else { - region.extents.x1 = x + drawable->x; - region.extents.y1 = y + drawable->y; -@@ -17162,17 +17391,19 @@ void 
sna_accel_flush(struct sna *sna) - __sna_free_pixmap(sna, priv->pixmap, priv); - } - } else { -+ unsigned hints; - DBG(("%s: flushing DRI pixmap=%ld\n", __FUNCTION__, - priv->pixmap->drawable.serialNumber)); - assert(priv->flush); -- if (sna_pixmap_move_to_gpu(priv->pixmap, -- MOVE_READ | __MOVE_FORCE)) { -- if (priv->flush & IS_CLIPPED) { -+ hints = MOVE_READ | __MOVE_FORCE; -+ if (priv->flush & FLUSH_WRITE) -+ hints |= MOVE_WRITE; -+ if (sna_pixmap_move_to_gpu(priv->pixmap, hints)) { -+ if (priv->flush & FLUSH_WRITE) { - kgem_bo_unclean(&sna->kgem, priv->gpu_bo); - sna_damage_all(&priv->gpu_damage, priv->pixmap); - assert(priv->cpu_damage == NULL); -- priv->clear = false; -- priv->cpu = false; -+ assert(priv->clear == false); - } - } - } -@@ -17184,10 +17415,46 @@ void sna_accel_flush(struct sna *sna) - } - - static void --sna_accel_flush_callback(CallbackListPtr *list, -- pointer user_data, pointer call_data) -+sna_shm_flush_callback(CallbackListPtr *list, -+ pointer user_data, pointer call_data) - { -- sna_accel_flush(user_data); -+ struct sna *sna = user_data; -+ -+ if (!sna->needs_shm_flush) -+ return; -+ -+ sna_accel_flush(sna); -+ sna->needs_shm_flush = false; -+} -+ -+static void -+sna_flush_callback(CallbackListPtr *list, pointer user_data, pointer call_data) -+{ -+ struct sna *sna = user_data; -+ -+ if (!sna->needs_dri_flush) -+ return; -+ -+ sna_accel_flush(sna); -+ sna->needs_dri_flush = false; -+} -+ -+static void -+sna_event_callback(CallbackListPtr *list, pointer user_data, pointer call_data) -+{ -+ EventInfoRec *eventinfo = call_data; -+ struct sna *sna = user_data; -+ int i; -+ -+ if (sna->needs_dri_flush) -+ return; -+ -+ for (i = 0; i < eventinfo->count; i++) { -+ if (eventinfo->events[i].u.u.type == sna->damage_event) { -+ sna->needs_dri_flush = true; -+ return; -+ } -+ } - } - - static struct sna_pixmap *sna_accel_scanout(struct sna *sna) -@@ -17199,6 +17466,7 @@ static struct sna_pixmap *sna_accel_scanout(struct sna *sna) - - 
assert(sna->vblank_interval); - assert(sna->front); -+ assert(!sna->mode.hidden); - - priv = sna_pixmap(sna->front); - if (priv->gpu_bo == NULL) -@@ -17217,7 +17485,7 @@ static void sna_accel_disarm_timer(struct sna *sna, int id) - static bool has_offload_slaves(struct sna *sna) - { - #if HAS_PIXMAP_SHARING -- ScreenPtr screen = sna->scrn->pScreen; -+ ScreenPtr screen = to_screen_from_sna(sna); - PixmapDirtyUpdatePtr dirty; - - xorg_list_for_each_entry(dirty, &screen->pixmap_dirty_list, ent) { -@@ -17231,11 +17499,14 @@ static bool has_offload_slaves(struct sna *sna) - - static bool has_shadow(struct sna *sna) - { -- DamagePtr damage = sna->mode.shadow_damage; -+ DamagePtr damage; - -- if (damage == NULL) -+ if (!sna->mode.shadow_enabled) - return false; - -+ damage = sna->mode.shadow_damage; -+ assert(damage); -+ - DBG(("%s: has pending damage? %d, outstanding flips: %d\n", - __FUNCTION__, - RegionNotEmpty(DamageRegion(damage)), -@@ -17365,9 +17636,8 @@ static bool sna_accel_do_expire(struct sna *sna) - static void sna_accel_post_damage(struct sna *sna) - { - #if HAS_PIXMAP_SHARING -- ScreenPtr screen = sna->scrn->pScreen; -+ ScreenPtr screen = to_screen_from_sna(sna); - PixmapDirtyUpdatePtr dirty; -- bool flush = false; - - xorg_list_for_each_entry(dirty, &screen->pixmap_dirty_list, ent) { - RegionRec region, *damage; -@@ -17376,8 +17646,6 @@ static void sna_accel_post_damage(struct sna *sna) - int16_t dx, dy; - int n; - -- assert(dirty->src == sna->front); -- - damage = DamageRegion(dirty->damage); - if (RegionNil(damage)) - continue; -@@ -17477,7 +17745,14 @@ fallback: - box, n, COPY_LAST)) - goto fallback; - -- flush = true; -+ /* Before signalling the slave via ProcessPending, -+ * ensure not only the batch is submitted as the -+ * slave may be using the Damage callback to perform -+ * its copy, but also that the memory must be coherent -+ * - we need to treat it as uncached for the PCI slave -+ * will bypass LLC. 
-+ */ -+ kgem_bo_sync__gtt(&sna->kgem, __sna_pixmap_get_bo(dst)); - } - - DamageRegionProcessPending(&dirty->slave_dst->drawable); -@@ -17485,8 +17760,6 @@ skip: - RegionUninit(®ion); - DamageEmpty(dirty->damage); - } -- if (flush) -- kgem_submit(&sna->kgem); - #endif - } - -@@ -17689,6 +17962,7 @@ sna_set_screen_pixmap(PixmapPtr pixmap) - static Bool - sna_create_window(WindowPtr win) - { -+ DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); - sna_set_window_pixmap(win, win->drawable.pScreen->devPrivate); - return TRUE; - } -@@ -17714,6 +17988,7 @@ sna_unmap_window(WindowPtr win) - static Bool - sna_destroy_window(WindowPtr win) - { -+ DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); - sna_video_destroy_window(win); - sna_dri2_destroy_window(win); - return TRUE; -@@ -17790,20 +18065,34 @@ static bool sna_option_accel_none(struct sna *sna) - if (wedged(sna)) - return true; - -- if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE)) -+ if (!xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_ENABLE, TRUE)) - return true; - -+ if (sna->kgem.gen >= 0120) -+ return true; -+ -+ if (!intel_option_cast_to_bool(sna->Options, -+ OPTION_ACCEL_METHOD, -+ !IS_DEFAULT_ACCEL_METHOD(NOACCEL))) -+ return false; -+ -+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) - s = xf86GetOptValString(sna->Options, OPTION_ACCEL_METHOD); - if (s == NULL) - return IS_DEFAULT_ACCEL_METHOD(NOACCEL); - - return strcasecmp(s, "none") == 0; -+#else -+ return IS_DEFAULT_ACCEL_METHOD(NOACCEL); -+#endif - } - - static bool sna_option_accel_blt(struct sna *sna) - { - const char *s; - -+ assert(sna->kgem.gen < 0120); -+ - s = xf86GetOptValString(sna->Options, OPTION_ACCEL_METHOD); - if (s == NULL) - return false; -@@ -17811,6 +18100,13 @@ static bool sna_option_accel_blt(struct sna *sna) - return strcasecmp(s, "blt") == 0; - } - -+#if HAVE_NOTIFY_FD -+static void sna_accel_notify(int fd, int ready, void *data) -+{ -+ sna_mode_wakeup(data); -+} -+#endif -+ - bool 
sna_accel_init(ScreenPtr screen, struct sna *sna) - { - const char *backend; -@@ -17822,7 +18118,7 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) - list_init(&sna->flush_pixmaps); - list_init(&sna->active_pixmaps); - -- AddGeneralSocket(sna->kgem.fd); -+ SetNotifyFd(sna->kgem.fd, sna_accel_notify, X_NOTIFY_READ, sna); - - #ifdef DEBUG_MEMORY - sna->timer_expire[DEBUG_MEMORY_TIMER] = GetTimeInMillis()+ 10 * 1000; -@@ -17892,21 +18188,23 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) - backend = "disabled"; - sna->kgem.wedged = true; - sna_render_mark_wedged(sna); -- } else if (sna_option_accel_blt(sna) || sna->info->gen >= 0110) -+ } else if (sna_option_accel_blt(sna)) - (void)backend; -- else if (sna->info->gen >= 0100) -+ else if (sna->kgem.gen >= 0110) -+ backend = gen9_render_init(sna, backend); -+ else if (sna->kgem.gen >= 0100) - backend = gen8_render_init(sna, backend); -- else if (sna->info->gen >= 070) -+ else if (sna->kgem.gen >= 070) - backend = gen7_render_init(sna, backend); -- else if (sna->info->gen >= 060) -+ else if (sna->kgem.gen >= 060) - backend = gen6_render_init(sna, backend); -- else if (sna->info->gen >= 050) -+ else if (sna->kgem.gen >= 050) - backend = gen5_render_init(sna, backend); -- else if (sna->info->gen >= 040) -+ else if (sna->kgem.gen >= 040) - backend = gen4_render_init(sna, backend); -- else if (sna->info->gen >= 030) -+ else if (sna->kgem.gen >= 030) - backend = gen3_render_init(sna, backend); -- else if (sna->info->gen >= 020) -+ else if (sna->kgem.gen >= 020) - backend = gen2_render_init(sna, backend); - - DBG(("%s(backend=%s, prefer_gpu=%x)\n", -@@ -17924,8 +18222,14 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) - - void sna_accel_create(struct sna *sna) - { -+ ExtensionEntry *damage; -+ - DBG(("%s\n", __FUNCTION__)); - -+ damage = CheckExtension("DAMAGE"); -+ if (damage) -+ sna->damage_event = damage->eventBase + XDamageNotify; -+ - if (!sna_glyphs_create(sna)) - goto fail; - -@@ -17943,27 
+18247,59 @@ fail: - no_render_init(sna); - } - --void sna_accel_watch_flush(struct sna *sna, int enable) -+static void sna_shm_watch_flush(struct sna *sna, int enable) - { - DBG(("%s: enable=%d\n", __FUNCTION__, enable)); - assert(enable); - -- if (sna->watch_flush == 0) { -+ if (sna->watch_shm_flush == 0) { -+ DBG(("%s: installing shm watchers\n", __FUNCTION__)); -+ assert(enable > 0); -+ -+ if (!AddCallback(&FlushCallback, sna_shm_flush_callback, sna)) -+ return; -+ -+ sna->watch_shm_flush++; -+ } -+ -+ sna->watch_shm_flush += enable; -+} -+ -+void sna_watch_flush(struct sna *sna, int enable) -+{ -+ DBG(("%s: enable=%d\n", __FUNCTION__, enable)); -+ assert(enable); -+ -+ if (sna->watch_dri_flush == 0) { -+ int err = 0; -+ - DBG(("%s: installing watchers\n", __FUNCTION__)); - assert(enable > 0); -- if (!AddCallback(&FlushCallback, sna_accel_flush_callback, sna)) { -+ -+ if (!sna->damage_event) -+ return; -+ -+ if (!AddCallback(&EventCallback, sna_event_callback, sna)) -+ err = 1; -+ -+ if (!AddCallback(&FlushCallback, sna_flush_callback, sna)) -+ err = 1; -+ -+ if (err) { - xf86DrvMsg(sna->scrn->scrnIndex, X_Error, - "Failed to attach ourselves to the flush callbacks, expect missing synchronisation with DRI clients (e.g a compositor)\n"); - } -- sna->watch_flush++; -+ -+ sna->watch_dri_flush++; - } - -- sna->watch_flush += enable; -+ sna->watch_dri_flush += enable; - } - - void sna_accel_leave(struct sna *sna) - { - DBG(("%s\n", __FUNCTION__)); -+ sna_scanout_flush(sna); - - /* as root we always have permission to render */ - if (geteuid() == 0) -@@ -17997,13 +18333,15 @@ void sna_accel_close(struct sna *sna) - - sna_pixmap_expire(sna); - -- DeleteCallback(&FlushCallback, sna_accel_flush_callback, sna); -- RemoveGeneralSocket(sna->kgem.fd); -+ DeleteCallback(&FlushCallback, sna_shm_flush_callback, sna); -+ DeleteCallback(&FlushCallback, sna_flush_callback, sna); -+ DeleteCallback(&EventCallback, sna_event_callback, sna); -+ RemoveNotifyFd(sna->kgem.fd); - - 
kgem_cleanup_cache(&sna->kgem); - } - --void sna_accel_block_handler(struct sna *sna, struct timeval **tv) -+void sna_accel_block(struct sna *sna, struct timeval **tv) - { - sigtrap_assert_inactive(); - -@@ -18044,10 +18382,17 @@ restart: - if (sna_accel_do_debug_memory(sna)) - sna_accel_debug_memory(sna); - -- if (sna->watch_flush == 1) { -- DBG(("%s: removing watchers\n", __FUNCTION__)); -- DeleteCallback(&FlushCallback, sna_accel_flush_callback, sna); -- sna->watch_flush = 0; -+ if (sna->watch_shm_flush == 1) { -+ DBG(("%s: removing shm watchers\n", __FUNCTION__)); -+ DeleteCallback(&FlushCallback, sna_shm_flush_callback, sna); -+ sna->watch_shm_flush = 0; -+ } -+ -+ if (sna->watch_dri_flush == 1) { -+ DBG(("%s: removing dri watchers\n", __FUNCTION__)); -+ DeleteCallback(&FlushCallback, sna_flush_callback, sna); -+ DeleteCallback(&EventCallback, sna_event_callback, sna); -+ sna->watch_dri_flush = 0; - } - - if (sna->timer_active & 1) { -@@ -18083,22 +18428,6 @@ set_tv: - } - } - --void sna_accel_wakeup_handler(struct sna *sna) --{ -- DBG(("%s: nbatch=%d, need_retire=%d, need_purge=%d\n", __FUNCTION__, -- sna->kgem.nbatch, sna->kgem.need_retire, sna->kgem.need_purge)); -- -- if (!sna->kgem.nbatch) -- return; -- -- if (kgem_is_idle(&sna->kgem)) { -- DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); -- _kgem_submit(&sna->kgem); -- } -- -- sigtrap_assert_inactive(); --} -- - void sna_accel_free(struct sna *sna) - { - DBG(("%s\n", __FUNCTION__)); -diff --git a/src/sna/sna_acpi.c b/src/sna/sna_acpi.c -index dcc0287b..643d04af 100644 ---- a/src/sna/sna_acpi.c -+++ b/src/sna/sna_acpi.c -@@ -92,7 +92,7 @@ void _sna_acpi_wakeup(struct sna *sna) - DBG(("%s: error [%d], detaching from acpid\n", __FUNCTION__, n)); - - /* XXX reattach later? 
*/ -- RemoveGeneralSocket(sna->acpi.fd); -+ RemoveNotifyFd(sna->acpi.fd); - sna_acpi_fini(sna); - return; - } -@@ -136,6 +136,13 @@ void _sna_acpi_wakeup(struct sna *sna) - } while (n); - } - -+#if HAVE_NOTIFY_FD -+static void sna_acpi_notify(int fd, int read, void *data) -+{ -+ _sna_acpi_wakeup(data); -+} -+#endif -+ - static int read_power_state(const char *path) - { - DIR *dir; -@@ -200,7 +207,7 @@ void sna_acpi_init(struct sna *sna) - - DBG(("%s: attaching to acpid\n", __FUNCTION__)); - -- AddGeneralSocket(sna->acpi.fd); -+ SetNotifyFd(sna->acpi.fd, sna_acpi_notify, X_NOTIFY_READ, sna); - sna->acpi.remain = sizeof(sna->acpi.event) - 1; - sna->acpi.offset = 0; - -diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c -index de8f6ec3..ddd2586d 100644 ---- a/src/sna/sna_blt.c -+++ b/src/sna/sna_blt.c -@@ -86,6 +86,11 @@ static const uint8_t fill_ROP[] = { - ROP_1 - }; - -+static void sig_done(struct sna *sna, const struct sna_composite_op *op) -+{ -+ sigtrap_put(); -+} -+ - static void nop_done(struct sna *sna, const struct sna_composite_op *op) - { - assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); -@@ -129,7 +134,6 @@ static bool sna_blt_fill_init(struct sna *sna, - struct kgem *kgem = &sna->kgem; - - assert(kgem_bo_can_blt (kgem, bo)); -- assert(bo->tiling != I915_TILING_Y); - blt->bo[0] = bo; - - blt->br13 = bo->pitch; -@@ -183,6 +187,7 @@ static bool sna_blt_fill_init(struct sna *sna, - return false; - _kgem_set_mode(kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(kgem, NULL, bo); - - assert(sna->kgem.mode == KGEM_BLT); - b = kgem->batch + kgem->nbatch; -@@ -237,17 +242,13 @@ static bool sna_blt_fill_init(struct sna *sna, - return true; - } - --noinline static void sna_blt_fill_begin(struct sna *sna, -- const struct sna_blt_state *blt) -+noinline static void __sna_blt_fill_begin(struct sna *sna, -+ const struct sna_blt_state *blt) - { - struct kgem *kgem = &sna->kgem; - uint32_t *b; - -- if (kgem->nreloc) { -- _kgem_submit(kgem); -- _kgem_set_mode(kgem, 
KGEM_BLT); -- assert(kgem->nbatch == 0); -- } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, blt->bo[0]); - - assert(kgem->mode == KGEM_BLT); - b = kgem->batch + kgem->nbatch; -@@ -293,6 +294,21 @@ noinline static void sna_blt_fill_begin(struct sna *sna, - } - } - -+inline static void sna_blt_fill_begin(struct sna *sna, -+ const struct sna_blt_state *blt) -+{ -+ struct kgem *kgem = &sna->kgem; -+ -+ if (kgem->nreloc) { -+ _kgem_submit(kgem); -+ _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(kgem, NULL, blt->bo[0]); -+ assert(kgem->nbatch == 0); -+ } -+ -+ __sna_blt_fill_begin(sna, blt); -+} -+ - inline static void sna_blt_fill_one(struct sna *sna, - const struct sna_blt_state *blt, - int16_t x, int16_t y, -@@ -330,8 +346,8 @@ static bool sna_blt_copy_init(struct sna *sna, - { - struct kgem *kgem = &sna->kgem; - -- assert(kgem_bo_can_blt (kgem, src)); -- assert(kgem_bo_can_blt (kgem, dst)); -+ assert(kgem_bo_can_blt(kgem, src)); -+ assert(kgem_bo_can_blt(kgem, dst)); - - blt->bo[0] = src; - blt->bo[1] = dst; -@@ -370,6 +386,7 @@ static bool sna_blt_copy_init(struct sna *sna, - return false; - _kgem_set_mode(kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, src, dst); - - sna->blt_state.fill_bo = 0; - return true; -@@ -424,6 +441,7 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna, - return false; - _kgem_set_mode(kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, src, dst); - - sna->blt_state.fill_bo = 0; - return true; -@@ -454,6 +472,7 @@ static void sna_blt_alpha_fixup_one(struct sna *sna, - !kgem_check_reloc(kgem, 2)) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]); - } - - assert(sna->kgem.mode == KGEM_BLT); -@@ -582,6 +601,7 @@ static void sna_blt_copy_one(struct sna *sna, - !kgem_check_reloc(kgem, 2)) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]); - } - - assert(sna->kgem.mode == KGEM_BLT); -@@ -912,8 
+932,27 @@ sna_composite_mask_is_opaque(PicturePtr mask) - return is_solid(mask) && is_white(mask); - else if (!PICT_FORMAT_A(mask->format)) - return true; -- else -- return is_solid(mask) && is_opaque_solid(mask); -+ else if (mask->pSourcePict) { -+ PictSolidFill *fill = (PictSolidFill *) mask->pSourcePict; -+ return (fill->color >> 24) == 0xff; -+ } else { -+ struct sna_pixmap *priv; -+ assert(mask->pDrawable); -+ -+ if (mask->pDrawable->width == 1 && -+ mask->pDrawable->height == 1 && -+ mask->repeat) -+ return pixel_is_opaque(get_pixel(mask), mask->format); -+ -+ if (mask->transform) -+ return false; -+ -+ priv = sna_pixmap_from_drawable(mask->pDrawable); -+ if (priv == NULL || !priv->clear) -+ return false; -+ -+ return pixel_is_opaque(priv->clear_color, mask->format); -+ } - } - - fastcall -@@ -971,6 +1010,7 @@ static void blt_composite_fill__cpu(struct sna *sna, - - assert(op->dst.pixmap->devPrivate.ptr); - assert(op->dst.pixmap->devKind); -+ sigtrap_assert_active(); - pixman_fill(op->dst.pixmap->devPrivate.ptr, - op->dst.pixmap->devKind / sizeof(uint32_t), - op->dst.pixmap->drawable.bitsPerPixel, -@@ -990,6 +1030,7 @@ blt_composite_fill_box_no_offset__cpu(struct sna *sna, - - assert(op->dst.pixmap->devPrivate.ptr); - assert(op->dst.pixmap->devKind); -+ sigtrap_assert_active(); - pixman_fill(op->dst.pixmap->devPrivate.ptr, - op->dst.pixmap->devKind / sizeof(uint32_t), - op->dst.pixmap->drawable.bitsPerPixel, -@@ -1010,6 +1051,7 @@ blt_composite_fill_boxes_no_offset__cpu(struct sna *sna, - - assert(op->dst.pixmap->devPrivate.ptr); - assert(op->dst.pixmap->devKind); -+ sigtrap_assert_active(); - pixman_fill(op->dst.pixmap->devPrivate.ptr, - op->dst.pixmap->devKind / sizeof(uint32_t), - op->dst.pixmap->drawable.bitsPerPixel, -@@ -1031,6 +1073,7 @@ blt_composite_fill_box__cpu(struct sna *sna, - - assert(op->dst.pixmap->devPrivate.ptr); - assert(op->dst.pixmap->devKind); -+ sigtrap_assert_active(); - pixman_fill(op->dst.pixmap->devPrivate.ptr, - 
op->dst.pixmap->devKind / sizeof(uint32_t), - op->dst.pixmap->drawable.bitsPerPixel, -@@ -1052,6 +1095,7 @@ blt_composite_fill_boxes__cpu(struct sna *sna, - - assert(op->dst.pixmap->devPrivate.ptr); - assert(op->dst.pixmap->devKind); -+ sigtrap_assert_active(); - pixman_fill(op->dst.pixmap->devPrivate.ptr, - op->dst.pixmap->devKind / sizeof(uint32_t), - op->dst.pixmap->drawable.bitsPerPixel, -@@ -1159,12 +1203,15 @@ static inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const - box->y2 - box->y1 >= op->dst.height) { - struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); - if (op->dst.bo == priv->gpu_bo) { -+ sna_damage_all(&priv->gpu_damage, op->dst.pixmap); -+ sna_damage_destroy(&priv->cpu_damage); - priv->clear = true; - priv->clear_color = op->u.blt.pixel; - DBG(("%s: pixmap=%ld marking clear [%08x]\n", - __FUNCTION__, - op->dst.pixmap->drawable.serialNumber, - op->u.blt.pixel)); -+ ((struct sna_composite_op *)op)->damage = NULL; - } - } - } -@@ -1404,6 +1451,7 @@ begin_blt(struct sna *sna, - return false; - - _kgem_set_mode(&sna->kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, op->dst.bo); - } - - return true; -@@ -1429,6 +1477,7 @@ prepare_blt_clear(struct sna *sna, - DBG(("%s\n", __FUNCTION__)); - - if (op->dst.bo == NULL) { -+ op->u.blt.pixel = 0; - op->blt = blt_composite_fill__cpu; - if (op->dst.x|op->dst.y) { - op->box = blt_composite_fill_box__cpu; -@@ -1439,9 +1488,8 @@ prepare_blt_clear(struct sna *sna, - op->boxes = blt_composite_fill_boxes_no_offset__cpu; - op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu; - } -- op->done = nop_done; -- op->u.blt.pixel = 0; -- return true; -+ op->done = sig_done; -+ return sigtrap_get() == 0; - } - - op->blt = blt_composite_fill; -@@ -1484,8 +1532,8 @@ prepare_blt_fill(struct sna *sna, - op->boxes = blt_composite_fill_boxes_no_offset__cpu; - op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu; - } -- op->done = nop_done; -- return true; -+ op->done = sig_done; -+ return 
sigtrap_get() == 0; - } - - op->blt = blt_composite_fill; -@@ -1668,6 +1716,7 @@ static void blt_composite_copy_boxes__thread(struct sna *sna, - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - } while (1); - } else { - do { -@@ -1724,6 +1773,7 @@ static void blt_composite_copy_boxes__thread(struct sna *sna, - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - } while (1); - } - sna_vertex_unlock(&sna->render); -@@ -1806,6 +1856,7 @@ static void blt_composite_copy_boxes__thread64(struct sna *sna, - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - } while (1); - } else { - do { -@@ -1864,6 +1915,7 @@ static void blt_composite_copy_boxes__thread64(struct sna *sna, - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - } while (1); - } - sna_vertex_unlock(&sna->render); -@@ -1973,6 +2025,7 @@ prepare_blt_copy(struct sna *sna, - } - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, bo, op->dst.bo); - - DBG(("%s\n", __FUNCTION__)); - -@@ -2396,6 +2449,9 @@ prepare_blt_put(struct sna *sna, - op->box = blt_put_composite_box; - op->boxes = blt_put_composite_boxes; - } -+ -+ op->done = nop_done; -+ return true; - } else { - if (alpha_fixup) { - op->u.blt.pixel = alpha_fixup; -@@ -2407,10 +2463,10 @@ prepare_blt_put(struct sna *sna, - op->box = blt_put_composite_box__cpu; - op->boxes = blt_put_composite_boxes__cpu; - } -- } -- op->done = nop_done; - -- return true; -+ op->done = sig_done; -+ return sigtrap_get() == 0; -+ } - } - - static bool -@@ -2544,6 +2600,7 @@ sna_blt_composite(struct sna *sna, - clear: - if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) { - sna_pixmap(tmp->dst.pixmap)->clear = true; -+nop: - return prepare_blt_nop(sna, tmp); - } - -@@ -2559,6 +2616,7 @@ clear: - } - tmp->dst.bo 
= sna_drawable_use_bo(dst->pDrawable, hint, - &dst_box, &tmp->damage); -+ assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); - if (tmp->dst.bo) { - if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { - DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n", -@@ -2567,6 +2625,8 @@ clear: - } - if (hint & REPLACES) - kgem_bo_undo(&sna->kgem, tmp->dst.bo); -+ if (flags & COMPOSITE_UPLOAD) -+ return false; - } else { - RegionRec region; - -@@ -2590,32 +2650,40 @@ clear: - } - if (op == PictOpOver && is_opaque_solid(src)) - op = PictOpSrc; -- if (op == PictOpAdd && is_white(src)) -+ if (op == PictOpAdd && -+ PICT_FORMAT_RGB(src->format) == PICT_FORMAT_RGB(dst->format) && -+ is_white(src)) - op = PictOpSrc; - if (was_clear && (op == PictOpAdd || op == PictOpOver)) { - if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0) - op = PictOpSrc; - if (op == PictOpOver) { -+ unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color); - color = over(get_solid_color(src, PICT_a8r8g8b8), -- color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, -- dst->format, PICT_a8r8g8b8)); -+ dst_color); - op = PictOpSrc; - DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n", - __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), -- color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, -- dst->format, PICT_a8r8g8b8), -+ solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color), - color)); -+ if (color == dst_color) -+ goto nop; -+ else -+ goto fill; - } - if (op == PictOpAdd) { -+ unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color); - color = add(get_solid_color(src, PICT_a8r8g8b8), -- color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, -- dst->format, PICT_a8r8g8b8)); -+ dst_color); - op = PictOpSrc; - DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n", - __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), -- color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, -- dst->format, PICT_a8r8g8b8), -+ 
solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color), - color)); -+ if (color == dst_color) -+ goto nop; -+ else -+ goto fill; - } - } - if (op == PictOpOutReverse && is_opaque_solid(src)) -@@ -2649,6 +2717,7 @@ fill: - } - tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, - &dst_box, &tmp->damage); -+ assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); - if (tmp->dst.bo) { - if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { - DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n", -@@ -2657,6 +2726,8 @@ fill: - } - if (hint & REPLACES) - kgem_bo_undo(&sna->kgem, tmp->dst.bo); -+ if (flags & COMPOSITE_UPLOAD) -+ return false; - } else { - RegionRec region; - -@@ -2720,8 +2791,8 @@ fill: - if (is_clear(src_pixmap)) { - if (src->repeat || - (x >= 0 && y >= 0 && -- x + width < src_pixmap->drawable.width && -- y + height < src_pixmap->drawable.height)) { -+ x + width <= src_pixmap->drawable.width && -+ y + height <= src_pixmap->drawable.height)) { - color = color_convert(sna_pixmap(src_pixmap)->clear_color, - src->format, tmp->dst.format); - goto fill; -@@ -2795,7 +2866,7 @@ fill: - if (src_pixmap->drawable.width <= sna->render.max_3d_size && - src_pixmap->drawable.height <= sna->render.max_3d_size && - bo->pitch <= sna->render.max_3d_pitch && -- (flags & COMPOSITE_FALLBACK) == 0) -+ (flags & (COMPOSITE_UPLOAD | COMPOSITE_FALLBACK)) == 0) - { - return false; - } -@@ -2817,6 +2888,7 @@ fill: - } - tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, - &dst_box, &tmp->damage); -+ assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); - - if (tmp->dst.bo && hint & REPLACES) { - struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap); -@@ -2846,7 +2918,7 @@ fallback: - DBG(("%s: fallback -- unaccelerated upload\n", - __FUNCTION__)); - goto fallback; -- } else { -+ } else if ((flags & COMPOSITE_UPLOAD) == 0) { - ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup); - if (!ret) - goto fallback; -@@ -3023,6 +3095,7 @@ sna_blt_composite__convert(struct sna 
*sna, - } - _kgem_set_mode(&sna->kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, tmp->src.bo, tmp->dst.bo); - - if (alpha_fixup) { - tmp->blt = blt_composite_copy_with_alpha; -@@ -3062,7 +3135,7 @@ static void sna_blt_fill_op_blt(struct sna *sna, - if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { - const struct sna_blt_state *blt = &op->base.u.blt; - -- sna_blt_fill_begin(sna, blt); -+ __sna_blt_fill_begin(sna, blt); - - sna->blt_state.fill_bo = blt->bo[0]->unique_id; - sna->blt_state.fill_pixel = blt->pixel; -@@ -3079,7 +3152,7 @@ fastcall static void sna_blt_fill_op_box(struct sna *sna, - if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { - const struct sna_blt_state *blt = &op->base.u.blt; - -- sna_blt_fill_begin(sna, blt); -+ __sna_blt_fill_begin(sna, blt); - - sna->blt_state.fill_bo = blt->bo[0]->unique_id; - sna->blt_state.fill_pixel = blt->pixel; -@@ -3097,7 +3170,7 @@ fastcall static void sna_blt_fill_op_boxes(struct sna *sna, - if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { - const struct sna_blt_state *blt = &op->base.u.blt; - -- sna_blt_fill_begin(sna, blt); -+ __sna_blt_fill_begin(sna, blt); - - sna->blt_state.fill_bo = blt->bo[0]->unique_id; - sna->blt_state.fill_pixel = blt->pixel; -@@ -3132,7 +3205,7 @@ fastcall static void sna_blt_fill_op_points(struct sna *sna, - DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n)); - - if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { -- sna_blt_fill_begin(sna, blt); -+ __sna_blt_fill_begin(sna, blt); - - sna->blt_state.fill_bo = blt->bo[0]->unique_id; - sna->blt_state.fill_pixel = blt->pixel; -@@ -3162,65 +3235,15 @@ fastcall static void sna_blt_fill_op_points(struct sna *sna, - assert(kgem->nbatch < kgem->surface); - - if ((dx|dy) == 0) { -- while (n_this_time >= 8) { -- *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); -- *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); -- *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0); -- *((uint64_t *)b + 3) = 
pt_add(cmd, p+3, 0, 0); -- *((uint64_t *)b + 4) = pt_add(cmd, p+4, 0, 0); -- *((uint64_t *)b + 5) = pt_add(cmd, p+5, 0, 0); -- *((uint64_t *)b + 6) = pt_add(cmd, p+6, 0, 0); -- *((uint64_t *)b + 7) = pt_add(cmd, p+7, 0, 0); -- b += 16; -- n_this_time -= 8; -- p += 8; -- } -- if (n_this_time & 4) { -- *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); -- *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); -- *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0); -- *((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0); -- b += 8; -- p += 4; -- } -- if (n_this_time & 2) { -- *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); -- *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); -- b += 4; -- p += 2; -- } -- if (n_this_time & 1) -- *((uint64_t *)b + 0) = pt_add(cmd, p++, 0, 0); -+ do { -+ *(uint64_t *)b = pt_add(cmd, p++, 0, 0); -+ b += 2; -+ } while (--n_this_time); - } else { -- while (n_this_time >= 8) { -- *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); -- *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); -- *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy); -- *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy); -- *((uint64_t *)b + 4) = pt_add(cmd, p+4, dx, dy); -- *((uint64_t *)b + 5) = pt_add(cmd, p+5, dx, dy); -- *((uint64_t *)b + 6) = pt_add(cmd, p+6, dx, dy); -- *((uint64_t *)b + 7) = pt_add(cmd, p+7, dx, dy); -- b += 16; -- n_this_time -= 8; -- p += 8; -- } -- if (n_this_time & 4) { -- *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); -- *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); -- *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy); -- *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy); -- b += 8; -- p += 8; -- } -- if (n_this_time & 2) { -- *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); -- *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); -- b += 4; -- p += 2; -- } -- if (n_this_time & 1) -- *((uint64_t *)b + 0) = pt_add(cmd, p++, dx, dy); -+ do { -+ *(uint64_t *)b = pt_add(cmd, p++, dx, dy); -+ b += 2; -+ } while (--n_this_time); - } - - if (!n) -@@ -3414,6 +3437,7 @@ static bool 
sna_blt_fill_box(struct sna *sna, uint8_t alu, - - _kgem_set_mode(kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - assert(kgem_check_batch(kgem, 6)); - assert(kgem_check_reloc(kgem, 1)); -@@ -3520,6 +3544,8 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, - _kgem_set_mode(kgem, KGEM_BLT); - } - -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); -+ - assert(sna->kgem.mode == KGEM_BLT); - b = kgem->batch + kgem->nbatch; - if (kgem->gen >= 0100) { -@@ -3608,6 +3634,7 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); - - assert(sna->kgem.mode == KGEM_BLT); - b = kgem->batch + kgem->nbatch; -@@ -3754,6 +3781,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, - } - _kgem_set_mode(kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - - if ((dst_dx | dst_dy) == 0) { - if (kgem->gen >= 0100) { -@@ -3814,6 +3842,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - } while (1); - } else { - uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6; -@@ -3871,6 +3900,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - } while (1); - } - } else { -@@ -3932,6 +3962,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - } while (1); - } else { - cmd |= 6; -@@ -3989,6 +4020,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - } while (1); - } - } -@@ -4095,6 +4127,7 @@ bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu, - !kgem_check_reloc(kgem, 2)) { - 
_kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); - } - - assert(sna->kgem.mode == KGEM_BLT); -@@ -4190,6 +4223,7 @@ bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, - DBG(("%s: dst == src\n", __FUNCTION__)); - - if (src_bo->tiling == I915_TILING_Y && -+ !sna->kgem.can_blt_y && - kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { - struct kgem_bo *bo; - -@@ -4237,6 +4271,7 @@ bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, - } - } else { - if (src_bo->tiling == I915_TILING_Y && -+ !sna->kgem.can_blt_y && - kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { - DBG(("%s: src is y-tiled\n", __FUNCTION__)); - if (src->type != DRAWABLE_PIXMAP) -@@ -4251,6 +4286,7 @@ bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, - } - - if (dst_bo->tiling == I915_TILING_Y && -+ !sna->kgem.can_blt_y && - kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) { - DBG(("%s: dst is y-tiled\n", __FUNCTION__)); - if (dst->type != DRAWABLE_PIXMAP) -diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c -index f01f020e..1da8c291 100644 ---- a/src/sna/sna_composite.c -+++ b/src/sna/sna_composite.c -@@ -452,6 +452,8 @@ static void apply_damage(struct sna_composite_op *op, RegionPtr region) - op->damage = NULL; - } else - sna_damage_add(op->damage, region); -+ -+ assert(!op->damage || !DAMAGE_IS_ALL(*op->damage)); - } - - static inline bool use_cpu(PixmapPtr pixmap, struct sna_pixmap *priv, -@@ -653,8 +655,9 @@ sna_composite(CARD8 op, - RegionRec region; - int dx, dy; - -- DBG(("%s(%d src=%ld+(%d, %d), mask=%ld+(%d, %d), dst=%ld+(%d, %d)+(%d, %d), size=(%d, %d)\n", -- __FUNCTION__, op, -+ DBG(("%s(pixmap=%ld, op=%d, src=%ld+(%d, %d), mask=%ld+(%d, %d), dst=%ld+(%d, %d)+(%d, %d), size=(%d, %d)\n", -+ __FUNCTION__, -+ pixmap->drawable.serialNumber, op, - get_picture_id(src), src_x, src_y, - get_picture_id(mask), mask_x, mask_y, - get_picture_id(dst), dst_x, dst_y, -@@ -673,13 +676,6 @@ sna_composite(CARD8 op, - 
src = sna->clear; - } - -- if (mask && sna_composite_mask_is_opaque(mask)) { -- DBG(("%s: removing opaque %smask\n", -- __FUNCTION__, -- mask->componentAlpha && PICT_FORMAT_RGB(mask->format) ? "CA " : "")); -- mask = NULL; -- } -- - if (!sna_compute_composite_region(®ion, - src, mask, dst, - src_x, src_y, -@@ -688,6 +684,13 @@ sna_composite(CARD8 op, - width, height)) - return; - -+ if (mask && sna_composite_mask_is_opaque(mask)) { -+ DBG(("%s: removing opaque %smask\n", -+ __FUNCTION__, -+ mask->componentAlpha && PICT_FORMAT_RGB(mask->format) ? "CA " : "")); -+ mask = NULL; -+ } -+ - if (NO_COMPOSITE) - goto fallback; - -@@ -756,6 +759,7 @@ sna_composite(CARD8 op, - DBG(("%s: fallback due unhandled composite op\n", __FUNCTION__)); - goto fallback; - } -+ assert(!tmp.damage || !DAMAGE_IS_ALL(*tmp.damage)); - - if (region.data == NULL) - tmp.box(sna, &tmp, ®ion.extents); -@@ -797,8 +801,10 @@ sna_composite_rectangles(CARD8 op, - int i, num_boxes; - unsigned hint; - -- DBG(("%s(op=%d, %08x x %d [(%d, %d)x(%d, %d) ...])\n", -- __FUNCTION__, op, -+ DBG(("%s(pixmap=%ld, op=%d, %08x x %d [(%d, %d)x(%d, %d) ...])\n", -+ __FUNCTION__, -+ get_drawable_pixmap(dst->pDrawable)->drawable.serialNumber, -+ op, - (color->alpha >> 8 << 24) | - (color->red >> 8 << 16) | - (color->green >> 8 << 8) | -@@ -814,38 +820,40 @@ sna_composite_rectangles(CARD8 op, - return; - } - -- if ((color->red|color->green|color->blue|color->alpha) <= 0x00ff) { -- switch (op) { -- case PictOpOver: -- case PictOpOutReverse: -- case PictOpAdd: -- return; -- case PictOpInReverse: -- case PictOpSrc: -- op = PictOpClear; -- break; -- case PictOpAtopReverse: -- op = PictOpOut; -- break; -- case PictOpXor: -- op = PictOpOverReverse; -- break; -- } -- } - if (color->alpha <= 0x00ff) { -- switch (op) { -- case PictOpOver: -- case PictOpOutReverse: -- return; -- case PictOpInReverse: -- op = PictOpClear; -- break; -- case PictOpAtopReverse: -- op = PictOpOut; -- break; -- case PictOpXor: -- op = 
PictOpOverReverse; -- break; -+ if (PICT_FORMAT_TYPE(dst->format) == PICT_TYPE_A || -+ (color->red|color->green|color->blue) <= 0x00ff) { -+ switch (op) { -+ case PictOpOver: -+ case PictOpOutReverse: -+ case PictOpAdd: -+ return; -+ case PictOpInReverse: -+ case PictOpSrc: -+ op = PictOpClear; -+ break; -+ case PictOpAtopReverse: -+ op = PictOpOut; -+ break; -+ case PictOpXor: -+ op = PictOpOverReverse; -+ break; -+ } -+ } else { -+ switch (op) { -+ case PictOpOver: -+ case PictOpOutReverse: -+ return; -+ case PictOpInReverse: -+ op = PictOpClear; -+ break; -+ case PictOpAtopReverse: -+ op = PictOpOut; -+ break; -+ case PictOpXor: -+ op = PictOpOverReverse; -+ break; -+ } - } - } else if (color->alpha >= 0xff00) { - switch (op) { -@@ -863,11 +871,16 @@ sna_composite_rectangles(CARD8 op, - case PictOpXor: - op = PictOpOut; - break; -+ case PictOpAdd: -+ if (PICT_FORMAT_TYPE(dst->format) == PICT_TYPE_A || -+ (color->red&color->green&color->blue) >= 0xff00) -+ op = PictOpSrc; -+ break; - } - } - - /* Avoid reducing overlapping translucent rectangles */ -- if (op == PictOpOver && -+ if ((op == PictOpOver || op == PictOpAdd) && - num_rects == 1 && - sna_drawable_is_clear(dst->pDrawable)) - op = PictOpSrc; -@@ -979,6 +992,9 @@ sna_composite_rectangles(CARD8 op, - bool ok; - - if (op == PictOpClear) { -+ if (priv->clear_color == 0) -+ goto done; -+ - ok = sna_get_pixel_from_rgba(&pixel, - 0, 0, 0, 0, - dst->format); -@@ -990,8 +1006,11 @@ sna_composite_rectangles(CARD8 op, - color->alpha, - dst->format); - } -- if (ok && priv->clear_color == pixel) -+ if (ok && priv->clear_color == pixel) { -+ DBG(("%s: matches current clear, skipping\n", -+ __FUNCTION__)); - goto done; -+ } - } - - if (region.data == NULL) { -diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h -index 272e83bc..d5c727ee 100644 ---- a/src/sna/sna_damage.h -+++ b/src/sna/sna_damage.h -@@ -267,7 +267,7 @@ int _sna_damage_get_boxes(struct sna_damage *damage, const BoxRec **boxes); - static inline int - 
sna_damage_get_boxes(struct sna_damage *damage, const BoxRec **boxes) - { -- assert(damage); -+ assert(DAMAGE_PTR(damage)); - - if (DAMAGE_IS_ALL(damage)) { - *boxes = &DAMAGE_PTR(damage)->extents; -@@ -322,7 +322,8 @@ static inline void sna_damage_destroy(struct sna_damage **damage) - if (*damage == NULL) - return; - -- __sna_damage_destroy(DAMAGE_PTR(*damage)); -+ if (DAMAGE_PTR(*damage)) -+ __sna_damage_destroy(DAMAGE_PTR(*damage)); - *damage = NULL; - } - -diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c -index 4b218b70..9b77550e 100644 ---- a/src/sna/sna_display.c -+++ b/src/sna/sna_display.c -@@ -39,6 +39,25 @@ - #include - #include - #include -+#include -+ -+#if HAVE_ALLOCA_H -+#include -+#elif defined __GNUC__ -+#define alloca __builtin_alloca -+#elif defined _AIX -+#define alloca __alloca -+#elif defined _MSC_VER -+#include -+#define alloca _alloca -+#else -+void *alloca(size_t); -+#endif -+ -+#define _PARSE_EDID_ -+/* Jump through a few hoops in order to fixup EDIDs */ -+#undef VERSION -+#undef REVISION - - #include "sna.h" - #include "sna_reg.h" -@@ -72,6 +91,10 @@ - #include - #endif - -+#define FAIL_CURSOR_IOCTL 0 -+ -+#define COLDPLUG_DELAY_MS 2000 -+ - /* Minor discrepancy between 32-bit/64-bit ABI in old kernels */ - union compat_mode_get_connector{ - struct drm_mode_get_connector conn; -@@ -88,6 +111,8 @@ union compat_mode_get_connector{ - #define DEFAULT_DPI 96 - #endif - -+#define OUTPUT_STATUS_CACHE_MS 15000 -+ - #define DRM_MODE_PAGE_FLIP_ASYNC 0x02 - - #define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 -@@ -106,33 +131,87 @@ struct local_mode_obj_get_properties { - }; - #define LOCAL_MODE_OBJECT_PLANE 0xeeeeeeee - --#if 0 -+struct local_mode_set_plane { -+ uint32_t plane_id; -+ uint32_t crtc_id; -+ uint32_t fb_id; /* fb object contains surface format type */ -+ uint32_t flags; -+ -+ /* Signed dest location allows it to be partially off screen */ -+ int32_t crtc_x, crtc_y; -+ uint32_t crtc_w, crtc_h; -+ -+ /* Source values are 16.16 fixed 
point */ -+ uint32_t src_x, src_y; -+ uint32_t src_h, src_w; -+}; -+#define LOCAL_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct local_mode_set_plane) -+ -+struct local_mode_get_plane { -+ uint32_t plane_id; -+ -+ uint32_t crtc_id; -+ uint32_t fb_id; -+ -+ uint32_t possible_crtcs; -+ uint32_t gamma_size; -+ -+ uint32_t count_format_types; -+ uint64_t format_type_ptr; -+}; -+#define LOCAL_IOCTL_MODE_GETPLANE DRM_IOWR(0xb6, struct local_mode_get_plane) -+ -+struct local_mode_get_plane_res { -+ uint64_t plane_id_ptr; -+ uint64_t count_planes; -+}; -+#define LOCAL_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xb5, struct local_mode_get_plane_res) -+ -+#if 1 - #define __DBG DBG - #else - #define __DBG(x) - #endif - -+#define DBG_NATIVE_ROTATION ~0 /* minimum RR_Rotate_0 */ -+ - extern XF86ConfigPtr xf86configptr; - -+struct sna_cursor { -+ struct sna_cursor *next; -+ uint32_t *image; -+ bool transformed; -+ Rotation rotation; -+ int ref; -+ int size; -+ int last_width; -+ int last_height; -+ unsigned handle; -+ unsigned serial; -+ unsigned alloc; -+}; -+ - struct sna_crtc { -+ unsigned long flags; -+ uint32_t id; - xf86CrtcPtr base; - struct drm_mode_modeinfo kmode; -- int dpms_mode; - PixmapPtr slave_pixmap; - DamagePtr slave_damage; -- struct kgem_bo *bo, *shadow_bo, *client_bo; -+ struct kgem_bo *bo, *shadow_bo, *client_bo, *cache_bo; - struct sna_cursor *cursor; - unsigned int last_cursor_size; - uint32_t offset; - bool shadow; - bool fallback_shadow; - bool transform; -+ bool cursor_transform; -+ bool hwcursor; - bool flip_pending; -- uint8_t id; -- uint8_t pipe; - -- RegionRec client_damage; /* XXX overlap with shadow damage? 
*/ -+ struct pict_f_transform cursor_to_fb, fb_to_cursor; - -+ RegionRec crtc_damage; - uint16_t shadow_bo_width, shadow_bo_height; - - uint32_t rotation; -@@ -143,7 +222,9 @@ struct sna_crtc { - uint32_t supported; - uint32_t current; - } rotation; -- } primary, sprite; -+ struct list link; -+ } primary; -+ struct list sprites; - - uint32_t mode_serial, flip_serial; - -@@ -173,21 +254,33 @@ struct sna_output { - - unsigned int is_panel : 1; - unsigned int add_default_modes : 1; -+ int connector_type; -+ int connector_type_id; -+ -+ uint32_t link_status_idx; - - uint32_t edid_idx; - uint32_t edid_blob_id; - uint32_t edid_len; - void *edid_raw; -+ xf86MonPtr fake_edid_mon; -+ void *fake_edid_raw; - - bool has_panel_limits; - int panel_hdisplay; - int panel_vdisplay; - - uint32_t dpms_id; -- int dpms_mode; -+ uint8_t dpms_mode; - struct backlight backlight; - int backlight_active_level; - -+ uint32_t last_detect; -+ uint32_t status; -+ unsigned int hotplug_count; -+ bool update_properties; -+ bool reprobe; -+ - int num_modes; - struct drm_mode_modeinfo *modes; - -@@ -218,13 +311,91 @@ enum { /* XXX copied from hw/xfree86/modes/xf86Crtc.c */ - OPTION_DEFAULT_MODES, - }; - -+static void __sna_output_dpms(xf86OutputPtr output, int dpms, int fixup); - static void sna_crtc_disable_cursor(struct sna *sna, struct sna_crtc *crtc); -+static bool sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, -+ struct kgem_bo *bo, int x, int y); - - static bool is_zaphod(ScrnInfoPtr scrn) - { - return xf86IsEntityShared(scrn->entityList[0]); - } - -+static bool -+sna_zaphod_match(struct sna *sna, const char *output) -+{ -+ const char *s, *colon; -+ char t[20]; -+ unsigned int i = 0; -+ -+ s = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); -+ if (s == NULL) -+ return false; -+ -+ colon = strchr(s, ':'); -+ if (colon) /* Skip over the ZaphodPipes */ -+ s = colon + 1; -+ -+ do { -+ /* match any outputs in a comma list, stopping at whitespace */ -+ switch (*s) { -+ case '\0': -+ t[i] = 
'\0'; -+ return strcmp(t, output) == 0; -+ -+ case ',': -+ t[i] ='\0'; -+ if (strcmp(t, output) == 0) -+ return TRUE; -+ i = 0; -+ break; -+ -+ case ' ': -+ case '\t': -+ case '\n': -+ case '\r': -+ break; -+ -+ default: -+ t[i++] = *s; -+ break; -+ } -+ -+ s++; -+ } while (i < sizeof(t)); -+ -+ return false; -+} -+ -+static unsigned -+get_zaphod_crtcs(struct sna *sna) -+{ -+ const char *str, *colon; -+ unsigned crtcs = 0; -+ -+ str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); -+ if (str == NULL || (colon = strchr(str, ':')) == NULL) { -+ DBG(("%s: no zaphod pipes, using screen number: %x\n", -+ __FUNCTION__, -+ sna->scrn->confScreen->device->screen)); -+ return 1 << sna->scrn->confScreen->device->screen; -+ } -+ -+ DBG(("%s: ZaphodHeads='%s'\n", __FUNCTION__, str)); -+ while (str < colon) { -+ char *end; -+ unsigned crtc = strtoul(str, &end, 0); -+ if (end == str) -+ break; -+ DBG(("%s: adding CRTC %d to zaphod pipes\n", -+ __FUNCTION__, crtc)); -+ crtcs |= 1 << crtc; -+ str = end + 1; -+ } -+ DBG(("%s: ZaphodPipes=%x\n", __FUNCTION__, crtcs)); -+ return crtcs; -+} -+ - inline static unsigned count_to_mask(int x) - { - return (1 << x) - 1; -@@ -247,6 +418,21 @@ static inline struct sna_crtc *to_sna_crtc(xf86CrtcPtr crtc) - return crtc->driver_private; - } - -+static inline unsigned __sna_crtc_pipe(struct sna_crtc *crtc) -+{ -+ return crtc->flags >> 8 & 0xff; -+} -+ -+static inline unsigned __sna_crtc_id(struct sna_crtc *crtc) -+{ -+ return crtc->id; -+} -+ -+uint32_t sna_crtc_id(xf86CrtcPtr crtc) -+{ -+ return __sna_crtc_id(to_sna_crtc(crtc)); -+} -+ - static inline bool event_pending(int fd) - { - struct pollfd pfd; -@@ -268,29 +454,37 @@ static inline uint32_t fb_id(struct kgem_bo *bo) - return bo->delta; - } - --uint32_t sna_crtc_id(xf86CrtcPtr crtc) -+unsigned sna_crtc_count_sprites(xf86CrtcPtr crtc) - { -- if (to_sna_crtc(crtc) == NULL) -- return 0; -- return to_sna_crtc(crtc)->id; --} -+ struct plane *sprite; -+ unsigned count; - --int 
sna_crtc_to_pipe(xf86CrtcPtr crtc) --{ -- assert(to_sna_crtc(crtc)); -- return to_sna_crtc(crtc)->pipe; -+ count = 0; -+ list_for_each_entry(sprite, &to_sna_crtc(crtc)->sprites, link) -+ count++; -+ -+ return count; - } - --uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc) -+static struct plane *lookup_sprite(struct sna_crtc *crtc, unsigned idx) - { -- assert(to_sna_crtc(crtc)); -- return to_sna_crtc(crtc)->sprite.id; -+ struct plane *sprite; -+ -+ list_for_each_entry(sprite, &crtc->sprites, link) -+ if (idx-- == 0) -+ return sprite; -+ -+ return NULL; - } - --bool sna_crtc_is_on(xf86CrtcPtr crtc) -+uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc, unsigned idx) - { -+ struct plane *sprite; -+ - assert(to_sna_crtc(crtc)); -- return to_sna_crtc(crtc)->bo != NULL; -+ -+ sprite = lookup_sprite(to_sna_crtc(crtc), idx); -+ return sprite ? sprite->id : 0; - } - - bool sna_crtc_is_transformed(xf86CrtcPtr crtc) -@@ -299,34 +493,48 @@ bool sna_crtc_is_transformed(xf86CrtcPtr crtc) - return to_sna_crtc(crtc)->transform; - } - --static inline uint64_t msc64(struct sna_crtc *sna_crtc, uint32_t seq) -+static inline bool msc64(struct sna_crtc *sna_crtc, uint32_t seq, uint64_t *msc) - { -+ bool record = true; - if (seq < sna_crtc->last_seq) { - if (sna_crtc->last_seq - seq > 0x40000000) { - sna_crtc->wrap_seq++; - DBG(("%s: pipe=%d wrapped; was %u, now %u, wraps=%u\n", -- __FUNCTION__, sna_crtc->pipe, -+ __FUNCTION__, __sna_crtc_pipe(sna_crtc), - sna_crtc->last_seq, seq, sna_crtc->wrap_seq)); -- } else { -- ERR(("%s: pipe=%d msc went backwards; was %u, now %u\n", -- __FUNCTION__, sna_crtc->pipe, sna_crtc->last_seq, seq)); -- seq = sna_crtc->last_seq; -+ } else { -+ DBG(("%s: pipe=%d msc went backwards; was %u, now %u; ignoring for last_swap\n", -+ __FUNCTION__, __sna_crtc_pipe(sna_crtc), sna_crtc->last_seq, seq)); -+ -+ record = false; - } - } -- sna_crtc->last_seq = seq; -- return (uint64_t)sna_crtc->wrap_seq << 32 | seq; -+ *msc = (uint64_t)sna_crtc->wrap_seq << 32 | seq; -+ return 
record; - } - - uint64_t sna_crtc_record_swap(xf86CrtcPtr crtc, - int tv_sec, int tv_usec, unsigned seq) - { - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); -+ uint64_t msc; -+ - assert(sna_crtc); -- DBG(("%s: recording last swap on pipe=%d, frame %d, time %d.%06d\n", -- __FUNCTION__, sna_crtc->pipe, seq, tv_sec, tv_usec)); -- sna_crtc->swap.tv_sec = tv_sec; -- sna_crtc->swap.tv_usec = tv_usec; -- return sna_crtc->swap.msc = msc64(sna_crtc, seq); -+ -+ if (msc64(sna_crtc, seq, &msc)) { -+ DBG(("%s: recording last swap on pipe=%d, frame %d [msc=%08lld], time %d.%06d\n", -+ __FUNCTION__, __sna_crtc_pipe(sna_crtc), seq, (long long)msc, -+ tv_sec, tv_usec)); -+ sna_crtc->swap.tv_sec = tv_sec; -+ sna_crtc->swap.tv_usec = tv_usec; -+ sna_crtc->swap.msc = msc; -+ } else { -+ DBG(("%s: swap event on pipe=%d, frame %d [msc=%08lld], time %d.%06d\n", -+ __FUNCTION__, __sna_crtc_pipe(sna_crtc), seq, (long long)msc, -+ tv_sec, tv_usec)); -+ } -+ -+ return msc; - } - - const struct ust_msc *sna_crtc_last_swap(xf86CrtcPtr crtc) -@@ -342,15 +550,6 @@ const struct ust_msc *sna_crtc_last_swap(xf86CrtcPtr crtc) - } - } - --xf86CrtcPtr sna_mode_first_crtc(struct sna *sna) --{ -- xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -- if (sna->mode.num_real_crtc) -- return config->crtc[0]; -- else -- return NULL; --} -- - #ifndef NDEBUG - static void gem_close(int fd, uint32_t handle); - static void assert_scanout(struct kgem *kgem, struct kgem_bo *bo, -@@ -372,12 +571,24 @@ static void assert_scanout(struct kgem *kgem, struct kgem_bo *bo, - #define assert_scanout(k, b, w, h) - #endif - -+static void assert_crtc_fb(struct sna *sna, struct sna_crtc *crtc) -+{ -+#ifndef NDEBUG -+ struct drm_mode_crtc mode = { .crtc_id = __sna_crtc_id(crtc) }; -+ drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode); -+ assert(mode.fb_id == fb_id(crtc->bo)); -+#endif -+} -+ - static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, - int width, int height) - { - ScrnInfoPtr scrn = sna->scrn; - 
struct drm_mode_fb_cmd arg; - -+ if (!kgem_bo_is_fenced(&sna->kgem, bo)) -+ return 0; -+ - assert(bo->refcnt); - assert(bo->proxy == NULL); - assert(!bo->snoop); -@@ -393,8 +604,9 @@ static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, - DBG(("%s: create fb %dx%d@%d/%d\n", - __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel)); - -- assert(bo->tiling != I915_TILING_Y); -+ assert(bo->tiling != I915_TILING_Y || sna->kgem.can_scanout_y); - assert((bo->pitch & 63) == 0); -+ assert(scrn->vtSema); /* must be master */ - - VG_CLEAR(arg); - arg.width = width; -@@ -404,21 +616,83 @@ static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, - arg.depth = scrn->depth; - arg.handle = bo->handle; - -- assert(sna->scrn->vtSema); /* must be master */ - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_ADDFB, &arg)) { -- xf86DrvMsg(scrn->scrnIndex, X_ERROR, -- "%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d: %d\n", -- __FUNCTION__, width, height, -- scrn->depth, scrn->bitsPerPixel, bo->pitch, errno); -- return 0; -+ /* Try again with the fancy version */ -+ struct local_mode_fb_cmd2 { -+ uint32_t fb_id; -+ uint32_t width, height; -+ uint32_t pixel_format; -+ uint32_t flags; -+ -+ uint32_t handles[4]; -+ uint32_t pitches[4]; /* pitch for each plane */ -+ uint32_t offsets[4]; /* offset of each plane */ -+ uint64_t modifiers[4]; -+ } f; -+#define LOCAL_IOCTL_MODE_ADDFB2 DRM_IOWR(0xb8, struct local_mode_fb_cmd2) -+ memset(&f, 0, sizeof(f)); -+ f.width = width; -+ f.height = height; -+ /* XXX interlaced */ -+ f.flags = 1 << 1; /* +modifiers */ -+ f.handles[0] = bo->handle; -+ f.pitches[0] = bo->pitch; -+ -+ switch (bo->tiling) { -+ case I915_TILING_NONE: -+ break; -+ case I915_TILING_X: -+ /* I915_FORMAT_MOD_X_TILED */ -+ f.modifiers[0] = (uint64_t)1 << 56 | 1; -+ break; -+ case I915_TILING_Y: -+ /* I915_FORMAT_MOD_X_TILED */ -+ f.modifiers[0] = (uint64_t)1 << 56 | 2; -+ break; -+ } -+ -+#define fourcc(a,b,c,d) ((a) | (b) << 8 | (c) << 16 | (d) << 24) -+ switch 
(scrn->depth) { -+ default: -+ ERR(("%s: unhandled screen format, depth=%d\n", -+ __FUNCTION__, scrn->depth)); -+ goto fail; -+ case 8: -+ f.pixel_format = fourcc('C', '8', ' ', ' '); -+ break; -+ case 15: -+ f.pixel_format = fourcc('X', 'R', '1', '5'); -+ break; -+ case 16: -+ f.pixel_format = fourcc('R', 'G', '1', '6'); -+ break; -+ case 24: -+ f.pixel_format = fourcc('X', 'R', '2', '4'); -+ break; -+ case 30: -+ f.pixel_format = fourcc('X', 'R', '3', '0'); -+ break; -+ } -+#undef fourcc -+ -+ if (drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_ADDFB2, &f)) { -+fail: -+ xf86DrvMsg(scrn->scrnIndex, X_ERROR, -+ "%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d: %d\n", -+ __FUNCTION__, width, height, -+ scrn->depth, scrn->bitsPerPixel, bo->pitch, errno); -+ return 0; -+ } -+ -+ arg.fb_id = f.fb_id; - } - assert(arg.fb_id != 0); -- -+ bo->delta = arg.fb_id; - DBG(("%s: attached fb=%d to handle=%d\n", -- __FUNCTION__, arg.fb_id, arg.handle)); -+ __FUNCTION__, bo->delta, arg.handle)); - - bo->scanout = true; -- return bo->delta = arg.fb_id; -+ return bo->delta; - } - - static uint32_t gem_create(int fd, int size) -@@ -438,6 +712,7 @@ static uint32_t gem_create(int fd, int size) - static void *gem_mmap(int fd, int handle, int size) - { - struct drm_i915_gem_mmap_gtt mmap_arg; -+ struct drm_i915_gem_set_domain set_domain; - void *ptr; - - VG_CLEAR(mmap_arg); -@@ -449,6 +724,15 @@ static void *gem_mmap(int fd, int handle, int size) - if (ptr == MAP_FAILED) - return NULL; - -+ VG_CLEAR(set_domain); -+ set_domain.handle = handle; -+ set_domain.read_domains = I915_GEM_DOMAIN_GTT; -+ set_domain.write_domain = I915_GEM_DOMAIN_GTT; -+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { -+ munmap(ptr, size); -+ return NULL; -+ } -+ - return ptr; - } - -@@ -497,8 +781,6 @@ sna_backlight_uevent(int fd, void *closure) - if (sna_output->dpms_mode != DPMSModeOn) - continue; - -- assert(output->randr_output); -- - val = backlight_get(&sna_output->backlight); - if (val < 0) - 
continue; -@@ -523,6 +805,7 @@ sna_backlight_uevent(int fd, void *closure) - TRUE, FALSE); - } - } -+ DBG(("%s: complete\n", __FUNCTION__)); - } - - static void sna_backlight_pre_init(struct sna *sna) -@@ -570,6 +853,7 @@ static void sna_backlight_drain_uevents(struct sna *sna) - if (sna->mode.backlight_monitor == NULL) - return; - -+ DBG(("%s()\n", __FUNCTION__)); - sna_backlight_uevent(udev_monitor_get_fd(sna->mode.backlight_monitor), - sna); - } -@@ -632,9 +916,22 @@ sna_output_backlight_set(struct sna_output *sna_output, int level) - return ret; - } - -+static bool -+has_native_backlight(struct sna_output *sna_output) -+{ -+ return sna_output->backlight.type == BL_RAW; -+} -+ - static void - sna_output_backlight_off(struct sna_output *sna_output) - { -+ /* Trust the kernel to turn the native backlight off. However, we -+ * do explicitly turn the backlight back on (when we wake the output) -+ * just in case a third party turns it off! -+ */ -+ if (has_native_backlight(sna_output)) -+ return; -+ - DBG(("%s(%s)\n", __FUNCTION__, sna_output->base->name)); - backlight_off(&sna_output->backlight); - sna_output_backlight_set(sna_output, 0); -@@ -674,7 +971,7 @@ has_user_backlight_override(xf86OutputPtr output) - if (*str == '\0') - return (char *)str; - -- if (backlight_exists(str) == BL_NONE) { -+ if (!backlight_exists(str)) { - xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, - "Unrecognised backlight control interface '%s'\n", - str); -@@ -684,6 +981,93 @@ has_user_backlight_override(xf86OutputPtr output) - return strdup(str); - } - -+static int get_device_minor(int fd) -+{ -+ struct stat st; -+ -+ if (fstat(fd, &st) || !S_ISCHR(st.st_mode)) -+ return -1; -+ -+ return st.st_rdev & 0x63; -+} -+ -+static const char * const sysfs_connector_types[] = { -+ /* DRM_MODE_CONNECTOR_Unknown */ "Unknown", -+ /* DRM_MODE_CONNECTOR_VGA */ "VGA", -+ /* DRM_MODE_CONNECTOR_DVII */ "DVI-I", -+ /* DRM_MODE_CONNECTOR_DVID */ "DVI-D", -+ /* DRM_MODE_CONNECTOR_DVIA */ "DVI-A", -+ /* 
DRM_MODE_CONNECTOR_Composite */ "Composite", -+ /* DRM_MODE_CONNECTOR_SVIDEO */ "SVIDEO", -+ /* DRM_MODE_CONNECTOR_LVDS */ "LVDS", -+ /* DRM_MODE_CONNECTOR_Component */ "Component", -+ /* DRM_MODE_CONNECTOR_9PinDIN */ "DIN", -+ /* DRM_MODE_CONNECTOR_DisplayPort */ "DP", -+ /* DRM_MODE_CONNECTOR_HDMIA */ "HDMI-A", -+ /* DRM_MODE_CONNECTOR_HDMIB */ "HDMI-B", -+ /* DRM_MODE_CONNECTOR_TV */ "TV", -+ /* DRM_MODE_CONNECTOR_eDP */ "eDP", -+ /* DRM_MODE_CONNECTOR_VIRTUAL */ "Virtual", -+ /* DRM_MODE_CONNECTOR_DSI */ "DSI", -+ /* DRM_MODE_CONNECTOR_DPI */ "DPI" -+}; -+ -+static char *has_connector_backlight(xf86OutputPtr output) -+{ -+ struct sna_output *sna_output = output->driver_private; -+ struct sna *sna = to_sna(output->scrn); -+ char path[1024]; -+ DIR *dir; -+ struct dirent *de; -+ int minor, len; -+ char *str = NULL; -+ -+ if (sna_output->connector_type >= ARRAY_SIZE(sysfs_connector_types)) -+ return NULL; -+ -+ minor = get_device_minor(sna->kgem.fd); -+ if (minor < 0) -+ return NULL; -+ -+ len = snprintf(path, sizeof(path), -+ "/sys/class/drm/card%d-%s-%d", -+ minor, -+ sysfs_connector_types[sna_output->connector_type], -+ sna_output->connector_type_id); -+ DBG(("%s: lookup %s\n", __FUNCTION__, path)); -+ -+ dir = opendir(path); -+ if (dir == NULL) -+ return NULL; -+ -+ while ((de = readdir(dir))) { -+ struct stat st; -+ -+ if (*de->d_name == '.') -+ continue; -+ -+ snprintf(path + len, sizeof(path) - len, -+ "/%s", de->d_name); -+ -+ if (stat(path, &st)) -+ continue; -+ -+ if (!S_ISDIR(st.st_mode)) -+ continue; -+ -+ DBG(("%s: testing %s as backlight\n", -+ __FUNCTION__, de->d_name)); -+ -+ if (backlight_exists(de->d_name)) { -+ str = strdup(de->d_name); /* leak! 
*/ -+ break; -+ } -+ } -+ -+ closedir(dir); -+ return str; -+} -+ - static void - sna_output_backlight_init(xf86OutputPtr output) - { -@@ -696,11 +1080,20 @@ sna_output_backlight_init(xf86OutputPtr output) - return; - #endif - -- from = X_CONFIG; -- best_iface = has_user_backlight_override(output); -+ if (sna_output->is_panel) { -+ from = X_CONFIG; -+ best_iface = has_user_backlight_override(output); -+ if (best_iface) -+ goto done; -+ } -+ -+ best_iface = has_connector_backlight(output); - if (best_iface) - goto done; - -+ if (!sna_output->is_panel) -+ return; -+ - /* XXX detect right backlight for multi-GPU/panels */ - from = X_PROBED; - pci = xf86GetPciInfoForEntity(to_sna(output->scrn)->pEnt->index); -@@ -728,6 +1121,38 @@ done: - sna_output->backlight.iface, best_iface, output->name); - } - -+#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(22, 0) -+static inline int sigio_block(void) -+{ -+ return 0; -+} -+static inline void sigio_unblock(int was_blocked) -+{ -+ (void)was_blocked; -+} -+#elif XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0) -+static inline int sigio_block(void) -+{ -+ OsBlockSIGIO(); -+ return 0; -+} -+static inline void sigio_unblock(int was_blocked) -+{ -+ OsReleaseSIGIO(); -+ (void)was_blocked; -+} -+#else -+#include -+static inline int sigio_block(void) -+{ -+ return xf86BlockSIGIO(); -+} -+static inline void sigio_unblock(int was_blocked) -+{ -+ xf86UnblockSIGIO(was_blocked); -+} -+#endif -+ - static char *canonical_kmode_name(const struct drm_mode_modeinfo *kmode) - { - char tmp[32], *buf; -@@ -781,6 +1206,7 @@ mode_from_kmode(ScrnInfoPtr scrn, - mode->VTotal = kmode->vtotal; - mode->VScan = kmode->vscan; - -+ mode->VRefresh = kmode->vrefresh; - mode->Flags = kmode->flags; - mode->name = get_kmode_name(kmode); - -@@ -814,6 +1240,7 @@ mode_to_kmode(struct drm_mode_modeinfo *kmode, DisplayModePtr mode) - kmode->vtotal = mode->VTotal; - kmode->vscan = mode->VScan; - -+ kmode->vrefresh = mode->VRefresh; - kmode->flags = mode->Flags; - 
if (mode->name) - strncpy(kmode->name, mode->name, DRM_DISPLAY_MODE_LEN); -@@ -824,11 +1251,12 @@ static void - sna_crtc_force_outputs_on(xf86CrtcPtr crtc) - { - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); -+ /* All attached outputs are valid, so update our timestamps */ -+ unsigned now = GetTimeInMillis(); - int i; - - assert(to_sna_crtc(crtc)); -- DBG(("%s(pipe=%d), currently? %d\n", __FUNCTION__, -- to_sna_crtc(crtc)->pipe, to_sna_crtc(crtc)->dpms_mode)); -+ DBG(("%s(pipe=%d)\n", __FUNCTION__, sna_crtc_pipe(crtc))); - - /* DPMS handling by the kernel is inconsistent, so after setting a - * mode on an output presume that we intend for it to be on, or that -@@ -843,10 +1271,11 @@ sna_crtc_force_outputs_on(xf86CrtcPtr crtc) - if (output->crtc != crtc) - continue; - -- output->funcs->dpms(output, DPMSModeOn); -+ __sna_output_dpms(output, DPMSModeOn, false); -+ if (to_sna_output(output)->last_detect) -+ to_sna_output(output)->last_detect = now; - } - -- to_sna_crtc(crtc)->dpms_mode = DPMSModeOn; - #if XF86_CRTC_VERSION >= 3 - crtc->active = TRUE; - #endif -@@ -859,8 +1288,7 @@ sna_crtc_force_outputs_off(xf86CrtcPtr crtc) - int i; - - assert(to_sna_crtc(crtc)); -- DBG(("%s(pipe=%d), currently? 
%d\n", __FUNCTION__, -- to_sna_crtc(crtc)->pipe, to_sna_crtc(crtc)->dpms_mode)); -+ DBG(("%s(pipe=%d)\n", __FUNCTION__, sna_crtc_pipe(crtc))); - - /* DPMS handling by the kernel is inconsistent, so after setting a - * mode on an output presume that we intend for it to be on, or that -@@ -875,35 +1303,47 @@ sna_crtc_force_outputs_off(xf86CrtcPtr crtc) - if (output->crtc != crtc) - continue; - -- output->funcs->dpms(output, DPMSModeOff); -+ __sna_output_dpms(output, DPMSModeOff, false); - } -- -- to_sna_crtc(crtc)->dpms_mode = DPMSModeOff; - } - - static unsigned --rotation_reduce(struct plane *p, unsigned rotation) -+rotation_reflect(unsigned rotation) - { -- unsigned unsupported_rotations = rotation & ~p->rotation.supported; -+ unsigned other_bits; - -- if (unsupported_rotations == 0) -- return rotation; -+ /* paranoia for future extensions */ -+ other_bits = rotation & ~RR_Rotate_All; - --#define RR_Reflect_XY (RR_Reflect_X | RR_Reflect_Y) -+ /* flip the reflection to compensate for reflecting the rotation */ -+ other_bits ^= RR_Reflect_X | RR_Reflect_Y; - -- if ((unsupported_rotations & RR_Reflect_XY) == RR_Reflect_XY && -- p->rotation.supported& RR_Rotate_180) { -- rotation &= ~RR_Reflect_XY; -- rotation ^= RR_Rotate_180; -- } -+ /* Reflect the screen by rotating the rotation bit, -+ * which has to have at least RR_Rotate_0 set. This allows -+ * us to reflect any of the rotation bits, not just 0. 
-+ */ -+ rotation &= RR_Rotate_All; -+ assert(rotation); -+ rotation <<= 2; /* RR_Rotate_0 -> RR_Rotate_180 etc */ -+ rotation |= rotation >> 4; /* RR_Rotate_270' to RR_Rotate_90 */ -+ rotation &= RR_Rotate_All; -+ assert(rotation); - -- if ((unsupported_rotations & RR_Rotate_180) && -- (p->rotation.supported& RR_Reflect_XY) == RR_Reflect_XY) { -- rotation ^= RR_Reflect_XY; -- rotation &= ~RR_Rotate_180; -+ return rotation | other_bits; -+} -+ -+static unsigned -+rotation_reduce(struct plane *p, unsigned rotation) -+{ -+ /* If unsupported try exchanging rotation for a reflection */ -+ if (rotation & ~p->rotation.supported) { -+ unsigned new_rotation = rotation_reflect(rotation); -+ if ((new_rotation & p->rotation.supported) == new_rotation) -+ rotation = new_rotation; - } - --#undef RR_Reflect_XY -+ /* Only one rotation bit should be set */ -+ assert(is_power_of_two(rotation & RR_Rotate_All)); - - return rotation; - } -@@ -923,7 +1363,7 @@ rotation_set(struct sna *sna, struct plane *p, uint32_t desired) - if (desired == p->rotation.current) - return true; - -- if ((desired & p->rotation.supported) == 0) { -+ if ((desired & p->rotation.supported) != desired) { - errno = EINVAL; - return false; - } -@@ -956,20 +1396,105 @@ rotation_reset(struct plane *p) - p->rotation.current = 0; - } - --bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, uint32_t rotation) -+bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, -+ unsigned idx, -+ uint32_t rotation) - { -+ struct plane *sprite; - assert(to_sna_crtc(crtc)); -+ -+ sprite = lookup_sprite(to_sna_crtc(crtc), idx); -+ if (!sprite) -+ return false; -+ - DBG(("%s: CRTC:%d [pipe=%d], sprite=%u set-rotation=%x\n", - __FUNCTION__, -- to_sna_crtc(crtc)->id, to_sna_crtc(crtc)->pipe, to_sna_crtc(crtc)->sprite.id, -- rotation)); -+ sna_crtc_id(crtc), sna_crtc_pipe(crtc), -+ sprite->id, rotation)); - -- return rotation_set(to_sna(crtc->scrn), -- &to_sna_crtc(crtc)->sprite, -- rotation_reduce(&to_sna_crtc(crtc)->sprite, rotation)); 
-+ return rotation_set(to_sna(crtc->scrn), sprite, -+ rotation_reduce(sprite, rotation)); - } - --static bool -+#if HAS_DEBUG_FULL -+#if !HAS_DEBUG_FULL -+#define LogF ErrorF -+#endif -+struct kmsg { -+ int fd; -+ int saved_loglevel; -+}; -+ -+static int kmsg_get_debug(void) -+{ -+ FILE *file; -+ int v = -1; -+ -+ file = fopen("/sys/module/drm/parameters/debug", "r"); -+ if (file) { -+ fscanf(file, "%d", &v); -+ fclose(file); -+ } -+ -+ return v; -+} -+ -+static void kmsg_set_debug(int v) -+{ -+ FILE *file; -+ -+ file = fopen("/sys/module/drm/parameters/debug", "w"); -+ if (file) { -+ fprintf(file, "%d\n", v); -+ fclose(file); -+ } -+} -+ -+static void kmsg_open(struct kmsg *k) -+{ -+ k->saved_loglevel = kmsg_get_debug(); -+ if (k->saved_loglevel != -1) -+ kmsg_set_debug(0xff); -+ -+ k->fd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK); -+ if (k->fd != -1) -+ lseek(k->fd, 0, SEEK_END); -+} -+ -+static void kmsg_close(struct kmsg *k, int dump) -+{ -+ FILE *file; -+ -+ file = NULL; -+ if (k->fd != -1 && dump) -+ file = fdopen(k->fd, "r"); -+ if (file) { -+ size_t len = 0; -+ char *line = NULL; -+ -+ while (getline(&line, &len, file) != -1) { -+ char *start = strchr(line, ';'); -+ if (start) -+ LogF("KMSG: %s", start + 1); -+ } -+ -+ free(line); -+ fclose(file); -+ } -+ -+ if (k->fd != -1) -+ close(k->fd); -+ -+ if (k->saved_loglevel != -1) -+ kmsg_set_debug(k->saved_loglevel); -+} -+#else -+struct kmsg { int unused; }; -+static void kmsg_open(struct kmsg *k) {} -+static void kmsg_close(struct kmsg *k, int dump) {} -+#endif -+ -+static int - sna_crtc_apply(xf86CrtcPtr crtc) - { - struct sna *sna = to_sna(crtc->scrn); -@@ -978,26 +1503,39 @@ sna_crtc_apply(xf86CrtcPtr crtc) - struct drm_mode_crtc arg; - uint32_t output_ids[32]; - int output_count = 0; -- int i; -+ int sigio, i; -+ struct kmsg kmsg; -+ int ret = EINVAL; - -- DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, sna_crtc->id, sna_crtc->pipe, sna_crtc->bo->handle)); -+ DBG(("%s CRTC:%d [pipe=%d], 
handle=%d\n", __FUNCTION__, -+ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), -+ sna_crtc->bo->handle)); - if (!sna_crtc->kmode.clock) { - ERR(("%s(CRTC:%d [pipe=%d]): attempted to set an invalid mode\n", -- __FUNCTION__, sna_crtc->id, sna_crtc->pipe)); -- return false; -+ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc))); -+ return EINVAL; - } - -+ kmsg_open(&kmsg); -+ sigio = sigio_block(); -+ - assert(sna->mode.num_real_output < ARRAY_SIZE(output_ids)); - sna_crtc_disable_cursor(sna, sna_crtc); - - if (!rotation_set(sna, &sna_crtc->primary, sna_crtc->rotation)) { -+ memset(&arg, 0, sizeof(arg)); -+ arg.crtc_id = __sna_crtc_id(sna_crtc); -+ (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg); -+ } -+ -+ if (!rotation_set(sna, &sna_crtc->primary, sna_crtc->rotation)) { - ERR(("%s: set-primary-rotation failed (rotation-id=%d, rotation=%d) on CRTC:%d [pipe=%d], errno=%d\n", -- __FUNCTION__, sna_crtc->primary.rotation.prop, sna_crtc->rotation, sna_crtc->id, sna_crtc->pipe, errno)); -+ __FUNCTION__, sna_crtc->primary.rotation.prop, sna_crtc->rotation, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), errno)); - sna_crtc->primary.rotation.supported &= ~sna_crtc->rotation; -- return false; -+ goto unblock; - } - DBG(("%s: CRTC:%d [pipe=%d] primary rotation set to %x\n", -- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, sna_crtc->rotation)); -+ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), sna_crtc->rotation)); - - for (i = 0; i < sna->mode.num_real_output; i++) { - xf86OutputPtr output = config->output[i]; -@@ -1008,7 +1546,7 @@ sna_crtc_apply(xf86CrtcPtr crtc) - * and we lose track of the user settings. 
- */ - if (output->crtc == NULL) -- output->funcs->dpms(output, DPMSModeOff); -+ __sna_output_dpms(output, DPMSModeOff, false); - - if (output->crtc != crtc) - continue; -@@ -1022,29 +1560,27 @@ sna_crtc_apply(xf86CrtcPtr crtc) - - DBG(("%s: attaching output '%s' %d [%d] to crtc:%d (pipe %d) (possible crtc:%x, possible clones:%x)\n", - __FUNCTION__, output->name, i, to_connector_id(output), -- sna_crtc->id, sna_crtc->pipe, -+ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), - (uint32_t)output->possible_crtcs, - (uint32_t)output->possible_clones)); - -- assert(output->possible_crtcs & (1 << sna_crtc->pipe) || -+ assert(output->possible_crtcs & (1 << __sna_crtc_pipe(sna_crtc)) || - is_zaphod(crtc->scrn)); - - output_ids[output_count] = to_connector_id(output); - if (++output_count == ARRAY_SIZE(output_ids)) { - DBG(("%s: too many outputs (%d) for me!\n", - __FUNCTION__, output_count)); -- errno = EINVAL; -- return false; -+ goto unblock; - } - } - if (output_count == 0) { - DBG(("%s: no outputs\n", __FUNCTION__)); -- errno = EINVAL; -- return false; -+ goto unblock; - } - - VG_CLEAR(arg); -- arg.crtc_id = sna_crtc->id; -+ arg.crtc_id = __sna_crtc_id(sna_crtc); - arg.fb_id = fb_id(sna_crtc->bo); - if (sna_crtc->transform || sna_crtc->slave_pixmap) { - arg.x = 0; -@@ -1061,7 +1597,7 @@ sna_crtc_apply(xf86CrtcPtr crtc) - arg.mode_valid = 1; - - DBG(("%s: applying crtc [%d, pipe=%d] mode=%dx%d+%d+%d@%d, fb=%d%s%s update to %d outputs [%d...]\n", -- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, -+ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), - arg.mode.hdisplay, - arg.mode.vdisplay, - arg.x, arg.y, -@@ -1071,12 +1607,19 @@ sna_crtc_apply(xf86CrtcPtr crtc) - sna_crtc->transform ? " [transformed]" : "", - output_count, output_count ? 
output_ids[0] : 0)); - -- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg)) -- return false; -+ ret = 0; -+ if (unlikely(drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg))) { -+ ret = errno; -+ goto unblock; -+ } - - sna_crtc->mode_serial++; - sna_crtc_force_outputs_on(crtc); -- return true; -+ -+unblock: -+ sigio_unblock(sigio); -+ kmsg_close(&kmsg, ret); -+ return ret; - } - - static bool overlap(const BoxRec *a, const BoxRec *b) -@@ -1094,26 +1637,73 @@ static bool overlap(const BoxRec *a, const BoxRec *b) - return true; - } - -+static void defer_event(struct sna *sna, struct drm_event *base) -+{ -+ if (sna->mode.shadow_nevent == sna->mode.shadow_size) { -+ int size = sna->mode.shadow_size * 2; -+ void *ptr; -+ -+ ptr = realloc(sna->mode.shadow_events, -+ sizeof(struct drm_event_vblank)*size); -+ if (!ptr) -+ return; -+ -+ sna->mode.shadow_events = ptr; -+ sna->mode.shadow_size = size; -+ } -+ -+ memcpy(&sna->mode.shadow_events[sna->mode.shadow_nevent++], -+ base, sizeof(struct drm_event_vblank)); -+ DBG(("%s: deferring event count=%d\n", -+ __func__, sna->mode.shadow_nevent)); -+} -+ -+static void flush_events(struct sna *sna) -+{ -+ int n; -+ -+ if (!sna->mode.shadow_nevent) -+ return; -+ -+ DBG(("%s: flushing %d events=%d\n", __func__, sna->mode.shadow_nevent)); -+ -+ for (n = 0; n < sna->mode.shadow_nevent; n++) { -+ struct drm_event_vblank *vb = &sna->mode.shadow_events[n]; -+ -+ if ((uintptr_t)(vb->user_data) & 2) -+ sna_present_vblank_handler(vb); -+ else -+ sna_dri2_vblank_handler(vb); -+ } -+ -+ sna->mode.shadow_nevent = 0; -+} -+ -+ - static bool wait_for_shadow(struct sna *sna, - struct sna_pixmap *priv, - unsigned flags) - { - PixmapPtr pixmap = priv->pixmap; -- DamagePtr damage; - struct kgem_bo *bo, *tmp; - int flip_active; - bool ret = true; - -- DBG(("%s: flags=%x, flips=%d, handle=%d, shadow=%d\n", -- __FUNCTION__, flags, sna->mode.flip_active, -+ DBG(("%s: enabled? %d waiting? 
%d, flags=%x, flips=%d, pixmap=%ld [front?=%d], handle=%d, shadow=%d\n", -+ __FUNCTION__, sna->mode.shadow_enabled, sna->mode.shadow_wait, -+ flags, sna->mode.flip_active, -+ pixmap->drawable.serialNumber, pixmap == sna->front, - priv->gpu_bo->handle, sna->mode.shadow->handle)); - - assert(priv->move_to_gpu_data == sna); - assert(sna->mode.shadow != priv->gpu_bo); - -- if (flags == 0 || pixmap != sna->front || !sna->mode.shadow_damage) -+ if (flags == 0 || pixmap != sna->front || !sna->mode.shadow_enabled) - goto done; - -+ assert(sna->mode.shadow_damage); -+ assert(!sna->mode.shadow_wait); -+ - if ((flags & MOVE_WRITE) == 0) { - if ((flags & __MOVE_SCANOUT) == 0) { - struct sna_crtc *crtc; -@@ -1154,9 +1744,7 @@ static bool wait_for_shadow(struct sna *sna, - } - - assert(sna->mode.shadow_active); -- -- damage = sna->mode.shadow_damage; -- sna->mode.shadow_damage = NULL; -+ sna->mode.shadow_wait = true; - - flip_active = sna->mode.flip_active; - if (flip_active) { -@@ -1208,6 +1796,8 @@ static bool wait_for_shadow(struct sna *sna, - bo = sna->mode.shadow; - } - } -+ assert(sna->mode.shadow_wait); -+ sna->mode.shadow_wait = false; - - if (bo->refcnt > 1) { - bo = kgem_create_2d(&sna->kgem, -@@ -1230,8 +1820,6 @@ static bool wait_for_shadow(struct sna *sna, - bo = sna->mode.shadow; - } - -- sna->mode.shadow_damage = damage; -- - RegionSubtract(&sna->mode.shadow_region, - &sna->mode.shadow_region, - &sna->mode.shadow_cancel); -@@ -1269,6 +1857,7 @@ static bool wait_for_shadow(struct sna *sna, - RegionSubtract(&sna->mode.shadow_region, &sna->mode.shadow_region, ®ion); - } - -+ crtc->client_bo->active_scanout--; - kgem_bo_destroy(&sna->kgem, crtc->client_bo); - crtc->client_bo = NULL; - list_del(&crtc->shadow_link); -@@ -1281,12 +1870,13 @@ static bool wait_for_shadow(struct sna *sna, - sna->mode.shadow_region.extents.y1, - sna->mode.shadow_region.extents.x2, - sna->mode.shadow_region.extents.y2)); -- ret = sna->render.copy_boxes(sna, GXcopy, -- &pixmap->drawable, 
priv->gpu_bo, 0, 0, -- &pixmap->drawable, bo, 0, 0, -- region_rects(&sna->mode.shadow_region), -- region_num_rects(&sna->mode.shadow_region), -- 0); -+ if (!sna->render.copy_boxes(sna, GXcopy, -+ &pixmap->drawable, priv->gpu_bo, 0, 0, -+ &pixmap->drawable, bo, 0, 0, -+ region_rects(&sna->mode.shadow_region), -+ region_num_rects(&sna->mode.shadow_region), -+ 0)) -+ ERR(("%s: copy failed\n", __FUNCTION__)); - } - - if (priv->cow) -@@ -1295,11 +1885,13 @@ static bool wait_for_shadow(struct sna *sna, - sna_pixmap_unmap(pixmap, priv); - - DBG(("%s: setting front pixmap to handle=%d\n", __FUNCTION__, bo->handle)); -+ sna->mode.shadow->active_scanout--; - tmp = priv->gpu_bo; - priv->gpu_bo = bo; - if (bo != sna->mode.shadow) - kgem_bo_destroy(&sna->kgem, sna->mode.shadow); - sna->mode.shadow = tmp; -+ sna->mode.shadow->active_scanout++; - - sna_dri2_pixmap_update_bo(sna, pixmap, bo); - -@@ -1311,6 +1903,9 @@ done: - priv->move_to_gpu_data = NULL; - priv->move_to_gpu = NULL; - -+ assert(!sna->mode.shadow_wait); -+ flush_events(sna); -+ - return ret; - } - -@@ -1358,22 +1953,43 @@ bool sna_pixmap_discard_shadow_damage(struct sna_pixmap *priv, - return RegionNil(&sna->mode.shadow_region); - } - -+static void sna_mode_damage(DamagePtr damage, RegionPtr region, void *closure) -+{ -+ struct sna *sna = closure; -+ -+ if (sna->mode.rr_active) -+ return; -+ -+ /* Throw away the rectangles if the region grows too big */ -+ region = DamageRegion(damage); -+ if (region->data) { -+ RegionRec dup; -+ -+ dup = *region; -+ RegionUninit(&dup); -+ -+ region->data = NULL; -+ } -+} -+ - static bool sna_mode_enable_shadow(struct sna *sna) - { -- ScreenPtr screen = sna->scrn->pScreen; -+ ScreenPtr screen = to_screen_from_sna(sna); - - DBG(("%s\n", __FUNCTION__)); - assert(sna->mode.shadow == NULL); - assert(sna->mode.shadow_damage == NULL); - assert(sna->mode.shadow_active == 0); -+ assert(!sna->mode.shadow_enabled); - -- sna->mode.shadow_damage = DamageCreate(NULL, NULL, -- DamageReportNone, 
TRUE, -- screen, screen); -+ sna->mode.shadow_damage = DamageCreate(sna_mode_damage, NULL, -+ DamageReportRawRegion, -+ TRUE, screen, sna); - if (!sna->mode.shadow_damage) - return false; - - DamageRegister(&sna->front->drawable, sna->mode.shadow_damage); -+ sna->mode.shadow_enabled = true; - return true; - } - -@@ -1381,8 +1997,10 @@ static void sna_mode_disable_shadow(struct sna *sna) - { - struct sna_pixmap *priv; - -- if (!sna->mode.shadow_damage) -+ if (!sna->mode.shadow_damage) { -+ assert(!sna->mode.shadow_enabled); - return; -+ } - - DBG(("%s\n", __FUNCTION__)); - -@@ -1393,8 +2011,10 @@ static void sna_mode_disable_shadow(struct sna *sna) - DamageUnregister(&sna->front->drawable, sna->mode.shadow_damage); - DamageDestroy(sna->mode.shadow_damage); - sna->mode.shadow_damage = NULL; -+ sna->mode.shadow_enabled = false; - - if (sna->mode.shadow) { -+ sna->mode.shadow->active_scanout--; - kgem_bo_destroy(&sna->kgem, sna->mode.shadow); - sna->mode.shadow = NULL; - } -@@ -1413,7 +2033,7 @@ static void sna_crtc_slave_damage(DamagePtr damage, RegionPtr region, void *clos - __FUNCTION__, - region->extents.x1, region->extents.y1, region->extents.x2, region->extents.y2, - region_num_rects(region), -- crtc->pipe, crtc->base->x, crtc->base->y)); -+ __sna_crtc_pipe(crtc), crtc->base->x, crtc->base->y)); - - assert(crtc->slave_damage == damage); - assert(sna->mode.shadow_damage); -@@ -1431,7 +2051,7 @@ static bool sna_crtc_enable_shadow(struct sna *sna, struct sna_crtc *crtc) - return true; - } - -- DBG(("%s: enabling for crtc %d\n", __FUNCTION__, crtc->id)); -+ DBG(("%s: enabling for crtc %d\n", __FUNCTION__, __sna_crtc_id(crtc))); - - if (!sna->mode.shadow_active) { - if (!sna_mode_enable_shadow(sna)) -@@ -1443,9 +2063,12 @@ static bool sna_crtc_enable_shadow(struct sna *sna, struct sna_crtc *crtc) - if (crtc->slave_pixmap) { - assert(crtc->slave_damage == NULL); - -+ DBG(("%s: enabling PRIME slave tracking on CRTC %d [pipe=%d], pixmap=%ld\n", -+ __FUNCTION__, 
__sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->slave_pixmap->drawable.serialNumber)); - crtc->slave_damage = DamageCreate(sna_crtc_slave_damage, NULL, - DamageReportRawRegion, TRUE, -- sna->scrn->pScreen, crtc); -+ to_screen_from_sna(sna), -+ crtc); - if (crtc->slave_damage == NULL) { - if (!--sna->mode.shadow_active) - sna_mode_disable_shadow(sna); -@@ -1465,6 +2088,9 @@ static void sna_crtc_disable_override(struct sna *sna, struct sna_crtc *crtc) - if (crtc->client_bo == NULL) - return; - -+ assert(crtc->client_bo->refcnt >= crtc->client_bo->active_scanout); -+ crtc->client_bo->active_scanout--; -+ - if (!crtc->transform) { - DrawableRec tmp; - -@@ -1489,7 +2115,7 @@ static void sna_crtc_disable_shadow(struct sna *sna, struct sna_crtc *crtc) - if (!crtc->shadow) - return; - -- DBG(("%s: disabling for crtc %d\n", __FUNCTION__, crtc->id)); -+ DBG(("%s: disabling for crtc %d\n", __FUNCTION__, __sna_crtc_id(crtc))); - assert(sna->mode.shadow_active > 0); - - if (crtc->slave_damage) { -@@ -1517,14 +2143,24 @@ __sna_crtc_disable(struct sna *sna, struct sna_crtc *sna_crtc) - sna_crtc_disable_shadow(sna, sna_crtc); - - if (sna_crtc->bo) { -+ DBG(("%s: releasing handle=%d from scanout, active=%d\n", -+ __FUNCTION__,sna_crtc->bo->handle, sna_crtc->bo->active_scanout-1)); -+ assert(sna_crtc->flags & CRTC_ON); - assert(sna_crtc->bo->active_scanout); - assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout); - sna_crtc->bo->active_scanout--; - kgem_bo_destroy(&sna->kgem, sna_crtc->bo); - sna_crtc->bo = NULL; -+ sna_crtc->flags &= ~CRTC_ON; - -- assert(sna->mode.front_active); -- sna->mode.front_active--; -+ if (sna->mode.hidden) { -+ sna->mode.hidden--; -+ assert(sna->mode.hidden); -+ assert(sna->mode.front_active == 0); -+ } else { -+ assert(sna->mode.front_active); -+ sna->mode.front_active--; -+ } - sna->mode.dirty = true; - } - -@@ -1532,13 +2168,19 @@ __sna_crtc_disable(struct sna *sna, struct sna_crtc *sna_crtc) - kgem_bo_destroy(&sna->kgem, 
sna_crtc->shadow_bo); - sna_crtc->shadow_bo = NULL; - } -- sna_crtc->transform = false; -+ if (sna_crtc->transform) { -+ assert(sna->mode.rr_active); -+ sna->mode.rr_active--; -+ sna_crtc->transform = false; -+ } - -+ sna_crtc->cursor_transform = false; -+ sna_crtc->hwcursor = true; - assert(!sna_crtc->shadow); - } - - static void --sna_crtc_disable(xf86CrtcPtr crtc) -+sna_crtc_disable(xf86CrtcPtr crtc, bool force) - { - struct sna *sna = to_sna(crtc->scrn); - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); -@@ -1547,14 +2189,16 @@ sna_crtc_disable(xf86CrtcPtr crtc) - if (sna_crtc == NULL) - return; - -- DBG(("%s: disabling crtc [%d, pipe=%d]\n", __FUNCTION__, -- sna_crtc->id, sna_crtc->pipe)); -+ if (!force && sna_crtc->bo == NULL) -+ return; -+ -+ DBG(("%s: disabling crtc [%d, pipe=%d], force?=%d\n", __FUNCTION__, -+ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), force)); - - sna_crtc_force_outputs_off(crtc); -- assert(sna_crtc->dpms_mode == DPMSModeOff); - - memset(&arg, 0, sizeof(arg)); -- arg.crtc_id = sna_crtc->id; -+ arg.crtc_id = __sna_crtc_id(sna_crtc); - (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg); - - __sna_crtc_disable(sna, sna_crtc); -@@ -1574,19 +2218,19 @@ static void update_flush_interval(struct sna *sna) - - if (!crtc->enabled) { - DBG(("%s: CRTC:%d (pipe %d) disabled\n", -- __FUNCTION__,i, to_sna_crtc(crtc)->pipe)); -+ __FUNCTION__,i, sna_crtc_pipe(crtc))); - assert(to_sna_crtc(crtc)->bo == NULL); - continue; - } - -- if (to_sna_crtc(crtc)->dpms_mode != DPMSModeOn) { -+ if (to_sna_crtc(crtc)->bo == NULL) { - DBG(("%s: CRTC:%d (pipe %d) turned off\n", -- __FUNCTION__,i, to_sna_crtc(crtc)->pipe)); -+ __FUNCTION__,i, sna_crtc_pipe(crtc))); - continue; - } - - DBG(("%s: CRTC:%d (pipe %d) vrefresh=%f\n", -- __FUNCTION__, i, to_sna_crtc(crtc)->pipe, -+ __FUNCTION__, i, sna_crtc_pipe(crtc), - xf86ModeVRefresh(&crtc->mode))); - max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(&crtc->mode)); - } -@@ -1642,7 +2286,7 @@ void 
sna_copy_fbcon(struct sna *sna) - int dx, dy; - int i; - -- if (wedged(sna)) -+ if (wedged(sna) || isGPU(sna->scrn)) - return; - - DBG(("%s\n", __FUNCTION__)); -@@ -1662,7 +2306,7 @@ void sna_copy_fbcon(struct sna *sna) - assert(crtc != NULL); - - VG_CLEAR(mode); -- mode.crtc_id = crtc->id; -+ mode.crtc_id = __sna_crtc_id(crtc); - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode)) - continue; - if (!mode.fb_id) -@@ -1726,7 +2370,7 @@ void sna_copy_fbcon(struct sna *sna) - kgem_bo_destroy(&sna->kgem, bo); - - #if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(10, 0) -- sna->scrn->pScreen->canDoBGNoneRoot = ok; -+ to_screen_from_sna(sna)->canDoBGNoneRoot = ok; - #endif - } - -@@ -1736,7 +2380,6 @@ static bool use_shadow(struct sna *sna, xf86CrtcPtr crtc) - PictTransform crtc_to_fb; - struct pict_f_transform f_crtc_to_fb, f_fb_to_crtc; - unsigned pitch_limit; -- struct sna_pixmap *priv; - BoxRec b; - - assert(sna->scrn->virtualX && sna->scrn->virtualY); -@@ -1765,27 +2408,31 @@ static bool use_shadow(struct sna *sna, xf86CrtcPtr crtc) - return true; - } - -- priv = sna_pixmap_force_to_gpu(sna->front, MOVE_READ | __MOVE_SCANOUT); -- if (priv == NULL) -- return true; /* maybe we can create a bo for the scanout? */ -- -- if (sna->kgem.gen == 071) -- pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; -- else if ((sna->kgem.gen >> 3) > 4) -- pitch_limit = 32 * 1024; -- else if ((sna->kgem.gen >> 3) == 4) -- pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; -- else if ((sna->kgem.gen >> 3) == 3) -- pitch_limit = priv->gpu_bo->tiling ? 
8 * 1024 : 16 * 1024; -- else -- pitch_limit = 8 * 1024; -- DBG(("%s: gpu bo handle=%d tiling=%d pitch=%d, limit=%d\n", __FUNCTION__, priv->gpu_bo->handle, priv->gpu_bo->tiling, priv->gpu_bo->pitch, pitch_limit)); -- if (priv->gpu_bo->pitch > pitch_limit) -- return true; -+ if (!isGPU(sna->scrn)) { -+ struct sna_pixmap *priv; - -- if (priv->gpu_bo->tiling && sna->flags & SNA_LINEAR_FB) { -- DBG(("%s: gpu bo is tiled, need linear, forcing shadow\n", __FUNCTION__)); -- return true; -+ priv = sna_pixmap_force_to_gpu(sna->front, MOVE_READ | __MOVE_SCANOUT); -+ if (priv == NULL) -+ return true; /* maybe we can create a bo for the scanout? */ -+ -+ if (sna->kgem.gen == 071) -+ pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; -+ else if ((sna->kgem.gen >> 3) > 4) -+ pitch_limit = 32 * 1024; -+ else if ((sna->kgem.gen >> 3) == 4) -+ pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; -+ else if ((sna->kgem.gen >> 3) == 3) -+ pitch_limit = priv->gpu_bo->tiling ? 8 * 1024 : 16 * 1024; -+ else -+ pitch_limit = 8 * 1024; -+ DBG(("%s: gpu bo handle=%d tiling=%d pitch=%d, limit=%d\n", __FUNCTION__, priv->gpu_bo->handle, priv->gpu_bo->tiling, priv->gpu_bo->pitch, pitch_limit)); -+ if (priv->gpu_bo->pitch > pitch_limit) -+ return true; -+ -+ if (priv->gpu_bo->tiling && sna->flags & SNA_LINEAR_FB) { -+ DBG(("%s: gpu bo is tiled, need linear, forcing shadow\n", __FUNCTION__)); -+ return true; -+ } - } - - transform = NULL; -@@ -1800,9 +2447,9 @@ static bool use_shadow(struct sna *sna, xf86CrtcPtr crtc) - bool needs_transform = true; - unsigned rotation = rotation_reduce(&to_sna_crtc(crtc)->primary, crtc->rotation); - DBG(("%s: natively supported rotation? 
rotation=%x & supported=%x == %d\n", -- __FUNCTION__, crtc->rotation, to_sna_crtc(crtc)->primary.rotation.supported, -- !!(crtc->rotation & to_sna_crtc(crtc)->primary.rotation.supported))); -- if (to_sna_crtc(crtc)->primary.rotation.supported & rotation) -+ __FUNCTION__, rotation, to_sna_crtc(crtc)->primary.rotation.supported, -+ rotation == (rotation & to_sna_crtc(crtc)->primary.rotation.supported))); -+ if ((to_sna_crtc(crtc)->primary.rotation.supported & rotation) == rotation) - needs_transform = RRTransformCompute(crtc->x, crtc->y, - crtc->mode.HDisplay, crtc->mode.VDisplay, - RR_Rotate_0, transform, -@@ -1839,6 +2486,7 @@ static void set_shadow(struct sna *sna, RegionPtr region) - - assert(priv->gpu_bo); - assert(sna->mode.shadow); -+ assert(sna->mode.shadow->active_scanout); - - DBG(("%s: waiting for region %dx[(%d, %d), (%d, %d)], front handle=%d, shadow handle=%d\n", - __FUNCTION__, -@@ -1912,6 +2560,28 @@ get_scanout_bo(struct sna *sna, PixmapPtr pixmap) - return priv->gpu_bo; - } - -+static void shadow_clear(struct sna *sna, -+ PixmapPtr front, struct kgem_bo *bo, -+ xf86CrtcPtr crtc) -+{ -+ bool ok = false; -+ if (!wedged(sna)) -+ ok = sna->render.fill_one(sna, front, bo, 0, -+ 0, 0, crtc->mode.HDisplay, crtc->mode.VDisplay, -+ GXclear); -+ if (!ok) { -+ void *ptr = kgem_bo_map__gtt(&sna->kgem, bo); -+ if (ptr) -+ memset(ptr, 0, bo->pitch * crtc->mode.HDisplay); -+ } -+ sna->mode.shadow_dirty = true; -+} -+ -+static bool rr_active(xf86CrtcPtr crtc) -+{ -+ return crtc->transformPresent || crtc->rotation != RR_Rotate_0; -+} -+ - static struct kgem_bo *sna_crtc_attach(xf86CrtcPtr crtc) - { - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); -@@ -1919,10 +2589,15 @@ static struct kgem_bo *sna_crtc_attach(xf86CrtcPtr crtc) - struct sna *sna = to_sna(scrn); - struct kgem_bo *bo; - -- sna_crtc->transform = false; -+ if (sna_crtc->transform) { -+ assert(sna->mode.rr_active); -+ sna_crtc->transform = false; -+ sna->mode.rr_active--; -+ } - sna_crtc->rotation = 
RR_Rotate_0; - - if (use_shadow(sna, crtc)) { -+ PixmapPtr front; - unsigned long tiled_limit; - int tiling; - -@@ -1949,6 +2624,10 @@ force_shadow: - } - - tiling = I915_TILING_X; -+ if (crtc->rotation & (RR_Rotate_90 | RR_Rotate_270) && -+ sna->kgem.can_scanout_y) -+ tiling = I915_TILING_Y; -+ - if (sna->kgem.gen == 071) - tiled_limit = 16 * 1024 * 8; - else if ((sna->kgem.gen >> 3) > 4) -@@ -1977,8 +2656,8 @@ force_shadow: - return NULL; - } - -- if (__sna_pixmap_get_bo(sna->front) && !crtc->transformPresent) { -- DrawableRec tmp; -+ front = sna_crtc->slave_pixmap ?: sna->front; -+ if (__sna_pixmap_get_bo(front) && !rr_active(crtc)) { - BoxRec b; - - b.x1 = crtc->x; -@@ -1986,28 +2665,48 @@ force_shadow: - b.x2 = crtc->x + crtc->mode.HDisplay; - b.y2 = crtc->y + crtc->mode.VDisplay; - -- DBG(("%s: copying onto shadow CRTC: (%d, %d)x(%d, %d), handle=%d\n", -- __FUNCTION__, -- b.x1, b.y1, -- b.x2, b.y2, -- bo->handle)); -- -- tmp.width = crtc->mode.HDisplay; -- tmp.height = crtc->mode.VDisplay; -- tmp.depth = sna->front->drawable.depth; -- tmp.bitsPerPixel = sna->front->drawable.bitsPerPixel; -- -- (void)sna->render.copy_boxes(sna, GXcopy, -- &sna->front->drawable, __sna_pixmap_get_bo(sna->front), 0, 0, -- &tmp, bo, -b.x1, -b.y1, -- &b, 1, 0); -- } -+ if (b.x1 < 0) -+ b.x1 = 0; -+ if (b.y1 < 0) -+ b.y1 = 0; -+ if (b.x2 > scrn->virtualX) -+ b.x2 = scrn->virtualX; -+ if (b.y2 > scrn->virtualY) -+ b.y2 = scrn->virtualY; -+ if (b.x2 - b.x1 < crtc->mode.HDisplay || -+ b.y2 - b.y1 < crtc->mode.VDisplay) -+ shadow_clear(sna, front, bo, crtc); -+ -+ if (b.y2 > b.y1 && b.x2 > b.x1) { -+ DrawableRec tmp; -+ -+ DBG(("%s: copying onto shadow CRTC: (%d, %d)x(%d, %d) [fb=%dx%d], handle=%d\n", -+ __FUNCTION__, -+ b.x1, b.y1, -+ b.x2-b.x1, b.y2-b.y1, -+ scrn->virtualX, scrn->virtualY, -+ bo->handle)); -+ -+ tmp.width = crtc->mode.HDisplay; -+ tmp.height = crtc->mode.VDisplay; -+ tmp.depth = front->drawable.depth; -+ tmp.bitsPerPixel = front->drawable.bitsPerPixel; -+ -+ if 
(!sna->render.copy_boxes(sna, GXcopy, -+ &front->drawable, __sna_pixmap_get_bo(front), 0, 0, -+ &tmp, bo, -crtc->x, -crtc->y, -+ &b, 1, COPY_LAST)) -+ shadow_clear(sna, front, bo, crtc); -+ } -+ } else -+ shadow_clear(sna, front, bo, crtc); - - sna_crtc->shadow_bo_width = crtc->mode.HDisplay; - sna_crtc->shadow_bo_height = crtc->mode.VDisplay; - sna_crtc->shadow_bo = bo; - out_shadow: - sna_crtc->transform = true; -+ sna->mode.rr_active++; - return kgem_bo_reference(bo); - } else { - if (sna_crtc->shadow_bo) { -@@ -2048,26 +2747,26 @@ out_shadow: - } - - if (sna->flags & SNA_TEAR_FREE) { -+ RegionRec region; -+ - assert(sna_crtc->slave_pixmap == NULL); - - DBG(("%s: enabling TearFree shadow\n", __FUNCTION__)); -+ region.extents.x1 = 0; -+ region.extents.y1 = 0; -+ region.extents.x2 = sna->scrn->virtualX; -+ region.extents.y2 = sna->scrn->virtualY; -+ region.data = NULL; -+ - if (!sna_crtc_enable_shadow(sna, sna_crtc)) { - DBG(("%s: failed to enable crtc shadow\n", __FUNCTION__)); - return NULL; - } - -- if (sna->mode.shadow == NULL && !wedged(sna)) { -- RegionRec region; -+ if (sna->mode.shadow == NULL) { - struct kgem_bo *shadow; - - DBG(("%s: creating TearFree shadow bo\n", __FUNCTION__)); -- -- region.extents.x1 = 0; -- region.extents.y1 = 0; -- region.extents.x2 = sna->scrn->virtualX; -- region.extents.y2 = sna->scrn->virtualY; -- region.data = NULL; -- - shadow = kgem_create_2d(&sna->kgem, - region.extents.x2, - region.extents.y2, -@@ -2093,9 +2792,12 @@ out_shadow: - goto force_shadow; - } - -+ assert(__sna_pixmap_get_bo(sna->front) == NULL || -+ __sna_pixmap_get_bo(sna->front)->pitch == shadow->pitch); - sna->mode.shadow = shadow; -- set_shadow(sna, ®ion); -+ sna->mode.shadow->active_scanout++; - } -+ set_shadow(sna, ®ion); - - sna_crtc_disable_override(sna, sna_crtc); - } else -@@ -2107,6 +2809,37 @@ out_shadow: - } - } - -+#define SCALING_EPSILON (1./256) -+ -+static bool -+is_affine(const struct pixman_f_transform *t) -+{ -+ return (fabs(t->m[2][0]) < 
SCALING_EPSILON && -+ fabs(t->m[2][1]) < SCALING_EPSILON); -+} -+ -+static double determinant(const struct pixman_f_transform *t) -+{ -+ return t->m[0][0]*t->m[1][1] - t->m[1][0]*t->m[0][1]; -+} -+ -+static bool -+affine_is_pixel_exact(const struct pixman_f_transform *t) -+{ -+ double det = t->m[2][2] * determinant(t); -+ if (fabs (det * det - 1.0) < SCALING_EPSILON) { -+ if (fabs(t->m[0][1]) < SCALING_EPSILON && -+ fabs(t->m[1][0]) < SCALING_EPSILON) -+ return true; -+ -+ if (fabs(t->m[0][0]) < SCALING_EPSILON && -+ fabs(t->m[1][1]) < SCALING_EPSILON) -+ return true; -+ } -+ -+ return false; -+} -+ - static void sna_crtc_randr(xf86CrtcPtr crtc) - { - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); -@@ -2152,6 +2885,25 @@ static void sna_crtc_randr(xf86CrtcPtr crtc) - } else - crtc->transform_in_use = sna_crtc->rotation != RR_Rotate_0; - -+ /* Recompute the cursor after a potential change in transform */ -+ if (sna_crtc->cursor) { -+ assert(sna_crtc->cursor->ref > 0); -+ sna_crtc->cursor->ref--; -+ sna_crtc->cursor = NULL; -+ } -+ -+ if (needs_transform) { -+ sna_crtc->hwcursor = is_affine(&f_fb_to_crtc); -+ sna_crtc->cursor_transform = -+ sna_crtc->hwcursor && -+ !affine_is_pixel_exact(&f_fb_to_crtc); -+ } else { -+ sna_crtc->hwcursor = true; -+ sna_crtc->cursor_transform = false; -+ } -+ DBG(("%s: hwcursor?=%d, cursor_transform?=%d\n", -+ __FUNCTION__, sna_crtc->hwcursor, sna_crtc->cursor_transform)); -+ - crtc->crtc_to_framebuffer = crtc_to_fb; - crtc->f_crtc_to_framebuffer = f_crtc_to_fb; - crtc->f_framebuffer_to_crtc = f_fb_to_crtc; -@@ -2184,7 +2936,7 @@ static void sna_crtc_randr(xf86CrtcPtr crtc) - static void - sna_crtc_damage(xf86CrtcPtr crtc) - { -- ScreenPtr screen = crtc->scrn->pScreen; -+ ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); - struct sna *sna = to_sna(crtc->scrn); - RegionRec region, *damage; - -@@ -2200,15 +2952,21 @@ sna_crtc_damage(xf86CrtcPtr crtc) - if (region.extents.y2 > screen->height) - region.extents.y2 = screen->height; - -+ if 
(region.extents.x2 <= region.extents.x1 || -+ region.extents.y2 <= region.extents.y1) { -+ DBG(("%s: crtc not damaged, all-clipped\n", __FUNCTION__)); -+ return; -+ } -+ - DBG(("%s: marking crtc %d as completely damaged (%d, %d), (%d, %d)\n", -- __FUNCTION__, to_sna_crtc(crtc)->id, -+ __FUNCTION__, sna_crtc_id(crtc), - region.extents.x1, region.extents.y1, - region.extents.x2, region.extents.y2)); -- to_sna_crtc(crtc)->client_damage = region; - - assert(sna->mode.shadow_damage && sna->mode.shadow_active); - damage = DamageRegion(sna->mode.shadow_damage); - RegionUnion(damage, damage, ®ion); -+ to_sna_crtc(crtc)->crtc_damage = region; - - DBG(("%s: damage now %dx[(%d, %d), (%d, %d)]\n", - __FUNCTION__, -@@ -2260,6 +3018,21 @@ static const char *reflection_to_str(Rotation rotation) - } - } - -+static void reprobe_connectors(xf86CrtcPtr crtc) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); -+ struct sna *sna = to_sna(crtc->scrn); -+ int i; -+ -+ for (i = 0; i < sna->mode.num_real_output; i++) { -+ xf86OutputPtr output = config->output[i]; -+ if (output->crtc == crtc) -+ to_sna_output(output)->reprobe = true; -+ } -+ -+ sna_mode_discover(sna, true); -+} -+ - static Bool - __sna_crtc_set_mode(xf86CrtcPtr crtc) - { -@@ -2268,11 +3041,19 @@ __sna_crtc_set_mode(xf86CrtcPtr crtc) - struct kgem_bo *saved_bo, *bo; - uint32_t saved_offset; - bool saved_transform; -+ bool saved_hwcursor; -+ bool saved_cursor_transform; -+ int ret; - -- DBG(("%s\n", __FUNCTION__)); -+ DBG(("%s: CRTC=%d, pipe=%d, hidden?=%d\n", __FUNCTION__, -+ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), sna->mode.hidden)); -+ if (sna->mode.hidden) -+ return TRUE; - - saved_bo = sna_crtc->bo; - saved_transform = sna_crtc->transform; -+ saved_cursor_transform = sna_crtc->cursor_transform; -+ saved_hwcursor = sna_crtc->hwcursor; - saved_offset = sna_crtc->offset; - - sna_crtc->fallback_shadow = false; -@@ -2285,26 +3066,31 @@ retry: /* Attach per-crtc pixmap or direct */ - } - - /* 
Prevent recursion when enabling outputs during execbuffer */ -- if (bo->exec && RQ(bo->rq)->bo == NULL) -+ if (bo->exec && RQ(bo->rq)->bo == NULL) { - _kgem_submit(&sna->kgem); -+ __kgem_bo_clear_dirty(bo); -+ } - - sna_crtc->bo = bo; -- if (!sna_crtc_apply(crtc)) { -- int err = errno; -- -+ ret = sna_crtc_apply(crtc); -+ if (ret) { - kgem_bo_destroy(&sna->kgem, bo); - -- if (!sna_crtc->shadow) { -+ if (!sna_crtc->fallback_shadow) { - sna_crtc->fallback_shadow = true; - goto retry; - } - - xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, -- "failed to set mode: %s [%d]\n", strerror(err), err); -+ "failed to set mode: %s [%d]\n", strerror(ret), ret); - goto error; - } - -+ sna_crtc->flags |= CRTC_ON; - bo->active_scanout++; -+ DBG(("%s: marking handle=%d as active=%d (removing %d from scanout, active=%d)\n", -+ __FUNCTION__, bo->handle, bo->active_scanout, -+ saved_bo ? saved_bo->handle : 0, saved_bo ? saved_bo->active_scanout - 1: -1)); - if (saved_bo) { - assert(saved_bo->active_scanout); - assert(saved_bo->refcnt >= saved_bo->active_scanout); -@@ -2315,17 +3101,34 @@ retry: /* Attach per-crtc pixmap or direct */ - sna_crtc_randr(crtc); - if (sna_crtc->transform) - sna_crtc_damage(crtc); -+ if (sna_crtc->cursor && /* Reload cursor if RandR maybe changed */ -+ (!sna_crtc->hwcursor || -+ saved_cursor_transform || sna_crtc->cursor_transform || -+ sna_crtc->cursor->rotation != crtc->rotation)) -+ sna_crtc_disable_cursor(sna, sna_crtc); -+ -+ assert(!sna->mode.hidden); - sna->mode.front_active += saved_bo == NULL; - sna->mode.dirty = true; -- DBG(("%s: front_active=%d\n", __FUNCTION__, sna->mode.front_active)); -+ DBG(("%s: handle=%d, scanout_active=%d, front_active=%d\n", -+ __FUNCTION__, bo->handle, bo->active_scanout, sna->mode.front_active)); - - return TRUE; - - error: - sna_crtc->offset = saved_offset; -+ if (sna_crtc->transform) { -+ assert(sna->mode.rr_active); -+ sna->mode.rr_active--; -+ } -+ if (saved_transform) -+ sna->mode.rr_active++; - sna_crtc->transform = 
saved_transform; -+ sna_crtc->cursor_transform = saved_cursor_transform; -+ sna_crtc->hwcursor = saved_hwcursor; - sna_crtc->bo = saved_bo; -- sna_mode_discover(sna); -+ -+ reprobe_connectors(crtc); - return FALSE; - } - -@@ -2346,14 +3149,14 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, - xf86DrvMsg(crtc->scrn->scrnIndex, X_INFO, - "switch to mode %dx%d@%.1f on %s using pipe %d, position (%d, %d), rotation %s, reflection %s\n", - mode->HDisplay, mode->VDisplay, xf86ModeVRefresh(mode), -- outputs_for_crtc(crtc, outputs, sizeof(outputs)), sna_crtc->pipe, -+ outputs_for_crtc(crtc, outputs, sizeof(outputs)), __sna_crtc_pipe(sna_crtc), - x, y, rotation_to_str(rotation), reflection_to_str(rotation)); - - assert(mode->HDisplay <= sna->mode.max_crtc_width && - mode->VDisplay <= sna->mode.max_crtc_height); - - #if HAS_GAMMA -- drmModeCrtcSetGamma(sna->kgem.fd, sna_crtc->id, -+ drmModeCrtcSetGamma(sna->kgem.fd, __sna_crtc_id(sna_crtc), - crtc->gamma_size, - crtc->gamma_red, - crtc->gamma_green, -@@ -2372,17 +3175,10 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, - static void - sna_crtc_dpms(xf86CrtcPtr crtc, int mode) - { -- struct sna_crtc *priv = to_sna_crtc(crtc); -- - DBG(("%s(pipe %d, dpms mode -> %d):= active=%d\n", -- __FUNCTION__, priv->pipe, mode, mode == DPMSModeOn)); -- if (priv->dpms_mode == mode) -- return; -- -- assert(priv); -- priv->dpms_mode = mode; -+ __FUNCTION__, sna_crtc_pipe(crtc), mode, mode == DPMSModeOn)); - -- if (mode == DPMSModeOn && crtc->enabled && priv->bo == NULL) { -+ if (mode == DPMSModeOn && crtc->enabled) { - if (__sna_crtc_set_mode(crtc)) - update_flush_interval(to_sna(crtc->scrn)); - else -@@ -2390,7 +3186,7 @@ sna_crtc_dpms(xf86CrtcPtr crtc, int mode) - } - - if (mode != DPMSModeOn) -- sna_crtc_disable(crtc); -+ sna_crtc_disable(crtc, false); - } - - void sna_mode_adjust_frame(struct sna *sna, int x, int y) -@@ -2426,7 +3222,7 @@ sna_crtc_gamma_set(xf86CrtcPtr crtc, - { - 
assert(to_sna_crtc(crtc)); - drmModeCrtcSetGamma(to_sna(crtc->scrn)->kgem.fd, -- to_sna_crtc(crtc)->id, -+ sna_crtc_id(crtc), - size, red, green, blue); - } - -@@ -2434,10 +3230,14 @@ static void - sna_crtc_destroy(xf86CrtcPtr crtc) - { - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); -+ struct plane *sprite, *sn; - - if (sna_crtc == NULL) - return; - -+ list_for_each_entry_safe(sprite, sn, &sna_crtc->sprites, link) -+ free(sprite); -+ - free(sna_crtc); - crtc->driver_private = NULL; - } -@@ -2455,7 +3255,7 @@ sna_crtc_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr pixmap) - return TRUE; - - DBG(("%s: CRTC:%d, pipe=%d setting scanout pixmap=%ld\n", -- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, -+ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), - pixmap ? pixmap->drawable.serialNumber : 0)); - - /* Disable first so that we can unregister the damage tracking */ -@@ -2576,6 +3376,10 @@ static int plane_details(struct sna *sna, struct plane *p) - } - } - -+ p->rotation.supported &= DBG_NATIVE_ROTATION; -+ if (!xf86ReturnOptValBool(sna->Options, OPTION_ROTATION, TRUE)) -+ p->rotation.supported = RR_Rotate_0; -+ - if (props != (uint32_t *)stack_props) - free(props); - -@@ -2583,20 +3387,26 @@ static int plane_details(struct sna *sna, struct plane *p) - return type; - } - -+static void add_sprite_plane(struct sna_crtc *crtc, -+ struct plane *details) -+{ -+ struct plane *sprite = malloc(sizeof(*sprite)); -+ if (!sprite) -+ return; -+ -+ memcpy(sprite, details, sizeof(*sprite)); -+ list_add(&sprite->link, &crtc->sprites); -+} -+ - static void - sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) - { - #define LOCAL_IOCTL_SET_CAP DRM_IOWR(0x0d, struct local_set_cap) --#define LOCAL_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xb5, struct local_mode_get_plane_res) --#define LOCAL_IOCTL_MODE_GETPLANE DRM_IOWR(0xb6, struct local_mode_get_plane) - struct local_set_cap { - uint64_t name; - uint64_t value; - } cap; -- struct local_mode_get_plane_res { -- 
uint64_t plane_id_ptr; -- uint64_t count_planes; -- } r; -+ struct local_mode_get_plane_res r; - uint32_t stack_planes[32]; - uint32_t *planes = stack_planes; - int i; -@@ -2629,18 +3439,7 @@ sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) - VG(VALGRIND_MAKE_MEM_DEFINED(planes, sizeof(uint32_t)*r.count_planes)); - - for (i = 0; i < r.count_planes; i++) { -- struct local_mode_get_plane { -- uint32_t plane_id; -- -- uint32_t crtc_id; -- uint32_t fb_id; -- -- uint32_t possible_crtcs; -- uint32_t gamma_size; -- -- uint32_t count_format_types; -- uint64_t format_type_ptr; -- } p; -+ struct local_mode_get_plane p; - struct plane details; - - VG_CLEAR(p); -@@ -2649,11 +3448,11 @@ sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) - if (drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_GETPLANE, &p)) - continue; - -- if ((p.possible_crtcs & (1 << crtc->pipe)) == 0) -+ if ((p.possible_crtcs & (1 << __sna_crtc_pipe(crtc))) == 0) - continue; - - DBG(("%s: plane %d is attached to our pipe=%d\n", -- __FUNCTION__, planes[i], crtc->pipe)); -+ __FUNCTION__, planes[i], __sna_crtc_pipe(crtc))); - - details.id = p.plane_id; - details.rotation.prop = 0; -@@ -2672,8 +3471,7 @@ sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) - break; - - case DRM_PLANE_TYPE_OVERLAY: -- if (crtc->sprite.id == 0) -- crtc->sprite = details; -+ add_sprite_plane(crtc, &details); - break; - } - } -@@ -2688,7 +3486,6 @@ sna_crtc_init__rotation(struct sna *sna, struct sna_crtc *crtc) - crtc->rotation = RR_Rotate_0; - crtc->primary.rotation.supported = RR_Rotate_0; - crtc->primary.rotation.current = RR_Rotate_0; -- crtc->sprite.rotation = crtc->primary.rotation; - } - - static void -@@ -2698,55 +3495,55 @@ sna_crtc_init__cursor(struct sna *sna, struct sna_crtc *crtc) - - VG_CLEAR(arg); - arg.flags = DRM_MODE_CURSOR_BO; -- arg.crtc_id = crtc->id; -+ arg.crtc_id = __sna_crtc_id(crtc); - arg.width = arg.height = 0; - arg.handle = 0; - - (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, 
&arg); -+ crtc->hwcursor = true; - } - - static bool --sna_crtc_add(ScrnInfoPtr scrn, int id) -+sna_crtc_add(ScrnInfoPtr scrn, unsigned id) - { - struct sna *sna = to_sna(scrn); - xf86CrtcPtr crtc; - struct sna_crtc *sna_crtc; - struct drm_i915_get_pipe_from_crtc_id get_pipe; - -- DBG(("%s(%d)\n", __FUNCTION__, id)); -+ DBG(("%s(%d): is-zaphod? %d\n", __FUNCTION__, id, is_zaphod(scrn))); - - sna_crtc = calloc(sizeof(struct sna_crtc), 1); - if (sna_crtc == NULL) - return false; - - sna_crtc->id = id; -- sna_crtc->dpms_mode = -1; - - VG_CLEAR(get_pipe); - get_pipe.pipe = 0; -- get_pipe.crtc_id = sna_crtc->id; -+ get_pipe.crtc_id = id; - if (drmIoctl(sna->kgem.fd, - DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, - &get_pipe)) { - free(sna_crtc); - return false; - } -- sna_crtc->pipe = get_pipe.pipe; -+ assert((unsigned)get_pipe.pipe < 256); -+ sna_crtc->flags |= get_pipe.pipe << 8; - - if (is_zaphod(scrn) && -- scrn->confScreen->device->screen != sna_crtc->pipe) { -+ (get_zaphod_crtcs(sna) & (1 << get_pipe.pipe)) == 0) { - free(sna_crtc); - return true; - } - -+ list_init(&sna_crtc->sprites); - sna_crtc_init__rotation(sna, sna_crtc); -- - sna_crtc_find_planes(sna, sna_crtc); - -- DBG(("%s: CRTC:%d [pipe=%d], primary id=%x: supported-rotations=%x, current-rotation=%x, sprite id=%x: supported-rotations=%x, current-rotation=%x\n", -- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, -- sna_crtc->primary.id, sna_crtc->primary.rotation.supported, sna_crtc->primary.rotation.current, -- sna_crtc->sprite.id, sna_crtc->sprite.rotation.supported, sna_crtc->sprite.rotation.current)); -+ DBG(("%s: CRTC:%d [pipe=%d], primary id=%x: supported-rotations=%x, current-rotation=%x\n", -+ __FUNCTION__, id, get_pipe.pipe, -+ sna_crtc->primary.id, sna_crtc->primary.rotation.supported, sna_crtc->primary.rotation.current)); - - list_init(&sna_crtc->shadow_link); - -@@ -2761,7 +3558,7 @@ sna_crtc_add(ScrnInfoPtr scrn, int id) - crtc->driver_private = sna_crtc; - sna_crtc->base = crtc; - DBG(("%s: attached 
crtc[%d] pipe=%d\n", -- __FUNCTION__, id, sna_crtc->pipe)); -+ __FUNCTION__, id, __sna_crtc_pipe(sna_crtc))); - - return true; - } -@@ -2798,20 +3595,56 @@ find_property(struct sna *sna, struct sna_output *output, const char *name) - return -1; - } - -+static void update_properties(struct sna *sna, struct sna_output *output) -+{ -+ union compat_mode_get_connector compat_conn; -+ struct drm_mode_modeinfo dummy; -+ -+ VG_CLEAR(compat_conn); -+ -+ compat_conn.conn.connector_id = output->id; -+ compat_conn.conn.count_props = output->num_props; -+ compat_conn.conn.props_ptr = (uintptr_t)output->prop_ids; -+ compat_conn.conn.prop_values_ptr = (uintptr_t)output->prop_values; -+ compat_conn.conn.count_modes = 1; /* skip detect */ -+ compat_conn.conn.modes_ptr = (uintptr_t)&dummy; -+ compat_conn.conn.count_encoders = 0; -+ -+ (void)drmIoctl(sna->kgem.fd, -+ DRM_IOCTL_MODE_GETCONNECTOR, -+ &compat_conn.conn); -+ -+ assert(compat_conn.conn.count_props == output->num_props); -+ output->update_properties = false; -+} -+ - static xf86OutputStatus - sna_output_detect(xf86OutputPtr output) - { - struct sna *sna = to_sna(output->scrn); - struct sna_output *sna_output = output->driver_private; - union compat_mode_get_connector compat_conn; -+ uint32_t now; - - DBG(("%s(%s:%d)\n", __FUNCTION__, output->name, sna_output->id)); -+ sna_output->update_properties = false; - - if (!sna_output->id) { - DBG(("%s(%s) hiding due to lost connection\n", __FUNCTION__, output->name)); - return XF86OutputStatusDisconnected; - } - -+ /* Cache detections for 15s or hotplug event */ -+ now = GetTimeInMillis(); -+ if (sna_output->last_detect != 0 && -+ (int32_t)(now - sna_output->last_detect) <= OUTPUT_STATUS_CACHE_MS) { -+ DBG(("%s(%s) reporting cached status (since %dms): %d\n", -+ __FUNCTION__, output->name, now - sna_output->last_detect, -+ sna_output->status)); -+ sna_output->update_properties = true; -+ return sna_output->status; -+ } -+ - VG_CLEAR(compat_conn); - compat_conn.conn.connector_id = 
sna_output->id; - sna_output->num_modes = compat_conn.conn.count_modes = 0; /* reprobe */ -@@ -2854,15 +3687,23 @@ sna_output_detect(xf86OutputPtr output) - DBG(("%s(%s): found %d modes, connection status=%d\n", - __FUNCTION__, output->name, sna_output->num_modes, compat_conn.conn.connection)); - -+ sna_output->reprobe = false; -+ sna_output->last_detect = now; - switch (compat_conn.conn.connection) { - case DRM_MODE_CONNECTED: -- return XF86OutputStatusConnected; -+ sna_output->status = XF86OutputStatusConnected; -+ output->mm_width = compat_conn.conn.mm_width; -+ output->mm_height = compat_conn.conn.mm_height; -+ break; - case DRM_MODE_DISCONNECTED: -- return XF86OutputStatusDisconnected; -+ sna_output->status = XF86OutputStatusDisconnected; -+ break; - default: - case DRM_MODE_UNKNOWNCONNECTION: -- return XF86OutputStatusUnknown; -+ sna_output->status = XF86OutputStatusUnknown; -+ break; - } -+ return sna_output->status; - } - - static Bool -@@ -2895,6 +3736,27 @@ sna_output_mode_valid(xf86OutputPtr output, DisplayModePtr mode) - return MODE_OK; - } - -+static void sna_output_set_parsed_edid(xf86OutputPtr output, xf86MonPtr mon) -+{ -+ unsigned conn_mm_width, conn_mm_height; -+ -+ /* We set the output size based on values from the kernel */ -+ conn_mm_width = output->mm_width; -+ conn_mm_height = output->mm_height; -+ -+ xf86OutputSetEDID(output, mon); -+ -+ if (output->mm_width != conn_mm_width || output->mm_height != conn_mm_height) { -+ DBG(("%s)%s): kernel and Xorg disagree over physical size: kernel=%dx%dmm, Xorg=%dx%dmm\n", -+ __FUNCTION__, output->name, -+ conn_mm_width, conn_mm_height, -+ output->mm_width, output->mm_height)); -+ } -+ -+ output->mm_width = conn_mm_width; -+ output->mm_height = conn_mm_height; -+} -+ - static void - sna_output_attach_edid(xf86OutputPtr output) - { -@@ -2907,6 +3769,13 @@ sna_output_attach_edid(xf86OutputPtr output) - if (sna_output->edid_idx == -1) - return; - -+ /* Always refresh the blob as the kernel may randomly 
update the -+ * id even if the contents of the blob doesn't change, and a -+ * request for the stale id will return nothing. -+ */ -+ if (sna_output->update_properties) -+ update_properties(sna, sna_output); -+ - raw = sna_output->edid_raw; - blob.length = sna_output->edid_len; - -@@ -2917,8 +3786,12 @@ sna_output_attach_edid(xf86OutputPtr output) - old = NULL; - - blob.blob_id = sna_output->prop_values[sna_output->edid_idx]; -- DBG(("%s: attaching EDID id=%d, current=%d\n", -- __FUNCTION__, blob.blob_id, sna_output->edid_blob_id)); -+ if (!blob.blob_id) -+ goto done; -+ -+ DBG(("%s(%s): attaching EDID id=%d, current=%d\n", -+ __FUNCTION__, output->name, -+ blob.blob_id, sna_output->edid_blob_id)); - if (blob.blob_id == sna_output->edid_blob_id && 0) { /* sigh */ - if (output->MonInfo) { - /* XXX the property keeps on disappearing... */ -@@ -2936,26 +3809,45 @@ sna_output_attach_edid(xf86OutputPtr output) - } - - blob.data = (uintptr_t)raw; -- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) -- goto done; -+ do { -+ while (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) { -+ update_properties(sna, sna_output); -+ if (blob.blob_id == sna_output->prop_values[sna_output->edid_idx]) { -+ DBG(("%s(%s): failed to read blob, reusing previous\n", -+ __FUNCTION__, output->name)); -+ goto done; -+ } -+ blob.blob_id = sna_output->prop_values[sna_output->edid_idx]; -+ } - -- DBG(("%s: retrieving blob id=%d, length=%d\n", -- __FUNCTION__, blob.blob_id, blob.length)); -+ DBG(("%s(%s): retrieving blob id=%d, length=%d\n", -+ __FUNCTION__, output->name, blob.blob_id, blob.length)); - -- if (blob.length > sna_output->edid_len) { -- raw = realloc(raw, blob.length); -- if (raw == NULL) -+ if (blob.length < 128) - goto done; - -- VG(memset(raw, 0, blob.length)); -- blob.data = (uintptr_t)raw; -- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) -- goto done; -+ if (blob.length > sna_output->edid_len) { -+ raw = realloc(raw, blob.length); -+ if 
(raw == NULL) -+ goto done; -+ -+ VG(memset(raw, 0, blob.length)); -+ blob.data = (uintptr_t)raw; -+ } -+ } while (blob.length != sna_output->edid_len && -+ drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)); -+ -+ if (blob.length & 127) { -+ /* Truncated EDID! Make sure no one reads too far */ -+ *SECTION(NO_EDID, (uint8_t*)raw) = blob.length/128 - 1; -+ blob.length &= -128; - } - - if (old && - blob.length == sna_output->edid_len && - memcmp(old, raw, blob.length) == 0) { -+ DBG(("%s(%s): EDID + MonInfo is unchanged\n", -+ __FUNCTION__, output->name)); - assert(sna_output->edid_raw == raw); - sna_output->edid_blob_id = blob.blob_id; - RRChangeOutputProperty(output->randr_output, -@@ -2974,31 +3866,186 @@ skip_read: - mon->flags |= MONITOR_EDID_COMPLETE_RAWDATA; - } - --done: -- xf86OutputSetEDID(output, mon); -- if (raw) { -- sna_output->edid_raw = raw; -- sna_output->edid_len = blob.length; -- sna_output->edid_blob_id = blob.blob_id; -+done: -+ sna_output_set_parsed_edid(output, mon); -+ if (raw) { -+ sna_output->edid_raw = raw; -+ sna_output->edid_len = blob.length; -+ sna_output->edid_blob_id = blob.blob_id; -+ } -+} -+ -+static void -+sna_output_attach_tile(xf86OutputPtr output) -+{ -+#if XF86_OUTPUT_VERSION >= 3 -+ struct sna *sna = to_sna(output->scrn); -+ struct sna_output *sna_output = output->driver_private; -+ struct drm_mode_get_blob blob; -+ struct xf86CrtcTileInfo tile_info, *set = NULL; -+ char *tile; -+ int id; -+ -+ id = find_property(sna, sna_output, "TILE"); -+ DBG(("%s: found? 
TILE=%d\n", __FUNCTION__, id)); -+ if (id == -1) -+ goto out; -+ -+ if (sna_output->update_properties) -+ update_properties(sna, sna_output); -+ -+ VG_CLEAR(blob); -+ blob.blob_id = sna_output->prop_values[id]; -+ blob.length = 0; -+ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) -+ goto out; -+ -+ do { -+ id = blob.length; -+ tile = alloca(id + 1); -+ blob.data = (uintptr_t)tile; -+ VG(memset(tile, 0, id)); -+ DBG(("%s: reading %d bytes for TILE blob\n", __FUNCTION__, id)); -+ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) -+ goto out; -+ } while (id != blob.length); -+ -+ tile[blob.length] = '\0'; /* paranoia */ -+ DBG(("%s: TILE='%s'\n", __FUNCTION__, tile)); -+ if (xf86OutputParseKMSTile(tile, blob.length, &tile_info)) -+ set = &tile_info; -+out: -+ xf86OutputSetTile(output, set); -+#endif -+} -+ -+static bool duplicate_mode(DisplayModePtr modes, DisplayModePtr m) -+{ -+ if (m == NULL) -+ return false; -+ -+ while (modes) { -+ if (xf86ModesEqual(modes, m)) -+ return true; -+ -+ modes = modes->next; -+ } -+ -+ return false; -+} -+ -+static struct pixel_count { -+ int16_t width, height; -+} common_16_9[] = { -+ { 640, 360 }, -+ { 720, 405 }, -+ { 864, 486 }, -+ { 960, 540 }, -+ { 1024, 576 }, -+ { 1280, 720 }, -+ { 1366, 768 }, -+ { 1600, 900 }, -+ { 1920, 1080 }, -+ { 2048, 1152 }, -+ { 2560, 1440 }, -+ { 2880, 1620 }, -+ { 3200, 1800 }, -+ { 3840, 2160 }, -+ { 4096, 2304 }, -+ { 5120, 2880 }, -+ { 7680, 4320 }, -+ { 15360, 8640 }, -+}, common_16_10[] = { -+ { 1280, 800 }, -+ { 1400, 900 }, -+ { 1680, 1050 }, -+ { 1920, 1200 }, -+ { 2560, 1600 }, -+}; -+ -+static DisplayModePtr -+default_modes(DisplayModePtr preferred) -+{ -+ DisplayModePtr modes; -+ int n; -+ -+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) -+ modes = xf86GetDefaultModes(); -+#else -+ modes = xf86GetDefaultModes(0, 0); -+#endif -+ -+ /* XXX O(n^2) mode list generation :( */ -+ -+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,4,99,901,0) -+ if 
(preferred) { -+ DisplayModePtr m; -+ -+ /* Add a half-resolution mode useful for large panels */ -+ m = xf86GTFMode(preferred->HDisplay/2, -+ preferred->VDisplay/2, -+ xf86ModeVRefresh(preferred), -+ FALSE, FALSE); -+ if (!duplicate_mode(modes, m)) -+ modes = xf86ModesAdd(modes, m); -+ else -+ free(m); -+ -+ if (preferred->VDisplay * 16 > preferred->HDisplay*9 - preferred->HDisplay/32 && -+ preferred->VDisplay * 16 < preferred->HDisplay*9 + preferred->HDisplay/32) { -+ DBG(("Adding 16:9 modes -- %d < %d > %d\n", -+ preferred->HDisplay*9 - preferred->HDisplay/32, -+ preferred->VDisplay * 16, -+ preferred->HDisplay*9 + preferred->HDisplay/32)); -+ for (n = 0; n < ARRAY_SIZE(common_16_9); n++) { -+ if (preferred->HDisplay <= common_16_9[n].width || -+ preferred->VDisplay <= common_16_9[n].height) -+ break; -+ -+ m = xf86GTFMode(common_16_9[n].width, -+ common_16_9[n].height, -+ xf86ModeVRefresh(preferred), -+ FALSE, FALSE); -+ if (!duplicate_mode(modes, m)) -+ modes = xf86ModesAdd(modes, m); -+ else -+ free(m); -+ } -+ } -+ -+ if (preferred->VDisplay * 16 > preferred->HDisplay*10 - preferred->HDisplay/32 && -+ preferred->VDisplay * 16 < preferred->HDisplay*10 + preferred->HDisplay/32) { -+ DBG(("Adding 16:10 modes -- %d < %d > %d\n", -+ preferred->HDisplay*10 - preferred->HDisplay/32, -+ preferred->VDisplay * 16, -+ preferred->HDisplay*10 + preferred->HDisplay/32)); -+ for (n = 0; n < ARRAY_SIZE(common_16_10); n++) { -+ if (preferred->HDisplay <= common_16_10[n].width || -+ preferred->VDisplay <= common_16_10[n].height) -+ break; -+ -+ m = xf86GTFMode(common_16_10[n].width, -+ common_16_10[n].height, -+ xf86ModeVRefresh(preferred), -+ FALSE, FALSE); -+ if (!duplicate_mode(modes, m)) -+ modes = xf86ModesAdd(modes, m); -+ else -+ free(m); -+ } -+ } - } --} -- --static DisplayModePtr --default_modes(void) --{ --#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) -- return xf86GetDefaultModes(); --#else -- return xf86GetDefaultModes(0, 0); - #endif -+ -+ 
return modes; - } - - static DisplayModePtr --sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) -+sna_output_add_default_modes(xf86OutputPtr output, DisplayModePtr modes) - { - xf86MonPtr mon = output->MonInfo; - DisplayModePtr i, m, preferred = NULL; -- int max_x = 0, max_y = 0; -+ int max_x = 0, max_y = 0, max_clock = 0; - float max_vrefresh = 0.0; - - if (mon && GTF_SUPPORTED(mon->features.msc)) -@@ -3009,16 +4056,17 @@ sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) - preferred = m; - max_x = max(max_x, m->HDisplay); - max_y = max(max_y, m->VDisplay); -+ max_clock = max(max_clock, m->Clock); - max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m)); - } -- -- max_vrefresh = max(max_vrefresh, 60.0); - max_vrefresh *= (1 + SYNC_TOLERANCE); - -- m = default_modes(); -+ m = default_modes(preferred); - xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0); - - for (i = m; i; i = i->next) { -+ if (i->Clock > max_clock) -+ i->status = MODE_CLOCK_HIGH; - if (xf86ModeVRefresh(i) > max_vrefresh) - i->status = MODE_VSYNC; - if (preferred && -@@ -3034,28 +4082,47 @@ sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) - } - - static DisplayModePtr -+sna_output_override_edid(xf86OutputPtr output) -+{ -+ struct sna_output *sna_output = output->driver_private; -+ -+ if (sna_output->fake_edid_mon == NULL) -+ return NULL; -+ -+ xf86OutputSetEDID(output, sna_output->fake_edid_mon); -+ return xf86DDCGetModes(output->scrn->scrnIndex, -+ sna_output->fake_edid_mon); -+} -+ -+static DisplayModePtr - sna_output_get_modes(xf86OutputPtr output) - { - struct sna_output *sna_output = output->driver_private; -- DisplayModePtr Modes = NULL, current = NULL; -+ DisplayModePtr Modes, current; - int i; - - DBG(("%s(%s:%d)\n", __FUNCTION__, output->name, sna_output->id)); - assert(sna_output->id); - -+ Modes = sna_output_override_edid(output); -+ if (Modes) -+ return Modes; -+ - sna_output_attach_edid(output); -+ sna_output_attach_tile(output); - -- 
if (output->crtc) { -+ current = NULL; -+ if (output->crtc && !sna_output->hotplug_count) { - struct drm_mode_crtc mode; - - VG_CLEAR(mode); - assert(to_sna_crtc(output->crtc)); -- mode.crtc_id = to_sna_crtc(output->crtc)->id; -+ mode.crtc_id = sna_crtc_id(output->crtc); - - if (drmIoctl(to_sna(output->scrn)->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode) == 0) { - DBG(("%s: CRTC:%d, pipe=%d: has mode?=%d\n", __FUNCTION__, -- to_sna_crtc(output->crtc)->id, -- to_sna_crtc(output->crtc)->pipe, -+ sna_crtc_id(output->crtc), -+ sna_crtc_pipe(output->crtc), - mode.mode_valid && mode.mode.clock)); - - if (mode.mode_valid && mode.mode.clock) { -@@ -3117,7 +4184,7 @@ sna_output_get_modes(xf86OutputPtr output) - } - - if (sna_output->add_default_modes) -- Modes = sna_output_panel_edid(output, Modes); -+ Modes = sna_output_add_default_modes(output, Modes); - - return Modes; - } -@@ -3132,6 +4199,8 @@ sna_output_destroy(xf86OutputPtr output) - return; - - free(sna_output->edid_raw); -+ free(sna_output->fake_edid_raw); -+ - for (i = 0; i < sna_output->num_props; i++) { - if (sna_output->props[i].kprop == NULL) - continue; -@@ -3155,7 +4224,7 @@ sna_output_destroy(xf86OutputPtr output) - } - - static void --sna_output_dpms(xf86OutputPtr output, int dpms) -+__sna_output_dpms(xf86OutputPtr output, int dpms, int fixup) - { - struct sna *sna = to_sna(output->scrn); - struct sna_output *sna_output = output->driver_private; -@@ -3182,8 +4251,9 @@ sna_output_dpms(xf86OutputPtr output, int dpms) - if (sna_output->backlight.iface && dpms != DPMSModeOn) { - if (old_dpms == DPMSModeOn) { - sna_output->backlight_active_level = sna_output_backlight_get(output); -- DBG(("%s: saving current backlight %d\n", -- __FUNCTION__, sna_output->backlight_active_level)); -+ DBG(("%s(%s:%d): saving current backlight %d\n", -+ __FUNCTION__, output->name, sna_output->id, -+ sna_output->backlight_active_level)); - } - sna_output->dpms_mode = dpms; - sna_output_backlight_off(sna_output); -@@ -3193,18 +4263,31 @@ 
sna_output_dpms(xf86OutputPtr output, int dpms) - drmModeConnectorSetProperty(sna->kgem.fd, - sna_output->id, - sna_output->dpms_id, -- dpms)) -- dpms = old_dpms; -+ dpms)) { -+ DBG(("%s(%s:%d): failed to set DPMS to %d (fixup? %d)\n", -+ __FUNCTION__, output->name, sna_output->id, dpms, fixup)); -+ if (fixup && dpms != DPMSModeOn) { -+ sna_crtc_disable(output->crtc, false); -+ return; -+ } -+ } - - if (sna_output->backlight.iface && dpms == DPMSModeOn) { -- DBG(("%s: restoring previous backlight %d\n", -- __FUNCTION__, sna_output->backlight_active_level)); -+ DBG(("%s(%d:%d: restoring previous backlight %d\n", -+ __FUNCTION__, output->name, sna_output->id, -+ sna_output->backlight_active_level)); - sna_output_backlight_on(sna_output); - } - - sna_output->dpms_mode = dpms; - } - -+static void -+sna_output_dpms(xf86OutputPtr output, int dpms) -+{ -+ __sna_output_dpms(output, dpms, true); -+} -+ - static bool - sna_property_ignore(drmModePropertyPtr prop) - { -@@ -3239,14 +4322,14 @@ sna_output_create_ranged_atom(xf86OutputPtr output, Atom *atom, - err = RRConfigureOutputProperty(output->randr_output, *atom, FALSE, - TRUE, immutable, 2, atom_range); - if (err != 0) -- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, -+ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, - "RRConfigureOutputProperty error, %d\n", err); - - err = RRChangeOutputProperty(output->randr_output, *atom, XA_INTEGER, - 32, PropModeReplace, 1, &value, - FALSE, FALSE); - if (err != 0) -- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, -+ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, - "RRChangeOutputProperty error, %d\n", err); - } - -@@ -3303,7 +4386,7 @@ sna_output_create_resources(xf86OutputPtr output) - p->kprop->flags & DRM_MODE_PROP_IMMUTABLE ? 
TRUE : FALSE, - p->num_atoms - 1, (INT32 *)&p->atoms[1]); - if (err != 0) { -- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, -+ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, - "RRConfigureOutputProperty error, %d\n", err); - } - -@@ -3315,7 +4398,7 @@ sna_output_create_resources(xf86OutputPtr output) - XA_ATOM, 32, PropModeReplace, 1, &p->atoms[j+1], - FALSE, FALSE); - if (err != 0) { -- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, -+ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, - "RRChangeOutputProperty error, %d\n", err); - } - } -@@ -3385,18 +4468,19 @@ sna_output_set_property(xf86OutputPtr output, Atom property, - if (value->type != XA_INTEGER || value->format != 32 || - value->size != 1) - return FALSE; -- val = *(uint32_t *)value->data; - -+ val = *(uint32_t *)value->data; - drmModeConnectorSetProperty(sna->kgem.fd, sna_output->id, - p->kprop->prop_id, (uint64_t)val); - return TRUE; - } else if (p->kprop->flags & DRM_MODE_PROP_ENUM) { -- Atom atom; -- const char *name; -- int j; -+ Atom atom; -+ const char *name; -+ int j; - - if (value->type != XA_ATOM || value->format != 32 || value->size != 1) - return FALSE; -+ - memcpy(&atom, value->data, 4); - name = NameForAtom(atom); - if (name == NULL) -@@ -3425,7 +4509,7 @@ static Bool - sna_output_get_property(xf86OutputPtr output, Atom property) - { - struct sna_output *sna_output = output->driver_private; -- int err; -+ int err, i, j; - - if (property == backlight_atom || property == backlight_deprecated_atom) { - INT32 val; -@@ -3449,7 +4533,7 @@ sna_output_get_property(xf86OutputPtr output, Atom property) - XA_INTEGER, 32, PropModeReplace, 1, &val, - FALSE, FALSE); - if (err != 0) { -- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, -+ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, - "RRChangeOutputProperty error, %d\n", err); - return FALSE; - } -@@ -3457,6 +4541,40 @@ sna_output_get_property(xf86OutputPtr output, Atom property) - return TRUE; - } - -+ for (i = 0; i < sna_output->num_props; i++) { -+ struct 
sna_property *p = &sna_output->props[i]; -+ -+ if (p->atoms == NULL || p->atoms[0] != property) -+ continue; -+ -+ if (sna_output->update_properties && output->scrn->vtSema) -+ update_properties(to_sna(output->scrn), sna_output); -+ -+ err = 0; -+ if (p->kprop->flags & DRM_MODE_PROP_RANGE) { -+ err = RRChangeOutputProperty(output->randr_output, -+ property, XA_INTEGER, 32, -+ PropModeReplace, 1, -+ &sna_output->prop_values[i], -+ FALSE, FALSE); -+ } else if (p->kprop->flags & DRM_MODE_PROP_ENUM) { -+ for (j = 0; j < p->kprop->count_enums; j++) { -+ if (p->kprop->enums[j].value == sna_output->prop_values[i]) -+ break; -+ } -+ err = RRChangeOutputProperty(output->randr_output, -+ property, XA_ATOM, 32, -+ PropModeReplace, 1, -+ &p->atoms[j+1], -+ FALSE, FALSE); -+ } -+ -+ if (err != 0) -+ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, -+ "RRChangeOutputProperty error, %d\n", err); -+ return TRUE; -+ } -+ - return FALSE; - } - -@@ -3500,47 +4618,11 @@ static const char * const output_names[] = { - /* DRM_MODE_CONNECTOR_TV */ "TV", - /* DRM_MODE_CONNECTOR_eDP */ "eDP", - /* DRM_MODE_CONNECTOR_VIRTUAL */ "Virtual", -- /* DRM_MODE_CONNECTOR_DSI */ "DSI" -+ /* DRM_MODE_CONNECTOR_DSI */ "DSI", -+ /* DRM_MODE_CONNECTOR_DPI */ "DPI" - }; - - static bool --sna_zaphod_match(const char *s, const char *output) --{ -- char t[20]; -- unsigned int i = 0; -- -- do { -- /* match any outputs in a comma list, stopping at whitespace */ -- switch (*s) { -- case '\0': -- t[i] = '\0'; -- return strcmp(t, output) == 0; -- -- case ',': -- t[i] ='\0'; -- if (strcmp(t, output) == 0) -- return TRUE; -- i = 0; -- break; -- -- case ' ': -- case '\t': -- case '\n': -- case '\r': -- break; -- -- default: -- t[i++] = *s; -- break; -- } -- -- s++; -- } while (i < sizeof(t)); -- -- return false; --} -- --static bool - output_ignored(ScrnInfoPtr scrn, const char *name) - { - char monitor_name[64]; -@@ -3572,14 +4654,21 @@ gather_encoders(struct sna *sna, uint32_t id, int count, - struct 
drm_mode_get_encoder enc; - uint32_t *ids = NULL; - -+ DBG(("%s(%d): expected count=%d\n", __FUNCTION__, id, count)); -+ - VG_CLEAR(compat_conn); -+ VG_CLEAR(enc); - memset(out, 0, sizeof(*out)); - - do { -- free(ids); -- ids = malloc(sizeof(*ids) * count); -- if (ids == 0) -+ uint32_t *nids; -+ -+ nids = realloc(ids, sizeof(*ids) * count); -+ if (nids == NULL) { -+ free(ids); - return false; -+ } -+ ids = nids; - - compat_conn.conn.connector_id = id; - compat_conn.conn.count_props = 0; -@@ -3593,12 +4682,14 @@ gather_encoders(struct sna *sna, uint32_t id, int count, - compat_conn.conn.count_encoders = count = 0; - } - -+ VG(VALGRIND_MAKE_MEM_DEFINED(ids, sizeof(uint32_t)*compat_conn.conn.count_encoders)); - if (count == compat_conn.conn.count_encoders) - break; - - count = compat_conn.conn.count_encoders; - } while (1); - -+ DBG(("%s(%d): gathering %d encoders\n", __FUNCTION__, id, count)); - for (count = 0; count < compat_conn.conn.count_encoders; count++) { - enc.encoder_id = ids[count]; - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETENCODER, &enc)) { -@@ -3606,6 +4697,8 @@ gather_encoders(struct sna *sna, uint32_t id, int count, - count = 0; - break; - } -+ DBG(("%s(%d): encoder=%d, possible_crtcs=%x, possible_clones=%x\n", -+ __FUNCTION__, id, enc.encoder_id, enc.possible_crtcs, enc.possible_clones)); - out->possible_crtcs |= enc.possible_crtcs; - out->possible_clones |= enc.possible_clones; - -@@ -3731,6 +4824,116 @@ static int name_from_path(struct sna *sna, - return 0; - } - -+static char *fake_edid_name(xf86OutputPtr output) -+{ -+ struct sna *sna = to_sna(output->scrn); -+ const char *str, *colon; -+ -+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) -+ str = xf86GetOptValString(sna->Options, OPTION_EDID); -+#else -+ str = NULL; -+#endif -+ if (str == NULL) -+ return NULL; -+ -+ do { -+ colon = strchr(str, ':'); -+ if (colon == NULL) -+ return NULL; -+ -+ if (strncmp(str, output->name, colon-str) == 0 && -+ output->name[colon-str] == '\0') { 
-+ char *path; -+ int len; -+ -+ str = colon + 1; -+ colon = strchr(str, ','); -+ if (colon) -+ len = colon - str; -+ else -+ len = strlen(str); -+ -+ path = malloc(len + 1); -+ if (path == NULL) -+ return NULL; -+ -+ memcpy(path, str, len); -+ path[len] = '\0'; -+ return path; -+ } -+ -+ str = strchr(colon + 1, ','); -+ if (str == NULL) -+ return NULL; -+ -+ str++; -+ } while (1); -+} -+ -+static void -+sna_output_load_fake_edid(xf86OutputPtr output) -+{ -+ struct sna_output *sna_output = output->driver_private; -+ const char *filename; -+ FILE *file; -+ void *raw; -+ int size; -+ xf86MonPtr mon; -+ -+ filename = fake_edid_name(output); -+ if (filename == NULL) -+ return; -+ -+ file = fopen(filename, "rb"); -+ if (file == NULL) -+ goto err; -+ -+ fseek(file, 0, SEEK_END); -+ size = ftell(file); -+ if (size % 128) { -+ fclose(file); -+ goto err; -+ } -+ -+ raw = malloc(size); -+ if (raw == NULL) { -+ fclose(file); -+ free(raw); -+ goto err; -+ } -+ -+ fseek(file, 0, SEEK_SET); -+ if (fread(raw, size, 1, file) != 1) { -+ fclose(file); -+ free(raw); -+ goto err; -+ } -+ fclose(file); -+ -+ mon = xf86InterpretEDID(output->scrn->scrnIndex, raw); -+ if (mon == NULL) { -+ free(raw); -+ goto err; -+ } -+ -+ if (mon && size > 128) -+ mon->flags |= MONITOR_EDID_COMPLETE_RAWDATA; -+ -+ sna_output->fake_edid_mon = mon; -+ sna_output->fake_edid_raw = raw; -+ -+ xf86DrvMsg(output->scrn->scrnIndex, X_CONFIG, -+ "Loading EDID from \"%s\" for output %s\n", -+ filename, output->name); -+ return; -+ -+err: -+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, -+ "Could not read EDID file \"%s\" for output %s\n", -+ filename, output->name); -+} -+ - static int - sna_output_add(struct sna *sna, unsigned id, unsigned serial) - { -@@ -3765,6 +4968,7 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) - return -1; - } - assert(compat_conn.conn.connector_id == id); -+ DBG(("%s(%d): has %d associated encoders\n", __FUNCTION__, id, compat_conn.conn.count_encoders)); - - if 
(compat_conn.conn.connector_type < ARRAY_SIZE(output_names)) - output_name = output_names[compat_conn.conn.connector_type]; -@@ -3813,34 +5017,43 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) - } - - if (is_zaphod(scrn)) { -- const char *str; -+ unsigned zaphod_crtcs; - -- str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); -- if (str && !sna_zaphod_match(str, name)) { -- DBG(("%s: zaphod mismatch, want %s, have %s\n", __FUNCTION__, str, name)); -+ if (!sna_zaphod_match(sna, name)) { -+ DBG(("%s: zaphod mismatch, want %s, have %s\n", -+ __FUNCTION__, -+ xf86GetOptValString(sna->Options, OPTION_ZAPHOD) ?: "???", -+ name)); - return 0; - } - -- if ((possible_crtcs & (1 << scrn->confScreen->device->screen)) == 0) { -- if (str) { -- xf86DrvMsg(scrn->scrnIndex, X_ERROR, -- "%s is an invalid output for screen (pipe) %d\n", -- name, scrn->confScreen->device->screen); -- return -1; -- } else -- return 0; -+ zaphod_crtcs = get_zaphod_crtcs(sna); -+ possible_crtcs &= zaphod_crtcs; -+ if (possible_crtcs == 0) { -+ xf86DrvMsg(scrn->scrnIndex, X_ERROR, -+ "%s is an invalid output for screen %d\n", -+ name, scrn->confScreen->device->screen); -+ return -1; - } - -- possible_crtcs = 1; -+ possible_crtcs >>= ffs(zaphod_crtcs) - 1; - } - - sna_output = calloc(sizeof(struct sna_output), 1); - if (!sna_output) - return -1; - -+ sna_output->connector_type = compat_conn.conn.connector_type; -+ sna_output->connector_type_id = compat_conn.conn.connector_type_id; - sna_output->num_props = compat_conn.conn.count_props; - sna_output->prop_ids = malloc(sizeof(uint32_t)*compat_conn.conn.count_props); - sna_output->prop_values = malloc(sizeof(uint64_t)*compat_conn.conn.count_props); -+ if (sna_output->prop_ids == NULL || sna_output->prop_values == NULL) { -+ free(sna_output->prop_ids); -+ free(sna_output->prop_values); -+ free(sna_output); -+ return -1; -+ } - - compat_conn.conn.count_encoders = 0; - -@@ -3865,16 +5078,16 @@ sna_output_add(struct sna *sna, unsigned id, 
unsigned serial) - /* Construct name from topology, and recheck if output is acceptable */ - path = name_from_path(sna, sna_output, name); - if (path) { -- const char *str; -- - if (output_ignored(scrn, name)) { - len = 0; - goto skip; - } - -- str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); -- if (str && !sna_zaphod_match(str, name)) { -- DBG(("%s: zaphod mismatch, want %s, have %s\n", __FUNCTION__, str, name)); -+ if (is_zaphod(scrn) && !sna_zaphod_match(sna, name)) { -+ DBG(("%s: zaphod mismatch, want %s, have %s\n", -+ __FUNCTION__, -+ xf86GetOptValString(sna->Options, OPTION_ZAPHOD) ?: "???", -+ name)); - len = 0; - goto skip; - } -@@ -3889,7 +5102,6 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) - if (strcmp(output->name, name) == 0) { - assert(output->scrn == scrn); - assert(output->funcs == &sna_output_funcs); -- assert(to_sna_output(output)->id == 0); - sna_output_destroy(output); - goto reset; - } -@@ -3935,6 +5147,8 @@ reset: - sna_output->id = compat_conn.conn.connector_id; - sna_output->is_panel = is_panel(compat_conn.conn.connector_type); - sna_output->edid_idx = find_property(sna, sna_output, "EDID"); -+ sna_output->link_status_idx = -+ find_property(sna, sna_output, "link-status"); - if (find_property(sna, sna_output, "scaling mode") != -1) - sna_output->add_default_modes = - xf86ReturnOptValBool(output->options, OPTION_DEFAULT_MODES, TRUE); -@@ -3945,10 +5159,8 @@ reset: - sna_output->dpms_mode = sna_output->prop_values[i]; - DBG(("%s: found 'DPMS' (idx=%d, id=%d), initial value=%d\n", - __FUNCTION__, i, sna_output->dpms_id, sna_output->dpms_mode)); -- } else { -- sna_output->dpms_id = -1; -+ } else - sna_output->dpms_mode = DPMSModeOff; -- } - - sna_output->possible_encoders = possible_encoders; - sna_output->attached_encoders = attached_encoders; -@@ -3963,12 +5175,13 @@ reset: - sna_output->base = output; - - backlight_init(&sna_output->backlight); -- if (sna_output->is_panel) -- sna_output_backlight_init(output); -+ 
sna_output_backlight_init(output); - - output->possible_crtcs = possible_crtcs & count_to_mask(sna->mode.num_real_crtc); - output->interlaceAllowed = TRUE; - -+ sna_output_load_fake_edid(output); -+ - if (serial) { - if (output->randr_output == NULL) { - output->randr_output = RROutputCreate(xf86ScrnToScreen(scrn), name, len, output); -@@ -3976,6 +5189,7 @@ reset: - goto cleanup; - } - -+ RROutputChanged(output->randr_output, TRUE); - sna_output_create_resources(output); - RRPostPendingProperties(output->randr_output); - -@@ -4009,38 +5223,6 @@ skip: - return len; - } - --static void sna_output_del(xf86OutputPtr output) --{ -- ScrnInfoPtr scrn = output->scrn; -- xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); -- int i; -- -- DBG(("%s(%s)\n", __FUNCTION__, output->name)); -- assert(to_sna_output(output)); -- -- RROutputDestroy(output->randr_output); -- sna_output_destroy(output); -- -- while (output->probed_modes) -- xf86DeleteMode(&output->probed_modes, output->probed_modes); -- -- free(output); -- -- for (i = 0; i < config->num_output; i++) -- if (config->output[i] == output) -- break; -- assert(i < to_sna(scrn)->mode.num_real_output); -- DBG(("%s: removing output #%d of %d\n", -- __FUNCTION__, i, to_sna(scrn)->mode.num_real_output)); -- -- for (; i < config->num_output; i++) { -- config->output[i] = config->output[i+1]; -- config->output[i]->possible_clones >>= 1; -- } -- config->num_output--; -- to_sna(scrn)->mode.num_real_output--; --} -- - static int output_rank(const void *A, const void *B) - { - const xf86OutputPtr *a = A; -@@ -4058,6 +5240,7 @@ static void sort_config_outputs(struct sna *sna) - { - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); - qsort(config->output, sna->mode.num_real_output, sizeof(*config->output), output_rank); -+ config->compat_output = 0; /* make sure it is a sane value */ - sna_mode_compute_possible_outputs(sna); - } - -@@ -4080,11 +5263,15 @@ static bool disable_unused_crtc(struct sna *sna) - bool update = 
false; - int o, c; - -+ DBG(("%s\n", __FUNCTION__)); -+ - for (c = 0; c < sna->mode.num_real_crtc; c++) { - xf86CrtcPtr crtc = config->crtc[c]; - -- if (!crtc->enabled) -+ if (!crtc->enabled) { -+ sna_crtc_disable(crtc, false); - continue; -+ } - - for (o = 0; o < sna->mode.num_real_output; o++) { - xf86OutputPtr output = config->output[o]; -@@ -4094,7 +5281,7 @@ static bool disable_unused_crtc(struct sna *sna) - - if (o == sna->mode.num_real_output) { - DBG(("%s: CRTC:%d was enabled with no outputs\n", -- __FUNCTION__, to_sna_crtc(crtc)->id)); -+ __FUNCTION__, sna_crtc_id(crtc))); - crtc->enabled = false; - update = true; - } -@@ -4108,17 +5295,145 @@ static bool disable_unused_crtc(struct sna *sna) - return update; - } - --void sna_mode_discover(struct sna *sna) -+bool sna_mode_find_hotplug_connector(struct sna *sna, unsigned id) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ int i; -+ -+ for (i = 0; i < sna->mode.num_real_output; i++) { -+ struct sna_output *output = to_sna_output(config->output[i]); -+ if (output->id == id) { -+ output->reprobe = true; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static bool -+output_retrain_link(struct sna *sna, struct sna_output *output) -+{ -+ struct sna_crtc *crtc = to_sna_crtc(output->base->crtc); -+ int crtc_x = crtc->offset & 0xffff; -+ int crtc_y = crtc->offset >> 16; -+ -+ return sna_crtc_flip(sna, crtc, crtc->bo, crtc_x, crtc_y); -+} -+ -+static bool -+output_check_link(struct sna *sna, struct sna_output *output) -+{ -+ uint64_t link_status; -+ -+ if (!output->base->crtc) -+ return true; -+ -+ if (output->link_status_idx == -1) -+ return true; -+ -+#define LINK_STATUS_GOOD 0 -+ link_status = output->prop_values[output->link_status_idx]; -+ DBG(("%s: link_status=%d\n", __FUNCTION__, link_status)); -+ if (link_status == LINK_STATUS_GOOD) -+ return true; -+ -+ /* Perform a modeset as required for "link-status" = BAD */ -+ if (!output_retrain_link(sna, output)) -+ return false; -+ -+ /* 
Query the "link-status" again to confirm the modeset */ -+ update_properties(sna, output); -+ -+ link_status = output->prop_values[output->link_status_idx]; -+ DBG(("%s: link_status=%d after modeset\n", __FUNCTION__, link_status)); -+ return link_status == LINK_STATUS_GOOD; -+} -+ -+static bool -+output_check_status(struct sna *sna, struct sna_output *output) -+{ -+ union compat_mode_get_connector compat_conn; -+ struct drm_mode_modeinfo dummy; -+ struct drm_mode_get_blob blob; -+ xf86OutputStatus status; -+ char *edid; -+ -+ VG_CLEAR(compat_conn); -+ -+ compat_conn.conn.connection = -1; -+ compat_conn.conn.connector_id = output->id; -+ compat_conn.conn.count_modes = 1; /* skip detect */ -+ compat_conn.conn.modes_ptr = (uintptr_t)&dummy; -+ compat_conn.conn.count_encoders = 0; -+ compat_conn.conn.props_ptr = (uintptr_t)output->prop_ids; -+ compat_conn.conn.prop_values_ptr = (uintptr_t)output->prop_values; -+ compat_conn.conn.count_props = output->num_props; -+ -+ if (drmIoctl(sna->kgem.fd, -+ DRM_IOCTL_MODE_GETCONNECTOR, -+ &compat_conn.conn) == 0) -+ output->update_properties = false; -+ -+ if (!output_check_link(sna, output)) -+ return false; -+ -+ if (output->reprobe) -+ return false; -+ -+ switch (compat_conn.conn.connection) { -+ case DRM_MODE_CONNECTED: -+ status = XF86OutputStatusConnected; -+ break; -+ case DRM_MODE_DISCONNECTED: -+ status = XF86OutputStatusDisconnected; -+ break; -+ default: -+ case DRM_MODE_UNKNOWNCONNECTION: -+ status = XF86OutputStatusUnknown; -+ break; -+ } -+ if (output->status != status) -+ return false; -+ -+ if (status != XF86OutputStatusConnected) -+ return true; -+ -+ if (output->num_modes != compat_conn.conn.count_modes) -+ return false; -+ -+ if (output->edid_len == 0) -+ return false; -+ -+ edid = alloca(output->edid_len); -+ -+ VG_CLEAR(blob); -+ blob.blob_id = output->prop_values[output->edid_idx]; -+ blob.length = output->edid_len; -+ blob.data = (uintptr_t)edid; -+ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, 
&blob)) -+ return false; -+ -+ if (blob.length != output->edid_len) -+ return false; -+ -+ return memcmp(edid, output->edid_raw, output->edid_len) == 0; -+} -+ -+void sna_mode_discover(struct sna *sna, bool tell) - { - ScreenPtr screen = xf86ScrnToScreen(sna->scrn); - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ bool force = sna->flags & SNA_REPROBE; - struct drm_mode_card_res res; -- uint32_t connectors[32]; -+ uint32_t connectors[32], now; - unsigned changed = 0; - unsigned serial; - int i, j; - - DBG(("%s()\n", __FUNCTION__)); -+ sna->flags &= ~SNA_REPROBE; -+ - VG_CLEAR(connectors); - - memset(&res, 0, sizeof(res)); -@@ -4128,10 +5443,11 @@ void sna_mode_discover(struct sna *sna) - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETRESOURCES, &res)) - return; - -- DBG(("%s: now %d (was %d) connectors\n", __FUNCTION__, -- res.count_connectors, sna->mode.num_real_output)); -+ DBG(("%s: now %d (was %d) connectors, %d encoders, %d crtc\n", __FUNCTION__, -+ res.count_connectors, sna->mode.num_real_output, -+ res.count_encoders, res.count_crtcs)); - if (res.count_connectors > 32) -- return; -+ res.count_connectors = 32; - - assert(sna->mode.num_real_crtc == res.count_crtcs || is_zaphod(sna->scrn)); - assert(sna->mode.max_crtc_width == res.max_width); -@@ -4142,6 +5458,11 @@ void sna_mode_discover(struct sna *sna) - if (serial == 0) - serial = ++sna->mode.serial; - -+ if (force) { -+ changed = 4; -+ now = 0; -+ } else -+ now = GetTimeInMillis(); - for (i = 0; i < res.count_connectors; i++) { - DBG(("%s: connector[%d] = %d\n", __FUNCTION__, i, connectors[i])); - for (j = 0; j < sna->mode.num_real_output; j++) { -@@ -4161,32 +5482,42 @@ void sna_mode_discover(struct sna *sna) - - for (i = 0; i < sna->mode.num_real_output; i++) { - xf86OutputPtr output = config->output[i]; -+ struct sna_output *sna_output = to_sna_output(output); - -- if (to_sna_output(output)->id == 0) -+ if (sna_output->id == 0) - continue; - -- if (to_sna_output(output)->serial == serial) 
-+ if (sna_output->serial == serial) { -+ if (output_check_status(sna, sna_output)) { -+ DBG(("%s: output %s (id=%d), retained state\n", -+ __FUNCTION__, output->name, sna_output->id)); -+ sna_output->last_detect = now; -+ } else { -+ DBG(("%s: output %s (id=%d), changed state, reprobing\n", -+ __FUNCTION__, output->name, sna_output->id)); -+ sna_output->hotplug_count++; -+ sna_output->last_detect = 0; -+ changed |= 4; -+ } - continue; -+ } - - DBG(("%s: removing output %s (id=%d), serial=%u [now %u]\n", -- __FUNCTION__, output->name, to_sna_output(output)->id, -- to_sna_output(output)->serial, serial)); -+ __FUNCTION__, output->name, sna_output->id, -+ sna_output->serial, serial)); - - xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, -- "%s output %s\n", -- sna->flags & SNA_REMOVE_OUTPUTS ? "Removed" : "Disabled", -+ "Disabled output %s\n", - output->name); -- if (sna->flags & SNA_REMOVE_OUTPUTS) { -- sna_output_del(output); -- i--; -- } else { -- to_sna_output(output)->id = 0; -- output->crtc = NULL; -- } -+ sna_output->id = 0; -+ sna_output->last_detect = 0; -+ output->crtc = NULL; -+ RROutputChanged(output->randr_output, TRUE); - changed |= 2; - } - -- if (changed) { -+ /* Have the list of available outputs been updated? */ -+ if (changed & 3) { - DBG(("%s: outputs changed, broadcasting\n", __FUNCTION__)); - - sna_mode_set_primary(sna); -@@ -4200,6 +5531,51 @@ void sna_mode_discover(struct sna *sna) - - xf86RandR12TellChanged(screen); - } -+ -+ /* If anything has changed, refresh the RandR information. -+ * Note this could recurse once from udevless RRGetInfo() probes, -+ * but only once. -+ */ -+ if (changed && tell) -+ RRGetInfo(screen, TRUE); -+} -+ -+/* Since we only probe the current mode on startup, we may not have the full -+ * list of modes available until the user explicitly requests them. Fake a -+ * hotplug event after a second after starting to fill in any missing modes. 
-+ */ -+static CARD32 sna_mode_coldplug(OsTimerPtr timer, CARD32 now, void *data) -+{ -+ struct sna *sna = data; -+ ScreenPtr screen = xf86ScrnToScreen(sna->scrn); -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ bool reprobe = false; -+ int i; -+ -+ DBG(("%s()\n", __FUNCTION__)); -+ -+ for (i = 0; i < sna->mode.num_real_output; i++) { -+ xf86OutputPtr output = config->output[i]; -+ struct sna_output *sna_output = to_sna_output(output); -+ -+ if (sna_output->id == 0) -+ continue; -+ if (sna_output->last_detect) -+ continue; -+ if (output->status == XF86OutputStatusDisconnected) -+ continue; -+ -+ DBG(("%s: output %s connected, needs reprobe\n", -+ __FUNCTION__, output->name)); -+ reprobe = true; -+ } -+ -+ if (reprobe) { -+ RRGetInfo(screen, TRUE); -+ RRTellChanged(screen); -+ } -+ free(timer); -+ return 0; - } - - static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) -@@ -4208,7 +5584,7 @@ static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) - - DBG(("%s\n", __FUNCTION__)); - -- if (wedged(sna)) -+ if (wedged(sna) || isGPU(sna->scrn)) - return; - - old_priv = sna_pixmap_force_to_gpu(old, MOVE_READ); -@@ -4220,12 +5596,19 @@ static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) - return; - - if (old_priv->clear) { -- (void)sna->render.fill_one(sna, new, new_priv->gpu_bo, -- old_priv->clear_color, -- 0, 0, -- new->drawable.width, -- new->drawable.height, -- GXcopy); -+ bool ok = false; -+ if (!wedged(sna)) -+ ok = sna->render.fill_one(sna, new, new_priv->gpu_bo, -+ old_priv->clear_color, -+ 0, 0, -+ new->drawable.width, -+ new->drawable.height, -+ GXcopy); -+ if (!ok) { -+ void *ptr = kgem_bo_map__gtt(&sna->kgem, new_priv->gpu_bo); -+ if (ptr) -+ memset(ptr, 0, new_priv->gpu_bo->pitch*new->drawable.height); -+ } - new_priv->clear = true; - new_priv->clear_color = old_priv->clear_color; - } else { -@@ -4281,11 +5664,18 @@ static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) - 
__FUNCTION__, box.x2, box.y2, sx, sy, dx, dy)); - - if (box.x2 != new->drawable.width || box.y2 != new->drawable.height) { -- (void)sna->render.fill_one(sna, new, new_priv->gpu_bo, 0, -- 0, 0, -- new->drawable.width, -- new->drawable.height, -- GXclear); -+ bool ok = false; -+ if (!wedged(sna)) -+ ok = sna->render.fill_one(sna, new, new_priv->gpu_bo, 0, -+ 0, 0, -+ new->drawable.width, -+ new->drawable.height, -+ GXclear); -+ if (!ok) { -+ void *ptr = kgem_bo_map__gtt(&sna->kgem, new_priv->gpu_bo); -+ if (ptr) -+ memset(ptr, 0, new_priv->gpu_bo->pitch*new->drawable.height); -+ } - } - (void)sna->render.copy_boxes(sna, GXcopy, - &old->drawable, old_priv->gpu_bo, sx, sy, -@@ -4302,7 +5692,7 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) - { - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); - struct sna *sna = to_sna(scrn); -- ScreenPtr screen = scrn->pScreen; -+ ScreenPtr screen = xf86ScrnToScreen(scrn); - PixmapPtr new_front; - int i; - -@@ -4337,9 +5727,20 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) - for (i = 0; i < sna->mode.num_real_crtc; i++) - sna_crtc_disable_shadow(sna, to_sna_crtc(config->crtc[i])); - assert(sna->mode.shadow_active == 0); -+ assert(!sna->mode.shadow_enabled); - assert(sna->mode.shadow_damage == NULL); - assert(sna->mode.shadow == NULL); - -+ /* Flush pending shadow updates */ -+ if (sna->mode.flip_active) { -+ DBG(("%s: waiting for %d outstanding TearFree flips\n", -+ __FUNCTION__, sna->mode.flip_active)); -+ while (sna->mode.flip_active && sna_mode_wait_for_event(sna)) -+ sna_mode_wakeup(sna); -+ } -+ -+ /* Cancel a pending [un]flip (as the pixmaps no longer match) */ -+ sna_present_cancel_flip(sna); - copy_front(sna, sna->front, new_front); - - screen->SetScreenPixmap(new_front); -@@ -4351,14 +5752,6 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) - scrn->virtualY = height; - scrn->displayWidth = width; - -- /* Flush pending shadow updates */ -- if (sna->mode.flip_active) { -- DBG(("%s: 
waiting for %d outstanding TearFree flips\n", -- __FUNCTION__, sna->mode.flip_active)); -- while (sna->mode.flip_active && sna_mode_wait_for_event(sna)) -- sna_mode_wakeup(sna); -- } -- - /* Only update the CRTCs if we are in control */ - if (!scrn->vtSema) - return TRUE; -@@ -4371,7 +5764,7 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) - continue; - - if (!__sna_crtc_set_mode(crtc)) -- sna_crtc_disable(crtc); -+ sna_crtc_disable(crtc, false); - } - - sna_mode_wakeup(sna); -@@ -4381,19 +5774,6 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) - } - - /* cursor handling */ --struct sna_cursor { -- struct sna_cursor *next; -- uint32_t *image; -- Rotation rotation; -- int ref; -- int size; -- int last_width; -- int last_height; -- unsigned handle; -- unsigned serial; -- unsigned alloc; --}; -- - static void - rotate_coord(Rotation rotation, int size, - int x_dst, int y_dst, -@@ -4429,36 +5809,6 @@ rotate_coord(Rotation rotation, int size, - *y_src = y_dst; - } - --static void --rotate_coord_back(Rotation rotation, int size, int *x, int *y) --{ -- int t; -- -- if (rotation & RR_Reflect_X) -- *x = size - *x - 1; -- if (rotation & RR_Reflect_Y) -- *y = size - *y - 1; -- -- switch (rotation & 0xf) { -- case RR_Rotate_0: -- break; -- case RR_Rotate_90: -- t = *x; -- *x = *y; -- *y = size - t - 1; -- break; -- case RR_Rotate_180: -- *x = size - *x - 1; -- *y = size - *y - 1; -- break; -- case RR_Rotate_270: -- t = *x; -- *x = size - *y - 1; -- *y = t; -- break; -- } --} -- - static struct sna_cursor *__sna_create_cursor(struct sna *sna, int size) - { - struct sna_cursor *c; -@@ -4519,6 +5869,17 @@ static uint32_t *get_cursor_argb(CursorPtr c) - #endif - } - -+static int __cursor_size(int width, int height) -+{ -+ int i, size; -+ -+ i = MAX(width, height); -+ for (size = 64; size < i; size <<= 1) -+ ; -+ -+ return size; -+} -+ - static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) - { - struct sna_cursor *cursor; -@@ -4526,6 
+5887,7 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) - const uint32_t *argb; - uint32_t *image; - int width, height, pitch, size, x, y; -+ bool transformed; - Rotation rotation; - - assert(sna->cursor.ref); -@@ -4537,8 +5899,8 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) - cursor ? cursor->serial : 0, - sna->cursor.serial)); - if (cursor && cursor->serial == sna->cursor.serial) { -- assert(cursor->size == sna->cursor.size); -- assert(cursor->rotation == crtc->transform_in_use ? crtc->rotation : RR_Rotate_0); -+ assert(cursor->size == sna->cursor.size || cursor->transformed); -+ assert(cursor->rotation == (!to_sna_crtc(crtc)->cursor_transform && crtc->transform_in_use) ? crtc->rotation : RR_Rotate_0); - assert(cursor->ref); - return cursor; - } -@@ -4550,22 +5912,81 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) - sna->cursor.serial, - get_cursor_argb(sna->cursor.ref) != NULL)); - -- rotation = crtc->transform_in_use ? crtc->rotation : RR_Rotate_0; -+ transformed = to_sna_crtc(crtc)->cursor_transform; -+ rotation = (!transformed && crtc->transform_in_use) ? 
crtc->rotation : RR_Rotate_0; -+ -+ if (transformed) { -+ struct pixman_box16 box; -+ -+ box.x1 = box.y1 = 0; -+ box.x2 = sna->cursor.ref->bits->width; -+ box.y2 = sna->cursor.ref->bits->height; - -- if (sna->cursor.use_gtt) { /* Don't allow phys cursor sharing */ -+ pixman_f_transform_bounds(&crtc->f_crtc_to_framebuffer, &box); -+ size = __cursor_size(box.x2 - box.x1, box.y2 - box.y1); -+ __DBG(("%s: transformed cursor %dx%d -> %dx%d\n", -+ __FUNCTION__ , -+ sna->cursor.ref->bits->width, -+ sna->cursor.ref->bits->height, -+ box.x2 - box.x1, box.y2 - box.y1)); -+ } else -+ size = sna->cursor.size; -+ -+ if (crtc->transform_in_use) { -+ RRTransformPtr T = NULL; -+ struct pixman_vector v; -+ -+ if (crtc->transformPresent) { -+ T = &crtc->transform; -+ -+ /* Cancel any translation from this affine -+ * transformation. We just want to rotate and scale -+ * the cursor image. -+ */ -+ v.vector[0] = 0; -+ v.vector[1] = 0; -+ v.vector[2] = pixman_fixed_1; -+ pixman_transform_point(&crtc->transform.transform, &v); -+ } -+ -+ RRTransformCompute(0, 0, size, size, crtc->rotation, T, NULL, -+ &to_sna_crtc(crtc)->cursor_to_fb, -+ &to_sna_crtc(crtc)->fb_to_cursor); -+ if (T) -+ pixman_f_transform_translate( -+ &to_sna_crtc(crtc)->cursor_to_fb, -+ &to_sna_crtc(crtc)->fb_to_cursor, -+ -pixman_fixed_to_double(v.vector[0]), -+ -pixman_fixed_to_double(v.vector[1])); -+ -+ __DBG(("%s: cursor_to_fb [%f %f %f, %f %f %f, %f %f %f]\n", -+ __FUNCTION__, -+ to_sna_crtc(crtc)->cursor_to_fb.m[0][0], -+ to_sna_crtc(crtc)->cursor_to_fb.m[0][1], -+ to_sna_crtc(crtc)->cursor_to_fb.m[0][2], -+ to_sna_crtc(crtc)->cursor_to_fb.m[1][0], -+ to_sna_crtc(crtc)->cursor_to_fb.m[1][1], -+ to_sna_crtc(crtc)->cursor_to_fb.m[1][2], -+ to_sna_crtc(crtc)->cursor_to_fb.m[2][0], -+ to_sna_crtc(crtc)->cursor_to_fb.m[2][1], -+ to_sna_crtc(crtc)->cursor_to_fb.m[2][2])); -+ } -+ -+ /* Don't allow phys cursor sharing */ -+ if (sna->cursor.use_gtt && !transformed) { - for (cursor = sna->cursor.cursors; cursor; cursor = 
cursor->next) { -- if (cursor->serial == sna->cursor.serial && cursor->rotation == rotation) { -+ if (cursor->serial == sna->cursor.serial && -+ cursor->rotation == rotation && -+ !cursor->transformed) { - __DBG(("%s: reusing handle=%d, serial=%d, rotation=%d, size=%d\n", - __FUNCTION__, cursor->handle, cursor->serial, cursor->rotation, cursor->size)); - assert(cursor->size == sna->cursor.size); - return cursor; - } - } -- -- cursor = to_sna_crtc(crtc)->cursor; - } - -- size = sna->cursor.size; -+ cursor = to_sna_crtc(crtc)->cursor; - if (cursor && cursor->alloc < 4*size*size) - cursor = NULL; - -@@ -4577,7 +5998,7 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) - } - } - -- width = sna->cursor.ref->bits->width; -+ width = sna->cursor.ref->bits->width; - height = sna->cursor.ref->bits->height; - source = sna->cursor.ref->bits->source; - mask = sna->cursor.ref->bits->mask; -@@ -4585,7 +6006,7 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) - pitch = BitmapBytePad(width); - - image = cursor->image; -- if (image == NULL) { -+ if (image == NULL || transformed) { - image = sna->cursor.scratch; - cursor->last_width = cursor->last_height = size; - } -@@ -4616,6 +6037,21 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) - mask += pitch; - source += pitch; - } -+ if (transformed) { -+ __DBG(("%s: Applying affine BLT to bitmap\n", __FUNCTION__)); -+ affine_blt(image, cursor->image, 32, -+ 0, 0, width, height, size * 4, -+ 0, 0, size, size, size * 4, -+ &to_sna_crtc(crtc)->cursor_to_fb); -+ image = cursor->image; -+ } -+ } else if (transformed) { -+ __DBG(("%s: Applying affine BLT to ARGB\n", __FUNCTION__)); -+ affine_blt(argb, cursor->image, 32, -+ 0, 0, width, height, width * 4, -+ 0, 0, size, size, size * 4, -+ &to_sna_crtc(crtc)->cursor_to_fb); -+ image = cursor->image; - } else - memcpy_blt(argb, image, 32, - width * 4, size * 4, -@@ -4662,9 +6098,16 @@ static struct 
sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) - - cursor->size = size; - cursor->rotation = rotation; -+ cursor->transformed = transformed; - cursor->serial = sna->cursor.serial; -- cursor->last_width = width; -- cursor->last_height = height; -+ if (transformed) { -+ /* mark the transformed rectangle as dirty, not input */ -+ cursor->last_width = size; -+ cursor->last_height = size; -+ } else { -+ cursor->last_width = width; -+ cursor->last_height = height; -+ } - return cursor; - } - -@@ -4674,40 +6117,55 @@ sna_realize_cursor(xf86CursorInfoPtr info, CursorPtr cursor) - return NULL; - } - --#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0) --static inline int sigio_block(void) --{ -- OsBlockSIGIO(); -- return 0; --} --static inline void sigio_unblock(int was_blocked) -+static void enable_fb_access(ScrnInfoPtr scrn, int state) - { -- OsReleaseSIGIO(); -- (void)was_blocked; --} -+ scrn->EnableDisableFBAccess( -+#ifdef XF86_HAS_SCRN_CONV -+ scrn, - #else --#include --static inline int sigio_block(void) -+ scrn->scrnIndex, -+#endif -+ state); -+} -+ -+ -+static void __restore_swcursor(ScrnInfoPtr scrn) - { -- return xf86BlockSIGIO(); -+ DBG(("%s: attempting to restore SW cursor\n", __FUNCTION__)); -+ enable_fb_access(scrn, FALSE); -+ enable_fb_access(scrn, TRUE); -+ -+ RemoveBlockAndWakeupHandlers((void *)__restore_swcursor, -+ (void *)NoopDDA, -+ scrn); - } --static inline void sigio_unblock(int was_blocked) -+ -+static void restore_swcursor(struct sna *sna) - { -- xf86UnblockSIGIO(was_blocked); -+ sna->cursor.info->HideCursor(sna->scrn); -+ -+ /* XXX Force the cursor to be restored (avoiding recursion) */ -+ FreeCursor(sna->cursor.ref, None); -+ sna->cursor.ref = NULL; -+ -+ RegisterBlockAndWakeupHandlers((void *)__restore_swcursor, -+ (void *)NoopDDA, -+ sna->scrn); - } --#endif - - static void - sna_show_cursors(ScrnInfoPtr scrn) - { - xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); - struct sna *sna = to_sna(scrn); -+ 
struct kmsg kmsg; - int sigio, c; - - DBG(("%s: cursor?=%d\n", __FUNCTION__, sna->cursor.ref != NULL)); - if (sna->cursor.ref == NULL) - return; - -+ kmsg_open(&kmsg); - sigio = sigio_block(); - for (c = 0; c < sna->mode.num_real_crtc; c++) { - xf86CrtcPtr crtc = xf86_config->crtc[c]; -@@ -4721,7 +6179,7 @@ sna_show_cursors(ScrnInfoPtr scrn) - - if (!crtc->cursor_in_range) { - DBG(("%s: skipping cursor outside CRTC (pipe=%d)\n", -- __FUNCTION__, sna_crtc->pipe)); -+ __FUNCTION__, sna_crtc_pipe(crtc))); - continue; - } - -@@ -4729,20 +6187,21 @@ sna_show_cursors(ScrnInfoPtr scrn) - if (cursor == NULL || - (sna_crtc->cursor == cursor && sna_crtc->last_cursor_size == cursor->size)) { - DBG(("%s: skipping cursor already show on CRTC (pipe=%d)\n", -- __FUNCTION__, sna_crtc->pipe)); -+ __FUNCTION__, sna_crtc_pipe(crtc))); - continue; - } - - DBG(("%s: CRTC pipe=%d, handle->%d\n", __FUNCTION__, -- sna_crtc->pipe, cursor->handle)); -+ sna_crtc_pipe(crtc), cursor->handle)); - - VG_CLEAR(arg); - arg.flags = DRM_MODE_CURSOR_BO; -- arg.crtc_id = sna_crtc->id; -+ arg.crtc_id = __sna_crtc_id(sna_crtc); - arg.width = arg.height = cursor->size; - arg.handle = cursor->handle; - -- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg) == 0) { -+ if (!FAIL_CURSOR_IOCTL && -+ drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg) == 0) { - if (sna_crtc->cursor) { - assert(sna_crtc->cursor->ref > 0); - sna_crtc->cursor->ref--; -@@ -4750,10 +6209,18 @@ sna_show_cursors(ScrnInfoPtr scrn) - cursor->ref++; - sna_crtc->cursor = cursor; - sna_crtc->last_cursor_size = cursor->size; -+ } else { -+ ERR(("%s: failed to show cursor on CRTC:%d [pipe=%d], disabling hwcursor: errno=%d\n", -+ __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc), errno)); -+ sna->cursor.disable = true; - } - } - sigio_unblock(sigio); - sna->cursor.active = true; -+ kmsg_close(&kmsg, sna->cursor.disable); -+ -+ if (unlikely(sna->cursor.disable)) -+ restore_swcursor(sna); - } - - static void -@@ -4789,24 +6256,45 @@ 
static void - sna_crtc_disable_cursor(struct sna *sna, struct sna_crtc *crtc) - { - struct drm_mode_cursor arg; -+ int sigio; - - if (!crtc->cursor) - return; - -- DBG(("%s: CRTC:%d, handle=%d\n", __FUNCTION__, crtc->id, crtc->cursor->handle)); -- assert(crtc->cursor->ref); -+ sigio = sigio_block(); -+ if (crtc->cursor) { -+ DBG(("%s: CRTC:%d, handle=%d\n", __FUNCTION__, __sna_crtc_id(crtc), crtc->cursor->handle)); -+ assert(crtc->cursor->ref > 0); -+ crtc->cursor->ref--; -+ crtc->cursor = NULL; -+ crtc->last_cursor_size = 0; - -- VG_CLEAR(arg); -- arg.flags = DRM_MODE_CURSOR_BO; -- arg.crtc_id = crtc->id; -- arg.width = arg.height = 0; -- arg.handle = 0; -+ VG_CLEAR(arg); -+ arg.flags = DRM_MODE_CURSOR_BO; -+ arg.crtc_id = __sna_crtc_id(crtc); -+ arg.width = arg.height = 0; -+ arg.handle = 0; - -- (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg); -- assert(crtc->cursor->ref > 0); -- crtc->cursor->ref--; -- crtc->cursor = NULL; -- crtc->last_cursor_size = 0; -+ (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg); -+ } -+ sigio_unblock(sigio); -+} -+ -+static void -+sna_disable_cursors(ScrnInfoPtr scrn) -+{ -+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); -+ struct sna *sna = to_sna(scrn); -+ int sigio, c; -+ -+ DBG(("%s\n", __FUNCTION__)); -+ -+ sigio = sigio_block(); -+ for (c = 0; c < sna->mode.num_real_crtc; c++) { -+ assert(to_sna_crtc(xf86_config->crtc[c])); -+ sna_crtc_disable_cursor(sna, to_sna_crtc(xf86_config->crtc[c])); -+ } -+ sigio_unblock(sigio); - } - - static void -@@ -4852,6 +6340,7 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) - { - xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); - struct sna *sna = to_sna(scrn); -+ struct kmsg kmsg; - int sigio, c; - - __DBG(("%s(%d, %d), cursor? 
%d\n", __FUNCTION__, -@@ -4859,6 +6348,7 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) - if (sna->cursor.ref == NULL) - return; - -+ kmsg_open(&kmsg); - sigio = sigio_block(); - sna->cursor.last_x = x; - sna->cursor.last_y = y; -@@ -4876,27 +6366,37 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) - - VG_CLEAR(arg); - arg.flags = 0; -- arg.crtc_id = sna_crtc->id; -+ arg.crtc_id = __sna_crtc_id(sna_crtc); - arg.handle = 0; - - if (sna_crtc->bo == NULL) - goto disable; - -+ cursor = __sna_get_cursor(sna, crtc); -+ if (cursor == NULL) -+ cursor = sna_crtc->cursor; -+ if (cursor == NULL) { -+ __DBG(("%s: failed to grab cursor, disabling\n", __FUNCTION__)); -+ goto disable; -+ } -+ - if (crtc->transform_in_use) { - int xhot = sna->cursor.ref->bits->xhot; - int yhot = sna->cursor.ref->bits->yhot; -- struct pict_f_vector v; -+ struct pict_f_vector v, hot; - -- v.v[0] = (x + xhot) + 0.5; -- v.v[1] = (y + yhot) + 0.5; -- v.v[2] = 1; -+ v.v[0] = x + xhot + .5; -+ v.v[1] = y + yhot + .5; -+ v.v[2] = 1.; - pixman_f_transform_point(&crtc->f_framebuffer_to_crtc, &v); - -- rotate_coord_back(crtc->rotation, sna->cursor.size, &xhot, &yhot); -+ hot.v[0] = xhot; -+ hot.v[1] = yhot; -+ hot.v[2] = 1.; -+ pixman_f_transform_point(&sna_crtc->fb_to_cursor, &hot); - -- /* cursor will have 0.5 added to it already so floor is sufficent */ -- arg.x = floor(v.v[0]) - xhot; -- arg.y = floor(v.v[1]) - yhot; -+ arg.x = floor(v.v[0] - hot.v[0]); -+ arg.y = floor(v.v[1] - hot.v[1]); - } else { - arg.x = x - crtc->x; - arg.y = y - crtc->y; -@@ -4904,15 +6404,6 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) - - if (arg.x < crtc->mode.HDisplay && arg.x > -sna->cursor.size && - arg.y < crtc->mode.VDisplay && arg.y > -sna->cursor.size) { -- cursor = __sna_get_cursor(sna, crtc); -- if (cursor == NULL) -- cursor = sna_crtc->cursor; -- if (cursor == NULL) { -- __DBG(("%s: failed to grab cursor, disabling\n", -- __FUNCTION__)); -- goto disable; -- } -- - if 
(sna_crtc->cursor != cursor || sna_crtc->last_cursor_size != cursor->size) { - arg.flags |= DRM_MODE_CURSOR_BO; - arg.handle = cursor->handle; -@@ -4932,10 +6423,13 @@ disable: - } - - __DBG(("%s: CRTC:%d (%d, %d), handle=%d, flags=%x (old cursor handle=%d), move? %d, update handle? %d\n", -- __FUNCTION__, sna_crtc->id, arg.x, arg.y, arg.handle, arg.flags, sna_crtc->cursor ? sna_crtc->cursor->handle : 0, -+ __FUNCTION__, __sna_crtc_id(sna_crtc), arg.x, arg.y, arg.handle, arg.flags, sna_crtc->cursor ? sna_crtc->cursor->handle : 0, - arg.flags & DRM_MODE_CURSOR_MOVE, arg.flags & DRM_MODE_CURSOR_BO)); - -- if (arg.flags && -+ if (arg.flags == 0) -+ continue; -+ -+ if (!FAIL_CURSOR_IOCTL && - drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg) == 0) { - if (arg.flags & DRM_MODE_CURSOR_BO) { - if (sna_crtc->cursor) { -@@ -4949,9 +6443,21 @@ disable: - } else - sna_crtc->last_cursor_size = 0; - } -+ } else { -+ ERR(("%s: failed to update cursor on CRTC:%d [pipe=%d], disabling hwcursor: errno=%d\n", -+ __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc), errno)); -+ /* XXX How to force switch back to SW cursor? -+ * Right now we just want until the next cursor image -+ * change, which is fairly frequent. 
-+ */ -+ sna->cursor.disable = true; - } - } - sigio_unblock(sigio); -+ kmsg_close(&kmsg, sna->cursor.disable); -+ -+ if (unlikely(sna->cursor.disable)) -+ restore_swcursor(sna); - } - - #if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,15,99,902,2) -@@ -4978,17 +6484,6 @@ sna_load_cursor_image(ScrnInfoPtr scrn, unsigned char *src) - { - } - --static int __cursor_size(CursorPtr cursor) --{ -- int i, size; -- -- i = MAX(cursor->bits->width, cursor->bits->height); -- for (size = 64; size < i; size <<= 1) -- ; -- -- return size; --} -- - static bool - sna_cursor_preallocate(struct sna *sna) - { -@@ -5006,6 +6501,50 @@ sna_cursor_preallocate(struct sna *sna) - return true; - } - -+static bool -+transformable_cursor(struct sna *sna, CursorPtr cursor) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ int i; -+ -+ for (i = 0; i < sna->mode.num_real_crtc; i++) { -+ xf86CrtcPtr crtc = config->crtc[i]; -+ struct pixman_box16 box; -+ int size; -+ -+ if (!to_sna_crtc(crtc)->hwcursor) { -+ DBG(("%s: hwcursor disabled on CRTC:%d [pipe=%d]\n", -+ __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc))); -+ return false; -+ } -+ -+ if (!sna->cursor.use_gtt || !sna->cursor.scratch) { -+ DBG(("%s: unable to use GTT curosor access [%d] or no scratch [%d]\n", -+ __FUNCTION__, sna->cursor.use_gtt, sna->cursor.scratch)); -+ return false; -+ } -+ -+ box.x1 = box.y1 = 0; -+ box.x2 = cursor->bits->width; -+ box.y2 = cursor->bits->height; -+ -+ if (!pixman_f_transform_bounds(&crtc->f_crtc_to_framebuffer, -+ &box)) { -+ DBG(("%s: unable to transform bounds\n", __FUNCTION__)); -+ return false; -+ } -+ -+ size = __cursor_size(box.x2 - box.x1, box.y2 - box.y1); -+ if (size > sna->cursor.max_size) { -+ DBG(("%s: transformed cursor size=%d too large, max=%d\n", -+ __FUNCTION__, size, sna->cursor.max_size)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - static Bool - sna_use_hw_cursor(ScreenPtr screen, CursorPtr cursor) - { -@@ -5014,6 +6553,9 @@ 
sna_use_hw_cursor(ScreenPtr screen, CursorPtr cursor) - DBG(("%s (%dx%d)?\n", __FUNCTION__, - cursor->bits->width, cursor->bits->height)); - -+ if (sna->cursor.disable) -+ return FALSE; -+ - /* cursors are invariant */ - if (cursor == sna->cursor.ref) - return TRUE; -@@ -5023,12 +6565,24 @@ sna_use_hw_cursor(ScreenPtr screen, CursorPtr cursor) - sna->cursor.ref = NULL; - } - -- sna->cursor.size = __cursor_size(cursor); -- if (sna->cursor.size > sna->cursor.max_size) -+ sna->cursor.size = -+ __cursor_size(cursor->bits->width, cursor->bits->height); -+ if (sna->cursor.size > sna->cursor.max_size) { -+ DBG(("%s: cursor size=%d too large, max %d: using sw cursor\n", -+ __FUNCTION__, sna->cursor.size, sna->cursor.max_size)); - return FALSE; -+ } -+ -+ if (sna->mode.rr_active && !transformable_cursor(sna, cursor)) { -+ DBG(("%s: RandR active [%d] and non-transformable cursor: using sw cursor\n", -+ __FUNCTION__, sna->mode.rr_active)); -+ return FALSE; -+ } - -- if (!sna_cursor_preallocate(sna)) -+ if (!sna_cursor_preallocate(sna)) { -+ DBG(("%s: cursor preallocation failed: using sw cursor\n", __FUNCTION__)); - return FALSE; -+ } - - sna->cursor.ref = cursor; - cursor->refcnt++; -@@ -5056,8 +6610,12 @@ sna_cursor_pre_init(struct sna *sna) - return; - - #define LOCAL_IOCTL_GET_CAP DRM_IOWR(0x0c, struct local_get_cap) --#define DRM_CAP_CURSOR_WIDTH 8 --#define DRM_CAP_CURSOR_HEIGHT 9 -+#ifndef DRM_CAP_CURSOR_WIDTH -+#define DRM_CAP_CURSOR_WIDTH 0x8 -+#endif -+#ifndef DRM_CAP_CURSOR_HEIGHT -+#define DRM_CAP_CURSOR_HEIGHT 0x9 -+#endif - - #define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 - -@@ -5087,11 +6645,9 @@ sna_cursor_pre_init(struct sna *sna) - DBG(("%s: cursor updates use_gtt?=%d\n", - __FUNCTION__, sna->cursor.use_gtt)); - -- if (!sna->cursor.use_gtt) { -- sna->cursor.scratch = malloc(sna->cursor.max_size * sna->cursor.max_size * 4); -- if (!sna->cursor.scratch) -- sna->cursor.max_size = 0; -- } -+ sna->cursor.scratch = malloc(sna->cursor.max_size * sna->cursor.max_size * 
4); -+ if (!sna->cursor.scratch && !sna->cursor.use_gtt) -+ sna->cursor.max_size = 0; - - sna->cursor.num_stash = -sna->mode.num_real_crtc; - -@@ -5193,7 +6749,7 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, - int output_count = 0; - int i; - -- DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, crtc->id, crtc->pipe, bo->handle)); -+ DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), bo->handle)); - - assert(sna->mode.num_real_output < ARRAY_SIZE(output_ids)); - assert(crtc->bo); -@@ -5207,11 +6763,11 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, - - DBG(("%s: attaching output '%s' %d [%d] to crtc:%d (pipe %d) (possible crtc:%x, possible clones:%x)\n", - __FUNCTION__, output->name, i, to_connector_id(output), -- crtc->id, crtc->pipe, -+ __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), - (uint32_t)output->possible_crtcs, - (uint32_t)output->possible_clones)); - -- assert(output->possible_crtcs & (1 << crtc->pipe) || -+ assert(output->possible_crtcs & (1 << __sna_crtc_pipe(crtc)) || - is_zaphod(sna->scrn)); - - output_ids[output_count] = to_connector_id(output); -@@ -5221,7 +6777,7 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, - assert(output_count); - - VG_CLEAR(arg); -- arg.crtc_id = crtc->id; -+ arg.crtc_id = __sna_crtc_id(crtc); - arg.fb_id = fb_id(bo); - assert(arg.fb_id); - arg.x = x; -@@ -5231,20 +6787,74 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, - arg.mode = crtc->kmode; - arg.mode_valid = 1; - -- DBG(("%s: applying crtc [%d, pipe=%d] mode=%dx%d+%d+%d@%d, fb=%d across %d outputs [%d...]\n", -- __FUNCTION__, crtc->id, crtc->pipe, -- arg.mode.hdisplay, -- arg.mode.vdisplay, -- arg.x, arg.y, -- arg.mode.clock, -- arg.fb_id, -- output_count, output_count ? 
output_ids[0] : 0)); -+ DBG(("%s: applying crtc [%d, pipe=%d] mode=%dx%d+%d+%d@%d, fb=%d across %d outputs [%d...]\n", -+ __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), -+ arg.mode.hdisplay, -+ arg.mode.vdisplay, -+ arg.x, arg.y, -+ arg.mode.clock, -+ arg.fb_id, -+ output_count, output_count ? output_ids[0] : 0)); -+ -+ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg)) -+ return false; -+ -+ crtc->offset = y << 16 | x; -+ __kgem_bo_clear_dirty(bo); -+ return true; -+} -+ -+static void sna_mode_restore(struct sna *sna) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ int error = 0; -+ int i; -+ -+ assert(!sna->mode.hidden); -+ -+ for (i = 0; i < sna->mode.num_real_crtc; i++) { -+ xf86CrtcPtr crtc = config->crtc[i]; -+ -+ assert(to_sna_crtc(crtc) != NULL); -+ if (to_sna_crtc(crtc)->bo == NULL) -+ continue; -+ -+ assert(crtc->enabled); -+ if (!__sna_crtc_set_mode(crtc)) { -+ sna_crtc_disable(crtc, false); -+ error++; -+ } -+ } -+ sna_mode_wakeup(sna); -+ while (sna->mode.flip_active && sna_mode_wakeup(sna)) -+ ; -+ update_flush_interval(sna); -+ sna_cursors_reload(sna); -+ sna->mode.dirty = false; -+ -+ if (error) -+ xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, -+ "Failed to restore display configuration\n"); -+} -+ -+bool sna_needs_page_flip(struct sna *sna, struct kgem_bo *bo) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ int i; -+ -+ for (i = 0; i < sna->mode.num_real_crtc; i++) { -+ struct sna_crtc *crtc = config->crtc[i]->driver_private; -+ -+ if (crtc->bo == NULL) -+ continue; - -- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg)) -- return false; -+ if (crtc->bo == bo) -+ continue; - -- crtc->offset = y << 16 | x; -- return true; -+ return true; -+ } -+ -+ return false; - } - - int -@@ -5256,6 +6866,7 @@ sna_page_flip(struct sna *sna, - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); - const int width = sna->scrn->virtualX; - const int height = sna->scrn->virtualY; -+ int sigio; - 
int count = 0; - int i; - -@@ -5263,23 +6874,26 @@ sna_page_flip(struct sna *sna, - assert(bo->refcnt); - - assert((sna->flags & SNA_IS_HOSTED) == 0); -- assert((sna->flags & SNA_TEAR_FREE) == 0); - assert(sna->mode.flip_active == 0); - assert(sna->mode.front_active); -+ assert(!sna->mode.hidden); - assert(sna->scrn->vtSema); - - if ((sna->flags & (data ? SNA_HAS_FLIP : SNA_HAS_ASYNC_FLIP)) == 0) - return 0; - - kgem_bo_submit(&sna->kgem, bo); -+ __kgem_bo_clear_dirty(bo); - -+ sigio = sigio_block(); - for (i = 0; i < sna->mode.num_real_crtc; i++) { - struct sna_crtc *crtc = config->crtc[i]->driver_private; - struct drm_mode_crtc_page_flip arg; - uint32_t crtc_offset; -+ int fixup; - - DBG(("%s: crtc %d id=%d, pipe=%d active? %d\n", -- __FUNCTION__, i, crtc->id, crtc->pipe, crtc->bo != NULL)); -+ __FUNCTION__, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->bo != NULL)); - if (crtc->bo == NULL) - continue; - assert(!crtc->transform); -@@ -5288,13 +6902,18 @@ sna_page_flip(struct sna *sna, - assert(crtc->bo->refcnt >= crtc->bo->active_scanout); - assert(crtc->flip_bo == NULL); - -- arg.crtc_id = crtc->id; -+ assert_crtc_fb(sna, crtc); -+ if (data == NULL && crtc->bo == bo) -+ goto next_crtc; -+ -+ arg.crtc_id = __sna_crtc_id(crtc); - arg.fb_id = get_fb(sna, bo, width, height); - if (arg.fb_id == 0) { - assert(count == 0); -- return 0; -+ break; - } - -+ fixup = 0; - crtc_offset = crtc->base->y << 16 | crtc->base->x; - - if (bo->pitch != crtc->bo->pitch || crtc_offset != crtc->offset) { -@@ -5303,7 +6922,12 @@ sna_page_flip(struct sna *sna, - bo->pitch, crtc->bo->pitch, - crtc_offset, crtc->offset)); - fixup_flip: -+ fixup = 1; - if (crtc->bo != bo && sna_crtc_flip(sna, crtc, bo, crtc->base->x, crtc->base->y)) { -+update_scanout: -+ DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", -+ __FUNCTION__, crtc->bo->handle, crtc->bo->active_scanout, -+ bo->handle, bo->active_scanout)); - 
assert(crtc->bo->active_scanout); - assert(crtc->bo->refcnt >= crtc->bo->active_scanout); - crtc->bo->active_scanout--; -@@ -5321,15 +6945,8 @@ fixup_flip: - goto next_crtc; - - /* queue a flip in order to send the event */ -- } else { -- if (count && !xf86SetDesiredModes(sna->scrn)) { -- xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, -- "failed to restore display configuration\n"); -- for (; i < sna->mode.num_real_crtc; i++) -- sna_crtc_disable(config->crtc[i]); -- } -- return 0; -- } -+ } else -+ goto error; - } - - /* Only the reference crtc will finally deliver its page flip -@@ -5346,7 +6963,7 @@ fixup_flip: - - retry_flip: - DBG(("%s: crtc %d id=%d, pipe=%d --> fb %d\n", -- __FUNCTION__, i, crtc->id, crtc->pipe, arg.fb_id)); -+ __FUNCTION__, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), arg.fb_id)); - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { - ERR(("%s: pageflip failed with err=%d\n", __FUNCTION__, errno)); - -@@ -5354,7 +6971,7 @@ retry_flip: - struct drm_mode_crtc mode; - - memset(&mode, 0, sizeof(mode)); -- mode.crtc_id = crtc->id; -+ mode.crtc_id = __sna_crtc_id(crtc); - drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode); - - DBG(("%s: crtc=%d, valid?=%d, fb attached?=%d, expected=%d\n", -@@ -5366,7 +6983,7 @@ retry_flip: - goto fixup_flip; - - if (count == 0) -- return 0; -+ break; - - DBG(("%s: throttling on busy flip / waiting for kernel to catch up\n", __FUNCTION__)); - drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_THROTTLE, 0); -@@ -5375,15 +6992,25 @@ retry_flip: - goto retry_flip; - } - -+ if (!fixup) -+ goto fixup_flip; -+ -+error: - xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, -- "page flipping failed, on CRTC:%d (pipe=%d), disabling %s page flips\n", -- crtc->id, crtc->pipe, data ? "synchronous": "asynchronous"); -+ "page flipping failed, on CRTC:%d (pipe=%d), disabling %s page flips\n", -+ __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), data ? 
"synchronous": "asynchronous"); -+ -+ if (count || crtc->bo == bo) -+ sna_mode_restore(sna); -+ - sna->flags &= ~(data ? SNA_HAS_FLIP : SNA_HAS_ASYNC_FLIP); -- goto fixup_flip; -+ count = 0; -+ break; - } - - if (data) { - assert(crtc->flip_bo == NULL); -+ assert(handler); - crtc->flip_handler = handler; - crtc->flip_data = data; - crtc->flip_bo = kgem_bo_reference(bo); -@@ -5391,11 +7018,15 @@ retry_flip: - crtc->flip_serial = crtc->mode_serial; - crtc->flip_pending = true; - sna->mode.flip_active++; -- } - -+ DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", -+ __FUNCTION__, __sna_crtc_id(crtc), crtc->flip_bo->handle, crtc->flip_bo->active_scanout, crtc->flip_serial)); -+ } else -+ goto update_scanout; - next_crtc: - count++; - } -+ sigio_unblock(sigio); - - DBG(("%s: page flipped %d crtcs\n", __FUNCTION__, count)); - return count; -@@ -5471,7 +7102,7 @@ static void crtc_init_gamma(xf86CrtcPtr crtc) - - assert(sna_crtc); - -- lut.crtc_id = sna_crtc->id; -+ lut.crtc_id = __sna_crtc_id(sna_crtc); - lut.gamma_size = 256; - lut.red = (uintptr_t)(gamma); - lut.green = (uintptr_t)(gamma + 256); -@@ -5485,7 +7116,7 @@ static void crtc_init_gamma(xf86CrtcPtr crtc) - } - - DBG(("%s: CRTC:%d, pipe=%d: gamma set?=%d\n", -- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, -+ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), - gamma_set)); - if (!gamma_set) { - int i; -@@ -5502,6 +7133,7 @@ static void crtc_init_gamma(xf86CrtcPtr crtc) - crtc->gamma_red = gamma; - crtc->gamma_green = gamma + 256; - crtc->gamma_blue = gamma + 2*256; -+ crtc->gamma_size = 256; - } - } - } -@@ -5528,6 +7160,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) - { - ScrnInfoPtr scrn = sna->scrn; - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); -+ int crtc_active, crtc_enabled; - int width, height; - int i, j; - -@@ -5565,6 +7198,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) - } - - /* Copy the existing modes on each 
CRTCs */ -+ crtc_active = crtc_enabled = 0; - for (i = 0; i < sna->mode.num_real_crtc; i++) { - xf86CrtcPtr crtc = config->crtc[i]; - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); -@@ -5577,12 +7211,12 @@ static bool sna_probe_initial_configuration(struct sna *sna) - - /* Retrieve the current mode */ - VG_CLEAR(mode); -- mode.crtc_id = sna_crtc->id; -+ mode.crtc_id = __sna_crtc_id(sna_crtc); - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode)) - continue; - - DBG(("%s: CRTC:%d, pipe=%d: has mode?=%d\n", __FUNCTION__, -- sna_crtc->id, sna_crtc->pipe, -+ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), - mode.mode_valid && mode.mode.clock)); - - if (!mode.mode_valid || mode.mode.clock == 0) -@@ -5593,6 +7227,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) - crtc->desiredX = mode.x; - crtc->desiredY = mode.y; - crtc->desiredTransformPresent = FALSE; -+ crtc_active++; - } - - /* Reconstruct outputs pointing to active CRTC */ -@@ -5604,6 +7239,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) - - crtc_id = (uintptr_t)output->crtc; - output->crtc = NULL; -+ output->status = XF86OutputStatusUnknown; - if (sna->flags & SNA_IS_SLAVED) - continue; - -@@ -5623,7 +7259,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) - xf86CrtcPtr crtc = config->crtc[j]; - - assert(to_sna_crtc(crtc)); -- if (to_sna_crtc(crtc)->id != crtc_id) -+ if (sna_crtc_id(crtc) != crtc_id) - continue; - - if (crtc->desiredMode.status == MODE_OK) { -@@ -5641,18 +7277,30 @@ static bool sna_probe_initial_configuration(struct sna *sna) - "Output %s using initial mode %s on pipe %d\n", - output->name, - crtc->desiredMode.name, -- to_sna_crtc(crtc)->pipe); -+ sna_crtc_pipe(crtc)); - - output->crtc = crtc; -+ output->status = XF86OutputStatusConnected; - crtc->enabled = TRUE; -+ crtc_enabled++; -+ -+ output_set_gamma(output, crtc); -+ -+ if (output->conf_monitor) { -+ output->mm_width = output->conf_monitor->mon_width; -+ output->mm_height = 
output->conf_monitor->mon_height; -+ } -+ -+#if 0 -+ sna_output_attach_edid(output); -+ sna_output_attach_tile(output); -+#endif - - if (output->mm_width == 0 || output->mm_height == 0) { - output->mm_height = (crtc->desiredMode.VDisplay * 254) / (10*DEFAULT_DPI); - output->mm_width = (crtc->desiredMode.HDisplay * 254) / (10*DEFAULT_DPI); - } - -- output_set_gamma(output, crtc); -- - M = calloc(1, sizeof(DisplayModeRec)); - if (M) { - *M = crtc->desiredMode; -@@ -5673,6 +7321,12 @@ static bool sna_probe_initial_configuration(struct sna *sna) - } - } - -+ if (crtc_active != crtc_enabled) { -+ DBG(("%s: only enabled %d out of %d active CRTC, forcing a reconfigure\n", -+ __FUNCTION__, crtc_enabled, crtc_active)); -+ return false; -+ } -+ - width = height = 0; - for (i = 0; i < sna->mode.num_real_crtc; i++) { - xf86CrtcPtr crtc = config->crtc[i]; -@@ -5707,8 +7361,8 @@ static bool sna_probe_initial_configuration(struct sna *sna) - if (sna_output->num_modes == 0) - continue; - -- width = sna_output->modes[0].hdisplay; -- height= sna_output->modes[0].vdisplay; -+ width = sna_output->modes[0].hdisplay; -+ height = sna_output->modes[0].vdisplay; - - DBG(("%s: panel '%s' is %dx%d\n", - __FUNCTION__, output->name, width, height)); -@@ -5788,7 +7442,7 @@ probe_capabilities(struct sna *sna) - sna->flags &= ~(SNA_HAS_FLIP | SNA_HAS_ASYNC_FLIP); - if (has_flip(sna)) - sna->flags |= SNA_HAS_FLIP; -- if (has_flip__async(sna)) -+ if (has_flip__async(sna) && (sna->flags & SNA_TEAR_FREE) == 0) - sna->flags |= SNA_HAS_ASYNC_FLIP; - DBG(("%s: page flips? %s, async? %s\n", __FUNCTION__, - sna->flags & SNA_HAS_FLIP ? 
"enabled" : "disabled", -@@ -5813,12 +7467,25 @@ sna_crtc_config_notify(ScreenPtr screen) - return; - } - -+ /* Flush any events completed by the modeset */ -+ sna_mode_wakeup(sna); -+ - update_flush_interval(sna); -+ sna->cursor.disable = false; /* Reset HW cursor until the next fail */ - sna_cursors_reload(sna); - - probe_capabilities(sna); - sna_present_update(sna); - -+ /* Allow TearFree to come back on when everything is off */ -+ if (!sna->mode.front_active && sna->flags & SNA_WANT_TEAR_FREE) { -+ if ((sna->flags & SNA_TEAR_FREE) == 0) -+ DBG(("%s: enable TearFree next modeset\n", -+ __FUNCTION__)); -+ -+ sna->flags |= SNA_TEAR_FREE; -+ } -+ - sna->mode.dirty = false; - } - -@@ -5840,6 +7507,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) - } - - probe_capabilities(sna); -+ sna->mode.hidden = 1; - - if (!xf86GetOptValInteger(sna->Options, OPTION_VIRTUAL, &num_fake)) - num_fake = 1; -@@ -5855,6 +7523,9 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) - if (res) { - xf86CrtcConfigPtr xf86_config; - -+ DBG(("%s: found %d CRTC, %d encoders, %d connectors\n", -+ __FUNCTION__, res->count_crtcs, res->count_encoders, res->count_connectors)); -+ - assert(res->count_crtcs); - assert(res->count_connectors); - -@@ -5862,6 +7533,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) - - xf86_config = XF86_CRTC_CONFIG_PTR(scrn); - xf86_config->xf86_crtc_notify = sna_crtc_config_notify; -+ xf86_config->compat_output = 0; - - for (i = 0; i < res->count_crtcs; i++) - if (!sna_crtc_add(scrn, res->crtcs[i])) -@@ -5900,6 +7572,11 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) - if (!sna_mode_fake_init(sna, num_fake)) - return false; - -+ sna->mode.shadow_size = 256; -+ sna->mode.shadow_events = malloc(sna->mode.shadow_size * sizeof(struct drm_event_vblank)); -+ if (!sna->mode.shadow_events) -+ return false; -+ - if (!sna_probe_initial_configuration(sna)) { - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); - -@@ -5912,6 
+7589,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) - } - } - sort_config_outputs(sna); -+ TimerSet(NULL, 0, COLDPLUG_DELAY_MS, sna_mode_coldplug, sna); - - sna_setup_provider(scrn); - return scrn->modes != NULL; -@@ -5921,18 +7599,58 @@ bool - sna_mode_wants_tear_free(struct sna *sna) - { - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ bool found = false; -+ FILE *file; - int i; - -+ file = fopen("/sys/module/i915/parameters/enable_fbc", "r"); -+ if (file) { -+ int fbc_enabled = 0; -+ int value; -+ -+ if (fscanf(file, "%d", &value) == 1) -+ fbc_enabled = value > 0; -+ fclose(file); -+ -+ DBG(("%s: module parameter 'enable_fbc' enabled? %d\n", -+ __FUNCTION__, fbc_enabled)); -+ -+ if (fbc_enabled) -+ return true; -+ } -+ - for (i = 0; i < sna->mode.num_real_output; i++) { - struct sna_output *output = to_sna_output(config->output[i]); - int id = find_property(sna, output, "Panel Self-Refresh"); -- if (id !=-1 && output->prop_values[id] != -1) { -+ if (id == -1) -+ continue; -+ -+ found = true; -+ if (output->prop_values[id] != -1) { - DBG(("%s: Panel Self-Refresh detected on %s\n", - __FUNCTION__, config->output[i]->name)); - return true; - } - } - -+ if (!found) { -+ file = fopen("/sys/module/i915/parameters/enable_psr", "r"); -+ if (file) { -+ int psr_enabled = 0; -+ int value; -+ -+ if (fscanf(file, "%d", &value) == 1) -+ psr_enabled = value > 0; -+ fclose(file); -+ -+ DBG(("%s: module parameter 'enable_psr' enabled? 
%d\n", -+ __FUNCTION__, psr_enabled)); -+ -+ if (psr_enabled) -+ return true; -+ } -+ } -+ - return false; - } - -@@ -5955,7 +7673,7 @@ sna_mode_set_primary(struct sna *sna) - - DBG(("%s: setting PrimaryOutput %s\n", __FUNCTION__, output->name)); - rr->primaryOutput = output->randr_output; -- RROutputChanged(rr->primaryOutput, 0); -+ RROutputChanged(rr->primaryOutput, FALSE); - rr->layoutChanged = TRUE; - break; - } -@@ -5974,12 +7692,9 @@ sna_mode_disable(struct sna *sna) - if (!sna->scrn->vtSema) - return false; - -- /* XXX we will cause previously hidden cursors to be reshown, but -- * this should be a rare fixup case for severe fragmentation. -- */ -- sna_hide_cursors(sna->scrn); -+ sna_disable_cursors(sna->scrn); - for (i = 0; i < sna->mode.num_real_crtc; i++) -- sna_crtc_disable(config->crtc[i]); -+ sna_crtc_disable(config->crtc[i], false); - assert(sna->mode.front_active == 0); - - sna_mode_wakeup(sna); -@@ -6001,6 +7716,11 @@ sna_mode_enable(struct sna *sna) - if (!sna->scrn->vtSema) - return; - -+ if (sna->mode.hidden) { -+ DBG(("%s: hidden outputs\n", __FUNCTION__)); -+ return; -+ } -+ - for (i = 0; i < sna->mode.num_real_crtc; i++) { - xf86CrtcPtr crtc = config->crtc[i]; - -@@ -6016,13 +7736,30 @@ sna_mode_enable(struct sna *sna) - } - - update_flush_interval(sna); -- sna_show_cursors(sna->scrn); -+ sna_cursors_reload(sna); - sna->mode.dirty = false; - } - -+static void sna_randr_close(struct sna *sna) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ int n; -+ -+ /* The RR structs are freed early during CloseScreen as they -+ * are tracked as Resources. However, we may be tempted to -+ * access them during shutdown so decouple them now. 
-+ */ -+ for (n = 0; n < config->num_output; n++) -+ config->output[n]->randr_output = NULL; -+ -+ for (n = 0; n < config->num_crtc; n++) -+ config->crtc[n]->randr_crtc = NULL; -+} -+ - void - sna_mode_close(struct sna *sna) - { -+ sna_randr_close(sna); - sna_mode_wakeup(sna); - - if (sna->flags & SNA_IS_HOSTED) -@@ -6077,15 +7814,22 @@ xf86CrtcPtr - sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired) - { - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -- xf86CrtcPtr best_crtc; -- int best_coverage, c; -+ xf86CrtcPtr best_crtc = NULL; -+ int best_coverage = -1, c; - - if (sna->flags & SNA_IS_HOSTED) - return NULL; - - /* If we do not own the VT, we do not own the CRTC either */ -- if (!sna->scrn->vtSema) -+ if (!sna->scrn->vtSema) { -+ DBG(("%s: none, VT switched\n", __FUNCTION__)); -+ return NULL; -+ } -+ -+ if (sna->mode.hidden) { -+ DBG(("%s: none, hidden outputs\n", __FUNCTION__)); - return NULL; -+ } - - DBG(("%s for box=(%d, %d), (%d, %d)\n", - __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); -@@ -6107,10 +7851,10 @@ sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired) - cover_box.x2, cover_box.y2)); - return desired; - } -+ best_crtc = desired; -+ best_coverage = 0; - } - -- best_crtc = NULL; -- best_coverage = 0; - for (c = 0; c < sna->mode.num_real_crtc; c++) { - xf86CrtcPtr crtc = config->crtc[c]; - BoxRec cover_box; -@@ -6156,6 +7900,38 @@ sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired) - return best_crtc; - } - -+static xf86CrtcPtr first_active_crtc(struct sna *sna) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ int n; -+ -+ for (n = 0; n < sna->mode.num_real_crtc; n++) { -+ xf86CrtcPtr crtc = config->crtc[n]; -+ if (to_sna_crtc(crtc)->bo) -+ return crtc; -+ } -+ -+ /* No active, use the first as a placeholder */ -+ if (sna->mode.num_real_crtc) -+ return config->crtc[0]; -+ -+ return NULL; -+} -+ -+xf86CrtcPtr sna_primary_crtc(struct sna *sna) 
-+{ -+ rrScrPrivPtr rr = rrGetScrPriv(xf86ScrnToScreen(sna->scrn)); -+ if (rr && rr->primaryOutput) { -+ xf86OutputPtr output = rr->primaryOutput->devPrivate; -+ if (output->crtc && -+ output->scrn == sna->scrn && -+ to_sna_crtc(output->crtc)) -+ return output->crtc; -+ } -+ -+ return first_active_crtc(sna); -+} -+ - #define MI_LOAD_REGISTER_IMM (0x22<<23) - - static bool sna_emit_wait_for_scanline_hsw(struct sna *sna, -@@ -6433,7 +8209,7 @@ sna_wait_for_scanline(struct sna *sna, - y2 /= 2; - } - -- pipe = sna_crtc_to_pipe(crtc); -+ pipe = sna_crtc_pipe(crtc); - DBG(("%s: pipe=%d, y1=%d, y2=%d, full_height?=%d\n", - __FUNCTION__, pipe, y1, y2, full_height)); - -@@ -6457,19 +8233,101 @@ sna_wait_for_scanline(struct sna *sna, - return ret; - } - -+static bool sna_mode_shutdown_crtc(xf86CrtcPtr crtc) -+{ -+ struct sna *sna = to_sna(crtc->scrn); -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); -+ bool disabled = false; -+ int o; -+ -+ xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, -+ "%s: invalid state found on pipe %d, disabling CRTC:%d\n", -+ __FUNCTION__, -+ __sna_crtc_pipe(to_sna_crtc(crtc)), -+ __sna_crtc_id(to_sna_crtc(crtc))); -+ sna_crtc_disable(crtc, true); -+#if XF86_CRTC_VERSION >= 3 -+ crtc->active = FALSE; -+#endif -+ if (crtc->enabled) { -+ crtc->enabled = FALSE; -+ disabled = true; -+ } -+ -+ for (o = 0; o < sna->mode.num_real_output; o++) { -+ xf86OutputPtr output = config->output[o]; -+ -+ if (output->crtc != crtc) -+ continue; -+ -+ output->funcs->dpms(output, DPMSModeOff); -+ output->crtc = NULL; -+ } -+ -+ return disabled; -+} -+ -+static bool -+sna_mode_disable_secondary_planes(struct sna *sna) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ bool disabled = false; -+ int c; -+ -+ /* Disable all secondary planes on our CRTCs, just in case -+ * other userspace left garbage in them. 
-+ */ -+ for (c = 0; c < sna->mode.num_real_crtc; c++) { -+ xf86CrtcPtr crtc = config->crtc[c]; -+ struct sna_crtc *sna_crtc = to_sna_crtc(crtc); -+ struct plane *plane; -+ -+ list_for_each_entry(plane, &sna_crtc->sprites, link) { -+ struct local_mode_get_plane p; -+ struct local_mode_set_plane s; -+ -+ VG_CLEAR(p); -+ p.plane_id = plane->id; -+ p.count_format_types = 0; -+ if (drmIoctl(sna->kgem.fd, -+ LOCAL_IOCTL_MODE_GETPLANE, -+ &p)) -+ continue; -+ -+ if (p.fb_id == 0 || p.crtc_id == 0) -+ continue; -+ -+ memset(&s, 0, sizeof(s)); -+ s.plane_id = p.plane_id; -+ s.crtc_id = p.crtc_id; -+ if (drmIoctl(sna->kgem.fd, -+ LOCAL_IOCTL_MODE_SETPLANE, -+ &s)) -+ disabled |= sna_mode_shutdown_crtc(crtc); -+ } -+ } -+ -+ return disabled; -+} -+ - void sna_mode_check(struct sna *sna) - { - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -- int i; -+ bool disabled; -+ int c, o; - - if (sna->flags & SNA_IS_HOSTED) - return; - -- DBG(("%s\n", __FUNCTION__)); -+ DBG(("%s: hidden?=%d\n", __FUNCTION__, sna->mode.hidden)); -+ if (sna->mode.hidden) -+ return; -+ -+ disabled = sna_mode_disable_secondary_planes(sna); - - /* Validate CRTC attachments and force consistency upon the kernel */ -- for (i = 0; i < sna->mode.num_real_crtc; i++) { -- xf86CrtcPtr crtc = config->crtc[i]; -+ for (c = 0; c < sna->mode.num_real_crtc; c++) { -+ xf86CrtcPtr crtc = config->crtc[c]; - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); - struct drm_mode_crtc mode; - uint32_t expected[2]; -@@ -6483,7 +8341,7 @@ void sna_mode_check(struct sna *sna) - expected[1] = sna_crtc->flip_bo ? 
fb_id(sna_crtc->flip_bo) : -1; - - VG_CLEAR(mode); -- mode.crtc_id = sna_crtc->id; -+ mode.crtc_id = __sna_crtc_id(sna_crtc); - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode)) - continue; - -@@ -6492,16 +8350,12 @@ void sna_mode_check(struct sna *sna) - mode.crtc_id, mode.mode_valid, - mode.fb_id, expected[0], expected[1])); - -- if (mode.fb_id != expected[0] && mode.fb_id != expected[1]) { -- xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, -- "%s: invalid state found on pipe %d, disabling CRTC:%d\n", -- __FUNCTION__, sna_crtc->pipe, sna_crtc->id); -- sna_crtc_disable(crtc); -- } -+ if (mode.fb_id != expected[0] && mode.fb_id != expected[1]) -+ disabled |= sna_mode_shutdown_crtc(crtc); - } - -- for (i = 0; i < config->num_output; i++) { -- xf86OutputPtr output = config->output[i]; -+ for (o = 0; o < config->num_output; o++) { -+ xf86OutputPtr output = config->output[o]; - struct sna_output *sna_output; - - if (output->crtc) -@@ -6515,26 +8369,16 @@ void sna_mode_check(struct sna *sna) - } - - update_flush_interval(sna); -+ -+ if (disabled) -+ xf86RandR12TellChanged(xf86ScrnToScreen(sna->scrn)); - } - - static bool - sna_crtc_hide_planes(struct sna *sna, struct sna_crtc *crtc) - { --#define LOCAL_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct local_mode_set_plane) -- struct local_mode_set_plane { -- uint32_t plane_id; -- uint32_t crtc_id; -- uint32_t fb_id; /* fb object contains surface format type */ -- uint32_t flags; -- -- /* Signed dest location allows it to be partially off screen */ -- int32_t crtc_x, crtc_y; -- uint32_t crtc_w, crtc_h; -- -- /* Source values are 16.16 fixed point */ -- uint32_t src_x, src_y; -- uint32_t src_h, src_w; -- } s; -+ struct local_mode_set_plane s; -+ struct plane *plane; - - if (crtc->primary.id == 0) - return false; -@@ -6544,8 +8388,10 @@ sna_crtc_hide_planes(struct sna *sna, struct sna_crtc *crtc) - if (drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s)) - return false; - -- s.plane_id = crtc->sprite.id; -- 
(void)drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s); -+ list_for_each_entry(plane, &crtc->sprites, link) { -+ s.plane_id = plane->id; -+ (void)drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s); -+ } - - __sna_crtc_disable(sna, crtc); - return true; -@@ -6561,21 +8407,22 @@ void sna_mode_reset(struct sna *sna) - - DBG(("%s\n", __FUNCTION__)); - -- sna_hide_cursors(sna->scrn); -+ sna_disable_cursors(sna->scrn); - for (i = 0; i < sna->mode.num_real_crtc; i++) - if (!sna_crtc_hide_planes(sna, to_sna_crtc(config->crtc[i]))) -- sna_crtc_disable(config->crtc[i]); -+ sna_crtc_disable(config->crtc[i], true); - assert(sna->mode.front_active == 0); - - for (i = 0; i < sna->mode.num_real_crtc; i++) { - struct sna_crtc *sna_crtc = to_sna_crtc(config->crtc[i]); -+ struct plane *plane; - - assert(sna_crtc != NULL); -- sna_crtc->dpms_mode = -1; - - /* Force the rotation property to be reset on next use */ - rotation_reset(&sna_crtc->primary); -- rotation_reset(&sna_crtc->sprite); -+ list_for_each_entry(plane, &sna_crtc->sprites, link) -+ rotation_reset(plane); - } - - /* VT switching, likely to be fbcon so make the backlight usable */ -@@ -6641,9 +8488,10 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo - { - int16_t sx, sy; - struct sna *sna = to_sna(crtc->scrn); -- ScreenPtr screen = sna->scrn->pScreen; -+ ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); - DrawablePtr draw = crtc_source(crtc, &sx, &sy); - PictFormatPtr format; -+ PictTransform T; - PicturePtr src, dst; - PixmapPtr pixmap; - int depth, error; -@@ -6664,6 +8512,14 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo - __FUNCTION__, format->format, depth, draw->bitsPerPixel, - bo->pitch, crtc->mode.HDisplay, crtc->mode.VDisplay)); - -+ if (sx | sy) -+ RegionTranslate(region, sx, sy); -+ error = !sna_drawable_move_region_to_cpu(draw, region, MOVE_READ); -+ if (sx | sy) -+ RegionTranslate(region, -sx, -sy); -+ if (error) -+ return; -+ - ptr = 
kgem_bo_map__gtt(&sna->kgem, bo); - if (ptr == NULL) - return; -@@ -6683,9 +8539,37 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo - if (!src) - goto free_pixmap; - -- error = SetPictureTransform(src, &crtc->crtc_to_framebuffer); -- if (error) -- goto free_src; -+ pixman_transform_init_translate(&T, sx << 16, sy << 16); -+ pixman_transform_multiply(&T, &T, &crtc->crtc_to_framebuffer); -+ if (!sna_transform_is_integer_translation(&T, &sx, &sy)) { -+#define f2d(x) (((double)(x))/65536.) -+ DBG(("%s: transform=[[%f %f %f], [%f %f %f], [%f %f %f]] (raw [[%x %x %x], [%x %x %x], [%x %x %x]])\n", -+ __FUNCTION__, -+ f2d(T.matrix[0][0]), -+ f2d(T.matrix[0][1]), -+ f2d(T.matrix[0][2]), -+ f2d(T.matrix[1][0]), -+ f2d(T.matrix[1][1]), -+ f2d(T.matrix[1][2]), -+ f2d(T.matrix[2][0]), -+ f2d(T.matrix[2][1]), -+ f2d(T.matrix[2][2]), -+ T.matrix[0][0], -+ T.matrix[0][1], -+ T.matrix[0][2], -+ T.matrix[1][0], -+ T.matrix[1][1], -+ T.matrix[1][2], -+ T.matrix[2][0], -+ T.matrix[2][1], -+ T.matrix[2][2])); -+#undef f2d -+ -+ error = SetPictureTransform(src, &T); -+ if (error) -+ goto free_src; -+ sx = sy = 0; -+ } - - if (crtc->filter && crtc->transform_in_use) - SetPicturePictFilter(src, crtc->filter, -@@ -6733,10 +8617,11 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo - { - int16_t sx, sy; - struct sna *sna = to_sna(crtc->scrn); -- ScreenPtr screen = crtc->scrn->pScreen; -+ ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); - DrawablePtr draw = crtc_source(crtc, &sx, &sy); - struct sna_composite_op tmp; - PictFormatPtr format; -+ PictTransform T; - PicturePtr src, dst; - PixmapPtr pixmap; - const BoxRec *b; -@@ -6777,9 +8662,14 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo - if (!src) - goto free_pixmap; - -- error = SetPictureTransform(src, &crtc->crtc_to_framebuffer); -- if (error) -- goto free_src; -+ pixman_transform_init_translate(&T, sx << 16, sy << 16); -+ 
pixman_transform_multiply(&T, &T, &crtc->crtc_to_framebuffer); -+ if (!sna_transform_is_integer_translation(&T, &sx, &sy)) { -+ error = SetPictureTransform(src, &T); -+ if (error) -+ goto free_src; -+ sx = sy = 0; -+ } - - if (crtc->filter && crtc->transform_in_use) - SetPicturePictFilter(src, crtc->filter, -@@ -6793,36 +8683,38 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo - ValidatePicture(src); - ValidatePicture(dst); - -- if (!sna->render.composite(sna, -- PictOpSrc, src, NULL, dst, -- sx, sy, -- 0, 0, -- 0, 0, -- crtc->mode.HDisplay, crtc->mode.VDisplay, -- COMPOSITE_PARTIAL, memset(&tmp, 0, sizeof(tmp)))) { -- DBG(("%s: unsupported operation!\n", __FUNCTION__)); -- sna_crtc_redisplay__fallback(crtc, region, bo); -- goto free_dst; -- } -- -+ /* Composite each box individually as if we are dealing with a rotation -+ * on a large display, we may have to perform intermediate copies. We -+ * can then minimise the overdraw by looking at individual boxes rather -+ * than the bbox. 
-+ */ - n = region_num_rects(region); - b = region_rects(region); - do { -- BoxRec box; -- -- box = *b++; -+ BoxRec box = *b; - transformed_box(&box, crtc); - - DBG(("%s: (%d, %d)x(%d, %d) -> (%d, %d), (%d, %d)\n", - __FUNCTION__, -- b[-1].x1, b[-1].y1, b[-1].x2-b[-1].x1, b[-1].y2-b[-1].y1, -+ b->x1, b->y1, b->x2-b->x1, b->y2-b->y1, - box.x1, box.y1, box.x2, box.y2)); - -- tmp.box(sna, &tmp, &box); -- } while (--n); -- tmp.done(sna, &tmp); -+ if (!sna->render.composite(sna, -+ PictOpSrc, src, NULL, dst, -+ sx + box.x1, sy + box.y1, -+ 0, 0, -+ box.x1, box.y1, -+ box.x2 - box.x1, box.y2 - box.y1, -+ 0, memset(&tmp, 0, sizeof(tmp)))) { -+ DBG(("%s: unsupported operation!\n", __FUNCTION__)); -+ sna_crtc_redisplay__fallback(crtc, region, bo); -+ break; -+ } else { -+ tmp.box(sna, &tmp, &box); -+ tmp.done(sna, &tmp); -+ } -+ } while (b++, --n); - --free_dst: - FreePicture(dst, None); - free_src: - FreePicture(src, None); -@@ -6839,7 +8731,7 @@ sna_crtc_redisplay(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo *bo) - struct sna_pixmap *priv = sna_pixmap((PixmapPtr)draw); - - DBG(("%s: crtc %d [pipe=%d], damage (%d, %d), (%d, %d) x %d\n", -- __FUNCTION__, to_sna_crtc(crtc)->id, to_sna_crtc(crtc)->pipe, -+ __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc), - region->extents.x1, region->extents.y1, - region->extents.x2, region->extents.y2, - region_num_rects(region))); -@@ -6898,7 +8790,10 @@ sna_crtc_redisplay(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo *bo) - static void shadow_flip_handler(struct drm_event_vblank *e, - void *data) - { -- sna_mode_redisplay(data); -+ struct sna *sna = data; -+ -+ if (!sna->mode.shadow_wait) -+ sna_mode_redisplay(sna); - } - - void sna_shadow_set_crtc(struct sna *sna, -@@ -6908,18 +8803,23 @@ void sna_shadow_set_crtc(struct sna *sna, - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); - struct sna_pixmap *priv; - -+ assert(sna_crtc); - DBG(("%s: setting shadow override for CRTC:%d to handle=%d\n", -- __FUNCTION__, sna_crtc->id, 
bo->handle)); -+ __FUNCTION__, __sna_crtc_id(sna_crtc), bo->handle)); - - assert(sna->flags & SNA_TEAR_FREE); -- assert(sna_crtc); - assert(!sna_crtc->transform); - - if (sna_crtc->client_bo != bo) { -- if (sna_crtc->client_bo) -+ if (sna_crtc->client_bo) { -+ assert(sna_crtc->client_bo->refcnt >= sna_crtc->client_bo->active_scanout); -+ sna_crtc->client_bo->active_scanout--; - kgem_bo_destroy(&sna->kgem, sna_crtc->client_bo); -+ } - - sna_crtc->client_bo = kgem_bo_reference(bo); -+ sna_crtc->client_bo->active_scanout++; -+ assert(sna_crtc->client_bo->refcnt >= sna_crtc->client_bo->active_scanout); - sna_crtc_damage(crtc); - } - -@@ -6969,11 +8869,13 @@ void sna_shadow_unset_crtc(struct sna *sna, - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); - - DBG(("%s: clearin shadow override for CRTC:%d\n", -- __FUNCTION__, sna_crtc->id)); -+ __FUNCTION__, __sna_crtc_id(sna_crtc))); - - if (sna_crtc->client_bo == NULL) - return; - -+ assert(sna_crtc->client_bo->refcnt >= sna_crtc->client_bo->active_scanout); -+ sna_crtc->client_bo->active_scanout--; - kgem_bo_destroy(&sna->kgem, sna_crtc->client_bo); - sna_crtc->client_bo = NULL; - list_del(&sna_crtc->shadow_link); -@@ -6982,15 +8884,57 @@ void sna_shadow_unset_crtc(struct sna *sna, - sna_crtc_damage(crtc); - } - -+static bool move_crtc_to_gpu(struct sna *sna) -+{ -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ int i; -+ -+ for (i = 0; i < sna->mode.num_real_crtc; i++) { -+ struct sna_crtc *crtc = to_sna_crtc(config->crtc[i]); -+ unsigned hint; -+ -+ assert(crtc); -+ -+ if (crtc->bo == NULL) -+ continue; -+ -+ if (crtc->slave_pixmap) -+ continue; -+ -+ if (crtc->client_bo) -+ continue; -+ -+ if (crtc->shadow_bo) -+ continue; -+ -+ hint = MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT; -+ if (sna->flags & SNA_TEAR_FREE) -+ hint |= __MOVE_FORCE; -+ -+ DBG(("%s: CRTC %d [pipe=%d] requires frontbuffer\n", -+ __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc))); -+ return sna_pixmap_move_to_gpu(sna->front, 
hint); -+ } -+ -+ return true; -+} -+ - void sna_mode_redisplay(struct sna *sna) - { - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); - RegionPtr region; - int i; - -- if (!sna->mode.shadow_damage) -+ if (sna->mode.hidden) { -+ DBG(("%s: hidden outputs, skipping\n", __FUNCTION__)); -+ return; -+ } -+ -+ if (!sna->mode.shadow_enabled) - return; - -+ assert(sna->mode.shadow_damage); -+ - DBG(("%s: posting shadow damage? %d (flips pending? %d, mode reconfiguration pending? %d)\n", - __FUNCTION__, - !RegionNil(DamageRegion(sna->mode.shadow_damage)), -@@ -7012,21 +8956,23 @@ void sna_mode_redisplay(struct sna *sna) - region->extents.x2, region->extents.y2)); - - if (sna->mode.flip_active) { -- DamagePtr damage; -- -- damage = sna->mode.shadow_damage; -- sna->mode.shadow_damage = NULL; -+ DBG(("%s: checking for %d outstanding flip completions\n", -+ __FUNCTION__, sna->mode.flip_active)); - -+ sna->mode.dirty = true; - while (sna->mode.flip_active && sna_mode_wakeup(sna)) - ; -+ sna->mode.dirty = false; - -- sna->mode.shadow_damage = damage; -+ DBG(("%s: now %d outstanding flip completions (enabled? 
%d)\n", -+ __FUNCTION__, -+ sna->mode.flip_active, -+ sna->mode.shadow_enabled)); -+ if (sna->mode.flip_active || !sna->mode.shadow_enabled) -+ return; - } - -- if (sna->mode.flip_active) -- return; -- -- if (wedged(sna) || !sna_pixmap_move_to_gpu(sna->front, MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT)) { -+ if (!move_crtc_to_gpu(sna)) { - DBG(("%s: forcing scanout update using the CPU\n", __FUNCTION__)); - if (!sna_pixmap_move_to_cpu(sna->front, MOVE_READ)) - return; -@@ -7047,90 +8993,14 @@ void sna_mode_redisplay(struct sna *sna) - damage.data = NULL; - RegionIntersect(&damage, &damage, region); - if (!box_empty(&damage.extents)) { -- struct kgem_bo *bo = NULL; -- - DBG(("%s: fallback intersects pipe=%d [(%d, %d), (%d, %d)]\n", -- __FUNCTION__, sna_crtc->pipe, -+ __FUNCTION__, __sna_crtc_pipe(sna_crtc), - damage.extents.x1, damage.extents.y1, - damage.extents.x2, damage.extents.y2)); - -- if (sna->flags & SNA_TEAR_FREE) { -- RegionRec new_damage; -- -- RegionNull(&new_damage); -- RegionCopy(&new_damage, &damage); -- -- bo = sna_crtc->client_bo; -- if (bo == NULL) { -- damage.extents = crtc->bounds; -- damage.data = NULL; -- bo = kgem_create_2d(&sna->kgem, -- crtc->mode.HDisplay, -- crtc->mode.VDisplay, -- crtc->scrn->bitsPerPixel, -- sna_crtc->bo->tiling, -- CREATE_SCANOUT); -- } else -- RegionUnion(&damage, &damage, &sna_crtc->client_damage); -- -- DBG(("%s: TearFree fallback, shadow handle=%d, crtc handle=%d\n", __FUNCTION__, bo->handle, sna_crtc->bo->handle)); -- -- sna_crtc->client_damage = new_damage; -- } -- -- if (bo == NULL) -- bo = sna_crtc->bo; -- sna_crtc_redisplay__fallback(crtc, &damage, bo); -- -- if (bo != sna_crtc->bo) { -- struct drm_mode_crtc_page_flip arg; -- -- arg.crtc_id = sna_crtc->id; -- arg.fb_id = get_fb(sna, bo, -- crtc->mode.HDisplay, -- crtc->mode.VDisplay); -- -- arg.user_data = (uintptr_t)sna_crtc; -- arg.flags = DRM_MODE_PAGE_FLIP_EVENT; -- arg.reserved = 0; -- -- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { -- 
if (sna_crtc_flip(sna, sna_crtc, bo, 0, 0)) { -- assert(sna_crtc->bo->active_scanout); -- assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout); -- sna_crtc->bo->active_scanout--; -- kgem_bo_destroy(&sna->kgem, sna_crtc->bo); -- -- sna_crtc->bo = bo; -- sna_crtc->bo->active_scanout++; -- sna_crtc->client_bo = NULL; -- } else { -- DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", -- __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno)); -- xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, -- "Page flipping failed, disabling TearFree\n"); -- sna->flags &= ~SNA_TEAR_FREE; -- -- damage.extents = crtc->bounds; -- damage.data = NULL; -- sna_crtc_redisplay__fallback(crtc, &damage, sna_crtc->bo); -- -- kgem_bo_destroy(&sna->kgem, bo); -- sna_crtc->client_bo = NULL; -- } -- } else { -- sna->mode.flip_active++; -- -- assert(sna_crtc->flip_bo == NULL); -- sna_crtc->flip_handler = shadow_flip_handler; -- sna_crtc->flip_data = sna; -- sna_crtc->flip_bo = bo; -- sna_crtc->flip_bo->active_scanout++; -- sna_crtc->flip_serial = sna_crtc->mode_serial; -- -- sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo); -- } -- } -+ sna_crtc_redisplay__fallback(crtc, -+ &damage, -+ sna_crtc->bo); - } - RegionUninit(&damage); - -@@ -7171,6 +9041,7 @@ void sna_mode_redisplay(struct sna *sna) - xf86CrtcPtr crtc = config->crtc[i]; - struct sna_crtc *sna_crtc = to_sna_crtc(crtc); - RegionRec damage; -+ int sigio; - - assert(sna_crtc != NULL); - DBG(("%s: crtc[%d] transformed? 
%d\n", -@@ -7192,30 +9063,38 @@ void sna_mode_redisplay(struct sna *sna) - region_num_rects(&damage), - damage.extents.x1, damage.extents.y1, - damage.extents.x2, damage.extents.y2)); -+ sigio = sigio_block(); - if (!box_empty(&damage.extents)) { - if (sna->flags & SNA_TEAR_FREE) { -+ RegionRec new_damage; - struct drm_mode_crtc_page_flip arg; - struct kgem_bo *bo; - -- RegionUninit(&damage); -- damage.extents = crtc->bounds; -- damage.data = NULL; -+ RegionNull(&new_damage); -+ RegionCopy(&new_damage, &damage); - -- bo = sna_crtc->client_bo; -- if (bo == NULL) -+ bo = sna_crtc->cache_bo; -+ if (bo == NULL) { -+ damage.extents = crtc->bounds; -+ damage.data = NULL; - bo = kgem_create_2d(&sna->kgem, - crtc->mode.HDisplay, - crtc->mode.VDisplay, - crtc->scrn->bitsPerPixel, - sna_crtc->bo->tiling, - CREATE_SCANOUT); -- if (bo == NULL) -- goto disable1; -+ if (bo == NULL) -+ continue; -+ } else -+ RegionUnion(&damage, &damage, &sna_crtc->crtc_damage); -+ sna_crtc->crtc_damage = new_damage; - - sna_crtc_redisplay(crtc, &damage, bo); - kgem_bo_submit(&sna->kgem, bo); -+ __kgem_bo_clear_dirty(bo); - -- arg.crtc_id = sna_crtc->id; -+ assert_crtc_fb(sna, sna_crtc); -+ arg.crtc_id = __sna_crtc_id(sna_crtc); - arg.fb_id = get_fb(sna, bo, - crtc->mode.HDisplay, - crtc->mode.VDisplay); -@@ -7228,6 +9107,9 @@ void sna_mode_redisplay(struct sna *sna) - - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { - if (sna_crtc_flip(sna, sna_crtc, bo, 0, 0)) { -+ DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", -+ __FUNCTION__, sna_crtc->bo->handle, sna_crtc->bo->active_scanout - 1, -+ bo->handle, bo->active_scanout)); - assert(sna_crtc->bo->active_scanout); - assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout); - sna_crtc->bo->active_scanout--; -@@ -7235,13 +9117,12 @@ void sna_mode_redisplay(struct sna *sna) - - sna_crtc->bo = kgem_bo_reference(bo); - sna_crtc->bo->active_scanout++; -- sna_crtc->client_bo = 
kgem_bo_reference(bo); - } else { - BoxRec box; - DrawableRec tmp; - - DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", -- __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno)); -+ __FUNCTION__, arg.fb_id, i, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), errno)); - xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, - "Page flipping failed, disabling TearFree\n"); - sna->flags &= ~SNA_TEAR_FREE; -@@ -7260,13 +9141,13 @@ disable1: - &box, 1, COPY_LAST)) { - xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, - "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n", -- __FUNCTION__, sna_crtc->id, sna_crtc->pipe); -- sna_crtc_disable(crtc); -+ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc)); -+ sna_crtc_disable(crtc, false); - } -- -- kgem_bo_destroy(&sna->kgem, bo); -- sna_crtc->client_bo = NULL; - } -+ -+ kgem_bo_destroy(&sna->kgem, bo); -+ sna_crtc->cache_bo = NULL; - continue; - } - sna->mode.flip_active++; -@@ -7279,13 +9160,20 @@ disable1: - sna_crtc->flip_serial = sna_crtc->mode_serial; - sna_crtc->flip_pending = true; - -- sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo); -+ if (sna_crtc->bo != sna->mode.shadow) { -+ assert_scanout(&sna->kgem, sna_crtc->bo, -+ crtc->mode.HDisplay, crtc->mode.VDisplay); -+ sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo); -+ } -+ DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", -+ __FUNCTION__, __sna_crtc_id(sna_crtc), sna_crtc->flip_bo->handle, sna_crtc->flip_bo->active_scanout, sna_crtc->flip_serial)); - } else { - sna_crtc_redisplay(crtc, &damage, sna_crtc->bo); - kgem_scanout_flush(&sna->kgem, sna_crtc->bo); - } - } - RegionUninit(&damage); -+ sigio_unblock(sigio); - - if (sna_crtc->slave_damage) - DamageEmpty(sna_crtc->slave_damage); -@@ -7296,6 +9184,7 @@ disable1: - struct kgem_bo *old = sna->mode.shadow; - struct drm_mode_crtc_page_flip arg; - uint32_t fb = 0; -+ int sigio; - - DBG(("%s: flipping TearFree outputs, current scanout handle=%d 
[active?=%d], new handle=%d [active=%d]\n", - __FUNCTION__, old->handle, old->active_scanout, new->handle, new->active_scanout)); -@@ -7307,7 +9196,9 @@ disable1: - arg.reserved = 0; - - kgem_bo_submit(&sna->kgem, new); -+ __kgem_bo_clear_dirty(new); - -+ sigio = sigio_block(); - for (i = 0; i < sna->mode.num_real_crtc; i++) { - struct sna_crtc *crtc = config->crtc[i]->driver_private; - struct kgem_bo *flip_bo; -@@ -7315,20 +9206,20 @@ disable1: - - assert(crtc != NULL); - DBG(("%s: crtc %d [%d, pipe=%d] active? %d, transformed? %d\n", -- __FUNCTION__, i, crtc->id, crtc->pipe, crtc->bo ? crtc->bo->handle : 0, crtc->transform)); -+ __FUNCTION__, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->bo ? crtc->bo->handle : 0, crtc->transform)); - if (crtc->bo == NULL || crtc->transform) - continue; - - assert(config->crtc[i]->enabled); -- assert(crtc->dpms_mode <= DPMSModeOn); - assert(crtc->flip_bo == NULL); -+ assert_crtc_fb(sna, crtc); - -- arg.crtc_id = crtc->id; -+ arg.crtc_id = __sna_crtc_id(crtc); - arg.user_data = (uintptr_t)crtc; - - if (crtc->client_bo) { - DBG(("%s: apply shadow override bo for CRTC:%d on pipe=%d, handle=%d\n", -- __FUNCTION__, crtc->id, crtc->pipe, crtc->client_bo->handle)); -+ __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->client_bo->handle)); - arg.fb_id = get_fb(sna, crtc->client_bo, - crtc->base->mode.HDisplay, - crtc->base->mode.VDisplay); -@@ -7356,6 +9247,7 @@ fixup_shadow: - } - } - -+ sigio_unblock(sigio); - return; - } - -@@ -7365,8 +9257,12 @@ fixup_shadow: - y = crtc->base->y; - } - -- if (crtc->bo == flip_bo) -+ if (crtc->bo == flip_bo) { -+ assert(crtc->bo->refcnt >= crtc->bo->active_scanout); -+ DBG(("%s: flip handle=%d is already on the CRTC\n", -+ __FUNCTION__, flip_bo->handle)); - continue; -+ } - - if (flip_bo->pitch != crtc->bo->pitch || (y << 16 | x) != crtc->offset) { - DBG(("%s: changing pitch (new %d =?= old %d) or offset (new %x =?= old %x)\n", -@@ -7375,6 +9271,9 @@ fixup_shadow: - y << 16 | x, 
crtc->offset)); - fixup_flip: - if (sna_crtc_flip(sna, crtc, flip_bo, x, y)) { -+ DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", -+ __FUNCTION__, crtc->bo->handle, crtc->bo->active_scanout-1, -+ flip_bo->handle, flip_bo->active_scanout)); - assert(flip_bo != crtc->bo); - assert(crtc->bo->active_scanout); - assert(crtc->bo->refcnt >= crtc->bo->active_scanout); -@@ -7389,9 +9288,11 @@ fixup_flip: - crtc->bo = kgem_bo_reference(flip_bo); - crtc->bo->active_scanout++; - } else { -- xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, -- "Failed to prepare CRTC for page flipping, disabling TearFree\n"); -- sna->flags &= ~SNA_TEAR_FREE; -+ if (sna->flags & SNA_TEAR_FREE) { -+ xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, -+ "Failed to prepare CRTC for page flipping, disabling TearFree\n"); -+ sna->flags &= ~SNA_TEAR_FREE; -+ } - - if (sna->mode.flip_active == 0) { - DBG(("%s: abandoning flip attempt\n", __FUNCTION__)); -@@ -7400,15 +9301,15 @@ fixup_flip: - - xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, - "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n", -- __FUNCTION__, crtc->id, crtc->pipe); -- sna_crtc_disable(crtc->base); -+ __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc)); -+ sna_crtc_disable(crtc->base, false); - } - continue; - } - - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { - ERR(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", -- __FUNCTION__, arg.fb_id, i, crtc->id, crtc->pipe, errno)); -+ __FUNCTION__, arg.fb_id, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), errno)); - goto fixup_flip; - } - sna->mode.flip_active++; -@@ -7421,6 +9322,9 @@ fixup_flip: - crtc->flip_serial = crtc->mode_serial; - crtc->flip_pending = true; - -+ DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", -+ __FUNCTION__, __sna_crtc_id(crtc), crtc->flip_bo->handle, crtc->flip_bo->active_scanout, crtc->flip_serial)); -+ - { - struct drm_i915_gem_busy busy = { 
flip_bo->handle }; - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0) { -@@ -7435,6 +9339,7 @@ fixup_flip: - } - } - } -+ sigio_unblock(sigio); - - DBG(("%s: flipped %d outputs, shadow active? %d\n", - __FUNCTION__, -@@ -7486,7 +9391,9 @@ again: - struct drm_event *e = (struct drm_event *)&buffer[i]; - switch (e->type) { - case DRM_EVENT_VBLANK: -- if (((uintptr_t)((struct drm_event_vblank *)e)->user_data) & 2) -+ if (sna->mode.shadow_wait) -+ defer_event(sna, e); -+ else if (((uintptr_t)((struct drm_event_vblank *)e)->user_data) & 2) - sna_present_vblank_handler((struct drm_event_vblank *)e); - else - sna_dri2_vblank_handler((struct drm_event_vblank *)e); -@@ -7495,13 +9402,19 @@ again: - { - struct drm_event_vblank *vbl = (struct drm_event_vblank *)e; - struct sna_crtc *crtc = (void *)(uintptr_t)vbl->user_data; -+ uint64_t msc; - - /* Beware Zaphod! */ - sna = to_sna(crtc->base->scrn); - -- crtc->swap.tv_sec = vbl->tv_sec; -- crtc->swap.tv_usec = vbl->tv_usec; -- crtc->swap.msc = msc64(crtc, vbl->sequence); -+ if (msc64(crtc, vbl->sequence, &msc)) { -+ DBG(("%s: recording last swap on pipe=%d, frame %d [%08llx], time %d.%06d\n", -+ __FUNCTION__, __sna_crtc_pipe(crtc), vbl->sequence, (long long)msc, vbl->tv_sec, vbl->tv_usec)); -+ crtc->swap.tv_sec = vbl->tv_sec; -+ crtc->swap.tv_usec = vbl->tv_usec; -+ crtc->swap.msc = msc; -+ } -+ assert(crtc->flip_pending); - crtc->flip_pending = false; - - assert(crtc->flip_bo); -@@ -7509,10 +9422,12 @@ again: - assert(crtc->flip_bo->refcnt >= crtc->flip_bo->active_scanout); - - if (crtc->flip_serial == crtc->mode_serial) { -- DBG(("%s: removing handle=%d from scanout, installing handle=%d\n", -- __FUNCTION__, crtc->bo->handle, crtc->flip_bo->handle)); -+ DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", -+ __FUNCTION__, crtc->bo->handle, crtc->bo->active_scanout - 1, -+ crtc->flip_bo->handle, crtc->flip_bo->active_scanout)); - 
assert(crtc->bo->active_scanout); - assert(crtc->bo->refcnt >= crtc->bo->active_scanout); -+ - crtc->bo->active_scanout--; - kgem_bo_destroy(&sna->kgem, crtc->bo); - -@@ -7523,6 +9438,8 @@ again: - - crtc->bo = crtc->flip_bo; - crtc->flip_bo = NULL; -+ -+ assert_crtc_fb(sna, crtc); - } else { - crtc->flip_bo->active_scanout--; - kgem_bo_destroy(&sna->kgem, crtc->flip_bo); -@@ -7531,8 +9448,10 @@ again: - - DBG(("%s: flip complete, pending? %d\n", __FUNCTION__, sna->mode.flip_active)); - assert(sna->mode.flip_active); -- if (--sna->mode.flip_active == 0) -+ if (--sna->mode.flip_active == 0) { -+ assert(crtc->flip_handler); - crtc->flip_handler(vbl, crtc->flip_data); -+ } - } - break; - default: -diff --git a/src/sna/sna_display_fake.c b/src/sna/sna_display_fake.c -index 4d74c38d..fa26bda1 100644 ---- a/src/sna/sna_display_fake.c -+++ b/src/sna/sna_display_fake.c -@@ -96,12 +96,6 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, - } - - static void --sna_crtc_gamma_set(xf86CrtcPtr crtc, -- CARD16 *red, CARD16 *green, CARD16 *blue, int size) --{ --} -- --static void - sna_crtc_destroy(xf86CrtcPtr crtc) - { - } -@@ -109,7 +103,6 @@ sna_crtc_destroy(xf86CrtcPtr crtc) - static const xf86CrtcFuncsRec sna_crtc_funcs = { - .dpms = sna_crtc_dpms, - .set_mode_major = sna_crtc_set_mode_major, -- .gamma_set = sna_crtc_gamma_set, - .destroy = sna_crtc_destroy, - }; - -@@ -192,7 +185,7 @@ static const xf86OutputFuncsRec sna_output_funcs = { - static Bool - sna_mode_resize(ScrnInfoPtr scrn, int width, int height) - { -- ScreenPtr screen = scrn->pScreen; -+ ScreenPtr screen = xf86ScrnToScreen(scrn); - PixmapPtr new_front; - - DBG(("%s (%d, %d) -> (%d, %d)\n", __FUNCTION__, -@@ -262,6 +255,7 @@ static bool add_fake_output(struct sna *sna, bool late) - output->mm_height = 0; - output->interlaceAllowed = FALSE; - output->subpixel_order = SubPixelNone; -+ output->status = XF86OutputStatusDisconnected; - - output->possible_crtcs = ~((1 << sna->mode.num_real_crtc) - 1); - 
output->possible_clones = ~((1 << sna->mode.num_real_output) - 1); -@@ -297,6 +291,8 @@ static bool add_fake_output(struct sna *sna, bool late) - - RRCrtcSetRotations(crtc->randr_crtc, - RR_Rotate_All | RR_Reflect_All); -+ if (!RRCrtcGammaSetSize(crtc->randr_crtc, 256)) -+ goto err; - } - - sna->mode.num_fake++; -@@ -312,13 +308,16 @@ err: - continue; - - xf86OutputDestroy(output); -+ i--; - } - - for (i = 0; i < xf86_config->num_crtc; i++) { - crtc = xf86_config->crtc[i]; - if (crtc->driver_private) - continue; -+ - xf86CrtcDestroy(crtc); -+ i--; - } - sna->mode.num_fake = -1; - return false; -diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c -index e5c4d53e..d89525cc 100644 ---- a/src/sna/sna_dri2.c -+++ b/src/sna/sna_dri2.c -@@ -82,12 +82,23 @@ get_private(void *buffer) - return (struct sna_dri2_private *)((DRI2Buffer2Ptr)buffer+1); - } - -+pure static inline DRI2BufferPtr sna_pixmap_get_buffer(PixmapPtr pixmap) -+{ -+ assert(pixmap->refcnt); -+ return ((void **)__get_private(pixmap, sna_pixmap_key))[2]; -+} -+ -+static inline void sna_pixmap_set_buffer(PixmapPtr pixmap, void *ptr) -+{ -+ assert(pixmap->refcnt); -+ ((void **)__get_private(pixmap, sna_pixmap_key))[2] = ptr; -+} -+ - #if DRI2INFOREC_VERSION >= 4 - enum event_type { - WAITMSC = 0, - SWAP, -- SWAP_WAIT, -- SWAP_THROTTLE, -+ SWAP_COMPLETE, - FLIP, - FLIP_THROTTLE, - FLIP_COMPLETE, -@@ -98,6 +109,7 @@ struct dri_bo { - struct list link; - struct kgem_bo *bo; - uint32_t name; -+ unsigned flags; - }; - - struct sna_dri2_event { -@@ -108,6 +120,8 @@ struct sna_dri2_event { - xf86CrtcPtr crtc; - int pipe; - bool queued; -+ bool sync; -+ bool chained; - - /* for swaps & flips only */ - DRI2SwapEventPtr event_complete; -@@ -116,35 +130,146 @@ struct sna_dri2_event { - DRI2BufferPtr back; - struct kgem_bo *bo; - -+ struct copy { -+ struct kgem_bo *bo; -+ unsigned flags; -+ uint32_t name; -+ uint32_t size; -+ } pending; -+ - struct sna_dri2_event *chain; - -- struct list cache; - struct list link; - -- int 
mode; -+ int flip_continue; -+ int keepalive; -+ int signal; - }; - -+#if DRI2INFOREC_VERSION < 10 -+#undef USE_ASYNC_SWAP -+#endif -+ -+#if USE_ASYNC_SWAP -+#define KEEPALIVE 8 /* wait ~100ms before discarding swap caches */ -+#define APPLY_DAMAGE 0 -+#else -+#define USE_ASYNC_SWAP 0 -+#define KEEPALIVE 1 -+#define APPLY_DAMAGE 1 -+#endif -+ - static void sna_dri2_flip_event(struct sna_dri2_event *flip); -+inline static DRI2BufferPtr dri2_window_get_front(WindowPtr win); -+ -+static struct kgem_bo * -+__sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, -+ DRI2BufferPtr src, DRI2BufferPtr dst, -+ unsigned flags); -+ -+inline static void -+__sna_dri2_copy_event(struct sna_dri2_event *info, unsigned flags) -+{ -+ DBG(("%s: flags = %x\n", __FUNCTION__, flags)); -+ assert(info->front != info->back); -+ info->bo = __sna_dri2_copy_region(info->sna, info->draw, NULL, -+ info->back, info->front, -+ flags); -+ info->front->flags = info->back->flags; -+} -+ -+static int front_pitch(DrawablePtr draw) -+{ -+ DRI2BufferPtr buffer; -+ -+ buffer = NULL; -+ if (draw->type != DRAWABLE_PIXMAP) -+ buffer = dri2_window_get_front((WindowPtr)draw); -+ if (buffer == NULL) -+ buffer = sna_pixmap_get_buffer(get_drawable_pixmap(draw)); -+ -+ return buffer ? 
buffer->pitch : 0; -+} -+ -+struct dri2_window { -+ DRI2BufferPtr front; -+ struct sna_dri2_event *chain; -+ xf86CrtcPtr crtc; -+ int64_t msc_delta; -+ struct list cache; -+ uint32_t cache_size; -+ int scanout; -+}; -+ -+static struct dri2_window *dri2_window(WindowPtr win) -+{ -+ assert(win->drawable.type != DRAWABLE_PIXMAP); -+ return ((void **)__get_private(win, sna_window_key))[1]; -+} -+ -+static bool use_scanout(struct sna *sna, -+ DrawablePtr draw, -+ struct dri2_window *priv) -+{ -+ if (priv->front) -+ return true; -+ -+ if (priv->scanout < 0) -+ priv->scanout = -+ (sna->flags & (SNA_LINEAR_FB | SNA_NO_WAIT | SNA_NO_FLIP)) == 0 && -+ draw->width == sna->front->drawable.width && -+ draw->height == sna->front->drawable.height && -+ draw->bitsPerPixel == sna->front->drawable.bitsPerPixel; -+ -+ return priv->scanout; -+} - - static void - sna_dri2_get_back(struct sna *sna, - DrawablePtr draw, -- DRI2BufferPtr back, -- struct sna_dri2_event *info) -+ DRI2BufferPtr back) - { -+ struct dri2_window *priv = dri2_window((WindowPtr)draw); -+ uint32_t size; - struct kgem_bo *bo; -+ struct dri_bo *c; - uint32_t name; -+ int flags; - bool reuse; - -- DBG(("%s: draw size=%dx%d, buffer size=%dx%d\n", -+ DBG(("%s: draw size=%dx%d, back buffer handle=%d size=%dx%d, is-scanout? 
%d, active?=%d, pitch=%d, front pitch=%d\n", - __FUNCTION__, draw->width, draw->height, -- get_private(back)->size & 0xffff, get_private(back)->size >> 16)); -- reuse = (draw->height << 16 | draw->width) == get_private(back)->size; -+ get_private(back)->bo->handle, -+ get_private(back)->size & 0xffff, get_private(back)->size >> 16, -+ get_private(back)->bo->scanout, -+ get_private(back)->bo->active_scanout, -+ back->pitch, front_pitch(draw))); -+ assert(priv); -+ -+ size = draw->height << 16 | draw->width; -+ if (size != priv->cache_size) { -+ while (!list_is_empty(&priv->cache)) { -+ c = list_first_entry(&priv->cache, struct dri_bo, link); -+ list_del(&c->link); -+ -+ DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); -+ assert(c->bo); -+ kgem_bo_destroy(&sna->kgem, c->bo); -+ -+ free(c); -+ } -+ priv->cache_size = size; -+ } -+ -+ reuse = size == get_private(back)->size; -+ if (reuse) -+ reuse = get_private(back)->bo->scanout == use_scanout(sna, draw, priv); -+ DBG(("%s: reuse backbuffer? 
%d\n", __FUNCTION__, reuse)); - if (reuse) { - bo = get_private(back)->bo; - assert(bo->refcnt); -- DBG(("%s: back buffer handle=%d, scanout?=%d, refcnt=%d\n", -- __FUNCTION__, bo->handle, bo->active_scanout, get_private(back)->refcnt)); -+ DBG(("%s: back buffer handle=%d, active?=%d, refcnt=%d\n", -+ __FUNCTION__, bo->handle, bo->active_scanout, get_private(back)->refcnt)); - if (bo->active_scanout == 0) { - DBG(("%s: reuse unattached back\n", __FUNCTION__)); - get_private(back)->stale = false; -@@ -153,24 +278,37 @@ sna_dri2_get_back(struct sna *sna, - } - - bo = NULL; -- if (info) { -- struct dri_bo *c; -- list_for_each_entry(c, &info->cache, link) { -- if (c->bo && c->bo->scanout == 0) { -- bo = c->bo; -- name = c->name; -- DBG(("%s: reuse cache handle=%d\n", __FUNCTION__, bo->handle)); -- list_move_tail(&c->link, &info->cache); -- c->bo = NULL; -+ list_for_each_entry(c, &priv->cache, link) { -+ DBG(("%s: cache: handle=%d, active=%d\n", -+ __FUNCTION__, c->bo ? c->bo->handle : 0, c->bo ? c->bo->active_scanout : -1)); -+ assert(c->bo); -+ if (c->bo->active_scanout == 0) { -+ _list_del(&c->link); -+ if (c->bo == NULL) { -+ free(c); -+ goto out; - } -+ bo = c->bo; -+ name = c->name; -+ flags = c->flags; -+ DBG(("%s: reuse cache handle=%d, name=%d, flags=%d\n", __FUNCTION__, bo->handle, name, flags)); -+ c->bo = NULL; -+ break; - } - } - if (bo == NULL) { - DBG(("%s: allocating new backbuffer\n", __FUNCTION__)); -+ flags = CREATE_EXACT; -+ -+ if (use_scanout(sna, draw, priv)) { -+ DBG(("%s: requesting scanout compatible back\n", __FUNCTION__)); -+ flags |= CREATE_SCANOUT; -+ } -+ - bo = kgem_create_2d(&sna->kgem, - draw->width, draw->height, draw->bitsPerPixel, - get_private(back)->bo->tiling, -- get_private(back)->bo->scanout ? 
CREATE_SCANOUT : 0); -+ flags); - if (bo == NULL) - return; - -@@ -179,30 +317,42 @@ sna_dri2_get_back(struct sna *sna, - kgem_bo_destroy(&sna->kgem, bo); - return; - } -+ -+ flags = 0; -+ if (USE_ASYNC_SWAP && back->flags) { -+ BoxRec box; -+ -+ box.x1 = 0; -+ box.y1 = 0; -+ box.x2 = draw->width; -+ box.y2 = draw->height; -+ -+ DBG(("%s: filling new buffer with old back\n", __FUNCTION__)); -+ if (sna->render.copy_boxes(sna, GXcopy, -+ draw, get_private(back)->bo, 0, 0, -+ draw, bo, 0, 0, -+ &box, 1, COPY_LAST | COPY_DRI)) -+ flags = back->flags; -+ } - } - assert(bo->active_scanout == 0); - -- if (info && reuse) { -- bool found = false; -- struct dri_bo *c; -- -- list_for_each_entry_reverse(c, &info->cache, link) { -- if (c->bo == NULL) { -- found = true; -- _list_del(&c->link); -- break; -- } -- } -- if (!found) -+ if (reuse && get_private(back)->bo->refcnt == 1 + get_private(back)->bo->active_scanout) { -+ if (&c->link == &priv->cache) - c = malloc(sizeof(*c)); - if (c != NULL) { - c->bo = ref(get_private(back)->bo); - c->name = back->name; -- list_add(&c->link, &info->cache); -- DBG(("%s: cacheing handle=%d (name=%d)\n", __FUNCTION__, c->bo->handle, c->name)); -+ c->flags = back->flags; -+ list_add(&c->link, &priv->cache); -+ DBG(("%s: caching handle=%d (name=%d, flags=%d, active_scanout=%d)\n", __FUNCTION__, c->bo->handle, c->name, c->flags, c->bo->active_scanout)); - } -+ } else { -+ if (&c->link != &priv->cache) -+ free(c); - } - -+ assert(bo->active_scanout == 0); - assert(bo != get_private(back)->bo); - kgem_bo_destroy(&sna->kgem, get_private(back)->bo); - -@@ -210,21 +360,13 @@ sna_dri2_get_back(struct sna *sna, - get_private(back)->size = draw->height << 16 | draw->width; - back->pitch = bo->pitch; - back->name = name; -+ back->flags = flags; - -- get_private(back)->stale = false; --} -- --struct dri2_window { -- DRI2BufferPtr front; -- struct sna_dri2_event *chain; -- xf86CrtcPtr crtc; -- int64_t msc_delta; --}; -+ assert(back->pitch); -+ 
assert(back->name); - --static struct dri2_window *dri2_window(WindowPtr win) --{ -- assert(win->drawable.type != DRAWABLE_PIXMAP); -- return ((void **)__get_private(win, sna_window_key))[1]; -+out: -+ get_private(back)->stale = false; - } - - static struct sna_dri2_event * -@@ -232,21 +374,25 @@ dri2_chain(DrawablePtr d) - { - struct dri2_window *priv = dri2_window((WindowPtr)d); - assert(priv != NULL); -+ assert(priv->chain == NULL || priv->chain->chained); - return priv->chain; - } - inline static DRI2BufferPtr dri2_window_get_front(WindowPtr win) - { - struct dri2_window *priv = dri2_window(win); -+ assert(priv->front == NULL || get_private(priv->front)->bo->active_scanout); - return priv ? priv->front : NULL; - } - #else - inline static void *dri2_window_get_front(WindowPtr win) { return NULL; } -+#define APPLY_DAMAGE 1 - #endif - - #if DRI2INFOREC_VERSION < 6 - - #define xorg_can_triple_buffer() 0 - #define swap_limit(d, l) false -+#define mark_stale(b) - - #else - -@@ -273,6 +419,8 @@ mark_stale(DRI2BufferPtr back) - * stale frame. (This is mostly useful for tracking down - * driver bugs!) 
- */ -+ DBG(("%s(handle=%d) => %d\n", __FUNCTION__, -+ get_private(back)->bo->handle, xorg_can_triple_buffer())); - get_private(back)->stale = xorg_can_triple_buffer(); - } - -@@ -286,21 +434,29 @@ sna_dri2_swap_limit_validate(DrawablePtr draw, int swap_limit) - static void - sna_dri2_reuse_buffer(DrawablePtr draw, DRI2BufferPtr buffer) - { -+ struct sna *sna = to_sna_from_drawable(draw); -+ - DBG(("%s: reusing buffer pixmap=%ld, attachment=%d, handle=%d, name=%d\n", - __FUNCTION__, get_drawable_pixmap(draw)->drawable.serialNumber, - buffer->attachment, get_private(buffer)->bo->handle, buffer->name)); - assert(get_private(buffer)->refcnt); -- assert(get_private(buffer)->bo->refcnt > get_private(buffer)->bo->active_scanout); -+ assert(get_private(buffer)->bo->refcnt >= get_private(buffer)->bo->active_scanout); -+ assert(kgem_bo_flink(&sna->kgem, get_private(buffer)->bo) == buffer->name); - - if (buffer->attachment == DRI2BufferBackLeft && - draw->type != DRAWABLE_PIXMAP) { -- DBG(("%s: replacing back buffer\n", __FUNCTION__)); -- sna_dri2_get_back(to_sna_from_drawable(draw), draw, buffer, dri2_chain(draw)); -+ DBG(("%s: replacing back buffer on window %ld\n", __FUNCTION__, draw->id)); -+ sna_dri2_get_back(sna, draw, buffer); - -- assert(kgem_bo_flink(&to_sna_from_drawable(draw)->kgem, get_private(buffer)->bo) == buffer->name); - assert(get_private(buffer)->bo->refcnt); - assert(get_private(buffer)->bo->active_scanout == 0); -+ assert(kgem_bo_flink(&sna->kgem, get_private(buffer)->bo) == buffer->name); -+ DBG(("%s: reusing back buffer handle=%d, name=%d, pitch=%d, age=%d\n", -+ __FUNCTION__, get_private(buffer)->bo->handle, -+ buffer->name, buffer->pitch, buffer->flags)); - } -+ -+ kgem_bo_submit(&sna->kgem, get_private(buffer)->bo); - } - - static bool swap_limit(DrawablePtr draw, int limit) -@@ -314,11 +470,6 @@ static bool swap_limit(DrawablePtr draw, int limit) - } - #endif - --#if DRI2INFOREC_VERSION < 10 --#undef USE_ASYNC_SWAP --#define USE_ASYNC_SWAP 0 
--#endif -- - #define COLOR_PREFER_TILING_Y 0 - - /* Prefer to enable TILING_Y if this buffer will never be a -@@ -328,6 +479,9 @@ static uint32_t color_tiling(struct sna *sna, DrawablePtr draw) - { - uint32_t tiling; - -+ if (!sna->kgem.can_fence) -+ return I915_TILING_NONE; -+ - if (COLOR_PREFER_TILING_Y && - (draw->width != sna->front->drawable.width || - draw->height != sna->front->drawable.height)) -@@ -355,7 +509,6 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna, - PixmapPtr pixmap) - { - struct sna_pixmap *priv; -- int tiling; - - DBG(("%s: attaching DRI client to pixmap=%ld\n", - __FUNCTION__, pixmap->drawable.serialNumber)); -@@ -373,31 +526,29 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna, - return NULL; - } - -- assert(priv->flush == false); -+ assert(priv->flush == false || priv->pinned & PIN_DRI3); -+ assert(priv->gpu_bo->flush == false || priv->pinned & PIN_DRI3); - assert(priv->cpu_damage == NULL); - assert(priv->gpu_bo); - assert(priv->gpu_bo->proxy == NULL); -- assert(priv->gpu_bo->flush == false); -- -- tiling = color_tiling(sna, &pixmap->drawable); -- if (tiling < 0) -- tiling = -tiling; -- if (priv->gpu_bo->tiling != tiling) -- sna_pixmap_change_tiling(pixmap, tiling); - -- return priv->gpu_bo; --} -+ if (!kgem_bo_is_fenced(&sna->kgem, priv->gpu_bo)) { -+ if (priv->gpu_bo->tiling && -+ !sna_pixmap_change_tiling(pixmap, I915_TILING_NONE)) { -+ DBG(("%s: failed to discard tiling (%d) for DRI2 protocol\n", __FUNCTION__, priv->gpu_bo->tiling)); -+ return NULL; -+ } -+ } else { -+ int tiling = color_tiling(sna, &pixmap->drawable); -+ if (tiling < 0) -+ tiling = -tiling; -+ if (priv->gpu_bo->tiling < tiling && !priv->gpu_bo->scanout) -+ sna_pixmap_change_tiling(pixmap, tiling); -+ } - --pure static inline void *sna_pixmap_get_buffer(PixmapPtr pixmap) --{ -- assert(pixmap->refcnt); -- return ((void **)__get_private(pixmap, sna_pixmap_key))[2]; --} -+ priv->gpu_bo->active_scanout++; - --static inline void 
sna_pixmap_set_buffer(PixmapPtr pixmap, void *ptr) --{ -- assert(pixmap->refcnt); -- ((void **)__get_private(pixmap, sna_pixmap_key))[2] = ptr; -+ return priv->gpu_bo; - } - - void -@@ -422,13 +573,18 @@ sna_dri2_pixmap_update_bo(struct sna *sna, PixmapPtr pixmap, struct kgem_bo *bo) - if (private->bo == bo) - return; - -+ assert(private->bo->active_scanout > 0); -+ private->bo->active_scanout--; -+ - DBG(("%s: dropping flush hint from handle=%d\n", __FUNCTION__, private->bo->handle)); - private->bo->flush = false; - kgem_bo_destroy(&sna->kgem, private->bo); - -+ - buffer->name = kgem_bo_flink(&sna->kgem, bo); - buffer->pitch = bo->pitch; - private->bo = ref(bo); -+ bo->active_scanout++; - - DBG(("%s: adding flush hint to handle=%d\n", __FUNCTION__, bo->handle)); - bo->flush = true; -@@ -449,9 +605,9 @@ sna_dri2_create_buffer(DrawablePtr draw, - struct sna_dri2_private *private; - PixmapPtr pixmap; - struct kgem_bo *bo; -- unsigned flags = 0; -+ unsigned bpp = format ?: draw->bitsPerPixel; -+ unsigned flags = CREATE_EXACT; - uint32_t size; -- int bpp; - - DBG(("%s pixmap=%ld, (attachment=%d, format=%d, drawable=%dx%d), window?=%d\n", - __FUNCTION__, -@@ -468,11 +624,11 @@ sna_dri2_create_buffer(DrawablePtr draw, - if (draw->type != DRAWABLE_PIXMAP) - buffer = dri2_window_get_front((WindowPtr)draw); - if (buffer == NULL) -- buffer = sna_pixmap_get_buffer(pixmap); -+ buffer = (DRI2Buffer2Ptr)sna_pixmap_get_buffer(pixmap); - if (buffer) { - private = get_private(buffer); - -- DBG(("%s: reusing front buffer attachment, win=%lu %dx%d, pixmap=%ld [%ld] %dx%d, handle=%d, name=%d\n", -+ DBG(("%s: reusing front buffer attachment, win=%lu %dx%d, pixmap=%ld [%ld] %dx%d, handle=%d, name=%d, active_scanout=%d\n", - __FUNCTION__, - draw->type != DRAWABLE_PIXMAP ? 
(long)draw->id : (long)0, - draw->width, draw->height, -@@ -480,12 +636,22 @@ sna_dri2_create_buffer(DrawablePtr draw, - private->pixmap->drawable.serialNumber, - pixmap->drawable.width, - pixmap->drawable.height, -- private->bo->handle, buffer->name)); -+ private->bo->handle, buffer->name, -+ private->bo->active_scanout)); - -+ assert(buffer->attachment == DRI2BufferFrontLeft); - assert(private->pixmap == pixmap); - assert(sna_pixmap(pixmap)->flush); - assert(sna_pixmap(pixmap)->pinned & PIN_DRI2); - assert(kgem_bo_flink(&sna->kgem, private->bo) == buffer->name); -+ assert(private->bo->pitch == buffer->pitch); -+ assert(private->bo->active_scanout); -+ -+ sna_pixmap_move_to_gpu(pixmap, -+ MOVE_READ | -+ __MOVE_FORCE | -+ __MOVE_DRI); -+ kgem_bo_submit(&sna->kgem, private->bo); - - private->refcnt++; - return buffer; -@@ -498,7 +664,6 @@ sna_dri2_create_buffer(DrawablePtr draw, - assert(sna_pixmap(pixmap) != NULL); - - bo = ref(bo); -- bpp = pixmap->drawable.bitsPerPixel; - if (pixmap == sna->front && !(sna->flags & SNA_LINEAR_FB)) - flags |= CREATE_SCANOUT; - DBG(("%s: attaching to front buffer %dx%d [%p:%d], scanout? 
%d\n", -@@ -506,6 +671,7 @@ sna_dri2_create_buffer(DrawablePtr draw, - pixmap->drawable.width, pixmap->drawable.height, - pixmap, pixmap->refcnt, flags & CREATE_SCANOUT)); - size = (uint32_t)pixmap->drawable.height << 16 | pixmap->drawable.width; -+ bpp = pixmap->drawable.bitsPerPixel; - break; - - case DRI2BufferBackLeft: -@@ -514,6 +680,7 @@ sna_dri2_create_buffer(DrawablePtr draw, - flags |= CREATE_SCANOUT; - if (draw->width == sna->front->drawable.width && - draw->height == sna->front->drawable.height && -+ draw->bitsPerPixel == bpp && - (sna->flags & (SNA_LINEAR_FB | SNA_NO_WAIT | SNA_NO_FLIP)) == 0) - flags |= CREATE_SCANOUT; - } -@@ -521,7 +688,6 @@ sna_dri2_create_buffer(DrawablePtr draw, - case DRI2BufferFrontRight: - case DRI2BufferFakeFrontLeft: - case DRI2BufferFakeFrontRight: -- bpp = draw->bitsPerPixel; - DBG(("%s: creating back buffer %dx%d, suitable for scanout? %d\n", - __FUNCTION__, - draw->width, draw->height, -@@ -530,7 +696,7 @@ sna_dri2_create_buffer(DrawablePtr draw, - bo = kgem_create_2d(&sna->kgem, - draw->width, - draw->height, -- draw->bitsPerPixel, -+ bpp, - color_tiling(sna, draw), - flags); - break; -@@ -558,7 +724,6 @@ sna_dri2_create_buffer(DrawablePtr draw, - * not understand W tiling and the GTT is incapable of - * W fencing. - */ -- bpp = format ? format : draw->bitsPerPixel; - bpp *= 2; - bo = kgem_create_2d(&sna->kgem, - ALIGN(draw->width, 64), -@@ -570,7 +735,6 @@ sna_dri2_create_buffer(DrawablePtr draw, - case DRI2BufferDepthStencil: - case DRI2BufferHiz: - case DRI2BufferAccum: -- bpp = format ? 
format : draw->bitsPerPixel, - bo = kgem_create_2d(&sna->kgem, - draw->width, draw->height, bpp, - other_tiling(sna, draw), -@@ -614,7 +778,7 @@ sna_dri2_create_buffer(DrawablePtr draw, - pixmap->refcnt++; - - priv = sna_pixmap(pixmap); -- assert(priv->flush == false); -+ assert(priv->flush == false || priv->pinned & PIN_DRI3); - assert((priv->pinned & PIN_DRI2) == 0); - - /* Don't allow this named buffer to be replaced */ -@@ -630,17 +794,17 @@ sna_dri2_create_buffer(DrawablePtr draw, - if (priv->gpu_bo->exec) - sna->kgem.flush = 1; - -- priv->flush |= 1; -+ priv->flush |= FLUSH_READ; - if (draw->type == DRAWABLE_PIXMAP) { - /* DRI2 renders directly into GLXPixmaps, treat as hostile */ - kgem_bo_unclean(&sna->kgem, priv->gpu_bo); - sna_damage_all(&priv->gpu_damage, pixmap); - priv->clear = false; - priv->cpu = false; -- priv->flush |= 2; -+ priv->flush |= FLUSH_WRITE; - } - -- sna_accel_watch_flush(sna, 1); -+ sna_watch_flush(sna, 1); - } - - return buffer; -@@ -651,16 +815,80 @@ err: - return NULL; - } - --static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) -+static void -+sna_dri2_cache_bo(struct sna *sna, -+ DrawablePtr draw, -+ struct kgem_bo *bo, -+ uint32_t name, -+ uint32_t size, -+ uint32_t flags) -+{ -+ struct dri_bo *c; -+ -+ DBG(("%s(handle=%d, name=%d)\n", __FUNCTION__, bo->handle, name)); -+ -+ if (draw == NULL) { -+ DBG(("%s: no draw, releasing handle=%d\n", -+ __FUNCTION__, bo->handle)); -+ goto err; -+ } -+ -+ if (draw->type == DRAWABLE_PIXMAP) { -+ DBG(("%s: not a window, releasing handle=%d\n", -+ __FUNCTION__, bo->handle)); -+ goto err; -+ } -+ -+ if (bo->refcnt > 1 + bo->active_scanout) { -+ DBG(("%s: multiple references [%d], releasing handle\n", -+ __FUNCTION__, bo->refcnt, bo->handle)); -+ goto err; -+ } -+ -+ if ((draw->height << 16 | draw->width) != size) { -+ DBG(("%s: wrong size [%dx%d], releasing handle\n", -+ __FUNCTION__, -+ size & 0xffff, size >> 16, -+ bo->handle)); -+ goto err; -+ } -+ -+ if (bo->scanout && 
front_pitch(draw) != bo->pitch) { -+ DBG(("%s: scanout with pitch change [%d != %d], releasing handle\n", -+ __FUNCTION__, bo->pitch, front_pitch(draw), bo->handle)); -+ goto err; -+ } -+ -+ c = malloc(sizeof(*c)); -+ if (!c) -+ goto err; -+ -+ DBG(("%s: caching handle=%d (name=%d, flags=%d, active_scanout=%d)\n", __FUNCTION__, bo->handle, name, flags, bo->active_scanout)); -+ -+ c->bo = bo; -+ c->name = name; -+ c->flags = flags; -+ list_add(&c->link, &dri2_window((WindowPtr)draw)->cache); -+ return; -+ -+err: -+ kgem_bo_destroy(&sna->kgem, bo); -+} -+ -+static void _sna_dri2_destroy_buffer(struct sna *sna, -+ DrawablePtr draw, -+ DRI2Buffer2Ptr buffer) - { - struct sna_dri2_private *private = get_private(buffer); - - if (buffer == NULL) - return; - -- DBG(("%s: %p [handle=%d] -- refcnt=%d, pixmap=%ld\n", -+ DBG(("%s: %p [handle=%d] -- refcnt=%d, draw=%ld, pixmap=%ld, proxy?=%d\n", - __FUNCTION__, buffer, private->bo->handle, private->refcnt, -- private->pixmap ? private->pixmap->drawable.serialNumber : 0)); -+ draw ? draw->id : 0, -+ private->pixmap ? 
private->pixmap->drawable.serialNumber : 0, -+ private->proxy != NULL)); - assert(private->refcnt > 0); - if (--private->refcnt) - return; -@@ -669,7 +897,10 @@ static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) - - if (private->proxy) { - DBG(("%s: destroying proxy\n", __FUNCTION__)); -- _sna_dri2_destroy_buffer(sna, private->proxy); -+ assert(private->bo->active_scanout > 0); -+ private->bo->active_scanout--; -+ -+ _sna_dri2_destroy_buffer(sna, draw, private->proxy); - private->pixmap = NULL; - } - -@@ -683,6 +914,11 @@ static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) - assert(priv->pinned & PIN_DRI2); - assert(priv->flush); - -+ DBG(("%s: removing active_scanout=%d from pixmap handle=%d\n", -+ __FUNCTION__, priv->gpu_bo->active_scanout, priv->gpu_bo->handle)); -+ assert(priv->gpu_bo->active_scanout > 0); -+ priv->gpu_bo->active_scanout--; -+ - /* Undo the DRI markings on this pixmap */ - DBG(("%s: releasing last DRI pixmap=%ld, scanout?=%d\n", - __FUNCTION__, -@@ -692,28 +928,34 @@ static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) - list_del(&priv->flush_list); - - DBG(("%s: dropping flush hint from handle=%d\n", __FUNCTION__, private->bo->handle)); -- priv->gpu_bo->flush = false; - priv->pinned &= ~PIN_DRI2; - -- priv->flush = false; -- sna_accel_watch_flush(sna, -1); -+ if ((priv->pinned & PIN_DRI3) == 0) { -+ priv->gpu_bo->flush = false; -+ priv->flush = false; -+ } -+ sna_watch_flush(sna, -1); - - sna_pixmap_set_buffer(pixmap, NULL); - pixmap->drawable.pScreen->DestroyPixmap(pixmap); - } -- assert(private->bo->flush == false); - -- kgem_bo_destroy(&sna->kgem, private->bo); -+ sna_dri2_cache_bo(sna, draw, -+ private->bo, -+ buffer->name, -+ private->size, -+ buffer->flags); - free(buffer); - } - - static void sna_dri2_destroy_buffer(DrawablePtr draw, DRI2Buffer2Ptr buffer) - { -- _sna_dri2_destroy_buffer(to_sna_from_drawable(draw), buffer); -+ 
_sna_dri2_destroy_buffer(to_sna_from_drawable(draw), draw, buffer); - } - - static DRI2BufferPtr sna_dri2_reference_buffer(DRI2BufferPtr buffer) - { -+ assert(get_private(buffer)->refcnt > 0); - get_private(buffer)->refcnt++; - return buffer; - } -@@ -746,10 +988,9 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) - { - struct sna *sna = to_sna_from_pixmap(pixmap); - struct sna_pixmap *priv = sna_pixmap(pixmap); -- RegionRec region; - -- DBG(("%s: pixmap=%ld, handle=%d\n", -- __FUNCTION__, pixmap->drawable.serialNumber, bo->handle)); -+ DBG(("%s: pixmap=%ld, handle=%d (old handle=%d)\n", -+ __FUNCTION__, pixmap->drawable.serialNumber, bo->handle, priv->gpu_bo->handle)); - - assert(pixmap->drawable.width * pixmap->drawable.bitsPerPixel <= 8*bo->pitch); - assert(pixmap->drawable.height * bo->pitch <= kgem_bo_size(bo)); -@@ -758,21 +999,34 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) - assert((priv->pinned & (PIN_PRIME | PIN_DRI3)) == 0); - assert(priv->flush); - -- /* Post damage on the new front buffer so that listeners, such -- * as DisplayLink know take a copy and shove it over the USB, -- * also for software cursors and the like. -- */ -- region.extents.x1 = region.extents.y1 = 0; -- region.extents.x2 = pixmap->drawable.width; -- region.extents.y2 = pixmap->drawable.height; -- region.data = NULL; -- DamageRegionAppend(&pixmap->drawable, ®ion); -+ if (APPLY_DAMAGE) { -+ RegionRec region; -+ -+ /* Post damage on the new front buffer so that listeners, such -+ * as DisplayLink know take a copy and shove it over the USB, -+ * also for software cursors and the like. -+ */ -+ region.extents.x1 = region.extents.y1 = 0; -+ region.extents.x2 = pixmap->drawable.width; -+ region.extents.y2 = pixmap->drawable.height; -+ region.data = NULL; -+ -+ /* -+ * Eeek, beware the sw cursor copying to the old bo -+ * causing recursion and mayhem. 
-+ */ -+ DBG(("%s: marking whole pixmap as damaged\n", __FUNCTION__)); -+ sna->ignore_copy_area = sna->flags & SNA_TEAR_FREE; -+ DamageRegionAppend(&pixmap->drawable, ®ion); -+ } - - damage(pixmap, priv, NULL); - - assert(bo->refcnt); -- if (priv->move_to_gpu) -+ if (priv->move_to_gpu) { -+ DBG(("%s: applying final/discard move-to-gpu\n", __FUNCTION__)); - priv->move_to_gpu(sna, priv, 0); -+ } - if (priv->gpu_bo != bo) { - DBG(("%s: dropping flush hint from handle=%d\n", __FUNCTION__, priv->gpu_bo->handle)); - priv->gpu_bo->flush = false; -@@ -792,8 +1046,27 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) - bo->domain = DOMAIN_NONE; - assert(bo->flush); - -- DamageRegionProcessPending(&pixmap->drawable); -+ if (APPLY_DAMAGE) { -+ sna->ignore_copy_area = false; -+ DamageRegionProcessPending(&pixmap->drawable); -+ } -+} -+ -+#if defined(__GNUC__) -+#define popcount(x) __builtin_popcount(x) -+#else -+static int popcount(unsigned int x) -+{ -+ int count = 0; -+ -+ while (x) { -+ count += x&1; -+ x >>= 1; -+ } -+ -+ return count; - } -+#endif - - static void sna_dri2_select_mode(struct sna *sna, struct kgem_bo *dst, struct kgem_bo *src, bool sync) - { -@@ -823,6 +1096,12 @@ static void sna_dri2_select_mode(struct sna *sna, struct kgem_bo *dst, struct kg - return; - } - -+ if (sna->render_state.gt < 2 && sna->kgem.has_semaphores) { -+ DBG(("%s: small GT [%d], not forcing selection\n", -+ __FUNCTION__, sna->render_state.gt)); -+ return; -+ } -+ - VG_CLEAR(busy); - busy.handle = src->handle; - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy)) -@@ -860,9 +1139,11 @@ static void sna_dri2_select_mode(struct sna *sna, struct kgem_bo *dst, struct kg - * the cost of the query. - */ - mode = KGEM_RENDER; -- if (busy.busy & (0xfffe << 16)) -+ if ((busy.busy & 0xffff) == I915_EXEC_BLT) - mode = KGEM_BLT; -- kgem_bo_mark_busy(&sna->kgem, busy.handle == src->handle ? src : dst, mode); -+ kgem_bo_mark_busy(&sna->kgem, -+ busy.handle == src->handle ? 
src : dst, -+ mode); - _kgem_set_mode(&sna->kgem, mode); - } - -@@ -871,10 +1152,13 @@ static bool is_front(int attachment) - return attachment == DRI2BufferFrontLeft; - } - -+#define DRI2_SYNC 0x1 -+#define DRI2_DAMAGE 0x2 -+#define DRI2_BO 0x4 - static struct kgem_bo * - __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, - DRI2BufferPtr src, DRI2BufferPtr dst, -- bool sync) -+ unsigned flags) - { - PixmapPtr pixmap = get_drawable_pixmap(draw); - DrawableRec scratch, *src_draw = &pixmap->drawable, *dst_draw = &pixmap->drawable; -@@ -886,7 +1170,7 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, - struct kgem_bo *dst_bo; - const BoxRec *boxes; - int16_t dx, dy, sx, sy; -- unsigned flags; -+ unsigned hint; - int n; - - /* To hide a stale DRI2Buffer, one may choose to substitute -@@ -962,8 +1246,9 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, - } - } - } else -- sync = false; -+ flags &= ~DRI2_SYNC; - -+ scratch.pScreen = draw->pScreen; - scratch.x = scratch.y = 0; - scratch.width = scratch.height = 0; - scratch.depth = draw->depth; -@@ -971,6 +1256,7 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, - - src_bo = src_priv->bo; - assert(src_bo->refcnt); -+ kgem_bo_unclean(&sna->kgem, src_bo); - if (is_front(src->attachment)) { - struct sna_pixmap *priv; - -@@ -987,11 +1273,12 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, - scratch.height = src_priv->size >> 16; - src_draw = &scratch; - -- DBG(("%s: source size %dx%d, region size %dx%d\n", -+ DBG(("%s: source size %dx%d, region size %dx%d, src offset %dx%d\n", - __FUNCTION__, - scratch.width, scratch.height, - clip.extents.x2 - clip.extents.x1, -- clip.extents.y2 - clip.extents.y1)); -+ clip.extents.y2 - clip.extents.y1, -+ -sx, -sy)); - - source.extents.x1 = -sx; - source.extents.y1 = -sy; -@@ -1002,6 +1289,10 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, 
RegionPtr region, - assert(region == NULL || region == &clip); - pixman_region_intersect(&clip, &clip, &source); - -+ if (!pixman_region_not_empty(&clip)) { -+ DBG(("%s: region doesn't overlap pixmap\n", __FUNCTION__)); -+ return NULL; -+ } - } - - dst_bo = dst_priv->bo; -@@ -1013,12 +1304,12 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, - /* Preserve the CRTC shadow overrides */ - sna_shadow_steal_crtcs(sna, &shadow); - -- flags = MOVE_WRITE | __MOVE_FORCE; -+ hint = MOVE_WRITE | __MOVE_FORCE; - if (clip.data) -- flags |= MOVE_READ; -+ hint |= MOVE_READ; - - assert(region == NULL || region == &clip); -- priv = sna_pixmap_move_area_to_gpu(pixmap, &clip.extents, flags); -+ priv = sna_pixmap_move_area_to_gpu(pixmap, &clip.extents, hint); - if (priv) { - damage(pixmap, priv, region); - dst_bo = priv->gpu_bo; -@@ -1050,20 +1341,20 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, - assert(region == NULL || region == &clip); - pixman_region_intersect(&clip, &clip, &target); - -- sync = false; -+ flags &= ~DRI2_SYNC; - } - - if (!wedged(sna)) { - xf86CrtcPtr crtc; - - crtc = NULL; -- if (sync && sna_pixmap_is_scanout(sna, pixmap)) -+ if (flags & DRI2_SYNC && sna_pixmap_is_scanout(sna, pixmap)) - crtc = sna_covering_crtc(sna, &clip.extents, NULL); - sna_dri2_select_mode(sna, dst_bo, src_bo, crtc != NULL); - -- sync = (crtc != NULL&& -- sna_wait_for_scanline(sna, pixmap, crtc, -- &clip.extents)); -+ if (crtc == NULL || -+ !sna_wait_for_scanline(sna, pixmap, crtc, &clip.extents)) -+ flags &= ~DRI2_SYNC; - } - - if (region) { -@@ -1075,8 +1366,11 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, - boxes = &clip.extents; - n = 1; - } -- DamageRegionAppend(&pixmap->drawable, region); -- -+ if (APPLY_DAMAGE || flags & DRI2_DAMAGE) { -+ DBG(("%s: marking region as damaged\n", __FUNCTION__)); -+ sna->ignore_copy_area = sna->flags & SNA_TEAR_FREE; -+ DamageRegionAppend(&pixmap->drawable, 
region); -+ } - - DBG(("%s: copying [(%d, %d), (%d, %d)]x%d src=(%d, %d), dst=(%d, %d)\n", - __FUNCTION__, -@@ -1084,29 +1378,36 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, - boxes[0].x2, boxes[0].y2, - n, sx, sy, dx, dy)); - -- flags = COPY_LAST; -- if (sync) -- flags |= COPY_SYNC; -+ hint = COPY_LAST | COPY_DRI; -+ if (flags & DRI2_SYNC) -+ hint |= COPY_SYNC; - if (!sna->render.copy_boxes(sna, GXcopy, - src_draw, src_bo, sx, sy, - dst_draw, dst_bo, dx, dy, -- boxes, n, flags)) -+ boxes, n, hint)) - memcpy_copy_boxes(sna, GXcopy, - src_draw, src_bo, sx, sy, - dst_draw, dst_bo, dx, dy, -- boxes, n, flags); -- -- DBG(("%s: flushing? %d\n", __FUNCTION__, sync)); -- if (sync) { /* STAT! */ -- struct kgem_request *rq = sna->kgem.next_request; -- kgem_submit(&sna->kgem); -- if (rq->bo) { -- bo = ref(rq->bo); -- DBG(("%s: recording sync fence handle=%d\n", __FUNCTION__, bo->handle)); -+ boxes, n, hint); -+ -+ sna->needs_dri_flush = true; -+ if (flags & (DRI2_SYNC | DRI2_BO)) { /* STAT! 
*/ -+ struct kgem_request *rq = RQ(dst_bo->rq); -+ if (rq && rq != (void *)&sna->kgem) { -+ if (rq->bo == NULL) -+ kgem_submit(&sna->kgem); -+ if (rq->bo) { /* Becareful in case the gpu is wedged */ -+ bo = ref(rq->bo); -+ DBG(("%s: recording sync fence handle=%d\n", -+ __FUNCTION__, bo->handle)); -+ } - } - } - -- DamageRegionProcessPending(&pixmap->drawable); -+ if (APPLY_DAMAGE || flags & DRI2_DAMAGE) { -+ sna->ignore_copy_area = false; -+ DamageRegionProcessPending(&pixmap->drawable); -+ } - - if (clip.data) - pixman_region_fini(&clip); -@@ -1142,6 +1443,8 @@ sna_dri2_copy_region(DrawablePtr draw, - assert(get_private(src)->refcnt); - assert(get_private(dst)->refcnt); - -+ assert(get_private(src)->bo != get_private(dst)->bo); -+ - assert(get_private(src)->bo->refcnt); - assert(get_private(dst)->bo->refcnt); - -@@ -1151,7 +1454,7 @@ sna_dri2_copy_region(DrawablePtr draw, - region->extents.x2, region->extents.y2, - region_num_rects(region))); - -- __sna_dri2_copy_region(sna, draw, region, src, dst, false); -+ __sna_dri2_copy_region(sna, draw, region, src, dst, DRI2_DAMAGE); - } - - inline static uint32_t pipe_select(int pipe) -@@ -1161,6 +1464,7 @@ inline static uint32_t pipe_select(int pipe) - * we can safely ignore the capability check - if we have more - * than two pipes, we can assume that they are fully supported. - */ -+ assert(pipe < _DRM_VBLANK_HIGH_CRTC_MASK); - if (pipe > 1) - return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT; - else if (pipe > 0) -@@ -1169,15 +1473,53 @@ inline static uint32_t pipe_select(int pipe) - return 0; - } - --static inline int sna_wait_vblank(struct sna *sna, union drm_wait_vblank *vbl, int pipe) -+static inline bool sna_next_vblank(struct sna_dri2_event *info) - { -- DBG(("%s(pipe=%d, waiting until seq=%u%s)\n", -- __FUNCTION__, pipe, vbl->request.sequence, -- vbl->request.type & DRM_VBLANK_RELATIVE ? 
" [relative]" : "")); -- assert(pipe != -1); -+ union drm_wait_vblank vbl; - -- vbl->request.type |= pipe_select(pipe); -- return drmIoctl(sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, vbl); -+ DBG(("%s(pipe=%d, waiting until next vblank)\n", -+ __FUNCTION__, info->pipe)); -+ assert(info->pipe != -1); -+ -+ VG_CLEAR(vbl); -+ vbl.request.type = -+ DRM_VBLANK_RELATIVE | -+ DRM_VBLANK_EVENT | -+ pipe_select(info->pipe); -+ vbl.request.sequence = 1; -+ vbl.request.signal = (uintptr_t)info; -+ -+ assert(!info->queued); -+ if (drmIoctl(info->sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, &vbl)) -+ return false; -+ -+ info->queued = true; -+ return true; -+} -+ -+static inline bool sna_wait_vblank(struct sna_dri2_event *info, -+ unsigned seq) -+{ -+ union drm_wait_vblank vbl; -+ -+ DBG(("%s(pipe=%d, waiting until vblank %u)\n", -+ __FUNCTION__, info->pipe, seq)); -+ assert(info->pipe != -1); -+ -+ VG_CLEAR(vbl); -+ vbl.request.type = -+ DRM_VBLANK_ABSOLUTE | -+ DRM_VBLANK_EVENT | -+ pipe_select(info->pipe); -+ vbl.request.sequence = seq; -+ vbl.request.signal = (uintptr_t)info; -+ -+ assert(!info->queued); -+ if (drmIoctl(info->sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, &vbl)) -+ return false; -+ -+ info->queued = true; -+ return true; - } - - #if DRI2INFOREC_VERSION >= 4 -@@ -1195,6 +1537,7 @@ draw_current_msc(DrawablePtr draw, xf86CrtcPtr crtc, uint64_t msc) - { - struct dri2_window *priv; - -+ assert(draw); - if (draw->type != DRAWABLE_WINDOW) - return msc; - -@@ -1206,6 +1549,9 @@ draw_current_msc(DrawablePtr draw, xf86CrtcPtr crtc, uint64_t msc) - priv->crtc = crtc; - priv->msc_delta = 0; - priv->chain = NULL; -+ priv->scanout = -1; -+ priv->cache_size = 0; -+ list_init(&priv->cache); - dri2_window_attach((WindowPtr)draw, priv); - } - } else { -@@ -1214,8 +1560,8 @@ draw_current_msc(DrawablePtr draw, xf86CrtcPtr crtc, uint64_t msc) - const struct ust_msc *this = sna_crtc_last_swap(crtc); - DBG(("%s: Window transferring from pipe=%d [msc=%llu] to pipe=%d [msc=%llu], delta now %lld\n", - 
__FUNCTION__, -- sna_crtc_to_pipe(priv->crtc), (long long)last->msc, -- sna_crtc_to_pipe(crtc), (long long)this->msc, -+ sna_crtc_pipe(priv->crtc), (long long)last->msc, -+ sna_crtc_pipe(crtc), (long long)this->msc, - (long long)(priv->msc_delta + this->msc - last->msc))); - priv->msc_delta += this->msc - last->msc; - priv->crtc = crtc; -@@ -1248,57 +1594,119 @@ sna_dri2_get_crtc(DrawablePtr draw) - NULL); - } - --static void --sna_dri2_remove_event(WindowPtr win, struct sna_dri2_event *info) -+static void frame_swap_complete(struct sna_dri2_event *frame, int type) - { -- struct dri2_window *priv; -- struct sna_dri2_event *chain; -- -- assert(win->drawable.type == DRAWABLE_WINDOW); -- DBG(("%s: remove[%p] from window %ld, active? %d\n", -- __FUNCTION__, info, (long)win->drawable.id, info->draw != NULL)); -+ const struct ust_msc *swap; - -- priv = dri2_window(win); -- assert(priv); -- assert(priv->chain != NULL); -+ assert(frame->signal); -+ frame->signal = false; - -- if (priv->chain == info) { -- priv->chain = info->chain; -+ if (frame->client == NULL) { -+ DBG(("%s: client already gone\n", __FUNCTION__)); - return; - } - -- chain = priv->chain; -- while (chain->chain != info) -- chain = chain->chain; -- assert(chain != info); -- assert(info->chain != chain); -- chain->chain = info->chain; -+ assert(frame->draw); -+ -+ swap = sna_crtc_last_swap(frame->crtc); -+ DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc=%lld], tv=%d.%06d\n", -+ __FUNCTION__, type, (long)frame->draw->id, frame->pipe, -+ (long long)swap->msc, -+ (long long)draw_current_msc(frame->draw, frame->crtc, swap->msc), -+ swap->tv_sec, swap->tv_usec)); -+ -+ DRI2SwapComplete(frame->client, frame->draw, -+ draw_current_msc(frame->draw, frame->crtc, swap->msc), -+ swap->tv_sec, swap->tv_usec, -+ type, frame->event_complete, frame->event_data); - } - --static void --sna_dri2_event_free(struct sna_dri2_event *info) -+static void fake_swap_complete(struct sna *sna, ClientPtr client, -+ DrawablePtr 
draw, xf86CrtcPtr crtc, -+ int type, DRI2SwapEventPtr func, void *data) - { -- DrawablePtr draw = info->draw; -+ const struct ust_msc *swap; - -- DBG(("%s(draw?=%d)\n", __FUNCTION__, draw != NULL)); -- if (draw && draw->type == DRAWABLE_WINDOW) -- sna_dri2_remove_event((WindowPtr)draw, info); -+ assert(draw); - -- _sna_dri2_destroy_buffer(info->sna, info->front); -- _sna_dri2_destroy_buffer(info->sna, info->back); -+ if (crtc == NULL) -+ crtc = sna_primary_crtc(sna); - -- while (!list_is_empty(&info->cache)) { -- struct dri_bo *c; -+ swap = sna_crtc_last_swap(crtc); -+ DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc %lld], tv=%d.%06d\n", -+ __FUNCTION__, type, (long)draw->id, crtc ? sna_crtc_pipe(crtc) : -1, -+ (long long)swap->msc, -+ (long long)draw_current_msc(draw, crtc, swap->msc), -+ swap->tv_sec, swap->tv_usec)); - -- c = list_first_entry(&info->cache, struct dri_bo, link); -- list_del(&c->link); -+ DRI2SwapComplete(client, draw, -+ draw_current_msc(draw, crtc, swap->msc), -+ swap->tv_sec, swap->tv_usec, -+ type, func, data); -+} - -- DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); -- if (c->bo) -- kgem_bo_destroy(&info->sna->kgem, c->bo); -+static void -+sna_dri2_remove_event(struct sna_dri2_event *info) -+{ -+ WindowPtr win = (WindowPtr)info->draw; -+ struct dri2_window *priv; - -- free(c); -+ assert(win->drawable.type == DRAWABLE_WINDOW); -+ DBG(("%s: remove[%p] from window %ld, active? 
%d\n", -+ __FUNCTION__, info, (long)win->drawable.id, info->draw != NULL)); -+ assert(!info->signal); -+ -+ priv = dri2_window(win); -+ assert(priv); -+ assert(priv->chain != NULL); -+ assert(info->chained); -+ info->chained = false; -+ -+ if (priv->chain != info) { -+ struct sna_dri2_event *chain = priv->chain; -+ while (chain->chain != info) { -+ assert(chain->chained); -+ chain = chain->chain; -+ } -+ assert(chain != info); -+ assert(info->chain != chain); -+ chain->chain = info->chain; -+ return; -+ } -+ -+ priv->chain = info->chain; -+ if (priv->chain == NULL) { -+ struct dri_bo *c, *tmp; -+ -+ c = list_entry(priv->cache.next->next, struct dri_bo, link); -+ list_for_each_entry_safe_from(c, tmp, &priv->cache, link) { -+ list_del(&c->link); -+ -+ DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); -+ assert(c->bo); -+ kgem_bo_destroy(&info->sna->kgem, c->bo); -+ free(c); -+ } - } -+} -+ -+static void -+sna_dri2_event_free(struct sna_dri2_event *info) -+{ -+ DBG(("%s(draw?=%d)\n", __FUNCTION__, info->draw != NULL)); -+ assert(!info->queued); -+ assert(!info->signal); -+ assert(info->pending.bo == NULL); -+ -+ if (info->sna->dri2.flip_pending == info) -+ info->sna->dri2.flip_pending = NULL; -+ assert(info->sna->dri2.flip_pending != info); -+ if (info->chained) -+ sna_dri2_remove_event(info); -+ -+ assert((info->front == NULL && info->back == NULL) || info->front != info->back); -+ _sna_dri2_destroy_buffer(info->sna, info->draw, info->front); -+ _sna_dri2_destroy_buffer(info->sna, info->draw, info->back); - - if (info->bo) { - DBG(("%s: releasing batch handle=%d\n", __FUNCTION__, info->bo->handle)); -@@ -1331,15 +1739,26 @@ sna_dri2_client_gone(CallbackListPtr *list, void *closure, void *data) - - event = list_first_entry(&priv->events, struct sna_dri2_event, link); - assert(event->client == client); -+ list_del(&event->link); -+ event->signal = false; - -- if (event->queued) { -- if (event->draw) -- 
sna_dri2_remove_event((WindowPtr)event->draw, -- event); -- event->client = NULL; -- event->draw = NULL; -- list_del(&event->link); -- } else -+ if (event->pending.bo) { -+ assert(event->pending.bo->active_scanout > 0); -+ event->pending.bo->active_scanout--; -+ -+ kgem_bo_destroy(&sna->kgem, event->pending.bo); -+ event->pending.bo = NULL; -+ } -+ -+ if (event->chained) -+ sna_dri2_remove_event(event); -+ -+ event->client = NULL; -+ event->draw = NULL; -+ event->keepalive = 1; -+ assert(!event->signal); -+ -+ if (!event->queued) - sna_dri2_event_free(event); - } - -@@ -1365,11 +1784,15 @@ static bool add_event_to_client(struct sna_dri2_event *info, struct sna *sna, Cl - } - - static struct sna_dri2_event * --sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) -+sna_dri2_add_event(struct sna *sna, -+ DrawablePtr draw, -+ ClientPtr client, -+ xf86CrtcPtr crtc) - { - struct dri2_window *priv; - struct sna_dri2_event *info, *chain; - -+ assert(draw != NULL); - assert(draw->type == DRAWABLE_WINDOW); - DBG(("%s: adding event to window %ld)\n", - __FUNCTION__, (long)draw->id)); -@@ -1382,11 +1805,11 @@ sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) - if (info == NULL) - return NULL; - -- list_init(&info->cache); - info->sna = sna; - info->draw = draw; -- info->crtc = priv->crtc; -- info->pipe = sna_crtc_to_pipe(priv->crtc); -+ info->crtc = crtc; -+ info->pipe = sna_crtc_pipe(crtc); -+ info->keepalive = 1; - - if (!add_event_to_client(info, sna, client)) { - free(info); -@@ -1394,6 +1817,7 @@ sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) - } - - assert(priv->chain != info); -+ info->chained = true; - - if (priv->chain == NULL) { - priv->chain = info; -@@ -1409,6 +1833,66 @@ sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) - return info; - } - -+static void decouple_window(WindowPtr win, -+ struct dri2_window *priv, -+ struct sna *sna, -+ bool signal) -+{ -+ if (priv->front) { -+ 
DBG(("%s: decouple private front\n", __FUNCTION__)); -+ assert(priv->crtc); -+ sna_shadow_unset_crtc(sna, priv->crtc); -+ -+ _sna_dri2_destroy_buffer(sna, NULL, priv->front); -+ priv->front = NULL; -+ } -+ -+ if (priv->chain) { -+ struct sna_dri2_event *info, *chain; -+ -+ DBG(("%s: freeing chain\n", __FUNCTION__)); -+ -+ chain = priv->chain; -+ while ((info = chain)) { -+ DBG(("%s: freeing event, pending signal? %d, pending swap? handle=%d\n", -+ __FUNCTION__, info->signal, -+ info->pending.bo ? info->pending.bo->handle : 0)); -+ assert(info->draw == &win->drawable); -+ -+ if (info->pending.bo) { -+ if (signal) { -+ bool was_signalling = info->signal; -+ info->signal = true; -+ frame_swap_complete(info, DRI2_EXCHANGE_COMPLETE); -+ info->signal = was_signalling; -+ } -+ assert(info->pending.bo->active_scanout > 0); -+ info->pending.bo->active_scanout--; -+ -+ kgem_bo_destroy(&sna->kgem, info->pending.bo); -+ info->pending.bo = NULL; -+ } -+ -+ if (info->signal && signal) -+ frame_swap_complete(info, DRI2_EXCHANGE_COMPLETE); -+ info->signal = false; -+ info->draw = NULL; -+ info->keepalive = 1; -+ assert(!info->signal); -+ list_del(&info->link); -+ -+ chain = info->chain; -+ info->chain = NULL; -+ info->chained = false; -+ -+ if (!info->queued) -+ sna_dri2_event_free(info); -+ } -+ -+ priv->chain = NULL; -+ } -+} -+ - void sna_dri2_decouple_window(WindowPtr win) - { - struct dri2_window *priv; -@@ -1418,50 +1902,34 @@ void sna_dri2_decouple_window(WindowPtr win) - return; - - DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); -+ decouple_window(win, priv, to_sna_from_drawable(&win->drawable), true); - -- if (priv->front) { -- struct sna *sna = to_sna_from_drawable(&win->drawable); -- assert(priv->crtc); -- sna_shadow_unset_crtc(sna, priv->crtc); -- _sna_dri2_destroy_buffer(sna, priv->front); -- priv->front = NULL; -- } -+ priv->scanout = -1; - } - - void sna_dri2_destroy_window(WindowPtr win) - { - struct dri2_window *priv; -+ struct sna *sna; - - priv = 
dri2_window(win); - if (priv == NULL) - return; - - DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); -+ sna = to_sna_from_drawable(&win->drawable); -+ decouple_window(win, priv, sna, false); - -- if (priv->front) { -- struct sna *sna = to_sna_from_drawable(&win->drawable); -- assert(priv->crtc); -- sna_shadow_unset_crtc(sna, priv->crtc); -- _sna_dri2_destroy_buffer(sna, priv->front); -- } -- -- if (priv->chain) { -- struct sna_dri2_event *info, *chain; -- -- DBG(("%s: freeing chain\n", __FUNCTION__)); -- -- chain = priv->chain; -- while ((info = chain)) { -- info->draw = NULL; -- info->client = NULL; -- list_del(&info->link); -+ while (!list_is_empty(&priv->cache)) { -+ struct dri_bo *c; - -- chain = info->chain; -- info->chain = NULL; -+ c = list_first_entry(&priv->cache, struct dri_bo, link); -+ list_del(&c->link); - -- if (!info->queued) -- sna_dri2_event_free(info); -- } -+ DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); -+ assert(c->bo); -+ kgem_bo_destroy(&sna->kgem, c->bo); -+ free(c); - } - - free(priv); -@@ -1479,19 +1947,30 @@ sna_dri2_flip(struct sna_dri2_event *info) - { - struct kgem_bo *bo = get_private(info->back)->bo; - struct kgem_bo *tmp_bo; -- uint32_t tmp_name; -+ uint32_t tmp_name, tmp_flags; - int tmp_pitch; - - DBG(("%s(type=%d)\n", __FUNCTION__, info->type)); - - assert(sna_pixmap_get_buffer(info->sna->front) == info->front); - assert(get_drawable_pixmap(info->draw)->drawable.height * bo->pitch <= kgem_bo_size(bo)); -+ assert(get_private(info->front)->size == get_private(info->back)->size); - assert(bo->refcnt); - -+ if (info->sna->mode.flip_active) { -+ DBG(("%s: %d flips still active, aborting\n", -+ __FUNCTION__, info->sna->mode.flip_active)); -+ return false; -+ } -+ -+ assert(!info->queued); - if (!sna_page_flip(info->sna, bo, sna_dri2_flip_handler, - info->type == FLIP_ASYNC ? NULL : info)) - return false; - -+ DBG(("%s: queued flip=%p\n", __FUNCTION__, info->type == FLIP_ASYNC ? 
NULL : info)); -+ assert(info->signal || info->type != FLIP_THROTTLE); -+ - assert(info->sna->dri2.flip_pending == NULL || - info->sna->dri2.flip_pending == info); - if (info->type != FLIP_ASYNC) -@@ -1505,13 +1984,21 @@ sna_dri2_flip(struct sna_dri2_event *info) - tmp_bo = get_private(info->front)->bo; - tmp_name = info->front->name; - tmp_pitch = info->front->pitch; -+ tmp_flags = info->front->flags; -+ -+ assert(tmp_bo->active_scanout > 0); -+ tmp_bo->active_scanout--; - - set_bo(info->sna->front, bo); - -+ info->front->flags = info->back->flags; - info->front->name = info->back->name; - info->front->pitch = info->back->pitch; - get_private(info->front)->bo = bo; -+ bo->active_scanout++; -+ assert(bo->active_scanout <= bo->refcnt); - -+ info->back->flags = tmp_flags; - info->back->name = tmp_name; - info->back->pitch = tmp_pitch; - get_private(info->back)->bo = tmp_bo; -@@ -1521,6 +2008,7 @@ sna_dri2_flip(struct sna_dri2_event *info) - assert(get_private(info->back)->bo->refcnt); - assert(get_private(info->front)->bo != get_private(info->back)->bo); - -+ info->keepalive = KEEPALIVE; - info->queued = true; - return true; - } -@@ -1549,15 +2037,16 @@ can_flip(struct sna * sna, - } - - assert(sna->scrn->vtSema); -+ assert(!sna->mode.hidden); - - if ((sna->flags & (SNA_HAS_FLIP | SNA_HAS_ASYNC_FLIP)) == 0) { - DBG(("%s: no, pageflips disabled\n", __FUNCTION__)); - return false; - } - -- if (front->format != back->format) { -+ if (front->cpp != back->cpp) { - DBG(("%s: no, format mismatch, front = %d, back = %d\n", -- __FUNCTION__, front->format, back->format)); -+ __FUNCTION__, front->cpp, back->cpp)); - return false; - } - -@@ -1567,7 +2056,7 @@ can_flip(struct sna * sna, - } - - if (!sna_crtc_is_on(crtc)) { -- DBG(("%s: ref-pipe=%d is disabled\n", __FUNCTION__, sna_crtc_to_pipe(crtc))); -+ DBG(("%s: ref-pipe=%d is disabled\n", __FUNCTION__, sna_crtc_pipe(crtc))); - return false; - } - -@@ -1581,7 +2070,7 @@ can_flip(struct sna * sna, - if 
(sna_pixmap_get_buffer(pixmap) != front) { - DBG(("%s: no, DRI2 drawable is no longer attached (old name=%d, new name=%d) to pixmap=%ld\n", - __FUNCTION__, front->name, -- sna_pixmap_get_buffer(pixmap) ? ((DRI2BufferPtr)sna_pixmap_get_buffer(pixmap))->name : 0, -+ sna_pixmap_get_buffer(pixmap) ? sna_pixmap_get_buffer(pixmap)->name : 0, - pixmap->drawable.serialNumber)); - return false; - } -@@ -1661,7 +2150,6 @@ can_flip(struct sna * sna, - } - - DBG(("%s: yes, pixmap=%ld\n", __FUNCTION__, pixmap->drawable.serialNumber)); -- assert(dri2_window(win)->front == NULL); - return true; - } - -@@ -1680,9 +2168,9 @@ can_xchg(struct sna *sna, - if (draw->type == DRAWABLE_PIXMAP) - return false; - -- if (front->format != back->format) { -+ if (front->cpp != back->cpp) { - DBG(("%s: no, format mismatch, front = %d, back = %d\n", -- __FUNCTION__, front->format, back->format)); -+ __FUNCTION__, front->cpp, back->cpp)); - return false; - } - -@@ -1714,6 +2202,8 @@ can_xchg(struct sna *sna, - return false; - } - -+ DBG(("%s: back size=%x, front size=%x\n", -+ __FUNCTION__, get_private(back)->size, get_private(front)->size)); - if (get_private(back)->size != get_private(front)->size) { - DBG(("%s: no, back buffer %dx%d does not match front buffer %dx%d\n", - __FUNCTION__, -@@ -1766,9 +2256,9 @@ overlaps_other_crtc(struct sna *sna, xf86CrtcPtr desired) - static bool - can_xchg_crtc(struct sna *sna, - DrawablePtr draw, -+ xf86CrtcPtr crtc, - DRI2BufferPtr front, -- DRI2BufferPtr back, -- xf86CrtcPtr crtc) -+ DRI2BufferPtr back) - { - WindowPtr win = (WindowPtr)draw; - PixmapPtr pixmap; -@@ -1785,9 +2275,9 @@ can_xchg_crtc(struct sna *sna, - if (draw->type == DRAWABLE_PIXMAP) - return false; - -- if (front->format != back->format) { -+ if (front->cpp != back->cpp) { - DBG(("%s: no, format mismatch, front = %d, back = %d\n", -- __FUNCTION__, front->format, back->format)); -+ __FUNCTION__, front->cpp, back->cpp)); - return false; - } - -@@ -1866,20 +2356,21 @@ sna_dri2_xchg(DrawablePtr 
draw, DRI2BufferPtr front, DRI2BufferPtr back) - - back_bo = get_private(back)->bo; - front_bo = get_private(front)->bo; -- assert(front_bo != back_bo); - -- DBG(("%s: win=%ld, exchange front=%d/%d and back=%d/%d, pixmap=%ld %dx%d\n", -+ DBG(("%s: win=%ld, exchange front=%d/%d,ref=%d and back=%d/%d,ref=%d, pixmap=%ld %dx%d\n", - __FUNCTION__, win->drawable.id, -- front_bo->handle, front->name, -- back_bo->handle, back->name, -+ front_bo->handle, front->name, get_private(front)->refcnt, -+ back_bo->handle, back->name, get_private(back)->refcnt, - pixmap->drawable.serialNumber, - pixmap->drawable.width, - pixmap->drawable.height)); - -- DBG(("%s: back_bo pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", -- __FUNCTION__, back_bo->pitch, kgem_bo_size(back_bo), back_bo->refcnt, back_bo->active_scanout)); -- DBG(("%s: front_bo pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", -- __FUNCTION__, front_bo->pitch, kgem_bo_size(front_bo), front_bo->refcnt, front_bo->active_scanout)); -+ DBG(("%s: back_bo handle=%d, pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", -+ __FUNCTION__, back_bo->handle, back_bo->pitch, kgem_bo_size(back_bo), back_bo->refcnt, back_bo->active_scanout)); -+ DBG(("%s: front_bo handle=%d, pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", -+ __FUNCTION__, front_bo->handle, front_bo->pitch, kgem_bo_size(front_bo), front_bo->refcnt, front_bo->active_scanout)); -+ -+ assert(front_bo != back_bo); - assert(front_bo->refcnt); - assert(back_bo->refcnt); - -@@ -1894,6 +2385,11 @@ sna_dri2_xchg(DrawablePtr draw, DRI2BufferPtr front, DRI2BufferPtr back) - get_private(back)->bo = front_bo; - mark_stale(back); - -+ assert(front_bo->active_scanout > 0); -+ front_bo->active_scanout--; -+ back_bo->active_scanout++; -+ assert(back_bo->active_scanout <= back_bo->refcnt); -+ - tmp = front->name; - front->name = back->name; - back->name = tmp; -@@ -1902,17 +2398,23 @@ sna_dri2_xchg(DrawablePtr draw, DRI2BufferPtr front, DRI2BufferPtr back) - front->pitch = back->pitch; - 
back->pitch = tmp; - -+ tmp = front->flags; -+ front->flags = back->flags; -+ back->flags = tmp; -+ - assert(front_bo->refcnt); - assert(back_bo->refcnt); - -+ assert(front_bo->pitch == get_private(front)->bo->pitch); -+ assert(back_bo->pitch == get_private(back)->bo->pitch); -+ - assert(get_private(front)->bo == sna_pixmap(pixmap)->gpu_bo); - } - - static void sna_dri2_xchg_crtc(struct sna *sna, DrawablePtr draw, xf86CrtcPtr crtc, DRI2BufferPtr front, DRI2BufferPtr back) - { - WindowPtr win = (WindowPtr)draw; -- DRI2Buffer2Ptr tmp; -- struct kgem_bo *bo; -+ struct dri2_window *priv = dri2_window(win); - - DBG(("%s: exchange front=%d/%d and back=%d/%d, win id=%lu, pixmap=%ld %dx%d\n", - __FUNCTION__, -@@ -1922,162 +2424,130 @@ static void sna_dri2_xchg_crtc(struct sna *sna, DrawablePtr draw, xf86CrtcPtr cr - get_window_pixmap(win)->drawable.serialNumber, - get_window_pixmap(win)->drawable.width, - get_window_pixmap(win)->drawable.height)); -+ assert(can_xchg_crtc(sna, draw, crtc, front, back)); - -- DamageRegionAppend(&win->drawable, &win->clipList); -+ if (APPLY_DAMAGE) { -+ DBG(("%s: marking drawable as damaged\n", __FUNCTION__)); -+ sna->ignore_copy_area = sna->flags & SNA_TEAR_FREE; -+ DamageRegionAppend(&win->drawable, &win->clipList); -+ } - sna_shadow_set_crtc(sna, crtc, get_private(back)->bo); -- DamageRegionProcessPending(&win->drawable); -+ if (APPLY_DAMAGE) { -+ sna->ignore_copy_area = false; -+ DamageRegionProcessPending(&win->drawable); -+ } - -- assert(dri2_window(win)->front == NULL); -+ if (priv->front == NULL) { -+ DRI2Buffer2Ptr tmp; - -- tmp = calloc(1, sizeof(*tmp) + sizeof(struct sna_dri2_private)); -- if (tmp == NULL) { -- back->attachment = -1; -- if (get_private(back)->proxy == NULL) { -- get_private(back)->pixmap = get_window_pixmap(win); -- get_private(back)->proxy = sna_dri2_reference_buffer(sna_pixmap_get_buffer(get_private(back)->pixmap)); -+ tmp = calloc(1, sizeof(*tmp) + sizeof(struct sna_dri2_private)); -+ if (tmp == NULL) { -+ 
sna_shadow_unset_crtc(sna, crtc); -+ return; - } -- dri2_window(win)->front = sna_dri2_reference_buffer(back); -- return; -- } - -- *tmp = *back; -- tmp->attachment = DRI2BufferFrontLeft; -- tmp->driverPrivate = tmp + 1; -- get_private(tmp)->refcnt = 1; -- get_private(tmp)->bo = get_private(back)->bo; -- get_private(tmp)->size = get_private(back)->size; -- get_private(tmp)->pixmap = get_window_pixmap(win); -- get_private(tmp)->proxy = sna_dri2_reference_buffer(sna_pixmap_get_buffer(get_private(tmp)->pixmap)); -- dri2_window(win)->front = tmp; -- -- DBG(("%s: allocating new backbuffer\n", __FUNCTION__)); -- back->name = 0; -- bo = kgem_create_2d(&sna->kgem, -- draw->width, draw->height, draw->bitsPerPixel, -- get_private(back)->bo->tiling, -- CREATE_SCANOUT); -- if (bo != NULL) { -- get_private(back)->bo = bo; -- back->pitch = bo->pitch; -- back->name = kgem_bo_flink(&sna->kgem, bo); -- } -- if (back->name == 0) { -- if (bo != NULL) -- kgem_bo_destroy(&sna->kgem, bo); -- get_private(back)->bo = NULL; -- back->attachment = -1; -+ tmp->attachment = DRI2BufferFrontLeft; -+ tmp->driverPrivate = tmp + 1; -+ tmp->cpp = back->cpp; -+ tmp->format = back->format; -+ -+ get_private(tmp)->refcnt = 1; -+ get_private(tmp)->bo = kgem_create_2d(&sna->kgem, -+ draw->width, draw->height, draw->bitsPerPixel, -+ get_private(back)->bo->tiling, -+ CREATE_SCANOUT | CREATE_EXACT); -+ if (get_private(tmp)->bo != NULL) { -+ tmp->pitch = get_private(tmp)->bo->pitch; -+ tmp->name = kgem_bo_flink(&sna->kgem, get_private(tmp)->bo); -+ } -+ if (tmp->name == 0) { -+ if (get_private(tmp)->bo != NULL) -+ kgem_bo_destroy(&sna->kgem, get_private(tmp)->bo); -+ sna_shadow_unset_crtc(sna, crtc); -+ return; -+ } -+ get_private(tmp)->size = get_private(back)->size; -+ get_private(tmp)->pixmap = get_private(front)->pixmap; -+ get_private(tmp)->proxy = sna_dri2_reference_buffer(front); -+ get_private(tmp)->bo->active_scanout++; -+ -+ priv->front = front = tmp; - } --} -+ assert(front == priv->front); - 
--static void frame_swap_complete(struct sna_dri2_event *frame, int type) --{ -- const struct ust_msc *swap; -+ { -+ struct kgem_bo *front_bo = get_private(front)->bo; -+ struct kgem_bo *back_bo = get_private(back)->bo; -+ unsigned tmp; - -- if (frame->draw == NULL) -- return; -+ assert(front_bo->refcnt); -+ assert(back_bo->refcnt); - -- assert(frame->client); -+ get_private(back)->bo = front_bo; -+ get_private(front)->bo = back_bo; -+ mark_stale(back); - -- swap = sna_crtc_last_swap(frame->crtc); -- DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc=%lld], tv=%d.%06d\n", -- __FUNCTION__, type, (long)frame->draw, frame->pipe, -- (long long)swap->msc, -- (long long)draw_current_msc(frame->draw, frame->crtc, swap->msc), -- swap->tv_sec, swap->tv_usec)); -+ assert(front_bo->active_scanout > 0); -+ front_bo->active_scanout--; -+ back_bo->active_scanout++; -+ assert(back_bo->active_scanout <= back_bo->refcnt); - -- DRI2SwapComplete(frame->client, frame->draw, -- draw_current_msc(frame->draw, frame->crtc, swap->msc), -- swap->tv_sec, swap->tv_usec, -- type, frame->event_complete, frame->event_data); --} -+ tmp = front->name; -+ front->name = back->name; -+ back->name = tmp; - --static void fake_swap_complete(struct sna *sna, ClientPtr client, -- DrawablePtr draw, xf86CrtcPtr crtc, -- int type, DRI2SwapEventPtr func, void *data) --{ -- const struct ust_msc *swap; -- -- swap = sna_crtc_last_swap(crtc); -- DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc %lld], tv=%d.%06d\n", -- __FUNCTION__, type, (long)draw->id, crtc ? 
sna_crtc_to_pipe(crtc) : -1, -- (long long)swap->msc, -- (long long)draw_current_msc(draw, crtc, swap->msc), -- swap->tv_sec, swap->tv_usec)); -+ tmp = front->pitch; -+ front->pitch = back->pitch; -+ back->pitch = tmp; - -- DRI2SwapComplete(client, draw, -- draw_current_msc(draw, crtc, swap->msc), -- swap->tv_sec, swap->tv_usec, -- type, func, data); -+ tmp = front->flags; -+ front->flags = back->flags; -+ back->flags = tmp; -+ } - } - - static void chain_swap(struct sna_dri2_event *chain) - { -- union drm_wait_vblank vbl; -+ DBG(("%s: draw=%ld, queued?=%d, type=%d\n", -+ __FUNCTION__, (long)chain->draw->id, chain->queued, chain->type)); -+ -+ if (chain->queued) /* too early! */ -+ return; - - if (chain->draw == NULL) { - sna_dri2_event_free(chain); - return; - } - -- if (chain->queued) /* too early! */ -- return; -- - assert(chain == dri2_chain(chain->draw)); -- DBG(("%s: chaining draw=%ld, type=%d\n", -- __FUNCTION__, (long)chain->draw->id, chain->type)); -- chain->queued = true; -+ assert(chain->signal); - - switch (chain->type) { -- case SWAP_THROTTLE: -+ case SWAP_COMPLETE: - DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__)); -- if (chain->sna->mode.shadow && -- !chain->sna->mode.shadow_damage) { -- /* recursed from wait_for_shadow(), simply requeue */ -- DBG(("%s -- recursed from wait_for_shadow(), requeuing\n", __FUNCTION__)); -- VG_CLEAR(vbl); -- vbl.request.type = -- DRM_VBLANK_RELATIVE | -- DRM_VBLANK_EVENT; -- vbl.request.sequence = 1; -- vbl.request.signal = (uintptr_t)chain; -- -- if (!sna_wait_vblank(chain->sna, &vbl, chain->pipe)) -- return; -- -- DBG(("%s -- requeue failed, errno=%d\n", __FUNCTION__, errno)); -- } -- - if (can_xchg(chain->sna, chain->draw, chain->front, chain->back)) { - sna_dri2_xchg(chain->draw, chain->front, chain->back); -- } else if (can_xchg_crtc(chain->sna, chain->draw, chain->front, chain->back, chain->crtc)) { -- sna_dri2_xchg_crtc(chain->sna, chain->draw, chain->crtc, chain->front, chain->back); -+ } else if 
(can_xchg_crtc(chain->sna, chain->draw, chain->crtc, -+ chain->front, chain->back)) { -+ sna_dri2_xchg_crtc(chain->sna, chain->draw, chain->crtc, -+ chain->front, chain->back); - } else { -- assert(chain->queued); -- chain->bo = __sna_dri2_copy_region(chain->sna, chain->draw, NULL, -- chain->back, chain->front, -- true); -+ __sna_dri2_copy_event(chain, chain->sync | DRI2_BO); - } -+ assert(get_private(chain->back)->bo != get_private(chain->front)->bo); - case SWAP: - break; - default: - return; - } - -- VG_CLEAR(vbl); -- vbl.request.type = -- DRM_VBLANK_RELATIVE | -- DRM_VBLANK_EVENT; -- vbl.request.sequence = 1; -- vbl.request.signal = (uintptr_t)chain; -- if (sna_wait_vblank(chain->sna, &vbl, chain->pipe)) { -+ if ((chain->type == SWAP_COMPLETE && -+ !swap_limit(chain->draw, 2 + !chain->sync) && -+ !chain->sync) || -+ !sna_next_vblank(chain)) { - DBG(("%s: vblank wait failed, unblocking client\n", __FUNCTION__)); - frame_swap_complete(chain, DRI2_BLIT_COMPLETE); - sna_dri2_event_free(chain); -- } else { -- if (chain->type == SWAP_THROTTLE && !swap_limit(chain->draw, 2)) { -- DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); -- frame_swap_complete(chain, DRI2_BLIT_COMPLETE); -- } - } - } - -@@ -2086,40 +2556,27 @@ static inline bool rq_is_busy(struct kgem *kgem, struct kgem_bo *bo) - if (bo == NULL) - return false; - -- DBG(("%s: handle=%d, domain: %d exec? %d, rq? 
%d\n", __FUNCTION__, -- bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); -- assert(bo->refcnt); -- -- if (bo->exec) -- return true; -- -- if (bo->rq == NULL) -- return false; -- -- return __kgem_busy(kgem, bo->handle); -+ return __kgem_bo_is_busy(kgem, bo); - } - --static bool sna_dri2_blit_complete(struct sna *sna, -- struct sna_dri2_event *info) -+static bool sna_dri2_blit_complete(struct sna_dri2_event *info) - { -- if (rq_is_busy(&sna->kgem, info->bo)) { -- union drm_wait_vblank vbl; -+ if (!info->bo) -+ return true; - -+ if (__kgem_bo_is_busy(&info->sna->kgem, info->bo)) { - DBG(("%s: vsync'ed blit is still busy, postponing\n", - __FUNCTION__)); -- -- VG_CLEAR(vbl); -- vbl.request.type = -- DRM_VBLANK_RELATIVE | -- DRM_VBLANK_EVENT; -- vbl.request.sequence = 1; -- vbl.request.signal = (uintptr_t)info; -- assert(info->queued); -- if (!sna_wait_vblank(sna, &vbl, info->pipe)) -+ if (sna_next_vblank(info)) - return false; -+ -+ kgem_bo_sync__gtt(&info->sna->kgem, info->bo); - } - - DBG(("%s: blit finished\n", __FUNCTION__)); -+ kgem_bo_destroy(&info->sna->kgem, info->bo); -+ info->bo = NULL; -+ - return true; - } - -@@ -2128,11 +2585,12 @@ void sna_dri2_vblank_handler(struct drm_event_vblank *event) - struct sna_dri2_event *info = (void *)(uintptr_t)event->user_data; - struct sna *sna = info->sna; - DrawablePtr draw; -- union drm_wait_vblank vbl; - uint64_t msc; - -- DBG(("%s(type=%d, sequence=%d)\n", __FUNCTION__, info->type, event->sequence)); -+ DBG(("%s(type=%d, sequence=%d, draw=%ld)\n", __FUNCTION__, info->type, event->sequence, info->draw ? info->draw->serialNumber : 0)); - assert(info->queued); -+ info->queued = false; -+ - msc = sna_crtc_record_event(info->crtc, event); - - draw = info->draw; -@@ -2141,68 +2599,120 @@ void sna_dri2_vblank_handler(struct drm_event_vblank *event) - goto done; - } - -+ assert((info->front == NULL && info->back == NULL) || info->front != info->back); - switch (info->type) { - case FLIP: - /* If we can still flip... 
*/ -+ assert(info->signal); - if (can_flip(sna, draw, info->front, info->back, info->crtc) && - sna_dri2_flip(info)) - return; - - /* else fall through to blit */ - case SWAP: -- assert(info->queued); -- if (sna->mode.shadow && !sna->mode.shadow_damage) { -- /* recursed from wait_for_shadow(), simply requeue */ -- DBG(("%s -- recursed from wait_for_shadow(), requeuing\n", __FUNCTION__)); -- -- } else if (can_xchg(info->sna, draw, info->front, info->back)) { -+ assert(info->signal); -+ if (can_xchg(info->sna, draw, info->front, info->back)) { - sna_dri2_xchg(draw, info->front, info->back); -- info->type = SWAP_WAIT; -- } else if (can_xchg_crtc(sna, draw, info->front, info->back, info->crtc)) { -- sna_dri2_xchg_crtc(sna, draw, info->crtc, info->front, info->back); -- info->type = SWAP_WAIT; -+ info->type = SWAP_COMPLETE; -+ } else if (can_xchg_crtc(sna, draw, info->crtc, -+ info->front, info->back)) { -+ sna_dri2_xchg_crtc(sna, draw, info->crtc, -+ info->front, info->back); -+ info->type = SWAP_COMPLETE; - } else { -- assert(info->queued); -- info->bo = __sna_dri2_copy_region(sna, draw, NULL, -- info->back, info->front, true); -- info->type = SWAP_WAIT; -+ __sna_dri2_copy_event(info, DRI2_BO | DRI2_SYNC); -+ info->type = SWAP_COMPLETE; - } - -- VG_CLEAR(vbl); -- vbl.request.type = -- DRM_VBLANK_RELATIVE | -- DRM_VBLANK_EVENT; -- vbl.request.sequence = 1; -- vbl.request.signal = (uintptr_t)info; -- -- assert(info->queued); -- if (!sna_wait_vblank(sna, &vbl, info->pipe)) -+ if (sna_next_vblank(info)) - return; - - DBG(("%s -- requeue failed, errno=%d\n", __FUNCTION__, errno)); -+ assert(info->pending.bo == NULL); -+ assert(info->keepalive == 1); - /* fall through to SwapComplete */ -- case SWAP_WAIT: -- if (!sna_dri2_blit_complete(sna, info)) -- return; -- -- DBG(("%s: swap complete, unblocking client (frame=%d, tv=%d.%06d)\n", __FUNCTION__, -- event->sequence, event->tv_sec, event->tv_usec)); -- frame_swap_complete(info, DRI2_BLIT_COMPLETE); -- break; -- -- case 
SWAP_THROTTLE: -+ case SWAP_COMPLETE: - DBG(("%s: %d complete, frame=%d tv=%d.%06d\n", - __FUNCTION__, info->type, - event->sequence, event->tv_sec, event->tv_usec)); - -- if (xorg_can_triple_buffer()) { -- if (!sna_dri2_blit_complete(sna, info)) -+ if (info->signal) { -+ if (!sna_dri2_blit_complete(info)) - return; - - DBG(("%s: triple buffer swap complete, unblocking client (frame=%d, tv=%d.%06d)\n", __FUNCTION__, - event->sequence, event->tv_sec, event->tv_usec)); - frame_swap_complete(info, DRI2_BLIT_COMPLETE); - } -+ -+ if (info->pending.bo) { -+ struct copy current_back; -+ -+ DBG(("%s: swapping back handle=%d [name=%d, active=%d] for pending handle=%d [name=%d, active=%d], front handle=%d [name=%d, active=%d]\n", -+ __FUNCTION__, -+ get_private(info->back)->bo->handle, info->back->name, get_private(info->back)->bo->active_scanout, -+ info->pending.bo->handle, info->pending.name, info->pending.bo->active_scanout, -+ get_private(info->front)->bo->handle, info->front->name, get_private(info->front)->bo->active_scanout)); -+ -+ assert(info->pending.bo->active_scanout > 0); -+ info->pending.bo->active_scanout--; -+ -+ current_back.bo = get_private(info->back)->bo; -+ current_back.size = get_private(info->back)->size; -+ current_back.name = info->back->name; -+ current_back.flags = info->back->flags; -+ -+ get_private(info->back)->bo = info->pending.bo; -+ get_private(info->back)->size = info->pending.size; -+ info->back->name = info->pending.name; -+ info->back->pitch = info->pending.bo->pitch; -+ info->back->flags = info->pending.flags; -+ info->pending.bo = NULL; -+ -+ assert(get_private(info->back)->bo != get_private(info->front)->bo); -+ -+ if (can_xchg(info->sna, info->draw, info->front, info->back)) -+ sna_dri2_xchg(info->draw, info->front, info->back); -+ else if (can_xchg_crtc(info->sna, info->draw, info->crtc, -+ info->front, info->back)) -+ sna_dri2_xchg_crtc(info->sna, info->draw, info->crtc, -+ info->front, info->back); -+ else -+ 
__sna_dri2_copy_event(info, info->sync | DRI2_BO); -+ -+ sna_dri2_cache_bo(info->sna, info->draw, -+ get_private(info->back)->bo, -+ info->back->name, -+ get_private(info->back)->size, -+ info->back->flags); -+ -+ get_private(info->back)->bo = current_back.bo; -+ get_private(info->back)->size = current_back.size; -+ info->back->name = current_back.name; -+ info->back->pitch = current_back.bo->pitch; -+ info->back->flags = current_back.flags; -+ -+ DBG(("%s: restored current back handle=%d [name=%d, active=%d], active=%d], front handle=%d [name=%d, active=%d]\n", -+ __FUNCTION__, -+ get_private(info->back)->bo->handle, info->back->name, get_private(info->back)->bo->active_scanout, -+ get_private(info->front)->bo->handle, info->front->name, get_private(info->front)->bo->active_scanout)); -+ -+ assert(info->draw); -+ assert(!info->signal); -+ info->keepalive++; -+ info->signal = true; -+ } -+ -+ if (--info->keepalive) { -+ if (sna_next_vblank(info)) -+ return; -+ -+ if (info->signal) { -+ DBG(("%s: triple buffer swap complete, unblocking client (frame=%d, tv=%d.%06d)\n", __FUNCTION__, -+ event->sequence, event->tv_sec, event->tv_usec)); -+ frame_swap_complete(info, DRI2_BLIT_COMPLETE); -+ } -+ } - break; - - case WAITMSC: -@@ -2218,11 +2728,11 @@ void sna_dri2_vblank_handler(struct drm_event_vblank *event) - } - - if (info->chain) { -+ DBG(("%s: continuing chain\n", __FUNCTION__)); - assert(info->chain != info); - assert(info->draw == draw); -- sna_dri2_remove_event((WindowPtr)draw, info); -+ sna_dri2_remove_event(info); - chain_swap(info->chain); -- info->draw = NULL; - } - - done: -@@ -2230,101 +2740,148 @@ done: - DBG(("%s complete\n", __FUNCTION__)); - } - --static bool -+static void - sna_dri2_immediate_blit(struct sna *sna, - struct sna_dri2_event *info, -- bool sync, bool event) -+ bool sync) - { -- DrawablePtr draw = info->draw; -- bool ret = false; -+ struct sna_dri2_event *chain = dri2_chain(info->draw); - - if (sna->flags & SNA_NO_WAIT) - sync = false; - -- 
DBG(("%s: emitting immediate blit, throttling client, synced? %d, chained? %d, send-event? %d\n", -- __FUNCTION__, sync, dri2_chain(draw) != info, -- event)); -+ DBG(("%s: emitting immediate blit, throttling client, synced? %d, chained? %d, pipe %d\n", -+ __FUNCTION__, sync, chain != info, info->pipe)); -+ assert(chain); - -- info->type = SWAP_THROTTLE; -- if (!sync || dri2_chain(draw) == info) { -- DBG(("%s: no pending blit, starting chain\n", -- __FUNCTION__)); -+ info->type = SWAP_COMPLETE; -+ info->sync = sync; -+ info->keepalive = KEEPALIVE; - -- info->queued = true; -- info->bo = __sna_dri2_copy_region(sna, draw, NULL, -- info->back, -- info->front, -- sync); -- if (event) { -- if (sync) { -- union drm_wait_vblank vbl; -- -- VG_CLEAR(vbl); -- vbl.request.type = -- DRM_VBLANK_RELATIVE | -- DRM_VBLANK_EVENT; -- vbl.request.sequence = 1; -- vbl.request.signal = (uintptr_t)info; -- ret = !sna_wait_vblank(sna, &vbl, info->pipe); -- if (ret) -- event = !swap_limit(draw, 2); -- } -- if (event) { -- DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); -- frame_swap_complete(info, DRI2_BLIT_COMPLETE); -- } -+ if (chain == info) { -+ DBG(("%s: no pending blit, starting chain\n", __FUNCTION__)); -+ -+ assert(info->front != info->back); -+ if (can_xchg(info->sna, info->draw, info->front, info->back)) { -+ sna_dri2_xchg(info->draw, info->front, info->back); -+ } else if (can_xchg_crtc(info->sna, info->draw, info->crtc, -+ info->front, info->back)) { -+ sna_dri2_xchg_crtc(info->sna, info->draw, info->crtc, -+ info->front, info->back); -+ } else -+ __sna_dri2_copy_event(info, sync | DRI2_BO); -+ -+ assert(info->signal); -+ -+ if ((!swap_limit(info->draw, 2 + !sync) && !sync) || -+ !sna_next_vblank(info)) { -+ DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); -+ frame_swap_complete(info, DRI2_BLIT_COMPLETE); -+ sna_dri2_event_free(info); -+ } -+ return; -+ } -+ -+ DBG(("%s: current event front=%d [name=%d, active?=%d], back=%d [name=%d, 
active?=%d]\n", __FUNCTION__, -+ get_private(chain->front)->bo->handle, chain->front->name, get_private(chain->front)->bo->active_scanout, -+ get_private(chain->back)->bo->handle, chain->back->name, get_private(chain->back)->bo->active_scanout)); -+ -+ if (chain->type == SWAP_COMPLETE && chain->front == info->front) { -+ assert(chain->draw == info->draw); -+ assert(chain->client == info->client); -+ assert(chain->event_complete == info->event_complete); -+ assert(chain->event_data == info->event_data); -+ assert(chain->queued); -+ -+ if ((!sync || !chain->sync) && chain->pending.bo) { -+ bool signal = chain->signal; -+ -+ DBG(("%s: swap elision, unblocking client\n", __FUNCTION__)); -+ assert(chain->draw); -+ chain->signal = true; -+ frame_swap_complete(chain, DRI2_EXCHANGE_COMPLETE); -+ chain->signal = signal; -+ -+ assert(chain->pending.bo->active_scanout > 0); -+ chain->pending.bo->active_scanout--; -+ -+ sna_dri2_cache_bo(chain->sna, chain->draw, -+ chain->pending.bo, -+ chain->pending.name, -+ chain->pending.size, -+ chain->pending.flags); -+ chain->pending.bo = NULL; -+ } -+ -+ if (chain->pending.bo == NULL && swap_limit(info->draw, 2 + !sync)) { -+ DBG(("%s: setting handle=%d as pending blit (current event front=%d, back=%d)\n", __FUNCTION__, -+ get_private(info->back)->bo->handle, -+ get_private(chain->front)->bo->handle, -+ get_private(chain->back)->bo->handle)); -+ chain->pending.bo = ref(get_private(info->back)->bo); -+ chain->pending.size = get_private(info->back)->size; -+ chain->pending.name = info->back->name; -+ chain->pending.flags = info->back->flags; -+ chain->sync = sync; -+ info->signal = false; /* transfer signal to pending */ -+ -+ /* Prevent us from handing it back on next GetBuffers */ -+ chain->pending.bo->active_scanout++; -+ -+ sna_dri2_event_free(info); -+ return; - } -- } else { -- DBG(("%s: pending blit, chained\n", __FUNCTION__)); -- ret = true; - } - -- DBG(("%s: continue? 
%d\n", __FUNCTION__, ret)); -- return ret; -+ DBG(("%s: pending blit, chained\n", __FUNCTION__)); - } - - static bool - sna_dri2_flip_continue(struct sna_dri2_event *info) - { -- DBG(("%s(mode=%d)\n", __FUNCTION__, info->mode)); -+ struct kgem_bo *bo = get_private(info->front)->bo; - -- if (info->mode > 0){ -- struct kgem_bo *bo = get_private(info->front)->bo; -+ DBG(("%s(mode=%d)\n", __FUNCTION__, info->flip_continue)); -+ assert(info->flip_continue > 0); -+ info->type = info->flip_continue; -+ info->flip_continue = 0; - -- info->type = info->mode; -+ assert(!info->signal); -+ info->signal = info->type == FLIP_THROTTLE && info->draw; - -- if (bo != sna_pixmap(info->sna->front)->gpu_bo) -- return false; -+ if (info->sna->mode.front_active == 0) -+ return false; - -- if (!sna_page_flip(info->sna, bo, sna_dri2_flip_handler, info)) -- return false; -+ if (bo != sna_pixmap(info->sna->front)->gpu_bo) -+ return false; - -- assert(info->sna->dri2.flip_pending == NULL || -- info->sna->dri2.flip_pending == info); -- info->sna->dri2.flip_pending = info; -- assert(info->queued); -- } else { -- info->type = -info->mode; -+ assert(!info->queued); -+ if (!sna_page_flip(info->sna, bo, sna_dri2_flip_handler, info)) -+ return false; - -- if (!info->draw) -- return false; -+ DBG(("%s: queued flip=%p\n", __FUNCTION__, info)); -+ assert(info->sna->dri2.flip_pending == NULL || -+ info->sna->dri2.flip_pending == info); -+ info->sna->dri2.flip_pending = info; -+ info->queued = true; - -- if (!can_flip(info->sna, info->draw, info->front, info->back, info->crtc)) -- return false; -+ return true; -+} - -- assert(sna_pixmap_get_buffer(get_drawable_pixmap(info->draw)) == info->front); -- if (!sna_dri2_flip(info)) -- return false; -+static bool -+sna_dri2_flip_keepalive(struct sna_dri2_event *info) -+{ -+ DBG(("%s(keepalive?=%d)\n", __FUNCTION__, info->keepalive-1)); -+ assert(info->keepalive > 0); -+ if (!--info->keepalive) -+ return false; - -- if (!xorg_can_triple_buffer()) { -- 
sna_dri2_get_back(info->sna, info->draw, info->back, info); -- DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); -- frame_swap_complete(info, DRI2_FLIP_COMPLETE); -- } -- } -+ if (info->draw == NULL) -+ return false; - -- info->mode = 0; -- return true; -+ DBG(("%s: marking next flip as complete\n", __FUNCTION__)); -+ info->flip_continue = FLIP_COMPLETE; -+ return sna_dri2_flip_continue(info); - } - - static void chain_flip(struct sna *sna) -@@ -2332,8 +2889,8 @@ static void chain_flip(struct sna *sna) - struct sna_dri2_event *chain = sna->dri2.flip_pending; - - assert(chain->type == FLIP); -- DBG(("%s: chaining type=%d, cancelled?=%d\n", -- __FUNCTION__, chain->type, chain->draw == NULL)); -+ DBG(("%s: chaining type=%d, cancelled?=%d window=%ld\n", -+ __FUNCTION__, chain->type, chain->draw == NULL, chain->draw ? chain->draw->id : 0)); - - sna->dri2.flip_pending = NULL; - if (chain->draw == NULL) { -@@ -2343,31 +2900,18 @@ static void chain_flip(struct sna *sna) - - assert(chain == dri2_chain(chain->draw)); - assert(!chain->queued); -- chain->queued = true; - - if (can_flip(sna, chain->draw, chain->front, chain->back, chain->crtc) && - sna_dri2_flip(chain)) { - DBG(("%s: performing chained flip\n", __FUNCTION__)); - } else { - DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__)); -- chain->bo = __sna_dri2_copy_region(sna, chain->draw, NULL, -- chain->back, chain->front, -- true); -+ __sna_dri2_copy_event(chain, DRI2_SYNC); - - if (xorg_can_triple_buffer()) { -- union drm_wait_vblank vbl; -- -- VG_CLEAR(vbl); -- -- chain->type = SWAP_WAIT; -- vbl.request.type = -- DRM_VBLANK_RELATIVE | -- DRM_VBLANK_EVENT; -- vbl.request.sequence = 1; -- vbl.request.signal = (uintptr_t)chain; -- -- assert(chain->queued); -- if (!sna_wait_vblank(sna, &vbl, chain->pipe)) -+ chain->type = SWAP_COMPLETE; -+ assert(chain->signal); -+ if (sna_next_vblank(chain)) - return; - } - -@@ -2381,8 +2925,10 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) 
- { - struct sna *sna = flip->sna; - -- DBG(("%s(pipe=%d, event=%d)\n", __FUNCTION__, flip->pipe, flip->type)); -- assert(flip->queued); -+ DBG(("%s flip=%p (pipe=%d, event=%d, queued?=%d)\n", __FUNCTION__, flip, flip->pipe, flip->type, flip->queued)); -+ if (!flip->queued) /* pageflip died whilst being queued */ -+ return; -+ flip->queued = false; - - if (sna->dri2.flip_pending == flip) - sna->dri2.flip_pending = NULL; -@@ -2390,8 +2936,10 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) - /* We assume our flips arrive in order, so we don't check the frame */ - switch (flip->type) { - case FLIP: -- DBG(("%s: swap complete, unblocking client\n", __FUNCTION__)); -- frame_swap_complete(flip, DRI2_FLIP_COMPLETE); -+ if (flip->signal) { -+ DBG(("%s: swap complete, unblocking client\n", __FUNCTION__)); -+ frame_swap_complete(flip, DRI2_FLIP_COMPLETE); -+ } - sna_dri2_event_free(flip); - - if (sna->dri2.flip_pending) -@@ -2399,27 +2947,35 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) - break; - - case FLIP_THROTTLE: -- DBG(("%s: triple buffer swap complete, unblocking client\n", __FUNCTION__)); -- frame_swap_complete(flip, DRI2_FLIP_COMPLETE); -+ if (flip->signal) { -+ DBG(("%s: triple buffer swap complete, unblocking client\n", __FUNCTION__)); -+ frame_swap_complete(flip, DRI2_FLIP_COMPLETE); -+ } - case FLIP_COMPLETE: -+ assert(!flip->signal); - if (sna->dri2.flip_pending) { -+ DBG(("%s: pending flip\n", __FUNCTION__)); - sna_dri2_event_free(flip); - chain_flip(sna); -- } else if (!flip->mode) { -+ } else if (!flip->flip_continue) { - DBG(("%s: flip chain complete\n", __FUNCTION__)); -+ if (!sna_dri2_flip_keepalive(flip)) { -+ if (flip->chain) { -+ sna_dri2_remove_event(flip); -+ chain_swap(flip->chain); -+ } - -- if (flip->chain) { -- sna_dri2_remove_event((WindowPtr)flip->draw, -- flip); -- chain_swap(flip->chain); -- flip->draw = NULL; -+ sna_dri2_event_free(flip); - } -- -- sna_dri2_event_free(flip); - } else if 
(!sna_dri2_flip_continue(flip)) { - DBG(("%s: no longer able to flip\n", __FUNCTION__)); -- if (flip->draw == NULL || !sna_dri2_immediate_blit(sna, flip, false, flip->mode < 0)) -- sna_dri2_event_free(flip); -+ if (flip->draw != NULL) -+ __sna_dri2_copy_event(flip, 0); -+ if (flip->signal) { -+ DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); -+ frame_swap_complete(flip, DRI2_BLIT_COMPLETE); -+ } -+ sna_dri2_event_free(flip); - } - break; - -@@ -2433,17 +2989,27 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) - } - } - -+static int -+sna_query_vblank(struct sna *sna, xf86CrtcPtr crtc, union drm_wait_vblank *vbl) -+{ -+ VG_CLEAR(*vbl); -+ vbl->request.type = -+ _DRM_VBLANK_RELATIVE | pipe_select(sna_crtc_pipe(crtc)); -+ vbl->request.sequence = 0; -+ -+ return drmIoctl(sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, vbl); -+} -+ - static uint64_t - get_current_msc(struct sna *sna, DrawablePtr draw, xf86CrtcPtr crtc) - { - union drm_wait_vblank vbl; -- uint64_t ret = -1; -+ uint64_t ret; - -- VG_CLEAR(vbl); -- vbl.request.type = _DRM_VBLANK_RELATIVE; -- vbl.request.sequence = 0; -- if (sna_wait_vblank(sna, &vbl, sna_crtc_to_pipe(crtc)) == 0) -+ if (sna_query_vblank(sna, crtc, &vbl) == 0) - ret = sna_crtc_record_vblank(crtc, &vbl); -+ else -+ ret = sna_crtc_last_swap(crtc)->msc; - - return draw_current_msc(draw, crtc, ret); - } -@@ -2494,12 +3060,18 @@ static int use_triple_buffer(struct sna *sna, ClientPtr client, bool async) - } - - static bool immediate_swap(struct sna *sna, -- uint64_t target_msc, -- uint64_t divisor, - DrawablePtr draw, - xf86CrtcPtr crtc, -+ uint64_t *target_msc, -+ uint64_t divisor, -+ uint64_t remainder, - uint64_t *current_msc) - { -+ /* -+ * If divisor is zero, or current_msc is smaller than target_msc -+ * we just need to make sure target_msc passes before initiating -+ * the swap. 
-+ */ - if (divisor == 0) { - *current_msc = -1; - -@@ -2508,72 +3080,97 @@ static bool immediate_swap(struct sna *sna, - return true; - } - -- if (target_msc) -+ if (*target_msc) - *current_msc = get_current_msc(sna, draw, crtc); - - DBG(("%s: current_msc=%ld, target_msc=%ld -- %s\n", -- __FUNCTION__, (long)*current_msc, (long)target_msc, -- (*current_msc >= target_msc - 1) ? "yes" : "no")); -- return *current_msc >= target_msc - 1; -+ __FUNCTION__, (long)*current_msc, (long)*target_msc, -+ (*current_msc >= *target_msc - 1) ? "yes" : "no")); -+ return *current_msc >= *target_msc - 1; - } - - DBG(("%s: explicit waits requests, divisor=%ld\n", - __FUNCTION__, (long)divisor)); - *current_msc = get_current_msc(sna, draw, crtc); -- return false; -+ if (*current_msc >= *target_msc) { -+ DBG(("%s: missed target, queueing event for next: current=%lld, target=%lld, divisor=%lld, remainder=%lld\n", -+ __FUNCTION__, -+ (long long)*current_msc, -+ (long long)*target_msc, -+ (long long)divisor, -+ (long long)remainder)); -+ -+ *target_msc = *current_msc + remainder - *current_msc % divisor; -+ if (*target_msc <= *current_msc) -+ *target_msc += divisor; -+ } -+ -+ DBG(("%s: target_msc=%lld, current_msc=%lld, immediate?=%d\n", -+ __FUNCTION__, (long long)*target_msc, (long long)*current_msc, -+ *current_msc >= *target_msc - 1)); -+ return *current_msc >= *target_msc - 1; - } - - static bool - sna_dri2_schedule_flip(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, - DRI2BufferPtr front, DRI2BufferPtr back, -- CARD64 *target_msc, CARD64 divisor, CARD64 remainder, -+ bool immediate, CARD64 *target_msc, CARD64 current_msc, - DRI2SwapEventPtr func, void *data) - { - struct sna *sna = to_sna_from_drawable(draw); - struct sna_dri2_event *info; -- uint64_t current_msc; -- -- if (immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) { -- int type; - -+ if (immediate) { -+ bool signal = false; - info = sna->dri2.flip_pending; - DBG(("%s: performing immediate swap on pipe %d, 
pending? %d, mode: %d, continuation? %d\n", -- __FUNCTION__, sna_crtc_to_pipe(crtc), -- info != NULL, info ? info->mode : 0, -+ __FUNCTION__, sna_crtc_pipe(crtc), -+ info != NULL, info ? info->flip_continue : 0, - info && info->draw == draw)); - - if (info && info->draw == draw) { - assert(info->type != FLIP); -- assert(info->front == front); -+ assert(info->queued); -+ assert(info->front != info->back); -+ if (info->front != front) { -+ assert(info->front != NULL); -+ _sna_dri2_destroy_buffer(sna, draw, info->front); -+ info->front = sna_dri2_reference_buffer(front); -+ } - if (info->back != back) { -- _sna_dri2_destroy_buffer(sna, info->back); -+ assert(info->back != NULL); -+ _sna_dri2_destroy_buffer(sna, draw, info->back); - info->back = sna_dri2_reference_buffer(back); - } -- if (info->mode || current_msc >= *target_msc) { -- DBG(("%s: executing xchg of pending flip\n", -- __FUNCTION__)); -- sna_dri2_xchg(draw, front, back); -- info->mode = type = FLIP_COMPLETE; -- goto new_back; -- } else { -+ assert(info->front != info->back); -+ DBG(("%s: executing xchg of pending flip: flip_continue=%d, keepalive=%d, chain?=%d\n", __FUNCTION__, info->flip_continue, info->keepalive, current_msc < *target_msc)); -+ sna_dri2_xchg(draw, front, back); -+ info->keepalive = KEEPALIVE; -+ if (xorg_can_triple_buffer() && -+ current_msc < *target_msc) { - DBG(("%s: chaining flip\n", __FUNCTION__)); -- type = FLIP_THROTTLE; -- if (xorg_can_triple_buffer()) -- info->mode = -type; -- else -- info->mode = -FLIP_COMPLETE; -+ info->flip_continue = FLIP_THROTTLE; - goto out; -+ } else { -+ info->flip_continue = FLIP_COMPLETE; -+ signal = info->signal; -+ assert(info->draw); -+ info->signal = true; -+ goto new_back; - } - } - -- info = sna_dri2_add_event(sna, draw, client); -+ info = sna_dri2_add_event(sna, draw, client, crtc); - if (info == NULL) - return false; - - assert(info->crtc == crtc); - info->event_complete = func; - info->event_data = data; -+ assert(info->draw); -+ info->signal 
= true; - -+ assert(front != back); - info->front = sna_dri2_reference_buffer(front); - info->back = sna_dri2_reference_buffer(back); - -@@ -2584,26 +3181,33 @@ sna_dri2_schedule_flip(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, - */ - DBG(("%s: queueing flip after pending completion\n", - __FUNCTION__)); -- info->type = type = FLIP; -+ info->type = FLIP; - sna->dri2.flip_pending = info; -- assert(info->queued); - current_msc++; -+ } else if (sna->mode.flip_active) { -+ DBG(("%s: %d outstanding flips from old client, queueing\n", -+ __FUNCTION__, sna->mode.flip_active)); -+ goto queue; - } else { -- info->type = type = use_triple_buffer(sna, client, *target_msc == 0); -+ info->type = use_triple_buffer(sna, client, *target_msc == 0); - if (!sna_dri2_flip(info)) { - DBG(("%s: flip failed, falling back\n", __FUNCTION__)); -+ info->signal = false; - sna_dri2_event_free(info); - return false; - } -+ assert(get_private(info->front)->bo->active_scanout); - } - -- swap_limit(draw, 1 + (type == FLIP_THROTTLE)); -- if (type >= FLIP_COMPLETE) { -+ swap_limit(draw, 1 + (info->type == FLIP_THROTTLE)); -+ if (info->type >= FLIP_COMPLETE) { - new_back: - if (!xorg_can_triple_buffer()) -- sna_dri2_get_back(sna, draw, back, info); -+ sna_dri2_get_back(sna, draw, back); - DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); - frame_swap_complete(info, DRI2_EXCHANGE_COMPLETE); -+ assert(info->draw); -+ info->signal = signal; - if (info->type == FLIP_ASYNC) - sna_dri2_event_free(info); - } -@@ -2613,57 +3217,34 @@ out: - return true; - } - -- info = sna_dri2_add_event(sna, draw, client); -+queue: -+ if (KEEPALIVE > 1 && sna->dri2.flip_pending) { -+ info = sna->dri2.flip_pending; -+ info->keepalive = 1; -+ } -+ -+ info = sna_dri2_add_event(sna, draw, client, crtc); - if (info == NULL) - return false; - - assert(info->crtc == crtc); - info->event_complete = func; - info->event_data = data; -+ assert(info->draw); -+ info->signal = true; - info->type = FLIP; - 
-+ assert(front != back); - info->front = sna_dri2_reference_buffer(front); - info->back = sna_dri2_reference_buffer(back); - -- /* -- * If divisor is zero, or current_msc is smaller than target_msc -- * we just need to make sure target_msc passes before initiating -- * the swap. -- */ -- if (divisor && current_msc >= *target_msc) { -- DBG(("%s: missed target, queueing event for next: current=%lld, target=%lld, divisor=%lld, remainder=%lld\n", -- __FUNCTION__, -- (long long)current_msc, -- (long long)*target_msc, -- (long long)divisor, -- (long long)remainder)); -- -- *target_msc = current_msc + remainder - current_msc % divisor; -- if (*target_msc <= current_msc) -- *target_msc += divisor; -- } -- -- if (*target_msc <= current_msc + 1) { -- if (!sna_dri2_flip(info)) { -- sna_dri2_event_free(info); -- return false; -- } -+ if (*target_msc <= current_msc + 1 && sna_dri2_flip(info)) { - *target_msc = current_msc + 1; - } else { -- union drm_wait_vblank vbl; -- -- VG_CLEAR(vbl); -- -- vbl.request.type = -- DRM_VBLANK_ABSOLUTE | -- DRM_VBLANK_EVENT; -- - /* Account for 1 frame extra pageflip delay */ -- vbl.reply.sequence = draw_target_seq(draw, *target_msc - 1); -- vbl.request.signal = (uintptr_t)info; -- -- info->queued = true; -- if (sna_wait_vblank(sna, &vbl, info->pipe)) { -+ if (!sna_wait_vblank(info, -+ draw_target_seq(draw, *target_msc - 1))) { -+ info->signal = false; - sna_dri2_event_free(info); - return false; - } -@@ -2674,128 +3255,6 @@ out: - return true; - } - --static bool --sna_dri2_schedule_xchg(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, -- DRI2BufferPtr front, DRI2BufferPtr back, -- CARD64 *target_msc, CARD64 divisor, CARD64 remainder, -- DRI2SwapEventPtr func, void *data) --{ -- struct sna *sna = to_sna_from_drawable(draw); -- uint64_t current_msc; -- bool sync, event; -- -- if (!immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) -- return false; -- -- sync = current_msc < *target_msc; -- event = dri2_chain(draw) == NULL; -- 
if (!sync || event) { -- DBG(("%s: performing immediate xchg on pipe %d\n", -- __FUNCTION__, sna_crtc_to_pipe(crtc))); -- sna_dri2_xchg(draw, front, back); -- } -- if (sync) { -- struct sna_dri2_event *info; -- -- info = sna_dri2_add_event(sna, draw, client); -- if (!info) -- goto complete; -- -- info->event_complete = func; -- info->event_data = data; -- -- info->front = sna_dri2_reference_buffer(front); -- info->back = sna_dri2_reference_buffer(back); -- info->type = SWAP_THROTTLE; -- -- if (event) { -- union drm_wait_vblank vbl; -- -- VG_CLEAR(vbl); -- vbl.request.type = -- DRM_VBLANK_RELATIVE | -- DRM_VBLANK_EVENT; -- vbl.request.sequence = 1; -- vbl.request.signal = (uintptr_t)info; -- -- info->queued = true; -- if (sna_wait_vblank(sna, &vbl, info->pipe)) { -- sna_dri2_event_free(info); -- goto complete; -- } -- -- swap_limit(draw, 2); -- } -- } else { --complete: -- fake_swap_complete(sna, client, draw, crtc, DRI2_EXCHANGE_COMPLETE, func, data); -- } -- -- *target_msc = current_msc + 1; -- return true; --} -- --static bool --sna_dri2_schedule_xchg_crtc(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, -- DRI2BufferPtr front, DRI2BufferPtr back, -- CARD64 *target_msc, CARD64 divisor, CARD64 remainder, -- DRI2SwapEventPtr func, void *data) --{ -- struct sna *sna = to_sna_from_drawable(draw); -- uint64_t current_msc; -- bool sync, event; -- -- if (!immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) -- return false; -- -- sync = current_msc < *target_msc; -- event = dri2_chain(draw) == NULL; -- if (!sync || event) { -- DBG(("%s: performing immediate xchg only on pipe %d\n", -- __FUNCTION__, sna_crtc_to_pipe(crtc))); -- sna_dri2_xchg_crtc(sna, draw, crtc, front, back); -- } -- if (sync) { -- struct sna_dri2_event *info; -- -- info = sna_dri2_add_event(sna, draw, client); -- if (!info) -- goto complete; -- -- info->event_complete = func; -- info->event_data = data; -- -- info->front = sna_dri2_reference_buffer(front); -- info->back = 
sna_dri2_reference_buffer(back); -- info->type = SWAP_THROTTLE; -- -- if (event) { -- union drm_wait_vblank vbl; -- -- VG_CLEAR(vbl); -- vbl.request.type = -- DRM_VBLANK_RELATIVE | -- DRM_VBLANK_EVENT; -- vbl.request.sequence = 1; -- vbl.request.signal = (uintptr_t)info; -- -- info->queued = true; -- if (sna_wait_vblank(sna, &vbl, info->pipe)) { -- sna_dri2_event_free(info); -- goto complete; -- } -- -- swap_limit(draw, 2); -- } -- } else { --complete: -- fake_swap_complete(sna, client, draw, crtc, DRI2_EXCHANGE_COMPLETE, func, data); -- } -- -- *target_msc = current_msc + 1; -- return true; --} -- - static bool has_pending_events(struct sna *sna) - { - struct pollfd pfd; -@@ -2830,11 +3289,11 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - CARD64 remainder, DRI2SwapEventPtr func, void *data) - { - struct sna *sna = to_sna_from_drawable(draw); -- union drm_wait_vblank vbl; - xf86CrtcPtr crtc = NULL; - struct sna_dri2_event *info = NULL; - int type = DRI2_EXCHANGE_COMPLETE; - CARD64 current_msc; -+ bool immediate; - - DBG(("%s: draw=%lu %dx%d, pixmap=%ld %dx%d, back=%u (refs=%d/%d, flush=%d) , front=%u (refs=%d/%d, flush=%d)\n", - __FUNCTION__, -@@ -2860,6 +3319,7 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - assert(get_private(front)->refcnt); - assert(get_private(back)->refcnt); - -+ assert(get_private(back)->bo != get_private(front)->bo); - assert(get_private(front)->bo->refcnt); - assert(get_private(back)->bo->refcnt); - -@@ -2876,17 +3336,17 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - goto skip; - } - -- assert(sna_pixmap_from_drawable(draw)->flush); -- - if (draw->type != DRAWABLE_PIXMAP) { - WindowPtr win = (WindowPtr)draw; - struct dri2_window *priv = dri2_window(win); -+ - if (priv->front) { -- assert(front == priv->front); -- assert(get_private(priv->front)->refcnt > 1); -- get_private(priv->front)->refcnt--; -- priv->front = NULL; -+ front 
= priv->front; -+ assert(front->attachment == DRI2BufferFrontLeft); -+ assert(get_private(front)->refcnt); -+ assert(get_private(front)->pixmap == get_drawable_pixmap(draw)); - } -+ - if (win->clipList.extents.x2 <= win->clipList.extents.x1 || - win->clipList.extents.y2 <= win->clipList.extents.y1) { - DBG(("%s: window clipped (%d, %d), (%d, %d)\n", -@@ -2899,6 +3359,10 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - } - } - -+ DBG(("%s: using front handle=%d, active_scanout?=%d, flush?=%d\n", __FUNCTION__, get_private(front)->bo->handle, get_private(front)->bo->active_scanout, sna_pixmap_from_drawable(draw)->flush)); -+ assert(get_private(front)->bo->active_scanout); -+ assert(sna_pixmap_from_drawable(draw)->flush); -+ - /* Drawable not displayed... just complete the swap */ - if ((sna->flags & SNA_NO_WAIT) == 0) - crtc = sna_dri2_get_crtc(draw); -@@ -2914,109 +3378,112 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - sna_mode_wakeup(sna); - } - -- if (can_xchg(sna, draw, front, back) && -- sna_dri2_schedule_xchg(client, draw, crtc, front, back, -+ immediate = immediate_swap(sna, draw, crtc, - target_msc, divisor, remainder, -- func, data)) -- return TRUE; -- -- if (can_xchg_crtc(sna, draw, front, back, crtc) && -- sna_dri2_schedule_xchg_crtc(client, draw, crtc, front, back, -- target_msc, divisor, remainder, -- func, data)) -- return TRUE; -+ ¤t_msc); - - if (can_flip(sna, draw, front, back, crtc) && - sna_dri2_schedule_flip(client, draw, crtc, front, back, -- target_msc, divisor, remainder, -+ immediate, target_msc, current_msc, - func, data)) - return TRUE; - -- VG_CLEAR(vbl); -- -- info = sna_dri2_add_event(sna, draw, client); -+ info = sna_dri2_add_event(sna, draw, client, crtc); - if (!info) - goto blit; - - assert(info->crtc == crtc); - info->event_complete = func; - info->event_data = data; -+ assert(info->draw); -+ info->signal = true; - -+ assert(front != back); - info->front = 
sna_dri2_reference_buffer(front); - info->back = sna_dri2_reference_buffer(back); - -- if (immediate_swap(sna, *target_msc, divisor, draw, crtc, &current_msc)) { -+ if (immediate) { - bool sync = current_msc < *target_msc; -- if (!sna_dri2_immediate_blit(sna, info, sync, true)) -- sna_dri2_event_free(info); -+ sna_dri2_immediate_blit(sna, info, sync); - *target_msc = current_msc + sync; -+ DBG(("%s: reported target_msc=%llu\n", -+ __FUNCTION__, *target_msc)); - return TRUE; - } - -- vbl.request.type = -- DRM_VBLANK_ABSOLUTE | -- DRM_VBLANK_EVENT; -- vbl.request.signal = (uintptr_t)info; -- -- /* -- * If divisor is zero, or current_msc is smaller than target_msc -- * we just need to make sure target_msc passes before initiating -- * the swap. -- */ - info->type = SWAP; -- info->queued = true; -- if (divisor && current_msc >= *target_msc) { -- DBG(("%s: missed target, queueing event for next: current=%lld, target=%lld, divisor=%lld, remainder=%lld\n", -- __FUNCTION__, -- (long long)current_msc, -- (long long)*target_msc, -- (long long)divisor, -- (long long)remainder)); -- -- *target_msc = current_msc + remainder - current_msc % divisor; -- if (*target_msc <= current_msc) -- *target_msc += divisor; -- } -- vbl.request.sequence = draw_target_seq(draw, *target_msc - 1); - if (*target_msc <= current_msc + 1) { - DBG(("%s: performing blit before queueing\n", __FUNCTION__)); -- assert(info->queued); -- info->bo = __sna_dri2_copy_region(sna, draw, NULL, -- back, front, -- true); -- info->type = SWAP_WAIT; -- -- vbl.request.type = -- DRM_VBLANK_RELATIVE | -- DRM_VBLANK_EVENT; -- vbl.request.sequence = 1; -+ __sna_dri2_copy_event(info, DRI2_SYNC); -+ info->type = SWAP_COMPLETE; -+ if (!sna_next_vblank(info)) -+ goto fake; -+ -+ DBG(("%s: reported target_msc=%llu\n", -+ __FUNCTION__, *target_msc)); - *target_msc = current_msc + 1; -- } -+ swap_limit(draw, 2); -+ } else { -+ if (!sna_wait_vblank(info, -+ draw_target_seq(draw, *target_msc - 1))) -+ goto blit; - -- 
assert(info->queued); -- if (sna_wait_vblank(sna, &vbl, info->pipe)) -- goto blit; -+ DBG(("%s: reported target_msc=%llu (in)\n", -+ __FUNCTION__, *target_msc)); -+ swap_limit(draw, 1); -+ } - -- DBG(("%s: reported target_msc=%llu\n", __FUNCTION__, *target_msc)); -- swap_limit(draw, 1 + (info->type == SWAP_WAIT)); - return TRUE; - - blit: - DBG(("%s -- blit\n", __FUNCTION__)); -- if (info) -- sna_dri2_event_free(info); - if (can_xchg(sna, draw, front, back)) { - sna_dri2_xchg(draw, front, back); - } else { -- __sna_dri2_copy_region(sna, draw, NULL, back, front, false); -+ __sna_dri2_copy_region(sna, draw, NULL, back, front, 0); -+ front->flags = back->flags; - type = DRI2_BLIT_COMPLETE; - } -+ if (draw->type == DRAWABLE_PIXMAP) -+ goto fake; - skip: - DBG(("%s: unable to show frame, unblocking client\n", __FUNCTION__)); -- if (crtc == NULL) -- crtc = sna_mode_first_crtc(sna); -- fake_swap_complete(sna, client, draw, crtc, type, func, data); -- *target_msc = 0; /* offscreen, so zero out target vblank count */ -+ if (crtc == NULL && (sna->flags & SNA_NO_WAIT) == 0) -+ crtc = sna_primary_crtc(sna); -+ if (crtc && sna_crtc_is_on(crtc)) { -+ if (info == NULL) -+ info = sna_dri2_add_event(sna, draw, client, crtc); -+ if (info != dri2_chain(draw)) -+ goto fake; -+ -+ assert(info->crtc == crtc); -+ -+ info->type = SWAP_COMPLETE; -+ info->event_complete = func; -+ info->event_data = data; -+ assert(info->draw); -+ info->signal = true; -+ -+ if (info->front == NULL) -+ info->front = sna_dri2_reference_buffer(front); -+ if (info->back == NULL) -+ info->back = sna_dri2_reference_buffer(back); -+ -+ if (!sna_next_vblank(info)) -+ goto fake; -+ -+ swap_limit(draw, 1); -+ } else { -+fake: -+ /* XXX Use a Timer to throttle the client? 
*/ -+ fake_swap_complete(sna, client, draw, crtc, type, func, data); -+ if (info) { -+ assert(info->draw); -+ info->signal = false; -+ sna_dri2_event_free(info); -+ } -+ } -+ DBG(("%s: reported target_msc=%llu (in)\n", __FUNCTION__, *target_msc)); - return TRUE; - } - -@@ -3030,27 +3497,25 @@ sna_dri2_get_msc(DrawablePtr draw, CARD64 *ust, CARD64 *msc) - struct sna *sna = to_sna_from_drawable(draw); - xf86CrtcPtr crtc = sna_dri2_get_crtc(draw); - const struct ust_msc *swap; -+ union drm_wait_vblank vbl; - - DBG(("%s(draw=%ld, pipe=%d)\n", __FUNCTION__, draw->id, -- crtc ? sna_crtc_to_pipe(crtc) : -1)); -+ crtc ? sna_crtc_pipe(crtc) : -1)); - -- if (crtc != NULL) { -- union drm_wait_vblank vbl; -+ /* Drawable not displayed, make up a *monotonic* value */ -+ if (crtc == NULL) -+ crtc = sna_primary_crtc(sna); -+ if (crtc == NULL) -+ return FALSE; - -- VG_CLEAR(vbl); -- vbl.request.type = _DRM_VBLANK_RELATIVE; -- vbl.request.sequence = 0; -- if (sna_wait_vblank(sna, &vbl, sna_crtc_to_pipe(crtc)) == 0) -- sna_crtc_record_vblank(crtc, &vbl); -- } else -- /* Drawable not displayed, make up a *monotonic* value */ -- crtc = sna_mode_first_crtc(sna); -+ if (sna_query_vblank(sna, crtc, &vbl) == 0) -+ sna_crtc_record_vblank(crtc, &vbl); - - swap = sna_crtc_last_swap(crtc); - *msc = draw_current_msc(draw, crtc, swap->msc); - *ust = ust64(swap->tv_sec, swap->tv_usec); -- DBG(("%s: msc=%llu, ust=%llu\n", __FUNCTION__, -- (long long)*msc, (long long)*ust)); -+ DBG(("%s: msc=%llu [raw=%llu], ust=%llu\n", __FUNCTION__, -+ (long long)*msc, swap->msc, (long long)*ust)); - return TRUE; - } - -@@ -3068,32 +3533,22 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc - struct sna_dri2_event *info = NULL; - xf86CrtcPtr crtc; - CARD64 current_msc; -- union drm_wait_vblank vbl; - const struct ust_msc *swap; -- int pipe; - - crtc = sna_dri2_get_crtc(draw); - DBG(("%s(pipe=%d, target_msc=%llu, divisor=%llu, rem=%llu)\n", -- __FUNCTION__, crtc ? 
sna_crtc_to_pipe(crtc) : -1, -+ __FUNCTION__, crtc ? sna_crtc_pipe(crtc) : -1, - (long long)target_msc, - (long long)divisor, - (long long)remainder)); - - /* Drawable not visible, return immediately */ - if (crtc == NULL) -- goto out_complete; -- -- pipe = sna_crtc_to_pipe(crtc); -- -- VG_CLEAR(vbl); -- -- /* Get current count */ -- vbl.request.type = _DRM_VBLANK_RELATIVE; -- vbl.request.sequence = 0; -- if (sna_wait_vblank(sna, &vbl, pipe)) -- goto out_complete; -+ crtc = sna_primary_crtc(sna); -+ if (crtc == NULL) -+ return FALSE; - -- current_msc = draw_current_msc(draw, crtc, sna_crtc_record_vblank(crtc, &vbl)); -+ current_msc = get_current_msc(sna, draw, crtc); - - /* If target_msc already reached or passed, set it to - * current_msc to ensure we return a reasonable value back -@@ -3104,15 +3559,13 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc - if (divisor == 0 && current_msc >= target_msc) - goto out_complete; - -- info = sna_dri2_add_event(sna, draw, client); -+ info = sna_dri2_add_event(sna, draw, client, crtc); - if (!info) - goto out_complete; - - assert(info->crtc == crtc); - info->type = WAITMSC; - -- vbl.request.signal = (uintptr_t)info; -- vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; - /* - * If divisor is zero, or current_msc is smaller than target_msc, - * we just need to make sure target_msc passes before waking up the -@@ -3129,10 +3582,8 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc - if (target_msc <= current_msc) - target_msc += divisor; - } -- vbl.request.sequence = draw_target_seq(draw, target_msc); - -- info->queued = true; -- if (sna_wait_vblank(sna, &vbl, pipe)) -+ if (!sna_wait_vblank(info, draw_target_seq(draw, target_msc))) - goto out_free_info; - - DRI2BlockClient(client, draw); -@@ -3141,8 +3592,6 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc - out_free_info: - sna_dri2_event_free(info); - out_complete: -- 
if (crtc == NULL) -- crtc = sna_mode_first_crtc(sna); - swap = sna_crtc_last_swap(crtc); - DRI2WaitMSCComplete(client, draw, - draw_current_msc(draw, crtc, swap->msc), -@@ -3231,9 +3680,18 @@ static bool is_level(const char **str) - return false; - } - -+static const char *options_get_dri(struct sna *sna) -+{ -+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) -+ return xf86GetOptValString(sna->Options, OPTION_DRI); -+#else -+ return NULL; -+#endif -+} -+ - static const char *dri_driver_name(struct sna *sna) - { -- const char *s = xf86GetOptValString(sna->Options, OPTION_DRI); -+ const char *s = options_get_dri(sna); - - if (is_level(&s)) { - if (sna->kgem.gen < 030) -@@ -3259,7 +3717,7 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) - - if (wedged(sna)) { - xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, -- "loading DRI2 whilst the GPU is wedged.\n"); -+ "loading DRI2 whilst acceleration is disabled.\n"); - } - - if (xf86LoaderCheckSymbol("DRI2Version")) -@@ -3274,7 +3732,7 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) - memset(&info, '\0', sizeof(info)); - info.fd = sna->kgem.fd; - info.driverName = dri_driver_name(sna); -- info.deviceName = intel_get_client_name(sna->dev); -+ info.deviceName = intel_get_master_name(sna->dev); - - DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n", - __FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName)); -@@ -3299,11 +3757,12 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) - info.numDrivers = 2; - info.driverNames = driverNames; - driverNames[0] = info.driverName; -- driverNames[1] = info.driverName; -+ driverNames[1] = "va_gl"; - #endif - - #if DRI2INFOREC_VERSION >= 6 - if (xorg_can_triple_buffer()) { -+ DBG(("%s: enabling Xorg triple buffering\n", __FUNCTION__)); - info.version = 6; - info.SwapLimitValidate = sna_dri2_swap_limit_validate; - info.ReuseBufferNotify = sna_dri2_reuse_buffer; -@@ -3311,8 +3770,10 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) - 
#endif - - #if USE_ASYNC_SWAP -+ DBG(("%s: enabled async swap and buffer age\n", __FUNCTION__)); - info.version = 10; - info.scheduleSwap0 = 1; -+ info.bufferAge = 1; - #endif - - return DRI2ScreenInit(screen, &info); -diff --git a/src/sna/sna_dri3.c b/src/sna/sna_dri3.c -index f586e242..ce4970ae 100644 ---- a/src/sna/sna_dri3.c -+++ b/src/sna/sna_dri3.c -@@ -55,11 +55,14 @@ static inline void mark_dri3_pixmap(struct sna *sna, struct sna_pixmap *priv, st - if (bo->exec) - sna->kgem.flush = 1; - if (bo == priv->gpu_bo) -- priv->flush |= 3; -+ priv->flush |= FLUSH_READ | FLUSH_WRITE; - else - priv->shm = true; - -- sna_accel_watch_flush(sna, 1); -+ sna_watch_flush(sna, 1); -+ -+ kgem_bo_submit(&sna->kgem, bo); -+ kgem_bo_unclean(&sna->kgem, bo); - } - - static void sna_sync_flush(struct sna *sna, struct sna_pixmap *priv) -@@ -270,6 +273,8 @@ static PixmapPtr sna_dri3_pixmap_from_fd(ScreenPtr screen, - priv->ptr = MAKE_STATIC_PTR(pixmap->devPrivate.ptr); - } else { - assert(priv->gpu_bo == bo); -+ priv->create = kgem_can_create_2d(&sna->kgem, -+ width, height, depth); - priv->pinned |= PIN_DRI3; - } - list_add(&priv->cow_list, &sna->dri3.pixmaps); -@@ -325,6 +330,15 @@ static int sna_dri3_fd_from_pixmap(ScreenPtr screen, - return -1; - } - -+ if (bo->tiling && !sna->kgem.can_fence) { -+ if (!sna_pixmap_change_tiling(pixmap, I915_TILING_NONE)) { -+ DBG(("%s: unable to discard GPU tiling (%d) for DRI3 protocol\n", -+ __FUNCTION__, bo->tiling)); -+ return -1; -+ } -+ bo = priv->gpu_bo; -+ } -+ - fd = kgem_bo_export_to_prime(&sna->kgem, bo); - if (fd == -1) { - DBG(("%s: exporting handle=%d to fd failed\n", __FUNCTION__, bo->handle)); -diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c -index 8a3599c7..1b4015de 100644 ---- a/src/sna/sna_driver.c -+++ b/src/sna/sna_driver.c -@@ -57,6 +57,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
- #include - #include - -+#if defined(HAVE_X11_EXTENSIONS_DPMSCONST_H) -+#include -+#else -+#define DPMSModeOn 0 -+#define DPMSModeOff 3 -+#endif -+ - #include - #include - #include -@@ -69,6 +76,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. - - #if HAVE_DOT_GIT - #include "git_version.h" -+#else -+#define git_version "not compiled from git" - #endif - - #ifdef TEARFREE -@@ -185,12 +194,12 @@ sna_set_fallback_mode(ScrnInfoPtr scrn) - - xf86DisableUnusedFunctions(scrn); - #ifdef RANDR_12_INTERFACE -- if (get_root_window(scrn->pScreen)) -- xf86RandR12TellChanged(scrn->pScreen); -+ if (get_root_window(xf86ScrnToScreen(scrn))) -+ xf86RandR12TellChanged(xf86ScrnToScreen(scrn)); - #endif - } - --static Bool sna_set_desired_mode(struct sna *sna) -+static void sna_set_desired_mode(struct sna *sna) - { - ScrnInfoPtr scrn = sna->scrn; - -@@ -203,7 +212,6 @@ static Bool sna_set_desired_mode(struct sna *sna) - } - - sna_mode_check(sna); -- return TRUE; - } - - /** -@@ -222,7 +230,7 @@ static Bool sna_create_screen_resources(ScreenPtr screen) - screen->width, screen->height, screen->rootDepth)); - - assert(sna->scrn == xf86ScreenToScrn(screen)); -- assert(sna->scrn->pScreen == screen); -+ assert(to_screen_from_sna(sna) == screen); - - /* free the data used during miInitScreen */ - free(screen->devPrivate); -@@ -273,33 +281,89 @@ static Bool sna_create_screen_resources(ScreenPtr screen) - if (serverGeneration == 1 && (sna->flags & SNA_IS_HOSTED) == 0) - sna_copy_fbcon(sna); - -- (void)sna_set_desired_mode(sna); -+ sna_set_desired_mode(sna); - } - - return TRUE; - } - --static Bool sna_save_screen(ScreenPtr screen, int mode) -+static void sna_dpms_set(ScrnInfoPtr scrn, int mode, int flags) - { -- ScrnInfoPtr scrn = xf86ScreenToScrn(screen); -+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); -+ struct sna *sna = to_sna(scrn); -+ bool changed = false; -+ int i; - -- DBG(("%s(mode=%d)\n", __FUNCTION__, mode)); -+ DBG(("%s(mode=%d, flags=%d), vtSema=%d => off?=%d\n", -+ 
__FUNCTION__, mode, flags, scrn->vtSema, mode!=DPMSModeOn)); - if (!scrn->vtSema) -- return FALSE; -+ return; - -- xf86SaveScreen(screen, mode); -- sna_crtc_config_notify(screen); -- return TRUE; -+ /* Opencoded version of xf86DPMSSet(). -+ * -+ * The principle difference is to skip calling crtc->dpms() when -+ * turning off the display. This (on recent enough kernels at -+ * least) should be equivalent in power consumption, but require -+ * less work (hence quicker and less likely to fail) when switching -+ * back on. -+ */ -+ if (mode != DPMSModeOn) { -+ if (sna->mode.hidden == 0 && !(sna->flags & SNA_NO_DPMS)) { -+ DBG(("%s: hiding %d outputs\n", -+ __FUNCTION__, config->num_output)); -+ for (i = 0; i < config->num_output; i++) { -+ xf86OutputPtr output = config->output[i]; -+ if (output->crtc != NULL) -+ output->funcs->dpms(output, mode); -+ } -+ sna->mode.hidden = sna->mode.front_active + 1; -+ sna->mode.front_active = 0; -+ changed = true; -+ } -+ } else { -+ /* Re-enable CRTC that have been forced off via other means */ -+ if (sna->mode.hidden != 0) { -+ DBG(("%s: unhiding %d crtc, %d outputs\n", -+ __FUNCTION__, config->num_crtc, config->num_output)); -+ sna->mode.front_active = sna->mode.hidden - 1; -+ sna->mode.hidden = 0; -+ for (i = 0; i < config->num_crtc; i++) { -+ xf86CrtcPtr crtc = config->crtc[i]; -+ if (crtc->enabled) -+ crtc->funcs->dpms(crtc, mode); -+ } -+ -+ for (i = 0; i < config->num_output; i++) { -+ xf86OutputPtr output = config->output[i]; -+ if (output->crtc != NULL) -+ output->funcs->dpms(output, mode); -+ } -+ changed = true; -+ } -+ } -+ -+ DBG(("%s: hiding outputs? %d, front active? %d, changed? 
%d\n", -+ __FUNCTION__, sna->mode.hidden, sna->mode.front_active, changed)); -+ -+ if (changed) -+ sna_crtc_config_notify(xf86ScrnToScreen(scrn)); - } - --static void sna_dpms_set(ScrnInfoPtr scrn, int mode, int flags) -+static Bool sna_save_screen(ScreenPtr screen, int mode) - { -- DBG(("%s(mode=%d, flags=%d)\n", __FUNCTION__, mode)); -- if (!scrn->vtSema) -- return; -+ ScrnInfoPtr scrn = xf86ScreenToScrn(screen); -+ -+ DBG(("%s(mode=%d [unblank=%d])\n", -+ __FUNCTION__, mode, xf86IsUnblank(mode))); - -- xf86DPMSSet(scrn, mode, flags); -- sna_crtc_config_notify(xf86ScrnToScreen(scrn)); -+ /* We have to unroll xf86SaveScreen() here as it is called -+ * by DPMSSet() nullifying our special handling crtc->dpms() -+ * in sna_dpms_set(). -+ */ -+ sna_dpms_set(scrn, -+ xf86IsUnblank(mode) ? DPMSModeOn : DPMSModeOff, -+ 0); -+ return TRUE; - } - - static void sna_selftest(void) -@@ -330,107 +394,6 @@ static void sna_setup_capabilities(ScrnInfoPtr scrn, int fd) - #endif - } - --static int --namecmp(const char *s1, const char *s2) --{ -- char c1, c2; -- -- if (!s1 || *s1 == 0) { -- if (!s2 || *s2 == 0) -- return 0; -- else -- return 1; -- } -- -- while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') -- s1++; -- -- while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') -- s2++; -- -- c1 = isupper(*s1) ? tolower(*s1) : *s1; -- c2 = isupper(*s2) ? tolower(*s2) : *s2; -- while (c1 == c2) { -- if (c1 == '\0') -- return 0; -- -- s1++; -- while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') -- s1++; -- -- s2++; -- while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') -- s2++; -- -- c1 = isupper(*s1) ? tolower(*s1) : *s1; -- c2 = isupper(*s2) ? 
tolower(*s2) : *s2; -- } -- -- return c1 - c2; --} -- --static Bool sna_option_cast_to_bool(struct sna *sna, int id, Bool val) --{ -- const char *str = xf86GetOptValString(sna->Options, id); -- -- if (str == NULL) -- return val; -- -- if (*str == '\0') -- return TRUE; -- -- if (namecmp(str, "1") == 0) -- return TRUE; -- if (namecmp(str, "on") == 0) -- return TRUE; -- if (namecmp(str, "true") == 0) -- return TRUE; -- if (namecmp(str, "yes") == 0) -- return TRUE; -- -- if (namecmp(str, "0") == 0) -- return FALSE; -- if (namecmp(str, "off") == 0) -- return FALSE; -- if (namecmp(str, "false") == 0) -- return FALSE; -- if (namecmp(str, "no") == 0) -- return FALSE; -- -- return val; --} -- --static unsigned sna_option_cast_to_unsigned(struct sna *sna, int id, unsigned val) --{ -- const char *str = xf86GetOptValString(sna->Options, id); -- unsigned v; -- -- if (str == NULL || *str == '\0') -- return val; -- -- if (namecmp(str, "on") == 0) -- return val; -- if (namecmp(str, "true") == 0) -- return val; -- if (namecmp(str, "yes") == 0) -- return val; -- -- if (namecmp(str, "0") == 0) -- return 0; -- if (namecmp(str, "off") == 0) -- return 0; -- if (namecmp(str, "false") == 0) -- return 0; -- if (namecmp(str, "no") == 0) -- return 0; -- -- v = atoi(str); -- if (v) -- return v; -- -- return val; --} -- - static Bool fb_supports_depth(int fd, int depth) - { - struct drm_i915_gem_create create; -@@ -470,16 +433,24 @@ static void setup_dri(struct sna *sna) - unsigned level; - - sna->dri2.available = false; -+ sna->dri2.enable = false; - sna->dri3.available = false; -+ sna->dri3.enable = false; -+ sna->dri3.override = false; - -- level = sna_option_cast_to_unsigned(sna, OPTION_DRI, ~0); -+ level = intel_option_cast_to_unsigned(sna->Options, OPTION_DRI, DEFAULT_DRI_LEVEL); - #if HAVE_DRI3 -- if (level >= 3) -- sna->dri3.available = !!xf86LoadSubModule(sna->scrn, "dri3"); -+ sna->dri3.available = !!xf86LoadSubModule(sna->scrn, "dri3"); -+ sna->dri3.override = -+ 
!sna->dri3.available || -+ xf86IsOptionSet(sna->Options, OPTION_DRI); -+ if (level >= 3 && sna->kgem.gen >= 040) -+ sna->dri3.enable = sna->dri3.available; - #endif - #if HAVE_DRI2 -+ sna->dri2.available = !!xf86LoadSubModule(sna->scrn, "dri2"); - if (level >= 2) -- sna->dri2.available = !!xf86LoadSubModule(sna->scrn, "dri2"); -+ sna->dri2.enable = sna->dri2.available; - #endif - } - -@@ -498,13 +469,13 @@ static bool enable_tear_free(struct sna *sna) - return ENABLE_TEAR_FREE; - } - --static void setup_tear_free(struct sna *sna) -+static bool setup_tear_free(struct sna *sna) - { - MessageType from; - Bool enable; - - if (sna->flags & SNA_LINEAR_FB) -- return; -+ return false; - - if ((sna->flags & SNA_HAS_FLIP) == 0) { - from = X_PROBED; -@@ -518,11 +489,12 @@ static void setup_tear_free(struct sna *sna) - from = X_CONFIG; - - if (enable) -- sna->flags |= SNA_TEAR_FREE; -+ sna->flags |= SNA_WANT_TEAR_FREE | SNA_TEAR_FREE; - - done: - xf86DrvMsg(sna->scrn->scrnIndex, from, "TearFree %sabled\n", - sna->flags & SNA_TEAR_FREE ? 
"en" : "dis"); -+ return sna->flags & SNA_TEAR_FREE; - } - - /** -@@ -612,8 +584,10 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int probe) - } - - intel_detect_chipset(scrn, sna->dev); -- xf86DrvMsg(scrn->scrnIndex, X_PROBED, "CPU: %s\n", -- sna_cpu_features_to_string(sna->cpu_features, buf)); -+ xf86DrvMsg(scrn->scrnIndex, X_PROBED, -+ "CPU: %s; using a maximum of %d threads\n", -+ sna_cpu_features_to_string(sna->cpu_features, buf), -+ sna_use_threads(64*1024, 64*1024, 1)); - - if (!xf86SetDepthBpp(scrn, 24, 0, 0, - Support32bppFb | -@@ -651,18 +625,11 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int probe) - kgem_init(&sna->kgem, fd, - xf86GetPciInfoForEntity(pEnt->index), - sna->info->gen); -- if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE) || -- !sna_option_cast_to_bool(sna, OPTION_ACCEL_METHOD, TRUE)) { -- xf86DrvMsg(sna->scrn->scrnIndex, X_CONFIG, -- "Disabling hardware acceleration.\n"); -- sna->kgem.wedged = true; -- } - - if (xf86ReturnOptValBool(sna->Options, OPTION_TILING_FB, FALSE)) - sna->flags |= SNA_LINEAR_FB; -- -- if (xf86ReturnOptValBool(sna->Options, OPTION_DELETE_DP12, FALSE)) -- sna->flags |= SNA_REMOVE_OUTPUTS; -+ if (!sna->kgem.can_fence) -+ sna->flags |= SNA_LINEAR_FB; - - if (!xf86ReturnOptValBool(sna->Options, OPTION_SWAPBUFFERS_WAIT, TRUE)) - sna->flags |= SNA_NO_WAIT; -@@ -695,7 +662,8 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int probe) - } - scrn->currentMode = scrn->modes; - -- setup_tear_free(sna); -+ if (!setup_tear_free(sna) && sna_mode_wants_tear_free(sna)) -+ sna->kgem.needs_dirtyfb = sna->kgem.has_dirtyfb; - - xf86SetGamma(scrn, zeros); - xf86SetDpi(scrn, 0, 0); -@@ -721,11 +689,13 @@ cleanup: - return FALSE; - } - -+#if !HAVE_NOTIFY_FD - static bool has_shadow(struct sna *sna) - { -- if (!sna->mode.shadow_damage) -+ if (!sna->mode.shadow_enabled) - return false; - -+ assert(sna->mode.shadow_damage); - if (RegionNil(DamageRegion(sna->mode.shadow_damage))) - return false; - -@@ -748,7 +718,7 @@ 
sna_block_handler(BLOCKHANDLER_ARGS_DECL) - sna->BlockHandler(BLOCKHANDLER_ARGS); - - if (*tv == NULL || ((*tv)->tv_usec | (*tv)->tv_sec) || has_shadow(sna)) -- sna_accel_block_handler(sna, tv); -+ sna_accel_block(sna, tv); - } - - static void -@@ -770,52 +740,102 @@ sna_wakeup_handler(WAKEUPHANDLER_ARGS_DECL) - - sna->WakeupHandler(WAKEUPHANDLER_ARGS); - -- sna_accel_wakeup_handler(sna); -- - if (FD_ISSET(sna->kgem.fd, (fd_set*)read_mask)) { - sna_mode_wakeup(sna); - /* Clear the flag so that subsequent ZaphodHeads don't block */ - FD_CLR(sna->kgem.fd, (fd_set*)read_mask); - } - } -+#else -+static void -+sna_block_handler(void *data, void *_timeout) -+{ -+ struct sna *sna = data; -+ int *timeout = _timeout; -+ struct timeval tv, *tvp; -+ -+ DBG(("%s (timeout=%d)\n", __FUNCTION__, *timeout)); -+ if (*timeout == 0) -+ return; -+ -+ if (*timeout < 0) { -+ tvp = NULL; -+ } else { -+ tv.tv_sec = *timeout / 1000; -+ tv.tv_usec = (*timeout % 1000) * 1000; -+ tvp = &tv; -+ } -+ -+ sna_accel_block(sna, &tvp); -+ if (tvp) -+ *timeout = tvp->tv_sec * 1000 + tvp->tv_usec / 1000; -+} -+#endif - - #if HAVE_UDEV -+#include -+ - static void - sna_handle_uevents(int fd, void *closure) - { - struct sna *sna = closure; -- struct udev_device *dev; -- const char *str; - struct stat s; -- dev_t udev_devnum; -+ struct pollfd pfd; -+ bool hotplug = false; - - DBG(("%s\n", __FUNCTION__)); - -- dev = udev_monitor_receive_device(sna->uevent_monitor); -- if (!dev) -- return; -+ pfd.fd = udev_monitor_get_fd(sna->uevent_monitor); -+ pfd.events = POLLIN; -+ -+ if (fstat(sna->kgem.fd, &s)) -+ memset(&s, 0, sizeof(s)); -+ -+ while (poll(&pfd, 1, 0) > 0) { -+ struct udev_device *dev; -+ dev_t devnum; -+ -+ dev = udev_monitor_receive_device(sna->uevent_monitor); -+ if (dev == NULL) -+ break; -+ -+ devnum = udev_device_get_devnum(dev); -+ if (memcmp(&s.st_rdev, &devnum, sizeof(dev_t)) == 0) { -+ const char *str; -+ -+ str = udev_device_get_property_value(dev, "HOTPLUG"); -+ if (str && atoi(str) == 
1) { -+ str = udev_device_get_property_value(dev, "CONNECTOR"); -+ if (str) { -+ hotplug |= sna_mode_find_hotplug_connector(sna, atoi(str)); -+ } else { -+ sna->flags |= SNA_REPROBE; -+ hotplug = true; -+ } -+ } -+ } - -- udev_devnum = udev_device_get_devnum(dev); -- if (fstat(sna->kgem.fd, &s) || memcmp(&s.st_rdev, &udev_devnum, sizeof (dev_t))) { - udev_device_unref(dev); -- return; - } - -- str = udev_device_get_property_value(dev, "HOTPLUG"); -- if (str && atoi(str) == 1) { -- ScrnInfoPtr scrn = sna->scrn; -- -- DBG(("%s: hotplug event (vtSema?=%d)\n", __FUNCTION__, scrn->vtSema)); -+ if (hotplug) { -+ DBG(("%s: hotplug event (vtSema?=%d)\n", -+ __FUNCTION__, sna->scrn->vtSema)); - -- if (scrn->vtSema) { -- sna_mode_discover(sna); -- sna_mode_check(sna); -- RRGetInfo(xf86ScrnToScreen(scrn), TRUE); -- } else -+ if (sna->scrn->vtSema) -+ sna_mode_discover(sna, true); -+ else - sna->flags |= SNA_REPROBE; - } -+} - -- udev_device_unref(dev); -+static bool has_randr(void) -+{ -+#if HAS_DIXREGISTERPRIVATEKEY -+ return dixPrivateKeyRegistered(rrPrivKey); -+#else -+ return *rrPrivKey; -+#endif - } - - static void -@@ -833,7 +853,7 @@ sna_uevent_init(struct sna *sna) - /* RandR will be disabled if Xinerama is active, and so generating - * RR hotplug events is then verboten. - */ -- if (!dixPrivateKeyRegistered(rrPrivKey)) -+ if (!has_randr()) - goto out; - - u = NULL; -@@ -861,7 +881,8 @@ sna_uevent_init(struct sna *sna) - - sna->uevent_monitor = mon; - out: -- xf86DrvMsg(sna->scrn->scrnIndex, from, "display hotplug detection %s\n", -+ xf86DrvMsg(sna->scrn->scrnIndex, from, -+ "Display hotplug detection %s\n", - sna->uevent_monitor ? 
"enabled" : "disabled"); - return; - -@@ -874,17 +895,10 @@ err_dev: - - static bool sna_uevent_poll(struct sna *sna) - { -- struct pollfd pfd; -- - if (sna->uevent_monitor == NULL) - return false; - -- pfd.fd = udev_monitor_get_fd(sna->uevent_monitor); -- pfd.events = POLLIN; -- -- while (poll(&pfd, 1, 0) > 0) -- sna_handle_uevents(pfd.fd, sna); -- -+ sna_handle_uevents(udev_monitor_get_fd(sna->uevent_monitor), sna); - return true; - } - -@@ -918,8 +932,10 @@ sna_randr_getinfo(ScreenPtr screen, Rotation *rotations) - { - struct sna *sna = to_sna_from_screen(screen); - -+ DBG(("%s()\n", __FUNCTION__)); -+ - if (!sna_uevent_poll(sna)) -- sna_mode_discover(sna); -+ sna_mode_discover(sna, false); - - return sna->mode.rrGetInfo(screen, rotations); - } -@@ -931,8 +947,8 @@ static void sna_leave_vt(VT_FUNC_ARGS_DECL) - - DBG(("%s\n", __FUNCTION__)); - -- sna_accel_leave(sna); - sna_mode_reset(sna); -+ sna_accel_leave(sna); - - if (intel_put_master(sna->dev)) - xf86DrvMsg(scrn->scrnIndex, X_WARNING, -@@ -948,6 +964,12 @@ static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL) - - /* XXX Note that we will leak kernel resources if !vtSema */ - -+#if HAVE_NOTIFY_FD -+ RemoveBlockAndWakeupHandlers(sna_block_handler, -+ (ServerWakeupHandlerProcPtr)NoopDDA, -+ sna); -+#endif -+ - sna_uevent_fini(sna); - sna_mode_close(sna); - -@@ -1047,12 +1069,13 @@ static void sna_dri_init(struct sna *sna, ScreenPtr screen) - { - char str[128] = ""; - -- if (sna->dri2.available) -+ if (sna->dri2.enable) - sna->dri2.open = sna_dri2_open(sna, screen); - if (sna->dri2.open) - strcat(str, "DRI2 "); - -- if (sna->dri3.available) -+ /* Load DRI3 in case DRI2 doesn't work, e.g. 
vgaarb */ -+ if (sna->dri3.enable || (!sna->dri2.open && !sna->dri3.override)) - sna->dri3.open = sna_dri3_open(sna, screen); - if (sna->dri3.open) - strcat(str, "DRI3 "); -@@ -1098,7 +1121,8 @@ sna_screen_init(SCREEN_INIT_ARGS_DECL) - DBG(("%s\n", __FUNCTION__)); - - assert(sna->scrn == scrn); -- assert(scrn->pScreen == NULL); /* set afterwards */ -+ assert(to_screen_from_sna(sna) == NULL || /* set afterwards */ -+ to_screen_from_sna(sna) == screen); - - assert(sna->freed_pixmap == NULL); - -@@ -1166,11 +1190,17 @@ sna_screen_init(SCREEN_INIT_ARGS_DECL) - * later memory should be bound when allocating, e.g rotate_mem */ - scrn->vtSema = TRUE; - -+#if !HAVE_NOTIFY_FD - sna->BlockHandler = screen->BlockHandler; - screen->BlockHandler = sna_block_handler; - - sna->WakeupHandler = screen->WakeupHandler; - screen->WakeupHandler = sna_wakeup_handler; -+#else -+ RegisterBlockAndWakeupHandlers(sna_block_handler, -+ (ServerWakeupHandlerProcPtr)NoopDDA, -+ sna); -+#endif - - screen->SaveScreen = sna_save_screen; - screen->CreateScreenResources = sna_create_screen_resources; -@@ -1190,6 +1220,8 @@ sna_screen_init(SCREEN_INIT_ARGS_DECL) - CMAP_PALETTED_TRUECOLOR)) - return FALSE; - -+ if (!xf86CheckBoolOption(scrn->options, "dpms", TRUE)) -+ sna->flags |= SNA_NO_DPMS; - xf86DPMSInit(screen, sna_dpms_set, 0); - - sna_uevent_init(sna); -@@ -1244,20 +1276,15 @@ static Bool sna_enter_vt(VT_FUNC_ARGS_DECL) - if (intel_get_master(sna->dev)) - return FALSE; - -+ sna_accel_enter(sna); -+ - if (sna->flags & SNA_REPROBE) { -- DBG(("%s: reporting deferred hotplug event\n", -- __FUNCTION__)); -- sna_mode_discover(sna); -- RRGetInfo(xf86ScrnToScreen(scrn), TRUE); -- sna->flags &= ~SNA_REPROBE; -+ DBG(("%s: reporting deferred hotplug event\n", __FUNCTION__)); -+ sna_mode_discover(sna, true); - } - -- if (!sna_set_desired_mode(sna)) { -- intel_put_master(sna->dev); -- return FALSE; -- } -+ sna_set_desired_mode(sna); - -- sna_accel_enter(sna); - return TRUE; - } - -@@ -1379,6 +1406,9 @@ 
static void describe_sna(ScrnInfoPtr scrn) - xf86DrvMsg(scrn->scrnIndex, X_INFO, - "SNA compiled: %s\n", BUILDER_DESCRIPTION); - #endif -+#if HAS_DEBUG_FULL -+ ErrorF("SNA compiled with full debug logging; expect to run slowly\n"); -+#endif - #if !NDEBUG - xf86DrvMsg(scrn->scrnIndex, X_INFO, - "SNA compiled with assertions enabled\n"); -@@ -1400,6 +1430,7 @@ static void describe_sna(ScrnInfoPtr scrn) - "SNA compiled for use with valgrind\n"); - VALGRIND_PRINTF("SNA compiled for use with valgrind\n"); - #endif -+ DBG(("xf86-video-intel version: %s\n", git_version)); - DBG(("pixman version: %s\n", pixman_version_string())); - } - -diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c -index a5dfb06b..6ee40336 100644 ---- a/src/sna/sna_glyphs.c -+++ b/src/sna/sna_glyphs.c -@@ -74,7 +74,7 @@ - #define NO_GLYPHS_VIA_MASK 0 - #define FORCE_SMALL_MASK 0 /* -1 = never, 1 = always */ - #define NO_GLYPHS_SLOW 0 --#define NO_DISCARD_MASK 0 -+#define DISCARD_MASK 0 /* -1 = never, 1 = always */ - - #define CACHE_PICTURE_SIZE 1024 - #define GLYPH_MIN_SIZE 8 -@@ -185,7 +185,7 @@ void sna_glyphs_close(struct sna *sna) - */ - bool sna_glyphs_create(struct sna *sna) - { -- ScreenPtr screen = sna->scrn->pScreen; -+ ScreenPtr screen = to_screen_from_sna(sna); - pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff }; - unsigned int formats[] = { - PIXMAN_a8, -@@ -1094,6 +1094,9 @@ sna_glyph_get_image(GlyphPtr g, ScreenPtr s) - - static inline bool use_small_mask(struct sna *sna, int16_t width, int16_t height, int depth) - { -+ if (depth < 8) -+ return true; -+ - if (FORCE_SMALL_MASK) - return FORCE_SMALL_MASK > 0; - -@@ -1156,12 +1159,6 @@ glyphs_via_mask(struct sna *sna, - src_x += box.x1 - list->xOff; - src_y += box.y1 - list->yOff; - -- if (format->depth < 8) { -- format = PictureMatchFormat(screen, 8, PICT_a8); -- if (!format) -- return false; -- } -- - component_alpha = NeedsComponent(format->format); - if (use_small_mask(sna, width, height, format->depth)) { - 
pixman_image_t *mask_image; -@@ -1179,7 +1176,7 @@ use_small_mask: - return false; - - mask_image = -- pixman_image_create_bits(format->depth << 24 | format->format, -+ pixman_image_create_bits(pixmap->drawable.bitsPerPixel << 24 | format->format, - width, height, - pixmap->devPrivate.ptr, - pixmap->devKind); -@@ -1386,10 +1383,11 @@ next_image: - DBG(("%s: atlas format=%08x, mask format=%08x\n", - __FUNCTION__, - (int)p->atlas->format, -- (int)(format->depth << 24 | format->format))); -+ (int)mask->format)); - - memset(&tmp, 0, sizeof(tmp)); -- if (p->atlas->format == (format->depth << 24 | format->format)) { -+ if (p->atlas->format == mask->format || -+ alphaless(p->atlas->format) == mask->format) { - ok = sna->render.composite(sna, PictOpAdd, - p->atlas, NULL, mask, - 0, 0, 0, 0, 0, 0, -@@ -1561,6 +1559,9 @@ skip_glyph: - } - } - -+ assert(format); -+ DBG(("%s: format=%08d, depth=%d\n", -+ __FUNCTION__, format->format, format->depth)); - out: - if (list_extents != stack_extents) - free(list_extents); -@@ -1573,24 +1574,34 @@ static bool can_discard_mask(uint8_t op, PicturePtr src, PictFormatPtr mask, - PictFormatPtr g; - uint32_t color; - -- if (NO_DISCARD_MASK) -- return false; -+ if (DISCARD_MASK) -+ return DISCARD_MASK > 0; - - DBG(("%s: nlist=%d, mask=%08x, depth %d, op=%d (bounded? %d)\n", - __FUNCTION__, nlist, - mask ? (unsigned)mask->format : 0, mask ? mask->depth : 0, - op, op_is_bounded(op))); - -- if (nlist == 1 && list->len == 1) -- return true; -+ if (nlist == 1 && list->len == 1) { -+ if (mask == list->format) -+ return true; -+ -+ g = list->format; -+ goto skip; -+ } - -- if (!op_is_bounded(op)) -+ if (!op_is_bounded(op)) { -+ DBG(("%s: unbounded op, not discarding\n", __FUNCTION__)); - return false; -+ } - - /* No glyphs overlap and we are not performing a mask conversion. 
*/ - g = glyphs_format(nlist, list, glyphs); -- if (mask == g) -+ if (mask == g) { -+ DBG(("%s: mask matches glyphs format, no conversion, so discard mask\n", -+ __FUNCTION__)); - return true; -+ } - - DBG(("%s: preferred mask format %08x, depth %d\n", - __FUNCTION__, g ? (unsigned)g->format : 0, g ? g->depth : 0)); -@@ -1605,18 +1616,41 @@ static bool can_discard_mask(uint8_t op, PicturePtr src, PictFormatPtr mask, - - list++; - } -+ -+ if (!sna_picture_is_solid(src, &color)) -+ return false; -+ -+ return color >> 24 == 0xff; - } else { -- if (PICT_FORMAT_A(mask->format) >= PICT_FORMAT_A(g->format)) -+skip: -+ if (mask->format == g->format) - return true; - -- if (g->depth != 1) -- return false; -- } -+ if (mask->format == alphaless(g->format)) -+ return true; -+ -+ if (PICT_FORMAT_TYPE(g->format) == PICT_TYPE_A && -+ PICT_FORMAT_TYPE(mask->format) != PICT_TYPE_A) -+ return true; - -- if (!sna_picture_is_solid(src, &color)) - return false; -+ } -+} - -- return color >> 24 == 0xff; -+static uint32_t pixman_format(PictFormatPtr short_format) -+{ -+ uint32_t bpp; -+ -+ bpp = short_format->depth; -+ if (bpp <= 1) -+ bpp = 1; -+ else if (bpp <= 8) -+ bpp = 8; -+ else if (bpp <= 16) -+ bpp = 16; -+ else -+ bpp = 32; -+ return bpp << 24 | short_format->format; - } - - static void -@@ -1756,7 +1790,7 @@ next: - if (sigtrap_get() == 0) { - if (mask_format) { - pixman_composite_glyphs(op, src_image, dst_image, -- mask_format->format | (mask_format->depth << 24), -+ pixman_format(mask_format), - src_x + src_dx + region.extents.x1 - dst_x, - src_y + src_dy + region.extents.y1 - dst_y, - region.extents.x1, region.extents.y1, -@@ -1815,10 +1849,10 @@ out: - x, y, - mask_format->depth, - (long)mask_format->format, -- (long)(mask_format->depth << 24 | mask_format->format), -+ (long)pixman_format(mask_format), - NeedsComponent(mask_format->format))); - mask_image = -- pixman_image_create_bits(mask_format->depth << 24 | mask_format->format, -+ 
pixman_image_create_bits(pixman_format(mask_format), - region.extents.x2 - region.extents.x1, - region.extents.y2 - region.extents.y1, - NULL, 0); -@@ -2086,12 +2120,6 @@ glyphs_via_image(struct sna *sna, - src_x += box.x1 - list->xOff; - src_y += box.y1 - list->yOff; - -- if (format->depth < 8) { -- format = PictureMatchFormat(screen, 8, PICT_a8); -- if (!format) -- return false; -- } -- - DBG(("%s: small mask [format=%lx, depth=%d, size=%d], rendering glyphs to upload buffer\n", - __FUNCTION__, (unsigned long)format->format, - format->depth, (uint32_t)width*height*format->depth)); -@@ -2104,7 +2132,7 @@ glyphs_via_image(struct sna *sna, - return false; - - mask_image = -- pixman_image_create_bits(format->depth << 24 | format->format, -+ pixman_image_create_bits(pixmap->drawable.bitsPerPixel << 24 | format->format, - width, height, - pixmap->devPrivate.ptr, - pixmap->devKind); -diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c -index d6aa1294..d32bd583 100644 ---- a/src/sna/sna_io.c -+++ b/src/sna/sna_io.c -@@ -105,8 +105,10 @@ read_boxes_inplace__cpu(struct kgem *kgem, - if (!download_inplace__cpu(kgem, dst, bo, box, n)) - return false; - -+ if (bo->tiling == I915_TILING_Y) -+ return false; -+ - assert(kgem_bo_can_map__cpu(kgem, bo, false)); -- assert(bo->tiling != I915_TILING_Y); - - src = kgem_bo_map__cpu(kgem, bo); - if (src == NULL) -@@ -281,6 +283,9 @@ fallback: - if (box[n].y2 > extents.y2) - extents.y2 = box[n].y2; - } -+ if (!can_blt && sna->render.max_3d_size == 0) -+ goto fallback; -+ - if (kgem_bo_can_map(kgem, src_bo)) { - /* Is it worth detiling? 
*/ - if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096) -@@ -477,6 +482,7 @@ fallback: - goto fallback; - _kgem_set_mode(kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL); - - tmp_nbox = nbox; - tmp_box = box; -@@ -539,6 +545,7 @@ fallback: - break; - - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL); - tmp_box += nbox_this_time; - } while (1); - } else { -@@ -597,6 +604,7 @@ fallback: - break; - - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL); - tmp_box += nbox_this_time; - } while (1); - } -@@ -666,8 +674,10 @@ write_boxes_inplace__tiled(struct kgem *kgem, - { - uint8_t *dst; - -+ if (bo->tiling == I915_TILING_Y) -+ return false; -+ - assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true)); -- assert(bo->tiling != I915_TILING_Y); - - if (kgem_bo_can_map__cpu(kgem, bo, true)) { - dst = kgem_bo_map__cpu(kgem, bo); -@@ -778,6 +788,15 @@ static bool __upload_inplace(struct kgem *kgem, - if (FORCE_INPLACE) - return FORCE_INPLACE > 0; - -+ if (bo->exec) -+ return false; -+ -+ if (bo->flush) -+ return true; -+ -+ if (kgem_bo_can_map__cpu(kgem, bo, true)) -+ return true; -+ - /* If we are writing through the GTT, check first if we might be - * able to almagamate a series of small writes into a single - * operation. -@@ -849,6 +868,8 @@ bool sna_write_boxes(struct sna *sna, PixmapPtr dst, - if (box[n].y2 > extents.y2) - extents.y2 = box[n].y2; - } -+ if (!can_blt && sna->render.max_3d_size == 0) -+ goto fallback; - - /* Try to avoid switching rings... 
*/ - if (!can_blt || kgem->ring == KGEM_RENDER || -@@ -1038,6 +1059,7 @@ tile: - goto fallback; - _kgem_set_mode(kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); - - if (kgem->gen >= 0100) { - cmd |= 8; -@@ -1129,6 +1151,7 @@ tile: - if (nbox) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); - } - - kgem_bo_destroy(kgem, src_bo); -@@ -1224,6 +1247,7 @@ tile: - if (nbox) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); - } - - kgem_bo_destroy(kgem, src_bo); -@@ -1541,6 +1565,7 @@ tile: - goto fallback; - _kgem_set_mode(kgem, KGEM_BLT); - } -+ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); - - if (sna->kgem.gen >= 0100) { - cmd |= 8; -@@ -1636,6 +1661,7 @@ tile: - if (nbox) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); - } - - kgem_bo_destroy(kgem, src_bo); -@@ -1732,6 +1758,7 @@ tile: - if (nbox) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); -+ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); - } - - kgem_bo_destroy(kgem, src_bo); -diff --git a/src/sna/sna_present.c b/src/sna/sna_present.c -index 6dd6fe88..2796d972 100644 ---- a/src/sna/sna_present.c -+++ b/src/sna/sna_present.c -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -38,21 +39,73 @@ - static present_screen_info_rec present_info; - - struct sna_present_event { -- uint64_t event_id; - xf86CrtcPtr crtc; -+ struct sna *sna; -+ struct list link; -+ uint64_t *event_id; -+ uint64_t target_msc; -+ int n_event_id; -+ bool queued; - }; - -+static void sna_present_unflip(ScreenPtr screen, uint64_t event_id); -+static bool sna_present_queue(struct sna_present_event *info, -+ uint64_t last_msc); -+ - static inline struct sna_present_event * - to_present_event(uintptr_t data) - { - return (struct sna_present_event *)(data & ~3); - } - -+static struct sna_present_event 
*info_alloc(struct sna *sna) -+{ -+ struct sna_present_event *info; -+ -+ info = sna->present.freed_info; -+ if (info) { -+ sna->present.freed_info = NULL; -+ return info; -+ } -+ -+ return malloc(sizeof(struct sna_present_event) + sizeof(uint64_t)); -+} -+ -+static void info_free(struct sna_present_event *info) -+{ -+ struct sna *sna = info->sna; -+ -+ if (sna->present.freed_info) -+ free(sna->present.freed_info); -+ -+ sna->present.freed_info = info; -+} -+ -+static inline bool msc_before(uint64_t msc, uint64_t target) -+{ -+ return (int64_t)(msc - target) < 0; -+} -+ - #define MARK_PRESENT(x) ((void *)((uintptr_t)(x) | 2)) - --static int pipe_from_crtc(RRCrtcPtr crtc) -+static inline xf86CrtcPtr unmask_crtc(xf86CrtcPtr crtc) -+{ -+ return (xf86CrtcPtr)((uintptr_t)crtc & ~1); -+} -+ -+static inline xf86CrtcPtr mark_crtc(xf86CrtcPtr crtc) -+{ -+ return (xf86CrtcPtr)((uintptr_t)crtc | 1); -+} -+ -+static inline bool has_vblank(xf86CrtcPtr crtc) -+{ -+ return (uintptr_t)crtc & 1; -+} -+ -+static inline int pipe_from_crtc(RRCrtcPtr crtc) - { -- return crtc ? sna_crtc_to_pipe(crtc->devPrivate) : -1; -+ return crtc ? 
sna_crtc_pipe(crtc->devPrivate) : -1; - } - - static uint32_t pipe_select(int pipe) -@@ -74,6 +127,215 @@ static inline int sna_wait_vblank(struct sna *sna, union drm_wait_vblank *vbl, i - return drmIoctl(sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, vbl); - } - -+static uint64_t gettime_ust64(void) -+{ -+ struct timespec tv; -+ -+ if (clock_gettime(CLOCK_MONOTONIC, &tv)) -+ return GetTimeInMicros(); -+ -+ return ust64(tv.tv_sec, tv.tv_nsec / 1000); -+} -+ -+static void vblank_complete(struct sna_present_event *info, -+ uint64_t ust, uint64_t msc) -+{ -+ int n; -+ -+ if (msc_before(msc, info->target_msc)) { -+ DBG(("%s: event=%d too early, now %lld, expected %lld\n", -+ __FUNCTION__, -+ info->event_id[0], -+ (long long)msc, (long long)info->target_msc)); -+ if (sna_present_queue(info, msc)) -+ return; -+ } -+ -+ DBG(("%s: %d events complete\n", __FUNCTION__, info->n_event_id)); -+ for (n = 0; n < info->n_event_id; n++) { -+ DBG(("%s: pipe=%d tv=%d.%06d msc=%lld (target=%lld), event=%lld complete%s\n", __FUNCTION__, -+ sna_crtc_pipe(info->crtc), -+ (int)(ust / 1000000), (int)(ust % 1000000), -+ (long long)msc, (long long)info->target_msc, -+ (long long)info->event_id[n], -+ info->target_msc && msc == (uint32_t)info->target_msc ? 
"" : ": MISS")); -+ present_event_notify(info->event_id[n], ust, msc); -+ } -+ if (info->n_event_id > 1) -+ free(info->event_id); -+ list_del(&info->link); -+ info_free(info); -+} -+ -+static uint32_t msc_to_delay(xf86CrtcPtr crtc, uint64_t target) -+{ -+ const DisplayModeRec *mode = &crtc->desiredMode; -+ const struct ust_msc *swap = sna_crtc_last_swap(crtc); -+ int64_t delay, subframe; -+ -+ assert(mode->Clock); -+ -+ delay = target - swap->msc; -+ assert(delay >= 0); -+ if (delay > 1) { /* try to use the hw vblank for the last frame */ -+ delay--; -+ subframe = 0; -+ } else { -+ subframe = gettime_ust64() - swap_ust(swap); -+ subframe += 500; -+ subframe /= 1000; -+ } -+ delay *= mode->VTotal * mode->HTotal / mode->Clock; -+ if (subframe < delay) -+ delay -= subframe; -+ else -+ delay = 0; -+ -+ DBG(("%s: sleep %d frames, %llu ms\n", __FUNCTION__, -+ (int)(target - swap->msc), (long long)delay)); -+ assert(delay >= 0); -+ return MIN(delay, INT32_MAX); -+} -+ -+static CARD32 sna_fake_vblank_handler(OsTimerPtr timer, CARD32 now, void *data) -+{ -+ struct sna_present_event *info = data; -+ union drm_wait_vblank vbl; -+ uint64_t msc, ust; -+ -+ DBG(("%s(event=%lldx%d, now=%d)\n", __FUNCTION__, (long long)info->event_id[0], info->n_event_id, now)); -+ -+ VG_CLEAR(vbl); -+ vbl.request.type = DRM_VBLANK_RELATIVE; -+ vbl.request.sequence = 0; -+ if (sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc)) == 0) { -+ ust = ust64(vbl.reply.tval_sec, vbl.reply.tval_usec); -+ msc = sna_crtc_record_vblank(info->crtc, &vbl); -+ DBG(("%s: event=%lld, target msc=%lld, now %lld\n", -+ __FUNCTION__, (long long)info->event_id[0], (long long)info->target_msc, (long long)msc)); -+ if (msc_before(msc, info->target_msc)) { -+ int delta = info->target_msc - msc; -+ uint32_t delay; -+ -+ DBG(("%s: too early, requeuing delta=%d\n", __FUNCTION__, delta)); -+ assert(info->target_msc - msc < 1ull<<31); -+ if (delta <= 2) { -+ vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; -+ 
vbl.request.sequence = info->target_msc; -+ vbl.request.signal = (uintptr_t)MARK_PRESENT(info); -+ if (sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc)) == 0) { -+ DBG(("%s: scheduled new vblank event for %lld\n", __FUNCTION__, (long long)info->target_msc)); -+ info->queued = true; -+ if (delta == 1) { -+ sna_crtc_set_vblank(info->crtc); -+ info->crtc = mark_crtc(info->crtc); -+ } -+ free(timer); -+ return 0; -+ } -+ } -+ -+ delay = msc_to_delay(info->crtc, info->target_msc); -+ if (delay) { -+ DBG(("%s: requeueing timer for %dms delay\n", __FUNCTION__, delay)); -+ return delay; -+ } -+ -+ /* As a last resort use a blocking wait. -+ * Less than a millisecond for (hopefully) a rare case. -+ */ -+ DBG(("%s: blocking wait!\n", __FUNCTION__)); -+ vbl.request.type = DRM_VBLANK_ABSOLUTE; -+ vbl.request.sequence = info->target_msc; -+ if (sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc)) == 0) { -+ ust = ust64(vbl.reply.tval_sec, vbl.reply.tval_usec); -+ msc = sna_crtc_record_vblank(info->crtc, &vbl); -+ } else { -+ DBG(("%s: blocking wait failed, fudging\n", -+ __FUNCTION__)); -+ goto fixup; -+ } -+ } -+ } else { -+fixup: -+ ust = gettime_ust64(); -+ msc = info->target_msc; -+ DBG(("%s: event=%lld, CRTC OFF, target msc=%lld, was %lld (off)\n", -+ __FUNCTION__, (long long)info->event_id[0], (long long)info->target_msc, (long long)sna_crtc_last_swap(info->crtc)->msc)); -+ } -+ -+ vblank_complete(info, ust, msc); -+ free(timer); -+ return 0; -+} -+ -+static bool sna_fake_vblank(struct sna_present_event *info) -+{ -+ const struct ust_msc *swap = sna_crtc_last_swap(info->crtc); -+ uint32_t delay; -+ -+ if (msc_before(swap->msc, info->target_msc)) -+ delay = msc_to_delay(info->crtc, info->target_msc); -+ else -+ delay = 0; -+ -+ DBG(("%s(event=%lldx%d, target_msc=%lld, msc=%lld, delay=%ums)\n", -+ __FUNCTION__, (long long)info->event_id[0], info->n_event_id, -+ (long long)info->target_msc, (long long)swap->msc, delay)); -+ if (delay == 0) { -+ uint64_t ust, 
msc; -+ -+ if (msc_before(swap->msc, info->target_msc)) { -+ /* Fixup and pretend it completed immediately */ -+ msc = info->target_msc; -+ ust = gettime_ust64(); -+ } else { -+ msc = swap->msc; -+ ust = swap_ust(swap); -+ } -+ -+ vblank_complete(info, ust, msc); -+ return true; -+ } -+ -+ return TimerSet(NULL, 0, delay, sna_fake_vblank_handler, info); -+} -+ -+static bool sna_present_queue(struct sna_present_event *info, -+ uint64_t last_msc) -+{ -+ union drm_wait_vblank vbl; -+ int delta = info->target_msc - last_msc; -+ -+ DBG(("%s: target msc=%llu, seq=%u (last_msc=%llu), delta=%d\n", -+ __FUNCTION__, -+ (long long)info->target_msc, -+ (unsigned)info->target_msc, -+ (long long)last_msc, -+ delta)); -+ assert(info->target_msc - last_msc < 1ull<<31); -+ assert(delta >= 0); -+ -+ VG_CLEAR(vbl); -+ vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; -+ vbl.request.sequence = info->target_msc; -+ vbl.request.signal = (uintptr_t)MARK_PRESENT(info); -+ if (delta > 2 || -+ sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc))) { -+ DBG(("%s: vblank enqueue failed, faking delta=%d\n", __FUNCTION__, delta)); -+ if (!sna_fake_vblank(info)) -+ return false; -+ } else { -+ info->queued = true; -+ if (delta == 1) { -+ sna_crtc_set_vblank(info->crtc); -+ info->crtc = mark_crtc(info->crtc); -+ } -+ } -+ -+ return true; -+} -+ - static RRCrtcPtr - sna_present_get_crtc(WindowPtr window) - { -@@ -81,7 +343,10 @@ sna_present_get_crtc(WindowPtr window) - BoxRec box; - xf86CrtcPtr crtc; - -- DBG(("%s\n", __FUNCTION__)); -+ DBG(("%s: window=%ld (pixmap=%ld), box=(%d, %d)x(%d, %d)\n", -+ __FUNCTION__, window->drawable.id, get_window_pixmap(window)->drawable.serialNumber, -+ window->drawable.x, window->drawable.y, -+ window->drawable.width, window->drawable.height)); - - box.x1 = window->drawable.x; - box.y1 = window->drawable.y; -@@ -99,26 +364,59 @@ static int - sna_present_get_ust_msc(RRCrtcPtr crtc, CARD64 *ust, CARD64 *msc) - { - struct sna *sna = 
to_sna_from_screen(crtc->pScreen); -- int pipe = pipe_from_crtc(crtc); - union drm_wait_vblank vbl; - -- DBG(("%s(pipe=%d)\n", __FUNCTION__, pipe)); -+ DBG(("%s(pipe=%d)\n", __FUNCTION__, sna_crtc_pipe(crtc->devPrivate))); -+ if (sna_crtc_has_vblank(crtc->devPrivate)) { -+ DBG(("%s: vblank active, reusing last swap msc/ust\n", -+ __FUNCTION__)); -+ goto last; -+ } - - VG_CLEAR(vbl); - vbl.request.type = DRM_VBLANK_RELATIVE; - vbl.request.sequence = 0; -- if (sna_wait_vblank(sna, &vbl, pipe) == 0) { -+ if (sna_wait_vblank(sna, &vbl, sna_crtc_pipe(crtc->devPrivate)) == 0) { -+ struct sna_present_event *info; -+ - *ust = ust64(vbl.reply.tval_sec, vbl.reply.tval_usec); - *msc = sna_crtc_record_vblank(crtc->devPrivate, &vbl); -+ -+ info = info_alloc(sna); -+ if (info) { -+ info->crtc = crtc->devPrivate; -+ info->sna = sna; -+ info->target_msc = *msc + 1; -+ info->event_id = (uint64_t *)(info + 1); -+ info->n_event_id = 0; -+ -+ vbl.request.type = -+ DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; -+ vbl.request.sequence = info->target_msc; -+ vbl.request.signal = (uintptr_t)MARK_PRESENT(info); -+ -+ if (sna_wait_vblank(info->sna, &vbl, -+ sna_crtc_pipe(info->crtc)) == 0) { -+ list_add(&info->link, -+ &sna->present.vblank_queue); -+ info->queued = true; -+ sna_crtc_set_vblank(info->crtc); -+ info->crtc = mark_crtc(info->crtc); -+ } else -+ info_free(info); -+ } - } else { -- const struct ust_msc *swap = sna_crtc_last_swap(crtc->devPrivate); -- *ust = ust64(swap->tv_sec, swap->tv_usec); -+ const struct ust_msc *swap; -+last: -+ swap = sna_crtc_last_swap(crtc->devPrivate); -+ *ust = swap_ust(swap); - *msc = swap->msc; - } - -- DBG(("%s: pipe=%d, tv=%d.%06d msc=%lld\n", __FUNCTION__, pipe, -+ DBG(("%s: pipe=%d, tv=%d.%06d seq=%d msc=%lld\n", __FUNCTION__, -+ sna_crtc_pipe(crtc->devPrivate), - (int)(*ust / 1000000), (int)(*ust % 1000000), -- (long long)*msc)); -+ vbl.reply.sequence, (long long)*msc)); - - return Success; - } -@@ -127,43 +425,106 @@ void - 
sna_present_vblank_handler(struct drm_event_vblank *event) - { - struct sna_present_event *info = to_present_event(event->user_data); -+ uint64_t msc; - -- DBG(("%s: pipe=%d tv=%d.%06d msc=%d, event %lld complete\n", __FUNCTION__, -- sna_crtc_to_pipe(info->crtc), -- event->tv_sec, event->tv_usec, event->sequence, -- (long long)info->event_id)); -- present_event_notify(info->event_id, -- ust64(event->tv_sec, event->tv_usec), -- sna_crtc_record_event(info->crtc, event)); -- free(info); -+ if (!info->queued) { -+ DBG(("%s: arrived unexpectedly early (not queued)\n", __FUNCTION__)); -+ assert(!has_vblank(info->crtc)); -+ return; -+ } -+ -+ if (has_vblank(info->crtc)) { -+ DBG(("%s: clearing immediate flag\n", __FUNCTION__)); -+ info->crtc = unmask_crtc(info->crtc); -+ sna_crtc_clear_vblank(info->crtc); -+ } -+ -+ msc = sna_crtc_record_event(info->crtc, event); -+ -+ if (info->sna->mode.shadow_wait) { -+ DBG(("%s: recursed from TearFree\n", __FUNCTION__)); -+ if (TimerSet(NULL, 0, 1, sna_fake_vblank_handler, info)) -+ return; -+ } -+ -+ vblank_complete(info, ust64(event->tv_sec, event->tv_usec), msc); - } - - static int - sna_present_queue_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc) - { - struct sna *sna = to_sna_from_screen(crtc->pScreen); -- struct sna_present_event *event; -- union drm_wait_vblank vbl; -- -- DBG(("%s(pipe=%d, event=%lld, msc=%lld)\n", -- __FUNCTION__, pipe_from_crtc(crtc), -- (long long)event_id, (long long)msc)); -+ struct sna_present_event *info, *tmp; -+ const struct ust_msc *swap; - -- event = malloc(sizeof(struct sna_present_event)); -- if (event == NULL) -+ if (!sna_crtc_is_on(crtc->devPrivate)) - return BadAlloc; - -- event->event_id = event_id; -- event->crtc = crtc->devPrivate; -+ swap = sna_crtc_last_swap(crtc->devPrivate); -+ DBG(("%s(pipe=%d, event=%lld, msc=%lld, last swap=%lld)\n", -+ __FUNCTION__, sna_crtc_pipe(crtc->devPrivate), -+ (long long)event_id, (long long)msc, (long long)swap->msc)); - -- VG_CLEAR(vbl); -- 
vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; -- vbl.request.sequence = msc; -- vbl.request.signal = (uintptr_t)MARK_PRESENT(event); -- if (sna_wait_vblank(sna, &vbl, sna_crtc_to_pipe(event->crtc))) { -- DBG(("%s: vblank enqueue failed\n", __FUNCTION__)); -- free(event); -- return BadMatch; -+ if (warn_unless((int64_t)(msc - swap->msc) >= 0)) { -+ DBG(("%s: pipe=%d tv=%d.%06d msc=%lld (target=%lld), event=%lld complete\n", __FUNCTION__, -+ sna_crtc_pipe(crtc->devPrivate), -+ swap->tv_sec, swap->tv_usec, -+ (long long)swap->msc, (long long)msc, -+ (long long)event_id)); -+ present_event_notify(event_id, swap_ust(swap), swap->msc); -+ return Success; -+ } -+ if (warn_unless(msc - swap->msc < 1ull<<31)) -+ return BadValue; -+ -+ list_for_each_entry(tmp, &sna->present.vblank_queue, link) { -+ if (tmp->target_msc == msc && -+ unmask_crtc(tmp->crtc) == crtc->devPrivate) { -+ uint64_t *events = tmp->event_id; -+ -+ if (tmp->n_event_id && -+ is_power_of_two(tmp->n_event_id)) { -+ events = malloc(2*sizeof(uint64_t)*tmp->n_event_id); -+ if (events == NULL) -+ return BadAlloc; -+ -+ memcpy(events, -+ tmp->event_id, -+ tmp->n_event_id*sizeof(uint64_t)); -+ if (tmp->n_event_id != 1) -+ free(tmp->event_id); -+ tmp->event_id = events; -+ } -+ -+ DBG(("%s: appending event=%lld to vblank %lld x %d\n", -+ __FUNCTION__, (long long)event_id, (long long)msc, tmp->n_event_id+1)); -+ events[tmp->n_event_id++] = event_id; -+ return Success; -+ } -+ if ((int64_t)(tmp->target_msc - msc) > 0) { -+ DBG(("%s: previous target_msc=%lld invalid for coalescing\n", -+ __FUNCTION__, (long long)tmp->target_msc)); -+ break; -+ } -+ } -+ -+ info = info_alloc(sna); -+ if (info == NULL) -+ return BadAlloc; -+ -+ info->crtc = crtc->devPrivate; -+ info->sna = sna; -+ info->target_msc = msc; -+ info->event_id = (uint64_t *)(info + 1); -+ info->event_id[0] = event_id; -+ info->n_event_id = 1; -+ list_add_tail(&info->link, &tmp->link); -+ info->queued = false; -+ -+ if (!sna_present_queue(info, 
swap->msc)) { -+ list_del(&info->link); -+ info_free(info); -+ return BadAlloc; - } - - return Success; -@@ -180,14 +541,6 @@ sna_present_abort_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc) - static void - sna_present_flush(WindowPtr window) - { -- PixmapPtr pixmap = get_window_pixmap(window); -- struct sna_pixmap *priv; -- -- DBG(("%s(pixmap=%ld)\n", __FUNCTION__, pixmap->drawable.serialNumber)); -- -- priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT | __MOVE_FORCE); -- if (priv && priv->gpu_bo) -- kgem_scanout_flush(&to_sna_from_pixmap(pixmap)->kgem, priv->gpu_bo); - } - - static bool -@@ -201,8 +554,13 @@ check_flip__crtc(struct sna *sna, - - assert(sna->scrn->vtSema); - -- if (sna->mode.shadow_active) { -- DBG(("%s: shadow buffer active\n", __FUNCTION__)); -+ if (!sna->mode.front_active) { -+ DBG(("%s: DPMS off, no flips\n", __FUNCTION__)); -+ return FALSE; -+ } -+ -+ if (sna->mode.rr_active) { -+ DBG(("%s: RandR transformation active\n", __FUNCTION__)); - return false; - } - -@@ -224,6 +582,11 @@ sna_present_check_flip(RRCrtcPtr crtc, - pixmap->drawable.serialNumber, - sync_flip)); - -+ if (!sna->scrn->vtSema) { -+ DBG(("%s: VT switched away, no flips\n", __FUNCTION__)); -+ return FALSE; -+ } -+ - if (sna->flags & SNA_NO_FLIP) { - DBG(("%s: flips not suported\n", __FUNCTION__)); - return FALSE; -@@ -231,7 +594,7 @@ sna_present_check_flip(RRCrtcPtr crtc, - - if (sync_flip) { - if ((sna->flags & SNA_HAS_FLIP) == 0) { -- DBG(("%s: async flips not suported\n", __FUNCTION__)); -+ DBG(("%s: sync flips not suported\n", __FUNCTION__)); - return FALSE; - } - } else { -@@ -257,24 +620,39 @@ sna_present_check_flip(RRCrtcPtr crtc, - return FALSE; - } - -- return TRUE; --} -- --static uint64_t gettime_ust64(void) --{ -- struct timespec tv; -+ if (flip->pinned) { -+ assert(flip->gpu_bo); -+ if (sna->flags & SNA_LINEAR_FB) { -+ if (flip->gpu_bo->tiling != I915_TILING_NONE) { -+ DBG(("%s: pined bo, tilng=%d needs NONE\n", -+ __FUNCTION__, 
flip->gpu_bo->tiling)); -+ return FALSE; -+ } -+ } else { -+ if (!sna->kgem.can_scanout_y && -+ flip->gpu_bo->tiling == I915_TILING_Y) { -+ DBG(("%s: pined bo, tilng=%d and can't scanout Y\n", -+ __FUNCTION__, flip->gpu_bo->tiling)); -+ return FALSE; -+ } -+ } - -- if (clock_gettime(CLOCK_MONOTONIC, &tv)) -- return 0; -+ if (flip->gpu_bo->pitch & 63) { -+ DBG(("%s: pined bo, bad pitch=%d\n", -+ __FUNCTION__, flip->gpu_bo->pitch)); -+ return FALSE; -+ } -+ } - -- return ust64(tv.tv_sec, tv.tv_nsec / 1000); -+ return TRUE; - } - - static Bool --page_flip__async(RRCrtcPtr crtc, -- uint64_t event_id, -- uint64_t target_msc, -- struct kgem_bo *bo) -+flip__async(struct sna *sna, -+ RRCrtcPtr crtc, -+ uint64_t event_id, -+ uint64_t target_msc, -+ struct kgem_bo *bo) - { - DBG(("%s(pipe=%d, event=%lld, handle=%d)\n", - __FUNCTION__, -@@ -282,17 +660,17 @@ page_flip__async(RRCrtcPtr crtc, - (long long)event_id, - bo->handle)); - -- if (!sna_page_flip(to_sna_from_screen(crtc->pScreen), bo, NULL, NULL)) { -+ if (!sna_page_flip(sna, bo, NULL, NULL)) { - DBG(("%s: async pageflip failed\n", __FUNCTION__)); - present_info.capabilities &= ~PresentCapabilityAsync; - return FALSE; - } - -- DBG(("%s: pipe=%d tv=%d.%06d msc=%d, event %lld complete\n", __FUNCTION__, -+ DBG(("%s: pipe=%d tv=%ld.%06d msc=%lld (target=%lld), event=%lld complete\n", __FUNCTION__, - pipe_from_crtc(crtc), -- gettime_ust64() / 1000000, gettime_ust64() % 1000000, -- sna_crtc_last_swap(crtc->devPrivate)->msc, -- (long long)event_id)); -+ (long)(gettime_ust64() / 1000000), (int)(gettime_ust64() % 1000000), -+ crtc ? 
(long long)sna_crtc_last_swap(crtc->devPrivate)->msc : 0LL, -+ (long long)target_msc, (long long)event_id)); - present_event_notify(event_id, gettime_ust64(), target_msc); - return TRUE; - } -@@ -303,7 +681,12 @@ present_flip_handler(struct drm_event_vblank *event, void *data) - struct sna_present_event *info = data; - struct ust_msc swap; - -- DBG(("%s(sequence=%d)\n", __FUNCTION__, event->sequence)); -+ DBG(("%s(sequence=%d): event=%lld\n", __FUNCTION__, event->sequence, (long long)info->event_id[0])); -+ assert(info->n_event_id == 1); -+ if (!info->queued) { -+ DBG(("%s: arrived unexpectedly early (not queued)\n", __FUNCTION__)); -+ return; -+ } - - if (info->crtc == NULL) { - swap.tv_sec = event->tv_sec; -@@ -312,22 +695,33 @@ present_flip_handler(struct drm_event_vblank *event, void *data) - } else - swap = *sna_crtc_last_swap(info->crtc); - -- DBG(("%s: pipe=%d, tv=%d.%06d msc %lld, event %lld complete\n", __FUNCTION__, -- info->crtc ? sna_crtc_to_pipe(info->crtc) : -1, -+ DBG(("%s: pipe=%d, tv=%d.%06d msc=%lld (target %lld), event=%lld complete%s\n", __FUNCTION__, -+ info->crtc ? sna_crtc_pipe(info->crtc) : -1, - swap.tv_sec, swap.tv_usec, (long long)swap.msc, -- (long long)info->event_id)); -- present_event_notify(info->event_id, ust64(swap.tv_sec, swap.tv_usec), swap.msc); -- free(info); -+ (long long)info->target_msc, -+ (long long)info->event_id[0], -+ info->target_msc && info->target_msc == swap.msc ? 
"" : ": MISS")); -+ present_event_notify(info->event_id[0], swap_ust(&swap), swap.msc); -+ if (info->crtc) -+ sna_crtc_clear_vblank(info->crtc); -+ -+ if (info->sna->present.unflip) { -+ DBG(("%s: executing queued unflip (event=%lld)\n", __FUNCTION__, (long long)info->sna->present.unflip)); -+ sna_present_unflip(xf86ScrnToScreen(info->sna->scrn), -+ info->sna->present.unflip); -+ info->sna->present.unflip = 0; -+ } -+ info_free(info); - } - - static Bool --page_flip(ScreenPtr screen, -- RRCrtcPtr crtc, -- uint64_t event_id, -- struct kgem_bo *bo) -+flip(struct sna *sna, -+ RRCrtcPtr crtc, -+ uint64_t event_id, -+ uint64_t target_msc, -+ struct kgem_bo *bo) - { -- struct sna *sna = to_sna_from_screen(screen); -- struct sna_present_event *event; -+ struct sna_present_event *info; - - DBG(("%s(pipe=%d, event=%lld, handle=%d)\n", - __FUNCTION__, -@@ -335,18 +729,27 @@ page_flip(ScreenPtr screen, - (long long)event_id, - bo->handle)); - -- event = malloc(sizeof(struct sna_present_event)); -- if (event == NULL) -+ info = info_alloc(sna); -+ if (info == NULL) - return FALSE; - -- event->event_id = event_id; -- event->crtc = crtc ? crtc->devPrivate : NULL; -- if (!sna_page_flip(sna, bo, present_flip_handler, event)) { -+ info->crtc = crtc ? 
crtc->devPrivate : NULL; -+ info->sna = sna; -+ info->event_id = (uint64_t *)(info + 1); -+ info->event_id[0] = event_id; -+ info->n_event_id = 1; -+ info->target_msc = target_msc; -+ info->queued = false; -+ -+ if (!sna_page_flip(sna, bo, present_flip_handler, info)) { - DBG(("%s: pageflip failed\n", __FUNCTION__)); -- free(event); -+ info_free(info); - return FALSE; - } - -+ info->queued = true; -+ if (info->crtc) -+ sna_crtc_set_vblank(info->crtc); - return TRUE; - } - -@@ -358,12 +761,48 @@ get_flip_bo(PixmapPtr pixmap) - - DBG(("%s(pixmap=%ld)\n", __FUNCTION__, pixmap->drawable.serialNumber)); - -- priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | __MOVE_FORCE); -+ priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | __MOVE_SCANOUT | __MOVE_FORCE); - if (priv == NULL) { - DBG(("%s: cannot force pixmap to the GPU\n", __FUNCTION__)); - return NULL; - } - -+ if (priv->gpu_bo->scanout) -+ return priv->gpu_bo; -+ -+ if (sna->kgem.has_llc && !wedged(sna) && !priv->pinned) { -+ struct kgem_bo *bo; -+ uint32_t tiling; -+ -+ tiling = I915_TILING_NONE; -+ if ((sna->flags & SNA_LINEAR_FB) == 0) -+ tiling = I915_TILING_X; -+ -+ bo = kgem_create_2d(&sna->kgem, -+ pixmap->drawable.width, -+ pixmap->drawable.height, -+ pixmap->drawable.bitsPerPixel, -+ tiling, CREATE_SCANOUT | CREATE_CACHED); -+ if (bo) { -+ BoxRec box; -+ -+ box.x1 = box.y1 = 0; -+ box.x2 = pixmap->drawable.width; -+ box.y2 = pixmap->drawable.height; -+ -+ if (sna->render.copy_boxes(sna, GXcopy, -+ &pixmap->drawable, priv->gpu_bo, 0, 0, -+ &pixmap->drawable, bo, 0, 0, -+ &box, 1, 0)) { -+ sna_pixmap_unmap(pixmap, priv); -+ kgem_bo_destroy(&sna->kgem, priv->gpu_bo); -+ -+ priv->gpu_bo = bo; -+ } else -+ kgem_bo_destroy(&sna->kgem, bo); -+ } -+ } -+ - if (sna->flags & SNA_LINEAR_FB && - priv->gpu_bo->tiling && - !sna_pixmap_change_tiling(pixmap, I915_TILING_NONE)) { -@@ -372,12 +811,17 @@ get_flip_bo(PixmapPtr pixmap) - } - - if (priv->gpu_bo->tiling == I915_TILING_Y && -+ !sna->kgem.can_scanout_y && - 
!sna_pixmap_change_tiling(pixmap, I915_TILING_X)) { - DBG(("%s: invalid Y-tiling, cannot convert\n", __FUNCTION__)); - return NULL; - } - -- priv->pinned |= PIN_SCANOUT; -+ if (priv->gpu_bo->pitch & 63) { -+ DBG(("%s: invalid pitch, no conversion\n", __FUNCTION__)); -+ return NULL; -+ } -+ - return priv->gpu_bo; - } - -@@ -388,6 +832,7 @@ sna_present_flip(RRCrtcPtr crtc, - PixmapPtr pixmap, - Bool sync_flip) - { -+ struct sna *sna = to_sna_from_pixmap(pixmap); - struct kgem_bo *bo; - - DBG(("%s(pipe=%d, event=%lld, msc=%lld, pixmap=%ld, sync?=%d)\n", -@@ -397,11 +842,32 @@ sna_present_flip(RRCrtcPtr crtc, - (long long)target_msc, - pixmap->drawable.serialNumber, sync_flip)); - -- if (!check_flip__crtc(to_sna_from_pixmap(pixmap), crtc)) { -+ if (!check_flip__crtc(sna, crtc)) { - DBG(("%s: flip invalid for CRTC\n", __FUNCTION__)); - return FALSE; - } - -+ assert(sna->present.unflip == 0); -+ -+ if (sna->flags & SNA_TEAR_FREE) { -+ DBG(("%s: disabling TearFree (was %s) in favour of Present flips\n", -+ __FUNCTION__, sna->mode.shadow_enabled ? 
"enabled" : "disabled")); -+ sna->mode.shadow_enabled = false; -+ } -+ assert(!sna->mode.shadow_enabled); -+ -+ if (sna->mode.flip_active) { -+ struct pollfd pfd; -+ -+ DBG(("%s: flips still pending, stalling\n", __FUNCTION__)); -+ pfd.fd = sna->kgem.fd; -+ pfd.events = POLLIN; -+ while (poll(&pfd, 1, 0) == 1) -+ sna_mode_wakeup(sna); -+ if (sna->mode.flip_active) -+ return FALSE; -+ } -+ - bo = get_flip_bo(pixmap); - if (bo == NULL) { - DBG(("%s: flip invalid bo\n", __FUNCTION__)); -@@ -409,9 +875,9 @@ sna_present_flip(RRCrtcPtr crtc, - } - - if (sync_flip) -- return page_flip(crtc->pScreen, crtc, event_id, bo); -+ return flip(sna, crtc, event_id, target_msc, bo); - else -- return page_flip__async(crtc, event_id, target_msc, bo); -+ return flip__async(sna, crtc, event_id, target_msc, bo); - } - - static void -@@ -421,29 +887,70 @@ sna_present_unflip(ScreenPtr screen, uint64_t event_id) - struct kgem_bo *bo; - - DBG(("%s(event=%lld)\n", __FUNCTION__, (long long)event_id)); -- if (sna->mode.front_active == 0 || sna->mode.shadow_active) { -+ if (sna->mode.front_active == 0 || sna->mode.rr_active) { - const struct ust_msc *swap; - - DBG(("%s: no CRTC active, perform no-op flip\n", __FUNCTION__)); - - notify: -- swap = sna_crtc_last_swap(sna_mode_first_crtc(sna)); -- DBG(("%s: pipe=%d, tv=%d.%06d msc %lld, event %lld complete\n", __FUNCTION__, -+ swap = sna_crtc_last_swap(sna_primary_crtc(sna)); -+ DBG(("%s: pipe=%d, tv=%d.%06d msc=%lld, event=%lld complete\n", __FUNCTION__, - -1, - swap->tv_sec, swap->tv_usec, (long long)swap->msc, - (long long)event_id)); -- present_event_notify(event_id, -- ust64(swap->tv_sec, swap->tv_usec), -- swap->msc); -+ present_event_notify(event_id, swap_ust(swap), swap->msc); -+ return; -+ } -+ -+ assert(!sna->mode.shadow_enabled); -+ if (sna->mode.flip_active) { -+ DBG(("%s: %d outstanding flips, queueing unflip\n", __FUNCTION__, sna->mode.flip_active)); -+ assert(sna->present.unflip == 0); -+ sna->present.unflip = event_id; - return; - } 
- -+ if (sna->flags & SNA_TEAR_FREE) { -+ DBG(("%s: %s TearFree after Present flips\n", -+ __FUNCTION__, sna->mode.shadow_damage != NULL ? "enabling" : "disabling")); -+ sna->mode.shadow_enabled = sna->mode.shadow_damage != NULL; -+ } -+ - bo = get_flip_bo(screen->GetScreenPixmap(screen)); -- if (bo == NULL || !page_flip(screen, NULL, event_id, bo)) { -+ if (bo == NULL) { -+reset_mode: - DBG(("%s: failed, trying to restore original mode\n", __FUNCTION__)); - xf86SetDesiredModes(sna->scrn); - goto notify; - } -+ -+ /* Are we unflipping after a failure that left our ScreenP in place? */ -+ if (!sna_needs_page_flip(sna, bo)) -+ goto notify; -+ -+ assert(sna_pixmap(screen->GetScreenPixmap(screen))->pinned & PIN_SCANOUT); -+ -+ if (sna->flags & SNA_HAS_ASYNC_FLIP) { -+ DBG(("%s: trying async flip restore\n", __FUNCTION__)); -+ if (flip__async(sna, NULL, event_id, 0, bo)) -+ return; -+ } -+ -+ if (!flip(sna, NULL, event_id, 0, bo)) -+ goto reset_mode; -+} -+ -+void sna_present_cancel_flip(struct sna *sna) -+{ -+ if (sna->present.unflip) { -+ const struct ust_msc *swap; -+ -+ swap = sna_crtc_last_swap(sna_primary_crtc(sna)); -+ present_event_notify(sna->present.unflip, -+ swap_ust(swap), swap->msc); -+ -+ sna->present.unflip = 0; -+ } - } - - static present_screen_info_rec present_info = { -@@ -463,10 +970,13 @@ static present_screen_info_rec present_info = { - - bool sna_present_open(struct sna *sna, ScreenPtr screen) - { -+ DBG(("%s(num_crtc=%d)\n", __FUNCTION__, sna->mode.num_real_crtc)); -+ - if (sna->mode.num_real_crtc == 0) - return false; - - sna_present_update(sna); -+ list_init(&sna->present.vblank_queue); - - return present_screen_init(screen, &present_info); - } -diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c -index 3fbb9ecb..3e935d57 100644 ---- a/src/sna/sna_render.c -+++ b/src/sna/sna_render.c -@@ -54,7 +54,7 @@ sna_format_for_depth(int depth) - { - switch (depth) { - case 1: return PICT_a1; -- case 4: return PICT_a4; -+ case 4: return PICT_x4a4; - 
case 8: return PICT_a8; - case 15: return PICT_x1r5g5b5; - case 16: return PICT_r5g6b5; -@@ -272,18 +272,6 @@ no_render_context_switch(struct kgem *kgem, - } - - static void --no_render_retire(struct kgem *kgem) --{ -- (void)kgem; --} -- --static void --no_render_expire(struct kgem *kgem) --{ -- (void)kgem; --} -- --static void - no_render_fini(struct sna *sna) - { - (void)sna; -@@ -316,8 +304,6 @@ const char *no_render_init(struct sna *sna) - render->fini = no_render_fini; - - sna->kgem.context_switch = no_render_context_switch; -- sna->kgem.retire = no_render_retire; -- sna->kgem.expire = no_render_expire; - if (sna->kgem.has_blt) - sna->kgem.ring = KGEM_BLT; - -@@ -407,10 +393,7 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt) - } - } - -- if (priv->shm) { -- assert(!priv->flush); -- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); -- } -+ add_shm_flush(sna, priv); - - DBG(("%s for box=(%d, %d), (%d, %d)\n", - __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); -@@ -567,6 +550,7 @@ static struct kgem_bo *upload(struct sna *sna, - assert(priv->gpu_damage == NULL); - assert(priv->gpu_bo == NULL); - assert(bo->proxy != NULL); -+ sna_damage_all(&priv->cpu_damage, pixmap); - kgem_proxy_bo_attach(bo, &priv->gpu_bo); - } - } -@@ -627,10 +611,7 @@ sna_render_pixmap_bo(struct sna *sna, - !priv->cpu_bo->snoop && priv->cpu_bo->pitch < 4096) { - DBG(("%s: CPU all damaged\n", __FUNCTION__)); - channel->bo = priv->cpu_bo; -- if (priv->shm) { -- assert(!priv->flush); -- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); -- } -+ add_shm_flush(sna, priv); - goto done; - } - } -@@ -1275,6 +1256,7 @@ sna_render_picture_extract(struct sna *sna, - assert(priv->gpu_damage == NULL); - assert(priv->gpu_bo == NULL); - assert(bo->proxy != NULL); -+ sna_damage_all(&priv->cpu_damage, pixmap); - kgem_proxy_bo_attach(bo, &priv->gpu_bo); - } - } -@@ -1338,6 +1320,8 @@ sna_render_picture_convolve(struct sna *sna, - */ - DBG(("%s: origin=(%d,%d) kernel=%dx%d, 
size=%dx%d\n", - __FUNCTION__, x_off, y_off, cw, ch, w, h)); -+ if (cw*ch > 32) /* too much loss of precision from quantization! */ -+ return -1; - - assert(picture->pDrawable); - assert(picture->filter == PictFilterConvolution); -@@ -1388,9 +1372,9 @@ sna_render_picture_convolve(struct sna *sna, - alpha = CreateSolidPicture(0, &color, &error); - if (alpha) { - sna_composite(PictOpAdd, picture, alpha, tmp, -- x, y, -+ x-(x_off+i), y-(y_off+j), -+ 0, 0, - 0, 0, -- x_off+i, y_off+j, - w, h); - FreePicture(alpha, 0); - } -@@ -2183,11 +2167,11 @@ copy_overlap(struct sna *sna, uint8_t alu, - ret = (sna->render.copy_boxes(sna, GXcopy, - draw, bo, src_dx, src_dy, - &tmp->drawable, tmp_bo, -extents->x1, -extents->y1, -- box, n , 0) && -+ box, n, 0) && - sna->render.copy_boxes(sna, alu, - &tmp->drawable, tmp_bo, -extents->x1, -extents->y1, - draw, bo, dst_dx, dst_dy, -- box, n , 0)); -+ box, n, 0)); - - screen->DestroyPixmap(tmp); - return ret; -@@ -2308,16 +2292,22 @@ static bool can_copy_cpu(struct sna *sna, - struct kgem_bo *src, - struct kgem_bo *dst) - { -- if (src->tiling != dst->tiling) -- return false; -+ DBG(("%s: tiling=%d:%d, pitch=%d:%d, can_map=%d:%d[%d]\n", -+ __FUNCTION__, -+ src->tiling, dst->tiling, -+ src->pitch, dst->pitch, -+ kgem_bo_can_map__cpu(&sna->kgem, src, false), -+ kgem_bo_can_map__cpu(&sna->kgem, dst, true), -+ sna->kgem.has_wc_mmap)); - -- if (src->pitch != dst->pitch) -+ if (src->tiling != dst->tiling) - return false; - - if (!kgem_bo_can_map__cpu(&sna->kgem, src, false)) - return false; - -- if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true)) -+ if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true) && -+ !sna->kgem.has_wc_mmap) - return false; - - DBG(("%s -- yes, src handle=%d, dst handle=%d\n", __FUNCTION__, src->handle, dst->handle)); -@@ -2330,31 +2320,62 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op, - const DrawableRec *dst_draw, struct kgem_bo *dst_bo, int16_t dx, int16_t dy, - const BoxRec *box, int n, unsigned flags) - { -+ 
memcpy_box_func detile = NULL; - void *dst, *src; -- bool clipped; - - if (op != GXcopy) - return false; - -- clipped = (n > 1 || -- box->x1 + dx > 0 || -- box->y1 + dy > 0 || -- box->x2 + dx < dst_draw->width || -- box->y2 + dy < dst_draw->height); -+ if (src_draw->depth != dst_draw->depth) -+ return false; - - dst = src = NULL; -- if (!clipped && can_copy_cpu(sna, src_bo, dst_bo)) { -- dst = kgem_bo_map__cpu(&sna->kgem, dst_bo); -+ if (can_copy_cpu(sna, src_bo, dst_bo)) { -+ if (src_bo->pitch != dst_bo->pitch || -+ dx != sx || dy != sy || n > 1 || -+ box->x1 + dx > 0 || -+ box->y1 + dy > 0 || -+ box->x2 + dx < dst_draw->width || -+ box->y2 + dy < dst_draw->height) { -+ if (dx != sx) /* not implemented in memcpy yet */ -+ goto use_gtt; -+ -+ switch (dst_bo->tiling) { -+ default: -+ case I915_TILING_Y: -+ goto use_gtt; -+ -+ case I915_TILING_X: -+ detile = sna->kgem.memcpy_between_tiled_x; -+ if (detile == NULL) -+ goto use_gtt; -+ break; -+ -+ case I915_TILING_NONE: -+ break; -+ } -+ } -+ -+ if (kgem_bo_can_map__cpu(&sna->kgem, dst_bo, true)) -+ dst = kgem_bo_map__cpu(&sna->kgem, dst_bo); -+ else -+ dst = kgem_bo_map__wc(&sna->kgem, dst_bo); - src = kgem_bo_map__cpu(&sna->kgem, src_bo); - } - - if (dst == NULL || src == NULL) { -+use_gtt: - dst = kgem_bo_map__gtt(&sna->kgem, dst_bo); - src = kgem_bo_map__gtt(&sna->kgem, src_bo); - if (dst == NULL || src == NULL) - return false; -+ -+ detile = NULL; - } else { -- kgem_bo_sync__cpu_full(&sna->kgem, dst_bo, true); -+ if (dst == dst_bo->map__wc) -+ kgem_bo_sync__gtt(&sna->kgem, dst_bo); -+ else -+ kgem_bo_sync__cpu_full(&sna->kgem, dst_bo, true); - kgem_bo_sync__cpu_full(&sna->kgem, src_bo, false); - } - -@@ -2362,7 +2383,16 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op, - __FUNCTION__, sx, sy, dx, dy, n)); - - if (sigtrap_get() == 0) { -- do { -+ if (detile) { -+ do { -+ detile(src, dst, dst_draw->bitsPerPixel, -+ src_bo->pitch, dst_bo->pitch, -+ box->x1 + sx, box->y1 + sy, -+ box->x1 + dx, box->y1 + dy, -+ 
box->x2 - box->x1, box->y2 - box->y1); -+ box++; -+ } while (--n); -+ } else do { - memcpy_blt(src, dst, dst_draw->bitsPerPixel, - src_bo->pitch, dst_bo->pitch, - box->x1 + sx, box->y1 + sy, -@@ -2380,4 +2410,5 @@ void - sna_render_mark_wedged(struct sna *sna) - { - sna->render.copy_boxes = memcpy_copy_boxes; -+ sna->render.prefer_gpu = 0; - } -diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h -index 6e1fa480..4ba345a7 100644 ---- a/src/sna/sna_render.h -+++ b/src/sna/sna_render.h -@@ -148,6 +148,10 @@ struct sna_composite_op { - struct { - uint32_t flags; - } gen8; -+ -+ struct { -+ uint32_t flags; -+ } gen9; - } u; - - void *priv; -@@ -238,8 +242,9 @@ struct sna_render { - int16_t w, int16_t h, - unsigned flags, - struct sna_composite_op *tmp); --#define COMPOSITE_PARTIAL 0x1 --#define COMPOSITE_FALLBACK 0x80000000 -+#define COMPOSITE_PARTIAL 0x1 -+#define COMPOSITE_UPLOAD 0x40000000 -+#define COMPOSITE_FALLBACK 0x80000000 - - bool (*check_composite_spans)(struct sna *sna, uint8_t op, - PicturePtr dst, PicturePtr src, -@@ -286,6 +291,8 @@ struct sna_render { - #define COPY_LAST 0x1 - #define COPY_SYNC 0x2 - #define COPY_NO_OVERLAP 0x4 -+#define COPY_SMALL 0x8 -+#define COPY_DRI 0x10 - - bool (*copy)(struct sna *sna, uint8_t alu, - PixmapPtr src, struct kgem_bo *src_bo, -@@ -481,6 +488,7 @@ enum { - - GEN7_WM_KERNEL_VIDEO_PLANAR, - GEN7_WM_KERNEL_VIDEO_PACKED, -+ GEN7_WM_KERNEL_VIDEO_RGB, - GEN7_WM_KERNEL_COUNT - }; - -@@ -533,12 +541,13 @@ enum { - - GEN8_WM_KERNEL_VIDEO_PLANAR, - GEN8_WM_KERNEL_VIDEO_PACKED, -+ GEN8_WM_KERNEL_VIDEO_RGB, - GEN8_WM_KERNEL_COUNT - }; - - struct gen8_render_state { - unsigned gt; -- -+ const struct gt_info *info; - struct kgem_bo *general_bo; - - uint32_t vs_state; -@@ -565,6 +574,58 @@ struct gen8_render_state { - bool emit_flush; - }; - -+enum { -+ GEN9_WM_KERNEL_NOMASK = 0, -+ GEN9_WM_KERNEL_NOMASK_P, -+ -+ GEN9_WM_KERNEL_MASK, -+ GEN9_WM_KERNEL_MASK_P, -+ -+ GEN9_WM_KERNEL_MASKCA, -+ GEN9_WM_KERNEL_MASKCA_P, -+ -+ 
GEN9_WM_KERNEL_MASKSA, -+ GEN9_WM_KERNEL_MASKSA_P, -+ -+ GEN9_WM_KERNEL_OPACITY, -+ GEN9_WM_KERNEL_OPACITY_P, -+ -+ GEN9_WM_KERNEL_VIDEO_PLANAR, -+ GEN9_WM_KERNEL_VIDEO_PACKED, -+ GEN9_WM_KERNEL_VIDEO_RGB, -+ GEN9_WM_KERNEL_COUNT -+}; -+ -+struct gen9_render_state { -+ unsigned gt; -+ const struct gt_info *info; -+ struct kgem_bo *general_bo; -+ -+ uint32_t vs_state; -+ uint32_t sf_state; -+ uint32_t sf_mask_state; -+ uint32_t wm_state; -+ uint32_t wm_kernel[GEN9_WM_KERNEL_COUNT][3]; -+ -+ uint32_t cc_blend; -+ -+ uint32_t drawrect_offset; -+ uint32_t drawrect_limit; -+ uint32_t blend; -+ uint32_t samplers; -+ uint32_t kernel; -+ -+ uint16_t num_sf_outputs; -+ uint16_t ve_id; -+ uint16_t last_primitive; -+ int16_t floats_per_vertex; -+ uint16_t surface_table; -+ -+ bool needs_invariant; -+ bool emit_flush; -+ bool ve_dirty; -+}; -+ - struct sna_static_stream { - uint32_t size, used; - uint8_t *data; -@@ -620,6 +681,7 @@ const char *gen5_render_init(struct sna *sna, const char *backend); - const char *gen6_render_init(struct sna *sna, const char *backend); - const char *gen7_render_init(struct sna *sna, const char *backend); - const char *gen8_render_init(struct sna *sna, const char *backend); -+const char *gen9_render_init(struct sna *sna, const char *backend); - - void sna_render_mark_wedged(struct sna *sna); - -diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h -index 10fbbfe2..e162e37f 100644 ---- a/src/sna/sna_render_inline.h -+++ b/src/sna/sna_render_inline.h -@@ -304,6 +304,12 @@ color_convert(uint32_t pixel, - return pixel; - } - -+inline static uint32_t -+solid_color(uint32_t format, uint32_t pixel) -+{ -+ return color_convert(pixel, format, PICT_a8r8g8b8); -+} -+ - inline static bool dst_use_gpu(PixmapPtr pixmap) - { - struct sna_pixmap *priv = sna_pixmap(pixmap); -diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c -index 308efc0a..8e2627f7 100644 ---- a/src/sna/sna_tiling.c -+++ b/src/sna/sna_tiling.c -@@ -369,8 +369,7 @@ 
sna_tiling_composite_spans_boxes(struct sna *sna, - const BoxRec *box, int nbox, float opacity) - { - while (nbox--) -- sna_tiling_composite_spans_box(sna, op->base.priv, box++, opacity); -- (void)sna; -+ sna_tiling_composite_spans_box(sna, op, box++, opacity); - } - - fastcall static void -@@ -581,6 +580,7 @@ sna_tiling_composite_spans(uint32_t op, - tile->rects = tile->rects_embedded; - tile->rect_count = 0; - tile->rect_size = ARRAY_SIZE(tile->rects_embedded); -+ COMPILE_TIME_ASSERT(sizeof(tile->rects_embedded[0]) >= sizeof(struct sna_tile_span)); - - tmp->box = sna_tiling_composite_spans_box; - tmp->boxes = sna_tiling_composite_spans_boxes; -diff --git a/src/sna/sna_trapezoids_boxes.c b/src/sna/sna_trapezoids_boxes.c -index 9900e3f0..bbf83759 100644 ---- a/src/sna/sna_trapezoids_boxes.c -+++ b/src/sna/sna_trapezoids_boxes.c -@@ -198,7 +198,7 @@ composite_aligned_boxes(struct sna *sna, - if (op == PictOpClear && sna->clear) - src = sna->clear; - -- DBG(("%s: clipped extents (%d, %d), (%d, %d); now offset by (%d, %d), orgin (%d, %d)\n", -+ DBG(("%s: clipped extents (%d, %d), (%d, %d); now offset by (%d, %d), origin (%d, %d)\n", - __FUNCTION__, - clip.extents.x1, clip.extents.y1, - clip.extents.x2, clip.extents.y2, -@@ -592,6 +592,8 @@ lerp32_opacity(PixmapPtr scratch, - uint32_t *ptr; - int stride, i; - -+ sigtrap_assert_active(); -+ - ptr = (uint32_t*)((uint8_t *)scratch->devPrivate.ptr + scratch->devKind * y); - ptr += x; - stride = scratch->devKind / 4; -diff --git a/src/sna/sna_trapezoids_imprecise.c b/src/sna/sna_trapezoids_imprecise.c -index 37def2f9..8bc7c8a8 100644 ---- a/src/sna/sna_trapezoids_imprecise.c -+++ b/src/sna/sna_trapezoids_imprecise.c -@@ -962,6 +962,16 @@ tor_add_trapezoid(struct tor *tor, - const xTrapezoid *t, - int dx, int dy) - { -+ if (!xTrapezoidValid(t)) { -+ __DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n", -+ __FUNCTION__, -+ t->top, t->bottom, -+ t->left.p1.x, 
t->left.p1.y, -+ t->left.p2.x, t->left.p2.y, -+ t->right.p1.x, t->right.p1.y, -+ t->right.p2.x, t->right.p2.y)); -+ return; -+ } - polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy); - polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy); - } -@@ -1687,31 +1697,27 @@ struct span_thread { - #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box)) - struct span_thread_boxes { - const struct sna_composite_spans_op *op; -+ const BoxRec *clip_start, *clip_end; - int num_boxes; - struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES]; - }; - --static void span_thread_add_boxes(struct sna *sna, void *data, -- const BoxRec *box, int count, float alpha) -+static void span_thread_add_box(struct sna *sna, void *data, -+ const BoxRec *box, float alpha) - { - struct span_thread_boxes *b = data; - -- __DBG(("%s: adding %d boxes with alpha=%f\n", -- __FUNCTION__, count, alpha)); -+ __DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha)); - -- assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES); -- if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) { -- DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count)); -- assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); -+ if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) { -+ DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes)); - b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes); - b->num_boxes = 0; - } - -- do { -- b->boxes[b->num_boxes].box = *box++; -- b->boxes[b->num_boxes].alpha = alpha; -- b->num_boxes++; -- } while (--count); -+ b->boxes[b->num_boxes].box = *box++; -+ b->boxes[b->num_boxes].alpha = alpha; -+ b->num_boxes++; - assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); - } - -@@ -1722,8 +1728,22 @@ span_thread_box(struct sna *sna, - const BoxRec *box, - int coverage) - { -+ struct span_thread_boxes *b = (struct span_thread_boxes *)op; -+ - __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); -- span_thread_add_boxes(sna, op, box, 1, 
AREA_TO_ALPHA(coverage)); -+ if (b->num_boxes) { -+ struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1]; -+ if (bb->box.x1 == box->x1 && -+ bb->box.x2 == box->x2 && -+ bb->box.y2 == box->y1 && -+ bb->alpha == AREA_TO_ALPHA(coverage)) { -+ bb->box.y2 = box->y2; -+ __DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2)); -+ return; -+ } -+ } -+ -+ span_thread_add_box(sna, op, box, AREA_TO_ALPHA(coverage)); - } - - static void -@@ -1733,20 +1753,28 @@ span_thread_clipped_box(struct sna *sna, - const BoxRec *box, - int coverage) - { -- pixman_region16_t region; -+ struct span_thread_boxes *b = (struct span_thread_boxes *)op; -+ const BoxRec *c; - - __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, - AREA_TO_ALPHA(coverage))); - -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- if (region_num_rects(®ion)) { -- span_thread_add_boxes(sna, op, -- region_rects(®ion), -- region_num_rects(®ion), -- AREA_TO_ALPHA(coverage)); -+ b->clip_start = -+ find_clip_box_for_y(b->clip_start, b->clip_end, box->y1); -+ -+ c = b->clip_start; -+ while (c != b->clip_end) { -+ BoxRec clipped; -+ -+ if (box->y2 <= c->y1) -+ break; -+ -+ clipped = *box; -+ if (!box_intersect(&clipped, c++)) -+ continue; -+ -+ span_thread_add_box(sna, op, &clipped, AREA_TO_ALPHA(coverage)); - } -- pixman_region_fini(®ion); - } - - static span_func_t -@@ -1777,6 +1805,16 @@ thread_choose_span(struct sna_composite_spans_op *tmp, - return span; - } - -+inline static void -+span_thread_boxes_init(struct span_thread_boxes *boxes, -+ const struct sna_composite_spans_op *op, -+ const RegionRec *clip) -+{ -+ boxes->op = op; -+ region_get_boxes(clip, &boxes->clip_start, &boxes->clip_end); -+ boxes->num_boxes = 0; -+} -+ - static void - span_thread(void *arg) - { -@@ -1789,8 +1827,7 @@ span_thread(void *arg) - if (!tor_init(&tor, &thread->extents, 2*thread->ntrap)) - return; - -- boxes.op = thread->op; -- boxes.num_boxes = 0; -+ 
span_thread_boxes_init(&boxes, thread->op, thread->clip); - - y1 = thread->extents.y1 - thread->draw_y; - y2 = thread->extents.y2 - thread->draw_y; -@@ -2190,6 +2227,52 @@ static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v) - } while (--h); - } - -+struct clipped_span { -+ span_func_t span; -+ const BoxRec *clip_start, *clip_end; -+}; -+ -+static void -+tor_blt_clipped(struct sna *sna, -+ struct sna_composite_spans_op *op, -+ pixman_region16_t *clip, -+ const BoxRec *box, -+ int coverage) -+{ -+ struct clipped_span *cs = (struct clipped_span *)clip; -+ const BoxRec *c; -+ -+ cs->clip_start = -+ find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1); -+ -+ c = cs->clip_start; -+ while (c != cs->clip_end) { -+ BoxRec clipped; -+ -+ if (box->y2 <= c->y1) -+ break; -+ -+ clipped = *box; -+ if (!box_intersect(&clipped, c++)) -+ continue; -+ -+ cs->span(sna, op, NULL, &clipped, coverage); -+ } -+} -+ -+inline static span_func_t -+clipped_span(struct clipped_span *cs, -+ span_func_t span, -+ const RegionRec *clip) -+{ -+ if (clip->data) { -+ cs->span = span; -+ region_get_boxes(clip, &cs->clip_start, &cs->clip_end); -+ span = tor_blt_clipped; -+ } -+ return span; -+} -+ - static void - tor_blt_src(struct sna *sna, - struct sna_composite_spans_op *op, -@@ -2203,25 +2286,6 @@ tor_blt_src(struct sna *sna, - } - - static void --tor_blt_src_clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- tor_blt_src(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} -- --static void - tor_blt_in(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, -@@ -2253,25 +2317,6 @@ tor_blt_in(struct sna *sna, - } - - static void 
--tor_blt_in_clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- tor_blt_in(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} -- --static void - tor_blt_add(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, -@@ -2310,25 +2355,6 @@ tor_blt_add(struct sna *sna, - } - - static void --tor_blt_add_clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- tor_blt_add(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} -- --static void - tor_blt_lerp32(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, -@@ -2343,6 +2369,7 @@ tor_blt_lerp32(struct sna *sna, - if (coverage == 0) - return; - -+ sigtrap_assert_active(); - ptr += box->y1 * stride + box->x1; - - h = box->y2 - box->y1; -@@ -2383,25 +2410,6 @@ tor_blt_lerp32(struct sna *sna, - } - } - --static void --tor_blt_lerp32_clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- tor_blt_lerp32(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} -- - struct pixman_inplace { - pixman_image_t *image, *source, *mask; - uint32_t color; -@@ -2431,24 +2439,6 @@ pixmask_span_solid(struct sna *sna, - 
pi->dx + box->x1, pi->dy + box->y1, - box->x2 - box->x1, box->y2 - box->y1); - } --static void --pixmask_span_solid__clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- pixmask_span_solid(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} - - static void - pixmask_span(struct sna *sna, -@@ -2471,24 +2461,6 @@ pixmask_span(struct sna *sna, - pi->dx + box->x1, pi->dy + box->y1, - box->x2 - box->x1, box->y2 - box->y1); - } --static void --pixmask_span__clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- pixmask_span(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} - - struct inplace_x8r8g8b8_thread { - xTrapezoid *traps; -@@ -2507,6 +2479,7 @@ static void inplace_x8r8g8b8_thread(void *arg) - struct inplace_x8r8g8b8_thread *thread = arg; - struct tor tor; - span_func_t span; -+ struct clipped_span clipped; - RegionPtr clip; - int y1, y2, n; - -@@ -2537,12 +2510,11 @@ static void inplace_x8r8g8b8_thread(void *arg) - inplace.stride = pixmap->devKind; - inplace.color = thread->color; - -- if (clip->data) -- span = tor_blt_lerp32_clipped; -- else -- span = tor_blt_lerp32; -+ span = clipped_span(&clipped, tor_blt_lerp32, clip); - -- tor_render(NULL, &tor, (void*)&inplace, clip, span, false); -+ tor_render(NULL, &tor, -+ (void*)&inplace, (void*)&clipped, -+ span, false); - } else if (thread->is_solid) { - struct pixman_inplace pi; - -@@ -2555,12 +2527,11 @@ static void 
inplace_x8r8g8b8_thread(void *arg) - 1, 1, pi.bits, 0); - pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); - -- if (clip->data) -- span = pixmask_span_solid__clipped; -- else -- span = pixmask_span_solid; -+ span = clipped_span(&clipped, pixmask_span_solid, clip); - -- tor_render(NULL, &tor, (void*)&pi, clip, span, false); -+ tor_render(NULL, &tor, -+ (void*)&pi, (void *)&clipped, -+ span, false); - - pixman_image_unref(pi.source); - pixman_image_unref(pi.image); -@@ -2579,12 +2550,11 @@ static void inplace_x8r8g8b8_thread(void *arg) - pi.bits = pixman_image_get_data(pi.mask); - pi.op = thread->op; - -- if (clip->data) -- span = pixmask_span__clipped; -- else -- span = pixmask_span; -+ span = clipped_span(&clipped, pixmask_span, clip); - -- tor_render(NULL, &tor, (void*)&pi, clip, span, false); -+ tor_render(NULL, &tor, -+ (void*)&pi, (void *)&clipped, -+ span, false); - - pixman_image_unref(pi.mask); - pixman_image_unref(pi.source); -@@ -2698,6 +2668,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - if (num_threads == 1) { - struct tor tor; - span_func_t span; -+ struct clipped_span clipped; - - if (!tor_init(&tor, ®ion.extents, 2*ntrap)) - return true; -@@ -2723,17 +2694,15 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - inplace.stride = pixmap->devKind; - inplace.color = color; - -- if (dst->pCompositeClip->data) -- span = tor_blt_lerp32_clipped; -- else -- span = tor_blt_lerp32; -+ span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip); - - DBG(("%s: render inplace op=%d, color=%08x\n", - __FUNCTION__, op, color)); - - if (sigtrap_get() == 0) { -- tor_render(NULL, &tor, (void*)&inplace, -- dst->pCompositeClip, span, false); -+ tor_render(NULL, &tor, -+ (void*)&inplace, (void*)&clipped, -+ span, false); - sigtrap_put(); - } - } else if (is_solid) { -@@ -2748,15 +2717,12 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - 1, 1, pi.bits, 0); - pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); - -- if (dst->pCompositeClip->data) -- span = 
pixmask_span_solid__clipped; -- else -- span = pixmask_span_solid; -+ span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip); - - if (sigtrap_get() == 0) { -- tor_render(NULL, &tor, (void*)&pi, -- dst->pCompositeClip, span, -- false); -+ tor_render(NULL, &tor, -+ (void*)&pi, (void*)&clipped, -+ span, false); - sigtrap_put(); - } - -@@ -2777,15 +2743,12 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - pi.bits = pixman_image_get_data(pi.mask); - pi.op = op; - -- if (dst->pCompositeClip->data) -- span = pixmask_span__clipped; -- else -- span = pixmask_span; -+ span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip); - - if (sigtrap_get() == 0) { -- tor_render(NULL, &tor, (void*)&pi, -- dst->pCompositeClip, span, -- false); -+ tor_render(NULL, &tor, -+ (void*)&pi, (void*)&clipped, -+ span, false); - sigtrap_put(); - } - -@@ -2847,9 +2810,9 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - - struct inplace_thread { - xTrapezoid *traps; -- RegionPtr clip; - span_func_t span; - struct inplace inplace; -+ struct clipped_span clipped; - BoxRec extents; - int dx, dy; - int draw_x, draw_y; -@@ -2874,8 +2837,9 @@ static void inplace_thread(void *arg) - tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy); - } - -- tor_render(NULL, &tor, (void*)&thread->inplace, -- thread->clip, thread->span, thread->unbounded); -+ tor_render(NULL, &tor, -+ (void*)&thread->inplace, (void*)&thread->clipped, -+ thread->span, thread->unbounded); - - tor_fini(&tor); - } -@@ -2889,6 +2853,7 @@ imprecise_trapezoid_span_inplace(struct sna *sna, - bool fallback) - { - struct inplace inplace; -+ struct clipped_span clipped; - span_func_t span; - PixmapPtr pixmap; - struct sna_pixmap *priv; -@@ -3005,21 +2970,12 @@ imprecise_trapezoid_span_inplace(struct sna *sna, - region.extents.x2, region.extents.y2)); - - if (op == PictOpSrc) { -- if (dst->pCompositeClip->data) -- span = tor_blt_src_clipped; -- else -- span = tor_blt_src; -+ span = tor_blt_src; - } else if (op == 
PictOpIn) { -- if (dst->pCompositeClip->data) -- span = tor_blt_in_clipped; -- else -- span = tor_blt_in; -+ span = tor_blt_in; - } else { - assert(op == PictOpAdd); -- if (dst->pCompositeClip->data) -- span = tor_blt_add_clipped; -- else -- span = tor_blt_add; -+ span = tor_blt_add; - } - - DBG(("%s: move-to-cpu\n", __FUNCTION__)); -@@ -3037,6 +2993,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna, - inplace.stride = pixmap->devKind; - inplace.opacity = color >> 24; - -+ span = clipped_span(&clipped, span, dst->pCompositeClip); -+ - num_threads = 1; - if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) - num_threads = sna_use_threads(region.extents.x2 - region.extents.x1, -@@ -3057,8 +3015,9 @@ imprecise_trapezoid_span_inplace(struct sna *sna, - } - - if (sigtrap_get() == 0) { -- tor_render(NULL, &tor, (void*)&inplace, -- dst->pCompositeClip, span, unbounded); -+ tor_render(NULL, &tor, -+ (void*)&inplace, (void *)&clipped, -+ span, unbounded); - sigtrap_put(); - } - -@@ -3075,8 +3034,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna, - threads[0].traps = traps; - threads[0].ntrap = ntrap; - threads[0].inplace = inplace; -+ threads[0].clipped = clipped; - threads[0].extents = region.extents; -- threads[0].clip = dst->pCompositeClip; - threads[0].span = span; - threads[0].unbounded = unbounded; - threads[0].dx = dx; -@@ -3707,8 +3666,7 @@ tristrip_thread(void *arg) - if (!tor_init(&tor, &thread->extents, 2*thread->count)) - return; - -- boxes.op = thread->op; -- boxes.num_boxes = 0; -+ span_thread_boxes_init(&boxes, thread->op, thread->clip); - - cw = 0; ccw = 1; - polygon_add_line(tor.polygon, -@@ -3874,7 +3832,7 @@ imprecise_tristrip_span_converter(struct sna *sna, - break; - } while (1); - polygon_add_line(tor.polygon, -- &points[cw], &points[2+ccw], -+ &points[cw], &points[ccw], - dx, dy); - assert(tor.polygon->num_edges <= 2*count); - -diff --git a/src/sna/sna_trapezoids_mono.c b/src/sna/sna_trapezoids_mono.c -index 808703a9..07a7867d 100644 ---- 
a/src/sna/sna_trapezoids_mono.c -+++ b/src/sna/sna_trapezoids_mono.c -@@ -72,13 +72,14 @@ struct mono { - struct sna *sna; - struct sna_composite_op op; - pixman_region16_t clip; -+ const BoxRec *clip_start, *clip_end; - - fastcall void (*span)(struct mono *, int, int, BoxPtr); - - struct mono_polygon polygon; - }; - --#define I(x) pixman_fixed_to_int ((x) + pixman_fixed_1_minus_e/2) -+#define I(x) pixman_fixed_to_int((x) + pixman_fixed_1_minus_e/2) - - static struct quorem - floored_muldivrem(int32_t x, int32_t a, int32_t b) -@@ -249,22 +250,22 @@ mono_add_line(struct mono *mono, - - e->dxdy = floored_muldivrem(dx, pixman_fixed_1, dy); - -- e->x = floored_muldivrem((ytop - dst_y) * pixman_fixed_1 + pixman_fixed_1_minus_e/2 - p1->y, -+ e->x = floored_muldivrem((ytop - dst_y) * pixman_fixed_1 + pixman_fixed_1/2 - p1->y, - dx, dy); - e->x.quo += p1->x; - e->x.rem -= dy; - - e->dy = dy; -- -- __DBG(("%s: initial x=%d [%d.%d/%d] + dxdy=%d.%d/%d\n", -- __FUNCTION__, -- I(e->x.quo), e->x.quo, e->x.rem, e->dy, -- e->dxdy.quo, e->dxdy.rem, e->dy)); - } - e->x.quo += dst_x*pixman_fixed_1; -+ __DBG(("%s: initial x=%d [%d.%d/%d] + dxdy=%d.%d/%d\n", -+ __FUNCTION__, -+ I(e->x.quo), e->x.quo, e->x.rem, e->dy, -+ e->dxdy.quo, e->dxdy.rem, e->dy)); - - { - struct mono_edge **ptail = &polygon->y_buckets[ytop - mono->clip.extents.y1]; -+ assert(ytop - mono->clip.extents.y1 < mono->clip.extents.y2 - mono->clip.extents.y1); - if (*ptail) - (*ptail)->prev = e; - e->next = *ptail; -@@ -368,6 +369,10 @@ static struct mono_edge *mono_filter(struct mono_edge *edges) - e->x.rem == n->x.rem && - e->dxdy.quo == n->dxdy.quo && - e->dxdy.rem == n->dxdy.rem) { -+ assert(e->dy == n->dy); -+ __DBG(("%s: discarding cancellation pair (%d.%d) + (%d.%d)\n", -+ __FUNCTION__, e->x.quo, e->x.rem, e->dxdy.quo, e->dxdy.rem)); -+ - if (e->prev) - e->prev->next = n->next; - else -@@ -378,8 +383,11 @@ static struct mono_edge *mono_filter(struct mono_edge *edges) - break; - - e = n->next; -- } else -+ } else 
{ -+ __DBG(("%s: adding edge (%d.%d) + (%d.%d)/%d, height=%d\n", -+ __FUNCTION__, n->x.quo, n->x.rem, n->dxdy.quo, n->dxdy.rem, n->dy, n->height_left)); - e = n; -+ } - } - - return edges; -@@ -474,6 +482,34 @@ mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box) - c->op.box(c->sna, &c->op, box); - } - -+fastcall static void -+mono_span__clipped(struct mono *c, int x1, int x2, BoxPtr box) -+{ -+ const BoxRec *b; -+ -+ __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); -+ -+ c->clip_start = -+ find_clip_box_for_y(c->clip_start, c->clip_end, box->y1); -+ -+ b = c->clip_start; -+ while (b != c->clip_end) { -+ BoxRec clipped; -+ -+ if (box->y2 <= b->y1) -+ break; -+ -+ clipped.x1 = x1; -+ clipped.x2 = x2; -+ clipped.y1 = box->y1; -+ clipped.y2 = box->y2; -+ if (!box_intersect(&clipped, b++)) -+ continue; -+ -+ c->op.box(c->sna, &c->op, &clipped); -+ } -+} -+ - struct mono_span_thread_boxes { - const struct sna_composite_op *op; - #define MONO_SPAN_MAX_BOXES (8192/sizeof(BoxRec)) -@@ -482,40 +518,45 @@ struct mono_span_thread_boxes { - }; - - inline static void --thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count) -+thread_mono_span_add_box(struct mono *c, const BoxRec *box) - { - struct mono_span_thread_boxes *b = c->op.priv; - -- assert(count > 0 && count <= MONO_SPAN_MAX_BOXES); -- if (unlikely(b->num_boxes + count > MONO_SPAN_MAX_BOXES)) { -+ if (unlikely(b->num_boxes == MONO_SPAN_MAX_BOXES)) { - b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes); - b->num_boxes = 0; - } - -- memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec)); -- b->num_boxes += count; -+ b->boxes[b->num_boxes++] = *box; - assert(b->num_boxes <= MONO_SPAN_MAX_BOXES); - } - - fastcall static void - thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box) - { -- pixman_region16_t region; -+ const BoxRec *b; - - __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); - -- box->x1 = x1; -- box->x2 = x2; -+ c->clip_start = -+ find_clip_box_for_y(c->clip_start, 
c->clip_end, box->y1); - -- assert(c->clip.data); -+ b = c->clip_start; -+ while (b != c->clip_end) { -+ BoxRec clipped; -+ -+ if (box->y2 <= b->y1) -+ break; -+ -+ clipped.x1 = x1; -+ clipped.x2 = x2; -+ clipped.y1 = box->y1; -+ clipped.y2 = box->y2; -+ if (!box_intersect(&clipped, b++)) -+ continue; - -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, &c->clip); -- if (region_num_rects(®ion)) -- thread_mono_span_add_boxes(c, -- region_rects(®ion), -- region_num_rects(®ion)); -- pixman_region_fini(®ion); -+ thread_mono_span_add_box(c, &clipped); -+ } - } - - fastcall static void -@@ -525,7 +566,7 @@ thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box) - - box->x1 = x1; - box->x2 = x2; -- thread_mono_span_add_boxes(c, box, 1); -+ thread_mono_span_add_box(c, box); - } - - inline static void -@@ -537,6 +578,8 @@ mono_row(struct mono *c, int16_t y, int16_t h) - int winding = 0; - BoxRec box; - -+ __DBG(("%s: y=%d, h=%d\n", __FUNCTION__, y, h)); -+ - DBG_MONO_EDGES(edge); - VALIDATE_MONO_EDGES(&c->head); - -@@ -547,6 +590,8 @@ mono_row(struct mono *c, int16_t y, int16_t h) - struct mono_edge *next = edge->next; - int16_t xend = I(edge->x.quo); - -+ __DBG(("%s: adding edge dir=%d [winding=%d], x=%d [%d]\n", -+ __FUNCTION__, edge->dir, winding + edge->dir, xend, edge->x.quo)); - if (--edge->height_left) { - if (edge->dy) { - edge->x.quo += edge->dxdy.quo; -@@ -555,6 +600,8 @@ mono_row(struct mono *c, int16_t y, int16_t h) - ++edge->x.quo; - edge->x.rem -= edge->dy; - } -+ __DBG(("%s: stepped edge (%d.%d) + (%d.%d)/%d, height=%d, prev_x=%d\n", -+ __FUNCTION__, edge->x.quo, edge->x.rem, edge->dxdy.quo, edge->dxdy.rem, edge->dy, edge->height_left, edge->x.quo)); - } - - if (edge->x.quo < prev_x) { -@@ -578,17 +625,22 @@ mono_row(struct mono *c, int16_t y, int16_t h) - winding += edge->dir; - if (winding == 0) { - assert(I(next->x.quo) >= xend); -- if (I(next->x.quo) > xend + 1) { -+ if (I(next->x.quo) > xend) { -+ __DBG(("%s: end span: %d\n", 
__FUNCTION__, xend)); - if (xstart < c->clip.extents.x1) - xstart = c->clip.extents.x1; - if (xend > c->clip.extents.x2) - xend = c->clip.extents.x2; -- if (xend > xstart) -+ if (xend > xstart) { -+ __DBG(("%s: emit span [%d, %d]\n", __FUNCTION__, xstart, xend)); - c->span(c, xstart, xend, &box); -+ } - xstart = INT16_MIN; - } -- } else if (xstart == INT16_MIN) -+ } else if (xstart == INT16_MIN) { -+ __DBG(("%s: starting new span: %d\n", __FUNCTION__, xend)); - xstart = xend; -+ } - - edge = next; - } -@@ -650,9 +702,14 @@ mono_render(struct mono *mono) - for (i = 0; i < h; i = j) { - j = i + 1; - -+ __DBG(("%s: row=%d, new edges? %d\n", __FUNCTION__, -+ i, polygon->y_buckets[i] != NULL)); -+ - if (polygon->y_buckets[i]) - mono_merge_edges(mono, polygon->y_buckets[i]); - -+ __DBG(("%s: row=%d, vertical? %d\n", __FUNCTION__, -+ i, mono->is_vertical)); - if (mono->is_vertical) { - struct mono_edge *e = mono->head.next; - int min_height = h - i; -@@ -667,6 +724,7 @@ mono_render(struct mono *mono) - j++; - if (j != i + 1) - mono_step_edges(mono, j - (i + 1)); -+ __DBG(("%s: %d vertical rows\n", __FUNCTION__, j-i)); - } - - mono_row(mono, i, j-i); -@@ -717,6 +775,7 @@ mono_span_thread(void *arg) - if (RegionNil(&mono.clip)) - return; - } -+ region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end); - - boxes.op = thread->op; - boxes.num_boxes = 0; -@@ -891,9 +950,12 @@ mono_trapezoids_span_converter(struct sna *sna, - - if (mono.clip.data == NULL && mono.op.damage == NULL) - mono.span = mono_span__fast; -+ else if (mono.clip.data != NULL && mono.op.damage == NULL) -+ mono.span = mono_span__clipped; - else - mono.span = mono_span; - -+ region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end); - mono_render(&mono); - mono.op.done(mono.sna, &mono.op); - mono_fini(&mono); -@@ -939,6 +1001,7 @@ mono_trapezoids_span_converter(struct sna *sna, - mono.clip.extents.x2 - mono.clip.extents.x1, - mono.clip.extents.y2 - mono.clip.extents.y1, - COMPOSITE_PARTIAL, 
memset(&mono.op, 0, sizeof(mono.op)))) { -+ region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end); - mono_render(&mono); - mono.op.done(mono.sna, &mono.op); - } -@@ -974,6 +1037,7 @@ mono_inplace_fill_box(struct sna *sna, - box->x2 - box->x1, - box->y2 - box->y1, - fill->color)); -+ sigtrap_assert_active(); - pixman_fill(fill->data, fill->stride, fill->bpp, - box->x1, box->y1, - box->x2 - box->x1, -@@ -995,6 +1059,7 @@ mono_inplace_fill_boxes(struct sna *sna, - box->x2 - box->x1, - box->y2 - box->y1, - fill->color)); -+ sigtrap_assert_active(); - pixman_fill(fill->data, fill->stride, fill->bpp, - box->x1, box->y1, - box->x2 - box->x1, -@@ -1382,10 +1447,13 @@ mono_triangles_span_converter(struct sna *sna, - mono_render(&mono); - mono.op.done(mono.sna, &mono.op); - } -+ mono_fini(&mono); - - if (!was_clear && !operator_is_bounded(op)) { - xPointFixed p1, p2; - -+ DBG(("%s: performing unbounded clear\n", __FUNCTION__)); -+ - if (!mono_init(&mono, 2+3*count)) - return false; - -@@ -1431,7 +1499,6 @@ mono_triangles_span_converter(struct sna *sna, - mono_fini(&mono); - } - -- mono_fini(&mono); - REGION_UNINIT(NULL, &mono.clip); - return true; - } -diff --git a/src/sna/sna_trapezoids_precise.c b/src/sna/sna_trapezoids_precise.c -index 9187ab48..242b4acb 100644 ---- a/src/sna/sna_trapezoids_precise.c -+++ b/src/sna/sna_trapezoids_precise.c -@@ -1023,6 +1023,16 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges) - static void - tor_add_trapezoid(struct tor *tor, const xTrapezoid *t, int dx, int dy) - { -+ if (!xTrapezoidValid(t)) { -+ __DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n", -+ __FUNCTION__, -+ t->top, t->bottom, -+ t->left.p1.x, t->left.p1.y, -+ t->left.p2.x, t->left.p2.y, -+ t->right.p1.x, t->right.p1.y, -+ t->right.p2.x, t->right.p2.y)); -+ return; -+ } - polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy); - polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy); - } 
-@@ -1635,31 +1645,27 @@ struct span_thread { - #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box)) - struct span_thread_boxes { - const struct sna_composite_spans_op *op; -+ const BoxRec *clip_start, *clip_end; - int num_boxes; - struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES]; - }; - --static void span_thread_add_boxes(struct sna *sna, void *data, -- const BoxRec *box, int count, float alpha) -+static void span_thread_add_box(struct sna *sna, void *data, -+ const BoxRec *box, float alpha) - { - struct span_thread_boxes *b = data; - -- __DBG(("%s: adding %d boxes with alpha=%f\n", -- __FUNCTION__, count, alpha)); -+ __DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha)); - -- assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES); -- if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) { -- DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count)); -- assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); -+ if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) { -+ DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes)); - b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes); - b->num_boxes = 0; - } - -- do { -- b->boxes[b->num_boxes].box = *box++; -- b->boxes[b->num_boxes].alpha = alpha; -- b->num_boxes++; -- } while (--count); -+ b->boxes[b->num_boxes].box = *box++; -+ b->boxes[b->num_boxes].alpha = alpha; -+ b->num_boxes++; - assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); - } - -@@ -1670,8 +1676,22 @@ span_thread_box(struct sna *sna, - const BoxRec *box, - int coverage) - { -+ struct span_thread_boxes *b = (struct span_thread_boxes *)op; -+ - __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); -- span_thread_add_boxes(sna, op, box, 1, AREA_TO_FLOAT(coverage)); -+ if (b->num_boxes) { -+ struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1]; -+ if (bb->box.x1 == box->x1 && -+ bb->box.x2 == box->x2 && -+ bb->box.y2 == box->y1 && -+ bb->alpha == AREA_TO_FLOAT(coverage)) { -+ bb->box.y2 = 
box->y2; -+ __DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2)); -+ return; -+ } -+ } -+ -+ span_thread_add_box(sna, op, box, AREA_TO_FLOAT(coverage)); - } - - static void -@@ -1681,20 +1701,28 @@ span_thread_clipped_box(struct sna *sna, - const BoxRec *box, - int coverage) - { -- pixman_region16_t region; -+ struct span_thread_boxes *b = (struct span_thread_boxes *)op; -+ const BoxRec *c; - - __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, - AREA_TO_FLOAT(coverage))); - -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- if (region_num_rects(®ion)) { -- span_thread_add_boxes(sna, op, -- region_rects(®ion), -- region_num_rects(®ion), -- AREA_TO_FLOAT(coverage)); -+ b->clip_start = -+ find_clip_box_for_y(b->clip_start, b->clip_end, box->y1); -+ -+ c = b->clip_start; -+ while (c != b->clip_end) { -+ BoxRec clipped; -+ -+ if (box->y2 <= c->y1) -+ break; -+ -+ clipped = *box; -+ if (!box_intersect(&clipped, c++)) -+ continue; -+ -+ span_thread_add_box(sna, op, &clipped, AREA_TO_FLOAT(coverage)); - } -- pixman_region_fini(®ion); - } - - static span_func_t -@@ -1712,7 +1740,7 @@ thread_choose_span(struct sna_composite_spans_op *tmp, - - assert(!is_mono(dst, maskFormat)); - assert(tmp->thread_boxes); -- DBG(("%s: clipped? %d\n", __FUNCTION__, clip->data != NULL)); -+ DBG(("%s: clipped? 
%d x %d\n", __FUNCTION__, clip->data != NULL, region_num_rects(clip))); - if (clip->data) - span = span_thread_clipped_box; - else -@@ -1721,6 +1749,17 @@ thread_choose_span(struct sna_composite_spans_op *tmp, - return span; - } - -+inline static void -+span_thread_boxes_init(struct span_thread_boxes *boxes, -+ const struct sna_composite_spans_op *op, -+ const RegionRec *clip) -+{ -+ boxes->op = op; -+ boxes->clip_start = region_rects(clip); -+ boxes->clip_end = boxes->clip_start + region_num_rects(clip); -+ boxes->num_boxes = 0; -+} -+ - static void - span_thread(void *arg) - { -@@ -1733,8 +1772,7 @@ span_thread(void *arg) - if (!tor_init(&tor, &thread->extents, 2*thread->ntrap)) - return; - -- boxes.op = thread->op; -- boxes.num_boxes = 0; -+ span_thread_boxes_init(&boxes, thread->op, thread->clip); - - y1 = thread->extents.y1 - thread->draw_y; - y2 = thread->extents.y2 - thread->draw_y; -@@ -2183,6 +2221,52 @@ static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity) - return opacity == 255 ? 
coverage : mul_8_8(coverage, opacity); - } - -+struct clipped_span { -+ span_func_t span; -+ const BoxRec *clip_start, *clip_end; -+}; -+ -+static void -+tor_blt_clipped(struct sna *sna, -+ struct sna_composite_spans_op *op, -+ pixman_region16_t *clip, -+ const BoxRec *box, -+ int coverage) -+{ -+ struct clipped_span *cs = (struct clipped_span *)clip; -+ const BoxRec *c; -+ -+ cs->clip_start = -+ find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1); -+ -+ c = cs->clip_start; -+ while (c != cs->clip_end) { -+ BoxRec clipped; -+ -+ if (box->y2 <= c->y1) -+ break; -+ -+ clipped = *box; -+ if (!box_intersect(&clipped, c++)) -+ continue; -+ -+ cs->span(sna, op, NULL, &clipped, coverage); -+ } -+} -+ -+inline static span_func_t -+clipped_span(struct clipped_span *cs, -+ span_func_t span, -+ const RegionRec *clip) -+{ -+ if (clip->data) { -+ cs->span = span; -+ region_get_boxes(clip, &cs->clip_start, &cs->clip_end); -+ span = tor_blt_clipped; -+ } -+ return span; -+} -+ - static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v) - { - uint8_t *ptr = in->ptr; -@@ -2218,25 +2302,6 @@ tor_blt_src(struct sna *sna, - } - - static void --tor_blt_src_clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- tor_blt_src(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} -- --static void - tor_blt_in(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, -@@ -2268,25 +2333,6 @@ tor_blt_in(struct sna *sna, - } - - static void --tor_blt_in_clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, 
box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- tor_blt_in(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} -- --static void - tor_blt_add(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, -@@ -2325,25 +2371,6 @@ tor_blt_add(struct sna *sna, - } - - static void --tor_blt_add_clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- tor_blt_add(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} -- --static void - tor_blt_lerp32(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, -@@ -2358,6 +2385,7 @@ tor_blt_lerp32(struct sna *sna, - if (coverage == 0) - return; - -+ sigtrap_assert_active(); - ptr += box->y1 * stride + box->x1; - - h = box->y2 - box->y1; -@@ -2396,25 +2424,6 @@ tor_blt_lerp32(struct sna *sna, - } - } - --static void --tor_blt_lerp32_clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- tor_blt_lerp32(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} -- - struct pixman_inplace { - pixman_image_t *image, *source, *mask; - uint32_t color; -@@ -2442,24 +2451,6 @@ pixmask_span_solid(struct sna *sna, - pi->dx + box->x1, pi->dy + box->y1, - box->x2 - box->x1, box->y2 - box->y1); - } --static void --pixmask_span_solid__clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec 
*box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- pixmask_span_solid(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} - - static void - pixmask_span(struct sna *sna, -@@ -2480,24 +2471,6 @@ pixmask_span(struct sna *sna, - pi->dx + box->x1, pi->dy + box->y1, - box->x2 - box->x1, box->y2 - box->y1); - } --static void --pixmask_span__clipped(struct sna *sna, -- struct sna_composite_spans_op *op, -- pixman_region16_t *clip, -- const BoxRec *box, -- int coverage) --{ -- pixman_region16_t region; -- int n; -- -- pixman_region_init_rects(®ion, box, 1); -- RegionIntersect(®ion, ®ion, clip); -- n = region_num_rects(®ion); -- box = region_rects(®ion); -- while (n--) -- pixmask_span(sna, op, NULL, box++, coverage); -- pixman_region_fini(®ion); --} - - struct inplace_x8r8g8b8_thread { - xTrapezoid *traps; -@@ -2516,6 +2489,7 @@ static void inplace_x8r8g8b8_thread(void *arg) - struct inplace_x8r8g8b8_thread *thread = arg; - struct tor tor; - span_func_t span; -+ struct clipped_span clipped; - RegionPtr clip; - int y1, y2, n; - -@@ -2546,12 +2520,11 @@ static void inplace_x8r8g8b8_thread(void *arg) - inplace.stride = pixmap->devKind; - inplace.color = thread->color; - -- if (clip->data) -- span = tor_blt_lerp32_clipped; -- else -- span = tor_blt_lerp32; -+ span = clipped_span(&clipped, tor_blt_lerp32, clip); - -- tor_render(NULL, &tor, (void*)&inplace, clip, span, false); -+ tor_render(NULL, &tor, -+ (void*)&inplace, (void *)&clipped, -+ span, false); - } else if (thread->is_solid) { - struct pixman_inplace pi; - -@@ -2564,10 +2537,7 @@ static void inplace_x8r8g8b8_thread(void *arg) - 1, 1, pi.bits, 0); - pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); - -- if (clip->data) -- span = pixmask_span_solid__clipped; -- else -- span = pixmask_span_solid; -+ span = 
clipped_span(&clipped, pixmask_span_solid, clip); - - tor_render(NULL, &tor, (void*)&pi, clip, span, false); - -@@ -2588,12 +2558,11 @@ static void inplace_x8r8g8b8_thread(void *arg) - pi.bits = pixman_image_get_data(pi.mask); - pi.op = thread->op; - -- if (clip->data) -- span = pixmask_span__clipped; -- else -- span = pixmask_span; -+ span = clipped_span(&clipped, pixmask_span, clip); - -- tor_render(NULL, &tor, (void*)&pi, clip, span, false); -+ tor_render(NULL, &tor, -+ (void*)&pi, (void *)&clipped, -+ span, false); - - pixman_image_unref(pi.mask); - pixman_image_unref(pi.source); -@@ -2712,6 +2681,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - if (num_threads == 1) { - struct tor tor; - span_func_t span; -+ struct clipped_span clipped; - - if (!tor_init(&tor, ®ion.extents, 2*ntrap)) - return true; -@@ -2737,17 +2707,14 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - inplace.stride = pixmap->devKind; - inplace.color = color; - -- if (dst->pCompositeClip->data) -- span = tor_blt_lerp32_clipped; -- else -- span = tor_blt_lerp32; -- -+ span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip); - DBG(("%s: render inplace op=%d, color=%08x\n", - __FUNCTION__, op, color)); - - if (sigtrap_get() == 0) { -- tor_render(NULL, &tor, (void*)&inplace, -- dst->pCompositeClip, span, false); -+ tor_render(NULL, &tor, -+ (void*)&inplace, (void*)&clipped, -+ span, false); - sigtrap_put(); - } - } else if (is_solid) { -@@ -2762,15 +2729,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - 1, 1, pi.bits, 0); - pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); - -- if (dst->pCompositeClip->data) -- span = pixmask_span_solid__clipped; -- else -- span = pixmask_span_solid; -- -+ span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip); - if (sigtrap_get() == 0) { -- tor_render(NULL, &tor, (void*)&pi, -- dst->pCompositeClip, span, -- false); -+ tor_render(NULL, &tor, -+ (void*)&pi, (void*)&clipped, -+ span, false); - sigtrap_put(); - } - -@@ -2791,15 
+2754,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - pi.bits = pixman_image_get_data(pi.mask); - pi.op = op; - -- if (dst->pCompositeClip->data) -- span = pixmask_span__clipped; -- else -- span = pixmask_span; -- -+ span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip); - if (sigtrap_get() == 0) { -- tor_render(NULL, &tor, (void*)&pi, -- dst->pCompositeClip, span, -- false); -+ tor_render(NULL, &tor, -+ (void*)&pi, (void *)&clipped, -+ span, false); - sigtrap_put(); - } - -@@ -2861,9 +2820,9 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, - - struct inplace_thread { - xTrapezoid *traps; -- RegionPtr clip; - span_func_t span; - struct inplace inplace; -+ struct clipped_span clipped; - BoxRec extents; - int dx, dy; - int draw_x, draw_y; -@@ -2888,8 +2847,9 @@ static void inplace_thread(void *arg) - tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy); - } - -- tor_render(NULL, &tor, (void*)&thread->inplace, -- thread->clip, thread->span, thread->unbounded); -+ tor_render(NULL, &tor, -+ (void*)&thread->inplace, (void*)&thread->clipped, -+ thread->span, thread->unbounded); - - tor_fini(&tor); - } -@@ -2903,6 +2863,7 @@ precise_trapezoid_span_inplace(struct sna *sna, - bool fallback) - { - struct inplace inplace; -+ struct clipped_span clipped; - span_func_t span; - PixmapPtr pixmap; - struct sna_pixmap *priv; -@@ -3020,21 +2981,12 @@ precise_trapezoid_span_inplace(struct sna *sna, - dst->pCompositeClip->data != NULL)); - - if (op == PictOpSrc) { -- if (dst->pCompositeClip->data) -- span = tor_blt_src_clipped; -- else -- span = tor_blt_src; -+ span = tor_blt_src; - } else if (op == PictOpIn) { -- if (dst->pCompositeClip->data) -- span = tor_blt_in_clipped; -- else -- span = tor_blt_in; -+ span = tor_blt_in; - } else { - assert(op == PictOpAdd); -- if (dst->pCompositeClip->data) -- span = tor_blt_add_clipped; -- else -- span = tor_blt_add; -+ span = tor_blt_add; - } - - DBG(("%s: move-to-cpu(dst)\n", __FUNCTION__)); -@@ -3052,6 +3004,8 @@ 
precise_trapezoid_span_inplace(struct sna *sna, - inplace.stride = pixmap->devKind; - inplace.opacity = color >> 24; - -+ span = clipped_span(&clipped, span, dst->pCompositeClip); -+ - num_threads = 1; - if (!NO_GPU_THREADS && - (flags & COMPOSITE_SPANS_RECTILINEAR) == 0) -@@ -3074,8 +3028,9 @@ precise_trapezoid_span_inplace(struct sna *sna, - } - - if (sigtrap_get() == 0) { -- tor_render(NULL, &tor, (void*)&inplace, -- dst->pCompositeClip, span, unbounded); -+ tor_render(NULL, &tor, -+ (void*)&inplace, (void *)&clipped, -+ span, unbounded); - sigtrap_put(); - } - -@@ -3093,7 +3048,7 @@ precise_trapezoid_span_inplace(struct sna *sna, - threads[0].ntrap = ntrap; - threads[0].inplace = inplace; - threads[0].extents = region.extents; -- threads[0].clip = dst->pCompositeClip; -+ threads[0].clipped = clipped; - threads[0].span = span; - threads[0].unbounded = unbounded; - threads[0].dx = dx; -@@ -3316,8 +3271,7 @@ tristrip_thread(void *arg) - if (!tor_init(&tor, &thread->extents, 2*thread->count)) - return; - -- boxes.op = thread->op; -- boxes.num_boxes = 0; -+ span_thread_boxes_init(&boxes, thread->op, thread->clip); - - cw = 0; ccw = 1; - polygon_add_line(tor.polygon, -diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c -index ed0e7b31..e2b11c31 100644 ---- a/src/sna/sna_video.c -+++ b/src/sna/sna_video.c -@@ -591,6 +591,72 @@ use_gtt: /* copy data, must use GTT so that we keep the overlay uncached */ - return true; - } - -+void sna_video_fill_colorkey(struct sna_video *video, -+ const RegionRec *clip) -+{ -+ struct sna *sna = video->sna; -+ PixmapPtr front = sna->front; -+ struct kgem_bo *bo = __sna_pixmap_get_bo(front); -+ uint8_t *dst, *tmp; -+ int w, width; -+ -+ if (video->AlwaysOnTop || RegionEqual(&video->clip, (RegionPtr)clip)) -+ return; -+ -+ assert(bo); -+ if (!wedged(sna) && -+ sna_blt_fill_boxes(sna, GXcopy, bo, -+ front->drawable.bitsPerPixel, -+ video->color_key, -+ region_rects(clip), -+ region_num_rects(clip))) { -+ RegionCopy(&video->clip, 
(RegionPtr)clip); -+ return; -+ } -+ -+ dst = kgem_bo_map__gtt(&sna->kgem, bo); -+ if (dst == NULL) -+ return; -+ -+ w = front->drawable.bitsPerPixel/8; -+ width = (clip->extents.x2 - clip->extents.x1) * w; -+ tmp = malloc(width); -+ if (tmp == NULL) -+ return; -+ -+ memcpy(tmp, &video->color_key, w); -+ while (2 * w < width) { -+ memcpy(tmp + w, tmp, w); -+ w *= 2; -+ } -+ if (w < width) -+ memcpy(tmp + w, tmp, width - w); -+ -+ if (sigtrap_get() == 0) { -+ const BoxRec *box = region_rects(clip); -+ int n = region_num_rects(clip); -+ -+ w = front->drawable.bitsPerPixel/8; -+ do { -+ int y = box->y1; -+ uint8_t *row = dst + y*bo->pitch + w*box->x1; -+ -+ width = (box->x2 - box->x1) * w; -+ while (y < box->y2) { -+ memcpy(row, tmp, width); -+ row += bo->pitch; -+ y++; -+ } -+ box++; -+ } while (--n); -+ sigtrap_put(); -+ -+ RegionCopy(&video->clip, (RegionPtr)clip); -+ } -+ -+ free(tmp); -+} -+ - XvAdaptorPtr sna_xv_adaptor_alloc(struct sna *sna) - { - XvAdaptorPtr new_adaptors; -diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h -index f21605fc..39cb725f 100644 ---- a/src/sna/sna_video.h -+++ b/src/sna/sna_video.h -@@ -72,6 +72,8 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE. - struct sna_video { - struct sna *sna; - -+ int idx; /* XXX expose struct plane instead? 
*/ -+ - int brightness; - int contrast; - int saturation; -@@ -193,6 +195,9 @@ bool - sna_video_copy_data(struct sna_video *video, - struct sna_video_frame *frame, - const uint8_t *buf); -+void -+sna_video_fill_colorkey(struct sna_video *video, -+ const RegionRec *clip); - - void sna_video_buffer_fini(struct sna_video *video); - -@@ -210,4 +215,26 @@ sna_window_set_port(WindowPtr window, XvPortPtr port) - ((void **)__get_private(window, sna_window_key))[2] = port; - } - -+static inline int offset_and_clip(int x, int dx) -+{ -+ x += dx; -+ if (x <= 0) -+ return 0; -+ if (x >= MAXSHORT) -+ return MAXSHORT; -+ return x; -+} -+ -+static inline void init_video_region(RegionRec *region, -+ DrawablePtr draw, -+ int drw_x, int drw_y, -+ int drw_w, int drw_h) -+{ -+ region->extents.x1 = offset_and_clip(draw->x, drw_x); -+ region->extents.y1 = offset_and_clip(draw->y, drw_y); -+ region->extents.x2 = offset_and_clip(draw->x, drw_x + drw_w); -+ region->extents.y2 = offset_and_clip(draw->y, drw_y + drw_h); -+ region->data = NULL; -+} -+ - #endif /* SNA_VIDEO_H */ -diff --git a/src/sna/sna_video_overlay.c b/src/sna/sna_video_overlay.c -index ac81f1a0..9bc5ce40 100644 ---- a/src/sna/sna_video_overlay.c -+++ b/src/sna/sna_video_overlay.c -@@ -130,7 +130,7 @@ static int sna_video_overlay_stop(ddStopVideo_ARGS) - - DBG(("%s()\n", __FUNCTION__)); - -- REGION_EMPTY(scrn->pScreen, &video->clip); -+ REGION_EMPTY(to_screen_from_sna(sna), &video->clip); - - request.flags = 0; - (void)drmIoctl(sna->kgem.fd, -@@ -474,15 +474,13 @@ sna_video_overlay_put_image(ddPutImage_ARGS) - if (src_h >= (drw_h * 8)) - drw_h = src_h / 7; - -- clip.extents.x1 = draw->x + drw_x; -- clip.extents.y1 = draw->y + drw_y; -- clip.extents.x2 = clip.extents.x1 + drw_w; -- clip.extents.y2 = clip.extents.y1 + drw_h; -- clip.data = NULL; -+ init_video_region(&clip, draw, drw_x, drw_y, drw_w, drw_h); - - DBG(("%s: always_on_top=%d\n", __FUNCTION__, video->AlwaysOnTop)); -- if (!video->AlwaysOnTop) -+ if 
(!video->AlwaysOnTop) { -+ ValidateGC(draw, gc); - RegionIntersect(&clip, &clip, gc->pCompositeClip); -+ } - if (box_empty(&clip.extents)) - goto invisible; - -@@ -551,15 +549,7 @@ sna_video_overlay_put_image(ddPutImage_ARGS) - ret = Success; - if (sna_video_overlay_show - (sna, video, &frame, crtc, &dstBox, src_w, src_h, drw_w, drw_h)) { -- //xf86XVFillKeyHelperDrawable(draw, video->color_key, &clip); -- if (!video->AlwaysOnTop && !RegionEqual(&video->clip, &clip) && -- sna_blt_fill_boxes(sna, GXcopy, -- __sna_pixmap_get_bo(sna->front), -- sna->front->drawable.bitsPerPixel, -- video->color_key, -- region_rects(&clip), -- region_num_rects(&clip))) -- RegionCopy(&video->clip, &clip); -+ sna_video_fill_colorkey(video, &clip); - sna_window_set_port((WindowPtr)draw, port); - } else { - DBG(("%s: failed to show video frame\n", __FUNCTION__)); -diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c -index 92230f97..69bfdfd2 100644 ---- a/src/sna/sna_video_sprite.c -+++ b/src/sna/sna_video_sprite.c -@@ -47,6 +47,8 @@ - #define DRM_FORMAT_YUYV fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */ - #define DRM_FORMAT_UYVY fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */ - -+#define has_hw_scaling(sna) ((sna)->kgem.gen < 071) -+ - #define LOCAL_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct local_mode_set_plane) - struct local_mode_set_plane { - uint32_t plane_id; -@@ -81,19 +83,17 @@ static int sna_video_sprite_stop(ddStopVideo_ARGS) - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(video->sna->scrn); - int i; - -- for (i = 0; i < config->num_crtc; i++) { -+ for (i = 0; i < video->sna->mode.num_real_crtc; i++) { - xf86CrtcPtr crtc = config->crtc[i]; - int pipe; - -- if (sna_crtc_id(crtc) == 0) -- break; -- -- pipe = sna_crtc_to_pipe(crtc); -+ pipe = sna_crtc_pipe(crtc); -+ assert(pipe < ARRAY_SIZE(video->bo)); - if (video->bo[pipe] == NULL) - continue; - - memset(&s, 0, sizeof(s)); -- s.plane_id = 
sna_crtc_to_sprite(crtc); -+ s.plane_id = sna_crtc_to_sprite(crtc, video->idx); - if (drmIoctl(video->sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s)) - xf86DrvMsg(video->sna->scrn->scrnIndex, X_ERROR, - "failed to disable plane\n"); -@@ -153,7 +153,7 @@ static int sna_video_sprite_best_size(ddQueryBestSize_ARGS) - struct sna_video *video = port->devPriv.ptr; - struct sna *sna = video->sna; - -- if (sna->kgem.gen >= 075) { -+ if (!has_hw_scaling(sna) && !sna->render.video) { - *p_w = vid_w; - *p_h = vid_h; - } else { -@@ -221,12 +221,12 @@ sna_video_sprite_show(struct sna *sna, - BoxPtr dstBox) - { - struct local_mode_set_plane s; -- int pipe = sna_crtc_to_pipe(crtc); -+ int pipe = sna_crtc_pipe(crtc); - - /* XXX handle video spanning multiple CRTC */ - - VG_CLEAR(s); -- s.plane_id = sna_crtc_to_sprite(crtc); -+ s.plane_id = sna_crtc_to_sprite(crtc, video->idx); - - #define DRM_I915_SET_SPRITE_COLORKEY 0x2b - #define LOCAL_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct local_intel_sprite_colorkey) -@@ -263,9 +263,6 @@ sna_video_sprite_show(struct sna *sna, - video->color_key_changed &= ~(1 << pipe); - } - -- if (video->bo[pipe] == frame->bo) -- return true; -- - update_dst_box_to_crtc_coords(sna, crtc, dstBox); - if (frame->rotation & (RR_Rotate_90 | RR_Rotate_270)) { - int tmp = frame->width; -@@ -283,15 +280,30 @@ sna_video_sprite_show(struct sna *sna, - uint32_t handles[4]; - uint32_t pitches[4]; /* pitch for each plane */ - uint32_t offsets[4]; /* offset of each plane */ -+ uint64_t modifiers[4]; - } f; - bool purged = true; - - memset(&f, 0, sizeof(f)); - f.width = frame->width; - f.height = frame->height; -+ f.flags = 1 << 1; /* +modifiers */ - f.handles[0] = frame->bo->handle; - f.pitches[0] = frame->pitch[0]; - -+ switch (frame->bo->tiling) { -+ case I915_TILING_NONE: -+ break; -+ case I915_TILING_X: -+ /* I915_FORMAT_MOD_X_TILED */ -+ f.modifiers[0] = (uint64_t)1 << 56 | 1; -+ break; -+ case I915_TILING_Y: -+ 
/* I915_FORMAT_MOD_X_TILED */ -+ f.modifiers[0] = (uint64_t)1 << 56 | 2; -+ break; -+ } -+ - switch (frame->id) { - case FOURCC_RGB565: - f.pixel_format = DRM_FORMAT_RGB565; -@@ -360,7 +372,7 @@ sna_video_sprite_show(struct sna *sna, - return false; - } - -- frame->bo->domain = DOMAIN_NONE; -+ __kgem_bo_clear_dirty(frame->bo); - - if (video->bo[pipe]) - kgem_bo_destroy(&sna->kgem, video->bo[pipe]); -@@ -374,17 +386,17 @@ static int sna_video_sprite_put_image(ddPutImage_ARGS) - struct sna *sna = video->sna; - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); - RegionRec clip; -+ BoxRec draw_extents; - int ret, i; - -- clip.extents.x1 = draw->x + drw_x; -- clip.extents.y1 = draw->y + drw_y; -- clip.extents.x2 = clip.extents.x1 + drw_w; -- clip.extents.y2 = clip.extents.y1 + drw_h; -- clip.data = NULL; -+ init_video_region(&clip, draw, drw_x, drw_y, drw_w, drw_h); -+ draw_extents = clip.extents; - - DBG(("%s: always_on_top=%d\n", __FUNCTION__, video->AlwaysOnTop)); -- if (!video->AlwaysOnTop) -+ if (!video->AlwaysOnTop) { -+ ValidateGC(draw, gc); - RegionIntersect(&clip, &clip, gc->pCompositeClip); -+ } - - DBG(("%s: src=(%d, %d),(%d, %d), dst=(%d, %d),(%d, %d), id=%d, sizep=%dx%d, sync?=%d\n", - __FUNCTION__, -@@ -402,19 +414,17 @@ static int sna_video_sprite_put_image(ddPutImage_ARGS) - goto err; - } - -- for (i = 0; i < config->num_crtc; i++) { -+ for (i = 0; i < video->sna->mode.num_real_crtc; i++) { - xf86CrtcPtr crtc = config->crtc[i]; - struct sna_video_frame frame; -+ BoxRec dst = draw_extents; - int pipe; - INT32 x1, x2, y1, y2; -- BoxRec dst; - RegionRec reg; - Rotation rotation; -+ bool cache_bo; - -- if (sna_crtc_id(crtc) == 0) -- break; -- -- pipe = sna_crtc_to_pipe(crtc); -+ pipe = sna_crtc_pipe(crtc); - - sna_video_frame_init(video, format->id, width, height, &frame); - -@@ -423,10 +433,11 @@ static int sna_video_sprite_put_image(ddPutImage_ARGS) - RegionIntersect(®, ®, &clip); - if (RegionNil(®)) { - off: -+ assert(pipe < 
ARRAY_SIZE(video->bo)); - if (video->bo[pipe]) { - struct local_mode_set_plane s; - memset(&s, 0, sizeof(s)); -- s.plane_id = sna_crtc_to_sprite(crtc); -+ s.plane_id = sna_crtc_to_sprite(crtc, video->idx); - if (drmIoctl(video->sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s)) - xf86DrvMsg(video->sna->scrn->scrnIndex, X_ERROR, - "failed to disable plane\n"); -@@ -440,8 +451,6 @@ off: - y1 = src_y; - y2 = src_y + src_h; - -- dst = clip.extents; -- - ret = xf86XVClipVideoHelper(&dst, &x1, &x2, &y1, &y2, - ®, frame.width, frame.height); - RegionUninit(®); -@@ -465,8 +474,8 @@ off: - - /* if sprite can't handle rotation natively, store it for the copy func */ - rotation = RR_Rotate_0; -- if (!sna_crtc_set_sprite_rotation(crtc, crtc->rotation)) { -- sna_crtc_set_sprite_rotation(crtc, RR_Rotate_0); -+ if (!sna_crtc_set_sprite_rotation(crtc, video->idx, crtc->rotation)) { -+ sna_crtc_set_sprite_rotation(crtc, video->idx, RR_Rotate_0); - rotation = crtc->rotation; - } - sna_video_frame_set_rotation(video, &frame, rotation); -@@ -496,6 +505,8 @@ off: - frame.image.y1 = 0; - frame.image.x2 = frame.width; - frame.image.y2 = frame.height; -+ -+ cache_bo = false; - } else { - frame.bo = sna_video_buffer(video, &frame); - if (frame.bo == NULL) { -@@ -509,6 +520,60 @@ off: - ret = BadAlloc; - goto err; - } -+ -+ cache_bo = true; -+ } -+ -+ if (!has_hw_scaling(sna) && sna->render.video && -+ !((frame.src.x2 - frame.src.x1) == (dst.x2 - dst.x1) && -+ (frame.src.y2 - frame.src.y1) == (dst.y2 - dst.y1))) { -+ ScreenPtr screen = to_screen_from_sna(sna); -+ PixmapPtr scaled; -+ RegionRec r; -+ -+ r.extents.x1 = r.extents.y1 = 0; -+ r.extents.x2 = dst.x2 - dst.x1; -+ r.extents.y2 = dst.y2 - dst.y1; -+ r.data = NULL; -+ -+ DBG(("%s: scaling from (%d, %d) to (%d, %d)\n", -+ __FUNCTION__, -+ frame.src.x2 - frame.src.x1, -+ frame.src.y2 - frame.src.y1, -+ r.extents.x2, r.extents.y2)); -+ -+ scaled = screen->CreatePixmap(screen, -+ r.extents.x2, -+ r.extents.y2, -+ 24, -+ 
CREATE_PIXMAP_USAGE_SCRATCH); -+ if (scaled == NULL) { -+ ret = BadAlloc; -+ goto err; -+ } -+ -+ if (!sna->render.video(sna, video, &frame, &r, scaled)) { -+ screen->DestroyPixmap(scaled); -+ ret = BadAlloc; -+ goto err; -+ } -+ -+ if (cache_bo) -+ sna_video_buffer_fini(video); -+ else -+ kgem_bo_destroy(&sna->kgem, frame.bo); -+ -+ frame.bo = kgem_bo_reference(__sna_pixmap_get_bo(scaled)); -+ kgem_bo_submit(&sna->kgem, frame.bo); -+ -+ frame.id = FOURCC_RGB888; -+ frame.src = frame.image = r.extents; -+ frame.width = frame.image.x2; -+ frame.height = frame.image.y2; -+ frame.pitch[0] = frame.bo->pitch; -+ -+ screen->DestroyPixmap(scaled); -+ cache_bo = false; - } - - ret = Success; -@@ -517,24 +582,16 @@ off: - ret = BadAlloc; - } - -- frame.bo->domain = DOMAIN_NONE; -- if (xvmc_passthrough(format->id)) -- kgem_bo_destroy(&sna->kgem, frame.bo); -- else -+ if (cache_bo) - sna_video_buffer_fini(video); -+ else -+ kgem_bo_destroy(&sna->kgem, frame.bo); - - if (ret != Success) - goto err; - } - -- if (!video->AlwaysOnTop && !RegionEqual(&video->clip, &clip) && -- sna_blt_fill_boxes(sna, GXcopy, -- __sna_pixmap_get_bo(sna->front), -- sna->front->drawable.bitsPerPixel, -- video->color_key, -- region_rects(&clip), -- region_num_rects(&clip))) -- RegionCopy(&video->clip, &clip); -+ sna_video_fill_colorkey(video, &clip); - sna_window_set_port((WindowPtr)draw, port); - - return Success; -@@ -606,25 +663,28 @@ static int sna_video_sprite_color_key(struct sna *sna) - return color_key & ((1 << scrn->depth) - 1); - } - --static bool sna_video_has_sprites(struct sna *sna) -+static int sna_video_has_sprites(struct sna *sna) - { - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); -+ unsigned min; - int i; - - DBG(("%s: num_crtc=%d\n", __FUNCTION__, sna->mode.num_real_crtc)); - - if (sna->mode.num_real_crtc == 0) -- return false; -+ return 0; - -+ min = -1; - for (i = 0; i < sna->mode.num_real_crtc; i++) { -- if (!sna_crtc_to_sprite(config->crtc[i])) { -- DBG(("%s: no 
sprite found on pipe %d\n", __FUNCTION__, sna_crtc_to_pipe(config->crtc[i]))); -- return false; -- } -+ unsigned count = sna_crtc_count_sprites(config->crtc[i]); -+ DBG(("%s: %d sprites found on pipe %d\n", __FUNCTION__, -+ count, sna_crtc_pipe(config->crtc[i]))); -+ if (count < min) -+ min = count; - } - -- DBG(("%s: yes\n", __FUNCTION__)); -- return true; -+ DBG(("%s: min=%d\n", __FUNCTION__, min)); -+ return min; - } - - void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) -@@ -632,16 +692,18 @@ void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) - XvAdaptorPtr adaptor; - struct sna_video *video; - XvPortPtr port; -+ int count, i; - -- if (!sna_video_has_sprites(sna)) -+ count = sna_video_has_sprites(sna); -+ if (!count) - return; - - adaptor = sna_xv_adaptor_alloc(sna); - if (!adaptor) - return; - -- video = calloc(1, sizeof(*video)); -- port = calloc(1, sizeof(*port)); -+ video = calloc(count, sizeof(*video)); -+ port = calloc(count, sizeof(*port)); - if (video == NULL || port == NULL) { - free(video); - free(port); -@@ -686,36 +748,43 @@ void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) - adaptor->ddPutImage = sna_video_sprite_put_image; - adaptor->ddQueryImageAttributes = sna_video_sprite_query; - -- adaptor->nPorts = 1; -+ adaptor->nPorts = count; - adaptor->pPorts = port; - -- adaptor->base_id = port->id = FakeClientID(0); -- AddResource(port->id, XvGetRTPort(), port); -- port->pAdaptor = adaptor; -- port->pNotify = NULL; -- port->pDraw = NULL; -- port->client = NULL; -- port->grab.client = NULL; -- port->time = currentTime; -- port->devPriv.ptr = video; -- -- video->sna = sna; -- video->alignment = 64; -- video->color_key = sna_video_sprite_color_key(sna); -- video->color_key_changed = ~0; -- video->has_color_key = true; -- video->brightness = -19; /* (255/219) * -16 */ -- video->contrast = 75; /* 255/219 * 64 */ -- video->saturation = 146; /* 128/112 * 128 */ -- video->desired_crtc = NULL; -- video->gamma5 = 0xc0c0c0; -- 
video->gamma4 = 0x808080; -- video->gamma3 = 0x404040; -- video->gamma2 = 0x202020; -- video->gamma1 = 0x101010; -- video->gamma0 = 0x080808; -- RegionNull(&video->clip); -- video->SyncToVblank = 1; -+ for (i = 0; i < count; i++) { -+ port->id = FakeClientID(0); -+ AddResource(port->id, XvGetRTPort(), port); -+ port->pAdaptor = adaptor; -+ port->pNotify = NULL; -+ port->pDraw = NULL; -+ port->client = NULL; -+ port->grab.client = NULL; -+ port->time = currentTime; -+ port->devPriv.ptr = video; -+ -+ video->sna = sna; -+ video->idx = i; -+ video->alignment = 64; -+ video->color_key = sna_video_sprite_color_key(sna); -+ video->color_key_changed = ~0; -+ video->has_color_key = true; -+ video->brightness = -19; /* (255/219) * -16 */ -+ video->contrast = 75; /* 255/219 * 64 */ -+ video->saturation = 146; /* 128/112 * 128 */ -+ video->desired_crtc = NULL; -+ video->gamma5 = 0xc0c0c0; -+ video->gamma4 = 0x808080; -+ video->gamma3 = 0x404040; -+ video->gamma2 = 0x202020; -+ video->gamma1 = 0x101010; -+ video->gamma0 = 0x080808; -+ RegionNull(&video->clip); -+ video->SyncToVblank = 1; -+ -+ port++; -+ video++; -+ } -+ adaptor->base_id = adaptor->pPorts[0].id; - - xvColorKey = MAKE_ATOM("XV_COLORKEY"); - xvAlwaysOnTop = MAKE_ATOM("XV_ALWAYS_ON_TOP"); -diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c -index 95011939..3cce5cf1 100644 ---- a/src/sna/sna_video_textured.c -+++ b/src/sna/sna_video_textured.c -@@ -48,7 +48,12 @@ static const XvAttributeRec Attributes[] = { - //{XvSettable | XvGettable, 0, 255, (char *)"XV_CONTRAST"}, - }; - --static const XvImageRec Images[] = { -+static const XvImageRec gen2_Images[] = { -+ XVIMAGE_YUY2, -+ XVIMAGE_UYVY, -+}; -+ -+static const XvImageRec gen3_Images[] = { - XVIMAGE_YUY2, - XVIMAGE_YV12, - XVIMAGE_I420, -@@ -149,15 +154,16 @@ sna_video_textured_put_image(ddPutImage_ARGS) - BoxRec dstBox; - RegionRec clip; - xf86CrtcPtr crtc; -+ int16_t dx, dy; - bool flush = false; - bool ret; - -- clip.extents.x1 = draw->x 
+ drw_x; -- clip.extents.y1 = draw->y + drw_y; -- clip.extents.x2 = clip.extents.x1 + drw_w; -- clip.extents.y2 = clip.extents.y1 + drw_h; -- clip.data = NULL; -+ if (wedged(sna)) -+ return BadAlloc; - -+ init_video_region(&clip, draw, drw_x, drw_y, drw_w, drw_h); -+ -+ ValidateGC(draw, gc); - RegionIntersect(&clip, &clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) - return Success; -@@ -181,6 +187,9 @@ sna_video_textured_put_image(ddPutImage_ARGS) - &clip)) - return Success; - -+ if (get_drawable_deltas(draw, pixmap, &dx, &dy)) -+ RegionTranslate(&clip, dx, dy); -+ - flags = MOVE_WRITE | __MOVE_FORCE; - if (clip.data) - flags |= MOVE_READ; -@@ -234,7 +243,7 @@ sna_video_textured_put_image(ddPutImage_ARGS) - DBG(("%s: failed to render video\n", __FUNCTION__)); - ret = BadAlloc; - } else -- DamageDamageRegion(draw, &clip); -+ DamageDamageRegion(&pixmap->drawable, &clip); - - kgem_bo_destroy(&sna->kgem, frame.bo); - -@@ -316,7 +325,7 @@ void sna_video_textured_setup(struct sna *sna, ScreenPtr screen) - - if (!sna->render.video) { - xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, -- "Textured video not supported on this hardware\n"); -+ "Textured video not supported on this hardware or backend\n"); - return; - } - -@@ -362,8 +371,13 @@ void sna_video_textured_setup(struct sna *sna, ScreenPtr screen) - ARRAY_SIZE(Formats)); - adaptor->nAttributes = ARRAY_SIZE(Attributes); - adaptor->pAttributes = (XvAttributeRec *)Attributes; -- adaptor->nImages = ARRAY_SIZE(Images); -- adaptor->pImages = (XvImageRec *)Images; -+ if (sna->kgem.gen < 030) { -+ adaptor->nImages = ARRAY_SIZE(gen2_Images); -+ adaptor->pImages = (XvImageRec *)gen2_Images; -+ } else { -+ adaptor->nImages = ARRAY_SIZE(gen3_Images); -+ adaptor->pImages = (XvImageRec *)gen3_Images; -+ } - #if XORG_XV_VERSION < 2 - adaptor->ddAllocatePort = sna_xv_alloc_port; - adaptor->ddFreePort = sna_xv_free_port; -diff --git a/src/sna/xassert.h b/src/sna/xassert.h -index 1bcfd080..e648e4bc 100644 ---- a/src/sna/xassert.h 
-+++ b/src/sna/xassert.h -@@ -43,6 +43,28 @@ - xorg_backtrace(); \ - FatalError("%s:%d assertion '%s' failed\n", __func__, __LINE__, #E); \ - } while (0) -+ -+#define warn_unless(E) \ -+({ \ -+ bool fail = !(E); \ -+ if (unlikely(fail)) { \ -+ static int __warn_once__; \ -+ if (!__warn_once__) { \ -+ xorg_backtrace(); \ -+ ErrorF("%s:%d assertion '%s' failed\n", __func__, __LINE__, #E); \ -+ __warn_once__ = 1; \ -+ } \ -+ } \ -+ unlikely(fail); \ -+}) -+ -+#define dbg(EXPR) EXPR -+ -+#else -+ -+#define warn_unless(E) ({ bool fail = !(E); unlikely(fail); }) -+#define dbg(EXPR) -+ - #endif - - #endif /* __XASSERT_H__ */ -diff --git a/src/uxa/i830_reg.h b/src/uxa/i830_reg.h -index d8306bcd..ba39d82c 100644 ---- a/src/uxa/i830_reg.h -+++ b/src/uxa/i830_reg.h -@@ -65,6 +65,12 @@ - #define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0) - #define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20) - -+#define MI_LOAD_REGISTER_IMM (0x22<<23 | (3-2)) -+ -+#define BCS_SWCTRL 0x22200 -+# define BCS_SWCTRL_SRC_Y (1 << 0) -+# define BCS_SWCTRL_DST_Y (1 << 1) -+ - /* BLT commands */ - #define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) - #define COLOR_BLT_WRITE_ALPHA (1<<21) -diff --git a/src/uxa/i965_video.c b/src/uxa/i965_video.c -index 68e6fd38..438ab909 100644 ---- a/src/uxa/i965_video.c -+++ b/src/uxa/i965_video.c -@@ -37,7 +37,6 @@ - #include "fourcc.h" - - #include "intel.h" --#include "intel_xvmc.h" - #include "intel_uxa.h" - #include "i830_reg.h" - #include "i965_reg.h" -diff --git a/src/uxa/intel.h b/src/uxa/intel.h -index 1b7e5339..a5e77af4 100644 ---- a/src/uxa/intel.h -+++ b/src/uxa/intel.h -@@ -121,7 +121,6 @@ typedef struct intel_screen_private { - - void *modes; - drm_intel_bo *front_buffer, *back_buffer; -- unsigned int back_name; - long front_pitch, front_tiling; - - dri_bufmgr *bufmgr; -@@ -169,6 +168,7 @@ typedef struct intel_screen_private { - const struct intel_device_info *info; - - unsigned int BR[20]; -+ unsigned int BR_tiling[2]; - - CloseScreenProcPtr CloseScreen; - -@@ 
-196,7 +196,9 @@ typedef struct intel_screen_private { - - int colorKey; - XF86VideoAdaptorPtr adaptor; -+#if !HAVE_NOTIFY_FD - ScreenBlockHandlerProcPtr BlockHandler; -+#endif - Bool overlayOn; - - struct { -@@ -285,8 +287,6 @@ typedef struct intel_screen_private { - Bool has_kernel_flush; - Bool needs_flush; - -- struct _DRI2FrameEvent *pending_flip[MAX_PIPES]; -- - /* Broken-out options. */ - OptionInfoPtr Options; - -@@ -368,6 +368,7 @@ typedef void (*intel_drm_abort_proc)(ScrnInfoPtr scrn, - - extern uint32_t intel_drm_queue_alloc(ScrnInfoPtr scrn, xf86CrtcPtr crtc, void *data, intel_drm_handler_proc handler, intel_drm_abort_proc abort); - extern void intel_drm_abort(ScrnInfoPtr scrn, Bool (*match)(void *data, void *match_data), void *match_data); -+extern void intel_drm_abort_seq(ScrnInfoPtr scrn, uint32_t seq); - - extern int intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, xf86CrtcPtr crtc); - extern int intel_crtc_id(xf86CrtcPtr crtc); -@@ -408,7 +409,6 @@ typedef struct _DRI2FrameEvent { - ClientPtr client; - enum DRI2FrameEventType type; - int frame; -- int pipe; - - struct list drawable_resource, client_resource; - -@@ -418,7 +418,12 @@ typedef struct _DRI2FrameEvent { - DRI2BufferPtr front; - DRI2BufferPtr back; - -- struct _DRI2FrameEvent *chain; -+ /* current scanout for triple buffer */ -+ int old_width; -+ int old_height; -+ int old_pitch; -+ int old_tiling; -+ dri_bo *old_buffer; - } DRI2FrameEventRec, *DRI2FrameEventPtr; - - extern Bool intel_do_pageflip(intel_screen_private *intel, -@@ -456,10 +461,6 @@ extern xf86CrtcPtr intel_covering_crtc(ScrnInfoPtr scrn, BoxPtr box, - - Bool I830DRI2ScreenInit(ScreenPtr pScreen); - void I830DRI2CloseScreen(ScreenPtr pScreen); --void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, -- unsigned int tv_usec, DRI2FrameEventPtr flip_info); --void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, -- unsigned int tv_usec, DRI2FrameEventPtr flip_info); - - /* intel_dri3.c */ - 
Bool intel_dri3_screen_init(ScreenPtr screen); -diff --git a/src/uxa/intel_batchbuffer.c b/src/uxa/intel_batchbuffer.c -index a29e4434..114c6026 100644 ---- a/src/uxa/intel_batchbuffer.c -+++ b/src/uxa/intel_batchbuffer.c -@@ -245,6 +245,17 @@ void intel_batch_submit(ScrnInfoPtr scrn) - if (intel->batch_used == 0) - return; - -+ if (intel->current_batch == I915_EXEC_BLT && -+ INTEL_INFO(intel)->gen >= 060) { -+ OUT_BATCH(MI_FLUSH_DW); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(0); -+ OUT_BATCH(MI_LOAD_REGISTER_IMM); -+ OUT_BATCH(BCS_SWCTRL); -+ OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16); -+ } -+ - /* Mark the end of the batchbuffer. */ - OUT_BATCH(MI_BATCH_BUFFER_END); - /* Emit a padding dword if we aren't going to be quad-word aligned. */ -diff --git a/src/uxa/intel_batchbuffer.h b/src/uxa/intel_batchbuffer.h -index e5fb8d08..e71ffd19 100644 ---- a/src/uxa/intel_batchbuffer.h -+++ b/src/uxa/intel_batchbuffer.h -@@ -30,7 +30,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - #ifndef _INTEL_BATCHBUFFER_H - #define _INTEL_BATCHBUFFER_H - --#define BATCH_RESERVED 16 -+#define BATCH_RESERVED 64 - - - void intel_batch_init(ScrnInfoPtr scrn); -@@ -202,6 +202,23 @@ do { \ - - #define BEGIN_BATCH(n) __BEGIN_BATCH(n,RENDER_BATCH) - #define BEGIN_BATCH_BLT(n) __BEGIN_BATCH(n,BLT_BATCH) -+#define BEGIN_BATCH_BLT_TILED(n) \ -+do { \ -+ if (INTEL_INFO(intel)->gen < 060) { \ -+ __BEGIN_BATCH(n, BLT_BATCH); \ -+ } else { \ -+ __BEGIN_BATCH(n+7, BLT_BATCH); \ -+ OUT_BATCH(MI_FLUSH_DW); \ -+ OUT_BATCH(0); \ -+ OUT_BATCH(0); \ -+ OUT_BATCH(0); \ -+ OUT_BATCH(MI_LOAD_REGISTER_IMM); \ -+ OUT_BATCH(BCS_SWCTRL); \ -+ OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 | \ -+ ((intel->BR_tiling[0] == I915_TILING_Y) ? BCS_SWCTRL_DST_Y : 0) | \ -+ ((intel->BR_tiling[1] == I915_TILING_Y) ? 
BCS_SWCTRL_SRC_Y : 0)); \ -+ } \ -+} while (0) - - #define ADVANCE_BATCH() do { \ - if (intel->batch_emitting == 0) \ -diff --git a/src/uxa/intel_display.c b/src/uxa/intel_display.c -index 7b4d4e0c..809cda1d 100644 ---- a/src/uxa/intel_display.c -+++ b/src/uxa/intel_display.c -@@ -89,11 +89,11 @@ struct intel_mode { - struct list outputs; - struct list crtcs; - -- void *pageflip_data; -- intel_pageflip_handler_proc pageflip_handler; -- intel_pageflip_abort_proc pageflip_abort; -- -- Bool delete_dp_12_displays; -+ struct { -+ intel_pageflip_handler_proc handler; -+ intel_pageflip_abort_proc abort; -+ void *data; -+ } pageflip; - }; - - struct intel_pageflip { -@@ -114,7 +114,6 @@ struct intel_crtc { - struct list link; - PixmapPtr scanout_pixmap; - uint32_t scanout_fb_id; -- int32_t vblank_offset; - uint32_t msc_prev; - uint64_t msc_high; - }; -@@ -193,7 +192,7 @@ intel_output_backlight_init(xf86OutputPtr output) - - str = xf86GetOptValString(intel->Options, OPTION_BACKLIGHT); - if (str != NULL) { -- if (backlight_exists(str) != BL_NONE) { -+ if (backlight_exists(str)) { - intel_output->backlight_active_level = - backlight_open(&intel_output->backlight, - strdup(str)); -@@ -689,9 +688,11 @@ intel_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix) - } - - bo = intel_get_pixmap_bo(ppix); -- if (intel->front_buffer) { -- ErrorF("have front buffer\n"); -- } -+ if (!bo) -+ return FALSE; -+ -+ if (intel->front_buffer) -+ return FALSE; - - drm_intel_bo_disable_reuse(bo); - -@@ -867,6 +868,48 @@ intel_output_attach_edid(xf86OutputPtr output) - xf86OutputSetEDID(output, mon); - } - -+static void -+intel_output_attach_tile(xf86OutputPtr output) -+{ -+#if XF86_OUTPUT_VERSION >= 3 -+ struct intel_output *intel_output = output->driver_private; -+ drmModeConnectorPtr koutput = intel_output->mode_output; -+ struct intel_mode *mode = intel_output->mode; -+ drmModePropertyBlobPtr blob = NULL; -+ struct xf86CrtcTileInfo tile_info, *set = NULL; -+ int i; -+ -+ for (i = 0; koutput && 
i < koutput->count_props; i++) { -+ drmModePropertyPtr props; -+ -+ props = drmModeGetProperty(mode->fd, koutput->props[i]); -+ if (!props) -+ continue; -+ -+ if (!(props->flags & DRM_MODE_PROP_BLOB)) { -+ drmModeFreeProperty(props); -+ continue; -+ } -+ -+ if (!strcmp(props->name, "TILE")) { -+ blob = drmModeGetPropertyBlob(mode->fd, -+ koutput->prop_values[i]); -+ } -+ drmModeFreeProperty(props); -+ } -+ -+ if (blob) { -+ if (xf86OutputParseKMSTile(blob->data, -+ blob->length, -+ &tile_info)) -+ set = &tile_info; -+ drmModeFreePropertyBlob(blob); -+ } -+ -+ xf86OutputSetTile(output, set); -+#endif -+} -+ - static DisplayModePtr - intel_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) - { -@@ -922,6 +965,7 @@ intel_output_get_modes(xf86OutputPtr output) - int i; - - intel_output_attach_edid(output); -+ intel_output_attach_tile(output); - - if (!koutput) - return Modes; -@@ -1492,6 +1536,7 @@ intel_output_init(ScrnInfoPtr scrn, struct intel_mode *mode, drmModeResPtr mode_ - intel_output = output->driver_private; - intel_output->output_id = mode_res->connectors[num]; - intel_output->mode_output = koutput; -+ RROutputChanged(output->randr_output, TRUE); - return; - } - } -@@ -1650,9 +1695,6 @@ intel_pageflip_abort(ScrnInfoPtr scrn, xf86CrtcPtr crtc, void *data); - static void - intel_pageflip_complete(struct intel_mode *mode); - --static void --intel_drm_abort_seq (ScrnInfoPtr scrn, uint32_t seq); -- - Bool - intel_do_pageflip(intel_screen_private *intel, - dri_bo *new_front, -@@ -1671,23 +1713,30 @@ intel_do_pageflip(intel_screen_private *intel, - uint32_t new_fb_id; - uint32_t flags; - uint32_t seq; -+ int err = 0; - int i; - - /* -+ * We only have a single length queue in the kernel, so any -+ * attempts to schedule a second flip before processing the first -+ * is a bug. Punt it back to the caller. 
-+ */ -+ if (mode->flip_count) -+ return FALSE; -+ -+ /* - * Create a new handle for the back buffer - */ - if (drmModeAddFB(mode->fd, scrn->virtualX, scrn->virtualY, - scrn->depth, scrn->bitsPerPixel, pitch, -- new_front->handle, &new_fb_id)) -+ new_front->handle, &new_fb_id)) { -+ err = errno; - goto error_out; -+ } - - drm_intel_bo_disable_reuse(new_front); - intel_flush(intel); - -- mode->pageflip_data = pageflip_data; -- mode->pageflip_handler = pageflip_handler; -- mode->pageflip_abort = pageflip_abort; -- - /* - * Queue flips on all enabled CRTCs - * Note that if/when we get per-CRTC buffers, we'll have to update this. -@@ -1699,6 +1748,7 @@ intel_do_pageflip(intel_screen_private *intel, - */ - mode->fe_msc = 0; - mode->fe_usec = 0; -+ memset(&mode->pageflip, 0, sizeof(mode->pageflip)); - - flags = DRM_MODE_PAGE_FLIP_EVENT; - if (async) -@@ -1711,8 +1761,7 @@ intel_do_pageflip(intel_screen_private *intel, - - flip = calloc(1, sizeof(struct intel_pageflip)); - if (flip == NULL) { -- xf86DrvMsg(scrn->scrnIndex, X_WARNING, -- "flip queue: carrier alloc failed.\n"); -+ err = errno; - goto error_undo; - } - -@@ -1724,33 +1773,30 @@ intel_do_pageflip(intel_screen_private *intel, - - seq = intel_drm_queue_alloc(scrn, config->crtc[i], flip, intel_pageflip_handler, intel_pageflip_abort); - if (!seq) { -+ err = errno; - free(flip); - goto error_undo; - } - --again: -+ mode->flip_count++; -+ - if (drmModePageFlip(mode->fd, - crtc_id(crtc), - new_fb_id, - flags, (void *)(uintptr_t)seq)) { -- if (intel_mode_read_drm_events(intel)) { -- xf86DrvMsg(scrn->scrnIndex, X_WARNING, -- "flip queue retry\n"); -- goto again; -- } -- xf86DrvMsg(scrn->scrnIndex, X_WARNING, -- "flip queue failed: %s\n", strerror(errno)); -- if (seq) -- intel_drm_abort_seq(scrn, seq); -- free(flip); -+ err = errno; -+ intel_drm_abort_seq(scrn, seq); - goto error_undo; - } -- mode->flip_count++; - } - - mode->old_fb_id = mode->fb_id; - mode->fb_id = new_fb_id; - -+ mode->pageflip.data = pageflip_data; 
-+ mode->pageflip.handler = pageflip_handler; -+ mode->pageflip.abort = pageflip_abort; -+ - if (!mode->flip_count) - intel_pageflip_complete(mode); - -@@ -1765,7 +1811,7 @@ error_undo: - - error_out: - xf86DrvMsg(scrn->scrnIndex, X_WARNING, "Page flip failed: %s\n", -- strerror(errno)); -+ strerror(err)); - - mode->flip_count = 0; - return FALSE; -@@ -1839,7 +1885,7 @@ intel_drm_abort(ScrnInfoPtr scrn, Bool (*match)(void *data, void *match_data), v - /* - * Abort by drm queue sequence number - */ --static void -+void - intel_drm_abort_seq(ScrnInfoPtr scrn, uint32_t seq) - { - struct intel_drm_queue *q; -@@ -1911,7 +1957,6 @@ intel_sequence_to_crtc_msc(xf86CrtcPtr crtc, uint32_t sequence) - { - struct intel_crtc *intel_crtc = crtc->driver_private; - -- sequence += intel_crtc->vblank_offset; - if ((int32_t) (sequence - intel_crtc->msc_prev) < -0x40000000) - intel_crtc->msc_high += 0x100000000L; - intel_crtc->msc_prev = sequence; -@@ -1935,37 +1980,10 @@ intel_get_crtc_msc_ust(ScrnInfoPtr scrn, xf86CrtcPtr crtc, uint64_t *msc, uint64 - return 0; - } - --/* -- * Convert a 64-bit adjusted MSC value into a 32-bit kernel sequence number, -- * removing the high 32 bits and subtracting out the vblank_offset term. -- * -- * This also updates the vblank_offset when it notices that the value should -- * change. 
-- */ -- --#define MAX_VBLANK_OFFSET 1000 -- - uint32_t - intel_crtc_msc_to_sequence(ScrnInfoPtr scrn, xf86CrtcPtr crtc, uint64_t expect) - { -- struct intel_crtc *intel_crtc = crtc->driver_private; -- uint64_t msc, ust; -- -- if (intel_get_crtc_msc_ust(scrn, crtc, &msc, &ust) == 0) { -- int64_t diff = expect - msc; -- -- /* We're way off here, assume that the kernel has lost its mind -- * and smack the vblank back to something sensible -- */ -- if (diff < -MAX_VBLANK_OFFSET || diff > MAX_VBLANK_OFFSET) { -- intel_crtc->vblank_offset += (int32_t) diff; -- if (intel_crtc->vblank_offset > -MAX_VBLANK_OFFSET && -- intel_crtc->vblank_offset < MAX_VBLANK_OFFSET) -- intel_crtc->vblank_offset = 0; -- } -- } -- -- return (uint32_t) (expect - intel_crtc->vblank_offset); -+ return (uint32_t)expect; - } - - /* -@@ -1998,14 +2016,13 @@ intel_drm_handler(int fd, uint32_t frame, uint32_t sec, uint32_t usec, void *use - static void - intel_pageflip_complete(struct intel_mode *mode) - { -- /* Release framebuffer */ -- drmModeRmFB(mode->fd, mode->old_fb_id); -- -- if (!mode->pageflip_handler) -+ if (!mode->pageflip.handler) - return; - -- mode->pageflip_handler(mode->fe_msc, mode->fe_usec, -- mode->pageflip_data); -+ /* Release framebuffer */ -+ drmModeRmFB(mode->fd, mode->old_fb_id); -+ mode->pageflip.handler(mode->fe_msc, mode->fe_usec, -+ mode->pageflip.data); - } - - /* -@@ -2045,6 +2062,7 @@ intel_pageflip_handler(ScrnInfoPtr scrn, xf86CrtcPtr crtc, - - if (!mode) - return; -+ - intel_pageflip_complete(mode); - } - -@@ -2060,18 +2078,18 @@ intel_pageflip_abort(ScrnInfoPtr scrn, xf86CrtcPtr crtc, void *data) - if (!mode) - return; - -- /* Release framebuffer */ -- drmModeRmFB(mode->fd, mode->old_fb_id); -- -- if (!mode->pageflip_abort) -+ if (!mode->pageflip.abort) - return; - -- mode->pageflip_abort(mode->pageflip_data); -+ /* Release framebuffer */ -+ drmModeRmFB(mode->fd, mode->old_fb_id); -+ mode->pageflip.abort(mode->pageflip.data); - } - - /* - * Check for pending DRM 
events and process them. - */ -+#if !HAVE_NOTIFY_FD - static void - drm_wakeup_handler(pointer data, int err, pointer p) - { -@@ -2086,6 +2104,14 @@ drm_wakeup_handler(pointer data, int err, pointer p) - if (FD_ISSET(mode->fd, read_mask)) - drmHandleEvent(mode->fd, &mode->event_context); - } -+#else -+static void -+drm_notify_fd(int fd, int ready, void *data) -+{ -+ struct intel_mode *mode = data; -+ drmHandleEvent(mode->fd, &mode->event_context); -+} -+#endif - - /* - * If there are any available, read drm_events -@@ -2231,10 +2257,6 @@ Bool intel_mode_pre_init(ScrnInfoPtr scrn, int fd, int cpp) - intel->use_pageflipping = TRUE; - } - -- if (xf86ReturnOptValBool(intel->Options, OPTION_DELETE_DP12, FALSE)) { -- mode->delete_dp_12_displays = TRUE; -- } -- - intel->modes = mode; - drmModeFreeResources(mode_res); - return TRUE; -@@ -2250,9 +2272,11 @@ intel_mode_init(struct intel_screen_private *intel) - * registration within ScreenInit and not PreInit. - */ - mode->flip_count = 0; -- AddGeneralSocket(mode->fd); -+ SetNotifyFd(mode->fd, drm_notify_fd, X_NOTIFY_READ, mode); -+#if !HAVE_NOTIFY_FD - RegisterBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, - drm_wakeup_handler, mode); -+#endif - } - - void -@@ -2276,9 +2300,11 @@ intel_mode_close(intel_screen_private *intel) - - intel_drm_abort_scrn(intel->scrn); - -+#if !HAVE_NOTIFY_FD - RemoveBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, - drm_wakeup_handler, mode); -- RemoveGeneralSocket(mode->fd); -+#endif -+ RemoveNotifyFd(mode->fd); - } - - void -@@ -2498,12 +2524,11 @@ intel_mode_hotplug(struct intel_screen_private *intel) - int i, j; - Bool found; - Bool changed = FALSE; -- struct intel_mode *mode = intel->modes; -+ - mode_res = drmModeGetResources(intel->drmSubFD); - if (!mode_res) - goto out; - --restart_destroy: - for (i = 0; i < config->num_output; i++) { - xf86OutputPtr output = config->output[i]; - struct intel_output *intel_output; -@@ -2522,13 +2547,9 @@ restart_destroy: - 
drmModeFreeConnector(intel_output->mode_output); - intel_output->mode_output = NULL; - intel_output->output_id = -1; -+ RROutputChanged(output->randr_output, TRUE); - - changed = TRUE; -- if (mode->delete_dp_12_displays) { -- RROutputDestroy(output->randr_output); -- xf86OutputDestroy(output); -- goto restart_destroy; -- } - } - - /* find new output ids we don't have outputs for */ -@@ -2552,10 +2573,8 @@ restart_destroy: - intel_output_init(scrn, intel->modes, mode_res, i, 1); - } - -- if (changed) { -- RRSetChanged(xf86ScrnToScreen(scrn)); -+ if (changed) - RRTellChanged(xf86ScrnToScreen(scrn)); -- } - - drmModeFreeResources(mode_res); - out: -diff --git a/src/uxa/intel_dri.c b/src/uxa/intel_dri.c -index f61c6210..524826d2 100644 ---- a/src/uxa/intel_dri.c -+++ b/src/uxa/intel_dri.c -@@ -81,6 +81,47 @@ static DevPrivateKeyRec i830_client_key; - static int i830_client_key; - #endif - -+static void I830DRI2FlipEventHandler(unsigned int frame, -+ unsigned int tv_sec, -+ unsigned int tv_usec, -+ DRI2FrameEventPtr flip_info); -+ -+static void I830DRI2FrameEventHandler(unsigned int frame, -+ unsigned int tv_sec, -+ unsigned int tv_usec, -+ DRI2FrameEventPtr swap_info); -+ -+static void -+i830_dri2_del_frame_event(DRI2FrameEventPtr info); -+ -+static uint32_t pipe_select(int pipe) -+{ -+ if (pipe > 1) -+ return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT; -+ else if (pipe > 0) -+ return DRM_VBLANK_SECONDARY; -+ else -+ return 0; -+} -+ -+static void -+intel_dri2_vblank_handler(ScrnInfoPtr scrn, -+ xf86CrtcPtr crtc, -+ uint64_t msc, -+ uint64_t usec, -+ void *data) -+{ -+ I830DRI2FrameEventHandler((uint32_t) msc, usec / 1000000, usec % 1000000, data); -+} -+ -+static void -+intel_dri2_vblank_abort(ScrnInfoPtr scrn, -+ xf86CrtcPtr crtc, -+ void *data) -+{ -+ i830_dri2_del_frame_event(data); -+} -+ - static uint32_t pixmap_flink(PixmapPtr pixmap) - { - struct intel_uxa_pixmap *priv = intel_uxa_get_pixmap_private(pixmap); -@@ -135,9 +176,6 @@ I830DRI2CreateBuffers(DrawablePtr 
drawable, unsigned int *attachments, - pixmap = NULL; - if (attachments[i] == DRI2BufferFrontLeft) { - pixmap = get_front_buffer(drawable); -- -- if (pixmap == NULL) -- drawable = &(get_drawable_pixmap(drawable)->drawable); - } else if (attachments[i] == DRI2BufferStencil && pDepthPixmap) { - pixmap = pDepthPixmap; - pixmap->refcnt++; -@@ -246,11 +284,8 @@ I830DRI2CreateBuffer(DrawablePtr drawable, unsigned int attachment, - } - - pixmap = NULL; -- if (attachment == DRI2BufferFrontLeft) { -+ if (attachment == DRI2BufferFrontLeft) - pixmap = get_front_buffer(drawable); -- if (pixmap == NULL) -- drawable = &(get_drawable_pixmap(drawable)->drawable); -- } - - if (pixmap == NULL) { - unsigned int hint = INTEL_CREATE_PIXMAP_DRI2; -@@ -673,6 +708,20 @@ i830_dri2_del_frame_event(DRI2FrameEventPtr info) - if (info->back) - I830DRI2DestroyBuffer(NULL, info->back); - -+ if (info->old_buffer) { -+ /* Check that the old buffer still matches the front buffer -+ * in case a mode change occurred before we woke up. 
-+ */ -+ if (info->intel->back_buffer == NULL && -+ info->old_width == info->intel->scrn->virtualX && -+ info->old_height == info->intel->scrn->virtualY && -+ info->old_pitch == info->intel->front_pitch && -+ info->old_tiling == info->intel->front_tiling) -+ info->intel->back_buffer = info->old_buffer; -+ else -+ dri_bo_unreference(info->old_buffer); -+ } -+ - free(info); - } - -@@ -708,16 +757,14 @@ static void - I830DRI2ExchangeBuffers(struct intel_screen_private *intel, DRI2BufferPtr front, DRI2BufferPtr back) - { - I830DRI2BufferPrivatePtr front_priv, back_priv; -- int tmp; - struct intel_uxa_pixmap *new_front; - - front_priv = front->driverPrivate; - back_priv = back->driverPrivate; - - /* Swap BO names so DRI works */ -- tmp = front->name; - front->name = back->name; -- back->name = tmp; -+ back->name = pixmap_flink(front_priv->pixmap); - - /* Swap pixmap bos */ - new_front = intel_exchange_pixmap_buffers(intel, -@@ -753,87 +800,30 @@ I830DRI2FlipAbort(void *pageflip_data) - i830_dri2_del_frame_event(info); - } - --/* -- * Our internal swap routine takes care of actually exchanging, blitting, or -- * flipping buffers as necessary. 
-- */ - static Bool --I830DRI2ScheduleFlip(struct intel_screen_private *intel, -- DrawablePtr draw, -- DRI2FrameEventPtr info) -+allocate_back_buffer(struct intel_screen_private *intel) - { -- I830DRI2BufferPrivatePtr priv = info->back->driverPrivate; -- drm_intel_bo *new_back, *old_back; -- int tmp_name; -- -- if (!intel->use_triple_buffer) { -- info->type = DRI2_SWAP; -- if (!intel_do_pageflip(intel, -- get_pixmap_bo(priv), -- info->pipe, FALSE, info, -- I830DRI2FlipComplete, -- I830DRI2FlipAbort)) -- return FALSE; -- -- I830DRI2ExchangeBuffers(intel, info->front, info->back); -- return TRUE; -- } -+ drm_intel_bo *bo; -+ int pitch; -+ uint32_t tiling; - -- if (intel->pending_flip[info->pipe]) { -- assert(intel->pending_flip[info->pipe]->chain == NULL); -- intel->pending_flip[info->pipe]->chain = info; -+ if (intel->back_buffer) - return TRUE; -- } - -- if (intel->back_buffer == NULL) { -- new_back = drm_intel_bo_alloc(intel->bufmgr, "front buffer", -- intel->front_buffer->size, 0); -- if (new_back == NULL) -- return FALSE; -- -- if (intel->front_tiling != I915_TILING_NONE) { -- uint32_t tiling = intel->front_tiling; -- drm_intel_bo_set_tiling(new_back, &tiling, intel->front_pitch); -- if (tiling != intel->front_tiling) { -- drm_intel_bo_unreference(new_back); -- return FALSE; -- } -- } -- -- drm_intel_bo_disable_reuse(new_back); -- dri_bo_flink(new_back, &intel->back_name); -- } else { -- new_back = intel->back_buffer; -- intel->back_buffer = NULL; -- } -+ bo = intel_allocate_framebuffer(intel->scrn, -+ intel->scrn->virtualX, -+ intel->scrn->virtualY, -+ intel->cpp, -+ &pitch, &tiling); -+ if (bo == NULL) -+ return FALSE; - -- old_back = get_pixmap_bo(priv); -- if (!intel_do_pageflip(intel, old_back, info->pipe, FALSE, info, I830DRI2FlipComplete, I830DRI2FlipAbort)) { -- intel->back_buffer = new_back; -+ if (pitch != intel->front_pitch || tiling != intel->front_tiling) { -+ drm_intel_bo_unreference(bo); - return FALSE; - } -- info->type = DRI2_SWAP_CHAIN; -- 
intel->pending_flip[info->pipe] = info; -- -- priv = info->front->driverPrivate; -- -- /* Exchange the current front-buffer with the fresh bo */ -- -- intel->back_buffer = intel->front_buffer; -- drm_intel_bo_reference(intel->back_buffer); -- intel_set_pixmap_bo(priv->pixmap, new_back); -- drm_intel_bo_unreference(new_back); -- -- tmp_name = info->front->name; -- info->front->name = intel->back_name; -- intel->back_name = tmp_name; - -- /* Then flip DRI2 pointers and update the screen pixmap */ -- I830DRI2ExchangeBuffers(intel, info->front, info->back); -- DRI2SwapComplete(info->client, draw, 0, 0, 0, -- DRI2_EXCHANGE_COMPLETE, -- info->event_complete, -- info->event_data); -+ intel->back_buffer = bo; - return TRUE; - } - -@@ -889,8 +879,88 @@ can_exchange(DrawablePtr drawable, DRI2BufferPtr front, DRI2BufferPtr back) - return TRUE; - } - --void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, -- unsigned int tv_usec, DRI2FrameEventPtr swap_info) -+static Bool -+queue_flip(struct intel_screen_private *intel, -+ DrawablePtr draw, -+ DRI2FrameEventPtr info) -+{ -+ xf86CrtcPtr crtc = I830DRI2DrawableCrtc(draw); -+ I830DRI2BufferPrivatePtr priv = info->back->driverPrivate; -+ drm_intel_bo *old_back = get_pixmap_bo(priv); -+ -+ if (crtc == NULL) -+ return FALSE; -+ -+ if (!can_exchange(draw, info->front, info->back)) -+ return FALSE; -+ -+ if (!intel_do_pageflip(intel, old_back, -+ intel_crtc_to_pipe(crtc), -+ FALSE, info, -+ I830DRI2FlipComplete, I830DRI2FlipAbort)) -+ return FALSE; -+ -+#if DRI2INFOREC_VERSION >= 6 -+ if (intel->use_triple_buffer && allocate_back_buffer(intel)) { -+ info->old_width = intel->scrn->virtualX; -+ info->old_height = intel->scrn->virtualY; -+ info->old_pitch = intel->front_pitch; -+ info->old_tiling = intel->front_tiling; -+ info->old_buffer = intel->front_buffer; -+ dri_bo_reference(info->old_buffer); -+ -+ priv = info->front->driverPrivate; -+ intel_set_pixmap_bo(priv->pixmap, intel->back_buffer); -+ -+ 
dri_bo_unreference(intel->back_buffer); -+ intel->back_buffer = NULL; -+ -+ DRI2SwapLimit(draw, 2); -+ } else -+ DRI2SwapLimit(draw, 1); -+#endif -+ -+ /* Then flip DRI2 pointers and update the screen pixmap */ -+ I830DRI2ExchangeBuffers(intel, info->front, info->back); -+ return TRUE; -+} -+ -+static Bool -+queue_swap(struct intel_screen_private *intel, -+ DrawablePtr draw, -+ DRI2FrameEventPtr info) -+{ -+ xf86CrtcPtr crtc = I830DRI2DrawableCrtc(draw); -+ drmVBlank vbl; -+ -+ if (crtc == NULL) -+ return FALSE; -+ -+ vbl.request.type = -+ DRM_VBLANK_RELATIVE | -+ DRM_VBLANK_EVENT | -+ pipe_select(intel_crtc_to_pipe(crtc)); -+ vbl.request.sequence = 1; -+ vbl.request.signal = -+ intel_drm_queue_alloc(intel->scrn, crtc, info, -+ intel_dri2_vblank_handler, -+ intel_dri2_vblank_abort); -+ if (vbl.request.signal == 0) -+ return FALSE; -+ -+ info->type = DRI2_SWAP; -+ if (drmWaitVBlank(intel->drmSubFD, &vbl)) { -+ intel_drm_abort_seq(intel->scrn, vbl.request.signal); -+ return FALSE; -+ } -+ -+ return TRUE; -+} -+ -+static void I830DRI2FrameEventHandler(unsigned int frame, -+ unsigned int tv_sec, -+ unsigned int tv_usec, -+ DRI2FrameEventPtr swap_info) - { - intel_screen_private *intel = swap_info->intel; - DrawablePtr drawable; -@@ -906,24 +976,22 @@ void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, - return; - } - -- - switch (swap_info->type) { - case DRI2_FLIP: - /* If we can still flip... */ -- if (can_exchange(drawable, swap_info->front, swap_info->back) && -- I830DRI2ScheduleFlip(intel, drawable, swap_info)) -- return; -- -- /* else fall through to exchange/blit */ -- case DRI2_SWAP: { -- I830DRI2FallbackBlitSwap(drawable, -- swap_info->front, swap_info->back); -- DRI2SwapComplete(swap_info->client, drawable, frame, tv_sec, tv_usec, -- DRI2_BLIT_COMPLETE, -- swap_info->client ? 
swap_info->event_complete : NULL, -- swap_info->event_data); -- break; -- } -+ if (!queue_flip(intel, drawable, swap_info) && -+ !queue_swap(intel, drawable, swap_info)) { -+ case DRI2_SWAP: -+ I830DRI2FallbackBlitSwap(drawable, -+ swap_info->front, swap_info->back); -+ DRI2SwapComplete(swap_info->client, drawable, frame, tv_sec, tv_usec, -+ DRI2_BLIT_COMPLETE, -+ swap_info->client ? swap_info->event_complete : NULL, -+ swap_info->event_data); -+ break; -+ } -+ return; -+ - case DRI2_WAITMSC: - if (swap_info->client) - DRI2WaitMSCComplete(swap_info->client, drawable, -@@ -939,12 +1007,13 @@ void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, - i830_dri2_del_frame_event(swap_info); - } - --void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, -- unsigned int tv_usec, DRI2FrameEventPtr flip_info) -+static void I830DRI2FlipEventHandler(unsigned int frame, -+ unsigned int tv_sec, -+ unsigned int tv_usec, -+ DRI2FrameEventPtr flip_info) - { - struct intel_screen_private *intel = flip_info->intel; - DrawablePtr drawable; -- DRI2FrameEventPtr chain; - - drawable = NULL; - if (flip_info->drawable_id) -@@ -954,6 +1023,7 @@ void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, - - /* We assume our flips arrive in order, so we don't check the frame */ - switch (flip_info->type) { -+ case DRI2_FLIP: - case DRI2_SWAP: - if (!drawable) - break; -@@ -984,35 +1054,6 @@ void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, - flip_info->event_data); - break; - -- case DRI2_SWAP_CHAIN: -- assert(intel->pending_flip[flip_info->pipe] == flip_info); -- intel->pending_flip[flip_info->pipe] = NULL; -- -- chain = flip_info->chain; -- if (chain) { -- DrawablePtr chain_drawable = NULL; -- if (chain->drawable_id) -- dixLookupDrawable(&chain_drawable, -- chain->drawable_id, -- serverClient, -- M_ANY, DixWriteAccess); -- if (chain_drawable == NULL) { -- i830_dri2_del_frame_event(chain); -- } else if 
(!can_exchange(chain_drawable, chain->front, chain->back) || -- !I830DRI2ScheduleFlip(intel, chain_drawable, chain)) { -- I830DRI2FallbackBlitSwap(chain_drawable, -- chain->front, -- chain->back); -- -- DRI2SwapComplete(chain->client, chain_drawable, frame, tv_sec, tv_usec, -- DRI2_BLIT_COMPLETE, -- chain->client ? chain->event_complete : NULL, -- chain->event_data); -- i830_dri2_del_frame_event(chain); -- } -- } -- break; -- - default: - xf86DrvMsg(intel->scrn->scrnIndex, X_WARNING, - "%s: unknown vblank event received\n", __func__); -@@ -1023,38 +1064,6 @@ void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, - i830_dri2_del_frame_event(flip_info); - } - --static uint32_t pipe_select(int pipe) --{ -- if (pipe > 1) -- return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT; -- else if (pipe > 0) -- return DRM_VBLANK_SECONDARY; -- else -- return 0; --} -- --static void --intel_dri2_vblank_handler(ScrnInfoPtr scrn, -- xf86CrtcPtr crtc, -- uint64_t msc, -- uint64_t usec, -- void *data) --{ -- DRI2FrameEventPtr swap_info = data; -- -- I830DRI2FrameEventHandler((uint32_t) msc, usec / 1000000, usec % 1000000, swap_info); --} -- --static void --intel_dri2_vblank_abort(ScrnInfoPtr scrn, -- xf86CrtcPtr crtc, -- void *data) --{ -- DRI2FrameEventPtr swap_info = data; -- -- i830_dri2_del_frame_event(swap_info); --} -- - /* - * ScheduleSwap is responsible for requesting a DRM vblank event for the - * appropriate frame. -@@ -1089,7 +1098,6 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - int pipe = crtc ? 
intel_crtc_to_pipe(crtc) : -1; - int flip = 0; - DRI2FrameEventPtr swap_info = NULL; -- enum DRI2FrameEventType swap_type = DRI2_SWAP; - uint64_t current_msc, current_ust; - uint64_t request_msc; - uint32_t seq; -@@ -1109,7 +1117,7 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - swap_info->event_data = data; - swap_info->front = front; - swap_info->back = back; -- swap_info->pipe = pipe; -+ swap_info->type = DRI2_SWAP; - - if (!i830_dri2_add_frame_event(swap_info)) { - free(swap_info); -@@ -1124,20 +1132,27 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - if (ret) - goto blit_fallback; - -- /* Flips need to be submitted one frame before */ -+ /* -+ * If we can, schedule the flip directly from here rather -+ * than waiting for an event from the kernel for the current -+ * (or a past) MSC. -+ */ -+ if (divisor == 0 && -+ current_msc >= *target_msc && -+ queue_flip(intel, draw, swap_info)) -+ return TRUE; -+ - if (can_exchange(draw, front, back)) { -- swap_type = DRI2_FLIP; -- flip = 1; -+ swap_info->type = DRI2_FLIP; -+ /* Flips need to be submitted one frame before */ -+ if (*target_msc > 0) -+ --*target_msc; -+ flip = 1; - } - -- swap_info->type = swap_type; -- -- /* Correct target_msc by 'flip' if swap_type == DRI2_FLIP. -- * Do it early, so handling of different timing constraints -- * for divisor, remainder and msc vs. target_msc works. -- */ -- if (*target_msc > 0) -- *target_msc -= flip; -+#if DRI2INFOREC_VERSION >= 6 -+ DRI2SwapLimit(draw, 1); -+#endif - - /* - * If divisor is zero, or current_msc is smaller than target_msc -@@ -1145,15 +1160,6 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - * the swap. - */ - if (divisor == 0 || current_msc < *target_msc) { -- /* -- * If we can, schedule the flip directly from here rather -- * than waiting for an event from the kernel for the current -- * (or a past) MSC. 
-- */ -- if (flip && divisor == 0 && current_msc >= *target_msc && -- I830DRI2ScheduleFlip(intel, draw, swap_info)) -- return TRUE; -- - vbl.request.type = - DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe); - -@@ -1168,7 +1174,7 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - * current_msc to ensure we return a reasonable value back - * to the caller. This makes swap_interval logic more robust. - */ -- if (current_msc >= *target_msc) -+ if (current_msc > *target_msc) - *target_msc = current_msc; - - seq = intel_drm_queue_alloc(scrn, crtc, swap_info, intel_dri2_vblank_handler, intel_dri2_vblank_abort); -@@ -1183,6 +1189,8 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "divisor 0 get vblank counter failed: %s\n", - strerror(errno)); -+ intel_drm_abort_seq(intel->scrn, seq); -+ swap_info = NULL; - goto blit_fallback; - } - -@@ -1332,7 +1340,6 @@ I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc, - - if (!i830_dri2_add_frame_event(wait_info)) { - free(wait_info); -- wait_info = NULL; - goto out_complete; - } - -@@ -1374,7 +1381,8 @@ I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc, - strerror(errno)); - limit--; - } -- goto out_free; -+ intel_drm_abort_seq(intel->scrn, seq); -+ goto out_complete; - } - - wait_info->frame = intel_sequence_to_crtc_msc(crtc, vbl.reply.sequence); -@@ -1417,7 +1425,8 @@ I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc, - strerror(errno)); - limit--; - } -- goto out_free; -+ intel_drm_abort_seq(intel->scrn, seq); -+ goto out_complete; - } - - wait_info->frame = intel_sequence_to_crtc_msc(crtc, vbl.reply.sequence); -@@ -1440,13 +1449,92 @@ static int has_i830_dri(void) - return access(DRI_DRIVER_PATH "/i830_dri.so", R_OK) == 0; - } - --static const char *dri_driver_name(intel_screen_private *intel) -+static int -+namecmp(const char *s1, 
const char *s2) -+{ -+ char c1, c2; -+ -+ if (!s1 || *s1 == 0) { -+ if (!s2 || *s2 == 0) -+ return 0; -+ else -+ return 1; -+ } -+ -+ while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') -+ s1++; -+ -+ while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') -+ s2++; -+ -+ c1 = isupper(*s1) ? tolower(*s1) : *s1; -+ c2 = isupper(*s2) ? tolower(*s2) : *s2; -+ while (c1 == c2) { -+ if (c1 == '\0') -+ return 0; -+ -+ s1++; -+ while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') -+ s1++; -+ -+ s2++; -+ while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') -+ s2++; -+ -+ c1 = isupper(*s1) ? tolower(*s1) : *s1; -+ c2 = isupper(*s2) ? tolower(*s2) : *s2; -+ } -+ -+ return c1 - c2; -+} -+ -+static Bool is_level(const char **str) -+{ -+ const char *s = *str; -+ char *end; -+ unsigned val; -+ -+ if (s == NULL || *s == '\0') -+ return TRUE; -+ -+ if (namecmp(s, "on") == 0) -+ return TRUE; -+ if (namecmp(s, "true") == 0) -+ return TRUE; -+ if (namecmp(s, "yes") == 0) -+ return TRUE; -+ -+ if (namecmp(s, "0") == 0) -+ return TRUE; -+ if (namecmp(s, "off") == 0) -+ return TRUE; -+ if (namecmp(s, "false") == 0) -+ return TRUE; -+ if (namecmp(s, "no") == 0) -+ return TRUE; -+ -+ val = strtoul(s, &end, 0); -+ if (val && *end == '\0') -+ return TRUE; -+ if (val && *end == ':') -+ *str = end + 1; -+ return FALSE; -+} -+ -+static const char *options_get_dri(intel_screen_private *intel) - { - #if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) -- const char *s = xf86GetOptValString(intel->Options, OPTION_DRI); -- Bool dummy; -+ return xf86GetOptValString(intel->Options, OPTION_DRI); -+#else -+ return NULL; -+#endif -+} - -- if (s == NULL || xf86getBoolValue(&dummy, s)) { -+static const char *dri_driver_name(intel_screen_private *intel) -+{ -+ const char *s = options_get_dri(intel); -+ -+ if (is_level(&s)) { - if (INTEL_INFO(intel)->gen < 030) - return has_i830_dri() ? 
"i830" : "i915"; - else if (INTEL_INFO(intel)->gen < 040) -@@ -1456,14 +1544,6 @@ static const char *dri_driver_name(intel_screen_private *intel) - } - - return s; --#else -- if (INTEL_INFO(intel)->gen < 030) -- return has_i830_dri() ? "i830" : "i915"; -- else if (INTEL_INFO(intel)->gen < 040) -- return "i915"; -- else -- return "i965"; --#endif - } - - Bool I830DRI2ScreenInit(ScreenPtr screen) -@@ -1544,7 +1624,7 @@ Bool I830DRI2ScreenInit(ScreenPtr screen) - info.numDrivers = 2; - info.driverNames = driverNames; - driverNames[0] = info.driverName; -- driverNames[1] = info.driverName; -+ driverNames[1] = "va_gl"; - #endif - - return DRI2ScreenInit(screen, &info); -diff --git a/src/uxa/intel_driver.c b/src/uxa/intel_driver.c -index 2793da5d..3703c412 100644 ---- a/src/uxa/intel_driver.c -+++ b/src/uxa/intel_driver.c -@@ -237,24 +237,17 @@ static Bool I830GetEarlyOptions(ScrnInfoPtr scrn) - return TRUE; - } - --static Bool intel_option_cast_string_to_bool(intel_screen_private *intel, -- int id, Bool val) --{ --#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) -- xf86getBoolValue(&val, xf86GetOptValString(intel->Options, id)); -- return val; --#else -- return val; --#endif --} -- - static void intel_check_dri_option(ScrnInfoPtr scrn) - { - intel_screen_private *intel = intel_get_screen_private(scrn); -+ unsigned level; - - intel->dri2 = intel->dri3 = DRI_NONE; -- if (!intel_option_cast_string_to_bool(intel, OPTION_DRI, TRUE)) -- intel->dri2 = intel->dri3 = DRI_DISABLED; -+ level = intel_option_cast_to_unsigned(intel->Options, OPTION_DRI, DEFAULT_DRI_LEVEL); -+ if (level < 3 || INTEL_INFO(intel)->gen < 040) -+ intel->dri3 = DRI_DISABLED; -+ if (level < 2) -+ intel->dri2 = DRI_DISABLED; - - if (scrn->depth != 16 && scrn->depth != 24 && scrn->depth != 30) { - xf86DrvMsg(scrn->scrnIndex, X_CONFIG, -@@ -371,8 +364,8 @@ static Bool can_accelerate_blt(struct intel_screen_private *intel) - if (INTEL_INFO(intel)->gen == -1) - return FALSE; - -- if 
(xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_DISABLE, FALSE) || -- !intel_option_cast_string_to_bool(intel, OPTION_ACCEL_METHOD, TRUE)) { -+ if (!xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_ENABLE, TRUE) || -+ !intel_option_cast_to_bool(intel->Options, OPTION_ACCEL_METHOD, TRUE)) { - xf86DrvMsg(intel->scrn->scrnIndex, X_CONFIG, - "Disabling hardware acceleration.\n"); - return FALSE; -@@ -659,8 +652,9 @@ redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty) - } - - static void --intel_dirty_update(ScreenPtr screen) -+intel_dirty_update(intel_screen_private *intel) - { -+ ScreenPtr screen = xf86ScrnToScreen(intel->scrn); - RegionPtr region; - PixmapDirtyUpdatePtr ent; - -@@ -677,6 +671,7 @@ intel_dirty_update(ScreenPtr screen) - } - #endif - -+#if !HAVE_NOTIFY_FD - static void - I830BlockHandler(BLOCKHANDLER_ARGS_DECL) - { -@@ -694,9 +689,22 @@ I830BlockHandler(BLOCKHANDLER_ARGS_DECL) - intel_uxa_block_handler(intel); - intel_video_block_handler(intel); - #ifdef INTEL_PIXMAP_SHARING -- intel_dirty_update(screen); -+ intel_dirty_update(intel); - #endif - } -+#else -+static void -+I830BlockHandler(void *data, void *timeout) -+{ -+ intel_screen_private *intel = data; -+ -+ intel_uxa_block_handler(intel); -+ intel_video_block_handler(intel); -+#ifdef INTEL_PIXMAP_SHARING -+ intel_dirty_update(intel); -+#endif -+} -+#endif - - static Bool - intel_init_initial_framebuffer(ScrnInfoPtr scrn) -@@ -735,6 +743,8 @@ intel_flush_callback(CallbackListPtr *list, - } - - #if HAVE_UDEV -+#include -+ - static void - I830HandleUEvents(int fd, void *closure) - { -@@ -771,6 +781,15 @@ I830HandleUEvents(int fd, void *closure) - udev_device_unref(dev); - } - -+static int has_randr(void) -+{ -+#if HAS_DIXREGISTERPRIVATEKEY -+ return dixPrivateKeyRegistered(rrPrivKey); -+#else -+ return *rrPrivKey; -+#endif -+} -+ - static void - I830UeventInit(ScrnInfoPtr scrn) - { -@@ -780,6 +799,10 @@ I830UeventInit(ScrnInfoPtr scrn) - Bool hotplug; - MessageType from = X_CONFIG; - -+ 
/* Without RR, nothing we can do here */ -+ if (!has_randr()) -+ return; -+ - if (!xf86GetOptValBool(intel->Options, OPTION_HOTPLUG, &hotplug)) { - from = X_DEFAULT; - hotplug = TRUE; -@@ -939,8 +962,14 @@ I830ScreenInit(SCREEN_INIT_ARGS_DECL) - "Hardware cursor initialization failed\n"); - } - -+#if !HAVE_NOTIFY_FD - intel->BlockHandler = screen->BlockHandler; - screen->BlockHandler = I830BlockHandler; -+#else -+ RegisterBlockAndWakeupHandlers(I830BlockHandler, -+ (ServerWakeupHandlerProcPtr)NoopDDA, -+ intel); -+#endif - - #ifdef INTEL_PIXMAP_SHARING - screen->StartPixmapTracking = PixmapStartDirtyTracking; -@@ -1164,8 +1193,6 @@ static Bool I830CloseScreen(CLOSE_SCREEN_ARGS_DECL) - - intel_sync_close(screen); - -- xf86GARTCloseScreen(scrn->scrnIndex); -- - scrn->vtSema = FALSE; - return TRUE; - } -diff --git a/src/uxa/intel_hwmc.c b/src/uxa/intel_hwmc.c -index 829cb8e0..78540600 100644 ---- a/src/uxa/intel_hwmc.c -+++ b/src/uxa/intel_hwmc.c -@@ -193,7 +193,7 @@ Bool intel_xvmc_adaptor_init(ScreenPtr pScreen) - intel_screen_private *intel = intel_get_screen_private(scrn); - struct pci_device *pci; - static XF86MCAdaptorRec *pAdapt; -- char *name; -+ const char *name; - char buf[64]; - - if (!intel->XvMCEnabled) -diff --git a/src/uxa/intel_memory.c b/src/uxa/intel_memory.c -index 0c6cf30c..b2d7a367 100644 ---- a/src/uxa/intel_memory.c -+++ b/src/uxa/intel_memory.c -@@ -42,7 +42,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * This is the video memory allocator. Our memory allocation is different from - * other graphics chips, where you have a fixed amount of graphics memory - * available that you want to put to the best use. Instead, we have almost no -- * memory pre-allocated, and we have to choose an appropriate amount of sytem -+ * memory pre-allocated, and we have to choose an appropriate amount of system - * memory to use. 
- * - * The allocations we might do: -diff --git a/src/uxa/intel_present.c b/src/uxa/intel_present.c -index d20043f3..ac028edd 100644 ---- a/src/uxa/intel_present.c -+++ b/src/uxa/intel_present.c -@@ -244,6 +244,7 @@ intel_present_check_flip(RRCrtcPtr crtc, - ScrnInfoPtr scrn = xf86ScreenToScrn(screen); - intel_screen_private *intel = intel_get_screen_private(scrn); - dri_bo *bo; -+ uint32_t tiling, swizzle; - - if (!scrn->vtSema) - return FALSE; -@@ -266,6 +267,12 @@ intel_present_check_flip(RRCrtcPtr crtc, - if (!bo) - return FALSE; - -+ if (drm_intel_bo_get_tiling(bo, &tiling, &swizzle)) -+ return FALSE; -+ -+ if (tiling == I915_TILING_Y) -+ return FALSE; -+ - return TRUE; - } - -@@ -343,29 +350,33 @@ intel_present_unflip(ScreenPtr screen, uint64_t event_id) - { - ScrnInfoPtr scrn = xf86ScreenToScrn(screen); - intel_screen_private *intel = intel_get_screen_private(scrn); -- struct intel_present_vblank_event *event; - PixmapPtr pixmap = screen->GetScreenPixmap(screen); -+ struct intel_present_vblank_event *event = NULL; - dri_bo *bo; -- Bool ret; - - if (!intel_present_check_flip(NULL, screen->root, pixmap, true)) -- return; -+ goto fail; - - bo = intel_get_pixmap_bo(pixmap); - if (!bo) -- return; -+ goto fail; - - event = calloc(1, sizeof(struct intel_present_vblank_event)); - if (!event) -- return; -+ goto fail; - - event->event_id = event_id; - -- ret = intel_do_pageflip(intel, bo, -1, FALSE, event, intel_present_flip_event, intel_present_flip_abort); -- if (!ret) { -- xf86DrvMsg(scrn->scrnIndex, X_ERROR, -- "present unflip failed\n"); -- } -+ if (!intel_do_pageflip(intel, bo, -1, FALSE, event, -+ intel_present_flip_event, -+ intel_present_flip_abort)) -+ goto fail; -+ -+ return; -+fail: -+ xf86SetDesiredModes(scrn); -+ present_event_notify(event_id, 0, 0); -+ free(event); - } - - static present_screen_info_rec intel_present_screen_info = { -diff --git a/src/uxa/intel_uxa.c b/src/uxa/intel_uxa.c -index 590ff5d1..ec32a723 100644 ---- a/src/uxa/intel_uxa.c -+++ 
b/src/uxa/intel_uxa.c -@@ -176,6 +176,24 @@ intel_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask) - return TRUE; - } - -+static Bool -+intel_uxa_check_bo_tiling(intel_screen_private *intel, -+ PixmapPtr pixmap, -+ unsigned *tiling_out) -+{ -+ struct intel_uxa_pixmap *priv; -+ -+ priv = intel_uxa_get_pixmap_private(pixmap); -+ if (!priv) -+ return FALSE; -+ -+ if (priv->tiling == I915_TILING_Y && INTEL_INFO(intel)->gen < 060) -+ return FALSE; -+ -+ *tiling_out = priv->tiling; -+ return TRUE; -+} -+ - /** - * Sets up hardware state for a series of solid fills. - */ -@@ -189,6 +207,9 @@ intel_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg) - intel_uxa_get_pixmap_bo(pixmap), - }; - -+ if (!intel_uxa_check_bo_tiling(intel, pixmap, &intel->BR_tiling[0])) -+ return FALSE; -+ - if (!intel_uxa_check_pitch_2d(pixmap)) - return FALSE; - -@@ -236,7 +257,7 @@ static void intel_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) - - { - int len = INTEL_INFO(intel)->gen >= 0100 ? 7 : 6; -- BEGIN_BATCH_BLT(len); -+ BEGIN_BATCH_BLT_TILED(len); - - cmd = XY_COLOR_BLT_CMD | (len - 2); - -@@ -310,6 +331,10 @@ intel_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, - intel_uxa_get_pixmap_bo(dest), - }; - -+ if (!intel_uxa_check_bo_tiling(intel, dest, &intel->BR_tiling[0]) || -+ !intel_uxa_check_bo_tiling(intel, source, &intel->BR_tiling[1])) -+ return FALSE; -+ - if (!intel_uxa_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) - return FALSE; - -@@ -375,7 +400,7 @@ intel_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, - - { - int len = INTEL_INFO(intel)->gen >= 0100 ? 
10 : 8; -- BEGIN_BATCH_BLT(len); -+ BEGIN_BATCH_BLT_TILED(len); - - cmd = XY_SRC_COPY_BLT_CMD | (len - 2); - -@@ -1068,7 +1093,7 @@ Bool intel_uxa_create_screen_resources(ScreenPtr screen) - ScrnInfoPtr scrn = xf86ScreenToScrn(screen); - PixmapPtr pixmap; - intel_screen_private *intel = intel_get_screen_private(scrn); -- dri_bo *bo = intel->front_buffer; -+ dri_bo *bo = intel->front_buffer, *old_bo; - int old_width, old_height, old_pitch; - - if (!uxa_resources_init(screen)) -@@ -1081,6 +1106,7 @@ Bool intel_uxa_create_screen_resources(ScreenPtr screen) - old_width = pixmap->drawable.width; - old_height = pixmap->drawable.height; - old_pitch = pixmap->devKind; -+ old_bo = intel_uxa_get_pixmap_bo(pixmap); - - if (!screen->ModifyPixmapHeader(pixmap, - scrn->virtualX, -@@ -1102,6 +1128,9 @@ Bool intel_uxa_create_screen_resources(ScreenPtr screen) - err: - screen->ModifyPixmapHeader(pixmap, - old_width, old_height, -1, -1, old_pitch, NULL); -+ if (old_bo) -+ intel_uxa_set_pixmap_bo(pixmap, old_bo); -+ - return FALSE; - } - -diff --git a/test/Makefile.am b/test/Makefile.am -index 66ed8ebb..12b5d5d8 100644 ---- a/test/Makefile.am -+++ b/test/Makefile.am -@@ -5,6 +5,7 @@ stress_TESTS = \ - basic-rectangle \ - basic-string \ - basic-copyarea \ -+ basic-copyplane \ - basic-copyarea-size \ - basic-putimage \ - basic-lines \ -@@ -12,8 +13,10 @@ stress_TESTS = \ - DrawSegments \ - cursor-test \ - render-fill \ -+ render-glyphs \ - render-trapezoid \ - render-trapezoid-image \ -+ render-triangle \ - render-fill-copy \ - render-composite-solid \ - render-composite-solid-mask \ -@@ -25,9 +28,16 @@ stress_TESTS = \ - shm-test \ - $(NULL) - -+if X11_VM -+stress_TESTS += \ -+ xvidmode \ -+ $(NULL) -+endif -+ - if DRI2 - stress_TESTS += \ - dri2-race \ -+ dri2-speed \ - dri2-swap \ - dri2-test \ - $(NULL) -@@ -36,8 +46,11 @@ endif - if X11_DRI3 - stress_TESTS += \ - dri3-test \ -+ present-race \ -+ present-speed \ - present-test \ - $(NULL) -+present_speed_CFLAGS = ${AM_CFLAGS} 
-pthread - endif - check_PROGRAMS = $(stress_TESTS) - -diff --git a/test/basic-copyplane.c b/test/basic-copyplane.c -new file mode 100644 -index 00000000..f049b82b ---- /dev/null -+++ b/test/basic-copyplane.c -@@ -0,0 +1,99 @@ -+#include -+#include -+#include -+ -+#include /* for XDestroyImage */ -+#include /* for pixman blt functions */ -+ -+#include "test.h" -+ -+static uint8_t clock_bits[] = {0x3C, 0x5E, 0xEF, 0xF7, 0x87, 0xFF, 0x7E, 0x3C}; -+ -+/* https://bugs.freedesktop.org/show_bug.cgi?id=91499 */ -+static void draw_clock(struct test_display *t, Drawable d, -+ uint8_t alu, int x, int y, uint32_t fg, uint32_t bg) -+{ -+ Pixmap pixmap; -+ XGCValues val; -+ GC gc; -+ -+ val.graphics_exposures = 0; -+ val.function = alu; -+ val.foreground = fg; -+ val.background = fg; -+ -+ gc = XCreateGC(t->dpy, d, -+ GCGraphicsExposures | GCForeground | GCBackground | GCFunction, -+ &val); -+ pixmap = XCreateBitmapFromData(t->dpy, d, (char *)clock_bits, 8, 8); -+ -+ XCopyPlane(t->dpy, pixmap, d, gc, 0, 0, 8, 8, x, y, 1); -+ -+ XFreePixmap(t->dpy, pixmap); -+ XFreeGC(t->dpy, gc); -+} -+ -+static void clear(struct test_display *dpy, struct test_target *tt) -+{ -+ XRenderColor render_color = {0}; -+ XRenderFillRectangle(dpy->dpy, PictOpClear, tt->picture, &render_color, -+ 0, 0, tt->width, tt->height); -+} -+ -+static void clock_tests(struct test *t, int reps, int sets, enum target target) -+{ -+ struct test_target out, ref; -+ int r, s; -+ -+ printf("Testing clock (%s): ", test_target_name(target)); -+ fflush(stdout); -+ -+ test_target_create_render(&t->out, target, &out); -+ clear(&t->out, &out); -+ -+ test_target_create_render(&t->ref, target, &ref); -+ clear(&t->ref, &ref); -+ -+ for (s = 0; s < sets; s++) { -+ for (r = 0; r < reps; r++) { -+ int x = rand() % (out.width - 8); -+ int y = rand() % (out.height - 8); -+ uint8_t alu = rand() % (GXset + 1); -+ uint32_t bg = rand(); -+ uint32_t fg = rand(); -+ -+ draw_clock(&t->out, out.draw, alu, x, y, fg, bg); -+ 
draw_clock(&t->ref, ref.draw, alu, x, y, fg, bg); -+ } -+ -+ test_compare(t, -+ out.draw, out.format, -+ ref.draw, ref.format, -+ 0, 0, out.width, out.height, -+ ""); -+ } -+ -+ printf("passed [%d iterations x %d]\n", reps, sets); -+ -+ test_target_destroy_render(&t->out, &out); -+ test_target_destroy_render(&t->ref, &ref); -+} -+ -+int main(int argc, char **argv) -+{ -+ struct test test; -+ int i; -+ -+ test_init(&test, argc, argv); -+ -+ for (i = 0; i <= DEFAULT_ITERATIONS; i++) { -+ int reps = REPS(i), sets = SETS(i); -+ enum target t; -+ -+ for (t = TARGET_FIRST; t <= TARGET_LAST; t++) { -+ clock_tests(&test, reps, sets, t); -+ } -+ } -+ -+ return 0; -+} -diff --git a/test/dri2-race.c b/test/dri2-race.c -index 8862c84c..ece624f6 100644 ---- a/test/dri2-race.c -+++ b/test/dri2-race.c -@@ -5,6 +5,11 @@ - #include - #include - #include -+#include -+#include -+#include -+#include -+#include - #include - #include - #include -@@ -12,11 +17,49 @@ - - #include - #include -+#include - - #include "dri2.h" - - #define COUNT 60 - -+#define N_DIVISORS 3 -+static const int divisors[N_DIVISORS] = { 0, 1, 16 }; -+ -+static jmp_buf error_handler[4]; -+static int have_error_handler; -+ -+#define error_get() \ -+ setjmp(error_handler[have_error_handler++]) -+ -+#define error_put() \ -+ have_error_handler-- -+ -+static int (*saved_io_error)(Display *dpy); -+ -+static int io_error(Display *dpy) -+{ -+ if (have_error_handler) -+ longjmp(error_handler[--have_error_handler], 0); -+ -+ return saved_io_error(dpy); -+} -+ -+static int x_error(Display *dpy, XErrorEvent *e) -+{ -+ return Success; -+} -+ -+static uint32_t upper_32_bits(uint64_t val) -+{ -+ return val >> 32; -+} -+ -+static uint32_t lower_32_bits(uint64_t val) -+{ -+ return val & 0xffffffff; -+} -+ - static int dri2_open(Display *dpy) - { - drm_auth_t auth; -@@ -41,45 +84,701 @@ static int dri2_open(Display *dpy) - return fd; - } - --static void run(Display *dpy, int width, int height, -- unsigned int *attachments, int 
nattachments, -- const char *name) -+static void swap_buffers(Display *dpy, Window win, int divisor, -+ unsigned int *attachments, int nattachments) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ unsigned int seq[2]; -+ -+ seq[0] = xcb_dri2_swap_buffers_unchecked(c, win, -+ 0, 0, 0, divisor, 0, 0).sequence; -+ -+ -+ seq[1] = xcb_dri2_get_buffers_unchecked(c, win, -+ nattachments, nattachments, -+ attachments).sequence; -+ -+ xcb_flush(c); -+ xcb_discard_reply(c, seq[0]); -+ xcb_discard_reply(c, seq[1]); -+} -+ -+#define COMPOSITE 1 -+ -+static int has_composite(Display *dpy) -+{ -+ Display *dummy = NULL; -+ int event, error; -+ int major = -1, minor = -1; -+ -+ if (dpy == NULL) -+ dummy = dpy = XOpenDisplay(NULL); -+ -+ if (XCompositeQueryExtension(dpy, &event, &error)) -+ XCompositeQueryVersion(dpy, &major, &minor); -+ -+ if (dummy) -+ XCloseDisplay(dummy); -+ -+ return major > 0 || minor >= 4; -+} -+ -+static void race_window(Display *dpy, int width, int height, -+ unsigned int *attachments, int nattachments, -+ unsigned flags, const char *name) - { - Window win; - XSetWindowAttributes attr; -- int count, loop; -+ int count, loop, n; - DRI2Buffer *buffers; - -+ if (flags & COMPOSITE && !has_composite(dpy)) -+ return; -+ -+ printf("%s(%s)\n", __func__, name); -+ - /* Be nasty and install a fullscreen window on top so that we - * can guarantee we do not get clipped by children. 
- */ - attr.override_redirect = 1; -- loop = 100; -- do { -+ for (n = 0; n < N_DIVISORS; n++) { -+ loop = 256 >> ffs(divisors[n]); -+ printf("DRI2SwapBuffers(divisor=%d), loop=%d", divisors[n], loop); -+ fflush(stdout); -+ do { -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ -+ buffers = DRI2GetBuffers(dpy, win, &width, &height, -+ attachments, nattachments, &count); -+ if (count != nattachments) -+ return; -+ -+ free(buffers); -+ for (count = 0; count < loop; count++) -+ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); -+ XDestroyWindow(dpy, win); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { -+ loop = 256 >> ffs(divisors[n]); -+ printf("xcb_dri2_swap_buffers(divisor=%d), loops=%d", divisors[n], loop); -+ fflush(stdout); -+ do { -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ -+ buffers = DRI2GetBuffers(dpy, win, &width, &height, -+ attachments, nattachments, &count); -+ if (count != nattachments) -+ return; -+ -+ free(buffers); -+ for (count = 0; count < loop; count++) -+ swap_buffers(dpy, win, divisors[n], attachments, nattachments); -+ XDestroyWindow(dpy, win); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { -+ loop = 256 >> ffs(divisors[n]); -+ printf("DRI2WaitMsc(divisor=%d), 
loop=%d", divisors[n], loop); -+ fflush(stdout); -+ do { -+ uint64_t ignore, msc; -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); -+ msc++; -+ for (count = 0; count < loop; count++) { -+ xcb_discard_reply(c, -+ xcb_dri2_wait_msc(c, win, -+ upper_32_bits(msc), -+ lower_32_bits(msc), -+ 0, 0, 0, 0).sequence); -+ msc += divisors[n]; -+ } -+ XFlush(dpy); -+ XDestroyWindow(dpy, win); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ XSync(dpy, 1); -+ sleep(2); -+ XSync(dpy, 1); -+} -+ -+static int rand_size(int max) -+{ -+ return 1 + (rand() % (max - 1)); -+} -+ -+static void race_resize(Display *dpy, int width, int height, -+ unsigned int *attachments, int nattachments, -+ unsigned flags, const char *name) -+{ -+ Window win; -+ XSetWindowAttributes attr; -+ int count, loop, n; -+ DRI2Buffer *buffers; -+ -+ if (flags & COMPOSITE && !has_composite(dpy)) -+ return; -+ -+ printf("%s(%s)\n", __func__, name); -+ -+ attr.override_redirect = 1; -+ for (n = 0; n < N_DIVISORS; n++) { -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ -+ loop = 256 >> ffs(divisors[n]); -+ printf("DRI2SwapBuffers(divisor=%d), loop=%d", divisors[n], loop); -+ fflush(stdout); -+ do { -+ int w, h; -+ -+ buffers = DRI2GetBuffers(dpy, win, &w, &h, -+ 
attachments, nattachments, &count); -+ if (count != nattachments) -+ return; -+ -+ free(buffers); -+ for (count = 0; count < loop; count++) -+ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); -+ XResizeWindow(dpy, win, rand_size(width), rand_size(height)); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { - win = XCreateWindow(dpy, DefaultRootWindow(dpy), - 0, 0, width, height, 0, - DefaultDepth(dpy, DefaultScreen(dpy)), - InputOutput, - DefaultVisual(dpy, DefaultScreen(dpy)), - CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); - XMapWindow(dpy, win); - - DRI2CreateDrawable(dpy, win); - -- buffers = DRI2GetBuffers(dpy, win, &width, &height, -- attachments, nattachments, &count); -- if (count != nattachments) -- return; -+ loop = 256 >> ffs(divisors[n]); -+ printf("xcb_dri2_swap_buffers(divisor=%d), loops=%d", divisors[n], loop); -+ fflush(stdout); -+ do { -+ int w, h; -+ -+ buffers = DRI2GetBuffers(dpy, win, &w, &h, -+ attachments, nattachments, &count); -+ if (count != nattachments) -+ return; - -- free(buffers); -- for (count = 0; count < loop; count++) -- DRI2SwapBuffers(dpy, win, 0, 0, 0); -+ free(buffers); -+ for (count = 0; count < loop; count++) -+ swap_buffers(dpy, win, divisors[n], attachments, nattachments); -+ XResizeWindow(dpy, win, rand_size(width), rand_size(height)); -+ printf("."); fflush(stdout); -+ } while (--loop); - XDestroyWindow(dpy, win); -- } while (--loop); -+ XSync(dpy, True); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ 
XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ -+ loop = 256 >> ffs(divisors[n]); -+ printf("DRI2WaitMsc(divisor=%d), loop=%d", divisors[n], loop); -+ fflush(stdout); -+ do { -+ uint64_t ignore, msc; -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ -+ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); -+ msc++; -+ for (count = 0; count < loop; count++) { -+ xcb_discard_reply(c, -+ xcb_dri2_wait_msc(c, win, -+ upper_32_bits(msc), -+ lower_32_bits(msc), -+ 0, 0, 0, 0).sequence); -+ msc += divisors[n]; -+ } -+ XFlush(dpy); -+ XResizeWindow(dpy, win, rand_size(width), rand_size(height)); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ printf("*\n"); -+ } -+ -+ XSync(dpy, 1); -+ sleep(2); -+ XSync(dpy, 1); -+} -+ -+static void race_manager(Display *dpy, int width, int height, -+ unsigned int *attachments, int nattachments, -+ unsigned flags, const char *name) -+{ -+ Display *mgr = XOpenDisplay(NULL); -+ Window win; -+ XSetWindowAttributes attr; -+ int count, loop, n; -+ DRI2Buffer *buffers; -+ -+ if (flags & COMPOSITE && !has_composite(dpy)) -+ return; -+ -+ printf("%s(%s)\n", __func__, name); -+ -+ /* Be nasty and install a fullscreen window on top so that we -+ * can guarantee we do not get clipped by children. 
-+ */ -+ attr.override_redirect = 1; -+ for (n = 0; n < N_DIVISORS; n++) { -+ printf("DRI2SwapBuffers(divisor=%d)", divisors[n]); -+ fflush(stdout); -+ loop = 256 >> ffs(divisors[n]); -+ do { -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ -+ buffers = DRI2GetBuffers(dpy, win, &width, &height, -+ attachments, nattachments, &count); -+ if (count != nattachments) -+ return; -+ -+ free(buffers); -+ for (count = 0; count < loop; count++) -+ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); -+ XFlush(dpy); -+ XDestroyWindow(mgr, win); -+ XFlush(mgr); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { -+ printf("xcb_dri2_swap_buffers(divisor=%d)", divisors[n]); -+ fflush(stdout); -+ loop = 256 >> ffs(divisors[n]); -+ do { -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ -+ buffers = DRI2GetBuffers(dpy, win, &width, &height, -+ attachments, nattachments, &count); -+ if (count != nattachments) -+ return; -+ -+ free(buffers); -+ for (count = 0; count < loop; count++) -+ swap_buffers(dpy, win, divisors[n], attachments, nattachments); -+ XFlush(dpy); -+ XDestroyWindow(mgr, win); -+ XFlush(mgr); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { -+ printf("DRI2WaitMsc(divisor=%d)", divisors[n]); -+ 
fflush(stdout); -+ loop = 256 >> ffs(divisors[n]); -+ do { -+ uint64_t ignore, msc; -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); -+ msc++; -+ for (count = 0; count < loop; count++) { -+ xcb_discard_reply(c, -+ xcb_dri2_wait_msc(c, win, -+ upper_32_bits(msc), -+ lower_32_bits(msc), -+ 0, 0, 0, 0).sequence); -+ msc += divisors[n]; -+ } -+ XFlush(dpy); -+ XDestroyWindow(mgr, win); -+ XFlush(mgr); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ printf("*\n"); -+ } - - XSync(dpy, 1); -+ XSync(mgr, 1); - sleep(2); - XSync(dpy, 1); -+ XSync(mgr, 1); -+ -+ XCloseDisplay(mgr); -+} -+ -+static void race_close(int width, int height, -+ unsigned int *attachments, int nattachments, -+ unsigned flags, const char *name) -+{ -+ XSetWindowAttributes attr; -+ int count, loop, n; -+ -+ if (flags & COMPOSITE && !has_composite(NULL)) -+ return; -+ -+ printf("%s(%s)\n", __func__, name); -+ -+ /* Be nasty and install a fullscreen window on top so that we -+ * can guarantee we do not get clipped by children. 
-+ */ -+ attr.override_redirect = 1; -+ for (n = 0; n < N_DIVISORS; n++) { -+ printf("DRI2SwapBuffers(divisor=%d)", divisors[n]); -+ fflush(stdout); -+ loop = 256 >> ffs(divisors[n]); -+ do { -+ Display *dpy = XOpenDisplay(NULL); -+ Window win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ free(DRI2GetBuffers(dpy, win, &width, &height, -+ attachments, nattachments, &count)); -+ if (count != nattachments) -+ return; -+ -+ for (count = 0; count < loop; count++) -+ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); -+ XCloseDisplay(dpy); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { -+ printf("xcb_dri2_swap_buffers(divisor=%d)", divisors[n]); -+ fflush(stdout); -+ loop = 256 >> ffs(divisors[n]); -+ do { -+ Display *dpy = XOpenDisplay(NULL); -+ Window win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ free(DRI2GetBuffers(dpy, win, &width, &height, -+ attachments, nattachments, &count)); -+ if (count != nattachments) -+ return; -+ -+ for (count = 0; count < loop; count++) -+ swap_buffers(dpy, win, divisors[n], attachments, nattachments); -+ XCloseDisplay(dpy); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { -+ printf("DRI2WaitMsc(divisor=%d)", divisors[n]); -+ fflush(stdout); -+ loop = 256 >> 
ffs(divisors[n]); -+ do { -+ uint64_t ignore, msc; -+ Display *dpy = XOpenDisplay(NULL); -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Window win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); -+ msc++; -+ for (count = 0; count < loop; count++) { -+ xcb_discard_reply(c, -+ xcb_dri2_wait_msc(c, win, -+ upper_32_bits(msc), -+ lower_32_bits(msc), -+ 0, 0, 0, 0).sequence); -+ msc += divisors[n]; -+ } -+ XFlush(dpy); -+ XCloseDisplay(dpy); -+ printf("."); fflush(stdout); -+ } while (--loop); -+ printf("*\n"); -+ } -+} -+ -+static void race_client(int width, int height, -+ unsigned int *attachments, int nattachments, -+ unsigned flags, const char *name) -+{ -+ Display *mgr = XOpenDisplay(NULL); -+ XSetWindowAttributes attr; -+ int count, loop, n; -+ -+ if (flags & COMPOSITE && !has_composite(NULL)) -+ return; -+ -+ printf("%s(%s)\n", __func__, name); -+ -+ /* Be nasty and install a fullscreen window on top so that we -+ * can guarantee we do not get clipped by children. 
-+ */ -+ attr.override_redirect = 1; -+ for (n = 0; n < N_DIVISORS; n++) { -+ printf("DRI2SwapBuffers(divisor=%d)", divisors[n]); -+ fflush(stdout); -+ loop = 256 >> ffs(divisors[n]); -+ do { -+ Display *dpy = XOpenDisplay(NULL); -+ Window win; -+ -+ if (error_get()) { -+ XCloseDisplay(dpy); -+ printf("+"); fflush(stdout); -+ continue; -+ } -+ -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ free(DRI2GetBuffers(dpy, win, &width, &height, -+ attachments, nattachments, &count)); -+ if (count == nattachments) { -+ for (count = 0; count < loop; count++) -+ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); -+ } -+ -+ XFlush(dpy); -+ XKillClient(mgr, win); -+ XFlush(mgr); -+ -+ XCloseDisplay(dpy); -+ printf("."); fflush(stdout); -+ -+ error_put(); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { -+ printf("xcb_dri2_swap_buffers(divisor=%d)", divisors[n]); -+ fflush(stdout); -+ loop = 256 >> ffs(divisors[n]); -+ do { -+ Display *dpy = XOpenDisplay(NULL); -+ Window win; -+ -+ if (error_get()) { -+ XCloseDisplay(dpy); -+ printf("+"); fflush(stdout); -+ continue; -+ } -+ -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ free(DRI2GetBuffers(dpy, win, &width, &height, -+ attachments, nattachments, &count)); -+ if (count == nattachments) { -+ for (count = 0; count < loop; count++) -+ swap_buffers(dpy, win, 
divisors[n], attachments, nattachments); -+ } -+ -+ XFlush(dpy); -+ XKillClient(mgr, win); -+ XFlush(mgr); -+ -+ XCloseDisplay(dpy); -+ printf("."); fflush(stdout); -+ -+ error_put(); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ for (n = 0; n < N_DIVISORS; n++) { -+ printf("DRI2WaitMsc(divisor=%d)", divisors[n]); -+ fflush(stdout); -+ loop = 256 >> ffs(divisors[n]); -+ do { -+ Display *dpy = XOpenDisplay(NULL); -+ uint64_t ignore, msc; -+ xcb_connection_t *c; -+ Window win; -+ -+ if (error_get()) { -+ XCloseDisplay(dpy); -+ printf("+"); fflush(stdout); -+ continue; -+ } -+ -+ win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ if (flags & COMPOSITE) -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ XMapWindow(dpy, win); -+ -+ DRI2CreateDrawable(dpy, win); -+ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); -+ c = XGetXCBConnection(dpy); -+ msc++; -+ for (count = 0; count < loop; count++) { -+ xcb_discard_reply(c, -+ xcb_dri2_wait_msc(c, win, -+ upper_32_bits(msc), -+ lower_32_bits(msc), -+ 0, 0, 0, 0).sequence); -+ msc += divisors[n]; -+ } -+ -+ XFlush(dpy); -+ XKillClient(mgr, win); -+ XFlush(mgr); -+ -+ XCloseDisplay(dpy); -+ printf("."); fflush(stdout); -+ -+ error_put(); -+ } while (--loop); -+ printf("*\n"); -+ } -+ -+ XCloseDisplay(mgr); - } - - int main(void) -@@ -91,7 +790,10 @@ int main(void) - DRI2BufferFrontLeft, - }; - -- dpy = XOpenDisplay (NULL); -+ saved_io_error = XSetIOErrorHandler(io_error); -+ XSetErrorHandler(x_error); -+ -+ dpy = XOpenDisplay(NULL); - if (dpy == NULL) - return 77; - -@@ -101,13 +803,52 @@ int main(void) - - width = WidthOfScreen(DefaultScreenOfDisplay(dpy)); - height = HeightOfScreen(DefaultScreenOfDisplay(dpy)); -- run(dpy, width, height, attachments, 1, "fullscreen"); -- run(dpy, width, height, attachments, 2, "fullscreen (with front)"); -+ 
race_window(dpy, width, height, attachments, 1, 0, "fullscreen"); -+ race_window(dpy, width, height, attachments, 1, COMPOSITE, "composite fullscreen"); -+ race_window(dpy, width, height, attachments, 2, 0, "fullscreen (with front)"); -+ race_window(dpy, width, height, attachments, 2, COMPOSITE, "composite fullscreen (with front)"); -+ -+ race_resize(dpy, width, height, attachments, 1, 0, ""); -+ race_resize(dpy, width, height, attachments, 1, COMPOSITE, "composite"); -+ race_resize(dpy, width, height, attachments, 2, 0, "with front"); -+ race_resize(dpy, width, height, attachments, 2, COMPOSITE, "composite with front"); -+ -+ race_manager(dpy, width, height, attachments, 1, 0, "fullscreen"); -+ race_manager(dpy, width, height, attachments, 1, COMPOSITE, "composite fullscreen"); -+ race_manager(dpy, width, height, attachments, 2, 0, "fullscreen (with front)"); -+ race_manager(dpy, width, height, attachments, 2, COMPOSITE, "composite fullscreen (with front)"); -+ -+ race_close(width, height, attachments, 1, 0, "fullscreen"); -+ race_close(width, height, attachments, 1, COMPOSITE, "composite fullscreen"); -+ race_close(width, height, attachments, 2, 0, "fullscreen (with front)"); -+ race_close(width, height, attachments, 2, COMPOSITE, "composite fullscreen (with front)"); -+ -+ race_client(width, height, attachments, 1, 0, "fullscreen"); -+ race_client(width, height, attachments, 1, COMPOSITE, "composite fullscreen"); -+ race_client(width, height, attachments, 2, 0, "fullscreen (with front)"); -+ race_client(width, height, attachments, 2, COMPOSITE, "composite fullscreen (with front)"); - - width /= 2; - height /= 2; -- run(dpy, width, height, attachments, 1, "windowed"); -- run(dpy, width, height, attachments, 2, "windowed (with front)"); -+ race_window(dpy, width, height, attachments, 1, 0, "windowed"); -+ race_window(dpy, width, height, attachments, 1, COMPOSITE, "composite windowed"); -+ race_window(dpy, width, height, attachments, 2, 0, "windowed (with front)"); 
-+ race_window(dpy, width, height, attachments, 2, COMPOSITE, "composite windowed (with front)"); -+ -+ race_manager(dpy, width, height, attachments, 1, 0, "windowed"); -+ race_manager(dpy, width, height, attachments, 1, COMPOSITE, "composite windowed"); -+ race_manager(dpy, width, height, attachments, 2, 0, "windowed (with front)"); -+ race_manager(dpy, width, height, attachments, 2, COMPOSITE, "composite windowed (with front)"); -+ -+ race_close(width, height, attachments, 1, 0, "windowed"); -+ race_close(width, height, attachments, 1, COMPOSITE, "composite windowed"); -+ race_close(width, height, attachments, 2, 0, "windowed (with front)"); -+ race_close(width, height, attachments, 2, COMPOSITE, "composite windowed (with front)"); -+ -+ race_client(width, height, attachments, 1, 0, "windowed"); -+ race_client(width, height, attachments, 1, COMPOSITE, "composite windowed"); -+ race_client(width, height, attachments, 2, 0, "windowed (with front)"); -+ race_client(width, height, attachments, 2, COMPOSITE, "composite windowed (with front)"); - - return 0; - } -diff --git a/test/dri2-speed.c b/test/dri2-speed.c -new file mode 100644 -index 00000000..87b9d0b6 ---- /dev/null -+++ b/test/dri2-speed.c -@@ -0,0 +1,342 @@ -+/* -+ * Copyright (c) 2015 Intel Corporation -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. 
-+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "dri2.h" -+ -+static int _x_error_occurred; -+ -+static int -+_check_error_handler(Display *display, -+ XErrorEvent *event) -+{ -+ printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", -+ DisplayString(display), -+ event->serial, -+ event->error_code, -+ event->request_code, -+ event->minor_code); -+ _x_error_occurred++; -+ return False; /* ignored */ -+} -+ -+static double elapsed(const struct timespec *start, -+ const struct timespec *end) -+{ -+ return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; -+} -+ -+static void run(Display *dpy, Window win, const char *name) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ struct timespec start, end; -+ int n, completed = 0; -+ -+ _x_error_occurred = 0; -+ -+ clock_gettime(CLOCK_MONOTONIC, &start); -+ do { -+ for (n = 0; n < 1000; n++) { -+ unsigned int attachments[] = { DRI2BufferBackLeft }; -+ unsigned int seq[2]; -+ -+ seq[0] = xcb_dri2_swap_buffers_unchecked(c, win, -+ 0, 0, 0, 0, 0, 0).sequence; -+ -+ -+ seq[1] = xcb_dri2_get_buffers_unchecked(c, win, -+ 1, 1, attachments).sequence; -+ -+ xcb_flush(c); -+ xcb_discard_reply(c, seq[0]); -+ xcb_discard_reply(c, 
seq[1]); -+ completed++; -+ } -+ clock_gettime(CLOCK_MONOTONIC, &end); -+ } while (end.tv_sec < start.tv_sec + 10); -+ -+ XSync(dpy, True); -+ if (_x_error_occurred) -+ abort(); -+ -+ printf("%s: Completed %d swaps in %.1fs, %.3fus each (%.1f FPS)\n", -+ name, completed, elapsed(&start, &end) / 1000000, -+ elapsed(&start, &end) / completed, -+ completed / (elapsed(&start, &end) / 1000000)); -+} -+ -+static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) -+{ -+ XRRScreenResources *res; -+ -+ res = XRRGetScreenResourcesCurrent(dpy, window); -+ if (res == NULL) -+ res = XRRGetScreenResources(dpy, window); -+ -+ return res; -+} -+ -+static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) -+{ -+ int i; -+ -+ for (i = 0; i < res->nmode; i++) { -+ if (res->modes[i].id == id) -+ return &res->modes[i]; -+ } -+ -+ return NULL; -+} -+ -+static int dri2_open(Display *dpy) -+{ -+ drm_auth_t auth; -+ char *driver, *device; -+ int fd; -+ -+ if (!DRI2Connect(dpy, DefaultRootWindow(dpy), &driver, &device)) -+ return -1; -+ -+ printf ("Connecting to %s driver on %s\n", driver, device); -+ -+ fd = open(device, O_RDWR); -+ if (fd < 0) -+ return -1; -+ -+ if (drmIoctl(fd, DRM_IOCTL_GET_MAGIC, &auth)) -+ return -1; -+ -+ if (!DRI2Authenticate(dpy, DefaultRootWindow(dpy), auth.magic)) -+ return -1; -+ -+ return fd; -+} -+ -+static void fullscreen(Display *dpy, Window win) -+{ -+ Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); -+ XChangeProperty(dpy, win, -+ XInternAtom(dpy, "_NET_WM_STATE", False), -+ XA_ATOM, 32, PropModeReplace, -+ (unsigned char *)&atom, 1); -+} -+ -+static int has_composite(Display *dpy) -+{ -+ int event, error; -+ int major, minor; -+ -+ if (!XDamageQueryExtension (dpy, &event, &error)) -+ return 0; -+ -+ if (!XCompositeQueryExtension(dpy, &event, &error)) -+ return 0; -+ -+ XCompositeQueryVersion(dpy, &major, &minor); -+ -+ return major > 0 || minor >= 4; -+} -+ -+int main(void) -+{ -+ Display *dpy; -+ 
Window root, win; -+ XRRScreenResources *res; -+ XRRCrtcInfo **original_crtc; -+ XSetWindowAttributes attr; -+ int i, j, fd; -+ -+ attr.override_redirect = 1; -+ -+ dpy = XOpenDisplay(NULL); -+ if (dpy == NULL) -+ return 77; -+ -+ fd = dri2_open(dpy); -+ if (fd < 0) -+ return 77; -+ -+ if (DPMSQueryExtension(dpy, &i, &i)) -+ DPMSDisable(dpy); -+ -+ root = DefaultRootWindow(dpy); -+ -+ signal(SIGALRM, SIG_IGN); -+ XSetErrorHandler(_check_error_handler); -+ -+ res = NULL; -+ if (XRRQueryVersion(dpy, &i, &i)) -+ res = _XRRGetScreenResourcesCurrent(dpy, root); -+ if (res == NULL) -+ return 77; -+ -+ original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); -+ for (i = 0; i < res->ncrtc; i++) -+ original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); -+ -+ printf("noutput=%d, ncrtc=%d\n", res->noutput, res->ncrtc); -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ 0, 0, None, RR_Rotate_0, NULL, 0); -+ -+ DRI2CreateDrawable(dpy, root); -+ DRI2SwapInterval(dpy, root, 0); -+ run(dpy, root, "off"); -+ XSync(dpy, True); -+ -+ for (i = 0; i < res->noutput; i++) { -+ XRROutputInfo *output; -+ XRRModeInfo *mode; -+ -+ output = XRRGetOutputInfo(dpy, res, res->outputs[i]); -+ if (output == NULL) -+ continue; -+ -+ mode = NULL; -+ if (res->nmode) -+ mode = lookup_mode(res, output->modes[0]); -+ -+ for (j = 0; mode && j < 2*output->ncrtc; j++) { -+ int c = j; -+ if (c >= output->ncrtc) -+ c = 2*output->ncrtc - j - 1; -+ -+ printf("[%d, %d] -- OUTPUT:%ld, CRTC:%ld: %dx%d\n", -+ i, c, (long)res->outputs[i], (long)output->crtcs[c], -+ mode->width, mode->height); -+ XRRSetCrtcConfig(dpy, res, output->crtcs[c], CurrentTime, -+ 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); -+ -+ run(dpy, root, "root"); -+ XSync(dpy, True); -+ -+ win = XCreateWindow(dpy, root, -+ 0, 0, mode->width, mode->height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, 
&attr); -+ DRI2CreateDrawable(dpy, win); -+ DRI2SwapInterval(dpy, win, 0); -+ fullscreen(dpy, win); -+ XMapWindow(dpy, win); -+ run(dpy, win, "fullscreen"); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ -+ win = XCreateWindow(dpy, root, -+ 0, 0, mode->width, mode->height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ DRI2CreateDrawable(dpy, win); -+ DRI2SwapInterval(dpy, win, 0); -+ XMapWindow(dpy, win); -+ run(dpy, win, "windowed"); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ -+ if (has_composite(dpy)) { -+ Damage damage; -+ -+ _x_error_occurred = 0; -+ win = XCreateWindow(dpy, root, -+ 0, 0, mode->width, mode->height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ damage = XDamageCreate(dpy, win, XDamageReportRawRectangles); -+ DRI2CreateDrawable(dpy, win); -+ DRI2SwapInterval(dpy, win, 0); -+ XMapWindow(dpy, win); -+ XSync(dpy, True); -+ if (!_x_error_occurred) -+ run(dpy, win, "composited"); -+ XDamageDestroy(dpy, damage); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ } -+ -+ win = XCreateWindow(dpy, root, -+ 0, 0, mode->width/2, mode->height/2, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ DRI2CreateDrawable(dpy, win); -+ DRI2SwapInterval(dpy, win, 0); -+ XMapWindow(dpy, win); -+ run(dpy, win, "half"); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ -+ XRRSetCrtcConfig(dpy, res, output->crtcs[c], CurrentTime, -+ 0, 0, None, RR_Rotate_0, NULL, 0); -+ } -+ -+ XRRFreeOutputInfo(output); -+ } -+ -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ original_crtc[i]->x, -+ original_crtc[i]->y, -+ original_crtc[i]->mode, -+ original_crtc[i]->rotation, -+ 
original_crtc[i]->outputs, -+ original_crtc[i]->noutput); -+ -+ if (DPMSQueryExtension(dpy, &i, &i)) -+ DPMSEnable(dpy); -+ return 0; -+} -diff --git a/test/dri2-test.c b/test/dri2-test.c -index dd4179f3..bdf01f38 100644 ---- a/test/dri2-test.c -+++ b/test/dri2-test.c -@@ -6,6 +6,10 @@ - #include - #include - #include -+#include -+#include -+#include -+#include - #include - #include - #include -@@ -18,6 +22,8 @@ - - #define COUNT 60 - -+static int prime[] = { 0, 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 27, 29, 31, 37, 41, 43, 47, 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131 }; -+ - static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) - { - XRRScreenResources *res; -@@ -101,16 +107,41 @@ static uint64_t check_msc(Display *dpy, Window win, uint64_t last_msc) - return current_msc; - } - -+static void wait_next_vblank(Display *dpy, Window win) -+{ -+ uint64_t msc, ust, sbc; -+ DRI2WaitMSC(dpy, win, 0, 1, 0, &ust, &msc, &sbc); -+} -+ -+static void swap_buffers(xcb_connection_t *c, Window win, -+ unsigned int *attachments, int nattachments) -+{ -+ unsigned int seq[2]; -+ -+ seq[0] = xcb_dri2_swap_buffers_unchecked(c, win, -+ 0, 0, 0, 0, 0, 0).sequence; -+ -+ -+ seq[1] = xcb_dri2_get_buffers_unchecked(c, win, -+ nattachments, nattachments, -+ attachments).sequence; -+ -+ xcb_flush(c); -+ xcb_discard_reply(c, seq[0]); -+ xcb_discard_reply(c, seq[1]); -+} -+ - static void run(Display *dpy, int width, int height, - unsigned int *attachments, int nattachments, - const char *name) - { -+ xcb_connection_t *c = XGetXCBConnection(dpy); - Window win; - XSetWindowAttributes attr; -- int count; - DRI2Buffer *buffers; - struct timespec start, end; -- uint64_t msc; -+ uint64_t start_msc, end_msc; -+ int modulus, remainder, count; - - /* Be nasty and install a fullscreen window on top so that we - * can guarantee we do not get clipped by children. 
-@@ -125,42 +156,99 @@ static void run(Display *dpy, int width, int height, - XMapWindow(dpy, win); - - DRI2CreateDrawable(dpy, win); -- msc = check_msc(dpy, win, 0); -+ DRI2SwapInterval(dpy, win, 1); -+ start_msc = check_msc(dpy, win, 0); - - buffers = DRI2GetBuffers(dpy, win, &width, &height, - attachments, nattachments, &count); - if (count != nattachments) - return; - -- msc = check_msc(dpy, win, msc); -+ swap_buffers(c, win, attachments, nattachments); -+ start_msc = check_msc(dpy, win, start_msc); - clock_gettime(CLOCK_MONOTONIC, &start); - for (count = 0; count < COUNT; count++) -- DRI2SwapBuffers(dpy, win, 0, 0, 0); -- msc = check_msc(dpy, win, msc); -+ swap_buffers(c, win, attachments, nattachments); -+ end_msc = check_msc(dpy, win, start_msc); - clock_gettime(CLOCK_MONOTONIC, &end); -- printf("%d %s (%dx%d) swaps in %fs.\n", -- count, name, width, height, elapsed(&start, &end)); -+ printf("%d [%ld] %s (%dx%d) swaps in %fs.\n", -+ count, (long)(end_msc - start_msc), -+ name, width, height, elapsed(&start, &end)); - -- msc = check_msc(dpy, win, msc); -+ swap_buffers(c, win, attachments, nattachments); -+ start_msc = check_msc(dpy, win, end_msc); - clock_gettime(CLOCK_MONOTONIC, &start); - for (count = 0; count < COUNT; count++) - dri2_copy_swap(dpy, win, width, height, nattachments == 2); -- msc = check_msc(dpy, win, msc); -+ end_msc = check_msc(dpy, win, start_msc); - clock_gettime(CLOCK_MONOTONIC, &end); - -- printf("%d %s (%dx%d) blits in %fs.\n", -- count, name, width, height, elapsed(&start, &end)); -+ printf("%d [%ld] %s (%dx%d) blits in %fs.\n", -+ count, (long)(end_msc - start_msc), -+ name, width, height, elapsed(&start, &end)); - - DRI2SwapInterval(dpy, win, 0); -+ wait_next_vblank(dpy, win); -+ -+ swap_buffers(c, win, attachments, nattachments); -+ start_msc = check_msc(dpy, win, end_msc); -+ clock_gettime(CLOCK_MONOTONIC, &start); -+ for (count = 0; count < COUNT; count++) -+ swap_buffers(c, win, attachments, nattachments); -+ end_msc = 
check_msc(dpy, win, start_msc); -+ clock_gettime(CLOCK_MONOTONIC, &end); -+ printf("%d [%ld] %s (%dx%d) vblank=0 swaps in %fs.\n", -+ count, (long)(end_msc - start_msc), -+ name, width, height, elapsed(&start, &end)); - -- msc = check_msc(dpy, win, msc); -+ start_msc = check_msc(dpy, win, end_msc); - clock_gettime(CLOCK_MONOTONIC, &start); - for (count = 0; count < COUNT; count++) -- DRI2SwapBuffers(dpy, win, 0, 0, 0); -- msc = check_msc(dpy, win, msc); -+ wait_next_vblank(dpy, win); -+ end_msc = check_msc(dpy, win, start_msc); - clock_gettime(CLOCK_MONOTONIC, &end); -- printf("%d %s (%dx%d) vblank=0 swaps in %fs.\n", -- count, name, width, height, elapsed(&start, &end)); -+ printf("%d [%ld] %s waits in %fs.\n", -+ count, (long)(end_msc - start_msc), -+ name, elapsed(&start, &end)); -+ -+ printf("Testing past & future waits\n"); -+ for (modulus = 1; modulus <= 128; modulus <<= 1) { -+ for (count = 0; prime[count] < modulus; count++) { -+ uint64_t msc, ust, sbc; -+ uint64_t target; -+ -+ remainder = prime[count]; -+ -+ DRI2WaitMSC(dpy, win, 0, 1, 0, &ust, &msc, &sbc); -+ -+ target = msc + modulus + 1; -+ target &= -modulus; -+ target += remainder; -+ -+ DRI2WaitMSC(dpy, win, target, modulus, remainder, -+ &ust, &msc, &sbc); -+ if (msc != target) { -+ printf("Missed future MSC (%d, %d): expected=%lld, found=%lld\n", -+ modulus, remainder, -+ (long long)target, (long long)msc); -+ } -+ -+ target = msc; -+ target &= -modulus; -+ target += remainder; -+ if (target <= msc) -+ target += modulus; -+ -+ DRI2WaitMSC(dpy, win, msc, modulus, remainder, -+ &ust, &msc, &sbc); -+ -+ if (msc != target) { -+ printf("Missed past MSC (%d, %d): expected=%lld, found=%lld\n", -+ modulus, remainder, -+ (long long)target, (long long)msc); -+ } -+ } -+ } - - XDestroyWindow(dpy, win); - free(buffers); -diff --git a/test/dri3-test.c b/test/dri3-test.c -index c66da313..78e105a8 100644 ---- a/test/dri3-test.c -+++ b/test/dri3-test.c -@@ -93,14 +93,9 @@ static const struct pci_id_match ids[] = 
{ - INTEL_IVB_D_IDS(070), - INTEL_IVB_M_IDS(070), - -- INTEL_HSW_D_IDS(075), -- INTEL_HSW_M_IDS(075), -- -- INTEL_VLV_D_IDS(071), -- INTEL_VLV_M_IDS(071), -- -- INTEL_BDW_D_IDS(0100), -- INTEL_BDW_M_IDS(0100), -+ INTEL_HSW_IDS(075), -+ INTEL_VLV_IDS(071), -+ INTEL_BDW_IDS(0100), - }; - - static int i915_gen(int device) -@@ -1020,6 +1015,67 @@ fail: - return 1; - } - -+static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) -+{ -+ struct drm_i915_gem_set_tiling set_tiling; -+ -+ set_tiling.handle = handle; -+ set_tiling.tiling_mode = tiling; -+ set_tiling.stride = stride; -+ -+ return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0; -+} -+ -+static int test_tiling(Display *dpy, int device) -+{ -+ Window root = RootWindow(dpy, DefaultScreen(dpy)); -+ const int tiling[] = { I915_TILING_NONE, I915_TILING_X, I915_TILING_Y }; -+ int line = -1; -+ int t; -+ -+ _x_error_occurred = 0; -+ -+ for (t = 0; t < sizeof(tiling)/sizeof(tiling[0]); t++) { -+ uint32_t src; -+ int src_fd; -+ Pixmap src_pix; -+ -+ src = gem_create(device, 4*4096); -+ if (!src) { -+ line = __LINE__; -+ goto fail; -+ } -+ -+ gem_set_tiling(device, src, tiling[t], 512); -+ -+ src_fd = gem_export(device, src); -+ if (src_fd < 0) { -+ line = __LINE__; -+ goto fail; -+ } -+ -+ src_pix = dri3_create_pixmap(dpy, root, -+ 128, 32, 32, -+ src_fd, 32, 512, 4*4096); -+ XSync(dpy, True); -+ if (_x_error_occurred) { -+ line = __LINE__; -+ goto fail; -+ } -+ XFreePixmap(dpy, src_pix); -+ _x_error_occurred = 0; -+ -+ close(src_fd); -+ gem_close(device, src); -+ } -+ -+ return 0; -+ -+fail: -+ printf("%s failed with tiling %d, line %d\n", __func__, tiling[t], line); -+ return 1; -+} -+ - static int - _check_error_handler(Display *display, - XErrorEvent *event) -@@ -1060,6 +1116,7 @@ int main(void) - - error += test_bad_size(dpy, device); - error += test_bad_pitch(dpy, device); -+ error += test_tiling(dpy, device); - - error += test_shm(dpy, device, 400, 300); - error += test_shm(dpy, 
device, 300, 400); -diff --git a/test/dri3.c b/test/dri3.c -index 45f3285c..e5644629 100644 ---- a/test/dri3.c -+++ b/test/dri3.c -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - - #include "dri3.h" - -@@ -109,12 +110,45 @@ void dri3_fence_free(Display *dpy, struct dri3_fence *fence) - xcb_sync_destroy_fence(c, fence->xid); - } - -+static void dri3_query_version(xcb_connection_t *c, int *major, int *minor) -+{ -+ xcb_dri3_query_version_reply_t *reply; -+ -+ reply = xcb_dri3_query_version_reply(c, -+ xcb_dri3_query_version(c, -+ XCB_DRI3_MAJOR_VERSION, -+ XCB_DRI3_MINOR_VERSION), -+ NULL); -+ if (reply != NULL) { -+ *major = reply->major_version; -+ *minor = reply->minor_version; -+ free(reply); -+ } -+} -+ -+static int dri3_exists(xcb_connection_t *c) -+{ -+ const xcb_query_extension_reply_t *ext; -+ int major, minor; -+ -+ major = minor = -1; -+ -+ ext = xcb_get_extension_data(c, &xcb_dri3_id); -+ if (ext != NULL && ext->present) -+ dri3_query_version(c, &major, &minor); -+ -+ return major >= 0; -+} -+ - int dri3_open__full(Display *dpy, Window root, unsigned provider) - { - xcb_connection_t *c = XGetXCBConnection(dpy); - xcb_dri3_open_cookie_t cookie; - xcb_dri3_open_reply_t *reply; - -+ if (!dri3_exists(c)) -+ return -1; -+ - cookie = xcb_dri3_open(c, root, provider); - reply = xcb_dri3_open_reply(c, cookie, NULL); - -diff --git a/test/present-race.c b/test/present-race.c -new file mode 100644 -index 00000000..b2b6aa2b ---- /dev/null -+++ b/test/present-race.c -@@ -0,0 +1,484 @@ -+/* -+ * Copyright (c) 2014 Intel Corporation -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do 
so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#if HAVE_X11_EXTENSIONS_SHMPROTO_H -+#include -+#elif HAVE_X11_EXTENSIONS_SHMSTR_H -+#include -+#else -+#error Failed to find the right header for X11 MIT-SHM protocol definitions -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include "dri3.h" -+ -+static int _x_error_occurred; -+static uint32_t stamp; -+ -+static int -+_check_error_handler(Display *display, -+ XErrorEvent *event) -+{ -+ printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", -+ DisplayString(display), -+ event->serial, -+ event->error_code, -+ event->request_code, -+ event->minor_code); -+ _x_error_occurred++; -+ return False; /* ignored */ -+} -+ -+static int has_composite(Display *dpy) -+{ -+ int event, error; -+ int major, minor; -+ -+ if (!XCompositeQueryExtension(dpy, &event, &error)) -+ return 0; -+ -+ XCompositeQueryVersion(dpy, &major, &minor); -+ -+ return major > 0 || minor >= 4; -+} -+ -+static void 
*setup_msc(Display *dpy, Window win) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ xcb_void_cookie_t cookie; -+ uint32_t id = xcb_generate_id(c); -+ xcb_generic_error_t *error; -+ void *q; -+ -+ cookie = xcb_present_select_input_checked(c, id, win, XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY); -+ q = xcb_register_for_special_xge(c, &xcb_present_id, id, &stamp); -+ -+ error = xcb_request_check(c, cookie); -+ assert(error == NULL); -+ -+ return q; -+} -+ -+static void teardown_msc(Display *dpy, void *q) -+{ -+ xcb_unregister_for_special_event(XGetXCBConnection(dpy), q); -+} -+ -+static uint64_t wait_vblank(Display *dpy, Window win) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ static uint32_t serial = 1; -+ uint64_t msc = 0; -+ int complete = 0; -+ void *q; -+ -+ if (win == 0) -+ win = DefaultRootWindow(dpy); -+ -+ q = setup_msc(dpy, win); -+ -+ xcb_present_notify_msc(c, win, serial ^ 0xdeadbeef, 0, 1, 0); -+ xcb_flush(c); -+ -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && -+ ce->serial == (serial ^ 0xdeadbeef)) { -+ msc = ce->msc; -+ complete = 1; -+ } -+ free(ev); -+ } while (!complete); -+ -+ if (++serial == 0) -+ serial = 1; -+ -+ teardown_msc(dpy, q); -+ -+ return msc; -+} -+ -+static int test_basic(Display *dpy, int dummy) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ XSetWindowAttributes attr; -+ Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy)); -+ Pixmap pixmap; -+ struct dri3_fence fence; -+ Window root, win; -+ unsigned int width, height; -+ unsigned border, depth; -+ int x, y, ret = 1; -+ const char *phase; -+ uint64_t msc; -+ -+ root = DefaultRootWindow(dpy); -+ XGetGeometry(dpy, root, -+ &win, &x, &y, -+ &width, &height, &border, &depth); -+ -+ _x_error_occurred = 0; -+ attr.override_redirect = 1; -+ switch 
(dummy) { -+ case 0: -+ win = root; -+ phase = "root"; -+ break; -+ case 1: -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, depth, -+ InputOutput, visual, -+ CWOverrideRedirect, &attr); -+ phase = "fullscreen"; -+ break; -+ case 2: -+ width /= 2; -+ height /= 2; -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, depth, -+ InputOutput, visual, -+ CWOverrideRedirect, &attr); -+ phase = "window"; -+ break; -+ case 3: -+ if (!has_composite(dpy)) -+ return 0; -+ -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ phase = "composite"; -+ break; -+ -+ default: -+ phase = "broken"; -+ win = root; -+ abort(); -+ break; -+ } -+ -+ XMapWindow(dpy, win); -+ XSync(dpy, True); -+ if (_x_error_occurred) -+ return 1; -+ -+ if (dri3_create_fence(dpy, win, &fence)) -+ return 0; -+ -+ printf("%s: Testing basic flip: %dx%d\n", phase, width, height); -+ fflush(stdout); -+ _x_error_occurred = 0; -+ -+ xshmfence_reset(fence.addr); -+ msc = wait_vblank(dpy, win); -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ xcb_present_pixmap(c, win, pixmap, 0, -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ fence.xid, -+ XCB_PRESENT_OPTION_NONE, -+ (msc + 64) & -64, /* target msc */ -+ 64, /* divisor */ -+ 32, /* remainder */ -+ 0, NULL); -+ XFreePixmap(dpy, pixmap); -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ xcb_present_pixmap(c, win, pixmap, 0, -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, /* sync fence */ -+ XCB_PRESENT_OPTION_NONE, -+ (msc + 64) & -64, /* target msc */ -+ 64, /* divisor */ -+ 48, /* remainder */ -+ 0, NULL); -+ XFreePixmap(dpy, pixmap); -+ XDestroyWindow(dpy, win); -+ XFlush(dpy); -+ 
-+ ret = !!xshmfence_await(fence.addr); -+ dri3_fence_free(dpy, &fence); -+ -+ XSync(dpy, True); -+ ret += !!_x_error_occurred; -+ -+ return ret; -+} -+ -+static int test_race(Display *dpy, int dummy) -+{ -+ Display *mgr = XOpenDisplay(NULL); -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ XSetWindowAttributes attr; -+ Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy)); -+ Pixmap pixmap; -+ struct dri3_fence fence; -+ Window root, win; -+ unsigned int width, height; -+ unsigned border, depth; -+ int x, y, ret = 1; -+ const char *phase; -+ uint64_t msc; -+ -+ root = DefaultRootWindow(dpy); -+ XGetGeometry(dpy, root, -+ &win, &x, &y, -+ &width, &height, &border, &depth); -+ -+ _x_error_occurred = 0; -+ attr.override_redirect = 1; -+ switch (dummy) { -+ case 0: -+ win = root; -+ phase = "root"; -+ break; -+ case 1: -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, depth, -+ InputOutput, visual, -+ CWOverrideRedirect, &attr); -+ phase = "fullscreen"; -+ break; -+ case 2: -+ width /= 2; -+ height /= 2; -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, depth, -+ InputOutput, visual, -+ CWOverrideRedirect, &attr); -+ phase = "window"; -+ break; -+ case 3: -+ if (!has_composite(dpy)) -+ return 0; -+ -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ phase = "composite"; -+ break; -+ -+ default: -+ phase = "broken"; -+ win = root; -+ abort(); -+ break; -+ } -+ -+ XMapWindow(dpy, win); -+ XSync(dpy, True); -+ if (_x_error_occurred) -+ return 1; -+ -+ if (dri3_create_fence(dpy, win, &fence)) -+ return 0; -+ -+ printf("%s: Testing race with manager: %dx%d\n", phase, width, height); -+ fflush(stdout); -+ _x_error_occurred = 0; -+ -+ xshmfence_reset(fence.addr); -+ msc = wait_vblank(dpy, win); -+ -+ pixmap = XCreatePixmap(dpy, win, width, 
height, depth); -+ xcb_present_pixmap(c, win, pixmap, 0, -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ fence.xid, -+ XCB_PRESENT_OPTION_NONE, -+ (msc + 64) & -64, /* target msc */ -+ 64, /* divisor */ -+ 32, /* remainder */ -+ 0, NULL); -+ XFreePixmap(dpy, pixmap); -+ -+ XFlush(dpy); -+ XDestroyWindow(mgr, win); -+ XFlush(mgr); -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ xcb_present_pixmap(c, win, pixmap, 0, -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, /* sync fence */ -+ XCB_PRESENT_OPTION_NONE, -+ (msc + 64) & -64, /* target msc */ -+ 64, /* divisor */ -+ 48, /* remainder */ -+ 0, NULL); -+ XFreePixmap(dpy, pixmap); -+ XFlush(dpy); -+ -+ ret = !!xshmfence_await(fence.addr); -+ dri3_fence_free(dpy, &fence); -+ -+ XSync(dpy, True); -+ ret += !!_x_error_occurred; -+ -+ XCloseDisplay(mgr); -+ -+ return ret; -+} -+ -+static int has_present(Display *dpy) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ xcb_generic_error_t *error = NULL; -+ void *reply; -+ -+ reply = xcb_xfixes_query_version_reply(c, -+ xcb_xfixes_query_version(c, -+ XCB_XFIXES_MAJOR_VERSION, -+ XCB_XFIXES_MINOR_VERSION), -+ &error); -+ free(reply); -+ free(error); -+ if (reply == NULL) { -+ fprintf(stderr, "XFixes not supported on %s\n", DisplayString(dpy)); -+ return 0; -+ } -+ -+ reply = xcb_dri3_query_version_reply(c, -+ xcb_dri3_query_version(c, -+ XCB_DRI3_MAJOR_VERSION, -+ XCB_DRI3_MINOR_VERSION), -+ &error); -+ free(reply); -+ free(error); -+ if (reply == NULL) { -+ fprintf(stderr, "DRI3 not supported on %s\n", DisplayString(dpy)); -+ return 0; -+ } -+ -+ reply = xcb_present_query_version_reply(c, -+ xcb_present_query_version(c, -+ XCB_PRESENT_MAJOR_VERSION, -+ XCB_PRESENT_MINOR_VERSION), -+ &error); -+ -+ free(reply); -+ free(error); -+ if (reply == NULL) { -+ fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy)); -+ 
return 0; -+ } -+ -+ return 1; -+} -+ -+int main(void) -+{ -+ Display *dpy; -+ int dummy; -+ int error = 0; -+ -+ dpy = XOpenDisplay(NULL); -+ if (dpy == NULL) -+ return 77; -+ -+ if (!has_present(dpy)) -+ return 77; -+ -+ if (DPMSQueryExtension(dpy, &dummy, &dummy)) -+ DPMSDisable(dpy); -+ -+ signal(SIGALRM, SIG_IGN); -+ XSetErrorHandler(_check_error_handler); -+ -+ for (dummy = 0; dummy <= 3; dummy++) { -+ error += test_basic(dpy, dummy); -+ error += test_race(dpy, dummy); -+ } -+ -+ if (DPMSQueryExtension(dpy, &dummy, &dummy)) -+ DPMSEnable(dpy); -+ return !!error; -+} -diff --git a/test/present-speed.c b/test/present-speed.c -new file mode 100644 -index 00000000..eccde931 ---- /dev/null -+++ b/test/present-speed.c -@@ -0,0 +1,1015 @@ -+/* -+ * Copyright (c) 2015 Intel Corporation -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. 
-+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "dri3.h" -+ -+static int _x_error_occurred; -+static uint32_t stamp; -+ -+struct list { -+ struct list *next, *prev; -+}; -+ -+static void -+list_init(struct list *list) -+{ -+ list->next = list->prev = list; -+} -+ -+static inline void -+__list_add(struct list *entry, -+ struct list *prev, -+ struct list *next) -+{ -+ next->prev = entry; -+ entry->next = next; -+ entry->prev = prev; -+ prev->next = entry; -+} -+ -+static inline void -+list_add(struct list *entry, struct list *head) -+{ -+ __list_add(entry, head, head->next); -+} -+ -+static inline void -+__list_del(struct list *prev, struct list *next) -+{ -+ next->prev = prev; -+ prev->next = next; -+} -+ -+static inline void -+_list_del(struct list *entry) -+{ -+ __list_del(entry->prev, entry->next); -+} -+ -+static inline void -+list_move(struct list *list, struct list *head) -+{ -+ if (list->prev != head) { -+ _list_del(list); -+ list_add(list, head); -+ } -+} -+ -+#define __container_of(ptr, sample, member) \ -+ (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample))) -+ -+#define list_for_each_entry(pos, head, member) \ -+ for (pos = __container_of((head)->next, pos, member); \ -+ &pos->member != (head); \ -+ pos = __container_of(pos->member.next, pos, member)) -+ -+static int -+_check_error_handler(Display *display, -+ XErrorEvent *event) -+{ -+ if (_x_error_occurred < 0) -+ return True; -+ -+ printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", -+ DisplayString(display), -+ event->serial, -+ event->error_code, -+ event->request_code, -+ event->minor_code); -+ _x_error_occurred++; -+ return False; /* 
ignored */ -+} -+ -+static double elapsed(const struct timespec *start, -+ const struct timespec *end) -+{ -+ return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; -+} -+ -+struct buffer { -+ struct list link; -+ Pixmap pixmap; -+ struct dri3_fence fence; -+ int fd; -+ int busy; -+ int id; -+}; -+ -+#define DRI3 1 -+#define NOCOPY 2 -+#define ASYNC 4 -+static void run(Display *dpy, Window win, const char *name, unsigned options) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ struct timespec start, end; -+#define N_BACK 8 -+ char test_name[128]; -+ struct buffer buffer[N_BACK]; -+ struct list mru; -+ Window root; -+ unsigned int width, height; -+ unsigned border, depth; -+ unsigned present_flags = 0; -+ xcb_xfixes_region_t update = 0; -+ int completed = 0; -+ int queued = 0; -+ uint32_t eid = 0; -+ void *Q = NULL; -+ int i, n; -+ -+ list_init(&mru); -+ -+ XGetGeometry(dpy, win, -+ &root, &i, &n, &width, &height, &border, &depth); -+ -+ _x_error_occurred = 0; -+ -+ for (n = 0; n < N_BACK; n++) { -+ buffer[n].pixmap = xcb_generate_id(c); -+ xcb_create_pixmap(c, depth, buffer[n].pixmap, win, -+ width, height); -+ buffer[n].fence.xid = 0; -+ buffer[n].fd = -1; -+ buffer[n].id = n; -+ if (options & DRI3) { -+ xcb_dri3_buffer_from_pixmap_reply_t *reply; -+ int *fds; -+ -+ if (dri3_create_fence(dpy, win, &buffer[n].fence)) -+ return; -+ -+ reply = xcb_dri3_buffer_from_pixmap_reply (c, -+ xcb_dri3_buffer_from_pixmap(c, buffer[n].pixmap), -+ NULL); -+ if (reply == NULL) -+ return; -+ -+ fds = xcb_dri3_buffer_from_pixmap_reply_fds (c, reply); -+ buffer[n].fd = fds[0]; -+ free(reply); -+ -+ /* start idle */ -+ xshmfence_trigger(buffer[n].fence.addr); -+ } -+ buffer[n].busy = 0; -+ list_add(&buffer[n].link, &mru); -+ } -+ if (options & ASYNC) -+ present_flags |= XCB_PRESENT_OPTION_ASYNC; -+ if (options & NOCOPY) { -+ update = xcb_generate_id(c); -+ xcb_xfixes_create_region(c, update, 0, NULL); -+ present_flags |= XCB_PRESENT_OPTION_COPY; -+ } 
-+ -+ if (!(options & DRI3)) { -+ eid = xcb_generate_id(c); -+ xcb_present_select_input(c, eid, win, -+ (options & NOCOPY ? 0 : XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY) | -+ XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY); -+ Q = xcb_register_for_special_xge(c, &xcb_present_id, eid, &stamp); -+ } -+ -+ clock_gettime(CLOCK_MONOTONIC, &start); -+ do { -+ for (n = 0; n < 1000; n++) { -+ struct buffer *tmp, *b = NULL; -+retry: -+ list_for_each_entry(tmp, &mru, link) { -+ if (tmp->fence.xid) -+ tmp->busy = !xshmfence_query(tmp->fence.addr); -+ if (!tmp->busy) { -+ b = tmp; -+ break; -+ } -+ } -+ if (options & DRI3) { -+ if (b == NULL) -+ goto retry; -+ -+ xshmfence_reset(b->fence.addr); -+ queued--; -+ completed++; -+ } else while (b == NULL) { -+ xcb_present_generic_event_t *ev; -+ -+ ev = (xcb_present_generic_event_t *) -+ xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ abort(); -+ -+ do { -+ switch (ev->evtype) { -+ case XCB_PRESENT_COMPLETE_NOTIFY: -+ completed++; -+ queued--; -+ break; -+ -+ case XCB_PRESENT_EVENT_IDLE_NOTIFY: -+ { -+ xcb_present_idle_notify_event_t *ie = (xcb_present_idle_notify_event_t *)ev; -+ assert(ie->serial < N_BACK); -+ buffer[ie->serial].busy = 0; -+ if (b == NULL) -+ b = &buffer[ie->serial]; -+ break; -+ } -+ } -+ free(ev); -+ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); -+ } -+ -+ b->busy = (options & NOCOPY) == 0; -+ xcb_present_pixmap(c, win, b->pixmap, b->id, -+ 0, /* valid */ -+ update, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ b->fence.xid, -+ present_flags, -+ 0, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ list_move(&b->link, &mru); -+ queued++; -+ xcb_flush(c); -+ } -+ clock_gettime(CLOCK_MONOTONIC, &end); -+ } while (end.tv_sec < start.tv_sec + 10); -+ -+ if (options & DRI3) { -+ struct buffer *b; -+ XID pixmap; -+ -+ pixmap = xcb_generate_id(c); -+ xcb_create_pixmap(c, depth, pixmap, win, width, height); -+ 
xcb_present_pixmap(c, win, pixmap, 0xdeadbeef, -+ 0, /* valid */ -+ None, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ 0, -+ 0, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_flush(c); -+ -+ list_for_each_entry(b, &mru, link) -+ xshmfence_await(b->fence.addr); -+ -+ xcb_free_pixmap(c, pixmap); -+ completed += queued; -+ } else while (queued) { -+ xcb_present_generic_event_t *ev; -+ -+ ev = (xcb_present_generic_event_t *) -+ xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ abort(); -+ -+ do { -+ switch (ev->evtype) { -+ case XCB_PRESENT_COMPLETE_NOTIFY: -+ completed++; -+ queued--; -+ break; -+ -+ case XCB_PRESENT_EVENT_IDLE_NOTIFY: -+ break; -+ } -+ free(ev); -+ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); -+ } -+ clock_gettime(CLOCK_MONOTONIC, &end); -+ -+ if (update) -+ xcb_xfixes_destroy_region(c, update); -+ for (n = 0; n < N_BACK; n++) { -+ if (buffer[n].fence.xid) -+ dri3_fence_free(dpy, &buffer[n].fence); -+ if (buffer[n].fd != -1) -+ close(buffer[n].fd); -+ xcb_free_pixmap(c, buffer[n].pixmap); -+ } -+ -+ if (Q) { -+ xcb_discard_reply(c, xcb_present_select_input_checked(c, eid, win, 0).sequence); -+ XSync(dpy, True); -+ xcb_unregister_for_special_event(c, Q); -+ } -+ -+ test_name[0] = '\0'; -+ if (options) { -+ snprintf(test_name, sizeof(test_name), "(%s%s%s )", -+ options & NOCOPY ? " no-copy" : "", -+ options & DRI3 ? " dri3" : "", -+ options & ASYNC ? 
" async" : ""); -+ } -+ printf("%s%s: Completed %d presents in %.1fs, %.3fus each (%.1f FPS)\n", -+ name, test_name, -+ completed, elapsed(&start, &end) / 1000000, -+ elapsed(&start, &end) / completed, -+ completed / (elapsed(&start, &end) / 1000000)); -+} -+ -+struct perpixel { -+ Window win; -+ struct buffer buffer[N_BACK]; -+ struct list mru; -+ uint32_t eid; -+ void *Q; -+ int queued; -+}; -+ -+static void perpixel(Display *dpy, -+ int max_width, int max_height, unsigned options) -+{ -+ //const int sz = max_width * max_height; -+ const int sz = 1048; -+ struct perpixel *pp; -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ struct timespec start, end; -+ char test_name[128]; -+ unsigned present_flags = 0; -+ xcb_xfixes_region_t update = 0; -+ int completed = 0; -+ int i, n; -+ -+ pp = calloc(sz, sizeof(*pp)); -+ if (!pp) -+ return; -+ -+ for (i = 0; i < sz; i++) { -+ XSetWindowAttributes attr = { .override_redirect = 1 }; -+ int depth = DefaultDepth(dpy, DefaultScreen(dpy)); -+ pp[i].win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ i % max_width, i / max_width, 1, 1, 0, depth, -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XMapWindow(dpy, pp[i].win); -+ list_init(&pp[i].mru); -+ for (n = 0; n < N_BACK; n++) { -+ pp[i].buffer[n].pixmap = xcb_generate_id(c); -+ xcb_create_pixmap(c, depth, pp[i].buffer[n].pixmap, -+ pp[i].win, 1, 1); -+ pp[i].buffer[n].fence.xid = 0; -+ pp[i].buffer[n].fd = -1; -+ pp[i].buffer[n].id = n; -+ if (options & DRI3) { -+ xcb_dri3_buffer_from_pixmap_reply_t *reply; -+ int *fds; -+ -+ if (dri3_create_fence(dpy, pp[i].win, &pp[i].buffer[n].fence)) -+ return; -+ -+ reply = xcb_dri3_buffer_from_pixmap_reply(c, -+ xcb_dri3_buffer_from_pixmap(c, pp[i].buffer[n].pixmap), -+ NULL); -+ if (reply == NULL) -+ return; -+ -+ fds = xcb_dri3_buffer_from_pixmap_reply_fds(c, reply); -+ pp[i].buffer[n].fd = fds[0]; -+ free(reply); -+ -+ /* start idle */ -+ xshmfence_trigger(pp[i].buffer[n].fence.addr); -+ 
} -+ pp[i].buffer[n].busy = 0; -+ list_add(&pp[i].buffer[n].link, &pp[i].mru); -+ } -+ -+ if (!(options & DRI3)) { -+ pp[i].eid = xcb_generate_id(c); -+ xcb_present_select_input(c, pp[i].eid, pp[i].win, -+ (options & NOCOPY ? 0 : XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY) | -+ XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY); -+ pp[i].Q = xcb_register_for_special_xge(c, &xcb_present_id, pp[i].eid, &stamp); -+ } -+ pp[i].queued = 0; -+ } -+ -+ XSync(dpy, True); -+ _x_error_occurred = 0; -+ -+ if (options & ASYNC) -+ present_flags |= XCB_PRESENT_OPTION_ASYNC; -+ if (options & NOCOPY) { -+ update = xcb_generate_id(c); -+ xcb_xfixes_create_region(c, update, 0, NULL); -+ present_flags |= XCB_PRESENT_OPTION_COPY; -+ } -+ -+ clock_gettime(CLOCK_MONOTONIC, &start); -+ do { -+ for (i = 0; i < sz; i++) { -+ struct buffer *tmp, *b = NULL; -+retry: -+ list_for_each_entry(tmp, &pp[i].mru, link) { -+ if (tmp->fence.xid) -+ tmp->busy = !xshmfence_query(tmp->fence.addr); -+ if (!tmp->busy) { -+ b = tmp; -+ break; -+ } -+ } -+ if (options & DRI3) { -+ if (b == NULL) -+ goto retry; -+ -+ xshmfence_reset(b->fence.addr); -+ pp[i].queued--; -+ completed++; -+ } else while (b == NULL) { -+ xcb_present_generic_event_t *ev; -+ -+ ev = (xcb_present_generic_event_t *) -+ xcb_wait_for_special_event(c, pp[i].Q); -+ if (ev == NULL) -+ abort(); -+ -+ do { -+ switch (ev->evtype) { -+ case XCB_PRESENT_COMPLETE_NOTIFY: -+ completed++; -+ pp[i].queued--; -+ break; -+ -+ case XCB_PRESENT_EVENT_IDLE_NOTIFY: -+ { -+ xcb_present_idle_notify_event_t *ie = (xcb_present_idle_notify_event_t *)ev; -+ assert(ie->serial < N_BACK); -+ pp[i].buffer[ie->serial].busy = 0; -+ if (b == NULL) -+ b = &pp[i].buffer[ie->serial]; -+ break; -+ } -+ } -+ free(ev); -+ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, pp[i].Q))); -+ } -+ -+ b->busy = (options & NOCOPY) == 0; -+ xcb_present_pixmap(c, pp[i].win, b->pixmap, b->id, -+ 0, /* valid */ -+ update, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, 
-+ None, /* wait fence */ -+ b->fence.xid, -+ present_flags, -+ 0, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ list_move(&b->link, &pp[i].mru); -+ pp[i].queued++; -+ } -+ xcb_flush(c); -+ clock_gettime(CLOCK_MONOTONIC, &end); -+ } while (end.tv_sec < start.tv_sec + 10); -+ -+ for (i = 0; i < sz; i++) { -+ if (options & DRI3) { -+ int depth = DefaultDepth(dpy, DefaultScreen(dpy)); -+ struct buffer *b; -+ XID pixmap; -+ -+ pixmap = xcb_generate_id(c); -+ xcb_create_pixmap(c, depth, pixmap, pp[i].win, 1, 1); -+ xcb_present_pixmap(c, pp[i].win, pixmap, 0xdeadbeef, -+ 0, /* valid */ -+ None, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ 0, -+ 0, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_flush(c); -+ -+ list_for_each_entry(b, &pp[i].mru, link) -+ xshmfence_await(b->fence.addr); -+ -+ xcb_free_pixmap(c, pixmap); -+ completed += pp[i].queued; -+ } else while (pp[i].queued) { -+ xcb_present_generic_event_t *ev; -+ -+ ev = (xcb_present_generic_event_t *) -+ xcb_wait_for_special_event(c, pp[i].Q); -+ if (ev == NULL) -+ abort(); -+ -+ do { -+ switch (ev->evtype) { -+ case XCB_PRESENT_COMPLETE_NOTIFY: -+ completed++; -+ pp[i].queued--; -+ break; -+ -+ case XCB_PRESENT_EVENT_IDLE_NOTIFY: -+ break; -+ } -+ free(ev); -+ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, pp[i].Q))); -+ } -+ } -+ clock_gettime(CLOCK_MONOTONIC, &end); -+ -+ if (update) -+ xcb_xfixes_destroy_region(c, update); -+ -+ for (i = 0; i < sz; i++) { -+ for (n = 0; n < N_BACK; n++) { -+ if (pp[i].buffer[n].fence.xid) -+ dri3_fence_free(dpy, &pp[i].buffer[n].fence); -+ if (pp[i].buffer[n].fd != -1) -+ close(pp[i].buffer[n].fd); -+ xcb_free_pixmap(c, pp[i].buffer[n].pixmap); -+ } -+ -+ if (pp[i].Q) { -+ xcb_discard_reply(c, xcb_present_select_input_checked(c, pp[i].eid, pp[i].win, 0).sequence); -+ XSync(dpy, True); -+ xcb_unregister_for_special_event(c, pp[i].Q); -+ } -+ 
-+ XDestroyWindow(dpy, pp[i].win); -+ } -+ free(pp); -+ -+ test_name[0] = '\0'; -+ if (options) { -+ snprintf(test_name, sizeof(test_name), "(%s%s%s )", -+ options & NOCOPY ? " no-copy" : "", -+ options & DRI3 ? " dri3" : "", -+ options & ASYNC ? " async" : ""); -+ } -+ printf("%s%s: Completed %d presents in %.1fs, %.3fus each (%.1f FPS)\n", -+ __func__, test_name, -+ completed, elapsed(&start, &end) / 1000000, -+ elapsed(&start, &end) / completed, -+ completed / (elapsed(&start, &end) / 1000000)); -+} -+ -+static int isqrt(int x) -+{ -+ int i; -+ -+ for (i = 2; i*i < x; i++) -+ ; -+ return i; -+} -+ -+struct sibling { -+ pthread_t thread; -+ Display *dpy; -+ int x, y; -+ int width, height; -+ unsigned options; -+}; -+ -+static void *sibling(void *arg) -+{ -+ struct sibling *s = arg; -+ XSetWindowAttributes attr = { .override_redirect = 1 }; -+ Window win = XCreateWindow(s->dpy, DefaultRootWindow(s->dpy), -+ s->x, s->y, s->width, s->height, 0, -+ DefaultDepth(s->dpy, DefaultScreen(s->dpy)), -+ InputOutput, -+ DefaultVisual(s->dpy, DefaultScreen(s->dpy)), -+ CWOverrideRedirect, &attr); -+ XMapWindow(s->dpy, win); -+ run(s->dpy, win, "sibling", s->options); -+ return NULL; -+} -+ -+static void siblings(Display *dpy, -+ int max_width, int max_height, int ncpus, unsigned options) -+{ -+ int sq_ncpus = isqrt(ncpus); -+ int width = max_width / sq_ncpus; -+ int height = max_height/ sq_ncpus; -+ struct sibling s[ncpus]; -+ int child; -+ -+ if (ncpus <= 1) -+ return; -+ -+ for (child = 0; child < ncpus; child++) { -+ s[child].dpy = dpy; -+ s[child].x = (child % sq_ncpus) * width; -+ s[child].y = (child / sq_ncpus) * height; -+ s[child].width = width; -+ s[child].height = height; -+ s[child].options = options; -+ pthread_create(&s[child].thread, NULL, sibling, &s[child]); -+ } -+ -+ for (child = 0; child < ncpus; child++) -+ pthread_join(s[child].thread, NULL); -+} -+ -+static void cousins(int max_width, int max_height, int ncpus, unsigned options) -+{ -+ int sq_ncpus = 
isqrt(ncpus); -+ int width = max_width / sq_ncpus; -+ int height = max_height/ sq_ncpus; -+ int child; -+ -+ if (ncpus <= 1) -+ return; -+ -+ for (child = 0; child < ncpus; child++) { -+ for (; fork() == 0; exit(0)) { -+ int x = (child % sq_ncpus) * width; -+ int y = (child / sq_ncpus) * height; -+ XSetWindowAttributes attr = { .override_redirect = 1 }; -+ Display *dpy = XOpenDisplay(NULL); -+ Window win = XCreateWindow(dpy, DefaultRootWindow(dpy), -+ x, y, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XMapWindow(dpy, win); -+ run(dpy, win, "cousin", options); -+ } -+ } -+ -+ while (child) { -+ int status = -1; -+ pid_t pid = wait(&status); -+ if (pid == -1) -+ continue; -+ child--; -+ } -+} -+ -+static int has_present(Display *dpy) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ xcb_generic_error_t *error = NULL; -+ void *reply; -+ -+ reply = xcb_present_query_version_reply(c, -+ xcb_present_query_version(c, -+ XCB_PRESENT_MAJOR_VERSION, -+ XCB_PRESENT_MINOR_VERSION), -+ &error); -+ -+ free(reply); -+ free(error); -+ if (reply == NULL) { -+ fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy)); -+ return 0; -+ } -+ -+ return 1; -+} -+ -+static int has_composite(Display *dpy) -+{ -+ int event, error; -+ int major, minor; -+ -+ if (!XDamageQueryExtension (dpy, &event, &error)) -+ return 0; -+ -+ if (!XCompositeQueryExtension(dpy, &event, &error)) -+ return 0; -+ -+ XCompositeQueryVersion(dpy, &major, &minor); -+ -+ return major > 0 || minor >= 4; -+} -+ -+static int dri3_query_version(Display *dpy, int *major, int *minor) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ xcb_dri3_query_version_reply_t *reply; -+ xcb_generic_error_t *error; -+ -+ *major = *minor = -1; -+ -+ reply = xcb_dri3_query_version_reply(c, -+ xcb_dri3_query_version(c, -+ XCB_DRI3_MAJOR_VERSION, -+ XCB_DRI3_MINOR_VERSION), -+ &error); -+ free(error); -+ if 
(reply == NULL) -+ return -1; -+ -+ *major = reply->major_version; -+ *minor = reply->minor_version; -+ free(reply); -+ -+ return 0; -+} -+ -+static int has_dri3(Display *dpy) -+{ -+ const xcb_query_extension_reply_t *ext; -+ int major, minor; -+ -+ ext = xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id); -+ if (ext == NULL || !ext->present) -+ return 0; -+ -+ if (dri3_query_version(dpy, &major, &minor) < 0) -+ return 0; -+ -+ return major >= 0; -+} -+ -+static int has_xfixes(Display *dpy) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ const xcb_query_extension_reply_t *ext; -+ void *reply; -+ -+ ext = xcb_get_extension_data(c, &xcb_xfixes_id); -+ if (ext == NULL || !ext->present) -+ return 0; -+ -+ reply = xcb_xfixes_query_version_reply(c, -+ xcb_xfixes_query_version(c, -+ XCB_XFIXES_MAJOR_VERSION, -+ XCB_XFIXES_MINOR_VERSION), -+ NULL); -+ free(reply); -+ -+ return reply != NULL; -+} -+ -+static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) -+{ -+ XRRScreenResources *res; -+ -+ res = XRRGetScreenResourcesCurrent(dpy, window); -+ if (res == NULL) -+ res = XRRGetScreenResources(dpy, window); -+ -+ return res; -+} -+ -+static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) -+{ -+ int i; -+ -+ for (i = 0; i < res->nmode; i++) { -+ if (res->modes[i].id == id) -+ return &res->modes[i]; -+ } -+ -+ return NULL; -+} -+ -+static void fullscreen(Display *dpy, Window win) -+{ -+ Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); -+ XChangeProperty(dpy, win, -+ XInternAtom(dpy, "_NET_WM_STATE", False), -+ XA_ATOM, 32, PropModeReplace, -+ (unsigned char *)&atom, 1); -+} -+ -+static void loop(Display *dpy, XRRScreenResources *res, unsigned options) -+{ -+ Window root = DefaultRootWindow(dpy); -+ Window win; -+ XSetWindowAttributes attr; -+ int i, j; -+ -+ attr.override_redirect = 1; -+ -+ run(dpy, root, "off", options); -+ XSync(dpy, True); -+ -+ for (i = 0; i < res->noutput; i++) { -+ 
XRROutputInfo *output; -+ XRRModeInfo *mode; -+ -+ output = XRRGetOutputInfo(dpy, res, res->outputs[i]); -+ if (output == NULL) -+ continue; -+ -+ mode = NULL; -+ if (res->nmode) -+ mode = lookup_mode(res, output->modes[0]); -+ -+ for (j = 0; mode && j < 2*output->ncrtc; j++) { -+ int c = j; -+ if (c >= output->ncrtc) -+ c = 2*output->ncrtc - j - 1; -+ -+ printf("[%d, %d] -- OUTPUT:%ld, CRTC:%ld: %dx%d\n", -+ i, c, (long)res->outputs[i], (long)output->crtcs[c], -+ mode->width, mode->height); -+ XRRSetCrtcConfig(dpy, res, output->crtcs[c], CurrentTime, -+ 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); -+ -+ run(dpy, root, "root", options); -+ XSync(dpy, True); -+ -+ win = XCreateWindow(dpy, root, -+ 0, 0, mode->width, mode->height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ fullscreen(dpy, win); -+ XMapWindow(dpy, win); -+ run(dpy, win, "fullscreen", options); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ -+ win = XCreateWindow(dpy, root, -+ 0, 0, mode->width, mode->height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XMapWindow(dpy, win); -+ run(dpy, win, "windowed", options); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ -+ if (has_composite(dpy)) { -+ Damage damage; -+ -+ _x_error_occurred = 0; -+ win = XCreateWindow(dpy, root, -+ 0, 0, mode->width, mode->height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ damage = XDamageCreate(dpy, win, XDamageReportNonEmpty); -+ XMapWindow(dpy, win); -+ XSync(dpy, True); -+ if (!_x_error_occurred) -+ run(dpy, win, "composited", options); -+ XDamageDestroy(dpy, damage); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ } -+ -+ win = XCreateWindow(dpy, root, -+ 
0, 0, mode->width/2, mode->height/2, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XMapWindow(dpy, win); -+ run(dpy, win, "half", options); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ -+ perpixel(dpy, mode->width, mode->height, options); -+ -+ siblings(dpy, mode->width, mode->height, -+ sysconf(_SC_NPROCESSORS_ONLN), -+ options); -+ -+ cousins(mode->width, mode->height, -+ sysconf(_SC_NPROCESSORS_ONLN), -+ options); -+ -+ XRRSetCrtcConfig(dpy, res, output->crtcs[c], CurrentTime, -+ 0, 0, None, RR_Rotate_0, NULL, 0); -+ } -+ -+ XRRFreeOutputInfo(output); -+ } -+ -+} -+ -+int main(void) -+{ -+ Display *dpy; -+ XRRScreenResources *res; -+ XRRCrtcInfo **original_crtc; -+ int i; -+ -+ XInitThreads(); -+ -+ dpy = XOpenDisplay(NULL); -+ if (dpy == NULL) -+ return 77; -+ -+ if (!has_present(dpy)) -+ return 77; -+ -+ if (DPMSQueryExtension(dpy, &i, &i)) -+ DPMSDisable(dpy); -+ -+ signal(SIGALRM, SIG_IGN); -+ XSetErrorHandler(_check_error_handler); -+ -+ res = NULL; -+ if (XRRQueryVersion(dpy, &i, &i)) -+ res = _XRRGetScreenResourcesCurrent(dpy, DefaultRootWindow(dpy)); -+ if (res == NULL) -+ return 77; -+ -+ original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); -+ for (i = 0; i < res->ncrtc; i++) -+ original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); -+ -+ printf("noutput=%d, ncrtc=%d\n", res->noutput, res->ncrtc); -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ 0, 0, None, RR_Rotate_0, NULL, 0); -+ -+ loop(dpy, res, 0); -+ loop(dpy, res, ASYNC); -+ if (has_xfixes(dpy)) -+ loop(dpy, res, NOCOPY); -+ if (has_dri3(dpy)) { -+ loop(dpy, res, DRI3); -+ loop(dpy, res, DRI3 | ASYNC); -+ } -+ -+ for (i = 0; i < res->ncrtc; i++) -+ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, -+ original_crtc[i]->x, -+ original_crtc[i]->y, -+ original_crtc[i]->mode, -+ original_crtc[i]->rotation, -+ original_crtc[i]->outputs, -+ 
original_crtc[i]->noutput); -+ -+ if (DPMSQueryExtension(dpy, &i, &i)) -+ DPMSEnable(dpy); -+ return 0; -+} -diff --git a/test/present-test.c b/test/present-test.c -index 6b562eb0..5a12a24f 100644 ---- a/test/present-test.c -+++ b/test/present-test.c -@@ -31,7 +31,9 @@ - #include - #include - #include -+#include - #include -+#include - #include - #include - #include -@@ -44,6 +46,8 @@ - #endif - #include - #include -+#include -+#include - #include - #include - -@@ -134,12 +138,14 @@ static void *setup_msc(Display *dpy, Window win) - return q; - } - --static uint64_t check_msc(Display *dpy, Window win, void *q, uint64_t last_msc) -+static uint64_t check_msc(Display *dpy, Window win, void *q, uint64_t last_msc, uint64_t *ust) - { - xcb_connection_t *c = XGetXCBConnection(dpy); -+ static uint32_t serial = 1; - uint64_t msc = 0; -+ int complete = 0; - -- xcb_present_notify_msc(c, win, 0, 0, 0, 0); -+ xcb_present_notify_msc(c, win, serial ^ 0xcc00ffee, 0, 0, 0); - xcb_flush(c); - - do { -@@ -151,82 +157,1268 @@ static uint64_t check_msc(Display *dpy, Window win, void *q, uint64_t last_msc) - break; - - ce = (xcb_present_complete_notify_event_t *)ev; -- if (ce->kind != XCB_PRESENT_COMPLETE_KIND_PIXMAP) -+ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && -+ ce->serial == (serial ^ 0xcc00ffee)) { -+ msc = ce->msc; -+ if (ust) -+ *ust = ce->ust; -+ complete = 1; -+ } -+ free(ev); -+ } while (!complete); -+ -+ if ((int64_t)(msc - last_msc) < 0) { -+ printf("Invalid MSC: was %llu, now %llu\n", -+ (long long)last_msc, (long long)msc); -+ } -+ -+ if (++serial == 0) -+ serial = 1; -+ -+ return msc; -+} -+ -+static uint64_t wait_vblank(Display *dpy, Window win, void *q) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ static uint32_t serial = 1; -+ uint64_t msc = 0; -+ int complete = 0; -+ -+ xcb_present_notify_msc(c, win, serial ^ 0xdeadbeef, 0, 1, 0); -+ xcb_flush(c); -+ -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = 
xcb_wait_for_special_event(c, q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && -+ ce->serial == (serial ^ 0xdeadbeef)) { - msc = ce->msc; -+ complete = 1; -+ } -+ free(ev); -+ } while (!complete); -+ -+ if (++serial == 0) -+ serial = 1; -+ -+ return msc; -+} -+ -+static uint64_t msc_interval(Display *dpy, Window win, void *q) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ uint64_t msc, ust; -+ int complete = 0; -+ -+ msc = check_msc(dpy, win, q, 0, NULL); -+ -+ xcb_present_notify_msc(c, win, 0xc0ffee00, msc, 0, 0); -+ xcb_present_notify_msc(c, win, 0xc0ffee01, msc + 10, 0, 0); -+ xcb_flush(c); -+ -+ ust = msc = 0; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && -+ ce->serial == 0xc0ffee00) { -+ msc -= ce->msc; -+ ust -= ce->ust; -+ complete++; -+ } -+ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && -+ ce->serial == 0xc0ffee01) { -+ msc += ce->msc; -+ ust += ce->ust; -+ complete++; -+ } -+ free(ev); -+ } while (complete != 2); -+ -+ printf("10 frame interval: msc=%lld, ust=%lld\n", -+ (long long)msc, (long long)ust); -+ XSync(dpy, True); -+ if (msc == 0) -+ return 0; -+ -+ return (ust + msc/2) / msc; -+} -+ -+static void teardown_msc(Display *dpy, void *q) -+{ -+ xcb_unregister_for_special_event(XGetXCBConnection(dpy), q); -+} -+ -+static int test_whole(Display *dpy, Window win, const char *phase) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Pixmap pixmap; -+ struct dri3_fence fence; -+ Window root; -+ unsigned int width, height; -+ unsigned border, depth; -+ int x, y, ret = 1; -+ -+ XGetGeometry(dpy, win, -+ &root, &x, &y, &width, &height, &border, &depth); -+ -+ if (dri3_create_fence(dpy, win, &fence)) -+ return 0; -+ -+ 
printf("%s: Testing simple flip: %dx%d\n", phase, width, height); -+ _x_error_occurred = 0; -+ -+ xshmfence_reset(fence.addr); -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ xcb_present_pixmap(c, win, pixmap, 0, -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ fence.xid, -+ XCB_PRESENT_OPTION_NONE, -+ 0, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ XFreePixmap(dpy, pixmap); -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ xcb_present_pixmap(c, win, pixmap, 0, -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, /* sync fence */ -+ XCB_PRESENT_OPTION_NONE, -+ 0, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ XFreePixmap(dpy, pixmap); -+ XFlush(dpy); -+ -+ ret = !!xshmfence_await(fence.addr); -+ dri3_fence_free(dpy, &fence); -+ -+ XSync(dpy, True); -+ ret += !!_x_error_occurred; -+ -+ return ret; -+} -+ -+static uint64_t flush_flips(Display *dpy, Window win, Pixmap pixmap, void *Q, uint64_t *ust) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ uint64_t msc; -+ int complete; -+ -+ msc = check_msc(dpy, win, Q, 0, NULL); -+ xcb_present_pixmap(c, win, pixmap, -+ 0xdeadbeef, /* serial */ -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ msc + 60, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_flush(c); -+ complete = 0; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ complete = (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP && -+ ce->serial == 0xdeadbeef); -+ free(ev); -+ } while (!complete); -+ XSync(dpy, True); -+ -+ return check_msc(dpy, win, Q, msc, ust); -+} 
-+ -+static int test_double(Display *dpy, Window win, const char *phase, void *Q) -+{ -+#define COUNT (15*60) -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Pixmap pixmap; -+ Window root; -+ unsigned int width, height; -+ unsigned border, depth; -+ int x, y, n, ret; -+ struct { -+ uint64_t msc, ust; -+ } frame[COUNT+1]; -+ int offset = 0; -+ -+ XGetGeometry(dpy, win, -+ &root, &x, &y, &width, &height, &border, &depth); -+ -+ printf("%s: Testing flip double buffering: %dx%d\n", phase, width, height); -+ _x_error_occurred = 0; -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ flush_flips(dpy, win, pixmap, Q, NULL); -+ for (n = 0; n <= COUNT; n++) { -+ int complete; -+ -+ xcb_present_pixmap(c, win, pixmap, n, -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ 0, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_flush(c); -+ -+ complete = 0; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP && -+ ce->serial == n) { -+ frame[n].msc = ce->msc; -+ frame[n].ust = ce->ust; -+ complete = 1; -+ } -+ free(ev); -+ } while (!complete); -+ } -+ XFreePixmap(dpy, pixmap); -+ -+ XSync(dpy, True); -+ ret = !!_x_error_occurred; -+ -+ if (frame[COUNT].msc - frame[0].msc != COUNT) { -+ printf("Expected %d frames interval, %d elapsed instead\n", -+ COUNT, (int)(frame[COUNT].msc - frame[0].msc)); -+ for (n = 0; n <= COUNT; n++) { -+ if (frame[n].msc - frame[0].msc != n + offset) { -+ printf("frame[%d]: msc=%03lld, ust=%lld\n", n, -+ (long long)(frame[n].msc - frame[0].msc), -+ (long long)(frame[n].ust - frame[0].ust)); -+ offset = frame[n].msc - frame[0].msc - n; -+ ret++; -+ } -+ } -+ } -+ -+ return ret; -+} -+ -+static int 
test_future(Display *dpy, Window win, const char *phase, void *Q) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Pixmap pixmap; -+ struct dri3_fence fence; -+ Window root; -+ unsigned int width, height; -+ unsigned border, depth; -+ int x, y, ret = 0, n; -+ uint64_t msc, ust; -+ int complete, count; -+ int early = 0, late = 0; -+ int earliest = 0, latest = 0; -+ uint64_t interval; -+ -+ XGetGeometry(dpy, win, -+ &root, &x, &y, &width, &height, &border, &depth); -+ -+ if (dri3_create_fence(dpy, win, &fence)) -+ return 0; -+ -+ printf("%s: Testing flips into the future: %dx%d\n", phase, width, height); -+ _x_error_occurred = 0; -+ -+ interval = msc_interval(dpy, win, Q); -+ if (interval == 0) { -+ printf("Zero delay between frames\n"); -+ return 1; -+ } -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ msc = flush_flips(dpy, win, pixmap, Q, &ust); -+ for (n = 1; n <= 10; n++) -+ xcb_present_pixmap(c, win, pixmap, -+ n, /* serial */ -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ msc + 60 + n*15*60, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_present_pixmap(c, win, pixmap, -+ 0xdeadbeef, /* serial */ -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ msc + 60 + n*15*60, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_flush(c); -+ -+ complete = 0; -+ count = 0; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP); -+ -+ if (ce->serial == 0xdeadbeef) { -+ int64_t time; -+ -+ time = ce->ust - (ust + (60 + 15*60*n) * interval); -+ if (time < -(int64_t)interval) { -+ fprintf(stderr, 
-+ "\tflips completed too early by %lldms\n", -+ (long long)(-time / 1000)); -+ } else if (time > (int64_t)interval) { -+ fprintf(stderr, -+ "\tflips completed too late by %lldms\n", -+ (long long)(time / 1000)); -+ } -+ complete = 1; -+ } else { -+ int diff = (int64_t)(ce->msc - (15*60*ce->serial + msc + 60)); -+ if (diff < 0) { -+ if (-diff > earliest) { -+ fprintf(stderr, "\tframe %d displayed early by %d frames\n", ce->serial, -diff); -+ earliest = -diff; -+ } -+ early++; -+ ret++; -+ } else if (diff > 0) { -+ if (diff > latest) { -+ fprintf(stderr, "\tframe %d displayed late by %d frames\n", ce->serial, diff); -+ latest = diff; -+ } -+ late++; -+ ret++; -+ } -+ count++; -+ } -+ free(ev); -+ } while (!complete); -+ -+ if (early) -+ printf("\t%d frames shown too early (worst %d)!\n", early, earliest); -+ if (late) -+ printf("\t%d frames shown too late (worst %d)!\n", late, latest); -+ -+ if (count != 10) { -+ fprintf(stderr, "Sentinel frame received too early! %d frames outstanding\n", 10 - count); -+ ret++; -+ -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP); -+ free(ev); -+ } while (++count != 10); -+ } -+ -+ ret += !!_x_error_occurred; -+ -+ return ret; -+} -+ -+static int test_exhaustion(Display *dpy, Window win, const char *phase, void *Q) -+{ -+#define N_VBLANKS 256 /* kernel event queue length: 128 vblanks */ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Pixmap pixmap; -+ struct dri3_fence fence[2]; -+ Window root; -+ xcb_xfixes_region_t region; -+ unsigned int width, height; -+ unsigned border, depth; -+ int x, y, ret = 0, n; -+ uint64_t target, final; -+ -+ XGetGeometry(dpy, win, -+ &root, &x, &y, &width, &height, &border, &depth); -+ -+ if (dri3_create_fence(dpy, win, &fence[0]) || -+ dri3_create_fence(dpy, win, &fence[1])) -+ return 
0; -+ -+ printf("%s: Testing flips with long vblank queues: %dx%d\n", phase, width, height); -+ _x_error_occurred = 0; -+ -+ region = xcb_generate_id(c); -+ xcb_xfixes_create_region(c, region, 0, NULL); -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ xshmfence_reset(fence[0].addr); -+ xshmfence_reset(fence[1].addr); -+ target = check_msc(dpy, win, Q, 0, NULL); -+ for (n = N_VBLANKS; n--; ) -+ xcb_present_pixmap(c, win, pixmap, 0, -+ 0, /* valid */ -+ region, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ target + N_VBLANKS, /* target msc */ -+ 1, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_present_pixmap(c, win, pixmap, 0, -+ region, /* valid */ -+ region, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ fence[0].xid, -+ XCB_PRESENT_OPTION_NONE, -+ target, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ for (n = 1; n < N_VBLANKS; n++) -+ xcb_present_pixmap(c, win, pixmap, 0, -+ region, /* valid */ -+ region, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ target + n, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_present_pixmap(c, win, pixmap, 0, -+ region, /* valid */ -+ region, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ fence[1].xid, -+ XCB_PRESENT_OPTION_NONE, -+ target + N_VBLANKS, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_flush(c); -+ -+ ret += !!xshmfence_await(fence[0].addr); -+ final = check_msc(dpy, win, Q, 0, NULL); -+ if (final < target) { -+ printf("\tFirst flip too early, MSC was %llu, expected %llu\n", -+ (long long)final, (long long)target); -+ ret++; -+ } else if (final > target + 1) { -+ printf("\tFirst flip too late, MSC was %llu, expected %llu\n", -+ (long long)final, (long long)target); -+ 
ret++; -+ } -+ -+ ret += !!xshmfence_await(fence[1].addr); -+ final = check_msc(dpy, win, Q, 0, NULL); -+ if (final < target + N_VBLANKS) { -+ printf("\tLast flip too early, MSC was %llu, expected %llu\n", -+ (long long)final, (long long)(target + N_VBLANKS)); -+ ret++; -+ } else if (final > target + N_VBLANKS + 1) { -+ printf("\tLast flip too late, MSC was %llu, expected %llu\n", -+ (long long)final, (long long)(target + N_VBLANKS)); -+ ret++; -+ } -+ -+ flush_flips(dpy, win, pixmap, Q, NULL); -+ -+ XFreePixmap(dpy, pixmap); -+ xcb_xfixes_destroy_region(c, region); -+ dri3_fence_free(dpy, &fence[1]); -+ dri3_fence_free(dpy, &fence[0]); -+ -+ XSync(dpy, True); -+ ret += !!_x_error_occurred; -+ -+ return ret; -+#undef N_VBLANKS -+} -+ -+static int test_accuracy(Display *dpy, Window win, const char *phase, void *Q) -+{ -+#define N_VBLANKS (60 * 120) /* ~2 minutes */ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Pixmap pixmap; -+ Window root; -+ unsigned int width, height; -+ unsigned border, depth; -+ int x, y, ret = 0, n; -+ uint64_t target; -+ int early = 0, late = 0; -+ int earliest = 0, latest = 0; -+ int complete, count; -+ -+ XGetGeometry(dpy, win, -+ &root, &x, &y, &width, &height, &border, &depth); -+ -+ printf("%s: Testing flip accuracy: %dx%d\n", phase, width, height); -+ _x_error_occurred = 0; -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ target = flush_flips(dpy, win, pixmap, Q, NULL); -+ for (n = 0; n <= N_VBLANKS; n++) -+ xcb_present_pixmap(c, win, pixmap, -+ n, /* serial */ -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ target + 60 + n, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_present_pixmap(c, win, pixmap, -+ 0xdeadbeef, /* serial */ -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ target + 60 
+ n, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_flush(c); -+ -+ complete = 0; -+ count = 0; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP); -+ -+ if (ce->serial != 0xdeadbeef) { -+ int diff = (int64_t)(ce->msc - (target + ce->serial + 60)); -+ if (diff < 0) { -+ if (-diff > earliest) { -+ fprintf(stderr, "\tframe %d displayed early by %d frames\n", ce->serial, -diff); -+ earliest = -diff; -+ } -+ early++; -+ ret++; -+ } else if (diff > 0) { -+ if (diff > latest) { -+ fprintf(stderr, "\tframe %d displayed late by %d frames\n", ce->serial, diff); -+ latest = diff; -+ } -+ late++; -+ ret++; -+ } -+ count++; -+ } else -+ complete = 1; - free(ev); -- } while (msc == 0); -+ } while (!complete); -+ -+ if (early) -+ printf("\t%d frames shown too early (worst %d)!\n", early, earliest); -+ if (late) -+ printf("\t%d frames shown too late (worst %d)!\n", late, latest); -+ -+ if (count != N_VBLANKS+1) { -+ fprintf(stderr, "Sentinel frame received too early! 
%d frames outstanding\n", N_VBLANKS+1 - count); -+ ret++; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP); -+ free(ev); -+ } while (++count != N_VBLANKS+1); -+ } -+ -+ XFreePixmap(dpy, pixmap); -+ -+ XSync(dpy, True); -+ ret += !!_x_error_occurred; -+ -+ return ret; -+#undef N_VBLANKS -+} -+ -+static int test_modulus(Display *dpy, Window win, const char *phase, void *Q) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Pixmap pixmap; -+ Window root; -+ unsigned int width, height; -+ unsigned border, depth; -+ xcb_xfixes_region_t region; -+ int x, y, ret = 0; -+ uint64_t target; -+ int early = 0, late = 0; -+ int earliest = 0, latest = 0; -+ int complete, expect, count; -+ -+ XGetGeometry(dpy, win, -+ &root, &x, &y, &width, &height, &border, &depth); -+ -+ printf("%s: Testing flip modulus: %dx%d\n", phase, width, height); -+ _x_error_occurred = 0; -+ -+ region = xcb_generate_id(c); -+ xcb_xfixes_create_region(c, region, 0, NULL); -+ -+ pixmap = XCreatePixmap(dpy, win, width, height, depth); -+ target = flush_flips(dpy, win, pixmap, Q, NULL); -+ expect = 0; -+ for (x = 1; x <= 7; x++) { -+ for (y = 0; y < x; y++) { -+ xcb_present_pixmap(c, win, pixmap, -+ y << 16 | x, /* serial */ -+ region, /* valid */ -+ region, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ 0, /* target msc */ -+ x, /* divisor */ -+ y, /* remainder */ -+ 0, NULL); -+ expect++; -+ } -+ } -+ xcb_present_pixmap(c, win, pixmap, -+ 0xdeadbeef, /* serial */ -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ target + 2*x, /* target msc */ -+ 0, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ xcb_flush(c); -+ -+ 
complete = 0; -+ count = 0; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ if (ce->kind != XCB_PRESENT_COMPLETE_KIND_PIXMAP) -+ break; -+ -+ assert(ce->serial); -+ if (ce->serial != 0xdeadbeef) { -+ uint64_t msc; -+ int diff; -+ -+ x = ce->serial & 0xffff; -+ y = ce->serial >> 16; -+ -+ msc = target; -+ msc -= target % x; -+ msc += y; -+ if (msc <= target) -+ msc += x; -+ -+ diff = (int64_t)(ce->msc - msc); -+ if (diff < 0) { -+ if (-diff > earliest) { -+ fprintf(stderr, "\tframe (%d, %d) displayed early by %d frames\n", y, x, -diff); -+ earliest = -diff; -+ } -+ early++; -+ ret++; -+ } else if (diff > 0) { -+ if (diff > latest) { -+ fprintf(stderr, "\tframe (%d, %d) displayed late by %d frames\n", y, x, diff); -+ latest = diff; -+ } -+ late++; -+ ret++; -+ } -+ count++; -+ } else -+ complete = 1; -+ free(ev); -+ } while (!complete); -+ -+ if (early) -+ printf("\t%d frames shown too early (worst %d)!\n", early, earliest); -+ if (late) -+ printf("\t%d frames shown too late (worst %d)!\n", late, latest); -+ -+ if (count != expect) { -+ fprintf(stderr, "Sentinel frame received too early! 
%d frames outstanding\n", expect - count); -+ ret++; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ free(ev); -+ } while (++count != expect); -+ } -+ -+ XFreePixmap(dpy, pixmap); -+ xcb_xfixes_destroy_region(c, region); -+ -+ XSync(dpy, True); -+ ret += !!_x_error_occurred; -+ -+ return ret; -+} -+ -+static int test_future_msc(Display *dpy, void *Q) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Window root = DefaultRootWindow(dpy); -+ int ret = 0, n; -+ uint64_t msc, ust; -+ int complete, count; -+ int early = 0, late = 0; -+ int earliest = 0, latest = 0; -+ uint64_t interval; -+ -+ printf("Testing notifies into the future\n"); -+ _x_error_occurred = 0; -+ -+ interval = msc_interval(dpy, root, Q); -+ if (interval == 0) { -+ printf("Zero delay between frames\n"); -+ return 1; -+ } -+ msc = check_msc(dpy, root, Q, 0, &ust); -+ printf("Initial msc=%llx, interval between frames %lldus\n", -+ (long long)msc, (long long)interval); -+ -+ for (n = 1; n <= 10; n++) -+ xcb_present_notify_msc(c, root, n, msc + 60 + n*15*60, 0, 0); -+ xcb_present_notify_msc(c, root, 0xdeadbeef, msc + 60 + n*15*60, 0, 0); -+ xcb_flush(c); -+ -+ complete = 0; -+ count = 0; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ -+ if (ce->serial == 0xdeadbeef) { -+ int64_t time, tolerance; -+ -+ tolerance = 60 + 15*60*n/10; -+ if (tolerance < interval) -+ tolerance = interval; -+ -+ time = ce->ust - (ust + (60 + 15*60*n) * interval); -+ if (time < -(int64_t)tolerance) { -+ fprintf(stderr, -+ "\tnotifies completed too early by %lldms, tolerance 
%lldus\n", -+ (long long)(-time / 1000), (long long)tolerance); -+ } else if (time > (int64_t)tolerance) { -+ fprintf(stderr, -+ "\tnotifies completed too late by %lldms, tolerance %lldus\n", -+ (long long)(time / 1000), (long long)tolerance); -+ } -+ complete = 1; -+ } else { -+ int diff = (int64_t)(ce->msc - (15*60*ce->serial + msc + 60)); -+ -+ if (ce->serial != count + 1) { -+ fprintf(stderr, "vblank received out of order! expected %d, received %d\n", -+ count + 1, (int)ce->serial); -+ ret++; -+ } -+ count++; -+ -+ if (diff < 0) { -+ if (-diff > earliest) { -+ fprintf(stderr, "\tnotify %d early by %d msc\n", ce->serial, -diff); -+ earliest = -diff; -+ } -+ early++; -+ ret++; -+ } else if (diff > 0) { -+ if (diff > latest) { -+ fprintf(stderr, "\tnotify %d late by %d msc\n", ce->serial, diff); -+ latest = diff; -+ } -+ late++; -+ ret++; -+ } -+ } -+ free(ev); -+ } while (!complete); -+ -+ if (early) -+ printf("\t%d notifies too early (worst %d)!\n", early, earliest); -+ if (late) -+ printf("\t%d notifies too late (worst %d)!\n", late, latest); -+ -+ if (count != 10) { -+ fprintf(stderr, "Sentinel vblank received too early! 
%d waits outstanding\n", 10 - count); -+ ret++; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ free(ev); -+ } while (++count != 10); -+ } -+ -+ XSync(dpy, True); -+ ret += !!_x_error_occurred; -+ -+ return ret; -+} -+ -+static int test_wrap_msc(Display *dpy) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Window root, win; -+ int x, y; -+ unsigned int width, height; -+ unsigned border, depth; -+ XSetWindowAttributes attr; -+ int ret = 0, n; -+ uint64_t msc, ust; -+ int complete; -+ uint64_t interval; -+ void *Q; -+ -+ XGetGeometry(dpy, DefaultRootWindow(dpy), -+ &root, &x, &y, &width, &height, &border, &depth); -+ -+ attr.override_redirect = 1; -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, depth, -+ InputOutput, DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XMapWindow(dpy, win); -+ XSync(dpy, True); -+ if (_x_error_occurred) -+ return 1; - -- if (msc < last_msc) { -- printf("Invalid MSC: was %llu, now %llu\n", -- (long long)last_msc, (long long)msc); -+ printf("Testing wraparound notifies\n"); -+ _x_error_occurred = 0; -+ -+ Q = setup_msc(dpy, win); -+ interval = msc_interval(dpy, win, Q); -+ if (interval == 0) { -+ printf("Zero delay between frames\n"); -+ return 1; - } -+ msc = check_msc(dpy, win, Q, 0, &ust); -+ printf("Initial msc=%llx, interval between frames %lldus\n", -+ (long long)msc, (long long)interval); -+ -+ for (n = 1; n <= 10; n++) -+ xcb_present_notify_msc(c, win, n, -+ msc + ((long long)n<<32) + n, -+ 0, 0); -+ for (n = 1; n <= 10; n++) -+ xcb_present_notify_msc(c, win, -n, -+ 0, (long long)n << 32, 0); -+ xcb_present_notify_msc(c, win, 0xdeadbeef, msc + 60*10, 0, 0); -+ xcb_flush(c); - -- return msc; -+ complete = 0; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ 
xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ -+ if (ce->serial == 0xdeadbeef) { -+ complete = 1; -+ } else { -+ fprintf(stderr, -+ "\tnotify %d recieved at +%llu\n", -+ ce->serial, ce->msc - msc); -+ ret++; -+ } -+ free(ev); -+ } while (!complete); -+ -+ teardown_msc(dpy, Q); -+ XDestroyWindow(dpy, win); -+ XSync(dpy, True); -+ -+ return ret; - } - --static void teardown_msc(Display *dpy, void *q) -+static int test_exhaustion_msc(Display *dpy, void *Q) - { -- xcb_unregister_for_special_event(XGetXCBConnection(dpy), q); -+#define N_VBLANKS 256 /* kernel event queue length: 128 vblanks */ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Window root = DefaultRootWindow(dpy); -+ int ret = 0, n, complete; -+ int earliest = 0, early = 0; -+ int latest = 0, late = 0; -+ uint64_t msc; -+ -+ printf("Testing notifies with long queues\n"); -+ _x_error_occurred = 0; -+ -+ msc = check_msc(dpy, root, Q, 0, NULL); -+ for (n = N_VBLANKS; n--; ) -+ xcb_present_notify_msc(c, root, N_VBLANKS, msc + N_VBLANKS, 0, 0); -+ for (n = 1; n <= N_VBLANKS ; n++) -+ xcb_present_notify_msc(c, root, n, msc + n, 0, 0); -+ xcb_flush(c); -+ -+ complete = 2*N_VBLANKS; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ int diff; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ -+ diff = (int64_t)(ce->msc - msc - ce->serial); -+ if (diff < 0) { -+ if (-diff > earliest) { -+ fprintf(stderr, "\tnotify %d early by %d msc\n",(int)ce->serial, -diff); -+ earliest = -diff; -+ } -+ early++; -+ ret++; -+ } else if (diff > 0) { -+ if (diff > latest) { -+ fprintf(stderr, "\tnotify %d late by %d msc\n", (int)ce->serial, diff); -+ latest = diff; -+ } -+ late++; -+ ret++; 
-+ } -+ free(ev); -+ } while (--complete); -+ -+ if (early) -+ printf("\t%d notifies too early (worst %d)!\n", early, earliest); -+ if (late) -+ printf("\t%d notifies too late (worst %d)!\n", late, latest); -+ -+ XSync(dpy, True); -+ ret += !!_x_error_occurred; -+ -+ return ret; -+#undef N_VBLANKS - } --static int test_whole(Display *dpy) -+ -+static int test_accuracy_msc(Display *dpy, void *Q) - { -- Pixmap pixmap; -- struct dri3_fence fence; -- Window root; -- unsigned int width, height; -- unsigned border, depth; -- int x, y, ret = 1; -+#define N_VBLANKS (60 * 120) /* ~2 minutes */ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Window root = DefaultRootWindow(dpy); -+ int ret = 0, n; -+ uint64_t msc; -+ int early = 0, late = 0; -+ int earliest = 0, latest = 0; -+ int complete, count; - -- XGetGeometry(dpy, DefaultRootWindow(dpy), -- &root, &x, &y, &width, &height, &border, &depth); -+ printf("Testing notify accuracy\n"); -+ _x_error_occurred = 0; - -- if (dri3_create_fence(dpy, root, &fence)) -- return 0; -+ msc = check_msc(dpy, root, Q, 0, NULL); -+ for (n = 0; n <= N_VBLANKS; n++) -+ xcb_present_notify_msc(c, root, n, msc + 60 + n, 0, 0); -+ xcb_present_notify_msc(c, root, 0xdeadbeef, msc + 60 + n, 0, 0); -+ xcb_flush(c); -+ -+ complete = 0; -+ count = 0; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ -+ if (ce->serial != 0xdeadbeef) { -+ int diff = (int64_t)(ce->msc - (msc + ce->serial + 60)); -+ if (diff < 0) { -+ if (-diff > earliest) { -+ fprintf(stderr, "\tnotify %d early by %d msc\n", ce->serial, -diff); -+ earliest = -diff; -+ } -+ early++; -+ ret++; -+ } else if (diff > 0) { -+ if (diff > latest) { -+ fprintf(stderr, "\tnotify %d late by %d msc\n", ce->serial, diff); -+ latest = diff; -+ } -+ late++; -+ ret++; -+ } -+ count++; 
-+ } else -+ complete = 1; -+ free(ev); -+ } while (!complete); -+ -+ if (early) -+ printf("\t%d notifies too early (worst %d)!\n", early, earliest); -+ if (late) -+ printf("\t%d notifies too late (worst %d)!\n", late, latest); -+ -+ if (count != N_VBLANKS+1) { -+ fprintf(stderr, "Sentinel vblank received too early! %d waits outstanding\n", N_VBLANKS+1 - count); -+ ret++; -+ do { -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ free(ev); -+ } while (++count != N_VBLANKS+1); -+ } -+ -+ XSync(dpy, True); -+ ret += !!_x_error_occurred; -+ -+ return ret; -+#undef N_VBLANKS -+} - -- printf("Testing whole screen flip: %dx%d\n", width, height); -+static int test_modulus_msc(Display *dpy, void *Q) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ Window root = DefaultRootWindow(dpy); -+ xcb_present_complete_notify_event_t *ce; -+ xcb_generic_event_t *ev; -+ int x, y, ret = 0; -+ uint64_t target; -+ int early = 0, late = 0; -+ int earliest = 0, latest = 0; -+ int complete, count, expect; -+ -+ printf("Testing notify modulus\n"); - _x_error_occurred = 0; - -- xshmfence_reset(fence.addr); -+ target = wait_vblank(dpy, root, Q); - -- pixmap = XCreatePixmap(dpy, root, width, height, depth); -- xcb_present_pixmap(XGetXCBConnection(dpy), -- root, pixmap, -- 0, /* sbc */ -- 0, /* valid */ -- 0, /* update */ -- 0, /* x_off */ -- 0, /* y_off */ -- None, -- None, /* wait fence */ -- fence.xid, -- XCB_PRESENT_OPTION_NONE, -- 0, /* target msc */ -- 0, /* divisor */ -- 0, /* remainder */ -- 0, NULL); -- XFreePixmap(dpy, pixmap); -+ expect = 0; -+ xcb_present_notify_msc(c, root, 0, 0, 0, 0); -+ for (x = 1; x <= 19; x++) { -+ for (y = 0; y < x; y++) { -+ xcb_present_notify_msc(c, root, y << 16 | x, 0, x, y); -+ expect++; -+ } -+ } -+ xcb_present_notify_msc(c, root, 
0xdeadbeef, target + 2*x, 0, 0); -+ xcb_flush(c); - -- pixmap = XCreatePixmap(dpy, root, width, height, depth); -- xcb_present_pixmap(XGetXCBConnection(dpy), -- root, pixmap, -- 0, /* sbc */ -- 0, /* valid */ -- 0, /* update */ -- 0, /* x_off */ -- 0, /* y_off */ -- None, -- None, /* wait fence */ -- None, /* sync fence */ -- XCB_PRESENT_OPTION_NONE, -- 0, /* target msc */ -- 0, /* divisor */ -- 0, /* remainder */ -- 0, NULL); -- XFreePixmap(dpy, pixmap); -- XFlush(dpy); -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev) { -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ assert(ce->serial == 0); -+ assert(target == ce->msc); -+ target = ce->msc; -+ } - -- ret = !!xshmfence_await(fence.addr); -- dri3_fence_free(dpy, &fence); -+ complete = 0; -+ count = 0; -+ do { -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ -+ assert(ce->serial); -+ if (ce->serial != 0xdeadbeef) { -+ uint64_t msc; -+ int diff; -+ -+ x = ce->serial & 0xffff; -+ y = ce->serial >> 16; -+ -+ msc = target; -+ msc -= target % x; -+ msc += y; -+ if (msc <= target) -+ msc += x; -+ -+ diff = (int64_t)(ce->msc - msc); -+ if (diff < 0) { -+ if (-diff > earliest) { -+ fprintf(stderr, "\tnotify (%d, %d) early by %d msc (target %lld, reported %lld)\n", y, x, -diff, (long long)msc, (long long)ce->msc); -+ earliest = -diff; -+ } -+ early++; -+ ret++; -+ } else if (diff > 0) { -+ if (diff > latest) { -+ fprintf(stderr, "\tnotify (%d, %d) late by %d msc (target %lld, reported %lld)\n", y, x, diff, (long long)msc, (long long)ce->msc); -+ latest = diff; -+ } -+ late++; -+ ret++; -+ } -+ count++; -+ } else -+ complete = 1; -+ free(ev); -+ } while (!complete); -+ -+ if (early) -+ printf("\t%d notifies too early (worst %d)!\n", early, earliest); -+ if (late) -+ printf("\t%d notifies too late (worst %d)!\n", late, 
latest); -+ -+ if (count != expect) { -+ fprintf(stderr, "Sentinel vblank received too early! %d waits outstanding\n", expect - count); -+ ret++; -+ do { -+ ev = xcb_wait_for_special_event(c, Q); -+ if (ev == NULL) -+ break; -+ -+ ce = (xcb_present_complete_notify_event_t *)ev; -+ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); -+ free(ev); -+ } while (++count != expect); -+ } - - XSync(dpy, True); - ret += !!_x_error_occurred; -@@ -279,8 +1471,6 @@ static int for_each_crtc(Display *dpy, - for (i = 0; i < res->ncrtc; i++) - original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); - -- printf("noutput=%d, ncrtc=%d\n", res->noutput, res->ncrtc); -- - for (i = 0; i < res->noutput; i++) { - XRROutputInfo *output; - XRRModeInfo *mode; -@@ -322,7 +1512,7 @@ static int for_each_crtc(Display *dpy, - free(original_crtc); - XRRFreeScreenResources(res); - -- return j; -+ return err; - } - - struct test_crtc { -@@ -335,6 +1525,7 @@ struct test_crtc { - uint64_t msc; - }; - #define SYNC 0x1 -+#define FUTURE 0x2 - - static int __test_crtc(Display *dpy, RRCrtc crtc, - int width, int height, -@@ -344,7 +1535,7 @@ static int __test_crtc(Display *dpy, RRCrtc crtc, - Pixmap pixmap; - int err = 0; - -- test->msc = check_msc(dpy, test->win, test->queue, test->msc); -+ test->msc = check_msc(dpy, test->win, test->queue, test->msc, NULL); - - if (test->flags & SYNC) - xshmfence_reset(test->fence.addr); -@@ -361,16 +1552,14 @@ static int __test_crtc(Display *dpy, RRCrtc crtc, - None, /* wait fence */ - test->flags & SYNC ? 
test->fence.xid : None, - XCB_PRESENT_OPTION_NONE, -- 0, /* target msc */ -+ test->msc, /* target msc */ - 1, /* divisor */ - 0, /* remainder */ - 0, NULL); -- XFreePixmap(dpy, pixmap); -- - if (test->flags & SYNC) { -- pixmap = XCreatePixmap(dpy, test->win, width, height, test->depth); -+ Pixmap tmp = XCreatePixmap(dpy, test->win, width, height, test->depth); - xcb_present_pixmap(XGetXCBConnection(dpy), -- test->win, pixmap, -+ test->win, tmp, - 1, /* sbc */ - 0, /* valid */ - 0, /* update */ -@@ -380,16 +1569,17 @@ static int __test_crtc(Display *dpy, RRCrtc crtc, - None, /* wait fence */ - None, /* sync fence */ - XCB_PRESENT_OPTION_NONE, -- 1, /* target msc */ -+ test->msc + (test->flags & FUTURE ? 5 * 16 : 1), /* target msc */ - 1, /* divisor */ - 0, /* remainder */ - 0, NULL); -- XFreePixmap(dpy, pixmap); -+ XFreePixmap(dpy, tmp); - XFlush(dpy); - err += !!xshmfence_await(test->fence.addr); - } -+ XFreePixmap(dpy, pixmap); - -- test->msc = check_msc(dpy, test->win, test->queue, test->msc); -+ test->msc = check_msc(dpy, test->win, test->queue, test->msc, NULL); - return err; - } - -@@ -410,15 +1600,23 @@ static int test_crtc(Display *dpy, void *queue, uint64_t last_msc) - - printf("Testing each crtc, without waiting for each flip\n"); - test.flags = 0; -+ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); - err += for_each_crtc(dpy, __test_crtc, &test); -+ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); - - printf("Testing each crtc, waiting for flips to complete\n"); - test.flags = SYNC; -+ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); - err += for_each_crtc(dpy, __test_crtc, &test); -+ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); - -- test.msc = check_msc(dpy, test.win, test.queue, test.msc); -- dri3_fence_free(dpy, &test.fence); -+ printf("Testing each crtc, with future flips\n"); -+ test.flags = FUTURE | SYNC; -+ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); -+ err += 
for_each_crtc(dpy, __test_crtc, &test); -+ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); - -+ dri3_fence_free(dpy, &test.fence); - XSync(dpy, True); - err += !!_x_error_occurred; - -@@ -536,6 +1734,31 @@ static int gem_set_caching(int fd, uint32_t handle, int caching) - return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0; - } - -+static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) -+{ -+ struct drm_i915_gem_set_tiling set_tiling; -+ int err; -+ -+restart: -+ set_tiling.handle = handle; -+ set_tiling.tiling_mode = tiling; -+ set_tiling.stride = stride; -+ -+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) -+ return 1; -+ -+ err = errno; -+ if (err == EINTR) -+ goto restart; -+ -+ if (err == EAGAIN) { -+ sched_yield(); -+ goto restart; -+ } -+ -+ return 0; -+} -+ - static int gem_export(int fd, uint32_t handle) - { - struct drm_prime_handle args; -@@ -557,6 +1780,126 @@ static void gem_close(int fd, uint32_t handle) - (void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close); - } - -+static int test_dri3_tiling(Display *dpy) -+{ -+ Window win = DefaultRootWindow(dpy); -+ const int tiling[] = { I915_TILING_NONE, I915_TILING_X, I915_TILING_Y }; -+ Window root; -+ unsigned int width, height; -+ unsigned border, depth, bpp; -+ unsigned stride, size; -+ void *Q; -+ int x, y; -+ int device; -+ int line = -1; -+ int t; -+ -+ device = dri3_open(dpy); -+ if (device < 0) -+ return 0; -+ -+ if (!is_intel(device)) -+ return 0; -+ -+ printf("Opened Intel DRI3 device\n"); -+ -+ XGetGeometry(dpy, win, &root, &x, &y, -+ &width, &height, &border, &depth); -+ -+ switch (depth) { -+ case 8: bpp = 8; break; -+ case 15: case 16: bpp = 16; break; -+ case 24: case 32: bpp = 32; break; -+ default: return 0; -+ } -+ -+ stride = ALIGN(width * bpp/8, 512); -+ size = PAGE_ALIGN(stride * ALIGN(height, 32)); -+ printf("Creating DRI3 %dx%d (source stride=%d, size=%d) for GTT\n", -+ width, height, stride, size); -+ -+ _x_error_occurred 
= 0; -+ Q = setup_msc(dpy, root); -+ -+ for (t = 0; t < sizeof(tiling)/sizeof(tiling[0]); t++) { -+ uint64_t msc; -+ uint32_t src; -+ int src_fd; -+ Pixmap src_pix; -+ -+ src = gem_create(device, size); -+ if (!src) { -+ line = __LINE__; -+ goto fail; -+ } -+ -+ gem_set_tiling(device, src, tiling[t], stride); -+ -+ src_fd = gem_export(device, src); -+ if (src_fd < 0) { -+ line = __LINE__; -+ goto fail; -+ } -+ -+ src_pix = dri3_create_pixmap(dpy, root, -+ width, height, depth, -+ src_fd, bpp, stride, size); -+ -+ msc = wait_vblank(dpy, root, Q); -+ -+ xcb_present_pixmap(XGetXCBConnection(dpy), -+ win, src_pix, -+ 0, /* sbc */ -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ msc + 2, /* target msc */ -+ 1, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ -+ xcb_present_pixmap(XGetXCBConnection(dpy), -+ win, src_pix, -+ 0, /* sbc */ -+ 0, /* valid */ -+ 0, /* update */ -+ 0, /* x_off */ -+ 0, /* y_off */ -+ None, -+ None, /* wait fence */ -+ None, -+ XCB_PRESENT_OPTION_NONE, -+ msc + 3, /* target msc */ -+ 1, /* divisor */ -+ 0, /* remainder */ -+ 0, NULL); -+ -+ XSync(dpy, True); -+ if (_x_error_occurred) { -+ line = __LINE__; -+ goto fail; -+ } -+ XFreePixmap(dpy, src_pix); -+ _x_error_occurred = 0; -+ -+ close(src_fd); -+ gem_close(device, src); -+ } -+ -+ teardown_msc(dpy, Q); -+ return 0; -+ -+fail: -+ printf("%s failed with tiling %d, line %d\n", __func__, tiling[t], line); -+ teardown_msc(dpy, Q); -+ return 1; -+} -+ - static int test_dri3(Display *dpy) - { - Window win = DefaultRootWindow(dpy); -@@ -670,8 +2013,32 @@ fail: - static int has_present(Display *dpy) - { - xcb_connection_t *c = XGetXCBConnection(dpy); -- xcb_present_query_version_reply_t *reply; - xcb_generic_error_t *error = NULL; -+ void *reply; -+ -+ reply = xcb_xfixes_query_version_reply(c, -+ xcb_xfixes_query_version(c, -+ XCB_XFIXES_MAJOR_VERSION, -+ XCB_XFIXES_MINOR_VERSION), -+ &error); -+ 
free(reply); -+ free(error); -+ if (reply == NULL) { -+ fprintf(stderr, "XFixes not supported on %s\n", DisplayString(dpy)); -+ return 0; -+ } -+ -+ reply = xcb_dri3_query_version_reply(c, -+ xcb_dri3_query_version(c, -+ XCB_DRI3_MAJOR_VERSION, -+ XCB_DRI3_MINOR_VERSION), -+ &error); -+ free(reply); -+ free(error); -+ if (reply == NULL) { -+ fprintf(stderr, "DRI3 not supported on %s\n", DisplayString(dpy)); -+ return 0; -+ } - - reply = xcb_present_query_version_reply(c, - xcb_present_query_version(c, -@@ -681,14 +2048,32 @@ static int has_present(Display *dpy) - - free(reply); - free(error); -+ if (reply == NULL) { -+ fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy)); -+ return 0; -+ } -+ -+ return 1; -+} -+ -+static int has_composite(Display *dpy) -+{ -+ int event, error; -+ int major, minor; -+ -+ if (!XCompositeQueryExtension(dpy, &event, &error)) -+ return 0; -+ -+ XCompositeQueryVersion(dpy, &major, &minor); - -- return reply != NULL; -+ return major > 0 || minor >= 4; - } - - int main(void) - { - Display *dpy; - Window root; -+ int dummy; - int error = 0; - uint64_t last_msc; - void *queue; -@@ -700,27 +2085,135 @@ int main(void) - if (!has_present(dpy)) - return 77; - -+ if (DPMSQueryExtension(dpy, &dummy, &dummy)) -+ DPMSDisable(dpy); -+ - root = DefaultRootWindow(dpy); - - signal(SIGALRM, SIG_IGN); - XSetErrorHandler(_check_error_handler); - - queue = setup_msc(dpy, root); -- last_msc = check_msc(dpy, root, queue, 0); -+ last_msc = check_msc(dpy, root, queue, 0, NULL); -+ -+ error += test_future_msc(dpy, queue); -+ last_msc = check_msc(dpy, root, queue, last_msc, NULL); -+ -+ error += test_wrap_msc(dpy); -+ last_msc = check_msc(dpy, root, queue, last_msc, NULL); -+ -+ error += test_accuracy_msc(dpy, queue); -+ last_msc = check_msc(dpy, root, queue, last_msc, NULL); -+ -+ error += test_modulus_msc(dpy, queue); -+ last_msc = check_msc(dpy, root, queue, last_msc, NULL); -+ -+ error += test_exhaustion_msc(dpy, queue); -+ last_msc = 
check_msc(dpy, root, queue, last_msc, NULL); -+ -+ for (dummy = 0; dummy <= 3; dummy++) { -+ Window win; -+ uint64_t msc = 0; -+ XSetWindowAttributes attr; -+ Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy)); -+ unsigned int width, height; -+ unsigned border, depth; -+ const char *phase; -+ int x, y; -+ void *Q; -+ -+ attr.override_redirect = 1; -+ -+ XGetGeometry(dpy, root, &win, &x, &y, -+ &width, &height, &border, &depth); -+ -+ _x_error_occurred = 0; -+ switch (dummy) { -+ case 0: -+ win = root; -+ phase = "root"; -+ break; -+ case 1: -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, depth, -+ InputOutput, visual, -+ CWOverrideRedirect, &attr); -+ phase = "fullscreen"; -+ break; -+ case 2: -+ win = XCreateWindow(dpy, root, -+ 0, 0, width/2, height/2, 0, depth, -+ InputOutput, visual, -+ CWOverrideRedirect, &attr); -+ phase = "window"; -+ break; -+ case 3: -+ if (!has_composite(dpy)) -+ continue; -+ -+ win = XCreateWindow(dpy, root, -+ 0, 0, width, height, 0, -+ DefaultDepth(dpy, DefaultScreen(dpy)), -+ InputOutput, -+ DefaultVisual(dpy, DefaultScreen(dpy)), -+ CWOverrideRedirect, &attr); -+ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); -+ phase = "composite"; -+ break; -+ -+ default: -+ phase = "broken"; -+ win = root; -+ abort(); -+ break; -+ } -+ -+ XMapWindow(dpy, win); -+ XSync(dpy, True); -+ if (_x_error_occurred) -+ continue; -+ -+ Q = setup_msc(dpy, win); -+ msc = check_msc(dpy, win, Q, msc, NULL); - -- error += test_whole(dpy); -- last_msc = check_msc(dpy, root, queue, last_msc); -+ error += test_whole(dpy, win, phase); -+ msc = check_msc(dpy, win, Q, msc, NULL); -+ -+ error += test_double(dpy, win, phase, Q); -+ msc = check_msc(dpy, win, Q, msc, NULL); -+ -+ error += test_future(dpy, win, phase, Q); -+ msc = check_msc(dpy, win, Q, msc, NULL); -+ -+ error += test_accuracy(dpy, win, phase, Q); -+ msc = check_msc(dpy, win, Q, msc, NULL); -+ -+ error += test_modulus(dpy, win, phase, Q); -+ msc = check_msc(dpy, win, Q, msc, 
NULL); -+ -+ error += test_exhaustion(dpy, win, phase, Q); -+ msc = check_msc(dpy, win, Q, msc, NULL); -+ -+ teardown_msc(dpy, Q); -+ if (win != root) -+ XDestroyWindow(dpy, win); -+ } - - error += test_crtc(dpy, queue, last_msc); -- last_msc = check_msc(dpy, root, queue, last_msc); -+ last_msc = check_msc(dpy, root, queue, last_msc, NULL); - - error += test_shm(dpy); -- last_msc = check_msc(dpy, root, queue, last_msc); -+ last_msc = check_msc(dpy, root, queue, last_msc, NULL); - - error += test_dri3(dpy); -- last_msc = check_msc(dpy, root, queue, last_msc); -+ last_msc = check_msc(dpy, root, queue, last_msc, NULL); -+ -+ error += test_dri3_tiling(dpy); -+ last_msc = check_msc(dpy, root, queue, last_msc, NULL); - - teardown_msc(dpy, queue); - -+ if (DPMSQueryExtension(dpy, &dummy, &dummy)) -+ DPMSEnable(dpy); - return !!error; - } -diff --git a/test/render-glyphs.c b/test/render-glyphs.c -new file mode 100644 -index 00000000..8822e36a ---- /dev/null -+++ b/test/render-glyphs.c -@@ -0,0 +1,441 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include /* for XDestroyImage */ -+#include /* for pixman blt functions */ -+ -+#include "test.h" -+ -+static const XRenderColor colors[] = { -+ /* red, green, blue, alpha */ -+ { 0 }, -+ { 0, 0, 0, 0xffff }, -+ { 0xffff, 0, 0, 0xffff }, -+ { 0, 0xffff, 0, 0xffff }, -+ { 0, 0, 0xffff, 0xffff }, -+ { 0xffff, 0xffff, 0xffff, 0xffff }, -+}; -+ -+static struct clip { -+ void *func; -+} clips[] = { -+ { NULL }, -+}; -+ -+static int _x_error_occurred; -+ -+static int -+_check_error_handler(Display *display, -+ XErrorEvent *event) -+{ -+ _x_error_occurred = 1; -+ return False; /* ignored */ -+} -+ -+static void clear(struct test_display *dpy, -+ struct test_target *tt, -+ const XRenderColor *c) -+{ -+ XRenderFillRectangle(dpy->dpy, PictOpClear, tt->picture, c, -+ 0, 0, tt->width, tt->height); -+} -+ -+static bool check_op(struct test_display *dpy, int op, struct test_target *tt) -+{ -+ XRenderColor render_color 
= {0}; -+ -+ XSync(dpy->dpy, True); -+ _x_error_occurred = 0; -+ -+ XRenderFillRectangle(dpy->dpy, op, -+ tt->picture, &render_color, -+ 0, 0, 0, 0); -+ -+ XSync(dpy->dpy, True); -+ return _x_error_occurred == 0; -+} -+ -+struct glyph_iter { -+ enum { -+ GLYPHS, OP, DST, SRC, MASK, CLIP, -+ } stage; -+ -+ int glyph_format; -+ int op; -+ int dst_color; -+ int src_color; -+ int mask_format; -+ int clip; -+ -+ struct { -+ struct test_display *dpy; -+ struct test_target tt; -+ GlyphSet glyphset; -+ Picture src; -+ XRenderPictFormat *mask_format; -+ } ref, out; -+}; -+ -+static void glyph_iter_init(struct glyph_iter *gi, -+ struct test *t, enum target target) -+{ -+ memset(gi, 0, sizeof(*gi)); -+ -+ gi->out.dpy = &t->out; -+ test_target_create_render(&t->out, target, &gi->out.tt); -+ -+ gi->ref.dpy = &t->ref; -+ test_target_create_render(&t->ref, target, &gi->ref.tt); -+ -+ gi->stage = GLYPHS; -+ gi->glyph_format = -1; -+ gi->op = -1; -+ gi->dst_color = -1; -+ gi->src_color = -1; -+ gi->mask_format = -1; -+ gi->clip = -1; -+} -+ -+static void render_clear(char *image, int image_size, int bpp) -+{ -+ memset(image, 0, image_size); -+} -+ -+static void render_black(char *image, int image_size, int bpp) -+{ -+ if (bpp == 4) { -+ uint32_t *p = (uint32_t *)image; -+ image_size /= 4; -+ while (image_size--) -+ *p++ = 0x000000ff; -+ } else -+ memset(image, 0x55, image_size); -+} -+ -+static void render_green(char *image, int image_size, int bpp) -+{ -+ if (bpp == 4) { -+ uint32_t *p = (uint32_t *)image; -+ image_size /= 4; -+ while (image_size--) -+ *p++ = 0xffff0000; -+ } else -+ memset(image, 0xaa, image_size); -+} -+ -+static void render_white(char *image, int image_size, int bpp) -+{ -+ memset(image, 0xff, image_size); -+} -+ -+static GlyphSet create_glyphs(Display *dpy, int format_id) -+{ -+#define N_GLYPHS 4 -+ XRenderPictFormat *format; -+ XGlyphInfo glyph = { 8, 8, 0, 0, 8, 0 }; -+ char image[4*8*8]; -+ GlyphSet glyphset; -+ Glyph gid; -+ int image_size; -+ int bpp; -+ 
int n; -+ -+ format = XRenderFindStandardFormat(dpy, format_id); -+ if (format == NULL) -+ return 0; -+ -+ switch (format_id) { -+ case PictStandardARGB32: -+ case PictStandardRGB24: -+ image_size = 4 * 8 * 8; -+ bpp = 4; -+ break; -+ case PictStandardA8: -+ case PictStandardA4: -+ image_size = 8 * 8; -+ bpp = 1; -+ break; -+ case PictStandardA1: -+ image_size = 8; -+ bpp = 0; -+ break; -+ default: -+ return 0; -+ } -+ -+ glyphset = XRenderCreateGlyphSet(dpy, format); -+ for (n = 0; n < N_GLYPHS; n++) { -+ gid = n; -+ -+ switch (n) { -+ case 0: render_clear(image, image_size, bpp); break; -+ case 1: render_black(image, image_size, bpp); break; -+ case 2: render_green(image, image_size, bpp); break; -+ case 3: render_white(image, image_size, bpp); break; -+ } -+ -+ XRenderAddGlyphs(dpy, glyphset, -+ &gid, &glyph, 1, image, image_size); -+ } -+ -+ return glyphset; -+} -+ -+static const char *glyph_name(int n) -+{ -+ switch (n) { -+ case 0: return "clear"; -+ case 1: return "black"; -+ case 2: return "green"; -+ case 3: return "white"; -+ default: return "unknown"; -+ } -+} -+ -+static bool glyph_iter_next(struct glyph_iter *gi) -+{ -+restart: -+ if (gi->stage == GLYPHS) { -+ if (++gi->glyph_format == PictStandardNUM) -+ return false; -+ -+ if (gi->out.glyphset) -+ XRenderFreeGlyphSet(gi->out.dpy->dpy, -+ gi->out.glyphset); -+ gi->out.glyphset = create_glyphs(gi->out.dpy->dpy, -+ gi->glyph_format); -+ -+ if (gi->ref.glyphset) -+ XRenderFreeGlyphSet(gi->ref.dpy->dpy, -+ gi->ref.glyphset); -+ gi->ref.glyphset = create_glyphs(gi->ref.dpy->dpy, -+ gi->glyph_format); -+ -+ gi->stage++; -+ } -+ -+ if (gi->stage == OP) { -+ do { -+ if (++gi->op == 255) -+ goto reset_op; -+ } while (!check_op(gi->out.dpy, gi->op, &gi->out.tt) || -+ !check_op(gi->ref.dpy, gi->op, &gi->ref.tt)); -+ -+ gi->stage++; -+ } -+ -+ if (gi->stage == DST) { -+ if (++gi->dst_color == ARRAY_SIZE(colors)) -+ goto reset_dst; -+ -+ gi->stage++; -+ } -+ -+ if (gi->stage == SRC) { -+ if (++gi->src_color == 
ARRAY_SIZE(colors)) -+ goto reset_src; -+ -+ if (gi->ref.src) -+ XRenderFreePicture(gi->ref.dpy->dpy, gi->ref.src); -+ gi->ref.src = XRenderCreateSolidFill(gi->ref.dpy->dpy, -+ &colors[gi->src_color]); -+ -+ if (gi->out.src) -+ XRenderFreePicture(gi->out.dpy->dpy, gi->out.src); -+ gi->out.src = XRenderCreateSolidFill(gi->out.dpy->dpy, -+ &colors[gi->src_color]); -+ -+ gi->stage++; -+ } -+ -+ if (gi->stage == MASK) { -+ if (++gi->mask_format > PictStandardNUM) -+ goto reset_mask; -+ -+ if (gi->mask_format == PictStandardRGB24) -+ gi->mask_format++; -+ -+ if (gi->mask_format < PictStandardNUM) { -+ gi->out.mask_format = XRenderFindStandardFormat(gi->out.dpy->dpy, -+ gi->mask_format); -+ gi->ref.mask_format = XRenderFindStandardFormat(gi->ref.dpy->dpy, -+ gi->mask_format); -+ } else { -+ gi->out.mask_format = NULL; -+ gi->ref.mask_format = NULL; -+ } -+ -+ gi->stage++; -+ } -+ -+ if (gi->stage == CLIP) { -+ if (++gi->clip == ARRAY_SIZE(clips)) -+ goto reset_clip; -+ -+ gi->stage++; -+ } -+ -+ gi->stage--; -+ return true; -+ -+reset_op: -+ gi->op = -1; -+reset_dst: -+ gi->dst_color = -1; -+reset_src: -+ gi->src_color = -1; -+reset_mask: -+ gi->mask_format = -1; -+reset_clip: -+ gi->clip = -1; -+ gi->stage--; -+ goto restart; -+} -+ -+static void glyph_iter_fini(struct glyph_iter *gi) -+{ -+ if (gi->out.glyphset) -+ XRenderFreeGlyphSet (gi->out.dpy->dpy, gi->out.glyphset); -+ if (gi->ref.glyphset) -+ XRenderFreeGlyphSet (gi->ref.dpy->dpy, gi->ref.glyphset); -+ -+ test_target_destroy_render(gi->out.dpy, &gi->out.tt); -+ test_target_destroy_render(gi->ref.dpy, &gi->ref.tt); -+} -+ -+static const char *stdformat_to_str(int id) -+{ -+ switch (id) { -+ case PictStandardARGB32: return "ARGB32"; -+ case PictStandardRGB24: return "RGB24"; -+ case PictStandardA8: return "A8"; -+ case PictStandardA4: return "A4"; -+ case PictStandardA1: return "A1"; -+ default: return "none"; -+ } -+} -+ -+static char *glyph_iter_to_string(struct glyph_iter *gi, -+ const char *format, -+ ...) 
-+{ -+ static char buf[100]; -+ va_list ap; -+ int len; -+ -+ len = sprintf(buf, "glyphs=%s, op=%d, dst=%08x, src=%08x, mask=%s", -+ stdformat_to_str(gi->glyph_format), gi->op, -+ xrender_color(&colors[gi->dst_color]), -+ xrender_color(&colors[gi->src_color]), -+ stdformat_to_str(gi->mask_format)); -+ -+ if (format) { -+ buf[len++] = ' '; -+ va_start(ap, format); -+ vsprintf(buf+len, format, ap); -+ va_end(ap); -+ } -+ -+ return buf; -+} -+ -+static void single(struct test *t, enum target target) -+{ -+ struct glyph_iter gi; -+ int n; -+ -+ printf("Testing single glyph (%s): ", test_target_name(target)); -+ fflush(stdout); -+ -+ glyph_iter_init(&gi, t, target); -+ while (glyph_iter_next(&gi)) { -+ XGlyphElt8 elt; -+ char id[N_GLYPHS]; -+ -+ for (n = 0; n < N_GLYPHS; n++) { -+ id[n] = n; -+ -+ elt.chars = &id[n]; -+ elt.nchars = 1; -+ elt.xOff = 0; -+ elt.yOff = 0; -+ -+ clear(gi.out.dpy, &gi.out.tt, &colors[gi.dst_color]); -+ elt.glyphset = gi.out.glyphset; -+ XRenderCompositeText8 (gi.out.dpy->dpy, gi.op, -+ gi.out.src, -+ gi.out.tt.picture, -+ gi.out.mask_format, -+ 0, 0, -+ 0, 8, -+ &elt, 1); -+ -+ clear(gi.ref.dpy, &gi.ref.tt, &colors[gi.dst_color]); -+ elt.glyphset = gi.ref.glyphset; -+ XRenderCompositeText8 (gi.ref.dpy->dpy, gi.op, -+ gi.ref.src, -+ gi.ref.tt.picture, -+ gi.ref.mask_format, -+ 0, 0, -+ 0, 8, -+ &elt, 1); -+ test_compare(t, -+ gi.out.tt.draw, gi.out.tt.format, -+ gi.ref.tt.draw, gi.ref.tt.format, -+ 0, 0, gi.out.tt.width, gi.out.tt.height, -+ glyph_iter_to_string(&gi, -+ "glyph=%s", -+ glyph_name(n))); -+ } -+ -+ elt.chars = &id[0]; -+ elt.nchars = n; -+ clear(gi.out.dpy, &gi.out.tt, &colors[gi.dst_color]); -+ elt.glyphset = gi.out.glyphset; -+ XRenderCompositeText8 (gi.out.dpy->dpy, gi.op, -+ gi.out.src, -+ gi.out.tt.picture, -+ gi.out.mask_format, -+ 0, 0, -+ 0, 8, -+ &elt, 1); -+ -+ clear(gi.ref.dpy, &gi.ref.tt, &colors[gi.dst_color]); -+ elt.glyphset = gi.ref.glyphset; -+ XRenderCompositeText8 (gi.ref.dpy->dpy, gi.op, -+ gi.ref.src, -+ 
gi.ref.tt.picture, -+ gi.ref.mask_format, -+ 0, 0, -+ 0, 8, -+ &elt, 1); -+ test_compare(t, -+ gi.out.tt.draw, gi.out.tt.format, -+ gi.ref.tt.draw, gi.ref.tt.format, -+ 0, 0, gi.out.tt.width, gi.out.tt.height, -+ glyph_iter_to_string(&gi, "all")); -+ } -+ glyph_iter_fini(&gi); -+} -+ -+int main(int argc, char **argv) -+{ -+ struct test test; -+ int t; -+ -+ test_init(&test, argc, argv); -+ XSetErrorHandler(_check_error_handler); -+ -+ for (t = TARGET_FIRST; t <= TARGET_LAST; t++) { -+ single(&test, t); -+ //overlapping(&test, t); -+ //gap(&test, t); -+ //mixed(&test, t); -+ } -+ -+ return 0; -+} -diff --git a/test/render-trapezoid.c b/test/render-trapezoid.c -index cd990143..f15a78e3 100644 ---- a/test/render-trapezoid.c -+++ b/test/render-trapezoid.c -@@ -403,16 +403,141 @@ static void trap_tests(struct test *t, - free(traps); - } - -+enum edge { -+ EDGE_SHARP = PolyEdgeSharp, -+ EDGE_SMOOTH, -+}; -+ -+static const char *edge_name(enum edge edge) -+{ -+ switch (edge) { -+ default: -+ case EDGE_SHARP: return "sharp"; -+ case EDGE_SMOOTH: return "smooth"; -+ } -+} -+ -+static void set_edge(Display *dpy, Picture p, enum edge edge) -+{ -+ XRenderPictureAttributes a; -+ -+ a.poly_edge = edge; -+ XRenderChangePicture(dpy, p, CPPolyEdge, &a); -+} -+ -+static void edge_test(struct test *t, -+ enum mask mask, -+ enum edge edge, -+ enum target target) -+{ -+ struct test_target out, ref; -+ XRenderColor white = { 0xffff, 0xffff, 0xffff, 0xffff }; -+ Picture src_ref, src_out; -+ XTrapezoid trap; -+ int left_or_right, p; -+ -+ test_target_create_render(&t->out, target, &out); -+ set_edge(t->out.dpy, out.picture, edge); -+ src_out = XRenderCreateSolidFill(t->out.dpy, &white); -+ -+ test_target_create_render(&t->ref, target, &ref); -+ set_edge(t->ref.dpy, ref.picture, edge); -+ src_ref = XRenderCreateSolidFill(t->ref.dpy, &white); -+ -+ printf("Testing edges (with mask %s and %s edges) (%s): ", -+ mask_name(mask), -+ edge_name(edge), -+ test_target_name(target)); -+ 
fflush(stdout); -+ -+ for (left_or_right = 0; left_or_right <= 1; left_or_right++) { -+ for (p = -64; p <= out.width + 64; p++) { -+ char buf[80]; -+ -+ if (left_or_right) { -+ trap.left.p1.x = 0; -+ trap.left.p1.y = 0; -+ trap.left.p2.x = 0; -+ trap.left.p2.y = out.height << 16; -+ -+ trap.right.p1.x = p << 16; -+ trap.right.p1.y = 0; -+ trap.right.p2.x = out.width << 16; -+ trap.right.p2.y = out.height << 16; -+ } else { -+ trap.right.p1.x = out.width << 16; -+ trap.right.p1.y = 0; -+ trap.right.p2.x = out.width << 16; -+ trap.right.p2.y = out.height << 16; -+ -+ trap.left.p1.x = 0; -+ trap.left.p1.y = 0; -+ trap.left.p2.x = p << 16; -+ trap.left.p2.y = out.height << 16; -+ } -+ -+ trap.top = 0; -+ trap.bottom = out.height << 16; -+ -+ sprintf(buf, -+ "trap=((%d, %d), (%d, %d)), ((%d, %d), (%d, %d))\n", -+ trap.left.p1.x >> 16, trap.left.p1.y >> 16, -+ trap.left.p2.x >> 16, trap.left.p2.y >> 16, -+ trap.right.p1.x >> 16, trap.right.p1.y >> 16, -+ trap.right.p2.x >> 16, trap.right.p2.y >> 16); -+ -+ clear(&t->out, &out); -+ XRenderCompositeTrapezoids(t->out.dpy, -+ PictOpSrc, -+ src_out, -+ out.picture, -+ mask_format(t->out.dpy, mask), -+ 0, 0, -+ &trap, 1); -+ -+ clear(&t->ref, &ref); -+ XRenderCompositeTrapezoids(t->ref.dpy, -+ PictOpSrc, -+ src_ref, -+ ref.picture, -+ mask_format(t->ref.dpy, mask), -+ 0, 0, -+ &trap, 1); -+ -+ test_compare(t, -+ out.draw, out.format, -+ ref.draw, ref.format, -+ 0, 0, out.width, out.height, -+ buf); -+ } -+ } -+ -+ XRenderFreePicture(t->out.dpy, src_out); -+ test_target_destroy_render(&t->out, &out); -+ -+ XRenderFreePicture(t->ref.dpy, src_ref); -+ test_target_destroy_render(&t->ref, &ref); -+ -+ printf("pass\n"); -+} -+ - int main(int argc, char **argv) - { - struct test test; - int i, dx, dy; - enum target target; - enum mask mask; -+ enum edge edge; - enum trapezoid trapezoid; - - test_init(&test, argc, argv); - -+ for (target = TARGET_FIRST; target <= TARGET_LAST; target++) { -+ for (mask = MASK_NONE; mask <= MASK_A8; 
mask++) -+ for (edge = EDGE_SHARP; edge <= EDGE_SMOOTH; edge++) -+ edge_test(&test, mask, edge, target); -+ } -+ - for (i = 0; i <= DEFAULT_ITERATIONS; i++) { - int reps = REPS(i), sets = SETS(i); - -diff --git a/test/render-triangle.c b/test/render-triangle.c -new file mode 100644 -index 00000000..165834ce ---- /dev/null -+++ b/test/render-triangle.c -@@ -0,0 +1,180 @@ -+#include -+#include -+#include -+ -+#include "test.h" -+ -+enum edge { -+ EDGE_SHARP = PolyEdgeSharp, -+ EDGE_SMOOTH, -+}; -+ -+static void set_edge(Display *dpy, Picture p, enum edge edge) -+{ -+ XRenderPictureAttributes a; -+ -+ a.poly_edge = edge; -+ XRenderChangePicture(dpy, p, CPPolyEdge, &a); -+} -+ -+static XRenderPictFormat *mask_format(Display *dpy, enum mask mask) -+{ -+ switch (mask) { -+ default: -+ case MASK_NONE: return NULL; -+ case MASK_A1: return XRenderFindStandardFormat(dpy, PictStandardA1); -+ case MASK_A8: return XRenderFindStandardFormat(dpy, PictStandardA8); -+ } -+} -+ -+static const char *mask_name(enum mask mask) -+{ -+ switch (mask) { -+ default: -+ case MASK_NONE: return "none"; -+ case MASK_A1: return "a1"; -+ case MASK_A8: return "a8"; -+ } -+} -+ -+static const char *edge_name(enum edge edge) -+{ -+ switch (edge) { -+ default: -+ case EDGE_SHARP: return "sharp"; -+ case EDGE_SMOOTH: return "smooth"; -+ } -+} -+ -+static void clear(struct test_display *dpy, struct test_target *tt) -+{ -+ XRenderColor render_color = {0}; -+ XRenderFillRectangle(dpy->dpy, PictOpClear, tt->picture, &render_color, -+ 0, 0, tt->width, tt->height); -+} -+ -+static void step_to_point(int step, int width, int height, XPointFixed *p) -+{ -+ do { -+ p->x = (step - 64) << 16; -+ p->y = -64 << 16; -+ -+ step -= width - 128; -+ if (step <= 0) -+ return; -+ -+ p->x = (width + 64) << 16; -+ p->y = (step - 64) << 16; -+ step -= height - 128; -+ -+ if (step <= 0) -+ return; -+ -+ p->x = (width + 64 - step) << 16; -+ p->y = (height + 64) << 16; -+ step -= width - 128; -+ -+ if (step <= 0) -+ return; -+ 
-+ p->x = -64 << 16; -+ p->y = (height + 64 - step) << 16; -+ step -= height - 128; -+ } while (step > 0); -+} -+ -+static void edge_test(struct test *t, -+ enum mask mask, -+ enum edge edge, -+ enum target target) -+{ -+ struct test_target out, ref; -+ XRenderColor white = { 0xffff, 0xffff, 0xffff, 0xffff }; -+ Picture src_ref, src_out; -+ XTriangle tri; -+ unsigned step, max; -+ -+ test_target_create_render(&t->out, target, &out); -+ set_edge(t->out.dpy, out.picture, edge); -+ src_out = XRenderCreateSolidFill(t->out.dpy, &white); -+ -+ test_target_create_render(&t->ref, target, &ref); -+ set_edge(t->ref.dpy, ref.picture, edge); -+ src_ref = XRenderCreateSolidFill(t->ref.dpy, &white); -+ -+ printf("Testing edges (with mask %s and %s edges) (%s): ", -+ mask_name(mask), -+ edge_name(edge), -+ test_target_name(target)); -+ fflush(stdout); -+ -+ max = 2*(out.width + 128 + out.height+128); -+ step = 0; -+ for (step = 0; step <= max; step++) { -+ char buf[80]; -+ -+ step_to_point(step, out.width, out.height, &tri.p1); -+ step_to_point(step + out.width + 128, -+ out.width, out.height, -+ &tri.p2); -+ step_to_point(step + out.height + 128 + 2*(out.width + 128), -+ out.width, out.height, -+ &tri.p3); -+ -+ sprintf(buf, -+ "tri=((%d, %d), (%d, %d), (%d, %d))\n", -+ tri.p1.x >> 16, tri.p1.y >> 16, -+ tri.p2.x >> 16, tri.p2.y >> 16, -+ tri.p3.x >> 16, tri.p3.y >> 16); -+ -+ clear(&t->out, &out); -+ XRenderCompositeTriangles(t->out.dpy, -+ PictOpSrc, -+ src_out, -+ out.picture, -+ mask_format(t->out.dpy, mask), -+ 0, 0, -+ &tri, 1); -+ -+ clear(&t->ref, &ref); -+ XRenderCompositeTriangles(t->ref.dpy, -+ PictOpSrc, -+ src_ref, -+ ref.picture, -+ mask_format(t->ref.dpy, mask), -+ 0, 0, -+ &tri, 1); -+ -+ test_compare(t, -+ out.draw, out.format, -+ ref.draw, ref.format, -+ 0, 0, out.width, out.height, -+ buf); -+ } -+ -+ XRenderFreePicture(t->out.dpy, src_out); -+ test_target_destroy_render(&t->out, &out); -+ -+ XRenderFreePicture(t->ref.dpy, src_ref); -+ 
test_target_destroy_render(&t->ref, &ref); -+ -+ printf("pass\n"); -+} -+ -+int main(int argc, char **argv) -+{ -+ struct test test; -+ enum target target; -+ enum mask mask; -+ enum edge edge; -+ -+ test_init(&test, argc, argv); -+ -+ for (target = TARGET_FIRST; target <= TARGET_LAST; target++) { -+ for (mask = MASK_NONE; mask <= MASK_A8; mask++) -+ for (edge = EDGE_SHARP; edge <= EDGE_SMOOTH; edge++) -+ edge_test(&test, mask, edge, target); -+ } -+ -+ return 0; -+} -diff --git a/test/test.h b/test/test.h -index a3ef979d..9eec1cf9 100644 ---- a/test/test.h -+++ b/test/test.h -@@ -107,6 +107,15 @@ static inline uint32_t color(uint8_t red, uint8_t green, uint8_t blue, uint8_t a - return alpha << 24 | ra >> 8 << 16 | ga >> 8 << 8 | ba >> 8; - } - -+static inline uint32_t xrender_color(const XRenderColor *c) -+{ -+ uint32_t ra = c->red * c->alpha; -+ uint32_t ga = c->green * c->alpha; -+ uint32_t ba = c->blue * c->alpha; -+ -+ return c->alpha >> 8 << 24 | ra >> 24 << 16 | ga >> 24 << 8 | ba >> 24; -+} -+ - void test_timer_start(struct test_display *t, struct timespec *tv); - double test_timer_stop(struct test_display *t, struct timespec *tv); - -diff --git a/test/test_image.c b/test/test_image.c -index d15a8af8..1c076990 100644 ---- a/test/test_image.c -+++ b/test/test_image.c -@@ -197,13 +197,10 @@ void test_compare(struct test *t, - const char *info) - { - XImage out_image, ref_image; -- Pixmap tmp; -- char *out, *ref; -+ uint32_t *out, *ref; - char buf[600]; - uint32_t mask; - int i, j; -- XGCValues gcv; -- GC gc; - - if (w * h * 4 > t->out.max_shm_size) - return test_compare_fallback(t, -@@ -214,37 +211,24 @@ void test_compare(struct test *t, - test_init_image(&out_image, &t->out.shm, out_format, w, h); - test_init_image(&ref_image, &t->ref.shm, ref_format, w, h); - -- gcv.graphics_exposures = 0; -- - die_unless(out_image.depth == ref_image.depth); - die_unless(out_image.bits_per_pixel == ref_image.bits_per_pixel); - die_unless(out_image.bits_per_pixel == 32); - 
-- mask = depth_mask(out_image.depth); -+ XShmGetImage(t->out.dpy, out_draw, &out_image, x, y, AllPlanes); -+ out = (uint32_t *)out_image.data; - -- tmp = XCreatePixmap(t->out.dpy, out_draw, w, h, out_image.depth); -- gc = XCreateGC(t->out.dpy, tmp, GCGraphicsExposures, &gcv); -- XCopyArea(t->out.dpy, out_draw, tmp, gc, x, y, w, h, 0, 0); -- XShmGetImage(t->out.dpy, tmp, &out_image, 0, 0, AllPlanes); -- XFreeGC(t->out.dpy, gc); -- XFreePixmap(t->out.dpy, tmp); -- out = out_image.data; -- -- tmp = XCreatePixmap(t->ref.dpy, ref_draw, w, h, ref_image.depth); -- gc = XCreateGC(t->ref.dpy, tmp, GCGraphicsExposures, &gcv); -- XCopyArea(t->ref.dpy, ref_draw, tmp, gc, x, y, w, h, 0, 0); -- XShmGetImage(t->ref.dpy, tmp, &ref_image, 0, 0, AllPlanes); -- XFreeGC(t->ref.dpy, gc); -- XFreePixmap(t->ref.dpy, tmp); -- ref = ref_image.data; -+ XShmGetImage(t->ref.dpy, ref_draw, &ref_image, x, y, AllPlanes); -+ ref = (uint32_t *)ref_image.data; - - /* Start with an exact comparison. However, one quicky desires - * a fuzzy comparator to hide hardware inaccuracies... 
- */ -+ mask = depth_mask(out_image.depth); - for (j = 0; j < h; j++) { - for (i = 0; i < w; i++) { -- uint32_t a = ((uint32_t *)out)[i] & mask; -- uint32_t b = ((uint32_t *)ref)[i] & mask; -+ uint32_t a = out[i] & mask; -+ uint32_t b = ref[i] & mask; - if (a != b && pixel_difference(a, b) > MAX_DELTA) { - show_pixels(buf, - &out_image, &ref_image, -@@ -255,8 +239,8 @@ void test_compare(struct test *t, - x,i, y,j, a, b, pixel_difference(a, b), buf, info); - } - } -- out += out_image.bytes_per_line; -- ref += ref_image.bytes_per_line; -+ out = (uint32_t *)((char *)out + out_image.bytes_per_line); -+ ref = (uint32_t *)((char *)ref + ref_image.bytes_per_line); - } - } - -diff --git a/test/xvidmode.c b/test/xvidmode.c -new file mode 100644 -index 00000000..5cde8286 ---- /dev/null -+++ b/test/xvidmode.c -@@ -0,0 +1,54 @@ -+#include -+#include -+#include -+#include -+#include -+ -+int main(void) -+{ -+ Display *dpy; -+ XF86VidModeModeLine current; -+ XF86VidModeModeInfo **modes; -+ int num_modes, i; -+ int saved_mode = -1; -+ int dotclock; -+ -+ dpy = XOpenDisplay(NULL); -+ if (dpy == NULL) -+ dpy = XOpenDisplay(":0"); -+ -+ XF86VidModeGetModeLine(dpy, DefaultScreen(dpy), &dotclock, ¤t); -+ XF86VidModeGetAllModeLines(dpy, XDefaultScreen(dpy), -+ &num_modes, &modes); -+ for (i = 0; i < num_modes; i++) { -+ int this; -+ -+ this = (current.hdisplay == modes[i]->hdisplay && -+ current.vdisplay == modes[i]->vdisplay && -+ dotclock == modes[i]->dotclock); -+ if (this && saved_mode == -1) -+ saved_mode = i; -+ -+ printf("[%d] %dx%d%s\n", -+ i, -+ modes[i]->hdisplay, -+ modes[i]->vdisplay, -+ this ? 
"*" : ""); -+ } -+ -+ for (i = 0; i < num_modes; i++) { -+ printf("Switching to mode %dx%d\n", -+ modes[i]->hdisplay, -+ modes[i]->vdisplay); -+ XF86VidModeSwitchToMode(dpy, XDefaultScreen(dpy), modes[i]); -+ XSync(dpy, True); -+ } -+ -+ if (saved_mode != -1) { -+ XF86VidModeSwitchToMode(dpy, XDefaultScreen(dpy), -+ modes[saved_mode]); -+ XFlush(dpy); -+ } -+ -+ return 0; -+} -diff --git a/tools/Makefile.am b/tools/Makefile.am -index b5de2c96..92df266b 100644 ---- a/tools/Makefile.am -+++ b/tools/Makefile.am -@@ -26,13 +26,30 @@ AM_CFLAGS = \ - drivermandir = $(DRIVER_MAN_DIR) - policydir = $(datarootdir)/polkit-1/actions - -+bin_PROGRAMS = -+noinst_PROGRAMS = -+libexec_PROGRAMS = -+ - if BUILD_TOOLS --bin_PROGRAMS = intel-virtual-output -+bin_PROGRAMS += intel-virtual-output - driverman_DATA = intel-virtual-output.$(DRIVER_MAN_SUFFIX) - endif - -+if BUILD_TOOL_CURSOR -+noinst_PROGRAMS += cursor -+cursor_CFLAGS = $(TOOL_CURSOR_CFLAGS) -+cursor_LDADD = $(TOOL_CURSOR_LIBS) -+endif -+ -+if X11_DRI3 -+noinst_PROGRAMS += dri3info -+dri3info_SOURCES = dri3info.c -+dri3info_CFLAGS = $(X11_DRI3_CFLAGS) $(DRI_CFLAGS) -+dri3info_LDADD = $(X11_DRI3_LIBS) $(DRI_LIBS) -+endif -+ - if BUILD_BACKLIGHT_HELPER --libexec_PROGRAMS = xf86-video-intel-backlight-helper -+libexec_PROGRAMS += xf86-video-intel-backlight-helper - nodist_policy_DATA = org.x.xf86-video-intel.backlight-helper.policy - - backlight_helper = $(libexecdir)/xf86-video-intel-backlight-helper -diff --git a/tools/backlight_helper.c b/tools/backlight_helper.c -index 8b2667dc..aadb8fac 100644 ---- a/tools/backlight_helper.c -+++ b/tools/backlight_helper.c -@@ -1,3 +1,7 @@ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ - #include - #include - #include -@@ -9,6 +13,12 @@ - #include - #include - -+#if MAJOR_IN_MKDEV -+#include -+#elif MAJOR_IN_SYSMACROS -+#include -+#endif -+ - #define DBG 0 - - #if defined(__GNUC__) && (__GNUC__ > 3) -diff --git a/tools/cursor.c b/tools/cursor.c -new file mode 100644 -index 
00000000..6a2438ad ---- /dev/null -+++ b/tools/cursor.c -@@ -0,0 +1,127 @@ -+/* -+ * Copyright © 2015 Intel Corporation -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. 
-+ * -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+int main(int argc, char **argv) -+{ -+ Display *dpy; -+ XFixesCursorImage *cur; -+ unsigned long *src; /* XXX deep sigh */ -+ unsigned x, y; -+ png_struct *png; -+ png_info *info; -+ png_byte **rows; -+ FILE *file; -+ -+ dpy = XOpenDisplay(NULL); -+ if (dpy == NULL) -+ return 1; -+ -+ if (!XFixesQueryExtension(dpy, (int *)&x, (int *)&y)) -+ return 1; -+ -+ cur = XFixesGetCursorImage(dpy); -+ if (cur == NULL) -+ return 1; -+ -+ printf("Cursor on display '%s': %dx%d, (hotspot %dx%d)\n", -+ DisplayString(dpy), -+ cur->width, cur->height, -+ cur->xhot, cur->yhot); -+ -+ if (1) { -+ int x, y; -+ -+ src = cur->pixels; -+ for (y = 0; y < cur->height; y++) { -+ for (x = 0; x < cur->width; x++) { -+ if (x == cur->xhot && y == cur->yhot) -+ printf("+"); -+ else -+ printf("%c", *src ? *src >> 24 >= 127 ? 'x' : '.' : ' '); -+ src++; -+ } -+ printf("\n"); -+ } -+ } -+ -+ file = fopen("cursor.png", "wb"); -+ if (file == NULL) -+ return 2; -+ -+ png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); -+ info = png_create_info_struct(png); -+ png_init_io(png, file); -+ png_set_IHDR(png, info, -+ cur->width, cur->height, 8, -+ PNG_COLOR_TYPE_RGB_ALPHA, -+ PNG_INTERLACE_NONE, -+ PNG_COMPRESSION_TYPE_DEFAULT, -+ PNG_FILTER_TYPE_DEFAULT); -+ png_write_info(png, info); -+ -+ src = cur->pixels; -+ rows = malloc(cur->height*sizeof(png_byte*)); -+ if (rows == NULL) -+ return 3; -+ -+ for (y = 0; y < cur->height; y++) { -+ rows[y] = malloc(cur->width * 4); -+ for (x = 0; x < cur->width; x++) { -+ uint32_t p = *src++; -+ uint8_t r = p >> 0; -+ uint8_t g = p >> 8; -+ uint8_t b = p >> 16; -+ uint8_t a = p >> 24; -+ -+ if (a > 0x00 && a < 0xff) { -+ r = (r * 0xff + a /2) / a; -+ g = (g * 0xff + a /2) / a; -+ b = (b * 0xff + a /2) / a; -+ } -+ -+ rows[y][4*x + 0] = b; -+ rows[y][4*x + 1] = g; -+ rows[y][4*x + 2] = r; -+ rows[y][4*x + 3] 
= a; -+ } -+ } -+ -+ png_write_image(png, rows); -+ png_write_end(png, NULL); -+ fclose(file); -+ -+ return 0; -+} -diff --git a/tools/dri3info.c b/tools/dri3info.c -new file mode 100644 -index 00000000..0c33fc5a ---- /dev/null -+++ b/tools/dri3info.c -@@ -0,0 +1,329 @@ -+/* -+ * Copyright (c) 2015 Intel Corporation -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. 
-+ * -+ * To compile standalone: gcc -o dri3info dri3info.c `pkg-config --cflags --libs xcb-dri3 x11-xcb xrandr xxf86vm libdrm` -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+static int dri3_query_version(Display *dpy, int *major, int *minor) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ xcb_dri3_query_version_reply_t *reply; -+ xcb_generic_error_t *error; -+ -+ *major = *minor = -1; -+ -+ reply = xcb_dri3_query_version_reply(c, -+ xcb_dri3_query_version(c, -+ XCB_DRI3_MAJOR_VERSION, -+ XCB_DRI3_MINOR_VERSION), -+ &error); -+ free(error); -+ if (reply == NULL) -+ return -1; -+ -+ *major = reply->major_version; -+ *minor = reply->minor_version; -+ free(reply); -+ -+ return 0; -+} -+ -+static int dri3_exists(Display *dpy) -+{ -+ const xcb_query_extension_reply_t *ext; -+ int major, minor; -+ -+ ext = xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id); -+ if (ext == NULL || !ext->present) -+ return 0; -+ -+ if (dri3_query_version(dpy, &major, &minor) < 0) -+ return 0; -+ -+ return major >= 0; -+} -+ -+static int dri3_open(Display *dpy) -+{ -+ xcb_connection_t *c = XGetXCBConnection(dpy); -+ xcb_dri3_open_cookie_t cookie; -+ xcb_dri3_open_reply_t *reply; -+ -+ if (!dri3_exists(dpy)) -+ return -1; -+ -+ cookie = xcb_dri3_open(c, RootWindow(dpy, DefaultScreen(dpy)), None); -+ reply = xcb_dri3_open_reply(c, cookie, NULL); -+ -+ if (!reply) -+ return -1; -+ -+ if (reply->nfd != 1) -+ return -1; -+ -+ return xcb_dri3_open_reply_fds(c, reply)[0]; -+} -+ -+static void get_device_path(int fd, char *buf, int len) -+{ -+ struct stat remote, local; -+ int i; -+ -+ if (fstat(fd, &remote)) -+ goto out; -+ -+ for (i = 0; i < 16; i++) { -+ snprintf(buf, len, "/dev/dri/card%d", i); -+ if (stat(buf, &local)) -+ continue; -+ -+ if (local.st_mode == remote.st_mode && -+ local.st_rdev == remote.st_rdev) -+ return; -+ -+ snprintf(buf, len, 
"/dev/dri/renderD%d", i + 128); -+ if (stat(buf, &local)) -+ continue; -+ -+ if (local.st_mode == remote.st_mode && -+ local.st_rdev == remote.st_rdev) -+ return; -+ } -+ -+out: -+ strncpy(buf, "unknown path", len); -+} -+ -+static void get_driver_name(int fd, char *name, int len) -+{ -+ drm_version_t version; -+ -+ memset(name, 0, len); -+ memset(&version, 0, sizeof(version)); -+ version.name_len = len; -+ version.name = name; -+ -+ (void)drmIoctl(fd, DRM_IOCTL_VERSION, &version); -+} -+ -+static int compute_refresh_rate_from_mode(long n, long d, unsigned flags, -+ int32_t *numerator, -+ int32_t *denominator) -+{ -+ int i; -+ -+ /* The mode flags are only defined privately to the Xserver (in xf86str.h) -+ * but they at least bit compatible between VidMode, RandR and DRM. -+ */ -+# define V_INTERLACE 0x010 -+# define V_DBLSCAN 0x020 -+ -+ if (flags & V_INTERLACE) -+ n *= 2; -+ else if (flags & V_DBLSCAN) -+ d *= 2; -+ -+ /* The OML_sync_control spec requires that if the refresh rate is a -+ * whole number, that the returned numerator be equal to the refresh -+ * rate and the denominator be 1. -+ */ -+ -+ if (n % d == 0) { -+ n /= d; -+ d = 1; -+ } -+ else { -+ static const unsigned f[] = { 13, 11, 7, 5, 3, 2, 0 }; -+ -+ /* This is a poor man's way to reduce a fraction. It's far from -+ * perfect, but it will work well enough for this situation. 
-+ */ -+ -+ for (i = 0; f[i] != 0; i++) { -+ while (n % f[i] == 0 && d % f[i] == 0) { -+ d /= f[i]; -+ n /= f[i]; -+ } -+ } -+ } -+ -+ *numerator = n; -+ *denominator = d; -+ return 1; -+} -+ -+static int RRGetMscRate(Display *dpy, int32_t *numerator, int32_t *denominator) -+{ -+ int ret = 0; -+ Window root = RootWindow(dpy, DefaultScreen(dpy)); -+ XRRScreenResources *res; -+ int rr_event, rr_error; -+ RROutput primary; -+ RRMode mode = 0; -+ int n; -+ -+ if (!XRRQueryExtension(dpy, &rr_event, &rr_error)) -+ return ret; -+ -+ res = XRRGetScreenResourcesCurrent(dpy, root); -+ if (res == NULL) -+ return ret; -+ -+ /* Use the primary output if specified, otherwise -+ * use the mode on the first enabled crtc. -+ */ -+ primary = XRRGetOutputPrimary(dpy, root); -+ if (primary) { -+ XRROutputInfo *output; -+ -+ output = XRRGetOutputInfo(dpy, res, primary); -+ if (output != NULL) { -+ if (output->crtc) { -+ XRRCrtcInfo *crtc; -+ -+ crtc = XRRGetCrtcInfo(dpy, res, output->crtc); -+ if (crtc) { -+ mode = crtc->mode; -+ XRRFreeCrtcInfo(crtc); -+ } -+ } -+ XRRFreeOutputInfo(output); -+ } -+ } -+ -+ for (n = 0; mode == 0 && n < res->ncrtc; n++) { -+ XRRCrtcInfo *crtc; -+ -+ crtc = XRRGetCrtcInfo(dpy, res, res->crtcs[n]); -+ if (crtc) { -+ mode = crtc->mode; -+ XRRFreeCrtcInfo(crtc); -+ } -+ } -+ -+ for (n = 0; n < res->nmode; n++) { -+ if (res->modes[n].id == mode) { -+ ret = compute_refresh_rate_from_mode(res->modes[n].dotClock, -+ res->modes[n].hTotal*res->modes[n].vTotal, -+ res->modes[n].modeFlags, -+ numerator, denominator); -+ break; -+ } -+ } -+ -+ XRRFreeScreenResources(res); -+ return ret; -+} -+ -+static int VMGetMscRate(Display *dpy, int32_t *numerator, int32_t *denominator) -+{ -+ XF86VidModeModeLine mode_line; -+ int dot_clock; -+ int i; -+ -+ if (XF86VidModeQueryVersion(dpy, &i, &i) && -+ XF86VidModeGetModeLine(dpy, DefaultScreen(dpy), &dot_clock, &mode_line)) -+ return compute_refresh_rate_from_mode(dot_clock * 1000, -+ mode_line.vtotal * mode_line.htotal, -+ 
mode_line.flags, -+ numerator, denominator); -+ -+ return 0; -+} -+ -+static int get_refresh_rate(Display *dpy, -+ int32_t *numerator, -+ int32_t *denominator) -+{ -+ if (RRGetMscRate(dpy, numerator, denominator)) -+ return 1; -+ -+ if (VMGetMscRate(dpy, numerator, denominator)) -+ return 1; -+ -+ return 0; -+} -+ -+static void info(const char *dpyname) -+{ -+ Display *dpy; -+ int device; -+ int32_t numerator, denominator; -+ -+ dpy = XOpenDisplay(dpyname); -+ if (dpy == NULL) { -+ printf("Unable to connect to display '%s'\n", -+ dpyname ?: getenv("DISPLAY") ?: "unset"); -+ return; -+ } -+ -+ printf("Display '%s'\n", DisplayString(dpy)); -+ device = dri3_open(dpy); -+ if (device < 0) { -+ printf("\tUnable to connect to DRI3\n"); -+ } else { -+ char device_path[1024]; -+ char driver_name[1024]; -+ -+ get_device_path(device, device_path, sizeof(device_path)); -+ get_driver_name(device, driver_name, sizeof(driver_name)); -+ -+ printf("Connected to DRI3, using fd %d which matches %s, driver %s\n", -+ device, device_path, driver_name); -+ close(device); -+ } -+ -+ if (get_refresh_rate(dpy, &numerator, &denominator)) -+ printf("\tPrimary refresh rate: %d/%d (%.1fHz)\n", -+ numerator, denominator, numerator/(float)denominator); -+ -+ XCloseDisplay(dpy); -+} -+ -+int main(int argc, char **argv) -+{ -+ int i; -+ -+ if (argc > 1) { -+ for (i = 1; i < argc; i++) -+ info(argv[i]); -+ } else -+ info(NULL); -+ -+ return 0; -+} -diff --git a/tools/virtual.c b/tools/virtual.c -index 8e2b4a22..fc8db2b9 100644 ---- a/tools/virtual.c -+++ b/tools/virtual.c -@@ -31,6 +31,7 @@ - - #include - #include -+#include - #include - #if HAVE_X11_EXTENSIONS_SHMPROTO_H - #include -@@ -79,13 +80,15 @@ static int verbose; - #define DRAW 0x8 - #define DAMAGE 0x10 - #define CURSOR 0x20 --#define POLL 0x40 -+#define SCREEN 0x40 -+#define POLL 0x80 - - struct display { - Display *dpy; - struct clone *clone; - struct context *ctx; - -+ int saver_event, saver_error, saver_active; - int damage_event, 
damage_error; - int xfixes_event, xfixes_error; - int rr_event, rr_error, rr_active; -@@ -98,6 +101,7 @@ struct display { - int width; - int height; - int depth; -+ int active; - - XRenderPictFormat *root_format; - XRenderPictFormat *rgb16_format; -@@ -111,7 +115,7 @@ struct display { - Cursor invisible_cursor; - Cursor visible_cursor; - -- XcursorImage cursor_image; -+ XcursorImage cursor_image; /* first only */ - int cursor_serial; - int cursor_x; - int cursor_y; -@@ -123,6 +127,13 @@ struct display { - int send; - int skip_clone; - int skip_frame; -+ -+ struct { -+ int timeout; -+ int interval; -+ int prefer_blank; -+ int allow_exp; -+ } saver; - }; - - struct output { -@@ -145,6 +156,7 @@ struct output { - XRenderPictFormat *use_render; - - int x, y; -+ int width, height; - XRRModeInfo mode; - Rotation rotation; - }; -@@ -218,6 +230,13 @@ static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Wi - static int _x_error_occurred; - - static int -+_io_error_handler(Display *display) -+{ -+ fprintf(stderr, "XIO error on display %s\n", DisplayString(display)); -+ abort(); -+} -+ -+static int - _check_error_handler(Display *display, - XErrorEvent *event) - { -@@ -243,6 +262,10 @@ can_use_shm(Display *dpy, - XExtCodes *codes; - int major, minor, has_shm, has_pixmap; - -+ *shm_event = 0; -+ *shm_opcode = 0; -+ *shm_pixmap = 0; -+ - if (!XShmQueryExtension(dpy)) - return 0; - -@@ -320,6 +343,7 @@ can_use_shm(Display *dpy, - #include - #include - #include -+#include - #include - #include - static Pixmap dri3_create_pixmap(Display *dpy, -@@ -357,6 +381,7 @@ static int dri3_query_version(Display *dpy, int *major, int *minor) - { - xcb_connection_t *c = XGetXCBConnection(dpy); - xcb_dri3_query_version_reply_t *reply; -+ xcb_generic_error_t *error; - - *major = *minor = -1; - -@@ -364,7 +389,8 @@ static int dri3_query_version(Display *dpy, int *major, int *minor) - xcb_dri3_query_version(c, - XCB_DRI3_MAJOR_VERSION, - XCB_DRI3_MINOR_VERSION), -- NULL); 
-+ &error); -+ free(error); - if (reply == NULL) - return -1; - -@@ -377,8 +403,13 @@ static int dri3_query_version(Display *dpy, int *major, int *minor) - - static int dri3_exists(Display *dpy) - { -+ const xcb_query_extension_reply_t *ext; - int major, minor; - -+ ext = xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id); -+ if (ext == NULL || !ext->present) -+ return 0; -+ - if (dri3_query_version(dpy, &major, &minor) < 0) - return 0; - -@@ -809,6 +840,10 @@ static int clone_update_modes__fixed(struct clone *clone) - RRMode id; - int i, j, ret = ENOENT; - -+ DBG(X11, ("%s-%s cloning modes fixed %dx%d\n", -+ DisplayString(clone->dst.dpy), clone->dst.name, -+ clone->dst.width, clone->dst.height)); -+ - assert(clone->src.rr_output); - - res = _XRRGetScreenResourcesCurrent(clone->src.dpy, clone->src.window); -@@ -837,8 +872,8 @@ static int clone_update_modes__fixed(struct clone *clone) - - /* Create matching mode for the real output on the virtual */ - memset(&mode, 0, sizeof(mode)); -- mode.width = clone->width; -- mode.height = clone->height; -+ mode.width = clone->dst.width; -+ mode.height = clone->dst.height; - mode.nameLength = sprintf(mode_name, "FAKE-%dx%d", mode.width, mode.height); - mode.name = mode_name; - -@@ -942,6 +977,35 @@ out: - return rr_output; - } - -+static int check_virtual(struct display *display) -+{ -+ XRRScreenResources *res; -+ int found = -ENOENT; -+ int i; -+ -+ res = _XRRGetScreenResourcesCurrent(display->dpy, display->root); -+ if (res == NULL) -+ return -ENOMEM; -+ -+ for (i = 0; found == -ENOENT && i < res->noutput; i++) { -+ XRROutputInfo *output; -+ -+ output = XRRGetOutputInfo(display->dpy, res, res->outputs[i]); -+ if (output == NULL) -+ continue; -+ -+ if (strcmp(output->name, "VIRTUAL1") == 0) -+ found = 0; -+ -+ XRRFreeOutputInfo(output); -+ } -+ XRRFreeScreenResources(res); -+ -+ DBG(XRR, ("%s(%s): has VIRTUAL1? 
%d\n", -+ __func__, DisplayString(display->dpy), found)); -+ return found; -+} -+ - static int stride_for_depth(int width, int depth) - { - if (depth == 24) -@@ -1082,20 +1146,20 @@ static int clone_init_xfer(struct clone *clone) - width = 0; - height = 0; - } else if (clone->dri3.xid) { -- width = clone->dst.display->width; -- height = clone->dst.display->height; -+ width = clone->dst.width; -+ height = clone->dst.height; - } else { - width = mode_width(&clone->src.mode, clone->src.rotation); - height = mode_height(&clone->src.mode, clone->src.rotation); - } - -+ DBG(DRAW, ("%s-%s create xfer, %dx%d (currently %dx%d)\n", -+ DisplayString(clone->dst.dpy), clone->dst.name, -+ width, height, clone->width, clone->height)); -+ - if (width == clone->width && height == clone->height) - return 0; - -- DBG(DRAW, ("%s-%s create xfer, %dx%d\n", -- DisplayString(clone->dst.dpy), clone->dst.name, -- width, height)); -- - if (clone->shm.shmaddr) { - if (clone->src.use_shm) - XShmDetach(clone->src.dpy, &clone->src.shm); -@@ -1225,6 +1289,56 @@ static void clone_update(struct clone *clone) - clone->rr_update = 0; - } - -+static void screensaver_save(struct display *display) -+{ -+ display->saver_active = -+ XScreenSaverQueryExtension(display->dpy, -+ &display->saver_event, -+ &display->saver_error); -+ DBG(SCREEN, -+ ("%s screen saver active? 
%d [event=%d, error=%d]\n", -+ DisplayString(display->dpy), -+ display->saver_active, -+ display->saver_event, -+ display->saver_error)); -+ -+ XGetScreenSaver(display->dpy, -+ &display->saver.timeout, -+ &display->saver.interval, -+ &display->saver.prefer_blank, -+ &display->saver.allow_exp); -+ -+ DBG(SCREEN, -+ ("%s saving screen saver defaults: timeout=%d interval=%d prefer_blank=%d allow_exp=%d\n", -+ DisplayString(display->dpy), -+ display->saver.timeout, -+ display->saver.interval, -+ display->saver.prefer_blank, -+ display->saver.allow_exp)); -+} -+ -+static void screensaver_disable(struct display *display) -+{ -+ DBG(SCREEN, -+ ("%s disabling screen saver\n", DisplayString(display->dpy))); -+ -+ XSetScreenSaver(display->dpy, 0, 0, DefaultBlanking, DefaultExposures); -+ display_mark_flush(display); -+} -+ -+static void screensaver_restore(struct display *display) -+{ -+ DBG(SCREEN, -+ ("%s restoring screen saver\n", DisplayString(display->dpy))); -+ -+ XSetScreenSaver(display->dpy, -+ display->saver.timeout, -+ display->saver.interval, -+ display->saver.prefer_blank, -+ display->saver.allow_exp); -+ display_mark_flush(display); -+} -+ - static int context_update(struct context *ctx) - { - Display *dpy = ctx->display->dpy; -@@ -1325,8 +1439,19 @@ static int context_update(struct context *ctx) - struct clone *clone; - int x1, x2, y1, y2; - -- if (display->rr_active == 0) -+ if (display->rr_active == 0) { -+ for (clone = display->clone; clone; clone = clone->next) { -+ struct output *output = &clone->src; -+ if (output->mode.id) { -+ clone->dst.mode.id = -1; -+ clone->dst.rr_crtc = -1; -+ } else { -+ clone->dst.mode.id = 0; -+ clone->dst.rr_crtc = 0; -+ } -+ } - continue; -+ } - - x1 = y1 = INT_MAX; - x2 = y2 = INT_MIN; -@@ -1570,6 +1695,13 @@ ungrab: - XUngrabServer(display->dpy); - } - -+ for (n = 1; n < ctx->ndisplay; n++) { -+ struct display *display = &ctx->display[n]; -+ -+ display->active = 0; -+ screensaver_restore(display); -+ } -+ - ctx->active = 
NULL; - for (n = 0; n < ctx->nclone; n++) { - struct clone *clone = &ctx->clones[n]; -@@ -1580,7 +1712,10 @@ ungrab: - continue; - - DBG(XRR, ("%s-%s: added to active list\n", -- DisplayString(clone->dst.display->dpy), clone->dst.name)); -+ DisplayString(clone->dst.display->dpy), clone->dst.name)); -+ -+ if (clone->dst.display->active++ == 0) -+ screensaver_disable(clone->dst.display); - - clone->active = ctx->active; - ctx->active = clone; -@@ -1599,14 +1734,17 @@ static Cursor display_load_invisible_cursor(struct display *display) - - static Cursor display_get_visible_cursor(struct display *display) - { -- if (display->cursor_serial != display->cursor_image.size) { -- DBG(CURSOR, ("%s updating cursor\n", DisplayString(display->dpy))); -+ struct display *first = display->ctx->display; -+ -+ if (display->cursor_serial != first->cursor_serial) { -+ DBG(CURSOR, ("%s updating cursor %dx%d, serial %d\n", -+ DisplayString(display->dpy), first->cursor_image.width, first->cursor_image.height, first->cursor_serial)); - - if (display->visible_cursor) - XFreeCursor(display->dpy, display->visible_cursor); - -- display->visible_cursor = XcursorImageLoadCursor(display->dpy, &display->cursor_image); -- display->cursor_serial = display->cursor_image.size; -+ display->visible_cursor = XcursorImageLoadCursor(display->dpy, &first->cursor_image); -+ display->cursor_serial = first->cursor_serial; - } - - return display->visible_cursor; -@@ -1629,7 +1767,7 @@ static void display_load_visible_cursor(struct display *display, XFixesCursorIma - display->cursor_image.height = cur->height; - display->cursor_image.xhot = cur->xhot; - display->cursor_image.yhot = cur->yhot; -- display->cursor_image.size++; -+ display->cursor_serial++; - - n = cur->width*cur->height; - src = cur->pixels; -@@ -1637,11 +1775,24 @@ static void display_load_visible_cursor(struct display *display, XFixesCursorIma - while (n--) - *dst++ = *src++; - -- DBG(CURSOR, ("%s marking cursor changed\n", 
DisplayString(display->dpy))); -- display->cursor_moved++; -- if (display->cursor != display->invisible_cursor) { -- display->cursor_visible++; -- context_enable_timer(display->ctx); -+ if (verbose & CURSOR) { -+ int x, y; -+ -+ printf("%s cursor image %dx%d, serial %d:\n", -+ DisplayString(display->dpy), -+ cur->width, cur->height, -+ display->cursor_serial); -+ dst = display->cursor_image.pixels; -+ for (y = 0; y < cur->height; y++) { -+ for (x = 0; x < cur->width; x++) { -+ if (x == cur->xhot && y == cur->yhot) -+ printf("+"); -+ else -+ printf("%c", *dst ? *dst >> 24 >= 127 ? 'x' : '.' : ' '); -+ dst++; -+ } -+ printf("\n"); -+ } - } - } - -@@ -1685,6 +1836,8 @@ static void display_flush_cursor(struct display *display) - if (cursor == None) - cursor = display->invisible_cursor; - if (cursor != display->cursor) { -+ DBG(CURSOR, ("%s setting cursor shape %lx\n", -+ DisplayString(display->dpy), (long)cursor)); - XDefineCursor(display->dpy, display->root, cursor); - display->cursor = cursor; - } -@@ -1762,6 +1915,8 @@ static void get_src(struct clone *c, const XRectangle *clip) - c->image.obdata = (char *)&c->src.shm; - - if (c->src.use_render) { -+ DBG(DRAW, ("%s-%s get_src via XRender\n", -+ DisplayString(c->dst.dpy), c->dst.name)); - XRenderComposite(c->src.dpy, PictOpSrc, - c->src.win_picture, 0, c->src.pix_picture, - clip->x, clip->y, -@@ -1782,16 +1937,22 @@ static void get_src(struct clone *c, const XRectangle *clip) - &c->image, 0, 0); - } - } else if (c->src.pixmap) { -+ DBG(DRAW, ("%s-%s get_src XCopyArea (SHM/DRI3)\n", -+ DisplayString(c->dst.dpy), c->dst.name)); - XCopyArea(c->src.dpy, c->src.window, c->src.pixmap, c->src.gc, - clip->x, clip->y, - clip->width, clip->height, - 0, 0); - XSync(c->src.dpy, False); - } else if (c->src.use_shm) { -+ DBG(DRAW, ("%s-%s get_src XShmGetImage\n", -+ DisplayString(c->dst.dpy), c->dst.name)); - ximage_prepare(&c->image, clip->width, clip->height); - XShmGetImage(c->src.dpy, c->src.window, &c->image, - clip->x, 
clip->y, AllPlanes); - } else { -+ DBG(DRAW, ("%s-%s get_src XGetSubImage (slow)\n", -+ DisplayString(c->dst.dpy), c->dst.name)); - ximage_prepare(&c->image, c->width, c->height); - XGetSubImage(c->src.dpy, c->src.window, - clip->x, clip->y, clip->width, clip->height, -@@ -1838,7 +1999,7 @@ static void put_dst(struct clone *c, const XRectangle *clip) - clip->width, clip->height); - c->dst.display->send |= c->dst.use_shm; - } else if (c->dst.pixmap) { -- DBG(DRAW, ("%s-%s using SHM pixmap\n", -+ DBG(DRAW, ("%s-%s using SHM or DRI3 pixmap\n", - DisplayString(c->dst.dpy), c->dst.name)); - c->dst.serial = NextRequest(c->dst.dpy); - XCopyArea(c->dst.dpy, c->dst.pixmap, c->dst.window, c->dst.gc, -@@ -1870,6 +2031,9 @@ static int clone_paint(struct clone *c) - { - XRectangle clip; - -+ if (c->width == 0 || c->height == 0) -+ return 0; -+ - DBG(DRAW, ("%s-%s paint clone, damaged (%d, %d), (%d, %d) [(%d, %d), (%d, %d)]\n", - DisplayString(c->dst.dpy), c->dst.name, - c->damaged.x1, c->damaged.y1, -@@ -1944,6 +2108,10 @@ static int clone_paint(struct clone *c) - clip.height = c->damaged.y2 - c->damaged.y1; - get_src(c, &clip); - -+ DBG(DRAW, ("%s-%s target offset %dx%d\n", -+ DisplayString(c->dst.dpy), c->dst.name, -+ c->dst.x - c->src.x, c->dst.y - c->src.y)); -+ - clip.x += c->dst.x - c->src.x; - clip.y += c->dst.y - c->src.y; - put_dst(c, &clip); -@@ -1969,8 +2137,9 @@ static void clone_damage(struct clone *c, const XRectangle *rec) - if ((v = (int)rec->y + rec->height) > c->damaged.y2) - c->damaged.y2 = v; - -- DBG(DAMAGE, ("%s-%s damaged: (%d, %d), (%d, %d)\n", -+ DBG(DAMAGE, ("%s-%s damaged: +(%d,%d)x(%d, %d) -> (%d, %d), (%d, %d)\n", - DisplayString(c->dst.display->dpy), c->dst.name, -+ rec->x, rec->y, rec->width, rec->height, - c->damaged.x1, c->damaged.y1, - c->damaged.x2, c->damaged.y2)); - } -@@ -2252,6 +2421,8 @@ static int clone_init_depth(struct clone *clone) - if (ret) - return ret; - -+ clone->depth = depth; -+ - DBG(X11, ("%s-%s using depth %d, requires 
xrender for src? %d, for dst? %d\n", - DisplayString(clone->dst.dpy), clone->dst.name, - clone->depth, -@@ -2312,6 +2483,8 @@ static int add_display(struct context *ctx, Display *dpy) - display->depth = DefaultDepth(dpy, DefaultScreen(dpy)); - display->visual = DefaultVisual(dpy, DefaultScreen(dpy)); - -+ XSelectInput(dpy, display->root, ExposureMask); -+ - display->has_shm = can_use_shm(dpy, display->root, - &display->shm_event, - &display->shm_opcode, -@@ -2323,6 +2496,8 @@ static int add_display(struct context *ctx, Display *dpy) - display->shm_opcode, - display->has_shm_pixmap)); - -+ screensaver_save(display); -+ - display->rr_active = XRRQueryExtension(dpy, &display->rr_event, &display->rr_error); - DBG(X11, ("%s: randr_active?=%d, event=%d, error=%d\n", - DisplayString(dpy), -@@ -2592,6 +2767,11 @@ static int last_display_add_clones__randr(struct context *ctx) - return ret; - } - -+ clone->dst.x = 0; -+ clone->dst.y = 0; -+ clone->dst.width = display->width; -+ clone->dst.height = display->height; -+ - ret = clone_update_modes__randr(clone); - if (ret) { - fprintf(stderr, "Failed to clone output \"%s\" from display \"%s\"\n", -@@ -2668,8 +2848,8 @@ static int last_display_add_clones__xinerama(struct context *ctx) - } - - /* Replace the modes on the local VIRTUAL output with the remote Screen */ -- clone->width = xi[n].width; -- clone->height = xi[n].height; -+ clone->dst.width = xi[n].width; -+ clone->dst.height = xi[n].height; - clone->dst.x = xi[n].x_org; - clone->dst.y = xi[n].y_org; - clone->dst.rr_crtc = -1; -@@ -2698,64 +2878,67 @@ static int last_display_add_clones__display(struct context *ctx) - Display *dpy = display->dpy; - struct clone *clone; - Screen *scr; -+ int count, s; - char buf[80]; - int ret; - RROutput id; - -+ count = ScreenCount(dpy); -+ DBG(X11, ("%s(%s) - %d screens\n", __func__, DisplayString(dpy), count)); -+ for (s = 0; s < count; s++) { -+ clone = add_clone(ctx); -+ if (clone == NULL) -+ return -ENOMEM; - -- DBG(X11, ("%s(%s)\n", 
__func__, DisplayString(dpy))); -- clone = add_clone(ctx); -- if (clone == NULL) -- return -ENOMEM; -+ clone->depth = 24; -+ clone->next = display->clone; -+ display->clone = clone; - -- clone->depth = 24; -- clone->next = display->clone; -- display->clone = clone; -+ id = claim_virtual(ctx->display, buf, ctx->nclone); -+ if (id == 0) { -+ fprintf(stderr, "Failed to find available VirtualHead \"%s\" for on display \"%s\"\n", -+ buf, DisplayString(dpy)); -+ } -+ ret = clone_output_init(clone, &clone->src, ctx->display, buf, id); -+ if (ret) { -+ fprintf(stderr, "Failed to add display \"%s\"\n", -+ DisplayString(ctx->display->dpy)); -+ return ret; -+ } - -- id = claim_virtual(ctx->display, buf, ctx->nclone); -- if (id == 0) { -- fprintf(stderr, "Failed to find available VirtualHead \"%s\" for on display \"%s\"\n", -- buf, DisplayString(dpy)); -- } -- ret = clone_output_init(clone, &clone->src, ctx->display, buf, id); -- if (ret) { -- fprintf(stderr, "Failed to add display \"%s\"\n", -- DisplayString(ctx->display->dpy)); -- return ret; -- } -+ sprintf(buf, "SCREEN%d", s); -+ ret = clone_output_init(clone, &clone->dst, display, buf, 0); -+ if (ret) { -+ fprintf(stderr, "Failed to add display \"%s\"\n", -+ DisplayString(dpy)); -+ return ret; -+ } - -- sprintf(buf, "WHOLE"); -- ret = clone_output_init(clone, &clone->dst, display, buf, 0); -- if (ret) { -- fprintf(stderr, "Failed to add display \"%s\"\n", -- DisplayString(dpy)); -- return ret; -- } -+ ret = clone_init_depth(clone); -+ if (ret) { -+ fprintf(stderr, "Failed to negotiate image format for display \"%s\"\n", -+ DisplayString(dpy)); -+ return ret; -+ } - -- ret = clone_init_depth(clone); -- if (ret) { -- fprintf(stderr, "Failed to negotiate image format for display \"%s\"\n", -- DisplayString(dpy)); -- return ret; -- } -+ /* Replace the modes on the local VIRTUAL output with the remote Screen */ -+ scr = ScreenOfDisplay(dpy, s); -+ clone->dst.width = scr->width; -+ clone->dst.height = scr->height; -+ 
clone->dst.x = 0; -+ clone->dst.y = 0; -+ clone->dst.rr_crtc = -1; -+ ret = clone_update_modes__fixed(clone); -+ if (ret) { -+ fprintf(stderr, "Failed to clone display \"%s\"\n", -+ DisplayString(dpy)); -+ return ret; -+ } - -- /* Replace the modes on the local VIRTUAL output with the remote Screen */ -- scr = ScreenOfDisplay(dpy, DefaultScreen(dpy)); -- clone->width = scr->width; -- clone->height = scr->height; -- clone->dst.x = 0; -- clone->dst.y = 0; -- clone->dst.rr_crtc = -1; -- ret = clone_update_modes__fixed(clone); -- if (ret) { -- fprintf(stderr, "Failed to clone display \"%s\"\n", -- DisplayString(dpy)); -- return ret; -+ clone->active = ctx->active; -+ ctx->active = clone; - } - -- clone->active = ctx->active; -- ctx->active = clone; -- - return 0; - } - -@@ -3168,6 +3351,33 @@ static void context_cleanup(struct context *ctx) - XCloseDisplay(dpy); - } - -+static void update_cursor_image(struct context *ctx) -+{ -+ XFixesCursorImage *cur; -+ int i; -+ -+ DBG(CURSOR, ("%s cursor changed\n", -+ DisplayString(ctx->display->dpy))); -+ -+ cur = XFixesGetCursorImage(ctx->display->dpy); -+ if (cur == NULL) -+ return; -+ -+ display_load_visible_cursor(&ctx->display[0], cur); -+ for (i = 1; i < ctx->ndisplay; i++) { -+ struct display *display = &ctx->display[i]; -+ -+ DBG(CURSOR, ("%s marking cursor changed\n", DisplayString(display->dpy))); -+ display->cursor_moved++; -+ if (display->cursor != display->invisible_cursor) { -+ display->cursor_visible++; -+ context_enable_timer(display->ctx); -+ } -+ } -+ -+ XFree(cur); -+} -+ - static int done; - - static void signal_handler(int sig) -@@ -3182,6 +3392,7 @@ int main(int argc, char **argv) - uint64_t count; - int daemonize = 1, bumblebee = 0, siblings = 0, singleton = 1; - int i, ret, open, fail; -+ int idle; - - signal(SIGPIPE, SIG_IGN); - -@@ -3228,6 +3439,7 @@ int main(int argc, char **argv) - return -ret; - - XSetErrorHandler(_check_error_handler); -+ XSetIOErrorHandler(_io_error_handler); - - ret = add_fd(&ctx, 
display_open(&ctx, src_name)); - if (ret) { -@@ -3237,6 +3449,13 @@ int main(int argc, char **argv) - goto out; - } - -+ ret = check_virtual(ctx.display); -+ if (ret) { -+ fprintf(stderr, "No VIRTUAL outputs on \"%s\".\n", -+ DisplayString(ctx.display->dpy)); -+ goto out; -+ } -+ - if (singleton) { - XSelectInput(ctx.display->dpy, ctx.display->root, PropertyChangeMask); - if (first_display_has_singleton(&ctx)) { -@@ -3291,6 +3510,11 @@ int main(int argc, char **argv) - if (ret) - goto out; - -+ if (ctx.display->saver_active) -+ XScreenSaverSelectInput(ctx.display->dpy, -+ ctx.display->root, -+ ScreenSaverNotifyMask); -+ - if ((ctx.display->rr_event | ctx.display->rr_error) == 0) { - fprintf(stderr, "RandR extension not supported by %s\n", DisplayString(ctx.display->dpy)); - ret = EINVAL; -@@ -3348,25 +3572,60 @@ int main(int argc, char **argv) - signal(SIGTERM, signal_handler); - - ctx.command_continuation = 0; -+ update_cursor_image(&ctx); -+ -+ idle = 0; - while (!done) { - XEvent e; - int reconfigure = 0; - int rr_update = 0; - -- DBG(POLL, ("polling - enable timer? %d, nfd=%d, ndisplay=%d\n", ctx.timer_active, ctx.nfd, ctx.ndisplay)); -- ret = poll(ctx.pfd + !ctx.timer_active, ctx.nfd - !ctx.timer_active, -1); -- if (ret <= 0) -- break; -+ if (idle) { -+ DBG(POLL, ("polling - enable timer? 
%d, nfd=%d, ndisplay=%d\n", ctx.timer_active, ctx.nfd, ctx.ndisplay)); -+ ret = poll(ctx.pfd + !ctx.timer_active, ctx.nfd - !ctx.timer_active, -1); -+ if (ret <= 0) -+ break; -+ -+ DBG(POLL, ("poll reports %d fd awake\n", ret)); -+ } -+ idle = 1; - - /* pfd[0] is the timer, pfd[1] is the local display, pfd[2] is the mouse, pfd[3+] are the remotes */ - -- DBG(POLL, ("poll reports %d fd awake\n", ret)); - if (ctx.pfd[1].revents || XPending(ctx.display[0].dpy)) { - DBG(POLL,("%s woken up\n", DisplayString(ctx.display[0].dpy))); -+ ctx.pfd[1].revents = 0; -+ idle = 0; -+ - do { - XNextEvent(ctx.display->dpy, &e); - -- if (e.type == ctx.display->damage_event + XDamageNotify ) { -+ DBG(POLL, ("%s received event %d\n", DisplayString(ctx.display[0].dpy), e.type)); -+ -+ if (e.type == ctx.display->saver_event + ScreenSaverNotify) { -+ const XScreenSaverNotifyEvent *se = (const XScreenSaverNotifyEvent *)&e; -+ DBG(SCREEN, -+ ("%s screen saver: state=%d, kind=%d, forced=%d\n", -+ DisplayString(ctx.display->dpy), -+ se->state, se->kind, se->forced)); -+ for (i = 1; i < ctx.ndisplay; i++) { -+ struct display *display = &ctx.display[i]; -+ -+ if (!display->active) -+ continue; -+ -+ DBG(SCREEN, -+ ("%s %s screen saver\n", -+ DisplayString(display->dpy), -+ se->state == ScreenSaverOn ? 
"activating" : "resetting\n")); -+ -+ if (se->state == ScreenSaverOn) -+ XActivateScreenSaver(display->dpy); -+ else -+ XResetScreenSaver(display->dpy); -+ XFlush(display->dpy); -+ } -+ } else if (e.type == ctx.display->damage_event + XDamageNotify) { - const XDamageNotifyEvent *de = (const XDamageNotifyEvent *)&e; - struct clone *clone; - -@@ -3380,19 +3639,7 @@ int main(int argc, char **argv) - if (ctx.active) - context_enable_timer(&ctx); - } else if (e.type == ctx.display->xfixes_event + XFixesCursorNotify) { -- XFixesCursorImage *cur; -- -- DBG(CURSOR, ("%s cursor changed\n", -- DisplayString(ctx.display->dpy))); -- -- cur = XFixesGetCursorImage(ctx.display->dpy); -- if (cur == NULL) -- continue; -- -- for (i = 1; i < ctx.ndisplay; i++) -- display_load_visible_cursor(&ctx.display[i], cur); -- -- XFree(cur); -+ update_cursor_image(&ctx); - } else if (e.type == ctx.display->rr_event + RRScreenChangeNotify) { - DBG(XRR, ("%s screen changed (reconfigure pending? %d)\n", - DisplayString(ctx.display->dpy), reconfigure)); -@@ -3426,13 +3673,41 @@ int main(int argc, char **argv) - if (ctx.pfd[i+2].revents == 0 && !XPending(ctx.display[i].dpy)) - continue; - -+ ctx.pfd[i+2].revents = 0; -+ idle = 0; -+ - DBG(POLL, ("%s woken up\n", DisplayString(ctx.display[i].dpy))); - do { - XNextEvent(ctx.display[i].dpy, &e); - - DBG(POLL, ("%s received event %d\n", DisplayString(ctx.display[i].dpy), e.type)); -- if (ctx.display[i].rr_active && e.type == ctx.display[i].rr_event + RRNotify) { -- XRRNotifyEvent *re = (XRRNotifyEvent *)&e; -+ if (e.type == Expose) { -+ const XExposeEvent *xe = (XExposeEvent *)&e; -+ struct clone *clone; -+ int damaged = 0; -+ -+ DBG(DAMAGE, ("%s exposed: (%d, %d)x(%d, %d)\n", -+ DisplayString(ctx.display[i].dpy), -+ xe->x, xe->y, xe->width, xe->height)); -+ -+ for (clone = ctx.active; clone; clone = clone->active) { -+ XRectangle r; -+ -+ if (clone->dst.display != &ctx.display[i]) -+ continue; -+ -+ r.x = clone->src.x + xe->x; -+ r.y = clone->src.y + 
xe->y; -+ r.width = xe->width; -+ r.height = xe->height; -+ clone_damage(clone, &r); -+ damaged++; -+ } -+ -+ if (damaged) -+ context_enable_timer(&ctx); -+ } else if (ctx.display[i].rr_active && e.type == ctx.display[i].rr_event + RRNotify) { -+ const XRRNotifyEvent *re = (XRRNotifyEvent *)&e; - - DBG(XRR, ("%s received RRNotify, type %d\n", DisplayString(ctx.display[i].dpy), re->subtype)); - if (re->subtype == RRNotify_OutputChange) { -@@ -3480,6 +3755,7 @@ int main(int argc, char **argv) - - DBG(TIMER, ("%s timer still active? %d\n", DisplayString(ctx.display->dpy), ret != 0)); - ctx.timer_active = ret != 0; -+ idle = 0; - } - } - diff --git a/xorg/xf86-video-intel/smbuild b/xorg/xf86-video-intel/smbuild new file mode 100755 index 0000000..cd9e162 --- /dev/null +++ b/xorg/xf86-video-intel/smbuild @@ -0,0 +1,38 @@ +# Maintainer: PktSurf +app=xf86-video-intel +version="20201215" +build=1sml +homepage="https://xorg.freedesktop.org/" +download="https://xorg.freedesktop.org/releases/individual/driver/xf86-video-intel-$version.tar.bz2" +desc="X.org Intel i810/i830/i915/945G/G965+ video drivers" +requires="pixman libxv libxinerama libxtst libxrandr libxcursor xorg-server" +noautoconfsite=1 + +prepbuilddir() { + mkandenterbuilddir + rm -rf $app-$version + + tar xf $srcdir/$app-$version.tar.?z* + cd $app-$version + fixbuilddirpermissions +} + +build() { + ./autogen.sh \ + --prefix=/usr \ + --enable-xvmc \ + --disable-dga \ + --with-default-dri=3 \ + --disable-selective-werror + + make + make install DESTDIR=$pkg + + cp COPYING $pkgdocs/ + + mkfinalpkg +} + +sha512sums=" +92fb542379cbd1cd94c81ec1a916ae44b44d9bb7e52508d863d362e25af0136cfd7c6bd929d5125d8d6acec658cc4bb54b975193fd34170a811819c8e228eb89 xf86-video-intel-20201215.tar.lz +" \ No newline at end of file