Commit e90a4ea5, authored by Chris Wilson, committed by Dave Airlie

drm/udl: Inline memcmp() for RLE compression of xfer

As we use a variable length, the compiler does not realise that it is a
fixed value of either 2 or 4 bytes. Instead of performing the inline
comparison itself, the compiler inserts a function call to the generic
memcmp routine which is optimised for long comparisons of variable
length. That turns out to be quite expensive...
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
parent bcb39af4
...@@ -75,15 +75,19 @@ static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes) ...@@ -75,15 +75,19 @@ static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes)
} }
#endif #endif
static inline u16 pixel32_to_be16p(const uint8_t *pixel) static inline u16 pixel32_to_be16(const uint32_t pixel)
{ {
uint32_t pix = *(uint32_t *)pixel; return (((pixel >> 3) & 0x001f) |
u16 retval; ((pixel >> 5) & 0x07e0) |
((pixel >> 8) & 0xf800));
}
retval = (((pix >> 3) & 0x001f) | static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp)
((pix >> 5) & 0x07e0) | {
((pix >> 8) & 0xf800)); if (bpp == 2)
return retval; return *(const uint16_t *)pixel == repeat;
else
return *(const uint32_t *)pixel == repeat;
} }
/* /*
...@@ -152,29 +156,33 @@ static void udl_compress_hline16( ...@@ -152,29 +156,33 @@ static void udl_compress_hline16(
prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
while (pixel < cmd_pixel_end) { while (pixel < cmd_pixel_end) {
const u8 * const repeating_pixel = pixel; const u8 *const start = pixel;
u32 repeating_pixel;
if (bpp == 2)
*(uint16_t *)cmd = cpu_to_be16p((uint16_t *)pixel); if (bpp == 2) {
else if (bpp == 4) repeating_pixel = *(uint16_t *)pixel;
*(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16p(pixel)); *(uint16_t *)cmd = cpu_to_be16(repeating_pixel);
} else {
repeating_pixel = *(uint32_t *)pixel;
*(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel));
}
cmd += 2; cmd += 2;
pixel += bpp; pixel += bpp;
if (unlikely((pixel < cmd_pixel_end) && if (unlikely((pixel < cmd_pixel_end) &&
(!memcmp(pixel, repeating_pixel, bpp)))) { (pixel_repeats(pixel, repeating_pixel, bpp)))) {
/* go back and fill in raw pixel count */ /* go back and fill in raw pixel count */
*raw_pixels_count_byte = (((repeating_pixel - *raw_pixels_count_byte = (((start -
raw_pixel_start) / bpp) + 1) & 0xFF; raw_pixel_start) / bpp) + 1) & 0xFF;
while ((pixel < cmd_pixel_end) while ((pixel < cmd_pixel_end) &&
&& (!memcmp(pixel, repeating_pixel, bpp))) { (pixel_repeats(pixel, repeating_pixel, bpp))) {
pixel += bpp; pixel += bpp;
} }
/* immediately after raw data is repeat byte */ /* immediately after raw data is repeat byte */
*cmd++ = (((pixel - repeating_pixel) / bpp) - 1) & 0xFF; *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF;
/* Then start another raw pixel span */ /* Then start another raw pixel span */
raw_pixel_start = pixel; raw_pixel_start = pixel;
...@@ -223,6 +231,8 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, ...@@ -223,6 +231,8 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr,
u8 *cmd = *urb_buf_ptr; u8 *cmd = *urb_buf_ptr;
u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length;
BUG_ON(!(bpp == 2 || bpp == 4));
line_start = (u8 *) (front + byte_offset); line_start = (u8 *) (front + byte_offset);
next_pixel = line_start; next_pixel = line_start;
line_end = next_pixel + byte_width; line_end = next_pixel + byte_width;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment