optimize a bit more conversions to/from Skia bitmap formats

It turns out this doesn't really matter in practice: if converting
between pixel formats is where the time is spent, something higher up
must already be wrong. But since I've already written this...

Change-Id: I25451664d529a9226d2d81b2c424a4f4e5422ad5
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/99577
Tested-by: Jenkins
Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
This commit is contained in:
Luboš Luňák 2020-07-28 10:37:16 +02:00
parent df56a000a1
commit 7a38f1817b
10 changed files with 269 additions and 60 deletions

View File

@ -91,6 +91,11 @@ $(eval $(call gb_Library_set_include,skia,\
$(eval $(call gb_Library_add_exception_objects,skia,\
external/skia/source/SkMemory_malloc \
external/skia/source/skia_compiler \
external/skia/source/skia_opts \
))
$(eval $(call gb_Library_add_exception_objects,skia,\
external/skia/source/skia_opts_ssse3, $(CXXFLAGS_INTRINSICS_SSSE3) $(CLANG_CXXFLAGS_INTRINSICS_SSSE3) \
))
$(eval $(call gb_Library_set_generated_cxx_suffix,skia,cpp))

View File

@ -34,7 +34,6 @@ skia_patches := \
windows-force-unicode-api.patch.0 \
operator-eq-bool.patch.1 \
fix-without-gl.patch.0 \
extend-rgb-to-rgba.patch.0 \
windows-typeface-directwrite.patch.0 \
windows-raster-surface-no-copies.patch.1 \
fix-windows-dwrite.patch.1 \

View File

@ -1,29 +0,0 @@
diff --git a/include/core/SkSwizzle.h b/include/core/SkSwizzle.h
index 61e93b2da7..9a26f0f492 100644
--- ./include/core/SkSwizzle.h
+++ ./include/core/SkSwizzle.h
@@ -16,4 +16,8 @@
*/
SK_API void SkSwapRB(uint32_t* dest, const uint32_t* src, int count);
+SK_API void SkExtendRGBToRGBA(uint32_t* dest, const uint8_t* src, int count);
+
+SK_API void SkExtendGrayToRGBA(uint32_t* dest, const uint8_t* src, int count);
+
#endif
diff --git a/src/core/SkSwizzle.cpp b/src/core/SkSwizzle.cpp
index 301b0184f1..382323695f 100644
--- ./src/core/SkSwizzle.cpp
+++ ./src/core/SkSwizzle.cpp
@@ -12,3 +12,11 @@
void SkSwapRB(uint32_t* dest, const uint32_t* src, int count) {
SkOpts::RGBA_to_BGRA(dest, src, count);
}
+
+void SkExtendRGBToRGBA(uint32_t* dest, const uint8_t* src, int count) {
+ SkOpts::RGB_to_RGB1(dest, src, count);
+}
+
+void SkExtendGrayToRGBA(uint32_t* dest, const uint8_t* src, int count) {
+ SkOpts::gray_to_RGB1(dest, src, count);
+}

28
external/skia/inc/skia_opts.hxx vendored Normal file
View File

@ -0,0 +1,28 @@
/*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKIA_OPTS_H
#define SKIA_OPTS_H
#include <include/core/SkTypes.h>
// Exported pixel-format conversion helpers. In each of them 'count' is the
// number of pixels to convert, 'src' is the input buffer and 'dest' the output.

// 3 bytes per pixel -> one 32-bit pixel; implemented via SkOpts::RGB_to_RGB1.
SK_API void SkConvertRGBToRGBA(uint32_t* dest, const uint8_t* src, int count);
// 1 byte per pixel -> one 32-bit pixel; implemented via SkOpts::gray_to_RGB1.
SK_API void SkConvertGrayToRGBA(uint32_t* dest, const uint8_t* src, int count);
// One 32-bit pixel -> 3 bytes per pixel; implemented via SkLoOpts::RGB1_to_RGB.
SK_API void SkConvertRGBAToRGB(uint8_t* dest, const uint32_t* src, int count);
// One 32-bit pixel -> 1 byte per pixel; implemented via SkLoOpts::RGB1_to_gray_fast.
SK_API void SkConvertRGBAToGrayFast(uint8_t* dest, const uint32_t* src, int count);
namespace SkLoOpts
{
// Selects the implementations behind the function pointers declared below.
SK_API void Init();

// Signature of a converter from 32-bit pixels to a byte buffer:
// (destination bytes, source pixels, number of pixels).
using Swizzle_u8_8888 = void (*)(uint8_t*, const uint32_t*, int);

// Currently selected converter implementations.
extern Swizzle_u8_8888 RGB1_to_RGB; // i.e. remove an (opaque) alpha
extern Swizzle_u8_8888 RGB1_to_gray_fast; // i.e. copy one channel to the result
}
#endif

75
external/skia/source/skia_opts.cxx vendored Normal file
View File

@ -0,0 +1,75 @@
/*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include <skia_opts.hxx>
#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
#include "src/core/SkCpu.h"
#include "src/core/SkOpts.h"
#if defined __GNUC__
#pragma GCC diagnostic pop
#endif
// Exported wrapper that forwards to Skia's internal SkOpts::RGB_to_RGB1.
// NOTE(review): going by the name, this expands 'count' 3-byte RGB pixels from
// 'src' into 32-bit pixels with opaque alpha in 'dest' - the implementation
// lives in Skia and is not visible here.
void SkConvertRGBToRGBA(uint32_t* dest, const uint8_t* src, int count)
{
SkOpts::RGB_to_RGB1(dest, src, count);
}
// Exported wrapper that forwards to Skia's internal SkOpts::gray_to_RGB1.
// NOTE(review): going by the name, this expands 'count' 1-byte gray pixels from
// 'src' into 32-bit pixels with opaque alpha in 'dest' - the implementation
// lives in Skia and is not visible here.
void SkConvertGrayToRGBA(uint32_t* dest, const uint8_t* src, int count)
{
SkOpts::gray_to_RGB1(dest, src, count);
}
// Exported wrapper: converts 'count' 32-bit pixels from 'src' to 3 bytes per
// pixel in 'dest' by calling through the SkLoOpts::RGB1_to_RGB function pointer.
// The pointer has a compile-time default and may be replaced by SkLoOpts::Init().
void SkConvertRGBAToRGB(uint8_t* dest, const uint32_t* src, int count)
{
SkLoOpts::RGB1_to_RGB(dest, src, count);
}
// Exported wrapper: converts 'count' 32-bit pixels from 'src' to 1 byte per
// pixel in 'dest' by calling through the SkLoOpts::RGB1_to_gray_fast function
// pointer. "Fast" because no real gray computation is done: one channel is
// simply copied to the result.
void SkConvertRGBAToGrayFast(uint8_t* dest, const uint32_t* src, int count)
{
SkLoOpts::RGB1_to_gray_fast(dest, src, count);
}
// The rest is mostly based on Skia's SkOpts.cpp, reduced to only SSSE3 so far.
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
#define SK_OPTS_NS ssse3
#else
#define SK_OPTS_NS portable
#endif
#include "skia_opts_internal.hxx"
namespace SkLoOpts {
// Define default function pointer values here...
// SK_OPTS_NS was chosen above from the compile-time SSE level: it is 'ssse3'
// if the global compile options already guarantee SSSE3 and 'portable' otherwise.
// In the 'portable' case the pointers still get a chance to be replaced with
// the SSSE3 versions at runtime, see init() below.
#define DEFINE_DEFAULT(name) decltype(name) name = SK_OPTS_NS::name
DEFINE_DEFAULT(RGB1_to_RGB);
DEFINE_DEFAULT(RGB1_to_gray_fast);
#undef DEFINE_DEFAULT
// Each Init_foo() is defined in its own file.
void Init_ssse3();
// Performs the actual runtime selection; called through Init().
// Does nothing if SK_BUILD_NO_OPTS is defined, if the target is not x86, or if
// the defaults were already compiled as SSSE3. Otherwise it asks SkCpu whether
// SSSE3 is supported and, if so, switches the function pointers to the SSSE3
// versions via Init_ssse3().
static void init() {
#if !defined(SK_BUILD_NO_OPTS)
#if defined(SK_CPU_X86)
#if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSSE3
if (SkCpu::Supports(SkCpu::SSSE3)) { Init_ssse3(); }
#endif
#endif
#endif
}
// Public entry point for selecting the best available implementations.
// Runs init() through a function-local static SkOnce.
// NOTE(review): presumably SkOnce guarantees init() runs only once even if
// Init() is called repeatedly - confirm against Skia's SkOnce.
void Init() {
static SkOnce once;
once(init);
}
} // namespace SkLoOpts

View File

@ -0,0 +1,83 @@
/*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKIA_OPTS_INTERNAL_H
#define SKIA_OPTS_INTERNAL_H
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
#include <immintrin.h>
#endif
namespace SK_OPTS_NS {
// Plain C++ conversion of 32-bit pixels to 3 bytes per pixel: for each of the
// 'count' pixels in 'src', bits 0-7, 8-15 and 16-23 are written to 'dst' as
// three consecutive bytes and bits 24-31 are dropped.
// A non-positive 'count' writes nothing.
static void RGB1_to_RGB_portable(uint8_t dst[], const uint32_t* src, int count) {
    for (int remaining = count; remaining > 0; --remaining, ++src, dst += 3) {
        dst[0] = static_cast<uint8_t>(*src);
        dst[1] = static_cast<uint8_t>(*src >> 8);
        dst[2] = static_cast<uint8_t>(*src >> 16);
    }
}
// Plain C++ "conversion" of 32-bit pixels to one byte per pixel: the lowest
// 8 bits of each of the 'count' pixels in 'src' are copied to 'dst'.
// A non-positive 'count' writes nothing.
static void RGB1_to_gray_fast_portable(uint8_t dst[], const uint32_t* src, int count) {
    for (int remaining = count; remaining > 0; --remaining, ++src, ++dst) {
        *dst = static_cast<uint8_t>(*src & 0xFF);
    }
}
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
// SSSE3 conversion of 32-bit pixels to 3 bytes per pixel (the fourth byte of
// every pixel is dropped).
//   dst   - output buffer, must have room for count * 3 bytes
//   src   - input buffer of 'count' 32-bit pixels
//   count - number of pixels to convert
// Pixels are processed four at a time with a byte shuffle; whatever is left is
// handed to RGB1_to_RGB_portable().
inline void RGB1_to_RGB(uint8_t dst[], const uint32_t* src, int count) {
    const uint8_t X = 0xFF; // Used as a placeholder. The value of X is irrelevant.
    // Gathers the low three bytes of each of the four loaded pixels into the
    // first 12 bytes of the result.
    __m128i pack = _mm_setr_epi8(0,1,2, 4,5,6, 8,9,10, 12,13,14, X,X,X,X);
    // Storing 4 pixels should store 12 bytes, but here it stores 16, so test count >= 6
    // in order to not overrun the output buffer. The 4 surplus bytes are overwritten
    // by the next iteration or by the portable tail below.
    while (count >= 6) {
        __m128i rgba = _mm_loadu_si128((const __m128i*) src);
        __m128i rgb = _mm_shuffle_epi8(rgba, pack);
        // Store 4 pixels.
        _mm_storeu_si128((__m128i*) dst, rgb);
        // 'src' points to uint32_t, i.e. one element per pixel, so step over the
        // 4 pixels just consumed (4*4 would skip 16 pixels and run past the input).
        src += 4;
        dst += 4*3;
        count -= 4;
    }
    RGB1_to_RGB_portable(dst, src, count);
}
// SSSE3 conversion of 32-bit pixels to one byte per pixel by copying the lowest
// byte of every pixel (no actual gray computation).
//   dst   - output buffer, must have room for 'count' bytes
//   src   - input buffer of 'count' 32-bit pixels
//   count - number of pixels to convert
// Pixels are processed four at a time with a byte shuffle; whatever is left is
// handed to RGB1_to_gray_fast_portable().
inline void RGB1_to_gray_fast(uint8_t dst[], const uint32_t* src, int count) {
    const uint8_t X = 0xFF; // Used as a placeholder. The value of X is irrelevant.
    // Gathers the lowest byte of each of the four loaded pixels into the first
    // 4 bytes of the result.
    __m128i pack = _mm_setr_epi8(0,4,8,12, X,X,X,X,X,X,X,X,X,X,X,X);
    // Storing 4 pixels should store 4 bytes, but here it stores 16, so test count >= 16
    // in order to not overrun the output buffer. The 12 surplus bytes are overwritten
    // by the following iterations or by the portable tail below.
    while (count >= 16) {
        __m128i rgba = _mm_loadu_si128((const __m128i*) src);
        __m128i gray = _mm_shuffle_epi8(rgba, pack);
        // Store 4 pixels.
        _mm_storeu_si128((__m128i*) dst, gray);
        // 'src' points to uint32_t, i.e. one element per pixel, so step over the
        // 4 pixels just consumed (4*4 would skip 16 pixels and run past the input).
        src += 4;
        dst += 4;
        count -= 4;
    }
    RGB1_to_gray_fast_portable(dst, src, count);
}
#else
// Variant used when the compile-time SSE level is below SSSE3: simply forwards
// to the portable implementation.
inline void RGB1_to_RGB(uint8_t dst[], const uint32_t* src, int count) {
RGB1_to_RGB_portable(dst, src, count);
}
// Variant used when the compile-time SSE level is below SSSE3: simply forwards
// to the portable implementation.
inline void RGB1_to_gray_fast(uint8_t dst[], const uint32_t* src, int count) {
RGB1_to_gray_fast_portable(dst, src, count);
}
#endif
} // namespace
#endif

View File

@ -0,0 +1,17 @@
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include <skia_opts.hxx>
#define SK_OPTS_NS ssse3
#include "skia_opts_internal.hxx"
namespace SkLoOpts {
// Points the SkLoOpts function pointers at the implementations from the
// 'ssse3' namespace, i.e. the copies of skia_opts_internal.hxx compiled in
// this file with SK_OPTS_NS defined as ssse3.
void Init_ssse3() {
RGB1_to_RGB = ssse3::RGB1_to_RGB;
RGB1_to_gray_fast = ssse3::RGB1_to_gray_fast;
}
}

View File

@ -4383,6 +4383,10 @@ external/redland/raptor/raptor_config.h
external/redland/rasqal/rasqal.h
external/redland/redland/librdf.h
external/sane/inc/sane/sane.h
external/skia/inc/skia_opts.hxx
external/skia/source/skia_opts.cxx
external/skia/source/skia_opts_internal.hxx
external/skia/source/skia_opts_ssse3.cxx
external/unixODBC/inc/odbc/sql.h
external/unixODBC/inc/odbc/sqlext.h
external/unixODBC/inc/odbc/sqltypes.h

View File

@ -38,6 +38,7 @@ bool isVCLSkiaEnabled() { return false; }
#include <SkSurface.h>
#include <SkGraphics.h>
#include <skia_compiler.hxx>
#include <skia_opts.hxx>
#ifdef DBG_UTIL
#include <fstream>
@ -239,6 +240,7 @@ bool isVCLSkiaEnabled()
{
bRet = true;
SkGraphics::Init();
SkLoOpts::Init();
// don't actually block if denylisted, but log it if enabled, and also get the vendor id
checkDeviceDenylisted(true);
}

View File

@ -37,6 +37,7 @@
#include <SkSwizzle.h>
#include <SkColorFilter.h>
#include <SkColorMatrix.h>
#include <skia_opts.hxx>
#include <skia/utils.hxx>
#include <skia/zone.hxx>
@ -450,7 +451,7 @@ SkBitmap SkiaSalBitmap::GetAsSkBitmap() const
#endif
if (!bitmap.installPixels(
SkImageInfo::MakeS32(mPixelsSize.Width(), mPixelsSize.Height(), alphaType),
data.release(), mPixelsSize.Width() * 4,
data.release(), mScanlineSize,
[](void* addr, void*) { delete[] static_cast<sal_uInt8*>(addr); }, nullptr))
abort();
bitmap.setImmutable();
@ -461,13 +462,18 @@ SkBitmap SkiaSalBitmap::GetAsSkBitmap() const
std::unique_ptr<uint32_t[]> data(
new uint32_t[mPixelsSize.Height() * mPixelsSize.Width()]);
uint32_t* dest = data.get();
for (long y = 0; y < mPixelsSize.Height(); ++y)
// SkConvertRGBToRGBA() also works as BGR to BGRA (the function extends 3 bytes to 4
// by adding 0xFF alpha, so position of B and R doesn't matter).
if (mPixelsSize.Width() * 3 == mScanlineSize)
SkConvertRGBToRGBA(dest, mBuffer.get(), mPixelsSize.Height() * mPixelsSize.Width());
else
{
const sal_uInt8* src = mBuffer.get() + mScanlineSize * y;
// This also works as BGR to BGRA (the function extends 3 bytes to 4
// by adding 0xFF alpha, so position of B and R doesn't matter).
SkExtendRGBToRGBA(dest, src, mPixelsSize.Width());
dest += mPixelsSize.Width();
for (long y = 0; y < mPixelsSize.Height(); ++y)
{
const sal_uInt8* src = mBuffer.get() + mScanlineSize * y;
SkConvertRGBToRGBA(dest, src, mPixelsSize.Width());
dest += mPixelsSize.Width();
}
}
if (!bitmap.installPixels(
SkImageInfo::MakeS32(mPixelsSize.Width(), mPixelsSize.Height(),
@ -486,11 +492,17 @@ SkBitmap SkiaSalBitmap::GetAsSkBitmap() const
std::unique_ptr<uint32_t[]> data(
new uint32_t[mPixelsSize.Height() * mPixelsSize.Width()]);
uint32_t* dest = data.get();
for (long y = 0; y < mPixelsSize.Height(); ++y)
if (mPixelsSize.Width() * 1 == mScanlineSize)
SkConvertGrayToRGBA(dest, mBuffer.get(),
mPixelsSize.Height() * mPixelsSize.Width());
else
{
const sal_uInt8* src = mBuffer.get() + mScanlineSize * y;
SkExtendGrayToRGBA(dest, src, mPixelsSize.Width());
dest += mPixelsSize.Width();
for (long y = 0; y < mPixelsSize.Height(); ++y)
{
const sal_uInt8* src = mBuffer.get() + mScanlineSize * y;
SkConvertGrayToRGBA(dest, src, mPixelsSize.Width());
dest += mPixelsSize.Width();
}
}
if (!bitmap.installPixels(
SkImageInfo::MakeS32(mPixelsSize.Width(), mPixelsSize.Height(),
@ -826,37 +838,50 @@ void SkiaSalBitmap::EnsureBitmapData()
assert(mBuffer != nullptr);
if (mBitCount == 32)
{
for (long y = 0; y < mSize.Height(); ++y)
if (int(bitmap.rowBytes()) == mScanlineSize)
memcpy(mBuffer.get(), bitmap.getPixels(), mSize.Height() * mScanlineSize);
else
{
const uint8_t* src = static_cast<uint8_t*>(bitmap.getAddr(0, y));
sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
memcpy(dest, src, mScanlineSize);
for (long y = 0; y < mSize.Height(); ++y)
{
const uint8_t* src = static_cast<uint8_t*>(bitmap.getAddr(0, y));
sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
memcpy(dest, src, mScanlineSize);
}
}
}
else if (mBitCount == 24) // non-paletted
{
for (long y = 0; y < mSize.Height(); ++y)
if (int(bitmap.rowBytes()) == mSize.Width() * 4 && mSize.Width() * 3 == mScanlineSize)
{
const uint8_t* src = static_cast<uint8_t*>(bitmap.getAddr(0, y));
sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
for (long x = 0; x < mSize.Width(); ++x)
SkConvertRGBAToRGB(mBuffer.get(), bitmap.getAddr32(0, 0),
mSize.Height() * mSize.Width());
}
else
{
for (long y = 0; y < mSize.Height(); ++y)
{
*dest++ = *src++;
*dest++ = *src++;
*dest++ = *src++;
++src; // skip alpha
const uint32_t* src = bitmap.getAddr32(0, y);
sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
SkConvertRGBAToRGB(dest, src, mSize.Width());
}
}
}
else if (mBitCount == 8 && mPalette.IsGreyPalette8Bit())
{
for (long y = 0; y < mSize.Height(); ++y)
{ // no actual data conversion, use one color channel as the gray value
if (int(bitmap.rowBytes()) == mSize.Width() * 4 && mSize.Width() * 1 == mScanlineSize)
{
const uint8_t* src = static_cast<uint8_t*>(bitmap.getAddr(0, y));
sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
// no actual data conversion, use one color channel as the gray value
for (long x = 0; x < mSize.Width(); ++x)
dest[x] = src[x * 4];
SkConvertRGBAToGrayFast(mBuffer.get(), bitmap.getAddr32(0, 0),
mSize.Height() * mSize.Width());
}
else
{
for (long y = 0; y < mSize.Height(); ++y)
{
const uint32_t* src = bitmap.getAddr32(0, y);
sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
SkConvertRGBAToGrayFast(dest, src, mSize.Width());
}
}
}
else