From 52683adedd3e7d1b80686b23902c989db6c37f9f Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sat, 18 Feb 2023 22:36:38 +0200 Subject: [PATCH] video_core: Rewrite custom textures --- .gitmodules | 34 +- externals/CMakeLists.txt | 3 + externals/glad/include/glad/glad.h | 305 +- externals/glad/src/glad.c | 376 +- externals/libspng | 1 - externals/libspng/CMakeLists.txt | 14 + externals/libspng/spng.c | 6979 +++++++++++++++++ externals/libspng/spng.h | 537 ++ externals/zlib-ng/CMakeLists.txt | 14 + externals/zlib-ng/zlib-ng | 1 + src/android/app/src/main/jni/CMakeLists.txt | 4 +- .../src/main/jni/lodepng_image_interface.cpp | 29 - .../src/main/jni/lodepng_image_interface.h | 14 - src/android/app/src/main/jni/native.cpp | 4 - src/citra_qt/main.cpp | 2 + src/common/CMakeLists.txt | 6 +- src/common/async_handle.h | 16 +- src/common/bit_util.h | 6 +- src/common/dds-ktx.h | 1465 ++++ src/common/dds.h | 111 - src/common/image_util.cpp | 97 +- src/common/image_util.h | 23 +- src/common/scratch_buffer.h | 19 +- src/core/CMakeLists.txt | 2 - src/core/core.cpp | 28 +- src/core/core.h | 26 +- src/core/custom_tex_cache.cpp | 109 - src/core/custom_tex_cache.h | 55 - src/video_core/CMakeLists.txt | 4 +- .../rasterizer_cache/custom_tex_manager.cpp | 251 + .../rasterizer_cache/custom_tex_manager.h | 79 + .../rasterizer_cache/hires_replacer.cpp | 66 - .../rasterizer_cache/hires_replacer.h | 29 - .../rasterizer_cache/pixel_format.cpp | 17 + .../rasterizer_cache/pixel_format.h | 11 + .../rasterizer_cache/rasterizer_cache.h | 101 +- .../rasterizer_cache/rasterizer_cache_base.h | 30 +- .../rasterizer_cache/surface_base.h | 22 +- src/video_core/rasterizer_cache/utils.h | 23 +- src/video_core/renderer_opengl/gl_driver.cpp | 18 + src/video_core/renderer_opengl/gl_driver.h | 10 + .../renderer_opengl/gl_rasterizer.cpp | 9 +- .../renderer_opengl/gl_rasterizer.h | 9 +- .../renderer_opengl/gl_texture_runtime.cpp | 141 +- .../renderer_opengl/gl_texture_runtime.h | 84 +- 
.../renderer_opengl/renderer_opengl.cpp | 11 +- .../renderer_opengl/renderer_opengl.h | 7 +- .../renderer_vulkan/renderer_vulkan.cpp | 13 +- .../renderer_vulkan/renderer_vulkan.h | 6 +- .../renderer_vulkan/vk_instance.cpp | 65 +- src/video_core/renderer_vulkan/vk_instance.h | 7 + .../renderer_vulkan/vk_master_semaphore.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 24 +- .../renderer_vulkan/vk_rasterizer.cpp | 16 +- .../renderer_vulkan/vk_rasterizer.h | 13 +- .../renderer_vulkan/vk_texture_runtime.cpp | 272 +- .../renderer_vulkan/vk_texture_runtime.h | 45 +- src/video_core/video_core.cpp | 9 +- src/video_core/video_core.h | 6 +- 59 files changed, 10683 insertions(+), 997 deletions(-) delete mode 160000 externals/libspng create mode 100644 externals/libspng/CMakeLists.txt create mode 100644 externals/libspng/spng.c create mode 100644 externals/libspng/spng.h create mode 100644 externals/zlib-ng/CMakeLists.txt create mode 160000 externals/zlib-ng/zlib-ng delete mode 100644 src/android/app/src/main/jni/lodepng_image_interface.cpp delete mode 100644 src/android/app/src/main/jni/lodepng_image_interface.h create mode 100644 src/common/dds-ktx.h delete mode 100644 src/common/dds.h delete mode 100644 src/core/custom_tex_cache.cpp delete mode 100644 src/core/custom_tex_cache.h create mode 100644 src/video_core/rasterizer_cache/custom_tex_manager.cpp create mode 100644 src/video_core/rasterizer_cache/custom_tex_manager.h delete mode 100644 src/video_core/rasterizer_cache/hires_replacer.cpp delete mode 100644 src/video_core/rasterizer_cache/hires_replacer.h diff --git a/.gitmodules b/.gitmodules index 8e4c3f1af..7e629b2db 100644 --- a/.gitmodules +++ b/.gitmodules @@ -50,26 +50,26 @@ path = externals/libyuv url = https://github.com/lemenkov/libyuv.git [submodule "sdl2"] - path = externals/sdl2/SDL - url = https://github.com/libsdl-org/SDL + path = externals/sdl2/SDL + url = https://github.com/libsdl-org/SDL [submodule "cryptopp-cmake"] - path = externals/cryptopp-cmake 
- url = https://github.com/abdes/cryptopp-cmake.git + path = externals/cryptopp-cmake + url = https://github.com/abdes/cryptopp-cmake.git [submodule "cryptopp"] - path = externals/cryptopp - url = https://github.com/weidai11/cryptopp.git + path = externals/cryptopp + url = https://github.com/weidai11/cryptopp.git [submodule "vulkan-headers"] - path = externals/vulkan-headers - url = https://github.com/KhronosGroup/Vulkan-Headers + path = externals/vulkan-headers + url = https://github.com/KhronosGroup/Vulkan-Headers [submodule "glslang"] - path = externals/glslang - url = https://github.com/KhronosGroup/glslang + path = externals/glslang + url = https://github.com/KhronosGroup/glslang [submodule "glm"] - path = externals/glm - url = https://github.com/g-truc/glm + path = externals/glm + url = https://github.com/g-truc/glm [submodule "sirit"] - path = externals/sirit - url = https://github.com/GPUCode/sirit -[submodule "externals/libspng"] - path = externals/libspng - url = https://github.com/randy408/libspng + path = externals/sirit + url = https://github.com/GPUCode/sirit +[submodule "zlib-ng"] + path = externals/zlib-ng/zlib-ng + url = https://github.com/zlib-ng/zlib-ng diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 356a13e17..d507d78ac 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -120,6 +120,9 @@ if (ENABLE_SDL2 AND NOT USE_SYSTEM_SDL2) add_subdirectory(sdl2) endif() +# Zlib +add_subdirectory(zlib-ng) + # Zstandard set(ZSTD_LEGACY_SUPPORT OFF) set(ZSTD_BUILD_PROGRAMS OFF) diff --git a/externals/glad/include/glad/glad.h b/externals/glad/include/glad/glad.h index 4a6560f49..346f4cd89 100644 --- a/externals/glad/include/glad/glad.h +++ b/externals/glad/include/glad/glad.h @@ -1,25 +1,27 @@ /* - OpenGL, OpenGL ES loader generated by glad 0.1.36 on Fri Sep 9 09:22:43 2022. + OpenGL, OpenGL ES loader generated by glad 0.1.36 on Tue Feb 21 16:13:52 2023. 
Language/Generator: C/C++ Specification: gl - APIs: gl=4.4, gles2=3.2 + APIs: gl=4.6, gles2=3.2 Profile: core Extensions: GL_ARB_buffer_storage, GL_ARB_direct_state_access, + GL_ARB_texture_compression_bptc, GL_EXT_buffer_storage, - GL_EXT_clip_cull_distance + GL_EXT_clip_cull_distance, + GL_EXT_texture_compression_s3tc Loader: True Local files: False Omit khrplatform: False Reproducible: False Commandline: - --profile="core" --api="gl=4.4,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_ARB_buffer_storage,GL_ARB_direct_state_access,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance" + --profile="core" --api="gl=4.6,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_ARB_buffer_storage,GL_ARB_direct_state_access,GL_ARB_texture_compression_bptc,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_texture_compression_s3tc" Online: - https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.4&api=gles2%3D3.2&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_direct_state_access&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance + https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.6&api=gles2%3D3.2&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_direct_state_access&extensions=GL_ARB_texture_compression_bptc&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_texture_compression_s3tc */ @@ -1472,6 +1474,62 @@ typedef void (APIENTRY *GLVULKANPROCNV)(void); #define GL_QUERY_BUFFER_BINDING 0x9193 #define GL_QUERY_RESULT_NO_WAIT 0x9194 #define GL_MIRROR_CLAMP_TO_EDGE 0x8743 +#define GL_CONTEXT_LOST 0x0507 +#define GL_NEGATIVE_ONE_TO_ONE 0x935E +#define GL_ZERO_TO_ONE 0x935F +#define GL_CLIP_ORIGIN 0x935C +#define GL_CLIP_DEPTH_MODE 0x935D +#define GL_QUERY_WAIT_INVERTED 0x8E17 +#define GL_QUERY_NO_WAIT_INVERTED 0x8E18 +#define GL_QUERY_BY_REGION_WAIT_INVERTED 0x8E19 +#define GL_QUERY_BY_REGION_NO_WAIT_INVERTED 0x8E1A +#define GL_MAX_CULL_DISTANCES 
0x82F9 +#define GL_MAX_COMBINED_CLIP_AND_CULL_DISTANCES 0x82FA +#define GL_TEXTURE_TARGET 0x1006 +#define GL_QUERY_TARGET 0x82EA +#define GL_GUILTY_CONTEXT_RESET 0x8253 +#define GL_INNOCENT_CONTEXT_RESET 0x8254 +#define GL_UNKNOWN_CONTEXT_RESET 0x8255 +#define GL_RESET_NOTIFICATION_STRATEGY 0x8256 +#define GL_LOSE_CONTEXT_ON_RESET 0x8252 +#define GL_NO_RESET_NOTIFICATION 0x8261 +#define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT 0x00000004 +#define GL_COLOR_TABLE 0x80D0 +#define GL_POST_CONVOLUTION_COLOR_TABLE 0x80D1 +#define GL_POST_COLOR_MATRIX_COLOR_TABLE 0x80D2 +#define GL_PROXY_COLOR_TABLE 0x80D3 +#define GL_PROXY_POST_CONVOLUTION_COLOR_TABLE 0x80D4 +#define GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE 0x80D5 +#define GL_CONVOLUTION_1D 0x8010 +#define GL_CONVOLUTION_2D 0x8011 +#define GL_SEPARABLE_2D 0x8012 +#define GL_HISTOGRAM 0x8024 +#define GL_PROXY_HISTOGRAM 0x8025 +#define GL_MINMAX 0x802E +#define GL_CONTEXT_RELEASE_BEHAVIOR 0x82FB +#define GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH 0x82FC +#define GL_SHADER_BINARY_FORMAT_SPIR_V 0x9551 +#define GL_SPIR_V_BINARY 0x9552 +#define GL_PARAMETER_BUFFER 0x80EE +#define GL_PARAMETER_BUFFER_BINDING 0x80EF +#define GL_CONTEXT_FLAG_NO_ERROR_BIT 0x00000008 +#define GL_VERTICES_SUBMITTED 0x82EE +#define GL_PRIMITIVES_SUBMITTED 0x82EF +#define GL_VERTEX_SHADER_INVOCATIONS 0x82F0 +#define GL_TESS_CONTROL_SHADER_PATCHES 0x82F1 +#define GL_TESS_EVALUATION_SHADER_INVOCATIONS 0x82F2 +#define GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED 0x82F3 +#define GL_FRAGMENT_SHADER_INVOCATIONS 0x82F4 +#define GL_COMPUTE_SHADER_INVOCATIONS 0x82F5 +#define GL_CLIPPING_INPUT_PRIMITIVES 0x82F6 +#define GL_CLIPPING_OUTPUT_PRIMITIVES 0x82F7 +#define GL_POLYGON_OFFSET_CLAMP 0x8E1B +#define GL_SPIR_V_EXTENSIONS 0x9553 +#define GL_NUM_SPIR_V_EXTENSIONS 0x9554 +#define GL_TEXTURE_MAX_ANISOTROPY 0x84FE +#define GL_MAX_TEXTURE_MAX_ANISOTROPY 0x84FF +#define GL_TRANSFORM_FEEDBACK_OVERFLOW 0x82EC +#define GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW 0x82ED #define 
GL_ALIASED_POINT_SIZE_RANGE 0x846D #define GL_RED_BITS 0x0D52 #define GL_GREEN_BITS 0x0D53 @@ -1501,14 +1559,6 @@ typedef void (APIENTRY *GLVULKANPROCNV)(void); #define GL_HSL_COLOR 0x92AF #define GL_HSL_LUMINOSITY 0x92B0 #define GL_PRIMITIVE_BOUNDING_BOX 0x92BE -#define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT 0x00000004 -#define GL_LOSE_CONTEXT_ON_RESET 0x8252 -#define GL_GUILTY_CONTEXT_RESET 0x8253 -#define GL_INNOCENT_CONTEXT_RESET 0x8254 -#define GL_UNKNOWN_CONTEXT_RESET 0x8255 -#define GL_RESET_NOTIFICATION_STRATEGY 0x8256 -#define GL_NO_RESET_NOTIFICATION 0x8261 -#define GL_CONTEXT_LOST 0x0507 #define GL_COMPRESSED_RGBA_ASTC_4x4 0x93B0 #define GL_COMPRESSED_RGBA_ASTC_5x4 0x93B1 #define GL_COMPRESSED_RGBA_ASTC_5x5 0x93B2 @@ -3324,73 +3374,12 @@ typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERSPROC)(GLuint first, GLsizei count, GLAPI PFNGLBINDVERTEXBUFFERSPROC glad_glBindVertexBuffers; #define glBindVertexBuffers glad_glBindVertexBuffers #endif -#ifndef GL_ES_VERSION_2_0 -#define GL_ES_VERSION_2_0 1 -GLAPI int GLAD_GL_ES_VERSION_2_0; -#endif -#ifndef GL_ES_VERSION_3_0 -#define GL_ES_VERSION_3_0 1 -GLAPI int GLAD_GL_ES_VERSION_3_0; -#endif -#ifndef GL_ES_VERSION_3_1 -#define GL_ES_VERSION_3_1 1 -GLAPI int GLAD_GL_ES_VERSION_3_1; -typedef void (APIENTRYP PFNGLMEMORYBARRIERBYREGIONPROC)(GLbitfield barriers); -GLAPI PFNGLMEMORYBARRIERBYREGIONPROC glad_glMemoryBarrierByRegion; -#define glMemoryBarrierByRegion glad_glMemoryBarrierByRegion -#endif -#ifndef GL_ES_VERSION_3_2 -#define GL_ES_VERSION_3_2 1 -GLAPI int GLAD_GL_ES_VERSION_3_2; -typedef void (APIENTRYP PFNGLBLENDBARRIERPROC)(void); -GLAPI PFNGLBLENDBARRIERPROC glad_glBlendBarrier; -#define glBlendBarrier glad_glBlendBarrier -typedef void (APIENTRYP PFNGLPRIMITIVEBOUNDINGBOXPROC)(GLfloat minX, GLfloat minY, GLfloat minZ, GLfloat minW, GLfloat maxX, GLfloat maxY, GLfloat maxZ, GLfloat maxW); -GLAPI PFNGLPRIMITIVEBOUNDINGBOXPROC glad_glPrimitiveBoundingBox; -#define glPrimitiveBoundingBox glad_glPrimitiveBoundingBox 
-typedef GLenum (APIENTRYP PFNGLGETGRAPHICSRESETSTATUSPROC)(void); -GLAPI PFNGLGETGRAPHICSRESETSTATUSPROC glad_glGetGraphicsResetStatus; -#define glGetGraphicsResetStatus glad_glGetGraphicsResetStatus -typedef void (APIENTRYP PFNGLREADNPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); -GLAPI PFNGLREADNPIXELSPROC glad_glReadnPixels; -#define glReadnPixels glad_glReadnPixels -typedef void (APIENTRYP PFNGLGETNUNIFORMFVPROC)(GLuint program, GLint location, GLsizei bufSize, GLfloat *params); -GLAPI PFNGLGETNUNIFORMFVPROC glad_glGetnUniformfv; -#define glGetnUniformfv glad_glGetnUniformfv -typedef void (APIENTRYP PFNGLGETNUNIFORMIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLint *params); -GLAPI PFNGLGETNUNIFORMIVPROC glad_glGetnUniformiv; -#define glGetnUniformiv glad_glGetnUniformiv -typedef void (APIENTRYP PFNGLGETNUNIFORMUIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLuint *params); -GLAPI PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv; -#define glGetnUniformuiv glad_glGetnUniformuiv -#endif -#define GL_TEXTURE_TARGET 0x1006 -#define GL_QUERY_TARGET 0x82EA -#define GL_MAP_PERSISTENT_BIT_EXT 0x0040 -#define GL_MAP_COHERENT_BIT_EXT 0x0080 -#define GL_DYNAMIC_STORAGE_BIT_EXT 0x0100 -#define GL_CLIENT_STORAGE_BIT_EXT 0x0200 -#define GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT 0x00004000 -#define GL_BUFFER_IMMUTABLE_STORAGE_EXT 0x821F -#define GL_BUFFER_STORAGE_FLAGS_EXT 0x8220 -#define GL_MAX_CLIP_DISTANCES_EXT 0x0D32 -#define GL_MAX_CULL_DISTANCES_EXT 0x82F9 -#define GL_MAX_COMBINED_CLIP_AND_CULL_DISTANCES_EXT 0x82FA -#define GL_CLIP_DISTANCE0_EXT 0x3000 -#define GL_CLIP_DISTANCE1_EXT 0x3001 -#define GL_CLIP_DISTANCE2_EXT 0x3002 -#define GL_CLIP_DISTANCE3_EXT 0x3003 -#define GL_CLIP_DISTANCE4_EXT 0x3004 -#define GL_CLIP_DISTANCE5_EXT 0x3005 -#define GL_CLIP_DISTANCE6_EXT 0x3006 -#define GL_CLIP_DISTANCE7_EXT 0x3007 -#ifndef GL_ARB_buffer_storage -#define GL_ARB_buffer_storage 1 
-GLAPI int GLAD_GL_ARB_buffer_storage; -#endif -#ifndef GL_ARB_direct_state_access -#define GL_ARB_direct_state_access 1 -GLAPI int GLAD_GL_ARB_direct_state_access; +#ifndef GL_VERSION_4_5 +#define GL_VERSION_4_5 1 +GLAPI int GLAD_GL_VERSION_4_5; +typedef void (APIENTRYP PFNGLCLIPCONTROLPROC)(GLenum origin, GLenum depth); +GLAPI PFNGLCLIPCONTROLPROC glad_glClipControl; +#define glClipControl glad_glClipControl typedef void (APIENTRYP PFNGLCREATETRANSFORMFEEDBACKSPROC)(GLsizei n, GLuint *ids); GLAPI PFNGLCREATETRANSFORMFEEDBACKSPROC glad_glCreateTransformFeedbacks; #define glCreateTransformFeedbacks glad_glCreateTransformFeedbacks @@ -3682,6 +3671,158 @@ GLAPI PFNGLGETQUERYBUFFEROBJECTUI64VPROC glad_glGetQueryBufferObjectui64v; typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTUIVPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset); GLAPI PFNGLGETQUERYBUFFEROBJECTUIVPROC glad_glGetQueryBufferObjectuiv; #define glGetQueryBufferObjectuiv glad_glGetQueryBufferObjectuiv +typedef void (APIENTRYP PFNGLMEMORYBARRIERBYREGIONPROC)(GLbitfield barriers); +GLAPI PFNGLMEMORYBARRIERBYREGIONPROC glad_glMemoryBarrierByRegion; +#define glMemoryBarrierByRegion glad_glMemoryBarrierByRegion +typedef void (APIENTRYP PFNGLGETTEXTURESUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void *pixels); +GLAPI PFNGLGETTEXTURESUBIMAGEPROC glad_glGetTextureSubImage; +#define glGetTextureSubImage glad_glGetTextureSubImage +typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei bufSize, void *pixels); +GLAPI PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC glad_glGetCompressedTextureSubImage; +#define glGetCompressedTextureSubImage glad_glGetCompressedTextureSubImage +typedef GLenum (APIENTRYP PFNGLGETGRAPHICSRESETSTATUSPROC)(void); +GLAPI 
PFNGLGETGRAPHICSRESETSTATUSPROC glad_glGetGraphicsResetStatus; +#define glGetGraphicsResetStatus glad_glGetGraphicsResetStatus +typedef void (APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEPROC)(GLenum target, GLint lod, GLsizei bufSize, void *pixels); +GLAPI PFNGLGETNCOMPRESSEDTEXIMAGEPROC glad_glGetnCompressedTexImage; +#define glGetnCompressedTexImage glad_glGetnCompressedTexImage +typedef void (APIENTRYP PFNGLGETNTEXIMAGEPROC)(GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels); +GLAPI PFNGLGETNTEXIMAGEPROC glad_glGetnTexImage; +#define glGetnTexImage glad_glGetnTexImage +typedef void (APIENTRYP PFNGLGETNUNIFORMDVPROC)(GLuint program, GLint location, GLsizei bufSize, GLdouble *params); +GLAPI PFNGLGETNUNIFORMDVPROC glad_glGetnUniformdv; +#define glGetnUniformdv glad_glGetnUniformdv +typedef void (APIENTRYP PFNGLGETNUNIFORMFVPROC)(GLuint program, GLint location, GLsizei bufSize, GLfloat *params); +GLAPI PFNGLGETNUNIFORMFVPROC glad_glGetnUniformfv; +#define glGetnUniformfv glad_glGetnUniformfv +typedef void (APIENTRYP PFNGLGETNUNIFORMIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLint *params); +GLAPI PFNGLGETNUNIFORMIVPROC glad_glGetnUniformiv; +#define glGetnUniformiv glad_glGetnUniformiv +typedef void (APIENTRYP PFNGLGETNUNIFORMUIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLuint *params); +GLAPI PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv; +#define glGetnUniformuiv glad_glGetnUniformuiv +typedef void (APIENTRYP PFNGLREADNPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +GLAPI PFNGLREADNPIXELSPROC glad_glReadnPixels; +#define glReadnPixels glad_glReadnPixels +typedef void (APIENTRYP PFNGLGETNMAPDVPROC)(GLenum target, GLenum query, GLsizei bufSize, GLdouble *v); +GLAPI PFNGLGETNMAPDVPROC glad_glGetnMapdv; +#define glGetnMapdv glad_glGetnMapdv +typedef void (APIENTRYP PFNGLGETNMAPFVPROC)(GLenum target, GLenum query, GLsizei bufSize, GLfloat 
*v); +GLAPI PFNGLGETNMAPFVPROC glad_glGetnMapfv; +#define glGetnMapfv glad_glGetnMapfv +typedef void (APIENTRYP PFNGLGETNMAPIVPROC)(GLenum target, GLenum query, GLsizei bufSize, GLint *v); +GLAPI PFNGLGETNMAPIVPROC glad_glGetnMapiv; +#define glGetnMapiv glad_glGetnMapiv +typedef void (APIENTRYP PFNGLGETNPIXELMAPFVPROC)(GLenum map, GLsizei bufSize, GLfloat *values); +GLAPI PFNGLGETNPIXELMAPFVPROC glad_glGetnPixelMapfv; +#define glGetnPixelMapfv glad_glGetnPixelMapfv +typedef void (APIENTRYP PFNGLGETNPIXELMAPUIVPROC)(GLenum map, GLsizei bufSize, GLuint *values); +GLAPI PFNGLGETNPIXELMAPUIVPROC glad_glGetnPixelMapuiv; +#define glGetnPixelMapuiv glad_glGetnPixelMapuiv +typedef void (APIENTRYP PFNGLGETNPIXELMAPUSVPROC)(GLenum map, GLsizei bufSize, GLushort *values); +GLAPI PFNGLGETNPIXELMAPUSVPROC glad_glGetnPixelMapusv; +#define glGetnPixelMapusv glad_glGetnPixelMapusv +typedef void (APIENTRYP PFNGLGETNPOLYGONSTIPPLEPROC)(GLsizei bufSize, GLubyte *pattern); +GLAPI PFNGLGETNPOLYGONSTIPPLEPROC glad_glGetnPolygonStipple; +#define glGetnPolygonStipple glad_glGetnPolygonStipple +typedef void (APIENTRYP PFNGLGETNCOLORTABLEPROC)(GLenum target, GLenum format, GLenum type, GLsizei bufSize, void *table); +GLAPI PFNGLGETNCOLORTABLEPROC glad_glGetnColorTable; +#define glGetnColorTable glad_glGetnColorTable +typedef void (APIENTRYP PFNGLGETNCONVOLUTIONFILTERPROC)(GLenum target, GLenum format, GLenum type, GLsizei bufSize, void *image); +GLAPI PFNGLGETNCONVOLUTIONFILTERPROC glad_glGetnConvolutionFilter; +#define glGetnConvolutionFilter glad_glGetnConvolutionFilter +typedef void (APIENTRYP PFNGLGETNSEPARABLEFILTERPROC)(GLenum target, GLenum format, GLenum type, GLsizei rowBufSize, void *row, GLsizei columnBufSize, void *column, void *span); +GLAPI PFNGLGETNSEPARABLEFILTERPROC glad_glGetnSeparableFilter; +#define glGetnSeparableFilter glad_glGetnSeparableFilter +typedef void (APIENTRYP PFNGLGETNHISTOGRAMPROC)(GLenum target, GLboolean reset, GLenum format, GLenum type, GLsizei bufSize, 
void *values); +GLAPI PFNGLGETNHISTOGRAMPROC glad_glGetnHistogram; +#define glGetnHistogram glad_glGetnHistogram +typedef void (APIENTRYP PFNGLGETNMINMAXPROC)(GLenum target, GLboolean reset, GLenum format, GLenum type, GLsizei bufSize, void *values); +GLAPI PFNGLGETNMINMAXPROC glad_glGetnMinmax; +#define glGetnMinmax glad_glGetnMinmax +typedef void (APIENTRYP PFNGLTEXTUREBARRIERPROC)(void); +GLAPI PFNGLTEXTUREBARRIERPROC glad_glTextureBarrier; +#define glTextureBarrier glad_glTextureBarrier +#endif +#ifndef GL_VERSION_4_6 +#define GL_VERSION_4_6 1 +GLAPI int GLAD_GL_VERSION_4_6; +typedef void (APIENTRYP PFNGLSPECIALIZESHADERPROC)(GLuint shader, const GLchar *pEntryPoint, GLuint numSpecializationConstants, const GLuint *pConstantIndex, const GLuint *pConstantValue); +GLAPI PFNGLSPECIALIZESHADERPROC glad_glSpecializeShader; +#define glSpecializeShader glad_glSpecializeShader +typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC)(GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +GLAPI PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC glad_glMultiDrawArraysIndirectCount; +#define glMultiDrawArraysIndirectCount glad_glMultiDrawArraysIndirectCount +typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC)(GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +GLAPI PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC glad_glMultiDrawElementsIndirectCount; +#define glMultiDrawElementsIndirectCount glad_glMultiDrawElementsIndirectCount +typedef void (APIENTRYP PFNGLPOLYGONOFFSETCLAMPPROC)(GLfloat factor, GLfloat units, GLfloat clamp); +GLAPI PFNGLPOLYGONOFFSETCLAMPPROC glad_glPolygonOffsetClamp; +#define glPolygonOffsetClamp glad_glPolygonOffsetClamp +#endif +#ifndef GL_ES_VERSION_2_0 +#define GL_ES_VERSION_2_0 1 +GLAPI int GLAD_GL_ES_VERSION_2_0; +#endif +#ifndef GL_ES_VERSION_3_0 +#define GL_ES_VERSION_3_0 1 +GLAPI int GLAD_GL_ES_VERSION_3_0; +#endif +#ifndef GL_ES_VERSION_3_1 
+#define GL_ES_VERSION_3_1 1 +GLAPI int GLAD_GL_ES_VERSION_3_1; +#endif +#ifndef GL_ES_VERSION_3_2 +#define GL_ES_VERSION_3_2 1 +GLAPI int GLAD_GL_ES_VERSION_3_2; +typedef void (APIENTRYP PFNGLBLENDBARRIERPROC)(void); +GLAPI PFNGLBLENDBARRIERPROC glad_glBlendBarrier; +#define glBlendBarrier glad_glBlendBarrier +typedef void (APIENTRYP PFNGLPRIMITIVEBOUNDINGBOXPROC)(GLfloat minX, GLfloat minY, GLfloat minZ, GLfloat minW, GLfloat maxX, GLfloat maxY, GLfloat maxZ, GLfloat maxW); +GLAPI PFNGLPRIMITIVEBOUNDINGBOXPROC glad_glPrimitiveBoundingBox; +#define glPrimitiveBoundingBox glad_glPrimitiveBoundingBox +#endif +#define GL_COMPRESSED_RGBA_BPTC_UNORM_ARB 0x8E8C +#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB 0x8E8D +#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB 0x8E8E +#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB 0x8E8F +#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 +#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 +#define GL_MAP_PERSISTENT_BIT_EXT 0x0040 +#define GL_MAP_COHERENT_BIT_EXT 0x0080 +#define GL_DYNAMIC_STORAGE_BIT_EXT 0x0100 +#define GL_CLIENT_STORAGE_BIT_EXT 0x0200 +#define GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT 0x00004000 +#define GL_BUFFER_IMMUTABLE_STORAGE_EXT 0x821F +#define GL_BUFFER_STORAGE_FLAGS_EXT 0x8220 +#define GL_MAX_CLIP_DISTANCES_EXT 0x0D32 +#define GL_MAX_CULL_DISTANCES_EXT 0x82F9 +#define GL_MAX_COMBINED_CLIP_AND_CULL_DISTANCES_EXT 0x82FA +#define GL_CLIP_DISTANCE0_EXT 0x3000 +#define GL_CLIP_DISTANCE1_EXT 0x3001 +#define GL_CLIP_DISTANCE2_EXT 0x3002 +#define GL_CLIP_DISTANCE3_EXT 0x3003 +#define GL_CLIP_DISTANCE4_EXT 0x3004 +#define GL_CLIP_DISTANCE5_EXT 0x3005 +#define GL_CLIP_DISTANCE6_EXT 0x3006 +#define GL_CLIP_DISTANCE7_EXT 0x3007 +#ifndef GL_ARB_buffer_storage +#define GL_ARB_buffer_storage 1 +GLAPI int GLAD_GL_ARB_buffer_storage; +#endif +#ifndef GL_ARB_direct_state_access +#define GL_ARB_direct_state_access 1 +GLAPI int 
GLAD_GL_ARB_direct_state_access; +#endif +#ifndef GL_ARB_texture_compression_bptc +#define GL_ARB_texture_compression_bptc 1 +GLAPI int GLAD_GL_ARB_texture_compression_bptc; +#endif +#ifndef GL_EXT_texture_compression_s3tc +#define GL_EXT_texture_compression_s3tc 1 +GLAPI int GLAD_GL_EXT_texture_compression_s3tc; #endif #ifndef GL_EXT_buffer_storage #define GL_EXT_buffer_storage 1 @@ -3694,6 +3835,10 @@ GLAPI PFNGLBUFFERSTORAGEEXTPROC glad_glBufferStorageEXT; #define GL_EXT_clip_cull_distance 1 GLAPI int GLAD_GL_EXT_clip_cull_distance; #endif +#ifndef GL_EXT_texture_compression_s3tc +#define GL_EXT_texture_compression_s3tc 1 +GLAPI int GLAD_GL_EXT_texture_compression_s3tc; +#endif #ifdef __cplusplus } diff --git a/externals/glad/src/glad.c b/externals/glad/src/glad.c index 4ff8d7931..f83bce534 100644 --- a/externals/glad/src/glad.c +++ b/externals/glad/src/glad.c @@ -1,25 +1,27 @@ /* - OpenGL, OpenGL ES loader generated by glad 0.1.36 on Fri Sep 9 09:22:43 2022. + OpenGL, OpenGL ES loader generated by glad 0.1.36 on Tue Feb 21 16:13:52 2023. 
Language/Generator: C/C++ Specification: gl - APIs: gl=4.4, gles2=3.2 + APIs: gl=4.6, gles2=3.2 Profile: core Extensions: GL_ARB_buffer_storage, GL_ARB_direct_state_access, + GL_ARB_texture_compression_bptc, GL_EXT_buffer_storage, - GL_EXT_clip_cull_distance + GL_EXT_clip_cull_distance, + GL_EXT_texture_compression_s3tc Loader: True Local files: False Omit khrplatform: False Reproducible: False Commandline: - --profile="core" --api="gl=4.4,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_ARB_buffer_storage,GL_ARB_direct_state_access,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance" + --profile="core" --api="gl=4.6,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_ARB_buffer_storage,GL_ARB_direct_state_access,GL_ARB_texture_compression_bptc,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_texture_compression_s3tc" Online: - https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.4&api=gles2%3D3.2&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_direct_state_access&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance + https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.6&api=gles2%3D3.2&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_direct_state_access&extensions=GL_ARB_texture_compression_bptc&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_texture_compression_s3tc */ #include @@ -275,6 +277,8 @@ int GLAD_GL_VERSION_4_1 = 0; int GLAD_GL_VERSION_4_2 = 0; int GLAD_GL_VERSION_4_3 = 0; int GLAD_GL_VERSION_4_4 = 0; +int GLAD_GL_VERSION_4_5 = 0; +int GLAD_GL_VERSION_4_6 = 0; int GLAD_GL_ES_VERSION_2_0 = 0; int GLAD_GL_ES_VERSION_3_0 = 0; int GLAD_GL_ES_VERSION_3_1 = 0; @@ -302,6 +306,7 @@ PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer = NULL; PFNGLBINDSAMPLERPROC glad_glBindSampler = NULL; PFNGLBINDSAMPLERSPROC glad_glBindSamplers = NULL; PFNGLBINDTEXTUREPROC glad_glBindTexture = NULL; +PFNGLBINDTEXTUREUNITPROC 
glad_glBindTextureUnit = NULL; PFNGLBINDTEXTURESPROC glad_glBindTextures = NULL; PFNGLBINDTRANSFORMFEEDBACKPROC glad_glBindTransformFeedback = NULL; PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray = NULL; @@ -318,10 +323,12 @@ PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate = NULL; PFNGLBLENDFUNCSEPARATEIPROC glad_glBlendFuncSeparatei = NULL; PFNGLBLENDFUNCIPROC glad_glBlendFunci = NULL; PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer = NULL; +PFNGLBLITNAMEDFRAMEBUFFERPROC glad_glBlitNamedFramebuffer = NULL; PFNGLBUFFERDATAPROC glad_glBufferData = NULL; PFNGLBUFFERSTORAGEPROC glad_glBufferStorage = NULL; PFNGLBUFFERSUBDATAPROC glad_glBufferSubData = NULL; PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus = NULL; +PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC glad_glCheckNamedFramebufferStatus = NULL; PFNGLCLAMPCOLORPROC glad_glClampColor = NULL; PFNGLCLEARPROC glad_glClear = NULL; PFNGLCLEARBUFFERDATAPROC glad_glClearBufferData = NULL; @@ -333,10 +340,17 @@ PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv = NULL; PFNGLCLEARCOLORPROC glad_glClearColor = NULL; PFNGLCLEARDEPTHPROC glad_glClearDepth = NULL; PFNGLCLEARDEPTHFPROC glad_glClearDepthf = NULL; +PFNGLCLEARNAMEDBUFFERDATAPROC glad_glClearNamedBufferData = NULL; +PFNGLCLEARNAMEDBUFFERSUBDATAPROC glad_glClearNamedBufferSubData = NULL; +PFNGLCLEARNAMEDFRAMEBUFFERFIPROC glad_glClearNamedFramebufferfi = NULL; +PFNGLCLEARNAMEDFRAMEBUFFERFVPROC glad_glClearNamedFramebufferfv = NULL; +PFNGLCLEARNAMEDFRAMEBUFFERIVPROC glad_glClearNamedFramebufferiv = NULL; +PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC glad_glClearNamedFramebufferuiv = NULL; PFNGLCLEARSTENCILPROC glad_glClearStencil = NULL; PFNGLCLEARTEXIMAGEPROC glad_glClearTexImage = NULL; PFNGLCLEARTEXSUBIMAGEPROC glad_glClearTexSubImage = NULL; PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync = NULL; +PFNGLCLIPCONTROLPROC glad_glClipControl = NULL; PFNGLCOLORMASKPROC glad_glColorMask = NULL; PFNGLCOLORMASKIPROC glad_glColorMaski = NULL; PFNGLCOLORP3UIPROC glad_glColorP3ui = 
NULL; @@ -350,16 +364,32 @@ PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D = NULL; PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D = NULL; PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D = NULL; PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D = NULL; +PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC glad_glCompressedTextureSubImage1D = NULL; +PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC glad_glCompressedTextureSubImage2D = NULL; +PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC glad_glCompressedTextureSubImage3D = NULL; PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData = NULL; PFNGLCOPYIMAGESUBDATAPROC glad_glCopyImageSubData = NULL; +PFNGLCOPYNAMEDBUFFERSUBDATAPROC glad_glCopyNamedBufferSubData = NULL; PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D = NULL; PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D = NULL; PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D = NULL; PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D = NULL; PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D = NULL; +PFNGLCOPYTEXTURESUBIMAGE1DPROC glad_glCopyTextureSubImage1D = NULL; +PFNGLCOPYTEXTURESUBIMAGE2DPROC glad_glCopyTextureSubImage2D = NULL; +PFNGLCOPYTEXTURESUBIMAGE3DPROC glad_glCopyTextureSubImage3D = NULL; +PFNGLCREATEBUFFERSPROC glad_glCreateBuffers = NULL; +PFNGLCREATEFRAMEBUFFERSPROC glad_glCreateFramebuffers = NULL; PFNGLCREATEPROGRAMPROC glad_glCreateProgram = NULL; +PFNGLCREATEPROGRAMPIPELINESPROC glad_glCreateProgramPipelines = NULL; +PFNGLCREATEQUERIESPROC glad_glCreateQueries = NULL; +PFNGLCREATERENDERBUFFERSPROC glad_glCreateRenderbuffers = NULL; +PFNGLCREATESAMPLERSPROC glad_glCreateSamplers = NULL; PFNGLCREATESHADERPROC glad_glCreateShader = NULL; PFNGLCREATESHADERPROGRAMVPROC glad_glCreateShaderProgramv = NULL; +PFNGLCREATETEXTURESPROC glad_glCreateTextures = NULL; +PFNGLCREATETRANSFORMFEEDBACKSPROC glad_glCreateTransformFeedbacks = NULL; +PFNGLCREATEVERTEXARRAYSPROC glad_glCreateVertexArrays = NULL; PFNGLCULLFACEPROC glad_glCullFace = NULL; 
PFNGLDEBUGMESSAGECALLBACKPROC glad_glDebugMessageCallback = NULL; PFNGLDEBUGMESSAGECONTROLPROC glad_glDebugMessageControl = NULL; @@ -384,6 +414,7 @@ PFNGLDEPTHRANGEINDEXEDPROC glad_glDepthRangeIndexed = NULL; PFNGLDEPTHRANGEFPROC glad_glDepthRangef = NULL; PFNGLDETACHSHADERPROC glad_glDetachShader = NULL; PFNGLDISABLEPROC glad_glDisable = NULL; +PFNGLDISABLEVERTEXARRAYATTRIBPROC glad_glDisableVertexArrayAttrib = NULL; PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray = NULL; PFNGLDISABLEIPROC glad_glDisablei = NULL; PFNGLDISPATCHCOMPUTEPROC glad_glDispatchCompute = NULL; @@ -408,6 +439,7 @@ PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDPROC glad_glDrawTransformFeedbackInstanced = PFNGLDRAWTRANSFORMFEEDBACKSTREAMPROC glad_glDrawTransformFeedbackStream = NULL; PFNGLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC glad_glDrawTransformFeedbackStreamInstanced = NULL; PFNGLENABLEPROC glad_glEnable = NULL; +PFNGLENABLEVERTEXARRAYATTRIBPROC glad_glEnableVertexArrayAttrib = NULL; PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray = NULL; PFNGLENABLEIPROC glad_glEnablei = NULL; PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender = NULL; @@ -418,6 +450,7 @@ PFNGLFENCESYNCPROC glad_glFenceSync = NULL; PFNGLFINISHPROC glad_glFinish = NULL; PFNGLFLUSHPROC glad_glFlush = NULL; PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange = NULL; +PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC glad_glFlushMappedNamedBufferRange = NULL; PFNGLFRAMEBUFFERPARAMETERIPROC glad_glFramebufferParameteri = NULL; PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer = NULL; PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture = NULL; @@ -436,6 +469,7 @@ PFNGLGENTEXTURESPROC glad_glGenTextures = NULL; PFNGLGENTRANSFORMFEEDBACKSPROC glad_glGenTransformFeedbacks = NULL; PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays = NULL; PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap = NULL; +PFNGLGENERATETEXTUREMIPMAPPROC glad_glGenerateTextureMipmap = NULL; 
PFNGLGETACTIVEATOMICCOUNTERBUFFERIVPROC glad_glGetActiveAtomicCounterBufferiv = NULL; PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib = NULL; PFNGLGETACTIVESUBROUTINENAMEPROC glad_glGetActiveSubroutineName = NULL; @@ -455,6 +489,8 @@ PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv = NULL; PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv = NULL; PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData = NULL; PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage = NULL; +PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC glad_glGetCompressedTextureImage = NULL; +PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC glad_glGetCompressedTextureSubImage = NULL; PFNGLGETDEBUGMESSAGELOGPROC glad_glGetDebugMessageLog = NULL; PFNGLGETDOUBLEI_VPROC glad_glGetDoublei_v = NULL; PFNGLGETDOUBLEVPROC glad_glGetDoublev = NULL; @@ -473,6 +509,13 @@ PFNGLGETINTEGERVPROC glad_glGetIntegerv = NULL; PFNGLGETINTERNALFORMATI64VPROC glad_glGetInternalformati64v = NULL; PFNGLGETINTERNALFORMATIVPROC glad_glGetInternalformativ = NULL; PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv = NULL; +PFNGLGETNAMEDBUFFERPARAMETERI64VPROC glad_glGetNamedBufferParameteri64v = NULL; +PFNGLGETNAMEDBUFFERPARAMETERIVPROC glad_glGetNamedBufferParameteriv = NULL; +PFNGLGETNAMEDBUFFERPOINTERVPROC glad_glGetNamedBufferPointerv = NULL; +PFNGLGETNAMEDBUFFERSUBDATAPROC glad_glGetNamedBufferSubData = NULL; +PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetNamedFramebufferAttachmentParameteriv = NULL; +PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC glad_glGetNamedFramebufferParameteriv = NULL; +PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC glad_glGetNamedRenderbufferParameteriv = NULL; PFNGLGETOBJECTLABELPROC glad_glGetObjectLabel = NULL; PFNGLGETOBJECTPTRLABELPROC glad_glGetObjectPtrLabel = NULL; PFNGLGETPOINTERVPROC glad_glGetPointerv = NULL; @@ -488,6 +531,10 @@ PFNGLGETPROGRAMRESOURCENAMEPROC glad_glGetProgramResourceName = NULL; PFNGLGETPROGRAMRESOURCEIVPROC glad_glGetProgramResourceiv = NULL; PFNGLGETPROGRAMSTAGEIVPROC 
glad_glGetProgramStageiv = NULL; PFNGLGETPROGRAMIVPROC glad_glGetProgramiv = NULL; +PFNGLGETQUERYBUFFEROBJECTI64VPROC glad_glGetQueryBufferObjecti64v = NULL; +PFNGLGETQUERYBUFFEROBJECTIVPROC glad_glGetQueryBufferObjectiv = NULL; +PFNGLGETQUERYBUFFEROBJECTUI64VPROC glad_glGetQueryBufferObjectui64v = NULL; +PFNGLGETQUERYBUFFEROBJECTUIVPROC glad_glGetQueryBufferObjectuiv = NULL; PFNGLGETQUERYINDEXEDIVPROC glad_glGetQueryIndexediv = NULL; PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v = NULL; PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv = NULL; @@ -515,7 +562,18 @@ PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv = NULL; PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv = NULL; PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv = NULL; PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv = NULL; +PFNGLGETTEXTUREIMAGEPROC glad_glGetTextureImage = NULL; +PFNGLGETTEXTURELEVELPARAMETERFVPROC glad_glGetTextureLevelParameterfv = NULL; +PFNGLGETTEXTURELEVELPARAMETERIVPROC glad_glGetTextureLevelParameteriv = NULL; +PFNGLGETTEXTUREPARAMETERIIVPROC glad_glGetTextureParameterIiv = NULL; +PFNGLGETTEXTUREPARAMETERIUIVPROC glad_glGetTextureParameterIuiv = NULL; +PFNGLGETTEXTUREPARAMETERFVPROC glad_glGetTextureParameterfv = NULL; +PFNGLGETTEXTUREPARAMETERIVPROC glad_glGetTextureParameteriv = NULL; +PFNGLGETTEXTURESUBIMAGEPROC glad_glGetTextureSubImage = NULL; PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying = NULL; +PFNGLGETTRANSFORMFEEDBACKI64_VPROC glad_glGetTransformFeedbacki64_v = NULL; +PFNGLGETTRANSFORMFEEDBACKI_VPROC glad_glGetTransformFeedbacki_v = NULL; +PFNGLGETTRANSFORMFEEDBACKIVPROC glad_glGetTransformFeedbackiv = NULL; PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex = NULL; PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices = NULL; PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation = NULL; @@ -524,6 +582,9 @@ PFNGLGETUNIFORMDVPROC glad_glGetUniformdv = NULL; PFNGLGETUNIFORMFVPROC glad_glGetUniformfv = NULL; 
PFNGLGETUNIFORMIVPROC glad_glGetUniformiv = NULL; PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv = NULL; +PFNGLGETVERTEXARRAYINDEXED64IVPROC glad_glGetVertexArrayIndexed64iv = NULL; +PFNGLGETVERTEXARRAYINDEXEDIVPROC glad_glGetVertexArrayIndexediv = NULL; +PFNGLGETVERTEXARRAYIVPROC glad_glGetVertexArrayiv = NULL; PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv = NULL; PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv = NULL; PFNGLGETVERTEXATTRIBLDVPROC glad_glGetVertexAttribLdv = NULL; @@ -531,6 +592,21 @@ PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv = NULL; PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv = NULL; PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv = NULL; PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv = NULL; +PFNGLGETNCOLORTABLEPROC glad_glGetnColorTable = NULL; +PFNGLGETNCOMPRESSEDTEXIMAGEPROC glad_glGetnCompressedTexImage = NULL; +PFNGLGETNCONVOLUTIONFILTERPROC glad_glGetnConvolutionFilter = NULL; +PFNGLGETNHISTOGRAMPROC glad_glGetnHistogram = NULL; +PFNGLGETNMAPDVPROC glad_glGetnMapdv = NULL; +PFNGLGETNMAPFVPROC glad_glGetnMapfv = NULL; +PFNGLGETNMAPIVPROC glad_glGetnMapiv = NULL; +PFNGLGETNMINMAXPROC glad_glGetnMinmax = NULL; +PFNGLGETNPIXELMAPFVPROC glad_glGetnPixelMapfv = NULL; +PFNGLGETNPIXELMAPUIVPROC glad_glGetnPixelMapuiv = NULL; +PFNGLGETNPIXELMAPUSVPROC glad_glGetnPixelMapusv = NULL; +PFNGLGETNPOLYGONSTIPPLEPROC glad_glGetnPolygonStipple = NULL; +PFNGLGETNSEPARABLEFILTERPROC glad_glGetnSeparableFilter = NULL; +PFNGLGETNTEXIMAGEPROC glad_glGetnTexImage = NULL; +PFNGLGETNUNIFORMDVPROC glad_glGetnUniformdv = NULL; PFNGLGETNUNIFORMFVPROC glad_glGetnUniformfv = NULL; PFNGLGETNUNIFORMIVPROC glad_glGetnUniformiv = NULL; PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv = NULL; @@ -538,6 +614,8 @@ PFNGLHINTPROC glad_glHint = NULL; PFNGLINVALIDATEBUFFERDATAPROC glad_glInvalidateBufferData = NULL; PFNGLINVALIDATEBUFFERSUBDATAPROC glad_glInvalidateBufferSubData = NULL; PFNGLINVALIDATEFRAMEBUFFERPROC 
glad_glInvalidateFramebuffer = NULL; +PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC glad_glInvalidateNamedFramebufferData = NULL; +PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC glad_glInvalidateNamedFramebufferSubData = NULL; PFNGLINVALIDATESUBFRAMEBUFFERPROC glad_glInvalidateSubFramebuffer = NULL; PFNGLINVALIDATETEXIMAGEPROC glad_glInvalidateTexImage = NULL; PFNGLINVALIDATETEXSUBIMAGEPROC glad_glInvalidateTexSubImage = NULL; @@ -560,14 +638,18 @@ PFNGLLINKPROGRAMPROC glad_glLinkProgram = NULL; PFNGLLOGICOPPROC glad_glLogicOp = NULL; PFNGLMAPBUFFERPROC glad_glMapBuffer = NULL; PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange = NULL; +PFNGLMAPNAMEDBUFFERPROC glad_glMapNamedBuffer = NULL; +PFNGLMAPNAMEDBUFFERRANGEPROC glad_glMapNamedBufferRange = NULL; PFNGLMEMORYBARRIERPROC glad_glMemoryBarrier = NULL; PFNGLMEMORYBARRIERBYREGIONPROC glad_glMemoryBarrierByRegion = NULL; PFNGLMINSAMPLESHADINGPROC glad_glMinSampleShading = NULL; PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays = NULL; PFNGLMULTIDRAWARRAYSINDIRECTPROC glad_glMultiDrawArraysIndirect = NULL; +PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC glad_glMultiDrawArraysIndirectCount = NULL; PFNGLMULTIDRAWELEMENTSPROC glad_glMultiDrawElements = NULL; PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex = NULL; PFNGLMULTIDRAWELEMENTSINDIRECTPROC glad_glMultiDrawElementsIndirect = NULL; +PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC glad_glMultiDrawElementsIndirectCount = NULL; PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui = NULL; PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv = NULL; PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui = NULL; @@ -576,6 +658,18 @@ PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui = NULL; PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv = NULL; PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui = NULL; PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv = NULL; +PFNGLNAMEDBUFFERDATAPROC glad_glNamedBufferData = NULL; +PFNGLNAMEDBUFFERSTORAGEPROC glad_glNamedBufferStorage 
= NULL; +PFNGLNAMEDBUFFERSUBDATAPROC glad_glNamedBufferSubData = NULL; +PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC glad_glNamedFramebufferDrawBuffer = NULL; +PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC glad_glNamedFramebufferDrawBuffers = NULL; +PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC glad_glNamedFramebufferParameteri = NULL; +PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC glad_glNamedFramebufferReadBuffer = NULL; +PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC glad_glNamedFramebufferRenderbuffer = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTUREPROC glad_glNamedFramebufferTexture = NULL; +PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC glad_glNamedFramebufferTextureLayer = NULL; +PFNGLNAMEDRENDERBUFFERSTORAGEPROC glad_glNamedRenderbufferStorage = NULL; +PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glNamedRenderbufferStorageMultisample = NULL; PFNGLNORMALP3UIPROC glad_glNormalP3ui = NULL; PFNGLNORMALP3UIVPROC glad_glNormalP3uiv = NULL; PFNGLOBJECTLABELPROC glad_glObjectLabel = NULL; @@ -592,6 +686,7 @@ PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv = NULL; PFNGLPOINTSIZEPROC glad_glPointSize = NULL; PFNGLPOLYGONMODEPROC glad_glPolygonMode = NULL; PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset = NULL; +PFNGLPOLYGONOFFSETCLAMPPROC glad_glPolygonOffsetClamp = NULL; PFNGLPOPDEBUGGROUPPROC glad_glPopDebugGroup = NULL; PFNGLPRIMITIVEBOUNDINGBOXPROC glad_glPrimitiveBoundingBox = NULL; PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex = NULL; @@ -674,6 +769,7 @@ PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv = NULL; PFNGLSHADERBINARYPROC glad_glShaderBinary = NULL; PFNGLSHADERSOURCEPROC glad_glShaderSource = NULL; PFNGLSHADERSTORAGEBLOCKBINDINGPROC glad_glShaderStorageBlockBinding = NULL; +PFNGLSPECIALIZESHADERPROC glad_glSpecializeShader = NULL; PFNGLSTENCILFUNCPROC glad_glStencilFunc = NULL; PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate = NULL; PFNGLSTENCILMASKPROC glad_glStencilMask = NULL; @@ -709,7 +805,26 @@ PFNGLTEXSTORAGE3DMULTISAMPLEPROC glad_glTexStorage3DMultisample = NULL; 
PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D = NULL; PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D = NULL; PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D = NULL; +PFNGLTEXTUREBARRIERPROC glad_glTextureBarrier = NULL; +PFNGLTEXTUREBUFFERPROC glad_glTextureBuffer = NULL; +PFNGLTEXTUREBUFFERRANGEPROC glad_glTextureBufferRange = NULL; +PFNGLTEXTUREPARAMETERIIVPROC glad_glTextureParameterIiv = NULL; +PFNGLTEXTUREPARAMETERIUIVPROC glad_glTextureParameterIuiv = NULL; +PFNGLTEXTUREPARAMETERFPROC glad_glTextureParameterf = NULL; +PFNGLTEXTUREPARAMETERFVPROC glad_glTextureParameterfv = NULL; +PFNGLTEXTUREPARAMETERIPROC glad_glTextureParameteri = NULL; +PFNGLTEXTUREPARAMETERIVPROC glad_glTextureParameteriv = NULL; +PFNGLTEXTURESTORAGE1DPROC glad_glTextureStorage1D = NULL; +PFNGLTEXTURESTORAGE2DPROC glad_glTextureStorage2D = NULL; +PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC glad_glTextureStorage2DMultisample = NULL; +PFNGLTEXTURESTORAGE3DPROC glad_glTextureStorage3D = NULL; +PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC glad_glTextureStorage3DMultisample = NULL; +PFNGLTEXTURESUBIMAGE1DPROC glad_glTextureSubImage1D = NULL; +PFNGLTEXTURESUBIMAGE2DPROC glad_glTextureSubImage2D = NULL; +PFNGLTEXTURESUBIMAGE3DPROC glad_glTextureSubImage3D = NULL; PFNGLTEXTUREVIEWPROC glad_glTextureView = NULL; +PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC glad_glTransformFeedbackBufferBase = NULL; +PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC glad_glTransformFeedbackBufferRange = NULL; PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings = NULL; PFNGLUNIFORM1DPROC glad_glUniform1d = NULL; PFNGLUNIFORM1DVPROC glad_glUniform1dv = NULL; @@ -764,10 +879,19 @@ PFNGLUNIFORMMATRIX4X3DVPROC glad_glUniformMatrix4x3dv = NULL; PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv = NULL; PFNGLUNIFORMSUBROUTINESUIVPROC glad_glUniformSubroutinesuiv = NULL; PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer = NULL; +PFNGLUNMAPNAMEDBUFFERPROC glad_glUnmapNamedBuffer = NULL; PFNGLUSEPROGRAMPROC glad_glUseProgram = NULL; PFNGLUSEPROGRAMSTAGESPROC 
glad_glUseProgramStages = NULL; PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram = NULL; PFNGLVALIDATEPROGRAMPIPELINEPROC glad_glValidateProgramPipeline = NULL; +PFNGLVERTEXARRAYATTRIBBINDINGPROC glad_glVertexArrayAttribBinding = NULL; +PFNGLVERTEXARRAYATTRIBFORMATPROC glad_glVertexArrayAttribFormat = NULL; +PFNGLVERTEXARRAYATTRIBIFORMATPROC glad_glVertexArrayAttribIFormat = NULL; +PFNGLVERTEXARRAYATTRIBLFORMATPROC glad_glVertexArrayAttribLFormat = NULL; +PFNGLVERTEXARRAYBINDINGDIVISORPROC glad_glVertexArrayBindingDivisor = NULL; +PFNGLVERTEXARRAYELEMENTBUFFERPROC glad_glVertexArrayElementBuffer = NULL; +PFNGLVERTEXARRAYVERTEXBUFFERPROC glad_glVertexArrayVertexBuffer = NULL; +PFNGLVERTEXARRAYVERTEXBUFFERSPROC glad_glVertexArrayVertexBuffers = NULL; PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d = NULL; PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv = NULL; PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f = NULL; @@ -862,105 +986,10 @@ PFNGLVIEWPORTINDEXEDFVPROC glad_glViewportIndexedfv = NULL; PFNGLWAITSYNCPROC glad_glWaitSync = NULL; int GLAD_GL_ARB_buffer_storage = 0; int GLAD_GL_ARB_direct_state_access = 0; +int GLAD_GL_ARB_texture_compression_bptc = 0; int GLAD_GL_EXT_buffer_storage = 0; int GLAD_GL_EXT_clip_cull_distance = 0; -PFNGLCREATETRANSFORMFEEDBACKSPROC glad_glCreateTransformFeedbacks = NULL; -PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC glad_glTransformFeedbackBufferBase = NULL; -PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC glad_glTransformFeedbackBufferRange = NULL; -PFNGLGETTRANSFORMFEEDBACKIVPROC glad_glGetTransformFeedbackiv = NULL; -PFNGLGETTRANSFORMFEEDBACKI_VPROC glad_glGetTransformFeedbacki_v = NULL; -PFNGLGETTRANSFORMFEEDBACKI64_VPROC glad_glGetTransformFeedbacki64_v = NULL; -PFNGLCREATEBUFFERSPROC glad_glCreateBuffers = NULL; -PFNGLNAMEDBUFFERSTORAGEPROC glad_glNamedBufferStorage = NULL; -PFNGLNAMEDBUFFERDATAPROC glad_glNamedBufferData = NULL; -PFNGLNAMEDBUFFERSUBDATAPROC glad_glNamedBufferSubData = NULL; -PFNGLCOPYNAMEDBUFFERSUBDATAPROC 
glad_glCopyNamedBufferSubData = NULL; -PFNGLCLEARNAMEDBUFFERDATAPROC glad_glClearNamedBufferData = NULL; -PFNGLCLEARNAMEDBUFFERSUBDATAPROC glad_glClearNamedBufferSubData = NULL; -PFNGLMAPNAMEDBUFFERPROC glad_glMapNamedBuffer = NULL; -PFNGLMAPNAMEDBUFFERRANGEPROC glad_glMapNamedBufferRange = NULL; -PFNGLUNMAPNAMEDBUFFERPROC glad_glUnmapNamedBuffer = NULL; -PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC glad_glFlushMappedNamedBufferRange = NULL; -PFNGLGETNAMEDBUFFERPARAMETERIVPROC glad_glGetNamedBufferParameteriv = NULL; -PFNGLGETNAMEDBUFFERPARAMETERI64VPROC glad_glGetNamedBufferParameteri64v = NULL; -PFNGLGETNAMEDBUFFERPOINTERVPROC glad_glGetNamedBufferPointerv = NULL; -PFNGLGETNAMEDBUFFERSUBDATAPROC glad_glGetNamedBufferSubData = NULL; -PFNGLCREATEFRAMEBUFFERSPROC glad_glCreateFramebuffers = NULL; -PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC glad_glNamedFramebufferRenderbuffer = NULL; -PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC glad_glNamedFramebufferParameteri = NULL; -PFNGLNAMEDFRAMEBUFFERTEXTUREPROC glad_glNamedFramebufferTexture = NULL; -PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC glad_glNamedFramebufferTextureLayer = NULL; -PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC glad_glNamedFramebufferDrawBuffer = NULL; -PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC glad_glNamedFramebufferDrawBuffers = NULL; -PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC glad_glNamedFramebufferReadBuffer = NULL; -PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC glad_glInvalidateNamedFramebufferData = NULL; -PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC glad_glInvalidateNamedFramebufferSubData = NULL; -PFNGLCLEARNAMEDFRAMEBUFFERIVPROC glad_glClearNamedFramebufferiv = NULL; -PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC glad_glClearNamedFramebufferuiv = NULL; -PFNGLCLEARNAMEDFRAMEBUFFERFVPROC glad_glClearNamedFramebufferfv = NULL; -PFNGLCLEARNAMEDFRAMEBUFFERFIPROC glad_glClearNamedFramebufferfi = NULL; -PFNGLBLITNAMEDFRAMEBUFFERPROC glad_glBlitNamedFramebuffer = NULL; -PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC glad_glCheckNamedFramebufferStatus = NULL; 
-PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC glad_glGetNamedFramebufferParameteriv = NULL; -PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetNamedFramebufferAttachmentParameteriv = NULL; -PFNGLCREATERENDERBUFFERSPROC glad_glCreateRenderbuffers = NULL; -PFNGLNAMEDRENDERBUFFERSTORAGEPROC glad_glNamedRenderbufferStorage = NULL; -PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glNamedRenderbufferStorageMultisample = NULL; -PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC glad_glGetNamedRenderbufferParameteriv = NULL; -PFNGLCREATETEXTURESPROC glad_glCreateTextures = NULL; -PFNGLTEXTUREBUFFERPROC glad_glTextureBuffer = NULL; -PFNGLTEXTUREBUFFERRANGEPROC glad_glTextureBufferRange = NULL; -PFNGLTEXTURESTORAGE1DPROC glad_glTextureStorage1D = NULL; -PFNGLTEXTURESTORAGE2DPROC glad_glTextureStorage2D = NULL; -PFNGLTEXTURESTORAGE3DPROC glad_glTextureStorage3D = NULL; -PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC glad_glTextureStorage2DMultisample = NULL; -PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC glad_glTextureStorage3DMultisample = NULL; -PFNGLTEXTURESUBIMAGE1DPROC glad_glTextureSubImage1D = NULL; -PFNGLTEXTURESUBIMAGE2DPROC glad_glTextureSubImage2D = NULL; -PFNGLTEXTURESUBIMAGE3DPROC glad_glTextureSubImage3D = NULL; -PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC glad_glCompressedTextureSubImage1D = NULL; -PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC glad_glCompressedTextureSubImage2D = NULL; -PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC glad_glCompressedTextureSubImage3D = NULL; -PFNGLCOPYTEXTURESUBIMAGE1DPROC glad_glCopyTextureSubImage1D = NULL; -PFNGLCOPYTEXTURESUBIMAGE2DPROC glad_glCopyTextureSubImage2D = NULL; -PFNGLCOPYTEXTURESUBIMAGE3DPROC glad_glCopyTextureSubImage3D = NULL; -PFNGLTEXTUREPARAMETERFPROC glad_glTextureParameterf = NULL; -PFNGLTEXTUREPARAMETERFVPROC glad_glTextureParameterfv = NULL; -PFNGLTEXTUREPARAMETERIPROC glad_glTextureParameteri = NULL; -PFNGLTEXTUREPARAMETERIIVPROC glad_glTextureParameterIiv = NULL; -PFNGLTEXTUREPARAMETERIUIVPROC glad_glTextureParameterIuiv = NULL; 
-PFNGLTEXTUREPARAMETERIVPROC glad_glTextureParameteriv = NULL; -PFNGLGENERATETEXTUREMIPMAPPROC glad_glGenerateTextureMipmap = NULL; -PFNGLBINDTEXTUREUNITPROC glad_glBindTextureUnit = NULL; -PFNGLGETTEXTUREIMAGEPROC glad_glGetTextureImage = NULL; -PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC glad_glGetCompressedTextureImage = NULL; -PFNGLGETTEXTURELEVELPARAMETERFVPROC glad_glGetTextureLevelParameterfv = NULL; -PFNGLGETTEXTURELEVELPARAMETERIVPROC glad_glGetTextureLevelParameteriv = NULL; -PFNGLGETTEXTUREPARAMETERFVPROC glad_glGetTextureParameterfv = NULL; -PFNGLGETTEXTUREPARAMETERIIVPROC glad_glGetTextureParameterIiv = NULL; -PFNGLGETTEXTUREPARAMETERIUIVPROC glad_glGetTextureParameterIuiv = NULL; -PFNGLGETTEXTUREPARAMETERIVPROC glad_glGetTextureParameteriv = NULL; -PFNGLCREATEVERTEXARRAYSPROC glad_glCreateVertexArrays = NULL; -PFNGLDISABLEVERTEXARRAYATTRIBPROC glad_glDisableVertexArrayAttrib = NULL; -PFNGLENABLEVERTEXARRAYATTRIBPROC glad_glEnableVertexArrayAttrib = NULL; -PFNGLVERTEXARRAYELEMENTBUFFERPROC glad_glVertexArrayElementBuffer = NULL; -PFNGLVERTEXARRAYVERTEXBUFFERPROC glad_glVertexArrayVertexBuffer = NULL; -PFNGLVERTEXARRAYVERTEXBUFFERSPROC glad_glVertexArrayVertexBuffers = NULL; -PFNGLVERTEXARRAYATTRIBBINDINGPROC glad_glVertexArrayAttribBinding = NULL; -PFNGLVERTEXARRAYATTRIBFORMATPROC glad_glVertexArrayAttribFormat = NULL; -PFNGLVERTEXARRAYATTRIBIFORMATPROC glad_glVertexArrayAttribIFormat = NULL; -PFNGLVERTEXARRAYATTRIBLFORMATPROC glad_glVertexArrayAttribLFormat = NULL; -PFNGLVERTEXARRAYBINDINGDIVISORPROC glad_glVertexArrayBindingDivisor = NULL; -PFNGLGETVERTEXARRAYIVPROC glad_glGetVertexArrayiv = NULL; -PFNGLGETVERTEXARRAYINDEXEDIVPROC glad_glGetVertexArrayIndexediv = NULL; -PFNGLGETVERTEXARRAYINDEXED64IVPROC glad_glGetVertexArrayIndexed64iv = NULL; -PFNGLCREATESAMPLERSPROC glad_glCreateSamplers = NULL; -PFNGLCREATEPROGRAMPIPELINESPROC glad_glCreateProgramPipelines = NULL; -PFNGLCREATEQUERIESPROC glad_glCreateQueries = NULL; -PFNGLGETQUERYBUFFEROBJECTI64VPROC 
glad_glGetQueryBufferObjecti64v = NULL; -PFNGLGETQUERYBUFFEROBJECTIVPROC glad_glGetQueryBufferObjectiv = NULL; -PFNGLGETQUERYBUFFEROBJECTUI64VPROC glad_glGetQueryBufferObjectui64v = NULL; -PFNGLGETQUERYBUFFEROBJECTUIVPROC glad_glGetQueryBufferObjectuiv = NULL; +int GLAD_GL_EXT_texture_compression_s3tc = 0; PFNGLBUFFERSTORAGEEXTPROC glad_glBufferStorageEXT = NULL; static void load_GL_VERSION_1_0(GLADloadproc load) { if(!GLAD_GL_VERSION_1_0) return; @@ -1590,6 +1619,138 @@ static void load_GL_VERSION_4_4(GLADloadproc load) { glad_glBindImageTextures = (PFNGLBINDIMAGETEXTURESPROC)load("glBindImageTextures"); glad_glBindVertexBuffers = (PFNGLBINDVERTEXBUFFERSPROC)load("glBindVertexBuffers"); } +static void load_GL_VERSION_4_5(GLADloadproc load) { + if(!GLAD_GL_VERSION_4_5) return; + glad_glClipControl = (PFNGLCLIPCONTROLPROC)load("glClipControl"); + glad_glCreateTransformFeedbacks = (PFNGLCREATETRANSFORMFEEDBACKSPROC)load("glCreateTransformFeedbacks"); + glad_glTransformFeedbackBufferBase = (PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC)load("glTransformFeedbackBufferBase"); + glad_glTransformFeedbackBufferRange = (PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC)load("glTransformFeedbackBufferRange"); + glad_glGetTransformFeedbackiv = (PFNGLGETTRANSFORMFEEDBACKIVPROC)load("glGetTransformFeedbackiv"); + glad_glGetTransformFeedbacki_v = (PFNGLGETTRANSFORMFEEDBACKI_VPROC)load("glGetTransformFeedbacki_v"); + glad_glGetTransformFeedbacki64_v = (PFNGLGETTRANSFORMFEEDBACKI64_VPROC)load("glGetTransformFeedbacki64_v"); + glad_glCreateBuffers = (PFNGLCREATEBUFFERSPROC)load("glCreateBuffers"); + glad_glNamedBufferStorage = (PFNGLNAMEDBUFFERSTORAGEPROC)load("glNamedBufferStorage"); + glad_glNamedBufferData = (PFNGLNAMEDBUFFERDATAPROC)load("glNamedBufferData"); + glad_glNamedBufferSubData = (PFNGLNAMEDBUFFERSUBDATAPROC)load("glNamedBufferSubData"); + glad_glCopyNamedBufferSubData = (PFNGLCOPYNAMEDBUFFERSUBDATAPROC)load("glCopyNamedBufferSubData"); + glad_glClearNamedBufferData = 
(PFNGLCLEARNAMEDBUFFERDATAPROC)load("glClearNamedBufferData"); + glad_glClearNamedBufferSubData = (PFNGLCLEARNAMEDBUFFERSUBDATAPROC)load("glClearNamedBufferSubData"); + glad_glMapNamedBuffer = (PFNGLMAPNAMEDBUFFERPROC)load("glMapNamedBuffer"); + glad_glMapNamedBufferRange = (PFNGLMAPNAMEDBUFFERRANGEPROC)load("glMapNamedBufferRange"); + glad_glUnmapNamedBuffer = (PFNGLUNMAPNAMEDBUFFERPROC)load("glUnmapNamedBuffer"); + glad_glFlushMappedNamedBufferRange = (PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC)load("glFlushMappedNamedBufferRange"); + glad_glGetNamedBufferParameteriv = (PFNGLGETNAMEDBUFFERPARAMETERIVPROC)load("glGetNamedBufferParameteriv"); + glad_glGetNamedBufferParameteri64v = (PFNGLGETNAMEDBUFFERPARAMETERI64VPROC)load("glGetNamedBufferParameteri64v"); + glad_glGetNamedBufferPointerv = (PFNGLGETNAMEDBUFFERPOINTERVPROC)load("glGetNamedBufferPointerv"); + glad_glGetNamedBufferSubData = (PFNGLGETNAMEDBUFFERSUBDATAPROC)load("glGetNamedBufferSubData"); + glad_glCreateFramebuffers = (PFNGLCREATEFRAMEBUFFERSPROC)load("glCreateFramebuffers"); + glad_glNamedFramebufferRenderbuffer = (PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC)load("glNamedFramebufferRenderbuffer"); + glad_glNamedFramebufferParameteri = (PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC)load("glNamedFramebufferParameteri"); + glad_glNamedFramebufferTexture = (PFNGLNAMEDFRAMEBUFFERTEXTUREPROC)load("glNamedFramebufferTexture"); + glad_glNamedFramebufferTextureLayer = (PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC)load("glNamedFramebufferTextureLayer"); + glad_glNamedFramebufferDrawBuffer = (PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC)load("glNamedFramebufferDrawBuffer"); + glad_glNamedFramebufferDrawBuffers = (PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC)load("glNamedFramebufferDrawBuffers"); + glad_glNamedFramebufferReadBuffer = (PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC)load("glNamedFramebufferReadBuffer"); + glad_glInvalidateNamedFramebufferData = (PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC)load("glInvalidateNamedFramebufferData"); + 
glad_glInvalidateNamedFramebufferSubData = (PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC)load("glInvalidateNamedFramebufferSubData"); + glad_glClearNamedFramebufferiv = (PFNGLCLEARNAMEDFRAMEBUFFERIVPROC)load("glClearNamedFramebufferiv"); + glad_glClearNamedFramebufferuiv = (PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC)load("glClearNamedFramebufferuiv"); + glad_glClearNamedFramebufferfv = (PFNGLCLEARNAMEDFRAMEBUFFERFVPROC)load("glClearNamedFramebufferfv"); + glad_glClearNamedFramebufferfi = (PFNGLCLEARNAMEDFRAMEBUFFERFIPROC)load("glClearNamedFramebufferfi"); + glad_glBlitNamedFramebuffer = (PFNGLBLITNAMEDFRAMEBUFFERPROC)load("glBlitNamedFramebuffer"); + glad_glCheckNamedFramebufferStatus = (PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC)load("glCheckNamedFramebufferStatus"); + glad_glGetNamedFramebufferParameteriv = (PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC)load("glGetNamedFramebufferParameteriv"); + glad_glGetNamedFramebufferAttachmentParameteriv = (PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC)load("glGetNamedFramebufferAttachmentParameteriv"); + glad_glCreateRenderbuffers = (PFNGLCREATERENDERBUFFERSPROC)load("glCreateRenderbuffers"); + glad_glNamedRenderbufferStorage = (PFNGLNAMEDRENDERBUFFERSTORAGEPROC)load("glNamedRenderbufferStorage"); + glad_glNamedRenderbufferStorageMultisample = (PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC)load("glNamedRenderbufferStorageMultisample"); + glad_glGetNamedRenderbufferParameteriv = (PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC)load("glGetNamedRenderbufferParameteriv"); + glad_glCreateTextures = (PFNGLCREATETEXTURESPROC)load("glCreateTextures"); + glad_glTextureBuffer = (PFNGLTEXTUREBUFFERPROC)load("glTextureBuffer"); + glad_glTextureBufferRange = (PFNGLTEXTUREBUFFERRANGEPROC)load("glTextureBufferRange"); + glad_glTextureStorage1D = (PFNGLTEXTURESTORAGE1DPROC)load("glTextureStorage1D"); + glad_glTextureStorage2D = (PFNGLTEXTURESTORAGE2DPROC)load("glTextureStorage2D"); + glad_glTextureStorage3D = (PFNGLTEXTURESTORAGE3DPROC)load("glTextureStorage3D"); + 
glad_glTextureStorage2DMultisample = (PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC)load("glTextureStorage2DMultisample"); + glad_glTextureStorage3DMultisample = (PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC)load("glTextureStorage3DMultisample"); + glad_glTextureSubImage1D = (PFNGLTEXTURESUBIMAGE1DPROC)load("glTextureSubImage1D"); + glad_glTextureSubImage2D = (PFNGLTEXTURESUBIMAGE2DPROC)load("glTextureSubImage2D"); + glad_glTextureSubImage3D = (PFNGLTEXTURESUBIMAGE3DPROC)load("glTextureSubImage3D"); + glad_glCompressedTextureSubImage1D = (PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC)load("glCompressedTextureSubImage1D"); + glad_glCompressedTextureSubImage2D = (PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC)load("glCompressedTextureSubImage2D"); + glad_glCompressedTextureSubImage3D = (PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC)load("glCompressedTextureSubImage3D"); + glad_glCopyTextureSubImage1D = (PFNGLCOPYTEXTURESUBIMAGE1DPROC)load("glCopyTextureSubImage1D"); + glad_glCopyTextureSubImage2D = (PFNGLCOPYTEXTURESUBIMAGE2DPROC)load("glCopyTextureSubImage2D"); + glad_glCopyTextureSubImage3D = (PFNGLCOPYTEXTURESUBIMAGE3DPROC)load("glCopyTextureSubImage3D"); + glad_glTextureParameterf = (PFNGLTEXTUREPARAMETERFPROC)load("glTextureParameterf"); + glad_glTextureParameterfv = (PFNGLTEXTUREPARAMETERFVPROC)load("glTextureParameterfv"); + glad_glTextureParameteri = (PFNGLTEXTUREPARAMETERIPROC)load("glTextureParameteri"); + glad_glTextureParameterIiv = (PFNGLTEXTUREPARAMETERIIVPROC)load("glTextureParameterIiv"); + glad_glTextureParameterIuiv = (PFNGLTEXTUREPARAMETERIUIVPROC)load("glTextureParameterIuiv"); + glad_glTextureParameteriv = (PFNGLTEXTUREPARAMETERIVPROC)load("glTextureParameteriv"); + glad_glGenerateTextureMipmap = (PFNGLGENERATETEXTUREMIPMAPPROC)load("glGenerateTextureMipmap"); + glad_glBindTextureUnit = (PFNGLBINDTEXTUREUNITPROC)load("glBindTextureUnit"); + glad_glGetTextureImage = (PFNGLGETTEXTUREIMAGEPROC)load("glGetTextureImage"); + glad_glGetCompressedTextureImage = 
(PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC)load("glGetCompressedTextureImage"); + glad_glGetTextureLevelParameterfv = (PFNGLGETTEXTURELEVELPARAMETERFVPROC)load("glGetTextureLevelParameterfv"); + glad_glGetTextureLevelParameteriv = (PFNGLGETTEXTURELEVELPARAMETERIVPROC)load("glGetTextureLevelParameteriv"); + glad_glGetTextureParameterfv = (PFNGLGETTEXTUREPARAMETERFVPROC)load("glGetTextureParameterfv"); + glad_glGetTextureParameterIiv = (PFNGLGETTEXTUREPARAMETERIIVPROC)load("glGetTextureParameterIiv"); + glad_glGetTextureParameterIuiv = (PFNGLGETTEXTUREPARAMETERIUIVPROC)load("glGetTextureParameterIuiv"); + glad_glGetTextureParameteriv = (PFNGLGETTEXTUREPARAMETERIVPROC)load("glGetTextureParameteriv"); + glad_glCreateVertexArrays = (PFNGLCREATEVERTEXARRAYSPROC)load("glCreateVertexArrays"); + glad_glDisableVertexArrayAttrib = (PFNGLDISABLEVERTEXARRAYATTRIBPROC)load("glDisableVertexArrayAttrib"); + glad_glEnableVertexArrayAttrib = (PFNGLENABLEVERTEXARRAYATTRIBPROC)load("glEnableVertexArrayAttrib"); + glad_glVertexArrayElementBuffer = (PFNGLVERTEXARRAYELEMENTBUFFERPROC)load("glVertexArrayElementBuffer"); + glad_glVertexArrayVertexBuffer = (PFNGLVERTEXARRAYVERTEXBUFFERPROC)load("glVertexArrayVertexBuffer"); + glad_glVertexArrayVertexBuffers = (PFNGLVERTEXARRAYVERTEXBUFFERSPROC)load("glVertexArrayVertexBuffers"); + glad_glVertexArrayAttribBinding = (PFNGLVERTEXARRAYATTRIBBINDINGPROC)load("glVertexArrayAttribBinding"); + glad_glVertexArrayAttribFormat = (PFNGLVERTEXARRAYATTRIBFORMATPROC)load("glVertexArrayAttribFormat"); + glad_glVertexArrayAttribIFormat = (PFNGLVERTEXARRAYATTRIBIFORMATPROC)load("glVertexArrayAttribIFormat"); + glad_glVertexArrayAttribLFormat = (PFNGLVERTEXARRAYATTRIBLFORMATPROC)load("glVertexArrayAttribLFormat"); + glad_glVertexArrayBindingDivisor = (PFNGLVERTEXARRAYBINDINGDIVISORPROC)load("glVertexArrayBindingDivisor"); + glad_glGetVertexArrayiv = (PFNGLGETVERTEXARRAYIVPROC)load("glGetVertexArrayiv"); + glad_glGetVertexArrayIndexediv = 
(PFNGLGETVERTEXARRAYINDEXEDIVPROC)load("glGetVertexArrayIndexediv"); + glad_glGetVertexArrayIndexed64iv = (PFNGLGETVERTEXARRAYINDEXED64IVPROC)load("glGetVertexArrayIndexed64iv"); + glad_glCreateSamplers = (PFNGLCREATESAMPLERSPROC)load("glCreateSamplers"); + glad_glCreateProgramPipelines = (PFNGLCREATEPROGRAMPIPELINESPROC)load("glCreateProgramPipelines"); + glad_glCreateQueries = (PFNGLCREATEQUERIESPROC)load("glCreateQueries"); + glad_glGetQueryBufferObjecti64v = (PFNGLGETQUERYBUFFEROBJECTI64VPROC)load("glGetQueryBufferObjecti64v"); + glad_glGetQueryBufferObjectiv = (PFNGLGETQUERYBUFFEROBJECTIVPROC)load("glGetQueryBufferObjectiv"); + glad_glGetQueryBufferObjectui64v = (PFNGLGETQUERYBUFFEROBJECTUI64VPROC)load("glGetQueryBufferObjectui64v"); + glad_glGetQueryBufferObjectuiv = (PFNGLGETQUERYBUFFEROBJECTUIVPROC)load("glGetQueryBufferObjectuiv"); + glad_glMemoryBarrierByRegion = (PFNGLMEMORYBARRIERBYREGIONPROC)load("glMemoryBarrierByRegion"); + glad_glGetTextureSubImage = (PFNGLGETTEXTURESUBIMAGEPROC)load("glGetTextureSubImage"); + glad_glGetCompressedTextureSubImage = (PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC)load("glGetCompressedTextureSubImage"); + glad_glGetGraphicsResetStatus = (PFNGLGETGRAPHICSRESETSTATUSPROC)load("glGetGraphicsResetStatus"); + glad_glGetnCompressedTexImage = (PFNGLGETNCOMPRESSEDTEXIMAGEPROC)load("glGetnCompressedTexImage"); + glad_glGetnTexImage = (PFNGLGETNTEXIMAGEPROC)load("glGetnTexImage"); + glad_glGetnUniformdv = (PFNGLGETNUNIFORMDVPROC)load("glGetnUniformdv"); + glad_glGetnUniformfv = (PFNGLGETNUNIFORMFVPROC)load("glGetnUniformfv"); + glad_glGetnUniformiv = (PFNGLGETNUNIFORMIVPROC)load("glGetnUniformiv"); + glad_glGetnUniformuiv = (PFNGLGETNUNIFORMUIVPROC)load("glGetnUniformuiv"); + glad_glReadnPixels = (PFNGLREADNPIXELSPROC)load("glReadnPixels"); + glad_glGetnMapdv = (PFNGLGETNMAPDVPROC)load("glGetnMapdv"); + glad_glGetnMapfv = (PFNGLGETNMAPFVPROC)load("glGetnMapfv"); + glad_glGetnMapiv = (PFNGLGETNMAPIVPROC)load("glGetnMapiv"); + 
glad_glGetnPixelMapfv = (PFNGLGETNPIXELMAPFVPROC)load("glGetnPixelMapfv"); + glad_glGetnPixelMapuiv = (PFNGLGETNPIXELMAPUIVPROC)load("glGetnPixelMapuiv"); + glad_glGetnPixelMapusv = (PFNGLGETNPIXELMAPUSVPROC)load("glGetnPixelMapusv"); + glad_glGetnPolygonStipple = (PFNGLGETNPOLYGONSTIPPLEPROC)load("glGetnPolygonStipple"); + glad_glGetnColorTable = (PFNGLGETNCOLORTABLEPROC)load("glGetnColorTable"); + glad_glGetnConvolutionFilter = (PFNGLGETNCONVOLUTIONFILTERPROC)load("glGetnConvolutionFilter"); + glad_glGetnSeparableFilter = (PFNGLGETNSEPARABLEFILTERPROC)load("glGetnSeparableFilter"); + glad_glGetnHistogram = (PFNGLGETNHISTOGRAMPROC)load("glGetnHistogram"); + glad_glGetnMinmax = (PFNGLGETNMINMAXPROC)load("glGetnMinmax"); + glad_glTextureBarrier = (PFNGLTEXTUREBARRIERPROC)load("glTextureBarrier"); +} +static void load_GL_VERSION_4_6(GLADloadproc load) { + if(!GLAD_GL_VERSION_4_6) return; + glad_glSpecializeShader = (PFNGLSPECIALIZESHADERPROC)load("glSpecializeShader"); + glad_glMultiDrawArraysIndirectCount = (PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC)load("glMultiDrawArraysIndirectCount"); + glad_glMultiDrawElementsIndirectCount = (PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC)load("glMultiDrawElementsIndirectCount"); + glad_glPolygonOffsetClamp = (PFNGLPOLYGONOFFSETCLAMPPROC)load("glPolygonOffsetClamp"); +} static void load_GL_ARB_buffer_storage(GLADloadproc load) { if(!GLAD_GL_ARB_buffer_storage) return; glad_glBufferStorage = (PFNGLBUFFERSTORAGEPROC)load("glBufferStorage"); @@ -1698,6 +1859,8 @@ static int find_extensionsGL(void) { if (!get_exts()) return 0; GLAD_GL_ARB_buffer_storage = has_ext("GL_ARB_buffer_storage"); GLAD_GL_ARB_direct_state_access = has_ext("GL_ARB_direct_state_access"); + GLAD_GL_ARB_texture_compression_bptc = has_ext("GL_ARB_texture_compression_bptc"); + GLAD_GL_EXT_texture_compression_s3tc = has_ext("GL_EXT_texture_compression_s3tc"); free_exts(); return 1; } @@ -1755,9 +1918,11 @@ static void find_coreGL(void) { GLAD_GL_VERSION_4_2 = (major == 4 && 
minor >= 2) || major > 4; GLAD_GL_VERSION_4_3 = (major == 4 && minor >= 3) || major > 4; GLAD_GL_VERSION_4_4 = (major == 4 && minor >= 4) || major > 4; - if (GLVersion.major > 4 || (GLVersion.major >= 4 && GLVersion.minor >= 4)) { + GLAD_GL_VERSION_4_5 = (major == 4 && minor >= 5) || major > 4; + GLAD_GL_VERSION_4_6 = (major == 4 && minor >= 6) || major > 4; + if (GLVersion.major > 4 || (GLVersion.major >= 4 && GLVersion.minor >= 6)) { max_loaded_major = 4; - max_loaded_minor = 4; + max_loaded_minor = 6; } } @@ -1784,6 +1949,8 @@ int gladLoadGLLoader(GLADloadproc load) { load_GL_VERSION_4_2(load); load_GL_VERSION_4_3(load); load_GL_VERSION_4_4(load); + load_GL_VERSION_4_5(load); + load_GL_VERSION_4_6(load); if (!find_extensionsGL()) return 0; load_GL_ARB_buffer_storage(load); @@ -2169,6 +2336,7 @@ static int find_extensionsGLES2(void) { if (!get_exts()) return 0; GLAD_GL_EXT_buffer_storage = has_ext("GL_EXT_buffer_storage"); GLAD_GL_EXT_clip_cull_distance = has_ext("GL_EXT_clip_cull_distance"); + GLAD_GL_EXT_texture_compression_s3tc = has_ext("GL_EXT_texture_compression_s3tc"); free_exts(); return 1; } diff --git a/externals/libspng b/externals/libspng deleted file mode 160000 index 75c39ce09..000000000 --- a/externals/libspng +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 75c39ce0948d3b9c623a674ebfeb63a703a7c116 diff --git a/externals/libspng/CMakeLists.txt b/externals/libspng/CMakeLists.txt new file mode 100644 index 000000000..4568e10c2 --- /dev/null +++ b/externals/libspng/CMakeLists.txt @@ -0,0 +1,14 @@ +add_library(spng STATIC spng.h spng.c) +target_compile_definitions(spng PUBLIC SPNG_STATIC) +target_include_directories(spng PUBLIC ${CMAKE_CURRENT_LIST_DIR}) +target_link_libraries(spng PRIVATE ZLIB::ZLIB) + +# Enable SSE4.1 on x64 +if ("x86_64" IN_LIST ARCHITECTURE) + target_compile_definitions(spng PRIVATE SPNG_SSE=4) + if (NOT MSVC) + target_compile_options(spng PRIVATE -msse4.1) + endif() +endif() + +add_library(spng::spng ALIAS spng) diff --git 
a/externals/libspng/spng.c b/externals/libspng/spng.c new file mode 100644 index 000000000..f88492b3c --- /dev/null +++ b/externals/libspng/spng.c @@ -0,0 +1,6979 @@ +/* SPDX-License-Identifier: (BSD-2-Clause AND libpng-2.0) */ + +#define SPNG__BUILD + +#include "spng.h" + +#include +#include +#include +#include + +#define ZLIB_CONST + +#ifdef __FRAMAC__ + #define SPNG_DISABLE_OPT + #include "tests/framac_stubs.h" +#else + #ifdef SPNG_USE_MINIZ + #include + #else + #include + #endif +#endif + +#ifdef SPNG_MULTITHREADING + #include +#endif + +/* Not build options, edit at your own risk! */ +#define SPNG_READ_SIZE (8192) +#define SPNG_WRITE_SIZE SPNG_READ_SIZE +#define SPNG_MAX_CHUNK_COUNT (1000) + +#define SPNG_TARGET_CLONES(x) + +#ifndef SPNG_DISABLE_OPT + + #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) + #define SPNG_X86 + + #if defined(__x86_64__) || defined(_M_X64) + #define SPNG_X86_64 + #endif + + #elif defined(__aarch64__) || defined(_M_ARM64) /* || defined(__ARM_NEON) */ + #define SPNG_ARM /* NOTE: only arm64 builds are tested! 
*/ + #else + #pragma message "disabling SIMD optimizations for unknown target" + #define SPNG_DISABLE_OPT + #endif + + #if defined(SPNG_X86_64) && defined(SPNG_ENABLE_TARGET_CLONES) + #undef SPNG_TARGET_CLONES + #define SPNG_TARGET_CLONES(x) __attribute__((target_clones(x))) + #else + #define SPNG_TARGET_CLONES(x) + #endif + + #ifndef SPNG_DISABLE_OPT + static void defilter_sub3(size_t rowbytes, unsigned char *row); + static void defilter_sub4(size_t rowbytes, unsigned char *row); + static void defilter_avg3(size_t rowbytes, unsigned char *row, const unsigned char *prev); + static void defilter_avg4(size_t rowbytes, unsigned char *row, const unsigned char *prev); + static void defilter_paeth3(size_t rowbytes, unsigned char *row, const unsigned char *prev); + static void defilter_paeth4(size_t rowbytes, unsigned char *row, const unsigned char *prev); + + #if defined(SPNG_ARM) + static uint32_t expand_palette_rgba8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width); + static uint32_t expand_palette_rgb8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width); + #endif + #endif +#endif + +#if defined(_MSC_VER) + #pragma warning(push) + #pragma warning(disable: 4244) +#endif + +#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || defined(__BIG_ENDIAN__) + #define SPNG_BIG_ENDIAN +#else + #define SPNG_LITTLE_ENDIAN +#endif + +enum spng_state +{ + SPNG_STATE_INVALID = 0, + SPNG_STATE_INIT = 1, /* No PNG buffer/stream is set */ + SPNG_STATE_INPUT, /* Decoder input PNG was set */ + SPNG_STATE_OUTPUT = SPNG_STATE_INPUT, /* Encoder output was set */ + SPNG_STATE_IHDR, /* IHDR was read/written */ + SPNG_STATE_FIRST_IDAT, /* Encoded up to / reached first IDAT */ + SPNG_STATE_DECODE_INIT, /* Decoder is ready for progressive reads */ + SPNG_STATE_ENCODE_INIT = SPNG_STATE_DECODE_INIT, + SPNG_STATE_EOI, /* Reached the last scanline/row */ + SPNG_STATE_LAST_IDAT, /* 
Reached last IDAT, set at end of decode_image() */ + SPNG_STATE_AFTER_IDAT, /* */ + SPNG_STATE_IEND, /* Reached IEND */ +}; + +enum spng__internal +{ + SPNG__IO_SIGNAL = 1 << 9, + SPNG__CTX_FLAGS_ALL = (SPNG_CTX_IGNORE_ADLER32 | SPNG_CTX_ENCODER) +}; + +#define SPNG_STR(x) _SPNG_STR(x) +#define _SPNG_STR(x) #x + +#define SPNG_VERSION_STRING SPNG_STR(SPNG_VERSION_MAJOR) "." \ + SPNG_STR(SPNG_VERSION_MINOR) "." \ + SPNG_STR(SPNG_VERSION_PATCH) + +#define SPNG_GET_CHUNK_BOILERPLATE(chunk) \ + if(ctx == NULL) return 1; \ + int ret = read_chunks(ctx, 0); \ + if(ret) return ret; \ + if(!ctx->stored.chunk) return SPNG_ECHUNKAVAIL; \ + if(chunk == NULL) return 1 + +#define SPNG_SET_CHUNK_BOILERPLATE(chunk) \ + if(ctx == NULL || chunk == NULL) return 1; \ + if(ctx->data == NULL && !ctx->encode_only) return SPNG_ENOSRC; \ + int ret = read_chunks(ctx, 0); \ + if(ret) return ret + +/* Determine if the spng_option can be overriden/optimized */ +#define spng__optimize(option) (ctx->optimize_option & (1 << option)) + +struct spng_subimage +{ + uint32_t width; + uint32_t height; + size_t out_width; /* byte width based on output format */ + size_t scanline_width; +}; + +struct spng_text2 +{ + int type; + char *keyword; + char *text; + + size_t text_length; + + uint8_t compression_flag; /* iTXt only */ + char *language_tag; /* iTXt only */ + char *translated_keyword; /* iTXt only */ + + size_t cache_usage; + char user_keyword_storage[80]; +}; + +struct decode_flags +{ + unsigned apply_trns: 1; + unsigned apply_gamma: 1; + unsigned use_sbit: 1; + unsigned indexed: 1; + unsigned do_scaling: 1; + unsigned interlaced: 1; + unsigned same_layout: 1; + unsigned zerocopy: 1; + unsigned unpack: 1; +}; + +struct encode_flags +{ + unsigned interlace: 1; + unsigned same_layout: 1; + unsigned to_bigendian: 1; + unsigned progressive: 1; + unsigned finalize: 1; + + enum spng_filter_choice filter_choice; +}; + +struct spng_chunk_bitfield +{ + unsigned ihdr: 1; + unsigned plte: 1; + unsigned chrm: 
1; + unsigned iccp: 1; + unsigned gama: 1; + unsigned sbit: 1; + unsigned srgb: 1; + unsigned text: 1; + unsigned bkgd: 1; + unsigned hist: 1; + unsigned trns: 1; + unsigned phys: 1; + unsigned splt: 1; + unsigned time: 1; + unsigned offs: 1; + unsigned exif: 1; + unsigned unknown: 1; +}; + +/* Packed sample iterator */ +struct spng__iter +{ + const uint8_t mask; + unsigned shift_amount; + const unsigned initial_shift, bit_depth; + const unsigned char *samples; +}; + +union spng__decode_plte +{ + struct spng_plte_entry rgba[256]; + unsigned char rgb[256 * 3]; + unsigned char raw[256 * 4]; + uint32_t align_this; +}; + +struct spng__zlib_options +{ + int compression_level; + int window_bits; + int mem_level; + int strategy; + int data_type; +}; + +typedef void spng__undo(spng_ctx *ctx); + +struct spng_ctx +{ + size_t data_size; + size_t bytes_read; + size_t stream_buf_size; + unsigned char *stream_buf; + const unsigned char *data; + + /* User-defined pointers for streaming */ + spng_read_fn *read_fn; + spng_write_fn *write_fn; + void *stream_user_ptr; + + /* Used for buffer reads */ + const unsigned char *png_base; + size_t bytes_left; + size_t last_read_size; + + /* Used for encoding */ + int user_owns_out_png; + unsigned char *out_png; + unsigned char *write_ptr; + size_t out_png_size; + size_t bytes_encoded; + + /* These are updated by read/write_header()/read_chunk_bytes() */ + struct spng_chunk current_chunk; + uint32_t cur_chunk_bytes_left; + uint32_t cur_actual_crc; + + struct spng_alloc alloc; + + enum spng_ctx_flags flags; + enum spng_format fmt; + + enum spng_state state; + + unsigned streaming: 1; + unsigned internal_buffer: 1; /* encoding to internal buffer */ + + unsigned inflate: 1; + unsigned deflate: 1; + unsigned encode_only: 1; + unsigned strict: 1; + unsigned discard: 1; + unsigned skip_crc: 1; + unsigned keep_unknown: 1; + unsigned prev_was_idat: 1; + + struct spng__zlib_options image_options; + struct spng__zlib_options text_options; + + 
spng__undo *undo; + + /* input file contains this chunk */ + struct spng_chunk_bitfield file; + + /* chunk was stored with spng_set_*() */ + struct spng_chunk_bitfield user; + + /* chunk was stored by reading or with spng_set_*() */ + struct spng_chunk_bitfield stored; + + /* used to reset the above in case of an error */ + struct spng_chunk_bitfield prev_stored; + + struct spng_chunk first_idat, last_idat; + + uint32_t max_width, max_height; + + size_t max_chunk_size; + size_t chunk_cache_limit; + size_t chunk_cache_usage; + uint32_t chunk_count_limit; + uint32_t chunk_count_total; + + int crc_action_critical; + int crc_action_ancillary; + + uint32_t optimize_option; + + struct spng_ihdr ihdr; + + struct spng_plte plte; + + struct spng_chrm_int chrm_int; + struct spng_iccp iccp; + + uint32_t gama; + + struct spng_sbit sbit; + + uint8_t srgb_rendering_intent; + + uint32_t n_text; + struct spng_text2 *text_list; + + struct spng_bkgd bkgd; + struct spng_hist hist; + struct spng_trns trns; + struct spng_phys phys; + + uint32_t n_splt; + struct spng_splt *splt_list; + + struct spng_time time; + struct spng_offs offs; + struct spng_exif exif; + + uint32_t n_chunks; + struct spng_unknown_chunk *chunk_list; + + struct spng_subimage subimage[7]; + + z_stream zstream; + unsigned char *scanline_buf, *prev_scanline_buf, *row_buf, *filtered_scanline_buf; + unsigned char *scanline, *prev_scanline, *row, *filtered_scanline; + + /* based on fmt */ + size_t image_size; /* may be zero */ + size_t image_width; + + unsigned bytes_per_pixel; /* derived from ihdr */ + unsigned pixel_size; /* derived from spng_format+ihdr */ + int widest_pass; + int last_pass; /* last non-empty pass */ + + uint16_t *gamma_lut; /* points to either _lut8 or _lut16 */ + uint16_t *gamma_lut16; + uint16_t gamma_lut8[256]; + unsigned char trns_px[8]; + union spng__decode_plte decode_plte; + struct spng_sbit decode_sb; + struct decode_flags decode_flags; + struct spng_row_info row_info; + + struct encode_flags 
encode_flags; +}; + +static const uint32_t spng_u32max = INT32_MAX; + +static const uint32_t adam7_x_start[7] = { 0, 4, 0, 2, 0, 1, 0 }; +static const uint32_t adam7_y_start[7] = { 0, 0, 4, 0, 2, 0, 1 }; +static const uint32_t adam7_x_delta[7] = { 8, 8, 4, 4, 2, 2, 1 }; +static const uint32_t adam7_y_delta[7] = { 8, 8, 8, 4, 4, 2, 2 }; + +static const uint8_t spng_signature[8] = { 137, 80, 78, 71, 13, 10, 26, 10 }; + +static const uint8_t type_ihdr[4] = { 73, 72, 68, 82 }; +static const uint8_t type_plte[4] = { 80, 76, 84, 69 }; +static const uint8_t type_idat[4] = { 73, 68, 65, 84 }; +static const uint8_t type_iend[4] = { 73, 69, 78, 68 }; + +static const uint8_t type_trns[4] = { 116, 82, 78, 83 }; +static const uint8_t type_chrm[4] = { 99, 72, 82, 77 }; +static const uint8_t type_gama[4] = { 103, 65, 77, 65 }; +static const uint8_t type_iccp[4] = { 105, 67, 67, 80 }; +static const uint8_t type_sbit[4] = { 115, 66, 73, 84 }; +static const uint8_t type_srgb[4] = { 115, 82, 71, 66 }; +static const uint8_t type_text[4] = { 116, 69, 88, 116 }; +static const uint8_t type_ztxt[4] = { 122, 84, 88, 116 }; +static const uint8_t type_itxt[4] = { 105, 84, 88, 116 }; +static const uint8_t type_bkgd[4] = { 98, 75, 71, 68 }; +static const uint8_t type_hist[4] = { 104, 73, 83, 84 }; +static const uint8_t type_phys[4] = { 112, 72, 89, 115 }; +static const uint8_t type_splt[4] = { 115, 80, 76, 84 }; +static const uint8_t type_time[4] = { 116, 73, 77, 69 }; + +static const uint8_t type_offs[4] = { 111, 70, 70, 115 }; +static const uint8_t type_exif[4] = { 101, 88, 73, 102 }; + +static inline void *spng__malloc(spng_ctx *ctx, size_t size) +{ + return ctx->alloc.malloc_fn(size); +} + +static inline void *spng__calloc(spng_ctx *ctx, size_t nmemb, size_t size) +{ + return ctx->alloc.calloc_fn(nmemb, size); +} + +static inline void *spng__realloc(spng_ctx *ctx, void *ptr, size_t size) +{ + return ctx->alloc.realloc_fn(ptr, size); +} + +static inline void spng__free(spng_ctx *ctx, void 
*ptr) +{ + ctx->alloc.free_fn(ptr); +} + +#if defined(SPNG_USE_MINIZ) +static void *spng__zalloc(void *opaque, size_t items, size_t size) +#else +static void *spng__zalloc(void *opaque, uInt items, uInt size) +#endif +{ + spng_ctx *ctx = opaque; + + if(size > SIZE_MAX / items) return NULL; + + size_t len = (size_t)items * size; + + return spng__malloc(ctx, len); +} + +static void spng__zfree(void *opqaue, void *ptr) +{ + spng_ctx *ctx = opqaue; + spng__free(ctx, ptr); +} + +static inline uint16_t read_u16(const void *src) +{ + const unsigned char *data = src; + + return (data[0] & 0xFFU) << 8 | (data[1] & 0xFFU); +} + +static inline uint32_t read_u32(const void *src) +{ + const unsigned char *data = src; + + return (data[0] & 0xFFUL) << 24 | (data[1] & 0xFFUL) << 16 | + (data[2] & 0xFFUL) << 8 | (data[3] & 0xFFUL); +} + +static inline int32_t read_s32(const void *src) +{ + int32_t ret = (int32_t)read_u32(src); + + return ret; +} + +static inline void write_u16(void *dest, uint16_t x) +{ + unsigned char *data = dest; + + data[0] = x >> 8; + data[1] = x & 0xFF; +} + +static inline void write_u32(void *dest, uint32_t x) +{ + unsigned char *data = dest; + + data[0] = (x >> 24); + data[1] = (x >> 16) & 0xFF; + data[2] = (x >> 8) & 0xFF; + data[3] = x & 0xFF; +} + +static inline void write_s32(void *dest, int32_t x) +{ + uint32_t n = x; + write_u32(dest, n); +} + +/* Returns an iterator for 1,2,4,8-bit samples */ +static struct spng__iter spng__iter_init(unsigned bit_depth, const unsigned char *samples) +{ + struct spng__iter iter = + { + .mask = (uint32_t)(1 << bit_depth) - 1, + .shift_amount = 8 - bit_depth, + .initial_shift = 8 - bit_depth, + .bit_depth = bit_depth, + .samples = samples + }; + + return iter; +} + +/* Returns the current sample unpacked, iterates to the next one */ +static inline uint8_t get_sample(struct spng__iter *iter) +{ + uint8_t x = (iter->samples[0] >> iter->shift_amount) & iter->mask; + + iter->shift_amount -= iter->bit_depth; + + 
if(iter->shift_amount > 7) + { + iter->shift_amount = iter->initial_shift; + iter->samples++; + } + + return x; +} + +static void u16_row_to_host(void *row, size_t size) +{ + uint16_t *px = row; + size_t i, n = size / 2; + + for(i=0; i < n; i++) + { + px[i] = read_u16(&px[i]); + } +} + +static void u16_row_to_bigendian(void *row, size_t size) +{ + uint16_t *px = (uint16_t*)row; + size_t i, n = size / 2; + + for(i=0; i < n; i++) + { + write_u16(&px[i], px[i]); + } +} + +static void rgb8_row_to_rgba8(const unsigned char *row, unsigned char *out, uint32_t n) +{ + uint32_t i; + for(i=0; i < n; i++) + { + memcpy(out + i * 4, row + i * 3, 3); + out[i*4+3] = 255; + } +} + +static unsigned num_channels(const struct spng_ihdr *ihdr) +{ + switch(ihdr->color_type) + { + case SPNG_COLOR_TYPE_TRUECOLOR: return 3; + case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA: return 2; + case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA: return 4; + case SPNG_COLOR_TYPE_GRAYSCALE: + case SPNG_COLOR_TYPE_INDEXED: + return 1; + default: return 0; + } +} + +/* Calculate scanline width in bits, round up to the nearest byte */ +static int calculate_scanline_width(const struct spng_ihdr *ihdr, uint32_t width, size_t *scanline_width) +{ + if(ihdr == NULL || !width) return SPNG_EINTERNAL; + + size_t res = num_channels(ihdr) * ihdr->bit_depth; + + if(res > SIZE_MAX / width) return SPNG_EOVERFLOW; + res = res * width; + + res += 15; /* Filter byte + 7 for rounding */ + + if(res < 15) return SPNG_EOVERFLOW; + + res /= 8; + + if(res > UINT32_MAX) return SPNG_EOVERFLOW; + + *scanline_width = res; + + return 0; +} + +static int calculate_subimages(struct spng_ctx *ctx) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + + struct spng_ihdr *ihdr = &ctx->ihdr; + struct spng_subimage *sub = ctx->subimage; + + if(ihdr->interlace_method == 1) + { + sub[0].width = (ihdr->width + 7) >> 3; + sub[0].height = (ihdr->height + 7) >> 3; + sub[1].width = (ihdr->width + 3) >> 3; + sub[1].height = (ihdr->height + 7) >> 3; + sub[2].width = 
(ihdr->width + 3) >> 2; + sub[2].height = (ihdr->height + 3) >> 3; + sub[3].width = (ihdr->width + 1) >> 2; + sub[3].height = (ihdr->height + 3) >> 2; + sub[4].width = (ihdr->width + 1) >> 1; + sub[4].height = (ihdr->height + 1) >> 2; + sub[5].width = ihdr->width >> 1; + sub[5].height = (ihdr->height + 1) >> 1; + sub[6].width = ihdr->width; + sub[6].height = ihdr->height >> 1; + } + else + { + sub[0].width = ihdr->width; + sub[0].height = ihdr->height; + } + + int i; + for(i=0; i < 7; i++) + { + if(sub[i].width == 0 || sub[i].height == 0) continue; + + int ret = calculate_scanline_width(ihdr, sub[i].width, &sub[i].scanline_width); + if(ret) return ret; + + if(sub[ctx->widest_pass].scanline_width < sub[i].scanline_width) ctx->widest_pass = i; + + ctx->last_pass = i; + } + + return 0; +} + +static int check_decode_fmt(const struct spng_ihdr *ihdr, const int fmt) +{ + switch(fmt) + { + case SPNG_FMT_RGBA8: + case SPNG_FMT_RGBA16: + case SPNG_FMT_RGB8: + case SPNG_FMT_PNG: + case SPNG_FMT_RAW: + return 0; + case SPNG_FMT_G8: + case SPNG_FMT_GA8: + if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth <= 8) return 0; + else return SPNG_EFMT; + case SPNG_FMT_GA16: + if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth == 16) return 0; + else return SPNG_EFMT; + default: return SPNG_EFMT; + } +} + +static int calculate_image_width(const struct spng_ihdr *ihdr, int fmt, size_t *len) +{ + if(ihdr == NULL || len == NULL) return SPNG_EINTERNAL; + + size_t res = ihdr->width; + unsigned bytes_per_pixel; + + switch(fmt) + { + case SPNG_FMT_RGBA8: + case SPNG_FMT_GA16: + bytes_per_pixel = 4; + break; + case SPNG_FMT_RGBA16: + bytes_per_pixel = 8; + break; + case SPNG_FMT_RGB8: + bytes_per_pixel = 3; + break; + case SPNG_FMT_PNG: + case SPNG_FMT_RAW: + { + int ret = calculate_scanline_width(ihdr, ihdr->width, &res); + if(ret) return ret; + + res -= 1; /* exclude filter byte */ + bytes_per_pixel = 1; + break; + } + case SPNG_FMT_G8: + bytes_per_pixel = 
1; + break; + case SPNG_FMT_GA8: + bytes_per_pixel = 2; + break; + default: return SPNG_EINTERNAL; + } + + if(res > SIZE_MAX / bytes_per_pixel) return SPNG_EOVERFLOW; + res = res * bytes_per_pixel; + + *len = res; + + return 0; +} + +static int calculate_image_size(const struct spng_ihdr *ihdr, int fmt, size_t *len) +{ + if(ihdr == NULL || len == NULL) return SPNG_EINTERNAL; + + size_t res = 0; + + int ret = calculate_image_width(ihdr, fmt, &res); + if(ret) return ret; + + if(res > SIZE_MAX / ihdr->height) return SPNG_EOVERFLOW; + res = res * ihdr->height; + + *len = res; + + return 0; +} + +static int increase_cache_usage(spng_ctx *ctx, size_t bytes, int new_chunk) +{ + if(ctx == NULL || !bytes) return SPNG_EINTERNAL; + + if(new_chunk) + { + ctx->chunk_count_total++; + if(ctx->chunk_count_total < 1) return SPNG_EOVERFLOW; + + if(ctx->chunk_count_total > ctx->chunk_count_limit) return SPNG_ECHUNK_LIMITS; + } + + size_t new_usage = ctx->chunk_cache_usage + bytes; + + if(new_usage < ctx->chunk_cache_usage) return SPNG_EOVERFLOW; + + if(new_usage > ctx->chunk_cache_limit) return SPNG_ECHUNK_LIMITS; + + ctx->chunk_cache_usage = new_usage; + + return 0; +} + +static int decrease_cache_usage(spng_ctx *ctx, size_t usage) +{ + if(ctx == NULL || !usage) return SPNG_EINTERNAL; + if(usage > ctx->chunk_cache_usage) return SPNG_EINTERNAL; + + ctx->chunk_cache_usage -= usage; + + return 0; +} + +static int is_critical_chunk(struct spng_chunk *chunk) +{ + if(chunk == NULL) return 0; + if((chunk->type[0] & (1 << 5)) == 0) return 1; + + return 0; +} + +static int decode_err(spng_ctx *ctx, int err) +{ + ctx->state = SPNG_STATE_INVALID; + + return err; +} + +static int encode_err(spng_ctx *ctx, int err) +{ + ctx->state = SPNG_STATE_INVALID; + + return err; +} + +static inline int read_data(spng_ctx *ctx, size_t bytes) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + if(!bytes) return 0; + + if(ctx->streaming && (bytes > SPNG_READ_SIZE)) return SPNG_EINTERNAL; + + int ret = 
ctx->read_fn(ctx, ctx->stream_user_ptr, ctx->stream_buf, bytes); + + if(ret) + { + if(ret > 0 || ret < SPNG_IO_ERROR) ret = SPNG_IO_ERROR; + + return ret; + } + + ctx->bytes_read += bytes; + if(ctx->bytes_read < bytes) return SPNG_EOVERFLOW; + + return 0; +} + +/* Ensure there is enough space for encoding starting at ctx->write_ptr */ +static int require_bytes(spng_ctx *ctx, size_t bytes) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + + if(ctx->streaming) + { + if(bytes > ctx->stream_buf_size) + { + size_t new_size = ctx->stream_buf_size; + + /* Start at default IDAT size + header + crc */ + if(new_size < (SPNG_WRITE_SIZE + 12)) new_size = SPNG_WRITE_SIZE + 12; + + if(new_size < bytes) new_size = bytes; + + void *temp = spng__realloc(ctx, ctx->stream_buf, new_size); + + if(temp == NULL) return encode_err(ctx, SPNG_EMEM); + + ctx->stream_buf = temp; + ctx->stream_buf_size = bytes; + ctx->write_ptr = ctx->stream_buf; + } + + return 0; + } + + if(!ctx->internal_buffer) return SPNG_ENODST; + + size_t required = ctx->bytes_encoded + bytes; + if(required < bytes) return SPNG_EOVERFLOW; + + if(required > ctx->out_png_size) + { + size_t new_size = ctx->out_png_size; + + /* Start with a size that doesn't require a realloc() 100% of the time */ + if(new_size < (SPNG_WRITE_SIZE * 2)) new_size = SPNG_WRITE_SIZE * 2; + + /* Prefer the next power of two over the requested size */ + while(new_size < required) + { + if(new_size / SIZE_MAX > 2) return encode_err(ctx, SPNG_EOVERFLOW); + + new_size *= 2; + } + + void *temp = spng__realloc(ctx, ctx->out_png, new_size); + + if(temp == NULL) return encode_err(ctx, SPNG_EMEM); + + ctx->out_png = temp; + ctx->out_png_size = new_size; + ctx->write_ptr = ctx->out_png + ctx->bytes_encoded; + } + + return 0; +} + +static int write_data(spng_ctx *ctx, const void *data, size_t bytes) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + if(!bytes) return 0; + + if(ctx->streaming) + { + if(bytes > SPNG_WRITE_SIZE) return SPNG_EINTERNAL; + + int ret = 
ctx->write_fn(ctx, ctx->stream_user_ptr, (void*)data, bytes); + + if(ret) + { + if(ret > 0 || ret < SPNG_IO_ERROR) ret = SPNG_IO_ERROR; + + return encode_err(ctx, ret); + } + } + else + { + int ret = require_bytes(ctx, bytes); + if(ret) return encode_err(ctx, ret); + + memcpy(ctx->write_ptr, data, bytes); + + ctx->write_ptr += bytes; + } + + ctx->bytes_encoded += bytes; + if(ctx->bytes_encoded < bytes) return SPNG_EOVERFLOW; + + return 0; +} + +static int write_header(spng_ctx *ctx, const uint8_t chunk_type[4], size_t chunk_length, unsigned char **data) +{ + if(ctx == NULL || chunk_type == NULL) return SPNG_EINTERNAL; + if(chunk_length > spng_u32max) return SPNG_EINTERNAL; + + size_t total = chunk_length + 12; + + int ret = require_bytes(ctx, total); + if(ret) return ret; + + uint32_t crc = crc32(0, NULL, 0); + ctx->current_chunk.crc = crc32(crc, chunk_type, 4); + + memcpy(&ctx->current_chunk.type, chunk_type, 4); + ctx->current_chunk.length = (uint32_t)chunk_length; + + if(!data) return SPNG_EINTERNAL; + + if(ctx->streaming) *data = ctx->stream_buf + 8; + else *data = ctx->write_ptr + 8; + + return 0; +} + +static int trim_chunk(spng_ctx *ctx, uint32_t length) +{ + if(length > spng_u32max) return SPNG_EINTERNAL; + if(length > ctx->current_chunk.length) return SPNG_EINTERNAL; + + ctx->current_chunk.length = length; + + return 0; +} + +static int finish_chunk(spng_ctx *ctx) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + + struct spng_chunk *chunk = &ctx->current_chunk; + + unsigned char *header; + unsigned char *chunk_data; + + if(ctx->streaming) + { + chunk_data = ctx->stream_buf + 8; + header = ctx->stream_buf; + } + else + { + chunk_data = ctx->write_ptr + 8; + header = ctx->write_ptr; + } + + write_u32(header, chunk->length); + memcpy(header + 4, chunk->type, 4); + + chunk->crc = crc32(chunk->crc, chunk_data, chunk->length); + + write_u32(chunk_data + chunk->length, chunk->crc); + + if(ctx->streaming) + { + const unsigned char *ptr = ctx->stream_buf; + uint32_t 
bytes_left = chunk->length + 12; + uint32_t len = 0; + + while(bytes_left) + { + ptr += len; + len = SPNG_WRITE_SIZE; + + if(len > bytes_left) len = bytes_left; + + int ret = write_data(ctx, ptr, len); + if(ret) return ret; + + bytes_left -= len; + } + } + else + { + ctx->bytes_encoded += chunk->length; + if(ctx->bytes_encoded < chunk->length) return SPNG_EOVERFLOW; + + ctx->bytes_encoded += 12; + if(ctx->bytes_encoded < 12) return SPNG_EOVERFLOW; + + ctx->write_ptr += chunk->length + 12; + } + + return 0; +} + +static int write_chunk(spng_ctx *ctx, const uint8_t type[4], const void *data, size_t length) +{ + if(ctx == NULL || type == NULL) return SPNG_EINTERNAL; + if(length && data == NULL) return SPNG_EINTERNAL; + + unsigned char *write_ptr; + + int ret = write_header(ctx, type, length, &write_ptr); + if(ret) return ret; + + if(length) memcpy(write_ptr, data, length); + + return finish_chunk(ctx); +} + +static int write_iend(spng_ctx *ctx) +{ + unsigned char iend_chunk[12] = { 0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130 }; + return write_data(ctx, iend_chunk, 12); +} + +static int write_unknown_chunks(spng_ctx *ctx, enum spng_location location) +{ + if(!ctx->stored.unknown) return 0; + + const struct spng_unknown_chunk *chunk = ctx->chunk_list; + + uint32_t i; + for(i=0; i < ctx->n_chunks; i++, chunk++) + { + if(chunk->location != location) continue; + + int ret = write_chunk(ctx, chunk->type, chunk->data, chunk->length); + if(ret) return ret; + } + + return 0; +} + +/* Read and check the current chunk's crc, + returns -SPNG_CRC_DISCARD if the chunk should be discarded */ +static inline int read_and_check_crc(spng_ctx *ctx) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + + int ret; + ret = read_data(ctx, 4); + if(ret) return ret; + + ctx->current_chunk.crc = read_u32(ctx->data); + + if(ctx->skip_crc) return 0; + + if(ctx->cur_actual_crc != ctx->current_chunk.crc) + { + if(is_critical_chunk(&ctx->current_chunk)) + { + if(ctx->crc_action_critical == SPNG_CRC_USE) 
return 0; + } + else + { + if(ctx->crc_action_ancillary == SPNG_CRC_USE) return 0; + if(ctx->crc_action_ancillary == SPNG_CRC_DISCARD) return -SPNG_CRC_DISCARD; + } + + return SPNG_ECHUNK_CRC; + } + + return 0; +} + +/* Read and validate the current chunk's crc and the next chunk header */ +static inline int read_header(spng_ctx *ctx) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + + int ret; + struct spng_chunk chunk = { 0 }; + + ret = read_and_check_crc(ctx); + if(ret) + { + if(ret == -SPNG_CRC_DISCARD) + { + ctx->discard = 1; + } + else return ret; + } + + ret = read_data(ctx, 8); + if(ret) return ret; + + chunk.offset = ctx->bytes_read - 8; + + chunk.length = read_u32(ctx->data); + + memcpy(&chunk.type, ctx->data + 4, 4); + + if(chunk.length > spng_u32max) return SPNG_ECHUNK_STDLEN; + + ctx->cur_chunk_bytes_left = chunk.length; + + if(is_critical_chunk(&chunk) && ctx->crc_action_critical == SPNG_CRC_USE) ctx->skip_crc = 1; + else if(ctx->crc_action_ancillary == SPNG_CRC_USE) ctx->skip_crc = 1; + else ctx->skip_crc = 0; + + if(!ctx->skip_crc) + { + ctx->cur_actual_crc = crc32(0, NULL, 0); + ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, chunk.type, 4); + } + + ctx->current_chunk = chunk; + + return 0; +} + +/* Read chunk bytes and update crc */ +static int read_chunk_bytes(spng_ctx *ctx, uint32_t bytes) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + if(!ctx->cur_chunk_bytes_left || !bytes) return SPNG_EINTERNAL; + if(bytes > ctx->cur_chunk_bytes_left) return SPNG_EINTERNAL; /* XXX: more specific error? 
*/ + + int ret; + + ret = read_data(ctx, bytes); + if(ret) return ret; + + if(!ctx->skip_crc) ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, ctx->data, bytes); + + ctx->cur_chunk_bytes_left -= bytes; + + return ret; +} + +/* read_chunk_bytes() + read_data() with custom output buffer */ +static int read_chunk_bytes2(spng_ctx *ctx, void *out, uint32_t bytes) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + if(!ctx->cur_chunk_bytes_left || !bytes) return SPNG_EINTERNAL; + if(bytes > ctx->cur_chunk_bytes_left) return SPNG_EINTERNAL; /* XXX: more specific error? */ + + int ret; + uint32_t len = bytes; + + if(ctx->streaming && len > SPNG_READ_SIZE) len = SPNG_READ_SIZE; + + while(bytes) + { + if(len > bytes) len = bytes; + + ret = ctx->read_fn(ctx, ctx->stream_user_ptr, out, len); + if(ret) return ret; + + if(!ctx->streaming) memcpy(out, ctx->data, len); + + ctx->bytes_read += len; + if(ctx->bytes_read < len) return SPNG_EOVERFLOW; + + if(!ctx->skip_crc) ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, out, len); + + ctx->cur_chunk_bytes_left -= len; + + out = (char*)out + len; + bytes -= len; + len = SPNG_READ_SIZE; + } + + return 0; +} + +static int discard_chunk_bytes(spng_ctx *ctx, uint32_t bytes) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + if(!bytes) return 0; + + int ret; + + if(ctx->streaming) /* Do small, consecutive reads */ + { + while(bytes) + { + uint32_t len = SPNG_READ_SIZE; + + if(len > bytes) len = bytes; + + ret = read_chunk_bytes(ctx, len); + if(ret) return ret; + + bytes -= len; + } + } + else + { + ret = read_chunk_bytes(ctx, bytes); + if(ret) return ret; + } + + return 0; +} + +static int spng__inflate_init(spng_ctx *ctx, int window_bits) +{ + if(ctx->zstream.state) inflateEnd(&ctx->zstream); + + ctx->inflate = 1; + + ctx->zstream.zalloc = spng__zalloc; + ctx->zstream.zfree = spng__zfree; + ctx->zstream.opaque = ctx; + + if(inflateInit2(&ctx->zstream, window_bits) != Z_OK) return SPNG_EZLIB_INIT; + +#if ZLIB_VERNUM >= 0x1290 && 
!defined(SPNG_USE_MINIZ) + + int validate = 1; + + if(ctx->flags & SPNG_CTX_IGNORE_ADLER32) validate = 0; + + if(is_critical_chunk(&ctx->current_chunk)) + { + if(ctx->crc_action_critical == SPNG_CRC_USE) validate = 0; + } + else /* ancillary */ + { + if(ctx->crc_action_ancillary == SPNG_CRC_USE) validate = 0; + } + + if(inflateValidate(&ctx->zstream, validate)) return SPNG_EZLIB_INIT; + +#else /* This requires zlib >= 1.2.11 */ + #pragma message ("inflateValidate() not available, SPNG_CTX_IGNORE_ADLER32 will be ignored") +#endif + + return 0; +} + +static int spng__deflate_init(spng_ctx *ctx, struct spng__zlib_options *options) +{ + if(ctx->zstream.state) deflateEnd(&ctx->zstream); + + ctx->deflate = 1; + + z_stream *zstream = &ctx->zstream; + zstream->zalloc = spng__zalloc; + zstream->zfree = spng__zfree; + zstream->opaque = ctx; + zstream->data_type = options->data_type; + + int ret = deflateInit2(zstream, options->compression_level, Z_DEFLATED, options->window_bits, options->mem_level, options->strategy); + + if(ret != Z_OK) return SPNG_EZLIB_INIT; + + return 0; +} + +/* Inflate a zlib stream starting with start_buf if non-NULL, + continuing from the datastream till an end marker, + allocating and writing the inflated stream to *out, + leaving "extra" bytes at the end, final buffer length is *len. + + Takes into account the chunk size and cache limits. 
+*/ +static int spng__inflate_stream(spng_ctx *ctx, char **out, size_t *len, size_t extra, const void *start_buf, size_t start_len) +{ + int ret = spng__inflate_init(ctx, 15); + if(ret) return ret; + + size_t max = ctx->chunk_cache_limit - ctx->chunk_cache_usage; + + if(ctx->max_chunk_size < max) max = ctx->max_chunk_size; + + if(extra > max) return SPNG_ECHUNK_LIMITS; + max -= extra; + + uint32_t read_size; + size_t size = 8 * 1024; + void *t, *buf = spng__malloc(ctx, size); + + if(buf == NULL) return SPNG_EMEM; + + z_stream *stream = &ctx->zstream; + + if(start_buf != NULL && start_len) + { + stream->avail_in = (uInt)start_len; + stream->next_in = start_buf; + } + else + { + stream->avail_in = 0; + stream->next_in = NULL; + } + + stream->avail_out = (uInt)size; + stream->next_out = buf; + + while(ret != Z_STREAM_END) + { + ret = inflate(stream, Z_NO_FLUSH); + + if(ret == Z_STREAM_END) break; + + if(ret != Z_OK && ret != Z_BUF_ERROR) + { + ret = SPNG_EZLIB; + goto err; + } + + if(!stream->avail_out) /* Resize buffer */ + { + /* overflow or reached chunk/cache limit */ + if( (2 > SIZE_MAX / size) || (size > max / 2) ) + { + ret = SPNG_ECHUNK_LIMITS; + goto err; + } + + size *= 2; + + t = spng__realloc(ctx, buf, size); + if(t == NULL) goto mem; + + buf = t; + + stream->avail_out = (uInt)size / 2; + stream->next_out = (unsigned char*)buf + size / 2; + } + else if(!stream->avail_in) /* Read more chunk bytes */ + { + read_size = ctx->cur_chunk_bytes_left; + if(ctx->streaming && read_size > SPNG_READ_SIZE) read_size = SPNG_READ_SIZE; + + ret = read_chunk_bytes(ctx, read_size); + + if(ret) + { + if(!read_size) ret = SPNG_EZLIB; + + goto err; + } + + stream->avail_in = read_size; + stream->next_in = ctx->data; + } + } + + size = stream->total_out; + + if(!size) + { + ret = SPNG_EZLIB; + goto err; + } + + size += extra; + if(size < extra) goto mem; + + t = spng__realloc(ctx, buf, size); + if(t == NULL) goto mem; + + buf = t; + + (void)increase_cache_usage(ctx, size, 0); + 
+ *out = buf; + *len = size; + + return 0; + +mem: + ret = SPNG_EMEM; +err: + spng__free(ctx, buf); + return ret; +} + +/* Read at least one byte from the IDAT stream */ +static int read_idat_bytes(spng_ctx *ctx, uint32_t *bytes_read) +{ + if(ctx == NULL || bytes_read == NULL) return SPNG_EINTERNAL; + if(memcmp(ctx->current_chunk.type, type_idat, 4)) return SPNG_EIDAT_TOO_SHORT; + + int ret; + uint32_t len; + + while(!ctx->cur_chunk_bytes_left) + { + ret = read_header(ctx); + if(ret) return ret; + + if(memcmp(ctx->current_chunk.type, type_idat, 4)) return SPNG_EIDAT_TOO_SHORT; + } + + if(ctx->streaming) + {/* TODO: estimate bytes to read for progressive reads */ + len = SPNG_READ_SIZE; + if(len > ctx->cur_chunk_bytes_left) len = ctx->cur_chunk_bytes_left; + } + else len = ctx->current_chunk.length; + + ret = read_chunk_bytes(ctx, len); + + *bytes_read = len; + + return ret; +} + +static int read_scanline_bytes(spng_ctx *ctx, unsigned char *dest, size_t len) +{ + if(ctx == NULL || dest == NULL) return SPNG_EINTERNAL; + + int ret = Z_OK; + uint32_t bytes_read; + + z_stream *zstream = &ctx->zstream; + + zstream->avail_out = (uInt)len; + zstream->next_out = dest; + + while(zstream->avail_out != 0) + { + ret = inflate(zstream, Z_NO_FLUSH); + + if(ret == Z_OK) continue; + + if(ret == Z_STREAM_END) /* Reached an end-marker */ + { + if(zstream->avail_out != 0) return SPNG_EIDAT_TOO_SHORT; + } + else if(ret == Z_BUF_ERROR) /* Read more IDAT bytes */ + { + ret = read_idat_bytes(ctx, &bytes_read); + if(ret) return ret; + + zstream->avail_in = bytes_read; + zstream->next_in = ctx->data; + } + else return SPNG_EIDAT_STREAM; + } + + return 0; +} + +static uint8_t paeth(uint8_t a, uint8_t b, uint8_t c) +{ + int16_t p = a + b - c; + int16_t pa = abs(p - a); + int16_t pb = abs(p - b); + int16_t pc = abs(p - c); + + if(pa <= pb && pa <= pc) return a; + else if(pb <= pc) return b; + + return c; +} + +SPNG_TARGET_CLONES("default,avx2") +static void defilter_up(size_t bytes, unsigned 
char *row, const unsigned char *prev) +{ + size_t i; + for(i=0; i < bytes; i++) + { + row[i] += prev[i]; + } +} + +/* Defilter *scanline in-place. + *prev_scanline and *scanline should point to the first pixel, + scanline_width is the width of the scanline including the filter byte. +*/ +static int defilter_scanline(const unsigned char *prev_scanline, unsigned char *scanline, + size_t scanline_width, unsigned bytes_per_pixel, unsigned filter) +{ + if(prev_scanline == NULL || scanline == NULL || !scanline_width) return SPNG_EINTERNAL; + + size_t i; + scanline_width--; + + if(filter == 0) return 0; + +#ifndef SPNG_DISABLE_OPT + if(filter == SPNG_FILTER_UP) goto no_opt; + + if(bytes_per_pixel == 4) + { + if(filter == SPNG_FILTER_SUB) + defilter_sub4(scanline_width, scanline); + else if(filter == SPNG_FILTER_AVERAGE) + defilter_avg4(scanline_width, scanline, prev_scanline); + else if(filter == SPNG_FILTER_PAETH) + defilter_paeth4(scanline_width, scanline, prev_scanline); + else return SPNG_EFILTER; + + return 0; + } + else if(bytes_per_pixel == 3) + { + if(filter == SPNG_FILTER_SUB) + defilter_sub3(scanline_width, scanline); + else if(filter == SPNG_FILTER_AVERAGE) + defilter_avg3(scanline_width, scanline, prev_scanline); + else if(filter == SPNG_FILTER_PAETH) + defilter_paeth3(scanline_width, scanline, prev_scanline); + else return SPNG_EFILTER; + + return 0; + } +no_opt: +#endif + + if(filter == SPNG_FILTER_UP) + { + defilter_up(scanline_width, scanline, prev_scanline); + return 0; + } + + for(i=0; i < scanline_width; i++) + { + uint8_t x, a, b, c; + + if(i >= bytes_per_pixel) + { + a = scanline[i - bytes_per_pixel]; + b = prev_scanline[i]; + c = prev_scanline[i - bytes_per_pixel]; + } + else /* First pixel in row */ + { + a = 0; + b = prev_scanline[i]; + c = 0; + } + + x = scanline[i]; + + switch(filter) + { + case SPNG_FILTER_SUB: + { + x = x + a; + break; + } + case SPNG_FILTER_AVERAGE: + { + uint16_t avg = (a + b) / 2; + x = x + avg; + break; + } + case 
SPNG_FILTER_PAETH: + { + x = x + paeth(a,b,c); + break; + } + } + + scanline[i] = x; + } + + return 0; +} + +static int filter_scanline(unsigned char *filtered, const unsigned char *prev_scanline, const unsigned char *scanline, + size_t scanline_width, unsigned bytes_per_pixel, const unsigned filter) +{ + if(prev_scanline == NULL || scanline == NULL || scanline_width <= 1) return SPNG_EINTERNAL; + + if(filter > 4) return SPNG_EFILTER; + if(filter == 0) return 0; + + scanline_width--; + + uint32_t i; + for(i=0; i < scanline_width; i++) + { + uint8_t x, a, b, c; + + if(i >= bytes_per_pixel) + { + a = scanline[i - bytes_per_pixel]; + b = prev_scanline[i]; + c = prev_scanline[i - bytes_per_pixel]; + } + else /* first pixel in row */ + { + a = 0; + b = prev_scanline[i]; + c = 0; + } + + x = scanline[i]; + + switch(filter) + { + case SPNG_FILTER_SUB: + { + x = x - a; + break; + } + case SPNG_FILTER_UP: + { + x = x - b; + break; + } + case SPNG_FILTER_AVERAGE: + { + uint16_t avg = (a + b) / 2; + x = x - avg; + break; + } + case SPNG_FILTER_PAETH: + { + x = x - paeth(a,b,c); + break; + } + } + + filtered[i] = x; + } + + return 0; +} + +static int32_t filter_sum(const unsigned char *prev_scanline, const unsigned char *scanline, + size_t size, unsigned bytes_per_pixel, const unsigned filter) +{ + /* prevent potential over/underflow, bails out at a width of ~8M pixels for RGBA8 */ + if(size > (INT32_MAX / 128)) return INT32_MAX; + + uint32_t i; + int32_t sum = 0; + uint8_t x, a, b, c; + + for(i=0; i < size; i++) + { + if(i >= bytes_per_pixel) + { + a = scanline[i - bytes_per_pixel]; + b = prev_scanline[i]; + c = prev_scanline[i - bytes_per_pixel]; + } + else /* first pixel in row */ + { + a = 0; + b = prev_scanline[i]; + c = 0; + } + + x = scanline[i]; + + switch(filter) + { + case SPNG_FILTER_NONE: + { + break; + } + case SPNG_FILTER_SUB: + { + x = x - a; + break; + } + case SPNG_FILTER_UP: + { + x = x - b; + break; + } + case SPNG_FILTER_AVERAGE: + { + uint16_t avg = (a + 
b) / 2; + x = x - avg; + break; + } + case SPNG_FILTER_PAETH: + { + x = x - paeth(a,b,c); + break; + } + } + + sum += 128 - abs((int)x - 128); + } + + return sum; +} + +static unsigned get_best_filter(const unsigned char *prev_scanline, const unsigned char *scanline, + size_t scanline_width, unsigned bytes_per_pixel, const int choices) +{ + if(!choices) return SPNG_FILTER_NONE; + + scanline_width--; + + int i; + unsigned int best_filter = 0; + enum spng_filter_choice flag; + int32_t sum, best_score = INT32_MAX; + int32_t filter_scores[5] = { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX }; + + if( !(choices & (choices - 1)) ) + {/* only one choice/bit is set */ + for(i=0; i < 5; i++) + { + if(choices == 1 << (i + 3)) return i; + } + } + + for(i=0; i < 5; i++) + { + flag = 1 << (i + 3); + + if(choices & flag) sum = filter_sum(prev_scanline, scanline, scanline_width, bytes_per_pixel, i); + else continue; + + filter_scores[i] = abs(sum); + + if(filter_scores[i] < best_score) + { + best_score = filter_scores[i]; + best_filter = i; + } + } + + return best_filter; +} + +/* Scale "sbits" significant bits in "sample" from "bit_depth" to "target" + + "bit_depth" must be a valid PNG depth + "sbits" must be less than or equal to "bit_depth" + "target" must be between 1 and 16 +*/ +static uint16_t sample_to_target(uint16_t sample, unsigned bit_depth, unsigned sbits, unsigned target) +{ + if(bit_depth == sbits) + { + if(target == sbits) return sample; /* No scaling */ + }/* bit_depth > sbits */ + else sample = sample >> (bit_depth - sbits); /* Shift significant bits to bottom */ + + /* Downscale */ + if(target < sbits) return sample >> (sbits - target); + + /* Upscale using left bit replication */ + int8_t shift_amount = target - sbits; + uint16_t sample_bits = sample; + sample = 0; + + while(shift_amount >= 0) + { + sample = sample | (sample_bits << shift_amount); + shift_amount -= sbits; + } + + int8_t partial = shift_amount + (int8_t)sbits; + + if(partial != 0) 
sample = sample | (sample_bits >> abs(shift_amount)); + + return sample; +} + +static inline void gamma_correct_row(unsigned char *row, uint32_t pixels, int fmt, const uint16_t *gamma_lut) +{ + uint32_t i; + + if(fmt == SPNG_FMT_RGBA8) + { + unsigned char *px; + for(i=0; i < pixels; i++) + { + px = row + i * 4; + + px[0] = gamma_lut[px[0]]; + px[1] = gamma_lut[px[1]]; + px[2] = gamma_lut[px[2]]; + } + } + else if(fmt == SPNG_FMT_RGBA16) + { + for(i=0; i < pixels; i++) + { + uint16_t px[4]; + memcpy(px, row + i * 8, 8); + + px[0] = gamma_lut[px[0]]; + px[1] = gamma_lut[px[1]]; + px[2] = gamma_lut[px[2]]; + + memcpy(row + i * 8, px, 8); + } + } + else if(fmt == SPNG_FMT_RGB8) + { + unsigned char *px; + for(i=0; i < pixels; i++) + { + px = row + i * 3; + + px[0] = gamma_lut[px[0]]; + px[1] = gamma_lut[px[1]]; + px[2] = gamma_lut[px[2]]; + } + } +} + +/* Apply transparency to output row */ +static inline void trns_row(unsigned char *row, + const unsigned char *scanline, + const unsigned char *trns, + unsigned scanline_stride, + struct spng_ihdr *ihdr, + uint32_t pixels, + int fmt) +{ + uint32_t i; + unsigned row_stride; + unsigned depth = ihdr->bit_depth; + + if(fmt == SPNG_FMT_RGBA8) + { + if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) return; /* already applied in the decoding loop */ + + row_stride = 4; + for(i=0; i < pixels; i++, scanline+=scanline_stride, row+=row_stride) + { + if(!memcmp(scanline, trns, scanline_stride)) row[3] = 0; + } + } + else if(fmt == SPNG_FMT_RGBA16) + { + if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) return; /* already applied in the decoding loop */ + + row_stride = 8; + for(i=0; i < pixels; i++, scanline+=scanline_stride, row+=row_stride) + { + if(!memcmp(scanline, trns, scanline_stride)) memset(row + 6, 0, 2); + } + } + else if(fmt == SPNG_FMT_GA8) + { + row_stride = 2; + + if(depth == 16) + { + for(i=0; i < pixels; i++, scanline+=scanline_stride, row+=row_stride) + { + if(!memcmp(scanline, trns, scanline_stride)) memset(row + 
1, 0, 1); + } + } + else /* depth <= 8 */ + { + struct spng__iter iter = spng__iter_init(depth, scanline); + + for(i=0; i < pixels; i++, row+=row_stride) + { + if(trns[0] == get_sample(&iter)) row[1] = 0; + } + } + } + else if(fmt == SPNG_FMT_GA16) + { + row_stride = 4; + + if(depth == 16) + { + for(i=0; i< pixels; i++, scanline+=scanline_stride, row+=row_stride) + { + if(!memcmp(scanline, trns, 2)) memset(row + 2, 0, 2); + } + } + else + { + struct spng__iter iter = spng__iter_init(depth, scanline); + + for(i=0; i< pixels; i++, row+=row_stride) + { + if(trns[0] == get_sample(&iter)) memset(row + 2, 0, 2); + } + } + } + else return; +} + +static inline void scale_row(unsigned char *row, uint32_t pixels, int fmt, unsigned depth, const struct spng_sbit *sbit) +{ + uint32_t i; + + if(fmt == SPNG_FMT_RGBA8) + { + unsigned char px[4]; + for(i=0; i < pixels; i++) + { + memcpy(px, row + i * 4, 4); + + px[0] = sample_to_target(px[0], depth, sbit->red_bits, 8); + px[1] = sample_to_target(px[1], depth, sbit->green_bits, 8); + px[2] = sample_to_target(px[2], depth, sbit->blue_bits, 8); + px[3] = sample_to_target(px[3], depth, sbit->alpha_bits, 8); + + memcpy(row + i * 4, px, 4); + } + } + else if(fmt == SPNG_FMT_RGBA16) + { + uint16_t px[4]; + for(i=0; i < pixels; i++) + { + memcpy(px, row + i * 8, 8); + + px[0] = sample_to_target(px[0], depth, sbit->red_bits, 16); + px[1] = sample_to_target(px[1], depth, sbit->green_bits, 16); + px[2] = sample_to_target(px[2], depth, sbit->blue_bits, 16); + px[3] = sample_to_target(px[3], depth, sbit->alpha_bits, 16); + + memcpy(row + i * 8, px, 8); + } + } + else if(fmt == SPNG_FMT_RGB8) + { + unsigned char px[4]; + for(i=0; i < pixels; i++) + { + memcpy(px, row + i * 3, 3); + + px[0] = sample_to_target(px[0], depth, sbit->red_bits, 8); + px[1] = sample_to_target(px[1], depth, sbit->green_bits, 8); + px[2] = sample_to_target(px[2], depth, sbit->blue_bits, 8); + + memcpy(row + i * 3, px, 3); + } + } + else if(fmt == SPNG_FMT_G8) + { + 
for(i=0; i < pixels; i++) + { + row[i] = sample_to_target(row[i], depth, sbit->grayscale_bits, 8); + } + } + else if(fmt == SPNG_FMT_GA8) + { + for(i=0; i < pixels; i++) + { + row[i*2] = sample_to_target(row[i*2], depth, sbit->grayscale_bits, 8); + } + } +} + +/* Expand to *row using 8-bit palette indices from *scanline */ +static void expand_row(unsigned char *row, + const unsigned char *scanline, + const union spng__decode_plte *decode_plte, + uint32_t width, + int fmt) +{ + uint32_t i = 0; + unsigned char *px; + unsigned char entry; + const struct spng_plte_entry *plte = decode_plte->rgba; + +#if defined(SPNG_ARM) + if(fmt == SPNG_FMT_RGBA8) i = expand_palette_rgba8_neon(row, scanline, decode_plte->raw, width); + else if(fmt == SPNG_FMT_RGB8) + { + i = expand_palette_rgb8_neon(row, scanline, decode_plte->raw, width); + + for(; i < width; i++) + {/* In this case the LUT is 3 bytes packed */ + px = row + i * 3; + entry = scanline[i]; + px[0] = decode_plte->raw[entry * 3 + 0]; + px[1] = decode_plte->raw[entry * 3 + 1]; + px[2] = decode_plte->raw[entry * 3 + 2]; + } + return; + } +#endif + + if(fmt == SPNG_FMT_RGBA8) + { + for(; i < width; i++) + { + px = row + i * 4; + entry = scanline[i]; + px[0] = plte[entry].red; + px[1] = plte[entry].green; + px[2] = plte[entry].blue; + px[3] = plte[entry].alpha; + } + } + else if(fmt == SPNG_FMT_RGB8) + { + for(; i < width; i++) + { + px = row + i * 3; + entry = scanline[i]; + px[0] = plte[entry].red; + px[1] = plte[entry].green; + px[2] = plte[entry].blue; + } + } +} + +/* Unpack 1/2/4/8-bit samples to G8/GA8/GA16 or G16 -> GA16 */ +static void unpack_scanline(unsigned char *out, const unsigned char *scanline, uint32_t width, unsigned bit_depth, int fmt) +{ + struct spng__iter iter = spng__iter_init(bit_depth, scanline); + uint32_t i; + uint16_t sample, alpha = 65535; + + + if(fmt == SPNG_FMT_GA8) goto ga8; + else if(fmt == SPNG_FMT_GA16) goto ga16; + + /* 1/2/4-bit -> 8-bit */ + for(i=0; i < width; i++) out[i] = 
get_sample(&iter); + + return; + +ga8: + /* 1/2/4/8-bit -> GA8 */ + for(i=0; i < width; i++) + { + out[i*2] = get_sample(&iter); + out[i*2 + 1] = 255; + } + + return; + +ga16: + + /* 16 -> GA16 */ + if(bit_depth == 16) + { + for(i=0; i < width; i++) + { + memcpy(out + i * 4, scanline + i * 2, 2); + memcpy(out + i * 4 + 2, &alpha, 2); + } + return; + } + + /* 1/2/4/8-bit -> GA16 */ + for(i=0; i < width; i++) + { + sample = get_sample(&iter); + memcpy(out + i * 4, &sample, 2); + memcpy(out + i * 4 + 2, &alpha, 2); + } +} + +static int check_ihdr(const struct spng_ihdr *ihdr, uint32_t max_width, uint32_t max_height) +{ + if(ihdr->width > spng_u32max || !ihdr->width) return SPNG_EWIDTH; + if(ihdr->height > spng_u32max || !ihdr->height) return SPNG_EHEIGHT; + + if(ihdr->width > max_width) return SPNG_EUSER_WIDTH; + if(ihdr->height > max_height) return SPNG_EUSER_HEIGHT; + + switch(ihdr->color_type) + { + case SPNG_COLOR_TYPE_GRAYSCALE: + { + if( !(ihdr->bit_depth == 1 || ihdr->bit_depth == 2 || + ihdr->bit_depth == 4 || ihdr->bit_depth == 8 || + ihdr->bit_depth == 16) ) + return SPNG_EBIT_DEPTH; + + break; + } + case SPNG_COLOR_TYPE_TRUECOLOR: + case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA: + case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA: + { + if( !(ihdr->bit_depth == 8 || ihdr->bit_depth == 16) ) + return SPNG_EBIT_DEPTH; + + break; + } + case SPNG_COLOR_TYPE_INDEXED: + { + if( !(ihdr->bit_depth == 1 || ihdr->bit_depth == 2 || + ihdr->bit_depth == 4 || ihdr->bit_depth == 8) ) + return SPNG_EBIT_DEPTH; + + break; + } + default: return SPNG_ECOLOR_TYPE; + } + + if(ihdr->compression_method) return SPNG_ECOMPRESSION_METHOD; + if(ihdr->filter_method) return SPNG_EFILTER_METHOD; + + if(ihdr->interlace_method > 1) return SPNG_EINTERLACE_METHOD; + + return 0; +} + +static int check_plte(const struct spng_plte *plte, const struct spng_ihdr *ihdr) +{ + if(plte == NULL || ihdr == NULL) return 1; + + if(plte->n_entries == 0) return 1; + if(plte->n_entries > 256) return 1; + + if(ihdr->color_type 
== SPNG_COLOR_TYPE_INDEXED) + { + if(plte->n_entries > (1U << ihdr->bit_depth)) return 1; + } + + return 0; +} + +static int check_sbit(const struct spng_sbit *sbit, const struct spng_ihdr *ihdr) +{ + if(sbit == NULL || ihdr == NULL) return 1; + + if(ihdr->color_type == 0) + { + if(sbit->grayscale_bits == 0) return SPNG_ESBIT; + if(sbit->grayscale_bits > ihdr->bit_depth) return SPNG_ESBIT; + } + else if(ihdr->color_type == 2 || ihdr->color_type == 3) + { + if(sbit->red_bits == 0) return SPNG_ESBIT; + if(sbit->green_bits == 0) return SPNG_ESBIT; + if(sbit->blue_bits == 0) return SPNG_ESBIT; + + uint8_t bit_depth; + if(ihdr->color_type == 3) bit_depth = 8; + else bit_depth = ihdr->bit_depth; + + if(sbit->red_bits > bit_depth) return SPNG_ESBIT; + if(sbit->green_bits > bit_depth) return SPNG_ESBIT; + if(sbit->blue_bits > bit_depth) return SPNG_ESBIT; + } + else if(ihdr->color_type == 4) + { + if(sbit->grayscale_bits == 0) return SPNG_ESBIT; + if(sbit->alpha_bits == 0) return SPNG_ESBIT; + + if(sbit->grayscale_bits > ihdr->bit_depth) return SPNG_ESBIT; + if(sbit->alpha_bits > ihdr->bit_depth) return SPNG_ESBIT; + } + else if(ihdr->color_type == 6) + { + if(sbit->red_bits == 0) return SPNG_ESBIT; + if(sbit->green_bits == 0) return SPNG_ESBIT; + if(sbit->blue_bits == 0) return SPNG_ESBIT; + if(sbit->alpha_bits == 0) return SPNG_ESBIT; + + if(sbit->red_bits > ihdr->bit_depth) return SPNG_ESBIT; + if(sbit->green_bits > ihdr->bit_depth) return SPNG_ESBIT; + if(sbit->blue_bits > ihdr->bit_depth) return SPNG_ESBIT; + if(sbit->alpha_bits > ihdr->bit_depth) return SPNG_ESBIT; + } + + return 0; +} + +static int check_chrm_int(const struct spng_chrm_int *chrm_int) +{ + if(chrm_int == NULL) return 1; + + if(chrm_int->white_point_x > spng_u32max || + chrm_int->white_point_y > spng_u32max || + chrm_int->red_x > spng_u32max || + chrm_int->red_y > spng_u32max || + chrm_int->green_x > spng_u32max || + chrm_int->green_y > spng_u32max || + chrm_int->blue_x > spng_u32max || + 
chrm_int->blue_y > spng_u32max) return SPNG_ECHRM; + + return 0; +} + +static int check_phys(const struct spng_phys *phys) +{ + if(phys == NULL) return 1; + + if(phys->unit_specifier > 1) return SPNG_EPHYS; + + if(phys->ppu_x > spng_u32max) return SPNG_EPHYS; + if(phys->ppu_y > spng_u32max) return SPNG_EPHYS; + + return 0; +} + +static int check_time(const struct spng_time *time) +{ + if(time == NULL) return 1; + + if(time->month == 0 || time->month > 12) return 1; + if(time->day == 0 || time->day > 31) return 1; + if(time->hour > 23) return 1; + if(time->minute > 59) return 1; + if(time->second > 60) return 1; + + return 0; +} + +static int check_offs(const struct spng_offs *offs) +{ + if(offs == NULL) return 1; + + if(offs->unit_specifier > 1) return 1; + + return 0; +} + +static int check_exif(const struct spng_exif *exif) +{ + if(exif == NULL) return 1; + if(exif->data == NULL) return 1; + + if(exif->length < 4) return SPNG_ECHUNK_SIZE; + if(exif->length > spng_u32max) return SPNG_ECHUNK_STDLEN; + + const uint8_t exif_le[4] = { 73, 73, 42, 0 }; + const uint8_t exif_be[4] = { 77, 77, 0, 42 }; + + if(memcmp(exif->data, exif_le, 4) && memcmp(exif->data, exif_be, 4)) return 1; + + return 0; +} + +/* Validate PNG keyword */ +static int check_png_keyword(const char *str) +{ + if(str == NULL) return 1; + size_t len = strlen(str); + const char *end = str + len; + + if(!len) return 1; + if(len > 79) return 1; + if(str[0] == ' ') return 1; /* Leading space */ + if(end[-1] == ' ') return 1; /* Trailing space */ + if(strstr(str, " ") != NULL) return 1; /* Consecutive spaces */ + + uint8_t c; + while(str != end) + { + memcpy(&c, str, 1); + + if( (c >= 32 && c <= 126) || (c >= 161) ) str++; + else return 1; /* Invalid character */ + } + + return 0; +} + +/* Validate PNG text *str up to 'len' bytes */ +static int check_png_text(const char *str, size_t len) +{/* XXX: are consecutive newlines permitted? 
*/ + if(str == NULL || len == 0) return 1; + + uint8_t c; + size_t i = 0; + while(i < len) + { + memcpy(&c, str + i, 1); + + if( (c >= 32 && c <= 126) || (c >= 161) || c == 10) i++; + else return 1; /* Invalid character */ + } + + return 0; +} + +/* Returns non-zero for standard chunks which are stored without allocating memory */ +static int is_small_chunk(uint8_t type[4]) +{ + if(!memcmp(type, type_plte, 4)) return 1; + else if(!memcmp(type, type_chrm, 4)) return 1; + else if(!memcmp(type, type_gama, 4)) return 1; + else if(!memcmp(type, type_sbit, 4)) return 1; + else if(!memcmp(type, type_srgb, 4)) return 1; + else if(!memcmp(type, type_bkgd, 4)) return 1; + else if(!memcmp(type, type_trns, 4)) return 1; + else if(!memcmp(type, type_hist, 4)) return 1; + else if(!memcmp(type, type_phys, 4)) return 1; + else if(!memcmp(type, type_time, 4)) return 1; + else if(!memcmp(type, type_offs, 4)) return 1; + else return 0; +} + +static int read_ihdr(spng_ctx *ctx) +{ + int ret; + struct spng_chunk *chunk = &ctx->current_chunk; + const unsigned char *data; + + chunk->offset = 8; + chunk->length = 13; + size_t sizeof_sig_ihdr = 29; + + ret = read_data(ctx, sizeof_sig_ihdr); + if(ret) return ret; + + data = ctx->data; + + if(memcmp(data, spng_signature, sizeof(spng_signature))) return SPNG_ESIGNATURE; + + chunk->length = read_u32(data + 8); + memcpy(&chunk->type, data + 12, 4); + + if(chunk->length != 13) return SPNG_EIHDR_SIZE; + if(memcmp(chunk->type, type_ihdr, 4)) return SPNG_ENOIHDR; + + ctx->cur_actual_crc = crc32(0, NULL, 0); + ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, data + 12, 17); + + ctx->ihdr.width = read_u32(data + 16); + ctx->ihdr.height = read_u32(data + 20); + ctx->ihdr.bit_depth = data[24]; + ctx->ihdr.color_type = data[25]; + ctx->ihdr.compression_method = data[26]; + ctx->ihdr.filter_method = data[27]; + ctx->ihdr.interlace_method = data[28]; + + ret = check_ihdr(&ctx->ihdr, ctx->max_width, ctx->max_height); + if(ret) return ret; + + 
ctx->file.ihdr = 1; + ctx->stored.ihdr = 1; + + if(ctx->ihdr.bit_depth < 8) ctx->bytes_per_pixel = 1; + else ctx->bytes_per_pixel = num_channels(&ctx->ihdr) * (ctx->ihdr.bit_depth / 8); + + ret = calculate_subimages(ctx); + if(ret) return ret; + + return 0; +} + +static void splt_undo(spng_ctx *ctx) +{ + struct spng_splt *splt = &ctx->splt_list[ctx->n_splt - 1]; + + spng__free(ctx, splt->entries); + + decrease_cache_usage(ctx, sizeof(struct spng_splt)); + decrease_cache_usage(ctx, splt->n_entries * sizeof(struct spng_splt_entry)); + + splt->entries = NULL; + + ctx->n_splt--; +} + +static void text_undo(spng_ctx *ctx) +{ + struct spng_text2 *text = &ctx->text_list[ctx->n_text - 1]; + + spng__free(ctx, text->keyword); + if(text->compression_flag) spng__free(ctx, text->text); + + decrease_cache_usage(ctx, text->cache_usage); + decrease_cache_usage(ctx, sizeof(struct spng_text2)); + + text->keyword = NULL; + text->text = NULL; + + ctx->n_text--; +} + +static void chunk_undo(spng_ctx *ctx) +{ + struct spng_unknown_chunk *chunk = &ctx->chunk_list[ctx->n_chunks - 1]; + + spng__free(ctx, chunk->data); + + decrease_cache_usage(ctx, chunk->length); + decrease_cache_usage(ctx, sizeof(struct spng_unknown_chunk)); + + chunk->data = NULL; + + ctx->n_chunks--; +} + +static int read_non_idat_chunks(spng_ctx *ctx) +{ + int ret; + struct spng_chunk chunk; + const unsigned char *data; + + ctx->discard = 0; + ctx->undo = NULL; + ctx->prev_stored = ctx->stored; + + while( !(ret = read_header(ctx))) + { + if(ctx->discard) + { + if(ctx->undo) ctx->undo(ctx); + + ctx->stored = ctx->prev_stored; + } + + ctx->discard = 0; + ctx->undo = NULL; + + ctx->prev_stored = ctx->stored; + chunk = ctx->current_chunk; + + if(!memcmp(chunk.type, type_idat, 4)) + { + if(ctx->state < SPNG_STATE_FIRST_IDAT) + { + if(ctx->ihdr.color_type == 3 && !ctx->stored.plte) return SPNG_ENOPLTE; + + ctx->first_idat = chunk; + return 0; + } + + if(ctx->prev_was_idat) + { + /* Ignore extra IDAT's */ + ret = 
discard_chunk_bytes(ctx, chunk.length); + if(ret) return ret; + + continue; + } + else return SPNG_ECHUNK_POS; /* IDAT chunk not at the end of the IDAT sequence */ + } + + ctx->prev_was_idat = 0; + + if(is_small_chunk(chunk.type)) + { + /* None of the known chunks can be zero length */ + if(!chunk.length) return SPNG_ECHUNK_SIZE; + + /* The largest of these chunks is PLTE with 256 entries */ + ret = read_chunk_bytes(ctx, chunk.length > 768 ? 768 : chunk.length); + if(ret) return ret; + } + + data = ctx->data; + + if(is_critical_chunk(&chunk)) + { + if(!memcmp(chunk.type, type_plte, 4)) + { + if(ctx->file.trns || ctx->file.hist || ctx->file.bkgd) return SPNG_ECHUNK_POS; + if(chunk.length % 3 != 0) return SPNG_ECHUNK_SIZE; + + ctx->plte.n_entries = chunk.length / 3; + + if(check_plte(&ctx->plte, &ctx->ihdr)) return SPNG_ECHUNK_SIZE; /* XXX: EPLTE? */ + + size_t i; + for(i=0; i < ctx->plte.n_entries; i++) + { + ctx->plte.entries[i].red = data[i * 3]; + ctx->plte.entries[i].green = data[i * 3 + 1]; + ctx->plte.entries[i].blue = data[i * 3 + 2]; + } + + ctx->file.plte = 1; + ctx->stored.plte = 1; + } + else if(!memcmp(chunk.type, type_iend, 4)) + { + if(ctx->state == SPNG_STATE_AFTER_IDAT) + { + if(chunk.length) return SPNG_ECHUNK_SIZE; + + ret = read_and_check_crc(ctx); + if(ret == -SPNG_CRC_DISCARD) ret = 0; + + return ret; + } + else return SPNG_ECHUNK_POS; + } + else if(!memcmp(chunk.type, type_ihdr, 4)) return SPNG_ECHUNK_POS; + else return SPNG_ECHUNK_UNKNOWN_CRITICAL; + } + else if(!memcmp(chunk.type, type_chrm, 4)) /* Ancillary chunks */ + { + if(ctx->file.plte) return SPNG_ECHUNK_POS; + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.chrm) return SPNG_EDUP_CHRM; + + if(chunk.length != 32) return SPNG_ECHUNK_SIZE; + + ctx->chrm_int.white_point_x = read_u32(data); + ctx->chrm_int.white_point_y = read_u32(data + 4); + ctx->chrm_int.red_x = read_u32(data + 8); + ctx->chrm_int.red_y = read_u32(data + 12); + ctx->chrm_int.green_x = 
read_u32(data + 16); + ctx->chrm_int.green_y = read_u32(data + 20); + ctx->chrm_int.blue_x = read_u32(data + 24); + ctx->chrm_int.blue_y = read_u32(data + 28); + + if(check_chrm_int(&ctx->chrm_int)) return SPNG_ECHRM; + + ctx->file.chrm = 1; + ctx->stored.chrm = 1; + } + else if(!memcmp(chunk.type, type_gama, 4)) + { + if(ctx->file.plte) return SPNG_ECHUNK_POS; + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.gama) return SPNG_EDUP_GAMA; + + if(chunk.length != 4) return SPNG_ECHUNK_SIZE; + + ctx->gama = read_u32(data); + + if(!ctx->gama) return SPNG_EGAMA; + if(ctx->gama > spng_u32max) return SPNG_EGAMA; + + ctx->file.gama = 1; + ctx->stored.gama = 1; + } + else if(!memcmp(chunk.type, type_sbit, 4)) + { + if(ctx->file.plte) return SPNG_ECHUNK_POS; + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.sbit) return SPNG_EDUP_SBIT; + + if(ctx->ihdr.color_type == 0) + { + if(chunk.length != 1) return SPNG_ECHUNK_SIZE; + + ctx->sbit.grayscale_bits = data[0]; + } + else if(ctx->ihdr.color_type == 2 || ctx->ihdr.color_type == 3) + { + if(chunk.length != 3) return SPNG_ECHUNK_SIZE; + + ctx->sbit.red_bits = data[0]; + ctx->sbit.green_bits = data[1]; + ctx->sbit.blue_bits = data[2]; + } + else if(ctx->ihdr.color_type == 4) + { + if(chunk.length != 2) return SPNG_ECHUNK_SIZE; + + ctx->sbit.grayscale_bits = data[0]; + ctx->sbit.alpha_bits = data[1]; + } + else if(ctx->ihdr.color_type == 6) + { + if(chunk.length != 4) return SPNG_ECHUNK_SIZE; + + ctx->sbit.red_bits = data[0]; + ctx->sbit.green_bits = data[1]; + ctx->sbit.blue_bits = data[2]; + ctx->sbit.alpha_bits = data[3]; + } + + if(check_sbit(&ctx->sbit, &ctx->ihdr)) return SPNG_ESBIT; + + ctx->file.sbit = 1; + ctx->stored.sbit = 1; + } + else if(!memcmp(chunk.type, type_srgb, 4)) + { + if(ctx->file.plte) return SPNG_ECHUNK_POS; + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.srgb) return SPNG_EDUP_SRGB; + + if(chunk.length != 1) 
return SPNG_ECHUNK_SIZE; + + ctx->srgb_rendering_intent = data[0]; + + if(ctx->srgb_rendering_intent > 3) return SPNG_ESRGB; + + ctx->file.srgb = 1; + ctx->stored.srgb = 1; + } + else if(!memcmp(chunk.type, type_bkgd, 4)) + { + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.bkgd) return SPNG_EDUP_BKGD; + + if(ctx->ihdr.color_type == 0 || ctx->ihdr.color_type == 4) + { + if(chunk.length != 2) return SPNG_ECHUNK_SIZE; + + ctx->bkgd.gray = read_u16(data); + } + else if(ctx->ihdr.color_type == 2 || ctx->ihdr.color_type == 6) + { + if(chunk.length != 6) return SPNG_ECHUNK_SIZE; + + ctx->bkgd.red = read_u16(data); + ctx->bkgd.green = read_u16(data + 2); + ctx->bkgd.blue = read_u16(data + 4); + } + else if(ctx->ihdr.color_type == 3) + { + if(chunk.length != 1) return SPNG_ECHUNK_SIZE; + if(!ctx->file.plte) return SPNG_EBKGD_NO_PLTE; + + ctx->bkgd.plte_index = data[0]; + if(ctx->bkgd.plte_index >= ctx->plte.n_entries) return SPNG_EBKGD_PLTE_IDX; + } + + ctx->file.bkgd = 1; + ctx->stored.bkgd = 1; + } + else if(!memcmp(chunk.type, type_trns, 4)) + { + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.trns) return SPNG_EDUP_TRNS; + if(!chunk.length) return SPNG_ECHUNK_SIZE; + + if(ctx->ihdr.color_type == 0) + { + if(chunk.length != 2) return SPNG_ECHUNK_SIZE; + + ctx->trns.gray = read_u16(data); + } + else if(ctx->ihdr.color_type == 2) + { + if(chunk.length != 6) return SPNG_ECHUNK_SIZE; + + ctx->trns.red = read_u16(data); + ctx->trns.green = read_u16(data + 2); + ctx->trns.blue = read_u16(data + 4); + } + else if(ctx->ihdr.color_type == 3) + { + if(chunk.length > ctx->plte.n_entries) return SPNG_ECHUNK_SIZE; + if(!ctx->file.plte) return SPNG_ETRNS_NO_PLTE; + + memcpy(ctx->trns.type3_alpha, data, chunk.length); + ctx->trns.n_type3_entries = chunk.length; + } + + if(ctx->ihdr.color_type == 4 || ctx->ihdr.color_type == 6) return SPNG_ETRNS_COLOR_TYPE; + + ctx->file.trns = 1; + ctx->stored.trns = 1; + } + else 
if(!memcmp(chunk.type, type_hist, 4)) + { + if(!ctx->file.plte) return SPNG_EHIST_NO_PLTE; + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.hist) return SPNG_EDUP_HIST; + + if( (chunk.length / 2) != (ctx->plte.n_entries) ) return SPNG_ECHUNK_SIZE; + + size_t k; + for(k=0; k < (chunk.length / 2); k++) + { + ctx->hist.frequency[k] = read_u16(data + k*2); + } + + ctx->file.hist = 1; + ctx->stored.hist = 1; + } + else if(!memcmp(chunk.type, type_phys, 4)) + { + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.phys) return SPNG_EDUP_PHYS; + + if(chunk.length != 9) return SPNG_ECHUNK_SIZE; + + ctx->phys.ppu_x = read_u32(data); + ctx->phys.ppu_y = read_u32(data + 4); + ctx->phys.unit_specifier = data[8]; + + if(check_phys(&ctx->phys)) return SPNG_EPHYS; + + ctx->file.phys = 1; + ctx->stored.phys = 1; + } + else if(!memcmp(chunk.type, type_time, 4)) + { + if(ctx->file.time) return SPNG_EDUP_TIME; + + if(chunk.length != 7) return SPNG_ECHUNK_SIZE; + + struct spng_time time; + + time.year = read_u16(data); + time.month = data[2]; + time.day = data[3]; + time.hour = data[4]; + time.minute = data[5]; + time.second = data[6]; + + if(check_time(&time)) return SPNG_ETIME; + + ctx->file.time = 1; + + if(!ctx->user.time) ctx->time = time; + + ctx->stored.time = 1; + } + else if(!memcmp(chunk.type, type_offs, 4)) + { + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.offs) return SPNG_EDUP_OFFS; + + if(chunk.length != 9) return SPNG_ECHUNK_SIZE; + + ctx->offs.x = read_s32(data); + ctx->offs.y = read_s32(data + 4); + ctx->offs.unit_specifier = data[8]; + + if(check_offs(&ctx->offs)) return SPNG_EOFFS; + + ctx->file.offs = 1; + ctx->stored.offs = 1; + } + else /* Arbitrary-length chunk */ + { + + if(!memcmp(chunk.type, type_exif, 4)) + { + if(ctx->file.exif) return SPNG_EDUP_EXIF; + + ctx->file.exif = 1; + + if(ctx->user.exif) goto discard; + + if(increase_cache_usage(ctx, chunk.length, 1)) 
return SPNG_ECHUNK_LIMITS; + + struct spng_exif exif; + + exif.length = chunk.length; + + exif.data = spng__malloc(ctx, chunk.length); + if(exif.data == NULL) return SPNG_EMEM; + + ret = read_chunk_bytes2(ctx, exif.data, chunk.length); + if(ret) + { + spng__free(ctx, exif.data); + return ret; + } + + if(check_exif(&exif)) + { + spng__free(ctx, exif.data); + return SPNG_EEXIF; + } + + ctx->exif = exif; + + ctx->stored.exif = 1; + } + else if(!memcmp(chunk.type, type_iccp, 4)) + {/* TODO: add test file with color profile */ + if(ctx->file.plte) return SPNG_ECHUNK_POS; + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->file.iccp) return SPNG_EDUP_ICCP; + if(!chunk.length) return SPNG_ECHUNK_SIZE; + + ctx->file.iccp = 1; + + uint32_t peek_bytes = 81 > chunk.length ? chunk.length : 81; + + ret = read_chunk_bytes(ctx, peek_bytes); + if(ret) return ret; + + unsigned char *keyword_nul = memchr(ctx->data, '\0', peek_bytes); + if(keyword_nul == NULL) return SPNG_EICCP_NAME; + + uint32_t keyword_len = keyword_nul - ctx->data; + + if(keyword_len > 79) return SPNG_EICCP_NAME; + + memcpy(ctx->iccp.profile_name, ctx->data, keyword_len); + + if(check_png_keyword(ctx->iccp.profile_name)) return SPNG_EICCP_NAME; + + if(chunk.length < (keyword_len + 2)) return SPNG_ECHUNK_SIZE; + + if(ctx->data[keyword_len + 1] != 0) return SPNG_EICCP_COMPRESSION_METHOD; + + ret = spng__inflate_stream(ctx, &ctx->iccp.profile, &ctx->iccp.profile_len, 0, ctx->data + keyword_len + 2, peek_bytes - (keyword_len + 2)); + + if(ret) return ret; + + ctx->stored.iccp = 1; + } + else if(!memcmp(chunk.type, type_text, 4) || + !memcmp(chunk.type, type_ztxt, 4) || + !memcmp(chunk.type, type_itxt, 4)) + { + if(!chunk.length) return SPNG_ECHUNK_SIZE; + + ctx->file.text = 1; + + if(ctx->user.text) goto discard; + + if(increase_cache_usage(ctx, sizeof(struct spng_text2), 1)) return SPNG_ECHUNK_LIMITS; + + ctx->n_text++; + if(ctx->n_text < 1) return SPNG_EOVERFLOW; + if(sizeof(struct 
spng_text2) > SIZE_MAX / ctx->n_text) return SPNG_EOVERFLOW; + + void *buf = spng__realloc(ctx, ctx->text_list, ctx->n_text * sizeof(struct spng_text2)); + if(buf == NULL) return SPNG_EMEM; + ctx->text_list = buf; + + struct spng_text2 *text = &ctx->text_list[ctx->n_text - 1]; + memset(text, 0, sizeof(struct spng_text2)); + + ctx->undo = text_undo; + + uint32_t text_offset = 0, language_tag_offset = 0, translated_keyword_offset = 0; + uint32_t peek_bytes = 256; /* enough for 3 80-byte keywords and some text bytes */ + uint32_t keyword_len; + + if(peek_bytes > chunk.length) peek_bytes = chunk.length; + + ret = read_chunk_bytes(ctx, peek_bytes); + if(ret) return ret; + + data = ctx->data; + + const unsigned char *zlib_stream = NULL; + const unsigned char *peek_end = data + peek_bytes; + const unsigned char *keyword_nul = memchr(data, 0, chunk.length > 80 ? 80 : chunk.length); + + if(keyword_nul == NULL) return SPNG_ETEXT_KEYWORD; + + keyword_len = keyword_nul - data; + + if(!memcmp(chunk.type, type_text, 4)) + { + text->type = SPNG_TEXT; + + text->text_length = chunk.length - keyword_len - 1; + + text_offset = keyword_len; + + /* increment past nul if there is a text field */ + if(text->text_length) text_offset++; + } + else if(!memcmp(chunk.type, type_ztxt, 4)) + { + text->type = SPNG_ZTXT; + + if((peek_bytes - keyword_len) <= 2) return SPNG_EZTXT; + + if(keyword_nul[1]) return SPNG_EZTXT_COMPRESSION_METHOD; + + text->compression_flag = 1; + + text_offset = keyword_len + 2; + } + else if(!memcmp(chunk.type, type_itxt, 4)) + { + text->type = SPNG_ITXT; + + /* at least two 1-byte fields, two >=0 length strings, and one byte of (compressed) text */ + if((peek_bytes - keyword_len) < 5) return SPNG_EITXT; + + text->compression_flag = keyword_nul[1]; + + if(text->compression_flag > 1) return SPNG_EITXT_COMPRESSION_FLAG; + + if(keyword_nul[2]) return SPNG_EITXT_COMPRESSION_METHOD; + + language_tag_offset = keyword_len + 3; + + const unsigned char *term; + term = 
memchr(data + language_tag_offset, 0, peek_bytes - language_tag_offset); + if(term == NULL) return SPNG_EITXT_LANG_TAG; + + if((peek_end - term) < 2) return SPNG_EITXT; + + translated_keyword_offset = term - data + 1; + + zlib_stream = memchr(data + translated_keyword_offset, 0, peek_bytes - translated_keyword_offset); + if(zlib_stream == NULL) return SPNG_EITXT; + if(zlib_stream == peek_end) return SPNG_EITXT; + + text_offset = zlib_stream - data + 1; + text->text_length = chunk.length - text_offset; + } + else return SPNG_EINTERNAL; + + + if(text->compression_flag) + { + /* cache usage = peek_bytes + decompressed text size + nul */ + if(increase_cache_usage(ctx, peek_bytes, 0)) return SPNG_ECHUNK_LIMITS; + + text->keyword = spng__calloc(ctx, 1, peek_bytes); + if(text->keyword == NULL) return SPNG_EMEM; + + memcpy(text->keyword, data, peek_bytes); + + zlib_stream = ctx->data + text_offset; + + ret = spng__inflate_stream(ctx, &text->text, &text->text_length, 1, zlib_stream, peek_bytes - text_offset); + + if(ret) return ret; + + text->text[text->text_length - 1] = '\0'; + text->cache_usage = text->text_length + peek_bytes; + } + else + { + if(increase_cache_usage(ctx, chunk.length + 1, 0)) return SPNG_ECHUNK_LIMITS; + + text->keyword = spng__malloc(ctx, chunk.length + 1); + if(text->keyword == NULL) return SPNG_EMEM; + + memcpy(text->keyword, data, peek_bytes); + + if(chunk.length > peek_bytes) + { + ret = read_chunk_bytes2(ctx, text->keyword + peek_bytes, chunk.length - peek_bytes); + if(ret) return ret; + } + + text->text = text->keyword + text_offset; + + text->text_length = chunk.length - text_offset; + + text->text[text->text_length] = '\0'; + text->cache_usage = chunk.length + 1; + } + + if(check_png_keyword(text->keyword)) return SPNG_ETEXT_KEYWORD; + + text->text_length = strlen(text->text); + + if(text->type != SPNG_ITXT) + { + language_tag_offset = keyword_len; + translated_keyword_offset = keyword_len; + + if(ctx->strict && check_png_text(text->text, 
text->text_length)) + { + if(text->type == SPNG_ZTXT) return SPNG_EZTXT; + else return SPNG_ETEXT; + } + } + + text->language_tag = text->keyword + language_tag_offset; + text->translated_keyword = text->keyword + translated_keyword_offset; + + ctx->stored.text = 1; + } + else if(!memcmp(chunk.type, type_splt, 4)) + { + if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS; + if(ctx->user.splt) goto discard; /* XXX: could check profile names for uniqueness */ + if(!chunk.length) return SPNG_ECHUNK_SIZE; + + ctx->file.splt = 1; + + /* chunk.length + sizeof(struct spng_splt) + splt->n_entries * sizeof(struct spng_splt_entry) */ + if(increase_cache_usage(ctx, chunk.length + sizeof(struct spng_splt), 1)) return SPNG_ECHUNK_LIMITS; + + ctx->n_splt++; + if(ctx->n_splt < 1) return SPNG_EOVERFLOW; + if(sizeof(struct spng_splt) > SIZE_MAX / ctx->n_splt) return SPNG_EOVERFLOW; + + void *buf = spng__realloc(ctx, ctx->splt_list, ctx->n_splt * sizeof(struct spng_splt)); + if(buf == NULL) return SPNG_EMEM; + ctx->splt_list = buf; + + struct spng_splt *splt = &ctx->splt_list[ctx->n_splt - 1]; + + memset(splt, 0, sizeof(struct spng_splt)); + + ctx->undo = splt_undo; + + void *t = spng__malloc(ctx, chunk.length); + if(t == NULL) return SPNG_EMEM; + + splt->entries = t; /* simplifies error handling */ + data = t; + + ret = read_chunk_bytes2(ctx, t, chunk.length); + if(ret) return ret; + + uint32_t keyword_len = chunk.length < 80 ? 
chunk.length : 80; + + const unsigned char *keyword_nul = memchr(data, 0, keyword_len); + if(keyword_nul == NULL) return SPNG_ESPLT_NAME; + + keyword_len = keyword_nul - data; + + memcpy(splt->name, data, keyword_len); + + if(check_png_keyword(splt->name)) return SPNG_ESPLT_NAME; + + uint32_t j; + for(j=0; j < (ctx->n_splt - 1); j++) + { + if(!strcmp(ctx->splt_list[j].name, splt->name)) return SPNG_ESPLT_DUP_NAME; + } + + if( (chunk.length - keyword_len) <= 2) return SPNG_ECHUNK_SIZE; + + splt->sample_depth = data[keyword_len + 1]; + + uint32_t entries_len = chunk.length - keyword_len - 2; + if(!entries_len) return SPNG_ECHUNK_SIZE; + + if(splt->sample_depth == 16) + { + if(entries_len % 10 != 0) return SPNG_ECHUNK_SIZE; + splt->n_entries = entries_len / 10; + } + else if(splt->sample_depth == 8) + { + if(entries_len % 6 != 0) return SPNG_ECHUNK_SIZE; + splt->n_entries = entries_len / 6; + } + else return SPNG_ESPLT_DEPTH; + + if(!splt->n_entries) return SPNG_ECHUNK_SIZE; + + size_t list_size = splt->n_entries; + + if(list_size > SIZE_MAX / sizeof(struct spng_splt_entry)) return SPNG_EOVERFLOW; + + list_size *= sizeof(struct spng_splt_entry); + + if(increase_cache_usage(ctx, list_size, 0)) return SPNG_ECHUNK_LIMITS; + + splt->entries = spng__malloc(ctx, list_size); + if(splt->entries == NULL) + { + spng__free(ctx, t); + return SPNG_EMEM; + } + + data = (unsigned char*)t + keyword_len + 2; + + uint32_t k; + if(splt->sample_depth == 16) + { + for(k=0; k < splt->n_entries; k++) + { + splt->entries[k].red = read_u16(data + k * 10); + splt->entries[k].green = read_u16(data + k * 10 + 2); + splt->entries[k].blue = read_u16(data + k * 10 + 4); + splt->entries[k].alpha = read_u16(data + k * 10 + 6); + splt->entries[k].frequency = read_u16(data + k * 10 + 8); + } + } + else if(splt->sample_depth == 8) + { + for(k=0; k < splt->n_entries; k++) + { + splt->entries[k].red = data[k * 6]; + splt->entries[k].green = data[k * 6 + 1]; + splt->entries[k].blue = data[k * 6 + 2]; + 
splt->entries[k].alpha = data[k * 6 + 3]; + splt->entries[k].frequency = read_u16(data + k * 6 + 4); + } + } + + spng__free(ctx, t); + decrease_cache_usage(ctx, chunk.length); + + ctx->stored.splt = 1; + } + else /* Unknown chunk */ + { + ctx->file.unknown = 1; + + if(!ctx->keep_unknown) goto discard; + if(ctx->user.unknown) goto discard; + + if(increase_cache_usage(ctx, chunk.length + sizeof(struct spng_unknown_chunk), 1)) return SPNG_ECHUNK_LIMITS; + + ctx->n_chunks++; + if(ctx->n_chunks < 1) return SPNG_EOVERFLOW; + if(sizeof(struct spng_unknown_chunk) > SIZE_MAX / ctx->n_chunks) return SPNG_EOVERFLOW; + + void *buf = spng__realloc(ctx, ctx->chunk_list, ctx->n_chunks * sizeof(struct spng_unknown_chunk)); + if(buf == NULL) return SPNG_EMEM; + ctx->chunk_list = buf; + + struct spng_unknown_chunk *chunkp = &ctx->chunk_list[ctx->n_chunks - 1]; + + memset(chunkp, 0, sizeof(struct spng_unknown_chunk)); + + ctx->undo = chunk_undo; + + memcpy(chunkp->type, chunk.type, 4); + + if(ctx->state < SPNG_STATE_FIRST_IDAT) + { + if(ctx->file.plte) chunkp->location = SPNG_AFTER_PLTE; + else chunkp->location = SPNG_AFTER_IHDR; + } + else if(ctx->state >= SPNG_STATE_AFTER_IDAT) chunkp->location = SPNG_AFTER_IDAT; + + if(chunk.length > 0) + { + void *t = spng__malloc(ctx, chunk.length); + if(t == NULL) return SPNG_EMEM; + + ret = read_chunk_bytes2(ctx, t, chunk.length); + if(ret) + { + spng__free(ctx, t); + return ret; + } + + chunkp->length = chunk.length; + chunkp->data = t; + } + + ctx->stored.unknown = 1; + } + +discard: + ret = discard_chunk_bytes(ctx, ctx->cur_chunk_bytes_left); + if(ret) return ret; + } + + } + + return ret; +} + +/* Read chunks before or after the IDAT chunks depending on state. IHDR is read first while the state is SPNG_STATE_INPUT; with only_ihdr set the function returns 0 right after that step. Otherwise read_non_idat_chunks() is called until the state becomes SPNG_STATE_FIRST_IDAT or SPNG_STATE_IEND. Returns 0 on success or an SPNG error code; errors raised while reading are passed through decode_err(). */ +static int read_chunks(spng_ctx *ctx, int only_ihdr) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + if(!ctx->state) return SPNG_EBADSTATE; + /* no input buffer: accepted for an encode_only context, an internal error otherwise */ if(ctx->data == NULL) + { + if(ctx->encode_only) return 0; + else return SPNG_EINTERNAL; + } + + int ret = 0; + + if(ctx->state == 
SPNG_STATE_INPUT) + { + ret = read_ihdr(ctx); + + if(ret) return decode_err(ctx, ret); + + ctx->state = SPNG_STATE_IHDR; + } + + if(only_ihdr) return 0; + + /* called again after the image data ended: continue with the chunks that follow IDAT */ if(ctx->state == SPNG_STATE_EOI) + { + ctx->state = SPNG_STATE_AFTER_IDAT; + ctx->prev_was_idat = 1; + } + + while(ctx->state < SPNG_STATE_FIRST_IDAT || ctx->state == SPNG_STATE_AFTER_IDAT) + { + ret = read_non_idat_chunks(ctx); + + if(!ret) + { + if(ctx->state < SPNG_STATE_FIRST_IDAT) ctx->state = SPNG_STATE_FIRST_IDAT; + else if(ctx->state == SPNG_STATE_AFTER_IDAT) ctx->state = SPNG_STATE_IEND; + } + else + { + switch(ret) + { + case SPNG_ECHUNK_POS: + case SPNG_ECHUNK_SIZE: /* size != expected size, SPNG_ECHUNK_STDLEN = invalid size */ + case SPNG_EDUP_PLTE: + case SPNG_EDUP_CHRM: + case SPNG_EDUP_GAMA: + case SPNG_EDUP_ICCP: + case SPNG_EDUP_SBIT: + case SPNG_EDUP_SRGB: + case SPNG_EDUP_BKGD: + case SPNG_EDUP_HIST: + case SPNG_EDUP_TRNS: + case SPNG_EDUP_PHYS: + case SPNG_EDUP_TIME: + case SPNG_EDUP_OFFS: + case SPNG_EDUP_EXIF: + case SPNG_ECHRM: + case SPNG_ETRNS_COLOR_TYPE: + case SPNG_ETRNS_NO_PLTE: + case SPNG_EGAMA: + case SPNG_EICCP_NAME: + case SPNG_EICCP_COMPRESSION_METHOD: + case SPNG_ESBIT: + case SPNG_ESRGB: + case SPNG_ETEXT: + case SPNG_ETEXT_KEYWORD: + case SPNG_EZTXT: + case SPNG_EZTXT_COMPRESSION_METHOD: + case SPNG_EITXT: + case SPNG_EITXT_COMPRESSION_FLAG: + case SPNG_EITXT_COMPRESSION_METHOD: + case SPNG_EITXT_LANG_TAG: + case SPNG_EITXT_TRANSLATED_KEY: + case SPNG_EBKGD_NO_PLTE: + case SPNG_EBKGD_PLTE_IDX: + case SPNG_EHIST_NO_PLTE: + case SPNG_EPHYS: + case SPNG_ESPLT_NAME: + case SPNG_ESPLT_DUP_NAME: + case SPNG_ESPLT_DEPTH: + case SPNG_ETIME: + case SPNG_EOFFS: + case SPNG_EEXIF: + case SPNG_EZLIB: + { + /* non-strict mode and a non-critical chunk: skip the rest of the chunk, run its undo hook, restore the stored flags and keep reading */ if(!ctx->strict && !is_critical_chunk(&ctx->current_chunk)) + { + ret = discard_chunk_bytes(ctx, ctx->cur_chunk_bytes_left); + if(ret) return decode_err(ctx, ret); + + if(ctx->undo) ctx->undo(ctx); + + ctx->stored = ctx->prev_stored; + + ctx->discard = 0; + ctx->undo = NULL; + + 
continue; + } + else return decode_err(ctx, ret); + + break; + } + default: return decode_err(ctx, ret); + } + } + } + + return ret; +} + +/* Read the bytes of the current scanline into ctx->scanline and defilter them with the filter type held in ri->filter. Except for the last scanline of the last pass, the read also fetches the filter byte of the following scanline, which must be at most 4 and is stored in ri->filter for the next call. Returns 0 on success, SPNG_EFILTER for a bad filter byte, or the error code of a helper. */ static int read_scanline(spng_ctx *ctx) +{ + int ret, pass = ctx->row_info.pass; + struct spng_row_info *ri = &ctx->row_info; + const struct spng_subimage *sub = ctx->subimage; + size_t scanline_width = sub[pass].scanline_width; + uint32_t scanline_idx = ri->scanline_idx; + + uint8_t next_filter = 0; + + /* the final scanline has no following filter byte, so one byte less is read */ if(scanline_idx == (sub[pass].height - 1) && ri->pass == ctx->last_pass) + { + ret = read_scanline_bytes(ctx, ctx->scanline, scanline_width - 1); + } + else + { + ret = read_scanline_bytes(ctx, ctx->scanline, scanline_width); + if(ret) return ret; + + next_filter = ctx->scanline[scanline_width - 1]; + if(next_filter > 4) ret = SPNG_EFILTER; + } + + if(ret) return ret; + + if(!scanline_idx && ri->filter > 1) + { + /* prev_scanline is all zeros for the first scanline */ + memset(ctx->prev_scanline, 0, scanline_width); + } + + /* 16-bit rows are passed through u16_row_to_host() unless the raw format was requested */ if(ctx->ihdr.bit_depth == 16 && ctx->fmt != SPNG_FMT_RAW) u16_row_to_host(ctx->scanline, scanline_width - 1); + + ret = defilter_scanline(ctx->prev_scanline, ctx->scanline, scanline_width, ctx->bytes_per_pixel, ri->filter); + if(ret) return ret; + + ri->filter = next_filter; + + return 0; +} + +/* Advance ctx->row_info to the next scanline. After the last scanline of the last pass the state is set to SPNG_STATE_EOI and SPNG_EOI is returned; otherwise the function returns 0. */ static int update_row_info(spng_ctx *ctx) +{ + int interlacing = ctx->ihdr.interlace_method; + struct spng_row_info *ri = &ctx->row_info; + const struct spng_subimage *sub = ctx->subimage; + + if(ri->scanline_idx == (sub[ri->pass].height - 1)) /* Last scanline */ + { + if(ri->pass == ctx->last_pass) + { + ctx->state = SPNG_STATE_EOI; + + return SPNG_EOI; + } + + ri->scanline_idx = 0; + ri->pass++; + + /* Skip empty passes */ + while( (!sub[ri->pass].width || !sub[ri->pass].height) && (ri->pass < ctx->last_pass)) ri->pass++; + } + else + { + ri->row_num++; + ri->scanline_idx++; + } + + /* interlaced images: the image row is derived from the Adam7 start and delta tables */ if(interlacing) ri->row_num = adam7_y_start[ri->pass] + ri->scanline_idx * adam7_y_delta[ri->pass]; + + return 0; +} + +/* Decode the next scanline of the current pass into out, converting it to the output format stored in ctx->fmt. len must be at least sub[pass].out_width. Returns 1 on NULL arguments, SPNG_EBUFSIZ if out is too small, SPNG_EOI once the last scanline has been decoded or when the state is already at or past SPNG_STATE_EOI, 0 when more scanlines follow, or another error code on failure. */ int 
spng_decode_scanline(spng_ctx *ctx, void *out, size_t len) +{ + if(ctx == NULL || out == NULL) return 1; + + if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI; + + struct decode_flags f = ctx->decode_flags; + + struct spng_row_info *ri = &ctx->row_info; + const struct spng_subimage *sub = ctx->subimage; + + const struct spng_ihdr *ihdr = &ctx->ihdr; + const uint16_t *gamma_lut = ctx->gamma_lut; + unsigned char *trns_px = ctx->trns_px; + const struct spng_sbit *sb = &ctx->decode_sb; + const struct spng_plte_entry *plte = ctx->decode_plte.rgba; + struct spng__iter iter = spng__iter_init(ihdr->bit_depth, ctx->scanline); + + const unsigned char *scanline; + + const int pass = ri->pass; + const int fmt = ctx->fmt; + const size_t scanline_width = sub[pass].scanline_width; + const uint32_t width = sub[pass].width; + uint32_t k; + uint8_t r_8, g_8, b_8, a_8, gray_8; + uint16_t r_16, g_16, b_16, a_16, gray_16; + r_8=0; g_8=0; b_8=0; a_8=0; gray_8=0; + r_16=0; g_16=0; b_16=0; a_16=0; gray_16=0; + size_t pixel_size = 4; /* SPNG_FMT_RGBA8 */ + size_t pixel_offset = 0; + unsigned char *pixel; + unsigned processing_depth = ihdr->bit_depth; + + if(f.indexed) processing_depth = 8; + + if(fmt == SPNG_FMT_RGBA16) pixel_size = 8; + else if(fmt == SPNG_FMT_RGB8) pixel_size = 3; + + /* out must hold one full output scanline of this pass */ if(len < sub[pass].out_width) return SPNG_EBUFSIZ; + + int ret = read_scanline(ctx); + + if(ret) return decode_err(ctx, ret); + + scanline = ctx->scanline; + + for(k=0; k < width; k++) + { + pixel = (unsigned char*)out + pixel_offset; + pixel_offset += pixel_size; + + /* row-wide fast paths: these handle the whole scanline in one call and then leave the per-pixel loop */ if(f.same_layout) + { + if(f.zerocopy) break; + + memcpy(out, scanline, scanline_width - 1); + break; + } + + if(f.unpack) + { + unpack_scanline(out, scanline, width, ihdr->bit_depth, fmt); + break; + } + + /* per-pixel path: gather the samples of pixel k according to the color type of the file */ if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR) + { + if(ihdr->bit_depth == 16) + { + memcpy(&r_16, scanline + (k * 6), 2); + memcpy(&g_16, scanline + (k * 6) + 2, 2); + memcpy(&b_16, scanline + (k * 6) + 4, 2); + + a_16 = 65535; + } + 
else /* == 8 */ + { + if(fmt == SPNG_FMT_RGBA8) + { + rgb8_row_to_rgba8(scanline, out, width); + break; + } + + r_8 = scanline[k * 3]; + g_8 = scanline[k * 3 + 1]; + b_8 = scanline[k * 3 + 2]; + + a_8 = 255; + } + } + else if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED) + { + uint8_t entry = 0; + + if(ihdr->bit_depth == 8) + { + if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8)) + { + expand_row(out, scanline, &ctx->decode_plte, width, fmt); + break; + } + + entry = scanline[k]; + } + else /* < 8 */ + { + entry = get_sample(&iter); + } + + /* indexed pixels are written directly from the preprocessed palette and skip the common store code at the end of the loop body */ if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8)) + { + pixel[0] = plte[entry].red; + pixel[1] = plte[entry].green; + pixel[2] = plte[entry].blue; + if(fmt == SPNG_FMT_RGBA8) pixel[3] = plte[entry].alpha; + + continue; + } + else /* RGBA16 */ + { + r_16 = plte[entry].red; + g_16 = plte[entry].green; + b_16 = plte[entry].blue; + a_16 = plte[entry].alpha; + + /* widen the 8-bit palette values to 16 bits by repeating the byte */ r_16 = (r_16 << 8) | r_16; + g_16 = (g_16 << 8) | g_16; + b_16 = (b_16 << 8) | b_16; + a_16 = (a_16 << 8) | a_16; + + memcpy(pixel, &r_16, 2); + memcpy(pixel + 2, &g_16, 2); + memcpy(pixel + 4, &b_16, 2); + memcpy(pixel + 6, &a_16, 2); + + continue; + } + } + else if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR_ALPHA) + { + if(ihdr->bit_depth == 16) + { + memcpy(&r_16, scanline + (k * 8), 2); + memcpy(&g_16, scanline + (k * 8) + 2, 2); + memcpy(&b_16, scanline + (k * 8) + 4, 2); + memcpy(&a_16, scanline + (k * 8) + 6, 2); + } + else /* == 8 */ + { + r_8 = scanline[k * 4]; + g_8 = scanline[k * 4 + 1]; + b_8 = scanline[k * 4 + 2]; + a_8 = scanline[k * 4 + 3]; + } + } + else if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) + { + if(ihdr->bit_depth == 16) + { + memcpy(&gray_16, scanline + k * 2, 2); + + /* a gray sample equal to the stored tRNS gray value becomes fully transparent */ if(f.apply_trns && ctx->trns.gray == gray_16) a_16 = 0; + else a_16 = 65535; + + r_16 = gray_16; + g_16 = gray_16; + b_16 = gray_16; + } + else /* <= 8 */ + { + gray_8 = get_sample(&iter); + + if(f.apply_trns && ctx->trns.gray == gray_8) a_8 = 0; + else a_8 = 255; + + r_8 = gray_8; g_8 = 
gray_8; b_8 = gray_8; + } + } + else if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA) + { + if(ihdr->bit_depth == 16) + { + memcpy(&gray_16, scanline + (k * 4), 2); + memcpy(&a_16, scanline + (k * 4) + 2, 2); + + r_16 = gray_16; + g_16 = gray_16; + b_16 = gray_16; + } + else /* == 8 */ + { + gray_8 = scanline[k * 2]; + a_8 = scanline[k * 2 + 1]; + + r_8 = gray_8; + g_8 = gray_8; + b_8 = gray_8; + } + } + + + /* store the samples gathered above in the requested output format */ if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8)) + { + if(ihdr->bit_depth == 16) + { + /* 16-bit input, 8-bit output: keep the high byte of every sample */ r_8 = r_16 >> 8; + g_8 = g_16 >> 8; + b_8 = b_16 >> 8; + a_8 = a_16 >> 8; + } + + pixel[0] = r_8; + pixel[1] = g_8; + pixel[2] = b_8; + + if(fmt == SPNG_FMT_RGBA8) pixel[3] = a_8; + } + else if(fmt == SPNG_FMT_RGBA16) + { + if(ihdr->bit_depth != 16) + { + /* values are copied unscaled here; scale_row() below runs when f.do_scaling is set */ r_16 = r_8; + g_16 = g_8; + b_16 = b_8; + a_16 = a_8; + } + + memcpy(pixel, &r_16, 2); + memcpy(pixel + 2, &g_16, 2); + memcpy(pixel + 4, &b_16, 2); + memcpy(pixel + 6, &a_16, 2); + } + }/* for(k=0; k < width; k++) */ + + /* optional whole-row passes selected by the decode flags */ if(f.apply_trns) trns_row(out, scanline, trns_px, ctx->bytes_per_pixel, &ctx->ihdr, width, fmt); + + if(f.do_scaling) scale_row(out, width, fmt, processing_depth, sb); + + if(f.apply_gamma) gamma_correct_row(out, width, fmt, gamma_lut); + + /* The previous scanline is always defiltered */ + /* swap the two buffers so the row just decoded becomes prev_scanline for the next call */ void *t = ctx->prev_scanline; + ctx->prev_scanline = ctx->scanline; + ctx->scanline = t; + + ret = update_row_info(ctx); + + if(ret == SPNG_EOI) + { + if(ctx->cur_chunk_bytes_left) /* zlib stream ended before an IDAT chunk boundary */ + {/* Discard the rest of the chunk */ + int error = discard_chunk_bytes(ctx, ctx->cur_chunk_bytes_left); + if(error) return decode_err(ctx, error); + } + + ctx->last_idat = ctx->current_chunk; + } + + return ret; +} + +/* Decode one scanline and place its pixels in out, which addresses a full-width image row of at least ctx->image_width bytes. Non-interlaced images and pass index 6 are forwarded to spng_decode_scanline() unchanged. For the other passes the scanline is decoded into ctx->row and each pixel is copied to its x position taken from the adam7_x_start and adam7_x_delta tables. Returns 1 on NULL arguments, SPNG_EOI when the state is at or past SPNG_STATE_EOI, SPNG_EBUFSIZ if len is too small, or an error from spng_decode_scanline(); the scatter path returns 0 even when the underlying call reported SPNG_EOI. */ int spng_decode_row(spng_ctx *ctx, void *out, size_t len) +{ + if(ctx == NULL || out == NULL) return 1; + if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI; + if(len < ctx->image_width) return SPNG_EBUFSIZ; + + const struct spng_ihdr *ihdr = &ctx->ihdr; + int ret, pass = 
ctx->row_info.pass; + unsigned char *outptr = out; + + /* non-interlaced images and pass index 6 are decoded straight into out */ if(!ihdr->interlace_method || pass == 6) return spng_decode_scanline(ctx, out, len); + + ret = spng_decode_scanline(ctx, ctx->row, ctx->image_width); + if(ret && ret != SPNG_EOI) return ret; + + uint32_t k; + unsigned pixel_size = 4; /* RGBA8 */ + if(ctx->fmt == SPNG_FMT_RGBA16) pixel_size = 8; + else if(ctx->fmt == SPNG_FMT_RGB8) pixel_size = 3; + else if(ctx->fmt == SPNG_FMT_G8) pixel_size = 1; + else if(ctx->fmt == SPNG_FMT_GA8) pixel_size = 2; + else if(ctx->fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW)) + { + if(ihdr->bit_depth < 8) + { + /* sub-byte samples: each one is shifted to its bit position and merged into the destination byte */ struct spng__iter iter = spng__iter_init(ihdr->bit_depth, ctx->row); + const uint8_t samples_per_byte = 8 / ihdr->bit_depth; + uint8_t sample; + + for(k=0; k < ctx->subimage[pass].width; k++) + { + sample = get_sample(&iter); + + size_t ioffset = adam7_x_start[pass] + k * adam7_x_delta[pass]; + + sample = sample << (iter.initial_shift - ioffset * ihdr->bit_depth % 8); + + ioffset /= samples_per_byte; + + /* OR keeps the bits already present in out - presumably the caller provides a zeroed row, TODO confirm */ outptr[ioffset] |= sample; + } + + return 0; + } + else pixel_size = ctx->bytes_per_pixel; + } + + /* copy every decoded pixel to its x position within the full-width row */ for(k=0; k < ctx->subimage[pass].width; k++) + { + size_t ioffset = (adam7_x_start[pass] + (size_t) k * adam7_x_delta[pass]) * pixel_size; + + memcpy(outptr + ioffset, ctx->row + k * pixel_size, pixel_size); + } + + return 0; +} + +/* Read chunks by calling read_chunks(ctx, 0). Returns 1 for a NULL ctx, SPNG_ECTXTYPE for an encode_only context, SPNG_ENOSRC when the state is below SPNG_STATE_INPUT, 0 at once when the state is SPNG_STATE_IEND, otherwise the result of read_chunks(). */ int spng_decode_chunks(spng_ctx *ctx) +{ + if(ctx == NULL) return 1; + if(ctx->encode_only) return SPNG_ECTXTYPE; + if(ctx->state < SPNG_STATE_INPUT) return SPNG_ENOSRC; + if(ctx->state == SPNG_STATE_IEND) return 0; + + return read_chunks(ctx, 0); +} + +/* Prepare decoding of the image into output format fmt and, unless SPNG_DECODE_PROGRESSIVE is set in flags, decode every row into out. Setup covers reading the chunks before the image data, validating fmt, sizing the image, starting inflate, allocating the scanline buffers, deriving the decode flags, building the gamma table, resolving significant bits, preprocessing the palette and reading the first filter byte. Returns 0 on success; 1 for a NULL ctx (or a NULL out in non-progressive mode), SPNG_ECTXTYPE for an encode_only context, SPNG_EOI when the state is at or past SPNG_STATE_EOI, or another SPNG error code. */ int spng_decode_image(spng_ctx *ctx, void *out, size_t len, int fmt, int flags) +{ + if(ctx == NULL) return 1; + if(ctx->encode_only) return SPNG_ECTXTYPE; + if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI; + + const struct spng_ihdr *ihdr = &ctx->ihdr; + + int ret = read_chunks(ctx, 0); + if(ret) return decode_err(ctx, ret); + + ret = check_decode_fmt(ihdr, fmt); + if(ret) return ret; + + ret = 
calculate_image_width(ihdr, fmt, &ctx->image_width); + if(ret) return decode_err(ctx, ret); + + if(ctx->image_width > SIZE_MAX / ihdr->height) ctx->image_size = 0; /* overflow */ + else ctx->image_size = ctx->image_width * ihdr->height; + + /* a non-progressive decode needs an output buffer that holds the whole image */ if( !(flags & SPNG_DECODE_PROGRESSIVE) ) + { + if(out == NULL) return 1; + if(!ctx->image_size) return SPNG_EOVERFLOW; + if(len < ctx->image_size) return SPNG_EBUFSIZ; + } + + uint32_t bytes_read = 0; + + ret = read_idat_bytes(ctx, &bytes_read); + if(ret) return decode_err(ctx, ret); + + /* inspect the first two bytes of the compressed data (presumably the zlib stream header): when their 16-bit value is a multiple of 31, a compression level derived from the top two bits of the second byte is recorded */ if(bytes_read > 1) + { + int valid = read_u16(ctx->data) % 31 ? 0 : 1; + + unsigned flg = ctx->data[1]; + unsigned flevel = flg >> 6; + int compression_level = Z_DEFAULT_COMPRESSION; + + if(flevel == 0) compression_level = 0; /* fastest */ + else if(flevel == 1) compression_level = 1; /* fast */ + else if(flevel == 2) compression_level = 6; /* default */ + else if(flevel == 3) compression_level = 9; /* slowest, max compression */ + + if(valid) ctx->image_options.compression_level = compression_level; + } + + ret = spng__inflate_init(ctx, ctx->image_options.window_bits); + if(ret) return decode_err(ctx, ret); + + ctx->zstream.avail_in = bytes_read; + ctx->zstream.next_in = ctx->data; + + size_t scanline_buf_size = ctx->subimage[ctx->widest_pass].scanline_width; + + scanline_buf_size += 32; + + /* a result below 32 means the addition wrapped around */ if(scanline_buf_size < 32) return SPNG_EOVERFLOW; + + /* both allocations are checked for NULL a few statements further down */ ctx->scanline_buf = spng__malloc(ctx, scanline_buf_size); + ctx->prev_scanline_buf = spng__malloc(ctx, scanline_buf_size); + + ctx->scanline = ctx->scanline_buf; + ctx->prev_scanline = ctx->prev_scanline_buf; + + struct decode_flags f = {0}; + + ctx->fmt = fmt; + + if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED) f.indexed = 1; + + unsigned processing_depth = ihdr->bit_depth; + + if(f.indexed) processing_depth = 8; + + if(ihdr->interlace_method) + { + f.interlaced = 1; + ctx->row_buf = spng__malloc(ctx, ctx->image_width); + ctx->row = ctx->row_buf; + + if(ctx->row == NULL) return decode_err(ctx, 
SPNG_EMEM); + } + + if(ctx->scanline == NULL || ctx->prev_scanline == NULL) + { + return decode_err(ctx, SPNG_EMEM); + } + + f.do_scaling = 1; + if(f.indexed) f.do_scaling = 0; + + unsigned depth_target = 8; /* FMT_RGBA8, G8 */ + if(fmt == SPNG_FMT_RGBA16) depth_target = 16; + + /* each optional step is enabled only when its flag is set and the matching chunk was stored; otherwise the flag bit is cleared */ if(flags & SPNG_DECODE_TRNS && ctx->stored.trns) f.apply_trns = 1; + else flags &= ~SPNG_DECODE_TRNS; + + if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA || + ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR_ALPHA) flags &= ~SPNG_DECODE_TRNS; + + if(flags & SPNG_DECODE_GAMMA && ctx->stored.gama) f.apply_gamma = 1; + else flags &= ~SPNG_DECODE_GAMMA; + + if(flags & SPNG_DECODE_USE_SBIT && ctx->stored.sbit) f.use_sbit = 1; + else flags &= ~SPNG_DECODE_USE_SBIT; + + /* choose the row conversion strategy for the requested output format */ if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGBA16)) + { + if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR_ALPHA && + ihdr->bit_depth == depth_target) f.same_layout = 1; + } + else if(fmt == SPNG_FMT_RGB8) + { + if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR && + ihdr->bit_depth == depth_target) f.same_layout = 1; + + f.apply_trns = 0; /* not applicable */ + } + else if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW)) + { + f.same_layout = 1; + f.do_scaling = 0; + f.apply_gamma = 0; /* for now */ + f.apply_trns = 0; + } + else if(fmt == SPNG_FMT_G8 && ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth <= 8) + { + if(ihdr->bit_depth == depth_target) f.same_layout = 1; + else if(ihdr->bit_depth < 8) f.unpack = 1; + + f.apply_trns = 0; + } + else if(fmt == SPNG_FMT_GA8 && ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth <= 8) + { + /* NOTE(review): the enclosing condition requires GRAYSCALE, so the GRAYSCALE_ALPHA test below cannot be true and this branch always selects f.unpack */ if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA && + ihdr->bit_depth == depth_target) f.same_layout = 1; + else if(ihdr->bit_depth <= 8) f.unpack = 1; + } + else if(fmt == SPNG_FMT_GA16 && ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth == 16) + { + /* NOTE(review): same pattern as the GA8 branch, the GRAYSCALE_ALPHA test cannot be true here */ if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA && + ihdr->bit_depth == depth_target) f.same_layout = 1; + else 
if(ihdr->bit_depth == 16) f.unpack = 1; + } + + /*if(f.same_layout && !flags && !f.interlaced) f.zerocopy = 1;*/ + + uint16_t *gamma_lut = NULL; + + /* build the gamma lookup table: 256 entries for 8-bit output, 65536 heap-allocated entries otherwise */ if(f.apply_gamma) + { + float file_gamma = (float)ctx->gama / 100000.0f; + float max; + + unsigned lut_entries; + + if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8)) + { + lut_entries = 256; + max = 255.0f; + + gamma_lut = ctx->gamma_lut8; + ctx->gamma_lut = ctx->gamma_lut8; + } + else /* SPNG_FMT_RGBA16 */ + { + lut_entries = 65536; + max = 65535.0f; + + ctx->gamma_lut16 = spng__malloc(ctx, lut_entries * sizeof(uint16_t)); + if(ctx->gamma_lut16 == NULL) return decode_err(ctx, SPNG_EMEM); + + gamma_lut = ctx->gamma_lut16; + ctx->gamma_lut = ctx->gamma_lut16; + } + + float screen_gamma = 2.2f; + float exponent = file_gamma * screen_gamma; + + /* reject a zero product, it is used as a divisor on the next statement */ if(FP_ZERO == fpclassify(exponent)) return decode_err(ctx, SPNG_EGAMA); + + exponent = 1.0f / exponent; + + unsigned i; + for(i=0; i < lut_entries; i++) + { + float c = pow((float)i / max, exponent) * max; + if(c > max) c = max; + + gamma_lut[i] = (uint16_t)c; + } + } + + struct spng_sbit *sb = &ctx->decode_sb; + + /* default: every channel is significant for the full processing depth */ sb->red_bits = processing_depth; + sb->green_bits = processing_depth; + sb->blue_bits = processing_depth; + sb->alpha_bits = processing_depth; + sb->grayscale_bits = processing_depth; + + /* replace the defaults with the stored sBIT values that apply to this color type */ if(f.use_sbit) + { + if(ihdr->color_type == 0) + { + sb->grayscale_bits = ctx->sbit.grayscale_bits; + sb->alpha_bits = ihdr->bit_depth; + } + else if(ihdr->color_type == 2 || ihdr->color_type == 3) + { + sb->red_bits = ctx->sbit.red_bits; + sb->green_bits = ctx->sbit.green_bits; + sb->blue_bits = ctx->sbit.blue_bits; + sb->alpha_bits = ihdr->bit_depth; + } + else if(ihdr->color_type == 4) + { + sb->grayscale_bits = ctx->sbit.grayscale_bits; + sb->alpha_bits = ctx->sbit.alpha_bits; + } + else /* == 6 */ + { + sb->red_bits = ctx->sbit.red_bits; + sb->green_bits = ctx->sbit.green_bits; + sb->blue_bits = ctx->sbit.blue_bits; + sb->alpha_bits = ctx->sbit.alpha_bits; + } + } + + if(ihdr->bit_depth 
== 16 && fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8)) + {/* samples are scaled down by 8 bits in the decode loop */ + sb->red_bits -= 8; + sb->green_bits -= 8; + sb->blue_bits -= 8; + sb->alpha_bits -= 8; + sb->grayscale_bits -= 8; + + processing_depth = 8; + } + + /* Prevent infinite loops in sample_to_target() */ + if(!depth_target || depth_target > 16 || + !processing_depth || processing_depth > 16 || + !sb->grayscale_bits || sb->grayscale_bits > processing_depth || + !sb->alpha_bits || sb->alpha_bits > processing_depth || + !sb->red_bits || sb->red_bits > processing_depth || + !sb->green_bits || sb->green_bits > processing_depth || + !sb->blue_bits || sb->blue_bits > processing_depth) + { + return decode_err(ctx, SPNG_ESBIT); + } + + /* scaling is switched off when the red, green, blue and alpha bit counts all equal both the processing depth and the target depth */ if(sb->red_bits == sb->green_bits && + sb->green_bits == sb->blue_bits && + sb->blue_bits == sb->alpha_bits && + sb->alpha_bits == processing_depth && + processing_depth == depth_target) f.do_scaling = 0; + + struct spng_plte_entry *plte = ctx->decode_plte.rgba; + + /* Pre-process palette entries */ + if(f.indexed) + { + uint8_t red, green, blue, alpha; + + uint32_t i; + for(i=0; i < 256; i++) + { + /* alpha comes from the tRNS table when it is applied and covers this index, otherwise the entry is opaque */ if(f.apply_trns && i < ctx->trns.n_type3_entries) + ctx->plte.entries[i].alpha = ctx->trns.type3_alpha[i]; + else + ctx->plte.entries[i].alpha = 255; + + red = sample_to_target(ctx->plte.entries[i].red, 8, sb->red_bits, 8); + green = sample_to_target(ctx->plte.entries[i].green, 8, sb->green_bits, 8); + blue = sample_to_target(ctx->plte.entries[i].blue, 8, sb->blue_bits, 8); + alpha = sample_to_target(ctx->plte.entries[i].alpha, 8, sb->alpha_bits, 8); + +#if defined(SPNG_ARM) + if(fmt == SPNG_FMT_RGB8 && ihdr->bit_depth == 8) + {/* Working with 3 bytes at a time is more of an ARM thing */ + ctx->decode_plte.rgb[i * 3 + 0] = red; + ctx->decode_plte.rgb[i * 3 + 1] = green; + ctx->decode_plte.rgb[i * 3 + 2] = blue; + continue; + } +#endif + plte[i].red = red; + plte[i].green = green; + plte[i].blue = blue; + plte[i].alpha = alpha; + } + + 
/* transparency has been written into the palette entries, so the per-row tRNS pass is disabled */ f.apply_trns = 0; + } + + unsigned char *trns_px = ctx->trns_px; + + /* precompute the transparent color bytes in trns_px; mask keeps the low bit_depth bits for depths below 16 */ if(f.apply_trns) + { + uint16_t mask = ~0; + if(ctx->ihdr.bit_depth < 16) mask = (1 << ctx->ihdr.bit_depth) - 1; + + if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGBA16)) + { + if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR) + { + if(ihdr->bit_depth == 16) + { + memcpy(trns_px, &ctx->trns.red, 2); + memcpy(trns_px + 2, &ctx->trns.green, 2); + memcpy(trns_px + 4, &ctx->trns.blue, 2); + } + else + { + trns_px[0] = ctx->trns.red & mask; + trns_px[1] = ctx->trns.green & mask; + trns_px[2] = ctx->trns.blue & mask; + } + } + } + else if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) // fmt == SPNG_FMT_GA8 && + { + if(ihdr->bit_depth == 16) + { + memcpy(trns_px, &ctx->trns.gray, 2); + } + else + { + trns_px[0] = ctx->trns.gray & mask; + } + } + } + + ctx->decode_flags = f; + + ctx->state = SPNG_STATE_DECODE_INIT; + + struct spng_row_info *ri = &ctx->row_info; + struct spng_subimage *sub = ctx->subimage; + + while(!sub[ri->pass].width || !sub[ri->pass].height) ri->pass++; + + if(f.interlaced) ri->row_num = adam7_y_start[ri->pass]; + + unsigned pixel_size = 4; /* SPNG_FMT_RGBA8 */ + + if(fmt == SPNG_FMT_RGBA16) pixel_size = 8; + else if(fmt == SPNG_FMT_RGB8) pixel_size = 3; + else if(fmt == SPNG_FMT_G8) pixel_size = 1; + else if(fmt == SPNG_FMT_GA8) pixel_size = 2; + + int i; + for(i=ri->pass; i <= ctx->last_pass; i++) + { + if(!sub[i].scanline_width) continue; + + if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW)) sub[i].out_width = sub[i].scanline_width - 1; + else sub[i].out_width = (size_t)sub[i].width * pixel_size; + + if(sub[i].out_width > UINT32_MAX) return decode_err(ctx, SPNG_EOVERFLOW); + } + + /* Read the first filter byte, offsetting all reads by 1 byte. + The scanlines will be aligned with the start of the array with + the next scanline's filter byte at the end, + the last scanline will end up being 1 byte "shorter". 
*/ + ret = read_scanline_bytes(ctx, &ri->filter, 1); + if(ret) return decode_err(ctx, ret); + + if(ri->filter > 4) return decode_err(ctx, SPNG_EFILTER); + + /* progressive mode: setup is complete and no rows are decoded here */ if(flags & SPNG_DECODE_PROGRESSIVE) + { + return 0; + } + + /* decode rows into out, each at byte offset row_num * image_width, until spng_decode_row() returns non-zero */ do + { + size_t ioffset = ri->row_num * ctx->image_width; + + ret = spng_decode_row(ctx, (unsigned char*)out + ioffset, ctx->image_width); + }while(!ret); + + if(ret != SPNG_EOI) return decode_err(ctx, ret); + + return 0; +} + +/* Copy the current row information of the decoder into *row_info. Returns 1 when ctx or row_info is NULL or the state is below SPNG_STATE_DECODE_INIT, SPNG_EOI when the state is at or past SPNG_STATE_EOI, otherwise 0. */ int spng_get_row_info(spng_ctx *ctx, struct spng_row_info *row_info) +{ + if(ctx == NULL || row_info == NULL || ctx->state < SPNG_STATE_DECODE_INIT) return 1; + + if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI; + + *row_info = ctx->row_info; + + return 0; +} + +static int write_chunks_before_idat(spng_ctx *ctx) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + if(!ctx->encode_only) return SPNG_EINTERNAL; + if(!ctx->stored.ihdr) return SPNG_EINTERNAL; + + int ret; + uint32_t i; + size_t length; + const struct spng_ihdr *ihdr = &ctx->ihdr; + unsigned char *data = ctx->decode_plte.raw; + + ret = write_data(ctx, spng_signature, 8); + if(ret) return ret; + + write_u32(data, ihdr->width); + write_u32(data + 4, ihdr->height); + data[8] = ihdr->bit_depth; + data[9] = ihdr->color_type; + data[10] = ihdr->compression_method; + data[11] = ihdr->filter_method; + data[12] = ihdr->interlace_method; + + ret = write_chunk(ctx, type_ihdr, data, 13); + if(ret) return ret; + + if(ctx->stored.chrm) + { + write_u32(data, ctx->chrm_int.white_point_x); + write_u32(data + 4, ctx->chrm_int.white_point_y); + write_u32(data + 8, ctx->chrm_int.red_x); + write_u32(data + 12, ctx->chrm_int.red_y); + write_u32(data + 16, ctx->chrm_int.green_x); + write_u32(data + 20, ctx->chrm_int.green_y); + write_u32(data + 24, ctx->chrm_int.blue_x); + write_u32(data + 28, ctx->chrm_int.blue_y); + + ret = write_chunk(ctx, type_chrm, data, 32); + if(ret) return ret; + } + + if(ctx->stored.gama) + { + write_u32(data, ctx->gama); + + ret = write_chunk(ctx, type_gama, data, 4); + 
if(ret) return ret; + } + + if(ctx->stored.iccp) + { + uLongf dest_len = compressBound((uLong)ctx->iccp.profile_len); + + Bytef *buf = spng__malloc(ctx, dest_len); + if(buf == NULL) return SPNG_EMEM; + + ret = compress2(buf, &dest_len, (void*)ctx->iccp.profile, (uLong)ctx->iccp.profile_len, Z_DEFAULT_COMPRESSION); + + if(ret != Z_OK) + { + spng__free(ctx, buf); + return SPNG_EZLIB; + } + + size_t name_len = strlen(ctx->iccp.profile_name); + + length = name_len + 2; + length += dest_len; + + if(dest_len > length) return SPNG_EOVERFLOW; + + unsigned char *cdata = NULL; + + ret = write_header(ctx, type_iccp, length, &cdata); + + if(ret) + { + spng__free(ctx, buf); + return ret; + } + + memcpy(cdata, ctx->iccp.profile_name, name_len + 1); + cdata[name_len + 1] = 0; /* compression method */ + memcpy(cdata + name_len + 2, buf, dest_len); + + spng__free(ctx, buf); + + ret = finish_chunk(ctx); + if(ret) return ret; + } + + if(ctx->stored.sbit) + { + switch(ctx->ihdr.color_type) + { + case SPNG_COLOR_TYPE_GRAYSCALE: + { + length = 1; + + data[0] = ctx->sbit.grayscale_bits; + + break; + } + case SPNG_COLOR_TYPE_TRUECOLOR: + case SPNG_COLOR_TYPE_INDEXED: + { + length = 3; + + data[0] = ctx->sbit.red_bits; + data[1] = ctx->sbit.green_bits; + data[2] = ctx->sbit.blue_bits; + + break; + } + case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA: + { + length = 2; + + data[0] = ctx->sbit.grayscale_bits; + data[1] = ctx->sbit.alpha_bits; + + break; + } + case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA: + { + length = 4; + + data[0] = ctx->sbit.red_bits; + data[1] = ctx->sbit.green_bits; + data[2] = ctx->sbit.blue_bits; + data[3] = ctx->sbit.alpha_bits; + + break; + } + default: return SPNG_EINTERNAL; + } + + ret = write_chunk(ctx, type_sbit, data, length); + if(ret) return ret; + } + + if(ctx->stored.srgb) + { + ret = write_chunk(ctx, type_srgb, &ctx->srgb_rendering_intent, 1); + if(ret) return ret; + } + + ret = write_unknown_chunks(ctx, SPNG_AFTER_IHDR); + if(ret) return ret; + + if(ctx->stored.plte) + { + 
for(i=0; i < ctx->plte.n_entries; i++) + { + data[i * 3 + 0] = ctx->plte.entries[i].red; + data[i * 3 + 1] = ctx->plte.entries[i].green; + data[i * 3 + 2] = ctx->plte.entries[i].blue; + } + + ret = write_chunk(ctx, type_plte, data, ctx->plte.n_entries * 3); + if(ret) return ret; + } + + if(ctx->stored.bkgd) + { + switch(ctx->ihdr.color_type) + { + case SPNG_COLOR_TYPE_GRAYSCALE: + case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA: + { + length = 2; + + write_u16(data, ctx->bkgd.gray); + + break; + } + case SPNG_COLOR_TYPE_TRUECOLOR: + case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA: + { + length = 6; + + write_u16(data, ctx->bkgd.red); + write_u16(data + 2, ctx->bkgd.green); + write_u16(data + 4, ctx->bkgd.blue); + + break; + } + case SPNG_COLOR_TYPE_INDEXED: + { + length = 1; + + data[0] = ctx->bkgd.plte_index; + + break; + } + default: return SPNG_EINTERNAL; + } + + ret = write_chunk(ctx, type_bkgd, data, length); + if(ret) return ret; + } + + if(ctx->stored.hist) + { + length = ctx->plte.n_entries * 2; + + for(i=0; i < ctx->plte.n_entries; i++) + { + write_u16(data + i * 2, ctx->hist.frequency[i]); + } + + ret = write_chunk(ctx, type_hist, data, length); + if(ret) return ret; + } + + if(ctx->stored.trns) + { + if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_GRAYSCALE) + { + write_u16(data, ctx->trns.gray); + + ret = write_chunk(ctx, type_trns, data, 2); + } + else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_TRUECOLOR) + { + write_u16(data, ctx->trns.red); + write_u16(data + 2, ctx->trns.green); + write_u16(data + 4, ctx->trns.blue); + + ret = write_chunk(ctx, type_trns, data, 6); + } + else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_INDEXED) + { + ret = write_chunk(ctx, type_trns, ctx->trns.type3_alpha, ctx->trns.n_type3_entries); + } + + if(ret) return ret; + } + + if(ctx->stored.phys) + { + write_u32(data, ctx->phys.ppu_x); + write_u32(data + 4, ctx->phys.ppu_y); + data[8] = ctx->phys.unit_specifier; + + ret = write_chunk(ctx, type_phys, data, 9); + if(ret) return ret; + } + + 
if(ctx->stored.splt) + { + const struct spng_splt *splt; + unsigned char *cdata = NULL; + + uint32_t k; + for(i=0; i < ctx->n_splt; i++) + { + splt = &ctx->splt_list[i]; + + size_t name_len = strlen(splt->name); + length = name_len + 1; + + if(splt->sample_depth == 8) length += splt->n_entries * 6 + 1; + else if(splt->sample_depth == 16) length += splt->n_entries * 10 + 1; + + ret = write_header(ctx, type_splt, length, &cdata); + if(ret) return ret; + + memcpy(cdata, splt->name, name_len + 1); + cdata += name_len + 2; + cdata[-1] = splt->sample_depth; + + if(splt->sample_depth == 8) + { + for(k=0; k < splt->n_entries; k++) + { + cdata[k * 6 + 0] = splt->entries[k].red; + cdata[k * 6 + 1] = splt->entries[k].green; + cdata[k * 6 + 2] = splt->entries[k].blue; + cdata[k * 6 + 3] = splt->entries[k].alpha; + write_u16(cdata + k * 6 + 4, splt->entries[k].frequency); + } + } + else if(splt->sample_depth == 16) + { + for(k=0; k < splt->n_entries; k++) + { + write_u16(cdata + k * 10 + 0, splt->entries[k].red); + write_u16(cdata + k * 10 + 2, splt->entries[k].green); + write_u16(cdata + k * 10 + 4, splt->entries[k].blue); + write_u16(cdata + k * 10 + 6, splt->entries[k].alpha); + write_u16(cdata + k * 10 + 8, splt->entries[k].frequency); + } + } + + ret = finish_chunk(ctx); + if(ret) return ret; + } + } + + if(ctx->stored.time) + { + write_u16(data, ctx->time.year); + data[2] = ctx->time.month; + data[3] = ctx->time.day; + data[4] = ctx->time.hour; + data[5] = ctx->time.minute; + data[6] = ctx->time.second; + + ret = write_chunk(ctx, type_time, data, 7); + if(ret) return ret; + } + + if(ctx->stored.text) + { + unsigned char *cdata = NULL; + const struct spng_text2 *text; + const uint8_t *text_type_array[4] = { 0, type_text, type_ztxt, type_itxt }; + + for(i=0; i < ctx->n_text; i++) + { + text = &ctx->text_list[i]; + + const uint8_t *text_chunk_type = text_type_array[text->type]; + Bytef *compressed_text = NULL; + size_t keyword_len = 0; + size_t language_tag_len = 0; + size_t 
translated_keyword_len = 0; + size_t compressed_length = 0; + size_t text_length = 0; + + keyword_len = strlen(text->keyword); + text_length = strlen(text->text); + + length = keyword_len + 1; + + if(text->type == SPNG_ZTXT) + { + length += 1; /* compression method */ + } + else if(text->type == SPNG_ITXT) + { + if(!text->language_tag || !text->translated_keyword) return SPNG_EINTERNAL; + + language_tag_len = strlen(text->language_tag); + translated_keyword_len = strlen(text->translated_keyword); + + length += language_tag_len; + if(length < language_tag_len) return SPNG_EOVERFLOW; + + length += translated_keyword_len; + if(length < translated_keyword_len) return SPNG_EOVERFLOW; + + length += 4; /* compression flag + method + nul for the two strings */ + if(length < 4) return SPNG_EOVERFLOW; + } + + if(text->compression_flag) + { + ret = spng__deflate_init(ctx, &ctx->text_options); + if(ret) return ret; + + z_stream *zstream = &ctx->zstream; + uLongf dest_len = deflateBound(zstream, (uLong)text_length); + + compressed_text = spng__malloc(ctx, dest_len); + + if(compressed_text == NULL) return SPNG_EMEM; + + zstream->next_in = (void*)text->text; + zstream->avail_in = (uInt)text_length; + + zstream->next_out = compressed_text; + zstream->avail_out = dest_len; + + ret = deflate(zstream, Z_FINISH); + + if(ret != Z_STREAM_END) + { + spng__free(ctx, compressed_text); + return SPNG_EZLIB; + } + + compressed_length = zstream->total_out; + + length += compressed_length; + if(length < compressed_length) return SPNG_EOVERFLOW; + } + else + { + text_length = strlen(text->text); + + length += text_length; + if(length < text_length) return SPNG_EOVERFLOW; + } + + ret = write_header(ctx, text_chunk_type, length, &cdata); + if(ret) + { + spng__free(ctx, compressed_text); + return ret; + } + + memcpy(cdata, text->keyword, keyword_len + 1); + cdata += keyword_len + 1; + + if(text->type == SPNG_ITXT) + { + cdata[0] = text->compression_flag; + cdata[1] = 0; /* compression method */ + 
cdata += 2; + + memcpy(cdata, text->language_tag, language_tag_len + 1); + cdata += language_tag_len + 1; + + memcpy(cdata, text->translated_keyword, translated_keyword_len + 1); + cdata += translated_keyword_len + 1; + } + else if(text->type == SPNG_ZTXT) + { + cdata[0] = 0; /* compression method */ + cdata++; + } + + if(text->compression_flag) memcpy(cdata, compressed_text, compressed_length); + else memcpy(cdata, text->text, text_length); + + spng__free(ctx, compressed_text); + + ret = finish_chunk(ctx); + if(ret) return ret; + } + } + + if(ctx->stored.offs) + { + write_s32(data, ctx->offs.x); + write_s32(data + 4, ctx->offs.y); + data[8] = ctx->offs.unit_specifier; + + ret = write_chunk(ctx, type_offs, data, 9); + if(ret) return ret; + } + + if(ctx->stored.exif) + { + ret = write_chunk(ctx, type_exif, ctx->exif.data, ctx->exif.length); + if(ret) return ret; + } + + ret = write_unknown_chunks(ctx, SPNG_AFTER_PLTE); + if(ret) return ret; + + return 0; +} + +static int write_chunks_after_idat(spng_ctx *ctx) +{ + if(ctx == NULL) return SPNG_EINTERNAL; + + int ret = write_unknown_chunks(ctx, SPNG_AFTER_IDAT); + if(ret) return ret; + + return write_iend(ctx); +} + +/* Compress and write scanline to IDAT stream */ +static int write_idat_bytes(spng_ctx *ctx, const void *scanline, size_t len, int flush) +{ + if(ctx == NULL || scanline == NULL) return SPNG_EINTERNAL; + if(len > UINT_MAX) return SPNG_EINTERNAL; + + int ret = 0; + unsigned char *data = NULL; + z_stream *zstream = &ctx->zstream; + uint32_t idat_length = SPNG_WRITE_SIZE; + + zstream->next_in = scanline; + zstream->avail_in = (uInt)len; + + do + { + ret = deflate(zstream, flush); + + if(zstream->avail_out == 0) + { + ret = finish_chunk(ctx); + if(ret) return encode_err(ctx, ret); + + ret = write_header(ctx, type_idat, idat_length, &data); + if(ret) return encode_err(ctx, ret); + + zstream->next_out = data; + zstream->avail_out = idat_length; + } + + }while(zstream->avail_in); + + if(ret != Z_OK) return 
SPNG_EZLIB; + + return 0; +} + +static int finish_idat(spng_ctx *ctx) +{ + int ret = 0; + unsigned char *data = NULL; + z_stream *zstream = &ctx->zstream; + uint32_t idat_length = SPNG_WRITE_SIZE; + + while(ret != Z_STREAM_END) + { + ret = deflate(zstream, Z_FINISH); + + if(ret) + { + if(ret == Z_STREAM_END) break; + + if(ret != Z_BUF_ERROR) return SPNG_EZLIB; + } + + if(zstream->avail_out == 0) + { + ret = finish_chunk(ctx); + if(ret) return encode_err(ctx, ret); + + ret = write_header(ctx, type_idat, idat_length, &data); + if(ret) return encode_err(ctx, ret); + + zstream->next_out = data; + zstream->avail_out = idat_length; + } + } + + uint32_t trimmed_length = idat_length - zstream->avail_out; + + ret = trim_chunk(ctx, trimmed_length); + if(ret) return ret; + + return finish_chunk(ctx); +} + +static int encode_scanline(spng_ctx *ctx, const void *scanline, size_t len) +{ + if(ctx == NULL || scanline == NULL) return SPNG_EINTERNAL; + + int ret, pass = ctx->row_info.pass; + uint8_t filter = 0; + struct spng_row_info *ri = &ctx->row_info; + const struct spng_subimage *sub = ctx->subimage; + struct encode_flags f = ctx->encode_flags; + unsigned char *filtered_scanline = ctx->filtered_scanline; + size_t scanline_width = sub[pass].scanline_width; + + if(len < scanline_width - 1) return SPNG_EINTERNAL; + + /* encode_row() interlaces directly to ctx->scanline */ + if(scanline != ctx->scanline) memcpy(ctx->scanline, scanline, scanline_width - 1); + + if(f.to_bigendian) u16_row_to_bigendian(ctx->scanline, scanline_width - 1); + const int requires_previous = f.filter_choice & (SPNG_FILTER_CHOICE_UP | SPNG_FILTER_CHOICE_AVG | SPNG_FILTER_CHOICE_PAETH); + + /* XXX: exclude 'requires_previous' filters by default for first scanline? 
*/ + if(!ri->scanline_idx && requires_previous) + { + /* prev_scanline is all zeros for the first scanline */ + memset(ctx->prev_scanline, 0, scanline_width); + } + + filter = get_best_filter(ctx->prev_scanline, ctx->scanline, scanline_width, ctx->bytes_per_pixel, f.filter_choice); + + if(!filter) filtered_scanline = ctx->scanline; + + filtered_scanline[-1] = filter; + + if(filter) + { + ret = filter_scanline(filtered_scanline, ctx->prev_scanline, ctx->scanline, scanline_width, ctx->bytes_per_pixel, filter); + if(ret) return encode_err(ctx, ret); + } + + ret = write_idat_bytes(ctx, filtered_scanline - 1, scanline_width, Z_NO_FLUSH); + if(ret) return encode_err(ctx, ret); + + /* The previous scanline is always unfiltered */ + void *t = ctx->prev_scanline; + ctx->prev_scanline = ctx->scanline; + ctx->scanline = t; + + ret = update_row_info(ctx); + + if(ret == SPNG_EOI) + { + int error = finish_idat(ctx); + if(error) encode_err(ctx, error); + + if(f.finalize) + { + error = spng_encode_chunks(ctx); + if(error) return encode_err(ctx, error); + } + } + + return ret; +} + +static int encode_row(spng_ctx *ctx, const void *row, size_t len) +{ + if(ctx == NULL || row == NULL) return SPNG_EINTERNAL; + + const int pass = ctx->row_info.pass; + + if(!ctx->ihdr.interlace_method || pass == 6) return encode_scanline(ctx, row, len); + + uint32_t k; + const unsigned pixel_size = ctx->pixel_size; + const unsigned bit_depth = ctx->ihdr.bit_depth; + + if(bit_depth < 8) + { + const unsigned samples_per_byte = 8 / bit_depth; + const uint8_t mask = (1 << bit_depth) - 1; + const unsigned initial_shift = 8 - bit_depth; + unsigned shift_amount = initial_shift; + + unsigned char *scanline = ctx->scanline; + const unsigned char *row_uc = row; + uint8_t sample; + + memset(scanline, 0, ctx->subimage[pass].scanline_width); + + for(k=0; k < ctx->subimage[pass].width; k++) + { + size_t ioffset = adam7_x_start[pass] + k * adam7_x_delta[pass]; + + sample = row_uc[ioffset / samples_per_byte]; + + 
sample = sample >> (initial_shift - ioffset * bit_depth % 8); + sample = sample & mask; + sample = sample << shift_amount; + + scanline[0] |= sample; + + shift_amount -= bit_depth; + + if(shift_amount > 7) + { + shift_amount = initial_shift; + scanline++; + } + } + + return encode_scanline(ctx, ctx->scanline, len); + } + + for(k=0; k < ctx->subimage[pass].width; k++) + { + size_t ioffset = (adam7_x_start[pass] + (size_t) k * adam7_x_delta[pass]) * pixel_size; + + memcpy(ctx->scanline + k * pixel_size, (unsigned char*)row + ioffset, pixel_size); + } + + return encode_scanline(ctx, ctx->scanline, len); +} + +int spng_encode_scanline(spng_ctx *ctx, const void *scanline, size_t len) +{ + if(ctx == NULL || scanline == NULL) return SPNG_EINVAL; + if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI; + if(len < (ctx->subimage[ctx->row_info.pass].scanline_width -1) ) return SPNG_EBUFSIZ; + + return encode_scanline(ctx, scanline, len); +} + +int spng_encode_row(spng_ctx *ctx, const void *row, size_t len) +{ + if(ctx == NULL || row == NULL) return SPNG_EINVAL; + if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI; + if(len < ctx->image_width) return SPNG_EBUFSIZ; + + return encode_row(ctx, row, len); +} + +int spng_encode_chunks(spng_ctx *ctx) +{ + if(ctx == NULL) return 1; + if(!ctx->state) return SPNG_EBADSTATE; + if(ctx->state < SPNG_STATE_OUTPUT) return SPNG_ENODST; + if(!ctx->encode_only) return SPNG_ECTXTYPE; + + int ret = 0; + + if(ctx->state < SPNG_STATE_FIRST_IDAT) + { + if(!ctx->stored.ihdr) return SPNG_ENOIHDR; + + ret = write_chunks_before_idat(ctx); + if(ret) return encode_err(ctx, ret); + + ctx->state = SPNG_STATE_FIRST_IDAT; + } + else if(ctx->state == SPNG_STATE_FIRST_IDAT) + { + return 0; + } + else if(ctx->state == SPNG_STATE_EOI) + { + ret = write_chunks_after_idat(ctx); + if(ret) return encode_err(ctx, ret); + + ctx->state = SPNG_STATE_IEND; + } + else return SPNG_EOPSTATE; + + return 0; +} + +int spng_encode_image(spng_ctx *ctx, const void *img, size_t len, int 
fmt, int flags) +{ + if(ctx == NULL) return 1; + if(!ctx->state) return SPNG_EBADSTATE; + if(!ctx->encode_only) return SPNG_ECTXTYPE; + if(!ctx->stored.ihdr) return SPNG_ENOIHDR; + if( !(fmt == SPNG_FMT_PNG || fmt == SPNG_FMT_RAW) ) return SPNG_EFMT; + + int ret = 0; + const struct spng_ihdr *ihdr = &ctx->ihdr; + struct encode_flags *encode_flags = &ctx->encode_flags; + + if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED && !ctx->stored.plte) return SPNG_ENOPLTE; + + ret = calculate_image_width(ihdr, fmt, &ctx->image_width); + if(ret) return encode_err(ctx, ret); + + if(ctx->image_width > SIZE_MAX / ihdr->height) ctx->image_size = 0; /* overflow */ + else ctx->image_size = ctx->image_width * ihdr->height; + + if( !(flags & SPNG_ENCODE_PROGRESSIVE) ) + { + if(img == NULL) return 1; + if(!ctx->image_size) return SPNG_EOVERFLOW; + if(len != ctx->image_size) return SPNG_EBUFSIZ; + } + + ret = spng_encode_chunks(ctx); + if(ret) return encode_err(ctx, ret); + + ret = calculate_subimages(ctx); + if(ret) return encode_err(ctx, ret); + + if(ihdr->bit_depth < 8) ctx->bytes_per_pixel = 1; + else ctx->bytes_per_pixel = num_channels(ihdr) * (ihdr->bit_depth / 8); + + if(spng__optimize(SPNG_FILTER_CHOICE)) + { + /* Filtering would make no difference */ + if(!ctx->image_options.compression_level) + { + encode_flags->filter_choice = SPNG_DISABLE_FILTERING; + } + + /* Palette indices and low bit-depth images do not benefit from filtering */ + if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED || ihdr->bit_depth < 8) + { + encode_flags->filter_choice = SPNG_DISABLE_FILTERING; + } + } + + /* This is technically the same as disabling filtering */ + if(encode_flags->filter_choice == SPNG_FILTER_CHOICE_NONE) + { + encode_flags->filter_choice = SPNG_DISABLE_FILTERING; + } + + if(!encode_flags->filter_choice && spng__optimize(SPNG_IMG_COMPRESSION_STRATEGY)) + { + ctx->image_options.strategy = Z_DEFAULT_STRATEGY; + } + + ret = spng__deflate_init(ctx, &ctx->image_options); + if(ret) return 
encode_err(ctx, ret); + + size_t scanline_buf_size = ctx->subimage[ctx->widest_pass].scanline_width; + + scanline_buf_size += 32; + + if(scanline_buf_size < 32) return SPNG_EOVERFLOW; + + ctx->scanline_buf = spng__malloc(ctx, scanline_buf_size); + ctx->prev_scanline_buf = spng__malloc(ctx, scanline_buf_size); + + if(ctx->scanline_buf == NULL || ctx->prev_scanline_buf == NULL) return encode_err(ctx, SPNG_EMEM); + + /* Maintain alignment for pixels, filter at [-1] */ + ctx->scanline = ctx->scanline_buf + 16; + ctx->prev_scanline = ctx->prev_scanline_buf + 16; + + if(encode_flags->filter_choice) + { + ctx->filtered_scanline_buf = spng__malloc(ctx, scanline_buf_size); + if(ctx->filtered_scanline_buf == NULL) return encode_err(ctx, SPNG_EMEM); + + ctx->filtered_scanline = ctx->filtered_scanline_buf + 16; + } + + struct spng_subimage *sub = ctx->subimage; + struct spng_row_info *ri = &ctx->row_info; + + ctx->fmt = fmt; + + z_stream *zstream = &ctx->zstream; + zstream->avail_out = SPNG_WRITE_SIZE; + + ret = write_header(ctx, type_idat, zstream->avail_out, &zstream->next_out); + if(ret) return encode_err(ctx, ret); + + if(ihdr->interlace_method) encode_flags->interlace = 1; + + if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW) ) encode_flags->same_layout = 1; + + if(ihdr->bit_depth == 16 && fmt != SPNG_FMT_RAW) encode_flags->to_bigendian = 1; + + if(flags & SPNG_ENCODE_FINALIZE) encode_flags->finalize = 1; + + while(!sub[ri->pass].width || !sub[ri->pass].height) ri->pass++; + + if(encode_flags->interlace) ri->row_num = adam7_y_start[ri->pass]; + + ctx->pixel_size = 4; /* SPNG_FMT_RGBA8 */ + + if(fmt == SPNG_FMT_RGBA16) ctx->pixel_size = 8; + else if(fmt == SPNG_FMT_RGB8) ctx->pixel_size = 3; + else if(fmt == SPNG_FMT_G8) ctx->pixel_size = 1; + else if(fmt == SPNG_FMT_GA8) ctx->pixel_size = 2; + else if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW)) ctx->pixel_size = ctx->bytes_per_pixel; + + ctx->state = SPNG_STATE_ENCODE_INIT; + + if(flags & SPNG_ENCODE_PROGRESSIVE) + { + 
encode_flags->progressive = 1; + + return 0; + } + + do + { + size_t ioffset = ri->row_num * ctx->image_width; + + ret = encode_row(ctx, (unsigned char*)img + ioffset, ctx->image_width); + + }while(!ret); + + if(ret != SPNG_EOI) return encode_err(ctx, ret); + + return 0; +} + +spng_ctx *spng_ctx_new(int flags) +{ + struct spng_alloc alloc = + { + .malloc_fn = malloc, + .realloc_fn = realloc, + .calloc_fn = calloc, + .free_fn = free + }; + + return spng_ctx_new2(&alloc, flags); +} + +spng_ctx *spng_ctx_new2(struct spng_alloc *alloc, int flags) +{ + if(alloc == NULL) return NULL; + if(flags != (flags & SPNG__CTX_FLAGS_ALL)) return NULL; + + if(alloc->malloc_fn == NULL) return NULL; + if(alloc->realloc_fn == NULL) return NULL; + if(alloc->calloc_fn == NULL) return NULL; + if(alloc->free_fn == NULL) return NULL; + + spng_ctx *ctx = alloc->calloc_fn(1, sizeof(spng_ctx)); + if(ctx == NULL) return NULL; + + ctx->alloc = *alloc; + + ctx->max_width = spng_u32max; + ctx->max_height = spng_u32max; + + ctx->max_chunk_size = spng_u32max; + ctx->chunk_cache_limit = SIZE_MAX; + ctx->chunk_count_limit = SPNG_MAX_CHUNK_COUNT; + + ctx->state = SPNG_STATE_INIT; + + ctx->crc_action_critical = SPNG_CRC_ERROR; + ctx->crc_action_ancillary = SPNG_CRC_DISCARD; + + const struct spng__zlib_options image_defaults = + { + .compression_level = Z_DEFAULT_COMPRESSION, + .window_bits = 15, + .mem_level = 8, + .strategy = Z_FILTERED, + .data_type = 0 /* Z_BINARY */ + }; + + const struct spng__zlib_options text_defaults = + { + .compression_level = Z_DEFAULT_COMPRESSION, + .window_bits = 15, + .mem_level = 8, + .strategy = Z_DEFAULT_STRATEGY, + .data_type = 1 /* Z_TEXT */ + }; + + ctx->image_options = image_defaults; + ctx->text_options = text_defaults; + + ctx->optimize_option = ~0; + ctx->encode_flags.filter_choice = SPNG_FILTER_CHOICE_ALL; + + ctx->flags = flags; + + if(flags & SPNG_CTX_ENCODER) ctx->encode_only = 1; + + return ctx; +} + +void spng_ctx_free(spng_ctx *ctx) +{ + if(ctx == NULL) 
return; + + if(ctx->streaming && ctx->stream_buf != NULL) spng__free(ctx, ctx->stream_buf); + + if(!ctx->user.exif) spng__free(ctx, ctx->exif.data); + + if(!ctx->user.iccp) spng__free(ctx, ctx->iccp.profile); + + uint32_t i; + + if(ctx->splt_list != NULL && !ctx->user.splt) + { + for(i=0; i < ctx->n_splt; i++) + { + spng__free(ctx, ctx->splt_list[i].entries); + } + spng__free(ctx, ctx->splt_list); + } + + if(ctx->text_list != NULL) + { + for(i=0; i< ctx->n_text; i++) + { + if(ctx->user.text) break; + + spng__free(ctx, ctx->text_list[i].keyword); + if(ctx->text_list[i].compression_flag) spng__free(ctx, ctx->text_list[i].text); + } + spng__free(ctx, ctx->text_list); + } + + if(ctx->chunk_list != NULL && !ctx->user.unknown) + { + for(i=0; i< ctx->n_chunks; i++) + { + spng__free(ctx, ctx->chunk_list[i].data); + } + spng__free(ctx, ctx->chunk_list); + } + + if(ctx->deflate) deflateEnd(&ctx->zstream); + else inflateEnd(&ctx->zstream); + + if(!ctx->user_owns_out_png) spng__free(ctx, ctx->out_png); + + spng__free(ctx, ctx->gamma_lut16); + + spng__free(ctx, ctx->row_buf); + spng__free(ctx, ctx->scanline_buf); + spng__free(ctx, ctx->prev_scanline_buf); + spng__free(ctx, ctx->filtered_scanline_buf); + + spng_free_fn *free_fn = ctx->alloc.free_fn; + + memset(ctx, 0, sizeof(spng_ctx)); + + free_fn(ctx); +} + +static int buffer_read_fn(spng_ctx *ctx, void *user, void *data, size_t n) +{ + if(n > ctx->bytes_left) return SPNG_IO_EOF; + + (void)user; + (void)data; + ctx->data = ctx->data + ctx->last_read_size; + + ctx->last_read_size = n; + ctx->bytes_left -= n; + + return 0; +} + +static int file_read_fn(spng_ctx *ctx, void *user, void *data, size_t n) +{ + FILE *file = user; + (void)ctx; + + if(fread(data, n, 1, file) != 1) + { + if(feof(file)) return SPNG_IO_EOF; + else return SPNG_IO_ERROR; + } + + return 0; +} + +static int file_write_fn(spng_ctx *ctx, void *user, void *data, size_t n) +{ + FILE *file = user; + (void)ctx; + + if(fwrite(data, n, 1, file) != 1) return 
SPNG_IO_ERROR; + + return 0; +} + +int spng_set_png_buffer(spng_ctx *ctx, const void *buf, size_t size) +{ + if(ctx == NULL || buf == NULL) return 1; + if(!ctx->state) return SPNG_EBADSTATE; + if(ctx->encode_only) return SPNG_ECTXTYPE; /* not supported */ + + if(ctx->data != NULL) return SPNG_EBUF_SET; + + ctx->data = buf; + ctx->png_base = buf; + ctx->data_size = size; + ctx->bytes_left = size; + + ctx->read_fn = buffer_read_fn; + + ctx->state = SPNG_STATE_INPUT; + + return 0; +} + +int spng_set_png_stream(spng_ctx *ctx, spng_rw_fn *rw_func, void *user) +{ + if(ctx == NULL || rw_func == NULL) return 1; + if(!ctx->state) return SPNG_EBADSTATE; + + /* SPNG_STATE_OUTPUT shares the same value */ + if(ctx->state >= SPNG_STATE_INPUT) return SPNG_EBUF_SET; + + if(ctx->encode_only) + { + if(ctx->out_png != NULL) return SPNG_EBUF_SET; + + ctx->write_fn = rw_func; + ctx->write_ptr = ctx->stream_buf; + + ctx->state = SPNG_STATE_OUTPUT; + } + else + { + ctx->stream_buf = spng__malloc(ctx, SPNG_READ_SIZE); + if(ctx->stream_buf == NULL) return SPNG_EMEM; + + ctx->read_fn = rw_func; + ctx->data = ctx->stream_buf; + ctx->data_size = SPNG_READ_SIZE; + + ctx->state = SPNG_STATE_INPUT; + } + + ctx->stream_user_ptr = user; + + ctx->streaming = 1; + + return 0; +} + +int spng_set_png_file(spng_ctx *ctx, FILE *file) +{ + if(file == NULL) return 1; + + if(ctx->encode_only) return spng_set_png_stream(ctx, file_write_fn, file); + + return spng_set_png_stream(ctx, file_read_fn, file); +} + +void *spng_get_png_buffer(spng_ctx *ctx, size_t *len, int *error) +{ + int tmp = 0; + error = error ? 
/* Set a single context option.

   ctx    - context, must not be NULL and must be in a non-zero state
   option - which option to change
   value  - new value; most options store it unchecked, the exceptions are
            validated in their case below

   Returns 0 on success, 1 for a NULL context, an unrecognised option or a
   rejected value, SPNG_EBADSTATE when ctx->state is zero, and
   SPNG_ECTXTYPE / SPNG_EOPSTATE for SPNG_ENCODE_TO_BUFFER preconditions. */
int spng_set_option(spng_ctx *ctx, enum spng_option option, int value)
{
    if(ctx == NULL) return 1;
    if(!ctx->state) return SPNG_EBADSTATE;

    switch(option)
    {
        case SPNG_KEEP_UNKNOWN_CHUNKS:
        {
            /* Normalised to 0 or 1 */
            ctx->keep_unknown = value ? 1 : 0;
            break;
        }
        case SPNG_IMG_COMPRESSION_LEVEL:
        {
            ctx->image_options.compression_level = value;
            break;
        }
        case SPNG_IMG_WINDOW_BITS:
        {
            ctx->image_options.window_bits = value;
            break;
        }
        case SPNG_IMG_MEM_LEVEL:
        {
            ctx->image_options.mem_level = value;
            break;
        }
        case SPNG_IMG_COMPRESSION_STRATEGY:
        {
            ctx->image_options.strategy = value;
            break;
        }
        case SPNG_TEXT_COMPRESSION_LEVEL:
        {
            ctx->text_options.compression_level = value;
            break;
        }
        case SPNG_TEXT_WINDOW_BITS:
        {
            ctx->text_options.window_bits = value;
            break;
        }
        case SPNG_TEXT_MEM_LEVEL:
        {
            ctx->text_options.mem_level = value;
            break;
        }
        case SPNG_TEXT_COMPRESSION_STRATEGY:
        {
            ctx->text_options.strategy = value;
            break;
        }
        case SPNG_FILTER_CHOICE:
        {
            /* Only bits that are part of SPNG_FILTER_CHOICE_ALL are accepted */
            if(value & ~SPNG_FILTER_CHOICE_ALL) return 1;
            ctx->encode_flags.filter_choice = value;
            break;
        }
        case SPNG_CHUNK_COUNT_LIMIT:
        {
            if(value < 0) return 1;
            /* NOTE(review): this rejects limits ABOVE the number of chunks
               counted so far - confirm the comparison direction is intended */
            if(value > (int)ctx->chunk_count_total) return 1;
            ctx->chunk_count_limit = value;
            break;
        }
        case SPNG_ENCODE_TO_BUFFER:
        {
            if(value < 0) return 1;
            if(!ctx->encode_only) return SPNG_ECTXTYPE;
            if(ctx->state >= SPNG_STATE_OUTPUT) return SPNG_EOPSTATE;

            /* A zero value leaves the context unchanged */
            if(!value) break;

            /* Selecting the internal buffer also marks the output as set */
            ctx->internal_buffer = 1;
            ctx->state = SPNG_STATE_OUTPUT;

            break;
        }
        default: return 1;
    }

    /* Option can no longer be overridden by the library */
    if(option < 32) ctx->optimize_option &= ~(1 << option);

    return 0;
}
+ { + *value = ctx->image_options.window_bits; + break; + } + case SPNG_IMG_MEM_LEVEL: + { + *value = ctx->image_options.mem_level; + break; + } + case SPNG_IMG_COMPRESSION_STRATEGY: + { + *value = ctx->image_options.strategy; + break; + } + case SPNG_TEXT_COMPRESSION_LEVEL: + { + *value = ctx->text_options.compression_level; + break; + } + case SPNG_TEXT_WINDOW_BITS: + { + *value = ctx->text_options.window_bits; + break; + } + case SPNG_TEXT_MEM_LEVEL: + { + *value = ctx->text_options.mem_level; + break; + } + case SPNG_TEXT_COMPRESSION_STRATEGY: + { + *value = ctx->text_options.strategy; + break; + } + case SPNG_FILTER_CHOICE: + { + *value = ctx->encode_flags.filter_choice; + break; + } + case SPNG_CHUNK_COUNT_LIMIT: + { + *value = ctx->chunk_count_limit; + break; + } + case SPNG_ENCODE_TO_BUFFER: + { + if(ctx->internal_buffer) *value = 1; + else *value = 0; + + break; + } + default: return 1; + } + + return 0; +} + +int spng_decoded_image_size(spng_ctx *ctx, int fmt, size_t *len) +{ + if(ctx == NULL || len == NULL) return 1; + + int ret = read_chunks(ctx, 1); + if(ret) return ret; + + ret = check_decode_fmt(&ctx->ihdr, fmt); + if(ret) return ret; + + return calculate_image_size(&ctx->ihdr, fmt, len); +} + +int spng_get_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr) +{ + if(ctx == NULL) return 1; + int ret = read_chunks(ctx, 1); + if(ret) return ret; + if(ihdr == NULL) return 1; + + *ihdr = ctx->ihdr; + + return 0; +} + +int spng_get_plte(spng_ctx *ctx, struct spng_plte *plte) +{ + SPNG_GET_CHUNK_BOILERPLATE(plte); + + *plte = ctx->plte; + + return 0; +} + +int spng_get_trns(spng_ctx *ctx, struct spng_trns *trns) +{ + SPNG_GET_CHUNK_BOILERPLATE(trns); + + *trns = ctx->trns; + + return 0; +} + +int spng_get_chrm(spng_ctx *ctx, struct spng_chrm *chrm) +{ + SPNG_GET_CHUNK_BOILERPLATE(chrm); + + chrm->white_point_x = (double)ctx->chrm_int.white_point_x / 100000.0; + chrm->white_point_y = (double)ctx->chrm_int.white_point_y / 100000.0; + chrm->red_x = 
/* Retrieve the image gamma as a floating-point value.

   The context keeps gamma as an integer (ctx->gama); it is converted here by
   dividing by 100000.0 and written to `*gamma`.

   Returns 0 on success. Argument and chunk-availability checks are done by
   SPNG_GET_CHUNK_BOILERPLATE, which is defined outside this view -
   presumably it returns early on a NULL argument or when no gAMA chunk is
   stored; confirm against the macro definition. */
int spng_get_gama(spng_ctx *ctx, double *gamma)
{
    /* The local must be named after the chunk because the macro receives that name */
    double *gama = gamma;
    SPNG_GET_CHUNK_BOILERPLATE(gama);

    *gama = (double)ctx->gama / 100000.0;

    return 0;
}
ctx->text_list[i].language_tag; + text[i].translated_keyword = ctx->text_list[i].translated_keyword; + text[i].length = ctx->text_list[i].text_length; + text[i].text = ctx->text_list[i].text; + } + + return ret; +} + +int spng_get_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd) +{ + SPNG_GET_CHUNK_BOILERPLATE(bkgd); + + *bkgd = ctx->bkgd; + + return 0; +} + +int spng_get_hist(spng_ctx *ctx, struct spng_hist *hist) +{ + SPNG_GET_CHUNK_BOILERPLATE(hist); + + *hist = ctx->hist; + + return 0; +} + +int spng_get_phys(spng_ctx *ctx, struct spng_phys *phys) +{ + SPNG_GET_CHUNK_BOILERPLATE(phys); + + *phys = ctx->phys; + + return 0; +} + +int spng_get_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t *n_splt) +{ + if(ctx == NULL) return 1; + int ret = read_chunks(ctx, 0); + if(ret) return ret; + if(!ctx->stored.splt) return SPNG_ECHUNKAVAIL; + if(n_splt == NULL) return 1; + + if(splt == NULL) + { + *n_splt = ctx->n_splt; + return 0; + } + + if(*n_splt < ctx->n_splt) return 1; + + memcpy(splt, ctx->splt_list, ctx->n_splt * sizeof(struct spng_splt)); + + return 0; +} + +int spng_get_time(spng_ctx *ctx, struct spng_time *time) +{ + SPNG_GET_CHUNK_BOILERPLATE(time); + + *time = ctx->time; + + return 0; +} + +int spng_get_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t *n_chunks) +{ + if(ctx == NULL) return 1; + int ret = read_chunks(ctx, 0); + if(ret) return ret; + if(!ctx->stored.unknown) return SPNG_ECHUNKAVAIL; + if(n_chunks == NULL) return 1; + + if(chunks == NULL) + { + *n_chunks = ctx->n_chunks; + return 0; + } + + if(*n_chunks < ctx->n_chunks) return 1; + + memcpy(chunks, ctx->chunk_list, sizeof(struct spng_unknown_chunk)); + + return 0; +} + +int spng_get_offs(spng_ctx *ctx, struct spng_offs *offs) +{ + SPNG_GET_CHUNK_BOILERPLATE(offs); + + *offs = ctx->offs; + + return 0; +} + +int spng_get_exif(spng_ctx *ctx, struct spng_exif *exif) +{ + SPNG_GET_CHUNK_BOILERPLATE(exif); + + *exif = ctx->exif; + + return 0; +} + +int spng_set_ihdr(spng_ctx 
*ctx, struct spng_ihdr *ihdr) +{ + SPNG_SET_CHUNK_BOILERPLATE(ihdr); + + if(ctx->stored.ihdr) return 1; + + ret = check_ihdr(ihdr, ctx->max_width, ctx->max_height); + if(ret) return ret; + + ctx->ihdr = *ihdr; + + ctx->stored.ihdr = 1; + ctx->user.ihdr = 1; + + return 0; +} + +int spng_set_plte(spng_ctx *ctx, struct spng_plte *plte) +{ + SPNG_SET_CHUNK_BOILERPLATE(plte); + + if(!ctx->stored.ihdr) return 1; + + if(check_plte(plte, &ctx->ihdr)) return 1; + + ctx->plte.n_entries = plte->n_entries; + + memcpy(ctx->plte.entries, plte->entries, plte->n_entries * sizeof(struct spng_plte_entry)); + + ctx->stored.plte = 1; + ctx->user.plte = 1; + + return 0; +} + +int spng_set_trns(spng_ctx *ctx, struct spng_trns *trns) +{ + SPNG_SET_CHUNK_BOILERPLATE(trns); + + if(!ctx->stored.ihdr) return SPNG_ENOIHDR; + + if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_GRAYSCALE) + { + ctx->trns.gray = trns->gray; + } + else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_TRUECOLOR) + { + ctx->trns.red = trns->red; + ctx->trns.green = trns->green; + ctx->trns.blue = trns->blue; + } + else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_INDEXED) + { + if(!ctx->stored.plte) return SPNG_ETRNS_NO_PLTE; + if(trns->n_type3_entries > ctx->plte.n_entries) return 1; + + ctx->trns.n_type3_entries = trns->n_type3_entries; + memcpy(ctx->trns.type3_alpha, trns->type3_alpha, trns->n_type3_entries); + } + else return SPNG_ETRNS_COLOR_TYPE; + + ctx->stored.trns = 1; + ctx->user.trns = 1; + + return 0; +} + +int spng_set_chrm(spng_ctx *ctx, struct spng_chrm *chrm) +{ + SPNG_SET_CHUNK_BOILERPLATE(chrm); + + struct spng_chrm_int chrm_int; + + chrm_int.white_point_x = (uint32_t)(chrm->white_point_x * 100000.0); + chrm_int.white_point_y = (uint32_t)(chrm->white_point_y * 100000.0); + chrm_int.red_x = (uint32_t)(chrm->red_x * 100000.0); + chrm_int.red_y = (uint32_t)(chrm->red_y * 100000.0); + chrm_int.green_x = (uint32_t)(chrm->green_x * 100000.0); + chrm_int.green_y = (uint32_t)(chrm->green_y * 100000.0); + chrm_int.blue_x 
= (uint32_t)(chrm->blue_x * 100000.0);
+    chrm_int.blue_y = (uint32_t)(chrm->blue_y * 100000.0);
+
+    if(check_chrm_int(&chrm_int)) return SPNG_ECHRM;
+
+    ctx->chrm_int = chrm_int;
+
+    ctx->stored.chrm = 1;
+    ctx->user.chrm = 1;
+
+    return 0;
+}
+
+/* Store user-supplied cHRM values that are already in integer form.
+ * Returns SPNG_ECHRM when check_chrm_int() rejects them, 0 on success.
+ * SPNG_SET_CHUNK_BOILERPLATE is defined outside this view; presumably it
+ * validates ctx and the argument first - TODO confirm. */
+int spng_set_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm_int)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(chrm_int);
+
+    if(check_chrm_int(chrm_int)) return SPNG_ECHRM;
+
+    ctx->chrm_int = *chrm_int;
+
+    ctx->stored.chrm = 1;
+    ctx->user.chrm = 1;
+
+    return 0;
+}
+
+/* Store a user-supplied gamma given as a double; it is kept in the context
+ * as the integer gamma * 100000 (truncated).
+ * Returns 1 when the scaled value is below 1, above spng_u32max, or NaN;
+ * 0 on success. */
+int spng_set_gama(spng_ctx *ctx, double gamma)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(ctx);
+
+    /* Range-check while still in floating point: converting a negative, NaN
+     * or too-large double to uint32_t is undefined behaviour, so the checks
+     * must not run on the already-converted value. The accepted interval is
+     * the one the old post-conversion tests allowed (truncated result in
+     * 1..spng_u32max). */
+    double scaled = gamma * 100000.0;
+
+    if(!(scaled >= 1.0)) return 1; /* written this way so NaN is rejected too */
+    if(!(scaled < (double)spng_u32max + 1.0)) return 1;
+
+    uint32_t gama = (uint32_t)scaled;
+
+    ctx->gama = gama;
+
+    ctx->stored.gama = 1;
+    ctx->user.gama = 1;
+
+    return 0;
+}
+
+/* Store a user-supplied gamma that is already in integer form.
+ * Returns 1 when the value is 0 or above spng_u32max, 0 on success. */
+int spng_set_gama_int(spng_ctx *ctx, uint32_t gamma)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(ctx);
+
+    if(!gamma) return 1;
+    if(gamma > spng_u32max) return 1;
+
+    ctx->gama = gamma;
+
+    ctx->stored.gama = 1;
+    ctx->user.gama = 1;
+
+    return 0;
+}
+
+/* Store a user-supplied iCCP chunk.
+ * Returns SPNG_EICCP_NAME for a profile name rejected by check_png_keyword(),
+ * SPNG_ECHUNK_SIZE for an empty profile, SPNG_ECHUNK_STDLEN when profile_len
+ * exceeds spng_u32max, 0 on success.
+ * The struct is copied by value, so the context keeps the caller's profile
+ * pointer rather than a copy of the profile data. */
+int spng_set_iccp(spng_ctx *ctx, struct spng_iccp *iccp)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(iccp);
+
+    if(check_png_keyword(iccp->profile_name)) return SPNG_EICCP_NAME;
+    if(!iccp->profile_len) return SPNG_ECHUNK_SIZE;
+    if(iccp->profile_len > spng_u32max) return SPNG_ECHUNK_STDLEN;
+
+    /* Only free a previous profile that was not supplied by the user. */
+    if(ctx->iccp.profile && !ctx->user.iccp) spng__free(ctx, ctx->iccp.profile);
+
+    ctx->iccp = *iccp;
+
+    ctx->stored.iccp = 1;
+    ctx->user.iccp = 1;
+
+    return 0;
+}
+
+/* Store a user-supplied sBIT chunk.
+ * Returns 1 when no IHDR is stored or check_sbit() rejects the values for
+ * that IHDR, 0 on success. */
+int spng_set_sbit(spng_ctx *ctx, struct spng_sbit *sbit)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(sbit);
+
+    /* Make sure an IHDR exists before check_sbit() is handed ctx->ihdr.
+     * Both failures return 1, so the outcome is unchanged by this order. */
+    if(!ctx->stored.ihdr) return 1;
+
+    if(check_sbit(sbit, &ctx->ihdr)) return 1;
+
+    ctx->sbit = *sbit;
+
+    ctx->stored.sbit = 1;
+    ctx->user.sbit = 1;
+
+    return 0;
+}
+
+/* Store a user-supplied sRGB rendering intent.
+ * Returns 1 when rendering_intent is greater than 3, 0 on success. */
+int spng_set_srgb(spng_ctx *ctx, uint8_t rendering_intent)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(ctx);
+
+    if(rendering_intent > 3) return 1;
+
+    ctx->srgb_rendering_intent = rendering_intent;
+
+    ctx->stored.srgb = 1;
+    ctx->user.srgb = 1;
+
+    return 0;
+}
+ +int spng_set_text(spng_ctx *ctx, struct spng_text *text, uint32_t n_text) +{ + if(!n_text) return 1; + SPNG_SET_CHUNK_BOILERPLATE(text); + + uint32_t i; + for(i=0; i < n_text; i++) + { + if(check_png_keyword(text[i].keyword)) return SPNG_ETEXT_KEYWORD; + if(!text[i].length) return 1; + if(text[i].length > UINT_MAX) return 1; + if(text[i].text == NULL) return 1; + + if(text[i].type == SPNG_TEXT) + { + if(ctx->strict && check_png_text(text[i].text, text[i].length)) return 1; + } + else if(text[i].type == SPNG_ZTXT) + { + if(ctx->strict && check_png_text(text[i].text, text[i].length)) return 1; + + if(text[i].compression_method != 0) return SPNG_EZTXT_COMPRESSION_METHOD; + } + else if(text[i].type == SPNG_ITXT) + { + if(text[i].compression_flag > 1) return SPNG_EITXT_COMPRESSION_FLAG; + if(text[i].compression_method != 0) return SPNG_EITXT_COMPRESSION_METHOD; + if(text[i].language_tag == NULL) return SPNG_EITXT_LANG_TAG; + if(text[i].translated_keyword == NULL) return SPNG_EITXT_TRANSLATED_KEY; + } + else return 1; + + } + + struct spng_text2 *text_list = spng__calloc(ctx, sizeof(struct spng_text2), n_text); + + if(!text_list) return SPNG_EMEM; + + if(ctx->text_list != NULL) + { + for(i=0; i < ctx->n_text; i++) + { + if(ctx->user.text) break; + + spng__free(ctx, ctx->text_list[i].keyword); + if(ctx->text_list[i].compression_flag) spng__free(ctx, ctx->text_list[i].text); + } + spng__free(ctx, ctx->text_list); + } + + for(i=0; i < n_text; i++) + { + text_list[i].type = text[i].type; + /* Prevent issues with spng_text.keyword[80] going out of scope */ + text_list[i].keyword = text_list[i].user_keyword_storage; + memcpy(text_list[i].user_keyword_storage, text[i].keyword, strlen(text[i].keyword)); + text_list[i].text = text[i].text; + text_list[i].text_length = text[i].length; + + if(text[i].type == SPNG_ZTXT) + { + text_list[i].compression_flag = 1; + } + else if(text[i].type == SPNG_ITXT) + { + text_list[i].compression_flag = text[i].compression_flag; + 
text_list[i].language_tag = text[i].language_tag; + text_list[i].translated_keyword = text[i].translated_keyword; + } + } + + ctx->text_list = text_list; + ctx->n_text = n_text; + + ctx->stored.text = 1; + ctx->user.text = 1; + + return 0; +} + +int spng_set_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd) +{ + SPNG_SET_CHUNK_BOILERPLATE(bkgd); + + if(!ctx->stored.ihdr) return 1; + + if(ctx->ihdr.color_type == 0 || ctx->ihdr.color_type == 4) + { + ctx->bkgd.gray = bkgd->gray; + } + else if(ctx->ihdr.color_type == 2 || ctx->ihdr.color_type == 6) + { + ctx->bkgd.red = bkgd->red; + ctx->bkgd.green = bkgd->green; + ctx->bkgd.blue = bkgd->blue; + } + else if(ctx->ihdr.color_type == 3) + { + if(!ctx->stored.plte) return SPNG_EBKGD_NO_PLTE; + if(bkgd->plte_index >= ctx->plte.n_entries) return SPNG_EBKGD_PLTE_IDX; + + ctx->bkgd.plte_index = bkgd->plte_index; + } + + ctx->stored.bkgd = 1; + ctx->user.bkgd = 1; + + return 0; +} + +int spng_set_hist(spng_ctx *ctx, struct spng_hist *hist) +{ + SPNG_SET_CHUNK_BOILERPLATE(hist); + + if(!ctx->stored.plte) return SPNG_EHIST_NO_PLTE; + + ctx->hist = *hist; + + ctx->stored.hist = 1; + ctx->user.hist = 1; + + return 0; +} + +int spng_set_phys(spng_ctx *ctx, struct spng_phys *phys) +{ + SPNG_SET_CHUNK_BOILERPLATE(phys); + + if(check_phys(phys)) return SPNG_EPHYS; + + ctx->phys = *phys; + + ctx->stored.phys = 1; + ctx->user.phys = 1; + + return 0; +} + +int spng_set_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t n_splt) +{ + if(!n_splt) return 1; + SPNG_SET_CHUNK_BOILERPLATE(splt); + + uint32_t i; + for(i=0; i < n_splt; i++) + { + if(check_png_keyword(splt[i].name)) return SPNG_ESPLT_NAME; + if( !(splt[i].sample_depth == 8 || splt[i].sample_depth == 16) ) return SPNG_ESPLT_DEPTH; + } + + if(ctx->stored.splt && !ctx->user.splt) + { + for(i=0; i < ctx->n_splt; i++) + { + if(ctx->splt_list[i].entries != NULL) spng__free(ctx, ctx->splt_list[i].entries); + } + spng__free(ctx, ctx->splt_list); + } + + ctx->splt_list = splt; + ctx->n_splt = 
n_splt; + + ctx->stored.splt = 1; + ctx->user.splt = 1; + + return 0; +} + +int spng_set_time(spng_ctx *ctx, struct spng_time *time) +{ + SPNG_SET_CHUNK_BOILERPLATE(time); + + if(check_time(time)) return SPNG_ETIME; + + ctx->time = *time; + + ctx->stored.time = 1; + ctx->user.time = 1; + + return 0; +} + +int spng_set_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t n_chunks) +{ + if(!n_chunks) return 1; + SPNG_SET_CHUNK_BOILERPLATE(chunks); + + uint32_t i; + for(i=0; i < n_chunks; i++) + { + if(chunks[i].length > spng_u32max) return SPNG_ECHUNK_STDLEN; + if(chunks[i].length && chunks[i].data == NULL) return 1; + + switch(chunks[i].location) + { + case SPNG_AFTER_IHDR: + case SPNG_AFTER_PLTE: + case SPNG_AFTER_IDAT: + break; + default: return SPNG_ECHUNK_POS; + } + } + + if(ctx->stored.unknown && !ctx->user.unknown) + { + for(i=0; i < ctx->n_chunks; i++) + { + spng__free(ctx, ctx->chunk_list[i].data); + } + spng__free(ctx, ctx->chunk_list); + } + + ctx->chunk_list = chunks; + ctx->n_chunks = n_chunks; + + ctx->stored.unknown = 1; + ctx->user.unknown = 1; + + return 0; +} + +int spng_set_offs(spng_ctx *ctx, struct spng_offs *offs) +{ + SPNG_SET_CHUNK_BOILERPLATE(offs); + + if(check_offs(offs)) return SPNG_EOFFS; + + ctx->offs = *offs; + + ctx->stored.offs = 1; + ctx->user.offs = 1; + + return 0; +} + +int spng_set_exif(spng_ctx *ctx, struct spng_exif *exif) +{ + SPNG_SET_CHUNK_BOILERPLATE(exif); + + if(check_exif(exif)) return SPNG_EEXIF; + + if(ctx->exif.data != NULL && !ctx->user.exif) spng__free(ctx, ctx->exif.data); + + ctx->exif = *exif; + + ctx->stored.exif = 1; + ctx->user.exif = 1; + + return 0; +} + +const char *spng_strerror(int err) +{ + switch(err) + { + case SPNG_IO_EOF: return "end of stream"; + case SPNG_IO_ERROR: return "stream error"; + case SPNG_OK: return "success"; + case SPNG_EINVAL: return "invalid argument"; + case SPNG_EMEM: return "out of memory"; + case SPNG_EOVERFLOW: return "arithmetic overflow"; + case 
SPNG_ESIGNATURE: return "invalid signature"; + case SPNG_EWIDTH: return "invalid image width"; + case SPNG_EHEIGHT: return "invalid image height"; + case SPNG_EUSER_WIDTH: return "image width exceeds user limit"; + case SPNG_EUSER_HEIGHT: return "image height exceeds user limit"; + case SPNG_EBIT_DEPTH: return "invalid bit depth"; + case SPNG_ECOLOR_TYPE: return "invalid color type"; + case SPNG_ECOMPRESSION_METHOD: return "invalid compression method"; + case SPNG_EFILTER_METHOD: return "invalid filter method"; + case SPNG_EINTERLACE_METHOD: return "invalid interlace method"; + case SPNG_EIHDR_SIZE: return "invalid IHDR chunk size"; + case SPNG_ENOIHDR: return "missing IHDR chunk"; + case SPNG_ECHUNK_POS: return "invalid chunk position"; + case SPNG_ECHUNK_SIZE: return "invalid chunk length"; + case SPNG_ECHUNK_CRC: return "invalid chunk checksum"; + case SPNG_ECHUNK_TYPE: return "invalid chunk type"; + case SPNG_ECHUNK_UNKNOWN_CRITICAL: return "unknown critical chunk"; + case SPNG_EDUP_PLTE: return "duplicate PLTE chunk"; + case SPNG_EDUP_CHRM: return "duplicate cHRM chunk"; + case SPNG_EDUP_GAMA: return "duplicate gAMA chunk"; + case SPNG_EDUP_ICCP: return "duplicate iCCP chunk"; + case SPNG_EDUP_SBIT: return "duplicate sBIT chunk"; + case SPNG_EDUP_SRGB: return "duplicate sRGB chunk"; + case SPNG_EDUP_BKGD: return "duplicate bKGD chunk"; + case SPNG_EDUP_HIST: return "duplicate hIST chunk"; + case SPNG_EDUP_TRNS: return "duplicate tRNS chunk"; + case SPNG_EDUP_PHYS: return "duplicate pHYs chunk"; + case SPNG_EDUP_TIME: return "duplicate tIME chunk"; + case SPNG_EDUP_OFFS: return "duplicate oFFs chunk"; + case SPNG_EDUP_EXIF: return "duplicate eXIf chunk"; + case SPNG_ECHRM: return "invalid cHRM chunk"; + case SPNG_EPLTE_IDX: return "invalid palette (PLTE) index"; + case SPNG_ETRNS_COLOR_TYPE: return "tRNS chunk with incompatible color type"; + case SPNG_ETRNS_NO_PLTE: return "missing palette (PLTE) for tRNS chunk"; + case SPNG_EGAMA: return "invalid gAMA chunk"; 
+ case SPNG_EICCP_NAME: return "invalid iCCP profile name"; + case SPNG_EICCP_COMPRESSION_METHOD: return "invalid iCCP compression method"; + case SPNG_ESBIT: return "invalid sBIT chunk"; + case SPNG_ESRGB: return "invalid sRGB chunk"; + case SPNG_ETEXT: return "invalid tEXt chunk"; + case SPNG_ETEXT_KEYWORD: return "invalid tEXt keyword"; + case SPNG_EZTXT: return "invalid zTXt chunk"; + case SPNG_EZTXT_COMPRESSION_METHOD: return "invalid zTXt compression method"; + case SPNG_EITXT: return "invalid iTXt chunk"; + case SPNG_EITXT_COMPRESSION_FLAG: return "invalid iTXt compression flag"; + case SPNG_EITXT_COMPRESSION_METHOD: return "invalid iTXt compression method"; + case SPNG_EITXT_LANG_TAG: return "invalid iTXt language tag"; + case SPNG_EITXT_TRANSLATED_KEY: return "invalid iTXt translated key"; + case SPNG_EBKGD_NO_PLTE: return "missing palette for bKGD chunk"; + case SPNG_EBKGD_PLTE_IDX: return "invalid palette index for bKGD chunk"; + case SPNG_EHIST_NO_PLTE: return "missing palette for hIST chunk"; + case SPNG_EPHYS: return "invalid pHYs chunk"; + case SPNG_ESPLT_NAME: return "invalid suggested palette name"; + case SPNG_ESPLT_DUP_NAME: return "duplicate suggested palette (sPLT) name"; + case SPNG_ESPLT_DEPTH: return "invalid suggested palette (sPLT) sample depth"; + case SPNG_ETIME: return "invalid tIME chunk"; + case SPNG_EOFFS: return "invalid oFFs chunk"; + case SPNG_EEXIF: return "invalid eXIf chunk"; + case SPNG_EIDAT_TOO_SHORT: return "IDAT stream too short"; + case SPNG_EIDAT_STREAM: return "IDAT stream error"; + case SPNG_EZLIB: return "zlib error"; + case SPNG_EFILTER: return "invalid scanline filter"; + case SPNG_EBUFSIZ: return "invalid buffer size"; + case SPNG_EIO: return "i/o error"; + case SPNG_EOF: return "end of file"; + case SPNG_EBUF_SET: return "buffer already set"; + case SPNG_EBADSTATE: return "non-recoverable state"; + case SPNG_EFMT: return "invalid format"; + case SPNG_EFLAGS: return "invalid flags"; + case SPNG_ECHUNKAVAIL: return 
"chunk not available"; + case SPNG_ENCODE_ONLY: return "encode only context"; + case SPNG_EOI: return "reached end-of-image state"; + case SPNG_ENOPLTE: return "missing PLTE for indexed image"; + case SPNG_ECHUNK_LIMITS: return "reached chunk/cache limits"; + case SPNG_EZLIB_INIT: return "zlib init error"; + case SPNG_ECHUNK_STDLEN: return "chunk exceeds maximum standard length"; + case SPNG_EINTERNAL: return "internal error"; + case SPNG_ECTXTYPE: return "invalid operation for context type"; + case SPNG_ENOSRC: return "source PNG not set"; + case SPNG_ENODST: return "PNG output not set"; + case SPNG_EOPSTATE: return "invalid operation for state"; + case SPNG_ENOTFINAL: return "PNG not finalized"; + default: return "unknown error"; + } +} + +const char *spng_version_string(void) +{ + return SPNG_VERSION_STRING; +} + +#if defined(_MSC_VER) + #pragma warning(pop) +#endif + +/* The following SIMD optimizations are derived from libpng source code. */ + +/* +* PNG Reference Library License version 2 +* +* Copyright (c) 1995-2019 The PNG Reference Library Authors. +* Copyright (c) 2018-2019 Cosmin Truta. +* Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson. +* Copyright (c) 1996-1997 Andreas Dilger. +* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. +* +* The software is supplied "as is", without warranty of any kind, +* express or implied, including, without limitation, the warranties +* of merchantability, fitness for a particular purpose, title, and +* non-infringement. In no event shall the Copyright owners, or +* anyone distributing the software, be liable for any damages or +* other liability, whether in contract, tort or otherwise, arising +* from, out of, or in connection with the software, or the use or +* other dealings in the software, even if advised of the possibility +* of such damage. 
+* +* Permission is hereby granted to use, copy, modify, and distribute +* this software, or portions hereof, for any purpose, without fee, +* subject to the following restrictions: +* +* 1. The origin of this software must not be misrepresented; you +* must not claim that you wrote the original software. If you +* use this software in a product, an acknowledgment in the product +* documentation would be appreciated, but is not required. +* +* 2. Altered source versions must be plainly marked as such, and must +* not be misrepresented as being the original software. +* +* 3. This Copyright notice may not be removed or altered from any +* source or altered source distribution. +*/ + +#if defined(SPNG_X86) + +#ifndef SPNG_SSE + #define SPNG_SSE 1 +#endif + +#if defined(__GNUC__) && !defined(__clang__) + #if SPNG_SSE == 3 + #pragma GCC target("ssse3") + #elif SPNG_SSE == 4 + #pragma GCC target("sse4.1") + #else + #pragma GCC target("sse2") + #endif +#endif + +/* SSE2 optimised filter functions + * Derived from filter_neon_intrinsics.c + * + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 2016-2017 Glenn Randers-Pehrson + * Written by Mike Klein and Matt Sarett + * Derived from arm/filter_neon_intrinsics.c + * + * This code is derived from libpng source code. + * For conditions of distribution and use, see the disclaimer + * and license above. + */ + +#include <immintrin.h> +#include <inttypes.h> +#include <string.h> + +/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d). + * They're positioned like this: + * prev: c b + * row: a d + * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be + * whichever of a, b, or c is closest to p=a+b-c.
+ */ + +static __m128i load4(const void* p) +{ + int tmp; + memcpy(&tmp, p, sizeof(tmp)); + return _mm_cvtsi32_si128(tmp); +} + +static void store4(void* p, __m128i v) +{ + int tmp = _mm_cvtsi128_si32(v); + memcpy(p, &tmp, sizeof(int)); +} + +static __m128i load3(const void* p) +{ + uint32_t tmp = 0; + memcpy(&tmp, p, 3); + return _mm_cvtsi32_si128(tmp); +} + +static void store3(void* p, __m128i v) +{ + int tmp = _mm_cvtsi128_si32(v); + memcpy(p, &tmp, 3); +} + +static void defilter_sub3(size_t rowbytes, unsigned char *row) +{ + /* The Sub filter predicts each pixel as the previous pixel, a. + * There is no pixel to the left of the first pixel. It's encoded directly. + * That works with our main loop if we just say that left pixel was zero. + */ + size_t rb = rowbytes; + + __m128i a, d = _mm_setzero_si128(); + + while(rb >= 4) + { + a = d; d = load4(row); + d = _mm_add_epi8(d, a); + store3(row, d); + + row += 3; + rb -= 3; + } + + if(rb > 0) + { + a = d; d = load3(row); + d = _mm_add_epi8(d, a); + store3(row, d); + } +} + +static void defilter_sub4(size_t rowbytes, unsigned char *row) +{ + /* The Sub filter predicts each pixel as the previous pixel, a. + * There is no pixel to the left of the first pixel. It's encoded directly. + * That works with our main loop if we just say that left pixel was zero. + */ + size_t rb = rowbytes+4; + + __m128i a, d = _mm_setzero_si128(); + + while(rb > 4) + { + a = d; d = load4(row); + d = _mm_add_epi8(d, a); + store4(row, d); + + row += 4; + rb -= 4; + } +} + +static void defilter_avg3(size_t rowbytes, unsigned char *row, const unsigned char *prev) +{ + /* The Avg filter predicts each pixel as the (truncated) average of a and b. + * There's no pixel to the left of the first pixel. Luckily, it's + * predicted to be half of the pixel above it. So again, this works + * perfectly with our loop if we make sure a starts at zero. 
+ */ + + size_t rb = rowbytes; + + const __m128i zero = _mm_setzero_si128(); + + __m128i b; + __m128i a, d = zero; + + while(rb >= 4) + { + __m128i avg; + b = load4(prev); + a = d; d = load4(row ); + + /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */ + avg = _mm_avg_epu8(a,b); + /* ...but we can fix it up by subtracting off 1 if it rounded up. */ + avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a, b), + _mm_set1_epi8(1))); + d = _mm_add_epi8(d, avg); + store3(row, d); + + prev += 3; + row += 3; + rb -= 3; + } + + if(rb > 0) + { + __m128i avg; + b = load3(prev); + a = d; d = load3(row ); + + /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */ + avg = _mm_avg_epu8(a, b); + /* ...but we can fix it up by subtracting off 1 if it rounded up. */ + avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a, b), + _mm_set1_epi8(1))); + + d = _mm_add_epi8(d, avg); + store3(row, d); + } +} + +static void defilter_avg4(size_t rowbytes, unsigned char *row, const unsigned char *prev) +{ + /* The Avg filter predicts each pixel as the (truncated) average of a and b. + * There's no pixel to the left of the first pixel. Luckily, it's + * predicted to be half of the pixel above it. So again, this works + * perfectly with our loop if we make sure a starts at zero. + */ + size_t rb = rowbytes+4; + + const __m128i zero = _mm_setzero_si128(); + __m128i b; + __m128i a, d = zero; + + while(rb > 4) + { + __m128i avg; + b = load4(prev); + a = d; d = load4(row ); + + /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */ + avg = _mm_avg_epu8(a,b); + /* ...but we can fix it up by subtracting off 1 if it rounded up. */ + avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a, b), + _mm_set1_epi8(1))); + + d = _mm_add_epi8(d, avg); + store4(row, d); + + prev += 4; + row += 4; + rb -= 4; + } +} + +/* Returns |x| for 16-bit lanes. 
*/ +#if (SPNG_SSE >= 3) && !defined(_MSC_VER) +__attribute__((target("ssse3"))) +#endif +static __m128i abs_i16(__m128i x) +{ +#if SPNG_SSE >= 3 + return _mm_abs_epi16(x); +#else + /* Read this all as, return x<0 ? -x : x. + * To negate two's complement, you flip all the bits then add 1. + */ + __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128()); + + /* Flip negative lanes. */ + x = _mm_xor_si128(x, is_negative); + + /* +1 to negative lanes, else +0. */ + x = _mm_sub_epi16(x, is_negative); + return x; +#endif +} + +/* Bytewise c ? t : e. */ +static __m128i if_then_else(__m128i c, __m128i t, __m128i e) +{ +#if SPNG_SSE >= 4 + return _mm_blendv_epi8(e, t, c); +#else + return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); +#endif +} + +static void defilter_paeth3(size_t rowbytes, unsigned char *row, const unsigned char *prev) +{ + /* Paeth tries to predict pixel d using the pixel to the left of it, a, + * and two pixels from the previous row, b and c: + * prev: c b + * row: a d + * The Paeth function predicts d to be whichever of a, b, or c is nearest to + * p=a+b-c. + * + * The first pixel has no left context, and so uses an Up filter, p = b. + * This works naturally with our main loop's p = a+b-c if we force a and c + * to zero. + * Here we zero b and d, which become c and a respectively at the start of + * the loop. + */ + size_t rb = rowbytes; + const __m128i zero = _mm_setzero_si128(); + __m128i c, b = zero, + a, d = zero; + + while(rb >= 4) + { + /* It's easiest to do this math (particularly, deal with pc) with 16-bit + * intermediates. 
+ */ + __m128i pa,pb,pc,smallest,nearest; + c = b; b = _mm_unpacklo_epi8(load4(prev), zero); + a = d; d = _mm_unpacklo_epi8(load4(row ), zero); + + /* (p-a) == (a+b-c - a) == (b-c) */ + + pa = _mm_sub_epi16(b, c); + + /* (p-b) == (a+b-c - b) == (a-c) */ + pb = _mm_sub_epi16(a, c); + + /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */ + pc = _mm_add_epi16(pa, pb); + + pa = abs_i16(pa); /* |p-a| */ + pb = abs_i16(pb); /* |p-b| */ + pc = abs_i16(pc); /* |p-c| */ + + smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb)); + + /* Paeth breaks ties favoring a over b over c. */ + nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a, + if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c)); + + /* Note `_epi8`: we need addition to wrap modulo 256. */ + d = _mm_add_epi8(d, nearest); + store3(row, _mm_packus_epi16(d, d)); + + prev += 3; + row += 3; + rb -= 3; + } + + if(rb > 0) + { + /* It's easiest to do this math (particularly, deal with pc) with 16-bit + * intermediates. + */ + __m128i pa, pb, pc, smallest, nearest; + c = b; b = _mm_unpacklo_epi8(load3(prev), zero); + a = d; d = _mm_unpacklo_epi8(load3(row ), zero); + + /* (p-a) == (a+b-c - a) == (b-c) */ + pa = _mm_sub_epi16(b, c); + + /* (p-b) == (a+b-c - b) == (a-c) */ + pb = _mm_sub_epi16(a, c); + + /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */ + pc = _mm_add_epi16(pa, pb); + + pa = abs_i16(pa); /* |p-a| */ + pb = abs_i16(pb); /* |p-b| */ + pc = abs_i16(pc); /* |p-c| */ + + smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb)); + + /* Paeth breaks ties favoring a over b over c. */ + nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a, + if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c)); + + /* Note `_epi8`: we need addition to wrap modulo 256.
*/ + d = _mm_add_epi8(d, nearest); + store3(row, _mm_packus_epi16(d, d)); + } +} + +static void defilter_paeth4(size_t rowbytes, unsigned char *row, const unsigned char *prev) +{ + /* Paeth tries to predict pixel d using the pixel to the left of it, a, + * and two pixels from the previous row, b and c: + * prev: c b + * row: a d + * The Paeth function predicts d to be whichever of a, b, or c is nearest to + * p=a+b-c. + * + * The first pixel has no left context, and so uses an Up filter, p = b. + * This works naturally with our main loop's p = a+b-c if we force a and c + * to zero. + * Here we zero b and d, which become c and a respectively at the start of + * the loop. + */ + size_t rb = rowbytes+4; + + const __m128i zero = _mm_setzero_si128(); + __m128i pa, pb, pc, smallest, nearest; + __m128i c, b = zero, + a, d = zero; + + while(rb > 4) + { + /* It's easiest to do this math (particularly, deal with pc) with 16-bit + * intermediates. + */ + c = b; b = _mm_unpacklo_epi8(load4(prev), zero); + a = d; d = _mm_unpacklo_epi8(load4(row ), zero); + + /* (p-a) == (a+b-c - a) == (b-c) */ + pa = _mm_sub_epi16(b, c); + + /* (p-b) == (a+b-c - b) == (a-c) */ + pb = _mm_sub_epi16(a, c); + + /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */ + pc = _mm_add_epi16(pa, pb); + + pa = abs_i16(pa); /* |p-a| */ + pb = abs_i16(pb); /* |p-b| */ + pc = abs_i16(pc); /* |p-c| */ + + smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb)); + + /* Paeth breaks ties favoring a over b over c. */ + nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a, + if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c)); + + /* Note `_epi8`: we need addition to wrap modulo 256.
*/ + d = _mm_add_epi8(d, nearest); + store4(row, _mm_packus_epi16(d, d)); + + prev += 4; + row += 4; + rb -= 4; + } +} + +#endif /* SPNG_X86 */ + + +#if defined(SPNG_ARM) + +/* NEON optimised filter functions + * Derived from filter_neon_intrinsics.c + * + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 2014,2016 Glenn Randers-Pehrson + * Written by James Yu , October 2013. + * Based on filter_neon.S, written by Mans Rullgard, 2011. + * + * This code is derived from libpng source code. + * For conditions of distribution and use, see the disclaimer + * and license in this file. + */ + +#define png_aligncast(type, value) ((void*)(value)) +#define png_aligncastconst(type, value) ((const void*)(value)) + +/* libpng row pointers are not necessarily aligned to any particular boundary, + * however this code will only work with appropriate alignment. mips/mips_init.c + * checks for this (and will not compile unless it is done). This code uses + * variants of png_aligncast to avoid compiler warnings. + */ +#define png_ptr(type,pointer) png_aligncast(type *,pointer) +#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer) + +/* The following relies on a variable 'temp_pointer' being declared with type + * 'type'. This is written this way just to hide the GCC strict aliasing + * warning; note that the code is safe because there never is an alias between + * the input and output pointers. 
+ */
+#define png_ldr(type,pointer)\
+   (temp_pointer = png_ptr(type,pointer), *temp_pointer)
+
+
+/* NEON intrinsics header. The header names were lost from this copy of the
+ * patch and are restored here: MSVC targeting ARM64 (without clang) uses
+ * arm64_neon.h, every other toolchain uses arm_neon.h. */
+#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
+    #include <arm64_neon.h>
+#else
+    #include <arm_neon.h>
+#endif
+
+/* NEON Sub defilter for 3-byte pixels, applied to `row` in place.
+ *
+ * Each loop iteration reconstructs four pixels (rp advances 4 * 3 = 12 bytes):
+ * every output is the filtered pixel plus the previously reconstructed one,
+ * with vdest.val[3] carrying the last pixel into the next iteration (zero for
+ * the first pixel).
+ *
+ * NOTE(review): each vld1q_u8() reads 16 bytes (at rp and at rp + 12) and each
+ * vst1_lane_u32() writes 4 bytes for a 3-byte pixel, so the routine touches
+ * memory past row + rowbytes; presumably the caller's row buffers are padded
+ * for this - confirm against the allocation site. */
+static void defilter_sub3(size_t rowbytes, unsigned char *row)
+{
+    unsigned char *rp = row;
+    unsigned char *rp_stop = row + rowbytes;
+
+    uint8x16_t vtmp = vld1q_u8(rp);
+    uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp);
+    uint8x8x2_t vrp = *vrpt;
+
+    uint8x8x4_t vdest;
+    vdest.val[3] = vdup_n_u8(0);
+
+    for (; rp < rp_stop;)
+    {
+        uint8x8_t vtmp1, vtmp2;
+        uint32x2_t *temp_pointer;
+
+        /* vext_u8 shifts the loaded bytes so that pixel 1 (offset 3),
+         * pixel 2 (offset 6) and pixel 3 (offset 9 = 1 into val[1]) start at
+         * lane 0. */
+        vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+        vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
+        vtmp2 = vext_u8(vrp.val[0], vrp.val[1], 6);
+        vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);
+
+        vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+        vdest.val[2] = vadd_u8(vdest.val[1], vtmp2);
+        vdest.val[3] = vadd_u8(vdest.val[2], vtmp1);
+
+        /* Load the next group before the stores below overwrite the row. */
+        vtmp = vld1q_u8(rp + 12);
+        vrpt = png_ptr(uint8x8x2_t, &vtmp);
+        vrp = *vrpt;
+
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+        rp += 3;
+    }
+}
+
+/* NEON Sub defilter for 4-byte pixels, applied to `row` in place; each loop
+ * iteration handles 16 bytes (four pixels). */
+static void defilter_sub4(size_t rowbytes, unsigned char *row)
+{
+    unsigned char *rp = row;
+    unsigned char *rp_stop = row + rowbytes;
+
+    uint8x8x4_t vdest;
+    vdest.val[3] = vdup_n_u8(0);
+
+    for (; rp < rp_stop; rp += 16)
+    {
+        uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp));
+        uint8x8x4_t *vrpt = png_ptr(uint8x8x4_t,&vtmp);
+        uint8x8x4_t vrp = *vrpt;
+        uint32x2x4_t *temp_pointer;
+        uint32x2x4_t vdest_val;
+
+        vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
+        vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]);
+        vdest.val[2] = vadd_u8(vdest.val[1], vrp.val[2]);
+        vdest.val[3] = vadd_u8(vdest.val[2], vrp.val[3]);
+ + vdest_val = png_ldr(uint32x2x4_t, &vdest); + vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0); + } +} + +static void defilter_avg3(size_t rowbytes, unsigned char *row, const unsigned char *prev_row) +{ + unsigned char *rp = row; + const unsigned char *pp = prev_row; + unsigned char *rp_stop = row + rowbytes; + + uint8x16_t vtmp; + uint8x8x2_t *vrpt; + uint8x8x2_t vrp; + uint8x8x4_t vdest; + vdest.val[3] = vdup_n_u8(0); + + vtmp = vld1q_u8(rp); + vrpt = png_ptr(uint8x8x2_t,&vtmp); + vrp = *vrpt; + + for (; rp < rp_stop; pp += 12) + { + uint8x8_t vtmp1, vtmp2, vtmp3; + + uint8x8x2_t *vppt; + uint8x8x2_t vpp; + + uint32x2_t *temp_pointer; + + vtmp = vld1q_u8(pp); + vppt = png_ptr(uint8x8x2_t,&vtmp); + vpp = *vppt; + + vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3); + vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]); + vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); + + vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3); + vtmp3 = vext_u8(vrp.val[0], vrp.val[1], 6); + vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2); + vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); + + vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 6); + vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); + + vtmp = vld1q_u8(rp + 12); + vrpt = png_ptr(uint8x8x2_t,&vtmp); + vrp = *vrpt; + + vdest.val[2] = vhadd_u8(vdest.val[1], vtmp2); + vdest.val[2] = vadd_u8(vdest.val[2], vtmp3); + + vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1); + + vdest.val[3] = vhadd_u8(vdest.val[2], vtmp2); + vdest.val[3] = vadd_u8(vdest.val[3], vtmp1); + + vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0); + rp += 3; + vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0); + rp += 3; + vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0); + rp += 3; + vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0); + rp += 3; + } +} + +static void defilter_avg4(size_t rowbytes, unsigned char *row, const unsigned char *prev_row) +{ + unsigned char *rp = row; + unsigned char *rp_stop 
= row + rowbytes; + const unsigned char *pp = prev_row; + + uint8x8x4_t vdest; + vdest.val[3] = vdup_n_u8(0); + + for (; rp < rp_stop; rp += 16, pp += 16) + { + uint32x2x4_t vtmp; + uint8x8x4_t *vrpt, *vppt; + uint8x8x4_t vrp, vpp; + uint32x2x4_t *temp_pointer; + uint32x2x4_t vdest_val; + + vtmp = vld4_u32(png_ptr(uint32_t,rp)); + vrpt = png_ptr(uint8x8x4_t,&vtmp); + vrp = *vrpt; + vtmp = vld4_u32(png_ptrc(uint32_t,pp)); + vppt = png_ptr(uint8x8x4_t,&vtmp); + vpp = *vppt; + + vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]); + vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); + vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]); + vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]); + vdest.val[2] = vhadd_u8(vdest.val[1], vpp.val[2]); + vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]); + vdest.val[3] = vhadd_u8(vdest.val[2], vpp.val[3]); + vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]); + + vdest_val = png_ldr(uint32x2x4_t, &vdest); + vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0); + } +} + +static uint8x8_t paeth_arm(uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint8x8_t d, e; + uint16x8_t p1, pa, pb, pc; + + p1 = vaddl_u8(a, b); /* a + b */ + pc = vaddl_u8(c, c); /* c * 2 */ + pa = vabdl_u8(b, c); /* pa */ + pb = vabdl_u8(a, c); /* pb */ + pc = vabdq_u16(p1, pc); /* pc */ + + p1 = vcleq_u16(pa, pb); /* pa <= pb */ + pa = vcleq_u16(pa, pc); /* pa <= pc */ + pb = vcleq_u16(pb, pc); /* pb <= pc */ + + p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */ + + d = vmovn_u16(pb); + e = vmovn_u16(p1); + + d = vbsl_u8(d, b, c); + e = vbsl_u8(e, a, d); + + return e; +} + +static void defilter_paeth3(size_t rowbytes, unsigned char *row, const unsigned char *prev_row) +{ + unsigned char *rp = row; + const unsigned char *pp = prev_row; + unsigned char *rp_stop = row + rowbytes; + + uint8x16_t vtmp; + uint8x8x2_t *vrpt; + uint8x8x2_t vrp; + uint8x8_t vlast = vdup_n_u8(0); + uint8x8x4_t vdest; + vdest.val[3] = vdup_n_u8(0); + + vtmp = vld1q_u8(rp); + vrpt = 
png_ptr(uint8x8x2_t,&vtmp); + vrp = *vrpt; + + for (; rp < rp_stop; pp += 12) + { + uint8x8x2_t *vppt; + uint8x8x2_t vpp; + uint8x8_t vtmp1, vtmp2, vtmp3; + uint32x2_t *temp_pointer; + + vtmp = vld1q_u8(pp); + vppt = png_ptr(uint8x8x2_t,&vtmp); + vpp = *vppt; + + vdest.val[0] = paeth_arm(vdest.val[3], vpp.val[0], vlast); + vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); + + vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3); + vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3); + vdest.val[1] = paeth_arm(vdest.val[0], vtmp2, vpp.val[0]); + vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); + + vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 6); + vtmp3 = vext_u8(vpp.val[0], vpp.val[1], 6); + vdest.val[2] = paeth_arm(vdest.val[1], vtmp3, vtmp2); + vdest.val[2] = vadd_u8(vdest.val[2], vtmp1); + + vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); + vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1); + + vtmp = vld1q_u8(rp + 12); + vrpt = png_ptr(uint8x8x2_t,&vtmp); + vrp = *vrpt; + + vdest.val[3] = paeth_arm(vdest.val[2], vtmp2, vtmp3); + vdest.val[3] = vadd_u8(vdest.val[3], vtmp1); + + vlast = vtmp2; + + vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0); + rp += 3; + vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0); + rp += 3; + vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0); + rp += 3; + vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0); + rp += 3; + } +} + +static void defilter_paeth4(size_t rowbytes, unsigned char *row, const unsigned char *prev_row) +{ + unsigned char *rp = row; + unsigned char *rp_stop = row + rowbytes; + const unsigned char *pp = prev_row; + + uint8x8_t vlast = vdup_n_u8(0); + uint8x8x4_t vdest; + vdest.val[3] = vdup_n_u8(0); + + for (; rp < rp_stop; rp += 16, pp += 16) + { + uint32x2x4_t vtmp; + uint8x8x4_t *vrpt, *vppt; + uint8x8x4_t vrp, vpp; + uint32x2x4_t *temp_pointer; + uint32x2x4_t vdest_val; + + vtmp = vld4_u32(png_ptr(uint32_t,rp)); + vrpt = png_ptr(uint8x8x4_t,&vtmp); + vrp = 
*vrpt; + vtmp = vld4_u32(png_ptrc(uint32_t,pp)); + vppt = png_ptr(uint8x8x4_t,&vtmp); + vpp = *vppt; + + vdest.val[0] = paeth_arm(vdest.val[3], vpp.val[0], vlast); + vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); + vdest.val[1] = paeth_arm(vdest.val[0], vpp.val[1], vpp.val[0]); + vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]); + vdest.val[2] = paeth_arm(vdest.val[1], vpp.val[2], vpp.val[1]); + vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]); + vdest.val[3] = paeth_arm(vdest.val[2], vpp.val[3], vpp.val[2]); + vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]); + + vlast = vpp.val[3]; + + vdest_val = png_ldr(uint32x2x4_t, &vdest); + vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0); + } +} + +/* NEON optimised palette expansion functions + * Derived from palette_neon_intrinsics.c + * + * Copyright (c) 2018-2019 Cosmin Truta + * Copyright (c) 2017-2018 Arm Holdings. All rights reserved. + * Written by Richard Townsend , February 2017. + * + * This code is derived from libpng source code. + * For conditions of distribution and use, see the disclaimer + * and license in this file. + * + * Related: https://developer.arm.com/documentation/101964/latest/Color-palette-expansion + * + * The functions were refactored to iterate forward. + * + */ + +/* Expands a palettized row into RGBA8. 
*/ +static uint32_t expand_palette_rgba8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width) +{ + const uint32_t scanline_stride = 4; + const uint32_t row_stride = scanline_stride * 4; + const uint32_t count = width / scanline_stride; + const uint32_t *palette = (const uint32_t*)plte; + + if(!count) return 0; + + uint32_t i; + uint32x4_t cur; + for(i=0; i < count; i++, scanline += scanline_stride) + { + cur = vld1q_dup_u32 (palette + scanline[0]); + cur = vld1q_lane_u32(palette + scanline[1], cur, 1); + cur = vld1q_lane_u32(palette + scanline[2], cur, 2); + cur = vld1q_lane_u32(palette + scanline[3], cur, 3); + vst1q_u32((uint32_t*)(row + i * row_stride), cur); + } + + return count * scanline_stride; +} + +/* Expands a palettized row into RGB8. */ +static uint32_t expand_palette_rgb8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width) +{ + const uint32_t scanline_stride = 8; + const uint32_t row_stride = scanline_stride * 3; + const uint32_t count = width / scanline_stride; + + if(!count) return 0; + + uint32_t i; + uint8x8x3_t cur; + for(i=0; i < count; i++, scanline += scanline_stride) + { + cur = vld3_dup_u8 (plte + 3 * scanline[0]); + cur = vld3_lane_u8(plte + 3 * scanline[1], cur, 1); + cur = vld3_lane_u8(plte + 3 * scanline[2], cur, 2); + cur = vld3_lane_u8(plte + 3 * scanline[3], cur, 3); + cur = vld3_lane_u8(plte + 3 * scanline[4], cur, 4); + cur = vld3_lane_u8(plte + 3 * scanline[5], cur, 5); + cur = vld3_lane_u8(plte + 3 * scanline[6], cur, 6); + cur = vld3_lane_u8(plte + 3 * scanline[7], cur, 7); + vst3_u8(row + i * row_stride, cur); + } + + return count * scanline_stride; +} + +#endif /* SPNG_ARM */ diff --git a/externals/libspng/spng.h b/externals/libspng/spng.h new file mode 100644 index 000000000..908ffc99e --- /dev/null +++ b/externals/libspng/spng.h @@ -0,0 +1,537 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +#ifndef SPNG_H +#define SPNG_H + +#ifdef 
__cplusplus +extern "C" { +#endif + +#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(SPNG_STATIC) + #if defined(SPNG__BUILD) + #define SPNG_API __declspec(dllexport) + #else + #define SPNG_API __declspec(dllimport) + #endif +#else + #define SPNG_API +#endif + +#if defined(_MSC_VER) + #define SPNG_CDECL __cdecl +#else + #define SPNG_CDECL +#endif + +#include +#include +#include + +#define SPNG_VERSION_MAJOR 0 +#define SPNG_VERSION_MINOR 7 +#define SPNG_VERSION_PATCH 3 + +enum spng_errno +{ + SPNG_IO_ERROR = -2, + SPNG_IO_EOF = -1, + SPNG_OK = 0, + SPNG_EINVAL, + SPNG_EMEM, + SPNG_EOVERFLOW, + SPNG_ESIGNATURE, + SPNG_EWIDTH, + SPNG_EHEIGHT, + SPNG_EUSER_WIDTH, + SPNG_EUSER_HEIGHT, + SPNG_EBIT_DEPTH, + SPNG_ECOLOR_TYPE, + SPNG_ECOMPRESSION_METHOD, + SPNG_EFILTER_METHOD, + SPNG_EINTERLACE_METHOD, + SPNG_EIHDR_SIZE, + SPNG_ENOIHDR, + SPNG_ECHUNK_POS, + SPNG_ECHUNK_SIZE, + SPNG_ECHUNK_CRC, + SPNG_ECHUNK_TYPE, + SPNG_ECHUNK_UNKNOWN_CRITICAL, + SPNG_EDUP_PLTE, + SPNG_EDUP_CHRM, + SPNG_EDUP_GAMA, + SPNG_EDUP_ICCP, + SPNG_EDUP_SBIT, + SPNG_EDUP_SRGB, + SPNG_EDUP_BKGD, + SPNG_EDUP_HIST, + SPNG_EDUP_TRNS, + SPNG_EDUP_PHYS, + SPNG_EDUP_TIME, + SPNG_EDUP_OFFS, + SPNG_EDUP_EXIF, + SPNG_ECHRM, + SPNG_EPLTE_IDX, + SPNG_ETRNS_COLOR_TYPE, + SPNG_ETRNS_NO_PLTE, + SPNG_EGAMA, + SPNG_EICCP_NAME, + SPNG_EICCP_COMPRESSION_METHOD, + SPNG_ESBIT, + SPNG_ESRGB, + SPNG_ETEXT, + SPNG_ETEXT_KEYWORD, + SPNG_EZTXT, + SPNG_EZTXT_COMPRESSION_METHOD, + SPNG_EITXT, + SPNG_EITXT_COMPRESSION_FLAG, + SPNG_EITXT_COMPRESSION_METHOD, + SPNG_EITXT_LANG_TAG, + SPNG_EITXT_TRANSLATED_KEY, + SPNG_EBKGD_NO_PLTE, + SPNG_EBKGD_PLTE_IDX, + SPNG_EHIST_NO_PLTE, + SPNG_EPHYS, + SPNG_ESPLT_NAME, + SPNG_ESPLT_DUP_NAME, + SPNG_ESPLT_DEPTH, + SPNG_ETIME, + SPNG_EOFFS, + SPNG_EEXIF, + SPNG_EIDAT_TOO_SHORT, + SPNG_EIDAT_STREAM, + SPNG_EZLIB, + SPNG_EFILTER, + SPNG_EBUFSIZ, + SPNG_EIO, + SPNG_EOF, + SPNG_EBUF_SET, + SPNG_EBADSTATE, + SPNG_EFMT, + SPNG_EFLAGS, + SPNG_ECHUNKAVAIL, + SPNG_ENCODE_ONLY, + SPNG_EOI, + 
SPNG_ENOPLTE, + SPNG_ECHUNK_LIMITS, + SPNG_EZLIB_INIT, + SPNG_ECHUNK_STDLEN, + SPNG_EINTERNAL, + SPNG_ECTXTYPE, + SPNG_ENOSRC, + SPNG_ENODST, + SPNG_EOPSTATE, + SPNG_ENOTFINAL, +}; + +enum spng_text_type +{ + SPNG_TEXT = 1, + SPNG_ZTXT = 2, + SPNG_ITXT = 3 +}; + +enum spng_color_type +{ + SPNG_COLOR_TYPE_GRAYSCALE = 0, + SPNG_COLOR_TYPE_TRUECOLOR = 2, + SPNG_COLOR_TYPE_INDEXED = 3, + SPNG_COLOR_TYPE_GRAYSCALE_ALPHA = 4, + SPNG_COLOR_TYPE_TRUECOLOR_ALPHA = 6 +}; + +enum spng_filter +{ + SPNG_FILTER_NONE = 0, + SPNG_FILTER_SUB = 1, + SPNG_FILTER_UP = 2, + SPNG_FILTER_AVERAGE = 3, + SPNG_FILTER_PAETH = 4 +}; + +enum spng_filter_choice +{ + SPNG_DISABLE_FILTERING = 0, + SPNG_FILTER_CHOICE_NONE = 8, + SPNG_FILTER_CHOICE_SUB = 16, + SPNG_FILTER_CHOICE_UP = 32, + SPNG_FILTER_CHOICE_AVG = 64, + SPNG_FILTER_CHOICE_PAETH = 128, + SPNG_FILTER_CHOICE_ALL = (8|16|32|64|128) +}; + +enum spng_interlace_method +{ + SPNG_INTERLACE_NONE = 0, + SPNG_INTERLACE_ADAM7 = 1 +}; + +/* Channels are always in byte-order */ +enum spng_format +{ + SPNG_FMT_RGBA8 = 1, + SPNG_FMT_RGBA16 = 2, + SPNG_FMT_RGB8 = 4, + + /* Partially implemented, see documentation */ + SPNG_FMT_GA8 = 16, + SPNG_FMT_GA16 = 32, + SPNG_FMT_G8 = 64, + + /* No conversion or scaling */ + SPNG_FMT_PNG = 256, + SPNG_FMT_RAW = 512 /* big-endian (everything else is host-endian) */ +}; + +enum spng_ctx_flags +{ + SPNG_CTX_IGNORE_ADLER32 = 1, /* Ignore checksum in DEFLATE streams */ + SPNG_CTX_ENCODER = 2 /* Create an encoder context */ +}; + +enum spng_decode_flags +{ + SPNG_DECODE_USE_TRNS = 1, /* Deprecated */ + SPNG_DECODE_USE_GAMA = 2, /* Deprecated */ + SPNG_DECODE_USE_SBIT = 8, /* Undocumented */ + + SPNG_DECODE_TRNS = 1, /* Apply transparency */ + SPNG_DECODE_GAMMA = 2, /* Apply gamma correction */ + SPNG_DECODE_PROGRESSIVE = 256 /* Initialize for progressive reads */ +}; + +enum spng_crc_action +{ + /* Default for critical chunks */ + SPNG_CRC_ERROR = 0, + + /* Discard chunk, invalid for critical chunks. 
+ Since v0.6.2: default for ancillary chunks */ + SPNG_CRC_DISCARD = 1, + + /* Ignore and don't calculate checksum. + Since v0.6.2: also ignores checksums in DEFLATE streams */ + SPNG_CRC_USE = 2 +}; + +enum spng_encode_flags +{ + SPNG_ENCODE_PROGRESSIVE = 1, /* Initialize for progressive writes */ + SPNG_ENCODE_FINALIZE = 2, /* Finalize PNG after encoding image */ +}; + +struct spng_ihdr +{ + uint32_t width; + uint32_t height; + uint8_t bit_depth; + uint8_t color_type; + uint8_t compression_method; + uint8_t filter_method; + uint8_t interlace_method; +}; + +struct spng_plte_entry +{ + uint8_t red; + uint8_t green; + uint8_t blue; + + uint8_t alpha; /* Reserved for internal use */ +}; + +struct spng_plte +{ + uint32_t n_entries; + struct spng_plte_entry entries[256]; +}; + +struct spng_trns +{ + uint16_t gray; + + uint16_t red; + uint16_t green; + uint16_t blue; + + uint32_t n_type3_entries; + uint8_t type3_alpha[256]; +}; + +struct spng_chrm_int +{ + uint32_t white_point_x; + uint32_t white_point_y; + uint32_t red_x; + uint32_t red_y; + uint32_t green_x; + uint32_t green_y; + uint32_t blue_x; + uint32_t blue_y; +}; + +struct spng_chrm +{ + double white_point_x; + double white_point_y; + double red_x; + double red_y; + double green_x; + double green_y; + double blue_x; + double blue_y; +}; + +struct spng_iccp +{ + char profile_name[80]; + size_t profile_len; + char *profile; +}; + +struct spng_sbit +{ + uint8_t grayscale_bits; + uint8_t red_bits; + uint8_t green_bits; + uint8_t blue_bits; + uint8_t alpha_bits; +}; + +struct spng_text +{ + char keyword[80]; + int type; + + size_t length; + char *text; + + uint8_t compression_flag; /* iTXt only */ + uint8_t compression_method; /* iTXt, ztXt only */ + char *language_tag; /* iTXt only */ + char *translated_keyword; /* iTXt only */ +}; + +struct spng_bkgd +{ + uint16_t gray; /* Only for gray/gray alpha */ + uint16_t red; + uint16_t green; + uint16_t blue; + uint16_t plte_index; /* Only for indexed color */ +}; + +struct 
spng_hist +{ + uint16_t frequency[256]; +}; + +struct spng_phys +{ + uint32_t ppu_x, ppu_y; + uint8_t unit_specifier; +}; + +struct spng_splt_entry +{ + uint16_t red; + uint16_t green; + uint16_t blue; + uint16_t alpha; + uint16_t frequency; +}; + +struct spng_splt +{ + char name[80]; + uint8_t sample_depth; + uint32_t n_entries; + struct spng_splt_entry *entries; +}; + +struct spng_time +{ + uint16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t minute; + uint8_t second; +}; + +struct spng_offs +{ + int32_t x, y; + uint8_t unit_specifier; +}; + +struct spng_exif +{ + size_t length; + char *data; +}; + +struct spng_chunk +{ + size_t offset; + uint32_t length; + uint8_t type[4]; + uint32_t crc; +}; + +enum spng_location +{ + SPNG_AFTER_IHDR = 1, + SPNG_AFTER_PLTE = 2, + SPNG_AFTER_IDAT = 8, +}; + +struct spng_unknown_chunk +{ + uint8_t type[4]; + size_t length; + void *data; + enum spng_location location; +}; + +enum spng_option +{ + SPNG_KEEP_UNKNOWN_CHUNKS = 1, + + SPNG_IMG_COMPRESSION_LEVEL, + SPNG_IMG_WINDOW_BITS, + SPNG_IMG_MEM_LEVEL, + SPNG_IMG_COMPRESSION_STRATEGY, + + SPNG_TEXT_COMPRESSION_LEVEL, + SPNG_TEXT_WINDOW_BITS, + SPNG_TEXT_MEM_LEVEL, + SPNG_TEXT_COMPRESSION_STRATEGY, + + SPNG_FILTER_CHOICE, + SPNG_CHUNK_COUNT_LIMIT, + SPNG_ENCODE_TO_BUFFER, +}; + +typedef void* SPNG_CDECL spng_malloc_fn(size_t size); +typedef void* SPNG_CDECL spng_realloc_fn(void* ptr, size_t size); +typedef void* SPNG_CDECL spng_calloc_fn(size_t count, size_t size); +typedef void SPNG_CDECL spng_free_fn(void* ptr); + +struct spng_alloc +{ + spng_malloc_fn *malloc_fn; + spng_realloc_fn *realloc_fn; + spng_calloc_fn *calloc_fn; + spng_free_fn *free_fn; +}; + +struct spng_row_info +{ + uint32_t scanline_idx; + uint32_t row_num; /* deinterlaced row index */ + int pass; + uint8_t filter; +}; + +typedef struct spng_ctx spng_ctx; + +typedef int spng_read_fn(spng_ctx *ctx, void *user, void *dest, size_t length); +typedef int spng_write_fn(spng_ctx *ctx, void *user, void 
*src, size_t length); + +typedef int spng_rw_fn(spng_ctx *ctx, void *user, void *dst_src, size_t length); + +SPNG_API spng_ctx *spng_ctx_new(int flags); +SPNG_API spng_ctx *spng_ctx_new2(struct spng_alloc *alloc, int flags); +SPNG_API void spng_ctx_free(spng_ctx *ctx); + +SPNG_API int spng_set_png_buffer(spng_ctx *ctx, const void *buf, size_t size); +SPNG_API int spng_set_png_stream(spng_ctx *ctx, spng_rw_fn *rw_func, void *user); +SPNG_API int spng_set_png_file(spng_ctx *ctx, FILE *file); + +SPNG_API void *spng_get_png_buffer(spng_ctx *ctx, size_t *len, int *error); + +SPNG_API int spng_set_image_limits(spng_ctx *ctx, uint32_t width, uint32_t height); +SPNG_API int spng_get_image_limits(spng_ctx *ctx, uint32_t *width, uint32_t *height); + +SPNG_API int spng_set_chunk_limits(spng_ctx *ctx, size_t chunk_size, size_t cache_size); +SPNG_API int spng_get_chunk_limits(spng_ctx *ctx, size_t *chunk_size, size_t *cache_size); + +SPNG_API int spng_set_crc_action(spng_ctx *ctx, int critical, int ancillary); + +SPNG_API int spng_set_option(spng_ctx *ctx, enum spng_option option, int value); +SPNG_API int spng_get_option(spng_ctx *ctx, enum spng_option option, int *value); + +SPNG_API int spng_decoded_image_size(spng_ctx *ctx, int fmt, size_t *len); + +/* Decode */ +SPNG_API int spng_decode_image(spng_ctx *ctx, void *out, size_t len, int fmt, int flags); + +/* Progressive decode */ +SPNG_API int spng_decode_scanline(spng_ctx *ctx, void *out, size_t len); +SPNG_API int spng_decode_row(spng_ctx *ctx, void *out, size_t len); +SPNG_API int spng_decode_chunks(spng_ctx *ctx); + +/* Encode/decode */ +SPNG_API int spng_get_row_info(spng_ctx *ctx, struct spng_row_info *row_info); + +/* Encode */ +SPNG_API int spng_encode_image(spng_ctx *ctx, const void *img, size_t len, int fmt, int flags); + +/* Progressive encode */ +SPNG_API int spng_encode_scanline(spng_ctx *ctx, const void *scanline, size_t len); +SPNG_API int spng_encode_row(spng_ctx *ctx, const void *row, size_t len); +SPNG_API 
int spng_encode_chunks(spng_ctx *ctx); + +SPNG_API int spng_get_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr); +SPNG_API int spng_get_plte(spng_ctx *ctx, struct spng_plte *plte); +SPNG_API int spng_get_trns(spng_ctx *ctx, struct spng_trns *trns); +SPNG_API int spng_get_chrm(spng_ctx *ctx, struct spng_chrm *chrm); +SPNG_API int spng_get_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm_int); +SPNG_API int spng_get_gama(spng_ctx *ctx, double *gamma); +SPNG_API int spng_get_gama_int(spng_ctx *ctx, uint32_t *gama_int); +SPNG_API int spng_get_iccp(spng_ctx *ctx, struct spng_iccp *iccp); +SPNG_API int spng_get_sbit(spng_ctx *ctx, struct spng_sbit *sbit); +SPNG_API int spng_get_srgb(spng_ctx *ctx, uint8_t *rendering_intent); +SPNG_API int spng_get_text(spng_ctx *ctx, struct spng_text *text, uint32_t *n_text); +SPNG_API int spng_get_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd); +SPNG_API int spng_get_hist(spng_ctx *ctx, struct spng_hist *hist); +SPNG_API int spng_get_phys(spng_ctx *ctx, struct spng_phys *phys); +SPNG_API int spng_get_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t *n_splt); +SPNG_API int spng_get_time(spng_ctx *ctx, struct spng_time *time); +SPNG_API int spng_get_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t *n_chunks); + +/* Official extensions */ +SPNG_API int spng_get_offs(spng_ctx *ctx, struct spng_offs *offs); +SPNG_API int spng_get_exif(spng_ctx *ctx, struct spng_exif *exif); + + +SPNG_API int spng_set_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr); +SPNG_API int spng_set_plte(spng_ctx *ctx, struct spng_plte *plte); +SPNG_API int spng_set_trns(spng_ctx *ctx, struct spng_trns *trns); +SPNG_API int spng_set_chrm(spng_ctx *ctx, struct spng_chrm *chrm); +SPNG_API int spng_set_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm_int); +SPNG_API int spng_set_gama(spng_ctx *ctx, double gamma); +SPNG_API int spng_set_gama_int(spng_ctx *ctx, uint32_t gamma); +SPNG_API int spng_set_iccp(spng_ctx *ctx, struct spng_iccp *iccp); 
+SPNG_API int spng_set_sbit(spng_ctx *ctx, struct spng_sbit *sbit); +SPNG_API int spng_set_srgb(spng_ctx *ctx, uint8_t rendering_intent); +SPNG_API int spng_set_text(spng_ctx *ctx, struct spng_text *text, uint32_t n_text); +SPNG_API int spng_set_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd); +SPNG_API int spng_set_hist(spng_ctx *ctx, struct spng_hist *hist); +SPNG_API int spng_set_phys(spng_ctx *ctx, struct spng_phys *phys); +SPNG_API int spng_set_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t n_splt); +SPNG_API int spng_set_time(spng_ctx *ctx, struct spng_time *time); +SPNG_API int spng_set_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t n_chunks); + +/* Official extensions */ +SPNG_API int spng_set_offs(spng_ctx *ctx, struct spng_offs *offs); +SPNG_API int spng_set_exif(spng_ctx *ctx, struct spng_exif *exif); + + +SPNG_API const char *spng_strerror(int err); +SPNG_API const char *spng_version_string(void); + +#ifdef __cplusplus +} +#endif + +#endif /* SPNG_H */ diff --git a/externals/zlib-ng/CMakeLists.txt b/externals/zlib-ng/CMakeLists.txt new file mode 100644 index 000000000..4d3fac33b --- /dev/null +++ b/externals/zlib-ng/CMakeLists.txt @@ -0,0 +1,14 @@ +set(ZLIB_ENABLE_TESTS OFF) +set(ZLIB_COMPAT ON) +set(SKIP_INSTALL_ALL ON) + +option(BUILD_SHARED_LIBS "Build shared library" OFF) + +add_subdirectory(zlib-ng) + +# Set ZLIB variables for find_package used by other projects +set(ZLIB_INCLUDE_DIR ${CMAKE_BINARY_DIR}/zlib-ng CACHE STRING "Path to zlib include directory") +set(ZLIB_LIBRARY ZLIB::ZLIB CACHE STRING "Path to zlib library") + +# Setup zlib alias project so FindZLIB doesn't recreate it +add_library(ZLIB::ZLIB ALIAS zlib) diff --git a/externals/zlib-ng/zlib-ng b/externals/zlib-ng/zlib-ng new file mode 160000 index 000000000..c970422ca --- /dev/null +++ b/externals/zlib-ng/zlib-ng @@ -0,0 +1 @@ +Subproject commit c970422caa6b32d0488e5dede95719eb4a9a5bd8 diff --git a/src/android/app/src/main/jni/CMakeLists.txt 
b/src/android/app/src/main/jni/CMakeLists.txt index ed64db358..152cdaae0 100644 --- a/src/android/app/src/main/jni/CMakeLists.txt +++ b/src/android/app/src/main/jni/CMakeLists.txt @@ -29,8 +29,6 @@ add_library(citra-android SHARED game_settings.h id_cache.cpp id_cache.h - lodepng_image_interface.cpp - lodepng_image_interface.h mic.cpp mic.h native.cpp @@ -40,6 +38,6 @@ add_library(citra-android SHARED ) target_link_libraries(citra-android PRIVATE audio_core common core input_common network) -target_link_libraries(citra-android PRIVATE android camera2ndk EGL glad inih jnigraphics lodepng log mediandk yuv) +target_link_libraries(citra-android PRIVATE android camera2ndk EGL glad inih jnigraphics log mediandk yuv) set(CPACK_PACKAGE_EXECUTABLES ${CPACK_PACKAGE_EXECUTABLES} citra-android) diff --git a/src/android/app/src/main/jni/lodepng_image_interface.cpp b/src/android/app/src/main/jni/lodepng_image_interface.cpp deleted file mode 100644 index e42c3a82c..000000000 --- a/src/android/app/src/main/jni/lodepng_image_interface.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2019 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include "common/logging/log.h" -#include "jni/lodepng_image_interface.h" - -bool LodePNGImageInterface::DecodePNG(std::vector& dst, u32& width, u32& height, - const std::string& path) { - u32 lodepng_ret = lodepng::decode(dst, width, height, path); - if (lodepng_ret) { - LOG_CRITICAL(Frontend, "Failed to decode {} because {}", path, - lodepng_error_text(lodepng_ret)); - return false; - } - return true; -} - -bool LodePNGImageInterface::EncodePNG(const std::string& path, const std::vector& src, - u32 width, u32 height) { - u32 lodepng_ret = lodepng::encode(path, src, width, height); - if (lodepng_ret) { - LOG_CRITICAL(Frontend, "Failed to encode {} because {}", path, - lodepng_error_text(lodepng_ret)); - return false; - } - return true; -} diff --git a/src/android/app/src/main/jni/lodepng_image_interface.h b/src/android/app/src/main/jni/lodepng_image_interface.h deleted file mode 100644 index 6880b10a0..000000000 --- a/src/android/app/src/main/jni/lodepng_image_interface.h +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2019 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "core/frontend/image_interface.h" - -class LodePNGImageInterface final : public Frontend::ImageInterface { -public: - bool DecodePNG(std::vector& dst, u32& width, u32& height, const std::string& path) override; - bool EncodePNG(const std::string& path, const std::vector& src, u32 width, - u32 height) override; -}; diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index 7152700bb..a5a9bd88f 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -38,7 +38,6 @@ #include "jni/game_settings.h" #include "jni/id_cache.h" #include "jni/input_manager.h" -#include "jni/lodepng_image_interface.h" #include "jni/mic.h" #include "jni/native.h" #include "jni/ndk_motion.h" @@ -186,9 +185,6 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) { system.RegisterMiiSelector(std::make_shared()); system.RegisterSoftwareKeyboard(std::make_shared()); - // Register generic image interface - Core::System::GetInstance().RegisterImageInterface(std::make_shared()); - // Register real Mic factory Frontend::Mic::RegisterRealMicFactory(std::make_unique()); diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index be5163d50..a4085c83c 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -582,6 +582,8 @@ void GMainWindow::InitializeHotkeys() { }); connect_shortcut(QStringLiteral("Toggle Texture Dumping"), [&] { Settings::values.dump_textures = !Settings::values.dump_textures; }); + connect_shortcut(QStringLiteral("Toggle Custom Textures"), + [&] { Settings::values.custom_textures = !Settings::values.custom_textures; }); // We use "static" here in order to avoid capturing by lambda due to a MSVC bug, which makes // the variable hold a garbage value after this function exits static constexpr u16 SPEED_LIMIT_STEP = 5; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 87a4ae600..a8695cc3f 100644 --- 
a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -75,7 +75,7 @@ add_library(common STATIC common_precompiled_headers.h common_types.h construct.h - dds.h + dds-ktx.h error.cpp error.h file_util.cpp @@ -147,8 +147,8 @@ add_library(common STATIC create_target_directory_groups(common) -target_link_libraries(common PUBLIC fmt::fmt microprofile Boost::boost Boost::serialization spng_static) -target_link_libraries(common PRIVATE libzstd_static) +target_link_libraries(common PUBLIC fmt::fmt microprofile Boost::boost Boost::serialization) +target_link_libraries(common PRIVATE libzstd_static spng::spng) set_target_properties(common PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO}) if ("x86_64" IN_LIST ARCHITECTURE) diff --git a/src/common/async_handle.h b/src/common/async_handle.h index d7d540030..e137cf984 100644 --- a/src/common/async_handle.h +++ b/src/common/async_handle.h @@ -13,25 +13,27 @@ namespace Common { struct AsyncHandle { public: - [[nodiscard]] bool IsBuilt() noexcept { - return is_built.load(std::memory_order::relaxed); + AsyncHandle(bool is_done_ = false) : is_done{is_done_} {} + + [[nodiscard]] bool IsDone() noexcept { + return is_done.load(std::memory_order::relaxed); } - void WaitBuilt() noexcept { + void WaitDone() noexcept { std::unique_lock lock{mutex}; - condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + condvar.wait(lock, [this] { return is_done.load(std::memory_order::relaxed); }); } - void MarkBuilt() noexcept { + void MarkDone(bool done = true) noexcept { std::scoped_lock lock{mutex}; - is_built = true; + is_done = done; condvar.notify_all(); } private: std::condition_variable condvar; std::mutex mutex; - std::atomic_bool is_built{false}; + std::atomic_bool is_done{false}; }; } // namespace Common diff --git a/src/common/bit_util.h b/src/common/bit_util.h index 13368b439..e4e6287f3 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -45,19 +45,19 @@ template } template - requires 
std::is_unsigned_v +requires std::is_unsigned_v [[nodiscard]] constexpr bool IsPow2(T value) { return std::has_single_bit(value); } template - requires std::is_integral_v +requires std::is_integral_v [[nodiscard]] T NextPow2(T value) { return static_cast(1ULL << ((8U * sizeof(T)) - std::countl_zero(value - 1U))); } template - requires std::is_integral_v +requires std::is_integral_v [[nodiscard]] constexpr bool Bit(const T value) { static_assert(bit_index < BitSize(), "bit_index must be smaller than size of T"); return ((value >> bit_index) & T(1)) == T(1); diff --git a/src/common/dds-ktx.h b/src/common/dds-ktx.h new file mode 100644 index 000000000..86480c8f9 --- /dev/null +++ b/src/common/dds-ktx.h @@ -0,0 +1,1465 @@ +// +// Copyright 2018 Sepehr Taghdisian (septag@github). All rights reserved. +// License: https://github.com/septag/dds-ktx#license-bsd-2-clause +// +// Many parts of this code is taken from bimg library: +// https://github.com/bkaradzic/bimg +// +// Copyright 2011-2019 Branimir Karadzic. All rights reserved. +// License: https://github.com/bkaradzic/bimg#license-bsd-2-clause +// +// dds-ktx.h - v1.1.0 - Reader/Writer for DDS/KTX formats +// Parses DDS and KTX files from a memory blob, written in C99 +// +// Supported formats: +// For supported formats, see ddsktx_format enum. 
+// Both KTX/DDS parsers support all formats defined in ddsktx_format +// +// Overridable macros: +// DDSKTX_API Define any function specifier for public functions (default: extern "C" in C++, empty in C) +// ddsktx_memcpy default: memcpy(dst, src, size) +// ddsktx_memset default: memset(dst, v, size) +// ddsktx_assert default: assert(a) +// ddsktx_strcpy default: strcpy(dst, src) +// ddsktx_memcmp default: memcmp(ptr1, ptr2, size) +// +// API: +// bool ddsktx_parse(ddsktx_texture_info* tc, const void* file_data, int size, +// ddsktx_error* err); +// Parses texture file and fills the ddsktx_texture_info struct +// Returns true if successfully parsed, false if failed with an error message inside +// ddsktx_error parameter (optional). After format is parsed, you can read the contents +// of ddsktx_format and create your GPU texture. To get pointer to mips and slices see +// ddsktx_get_sub function +// +// void ddsktx_get_sub(const ddsktx_texture_info* tex, ddsktx_sub_data* buff, +// const void* file_data, int size, +// int array_idx, int slice_face_idx, int mip_idx); +// Gets sub-image data from a parsed texture file. +// The user must provide the container object and the original file data which was passed +// to ddsktx_parse. array_idx: array index (0..num_layers). slice_face_idx: depth-slice +// or cube-face index.
+// if 'flags' has DDSKTX_TEXTURE_FLAG_CUBEMAP bit, then this value +// represents cube-face-index (0..DDSKTX_CUBE_FACE_COUNT) else it +// represents depth slice index (0..depth) +// mip_idx: mip index (0..num_mips-1 in ddsktx_texture_info) +// +// const char* ddsktx_format_str(ddsktx_format format); +// Converts a format enumeration to string +// +// bool ddsktx_format_compressed(ddsktx_format format); +// Returns true if format is compressed +// +// Example (for 2D textures only): +// int size; +// void* dds_data = load_file("test.dds", &size); +// assert(dds_data); +// ddsktx_texture_info tc = {0}; +// if (ddsktx_parse(&tc, dds_data, size, NULL)) { +// assert(tc.depth == 1); +// assert(!(tc.flags & DDSKTX_TEXTURE_FLAG_CUBEMAP)); +// assert(tc.num_layers == 1); +// // Create GPU texture from tc data +// for (int mip = 0; mip < tc.num_mips; mip++) { +// ddsktx_sub_data sub_data; +// ddsktx_get_sub(&tc, &sub_data, dds_data, size, 0, 0, mip); +// // Fill/Set texture sub resource data (mips in this case) +// } +// } +// free(dds_data); // memory must be valid during ddsktx_ calls +// +// Version history: +// 0.9.0 Initial release, ktx is incomplete +// 1.0.0 Api change: ddsktx_sub_data +// Added KTX support +// 1.0.1 Fixed major bugs in KTX parsing +// 1.1.0 Fixed bugs in get_sub routine, refactored some parts, image-viewer example +// +// TODO +// Write KTX/DDS +// Read KTX metadata.
currently it just stores the offset/size to the metadata block +// + +#pragma once + +#include +#include +#include + +#ifndef DDSKTX_API +#ifdef __cplusplus +#define DDSKTX_API extern "C" +#else +#define DDSKTX_API +#endif +#endif + +typedef struct ddsktx_sub_data { + const void* buff; + int width; + int height; + int size_bytes; + int row_pitch_bytes; +} ddsktx_sub_data; + +typedef enum ddsktx_format { + DDSKTX_FORMAT_BC1, // DXT1 + DDSKTX_FORMAT_BC2, // DXT3 + DDSKTX_FORMAT_BC3, // DXT5 + DDSKTX_FORMAT_BC4, // ATI1 + DDSKTX_FORMAT_BC5, // ATI2 + DDSKTX_FORMAT_BC6H, // BC6H + DDSKTX_FORMAT_BC7, // BC7 + DDSKTX_FORMAT_ETC1, // ETC1 RGB8 + DDSKTX_FORMAT_ETC2, // ETC2 RGB8 + DDSKTX_FORMAT_ETC2A, // ETC2 RGBA8 + DDSKTX_FORMAT_ETC2A1, // ETC2 RGBA8A1 + DDSKTX_FORMAT_PTC12, // PVRTC1 RGB 2bpp + DDSKTX_FORMAT_PTC14, // PVRTC1 RGB 4bpp + DDSKTX_FORMAT_PTC12A, // PVRTC1 RGBA 2bpp + DDSKTX_FORMAT_PTC14A, // PVRTC1 RGBA 4bpp + DDSKTX_FORMAT_PTC22, // PVRTC2 RGBA 2bpp + DDSKTX_FORMAT_PTC24, // PVRTC2 RGBA 4bpp + DDSKTX_FORMAT_ATC, // ATC RGB 4BPP + DDSKTX_FORMAT_ATCE, // ATCE RGBA 8 BPP explicit alpha + DDSKTX_FORMAT_ATCI, // ATCI RGBA 8 BPP interpolated alpha + DDSKTX_FORMAT_ASTC4x4, // ASTC 4x4 8.0 BPP + DDSKTX_FORMAT_ASTC5x5, // ASTC 5x5 5.12 BPP + DDSKTX_FORMAT_ASTC6x6, // ASTC 6x6 3.56 BPP + DDSKTX_FORMAT_ASTC8x5, // ASTC 8x5 3.20 BPP + DDSKTX_FORMAT_ASTC8x6, // ASTC 8x6 2.67 BPP + DDSKTX_FORMAT_ASTC10x5, // ASTC 10x5 2.56 BPP + _DDSKTX_FORMAT_COMPRESSED, + DDSKTX_FORMAT_A8, + DDSKTX_FORMAT_R8, + DDSKTX_FORMAT_RGBA8, + DDSKTX_FORMAT_RGBA8S, + DDSKTX_FORMAT_RG16, + DDSKTX_FORMAT_RGB8, + DDSKTX_FORMAT_R16, + DDSKTX_FORMAT_R32F, + DDSKTX_FORMAT_R16F, + DDSKTX_FORMAT_RG16F, + DDSKTX_FORMAT_RG16S, + DDSKTX_FORMAT_RGBA16F, + DDSKTX_FORMAT_RGBA16, + DDSKTX_FORMAT_BGRA8, + DDSKTX_FORMAT_RGB10A2, + DDSKTX_FORMAT_RG11B10F, + DDSKTX_FORMAT_RG8, + DDSKTX_FORMAT_RG8S, + _DDSKTX_FORMAT_COUNT +} ddsktx_format; + +typedef enum ddsktx_texture_flags { + DDSKTX_TEXTURE_FLAG_CUBEMAP = 0x01, 
+ DDSKTX_TEXTURE_FLAG_SRGB = 0x02, + DDSKTX_TEXTURE_FLAG_ALPHA = 0x04, // Has alpha channel + DDSKTX_TEXTURE_FLAG_DDS = 0x08, // container was DDS file + DDSKTX_TEXTURE_FLAG_KTX = 0x10, // container was KTX file + DDSKTX_TEXTURE_FLAG_VOLUME = 0x20, // 3D volume +} ddsktx_texture_flags; + +typedef struct ddsktx_texture_info { + int data_offset; // start offset of pixel data + int size_bytes; + ddsktx_format format; + unsigned int flags; // ddsktx_texture_flags + int width; + int height; + int depth; + int num_layers; + int num_mips; + int bpp; + int metadata_offset; // ktx only + int metadata_size; // ktx only +} ddsktx_texture_info; + +typedef enum ddsktx_cube_face { + DDSKTX_CUBE_FACE_X_POSITIVE = 0, + DDSKTX_CUBE_FACE_X_NEGATIVE, + DDSKTX_CUBE_FACE_Y_POSITIVE, + DDSKTX_CUBE_FACE_Y_NEGATIVE, + DDSKTX_CUBE_FACE_Z_POSITIVE, + DDSKTX_CUBE_FACE_Z_NEGATIVE, + DDSKTX_CUBE_FACE_COUNT +} ddsktx_cube_face; + +typedef struct ddsktx_error { + char msg[256]; +} ddsktx_error; + +#ifdef __cplusplus +#define ddsktx_default(_v) = _v +#else +#define ddsktx_default(_v) +#endif + +DDSKTX_API bool ddsktx_parse(ddsktx_texture_info* tc, const void* file_data, int size, + ddsktx_error* err ddsktx_default(NULL)); +DDSKTX_API void ddsktx_get_sub(const ddsktx_texture_info* tex, ddsktx_sub_data* buff, + const void* file_data, int size, int array_idx, int slice_face_idx, + int mip_idx); +DDSKTX_API const char* ddsktx_format_str(ddsktx_format format); +DDSKTX_API bool ddsktx_format_compressed(ddsktx_format format); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Implementation +#ifdef DDSKTX_IMPLEMENT + +#define stc__makefourcc(_a, _b, _c, _d) \ + (((uint32_t)(_a) | ((uint32_t)(_b) << 8) | ((uint32_t)(_c) << 16) | ((uint32_t)(_d) << 24))) + +// DDS: https://docs.microsoft.com/en-us/windows/desktop/direct3ddds/dx-graphics-dds-pguide +#define DDSKTX__DDS_HEADER_SIZE 124 +#define DDSKTX__DDS_MAGIC stc__makefourcc('D', 'D', 'S', ' ') 
+#define DDSKTX__DDS_DXT1 stc__makefourcc('D', 'X', 'T', '1') +#define DDSKTX__DDS_DXT2 stc__makefourcc('D', 'X', 'T', '2') +#define DDSKTX__DDS_DXT3 stc__makefourcc('D', 'X', 'T', '3') +#define DDSKTX__DDS_DXT4 stc__makefourcc('D', 'X', 'T', '4') +#define DDSKTX__DDS_DXT5 stc__makefourcc('D', 'X', 'T', '5') +#define DDSKTX__DDS_ATI1 stc__makefourcc('A', 'T', 'I', '1') +#define DDSKTX__DDS_BC4U stc__makefourcc('B', 'C', '4', 'U') +#define DDSKTX__DDS_ATI2 stc__makefourcc('A', 'T', 'I', '2') +#define DDSKTX__DDS_BC5U stc__makefourcc('B', 'C', '5', 'U') +#define DDSKTX__DDS_DX10 stc__makefourcc('D', 'X', '1', '0') + +#define DDSKTX__DDS_ETC1 stc__makefourcc('E', 'T', 'C', '1') +#define DDSKTX__DDS_ETC2 stc__makefourcc('E', 'T', 'C', '2') +#define DDSKTX__DDS_ET2A stc__makefourcc('E', 'T', '2', 'A') +#define DDSKTX__DDS_PTC2 stc__makefourcc('P', 'T', 'C', '2') +#define DDSKTX__DDS_PTC4 stc__makefourcc('P', 'T', 'C', '4') +#define DDSKTX__DDS_ATC stc__makefourcc('A', 'T', 'C', ' ') +#define DDSKTX__DDS_ATCE stc__makefourcc('A', 'T', 'C', 'E') +#define DDSKTX__DDS_ATCI stc__makefourcc('A', 'T', 'C', 'I') +#define DDSKTX__DDS_ASTC4x4 stc__makefourcc('A', 'S', '4', '4') +#define DDSKTX__DDS_ASTC5x5 stc__makefourcc('A', 'S', '5', '5') +#define DDSKTX__DDS_ASTC6x6 stc__makefourcc('A', 'S', '6', '6') +#define DDSKTX__DDS_ASTC8x5 stc__makefourcc('A', 'S', '8', '5') +#define DDSKTX__DDS_ASTC8x6 stc__makefourcc('A', 'S', '8', '6') +#define DDSKTX__DDS_ASTC10x5 stc__makefourcc('A', 'S', ':', '5') + +#define DDSKTX__DDS_R8G8B8 20 +#define DDSKTX__DDS_A8R8G8B8 21 +#define DDSKTX__DDS_R5G6B5 23 +#define DDSKTX__DDS_A1R5G5B5 25 +#define DDSKTX__DDS_A4R4G4B4 26 +#define DDSKTX__DDS_A2B10G10R10 31 +#define DDSKTX__DDS_G16R16 34 +#define DDSKTX__DDS_A2R10G10B10 35 +#define DDSKTX__DDS_A16B16G16R16 36 +#define DDSKTX__DDS_A8L8 51 +#define DDSKTX__DDS_R16F 111 +#define DDSKTX__DDS_G16R16F 112 +#define DDSKTX__DDS_A16B16G16R16F 113 +#define DDSKTX__DDS_R32F 114 +#define 
DDSKTX__DDS_G32R32F 115 +#define DDSKTX__DDS_A32B32G32R32F 116 + +#define DDSKTX__DDS_FORMAT_R32G32B32A32_FLOAT 2 +#define DDSKTX__DDS_FORMAT_R32G32B32A32_UINT 3 +#define DDSKTX__DDS_FORMAT_R16G16B16A16_FLOAT 10 +#define DDSKTX__DDS_FORMAT_R16G16B16A16_UNORM 11 +#define DDSKTX__DDS_FORMAT_R16G16B16A16_UINT 12 +#define DDSKTX__DDS_FORMAT_R32G32_FLOAT 16 +#define DDSKTX__DDS_FORMAT_R32G32_UINT 17 +#define DDSKTX__DDS_FORMAT_R10G10B10A2_UNORM 24 +#define DDSKTX__DDS_FORMAT_R11G11B10_FLOAT 26 +#define DDSKTX__DDS_FORMAT_R8G8B8A8_UNORM 28 +#define DDSKTX__DDS_FORMAT_R8G8B8A8_UNORM_SRGB 29 +#define DDSKTX__DDS_FORMAT_R16G16_FLOAT 34 +#define DDSKTX__DDS_FORMAT_R16G16_UNORM 35 +#define DDSKTX__DDS_FORMAT_R32_FLOAT 41 +#define DDSKTX__DDS_FORMAT_R32_UINT 42 +#define DDSKTX__DDS_FORMAT_R8G8_UNORM 49 +#define DDSKTX__DDS_FORMAT_R16_FLOAT 54 +#define DDSKTX__DDS_FORMAT_R16_UNORM 56 +#define DDSKTX__DDS_FORMAT_R8_UNORM 61 +#define DDSKTX__DDS_FORMAT_R1_UNORM 66 +#define DDSKTX__DDS_FORMAT_BC1_UNORM 71 +#define DDSKTX__DDS_FORMAT_BC1_UNORM_SRGB 72 +#define DDSKTX__DDS_FORMAT_BC2_UNORM 74 +#define DDSKTX__DDS_FORMAT_BC2_UNORM_SRGB 75 +#define DDSKTX__DDS_FORMAT_BC3_UNORM 77 +#define DDSKTX__DDS_FORMAT_BC3_UNORM_SRGB 78 +#define DDSKTX__DDS_FORMAT_BC4_UNORM 80 +#define DDSKTX__DDS_FORMAT_BC5_UNORM 83 +#define DDSKTX__DDS_FORMAT_B5G6R5_UNORM 85 +#define DDSKTX__DDS_FORMAT_B5G5R5A1_UNORM 86 +#define DDSKTX__DDS_FORMAT_B8G8R8A8_UNORM 87 +#define DDSKTX__DDS_FORMAT_B8G8R8A8_UNORM_SRGB 91 +#define DDSKTX__DDS_FORMAT_BC6H_SF16 96 +#define DDSKTX__DDS_FORMAT_BC7_UNORM 98 +#define DDSKTX__DDS_FORMAT_BC7_UNORM_SRGB 99 +#define DDSKTX__DDS_FORMAT_B4G4R4A4_UNORM 115 + +#define DDSKTX__DDS_DX10_DIMENSION_TEXTURE2D 3 +#define DDSKTX__DDS_DX10_DIMENSION_TEXTURE3D 4 +#define DDSKTX__DDS_DX10_MISC_TEXTURECUBE 4 + +#define DDSKTX__DDSD_CAPS 0x00000001 +#define DDSKTX__DDSD_HEIGHT 0x00000002 +#define DDSKTX__DDSD_WIDTH 0x00000004 +#define DDSKTX__DDSD_PITCH 0x00000008 +#define 
DDSKTX__DDSD_PIXELFORMAT 0x00001000 +#define DDSKTX__DDSD_MIPMAPCOUNT 0x00020000 +#define DDSKTX__DDSD_LINEARSIZE 0x00080000 +#define DDSKTX__DDSD_DEPTH 0x00800000 + +#define DDSKTX__DDPF_ALPHAPIXELS 0x00000001 +#define DDSKTX__DDPF_ALPHA 0x00000002 +#define DDSKTX__DDPF_FOURCC 0x00000004 +#define DDSKTX__DDPF_INDEXED 0x00000020 +#define DDSKTX__DDPF_RGB 0x00000040 +#define DDSKTX__DDPF_YUV 0x00000200 +#define DDSKTX__DDPF_LUMINANCE 0x00020000 +#define DDSKTX__DDPF_BUMPDUDV 0x00080000 + +#define DDSKTX__DDSCAPS_COMPLEX 0x00000008 +#define DDSKTX__DDSCAPS_TEXTURE 0x00001000 +#define DDSKTX__DDSCAPS_MIPMAP 0x00400000 + +#define DDSKTX__DDSCAPS2_VOLUME 0x00200000 +#define DDSKTX__DDSCAPS2_CUBEMAP 0x00000200 +#define DDSKTX__DDSCAPS2_CUBEMAP_POSITIVEX 0x00000400 +#define DDSKTX__DDSCAPS2_CUBEMAP_NEGATIVEX 0x00000800 +#define DDSKTX__DDSCAPS2_CUBEMAP_POSITIVEY 0x00001000 +#define DDSKTX__DDSCAPS2_CUBEMAP_NEGATIVEY 0x00002000 +#define DDSKTX__DDSCAPS2_CUBEMAP_POSITIVEZ 0x00004000 +#define DDSKTX__DDSCAPS2_CUBEMAP_NEGATIVEZ 0x00008000 + +#define DDSKTX__DDSCAPS2_CUBEMAP_ALLSIDES \ + (0 | DDSKTX__DDSCAPS2_CUBEMAP_POSITIVEX | DDSKTX__DDSCAPS2_CUBEMAP_NEGATIVEX | \ + DDSKTX__DDSCAPS2_CUBEMAP_POSITIVEY | DDSKTX__DDSCAPS2_CUBEMAP_NEGATIVEY | \ + DDSKTX__DDSCAPS2_CUBEMAP_POSITIVEZ | DDSKTX__DDSCAPS2_CUBEMAP_NEGATIVEZ) + +#pragma pack(push, 1) +typedef struct ddsktx__dds_pixel_format { + uint32_t size; + uint32_t flags; + uint32_t fourcc; + uint32_t rgb_bit_count; + uint32_t bit_mask[4]; +} ddsktx__dds_pixel_format; + +// https://docs.microsoft.com/en-us/windows/desktop/direct3ddds/dds-header +typedef struct ddsktx__dds_header { + uint32_t size; + uint32_t flags; + uint32_t height; + uint32_t width; + uint32_t pitch_lin_size; + uint32_t depth; + uint32_t mip_count; + uint32_t reserved1[11]; + ddsktx__dds_pixel_format pixel_format; + uint32_t caps1; + uint32_t caps2; + uint32_t caps3; + uint32_t caps4; + uint32_t reserved2; +} ddsktx__dds_header; + +// 
https://docs.microsoft.com/en-us/windows/desktop/direct3ddds/dds-header-dxt10 +typedef struct ddsktx__dds_header_dxgi { + uint32_t dxgi_format; + uint32_t dimension; + uint32_t misc_flags; + uint32_t array_size; + uint32_t misc_flags2; +} ddsktx__dds_header_dxgi; + +typedef struct ddsktx__ktx_header { + uint8_t id[8]; + uint32_t endianess; + uint32_t type; + uint32_t type_size; + uint32_t format; + uint32_t internal_format; + uint32_t base_internal_format; + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t array_count; + uint32_t face_count; + uint32_t mip_count; + uint32_t metadata_size; +} ddsktx__ktx_header; +#pragma pack(pop) + +typedef struct ddsktx__dds_translate_fourcc_format { + uint32_t dds_format; + ddsktx_format format; + bool srgb; +} ddsktx__dds_translate_fourcc_format; + +typedef struct ddsktx__dds_translate_pixel_format { + uint32_t bit_count; + uint32_t flags; + uint32_t bit_mask[4]; + ddsktx_format format; +} ddsktx__dds_translate_pixel_format; + +typedef struct ddsktx__mem_reader { + const uint8_t* buff; + int total; + int offset; +} ddsktx__mem_reader; + +typedef struct ddsktx__block_info { + uint8_t bpp; + uint8_t block_width; + uint8_t block_height; + uint8_t block_size; + uint8_t min_block_x; + uint8_t min_block_y; + uint8_t depth_bits; + uint8_t stencil_bits; + uint8_t r_bits; + uint8_t g_bits; + uint8_t b_bits; + uint8_t a_bits; + uint8_t encoding; +} ddsktx__block_info; + +#ifndef ddsktx_memcpy +#include +#define ddsktx_memcpy(_dst, _src, _size) memcpy((_dst), (_src), (_size)) +#endif + +#ifndef ddsktx_memset +#include +#define ddsktx_memset(_dst, _v, _size) memset((_dst), (_v), (_size)) +#endif + +#ifndef ddsktx_assert +#include +#define ddsktx_assert(_a) assert(_a) +#endif + +#ifndef ddsktx_strcpy +#include +#ifdef _MSC_VER +#define ddsktx_strcpy(_dst, _src) strcpy_s((_dst), sizeof(_dst), (_src)) +#else +#define ddsktx_strcpy(_dst, _src) strcpy((_dst), (_src)) +#endif +#endif + +#ifndef ddsktx_memcmp +#include +#define 
ddsktx_memcmp(_ptr1, _ptr2, _num) memcmp((_ptr1), (_ptr2), (_num)) +#endif + +#define ddsktx__max(a, b) ((a) > (b) ? (a) : (b)) +#define ddsktx__min(a, b) ((a) < (b) ? (a) : (b)) +#define ddsktx__align_mask(_value, _mask) (((_value) + (_mask)) & ((~0) & (~(_mask)))) +#define ddsktx__err(_err, _msg) \ + if (_err) \ + ddsktx_strcpy(_err->msg, _msg); \ + return false + +static const ddsktx__dds_translate_fourcc_format k__translate_dds_fourcc[] = { + {DDSKTX__DDS_DXT1, DDSKTX_FORMAT_BC1, false}, + {DDSKTX__DDS_DXT2, DDSKTX_FORMAT_BC2, false}, + {DDSKTX__DDS_DXT3, DDSKTX_FORMAT_BC2, false}, + {DDSKTX__DDS_DXT4, DDSKTX_FORMAT_BC3, false}, + {DDSKTX__DDS_DXT5, DDSKTX_FORMAT_BC3, false}, + {DDSKTX__DDS_ATI1, DDSKTX_FORMAT_BC4, false}, + {DDSKTX__DDS_BC4U, DDSKTX_FORMAT_BC4, false}, + {DDSKTX__DDS_ATI2, DDSKTX_FORMAT_BC5, false}, + {DDSKTX__DDS_BC5U, DDSKTX_FORMAT_BC5, false}, + {DDSKTX__DDS_ETC1, DDSKTX_FORMAT_ETC1, false}, + {DDSKTX__DDS_ETC2, DDSKTX_FORMAT_ETC2, false}, + {DDSKTX__DDS_ET2A, DDSKTX_FORMAT_ETC2A, false}, + {DDSKTX__DDS_PTC2, DDSKTX_FORMAT_PTC12A, false}, + {DDSKTX__DDS_PTC4, DDSKTX_FORMAT_PTC14A, false}, + {DDSKTX__DDS_ATC, DDSKTX_FORMAT_ATC, false}, + {DDSKTX__DDS_ATCE, DDSKTX_FORMAT_ATCE, false}, + {DDSKTX__DDS_ATCI, DDSKTX_FORMAT_ATCI, false}, + {DDSKTX__DDS_ASTC4x4, DDSKTX_FORMAT_ASTC4x4, false}, + {DDSKTX__DDS_ASTC5x5, DDSKTX_FORMAT_ASTC5x5, false}, + {DDSKTX__DDS_ASTC6x6, DDSKTX_FORMAT_ASTC6x6, false}, + {DDSKTX__DDS_ASTC8x5, DDSKTX_FORMAT_ASTC8x5, false}, + {DDSKTX__DDS_ASTC8x6, DDSKTX_FORMAT_ASTC8x6, false}, + {DDSKTX__DDS_ASTC10x5, DDSKTX_FORMAT_ASTC10x5, false}, + {DDSKTX__DDS_A16B16G16R16, DDSKTX_FORMAT_RGBA16, false}, + {DDSKTX__DDS_A16B16G16R16F, DDSKTX_FORMAT_RGBA16F, false}, + {DDSKTX__DDPF_RGB | DDSKTX__DDPF_ALPHAPIXELS, DDSKTX_FORMAT_BGRA8, false}, + {DDSKTX__DDPF_INDEXED, DDSKTX_FORMAT_R8, false}, + {DDSKTX__DDPF_LUMINANCE, DDSKTX_FORMAT_R8, false}, + {DDSKTX__DDPF_ALPHA, DDSKTX_FORMAT_R8, false}, + {DDSKTX__DDS_R16F, DDSKTX_FORMAT_R16F, 
false}, + {DDSKTX__DDS_R32F, DDSKTX_FORMAT_R32F, false}, + {DDSKTX__DDS_A8L8, DDSKTX_FORMAT_RG8, false}, + {DDSKTX__DDS_G16R16, DDSKTX_FORMAT_RG16, false}, + {DDSKTX__DDS_G16R16F, DDSKTX_FORMAT_RG16F, false}, + {DDSKTX__DDS_R8G8B8, DDSKTX_FORMAT_RGB8, false}, + {DDSKTX__DDS_A8R8G8B8, DDSKTX_FORMAT_BGRA8, false}, + {DDSKTX__DDS_A16B16G16R16, DDSKTX_FORMAT_RGBA16, false}, + {DDSKTX__DDS_A16B16G16R16F, DDSKTX_FORMAT_RGBA16F, false}, + {DDSKTX__DDS_A2B10G10R10, DDSKTX_FORMAT_RGB10A2, false}, +}; + +static const ddsktx__dds_translate_fourcc_format k__translate_dxgi[] = { + {DDSKTX__DDS_FORMAT_BC1_UNORM, DDSKTX_FORMAT_BC1, false}, + {DDSKTX__DDS_FORMAT_BC1_UNORM_SRGB, DDSKTX_FORMAT_BC1, true}, + {DDSKTX__DDS_FORMAT_BC2_UNORM, DDSKTX_FORMAT_BC2, false}, + {DDSKTX__DDS_FORMAT_BC2_UNORM_SRGB, DDSKTX_FORMAT_BC2, true}, + {DDSKTX__DDS_FORMAT_BC3_UNORM, DDSKTX_FORMAT_BC3, false}, + {DDSKTX__DDS_FORMAT_BC3_UNORM_SRGB, DDSKTX_FORMAT_BC3, true}, + {DDSKTX__DDS_FORMAT_BC4_UNORM, DDSKTX_FORMAT_BC4, false}, + {DDSKTX__DDS_FORMAT_BC5_UNORM, DDSKTX_FORMAT_BC5, false}, + {DDSKTX__DDS_FORMAT_BC6H_SF16, DDSKTX_FORMAT_BC6H, false}, + {DDSKTX__DDS_FORMAT_BC7_UNORM, DDSKTX_FORMAT_BC7, false}, + {DDSKTX__DDS_FORMAT_BC7_UNORM_SRGB, DDSKTX_FORMAT_BC7, true}, + + {DDSKTX__DDS_FORMAT_R8_UNORM, DDSKTX_FORMAT_R8, false}, + {DDSKTX__DDS_FORMAT_R16_UNORM, DDSKTX_FORMAT_R16, false}, + {DDSKTX__DDS_FORMAT_R16_FLOAT, DDSKTX_FORMAT_R16F, false}, + {DDSKTX__DDS_FORMAT_R32_FLOAT, DDSKTX_FORMAT_R32F, false}, + {DDSKTX__DDS_FORMAT_R8G8_UNORM, DDSKTX_FORMAT_RG8, false}, + {DDSKTX__DDS_FORMAT_R16G16_UNORM, DDSKTX_FORMAT_RG16, false}, + {DDSKTX__DDS_FORMAT_R16G16_FLOAT, DDSKTX_FORMAT_RG16F, false}, + {DDSKTX__DDS_FORMAT_B8G8R8A8_UNORM, DDSKTX_FORMAT_BGRA8, false}, + {DDSKTX__DDS_FORMAT_B8G8R8A8_UNORM_SRGB, DDSKTX_FORMAT_BGRA8, true}, + {DDSKTX__DDS_FORMAT_R8G8B8A8_UNORM, DDSKTX_FORMAT_RGBA8, false}, + {DDSKTX__DDS_FORMAT_R8G8B8A8_UNORM_SRGB, DDSKTX_FORMAT_RGBA8, true}, + {DDSKTX__DDS_FORMAT_R16G16B16A16_UNORM, 
DDSKTX_FORMAT_RGBA16, false}, + {DDSKTX__DDS_FORMAT_R16G16B16A16_FLOAT, DDSKTX_FORMAT_RGBA16F, false}, + {DDSKTX__DDS_FORMAT_R10G10B10A2_UNORM, DDSKTX_FORMAT_RGB10A2, false}, + {DDSKTX__DDS_FORMAT_R11G11B10_FLOAT, DDSKTX_FORMAT_RG11B10F, false}, +}; + +static const ddsktx__dds_translate_pixel_format k__translate_dds_pixel[] = { + {8, DDSKTX__DDPF_LUMINANCE, {0x000000ff, 0x00000000, 0x00000000, 0x00000000}, DDSKTX_FORMAT_R8}, + {16, + DDSKTX__DDPF_BUMPDUDV, + {0x000000ff, 0x0000ff00, 0x00000000, 0x00000000}, + DDSKTX_FORMAT_RG8S}, + {24, DDSKTX__DDPF_RGB, {0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}, DDSKTX_FORMAT_RGB8}, + {24, DDSKTX__DDPF_RGB, {0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000}, DDSKTX_FORMAT_RGB8}, + {32, DDSKTX__DDPF_RGB, {0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}, DDSKTX_FORMAT_BGRA8}, + {32, + DDSKTX__DDPF_RGB | DDSKTX__DDPF_ALPHAPIXELS, + {0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000}, + DDSKTX_FORMAT_RGBA8}, + {32, + DDSKTX__DDPF_BUMPDUDV, + {0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000}, + DDSKTX_FORMAT_RGBA8S}, + {32, DDSKTX__DDPF_RGB, {0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000}, DDSKTX_FORMAT_BGRA8}, + {32, + DDSKTX__DDPF_RGB | DDSKTX__DDPF_ALPHAPIXELS, + {0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000}, + DDSKTX_FORMAT_BGRA8}, // D3DFMT_A8R8G8B8 + {32, + DDSKTX__DDPF_RGB | DDSKTX__DDPF_ALPHAPIXELS, + {0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}, + DDSKTX_FORMAT_BGRA8}, // D3DFMT_X8R8G8B8 + {32, + DDSKTX__DDPF_RGB | DDSKTX__DDPF_ALPHAPIXELS, + {0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000}, + DDSKTX_FORMAT_RGB10A2}, + {32, DDSKTX__DDPF_RGB, {0x0000ffff, 0xffff0000, 0x00000000, 0x00000000}, DDSKTX_FORMAT_RG16}, + {32, + DDSKTX__DDPF_BUMPDUDV, + {0x0000ffff, 0xffff0000, 0x00000000, 0x00000000}, + DDSKTX_FORMAT_RG16S}}; + +typedef enum ddsktx__encode_type { + DDSKTX__ENCODE_UNORM, + DDSKTX__ENCODE_SNORM, + DDSKTX__ENCODE_FLOAT, + DDSKTX__ENCODE_INT, + DDSKTX__ENCODE_UINT, + DDSKTX__ENCODE_COUNT +} ddsktx__encode_type; + 
+static const ddsktx__block_info k__block_info[] = { + // +-------------------------------------------- bits per pixel + // | +----------------------------------------- block width + // | | +-------------------------------------- block height + // | | | +---------------------------------- block size + // | | | | +------------------------------- min blocks x + // | | | | | +---------------------------- min blocks y + // | | | | | | +------------------------ depth bits + // | | | | | | | +--------------------- stencil bits + // | | | | | | | | +---+---+---+----- r, g, b, a bits + // | | | | | | | | r g b a +-- encoding type + // | | | | | | | | | | | | | + {4, 4, 4, 8, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // BC1 + {8, 4, 4, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // BC2 + {8, 4, 4, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // BC3 + {4, 4, 4, 8, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // BC4 + {8, 4, 4, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // BC5 + {8, 4, 4, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_FLOAT)}, // BC6H + {8, 4, 4, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // BC7 + {4, 4, 4, 8, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ETC1 + {4, 4, 4, 8, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ETC2 + {8, 4, 4, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ETC2A + {4, 4, 4, 8, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ETC2A1 + {2, 8, 4, 8, 2, 2, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // PTC12 + {4, 4, 4, 8, 2, 2, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // PTC14 + {2, 8, 4, 8, 2, 2, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // PTC12A + {4, 4, 4, 8, 2, 2, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // PTC14A + {2, 8, 4, 8, 2, 2, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // PTC22 + {4, 4, 4, 8, 2, 2, 0, 0, 0, 0, 
0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // PTC24 + {4, 4, 4, 8, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ATC + {8, 4, 4, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ATCE + {8, 4, 4, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ATCI + {8, 4, 4, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ASTC4x4 + {6, 5, 5, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ASTC5x5 + {4, 6, 6, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ASTC6x6 + {4, 8, 5, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ASTC8x5 + {3, 8, 6, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ASTC8x6 + {3, 10, 5, 16, 1, 1, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // ASTC10x5 + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_COUNT)}, // Unknown + {8, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 8, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // A8 + {8, 1, 1, 1, 1, 1, 0, 0, 8, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // R8 + {32, 1, 1, 4, 1, 1, 0, 0, 8, 8, 8, 8, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // RGBA8 + {32, 1, 1, 4, 1, 1, 0, 0, 8, 8, 8, 8, (uint8_t)(DDSKTX__ENCODE_SNORM)}, // RGBA8S + {32, 1, 1, 4, 1, 1, 0, 0, 16, 16, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // RG16 + {24, 1, 1, 3, 1, 1, 0, 0, 8, 8, 8, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // RGB8 + {16, 1, 1, 2, 1, 1, 0, 0, 16, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // R16 + {32, 1, 1, 4, 1, 1, 0, 0, 32, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_FLOAT)}, // R32F + {16, 1, 1, 2, 1, 1, 0, 0, 16, 0, 0, 0, (uint8_t)(DDSKTX__ENCODE_FLOAT)}, // R16F + {32, 1, 1, 4, 1, 1, 0, 0, 16, 16, 0, 0, (uint8_t)(DDSKTX__ENCODE_FLOAT)}, // RG16F + {32, 1, 1, 4, 1, 1, 0, 0, 16, 16, 0, 0, (uint8_t)(DDSKTX__ENCODE_SNORM)}, // RG16S + {64, 1, 1, 8, 1, 1, 0, 0, 16, 16, 16, 16, (uint8_t)(DDSKTX__ENCODE_FLOAT)}, // RGBA16F + {64, 1, 1, 8, 1, 1, 0, 0, 16, 16, 16, 16, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // RGBA16 + {32, 1, 1, 4, 1, 1, 0, 
0, 8, 8, 8, 8, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // BGRA8 + {32, 1, 1, 4, 1, 1, 0, 0, 10, 10, 10, 2, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // RGB10A2 + {32, 1, 1, 4, 1, 1, 0, 0, 11, 11, 10, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // RG11B10F + {16, 1, 1, 2, 1, 1, 0, 0, 8, 8, 0, 0, (uint8_t)(DDSKTX__ENCODE_UNORM)}, // RG8 + {16, 1, 1, 2, 1, 1, 0, 0, 8, 8, 0, 0, (uint8_t)(DDSKTX__ENCODE_SNORM)} // RG8S +}; + +// KTX: https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/ +#define DDSKTX__KTX_MAGIC stc__makefourcc(0xAB, 'K', 'T', 'X') +#define DDSKTX__KTX_HEADER_SIZE 60 // actual header size is 64, but we read 4 bytes for the 'magic' + +#define DDSKTX__KTX_ETC1_RGB8_OES 0x8D64 +#define DDSKTX__KTX_COMPRESSED_R11_EAC 0x9270 +#define DDSKTX__KTX_COMPRESSED_SIGNED_R11_EAC 0x9271 +#define DDSKTX__KTX_COMPRESSED_RG11_EAC 0x9272 +#define DDSKTX__KTX_COMPRESSED_SIGNED_RG11_EAC 0x9273 +#define DDSKTX__KTX_COMPRESSED_RGB8_ETC2 0x9274 +#define DDSKTX__KTX_COMPRESSED_SRGB8_ETC2 0x9275 +#define DDSKTX__KTX_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276 +#define DDSKTX__KTX_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277 +#define DDSKTX__KTX_COMPRESSED_RGBA8_ETC2_EAC 0x9278 +#define DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279 +#define DDSKTX__KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG 0x8C00 +#define DDSKTX__KTX_COMPRESSED_RGB_PVRTC_2BPPV1_IMG 0x8C01 +#define DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG 0x8C02 +#define DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG 0x8C03 +#define DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_2BPPV2_IMG 0x9137 +#define DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG 0x9138 +#define DDSKTX__KTX_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define DDSKTX__KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#define DDSKTX__KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 +#define DDSKTX__KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 +#define DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT 0x8C4D +#define DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT 0x8C4E +#define 
DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT 0x8C4F +#define DDSKTX__KTX_COMPRESSED_LUMINANCE_LATC1_EXT 0x8C70 +#define DDSKTX__KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT 0x8C72 +#define DDSKTX__KTX_COMPRESSED_RGBA_BPTC_UNORM_ARB 0x8E8C +#define DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB 0x8E8D +#define DDSKTX__KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB 0x8E8E +#define DDSKTX__KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB 0x8E8F +#define DDSKTX__KTX_COMPRESSED_SRGB_PVRTC_2BPPV1_EXT 0x8A54 +#define DDSKTX__KTX_COMPRESSED_SRGB_PVRTC_4BPPV1_EXT 0x8A55 +#define DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_PVRTC_2BPPV1_EXT 0x8A56 +#define DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_PVRTC_4BPPV1_EXT 0x8A57 +#define DDSKTX__KTX_ATC_RGB_AMD 0x8C92 +#define DDSKTX__KTX_ATC_RGBA_EXPLICIT_ALPHA_AMD 0x8C93 +#define DDSKTX__KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE +#define DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_4x4_KHR 0x93B0 +#define DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_5x5_KHR 0x93B2 +#define DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_6x6_KHR 0x93B4 +#define DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_8x5_KHR 0x93B5 +#define DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_8x6_KHR 0x93B6 +#define DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_10x5_KHR 0x93B8 +#define DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_4x4_KHR 0x93D0 +#define DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_5x5_KHR 0x93D2 +#define DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_6x6_KHR 0x93D4 +#define DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_8x5_KHR 0x93D5 +#define DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_8x6_KHR 0x93D6 +#define DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_10x5_KHR 0x93D8 + +#define DDSKTX__KTX_A8 0x803C +#define DDSKTX__KTX_R8 0x8229 +#define DDSKTX__KTX_R16 0x822A +#define DDSKTX__KTX_RG8 0x822B +#define DDSKTX__KTX_RG16 0x822C +#define DDSKTX__KTX_R16F 0x822D +#define DDSKTX__KTX_R32F 0x822E +#define DDSKTX__KTX_RG16F 0x822F +#define DDSKTX__KTX_RG32F 0x8230 +#define DDSKTX__KTX_RGBA8 0x8058 +#define DDSKTX__KTX_RGBA16 0x805B +#define 
DDSKTX__KTX_RGBA16F 0x881A +#define DDSKTX__KTX_R32UI 0x8236 +#define DDSKTX__KTX_RG32UI 0x823C +#define DDSKTX__KTX_RGBA32UI 0x8D70 +#define DDSKTX__KTX_RGBA32F 0x8814 +#define DDSKTX__KTX_RGB565 0x8D62 +#define DDSKTX__KTX_RGBA4 0x8056 +#define DDSKTX__KTX_RGB5_A1 0x8057 +#define DDSKTX__KTX_RGB10_A2 0x8059 +#define DDSKTX__KTX_R8I 0x8231 +#define DDSKTX__KTX_R8UI 0x8232 +#define DDSKTX__KTX_R16I 0x8233 +#define DDSKTX__KTX_R16UI 0x8234 +#define DDSKTX__KTX_R32I 0x8235 +#define DDSKTX__KTX_R32UI 0x8236 +#define DDSKTX__KTX_RG8I 0x8237 +#define DDSKTX__KTX_RG8UI 0x8238 +#define DDSKTX__KTX_RG16I 0x8239 +#define DDSKTX__KTX_RG16UI 0x823A +#define DDSKTX__KTX_RG32I 0x823B +#define DDSKTX__KTX_RG32UI 0x823C +#define DDSKTX__KTX_R8_SNORM 0x8F94 +#define DDSKTX__KTX_RG8_SNORM 0x8F95 +#define DDSKTX__KTX_RGB8_SNORM 0x8F96 +#define DDSKTX__KTX_RGBA8_SNORM 0x8F97 +#define DDSKTX__KTX_R16_SNORM 0x8F98 +#define DDSKTX__KTX_RG16_SNORM 0x8F99 +#define DDSKTX__KTX_RGB16_SNORM 0x8F9A +#define DDSKTX__KTX_RGBA16_SNORM 0x8F9B +#define DDSKTX__KTX_SRGB8 0x8C41 +#define DDSKTX__KTX_SRGB8_ALPHA8 0x8C43 +#define DDSKTX__KTX_RGBA32UI 0x8D70 +#define DDSKTX__KTX_RGB32UI 0x8D71 +#define DDSKTX__KTX_RGBA16UI 0x8D76 +#define DDSKTX__KTX_RGB16UI 0x8D77 +#define DDSKTX__KTX_RGBA8UI 0x8D7C +#define DDSKTX__KTX_RGB8UI 0x8D7D +#define DDSKTX__KTX_RGBA32I 0x8D82 +#define DDSKTX__KTX_RGB32I 0x8D83 +#define DDSKTX__KTX_RGBA16I 0x8D88 +#define DDSKTX__KTX_RGB16I 0x8D89 +#define DDSKTX__KTX_RGBA8I 0x8D8E +#define DDSKTX__KTX_RGB8 0x8051 +#define DDSKTX__KTX_RGB8I 0x8D8F +#define DDSKTX__KTX_RGB9_E5 0x8C3D +#define DDSKTX__KTX_R11F_G11F_B10F 0x8C3A + +#define DDSKTX__KTX_ZERO 0 +#define DDSKTX__KTX_RED 0x1903 +#define DDSKTX__KTX_ALPHA 0x1906 +#define DDSKTX__KTX_RGB 0x1907 +#define DDSKTX__KTX_RGBA 0x1908 +#define DDSKTX__KTX_BGRA 0x80E1 +#define DDSKTX__KTX_RG 0x8227 + +#define DDSKTX__KTX_BYTE 0x1400 +#define DDSKTX__KTX_UNSIGNED_BYTE 0x1401 +#define DDSKTX__KTX_SHORT 0x1402 +#define 
DDSKTX__KTX_UNSIGNED_SHORT 0x1403 +#define DDSKTX__KTX_INT 0x1404 +#define DDSKTX__KTX_UNSIGNED_INT 0x1405 +#define DDSKTX__KTX_FLOAT 0x1406 +#define DDSKTX__KTX_HALF_FLOAT 0x140B +#define DDSKTX__KTX_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define DDSKTX__KTX_UNSIGNED_SHORT_5_6_5 0x8363 +#define DDSKTX__KTX_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define DDSKTX__KTX_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define DDSKTX__KTX_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define DDSKTX__KTX_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B + +typedef struct ddsktx__ktx_format_info { + uint32_t internal_fmt; + uint32_t internal_fmt_srgb; + uint32_t fmt; + uint32_t type; +} ddsktx__ktx_format_info; + +typedef struct ddsktx__ktx_format_info2 { + uint32_t internal_fmt; + ddsktx_format format; +} ddsktx__ktx_format_info2; + +static const ddsktx__ktx_format_info k__translate_ktx_fmt[] = { + { + DDSKTX__KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT, + DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, + DDSKTX__KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT, + DDSKTX__KTX_ZERO, + }, // BC1 + { + DDSKTX__KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT, + DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, + DDSKTX__KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT, + DDSKTX__KTX_ZERO, + }, // BC2 + { + DDSKTX__KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT, + DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, + DDSKTX__KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT, + DDSKTX__KTX_ZERO, + }, // BC3 + { + DDSKTX__KTX_COMPRESSED_LUMINANCE_LATC1_EXT, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_COMPRESSED_LUMINANCE_LATC1_EXT, + DDSKTX__KTX_ZERO, + }, // BC4 + { + DDSKTX__KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT, + DDSKTX__KTX_ZERO, + }, // BC5 + { + DDSKTX__KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB, + DDSKTX__KTX_ZERO, + }, // BC6H + { + DDSKTX__KTX_COMPRESSED_RGBA_BPTC_UNORM_ARB, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_COMPRESSED_RGBA_BPTC_UNORM_ARB, + DDSKTX__KTX_ZERO, + }, // BC7 + { 
+ DDSKTX__KTX_ETC1_RGB8_OES, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_ETC1_RGB8_OES, + DDSKTX__KTX_ZERO, + }, // ETC1 + { + DDSKTX__KTX_COMPRESSED_RGB8_ETC2, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_COMPRESSED_RGB8_ETC2, + DDSKTX__KTX_ZERO, + }, // ETC2 + { + DDSKTX__KTX_COMPRESSED_RGBA8_ETC2_EAC, + DDSKTX__KTX_COMPRESSED_SRGB8_ETC2, + DDSKTX__KTX_COMPRESSED_RGBA8_ETC2_EAC, + DDSKTX__KTX_ZERO, + }, // ETC2A + { + DDSKTX__KTX_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, + DDSKTX__KTX_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2, + DDSKTX__KTX_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, + DDSKTX__KTX_ZERO, + }, // ETC2A1 + { + DDSKTX__KTX_COMPRESSED_RGB_PVRTC_2BPPV1_IMG, + DDSKTX__KTX_COMPRESSED_SRGB_PVRTC_2BPPV1_EXT, + DDSKTX__KTX_COMPRESSED_RGB_PVRTC_2BPPV1_IMG, + DDSKTX__KTX_ZERO, + }, // PTC12 + { + DDSKTX__KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG, + DDSKTX__KTX_COMPRESSED_SRGB_PVRTC_4BPPV1_EXT, + DDSKTX__KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG, + DDSKTX__KTX_ZERO, + }, // PTC14 + { + DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG, + DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_PVRTC_2BPPV1_EXT, + DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG, + DDSKTX__KTX_ZERO, + }, // PTC12A + { + DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG, + DDSKTX__KTX_COMPRESSED_SRGB_ALPHA_PVRTC_4BPPV1_EXT, + DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG, + DDSKTX__KTX_ZERO, + }, // PTC14A + { + DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_2BPPV2_IMG, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_2BPPV2_IMG, + DDSKTX__KTX_ZERO, + }, // PTC22 + { + DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG, + DDSKTX__KTX_ZERO, + }, // PTC24 + { + DDSKTX__KTX_ATC_RGB_AMD, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_ATC_RGB_AMD, + DDSKTX__KTX_ZERO, + }, // ATC + { + DDSKTX__KTX_ATC_RGBA_EXPLICIT_ALPHA_AMD, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_ATC_RGBA_EXPLICIT_ALPHA_AMD, + DDSKTX__KTX_ZERO, + }, // ATCE + { + DDSKTX__KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD, + DDSKTX__KTX_ZERO, + 
DDSKTX__KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD, + DDSKTX__KTX_ZERO, + }, // ATCI + { + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_4x4_KHR, + DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_4x4_KHR, + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_4x4_KHR, + DDSKTX__KTX_ZERO, + }, // ASTC4x4 + { + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_5x5_KHR, + DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_5x5_KHR, + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_5x5_KHR, + DDSKTX__KTX_ZERO, + }, // ASTC5x5 + { + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_6x6_KHR, + DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_6x6_KHR, + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_6x6_KHR, + DDSKTX__KTX_ZERO, + }, // ASTC6x6 + { + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_8x5_KHR, + DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_8x5_KHR, + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_8x5_KHR, + DDSKTX__KTX_ZERO, + }, // ASTC8x5 + { + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_8x6_KHR, + DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_8x6_KHR, + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_8x6_KHR, + DDSKTX__KTX_ZERO, + }, // ASTC8x6 + { + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_10x5_KHR, + DDSKTX__KTX_COMPRESSED_SRGB8_ALPHA8_ADDSKTX_10x5_KHR, + DDSKTX__KTX_COMPRESSED_RGBA_ADDSKTX_10x5_KHR, + DDSKTX__KTX_ZERO, + }, // ASTC10x5 + { + DDSKTX__KTX_ZERO, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_ZERO, + }, // Unknown + { + DDSKTX__KTX_ALPHA, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_ALPHA, + DDSKTX__KTX_UNSIGNED_BYTE, + }, // A8 + { + DDSKTX__KTX_R8, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RED, + DDSKTX__KTX_UNSIGNED_BYTE, + }, // R8 + { + DDSKTX__KTX_RGBA8, + DDSKTX__KTX_SRGB8_ALPHA8, + DDSKTX__KTX_RGBA, + DDSKTX__KTX_UNSIGNED_BYTE, + }, // RGBA8 + { + DDSKTX__KTX_RGBA8_SNORM, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RGBA, + DDSKTX__KTX_BYTE, + }, // RGBA8S + { + DDSKTX__KTX_RG16, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RG, + DDSKTX__KTX_UNSIGNED_SHORT, + }, // RG16 + { + DDSKTX__KTX_RGB8, + DDSKTX__KTX_SRGB8, + DDSKTX__KTX_RGB, + DDSKTX__KTX_UNSIGNED_BYTE, + }, // RGB8 + { + DDSKTX__KTX_R16, + DDSKTX__KTX_ZERO, 
+ DDSKTX__KTX_RED, + DDSKTX__KTX_UNSIGNED_SHORT, + }, // R16 + { + DDSKTX__KTX_R32F, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RED, + DDSKTX__KTX_FLOAT, + }, // R32F + { + DDSKTX__KTX_R16F, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RED, + DDSKTX__KTX_HALF_FLOAT, + }, // R16F + { + DDSKTX__KTX_RG16F, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RG, + DDSKTX__KTX_FLOAT, + }, // RG16F + { + DDSKTX__KTX_RG16_SNORM, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RG, + DDSKTX__KTX_SHORT, + }, // RG16S + { + DDSKTX__KTX_RGBA16F, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RGBA, + DDSKTX__KTX_HALF_FLOAT, + }, // RGBA16F + { + DDSKTX__KTX_RGBA16, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RGBA, + DDSKTX__KTX_UNSIGNED_SHORT, + }, // RGBA16 + { + DDSKTX__KTX_BGRA, + DDSKTX__KTX_SRGB8_ALPHA8, + DDSKTX__KTX_BGRA, + DDSKTX__KTX_UNSIGNED_BYTE, + }, // BGRA8 + { + DDSKTX__KTX_RGB10_A2, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RGBA, + DDSKTX__KTX_UNSIGNED_INT_2_10_10_10_REV, + }, // RGB10A2 + { + DDSKTX__KTX_R11F_G11F_B10F, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RGB, + DDSKTX__KTX_UNSIGNED_INT_10F_11F_11F_REV, + }, // RG11B10F + { + DDSKTX__KTX_RG8, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RG, + DDSKTX__KTX_UNSIGNED_BYTE, + }, // RG8 + { + DDSKTX__KTX_RG8_SNORM, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RG, + DDSKTX__KTX_BYTE, + }, // RG8S + { + DDSKTX__KTX_R16I, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RED, + DDSKTX__KTX_UNSIGNED_SHORT, + }, // R16I + { + DDSKTX__KTX_R16UI, + DDSKTX__KTX_ZERO, + DDSKTX__KTX_RED, + DDSKTX__KTX_UNSIGNED_SHORT, + }, // R16UI +}; + +static const ddsktx__ktx_format_info2 k__translate_ktx_fmt2[] = { + {DDSKTX__KTX_A8, DDSKTX_FORMAT_A8}, + {DDSKTX__KTX_RED, DDSKTX_FORMAT_R8}, + {DDSKTX__KTX_RGB, DDSKTX_FORMAT_RGB8}, + {DDSKTX__KTX_RGBA, DDSKTX_FORMAT_RGBA8}, + {DDSKTX__KTX_COMPRESSED_RGB_S3TC_DXT1_EXT, DDSKTX_FORMAT_BC1}, +}; + +typedef struct ddsktx__format_info { + const char* name; + bool has_alpha; +} ddsktx__format_info; + +static const ddsktx__format_info k__formats_info[] = { + {"BC1", false}, {"BC2", true}, {"BC3", true}, {"BC4", false}, 
+    {"BC5", false}, {"BC6H", false}, {"BC7", true}, {"ETC1", false},
+    {"ETC2", false}, {"ETC2A", true}, {"ETC2A1", true}, {"PTC12", false},
+    {"PTC14", false}, {"PTC12A", true}, {"PTC14A", true}, {"PTC22", true},
+    {"PTC24", true}, {"ATC", false}, {"ATCE", false}, {"ATCI", false},
+    {"ASTC4x4", true}, {"ASTC5x5", true}, {"ASTC6x6", false}, {"ASTC8x5", true},
+    {"ASTC8x6", false}, {"ASTC10x5", false}, {"", false}, {"A8", true},
+    {"R8", false}, {"RGBA8", true}, {"RGBA8S", true}, {"RG16", false},
+    {"RGB8", false}, {"R16", false}, {"R32F", false}, {"R16F", false},
+    {"RG16F", false}, {"RG16S", false}, {"RGBA16F", true}, {"RGBA16", true},
+    {"BGRA8", true}, {"RGB10A2", true}, {"RG11B10F", false}, {"RG8", false},
+    {"RG8S", false}};
+
+// Copies up to `size` bytes from the reader's current position into `buff` and advances the
+// reader's offset by the number of bytes actually copied.
+//
+// Returns the number of bytes copied. The result is smaller than `size` when fewer bytes are
+// left in the buffer, and 0 when `size` is not positive or the offset is already at or past
+// the end of the buffer (in which case nothing is written to `buff`).
+static inline int ddsktx__read(ddsktx__mem_reader* reader, void* buff, int size) {
+    // Work from the remaining byte count instead of `offset + size`: the sum can overflow, and
+    // an offset beyond `total` must never become a negative (i.e. huge) memcpy length.
+    int remaining = reader->total - reader->offset;
+    if (size <= 0 || remaining <= 0) {
+        return 0;
+    }
+
+    int read_bytes = size <= remaining ? size : remaining;
+    ddsktx_memcpy(buff, reader->buff + reader->offset, read_bytes);
+    reader->offset += read_bytes;
+    return read_bytes;
+}
+
+// Parses the header of a KTX container and fills `tc` with the texture description.
+//
+//   tc        - output; zeroed first, then populated on success
+//   file_data - start of the file contents (including the leading 4-byte magic, which is
+//               skipped here; see DDSKTX__KTX_HEADER_SIZE)
+//   size      - number of bytes available at `file_data`
+//   err       - optional; receives a message when parsing fails (may be NULL)
+//
+// Returns true on success. Returns false (via ddsktx__err) when the header is truncated, the
+// identifier or endianness is not supported, the metadata block does not fit in the file, the
+// internal format is unknown, or a cubemap does not have all six faces.
+static bool ddsktx__parse_ktx(ddsktx_texture_info* tc, const void* file_data, int size,
+                              ddsktx_error* err) {
+    // Identifier bytes that follow the 4-byte magic.
+    static const uint8_t ktx__id[] = {0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A};
+
+    ddsktx_memset(tc, 0x0, sizeof(ddsktx_texture_info));
+
+    ddsktx__mem_reader r = {(const uint8_t*)file_data, size, sizeof(uint32_t)};
+    ddsktx__ktx_header header;
+    if (ddsktx__read(&r, &header, sizeof(header)) != DDSKTX__KTX_HEADER_SIZE) {
+        ddsktx__err(err, "ktx; header size does not match");
+    }
+
+    if (ddsktx_memcmp(header.id, ktx__id, sizeof(header.id)) != 0) {
+        ddsktx__err(err, "ktx: invalid file header");
+    }
+
+    // TODO: support big endian
+    if (header.endianess != 0x04030201) {
+        ddsktx__err(err, "ktx: big-endian format is not supported");
+    }
+
+    // metadata_size is read straight from the file. Reject a block that runs past the end of
+    // the buffer; otherwise data_offset would point outside the file and size_bytes would go
+    // negative. The header read above succeeded, so `r.total - r.offset` is non-negative here.
+    if (header.metadata_size > (uint32_t)(r.total - r.offset)) {
+        ddsktx__err(err, "ktx: metadata size exceeds file size");
+    }
+
+    tc->metadata_offset = r.offset;
+    tc->metadata_size = (int)header.metadata_size;
+    r.offset += (int)header.metadata_size;
+
+    ddsktx_format format = _DDSKTX_FORMAT_COUNT;
+
+    // k__translate_ktx_fmt is indexed by ddsktx_format, but it also contains a zeroed
+    // placeholder row at _DDSKTX_FORMAT_COMPRESSED and extra rows after the last real format.
+    // Only rows that correspond to a real ddsktx_format may be selected, because `format` is
+    // used below to index k__block_info and k__formats_info.
+    int count = sizeof(k__translate_ktx_fmt) / sizeof(ddsktx__ktx_format_info);
+    if (count > (int)_DDSKTX_FORMAT_COUNT) {
+        count = (int)_DDSKTX_FORMAT_COUNT;
+    }
+    for (int i = 0; i < count; i++) {
+        if (i == (int)_DDSKTX_FORMAT_COMPRESSED) {
+            continue;
+        }
+        if (k__translate_ktx_fmt[i].internal_fmt == header.internal_format) {
+            format = (ddsktx_format)i;
+            break;
+        }
+    }
+
+    // Fall back to the secondary table, which maps additional internal format values
+    // directly to a ddsktx_format.
+    if (format == _DDSKTX_FORMAT_COUNT) {
+        count = sizeof(k__translate_ktx_fmt2) / sizeof(ddsktx__ktx_format_info2);
+        for (int i = 0; i < count; i++) {
+            if (k__translate_ktx_fmt2[i].internal_fmt == header.internal_format) {
+                format = (ddsktx_format)k__translate_ktx_fmt2[i].format;
+                break;
+            }
+        }
+    }
+
+    if (format == _DDSKTX_FORMAT_COUNT) {
+        ddsktx__err(err, "ktx: unsupported format");
+    }
+
+    if (header.face_count > 1 && header.face_count != DDSKTX_CUBE_FACE_COUNT) {
+        ddsktx__err(err, "ktx: incomplete cubemap");
+    }
+
+    tc->data_offset = r.offset;
+    tc->size_bytes = r.total - r.offset;
+    tc->format = format;
+    tc->width = (int)header.width;
+    tc->height = (int)header.height;
+    // A value of 0 in these header fields is reported as 1.
+    tc->depth = ddsktx__max((int)header.depth, 1);
+    tc->num_layers = ddsktx__max((int)header.array_count, 1);
+    tc->num_mips = ddsktx__max((int)header.mip_count, 1);
+    tc->bpp = k__block_info[format].bpp;
+
+    if (header.face_count == DDSKTX_CUBE_FACE_COUNT) {
+        tc->flags |= DDSKTX_TEXTURE_FLAG_CUBEMAP;
+    }
+    tc->flags |= k__formats_info[format].has_alpha ? DDSKTX_TEXTURE_FLAG_ALPHA : 0;
+    tc->flags |= DDSKTX_TEXTURE_FLAG_KTX;
+
+    return true;
+}
+
+static bool ddsktx__parse_dds(ddsktx_texture_info* tc, const void* file_data, int size,
+                              ddsktx_error* err) {
+    ddsktx__mem_reader r = {(const uint8_t*)file_data, size, sizeof(uint32_t)};
+    ddsktx__dds_header header;
+    if (ddsktx__read(&r, &header, sizeof(header)) < DDSKTX__DDS_HEADER_SIZE ||
+        header.size != DDSKTX__DDS_HEADER_SIZE) {
+        ddsktx__err(err, "dds: header size does not match");
+    }
+
+    uint32_t required_flags = (DDSKTX__DDSD_HEIGHT | DDSKTX__DDSD_WIDTH);
+    if ((header.flags & required_flags) != required_flags) {
+        ddsktx__err(err, "dds: have invalid flags");
+    }
+
+    if (header.pixel_format.size != sizeof(ddsktx__dds_pixel_format)) {
+        ddsktx__err(err, "dds: pixel format header is invalid");
+    }
+
+    uint32_t dxgi_format = 0;
+    uint32_t array_size = 1;
+    if (DDSKTX__DDPF_FOURCC == (header.flags & DDSKTX__DDPF_FOURCC) &&
+        header.pixel_format.fourcc == DDSKTX__DDS_DX10) {
+        ddsktx__dds_header_dxgi dxgi_header;
+        ddsktx__read(&r, &dxgi_header, sizeof(dxgi_header));
+        dxgi_format = dxgi_header.dxgi_format;
+        array_size = dxgi_header.array_size;
+    }
+
+    bool cubemap = (header.caps2 & DDSKTX__DDSCAPS2_CUBEMAP) != 0;
+    if (cubemap &&
+        (header.caps2 & DDSKTX__DDSCAPS2_CUBEMAP_ALLSIDES) != DDSKTX__DDSCAPS2_CUBEMAP_ALLSIDES) {
+        ddsktx__err(err, "dds: incomplete cubemap");
+    }
+    bool volume = (header.caps2 & DDSKTX__DDSCAPS2_VOLUME) != 0;
+
+    ddsktx_format format = _DDSKTX_FORMAT_COUNT;
+    bool has_alpha = (header.pixel_format.flags & DDSKTX__DDPF_ALPHA) != 0;
+    bool srgb = false;
+
+    if (dxgi_format == 0) {
+        if ((header.pixel_format.flags & DDSKTX__DDPF_FOURCC) == DDSKTX__DDPF_FOURCC) {
+            int count =
+                sizeof(k__translate_dds_fourcc) / sizeof(ddsktx__dds_translate_fourcc_format);
+            for (int i = 0; i < count; i++) {
+                if (k__translate_dds_fourcc[i].dds_format == header.pixel_format.fourcc) {
+                    format = k__translate_dds_fourcc[i].format;
+                    break;
+                }
+            }
+        } else {
+            int
count = sizeof(k__translate_dds_pixel) / sizeof(ddsktx__dds_translate_pixel_format); + for (int i = 0; i < count; i++) { + const ddsktx__dds_translate_pixel_format* f = &k__translate_dds_pixel[i]; + if (f->bit_count == header.pixel_format.rgb_bit_count && + f->flags == header.pixel_format.flags && + f->bit_mask[0] == header.pixel_format.bit_mask[0] && + f->bit_mask[1] == header.pixel_format.bit_mask[1] && + f->bit_mask[2] == header.pixel_format.bit_mask[2] && + f->bit_mask[3] == header.pixel_format.bit_mask[3]) { + format = f->format; + break; + } + } + } + } else { + int count = sizeof(k__translate_dxgi) / sizeof(ddsktx__dds_translate_fourcc_format); + for (int i = 0; i < count; i++) { + if (k__translate_dxgi[i].dds_format == dxgi_format) { + format = k__translate_dxgi[i].format; + srgb = k__translate_dxgi[i].srgb; + break; + } + } + } + + if (format == _DDSKTX_FORMAT_COUNT) { + ddsktx__err(err, "dds: unknown format"); + } + + ddsktx_memset(tc, 0x0, sizeof(ddsktx_texture_info)); + tc->data_offset = r.offset; + tc->size_bytes = r.total - r.offset; + tc->format = format; + tc->width = (int)header.width; + tc->height = (int)header.height; + tc->depth = ddsktx__max(1, (int)header.depth); + tc->num_layers = ddsktx__max(1, (int)array_size); + tc->num_mips = (header.caps1 & DDSKTX__DDSCAPS_MIPMAP) ? 
(int)header.mip_count : 1; + tc->bpp = k__block_info[format].bpp; + if (has_alpha || k__formats_info[format].has_alpha) + tc->flags |= DDSKTX_TEXTURE_FLAG_ALPHA; + if (cubemap) + tc->flags |= DDSKTX_TEXTURE_FLAG_CUBEMAP; + if (volume) + tc->flags |= DDSKTX_TEXTURE_FLAG_VOLUME; + if (srgb) + tc->flags |= DDSKTX_TEXTURE_FLAG_SRGB; + tc->flags |= DDSKTX_TEXTURE_FLAG_DDS; + + return true; +} + +void ddsktx_get_sub(const ddsktx_texture_info* tc, ddsktx_sub_data* sub_data, const void* file_data, + int size, int array_idx, int slice_face_idx, int mip_idx) { + ddsktx_assert(tc); + ddsktx_assert(sub_data); + ddsktx_assert(file_data); + ddsktx_assert(size > 0); + ddsktx_assert(array_idx < tc->num_layers); + ddsktx_assert(!((tc->flags & DDSKTX_TEXTURE_FLAG_CUBEMAP) && + (slice_face_idx >= DDSKTX_CUBE_FACE_COUNT)) && + "invalid cube-face index"); + ddsktx_assert(!(!(tc->flags & DDSKTX_TEXTURE_FLAG_CUBEMAP) && (slice_face_idx >= tc->depth)) && + "invalid depth-slice index"); + ddsktx_assert(mip_idx < tc->num_mips); + + ddsktx__mem_reader r = {(uint8_t*)file_data, size, tc->data_offset}; + ddsktx_format format = tc->format; + + ddsktx_assert(format < _DDSKTX_FORMAT_COUNT && format != _DDSKTX_FORMAT_COMPRESSED); + const ddsktx__block_info* binfo = &k__block_info[format]; + const int bpp = binfo->bpp; + const int block_size = binfo->block_size; + const int min_block_x = binfo->min_block_x; + const int min_block_y = binfo->min_block_y; + + int num_faces; + + ddsktx_assert(!((tc->flags & DDSKTX_TEXTURE_FLAG_CUBEMAP) && tc->depth > 1) && + "textures must be either Cube or 3D"); + int slice_idx, face_idx, num_slices; + if (tc->flags & DDSKTX_TEXTURE_FLAG_CUBEMAP) { + slice_idx = 0; + face_idx = slice_face_idx; + num_faces = DDSKTX_CUBE_FACE_COUNT; + num_slices = 1; + } else { + slice_idx = slice_face_idx; + face_idx = 0; + num_faces = 1; + num_slices = tc->depth; + } + + if (tc->flags & DDSKTX_TEXTURE_FLAG_DDS) { + for (int layer = 0, num_layers = tc->num_layers; layer < num_layers; 
layer++) { + for (int face = 0; face < num_faces; face++) { + int width = tc->width; + int height = tc->height; + + for (int mip = 0, mip_count = tc->num_mips; mip < mip_count; mip++) { + int row_bytes, mip_size; + + if (format < _DDSKTX_FORMAT_COMPRESSED) { + int num_blocks_wide = width > 0 ? ddsktx__max(1, (width + 3) / 4) : 0; + num_blocks_wide = ddsktx__max(min_block_x, num_blocks_wide); + + int num_blocks_high = height > 0 ? ddsktx__max(1, (height + 3) / 4) : 0; + num_blocks_high = ddsktx__max(min_block_y, num_blocks_high); + + row_bytes = num_blocks_wide * block_size; + mip_size = row_bytes * num_blocks_high; + } else { + row_bytes = (width * bpp + 7) / 8; // round to nearest byte + mip_size = row_bytes * height; + } + + for (int slice = 0; slice < num_slices; slice++) { + if (layer == array_idx && mip == mip_idx && slice == slice_idx && + face_idx == face) { + sub_data->buff = r.buff + r.offset; + sub_data->width = width; + sub_data->height = height; + sub_data->size_bytes = mip_size; + sub_data->row_pitch_bytes = row_bytes; + return; + } + + r.offset += mip_size; + ddsktx_assert(r.offset <= r.total && "texture buffer overflow"); + } // foreach slice + + width >>= 1; + height >>= 1; + + if (width == 0) { + width = 1; + } + if (height == 0) { + height = 1; + } + } // foreach mip + } // foreach face + } // foreach array-item + } else if (tc->flags & DDSKTX_TEXTURE_FLAG_KTX) { + int width = tc->width; + int height = tc->height; + + for (int mip = 0, c = tc->num_mips; mip < c; mip++) { + int row_bytes, mip_size; + + if (format < _DDSKTX_FORMAT_COMPRESSED) { + int num_blocks_wide = width > 0 ? ddsktx__max(1, (width + 3) / 4) : 0; + num_blocks_wide = ddsktx__max(min_block_x, num_blocks_wide); + + int num_blocks_high = height > 0 ? 
ddsktx__max(1, (height + 3) / 4) : 0; + num_blocks_high = ddsktx__max(min_block_y, num_blocks_high); + + row_bytes = num_blocks_wide * block_size; + mip_size = row_bytes * num_blocks_high; + } else { + row_bytes = (width * bpp + 7) / 8; // round to nearest byte + mip_size = row_bytes * height; + } + + int image_size; + ddsktx__read(&r, &image_size, sizeof(image_size)); + ddsktx_assert(image_size == (mip_size * num_faces * num_slices) && + "image size mismatch"); + + for (int layer = 0, num_layers = tc->num_layers; layer < num_layers; layer++) { + for (int face = 0; face < num_faces; face++) { + for (int slice = 0; slice < num_slices; slice++) { + if (layer == array_idx && mip == mip_idx && slice == slice_idx && + face_idx == face) { + sub_data->buff = r.buff + r.offset; + sub_data->width = width; + sub_data->height = height; + sub_data->size_bytes = mip_size; + sub_data->row_pitch_bytes = row_bytes; + return; + } + + r.offset += mip_size; + ddsktx_assert(r.offset <= r.total && "texture buffer overflow"); + } // foreach slice + + r.offset = ddsktx__align_mask(r.offset, 3); // cube-padding + } // foreach face + } // foreach array-item + + width >>= 1; + height >>= 1; + + if (width == 0) { + width = 1; + } + if (height == 0) { + height = 1; + } + + r.offset = ddsktx__align_mask(r.offset, 3); // mip-padding + } // foreach mip + } else { + ddsktx_assert(0 && "invalid file format"); + } +} + +bool ddsktx_parse(ddsktx_texture_info* tc, const void* file_data, int size, ddsktx_error* err) { + ddsktx_assert(tc); + ddsktx_assert(file_data); + ddsktx_assert(size > 0); + + ddsktx__mem_reader r = {(const uint8_t*)file_data, size, 0}; + + // Read file flag and determine the file type + uint32_t file_flag = 0; + if (ddsktx__read(&r, &file_flag, sizeof(file_flag)) != sizeof(file_flag)) { + ddsktx__err(err, "invalid texture file"); + } + + switch (file_flag) { + case DDSKTX__DDS_MAGIC: + return ddsktx__parse_dds(tc, file_data, size, err); + case DDSKTX__KTX_MAGIC: + return 
ddsktx__parse_ktx(tc, file_data, size, err); + default: + ddsktx__err(err, "unknown texture format"); + } +} + +const char* ddsktx_format_str(ddsktx_format format) { + return k__formats_info[format].name; +} + +bool ddsktx_format_compressed(ddsktx_format format) { + ddsktx_assert(format != _DDSKTX_FORMAT_COMPRESSED && format != _DDSKTX_FORMAT_COUNT); + return format < _DDSKTX_FORMAT_COMPRESSED; +} + +#endif // DDSKTX_IMPLEMENT diff --git a/src/common/dds.h b/src/common/dds.h deleted file mode 100644 index 866e2eee5..000000000 --- a/src/common/dds.h +++ /dev/null @@ -1,111 +0,0 @@ -//-------------------------------------------------------------------------------------- -// DDS.h -// -// This header defines constants and structures that are useful when parsing -// DDS files. DDS files were originally designed to use several structures -// and constants that are native to DirectDraw and are defined in ddraw.h, -// such as DDSURFACEDESC2 and DDSCAPS2. This file defines similar -// (compatible) constants and structures so that one can use DDS files -// without needing to include ddraw.h. -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
-// -// http://go.microsoft.com/fwlink/?LinkId=248926 -// http://go.microsoft.com/fwlink/?LinkId=248929 -// http://go.microsoft.com/fwlink/?LinkID=615561 -//-------------------------------------------------------------------------------------- - -#pragma once - -#include - -namespace Common::DirectX { - -#pragma pack(push, 1) - -const uint32_t DDS_MAGIC = 0x20534444; // "DDS " - -struct DDS_PIXELFORMAT { - uint32_t dwSize; - uint32_t dwFlags; - uint32_t dwFourCC; - uint32_t dwRGBBitCount; - uint32_t dwRBitMask; - uint32_t dwGBitMask; - uint32_t dwBBitMask; - uint32_t dwABitMask; -}; - -#define DDS_FOURCC 0x00000004 // DDPF_FOURCC -#define DDS_RGB 0x00000040 // DDPF_RGB -#define DDS_RGBA 0x00000041 // DDPF_RGB | DDPF_ALPHAPIXELS -#define DDS_LUMINANCE 0x00020000 // DDPF_LUMINANCE -#define DDS_LUMINANCEA 0x00020001 // DDPF_LUMINANCE | DDPF_ALPHAPIXELS -#define DDS_ALPHA 0x00000002 // DDPF_ALPHA -#define DDS_PAL8 0x00000020 // DDPF_PALETTEINDEXED8 -#define DDS_PAL8A 0x00000021 // DDPF_PALETTEINDEXED8 | DDPF_ALPHAPIXELS -#define DDS_BUMPDUDV 0x00080000 // DDPF_BUMPDUDV - -#ifndef MAKEFOURCC -#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ - ((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | \ - ((uint32_t)(uint8_t)(ch2) << 16) | ((uint32_t)(uint8_t)(ch3) << 24)) -#endif /* defined(MAKEFOURCC) */ - -#define DDS_HEADER_FLAGS_TEXTURE \ - 0x00001007 // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT -#define DDS_HEADER_FLAGS_MIPMAP 0x00020000 // DDSD_MIPMAPCOUNT -#define DDS_HEADER_FLAGS_VOLUME 0x00800000 // DDSD_DEPTH -#define DDS_HEADER_FLAGS_PITCH 0x00000008 // DDSD_PITCH -#define DDS_HEADER_FLAGS_LINEARSIZE 0x00080000 // DDSD_LINEARSIZE - -// Subset here matches D3D10_RESOURCE_DIMENSION and D3D11_RESOURCE_DIMENSION -enum DDS_RESOURCE_DIMENSION { - DDS_DIMENSION_TEXTURE1D = 2, - DDS_DIMENSION_TEXTURE2D = 3, - DDS_DIMENSION_TEXTURE3D = 4, -}; - -struct DDS_HEADER { - uint32_t dwSize; - uint32_t dwFlags; - uint32_t dwHeight; - uint32_t dwWidth; - uint32_t 
dwPitchOrLinearSize; - uint32_t dwDepth; // only if DDS_HEADER_FLAGS_VOLUME is set in dwFlags - uint32_t dwMipMapCount; - uint32_t dwReserved1[11]; - DDS_PIXELFORMAT ddspf; - uint32_t dwCaps; - uint32_t dwCaps2; - uint32_t dwCaps3; - uint32_t dwCaps4; - uint32_t dwReserved2; -}; - -struct DDS_HEADER_DXT10 { - uint32_t dxgiFormat; - uint32_t resourceDimension; - uint32_t miscFlag; // see DDS_RESOURCE_MISC_FLAG - uint32_t arraySize; - uint32_t miscFlags2; // see DDS_MISC_FLAGS2 -}; - -#pragma pack(pop) - -static_assert(sizeof(DDS_HEADER) == 124, "DDS Header size mismatch"); -static_assert(sizeof(DDS_HEADER_DXT10) == 20, "DDS DX10 Extended Header size mismatch"); - -constexpr DDS_PIXELFORMAT DDSPF_A8R8G8B8 = { - sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000}; -constexpr DDS_PIXELFORMAT DDSPF_X8R8G8B8 = { - sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}; -constexpr DDS_PIXELFORMAT DDSPF_A8B8G8R8 = { - sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000}; -constexpr DDS_PIXELFORMAT DDSPF_X8B8G8R8 = { - sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000}; -constexpr DDS_PIXELFORMAT DDSPF_R8G8B8 = { - sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 24, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}; - -} // namespace Common::DirectX diff --git a/src/common/image_util.cpp b/src/common/image_util.cpp index 0a56c8ccb..582a02640 100644 --- a/src/common/image_util.cpp +++ b/src/common/image_util.cpp @@ -3,15 +3,13 @@ // Refer to the license.txt file included. 
#include -#include "common/dds.h" +#define DDSKTX_IMPLEMENT #include "common/file_util.h" #include "common/image_util.h" #include "common/logging/log.h" namespace Common { -using namespace Common::DirectX; - namespace { void spng_free(spng_ctx* ctx) { @@ -26,13 +24,13 @@ auto make_spng_ctx(int flags) { } // Anonymous namespace -bool DecodePNG(std::span in_data, std::vector& out_data, u32& width, u32& height) { +bool ParsePNG(std::span png_data, size_t& decoded_size, u32& width, u32& height) { auto ctx = make_spng_ctx(0); if (!ctx) [[unlikely]] { return false; } - if (spng_set_png_buffer(ctx.get(), in_data.data(), in_data.size())) { + if (spng_set_png_buffer(ctx.get(), png_data.data(), png_data.size())) { return false; } @@ -40,36 +38,73 @@ bool DecodePNG(std::span in_data, std::vector& out_data, u32& widt if (spng_get_ihdr(ctx.get(), &ihdr)) { return false; } + width = ihdr.width; + height = ihdr.height; + + const int format = SPNG_FMT_RGBA8; + if (spng_decoded_image_size(ctx.get(), format, &decoded_size)) { + return false; + } + + return true; +} + +bool DecodePNG(std::span png_data, std::span out_data) { + auto ctx = make_spng_ctx(0); + if (!ctx) [[unlikely]] { + return false; + } + + if (spng_set_png_buffer(ctx.get(), png_data.data(), png_data.size())) { + return false; + } const int format = SPNG_FMT_RGBA8; size_t decoded_len = 0; if (spng_decoded_image_size(ctx.get(), format, &decoded_len)) { return false; } + ASSERT(out_data.size() == decoded_len); - out_data.resize(decoded_len); - if (spng_decode_image(ctx.get(), out_data.data(), decoded_len, format, SPNG_DECODE_TRNS)) { + if (spng_decode_image(ctx.get(), out_data.data(), decoded_len, format, 0)) { return false; } - width = ihdr.width; - height = ihdr.height; return true; } -bool EncodePNG(std::span in_data, const std::string& out_path, u32 width, u32 height, - u32 stride, s32 level) { +bool ParseDDSKTX(std::span in_data, std::vector& out_data, u32& width, u32& height, + ddsktx_format& format) { + 
ddsktx_texture_info tc{}; + const int size = static_cast(in_data.size()); + if (!ddsktx_parse(&tc, in_data.data(), size, nullptr)) { + return false; + } + + width = tc.width; + height = tc.height; + format = tc.format; + + ddsktx_sub_data sub_data{}; + ddsktx_get_sub(&tc, &sub_data, in_data.data(), size, 0, 0, 0); + + out_data.resize(sub_data.size_bytes); + std::memcpy(out_data.data(), sub_data.buff, sub_data.size_bytes); + + return true; +} + +bool EncodePNG(const std::string& out_path, std::span in_data, u32 width, u32 height, + s32 level) { auto ctx = make_spng_ctx(SPNG_CTX_ENCODER); if (!ctx) [[unlikely]] { return false; } - auto outfile = FileUtil::IOFile(out_path, "wb"); - if (spng_set_png_file(ctx.get(), outfile.Handle())) { + if (spng_set_option(ctx.get(), SPNG_IMG_COMPRESSION_LEVEL, level)) { return false; } - - if (spng_set_option(ctx.get(), SPNG_IMG_COMPRESSION_LEVEL, level)) { + if (spng_set_option(ctx.get(), SPNG_ENCODE_TO_BUFFER, 1)) { return false; } @@ -82,24 +117,22 @@ bool EncodePNG(std::span in_data, const std::string& out_path, u32 wid return false; } - if (spng_encode_image(ctx.get(), nullptr, 0, SPNG_FMT_PNG, - SPNG_ENCODE_PROGRESSIVE | SPNG_ENCODE_FINALIZE)) { + if (spng_encode_image(ctx.get(), in_data.data(), in_data.size(), SPNG_FMT_PNG, + SPNG_ENCODE_FINALIZE)) { return false; } - for (u32 row = 0; row < height; row++) { - const int err = spng_encode_row(ctx.get(), &in_data[row * stride], stride); - if (err == SPNG_EOI) { - break; - } + int ret{}; + size_t png_size{}; + u8* png_buf = reinterpret_cast(spng_get_png_buffer(ctx.get(), &png_size, &ret)); - if (err) { - LOG_ERROR(Common, "Failed to save {} by {} image to {} at level {}: error {}", width, - height, out_path, level, err); - return false; - } + if (!png_buf) { + return false; } + auto file = FileUtil::IOFile(out_path, "wb"); + file.WriteBytes(png_buf, png_size); + size_t image_len = 0; spng_decoded_image_size(ctx.get(), SPNG_FMT_PNG, &image_len); LOG_ERROR(Common, "{} byte {} by {} 
image saved to {} at level {}", image_len, width, height, @@ -108,4 +141,14 @@ bool EncodePNG(std::span in_data, const std::string& out_path, u32 wid return true; } +void FlipTexture(std::span in_data, u32 width, u32 height, u32 stride) { + for (u32 line = 0; line < height / 2; line++) { + const u32 offset_1 = line * stride; + const u32 offset_2 = (height - line - 1) * stride; + // Swap lines + std::swap_ranges(in_data.begin() + offset_1, in_data.begin() + offset_1 + stride, + in_data.begin() + offset_2); + } +} + } // namespace Common diff --git a/src/common/image_util.h b/src/common/image_util.h index e0b8e4671..d0b92ce07 100644 --- a/src/common/image_util.h +++ b/src/common/image_util.h @@ -5,21 +5,20 @@ #include #include #include "common/common_types.h" +#include "common/dds-ktx.h" namespace Common { -/** - * @brief DecodePNG Given a buffer of png input data decodes said data to RGBA8 format - * and writes the result to out_data, updating width and height to match the file dimentions - * @param in_data The input png data - * @param out_data The decoded RGBA8 pixel data - * @param width The output width of the png image - * @param height The output height of the png image - * @return true on decode success, false otherwise - */ -bool DecodePNG(std::span in_data, std::vector& out_data, u32& width, u32& height); +bool ParsePNG(std::span png_data, size_t& decoded_size, u32& width, u32& height); -bool EncodePNG(std::span in_data, const std::string& out_path, u32 width, u32 height, - u32 stride, s32 level); +bool DecodePNG(std::span png_data, std::span out_data); + +bool ParseDDSKTX(std::span in_data, std::vector& out_data, u32& width, u32& height, + ddsktx_format& format); + +bool EncodePNG(const std::string& out_path, std::span in_data, u32 width, u32 height, + s32 level = 6); + +void FlipTexture(std::span in_data, u32 width, u32 height, u32 stride); } // namespace Common diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index 
3fec64038..b518f5c84 100644 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h @@ -4,14 +4,17 @@ #pragma once -#include -#include #include -#include "common/common_types.h" +#include +#include namespace Common { -template +/** + * A ScratchBuffer is a simple heap allocated array without member initialization. + * Main usage is for temporary buffers passed to threads for example + */ +template class ScratchBuffer { static_assert(std::is_trivial_v, "Must use a POD type"); @@ -28,8 +31,12 @@ public: return buffer.get(); } - [[nodiscard]] std::span Span() const noexcept { - return std::span{buffer.get(), size}; + [[nodiscard]] std::span Span(u32 index = 0) const noexcept { + return std::span{buffer.get() + index, size - index}; + } + + [[nodiscard]] std::span Span(u32 index = 0) noexcept { + return std::span{buffer.get() + index, size - index}; } private: diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 60d2559ad..4d32da95c 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -36,8 +36,6 @@ add_library(core STATIC core.h core_timing.cpp core_timing.h - custom_tex_cache.cpp - custom_tex_cache.h dumping/backend.cpp dumping/backend.h file_sys/archive_backend.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index 3d9468699..460aa67c9 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -12,7 +12,6 @@ #include "audio_core/lle/lle.h" #include "common/arch.h" #include "common/logging/log.h" -#include "common/texture.h" #include "core/arm/arm_interface.h" #include "core/arm/exclusive_monitor.h" #if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) @@ -27,7 +26,6 @@ #include "core/dumping/ffmpeg_backend.h" #endif #include "common/settings.h" -#include "core/custom_tex_cache.h" #include "core/gdbstub/gdbstub.h" #include "core/global.h" #include "core/hle/kernel/client_port.h" @@ -48,6 +46,7 @@ #include "core/movie.h" #include "core/rpc/rpc_server.h" #include "network/network.h" +#include 
"video_core/rasterizer_cache/custom_tex_manager.h" #include "video_core/renderer_base.h" #include "video_core/video_core.h" @@ -317,16 +316,9 @@ System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::st static_cast(load_result)); } perf_stats = std::make_unique(title_id); - custom_tex_cache = std::make_unique(); if (Settings::values.custom_textures) { - const u64 program_id = Kernel().GetCurrentProcess()->codeset->program_id; - FileUtil::CreateFullPath(fmt::format( - "{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::LoadDir), program_id)); - custom_tex_cache->FindCustomTextures(program_id); - } - if (Settings::values.preload_textures) { - custom_tex_cache->PreloadTextures(*GetImageInterface()); + custom_tex_manager->FindCustomTextures(); } status = ResultStatus::Success; @@ -431,7 +423,9 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, video_dumper = std::make_unique(); #endif - VideoCore::ResultStatus result = VideoCore::Init(emu_window, secondary_window, *memory); + custom_tex_manager = std::make_unique(*this); + + VideoCore::ResultStatus result = VideoCore::Init(emu_window, secondary_window, *this); if (result != VideoCore::ResultStatus::Success) { switch (result) { case VideoCore::ResultStatus::ErrorGenericDrivers: @@ -512,12 +506,12 @@ const VideoDumper::Backend& System::VideoDumper() const { return *video_dumper; } -Core::CustomTexCache& System::CustomTexCache() { - return *custom_tex_cache; +VideoCore::CustomTexManager& System::CustomTexManager() { + return *custom_tex_manager; } -const Core::CustomTexCache& System::CustomTexCache() const { - return *custom_tex_cache; +const VideoCore::CustomTexManager& System::CustomTexManager() const { + return *custom_tex_manager; } void System::RegisterMiiSelector(std::shared_ptr mii_selector) { @@ -528,10 +522,6 @@ void System::RegisterSoftwareKeyboard(std::shared_ptr image_interface) { - registered_image_interface = std::move(image_interface); -} - void 
System::Shutdown(bool is_deserializing) { // Log last frame performance stats const auto perf_results = GetAndResetPerfStats(); diff --git a/src/core/core.h b/src/core/core.h index 218b7e391..fc5ed692d 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -9,10 +9,8 @@ #include #include #include "common/common_types.h" -#include "core/custom_tex_cache.h" #include "core/frontend/applets/mii_selector.h" #include "core/frontend/applets/swkbd.h" -#include "core/frontend/image_interface.h" #include "core/loader/loader.h" #include "core/memory.h" #include "core/perf_stats.h" @@ -58,8 +56,9 @@ class Backend; } namespace VideoCore { +class CustomTexManager; class RendererBase; -} +} // namespace VideoCore namespace Core { @@ -257,11 +256,11 @@ public: /// Gets a const reference to the cheat engine [[nodiscard]] const Cheats::CheatEngine& CheatEngine() const; - /// Gets a reference to the custom texture cache system - [[nodiscard]] Core::CustomTexCache& CustomTexCache(); + /// Gets a reference to the custom texture management system + [[nodiscard]] VideoCore::CustomTexManager& CustomTexManager(); - /// Gets a const reference to the custom texture cache system - [[nodiscard]] const Core::CustomTexCache& CustomTexCache() const; + /// Gets a const reference to the custom texture management system + [[nodiscard]] const VideoCore::CustomTexManager& CustomTexManager() const; /// Gets a reference to the video dumper backend [[nodiscard]] VideoDumper::Backend& VideoDumper(); @@ -301,14 +300,6 @@ public: return registered_swkbd; } - /// Image interface - - void RegisterImageInterface(std::shared_ptr image_interface); - - [[nodiscard]] std::shared_ptr GetImageInterface() const { - return registered_image_interface; - } - void SaveState(u32 slot) const; void LoadState(u32 slot); @@ -367,10 +358,7 @@ private: std::unique_ptr video_dumper; /// Custom texture cache system - std::unique_ptr custom_tex_cache; - - /// Image interface - std::shared_ptr registered_image_interface; + 
std::unique_ptr custom_tex_manager; /// RPC Server for scripting support std::unique_ptr rpc_server; diff --git a/src/core/custom_tex_cache.cpp b/src/core/custom_tex_cache.cpp deleted file mode 100644 index d42d81d6f..000000000 --- a/src/core/custom_tex_cache.cpp +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2019 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include "common/file_util.h" -#include "common/texture.h" -#include "core.h" -#include "core/custom_tex_cache.h" - -namespace Core { -CustomTexCache::CustomTexCache() = default; - -CustomTexCache::~CustomTexCache() = default; - -bool CustomTexCache::IsTextureDumped(u64 hash) const { - return dumped_textures.count(hash); -} - -void CustomTexCache::SetTextureDumped(const u64 hash) { - dumped_textures.insert(hash); -} - -bool CustomTexCache::IsTextureCached(u64 hash) const { - return custom_textures.count(hash); -} - -const CustomTexInfo& CustomTexCache::LookupTexture(u64 hash) const { - return custom_textures.at(hash); -} - -void CustomTexCache::CacheTexture(u64 hash, const std::vector& tex, u32 width, u32 height) { - custom_textures[hash] = {width, height, tex}; -} - -void CustomTexCache::AddTexturePath(u64 hash, const std::string& path) { - if (custom_texture_paths.count(hash)) - LOG_ERROR(Core, "Textures {} and {} conflict!", custom_texture_paths[hash].path, path); - else - custom_texture_paths[hash] = {path, hash}; -} - -void CustomTexCache::FindCustomTextures(u64 program_id) { - // Custom textures are currently stored as - // [TitleID]/tex1_[width]x[height]_[64-bit hash]_[format].png - - const std::string load_path = fmt::format( - "{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::LoadDir), program_id); - - if (FileUtil::Exists(load_path)) { - FileUtil::FSTEntry texture_dir; - std::vector textures; - // 64 nested folders should be plenty for most cases - FileUtil::ScanDirectoryTree(load_path, texture_dir, 64); - 
FileUtil::GetAllFilesFromNestedEntries(texture_dir, textures); - - for (const auto& file : textures) { - if (file.isDirectory) - continue; - if (file.virtualName.substr(0, 5) != "tex1_") - continue; - - u32 width; - u32 height; - u64 hash; - u32 format; // unused - // TODO: more modern way of doing this - if (std::sscanf(file.virtualName.c_str(), "tex1_%ux%u_%llX_%u.png", &width, &height, - &hash, &format) == 4) { - AddTexturePath(hash, file.physicalName); - } - } - } -} - -void CustomTexCache::PreloadTextures(Frontend::ImageInterface& image_interface) { - for (const auto& path : custom_texture_paths) { - const auto& path_info = path.second; - Core::CustomTexInfo tex_info; - if (image_interface.DecodePNG(tex_info.tex, tex_info.width, tex_info.height, - path_info.path)) { - // Make sure the texture size is a power of 2 - std::bitset<32> width_bits(tex_info.width); - std::bitset<32> height_bits(tex_info.height); - if (width_bits.count() == 1 && height_bits.count() == 1) { - LOG_DEBUG(HW_GPU, "Loaded custom texture from {}", path_info.path); - Common::FlipRGBA8Texture(tex_info.tex, tex_info.width, tex_info.height); - CacheTexture(path_info.hash, tex_info.tex, tex_info.width, tex_info.height); - } else { - LOG_ERROR(HW_GPU, "Texture {} size is not a power of 2", path_info.path); - } - } else { - LOG_ERROR(HW_GPU, "Failed to load custom texture {}", path_info.path); - } - } -} - -bool CustomTexCache::CustomTextureExists(u64 hash) const { - return custom_texture_paths.count(hash); -} - -const CustomTexPathInfo& CustomTexCache::LookupTexturePathInfo(u64 hash) const { - return custom_texture_paths.at(hash); -} - -bool CustomTexCache::IsTexturePathMapEmpty() const { - return custom_texture_paths.size() == 0; -} -} // namespace Core diff --git a/src/core/custom_tex_cache.h b/src/core/custom_tex_cache.h deleted file mode 100644 index fbc21352e..000000000 --- a/src/core/custom_tex_cache.h +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2019 Citra Emulator Project -// Licensed 
under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include "common/common_types.h" - -namespace Frontend { -class ImageInterface; -} - -namespace Core { -struct CustomTexInfo { - u32 width; - u32 height; - std::vector tex; -}; - -// This is to avoid parsing the filename multiple times -struct CustomTexPathInfo { - std::string path; - u64 hash; -}; - -// TODO: think of a better name for this class... -class CustomTexCache { -public: - explicit CustomTexCache(); - ~CustomTexCache(); - - bool IsTextureDumped(u64 hash) const; - void SetTextureDumped(u64 hash); - - bool IsTextureCached(u64 hash) const; - const CustomTexInfo& LookupTexture(u64 hash) const; - void CacheTexture(u64 hash, const std::vector& tex, u32 width, u32 height); - - void AddTexturePath(u64 hash, const std::string& path); - void FindCustomTextures(u64 program_id); - void PreloadTextures(Frontend::ImageInterface& image_interface); - bool CustomTextureExists(u64 hash) const; - const CustomTexPathInfo& LookupTexturePathInfo(u64 hash) const; - bool IsTexturePathMapEmpty() const; - -private: - std::unordered_set dumped_textures; - std::unordered_map custom_textures; - std::unordered_map custom_texture_paths; -}; -} // namespace Core diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 54e663e3b..9cb0caf19 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -28,10 +28,10 @@ add_library(video_core STATIC regs_texturing.h renderer_base.cpp renderer_base.h + rasterizer_cache/custom_tex_manager.cpp + rasterizer_cache/custom_tex_manager.h rasterizer_cache/framebuffer_base.cpp rasterizer_cache/framebuffer_base.h - rasterizer_cache/hires_replacer.cpp - rasterizer_cache/hires_replacer.h rasterizer_cache/pixel_format.cpp rasterizer_cache/pixel_format.h rasterizer_cache/rasterizer_cache.cpp diff --git a/src/video_core/rasterizer_cache/custom_tex_manager.cpp 
b/src/video_core/rasterizer_cache/custom_tex_manager.cpp new file mode 100644 index 000000000..dc35eda19 --- /dev/null +++ b/src/video_core/rasterizer_cache/custom_tex_manager.cpp @@ -0,0 +1,251 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_util.h" +#include "common/file_util.h" +#include "common/hash.h" +#include "common/image_util.h" +#include "common/scratch_buffer.h" +#include "core/core.h" +#include "video_core/rasterizer_cache/custom_tex_manager.h" +#include "video_core/rasterizer_cache/surface_params.h" + +namespace VideoCore { + +namespace { + +using namespace Common; + +CustomFileFormat MakeFileFormat(std::string_view ext) { + if (ext == "png") { + return CustomFileFormat::PNG; + } else if (ext == "dds") { + return CustomFileFormat::DDS; + } else if (ext == "ktx") { + return CustomFileFormat::KTX; + } + LOG_ERROR(Render, "Unknown file extension {}", ext); + return CustomFileFormat::PNG; +} + +CustomPixelFormat ToCustomPixelFormat(ddsktx_format format) { + switch (format) { + case DDSKTX_FORMAT_RGBA8: + return CustomPixelFormat::RGBA8; + case DDSKTX_FORMAT_BC1: + return CustomPixelFormat::BC1; + case DDSKTX_FORMAT_BC3: + return CustomPixelFormat::BC3; + case DDSKTX_FORMAT_BC5: + return CustomPixelFormat::BC5; + case DDSKTX_FORMAT_BC7: + return CustomPixelFormat::BC7; + case DDSKTX_FORMAT_ASTC4x4: + return CustomPixelFormat::ASTC; + default: + LOG_ERROR(Common, "Unknown dds/ktx pixel format {}", format); + return CustomPixelFormat::RGBA8; + } +} + +} // Anonymous namespace + +CustomTexManager::CustomTexManager(Core::System& system_) + : system{system_}, workers{std::max(std::thread::hardware_concurrency(), 2U) - 1, + "Hires processing"} {} + +CustomTexManager::~CustomTexManager() = default; + +void CustomTexManager::FindCustomTextures() { + if (textures_loaded) { + return; + } + + // Custom textures are currently stored as + // 
[TitleID]/tex1_[width]x[height]_[64-bit hash]_[format].png + using namespace FileUtil; + + const u64 program_id = system.Kernel().GetCurrentProcess()->codeset->program_id; + const std::string load_path = + fmt::format("{}textures/{:016X}/", GetUserPath(UserPath::LoadDir), program_id); + + // Create the directory if it did not exist + if (!Exists(load_path)) { + CreateFullPath(load_path); + } + + FSTEntry texture_dir; + std::vector textures; + // 64 nested folders should be plenty for most cases + ScanDirectoryTree(load_path, texture_dir, 64); + GetAllFilesFromNestedEntries(texture_dir, textures); + + u32 width{}; + u32 height{}; + u64 hash{}; + u32 format{}; + std::string ext(3, ' '); + + for (const FSTEntry& file : textures) { + const std::string& path = file.physicalName; + if (file.isDirectory || !file.virtualName.starts_with("tex1_")) { + continue; + } + + // Parse the texture filename. We only really care about the hash, + // the rest should be queried from the file itself. + if (std::sscanf(file.virtualName.c_str(), "tex1_%ux%u_%lX_%u.%s", &width, &height, &hash, + &format, ext.data()) != 5) { + continue; + } + + auto [it, new_texture] = custom_textures.try_emplace(hash); + if (!new_texture) { + LOG_ERROR(Render, "Textures {} and {} conflict, ignoring!", custom_textures[hash].path, + path); + continue; + } + + auto& texture = it->second; + texture.file_format = MakeFileFormat(ext); + texture.path = path; + + // Query the required information from the file and load it. + // Since this doesn't involve any decoding it shouldn't consume too much RAM. 
+ LoadTexture(texture); + } + + textures_loaded = true; +} + +void CustomTexManager::DumpTexture(const SurfaceParams& params, std::span data) { + const u64 data_hash = ComputeHash64(data.data(), data.size()); + const u32 data_size = static_cast(data.size()); + const u32 width = params.width; + const u32 height = params.height; + const PixelFormat format = params.pixel_format; + + // Check if it's been dumped already + if (dumped_textures.contains(data_hash)) { + return; + } + + // Make sure the texture size is a power of 2. + // If not, the surface is probably a framebuffer + if (!IsPow2(width) || !IsPow2(height)) { + LOG_WARNING(Render, "Not dumping {:016X} because size isn't a power of 2 ({}x{})", + data_hash, width, height); + return; + } + + // Allocate a temporary buffer for the thread to use + const u32 decoded_size = width * height * 4; + ScratchBuffer pixels(data_size + decoded_size); + std::memcpy(pixels.Data(), data.data(), data_size); + + // Proceed with the dump. + const u64 program_id = system.Kernel().GetCurrentProcess()->codeset->program_id; + auto dump = [width, height, params, data_hash, format, data_size, program_id, + pixels = std::move(pixels)]() mutable { + // Decode and convert to RGBA8 + const std::span encoded = pixels.Span().first(data_size); + const std::span decoded = pixels.Span(data_size); + DecodeTexture(params, params.addr, params.end, encoded, decoded, + params.type == SurfaceType::Color); + + std::string dump_path = fmt::format( + "{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir), program_id); + if (!FileUtil::CreateFullPath(dump_path)) { + LOG_ERROR(Render, "Unable to create {}", dump_path); + return; + } + + dump_path += fmt::format("tex1_{}x{}_{:016X}_{}.png", width, height, data_hash, format); + EncodePNG(dump_path, decoded, width, height); + }; + + workers.QueueWork(std::move(dump)); + dumped_textures.insert(data_hash); +} + +const Texture& CustomTexManager::GetTexture(const SurfaceParams& params, 
std::span data) { + u64 data_hash; + if (compatibility_mode) { + const u32 decoded_size = + params.width * params.height * GetBytesPerPixel(params.pixel_format); + ScratchBuffer decoded(decoded_size); + DecodeTexture(params, params.addr, params.end, data, decoded.Span()); + data_hash = ComputeHash64(decoded.Data(), decoded_size); + } else { + data_hash = ComputeHash64(data.data(), data.size()); + } + + auto it = custom_textures.find(data_hash); + if (it == custom_textures.end()) { + LOG_WARNING( + Render, "Unable to find replacement for {}x{} {} surface upload with hash {:016X}", + params.width, params.height, PixelFormatAsString(params.pixel_format), data_hash); + return dummy_texture; + } + + LOG_DEBUG(Render, "Assigning {} to {}x{} {} surface with address {:#x} and hash {:016X}", + it->second.path, params.width, params.height, + PixelFormatAsString(params.pixel_format), params.addr, data_hash); + + return it->second; +} + +void CustomTexManager::DecodeToStaging(const Texture& texture, const StagingData& staging) { + switch (texture.file_format) { + case CustomFileFormat::PNG: + if (!DecodePNG(texture.data, staging.mapped)) { + LOG_ERROR(Render, "Failed to decode png {}", texture.path); + } + if (compatibility_mode) { + const u32 stride = texture.width * 4; + // FlipTexture(staging.mapped, texture.width, texture.height, stride); + } + break; + case CustomFileFormat::DDS: + case CustomFileFormat::KTX: + // Compressed formats don't need CPU decoding + std::memcpy(staging.mapped.data(), texture.data.data(), texture.data.size()); + break; + } +} + +void CustomTexManager::LoadTexture(Texture& texture) { + std::vector& data = texture.data; + + // Read the file + auto file = FileUtil::IOFile(texture.path, "rb"); + data.resize(file.GetSize()); + file.ReadBytes(data.data(), file.GetSize()); + + // Parse it based on the file extension + switch (texture.file_format) { + case CustomFileFormat::PNG: + texture.format = CustomPixelFormat::RGBA8; // Check for other formats too? 
+ if (!ParsePNG(data, texture.staging_size, texture.width, texture.height)) { + LOG_ERROR(Render, "Failed to parse png file {}", texture.path); + return; + } + break; + case CustomFileFormat::DDS: + case CustomFileFormat::KTX: + ddsktx_format format{}; + if (!ParseDDSKTX(data, texture.data, texture.width, texture.height, format)) { + LOG_ERROR(Render, "Failed to parse dds/ktx file {}", texture.path); + return; + } + texture.staging_size = texture.data.size(); + texture.format = ToCustomPixelFormat(format); + break; + } + + ASSERT_MSG(texture.width != 0 && texture.height != 0 && texture.staging_size != 0, + "Invalid parameters read from {}", texture.path); +} + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/custom_tex_manager.h b/src/video_core/rasterizer_cache/custom_tex_manager.h new file mode 100644 index 000000000..b133fcb20 --- /dev/null +++ b/src/video_core/rasterizer_cache/custom_tex_manager.h @@ -0,0 +1,79 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include "common/thread_worker.h" +#include "video_core/rasterizer_cache/pixel_format.h" + +namespace Core { +class System; +} + +namespace VideoCore { + +struct StagingData; +class SurfaceParams; +enum class PixelFormat : u32; + +enum class CustomFileFormat : u32 { + PNG = 0, + DDS = 1, + KTX = 2, +}; + +struct Texture { + u32 width; + u32 height; + CustomPixelFormat format; + CustomFileFormat file_format; + std::string path; + std::size_t staging_size; + std::vector data; + + operator bool() const noexcept { + return !data.empty(); + } +}; + +class CustomTexManager { +public: + CustomTexManager(Core::System& system); + ~CustomTexManager(); + + /// Searches the load directory assigned to program_id for any custom textures and loads them + void FindCustomTextures(); + + /// Saves the provided pixel data described by params to disk as png + void DumpTexture(const SurfaceParams& params, std::span data); + + /// Returns the custom texture handle assigned to the provided data hash + const Texture& GetTexture(const SurfaceParams& params, std::span data); + + /// Decodes the data in texture to a consumable format + void DecodeToStaging(const Texture& texture, const StagingData& staging); + + bool CompatibilityMode() const noexcept { + return compatibility_mode; + } + +private: + /// Fills the texture structure with information from the file in path + void LoadTexture(Texture& texture); + +private: + Core::System& system; + Common::ThreadWorker workers; + std::unordered_set dumped_textures; + std::unordered_map custom_textures; + Texture dummy_texture{}; + bool textures_loaded{}; + bool compatibility_mode{true}; +}; + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/hires_replacer.cpp b/src/video_core/rasterizer_cache/hires_replacer.cpp deleted file mode 100644 index 402a9fc03..000000000 --- a/src/video_core/rasterizer_cache/hires_replacer.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2023 
Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/bit_util.h" -#include "common/file_util.h" -#include "common/scratch_buffer.h" -#include "common/image_util.h" -#include "core/core.h" -#include "video_core/rasterizer_cache/hires_replacer.h" -#include "video_core/rasterizer_cache/surface_base.h" - -namespace VideoCore { - -HiresReplacer::HiresReplacer() : - workers{std::max(std::thread::hardware_concurrency(), 2U) - 1, "Hires processing"} { - -} - -void HiresReplacer::DumpSurface(const SurfaceBase& surface, std::span data) { - const u32 data_hash = Common::ComputeHash64(data.data(), data.size()); - const u32 width = surface.width; - const u32 height = surface.height; - const PixelFormat format = surface.pixel_format; - - // Check if it's been dumped already - if (dumped_surfaces.contains(data_hash)) { - return; - } - - // If this is a partial update do not dump it, it's probably not a texture - if (surface.BytesInPixels(width * height) != data.size()) { - LOG_WARNING(Render, "Not dumping {:016X} because it's a partial texture update"); - return; - } - - // Make sure the texture size is a power of 2. - // If not, the surface is probably a framebuffer - if (!Common::IsPow2(surface.width) || !Common::IsPow2(surface.height)) { - LOG_WARNING(Render, "Not dumping {:016X} because size isn't a power of 2 ({}x{})", - data_hash, width, height); - return; - } - - // Allocate a temporary buffer for the thread to use - Common::ScratchBuffer pixels(data.size()); - std::memcpy(pixels.Data(), data.data(), data.size()); - - // Proceed with the dump. 
The texture should be already decoded - const u64 program_id = Core::System::GetInstance().Kernel().GetCurrentProcess()->codeset->program_id; - const auto dump = [width, height, data_hash, format, program_id, pixels = std::move(pixels)]() { - std::string dump_path = - fmt::format("{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir), program_id); - if (!FileUtil::CreateFullPath(dump_path)) { - LOG_ERROR(Render, "Unable to create {}", dump_path); - return; - } - - dump_path += fmt::format("tex1_{}x{}_{:016X}_{}.png", width, height, data_hash, format); - Common::EncodePNG(pixels.Span(), dump_path, width, height, width, 0); - }; - - dump(); -} - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/hires_replacer.h b/src/video_core/rasterizer_cache/hires_replacer.h deleted file mode 100644 index dcd27fd61..000000000 --- a/src/video_core/rasterizer_cache/hires_replacer.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2023 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include "common/thread_worker.h" - -namespace VideoCore { - -class SurfaceBase; - -class HiresReplacer { -public: - HiresReplacer(); - - void DumpSurface(const SurfaceBase& surface, std::span data); - -private: - Common::ThreadWorker workers; - std::unordered_set dumped_surfaces; -}; - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/pixel_format.cpp b/src/video_core/rasterizer_cache/pixel_format.cpp index 32ab2cd0d..dd9d7180c 100644 --- a/src/video_core/rasterizer_cache/pixel_format.cpp +++ b/src/video_core/rasterizer_cache/pixel_format.cpp @@ -47,6 +47,23 @@ std::string_view PixelFormatAsString(PixelFormat format) { } } +std::string_view CustomPixelFormatAsString(CustomPixelFormat format) { + switch (format) { + case CustomPixelFormat::RGBA8: + return "RGBA8"; + case CustomPixelFormat::BC1: + return "BC1"; + case CustomPixelFormat::BC3: + return "BC3"; + case CustomPixelFormat::BC5: + return "BC5"; + case CustomPixelFormat::BC7: + return "BC7"; + case CustomPixelFormat::ASTC: + return "ASTC"; + } +} + bool CheckFormatsBlittable(PixelFormat source_format, PixelFormat dest_format) { SurfaceType source_type = GetFormatType(source_format); SurfaceType dest_type = GetFormatType(dest_format); diff --git a/src/video_core/rasterizer_cache/pixel_format.h b/src/video_core/rasterizer_cache/pixel_format.h index 1f8bef8cf..6c19e7358 100644 --- a/src/video_core/rasterizer_cache/pixel_format.h +++ b/src/video_core/rasterizer_cache/pixel_format.h @@ -35,6 +35,15 @@ enum class PixelFormat : u32 { Invalid = 255, }; +enum class CustomPixelFormat : u32 { + RGBA8 = 0, + BC1 = 1, + BC3 = 2, + BC5 = 3, + BC7 = 4, + ASTC = 5, +}; + enum class SurfaceType : u32 { Color = 0, Texture = 1, @@ -112,6 +121,8 @@ constexpr u32 GetBytesPerPixel(PixelFormat format) { std::string_view PixelFormatAsString(PixelFormat format); +std::string_view CustomPixelFormatAsString(CustomPixelFormat format); + bool 
CheckFormatsBlittable(PixelFormat source_format, PixelFormat dest_format); PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format); diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 45bac1ca5..b618fc194 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -10,6 +10,7 @@ #include "common/microprofile.h" #include "core/memory.h" #include "video_core/pica_state.h" +#include "video_core/rasterizer_cache/custom_tex_manager.h" #include "video_core/rasterizer_cache/rasterizer_cache_base.h" #include "video_core/rasterizer_cache/surface_base.h" #include "video_core/video_core.h" @@ -26,10 +27,13 @@ inline auto RangeFromInterval(auto& map, const auto& interval) { } template -RasterizerCache::RasterizerCache(Memory::MemorySystem& memory_, Runtime& runtime_) - : memory{memory_}, runtime{runtime_}, resolution_scale_factor{ - VideoCore::GetResolutionScaleFactor()}, - dump_textures{Settings::values.dump_textures.GetValue()} { +RasterizerCache::RasterizerCache(Memory::MemorySystem& memory_, + CustomTexManager& custom_tex_manager_, Runtime& runtime_) + : memory{memory_}, runtime{runtime_}, custom_tex_manager{custom_tex_manager_}, + resolution_scale_factor{VideoCore::GetResolutionScaleFactor()}, + dump_textures{Settings::values.dump_textures.GetValue()}, + use_custom_textures{Settings::values.custom_textures.GetValue()} { + using TextureConfig = Pica::TexturingRegs::TextureConfig; // Create null handles for all cached resources @@ -624,8 +628,15 @@ auto RasterizerCache::GetFramebufferSurfaces(bool using_color_fb, bool using_ // Update resolution_scale_factor and reset cache if changed const bool resolution_scale_changed = resolution_scale_factor != VideoCore::GetResolutionScaleFactor(); - if (resolution_scale_changed) [[unlikely]] { + const bool custom_textures_changed = + use_custom_textures != 
Settings::values.custom_textures.GetValue(); + if (resolution_scale_changed || custom_textures_changed) [[unlikely]] { resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); + use_custom_textures = Settings::values.custom_textures.GetValue(); + if (use_custom_textures) { + custom_tex_manager.FindCustomTextures(); + } + UnregisterAll(); } @@ -866,22 +877,29 @@ void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval i MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); - const auto staging = runtime.FindStaging( - load_info.width * load_info.height * surface->GetInternalBytesPerPixel(), true); - MemoryRef source_ptr = memory.GetPhysicalRef(load_info.addr); if (!source_ptr) [[unlikely]] { return; } const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr); + if (dump_textures) { + custom_tex_manager.DumpTexture(load_info, upload_data); + } + + // Check if we need to replace the texture + if (use_custom_textures && UploadCustomSurface(surface, load_info, upload_data)) { + return; + } + + // Upload the 3DS texture to the host GPU + const u32 upload_size = + load_info.width * load_info.height * surface->GetInternalBytesPerPixel(); + const StagingData staging = runtime.FindStaging(upload_size, true); + DecodeTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped, runtime.NeedsConvertion(surface->pixel_format)); - if (dump_textures) { - replacer.DumpSurface(*surface, staging.mapped); - } - const BufferTextureCopy upload = { .buffer_offset = 0, .buffer_size = staging.size, @@ -891,6 +909,60 @@ void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval i surface->Upload(upload, staging); } +template +bool RasterizerCache::UploadCustomSurface(const Surface& surface, const SurfaceParams& load_info, + std::span upload_data) { + const u32 level = surface->LevelOf(load_info.addr); + const bool is_base_level = level == 0; + const Texture& texture = custom_tex_manager.GetTexture(load_info, 
upload_data); + + // The old texture pack system did not support mipmaps so older packs might do + // wonky things. For example Henriko's pack has mipmaps larger than the base + // level. To avoid crashes just don't upload mipmaps + if (custom_tex_manager.CompatibilityMode() && !is_base_level) { + return true; + } + if (!texture) { + return false; + } + + // Swap the internal surface allocation to the desired dimentions and format + if (is_base_level && !surface->Swap(texture.width, texture.height, texture.format)) { + // This means the backend doesn't support the custom compression format. + // We could implement a CPU/GPU decoder but it's always better for packs to + // have compatible compression formats. + LOG_ERROR(HW_GPU, "Custom compressed format {} unsupported by host GPU", texture.format); + return false; + } + + // Ensure surface has a compatible allocation before proceeding + if (!surface->IsCustom() || surface->CustomFormat() != texture.format) { + LOG_ERROR(HW_GPU, "Surface does not have a compatible allocation, ignoring"); + return true; + } + + // Copy and decode the custom texture to the staging buffer + const u32 custom_size = static_cast(texture.staging_size); + const StagingData staging = runtime.FindStaging(custom_size, true); + custom_tex_manager.DecodeToStaging(texture, staging); + + // Upload surface + const BufferTextureCopy upload = { + .buffer_offset = 0, + .buffer_size = custom_size, + .texture_rect = {0, texture.height, texture.width, 0}, + .texture_level = level, + }; + surface->Upload(upload, staging); + + // Manually generate mipmaps in compatibility mode + if (custom_tex_manager.CompatibilityMode()) { + runtime.GenerateMipmaps(*surface); + } + + return true; +} + template void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval interval) { const SurfaceParams flush_info = surface->FromInterval(interval); @@ -898,8 +970,9 @@ void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval const u32 
flush_end = boost::icl::last_next(interval); ASSERT(flush_start >= surface->addr && flush_end <= surface->end); - const auto staging = runtime.FindStaging( - flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(), false); + const u32 flush_size = + flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(); + const StagingData staging = runtime.FindStaging(flush_size, false); const BufferTextureCopy download = { .buffer_offset = 0, diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h index 60c5e946b..b9d969bb1 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache_base.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h @@ -7,9 +7,10 @@ #include #include #include +#include #include #include -#include "video_core/rasterizer_cache/hires_replacer.h" +#include "common/thread_worker.h" #include "video_core/rasterizer_cache/sampler_params.h" #include "video_core/rasterizer_cache/surface_params.h" #include "video_core/rasterizer_cache/utils.h" @@ -37,7 +38,7 @@ enum class MatchFlags { DECLARE_ENUM_FLAG_OPERATORS(MatchFlags); -class RasterizerAccelerated; +class CustomTexManager; template class RasterizerCache : NonCopyable { @@ -57,8 +58,14 @@ class RasterizerCache : NonCopyable { using SurfaceRect_Tuple = std::tuple>; using PageMap = boost::icl::interval_map; + struct RenderTargets { + Surface color_surface; + Surface depth_surface; + }; + public: - RasterizerCache(Memory::MemorySystem& memory, Runtime& runtime); + RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager, + Runtime& runtime); ~RasterizerCache(); /// Perform hardware accelerated texture copy according to the provided configuration @@ -150,6 +157,10 @@ private: /// Copies pixel data in interval from the guest VRAM to the host GPU surface void UploadSurface(const Surface& surface, SurfaceInterval interval); + /// Uploads a custom texture associated with 
upload_data to the target surface + bool UploadCustomSurface(const Surface& surface, const SurfaceParams& load_info, + std::span upload_data); + /// Copies pixel data in interval from the host GPU surface to the guest VRAM void DownloadSurface(const Surface& surface, SurfaceInterval interval); @@ -185,7 +196,7 @@ private: private: Memory::MemorySystem& memory; Runtime& runtime; - HiresReplacer replacer; + CustomTexManager& custom_tex_manager; PageMap cached_pages; SurfaceMap dirty_regions; std::vector remove_surfaces; @@ -199,14 +210,11 @@ private: std::unordered_map samplers; SlotVector slot_samplers; - - struct RenderTargets { - Surface color_surface; - Surface depth_surface; - }; - RenderTargets render_targets; - const bool& dump_textures; + + // Custom textures + bool dump_textures; + bool use_custom_textures; }; } // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h index 52d6400ac..5bb0ae784 100644 --- a/src/video_core/rasterizer_cache/surface_base.h +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -17,11 +17,6 @@ public: SurfaceBase(); explicit SurfaceBase(const SurfaceParams& params); - [[nodiscard]] bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept { - const PAddr overlap_end = overlap_addr + static_cast(overlap_size); - return addr < overlap_end && overlap_addr < end; - } - /// Returns true when this surface can be used to fill the fill_interval of dest_surface bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; @@ -35,12 +30,23 @@ public: /// Returns the clear value used to validate another surface from this fill surface ClearValue MakeClearValue(PAddr copy_addr, PixelFormat dst_format); - /// Returns true when the region denoted by interval is valid + bool IsCustom() const noexcept { + return is_custom; + } + + CustomPixelFormat CustomFormat() const noexcept { + return custom_format; + } + + bool Overlaps(PAddr 
overlap_addr, size_t overlap_size) const noexcept { + const PAddr overlap_end = overlap_addr + static_cast(overlap_size); + return addr < overlap_end && overlap_addr < end; + } + bool IsRegionValid(SurfaceInterval interval) const { return (invalid_regions.find(interval) == invalid_regions.end()); } - /// Returns true when the entire surface is invalid bool IsFullyInvalid() const { auto interval = GetInterval(); return *invalid_regions.equal_range(interval).first == interval; @@ -53,6 +59,8 @@ private: public: bool registered = false; bool picked = false; + bool is_custom = false; + CustomPixelFormat custom_format{}; SurfaceRegions invalid_regions; std::array fill_data; u32 fill_size = 0; diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index 10047266d..ac0006a2e 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -77,18 +77,10 @@ struct BufferTextureCopy { u32 texture_level; }; -struct HostTextureTag { - PixelFormat format{}; - TextureType type{}; - u32 width = 0; - u32 height = 0; - u32 levels = 1; - - auto operator<=>(const HostTextureTag&) const noexcept = default; - - const u64 Hash() const { - return Common::ComputeHash64(this, sizeof(HostTextureTag)); - } +struct StagingData { + u32 size = 0; + std::span mapped{}; + u64 buffer_offset = 0; }; struct TextureCubeConfig { @@ -144,13 +136,6 @@ void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr en } // namespace VideoCore namespace std { -template <> -struct hash { - std::size_t operator()(const VideoCore::HostTextureTag& tag) const noexcept { - return tag.Hash(); - } -}; - template <> struct hash { std::size_t operator()(const VideoCore::TextureCubeConfig& config) const noexcept { diff --git a/src/video_core/renderer_opengl/gl_driver.cpp b/src/video_core/renderer_opengl/gl_driver.cpp index 39ba6967f..f263fcf3e 100644 --- a/src/video_core/renderer_opengl/gl_driver.cpp +++ 
b/src/video_core/renderer_opengl/gl_driver.cpp @@ -5,6 +5,7 @@ #include #include "common/assert.h" #include "core/core.h" +#include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/renderer_opengl/gl_driver.h" namespace OpenGL { @@ -96,6 +97,21 @@ bool Driver::HasBug(DriverBug bug) const { return True(bugs & bug); } +bool Driver::IsCustomFormatSupported(VideoCore::CustomPixelFormat format) const { + switch (format) { + case VideoCore::CustomPixelFormat::RGBA8: + return true; + case VideoCore::CustomPixelFormat::BC1: + case VideoCore::CustomPixelFormat::BC3: + case VideoCore::CustomPixelFormat::BC5: + return ext_texture_compression_s3tc; + case VideoCore::CustomPixelFormat::BC7: + return arb_texture_compression_bptc; + case VideoCore::CustomPixelFormat::ASTC: + return is_gles; + } +} + void Driver::ReportDriverInfo() { // Report the context version and the vendor string gl_version = std::string_view{reinterpret_cast(glGetString(GL_VERSION))}; @@ -135,6 +151,8 @@ void Driver::CheckExtensionSupport() { arb_buffer_storage = GLAD_GL_ARB_buffer_storage; ext_clip_cull_distance = GLAD_GL_EXT_clip_cull_distance; arb_direct_state_access = GLAD_GL_ARB_direct_state_access; + ext_texture_compression_s3tc = GLAD_GL_EXT_texture_compression_s3tc; + arb_texture_compression_bptc = GLAD_GL_ARB_texture_compression_bptc; } void Driver::FindBugs() { diff --git a/src/video_core/renderer_opengl/gl_driver.h b/src/video_core/renderer_opengl/gl_driver.h index 3490081ad..63323fc3f 100644 --- a/src/video_core/renderer_opengl/gl_driver.h +++ b/src/video_core/renderer_opengl/gl_driver.h @@ -5,6 +5,11 @@ #pragma once #include +#include "common/common_types.h" + +namespace VideoCore { +enum class CustomPixelFormat : u32; +} namespace OpenGL { @@ -40,6 +45,9 @@ public: /// Returns true of the driver has a particular bug stated in the DriverBug enum bool HasBug(DriverBug bug) const; + /// Returns true if the driver supports the provided custom format + bool 
IsCustomFormatSupported(VideoCore::CustomPixelFormat format) const; + /// Returns the vendor of the currently selected physical device Vendor GetVendor() const { return vendor; @@ -90,6 +98,8 @@ private: bool arb_buffer_storage{}; bool ext_clip_cull_distance{}; bool arb_direct_state_access{}; + bool ext_texture_compression_s3tc{}; + bool arb_texture_compression_bptc{}; std::string_view gl_version{}; std::string_view gpu_vendor{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3ac66feea..5f736422c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -36,9 +36,12 @@ constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024; } // Anonymous namespace -RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory_, Frontend::EmuWindow& emu_window, - Driver& driver_) - : RasterizerAccelerated{memory_}, driver{driver_}, runtime{driver}, res_cache{memory, runtime}, +RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory, + VideoCore::CustomTexManager& custom_tex_manager, + Frontend::EmuWindow& emu_window, Driver& driver_) + : RasterizerAccelerated{memory}, driver{driver_}, runtime{driver}, res_cache{memory, + custom_tex_manager, + runtime}, shader_program_manager{emu_window, driver, !driver.IsOpenGLES()}, vertex_buffer{GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE}, uniform_buffer{GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE}, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 81d1744bc..38b8f3a5e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -15,6 +15,10 @@ namespace Frontend { class EmuWindow; } +namespace VideoCore { +class CustomTexManager; +} + namespace OpenGL { struct ScreenInfo; @@ -24,8 +28,9 @@ class ShaderProgramManager; class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: - 
explicit RasterizerOpenGL(Memory::MemorySystem& memory, Frontend::EmuWindow& emu_window, - Driver& driver); + explicit RasterizerOpenGL(Memory::MemorySystem& memory, + VideoCore::CustomTexManager& custom_tex_manager, + Frontend::EmuWindow& emu_window, Driver& driver); ~RasterizerOpenGL() override; void LoadDiskResources(const std::atomic_bool& stop_loading, diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index 3f2665dc5..e86a31455 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -16,6 +16,7 @@ namespace OpenGL { +using VideoCore::StagingData; using VideoCore::TextureType; constexpr FormatTuple DEFAULT_TUPLE = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; @@ -42,6 +43,18 @@ static constexpr std::array COLOR_TUPLES_OES = { FormatTuple{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 }; +static constexpr std::array CUSTOM_TUPLES = { + DEFAULT_TUPLE, + FormatTuple{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, + GL_UNSIGNED_BYTE}, + FormatTuple{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, + GL_UNSIGNED_BYTE}, + FormatTuple{GL_COMPRESSED_RG_RGTC2, GL_COMPRESSED_RG_RGTC2, GL_UNSIGNED_BYTE}, + FormatTuple{GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, + GL_UNSIGNED_BYTE}, + FormatTuple{GL_COMPRESSED_RGBA_ASTC_4x4, GL_COMPRESSED_RGBA_ASTC_4x4, GL_UNSIGNED_BYTE}, +}; + [[nodiscard]] GLbitfield MakeBufferMask(VideoCore::SurfaceType type) { switch (type) { case VideoCore::SurfaceType::Color: @@ -97,7 +110,6 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) { auto [data, offset, invalidate] = upload_buffer.Map(size, 4); return StagingData{ - .buffer = upload_buffer.Handle(), .size = size, .mapped = std::span{data, size}, .buffer_offset = offset, @@ -121,14 +133,22 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat 
pixel_f return DEFAULT_TUPLE; } -OGLTexture TextureRuntime::Allocate(u32 width, u32 height, u32 levels, - VideoCore::PixelFormat format, VideoCore::TextureType type) { +const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::CustomPixelFormat pixel_format) { + const std::size_t format_index = static_cast(pixel_format); + return CUSTOM_TUPLES[format_index]; +} + +void TextureRuntime::Recycle(const HostTextureTag tag, Allocation&& alloc) { + texture_recycler.emplace(tag, std::move(alloc)); +} + +Allocation TextureRuntime::Allocate(u32 width, u32 height, u32 levels, const FormatTuple& tuple, + VideoCore::TextureType type) { const GLenum target = type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; - // Attempt to recycle an unused texture - const VideoCore::HostTextureTag key = { - .format = format, + const HostTextureTag key = { + .tuple = tuple, .type = type, .width = width, .height = height, @@ -136,9 +156,9 @@ OGLTexture TextureRuntime::Allocate(u32 width, u32 height, u32 levels, }; if (auto it = texture_recycler.find(key); it != texture_recycler.end()) { - OGLTexture texture = std::move(it->second); + Allocation alloc = std::move(it->second); texture_recycler.erase(it); - return texture; + return alloc; } // Allocate new texture @@ -148,7 +168,6 @@ OGLTexture TextureRuntime::Allocate(u32 width, u32 height, u32 levels, glActiveTexture(GL_TEXTURE0); glBindTexture(target, texture.handle); - const auto& tuple = GetFormatTuple(format); glTexStorage2D(target, levels, tuple.internal_format, width, height); glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); @@ -156,7 +175,14 @@ OGLTexture TextureRuntime::Allocate(u32 width, u32 height, u32 levels, glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glBindTexture(target, OpenGLState::GetCurState().texture_units[0].texture_2d); - return texture; + + return Allocation{ + .texture = std::move(texture), + .tuple = tuple, + .width = width, + .height = height, + .levels = levels, 
+ }; } bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear) { @@ -267,7 +293,7 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, return true; } -void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { +void TextureRuntime::GenerateMipmaps(Surface& surface) { OpenGLState prev_state = OpenGLState::GetCurState(); SCOPE_EXIT({ prev_state.Apply(); }); @@ -276,7 +302,7 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { state.Apply(); glActiveTexture(GL_TEXTURE0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, max_level); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, surface.levels - 1); glGenerateMipmap(GL_TEXTURE_2D); } @@ -318,22 +344,24 @@ void TextureRuntime::BindFramebuffer(GLenum target, GLint level, GLenum textarge Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) : VideoCore::SurfaceBase{params}, runtime{runtime}, driver{runtime.GetDriver()} { if (pixel_format != VideoCore::PixelFormat::Invalid) { - texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), levels, params.pixel_format, - texture_type); + const auto& tuple = runtime.GetFormatTuple(pixel_format); + alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), levels, tuple, texture_type); } } Surface::~Surface() { - if (pixel_format != VideoCore::PixelFormat::Invalid) { - const VideoCore::HostTextureTag tag = { - .format = pixel_format, - .type = texture_type, - .width = GetScaledWidth(), - .height = GetScaledHeight(), - .levels = levels, - }; - runtime.texture_recycler.emplace(tag, std::move(texture)); + if (pixel_format == VideoCore::PixelFormat::Invalid) { + return; } + + const HostTextureTag tag = { + .tuple = alloc.tuple, + .type = texture_type, + .width = alloc.width, + .height = alloc.height, + .levels = alloc.levels, + }; + runtime.Recycle(tag, std::move(alloc)); } void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& 
staging) { @@ -344,25 +372,31 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa if (is_scaled) { ScaledUpload(upload, staging); } else { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - const VideoCore::Rect2D rect = upload.texture_rect; glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(rect.GetWidth())); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, runtime.upload_buffer.Handle()); // Unmap the buffer FindStaging mapped beforehand runtime.upload_buffer.Unmap(staging.size); glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, texture.handle); + glBindTexture(GL_TEXTURE_2D, Handle()); - const auto& tuple = runtime.GetFormatTuple(pixel_format); - glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, rect.left, rect.bottom, - rect.GetWidth(), rect.GetHeight(), tuple.format, tuple.type, - reinterpret_cast(staging.buffer_offset)); + const auto& tuple = alloc.tuple; + if (is_custom && custom_format != VideoCore::CustomPixelFormat::RGBA8) { + glCompressedTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, rect.left, rect.bottom, + rect.GetWidth(), rect.GetHeight(), tuple.format, staging.size, + reinterpret_cast(staging.buffer_offset)); + } else { + glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, rect.left, rect.bottom, + rect.GetWidth(), rect.GetHeight(), tuple.format, tuple.type, + reinterpret_cast(staging.buffer_offset)); + } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + // Restore old texture + glBindTexture(GL_TEXTURE_2D, OpenGLState::GetCurState().texture_units[0].texture_2d); } } @@ -374,23 +408,53 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi if (is_scaled) { ScaledDownload(download, staging); } else { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - const VideoCore::Rect2D rect = download.texture_rect; glPixelStorei(GL_PACK_ROW_LENGTH, 
static_cast(rect.GetWidth())); runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, - texture.handle); + Handle()); const auto& tuple = runtime.GetFormatTuple(pixel_format); glReadPixels(rect.left, rect.bottom, rect.GetWidth(), rect.GetHeight(), tuple.format, tuple.type, staging.mapped.data()); glPixelStorei(GL_PACK_ROW_LENGTH, 0); + + // Restore previous framebuffer + glBindFramebuffer(GL_READ_FRAMEBUFFER, OpenGLState::GetCurState().draw.read_framebuffer); } } +bool Surface::Swap(u32 width, u32 height, VideoCore::CustomPixelFormat format) { + if (!driver.IsCustomFormatSupported(format)) { + return false; + } + + const auto& tuple = runtime.GetFormatTuple(format); + if (alloc.Matches(width, height, levels, tuple)) { + return true; + } + + const HostTextureTag tag = { + .tuple = alloc.tuple, + .type = texture_type, + .width = alloc.width, + .height = alloc.height, + .levels = alloc.levels, + }; + runtime.Recycle(tag, std::move(alloc)); + + is_custom = true; + custom_format = format; + alloc = runtime.Allocate(width, height, levels, tuple, texture_type); + + LOG_DEBUG(Render_OpenGL, "Swapped {}x{} {} surface at address {:#x} to {}x{} {}", + GetScaledWidth(), GetScaledHeight(), VideoCore::PixelFormatAsString(pixel_format), + addr, width, height, VideoCore::CustomPixelFormatAsString(format)); + + return true; +} + void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { const u32 rect_width = upload.texture_rect.GetWidth(); const u32 rect_height = upload.texture_rect.GetHeight(); @@ -412,7 +476,8 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload, const Sta unscaled_surface.Upload(unscaled_upload, staging); const auto& filterer = runtime.GetFilterer(); - if (!filterer.Filter(unscaled_surface.texture, unscaled_rect, texture, scaled_rect, type)) { + if (!filterer.Filter(unscaled_surface.alloc.texture, unscaled_rect, alloc.texture, scaled_rect, + type)) { const 
VideoCore::TextureBlit blit = { .src_level = 0, .dst_level = upload.texture_level, @@ -450,7 +515,7 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download, runtime.BlitTextures(*this, unscaled_surface, blit); glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, unscaled_surface.texture.handle); + glBindTexture(GL_TEXTURE_2D, unscaled_surface.Handle()); const auto& tuple = runtime.GetFormatTuple(pixel_format); if (driver.IsOpenGLES()) { diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 0e352d41d..ec27d3f9e 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -13,21 +13,65 @@ #include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/texture_filters/texture_filterer.h" +namespace VideoCore { +enum class CustomPixelFormat : u32; +} + namespace OpenGL { struct FormatTuple { GLint internal_format; GLenum format; GLenum type; + + bool operator==(const FormatTuple& other) const noexcept { + return std::tie(internal_format, format, type) == + std::tie(other.internal_format, other.format, other.type); + } }; -struct StagingData { - GLuint buffer; - u32 size = 0; - std::span mapped{}; - u64 buffer_offset = 0; +struct Allocation { + OGLTexture texture; + FormatTuple tuple; + u32 width; + u32 height; + u32 levels; + + bool Matches(u32 width_, u32 height_, u32 levels_, const FormatTuple& tuple_) const { + return std::tie(width, height, levels, tuple) == std::tie(width_, height_, levels_, tuple_); + } }; +struct HostTextureTag { + FormatTuple tuple{}; + VideoCore::TextureType type{}; + u32 width = 0; + u32 height = 0; + u32 levels = 1; + + bool operator==(const HostTextureTag& other) const noexcept { + return std::tie(tuple, type, width, height, levels) == + std::tie(other.tuple, other.type, other.width, other.height, other.levels); + } + + const u64 Hash() const { + 
return Common::ComputeHash64(this, sizeof(HostTextureTag)); + } +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + std::size_t operator()(const OpenGL::HostTextureTag& tag) const noexcept { + return tag.Hash(); + } +}; +} // namespace std + +namespace OpenGL { + class Driver; class Surface; @@ -44,16 +88,20 @@ public: ~TextureRuntime(); /// Maps an internal staging buffer of the provided size of pixel uploads/downloads - StagingData FindStaging(u32 size, bool upload); + VideoCore::StagingData FindStaging(u32 size, bool upload); /// Returns the OpenGL format tuple associated with the provided pixel format const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format); + const FormatTuple& GetFormatTuple(VideoCore::CustomPixelFormat pixel_format); /// Causes a GPU command flush void Finish() const {} + /// Takes back ownership of the allocation for recycling + void Recycle(const HostTextureTag tag, Allocation&& alloc); + /// Allocates an OpenGL texture with the specified dimentions and format - OGLTexture Allocate(u32 width, u32 height, u32 levels, VideoCore::PixelFormat format, + Allocation Allocate(u32 width, u32 height, u32 levels, const FormatTuple& tuple, VideoCore::TextureType type); /// Fills the rectangle of the texture with the clear value provided @@ -66,7 +114,7 @@ public: bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit); /// Generates mipmaps for all the available levels of the texture - void GenerateMipmaps(Surface& surface, u32 max_level); + void GenerateMipmaps(Surface& surface); /// Returns all source formats that support reinterpretation to the dest format [[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations( @@ -94,7 +142,7 @@ private: Driver& driver; TextureFilterer filterer; std::array reinterpreters; - std::unordered_multimap texture_recycler; + std::unordered_multimap texture_recycler; std::unordered_map> framebuffer_cache; StreamBuffer upload_buffer; 
std::vector download_buffer; @@ -108,14 +156,18 @@ public: /// Returns the surface image handle GLuint Handle() const noexcept { - return texture.handle; + return alloc.texture.handle; } /// Uploads pixel data in staging to a rectangle region of the surface texture - void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); + void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging); /// Downloads pixel data to staging from a rectangle region of the surface texture - void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging); + void Download(const VideoCore::BufferTextureCopy& download, + const VideoCore::StagingData& staging); + + /// Swaps the internal allocation to match the provided dimentions and format + bool Swap(u32 width, u32 height, VideoCore::CustomPixelFormat format); /// Returns the bpp of the internal surface format u32 GetInternalBytesPerPixel() const { @@ -124,15 +176,17 @@ public: private: /// Uploads pixel data to scaled texture - void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); + void ScaledUpload(const VideoCore::BufferTextureCopy& upload, + const VideoCore::StagingData& staging); /// Downloads scaled image by downscaling the requested rectangle - void ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& staging); + void ScaledDownload(const VideoCore::BufferTextureCopy& download, + const VideoCore::StagingData& staging); private: TextureRuntime& runtime; const Driver& driver; - OGLTexture texture{}; + Allocation alloc; }; class Framebuffer : public VideoCore::FramebufferBase { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index cb7b1b9cb..69ed22a99 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -273,14 +273,13 @@ static std::array 
MakeOrthographicMatrix(const float width, cons return matrix; } -RendererOpenGL::RendererOpenGL(Memory::MemorySystem& memory_, Frontend::EmuWindow& window, +RendererOpenGL::RendererOpenGL(Core::System& system_, Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window) - : RendererBase{window, secondary_window}, memory{memory_}, + : RendererBase{window, secondary_window}, system{system_}, memory{system.Memory()}, driver{Settings::values.graphics_api.GetValue() == Settings::GraphicsAPI::OpenGLES, Settings::values.renderer_debug.GetValue()}, - rasterizer{memory, render_window, driver}, frame_dumper{ - Core::System::GetInstance().VideoDumper(), - window} { + rasterizer{memory, system.CustomTexManager(), render_window, driver}, + frame_dumper{system.VideoDumper(), window} { const Vendor vendor = driver.GetVendor(); if (vendor == Vendor::Generic || vendor == Vendor::Unknown) { @@ -326,8 +325,6 @@ void RendererOpenGL::SwapBuffers() { } m_current_frame++; - - Core::System& system = Core::System::GetInstance(); system.perf_stats->EndSystemFrame(); render_window.PollEvents(); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 1f6633bb0..cea2d1e95 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -11,6 +11,10 @@ #include "video_core/renderer_opengl/gl_driver.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +namespace Core { +class System; +} + namespace Layout { struct FramebufferLayout; } @@ -60,7 +64,7 @@ class RasterizerOpenGL; class RendererOpenGL : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Memory::MemorySystem& memory, Frontend::EmuWindow& window, + explicit RendererOpenGL(Core::System& system, Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window); ~RendererOpenGL() override; @@ -129,6 +133,7 @@ private: void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const 
TextureInfo& texture); private: + Core::System& system; Memory::MemorySystem& memory; Driver driver; OpenGLState state; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d88424df6..9fc98f6f8 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -100,10 +100,10 @@ std::string BuildCommaSeparatedExtensions(std::vector available_ext } // Anonymous namespace -RendererVulkan::RendererVulkan(Memory::MemorySystem& memory_, Frontend::EmuWindow& window, +RendererVulkan::RendererVulkan(Core::System& system_, Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window) - : RendererBase{window, secondary_window}, memory{memory_}, - telemetry_session{Core::System::GetInstance().TelemetrySession()}, + : RendererBase{window, secondary_window}, system{system_}, memory{system.Memory()}, + telemetry_session{system.TelemetrySession()}, instance{window, Settings::values.physical_device.GetValue()}, scheduler{instance, renderpass_cache}, renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler}, @@ -111,8 +111,9 @@ RendererVulkan::RendererVulkan(Memory::MemorySystem& memory_, Frontend::EmuWindo renderpass_cache}, vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer, VERTEX_BUFFER_SIZE}, - rasterizer{memory, render_window, instance, scheduler, - desc_manager, runtime, renderpass_cache} { + rasterizer{ + memory, system.CustomTexManager(), render_window, instance, scheduler, desc_manager, + runtime, renderpass_cache} { Report(); CompileShaders(); BuildLayouts(); @@ -986,9 +987,7 @@ void RendererVulkan::SwapBuffers() { m_current_frame++; - Core::System& system = Core::System::GetInstance(); system.perf_stats->EndSystemFrame(); - render_window.PollEvents(); system.frame_limiter.DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h 
b/src/video_core/renderer_vulkan/renderer_vulkan.h index 90e1ade26..b17143100 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -20,8 +20,9 @@ #include "video_core/renderer_vulkan/vk_swapchain.h" namespace Core { +class System; class TelemetrySession; -} +} // namespace Core namespace Memory { class MemorySystem; @@ -61,7 +62,7 @@ class RendererVulkan : public VideoCore::RendererBase { static constexpr std::size_t PRESENT_PIPELINES = 3; public: - explicit RendererVulkan(Memory::MemorySystem& memory, Frontend::EmuWindow& window, + explicit RendererVulkan(Core::System& system, Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window); ~RendererVulkan() override; @@ -125,6 +126,7 @@ private: void LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); private: + Core::System& system; Memory::MemorySystem& memory; Core::TelemetrySession& telemetry_session; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 5ab119ca5..f28960b00 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/settings.h" #include "core/frontend/emu_window.h" +#include "video_core/rasterizer_cache/custom_tex_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_platform.h" @@ -108,6 +109,25 @@ vk::Format MakeFormat(VideoCore::PixelFormat format) { } } +vk::Format MakeCustomFormat(VideoCore::CustomPixelFormat format) { + switch (format) { + case VideoCore::CustomPixelFormat::RGBA8: + return vk::Format::eR8G8B8A8Unorm; + case VideoCore::CustomPixelFormat::BC1: + return vk::Format::eBc1RgbaUnormBlock; + case VideoCore::CustomPixelFormat::BC3: + return vk::Format::eBc3UnormBlock; + case VideoCore::CustomPixelFormat::BC5: + return vk::Format::eBc5UnormBlock; + 
case VideoCore::CustomPixelFormat::BC7: + return vk::Format::eBc7UnormBlock; + case VideoCore::CustomPixelFormat::ASTC: + return vk::Format::eAstc4x4UnormBlock; + } + LOG_ERROR(Render_Vulkan, "Unknown custom format {}", format); + return vk::Format::eR8G8B8A8Unorm; +} + vk::Format MakeAttributeFormat(Pica::PipelineRegs::VertexAttributeFormat format, u32 count, bool scaled = true) { static constexpr std::array attrib_formats_scaled = { @@ -350,6 +370,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) CreateDevice(); CreateFormatTable(); + CreateCustomFormatTable(); CreateAttribTable(); CollectTelemetryParameters(); } @@ -380,6 +401,11 @@ const FormatTraits& Instance::GetTraits(VideoCore::PixelFormat pixel_format) con return format_table[index]; } +const FormatTraits& Instance::GetTraits(VideoCore::CustomPixelFormat pixel_format) const { + const u32 index = static_cast(pixel_format); + return custom_format_table[index]; +} + const FormatTraits& Instance::GetTraits(Pica::PipelineRegs::VertexAttributeFormat format, u32 count) const { if (count == 0) [[unlikely]] { @@ -419,7 +445,7 @@ FormatTraits Instance::DetermineTraits(VideoCore::PixelFormat pixel_format, vk:: pixel_format == VideoCore::PixelFormat::D24S8; // Find the most inclusive usage flags for this format - vk::ImageUsageFlags best_usage; + vk::ImageUsageFlags best_usage{}; if (supports_blit || supports_transfer) { best_usage |= vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eTransferSrc; @@ -459,7 +485,7 @@ void Instance::CreateFormatTable() { }; for (const auto& pixel_format : pixel_formats) { - const auto format = MakeFormat(pixel_format); + const vk::Format format = MakeFormat(pixel_format); FormatTraits traits = DetermineTraits(pixel_format, format); const bool is_suitable = @@ -487,6 +513,41 @@ void Instance::CreateFormatTable() { } } +void Instance::CreateCustomFormatTable() { + // The traits are the same for RGBA8 + 
custom_format_table[0] = format_table[static_cast(VideoCore::PixelFormat::RGBA8)]; + + constexpr std::array custom_formats = { + VideoCore::CustomPixelFormat::BC1, VideoCore::CustomPixelFormat::BC3, + VideoCore::CustomPixelFormat::BC5, VideoCore::CustomPixelFormat::BC7, + VideoCore::CustomPixelFormat::ASTC, + }; + + for (const auto& custom_format : custom_formats) { + const vk::Format format = MakeCustomFormat(custom_format); + const vk::FormatProperties format_properties = physical_device.getFormatProperties(format); + + // Compressed formats don't support blit_dst in general so just check for transfer + const vk::FormatFeatureFlags transfer_usage = vk::FormatFeatureFlagBits::eSampledImage; + const bool supports_transfer = + (format_properties.optimalTilingFeatures & transfer_usage) == transfer_usage; + + vk::ImageUsageFlags best_usage{}; + if (supports_transfer) { + best_usage |= vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | + vk::ImageUsageFlagBits::eTransferSrc; + } + + const u32 index = static_cast(custom_format); + custom_format_table[index] = FormatTraits{ + .transfer_support = supports_transfer, + .usage = best_usage, + .aspect = vk::ImageAspectFlagBits::eColor, + .native = format, + }; + } +} + void Instance::CreateAttribTable() { constexpr std::array attrib_formats = { Pica::PipelineRegs::VertexAttributeFormat::BYTE, diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index de481b362..7272e47a9 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -14,6 +14,10 @@ namespace Frontend { class EmuWindow; } +namespace VideoCore { +enum class CustomPixelFormat : u32; +} + VK_DEFINE_HANDLE(VmaAllocator) namespace Vulkan { @@ -39,6 +43,7 @@ public: /// Returns the FormatTraits struct for the provided pixel format const FormatTraits& GetTraits(VideoCore::PixelFormat pixel_format) const; + const FormatTraits& 
GetTraits(VideoCore::CustomPixelFormat pixel_format) const; /// Returns the FormatTraits struct for the provided attribute format and count const FormatTraits& GetTraits(Pica::PipelineRegs::VertexAttributeFormat format, @@ -271,6 +276,7 @@ private: /// Creates the format compatibility table for the current device void CreateFormatTable(); + void CreateCustomFormatTable(); /// Creates the attribute format table for the current device void CreateAttribTable(); @@ -302,6 +308,7 @@ private: vk::Queue graphics_queue; std::vector physical_devices; std::array format_table; + std::array custom_format_table; std::array attrib_table; std::vector available_extensions; u32 present_queue_family_index{0}; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 0f2f352a8..55081eab2 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -4,8 +4,8 @@ #pragma once #include -#include #include +#include #include #include #include "common/common_types.h" diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 11c9e9b60..70fa0803c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -100,7 +100,7 @@ PipelineCache::Shader::Shader(const Instance& instance, vk::ShaderStageFlagBits std::string code) : Shader{instance} { module = Compile(code, stage, instance.GetDevice(), ShaderOptimization::High); - MarkBuilt(); + MarkDone(); } PipelineCache::Shader::~Shader() { @@ -139,7 +139,7 @@ bool PipelineCache::GraphicsPipeline::Build(bool fail_on_compile_required) { if (fail_on_compile_required) { // Check if all shader modules are ready for (auto& shader : stages) { - if (shader && !shader->IsBuilt()) { + if (shader && !shader->IsDone()) { return false; } } @@ -309,7 +309,7 @@ bool 
PipelineCache::GraphicsPipeline::Build(bool fail_on_compile_required) { continue; } - shader->WaitBuilt(); + shader->WaitDone(); shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ .stage = MakeShaderStage(i), .module = shader->Handle(), @@ -392,7 +392,7 @@ bool PipelineCache::GraphicsPipeline::Build(bool fail_on_compile_required) { UNREACHABLE(); } - MarkBuilt(); + MarkDone(); return true; } @@ -489,7 +489,7 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { } GraphicsPipeline* const pipeline{it->second.get()}; - if (!wait_built && !pipeline->IsBuilt()) { + if (!wait_built && !pipeline->IsDone()) { return false; } @@ -497,8 +497,8 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { ApplyDynamic(info, is_dirty); if (current_pipeline != pipeline || is_dirty) { - if (!pipeline->IsBuilt()) { - scheduler.Record([pipeline](vk::CommandBuffer) { pipeline->WaitBuilt(); }); + if (!pipeline->IsDone()) { + scheduler.Record([pipeline](vk::CommandBuffer) { pipeline->WaitDone(); }); } scheduler.Record([pipeline](vk::CommandBuffer cmdbuf) { @@ -549,7 +549,7 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, workers.QueueWork([device, &shader] { shader.module = Compile(shader.program, vk::ShaderStageFlagBits::eVertex, device, ShaderOptimization::High); - shader.MarkBuilt(); + shader.MarkDone(); }); } @@ -589,7 +589,7 @@ bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { const std::string code = GenerateFixedGeometryShader(gs_config); shader.module = Compile(code, vk::ShaderStageFlagBits::eGeometry, device, ShaderOptimization::High); - shader.MarkBuilt(); + shader.MarkDone(); }); } @@ -620,13 +620,13 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { if (emit_spirv) { const std::vector code = GenerateFragmentShaderSPV(config); shader.module = CompileSPV(code, device); - shader.MarkBuilt(); + shader.MarkDone(); } else { workers.QueueWork([config, 
device, &shader]() { const std::string code = GenerateFragmentShader(config); shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device, - ShaderOptimization::High); - shader.MarkBuilt(); + ShaderOptimization::Debug); + shader.MarkDone(); }); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 16dc2ae4a..8d2fdb16b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -10,7 +10,6 @@ #include "video_core/regs_framebuffer.h" #include "video_core/regs_pipeline.h" #include "video_core/regs_rasterizer.h" -#include "video_core/renderer_vulkan/pica_to_vk.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -69,12 +68,15 @@ struct DrawParams { } // Anonymous namespace -RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory_, Frontend::EmuWindow& emu_window, - const Instance& instance, Scheduler& scheduler, - DescriptorManager& desc_manager, TextureRuntime& runtime, - RenderpassCache& renderpass_cache) - : RasterizerAccelerated{memory_}, instance{instance}, scheduler{scheduler}, runtime{runtime}, - renderpass_cache{renderpass_cache}, desc_manager{desc_manager}, res_cache{memory, runtime}, +RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, + VideoCore::CustomTexManager& custom_tex_manager, + Frontend::EmuWindow& emu_window, const Instance& instance, + Scheduler& scheduler, DescriptorManager& desc_manager, + TextureRuntime& runtime, RenderpassCache& renderpass_cache) + : RasterizerAccelerated{memory}, instance{instance}, scheduler{scheduler}, runtime{runtime}, + renderpass_cache{renderpass_cache}, desc_manager{desc_manager}, res_cache{memory, + custom_tex_manager, + runtime}, pipeline_cache{instance, scheduler, renderpass_cache, desc_manager}, null_surface{NULL_PARAMS, 
vk::Format::eR8G8B8A8Unorm, NULL_USAGE, vk::ImageAspectFlagBits::eColor, runtime}, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index f75d418e9..6b3b12d91 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -15,6 +15,10 @@ namespace Frontend { class EmuWindow; } +namespace VideoCore { +class CustomTexManager; +} + namespace Vulkan { struct ScreenInfo; @@ -28,10 +32,11 @@ class RasterizerVulkan : public VideoCore::RasterizerAccelerated { friend class RendererVulkan; public: - explicit RasterizerVulkan(Memory::MemorySystem& memory, Frontend::EmuWindow& emu_window, - const Instance& instance, Scheduler& scheduler, - DescriptorManager& desc_manager, TextureRuntime& runtime, - RenderpassCache& renderpass_cache); + explicit RasterizerVulkan(Memory::MemorySystem& memory, + VideoCore::CustomTexManager& custom_tex_manager, + Frontend::EmuWindow& emu_window, const Instance& instance, + Scheduler& scheduler, DescriptorManager& desc_manager, + TextureRuntime& runtime, RenderpassCache& renderpass_cache); ~RasterizerVulkan() override; void LoadDiskResources(const std::atomic_bool& stop_loading, diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index 97bcde994..a56198777 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "common/microprofile.h" +#include "video_core/rasterizer_cache/custom_tex_manager.h" #include "video_core/rasterizer_cache/texture_codec.h" #include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_vulkan/pica_to_vk.h" @@ -18,9 +19,11 @@ MICROPROFILE_DEFINE(Vulkan_ImageAlloc, "Vulkan", "Texture Allocation", MP_RGB(19 namespace Vulkan { +namespace { + using VideoCore::GetFormatType; using VideoCore::MipLevels; -using VideoCore::PixelFormatAsString; +using VideoCore::StagingData; using VideoCore::TextureType; struct RecordParams { @@ -33,7 +36,7 @@ struct RecordParams { vk::Image dst_image; }; -[[nodiscard]] vk::Filter MakeFilter(VideoCore::PixelFormat pixel_format) { +vk::Filter MakeFilter(VideoCore::PixelFormat pixel_format) { switch (pixel_format) { case VideoCore::PixelFormat::D16: case VideoCore::PixelFormat::D24: @@ -94,7 +97,7 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) { break; } default: - LOG_ERROR(Render_Vulkan, "Unimplemtend convertion for depth format {}", + LOG_ERROR(Render_Vulkan, "Unimplemented convertion for depth format {}", vk::to_string(dest)); UNREACHABLE(); } @@ -103,8 +106,10 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) { return depth_offset; } -constexpr u64 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; -constexpr u64 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024; +constexpr u64 UPLOAD_BUFFER_SIZE = 128 * 1024 * 1024; +constexpr u64 DOWNLOAD_BUFFER_SIZE = 16 * 1024 * 1024; + +} // Anonymous namespace TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler, RenderpassCache& renderpass_cache, DescriptorManager& desc_manager) @@ -146,10 +151,9 @@ TextureRuntime::~TextureRuntime() { StagingData TextureRuntime::FindStaging(u32 size, bool upload) { auto& buffer = upload ? 
upload_buffer : download_buffer; - auto [data, offset, invalidate] = buffer.Map(size, 4); + auto [data, offset, invalidate] = buffer.Map(size, 16); return StagingData{ - .buffer = buffer.Handle(), .size = size, .mapped = std::span{data, size}, .buffer_offset = offset, @@ -163,24 +167,24 @@ void TextureRuntime::Finish() { Allocation TextureRuntime::Allocate(u32 width, u32 height, u32 levels, VideoCore::PixelFormat format, VideoCore::TextureType type) { const FormatTraits traits = instance.GetTraits(format); - return Allocate(width, height, levels, format, type, traits.native, traits.usage, + const bool is_mutable = format == VideoCore::PixelFormat::RGBA8; + return Allocate(width, height, levels, is_mutable, type, traits.native, traits.usage, traits.aspect); } -Allocation TextureRuntime::Allocate(u32 width, u32 height, u32 levels, - VideoCore::PixelFormat pixel_format, +Allocation TextureRuntime::Allocate(u32 width, u32 height, u32 levels, bool is_mutable, VideoCore::TextureType type, vk::Format format, vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect) { MICROPROFILE_SCOPE(Vulkan_ImageAlloc); - ASSERT(pixel_format != VideoCore::PixelFormat::Invalid && levels >= 1); + ASSERT(format != vk::Format::eUndefined && levels >= 1); const HostTextureTag key = { .format = format, - .pixel_format = pixel_format, .type = type, .width = width, .height = height, .levels = levels, + .is_mutable = is_mutable, }; if (auto it = texture_recycler.find(key); it != texture_recycler.end()) { @@ -189,18 +193,17 @@ Allocation TextureRuntime::Allocate(u32 width, u32 height, u32 levels, return alloc; } - const bool create_storage_view = pixel_format == VideoCore::PixelFormat::RGBA8; const u32 layers = type == VideoCore::TextureType::CubeMap ? 
6 : 1; vk::ImageCreateFlags flags; if (type == VideoCore::TextureType::CubeMap) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; } - if (create_storage_view) { + if (is_mutable) { flags |= vk::ImageCreateFlagBits::eMutableFormat; } - const bool need_format_list = create_storage_view && instance.IsImageFormatListSupported(); + const bool need_format_list = is_mutable && instance.IsImageFormatListSupported(); const std::array format_list = { vk::Format::eR8G8B8A8Unorm, vk::Format::eR32Uint, @@ -290,6 +293,10 @@ Allocation TextureRuntime::Allocate(u32 width, u32 height, u32 levels, .allocation = allocation, .aspect = aspect, .format = format, + .is_mutable = is_mutable, + .width = width, + .height = height, + .levels = levels, }; } @@ -722,7 +729,35 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, return true; } -void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {} +void TextureRuntime::GenerateMipmaps(Surface& surface) { + return; + if (surface.custom_format != VideoCore::CustomPixelFormat::RGBA8) { + LOG_ERROR(Render_Vulkan, "Generating mipmaps for compressed formats unsupported!"); + return; + } + + renderpass_cache.ExitRenderpass(); + + // Always use the allocation width on custom textures + u32 current_width = surface.alloc.width; + u32 current_height = surface.alloc.height; + const u32 levels = surface.levels; + + for (u32 i = 1; i < levels; i++) { + const VideoCore::Rect2D src_rect{0, current_height, current_width, 0}; + current_width = current_width > 1 ? current_width >> 1 : 1; + current_height = current_height > 1 ? 
current_height >> 1 : 1; + const VideoCore::Rect2D dst_rect{0, current_height, current_width, 0}; + + const VideoCore::TextureBlit blit = { + .src_level = i - 1, + .dst_level = i, + .src_rect = src_rect, + .dst_rect = dst_rect, + }; + BlitTextures(surface, surface, blit); + } +} const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations( VideoCore::PixelFormat dest_format) const { @@ -751,23 +786,25 @@ Surface::Surface(const VideoCore::SurfaceParams& params, vk::Format format, : VideoCore::SurfaceBase{params}, runtime{runtime}, instance{runtime.GetInstance()}, scheduler{runtime.GetScheduler()} { if (format != vk::Format::eUndefined) { - alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), levels, pixel_format, - texture_type, format, usage, aspect); + alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), levels, false, texture_type, + format, usage, aspect); } } Surface::~Surface() { - if (pixel_format != VideoCore::PixelFormat::Invalid) { - const HostTextureTag tag = { - .format = alloc.format, - .pixel_format = pixel_format, - .type = texture_type, - .width = GetScaledWidth(), - .height = GetScaledHeight(), - }; - - runtime.Recycle(tag, std::move(alloc)); + if (pixel_format == VideoCore::PixelFormat::Invalid) { + return; } + + const HostTextureTag tag = { + .format = alloc.format, + .type = texture_type, + .width = alloc.width, + .height = alloc.height, + .levels = alloc.levels, + .is_mutable = alloc.is_mutable, + }; + runtime.Recycle(tag, std::move(alloc)); } void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { @@ -784,79 +821,78 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa .src_image = alloc.image, }; - scheduler.Record( - [format = alloc.format, params, staging, upload](vk::CommandBuffer cmdbuf) { - u32 num_copies = 1; - std::array buffer_image_copies; + scheduler.Record([buffer = runtime.upload_buffer.Handle(), format = alloc.format, params, + staging, 
upload](vk::CommandBuffer cmdbuf) { + u32 num_copies = 1; + std::array buffer_image_copies; - const VideoCore::Rect2D rect = upload.texture_rect; - buffer_image_copies[0] = vk::BufferImageCopy{ - .bufferOffset = staging.buffer_offset + upload.buffer_offset, - .bufferRowLength = rect.GetWidth(), - .bufferImageHeight = rect.GetHeight(), - .imageSubresource{ - .aspectMask = params.aspect, - .mipLevel = upload.texture_level, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, - .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}, - }; + const VideoCore::Rect2D rect = upload.texture_rect; + buffer_image_copies[0] = vk::BufferImageCopy{ + .bufferOffset = staging.buffer_offset + upload.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource{ + .aspectMask = params.aspect, + .mipLevel = upload.texture_level, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}, + }; - if (params.aspect & vk::ImageAspectFlagBits::eStencil) { - buffer_image_copies[0].imageSubresource.aspectMask = - vk::ImageAspectFlagBits::eDepth; - vk::BufferImageCopy& stencil_copy = buffer_image_copies[1]; - stencil_copy = buffer_image_copies[0]; - stencil_copy.bufferOffset += UnpackDepthStencil(staging, format); - stencil_copy.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; - num_copies++; - } + if (params.aspect & vk::ImageAspectFlagBits::eStencil) { + buffer_image_copies[0].imageSubresource.aspectMask = + vk::ImageAspectFlagBits::eDepth; + vk::BufferImageCopy& stencil_copy = buffer_image_copies[1]; + stencil_copy = buffer_image_copies[0]; + stencil_copy.bufferOffset += UnpackDepthStencil(staging, format); + stencil_copy.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; + num_copies++; + } - const vk::ImageMemoryBarrier 
read_barrier = { - .srcAccessMask = params.src_access, - .dstAccessMask = vk::AccessFlagBits::eTransferWrite, - .oldLayout = vk::ImageLayout::eGeneral, - .newLayout = vk::ImageLayout::eTransferDstOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = params.src_image, - .subresourceRange{ - .aspectMask = params.aspect, - .baseMipLevel = upload.texture_level, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - const vk::ImageMemoryBarrier write_barrier = { - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = params.src_access, - .oldLayout = vk::ImageLayout::eTransferDstOptimal, - .newLayout = vk::ImageLayout::eGeneral, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = params.src_image, - .subresourceRange{ - .aspectMask = params.aspect, - .baseMipLevel = upload.texture_level, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange{ + .aspectMask = params.aspect, + .baseMipLevel = upload.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const vk::ImageMemoryBarrier write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + 
.subresourceRange{ + .aspectMask = params.aspect, + .baseMipLevel = upload.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; - cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); - cmdbuf.copyBufferToImage(staging.buffer, params.src_image, - vk::ImageLayout::eTransferDstOptimal, num_copies, - buffer_image_copies.data()); + cmdbuf.copyBufferToImage(buffer, params.src_image, vk::ImageLayout::eTransferDstOptimal, + num_copies, buffer_image_copies.data()); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, - vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier); - }); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier); + }); runtime.upload_buffer.Commit(staging.size); } @@ -883,7 +919,8 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi .src_image = alloc.image, }; - scheduler.Record([params, staging, download](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer = runtime.download_buffer.Handle(), params, staging, + download](vk::CommandBuffer cmdbuf) { const VideoCore::Rect2D rect = download.texture_rect; const vk::BufferImageCopy buffer_image_copy = { .bufferOffset = staging.buffer_offset + download.buffer_offset, @@ -939,8 +976,8 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); - cmdbuf.copyImageToBuffer(params.src_image, vk::ImageLayout::eTransferSrcOptimal, - staging.buffer, buffer_image_copy); + 
cmdbuf.copyImageToBuffer(params.src_image, vk::ImageLayout::eTransferSrcOptimal, buffer, + buffer_image_copy); cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, @@ -950,6 +987,39 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi } } +bool Surface::Swap(u32 width, u32 height, VideoCore::CustomPixelFormat format) { + const FormatTraits& traits = instance.GetTraits(format); + if (!traits.transfer_support) { + return false; + } + + const vk::Format custom_vk_format = traits.native; + if (alloc.Matches(width, height, levels, custom_vk_format)) { + return true; + } + + const HostTextureTag tag = { + .format = alloc.format, + .type = texture_type, + .width = alloc.width, + .height = alloc.height, + .levels = levels, + .is_mutable = alloc.is_mutable, + }; + runtime.Recycle(tag, std::move(alloc)); + + is_custom = true; + custom_format = format; + alloc = runtime.Allocate(width, height, levels, false, texture_type, custom_vk_format, + traits.usage, traits.aspect); + + LOG_DEBUG(Render_Vulkan, "Swapped {}x{} {} surface at address {:#x} to {}x{} {}", + GetScaledWidth(), GetScaledHeight(), VideoCore::PixelFormatAsString(pixel_format), + addr, width, height, VideoCore::CustomPixelFormatAsString(format)); + + return true; +} + u32 Surface::GetInternalBytesPerPixel() const { // Request 5 bytes for D24S8 as well because we can use the // extra space when deinterleaving the data during upload @@ -1041,7 +1111,7 @@ vk::ImageView Surface::StorageView() noexcept { ASSERT_MSG(pixel_format == VideoCore::PixelFormat::RGBA8, "Attempted to retrieve storage view from unsupported surface with format {}", - PixelFormatAsString(pixel_format)); + VideoCore::PixelFormatAsString(pixel_format)); const vk::ImageViewCreateInfo storage_view_info = { .image = alloc.image, @@ -1268,8 +1338,8 @@ Sampler::Sampler(TextureRuntime& runtime, VideoCore::SamplerParams params) 
.maxAnisotropy = properties.limits.maxSamplerAnisotropy, .compareEnable = false, .compareOp = vk::CompareOp::eAlways, - .minLod = lod_min, - .maxLod = lod_max, + .minLod = 0.f * lod_min, + .maxLod = 0.f * lod_max, .borderColor = use_border_color ? vk::BorderColor::eFloatCustomEXT : vk::BorderColor::eIntOpaqueBlack, .unnormalizedCoordinates = false, diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index 1e7d62066..fd74148a5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -17,14 +17,11 @@ VK_DEFINE_HANDLE(VmaAllocation) -namespace Vulkan { +namespace VideoCore { +enum class CustomPixelFormat : u32; +} -struct StagingData { - vk::Buffer buffer; - u32 size = 0; - std::span mapped{}; - u64 buffer_offset = 0; -}; +namespace Vulkan { struct Allocation { vk::Image image; @@ -35,15 +32,24 @@ struct Allocation { VmaAllocation allocation; vk::ImageAspectFlags aspect; vk::Format format; + bool is_mutable; + u32 width; + u32 height; + u32 levels; + + bool Matches(u32 width_, u32 height_, u32 levels_, vk::Format format_) const noexcept { + return std::tie(width, height, levels, format) == + std::tie(width_, height_, levels_, format_); + } }; struct HostTextureTag { vk::Format format = vk::Format::eUndefined; - VideoCore::PixelFormat pixel_format = VideoCore::PixelFormat::Invalid; VideoCore::TextureType type = VideoCore::TextureType::Texture2D; u32 width = 1; u32 height = 1; u32 levels = 1; + u32 is_mutable = 0; // This is a u32 to ensure alignment for hashing auto operator<=>(const HostTextureTag&) const noexcept = default; @@ -90,15 +96,14 @@ public: void Recycle(const HostTextureTag tag, Allocation&& alloc); /// Maps an internal staging buffer of the provided size of pixel uploads/downloads - [[nodiscard]] StagingData FindStaging(u32 size, bool upload); + [[nodiscard]] VideoCore::StagingData FindStaging(u32 size, bool upload); /// 
Allocates a vulkan image possibly resusing an existing one [[nodiscard]] Allocation Allocate(u32 width, u32 height, u32 levels, VideoCore::PixelFormat format, VideoCore::TextureType type); /// Allocates a vulkan image - [[nodiscard]] Allocation Allocate(u32 width, u32 height, u32 levels, - VideoCore::PixelFormat pixel_format, + [[nodiscard]] Allocation Allocate(u32 width, u32 height, u32 levels, bool is_mutable, VideoCore::TextureType type, vk::Format format, vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect); @@ -112,7 +117,7 @@ public: bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit); /// Generates mipmaps for all the available levels of the texture - void GenerateMipmaps(Surface& surface, u32 max_level); + void GenerateMipmaps(Surface& surface); /// Returns all source formats that support reinterpretation to the dest format [[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations( @@ -176,10 +181,14 @@ public: } /// Uploads pixel data in staging to a rectangle region of the surface texture - void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); + void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging); /// Downloads pixel data to staging from a rectangle region of the surface texture - void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging); + void Download(const VideoCore::BufferTextureCopy& download, + const VideoCore::StagingData& staging); + + /// Swaps the internal allocation to match the provided dimentions and format + bool Swap(u32 width, u32 height, VideoCore::CustomPixelFormat format); /// Returns the bpp of the internal surface format u32 GetInternalBytesPerPixel() const; @@ -201,14 +210,16 @@ public: private: /// Uploads pixel data to scaled texture - void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); + void ScaledUpload(const VideoCore::BufferTextureCopy& 
upload, + const VideoCore::StagingData& staging); /// Downloads scaled image by downscaling the requested rectangle - void ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& stagings); + void ScaledDownload(const VideoCore::BufferTextureCopy& download, + const VideoCore::StagingData& stagings); /// Downloads scaled depth stencil data void DepthStencilDownload(const VideoCore::BufferTextureCopy& download, - const StagingData& staging); + const VideoCore::StagingData& staging); private: TextureRuntime& runtime; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 3803e1faf..56e3bcc4c 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -6,6 +6,7 @@ #include "common/archives.h" #include "common/logging/log.h" #include "common/settings.h" +#include "core/core.h" #include "video_core/pica.h" #include "video_core/pica_state.h" #include "video_core/renderer_base.h" @@ -31,8 +32,8 @@ Memory::MemorySystem* g_memory; /// Initialize the video core ResultStatus Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window, - Memory::MemorySystem& memory) { - g_memory = &memory; + Core::System& system) { + g_memory = &system.Memory(); Pica::Init(); const Settings::GraphicsAPI graphics_api = Settings::values.graphics_api.GetValue(); @@ -40,10 +41,10 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondar case Settings::GraphicsAPI::OpenGL: case Settings::GraphicsAPI::OpenGLES: OpenGL::GLES = graphics_api == Settings::GraphicsAPI::OpenGLES; - g_renderer = std::make_unique(memory, emu_window, secondary_window); + g_renderer = std::make_unique(system, emu_window, secondary_window); break; case Settings::GraphicsAPI::Vulkan: - g_renderer = std::make_unique(memory, emu_window, secondary_window); + g_renderer = std::make_unique(system, emu_window, secondary_window); break; default: LOG_CRITICAL(Render, "Invalid graphics API enum value {}", graphics_api); diff 
--git a/src/video_core/video_core.h b/src/video_core/video_core.h index bcb0f90b6..69432c86b 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -10,6 +10,10 @@ #include #include "core/frontend/emu_window.h" +namespace Core { +class System; +} + namespace Frontend { class EmuWindow; } @@ -44,7 +48,7 @@ enum class ResultStatus { /// Initialize the video core ResultStatus Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window, - Memory::MemorySystem& memory); + Core::System& system); /// Shutdown the video core void Shutdown();