PDF4QT/PdfForQtLib/sources/pdfimage.cpp

//    Copyright (C) 2019 Jakub Melka
//
//    This file is part of PdfForQt.
//
//    PdfForQt is free software: you can redistribute it and/or modify
//    it under the terms of the GNU Lesser General Public License as published by
//    the Free Software Foundation, either version 3 of the License, or
//    (at your option) any later version.
//
//    PdfForQt is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with PDFForQt.  If not, see <https://www.gnu.org/licenses/>.

#include "pdfimage.h"
#include "pdfdocument.h"
#include "pdfconstants.h"
#include "pdfexception.h"
#include "pdfutils.h"

#include <openjpeg.h>
#include <jpeglib.h>

namespace pdf
{

/// State object passed as user data to the OpenJPEG stream and message callbacks.
/// It wraps a (not owned) byte array as an in-memory stream and collects messages
/// reported by the decoder.
struct PDFJPEG2000ImageData
{
    /// Source data of the stream (not owned by this structure)
    const QByteArray* byteArray{nullptr};

    /// Current position in the byte array (offset from the start, in bytes)
    OPJ_SIZE_T position{0};

    /// Warnings/errors collected by the message callbacks
    std::vector<PDFRenderError> errors;

    /// Read callback, copies at most \p p_nb_bytes bytes from current position to \p p_buffer
    static OPJ_SIZE_T read(void* p_buffer, OPJ_SIZE_T p_nb_bytes, void* p_user_data);

    /// Seek callback, sets the current position to the absolute offset \p p_nb_bytes
    static OPJ_BOOL seek(OPJ_OFF_T p_nb_bytes, void* p_user_data);

    /// Skip callback, advances the current position by at most \p p_nb_bytes bytes
    static OPJ_OFF_T skip(OPJ_OFF_T p_nb_bytes, void* p_user_data);
};

/// Data source for the libjpeg decoder, which reads from a byte array in memory.
/// Pointer to this structure is reinterpreted as pointer to jpeg_source_mgr (and back),
/// so the source manager must remain the first member.
struct PDFJPEGDCTSource
{
    /// libjpeg source manager (callbacks and current input window); must be first
    jpeg_source_mgr sourceManager;

    /// Data of the JPEG image (not owned by this structure)
    const QByteArray* buffer{nullptr};

    /// Number of bytes at the start of the buffer, which are skipped
    int startByte{0};
};

/// Creates an image from the image stream \p stream. The way the data are decoded
/// is selected by the (last) filter of the stream:
///  - DCTDecode / DCT: data are decoded by libjpeg,
///  - JPXDecode: data are decoded by OpenJPEG and converted to 8 bits per component,
///  - CCITTFaxDecode / JBIG2Decode: not implemented, exception is thrown,
///  - otherwise the content is treated as raw samples (if color space is set),
///    or as a 1-bit stencil mask (if ImageMask is true).
/// \param document Document, used to resolve indirect objects and decode the stream
/// \param stream Stream with the image (dictionary + data)
/// \param colorSpace Color space of the image, it is stored in the returned image
/// \param errorReporter Receives warnings found during decoding (must not be null,
///        it is used without a null check)
/// \returns Created image. The image data can be left default-constructed, if no
///          branch was able to produce them.
/// \throws PDFException if the stream is empty, or if libjpeg reports an error
/// \throws PDFRendererException for not implemented features, invalid image
///         parameters and JPEG 2000 decoding errors
PDFImage PDFImage::createImage(const PDFDocument* document, const PDFStream* stream, PDFColorSpacePointer colorSpace, PDFRenderErrorReporter* errorReporter)
{
    PDFImage image;
    image.m_colorSpace = colorSpace;

    const PDFDictionary* dictionary = stream->getDictionary();
    QByteArray content = document->getDecodedStream(stream);
    PDFDocumentDataLoaderDecorator loader(document);

    if (content.isEmpty())
    {
        throw PDFException(PDFTranslationContext::tr("Image has not data."));
    }

    // TODO: Implement SMask
    // TODO: Implement SMaskInData

    for (const char* notImplementedKey : { "SMask", "SMaskInData" })
    {
        if (dictionary->hasKey(notImplementedKey))
        {
            throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Not implemented image property '%2'.").arg(QString::fromLatin1(notImplementedKey)));
        }
    }

    PDFImageData::MaskingType maskingType = PDFImageData::MaskingType::None;
    std::vector<PDFInteger> mask;
    std::vector<PDFReal> decode = loader.readNumberArrayFromDictionary(dictionary, "Decode");
    bool imageMask = loader.readBooleanFromDictionary(dictionary, "ImageMask", false);

    // Fill Mask
    if (dictionary->hasKey("Mask"))
    {
        const PDFObject& object = document->getObject(dictionary->get("Mask"));
        if (object.isArray())
        {
            maskingType = PDFImageData::MaskingType::ColorKeyMasking;
            mask = loader.readIntegerArray(object);
        }
        else if (object.isStream())
        {
            // TODO: Implement Mask Image
            maskingType = PDFImageData::MaskingType::Image;
            throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Mask image is not implemented."));
        }
    }

    // ImageMask entry has precedence over the Mask entry
    if (imageMask)
    {
        maskingType = PDFImageData::MaskingType::ImageMask;
    }

    // Retrieve filters
    PDFObject filters;
    if (dictionary->hasKey(PDF_STREAM_DICT_FILTER))
    {
        filters = document->getObject(dictionary->get(PDF_STREAM_DICT_FILTER));
    }
    else if (dictionary->hasKey(PDF_STREAM_DICT_FILE_FILTER))
    {
        filters = document->getObject(dictionary->get(PDF_STREAM_DICT_FILE_FILTER));
    }

    // Retrieve filter parameters
    PDFObject filterParameters;
    if (dictionary->hasKey(PDF_STREAM_DICT_DECODE_PARMS))
    {
        filterParameters = document->getObject(dictionary->get(PDF_STREAM_DICT_DECODE_PARMS));
    }
    else if (dictionary->hasKey(PDF_STREAM_DICT_FDECODE_PARMS))
    {
        filterParameters = document->getObject(dictionary->get(PDF_STREAM_DICT_FDECODE_PARMS));
    }

    // Name of the image filter is either the single filter name, or the last
    // filter in the filter array.
    QByteArray imageFilterName;
    if (filters.isName())
    {
        imageFilterName = filters.getString();
    }
    else if (filters.isArray())
    {
        const PDFArray* filterArray = filters.getArray();
        const size_t filterCount = filterArray->getCount();

        if (filterCount)
        {
            const PDFObject& object = document->getObject(filterArray->getItem(filterCount - 1));
            if (object.isName())
            {
                imageFilterName = object.getString();
            }
        }
    }

    if (imageFilterName == "DCTDecode" || imageFilterName == "DCT")
    {
        int colorTransform = loader.readIntegerFromDictionary(dictionary, "ColorTransform", -1);

        jpeg_decompress_struct codec;
        jpeg_error_mgr errorManager;
        std::memset(&codec, 0, sizeof(jpeg_decompress_struct));
        std::memset(&errorManager, 0, sizeof(errorManager));

        PDFJPEGDCTSource source;
        source.buffer = &content;
        std::memset(&source.sourceManager, 0, sizeof(jpeg_source_mgr));

        // Fix issue, that image doesn't start with FFD8 (start of image marker). If this
        // occurs, try to find sequence FFD8, and if we can find it, then advance the buffer.
        source.startByte = qMax(content.indexOf("\xFF\xD8"), 0);
        if (source.startByte > 0)
        {
            errorReporter->reportRenderError(RenderErrorType::Warning, PDFTranslationContext::tr("Malformed data while reading JPEG stream. %1 bytes skipped.").arg(source.startByte));
        }

        // Error handler: libjpeg expects, that this function doesn't return. We release
        // the codec and leave the library by throwing an exception.
        auto errorMethod = [](j_common_ptr ptr)
        {
            char buffer[JMSG_LENGTH_MAX] = { };
            (ptr->err->format_message)(ptr, buffer);

            jpeg_destroy(ptr);
            throw PDFException(PDFTranslationContext::tr("Error reading JPEG (DCT) image: %1.").arg(QString::fromLatin1(buffer)));
        };

        // Whole image is provided to the decoder on the first call. Every subsequent call
        // means, that the decoder wants more data than we have, so we return FALSE.
        auto fillInputBufferMethod = [](j_decompress_ptr decompress) -> boolean
        {
            PDFJPEGDCTSource* source = reinterpret_cast<PDFJPEGDCTSource*>(decompress->src);

            if (!source->sourceManager.next_input_byte)
            {
                const QByteArray* buffer = source->buffer;
                source->sourceManager.next_input_byte = reinterpret_cast<const JOCTET*>(buffer->constData());
                source->sourceManager.bytes_in_buffer = buffer->size();
                source->sourceManager.next_input_byte += source->startByte;
                source->sourceManager.bytes_in_buffer -= source->startByte;
                return TRUE;
            }

            return FALSE;
        };

        auto skipInputDataMethod = [](j_decompress_ptr decompress, long num_bytes)
        {
            if (num_bytes <= 0)
            {
                // Zero or negative skip count must be treated as no-op. Without this check,
                // negative value would be converted to a huge unsigned number and all
                // remaining data would be skipped.
                return;
            }

            PDFJPEGDCTSource* source = reinterpret_cast<PDFJPEGDCTSource*>(decompress->src);

            const size_t skippedBytes = qMin(source->sourceManager.bytes_in_buffer, static_cast<size_t>(num_bytes));
            source->sourceManager.next_input_byte += skippedBytes;
            source->sourceManager.bytes_in_buffer -= skippedBytes;
        };

        source.sourceManager.bytes_in_buffer = 0;
        source.sourceManager.next_input_byte = nullptr;
        source.sourceManager.init_source = [](j_decompress_ptr) { };
        source.sourceManager.fill_input_buffer = fillInputBufferMethod;
        source.sourceManager.skip_input_data = skipInputDataMethod;
        source.sourceManager.resync_to_restart = jpeg_resync_to_restart;
        source.sourceManager.term_source = [](j_decompress_ptr) { };

        jpeg_std_error(&errorManager);
        errorManager.error_exit = errorMethod;
        codec.err = &errorManager;

        jpeg_create_decompress(&codec);

        // Source manager is the first member of PDFJPEGDCTSource, callbacks cast it back
        codec.src = reinterpret_cast<jpeg_source_mgr*>(&source);

        if (jpeg_read_header(&codec, TRUE) == JPEG_HEADER_OK)
        {
            // Determine color transform
            if (colorTransform == -1 && codec.saw_Adobe_marker)
            {
                colorTransform = codec.Adobe_transform;
            }

            // Set the input transform
            if (colorTransform > -1)
            {
                switch (codec.num_components)
                {
                    case 3:
                    {
                        codec.jpeg_color_space = colorTransform ? JCS_YCbCr : JCS_RGB;
                        break;
                    }

                    case 4:
                    {
                        codec.jpeg_color_space = colorTransform ? JCS_YCCK : JCS_CMYK;
                        break;
                    }

                    default:
                        break;
                }
            }

            jpeg_start_decompress(&codec);

            const JDIMENSION rowStride = codec.output_width * codec.output_components;
            JSAMPARRAY samples = codec.mem->alloc_sarray(reinterpret_cast<j_common_ptr>(&codec), JPOOL_IMAGE, rowStride, 1);
            JDIMENSION scanLineCount = codec.output_height;

            const unsigned int width = codec.output_width;
            const unsigned int height = codec.output_height;
            const unsigned int components = codec.output_components;
            const unsigned int bitsPerComponent =  8;
            QByteArray buffer(rowStride * height, 0);
            JSAMPROW rowData = reinterpret_cast<JSAMPROW>(buffer.data());

            bool isTruncated = false;
            while (scanLineCount)
            {
                const JDIMENSION readCount = jpeg_read_scanlines(&codec, samples, 1);
                if (readCount == 0)
                {
                    // No scanline was produced, because the source has no more data (our
                    // fill input buffer callback returned FALSE). We must stop here, otherwise
                    // the loop would never finish and it would write past the end of the buffer.
                    isTruncated = true;
                    break;
                }

                std::memcpy(rowData, samples[0], rowStride);
                scanLineCount -= readCount;
                rowData += rowStride;
            }

            if (isTruncated)
            {
                // Keep scanlines decoded so far (rest of the buffer is zero-filled). We can't call
                // jpeg_finish_decompress, because not all scanlines were read, so abort instead.
                jpeg_abort_decompress(&codec);
                errorReporter->reportRenderError(RenderErrorType::Warning, PDFTranslationContext::tr("Malformed data while reading JPEG stream. Image data are incomplete."));
            }
            else
            {
                jpeg_finish_decompress(&codec);
            }

            image.m_imageData = PDFImageData(components, bitsPerComponent, width, height, rowStride, maskingType, qMove(buffer), qMove(mask), qMove(decode));
        }

        jpeg_destroy_decompress(&codec);
    }
    else if (imageFilterName == "JPXDecode")
    {
        PDFJPEG2000ImageData imageData;
        imageData.byteArray = &content;
        imageData.position = 0;

        auto warningCallback = [](const char* message, void* userData)
        {
            PDFJPEG2000ImageData* data = reinterpret_cast<PDFJPEG2000ImageData*>(userData);
            data->errors.push_back(PDFRenderError(RenderErrorType::Warning, PDFTranslationContext::tr("JPEG 2000 Warning: %1").arg(QString::fromLatin1(message))));
        };

        auto errorCallback = [](const char* message, void* userData)
        {
            PDFJPEG2000ImageData* data = reinterpret_cast<PDFJPEG2000ImageData*>(userData);
            data->errors.push_back(PDFRenderError(RenderErrorType::Error, PDFTranslationContext::tr("JPEG 2000 Error: %1").arg(QString::fromLatin1(message))));
        };

        opj_dparameters_t decompressParameters;
        opj_set_default_decoder_parameters(&decompressParameters);

        const bool isIndexed = dynamic_cast<const PDFIndexedColorSpace*>(image.m_colorSpace.data()) != nullptr;
        if (isIndexed)
        {
            // What is this flag for? When we have indexed color space, we do not want to resolve index to color
            // using the color map in the image. Instead of that, we just get indices and resolve them using
            // our color space.
            decompressParameters.flags |= OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG;
        }

        // We do not know the container format, so we try codecs one by one, until one of them succeeds
        constexpr CODEC_FORMAT formats[] = { OPJ_CODEC_J2K, OPJ_CODEC_JP2, OPJ_CODEC_JPT, OPJ_CODEC_JPP, OPJ_CODEC_JPX };
        for (CODEC_FORMAT format : formats)
        {
            opj_codec_t* codec = opj_create_decompress(format);

            if (!codec)
            {
                // Codec is not present
                continue;
            }

            opj_set_warning_handler(codec, warningCallback, &imageData);
            opj_set_error_handler(codec, errorCallback, &imageData);

            opj_stream_t* stream = opj_stream_create(content.size(), OPJ_TRUE);
            opj_stream_set_user_data(stream, &imageData, nullptr);
            opj_stream_set_user_data_length(stream, content.size());
            opj_stream_set_read_function(stream, &PDFJPEG2000ImageData::read);
            opj_stream_set_seek_function(stream, &PDFJPEG2000ImageData::seek);
            opj_stream_set_skip_function(stream, &PDFJPEG2000ImageData::skip);

            // Reset the stream position, clear the data
            imageData.position = 0;
            imageData.errors.clear();

            opj_image_t* jpegImage = nullptr;

            // Setup the decoder, read the header and decode the image. Each step
            // is executed only if the previous step succeeded.
            const bool isDecoded = opj_setup_decoder(codec, &decompressParameters) &&
                                   opj_read_header(stream, codec, &jpegImage) &&
                                   opj_set_decode_area(codec, jpegImage, decompressParameters.DA_x0, decompressParameters.DA_y0, decompressParameters.DA_x1, decompressParameters.DA_y1) &&
                                   opj_decode(codec, stream, jpegImage);

            if (isDecoded)
            {
                // Result of this call is intentionally ignored, image data are already decoded
                opj_end_decompress(codec, stream);
            }

            opj_stream_destroy(stream);
            opj_destroy_codec(codec);

            stream = nullptr;
            codec = nullptr;

            if (jpegImage && !isDecoded)
            {
                // Header was read, but decoding failed. Image created from the header can have
                // components without sample data, so we must not use it. Try next codec.
                opj_image_destroy(jpegImage);
                jpegImage = nullptr;
            }

            // If we have a valid image, then adjust it
            if (jpegImage)
            {
                // First we must check, if all components are valid (i.e. they have sample data
                // and same width/height/precision/signedness)

                const OPJ_UINT32 componentCount = jpegImage->numcomps;
                bool valid = componentCount > 0;
                for (OPJ_UINT32 i = 0; valid && i < componentCount; ++i)
                {
                    const auto& firstComponent = jpegImage->comps[0];
                    const auto& component = jpegImage->comps[i];

                    valid = component.data &&
                            firstComponent.w == component.w &&
                            firstComponent.h == component.h &&
                            firstComponent.prec == component.prec &&
                            firstComponent.sgnd == component.sgnd;
                }

                // TODO: Include alpha channel functionality - mask in image

                if (valid)
                {
                    const OPJ_UINT32 w = jpegImage->comps[0].w;
                    const OPJ_UINT32 h = jpegImage->comps[0].h;
                    const OPJ_UINT32 prec = jpegImage->comps[0].prec;
                    const OPJ_UINT32 sgnd = jpegImage->comps[0].sgnd;

                    // Signed samples are moved to the unsigned range; samples are
                    // then scaled to 8 bits by the bit shift.
                    int signumCorrection = (sgnd) ? (1 << (prec - 1)) : 0;
                    int shiftLeft = (jpegImage->comps[0].prec < 8) ? 8 - jpegImage->comps[0].prec : 0;
                    int shiftRight = (jpegImage->comps[0].prec > 8) ? jpegImage->comps[0].prec - 8 : 0;

                    auto transformValue = [signumCorrection, isIndexed, shiftLeft, shiftRight](int value) -> unsigned char
                    {
                        value += signumCorrection;

                        if (!isIndexed)
                        {
                            // Indexed color space should have at most 255 indices, do not modify indices in this case

                            if (shiftLeft > 0)
                            {
                                value = value << shiftLeft;
                            }
                            else if (shiftRight > 0)
                            {
                                // We clamp value to the lower part (so, we use similar algorithm as in 'floor' function).
                                //
                                value = value >> shiftRight;
                            }
                        }

                        value = qBound(0, value, 255);
                        return static_cast<unsigned char>(value);
                    };

                    // Variables for image data. We convert all components to the 8-bit format
                    unsigned int components = jpegImage->numcomps;
                    unsigned int bitsPerComponent = 8;
                    unsigned int width = w;
                    unsigned int height = h;
                    unsigned int stride = w * components;

                    // Convert planar data (one array per component) to the interleaved data
                    QByteArray imageDataBuffer(components * width * height, 0);
                    for (unsigned int row = 0; row < h; ++row)
                    {
                        for (unsigned int col = 0; col < w; ++col)
                        {
                            for (unsigned int componentIndex = 0; componentIndex < components; ++ componentIndex)
                            {
                                int index = stride * row + col * components + componentIndex;
                                Q_ASSERT(index < imageDataBuffer.size());

                                imageDataBuffer[index] = transformValue(jpegImage->comps[componentIndex].data[w * row + col]);
                            }
                        }
                    }

                    image.m_imageData = PDFImageData(components, bitsPerComponent, width, height, stride, maskingType, qMove(imageDataBuffer), qMove(mask), qMove(decode));
                    valid = image.m_imageData.isValid();
                }
                else
                {
                    // Easiest way is to just add errors to the error list
                    imageData.errors.push_back(PDFRenderError(RenderErrorType::Error, PDFTranslationContext::tr("Incompatible color components for JPEG 2000 image.")));
                }

                opj_image_destroy(jpegImage);

                if (valid)
                {
                    // Image was successfully decoded
                    break;
                }
            }
        }

        // Report errors, if we have any. Warnings are passed to the reporter, first
        // error is thrown as an exception.
        if (!imageData.errors.empty())
        {
            for (const PDFRenderError& error : imageData.errors)
            {
                QString message = error.message.simplified().trimmed();
                if (error.type == RenderErrorType::Error)
                {
                    throw PDFRendererException(error.type, message);
                }
                else
                {
                    errorReporter->reportRenderError(error.type, message);
                }
            }
        }
    }
    else if (imageFilterName == "CCITTFaxDecode")
    {
        throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Not implemented image filter 'CCITFaxDecode'."));
    }
    else if (imageFilterName == "JBIG2Decode")
    {
        throw PDFRendererException(RenderErrorType::NotImplemented, PDFTranslationContext::tr("Not implemented image filter 'JBIG2Decode'."));
    }
    else if (colorSpace)
    {
        // We treat data as binary maybe compressed stream (for example by Flate/LZW method), but data can also be not compressed.
        const unsigned int components = static_cast<unsigned int>(colorSpace->getColorComponentCount());
        const unsigned int bitsPerComponent = static_cast<unsigned int>(loader.readIntegerFromDictionary(dictionary, "BitsPerComponent", 8));
        const unsigned int width = static_cast<unsigned int>(loader.readIntegerFromDictionary(dictionary, "Width", 0));
        const unsigned int height = static_cast<unsigned int>(loader.readIntegerFromDictionary(dictionary, "Height", 0));

        if (bitsPerComponent < 1 || bitsPerComponent > 32)
        {
            throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid number of bits per component (%1).").arg(bitsPerComponent));
        }

        if (width == 0 || height == 0)
        {
            throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid size of image (%1x%2)").arg(width).arg(height));
        }

        // Calculate stride
        const unsigned int stride = (components * bitsPerComponent * width + 7) / 8;

        // Stream is already decoded in 'content' (same call as would be used here),
        // so we reuse it instead of decoding the stream for the second time.
        image.m_imageData = PDFImageData(components, bitsPerComponent, width, height, stride, maskingType, qMove(content), qMove(mask), qMove(decode));
    }
    else if (imageMask)
    {
        // If ImageMask is set to true, then "BitsPerComponent" is optional according to the
        // PDF specification, and if it is present, it must be 1. So the implicit value is 1
        // (default value of 8 would reject valid image masks without this entry).
        const unsigned int bitsPerComponent = static_cast<unsigned int>(loader.readIntegerFromDictionary(dictionary, "BitsPerComponent", 1));

        if (bitsPerComponent != 1)
        {
            throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid number bits of image mask (should be 1 bit instead of %1 bits).").arg(bitsPerComponent));
        }

        const unsigned int width = static_cast<unsigned int>(loader.readIntegerFromDictionary(dictionary, "Width", 0));
        const unsigned int height = static_cast<unsigned int>(loader.readIntegerFromDictionary(dictionary, "Height", 0));

        if (width == 0 || height == 0)
        {
            throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid size of image (%1x%2)").arg(width).arg(height));
        }

        // Calculate stride (one bit per pixel, rows are aligned to whole bytes)
        const unsigned int stride = (width + 7) / 8;

        // Stream is already decoded in 'content', reuse it instead of decoding it again
        image.m_imageData = PDFImageData(1, bitsPerComponent, width, height, stride, maskingType, qMove(content), qMove(mask), qMove(decode));
    }

    return image;
}

/// Converts the image data to a QImage. Images, which are not image masks, are
/// converted by the color space (empty image is returned, if there is no color
/// space). Image masks are converted to an alpha-only image, where each pixel
/// is either fully transparent (0x00), or fully opaque (0xFF).
/// \throws PDFRendererException if the image mask has invalid bit depth or size
QImage PDFImage::getImage() const
{
    if (m_imageData.getMaskingType() != PDFImageData::MaskingType::ImageMask)
    {
        // Ordinary image - conversion is a job for the color space
        if (m_colorSpace)
        {
            return m_colorSpace->getImage(m_imageData);
        }

        return QImage();
    }

    // From now on, we are handling the image mask
    const auto bitsPerComponent = m_imageData.getBitsPerComponent();
    if (bitsPerComponent != 1)
    {
        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid number bits of image mask (should be 1 bit instead of %1 bits).").arg(bitsPerComponent));
    }

    const auto width = m_imageData.getWidth();
    const auto height = m_imageData.getHeight();
    if (width == 0 || height == 0)
    {
        throw PDFRendererException(RenderErrorType::Error, PDFTranslationContext::tr("Invalid size of image (%1x%2)").arg(width).arg(height));
    }

    QImage maskImage(width, height, QImage::Format_Alpha8);

    // Meaning of the bits is inverted, if the decode array starts with 1.0
    const auto& decodeArray = m_imageData.getDecode();
    const bool isInverted = !decodeArray.empty() && qFuzzyCompare(decodeArray.front(), 1.0);
    PDFBitReader bitReader(&m_imageData.getData(), bitsPerComponent);

    for (unsigned int row = 0; row < height; ++row)
    {
        // Each row starts at a byte boundary given by the stride
        bitReader.seek(row * m_imageData.getStride());
        unsigned char* pixel = maskImage.scanLine(row);

        for (unsigned int column = 0; column < width; ++column, ++pixel)
        {
            const bool bit = static_cast<bool>(bitReader.read());
            *pixel = (isInverted != bit) ? 0x00 : 0xFF;
        }
    }

    return maskImage;
}

/// Read callback of the in-memory stream. Copies at most \p p_nb_bytes bytes from
/// the current position of the byte array to \p p_buffer and advances the position.
/// \param p_buffer Target buffer
/// \param p_nb_bytes Maximal number of bytes to be read
/// \param p_user_data Pointer to the PDFJPEG2000ImageData instance
/// \returns Number of bytes copied, or (OPJ_SIZE_T)-1, if no byte can be read
OPJ_SIZE_T PDFJPEG2000ImageData::read(void* p_buffer, OPJ_SIZE_T p_nb_bytes, void* p_user_data)
{
    PDFJPEG2000ImageData* self = reinterpret_cast<PDFJPEG2000ImageData*>(p_user_data);

    // Number of bytes to copy = remaining bytes (never negative), limited by the request
    const OPJ_OFF_T requested = static_cast<OPJ_OFF_T>(p_nb_bytes);
    const OPJ_OFF_T remaining = static_cast<OPJ_OFF_T>(self->byteArray->size()) - self->position;
    const OPJ_OFF_T count = qMin(qMax(remaining, static_cast<OPJ_OFF_T>(0)), requested);

    if (count == 0)
    {
        // Nothing to read
        return static_cast<OPJ_SIZE_T>(-1);
    }

    if (count > 0)
    {
        std::memcpy(p_buffer, self->byteArray->constData() + self->position, count);
        self->position += count;
    }

    return count;
}

/// Seek callback of the in-memory stream. Sets the current position to the
/// absolute offset \p p_nb_bytes.
/// \param p_nb_bytes Absolute offset from the start of the byte array
/// \param p_user_data Pointer to the PDFJPEG2000ImageData instance
/// \returns OPJ_TRUE, if position was set, OPJ_FALSE, if offset is negative
///          or if it is not inside the byte array (position is unchanged then)
OPJ_BOOL PDFJPEG2000ImageData::seek(OPJ_OFF_T p_nb_bytes, void* p_user_data)
{
    PDFJPEG2000ImageData* data = reinterpret_cast<PDFJPEG2000ImageData*>(p_user_data);

    // Offset is signed, but position is unsigned. Negative offset would be stored as
    // a huge position, and following read would access memory outside of the byte array.
    if (p_nb_bytes < 0 || p_nb_bytes >= data->byteArray->size())
    {
        return OPJ_FALSE;
    }

    data->position = static_cast<OPJ_SIZE_T>(p_nb_bytes);
    return OPJ_TRUE;
}

/// Skip callback of the in-memory stream. Advances the current position by
/// \p p_nb_bytes bytes, but never past the end of the byte array.
/// \param p_nb_bytes Number of bytes to be skipped (must not be negative)
/// \param p_user_data Pointer to the PDFJPEG2000ImageData instance
/// \returns Number of bytes actually skipped, or -1, if \p p_nb_bytes is negative
OPJ_OFF_T PDFJPEG2000ImageData::skip(OPJ_OFF_T p_nb_bytes, void* p_user_data)
{
    PDFJPEG2000ImageData* data = reinterpret_cast<PDFJPEG2000ImageData*>(p_user_data);

    if (p_nb_bytes < 0)
    {
        // Backward skip is not supported. Position is unsigned, so a negative skip could
        // wrap it around to a huge value, and following read would access memory outside
        // of the byte array. We report failure instead.
        return static_cast<OPJ_OFF_T>(-1);
    }

    // Remaining length
    OPJ_OFF_T length = static_cast<OPJ_OFF_T>(data->byteArray->size()) - data->position;

    if (length < 0)
    {
        length = 0;
    }

    // Do not skip more bytes, than was requested
    if (length > p_nb_bytes)
    {
        length = p_nb_bytes;
    }

    data->position += length;
    return length;
}

}   // namespace pdf