[feature] support processing of (many) more media types (#3090)

* initial work replacing our media decoding / encoding pipeline with ffprobe + ffmpeg

* specify the video codec to use when generating static image from emoji

* update go-storage library (fixes incompatibility after updating go-iotools)

* maintain image aspect ratio when generating a thumbnail for it

* update readme to show go-ffmpreg

* fix a bunch of media tests, move filesize checking to callers of media manager for more flexibility

* remove extra debug from error message

* fix up incorrect function signatures

* update PutFile to just use regular file copy, as chances are the file is on a separate partition

* fix remaining tests, remove some unneeded tests now we're working with ffmpeg/ffprobe

* update more tests, add more code comments

* add utilities to generate processed emoji / media outputs

* fix remaining tests

* add test for opus media file, add license header to utility cmds

* limit the number of concurrently available ffmpeg / ffprobe instances

* reduce number of instances

* further reduce number of instances

* fix envparsing test with configuration variables

* update docs and configuration with new media-{local,remote}-max-size variables
This commit is contained in:
kim
2024-07-12 09:39:47 +00:00
committed by GitHub
parent 5bc567196b
commit cde2fb6244
376 changed files with 8026 additions and 54091 deletions

View File

View File

@ -1,14 +0,0 @@
language: go
go:
- master
- stable
- "1.13"
- "1.12"
env:
- GO111MODULE=on
install:
- go get -t ./...
- go get github.com/mattn/goveralls
script:
- go test -v ./...
- goveralls -v -service=travis-ci

View File

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2020 Dustin Oprea
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,8 +0,0 @@
[![Build Status](https://travis-ci.org/dsoprea/go-iptc.svg?branch=master)](https://travis-ci.org/dsoprea/go-iptc)
[![Coverage Status](https://coveralls.io/repos/github/dsoprea/go-iptc/badge.svg?branch=master)](https://coveralls.io/github/dsoprea/go-iptc?branch=master)
[![Go Report Card](https://goreportcard.com/badge/github.com/dsoprea/go-iptc)](https://goreportcard.com/report/github.com/dsoprea/go-iptc)
[![GoDoc](https://godoc.org/github.com/dsoprea/go-iptc?status.svg)](https://godoc.org/github.com/dsoprea/go-iptc)
# Overview
This project provides functionality to parse a series of IPTC records/datasets. It also provides name resolution, but other constraints/validation are not yet implemented (though there is structure present that can accommodate them when desired/required).

View File

@ -1,101 +0,0 @@
package iptc
import (
"errors"
)
// StreamTagInfo encapsulates the properties of each tag.
//
// NOTE: the standardTags table builds these values with positional (unkeyed)
// composite literals, so adding or reordering fields here requires updating
// every entry of that table.
type StreamTagInfo struct {
// Description is the human-readable description of the tag.
Description string
}
var (
// standardTags maps a (record number, dataset number) pair to the
// description of the corresponding standard tag. GetTagInfo consults this
// table; any pair that is absent from it is reported as ErrTagNotStandard.
standardTags = map[StreamTagKey]StreamTagInfo{
{1, 120}: {"ARM Identifier"},
{1, 122}: {"ARM Version"},
{2, 0}: {"Record Version"},
{2, 3}: {"Object Type Reference"},
{2, 4}: {"Object Attribute Reference"},
{2, 5}: {"Object Name"},
{2, 7}: {"Edit Status"},
{2, 8}: {"Editorial Update"},
{2, 10}: {"Urgency"},
{2, 12}: {"Subject Reference"},
{2, 15}: {"Category"},
{2, 20}: {"Supplemental Category"},
{2, 22}: {"Fixture Identifier"},
{2, 25}: {"Keywords"},
{2, 26}: {"Content Location Code"},
{2, 27}: {"Content Location Name"},
{2, 30}: {"Release Date"},
{2, 35}: {"Release Time"},
{2, 37}: {"Expiration Date"},
{2, 38}: {"Expiration Time"},
{2, 40}: {"Special Instructions"},
{2, 42}: {"Action Advised"},
{2, 45}: {"Reference Service"},
{2, 47}: {"Reference Date"},
{2, 50}: {"Reference Number"},
{2, 55}: {"Date Created"},
{2, 60}: {"Time Created"},
{2, 62}: {"Digital Creation Date"},
{2, 63}: {"Digital Creation Time"},
{2, 65}: {"Originating Program"},
{2, 70}: {"Program Version"},
{2, 75}: {"Object Cycle"},
{2, 80}: {"By-line"},
{2, 85}: {"By-line Title"},
{2, 90}: {"City"},
{2, 92}: {"Sublocation"},
{2, 95}: {"Province/State"},
{2, 100}: {"Country/Primary Location Code"},
{2, 101}: {"Country/Primary Location Name"},
{2, 103}: {"Original Transmission Reference"},
{2, 105}: {"Headline"},
{2, 110}: {"Credit"},
{2, 115}: {"Source"},
{2, 116}: {"Copyright Notice"},
{2, 118}: {"Contact"},
{2, 120}: {"Caption/Abstract"},
{2, 122}: {"Writer/Editor"},
{2, 125}: {"Rasterized Caption"},
{2, 130}: {"Image Type"},
{2, 131}: {"Image Orientation"},
{2, 135}: {"Language Identifier"},
{2, 150}: {"Audio Type"},
{2, 151}: {"Audio Sampling Rate"},
{2, 152}: {"Audio Sampling Resolution"},
{2, 153}: {"Audio Duration"},
{2, 154}: {"Audio Outcue"},
{2, 200}: {"ObjectData Preview File Format"},
{2, 201}: {"ObjectData Preview File Format Version"},
{2, 202}: {"ObjectData Preview Data"},
{7, 10}: {"Size Mode"},
{7, 20}: {"Max Subfile Size"},
{7, 90}: {"ObjectData Size Announced"},
{7, 95}: {"Maximum ObjectData Size"},
{8, 10}: {"Subfile"},
{9, 10}: {"Confirmed ObjectData Size"},
}
)
var (
// ErrTagNotStandard indicates that the given tag is not known among the
// documented standard set. GetTagInfo returns it when the requested
// record/dataset pair has no entry in standardTags.
ErrTagNotStandard = errors.New("not a standard tag")
)
// GetTagInfo returns the info for the given tag. Returns ErrTagNotStandard if
// not known.
//
// Record and dataset numbers are stored as single octets in the tag table, so
// an argument outside 0-255 can never name a standard tag. Such values are
// rejected up front: converting them straight to uint8 would silently wrap
// (e.g. 258 becoming 2) and report the description of an unrelated tag.
func GetTagInfo(recordNumber, datasetNumber int) (sti StreamTagInfo, err error) {
	const maxOctet = 0xff

	if recordNumber < 0 || recordNumber > maxOctet ||
		datasetNumber < 0 || datasetNumber > maxOctet {
		return sti, ErrTagNotStandard
	}

	stk := StreamTagKey{uint8(recordNumber), uint8(datasetNumber)}

	info, found := standardTags[stk]
	if !found {
		return sti, ErrTagNotStandard
	}

	return info, nil
}

View File

@ -1,277 +0,0 @@
package iptc
import (
"errors"
"fmt"
"io"
"strings"
"unicode"
"encoding/binary"
"github.com/dsoprea/go-logging"
)
var (
// TODO(dustin): We're still not sure if this is the right endianness. No search for IPTC or IIM documentation seems to state one or the other.
// defaultEncoding is the byte order handed to binary.Read when decoding tag headers.
defaultEncoding = binary.BigEndian
)
var (
// ErrInvalidTagMarker indicates that the tag can not be parsed because the
// tag boundary marker is not the expected value (DecodeTag requires the
// first byte of a tag to be 0x1c).
ErrInvalidTagMarker = errors.New("invalid tag marker")
)
// Tag is the header of a single dataset read from the stream: it identifies
// the dataset and records how many bytes of data belong to it.
type Tag struct {
	recordNumber  uint8
	datasetNumber uint8
	dataSize      uint64
}

// String renders the dataset identifier and data size for display.
func (tag *Tag) String() string {
	const layout = "Tag<DATASET=(%d:%d) DATA-SIZE=(%d)>"

	record, dataset, size := tag.recordNumber, tag.datasetNumber, tag.dataSize

	return fmt.Sprintf(layout, record, dataset, size)
}
// DecodeTag parses one tag from the stream.
//
// The header is read field by field: a one-byte marker (which must be 0x1c),
// a one-byte record number, a one-byte dataset number and a two-byte size
// field. When the top bit of the size field is clear, the field is the data
// size itself. When it is set, the remaining fifteen bits give the width in
// bytes of a following extended size field; only widths of 4 and 8 are
// accepted.
//
// io.EOF is returned unwrapped when the stream ends before the marker, so
// callers can detect a clean end of input. ErrInvalidTagMarker is returned
// for a wrong marker. Every other failure is raised as a panic and converted
// back into the returned error by the deferred handler.
func DecodeTag(r io.Reader) (tag Tag, err error) {
	defer func() {
		if state := recover(); state != nil {
			err = log.Wrap(state.(error))
		}
	}()

	var marker uint8
	if err = binary.Read(r, defaultEncoding, &marker); err != nil {
		if err == io.EOF {
			return tag, err
		}

		log.Panic(err)
	}

	if marker != 0x1c {
		return tag, ErrInvalidTagMarker
	}

	var recordNumber, datasetNumber uint8

	err = binary.Read(r, defaultEncoding, &recordNumber)
	log.PanicIf(err)

	err = binary.Read(r, defaultEncoding, &datasetNumber)
	log.PanicIf(err)

	var sizeField uint16

	err = binary.Read(r, defaultEncoding, &sizeField)
	log.PanicIf(err)

	// The most significant bit distinguishes a plain 15-bit size from a
	// "length of the length" announcement.
	const extendedFlag = 0x8000

	var dataSize uint64
	if sizeField&extendedFlag == 0 {
		dataSize = uint64(sizeField)
	} else {
		switch lengthLength := sizeField &^ extendedFlag; lengthLength {
		case 4:
			var size32 uint32

			err = binary.Read(r, defaultEncoding, &size32)
			log.PanicIf(err)

			dataSize = uint64(size32)
		case 8:
			err = binary.Read(r, defaultEncoding, &dataSize)
			log.PanicIf(err)
		default:
			// No specific sizes or limits are given by the specification,
			// so we impose our own in order to implement this at all.
			log.Panicf("extended data-set tag size is not supported: (%d)", lengthLength)
		}
	}

	return Tag{
		recordNumber:  recordNumber,
		datasetNumber: datasetNumber,
		dataSize:      dataSize,
	}, nil
}
// StreamTagKey identifies a dataset by its record and dataset numbers. It is
// a small comparable type so that it can be used directly as a map key.
type StreamTagKey struct {
	// RecordNumber is the major classification of the dataset.
	RecordNumber uint8

	// DatasetNumber is the minor classification of the dataset.
	DatasetNumber uint8
}

// String formats the key as "<record>:<dataset>" using decimal numbers.
func (stk StreamTagKey) String() string {
	record := fmt.Sprint(stk.RecordNumber)
	dataset := fmt.Sprint(stk.DatasetNumber)

	return record + ":" + dataset
}
// TagData is the raw value of one dataset, kept as a byte-slice.
type TagData []byte

// IsPrintable reports whether the data can be shown as text. Each byte is
// examined on its own as a rune and must be both graphic and printable.
// Carriage returns and line feeds are tolerated and never make the data
// unprintable. Empty data is printable.
func (tg TagData) IsPrintable() bool {
	for i := 0; i < len(tg); i++ {
		switch r := rune(tg[i]); {
		case r == 0x0d, r == 0x0a:
			// CR and LF are accepted even though they would fail the
			// checks below.
		case unicode.IsGraphic(r) && unicode.IsPrint(r):
			// Ordinary displayable character.
		default:
			return false
		}
	}

	return true
}

// String returns the data itself when it is printable, and otherwise a
// placeholder that only states how many bytes there are.
func (tg TagData) String() string {
	if !tg.IsPrintable() {
		return fmt.Sprintf("BINARY<(%d) bytes>", len(tg))
	}

	return string(tg)
}
// ParsedTags is the complete, unordered set of tags parsed from the stream.
type ParsedTags map[StreamTagKey][]TagData

// ParseStream parses a serial sequence of tags and tag data out of the stream.
//
// Tags are decoded with DecodeTag until it reports io.EOF at a tag boundary.
// The data of each tag is appended, in stream order, to the slice stored
// under that tag's record/dataset key, so repeated datasets keep all of
// their values.
//
// Any failure (a bad tag, a truncated stream) is raised as a panic and
// converted back into the returned error by the deferred handler. The tags
// collected up to that point are returned alongside the error.
func ParseStream(r io.Reader) (tags map[StreamTagKey][]TagData, err error) {
	defer func() {
		if state := recover(); state != nil {
			err = log.Wrap(state.(error))
		}
	}()

	tags = make(ParsedTags)

	for {
		tag, err := DecodeTag(r)
		if err != nil {
			if err == io.EOF {
				break
			}

			log.Panic(err)
		}

		raw, err := readTagData(r, tag.dataSize)
		log.PanicIf(err)

		stk := StreamTagKey{
			RecordNumber:  tag.recordNumber,
			DatasetNumber: tag.datasetNumber,
		}

		// Appending to the (possibly missing, hence nil) existing slice
		// covers both the first and the repeated occurrence of a key.
		tags[stk] = append(tags[stk], TagData(raw))
	}

	return tags, nil
}

// readTagData reads exactly size bytes of tag data from r.
//
// The size comes straight from the stream and can claim up to 2^64-1 bytes,
// so the buffer is grown in bounded steps as data actually arrives rather
// than being allocated in full up front. A corrupt or hostile size field
// then fails with a read error instead of a huge allocation.
//
// The error semantics match a single io.ReadFull over the whole value: io.EOF
// if no bytes at all could be read, io.ErrUnexpectedEOF if the stream ended
// part-way through.
func readTagData(r io.Reader, size uint64) ([]byte, error) {
	const maxStep = 64 * 1024

	initial := size
	if initial > maxStep {
		initial = maxStep
	}

	raw := make([]byte, 0, int(initial))

	for remaining := size; remaining > 0; {
		step := remaining
		if step > maxStep {
			step = maxStep
		}

		offset := len(raw)
		raw = append(raw, make([]byte, int(step))...)

		if _, err := io.ReadFull(r, raw[offset:]); err != nil {
			// A clean EOF on a later step still means the value as a
			// whole was cut short.
			if err == io.EOF && offset > 0 {
				err = io.ErrUnexpectedEOF
			}

			return nil, err
		}

		remaining -= step
	}

	return raw, nil
}
// GetSimpleDictionaryFromParsedTags returns a dictionary of tag names to tag
// values, where all values are strings and any tag that had a non-printable
// value is omitted. We will also only return the first value, therefore
// dropping any follow-up values for repeatable tags. This will ignore non-
// standard tags, as well as keys that carry no values at all. Carriage
// returns and line feeds are trimmed from both ends of each value; other
// whitespace is left untouched.
//
// This is a convenience function for quickly displaying only the summary IPTC
// metadata that a user might actually be interested in at first glance.
func GetSimpleDictionaryFromParsedTags(pt ParsedTags) (distilled map[string]string) {
	distilled = make(map[string]string)

	for stk, dataSlice := range pt {
		// ParsedTags is an exported map type, so a caller may hand us a key
		// with an empty value slice; there is no first value to report.
		if len(dataSlice) == 0 {
			continue
		}

		sti, err := GetTagInfo(int(stk.RecordNumber), int(stk.DatasetNumber))
		if err != nil {
			if err == ErrTagNotStandard {
				continue
			}

			log.Panic(err)
		}

		data := dataSlice[0]
		if !data.IsPrintable() {
			continue
		}

		// TODO(dustin): Only CR/LF are trimmed; trim other whitespace, too.
		distilled[sti.Description] = strings.Trim(string(data), "\r\n")
	}

	return distilled
}
// GetDictionaryFromParsedTags returns every value of every tag as a string.
//
// Keys are the standard tag description, or "<record>:<dataset> (not a
// standard tag)" for tags outside the standard set. When a tag has more than
// one value, each key gets a one-based counter appended, e.g. "Keywords (2)".
//
// Printable values are returned as-is. Non-printable values are kept as well,
// rendered as "[BINARY] " followed by a hex dump of the bytes.
func GetDictionaryFromParsedTags(pt ParsedTags) (distilled map[string]string) {
	distilled = make(map[string]string)

	for stk, dataSlice := range pt {
		sti, err := GetTagInfo(int(stk.RecordNumber), int(stk.DatasetNumber))

		var keyPhrase string

		switch {
		case err == nil:
			keyPhrase = sti.Description
		case err == ErrTagNotStandard:
			keyPhrase = fmt.Sprintf("%s (not a standard tag)", stk.String())
		default:
			log.Panic(err)
		}

		repeated := len(dataSlice) > 1

		for i := range dataSlice {
			data := dataSlice[i]

			label := keyPhrase
			if repeated {
				label = fmt.Sprintf("%s (%d)", keyPhrase, i+1)
			}

			if data.IsPrintable() {
				distilled[label] = string(data)
				continue
			}

			distilled[label] = fmt.Sprintf("[BINARY] %s", DumpBytesToString(data))
		}
	}

	return distilled
}

View File

@ -1,73 +0,0 @@
package iptc
import (
"os"
"path"
"github.com/dsoprea/go-logging"
)
var (
// testDataRelFilepath is the name of the common test-data file, relative to
// the test-assets directory (see GetTestDataFilepath).
testDataRelFilepath = "iptc.data"
)
var (
// moduleRootPath caches the result of GetModuleRootPath. It stays empty
// until the root has been resolved.
moduleRootPath = ""
// assetsPath caches the result of GetTestAssetsPath. It stays empty until
// the path has been computed.
assetsPath = ""
)
// GetModuleRootPath returns the root-path of the module.
//
// The result is cached in moduleRootPath. On first use the value of the
// IPTC_MODULE_ROOT_PATH environment variable is taken if it is set. Otherwise
// the search starts at the current working directory and walks up through its
// parents until a directory containing a ".MODULE_ROOT" stamp file is found.
//
// Panics if the working directory cannot be determined, if a stamp file
// cannot be checked for a reason other than it not existing, or if the walk
// runs out of parent directories without finding the stamp.
func GetModuleRootPath() string {
	if moduleRootPath != "" {
		return moduleRootPath
	}

	moduleRootPath = os.Getenv("IPTC_MODULE_ROOT_PATH")
	if moduleRootPath != "" {
		return moduleRootPath
	}

	currentWd, err := os.Getwd()
	log.PanicIf(err)

	currentPath := currentWd
	visited := make([]string, 0)

	for {
		tryStampFilepath := path.Join(currentPath, ".MODULE_ROOT")

		_, err := os.Stat(tryStampFilepath)
		if err != nil && !os.IsNotExist(err) {
			log.Panic(err)
		} else if err == nil {
			break
		}

		visited = append(visited, tryStampFilepath)

		// Stop at the root, and also whenever path.Dir no longer makes
		// progress (e.g. it settles on "." for a path that contains no
		// forward slashes). Without the second check the walk would spin
		// forever on such paths.
		parentPath := path.Dir(currentPath)
		if parentPath == "/" || parentPath == currentPath {
			log.Panicf("could not find module-root: %v", visited)
		}

		currentPath = parentPath
	}

	moduleRootPath = currentPath

	return moduleRootPath
}
// GetTestAssetsPath returns the path of the test-assets: the "assets"
// directory directly under the module root. The value is computed on first
// use and cached in assetsPath afterwards.
func GetTestAssetsPath() string {
	if assetsPath != "" {
		return assetsPath
	}

	assetsPath = path.Join(GetModuleRootPath(), "assets")

	return assetsPath
}
// GetTestDataFilepath returns the file-path of the common test-data, which
// lives directly inside the test-assets directory.
func GetTestDataFilepath() string {
	return path.Join(GetTestAssetsPath(), testDataRelFilepath)
}

View File

@ -1,25 +0,0 @@
package iptc
import (
"bytes"
"fmt"
"github.com/dsoprea/go-logging"
)
// DumpBytesToString renders data as two-digit lowercase hex values separated
// by single spaces (e.g. "1c 02 78"). Empty input yields an empty string.
func DumpBytesToString(data []byte) string {
	var b bytes.Buffer

	for i := range data {
		// The separator goes in front of every value but the first, which
		// leaves no trailing space.
		if i > 0 {
			_, err := b.WriteRune(' ')
			log.PanicIf(err)
		}

		_, err := b.WriteString(fmt.Sprintf("%02x", data[i]))
		log.PanicIf(err)
	}

	return b.String()
}