242 lines
5.3 KiB
Go
242 lines
5.3 KiB
Go
// Package magic holds the matching functions used to find MIME types.
|
||
package magic
|
||
|
||
import (
|
||
"bytes"
|
||
"fmt"
|
||
)
|
||
|
||
// Detector receives the raw data of a file and returns whether the data
// meets any conditions. The limit parameter is an upper limit to the number
// of bytes received and is used to tell if the byte slice represents the
// whole file or is just the header of a file, by comparing len(raw) with
// limit.
type Detector func(raw []byte, limit uint32) bool

// xmlSig describes an XML document by its root tag and/or its namespace.
type xmlSig struct {
	// localName is the local name of the root tag.
	localName []byte
	// xmlns is the namespace of the XML document.
	xmlns []byte
}
|
||
|
||
// prefix creates a Detector which returns true if any of the provided signatures
|
||
// is the prefix of the raw input.
|
||
func prefix(sigs ...[]byte) Detector {
|
||
return func(raw []byte, limit uint32) bool {
|
||
for _, s := range sigs {
|
||
if bytes.HasPrefix(raw, s) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// offset creates a Detector which returns true if the provided signature can be
|
||
// found at offset in the raw input.
|
||
func offset(sig []byte, offset int) Detector {
|
||
return func(raw []byte, limit uint32) bool {
|
||
return len(raw) > offset && bytes.HasPrefix(raw[offset:], sig)
|
||
}
|
||
}
|
||
|
||
// ciPrefix is like prefix but the check is case insensitive.
|
||
func ciPrefix(sigs ...[]byte) Detector {
|
||
return func(raw []byte, limit uint32) bool {
|
||
for _, s := range sigs {
|
||
if ciCheck(s, raw) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
// ciCheck reports whether raw starts with sig, ignoring ASCII case for the
// uppercase letters of sig.
//
// Signatures are expected to spell their letters in uppercase: for every
// byte of sig in 'A'..'Z' the corresponding byte of raw is matched in either
// case. Every other byte of sig (including lowercase letters) must match raw
// exactly.
func ciCheck(sig, raw []byte) bool {
	// Only len(sig) bytes of raw are inspected, so an input exactly as long
	// as the signature is still a valid prefix match.
	if len(raw) < len(sig) {
		return false
	}
	for i, b := range sig {
		db := raw[i]
		if 'A' <= b && b <= 'Z' {
			// Clear bit 0x20 so that an ASCII lowercase letter in raw
			// compares equal to its uppercase form in sig.
			db &= 0xDF
		}
		if b != db {
			return false
		}
	}

	return true
}
|
||
|
||
// xml creates a Detector which returns true if any of the provided XML signatures
|
||
// matches the raw input.
|
||
func xml(sigs ...xmlSig) Detector {
|
||
return func(raw []byte, limit uint32) bool {
|
||
raw = trimLWS(raw)
|
||
if len(raw) == 0 {
|
||
return false
|
||
}
|
||
for _, s := range sigs {
|
||
if xmlCheck(s, raw) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
func xmlCheck(sig xmlSig, raw []byte) bool {
|
||
raw = raw[:min(len(raw), 512)]
|
||
|
||
if len(sig.localName) == 0 {
|
||
return bytes.Index(raw, sig.xmlns) > 0
|
||
}
|
||
if len(sig.xmlns) == 0 {
|
||
return bytes.Index(raw, sig.localName) > 0
|
||
}
|
||
|
||
localNameIndex := bytes.Index(raw, sig.localName)
|
||
return localNameIndex != -1 && localNameIndex < bytes.Index(raw, sig.xmlns)
|
||
}
|
||
|
||
// markup creates a Detector which returns true is any of the HTML signatures
|
||
// matches the raw input.
|
||
func markup(sigs ...[]byte) Detector {
|
||
return func(raw []byte, limit uint32) bool {
|
||
if bytes.HasPrefix(raw, []byte{0xEF, 0xBB, 0xBF}) {
|
||
// We skip the UTF-8 BOM if present to ensure we correctly
|
||
// process any leading whitespace. The presence of the BOM
|
||
// is taken into account during charset detection in charset.go.
|
||
raw = trimLWS(raw[3:])
|
||
} else {
|
||
raw = trimLWS(raw)
|
||
}
|
||
if len(raw) == 0 {
|
||
return false
|
||
}
|
||
for _, s := range sigs {
|
||
if markupCheck(s, raw) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
// markupCheck reports whether raw begins with the tag signature sig and the
// byte right after the signature is a space or a right angle bracket.
//
// Uppercase ASCII letters in sig match either case in raw; every other byte
// of sig must match raw exactly. raw must be at least one byte longer than
// sig so that the terminating byte can be inspected.
func markupCheck(sig, raw []byte) bool {
	n := len(sig)
	if len(raw) <= n {
		return false
	}

	for i := 0; i < n; i++ {
		want, got := sig[i], raw[i]
		if want >= 'A' && want <= 'Z' {
			// Fold an ASCII lowercase letter in raw to uppercase.
			got &= 0xDF
		}
		if got != want {
			return false
		}
	}

	// The tag name must be terminated by a space or by '>'.
	switch raw[n] {
	case ' ', '>':
		return true
	}
	return false
}
|
||
|
||
// ftyp creates a Detector which returns true if any of the FTYP signatures
|
||
// matches the raw input.
|
||
func ftyp(sigs ...[]byte) Detector {
|
||
return func(raw []byte, limit uint32) bool {
|
||
if len(raw) < 12 {
|
||
return false
|
||
}
|
||
for _, s := range sigs {
|
||
if bytes.Equal(raw[4:12], append([]byte("ftyp"), s...)) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
func newXMLSig(localName, xmlns string) xmlSig {
|
||
ret := xmlSig{xmlns: []byte(xmlns)}
|
||
if localName != "" {
|
||
ret.localName = []byte(fmt.Sprintf("<%s", localName))
|
||
}
|
||
|
||
return ret
|
||
}
|
||
|
||
// A valid shebang starts with the "#!" characters,
|
||
// followed by any number of spaces,
|
||
// followed by the path to the interpreter,
|
||
// and, optionally, followed by the arguments for the interpreter.
|
||
//
|
||
// Ex:
|
||
//
|
||
// #! /usr/bin/env php
|
||
//
|
||
// /usr/bin/env is the interpreter, php is the first and only argument.
|
||
func shebang(sigs ...[]byte) Detector {
|
||
return func(raw []byte, limit uint32) bool {
|
||
for _, s := range sigs {
|
||
if shebangCheck(s, firstLine(raw)) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
func shebangCheck(sig, raw []byte) bool {
|
||
if len(raw) < len(sig)+2 {
|
||
return false
|
||
}
|
||
if raw[0] != '#' || raw[1] != '!' {
|
||
return false
|
||
}
|
||
|
||
return bytes.Equal(trimLWS(trimRWS(raw[2:])), sig)
|
||
}
|
||
|
||
// trimLWS trims whitespace from beginning of the input.
|
||
func trimLWS(in []byte) []byte {
|
||
firstNonWS := 0
|
||
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
|
||
}
|
||
|
||
return in[firstNonWS:]
|
||
}
|
||
|
||
// trimRWS trims whitespace from the end of the input.
|
||
func trimRWS(in []byte) []byte {
|
||
lastNonWS := len(in) - 1
|
||
for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- {
|
||
}
|
||
|
||
return in[:lastNonWS+1]
|
||
}
|
||
|
||
// firstLine returns the part of in that precedes the first '\n' byte, or all
// of in when it contains no '\n'. The newline itself is not included; a
// preceding '\r' is kept.
func firstLine(in []byte) []byte {
	if nl := bytes.IndexByte(in, '\n'); nl >= 0 {
		return in[:nl]
	}
	return in
}
|
||
|
||
// isWS reports whether b is one of the whitespace bytes recognised by this
// package: tab, line feed, form feed, carriage return or space.
func isWS(b byte) bool {
	switch b {
	case '\t', '\n', '\x0c', '\r', ' ':
		return true
	default:
		return false
	}
}
|
||
|
||
// min returns the smaller of the two integers a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|