// Copyright (c) 2010, Google Inc.
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following disclaimer
|
|
// in the documentation and/or other materials provided with the
|
|
// distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
|
|
|
|
// macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and
// google_breakpad::mach_o::Reader. See macho_reader.h for details.
|
|
|
|
#include "common/mac/macho_reader.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
namespace google_breakpad {
|
|
namespace mach_o {
|
|
|
|
// ASSERT_ALWAYS_EVAL(x) evaluates X exactly once in every build
// configuration, and additionally asserts that the result is true when
// NDEBUG is not #defined.
//
// Plain 'assert' is unsuitable for expressions that do necessary work:
// with NDEBUG #defined it does not evaluate its argument at all. Storing
// the result in a variable and asserting on the variable is no better,
// since the variable then draws unused-variable warnings under NDEBUG.
//
// The NDEBUG form casts to void so that the macro is a void expression in
// both configurations: the discarded result does not draw unused-value
// warnings, and code that mistakenly uses the macro's value fails to
// compile in release builds just as it does in debug builds.
#if defined(NDEBUG)
#define ASSERT_ALWAYS_EVAL(x) ((void)(x))
#else
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#endif
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the file is neither a fat binary nor a Mach-O object file.
void FatReader::Reporter::BadHeader() {
  const char *const file = filename_.c_str();
  fprintf(stderr,
          "%s: file is neither a fat binary file"
          " nor a Mach-O object file\n",
          file);
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the file ends before the data its headers describe.
void FatReader::Reporter::TooShort() {
  const char *const file = filename_.c_str();
  fprintf(stderr,
          "%s: file too short for the data it claims to contain\n",
          file);
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the file is too short to hold the object files it lists.
void FatReader::Reporter::MisplacedObjectFile() {
  const char *const file = filename_.c_str();
  fprintf(stderr,
          "%s: file too short for the object files it claims"
          " to contain\n",
          file);
}
|
|
|
|
bool FatReader::Read(const uint8_t *buffer, size_t size) {
|
|
buffer_.start = buffer;
|
|
buffer_.end = buffer + size;
|
|
ByteCursor cursor(&buffer_);
|
|
|
|
// Fat binaries always use big-endian, so read the magic number in
|
|
// that endianness. To recognize Mach-O magic numbers, which can use
|
|
// either endianness, check for both the proper and reversed forms
|
|
// of the magic numbers.
|
|
cursor.set_big_endian(true);
|
|
if (cursor >> magic_) {
|
|
if (magic_ == FAT_MAGIC) {
|
|
// How many object files does this fat binary contain?
|
|
uint32_t object_files_count;
|
|
if (!(cursor >> object_files_count)) { // nfat_arch
|
|
reporter_->TooShort();
|
|
return false;
|
|
}
|
|
|
|
// Read the list of object files.
|
|
object_files_.resize(object_files_count);
|
|
for (size_t i = 0; i < object_files_count; i++) {
|
|
struct fat_arch *objfile = &object_files_[i];
|
|
|
|
// Read this object file entry, byte-swapping as appropriate.
|
|
cursor >> objfile->cputype
|
|
>> objfile->cpusubtype
|
|
>> objfile->offset
|
|
>> objfile->size
|
|
>> objfile->align;
|
|
if (!cursor) {
|
|
reporter_->TooShort();
|
|
return false;
|
|
}
|
|
// Does the file actually have the bytes this entry refers to?
|
|
size_t fat_size = buffer_.Size();
|
|
if (objfile->offset > fat_size ||
|
|
objfile->size > fat_size - objfile->offset) {
|
|
reporter_->MisplacedObjectFile();
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
} else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
|
|
magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
|
|
// If this is a little-endian Mach-O file, fix the cursor's endianness.
|
|
if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
|
|
cursor.set_big_endian(false);
|
|
// Record the entire file as a single entry in the object file list.
|
|
object_files_.resize(1);
|
|
|
|
// Get the cpu type and subtype from the Mach-O header.
|
|
if (!(cursor >> object_files_[0].cputype
|
|
>> object_files_[0].cpusubtype)) {
|
|
reporter_->TooShort();
|
|
return false;
|
|
}
|
|
|
|
object_files_[0].offset = 0;
|
|
object_files_[0].size = static_cast<uint32_t>(buffer_.Size());
|
|
// This alignment is correct for 32 and 64-bit x86 and ppc.
|
|
// See get_align in the lipo source for other architectures:
|
|
// http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
|
|
object_files_[0].align = 12; // 2^12 == 4096
|
|
|
|
return true;
|
|
}
|
|
}
|
|
|
|
reporter_->BadHeader();
|
|
return false;
|
|
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the file is not a Mach-O object file.
void Reader::Reporter::BadHeader() {
  const char *const file = filename_.c_str();
  fprintf(stderr, "%s: file is not a Mach-O object file\n", file);
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, giving the
// CPU type and subtype that were found (CPU_TYPE, CPU_SUBTYPE) alongside
// the ones that were expected (EXPECTED_CPU_TYPE, EXPECTED_CPU_SUBTYPE).
void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
                                       cpu_subtype_t cpu_subtype,
                                       cpu_type_t expected_cpu_type,
                                       cpu_subtype_t expected_cpu_subtype) {
  const char *const file = filename_.c_str();
  fprintf(stderr,
          "%s: CPU type %d, subtype %d does not match expected"
          " type %d, subtype %d\n",
          file,
          cpu_type, cpu_subtype,
          expected_cpu_type, expected_cpu_subtype);
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the file does not contain a complete Mach-O header.
void Reader::Reporter::HeaderTruncated() {
  const char *const file = filename_.c_str();
  fprintf(stderr,
          "%s: file does not contain a complete Mach-O header\n",
          file);
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the file is too short to hold the load command region described by the
// Mach-O header.
void Reader::Reporter::LoadCommandRegionTruncated() {
  const char *const file = filename_.c_str();
  fprintf(stderr,
          "%s: file too short to hold load command region"
          " given in Mach-O header\n",
          file);
}
|
|
|
|
void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
|
|
LoadCommandType type) {
|
|
fprintf(stderr, "%s: file's header claims there are %ld"
|
|
" load commands, but load command #%ld",
|
|
filename_.c_str(), claimed, i);
|
|
if (type) fprintf(stderr, ", of type %d,", type);
|
|
fprintf(stderr, " extends beyond the end of the load command region\n");
|
|
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the contents of load command number I, whose type is TYPE, extend
// beyond the size given in that load command's own header.
void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
  // I is a size_t, so it is printed with "%zu". "%ld" expects a long,
  // which need not have the same width or signedness.
  fprintf(stderr, "%s: the contents of load command #%zu, of type %d,"
          " extend beyond the size given in the load command's header\n",
          filename_.c_str(), i, type);
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the load command for the segment called NAME is too short to hold the
// section headers it claims to have.
void Reader::Reporter::SectionsMissing(const string &name) {
  const char *const file = filename_.c_str();
  const char *const segment_name = name.c_str();
  fprintf(stderr,
          "%s: the load command for segment '%s'"
          " is too short to hold the section headers it claims to have\n",
          file, segment_name);
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the segment called NAME claims contents lying beyond the end of the
// file.
void Reader::Reporter::MisplacedSegmentData(const string &name) {
  const char *const file = filename_.c_str();
  const char *const segment_name = name.c_str();
  fprintf(stderr,
          "%s: the segment '%s' claims its contents lie beyond"
          " the end of the file\n",
          file, segment_name);
}
|
|
|
|
void Reader::Reporter::MisplacedSectionData(const string §ion,
|
|
const string &segment) {
|
|
fprintf(stderr, "%s: the section '%s' in segment '%s'"
|
|
" claims its contents lie outside the segment's contents\n",
|
|
filename_.c_str(), section.c_str(), segment.c_str());
|
|
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// the LC_SYMTAB load command places the symbol table's contents beyond
// the end of the file.
void Reader::Reporter::MisplacedSymbolTable() {
  const char *const file = filename_.c_str();
  fprintf(stderr,
          "%s: the LC_SYMTAB load command claims that the symbol"
          " table's contents are located beyond the end of the file\n",
          file);
}
|
|
|
|
// Print a diagnostic on stderr, prefixed with the file name, saying that
// CPU_TYPE is not a supported CPU type.
void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
  const char *const file = filename_.c_str();
  fprintf(stderr, "%s: CPU type %d is not supported\n", file, cpu_type);
}
|
|
|
|
bool Reader::Read(const uint8_t *buffer,
|
|
size_t size,
|
|
cpu_type_t expected_cpu_type,
|
|
cpu_subtype_t expected_cpu_subtype) {
|
|
assert(!buffer_.start);
|
|
buffer_.start = buffer;
|
|
buffer_.end = buffer + size;
|
|
ByteCursor cursor(&buffer_, true);
|
|
uint32_t magic;
|
|
if (!(cursor >> magic)) {
|
|
reporter_->HeaderTruncated();
|
|
return false;
|
|
}
|
|
|
|
if (expected_cpu_type != CPU_TYPE_ANY) {
|
|
uint32_t expected_magic;
|
|
// validate that magic matches the expected cpu type
|
|
switch (expected_cpu_type) {
|
|
case CPU_TYPE_I386:
|
|
expected_magic = MH_CIGAM;
|
|
break;
|
|
case CPU_TYPE_POWERPC:
|
|
expected_magic = MH_MAGIC;
|
|
break;
|
|
case CPU_TYPE_X86_64:
|
|
expected_magic = MH_CIGAM_64;
|
|
break;
|
|
case CPU_TYPE_POWERPC64:
|
|
expected_magic = MH_MAGIC_64;
|
|
break;
|
|
default:
|
|
reporter_->UnsupportedCPUType(expected_cpu_type);
|
|
return false;
|
|
}
|
|
|
|
if (expected_magic != magic) {
|
|
reporter_->BadHeader();
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Since the byte cursor is in big-endian mode, a reversed magic number
|
|
// always indicates a little-endian file, regardless of our own endianness.
|
|
switch (magic) {
|
|
case MH_MAGIC: big_endian_ = true; bits_64_ = false; break;
|
|
case MH_CIGAM: big_endian_ = false; bits_64_ = false; break;
|
|
case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break;
|
|
case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break;
|
|
default:
|
|
reporter_->BadHeader();
|
|
return false;
|
|
}
|
|
cursor.set_big_endian(big_endian_);
|
|
uint32_t commands_size, reserved;
|
|
cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
|
|
>> commands_size >> flags_;
|
|
if (bits_64_)
|
|
cursor >> reserved;
|
|
if (!cursor) {
|
|
reporter_->HeaderTruncated();
|
|
return false;
|
|
}
|
|
|
|
if (expected_cpu_type != CPU_TYPE_ANY &&
|
|
(expected_cpu_type != cpu_type_ ||
|
|
expected_cpu_subtype != cpu_subtype_)) {
|
|
reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
|
|
expected_cpu_type, expected_cpu_subtype);
|
|
return false;
|
|
}
|
|
|
|
cursor
|
|
.PointTo(&load_commands_.start, commands_size)
|
|
.PointTo(&load_commands_.end, 0);
|
|
if (!cursor) {
|
|
reporter_->LoadCommandRegionTruncated();
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Walk the load commands recorded by Read, passing each one to HANDLER:
// LC_SEGMENT and LC_SEGMENT_64 commands go to HANDLER->SegmentCommand,
// LC_SYMTAB commands go to HANDLER->SymtabCommand, and all others go to
// HANDLER->UnknownCommand.
//
// Return true if every load command was parsed and every handler call
// returned true. If a handler call returns false, stop and return false.
// If a load command is malformed, call the appropriate reporter_ member
// function and return false.
bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const {
  ByteCursor list_cursor(&load_commands_, big_endian_);

  for (size_t index = 0; index < load_command_count_; ++index) {
    // command refers to this load command alone, so that cursor will
    // refuse to read past the load command's end. But since we haven't
    // read the size yet, let command initially refer to the entire
    // remainder of the load command series.
    ByteBuffer command(list_cursor.here(), list_cursor.Available());
    ByteCursor cursor(&command, big_endian_);

    // Read the command type and size --- fields common to all commands.
    uint32_t type, size;
    if (!(cursor >> type)) {
      reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
      return false;
    }
    if (!(cursor >> size) || size > command.Size()) {
      reporter_->LoadCommandsOverrun(load_command_count_, index, type);
      return false;
    }

    // The size covers the type and size fields themselves, so it can
    // never be smaller than the two fields just read. Shrinking command
    // below the bytes already consumed would put command.end behind
    // cursor's position, and a size of zero would keep list_cursor from
    // ever advancing.
    if (size < sizeof(type) + sizeof(size)) {
      reporter_->LoadCommandTooShort(index, type);
      return false;
    }

    // Now that we've read the length, restrict command's range to this
    // load command only.
    command.end = command.start + size;

    switch (type) {
      case LC_SEGMENT:
      case LC_SEGMENT_64: {
        Segment segment;
        segment.bits_64 = (type == LC_SEGMENT_64);
        size_t word_size = segment.bits_64 ? 8 : 4;
        cursor.CString(&segment.name, 16);
        // Use 64-bit variables so that the 8-byte fields of an
        // LC_SEGMENT_64 command are not truncated on hosts whose size_t
        // is only 32 bits wide.
        uint64_t file_offset, file_size;
        cursor
            .Read(word_size, false, &segment.vmaddr)
            .Read(word_size, false, &segment.vmsize)
            .Read(word_size, false, &file_offset)
            .Read(word_size, false, &file_size);
        cursor >> segment.maxprot
               >> segment.initprot
               >> segment.nsects
               >> segment.flags;
        if (!cursor) {
          reporter_->LoadCommandTooShort(index, type);
          return false;
        }
        // Subtracting from the buffer size, rather than adding offset
        // and size, keeps the comparison free of overflow.
        if (file_offset > buffer_.Size() ||
            file_size > buffer_.Size() - file_offset) {
          reporter_->MisplacedSegmentData(segment.name);
          return false;
        }
        // Mach-O files in .dSYM bundles have the contents of the loaded
        // segments removed, and their file offsets and file sizes zeroed
        // out. To help us handle this special case properly, give such
        // segments' contents NULL starting and ending pointers.
        if (file_offset == 0 && file_size == 0) {
          segment.contents.start = segment.contents.end = NULL;
        } else {
          // The check above established that both values fit within the
          // buffer, so narrowing them to size_t loses nothing.
          segment.contents.start =
              buffer_.start + static_cast<size_t>(file_offset);
          segment.contents.end =
              segment.contents.start + static_cast<size_t>(file_size);
        }
        // The section list occupies the remainder of this load command's
        // space.
        segment.section_list.start = cursor.here();
        segment.section_list.end = command.end;

        if (!handler->SegmentCommand(segment))
          return false;
        break;
      }

      case LC_SYMTAB: {
        uint32_t symoff, nsyms, stroff, strsize;
        cursor >> symoff >> nsyms >> stroff >> strsize;
        if (!cursor) {
          reporter_->LoadCommandTooShort(index, type);
          return false;
        }
        // How big are the entries in the symbol table?
        // sizeof(struct nlist_64) : sizeof(struct nlist),
        // but be paranoid about alignment vs. target architecture.
        const uint64_t symbol_size = bits_64_ ? 16 : 12;
        // How big is the entire symbol array? Compute this in 64 bits:
        // a 32-bit count times 16 always fits, whereas a 32-bit size_t
        // product could wrap around and slip past the check below.
        const uint64_t symbols_size = nsyms * symbol_size;
        const uint64_t buffer_size = buffer_.Size();
        if (symoff > buffer_size || symbols_size > buffer_size - symoff ||
            stroff > buffer_size || strsize > buffer_size - stroff) {
          reporter_->MisplacedSymbolTable();
          return false;
        }
        // symbols_size is now known to be no larger than the buffer, so
        // narrowing it to size_t loses nothing.
        ByteBuffer entries(buffer_.start + symoff,
                           static_cast<size_t>(symbols_size));
        ByteBuffer names(buffer_.start + stroff, strsize);
        if (!handler->SymtabCommand(entries, names))
          return false;
        break;
      }

      default: {
        if (!handler->UnknownCommand(type, command))
          return false;
        break;
      }
    }

    // Step to the next load command, whatever the handler consumed.
    list_cursor.set_here(command.end);
  }

  return true;
}
|
|
|
|
// A load command handler that looks for a segment of a given name.
|
|
class Reader::SegmentFinder : public LoadCommandHandler {
|
|
public:
|
|
// Create a load command handler that looks for a segment named NAME,
|
|
// and sets SEGMENT to describe it if found.
|
|
SegmentFinder(const string &name, Segment *segment)
|
|
: name_(name), segment_(segment), found_() { }
|
|
|
|
// Return true if the traversal found the segment, false otherwise.
|
|
bool found() const { return found_; }
|
|
|
|
bool SegmentCommand(const Segment &segment) {
|
|
if (segment.name == name_) {
|
|
*segment_ = segment;
|
|
found_ = true;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
private:
|
|
// The name of the segment our creator is looking for.
|
|
const string &name_;
|
|
|
|
// Where we should store the segment if found. (WEAK)
|
|
Segment *segment_;
|
|
|
|
// True if we found the segment.
|
|
bool found_;
|
|
};
|
|
|
|
// Look for a segment named NAME among the load commands. If there is one,
// store its description in *SEGMENT and return true; otherwise return
// false.
bool Reader::FindSegment(const string &name, Segment *segment) const {
  SegmentFinder finder(name, segment);
  // WalkLoadCommands returns false both when the walk fails and when the
  // finder stops it on a match, so its result can't tell us whether the
  // segment was found; ask the finder instead.
  (void) WalkLoadCommands(&finder);
  const bool was_found = finder.found();
  return was_found;
}
|
|
|
|
bool Reader::WalkSegmentSections(const Segment &segment,
|
|
SectionHandler *handler) const {
|
|
size_t word_size = segment.bits_64 ? 8 : 4;
|
|
ByteCursor cursor(&segment.section_list, big_endian_);
|
|
|
|
for (size_t i = 0; i < segment.nsects; i++) {
|
|
Section section;
|
|
section.bits_64 = segment.bits_64;
|
|
uint64_t size;
|
|
uint32_t offset, dummy32;
|
|
cursor
|
|
.CString(§ion.section_name, 16)
|
|
.CString(§ion.segment_name, 16)
|
|
.Read(word_size, false, §ion.address)
|
|
.Read(word_size, false, &size)
|
|
>> offset
|
|
>> section.align
|
|
>> dummy32
|
|
>> dummy32
|
|
>> section.flags
|
|
>> dummy32
|
|
>> dummy32;
|
|
if (section.bits_64)
|
|
cursor >> dummy32;
|
|
if (!cursor) {
|
|
reporter_->SectionsMissing(segment.name);
|
|
return false;
|
|
}
|
|
if ((section.flags & SECTION_TYPE) == S_ZEROFILL) {
|
|
// Zero-fill sections have a size, but no contents.
|
|
section.contents.start = section.contents.end = NULL;
|
|
} else if (segment.contents.start == NULL &&
|
|
segment.contents.end == NULL) {
|
|
// Mach-O files in .dSYM bundles have the contents of the loaded
|
|
// segments removed, and their file offsets and file sizes zeroed
|
|
// out. However, the sections within those segments still have
|
|
// non-zero sizes. There's no reason to call MisplacedSectionData in
|
|
// this case; the caller may just need the section's load
|
|
// address. But do set the contents' limits to NULL, for safety.
|
|
section.contents.start = section.contents.end = NULL;
|
|
} else {
|
|
if (offset < size_t(segment.contents.start - buffer_.start) ||
|
|
offset > size_t(segment.contents.end - buffer_.start) ||
|
|
size > size_t(segment.contents.end - buffer_.start - offset)) {
|
|
reporter_->MisplacedSectionData(section.section_name,
|
|
section.segment_name);
|
|
return false;
|
|
}
|
|
section.contents.start = buffer_.start + offset;
|
|
section.contents.end = section.contents.start + size;
|
|
}
|
|
if (!handler->HandleSection(section))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// A SectionHandler that builds a SectionMap for the sections within a
|
|
// given segment.
|
|
class Reader::SectionMapper: public SectionHandler {
|
|
public:
|
|
// Create a SectionHandler that populates MAP with an entry for
|
|
// each section it is given.
|
|
SectionMapper(SectionMap *map) : map_(map) { }
|
|
bool HandleSection(const Section §ion) {
|
|
(*map_)[section.section_name] = section;
|
|
return true;
|
|
}
|
|
private:
|
|
// The map under construction. (WEAK)
|
|
SectionMap *map_;
|
|
};
|
|
|
|
bool Reader::MapSegmentSections(const Segment &segment,
|
|
SectionMap *section_map) const {
|
|
section_map->clear();
|
|
SectionMapper mapper(section_map);
|
|
return WalkSegmentSections(segment, &mapper);
|
|
}
|
|
|
|
} // namespace mach_o
|
|
} // namespace google_breakpad
|