326 lines
13 KiB
C++
326 lines
13 KiB
C++
// -*- mode: c++ -*-
|
|
|
|
// Copyright (c) 2010 Google Inc. All Rights Reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following disclaimer
|
|
// in the documentation and/or other materials provided with the
|
|
// distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
|
|
|
|
// stabs_reader.h: Define StabsReader, a parser for STABS debugging
|
|
// information. A description of the STABS debugging format can be
|
|
// found at:
|
|
//
|
|
// http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
|
|
//
|
|
// The comments here assume you understand the format.
|
|
//
|
|
// This parser can handle big-endian and little-endian data, and the symbol
|
|
// values may be either 32 or 64 bits long. It handles both STABS in
|
|
// sections (as used on Linux) and STABS appearing directly in an
|
|
// a.out-like symbol table (as used in Darwin OS X Mach-O files).
|
|
|
|
#ifndef COMMON_STABS_READER_H__
|
|
#define COMMON_STABS_READER_H__
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif
|
|
|
|
#ifdef HAVE_A_OUT_H
|
|
#include <a.out.h>
|
|
#endif
|
|
#ifdef HAVE_MACH_O_NLIST_H
|
|
#include <mach-o/nlist.h>
|
|
#endif
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "common/byte_cursor.h"
|
|
|
|
namespace google_breakpad {
|
|
|
|
// Forward declaration: StabsReader stores and takes a StabsHandler pointer,
// while the full StabsHandler definition appears later in this header.
class StabsHandler;
|
|
|
|
class StabsReader {
|
|
public:
|
|
// Create a reader for the STABS debug information whose .stab section is
|
|
// being traversed by ITERATOR, and whose .stabstr section is referred to
|
|
// by STRINGS. The reader will call the member functions of HANDLER to
|
|
// report the information it finds, when the reader's 'Process' member
|
|
// function is called.
|
|
//
|
|
// BIG_ENDIAN should be true if the entries in the .stab section are in
|
|
// big-endian form, or false if they are in little-endian form.
|
|
//
|
|
// VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
|
|
// field in each entry in bytes.
|
|
//
|
|
// UNITIZED should be true if the STABS data is stored in units with
|
|
// N_UNDF headers. This is usually the case for STABS stored in sections,
|
|
// like .stab/.stabstr, and usually not the case for STABS stored in the
|
|
// actual symbol table; UNITIZED should be true when parsing Linux stabs,
|
|
// false when parsing Mac OS X STABS. For details, see:
|
|
// http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
|
|
//
|
|
// Note that, in ELF, the .stabstr section should be found using the
|
|
// 'sh_link' field of the .stab section header, not by name.
|
|
StabsReader(const uint8_t *stab, size_t stab_size,
|
|
const uint8_t *stabstr, size_t stabstr_size,
|
|
bool big_endian, size_t value_size, bool unitized,
|
|
StabsHandler *handler);
|
|
|
|
// Process the STABS data, calling the handler's member functions to
|
|
// report what we find. While the handler functions return true,
|
|
// continue to process until we reach the end of the section. If we
|
|
// processed the entire section and all handlers returned true,
|
|
// return true. If any handler returned false, return false.
|
|
//
|
|
// This is only meant to be called once per StabsReader instance;
|
|
// resuming a prior processing pass that stopped abruptly isn't supported.
|
|
bool Process();
|
|
|
|
private:
|
|
|
|
// An class for walking arrays of STABS entries. This isolates the main
|
|
// STABS reader from the exact format (size; endianness) of the entries
|
|
// themselves.
|
|
class EntryIterator {
|
|
public:
|
|
// The contents of a STABS entry, adjusted for the host's endianness,
|
|
// word size, 'struct nlist' layout, and so on.
|
|
struct Entry {
|
|
// True if this iterator has reached the end of the entry array. When
|
|
// this is set, the other members of this structure are not valid.
|
|
bool at_end;
|
|
|
|
// The number of this entry within the list.
|
|
size_t index;
|
|
|
|
// The current entry's name offset. This is the offset within the
|
|
// current compilation unit's strings, as establish by the N_UNDF entries.
|
|
size_t name_offset;
|
|
|
|
// The current entry's type, 'other' field, descriptor, and value.
|
|
unsigned char type;
|
|
unsigned char other;
|
|
short descriptor;
|
|
uint64_t value;
|
|
};
|
|
|
|
// Create a EntryIterator walking the entries in BUFFER. Treat the
|
|
// entries as big-endian if BIG_ENDIAN is true, as little-endian
|
|
// otherwise. Assume each entry has a 'value' field whose size is
|
|
// VALUE_SIZE.
|
|
//
|
|
// This would not be terribly clean to extend to other format variations,
|
|
// but it's enough to handle Linux and Mac, and we'd like STABS to die
|
|
// anyway.
|
|
//
|
|
// For the record: on Linux, STABS entry values are always 32 bits,
|
|
// regardless of the architecture address size (don't ask me why); on
|
|
// Mac, they are 32 or 64 bits long. Oddly, the section header's entry
|
|
// size for a Linux ELF .stab section varies according to the ELF class
|
|
// from 12 to 20 even as the actual entries remain unchanged.
|
|
EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size);
|
|
|
|
// Move to the next entry. This function's behavior is undefined if
|
|
// at_end() is true when it is called.
|
|
EntryIterator &operator++() { Fetch(); entry_.index++; return *this; }
|
|
|
|
// Dereferencing this iterator produces a reference to an Entry structure
|
|
// that holds the current entry's values. The entry is owned by this
|
|
// EntryIterator, and will be invalidated at the next call to operator++.
|
|
const Entry &operator*() const { return entry_; }
|
|
const Entry *operator->() const { return &entry_; }
|
|
|
|
private:
|
|
// Read the STABS entry at cursor_, and set entry_ appropriately.
|
|
void Fetch();
|
|
|
|
// The size of entries' value field, in bytes.
|
|
size_t value_size_;
|
|
|
|
// A byte cursor traversing buffer_.
|
|
ByteCursor cursor_;
|
|
|
|
// Values for the entry this iterator refers to.
|
|
Entry entry_;
|
|
};
|
|
|
|
// A source line, saved to be reported later.
|
|
struct Line {
|
|
uint64_t address;
|
|
const char *filename;
|
|
int number;
|
|
};
|
|
|
|
// Return the name of the current symbol.
|
|
const char *SymbolString();
|
|
|
|
// Process a compilation unit starting at symbol_. Return true
|
|
// to continue processing, or false to abort.
|
|
bool ProcessCompilationUnit();
|
|
|
|
// Process a function in current_source_file_ starting at symbol_.
|
|
// Return true to continue processing, or false to abort.
|
|
bool ProcessFunction();
|
|
|
|
// Process an exported function symbol.
|
|
// Return true to continue processing, or false to abort.
|
|
bool ProcessExtern();
|
|
|
|
// The STABS entries being parsed.
|
|
ByteBuffer entries_;
|
|
|
|
// The string section to which the entries refer.
|
|
ByteBuffer strings_;
|
|
|
|
// The iterator walking the STABS entries.
|
|
EntryIterator iterator_;
|
|
|
|
// True if the data is "unitized"; see the explanation in the comment for
|
|
// StabsReader::StabsReader.
|
|
bool unitized_;
|
|
|
|
StabsHandler *handler_;
|
|
|
|
// The offset of the current compilation unit's strings within stabstr_.
|
|
size_t string_offset_;
|
|
|
|
// The value string_offset_ should have for the next compilation unit,
|
|
// as established by N_UNDF entries.
|
|
size_t next_cu_string_offset_;
|
|
|
|
// The current source file name.
|
|
const char *current_source_file_;
|
|
|
|
// Mac OS X STABS place SLINE records before functions; we accumulate a
|
|
// vector of these until we see the FUN record, and then report them
|
|
// after the StartFunction call.
|
|
std::vector<Line> queued_lines_;
|
|
};
|
|
|
|
// Consumer-provided callback structure for the STABS reader. Clients
// of the STABS reader provide an instance of this structure. The
// reader then invokes the member functions of that instance to report
// the information it finds.
//
// The default definitions of the member functions do nothing, and return
// true so processing will continue. The one exception is Warning, which
// is pure virtual and must be supplied by every concrete handler.
class StabsHandler {
 public:
  StabsHandler() { }
  virtual ~StabsHandler() { }

  // Some general notes about the handler callback functions:

  // Processing proceeds until the end of the .stab section, or until
  // one of these functions returns false.

  // The addresses given are as reported in the STABS info, without
  // regard for whether the module may be loaded at different
  // addresses at different times (a shared library, say). When
  // processing STABS from an ELF shared library, the addresses given
  // all assume the library is loaded at its nominal load address.
  // They are *not* offsets from the nominal load address. If you
  // want offsets, you must subtract off the library's nominal load
  // address.

  // The arguments to these functions named FILENAME are all
  // references to strings stored in the .stabstr section. Because
  // both the Linux and Solaris linkers factor out duplicate strings
  // from the .stabstr section, the consumer can assume that if two
  // FILENAME values are different addresses, they represent different
  // file names.
  //
  // Thus, it's safe to use (say) std::map<char *, ...>, which does
  // string address comparisons, not string content comparisons.
  // Since all the strings are in same array of characters --- the
  // .stabstr section --- comparing their addresses produces
  // predictable, if not lexicographically meaningful, results.

  // Begin processing a compilation unit whose main source file is
  // named FILENAME, and whose base address is ADDRESS. If
  // BUILD_DIRECTORY is non-NULL, it is the name of the build
  // directory in which the compilation occurred.
  //
  // Return true to continue processing, or false to stop.
  virtual bool StartCompilationUnit(const char *filename, uint64_t address,
                                    const char *build_directory) {
    return true;
  }

  // Finish processing the compilation unit. If ADDRESS is non-zero,
  // it is the ending address of the compilation unit. If ADDRESS is
  // zero, then the compilation unit's ending address is not
  // available, and the consumer must infer it by other means.
  //
  // Return true to continue processing, or false to stop.
  virtual bool EndCompilationUnit(uint64_t address) { return true; }

  // Begin processing a function named NAME, whose starting address is
  // ADDRESS. This function belongs to the compilation unit that was
  // most recently started but not ended.
  //
  // Note that, unlike filenames, NAME is not a pointer into the
  // .stabstr section; this is because the name as it appears in the
  // STABS data is followed by type information. The value passed to
  // StartFunction is the function name alone.
  //
  // In languages that use name mangling, like C++, NAME is mangled.
  //
  // Return true to continue processing, or false to stop.
  virtual bool StartFunction(const std::string &name, uint64_t address) {
    return true;
  }

  // Finish processing the function. If ADDRESS is non-zero, it is
  // the ending address for the function. If ADDRESS is zero, then
  // the function's ending address is not available, and the consumer
  // must infer it by other means.
  //
  // Return true to continue processing, or false to stop.
  virtual bool EndFunction(uint64_t address) { return true; }

  // Report that the code at ADDRESS is attributable to line NUMBER of
  // the source file named FILENAME. The caller must infer the ending
  // address of the line.
  //
  // Return true to continue processing, or false to stop.
  virtual bool Line(uint64_t address, const char *filename, int number) {
    return true;
  }

  // Report that an exported function NAME is present at ADDRESS.
  // The size of the function is unknown.
  //
  // Return true to continue processing, or false to stop.
  virtual bool Extern(const std::string &name, uint64_t address) {
    return true;
  }

  // Report a warning. FORMAT is a printf-like format string,
  // specifying how to format the subsequent arguments.
  virtual void Warning(const char *format, ...) = 0;
};
|
|
|
|
} // namespace google_breakpad
|
|
|
|
#endif // COMMON_STABS_READER_H__
|