337 lines
9.9 KiB
C++
337 lines
9.9 KiB
C++
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: 2016 Jan Engelhardt
/*
 *	An educational piece-by-piece G++ symbol name demangler
 *	in the style of wireshark's bytewise highlighting
 */
|
|
#include <list>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <cctype>
|
|
#include <cstdio>
|
|
#include <libHX/string.h>
|
|
|
|
/*
 * One annotated slice of the mangled symbol. Segments form a tree:
 * a segment may own further segments describing its inner parts.
 * The type stays a plain aggregate because the parsers construct it
 * with brace initialization ({start, end, desc}).
 */
struct seg {
	/* Offset of the first character of the slice within the symbol. */
	size_t start;
	/* Offset one past the last character of the slice. */
	size_t end;
	/* Human-readable description printed next to the highlighted slice. */
	std::string desc;
	/* Nested segments, printed one indentation level deeper. */
	std::list<seg> sub;
};
|
|
|
|
/*
 * Terminal escape sequences used to highlight the current segment.
 * The ESC byte is written as the octal escape \033; "\e" is a compiler
 * extension and not part of standard C++. The resulting bytes are
 * ESC [ 1;37;42 m and ESC [ 0 m.
 */
static const char c_hi[] = "\033[1;37;42m";
static const char c_normal[] = "\033[0m";
|
|
|
|
/*
 * Forward declarations: the type parser and the parameter list parser
 * are used by functions defined before them in this file.
 */
static ssize_t dparse_type(std::list<seg> &ast, const char *sym, size_t idx);
static ssize_t dparse_paramlist(std::list<seg> &ast, const char *sym,
    size_t idx, char endchar = '\0');
|
|
|
|
static ssize_t dparse_digitname(std::list<seg> &ast, const char *sym, size_t idx)
|
|
{
|
|
size_t ndigit = strspn(sym + idx, "0123456789");
|
|
size_t nchar = 0;
|
|
size_t i = ndigit;
|
|
|
|
for (const char *p = sym + idx; i-- > 0; ++p) {
|
|
nchar *= 10;
|
|
nchar += *p - '0';
|
|
}
|
|
ast.push_back({idx, idx + ndigit, "length of name"});
|
|
if (strnlen(sym + idx, ndigit + nchar) < ndigit + nchar) {
|
|
/* not enough chars before end of string */
|
|
ast.push_back({idx + ndigit, idx + ndigit + nchar, "name (incomplete)"});
|
|
return -1;
|
|
}
|
|
ast.push_back({idx + ndigit, idx + ndigit + nchar, "name"});
|
|
return ast.rbegin()->end;
|
|
}
|
|
|
|
static ssize_t dparse_qual_mf(std::list<seg> &ast, const char *sym, size_t idx)
|
|
{
|
|
for (; strchr("KORV", sym[idx]) != nullptr; ++idx) {
|
|
if (sym[idx] == 'K')
|
|
ast.push_back({idx, idx + 1, "const qualifier for member function"});
|
|
else if (sym[idx] == 'O')
|
|
ast.push_back({idx, idx + 1, "uref qualifier for member function"});
|
|
else if (sym[idx] == 'R')
|
|
ast.push_back({idx, idx + 1, "ref qualifier for member function"});
|
|
else if (sym[idx] == 'O')
|
|
ast.push_back({idx, idx + 1, "volatile qualifier for member function"});
|
|
else if (sym[idx] == 'r')
|
|
ast.push_back({idx, idx + 1, "restrict qualifier for member function"});
|
|
}
|
|
return idx;
|
|
}
|
|
|
|
static ssize_t dparse_nspace(std::list<seg> &ast, const char *sym, size_t idx)
|
|
{
|
|
ast.push_back({idx, idx, "nspace"});
|
|
if (sym[idx] != 'N')
|
|
return -1;
|
|
++idx;
|
|
auto &tl = *ast.rbegin();
|
|
int ret = dparse_qual_mf(tl.sub, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
while (isdigit(sym[idx])) {
|
|
ret = dparse_digitname(tl.sub, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
}
|
|
tl.end = idx;
|
|
if (sym[idx] != 'E')
|
|
return -1;
|
|
tl.end = ++idx;
|
|
return idx;
|
|
}
|
|
|
|
static ssize_t dparse_array(std::list<seg> &ast, const char *sym, size_t idx)
|
|
{
|
|
if (sym[idx] != 'A')
|
|
return -1;
|
|
ast.push_back({idx, idx + 1, "array"});
|
|
auto &e_array = *ast.rbegin();
|
|
++idx;
|
|
e_array.sub.push_back({idx, idx, "array size"});
|
|
auto &e_asize = *e_array.sub.rbegin();
|
|
size_t arsize = 0;
|
|
for (; isdigit(sym[idx]); ++idx) {
|
|
arsize *= 10;
|
|
arsize += sym[idx] - '0';
|
|
}
|
|
e_asize.end = idx;
|
|
e_array.end = idx;
|
|
if (sym[idx] != '_')
|
|
return -1;
|
|
e_array.end = ++idx;
|
|
return dparse_type(e_array.sub, sym, idx);
|
|
}
|
|
|
|
static ssize_t dparse_functype(std::list<seg> &ast, const char *sym, size_t idx)
|
|
{
|
|
ast.push_back({idx, idx, "return type"});
|
|
auto &rt_type = *ast.rbegin();
|
|
int ret = dparse_type(rt_type.sub, sym, idx);
|
|
if (ret < 0)
|
|
return -1;
|
|
rt_type.end = idx = ret;
|
|
|
|
ret = dparse_paramlist(ast, sym, idx, 'E');
|
|
if (ret < 0)
|
|
return -1;
|
|
ast.rbegin()->end = idx = ret;
|
|
return idx;
|
|
}
|
|
|
|
static ssize_t dparse_type(std::list<seg> &ast, const char *sym, size_t idx)
|
|
{
|
|
if (sym[idx] == 'r') ast.push_back({idx, idx + 1, "restrict"});
|
|
else if (sym[idx] == 'K') ast.push_back({idx, idx + 1, "const"});
|
|
else if (sym[idx] == 'P') ast.push_back({idx, idx + 1, "pointer to"});
|
|
else if (sym[idx] == 'R') ast.push_back({idx, idx + 1, "reference to"});
|
|
else if (sym[idx] == 'O') ast.push_back({idx, idx + 1, "uref to"});
|
|
else if (sym[idx] == 'V') ast.push_back({idx, idx + 1, "volatile"});
|
|
if (strchr("KOPRVr", sym[idx]) != NULL) {
|
|
auto &ptr = *ast.rbegin();
|
|
if (sym[idx+1] == '\0')
|
|
return -1;
|
|
int ret = dparse_type(ptr.sub, sym, idx + 1);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
return idx;
|
|
}
|
|
else if (sym[idx] == 'A') {
|
|
int ret = dparse_array(ast, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
return idx;
|
|
}
|
|
else if (sym[idx] == 'F') {
|
|
ast.push_back({idx, idx + 1, "function"});
|
|
auto &ptr = *ast.rbegin();
|
|
if (sym[idx+1] == '\0')
|
|
return -1;
|
|
int ret = dparse_functype(ptr.sub, sym, idx + 1);
|
|
if (ret < 0)
|
|
return ret;
|
|
if (sym[ret] == 'E')
|
|
++ret;
|
|
ptr.end = ret;
|
|
return ret;
|
|
}
|
|
else if (isdigit(sym[idx])) {
|
|
int ret = dparse_digitname(ast, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
return idx;
|
|
}
|
|
else if (sym[idx] == 'N') {
|
|
int ret = dparse_nspace(ast, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
return idx;
|
|
}
|
|
else if (sym[idx] == 'D' && sym[idx+1] == 'a') { ast.push_back({idx, idx + 1, "auto"}); }
|
|
else if (sym[idx] == 'D' && sym[idx+1] == 'd') { ast.push_back({idx, idx + 1, "decimal64"}); }
|
|
else if (sym[idx] == 'D' && sym[idx+1] == 'e') { ast.push_back({idx, idx + 1, "decimal128"}); }
|
|
else if (sym[idx] == 'D' && sym[idx+1] == 'f') { ast.push_back({idx, idx + 1, "decimal32"}); }
|
|
else if (sym[idx] == 'D' && sym[idx+1] == 'h') { ast.push_back({idx, idx + 1, "half"}); }
|
|
else if (sym[idx] == 'D' && sym[idx+1] == 'i') { ast.push_back({idx, idx + 1, "char32_t"}); }
|
|
else if (sym[idx] == 'D' && sym[idx+1] == 'n') { ast.push_back({idx, idx + 1, "decltype(nullptr)"}); }
|
|
else if (sym[idx] == 'D' && sym[idx+1] == 's') { ast.push_back({idx, idx + 1, "char16_t"}); }
|
|
else if (sym[idx] == 'a') { ast.push_back({idx, idx + 1, "signed char"}); }
|
|
else if (sym[idx] == 'b') { ast.push_back({idx, idx + 1, "bool"}); }
|
|
else if (sym[idx] == 'c') { ast.push_back({idx, idx + 1, "char"}); }
|
|
else if (sym[idx] == 'd') { ast.push_back({idx, idx + 1, "double"}); }
|
|
else if (sym[idx] == 'e') { ast.push_back({idx, idx + 1, "long double"}); }
|
|
else if (sym[idx] == 'f') { ast.push_back({idx, idx + 1, "float"}); }
|
|
else if (sym[idx] == 'g') { ast.push_back({idx, idx + 1, "__float128"}); }
|
|
else if (sym[idx] == 'h') { ast.push_back({idx, idx + 1, "unsigned char"}); }
|
|
else if (sym[idx] == 'i') { ast.push_back({idx, idx + 1, "int"}); }
|
|
else if (sym[idx] == 'j') { ast.push_back({idx, idx + 1, "unsigned int"}); }
|
|
else if (sym[idx] == 'l') { ast.push_back({idx, idx + 1, "long"}); }
|
|
else if (sym[idx] == 'm') { ast.push_back({idx, idx + 1, "unsigned long"}); }
|
|
else if (sym[idx] == 'n') { ast.push_back({idx, idx + 1, "__int128"}); }
|
|
else if (sym[idx] == 'o') { ast.push_back({idx, idx + 1, "unsigned __int128"}); }
|
|
else if (sym[idx] == 's') { ast.push_back({idx, idx + 1, "short"}); }
|
|
else if (sym[idx] == 't') { ast.push_back({idx, idx + 1, "unsigned short"}); }
|
|
else if (sym[idx] == 'u') { ast.push_back({idx, idx + 1, "special: vendor type (unimplemented)"}); return -1; }
|
|
else if (sym[idx] == 'v') { ast.push_back({idx, idx + 1, "void"}); }
|
|
else if (sym[idx] == 'w') { ast.push_back({idx, idx + 1, "wchar_t"}); }
|
|
else if (sym[idx] == 'x') { ast.push_back({idx, idx + 1, "long long"}); }
|
|
else if (sym[idx] == 'y') { ast.push_back({idx, idx + 1, "unsigned long long"}); }
|
|
else if (sym[idx] == 'z') { ast.push_back({idx, idx + 1, "..."}); }
|
|
else {
|
|
return -1;
|
|
}
|
|
return ++idx;
|
|
}
|
|
|
|
static ssize_t dparse_paramlist(std::list<seg> &ast, const char *sym, size_t idx,
|
|
char endchar)
|
|
{
|
|
ast.push_back({idx, idx, "parameter type list"});
|
|
auto &plist = *ast.rbegin();
|
|
while (sym[idx] != endchar && sym[idx] != '\0') {
|
|
int ret = dparse_type(plist.sub, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
}
|
|
plist.end = idx;
|
|
return idx;
|
|
}
|
|
|
|
static ssize_t dparse_template1(std::list<seg> &ast, const char *sym, size_t idx)
|
|
{
|
|
ast.push_back({idx, idx, "template"});
|
|
if (sym[idx] != 'I')
|
|
return -1;
|
|
auto &tl = *ast.rbegin();
|
|
tl.sub.push_back({idx, idx + 1, "template parameter list start marker"});
|
|
++idx;
|
|
if (sym[idx] == '\0')
|
|
return -1;
|
|
ssize_t ret = dparse_paramlist(tl.sub, sym, idx, 'E');
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
if (sym[idx] != 'E')
|
|
return -1;
|
|
tl.sub.push_back({idx, idx + 1, "template parameter list end marker"});
|
|
++idx;
|
|
|
|
tl.sub.push_back({idx, idx, "return type"});
|
|
auto &rt_type = *tl.sub.rbegin();
|
|
ret = dparse_type(rt_type.sub, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
tl.end = rt_type.end = idx;
|
|
|
|
/* template means it's a function and MUST take params */
|
|
if (sym[idx] == '\0')
|
|
return -1;
|
|
return dparse_paramlist(ast, sym, idx);
|
|
}
|
|
|
|
static ssize_t dparse_z(std::list<seg> &ast, const char *sym, size_t idx)
|
|
{
|
|
if (sym[idx] == 'N') {
|
|
int ret = dparse_nspace(ast, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
} else if (isdigit(sym[idx])) {
|
|
int ret = dparse_digitname(ast, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
} else {
|
|
return -1;
|
|
}
|
|
if (sym[idx] == 'I') {
|
|
int ret = dparse_template1(ast, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
} else if (sym[idx] != '\0') {
|
|
int ret = dparse_paramlist(ast, sym, idx);
|
|
if (ret < 0)
|
|
return ret;
|
|
idx = ret;
|
|
}
|
|
return idx;
|
|
}
|
|
|
|
static ssize_t dparse_start(std::list<seg> &ast, const char *sym, size_t idx)
|
|
{
|
|
if (sym[idx] == '_' && sym[idx+1] == 'Z') {
|
|
ast.push_back({idx, idx + 2, "normal name marker"});
|
|
int ret = dparse_z(ast, sym, idx + 2);
|
|
if (ret < 0)
|
|
return ret;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void dprint(const std::list<seg> &list, const char *sym, size_t depth = 0)
|
|
{
|
|
for (const auto &seg : list) {
|
|
printf("%.*s%s%.*s%s%s: %-*s%s\n",
|
|
static_cast<int>(seg.start), sym,
|
|
c_hi, static_cast<int>(seg.end - seg.start), sym + seg.start, c_normal,
|
|
sym + seg.end,
|
|
static_cast<int>(depth * 2), "", seg.desc.c_str());
|
|
if (seg.sub.size() > 0)
|
|
dprint(seg.sub, sym, depth + 1);
|
|
}
|
|
}
|
|
|
|
static void demangle(const char *sym)
|
|
{
|
|
printf("==== %s ====\n", sym);
|
|
std::list<seg> ast;
|
|
int ret = dparse_start(ast, sym, 0);
|
|
if (ret < 0)
|
|
printf("Demangle failed along the way\n");
|
|
dprint(ast, sym);
|
|
}
|
|
int main(int argc, const char **argv)
|
|
{
|
|
if (argc == 1) {
|
|
hxmc_t *line = NULL;
|
|
while (HX_getl(&line, stdin) != NULL) {
|
|
HX_chomp(line);
|
|
demangle(line);
|
|
}
|
|
} else {
|
|
while (*++argv != NULL)
|
|
demangle(*argv);
|
|
}
|
|
return EXIT_SUCCESS;
|
|
}
|