From b229490bc26a0411607e273141b1172da0d86935 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Wed, 9 May 2018 20:26:55 +0700 Subject: [PATCH 01/12] Added Symbol.raw_name --- actual symbol name (decorated). --- SymbolSort.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index 3e51518..f59901e 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -181,6 +181,7 @@ class Symbol public int rva_end; public string name; public string short_name; + public string raw_name; //decorated symbol name public string source_filename; public string section; public SymbolFlags flags = 0; From 5d5b911036f5ad3132112039814eb77df989cc39 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Wed, 9 May 2018 20:27:10 +0700 Subject: [PATCH 02/12] Record raw_name when parsing COMDAT files. --- SymbolSort.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index f59901e..f110767 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -561,7 +561,7 @@ private static void ReadSymbolsFromNM(List symbols, string inFilename, I private static Regex ReadSymbolsFromCOMDAT_regexName = new Regex(@"\n[ \t]*([^ \t]+)[ \t]+name", RegexOptions.Compiled); private static Regex ReadSymbolsFromCOMDAT_regexSize = new Regex(@"\n[ \t]*([A-Za-z0-9]+)[ \t]+size of raw data", RegexOptions.Compiled); - private static Regex ReadSymbolsFromCOMDAT_regexCOMDAT = new Regex(@"\n[ \t]*COMDAT; sym= \""([^\n\""]+)", RegexOptions.Compiled); + private static Regex ReadSymbolsFromCOMDAT_regexCOMDAT = new Regex(@"\n[ \t]*COMDAT; sym= \""([^\n\""]+)\"" \(([^\n()]+)\)", RegexOptions.Compiled); private static void ReadSymbolsFromCOMDAT(List symbols, string inFilename) { Regex regexName = ReadSymbolsFromCOMDAT_regexName; @@ -613,6 +613,7 @@ record += ln; m = regexCOMDAT.Match(record); symbol.name = m.Groups[1].Value; + symbol.raw_name = m.Groups[2].Value; if (symbol.name != "") { symbol.rva_start = 0; From 93a492b837dbf527f8e10166db3231ce8e428940 Mon Sep 17 00:00:00 
2001 From: stgatilov Date: Wed, 9 May 2018 19:59:40 +0700 Subject: [PATCH 03/12] Added enum for IDiaSymbol::get_undecoratedNameEx flags. --- SymbolSort.cs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index f110767..bc80f33 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -96,6 +96,33 @@ enum LocationType LocTypeMax } + // See https://msdn.microsoft.com/en-us/library/kszfk0fs.aspx + // for documentation of IDiaSymbol::get_undecoratedNameEx flags + [Flags] + enum IDiaSymbolUndecoratedNameExFlags : uint + { + UNDNAME_COMPLETE = 0x0000, + UNDNAME_NO_LEADING_UNDERSCORES = 0x0001, + UNDNAME_NO_MS_KEYWORDS = 0x0002, + UNDNAME_NO_FUNCTION_RETURNS = 0x0004, + UNDNAME_NO_ALLOCATION_MODEL = 0x0008, + UNDNAME_NO_ALLOCATION_LANGUAGE = 0x0010, + UNDNAME_RESERVED1 = 0x0020, + UNDNAME_RESERVED2 = 0x0040, + UNDNAME_NO_THISTYPE = 0x0060, + UNDNAME_NO_ACCESS_SPECIFIERS = 0x0080, + UNDNAME_NO_THROW_SIGNATURES = 0x0100, + UNDNAME_NO_MEMBER_TYPE = 0x0200, + UNDNAME_NO_RETURN_UDT_MODEL = 0x0400, + UNDNAME_32_BIT_DECODE = 0x0800, + UNDNAME_NAME_ONLY = 0x1000, + UNDNAME_TYPE_ONLY = 0x2000, + UNDNAME_HAVE_PARAMETERS = 0x4000, + UNDNAME_NO_ECSU = 0x8000, + UNDNAME_NO_IDENT_CHAR_CHECK = 0x10000, + UNDNAME_NO_PTR64 = 0x20000, + } + // See http://msdn.microsoft.com/en-us/library/windows/desktop/ms680341(v=vs.85).aspx for // more flag options and descriptions [Flags] From d72d59ff788c8f43cfca2e39ee17ab67b7c457ea Mon Sep 17 00:00:00 2001 From: stgatilov Date: Wed, 9 May 2018 20:27:26 +0700 Subject: [PATCH 04/12] Record raw_name when parsing PDB files (if available). --- SymbolSort.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index bc80f33..9b44d32 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -983,6 +983,14 @@ private static void ReadSymbolsFromScope(IDiaSymbol parent, Dia2Lib.SymTagEnum t symbol.short_name = diaSymbol.name == null ? 
"" : diaSymbol.name; symbol.name = diaSymbol.undecoratedName == null ? symbol.short_name : diaSymbol.undecoratedName; symbol.flags = additionalFlags; + + //extract raw symbol name (see https://stackoverflow.com/a/19637731/556899) + string rawName; + IDiaSymbolUndecoratedNameExFlags flags = IDiaSymbolUndecoratedNameExFlags.UNDNAME_32_BIT_DECODE | IDiaSymbolUndecoratedNameExFlags.UNDNAME_TYPE_ONLY; + diaSymbol.get_undecoratedNameEx((uint)flags, out rawName); + if (rawName != "") + symbol.raw_name = rawName; + switch (type) { case SymTagEnum.SymTagData: From 20982a4953eac142cb95657befdbcce6aefd64f6 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Thu, 10 May 2018 00:10:56 +0700 Subject: [PATCH 05/12] Do not register raw names with space. Note: there are some very rare names starting with " ?? ". I cannot understand where they come from. --- SymbolSort.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index 9b44d32..b461936 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -988,8 +988,11 @@ private static void ReadSymbolsFromScope(IDiaSymbol parent, Dia2Lib.SymTagEnum t string rawName; IDiaSymbolUndecoratedNameExFlags flags = IDiaSymbolUndecoratedNameExFlags.UNDNAME_32_BIT_DECODE | IDiaSymbolUndecoratedNameExFlags.UNDNAME_TYPE_ONLY; diaSymbol.get_undecoratedNameEx((uint)flags, out rawName); - if (rawName != "") - symbol.raw_name = rawName; + if (rawName != null) { + //ignore trashy names like " ?? :: ?? ::Z::_NPEBDI_N * __ptr64 volatile " + if (!rawName.Contains(' ')) + symbol.raw_name = rawName; + } switch (type) { From 78515aa3d03542f444a1c023c8bf36930555e489 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Wed, 9 May 2018 20:56:31 +0700 Subject: [PATCH 06/12] Added "-info XXX.pdb" parameter: such input files are ignored in symbols extraction. 
--- SymbolSort.cs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index b461936..98af6e4 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -244,10 +244,12 @@ class InputFile { public string filename; public InputType type; + public bool info; //parse file but exclude it from stats public InputFile(string filename, InputType type) { this.filename = filename; this.type = type; + this.info = false; } } @@ -1662,6 +1664,12 @@ private static UserOptions ParseArgs(string[] args) { opts.inputFiles.Add(new InputFile(args[++curArg], InputType.nm_bsd)); } + else if (curArgStr == "-info") + { + var infile = new InputFile(args[++curArg], InputType.pdb); + infile.info = true; + opts.inputFiles.Add(infile); + } else if (curArgStr == "-out") { opts.outFilename = args[++curArg]; @@ -1862,6 +1870,7 @@ static void Main(string[] args) List symbols = new List(); foreach (InputFile inputFile in opts.inputFiles) { + if (inputFile.info) continue; LoadSymbols(inputFile, symbols, opts.searchPath, opts.flags); Console.WriteLine(); } From 125f5b656a0e140232e9821d2745708217668651 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Wed, 9 May 2018 20:57:19 +0700 Subject: [PATCH 07/12] Use info PDBs specifically to replace filename in symbols. 
--- SymbolSort.cs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index 98af6e4..d477e66 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -1914,6 +1914,35 @@ static void Main(string[] args) }); } + List infoPdb = opts.inputFiles.FindAll(f => f.info && f.type == InputType.pdb); + if (infoPdb.Count() > 0) + { + var infoSymbols = new List(); + foreach (InputFile f in infoPdb) + LoadSymbols(f, infoSymbols, opts.searchPath, opts.flags); + var infoDict = new Dictionary(); + foreach (Symbol s in infoSymbols) + if (s.raw_name != null && !infoDict.ContainsKey(s.raw_name)) + infoDict.Add(s.raw_name, s); + + Console.WriteLine("Connecting symbols to PDB info..."); + int connectedCnt = 0, allCnt = symbols.Count; + foreach (Symbol s in symbols) + { + Symbol info; + if (s.raw_name != null && infoDict.TryGetValue(s.raw_name, out info)) //raw_name may be null; Dictionary rejects null keys + { + connectedCnt++; + s.source_filename = info.source_filename; + //TODO: take any other parameters from PDB? + } + else + s.name = ""; //to be removed + } + symbols.RemoveAll(s => s.name == ""); + Console.WriteLine("Connected {0}% symbols ({1}/{2})", (uint)(100.0 * connectedCnt / allCnt), connectedCnt, allCnt); + } + Console.WriteLine("Processing raw symbols..."); { long totalCount = 0; From 0169025bcef42dddfde040246f9050b153bb3212 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Thu, 10 May 2018 01:02:29 +0700 Subject: [PATCH 08/12] Report symbols with no clear source and symbols not in PDB as coming from special files.
[unclear_source]: default-generated methods (constructors and destructors) [not_in_pdb]: non-code stuff (constants, values, RTTI, locals, dtors, initializers, etc) --- SymbolSort.cs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index d477e66..d27a912 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -197,8 +197,9 @@ enum SymbolFlags PublicSymbol = 0x008, Section = 0x010, Unmapped = 0x020, - Weak = 0x040 - }; + Weak = 0x040, + SourceApprox = 0x080, //source filename of this function is not precise + }; class Symbol { @@ -1032,6 +1033,7 @@ private static void ReadSymbolsFromScope(IDiaSymbol parent, Dia2Lib.SymTagEnum t { IDiaSectionContrib sectionContrib = FindSectionContribForRVA(symbol.rva_start, sectionContribs); symbol.source_filename = sectionContrib == null ? "" : compilandFileMap[sectionContrib.compilandId]; + symbol.flags |= SymbolFlags.SourceApprox; } symbol.section = "code"; symbol.flags |= SymbolFlags.Function; @@ -1922,8 +1924,13 @@ static void Main(string[] args) LoadSymbols(f, infoSymbols, opts.searchPath, opts.flags); var infoDict = new Dictionary(); foreach (Symbol s in infoSymbols) - if (s.raw_name != null && !infoDict.ContainsKey(s.raw_name)) - infoDict.Add(s.raw_name, s); + if (s.raw_name != null) + if (!infoDict.ContainsKey(s.raw_name)) + { + infoDict.Add(s.raw_name, s); + if ((s.flags & SymbolFlags.SourceApprox) != 0) + s.source_filename = "[unclear_source]"; + } Console.WriteLine("Connecting symbols to PDB info..."); int connectedCnt = 0, allCnt = symbols.Count; @@ -1934,12 +1941,10 @@ static void Main(string[] args) { connectedCnt++; s.source_filename = info.source_filename; - //TODO: take any other parameters from PDB? 
} else - s.name = ""; //to be removed + s.source_filename = "[not_in_pdb]"; } - symbols.RemoveAll(s => s.name == ""); Console.WriteLine("Connected {0}% symbols ({1}/{2})", (uint)(100.0 * connectedCnt / allCnt), connectedCnt, allCnt); } From 57987a82de2ab91c543238a5a67fd5257fad4c8d Mon Sep 17 00:00:00 2001 From: stgatilov Date: Mon, 14 May 2018 13:00:44 +0700 Subject: [PATCH 09/12] Never remove overlapping symbols when reading PDB for information only. --- SymbolSort.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index d27a912..202df56 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -1921,7 +1921,7 @@ static void Main(string[] args) { var infoSymbols = new List(); foreach (InputFile f in infoPdb) - LoadSymbols(f, infoSymbols, opts.searchPath, opts.flags); + LoadSymbols(f, infoSymbols, opts.searchPath, opts.flags | Options.KeepRedundantSymbols); var infoDict = new Dictionary(); foreach (Symbol s in infoSymbols) if (s.raw_name != null) From 58221d44e0d141bc68d276276b450ade43c0e11d Mon Sep 17 00:00:00 2001 From: stgatilov Date: Mon, 21 May 2018 00:33:10 +0700 Subject: [PATCH 10/12] Always include public symbols when opening PDB for info. For public symbols, "name" gives exactly the raw name of symbol. For private ones, it is not even clear how to determine raw name, since linker did not see them. --- SymbolSort.cs | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/SymbolSort.cs b/SymbolSort.cs index 202df56..053629b 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -987,14 +987,22 @@ private static void ReadSymbolsFromScope(IDiaSymbol parent, Dia2Lib.SymTagEnum t symbol.name = diaSymbol.undecoratedName == null ?
symbol.short_name : diaSymbol.undecoratedName; symbol.flags = additionalFlags; - //extract raw symbol name (see https://stackoverflow.com/a/19637731/556899) - string rawName; - IDiaSymbolUndecoratedNameExFlags flags = IDiaSymbolUndecoratedNameExFlags.UNDNAME_32_BIT_DECODE | IDiaSymbolUndecoratedNameExFlags.UNDNAME_TYPE_ONLY; - diaSymbol.get_undecoratedNameEx((uint)flags, out rawName); - if (rawName != null) { - //ignore trashy names like " ?? :: ?? ::Z::_NPEBDI_N * __ptr64 volatile " - if (!rawName.Contains(' ')) - symbol.raw_name = rawName; + if (type == SymTagEnum.SymTagPublicSymbol) + { + symbol.raw_name = symbol.short_name; + } + else + { + //there is no reason this can work, but it often works... + string rawName; + IDiaSymbolUndecoratedNameExFlags flags = IDiaSymbolUndecoratedNameExFlags.UNDNAME_32_BIT_DECODE | IDiaSymbolUndecoratedNameExFlags.UNDNAME_TYPE_ONLY; + diaSymbol.get_undecoratedNameEx((uint)flags, out rawName); + if (rawName != null) + { + //ignore trashy names like " ?? :: ?? ::Z::_NPEBDI_N * __ptr64 volatile " + if (!rawName.Contains(' ')) + symbol.raw_name = rawName; + } } switch (type) @@ -1920,8 +1928,9 @@ static void Main(string[] args) if (infoPdb.Count() > 0) { var infoSymbols = new List(); + UserFlags adjustedFlags = opts.flags | UserFlags.KeepRedundantSymbols | UserFlags.IncludePublicSymbols; foreach (InputFile f in infoPdb) - LoadSymbols(f, infoSymbols, opts.searchPath, opts.flags | Options.KeepRedundantSymbols); + LoadSymbols(f, infoSymbols, opts.searchPath, adjustedFlags); var infoDict = new Dictionary(); foreach (Symbol s in infoSymbols) if (s.raw_name != null) From 44cebc9462f836e4c22b82f35243d7f1bfe1985a Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sun, 27 May 2018 16:25:12 +0700 Subject: [PATCH 11/12] Added help description for -info parameter. 
--- SymbolSort.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index 053629b..4106cf7 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -1842,6 +1842,13 @@ static void Main(string[] args) Console.WriteLine(" PDB. This option can highlight sections of the executable that"); Console.WriteLine(" aren't directly attributable to symbols. In the complete view"); Console.WriteLine(" this will also highlight space lost due to alignment padding."); + Console.WriteLine(); + Console.WriteLine("Options specific to Comdat input with PDB information:"); + Console.WriteLine(" -info filename"); + Console.WriteLine(" Specify PDB file which will be used only to fetch source file information."); + Console.WriteLine(" Source filename of each input symbol will be deduced from this PDB."); + Console.WriteLine(" You can specify many such arguments for multi-project analysis."); + Console.WriteLine(); return; } From 1d64bc30aae89fd8c957adf6939bd1a378cb8515 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sun, 27 May 2018 16:27:35 +0700 Subject: [PATCH 12/12] Help description: many input files are allowed. --- SymbolSort.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/SymbolSort.cs b/SymbolSort.cs index 4106cf7..d5e693d 100644 --- a/SymbolSort.cs +++ b/SymbolSort.cs @@ -1782,6 +1782,7 @@ static void Main(string[] args) Console.WriteLine(" comdat - the format produced by DumpBin /headers"); Console.WriteLine(" sysv - the format produced by nm --format=sysv"); Console.WriteLine(" bsd - the format produced by nm --format=bsd --print-size"); + Console.WriteLine(" It is allowed to specify many input files for total analysis."); Console.WriteLine(); Console.WriteLine(" -out filename"); Console.WriteLine(" Write output to specified file instead of stdout");