22 * Core parser for Go stack trace files
33 */
44
5- import { File , Result , Group , Frame , Goroutine } from './types.js' ;
5+ import { ParsedFile , Result , Group , Frame , Goroutine } from './types.js' ;
6+ import { Profile } from 'pprof-format' ;
67
78/**
89 * Simple FNV-1a hash implementation as fallback when crypto.subtle is unavailable
@@ -64,13 +65,46 @@ export class FileParser {
6465 }
6566
/**
 * Parse a Blob or File, detecting gzip-compressed binary input.
 *
 * The first two bytes of the blob are compared against the gzip magic
 * number (0x1f 0x8b). A gzipped blob is handed to `parseFormat0`; anything
 * else is read as text and handed to `parseString`.
 *
 * @param blob - Raw contents to parse.
 * @param fileName - Optional name recorded in the result; 'unknown' is used
 *   when it is missing or empty.
 * @returns The Result produced by whichever parser was selected.
 */
async parseFile(blob: Blob, fileName?: string): Promise<Result> {
  // Only the two-byte header is read for the sniff.
  const header = new Uint8Array(await blob.slice(0, 2).arrayBuffer());
  const looksGzipped = header.length >= 2 && header[0] === 0x1f && header[1] === 0x8b;

  const resolvedName = fileName || 'unknown';

  if (!looksGzipped) {
    // Text input: decode the blob and let parseString pick the text format.
    const text = await blob.text();
    return this.parseString(text, resolvedName);
  }

  // Binary format 0: decompression happens inside parseFormat0.
  return await this.parseFormat0(blob, resolvedName);
}
88+
/**
 * Parse already-decoded text, choosing between the two text formats.
 *
 * Blank content, and content that matches neither format detector, yields a
 * successful result with no groups rather than an error.
 *
 * @param content - Text of the stack dump.
 * @param fileName - Name recorded as `originalName` in the result.
 * @returns The Result from `parseFormat1` / `parseFormat2`, or an empty
 *   successful result.
 */
async parseString(content: string, fileName: string): Promise<Result> {
  // Builds the "nothing to report" result used by both fall-through paths.
  const emptyResult = (): Result => ({
    success: true,
    data: { originalName: fileName, groups: [] },
  });

  if (content.trim().length === 0) {
    return emptyResult();
  }

  if (this.detectFormat1(content)) {
    return await this.parseFormat1(content, fileName);
  }

  // Format 2 is only attempted when its pattern is actually present.
  if (this.detectFormat2(content)) {
    return await this.parseFormat2(content, fileName);
  }

  // Unrecognized content is reported as empty instead of as a failure.
  return emptyResult();
}
76110
@@ -106,13 +140,120 @@ export class FileParser {
106140 return null ;
107141 }
108142
143+
/**
 * Report whether the content is in format 1, i.e. begins with the
 * "goroutine profile:" header.
 *
 * @param content - Text to inspect; it is not trimmed first.
 * @returns true when the very first characters are the header.
 */
private detectFormat1(content: string): boolean {
  const header = 'goroutine profile:';
  return content.slice(0, header.length) === header;
}
148+
/**
 * Report whether the content is in format 2, i.e. contains individual
 * goroutine entries of the form "goroutine N [".
 *
 * @param content - Text to inspect.
 * @returns true when the trimmed content starts with a goroutine line, or
 *   when a goroutine line follows a newline anywhere in the content (as in
 *   test logs where other output precedes the dump).
 */
private detectFormat2(content: string): boolean {
  const startsWithEntry = /^goroutine \d+ \[/.test(content.trim());
  if (startsWithEntry) {
    return true;
  }
  // Otherwise look for an entry that begins on a later line.
  return /\ngoroutine \d+ \[/.test(content);
}
115155
/**
 * Parse a gzip-compressed pprof profile (format 0) into stack groups.
 *
 * The blob is decompressed, decoded with `Profile.decode`, and every sample
 * that still has frames after skipping is turned into one Group:
 *   - `count` is the sample's first value, or 1 when it has no values;
 *   - `labels` are the sample's string labels ("key=value", or just "key"
 *     when the value is empty), plus one synthesized state label derived
 *     from the last leading runtime frame that was skipped;
 *   - `trace` is the sample's frames with those leading runtime frames
 *     removed.
 * Samples left with no frames are dropped.
 *
 * @param blob - Gzip-compressed profile data.
 * @param fileName - Name recorded as `originalName` in the result.
 * @returns A successful Result with the groups, or a failed Result whose
 *   error message wraps whatever decompression/decoding threw.
 */
private async parseFormat0(blob: Blob, fileName: string): Promise<Result> {
  try {
    // Response is used only to collect the decompressed stream into a buffer.
    const decompressedStream = blob.stream().pipeThrough(new DecompressionStream('gzip'));
    const decodedData = new Uint8Array(await new Response(decompressedStream).arrayBuffer());

    const profile = Profile.decode(decodedData);

    // The string table is the same for every sample, so resolve it once.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- `strings` is read defensively off the decoded table
    const stringTable: string[] = (profile.stringTable as any)?.strings || [];
    const lookupString = (index: unknown, fallback: string): string =>
      stringTable[Number(index) || 0] || fallback;

    // Index records by id once instead of scanning the arrays for every
    // frame. Ids are keyed as strings so that a number and a bigint holding
    // the same id resolve to the same record; the first record wins, which
    // matches what a linear `find` would have returned.
    const indexById = <T extends { id?: unknown }>(items: readonly T[]): Map<string, T> => {
      const byId = new Map<string, T>();
      for (const item of items) {
        const key = String(item.id);
        if (!byId.has(key)) {
          byId.set(key, item);
        }
      }
      return byId;
    };
    const locationsById = indexById(profile.location);
    const functionsById = indexById(profile.function);

    const groups: Group[] = [];

    // Each sample is one stack trace together with its occurrence count.
    for (const sample of profile.sample) {
      const values = sample.value || [];
      const count = values.length > 0 ? Number(values[0]) : 1;

      const labels: string[] = [];
      for (const label of sample.label || []) {
        const key = lookupString(label.key, '');
        const value = lookupString(label.str, '');
        if (key && value) {
          labels.push(`${key}=${value}`);
        } else if (key) {
          labels.push(key);
        }
      }

      // Leading runtime frames are dropped; the last one dropped is kept so
      // it can be turned into a state label below.
      const frames: Frame[] = [];
      let skipInitialRuntimeFrames = true;
      let lastSkippedRuntimeFrame: string | null = null;

      for (const locationId of sample.locationId || []) {
        const location = locationsById.get(String(locationId));
        if (!location) {
          continue;
        }

        for (const line of location.line || []) {
          const func = functionsById.get(String(line.functionId));
          if (!func) {
            continue;
          }

          const functionName = lookupString(func.name, 'unknown');
          const sourceFile = lookupString(func.filename, 'unknown');

          if (skipInitialRuntimeFrames && this.shouldSkipRuntimeFrame(functionName)) {
            lastSkippedRuntimeFrame = functionName;
            continue;
          }

          // The first frame that is kept ends the skipping for this sample.
          skipInitialRuntimeFrames = false;

          frames.push({
            func: functionName,
            file: sourceFile,
            line: Number(line.line) || 0,
          });
        }
      }

      if (lastSkippedRuntimeFrame) {
        const stateLabel = this.synthesizeRuntimeLabel(lastSkippedRuntimeFrame);
        if (stateLabel) {
          labels.push(stateLabel);
        }
      }

      if (frames.length > 0) {
        const traceId = await fingerprint(frames);
        groups.push({
          traceId,
          count,
          labels,
          goroutines: [],
          trace: frames,
        });
      }
    }

    const result: ParsedFile = { originalName: fileName, groups };
    return { success: true, data: result };
  } catch (error) {
    return {
      success: false,
      error: `Failed to parse pprof format: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}
256+
116257 private async parseFormat2 ( content : string , fileName : string ) : Promise < Result > {
117258 const lines = content . split ( '\n' ) ;
118259 const goroutineMap = new Map < string , boolean > ( ) ; // Track which goroutine IDs exist
@@ -267,7 +408,7 @@ export class FileParser {
267408 } )
268409 ) ;
269410
270- const result : File = { originalName : fileName , groups } ;
411+ const result : ParsedFile = { originalName : fileName , groups } ;
271412 if ( extractedName ) {
272413 result . extractedName = extractedName ;
273414 }
@@ -362,7 +503,7 @@ export class FileParser {
362503 groups . push ( { traceId : await fingerprint ( trace ) , count, labels, goroutines : [ ] , trace } ) ;
363504 }
364505
365- const result : File = { originalName : fileName , totalGoroutines, groups } ;
506+ const result : ParsedFile = { originalName : fileName , totalGoroutines, groups } ;
366507 if ( extractedName ) {
367508 result . extractedName = extractedName ;
368509 }
@@ -385,4 +526,49 @@ export class FileParser {
385526
386527 return state ;
387528 }
529+
/**
 * Decide whether a frame is one of the runtime blocking/parking functions
 * that are skipped at the start of a trace.
 *
 * @param functionName - Fully qualified function name of the frame.
 * @returns true only for an exact match with one of the listed runtime
 *   functions.
 */
private shouldSkipRuntimeFrame(functionName: string): boolean {
  switch (functionName) {
    case 'runtime.gopark':
    case 'runtime.goparkunlock':
    case 'runtime.selectgo':
    case 'runtime.chanrecv':
    case 'runtime.chanrecv1':
    case 'runtime.chanrecv2':
    case 'runtime.chansend':
    case 'runtime.semacquire':
    case 'runtime.semacquire1':
    case 'runtime.netpollblock':
    case 'runtime.notetsleepg':
      return true;
    default:
      return false;
  }
}
546+
/**
 * Produce a "state=..." label describing a skipped runtime frame.
 *
 * @param functionName - Fully qualified name of the skipped runtime
 *   function.
 * @returns The label for that function, or null when the function has no
 *   associated label.
 */
private synthesizeRuntimeLabel(functionName: string): string | null {
  if (
    functionName === 'runtime.chanrecv' ||
    functionName === 'runtime.chanrecv1' ||
    functionName === 'runtime.chanrecv2'
  ) {
    return 'state=chan receive';
  }
  if (functionName === 'runtime.chansend') {
    return 'state=chan send';
  }
  if (functionName === 'runtime.selectgo') {
    return 'state=select';
  }
  if (functionName === 'runtime.gopark' || functionName === 'runtime.goparkunlock') {
    return 'state=parked';
  }
  if (functionName === 'runtime.semacquire' || functionName === 'runtime.semacquire1') {
    return 'state=semacquire';
  }
  if (functionName === 'runtime.netpollblock') {
    return 'state=netpoll';
  }
  if (functionName === 'runtime.notetsleepg') {
    return 'state=sleep';
  }
  return null;
}
388574}
0 commit comments