1 /**
2  * Contains support code for code profiling.
3  *
4  * Copyright: Copyright Digital Mars 1995 - 2017.
5  * License: Distributed under the
6  *      $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0).
7  *    (See accompanying file LICENSE)
8  * Authors:   Walter Bright, Sean Kelly, the LDC team
9  * Source: $(DRUNTIMESRC rt/_trace.d)
10  */
11 
12 module rt.trace;
13 
14 import core.demangle;
15 import core.stdc.ctype;
16 import core.stdc.stdio;
17 import core.stdc.stdlib;
18 import core.stdc.string;
19 
20 version (CRuntime_Microsoft)
21     private alias core.stdc.stdlib._strtoui64 strtoull;
22 
23 shared static this ()
24 {
25     enum DefaultLog = "trace.log";
26     enum DefaultDef = "trace.def";
27 
28     trace_logfilename = strdup(DefaultLog.ptr)[0 .. DefaultLog.length + 1];
29     trace_deffilename = strdup(DefaultDef.ptr)[0 .. DefaultDef.length + 1];
30 }
31 
32 /**
33  * Set the file path for profile reports (`-profile`)
34  *
35  * This function is a public API, exposed in `core.runtime`.
36  *
37  * Since we are calling C functions under the hood,
38  * and we might need to open and close files during the
39  * runtime tear-down we copy the parameter via malloc
40  * to ensure NUL-termination.
41  *
42  * Params:
43  *   name = Path to the output file. Empty means stdout.
44  */
45 extern(C) void trace_setlogfilename(string name)
46 {
47     updateFileName(trace_logfilename, name);
48 }
49 
50 /**
51  * Set the file path for the optimized profile linker DEF file (`-profile`)
52  *
53  * This function is a public API, exposed in `core.runtime`.
54  *
55  * Since we are calling C functions under the hood,
56  * and we might need to open and close files during the
57  * runtime tear-down we copy the parameter via malloc
58  * to ensure NUL-termination.
59  *
60  * Params:
61  *   name = Path to the output file. Empty means stdout.
62  */
63 extern(C) void trace_setdeffilename(string name)
64 {
65     updateFileName(trace_deffilename, name);
66 }
67 
68 private:
69 
70 // Code shared by both `trace_setXXXfilename`
71 void updateFileName(ref char[] filename, string name)
72 {
73     if (!name.length)
74     {
75         free(filename.ptr);
76         filename = null;
77     }
78     else if (auto newPtr = cast(char*)realloc(filename.ptr, name.length + 1))
79     {
80         filename = newPtr[0 .. name.length + 1];
81         filename[0 .. $ - 1] = name[];
82         filename[$ - 1] = 0;
83     }
84     else
85         assert(0, "Memory allocation failed");
86 }
87 
88 alias long timer_t;
89 
90 /////////////////////////////////////
91 //
92 
93 struct SymPair
94 {
95     SymPair* next;
96     Symbol* sym;        // function that is called
97     ulong count;        // number of times sym is called
98 }
99 
100 /////////////////////////////////////
101 // A Symbol for each function name.
102 
103 struct Symbol
104 {
105         Symbol* Sl, Sr;         // left, right children
106         SymPair* Sfanin;        // list of calling functions
107         SymPair* Sfanout;       // list of called functions
108         timer_t totaltime;      // aggregate time
109         timer_t functime;       // time excluding subfunction calls
110         ubyte Sflags;           // SFxxxx
111         uint recursion;         // call recursion level
112         const(char)[] Sident;   // name of symbol
113 }
114 
115 enum ubyte SFvisited = 1;      // visited
116 
117 
118 //////////////////////////////////
119 // Build a linked list of these.
120 
121 struct Stack
122 {
123     Stack* prev;
124     Symbol* sym;
125     timer_t starttime;          // time when function was entered
126     timer_t ohd;                // overhead of all the bookkeeping code
127     timer_t subtime;            // time used by all subfunctions
128 }
129 
130 Symbol* root;               // root of symbol table
131 bool trace_inited;
132 
133 Stack* stack_freelist;
134 Stack* trace_tos;           // top of stack
135 
136 __gshared
137 {
138     Symbol* groot;              // merged symbol table
139     int gtrace_inited;          // !=0 if initialized
140 
141     timer_t trace_ohd;
142 
143     // Those strings include the `\0` in their slice as they're used with fopen
144     char[] trace_logfilename;
145     char[] trace_deffilename;
146 }
147 
148 ////////////////////////////////////////
149 // Output optimal function link order.
150 
151 private void trace_order(FILE* fpdef, Symbol *s)
152 {
153     while (s)
154     {
155         trace_place(fpdef, s, 0);
156         if (s.Sl)
157             trace_order(fpdef, s.Sl);
158         s = s.Sr;
159     }
160 }
161 
162 //////////////////////////////////////////////
163 //
164 
165 private Stack* stack_push()
166 {
167     Stack *s;
168 
169     if (stack_freelist)
170     {
171         s = stack_freelist;
172         stack_freelist = s.prev;
173     }
174     else
175     {
176         s = cast(Stack *)trace_malloc(Stack.sizeof);
177     }
178     s.prev = trace_tos;
179     trace_tos = s;
180     return s;
181 }
182 
183 //////////////////////////////////////////////
184 //
185 
186 private void stack_free(Stack *s)
187 {
188     s.prev = stack_freelist;
189     stack_freelist = s;
190 }
191 
192 //////////////////////////////////////
193 // Qsort() comparison routine for array of pointers to SymPair's.
194 
195 extern(C) int sympair_cmp(scope const void* e1, scope const void* e2) nothrow @nogc
196 {
197     auto count1 = (*cast(SymPair**)e1).count;
198     auto count2 = (*cast(SymPair**)e2).count;
199     return (count1 > count2) - (count1 < count2);
200 }
201 
202 //////////////////////////////////////
203 // Place symbol s, and then place any fan ins or fan outs with
204 // counts greater than count.
205 
206 private void trace_place(FILE* fpdef, Symbol* s, ulong count)
207 {
208     if (!(s.Sflags & SFvisited))
209     {
210         //printf("\t%.*s\t%llu\n", s.Sident.length, s.Sident.ptr, count);
211         fprintf(fpdef,"\t%.*s\n", cast(int) s.Sident.length, s.Sident.ptr);
212         s.Sflags |= SFvisited;
213 
214         // Compute number of items in array
215         size_t num = 0;
216         for (auto sp = s.Sfanin; sp; sp = sp.next)
217             num++;
218         for (auto sp = s.Sfanout; sp; sp = sp.next)
219             num++;
220         if (!num)
221             return;
222 
223         // Allocate and fill array
224         auto base = cast(SymPair**)trace_malloc(SymPair.sizeof * num);
225         size_t u = 0;
226         for (auto sp = s.Sfanin; sp; sp = sp.next)
227             base[u++] = sp;
228         for (auto sp = s.Sfanout; sp; sp = sp.next)
229             base[u++] = sp;
230         assert(u == num);
231 
232         // Sort array
233         qsort(base, num, (SymPair *).sizeof, &sympair_cmp);
234 
235         //for (u = 0; u < num; u++)
236             //printf("\t\t%.*s\t%llu\n", base[u].sym.Sident.length, base[u].sym.Sident.ptr, base[u].count);
237 
238         // Place symbols
239         for (u = 0; u < num; u++)
240         {
241             if (base[u].count >= count)
242             {
243                 auto u2 = (u + 1 < num) ? u + 1 : u;
244                 auto c2 = base[u2].count;
245                 if (c2 < count)
246                     c2 = count;
247                 trace_place(fpdef, base[u].sym,c2);
248             }
249             else
250                 break;
251         }
252 
253         // Clean up
254         trace_free(base);
255     }
256 }
257 
258 ///////////////////////////////////
259 // Report results.
260 // Also compute and return number of symbols.
261 
262 private size_t trace_report(FILE* fplog, Symbol* s)
263 {
264     //printf("trace_report()\n");
265     size_t nsymbols;
266     while (s)
267     {
268         ++nsymbols;
269         if (s.Sl)
270             nsymbols += trace_report(fplog, s.Sl);
271         fprintf(fplog,"------------------\n");
272         ulong count = 0;
273         for (auto sp = s.Sfanin; sp; sp = sp.next)
274         {
275             fprintf(fplog,"\t%5llu\t%.*s\n", sp.count, cast(int) sp.sym.Sident.length, sp.sym.Sident.ptr);
276             count += sp.count;
277         }
278         fprintf(fplog,"%.*s\t%llu\t%lld\t%lld\n", cast(int) s.Sident.length, s.Sident.ptr, count, s.totaltime, s.functime);
279         for (auto sp = s.Sfanout; sp; sp = sp.next)
280         {
281             fprintf(fplog,"\t%5llu\t%.*s\n", sp.count, cast(int) sp.sym.Sident.length, sp.sym.Sident.ptr);
282         }
283         s = s.Sr;
284     }
285     return nsymbols;
286 }
287 
288 ////////////////////////////////////
289 // Allocate and fill array of symbols.
290 
291 private void trace_array(Symbol*[] psymbols, Symbol *s, ref uint u)
292 {
293     while (s)
294     {
295         psymbols[u++] = s;
296         trace_array(psymbols, s.Sl, u);
297         s = s.Sr;
298     }
299 }
300 
301 
302 //////////////////////////////////////
303 // Qsort() comparison routine for array of pointers to Symbol's.
304 
305 extern(C) int symbol_cmp(scope const void* e1, scope const void* e2) nothrow @nogc
306 {
307     auto ps1 = cast(Symbol **)e1;
308     auto ps2 = cast(Symbol **)e2;
309 
310     auto diff = (*ps2).functime - (*ps1).functime;
311     return (diff == 0) ? 0 : ((diff > 0) ? 1 : -1);
312 }
313 
314 
315 ///////////////////////////////////
316 // Report function timings
317 
318 private void trace_times(FILE* fplog, Symbol*[] psymbols)
319 {
320     // Sort array
321     qsort(psymbols.ptr, psymbols.length, (Symbol *).sizeof, &symbol_cmp);
322 
323     // Print array header
324     timer_t time_scale;
325     static if (is(typeof(&QueryPerformanceFrequency)))
326     {
327         timer_t freq;
328         QueryPerformanceFrequency(&freq);
329         time_scale = freq / 1_000_000;
330         fprintf(fplog,"\n======== Timer Is %lld Ticks/Sec, Times are in Microsecs ========\n\n",freq);
331     }
332     else
333     {
334         // The exact frequency is unknown (and may vary), so do the reporting in Mega Ticks,
335         // which corresponds to 1 microsecond on a 1GHz clock.
336         time_scale = 1_000_000;
337         fprintf(fplog,"\n======== Timer frequency unknown, Times are in Megaticks ========\n\n");
338     }
339     fprintf(fplog,"  Num          Tree        Func        Per\n");
340     fprintf(fplog,"  Calls        Time        Time        Call\n\n");
341 
342     // Print array
343     foreach (s; psymbols)
344     {
345         timer_t tl,tr;
346         timer_t fl,fr;
347         timer_t pl,pr;
348         char[8192] buf = void;
349         SymPair* sp;
350         ulong calls;
351         char[] id;
352 
353         calls = 0;
354         id = demangle(s.Sident, buf);
355         for (sp = s.Sfanin; sp; sp = sp.next)
356             calls += sp.count;
357         if (calls == 0)
358             calls = 1;
359 
360         tl = s.totaltime / time_scale;
361         fl = s.functime / time_scale;
362         pl = s.functime / calls / time_scale;
363 
364         fprintf(fplog,"%7llu%12lld%12lld%12lld     %.*s\n",
365                       calls, tl, fl, pl, cast(int) id.length, id.ptr);
366     }
367 }
368 
369 
370 ///////////////////////////////////
371 // Initialize.
372 
373 private void trace_init()
374 {
375     synchronized        // protects gtrace_inited
376     {
377         if (!gtrace_inited)
378         {
379             gtrace_inited = 1;
380 
381             {   // See if we can determine the overhead.
382                 timer_t starttime;
383                 timer_t endtime;
384 
385                 auto st = trace_tos;
386                 trace_tos = null;
387                 QueryPerformanceCounter(&starttime);
388                 uint u;
389                 for (u = 0; u < 100; u++)
390                 {
391                     _c_trace_pro(0,null);
392                     _c_trace_epi();
393                 }
394                 QueryPerformanceCounter(&endtime);
395                 trace_ohd = (endtime - starttime) / u;
396                 //printf("trace_ohd = %lld\n",trace_ohd);
397                 if (trace_ohd > 0)
398                     trace_ohd--;            // round down
399                 trace_tos = st;
400             }
401         }
402     }
403 }
404 
405 /////////////////////////////////
406 // Terminate.
407 
408 static ~this()
409 {
410     // Free remainder of the thread local stack
411     while (trace_tos)
412     {
413         auto n = trace_tos.prev;
414         stack_free(trace_tos);
415         trace_tos = n;
416     }
417 
418     // And free the thread local stack's memory
419     while (stack_freelist)
420     {
421         auto n = stack_freelist.prev;
422         stack_free(stack_freelist);
423         stack_freelist = n;
424     }
425 
426     synchronized        // protects groot
427     {
428         // Merge thread local root into global groot
429 
430         if (!groot)
431         {
432             groot = root;       // that was easy
433             root = null;
434         }
435         else
436         {
437             void mergeSymbol(Symbol** proot, const(Symbol)* s)
438             {
439                 while (s)
440                 {
441                     auto gs = trace_addsym(proot, s.Sident);
442                     gs.totaltime += s.totaltime;
443                     gs.functime  += s.functime;
444 
445                     static void mergeFan(Symbol** proot, SymPair** pgf, const(SymPair)* sf)
446                     {
447                         for (; sf; sf = sf.next)
448                         {
449                             auto sym = trace_addsym(proot, sf.sym.Sident);
450                             for (auto gf = *pgf; 1; gf = gf.next)
451                             {
452                                 if (!gf)
453                                 {
454                                     auto sp = cast(SymPair *)trace_malloc(SymPair.sizeof);
455                                     sp.next = *pgf;
456                                     *pgf = sp;
457                                     sp.sym = sym;
458                                     sp.count = sf.count;
459                                     break;
460                                 }
461                                 if (gf.sym == sym)
462                                 {
463                                     gf.count += sf.count;
464                                     break;
465                                 }
466                             }
467                         }
468                     }
469 
470                     mergeFan(proot, &gs.Sfanin, s.Sfanin);
471                     mergeFan(proot, &gs.Sfanout, s.Sfanout);
472 
473                     mergeSymbol(proot, s.Sl);
474                     s = s.Sr;
475                 }
476             }
477 
478             mergeSymbol(&groot, root);
479         }
480     }
481 
482     // Free the memory for the thread local symbol table (root)
483     static void freeSymbol(Symbol* s)
484     {
485         while (s)
486         {
487             freeSymbol(s.Sl);
488             auto next = s.Sr;
489 
490             static void freeSymPair(SymPair* sp)
491             {
492                 while (sp)
493                 {
494                     auto spnext = sp.next;
495                     trace_free(sp);
496                     sp = spnext;
497                 }
498             }
499 
500             freeSymPair(s.Sfanin);
501             freeSymPair(s.Sfanout);
502             trace_free(s);
503             s = next;
504         }
505     }
506 
507     freeSymbol(root);
508     root = null;
509 }
510 
511 shared static ~this()
512 {
513     //printf("shared static ~this() groot = %p\n", groot);
514     if (gtrace_inited == 1)
515     {
516         gtrace_inited = 2;
517 
518         // Merge in data from any existing file
519         trace_merge(&groot);
520 
521         // Report results
522         FILE* fplog = trace_logfilename.length == 0 ? stdout :
523             fopen(trace_logfilename.ptr, "w");
524         if (fplog)
525         {
526             auto nsymbols = trace_report(fplog, groot);
527 
528             auto p = cast(Symbol **)trace_malloc((Symbol *).sizeof * nsymbols);
529             auto psymbols = p[0 .. nsymbols];
530 
531             uint u;
532             trace_array(psymbols, groot, u);
533             trace_times(fplog, psymbols);
534             fclose(fplog);
535 
536             trace_free(psymbols.ptr);
537             psymbols = null;
538         }
539         else
540             fprintf(stderr, "cannot write '%s'", trace_logfilename.ptr);
541 
542         // Output function link order
543         FILE* fpdef = trace_deffilename.length == 0 ? stdout :
544             fopen(trace_deffilename.ptr, "w");
545         if (fpdef)
546         {
547             fprintf(fpdef,"\nFUNCTIONS\n");
548             trace_order(fpdef, groot);
549             fclose(fpdef);
550         }
551         else
552             fprintf(stderr, "cannot write '%s'", trace_deffilename.ptr);
553     }
554 }
555 
556 /////////////////////////////////
557 // Our storage allocator.
558 
559 private void *trace_malloc(size_t nbytes)
560 {
561     auto p = malloc(nbytes);
562     if (!p)
563         exit(EXIT_FAILURE);
564     return p;
565 }
566 
567 private void trace_free(void *p)
568 {
569     free(p);
570 }
571 
572 //////////////////////////////////////////////
573 //
574 
575 private Symbol* trace_addsym(Symbol** proot, const(char)[] id)
576 {
577     //printf("trace_addsym('%s',%d)\n",p,len);
578     auto parent = proot;
579     auto rover = *parent;
580     while (rover !is null)               // while we haven't run out of tree
581     {
582         immutable len = id.length <= rover.Sident.length ? id.length : rover.Sident.length;
583         int cmp = memcmp(id.ptr, rover.Sident.ptr, len);
584         if (!cmp)
585             cmp = id.length < rover.Sident.length ? -1 : (id.length > rover.Sident.length);
586         if (cmp == 0)
587         {
588             return rover;
589         }
590         parent = (cmp < 0) ?            /* if we go down left side      */
591             &(rover.Sl) :               /* then get left child          */
592             &(rover.Sr);                /* else get right child         */
593         rover = *parent;                /* get child                    */
594     }
595     /* not in table, so insert into table       */
596     auto s = cast(Symbol *)trace_malloc(Symbol.sizeof);
597     memset(s,0,Symbol.sizeof);
598     s.Sident = id;
599     *parent = s;                        // link new symbol into tree
600     return s;
601 }
602 
603 /***********************************
604  * Add symbol s with count to SymPair list.
605  */
606 
607 private void trace_sympair_add(SymPair** psp, Symbol* s, ulong count)
608 {
609     SymPair* sp;
610 
611     for (; 1; psp = &sp.next)
612     {
613         sp = *psp;
614         if (!sp)
615         {
616             sp = cast(SymPair *)trace_malloc(SymPair.sizeof);
617             sp.sym = s;
618             sp.count = 0;
619             sp.next = null;
620             *psp = sp;
621             break;
622         }
623         else if (sp.sym == s)
624         {
625             break;
626         }
627     }
628     sp.count += count;
629 }
630 
631 //////////////////////////////////////////////
632 // This one is called by DMD
633 
634 private extern(C) void trace_pro(char[] id)
635 {
636     //printf("trace_pro(ptr = %p, length = %lld)\n", id.ptr, id.length);
637     //printf("trace_pro(id = '%.*s')\n", id.length, id.ptr);
638 
639     if (!trace_inited)
640     {
641         trace_inited = true;
642         trace_init();                   // initialize package
643     }
644 
645     timer_t starttime;
646     QueryPerformanceCounter(&starttime);
647     if (id.length == 0)
648         return;
649     auto tos = stack_push();
650     auto s = trace_addsym(&root, id);
651     tos.sym = s;
652     if (tos.prev)
653     {
654         // Accumulate Sfanout and Sfanin
655         auto prev = tos.prev.sym;
656         trace_sympair_add(&prev.Sfanout,s,1);
657         trace_sympair_add(&s.Sfanin,prev,1);
658     }
659     timer_t t;
660     QueryPerformanceCounter(&t);
661     tos.starttime = starttime;
662     tos.ohd = trace_ohd + t - starttime;
663     tos.subtime = 0;
664     ++s.recursion;
665     //printf("tos.ohd=%lld, trace_ohd=%lld + t=%lld - starttime=%lld\n",
666     //  tos.ohd,trace_ohd,t,starttime);
667 }
668 
669 // Called by some old versions of DMD
670 extern(C) void _c_trace_pro(size_t idlen, char* idptr)
671 {
672     char[] id = idptr[0 .. idlen];
673     trace_pro(id);
674 }
675 
676 /////////////////////////////////////////
677 // Called by DMD generated code
678 
679 extern(C) void _c_trace_epi()
680 {
681     //printf("_c_trace_epi()\n");
682     auto tos = trace_tos;
683     if (tos)
684     {
685         timer_t endtime;
686         QueryPerformanceCounter(&endtime);
687         auto starttime = tos.starttime;
688         auto totaltime = endtime - starttime - tos.ohd;
689         if (totaltime < 0)
690         {   //printf("endtime=%lld - starttime=%lld - tos.ohd=%lld < 0\n",
691             //  endtime,starttime,tos.ohd);
692             totaltime = 0;              // round off error, just make it 0
693         }
694 
695         // totaltime is time spent in this function + all time spent in
696         // subfunctions - bookkeeping overhead.
697         --tos.sym.recursion;
698         if (tos.sym.recursion == 0)
699             tos.sym.totaltime += totaltime;
700 
701         //if (totaltime < tos.subtime)
702         //printf("totaltime=%lld < tos.subtime=%lld\n",totaltime,tos.subtime);
703         tos.sym.functime  += totaltime - tos.subtime;
704         auto ohd = tos.ohd;
705         auto n = tos.prev;
706         stack_free(tos);
707         trace_tos = n;
708         if (n)
709         {
710             timer_t t;
711             QueryPerformanceCounter(&t);
712             n.ohd += ohd + t - endtime;
713             n.subtime += totaltime;
714             //printf("n.ohd = %lld\n",n.ohd);
715         }
716     }
717 }
718 
719 
720 ////////////////////////// FILE INTERFACE /////////////////////////
721 
722 /////////////////////////////////////
723 // Read line from file fp.
724 // Returns:
725 //      trace_malloc'd line buffer
726 //      null if end of file
727 
728 char[] trace_readline(FILE* fp)
729 {
730     char[] buf;
731     // Last character used in `buf`
732     size_t idx;
733     // Used to break out of the do .. while
734     int currentChar = EOF;
735 
736     //printf("trace_readline(%p)\n", fp);
737     do
738     {
739         if (buf.length <= idx)
740         {
741             const size_t newLength = buf.length + 80;
742             if (auto newPtr = cast(char*)realloc(buf.ptr, newLength))
743                 buf = newPtr[0 .. newLength];
744             else
745                 assert(0, "Memory allocation failed");
746         }
747         currentChar = fgetc(fp);
748         buf[idx++] = cast(char)currentChar;
749     } while (currentChar != EOF && currentChar != '\n');
750 
751     // Encountered '\n' or EOF immediately
752     // The calling code makes a distinction between EOF and '\n'
753     if (idx == 1 && currentChar == EOF)
754     {
755         trace_free(buf.ptr);
756         return null;
757     }
758     buf[idx - 1] = 0;
759     return buf[0 .. idx];
760 }
761 
762 //////////////////////////////////////
763 // Skip space
764 
765 private char *skipspace(char *p)
766 {
767     while (isspace(*p))
768         p++;
769     return p;
770 }
771 
772 ////////////////////////////////////////////////////////
773 // Merge in profiling data from existing file.
774 
775 private void trace_merge(Symbol** proot)
776 {
777     // We're outputting to stdout
778     if (!trace_logfilename.length)
779         return;
780 
781     FILE* fp = fopen(trace_logfilename.ptr, "r");
782     if (fp is null)
783         return;
784     scope(exit) fclose(fp);
785 
786     char* buf = null;
787     SymPair* sfanin = null;
788     auto psp = &sfanin;
789     char *p;
790     ulong count;
791     Symbol *s;
792 
793     while (1)
794     {
795         trace_free(buf);
796         buf = trace_readline(fp).ptr;
797         if (!buf)
798             break;
799         switch (*buf)
800         {
801         case '=':               // ignore rest of file
802             trace_free(buf);
803             return;
804         case ' ':
805         case '\t':              // fan in or fan out line
806             count = strtoul(buf,&p,10);
807             if (p == buf)       // if invalid conversion
808                 continue;
809             p = skipspace(p);
810             if (!*p)
811                 continue;
812             s = trace_addsym(proot, p[0 .. strlen(p)]);
813             trace_sympair_add(psp,s,count);
814             break;
815         default:
816             if (!isalpha(*buf))
817             {
818                 if (!sfanin)
819                     psp = &sfanin;
820                 continue;       // regard unrecognized line as separator
821             }
822             goto case;
823         case '?':
824         case '_':
825         case '$':
826         case '@':
827             p = buf;
828             while (isgraph(*p))
829                 p++;
830             *p = 0;
831             //printf("trace_addsym('%s')\n",buf);
832             s = trace_addsym(proot, buf[0 .. strlen(buf)]);
833             if (s.Sfanin)
834             {
835                 SymPair *sp;
836 
837                 for (; sfanin; sfanin = sp)
838                 {
839                     trace_sympair_add(&s.Sfanin,sfanin.sym,sfanin.count);
840                     sp = sfanin.next;
841                     trace_free(sfanin);
842                 }
843             }
844             else
845                 s.Sfanin = sfanin;
846 
847             sfanin = null;
848             psp = &s.Sfanout;
849 
850             {
851                 p++;
852                 count = strtoul(p,&p,10);
853                 timer_t t = cast(long)strtoull(p,&p,10);
854                 s.totaltime += t;
855                 t = cast(long)strtoull(p,&p,10);
856                 s.functime += t;
857             }
858             break;
859         }
860     }
861 }
862 
863 ////////////////////////// COMPILER INTERFACE /////////////////////
864 
865 version (Windows)
866 {
867     extern (Windows)
868     {
869         export int QueryPerformanceCounter(timer_t *);
870         export int QueryPerformanceFrequency(timer_t *);
871     }
872 }
873 else version (AArch64)
874 {
875     // We cannot use ldc.intrinsics.llvm_readcyclecounter because that is not an accurate
876     // time counter (it is a counter of CPU cycles, where here we want a time clock).
877     // Also, priviledged execution rights are needed to enable correct counting with
878     // ldc.intrinsics.llvm_readcyclecounter on AArch64.
879     extern (D) void QueryPerformanceCounter(timer_t* ctr)
880     {
881         asm { "mrs %0, cntvct_el0" : "=r" (*ctr); }
882     }
883     extern (D) void QueryPerformanceFrequency(timer_t* freq)
884     {
885         asm { "mrs %0, cntfrq_el0" : "=r" (*freq); }
886     }
887 }
888 else version (LDC)
889 {
890     extern (D) void QueryPerformanceCounter(timer_t* ctr)
891     {
892         import ldc.intrinsics: llvm_readcyclecounter;
893         *ctr = llvm_readcyclecounter();
894     }
895 }
896 else
897 {
898     extern (D) void QueryPerformanceCounter(timer_t* ctr)
899     {
900         version (D_InlineAsm_X86)
901         {
902             asm
903             {
904                 naked                   ;
905                 mov       ECX,EAX       ;
906                 rdtsc                   ;
907                 mov   [ECX],EAX         ;
908                 mov   [ECX+4],EDX        ;
909                 ret                     ;
910             }
911         }
912         else version (D_InlineAsm_X86_64)
913         {
914             asm
915             {
916                 naked                   ;
917                 // rdtsc can produce skewed results without preceding lfence/mfence.
918                 // this is what GNU/Linux does, but only use mfence here.
919                 // see https://github.com/torvalds/linux/blob/03b9730b769fc4d87e40f6104f4c5b2e43889f19/arch/x86/include/asm/msr.h#L130-L154
920                 mfence                  ; // serialize rdtsc instruction.
921                 rdtsc                   ;
922                 mov   [RDI],EAX         ;
923                 mov   [RDI+4],EDX        ;
924                 ret                     ;
925             }
926         }
927         else
928         {
929             static assert(0);
930         }
931     }
932 }