1 /**
2  * Support for lexical scope of local variables
3  *
4  * Compiler implementation of the
5  * $(LINK2 https://www.dlang.org, D programming language).
6  *
7  * Copyright:   Copyright (C) 2015-2023 by The D Language Foundation, All Rights Reserved
8  * Authors:     Rainer Schuetze
9  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
10  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/dvarstats.d, backend/dvarstats.d)
11  */
12 
13 module dmd.backend.dvarstats;
14 
15 import core.stdc.string;
16 import core.stdc.stdlib;
17 
18 import dmd.backend.cc;
19 import dmd.backend.cdef;
20 import dmd.backend.global;
21 import dmd.backend.code;
22 import dmd.backend.symtab;
23 import dmd.backend.barray;
24 
25 
26 nothrow:
27 
28 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
29 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);
30 
31 version (all) // free function version
32 {
33     import dmd.backend.dvarstats;
34 
35     void varStats_writeSymbolTable(ref symtab_t symtab,
36             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
37             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
38     {
39         varStats.writeSymbolTable(symtab, fnWriteVar, fnEndArgs, fnBeginBlock, fnEndBlock);
40     }
41 
42     void varStats_startFunction()
43     {
44         varStats.startFunction();
45     }
46 
47     void varStats_recordLineOffset(Srcpos src, targ_size_t off)
48     {
49         varStats.recordLineOffset(src, off);
50     }
51 
52     __gshared VarStatistics varStats;
53 }
54 
55 
56 // estimate of variable life time
57 struct LifeTime
58 {
59     Symbol* sym;
60     int offCreate;  // variable created before this code offset
61     int offDestroy; // variable destroyed after this code offset
62 }
63 
64 struct LineOffset
65 {
66     targ_size_t offset;
67     uint linnum;
68     uint diffNextOffset;
69 }
70 
71 struct VarStatistics
72 {
73 private:
74 nothrow:
75     Barray!LifeTime lifeTimes;
76 
77     // symbol table sorted by offset of variable creation
78     symtab_t sortedSymtab;
79     Barray!SYMIDX nextSym;      // next symbol with identifier with same hash, same size as sortedSymtab
80     int uniquecnt;        // number of variables that have unique name and don't need lexical scope
81 
82     // line number records for the current function
83     Barray!LineOffset lineOffsets;
84     const(char)* srcfile;  // only one file supported, no inline
85 
86 public void startFunction()
87 {
88     lineOffsets.setLength(0);
89     srcfile = null;
90 }
91 
92 // figure if can we can add a lexical scope for the variable
93 // (this should exclude variables from inlined functions as there is
94 //  no support for gathering stats from different files)
95 private bool isLexicalScopeVar(Symbol* sa)
96 {
97     if (sa.lposscopestart.Slinnum <= 0 || sa.lposscopestart.Slinnum > sa.lnoscopeend)
98         return false;
99 
100     // is it inside the function? Unfortunately we cannot verify the source file in case of inlining
101     if (sa.lposscopestart.Slinnum < funcsym_p.Sfunc.Fstartline.Slinnum)
102         return false;
103     if (sa.lnoscopeend > funcsym_p.Sfunc.Fendline.Slinnum)
104         return false;
105 
106     return true;
107 }
108 
109 // compare function to sort symbols by line offsets of their creation
110 private extern (C) static int cmpLifeTime(scope const void* p1, scope const void* p2)
111 {
112     const LifeTime* lt1 = cast(const(LifeTime)*)p1;
113     const LifeTime* lt2 = cast(const(LifeTime)*)p2;
114 
115     return lt1.offCreate - lt2.offCreate;
116 }
117 
118 // a parent scope contains the creation offset of the child scope
119 private static extern(D) SYMIDX isParentScope(ref Barray!LifeTime lifetimes, SYMIDX parent, SYMIDX si)
120 {
121     if (parent == SYMIDX.max) // full function
122         return true;
123     return lifetimes[parent].offCreate <= lifetimes[si].offCreate &&
124            lifetimes[parent].offDestroy > lifetimes[si].offCreate;
125 }
126 
127 // find a symbol that includes the creation of the given symbol as part of its life time
128 private static extern(D) SYMIDX findParentScope(ref Barray!LifeTime lifetimes, SYMIDX si)
129 {
130     if (si != SYMIDX.max)
131     {
132         for(SYMIDX sj = si; sj; --sj)
133             if(isParentScope(lifetimes, sj - 1, si))
134                 return sj;
135     }
136     return SYMIDX.max;
137 }
138 
139 private static int getHash(const(char)* s)
140 {
141     int hash = 0;
142     for (; *s; s++)
143         hash = hash * 11 + *s;
144     return hash;
145 }
146 
147 private bool hashSymbolIdentifiers(ref symtab_t symtab)
148 {
149     // build circular-linked lists of symbols with same identifier hash
150     bool hashCollisions = false;
151     SYMIDX[256] firstSym = SYMIDX.max;
152     for (SYMIDX si = 0; si < symtab.length; si++)
153     {
154         Symbol* sa = symtab[si];
155         int hash = getHash(sa.Sident.ptr) & 255;
156         SYMIDX first = firstSym[hash];
157         if (first == SYMIDX.max)
158         {
159             // connect full circle, so we don't have to recalculate the hash
160             nextSym[si] = si;
161             firstSym[hash] = si;
162         }
163         else
164         {
165             // insert after first entry
166             nextSym[si] = nextSym[first];
167             nextSym[first] = si;
168             hashCollisions = true;
169         }
170     }
171     return hashCollisions;
172 }
173 
174 private bool hasUniqueIdentifier(ref symtab_t symtab, SYMIDX si)
175 {
176     Symbol* sa = symtab[si];
177     for (SYMIDX sj = nextSym[si]; sj != si; sj = nextSym[sj])
178         if (strcmp(sa.Sident.ptr, symtab[sj].Sident.ptr) == 0)
179             return false;
180     return true;
181 }
182 
183 // gather statistics about creation and destructions of variables that are
184 //  used by the current function
185 private symtab_t* calcLexicalScope(return ref symtab_t symtab) return
186 {
187     // make a copy of the symbol table
188     // - arguments should be kept at the very beginning
189     // - variables with unique name come first (will be emitted with full function scope)
190     // - variables with duplicate names are added with ascending code offset
191     nextSym.setLength(symtab.length);
192     if (sortedSymtab.symmax < symtab.length)
193     {
194         sortedSymtab.tab = cast(Symbol**) util_realloc(sortedSymtab.tab, symtab.length, (Symbol*).sizeof);
195         sortedSymtab.symmax = symtab.length;
196     }
197     sortedSymtab.length = symtab.length;
198 
199     if (!hashSymbolIdentifiers(symtab))
200     {
201         // without any collisions, there are no duplicate symbol names, so bail out early
202         uniquecnt = cast(int)symtab.length;
203         return &symtab;
204     }
205 
206     SYMIDX argcnt;
207     for (argcnt = 0; argcnt < symtab.length; argcnt++)
208     {
209         Symbol* sa = symtab[argcnt];
210         if (sa.Sclass != SC.parameter && sa.Sclass != SC.regpar && sa.Sclass != SC.fastpar && sa.Sclass != SC.shadowreg)
211             break;
212         sortedSymtab[argcnt] = sa;
213     }
214 
215     // find symbols with identical names, only these need lexical scope
216     uniquecnt = cast(int)argcnt;
217     SYMIDX dupcnt = 0;
218     for (SYMIDX sj, si = argcnt; si < symtab.length; si++)
219     {
220         Symbol* sa = symtab[si];
221         if (!isLexicalScopeVar(sa) || hasUniqueIdentifier(symtab, si))
222             sortedSymtab[uniquecnt++] = sa;
223         else
224             sortedSymtab[symtab.length - 1 - dupcnt++] = sa; // fill from the top
225     }
226     sortedSymtab.length = symtab.length;
227     if(dupcnt == 0)
228         return &symtab;
229 
230     sortLineOffsets();
231 
232     // precalc the lexical blocks to emit so that identically named symbols don't overlap
233     lifeTimes.setLength(dupcnt);
234 
235     for (SYMIDX si = 0; si < dupcnt; si++)
236     {
237         lifeTimes[si].sym = sortedSymtab[uniquecnt + si];
238         lifeTimes[si].offCreate = cast(int)getLineOffset(lifeTimes[si].sym.lposscopestart.Slinnum);
239         lifeTimes[si].offDestroy = cast(int)getLineOffset(lifeTimes[si].sym.lnoscopeend);
240     }
241     qsort(lifeTimes[].ptr, dupcnt, (lifeTimes[0]).sizeof, &cmpLifeTime);
242 
243     // ensure that an inner block does not extend beyond the end of a parent block
244     for (SYMIDX si = 0; si < dupcnt; si++)
245     {
246         SYMIDX sj = findParentScope(lifeTimes, si);
247         if(sj != SYMIDX.max && lifeTimes[si].offDestroy > lifeTimes[sj].offDestroy)
248             lifeTimes[si].offDestroy = lifeTimes[sj].offDestroy;
249     }
250 
251     // extend life time to the creation of the next symbol that is not contained in the parent scope
252     // or that has the same name
253     for (SYMIDX sj, si = 0; si < dupcnt; si++)
254     {
255         SYMIDX parent = findParentScope(lifeTimes, si);
256 
257         for (sj = si + 1; sj < dupcnt; sj++)
258             if(!isParentScope(lifeTimes, parent, sj))
259                 break;
260             else if (strcmp(lifeTimes[si].sym.Sident.ptr, lifeTimes[sj].sym.Sident.ptr) == 0)
261                 break;
262 
263         lifeTimes[si].offDestroy = cast(int)(sj < dupcnt ? lifeTimes[sj].offCreate : retoffset + retsize); // function length
264     }
265 
266     // store duplicate symbols back with new ordering
267     for (SYMIDX si = 0; si < dupcnt; si++)
268         sortedSymtab[uniquecnt + si] = lifeTimes[si].sym;
269 
270     return &sortedSymtab;
271 }
272 
273 public void writeSymbolTable(ref symtab_t symtab,
274             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
275             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
276 {
277     auto symtab2 = calcLexicalScope(symtab);
278 
279     int openBlocks = 0;
280     int lastOffset = 0;
281 
282     // Write local symbol table
283     bool endarg = false;
284     for (SYMIDX si = 0; si < symtab2.length; si++)
285     {
286         Symbol *sa = (*symtab2)[si];
287         if (endarg == false &&
288             sa.Sclass != SC.parameter &&
289             sa.Sclass != SC.fastpar &&
290             sa.Sclass != SC.regpar &&
291             sa.Sclass != SC.shadowreg)
292         {
293             if(fnEndArgs)
294                 (*fnEndArgs)();
295             endarg = true;
296         }
297         if (si >= uniquecnt)
298         {
299             int off = lifeTimes[si - uniquecnt].offCreate;
300             // close scopes that end before the creation of this symbol
301             for (SYMIDX sj = si; sj > uniquecnt; --sj)
302             {
303                 if (lastOffset < lifeTimes[sj - 1 - uniquecnt].offDestroy && lifeTimes[sj - 1 - uniquecnt].offDestroy <= off)
304                 {
305                     assert(openBlocks > 0);
306                     if(fnEndBlock)
307                         (*fnEndBlock)();
308                     openBlocks--;
309                 }
310             }
311             int len = lifeTimes[si - uniquecnt].offDestroy - off;
312             // don't emit a block for length 0, it isn't captured by the close condition above
313             if (len > 0)
314             {
315                 if(fnBeginBlock)
316                     (*fnBeginBlock)(off, len);
317                 openBlocks++;
318             }
319             lastOffset = off;
320         }
321         (*fnWriteVar)(sa);
322     }
323 
324     while (openBlocks > 0)
325     {
326         if(fnEndBlock)
327             (*fnEndBlock)();
328         openBlocks--;
329     }
330 }
331 
332 // compare function to sort line offsets ascending by line (and offset on identical line)
333 private extern (C) static int cmpLineOffsets(scope const void* off1, scope const void* off2)
334 {
335     const LineOffset* loff1 = cast(const(LineOffset)*)off1;
336     const LineOffset* loff2 = cast(const(LineOffset)*)off2;
337 
338     if (loff1.linnum == loff2.linnum)
339         return cast(int)(loff1.offset - loff2.offset);
340     return loff1.linnum - loff2.linnum;
341 }
342 
343 private void sortLineOffsets()
344 {
345     if (lineOffsets.length == 0)
346         return;
347 
348     // remember the offset to the next recorded offset on another line
349     for (int i = 1; i < lineOffsets.length; i++)
350         lineOffsets[i-1].diffNextOffset = cast(uint)(lineOffsets[i].offset - lineOffsets[i-1].offset);
351     lineOffsets[lineOffsets.length - 1].diffNextOffset = cast(uint)(retoffset + retsize - lineOffsets[lineOffsets.length - 1].offset);
352 
353     // sort line records and remove duplicate lines preferring smaller offsets
354     qsort(lineOffsets[].ptr, lineOffsets.length, (lineOffsets[0]).sizeof, &cmpLineOffsets);
355     int j = 0;
356     for (int i = 1; i < lineOffsets.length; i++)
357         if (lineOffsets[i].linnum > lineOffsets[j].linnum)
358             lineOffsets[++j] = lineOffsets[i];
359     lineOffsets.setLength(j + 1);
360 }
361 
362 private targ_size_t getLineOffset(int linnum)
363 {
364     int idx = findLineIndex(linnum);
365     if (idx >= lineOffsets.length || lineOffsets[idx].linnum < linnum)
366         return retoffset + retsize; // function length
367     if (idx > 0 && lineOffsets[idx].linnum != linnum)
368         // for inexact line numbers, use the offset following the previous line
369         return lineOffsets[idx-1].offset + lineOffsets[idx-1].diffNextOffset;
370     return lineOffsets[idx].offset;
371 }
372 
373 // return the first record index in the lineOffsets array with linnum >= line
374 private int findLineIndex(uint line)
375 {
376     int low = 0;
377     int high = cast(int)lineOffsets.length;
378     while (low < high)
379     {
380         int mid = low + ((high - low) >> 1);
381         int ln = lineOffsets[mid].linnum;
382         if (line < ln)
383             high = mid;
384         else if (line > ln)
385             low = mid + 1;
386         else
387             return mid;
388     }
389     return low;
390 }
391 
392 public void recordLineOffset(Srcpos src, targ_size_t off)
393 {
394     const sfilename = src.Sfilename;
395 
396     // only record line numbers from one file, symbol info does not include source file
397     if (!sfilename || !src.Slinnum)
398         return;
399     if (!srcfile)
400         srcfile = sfilename;
401     if (srcfile != sfilename && strcmp(srcfile, sfilename) != 0)
402         return;
403 
404     // assume ascending code offsets generated during codegen, ignore any other
405     //  (e.g. there is an additional line number emitted at the end of the function
406     //   or multiple line numbers at the same offset)
407     if (lineOffsets.length > 0 && lineOffsets[lineOffsets.length-1].offset >= off)
408         return;
409 
410     if (lineOffsets.length > 0 && lineOffsets[lineOffsets.length-1].linnum == src.Slinnum)
411     {
412         // optimize common case: new offset on same line
413         return;
414     }
415     // don't care for lineOffsets being ordered now, that is taken care of later (calcLexicalScope)
416     LineOffset* linoff = lineOffsets.push();
417     linoff.linnum = src.Slinnum;
418     linoff.offset = off;
419     linoff.diffNextOffset = 0;
420 }
421 
422 }