1 
2 /**
3  * Text macro processor
4  *
5  * Copyright: Copyright Digital Mars 1999-2015
6  * License:   $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Authors:   $(WEB digitalmars.com, Walter Bright)
8  * Source:    $(SARGONSRC src/sargon/_textmac.d)
9  */
10 
11 module sargon.textmac;
12 
13 import core.stdc.ctype;
14 import std.outbuffer;
15 
16 private:
17 
// "Blue paint" markers: the two-byte sequences 0xFF '{' and 0xFF '}'.
// extractArgN() treats everything between a BLUEL and its matching BLUER
// as opaque (commas, parentheses and quotes inside are ignored).
enum ubyte[2] BLUEL = [0xFF, '{'];
enum ubyte[2] BLUER = [0xFF, '}'];

/**********************************************************
 * Given buffer p[], extract argument marg[].
 *
 * Scanning stops at the end of p[] or at the first ')' that is not
 * matched by a '(' inside p[], whichever comes first.
 *
 * Params:
 *      p =     text to scan; it starts just after the macro name
 *      marg =  receives the extracted argument as a slice of p[]
 *      n =     0:      get entire argument
 *              1..9:   get nth argument
 *              -1:     get 2nd through end
 *      html = skip over html comments and tags
 * Returns:
 *      number of characters from start of p[] to end of argument
 */

size_t extractArgN(T)(T[] p, out T[] marg, int n, bool html = false) pure nothrow @nogc @safe
{
    /* Scan forward for matching right parenthesis.
     * Nest parentheses.
     * Skip over 0xFF { ... 0xFF } blue paint
     * Skip over "..." and '...' strings inside HTML tags.
     * Skip over <!-- ... --> comments.
     * Skip over previous macro insertions
     */
    size_t end = p.length;
    uint parens = 1;            // inside ( ), can nest
    char instring = 0;          // either 0, ' or "
    bool incomment = false;     // in <!-- ... -->
    bool intag = false;
    uint inexp = 0;             // inside 0xFF { ... 0xFF }, can nest
    uint argn = 0;              // number of top-level commas seen so far

    size_t v = 0;

  Largstart:
    // Skip first space, if any, to find the start of the macro argument.
    // The first argument ($1) keeps its leading space.
    if (n != 1 && v < end && isspace(p[v]))
        v++;

    auto vstart = v;

    for (; v < end; v++)
    {   char c = p[v];

        switch (c)
        {
            case ',':
                if (!inexp && !instring && !incomment && parens == 1)
                {
                    argn++;
                    if (argn == 1 && n == -1)
                    {   // $+ : the result starts after the first comma
                        v++;
                        goto Largstart;
                    }
                    if (argn == n)
                        break;          // this comma terminates the nth argument
                    if (argn + 1 == n)
                    {   // the next argument is the one being asked for
                        v++;
                        goto Largstart;
                    }
                }
                continue;

            case '(':
                if (!inexp && !instring && !incomment)
                    parens++;
                continue;

            case ')':
                if (!inexp && !instring && !incomment && --parens == 0)
                {
                    break;
                }
                continue;

            case '"':
            case '\'':
                // Quotes only delimit strings inside an HTML tag
                if (!inexp && !incomment && intag)
                {
                    if (c == instring)
                        instring = 0;
                    else if (!instring)
                        instring = c;
                }
                continue;

            case '<':
                if (html && !inexp && !instring && !incomment)
                {
                    if (v + 6 < end &&
                        p[v + 1] == '!' &&
                        p[v + 2] == '-' &&
                        p[v + 3] == '-')
                    {
                        incomment = true;
                        v += 3;
                    }
                    else if (v + 2 < end &&
                        isalpha(p[v + 1]))
                        intag = true;
                }
                continue;

            case '>':
                if (!inexp)
                    intag = false;
                continue;

            case '-':
                if (!inexp &&
                    !instring &&
                    incomment &&
                    v + 2 < end &&
                    p[v + 1] == '-' &&
                    p[v + 2] == '>')
                {
                    incomment = false;
                    v += 2;
                }
                continue;

            case BLUEL[0]:
                if (v + 1 < end)
                {
                    if (p[v + 1] == BLUEL[1])
                        inexp++;
                    else if (p[v + 1] == BLUER[1])
                        inexp--;
                }
                continue;

            default:
                continue;
        }
        break;
    }
    if (argn == 0 && n == -1)
    {
        // $+ was requested but there is no second argument: the result is
        // empty.  The slice must end at v, not at the end of p[]; otherwise
        // the text following an unmatched ')' would be returned as the
        // argument.
        marg = p[v .. v];
    }
    else
        marg = p[vstart .. v];
    return v;
}
160 
161 ///
unittest
{
    import std.stdio;

    // Runs extractArgN on `text` for argument selector `n` and verifies both
    // the extracted slice and the returned scan position.
    static void check(string text, int n, string wantArg, size_t wantEnd, bool html = false)
    {
        string got;
        immutable stop = extractArgN(text, got, n, html);
        //writefln("stop = %s, got = '%s'", stop, got);
        assert(got == wantArg && stop == wantEnd);
    }

    // Single argument, no terminator
    check(" hello", 0, "hello", 6);
    check(" hello", 1, " hello", 6);
    check(" hello", 2, "hello", 6);
    check(" hello", -1, "", 6);

    // Closing and nested parentheses
    check(" hello)x", 0, "hello", 6);
    check(" hell(o)x", 0, "hell(o)x", 9);

    // Comma separated arguments
    check(" he,l,lo", 0, "he,l,lo", 8);
    check(" he,l,lo", 1, " he", 3);
    check(" he, l, lo", 2, "l", 6);
    check(" he, l, lo", 3, "lo", 10);
    check(" he, l, lo", 4, "he, l, lo", 10);
    check(" he, l, lo", -1, "l, lo", 10);

    // Commas hidden inside HTML comments and quoted tag attributes
    check(" he<!--, -->", 1, " he<!--, -->", 12, true);
    check(" he<tag ',' \",\">", 1, " he<tag ',' \",\">", 16, true);

    // Commas hidden inside blue paint
    check(" he\xFF{ , \xFF}a", 1, " he\xFF{ , \xFF}a", 11);
}
215 
216 
/*****************************************************
 * Expand macros in text.
 *
 * The macro processor is the same one used in Ddoc.
 *
 * Params:
 *      text = source text to expand
 *      table = table of name=value macro definitions
 *      html = true to recognize HTML tags and comments while scanning
 *             macro arguments
 *
 * Returns:
 *      The source text after macro expansion.
 *      The return string is GC allocated.
 */
public string expand(const(char)[] text, string[string] table, bool html = false)
{
    //import std.stdio;
    import core.exception : onOutOfMemoryError;
    import core.stdc.stdlib : malloc, free;

    OutBuffer buf;

    /* Expand, in place, the region buf[start .. *pend].
     *
     *  start  = index in buf of the first byte to process
     *  pend   = in: index one past the last byte to process;
     *           out: the same boundary after the buffer has grown or shrunk
     *  arg    = argument text substituted for $0, $1..$9 and $+
     *  pinuse = head of a linked list (of Inuse nodes) holding the macro
     *           values currently being expanded, used to stop recursion
     */
    void expandImpl(size_t start, size_t *pend, char[] arg, void *pinuse = null)
    {
        version (none)
        {
            writefln("expand(buf[%s..%s], arg = '%s')\n", start, *pend, arg);
            writefln("Buf is: '%s'", cast(string)buf.data[start .. *pend]);
        }

        static int nest;
        if (nest > 100)             // limit recursive expansion
            return;
        nest++;
        scope (exit) nest--;        // restored on every exit path

        static struct Inuse
        {
            Inuse* next;
            string value;
        }

        // True if `value` is (by identity) the body of a macro that is
        // already being expanded further up the call chain.
        bool isInuse(string value)
        {
            for (Inuse* p = cast(Inuse*)pinuse; p; p = p.next)
            {
                if (p.value is value)
                    return true;
            }
            return false;
        }

        // Alloc/free a temporary buf that uses a stack buffer and overflows to malloc/free
        static char[] bufdup(const(char)[] src, char[] tmp)
        {
            char[] result;
            if (src.length < tmp.length)
                result = tmp[0 .. src.length];
            else
            {
                char* p = cast(char*)malloc(src.length * char.sizeof);
                if (!p)
                    onOutOfMemoryError();   // also checked in release builds
                result = p[0 .. src.length];
            }
            result[] = src[];
            return result;
        }

        static void buffree(char[] buf, const char[] tmp)
        {
            // Only memory obtained from malloc is released
            if (buf.ptr != tmp.ptr)
                free(buf.ptr);
        }

        size_t end = *pend;
        assert(start <= end);
        assert(end <= buf.offset);

        // copy arg[] as it may be a slice into buf[] which may shift
        version (unittest)
            char[2] argtmp = void;      // tiny, so unittests exercise the malloc path
        else
            char[10] argtmp = void;
        arg = bufdup(arg, argtmp);
        scope (exit) buffree(arg, argtmp);

        /* First pass - replace $x where x is a digit or '+'
         */
        for (size_t u = start; u + 1 < end; )
        {
            char* p = cast(char *)buf.data.ptr;   // buf.data is not loop invariant

            /* Look for $x, but not $$x, and replace it with arg.
             */
            if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+'))
            {
                if (u > start && p[u - 1] == '$')
                {   // Don't expand $$x, but replace it with $x
                    buf.remove(u - 1, 1);
                    end--;
                    u += 1; // now u is one past the x
                    continue;
                }

                auto c = p[u + 1];
                int n = (c == '+') ? -1 : c - '0';

                char[] marg;
                if (n == 0)             // if $0
                    marg = arg;
                else
                    extractArgN(arg, marg, n, html);

                if (marg.length == 0)
                {   // Just remove macro invocation
                    buf.remove(u, 2);
                    end -= 2;
                }
                else if (c == '+')      // if $+
                {
                    // Replace '$+' with 'marg'
                    buf.remove(u, 2);
                    buf.insert(u, cast(ubyte[])marg);
                    end += marg.length - 2;

                    // Scan replaced text for further expansion
                    size_t mend = u + marg.length;
                    expandImpl(u, &mend, null, pinuse);
                    end += mend - (u + marg.length);
                    u = mend;
                }
                else
                {
                    // Replace '$n' with 'BLUEL marg BLUER'
                    buf.data[u] = BLUEL[0];
                    buf.data[u + 1] = BLUEL[1];
                    buf.insert(u + 2, cast(ubyte[])marg);
                    buf.insert(u + 2 + marg.length, cast(ubyte[])(BLUER[]));
                    end += -2 + BLUEL.length + marg.length + BLUER.length;

                    // Scan replaced text for further expansion
                    size_t mend = u + 2 + marg.length;
                    expandImpl(u + 2, &mend, null, pinuse);
                    end += mend - (u + 2 + marg.length);
                    u = mend;
                }
                continue;
            }

            u++;
        }

        /* Second pass - replace other macros.
         * The shortest invocation, $(A), is 4 bytes long, so a candidate at u
         * needs u + 3 (its closing parenthesis) to lie inside the region.
         */
        for (size_t u = start; u + 3 < end; )
        {
            char *p = cast(char *)buf.data.ptr;   // buf.data is not loop invariant

            /* A valid start of macro expansion is $(c, where c is
             * an id start character, and not $$(c.
             */
            if (p[u] == '$' &&
                p[u + 1] == '(' &&
                isIdStart(p+u+2))
            {
                char[] name;

                size_t v;
                /* Scan forward to find end of macro name and
                 * beginning of macro argument (marg).
                 */
                for (v = u + 2; v < end; v += utfStride(p+v))
                {

                    if (!isIdTail(p+v))
                    {   // We've gone past the end of the macro name.
                        name = p[u + 2 .. v];
                        break;
                    }
                }

                char[] marg;
                v += extractArgN(p[v .. end], marg, 0, html);
                assert(v <= end);

                if (v < end)
                {   // v is on the closing ')'
                    if (u > start && p[u - 1] == '$')
                    {   // Don't expand $$(NAME), but replace it with $(NAME)
                        buf.remove(u - 1, 1);
                        end--;
                        u = v;      // now u is one past the closing ')'
                        continue;
                    }

                    auto pm = name in table;
                    if (pm)
                    {
                        auto m = *pm;
                        bool mIsInuse = isInuse(m);

                        if (mIsInuse && marg.length == 0)
                        {   // Remove macro invocation because it expands to nothing
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                            // Rescan from u: the text that followed the
                            // invocation has moved down to u and may itself
                            // start a macro, so u must not be advanced.
                            continue;
                        }
                        else if (mIsInuse &&
                                 (arg == marg ||
                                  (arg.length + 4 == marg.length &&
                                   marg[0] == BLUEL[0] &&
                                   marg[1] == BLUEL[1] &&
                                   arg == marg[2 .. marg.length - 2] &&
                                   marg[marg.length - 2] == BLUER[0] &&
                                   marg[marg.length - 1] == BLUER[1]
                                 )
                                )
                               )
                        {   // Recursive expansion; just leave in place
                            ;
                        }
                        else
                        {
                            // copy marg[] as it is a slice into buf which will shift
                            version (unittest)
                                char[2] margtmp = void;
                            else
                                char[10] margtmp = void;
                            marg = bufdup(marg, margtmp);
                            scope (exit) buffree(marg, margtmp);

                            // Insert replacement text, wrapped in blue paint,
                            // right after the closing ')'
                            buf.spread(v + 1, BLUEL.length + m.length + BLUER.length);
                            buf.data[v + 1] = BLUEL[0];
                            buf.data[v + 2] = BLUEL[1];
                            buf.data[v + 3 .. v + 3 + m.length] = cast(ubyte[])m[];
                            buf.data[v + 3 + m.length]     = BLUER[0];
                            buf.data[v + 3 + m.length + 1] = BLUER[1];

                            end += 2 + m.length + 2;

                            // Scan replaced text for further expansion
                            Inuse inuse;
                            inuse.next = cast(Inuse *)pinuse;
                            inuse.value = m;

                            size_t mend = v + 1 + 2+m.length+2;
                            expandImpl(v + 1, &mend, marg, &inuse);
                            end += mend - (v + 1 + 2+m.length+2);

                            // Drop the original $(NAME ...) text and resume
                            // after the expanded replacement
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                            u += mend - (v + 1);

                            continue;
                        }
                    }
                    else
                    {
                        // Replace $(NAME) with nothing
                        buf.remove(u, v + 1 - u);
                        end -= (v + 1 - u);
                        continue;
                    }
                }
            }
            u++;
        }
        *pend = end;
    }

    buf = new OutBuffer();
    buf.write(text);
    size_t end = buf.offset;
    expandImpl(0, &end, null);
    assert(end == buf.offset);

    /* Remove the blue paint: every BLUEL[0] byte (0xFF) that is followed by
     * another byte is dropped together with that byte.
     */
    size_t j;
    for (size_t i = 0; i < buf.offset; ++i)
    {
        char c = buf.data[i];
        if (c == BLUEL[0] && i + 1 < buf.offset)
            ++i;
        else
            buf.data[j++] = c;
    }

    // Convert result to string
    return cast(string)buf.data[0 .. j];
}
522 
523 ///
unittest
{
    import std.stdio;

    string[string] table;

    // Expands `src` against the current contents of `table` and compares
    // the result with `expected`.
    void check(string src, string expected)
    {
        immutable got = expand(src, table);
        //writefln("got = '%s'", got);
        assert(got == expected);
    }

    // Text without macros passes through untouched
    check("hello", "hello");

    // Defined, undefined and empty macros
    table["ABC"] = "def";
    check("foo$(ABC)", "foodef");
    check("foo$(DEF)", "foo");

    table["GHI"] = "";
    check("foo$(GHI)x", "foox");

    // Self-referencing macro, escaped invocation, invalid macro name
    table["JKI"] = "$(JKI)";
    check("foo$(JKI)x", "foox");
    check("foo$$(JKI)x", "foo$(JKI)x");
    check("foo$(123)x", "foo$(123)x");

    // $0 : whole argument
    table["M3"] = "$0";
    check("foo$(M3)x", "foox");
    check("foo$(M3 $(M3 1) 1)x", "foo1 1x");

    // $+ : second argument through the end
    table["M4"] = "$+";
    check("foo$(M4 1,2,3)x", "foo2,3x");

    // $$1 : escaped argument reference
    table["M5"] = "$$1";
    check("foo$(M5 1,2,3)x", "foo$1x");

    // Recursive invocation with the same argument is left in place
    table["M6"] = "$(M6 $0)";
    check("foo$(M6 1)x", "foo$(M6 1)x");
}
575 
/* Delete nbytes bytes starting at index from buf: the bytes that follow the
 * removed range are moved down to index and buf.offset shrinks by nbytes.
 * The removed range must lie within buf.data[0 .. buf.offset].
 */
void remove(OutBuffer buf, size_t index, size_t nbytes)
{
    //writefln("%s %s %s", index, nbytes, buf.offset);
    assert(index + nbytes <= buf.offset);

    // Number of bytes that live after the removed range
    immutable tail = buf.offset - (index + nbytes);

    // Copy front to back; source is always ahead of destination
    foreach (i; 0 .. tail)
    {
        immutable dst = index + i;
        buf.data[dst] = buf.data[dst + nbytes];
    }

    buf.offset -= nbytes;
}
586 
/* Insert the bytes of data into buf at index: a gap of data.length bytes is
 * opened with OutBuffer.spread and then filled byte by byte.
 */
void insert(OutBuffer buf, size_t index, ubyte[] data)
{
    buf.spread(index, data.length);

    foreach (i, b; data)
        buf.data[index + i] = b;
}
595 
/* Returns 1 if the byte at *p can start a macro identifier, i.e. it is
 * alphabetic according to isalpha() or an underscore; otherwise 0.
 * Only the single byte at *p is examined.
 */
int isIdStart(const char *p)
{
    immutable char first = *p;

/+ fix later: Unicode identifier start characters
    if (c >= 0x80)
    {   size_t i = 0;
        if (utf_decodeChar(p, 4, &i, &c))
            return 0;   // ignore errors
        if (std.uni.isAlpha(c))
            return 1;
    }
+/
    return (first == '_' || isalpha(first)) ? 1 : 0;
}
612 
/* Returns 1 if the byte at *p can continue a macro identifier: it is
 * alphanumeric according to isalnum() or an underscore.  Bytes >= 0x80 are
 * delegated to isIdStart().  Otherwise returns 0.
 */
int isIdTail(const char *p)
{
    immutable char ch = *p;

    if (ch == '_' || isalnum(ch))
        return 1;

    // Non-ASCII byte: accepted exactly when isIdStart() accepts it
    return ch >= 0x80 ? isIdStart(p) : 0;
}
624 
/* Returns the number of bytes occupied by the UTF-8 sequence whose first
 * byte is *p, judged from that byte alone:
 *   - 1 for an ASCII byte (< 0x80)
 *   - 2, 3 or 4 for a valid multi-byte lead byte
 *   - 1 for anything else (continuation bytes, over-long lead bytes, 0xFF),
 *     so that errors consume a single byte
 */
int utfStride(const char *p)
{
    import core.bitop : bsr;

    immutable char c = *p;
    if (c < 0x80)
        return 1;

    // 0xFF has no zero bit in its low 8 bits; bsr(0) is undefined, so
    // handle it before computing the leading-one count.
    if (c == 0xFF)
        return 1;

    // Count the leading 1 bits of c.  The complement is taken in uint and
    // masked to 8 bits: with integer promotion a bare `~c` has all the
    // upper bits set, which makes bsr() return 31 for every input.
    immutable msbs = 7 - bsr((~uint(c)) & 0xFF);
    if (msbs < 2 || msbs > 4)
        return 1;                       // errors consume 1 character
    return msbs;
}
637 
638 
639