/**
 * Text macro processor
 *
 * Copyright: Copyright Digital Mars 1999-2015
 * License:   $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors:   $(WEB digitalmars.com, Walter Bright)
 * Source:    $(SARGONSRC src/sargon/_textmac.d)
 */

module sargon.textmac;

import core.stdc.ctype;
import std.outbuffer;

private:

// "Blue paint": two-byte markers that bracket text which has already been
// inserted by a macro expansion. extractArgN() does not look for argument
// delimiters between a BLUEL and its matching BLUER.
enum ubyte[2] BLUEL = [0xFF, '{'];
enum ubyte[2] BLUER = [0xFF, '}'];

/**********************************************************
 * Given buffer p[], extract argument marg[].
 *
 * p[] is scanned from its start up to the ')' that closes the macro
 * invocation (the opening '(' is assumed to have been consumed already, so
 * the parenthesis depth starts at 1) or to the end of p[] if there is none.
 * Commas, parentheses and quotes are ignored while inside blue paint; commas
 * and parentheses are also ignored inside an HTML comment or inside a quoted
 * string within an HTML tag.
 *
 * For every n other than 1, a single leading whitespace character of the
 * selected argument is skipped.
 *
 * Params:
 *      p = text to scan
 *      marg = receives the selected argument as a slice of p[]
 *      n =     0:      get entire argument
 *              1..9:   get nth argument
 *              -1:     get 2nd through end (empty if there is no 2nd argument)
 *      html = skip over html comments and tags
 * Returns:
 *      number of characters from start of p[] to end of argument
 */

size_t extractArgN(T)(T[] p, out T[] marg, int n, bool html = false) pure nothrow @nogc @safe
{
    /* Scan forward for matching right parenthesis.
     * Nest parentheses.
     * Skip over 0xFF { ... 0xFF } blue paint
     * Skip over "..." and '...' strings inside HTML tags.
     * Skip over <!-- ... --> comments.
     * Skip over previous macro insertions
     */
    size_t end = p.length;
    uint parens = 1;            // inside ( ), can nest
    char instring = 0;          // either 0, ' or "
    bool incomment = false;     // in <!-- ... -->
    bool intag = false;         // in <tag ...>
    uint inexp = 0;             // inside 0xFF { ... 0xFF }, can nest
    uint argn = 0;              // number of top-level commas seen so far

    size_t v = 0;

  Largstart:
    // Skip first space, if any, to find the start of the macro argument
    if (n != 1 && v < end && isspace(p[v]))
        v++;

    auto vstart = v;

    for (; v < end; v++)
    {
        char c = p[v];

        // `continue` keeps scanning; a `break` out of the switch falls into
        // the `break` after it and terminates the scan with v on the delimiter.
        switch (c)
        {
            case ',':
                if (!inexp && !instring && !incomment && parens == 1)
                {
                    argn++;
                    if (argn == 1 && n == -1)
                    {   // $+ : the result starts after the first comma
                        v++;
                        goto Largstart;
                    }
                    if (argn == n)
                        break;          // this comma ends the nth argument
                    if (argn + 1 == n)
                    {   // the nth argument starts after this comma
                        v++;
                        goto Largstart;
                    }
                }
                continue;

            case '(':
                if (!inexp && !instring && !incomment)
                    parens++;
                continue;

            case ')':
                if (!inexp && !instring && !incomment && --parens == 0)
                {
                    break;              // closing parenthesis of the invocation
                }
                continue;

            case '"':
            case '\'':
                // Quotes only open/close strings inside an HTML tag
                if (!inexp && !incomment && intag)
                {
                    if (c == instring)
                        instring = 0;
                    else if (!instring)
                        instring = c;
                }
                continue;

            case '<':
                if (html && !inexp && !instring && !incomment)
                {
                    if (v + 6 < end &&
                        p[v + 1] == '!' &&
                        p[v + 2] == '-' &&
                        p[v + 3] == '-')
                    {
                        incomment = true;
                        v += 3;
                    }
                    else if (v + 2 < end &&
                        isalpha(p[v + 1]))
                        intag = true;
                }
                continue;

            case '>':
                if (!inexp)
                    intag = false;
                continue;

            case '-':
                if (!inexp &&
                    !instring &&
                    incomment &&
                    v + 2 < end &&
                    p[v + 1] == '-' &&
                    p[v + 2] == '>')
                {
                    incomment = false;
                    v += 2;
                }
                continue;

            case BLUEL[0]:
                if (v + 1 < end)
                {
                    if (p[v + 1] == BLUEL[1])
                        inexp++;
                    else if (p[v + 1] == BLUER[1])
                        inexp--;        // unsigned: a closing marker with no
                                        // opening one wraps around, so no further
                                        // delimiter is recognized in this scan
                }
                continue;

            default:
                continue;
        }
        break;
    }
    if (argn == 0 && n == -1)
        marg = p[v .. v];       // no 2nd argument: empty, not the text from ')' onward
    else
        marg = p[vstart .. v];
    return v;
}

///
unittest
{
    size_t v;
    string marg;

    v = extractArgN(" hello", marg, 0);
    assert(marg == "hello" && v == 6);

    v = extractArgN(" hello", marg, 1);
    assert(marg == " hello" && v == 6);

    v = extractArgN(" hello", marg, 2);
    assert(marg == "hello" && v == 6);

    v = extractArgN(" hello", marg, -1);
    assert(marg == "" && v == 6);

    // no 2nd argument: the result is empty even when text follows the ')'
    v = extractArgN(" hello)x", marg, -1);
    assert(marg == "" && v == 6);

    v = extractArgN(" hello)x", marg, 0);
    assert(marg == "hello" && v == 6);

    v = extractArgN(" hell(o)x", marg, 0);
    assert(marg == "hell(o)x" && v == 9);

    v = extractArgN(" he,l,lo", marg, 0);
    assert(marg == "he,l,lo" && v == 8);

    v = extractArgN(" he,l,lo", marg, 1);
    assert(marg == " he" && v == 3);

    v = extractArgN(" he, l, lo", marg, 2);
    assert(marg == "l" && v == 6);

    v = extractArgN(" he, l, lo", marg, 3);
    assert(marg == "lo" && v == 10);

    v = extractArgN(" he, l, lo", marg, 4);
    assert(marg == "he, l, lo" && v == 10);

    v = extractArgN(" he, l, lo", marg, -1);
    assert(marg == "l, lo" && v == 10);

    v = extractArgN(" he<!--, -->", marg, 1, true);
    assert(marg == " he<!--, -->" && v == 12);

    v = extractArgN(" he<tag ',' \",\">", marg, 1, true);
    assert(marg == " he<tag ',' \",\">" && v == 16);

    v = extractArgN(" he\xFF{ , \xFF}a", marg, 1);
    assert(marg == " he\xFF{ , \xFF}a" && v == 11);
}


/*****************************************************
 * Expand macro.
 *
 * The macro processor is the same one used in Ddoc.
 *
 * Params:
 *      text = source text to expand
 *      table = table of name=value macro definitions
 *      html = true if recognize HTML tags and comments
 *
 * Returns:
 *      The source text after macro expansion.
*       The return string is GC allocated.
 */
public string expand(const(char)[] text, string[string] table, bool html = false)
{
    OutBuffer buf;

    /* Expand, in place, the macros found in buf.data[start .. *pend].
     *
     * Params:
     *  start = index in buf of the first byte of the region to process
     *  pend = in: index one past the region; out: the same boundary after
     *         the region has grown or shrunk. Left untouched when the
     *         recursion limit is hit.
     *  arg = argument text of the macro whose replacement text is being
     *        scanned; it supplies $0, $1..$9 and $+
     *  pinuse = head of the list (Inuse*) of macro values currently being
     *           expanded, used to detect recursive macros
     */
    void expandImpl(size_t start, size_t* pend, char[] arg, void* pinuse = null)
    {
        static int nest;
        if (nest > 100)         // limit recursive expansion
            return;
        nest++;
        scope (exit) nest--;    // restored even if an error unwinds through here

        static struct Inuse
        {
            Inuse* next;
            string value;
        }

        // True if `value` is (by identity, not by content) the replacement
        // text of a macro that is currently being expanded.
        bool isInuse(string value)
        {
            for (Inuse* p = cast(Inuse*)pinuse; p; p = p.next)
            {
                if (p.value is value)
                    return true;
            }
            return false;
        }

        // Copy src[] into tmp[] if it is shorter than tmp[], otherwise into a
        // malloc'd block. Release the result with buffree(result, tmp).
        static char[] bufdup(const(char)[] src, char[] tmp)
        {
            import core.exception : onOutOfMemoryError;
            import core.stdc.stdlib : malloc;

            char[] result;
            if (src.length < tmp.length)
                result = tmp[0 .. src.length];
            else
            {
                char* p = cast(char*)malloc(src.length * char.sizeof);
                if (!p)
                    onOutOfMemoryError();   // assert() would vanish in release builds
                result = p[0 .. src.length];
            }
            result[] = src[];
            return result;
        }

        // Counterpart of bufdup(): frees buf[] unless it lives in tmp[].
        static void buffree(char[] buf, const char[] tmp)
        {
            import core.stdc.stdlib : free;

            if (buf.ptr != tmp.ptr)
                free(buf.ptr);
        }

        size_t end = *pend;
        assert(start <= end);
        assert(end <= buf.offset);

        // copy arg[] as it may be a slice into buf[] which may shift
        // (the unittest build uses a tiny stack buffer to exercise the malloc path)
        version (unittest)
            char[2] argtmp = void;
        else
            char[10] argtmp = void;
        arg = bufdup(arg, argtmp);
        scope (exit) buffree(arg, argtmp);

        /* First pass - replace $x where x is a digit or '+'
         */
        for (size_t u = start; u + 1 < end; )
        {
            char* p = cast(char*)buf.data.ptr;  // buf.data is not loop invariant

            /* Look for $x, but not $$x, and replace it with arg.
             */
            if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+'))
            {
                if (u > start && p[u - 1] == '$')
                {   // Don't expand $$x, but replace it with $x
                    buf.remove(u - 1, 1);
                    end--;
                    u += 1;     // now u is one past the x
                    continue;
                }

                auto c = p[u + 1];
                int n = (c == '+') ? -1 : c - '0';

                char[] marg;
                if (n == 0)     // if $0
                    marg = arg;
                else
                    extractArgN(arg, marg, n, html);

                if (marg.length == 0)
                {   // Just remove macro invocation
                    buf.remove(u, 2);
                    end -= 2;
                }
                else if (c == '+')      // if $+
                {
                    // Replace '$+' with 'marg'
                    buf.remove(u, 2);
                    buf.insert(u, cast(ubyte[])marg);
                    end += marg.length - 2;

                    // Scan replaced text for further expansion
                    size_t mend = u + marg.length;
                    expandImpl(u, &mend, null, pinuse);
                    end += mend - (u + marg.length);
                    u = mend;
                }
                else
                {
                    // Replace '$n' with 'BLUEL marg BLUER'
                    buf.data[u] = BLUEL[0];
                    buf.data[u + 1] = BLUEL[1];
                    buf.insert(u + 2, cast(ubyte[])marg);
                    buf.insert(u + 2 + marg.length, cast(ubyte[])(BLUER[]));
                    end += -2 + BLUEL.length + marg.length + BLUER.length;

                    // Scan replaced text for further expansion
                    size_t mend = u + 2 + marg.length;
                    expandImpl(u + 2, &mend, null, pinuse);
                    end += mend - (u + 2 + marg.length);
                    u = mend;
                }
                continue;
            }

            u++;
        }

        /* Second pass - replace other macros.
         * The shortest invocation, $(c), is 4 bytes long, so scanning may go
         * on as long as bytes u .. u+3 lie inside the region.
         */
        for (size_t u = start; u + 3 < end; )
        {
            char* p = cast(char*)buf.data.ptr;  // buf.data is not loop invariant

            /* A valid start of macro expansion is $(c, where c is
             * an id start character, and not $$(c.
             */
            if (p[u] == '$' &&
                p[u + 1] == '(' &&
                isIdStart(p + u + 2))
            {
                char[] name;

                size_t v;
                /* Scan forward to find end of macro name and
                 * beginning of macro argument (marg).
                 */
                for (v = u + 2; v < end; v += utfStride(p + v))
                {
                    if (!isIdTail(p + v))
                    {   // We've gone past the end of the macro name.
                        name = p[u + 2 .. v];
                        break;
                    }
                }
                // p is a raw pointer, so p[v .. end] is not bounds checked:
                // never let a multi-byte stride carry v past the region.
                if (v > end)
                    v = end;

                char[] marg;
                v += extractArgN(p[v .. end], marg, 0, html);
                assert(v <= end);

                if (v < end)
                {   // v is on the closing ')'
                    if (u > start && p[u - 1] == '$')
                    {   // Don't expand $$(NAME), but replace it with $(NAME)
                        buf.remove(u - 1, 1);
                        end--;
                        u = v;  // now u is one past the closing ')'
                        continue;
                    }

                    auto pm = name in table;
                    if (pm)
                    {
                        auto m = *pm;
                        bool mIsInuse = isInuse(m);

                        if (mIsInuse && marg.length == 0)
                        {   // Remove macro invocation because it expands to nothing
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                            // The text that followed the invocation is now at u
                            // and has not been looked at yet, so do not step over it.
                            continue;
                        }
                        else if (mIsInuse &&
                                 (arg == marg ||
                                  (arg.length + 4 == marg.length &&
                                   marg[0] == BLUEL[0] &&
                                   marg[1] == BLUEL[1] &&
                                   arg == marg[2 .. marg.length - 2] &&
                                   marg[marg.length - 2] == BLUER[0] &&
                                   marg[marg.length - 1] == BLUER[1]
                                  )
                                 )
                                )
                        {   // Recursive expansion; just leave in place
                            ;
                        }
                        else
                        {
                            // copy marg[] as it is a slice into buf which will shift
                            version (unittest)
                                char[2] margtmp = void;
                            else
                                char[10] margtmp = void;
                            marg = bufdup(marg, margtmp);
                            scope (exit) buffree(marg, margtmp);

                            // Insert replacement text, wrapped in blue paint,
                            // right after the closing ')'
                            buf.spread(v + 1, BLUEL.length + m.length + BLUER.length);
                            buf.data[v + 1] = BLUEL[0];
                            buf.data[v + 2] = BLUEL[1];
                            buf.data[v + 3 .. v + 3 + m.length] = cast(ubyte[])m[];
                            buf.data[v + 3 + m.length] = BLUER[0];
                            buf.data[v + 3 + m.length + 1] = BLUER[1];

                            end += 2 + m.length + 2;

                            // Scan replaced text for further expansion
                            Inuse inuse;
                            inuse.next = cast(Inuse*)pinuse;
                            inuse.value = m;

                            size_t mend = v + 1 + 2 + m.length + 2;
                            expandImpl(v + 1, &mend, marg, &inuse);
                            end += mend - (v + 1 + 2 + m.length + 2);

                            // Drop the original $(NAME ...) text and resume
                            // after the expanded replacement
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                            u += mend - (v + 1);
                            continue;
                        }
                    }
                    else
                    {
                        // Replace $(NAME) with nothing
                        buf.remove(u, v + 1 - u);
                        end -= (v + 1 - u);
                        continue;
                    }
                }
            }
            u++;
        }
        *pend = end;
    }

    buf = new OutBuffer();
    buf.write(text);
    size_t end = buf.offset;
    expandImpl(0, &end, null);
    assert(end == buf.offset);

    /* Remove the blue paint: drop every 0xFF byte together with the byte
     * that follows it, compacting the buffer in place.
     */
    size_t j;
    for (size_t i = 0; i < buf.offset; ++i)
    {
        char c = buf.data[i];
        if (c == BLUEL[0] && i + 1 < buf.offset)
            ++i;
        else
            buf.data[j++] = c;
    }

    // Convert result to string
    return cast(string)buf.data[0 .. j];
}

///
unittest
{
    string[string] table;
    string s;

    s = expand("hello", table);
    assert(s == "hello");

    table["ABC"] = "def";
    s = expand("foo$(ABC)", table);
    assert(s == "foodef");

    s = expand("foo$(DEF)", table);
    assert(s == "foo");

    table["GHI"] = "";
    s = expand("foo$(GHI)x", table);
    assert(s == "foox");

    table["JKI"] = "$(JKI)";
    s = expand("foo$(JKI)x", table);
    assert(s == "foox");

    s = expand("foo$$(JKI)x", table);
    assert(s == "foo$(JKI)x");

    s = expand("foo$(123)x", table);
    assert(s == "foo$(123)x");

    table["M3"] = "$0";
    s = expand("foo$(M3)x", table);
    assert(s == "foox");

    s = expand("foo$(M3 $(M3 1) 1)x", table);
    assert(s == "foo1 1x");

    table["M4"] = "$+";
    s = expand("foo$(M4 1,2,3)x", table);
    assert(s == "foo2,3x");

    table["M5"] = "$$1";
    s = expand("foo$(M5 1,2,3)x", table);
    assert(s == "foo$1x");

    table["M6"] = "$(M6 $0)";
    s = expand("foo$(M6 1)x", table);
    assert(s == "foo$(M6 1)x");

    // a 4-character invocation at the very end of the text
    table["A"] = "x";
    s = expand("$(A)", table);
    assert(s == "x");

    s = expand("$$(A)", table);
    assert(s == "$(A)");

    // text following a removed recursive invocation is still expanded
    table["R"] = "$(R)$(ABC)";
    s = expand("a$(R)b", table);
    assert(s == "adefb");
}

/*****************************************************
 * Remove nbytes bytes starting at index from buf, moving the bytes that
 * follow them down and shrinking buf.offset accordingly.
 */
void remove(OutBuffer buf, size_t index, size_t nbytes)
{
    assert(index + nbytes <= buf.offset);
    for (size_t i = 0; i < buf.offset - (index + nbytes); ++i)
    {
        buf.data[index + i] = buf.data[index + i + nbytes];
    }
    buf.offset -= nbytes;
}

/*****************************************************
 * Insert data[] into buf at index: a gap of data.length bytes is opened
 * with buf.spread() and then filled with data[].
 */
void insert(OutBuffer buf, size_t index, ubyte[] data)
{
    buf.spread(index, data.length);
    for (size_t i = 0; i < data.length; ++i)
    {
        buf.data[index + i] = data[i];
    }
}

/*****************************************************
 * Returns: 1 if *p can start a macro name (a letter according to isalpha(),
 * or '_'), 0 otherwise. Only the single byte at p is examined.
 */
int isIdStart(const char *p)
{
    char c = *p;
    if (isalpha(c) || c == '_')
        return 1;
    /+ fix later
    if (c >= 0x80)
    {   size_t i = 0;
        if (utf_decodeChar(p, 4, &i, &c))
            return 0;   // ignore errors
        if (std.uni.isAlpha(c))
            return 1;
    }
    +/
    return 0;
}
/*****************************************************
 * Returns: 1 if *p can continue a macro name (a letter or digit according to
 * isalnum(), or '_'), 0 otherwise. Bytes >= 0x80 are deferred to isIdStart().
 */
int isIdTail(const char *p)
{
    char c = *p;
    if (isalnum(c) || c == '_')
        return 1;
    if (c >= 0x80)
    {
        return isIdStart(p);
    }
    return 0;
}

/*****************************************************
 * Returns: the number of bytes to advance past the UTF-8 sequence whose
 * first byte is *p: 1 for an ASCII byte, 2..4 for a lead byte announcing
 * that many bytes, and 1 for any other byte (errors consume 1 character).
 * Only the first byte is examined.
 */
int utfStride(const char *p)
{
    char c = *p;
    if (c < 0x80)
        return 1;

    // 0xFF has no zero bit at all: the mask below would be 0, for which
    // bsr() has no defined result.
    if (c == 0xFF)
        return 1;

    import core.bitop : bsr;
    // The number of leading 1 bits is found from the position of the highest
    // 0 bit. Widen and mask explicitly so the result does not depend on
    // whether `~` promotes char to int.
    immutable msbs = 7 - bsr((~cast(uint)c) & 0xFF);
    if (msbs < 2 || msbs > 4)
        return 1;       // errors consume 1 character
    return msbs;
}