module compressor;


import std.array;
import std.regex;
import std.string;


/**
 * Bit flags selecting which minification passes `compress` applies.
 * Flags may be OR-ed together; `defaults` enables every pass.
 */
enum CompressOptions : uint {
    none = 0,
    removeMultiSpaces = 1 << 0, // replace multiple consecutive spaces with a single space
    removeLineBreaks = 1 << 1, // replace line breaks with a space
    removeHTMLComments = 1 << 2, // remove any html comments - preserving conditional comments
    removeTagSpaces = 1 << 3, // remove unnecessary spaces around = in tags i.e. <div id = "foo"> -> <div id="foo">
    removeTagQuotes = 1 << 4, // remove unnecessary quotes around attribute values <div id="foo"> -> <div id=foo>
    removeTagSurroundSpaces = 1 << 5, // remove spaces around some tags i.e <ul> <li>

    defaults = ~0U,
}


/**
 * Minifies an HTML string according to the selected options.
 *
 * Params:
 *     content = HTML text to compress.
 *     options = bitwise combination of `CompressOptions` flags.
 *
 * Returns: the compressed HTML text.
 */
auto compress(string content, CompressOptions options) {
    // Placeholder that shields line breaks from the whitespace-eating passes
    // when the caller wants them preserved.
    enum lineBreakMarker = "%%~LB~%%";

    bool has(CompressOptions flag) {
        return (options & flag) != 0;
    }

    const keepLineBreaks = !has(CompressOptions.removeLineBreaks);

    if (keepLineBreaks) {
        content = content.replaceAll(linebreaks, lineBreakMarker);
    } else {
        // Replace line breaks with a space even when removeMultiSpaces is off;
        // previously removeLineBreaks on its own had no effect. When both
        // flags are set the multispace pass below collapses the result, so
        // the combined output is unchanged.
        content = content.replaceAll(linebreaks, " ");
    }

    if (has(CompressOptions.removeMultiSpaces)) {
        content = content.replaceAll(multispaces, " ");
    }

    if (has(CompressOptions.removeHTMLComments)) {
        content = content.replaceAll(htmlComments, "");
    }

    if (has(CompressOptions.removeTagSpaces)) {
        content = content.replaceAll(tagSpaces, "$1=");

        string removeEndSpaces(Captures!(string) capture) {
            // keep space if attribute is unquoted before trailing slash,
            // otherwise the slash would become part of the attribute value
            const selfClosing = capture[2][0] == '/';
            const endsUnquoted = !matchAll(capture[1], tagSpacesEndLastQuote).empty;
            return (selfClosing && endsUnquoted)
                ? (capture[1] ~ " " ~ capture[2])
                : (capture[1] ~ capture[2]);
        }

        content = content.replaceAll!removeEndSpaces(tagSpacesEnd);
    }

    if (has(CompressOptions.removeTagQuotes)) {
        string removeQuotes(Captures!(string) capture) {
            // capture[2] is the bare attribute value, capture[3] an optional
            // trailing slash that must stay separated from the value
            return (capture[3].strip.empty)
                ? ("=" ~ capture[2])
                : ("=" ~ capture[2] ~ " " ~ capture[3]);
        }

        content = content.replaceAll!removeQuotes(tagQuotes);
    }

    if (has(CompressOptions.removeTagSurroundSpaces)) {
        content = content.replaceAll(tagSurround, "$1");
    }

    if (keepLineBreaks) {
        content = content.replace(lineBreakMarker, "\n");
    }

    return content;
}


// Patterns used by the compression passes in `compress`.
private __gshared {
    auto multispaces = regex(`\s+`, "i");
    auto linebreaks = regex(`(?:\r\n)|(?:\n)`, "i");
    auto htmlComments = regex(`<!---->|<!--[^\[].*?-->`, "i");
    auto tagSpaces = regex(`(\s\w+)\s*=\s*(?=[^<]*?>)`, "i");
    auto tagSpacesEnd = regex(`(<\w+(?:\s+[a-z0-9-_]+(?:\s*=\s*(?:(?:[a-z0-9-_]+)|(?:"[^"]*")|(?:'[^']*')))?)*)(?:\s+?)(/?>)`, "i");
    // Matches a tag prefix that ends in an unquoted attribute value.
    // The pattern previously carried stray literal double quotes around it
    // and therefore could never match.
    auto tagSpacesEndLastQuote = regex(`=\s*[a-z0-9-_]+$`, "i");
    auto tagQuotes = regex(`\s*=\s*(["'])([a-z0-9-_]+?)\1(/?)(?=[^<]*?>)`, "i");
    auto tagSurround = regex(`\s*(</?(?:html|head|body|br|p|div|center|dl|form|hr|ol|ul|table|tbody|tr|td|th|tfoot|thead)(?:>|[\s/][^>]*>))\s*`, "i");
    auto tagInterSpace = regex(`>\s+<`, "i");
}