1 module compressor;
2 
3 
4 import std.array;
5 import std.regex;
6 import std.string;
7 
8 
/// Bit flags selecting which compression passes `compress` applies.
enum CompressOptions : uint {
	/// no pass selected
	none = 0x00,

	/// replace multiple consecutive spaces with a single space
	removeMultiSpaces = 0x01,

	/// replace line breaks with a space
	removeLineBreaks = 0x02,

	/// remove any html comments - preserving conditional comments
	removeHTMLComments = 0x04,

	/// remove unnecessary spaces around = in tags, i.e. <div id = "foo"> -> <div id="foo">
	removeTagSpaces = 0x08,

	/// remove unnecessary quotes around attribute values, i.e. <div id="foo"> -> <div id=foo>
	removeTagQuotes = 0x10,

	/// remove spaces around some tags, i.e. <ul> <li>
	removeTagSurroundSpaces = 0x20,

	/// every bit set, so all passes are selected
	defaults = uint.max,
}
20 
21 
/**
 * Compresses HTML markup by running the passes selected in `options`.
 *
 * Params:
 *   content = the HTML text to compress
 *   options = bitwise combination of `CompressOptions` flags
 *
 * Returns: the compressed markup as a string
 */
auto compress(string content, CompressOptions options) {
	immutable keepLineBreaks = (options & CompressOptions.removeLineBreaks) == 0;

	if (keepLineBreaks) {
		// park line breaks behind a placeholder so the whitespace passes below
		// cannot swallow them; they are restored just before returning
		content = content.replaceAll(linebreaks, "%%~LB~%%");
	} else {
		// turn every line break into a space here, so the flag also takes effect
		// when removeMultiSpaces is not set (that pass would otherwise be the
		// only one touching line breaks)
		content = content.replaceAll(linebreaks, " ");
	}

	if (options & CompressOptions.removeMultiSpaces) {
		content = content.replaceAll(multispaces, " ");
	}

	if (options & CompressOptions.removeHTMLComments) {
		content = content.replaceAll(htmlComments, "");
	}

	if (options & CompressOptions.removeTagSpaces) {
		// drop the spaces around '=' inside tags, keeping the attribute name ($1)
		content = content.replaceAll(tagSpaces, "$1=");

		// capture[1] is the tag up to its last attribute, capture[2] is ">" or "/>"
		string removeEndSpaces(Captures!(string) capture) {
			auto tagHead = capture[1];
			auto tagClose = capture[2];

			// keep one space before "/>" when the last attribute value is unquoted,
			// otherwise the slash would be glued onto that value
			immutable keepSpace = (tagClose[0] == '/') && (!matchAll(tagHead, tagSpacesEndLastQuote).empty);

			return keepSpace ? (tagHead ~ " " ~ tagClose) : (tagHead ~ tagClose);
		}

		content = content.replaceAll!removeEndSpaces(tagSpacesEnd);
	}

	if (options & CompressOptions.removeTagQuotes) {
		// capture[2] is the attribute value without quotes, capture[3] an optional "/"
		string removeQuotes(Captures!(string) capture) {
			// separate a following slash from the now unquoted value with a space
			return (capture[3].strip.empty) ? ("=" ~ capture[2]) : ("=" ~ capture[2] ~ " " ~ capture[3]);
		}

		content = content.replaceAll!removeQuotes(tagQuotes);
	}

	if (options & CompressOptions.removeTagSurroundSpaces) {
		content = content.replaceAll(tagSurround, "$1");
	}

	if (keepLineBreaks) {
		// restore the preserved line breaks (each comes back as "\n")
		content = content.replace("%%~LB~%%", "\n");
	}

	return content;
}
64 
65 
// Regular expressions used by the compression passes; all are compiled
// with the case-insensitive "i" flag.
private __gshared {
	// one or more whitespace characters
	auto multispaces = regex(`\s+`, "i");

	// a single line break, either "\r\n" or "\n"
	auto linebreaks = regex(`(?:\r\n)|(?:\n)`, "i");

	// an empty comment, or a comment whose first character is not '['
	// (comments opening with '[' are left alone)
	auto htmlComments = regex(`<!---->|<!--[^\[].*?-->`, "i");

	// optional spaces around '=' after an attribute name (capture 1), only
	// where a '>' follows before the next '<'
	auto tagSpaces = regex(`(\s\w+)\s*=\s*(?=[^<]*?>)`, "i");

	// whitespace between a tag's last attribute and its closing ">" or "/>";
	// capture 1 is the tag up to that whitespace, capture 2 is the closing part
	auto tagSpacesEnd = regex(`(<\w+(?:\s+[a-z0-9-_]+(?:\s*=\s*(?:(?:[a-z0-9-_]+)|(?:"[^"]*")|(?:'[^']*')))?)*)(?:\s+?)(/?>)`, "i");

	// text that ends in '=' followed by an unquoted attribute value.
	// The pattern must not be wrapped in literal double quotes: a '"' required
	// after the '$' end anchor can never match, which disabled this check.
	auto tagSpacesEndLastQuote = regex(`=\s*[a-z0-9-_]+$`, "i");

	// a quoted attribute value made only of simple characters; capture 1 is the
	// quote character, capture 2 the value, capture 3 an optional trailing '/'
	auto tagQuotes = regex(`\s*=\s*(["'])([a-z0-9-_]+?)\1(/?)(?=[^<]*?>)`, "i");

	// one of the listed tags (opening or closing, capture 1) together with the
	// whitespace on both sides of it
	auto tagSurround = regex(`\s*(</?(?:html|head|body|br|p|div|center|dl|form|hr|ol|ul|table|tbody|tr|td|th|tfoot|thead)(?:>|[\s/][^>]*>))\s*`, "i");

	// whitespace between a '>' and the following '<'
	auto tagInterSpace = regex(`>\s+<`, "i");
}