Removing extra whitespace from generated HTML in MVC

2019-01-21 07:51发布

I have an MVC application view that is generating quite a large HTML table of values (>20MB).

I am compressing the view in the controller using a compression filter:

 /// <summary>
 /// Action filter that compresses the response body with gzip or deflate,
 /// depending on what the request's Accept-Encoding header advertises.
 /// </summary>
 internal class CompressFilter : ActionFilterAttribute
 {
     /// <summary>
     /// Wraps the response filter stream in a compressing stream before the action runs.
     /// Does nothing when the request carries no Accept-Encoding header, or when the
     /// header mentions neither gzip nor deflate. gzip wins when both are present.
     /// </summary>
     /// <param name="filterContext">Context giving access to the current request and response.</param>
     public override void OnActionExecuting(ActionExecutingContext filterContext)
     {
         string advertised = filterContext.HttpContext.Request.Headers["Accept-Encoding"];
         if (string.IsNullOrEmpty(advertised))
         {
             return;
         }

         // Upper-case once so the substring checks below ignore the header's casing.
         string normalized = advertised.ToUpperInvariant();
         HttpResponseBase output = filterContext.HttpContext.Response;

         if (normalized.Contains("GZIP"))
         {
             output.AppendHeader("Content-encoding", "gzip");
             output.Filter = new GZipStream(output.Filter, CompressionMode.Compress);
             return;
         }

         if (normalized.Contains("DEFLATE"))
         {
             output.AppendHeader("Content-encoding", "deflate");
             output.Filter = new DeflateStream(output.Filter, CompressionMode.Compress);
         }
     }
 }

Is there a way to also eliminate the (quite large) amount of redundant whitespace generated in the view before I run the compress filter (to reduce compression workload and size)?

EDIT: I got it working using the WhiteSpaceFilter technique suggested by Womp below.

For interest, here are the results, as analysed by Firebug:

1) No Compression, no whitespace strip - 21MB, 2.59 minutes
2) With GZIP compression, no whitespace strip - 2MB, 17.59s
3) With GZIP compression, whitespace strip - 558kB, 12.77s

So certainly worth it.

8条回答
女痞
2楼-- · 2019-01-21 08:44

This guy wrote a neat little whitespace compactor that simply runs a fast block copy of your bytes through a regular expression to strip out blobs of space. He wrote it as an http module, but you could take the 7 lines of workhorse code out of it and plop it into your function.

查看更多
叛逆
3楼-- · 2019-01-21 08:45
#region Stream filter
/// <summary>
/// Write-through stream that decodes every written chunk to a string, passes it
/// through a caller-supplied transform, re-encodes the result and forwards it to
/// the wrapped sink stream. All other operations are delegated to the sink.
/// </summary>
/// <remarks>
/// The transform is applied independently to each <see cref="Write"/> call, so it
/// only ever sees one chunk at a time; text spanning two chunks is not seen as a whole.
/// NOTE(review): chunks are decoded with <see cref="Encoding.Default"/>; a multi-byte
/// character split across two writes would presumably be decoded incorrectly, and the
/// response may use a different encoding - confirm against the hosting configuration.
/// </remarks>
class StringFilterStream : Stream
{
  private readonly Stream _sink;
  private readonly Func<string, string> _filter;

  /// <summary>Creates a filtering wrapper around <paramref name="sink"/>.</summary>
  /// <param name="sink">Stream that receives the filtered output.</param>
  /// <param name="filter">Transform applied to the text of every written chunk.</param>
  public StringFilterStream(Stream sink, Func<string, string> filter) {
    _sink = sink;
    _filter = filter;
  }

  #region Mixin Properties/Methods
  public override bool CanRead { get { return true; } }
  public override bool CanSeek { get { return true; } }
  public override bool CanWrite { get { return true; } }
  public override void Flush() { _sink.Flush(); }
  public override long Length { get { return 0; } }
  private long _position;
  public override long Position {
    get { return _position; }
    set { _position = value; }
  }
  public override int Read(byte[] buffer, int offset, int count) {
    return _sink.Read(buffer, offset, count);
  }
  public override long Seek(long offset, SeekOrigin origin) {
    return _sink.Seek(offset, origin);
  }
  public override void SetLength(long value) {
    _sink.SetLength(value);
  }
  public override void Close() {
    _sink.Close();
  }
  #endregion

  /// <summary>
  /// Decodes the requested slice of <paramref name="buffer"/>, runs it through the
  /// filter and writes the re-encoded result to the sink.
  /// </summary>
  /// <param name="buffer">Source bytes.</param>
  /// <param name="offset">Index in <paramref name="buffer"/> of the first byte to process.</param>
  /// <param name="count">Number of bytes to process.</param>
  public override void Write(byte[] buffer, int offset, int count) {
    // Decode only [offset, offset + count). Decoding the whole array would also
    // emit whatever lies outside the caller's slice (for example stale bytes in a
    // reused buffer), corrupting or duplicating output.
    string s = Encoding.Default.GetString(buffer, offset, count);

    // apply the filter
    s = _filter(s);

    // write the data back to stream
    byte[] outdata = Encoding.Default.GetBytes(s);
    _sink.Write(outdata, 0, outdata.Length);
  }
}
#endregion

/// <summary>
/// Kind of response content a whitespace filter is applied to; selects which
/// set of whitespace-stripping rules is used.
/// </summary>
public enum WebWhitespaceFilterContentType
{
  /// <summary>Markup content (the default used by the whitespace filter attribute).</summary>
  Xml = 0,

  /// <summary>Style sheet content.</summary>
  Css = 1,

  /// <summary>Script content.</summary>
  Javascript = 2
}
/// <summary>
/// Action filter that strips redundant whitespace from the response by installing
/// a <c>StringFilterStream</c> on the response's filter chain. The stripping rules
/// depend on the configured <see cref="WebWhitespaceFilterContentType"/>.
/// </summary>
/// <remarks>
/// The rules are plain regular expressions applied to each written chunk; they do
/// not parse the content, so whitespace inside string literals or whitespace-sensitive
/// elements is collapsed as well.
/// </remarks>
public class WebWhitespaceFilterAttribute : ActionFilterAttribute
{
  private readonly WebWhitespaceFilterContentType _contentType;

  /// <summary>Creates a filter using the markup (<c>Xml</c>) rules.</summary>
  public WebWhitespaceFilterAttribute() {
    _contentType = WebWhitespaceFilterContentType.Xml;
  }

  /// <summary>Creates a filter using the rules for the given content type.</summary>
  /// <param name="contentType">Kind of content the decorated action produces.</param>
  public WebWhitespaceFilterAttribute(WebWhitespaceFilterContentType contentType) {
    _contentType = contentType;
  }

  /// <summary>
  /// Wraps the response filter stream so everything written afterwards is
  /// whitespace-compacted according to the configured content type.
  /// </summary>
  /// <param name="filterContext">Context giving access to the current response.</param>
  public override void OnActionExecuting(ActionExecutingContext filterContext) {

    var response = filterContext.HttpContext.Response;

    switch (_contentType) {
      case WebWhitespaceFilterContentType.Xml: {

        // The filter runs once per chunk written to the stream, but the line break
        // after the doctype belongs only after the first '>' of the whole response.
        // This flag is local to the current call (not a field on the attribute), so
        // the state is never shared between requests.
        bool doctypeBreakPending = true;

        response.Filter = new StringFilterStream(response.Filter, s => {
          // Collapse every whitespace run (newlines included) to a single space.
          // No newline survives this step, so no separate newline rule is needed.
          s = Regex.Replace(s, @"\s+", " ");
          s = Regex.Replace(s, @"\s*\>\s*\<\s*", "><");
          // single-line doctype must be preserved
          // (assumes the first '>' of the response closes the doctype - TODO confirm)
          if (doctypeBreakPending) {
            int firstEndBracketPosition = s.IndexOf('>');
            if (firstEndBracketPosition >= 0) {
              s = s.Insert(firstEndBracketPosition + 1, "\n");
              doctypeBreakPending = false;
            }
          }
          return s;
        });
        break;
      }

      case WebWhitespaceFilterContentType.Css:
      case WebWhitespaceFilterContentType.Javascript:

        response.Filter = new StringFilterStream(response.Filter, s => {
          // Drop /* ... */ block comments first, then squeeze the whitespace
          // around braces and semicolons.
          s = Regex.Replace(s, @"/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/", "");
          s = Regex.Replace(s, @"\s+", " ");
          s = Regex.Replace(s, @"\s*{\s*", "{");
          s = Regex.Replace(s, @"\s*}\s*", "}");
          s = Regex.Replace(s, @"\s*;\s*", ";");
          return s;
        });
        break;
    }
  }
}
查看更多
登录 后发表回答