首页 .NET/Web ASP.NET C#之清除html标签

ASP.NET C#之清除html标签

作者:胡同里的砖头 围观群众:1811 更新于:2013-12-19

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Common.Common
{
   public class DeleteHtml
    {
       public static string DelHTML(string Htmlstring)//将HTML去除
       {
           #region
           //删除脚本

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           //删除HTML

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"-->", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<!--.*", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           //Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<A>.*</A>","");

           //Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<[a-zA-Z]*=\.[a-zA-Z]*\?[a-zA-Z]+=\d&\w=%[a-zA-Z]*|[A-Z0-9]","");

 

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(amp|#38);", "&", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(lt|#60);", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(gt|#62);", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&#(\d+);", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&ldquo;", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&rdquo;", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
           Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @";", ";", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

           Htmlstring.Replace("<", "");

           Htmlstring.Replace(">", "");

           Htmlstring.Replace("\r\n", "");

           //Htmlstring=HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
           #endregion


           return Htmlstring;

       }


    }
}

  • 本文标题: ASP.NET C#之清除html标签
  • 文章分类:【.NET/Web】
  • 非特殊说明,本文版权归【胡同里的砖头】个人博客 所有,转载请注明出处.
留言评论
站点声明:
1、本站【胡同里的砖头】个人博客,借鉴网上一些博客模板,取其各优点模块自行拼装开发,本博客开发纯属个人爱好。
2、所有笔记提供给广大用户交流使用,可转载,可复制,纯个人开发所遇问题锦集记录使用
Copyright © huzlblog.com All Rights Reserved. 备案号:苏ICP备2021056683号-8