当前位置: 编程技术>.net/c#/asp.net
C#读取HTML文件内容写入记事本的代码
来源: 互联网 发布时间:2014-08-30
本文导语: 代码如下: try { int totalFile = 0; //string dirPath = @"E:\files\BSC6810 alarm"; if (this.textBox1.Text.Trim() == "") { MessageBox.Show("请输入HTML文件路径!"); } else { string dirPath = this.textBox1.Text.Trim(); ...
代码如下:
try { int totalFile = 0; //string dirPath = @"E:filesBSC6810 alarm"; if (this.textBox1.Text.Trim() == "") { MessageBox.Show("请输入HTML文件路径!"); } else { string dirPath = this.textBox1.Text.Trim(); if (!dirPath.Substring(dirPath.Length - 1).Contains("\")) { dirPath = dirPath+"\"; } StreamWriter sw; DirectoryInfo dirInfo = new DirectoryInfo(dirPath); FileInfo[] files = dirInfo.GetFiles(); string filename = dirPath + "告警经验库信息.txt"; if (File.Exists(filename)) { sw = File.AppendText(filename); } else { sw = File.CreateText(filename); } foreach (FileInfo fileinfo in files) { if (fileinfo.Extension.Equals(".htm"))//遍历所有htm文件 { totalFile = totalFile + 1; WebRequest myWebRequest = WebRequest.Create(dirPath + fileinfo.Name); WebResponse myWebResponse = myWebRequest.GetResponse(); Stream myStream = myWebResponse.GetResponseStream(); Encoding encode = System.Text.Encoding.GetEncoding("gb2312"); StreamReader myStreamReader = new StreamReader(myStream, encode); string strhtml = myStreamReader.ReadToEnd(); myWebResponse.Close(); string stroutput = strhtml; Regex regex = new Regex(@"]+>|]+>");//去掉HTML标记的正则表达式 string tmpStr = "([^