博客園博客PDF生成器
周末寫了一個博客園博客PDF生成器,由于博客園文件上傳大小的限制,我把源代碼放在CSDN上了(相信大家都有帳號哈),如果沒有帳號的請留下郵箱,我會盡快發給你,當然如果哪位朋友能幫忙把源代碼上傳到博客園上更好:博客園博客PDF生成器
廢話不多說,直接看生成后的PDF效果哈:

博客中圖片效果:

代碼比較簡單,這里先簡單說一下思路,先通過博客地址取得該博客的RSS信息,這是一個XML文件,把源碼存在本地,然后解析這個XML文件,從中取出需要的信息,再用iTextSharp這個DLL來操作PDF,從而生成PDF文檔。
下面只貼出幾個主要的類,大家有興趣可以下載源代碼看:
實體類channel,類屬性是從XML文件中取得的:
實體類:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace BlogsConvert
{
/// <summary>
/// Plain data holder describing a blog as a whole (title, link, description,
/// language, build/publish timestamps and TTL). It carries no behavior.
/// </summary>
public class channel
{
    /// <summary>Gets or sets the blog title.</summary>
    public string Title { get; set; }

    /// <summary>Gets or sets the blog link.</summary>
    public string Link { get; set; }

    /// <summary>Gets or sets the blog description.</summary>
    public string Description { get; set; }

    /// <summary>Gets or sets the language value.</summary>
    public string Language { get; set; }

    /// <summary>Gets or sets the last build timestamp.</summary>
    public DateTime LastBuildDate { get; set; }

    /// <summary>Gets or sets the publication timestamp.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>Gets or sets the TTL value.</summary>
    public int Ttl { get; set; }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace BlogsConvert
{
/// <summary>
/// Plain data holder describing a blog as a whole (title, link, description,
/// language, build/publish timestamps and TTL). It carries no behavior.
/// </summary>
public class channel
{
    /// <summary>Gets or sets the blog title.</summary>
    public string Title { get; set; }

    /// <summary>Gets or sets the blog link.</summary>
    public string Link { get; set; }

    /// <summary>Gets or sets the blog description.</summary>
    public string Description { get; set; }

    /// <summary>Gets or sets the language value.</summary>
    public string Language { get; set; }

    /// <summary>Gets or sets the last build timestamp.</summary>
    public DateTime LastBuildDate { get; set; }

    /// <summary>Gets or sets the publication timestamp.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>Gets or sets the TTL value.</summary>
    public int Ttl { get; set; }
}
}
實體類item(屬性來自XML文件):
實體類:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace BlogsConvert
{
/// <summary>
/// Plain data holder describing one blog article (title, link, creator,
/// author, publication timestamp, guid and body). It carries no behavior.
/// </summary>
public class item
{
    /// <summary>Gets or sets the article title.</summary>
    public string Title { get; set; }

    /// <summary>Gets or sets the article link.</summary>
    public string Link { get; set; }

    /// <summary>Gets or sets the creator value.</summary>
    public string Dc_creator { get; set; }

    /// <summary>Gets or sets the author value.</summary>
    public string Author { get; set; }

    /// <summary>Gets or sets the publication timestamp.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>Gets or sets the guid value.</summary>
    public string Guid { get; set; }

    /// <summary>Gets or sets the article body.</summary>
    public string Description { get; set; }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace BlogsConvert
{
/// <summary>
/// Plain data holder describing one blog article (title, link, creator,
/// author, publication timestamp, guid and body). It carries no behavior.
/// </summary>
public class item
{
    /// <summary>Gets or sets the article title.</summary>
    public string Title { get; set; }

    /// <summary>Gets or sets the article link.</summary>
    public string Link { get; set; }

    /// <summary>Gets or sets the creator value.</summary>
    public string Dc_creator { get; set; }

    /// <summary>Gets or sets the author value.</summary>
    public string Author { get; set; }

    /// <summary>Gets or sets the publication timestamp.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>Gets or sets the guid value.</summary>
    public string Guid { get; set; }

    /// <summary>Gets or sets the article body.</summary>
    public string Description { get; set; }
}
}
從XML文件中提取博客信息類:
代碼
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.Xml;
namespace BlogsConvert
{
/// <summary>
/// Extracts blog-level and article-level information from an XML file
/// whose root element contains a "channel" element with "item" children.
/// </summary>
public class BlogsInfo
{
    /// <summary>
    /// Reads the blog-level information from the XML file.
    /// </summary>
    /// <param name="xmlPath">Path of the XML file.</param>
    /// <returns>
    /// The populated <see cref="channel"/>, or null when the document has no
    /// "channel" element or the channel has no non-blank title.
    /// </returns>
    public channel GetChannel(string xmlPath)
    {
        XmlNode channelNode = LoadChannelNode(xmlPath);
        if (channelNode == null)
            return null;

        channel cha = new channel();
        foreach (XmlNode chanode in channelNode.ChildNodes)
        {
            // Stop scanning as soon as the first article is reached.
            if (chanode.Name == "item")
                break;

            switch (chanode.Name)
            {
                case "title":
                    cha.Title = chanode.InnerText;
                    break;
                case "link":
                    cha.Link = chanode.InnerText;
                    break;
                case "description":
                    cha.Description = chanode.InnerText;
                    break;
                case "language":
                    cha.Language = chanode.InnerText;
                    break;
                case "lastBuildDate":
                    cha.LastBuildDate = DateTime.Parse(chanode.InnerText);
                    break;
                case "pubDate":
                    cha.PubDate = DateTime.Parse(chanode.InnerText);
                    break;
                case "ttl":
                    cha.Ttl = int.Parse(chanode.InnerText);
                    break;
            }
        }

        // Title is null when no <title> element was seen; treat that like a
        // blank title instead of dereferencing null.
        if (cha.Title == null || cha.Title.Trim() == "")
            return null;
        return cha;
    }

    /// <summary>
    /// Reads every article from the XML file (no keyword filter).
    /// </summary>
    /// <param name="xmlPath">Path of the XML file.</param>
    /// <returns>The articles, or null when none were found.</returns>
    public IList<item> GetItems(string xmlPath)
    {
        return GetItems(xmlPath, "");
    }

    /// <summary>
    /// Reads the articles from the XML file, optionally keeping only those
    /// whose title contains a keyword.
    /// </summary>
    /// <param name="xmlPath">Path of the XML file.</param>
    /// <param name="keyWord">
    /// Keyword that the title must contain; null or blank disables filtering.
    /// </param>
    /// <returns>
    /// The matching articles that have a link, or null when there are none
    /// (callers must handle the null result).
    /// </returns>
    public IList<item> GetItems(string xmlPath, string keyWord)
    {
        IList<item> itemList = new List<item>();
        bool filtering = keyWord != null && keyWord.Trim() != "";

        XmlNode channelNode = LoadChannelNode(xmlPath);
        if (channelNode != null)
        {
            foreach (XmlNode statusnode in channelNode.ChildNodes)
            {
                if (statusnode.Name != "item")
                    continue;

                // Decide on the keyword first, independent of the order in
                // which the child elements appear inside the item.
                if (filtering)
                {
                    XmlNode titleNode = statusnode["title"];
                    if (titleNode == null || !titleNode.InnerText.Contains(keyWord))
                        continue;
                }

                item temp = new item();
                foreach (XmlNode o in statusnode.ChildNodes)
                {
                    switch (o.Name)
                    {
                        case "title":
                            temp.Title = o.InnerText;
                            break;
                        case "link":
                            temp.Link = o.InnerText;
                            break;
                        case "dc:creator":
                            temp.Dc_creator = o.InnerText;
                            break;
                        case "author":
                            temp.Author = o.InnerText;
                            break;
                        case "pubDate":
                            temp.PubDate = DateTime.Parse(o.InnerText);
                            break;
                        case "guid":
                            temp.Guid = o.InnerText;
                            break;
                        case "description":
                            temp.Description = o.InnerText;
                            break;
                    }
                }

                // Articles without a link are dropped.
                if (temp.Link != null)
                    itemList.Add(temp);
            }
        }

        if (itemList.Count > 0)
            return itemList;
        return null;
    }

    /// <summary>
    /// Loads the XML file and returns the first "channel" child element of
    /// the document root, or null when there is none.
    /// </summary>
    private static XmlNode LoadChannelNode(string xmlPath)
    {
        XmlDocument myXml = new XmlDocument();
        myXml.Load(xmlPath);
        XmlNode root = myXml.DocumentElement;
        if (root == null)
            return null;
        return root["channel"];
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.Xml;
namespace BlogsConvert
{
/// <summary>
/// Extracts blog-level and article-level information from an XML file
/// whose root element contains a "channel" element with "item" children.
/// </summary>
public class BlogsInfo
{
    /// <summary>
    /// Reads the blog-level information from the XML file.
    /// </summary>
    /// <param name="xmlPath">Path of the XML file.</param>
    /// <returns>
    /// The populated <see cref="channel"/>, or null when the document has no
    /// "channel" element or the channel has no non-blank title.
    /// </returns>
    public channel GetChannel(string xmlPath)
    {
        XmlNode channelNode = LoadChannelNode(xmlPath);
        if (channelNode == null)
            return null;

        channel cha = new channel();
        foreach (XmlNode chanode in channelNode.ChildNodes)
        {
            // Stop scanning as soon as the first article is reached.
            if (chanode.Name == "item")
                break;

            switch (chanode.Name)
            {
                case "title":
                    cha.Title = chanode.InnerText;
                    break;
                case "link":
                    cha.Link = chanode.InnerText;
                    break;
                case "description":
                    cha.Description = chanode.InnerText;
                    break;
                case "language":
                    cha.Language = chanode.InnerText;
                    break;
                case "lastBuildDate":
                    cha.LastBuildDate = DateTime.Parse(chanode.InnerText);
                    break;
                case "pubDate":
                    cha.PubDate = DateTime.Parse(chanode.InnerText);
                    break;
                case "ttl":
                    cha.Ttl = int.Parse(chanode.InnerText);
                    break;
            }
        }

        // Title is null when no <title> element was seen; treat that like a
        // blank title instead of dereferencing null.
        if (cha.Title == null || cha.Title.Trim() == "")
            return null;
        return cha;
    }

    /// <summary>
    /// Reads every article from the XML file (no keyword filter).
    /// </summary>
    /// <param name="xmlPath">Path of the XML file.</param>
    /// <returns>The articles, or null when none were found.</returns>
    public IList<item> GetItems(string xmlPath)
    {
        return GetItems(xmlPath, "");
    }

    /// <summary>
    /// Reads the articles from the XML file, optionally keeping only those
    /// whose title contains a keyword.
    /// </summary>
    /// <param name="xmlPath">Path of the XML file.</param>
    /// <param name="keyWord">
    /// Keyword that the title must contain; null or blank disables filtering.
    /// </param>
    /// <returns>
    /// The matching articles that have a link, or null when there are none
    /// (callers must handle the null result).
    /// </returns>
    public IList<item> GetItems(string xmlPath, string keyWord)
    {
        IList<item> itemList = new List<item>();
        bool filtering = keyWord != null && keyWord.Trim() != "";

        XmlNode channelNode = LoadChannelNode(xmlPath);
        if (channelNode != null)
        {
            foreach (XmlNode statusnode in channelNode.ChildNodes)
            {
                if (statusnode.Name != "item")
                    continue;

                // Decide on the keyword first, independent of the order in
                // which the child elements appear inside the item.
                if (filtering)
                {
                    XmlNode titleNode = statusnode["title"];
                    if (titleNode == null || !titleNode.InnerText.Contains(keyWord))
                        continue;
                }

                item temp = new item();
                foreach (XmlNode o in statusnode.ChildNodes)
                {
                    switch (o.Name)
                    {
                        case "title":
                            temp.Title = o.InnerText;
                            break;
                        case "link":
                            temp.Link = o.InnerText;
                            break;
                        case "dc:creator":
                            temp.Dc_creator = o.InnerText;
                            break;
                        case "author":
                            temp.Author = o.InnerText;
                            break;
                        case "pubDate":
                            temp.PubDate = DateTime.Parse(o.InnerText);
                            break;
                        case "guid":
                            temp.Guid = o.InnerText;
                            break;
                        case "description":
                            temp.Description = o.InnerText;
                            break;
                    }
                }

                // Articles without a link are dropped.
                if (temp.Link != null)
                    itemList.Add(temp);
            }
        }

        if (itemList.Count > 0)
            return itemList;
        return null;
    }

    /// <summary>
    /// Loads the XML file and returns the first "channel" child element of
    /// the document root, or null when there is none.
    /// </summary>
    private static XmlNode LoadChannelNode(string xmlPath)
    {
        XmlDocument myXml = new XmlDocument();
        myXml.Load(xmlPath);
        XmlNode root = myXml.DocumentElement;
        if (root == null)
            return null;
        return root["channel"];
    }
}
}
PDF文件生成類,也是本軟件中最重要的一個類,其實就是iTextSharp的運用(這個DLL文件在源代碼中有):
代碼
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text;
using iTextSharp.text.pdf;
using System.IO;
using System.Text.RegularExpressions;
namespace BlogsConvert
{
public class ToPdf:IConvert
{
#region IConvert 成員
/// <summary>
/// Writes the blog to a PDF file: a generation notice, the blog title and
/// description, a linked table of contents, then every article (text split
/// into paragraphs, images embedded) followed by a separator line.
/// Does nothing when <paramref name="commonInfo"/> or
/// <paramref name="itemList"/> is null.
/// </summary>
/// <param name="commonInfo">Blog-level information.</param>
/// <param name="itemList">Articles to render, in output order.</param>
/// <param name="path">Destination path of the PDF file (overwritten).</param>
public void Convert(channel commonInfo, IList<item> itemList,string path)
{
    if (commonInfo == null || itemList == null)
        return;

    // Page size
    Rectangle pageSize = PageSize.A4;
    Document document = new Document(pageSize);

    // The using block releases the output file even when rendering throws
    // part-way through (for example when an image cannot be loaded).
    using (FileStream output = new FileStream(path, FileMode.Create))
    {
        PdfWriter.GetInstance(document, output);
        document.Open();

        // One base font serves both sizes: body text (12) and headings (15).
        BaseFont baseFont = BaseFont.CreateFont(@"Fonts\SIMHEI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
        Font font = new Font(baseFont, 12);
        Font fontBlod = new Font(baseFont, 15);

        // Generation notice. Alignment: 1 = center, 0 = left, 2 = right.
        Paragraph pToop = new Paragraph(new Chunk("本文檔由程序整理生成(生成時間:"+DateTime.Now+")", fontBlod));
        pToop.Alignment = 1;
        pToop.SpacingAfter = 20;
        document.Add(pToop);

        // Blog title
        Paragraph pTitle = new Paragraph(new Phrase(commonInfo.Title, fontBlod));
        pTitle.Alignment = 1;
        pTitle.SpacingAfter = 20;
        document.Add(pTitle);

        // Blog sub-title, double line spacing
        Paragraph pDescription = new Paragraph(commonInfo.Description, font);
        pDescription.Alignment = 0;
        pDescription.MultipliedLeading = 2;
        pDescription.SpacingAfter = 20;
        document.Add(pDescription);

        // Table-of-contents heading
        Paragraph allGuid = new Paragraph("目 錄", fontBlod);
        allGuid.Alignment = 1;
        allGuid.SpacingBefore = 10;
        document.Add(allGuid);

        // Table-of-contents entries. Anchors are keyed by the article's
        // position so two articles with the same PubDate cannot collide.
        Paragraph guid = new Paragraph(" ");
        guid.MultipliedLeading = 1;
        for (int i = 0; i < itemList.Count; i++)
        {
            Anchor aTitle = new Anchor("第"+(i+1)+"篇: "+itemList[i].Title, font);
            aTitle.Reference = "#link" + (i + 1);
            document.Add(aTitle);
            document.Add(guid);
        }
        document.Add(guid);
        document.Add(guid);
        document.Add(guid);

        // Separator printed after every article
        Paragraph hr = new Paragraph("--------------------------------------------------------------------------------------------------------");
        hr.Alignment = 1;
        hr.SpacingAfter = 20;
        hr.SpacingBefore = 20;

        // Finds image sources (img src / background attributes)
        Regex reg = new Regex(@"(?is)(?:<img[^>]*?src|\bbackground)=(?:(['""])(?<img>[^'"">]+)\1|(?<img>[^'""\s>]+))");

        for (int n = 0; n < itemList.Count; n++)
        {
            item o = itemList[n];

            // Target of the matching table-of-contents link
            Anchor lTitle = new Anchor("第"+(n+1)+"篇:", font);
            lTitle.Name = "link" + (n + 1);
            document.Add(lTitle);

            Paragraph blogTitle = new Paragraph(o.Title, fontBlod);
            blogTitle.Alignment = 1;
            blogTitle.MultipliedLeading = 1;
            document.Add(blogTitle);

            // '卍' marks paragraph boundaries; an article without a body is
            // rendered as a single empty paragraph.
            string Content = o.Description ?? "";
            Content = Content.Replace("<p>", "卍");
            Content = Content.Replace("<br />", "卍");
            Content = Content.Replace("<br/ />", "卍");
            // The literal above never matches a real tag; also split on the
            // common self-closing and bare forms.
            Content = Content.Replace("<br/>", "卍");
            Content = Content.Replace("<br>", "卍");

            // Wrap every image URL in its own "Pic…ciP" segment so it
            // survives tag stripping and can be recognized below.
            IList<string> picList = new List<string>();
            foreach (Match m in reg.Matches(Content))
            {
                string src = m.Groups["img"].Value;
                // Replace() rewrites every occurrence at once, so a URL that
                // appears again must not be wrapped a second time.
                if (src.Contains("OutliningIndicators") || picList.Contains(src))
                    continue;
                picList.Add(src);
                Content = Content.Replace(src, "\" />卍Pic" + src + "ciP卍<img src=\"");
            }

            // Strip the HTML tags
            Content = NoHTML(Content);

            // One paragraph (or one image) per segment
            string[] ContentArray = Content.Split('卍');
            for (int i = 0; i < ContentArray.Length; i++)
            {
                bool isPicture = false;
                for (int j = 0; j < picList.Count; j++)
                {
                    if (ContentArray[i] == "Pic" + picList[j] + "ciP")
                    {
                        Image jpeg = Image.GetInstance(picList[j]);
                        if (jpeg.Width > PageSize.A4.Width)
                        {
                            // Shrink to page width, keeping the aspect ratio:
                            // newHeight / newWidth == Height / Width.
                            float scaledHeight = jpeg.Height * PageSize.A4.Width / jpeg.Width;
                            jpeg.ScaleAbsolute(PageSize.A4.Width, scaledHeight);
                        }
                        jpeg.Alignment = Image.MIDDLE_ALIGN;
                        document.Add(jpeg);
                        isPicture = true;
                        break;
                    }
                }
                if (!isPicture)
                {
                    Paragraph blogContent = new Paragraph(ContentArray[i], font);
                    blogContent.Alignment = 0;
                    blogContent.MultipliedLeading = 2;
                    blogContent.SpacingAfter = 10;
                    document.Add(blogContent);
                }
            }
            document.Add(hr);
        }

        // Closing credit
        Paragraph drj = new Paragraph(new Chunk("本程序由博客園——天行健(http://home.cnblogs.com/u/durongjian/)制作,如有建議請發郵件至drjchina@163.com", font));
        drj.Alignment = 1;
        drj.SpacingAfter = 20;
        drj.SpacingBefore = 20;
        document.Add(drj);

        // Finish the document
        document.Close();
    }
}
/// <summary>
/// Removes HTML tags (including whole script blocks) from a string and
/// decodes a handful of common character entities.
/// </summary>
/// <param name="Htmlstring">String that may contain HTML markup.</param>
/// <returns>
/// The trimmed text without tags; an empty string for null or empty input.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
        return "";

    // Singleline lets '.' cross line breaks, so a script whose body spans
    // several lines is removed together with its content.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // Any remaining tag
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    // A line break followed by whitespace
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    // Left-over comment delimiters
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
    // Named / numeric entities that are decoded
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
    // Every other numeric entity is dropped
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // The previous version ended with three string.Replace calls whose
    // results were discarded (strings are immutable), so they never had any
    // effect. They are removed rather than "activated": stripping '<' and '>'
    // here would delete characters just decoded from &lt; / &gt;, i.e. real
    // text such as "a < b".
    return Htmlstring.Trim();
}
#endregion
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text;
using iTextSharp.text.pdf;
using System.IO;
using System.Text.RegularExpressions;
namespace BlogsConvert
{
public class ToPdf:IConvert
{
#region IConvert 成員
/// <summary>
/// Writes the blog to a PDF file: a generation notice, the blog title and
/// description, a linked table of contents, then every article (text split
/// into paragraphs, images embedded) followed by a separator line.
/// Does nothing when <paramref name="commonInfo"/> or
/// <paramref name="itemList"/> is null.
/// </summary>
/// <param name="commonInfo">Blog-level information.</param>
/// <param name="itemList">Articles to render, in output order.</param>
/// <param name="path">Destination path of the PDF file (overwritten).</param>
public void Convert(channel commonInfo, IList<item> itemList,string path)
{
    if (commonInfo == null || itemList == null)
        return;

    // Page size
    Rectangle pageSize = PageSize.A4;
    Document document = new Document(pageSize);

    // The using block releases the output file even when rendering throws
    // part-way through (for example when an image cannot be loaded).
    using (FileStream output = new FileStream(path, FileMode.Create))
    {
        PdfWriter.GetInstance(document, output);
        document.Open();

        // One base font serves both sizes: body text (12) and headings (15).
        BaseFont baseFont = BaseFont.CreateFont(@"Fonts\SIMHEI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
        Font font = new Font(baseFont, 12);
        Font fontBlod = new Font(baseFont, 15);

        // Generation notice. Alignment: 1 = center, 0 = left, 2 = right.
        Paragraph pToop = new Paragraph(new Chunk("本文檔由程序整理生成(生成時間:"+DateTime.Now+")", fontBlod));
        pToop.Alignment = 1;
        pToop.SpacingAfter = 20;
        document.Add(pToop);

        // Blog title
        Paragraph pTitle = new Paragraph(new Phrase(commonInfo.Title, fontBlod));
        pTitle.Alignment = 1;
        pTitle.SpacingAfter = 20;
        document.Add(pTitle);

        // Blog sub-title, double line spacing
        Paragraph pDescription = new Paragraph(commonInfo.Description, font);
        pDescription.Alignment = 0;
        pDescription.MultipliedLeading = 2;
        pDescription.SpacingAfter = 20;
        document.Add(pDescription);

        // Table-of-contents heading
        Paragraph allGuid = new Paragraph("目 錄", fontBlod);
        allGuid.Alignment = 1;
        allGuid.SpacingBefore = 10;
        document.Add(allGuid);

        // Table-of-contents entries. Anchors are keyed by the article's
        // position so two articles with the same PubDate cannot collide.
        Paragraph guid = new Paragraph(" ");
        guid.MultipliedLeading = 1;
        for (int i = 0; i < itemList.Count; i++)
        {
            Anchor aTitle = new Anchor("第"+(i+1)+"篇: "+itemList[i].Title, font);
            aTitle.Reference = "#link" + (i + 1);
            document.Add(aTitle);
            document.Add(guid);
        }
        document.Add(guid);
        document.Add(guid);
        document.Add(guid);

        // Separator printed after every article
        Paragraph hr = new Paragraph("--------------------------------------------------------------------------------------------------------");
        hr.Alignment = 1;
        hr.SpacingAfter = 20;
        hr.SpacingBefore = 20;

        // Finds image sources (img src / background attributes)
        Regex reg = new Regex(@"(?is)(?:<img[^>]*?src|\bbackground)=(?:(['""])(?<img>[^'"">]+)\1|(?<img>[^'""\s>]+))");

        for (int n = 0; n < itemList.Count; n++)
        {
            item o = itemList[n];

            // Target of the matching table-of-contents link
            Anchor lTitle = new Anchor("第"+(n+1)+"篇:", font);
            lTitle.Name = "link" + (n + 1);
            document.Add(lTitle);

            Paragraph blogTitle = new Paragraph(o.Title, fontBlod);
            blogTitle.Alignment = 1;
            blogTitle.MultipliedLeading = 1;
            document.Add(blogTitle);

            // '卍' marks paragraph boundaries; an article without a body is
            // rendered as a single empty paragraph.
            string Content = o.Description ?? "";
            Content = Content.Replace("<p>", "卍");
            Content = Content.Replace("<br />", "卍");
            Content = Content.Replace("<br/ />", "卍");
            // The literal above never matches a real tag; also split on the
            // common self-closing and bare forms.
            Content = Content.Replace("<br/>", "卍");
            Content = Content.Replace("<br>", "卍");

            // Wrap every image URL in its own "Pic…ciP" segment so it
            // survives tag stripping and can be recognized below.
            IList<string> picList = new List<string>();
            foreach (Match m in reg.Matches(Content))
            {
                string src = m.Groups["img"].Value;
                // Replace() rewrites every occurrence at once, so a URL that
                // appears again must not be wrapped a second time.
                if (src.Contains("OutliningIndicators") || picList.Contains(src))
                    continue;
                picList.Add(src);
                Content = Content.Replace(src, "\" />卍Pic" + src + "ciP卍<img src=\"");
            }

            // Strip the HTML tags
            Content = NoHTML(Content);

            // One paragraph (or one image) per segment
            string[] ContentArray = Content.Split('卍');
            for (int i = 0; i < ContentArray.Length; i++)
            {
                bool isPicture = false;
                for (int j = 0; j < picList.Count; j++)
                {
                    if (ContentArray[i] == "Pic" + picList[j] + "ciP")
                    {
                        Image jpeg = Image.GetInstance(picList[j]);
                        if (jpeg.Width > PageSize.A4.Width)
                        {
                            // Shrink to page width, keeping the aspect ratio:
                            // newHeight / newWidth == Height / Width.
                            float scaledHeight = jpeg.Height * PageSize.A4.Width / jpeg.Width;
                            jpeg.ScaleAbsolute(PageSize.A4.Width, scaledHeight);
                        }
                        jpeg.Alignment = Image.MIDDLE_ALIGN;
                        document.Add(jpeg);
                        isPicture = true;
                        break;
                    }
                }
                if (!isPicture)
                {
                    Paragraph blogContent = new Paragraph(ContentArray[i], font);
                    blogContent.Alignment = 0;
                    blogContent.MultipliedLeading = 2;
                    blogContent.SpacingAfter = 10;
                    document.Add(blogContent);
                }
            }
            document.Add(hr);
        }

        // Closing credit
        Paragraph drj = new Paragraph(new Chunk("本程序由博客園——天行健(http://home.cnblogs.com/u/durongjian/)制作,如有建議請發郵件至drjchina@163.com", font));
        drj.Alignment = 1;
        drj.SpacingAfter = 20;
        drj.SpacingBefore = 20;
        document.Add(drj);

        // Finish the document
        document.Close();
    }
}
/// <summary>
/// Removes HTML tags (including whole script blocks) from a string and
/// decodes a handful of common character entities.
/// </summary>
/// <param name="Htmlstring">String that may contain HTML markup.</param>
/// <returns>
/// The trimmed text without tags; an empty string for null or empty input.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
        return "";

    // Singleline lets '.' cross line breaks, so a script whose body spans
    // several lines is removed together with its content.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // Any remaining tag
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    // A line break followed by whitespace
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    // Left-over comment delimiters
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
    // Named / numeric entities that are decoded
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
    // Every other numeric entity is dropped
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // The previous version ended with three string.Replace calls whose
    // results were discarded (strings are immutable), so they never had any
    // effect. They are removed rather than "activated": stripping '<' and '>'
    // here would delete characters just decoded from &lt; / &gt;, i.e. real
    // text such as "a < b".
    return Htmlstring.Trim();
}
#endregion
}
}
最后就是調用類了,先看一下軟件界面吧:

后臺代碼:
代碼
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using BlogsConvert;
using System.Net;
using System.IO;
namespace CnBlogsHelper
{
/// <summary>
/// Main form: downloads a blog's RSS feed, lists the article titles, lets the
/// user remove entries, and generates a PDF of the remaining articles on the
/// desktop.
/// </summary>
public partial class BlogToPdf : Form
{
    // Blog-level information of the most recent successful fetch
    // (null when the feed had no usable channel).
    public channel commonInfo=new channel();
    // Articles currently shown in the list box, in the same order.
    public IList<item> blogInfos=new List<item>();

    public BlogToPdf()
    {
        InitializeComponent();
    }

    private void BlogToPdf_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Downloads the RSS source and saves it as Blogs.xml in the application
    /// start-up directory, overwriting any previous copy.
    /// </summary>
    /// <param name="PageUrl">URL of the RSS feed.</param>
    /// <exception cref="Exception">The downloaded content is blank.</exception>
    public void GetXML(string PageUrl)
    {
        string Content;

        // Send the GET request. The using blocks release the connection and
        // the streams even when reading fails.
        // NOTE(review): the response is always decoded as GB2312 and re-saved
        // as UTF-8; confirm this matches what the feed actually sends.
        WebRequest request = WebRequest.Create(PageUrl);
        using (WebResponse response = request.GetResponse())
        using (Stream resStream = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.GetEncoding("GB2312")))
        {
            Content = sr.ReadToEnd();
        }

        // StreamWriter with append = false creates or truncates the file, so
        // no separate existence check is needed.
        string xmlPath = Application.StartupPath + @"\Blogs.xml";
        using (StreamWriter writer = new StreamWriter(xmlPath, false, System.Text.Encoding.GetEncoding("UTF-8")))
        {
            writer.Write(Content);
        }

        if (Content.Trim() == "")
        {
            throw new Exception("用戶名有誤,請檢查后重新輸入!");
        }
    }

    /// <summary>
    /// Generates the PDF file on the current user's desktop.
    /// </summary>
    /// <param name="saveName">File name of the PDF, without extension.</param>
    /// <param name="cha">Blog-level information.</param>
    /// <param name="itemList">Articles to include.</param>
    public void CreatePDF(string saveName,channel cha,IList<item> itemList)
    {
        IConvert con = new ToPdf();
        string dir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
        con.Convert(cha, itemList, dir + "\\" + saveName + ".pdf");
    }

    // "Generate" button
    private void btnCreate_Click(object sender, EventArgs e)
    {
        if (!CheckForm())
            return;

        // Nothing usable has been fetched yet. A missing channel is included
        // here because the converter silently writes nothing in that case.
        if (commonInfo == null || blogInfos == null || blogInfos.Count == 0)
        {
            MessageBox.Show("博客數為0,請先提取博客信息!");
            return;
        }

        Wait f = null;
        try
        {
            f = new Wait();
            f.Show();
            Application.DoEvents();
            CreatePDF(txtFileName.Text.Trim(), commonInfo, blogInfos);
        }
        catch (Exception ex)
        {
            MessageBox.Show("異常信息:"+ex.Message);
            return;
        }
        finally
        {
            // Always dismiss the wait window, also after a failure.
            if (f != null)
                f.Close();
        }
        MessageBox.Show("PDF文檔“" + txtFileName.Text.Trim() + ".pdf”生成成功,文檔在桌面!");
    }

    // "Fetch blog information" button
    private void btnFind_Click(object sender, EventArgs e)
    {
        if (!CheckForm())
            return;

        // Reset the list box and the backing list together so they cannot
        // get out of step when the fetch fails.
        libBlog.Items.Clear();
        blogInfos = new List<item>();

        string pageUrl = txtBlogUrl.Text.Trim();
        if (!pageUrl.EndsWith("/", StringComparison.Ordinal))
        {
            pageUrl = pageUrl + @"/";
        }
        pageUrl = pageUrl + "rss";

        Wait f = null;
        try
        {
            // Show the wait window
            f = new Wait();
            f.Show();
            Application.DoEvents();

            GetXML(pageUrl);
            string path = Application.StartupPath + @"\Blogs.xml";
            BlogsInfo blogInfo = new BlogsInfo();
            commonInfo = blogInfo.GetChannel(path);

            // The placeholder text of the keyword box means "no keyword".
            string keyWord = txtKeyWord.Text.Trim();
            if (keyWord == "請輸入標題中的關鍵字")
                keyWord = "";

            // GetItems returns null when nothing matches; keep an empty list
            // so later Count / Clear / foreach calls stay valid.
            IList<item> found = blogInfo.GetItems(path, keyWord);
            if (found != null)
                blogInfos = found;

            foreach (item o in blogInfos)
            {
                libBlog.Items.Add(o.Title);
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show("異常信息:" + ex.Message);
        }
        finally
        {
            // Always dismiss the wait window, also after a failure.
            if (f != null)
                f.Close();
        }
    }

    // "Clear all" button
    private void btnClearAll_Click(object sender, EventArgs e)
    {
        libBlog.Items.Clear();
        if (blogInfos != null)
            blogInfos.Clear();
    }

    // "Remove selected entry" button
    private void btnClearCurrent_Click(object sender, EventArgs e)
    {
        int index = libBlog.SelectedIndex;
        // SelectedIndex is -1 when no entry is selected.
        if (index < 0)
            return;

        // Remove by position so entries with identical titles stay aligned
        // with the backing list.
        libBlog.Items.RemoveAt(index);
        if (blogInfos != null && index < blogInfos.Count)
            blogInfos.RemoveAt(index);
    }

    // Clicking the keyword box clears its placeholder text
    private void txtKeyWord_Click(object sender, EventArgs e)
    {
        if (txtKeyWord.Text.Trim() == "請輸入標題中的關鍵字")
            txtKeyWord.Text = "";
    }

    /// <summary>
    /// Checks that the blog address and the file name are filled in; when
    /// one is blank, warns the user and restores the default values.
    /// </summary>
    /// <returns>true when both fields are non-blank.</returns>
    private bool CheckForm()
    {
        if (txtBlogUrl.Text.Trim() == "" || txtFileName.Text.Trim() == "")
        {
            MessageBox.Show("博客地址和保存文件名不能為空!");
            txtBlogUrl.Text = "http://www.rzrgm.cn/";
            txtFileName.Text = "我的博客";
            return false;
        }
        return true;
    }
}
}
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using BlogsConvert;
using System.Net;
using System.IO;
namespace CnBlogsHelper
{
/// <summary>
/// Main form: downloads a blog's RSS feed, lists the article titles, lets the
/// user remove entries, and generates a PDF of the remaining articles on the
/// desktop.
/// </summary>
public partial class BlogToPdf : Form
{
    // Blog-level information of the most recent successful fetch
    // (null when the feed had no usable channel).
    public channel commonInfo=new channel();
    // Articles currently shown in the list box, in the same order.
    public IList<item> blogInfos=new List<item>();

    public BlogToPdf()
    {
        InitializeComponent();
    }

    private void BlogToPdf_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Downloads the RSS source and saves it as Blogs.xml in the application
    /// start-up directory, overwriting any previous copy.
    /// </summary>
    /// <param name="PageUrl">URL of the RSS feed.</param>
    /// <exception cref="Exception">The downloaded content is blank.</exception>
    public void GetXML(string PageUrl)
    {
        string Content;

        // Send the GET request. The using blocks release the connection and
        // the streams even when reading fails.
        // NOTE(review): the response is always decoded as GB2312 and re-saved
        // as UTF-8; confirm this matches what the feed actually sends.
        WebRequest request = WebRequest.Create(PageUrl);
        using (WebResponse response = request.GetResponse())
        using (Stream resStream = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.GetEncoding("GB2312")))
        {
            Content = sr.ReadToEnd();
        }

        // StreamWriter with append = false creates or truncates the file, so
        // no separate existence check is needed.
        string xmlPath = Application.StartupPath + @"\Blogs.xml";
        using (StreamWriter writer = new StreamWriter(xmlPath, false, System.Text.Encoding.GetEncoding("UTF-8")))
        {
            writer.Write(Content);
        }

        if (Content.Trim() == "")
        {
            throw new Exception("用戶名有誤,請檢查后重新輸入!");
        }
    }

    /// <summary>
    /// Generates the PDF file on the current user's desktop.
    /// </summary>
    /// <param name="saveName">File name of the PDF, without extension.</param>
    /// <param name="cha">Blog-level information.</param>
    /// <param name="itemList">Articles to include.</param>
    public void CreatePDF(string saveName,channel cha,IList<item> itemList)
    {
        IConvert con = new ToPdf();
        string dir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
        con.Convert(cha, itemList, dir + "\\" + saveName + ".pdf");
    }

    // "Generate" button
    private void btnCreate_Click(object sender, EventArgs e)
    {
        if (!CheckForm())
            return;

        // Nothing usable has been fetched yet. A missing channel is included
        // here because the converter silently writes nothing in that case.
        if (commonInfo == null || blogInfos == null || blogInfos.Count == 0)
        {
            MessageBox.Show("博客數為0,請先提取博客信息!");
            return;
        }

        Wait f = null;
        try
        {
            f = new Wait();
            f.Show();
            Application.DoEvents();
            CreatePDF(txtFileName.Text.Trim(), commonInfo, blogInfos);
        }
        catch (Exception ex)
        {
            MessageBox.Show("異常信息:"+ex.Message);
            return;
        }
        finally
        {
            // Always dismiss the wait window, also after a failure.
            if (f != null)
                f.Close();
        }
        MessageBox.Show("PDF文檔“" + txtFileName.Text.Trim() + ".pdf”生成成功,文檔在桌面!");
    }

    // "Fetch blog information" button
    private void btnFind_Click(object sender, EventArgs e)
    {
        if (!CheckForm())
            return;

        // Reset the list box and the backing list together so they cannot
        // get out of step when the fetch fails.
        libBlog.Items.Clear();
        blogInfos = new List<item>();

        string pageUrl = txtBlogUrl.Text.Trim();
        if (!pageUrl.EndsWith("/", StringComparison.Ordinal))
        {
            pageUrl = pageUrl + @"/";
        }
        pageUrl = pageUrl + "rss";

        Wait f = null;
        try
        {
            // Show the wait window
            f = new Wait();
            f.Show();
            Application.DoEvents();

            GetXML(pageUrl);
            string path = Application.StartupPath + @"\Blogs.xml";
            BlogsInfo blogInfo = new BlogsInfo();
            commonInfo = blogInfo.GetChannel(path);

            // The placeholder text of the keyword box means "no keyword".
            string keyWord = txtKeyWord.Text.Trim();
            if (keyWord == "請輸入標題中的關鍵字")
                keyWord = "";

            // GetItems returns null when nothing matches; keep an empty list
            // so later Count / Clear / foreach calls stay valid.
            IList<item> found = blogInfo.GetItems(path, keyWord);
            if (found != null)
                blogInfos = found;

            foreach (item o in blogInfos)
            {
                libBlog.Items.Add(o.Title);
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show("異常信息:" + ex.Message);
        }
        finally
        {
            // Always dismiss the wait window, also after a failure.
            if (f != null)
                f.Close();
        }
    }

    // "Clear all" button
    private void btnClearAll_Click(object sender, EventArgs e)
    {
        libBlog.Items.Clear();
        if (blogInfos != null)
            blogInfos.Clear();
    }

    // "Remove selected entry" button
    private void btnClearCurrent_Click(object sender, EventArgs e)
    {
        int index = libBlog.SelectedIndex;
        // SelectedIndex is -1 when no entry is selected.
        if (index < 0)
            return;

        // Remove by position so entries with identical titles stay aligned
        // with the backing list.
        libBlog.Items.RemoveAt(index);
        if (blogInfos != null && index < blogInfos.Count)
            blogInfos.RemoveAt(index);
    }

    // Clicking the keyword box clears its placeholder text
    private void txtKeyWord_Click(object sender, EventArgs e)
    {
        if (txtKeyWord.Text.Trim() == "請輸入標題中的關鍵字")
            txtKeyWord.Text = "";
    }

    /// <summary>
    /// Checks that the blog address and the file name are filled in; when
    /// one is blank, warns the user and restores the default values.
    /// </summary>
    /// <returns>true when both fields are non-blank.</returns>
    private bool CheckForm()
    {
        if (txtBlogUrl.Text.Trim() == "" || txtFileName.Text.Trim() == "")
        {
            MessageBox.Show("博客地址和保存文件名不能為空!");
            txtBlogUrl.Text = "http://www.rzrgm.cn/";
            txtFileName.Text = "我的博客";
            return false;
        }
        return true;
    }
}
}
其中調用了一個等待窗體Wait,非常簡單,這里就不說了,大家可以看源代碼。
博客園中高手如云,本人只能算個菜,只是把自己寫的一點小東西拿出來跟大家分享,希望能幫到大家,歡迎各位朋友批評指正,如果使用過程中有錯誤請留言哦。
本軟件目的是服務博客園的朋友們,源代碼完全開源,但轉載或二次開發請注明出處。


浙公網安備 33010602011771號