博客園博客PDF生成器

周末寫了一個博客園博客PDF生成器，由于博客園文件上傳大小的限制，我把源代碼放在CSDN上了（相信大家都有帳號哈），如果沒有帳號的請留下郵箱，我會盡快發給你，當然如果哪位朋友能幫忙把源代碼上傳到博客園上更好：博客園博客PDF生成器

廢話不多說，直接看生成后的PDF效果哈：

博客中圖片效果：

代碼比較簡單，這里先簡單說一下思路，先通過博客地址取得該博客的RSS信息，這是一個XML文件，把源碼存在本地，然后解析這個XML文件，從中取出需要的信息，再用iTextSharp這個DLL來操作PDF，從而生成PDF文檔。

下面只貼出幾個主要的類，大家有興趣可以下載源代碼看：

實體類channel，類屬性是從XML文件中取得的：

實體類：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace BlogsConvert
{
    /// <summary>
    /// Plain data holder for the blog-level information of a feed. Each property
    /// is a simple read/write value with no validation; BlogsInfo.GetChannel fills
    /// them from the child elements of the feed's channel node.
    /// </summary>
    public class channel
    {
        /// <summary>Text of the "title" element.</summary>
        public string Title { get; set; }

        /// <summary>Text of the "link" element.</summary>
        public string Link { get; set; }

        /// <summary>Text of the "description" element.</summary>
        public string Description { get; set; }

        /// <summary>Text of the "language" element.</summary>
        public string Language { get; set; }

        /// <summary>Parsed value of the "lastBuildDate" element.</summary>
        public DateTime LastBuildDate { get; set; }

        /// <summary>Parsed value of the "pubDate" element.</summary>
        public DateTime PubDate { get; set; }

        /// <summary>Parsed value of the "ttl" element.</summary>
        public int Ttl { get; set; }
    }
}

實體類item（屬性來自XML文件）：

實體類：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace BlogsConvert
{
    /// <summary>
    /// Plain data holder for one article of a feed. Each property is a simple
    /// read/write value with no validation; BlogsInfo.GetItems fills them from the
    /// child elements of an item node.
    /// </summary>
    public class item
    {
        /// <summary>Text of the "title" element.</summary>
        public string Title { get; set; }

        /// <summary>Text of the "link" element.</summary>
        public string Link { get; set; }

        /// <summary>Text of the "dc:creator" element.</summary>
        public string Dc_creator { get; set; }

        /// <summary>Text of the "author" element.</summary>
        public string Author { get; set; }

        /// <summary>Parsed value of the "pubDate" element.</summary>
        public DateTime PubDate { get; set; }

        /// <summary>Text of the "guid" element.</summary>
        public string Guid { get; set; }

        /// <summary>Text of the "description" element (the article body).</summary>
        public string Description { get; set; }
    }
}

從XML文件中提取博客信息類：

代碼

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.Xml;

namespace BlogsConvert
{
    /// <summary>
    /// Reads blog-level information and articles out of an RSS file stored on disk.
    /// Errors raised while loading the XML or while parsing dates and numbers are
    /// not caught here and propagate to the caller.
    /// </summary>
    public class BlogsInfo
    {
        /// <summary>
        /// Reads the blog-level information from the channel element of an RSS file.
        /// Only the elements that appear before the first "item" element are read.
        /// </summary>
        /// <param name="xmlPath">Path of the XML file.</param>
        /// <returns>
        /// The populated channel, or null when the file has no channel element or
        /// the channel has no non-blank title.
        /// </returns>
        public channel GetChannel(string xmlPath)
        {
            XmlNode channelNode = LoadChannelNode(xmlPath);
            if (channelNode == null)
            {
                return null;
            }

            channel cha = new channel();
            foreach (XmlNode chanode in channelNode.ChildNodes)
            {
                // The blog-level fields end where the article list begins.
                if (chanode.Name == "item")
                {
                    break;
                }

                switch (chanode.Name)
                {
                    case "title":
                        cha.Title = chanode.InnerText;
                        break;
                    case "link":
                        cha.Link = chanode.InnerText;
                        break;
                    case "description":
                        cha.Description = chanode.InnerText;
                        break;
                    case "language":
                        cha.Language = chanode.InnerText;
                        break;
                    case "lastBuildDate":
                        cha.LastBuildDate = DateTime.Parse(chanode.InnerText);
                        break;
                    case "pubDate":
                        cha.PubDate = DateTime.Parse(chanode.InnerText);
                        break;
                    case "ttl":
                        cha.Ttl = int.Parse(chanode.InnerText);
                        break;
                }
            }

            // Title is still null when the channel carried no title element, so it
            // has to be checked before Trim() is called on it.
            if (cha.Title != null && cha.Title.Trim() != "")
            {
                return cha;
            }
            return null;
        }

        /// <summary>
        /// Reads every article from an RSS file, without keyword filtering.
        /// </summary>
        /// <param name="xmlPath">Path of the XML file.</param>
        /// <returns>The articles found, or null when there are none.</returns>
        public IList<item> GetItems(string xmlPath)
        {
            return GetItems(xmlPath,"");
        }

        /// <summary>
        /// Reads the articles from an RSS file, optionally keeping only those whose
        /// title contains a keyword. Articles without a link element are skipped.
        /// </summary>
        /// <param name="xmlPath">Path of the XML file.</param>
        /// <param name="keyWord">
        /// Keyword the title must contain; null, empty or blank disables filtering.
        /// </param>
        /// <returns>
        /// The matching articles in document order, or null (not an empty list)
        /// when nothing matched.
        /// </returns>
        public IList<item> GetItems(string xmlPath,string keyWord)
        {
            bool filterByTitle = keyWord != null && keyWord.Trim() != "";
            IList<item> itemList = new List<item>();

            XmlNode channelNode = LoadChannelNode(xmlPath);
            if (channelNode != null)
            {
                foreach (XmlNode statusnode in channelNode.ChildNodes)
                {
                    if (statusnode.Name != "item")
                    {
                        continue;
                    }

                    item parsed = ParseItem(statusnode, filterByTitle, keyWord);
                    if (parsed != null)
                    {
                        itemList.Add(parsed);
                    }
                }
            }

            if (itemList.Count > 0)
            {
                return itemList;
            }
            return null;
        }

        /// <summary>
        /// Loads the XML file and returns the first child of the root element that
        /// is named "channel", or null when there is none.
        /// </summary>
        private static XmlNode LoadChannelNode(string xmlPath)
        {
            XmlDocument myXml = new XmlDocument();
            myXml.Load(xmlPath);

            XmlNode root = myXml.DocumentElement;
            if (root == null)
            {
                return null;
            }

            // Scan the children instead of assuming index 0, so a leading comment
            // or an empty root element does not cause a null dereference.
            foreach (XmlNode child in root.ChildNodes)
            {
                if (child.Name == "channel")
                {
                    return child;
                }
            }
            return null;
        }

        /// <summary>
        /// Builds an item from one "item" node. Returns null when the article is
        /// rejected by the keyword filter or has no link element.
        /// </summary>
        private static item ParseItem(XmlNode itemNode, bool filterByTitle, string keyWord)
        {
            item temp = new item();
            foreach (XmlNode o in itemNode.ChildNodes)
            {
                switch (o.Name)
                {
                    case "title":
                        // Reject the article outright on a keyword mismatch, so the
                        // outcome does not depend on whether the link element comes
                        // before or after the title in the file.
                        if (filterByTitle && !o.InnerText.Contains(keyWord))
                        {
                            return null;
                        }
                        temp.Title = o.InnerText;
                        break;
                    case "link":
                        temp.Link = o.InnerText;
                        break;
                    case "dc:creator":
                        temp.Dc_creator = o.InnerText;
                        break;
                    case "author":
                        temp.Author = o.InnerText;
                        break;
                    case "pubDate":
                        temp.PubDate = DateTime.Parse(o.InnerText);
                        break;
                    case "guid":
                        temp.Guid = o.InnerText;
                        break;
                    case "description":
                        temp.Description = o.InnerText;
                        break;
                }
            }

            if (temp.Link != null)
            {
                return temp;
            }
            return null;
        }
    }
}

PDF文件生成類，也是本軟件中最重要的一個類，其實就是iTextSharp的運用（這個DLL文件在源代碼中有）：

代碼

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text;
using iTextSharp.text.pdf;
using System.IO;
using System.Text.RegularExpressions;

namespace BlogsConvert
{
    /// <summary>
    /// IConvert implementation that writes a blog (blog-level information plus its
    /// articles) to a PDF file with iTextSharp.
    /// </summary>
    public class ToPdf:IConvert
    {
        // Marker inserted where the article HTML has a paragraph or line break;
        // the stripped text is later split on it to form PDF paragraphs.
        private const string BreakMark = "卍";

        // Finds the URL of img "src" and "background" attributes; the URL is
        // captured in the named group "img".
        private static readonly Regex ImageRegex = new Regex(@"(?is)(?:<img[^>]*?src|\bbackground)=(?:(['""])(?<img>[^'"">]+)\1|(?<img>[^'""\s>]+))");

        // The entities NoHTML decodes: any decimal numeric entity plus a fixed
        // list of named ones.
        private static readonly Regex EntityRegex = new Regex(@"&(#\d+|quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy);", RegexOptions.IgnoreCase);

        #region IConvert members

        /// <summary>
        /// Writes the blog to a PDF file: a generated-on notice, the blog title and
        /// description, a linked table of contents, every article (text and
        /// images) and a closing credit line. Does nothing when either
        /// <paramref name="commonInfo"/> or <paramref name="itemList"/> is null.
        /// </summary>
        /// <param name="commonInfo">Blog-level information.</param>
        /// <param name="itemList">Articles to render, in output order.</param>
        /// <param name="path">Path of the PDF file to create (overwritten if it exists).</param>
        public void Convert(channel commonInfo, IList<item> itemList,string path)
        {
            if (commonInfo == null || itemList == null)
            {
                return;
            }

            // Load the font before the output file is created, so a missing font
            // file does not leave an empty PDF behind. One BaseFont serves both sizes.
            BaseFont baseFont = BaseFont.CreateFont(@"Fonts\SIMHEI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
            Font font = new Font(baseFont, 12);
            Font fontBlod = new Font(baseFont, 15);

            // The using block releases the file handle even when rendering throws
            // (for example when an image cannot be loaded).
            using (FileStream output = new FileStream(path, FileMode.Create))
            {
                Document document = new Document(PageSize.A4);
                PdfWriter.GetInstance(document, output);
                document.Open();

                AddFrontMatter(document, commonInfo, font, fontBlod);
                AddTableOfContents(document, itemList, font, fontBlod);

                // Separator line appended after every article.
                Paragraph hr = new Paragraph("--------------------------------------------------------------------------------------------------------");
                hr.Alignment = 1;
                hr.SpacingAfter = 20;
                hr.SpacingBefore = 20;

                for (int i = 0; i < itemList.Count; i++)
                {
                    AddArticle(document, itemList[i], i + 1, font, fontBlod);
                    document.Add(hr);
                }

                AddFooter(document, font);

                document.Close();
            }
        }

        /// <summary>
        /// Removes HTML markup from a string and decodes a small set of entities.
        /// </summary>
        /// <remarks>
        /// Entities are decoded in a single pass, so escaped text such as
        /// "&amp;amp;lt;" becomes "&amp;lt;" and is not unescaped twice. Decimal
        /// numeric entities are decoded to their character; ones that are not a
        /// valid code point are removed. "&lt;" and "&gt;" produced by decoding
        /// are kept, because they are part of the article text.
        /// </remarks>
        /// <param name="Htmlstring">Text containing HTML markup; null is treated as empty.</param>
        /// <returns>The plain text, trimmed.</returns>
        public static string NoHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return "";
            }

            // Singleline lets '.' cross line breaks so a multi-line script block is
            // removed as a whole instead of leaving its code in the text.
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            Htmlstring = EntityRegex.Replace(Htmlstring, new MatchEvaluator(DecodeEntity));

            return Htmlstring.Trim();
        }

        #endregion

        /// <summary>Name of the in-document link target for the article at a 1-based position.</summary>
        private static string AnchorName(int position)
        {
            // Position-based names are unique and contain no culture-dependent text.
            return "link" + position;
        }

        /// <summary>Adds the generated-on notice, the blog title and the blog description.</summary>
        private static void AddFrontMatter(Document document, channel commonInfo, Font font, Font fontBlod)
        {
            // Alignment values: 1 = center, 0 = left, 2 = right.
            Paragraph pToop = new Paragraph(new Chunk("本文檔由程序整理生成（生成時間："+DateTime.Now+"）", fontBlod));
            pToop.Alignment = 1;
            pToop.SpacingAfter = 20;
            document.Add(pToop);

            Paragraph pTitle = new Paragraph(new Phrase(commonInfo.Title, fontBlod));
            pTitle.Alignment = 1;
            pTitle.SpacingAfter = 20;
            document.Add(pTitle);

            Paragraph pDescription = new Paragraph(commonInfo.Description, font);
            pDescription.Alignment = 0;
            // Line spacing as a multiple of the font size.
            pDescription.MultipliedLeading = 2;
            pDescription.SpacingAfter = 20;
            document.Add(pDescription);
        }

        /// <summary>Adds the table-of-contents heading and one link per article.</summary>
        private static void AddTableOfContents(Document document, IList<item> itemList, Font font, Font fontBlod)
        {
            Paragraph heading = new Paragraph("目      錄", fontBlod);
            heading.Alignment = 1;
            heading.SpacingBefore = 10;
            document.Add(heading);

            // Blank paragraph used as vertical spacing between entries.
            Paragraph spacer = new Paragraph("    ");
            spacer.MultipliedLeading = 1;

            for (int i = 0; i < itemList.Count; i++)
            {
                Anchor entry = new Anchor("第"+(i+1)+"篇： "+itemList[i].Title, font);
                entry.Reference = "#" + AnchorName(i + 1);
                document.Add(entry);
                document.Add(spacer);
            }
            document.Add(spacer);
            document.Add(spacer);
            document.Add(spacer);
        }

        /// <summary>
        /// Adds one article: its link target, its title, then its body as a
        /// sequence of text paragraphs and images.
        /// </summary>
        private static void AddArticle(Document document, item article, int position, Font font, Font fontBlod)
        {
            Anchor target = new Anchor("第"+position+"篇：", font);
            target.Name = AnchorName(position);
            document.Add(target);

            Paragraph blogTitle = new Paragraph(article.Title, fontBlod);
            blogTitle.Alignment = 1;
            blogTitle.MultipliedLeading = 1;
            document.Add(blogTitle);

            // An article without a description element has a null body.
            string content = article.Description ?? "";
            content = content.Replace("<p>", BreakMark);
            content = content.Replace("<br />", BreakMark);
            content = content.Replace("<br/>", BreakMark);
            content = content.Replace("<br>", BreakMark);

            // Wrap every image URL in "Pic...ciP" between two break marks. The
            // closing/opening tag fragments around it keep the surrounding markup
            // well-formed, so NoHTML strips the tags and leaves only the marker.
            IList<string> picList = new List<string>();
            foreach (Match m in ImageRegex.Matches(content))
            {
                string url = m.Groups["img"].Value;
                // Replace() rewrites every occurrence at once, so a URL seen before
                // must not be wrapped a second time.
                if (url.Contains("OutliningIndicators") || picList.Contains(url))
                {
                    continue;
                }
                picList.Add(url);
                content = content.Replace(url, "\" />" + BreakMark + "Pic" + url + "ciP" + BreakMark + "<img src=\"");
            }

            content = NoHTML(content);

            string[] pieces = content.Split(new string[] { BreakMark }, StringSplitOptions.None);
            foreach (string piece in pieces)
            {
                string imageUrl = FindImageUrl(piece, picList);
                if (imageUrl != null)
                {
                    AddImage(document, imageUrl);
                    continue;
                }

                Paragraph blogContent = new Paragraph(piece, font);
                blogContent.Alignment = 0;
                blogContent.MultipliedLeading = 2;
                blogContent.SpacingAfter = 10;
                document.Add(blogContent);
            }
        }

        /// <summary>Returns the image URL a text piece stands for, or null when it is ordinary text.</summary>
        private static string FindImageUrl(string piece, IList<string> picList)
        {
            foreach (string url in picList)
            {
                if (piece == "Pic" + url + "ciP")
                {
                    return url;
                }
            }
            return null;
        }

        /// <summary>
        /// Loads an image and adds it centered, shrinking it proportionally when it
        /// is wider than the space between the page margins.
        /// </summary>
        private static void AddImage(Document document, string imageUrl)
        {
            Image picture = Image.GetInstance(imageUrl);

            float maxWidth = document.PageSize.Width - document.LeftMargin - document.RightMargin;
            if (picture.Width > maxWidth)
            {
                // Same factor on both axes keeps the aspect ratio.
                picture.ScaleAbsolute(maxWidth, picture.Height * maxWidth / picture.Width);
            }
            picture.Alignment = Image.MIDDLE_ALIGN;
            document.Add(picture);
        }

        /// <summary>Adds the closing credit line.</summary>
        private static void AddFooter(Document document, Font font)
        {
            Paragraph drj = new Paragraph(new Chunk("本程序由博客園——天行健(http://home.cnblogs.com/u/durongjian/)制作，如有建議請發郵件至drjchina@163.com", font));
            drj.Alignment = 1;
            drj.SpacingAfter = 20;
            drj.SpacingBefore = 20;
            document.Add(drj);
        }

        /// <summary>Replacement text for one entity matched by EntityRegex.</summary>
        private static string DecodeEntity(Match entity)
        {
            string name = entity.Groups[1].Value;

            if (name[0] == '#')
            {
                int codePoint;
                if (!int.TryParse(name.Substring(1), out codePoint))
                {
                    return "";
                }
                // A non-breaking space becomes a plain space, like "&nbsp;".
                if (codePoint == 160)
                {
                    return " ";
                }
                bool isScalar = codePoint > 0 && codePoint <= 0x10FFFF
                    && (codePoint < 0xD800 || codePoint > 0xDFFF);
                return isScalar ? char.ConvertFromUtf32(codePoint) : "";
            }

            switch (name.ToLowerInvariant())
            {
                case "quot": return "\"";
                case "amp": return "&";
                case "lt": return "<";
                case "gt": return ">";
                case "nbsp": return " ";
                case "iexcl": return "\u00a1";
                case "cent": return "\u00a2";
                case "pound": return "\u00a3";
                case "copy": return "\u00a9";
                default: return entity.Value;
            }
        }
    }
}

最后就是調用類了，先看一下軟件界面吧：

后臺代碼：

代碼

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using BlogsConvert;
using System.Net;
using System.IO;

namespace CnBlogsHelper
{
    /// <summary>
    /// Main window: downloads a blog's RSS feed, lists its articles and writes the
    /// listed articles to a PDF file on the desktop.
    /// </summary>
    public partial class BlogToPdf : Form
    {
        // Blog-level information of the feed fetched last; null when the feed had none.
        public channel commonInfo=new channel();
        // Articles currently shown in libBlog, in the same order as the list rows.
        public IList<item> blogInfos=new List<item>();

        public BlogToPdf()
        {
            InitializeComponent();
        }

        private void BlogToPdf_Load(object sender, EventArgs e)
        {
        }

        // Location of the locally saved copy of the feed.
        private static string XmlCachePath
        {
            get { return Application.StartupPath + @"\Blogs.xml"; }
        }

        /// <summary>
        /// Downloads the RSS feed and saves it as Blogs.xml in the application's
        /// start-up folder, replacing any previous copy.
        /// </summary>
        /// <param name="PageUrl">URL of the RSS feed.</param>
        /// <exception cref="InvalidOperationException">The response body was empty or blank.</exception>
        public void GetXML(string PageUrl)
        {
            string Content;

            // The using blocks release the connection and streams even when the
            // download fails part-way.
            WebRequest request = WebRequest.Create(PageUrl);
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            // NOTE(review): the body is always decoded as GB2312, whatever charset
            // the server declares; confirm this matches the feed's real encoding.
            using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.GetEncoding("GB2312")))
            {
                Content = sr.ReadToEnd();
            }

            // append=false creates the file when missing and truncates it otherwise.
            using (StreamWriter writer = new StreamWriter(XmlCachePath, false, System.Text.Encoding.GetEncoding("UTF-8")))
            {
                writer.Write(Content);
            }

            if (Content.Trim() == "")
            {
                throw new InvalidOperationException("用戶名有誤，請檢查后重新輸入!");
            }
        }

        /// <summary>
        /// Writes the given blog to a PDF file on the current user's desktop.
        /// </summary>
        /// <param name="saveName">File name of the PDF, without the ".pdf" extension.</param>
        /// <param name="cha">Blog-level information.</param>
        /// <param name="itemList">Articles to include.</param>
        public void CreatePDF(string saveName,channel cha,IList<item> itemList)
        {
            IConvert con = new ToPdf();
            string dir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
            con.Convert(cha, itemList, Path.Combine(dir, saveName + ".pdf"));
        }

        // "Create" button: writes the listed articles to a PDF.
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (!CheckForm())
                return;

            // Nothing to write until a feed has been fetched successfully.
            if (commonInfo == null || blogInfos == null || blogInfos.Count == 0)
            {
                MessageBox.Show("博客數為0，請先提取博客信息！");
                return;
            }

            string fileName = txtFileName.Text.Trim();
            string error = null;
            Wait f = null;
            try
            {
                f = new Wait();
                f.Show();
                Application.DoEvents();

                CreatePDF(fileName, commonInfo, blogInfos);
            }
            catch (Exception ex)
            {
                error = ex.Message;
            }
            finally
            {
                // Close the wait window on failure too, before any message box opens.
                if (f != null)
                    f.Close();
            }

            if (error == null)
                MessageBox.Show("PDF文檔“" + fileName + ".pdf”生成成功,文檔在桌面!");
            else
                MessageBox.Show("異常信息:" + error);
        }

        // "Find" button: downloads the feed and fills the article list.
        private void btnFind_Click(object sender, EventArgs e)
        {
            if (!CheckForm())
                return;

            // Reset the data together with the list so both stay in step even when
            // the download below fails.
            libBlog.Items.Clear();
            blogInfos = new List<item>();

            string pageUrl = txtBlogUrl.Text.Trim();
            if (!pageUrl.EndsWith("/", StringComparison.Ordinal))
            {
                pageUrl = pageUrl + "/";
            }
            pageUrl = pageUrl + "rss";

            // The placeholder text of the keyword box means "no keyword".
            string keyWord = txtKeyWord.Text.Trim();
            if (keyWord == "請輸入標題中的關鍵字")
            {
                keyWord = "";
            }

            string error = null;
            Wait f = null;
            try
            {
                f = new Wait();
                f.Show();
                Application.DoEvents();

                GetXML(pageUrl);
                BlogsInfo blogInfo = new BlogsInfo();
                commonInfo = blogInfo.GetChannel(XmlCachePath);

                // GetItems returns null when no article matched.
                IList<item> found = blogInfo.GetItems(XmlCachePath, keyWord);
                if (found != null)
                {
                    blogInfos = found;
                }

                foreach (item o in blogInfos)
                {
                    // The list box rejects null entries; keep one row per article.
                    libBlog.Items.Add(o.Title ?? "");
                }
            }
            catch (Exception ex)
            {
                error = ex.Message;
            }
            finally
            {
                if (f != null)
                    f.Close();
            }

            if (error != null)
                MessageBox.Show("異常信息:" + error);
            else if (blogInfos.Count == 0)
                MessageBox.Show("沒有找到符合條件的博客！");
        }

        // "Clear all" button.
        private void btnClearAll_Click(object sender, EventArgs e)
        {
            libBlog.Items.Clear();
            if (blogInfos != null)
                blogInfos.Clear();
        }

        // "Remove selected" button.
        private void btnClearCurrent_Click(object sender, EventArgs e)
        {
            int index = libBlog.SelectedIndex;
            // SelectedIndex is -1 when no row is selected.
            if (index < 0)
                return;

            // Remove by position: removing by value would delete the first row with
            // the same title, which may not be the selected one.
            libBlog.Items.RemoveAt(index);
            if (blogInfos != null && index < blogInfos.Count)
                blogInfos.RemoveAt(index);
        }

        // Clicking the keyword box clears its placeholder text.
        private void txtKeyWord_Click(object sender, EventArgs e)
        {
            if (txtKeyWord.Text.Trim() == "請輸入標題中的關鍵字")
                txtKeyWord.Text = "";
        }

        /// <summary>
        /// Checks that both the blog address and the file name are filled in. When
        /// one is blank, shows a message and puts a default value into the blank
        /// field only, leaving the other field as the user entered it.
        /// </summary>
        /// <returns>True when both fields are filled in.</returns>
        private bool CheckForm()
        {
            bool urlMissing = txtBlogUrl.Text.Trim() == "";
            bool nameMissing = txtFileName.Text.Trim() == "";
            if (!urlMissing && !nameMissing)
                return true;

            MessageBox.Show("博客地址和保存文件名不能為空!");
            if (urlMissing)
                txtBlogUrl.Text = "http://www.rzrgm.cn/";
            if (nameMissing)
                txtFileName.Text = "我的博客";
            return false;
        }
    }
}

其中調用了一個等待窗體Wait，非常簡單，這里就不說了，大家可以看源代碼。

博客園中高手如云，本人只能算個菜，只是把自己寫的一點小東西拿出來跟大家分享，希望能幫到大家，歡迎各位朋友批評指正，如果使用過程中有錯誤請留言哦。

本軟件目的是服務博客園的朋友們，源代碼完全開源，但轉載或二次開發請注明出處。

posted @ 2010-10-25 16:24 artwl 閱讀(3035) 評論(33) 收藏舉報

刷新頁面返回頂部

// Profile object: name, job, languages and hobbies.
var ME = { "name": "土豆/Artwl", "job": "coding", "languages": [ "JS", "HTML", "CSS", "jQuery", "MVC", ".NET", "設計模式" ], "hobby": [ "閱讀", "旅游", "音樂", "電影" ] }

博客園博客PDF生成器

公告

個人簡介