Generate_Cnblogs_Articles_To_Markdown_Files.CnblogsHelper.ExportToMarkdown C# (CSharp) Méthode

ExportToMarkdown() public static méthode

将博客园的文章导出为本地 Markdown 文件并保存
public static ExportToMarkdown ( int pageStart, int pageEnd, bool isSaveImage, string imagePrefixUrl = "", bool isAddMoreSeparateLine = false, int separateLineLocation = 300 ) : bool
pageStart int 博客起始页码,即 http://www.cnblogs.com/parry/default.html?page={0}
pageEnd int 博客结束页码,即 http://www.cnblogs.com/parry/default.html?page={0}
isSaveImage bool 是否将文章中的图片保存到本地;图片保存在程序运行目录下的 images 文件夹中
imagePrefixUrl string 替换文章中的图片为自己图床的前缀 Url
isAddMoreSeparateLine bool 是否在抓取到的文章的 separateLineLocation(参数)位置处添加 <!--more--> 分隔符,供博客展示文章时抽取摘要以及显示“阅读更多”使用。
separateLineLocation int 添加分隔符的位置
Résultat bool
        /// <summary>
        /// Exports cnblogs articles to local Markdown files.
        /// </summary>
        /// <remarks>
        /// For every list page from <paramref name="pageStart"/> to <paramref name="pageEnd"/> the
        /// method downloads the page, extracts the article links, downloads each article and, when the
        /// article markup matches the expected layout, writes one file to
        /// <c>Application.StartupPath\output\</c>. Articles whose markup does not match are skipped.
        /// This method does not create the <c>output</c> directory; it must exist before the call.
        /// </remarks>
        /// <param name="pageStart">First list page number, substituted into
        /// <c>http://www.cnblogs.com/parry/default.html?page={0}</c>.</param>
        /// <param name="pageEnd">Last list page number (inclusive), substituted into the same URL.</param>
        /// <param name="isSaveImage">When true, the article content is passed through
        /// <c>ProcessArticleImage</c> (per the original notes: saves the article images locally).</param>
        /// <param name="imagePrefixUrl">Forwarded to <c>ProcessArticleImage</c>; per the original notes,
        /// the URL prefix of your own image host used to replace the image URLs in the article.</param>
        /// <param name="isAddMoreSeparateLine">When true, a <c>&lt;!--more--&gt;</c> separator line is
        /// inserted into the generated Markdown so a blog engine can extract an excerpt / "read more".</param>
        /// <param name="separateLineLocation">Character offset in the generated Markdown at which the
        /// separator is inserted. If the offset lies outside the text, no separator is inserted.</param>
        /// <returns>Always <c>true</c>; failures surface as exceptions from the called helpers or file I/O.</returns>
        public static bool ExportToMarkdown(int pageStart, int pageEnd, bool isSaveImage, string imagePrefixUrl = "", bool isAddMoreSeparateLine = false, int separateLineLocation = 300)
        {
            const RegexOptions options = RegexOptions.Singleline | RegexOptions.Multiline;

            // The patterns never change, so build them once instead of once per page / per article.
            // Matches every article link on a list page.
            var regexArticleLink = new Regex(@"class=""postTitle"">\s+<a.*?href=""(?<href>.*?)"">", options);
            // Captures title, body HTML and post date from an article page.
            var regexArticle =
                new Regex(
                    @"<div\s+id=""topics"">.*?id=""cb_post_title_url"".*?>(?<title>.*?)</a>.*?<div\s+id=""cnblogs_post_body"">(?<articlecontent>.*?)</div><div\s+(?:id=""MySignature""></div>)?\s+<div\s+class=""clear""></div>.*?id=""post-date"">(?<date>.*?)</span>",
                    options);
            var regexAppName = new Regex("currentBlogApp\\s+=\\s+'(?<appName>.*?)'", options);
            var regexId = new Regex(@"cb_blogId=(?<blogid>\d+),cb_entryId=(?<entryid>\d+)", options);

            for (var page = pageStart; page <= pageEnd; page++)
            {
                var pagesUrl = string.Format("http://www.cnblogs.com/parry/default.html?page={0}", page);
                // Collect all article links on this list page, then fetch and store each article.
                var matches = regexArticleLink.Matches(NetworkHelper.GetHtmlFromGet(pagesUrl, Encoding.UTF8));
                foreach (Match match in matches)
                {
                    var articleUrl = match.Groups["href"].ToString();
                    var content = NetworkHelper.GetHtmlFromGet(articleUrl, Encoding.UTF8);

                    // The blog app name stays empty when the page does not declare currentBlogApp.
                    var appName = string.Empty;
                    var matchAppName = regexAppName.Match(content);
                    if (matchAppName.Success)
                    {
                        appName = matchAppName.Groups["appName"].ToString();
                    }

                    var matchArticle = regexArticle.Match(content);
                    if (!matchArticle.Success)
                    {
                        // Page layout not recognised: nothing to export for this link.
                        continue;
                    }

                    var title = matchArticle.Groups["title"].ToString().Trim();
                    var date = matchArticle.Groups["date"].ToString().Trim();
                    var articleContent = matchArticle.Groups["articlecontent"].ToString();
                    if (isSaveImage)
                    {
                        // Optional step. Per the original notes: saves the article's images; if you have
                        // your own image host, the saved images only need the host prefix replaced.
                        articleContent = ProcessArticleImage(articleContent, imagePrefixUrl);
                    }

                    articleContent = ProcessArticleCode(articleContent);
                    // Strip the wrapper div and every closing div tag from the body.
                    articleContent =
                        articleContent.Replace("<div id=\"parrycontent\">", string.Empty)
                            .Replace("</div>", string.Empty);

                    // Both ids stay 0 when the page does not expose them or they fail to parse.
                    int blogId = 0, postId = 0;
                    var matchId = regexId.Match(content);
                    if (matchId.Success)
                    {
                        int.TryParse(matchId.Groups["blogid"].ToString(), out blogId);
                        int.TryParse(matchId.Groups["entryid"].ToString(), out postId);
                    }

                    var categoryTags = GetArticleCategory(appName, blogId, postId);
                    var fileName = GetFileName(articleUrl);
                    var filePath = Application.StartupPath + "\\output\\" + fileName;
                    // Front-matter header (title, date, category/tags) followed by the article body.
                    var mdContent = string.Format("---\r\ntitle: {0}\r\ndate: {1}\r\n{2}\r\n---\r\n{3}", title, date,
                        categoryTags, articleContent);
                    // NOTE(review): Converter is defined elsewhere; presumably HTML-to-Markdown - confirm.
                    var converter = new Converter();
                    var markdown = converter.Convert(mdContent);

                    // Insert the <!--more--> separator at the requested offset. Insert keeps every
                    // character of the text (the previous Substring(location + 1) dropped one), and the
                    // range check keeps short articles from aborting the export with an exception.
                    if (isAddMoreSeparateLine && separateLineLocation >= 0 &&
                        separateLineLocation < markdown.Length)
                    {
                        markdown = markdown.Insert(separateLineLocation, "\r\n<!--more-->\r\n");
                    }

                    // Save the file; using guarantees the handle is released even if the write fails.
                    using (var streamWriter = new StreamWriter(filePath))
                    {
                        streamWriter.Write(markdown);
                    }
                }
            }
            return true;
        }

Usage Example

        /// <summary>
        /// Console entry point: ensures the working folders exist, exports list pages 1 through 4,
        /// then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        private static void Main(string[] args)
        {
            // Make sure both working folders exist under the application's startup path.
            foreach (var folder in new[] { "\\output\\", "\\images\\" })
            {
                var fullPath = Application.StartupPath + folder;
                if (Directory.Exists(fullPath))
                {
                    continue;
                }

                Directory.CreateDirectory(fullPath);
            }

            CnblogsHelper.ExportToMarkdown(1, 4, true, "http://7xqdjc.com1.z0.glb.clouddn.com/blog_");

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
All Usage Examples Of Generate_Cnblogs_Articles_To_Markdown_Files.CnblogsHelper::ExportToMarkdown