/// <summary>
/// Downloads the PDF at <paramref name="url"/> and returns the text of all its pages,
/// concatenated in page order. Optionally stores the downloaded bytes on disk.
/// </summary>
/// <param name="url">Address the PDF is downloaded from.</param>
/// <param name="saveTo">
/// Optional directory (created if missing) in which a copy of the downloaded bytes is stored.
/// The file path is produced by <c>GeneratePath(saveTo, url)</c>. When null or whitespace,
/// nothing is written to disk.
/// </param>
/// <returns>
/// The extracted text, or <see cref="String.Empty"/> when the download yields no data or
/// any exception occurs while downloading, preparing the directory, or parsing the PDF.
/// </returns>
public static String Extract(String url, String saveTo = null)
{
    try
    {
        byte[] data;
        // WebClient is IDisposable; release it as soon as the download has finished.
        using (WebClient wc = new WebClient())
        {
            data = wc.DownloadData(url);
        }

        // Nothing to save or parse; PdfReader would only throw on an empty buffer.
        if (data == null || data.Length == 0)
        {
            return String.Empty;
        }

        // If a save location was supplied, write the downloaded bytes to a file.
        if (!String.IsNullOrWhiteSpace(saveTo))
        {
            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(saveTo);
            String file = GeneratePath(saveTo, url);
            try
            {
                // CreateNew throws an IOException when the file already exists,
                // so an existing copy is never overwritten.
                using (FileStream fs = new FileStream(file, FileMode.CreateNew))
                {
                    fs.Write(data, 0, data.Length);
                }
            }
            catch (IOException)
            {
                // Saving the copy is best-effort: a failed write must not
                // prevent the text extraction below.
            }
        }

        // Disposing the reader closes it, so no explicit Close() is required.
        using (PdfReader reader = new PdfReader(data))
        {
            StringBuilder sb = new StringBuilder();
            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                // A SimpleTextExtractionStrategy accumulates every chunk it is fed.
                // Reusing one instance across pages would make each call return the
                // text of all previous pages again, so use a fresh strategy per page.
                ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                sb.Append(PdfTextExtractor.GetTextFromPage(reader, i, strategy));
                reader.ReleasePage(i);
            }
            return sb.ToString();
        }
    }
    catch (Exception)
    {
        // Callers receive an empty string rather than an exception on any failure.
        return String.Empty;
    }
}