I am trying to extract certain content from a PDF file. The file is an invoice, and I want to search it for the labels "Invoice Number:" and "First Name" and print the values that follow them using
Console.WriteLine();
This is what I have so far; I need to figure out how to go from the extracted page text to those individual values.
using iTextSharp.text.pdf;
using System.IO;
using iTextSharp.text.pdf.parser;
using System;
namespace PdfProperties
{
    /// <summary>
    /// Console program that extracts the text of every page of an invoice PDF,
    /// writes it to a text file, echoes it to the console, and prints the values
    /// found after the "Invoice Number:" and "First Name" labels.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads C:/PDF/invoiceDetail.pdf page by page, writes each page's text to
        /// C:/PDF/result0.txt, and prints the page text plus any labeled fields found.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            PdfReader reader = new PdfReader("C:/PDF/invoiceDetail.pdf");
            try
            {
                PdfReaderContentParser parser = new PdfReaderContentParser(reader);

                // 'using' guarantees the output file is flushed and closed even if
                // text extraction throws part-way through the document.
                using (FileStream fs = new FileStream("C:/PDF/result0.txt", FileMode.Create))
                using (StreamWriter sw = new StreamWriter(fs))
                {
                    // PDF page numbers are 1-based.
                    for (int i = 1; i <= reader.NumberOfPages; i++)
                    {
                        SimpleTextExtractionStrategy strategy =
                            parser.ProcessContent(i, new SimpleTextExtractionStrategy());

                        // Fetch the page text once and reuse it for every output.
                        string text = strategy.GetResultantText();

                        sw.WriteLine(text);
                        Console.WriteLine("Test");
                        Console.WriteLine(text);

                        PrintField(text, "Invoice Number:");
                        PrintField(text, "First Name");
                    }
                }
            }
            finally
            {
                // Release the handle on the source PDF regardless of success or failure.
                reader.Close();
            }
        }

        /// <summary>
        /// Prints "label: value" to the console when <paramref name="label"/> is
        /// present in <paramref name="text"/>; prints nothing otherwise.
        /// </summary>
        /// <param name="text">Extracted text of one PDF page.</param>
        /// <param name="label">Label to look for, e.g. "Invoice Number:".</param>
        private static void PrintField(string text, string label)
        {
            string value = ExtractValueAfterLabel(text, label);
            if (value != null)
            {
                Console.WriteLine(label.TrimEnd(':', ' ') + ": " + value);
            }
        }

        /// <summary>
        /// Returns the text that follows the first occurrence of
        /// <paramref name="label"/> up to the end of that line, with surrounding
        /// whitespace and colons removed.
        /// </summary>
        /// <param name="text">Text to search.</param>
        /// <param name="label">Label to look for (matched case-insensitively).</param>
        /// <returns>
        /// The trimmed remainder of the line after the label, or <c>null</c> when
        /// the text or label is empty or the label does not occur.
        /// </returns>
        private static string ExtractValueAfterLabel(string text, string label)
        {
            if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(label))
            {
                return null;
            }

            int labelStart = text.IndexOf(label, StringComparison.OrdinalIgnoreCase);
            if (labelStart < 0)
            {
                return null;
            }

            // NOTE(review): assumes the value sits on the same line as its label in the
            // extracted text; confirm against the actual invoice layout, since PDF text
            // extraction order depends on how the document was produced.
            int valueStart = labelStart + label.Length;
            int lineEnd = text.IndexOfAny(new char[] { '\r', '\n' }, valueStart);
            if (lineEnd < 0)
            {
                lineEnd = text.Length;
            }

            return text.Substring(valueStart, lineEnd - valueStart).Trim(' ', '\t', ':');
        }
    }
}
Any help would be greatly appreciated.