I am working for text search and extraction from pdf using third party dll itextsharp.
I am getting the text on searching but not only that text, the whole text of that page.
I thought to use phrases or chunks so that I can get pre-and post of that text only along with it instead of whole page text. Can anyone suggest me code for phrases or anything else which I can use for it. Thanks!
My code is:
string searchText = null;
string filename = System.AppDomain.CurrentDomain.BaseDirectory;
filename = @"C:\test.pdf";
searchText = textBox.Text.ToString();
List<int> pages = new List<int>();
if (File.Exists(filename))
{
PdfReader pdfReader = new PdfReader(filename);
List<Phrase> PhraseList = new List<Phrase>();
for (int page = 1; page <= pdfReader.NumberOfPages; page++)
ITextExtractionStrategy strategy = SimpleTextExtractionStrategy();
string currentPageText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy)
if (currentPageText.Contains(searchText))
pages.Add(page);
textBox1.AppendText(PdfTextExtractor.GetTextFromPage(pdfReader, page));
textBox1.Text += pages.ToString();
}
pdfReader.Close();