Content |
public static ContentExtractorResult GetContentExtractor( Stream documentStream, IdResult docIdResult, string filePath, ContentExtractionSettings settings )
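This example shows the recommended pattern for using ContentExtractorFactory: identify the document format first, request the content extractor for that format, and then cast the returned base extractor to the specific interface that matches its ContentExtractorType.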
// Usage example: identify a file's format, ask ContentExtractorFactory for the
// matching content extractor, then cast the base extractor to the interface that
// corresponds to its ContentExtractorType.
//
// NOTE(review): 'filePath', '_contentConfig' and 'LogMessage' are not defined in this
// snippet; they are presumably supplied by the surrounding class - confirm in the caller.
using (var stream = File.OpenRead(filePath))
{
    // Step 1: Identify document format:
    var docIdResult = DocumentIdentifier.Identify(stream, filePath);

    // Step 2: Extract content from document (uses 'docIdResult' from above line to get correct content extractor):
    var docContentResult = ContentExtractorFactory.GetContentExtractor(stream, docIdResult, filePath, _contentConfig);

    if (docContentResult.HasError)
    {
        // No extractor could be obtained; report the format ID together with the error.
        LogMessage(string.Format("Error getting content extractor for file format ID {0}: {1}", docIdResult.ID, docContentResult.Error));
    }
    else
    {
        var extractorType = docContentResult.ContentExtractor.ContentExtractorType;

        // Step 3: Convert base interface using above ContentExtractorType to a specific interface:
        // Each case body is wrapped in its own braces so its local stays scoped to that case.
        switch (extractorType)
        {
            case ContentExtractorType.Archive:
                {
                    var archiveExtractor = (IArchiveExtractor)docContentResult.ContentExtractor;
                    // See help file "How To" section for how to use this interface ...
                }
                break;

            case ContentExtractorType.Document:
                {
                    var documentExtractor = (IDocumentContentExtractor)docContentResult.ContentExtractor;
                    // See help file "How To" section for how to use this interface ...
                }
                break;

            case ContentExtractorType.Database:
                {
                    var databaseExtractor = (IDatabaseExtractor)docContentResult.ContentExtractor;
                    // See help file "How To" section for how to use this interface ...
                }
                break;

            case ContentExtractorType.MailStore:
                {
                    var mailStoreExtractor = (IMailStoreExtractor)docContentResult.ContentExtractor;
                    // See help file "How To" section for how to use this interface ...
                }
                break;

            case ContentExtractorType.DocumentStore:
                {
                    var docStoreExtractor = (IDocumentStoreExtractor)docContentResult.ContentExtractor;
                    // See help file "How To" section for how to use this interface ...
                }
                break;

            case ContentExtractorType.Unsupported:
                {
                    var unsupportedExtractor = (IUnsupportedExtractor)docContentResult.ContentExtractor;
                    // See help file "How To" section for how to use this interface ...
                }
                break;

            case ContentExtractorType.LargeUnsupported:
                {
                    var largeBlobUnsupportedExtractor = (ILargeUnsupportedExtractor)docContentResult.ContentExtractor;
                    // See help file "How To" section for how to use this interface ...
                }
                break;

            case ContentExtractorType.LargeEncodedText:
                {
                    var largeEncodedTextExtractor = (ILargeEncodedTextExtractor)docContentResult.ContentExtractor;
                    // See help file "How To" section for how to use this interface ...
                }
                break;
        }
    }
}