AzureSearchOCRwithKeywordExtraction.AzureSearch.CreateIndex C# (CSharp) Method

CreateIndex() public static method

public static CreateIndex ( Microsoft.Azure.Search.SearchServiceClient serviceClient, string indexName ) : void
serviceClient Microsoft.Azure.Search.SearchServiceClient
indexName string
return void
        /// <summary>
        /// Creates the search index named <paramref name="indexName"/>, first deleting
        /// any existing index that has the same name.
        /// </summary>
        /// <remarks>
        /// The index is defined with four fields: <c>fileId</c> (the key), <c>fileName</c>,
        /// <c>ocrText</c> and <c>keyPhrases</c> (a collection of strings).
        /// </remarks>
        /// <param name="serviceClient">Client used to check for, delete and create the index.</param>
        /// <param name="indexName">Name of the index to (re)create.</param>
        public static void CreateIndex(SearchServiceClient serviceClient, string indexName)
        {
            // Drop a previous index with this name so the create call below starts clean.
            var alreadyExists = serviceClient.Indexes.Exists(indexName);
            if (alreadyExists)
            {
                serviceClient.Indexes.Delete(indexName);
            }

            // Document key.
            var fileIdField = new Field("fileId", DataType.String)
            {
                IsKey = true
            };

            // Searchable-only text fields.
            var fileNameField = new Field("fileName", DataType.String)
            {
                IsSearchable = true,
                IsFilterable = false,
                IsSortable = false,
                IsFacetable = false
            };
            var ocrTextField = new Field("ocrText", DataType.String)
            {
                IsSearchable = true,
                IsFilterable = false,
                IsSortable = false,
                IsFacetable = false
            };

            // Key phrases are additionally filterable and facetable.
            var keyPhrasesField = new Field("keyPhrases", DataType.Collection(DataType.String))
            {
                IsSearchable = true,
                IsFilterable = true,
                IsFacetable = true
            };

            var indexDefinition = new Index
            {
                Name = indexName,
                Fields = new[] { fileIdField, fileNameField, ocrTextField, keyPhrasesField }
            };

            serviceClient.Indexes.Create(indexDefinition);
        }

Usage Example

Example #1
0
        /// <summary>
        /// Console entry point. Recreates the search index, then for every PDF in the
        /// "pdf" folder: extracts its images into the "image" folder, runs OCR on them,
        /// extracts key phrases from the recognized text and uploads the result.
        /// Finishes by running a test search.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var searchPath = "pdf";
            var outPath    = "image";

            // Note, this will create a new Azure Search Index for the OCR text
            Console.WriteLine("Creating Azure Search index...");
            AzureSearch.CreateIndex(serviceClient, indexName);

            // Creating an image directory (CreateDirectory does nothing if it already exists)
            Directory.CreateDirectory(outPath);

            foreach (var filename in Directory.GetFiles(searchPath, "*.pdf", SearchOption.TopDirectoryOnly))
            {
                string fileNameOnly = Path.GetFileName(filename);

                Console.WriteLine("Extracting images from {0} \r\n", fileNameOnly);
                var images = PdfImageExtractor.ExtractImages(filename);
                Console.WriteLine("{0} images found.", images.Count);
                Console.WriteLine();

                foreach (var name in images.Keys)
                {
                    // Skip names that are empty or end with '.' (i.e. have no extension after the dot).
                    // The char overload performs an ordinal search, independent of the current culture.
                    if (name.LastIndexOf('.') + 1 != name.Length)
                    {
                        images[name].Save(Path.Combine(outPath, name));
                    }
                }

                // Accumulate OCR output in a StringBuilder to avoid repeated string copies.
                var ocrBuilder = new System.Text.StringBuilder();
                Console.WriteLine("Extracting text from image... \r\n");
                foreach (var imagefilename in Directory.GetFiles(outPath))
                {
                    OcrResults ocr = vision.RecognizeText(imagefilename);
                    ocrBuilder.Append(vision.GetRetrieveText(ocr));

                    // Remove the image once processed so it is not picked up again for the next PDF.
                    File.Delete(imagefilename);
                }
                string ocrText = ocrBuilder.ToString();

                // Nothing recognized: skip key-phrase extraction and upload for this file.
                if (ocrText.Length == 0)
                {
                    continue;
                }

                Console.WriteLine("Extracting key phrases from processed text... \r\n");
                KeyPhraseResult keyPhraseResult = TextExtraction.ProcessText(ocrText);

                // Take the resulting ocrText and upload it to the new Azure Search Index.
                // It is highly recommended that you upload documents in batches rather
                // than individually as is done here.
                Console.WriteLine("Uploading extracted text to Azure Search...\r\n");

                // The document key is the Base64 encoding of the file name's UTF-8 bytes.
                string fileId = System.Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(fileNameOnly));
                AzureSearch.UploadDocuments(indexClient, fileId, fileNameOnly, ocrText, keyPhraseResult);
            }

            // Execute a test search
            Console.WriteLine("Execute Search...");
            AzureSearch.SearchDocuments(indexClient, "Azure Search");
            Console.WriteLine("All done.  Press any key to continue.");
            Console.ReadLine();
        }