public ExtractTextToGEOJSON ( string inputPath, string outputPath, string TesseractResultsJSONFileName ) : void | ||
inputPath | string | |
outputPath | string | |
TesseractResultsJSONFileName | string | |
Returns | void |
/// <summary>
/// Runs OCR over every PNG file found in <paramref name="inputPath"/> and writes the
/// recognized text regions to a feature JSON file in <paramref name="outputPath"/>.
/// </summary>
/// <remarks>
/// Only files whose name splits into exactly 11 '_'-separated tokens are processed.
/// The tokens read by this method are: [0] image id, [2] character count,
/// [3]/[4] mass center X/Y, [6] orientation, [7]..[10] bounding box x, y, width, height.
/// When several files share the same image id, only the one with the lowest accumulated
/// recognition cost is kept. Does nothing when the directory contains no PNG files.
/// </remarks>
/// <param name="inputPath">Directory that is searched for "*.png" files.</param>
/// <param name="outputPath">Directory the JSON file is written to.</param>
/// <param name="TesseractResultsJSONFileName">File name of the JSON file to write.</param>
/// <exception cref="Exception">
/// Any exception raised during OCR or JSON generation is logged and rethrown unchanged.
/// </exception>
public void ExtractTextToGEOJSON(string inputPath, string outputPath, string TesseractResultsJSONFileName)
{
    string[] filePaths = Directory.GetFiles(inputPath, "*.png");
    if (filePaths.Length == 0)
    {
        return;
    }

    int noOfFiles = filePaths.Length;
    String[] text = new String[noOfFiles];
    float[] textCost = new float[noOfFiles];
    int[] imageIds = new int[noOfFiles];

    // Seed every slot with the "discarded" sentinel. With the default value 0, a real
    // image whose id is 0 would match the slot of a skipped file during the duplicate
    // search below and be dropped by mistake.
    for (int i = 0; i < noOfFiles; i++)
    {
        imageIds[i] = -1;
    }

    try
    {
        Console.WriteLine("Tessearct in progress...");
        for (int i = 0; i < noOfFiles; i++)
        {
            string filename = Path.GetFileName(filePaths[i]);
            String[] splitTokens = filename.Split('_');
            if (splitTokens.Length != 11)
            {
                continue;
            }

            // Both images are disposable; release them as soon as the file has been recognized.
            using (Image<Bgr, Byte> image = new Image<Bgr, byte>(filePaths[i]))
            using (Image<Gray, byte> gray = image.Convert<Gray, Byte>())
            {
                _ocr.Recognize(gray);
                textCost[i] = 0;
                Tesseract.Charactor[] recog_char = _ocr.GetCharactors();
                for (int charCount = 0; charCount < recog_char.Length; charCount++)
                {
                    text[i] += recog_char[charCount].Text;
                    textCost[i] += recog_char[charCount].Cost;
                }

                imageIds[i] = int.Parse(splitTokens[0]);

                // First slot holding this id: either an earlier file with the same id or i itself.
                int index = Array.IndexOf(imageIds, imageIds[i]);
                if (index != -1 && index != i)
                {
                    // Duplicate id: keep the candidate with the lower cost, discard the other.
                    if (textCost[index] < textCost[i])
                    {
                        imageIds[i] = -1;
                    }
                    else
                    {
                        imageIds[index] = -1;
                    }
                }
            }
        }
    }
    catch (Exception exception)
    {
        Log.WriteLine(exception.Message);
        Log.WriteLine(exception.ToString());
        // Rethrow without resetting the original stack trace.
        throw;
    }

    try
    {
        FeatureInJSON jsonFeatures = new FeatureInJSON();
        jsonFeatures.displayFieldName = "";
        jsonFeatures.geometryType = "esriGeometryPolygon";

        jsonFeatures.fieldAliases.OBJECTID = "OBJECTID";
        jsonFeatures.fieldAliases.Filename = "Filename";
        jsonFeatures.fieldAliases.Orientation = "Orientation";
        jsonFeatures.fieldAliases.Text = "Text";
        jsonFeatures.fieldAliases.Susp_char_count = "Susp_char_count";
        jsonFeatures.fieldAliases.Susp_text = "Susp_text";
        jsonFeatures.fieldAliases.Mass_centerX = "Mass_centerX";
        jsonFeatures.fieldAliases.Mass_centerY = "Mass_centerY";
        jsonFeatures.fieldAliases.Char_count = "Char_count";
        jsonFeatures.fieldAliases.DetectionCost = "DetectionCost";

        jsonFeatures.spatialReference.latestWkid = 0;
        jsonFeatures.spatialReference.wkid = 0;

        jsonFeatures.fields[0].name = "OBJECTID";
        jsonFeatures.fields[0].type = "esriFieldTypeOID";
        jsonFeatures.fields[0].alias = "OBJECTID";
        jsonFeatures.fields[0].length = 0;
        jsonFeatures.fields[1].name = "Text";
        jsonFeatures.fields[1].type = "esriFieldTypeString";
        jsonFeatures.fields[1].alias = "Text";
        jsonFeatures.fields[1].length = 80;
        jsonFeatures.fields[2].name = "Char_count";
        jsonFeatures.fields[2].type = "esriFieldTypeInteger";
        jsonFeatures.fields[2].alias = "Char_count";
        jsonFeatures.fields[2].length = 0;
        jsonFeatures.fields[3].name = "Orientation";
        jsonFeatures.fields[3].type = "esriFieldTypeDouble";
        jsonFeatures.fields[3].alias = "Orientation";
        jsonFeatures.fields[3].length = 0;
        jsonFeatures.fields[4].name = "Filename";
        jsonFeatures.fields[4].type = "esriFieldTypeString";
        jsonFeatures.fields[4].alias = "Filename";
        jsonFeatures.fields[4].length = 80;
        jsonFeatures.fields[5].name = "Susp_text";
        jsonFeatures.fields[5].type = "esriFieldTypeString";
        jsonFeatures.fields[5].alias = "Susp_text";
        jsonFeatures.fields[5].length = 80;
        jsonFeatures.fields[6].name = "Susp_char_count";
        jsonFeatures.fields[6].type = "esriFieldTypeInteger";
        jsonFeatures.fields[6].alias = "Susp_char_count";
        jsonFeatures.fields[6].length = 0;
        jsonFeatures.fields[7].name = "Mass_centerX";
        jsonFeatures.fields[7].type = "esriFieldTypeDouble";
        jsonFeatures.fields[7].alias = "Mass_centerX";
        jsonFeatures.fields[7].length = 0;
        jsonFeatures.fields[8].name = "Mass_centerY";
        jsonFeatures.fields[8].type = "esriFieldTypeDouble";
        jsonFeatures.fields[8].alias = "Mass_centerY";
        jsonFeatures.fields[8].length = 0;
        jsonFeatures.fields[9].name = "DetectionCost";
        jsonFeatures.fields[9].type = "esriFieldTypeDouble";
        jsonFeatures.fields[9].alias = "DetectionCost";
        jsonFeatures.fields[9].length = 0;

        // Loop-invariant helpers, built once instead of once per file.
        char[] separator = new char[] { '_', '.' };
        // Matches a run of four or more characters that are neither letters (\p{L})
        // nor decimal digits (\p{Nd}); such runs are stripped from the recognized text.
        Regex nonWords = new Regex("[^\\p{L}\\p{Nd}]{3}[^\\p{L}\\p{Nd}]+");

        Console.WriteLine("Converting to JSON...");
        for (int i = 0; i < noOfFiles; i++)
        {
            // Skip discarded duplicates, files without any recognition cost, and empty paths.
            if (imageIds[i] == -1 || textCost[i] == 0 || filePaths[i] == "")
            {
                continue;
            }

            string filename = Path.GetFileName(filePaths[i]);
            String[] token = filename.Split(separator, StringSplitOptions.RemoveEmptyEntries);

            text[i] = nonWords.Replace(text[i], "");
            if (text[i].Contains("1 1 1")) // dashed lines
            {
                continue;
            }
            if (alphnumericratio(text[i]) == false)
            {
                continue;
            }

            Features ftr = new Features();
            // NOTE(review): Convert.ToInt16 throws OverflowException above 32767; the attribute
            // field types are not visible here, so these conversions are left unchanged.
            ftr.attributes.OBJECTID = Convert.ToInt16(token[0]);
            ftr.attributes.Text = text[i];
            ftr.attributes.Char_count = Convert.ToInt16(token[2]);
            ftr.attributes.Orientation = Convert.ToInt16(token[6]);
            ftr.attributes.Filename = filename;
            ftr.attributes.Susp_char_count = 0;
            ftr.attributes.Susp_text = "";
            ftr.attributes.Mass_centerX = Convert.ToInt16(token[3]);
            ftr.attributes.Mass_centerY = Convert.ToInt16(token[4]);
            ftr.attributes.DetectionCost = textCost[i];

            // Bounding box. Parsed as 32-bit so coordinates beyond 32767 do not overflow.
            int x = Convert.ToInt32(token[7]);
            int y = Convert.ToInt32(token[8]);
            int w = Convert.ToInt32(token[9]);
            int h = Convert.ToInt32(token[10]);

            // Closed rectangular ring (first vertex repeated at the end); Y is negated.
            ftr.geometry.rings[0, 0, 0] = x;
            ftr.geometry.rings[0, 0, 1] = -y;
            ftr.geometry.rings[0, 1, 0] = x + w;
            ftr.geometry.rings[0, 1, 1] = -y;
            ftr.geometry.rings[0, 2, 0] = x + w;
            ftr.geometry.rings[0, 2, 1] = -y - h;
            ftr.geometry.rings[0, 3, 0] = x;
            ftr.geometry.rings[0, 3, 1] = -y - h;
            ftr.geometry.rings[0, 4, 0] = x;
            ftr.geometry.rings[0, 4, 1] = -y;
            jsonFeatures.features.Add(ftr);

            // NOTE(review): the file is rewritten after every accepted feature. If
            // writeJsonFile overwrites its target, this call could be moved after the loop.
            GeoJson geoJson = new GeoJson();
            geoJson.featureInJson = jsonFeatures;
            geoJson.writeJsonFile(outputPath + "\\" + TesseractResultsJSONFileName);
        }
    }
    catch (Exception exception)
    {
        Log.WriteLine(exception.Message);
        Log.WriteLine(exception.ToString());
        // Rethrow without resetting the original stack trace.
        throw;
    }
}
/// <summary>
/// Runs the text recognition workflow for the selected raster layer: validates the
/// environment and the toolbar selections, extracts and identifies text regions, runs
/// OCR on them, and adds the resulting polygons as a feature layer.
/// </summary>
/// <remarks>
/// Shows a message box and returns early when the Strabo or Tesseract environment
/// variables are missing, the Strabo directories cannot be initialized, no raster
/// layer is selected, or no language is selected.
/// </remarks>
protected override void OnClick()
{
    string straboPath = Environment.GetEnvironmentVariable(ArcStrabo10Extension.EnvironmentVariableSTRABO_HOME, EnvironmentVariableTarget.User);
    string tessPath = Environment.GetEnvironmentVariable(ArcStrabo10Extension.EnvironmentVariableTESS_DATA, EnvironmentVariableTarget.User);

    if (ArcStrabo10Extension.PathSet == false)
    {
        if (String.IsNullOrEmpty(straboPath))
        {
            MessageBox.Show(ArcStrabo10Extension.ErrorMsgNoStraboHome);
            return;
        }
        if (String.IsNullOrEmpty(tessPath))
        {
            MessageBox.Show(ArcStrabo10Extension.ErrorMsgNoTess_Data);
            return;
        }

        // Initialize directories
        bool Initialize_straboPath_Correct = ArcStrabo10Extension.initialize_straboPath_directories(straboPath);
        if (!Initialize_straboPath_Correct)
        {
            MessageBox.Show(ArcStrabo10Extension.ErrorMsgNoStraboHomeWritePermission);
            return;
        }
        ArcStrabo10Extension.PathSet = true;
    }

    #region Text Recognition
    // Save positive and negative layers and make the GeoJSON file
    ComboBoxLayerSelector layerNameCombo = ComboBoxLayerSelector.GetLayerNameComboBox();

    // Select the raster map layer. "as" yields null both when nothing is selected and
    // when the selection is not a raster layer, so neither case raises an exception.
    RasterLayer rasterlayer = layerNameCombo.GetSelectedLayer() as RasterLayer;
    if (rasterlayer == null)
    {
        MessageBox.Show(ArcStrabo10Extension.ErrorMsgNoInputMap, "Input Map Error", MessageBoxButtons.OK);
        return;
    }

    string input_data_source_directory;
    try
    {
        input_data_source_directory = rasterlayer.FilePath;
    }
    catch (Exception)
    {
        // Handle no input map error
        MessageBox.Show(ArcStrabo10Extension.ErrorMsgNoInputMap, "Input Map Error", MessageBoxButtons.OK);
        return;
    }

    // Select language from combo box in toolbar
    ComboBoxLanguageSelector languageNameCombo = ComboBoxLanguageSelector.GetLanguageNameComboBox();
    string lng = languageNameCombo.Get_selected_language();
    if (lng == null)
    {
        MessageBox.Show(ArcStrabo10Extension.ErrorMsgNoInputLanguage, "Input Language Error", MessageBoxButtons.OK);
        return;
    }

    // Set log directory path
    Log.SetLogDir(ArcStrabo10Extension.Log_Path);
    Log.SetOutputDir(ArcStrabo10Extension.Log_Path);

    Log.WriteLine("MakingTextLabelGeoJsonFile Method Start SIMA");
    ArcStraboObject arcStraboObject = new ArcStraboObject();
    arcStraboObject.MakingTextLabelGeoJsonFile(ArcStrabo10Extension.Text_Result_Path);
    Log.WriteLine("MakingTextLabelGeoJsonFile Method Finish");

    // Run text layer extraction from Strabo.core and load the raster layer
    Log.WriteLine("textLayerExtract Medthod Start SIMA");
    arcStraboObject.textLayerExtract(input_data_source_directory, ArcStrabo10Extension.Text_Result_Path);
    Log.WriteLine("textLayerExtract Method Finish");

    Log.WriteLine("AddRasterLayer Method Start SIMA");
    arcStraboObject.AddRasterLayer(ArcStrabo10Extension.Text_Result_Path, ArcStrabo10Extension.TextLayerPNGFileName);
    Log.WriteLine("AddRasterLayer Method Finish");

    // Run the text identifier
    Log.WriteLine("textIndentification Method Start SIMA");
    System.Windows.Forms.Cursor.Current = Cursors.WaitCursor;
    try
    {
        arcStraboObject.textIndentification(ArcStrabo10Extension.Text_Result_Path + "\\", ArcStrabo10Extension.Intermediate_Result_Path + "\\", ArcStrabo10Extension.TextLayerPNGFileName);
    }
    finally
    {
        // Restore the cursor even when identification fails.
        System.Windows.Forms.Cursor.Current = Cursors.Default;
    }
    Log.WriteLine("textIndentification Method Finish");

    // OCR part
    Log.WriteLine("ExtractTextToGEOJSON Method Start SANJUALI");
    System.Windows.Forms.Cursor.Current = Cursors.AppStarting;
    try
    {
        Strabo.Core.OCR.WrapperTesseract language = new Strabo.Core.OCR.WrapperTesseract(tessPath, lng);
        language.ExtractTextToGEOJSON(ArcStrabo10Extension.Intermediate_Result_Path, ArcStrabo10Extension.Text_Result_Path, ArcStrabo10Extension.TesseractResultsJSONFileName);
        Log.WriteLine("ExtractTextToGEOJSON Method Finish");
    }
    finally
    {
        // Restore the cursor even when OCR fails.
        System.Windows.Forms.Cursor.Current = Cursors.Default;
    }

    // Add polygons of the OCR layer
    Log.WriteLine("CreateFeatureClassWithFields Method Start SIMA");
    IWorkspace workspace = arcStraboObject.CreateShapefileWorkspace(ArcStrabo10Extension.Text_Result_Path);
    IFeatureWorkspace featureworkspace = (IFeatureWorkspace)workspace;
    string tesseDataPath = ArcStrabo10Extension.Text_Result_Path + "\\" + ArcStrabo10Extension.TesseractResultsJSONFileName;
    IFeatureClass featureClass = arcStraboObject.CreateFeatureClassWithFields(ArcStrabo10Extension.TextLayerOCRShapefile, featureworkspace, tesseDataPath);
    IFeatureLayer featureLayer = arcStraboObject.CreateFeatureLayer(featureClass);
    Log.WriteLine("CreateFeatureClassWithFields Method Finish");

    Log.WriteLine("AddPolygon Method Start");
    arcStraboObject.AddPolygon(featureLayer, featureworkspace, tesseDataPath);
    Log.WriteLine("AddPolygon Method Finish");

    Log.ArchiveLog();
    MessageBox.Show("Text recognition finished!", "Done", MessageBoxButtons.OK);
    #endregion
}