Низкая точность Tesseract на iOS при распознавании снимков с камеры

Когда я делаю снимок камерой iPhone и передаю его в Tesseract, точность ужасна — на выходе один мусорный текст. Но когда я выбираю то же изображение из библиотеки фотографий, точность получается высокой.

Как улучшить точность Tesseract при съёмке с камеры? Вот что я делаю с изображением перед отправкой:

// Image-picker delegate callback: takes the picked image, resizes it, runs Tesseract
// on it, logs the recognized text and finally dismisses the picker.
- (void)imagePickerController:(UIImagePickerController *)picker didFinishPickingMediaWithInfo:(NSDictionary *)info {

    UIImage *image = info[UIImagePickerControllerOriginalImage];
    // Width and height are both taken from newWidth, so the requested size is a 1200 x 1200 square
    // regardless of the shape of the picked image.
    CGFloat newWidth = 1200;
    CGSize newSize = CGSizeMake(newWidth, newWidth);
    // NOTE(review): resizedImage:interpolationQuality: is not defined in this snippet
    // (presumably a UIImage category) — confirm how it treats aspect ratio and orientation.
    image = [image resizedImage:newSize interpolationQuality:kCGInterpolationHigh];

    Tesseract* tesseract = [[Tesseract alloc]initWithLanguage:@"eng"];
    // Configure the allowed and the forbidden characters.
    // Note that '*' is listed in both the whitelist and the blacklist.
    [tesseract setVariableValue:@"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@#&*()-_:." forKey:@"tessedit_char_whitelist"];
    [tesseract setVariableValue:@"`~!%^*[]{}<>?|" forKey:@"tessedit_char_blacklist"];
    [tesseract setImage:image];
    [tesseract recognize];
    NSLog(@"%@", [tesseract recognizedText]);

    [picker dismissViewControllerAnimated:YES completion:NULL];

}

Если кто-то нашёл какой-нибудь «волшебный» способ заставить Tesseract распознавать текст с высокой точностью, я назначу за ответ награду!


person Community    schedule 23.02.2014    source источник
comment
Вы пробовали без resizedImage: interpolationQuality:?   -  person rdurand    schedule 27.02.2014
comment
Ознакомьтесь с этой ссылкой, используя OpenCV для обработки изображений. введите здесь описание ссылки   -  person    schedule 01.03.2014


Ответы (2)


Основные моменты, которые следует учитывать:

  • Tesseract необходимо обучить распознаванию шрифта и языка. Похоже, вы указываете здесь соответствующие параметры - они те же, что вы используете, когда предоставляете предопределенное изображение?

  • Tesseract не ориентирует и не «очищает» изображения. Когда вы передаёте изображение в библиотеку, оно уже должно быть очищено и иметь портретную ориентацию. Изображение в фотоплёнке (Camera Roll) тоже снято камерой или это образец изображения, который вы каким-то образом сохранили?

Одна проверка, которую нужно сделать, - это функция изменения размера:

// Both dimensions come from newWidth, so the target size is a 1200 x 1200 square.
CGFloat newWidth = 1200;
CGSize newSize = CGSizeMake(newWidth, newWidth);

Из-за этого исходное изображение будет искажено, поскольку вы приводите неквадратное изображение к квадратному размеру. Это наверняка сплющит текст и усложнит жизнь Tesseract. Как минимум следует сохранять соотношение сторон снятого изображения.

person davbryn    schedule 27.02.2014

Мой код работает нормально, но он сложнее

/// Runs OCR over every image in self.images and logs the combined result.
///
/// For each image the glyph sub-images returned by
/// -[OCRImplementation processImageDetectText:threadhold:] are recognized one by one,
/// at most one character is kept per glyph, and the characters are joined into one
/// reading. All readings are then merged by -calculateStatics: (per-position vote).
///
/// @param sender The control that triggered the action (unused).
- (IBAction)captureTapped:(id)sender
{
    NSMutableArray *results = [NSMutableArray array];
    OCRImplementation *ocr = [[OCRImplementation alloc] init];

    for (UIImage *imageToTesseract in self.images) {

        // Every image gets its own reading. Accumulating into one shared string would
        // prefix each reading with all previous ones and make the per-position vote
        // in -calculateStatics: compare unrelated characters.
        NSMutableString *reading = [NSMutableString string];

        NSArray *glyphImages = nil;
        @try {
            glyphImages = [ocr processImageDetectText:imageToTesseract threadhold:198];
        }
        @catch (NSException *exception) {
            // Best effort: a failing image contributes an empty reading instead of aborting the batch.
            NSLog(@"processImageDetectText failed: %@", exception);
            glyphImages = nil;
        }

        for (UIImage *glyphImage in glyphImages) {

            // NOTE(review): this is sent to self, while the visible implementation of
            // confidencesOCRTesseract: lives in OCRImplementation — confirm self provides it too.
            NSString *glyphText = [self confidencesOCRTesseract:glyphImage];
            if (glyphText.length == 0) {
                continue;   // nothing recognized for this glyph (also covers nil)
            }

            // A glyph image is expected to hold a single character; keep only the first one.
            // substringToIndex: keeps the UTF-16 unit intact, unlike formatting a unichar with %c.
            if (glyphText.length > 1) {
                glyphText = [glyphText substringToIndex:1];
            }

            [reading appendString:glyphText];
        }

        NSString *fullWord = [reading stringByReplacingOccurrencesOfString:@"\n" withString:@""];
        [results addObject:fullWord];

        NSLog(@"-- RESULT -- %@",fullWord);
    }

    NSString *resultWord = @"";
    if ([results count] > 0) {
        resultWord = [self calculateStatics:results];
    }
    //Your text Result
    NSLog(@"%@",resultWord);
}

/// Merges several OCR readings of the same text into one string by majority vote.
///
/// Only the first seven character positions are considered. For every position each
/// reading votes with its character at that position; a reading that is too short
/// (or empty, or holds a NUL there) votes with a single space. The most frequent vote
/// wins; on a tie the vote that appeared first in `results` wins.
///
/// @param results Array of NSString readings. May be empty.
/// @return A string with one winning character per position (seven characters),
///         or an empty string when `results` is empty.
- (NSString*)calculateStatics:(NSMutableArray*)results{

    // Number of leading character positions that take part in the vote.
    static const NSUInteger kVotedLength = 7;

    NSMutableString *word = [NSMutableString string];

    for (NSUInteger position = 0; position < kVotedLength; position++) {

        NSCountedSet *votes = [NSCountedSet set];
        // Distinct votes in first-seen order, so that ties are resolved deterministically.
        NSMutableArray *candidates = [NSMutableArray array];

        for (NSString *result in results) {

            NSString *vote = @" ";
            if (position < result.length) {
                unichar character = [result characterAtIndex:position];
                if (character != 0) {
                    // Build the string from the UTF-16 unit itself; formatting a unichar
                    // with %c would truncate anything outside ASCII.
                    vote = [NSString stringWithCharacters:&character length:1];
                }
            }

            if ([votes countForObject:vote] == 0) {
                [candidates addObject:vote];
            }
            [votes addObject:vote];
        }

        NSString *winner = @"";
        NSUInteger winnerCount = 0;
        for (NSString *candidate in candidates) {
            NSUInteger candidateCount = [votes countForObject:candidate];
            if (candidateCount > winnerCount) {
                winnerCount = candidateCount;
                winner = candidate;
            }
        }

        [word appendString:winner];
    }

    return [NSString stringWithString:word];

}

Класс OCRImplementation

OCRImplementation.h

#ifndef __TesseractSample__OCRImplementation__
#define __TesseractSample__OCRImplementation__

#import <UIKit/UIKit.h>

/// Helper that splits a photo into single-glyph images with OpenCV and recognizes
/// individual glyph images with Tesseract.
@interface OCRImplementation : NSObject{

}

// NOTE(review): no implementation of processImage: is visible in OCRImplementation.mm;
// the declaration is kept only so existing callers keep compiling — confirm or remove.
- (UIImage*)processImage:(id)sender;

/// Extracts the glyph candidates of an image as thresholded sub-images.
/// @param sender The source image; it is used as a UIImage by the implementation.
/// @param threadhold Binary threshold applied to every cropped glyph.
/// @return An array of UIImage objects, one per glyph candidate.
- (NSMutableArray*)processImageDetectText:(id)sender threadhold:(int)threadhold;

/// Recognizes the text of one image with Tesseract.
/// @param picture The image to recognize.
/// @return The recognized characters, or an empty string when nothing was recognized.
- (NSString*)confidencesOCRTesseract:(UIImage*)picture;

@end

#endif /* defined(__TesseractSample__OCRImplementation__) */

OCRImplementation.mm

#include "OCRImplementation.h"
#import <OpenCV/opencv2/imgproc/imgproc.hpp>
#import <OpenCV/opencv2/highgui/highgui.hpp>
#import "UIImage+OpenCV.h"
#import "Tesseract.h"

#import "baseapi.h"
#import "environ.h"
#import "pix.h"

#include <sstream>
#include <iostream>
#include <vector>
#include "OpenCV/opencv2/core/core.hpp"
#include "OpenCV/opencv2/features2d/features2d.hpp"
#include "OpenCV/opencv2/calib3d/calib3d.hpp"

@implementation OCRImplementation

/// Finds glyph-like contours in an image and returns one thresholded sub-image per contour.
///
/// Steps, as implemented below:
///   1. Gray-scale + blur the image, binarize it with a fixed threshold and find contours.
///   2. Keep contours whose bounding box is taller than 50 px, taller than wide and
///      whose area is larger than half the area of the previously accepted contour.
///   3. Crop each kept contour (bounding box grown by 5 px per side), brighten it,
///      binarize it with `threadhold`, dilate it and convert it to a UIImage.
///   4. Return the images ordered by the x coordinate of their bounding boxes.
///
/// @param sender The source image; it is assigned to a UIImage * without a type check.
/// @param threadhold Threshold used when binarizing every cropped glyph.
/// @return A mutable array of UIImage objects (possibly empty).
- (NSMutableArray*)processImageDetectText:(id)sender threadhold:(int)threadhold{

    UIImage *img1 = sender;
    // CVMat is not defined in this file (presumably from the UIImage+OpenCV category imported above).
    cv::Mat src = [img1 CVMat];
    cv::Mat src_gray;
    cv::Mat threshold_output;
    cv::vector<cv::vector<cv::Point> > contours;
    cv::vector<cv::Vec4i> hierarchy;
    // Fixed threshold for the whole-image binarization (independent of the `threadhold` parameter).
    int thresh = 100;
    // Only used to pick random colors for the debug drawing below.
    cv::RNG rng(12345);

    /// Convert image to gray and blur it
    cvtColor( src, src_gray, CV_BGR2GRAY );
    blur( src_gray, src_gray, cv::Size(3,3) );

    /// Detect edges using Threshold
    cv::threshold( src_gray, threshold_output, thresh, 255, cv::THRESH_BINARY );
    /// Find contours
    cv::findContours( threshold_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, cv::Point(0, 0) );

    /// Approximate contours to polygons + get bounding rects and circles
    cv::vector<cv::vector<cv::Point> > contours_poly( contours.size() );
    cv::vector<cv::Rect> boundRect( contours.size() );
    cv::vector<cv::Point2f>center( contours.size() );
    cv::vector<float>radius( contours.size() );

    for( int i = 0; i < contours.size(); i++ )
    {
        approxPolyDP( cv::Mat(contours[i]), contours_poly[i], 3, true );
        boundRect[i] = boundingRect( cv::Mat(contours_poly[i]) );
        // NOTE(review): center/radius are filled here but never read again in this method.
        minEnclosingCircle( (cv::Mat)contours_poly[i], center[i], radius[i] );
    }


    /// Draw polygonal contour + bonding rects + circles
    // NOTE(review): `drawing` is only drawn into; it is neither returned nor read afterwards.
    cv::Mat drawing = cv::Mat::zeros( threshold_output.size(), CV_8UC3 );

    // Glyph images keyed by the contour area formatted with "%f".
    // Contours with equal areas share a key, so the later one replaces the earlier one.
    NSMutableDictionary *dictionaryImages = [NSMutableDictionary dictionary];
    // Areas (formatted with "%f") of all accepted contours, in discovery order.
    NSMutableArray *areaArray = [NSMutableArray array];

    // Area of the most recently accepted contour; 0 until the first one is accepted.
    float lastArea = 0.0;

    for( int i = 0; i< contours.size(); i++ )
    {
        cv::Scalar color = cv::Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) );

        // Ignore everything whose bounding box is not taller than 50 px.
        if(boundRect[i].height > 50){

            double area = cv::contourArea(contours[i]);

            // Accept only boxes that are taller than wide and whose contour area is
            // more than half the area of the previously accepted contour.
            if((boundRect[i].width < boundRect[i].height) && area > (lastArea / 2)){

                lastArea = area;

                // The area is recorded before the bounds check below, so areaArray can hold
                // an area for which no image ends up in dictionaryImages.
                [areaArray addObject:[NSString stringWithFormat:@"%f",area]];

                rectangle( drawing, boundRect[i].tl(), boundRect[i].br(), color, 2, 8, 0 );

                // Grow the bounding box by 5 px on every side.
                cv::Rect extendedRect = cv::Rect(boundRect[i].x - 5, boundRect[i].y - 5, boundRect[i].width + 10, boundRect[i].height + 10);

                cv::Mat source = src;
                cv::Mat target(extendedRect.size(), source.type());

                // Crop only when the grown rectangle lies completely inside the source image.
                if(0 <= extendedRect.x && 0 <= extendedRect.width && extendedRect.x + extendedRect.width <= source.cols && 0 <= extendedRect.y && 0 <= extendedRect.height && extendedRect.y + extendedRect.height <= source.rows){

                    source(extendedRect).copyTo(target);
                    //converting the original image into grayscale
                    cv::cvtColor(target, target, CV_BGR2GRAY);

                    // Brighten the crop: multiply the pixel values by 2, then add 2.
                    cv::multiply(target, cv::Scalar(2,2,2), target);
                    cv::add(target, cv::Scalar(2,2,2), target);

                    /// Detect edges using Threshold
                    // threshold_output is reused here; the contours were already extracted from
                    // the whole-image version, so overwriting it does not affect them.
                    cv::threshold( target, threshold_output, threadhold, 255, cv::THRESH_BINARY );
                    // NOTE(review): NULL is passed as the structuring element — confirm this
                    // resolves to OpenCV's default kernel with the OpenCV version in use.
                    cv::dilate(threshold_output, threshold_output, NULL);

                    UIImage *imgFinal = [OCRImplementation imageWithCVMat:threshold_output];

                    [dictionaryImages setObject:imgFinal forKey:[NSString stringWithFormat:@"%f", area]];

                }else{
                    NSLog(@"Error al leer la imagen. NO ROI");
                }


            }



        }

    }

    // Glyph images keyed by the x coordinate of their bounding box (as a decimal string).
    NSMutableDictionary *finalImages = [NSMutableDictionary dictionary];
    NSMutableArray *sortedKeys = [NSMutableArray arrayWithArray:[areaArray sortedArrayUsingFunction:intSort context:NULL]];

    // Match every tall contour against the recorded areas to recover its x position.
    for( int k = 0; k< contours.size(); k++ )
    {

        if(boundRect[k].height > 50){

            for(int i = 0; i < [sortedKeys count]; i++){

                double area = cv::contourArea(contours[k]);

                // The recorded area went through a "%f" string and floatValue, so this is an
                // exact comparison between a double and a float that was round-tripped via text.
                if(area == [[sortedKeys objectAtIndex:i] floatValue]){
                    // NOTE(review): if the bounds check above failed for this area there is no
                    // entry in dictionaryImages; objectForKey: then returns nil and
                    // setObject:forKey: would raise — confirm whether callers rely on that.
                    // Contours with the same x share a key, so the later one replaces the earlier one.
                    [finalImages setObject:[dictionaryImages objectForKey:[sortedKeys objectAtIndex:i]] forKey:[NSString stringWithFormat:@"%d",boundRect[k].x]];
                }

            }

        }

    }


    // Emit the glyph images in the order produced by sorting the x keys with intSortDesc.
    NSMutableArray *array = [NSMutableArray array];
    NSArray *keys = [finalImages allKeys];
    NSArray *sortedKeys2 = [keys sortedArrayUsingFunction:intSortDesc context:NULL];

    for(int i=0; i<[sortedKeys2 count]; i++){
        [array addObject:[finalImages objectForKey:[sortedKeys2 objectAtIndex:i]]];
    }


    return array;

}

/// Builds a UIImage from the pixel data of an OpenCV matrix.
///
/// @param cvMat The matrix to convert. Matrices with a one-byte element size are
///              rendered in the device gray color space, all others in device RGB.
/// @return A UIImage created from a CGImage over a copy of the matrix bytes.
+ (UIImage *)imageWithCVMat:(const cv::Mat&)cvMat
{
    // dataWithBytes:length: copies the bytes, so the image owns its own pixel storage.
    const size_t byteCount = cvMat.elemSize() * cvMat.total();
    NSData *pixelData = [NSData dataWithBytes:cvMat.data length:byteCount];

    const bool hasOneByteElements = (cvMat.elemSize() == 1);
    CGColorSpaceRef space = hasOneByteElements ? CGColorSpaceCreateDeviceGray()
                                               : CGColorSpaceCreateDeviceRGB();

    CGDataProviderRef dataProvider = CGDataProviderCreateWithCFData((CFDataRef)pixelData);

    const size_t bitsPerComponent = 8;
    const size_t bitsPerPixel = 8 * cvMat.elemSize();
    const size_t bytesPerRow = cvMat.step[0];

    CGImageRef cgImage = CGImageCreate(cvMat.cols,
                                       cvMat.rows,
                                       bitsPerComponent,
                                       bitsPerPixel,
                                       bytesPerRow,
                                       space,
                                       kCGImageAlphaNone | kCGBitmapByteOrderDefault,
                                       dataProvider,
                                       NULL,    // no decode array
                                       false,   // no interpolation
                                       kCGRenderingIntentDefault);

    UIImage *result = [UIImage imageWithCGImage:cgImage];

    // Balance the three Create calls above.
    CGColorSpaceRelease(space);
    CGDataProviderRelease(dataProvider);
    CGImageRelease(cgImage);

    return result;
}

/// Recognizes the characters of one image with Tesseract, symbol by symbol.
///
/// The "tessdata" folder is copied from the app bundle into the Documents directory on
/// first use, and TESSDATA_PREFIX is pointed at that directory. Recognition is limited
/// to upper-case letters and digits. Symbols with a confidence above 80 are taken as
/// they are; for the others the first non-empty alternative choice is used.
///
/// @param picture The image to recognize.
/// @return The recognized characters, or an empty string when nothing could be
///         recognized or the engine / bitmap could not be set up.
- (NSString*)confidencesOCRTesseract:(UIImage*)picture{

    NSString *tessdataFolder = @"tessdata";
    NSString *language = @"eng";

    // Useful paths
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSArray *documentPaths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *documentPath = ([documentPaths count] > 0) ? [documentPaths objectAtIndex:0] : nil;
    if (documentPath == nil) {
        // Without a Documents directory there is nowhere to point TESSDATA_PREFIX at.
        NSLog(@"confidencesOCRTesseract: no Documents directory available");
        return @"";
    }
    NSString *dataPath = [documentPath stringByAppendingPathComponent:tessdataFolder];

    // Copy data in Doc Directory
    if (![fileManager fileExistsAtPath:dataPath]) {
        NSString *bundledData = [[[NSBundle mainBundle] bundlePath] stringByAppendingPathComponent:tessdataFolder];
        NSError *copyError = nil;
        if (![fileManager copyItemAtPath:bundledData toPath:dataPath error:&copyError]) {
            NSLog(@"confidencesOCRTesseract: could not copy tessdata: %@", copyError);
        }
    }

    setenv("TESSDATA_PREFIX", [[documentPath stringByAppendingString:@"/"] UTF8String], 1);

    // Render the picture into a 32-bit bitmap that Tesseract can read.
    CGSize size = [picture size];
    int width = size.width;
    int height = size.height;
    if (width <= 0 || height <= 0) {
        return @"";
    }

    // calloc zeroes the buffer, so any transparency is preserved.
    uint32_t *pixels = (uint32_t *)calloc((size_t)width * (size_t)height, sizeof(uint32_t));
    if (pixels == NULL) {
        return @"";
    }

    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
    CGContextRef context = CGBitmapContextCreate(pixels, width, height, 8, width * sizeof(uint32_t), colorSpace,
                                                 kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedLast);
    CGColorSpaceRelease(colorSpace);
    if (context == NULL) {
        free(pixels);
        return @"";
    }

    // Paint the picture into the context, which fills the pixel buffer.
    CGContextDrawImage(context, CGRectMake(0, 0, width, height), [picture CGImage]);
    CGContextRelease(context);

    tesseract::TessBaseAPI *tess = new tesseract::TessBaseAPI();
    // Init reports failure with a non-zero result (e.g. missing language data).
    if (tess->Init([tessdataFolder UTF8String], [language UTF8String]) != 0) {
        NSLog(@"confidencesOCRTesseract: Tesseract initialization failed");
        delete tess;
        free(pixels);
        return @"";
    }

    tess->SetVariable("save_blob_choices", "T");
    tess->SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");

    tess->SetImage((const unsigned char *) pixels, width, height, sizeof(uint32_t), width * sizeof(uint32_t));
    tess->Recognize(NULL);

    NSMutableString *finalWord = [NSMutableString string];

    // For every identified symbol (there should be only one...)
    tesseract::ResultIterator *ri = tess->GetIterator();
    if (ri != NULL) {
        do {
            // GetUTF8Text returns a new[]-allocated copy that the caller has to delete[].
            const char *symbol = ri->GetUTF8Text(tesseract::RIL_SYMBOL);
            if (symbol != NULL) {
                // stringWithUTF8String: yields nil for invalid UTF-8; treat that as "no text".
                NSString *symbolText = [NSString stringWithUTF8String:symbol];
                delete[] symbol;

                if (symbolText != nil) {
                    float conf = ri->Confidence(tesseract::RIL_SYMBOL);

                    if (conf > 80.0f) {
                        [finalWord appendString:symbolText];
                    } else {
                        // Low confidence: take the first non-empty alternative. Every empty
                        // alternative met before that appends the symbol itself.
                        tesseract::ChoiceIterator choices(*ri);
                        do {
                            // The choice text may be NULL, so check it before building a string.
                            const char *choice = choices.GetUTF8Text();
                            NSString *choiceText = (choice != NULL) ? [NSString stringWithUTF8String:choice] : nil;
                            if (choiceText.length > 0) {
                                [finalWord appendString:choiceText];
                                break;
                            }
                            [finalWord appendString:symbolText];
                        } while (choices.Next());
                    }
                }
            }
        } while (ri->Next(tesseract::RIL_SYMBOL));

        // The iterator returned by GetIterator() has to be deleted by the caller.
        delete ri;
    }

    // Shut the engine down before freeing the pixels.
    // NOTE(review): whether SetImage copies the buffer depends on the Tesseract version —
    // freeing only after End() is safe in both cases.
    tess->End();
    delete tess;
    free(pixels);

    return [NSString stringWithString:finalWord];

}

/// Comparison callback for -sortedArrayUsingFunction:context:.
///
/// Both arguments are treated as strings; only the text before the first "." is read
/// as a number. The larger value is reported as coming first (NSOrderedAscending), so
/// sorting with this function yields largest-to-smallest order.
///
/// @param num1 First value, a numeric string such as @"12.500000".
/// @param num2 Second value, same format.
/// @param context Unused.
NSInteger intSort(id num1, id num2, void *context) {
    NSString *leftWhole = [[(NSString *)num1 componentsSeparatedByString:@"."] objectAtIndex:0];
    NSString *rightWhole = [[(NSString *)num2 componentsSeparatedByString:@"."] objectAtIndex:0];

    const float left = [leftWhole floatValue];
    const float right = [rightWhole floatValue];

    if (left > right) {
        return NSOrderedAscending;
    }
    return (left < right) ? NSOrderedDescending : NSOrderedSame;
}

/// Comparison callback for -sortedArrayUsingFunction:context:.
///
/// Both arguments are treated as strings; only the text before the first "." is read
/// as a number. The smaller value is reported as coming first (NSOrderedAscending), so
/// sorting with this function yields smallest-to-largest order, despite the "Desc" in the name.
///
/// @param num1 First value, a numeric string such as @"12" or @"12.500000".
/// @param num2 Second value, same format.
/// @param context Unused.
NSInteger intSortDesc(id num1, id num2, void *context) {
    NSString *leftWhole = [[(NSString *)num1 componentsSeparatedByString:@"."] objectAtIndex:0];
    NSString *rightWhole = [[(NSString *)num2 componentsSeparatedByString:@"."] objectAtIndex:0];

    const float left = [leftWhole floatValue];
    const float right = [rightWhole floatValue];

    if (left < right) {
        return NSOrderedAscending;
    }
    return (left > right) ? NSOrderedDescending : NSOrderedSame;
}


@end
person Fran Martin    schedule 27.02.2014