-->

在Objective-C中从PDF提取可编辑字段(Extract Editable Fields from a PDF in Objective-C)

2019-08-05 15:12发布

我研究如何在iOS应用中处理PDF已经有一段时间了。 我已经弄清了其中的几块拼图,比如扫描操作符(operator)以及在UIWebView中显示PDF。 不过,我真正需要做的是找出PDF文档中的可编辑字段。

理想情况下,我希望能够直接与这些字段交互,但这听起来非常困难,也不适合作为第一步。 我已经有一个可以用这种方式处理PDF的Windows服务,所以可以退而求其次:识别出可编辑字段,在表单视图中向用户收集字段数据,然后把数据回传给服务器接口。 问题是,我不知道该如何识别这些字段。 我处理的是政府发布的PDF文件,例如I-9和W-4表格,因此无法控制PDF的创建方式或字段的命名。 这就是我需要动态提取它们的原因。 任何帮助和/或参考资料,我都将不胜感激。

我参考了苹果公司Quartz 2D编程指南中的[这篇文档](https://developer.apple.com/library/mac/#documentation/graphicsimaging/conceptual/drawingwithquartz2d/dq_pdf_scan/dq_pdf_scan.html "PDF文档分析")来扫描PDF并触发操作符回调,但这并没有帮助我找到可编辑的字段。

我还直接把PDF数据加载到一个UIWebView中,以便显示给用户。

// Load the decoded PDF bytes into the web view so it can display them to the user.
[_webView loadData:decodedData
          MIMEType:@"application/pdf"
  textEncodingName:@"utf-8"
           baseURL:nil];

更新:

我构建了一个PDF Helper类(如下所示),用来遍历文档目录(document catalog)中所有可能的对象类型。 起初我没有处理数组中嵌套的字典,所以看不到表单字段。 修复这一点之后,我意识到还必须处理指向父对象的引用,以避免递归调用陷入无限循环。 下面的代码可以输出文档目录中的大量信息。 现在我只需要解析这些信息,分离出我需要的表单字段。

PDFHelper.h

#import <Foundation/Foundation.h>

// File-level handle to a PDFHelper instance. -init assigns `self` to it, and the C
// callback getDictionaryObjects messages it to reach catalogLevel and pdfDict.
// NOTE(review): this is a variable definition inside a header; importing PDFHelper.h from
// more than one .m file would presumably cause duplicate-symbol link errors — confirm, and
// consider an `extern` declaration with a single definition in PDFHelper.m.
id selfClass;

/// Walks the Core Graphics PDF object graph of a document, logging each entry and
/// recording the values it finds.
@interface PDFHelper : NSObject

/// Raw bytes of the PDF. They are written to a file and re-opened each time the
/// document catalog is requested.
@property (nonatomic, strong) NSData *pdfData;
/// Flat store of values recorded during traversal, keyed by the PDF dictionary key they
/// were found under (entries from different dictionaries that share a key overwrite).
@property (nonatomic, strong) NSMutableDictionary *pdfDict;
/// Current nesting depth of the traversal. Starts at 1 and determines how many "-"
/// characters prefix each log line.
@property (nonatomic) int catalogLevel;


/// Converts a CGPDFArray into an NSArray of its scalar, string and nested-array values.
/// Dictionary and stream elements are traversed with getDictionaryObjects instead of
/// being added to the result.
/// @param arr  The PDF array to convert.
/// @param dict The PDF dictionary in which the array was found.
/// @param key  The key under which the array was found; "Parent" and "P" suppress
///             descent into dictionary and stream elements.
-(NSArray *) copyPDFArray:(CGPDFArrayRef)arr referencingDictionary:(CGPDFDictionaryRef)dict referencingKey:(const char *)key;
/// Traverses the catalog's "AcroForm" dictionary, if present, and returns the value that
/// ends up recorded under the "XFA" key of pdfDict.
-(NSArray *) getFormFields;
/// Traverses the whole document catalog (logging and filling pdfDict) and returns it.
-(CGPDFDictionaryRef) getDocumentCatalog;

@end

PDFHelper.m

#import "PDFHelper.h"
#import "FileHelpers.h"
#import "Log.h"

@implementation PDFHelper

// Bind each property to an underscore-prefixed instance variable.
@synthesize pdfData = _pdfData;
@synthesize pdfDict = _pdfDict;
@synthesize catalogLevel = _catalogLevel;

// Forward declaration of the traversal callback. Methods that appear before its definition
// pass it to CGPDFDictionaryApplyFunction, and C requires a declaration to be visible at
// the point of use.
void getDictionaryObjects (const char *key, CGPDFObjectRef object, void *info);

// Creates a helper with an empty value store and a traversal depth of 1.
-(id)init
{
    // Nothing to set up if the superclass initializer failed.
    if (!(self = [super init]))
        return self;

    // Publish this instance through the file-level global so the C callback
    // getDictionaryObjects can message it; a later -init on another instance replaces it.
    selfClass = self;
    _catalogLevel = 1;
    _pdfDict = [[NSMutableDictionary alloc] init];
    return self;
}

// Traverses the catalog's "AcroForm" dictionary (when present) with getDictionaryObjects
// and returns the array that the traversal recorded under the "XFA" key.
//
// Returns the NSArray stored for "XFA", or nil when the catalog is unavailable, there is
// no "AcroForm" entry, nothing was recorded for "XFA", or the recorded value is not an
// array.
-(NSArray *) getFormFields
{
    CGPDFDictionaryRef catalog = [self getPdfDocDictionary];
    CGPDFDictionaryRef acroForm = NULL;

    // getPdfDocDictionary performs no failure handling, so guard against a NULL catalog
    // before handing it to Core Graphics.
    if (catalog && CGPDFDictionaryGetDictionary(catalog, "AcroForm", &acroForm) && acroForm)
        CGPDFDictionaryApplyFunction(acroForm, getDictionaryObjects, acroForm);

    // _pdfDict is a flat store shared by every dictionary the traversal visits, so "XFA"
    // may hold a number or string rather than an array; never return such a value typed
    // as NSArray.
    id recorded = [_pdfDict objectForKey:@"XFA"];
    return [recorded isKindOfClass:[NSArray class]] ? recorded : nil;
}

// Runs the logging/recording traversal over every entry of the document catalog and
// returns the catalog dictionary that was traversed.
-(CGPDFDictionaryRef) getDocumentCatalog
{
    CGPDFDictionaryRef catalog = [self getPdfDocDictionary];

    // The catalog is also passed as the callback's `info` argument, which
    // getDictionaryObjects treats as the dictionary currently being enumerated.
    CGPDFDictionaryApplyFunction(catalog, getDictionaryObjects, catalog);

    return catalog;
}

// Writes pdfData to "file.pdf" (path supplied by FileHelpers), opens that file as a
// CGPDFDocument and returns the document's catalog dictionary.
//
// NOTE(review): the CGPDFDocumentRef created here is never released, and a new one is
// created on every call. The returned catalog presumably belongs to that document, so
// releasing it inside this method would likely invalidate the return value — confirm, and
// consider keeping the document in an instance variable with a matching release.
// NOTE(review): the result of writeToFile:atomically: is ignored; on failure a stale or
// missing file would be opened instead.
-(CGPDFDictionaryRef) getPdfDocDictionary
{
    NSString *libraryPath = [FileHelpers pathInLibraryDirectory:@"file.pdf"];
    NSURL *fileURL = [[NSURL alloc] initFileURLWithPath:libraryPath];

    // Core Graphics opens the PDF from a URL here, so the in-memory bytes go to disk first.
    [_pdfData writeToFile:[fileURL path] atomically:YES];

    CGPDFDocumentRef document = CGPDFDocumentCreateWithURL((__bridge CFURLRef)fileURL);
    return CGPDFDocumentGetCatalog(document);
}

// Declared ahead of the helpers below because the descent helper passes this function
// back to CGPDFDictionaryApplyFunction.
void getDictionaryObjects (const char *key, CGPDFObjectRef object, void *info);

// Logs `message` prefixed with one "-" per current traversal level.
static void PDFHelperLogDictionaryMessage(NSString *message) {
    int depth = [selfClass catalogLevel];
    NSString *prefix = [@"" stringByPaddingToLength:(NSUInteger)MAX(depth, 0)
                                         withString:@"-"
                                    startingAtIndex:0];
    [Log LogDebug:[prefix stringByAppendingString:message]];
}

// Records `value` in the shared pdfDict under `key`; does nothing when either the value
// or the decoded key is nil, because NSMutableDictionary raises on nil.
static void PDFHelperStoreValueForKey(id value, const char *key) {
    if (!value || !key)
        return;
    NSString *keyString = [NSString stringWithCString:key encoding:NSUTF8StringEncoding];
    if (!keyString) {
        // PDF names are byte strings and need not be valid UTF-8; Latin-1 maps every
        // byte to a character, so the entry is still recorded.
        keyString = [NSString stringWithCString:key encoding:NSISOLatin1StringEncoding];
    }
    if (keyString)
        [[selfClass pdfDict] setObject:value forKey:keyString];
}

// Enumerates `dictionary` one level deeper, unless `key` is a "Parent"/"P" back-reference
// (following those would re-enter an ancestor and recurse without end).
static void PDFHelperDescendIntoDictionary(CGPDFDictionaryRef dictionary, const char *key) {
    if (!dictionary)
        return;
    if (key && (strcmp(key, "Parent") == 0 || strcmp(key, "P") == 0))
        return;
    [selfClass setCatalogLevel:[selfClass catalogLevel] + 1];
    CGPDFDictionaryApplyFunction(dictionary, getDictionaryObjects, dictionary);
    [selfClass setCatalogLevel:[selfClass catalogLevel] - 1];
}

// CGPDFDictionaryApplyFunction callback: logs one dictionary entry and records its value.
//
// key    - the entry's key within the dictionary being enumerated.
// object - the entry's value.
// info   - the dictionary being enumerated; every call site passes the same dictionary it
//          hands to CGPDFDictionaryApplyFunction.
//
// Booleans, integers, reals, names, strings and arrays are stored in the pdfDict of the
// instance held in `selfClass`, keyed by `key`. Dictionaries and stream dictionaries are
// enumerated recursively. Each stream's raw data is also written to "data.dat".
//
// NOTE(review): only "Parent" and "P" keys are treated as back-references; other reference
// cycles in a PDF would presumably still recurse without bound — confirm against real
// documents.
void getDictionaryObjects (const char *key, CGPDFObjectRef object, void *info) {

    PDFHelperLogDictionaryMessage([NSString stringWithFormat:@"key: %s", key]);

    CGPDFDictionaryRef contentDict = (CGPDFDictionaryRef)info;

    switch (CGPDFObjectGetType(object)) {
        case kCGPDFObjectTypeNull: {
            // Logged without the depth prefix, as before.
            [Log LogDebug:@"*****pdf null value"];
            break;
        }
        case kCGPDFObjectTypeBoolean: {
            CGPDFBoolean objectBoolean;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeBoolean, &objectBoolean)) {
                NSNumber *boxed = [NSNumber numberWithBool:objectBoolean];
                PDFHelperLogDictionaryMessage([NSString stringWithFormat:@"pdf boolean value: %@", boxed]);
                PDFHelperStoreValueForKey(boxed, key);
            }
            break;
        }
        case kCGPDFObjectTypeInteger: {
            CGPDFInteger objectInteger;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeInteger, &objectInteger)) {
                PDFHelperLogDictionaryMessage([NSString stringWithFormat:@"pdf integer value: %ld", (long int)objectInteger]);
                // CGPDFInteger is wider than int on 64-bit targets; box it as a long so
                // large values are not truncated.
                PDFHelperStoreValueForKey([NSNumber numberWithLong:(long)objectInteger], key);
            }
            break;
        }
        case kCGPDFObjectTypeReal: {
            CGPDFReal objectReal;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeReal, &objectReal)) {
                // Log and store the value as floating point; converting to an integer
                // discarded the fractional part.
                PDFHelperLogDictionaryMessage([NSString stringWithFormat:@"pdf real value: %f", (double)objectReal]);
                PDFHelperStoreValueForKey([NSNumber numberWithDouble:(double)objectReal], key);
            }
            break;
        }
        case kCGPDFObjectTypeName: {
            const char *name = NULL;
            // Read the name from the object itself rather than looking `key` up in `info`.
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeName, &name) && name) {
                NSString *dictName = [[NSString alloc] initWithCString:name encoding:NSUTF8StringEncoding];
                if (dictName) {
                    PDFHelperLogDictionaryMessage([NSString stringWithFormat:@"pdf name value: %@", dictName]);
                    PDFHelperStoreValueForKey(dictName, key);
                }
            }
            break;
        }
        case kCGPDFObjectTypeString: {
            CGPDFStringRef objectString;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeString, &objectString)) {
                // CGPDFStringCopyTextString returns an owned CFString: copy once and hand
                // ownership to ARC so it is released. The result may be nil.
                NSString *text = CFBridgingRelease(CGPDFStringCopyTextString(objectString));
                PDFHelperLogDictionaryMessage([NSString stringWithFormat:@"pdf string value: %@", text]);
                PDFHelperStoreValueForKey(text, key);
            }
            break;
        }
        case kCGPDFObjectTypeArray: {
            CGPDFArrayRef objectArray;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeArray, &objectArray)) {
                NSArray *elements = [selfClass copyPDFArray:objectArray
                                      referencingDictionary:contentDict
                                             referencingKey:key];
                PDFHelperStoreValueForKey(elements, key);
            }
            break;
        }
        case kCGPDFObjectTypeDictionary: {
            CGPDFDictionaryRef objectDictionary;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeDictionary, &objectDictionary))
                PDFHelperDescendIntoDictionary(objectDictionary, key);
            break;
        }
        case kCGPDFObjectTypeStream: {
            CGPDFStreamRef objectStream;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeStream, &objectStream)) {
                CGPDFDictionaryRef streamDict = CGPDFStreamGetDictionary(objectStream);

                // CGPDFStreamCopyData returns an owned CFData (or NULL); transfer it to
                // ARC so it is released, and tolerate the NULL case.
                CGPDFDataFormat format = CGPDFDataFormatRaw;
                NSData *data = CFBridgingRelease(CGPDFStreamCopyData(objectStream, &format));

                NSString *dataString = nil;
                if (data) {
                    [data writeToFile:[FileHelpers pathInDocumentDirectory:@"data.dat"] atomically:YES];
                    // Stays nil, and logs as "(null)", when the bytes are not UTF-8 text.
                    dataString = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
                }

                PDFHelperLogDictionaryMessage([NSString stringWithFormat:@"pdf stream length: %ld - %@", (long int)[data length], dataString]);

                PDFHelperDescendIntoDictionary(streamDict, key);
            }
            break;
        }
    }
}

// Converts a CGPDFArray into an NSArray, logging every element on the way.
//
// arr  - the PDF array to convert.
// dict - the PDF dictionary in which the array was found; forwarded to nested arrays.
// key  - the key under which the array was found. When it is "Parent" or "P", dictionary
//        and stream elements are not descended into.
//
// Returns an array holding the boolean, integer, real, name, string and nested-array
// elements in order. Null elements are only logged. Dictionary elements and stream
// dictionaries are enumerated with getDictionaryObjects and are not added to the result.
//
// Logging depth and recursion go through the instance held in `selfClass`, the same one
// getDictionaryObjects uses, so the depth counter stays consistent across both.
- (NSArray *)copyPDFArray:(CGPDFArrayRef)arr referencingDictionary:(CGPDFDictionaryRef)dict referencingKey:(const char *)key
{
    // Logs a message prefixed with one "-" per current traversal level.
    void (^logIndented)(NSString *) = ^(NSString *message) {
        int depth = [selfClass catalogLevel];
        NSString *prefix = [@"" stringByPaddingToLength:(NSUInteger)MAX(depth, 0)
                                             withString:@"-"
                                        startingAtIndex:0];
        [Log LogDebug:[prefix stringByAppendingString:message]];
    };

    // "Parent"/"P" entries point back up the tree; descending through them would recurse
    // without end.
    const BOOL isBackReference = (key != NULL) && (strcmp(key, "Parent") == 0 || strcmp(key, "P") == 0);

    NSMutableArray *temp = [[NSMutableArray alloc] init];
    const size_t count = CGPDFArrayGetCount(arr);
    logIndented([NSString stringWithFormat:@"pdf array count: %zu", count]);

    for (size_t index = 0; index < count; index++) {
        CGPDFObjectRef object = NULL;
        // Skip the slot if Core Graphics cannot produce the element, rather than
        // inspecting an unset pointer.
        if (!CGPDFArrayGetObject(arr, index, &object) || !object)
            continue;

        switch (CGPDFObjectGetType(object)) {
            case kCGPDFObjectTypeNull: {
                logIndented([NSString stringWithFormat:@"pdf array null(%d)", (int)index]);
                break;
            }
            case kCGPDFObjectTypeBoolean: {
                CGPDFBoolean objectBool;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeBoolean, &objectBool)) {
                    NSNumber *boxed = [NSNumber numberWithBool:objectBool];
                    logIndented([NSString stringWithFormat:@"pdf array boolean value(%d): %@", (int)index, boxed]);
                    [temp addObject:boxed];
                }
                break;
            }
            case kCGPDFObjectTypeInteger: {
                CGPDFInteger objectInteger;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeInteger, &objectInteger)) {
                    logIndented([NSString stringWithFormat:@"pdf array integer value(%d): %ld", (int)index, (long int)objectInteger]);
                    // Box as long: CGPDFInteger is wider than int on 64-bit targets.
                    [temp addObject:[NSNumber numberWithLong:(long)objectInteger]];
                }
                break;
            }
            case kCGPDFObjectTypeReal: {
                CGPDFReal objectReal;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeReal, &objectReal)) {
                    // Keep the fractional part in both the log line and the stored value.
                    logIndented([NSString stringWithFormat:@"pdf array real(%d): %f", (int)index, (double)objectReal]);
                    [temp addObject:[NSNumber numberWithDouble:(double)objectReal]];
                }
                break;
            }
            case kCGPDFObjectTypeName: {
                const char *name = NULL;
                // Read the name from the element itself. Looking `key` up in `dict` finds
                // the enclosing array, not a name, so name elements used to be dropped.
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeName, &name) && name) {
                    NSString *nameString = [[NSString alloc] initWithCString:name encoding:NSUTF8StringEncoding];
                    if (nameString) {
                        logIndented([NSString stringWithFormat:@"pdf array name value(%d): %@", (int)index, nameString]);
                        [temp addObject:nameString];
                    }
                }
                break;
            }
            case kCGPDFObjectTypeString: {
                CGPDFStringRef objectString;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeString, &objectString)) {
                    // The copied CFString is owned by the caller; transfer it to ARC so it
                    // is released. It may be nil, which NSMutableArray cannot hold.
                    NSString *text = CFBridgingRelease(CGPDFStringCopyTextString(objectString));
                    logIndented([NSString stringWithFormat:@"pdf array string(%d): %@", (int)index, text]);
                    if (text)
                        [temp addObject:text];
                }
                break;
            }
            case kCGPDFObjectTypeArray: {
                CGPDFArrayRef objectArray;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeArray, &objectArray)) {
                    NSArray *nested = [selfClass copyPDFArray:objectArray
                                        referencingDictionary:dict
                                               referencingKey:key];
                    if (nested)
                        [temp addObject:nested];
                }
                break;
            }
            case kCGPDFObjectTypeDictionary: {
                CGPDFDictionaryRef objectDict;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeDictionary, &objectDict) && !isBackReference) {
                    [selfClass setCatalogLevel:[selfClass catalogLevel] + 1];
                    CGPDFDictionaryApplyFunction(objectDict, getDictionaryObjects, objectDict);
                    [selfClass setCatalogLevel:[selfClass catalogLevel] - 1];
                }
                break;
            }
            case kCGPDFObjectTypeStream: {
                CGPDFStreamRef objectStream;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeStream, &objectStream)) {
                    CGPDFDictionaryRef streamDict = CGPDFStreamGetDictionary(objectStream);

                    // Owned CFData (or NULL): transfer to ARC so it is released.
                    CGPDFDataFormat format = CGPDFDataFormatRaw;
                    NSData *data = CFBridgingRelease(CGPDFStreamCopyData(objectStream, &format));
                    NSString *dataString = data ? [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] : nil;

                    logIndented([NSString stringWithFormat:@"pdf array stream length: (%d): %ld - %@", (int)index, (long int)[data length], dataString]);

                    if (streamDict && !isBackReference) {
                        [selfClass setCatalogLevel:[selfClass catalogLevel] + 1];
                        CGPDFDictionaryApplyFunction(streamDict, getDictionaryObjects, streamDict);
                        [selfClass setCatalogLevel:[selfClass catalogLevel] - 1];
                    }
                }
                break;
            }
        }
    }
    return temp;
}

@end

Answer 1:

你所说的“可编辑字段”,是指可以用Acrobat或Adobe Reader填写的那类表单元素吗?

这些字段并不是实际页面描述的一部分。 如果查阅PDF文档规范,你会在第12.7章“交互式表单”(Interactive Forms)中找到相关描述:文档中的字段字典存储在文档目录里,以名为“AcroForm”的条目为起点。

据我了解,iOS确实允许你访问文档目录,因此你需要在该目录字典中找到“AcroForm”条目,然后深入字段字典结构来收集所需的信息。 文档的全部字段都以分层的方式存储在这一个地方。



文章来源: Extract Editable Fields from a PDF in Objective-C