Extracting input parameters and their identifier types

2019-08-06 12:59发布

问题:

Note: Those who are familiar with pycparser will understand the problem much better.

I am using pycparser v2.10 and I am trying to extract all the functions that are defined in a C file, together with each function's input parameter names and identifier types, while parsing that C file with pycparser.

Code sample

import sys
sys.path.extend(['.', '..'])
CPPPATH = '../utils/cpp.exe' if sys.platform == 'win32' else 'cpp'
from pycparser import c_parser, c_ast, parse_file

class FunctionParameter(c_ast.NodeVisitor):
    """AST visitor that reports every function definition and its parameters.

    For each ``FuncDef`` node it prints the function name and source
    coordinate, followed by one line per parameter holding the parameter
    name and the declarator node stored in the parameter's ``type``
    attribute.
    """

    def visit_FuncDef(self, node):
        """Print the name, location and parameters of one function definition.

        Args:
            node: the ``FuncDef`` node being visited.
        """
        # A parenthesised, single-argument print produces the same text with
        # the Python 2 print statement and the Python 3 print function.
        print("Function name is %s at %s" % (node.decl.name, node.decl.coord))
        print("    It's parameters name  and type is (are)")

        # FuncDef/Decl/FuncDecl/ParamList -- args is None when the function
        # is written with an empty parameter list, e.g. `int f()`.
        args = node.decl.type.args
        if args is None:
            return

        for param in args.params:
            # Not every ParamList entry is a Decl: `...` is an EllipsisParam
            # carrying neither attribute, so read both defensively.
            name = getattr(param, 'name', None)
            decl_type = getattr(param, 'type', None)
            # decl_type is an AST node (PtrDecl, TypeDecl, ...), not a type
            # name, so this line shows the node object itself.
            print("%s %s %s" % ("        ", name, decl_type))


def func_parameter(filename):
    """Parse the C file *filename* and report its function definitions.

    The file is preprocessed with the program named by ``CPPPATH``, using
    ``-I../utils/fake_libc/include`` as preprocessor arguments, and the
    resulting AST is walked by a ``FunctionParameter`` visitor.
    """
    tree = parse_file(
        filename,
        use_cpp=True,
        cpp_path=CPPPATH,
        cpp_args=r'-I../utils/fake_libc/include'
    )
    FunctionParameter().visit(tree)

if __name__ == '__main__':
    # Take the C file from the command line, defaulting to c_files/hash.c.
    filename = sys.argv[1] if len(sys.argv) > 1 else 'c_files/hash.c'
    func_parameter(filename)

Here, in visit_FuncDef, I print the function name and then, in the for loop, its parameters.

The problem is that I can get the name of each input parameter passed to the function using params.name, but I cannot get its identifier type using params.type in the for loop.

Can somebody tell me how I can extract the identifier type of the parameter?

Btw, the output is like this:

Function name is hash_func at c_files/hash.c:32
    It's parameters name  and type is (are)
         str <pycparser.c_ast.PtrDecl object at 0x00000000024EFC88>
         table_size <pycparser.c_ast.TypeDecl object at 0x00000000024EFEF0>
Function name is HashCreate at c_files/hash.c:44
    It's parameters name  and type is (are)
         hash <pycparser.c_ast.PtrDecl object at 0x00000000024FABE0>
         table_size <pycparser.c_ast.TypeDecl object at 0x00000000024FAE48>
Function name is HashInsert at c_files/hash.c:77
    It's parameters name  and type is (are)
         hash <pycparser.c_ast.PtrDecl object at 0x00000000024F99E8>
         entry <pycparser.c_ast.PtrDecl object at 0x00000000024F9BE0>
Function name is HashFind at c_files/hash.c:100
    It's parameters name  and type is (are)
         hash <pycparser.c_ast.PtrDecl object at 0x00000000028C4160>
         key <pycparser.c_ast.PtrDecl object at 0x00000000028C4358>
Function name is HashRemove at c_files/hash.c:117
    It's parameters name  and type is (are)
         hash <pycparser.c_ast.PtrDecl object at 0x00000000028C5780>
         key <pycparser.c_ast.PtrDecl object at 0x00000000028C5978>
Function name is HashPrint at c_files/hash.c:149
    It's parameters name  and type is (are)
         hash <pycparser.c_ast.PtrDecl object at 0x00000000028E9438>
         PrintFunc <pycparser.c_ast.PtrDecl object at 0x00000000028E9668>
Function name is HashDestroy at c_files/hash.c:170
    It's parameters name  and type is (are)
         hash <pycparser.c_ast.PtrDecl object at 0x00000000028EF240>

As you can see, instead of the identifier type I am getting the node object on each line, e.g. <pycparser.c_ast.PtrDecl object at 0x00000000024EFC88>

Sample hash.c file that I am using as a test file (it ships with pycparser anyway):

/*
** C implementation of a hash table ADT
*/
/* Status code returned by HashCreate, HashInsert and HashRemove. */
typedef enum tagReturnCode {SUCCESS, FAIL} ReturnCode;


/*
** A key/value pair. Both members are NUL-terminated strings: HashInsert
** copies them with strlen/strcpy and lookups compare keys with strcmp.
*/
typedef struct tagEntry
{
    char* key;
    char* value;
} Entry;



/* One link of a bucket's singly linked list. */
typedef struct tagNode
{
    /* The entry held by this node */
    Entry* entry;

    /* Next node in the same bucket, or NULL at the end of the list */
    struct tagNode* next;
} Node;


/* The hash table: an array of bucket list heads plus its length. */
typedef struct tagHash
{
    /* Number of buckets, i.e. the number of elements in heads */
    unsigned int table_size;

    /* Array of table_size list heads; an empty bucket holds NULL */
    Node** heads; 

} Hash;


/*
** Hash the NUL-terminated string str to a bucket index.
**
** The running value is multiplied by 127 and the next character is added,
** reducing modulo table_size after every character, so table_size must be
** non-zero. The empty string hashes to 0.
*/
static unsigned int hash_func(const char* str, unsigned int table_size)
{
    const unsigned int multiplier = 127;
    unsigned int hash_value = 0;
    const char* cursor = str;

    while (*cursor != 0)
    {
        hash_value = (multiplier*hash_value + *cursor) % table_size;
        ++cursor;
    }

    return hash_value;
}


/*
** Allocate a hash table with table_size empty buckets and store it in *hash.
**
** Returns SUCCESS when the table was created. Returns FAIL when table_size
** is 0 or when an allocation fails; after an allocation failure nothing
** remains allocated and *hash is NULL.
*/
ReturnCode HashCreate(Hash** hash, unsigned int table_size)
{
    unsigned int i;

    if (table_size < 1)
        return FAIL;

    //
    // Allocate space for the Hash
    //
    if (((*hash) = malloc(sizeof(**hash))) == NULL)
        return FAIL;

    //
    // Allocate space for the array of list heads
    //
    if (((*hash)->heads = malloc(table_size*sizeof(*((*hash)->heads)))) == NULL)
    {
        //
        // Do not leak the Hash or hand a half-built table back to the caller
        //
        free(*hash);
        *hash = NULL;
        return FAIL;
    }

    //
    // Initialize Hash info
    //
    for (i = 0; i < table_size; ++i)
    {
        (*hash)->heads[i] = NULL;
    }

    (*hash)->table_size = table_size;

    return SUCCESS;
}


/*
** Store a copy of entry in hash, replacing any entry that has the same key.
**
** The key and value strings are duplicated, so the caller keeps ownership
** of entry. Returns SUCCESS, or FAIL when memory cannot be allocated; on
** FAIL the table is left exactly as it was.
*/
ReturnCode HashInsert(Hash* hash, const Entry* entry)
{
    unsigned int index = hash_func(entry->key, hash->table_size);
    Node* node;

    //
    // Build the new node completely before touching the table, so that an
    // allocation failure cannot lose or corrupt existing entries
    //
    if ((node = malloc(sizeof(Node))) == NULL)
        return FAIL;

    node->entry = malloc(sizeof(Entry));

    if (node->entry != NULL)
    {
        node->entry->key = malloc(strlen(entry->key)+1);
        node->entry->value = malloc(strlen(entry->value)+1);
    }

    if (node->entry == NULL || node->entry->key == NULL || node->entry->value == NULL)
    {
        if (node->entry != NULL)
        {
            // free(NULL) is a no-op, so either string may be missing here
            free(node->entry->key);
            free(node->entry->value);
            free(node->entry);
        }

        free(node);
        return FAIL;
    }

    strcpy(node->entry->key, entry->key);
    strcpy(node->entry->value, entry->value);

    //
    // Drop any existing entry with this key. This happens after the copies
    // above because entry may alias the stored entry that is freed here.
    //
    HashRemove(hash, entry->key);

    //
    // Read the bucket head only after the removal: HashRemove may have
    // freed the previous head node
    //
    node->next = hash->heads[index];
    hash->heads[index] = node;

    return SUCCESS;
}



/*
** Look up key in hash.
**
** Returns the stored entry whose key compares equal to key, or NULL when
** the bucket for key contains no such entry.
*/
const Entry* HashFind(const Hash* hash, const char* key)
{
    const unsigned int bucket = hash_func(key, hash->table_size);
    Node* node;

    for (node = hash->heads[bucket]; node != NULL; node = node->next)
    {
        if (strcmp(key, node->entry->key) == 0)
            return node->entry;
    }

    return NULL;
}


/*
** Remove the first entry whose key equals key and free its node, entry
** and both strings.
**
** Returns SUCCESS when an entry was removed, FAIL when the key is absent.
*/
ReturnCode HashRemove(Hash* hash, const char* key)
{
    //
    // link always addresses the pointer that leads to the current node:
    // the bucket head first, then the previous node's next member
    //
    Node** link = &hash->heads[hash_func(key, hash->table_size)];

    while (*link != NULL)
    {
        Node* node = *link;

        if (strcmp(key, node->entry->key) == 0)
        {
            // Unlink the node, then release everything it owns
            *link = node->next;

            free(node->entry->key);
            free(node->entry->value);
            free(node->entry);
            free(node);

            return SUCCESS;
        }

        link = &node->next;
    }

    return FAIL;
}


/*
** Call PrintFunc(key, value) for every entry in hash, bucket by bucket.
** Does nothing when hash or its heads array is NULL.
*/
void HashPrint(Hash* hash, void (*PrintFunc)(char*, char*))
{
    unsigned int bucket;
    Node* node;

    if (hash == NULL || hash->heads == NULL)
        return;

    for (bucket = 0; bucket < hash->table_size; ++bucket)
    {
        for (node = hash->heads[bucket]; node != NULL; node = node->next)
            PrintFunc(node->entry->key, node->entry->value);
    }
}



/*
** Free every node, entry and string held by hash, then the heads array
** and the Hash itself. Does nothing when hash is NULL.
*/
void HashDestroy(Hash* hash)
{
    unsigned int bucket;

    if (hash == NULL)
        return;

    for (bucket = 0; bucket < hash->table_size; ++bucket)
    {
        Node* node = hash->heads[bucket];

        while (node != NULL)
        {
            // Remember the successor before the node is released
            Node* following = node->next;

            free(node->entry->key);
            free(node->entry->value);
            free(node->entry);
            free(node);

            node = following;
        }
    }

    free(hash->heads);
    hash->heads = NULL;

    free(hash);
}

回答1:

What makes you think you don't extract the type?

Function name is HashCreate at c_files/hash.c:44
    It's parameters name  and type is (are)
         hash <pycparser.c_ast.PtrDecl object at 0x00000000024FABE0>
         table_size <pycparser.c_ast.TypeDecl object at 0x00000000024FAE48>

The name is table_size, the type is in TypeDecl. Simple names of types are not provided - you have to reconstruct them. For an example of how to unravel a "decl" to its textual representation, see the cdecl example.



回答2:

To get the exact type of identifiers in the AST you - just like Leo in the Inception movie - "need to go deeper". 8]

Here is an extension of your visit_FuncDef function to demonstrate how different items of the AST can be reached from a given point:

def visit_FuncDef(self, node):
    """Print each parameter of a function definition with its base type names.

    Only the declarator shapes that occur in hash.c are decoded: a plain
    variable, a pointer, a pointer to a pointer, and a pointer to a function
    whose own parameters are pointers. A parameter that is neither a pointer
    nor a plain identifier-typed variable (an array, ``...``) is reported
    with an ``<unhandled ...>`` placeholder; pointer targets other than the
    ones listed above are not decoded.

    Args:
        node: the ``FuncDef`` node being visited.
    """
    #node.decl.type.args.params
    # A parenthesised, single-argument print produces the same text with the
    # Python 2 print statement and the Python 3 print function.
    print("Function name is %s at %s" % (node.decl.name, node.decl.coord))
    print("    It's parameters name  and type is (are)")
    args = node.decl.type.args ###FuncDef/Decl/FuncDecl/ParamList
    # args is None when the parameter list is empty, e.g. `int f()`
    for params in (args.params if args is not None else []):
        # Assign parameter name
        pname = getattr(params, 'name', None) ###ParamList/Decl
        decl = getattr(params, 'type', None) ###ParamList/Decl -> its declarator

        # Start every parameter from a placeholder, so that a shape which is
        # not decoded below is reported as such instead of printing the
        # previous parameter's type (or failing on an unassigned ptype).
        ptype = '<unhandled %s>' % type(params if decl is None else decl).__name__

        # Parameter is a pointer type of some kind
        if isinstance(decl, c_ast.PtrDecl):
            # Parameter is a pointer to a pointer type - double indirection
            if isinstance(decl.type, c_ast.PtrDecl):
                ptype = decl.type.type.type.names ###Decl/PtrDecl/PtrDecl/TypeDecl/IdentifierType
            # Parameter is a pointer to a function type
            elif isinstance(decl.type, c_ast.FuncDecl):
                pname = str(decl.type.type.type.names) + ' (*' ###Decl/PtrDecl/FuncDecl/TypeDecl/IdentifierType
                pname = pname + decl.type.type.declname + ')' ###Decl/PtrDecl/FuncDecl/TypeDecl
                subargs = decl.type.args ###Decl/PtrDecl/FuncDecl/ParamList
                ptype = ''.join(
                    str(subparams.type.type.type.names) ###Typename/PtrDecl/TypeDecl/IdentifierType
                    for subparams in (subargs.params if subargs is not None else []))
            # Parameter is a pointer type - single indirection
            else:
                ptype = decl.type.type.names ###Decl/PtrDecl/TypeDecl/IdentifierType

        # Parameter is a variable
        elif isinstance(getattr(decl, 'type', None), c_ast.IdentifierType):
            ptype = decl.type.names ###Decl/TypeDecl/IdentifierType

        print("%s %s %s" % ("        ", pname, ptype))

With the comments I tried to explain which kind of parameter the code is looking for. With the triple hash marks (###) I marked the corresponding location in the AST.

As an example here is a part of the AST tree of the function HashPrint() which contains a pointer to a function as a parameter:

  FuncDef: 
    Decl: HashPrint, [], [], []
      FuncDecl: 
        ParamList: 
          Decl: hash, [], [], []
            PtrDecl: []
              TypeDecl: hash, []
                IdentifierType: ['Hash']
          Decl: PrintFunc, [], [], []
            PtrDecl: []
              FuncDecl: 
                ParamList: 
                  Typename: None, []
                    PtrDecl: []
                      TypeDecl: None, []
                        IdentifierType: ['char']
                  Typename: None, []
                    PtrDecl: []
                      TypeDecl: None, []
                        IdentifierType: ['char']
                TypeDecl: PrintFunc, []
                  IdentifierType: ['void']
        TypeDecl: HashPrint, []
          IdentifierType: ['void']
    Compound: 

And finally here is the output of the function:

Function name is hash_func at c_files/hash.c:32
    It's parameters name  and type is (are)
         str ['char']
         table_size ['unsigned', 'int']
Function name is HashCreate at c_files/hash.c:44
    It's parameters name  and type is (are)
         hash ['Hash']
         table_size ['unsigned', 'int']
Function name is HashInsert at c_files/hash.c:77
    It's parameters name  and type is (are)
         hash ['Hash']
         entry ['Entry']
Function name is HashFind at c_files/hash.c:100
    It's parameters name  and type is (are)
         hash ['Hash']
         key ['char']
Function name is HashRemove at c_files/hash.c:117
    It's parameters name  and type is (are)
         hash ['Hash']
         key ['char']
Function name is HashPrint at c_files/hash.c:149
    It's parameters name  and type is (are)
         hash ['Hash']
         ['void'] (*PrintFunc) ['char']['char']
Function name is HashDestroy at c_files/hash.c:170
    It's parameters name  and type is (are)
         hash ['Hash']

This works specifically for the example file hash.c. I just wanted you to get an insight on how to access specific parts of the AST from one point.

Best practice is to save the AST to a file:

# Write the textual AST dump to ast.txt. The with-block closes the file even
# if ast.show() raises, and the handle no longer shadows the builtin `file`.
with open('ast.txt', 'w') as ast_dump:
    ast.show(buf=ast_dump)

Then compare the AST with _c_ast.cfg to see what properties each node has, so that you can "go deeper" in the tree.