Note: Ones who are familiar with pycparser would understand the problem much better.
I am using pycparser v2.10 and I am trying to extract all the functions that have been defined in the C file as well extract its input parameter name and identifier type While parsing that C file (Using pycparser).
Code sample
import sys
sys.path.extend(['.', '..'])
CPPPATH = '../utils/cpp.exe' if sys.platform == 'win32' else 'cpp'
from pycparser import c_parser, c_ast, parse_file
class FunctionParameter(c_ast.NodeVisitor):
def visit_FuncDef(self, node):
#node.decl.type.args.params
print "Function name is", node.decl.name, "at", node.decl.coord
print " It's parameters name and type is (are)"
for params in (node.decl.type.args.params):
print " ", params.name, params.type
def func_parameter(filename):
ast = parse_file(filename, use_cpp=True, cpp_path=CPPPATH, cpp_args=r'-I../utils/fake_libc/include')
vf = FunctionParameter()
vf.visit(ast)
if __name__ == '__main__':
if len(sys.argv) > 1:
filename = sys.argv[1]
else:
filename = 'c_files/hash.c'
func_parameter(filename)
Here in the visit_FuncDef I am printing the Function name and then in the for loop, it's parameters.
The problem is that I am able to get the name of the input parameter passed to the function using the params.name
but not able to get its Identifier type using params.type
in the for loop.
Can somebody tell me how can I extract the identifier of the parameter?
Btw, the output is like this:
Function name is hash_func at c_files/hash.c:32
It's parameters name and type is (are)
str <pycparser.c_ast.PtrDecl object at 0x00000000024EFC88>
table_size <pycparser.c_ast.TypeDecl object at 0x00000000024EFEF0>
Function name is HashCreate at c_files/hash.c:44
It's parameters name and type is (are)
hash <pycparser.c_ast.PtrDecl object at 0x00000000024FABE0>
table_size <pycparser.c_ast.TypeDecl object at 0x00000000024FAE48>
Function name is HashInsert at c_files/hash.c:77
It's parameters name and type is (are)
hash <pycparser.c_ast.PtrDecl object at 0x00000000024F99E8>
entry <pycparser.c_ast.PtrDecl object at 0x00000000024F9BE0>
Function name is HashFind at c_files/hash.c:100
It's parameters name and type is (are)
hash <pycparser.c_ast.PtrDecl object at 0x00000000028C4160>
key <pycparser.c_ast.PtrDecl object at 0x00000000028C4358>
Function name is HashRemove at c_files/hash.c:117
It's parameters name and type is (are)
hash <pycparser.c_ast.PtrDecl object at 0x00000000028C5780>
key <pycparser.c_ast.PtrDecl object at 0x00000000028C5978>
Function name is HashPrint at c_files/hash.c:149
It's parameters name and type is (are)
hash <pycparser.c_ast.PtrDecl object at 0x00000000028E9438>
PrintFunc <pycparser.c_ast.PtrDecl object at 0x00000000028E9668>
Function name is HashDestroy at c_files/hash.c:170
It's parameters name and type is (are)
hash <pycparser.c_ast.PtrDecl object at 0x00000000028EF240>
Here as you can see, Instead of getting the Identifier type, I am getting the object type in each line. e.g <pycparser.c_ast.PtrDecl object at 0x00000000024EFC88>
sample hash.c file which i am using as a test file (Anyways it's all there in pycparser):
/*
** C implementation of a hash table ADT
*/
typedef enum tagReturnCode {SUCCESS, FAIL} ReturnCode;
typedef struct tagEntry
{
char* key;
char* value;
} Entry;
typedef struct tagNode
{
Entry* entry;
struct tagNode* next;
} Node;
typedef struct tagHash
{
unsigned int table_size;
Node** heads;
} Hash;
static unsigned int hash_func(const char* str, unsigned int table_size)
{
unsigned int hash_value;
unsigned int a = 127;
for (hash_value = 0; *str != 0; ++str)
hash_value = (a*hash_value + *str) % table_size;
return hash_value;
}
ReturnCode HashCreate(Hash** hash, unsigned int table_size)
{
unsigned int i;
if (table_size < 1)
return FAIL;
//
// Allocate space for the Hash
//
if (((*hash) = malloc(sizeof(**hash))) == NULL)
return FAIL;
//
// Allocate space for the array of list heads
//
if (((*hash)->heads = malloc(table_size*sizeof(*((*hash)->heads)))) == NULL)
return FAIL;
//
// Initialize Hash info
//
for (i = 0; i < table_size; ++i)
{
(*hash)->heads[i] = NULL;
}
(*hash)->table_size = table_size;
return SUCCESS;
}
ReturnCode HashInsert(Hash* hash, const Entry* entry)
{
unsigned int index = hash_func(entry->key, hash->table_size);
Node* temp = hash->heads[index];
HashRemove(hash, entry->key);
if ((hash->heads[index] = malloc(sizeof(Node))) == NULL)
return FAIL;
hash->heads[index]->entry = malloc(sizeof(Entry));
hash->heads[index]->entry->key = malloc(strlen(entry->key)+1);
hash->heads[index]->entry->value = malloc(strlen(entry->value)+1);
strcpy(hash->heads[index]->entry->key, entry->key);
strcpy(hash->heads[index]->entry->value, entry->value);
hash->heads[index]->next = temp;
return SUCCESS;
}
const Entry* HashFind(const Hash* hash, const char* key)
{
unsigned int index = hash_func(key, hash->table_size);
Node* temp = hash->heads[index];
while (temp != NULL)
{
if (!strcmp(key, temp->entry->key))
return temp->entry;
temp = temp->next;
}
return NULL;
}
ReturnCode HashRemove(Hash* hash, const char* key)
{
unsigned int index = hash_func(key, hash->table_size);
Node* temp1 = hash->heads[index];
Node* temp2 = temp1;
while (temp1 != NULL)
{
if (!strcmp(key, temp1->entry->key))
{
if (temp1 == hash->heads[index])
hash->heads[index] = hash->heads[index]->next;
else
temp2->next = temp1->next;
free(temp1->entry->key);
free(temp1->entry->value);
free(temp1->entry);
free(temp1);
temp1 = NULL;
return SUCCESS;
}
temp2 = temp1;
temp1 = temp1->next;
}
return FAIL;
}
void HashPrint(Hash* hash, void (*PrintFunc)(char*, char*))
{
unsigned int i;
if (hash == NULL || hash->heads == NULL)
return;
for (i = 0; i < hash->table_size; ++i)
{
Node* temp = hash->heads[i];
while (temp != NULL)
{
PrintFunc(temp->entry->key, temp->entry->value);
temp = temp->next;
}
}
}
void HashDestroy(Hash* hash)
{
unsigned int i;
if (hash == NULL)
return;
for (i = 0; i < hash->table_size; ++i)
{
Node* temp = hash->heads[i];
while (temp != NULL)
{
Node* temp2 = temp;
free(temp->entry->key);
free(temp->entry->value);
free(temp->entry);
temp = temp->next;
free(temp2);
}
}
free(hash->heads);
hash->heads = NULL;
free(hash);
}
What makes you think you don't extract the type?
The name is
table_size
, the type is inTypeDecl
. Simple names of types are not provided - you have to reconstruct them. For an example of how to unravel a "decl" to its textual representation, see the cdecl example.To get the exact type of identifiers in the AST you - just like Leo in the Inception movie - "need to go deeper". 8]
Here is an extension of your visit_FuncDef function to demonstrate how different items of the AST can be reached from a given point:
With the comments I tried to explain which type of parameter the code is looking for. With the triple hashmarks I marked the actual location in the AST tree.
As an example here is a part of the AST tree of the function HashPrint() which contains a pointer to a function as a parameter:
And finally here is the output of the function:
This works specifically for the example file hash.c. I just wanted you to get an insight on how to access specific parts of the AST from one point.
Best practice is to save the AST to a file:
then to compare the AST with _c_ast.cfg to see what kind of properties each node have so that you can "go deeper" in the tree.