Given the following file, docprep.pyx:
import numpy
from cymem.cymem cimport Pool
from spacy.tokens.doc cimport Doc
from spacy.typedefs cimport hash_t
from spacy.structs cimport TokenC
from spacy.strings import hash_string
# Plain C record describing one document: a pointer to its token array
# together with the number of tokens reachable through that pointer.
cdef struct DocElement:
    TokenC* c  # token array; fast_loop reads c[0] .. c[length - 1]
    int length  # number of tokens readable through `c`
cdef int fast_loop(DocElement* docs, int n_docs, hash_t word, hash_t tag):
    """Count the tokens, over all documents, that match both hashes.

    A token is counted when its `lex.lower` field equals `word` and its
    `tag` field equals `tag`.

    docs   -- array of at least `n_docs` DocElement records
    n_docs -- number of records of `docs` to scan
    word   -- hash compared against each token's `lex.lower`
    tag    -- hash compared against each token's `tag`

    Returns the number of matching tokens.
    """
    cdef int matches = 0
    cdef int doc_idx, tok_idx
    cdef TokenC* tokens

    for doc_idx in range(n_docs):
        tokens = docs[doc_idx].c
        for tok_idx in range(docs[doc_idx].length):
            if tokens[tok_idx].lex.lower == word and tokens[tok_idx].tag == tag:
                matches += 1
    return matches
def main_nlp_fast(doc_list):
    """Print how many tokens in `doc_list` match the word 'run' and tag 'NN'.

    Each Doc's token pointer and length are copied into a C array of
    DocElement records (allocated through a cymem Pool), which is then
    scanned by fast_loop using the hashes of 'run' and 'NN'.

    doc_list -- sized iterable of spaCy Doc objects

    Returns None; the count is written to stdout.
    """
    cdef int total_docs = len(doc_list)
    cdef int idx, match_count
    cdef Pool pool = Pool()
    cdef DocElement* table = <DocElement*>pool.alloc(total_docs, sizeof(DocElement))
    cdef Doc current

    # Fill the C-level table with one record per document.
    for idx, current in enumerate(doc_list):
        table[idx].c = current.c
        table[idx].length = current.length

    match_count = fast_loop(
        table,
        total_docs,
        hash_string('run'),
        hash_string('NN'),
    )
    print(match_count)
# Module-level driver: build a one-document corpus and run the counter.
# Doc's first argument must be a Vocab rather than the raw text, so the
# words are passed separately via `words=`.
# NOTE(review): a Doc built from plain words presumably carries no tag
# annotations, so the printed count is likely 0 unless tags are assigned --
# confirm against the intended input.
from spacy.vocab import Vocab

docs = [Doc(Vocab(), words=["asd", "run", "NN", "ddd"])]
main_nlp_fast(docs)
I compiled to C with Cython:
# Translate docprep.pyx into C source (docprep.c) using Python 3 language semantics.
cython -X language_level=3 docprep.pyx
The include paths were determined as follows:
import numpy
import sysconfig
# Directory holding the NumPy C headers (passed to the compiler with -I).
numpy.get_include()
# Directory holding the CPython headers for this interpreter (also passed with -I).
sysconfig.get_paths()['include']
I tried compiling the C file with GCC:
# Compile docprep.c against the Anaconda Python 3.7 and NumPy headers and link with libpython3.7.
# NOTE(review): the fatal error shown below is about "ios", a C++ standard library header, which
# gcc cannot find when compiling a .c file as C. Presumably the source must be generated with
# `cython --cplus` and built with g++ -- confirm.
# NOTE(review): without -c or -shared, this command links an executable named docprep.o;
# an importable extension module would normally be built with -shared -fPIC -- confirm.
gcc docprep.c -I '/home/torstein/anaconda3/include/python3.7m' -I '/home/torstein/anaconda3/lib/python3.7/site-packages/numpy/core/include' -L '/home/torstein/anaconda3/lib/python3.7' -lpython3.7 -o docprep.o
But I get:
In file included from /home/torstein/anaconda3/lib/python3.7/site-packages/numpy/core/include/numpy/ndarraytypes.h:1823,
from /home/torstein/anaconda3/lib/python3.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:18,
from /home/torstein/anaconda3/lib/python3.7/site-packages/numpy/core/include/numpy/arrayobject.h:4,
from docprep.c:610:
/home/torstein/anaconda3/lib/python3.7/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:15:2: warning: #warning "Using deprecated NumPy API, disable it by " "#defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [-Wcpp]
#warning "Using deprecated NumPy API, disable it by " \
^~~~~~~
docprep.c:613:10: fatal error: ios: No such file or directory
#include "ios"
^~~~~
compilation terminated
As you can tell from the paths, this system has an Anaconda installation with Python 3.7. `numpy`, `spacy` and `cython` are all installed through `conda`.