i computed tfidf vectorizer for text data and got vectors as (100000,2000) max_feature = 2000.
while i am computing the co occurance matrix by below code.
length = 2000
m = np.zeros([length,length]) # n is the count of all words
def cal_occ(sentence,m):
for i,word in enumerate(sentence):
print(i)
print(word)
for j in range(max(i-window,0),min(i+window,length)):
print(j)
print(sentence[j])
m[word,sentence[j]]+=1
for sentence in tf_vec:
cal_occ(sentence, m)
I am getting the following error.
0
(0, 1210) 0.20426932204609685
(0, 191) 0.23516811545499153
(0, 592) 0.2537746177804585
(0, 1927) 0.2896119458034052
(0, 1200) 0.1624114163299802
(0, 1856) 0.24376566018277918
(0, 1325) 0.2789314085220367
(0, 756) 0.15365704375851477
(0, 1130) 0.293489555928974
(0, 346) 0.21231046306681553
(0, 557) 0.2036759579760878
(0, 1036) 0.29666992324872365
(0, 264) 0.36435609585838674
(0, 1701) 0.242619998334931
(0, 1939) 0.33934107208095693
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-96-ad505b6df734> in <module>()
11 m[word,sentence[j]]+=1
12 for sentence in tf_vec:
---> 13 cal_occ(sentence, m)
<ipython-input-96-ad505b6df734> in cal_occ(sentence, m)
9 print(j)
10 print(sentence[j])
---> 11 m[word,sentence[j]]+=1
12 for sentence in tf_vec:
13 cal_occ(sentence, m)
IndexError: only integers, slices (:
), ellipsis (...
), numpy.newaxis (None
) and integer or boolean arrays are valid indices