I'm trying to convert an image to text using opencv, but the code gives the following error:
contours.sort(key=lambda x: get_contour_precedence(x, img.shape[1])) TypeError: 'key' is an invalid keyword argument for this function error.
Is there any way to fix it? This is the code:
import cv2
import pytesseract
import numpy as np
import PIL.Image as Image
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-
OCR\\tesseract'
def get_contour_precedence(contour, cols):
tolerance_factor = 20
origin = cv2.boundingRect(contour)
return ((origin[0] // tolerance_factor) * tolerance_factor) * cols +
origin[1]
img = cv2.imread("C:/Users/Akshatha/Desktop/text_detection_from
_image/images/news1.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 0, 255,cv2.THRESH_BINARY_INV +
cv2.THRESH_OTSU)
kernel = np.ones((5, 5), np.uint8)
erosion = cv2.erode(thresh, kernel, iterations=1)
dilation = cv2.dilate(thresh, kernel, iterations=3)
(contours, heirarchy,_) = cv2.findContours(dilation, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
heirarchy = heirarchy[0]
print("start")
print(np.array(heirarchy).shape, np.array(contours).shape)
print("stop")
contours.sort(key=lambda x: get_contour_precedence(x, img.shape[1]))
# print(contours[0])
idx = 0
textlist = []
i = 0
rect_list = []
for c in contours:
# get the bounding rect
x, y, w, h = cv2.boundingRect(c)
rect_list.append((x, y, x + w, y + h))
# draw a green rectangle to visualize the bounding rect
cv2.rectangle(img, (x, y), (x + w, y + h), (255, 255, 0), 3)
roi = img[y:y + h, x:x + w]
text = pytesseract.image_to_string(roi, lang='eng', config='--oem 1 --
psm 6 -c preserve_interword_spaces=1 ')
print(text)
cv2.putText(img, "#{}".format(i + 1), (x, y - 15),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 4)
i += 1
cv2.namedWindow('Dilation', cv2.WINDOW_NORMAL)
cv2.imshow('Dilation', img)
cv2.waitKey(0)