Here's a potential solution:
Obtain binary image. Convert image to grayscale
and adaptive threshold
Filter out all numbers and noise to isolate only boxes. We filter using contour area to remove the numbers since we only want each individual cell
Fix grid lines. Perform morphological closing
with a horizontal and vertical kernel
to repair grid lines.
Sort each cell in top-to-bottom and left-to-right order. We organize each cell into a sequential order using imutils.contours.sort_contours()
with the top-to-bottom
and left-to-right
parameter
Here's the initial binary image (left) and filtered out numbers + repaired grid lines + inverted image (right)
Here's a visualization of the iteration of each cell
The detected numbers in each cell
Code
import cv2
from imutils import contours
import numpy as np
# Load image, grayscale, and adaptive threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,57,5)
# Filter out all numbers and noise to isolate only boxes
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
area = cv2.contourArea(c)
if area < 1000:
cv2.drawContours(thresh, [c], -1, (0,0,0), -1)
# Fix horizontal and vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, vertical_kernel, iterations=9)
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,1))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, horizontal_kernel, iterations=4)
# Sort by top to bottom and each row by left to right
invert = 255 - thresh
cnts = cv2.findContours(invert, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
(cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")
sudoku_rows = []
row = []
for (i, c) in enumerate(cnts, 1):
area = cv2.contourArea(c)
if area < 50000:
row.append(c)
if i % 9 == 0:
(cnts, _) = contours.sort_contours(row, method="left-to-right")
sudoku_rows.append(cnts)
row = []
# Iterate through each box
for row in sudoku_rows:
for c in row:
mask = np.zeros(image.shape, dtype=np.uint8)
cv2.drawContours(mask, [c], -1, (255,255,255), -1)
result = cv2.bitwise_and(image, mask)
result[mask==0] = 255
cv2.imshow('result', result)
cv2.waitKey(175)
cv2.imshow('thresh', thresh)
cv2.imshow('invert', invert)
cv2.waitKey()
Note: The sorting idea was adapted from an old previous answer in Rubrik cube solver color extraction.