#!/usr/bin/python
#-*- coding:utf8 -*-
import Image,ImageDraw
import sys, os
def horizontalHistogram(input_image, image_name):
    """Save a side-by-side picture of an image and its row-wise histogram.

    For every pixel row of ``input_image`` the number of pure-black pixels
    (value 0 after conversion to 8-bit greyscale, mode "L") is counted.
    The counts are drawn as horizontal grey bars, one bar per row, and the
    result is written next to a greyscale copy of the input into a PNG
    named ``image_name + "_" + ".png"`` in the current working directory.

    The image dimensions and the list of per-row counts are also printed
    to standard output.

    Parameters:
        input_image: an opened PIL image.
        image_name:  string used as the stem of the output file name.

    Returns:
        None.
    """
    width, height = input_image.size[0], input_image.size[1]
    print("Width = %d and height = %d" % (width, height))

    # getpixel() yields a tuple for multi-band images (e.g. RGB), which never
    # compares equal to 0; counting on a single-band copy keeps the
    # ``== 0`` test meaningful for every input mode.
    if input_image.mode == "L":
        gray = input_image
    else:
        gray = input_image.convert("L")

    # One entry per row: how many black pixels that row contains.
    histogram = [
        sum(1 for x in range(width) if gray.getpixel((x, y)) == 0)
        for y in range(height)
    ]
    print(histogram)

    # White canvas of the same size as the input; bars are drawn in mid-grey.
    histogramImage = Image.new("L", (width, height), 255)
    pen = ImageDraw.Draw(histogramImage)
    for y, count in enumerate(histogram):
        # line() paints both end points, so a bar of ``count`` pixels ends at
        # x = count - 1, and a row without black pixels gets no bar at all.
        if count > 0:
            pen.line((0, y, count - 1, y), fill=128)

    # Left half: the (greyscale) input.  Right half: its histogram.
    cumulativeImage = Image.new("L", (width * 2, height), 255)
    cumulativeImage.paste(gray, (0, 0, width, height))
    cumulativeImage.paste(histogramImage, (width, 0, width * 2, height))
    cumulativeImage.save(image_name + "_" + ".png", "PNG")
def verticalHistogram():
    """Unimplemented stub: takes no arguments and returns None.

    NOTE(review): presumably meant to become the column-wise counterpart
    of horizontalHistogram -- confirm before implementing.
    """
    return None
# Folder whose images are processed; one output PNG is produced per image.
path = "/home/debayan/code/lower_descender_images/"
dirName = os.listdir(path)
# Sorted so that the processing order does not depend on the file system.
for image_name in sorted(dirName):
    image_path = os.path.join(path, image_name)
    # Sub-directories cannot be opened as images; skip them instead of
    # aborting the whole batch.
    if not os.path.isfile(image_path):
        continue
    try:
        input_image = Image.open(image_path)
    except IOError as err:
        # Image.open raises IOError for files it cannot identify as images.
        print("Skipping %s: %s" % (image_path, err))
        continue
    horizontalHistogram(input_image, image_name)
The piece of code above takes a set of images (consonant + vowel) in the folder /home/debayan/code/lower_descender_images/ generated by
http://code.google.com/p/tesseractindic/source/browse/trunk/tesseract_trainer/generate.py and generates tiny images with horizontal histogram profiles. The generated set can be found at
https://picasaweb.google.com/debayanin/OCRStuff .
Observing the set thus generated can give us insight into where the descender vowel sign begins. If we know this, we can separate the consonant from the vowel sign. One general observation is that the point where the vowel sign begins produces a local minimum in the histogram profile.
Here are some examples. Note the red horizontal lines which mark the minima in the histogram:
data:image/s3,"s3://crabby-images/65f8a/65f8a9686c7040dfe6dbeed96177b0f629ab7ef5" alt=""
Tthri in Bengali
data:image/s3,"s3://crabby-images/43b73/43b73006a40f127f3eceea16c2a14f367c31ad90" alt=""
Mu in Bengali
These are the exceptions:
data:image/s3,"s3://crabby-images/7910e/7910e4ab82eeec78920817d2788e5494dbfdd42c" alt=""
Chhu in Bengali
data:image/s3,"s3://crabby-images/75f35/75f350bf686a72115e762b3094294ffb689c261d" alt=""
Du in Bengali
With some fonts the above glyph might now show a local minimum in the histogram.
data:image/s3,"s3://crabby-images/6a506/6a506b1c78dfee833fb79262641ec1e4b4591392" alt=""
Jri in Bengali
data:image/s3,"s3://crabby-images/f7c64/f7c649ef9c3955034fd7b3429d36339e79b164bc" alt=""
Hri in Bengali
data:image/s3,"s3://crabby-images/6cd24/6cd24db436290170012af472ba03418c0e83c511" alt=""
Hu in Bengali
data:image/s3,"s3://crabby-images/08072/08072e6200f6db94c07024acf0e9b9b4d2a0af5b" alt=""
Gu in Bengali