This script creates a text file for each page of a DJVU.

Parameters:

-i is the input DJVU file
-o is the output directory
-p is the number of pages (the script will do the first "p" pages)
-d is an optional debug flag

### Example

```
pagewise-ocr.py -i "C:\Documents and Settings\Me\My Documents\input.djvu" -p 100 -o "C:\Documents and Settings\John\My Documents\inputOCR"
```

## Source code

```python
#!/usr/bin/python

import os
import optparse
import subprocess

def main(argv=None):
    """Parse the command line and run the page-wise text extraction.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When None, optparse falls back to ``sys.argv[1:]``.

    Raises:
        SystemExit: with status -1, after printing a message and the help
            text, when any of the required options -i, -p or -o is missing.
    """
    parser = optparse.OptionParser(
        usage='Usage: %prog -i <input DJVU> -p <pages> -o <output directory> <options>')

    # NOTE(review): the add_option calls were truncated in the original
    # listing (only the help= arguments survived). The option letters and
    # dest names below are reconstructed from the documented parameters and
    # from the opts.input / opts.pages / opts.output / opts.debug attributes
    # the rest of the script reads -- confirm against the original script.
    parser.add_option('-i', dest='input', action='store',
                      help='input DJVU (required)')
    parser.add_option('-p', dest='pages', action='store', type='int',
                      help='number of pages (required)')
    parser.add_option('-o', dest='output', action='store',
                      help='output directory (required)')
    parser.add_option('-d', dest='debug', action='store_true', default=False,
                      help='debug flag')

    (opts, args) = parser.parse_args(argv)

    # Check mandatory options. The first entry previously tested opts.pages,
    # so a missing -i was never reported.
    required = (
        ('input', "The input file '-i' must be given\n"),
        ('pages', "The number of pages (-p) must be given\n"),
        ('output', "The output directory '-o' must be given\n"),
    )
    for dest, message in required:
        if getattr(opts, dest) is None:
            print(message)
            parser.print_help()
            # Same exit status as the original exit(-1), without relying on
            # the site-provided exit() helper.
            raise SystemExit(-1)

    PagewiseOCR(opts)

class PagewiseOCR():
    """Write one text file per page of a DJVU file using ``djvutxt``.

    All work happens in the constructor: for pages 1..opts.pages it creates
    ``OCRoutput_NNNN.txt`` in ``opts.output`` and runs ``djvutxt`` on that
    page of ``opts.input`` with the text file as the output argument.
    """

    def __init__(self, opts):
        """Run the extraction for every requested page.

        Args:
            opts: Parsed options object providing ``input`` (DJVU path),
                ``output`` (existing output directory), ``pages`` (number of
                pages, anything ``int()`` accepts) and ``debug`` (truthy to
                print progress). A ``djvuDir`` attribute is added to it.
        """
        self.opts = opts

        # Directory of the DjVuLibre executables  <-- CHANGE ME
        self.opts.djvuDir = r"c:\program files\djvuzone\djvulibre"

        # The executable path does not depend on the page, so build it once.
        djvutxt = os.path.join(self.opts.djvuDir, 'djvutxt')

        # Pages are numbered from 1; process the first opts.pages pages.
        for page in range(1, int(self.opts.pages) + 1):
            filename = self._page_filename(page)

            if self.opts.debug:
                # print() call form works on both Python 2 and Python 3.
                print('\tProcessing page %d' % page)

            # Create (or truncate) the output file before djvutxt runs.
            with open(filename, 'w'):
                pass

            cmd = [djvutxt, '-page=' + str(page), self.opts.input, filename]
            status = subprocess.call(cmd)
            if status != 0:
                # Report the failure instead of silently ignoring it; keep
                # going so the remaining pages are still attempted.
                print('djvutxt failed on page %d (exit status %d)' % (page, status))

    def _page_filename(self, page):
        """Return the output text file path for the given 1-based page."""
        return os.path.join(self.opts.output, 'OCRoutput_%04d' % page + '.txt')

if __name__ == "__main__":
    # Run the command-line entry point only when executed as a script.
    # The former try/finally wrapper had an empty finally clause and did
    # nothing, so main() is called directly.
    main()
```