#!/usr/bin/python -u
#
# mbox2mdir.py
#
# Converts a Communigate Pro .mbox file into an .mdir directory.

def print_help():
	"Print the command-line usage summary to standard output."
	usage_text = """
	Usage: mbox2mdir.py [-b backupdir] [-f] filename.mbox
	
	This creates a directory "filename.mdir" and populates it.
	
	-b backupdir
		Moves the .mbox file into this backup directory if the
		operation completes successfully.  If the backup directory
		does not exist, it is created.

	-f	Forces the script to run even if "filename.mdir" already exists.
		If there are files inside this directory, they may be overwritten.
	"""
	# A single parenthesised argument prints identically to the bare
	# print statement.
	print(usage_text)

import os, sys, time, re, array

# Matches the "From <...>(...) ..." line that introduces each message in
# the .mbox file.  It is applied with re.match() and captures three groups,
# which close_file() unpacks as (flags, msgnum, datestr): an 8-character
# word, a 12-digit number, and the rest of the line up to the newline.
gHeaderExpr = re.compile(r"From <.*>\(([\w_]{8})\-(\d{12})\) ([\S ]*)\n")
# Base name of the scratch file each message is written to before being
# renamed; ProcessFile() prefixes it with the output directory.
gTmpFileName = "__tmpfile__"
# NOTE(review): not referenced anywhere in the visible source -- confirm
# whether this is still needed.
gMboxTempName = "__temp__.mbox"

class MyException(Exception):
	"Error raised by ProcessFile(); the command-line driver catches it and prints the message."

def PrintProgress(numreadlines, numtotallines):
	"""Write a simple progress meter to standard output.

	numreadlines  -- number of lines processed so far (1-based count)
	numtotallines -- expected total number of lines

	Nothing is printed for inputs of fewer than 101 lines.  Otherwise a "."
	is written roughly every 1% of the input and the integer percentage
	completed is written roughly every 10%.
	"""
	if numreadlines <= 0 or numtotallines < 101:
		return
	# Floor division keeps the step sizes integral regardless of the
	# interpreter's "/" semantics; both are >= 1 because of the guard above.
	tenth = numtotallines // 10
	hundredth = numtotallines // 100
	if numreadlines % tenth == 0:
		# The caller's count is already 1-based, so use it as is.  Clamp at
		# 100 because the total may be an undercount (e.g. a newline count
		# for a file whose last line has no trailing newline).
		percent = min(100, numreadlines * 100 // numtotallines)
		sys.stdout.write(str(percent))
	elif numreadlines % hundredth == 0:
		sys.stdout.write(".")

def close_file(outfile, mdirname, headerline, curmsg, numlines):
	"""Finish the current message: write the buffer, close, and rename.

	outfile    -- open file object for the temp file named by gTmpFileName
	mdirname   -- output directory that receives the finished message file
	headerline -- header line that introduced this message; it must match
	              gHeaderExpr, otherwise an AttributeError is raised
	curmsg     -- character array holding the not-yet-written message text
	numlines   -- number of lines collected for this message

	The finished file is named "<msgnum>-<flags>-<YYYYmmddHHMMSS>-<numlines>"
	inside mdirname.
	"""
	# With a non-empty buffer the line count always drops by one; the final
	# character itself is only removed when it is a newline.  An empty
	# buffer leaves both the buffer and the count untouched.
	if len(curmsg) > 0:
		numlines -= 1
		if curmsg[-1] == "\n":
			curmsg.pop()
	# Build the destination name from the pieces of the header line.
	header = gHeaderExpr.match(headerline)
	flags, msgnum, rawdate = header.groups()
	parsed = time.strptime(rawdate, "%a %b %d %H:%M:%S %Y")
	stamp = time.strftime("%Y%m%d%H%M%S", parsed)
	basename = "%d-%s-%s-%d" % (int(msgnum), flags, stamp, numlines)
	target = os.path.join(mdirname, basename)
	# Flush what is left of the message, then move the temp file into place.
	curmsg.tofile(outfile)
	outfile.close()
	os.rename(gTmpFileName, target)

def _count_newlines(path, chunksize=1024*1024):
	"Return the number of newline characters in the file (the figure 'wc -l' reports)."
	count = 0
	infile = open(path, "r")
	try:
		while True:
			chunk = infile.read(chunksize)
			if not chunk:
				break
			count += chunk.count("\n")
	finally:
		infile.close()
	return count

def ProcessFile(mboxname, force=0):
	"""The main conversion routine.

	Splits the mailbox file mboxname (which must end in ".mbox") into one
	file per message inside a sibling directory with the ".mdir" suffix.

	mboxname -- path of the input .mbox file
	force    -- when 0 (the default) an existing output directory is an
	            error; any other value allows it to be reused

	Raises MyException if the name does not end in ".mbox", if the output
	directory exists and force is 0, or if no message header line is found
	in the file.  Prints statistics and consistency warnings to stdout.
	"""
	if not mboxname.endswith(".mbox"):
		raise MyException('\nInput file does not end with ".mbox".  Exiting.\n')
	mdirname = mboxname[:-5] + ".mdir"
	if os.path.exists(mdirname):
		if (force == 0):
			raise MyException("\nOutput directory " + mdirname + " exists.  Exiting.\n")
	else:
		os.mkdir(mdirname)
	# close_file() renames the file named by this module-level variable, so
	# it has to point inside the output directory.  Taking the basename
	# first keeps a second call from stacking directories onto the path.
	global gTmpFileName
	gTmpFileName = os.path.join(mdirname, os.path.basename(gTmpFileName))

	# Get the initial file size so we can make sure it doesn't change
	# during the course of the operation.
	initialSize = os.path.getsize(mboxname)

	# Count lines in-process rather than shelling out to "wc -l": no shell
	# quoting problems with unusual file names and no external dependency.
	totalnumlines = _count_newlines(mboxname)

	if totalnumlines == 0:
		print("No lines in file - nothing to do.")
		return

	nummessages = 0
	numreadlines = 0
	numvalidlines = 0
	array_write_threshold = 5*1024*1024    # max size of array before we dump to disk
	curmsglines = 0
	curmsg = array.array("c")
	outfile = None
	lastheader = ""
	mbox = open(mboxname, "r")
	try:
		for line in mbox:
			numreadlines += 1
			PrintProgress(numreadlines, totalnumlines)
			# The cheap first-character test avoids running the regex on
			# every body line.
			if line.startswith("F") and gHeaderExpr.match(line):
				# Found a new header line
				if curmsglines != 0:
					close_file(outfile, mdirname, lastheader, curmsg, curmsglines)
					nummessages += 1
					curmsglines = 0
					curmsg = array.array("c")
				elif outfile is not None:
					# The previous header had no body lines; its temp file
					# is about to be reopened, so release the old handle.
					outfile.close()
				lastheader = line
				outfile = open(gTmpFileName, "w")
			elif lastheader != "":
				# Body line of the current message (lines that precede the
				# first header are ignored).
				curmsg.fromstring(line)
				curmsglines += 1
				numvalidlines += 1
				# flush the buffer?
				if curmsg.buffer_info()[1] > array_write_threshold:
					curmsg.tofile(outfile)
					curmsg = array.array("c")
	finally:
		mbox.close()

	if lastheader == "":
		# Without a header there is no open temp file and nothing for
		# close_file() to parse.
		raise MyException("\nNo message headers found in " + mboxname + ".  Exiting.\n")

	# take care of the last open message
	close_file(outfile, mdirname, lastheader, curmsg, curmsglines)
	nummessages += 1
	print("\nStatistics:")
	print("Read %d lines out of %d total; %d valid email lines found." % (numreadlines, totalnumlines, numvalidlines))
	print("Found %d emails." % nummessages)
	if (numvalidlines + nummessages) != totalnumlines:
		print("\nWARNING!  (Number of valid lines) + (Number of header lines) != (Total number of lines)")

	# check the file size
	newSize = os.path.getsize(mboxname)
	if newSize != initialSize:
		print("\nWARNING!  The size of " + mboxname + " has changed since the start")
		print("of the operation.  Old size=%d, New size=%d." % (initialSize, newSize))


if __name__ == "__main__":
	# Command-line driver: parse the options, convert the mailbox, and then
	# optionally move the original .mbox file into the backup directory.
	# Exit status: 2 for usage errors, 1 when the conversion is refused.
	import getopt
	import shutil
	try:
		opts, pargs = getopt.getopt(sys.argv[1:], "fb:")
	except getopt.GetoptError:
		# Unknown option, or -b given without a directory.
		print(sys.exc_info()[1])
		print_help()
		sys.exit(2)
	if len(pargs) == 0:
		print("Insufficient arguments.")
		print_help()
		sys.exit(2)

	mboxname = pargs[0]
	force = 0
	backupdir = ""
	for opt, val in opts:
		if opt == "-f":
			force = 1
		elif opt == "-b":
			backupdir = val
			if not os.path.exists(backupdir):
				print("Creating directory " + backupdir)
				os.makedirs(backupdir)
	try:
		ProcessFile(mboxname, force)
		if backupdir != "":
			print("Moving " + mboxname + " to " + backupdir)
			# shutil.move falls back to copy-and-delete when the backup
			# directory is on a different filesystem, where os.rename fails.
			shutil.move(mboxname, os.path.join(backupdir, os.path.basename(mboxname)))
	except MyException:
		# sys.exc_info() avoids the version-specific "except X, e" syntax.
		print(sys.exc_info()[1])
		sys.exit(1)
