#!/usr/bin/env python
"""
 Convert Psion Series3 data to XML

 built with the file format description as in the psionic files

 Copyright (c) 2001  Geert Stappers

 Copyright notice:

 You are free to use & copy this script as long as you keep it as it is.

 
 Change permission:

 You are allowed the change this script as long as
 you publish the new version on the Internet
 and reference to Geert Stappers as (co-)author.
 

 Comments are welcome, send them to Geert.Stappers@xs4all.nl


 todo:
	- update this "triple quote doc string"
 - length in header info for roundtripping
 - add extended header code, not just print statements
 - check if read raises EOFError
 - add option to set skip_deleted
 - add option to set try_recover

"""

# Version string in "$Id$" revision-control keyword form.
__version__ = "$Id: s3data2xml.py,v 1.1.1.1 2002/11/05 21:02:29 stappers Exp $"
# First Public Release at 15 september 2001

import struct

# Home-made boolean constants; the rest of the script compares against them.
false = 0
true = 1

# Behaviour switches.  Put `true` on the right-hand side to enable one.
skip_deleted = false	# true: records marked 'deleted' are not processed
try_recover = false	# true: deleted records are parsed instead of hex dumped

Standardlength = 22	# number of bytes read for the standard header
ourfiletype = "OPLDatabaseFile"	# name expected at the start of the header
Stheader = dict()	# standard header values, filled by checkheader()

# Record type names, indexed by the top four bits of a record's first word.
recordtypes = [
	'deleted',		#  0
	'mergable1',		#  1
	'field_information',	#  2
	'descriptive',		#  3
	'private4',		#  4
	'private5',		#  5
	'private6',		#  6
	'private7',		#  7
	'mergable8',		#  8
	'mergable9',		#  9
	'mergable10',		# 10
	'mergable11',		# 11
	'mergable12',		# 12
	'mergable13',		# 13
	'mergable_voice',	# 14
	'reserved4sys',		# 15
]
record = dict()		# type and length of the current record
subrecord = dict()	# type and length of the current subrecord

# Field type names, indexed by the bytes of the field information record.
fieldtypes = ['word', 'long', 'real', 'qstr']
field = ['zero']	# field[0] is a placeholder, field numbers start at 1

def checkheader(header):
	"""
	Validate the standard file header and remember its values.

	header -- the bytes read from the start of the file; it must be
	          exactly as long as the layout below (22 bytes)

	The header is unpacked as a 16 byte name followed by three
	little-endian 16 bit words: creator version, header length and
	user version.  On success these are stored in the global dict
	'Stheader' under the keys 'name', 'creator', 'length' and 'user'
	and the string 'Okay' is returned.

	Returns None when the header has the wrong size or when the name
	is not equal to 'ourfiletype'.
	"""
	layout = "<16shhh"
	# An empty or truncated file gives a short header; struct.unpack would
	# raise struct.error on it, so report "not our file type" instead.
	if len(header) != struct.calcsize(layout):
		return None
	name, crea_ver, header_length, use_ver = struct.unpack(layout, header)
	name = name[0:15]  # get rid of the trailing null
	if name != ourfiletype:
		return None  # not our file type
	# TODO: handle the extended header; header_length is not yet compared
	# with Standardlength.
	Stheader['name'] = name
	Stheader['creator'] = crea_ver
	Stheader['length'] = header_length
	Stheader['user'] = use_ver
	return 'Okay'

def getrecordtal(buf):
	"""
	Get record type and length, result is in dict. 'record'

	buf -- the two bytes in front of a record, a little-endian 16 bit
	       word; struct.error is raised when buf is not 2 bytes long

	The top four bits select a name from 'recordtypes' and are stored as
	record['type']; the low twelve bits are stored as record['length'].
	"""
	(tal,) = struct.unpack("<H", buf)
	# Shift instead of "/ 0x1000": the list index has to stay an integer,
	# also when "/" means true division.
	record['type'] = recordtypes[tal >> 12]
	record['length'] = tal & 0x0fff

def getsubrecordtal(buf):
	"""
	Get subrecord type and length, result is in dict. 'subrecord'

	buf -- the two bytes in front of a subrecord, a little-endian 16 bit
	       word; struct.error is raised when buf is not 2 bytes long

	The top four bits are stored as the number subrecord['type'], the
	low twelve bits as subrecord['length'].
	"""
	(tal,) = struct.unpack("<H", buf)
	# Shift instead of "/ 0x1000" so the type stays an integer, also when
	# "/" means true division.
	subrecord['type'] = tal >> 12
	subrecord['length'] = tal & 0x0fff

def printxmlrecordhead():
	"""
	Print the opening tag of the current record.

	The tag name is record['type'], the 'length' attribute holds
	record['length'].
	"""
	print(" <%s length='%r'>" % (record['type'], record['length']))
	
def printxmlrecordtail():
	"""
	Print the closing tag of the current record, named record['type'].
	"""
	print(" </%s>" % (record['type'],))
	
def printxmlfield(fn, str):
	"""
	Print one field as an XML element.

	fn  -- field number, its repr() is appended to the tag name 'field'
	str -- element content, printed as it is
	"""
	tag = "field" + repr(fn)
	print("  <" + tag + ">" + str + "</" + tag + ">")

def printhexvalue(buf):
	"""
	Print buf as a <hexvalue> element.

	Every byte is written as hex(ord(byte)) with a space in front,
	twelve bytes on a line.  An empty buf gives only the two tags.
	"""
	bytes_per_line = 12
	margin = "    "
	print("   <hexvalue>")
	for start in range(0, len(buf), bytes_per_line):
		chunk = buf[start:start + bytes_per_line]
		print(margin + "".join([" " + hex(ord(char)) for char in chunk]))
	print("   </hexvalue>")

def cleandata(buf):
	"""
	Return buf converted to text that can be put inside an XML element.

	Characters in the ranges ' '..'%', '('..';' and '@'..'z' are copied.
	The character with octal code 005 becomes "&lt;phone/>" and octal
	code 025 becomes "&lt;lf/>".  Every other character is written as a
	decimal character reference, for example "&#60;" for '<'.
	"""
	pieces = []
	for char in buf:
		if ' ' <= char <= '%' or '(' <= char <= ';' or '@' <= char <= 'z':
			pieces.append(char)
		elif char == '\005':
			pieces.append("&lt;phone/>")
		elif char == '\025':
			pieces.append("&lt;lf/>")
		else:
			pieces.append("&#" + repr(ord(char)) + ";")
	return ''.join(pieces)

def processfield_inforecord(buf):
	"""
	Process the field information record.

	Each of the first record['length'] bytes of buf is an index in
	'fieldtypes'.  The type name is appended to the global list 'field'
	and printed as element <fieldN>, N counting from 1.

	NOTE(review): 'field' is only appended to, so the printed field[N] is
	the type just read only when this is the first field information
	record of the run.
	"""
	for fn in range(1, record['length'] + 1):
		typecode = ord(buf[fn - 1])
		field.append(fieldtypes[typecode])
		tag = "field" + repr(fn)
		print("  <" + tag + ">" + field[fn] + "</" + tag + ">")

def proc_desc_subrec_label(buf):
	"""
	Print the labels stored in a descriptive subrecord of type 4.

	buf holds subrecord['length'] bytes: for every label one count byte
	followed by that many characters.  Each label is printed as a
	<label> element; a label with count zero is printed as "blank"
	followed by its field number.
	"""
	pos = 0
	number = 1  # first field number
	while pos < subrecord['length']:
		count = ord(buf[pos])
		pos += 1
		if count > 0:
			text = cleandata(buf[pos:pos + count])
		else:
			text = "blank" + repr(number)
		# one fixed tag name, easier xpath finding label
		print("   <label>" + text + "</label>")
		pos += count
		number += 1

def proc_desc_subrec_pd(buf):
	"""
	Print a descriptive subrecord of type 7.

	The first byte of buf is printed as a number in <modelnumber>.  The
	following bytes, without the last one, are printed in <library>.
	"""
	print("   <modelnumber>" + repr(ord(buf[0])) + "</modelnumber>")
	# The library name goes through cleandata(), as the header and footer
	# strings do, so a '<' or '&' in it cannot break the XML output.
	print("   <library str='C'>" + cleandata(buf[1:-1]) + "</library>")
	
def proc_desc_subrec_hf(hf,cstr):
	"""
	Print a header or footer subrecord.

	hf   -- tag name to use
	cstr -- the text; its last byte is dropped, the rest goes through
	        cleandata()
	"""
	text = cleandata(cstr[:-1])
	print("   <%s str='C'>%s</%s>" % (hf, text, hf))
	
def processdescriptivesubrecord(buf):
	"""
	Print one subrecord of a descriptive record as <subrecord> element.

	The type and length come from the global dict 'subrecord'.  Type 4
	is printed as labels, type 7 as modelnumber and library, type 8 as
	header and type 9 as footer.  Any other type is hex dumped.
	"""
	kind = subrecord['type']
	print("  <subrecord type='%r' length='%r'>" % (kind, subrecord['length']))
	hf_tag = {8: "header", 9: "footer"}.get(kind)
	if kind == 4:
		proc_desc_subrec_label(buf)
	elif kind == 7:
		proc_desc_subrec_pd(buf)
	elif hf_tag is not None:
		proc_desc_subrec_hf(hf_tag, buf)
	else:
		printhexvalue(buf)
	print("  </subrecord>")

def processdescriptiverecord(buf):
	"""
	Walk through the subrecords of a descriptive record.

	buf holds record['length'] bytes.  Each subrecord starts with a two
	byte type-and-length word, decoded by getsubrecordtal(), followed by
	subrecord['length'] bytes of data for processdescriptivesubrecord().
	"""
	pos = 0
	end = record['length']
	while pos < end:
		getsubrecordtal(buf[pos:pos + 2])
		pos += 2
		size = subrecord['length']
		processdescriptivesubrecord(buf[pos:pos + size])
		pos += size

def processdeletedrecord(buf):
	"""
	Process a record that is marked as deleted.

	Without 'try_recover' the record is hex dumped.  With 'try_recover'
	a record that starts with the bytes 002 020 (octal) is processed as
	a descriptive record, any other record as a data record.
	"""
	if try_recover != true:
		printhexvalue(buf)
		return
	# Compare a slice, not buf[0] and buf[1]: a deleted record of less
	# than two bytes must not raise IndexError.
	if buf[0:2] == '\002\020':
		print("  <trying_recover_descriptive_record/>")
		processdescriptiverecord(buf)
	else:
		print("  <trying_recover_other_record/>")
		processotherrecords(buf)

def processotherrecords(buf):
	"""
	Print the fields of a data record.

	The field types come from the global list 'field', filled by the
	field information record.  A 'qstr' field is one count byte followed
	by that many characters and is printed with printxmlfield().  For
	the other field types only an XML comment is printed, but the bytes
	of the field are stepped over so the fields after it are still read
	at the right position.

	Processing stops with an XML comment when the record holds more data
	than there are known fields.
	"""
	# Storage size in bytes of the fixed size field types.
	fixed_size = {'word': 2, 'long': 4, 'real': 8}
	# Do not read beyond the data that is really there.
	end = min(record['length'], len(buf))
	i = 0  # at the begin of "buf"
	fn = 1  # the first field number
	while i < end:
		if fn >= len(field):
			print("<!-- record holds more data than there are known fields -->")
			break
		kind = field[fn]
		if kind == 'qstr':
			ss = ord(buf[i])
			i = i + 1  # next position in buffer
			printxmlfield(fn, cleandata(buf[i:i+ss]))
			i = i + ss
		else:
			print("<!-- don't known how to handle this field -->")
			# Without this step the position would never move and
			# every following field would be read from the wrong
			# place.  An unknown type ends the record.
			i = i + fixed_size.get(kind, end)
		fn = fn + 1

def processrecord(buf):
	"""
	Print one record: opening tag, content and closing tag.

	The content is printed by the function that belongs to
	record['type']; types without a function of their own are
	handled by processotherrecords().
	"""
	printxmlrecordhead()
	handlers = {
		'field_information': processfield_inforecord,
		'descriptive': processdescriptiverecord,
		'deleted': processdeletedrecord,
	}
	handler = handlers.get(record['type'], processotherrecords)
	handler(buf)
	printxmlrecordtail()

def printxmlhead():
	"""
	Print the start of the XML document.

	That is the XML declaration, the root element named after
	'ourfiletype', the <header> element filled from 'Stheader' and the
	opening tag of <records>.
	"""
	print('<?xml version="1.0"?>')
	print("<%s>" % (ourfiletype,))
	print("<header>")
	print(" <Standard length='%r'>" % (Standardlength,))
	print("  <name str='C'>%s</name>" % (Stheader['name'],))
	print("  <creator>%s</creator>" % (hex(Stheader['creator']),))
	print("  <length>%r</length>" % (Stheader['length'],))
	print("  <user>%s</user>" % (hex(Stheader['user']),))
	closing_lines = (
		" </Standard>",
		" <extended length='0'>",  # there should come more code
		"  <hexvalues>",
		"   <!-- there should come more code -->",
		"  </hexvalues>",
		" </extended>",
		"</header>",
		"<records>",
	)
	for line in closing_lines:
		print(line)
  
def printxmltail():
	"""
	Print the end of the XML document: close <records> and the root
	element named after 'ourfiletype'.
	"""
	for tag in ("records", ourfiletype):
		print("</%s>" % (tag,))
	
def _read_upto(file, count):
	" Read at most 'count' bytes, an EOFError counts as nothing read"
	try:
		return file.read(count)
	except EOFError:
		return ''

def s3data2xml(file):
	"""
	Read 'file' and if it is an OPL database file
		then print it as XML

	file -- object with a read(count) method that gives the bytes of
	        the database

	First the standard header is read and checked.  Then the records
	are read one by one: a two byte type-and-length word followed by
	record['length'] bytes of data.  Reading stops at the end of the
	file.  When the file ends in the middle of a record an XML comment
	is printed and the document is closed in the normal way.
	"""

	buf = file.read(Standardlength)
	# A short header is rejected here, before it reaches checkheader().
	if len(buf) < Standardlength or checkheader(buf) != 'Okay':
		print("Error: not our file type")
		return  # exit
	printxmlhead()

	# read() tells about the end of the file by giving less data than
	# asked for, so the length of every read has to be checked.
	while true:
		tal = _read_upto(file, 2)
		if len(tal) == 0:
			break  # the normal end of the file
		if len(tal) < 2:
			print("  <!-- Error: unexcepted end of file -->")
			break
		getrecordtal(tal)
		buf = _read_upto(file, record['length'])
		if len(buf) < record['length']:
			print("  <!-- Error: unexcepted end of file -->")
			break
		if record['type'] == 'deleted' and skip_deleted == true:
			print(" <!-- skipping record marked as 'deleted' -->")
		else:
			processrecord(buf)

	printxmltail()

if __name__ == '__main__':
        # Convert the file named as first argument, or standard input
        # when no argument is given.  The XML goes to standard output.
        import sys
        if len(sys.argv) == 1:
                s3data2xml(sys.stdin)
        else:
                infile = open(sys.argv[1], "rb")
                try:
                        s3data2xml(infile)
                finally:
                        # close the file, also after an error
                        infile.close()
# end of script
