#!/usr/bin/python
# -*- coding: utf-8 -*-
# script name: ba_export_xml.py
# This script is copyrighted by Robert Steininger and licensed under the Creative Commons Attribution-Share Alike 3.0 License.
# See http://www.creativecommons.org/licenses/by-sa/3.0/
# Please respect my wish and meet the license requirements.
#
# !!! THIS SCRIPT COMES WITH ABSOLUTELY NO WARRANTY !!!
#

import codecs
import cookielib
import datetime
import getpass
import os
import re
import sys
import time
import urllib
import urllib2

from lxml import etree

if len( sys.argv ) != 3:
   print "usage: %s <verfueger_nummer> <konto_nummer>"
   sys.exit(1)
 
verfueger_nummer = sys.argv[1]
konto_nummer     = sys.argv[2]

# The PIN is taken from the first line of "pin.txt" in the current working
# directory when that file exists; otherwise it is prompted for via getpass.
# NOTE(review): pin.txt holds the PIN in plain text - keep its permissions tight.
if os.path.exists( "pin.txt" ):
   pin_file = open( "pin.txt" )
   try:
      # strip every \n and \r so the line terminator is not sent as part of the PIN
      pin = re.sub( '(\n|\r)', '', pin_file.readline() )
   finally:
      # release the file handle even when reading fails
      pin_file.close()
else:
   pin = getpass.getpass()

# Endpoints requested by the steps below; all of them live on the same host.
# NOTE(review): the start_url path begins with a double slash - confirm it is intended.
ba_host         = 'https://online.bankaustria.at'
start_url       = ba_host + '//bach/de/login/login.html'
login_url       = ba_host + '/servlet/SSOLogin'
giro_url        = ba_host + '/servlet/GiroKontoDetail'
detail_url      = ba_host + '/servlet/GiroBuchungDetail'
logout_url      = ba_host + '/servlet/Logout'

# A single cookie jar is shared by every request made through the opener.
cj = cookielib.CookieJar()

# Collect the urllib2 handlers first and build the opener once at the end.
handlers = []
if 'HTTPS_PROXY' in os.environ:
   # route https traffic through the proxy named in the environment
   handlers.append( urllib2.ProxyHandler({ 'https': os.environ['HTTPS_PROXY'] }) )

   # If the proxy needs basic auth (not tested), additionally append a handler
   # prepared like this:
   #proxy_auth_handler = urllib2.ProxyBasicAuthHandler()
   #proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
   #handlers.append( proxy_auth_handler )
handlers.append( urllib2.HTTPCookieProcessor(cj) )
opener = urllib2.build_opener( *handlers )

# Send a browser-like User-Agent header instead of the urllib2 default.
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
# From here on plain urllib2.urlopen() calls go through this opener.
urllib2.install_opener(opener)

print >> sys.stderr, "STEP 1 ... get session cookie"
# Only the status of this request is inspected; the body is never read.
# Cookies sent back by the server end up in the jar attached to the
# installed opener.
r = urllib2.urlopen( urllib2.Request( start_url ) )

status_code = r.code
if status_code != 200:
   print >> sys.stderr, "ERROR: got status code %d" % status_code
   sys.exit( 1 )


print >> sys.stderr, "STEP 2 ... perform login"
req = urllib2.Request( login_url )
# Attaching data makes urllib2 send the request as a form POST.  The user
# number and the PIN are percent-encoded with quote_plus() so that characters
# such as '&', '=', '+', '%' or a space cannot split or alter the form fields.
# The constant parts of the body are left exactly as they were.
req.add_data( 'timestamp=%d&JSOS=Linux i686&yzbks=%s&jklwd=%s&JSBROWSER=mozilla' % (
                                        int( time.time() ),                     # unix epoch time
                                        urllib.quote_plus( verfueger_nummer ),  # user number
                                        urllib.quote_plus( pin ),               # password
                                       ) )
# The response object stays in r because the next step reads its body.
r = urllib2.urlopen(req)

if r.code != 200:
   print >> sys.stderr, "ERROR: got status code %d" % r.code
   sys.exit( 1 )

print >> sys.stderr, "STEP 3 ... extract session arguments"
# Look through the login response (still held in r) for the first line that
# contains a 'MenuHead?sessionid=...' link and keep everything from
# 'sessionid=' up to the closing double quote.
session_regex = re.compile( r'^.*MenuHead\?(sessionid=.*?)".*$' )
session_args = None
for html_line in r.readlines():
   hit = session_regex.match( html_line )
   if hit and len( hit.groups() ) > 0:
      session_args = hit.groups()[0]
      break

# Without the session arguments none of the following requests can be built.
if not session_args:
   print >> sys.stderr, "unable to get session arguments"
   sys.exit(1)

print >> sys.stderr, "   OK ... got session_args"

print >> sys.stderr, "STEP 4 ... get transaction xml file"
# Form fields of the export request, joined with '&' below.  The session
# arguments captured in step 3 come first.
# NOTE(review): the gkn*/downloadmode values presumably select an XML download
# of the last six months for the given account - confirm against the site.
arguments = [
   session_args,
   "language=DE",
   "mode=bno",
   "gknSel=01%sEUR12000*" % konto_nummer,
   "gknDefault=yes",
   "gknRadio=gknRadioPeriode",
   "gknPeriodeSel=gknPeriodeSelLetzte6monate",
   "downloadmode=ifxxmlyes",
   "downloadfilename=export.xml",
]

req = urllib2.Request( giro_url )
req.add_data( '&'.join( arguments ) )
r = urllib2.urlopen(req)

status_code = r.code
if status_code != 200:
   print >> sys.stderr, "ERROR: got status code %d" % status_code
   sys.exit( 1 )

# complete response body, kept as the raw byte string that was received
xml_data = r.read()

# archive xml file
# xml_data is the raw byte string read from the response, so it is written
# unchanged in binary mode.  Pushing a byte string through an encoding writer
# makes Python 2 decode it as ASCII first, which fails on any non-ASCII byte.
fname = "ba_%s.xml" % datetime.datetime.now().strftime('%Y%m%d_%H%M')
fh = open( fname, 'wb' )
try:
   fh.write( xml_data )
finally:
   # close the archive file even if the write fails
   fh.close()
xmltree =  etree.fromstring( xml_data )


def _as_unicode( element ):
   """Return the text of an XML element as a unicode string.

   element -- element whose .text is read; passing None (element not found)
              raises AttributeError.
   Returns u'' for an element without text.  Text that is already unicode is
   returned unchanged, byte strings are decoded as UTF-8.
   """
   text = element.text
   if text is None:
      return u''
   # NOTE(review): lxml on Python 2 is documented to return a plain str for
   # ASCII-only text and unicode otherwise, hence both cases are handled.
   if isinstance( text, unicode ):
      return text
   return text.decode( 'utf-8' )


# extract transactions from xml
# Every transaction becomes a list of
# [ PostedDt, EffDt, CurAmt/Amt, CurAmt/CurCode, Memo ] as unicode strings.
transactions = []
for t in xmltree.iter( "BankAcctTrnRec" ):
   cur_amt = t.find("CurAmt")
   transactions.append( [ _as_unicode( t.find("PostedDt") ),
                          _as_unicode( t.find("EffDt") ),
                          _as_unicode( cur_amt.find("Amt") ),
                          _as_unicode( cur_amt.find("CurCode") ),
                          _as_unicode( t.find("Memo") ),
                      ] )

print >> sys.stderr, "   OK: got %s transactions" % len( transactions )

print >> sys.stderr, "STEP 5 ... exctractin Payees"

# Patterns that recognise the known booking-text layouts.  Several of them use
# a look-behind of the form (?<=^.{N}) to require that the following token
# starts exactly N characters after the beginning of the text.

# standing order, payee name between 'Empfänger:' and 'Kontonummer:', e.g.
# DA-NR.: 003 ... Empfänger:    Hans Wurst ... Kontonummer: 0187247773    BLZ: 21357      Dauerwurstauftrag
empf_regex = re.compile( r'^.{58}Empf.nger:\s+(.*)\s+(?<=^.{130})Kontonummer:.*$', re.U )

# card payment whose text ends with a lone 'O' after 41 characters, e.g.
# BAUHAUS 2231 0298  K9 12.01.UM 18.25     O
O_regex = re.compile( r'^(.*?)\s+\d+\s+K\d+\s+\d\d\.\d\d\.UM\s+\d\d\.\d\d\s+(?<=^.{41})O$', re.U )

# same 'O' marker, but followed by a 23 character field starting after 58 characters, e.g.
# AT  1249,98 MAESTRO POS 20.06.12 14.32K9 O                WUERSCHTLBUDE          AM GRABEN
Oplus_regex = re.compile( r'^.*(?<=^.{41})O\s+(?<=^.{58})(.{23}).*$', re.U )

# cash machine withdrawal, e.g.
# ABHEBUNG AUTOMAT NR. 19999 AM 18.01. UM 12.42 UHR NEUSIEDL PK BANKCARD 9
abh_regex = re.compile( r'^ABHEBUNG AUTOMAT.*', re.U )

# direct debit / credit note, name follows 'a/' within the first 58 characters, e.g.
# EZE-Lastschrift a/TUAMEISTNIX AG                          TUAMEISTNIX-Vtkonto 13088418 011384105403                 ....
schrift_regex = re.compile( r'^(?:SEPA-Last|EZE-Last|Gut|Last)schrift.*a/(.*?)(?<=^.{58}).*$', re.U )

# other card bookings ending in 'K<digit> dd.mm.UM hh.mm', e.g.
# PILLA DANKT  4222P K9 01.05.UM 11.34"
sonstige_regex = re.compile( r'^(.*)(?<=^.{13})..*K\d \d\d\.\d\d\.UM \d\d\.\d\d$', re.U )

# cash paid out / paid in at the counter, e.g.
# BARAUSZAHLUNG NIRGENDWO
bara_regex = re.compile( r'^(BARAUSZAHLUNG).*$', re.U )
bare_regex = re.compile( r'^(BAREINZAHLUNG).*$', re.U )


# booking texts that consist of exactly one of these keywords
konto_regex = re.compile( r'^(Kontopaket|Porto|KEST|Habenzinsen|Sollzinsen)$', re.U )


# whitespace at the very beginning or at the very end of a string
space_regex = re.compile( r'(^\s+|\s+$)', re.U )

# any run of two or more whitespace characters
shorten_regex = re.compile( r'\s{2,}', re.U )

for t in transactions:
   #text = t[4].decode('utf-8')
   text = t[4]
   t[4] = space_regex.sub( '; ', t[4] )
   t[4] = shorten_regex.sub( ' ', t[4] )


   match = schrift_regex.match( text )
   if match != None:
      t.append( space_regex.sub( '', match.groups()[0] ) )
      continue

   match = empf_regex.match( text )
   if match != None:
      t.append( space_regex.sub( '', match.groups()[0] ) )
      continue
   
   match = O_regex.match( text )
   if match != None:
      t.append( space_regex.sub( '', match.groups()[0] ) )
      continue

   match = Oplus_regex.match( text )
   if match != None:
      t.append( space_regex.sub( '', match.groups()[0] ) )
      continue

   match = sonstige_regex.match( text )
   if match != None:
      t.append( space_regex.sub( '', match.groups()[0] ) )
      continue

   match = abh_regex.match( text )
   if match != None:
      t.append( 'AUTOMAT' )
      continue

   match = konto_regex.match( text )
   if match != None:
      t.append( match.groups()[0] )
      continue

   match = bare_regex.match( text )
   if match != None:
      t.append( match.groups()[0] )
      continue

   match = bara_regex.match( text )
   if match != None:
      t.append( match.groups()[0] )
      continue

   # else UNKNOWN
   t.append( 'UNBEKANNT' )
   print '   WARNING: no payee for : %s' % text
   




# Append the not yet recorded transactions to transactions.csv and collect
# them in import_list.  The last row already present in the file marks the
# point up to which earlier runs have recorded transactions.
fh = codecs.open( "transactions.csv", encoding='utf-8', mode='a+' )
try:
   # seek to the end to find out whether the file is still empty
   fh.seek(0, os.SEEK_END)
   file_length = fh.tell()

   # seek to the file begin
   fh.seek(0)
   if file_length == 0:
      last_transaction = ''
      # Header in the order of the transaction lists: the amount (index 2)
      # comes before the currency code (index 3).
      fh.write( u'"Buchungsdatum";"Valutadatum";"Betrag";"Währung";"Buchungstext";"Empfänger/Sender"\n' )
   else:
      last_transaction = re.sub( '[\r\n]', '', fh.readlines()[-1] )
      if last_transaction.startswith( u'"Buchungsdatum"' ):
         # The file holds nothing but the header row (an earlier run had no
         # transactions).  Treat it like an empty file, otherwise the header
         # would be searched among the transactions and never be found.
         last_transaction = ''

   found_last = False
   import_list = []
   # Walk the transactions in the reverse of their order in the XML file.
   # NOTE(review): presumably the XML lists the newest booking first, so this
   # runs from oldest to newest - confirm.
   for t in reversed( transactions ):
      csv_line = '"' + '";"'.join( t ) + '"'

      # Record everything when there is no previous row, or once the
      # previously recorded last row has been passed.
      if last_transaction == '' or found_last:
         fh.write( csv_line + '\n' )
         import_list.append( t )
         continue

      if last_transaction == csv_line:
         print >> sys.stderr, "   OK: last transaction found"
         print >> sys.stderr, "     last transaction was: %s" % last_transaction
         found_last = True
finally:
   # close the file even when one of the steps above fails
   fh.close()

# Make the otherwise silent case visible: the last recorded row is not part of
# the download, so nothing could be appended.
if last_transaction != '' and not found_last:
   print >> sys.stderr, "   WARNING: last transaction of transactions.csv not found in download, nothing imported"
   print >> sys.stderr, "     last transaction was: %s" % last_transaction

# generate qif
# Only the transactions appended to the CSV in this run (import_list) are
# exported; without any there is nothing to write.
print >> sys.stderr, "STEP 6 ... generate QIF file"
if not import_list:
   print >> sys.stderr, "   nothing to do"
else:
   fname = "import_%s.qif" % datetime.datetime.now().strftime('%Y%m%d_%H%M')
   fh = codecs.open( fname, encoding='utf-8', mode='w+' )
   # account header followed by the start of the bank transaction list
   fh.write( "!Account\nNBA Konto\nTBank\n^\n!Type:Bank\n" )
   # QIF line prefix -> index into the transaction list:
   # D = posting date, P = payee, T = amount, M = booking text
   qif_fields = ( ( "D", 0 ), ( "P", 5 ), ( "T", 2 ), ( "M", 4 ) )
   for entry in import_list:
      for prefix, column in qif_fields:
         fh.write( prefix + entry[column] + "\n" )
      # '^' terminates the record
      fh.write( "^" + "\n" )
   fh.close()
   print >> sys.stderr, "   OK: QIF file %s was written with %d transactions" % ( fname, len( import_list ) )


# logout
# End the banking session by posting the session arguments from step 3 to the
# logout servlet.
print >> sys.stderr, "STEP 7 ... logout"
logout_request = urllib2.Request( logout_url )
logout_request.add_data( session_args + '&language=DE&mode=no' )
r = urllib2.urlopen( logout_request )

status_code = r.code
if status_code != 200:
   print >> sys.stderr, "ERROR: got status code %d" % status_code
   sys.exit( 1 )
