'''
Copyright (c) 2008  Yap Sok Ann <sayap@sayap.com>

This module contains XMLTV grabbers for Malaysian TV channels.
'''

__author__ = 'Yap Sok Ann <sayap@sayap.com>'
__license__ = 'PSF License'

import logging

from datetime import date as dt
from datetime import datetime, time, timedelta
from dateutil.tz import tzlocal
from httplib2 import Http
from lxml import etree
from urllib import urlencode
from BeautifulSoup import BeautifulSoup

# Channel ids to grab. Each id is used as the XMLTV channel attribute and is
# also fed into the source's URL or query string.
channels = ['rtm1', 'rtm2', 'tv3', 'ntv7', '8tv', 'tv9']

# strftime format applied to the programme start/stop attributes.
datetime_format = '%Y%m%d%H%M%S %z'

# Shared HTTP client used by every grabber. With
# force_exception_to_status_code set, httplib2 reports transport errors
# through the response status instead of raising.
h = Http()
h.force_exception_to_status_code = True
#h.timeout = 15

# NOTE: logging is configured at import time, at DEBUG level, for the whole
# process.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)-8s %(process)d %(message)s',
)
log = logging.getLogger(__name__)

def strclean(s):
    '''Returns the cleaned-up text of a scraped table cell.
    
    Surrounding whitespace is stripped and the &lsquo; / &rsquo; entities
    are replaced with a plain apostrophe.
    
    Returns None when there is no usable text: either s is None (which is
    what BeautifulSoup's .string gives for a tag with more than one child)
    or the cell only holds the &nbsp; placeholder.
    '''
    if s is None:
        return None
    s = s.strip().replace('&lsquo;', '\'').replace('&rsquo;', '\'')
    if s == '&nbsp;':
        return None
    return s

class Grabber(object):
    '''Base class for a schedule grabber bound to a single channel.
    
    Subclasses set base_url and implement qs_params() and _parse_html().
    grab() then fetches one page and turns it into <programme> elements.
    '''
    
    base_url = None
    
    def __init__(self, channel):
        self.channel = channel
        self.url = self.base_url
    
    def qs_params(self, date, **kwargs):
        '''Returns a dict of params to form the url's query string
        '''
        raise NotImplementedError
    
    def _parse_html(self, date, html):
        '''Returns an iterable of dicts with the following keys:
        - mandatory: title, start
        - optional: stop, sub_title, desc, episode_num, episode_system
        '''
        raise NotImplementedError
    
    def get_html(self, date, **kwargs):
        '''Fetches the page for the given date.
        
        Returns the parsed BeautifulSoup document, or None (after logging
        the status and body) when the response status is not 200.
        '''
        params = self.qs_params(date, **kwargs)
        response, content = h.request(self.url + '?' + urlencode(params))
        if response.status == 200:
            return BeautifulSoup(content)
        log.error('Status: %s\nContent: %s', response.status, content)
        return None
    
    def parse_html(self, date, html):
        '''Yields the schedules from _parse_html(), each with a stop time.
        
        A schedule without 'stop' is held back until the next schedule is
        seen, and then takes that schedule's start as its stop. A trailing
        schedule without 'stop' is therefore never yielded. Parsing errors
        are logged and end the iteration early.
        '''
        pending = None
        try:
            for schedule in self._parse_html(date, html):
                if pending is not None:
                    # Close the held-back entry whether or not the current
                    # one carries its own stop, so it is never lost.
                    pending['stop'] = schedule['start']
                    yield pending
                    pending = None
                if 'stop' in schedule:
                    yield schedule
                else:
                    pending = schedule
        except Exception:
            # Not a bare except: closing the generator raises GeneratorExit
            # at the yield, and that must not be logged as a parse failure.
            log.exception('Cannot parse html for date %s', date)
    
    def to_xml(self, schedules):
        '''Yields one <programme> element per schedule dict.
        
        Each schedule must have 'title', 'start' and 'stop'. The optional
        keys are the ones listed in _parse_html().
        '''
        for schedule in schedules:
            program = etree.Element('programme', channel=self.channel,
                start=schedule['start'].strftime(datetime_format),
                stop=schedule['stop'].strftime(datetime_format))
            
            title = etree.SubElement(program, 'title')
            title.text = schedule['title']
            
            # The XMLTV DTD orders the children of <programme> as title,
            # sub-title, desc, ..., episode-num, so these come first.
            for field in ['sub_title', 'desc']:
                if schedule.get(field):
                    elem = etree.SubElement(program, field.replace('_', '-'))
                    elem.text = schedule[field]
            
            if schedule.get('episode_num'):
                episode_num = etree.SubElement(program, 'episode-num')
                # 'onscreen' is the DTD default for the system attribute;
                # set() would reject a missing (None) value.
                episode_num.set('system',
                                schedule.get('episode_system') or 'onscreen')
                episode_num.text = schedule['episode_num']
            
            yield program
    
    def grab(self, date, **kwargs):
        '''Returns an iterable of <programme> elements for the given date.
        
        The iterable is empty when the page could not be fetched, so the
        result can always be looped over.
        '''
        html = self.get_html(date, **kwargs)
        if not html:
            return []
        return self.to_xml(self.parse_html(date, html))

class Astro(Grabber):
    '''Grabber for the per-channel schedule pages on www.astro.com.my.
    
    Every row already carries a duration, so each yielded schedule has
    both 'start' and 'stop'.
    '''
    
    base_url = 'http://www.astro.com.my/channels/%(channel)s/Default.asp'
    # Extra query-string params; one request is made per dict.
    params_dicts = [{'batch': 1}, {'batch': 2}]
    # Titles that mark dead air rather than a programme.
    ignores = ['No Transmission', 'Transmission Ends']
    
    def __init__(self, channel):
        self.channel = channel
        # The channel id is part of the page path for this source.
        self.url = self.base_url % {'channel': channel}
    
    def qs_params(self, date, **kwargs):
        '''Returns kwargs with the date added as sDate (e.g. 05-Jan-2008).
        '''
        kwargs['sDate'] = date.strftime('%d-%b-%Y')
        return kwargs
    
    def _parse_html(self, date, html):
        '''Yields a dict(title, start, stop) for each listed programme.
        '''
        header = html.find('tr', bgcolor='#29487F')
        for row in header.fetchNextSiblings('tr'):
            cells = row.findChildren('td')
            
            name = strclean(cells[1].find('a').string)
            if name in self.ignores:
                continue
            
            # First cell: 24-hour start time such as '21:00'.
            start_hour, start_minute = map(int, cells[0].string.split(':'))
            begins_at = time(start_hour, start_minute, tzinfo=tzlocal())
            begins = datetime.combine(date, begins_at)
            
            # Third cell: running length such as '00:30'.
            run_hours, run_minutes = map(int, cells[2].string.split(':'))
            length = timedelta(hours=run_hours, minutes=run_minutes)
            
            yield {'title': name, 'start': begins, 'stop': begins + length}

class TheStar(Grabber):
    '''Grabber for the TV guide on star-ecentral.com.
    
    Rows only carry a start time, so the yielded schedules have no 'stop'.
    '''
    
    base_url = 'http://star-ecentral.com/tvnradio/tvguide/guide.asp'
    # Extra query-string params; one request is made per dict.
    params_dicts = [{'db': 'live'}]
    
    def qs_params(self, date, **kwargs):
        '''Returns kwargs with the date (pdate) and channel (chn) added.
        '''
        kwargs['pdate'] = date.strftime('%m/%d/%Y')
        # This source names the RTM channels tv1/tv2 instead of rtm1/rtm2.
        kwargs['chn'] = self.channel.replace('rtm', 'tv')
        return kwargs
    
    def _parse_html(self, date, html):
        '''Yields a dict per listed programme with title, desc and start,
        plus episode_num and episode_system when an episode is given.
        '''
        previous_meridiem = None
        header = html.find('tr', bgcolor='#5e789c')
        for row in header.fetchNextSiblings('tr'):
            cells = row.findChildren('td')
            
            entry = {}
            entry['title'] = strclean(cells[1].find('b').find('font').string)
            entry['desc'] = strclean(cells[2].find('font').string)
            
            raw_episode = strclean(cells[3].find('font').string)
            if raw_episode:
                try:
                    # xmltv_ns counts from zero and the episode sits
                    # between the two dots.
                    entry['episode_num'] = '.%d.' % (int(raw_episode) - 1)
                    entry['episode_system'] = 'xmltv_ns'
                except ValueError:
                    # Not a plain number: keep the text as shown on screen.
                    entry['episode_num'] = raw_episode
                    entry['episode_system'] = 'onscreen'
            
            # Start time is 12-hour with a suffix, e.g. '9.00pm'.
            clock = cells[0].find('font').string
            meridiem = clock[-2:]
            hour, minute = map(int, clock[:-2].split('.'))
            if meridiem == 'pm':
                if hour < 12:
                    hour += 12
            elif meridiem == 'am' and hour == 12:
                hour = 0
            
            # A pm row followed by an am row means the listing has run
            # past midnight into the next day.
            if previous_meridiem == 'pm' and meridiem == 'am':
                date += timedelta(1)
            previous_meridiem = meridiem
            
            starts_at = time(hour, minute, tzinfo=tzlocal())
            entry['start'] = datetime.combine(date, starts_at)
            
            yield entry

def main():
    '''Command-line entry point.
    
    Grabs the schedules of every channel in channels from the chosen
    source, for the requested run of days, and writes them as a single
    XMLTV document to the output file or to stdout. Invalid option values
    end the program with a usage error.
    '''
    import sys
    from optparse import OptionParser
    
    parser = OptionParser()
    parser.add_option('-s', '--source', dest='source',
        help='SOURCE to grab from: Astro, TheStar. Default: TheStar')
    parser.add_option('-d', '--date', dest='date',
        help='Start DATE to grab schedules for (YYYY-MM-DD). Default: today')
    parser.add_option('-n', '--days', dest='days', type='int', default=1,
        help='Number of DAYS to grab schedules for. Default: 1')
    parser.add_option('-f', '--file', dest='filename', metavar='FILE',
        help='Output FILE to write to. Default: stdout')
    
    options, args = parser.parse_args()
    
    if options.source is None:
        cls = TheStar
    else:
        # Only accept module-level names that are concrete grabbers, not
        # whatever else happens to live in globals().
        cls = globals().get(options.source)
        try:
            is_source = issubclass(cls, Grabber) and cls is not Grabber
        except TypeError:
            # cls is None or some other non-class object.
            is_source = False
        if not is_source:
            parser.error('unknown source: %s' % options.source)
    
    if options.date is None:
        date = dt.today()
    else:
        try:
            date = dt(*[int(x) for x in options.date.split('-')])
        except (TypeError, ValueError):
            parser.error('invalid date %r, expected YYYY-MM-DD'
                         % options.date)
    
    root = etree.Element('tv')
    
    for channel in channels:
        grabber = cls(channel)
        for i in range(options.days):
            for params_dict in cls.params_dicts:
                # grab() gives None when the page could not be fetched.
                elems = grabber.grab(date + timedelta(i), **params_dict)
                for elem in elems or ():
                    root.append(elem)
    
    xml = etree.tostring(root, encoding='UTF-8', xml_declaration=True,
                         pretty_print=True)
    if options.filename is None:
        # Same output as the print statement: the document plus a newline.
        sys.stdout.write(xml)
        sys.stdout.write('\n')
    else:
        # xml is already UTF-8 encoded, so write it in binary mode and
        # make sure the file is closed even if the write fails.
        out = open(options.filename, 'wb')
        try:
            out.write(xml)
        finally:
            out.close()

# Run the grabber only when this file is executed as a script.
if __name__ == '__main__':
    main()