diff --git a/BUGS b/BUGS new file mode 100755 index 0000000..004e0db --- /dev/null +++ b/BUGS @@ -0,0 +1,27 @@ +Investigate: + +[2011-02-10 03:51:47.148968] ALL_RAW_MESSAGES: [':malacoda!~liberius@Rizon-8449AE11.satx.res.rr.com PRIVMSG #eightbar :!eightball Is eightball working?'] from irc.kickassanime.org to None +Traceback (most recent call last): + File "./tenquestionmarks.py", line 383, in + tqm.loop() + File "./tenquestionmarks.py", line 125, in loop + self.ircobj.process_once() + File "./lib/irclib/irclib.py", line 214, in process_once + self.process_data(i) + File "./lib/irclib/irclib.py", line 183, in process_data + c.process_data() + File "./lib/irclib/irclib.py", line 571, in process_data + self._handle_event(Event(command, prefix, target, [m])) + File "./lib/irclib/irclib.py", line 594, in _handle_event + self.irclibobj._handle_event(self, event) + File "./lib/irclib/irclib.py", line 326, in _handle_event + if handler[1](connection, event) == "NO MORE": + File "./tenquestionmarks.py", line 79, in _dispatcher + irclib.SimpleIRCClient._dispatcher(self,connection,event) + File "./lib/irclib/irclib.py", line 1043, in _dispatcher + getattr(self, m)(c, e) + File "./tenquestionmarks.py", line 163, in on_pubmsg + if message.startswith(self.command_prefix): +TypeError: expected a character buffer object + +python 2.6.6 diff --git a/TODO b/TODO new file mode 100755 index 0000000..035ced7 --- /dev/null +++ b/TODO @@ -0,0 +1,12 @@ +Modules: + - Welcome + - Help + - Log + - Stats + - Forum Integration (smf, mybb) + +Make more object-oriented (i.e. 
user objects, channel objects instead of strings) + +Documentation + +Release diff --git a/lib/feedparser/feedparser.py b/lib/feedparser/feedparser.py new file mode 100755 index 0000000..bb802df --- /dev/null +++ b/lib/feedparser/feedparser.py @@ -0,0 +1,2858 @@ +#!/usr/bin/env python +"""Universal feed parser + +Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds + +Visit http://feedparser.org/ for the latest version +Visit http://feedparser.org/docs/ for the latest documentation + +Required: Python 2.1 or later +Recommended: Python 2.3 or later +Recommended: CJKCodecs and iconv_codec +""" + +__version__ = "4.1"# + "$Revision: 1.92 $"[11:15] + "-cvs" +__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE.""" +__author__ = "Mark Pilgrim " +__contributors__ = ["Jason Diamond ", + "John Beimler ", + "Fazal Majid ", + "Aaron Swartz ", + "Kevin Marks "] +_debug = 0 + +# HTTP "User-Agent" header to send to servers when downloading feeds. +# If you are embedding feedparser in a larger application, you should +# change this to your application name and URL. +USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % __version__ + +# HTTP "Accept" header to send to servers when downloading feeds. If you don't +# want to send an Accept header, set this to None. +ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1" + +# List of preferred XML parsers, by SAX driver name. These will be tried first, +# but if they're not installed, Python will keep searching through its own list +# of pre-installed parsers until it finds one that supports everything we need. +PREFERRED_XML_PARSERS = ["drv_libxml2"] + +# If you want feedparser to automatically run HTML markup through HTML Tidy, set +# this to 1. Requires mxTidy +# or utidylib . +TIDY_MARKUP = 0 + +# List of Python interfaces for HTML Tidy, in order of preference. 
Only useful +# if TIDY_MARKUP = 1 +PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] + +# ---------- required modules (should come with any Python distribution) ---------- +import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2 +try: + from cStringIO import StringIO as _StringIO +except: + from StringIO import StringIO as _StringIO + +# ---------- optional modules (feedparser will work without these, but with reduced functionality) ---------- + +# gzip is included with most Python distributions, but may not be available if you compiled your own +try: + import gzip +except: + gzip = None +try: + import zlib +except: + zlib = None + +# If a real XML parser is available, feedparser will attempt to use it. feedparser has +# been tested with the built-in SAX parser, PyXML, and libxml2. On platforms where the +# Python distribution does not come with an XML parser (such as Mac OS X 10.2 and some +# versions of FreeBSD), feedparser will quietly fall back on regex-based parsing. +try: + import xml.sax + xml.sax.make_parser(PREFERRED_XML_PARSERS) # test for valid parsers + from xml.sax.saxutils import escape as _xmlescape + _XML_AVAILABLE = 1 +except: + _XML_AVAILABLE = 0 + def _xmlescape(data): # stand-in for xml.sax.saxutils.escape, used when the XML-parser probe above fails + data = data.replace('&', '&amp;') # ampersands first, so the entities added below are not escaped again + data = data.replace('>', '&gt;') + data = data.replace('<', '&lt;') + return data + +# base64 support for Atom feeds that contain embedded binary data +try: + import base64, binascii +except: + base64 = binascii = None + +# cjkcodecs and iconv_codec provide support for more character encodings. 
+# Both are available from http://cjkpython.i18n.org/ +try: + import cjkcodecs.aliases +except: + pass +try: + import iconv_codec +except: + pass + +# chardet library auto-detects character encodings +# Download from http://chardet.feedparser.org/ +try: + import chardet + if _debug: + import chardet.constants + chardet.constants._debug = 1 +except: + chardet = None + +# ---------- don't touch these ---------- +class ThingsNobodyCaresAboutButMe(Exception): pass +class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe): pass +class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe): pass +class NonXMLContentType(ThingsNobodyCaresAboutButMe): pass +class UndeclaredNamespace(Exception): pass + +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +sgmllib.special = re.compile('' % (tag, ''.join([' %s="%s"' % t for t in attrs])), escape=0) + + # match namespaces + if tag.find(':') <> -1: + prefix, suffix = tag.split(':', 1) + else: + prefix, suffix = '', tag + prefix = self.namespacemap.get(prefix, prefix) + if prefix: + prefix = prefix + '_' + + # special hack for better tracking of empty textinput/image elements in illformed feeds + if (not prefix) and tag not in ('title', 'link', 'description', 'name'): + self.intextinput = 0 + if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'): + self.inimage = 0 + + # call special handler (if defined) or default handler + methodname = '_start_' + prefix + suffix + try: + method = getattr(self, methodname) + return method(attrsD) + except AttributeError: + return self.push(prefix + suffix, 1) + + def unknown_endtag(self, tag): + if _debug: sys.stderr.write('end %s\n' % tag) + # match namespaces + if tag.find(':') <> -1: + prefix, suffix = tag.split(':', 1) + else: + prefix, suffix = '', tag + prefix = self.namespacemap.get(prefix, prefix) + if prefix: + prefix = prefix + '_' + + # call special handler (if defined) or default handler + methodname = '_end_' + prefix + suffix + 
try: + method = getattr(self, methodname) + method() + except AttributeError: + self.pop(prefix + suffix) + + # track inline content + if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): + # element declared itself as escaped markup, but it isn't really + self.contentparams['type'] = 'application/xhtml+xml' + if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': + tag = tag.split(':')[-1] + self.handle_data('</%s>' % tag, escape=0) # echo the end tag (namespace prefix dropped) into the inline XHTML being collected + + # track xml:base and xml:lang going out of scope + if self.basestack: + self.basestack.pop() + if self.basestack and self.basestack[-1]: + self.baseuri = self.basestack[-1] + if self.langstack: + self.langstack.pop() + if self.langstack: # and (self.langstack[-1] is not None): + self.lang = self.langstack[-1] + + def handle_charref(self, ref): + # called for each character reference, e.g. for '&#160;', ref will be '160' + if not self.elementstack: return + ref = ref.lower() + if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'): + text = '&#%s;' % ref + else: + if ref[0] == 'x': + c = int(ref[1:], 16) + else: + c = int(ref) + text = unichr(c).encode('utf-8') + self.elementstack[-1][2].append(text) + + def handle_entityref(self, ref): + # called for each entity reference, e.g. 
for '&copy;', ref will be 'copy' + if not self.elementstack: return + if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref) + if ref in ('lt', 'gt', 'quot', 'amp', 'apos'): + text = '&%s;' % ref + else: + # entity resolution graciously donated by Aaron Swartz + def name2cp(k): + import htmlentitydefs + if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3 + return htmlentitydefs.name2codepoint[k] + k = htmlentitydefs.entitydefs[k] + if k.startswith('&#') and k.endswith(';'): + return int(k[2:-1]) # not in latin-1 + return ord(k) + try: name2cp(ref) + except KeyError: text = '&%s;' % ref + else: text = unichr(name2cp(ref)).encode('utf-8') + self.elementstack[-1][2].append(text) + + def handle_data(self, text, escape=1): + # called for each block of plain text, i.e. outside of any tag and + # not containing any character or entity references + if not self.elementstack: return + if escape and self.contentparams.get('type') == 'application/xhtml+xml': + text = _xmlescape(text) + self.elementstack[-1][2].append(text) + + def handle_comment(self, text): + # called for each comment, e.g. <!-- insert message here --> + pass + + def handle_pi(self, text): + # called for each processing instruction, e.g. <?instruction>
+ pass + + def handle_decl(self, text): + pass + + def parse_declaration(self, i): + # override internal declaration handler to handle CDATA blocks + if _debug: sys.stderr.write('entering parse_declaration\n') + if self.rawdata[i:i+9] == '<![CDATA[': + k = self.rawdata.find(']]>', i) # a CDATA section runs up to its closing ']]>' + if k == -1: k = len(self.rawdata) + self.handle_data(_xmlescape(self.rawdata[i+9:k]), 0) + return k+3 + else: + k = self.rawdata.find('>', i) + return k+1 + + def mapContentType(self, contentType): + contentType = contentType.lower() + if contentType == 'text': + contentType = 'text/plain' + elif contentType == 'html': + contentType = 'text/html' + elif contentType == 'xhtml': + contentType = 'application/xhtml+xml' + return contentType + + def trackNamespace(self, prefix, uri): + loweruri = uri.lower() + if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version: + self.version = 'rss090' + if loweruri == 'http://purl.org/rss/1.0/' and not self.version: + self.version = 'rss10' + if loweruri == 'http://www.w3.org/2005/atom' and not self.version: + self.version = 'atom10' + if loweruri.find('backend.userland.com/rss') <> -1: + # match any backend.userland.com namespace + uri = 'http://backend.userland.com/rss' + loweruri = uri + if self._matchnamespaces.has_key(loweruri): + self.namespacemap[prefix] = self._matchnamespaces[loweruri] + self.namespacesInUse[self._matchnamespaces[loweruri]] = uri + else: + self.namespacesInUse[prefix or ''] = uri + + def resolveURI(self, uri): + return _urljoin(self.baseuri or '', uri) + + def decodeEntities(self, element, data): + return data + + def push(self, element, expectingText): + self.elementstack.append([element, expectingText, []]) + + def pop(self, element, stripWhitespace=1): + if not self.elementstack: return + if self.elementstack[-1][0] != element: return + + element, expectingText, pieces = self.elementstack.pop() + output = ''.join(pieces) + if stripWhitespace: + output = output.strip() + if not expectingText: return output + + # decode 
base64 content + if base64 and self.contentparams.get('base64', 0): + try: + output = base64.decodestring(output) + except binascii.Error: + pass + except binascii.Incomplete: + pass + + # resolve relative URIs + if (element in self.can_be_relative_uri) and output: + output = self.resolveURI(output) + + # decode entities within embedded markup + if not self.contentparams.get('base64', 0): + output = self.decodeEntities(element, output) + + # remove temporary cruft from contentparams + try: + del self.contentparams['mode'] + except KeyError: + pass + try: + del self.contentparams['base64'] + except KeyError: + pass + + # resolve relative URIs within embedded markup + if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types: + if element in self.can_contain_relative_uris: + output = _resolveRelativeURIs(output, self.baseuri, self.encoding) + + # sanitize embedded markup + if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types: + if element in self.can_contain_dangerous_markup: + output = _sanitizeHTML(output, self.encoding) + + if self.encoding and type(output) != type(u''): + try: + output = unicode(output, self.encoding) + except: + pass + + # categories/tags/keywords/whatever are handled in _end_category + if element == 'category': + return output + + # store output in appropriate place(s) + if self.inentry and not self.insource: + if element == 'content': + self.entries[-1].setdefault(element, []) + contentparams = copy.deepcopy(self.contentparams) + contentparams['value'] = output + self.entries[-1][element].append(contentparams) + elif element == 'link': + self.entries[-1][element] = output + if output: + self.entries[-1]['links'][-1]['href'] = output + else: + if element == 'description': + element = 'summary' + self.entries[-1][element] = output + if self.incontent: + contentparams = copy.deepcopy(self.contentparams) + contentparams['value'] = output + self.entries[-1][element + '_detail'] = 
contentparams + elif (self.infeed or self.insource) and (not self.intextinput) and (not self.inimage): + context = self._getContext() + if element == 'description': + element = 'subtitle' + context[element] = output + if element == 'link': + context['links'][-1]['href'] = output + elif self.incontent: + contentparams = copy.deepcopy(self.contentparams) + contentparams['value'] = output + context[element + '_detail'] = contentparams + return output + + def pushContent(self, tag, attrsD, defaultContentType, expectingText): + self.incontent += 1 + self.contentparams = FeedParserDict({ + 'type': self.mapContentType(attrsD.get('type', defaultContentType)), + 'language': self.lang, + 'base': self.baseuri}) + self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams) + self.push(tag, expectingText) + + def popContent(self, tag): + value = self.pop(tag) + self.incontent -= 1 + self.contentparams.clear() + return value + + def _mapToStandardPrefix(self, name): + colonpos = name.find(':') + if colonpos <> -1: + prefix = name[:colonpos] + suffix = name[colonpos+1:] + prefix = self.namespacemap.get(prefix, prefix) + name = prefix + ':' + suffix + return name + + def _getAttribute(self, attrsD, name): + return attrsD.get(self._mapToStandardPrefix(name)) + + def _isBase64(self, attrsD, contentparams): + if attrsD.get('mode', '') == 'base64': + return 1 + if self.contentparams['type'].startswith('text/'): + return 0 + if self.contentparams['type'].endswith('+xml'): + return 0 + if self.contentparams['type'].endswith('/xml'): + return 0 + return 1 + + def _itsAnHrefDamnIt(self, attrsD): + href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None))) + if href: + try: + del attrsD['url'] + except KeyError: + pass + try: + del attrsD['uri'] + except KeyError: + pass + attrsD['href'] = href + return attrsD + + def _save(self, key, value): + context = self._getContext() + context.setdefault(key, value) + + def _start_rss(self, attrsD): + versionmap = {'0.91': 
'rss091u', + '0.92': 'rss092', + '0.93': 'rss093', + '0.94': 'rss094'} + if not self.version: + attr_version = attrsD.get('version', '') + version = versionmap.get(attr_version) + if version: + self.version = version + elif attr_version.startswith('2.'): + self.version = 'rss20' + else: + self.version = 'rss' + + def _start_dlhottitles(self, attrsD): + self.version = 'hotrss' + + def _start_channel(self, attrsD): + self.infeed = 1 + self._cdf_common(attrsD) + _start_feedinfo = _start_channel + + def _cdf_common(self, attrsD): + if attrsD.has_key('lastmod'): + self._start_modified({}) + self.elementstack[-1][-1] = attrsD['lastmod'] + self._end_modified() + if attrsD.has_key('href'): + self._start_link({}) + self.elementstack[-1][-1] = attrsD['href'] + self._end_link() + + def _start_feed(self, attrsD): + self.infeed = 1 + versionmap = {'0.1': 'atom01', + '0.2': 'atom02', + '0.3': 'atom03'} + if not self.version: + attr_version = attrsD.get('version') + version = versionmap.get(attr_version) + if version: + self.version = version + else: + self.version = 'atom' + + def _end_channel(self): + self.infeed = 0 + _end_feed = _end_channel + + def _start_image(self, attrsD): + self.inimage = 1 + self.push('image', 0) + context = self._getContext() + context.setdefault('image', FeedParserDict()) + + def _end_image(self): + self.pop('image') + self.inimage = 0 + + def _start_textinput(self, attrsD): + self.intextinput = 1 + self.push('textinput', 0) + context = self._getContext() + context.setdefault('textinput', FeedParserDict()) + _start_textInput = _start_textinput + + def _end_textinput(self): + self.pop('textinput') + self.intextinput = 0 + _end_textInput = _end_textinput + + def _start_author(self, attrsD): + self.inauthor = 1 + self.push('author', 1) + _start_managingeditor = _start_author + _start_dc_author = _start_author + _start_dc_creator = _start_author + _start_itunes_author = _start_author + + def _end_author(self): + self.pop('author') + self.inauthor = 0 + 
self._sync_author_detail() + _end_managingeditor = _end_author + _end_dc_author = _end_author + _end_dc_creator = _end_author + _end_itunes_author = _end_author + + def _start_itunes_owner(self, attrsD): + self.inpublisher = 1 + self.push('publisher', 0) + + def _end_itunes_owner(self): + self.pop('publisher') + self.inpublisher = 0 + self._sync_author_detail('publisher') + + def _start_contributor(self, attrsD): + self.incontributor = 1 + context = self._getContext() + context.setdefault('contributors', []) + context['contributors'].append(FeedParserDict()) + self.push('contributor', 0) + + def _end_contributor(self): + self.pop('contributor') + self.incontributor = 0 + + def _start_dc_contributor(self, attrsD): + self.incontributor = 1 + context = self._getContext() + context.setdefault('contributors', []) + context['contributors'].append(FeedParserDict()) + self.push('name', 0) + + def _end_dc_contributor(self): + self._end_name() + self.incontributor = 0 + + def _start_name(self, attrsD): + self.push('name', 0) + _start_itunes_name = _start_name + + def _end_name(self): + value = self.pop('name') + if self.inpublisher: + self._save_author('name', value, 'publisher') + elif self.inauthor: + self._save_author('name', value) + elif self.incontributor: + self._save_contributor('name', value) + elif self.intextinput: + context = self._getContext() + context['textinput']['name'] = value + _end_itunes_name = _end_name + + def _start_width(self, attrsD): + self.push('width', 0) + + def _end_width(self): + value = self.pop('width') + try: + value = int(value) + except: + value = 0 + if self.inimage: + context = self._getContext() + context['image']['width'] = value + + def _start_height(self, attrsD): + self.push('height', 0) + + def _end_height(self): + value = self.pop('height') + try: + value = int(value) + except: + value = 0 + if self.inimage: + context = self._getContext() + context['image']['height'] = value + + def _start_url(self, attrsD): + self.push('href', 
1) + _start_homepage = _start_url + _start_uri = _start_url + + def _end_url(self): + value = self.pop('href') + if self.inauthor: + self._save_author('href', value) + elif self.incontributor: + self._save_contributor('href', value) + elif self.inimage: + context = self._getContext() + context['image']['href'] = value + elif self.intextinput: + context = self._getContext() + context['textinput']['link'] = value + _end_homepage = _end_url + _end_uri = _end_url + + def _start_email(self, attrsD): + self.push('email', 0) + _start_itunes_email = _start_email + + def _end_email(self): + value = self.pop('email') + if self.inpublisher: + self._save_author('email', value, 'publisher') + elif self.inauthor: + self._save_author('email', value) + elif self.incontributor: + self._save_contributor('email', value) + _end_itunes_email = _end_email + + def _getContext(self): + if self.insource: + context = self.sourcedata + elif self.inentry: + context = self.entries[-1] + else: + context = self.feeddata + return context + + def _save_author(self, key, value, prefix='author'): + context = self._getContext() + context.setdefault(prefix + '_detail', FeedParserDict()) + context[prefix + '_detail'][key] = value + self._sync_author_detail() + + def _save_contributor(self, key, value): + context = self._getContext() + context.setdefault('contributors', [FeedParserDict()]) + context['contributors'][-1][key] = value + + def _sync_author_detail(self, key='author'): + context = self._getContext() + detail = context.get('%s_detail' % key) + if detail: + name = detail.get('name') + email = detail.get('email') + if name and email: + context[key] = '%s (%s)' % (name, email) + elif name: + context[key] = name + elif email: + context[key] = email + else: + author = context.get(key) + if not author: return + emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))''', author) + if not emailmatch: return + 
email = emailmatch.group(0) + # probably a better way to do the following, but it passes all the tests + author = author.replace(email, '') + author = author.replace('()', '') + author = author.strip() + if author and (author[0] == '('): + author = author[1:] + if author and (author[-1] == ')'): + author = author[:-1] + author = author.strip() + context.setdefault('%s_detail' % key, FeedParserDict()) + context['%s_detail' % key]['name'] = author + context['%s_detail' % key]['email'] = email + + def _start_subtitle(self, attrsD): + self.pushContent('subtitle', attrsD, 'text/plain', 1) + _start_tagline = _start_subtitle + _start_itunes_subtitle = _start_subtitle + + def _end_subtitle(self): + self.popContent('subtitle') + _end_tagline = _end_subtitle + _end_itunes_subtitle = _end_subtitle + + def _start_rights(self, attrsD): + self.pushContent('rights', attrsD, 'text/plain', 1) + _start_dc_rights = _start_rights + _start_copyright = _start_rights + + def _end_rights(self): + self.popContent('rights') + _end_dc_rights = _end_rights + _end_copyright = _end_rights + + def _start_item(self, attrsD): + self.entries.append(FeedParserDict()) + self.push('item', 0) + self.inentry = 1 + self.guidislink = 0 + id = self._getAttribute(attrsD, 'rdf:about') + if id: + context = self._getContext() + context['id'] = id + self._cdf_common(attrsD) + _start_entry = _start_item + _start_product = _start_item + + def _end_item(self): + self.pop('item') + self.inentry = 0 + _end_entry = _end_item + + def _start_dc_language(self, attrsD): + self.push('language', 1) + _start_language = _start_dc_language + + def _end_dc_language(self): + self.lang = self.pop('language') + _end_language = _end_dc_language + + def _start_dc_publisher(self, attrsD): + self.push('publisher', 1) + _start_webmaster = _start_dc_publisher + + def _end_dc_publisher(self): + self.pop('publisher') + self._sync_author_detail('publisher') + _end_webmaster = _end_dc_publisher + + def _start_published(self, attrsD): + 
self.push('published', 1) + _start_dcterms_issued = _start_published + _start_issued = _start_published + + def _end_published(self): + value = self.pop('published') + self._save('published_parsed', _parse_date(value)) + _end_dcterms_issued = _end_published + _end_issued = _end_published + + def _start_updated(self, attrsD): + self.push('updated', 1) + _start_modified = _start_updated + _start_dcterms_modified = _start_updated + _start_pubdate = _start_updated + _start_dc_date = _start_updated + + def _end_updated(self): + value = self.pop('updated') + parsed_value = _parse_date(value) + self._save('updated_parsed', parsed_value) + _end_modified = _end_updated + _end_dcterms_modified = _end_updated + _end_pubdate = _end_updated + _end_dc_date = _end_updated + + def _start_created(self, attrsD): + self.push('created', 1) + _start_dcterms_created = _start_created + + def _end_created(self): + value = self.pop('created') + self._save('created_parsed', _parse_date(value)) + _end_dcterms_created = _end_created + + def _start_expirationdate(self, attrsD): + self.push('expired', 1) + + def _end_expirationdate(self): + self._save('expired_parsed', _parse_date(self.pop('expired'))) + + def _start_cc_license(self, attrsD): + self.push('license', 1) + value = self._getAttribute(attrsD, 'rdf:resource') + if value: + self.elementstack[-1][2].append(value) + self.pop('license') + + def _start_creativecommons_license(self, attrsD): + self.push('license', 1) + + def _end_creativecommons_license(self): + self.pop('license') + + def _addTag(self, term, scheme, label): + context = self._getContext() + tags = context.setdefault('tags', []) + if (not term) and (not scheme) and (not label): return + value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label}) + if value not in tags: + tags.append(FeedParserDict({'term': term, 'scheme': scheme, 'label': label})) + + def _start_category(self, attrsD): + if _debug: sys.stderr.write('entering _start_category with %s\n' % 
repr(attrsD)) + term = attrsD.get('term') + scheme = attrsD.get('scheme', attrsD.get('domain')) + label = attrsD.get('label') + self._addTag(term, scheme, label) + self.push('category', 1) + _start_dc_subject = _start_category + _start_keywords = _start_category + + def _end_itunes_keywords(self): + for term in self.pop('itunes_keywords').split(): + self._addTag(term, 'http://www.itunes.com/', None) + + def _start_itunes_category(self, attrsD): + self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None) + self.push('category', 1) + + def _end_category(self): + value = self.pop('category') + if not value: return + context = self._getContext() + tags = context['tags'] + if value and len(tags) and not tags[-1]['term']: + tags[-1]['term'] = value + else: + self._addTag(value, None, None) + _end_dc_subject = _end_category + _end_keywords = _end_category + _end_itunes_category = _end_category + + def _start_cloud(self, attrsD): + self._getContext()['cloud'] = FeedParserDict(attrsD) + + def _start_link(self, attrsD): + attrsD.setdefault('rel', 'alternate') + attrsD.setdefault('type', 'text/html') + attrsD = self._itsAnHrefDamnIt(attrsD) + if attrsD.has_key('href'): + attrsD['href'] = self.resolveURI(attrsD['href']) + expectingText = self.infeed or self.inentry or self.insource + context = self._getContext() + context.setdefault('links', []) + context['links'].append(FeedParserDict(attrsD)) + if attrsD['rel'] == 'enclosure': + self._start_enclosure(attrsD) + if attrsD.has_key('href'): + expectingText = 0 + if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): + context['link'] = attrsD['href'] + else: + self.push('link', expectingText) + _start_producturl = _start_link + + def _end_link(self): + value = self.pop('link') + context = self._getContext() + if self.intextinput: + context['textinput']['link'] = value + if self.inimage: + context['image']['link'] = value + _end_producturl = _end_link + + def 
_start_guid(self, attrsD): + self.guidislink = (attrsD.get('ispermalink', 'true') == 'true') + self.push('id', 1) + + def _end_guid(self): + value = self.pop('id') + self._save('guidislink', self.guidislink and not self._getContext().has_key('link')) + if self.guidislink: + # guid acts as link, but only if 'ispermalink' is not present or is 'true', + # and only if the item doesn't already have a link element + self._save('link', value) + + def _start_title(self, attrsD): + self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) + _start_dc_title = _start_title + _start_media_title = _start_title + + def _end_title(self): + value = self.popContent('title') + context = self._getContext() + if self.intextinput: + context['textinput']['title'] = value + elif self.inimage: + context['image']['title'] = value + _end_dc_title = _end_title + _end_media_title = _end_title + + def _start_description(self, attrsD): + context = self._getContext() + if context.has_key('summary'): + self._summaryKey = 'content' + self._start_content(attrsD) + else: + self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource) + + def _start_abstract(self, attrsD): + self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) + + def _end_description(self): + if self._summaryKey == 'content': + self._end_content() + else: + value = self.popContent('description') + context = self._getContext() + if self.intextinput: + context['textinput']['description'] = value + elif self.inimage: + context['image']['description'] = value + self._summaryKey = None + _end_abstract = _end_description + + def _start_info(self, attrsD): + self.pushContent('info', attrsD, 'text/plain', 1) + _start_feedburner_browserfriendly = _start_info + + def _end_info(self): + self.popContent('info') + _end_feedburner_browserfriendly = _end_info + + def _start_generator(self, attrsD): + if attrsD: + attrsD = 
self._itsAnHrefDamnIt(attrsD) + if attrsD.has_key('href'): + attrsD['href'] = self.resolveURI(attrsD['href']) + self._getContext()['generator_detail'] = FeedParserDict(attrsD) + self.push('generator', 1) + + def _end_generator(self): + value = self.pop('generator') + context = self._getContext() + if context.has_key('generator_detail'): + context['generator_detail']['name'] = value + + def _start_admin_generatoragent(self, attrsD): + self.push('generator', 1) + value = self._getAttribute(attrsD, 'rdf:resource') + if value: + self.elementstack[-1][2].append(value) + self.pop('generator') + self._getContext()['generator_detail'] = FeedParserDict({'href': value}) + + def _start_admin_errorreportsto(self, attrsD): + self.push('errorreportsto', 1) + value = self._getAttribute(attrsD, 'rdf:resource') + if value: + self.elementstack[-1][2].append(value) + self.pop('errorreportsto') + + def _start_summary(self, attrsD): + context = self._getContext() + if context.has_key('summary'): + self._summaryKey = 'content' + self._start_content(attrsD) + else: + self._summaryKey = 'summary' + self.pushContent(self._summaryKey, attrsD, 'text/plain', 1) + _start_itunes_summary = _start_summary + + def _end_summary(self): + if self._summaryKey == 'content': + self._end_content() + else: + self.popContent(self._summaryKey or 'summary') + self._summaryKey = None + _end_itunes_summary = _end_summary + + def _start_enclosure(self, attrsD): + attrsD = self._itsAnHrefDamnIt(attrsD) + self._getContext().setdefault('enclosures', []).append(FeedParserDict(attrsD)) + href = attrsD.get('href') + if href: + context = self._getContext() + if not context.get('id'): + context['id'] = href + + def _start_source(self, attrsD): + self.insource = 1 + + def _end_source(self): + self.insource = 0 + self._getContext()['source'] = copy.deepcopy(self.sourcedata) + self.sourcedata.clear() + + def _start_content(self, attrsD): + self.pushContent('content', attrsD, 'text/plain', 1) + src = attrsD.get('src') + if 
src: + self.contentparams['src'] = src + self.push('content', 1) + + def _start_prodlink(self, attrsD): + self.pushContent('content', attrsD, 'text/html', 1) + + def _start_body(self, attrsD): + self.pushContent('content', attrsD, 'application/xhtml+xml', 1) + _start_xhtml_body = _start_body + + def _start_content_encoded(self, attrsD): + self.pushContent('content', attrsD, 'text/html', 1) + _start_fullitem = _start_content_encoded + + def _end_content(self): + copyToDescription = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types) + value = self.popContent('content') + if copyToDescription: + self._save('description', value) + _end_body = _end_content + _end_xhtml_body = _end_content + _end_content_encoded = _end_content + _end_fullitem = _end_content + _end_prodlink = _end_content + + def _start_itunes_image(self, attrsD): + self.push('itunes_image', 0) + self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) + _start_itunes_link = _start_itunes_image + + def _end_itunes_block(self): + value = self.pop('itunes_block', 0) + self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0 + + def _end_itunes_explicit(self): + value = self.pop('itunes_explicit', 0) + self._getContext()['itunes_explicit'] = (value == 'yes') and 1 or 0 + +if _XML_AVAILABLE: + class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler): + def __init__(self, baseuri, baselang, encoding): + if _debug: sys.stderr.write('trying StrictFeedParser\n') + xml.sax.handler.ContentHandler.__init__(self) + _FeedParserMixin.__init__(self, baseuri, baselang, encoding) + self.bozo = 0 + self.exc = None + + def startPrefixMapping(self, prefix, uri): + self.trackNamespace(prefix, uri) + + def startElementNS(self, name, qname, attrs): + namespace, localname = name + lowernamespace = str(namespace or '').lower() + if lowernamespace.find('backend.userland.com/rss') <> -1: + # match any backend.userland.com namespace + namespace = 
'http://backend.userland.com/rss' + lowernamespace = namespace + if qname and qname.find(':') > 0: + givenprefix = qname.split(':')[0] + else: + givenprefix = None + prefix = self._matchnamespaces.get(lowernamespace, givenprefix) + if givenprefix and (prefix == None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix): + raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix + if prefix: + localname = prefix + ':' + localname + localname = str(localname).lower() + if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname)) + + # qname implementation is horribly broken in Python 2.1 (it + # doesn't report any), and slightly broken in Python 2.2 (it + # doesn't report the xml: namespace). So we match up namespaces + # with a known list first, and then possibly override them with + # the qnames the SAX parser gives us (if indeed it gives us any + # at all). Thanks to MatejC for helping me test this and + # tirelessly telling me that it didn't work yet. 
+ attrsD = {} + for (namespace, attrlocalname), attrvalue in attrs._attrs.items(): + lowernamespace = (namespace or '').lower() + prefix = self._matchnamespaces.get(lowernamespace, '') + if prefix: + attrlocalname = prefix + ':' + attrlocalname + attrsD[str(attrlocalname).lower()] = attrvalue + for qname in attrs.getQNames(): + attrsD[str(qname).lower()] = attrs.getValueByQName(qname) + self.unknown_starttag(localname, attrsD.items()) + + def characters(self, text): + self.handle_data(text) + + def endElementNS(self, name, qname): + namespace, localname = name + lowernamespace = str(namespace or '').lower() + if qname and qname.find(':') > 0: + givenprefix = qname.split(':')[0] + else: + givenprefix = '' + prefix = self._matchnamespaces.get(lowernamespace, givenprefix) + if prefix: + localname = prefix + ':' + localname + localname = str(localname).lower() + self.unknown_endtag(localname) + + def error(self, exc): + self.bozo = 1 + self.exc = exc + + def fatalError(self, exc): + self.error(exc) + raise exc + +class _BaseHTMLProcessor(sgmllib.SGMLParser): + elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', + 'img', 'input', 'isindex', 'link', 'meta', 'param'] + + def __init__(self, encoding): + self.encoding = encoding + if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding) + sgmllib.SGMLParser.__init__(self) + + def reset(self): + self.pieces = [] + sgmllib.SGMLParser.reset(self) + + def _shorttag_replace(self, match): + tag = match.group(1) + if tag in self.elements_no_end_tag: + return '<' + tag + ' />' + else: + return '<' + tag + '>' + + def feed(self, data): + data = re.compile(r'', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace + data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data) + data = data.replace(''', "'") + data = data.replace('"', '"') + if self.encoding and type(data) == type(u''): + data = data.encode(self.encoding) + 
sgmllib.SGMLParser.feed(self, data) + + def normalize_attrs(self, attrs): + # utility method to be called by descendants + attrs = [(k.lower(), v) for k, v in attrs] + attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] + return attrs + + def unknown_starttag(self, tag, attrs): + # called for each start tag + # attrs is a list of (attr, value) tuples + # e.g. for
<pre class='screen'>, tag='pre', attrs=[('class', 'screen')]
+        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
+        uattrs = []
+        # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
+        for key, value in attrs:
+            if type(value) != type(u''):
+                value = unicode(value, self.encoding)
+            uattrs.append((unicode(key, self.encoding), value))
+        strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding)
+        if tag in self.elements_no_end_tag:
+            self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
+        else:
+            self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
+
+    def unknown_endtag(self, tag):
+        # called for each end tag, e.g. for </pre>
, tag will be 'pre' + # Reconstruct the original end tag. + if tag not in self.elements_no_end_tag: + self.pieces.append("" % locals()) + + def handle_charref(self, ref): + # called for each character reference, e.g. for ' ', ref will be '160' + # Reconstruct the original character reference. + self.pieces.append('&#%(ref)s;' % locals()) + + def handle_entityref(self, ref): + # called for each entity reference, e.g. for '©', ref will be 'copy' + # Reconstruct the original entity reference. + self.pieces.append('&%(ref)s;' % locals()) + + def handle_data(self, text): + # called for each block of plain text, i.e. outside of any tag and + # not containing any character or entity references + # Store the original text verbatim. + if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_text, text=%s\n' % text) + self.pieces.append(text) + + def handle_comment(self, text): + # called for each HTML comment, e.g. + # Reconstruct the original comment. + self.pieces.append('' % locals()) + + def handle_pi(self, text): + # called for each processing instruction, e.g. + # Reconstruct original processing instruction. + self.pieces.append('' % locals()) + + def handle_decl(self, text): + # called for the DOCTYPE, if present, e.g. 
+ # + # Reconstruct original DOCTYPE + self.pieces.append('' % locals()) + + _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match + def _scan_name(self, i, declstartpos): + rawdata = self.rawdata + n = len(rawdata) + if i == n: + return None, -1 + m = self._new_declname_match(rawdata, i) + if m: + s = m.group() + name = s.strip() + if (i + len(s)) == n: + return None, -1 # end of buffer + return name.lower(), m.end() + else: + self.handle_data(rawdata) +# self.updatepos(declstartpos, i) + return None, -1 + + def output(self): + '''Return processed HTML as a single string''' + return ''.join([str(p) for p in self.pieces]) + +class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor): + def __init__(self, baseuri, baselang, encoding): + sgmllib.SGMLParser.__init__(self) + _FeedParserMixin.__init__(self, baseuri, baselang, encoding) + + def decodeEntities(self, element, data): + data = data.replace('<', '<') + data = data.replace('<', '<') + data = data.replace('>', '>') + data = data.replace('>', '>') + data = data.replace('&', '&') + data = data.replace('&', '&') + data = data.replace('"', '"') + data = data.replace('"', '"') + data = data.replace(''', ''') + data = data.replace(''', ''') + if self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): + data = data.replace('<', '<') + data = data.replace('>', '>') + data = data.replace('&', '&') + data = data.replace('"', '"') + data = data.replace(''', "'") + return data + +class _RelativeURIResolver(_BaseHTMLProcessor): + relative_uris = [('a', 'href'), + ('applet', 'codebase'), + ('area', 'href'), + ('blockquote', 'cite'), + ('body', 'background'), + ('del', 'cite'), + ('form', 'action'), + ('frame', 'longdesc'), + ('frame', 'src'), + ('iframe', 'longdesc'), + ('iframe', 'src'), + ('head', 'profile'), + ('img', 'longdesc'), + ('img', 'src'), + ('img', 'usemap'), + ('input', 'src'), + ('input', 'usemap'), + ('ins', 'cite'), + ('link', 'href'), + ('object', 
'classid'), + ('object', 'codebase'), + ('object', 'data'), + ('object', 'usemap'), + ('q', 'cite'), + ('script', 'src')] + + def __init__(self, baseuri, encoding): + _BaseHTMLProcessor.__init__(self, encoding) + self.baseuri = baseuri + + def resolveURI(self, uri): + return _urljoin(self.baseuri, uri) + + def unknown_starttag(self, tag, attrs): + attrs = self.normalize_attrs(attrs) + attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs] + _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) + +def _resolveRelativeURIs(htmlSource, baseURI, encoding): + if _debug: sys.stderr.write('entering _resolveRelativeURIs\n') + p = _RelativeURIResolver(baseURI, encoding) + p.feed(htmlSource) + return p.output() + +class _HTMLSanitizer(_BaseHTMLProcessor): + acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', + 'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col', + 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', + 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', + 'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup', + 'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike', + 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', + 'thead', 'tr', 'tt', 'u', 'ul', 'var'] + + acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', + 'action', 'align', 'alt', 'axis', 'border', 'cellpadding', 'cellspacing', + 'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols', + 'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', + 'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', + 'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method', + 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', + 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 
'selected', 'shape', 'size', + 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type', + 'usemap', 'valign', 'value', 'vspace', 'width'] + + unacceptable_elements_with_end_tag = ['script', 'applet'] + + def reset(self): + _BaseHTMLProcessor.reset(self) + self.unacceptablestack = 0 + + def unknown_starttag(self, tag, attrs): + if not tag in self.acceptable_elements: + if tag in self.unacceptable_elements_with_end_tag: + self.unacceptablestack += 1 + return + attrs = self.normalize_attrs(attrs) + attrs = [(key, value) for key, value in attrs if key in self.acceptable_attributes] + _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) + + def unknown_endtag(self, tag): + if not tag in self.acceptable_elements: + if tag in self.unacceptable_elements_with_end_tag: + self.unacceptablestack -= 1 + return + _BaseHTMLProcessor.unknown_endtag(self, tag) + + def handle_pi(self, text): + pass + + def handle_decl(self, text): + pass + + def handle_data(self, text): + if not self.unacceptablestack: + _BaseHTMLProcessor.handle_data(self, text) + +def _sanitizeHTML(htmlSource, encoding): + p = _HTMLSanitizer(encoding) + p.feed(htmlSource) + data = p.output() + if TIDY_MARKUP: + # loop through list of preferred Tidy interfaces looking for one that's installed, + # then set up a common _tidy function to wrap the interface-specific API. 
+ _tidy = None + for tidy_interface in PREFERRED_TIDY_INTERFACES: + try: + if tidy_interface == "uTidy": + from tidy import parseString as _utidy + def _tidy(data, **kwargs): + return str(_utidy(data, **kwargs)) + break + elif tidy_interface == "mxTidy": + from mx.Tidy import Tidy as _mxtidy + def _tidy(data, **kwargs): + nerrors, nwarnings, data, errordata = _mxtidy.tidy(data, **kwargs) + return data + break + except: + pass + if _tidy: + utf8 = type(data) == type(u'') + if utf8: + data = data.encode('utf-8') + data = _tidy(data, output_xhtml=1, numeric_entities=1, wrap=0, char_encoding="utf8") + if utf8: + data = unicode(data, 'utf-8') + if data.count(''): + data = data.split('>', 1)[1] + if data.count('= '2.3.3' + assert base64 != None + user, passw = base64.decodestring(req.headers['Authorization'].split(' ')[1]).split(':') + realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0] + self.add_password(realm, host, user, passw) + retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) + self.reset_retry_count() + return retry + except: + return self.http_error_default(req, fp, code, msg, headers) + +def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers): + """URL, filename, or string --> stream + + This function lets you define parsers that take any input source + (URL, pathname to local or network file, or actual data as a string) + and deal with it in a uniform manner. Returned object is guaranteed + to have all the basic stdio read methods (read, readline, readlines). + Just .close() the object when you're done with it. + + If the etag argument is supplied, it will be used as the value of an + If-None-Match request header. + + If the modified argument is supplied, it must be a tuple of 9 integers + as returned by gmtime() in the standard Python time module. This MUST + be in GMT (Greenwich Mean Time). The formatted date/time will be used + as the value of an If-Modified-Since request header. 
+ + If the agent argument is supplied, it will be used as the value of a + User-Agent request header. + + If the referrer argument is supplied, it will be used as the value of a + Referer[sic] request header. + + If handlers is supplied, it is a list of handlers used to build a + urllib2 opener. + """ + + if hasattr(url_file_stream_or_string, 'read'): + return url_file_stream_or_string + + if url_file_stream_or_string == '-': + return sys.stdin + + if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp'): + if not agent: + agent = USER_AGENT + # test for inline user:password for basic auth + auth = None + if base64: + urltype, rest = urllib.splittype(url_file_stream_or_string) + realhost, rest = urllib.splithost(rest) + if realhost: + user_passwd, realhost = urllib.splituser(realhost) + if user_passwd: + url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest) + auth = base64.encodestring(user_passwd).strip() + # try to open with urllib2 (to use optional headers) + request = urllib2.Request(url_file_stream_or_string) + request.add_header('User-Agent', agent) + if etag: + request.add_header('If-None-Match', etag) + if modified: + # format into an RFC 1123-compliant timestamp. We can't use + # time.strftime() since the %a and %b directives can be affected + # by the current locale, but RFC 2616 states that dates must be + # in English. 
+ short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + request.add_header('If-Modified-Since', '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5])) + if referrer: + request.add_header('Referer', referrer) + if gzip and zlib: + request.add_header('Accept-encoding', 'gzip, deflate') + elif gzip: + request.add_header('Accept-encoding', 'gzip') + elif zlib: + request.add_header('Accept-encoding', 'deflate') + else: + request.add_header('Accept-encoding', '') + if auth: + request.add_header('Authorization', 'Basic %s' % auth) + if ACCEPT_HEADER: + request.add_header('Accept', ACCEPT_HEADER) + request.add_header('A-IM', 'feed') # RFC 3229 support + opener = apply(urllib2.build_opener, tuple([_FeedURLHandler()] + handlers)) + opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent + try: + return opener.open(request) + finally: + opener.close() # JohnD + + # try to open with native open function (if url_file_stream_or_string is a filename) + try: + return open(url_file_stream_or_string) + except: + pass + + # treat url_file_stream_or_string as string + return _StringIO(str(url_file_stream_or_string)) + +_date_handlers = [] +def registerDateHandler(func): + '''Register a date handler function (takes string, returns 9-tuple date in GMT)''' + _date_handlers.insert(0, func) + +# ISO-8601 date parsing routines written by Fazal Majid. +# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601 +# parser is beyond the scope of feedparser and would be a worthwhile addition +# to the Python library. +# A single regular expression cannot parse ISO 8601 date formats into groups +# as the standard is highly irregular (for instance is 030104 2003-01-04 or +# 0301-04-01), so we use templates instead. 
# Please note the order in templates is significant because we need a
# greedy match.
_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-MM', 'YYYY-?OOO',
                 'YY-?MM-?DD', 'YY-?OOO', 'YYYY',
                 '-YY-?MM', '-OOO', '-YY',
                 '--MM-?DD', '--MM',
                 '---DD',
                 'CC', '']
# Each template placeholder expands to a *named* group: the parser below
# reads every field back out of m.groupdict() by name, so the group names
# are part of the contract between these patterns and
# _parse_date_iso8601().  The optional time-of-day / time-zone suffix is
# appended to every template.
_iso8601_re = [
    tmpl.replace(
    'YYYY', r'(?P<year>\d{4})').replace(
    'YY', r'(?P<year>\d\d)').replace(
    'MM', r'(?P<month>[01]\d)').replace(
    'DD', r'(?P<day>[0123]\d)').replace(
    'OOO', r'(?P<ordinal>[0123]\d\d)').replace(
    'CC', r'(?P<century>\d\d$)')
    + r'(T?(?P<hour>\d{2}):(?P<minute>\d{2})'
    + r'(:(?P<second>\d{2}))?'
    + r'(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?'
    for tmpl in _iso8601_tmpl]
# drop the loop variable left behind in module scope
# (Python 2 list comprehensions leak it)
del tmpl
_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]
del regex
def _parse_date_iso8601(dateString):
    '''Parse a variety of ISO-8601-compatible formats like 20040105

    The matchers in _iso8601_matches are tried in order and the first one
    that matches wins.  Fields absent from the input are filled in as
    follows: year -> current year; month -> 1 if an ordinal day was given,
    otherwise the current month; day -> the ordinal if given, 1 if a
    century, year or month was given, otherwise the current day; hour,
    minute, second and time zone offset -> 0.

    A numeric time zone offset is folded into the hour and minute fields,
    and the result is normalized by a time.mktime()/time.localtime()
    round trip.

    Returns a 9-item time structure, or None when no template matches, when
    only the empty template matches, or when the time zone designator does
    not start with '+' or '-'.
    '''
    m = None
    for _iso8601_match in _iso8601_matches:
        m = _iso8601_match(dateString)
        if m: break
    if not m: return
    # the last template is empty and its time suffix is optional, so it
    # matches any string with a zero-width match; treat that as "no date"
    if m.span() == (0, 0): return
    params = m.groupdict()
    ordinal = params.get('ordinal', 0)
    if ordinal:
        ordinal = int(ordinal)
    else:
        ordinal = 0
    year = params.get('year', '--')
    if not year or year == '--':
        year = time.gmtime()[0]
    elif len(year) == 2:
        # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
        year = 100 * int(time.gmtime()[0] / 100) + int(year)
    else:
        year = int(year)
    month = params.get('month', '-')
    if not month or month == '-':
        # ordinals are NOT normalized by mktime, we simulate them
        # by setting month=1, day=ordinal
        if ordinal:
            month = 1
        else:
            month = time.gmtime()[1]
    month = int(month)
    day = params.get('day', 0)
    if not day:
        # see above
        if ordinal:
            day = ordinal
        elif params.get('century', 0) or \
                 params.get('year', 0) or params.get('month', 0):
            day = 1
        else:
            day = time.gmtime()[2]
    else:
        day = int(day)
    # special case of the century - is the first year of the 21st century
    # 2000 or 2001 ? The debate goes on...
    if 'century' in params.keys():
        year = (int(params['century']) - 1) * 100 + 1
    # in ISO 8601 most fields are optional
    for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']:
        if not params.get(field, None):
            params[field] = 0
    hour = int(params.get('hour', 0))
    minute = int(params.get('minute', 0))
    second = int(params.get('second', 0))
    # weekday is normalized by mktime(), we can ignore it
    weekday = 0
    # daylight savings is complex, but not needed for feedparser's purposes
    # as time zones, if specified, include mention of whether it is active
    # (e.g. PST vs. PDT, CET). Using -1 is implementation-dependent and
    # and most implementations have DST bugs
    daylight_savings_flag = 0
    tm = [year, month, day, hour, minute, second, weekday,
          ordinal, daylight_savings_flag]
    # ISO 8601 time zone adjustments
    tz = params.get('tz')
    if tz and tz != 'Z':
        # a negative offset is added and a positive one subtracted,
        # which moves the stated wall-clock time onto UTC
        if tz[0] == '-':
            tm[3] += int(params.get('tzhour', 0))
            tm[4] += int(params.get('tzmin', 0))
        elif tz[0] == '+':
            tm[3] -= int(params.get('tzhour', 0))
            tm[4] -= int(params.get('tzmin', 0))
        else:
            return None
    # Python's time.mktime() is a wrapper around the ANSI C mktime(3c)
    # which is guaranteed to normalize d/m/y/h/m/s.
    # Many implementations have bugs, but we'll pretend they don't.
    # mktime() is documented to take a tuple or struct_time, so the working
    # list is converted before the call.
    return time.localtime(time.mktime(tuple(tm)))
registerDateHandler(_parse_date_iso8601)

# 8-bit date handling routines written by ytrewq1.
+_korean_year = u'\ub144' # b3e2 in euc-kr +_korean_month = u'\uc6d4' # bff9 in euc-kr +_korean_day = u'\uc77c' # c0cf in euc-kr +_korean_am = u'\uc624\uc804' # bfc0 c0fc in euc-kr +_korean_pm = u'\uc624\ud6c4' # bfc0 c8c4 in euc-kr + +_korean_onblog_date_re = \ + re.compile('(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \ + (_korean_year, _korean_month, _korean_day)) +_korean_nate_date_re = \ + re.compile(u'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \ + (_korean_am, _korean_pm)) +def _parse_date_onblog(dateString): + '''Parse a string according to the OnBlog 8-bit date format''' + m = _korean_onblog_date_re.match(dateString) + if not m: return + w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ + {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ + 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ + 'zonediff': '+09:00'} + if _debug: sys.stderr.write('OnBlog date parsed as: %s\n' % w3dtfdate) + return _parse_date_w3dtf(w3dtfdate) +registerDateHandler(_parse_date_onblog) + +def _parse_date_nate(dateString): + '''Parse a string according to the Nate 8-bit date format''' + m = _korean_nate_date_re.match(dateString) + if not m: return + hour = int(m.group(5)) + ampm = m.group(4) + if (ampm == _korean_pm): + hour += 12 + hour = str(hour) + if len(hour) == 1: + hour = '0' + hour + w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ + {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ + 'hour': hour, 'minute': m.group(6), 'second': m.group(7),\ + 'zonediff': '+09:00'} + if _debug: sys.stderr.write('Nate date parsed as: %s\n' % w3dtfdate) + return _parse_date_w3dtf(w3dtfdate) +registerDateHandler(_parse_date_nate) + +_mssql_date_re = \ + re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?') +def _parse_date_mssql(dateString): + '''Parse a string according to the MS SQL date format''' + m = 
_mssql_date_re.match(dateString) + if not m: return + w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ + {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ + 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ + 'zonediff': '+09:00'} + if _debug: sys.stderr.write('MS SQL date parsed as: %s\n' % w3dtfdate) + return _parse_date_w3dtf(w3dtfdate) +registerDateHandler(_parse_date_mssql) + +# Unicode strings for Greek date strings +_greek_months = \ + { \ + u'\u0399\u03b1\u03bd': u'Jan', # c9e1ed in iso-8859-7 + u'\u03a6\u03b5\u03b2': u'Feb', # d6e5e2 in iso-8859-7 + u'\u039c\u03ac\u03ce': u'Mar', # ccdcfe in iso-8859-7 + u'\u039c\u03b1\u03ce': u'Mar', # cce1fe in iso-8859-7 + u'\u0391\u03c0\u03c1': u'Apr', # c1f0f1 in iso-8859-7 + u'\u039c\u03ac\u03b9': u'May', # ccdce9 in iso-8859-7 + u'\u039c\u03b1\u03ca': u'May', # cce1fa in iso-8859-7 + u'\u039c\u03b1\u03b9': u'May', # cce1e9 in iso-8859-7 + u'\u0399\u03bf\u03cd\u03bd': u'Jun', # c9effded in iso-8859-7 + u'\u0399\u03bf\u03bd': u'Jun', # c9efed in iso-8859-7 + u'\u0399\u03bf\u03cd\u03bb': u'Jul', # c9effdeb in iso-8859-7 + u'\u0399\u03bf\u03bb': u'Jul', # c9f9eb in iso-8859-7 + u'\u0391\u03cd\u03b3': u'Aug', # c1fde3 in iso-8859-7 + u'\u0391\u03c5\u03b3': u'Aug', # c1f5e3 in iso-8859-7 + u'\u03a3\u03b5\u03c0': u'Sep', # d3e5f0 in iso-8859-7 + u'\u039f\u03ba\u03c4': u'Oct', # cfeaf4 in iso-8859-7 + u'\u039d\u03bf\u03ad': u'Nov', # cdefdd in iso-8859-7 + u'\u039d\u03bf\u03b5': u'Nov', # cdefe5 in iso-8859-7 + u'\u0394\u03b5\u03ba': u'Dec', # c4e5ea in iso-8859-7 + } + +_greek_wdays = \ + { \ + u'\u039a\u03c5\u03c1': u'Sun', # caf5f1 in iso-8859-7 + u'\u0394\u03b5\u03c5': u'Mon', # c4e5f5 in iso-8859-7 + u'\u03a4\u03c1\u03b9': u'Tue', # d4f1e9 in iso-8859-7 + u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7 + u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7 + u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7 + u'\u03a3\u03b1\u03b2': u'Sat', # 
d3e1e2 in iso-8859-7 + } + +_greek_date_format_re = \ + re.compile(u'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)') + +def _parse_date_greek(dateString): + '''Parse a string according to a Greek 8-bit date format.''' + m = _greek_date_format_re.match(dateString) + if not m: return + try: + wday = _greek_wdays[m.group(1)] + month = _greek_months[m.group(3)] + except: + return + rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \ + {'wday': wday, 'day': m.group(2), 'month': month, 'year': m.group(4),\ + 'hour': m.group(5), 'minute': m.group(6), 'second': m.group(7),\ + 'zonediff': m.group(8)} + if _debug: sys.stderr.write('Greek date parsed as: %s\n' % rfc822date) + return _parse_date_rfc822(rfc822date) +registerDateHandler(_parse_date_greek) + +# Unicode strings for Hungarian date strings +_hungarian_months = \ + { \ + u'janu\u00e1r': u'01', # e1 in iso-8859-2 + u'febru\u00e1ri': u'02', # e1 in iso-8859-2 + u'm\u00e1rcius': u'03', # e1 in iso-8859-2 + u'\u00e1prilis': u'04', # e1 in iso-8859-2 + u'm\u00e1ujus': u'05', # e1 in iso-8859-2 + u'j\u00fanius': u'06', # fa in iso-8859-2 + u'j\u00falius': u'07', # fa in iso-8859-2 + u'augusztus': u'08', + u'szeptember': u'09', + u'okt\u00f3ber': u'10', # f3 in iso-8859-2 + u'november': u'11', + u'december': u'12', + } + +_hungarian_date_format_re = \ + re.compile(u'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))') + +def _parse_date_hungarian(dateString): + '''Parse a string according to a Hungarian 8-bit date format.''' + m = _hungarian_date_format_re.match(dateString) + if not m: return + try: + month = _hungarian_months[m.group(2)] + day = m.group(3) + if len(day) == 1: + day = '0' + day + hour = m.group(4) + if len(hour) == 1: + hour = '0' + hour + except: + return + w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)s' % \ + {'year': m.group(1), 'month': month, 'day': day,\ + 'hour': hour, 'minute': 
m.group(5),\ + 'zonediff': m.group(6)} + if _debug: sys.stderr.write('Hungarian date parsed as: %s\n' % w3dtfdate) + return _parse_date_w3dtf(w3dtfdate) +registerDateHandler(_parse_date_hungarian) + +# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by +# Drake and licensed under the Python license. Removed all range checking +# for month, day, hour, minute, and second, since mktime will normalize +# these later +def _parse_date_w3dtf(dateString): + def __extract_date(m): + year = int(m.group('year')) + if year < 100: + year = 100 * int(time.gmtime()[0] / 100) + int(year) + if year < 1000: + return 0, 0, 0 + julian = m.group('julian') + if julian: + julian = int(julian) + month = julian / 30 + 1 + day = julian % 30 + 1 + jday = None + while jday != julian: + t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0)) + jday = time.gmtime(t)[-2] + diff = abs(jday - julian) + if jday > julian: + if diff < day: + day = day - diff + else: + month = month - 1 + day = 31 + elif jday < julian: + if day + diff < 28: + day = day + diff + else: + month = month + 1 + return year, month, day + month = m.group('month') + day = 1 + if month is None: + month = 1 + else: + month = int(month) + day = m.group('day') + if day: + day = int(day) + else: + day = 1 + return year, month, day + + def __extract_time(m): + if not m: + return 0, 0, 0 + hours = m.group('hours') + if not hours: + return 0, 0, 0 + hours = int(hours) + minutes = int(m.group('minutes')) + seconds = m.group('seconds') + if seconds: + seconds = int(seconds) + else: + seconds = 0 + return hours, minutes, seconds + + def __extract_tzd(m): + '''Return the Time Zone Designator as an offset in seconds from UTC.''' + if not m: + return 0 + tzd = m.group('tzd') + if not tzd: + return 0 + if tzd == 'Z': + return 0 + hours = int(m.group('tzdhours')) + minutes = m.group('tzdminutes') + if minutes: + minutes = int(minutes) + else: + minutes = 0 + offset = (hours*60 + minutes) * 60 + if tzd[0] == '+': + return 
-offset + return offset + + __date_re = ('(?P\d\d\d\d)' + '(?:(?P-|)' + '(?:(?P\d\d\d)' + '|(?P\d\d)(?:(?P=dsep)(?P\d\d))?))?') + __tzd_re = '(?P[-+](?P\d\d)(?::?(?P\d\d))|Z)' + __tzd_rx = re.compile(__tzd_re) + __time_re = ('(?P\d\d)(?P:|)(?P\d\d)' + '(?:(?P=tsep)(?P\d\d(?:[.,]\d+)?))?' + + __tzd_re) + __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re) + __datetime_rx = re.compile(__datetime_re) + m = __datetime_rx.match(dateString) + if (m is None) or (m.group() != dateString): return + gmt = __extract_date(m) + __extract_time(m) + (0, 0, 0) + if gmt[0] == 0: return + return time.gmtime(time.mktime(gmt) + __extract_tzd(m) - time.timezone) +registerDateHandler(_parse_date_w3dtf) + +def _parse_date_rfc822(dateString): + '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date''' + data = dateString.split() + if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames: + del data[0] + if len(data) == 4: + s = data[3] + i = s.find('+') + if i > 0: + data[3:] = [s[:i], s[i+1:]] + else: + data.append('') + dateString = " ".join(data) + if len(data) < 5: + dateString += ' 00:00:00 GMT' + tm = rfc822.parsedate_tz(dateString) + if tm: + return time.gmtime(rfc822.mktime_tz(tm)) +# rfc822.py defines several time zones, but we define some extra ones. +# 'ET' is equivalent to 'EST', etc. 
+_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800} +rfc822._timezones.update(_additional_timezones) +registerDateHandler(_parse_date_rfc822) + +def _parse_date(dateString): + '''Parses a variety of date formats into a 9-tuple in GMT''' + for handler in _date_handlers: + try: + date9tuple = handler(dateString) + if not date9tuple: continue + if len(date9tuple) != 9: + if _debug: sys.stderr.write('date handler function must return 9-tuple\n') + raise ValueError + map(int, date9tuple) + return date9tuple + except Exception, e: + if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e))) + pass + return None + +def _getCharacterEncoding(http_headers, xml_data): + '''Get the character encoding of the XML document + + http_headers is a dictionary + xml_data is a raw string (not Unicode) + + This is so much trickier than it sounds, it's not even funny. + According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type + is application/xml, application/*+xml, + application/xml-external-parsed-entity, or application/xml-dtd, + the encoding given in the charset parameter of the HTTP Content-Type + takes precedence over the encoding given in the XML prefix within the + document, and defaults to 'utf-8' if neither are specified. But, if + the HTTP Content-Type is text/xml, text/*+xml, or + text/xml-external-parsed-entity, the encoding given in the XML prefix + within the document is ALWAYS IGNORED and only the encoding given in + the charset parameter of the HTTP Content-Type header should be + respected, and it defaults to 'us-ascii' if not specified. + + Furthermore, discussion on the atom-syntax mailing list with the + author of RFC 3023 leads me to the conclusion that any document + served with a Content-Type of text/* and no charset parameter + must be treated as us-ascii. (We now do this.) And also that it + must always be flagged as non-well-formed. (We now do this too.) 
+ + If Content-Type is unspecified (input was local file or non-HTTP source) + or unrecognized (server just got it totally wrong), then go by the + encoding given in the XML prefix of the document and default to + 'iso-8859-1' as per the HTTP specification (RFC 2616). + + Then, assuming we didn't find a character encoding in the HTTP headers + (and the HTTP Content-type allowed us to look in the body), we need + to sniff the first few bytes of the XML data and try to determine + whether the encoding is ASCII-compatible. Section F of the XML + specification shows the way here: + http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info + + If the sniffed encoding is not ASCII-compatible, we need to make it + ASCII compatible so that we can sniff further into the XML declaration + to find the encoding attribute, which will tell us the true encoding. + + Of course, none of this guarantees that we will be able to parse the + feed in the declared character encoding (assuming it was declared + correctly, which many are not). CJKCodecs and iconv_codec help a lot; + you should definitely install them if you can. + http://cjkpython.i18n.org/ + ''' + + def _parseHTTPContentType(content_type): + '''takes HTTP Content-Type header and returns (content type, charset) + + If no charset is specified, returns (content type, '') + If no content type is specified, returns ('', '') + Both return parameters are guaranteed to be lowercase strings + ''' + content_type = content_type or '' + content_type, params = cgi.parse_header(content_type) + return content_type, params.get('charset', '').replace("'", '') + + sniffed_xml_encoding = '' + xml_encoding = '' + true_encoding = '' + http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type')) + # Must sniff for non-ASCII-compatible character encodings before + # searching for XML declaration. 
This heuristic is defined in + # section F of the XML specification: + # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info + try: + if xml_data[:4] == '\x4c\x6f\xa7\x94': + # EBCDIC + xml_data = _ebcdic_to_ascii(xml_data) + elif xml_data[:4] == '\x00\x3c\x00\x3f': + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x3f\x00': + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and (xml_data[2:4] != '\x00\x00'): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\x00\x3c': + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x00\x00': + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\xfe\xff': + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\xff\xfe\x00\x00': + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == '\xef\xbb\xbf': + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + # ASCII-compatible + pass + xml_encoding_match = re.compile('^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) + except: + xml_encoding_match = None + if xml_encoding_match: + xml_encoding = xml_encoding_match.groups()[0].lower() + if 
sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + acceptable_content_type = 0 + application_content_types = ('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity') + text_content_types = ('text/xml', 'text/xml-external-parsed-entity') + if (http_content_type in application_content_types) or \ + (http_content_type.startswith('application/') and http_content_type.endswith('+xml')): + acceptable_content_type = 1 + true_encoding = http_encoding or xml_encoding or 'utf-8' + elif (http_content_type in text_content_types) or \ + (http_content_type.startswith('text/')) and http_content_type.endswith('+xml'): + acceptable_content_type = 1 + true_encoding = http_encoding or 'us-ascii' + elif http_content_type.startswith('text/'): + true_encoding = http_encoding or 'us-ascii' + elif http_headers and (not http_headers.has_key('content-type')): + true_encoding = xml_encoding or 'iso-8859-1' + else: + true_encoding = xml_encoding or 'utf-8' + return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type + +def _toUTF8(data, encoding): + '''Changes an XML data stream on the fly to specify a new encoding + + data is a raw sequence of bytes (not Unicode) that is presumed to be in %encoding already + encoding is a string recognized by encodings.aliases + ''' + if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding) + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == '\xfe\xff') and (data[2:4] != '\x00\x00'): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-16be': + sys.stderr.write('trying utf-16be instead\n') + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'): + if _debug: + sys.stderr.write('stripping 
BOM\n') + if encoding != 'utf-16le': + sys.stderr.write('trying utf-16le instead\n') + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == '\xef\xbb\xbf': + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-8': + sys.stderr.write('trying utf-8 instead\n') + encoding = 'utf-8' + data = data[3:] + elif data[:4] == '\x00\x00\xfe\xff': + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-32be': + sys.stderr.write('trying utf-32be instead\n') + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == '\xff\xfe\x00\x00': + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-32le': + sys.stderr.write('trying utf-32le instead\n') + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + if _debug: sys.stderr.write('successfully converted %s data to unicode\n' % encoding) + declmatch = re.compile('^<\?xml[^>]*?>') + newdecl = '''''' + if declmatch.search(newdata): + newdata = declmatch.sub(newdecl, newdata) + else: + newdata = newdecl + u'\n' + newdata + return newdata.encode('utf-8') + +def _stripDoctype(data): + '''Strips DOCTYPE from XML document, returns (rss_version, stripped_data) + + rss_version may be 'rss091n' or None + stripped_data is the same XML document, minus the DOCTYPE + ''' + entity_pattern = re.compile(r']*?)>', re.MULTILINE) + data = entity_pattern.sub('', data) + doctype_pattern = re.compile(r']*?)>', re.MULTILINE) + doctype_results = doctype_pattern.findall(data) + doctype = doctype_results and doctype_results[0] or '' + if doctype.lower().count('netscape'): + version = 'rss091n' + else: + version = None + data = doctype_pattern.sub('', data) + return version, data + +def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[]): + '''Parse a feed from a URL, file, stream, or string''' + result = FeedParserDict() + result['feed'] = FeedParserDict() + result['entries'] = [] + if _XML_AVAILABLE: + result['bozo'] = 0 + if 
type(handlers) == types.InstanceType: + handlers = [handlers] + try: + f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers) + data = f.read() + except Exception, e: + result['bozo'] = 1 + result['bozo_exception'] = e + data = '' + f = None + + # if feed is gzip-compressed, decompress it + if f and data and hasattr(f, 'headers'): + if gzip and f.headers.get('content-encoding', '') == 'gzip': + try: + data = gzip.GzipFile(fileobj=_StringIO(data)).read() + except Exception, e: + # Some feeds claim to be gzipped but they're not, so + # we get garbage. Ideally, we should re-request the + # feed without the 'Accept-encoding: gzip' header, + # but we don't. + result['bozo'] = 1 + result['bozo_exception'] = e + data = '' + elif zlib and f.headers.get('content-encoding', '') == 'deflate': + try: + data = zlib.decompress(data, -zlib.MAX_WBITS) + except Exception, e: + result['bozo'] = 1 + result['bozo_exception'] = e + data = '' + + # save HTTP headers + if hasattr(f, 'info'): + info = f.info() + result['etag'] = info.getheader('ETag') + last_modified = info.getheader('Last-Modified') + if last_modified: + result['modified'] = _parse_date(last_modified) + if hasattr(f, 'url'): + result['href'] = f.url + result['status'] = 200 + if hasattr(f, 'status'): + result['status'] = f.status + if hasattr(f, 'headers'): + result['headers'] = f.headers.dict + if hasattr(f, 'close'): + f.close() + + # there are four encodings to keep track of: + # - http_encoding is the encoding declared in the Content-Type HTTP header + # - xml_encoding is the encoding declared in the ; changed +# project name +#2.5 - 7/25/2003 - MAP - changed to Python license (all contributors agree); +# removed unnecessary urllib code -- urllib2 should always be available anyway; +# return actual url, status, and full HTTP headers (as result['url'], +# result['status'], and result['headers']) if parsing a remote feed over HTTP -- +# this should pass all the HTTP tests at ; +# added 
the latest namespace-of-the-week for RSS 2.0 +#2.5.1 - 7/26/2003 - RMK - clear opener.addheaders so we only send our custom +# User-Agent (otherwise urllib2 sends two, which confuses some servers) +#2.5.2 - 7/28/2003 - MAP - entity-decode inline xml properly; added support for +# inline and as used in some RSS 2.0 feeds +#2.5.3 - 8/6/2003 - TvdV - patch to track whether we're inside an image or +# textInput, and also to return the character encoding (if specified) +#2.6 - 1/1/2004 - MAP - dc:author support (MarekK); fixed bug tracking +# nested divs within content (JohnD); fixed missing sys import (JohanS); +# fixed regular expression to capture XML character encoding (Andrei); +# added support for Atom 0.3-style links; fixed bug with textInput tracking; +# added support for cloud (MartijnP); added support for multiple +# category/dc:subject (MartijnP); normalize content model: 'description' gets +# description (which can come from description, summary, or full content if no +# description), 'content' gets dict of base/language/type/value (which can come +# from content:encoded, xhtml:body, content, or fullitem); +# fixed bug matching arbitrary Userland namespaces; added xml:base and xml:lang +# tracking; fixed bug tracking unknown tags; fixed bug tracking content when +# element is not in default namespace (like Pocketsoap feed); +# resolve relative URLs in link, guid, docs, url, comments, wfw:comment, +# wfw:commentRSS; resolve relative URLs within embedded HTML markup in +# description, xhtml:body, content, content:encoded, title, subtitle, +# summary, info, tagline, and copyright; added support for pingback and +# trackback namespaces +#2.7 - 1/5/2004 - MAP - really added support for trackback and pingback +# namespaces, as opposed to 2.6 when I said I did but didn't really; +# sanitize HTML markup within some elements; added mxTidy support (if +# installed) to tidy HTML markup within some elements; fixed indentation +# bug in _parse_date (FazalM); use 
socket.setdefaulttimeout if available +# (FazalM); universal date parsing and normalization (FazalM): 'created', 'modified', +# 'issued' are parsed into 9-tuple date format and stored in 'created_parsed', +# 'modified_parsed', and 'issued_parsed'; 'date' is duplicated in 'modified' +# and vice-versa; 'date_parsed' is duplicated in 'modified_parsed' and vice-versa +#2.7.1 - 1/9/2004 - MAP - fixed bug handling &quot; and &apos;. fixed memory +# leak not closing url opener (JohnD); added dc:publisher support (MarekK); +# added admin:errorReportsTo support (MarekK); Python 2.1 dict support (MarekK) +#2.7.4 - 1/14/2004 - MAP - added workaround for improperly formed
tags in +# encoded HTML (skadz); fixed unicode handling in normalize_attrs (ChrisL); +# fixed relative URI processing for guid (skadz); added ICBM support; added +# base64 support +#2.7.5 - 1/15/2004 - MAP - added workaround for malformed DOCTYPE (seen on many +# blogspot.com sites); added _debug variable +#2.7.6 - 1/16/2004 - MAP - fixed bug with StringIO importing +#3.0b3 - 1/23/2004 - MAP - parse entire feed with real XML parser (if available); +# added several new supported namespaces; fixed bug tracking naked markup in +# description; added support for enclosure; added support for source; re-added +# support for cloud which got dropped somehow; added support for expirationDate +#3.0b4 - 1/26/2004 - MAP - fixed xml:lang inheritance; fixed multiple bugs tracking +# xml:base URI, one for documents that don't define one explicitly and one for +# documents that define an outer and an inner xml:base that goes out of scope +# before the end of the document +#3.0b5 - 1/26/2004 - MAP - fixed bug parsing multiple links at feed level +#3.0b6 - 1/27/2004 - MAP - added feed type and version detection, result['version'] +# will be one of SUPPORTED_VERSIONS.keys() or empty string if unrecognized; +# added support for creativeCommons:license and cc:license; added support for +# full Atom content model in title, tagline, info, copyright, summary; fixed bug +# with gzip encoding (not always telling server we support it when we do) +#3.0b7 - 1/28/2004 - MAP - support Atom-style author element in author_detail +# (dictionary of 'name', 'url', 'email'); map author to author_detail if author +# contains name + email address +#3.0b8 - 1/28/2004 - MAP - added support for contributor +#3.0b9 - 1/29/2004 - MAP - fixed check for presence of dict function; added +# support for summary +#3.0b10 - 1/31/2004 - MAP - incorporated ISO-8601 date parsing routines from +# xml.util.iso8601 +#3.0b11 - 2/2/2004 - MAP - added 'rights' to list of elements that can contain +# dangerous markup; fiddled 
with decodeEntities (not right); liberalized +# date parsing even further +#3.0b12 - 2/6/2004 - MAP - fiddled with decodeEntities (still not right); +# added support to Atom 0.2 subtitle; added support for Atom content model +# in copyright; better sanitizing of dangerous HTML elements with end tags +# (script, frameset) +#3.0b13 - 2/8/2004 - MAP - better handling of empty HTML tags (br, hr, img, +# etc.) in embedded markup, in either HTML or XHTML form (
,
,
) +#3.0b14 - 2/8/2004 - MAP - fixed CDATA handling in non-wellformed feeds under +# Python 2.1 +#3.0b15 - 2/11/2004 - MAP - fixed bug resolving relative links in wfw:commentRSS; +# fixed bug capturing author and contributor URL; fixed bug resolving relative +# links in author and contributor URL; fixed bug resolving relative links in +# generator URL; added support for recognizing RSS 1.0; passed Simon Fell's +# namespace tests, and included them permanently in the test suite with his +# permission; fixed namespace handling under Python 2.1 +#3.0b16 - 2/12/2004 - MAP - fixed support for RSS 0.90 (broken in b15) +#3.0b17 - 2/13/2004 - MAP - determine character encoding as per RFC 3023 +#3.0b18 - 2/17/2004 - MAP - always map description to summary_detail (Andrei); +# use libxml2 (if available) +#3.0b19 - 3/15/2004 - MAP - fixed bug exploding author information when author +# name was in parentheses; removed ultra-problematic mxTidy support; patch to +# workaround crash in PyXML/expat when encountering invalid entities +# (MarkMoraes); support for textinput/textInput +#3.0b20 - 4/7/2004 - MAP - added CDF support +#3.0b21 - 4/14/2004 - MAP - added Hot RSS support +#3.0b22 - 4/19/2004 - MAP - changed 'channel' to 'feed', 'item' to 'entries' in +# results dict; changed results dict to allow getting values with results.key +# as well as results[key]; work around embedded illformed HTML with half +# a DOCTYPE; work around malformed Content-Type header; if character encoding +# is wrong, try several common ones before falling back to regexes (if this +# works, bozo_exception is set to CharacterEncodingOverride); fixed character +# encoding issues in BaseHTMLProcessor by tracking encoding and converting +# from Unicode to raw strings before feeding data to sgmllib.SGMLParser; +# convert each value in results to Unicode (if possible), even if using +# regex-based parsing +#3.0b23 - 4/21/2004 - MAP - fixed UnicodeDecodeError for feeds that contain +# high-bit characters in
attributes in embedded HTML in description (thanks +# Thijs van de Vossen); moved guid, date, and date_parsed to mapped keys in +# FeedParserDict; tweaked FeedParserDict.has_key to return True if asking +# about a mapped key +#3.0fc1 - 4/23/2004 - MAP - made results.entries[0].links[0] and +# results.entries[0].enclosures[0] into FeedParserDict; fixed typo that could +# cause the same encoding to be tried twice (even if it failed the first time); +# fixed DOCTYPE stripping when DOCTYPE contained entity declarations; +# better textinput and image tracking in illformed RSS 1.0 feeds +#3.0fc2 - 5/10/2004 - MAP - added and passed Sam's amp tests; added and passed +# my blink tag tests +#3.0fc3 - 6/18/2004 - MAP - fixed bug in _changeEncodingDeclaration that +# failed to parse utf-16 encoded feeds; made source into a FeedParserDict; +# duplicate admin:generatorAgent/@rdf:resource in generator_detail.url; +# added support for image; refactored parse() fallback logic to try other +# encodings if SAX parsing fails (previously it would only try other encodings +# if re-encoding failed); remove unichr madness in normalize_attrs now that +# we're properly tracking encoding in and out of BaseHTMLProcessor; set +# feed.language from root-level xml:lang; set entry.id from rdf:about; +# send Accept header +#3.0 - 6/21/2004 - MAP - don't try iso-8859-1 (can't distinguish between +# iso-8859-1 and windows-1252 anyway, and most incorrectly marked feeds are +# windows-1252); fixed regression that could cause the same encoding to be +# tried twice (even if it failed the first time) +#3.0.1 - 6/22/2004 - MAP - default to us-ascii for all text/* content types; +# recover from malformed content-type header parameter with no equals sign +# ('text/xml; charset:iso-8859-1') +#3.1 - 6/28/2004 - MAP - added and passed tests for converting HTML entities +# to Unicode equivalents in illformed feeds (aaronsw); added and +# passed tests for converting character entities to Unicode equivalents +# 
in illformed feeds (aaronsw); test for valid parsers when setting +# XML_AVAILABLE; make version and encoding available when server returns +# a 304; add handlers parameter to pass arbitrary urllib2 handlers (like +# digest auth or proxy support); add code to parse username/password +# out of url and send as basic authentication; expose downloading-related +# exceptions in bozo_exception (aaronsw); added __contains__ method to +# FeedParserDict (aaronsw); added publisher_detail (aaronsw) +#3.2 - 7/3/2004 - MAP - use cjkcodecs and iconv_codec if available; always +# convert feed to UTF-8 before passing to XML parser; completely revamped +# logic for determining character encoding and attempting XML parsing +# (much faster); increased default timeout to 20 seconds; test for presence +# of Location header on redirects; added tests for many alternate character +# encodings; support various EBCDIC encodings; support UTF-16BE and +# UTF-16LE with or without a BOM; support UTF-8 with a BOM; support +# UTF-32BE and UTF-32LE with or without a BOM; fixed crashing bug if no +# XML parsers are available; added support for 'Content-encoding: deflate'; +# send blank 'Accept-encoding: ' header if neither gzip nor zlib modules +# are available +#3.3 - 7/15/2004 - MAP - optimize EBCDIC to ASCII conversion; fix obscure +# problem tracking xml:base and xml:lang if element declares it, child +# doesn't, first grandchild redeclares it, and second grandchild doesn't; +# refactored date parsing; defined public registerDateHandler so callers +# can add support for additional date formats at runtime; added support +# for OnBlog, Nate, MSSQL, Greek, and Hungarian dates (ytrewq1); added +# zopeCompatibilityHack() which turns FeedParserDict into a regular +# dictionary, required for Zope compatibility, and also makes command- +# line debugging easier because pprint module formats real dictionaries +# better than dictionary-like objects; added NonXMLContentType exception, +# which is stored in
bozo_exception when a feed is served with a non-XML +# media type such as 'text/plain'; respect Content-Language as default +# language if not xml:lang is present; cloud dict is now FeedParserDict; +# generator dict is now FeedParserDict; better tracking of xml:lang, +# including support for xml:lang='' to unset the current language; +# recognize RSS 1.0 feeds even when RSS 1.0 namespace is not the default +# namespace; don't overwrite final status on redirects (scenarios: +# redirecting to a URL that returns 304, redirecting to a URL that +# redirects to another URL with a different type of redirect); add +# support for HTTP 303 redirects +#4.0 - MAP - support for relative URIs in xml:base attribute; fixed +# encoding issue with mxTidy (phopkins); preliminary support for RFC 3229; +# support for Atom 1.0; support for iTunes extensions; new 'tags' for +# categories/keywords/etc. as array of dict +# {'term': term, 'scheme': scheme, 'label': label} to match Atom 1.0 +# terminology; parse RFC 822-style dates with no time; lots of other +# bug fixes +#4.1 - MAP - removed socket timeout; added support for chardet library diff --git a/lib/irclib/COPYING b/lib/irclib/COPYING new file mode 100755 index 0000000..cf9b6b9 --- /dev/null +++ b/lib/irclib/COPYING @@ -0,0 +1,510 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. 
By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. 
+ + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. +^L + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. 
It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. +^L + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. 
This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. 
You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) 
+ +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. +^L + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. +^L + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. +^L + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. +^L + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. +^L + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS +^L + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/lib/irclib/ChangeLog b/lib/irclib/ChangeLog new file mode 100755 index 0000000..3f086a0 --- /dev/null +++ b/lib/irclib/ChangeLog @@ -0,0 +1,420 @@ +2005-12-24 Keltus + * Released version 0.4.6. + + * irclib.py (VERSION): + * python-irclib.spec.in: + Preparations for version 0.4.6. 
+ +2005-12-23 Keltus + * dccsend: + * dccreceive: + * irclib.py: + * ircbot.py: + * irccat: + * irccat2: + * servermap: + * testbot.py: + Code modernization - String methods used instead of deprecated + string functions, keyword 'in' used for membership testing instead + of 'has_key' method, etc. + +2005-12-06 Keltus + * irclib.py (ServerConnection.process_data): Reversed fix from + 2005-05-28. This is strange because there was a bug before and + now it's gone. Either python changed something, or the IRC + networks changed something. Confirmed by peter. + +2005-11-03 Keltus + * irclib.py (numeric_events): Renamed numeric code 332 from topic + to currenttopic (the message when "/topic " is sent), so it + doesn't collide with TOPIC (the message when the topic is set). + +2005-08-27 Keltus + * irclib.py (ServerConnection.disconnect): Fixed infinitely + recursive calls when disconnecting with a failed connection. Bug + reported by Erik Max Francis. + +2005-08-18 Keltus + * irclib.py: Made ServerConnection.disconnect more consistent and + changed some functions to use it instead of quit. Previously, + disconnect would ignore the quit message, but now it sends a quit + message and disconnects. Suggestion by Erik Max Francis. + * ircbot.py: Changed to use ServerConnection.disconnect instead of + ServerConnection.quit as well. + +2005-05-28 Keltus + * irclib.py (ServerConnection.process_data): Fixed quit arguments + to return a list rather than a list of a list. Patch from peter. + +2005-05-18 Keltus + * Released version 0.4.5. + + * irclib.py (ServerConnection.__init__): Added self.socket = None + to be able to process events when ServerConnection is not + connected to a socket. Patch from alst. + + * irclib.py (VERSION): + * python-irclib.spec.in: + Preparations for version 0.4.5. + +2005-04-26 Keltus + * irclib.py (IRC.__doc__): Corrected server.process_forever() to + irc.process_forever(). Suggestion by olecom.
+ +2005-04-17 Keltus + * irclib.py (ServerConnection.process_data): Moved event + translation code. + * irclib.py (ServerConnection): Reverted the 2005-01-28 change + because it breaks jump_server(). + * irclib.py: minor comment changes + +2005-04-03 Keltus + * irclib.py (protocol_events): Added "pong" and "invite" events. + Patch from Adam Mikuta. + * irclib.py (ServerConnection.part): Added message parameter. + Patch from Adam Mikuta. + +2005-02-23 Keltus + * Released version 0.4.4. + + * irclib.py (VERSION): + * python-irclib.spec.in: + Preparations for version 0.4.4. + +2005-01-28 Keltus + * irclib.py: (ServerConnection): Moved + self.irclibobj._remove_connection call from close() to + disconnect(). Patch from Alexey Nezhdanov. + +2005-01-25 Keltus + * irclib.py (ServerConnection.connect): closes socket if a + connection does not occur + * irclib.py (ServerConnection.connect): "Changing server" -> + "Changing servers" (more ubiquitous quit phrase) + +2005-01-23 Keltus + + * irclib.py: Removed deprecated apply functions. python-irclib is + now compatible with Python 1.6 and above. + * testbot.py: Removed redundant extra start() call + +2005-01-20 Joel Rosdahl + + * Released version 0.4.3. + * Makefile: Removed more GNU make specific constructs. + +2005-01-19 Joel Rosdahl + + * Makefile: Don't require GNU make. + +2005-01-19 Keltus + + * ircbot.py (IRCDict.__iter__): Added __iter__ method for IRCDict. + +2005-01-17 Joel Rosdahl + + * ircbot.py (IRCDict.__contains__): Added __contains__ method for + IRCDict. Patch from Keltus. + (SingleServerIRCBot.on_ctcp): Corrected default decoding of CTCP + DCC CHAT. Patch from Keltus. + + * irclib.py (VERSION): + * python-irclib.spec.in: + Preparations for version 0.4.3. + + * debian: Removed Debian package directory since python-irclib is + in Debian now. + + * ircbot.py (SingleServerIRCBot._on_namreply): Improved comment + about arguments to the function. Patch from Keltus.
+ (Channel.has_allow_external_messages): Renamed from + has_message_from_outside_protection. Patch from Keltus. + + * irclib.py (ServerConnection.quit): Added comment about how some + IRC servers' treat QUIT messages. Patch from Keltus. + + * ircbot.py (SingleServerIRCBot.jump_server): Improved jump_server + behaviour. Patch from Keltus. + +2004-08-04 Joel Rosdahl + + * irclib.py (ServerConnection.process_data): Added "bonus" action + event that is triggered on CTCP ACTION messages. + +2004-07-09 Joel Rosdahl + + * Released version 0.4.2. + + * debian/rules: Remove built *.pyc files before making package. + + * irclib.py (DEBUG): + * debian/changelog: + * python-irclib.spec.in: + Preparations for version 0.4.2. + + * irclib.py (ServerNotConnectedError): New exception. + (ServerConnection.send_raw): Fix bug #922446, "Raise + IllegalStateException in send_raw when disconnected". + +2003-10-30 Joel Rosdahl + + * Released version 0.4.1. + + * debian/examples: Added dccreceive and dccsend as example files + in Debian. + + * python-irclib.spec.in: Likewise. + +2003-10-29 Joel Rosdahl + + * debian: Added Debian packaging files. + + * setup.py.in: Create setup.py from setup.py.in. + + * python-irclib.spec.in: RPM spec file from Gary Benson. + + * testbot.py (TestBot.on_nicknameinuse): New method. + + * irclib.py (ServerConnection.process_data): Record nickname when + welcome message is sent to trap nickname change triggered in a + nicknameinuse callback. + + * ircbot.py (SingleServerIRCBot._on_join): Use + Connection.get_nickname instead of relying on self._nickname. + (SingleServerIRCBot._on_kick): Likewise. + (SingleServerIRCBot._on_part): And here too. + (SingleServerIRCBot._on_nick): No need to remember nickname change + here. + +2003-08-31 Joel Rosdahl + + * Released version 0.4.0. + + Implemented DCC support (based on patches from Adam Langley and + Marco Bettio): + + * irclib.py (IRC.dcc): New method. + (DCCConnectionError): New class. + (DCCConnection): New class. 
+ (SimpleIRCClient.__init__): Added dcc_connections attribute. + (SimpleIRCClient._dcc_disconnect): New method. + (SimpleIRCClient.connect): Added localaddress and + localport parameters. The socket will be bound accordingly before + connecting. + (SimpleIRCClient.dcc_connect): New method. + (SimpleIRCClient.dcc_listen): New method. + (ip_numstr_to_quad): New function. + (ip_quad_to_numstr): New function. + + * ircbot.py (SingleServerIRCBot.on_ctcp): Relay DCC CHAT CTCPs to + the on_dccchat method. + + * testbot.py: Added support for accepting DCC chats and for + initiating DCC chats via a "dcc" command. + + * dccreceive: New example program. + + * dccsend: New example program. + + * Makefile: Added dccreceive and dccsend to dist files. + + Other changes: + + * setup.py: Added. + + * irclib.py (ServerConnection.connect, ServerConnection.user): + Send USER command according to RFC 2812. + (ServerConnection.connect): Added localaddress and + localport parameters. The socket will be bound accordingly before + connecting. + (ServerConnection.process_data): Ignore empty lines from the + server. (Patch by Jason Wies.) + (ServerConnection._get_socket): Simplified. + (ServerConnection.remove_global_handler): Added. (Patch from + Brandon Beck.) + + * ircbot.py (SingleServerIRCBot.on_ctcp): Prepend VERSION reply + with VERSION. (Patch from Andrew Gaul.) + + * Makefile: Added setup.py to dist files. Also create zip archive. + + * README: Added requirements and installation sections. + +2002-03-01 Joel Rosdahl + + * Released version 0.3.4. + + Corrected problems spotted by Markku Hnninen : + + * irccat2 (IRCCat.on_welcome): Added missing connection argument. + (IRCCat.on_join): Likewise. + (IRCCat.on_disconnect): Likewise. + + * irclib.py (ServerConnection.ison): Bug fix: Join nicks by space + instead of commas. + + * irclib.py (ServerConnection.whowas): Bug fix: Let the max + argument default to the empty string. 
+ + * irclib.py (numeric_events): Added new events: traceservice, + tracereconnect, tryagain, invitelist, endofinvitelist, exceptlist, + endofexceptlist, unavailresource, nochanmodes, banlistfull, + restricted and uniqopprivsneeded. + +2002-02-17 Joel Rosdahl + + * Released version 0.3.3. + + * Makefile, README, .cvsignore: Removed documentation generated by + pythondoc. Use pydoc instead. + + * servermap: Removed some excess whitespace. + + * README: Mention http://python-irclib.sourceforge.net. + + * Makefile (dist): Changed archive name from irclib-* to + python-irclib-*. + + Changed license from GPL 2 to LGPL 2.1: + + * COPYING: New license text. + + * irclib.py, ircbot.py, servermap: New license header. + +2001-10-21 Joel Rosdahl + + * Released version 0.3.2. + + * irclib.py (_parse_modes): Fixed problem found by Tom Morton: the + mode parsing code bailed out if a unary mode character didn't have + a corresponding argument. + + * irclib.py (_alpha): Fixed bug found by Tom Morton: w was missing + in the alphabet used by irc_lower(). + + * ircbot.py: Removed redundant import of is_channel. + + * servermap: Clarified copyright and license. + + * irccat: Ditto. + + * irccat2: Ditto. + +2000-12-11 Joel Rosdahl + + * Released version 0.3.1. + + * irclib.py (IRC.process_once): Work-around for platform-dependent + select() on Windows systems. + + * ircbot.py: Clarification of SingleServerIRCBot doc string. + +2000-11-26 Joel Rosdahl + + * Released version 0.3.0. + + * Makefile (dist): Include ircbot.py again. + + * README: Updated. + + * irclib.py (ServerConnection.get_nickname): Renamed from + get_nick_name. + (ServerConnection._get_socket): Return None if not connected. + +2000-11-25 Joel Rosdahl + + * irclib.py (ServerConnection.process_data): all_raw_messages + instead of allrawmessages. + (IRC._handle_event): Added "all_events" event type. + (nm_to_n): Renamed from nick_from_nickmask. + (nm_to_uh): Renamed from userhost_from_nickmask. 
+ (nm_to_h): Renamed from host_from_nickmask. + (nm_to_u): Renamed from user_from_nickmask. + (SimpleIRCClient): Created. + +2000-11-22 Joel Rosdahl + + * irclib.py (lower_irc_string): Use translation instead. + (ServerConnection.process_data): Split non-RFC-compliant lines a + bit more intelligently. + (ServerConnection.process_data): Removed unnecessary try/except + block. + (ServerConnection.get_server_name): Return empty server if + unknown. + (_rfc_1459_command_regexp): Tweaked a bit. + + * ircbot.py: Rewritten. + +2000-11-21 Joel Rosdahl + + * irclib.py (IRC.process_forever): Default to processing a bit + more often. + +2000-10-29 Joel Rosdahl + + * Released version 0.2.4. + + * Makefile (dist): Include generated documentation in + distribution. + + * Makefile (doc): Make documentation. + + * irclib.py: Updated documentation. + + * irclib.py (is_channel): Included "!" as channel prefix. + +2000-10-02 Joel Rosdahl + + * Released version 0.2.3. + + * irclib.py (ServerConnection.connect): Make socket.connect() work + for Python >= 1.6. + +2000-09-26 Joel Rosdahl + + * Released version 0.2.2. + + * irclib.py (ServerConnection.user): Fixed erroneous format + string. + +2000-09-24 Joel Rosdahl + + * Released version 0.2.1. + + * irclib.py (ServerConnection.process_data): Bug fix (didn't keep + track of nick name). + (IRC.process_once): New method. + (ServerConnection.process_data): Bug fix. + (IRC.disconnect_all): Created. + (IRC.exit): Removed. + (ServerConnection.exit): Removed. + (ServerConnection.connect): Follow RFC closer. + (ServerConnection.user): Follow RFC closer. + + * ircbot.py: Removed. + + * irccat (on_disconnect): Just sys.exit(0). + + * servermap (on_disconnect): Just sys.exit(0). + + * irclib.py: Various documentation and some clean-ups. + +1999-08-21 Joel Rosdahl + + * Released version 0.2.0. + + * servermap: Updated to work with irclib 0.2.0. + + * irccat: Updated to work with irclib 0.2.0. + + * ircbot.py: Updated to work with irclib 0.2.0. 
The bot now + checks every minute that it is connected. If it's not, it + reconnects. + + * irclib.py: Changes in how to create a ServerConnection object. + Made the code for handling disconnection hopefully more robust. + Renamed connect() to sconnect(). + +1999-06-19 Joel Rosdahl + + * irclib.py: Released 0.1.0. diff --git a/lib/irclib/Makefile b/lib/irclib/Makefile new file mode 100755 index 0000000..0409ec3 --- /dev/null +++ b/lib/irclib/Makefile @@ -0,0 +1,42 @@ +VERSION := `sed -n -e '/VERSION = /{s/VERSION = \(.*\), \(.*\), \(.*\)/\1.\2.\3/;p;}' setup.py + +python-irclib.spec: python-irclib.spec.in + sed 's/%%VERSION%%/'$(VERSION)'/g' python-irclib.spec.in >python-irclib.spec + +dist: $(DISTFILES) + mkdir $(PACKAGENAME) + cp -r $(DISTFILES) $(PACKAGENAME) + tar cvzf $(PACKAGENAME).tar.gz $(PACKAGENAME) + zip -r9yq $(PACKAGENAME).zip $(PACKAGENAME) + rm -rf $(PACKAGENAME) + +cvstag: + ver=$(VERSION); echo cvs tag version_`echo $$ver | sed 's/\./_/g'` + +clean: + rm -rf *~ *.pyc build python-irclib.spec setup.py + +.PHONY: all doc dist cvstag clean diff --git a/lib/irclib/README b/lib/irclib/README new file mode 100755 index 0000000..3b9d913 --- /dev/null +++ b/lib/irclib/README @@ -0,0 +1,106 @@ +irclib -- Internet Relay Chat (IRC) protocol client library +----------------------------------------------------------- + +The home of irclib.py is now: + + http://python-irclib.sourceforge.net + +This library is intended to encapsulate the IRC protocol at a quite +low level. It provides an event-driven IRC client framework. It has +a fairly thorough support for the basic IRC protocol, CTCP and DCC +connections. + +In order to understand how to make an IRC client, I'm afraid you more +or less must understand the IRC specifications. They are available +here: + + http://www.irchelp.org/irchelp/rfc/ + +Requirements: + + * Python 1.6 or newer. 
+ +Installation: + + * Run "python setup.py install" or copy irclib.py and/or ircbot.py + to an appropriate Python module directory. + +The main features of the IRC client framework are: + + * Abstraction of the IRC protocol. + * Handles multiple simultaneous IRC server connections. + * Handles server PONGing transparently. + * Messages to the IRC server are done by calling methods on an IRC + connection object. + * Messages from an IRC server trigger events, which can be caught + by event handlers. + * Reading from and writing to IRC server sockets are normally done + by an internal select() loop, but the select()ing may be done by + an external main loop. + * Functions can be registered to execute at specified times by the + event-loop. + * Decodes CTCP tagging correctly (hopefully); I haven't seen any + other IRC client implementation that handles the CTCP + specification subtleties. + * A kind of simple, single-server, object-oriented IRC client class + that dispatches events to instance methods is included. + * DCC connection support. + +Current limitations: + + * The IRC protocol shines through the abstraction a bit too much. + * Data is not written asynchronously to the server (and DCC peers), + i.e. the write() may block if the TCP buffers are stuffed. + * Like most projects, documentation is lacking... + +Unfortunately, this library isn't as well-documented as I would like +it to be. I think the best way to get started is to read and +understand the example program irccat, which is included in the +distribution. + +The following files might be of interest: + + * irclib.py + + The library itself. Read the code along with comments and + docstrings to get a grip of what it does. Use it at your own risk + and read the source, Luke! + + * irccat + + A simple example of how to use irclib.py. irccat reads text from + stdin and writes it to a specified user or channel on an IRC + server. + + * irccat2 + + The same as above, but using the SimpleIRCClient class.
+ + * servermap + + Another simple example. servermap connects to an IRC server, + finds out what other IRC servers there are in the net and prints + a tree-like map of their interconnections. + + * testbot.py + + An example bot that uses the SingleServerIRCBot class from + ircbot.py. The bot enters a channel and listens for commands in + private messages or channel traffic. It also accepts DCC + invitations and echos back sent DCC chat messages. + + * dccreceive + + Receives a file over DCC. + + * dccsend + + Sends a file over DCC. + +Enjoy. + +Maintainer: +keltus + +Original Founder: +Joel Rosdahl diff --git a/lib/irclib/dccreceive b/lib/irclib/dccreceive new file mode 100755 index 0000000..5383dbb --- /dev/null +++ b/lib/irclib/dccreceive @@ -0,0 +1,77 @@ +#! /usr/bin/env python +# +# Example program using irclib.py. +# +# This program is free without restrictions; do anything you like with +# it. +# +# Joel Rosdahl + +import irclib +import os +import struct +import sys + +class DCCReceive(irclib.SimpleIRCClient): + def __init__(self): + irclib.SimpleIRCClient.__init__(self) + self.received_bytes = 0 + + def on_ctcp(self, connection, event): + args = event.arguments()[1].split() + if args[0] != "SEND": + return + self.filename = os.path.basename(args[1]) + if os.path.exists(self.filename): + print "A file named", self.filename, + print "already exists. Refusing to save it." + self.connection.quit() + self.file = open(self.filename, "w") + peeraddress = irclib.ip_numstr_to_quad(args[2]) + peerport = int(args[3]) + self.dcc = self.dcc_connect(peeraddress, peerport, "raw") + + def on_dccmsg(self, connection, event): + data = event.arguments()[0] + self.file.write(data) + self.received_bytes = self.received_bytes + len(data) + self.dcc.privmsg(struct.pack("!I", self.received_bytes)) + + def on_dcc_disconnect(self, connection, event): + self.file.close() + print "Received file %s (%d bytes)." 
% (self.filename, + self.received_bytes) + self.connection.quit() + + def on_disconnect(self, connection, event): + sys.exit(0) + +def main(): + if len(sys.argv) != 3: + print "Usage: dccreceive " + print "\nReceives one file via DCC and then exits. The file is stored in the" + print "current directory." + sys.exit(1) + + s = sys.argv[1].split(":", 1) + server = s[0] + if len(s) == 2: + try: + port = int(s[1]) + except ValueError: + print "Error: Erroneous port." + sys.exit(1) + else: + port = 6667 + nickname = sys.argv[2] + + c = DCCReceive() + try: + c.connect(server, port, nickname) + except irclib.ServerConnectionError, x: + print x + sys.exit(1) + c.start() + +if __name__ == "__main__": + main() diff --git a/lib/irclib/dccsend b/lib/irclib/dccsend new file mode 100755 index 0000000..9debec1 --- /dev/null +++ b/lib/irclib/dccsend @@ -0,0 +1,91 @@ +#! /usr/bin/env python +# +# Example program using irclib.py. +# +# This program is free without restrictions; do anything you like with +# it. +# +# Joel Rosdahl + +import irclib +import os +import struct +import sys + +class DCCSend(irclib.SimpleIRCClient): + def __init__(self, receiver, filename): + irclib.SimpleIRCClient.__init__(self) + self.receiver = receiver + self.filename = filename + self.filesize = os.path.getsize(self.filename) + self.file = open(filename) + self.sent_bytes = 0 + + def on_welcome(self, connection, event): + self.dcc = self.dcc_listen("raw") + self.connection.ctcp("DCC", self.receiver, "SEND %s %s %d %d" % ( + os.path.basename(self.filename), + irclib.ip_quad_to_numstr(self.dcc.localaddress), + self.dcc.localport, + self.filesize)) + + def on_dcc_connect(self, connection, event): + if self.filesize == 0: + self.dcc.disconnect() + return + self.send_chunk() + + def on_dcc_disconnect(self, connection, event): + print "Sent file %s (%d bytes)." 
% (self.filename, self.filesize) + self.connection.quit() + + def on_dccmsg(self, connection, event): + acked = struct.unpack("!I", event.arguments()[0])[0] + if acked == self.filesize: + self.dcc.disconnect() + self.connection.quit() + elif acked == self.sent_bytes: + self.send_chunk() + + def on_disconnect(self, connection, event): + sys.exit(0) + + def on_nosuchnick(self, connection, event): + print "No such nickname:", event.arguments()[0] + self.connection.quit() + + def send_chunk(self): + data = self.file.read(1024) + self.dcc.privmsg(data) + self.sent_bytes = self.sent_bytes + len(data) + +def main(): + if len(sys.argv) != 5: + print "Usage: dccsend " + print "\nSends to via DCC and then exits." + sys.exit(1) + + s = sys.argv[1].split(":", 1) + server = s[0] + if len(s) == 2: + try: + port = int(s[1]) + except ValueError: + print "Error: Erroneous port." + sys.exit(1) + else: + port = 6667 + nickname = sys.argv[2] + receiver = sys.argv[3] + filename = sys.argv[4] + + c = DCCSend(receiver, filename) + try: + c.connect(server, port, nickname) + except irclib.ServerConnectionError, x: + print x + sys.exit(1) + c.start() + +if __name__ == "__main__": + main() diff --git a/lib/irclib/ircbot.py b/lib/irclib/ircbot.py new file mode 100755 index 0000000..ad47981 --- /dev/null +++ b/lib/irclib/ircbot.py @@ -0,0 +1,438 @@ +# Copyright (C) 1999--2002 Joel Rosdahl +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Joel Rosdahl +# +# $Id: ircbot.py,v 1.21 2005/12/23 18:44:43 keltus Exp $ + +"""ircbot -- Simple IRC bot library. + +This module contains a single-server IRC bot class that can be used to +write simpler bots. +""" + +import sys +from UserDict import UserDict + +from irclib import SimpleIRCClient +from irclib import nm_to_n, irc_lower, all_events +from irclib import parse_channel_modes, is_channel +from irclib import ServerConnectionError + +class SingleServerIRCBot(SimpleIRCClient): + """A single-server IRC bot class. + + The bot tries to reconnect if it is disconnected. + + The bot keeps track of the channels it has joined, the other + clients that are present in the channels and which of those that + have operator or voice modes. The "database" is kept in the + self.channels attribute, which is an IRCDict of Channels. + """ + def __init__(self, server_list, nickname, realname, reconnection_interval=60): + """Constructor for SingleServerIRCBot objects. + + Arguments: + + server_list -- A list of tuples (server, port) that + defines which servers the bot should try to + connect to. + + nickname -- The bot's nickname. + + realname -- The bot's realname. + + reconnection_interval -- How long the bot should wait + before trying to reconnect. + + dcc_connections -- A list of initiated/accepted DCC + connections. 
+ """ + + SimpleIRCClient.__init__(self) + self.channels = IRCDict() + self.server_list = server_list + if not reconnection_interval or reconnection_interval < 0: + reconnection_interval = 2**31 + self.reconnection_interval = reconnection_interval + + self._nickname = nickname + self._realname = realname + for i in ["disconnect", "join", "kick", "mode", + "namreply", "nick", "part", "quit"]: + self.connection.add_global_handler(i, + getattr(self, "_on_" + i), + -10) + def _connected_checker(self): + """[Internal]""" + if not self.connection.is_connected(): + self.connection.execute_delayed(self.reconnection_interval, + self._connected_checker) + self.jump_server() + + def _connect(self): + """[Internal]""" + password = None + if len(self.server_list[0]) > 2: + password = self.server_list[0][2] + try: + self.connect(self.server_list[0][0], + self.server_list[0][1], + self._nickname, + password, + ircname=self._realname) + except ServerConnectionError: + pass + + def _on_disconnect(self, c, e): + """[Internal]""" + self.channels = IRCDict() + self.connection.execute_delayed(self.reconnection_interval, + self._connected_checker) + + def _on_join(self, c, e): + """[Internal]""" + ch = e.target() + nick = nm_to_n(e.source()) + if nick == c.get_nickname(): + self.channels[ch] = Channel() + self.channels[ch].add_user(nick) + + def _on_kick(self, c, e): + """[Internal]""" + nick = e.arguments()[0] + channel = e.target() + + if nick == c.get_nickname(): + del self.channels[channel] + else: + self.channels[channel].remove_user(nick) + + def _on_mode(self, c, e): + """[Internal]""" + modes = parse_channel_modes(" ".join(e.arguments())) + t = e.target() + if is_channel(t): + ch = self.channels[t] + for mode in modes: + if mode[0] == "+": + f = ch.set_mode + else: + f = ch.clear_mode + f(mode[1], mode[2]) + else: + # Mode on self... 
XXX + pass + + def _on_namreply(self, c, e): + """[Internal]""" + + # e.arguments()[0] == "@" for secret channels, + # "*" for private channels, + # "=" for others (public channels) + # e.arguments()[1] == channel + # e.arguments()[2] == nick list + + ch = e.arguments()[1] + for nick in e.arguments()[2].split(): + if nick[0] == "@": + nick = nick[1:] + self.channels[ch].set_mode("o", nick) + elif nick[0] == "+": + nick = nick[1:] + self.channels[ch].set_mode("v", nick) + self.channels[ch].add_user(nick) + + def _on_nick(self, c, e): + """[Internal]""" + before = nm_to_n(e.source()) + after = e.target() + for ch in self.channels.values(): + if ch.has_user(before): + ch.change_nick(before, after) + + def _on_part(self, c, e): + """[Internal]""" + nick = nm_to_n(e.source()) + channel = e.target() + + if nick == c.get_nickname(): + del self.channels[channel] + else: + self.channels[channel].remove_user(nick) + + def _on_quit(self, c, e): + """[Internal]""" + nick = nm_to_n(e.source()) + for ch in self.channels.values(): + if ch.has_user(nick): + ch.remove_user(nick) + + def die(self, msg="Bye, cruel world!"): + """Let the bot die. + + Arguments: + + msg -- Quit message. + """ + + self.connection.disconnect(msg) + sys.exit(0) + + def disconnect(self, msg="I'll be back!"): + """Disconnect the bot. + + The bot will try to reconnect after a while. + + Arguments: + + msg -- Quit message. + """ + self.connection.disconnect(msg) + + def get_version(self): + """Returns the bot version. + + Used when answering a CTCP VERSION request. + """ + return "ircbot.py by Joel Rosdahl " + + def jump_server(self, msg="Changing servers"): + """Connect to a new server, possibly disconnecting from the current. + + The bot will skip to next server in the server_list each time + jump_server is called. 
+ """ + if self.connection.is_connected(): + self.connection.disconnect(msg) + + self.server_list.append(self.server_list.pop(0)) + self._connect() + + def on_ctcp(self, c, e): + """Default handler for ctcp events. + + Replies to VERSION and PING requests and relays DCC requests + to the on_dccchat method. + """ + if e.arguments()[0] == "VERSION": + c.ctcp_reply(nm_to_n(e.source()), + "VERSION " + self.get_version()) + elif e.arguments()[0] == "PING": + if len(e.arguments()) > 1: + c.ctcp_reply(nm_to_n(e.source()), + "PING " + e.arguments()[1]) + elif e.arguments()[0] == "DCC" and e.arguments()[1].split(" ", 1)[0] == "CHAT": + self.on_dccchat(c, e) + + def on_dccchat(self, c, e): + pass + + def start(self): + """Start the bot.""" + self._connect() + SimpleIRCClient.start(self) + + +class IRCDict: + """A dictionary suitable for storing IRC-related things. + + Dictionary keys a and b are considered equal if and only if + irc_lower(a) == irc_lower(b) + + Otherwise, it should behave exactly as a normal dictionary. 
+ """ + + def __init__(self, dict=None): + self.data = {} + self.canon_keys = {} # Canonical keys + if dict is not None: + self.update(dict) + def __repr__(self): + return repr(self.data) + def __cmp__(self, dict): + if isinstance(dict, IRCDict): + return cmp(self.data, dict.data) + else: + return cmp(self.data, dict) + def __len__(self): + return len(self.data) + def __getitem__(self, key): + return self.data[self.canon_keys[irc_lower(key)]] + def __setitem__(self, key, item): + if key in self: + del self[key] + self.data[key] = item + self.canon_keys[irc_lower(key)] = key + def __delitem__(self, key): + ck = irc_lower(key) + del self.data[self.canon_keys[ck]] + del self.canon_keys[ck] + def __iter__(self): + return iter(self.data) + def __contains__(self, key): + return self.has_key(key) + def clear(self): + self.data.clear() + self.canon_keys.clear() + def copy(self): + if self.__class__ is UserDict: + return UserDict(self.data) + import copy + return copy.copy(self) + def keys(self): + return self.data.keys() + def items(self): + return self.data.items() + def values(self): + return self.data.values() + def has_key(self, key): + return irc_lower(key) in self.canon_keys + def update(self, dict): + for k, v in dict.items(): + self.data[k] = v + def get(self, key, failobj=None): + return self.data.get(key, failobj) + + +class Channel: + """A class for keeping information about an IRC channel. + + This class can be improved a lot. 
+ """ + + def __init__(self): + self.userdict = IRCDict() + self.operdict = IRCDict() + self.voiceddict = IRCDict() + self.modes = {} + + def users(self): + """Returns an unsorted list of the channel's users.""" + return self.userdict.keys() + + def opers(self): + """Returns an unsorted list of the channel's operators.""" + return self.operdict.keys() + + def voiced(self): + """Returns an unsorted list of the persons that have voice + mode set in the channel.""" + return self.voiceddict.keys() + + def has_user(self, nick): + """Check whether the channel has a user.""" + return nick in self.userdict + + def is_oper(self, nick): + """Check whether a user has operator status in the channel.""" + return nick in self.operdict + + def is_voiced(self, nick): + """Check whether a user has voice mode set in the channel.""" + return nick in self.voiceddict + + def add_user(self, nick): + self.userdict[nick] = 1 + + def remove_user(self, nick): + for d in self.userdict, self.operdict, self.voiceddict: + if nick in d: + del d[nick] + + def change_nick(self, before, after): + self.userdict[after] = 1 + del self.userdict[before] + if before in self.operdict: + self.operdict[after] = 1 + del self.operdict[before] + if before in self.voiceddict: + self.voiceddict[after] = 1 + del self.voiceddict[before] + + def set_mode(self, mode, value=None): + """Set mode on the channel. + + Arguments: + + mode -- The mode (a single-character string). + + value -- Value + """ + if mode == "o": + self.operdict[value] = 1 + elif mode == "v": + self.voiceddict[value] = 1 + else: + self.modes[mode] = value + + def clear_mode(self, mode, value=None): + """Clear mode on the channel. + + Arguments: + + mode -- The mode (a single-character string). 
+ + value -- Value + """ + try: + if mode == "o": + del self.operdict[value] + elif mode == "v": + del self.voiceddict[value] + else: + del self.modes[mode] + except KeyError: + pass + + def has_mode(self, mode): + return mode in self.modes + + def is_moderated(self): + return self.has_mode("m") + + def is_secret(self): + return self.has_mode("s") + + def is_protected(self): + return self.has_mode("p") + + def has_topic_lock(self): + return self.has_mode("t") + + def is_invite_only(self): + return self.has_mode("i") + + def has_allow_external_messages(self): + return self.has_mode("n") + + def has_limit(self): + return self.has_mode("l") + + def limit(self): + if self.has_limit(): + return self.modes[l] + else: + return None + + def has_key(self): + return self.has_mode("k") + + def key(self): + if self.has_key(): + return self.modes["k"] + else: + return None diff --git a/lib/irclib/irccat b/lib/irclib/irccat new file mode 100755 index 0000000..6398ae3 --- /dev/null +++ b/lib/irclib/irccat @@ -0,0 +1,64 @@ +#! /usr/bin/env python +# +# Example program using irclib.py. +# +# This program is free without restrictions; do anything you like with +# it. +# +# Joel Rosdahl + +import irclib +import sys + +def on_connect(connection, event): + if irclib.is_channel(target): + connection.join(target) + else: + while 1: + line = sys.stdin.readline() + if not line: + break + connection.privmsg(target, line) + connection.quit("Using irclib.py") + +def on_join(connection, event): + while 1: + line = sys.stdin.readline() + if not line: + break + connection.privmsg(target, line) + connection.quit("Using irclib.py") + +if len(sys.argv) != 4: + print "Usage: irccat " + print "\ntarget is a nickname or a channel." + sys.exit(1) + +def on_disconnect(connection, event): + sys.exit(0) + +s = sys.argv[1].split(":", 1) +server = s[0] +if len(s) == 2: + try: + port = int(s[1]) + except ValueError: + print "Error: Erroneous port." 
+ sys.exit(1) +else: + port = 6667 +nickname = sys.argv[2] +target = sys.argv[3] + +irc = irclib.IRC() +try: + c = irc.server().connect(server, port, nickname) +except irclib.ServerConnectionError, x: + print x + sys.exit(1) + +c.add_global_handler("welcome", on_connect) +c.add_global_handler("join", on_join) +c.add_global_handler("disconnect", on_disconnect) + +irc.process_forever() diff --git a/lib/irclib/irccat2 b/lib/irclib/irccat2 new file mode 100755 index 0000000..fff4458 --- /dev/null +++ b/lib/irclib/irccat2 @@ -0,0 +1,66 @@ +#! /usr/bin/env python +# +# Example program using irclib.py. +# +# This program is free without restrictions; do anything you like with +# it. +# +# Joel Rosdahl + +import irclib +import sys + +class IRCCat(irclib.SimpleIRCClient): + def __init__(self, target): + irclib.SimpleIRCClient.__init__(self) + self.target = target + + def on_welcome(self, connection, event): + if irclib.is_channel(self.target): + connection.join(self.target) + else: + self.send_it() + + def on_join(self, connection, event): + self.send_it() + + def on_disconnect(self, connection, event): + sys.exit(0) + + def send_it(self): + while 1: + line = sys.stdin.readline() + if not line: + break + self.connection.privmsg(self.target, line) + self.connection.quit("Using irclib.py") + +def main(): + if len(sys.argv) != 4: + print "Usage: irccat2 " + print "\ntarget is a nickname or a channel." + sys.exit(1) + + s = sys.argv[1].split(":", 1) + server = s[0] + if len(s) == 2: + try: + port = int(s[1]) + except ValueError: + print "Error: Erroneous port." 
+ sys.exit(1) + else: + port = 6667 + nickname = sys.argv[2] + target = sys.argv[3] + + c = IRCCat(target) + try: + c.connect(server, port, nickname) + except irclib.ServerConnectionError, x: + print x + sys.exit(1) + c.start() + +if __name__ == "__main__": + main() diff --git a/lib/irclib/irclib.py b/lib/irclib/irclib.py new file mode 100755 index 0000000..c072ecc --- /dev/null +++ b/lib/irclib/irclib.py @@ -0,0 +1,1550 @@ +# Copyright (C) 1999--2002 Joel Rosdahl +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# keltus +# +# $Id: irclib.py,v 1.43 2005/12/24 22:12:40 keltus Exp $ + +"""irclib -- Internet Relay Chat (IRC) protocol client library. + +This library is intended to encapsulate the IRC protocol at a quite +low level. It provides an event-driven IRC client framework. It has +a fairly thorough support for the basic IRC protocol, CTCP, DCC chat, +but DCC file transfers is not yet supported. + +In order to understand how to make an IRC client, I'm afraid you more +or less must understand the IRC specifications. They are available +here: [IRC specifications]. + +The main features of the IRC client framework are: + + * Abstraction of the IRC protocol. + * Handles multiple simultaneous IRC server connections. + * Handles server PONGing transparently. 
+ * Messages to the IRC server are done by calling methods on an IRC + connection object. + * Messages from an IRC server triggers events, which can be caught + by event handlers. + * Reading from and writing to IRC server sockets are normally done + by an internal select() loop, but the select()ing may be done by + an external main loop. + * Functions can be registered to execute at specified times by the + event-loop. + * Decodes CTCP tagging correctly (hopefully); I haven't seen any + other IRC client implementation that handles the CTCP + specification subtilties. + * A kind of simple, single-server, object-oriented IRC client class + that dispatches events to instance methods is included. + +Current limitations: + + * The IRC protocol shines through the abstraction a bit too much. + * Data is not written asynchronously to the server, i.e. the write() + may block if the TCP buffers are stuffed. + * There are no support for DCC file transfers. + * The author haven't even read RFC 2810, 2811, 2812 and 2813. + * Like most projects, documentation is lacking... + +.. [IRC specifications] http://www.irchelp.org/irchelp/rfc/ +""" + +import bisect +import re +import select +import socket +import string +import sys +import time +import types + +VERSION = 0, 4, 6 +DEBUG = 0 + +# TODO +# ---- +# (maybe) thread safety +# (maybe) color parser convenience functions +# documentation (including all event types) +# (maybe) add awareness of different types of ircds +# send data asynchronously to the server (and DCC connections) +# (maybe) automatically close unused, passive DCC connections after a while + +# NOTES +# ----- +# connection.quit() only sends QUIT to the server. +# ERROR from the server triggers the error event and the disconnect event. +# dropping of the connection triggers the disconnect event. + +class IRCError(Exception): + """Represents an IRC exception.""" + pass + + +class IRC: + """Class that handles one or several IRC server connections. 
+ + When an IRC object has been instantiated, it can be used to create + Connection objects that represent the IRC connections. The + responsibility of the IRC object is to provide an event-driven + framework for the connections and to keep the connections alive. + It runs a select loop to poll each connection's TCP socket and + hands over the sockets with incoming data for processing by the + corresponding connection. + + The methods of most interest for an IRC client writer are server, + add_global_handler, remove_global_handler, execute_at, + execute_delayed, process_once and process_forever. + + Here is an example: + + irc = irclib.IRC() + server = irc.server() + server.connect(\"irc.some.where\", 6667, \"my_nickname\") + server.privmsg(\"a_nickname\", \"Hi there!\") + irc.process_forever() + + This will connect to the IRC server irc.some.where on port 6667 + using the nickname my_nickname and send the message \"Hi there!\" + to the nickname a_nickname. + """ + + def __init__(self, fn_to_add_socket=None, + fn_to_remove_socket=None, + fn_to_add_timeout=None): + """Constructor for IRC objects. + + Optional arguments are fn_to_add_socket, fn_to_remove_socket + and fn_to_add_timeout. The first two specify functions that + will be called with a socket object as argument when the IRC + object wants to be notified (or stop being notified) of data + coming on a new socket. When new data arrives, the method + process_data should be called. Similarly, fn_to_add_timeout + is called with a number of seconds (a floating point number) + as first argument when the IRC object wants to receive a + notification (by calling the process_timeout method). So, if + e.g. the argument is 42.17, the object wants the + process_timeout method to be called after 42 seconds and 170 + milliseconds. + + The three arguments mainly exist to be able to use an external + main loop (for example Tkinter's or PyGTK's main app loop) + instead of calling the process_forever method. 
+ + An alternative is to just call ServerConnection.process_once() + once in a while. + """ + + if fn_to_add_socket and fn_to_remove_socket: + self.fn_to_add_socket = fn_to_add_socket + self.fn_to_remove_socket = fn_to_remove_socket + else: + self.fn_to_add_socket = None + self.fn_to_remove_socket = None + + self.fn_to_add_timeout = fn_to_add_timeout + self.connections = [] + self.handlers = {} + self.delayed_commands = [] # list of tuples in the format (time, function, arguments) + + self.add_global_handler("ping", _ping_ponger, -42) + + def server(self): + """Creates and returns a ServerConnection object.""" + + c = ServerConnection(self) + self.connections.append(c) + return c + + def process_data(self, sockets): + """Called when there is more data to read on connection sockets. + + Arguments: + + sockets -- A list of socket objects. + + See documentation for IRC.__init__. + """ + for s in sockets: + for c in self.connections: + if s == c._get_socket(): + c.process_data() + + def process_timeout(self): + """Called when a timeout notification is due. + + See documentation for IRC.__init__. + """ + t = time.time() + while self.delayed_commands: + if t >= self.delayed_commands[0][0]: + self.delayed_commands[0][1](*self.delayed_commands[0][2]) + del self.delayed_commands[0] + else: + break + + def process_once(self, timeout=0): + """Process data from connections once. + + Arguments: + + timeout -- How long the select() call should wait if no + data is available. + + This method should be called periodically to check and process + incoming data, if there are any. If that seems boring, look + at the process_forever method. 
+ """ + sockets = map(lambda x: x._get_socket(), self.connections) + sockets = filter(lambda x: x != None, sockets) + if sockets: + (i, o, e) = select.select(sockets, [], [], timeout) + self.process_data(i) + else: + time.sleep(timeout) + self.process_timeout() + + def process_forever(self, timeout=0.2): + """Run an infinite loop, processing data from connections. + + This method repeatedly calls process_once. + + Arguments: + + timeout -- Parameter to pass to process_once. + """ + while 1: + self.process_once(timeout) + + def disconnect_all(self, message=""): + """Disconnects all connections.""" + for c in self.connections: + c.disconnect(message) + + def add_global_handler(self, event, handler, priority=0): + """Adds a global handler function for a specific event type. + + Arguments: + + event -- Event type (a string). Check the values of the + numeric_events dictionary in irclib.py for possible event + types. + + handler -- Callback function. + + priority -- A number (the lower number, the higher priority). + + The handler function is called whenever the specified event is + triggered in any of the connections. See documentation for + the Event class. + + The handler functions are called in priority order (lowest + number is highest priority). If a handler function returns + \"NO MORE\", no more handlers will be called. + """ + + if not event in self.handlers: + self.handlers[event] = [] + bisect.insort(self.handlers[event], ((priority, handler))) + + def remove_global_handler(self, event, handler): + """Removes a global handler function. + + Arguments: + + event -- Event type (a string). + + handler -- Callback function. + + Returns 1 on success, otherwise 0. + """ + if not event in self.handlers: + return 0 + for h in self.handlers[event]: + if handler == h[1]: + self.handlers[event].remove(h) + return 1 + + def execute_at(self, at, function, arguments=()): + """Execute a function at a specified time. 
+ + Arguments: + + at -- Execute at this time (standard \"time_t\" time). + + function -- Function to call. + + arguments -- Arguments to give the function. + """ + self.execute_delayed(at-time.time(), function, arguments) + + def execute_delayed(self, delay, function, arguments=()): + """Execute a function after a specified time. + + Arguments: + + delay -- How many seconds to wait. + + function -- Function to call. + + arguments -- Arguments to give the function. + """ + bisect.insort(self.delayed_commands, (delay+time.time(), function, arguments)) + if self.fn_to_add_timeout: + self.fn_to_add_timeout(delay) + + def dcc(self, dcctype="chat"): + """Creates and returns a DCCConnection object. + + Arguments: + + dcctype -- "chat" for DCC CHAT connections or "raw" for + DCC SEND (or other DCC types). If "chat", + incoming data will be split in newline-separated + chunks. If "raw", incoming data is not touched. + """ + c = DCCConnection(self, dcctype) + self.connections.append(c) + return c + + def _handle_event(self, connection, event): + """[Internal]""" + h = self.handlers + for handler in h.get("all_events", []) + h.get(event.eventtype(), []): + if handler[1](connection, event) == "NO MORE": + return + + def _remove_connection(self, connection): + """[Internal]""" + self.connections.remove(connection) + if self.fn_to_remove_socket: + self.fn_to_remove_socket(connection._get_socket()) + +_rfc_1459_command_regexp = re.compile("^(:(?P[^ ]+) +)?(?P[^ ]+)( *(?P .+))?") + + +class Connection: + """Base class for IRC connections. + + Must be overridden. + """ + def __init__(self, irclibobj): + self.irclibobj = irclibobj + + def _get_socket(): + raise IRCError, "Not overridden" + + ############################## + ### Convenience wrappers. 
+ + def execute_at(self, at, function, arguments=()): + self.irclibobj.execute_at(at, function, arguments) + + def execute_delayed(self, delay, function, arguments=()): + self.irclibobj.execute_delayed(delay, function, arguments) + + +class ServerConnectionError(IRCError): + pass + +class ServerNotConnectedError(ServerConnectionError): + pass + + +# Huh!? Crrrrazy EFNet doesn't follow the RFC: their ircd seems to +# use \n as message separator! :P +_linesep_regexp = re.compile("\r?\n") + +class ServerConnection(Connection): + """This class represents an IRC server connection. + + ServerConnection objects are instantiated by calling the server + method on an IRC object. + """ + + def __init__(self, irclibobj): + Connection.__init__(self, irclibobj) + self.connected = 0 # Not connected yet. + self.socket = None + + def connect(self, server, port, nickname, password=None, username=None, + ircname=None, localaddress="", localport=0): + """Connect/reconnect to a server. + + Arguments: + + server -- Server name. + + port -- Port number. + + nickname -- The nickname. + + password -- Password (if any). + + username -- The username. + + ircname -- The IRC name ("realname"). + + localaddress -- Bind the connection to a specific local IP address. + + localport -- Bind the connection to a specific local port. + + This function can be called to reconnect a closed connection. + + Returns the ServerConnection object. 
+ """ + if self.connected: + self.disconnect("Changing servers") + + self.previous_buffer = "" + self.handlers = {} + self.real_server_name = "" + self.real_nickname = nickname + self.server = server + self.port = port + self.nickname = nickname + self.username = username or nickname + self.ircname = ircname or nickname + self.password = password + self.localaddress = localaddress + self.localport = localport + self.localhost = socket.gethostname() + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + self.socket.bind((self.localaddress, self.localport)) + self.socket.connect((self.server, self.port)) + except socket.error, x: + self.socket.close() + self.socket = None + raise ServerConnectionError, "Couldn't connect to socket: %s" % x + self.connected = 1 + if self.irclibobj.fn_to_add_socket: + self.irclibobj.fn_to_add_socket(self.socket) + + # Log on... + if self.password: + self.pass_(self.password) + self.nick(self.nickname) + self.user(self.username, self.ircname) + return self + + def close(self): + """Close the connection. + + This method closes the connection permanently; after it has + been called, the object is unusable. + """ + + self.disconnect("Closing object") + self.irclibobj._remove_connection(self) + + def _get_socket(self): + """[Internal]""" + return self.socket + + def get_server_name(self): + """Get the (real) server name. + + This method returns the (real) server name, or, more + specifically, what the server calls itself. + """ + + if self.real_server_name: + return self.real_server_name + else: + return "" + + def get_nickname(self): + """Get the (real) nick name. + + This method returns the (real) nickname. The library keeps + track of nick changes, so it might not be the nick name that + was passed to the connect() method. """ + + return self.real_nickname + + def process_data(self): + """[Internal]""" + + try: + new_data = self.socket.recv(2**14) + except socket.error, x: + # The server hung up. 
+ self.disconnect("Connection reset by peer") + return + if not new_data: + # Read nothing: connection must be down. + self.disconnect("Connection reset by peer") + return + + lines = _linesep_regexp.split(self.previous_buffer + new_data) + + # Save the last, unfinished line. + self.previous_buffer = lines[-1] + lines = lines[:-1] + + for line in lines: + if DEBUG: + print "FROM SERVER:", line + + if not line: + continue + + prefix = None + command = None + arguments = None + self._handle_event(Event("all_raw_messages", + self.get_server_name(), + None, + [line])) + + m = _rfc_1459_command_regexp.match(line) + if m.group("prefix"): + prefix = m.group("prefix") + if not self.real_server_name: + self.real_server_name = prefix + + if m.group("command"): + command = m.group("command").lower() + + if m.group("argument"): + a = m.group("argument").split(" :", 1) + arguments = a[0].split() + if len(a) == 2: + arguments.append(a[1]) + + # Translate numerics into more readable strings. + if command in numeric_events: + command = numeric_events[command] + + if command == "nick": + if nm_to_n(prefix) == self.real_nickname: + self.real_nickname = arguments[0] + elif command == "welcome": + # Record the nickname in case the client changed nick + # in a nicknameinuse callback. 
+ self.real_nickname = arguments[0] + + if command in ["privmsg", "notice"]: + target, message = arguments[0], arguments[1] + messages = _ctcp_dequote(message) + + if command == "privmsg": + if is_channel(target): + command = "pubmsg" + else: + if is_channel(target): + command = "pubnotice" + else: + command = "privnotice" + + for m in messages: + if type(m) is types.TupleType: + if command in ["privmsg", "pubmsg"]: + command = "ctcp" + else: + command = "ctcpreply" + + m = list(m) + if DEBUG: + print "command: %s, source: %s, target: %s, arguments: %s" % ( + command, prefix, target, m) + self._handle_event(Event(command, prefix, target, m)) + if command == "ctcp" and m[0] == "ACTION": + self._handle_event(Event("action", prefix, target, m[1:])) + else: + if DEBUG: + print "command: %s, source: %s, target: %s, arguments: %s" % ( + command, prefix, target, [m]) + self._handle_event(Event(command, prefix, target, [m])) + else: + target = None + + if command == "quit": + arguments = [arguments[0]] + elif command == "ping": + target = arguments[0] + else: + target = arguments[0] + arguments = arguments[1:] + + if command == "mode": + if not is_channel(target): + command = "umode" + + if DEBUG: + print "command: %s, source: %s, target: %s, arguments: %s" % ( + command, prefix, target, arguments) + self._handle_event(Event(command, prefix, target, arguments)) + + def _handle_event(self, event): + """[Internal]""" + self.irclibobj._handle_event(self, event) + if event.eventtype() in self.handlers: + for fn in self.handlers[event.eventtype()]: + fn(self, event) + + def is_connected(self): + """Return connection status. + + Returns true if connected, otherwise false. + """ + return self.connected + + def add_global_handler(self, *args): + """Add global handler. + + See documentation for IRC.add_global_handler. + """ + self.irclibobj.add_global_handler(*args) + + def remove_global_handler(self, *args): + """Remove global handler. 
def ctcp(self, ctcptype, target, parameter=""):
    """Send a CTCP command.

    Arguments:

        ctcptype -- The CTCP tag; it is upper-cased before sending.

        target -- Recipient, passed on to privmsg().

        parameter -- Optional data, appended to the tag after a space.
    """
    tagged = ctcptype.upper()
    if parameter:
        tagged = tagged + " " + parameter
    # The tagged data travels inside a PRIVMSG, wrapped in \001 bytes.
    self.privmsg(target, "\001" + tagged + "\001")
def names(self, channels=None):
    """Send a NAMES command.

    Arguments:

        channels -- Optional list of channel names; they are sent as
                    one comma-separated parameter.  When empty or
                    None, a bare NAMES is sent.
    """
    command = "NAMES"
    if channels:
        command = command + " " + ",".join(channels)
    self.send_raw(command)
def part(self, channels, message=""):
    """Send a PART command.

    Arguments:

        channels -- A single channel name (byte or unicode string) or
                    a sequence of channel names.

        message -- Optional part message.  It is sent as the trailing
                   parameter (prefixed with ":") so that a message
                   containing spaces is not cut off after its first
                   word.  A message that already starts with ":" is
                   sent unchanged.
    """
    # type(u"") is unicode on Python 2, so unicode channel names are
    # treated as one name rather than joined character by character.
    if not isinstance(channels, (str, type(u""))):
        channels = ",".join(channels)
    command = "PART " + channels
    if message:
        if not message.startswith(":"):
            message = ":" + message
        command = command + " " + message
    self.send_raw(command)
def who(self, target="", op=""):
    """Send a WHO command.

    Arguments:

        target -- Optional mask/name to query; omitted when false.

        op -- When true, the "o" flag is appended to the command.

    Only true values are added to the command, so passing None for
    either argument gives a bare "WHO" rather than the text "None"
    being formatted into the line.
    """
    command = "WHO"
    if target:
        command = command + " " + target
    if op:
        command = command + " o"
    self.send_raw(command)
+ """ + def __init__(self, irclibobj, dcctype): + Connection.__init__(self, irclibobj) + self.connected = 0 + self.passive = 0 + self.dcctype = dcctype + self.peeraddress = None + self.peerport = None + + def connect(self, address, port): + """Connect/reconnect to a DCC peer. + + Arguments: + address -- Host/IP address of the peer. + + port -- The port number to connect to. + + Returns the DCCConnection object. + """ + self.peeraddress = socket.gethostbyname(address) + self.peerport = port + self.socket = None + self.previous_buffer = "" + self.handlers = {} + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.passive = 0 + try: + self.socket.connect((self.peeraddress, self.peerport)) + except socket.error, x: + raise DCCConnectionError, "Couldn't connect to socket: %s" % x + self.connected = 1 + if self.irclibobj.fn_to_add_socket: + self.irclibobj.fn_to_add_socket(self.socket) + return self + + def listen(self): + """Wait for a connection/reconnection from a DCC peer. + + Returns the DCCConnection object. + + The local IP address and port are available as + self.localaddress and self.localport. After connection from a + peer, the peer address and port are available as + self.peeraddress and self.peerport. + """ + self.previous_buffer = "" + self.handlers = {} + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.passive = 1 + try: + self.socket.bind((socket.gethostbyname(socket.gethostname()), 0)) + self.localaddress, self.localport = self.socket.getsockname() + self.socket.listen(10) + except socket.error, x: + raise DCCConnectionError, "Couldn't bind socket: %s" % x + return self + + def disconnect(self, message=""): + """Hang up the connection and close the object. + + Arguments: + + message -- Quit message. 
def process_data(self):
    """[Internal] Handle readable data on the DCC socket.

    For a passive (listening) connection that is not yet connected,
    the pending peer is accepted and a "dcc_connect" event is emitted.
    Otherwise up to 2**14 bytes are read and delivered as "dccmsg"
    events: one per complete line for "chat" sessions, one per read
    for every other DCC type.
    """

    if self.passive and not self.connected:
        # Swap the listening socket for the accepted peer socket; the
        # listener is closed, so only one peer is ever served.
        conn, (self.peeraddress, self.peerport) = self.socket.accept()
        self.socket.close()
        self.socket = conn
        self.connected = 1
        if DEBUG:
            print "DCC connection from %s:%d" % (
                self.peeraddress, self.peerport)
        self.irclibobj._handle_event(
            self,
            Event("dcc_connect", self.peeraddress, None, None))
        return

    try:
        new_data = self.socket.recv(2**14)
    except socket.error, x:
        # The server hung up.
        self.disconnect("Connection reset by peer")
        return
    if not new_data:
        # Read nothing: connection must be down.
        self.disconnect("Connection reset by peer")
        return

    if self.dcctype == "chat":
        # The specification says lines are terminated with LF, but
        # it seems safer to handle CR LF terminations too.
        chunks = _linesep_regexp.split(self.previous_buffer + new_data)

        # Save the last, unfinished line.
        self.previous_buffer = chunks[-1]
        if len(self.previous_buffer) > 2**14:
            # Bad peer! Naughty peer!
            # (An unterminated line longer than 2**14 characters:
            # drop the connection instead of buffering without bound.)
            self.disconnect()
            return
        chunks = chunks[:-1]
    else:
        # Non-chat transfers are passed through exactly as read.
        chunks = [new_data]

    command = "dccmsg"
    prefix = self.peeraddress
    target = None
    for chunk in chunks:
        if DEBUG:
            print "FROM PEER:", chunk
        arguments = [chunk]
        if DEBUG:
            print "command: %s, source: %s, target: %s, arguments: %s" % (
                command, prefix, target, arguments)
        self.irclibobj._handle_event(
            self,
            Event(command, prefix, target, arguments))
+ """ + try: + self.socket.send(string) + if self.dcctype == "chat": + self.socket.send("\n") + if DEBUG: + print "TO PEER: %s\n" % string + except socket.error, x: + # Ouch! + self.disconnect("Connection reset by peer.") + +class SimpleIRCClient: + """A simple single-server IRC client class. + + This is an example of an object-oriented wrapper of the IRC + framework. A real IRC client can be made by subclassing this + class and adding appropriate methods. + + The method on_join will be called when a "join" event is created + (which is done when the server sends a JOIN messsage/command), + on_privmsg will be called for "privmsg" events, and so on. The + handler methods get two arguments: the connection object (same as + self.connection) and the event object. + + Instance attributes that can be used by sub classes: + + ircobj -- The IRC instance. + + connection -- The ServerConnection instance. + + dcc_connections -- A list of DCCConnection instances. + """ + def __init__(self): + self.ircobj = IRC() + self.connection = self.ircobj.server() + self.dcc_connections = [] + self.ircobj.add_global_handler("all_events", self._dispatcher, -10) + self.ircobj.add_global_handler("dcc_disconnect", self._dcc_disconnect, -10) + + def _dispatcher(self, c, e): + """[Internal]""" + m = "on_" + e.eventtype() + if hasattr(self, m): + getattr(self, m)(c, e) + + def _dcc_disconnect(self, c, e): + self.dcc_connections.remove(c) + + def connect(self, server, port, nickname, password=None, username=None, + ircname=None, localaddress="", localport=0): + """Connect/reconnect to a server. + + Arguments: + + server -- Server name. + + port -- Port number. + + nickname -- The nickname. + + password -- Password (if any). + + username -- The username. + + ircname -- The IRC name. + + localaddress -- Bind the connection to a specific local IP address. + + localport -- Bind the connection to a specific local port. + + This function can be called to reconnect a closed connection. 
def mask_matches(nick, mask):
    """Check if a nick matches a mask.

    Both strings are folded with irc_lower() first.  In the mask, "?"
    stands for exactly one character and "*" for any run of
    characters; everything else is matched literally.  The mask has to
    cover the whole nick, so "foo" does not match "foobar".

    Returns true if the nick matches, otherwise false.
    """
    nick = irc_lower(nick)
    mask = irc_lower(mask)
    # Neutralize regexp metacharacters before expanding the wildcards.
    mask = mask.replace("\\", "\\\\")
    for ch in ".$|[](){}+^":
        mask = mask.replace(ch, "\\" + ch)
    mask = mask.replace("?", ".")
    mask = mask.replace("*", ".*")
    # match() anchors only at the start of the string; \Z anchors the
    # end, so a mask cannot succeed on a mere prefix of the nick.
    r = re.compile(mask + r"\Z", re.IGNORECASE)
    return r.match(nick)
def is_channel(string):
    """Check if a string is a channel name.

    A name counts as a channel when its first character is one of
    "#", "&", "+" or "!".  An empty or None argument is handed back
    as is, which is false in a boolean context.

    Returns true if the argument is a channel name, otherwise false.
    """
    if not string:
        return string
    return string[0] in "#&+!"
+ + Example: + + >>> irclib.parse_nick_modes(\"+ab-c\") + [['+', 'a', None], ['+', 'b', None], ['-', 'c', None]] + """ + + return _parse_modes(mode_string, "") + +def parse_channel_modes(mode_string): + """Parse a channel mode string. + + The function returns a list of lists with three members: sign, + mode and argument. The sign is \"+\" or \"-\". The argument is + None if mode isn't one of \"b\", \"k\", \"l\", \"v\" or \"o\". + + Example: + + >>> irclib.parse_channel_modes(\"+ab-c foo\") + [['+', 'a', None], ['+', 'b', 'foo'], ['-', 'c', None]] + """ + + return _parse_modes(mode_string, "bklvo") + +def _parse_modes(mode_string, unary_modes=""): + """[Internal]""" + modes = [] + arg_count = 0 + + # State variable. + sign = "" + + a = mode_string.split() + if len(a) == 0: + return [] + else: + mode_part, args = a[0], a[1:] + + if mode_part[0] not in "+-": + return [] + for ch in mode_part: + if ch in "+-": + sign = ch + elif ch == " ": + collecting_arguments = 1 + elif ch in unary_modes: + if len(args) >= arg_count + 1: + modes.append([sign, ch, args[arg_count]]) + arg_count = arg_count + 1 + else: + modes.append([sign, ch, None]) + else: + modes.append([sign, ch, None]) + return modes + +def _ping_ponger(connection, event): + """[Internal]""" + connection.pong(event.target()) + +# Numeric table mostly stolen from the Perl IRC module (Net::IRC). 
+numeric_events = { + "001": "welcome", + "002": "yourhost", + "003": "created", + "004": "myinfo", + "005": "featurelist", # XXX + "200": "tracelink", + "201": "traceconnecting", + "202": "tracehandshake", + "203": "traceunknown", + "204": "traceoperator", + "205": "traceuser", + "206": "traceserver", + "207": "traceservice", + "208": "tracenewtype", + "209": "traceclass", + "210": "tracereconnect", + "211": "statslinkinfo", + "212": "statscommands", + "213": "statscline", + "214": "statsnline", + "215": "statsiline", + "216": "statskline", + "217": "statsqline", + "218": "statsyline", + "219": "endofstats", + "221": "umodeis", + "231": "serviceinfo", + "232": "endofservices", + "233": "service", + "234": "servlist", + "235": "servlistend", + "241": "statslline", + "242": "statsuptime", + "243": "statsoline", + "244": "statshline", + "250": "luserconns", + "251": "luserclient", + "252": "luserop", + "253": "luserunknown", + "254": "luserchannels", + "255": "luserme", + "256": "adminme", + "257": "adminloc1", + "258": "adminloc2", + "259": "adminemail", + "261": "tracelog", + "262": "endoftrace", + "263": "tryagain", + "265": "n_local", + "266": "n_global", + "300": "none", + "301": "away", + "302": "userhost", + "303": "ison", + "305": "unaway", + "306": "nowaway", + "311": "whoisuser", + "312": "whoisserver", + "313": "whoisoperator", + "314": "whowasuser", + "315": "endofwho", + "316": "whoischanop", + "317": "whoisidle", + "318": "endofwhois", + "319": "whoischannels", + "321": "liststart", + "322": "list", + "323": "listend", + "324": "channelmodeis", + "329": "channelcreate", + "331": "notopic", + "332": "currenttopic", + "333": "topicinfo", + "341": "inviting", + "342": "summoning", + "346": "invitelist", + "347": "endofinvitelist", + "348": "exceptlist", + "349": "endofexceptlist", + "351": "version", + "352": "whoreply", + "353": "namreply", + "361": "killdone", + "362": "closing", + "363": "closeend", + "364": "links", + "365": "endoflinks", + "366": 
"endofnames", + "367": "banlist", + "368": "endofbanlist", + "369": "endofwhowas", + "371": "info", + "372": "motd", + "373": "infostart", + "374": "endofinfo", + "375": "motdstart", + "376": "endofmotd", + "377": "motd2", # 1997-10-16 -- tkil + "381": "youreoper", + "382": "rehashing", + "384": "myportis", + "391": "time", + "392": "usersstart", + "393": "users", + "394": "endofusers", + "395": "nousers", + "401": "nosuchnick", + "402": "nosuchserver", + "403": "nosuchchannel", + "404": "cannotsendtochan", + "405": "toomanychannels", + "406": "wasnosuchnick", + "407": "toomanytargets", + "409": "noorigin", + "411": "norecipient", + "412": "notexttosend", + "413": "notoplevel", + "414": "wildtoplevel", + "421": "unknowncommand", + "422": "nomotd", + "423": "noadmininfo", + "424": "fileerror", + "431": "nonicknamegiven", + "432": "erroneusnickname", # Thiss iz how its speld in thee RFC. + "433": "nicknameinuse", + "436": "nickcollision", + "437": "unavailresource", # "Nick temporally unavailable" + "441": "usernotinchannel", + "442": "notonchannel", + "443": "useronchannel", + "444": "nologin", + "445": "summondisabled", + "446": "usersdisabled", + "451": "notregistered", + "461": "needmoreparams", + "462": "alreadyregistered", + "463": "nopermforhost", + "464": "passwdmismatch", + "465": "yourebannedcreep", # I love this one... 
+ "466": "youwillbebanned", + "467": "keyset", + "471": "channelisfull", + "472": "unknownmode", + "473": "inviteonlychan", + "474": "bannedfromchan", + "475": "badchannelkey", + "476": "badchanmask", + "477": "nochanmodes", # "Channel doesn't support modes" + "478": "banlistfull", + "481": "noprivileges", + "482": "chanoprivsneeded", + "483": "cantkillserver", + "484": "restricted", # Connection is restricted + "485": "uniqopprivsneeded", + "491": "nooperhost", + "492": "noservicehost", + "501": "umodeunknownflag", + "502": "usersdontmatch", +} + +generated_events = [ + # Generated events + "dcc_connect", + "dcc_disconnect", + "dccmsg", + "disconnect", + "ctcp", + "ctcpreply", +] + +protocol_events = [ + # IRC protocol events + "error", + "join", + "kick", + "mode", + "part", + "ping", + "privmsg", + "privnotice", + "pubmsg", + "pubnotice", + "quit", + "invite", + "pong", +] + +all_events = generated_events + protocol_events + numeric_events.values() diff --git a/lib/irclib/python-irclib.spec b/lib/irclib/python-irclib.spec new file mode 100755 index 0000000..7c9953b --- /dev/null +++ b/lib/irclib/python-irclib.spec @@ -0,0 +1,68 @@ +Summary: A set of Python modules for IRC support. +Name: python-irclib +Version: 0.4.6 +Release: 1 +Group: Development/Libraries +License: LGPL +URL: http://python-irclib.sourceforge.net +Source: %{name}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-root +Requires: python +BuildPrereq: python +BuildArch: noarch + +%description +This library is intended to encapsulate the IRC protocol at a quite +low level. It provides an event-driven IRC client framework. It has +a fairly thorough support for the basic IRC protocol, CTCP and DCC +connections. 
+ +%prep +%setup -q +chmod 644 * + +%build +python -c "import py_compile; py_compile.compile('irclib.py')" +python -c "import py_compile; py_compile.compile('ircbot.py')" + +%install +[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT +%{__mkdir_p} $RPM_BUILD_ROOT/usr/lib/python1.5/site-packages +%{__install} -m 644 irclib.py* $RPM_BUILD_ROOT/usr/lib/python1.5/site-packages +%{__install} -m 644 ircbot.py* $RPM_BUILD_ROOT/usr/lib/python1.5/site-packages + +%clean +[ "$RPM_BUILD_ROOT" != "/" ] && rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root) +%doc README ChangeLog COPYING irccat irccat2 servermap testbot.py dccsend dccreceive +/usr/lib/python*/site-packages/* + +%changelog +* Sat Dec 24 2005 Keltus 0.4.6-1 +- upgraded to 0.4.6 + +* Wed May 18 2005 Keltus 0.4.5-1 +- upgraded to 0.4.5 + +* Wed Feb 23 2005 Keltus 0.4.4-1 +- upgraded to 0.4.4 + +* Sun Jan 19 2005 Joel Rosdahl 0.4.3-1 +- upgraded to 0.4.3 + +* Fri Jul 9 2004 Joel Rosdahl 0.4.2-1 +- upgraded to 0.4.2 + +* Thu Oct 30 2003 Joel Rosdahl 0.4.1-1 +- upgraded to 0.4.1 + +* Mon Sep 1 2002 Gary Benson 0.4.0-1 +- upgraded to 0.4.0 + +* Wed Feb 20 2002 Gary Benson 0.3.4-1 +- upgraded to 0.3.4 + +* Wed Feb 20 2002 Gary Benson 0.3.3-1 +- initial revision diff --git a/lib/irclib/servermap b/lib/irclib/servermap new file mode 100755 index 0000000..f897f56 --- /dev/null +++ b/lib/irclib/servermap @@ -0,0 +1,164 @@ +#! /usr/bin/env python +# +# Example program using irclib.py. +# +# Copyright (C) 1999-2002 Joel Rosdahl +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Joel Rosdahl +# +# servermap connects to an IRC server and finds out what other IRC +# servers there are in the net and prints a tree-like map of their +# interconnections. +# +# Example: +# +# % ./servermap irc.dal.net somenickname +# Connecting to server... +# Getting links... +# +# 26 servers (18 leaves and 8 hubs) +# +# splitrock.tx.us.dal.net +# `-vader.ny.us.dal.net +# |-twisted.ma.us.dal.net +# |-sodre.nj.us.dal.net +# |-glass.oh.us.dal.net +# |-distant.ny.us.dal.net +# | |-algo.se.eu.dal.net +# | | |-borg.se.eu.dal.net +# | | | `-ced.se.eu.dal.net +# | | |-viking.no.eu.dal.net +# | | |-inco.fr.eu.dal.net +# | | |-paranoia.se.eu.dal.net +# | | |-gaston.se.eu.dal.net +# | | | `-powertech.no.eu.dal.net +# | | `-algo-u.se.eu.dal.net +# | |-philly.pa.us.dal.net +# | |-liberty.nj.us.dal.net +# | `-jade.va.us.dal.net +# `-journey.ca.us.dal.net +# |-ion.va.us.dal.net +# |-dragons.ca.us.dal.net +# |-toronto.on.ca.dal.net +# | `-netropolis-r.uk.eu.dal.net +# | |-traced.de.eu.dal.net +# | `-lineone.uk.eu.dal.net +# `-omega.ca.us.dal.net + +import irclib +import sys + +if len(sys.argv) != 3: + print "Usage: servermap " + sys.exit(1) + +links = [] + +def on_connect(connection, event): + sys.stdout.write("\nGetting links...") + sys.stdout.flush() + connection.links() + +def on_passwdmismatch(connection, event): + print "Password required." 
def indent_string(level, active_levels, last):
    """Return the tree-drawing prefix for a node at depth `level`.

    Arguments:

        level -- Depth of the node; the root (0) gets no prefix.

        active_levels -- Ancestor depths that still have siblings to
                         print; those columns get a vertical bar.

        last -- True if the node is the last child of its parent.

    Every ancestor column is two characters wide ("| " or two
    spaces), so the connectors line up under each other.
    """
    if level == 0:
        return ""
    columns = []
    for depth in range(level - 1):
        if depth in active_levels:
            columns.append("| ")
        else:
            columns.append("  ")
    if last:
        columns.append("`-")
    else:
        columns.append("|-")
    return "".join(columns)
+ sys.exit(1) +else: + port = 6667 +nickname = sys.argv[2] + +irc = irclib.IRC() +sys.stdout.write("Connecting to server...") +sys.stdout.flush() +try: + c = irc.server().connect(server, port, nickname) +except irclib.ServerConnectionError, x: + print x + sys.exit(1) + +c.add_global_handler("welcome", on_connect) +c.add_global_handler("passwdmismatch", on_passwdmismatch) +c.add_global_handler("links", on_links) +c.add_global_handler("endoflinks", on_endoflinks) +c.add_global_handler("disconnect", on_disconnect) + +irc.process_forever() diff --git a/lib/irclib/setup.py b/lib/irclib/setup.py new file mode 100755 index 0000000..4fc6f7a --- /dev/null +++ b/lib/irclib/setup.py @@ -0,0 +1,9 @@ +#! /usr/bin/env python + +from distutils.core import setup +setup(name="python-irclib", + version="0.4.6", + py_modules=["irclib", "ircbot"], + author="Joel Rosdahl", + author_email="joel@rosdahl.net", + url="http://python-irclib.sourceforge.net") diff --git a/lib/irclib/testbot.py b/lib/irclib/testbot.py new file mode 100755 index 0000000..6dd6034 --- /dev/null +++ b/lib/irclib/testbot.py @@ -0,0 +1,118 @@ +#! /usr/bin/env python +# +# Example program using ircbot.py. +# +# Joel Rosdahl + +"""A simple example bot. + +This is an example bot that uses the SingleServerIRCBot class from +ircbot.py. The bot enters a channel and listens for commands in +private messages and channel traffic. Commands in channel messages +are given by prefixing the text by the bot name followed by a colon. +It also responds to DCC CHAT invitations and echos data sent in such +sessions. + +The known commands are: + + stats -- Prints some channel information. + + disconnect -- Disconnect the bot. The bot will try to reconnect + after 60 seconds. + + die -- Let the bot cease to exist. + + dcc -- Let the bot invite you to a DCC CHAT connection. 
+""" + +from ircbot import SingleServerIRCBot +from irclib import nm_to_n, nm_to_h, irc_lower, ip_numstr_to_quad, ip_quad_to_numstr + +class TestBot(SingleServerIRCBot): + def __init__(self, channel, nickname, server, port=6667): + SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname) + self.channel = channel + + def on_nicknameinuse(self, c, e): + c.nick(c.get_nickname() + "_") + + def on_welcome(self, c, e): + c.join(self.channel) + + def on_privmsg(self, c, e): + self.do_command(e, e.arguments()[0]) + + def on_pubmsg(self, c, e): + a = e.arguments()[0].split(":", 1) + if len(a) > 1 and irc_lower(a[0]) == irc_lower(self.connection.get_nickname()): + self.do_command(e, a[1].strip()) + return + + def on_dccmsg(self, c, e): + c.privmsg("You said: " + e.arguments()[0]) + + def on_dccchat(self, c, e): + if len(e.arguments()) != 2: + return + args = e.arguments()[1].split() + if len(args) == 4: + try: + address = ip_numstr_to_quad(args[2]) + port = int(args[3]) + except ValueError: + return + self.dcc_connect(address, port) + + def do_command(self, e, cmd): + nick = nm_to_n(e.source()) + c = self.connection + + if cmd == "disconnect": + self.disconnect() + elif cmd == "die": + self.die() + elif cmd == "stats": + for chname, chobj in self.channels.items(): + c.notice(nick, "--- Channel statistics ---") + c.notice(nick, "Channel: " + chname) + users = chobj.users() + users.sort() + c.notice(nick, "Users: " + ", ".join(users)) + opers = chobj.opers() + opers.sort() + c.notice(nick, "Opers: " + ", ".join(opers)) + voiced = chobj.voiced() + voiced.sort() + c.notice(nick, "Voiced: " + ", ".join(voiced)) + elif cmd == "dcc": + dcc = self.dcc_listen() + c.ctcp("DCC", nick, "CHAT chat %s %d" % ( + ip_quad_to_numstr(dcc.localaddress), + dcc.localport)) + else: + c.notice(nick, "Not understood: " + cmd) + +def main(): + import sys + if len(sys.argv) != 4: + print "Usage: testbot " + sys.exit(1) + + s = sys.argv[1].split(":", 1) + server = s[0] + if len(s) == 2: 
+ try: + port = int(s[1]) + except ValueError: + print "Error: Erroneous port." + sys.exit(1) + else: + port = 6667 + channel = sys.argv[2] + nickname = sys.argv[3] + + bot = TestBot(channel, nickname, server, port) + bot.start() + +if __name__ == "__main__": + main() diff --git a/modules/__init__.py b/modules/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/echobox.py b/modules/echobox.py new file mode 100755 index 0000000..dd1b58e --- /dev/null +++ b/modules/echobox.py @@ -0,0 +1,28 @@ +"""An echobox is a device that, upon feeding it an input, stores it and +outputs a previously submitted input. +Usage: !echobox input""" + +import random +import urllib +import urllib2 + +def echobox(nick,channel,tenquestionmarks,input): + """The echobox command submits some text into an "echobox", or + a collection of quotes. After doing so, it outputs a random + quote from that colleciton.""" + + if input == None or input == "": + return tenquestionmarks.html("Echobox error: You need to type something after !echobox, because I'm too lazy to come up with something for you.") + + result = "" + if "web_echobox_url" in tenquestionmarks.config()["echobox"]: + service_target = "%s?%s" % (tenquestionmarks.config()["echobox"]["web_echobox_url"],urllib.urlencode({"input": input, "nick": nick, "chan": channel})) + result = urllib2.urlopen(service_target).read() + else: + echobox = tenquestionmarks.get_json("echobox.json") + if "echoes" not in echobox: + echobox["echoes"] = [] + echobox["echoes"].append(input) + tenquestionmarks.put_json("echobox.json",echobox) + result = random.choice(echobox["echoes"]) + return tenquestionmarks.html("Echobox: %s" % (result)) diff --git a/modules/eightball.py b/modules/eightball.py new file mode 100755 index 0000000..a4232be --- /dev/null +++ b/modules/eightball.py @@ -0,0 +1,7 @@ +"""Ask the eight-ball a yes/no question, and it will give you a reasonable answer. 
+Usage: !eightball question""" + +import random + +def eightball(nick,channel,tenquestionmarks,question): + return tenquestionmarks.html("Eightball: %s" % (random.choice(tenquestionmarks.config()["eightball"]["responses"]))) diff --git a/modules/help.py b/modules/help.py new file mode 100755 index 0000000..998fcf9 --- /dev/null +++ b/modules/help.py @@ -0,0 +1,65 @@ +"""Help system for this tenquestionmarks-based bot. +Usage: + - !tqmhelp -> display an overview of all modules + - !tqmhelp module -> display help for a specific module + - !tqmhelp module command -> display help for a specific command +""" + +import types +import inspect + +def tqmhelp(nick,channel,tenquestionmarks,module,command=None): + output = [] + bnick = tenquestionmarks.config()["nick"] + if module == "": + output.append("%s help index\n" % (bnick)) + output.append("--------------------------------\n") + output.append("In this bot, commands are grouped into modules.\n") + output.append("For help with a specific module, type !tqmhelp followed by \n") + output.append("the name of the module.\n") + output.append("This bot contains the following modules:\n") + output.append("--------------------------------\n") + for submod in tenquestionmarks.modules(): + output.append("%s\n" % (submod)) + submodobj = tenquestionmarks.modules()[submod] + if not submodobj.__doc__ == None: + output.append(submodobj.__doc__) + output.append("\n--------------------------------\n") + elif not command == None: + try: + modobj = tenquestionmarks.modules()[module] + except KeyError: + return tenquestionmarks.html("Help error: No module named %s" % (module)) + if not hasattr(modobj,command): + return tenquestionmarks.html("Help error: No command %s in module %s" % (command,module)) + output.append("%s help for command %s.%s\n" % (bnick, module, command)) + output.append("--------------------------------\n") + commandobj = getattr(modobj,command) + output.append("Command %s.%s\n" % (module, command)) + argspec = 
inspect.getargspec(commandobj) + del argspec.args[0] + del argspec.args[0] + del argspec.args[0] + output.append("Usage: !%s.%s %s\n" % (module, command, " ".join(argspec.args))) + if not commandobj.__doc__ == None: + output.append(commandobj.__doc__) + else: + try: + modobj = tenquestionmarks.modules()[module] + except KeyError: + return tenquestionmarks.html("Help error: No module named %s" % (module)) + output.append("%s help for module %s\n" % (bnick, module)) + output.append("--------------------------------\n") + for var in vars(modobj): + varvalue = vars(modobj)[var] + if not (var.startswith("on_") or var.startswith("_")) and isinstance(varvalue,types.FunctionType): + output.append("Command %s.%s\n" % (module, var)) + argspec = inspect.getargspec(varvalue) + del argspec.args[0] + del argspec.args[0] + del argspec.args[0] + output.append("Usage: !%s.%s %s\n" % (module, var, " ".join(argspec.args))) + if not varvalue.__doc__ == None: + output.append(varvalue.__doc__) + output.append("\n--------------------------------\n") + return tenquestionmarks.html("".join(output)) diff --git a/modules/rss.py b/modules/rss.py new file mode 100755 index 0000000..1df5239 --- /dev/null +++ b/modules/rss.py @@ -0,0 +1,57 @@ +"""RSS feed aggregator. 
This module contains no user-facing commands.""" + +import sys +import os +import threading +import traceback +import feedparser + +def on_connected(tenquestionmarks): + _rss_loop(tenquestionmarks,tenquestionmarks.config()["rss"]["frequency"]) + +def _rss_loop(tenquestionmarks, frequency=900.0): + try: + old_entries_file = os.path.join(tenquestionmarks.directory(),"old-feed-entries") + FILE = open(old_entries_file, "r") + filetext = FILE.read() + FILE.close() + except IOError: + filetext = "" + open(old_entries_file, "w").close() + + filetext = filetext.decode("UTF-8") + for feed in tenquestionmarks.config()["rss"]["feeds"]: + feedname = "" + if isinstance(tenquestionmarks.config()["rss"]["feeds"],dict): + feedname = feed + feed = tenquestionmarks.config()["rss"]["feeds"][feed] + + NextFeed = False + tenquestionmarks.log("refresh","Refreshing feed %s" % (feed)) + d = feedparser.parse(feed) + for entry in d.entries: + title = entry.title + try: + title = title.encode("ascii") + except UnicodeEncodeError, uee: + title = tenquestionmarks.degrade_to_ascii(title) + except UnicodeDecodeError, ude: + title = tenquestionmarks.degrade_to_ascii(title) + if title in filetext: + tenquestionmarks.log("refresh","Old entry: %s" % (title)) + NextFeed = True + else: + FILE = open(old_entries_file, "a") + try: + FILE.write(title + u"\n") + except Exception, e: + traceback.print_exc(file=sys.stdout) + tenquestionmarks.log("Error","%s %s" % (e,title)) + FILE.close() + tenquestionmarks.queue(tenquestionmarks.config()["rss"]["format"] % ({"title": title, "link": entry.link, "feedname": feedname})) + if NextFeed: + break + + def refresher(): _rss_loop(tenquestionmarks,frequency) + t = threading.Timer(frequency, refresher) # TODO: make this static + t.start() diff --git a/project.py b/project.py new file mode 100755 index 0000000..8748299 --- /dev/null +++ b/project.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python + +from monarchpass.beedrill import Project, Action, Listener, do + +class 
Tenquestionmarks(Project): + name = "Tenquestionmarks" + author = "Adrian Malacoda" + + @Action( + title="Hello World", + command="hello" + ) + def hello(self,person="world"): + print "Hello {person}!".format(person=person) + + hidave = do( + action="hello", + command="hidave", + kwargs={"person": "Dave"} + ) + + @Action( + title="Goodbye World", + command="goodbye", + prerequisites=["hello"], + clean_directory=False + ) + def goodbye(self): + print "Bye!" + + @Listener(before="goodbye") + def goodbyeListener(self): + print "Listened to goodbye!" \ No newline at end of file diff --git a/tenquestionmarks.py b/tenquestionmarks.py new file mode 100755 index 0000000..81e4242 --- /dev/null +++ b/tenquestionmarks.py @@ -0,0 +1,267 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +tenquestionmarks is an extensible, modular IRC bot. + +This file is governed by the following license: +Copyright (c) 2011 Adrian Malacoda, Monarch Pass + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License, version 3, as published by +the Free Software Foundation. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Based on the IRC bot at: +http://kstars.wordpress.com/2009/09/05/a-python-irc-bot-for-keeping-up-with-arxiv-or-any-rss-feed/ +Copyright 2009 by Akarsh Simha +""" + +import sys +import os +import threading +import time +import optparse +import json +import traceback +import datetime +import inspect +import re + +for libdir in os.listdir(os.path.join(os.path.dirname(__file__),"lib")): + sys.path.append(os.path.join(os.path.dirname(__file__),"lib",libdir)) + +import irclib +from monarchpass import butterfree + +class Tenquestionmarks(butterfree.Base,irclib.SimpleIRCClient): + VERSION_NAME = "Tenquestionmarks" + VERSION_NUMBER = "0.16.15" + + def __init__(self): + irclib.SimpleIRCClient.__init__(self) + self.msgqueue = [] + butterfree.Base.__init__(self,"tenquestionmarks") + + def config(self): + conf = butterfree.Base.config(self) + if "port" not in conf: + conf["port"] = 6667 + return conf + + def _dispatcher(self,connection,event): + irclib.SimpleIRCClient._dispatcher(self,connection,event) + self.log(event.eventtype(),"%s from %s to %s" % (event.arguments(), event.source(), event.target())) + if not self.modules() == {}: + method = "on_" + event.eventtype() + self.dispatch(method,self,event) + + def version(self): + """Creates the version string that is returned from a CTCP version + request. This can be overridden.""" + return "%s %s" % (Tenquestionmarks.VERSION_NAME, Tenquestionmarks.VERSION_NUMBER) + + def connect(self): + """Connects to the IRC network given in the constructor and joins a + set of channels. 
+ + When this method completes, the on_connected event is fired for modules + to handle.""" + + irclib.SimpleIRCClient.connect(self,self.config()["host"], self.config()["port"], self.config()["nick"], None, self.config()["nick"], ircname=self.config()["nick"]) + if "password" in self.config(): + self.server.privmsg("NickServ", "identify %s" % (self.config()["password"])) + for channel in self.config()["channels"]: + self.connection.join(channel) + self.dispatch("on_connected",self) + + def loop(self): + """Starts the IRC bot's loop.""" + + while 1: + while len(self.msgqueue) > 0: + msg = self.msgqueue.pop() + for channel in self.config()["channels"]: + self.log("post","Posting queued message %s" % (msg)) + try: + self.connection.privmsg(channel, msg) + except Exception, e: + traceback.print_exc(file=sys.stdout) + self.log("Error","%s %s" % (e,msg)) + time.sleep(1) # TODO: Fix bad code + self.ircobj.process_once() + time.sleep(1) # So that we don't hog the CPU! + + def queue(self, message): + """Adds a message to the end of the bot's message queue.""" + self.msgqueue.append(message.encode("UTF-8")) + + def on_ctcp(self, connection, event): + """Event handler for CTCP messages. If the CTCP message is a version request, + a version string is returned.""" + + self.log("ctcp","%s from %s" % (event.arguments(), event.source())) + ctcptype = event.arguments()[0] + if ctcptype == "VERSION": + self.connection.ctcp_reply(event.source().split("!")[0],self.version()) + + def on_privmsg(self, connection, event): + """Event handler for messages sent directly to the bot. 
These + are always treated as commands.""" + + message = event.arguments()[0] + nick = event.source().split("!")[0] + try: + (command, arg) = message.split(" ",1) + except ValueError: + command = message + arg = "" + self.log("cmd","%s called command %s with arg %s" % (nick,command,arg)) + self.call_command(nick, None, command, arg) + + def on_pubmsg(self, connection, event): + """Event handler for messages sent to a channel in which the + bot resides. If the message starts with a certain string, the + message is treated as a command.""" + + message = event.arguments()[0] + nick = event.source().split("!")[0] + channel = event.target() + if message.startswith(self.command_prefix): + try: + (command, arg) = message[1:].split(" ",1) + except ValueError: + command = message[1:] + arg = "" + self.log("cmd","%s called command %s in %s with arg %s" % (nick,command,channel,arg)) + self.call_command(nick, channel, command, arg) + + def call_command(self, nick, channel, command, arg): + """Calls a command defined in one or more modules. This method is + called indirectly through IRC messages sent from a user, who may + optionally be sending that message publicly in a channel. + + The argument is a single string. Depending on how many arguments + the command takes, this string may be broken up into a number + of strings separated by spaces. 
+ + The return value of the command is output either back to the + user or to the channel in which the command was invoked.""" + + command_functions = butterfree.find_functions(self.modules(),command) + if len(command_functions) > 0: + for func in command_functions: + argspec = inspect.getargspec(func) + numargs = len(argspec.args) - 3 + varargs = not argspec.varargs == None + args = [] + if varargs: + args = arg.split(" ") + else: + args = arg.split(" ",numargs - 1) + args.insert(0,self) + args.insert(0,channel) + args.insert(0,nick) + try: + returnvalue = func(*args) + if not returnvalue == None: + if not channel == None: + self.multiline_privmsg(channel, returnvalue) + else: + self.multiline_privmsg(nick, returnvalue) + except Exception, e: + traceback.print_exc(file=sys.stdout) + self.log("Error","Command %s caused an %s" % (command, e)) + + def degrade_to_ascii(self,string): + """In order to allow as wide a range of inputs as possible, if + a Unicode string cannot be decoded, it can instead be run through + this function, which will change "fancy quotes" to regular quotes + and remove diacritics and accent marks, among other things. 
+ + To doubly sure that the string is safe for ASCII, any non-ASCII + characters are then removed after any conversion is done.""" + + chars = { + u"’": "'", + u"‘": "'", + u"“": '"', + u"”": '"', + u"Æ": "AE", + u"À": "A", + u"Á": "A", + u"Â": "A", + u"Ã": "A", + u"Ä": "A", + u"Å": "A", + u"Ç": "C", + u"È": "E", + u"É": "E", + u"Ê": "E", + u"Ë": "E", + u"Ì": "I", + u"Í": "I", + u"Î": "I", + u"Ï": "I", + u"Ð": "D", + u"Ñ": "N", + u"Ò": "O", + u"Ó": "O", + u"Ô": "O", + u"Õ": "O", + u"Ö": "O", + u"Ø": "O", + u"Ù": "U", + u"Ú": "U", + u"Û": "U", + u"Ü": "U", + u"Ý": "Y", + u"Þ": "Th", + u"ß": "S", + u"–": "-" + } + for char in chars: + string = string.replace(char,chars[char]).replace(char.lower(),chars[char].lower()) + + # Strip away anything non-ascii that remains + string = "".join([char for char in string if ord(char) < 128]) + return string + def html(self,string): + """Basic parser that converts between a very limited subset of + HTML and IRC control codes. + + Note that this parser is very strict about the font tag. It expects + either color or color and bgcolor, but not bgcolor alone. 
It also wants + both color and bgcolor to be quoted and separated by no more or less than one space.""" + + tags = [ + [re.compile("(.+?)"),"\x02%(0)s\x02"], + [re.compile("(.+?)"),"\x1f%(0)s\x1f"], + [re.compile("(.+?)"),"\x03%(0)s%(1)s\x03"], + [re.compile("(.+?)"),"\x03%(0)s,%(1)s%(2)s\x03"], + [re.compile("(.+?)"),"\x03%(1)s,%(0)s%2(2)s\x03"] + ] + for (regex,replacement) in tags: + regex_match = regex.search(string) + while regex_match is not None: + groups_dict = {} + for i in xrange(len(regex_match.groups())): + groups_dict[str(i)] = regex_match.groups()[i] + + string = string.replace(regex_match.group(0), replacement % groups_dict) + regex_match = regex.search(string) + return string + def multiline_privmsg(self, target, message): + for line in message.split("\n"): + self.connection.privmsg(target, line) + +if __name__ == "__main__": + tqm = Tenquestionmarks() + tqm.connect() + tqm.loop() diff --git a/tenquestionmarks.py.old b/tenquestionmarks.py.old new file mode 100755 index 0000000..8bc70ef --- /dev/null +++ b/tenquestionmarks.py.old @@ -0,0 +1,383 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +tenquestionmarks is an extensible, modular IRC bot. + +This file is governed by the following license: +Copyright (c) 2011 Adrian Malacoda, Monarch Pass + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License, version 3, as published by +the Free Software Foundation. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Based on the IRC bot at: +http://kstars.wordpress.com/2009/09/05/a-python-irc-bot-for-keeping-up-with-arxiv-or-any-rss-feed/ +Copyright 2009 by Akarsh Simha +""" + +import sys +import os +import threading +import time +import optparse +import json +import traceback +import datetime +import inspect +import re + +for libdir in os.listdir(os.path.join(os.path.dirname(__file__),"lib")): + sys.path.append(os.path.join(os.path.dirname(__file__),"lib",libdir)) + +import irclib + +class Tenquestionmarks(irclib.SimpleIRCClient): + VERSION_NAME = "Tenquestionmarks" + VERSION_NUMBER = "0.13.23" + + def __init__(self,hostname, channels, nick, command_prefix="!", password=None, port=6667, directory=None): + irclib.SimpleIRCClient.__init__(self) + self.hostname = hostname + self.port = port + self.channel_list = channels + self.nick = nick + self.password = password + if directory == None: + directory = os.path.join(os.environ.get("HOME"),".tenquestionmarks") + self.directory = directory + try: + os.makedirs(directory) + except OSError: pass + self.msgqueue = [] + self.logfile = open(os.path.join(self.directory,"log.log"),"w") + self.command_prefix = command_prefix + self.config = self.get_json("options.json") + self.modules = {} + + def load_modules(self,modules): + """Loads a set of modules given by name. The modules reside in the + modules subdirectory and so are prefixed with the string "module." 
""" + self.modules = {} + for module in modules: + self.log("module","Loading module %s" % (module)) + try: + self.modules[module] = getattr(__import__("modules.%s" % (module)),module) + self.log("module","%s loaded" % (module)) + except ImportError: + traceback.print_exc(file=sys.stdout) + self.log("module","No module named %s" % (module)) + + def _dispatcher(self,connection,event): + irclib.SimpleIRCClient._dispatcher(self,connection,event) + self.log(event.eventtype(),"%s from %s to %s" % (event.arguments(), event.source(), event.target())) + if self.modules: + method = "on_" + event.eventtype() + self._fire_method_call(method,self,event) + + def _fire_method_call(self,method,*args): + if self.modules: + for module in self.modules: + module = self.modules[module] + if hasattr(module, method): + getattr(module, method)(*args) + + def version(self): + """Creates the version string that is returned from a CTCP version + request. This can be overridden.""" + return "%s %s" % (Tenquestionmarks.VERSION_NAME, Tenquestionmarks.VERSION_NUMBER) + + def connect(self): + """Connects to the IRC network given in the constructor and joins a + set of channels. 
+ + When this method completes, the on_connected event is fired for modules + to handle.""" + + irclib.SimpleIRCClient.connect(self,self.hostname, self.port, self.nick, None, self.nick, ircname=self.nick) + if not self.password == None: + self.server.privmsg("NickServ", "identify %s" % (self.nick)) + for channel in self.channel_list: + self.connection.join(channel) + self._fire_method_call("on_connected",self) + + def loop(self): + """Starts the IRC bot's loop.""" + + while 1: + while len(self.msgqueue) > 0: + msg = self.msgqueue.pop() + for channel in self.channel_list: + self.log("post","Posting queued message %s" % (msg)) + try: + self.connection.privmsg(channel, msg) + except Exception, e: + traceback.print_exc(file=sys.stdout) + self.log("Error","%s %s" % (e,msg)) + time.sleep(1) # TODO: Fix bad code + self.ircobj.process_once() + time.sleep(1) # So that we don't hog the CPU! + + def queue(self, message): + """Adds a message to the end of the bot's message queue.""" + self.msgqueue.append(message.encode("UTF-8")) + + def on_ctcp(self, connection, event): + """Event handler for CTCP messages. If the CTCP message is a version request, + a version string is returned.""" + + self.log("ctcp","%s from %s" % (event.arguments(), event.source())) + ctcptype = event.arguments()[0] + if ctcptype == "VERSION": + self.connection.ctcp_reply(event.source().split("!")[0],self.version()) + + def on_privmsg(self, connection, event): + """Event handler for messages sent directly to the bot. These + are always treated as commands.""" + + message = event.arguments()[0] + nick = event.source().split("!")[0] + try: + (command, arg) = message.split(" ",1) + except ValueError: + command = message + arg = "" + self.log("cmd","%s called command %s with arg %s" % (nick,command,arg)) + self.call_command(nick, None, command, arg) + + def on_pubmsg(self, connection, event): + """Event handler for messages sent to a channel in which the + bot resides. 
If the message starts with a certain string, the + message is treated as a command.""" + + message = event.arguments()[0] + nick = event.source().split("!")[0] + channel = event.target() + if message.startswith(self.command_prefix): + try: + (command, arg) = message[1:].split(" ",1) + except ValueError: + command = message[1:] + arg = "" + self.log("cmd","%s called command %s in %s with arg %s" % (nick,command,channel,arg)) + self.call_command(nick, channel, command, arg) + + def call_command(self, nick, channel, command, arg): + """Calls a command defined in one or more modules. This method is + called indirectly through IRC messages sent from a user, who may + optionally be sending that message publicly in a channel. + + The argument is a single string. Depending on how many arguments + the command takes, this string may be broken up into a number + of strings separated by spaces. + + The return value of the command is output either back to the + user or to the channel in which the command was invoked.""" + + command_functions = self.resolve_command(command) + if len(command_functions) > 0: + for func in command_functions: + argspec = inspect.getargspec(func) + numargs = len(argspec.args) - 3 + varargs = not argspec.varargs == None + args = [] + if varargs: + args = arg.split(" ") + else: + args = arg.split(" ",numargs - 1) + args.insert(0,self) + args.insert(0,channel) + args.insert(0,nick) + try: + returnvalue = func(*args) + if not returnvalue == None: + if not channel == None: + self.multiline_privmsg(channel, returnvalue) + else: + self.multiline_privmsg(nick, returnvalue) + except Exception, e: + traceback.print_exc(file=sys.stdout) + self.log("Error","Command %s caused an %s" % (command, e)) + + def resolve_command(self, cmdname): + """Given a command name, traverses through the modules loaded into + the bot and finds functions that match the command name. + + If given a command name by itself, this method will look through + all modules for the command. 
If the command name is given in + the form [module].[command], only the given module is considered.""" + + funcs = [] + module = "" + if "." in cmdname: + (module, cmdname) = cmdname.split(".") + self.log("cmd","%s is located in module %s" % (cmdname, module)) + modobj = self.modules[module] + if hasattr(modobj,cmdname): + func = getattr(modobj,cmdname) + funcs.append(func) + else: + self.log("cmd","%s is not looking for a specific module" % (cmdname)) + for modname in self.modules: + modobj = self.modules[modname] + if hasattr(modobj,cmdname): + func = getattr(modobj,cmdname) + funcs.append(func) + return funcs + + def log(self, operation, message): + """Logs a message onto standard out and into a file in the bot's directory.""" + + try: + logmessage = u"[%s] %s: %s" % (datetime.datetime.now(), operation.upper(), message) + print logmessage + self.logfile.write(logmessage) + except Exception, e: + traceback.print_exc(file=sys.stdout) + + def get_json(self,filename): + """Retrieves a JSON object (Python dictionary) from a file in this bot's directory. + + If the file does not exist, one is created and an empty dictionary + is returned.""" + + try: + target = open(os.path.join(options.directory,filename)) + obj = json.loads(target.read()) + target.close() + return obj + except IOError: + self.put_json(filename, {}) + return {} + + def put_json(self,filename,obj): + """Saves a JSON object (Python dictionary) as a file in this bot's directory.""" + + target = open(os.path.join(options.directory,filename),"w") + target.write(json.dumps(obj)) + target.close() + def degrade_to_ascii(self,string): + """In order to allow as wide a range of inputs as possible, if + a Unicode string cannot be decoded, it can instead be run through + this function, which will change "fancy quotes" to regular quotes + and remove diacritics and accent marks, among other things. 
+ + To doubly sure that the string is safe for ASCII, any non-ASCII + characters are then removed after any conversion is done.""" + + chars = { + u"’": "'", + u"‘": "'", + u"“": '"', + u"”": '"', + u"Æ": "AE", + u"À": "A", + u"Á": "A", + u"Â": "A", + u"Ã": "A", + u"Ä": "A", + u"Å": "A", + u"Ç": "C", + u"È": "E", + u"É": "E", + u"Ê": "E", + u"Ë": "E", + u"Ì": "I", + u"Í": "I", + u"Î": "I", + u"Ï": "I", + u"Ð": "D", + u"Ñ": "N", + u"Ò": "O", + u"Ó": "O", + u"Ô": "O", + u"Õ": "O", + u"Ö": "O", + u"Ø": "O", + u"Ù": "U", + u"Ú": "U", + u"Û": "U", + u"Ü": "U", + u"Ý": "Y", + u"Þ": "Th", + u"ß": "S", + u"–": "-" + } + for char in chars: + string = string.replace(char,chars[char]).replace(char.lower(),chars[char].lower()) + + # Strip away anything non-ascii that remains + string = "".join([char for char in string if ord(char) < 128]) + return string + def html(self,string): + """Basic parser that converts between a very limited subset of + HTML and IRC control codes. + + Note that this parser is very strict about the font tag. It expects + either color or color and bgcolor, but not bgcolor alone. 
It also wants + both color and bgcolor to be quoted and separated by no more or less than one space.""" + + tags = [ + [re.compile("(.+?)"),"\x02%(0)s\x02"], + [re.compile("(.+?)"),"\x1f%(0)s\x1f"], + [re.compile("(.+?)"),"\x03%(0)s%(1)s\x03"], + [re.compile("(.+?)"),"\x03%(0)s,%(1)s%(2)s\x03"], + [re.compile("(.+?)"),"\x03%(1)s,%(0)s%2(2)s\x03"] + ] + for (regex,replacement) in tags: + regex_match = regex.search(string) + while regex_match is not None: + groups_dict = {} + for i in xrange(len(regex_match.groups())): + groups_dict[str(i)] = regex_match.groups()[i] + + string = string.replace(regex_match.group(0), replacement % groups_dict) + regex_match = regex.search(string) + return string + def multiline_privmsg(self, target, message): + for line in message.split("\n"): + self.connection.privmsg(target, line) + +if __name__ == "__main__": + optparser = optparse.OptionParser() + optparser.add_option("-n", "--nick", action="store", type="string", dest="nick", help="IRC bot nick") + optparser.add_option("-s", "--host", action="store", type="string", dest="host", help="Hostname of the IRC server") + optparser.add_option("-d", "--directory", action="store", type="string", dest="directory", default=os.path.join(os.environ.get("HOME"),".tenquestionmarks"), + help="Directory where the bot stores things (default: ~/.tenquestionmarks)") + optparser.add_option("-p", "--password", action="store", type="string", dest="password", help="Nickserv password") + optparser.add_option("-c", "--channels", action="store", type="string", dest="channels", help="Comma-separated list of channels to join") + optparser.add_option("-m", "--modules", action="store", type="string", dest="modules", help="Comma-separated names of modules to load") + optparser.add_option("-r", "--command-prefix", action="store", type="string", dest="commandprefix", help="Prefix put in front of user commands (default: !)") + (options, args) = optparser.parse_args() + conf_file = 
open(os.path.join(options.directory,"options.json")) + conf = json.loads(conf_file.read()) + conf_file.close() + + if options.nick == None and "nick" in conf: + options.nick = conf["nick"] + if options.password == None and "password" in conf: + options.password = conf["password"] + if options.channels == None and "channels" in conf: + options.channels = conf["channels"] + else: + options.channels = options.channels.split(",") + if options.modules == None and "modules" in conf: + options.modules = conf["modules"] + elif not options.modules == None: + options.modules = options.modules.split(",") + if options.host == None and "host" in conf: + options.host = conf["host"] + if options.commandprefix == None and "commandprefix" in conf: + options.commandprefix = conf["commandprefix"] + tqm = Tenquestionmarks(options.host, options.channels, options.nick, options.commandprefix, options.password) + if not options.modules == None: + tqm.load_modules(options.modules) + tqm.connect() + tqm.loop()