Wikipedia:Nokkakala/lähdekoodi

Replacement filing cabinet.svg Tämä sivu on arkisto. Älä muokkaa tätä sivua.

#!/usr/bin/env python
# Nagano - Parse IP addresses from IRC and resolve the attached hostname
#
# This work is in the public domain; Santtu Pajukanta, the original author,
# hereby disclaims all copyright and/or IP interests on this program.
# Blah blah blah bla blha blah blah blah blah blal bla blal blah blah
# Blah blah blah bla blha blah blah blah blah blal bla blal phks blah
# Blah blah blah bla blha blah blah blah blah blal bla blal blah blah
# Blah blah blah bla blha blah blah blah blah blal bla blal blah blah

from twisted.protocols import irc
from twisted.internet import reactor, protocol
from twisted.names import client
from twisted.web.client import getPage

from urllib import urlencode

from re import compile, sub, match
from xml.dom import minidom

import socket
import time

import os
# Write this process's id, followed by a newline, to the pid file in the
# current working directory.  try/finally guarantees the handle is closed
# even if the write itself fails.
pidfile = "wha.pid"
fhandle = open(pidfile, 'w')
try:
  fhandle.write("%d\n" % os.getpid())
finally:
  fhandle.close()


def normalizeLine(line):
  """Return `line` decoded by toUnicode() with IRC formatting codes removed.

  Removed are the single-character style codes \\x02, \\x16, \\x1F and \\x0F,
  and every colour code: \\x03 optionally followed by a one or two digit
  foreground number and, only when digits follow it, a ",NN" background
  part.  A comma that is not followed by a digit is ordinary message text
  and is kept.
  """
  line = toUnicode(line)
  # Single-character style codes.
  line = sub("[\x02\x16\x1F\x0F]", "", line)
  # Colour codes; `k` is the module-level colour introducer (\x03).
  line = sub(k + r"(?:\d{1,2}(?:,\d{1,2})?)?", "", line)
  return line

# Guess whether the line is utf-8 or iso-8859-15; if it can be decoded as
# neither, the input is handed back unchanged.
def toUnicode(line):
  """Decode `line` as UTF-8, falling back to ISO-8859-15.

  Returns the decoded text.  If neither decode works (for example because
  `line` is already text containing non-ASCII characters, or is not a
  string at all) `line` itself is returned untouched.
  """
  for encoding in ('utf-8', 'iso-8859-15'):
    try:
      return line.decode(encoding)
    except (UnicodeError, AttributeError):
      # UnicodeError: the bytes are not valid in this encoding.
      # AttributeError: the object has no decode() method.
      continue
  return line


# Finds the first dotted-quad IPv4 address anywhere in a string; group 1 is
# the address.
ipRe  = compile(r".*?((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)).*?")
# Page-edit line.  Groups: 1 page title, 2 flags (any of "", "M", "N", "MN"),
# 3 url, 4 user, 5 comment.
whole   = compile(r"^\[\[(.+)\]\] (M?N?) (http[^ ]+) \* ([^*]+) \* (.*)$")
# Log line.  Groups: 1 page title, 2 lower-case word after the title,
# 3 always empty (keeps the group numbering parallel to `whole`), 4 user,
# 5 comment.
logLine = compile(r"^\[\[(.+)\]\] ([a-z]+)() +\* ([^*]+) \* +(.*)$")
# Content of a <text> element on a single line.
textRe  = compile(r".*<text>(.*)</text>.*")
# Channel commands; group 1 is the page argument.
fetchRe = compile(r"!fetch (.*)")
lenRe   = compile(r"!len (.*)")
# IRC control characters: k introduces a colour code, b toggles bold.
k = u"\x03"
b = u"\x02"

strings = {
  "a" : "b",
}




class Nagano:

  def __init__(self):
    """Configure the relay, open both IRC connections and run the reactor.

    The settings are stored on the Nagano class itself rather than on the
    instance, because the nested protocol classes read them as
    ``Nagano.<name>``.  This call blocks inside ``reactor.run()`` until the
    reactor is stopped.
    """
    # Switches and URL template.  `$1` in fetchURL is replaced by the
    # project domain when a page is fetched.
    Nagano.resolve = 1
    Nagano.fetch = 1
    Nagano.fetchURL = 'http://fi.$1.org/w/index.php?'

    # Channels joined by the input and by the output connection.
    Nagano.iChans = "#fi.wikipedia,#fi.wikibooks,#fi.wikiquote,#fi.wikinews,#fi.wikisource"
    Nagano.oChans = "#wikipedia-fi-rc"

    # Nicknames used on the input and on the output network.
    Nagano.iNick = "nokkakala"
    Nagano.oNick = "nokkakala"

    # Copied into InputNagano.lineRate.
    Nagano.lineRate = 0.5

    # The live protocol instances; each protocol stores itself here once it
    # is connected.  They start out as None so that the `if Nagano.out:` and
    # `if Nagano.inn:` checks in the protocols do not raise AttributeError
    # while the other connection is still being set up.
    Nagano.inn = None
    Nagano.out = None

    self.iPort = 6667
    self.oPort = 6667

    self.oFact = Nagano.NaganoFactory("output", self)
    self.iFact = Nagano.NaganoFactory("input", self)

    reactor.connectTCP("irc.wikimedia.org", self.iPort, self.iFact)
    reactor.connectTCP("irc.freenode.net", self.oPort, self.oFact)
    reactor.run()

  class InputNagano(irc.IRCClient):
    def __init__(self):
      self.nickname = Nagano.iNick
      self.lineRate = Nagano.lineRate

    def signedOn(self):
      self.join(Nagano.iChans)
      Nagano.inn = self

    def sayline(self, line):
      if Nagano.out:
       Nagano.out.sayline(line)


    def privmsg(self, user, channel, message):
      # No styles and in unicode
      msg = normalizeLine(message)
      data = {}

#      if channel not in Nagano.iChans.split(','): return

      if   channel == "#fi.wikipedia":
        data["pref"] = ""
      elif channel == "#fi.wikinews":
        data["pref"] = "n:"
      elif channel == "#fi.wikibooks":
        data["pref"] = "b:"
      elif channel == "#fi.wikiquote":
        data["pref"] = "q:"
      elif channel == "#fi.wikisource":
        data["pref"] = "s:"
      else:
        data["pref"] = "bogus:"

      prefix = data["pref"]

      mtc = logLine.match(msg)
      if mtc:
        data["type"] = "log/" + mtc.group(2)
        data["page"] = mtc.group(1)
        data["flag"] = ""
        data["url" ] = ""
        data["user"] = mtc.group(4)
        data["text"] = mtc.group(5)

      else:
        # 0 whole 1 article 2 flags 3 url 4 who 5 comments
        mtc = whole.match(msg)
        if mtc:
          data["type"] = "page"
          data["page"] = mtc.group(1)
          data["flag"] = mtc.group(2)
          data["url" ] = mtc.group(3)
          data["user"] = mtc.group(4)
          data["text"] = mtc.group(5)
        else:
          print "Got no match"
          return

      # New and not minor page, still fall trough
      if data["flag"] == "N":
        Nagano.out.fetchPage(prefix + data["page"].encode('utf-8'),1)

      # Logged in edit
      if not ipRe.match(data["user"]):
        self.sayline(Nagano.out.sayclean(data))
        return

      # Try to resolve the ip address
      addr = ipRe.match(mtc.group(4)).group(1)
      ptr = '.'.join(addr.split('.')[::-1]) + '.in-addr.arpa'

      # Do the asynchronous DNS query
      d = client.lookupPointer(ptr)

      # Set callbacks
      d.addCallback(self.sayResolved, prefix + mtc.group(1), mtc.group(2) + "R", mtc.group(3), addr, mtc.group(5))
      d.addErrback(self.handleResolveError, prefix + mtc.group(1), mtc.group(2) + "R", mtc.group(3), addr, mtc.group(5))


    def lineReceived(self, line):
      timestamp = time.strftime("[%H:%M:%S]", time.localtime(time.time()))
      print timestamp + " " + line
      Nagano.inn = self
      irc.IRCClient.lineReceived(self, line)


    def sayResolved(self, dns, article, flags, url, who, comments):
      print dns
      self.sayline(Nagano.out.saycolor(article, flags, url, who + " (%s)" % dns[0][0].payload.name, comments))


    def handleResolveError(self, err, article, flags, url, who, comments):
      self.sayline(Nagano.out.saycolor(article, flags, url, who, comments))



  class OutputNagano(irc.IRCClient):
    def __init__(self):
      self.nickname = Nagano.oNick
      Nagano.put = None


    def signedOn(self):
      # Join the target channel
      self.join(Nagano.oChans)

      # Keep the thing alive
      #self.join("#en.wikipedia")

      # Sailyta topic vanhalla kanavalla
      #self.join("#firk.wikipedia")

      # err
      #self.say(Nagano.oChans, "I'm so retarted I should kill you right there")

      # Keek up
      Nagano.out = self


    def sayline(self, line):
      self.say(Nagano.oChans, line);

    # this is new one!!
    def sayclean(self, data):
      for i in ('page', 'user', 'flag', 'text', 'url'):
        if data[i].strip() == "":
          data[i] = ""
        else:
          data[i] = data[i].strip() + " "

      data["url"] = sub(r'''http://(.+)/w/index.php\?title=.+&diff=([0-9]+)&oldid=([0-9]+)&rcid=([0-9]+)''', r'''http://\1/wiki/?diff=\2&oldid=\3&rcid=\4''', data["url"])

      if data["type"] == "log/newusers":
        msg = k + u"04" + b + data["type"] + b + k + u"10" + data["user"]
      else:
        msg = k + u"03" + data["pref"] + data["page"] + k + u"04" + b + data["flag"] + b + k + u"10" +\
          data["user"] + k + u"15" + data["text"] + k + u"14" + data["url"]


      # Output in UTF-8?
      try:
        return msg.encode('UTF-8')
        #return msg.encode('iso-8859-15','replace')
      except:
        return "DIE DIE DIE IN SAY"


    def saycolor(self, article, flags, url, who, comments):
      article = article.strip() + " "
      if flags.strip() == "":
        flags = ""
      else:
        flags = flags.strip() + " "

      if url.strip() == "":
        url = ""
      else:
        url = url.strip() + " "

      if who.strip() == "":
        who = ""
      else:
        who = who.strip() + " "

      if comments.strip() == "":
        comments = ""
      else:
        comments = comments.strip() + " "

      # Colors and stuff
      msg = k + u"03" + article + k + u"04" + b + flags + b + k + u"10" + who + k + u"15" + comments + k + u"14" + url
      # Output in iso-8859-15
      try:
        return msg.encode('iso-8859-15','replace')
      except:
        return "DIE DIE DIE IN SAY"


    def privmsg(self, user, channel, message):
      # No styles and in unicode
      msg = normalizeLine(message)

      if message == "!quit" and user == "ilaiho!i=MULLEEISPAMMATA":
        reactor.stop()
        return

      if message == "!tryfix":
        self.quit("Testing")
        if Nagano.inn:
          Nagano.inn.quit("Testing")
        return


      # Fetch command?
      if match ("!fetch (.*)",msg):
        page = fetchRe.match(msg).group(1)
        self.fetchPage(page.encode('utf-8'),1)
        return

      # Len command?
      if match ("!len (.*)",msg):
        page = lenRe.match(msg).group(1)
        self.fetchPage(page.encode('utf-8'),0)
        return


    def lineReceived(self, line):
      # Keep up2date
      Nagano.out = self
      timestamp = time.strftime("[%H:%M:%S]", time.localtime(time.time()))
      print timestamp + " " + line
      irc.IRCClient.lineReceived(self, line)



    def fetchPage(self, page, fetch):
      self.fetchPageReal(page).addCallback(self.gotPage, page, fetch).addErrback(self.handleFetchError, page)


    def fetchPageReal(self, page):
      # Normalize the shit
      items = page.split(':')
      finalpage = page.split(':')
      domain = 'wikipedia'

      for item in items[:-1]:
        if item not in ('', 'w', 'n', 'wikt', 'b', 'q', 's'): break
        finalpage = finalpage[1:]
        if item == '' or item == 'w': domain = 'wikipedia'
        if item == 'n': domain = 'wikinews'
        if item == 'wikt': domain = 'wiktionary'
        if item == 'b': domain = 'wikibooks'
        if item == 'q': domain = 'wikiquote'
        if item == 's': domain = 'wikisource'

      finalpage = ':'.join(finalpage)
      url = Nagano.fetchURL.replace('$1', domain)
      url += urlencode([('title',finalpage),('action','raw')])
      return getPage(url)


    def gotPage(self, data, page, snippet):
      data = data.decode("utf-8").replace("\n", u"\xB6")
      length = len(data);
      # remove wikilinks to fit more content
      data = sub('\[\[([^\x5d]+?)\|([^\x5d]+)\]\]', '\\2', data)
      data = sub('\[\[([^\x5d]+?)\]\]', '\\1', data)
      data= data[:380]
      page = page.decode('utf-8').replace("_"," ")

      if snippet:
        # Does it fit?
        if len(data) == 380: data += "..."
        # Say it
        self.sayline(self.saycolor(page, "F", "", str(length), data))
      else:
        self.say(Nagano.oChans, "Page %s length is %s" % (page.encode('iso-8859-15','replace'), length))


    def handleFetchError(self, err, page):
      self.say(Nagano.oChans, "Could not fetch page %s: %s" % (page.decode('utf-8').encode('iso-8859-15','replace'), err.value))

    def connectionLost(self, reason):
#      Nagano.out = None
      irc.IRCClient.connectionLost(self,reason)



  class NaganoFactory(protocol.ReconnectingClientFactory):
    """Reconnecting factory for either the input or the output protocol."""

    def __init__(self, put, host):
      """Select the protocol class for this connection.

      put  -- "input" (InputNagano) or "output" (OutputNagano)
      host -- the object creating the factory; stored as `self.host`

      Raises ValueError for any other `put`, instead of leaving the
      factory without a protocol class and failing on first connect.
      """
      if put == "input":
        self.protocol = Nagano.InputNagano
      elif put == "output":
        self.protocol = Nagano.OutputNagano
      else:
        raise ValueError("unknown direction %r (expected 'input' or 'output')" % (put,))

      self.dire = put
      self.host = host

    def buildProtocol(self, addr):
      """Build the protocol for a freshly established connection.

      The reconnection delay is reset first; otherwise the back-off keeps
      growing with every disconnect for the whole life of the process.
      """
      self.resetDelay()
      return protocol.ReconnectingClientFactory.buildProtocol(self, addr)



# Script entry point: constructing Nagano connects both IRC clients and
# then blocks inside reactor.run() until the reactor is stopped.
if __name__ == "__main__":
  Nagano()