974 lines
32 KiB
Python
974 lines
32 KiB
Python
#!/soft/python-2.3.4/bin/python
|
|
|
|
# webTunnel.py. A simple mail tunnel to get web pages.
|
|
# Copyright (C) 2004 David Soulayrol <dsoulayrol@free.fr>
|
|
#
|
|
# webTunnel.py is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
#
|
|
#
|
|
# 2004-08-27 David Soulayrol <dsoulayrol@free.fr>
|
|
# * Client class now holds a name and an address.
|
|
# * Now using email package from standard library.
|
|
# * Changed XML DTD so that Configuration object doesn't have to
|
|
# keep a parsing context anymore. Validation is up to the parser,
|
|
# if a validating parser is available.
|
|
# * Release 0.2
|
|
#
|
|
# 2004-08-20 David Soulayrol <dsoulayrol@free.fr>
|
|
# * Implemented webtunnel-contact to authorize anyone to mail admins.
|
|
# * Implemented cleanup method into WebtunnelRequest objects.
|
|
# * Merged RequestHolder class code into Processor class.
|
|
# * Long lines can now be splitted with \ character at each end of line.
|
|
# * WebtunnelError now sends back the unknown request in its content.
|
|
# * Factorized Wget code into __startWget.
|
|
# * Updated docstrings. Help text is now this module docstring.
|
|
#
|
|
# 2004-08-13 David Soulayrol <dsoulayrol@free.fr>
|
|
# * Implemented webtunnel-announce request.
|
|
# * Implemented webtunnel-code request.
|
|
# * Modified search order to specify the number of answers.
|
|
# * Added the LSEARCH order to specify more parameters,
|
|
# currently the number of answers.
|
|
# * Moved the authentication system into WebtunnelRequest objects.
|
|
#
|
|
# 2004-08-05 David Soulayrol <dsoulayrol@free.fr>
|
|
# * Commented out wget traces in body to fool Interscan filter.
|
|
# * Added quotes around the URL in wget command (just in case).
|
|
# * Added options for cookie management in wget command line.
|
|
#
|
|
# 2004-05-10 David Soulayrol <dsoulayrol@free.fr>
|
|
# * Now keep and send back wget traces in response body (with popen).
|
|
# * Updated docstrings and help text.
|
|
# * Now using introspection (with eval()) to manage requests.
|
|
# See RequestHolder.execute().
|
|
#
|
|
# 2004-05-05 Karine Lamie
|
|
# * Added the SEARCH order which launches a search request to google
|
|
# in a simple form (takes only words in parameter and no symbol like
|
|
# +, -, ...). In a second time, this command could take in parameter
|
|
# the name of search engine (google.fr, google.com, altavista, yahoo...).
|
|
#
|
|
# 2004-05-04 David Soulayrol <dsoulayrol@free.fr>
|
|
# * Added --useragent option to wget to fool the Google redirection.
|
|
#
|
|
# 2004-04-30 David Soulayrol <dsoulayrol@free.fr>
|
|
# * Moved tunnel mail address and SMTP server into configuration file.
|
|
# * Slighly modified the request system to understand
|
|
# more than one command via the subject.
|
|
# * Added the webtunnel-help command and its answer.
|
|
#
|
|
# 2004-04-22 David Soulayrol <dsoulayrol@free.fr>
|
|
# * Use pyGround mail wrapper.
|
|
# * Use pyGround XML parsing to externalize authorized users.
|
|
# * The returned Zip file doesn't contains the temporary folder anymore.
|
|
# * Cosmetic modification on the returned body.
|
|
# * Fixed bug when two blanks follow the command GET.
|
|
#
|
|
# 2004-03-21 David Soulayrol <dsoulayrol@free.fr>
|
|
# * Release 0.1.
|
|
#
|
|
|
|
"""webTunnel.py retrieves web pages on requests transmitted by mail.
|
|
|
|
Webtunnel.py reads requests from subject lines of the mails it
|
|
receives. If the request name is correct and its sender authorized
|
|
to ask for it, the request is treated. This can involve parsing
|
|
the body of the mail, but not necessarily.
|
|
|
|
The program recognizes the following requests:
|
|
|
|
webtunnel-announce
|
|
Availability: admins
|
|
If this request is emitted by an administrator, then the
|
|
body content is forwarded to all the users, otherwise an
|
|
error is returned to the sender.
|
|
|
|
webtunnel-contact
|
|
Availability: anyone
|
|
This request allows anybody to send a message to all the
|
|
administrators.
|
|
|
|
webtunnel-code
|
|
Availability: anyone
|
|
Returns the complete code of the program. You can
|
|
redistribute it, with or without modifications, as long
|
|
as you follow the GNU General Public License.
|
|
|
|
webtunnel-get
|
|
Availability: admins and users
|
|
The server analyses the body content to determine the
|
|
orders to execute. It is possible to specify one or more
|
|
of the following orders in one mail:
|
|
|
|
GET <url>
|
|
The url is passed to wget.
|
|
|
|
SEARCH <keyword> [<keyword>...]
|
|
A Google request is built from the given keywords and
|
|
is passed to wget.
|
|
|
|
LSEARCH <number> <keyword> [<keyword>...]
|
|
A Google request is built from the keywords and the
|
|
given number of results and is passed to wget.
|
|
|
|
The result of all the orders is sent back packed in
|
|
a ZIP archive.
|
|
|
|
webtunnel-help
|
|
Availability: anyone
|
|
The program returns this mail.
|
|
|
|
|
|
Be aware that all your requests can be logged for debug purpose.
|
|
"""
|
|
|
|
import logging, os, shutil, sys, smtplib, time, zipfile
|
|
import email, email.Utils, email.Encoders
|
|
|
|
from email.MIMEMultipart import MIMEMultipart
|
|
from email.MIMEBase import MIMEBase
|
|
from email.MIMEText import MIMEText
|
|
|
|
from ground.util import log
|
|
from ground.xml.parsers import XmlBootstrapParser
|
|
from ground.xml.consumers import SAXConsumer, RootSAXConsumer
|
|
|
|
#
|
|
# Global definitions so as to keep the rest a bit abstract
|
|
#
|
|
|
|
# Release identifier, appended to the signature part of every outgoing mail.
VERSION = '0.2 (27/08/2004)'

# Wget interface.
# WGET_COOKIES is formatted with (cookie file to load, cookie file to save)
# and WGET_OPTIONS with (user agent, URL); both are appended to WGET_COMMAND
# to build the shell command line. Note that WGET_OPTIONS wraps the URL in
# single quotes and redirects stderr to stdout.
WGET_COMMAND = '/soft/wget-1.9.1/bin/wget'
WGET_OPTIONS = ' --user-agent=%s -nv -E -x -p -k \'%s\' 2>&1'
WGET_COOKIES = ' --cookies=on --load-cookies %s --save-cookies %s'
WGET_USER_AGENT = 'Mozilla/5.0'

# Program installation.
HOME = '/home/dsoulayr'
CONFIGURATION_FILE = 'webtunnel.conf'
PROGRAM_FILE = 'webtunnel-0.2.py'
# Built from HOME so the installation directory is spelled out only once.
# The result is an absolute path, so joining it with HOME again (as the
# main section does) leaves it unchanged.
TRACE_FILE = os.path.join(HOME, 'log', 'webtunnel.log')

# Mail attachments.
PART_TEXT = 'text/plain'
# The conventional 'application/x-zip-compressed' type was replaced by a
# custom one to get through the Thalhes filter.
PART_ZIP = 'application/x-tunnel-compressed'
|
|
|
|
|
|
class WebTunnelException(Exception):
    """Base class of the errors raised while handling a request."""
    pass
|
|
|
|
|
|
class UnauthorizedAccessException(WebTunnelException):
    """Signal that a client sent a request it is not allowed to send."""
    pass
|
|
|
|
|
|
class BadRequestException(WebTunnelException):
    """Signal that a valid client sent a request that cannot be handled."""
    pass
|
|
|
|
|
|
class ConfigurationException(Exception):
    """Base class of the errors raised while reading the configuration."""
    pass
|
|
|
|
|
|
class InvalidAddressException(ConfigurationException):
    """Signal that a Client instance cannot be built.

    Raised by the Client constructor when the name or the address is not
    a string, or when the address is missing. The latter also happens
    when a Client is built from an RFC822 header holding no address.
    """
    pass
|
|
|
|
|
|
class Client(object):
    """A wrapper for a well-formed client address.

    An RFC822 address is composed of a name and an address. The address
    is mandatory here. One should use the class method FromRFC822Header
    to build a Client instance from a To: or From: mail header field.

    Two clients are considered equal when their addresses are equal; the
    name is ignored. A Client can also be compared to a plain string
    holding an address.
    """

    # String types accepted for names and addresses. The unicode type is
    # only present on interpreters that have a separate unicode type.
    try:
        _TEXT_TYPES = (str, unicode)
    except NameError:
        _TEXT_TYPES = (str,)

    def __init__(self, aName, aAddress):
        """The constructor.

        A Client instance is built from a name and an address. Both are
        stripped of surrounding blanks. Raise InvalidAddressException if
        one of them is not a string, or if the address is empty once
        stripped.
        """
        if not isinstance(aName, self._TEXT_TYPES):
            raise InvalidAddressException('aName must be str or unicode')
        if not isinstance(aAddress, self._TEXT_TYPES):
            raise InvalidAddressException('aAddress must be str or unicode')

        # Strip before testing so that a blank-only address is rejected too.
        lAddress = aAddress.strip()
        if not lAddress:
            raise InvalidAddressException('aAddress agrument is mandatory')

        self.__name = aName.strip()
        self.__address = lAddress

    def getName(self):
        """Return this client name."""
        return self.__name

    def getAddress(self):
        """Return this client address."""
        return self.__address

    def __str__(self):
        """Return this Client as 'name <address>'."""
        return self.__name + ' <' + self.__address + '>'

    def __eq__(self, aAddress):
        """Redefine equal operator.

        aAddress can be a Client instance or a string. In the first
        case, return true if this instance has the same address as
        the other Client instance. In the second case, return true if
        the address of this instance equals the string. For any other
        type, return NotImplemented so that the comparison is false
        instead of raising.
        """
        if isinstance(aAddress, Client):
            return self.__address == aAddress.getAddress()
        if isinstance(aAddress, self._TEXT_TYPES):
            return self.__address == aAddress
        return NotImplemented

    def __ne__(self, aAddress):
        """Redefine not-equal operator as the negation of __eq__."""
        lEqual = self.__eq__(aAddress)
        if lEqual is NotImplemented:
            return lEqual
        return not lEqual

    def __hash__(self):
        """Hash on the address, consistently with __eq__."""
        return hash(self.__address)

    def FromRFC822Header(cls, aRFC822Address):
        """Build a new Client instance from the given RFC822 address.

        Raise InvalidAddressException if no address can be extracted.
        """
        lName, lAddress = email.Utils.parseaddr(aRFC822Address)
        return cls(lName, lAddress)
    FromRFC822Header = classmethod(FromRFC822Header)
|
|
|
|
|
|
|
|
# Request Handlers
|
|
class WebtunnelRequest(object):
    """The mother class of all request handlers.

    A request handler is an object able to authenticate a user (or client),
    run the orders of the client, and send back its result.

    Adding a new request handler to this program is simply done by subclassing
    this class. For example, by creating the WebtunnelSpam class, the program
    will be able to handle incoming mails with 'webtunnel-spam' subjects.

    Defining the behaviour of a new request handler is done by redefining some
    methods, namely authenticate, run and possibly cleanup, getRecipients,
    getResponseSubject or __init__ if necessary.
    """

    def __init__(self, aConfiguration, aClient):
        """The constructor.

        aConfiguration must be a valid Configuration instance. aClient is
        a Client instance. An empty multipart result mail is created; parts
        are added to it with the attach method.
        """
        self.__configuration = aConfiguration
        self.__client = aClient
        self.__mail = MIMEMultipart(charset = 'ISO-8859-1')

    def authenticate(self):
        """Authenticate the user given at the request build time.

        Default behaviour is to accept every request.
        """
        pass

    def run(self, aContent):
        """Execute the request.

        Default behaviour is to do nothing.
        """
        pass

    def cleanup(self):
        """Remove temporary files or trash produced during the run.

        Every request creating temporary files or other objects should
        redefine this method to cleanup everything.
        """
        pass

    def sendMail(self):
        """Send the answer.

        The recipients of the mail sent here are defined by the
        getRecipients method (by default, the client that emitted the
        request). Therefore, modifying the recipients of this
        mail is done by redefining the getRecipients method. This method
        should not be redefined.
        """
        # Evaluate the recipients once so that the To: header and the SMTP
        # envelope cannot disagree.
        lRecipients = self.getRecipients()
        lTunnel = self.__configuration.getTunnelAddress()

        self.__mail['Subject'] = self.getResponseSubject()
        self.__mail['From'] = str(lTunnel)
        self.__mail['To'] = ', '.join(
            [str(lClient) for lClient in lRecipients])
        self.attach('--\nBrought to you by WebTunnel.py v.' + VERSION,
                    PART_TEXT, 'Webtunnel signature', 'Signature')

        lConnection = smtplib.SMTP(self.__configuration.getSMTPServer())
        try:
            lConnection.sendmail(
                lTunnel.getAddress(),
                [lRecipient.getAddress() for lRecipient in lRecipients],
                self.__mail.as_string())
            lConnection.quit()
        finally:
            # Release the socket even if the send failed; closing after a
            # successful quit() is harmless.
            lConnection.close()

    def getConf(self):
        """Return the Configuration instance given at this request build time.
        """
        return self.__configuration

    def getClient(self):
        """Return the client of this request."""
        return self.__client

    def getRecipients(self):
        """Return a list containing all the recipients of the result mail."""
        return [self.__client]

    def getResponseSubject(self):
        """Return the subject of the result mail."""
        return 'Webtunnel-Answer'

    def attach(self, aAttachment, aType, aDescription = '', aFilename = ''):
        """Attach a new part to the result mail.

        Subclasses of WebtunnelRequest must use this method to build
        the result mail. Currently, request handlers can add this way
        text parts and binary attachments.

        If aType is PART_TEXT, aAttachment must be a string, and will
        be attached as a textual part of the result mail. If aType is
        PART_ZIP, aAttachment should be the name of the file to attach;
        its content is read and base64 encoded. Any other type raises
        ValueError.

        The aDescription parameter is used in both cases if present to
        add the Content-Description header. The aFilename attribute is
        used in the case of a PART_ZIP to add a Content-Disposition
        header.
        """
        if aType == PART_TEXT:
            lMessage = MIMEText(aAttachment, _charset = 'ISO-8859-1')

        elif aType == PART_ZIP:
            # MIMEBase expects the main type and the subtype separately.
            lMainType, lSubType = aType.split('/', 1)
            lMessage = MIMEBase(lMainType, lSubType)

            lFile = open(aAttachment, 'rb')
            try:
                lMessage.set_payload(lFile.read())
            finally:
                lFile.close()
            email.Encoders.encode_base64(lMessage)
            if aFilename:
                lMessage.add_header(
                    'Content-Disposition', 'attachment', filename = aFilename)

        else:
            raise ValueError('Unsupported attachment type: ' + str(aType))

        if aDescription:
            lMessage.add_header('Content-Description', aDescription)
        self.__mail.attach(lMessage)
|
|
|
|
|
|
|
|
class WebtunnelAnnounce(WebtunnelRequest):
    """Handler of the 'webtunnel-announce' requests.

    Only administrators may send such a request. The body of their mail
    is forwarded to every user and every administrator, which makes this
    request the recommended way for administrators to make announcements
    (ie. about the webtunnel service).
    """

    def authenticate(self):
        """Check that the client is an administrator.

        Raise UnauthorizedAccessException otherwise.
        """
        lAdministrators = self.getConf().getAdministrators()
        if self.getClient() in lAdministrators:
            return
        raise UnauthorizedAccessException(
            'Unauthorized access for ' + str(self.getClient()))

    def run(self, aContent):
        """Copy the incoming mail body into the result mail and log it."""
        self.attach(aContent, PART_TEXT, 'Webtunnem announce', 'announce')
        lLogger = logging.getLogger('webtunnel')
        lLogger.info('Announce made for ' + str(self.getClient()))

    def getRecipients(self):
        """Return the recipients of the result mail.

        These are all the users followed by all the administrators
        known by the configuration.
        """
        lConfiguration = self.getConf()
        lEveryone = lConfiguration.getUsers()
        return (lEveryone + lConfiguration.getAdministrators())

    def getResponseSubject(self):
        """Return the subject line used for announcements."""
        return 'Webtunnel-Message'
|
|
|
|
|
|
|
|
class WebtunnelContact(WebtunnelRequest):
    """Handler of the 'webtunnel-contact' requests.

    Anybody can use this request to send a mail to all the administrators
    at once. It is the recommended way to contact them (ie. for feature
    or bug requests).
    """

    def run(self, aContent):
        """Copy the incoming mail body into the result mail and log it."""
        # TODO: better copy in case of multimime. Simply use set_payload ?
        self.attach(aContent, PART_TEXT, 'Webtunnel request', 'request')
        lLogger = logging.getLogger('webtunnel')
        lLogger.info('Message registered for ' + str(self.getClient()))

    def getRecipients(self):
        """Return the recipients of the result mail: the administrators."""
        lConfiguration = self.getConf()
        return lConfiguration.getAdministrators()

    def getResponseSubject(self):
        """Return the subject line used for contact mails."""
        return 'Webtunnel-Contact'
|
|
|
|
|
|
|
|
class WebtunnelCode(WebtunnelRequest):
    """A handler to retrieve this program code.

    The request 'webtunnel-code' is available to everyone. You can use or
    modify the returned code, and even redistribute it, as long as you
    follow the GNU Public Licence.
    """

    def __init__(self, aConfiguration, aClient):
        """The constructor.

        aConfiguration must be a valid Configuration instance. aClient is
        a Client instance. A private temporary directory is created to
        hold the archive; it is removed by cleanup.
        """
        import tempfile

        super(WebtunnelCode, self).__init__(aConfiguration, aClient)
        # mkdtemp picks an unused, unpredictable name and creates the
        # directory readable by its owner only.
        self.__cache = tempfile.mkdtemp('.d', 'webtunnel-')
        self.__archive = 'webtunnel.zip'

    def run(self, aContent):
        """Execute the request.

        This request handler builds a zip file containing the program
        and attaches it to the result mail. The content of the incoming
        mail is ignored.
        """
        lSource = os.path.join(HOME, 'bin', PROGRAM_FILE)
        # Absolute paths are used throughout so that nothing depends on
        # the current working directory.
        lArchivePath = os.path.join(self.__cache, self.__archive)

        lZipFile = zipfile.ZipFile(lArchivePath, 'w')
        try:
            lZipFile.write(lSource, PROGRAM_FILE, zipfile.ZIP_DEFLATED)
        finally:
            lZipFile.close()

        self.attach(
            lArchivePath, PART_ZIP, 'The Webtunnel code', self.__archive)
        logging.getLogger('webtunnel').info(
            'Code copied for ' + str(self.getClient()))

    def cleanup(self):
        """Remove the temporary directory and everything it contains.

        This is safe to call even if run failed before creating the
        archive, or if cleanup was already called.
        """
        if os.path.isdir(self.__cache):
            shutil.rmtree(self.__cache)
|
|
|
|
|
|
|
|
class WebtunnelHelp(WebtunnelRequest):
    """Handler of the 'webtunnel-help' requests.

    This request is available to everyone.
    """

    def run(self, aContent):
        """Attach the help text to the result mail.

        The help text is the docstring of this module.
        """
        # Inside a method, __doc__ resolves to the module-level docstring,
        # not to the docstring of this class.
        lHelpText = __doc__
        self.attach(lHelpText, PART_TEXT, 'Webtunnel documentation', 'doc')

    def getResponseSubject(self):
        """Return the subject line used for help mails."""
        return 'Webtunnel-Help'
|
|
|
|
|
|
|
|
class WebtunnelGet(WebtunnelRequest):
    """A handler to retrieve web pages.

    This handler is the most interesting for the users since it allows
    them (and the administrators as well) to send orders to retrieve
    files via http (ie. web pages).

    The body of incoming mails having 'webtunnel-get' as subject is a
    set of orders. This handler parses this body to answer these orders.
    """

    def __init__(self, aConfiguration, aClient):
        """The constructor.

        aConfiguration must be a valid Configuration instance. aClient is
        a Client instance. A private temporary directory is created and
        becomes the current working directory, since wget stores the
        files it fetches relatively to it.
        """
        import tempfile

        super(WebtunnelGet, self).__init__(aConfiguration, aClient)
        # mkdtemp picks an unused, unpredictable name and creates the
        # directory readable by its owner only.
        self.__cache = tempfile.mkdtemp('.d', 'webtunnel-')
        # Base name of the archive: the directory name without its suffix.
        self.__name = os.path.basename(self.__cache)[:-len('.d')]
        self.__traces = ''
        os.chdir(self.__cache)

    def authenticate(self):
        """Authenticate the given user.

        Raise UnauthorizedAccessException if the client is neither a
        user nor an administrator.
        """
        lClient = self.getClient()
        if lClient in self.getConf().getAdministrators():
            return
        if lClient in self.getConf().getUsers():
            return
        raise UnauthorizedAccessException(
            'Unauthorized access for: ' + str(lClient))

    def run(self, aContent):
        """Execute the request.

        This request handler parses the content of incoming mail to
        read the orders it contains. An order is a command followed by
        one or more blanks and some arguments. Available commands are
        GET, SEARCH and LSEARCH. A line too long can be split with the
        antislash character followed by a carriage return. Every other
        line is ignored.

        The files are fetched with wget and are stored in a temporary
        directory. When all the requested files are fetched, a Zip
        archive is built and attached to the result mail.
        """
        lOrder = ''
        # splitlines copes with both LF and CRLF line ends.
        for lLine in (aContent or '').splitlines():
            lLine = lLine.rstrip()
            if lLine.endswith('\\'):
                # Continued on the next line: accumulate and go on.
                lOrder += lLine[:-1]
                continue

            lOrder += lLine.strip()
            lWords = lOrder.split(None, 1)
            lOrder = ''
            if len(lWords) == 2:
                self.runOrder(lWords[0].upper(), lWords[1].strip())
            # Lines without a command and an argument are ignored.

        #self.attach(self.__traces)
        lZipName = self.pack()
        self.attach(os.path.join(self.__cache, lZipName), PART_ZIP,
                    'Answer from Webtunnel.py', lZipName)

    def runOrder(self, aCommand, aArgument):
        """Execute exactly one order.

        aCommand and aArgument are two strings representing respectively
        the command and the arguments of the order. Unknown commands are
        ignored, as well as LSEARCH orders whose first argument is not
        a number.
        """
        if aCommand == 'GET':
            self.doDownload(aArgument)
        elif aCommand == 'SEARCH':
            self.doSearch(aArgument.split())
        elif aCommand == 'LSEARCH':
            lWords = aArgument.split()
            if not lWords or not lWords[0].isdigit():
                logging.getLogger('webtunnel').warning(
                    'ignoring LSEARCH order with invalid count: ' + aArgument)
                return
            self.doSearch(lWords[1:], lWords[0])

    def cleanup(self):
        """Remove the temporary directory cache."""
        shutil.rmtree(self.__cache)

    def doSearch(self, aKeywordList, aCount = '20'):
        """Start a new search using Google.

        aKeywordList is the list of words to search for; each of them is
        URL-encoded. aCount is the number of answers to retrieve.
        Download is achieved using wget.
        """
        import urllib

        lKeywords = '+'.join(
            [urllib.quote_plus(lKeyword) for lKeyword in aKeywordList])
        lURL = 'www.google.fr/search?q=' + lKeywords + '&num=' + str(aCount)
        self.__startWget(lURL)

    def doDownload(self, aURL):
        """Start a new download.

        Download is achieved using wget.
        """
        self.__startWget(aURL)

    def pack(self):
        """Zip the current directory cache.

        Every file found under the cache directory is stored in the
        archive under its path relative to this directory. The archive is
        created in the cache directory itself; its file name (without
        directory) is returned.
        """
        lZipName = self.__name + '.tunneled' #'.zip'
        lZipPath = os.path.join(self.__cache, lZipName)
        lZipFile = zipfile.ZipFile(lZipPath, 'w')
        try:
            for lRoot, lDirs, lFiles in os.walk(self.__cache):
                for lFile in lFiles:
                    lFullPath = os.path.join(lRoot, lFile)
                    if lFullPath == lZipPath:
                        # Do not pack the archive into itself.
                        continue
                    lRelative = lFullPath[len(self.__cache):].lstrip(os.sep)
                    lZipFile.write(lFullPath, lRelative, zipfile.ZIP_DEFLATED)
        finally:
            lZipFile.close()
        return lZipName

    def __shellQuote(self, aText):
        """Return aText as one single-quoted word for a POSIX shell."""
        return "'" + aText.replace("'", "'\\''") + "'"

    def __startWget(self, aURL):
        """Launch a new wget command.

        aURL is the URL to retrieve. Wget traces are stored using a pipe.
        Since the command is run through a shell and the URL comes from
        the incoming mail, every variable part is quoted. URLs that are
        empty or start with a dash are refused, so that they cannot be
        taken for wget options.
        """
        lLogger = logging.getLogger('webtunnel')
        if not aURL or aURL.startswith('-'):
            lLogger.warning('refusing suspicious URL ' + repr(aURL))
            return

        lCookieFile = self.__shellQuote(os.path.join(
            HOME, 'cookies', self.getClient().getAddress()))
        lCommand = WGET_COMMAND
        lCommand += WGET_COOKIES % (lCookieFile, lCookieFile)
        # WGET_OPTIONS already wraps the URL in single quotes, so only
        # the quotes embedded in the URL have to be escaped here.
        lCommand += WGET_OPTIONS % (
            WGET_USER_AGENT, aURL.replace("'", "'\\''"))

        lLogger.debug('launching ' + lCommand)
        lLogger.info('retrieving ' + aURL)
        lPipe = os.popen(lCommand)
        try:
            lLines = lPipe.readlines()
        finally:
            lPipe.close()
        for lLine in lLines:
            lLogger.debug('WGET: ' + lLine)
        self.__traces += ''.join(lLines)
|
|
|
|
|
|
|
|
class WebtunnelError(WebtunnelRequest):
    """A handler for returning errors to the client.

    This handler is instantiated when no other handler corresponds to
    a request, or when an error has occurred when running a valid request.
    It sends back to the client an error message with the erroneous request.
    """

    def __init__(self, aConfiguration, aClient):
        """The constructor.

        aConfiguration must be a valid Configuration instance. aClient is
        a Client instance.
        """
        super(WebtunnelError, self).__init__(aConfiguration, aClient)
        self.__request = ''
        self.__exception = None

    def setRequest(self, aRequest):
        """Set the request name asked by the client."""
        self.__request = aRequest

    def setException(self, aException):
        """Set the exception that occurred."""
        self.__exception = aException

    def run(self, aContent):
        """Execute the request.

        This request handler builds an error message from the exception,
        the request name (when one was set) and the incoming content,
        and attaches it to the result mail. aContent may be None or a
        list of parts; it is converted to text in these cases.
        """
        if aContent is None:
            lContent = ''
        elif isinstance(aContent, (list, tuple)):
            # Multipart payloads come as a sequence of parts.
            lContent = '\n'.join([str(lPart) for lPart in aContent])
        else:
            lContent = aContent

        lMessage = str(self.__exception)
        lMessage += '\n\nTry sending webtunnel-help in the subject line.'
        if self.__request:
            lMessage += '\nRequest was: ' + str(self.__request)
        lMessage += '\nMessage was:\n'
        lMessage += lContent
        self.attach(lMessage, PART_TEXT, 'Webtunnel error', 'error')

    def getResponseSubject(self):
        """Return the subject of the result mail."""
        return 'Webtunnel-Error'
|
|
|
|
|
|
|
|
class Processor(object):
    """The requests manager.

    The Processor class manages exactly one request. It first instantiates
    the appropriate handler, and then makes this handler execute each step
    to resolve the request.
    """

    def __init__(self, aConfiguration, aInput):
        """The constructor.

        aConfiguration must be a valid Configuration instance. aInput is the
        stream from which the request will be read.
        """
        self.__configuration = aConfiguration
        self.__inputStream = aInput

    def run(self):
        """Parse and resolve the request.

        The client address, request name and content are first retrieved
        from the incoming mail. A handler is instantiated and run step
        by step. These steps are namely authenticate, run, sendMail
        and cleanup.

        On any exception, a WebtunnelError request handler is built and
        is run to let the client know what has happened. The cleanup step
        is always executed once a handler has been built, whether the
        other steps succeeded or not.
        """
        lLogger = logging.getLogger('webtunnel')
        # Defined up front so that the error path can use them whatever
        # the step that failed.
        lSender = None
        lRequest = ''
        lContent = ''
        lHandler = None
        try:
            try:
                lSender, lRequest, lContent = self.parseInput()
                lHandler = self.getHandlerFor(lRequest, lSender)
                lHandler.authenticate()
                lHandler.run(lContent)
                lHandler.sendMail()
                lLogger.info(
                    'Processing successfully achieved for ' + str(lSender))
            except Exception:
                lError = sys.exc_info()[1]
                lLogger.error(lError)
                self.__reportError(lSender, lRequest, lContent, lError)
        finally:
            if lHandler is not None:
                try:
                    lHandler.cleanup()
                except Exception:
                    lLogger.error(
                        'Cleanup failed: ' + str(sys.exc_info()[1]))

    def __reportError(self, aSender, aRequest, aContent, aException):
        """Mail the given exception back to the sender, if it is known."""
        lLogger = logging.getLogger('webtunnel')
        if aSender is None:
            # The incoming mail could not even be parsed: nobody to answer.
            lLogger.error('Sender unknown, no error mail sent')
            return
        try:
            lHandler = WebtunnelError(self.__configuration, aSender)
            lHandler.setRequest(aRequest)
            lHandler.setException(aException)
            lHandler.run(aContent)
            lHandler.sendMail()
        except Exception:
            lLogger.error(
                'Unable to send the error mail: ' + str(sys.exc_info()[1]))

    def getHandlerFor(self, aRequest, aSender):
        """Build the correct handler for the given request.

        This method instantiates the correct class to do the real job.
        The name of this handler is built from the name of the request:
        the two words around the dash are capitalized and joined, so
        that 'webtunnel-get' is handled by WebtunnelGet.
        Return an instance of WebtunnelRequest class. If this is
        impossible, throw a BadRequestException instead.
        """
        lWords = (aRequest or '').strip().split('-')
        if len(lWords) != 2 or not lWords[0] or not lWords[1]:
            raise BadRequestException('Unknown request: ' + repr(aRequest))

        lClassName = lWords[0].capitalize() + lWords[1].capitalize()
        # The request name comes from the subject of the incoming mail:
        # it must never be evaluated. Only a plain name lookup is done,
        # and only request handler classes are accepted.
        lClass = globals().get(lClassName)
        if not (isinstance(lClass, type)
                and issubclass(lClass, WebtunnelRequest)):
            raise BadRequestException('Unknown request: ' + repr(aRequest))
        return lClass(self.__configuration, aSender)

    def parseInput(self):
        """Extract information from incoming mail.

        First build a mail representation from input stream. Then
        extract from this representation the sender address, the
        subject line - which is also the name of the request, and
        the body content. These values are returned in a 3-tuple.

        Raise InvalidAddressException if the mail has no usable From:
        header.
        """
        lMail = email.message_from_file(self.__inputStream)

        lSender = Client.FromRFC822Header(lMail['From'] or '')
        lSubject = (lMail['Subject'] or '').strip()

        # Quick hack: a reply to an answer is a new get request. The
        # subject is changed locally, because assigning to
        # lMail['Subject'] would add a second header and leave the
        # first one visible.
        if lSubject == 'Re: Webtunnel-Answer':
            lSubject = 'webtunnel-get'

        lContent = self.__extractText(lMail)

        logging.getLogger('webtunnel').info(
            str(lSender) + ' asked for ' + lSubject)

        return lSender, lSubject, lContent

    def __extractText(self, aMail):
        """Return the decoded text of the mail, or '' if there is none.

        For a multipart mail, the first text/plain part is used.
        """
        if not aMail.is_multipart():
            return aMail.get_payload(decode = True) or ''
        for lPart in aMail.walk():
            if lPart.is_multipart():
                continue
            if lPart.get_content_type() == 'text/plain':
                return lPart.get_payload(decode = True) or ''
        return ''
|
|
|
|
#
|
|
# Config
|
|
#
|
|
class ConfigurationParser(XmlBootstrapParser):
    """Parser of the webtunnel configuration files.

    This is an XmlBootstrapParser built around a Configuration instance.
    """

    def __init__(self, aConfiguration):
        """The constructor.

        aConfiguration must be a valid Configuration instance; it is
        handed over to the XmlBootstrapParser constructor.
        """
        super(ConfigurationParser, self).__init__(aConfiguration)
|
|
|
|
|
|
|
|
class Configuration(RootSAXConsumer):
    """The program configuration class.

    This class implements the RootSAXConsumer interface to parse
    the configuration file of the program. This file is stored in
    an XML file with the following definition:

    <!DOCTYPE webtunnel [

      <!ELEMENT webtunnel (SMTPServer, clients)>
      <!ATTLIST webtunnel name    CDATA #REQUIRED
                          address CDATA #REQUIRED>

      <!ELEMENT SMTPServer EMPTY>
      <!ATTLIST SMTPServer url CDATA #REQUIRED>

      <!ELEMENT clients (administrator*, user*)>

      <!ELEMENT administrator EMPTY>
      <!ATTLIST administrator name    CDATA #REQUIRED
                              address CDATA #REQUIRED>

      <!ELEMENT user EMPTY>
      <!ATTLIST user name    CDATA #REQUIRED
                     address CDATA #REQUIRED>

    ]>
    """

    def __init__(self, aFilename):
        """The constructor.

        aFilename is the name of the configuration file. Nothing is read
        until the parse method is called.
        """
        super(Configuration, self).__init__()
        self.__filename = aFilename
        # Replaced by a Client instance by buildFromElement.
        self.__tunnelAddress = ''
        self.__smtpServer = ''
        self.__administrators = []
        self.__users = []

    def getFilename(self):
        """Return the name of the configuration file."""
        return self.__filename

    def getTunnelAddress(self):
        """Return the address of the tunnel as a Client instance."""
        return self.__tunnelAddress

    def getSMTPServer(self):
        """Return the URL of the SMTP server used by the tunnel."""
        return self.__smtpServer

    def parse(self):
        """Start parsing the configuration file using ConfigurationParser.

        Return this instance, so that calls can be chained.
        """
        lParser = ConfigurationParser(self)
        lParser.parse(self.__filename)
        return self

    def getAdministrators(self):
        """Return the administrators as a Client instances list."""
        return self.__administrators

    def getUsers(self):
        """Return the users as a Client instances list."""
        return self.__users

    def consumeElement(self, aName, aQName, aAttributes):
        """Record the SMTP server, administrator or user just read.

        Other elements are ignored. Return this instance.
        NOTE(review): presumably called by the parser for each element
        nested in the root one; confirm against RootSAXConsumer.
        """
        # The documented DTD names the element 'SMTPServer' while this
        # code historically tested 'smtpServer'; XML names being case
        # sensitive, both spellings are accepted.
        if aName in (u'smtpServer', u'SMTPServer'):
            self.__smtpServer = aAttributes.getValueByQName(u'url')

        elif aName == u'administrator':
            lName = aAttributes.getValueByQName(u'name')
            lAddress = aAttributes.getValueByQName(u'address')
            self.__administrators.append(Client(lName, lAddress))

        elif aName == u'user':
            lName = aAttributes.getValueByQName(u'name')
            lAddress = aAttributes.getValueByQName(u'address')
            self.__users.append(Client(lName, lAddress))

        return self

    def consumeCharacters(self, aContent):
        """Refuse character data by raising ConfigurationException."""
        raise ConfigurationException(
            'Element webtunnel does not consume characters')

    def buildFromElement(self, aName, aQName, aAttributes):
        """Read the tunnel name and address from the given attributes.

        NOTE(review): presumably called by the parser with the root
        'webtunnel' element; confirm against RootSAXConsumer.
        """
        lName = aAttributes.getValueByQName(u'name')
        lAddress = aAttributes.getValueByQName(u'address')
        self.__tunnelAddress = Client(lName, lAddress)
|
|
|
|
|
|
|
|
#
|
|
# Main
|
|
#
|
|
if __name__ == '__main__':
    # Everything is traced at DEBUG level in the webtunnel log file.
    # (The same call with 'ground.xml.parsers' and logging.ERROR can be
    # added to trace the XML parser as well.)
    lTraceFile = os.path.join(HOME, TRACE_FILE)
    log.createWithFilename('webtunnel', logging.DEBUG, lTraceFile)

    # Read the configuration, then handle the mail given on standard input.
    lConfiguration = Configuration(CONFIGURATION_FILE).parse()
    lProcessor = Processor(lConfiguration, sys.stdin)
    lProcessor.run()
|