Index: python/cms_cmssw.py
===================================================================
RCS file: /cvs_server/repositories/CMSSW/COMP/CRAB/python/cms_cmssw.py,v
retrieving revision 1.223
diff -u -r1.223 cms_cmssw.py
--- python/cms_cmssw.py	19 Jun 2008 14:29:18 -0000	1.223
+++ python/cms_cmssw.py	23 Jun 2008 01:34:54 -0000
@@ -2,7 +2,7 @@
 from crab_logger import Logger
 from crab_exceptions import *
 from crab_util import *
-from BlackWhiteListParser import BlackWhiteListParser
+from BlackWhiteListParser import SEBlackWhiteListParser
 import common
 import Scram
 from LFNBaseName import *
@@ -20,7 +20,7 @@
         self._params = {}
         self.cfg_params = cfg_params
         # init BlackWhiteListParser
-        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
+        self.blackWhiteListParser = SEBlackWhiteListParser(cfg_params)
 
         self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
 
Index: python/Status.py
===================================================================
RCS file: /cvs_server/repositories/CMSSW/COMP/CRAB/python/Status.py,v
retrieving revision 1.38
diff -u -r1.38 Status.py
--- python/Status.py	12 Jun 2008 18:53:12 -0000	1.38
+++ python/Status.py	23 Jun 2008 01:34:53 -0000
@@ -3,6 +3,8 @@
 import string, os, time
 from crab_util import *
 
+import CmsSiteMapper
+
 class Status(Actor):
     def __init__(self, *args):
         self.cfg_params = args[0]
@@ -39,6 +41,9 @@
         toPrint=[]
         taskId= str("_".join(str(up_task['name']).split('_')[:-1]))
         self.wrapErrorList = []
+
+        ce_cms = CmsSiteMapper.CECmsMap()
+
         for job in up_task.jobs :
             id = str(job.runningJob['jobId'])
             jobStatus =  str(job.runningJob['statusScheduler'])
@@ -49,6 +54,10 @@
             printline=''
             header = ''
             if dest == 'None' :  dest = ''
+            try:
+                dest = ce_cms[dest]
+            except:
+                pass
             if exe_exit_code == 'None' :  exe_exit_code = ''
             if job_exit_code == 'None' :  job_exit_code = ''
             printline+="%-8s %-18s %-40s %-13s %-15s" % (id,jobStatus,dest,exe_exit_code,job_exit_code)
Index: python/Submitter.py
===================================================================
RCS file: /cvs_server/repositories/CMSSW/COMP/CRAB/python/Submitter.py,v
retrieving revision 1.124
diff -u -r1.124 Submitter.py
--- python/Submitter.py	19 Jun 2008 15:12:01 -0000	1.124
+++ python/Submitter.py	23 Jun 2008 01:34:53 -0000
@@ -47,8 +47,8 @@
         if chosenJobsList != None:
             tmp_jList = chosenJobsList
         # build job list
-        from BlackWhiteListParser import BlackWhiteListParser
-        self.blackWhiteListParser = BlackWhiteListParser(self.cfg_params)
+        from BlackWhiteListParser import SEBlackWhiteListParser
+        self.blackWhiteListParser = SEBlackWhiteListParser(self.cfg_params)
         dlsDest=common._db.queryJob('dlsDestination',tmp_jList)
         jStatus=common._db.queryRunJob('status',tmp_jList)
         for nj in range(len(tmp_jList)):
Index: python/SchedulerCondorCommon.py
===================================================================
RCS file: /cvs_server/repositories/CMSSW/COMP/CRAB/python/SchedulerCondorCommon.py,v
retrieving revision 1.20
diff -u -r1.20 SchedulerCondorCommon.py
--- python/SchedulerCondorCommon.py	6 Jun 2008 20:32:01 -0000	1.20
+++ python/SchedulerCondorCommon.py	23 Jun 2008 01:34:53 -0000
@@ -9,7 +9,6 @@
 import common
 import popen2
 import os
-from BlackWhiteListParser import BlackWhiteListParser
 from BlackWhiteListParser import CEBlackWhiteListParser
 import Scram
 import CondorGLoggingInfo
Index: python/BlackWhiteListParser.py
===================================================================
RCS file: /cvs_server/repositories/CMSSW/COMP/CRAB/python/BlackWhiteListParser.py,v
retrieving revision 1.5
diff -u -r1.5 BlackWhiteListParser.py
--- python/BlackWhiteListParser.py	24 Apr 2008 14:58:12 -0000	1.5
+++ python/BlackWhiteListParser.py	23 Jun 2008 01:34:53 -0000
@@ -1,9 +1,15 @@
+
+import os
+import sys
+import sets
+import time
+
 from crab_logger import Logger
 from crab_exceptions import *
 from crab_util import *
 import common
 
-import os, sys, time
+import CmsSiteMapper
 
 # A more general design would have SEBlackWhiteListParser and CEBlackWhiteListParser
 # inheriting from BlackWhiteListParser, but BlackWhiteListParser is used for SEs in many
@@ -11,118 +17,140 @@
 #                   -- ewv
 
 class BlackWhiteListParser:
-    def __init__(self,cfg_params):
+
+    """
+    A class which applies blacklist and whitelist; designed to allow the user
+    to filter out sites.  This uses the CmsSiteMapper module to allow users to
+    specify only the CMS or SiteDB `Node` name (or some regular expression
+    or wildcard), but internally filter only on the CE/SE name.
+    """
+
+    kind = 'se'
+
+    def __init__(self, cfg_params):
+        self.kind = 'se'
+        self.mapper = CmsSiteMapper.CmsSEMap()
         self.configure(cfg_params)
-        return
 
     def configure(self, cfg_params):
-
-        SEBlackList = []
-        if cfg_params.has_key('EDG.se_black_list'):
-            tmpBad = string.split(cfg_params['EDG.se_black_list'],',')
-            for tmp in tmpBad:
-                tmp=string.strip(tmp)
-                SEBlackList.append(tmp)
-            pass
-        common.logger.debug(5,'SEBlackList: '+str(SEBlackList))
-        self.reSEBlackList=[]
-        for bad in SEBlackList:
-            self.reSEBlackList.append(re.compile( string.lower(bad) ))
-
-        SEWhiteList = []
-        if cfg_params.has_key('EDG.se_white_list'):
-            tmpGood = string.split(cfg_params['EDG.se_white_list'],',')
-            for tmp in tmpGood:
-                tmp=string.strip(tmp)
-                SEWhiteList.append(tmp)
-            pass
-        common.logger.debug(5,'SEWhiteList: '+str(SEWhiteList))
-        self.reSEWhiteList=[]
-        for good in SEWhiteList:
-            self.reSEWhiteList.append(re.compile( string.lower(good) ))
-
-    def checkBlackList(self, Sites, fileblocks):
-        """
-        select sites that are not excluded by the user (via SE black list)
-        """
-        goodSites = []
-        for aSite in Sites:
-            common.logger.debug(10,'Site '+aSite)
-            good=1
-            for re in self.reSEBlackList:
-                if re.search(string.lower(aSite)):
-                    common.logger.debug(5,'SE in black list, skipping site '+aSite)
-                    good=0
-                pass
-            if good: goodSites.append(aSite)
-        if len(goodSites) == 0:
-            msg = "No sites hosting the block %s after BlackList" % fileblocks
+        """
+        Load up the black and white list from the configuration parameters
+           * EDG.%s_black_list
+           * EDG.%s_white_list
+        """ % (self.kind, self.kind)
+        self.blacklist = []
+        if cfg_params.has_key('EDG.%s_black_list' % self.kind):
+            user_input = cfg_params['EDG.%s_black_list' % self.kind]
+            self.blacklist = self.mapper.match(user_input)
+        common.logger.debug(5,'Converted %s blacklist: %s' % \
+            (self.kind, ', '.join(self.blacklist)))
+
+        self.whitelist = []
+        if cfg_params.has_key('EDG.%s_white_list' % self.kind):
+            user_input = cfg_params['EDG.%s_white_list' % self.kind]
+            self.whitelist = self.mapper.match(user_input)
+        common.logger.debug(5, 'Converted %s whitelist: %s' % \
+            (self.kind, ', '.join(self.whitelist)))
+
+        self.blacklist = sets.Set(self.blacklist)
+        self.whitelist = sets.Set(self.whitelist)
+
+    def checkBlackList(self, Sites, fileblocks=''):
+        """
+        Select sites that are not excluded by the user (via blacklist)
+
+        The sites returned are the input sites minus the contents of the
+        self.blacklist
+
+        @param Sites: The sites which will be filtered
+        @keyword fileblocks: The block this is used for; only used in a pretty
+           debug message.
+        @returns: The input sites minus the blacklist.
+        """
+        Sites = sets.Set(Sites)
+        # Blacklist entries may be patterns; expand them against the input.
+        blacklist = sets.Set(CmsSiteMapper.match_list(Sites, self.blacklist))
+        goodSites = Sites.difference(blacklist)
+        goodSites = list(goodSites)
+        if not goodSites and fileblocks:
+            msg = "No sites hosting the block %s after blackList" % fileblocks
             common.logger.debug(5,msg)
             common.logger.debug(5,"Proceeding without this block.\n")
-        else:
-            common.logger.debug(5,"Selected sites for block "+str(fileblocks)+" via BlackList are "+str(goodSites)+"\n")
+        elif fileblocks:
+            common.logger.debug(5,"Selected sites for block %s via blacklist " \
+                "are %s.\n" % (fileblocks, ', '.join(goodSites)))
         return goodSites
 
-    def checkWhiteList(self, Sites, fileblocks):
-        """
-        select sites that are defined by the user (via SE white list)
+    def checkWhiteList(self, Sites, fileblocks=''):
         """
-        if len(self.reSEWhiteList)==0: return Sites
-        goodSites = []
-        for aSite in Sites:
-            good=0
-            for re in self.reSEWhiteList:
-                if re.search(string.lower(aSite)):
-                    common.logger.debug(5,'SE in white list, adding site '+aSite)
-                    good=1
-                pass
-            if good: goodSites.append(aSite)
+        Select sites that are defined by the user (via white list).
 
-        if len(goodSites) == 0:
-            msg = "No sites hosting the block %s after WhiteList" % fileblocks
+        The sites returned are the intersection of the input sites and the
+        contents of self.whitelist
+
+        @param Sites: The sites which will be filtered
+        @keyword fileblocks: The block this is applied for; only used for a
+           pretty debug message
+        @returns: The intersection of the input Sites and self.whitelist.
+        """
+        if not self.whitelist:
+            return Sites
+        # Whitelist entries may be patterns; expand them against the input.
+        whitelist = CmsSiteMapper.match_list(Sites, self.whitelist)
+        Sites = sets.Set(Sites)
+        goodSites = Sites.intersection(whitelist)
+        goodSites = list(goodSites)
+        if not goodSites and fileblocks:
+            msg = "No sites hosting the block %s after whiteList" % fileblocks
             common.logger.debug(5,msg)
             common.logger.debug(5,"Proceeding without this block.\n")
-        else:
-            common.logger.debug(5,"Selected sites for block "+str(fileblocks)+" via WhiteList are "+str(goodSites)+"\n")
+        elif fileblocks:
+            common.logger.debug(5,"Selected sites for block %s via whitelist " \
+                "are %s.\n" % (fileblocks, ', '.join(goodSites)))
 
         return goodSites
 
-    def cleanForBlackWhiteList(self,destinations,list=''):
+    def cleanForBlackWhiteList(self,destinations,list=False):
         """
-        clean for black/white lists using parser
+        Clean for black/white lists using parser.
+
+        Take the input list and apply the blacklist, then the whitelist that
+        the user specified.
+
+        @param destinations: A list of all the input sites
+        @keyword list: Set to True or the string 'list' to return a list
+           object.  Set to False or the string '' to return a string object.
+           The default is False.
+        @returns: The list of all input sites, first filtered by the blacklist,
+           then filtered by the whitelist.  If list=True, returns a list; if
+           list=False, return a string.
         """
-        if list=='list':
-            return self.checkWhiteList(self.checkBlackList(destinations,''),'')
+        if list:
+            return self.checkWhiteList(self.checkBlackList(destinations))
         else:
-            return ','.join(self.checkWhiteList(self.checkBlackList(destinations,''),''))
+            return ','.join(self.checkWhiteList(self.checkBlackList( \
+                destinations)))
+
+class SEBlackWhiteListParser(BlackWhiteListParser):
+    """
+    Use the BlackWhiteListParser to filter out the possible list of SEs
+    from the user's input; see the documentation for BlackWhiteListParser.
+    """
+
+    def __init__(self, cfg_params):
+        self.kind = 'se'
+        self.mapper = CmsSiteMapper.CmsSEMap()
+        self.configure(cfg_params)
 
 class CEBlackWhiteListParser(BlackWhiteListParser):
-    def __init__(self,cfg_params):
-        BlackWhiteListParser.__init__(self,cfg_params)
-        return
 
-    def configure(self, cfg_params):
+    """
+    Use the BlackWhiteListParser to filter out the possible list of CEs
+    from the user's input; see the documentation for BlackWhiteListParser.
+    """
+
+    def __init__(self,cfg_params):
+        self.kind = 'ce'
+        self.mapper = CmsSiteMapper.CmsCEMap()
+        self.configure(cfg_params)
 
-        SEBlackList = []
-        if cfg_params.has_key('EDG.ce_black_list'):
-            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
-            for tmp in tmpBad:
-                tmp=string.strip(tmp)
-                SEBlackList.append(tmp)
-            pass
-        common.logger.debug(5,'CEBlackList: '+str(SEBlackList))
-        self.reSEBlackList=[]
-        for bad in SEBlackList:
-            self.reSEBlackList.append(re.compile( string.lower(bad) ))
-
-        SEWhiteList = []
-        if cfg_params.has_key('EDG.ce_white_list'):
-            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
-            for tmp in tmpGood:
-                tmp=string.strip(tmp)
-                SEWhiteList.append(tmp)
-            pass
-        common.logger.debug(5,'CEWhiteList: '+str(SEWhiteList))
-        self.reSEWhiteList=[]
-        for good in SEWhiteList:
-            self.reSEWhiteList.append(re.compile( string.lower(good) ))
Index: python/Scheduler.py
===================================================================
RCS file: /cvs_server/repositories/CMSSW/COMP/CRAB/python/Scheduler.py,v
retrieving revision 1.29
diff -u -r1.29 Scheduler.py
--- python/Scheduler.py	17 Jun 2008 12:32:24 -0000	1.29
+++ python/Scheduler.py	23 Jun 2008 01:34:53 -0000
@@ -1,8 +1,13 @@
-from crab_exceptions import *
-from Boss import Boss
+
+import os
+import time
+import string
+
 import common
-import string, time, os
+from Boss import Boss
+from crab_exceptions import *
 from crab_util import *
+from BlackWhiteListParser import SEBlackWhiteListParser
 
 #
 #  Naming convention:
@@ -37,8 +42,7 @@
 
     def configure(self, cfg_params):
         self._boss.configure(cfg_params)
-        from BlackWhiteListParser import BlackWhiteListParser
-        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
+        self.blackWhiteListParser = SEBlackWhiteListParser(cfg_params)
         return
 
     def boss(self):
Index: python/SchedulerGrid.py
===================================================================
RCS file: /cvs_server/repositories/CMSSW/COMP/CRAB/python/SchedulerGrid.py,v
retrieving revision 1.60
diff -u -r1.60 SchedulerGrid.py
--- python/SchedulerGrid.py	16 Jun 2008 14:32:00 -0000	1.60
+++ python/SchedulerGrid.py	23 Jun 2008 01:34:53 -0000
@@ -2,7 +2,7 @@
 from crab_logger import Logger
 from crab_exceptions import *
 from crab_util import *
-from BlackWhiteListParser import BlackWhiteListParser
+from BlackWhiteListParser import SEBlackWhiteListParser
 import common
 from LFNBaseName import *
 from JobList import JobList
@@ -31,7 +31,7 @@
         Scheduler.configure(self,cfg_params)
 
         # init BlackWhiteListParser
-        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
+        self.blackWhiteListParser = SEBlackWhiteListParser(cfg_params)
 
         self.proxyValid=0
         self.dontCheckProxy=int(cfg_params.get("EDG.dont_check_proxy",0))
Index: TestSuite/WhiteBlackListTest.py
===================================================================
RCS file: TestSuite/WhiteBlackListTest.py
diff -N TestSuite/WhiteBlackListTest.py
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ TestSuite/WhiteBlackListTest.py	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,92 @@
+
+import sets
+import unittest
+
+import common
+import CmsSiteMapper
+import BlackWhiteListParser
+
+cfg_params = {
+  'EDG.se_black_list': 'Hungary, CIEMAT, ccsrm.in2p3.fr, T1_*',
+  'EDG.se_white_list': 'T2_(US|UK), T2_KR_KNU',
+  'EDG.ce_black_list': 'lcg02.ciemat.es, Bristol, *.fnal.gov',
+  'EDG.ce_white_list': 'London, T3',
+}
+
+class FakeLogger():
+
+    def debug(*args):
+        pass
+
+    def msg(*args):
+        pass
+
+class TestBlackWhiteList(unittest.TestCase):
+
+    def setUp(self):
+        self.separser = BlackWhiteListParser.SEBlackWhiteListParser(cfg_params)
+        self.ceparser = BlackWhiteListParser.CEBlackWhiteListParser(cfg_params)
+
+    def test_se_black_list(self):
+        blacklist = ['grid143.kfki.hu', 'srm.ciemat.es',
+            'ccsrm.in2p3.fr', 'srm.grid.sinica.edu.tw']
+        other = ['t2-srm-02.lnl.infn.it', 'se-dcache.hepgrid.uerj.br']
+        results = self.separser.checkBlackList(other + blacklist)
+        results = sets.Set(results)
+        self.failUnless(results == sets.Set(other))
+
+    def test_se_white_list(self):
+        whitelist = ['srm.ihepa.ufl.edu', 'heplnx204.pp.rl.ac.uk',
+            'cluster142.knu.ac.kr']
+        other = ['f-dpm001.grid.sinica.edu.tw', 'cmsrm-se01.roma1.infn.it']
+        results = self.separser.checkWhiteList(other + whitelist)
+        results = sets.Set(results)
+        self.failUnless(results == sets.Set(whitelist))
+
+    def test_ce_black_list(self):
+        blacklist = ['lcg02.ciemat.es', 'lcgce01.phy.bris.ac.uk',
+            'lcgce02.phy.bris.ac.uk', 'cmsosgce4.fnal.gov']
+        other = ['osgce.hepgrid.uerj.br', 'egeece01.ifca.es',
+            'grid006.lca.uc.pt']
+        results = self.ceparser.checkBlackList(other + blacklist)
+        results = sets.Set(results)
+        self.failUnless(results == sets.Set(other))
+
+    def test_ce_white_list(self):
+        whitelist = ['ce2.ppgrid1.rhul.ac.uk', 'ce00.hep.ph.ic.ac.uk',
+            'ic-kit-lcgce.rz.uni-karlsruhe.de']
+        other = ['gridce2.pi.infn.it', 'lcg02.ciemat.es']
+        results = self.ceparser.checkWhiteList(other + whitelist)
+        results = sets.Set(results)
+        self.failUnless(results == sets.Set(whitelist))
+
+class TestCmsSiteMapper(unittest.TestCase):
+
+    def test_ce_cms(self):
+        ce_cms = CmsSiteMapper.CECmsMap()
+        self.failUnless(ce_cms['blah'] == 'blah')
+        self.failUnless(ce_cms['red.unl.edu'] == 'T2_US_Nebraska')
+
+    def test_se_cms(self):
+        se_cms = CmsSiteMapper.SECmsMap()
+        self.failUnless(se_cms['srm.unl.edu'] == 'T2_US_Nebraska')
+
+    def test_cms_ce(self):
+        cms_ce = CmsSiteMapper.CmsCEMap()
+        self.failUnless(cms_ce['T2_US_Nebraska'] == 'red.unl.edu')
+
+    def test_cms_se(self):
+        cms_se = CmsSiteMapper.CmsSEMap()
+        self.failUnless(cms_se['T2_US_Nebraska'] == 'srm.unl.edu')
+
+    def test_cms_ce_match(self):
+        cms_ce = CmsSiteMapper.CmsCEMap()
+        correct_results = sets.Set(['red.unl.edu',
+            'cit-gatekeeper.ultralight.org'])
+        results = cms_ce.match('T2_US_(Nebraska|Caltech)')
+        self.failUnless(sets.Set(results) == correct_results)
+
+if __name__ == '__main__':
+    common.logger = FakeLogger()
+    unittest.main()
+
Index: python/CmsSiteMapper.py
===================================================================
RCS file: python/CmsSiteMapper.py
diff -N python/CmsSiteMapper.py
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ python/CmsSiteMapper.py	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,467 @@
+
+"""
+The CmsSiteMapper module provides information from SiteDB and assists in the
+mappings to and from CMS site names and some resource name (both SEs and CEs).
+
+Elaborate caching mechanisms are employed to keep the load off SiteDB and 
+improve response time.
+"""
+
+import re
+import os
+import sets
+import time
+import types
+import signal
+import urllib
+import fnmatch
+import urllib2
+
+from xml.dom.minidom import parse
+
+import common
+
+alarm_timeout = 15
+class AlarmClock(Exception):
+    """
+    Exception indicating that the alarm clock went off
+    """
+
+def match_list(names, match_list):
+    """
+    Filter a list of names against a comma-separated list of expressions.
+
+    This uses the `match` function to do the heavy lifting
+
+    @param names: A list of input names to filter
+    @type names: list
+    @param match_list: A comma-separated list of expressions
+    @type match_list: str
+    @returns: A list, filtered from `names`, of all entries which match an
+       expression in match_list
+    @rtype: list
+    """
+    results = []
+    if isinstance(match_list, types.StringType):
+        match_list = match_list.split(',')
+    for expr in match_list:
+        expr = expr.strip()
+        matching = match(names, expr)
+        if matching:
+            results.extend(matching)
+        else:
+            results.append(expr)
+    return results
+
+def match(names, expr):
+    """
+    Return all the entries in `names` which match `expr`
+
+    First, try to apply wildcard-based filters, then look at substrings,
+    then interpret expr as a regex.
+
+    @param names: An input list of strings to match
+    @param expr: A string expression to use for matching
+    @returns: All entries in the list `names` which match `expr`
+    """
+    results = fnmatch.filter(names, expr)
+    results.extend([i for i in names if i.find(expr) >= 0])
+    try:
+        my_re = re.compile(expr)
+    except:
+        my_re = None
+    if not my_re:
+        return results
+    results.extend([i for i in names if my_re.search(i)])
+    return results
+
+class SiteDBInfo:
+    """
+    A module-internal object which provides information from SiteDB and
+    attempts to cache the data
+    """
+
+    # The expire is the age of the cache which will trigger a new query to
+    # SiteDB.  The force_expire is the age of the cache where the contents of
+    # the cache will be ignored, even if the SiteDB query fails.
+    expire = 1 # In days
+    force_expire = 5 # In days
+
+    # SiteDB URL
+    sitedb_url = 'https://cmsweb.cern.ch/sitedb/sitedb/reports/showXMLReport'
+
+    def __init__(self):
+        self.ce_results = []
+        self.se_results = []
+
+    def write_cache(self, name, contents):
+        """
+        Write the contents of a report into the cache
+        """
+        filename = '$HOME/.crab_%s.cache' % name
+        filename = os.path.expandvars(filename)
+        try:
+            fd = open(filename, 'w')
+            fd.write(contents)
+        except:
+            pass
+
+    def check_cache(self, name):
+        """
+        Check the cache for a report named `name`
+
+        The age of the cache file (time since last modification) decides the
+        result: up to self.expire days old, the cache is fresh; older than
+        that but within self.force_expire days, it is stale yet still usable
+        as a fallback; beyond self.force_expire days (or if the file is
+        missing or inaccessible), it is treated as a cache miss.
+
+        @param name: The name of the report to look for
+        @returns: (fresh, fd); fd is None on a cache miss
+        @rtype: (bool, file descriptor)
+        """
+        filename = '$HOME/.crab_%s.cache' % name
+        filename = os.path.expandvars(filename)
+        # Check the status of the cache; catch common harmless problems
+        try:
+            si = os.stat(filename)
+        except OSError, e:
+            # errno 2: no such file or directory; errno 13: permission denied.
+            # Both are plain cache misses; anything else is unexpected.
+            if e.errno in (2, 13):
+                return False, None
+            raise
+
+        # Look at the last-modified time; fall back to tuple indexing for
+        # stat results which lack named attributes.
+        try:
+            mtime = si.st_mtime
+        except AttributeError:
+            mtime = si[8]
+        age = time.time() - mtime
+        # Test the larger threshold first; checking self.expire first made
+        # the force_expire branch unreachable.
+        if age > self.force_expire*86400:
+            return False, None
+        fresh = age <= self.expire*86400
+
+        # Read contents of the cache, return the freshness
+        fd = open(filename, 'r')
+        return fresh, fd
+        
+    def load_siteDB(self, query):
+        """
+        Load the contents of `query` from SiteDB
+
+        @param query: Name of the SiteDB query.
+        @returns: File-descriptor like object.
+        """
+        params = {'reportid': query}
+        params = urllib.urlencode(params)
+        fd = urllib2.urlopen(self.sitedb_url, params)
+        return fd
+
+    def load_report(self, report):
+        """
+        Load the SiteDB report and return the DOM contents.
+
+        Great care is taken to make this resilient, including timeouts and
+        fallback to the contents of the cache.
+
+        @param report: The name of the SiteDB report
+        @returns: The DOM object representing the contents of the report
+        """
+
+        # Start off by setting the alarm clock to timeout faulty operations.
+        def interrupt_op(*args):
+            raise AlarmClock()
+        signal.signal(signal.SIGALRM, interrupt_op)
+        try:
+            # Check the contents of the local cache; only return here if they
+            # are fresh and parse cleanly.
+            try:
+                signal.alarm(alarm_timeout)
+                fresh, results = self.check_cache(report)
+                if results:
+                    try:
+                        results = parse(results)
+                        if fresh:
+                            return results
+                    except:
+                        results = None
+            except AlarmClock:
+                fresh = False
+                results = None
+            # Check SiteDB for the report.  If there's a problem, no urlresults
+            # are available.  SiteDB results are saved to the cache.
+            try:
+                signal.alarm(alarm_timeout)
+                urlresults = self.load_siteDB(report)
+                try:
+                    urlresults = parse(urlresults)
+                    self.write_cache(report, urlresults.toprettyxml())
+                except:
+                    urlresults = None
+            except (AlarmClock, urllib2.URLError):
+                fresh, urlresults = False, None
+        finally:
+            # Restore alarm handlers
+            signal.alarm(0)
+            signal.signal(signal.SIGALRM, signal.SIG_DFL)
+        # Default to the SiteDB results if available; if there was a problem
+        # with SiteDB, the cached results will return.
+        if urlresults:
+            return urlresults
+        if results:
+            return results
+
+        # Neither SiteDB nor cached results; raise an Exception.
+        raise Exception("Unable to get CMS info from SiteDB.")
+
+    def parse_report(self, dom, kind='ce'):
+        """
+        Parse the contents of the SiteDB report.
+
+        The SiteDB row results are expected to have three columns - sitename,
+        the PhEDEx node, and a column given by `kind`; usually "ce" or "se".
+        See the SiteDB results to understand what this is parsing.
+
+        A list of tuples is returned; none of the entries in the tuples are
+        guaranteed to be unique.  Rows lacking any of the three columns are
+        skipped.
+
+        @param dom: DOM object containing the contents of the SiteDB report
+        @keyword kind: The kind of report - "ce" or "se"
+        @returns: A list of 3-tuples: (sitename, phedex node, ce/se name).
+        """
+        result = dom.getElementsByTagName('result')
+        if not result:
+            return []
+        result = result[0]
+        results = []
+        for item in result.getElementsByTagName('item'):
+            row = []
+            for tag in ('name', 'node', kind):
+                # A missing element raises IndexError; an element without a
+                # text child raises AttributeError.
+                try:
+                    text = item.getElementsByTagName(tag)[0].firstChild.data
+                    row.append(str(text).strip())
+                except (IndexError, AttributeError, UnicodeError):
+                    break
+            # A bad column used to become None, and None.strip() then raised
+            # AttributeError for the whole report; drop just that row instead.
+            if len(row) == 3:
+                results.append(tuple(row))
+        return results
+
+    def load_SE(self):
+        """
+        Load the contents of the se_node_map.ini report from SiteDB.
+
+        For the returned format, see the parse_report method documentation.
+        """
+        if not self.se_results:
+            dom = self.load_report('se_node_map.ini')
+            self.se_results = self.parse_report(dom, kind='se')
+        return self.se_results
+
+    def load_CE(self):
+        """
+        Load the contents of the ce_node_map.ini report from SiteDB.
+
+        For the returned format, see the parse_report method documentation.
+        """
+        if not self.ce_results:
+            dom = self.load_report('ce_node_map.ini')
+            self.ce_results = self.parse_report(dom, kind='ce')
+        return self.ce_results
+
+# Create a global instance of the SiteDBInfo object
+siteDB = SiteDBInfo()
+
+class CmsResourceMap(dict):
+    """
+    A dictionary-like object which maps from the CMS name to some resource;
+    this is meant to be sub-classed; one for CEs, one for SEs.
+    """
+
+    def __init__(self):
+        self._loaded = False
+        self._tuples = []
+        self._map = {}
+        self._cmsnames = []
+
+    def load_tuples(self):
+        """
+        Internal method to load the tuples; return a value like SiteDBInfo's
+        parse_report method.
+
+        This is an abstract method; override it.
+        """
+        raise NotImplementedError()
+
+    def load(self):
+        """
+        Load the contents of a SiteDB report.
+        
+        This uses self.load_tuples to load a list of tuples from the SiteDBInfo
+        object, then does some preliminary parsing of the results.
+        
+        All methods which need the preliminary parsing already call this
+        method, so there's no need to call it directly.
+        """
+        if not self._loaded:
+            self._loaded = True
+            self._tuples = self.load_tuples()
+            for tuple in self._tuples:
+                name, node, resource = tuple
+                self._map[name] = self._map.get(name, [])
+                self._map[node] = self._map.get(node, [])
+                self._map[name].append(resource)
+                self._map[node].append(resource)
+            self._cmsnames = [i[0] for i in self._tuples]
+            self._cmsnames.extend([i[1] for i in self._tuples])
+
+    def __getitem__(self, cmsname):
+        if not self._loaded:
+            self.load()
+        return self._map.get(cmsname, [cmsname])[0]
+
+    def match(self, cmsname):
+        """
+        Given a string which contains a comma-separated list of expressions
+        which match some CMS site name, return a list of all the resources
+        that match at least one expression
+
+        If no CMS site matches one of the expressions, return the expression in
+        the list (this behavior is for backward compatibility, in case the
+        expression is actually a resource expression, not a CMS site name).
+
+        Note: This method uses match_list to do the heavy lifting.
+
+        @param cmsname: A string containing a comma-separated list of
+            expressions.
+        """
+        if not self._loaded:
+            self.load()
+        matching_names = match_list(self._cmsnames, cmsname)
+        results = []
+        for i in matching_names:
+            if i in self._map:
+                results.extend(self._map[i])
+            else:
+                results.append(i)
+        return list(sets.Set(results))
+
+class CmsSEMap(CmsResourceMap):
+    """
+    A dictionary-like object which maps from the CMS name to the SE name.
+    After the object is created, run the `load` method to perform the lookups;
+    if this is not run, it will be done at the first lookup
+    """
+
+    def load_tuples(self):
+        """
+        Return the results from SiteDBInfo::load_SE.
+
+        Internal method; the bulk of the work is done by CmsResourceMap
+        """
+        return siteDB.load_SE()
+
+
+class CmsCEMap(CmsResourceMap):
+    """
+    A dictionary-like object which maps from the CMS name to the CE name.
+    After the object is created, run the `load` method to perform the lookups;
+    if this is not run, it will be done at the first lookup
+    """
+
+    def load_tuples(self):
+        """
+        Return the results from SiteDBInfo::load_CE.
+
+        Internal method; the bulk of the work is done by CmsResourceMap
+        """
+        return siteDB.load_CE()
+
+class ResourceCmsMap(dict):
+    """
+    This dictionary-like class is the base class for objects which map some
+    resource name to a CMS site name.
+    """
+
+    def __init__(self):
+        self._loaded = False
+        self._tuples = []
+        self._map = {}
+
+    def load_tuples(self):
+        """
+        Return the results from a SiteDB report as provided by SiteDBInfo; this
+        needs to be implemented by the subclass.  See SiteDBInfo::parse_report
+        documentation for the expected result format
+        """
+        raise NotImplementedError()
+
+    def load(self):
+        """
+        Load up the SiteDB information and do some preliminary parsing; this
+        is automatically called by functions which need it, so there's no
+        need to call it directly.
+        """
+        if not self._loaded:
+            self._loaded = True
+            self._tuples = self.load_tuples()
+            for tuple in self._tuples:
+                name, node, resource = tuple
+                self._map[resource] = node
+
+    def __getitem__(self, resource):
+        if not self._loaded:
+            self.load()
+        return self._map.get(resource, resource)
+
+class CECmsMap(ResourceCmsMap):
+    """
+    A dictionary-like class which maps from CE name to CMS name.
+
+    Most of the work is done by the ResourceCmsMap.
+    """
+
+    def load_tuples(self):
+        """
+        Return the results from SiteDBInfo::load_CE.
+
+        Internal method; the bulk of the work is done by ResourceCmsMap.
+        """
+        return siteDB.load_CE()
+
+class SECmsMap(ResourceCmsMap):
+    """
+    A dictionary-like class which maps from SE name to CMS name.
+
+    Most of the work is done by the ResourceCmsMap.
+    """
+
+    def load_tuples(self):
+        """
+        Return the results from SiteDBInfo::load_SE.
+
+        Internal method; the bulk of the work is done by ResourceCmsMap.
+        """
+        return siteDB.load_SE()
+
+if __name__ == '__main__':
+    ce_cms = CECmsMap() 
+    se_cms = SECmsMap()
+    cms_ce = CmsCEMap()
+    cms_se = CmsSEMap()
+    assert ce_cms['blah'] == 'blah'
+    assert ce_cms['red.unl.edu'] == 'T2_US_Nebraska'
+    assert se_cms['srm.unl.edu'] == 'T2_US_Nebraska'
+    assert cms_ce['T2_US_Nebraska'] == 'red.unl.edu'
+    assert cms_se['T2_US_Nebraska'] == 'srm.unl.edu'
+    print "All sites matching 'T2_US_*,T1_US':"
+    print ', '.join(cms_ce.match('T2_US_*,T1_US'))
+
