from sgmllib import SGMLParser
import urllib
import htmlentitydefs
import shutil
import os
import os.path
from os.path import join, getsize, split
# Names of the module-level state shared by the classes below.  A ``global``
# statement at module scope has no effect of its own; basePath, targetPath,
# pathVar, subFolder and allOldFiles receive their values in the script
# section further down this file.  allNewFiles is only declared here and is
# neither assigned nor read in the visible code.
global basePath, targetPath, pathVar, subFolder, allOldFiles, allNewFiles
class URLLister(SGMLParser):
    """Parser that collects the ``href`` value of every ``<a>`` start tag.

    After feeding a document, the collected values are available, in
    document order, in ``self.urls``.
    """

    def reset(self):
        """Reset the parser state and start over with an empty URL list."""
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        """Record every ``href`` attribute found on an ``<a>`` start tag.

        ``attrs`` is the list of ``(name, value)`` pairs for the tag.
        """
        for attr_name, attr_value in attrs:
            if attr_name == 'href':
                self.urls.append(attr_value)
class BaseHTMLProcessor(SGMLParser):
    """Re-emit parsed HTML, relocating the files that ``<a>`` tags link to.

    Every parser event is turned back into markup and appended to
    ``self.pieces``; ``output()`` joins the pieces into the resulting
    document.  The only event that is altered is the ``<a>`` start tag: each
    ``href`` value is passed through ``FileMover().Move(basePath, targetPath,
    href)`` and replaced by whatever that call returns.
    """

    def reset(self):
        """Start over with an empty list of output pieces."""
        self.pieces = []
        SGMLParser.reset(self)

    def unknown_starttag(self, tag, attrs):
        """Re-emit a start tag together with all of its attributes."""
        strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
        self.pieces.append("<%s%s>" % (tag, strattrs))

    def unknown_endtag(self, tag):
        """Re-emit an end tag (``</tag>``)."""
        self.pieces.append("</%s>" % tag)

    def handle_charref(self, ref):
        """Re-emit a numeric character reference (``&#NNN;``)."""
        self.pieces.append("&#%s;" % ref)

    def handle_entityref(self, ref):
        """Re-emit an entity reference.

        The closing semicolon is written only when ``ref`` is a known HTML
        entity name; anything else is emitted as ``&ref`` without it.
        """
        piece = "&%s" % ref
        if ref in htmlentitydefs.entitydefs:
            piece += ";"
        self.pieces.append(piece)

    def handle_data(self, text):
        """Pass plain text through unchanged."""
        self.pieces.append(text)

    def handle_comment(self, text):
        """Re-emit an HTML comment (``<!--text-->``)."""
        self.pieces.append("<!--%s-->" % text)

    def handle_pi(self, text):
        """Re-emit a processing instruction (``<?text>``)."""
        self.pieces.append("<?%s>" % text)

    def handle_decl(self, text):
        """Re-emit a declaration such as a DOCTYPE (``<!text>``)."""
        self.pieces.append("<!%s>" % text)

    def start_a(self, attrs):
        """Re-emit an ``<a>`` start tag with its ``href`` relocated.

        All attributes are kept in their original order; only the value of
        ``href`` is replaced by the result of ``FileMover().Move``.
        """
        strattrs = ""
        for key, value in attrs:
            if key == 'href':
                value = FileMover().Move(basePath, targetPath, value)
            strattrs += ' %s="%s"' % (key, value)
        self.pieces.append("<a%s>" % strattrs)

    def output(self):
        """Return processed HTML as a single string"""
        return "".join(self.pieces)
class MofifyFileProcessor(SGMLParser):
    """Re-emit parsed HTML with links redirected to relocated files.

    Every parser event is turned back into markup and appended to
    ``self.pieces``; ``output()`` joins the pieces into the resulting
    document.  The only event that is altered is the ``<a>`` start tag: each
    ``href`` value is replaced by the result of ``FindNewLocation``.
    """

    def reset(self):
        """Start over with an empty list of output pieces."""
        self.pieces = []
        SGMLParser.reset(self)

    def unknown_starttag(self, tag, attrs):
        """Re-emit a start tag together with all of its attributes."""
        strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
        self.pieces.append("<%s%s>" % (tag, strattrs))

    def unknown_endtag(self, tag):
        """Re-emit an end tag (``</tag>``)."""
        self.pieces.append("</%s>" % tag)

    def handle_charref(self, ref):
        """Re-emit a numeric character reference (``&#NNN;``)."""
        self.pieces.append("&#%s;" % ref)

    def handle_entityref(self, ref):
        """Re-emit an entity reference.

        The closing semicolon is written only when ``ref`` is a known HTML
        entity name; anything else is emitted as ``&ref`` without it.
        """
        piece = "&%s" % ref
        if ref in htmlentitydefs.entitydefs:
            piece += ";"
        self.pieces.append(piece)

    def handle_data(self, text):
        """Pass plain text through unchanged."""
        self.pieces.append(text)

    def handle_comment(self, text):
        """Re-emit an HTML comment (``<!--text-->``)."""
        self.pieces.append("<!--%s-->" % text)

    def handle_pi(self, text):
        """Re-emit a processing instruction (``<?text>``)."""
        self.pieces.append("<?%s>" % text)

    def handle_decl(self, text):
        """Re-emit a declaration such as a DOCTYPE (``<!text>``)."""
        self.pieces.append("<!%s>" % text)

    def start_a(self, attrs):
        """Re-emit an ``<a>`` start tag with its ``href`` redirected.

        All attributes are kept in their original order; only the value of
        ``href`` is replaced by the result of ``FindNewLocation``.
        """
        strattrs = ""
        for key, value in attrs:
            if key == 'href':
                value = self.FindNewLocation(value)
            strattrs += ' %s="%s"' % (key, value)
        self.pieces.append("<a%s>" % strattrs)

    def output(self):
        """Return processed HTML as a single string"""
        return "".join(self.pieces)

    def FindNewLocation(self, fileName):
        """Return the sibling-folder href for ``fileName``.

        The global ``allOldFiles`` list of ``(directory, file name)`` pairs
        is searched for an entry whose file name equals ``fileName``.  For
        the first match the result is ``"../<folder>/<fileName>"``, where
        ``<folder>`` is the last component of the matching directory.  When
        nothing matches, ``fileName`` is returned unchanged.
        """
        for path, name in allOldFiles:
            if name == fileName:
                # Use this entry's own name to build the path; the folder is
                # the last directory component in front of the file name.
                folder = split(split(path + name)[0])[1]
                return "../" + folder + "/" + fileName
        # If we don't find it, return original, might be http ref to ms docs
        return fileName
class FileMover:
    """Copy linked files into numbered sub-folders of a target directory.

    The methods rely on three module-level globals: ``allOldFiles`` (a list
    of ``(directory, file name)`` pairs), ``pathVar`` (the number of the
    sub-folder currently being filled) and ``subFolder`` (the sub-folder
    name prefix).
    """

    def Move(self, base, target, currentFile):
        """Copy ``base + currentFile`` below ``target``; return the new href.

        When ``base + currentFile`` is not an existing file, ``currentFile``
        is returned unchanged and nothing is copied.  Otherwise the file is
        copied into the folder that ``allOldFiles`` already lists for its
        name, or, failing that, into the folder chosen by
        ``EnsureCorrectTargetPath``.  The return value is
        ``"<folder name>/<file name>"``, i.e. a path relative to the
        directory above the destination folder.
        """
        src = base + currentFile
        if not os.path.isfile(src):
            return currentFile

        fileName = os.path.split(src)[1]
        targetFolder = self.TestIfFileExists(fileName, target)
        if targetFolder == "":
            targetFolder = self.EnsureCorrectTargetPath(target, pathVar)
        dst = targetFolder + fileName

        # shutil.copyfile raises when source and destination are the same
        # file, which happens when the link already points at its final
        # location; there is nothing to copy in that case.
        if os.path.realpath(src) != os.path.realpath(dst):
            shutil.copyfile(src, dst)

        # Return only the relative portion, which just assumes that the
        # index is in the directory above the destination folder.
        folderName = os.path.basename(os.path.dirname(dst))
        return "%s/%s" % (folderName, fileName)

    def TestIfFileExists(self, fileName, target):
        """Return the directory recorded for ``fileName`` in ``allOldFiles``.

        Returns the directory of the first entry whose file name equals
        ``fileName``, or ``""`` when there is none.  ``target`` is accepted
        but not used.
        """
        for path, name in allOldFiles:
            if name == fileName:
                return path
        return ""

    def EnsureCorrectTargetPath(self, basePath, varPath):
        """Return a ``<basePath><subFolder><number>/`` directory to copy into.

        The first candidate uses ``varPath`` as the number.  A candidate that
        does not exist yet is created and returned.  An existing candidate is
        returned when the tree below it holds at most 100 files; otherwise
        the global ``pathVar`` is incremented and its new value becomes the
        next candidate number.
        """
        global pathVar
        while True:
            testPath = "%s%s%s/" % (basePath, subFolder, varPath)
            if not os.path.isdir(testPath):
                os.mkdir(testPath)
                return testPath
            file_count = sum(len(names) for _, _, names in os.walk(testPath))
            if file_count <= 100:
                return testPath
            # Folder is full: advance the shared counter and try the next one.
            pathVar += 1
            varPath = pathVar
# ---------------------------------------------------------------------------
# Script section: rewrite the index page, then fix up the links inside every
# file that lives in one of the "folder*" sub-directories.
# ---------------------------------------------------------------------------

# Source and destination roots (currently the same directory), the prefix of
# the numbered sub-folders, and the number of the sub-folder being filled.
basePath = "../../../../docs/"
targetPath = "../../../../docs/"
subFolder = "folder"
pathVar = 1
allOldFiles = []

# Populate the list of the current files: every file found under a directory
# whose path contains 'folder', skipping anything below a '.svn' directory.
# The loops are kept at module level (not moved into a helper) so that their
# loop variables remain module globals, exactly as before.
for root, dirs, files in os.walk(basePath):
    if 'folder' in root and '.svn' not in root:
        for f in files:
            allOldFiles.append((root + "/", f))

# First pass: parse the original index.  BaseHTMLProcessor copies every
# linked local file into a sub-folder and rewrites the link accordingly.
usock = urllib.urlopen(basePath + "orgindex.html")
try:
    parser = BaseHTMLProcessor()
    parser.feed(usock.read())
finally:
    # Release the handle even when reading or parsing fails.
    usock.close()
parser.close()

print("writing new index file index.html")
indexFile = open(targetPath + "index.html", 'w')
try:
    indexFile.write(parser.output())
finally:
    indexFile.close()

# Second pass: collect the files again (the first pass may have copied new
# ones into the sub-folders) and manage the references inside them.
allOldFiles = []
for root, dirs, files in os.walk(basePath):
    if 'folder' in root and '.svn' not in root:
        for f in files:
            allOldFiles.append((root + "/", f))

# NOTE(review): every collected file is parsed and rewritten as HTML,
# whatever its type -- confirm that the sub-folders only hold HTML files.
for path, name in allOldFiles:
    usock = urllib.urlopen(path + name)
    try:
        parser = MofifyFileProcessor()
        parser.feed(usock.read())
    finally:
        usock.close()
    parser.close()

    print("modifying file: " + path + name)
    outFile = open(path + name, 'w')
    try:
        outFile.write(parser.output())
    finally:
        outFile.close()