"""Sort the files linked from a documentation index into numbered sub-folders.

The script works in two passes over the tree rooted at ``basePath``:

1. ``orgindex.html`` is parsed; every ``<a href>`` that names an existing file
   under ``basePath`` is copied into a ``<subFolder><N>/`` directory below
   ``targetPath`` and the link is rewritten to ``<folder>/<file>``.  The result
   is written to ``index.html``.
2. Every file found in those sub-folders is parsed and its links to known
   file names are rewritten to ``../<folder>/<file>``.

This is Python 2 code: it depends on ``sgmllib`` and ``htmlentitydefs``.
"""

from sgmllib import SGMLParser
import urllib
import htmlentitydefs
import shutil
import os
import os.path
from os.path import join, getsize, split

# Module-level state shared by the classes below.  The path values must end
# with a slash because file paths are built by plain string concatenation.
basePath = "../../../../docs/"      # where orgindex.html and its targets live
targetPath = "../../../../docs/"    # where the numbered sub-folders are created
subFolder = "folder"                # sub-folder name prefix ("folder1", ...)
pathVar = 1                         # number of the sub-folder currently filled
allOldFiles = []                    # (directory + "/", file name) pairs


def _collect_folder_files(base):
    """Return ``(directory + "/", file name)`` for files in the sub-folders.

    A directory qualifies when its path contains ``subFolder`` and does not
    contain ``.svn``.
    """
    found = []
    for root, dirs, files in os.walk(base):
        if subFolder in root and '.svn' not in root:
            for fileName in files:
                found.append((root + "/", fileName))
    return found


class URLLister(SGMLParser):
    """Collect the ``href`` value of every ``<a>`` tag in ``self.urls``."""

    def reset(self):
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        self.urls.extend(v for k, v in attrs if k == 'href')


class BaseHTMLProcessor(SGMLParser):
    """Re-emit the parsed HTML, relocating the files that ``<a>`` tags link to.

    Each parser callback appends the reconstructed markup to ``self.pieces``;
    ``output()`` joins the pieces.  Only ``href`` attributes of ``<a>`` tags
    are changed, through ``_rewrite_href``.
    """

    def reset(self):
        self.pieces = []
        SGMLParser.reset(self)

    def unknown_starttag(self, tag, attrs):
        strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
        self.pieces.append("<%s%s>" % (tag, strattrs))

    def unknown_endtag(self, tag):
        # Also receives </a>: no end_a handler is defined.
        self.pieces.append("</%s>" % tag)

    def handle_charref(self, ref):
        self.pieces.append("&#%s;" % ref)

    def handle_entityref(self, ref):
        self.pieces.append("&%s" % ref)
        # Only known entities get their terminating semicolon back.
        if ref in htmlentitydefs.entitydefs:
            self.pieces.append(";")

    def handle_data(self, text):
        self.pieces.append(text)

    def handle_comment(self, text):
        self.pieces.append("<!--%s-->" % text)

    def handle_pi(self, text):
        self.pieces.append("<?%s>" % text)

    def handle_decl(self, text):
        self.pieces.append("<!%s>" % text)

    def start_a(self, attrs):
        """Emit the ``<a>`` start tag with its ``href`` passed through the hook."""
        parts = []
        for key, value in attrs:
            if key == 'href':
                value = self._rewrite_href(value)
            parts.append(' %s="%s"' % (key, value))
        self.pieces.append("<a%s>" % "".join(parts))

    def _rewrite_href(self, href):
        """Copy the linked file into a sub-folder and return the new link."""
        return FileMover().Move(basePath, targetPath, href)

    def output(self):
        """Return processed HTML as a single string"""
        return "".join(self.pieces)


class MofifyFileProcessor(BaseHTMLProcessor):
    """Re-emit the parsed HTML, pointing links at already relocated files.

    Identical to ``BaseHTMLProcessor`` except that no file is copied: an
    ``href`` is only looked up in ``allOldFiles``.
    """

    def _rewrite_href(self, href):
        return self.FindNewLocation(href)

    def FindNewLocation(self, fileName):
        """Return ``../<folder>/<fileName>`` for a known file name.

        The first entry of ``allOldFiles`` with that name wins.  An unknown
        name is returned unchanged.
        """
        for path, name in allOldFiles:
            if name == fileName:
                folder = os.path.basename(os.path.dirname(path + name))
                return "../" + folder + "/" + fileName
        # If we don't find it, return original, might be http ref to ms docs
        return fileName


class FileMover:
    """Copy linked files into the numbered sub-folders below a target path."""

    def Move(self, base, target, currentFile):
        """Copy ``base + currentFile`` into a sub-folder of ``target``.

        Returns the link ``<folder>/<file name>`` for the copied file, or
        ``currentFile`` unchanged when ``base + currentFile`` is not an
        existing file (for example an http reference).
        """
        relFile = currentFile
        src = base + currentFile
        if os.path.isfile(src):
            fileName = os.path.split(src)[1]
            # Reuse the folder that already holds a file of this name.
            targetFolder = self.TestIfFileExists(fileName, target)
            if not targetFolder:
                targetFolder = self.EnsureCorrectTargetPath(target, pathVar)
            dst = targetFolder + fileName
            folderName = os.path.basename(os.path.dirname(dst))
            # shutil.copyfile raises when both names refer to the same file.
            if os.path.abspath(src) != os.path.abspath(dst):
                shutil.copyfile(src, dst)
            relFile = "%s/%s" % (folderName, fileName)
        # now remove the basePath and return only the relative portion
        # which just assumes that the index is in the directory above
        return relFile

    def TestIfFileExists(self, fileName, target):
        """Return the directory recorded for ``fileName``, or ``""``.

        ``target`` is accepted for interface compatibility and is not used.
        """
        for path, name in allOldFiles:
            if name == fileName:
                return path
        return ""

    def EnsureCorrectTargetPath(self, basePath, varPath):
        """Return a sub-folder of ``basePath`` holding at most 100 files.

        Starts at ``<basePath><subFolder><varPath>/``.  While the candidate
        exists and holds more than 100 files, the global ``pathVar`` is
        incremented and the folder of that number is tried.  A missing folder
        is created.
        """
        global pathVar
        while True:
            testPath = "%s%s%s/" % (basePath, subFolder, varPath)
            if not os.path.isdir(testPath):
                os.mkdir(testPath)
                return testPath
            file_count = sum(len(files) for _, _, files in os.walk(testPath))
            if file_count <= 100:
                return testPath
            pathVar += 1
            varPath = pathVar


def _parse_file(path, processor):
    """Feed the content of ``path`` to ``processor`` and return its output."""
    source = open(path, 'rb')
    try:
        processor.feed(source.read())
    finally:
        source.close()
    processor.close()
    return processor.output()


def _write_file(path, content):
    """Write ``content`` to ``path``, replacing the file."""
    # Binary mode matches the binary read, so line endings pass through as is.
    target = open(path, 'wb')
    try:
        target.write(content)
    finally:
        target.close()


def main():
    """Build index.html, then fix the links inside the relocated files."""
    # populate the list of the current files, all files in folderx subfolders
    allOldFiles[:] = _collect_folder_files(basePath)

    newIndex = _parse_file(basePath + "orgindex.html", BaseHTMLProcessor())
    print("writing new index file index.html")
    _write_file(targetPath + "index.html", newIndex)

    # now walk over the list of files in the folderx subfolders and manage
    # the references in there
    allOldFiles[:] = _collect_folder_files(basePath)
    # NOTE(review): every file is re-parsed and rewritten, whatever its type;
    # this presumably assumes the sub-folders hold only HTML - confirm.
    for path, name in allOldFiles:
        content = _parse_file(path + name, MofifyFileProcessor())
        print("modifying file: " + path + name)
        _write_file(path + name, content)


if __name__ == "__main__":
    main()