#!/usr/bin/env python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""This program converts a Subversion WC from 1.7-dev format 18 to
   1.7-dev format 19 by migrating data from multiple DBs to a single DB.

   Usage: bump-to-19.py WC_ROOT_DIR
   where WC_ROOT_DIR is the path to the WC root directory.

   Skips non-WC dirs and WC dirs that are not at format 18."""

# TODO: Detect '_svn' as an alternative to '.svn'.

# TODO: Probably should remove any directory that is in state to-be-deleted
#       and doesn't have its 'keep_local' flag set.  Otherwise it will
#       become unversioned after commit, whereas format-18 and earlier would
#       have deleted it after commit.  Before deleting we should check there
#       are no unversioned things inside, and maybe even check for "local
#       mods" even though that's logically impossible.  On the other hand
#       it's not a big deal for the user to clean these up manually.

import os
import shutil
import sqlite3
import sys

# Name of the Subversion administrative directory inside each WC directory.
dot_svn = '.svn'


def dotsvn_path(wc_path):
    """Return the path of the admin directory of the WC dir WC_PATH."""
    return os.path.join(wc_path, dot_svn)


def db_path(wc_path):
    """Return the path of the 'wc.db' file of the WC dir WC_PATH."""
    return os.path.join(wc_path, dot_svn, 'wc.db')


def pristine_path(wc_path):
    """Return the path of the pristine-text dir of the WC dir WC_PATH."""
    return os.path.join(wc_path, dot_svn, 'pristine')


def tmp_path(wc_path):
    """Return the path of the admin 'tmp' dir of the WC dir WC_PATH."""
    return os.path.join(wc_path, dot_svn, 'tmp')


class NotASubversionWC(Exception):
    """Raised when the WC database of a directory cannot be opened."""

    def __init__(self, wc_path):
        super(NotASubversionWC, self).__init__(wc_path)
        self.wc_path = wc_path

    def __str__(self):
        return "not a Subversion WC: '" + self.wc_path + "'"


class WrongFormatException(Exception):
    """Raised when a WC database is at a format other than 18."""

    def __init__(self, wc_dir, format):
        super(WrongFormatException, self).__init__(wc_dir, format)
        self.wc_dir = wc_dir
        self.format = format

    def __str__(self):
        return ("format is " + str(self.format) + " not 18: '"
                + self.wc_dir + "'")


# The *_DB1 statements copy the subdirectory's own row (local_relpath = '')
# into the root DB under the relpath ?1 with parent ?2, replacing any row the
# root DB already holds for that relpath.  The *_DB2 statements copy the
# subdirectory's children, prefixing their relpaths with ?1.

STMT_COPY_BASE_NODE_TABLE_TO_WCROOT_DB1 = (
    "INSERT OR REPLACE INTO root.BASE_NODE ( "
    " wc_id, local_relpath, repos_id, repos_relpath, parent_relpath, "
    " presence, kind, revnum, checksum, translated_size, changed_rev, "
    " changed_date, changed_author, depth, symlink_target, last_mod_time, "
    " properties, dav_cache, incomplete_children, file_external ) "
    "SELECT wc_id, ?1, repos_id, repos_relpath, ?2 AS parent_relpath, "
    " presence, kind, revnum, checksum, translated_size, changed_rev, "
    " changed_date, changed_author, depth, symlink_target, last_mod_time, "
    " properties, dav_cache, incomplete_children, file_external "
    "FROM BASE_NODE WHERE local_relpath = ''; "
)

STMT_COPY_BASE_NODE_TABLE_TO_WCROOT_DB2 = (
    "INSERT INTO root.BASE_NODE ( "
    " wc_id, local_relpath, repos_id, repos_relpath, parent_relpath, "
    " presence, kind, revnum, checksum, translated_size, changed_rev, "
    " changed_date, changed_author, depth, symlink_target, last_mod_time, "
    " properties, dav_cache, incomplete_children, file_external ) "
    "SELECT wc_id, ?1 || '/' || local_relpath, repos_id, repos_relpath, "
    " ?1 AS parent_relpath, "
    " presence, kind, revnum, checksum, translated_size, changed_rev, "
    " changed_date, changed_author, depth, symlink_target, last_mod_time, "
    " properties, dav_cache, incomplete_children, file_external "
    "FROM BASE_NODE WHERE local_relpath != ''; "
)

STMT_COPY_WORKING_NODE_TABLE_TO_WCROOT_DB1 = (
    "INSERT OR REPLACE INTO root.WORKING_NODE ( "
    " wc_id, local_relpath, parent_relpath, presence, kind, checksum, "
    " translated_size, changed_rev, changed_date, changed_author, depth, "
    " symlink_target, copyfrom_repos_id, copyfrom_repos_path, copyfrom_revnum, "
    " moved_here, moved_to, last_mod_time, properties, keep_local ) "
    "SELECT wc_id, ?1, ?2 AS parent_relpath, "
    " presence, kind, checksum, "
    " translated_size, changed_rev, changed_date, changed_author, depth, "
    " symlink_target, copyfrom_repos_id, copyfrom_repos_path, copyfrom_revnum, "
    " moved_here, moved_to, last_mod_time, properties, keep_local "
    "FROM WORKING_NODE WHERE local_relpath = ''; "
)

STMT_COPY_WORKING_NODE_TABLE_TO_WCROOT_DB2 = (
    "INSERT INTO root.WORKING_NODE ( "
    " wc_id, local_relpath, parent_relpath, presence, kind, checksum, "
    " translated_size, changed_rev, changed_date, changed_author, depth, "
    " symlink_target, copyfrom_repos_id, copyfrom_repos_path, copyfrom_revnum, "
    " moved_here, moved_to, last_mod_time, properties, keep_local ) "
    "SELECT wc_id, ?1 || '/' || local_relpath, ?1 AS parent_relpath, "
    " presence, kind, checksum, "
    " translated_size, changed_rev, changed_date, changed_author, depth, "
    " symlink_target, copyfrom_repos_id, copyfrom_repos_path, copyfrom_revnum, "
    " moved_here, moved_to, last_mod_time, properties, keep_local "
    "FROM WORKING_NODE WHERE local_relpath != ''; "
)

STMT_COPY_ACTUAL_NODE_TABLE_TO_WCROOT_DB1 = (
    "INSERT OR REPLACE INTO root.ACTUAL_NODE ( "
    " wc_id, local_relpath, parent_relpath, properties, "
    " conflict_old, conflict_new, conflict_working, "
    " prop_reject, changelist, text_mod, tree_conflict_data, "
    " conflict_data, older_checksum, left_checksum, right_checksum ) "
    "SELECT wc_id, ?1, ?2 AS parent_relpath, properties, "
    " conflict_old, conflict_new, conflict_working, "
    " prop_reject, changelist, text_mod, tree_conflict_data, "
    " conflict_data, older_checksum, left_checksum, right_checksum "
    "FROM ACTUAL_NODE WHERE local_relpath = ''; "
)

STMT_COPY_ACTUAL_NODE_TABLE_TO_WCROOT_DB2 = (
    "INSERT INTO root.ACTUAL_NODE ( "
    " wc_id, local_relpath, parent_relpath, properties, "
    " conflict_old, conflict_new, conflict_working, "
    " prop_reject, changelist, text_mod, tree_conflict_data, "
    " conflict_data, older_checksum, left_checksum, right_checksum ) "
    "SELECT wc_id, ?1 || '/' || local_relpath, ?1 AS parent_relpath, properties, "
    " conflict_old, conflict_new, conflict_working, "
    " prop_reject, changelist, text_mod, tree_conflict_data, "
    " conflict_data, older_checksum, left_checksum, right_checksum "
    "FROM ACTUAL_NODE WHERE local_relpath != ''; "
)

STMT_COPY_LOCK_TABLE_TO_WCROOT_DB = (
    "INSERT INTO root.LOCK "
    "SELECT * FROM LOCK; "
)

STMT_COPY_PRISTINE_TABLE_TO_WCROOT_DB = (
    "INSERT OR REPLACE INTO root.PRISTINE "
    "SELECT * FROM PRISTINE; "
)

# Yields a row if ?1 is recorded with kind 'subdir' in either node table.
STMT_SELECT_SUBDIR = (
    "SELECT 1 FROM BASE_NODE WHERE local_relpath=?1 AND kind='subdir' "
    "UNION "
    "SELECT 0 FROM WORKING_NODE WHERE local_relpath=?1 AND kind='subdir';"
)


def copy_db_rows_to_wcroot(wc_subdir_relpath):
    """Copy all relevant table rows from the $PWD/WC_SUBDIR_RELPATH/.svn/wc.db
       into $PWD/.svn/wc.db.

       Raise NotASubversionWC if the subdirectory's DB cannot be opened.
       Any sqlite3 error raised while copying propagates to the caller; in
       that case nothing is committed."""

    wc_root_path = ''
    wc_subdir_parent_relpath = os.path.dirname(wc_subdir_relpath)

    try:
        db = sqlite3.connect(db_path(wc_subdir_relpath))
    except sqlite3.OperationalError:
        raise NotASubversionWC(wc_subdir_relpath)

    try:
        c = db.cursor()

        c.execute("ATTACH '" + db_path(wc_root_path) + "' AS 'root'")

        ### TODO: the REPOSITORY table. At present we assume there is only one
        # repository in use and its repos_id is consistent throughout the WC.
        # That's not always true - e.g. "svn switch --relocate" creates
        # repos_id 2, and then "svn mkdir" uses repos_id 1 in the
        # subdirectory.

        own_row_params = (wc_subdir_relpath, wc_subdir_parent_relpath)
        child_row_params = (wc_subdir_relpath,)

        c.execute(STMT_COPY_BASE_NODE_TABLE_TO_WCROOT_DB1, own_row_params)
        c.execute(STMT_COPY_BASE_NODE_TABLE_TO_WCROOT_DB2, child_row_params)
        c.execute(STMT_COPY_WORKING_NODE_TABLE_TO_WCROOT_DB1, own_row_params)
        c.execute(STMT_COPY_WORKING_NODE_TABLE_TO_WCROOT_DB2, child_row_params)
        c.execute(STMT_COPY_ACTUAL_NODE_TABLE_TO_WCROOT_DB1, own_row_params)
        c.execute(STMT_COPY_ACTUAL_NODE_TABLE_TO_WCROOT_DB2, child_row_params)
        c.execute(STMT_COPY_LOCK_TABLE_TO_WCROOT_DB)
        c.execute(STMT_COPY_PRISTINE_TABLE_TO_WCROOT_DB)

        db.commit()
    finally:
        # Always release the connection, even when a statement failed.
        db.close()


def move_and_shard_pristine_files(old_wc_path, new_wc_path):
    """Move all pristine text files from 'OLD_WC_PATH/.svn/pristine/'
       into 'NEW_WC_PATH/.svn/pristine/??/', creating shard dirs where
       necessary."""

    old_pristine_dir = pristine_path(old_wc_path)
    new_pristine_dir = pristine_path(new_wc_path)

    if not os.path.exists(old_pristine_dir):
        # That's fine, assuming there are no pristine texts.
        return

    for basename in os.listdir(old_pristine_dir):
        shard = basename[:2]
        if shard == basename:  # already converted
            continue
        old = os.path.join(old_pristine_dir, basename)
        new = os.path.join(new_pristine_dir, shard, basename)
        # os.renames() creates the shard dir as needed and prunes the old
        # directory once it becomes empty.
        os.renames(old, new)


def select_subdir(wc_subdir_path):
    """Return True if WC_SUBDIR_PATH is known to be a versioned subdir,
       False otherwise.

       The lookup is made in the DB of the current working directory.
       Raise NotASubversionWC if that DB cannot be opened."""

    try:
        db = sqlite3.connect(db_path(''))
    except sqlite3.OperationalError:
        raise NotASubversionWC(wc_subdir_path)

    try:
        c = db.cursor()
        c.execute(STMT_SELECT_SUBDIR, (wc_subdir_path,))
        return c.fetchone() is not None
    finally:
        db.close()


def _migrate_subdirs_below_cwd():
    """Merge the DB and pristine texts of every format-18 versioned subdir
       below the current directory into the current directory's admin dir.
       Return the list of relative paths of the subdirs that were migrated."""

    migrated_subdirs = []

    # For each directory in the WC, try to migrate each of its subdirs (DIRS).
    # Done this way because (a) os.walk() gives us lists of subdirs, and (b)
    # it's easy to skip the WC root dir.
    for dir_path, dirs, files in os.walk('.'):

        # don't walk into the '.svn' subdirectory
        try:
            dirs.remove(dot_svn)
        except ValueError:
            # a non-WC dir: don't walk into any subdirectories
            print("skipped: ", NotASubversionWC(dir_path))
            del dirs[:]
            continue

        # Try to migrate each other subdirectory
        for subdir in dirs[:]:  # copy so we can remove some
            wc_subdir_path = os.path.join(dir_path, subdir)
            if wc_subdir_path.startswith('./'):
                wc_subdir_path = wc_subdir_path[2:]

            if not select_subdir(wc_subdir_path):
                print("skipped:", wc_subdir_path)
                dirs.remove(subdir)
                continue

            try:
                check_wc_format_number(wc_subdir_path)
                print("migrating '" + wc_subdir_path + "'")
                copy_db_rows_to_wcroot(wc_subdir_path)
                move_and_shard_pristine_files(wc_subdir_path, '.')
                migrated_subdirs.append(wc_subdir_path)
            except (WrongFormatException, NotASubversionWC) as e:
                print("skipped:", e)
                # don't walk into it
                dirs.remove(subdir)
                continue

    return migrated_subdirs


def _delete_migrated_admin_dirs(migrated_subdirs):
    """Delete what is left of the admin dir of each of MIGRATED_SUBDIRS.
       Return the list of subdirs whose admin dir could not be deleted."""

    failed_delete_subdirs = []
    for wc_subdir_path in migrated_subdirs:
        print("deleting " + dotsvn_path(wc_subdir_path))
        try:
            os.remove(db_path(wc_subdir_path))
            if os.path.exists(pristine_path(wc_subdir_path)):
                os.rmdir(pristine_path(wc_subdir_path))
            shutil.rmtree(tmp_path(wc_subdir_path))
            os.rmdir(dotsvn_path(wc_subdir_path))
        except Exception as e:
            # Best effort: report the problem and carry on with the rest.
            print(e)
            failed_delete_subdirs.append(wc_subdir_path)
    return failed_delete_subdirs


def migrate_wc_subdirs(wc_root_path):
    """Move Subversion metadata from the admin dir of each subdirectory
       below WC_ROOT_PATH into WC_ROOT_PATH's own admin dir.

       The current working directory is changed to WC_ROOT_PATH for the
       duration of the call and restored afterwards, even on error."""

    old_cwd = os.getcwd()
    os.chdir(wc_root_path)
    try:
        # Keep track of which dirs we've migrated so we can delete their
        # .svn's afterwards.  Done this way because the tree walking is
        # top-down and if we deleted the .svn before walking into the subdir,
        # it would look like an unversioned subdir.
        migrated_subdirs = _migrate_subdirs_below_cwd()

        # Delete the remaining parts of the migrated .svn dirs
        # Make a note of any problems in deleting.
        failed_delete_subdirs = _delete_migrated_admin_dirs(migrated_subdirs)

        # Notify any problems in deleting
        if failed_delete_subdirs:
            print("Failed to delete the following directories. "
                  "Please delete them manually.")
            for wc_subdir_path in failed_delete_subdirs:
                print("  " + dotsvn_path(wc_subdir_path))
    finally:
        os.chdir(old_cwd)


def check_wc_format_number(wc_path):
    """Check that the WC format of the WC dir WC_PATH is 18.
       Raise a WrongFormatException if not.
       Raise NotASubversionWC if the DB cannot be opened."""

    try:
        db = sqlite3.connect(db_path(wc_path))
    except sqlite3.OperationalError:
        raise NotASubversionWC(wc_path)

    try:
        c = db.cursor()
        c.execute("PRAGMA user_version;")
        wc_format = c.fetchone()[0]
    finally:
        db.close()

    if wc_format != 18:
        raise WrongFormatException(wc_path, wc_format)


def bump_wc_format_number(wc_path):
    """Bump the WC format number of the WC dir WC_PATH to 19.
       Raise NotASubversionWC if the DB cannot be opened."""

    try:
        db = sqlite3.connect(db_path(wc_path))
    except sqlite3.OperationalError:
        raise NotASubversionWC(wc_path)

    try:
        c = db.cursor()
        c.execute("PRAGMA user_version = 19;")
        db.commit()
    finally:
        db.close()


def main(argv):
    """Run the conversion for the command line ARGV.
       Return the process exit code: 0 on success, 1 on usage or WC error."""

    if len(argv) != 2:
        # The module docstring is the usage text; it can be unavailable
        # (e.g. stripped by 'python -OO'), so fall back to a short line.
        print(__doc__ or "Usage: bump-to-19.py WC_ROOT_DIR")
        return 1

    wc_root_path = argv[1]

    try:
        check_wc_format_number(wc_root_path)
    except (WrongFormatException, NotASubversionWC) as e:
        print("error:", e)
        return 1

    print("merging subdir DBs into single DB '" + wc_root_path + "'")
    move_and_shard_pristine_files(wc_root_path, wc_root_path)
    migrate_wc_subdirs(wc_root_path)
    bump_wc_format_number(wc_root_path)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))