#!/usr/bin/perl
# File  : fixdups.pl
# Author: Lyndon Hill, http://www.lyndonhill.com
# Note  : This script is free to use. If you make significant improvements
#         please send me a copy.
#
# Script to fix duplicate entries (generate the duplicate list first).
# Usage:
#   fixdups.pl
#
# A temporary file "dupscript" will be created. Please delete it.
# The output of the script will be a file with the name dictionary-new
# where "dictionary" is the name you specify in the $dictionary variable
# a few lines below.
#
# How it works: every line of the duplicate list describes one duplicate
# entry (where to cut it and where to paste it).  The script turns that
# list into a sequence of ed(1) "delete" and "move" commands, writes them
# to "dupscript" and feeds that file to ed.

use strict;
use warnings;

# User editable variables

# the dictionary file with duplicates
my $dictionary = "en-ka.bedic";

# the name of the duplicate list file
my $duplicatelist = "duplicate-list";

# End of user editable variables

# the temporary file that receives the generated ed commands
my $edscript = "dupscript";

# Load in duplicate list.
#
# Each line is split on single spaces; only three of the fields are used:
#   field 2 (index 1) - line number of the duplicate's entry line
#   field 4 (index 3) - line number where the duplicate ends
#   field 9 (index 8) - line number the duplicate's body is moved after
# NOTE(review): the remaining fields are assumed to be filler words written
# by the duplicate-list generator -- confirm against that script.
my (@cutstart, @cutend, @destination);

open(my $dup_fh, '<', $duplicatelist)
    or die "Cannot open duplicate list '$duplicatelist': $!\n";

while (my $line = <$dup_fh>) {
    # Strip the line terminator only.  (chop would also eat the last digit
    # of the destination when the final line has no trailing newline.)
    $line =~ s/[\r\n]+\z//;
    next unless $line =~ /\S/;    # ignore blank lines

    my ($start, $end, $dest) = (split / /, $line)[1, 3, 8];

    # All three values take part in line arithmetic and are written into
    # ed addresses, so anything but a plain number would produce a broken
    # ed script.  Fail early with a pointer to the offending line instead.
    for my $value ($start, $end, $dest) {
        die "Malformed line $. in '$duplicatelist': $line\n"
            unless defined $value && $value =~ /\A\d+\z/;
    }

    push @cutstart,    $start;
    push @cutend,      $end;
    push @destination, $dest;
}

close($dup_fh);

my $duplicates = scalar @cutstart;

# Modify line numbers to take into account cut and paste: the ed commands
# are executed one after another, so every command changes the line
# numbers that all later commands have to use.
for my $i (0 .. $duplicates - 1) {
    for my $j ($i + 1 .. $duplicates - 1) {
        # shift every following duplicate up by two lines
        # (each merge deletes the entry line and the blank line before it)
        $cutstart[$j] -= 2;
        $cutend[$j]   -= 2;

        if ($destination[$j] > $destination[$i]) {
            if ($destination[$j] > $cutend[$i]) {
                # small shift if straightforward merging
                $destination[$j] -= 2;
            }
            else {
                # shift down destinations for entries after paste position
                # (ordered merging)
                $destination[$j] += ($cutend[$i] - $cutstart[$i]);
            }
        }

        # Make multiple merges point to end of current merge.
        # This is deliberately a separate test evaluated AFTER the
        # adjustment above, exactly as in the original script.
        if ($destination[$j] == $destination[$i]) {
            $destination[$j] = $destination[$j] + ($cutend[$i] - $cutstart[$i]);
        }
    }
}

# output modification file
open(my $script_fh, '>', $edscript)
    or die "Cannot create ed script '$edscript': $!\n";

# "H" switches on ed's explanatory error messages
print {$script_fh} "H\n";

for my $i (0 .. $duplicates - 1) {
    my $blank_line = $cutstart[$i] - 1;

    # delete entry line at duplicate and blank line before
    print {$script_fh} $blank_line . ",$cutstart[$i]d\n";

    # Move body of duplicate to end of original entry.  The two lines just
    # deleted have pulled the body up, hence the "-1" / "-2" addresses.
    print {$script_fh} $blank_line . "," . ($cutend[$i] - 2) . "m"
        . $destination[$i] . "\n";
}

print {$script_fh} "w ${dictionary}-new\n";
print {$script_fh} "q\n";

# buffered write errors only surface at close time
close($script_fh)
    or die "Cannot finish writing ed script '$edscript': $!\n";

# Perform the moves using ed.  The command file is attached to our own
# standard input, which ed inherits; running ed through the list form of
# system() keeps the dictionary name away from the shell.
open(STDIN, '<', $edscript)
    or die "Cannot read ed script '$edscript': $!\n";

my $status = system('ed', '-', $dictionary);
die "Cannot run ed: $!\n" if $status == -1;
die "ed failed while processing '$dictionary' (exit code "
    . ($status >> 8) . ")\n"
    if $status != 0;

print "Dictionary file duplicate entries now joined together.\n";
exit;