PDF-ShellTools > Ideas/Suggestions

Batch Create Multipage PDFs based on Part of File name.

(1/4) > >>

RW:
I have a bunch of pdfs labeled like below in the same folder

00001235.001.pdf
00001235.002.pdf
00001235.003.pdf

00001236.001.pdf
00001236.002.pdf
00001236.003.pdf
00001236.004.pdf

with the first 8 characters representing the Document Name and the ".001" representing pg 1 of the document, and ".002" representing pg 2, etc.  I would like to merge all the pdfs that have the same first 8 characters of the file name as one pdf and label the resulting file with the 8 character Document Name, and then move down and merge the next similar Document Named pages, etc.  So after it completed this batch process, it would result in having two multipage pdfs with first being a 3 page pdf and being saved with the file name "00001235.pdf", and the next would be a 4 page pdf and being saved as "00001236.pdf".  Of course most of my folders have thousands of pdfs in them, but only a few hundred different Document Names, the rest differing only in the page numbers that follow.

Ultimately, it would be nice if you could tell it to merge all pdfs that have the same "X" # of characters, not just be limited to 8 characters, as some may only have 5 similar characters or 10 similar.

One other option to add, would be to have the ability to say merge all pdfs with same Document Name before the first Delimiter that you could choose, as some people may have files labeled like this:  Image1_0001.pdf, image1_0002.pdf and some like this Summer Pics_001.pdf, Summer Pics_002.pdf, Summer Pics_003.pdf  and all being in the same folder and would want to merge all similar Document names that are in front of the underscore. Since these Document names have different # of characters before the underscore, the character count wouldn’t work.  The results would be two files with one being a 2 page document labeled "image1.pdf" and the other being a 3 page document saved as "Summer Pics.pdf" 

If this function could also be used for Tifs or jpgs, converting them to pdf and then merging them into multipage pdfs, that would be really nice, since many people have all sorts of different file types in one folder (pdfs, tif, jpg).

Having the ability to do this in the context menu and in command line would be ideal.  Even adding this function to PDF Explorer would be beneficial, especially if you could add a column to the Explorer window that would count the # of characters of the file name, then you could sort on this column, select the files and merge accordingly.


RTT:
A My Scripts script will achieve this easily. I'm going to code a sample one and post it here soon.
Are the result PDFs to be saved at the same folder of the source files?

RTT:
Here's a script that, I think, covers all the above scenarios. To have it on your system, just import the attached MergeByFilenamePartMatch.myscript file into your PDF-ShellTools list of My Scripts.

--- Code: ---var ListOfFiles = pdfe.Arguments; //files handed to the script; read below as ListOfFiles(i) and ListOfFiles.length
//indexes into ListOfFiles, ordered by filename (see GetSortedFilesIndexArray)
var SortedFilesIndexArray = GetSortedFilesIndexArray(ListOfFiles);
//label/key of the group that holds the files left out of every merge
var ExcludedNodeName = 'Excluded';
//output filename -> array of ListOfFiles indexes to merge into it (rebuilt by ShowGroups)
var MergeGroups = {};
//number of files currently assigned to a merge group (i.e. not excluded)
var nValidMerges = 0;
//null = save next to the source files; otherwise the folder path picked by the user
var OutputFolder = null;
//handler of the grouping method applied last; re-run when the output folder changes
var LastGroupingMethodFunction = applyBtn_ByNChars_onClick;

//Create the GUI
//The page markup is kept as a block comment inside a dummy function and
//recovered at run time from the function's source text (toString()).
//The first replace() strips everything up to and including the opening "/*"
//(and an optional "!"), the second one strips from the closing "*/" to the end.
//NOTE: the first pattern stops at the first "/" it meets, so nothing containing
//a "/" (e.g. a "//" comment) may be placed between "function() {" and the "/*".
var html = function() {
/*<!DOCTYPE html>
<html>
<head>
<title>Merge by filename part match</title>
<style>
.fixed-opt-pannel {
  position: fixed;
  top: 0;
  left: 0;
  z-index: 9999;
  width: 100%;
  height: 50px;
  background-color: #E6E6FA;
}
label {
  padding-left:10px;
  display: inline-block;
  width: 120px;
}
#row {
    white-space: nowrap;
}
#row > div {
    display: inline-block;
    margin-top:3px;
}
#row > div + div {
    margin-left: 10px
}
</style>
</head>
<body bgcolor="#E6E6FA">
<nav class="fixed-opt-pannel">
<div style="float:right;margin-top:2px;margin-right:10px;width:100px;">
<button type="button" style="Width:100px;" onClick="window.open('http://www.rttsoftware.com/forum.php?dsturl=http%3A%2F%2Fwww.rttsoftware.com%2Fforum%2Findex.php%3Ftopic%3D503.msg1377')">Help</button>
<button type="button" id="MergeBtn" style="Width:100px;margin-top:2px">Merge</button>
</div>
<div id="row">
<div>
<label for="nChars">By first n chars:</label><input name="nChars" id="nChars" value="8" type="number" onkeypress='return event.charCode >= 48 && event.charCode <= 57'></input>
<input type="button" id="applyBtn_ByNChars" value="Apply"><br>
<label for="sepChar">By separator:</label><input name="sepChar" id="sepChar"  value="_" type="text"></input>
<input type="button" id="applyBtn_BySeparator" value="Apply">
</div>
<div>
<fieldset>
<legend>Output folder:</legend>
<input type="radio" id="ofsame" name="outfolder" value="Same" checked>Same
<input type="radio" id="ofspecify" name="outfolder" value="Specify">Specify
</fieldset>
</div>
</div>
</nav>
<div style="margin-top:55px;">
<select style="width:100%;" name="MergeGroups" id="MergeGroups">
</div>       
</body>
</html>
*/}.toString().replace(/^[^\/]+\/\*!?/, '').replace(/\*\/[^\/]+$/, '');

//Open an Internet Explorer window to host the GUI.
//NOTE(review): the second argument ("objIE_") is presumably the prefix used to
//bind the object's events to script functions such as objIE_OnQuit - confirm
//against the PDF-ShellTools scripting documentation.
var objIE = pdfe.CreateObject("InternetExplorer.Application", "objIE_");
objIE.toolbar = false;
objIE.Visible = true;
pdfe.BringWindowToFront(objIE.HWND /*, true*/ );
//load an empty page first, so that there is a document to write the GUI markup into
objIE.Navigate("about:blank");
IE_waitLoad(objIE);
objIE.Document.writeln(html);
objIE.Refresh();
//wait again: the handlers wired below need the GUI elements to exist
IE_waitLoad(objIE);

//buttons click event functions

//"By first n chars" Apply button handler: regroups the files by the first
//n characters of their names, n being read from the 'nChars' input field.
//  e - click event object (unused).
function applyBtn_ByNChars_onClick(e) {
    //explicit radix: older script engines parse a leading-zero value ("08") as octal
    var nChars = parseInt(objIE.document.getElementById('nChars').value, 10);
    ShowGroups(partial(NameByFirstNCharacters, nChars));
    //remembered so that an output folder change can re-run this same grouping
    LastGroupingMethodFunction = applyBtn_ByNChars_onClick;
}
//wire the handler to the "By first n chars" Apply button
objIE.document.getElementById('applyBtn_ByNChars').onclick = applyBtn_ByNChars_onClick;

//"By separator" Apply button handler: regroups the files by the part of their
//names that precedes the separator typed in the 'sepChar' input field.
//  e - click event object (unused).
function applyBtn_BySeparator_onClick(e) {
    var separator = objIE.document.getElementById('sepChar').value;
    var nameBySeparator = partial(NameBySeparator, separator);
    ShowGroups(nameBySeparator);
    //remembered so that an output folder change can re-run this same grouping
    LastGroupingMethodFunction = applyBtn_BySeparator_onClick;
}
//wire the handler to the "By separator" Apply button
objIE.document.getElementById('applyBtn_BySeparator').onclick = applyBtn_BySeparator_onClick;

//Click handler shared by the two "Output folder" radio buttons.
//"Specify" asks for a folder; "Same" clears any previously chosen one.
//Whenever the effective output folder changes, the last grouping method is
//re-run so that the listed output filenames reflect the new folder.
//  e - click event object (unused).
function SpecifyOutFolder_onclick(e) {
    var doc = objIE.document;

    if (!doc.getElementById('ofspecify').checked) {
        //"Same" is selected: drop a previously specified folder, if any
        if (OutputFolder != null) {
            OutputFolder = null;
            LastGroupingMethodFunction();
        }
        return;
    }

    var chosen = BrowseForFolder(objIE.HWND, 'Select output folder');
    if (chosen == null) {
        //dialog canceled: fall back to "Same" only if no folder was set before
        if (OutputFolder == null) {
            doc.getElementById('ofsame').checked = true;
        }
        return;
    }

    if (OutputFolder != chosen) {
        OutputFolder = chosen;
        LastGroupingMethodFunction();
    }
}
//both "Output folder" radio buttons share the same click handler
objIE.document.getElementById('ofspecify').onclick = SpecifyOutFolder_onclick;
objIE.document.getElementById('ofsame').onclick = SpecifyOutFolder_onclick;

//handle the deletion of files from group with the DEL keypress event
//The selected file is moved out of its merge group and into the "Excluded"
//group, both in the MergeGroups data and in the html selector.
//  e - keydown event object; the <select> element is taken from e.target or,
//      when that is missing, from e.srcElement.
function onMergeGroupKeyDown(e) {
    var sel = e.target ? e.target : e.srcElement;
    var isDelKey = e.keyCode && e.keyCode == 46 || e.which == 46;
    if (!isDelKey || !(sel.selectedIndex >= 0)) return;

    var selectedOption = sel.options[sel.selectedIndex];
    var optgroup = selectedOption.parentNode;
    if (ExcludedNodeName === optgroup.label) return; //already in the excluded node

    //remove from MergeGroups
    var groupName = optgroup.label;
    var index = findIndex(selectedOption); //position inside its optgroup == position in the group array
    var ExcludedFileIndex = MergeGroups[groupName].splice(index, 1)[0];
    objIE.document.getElementById('MergeBtn').disabled = --nValidMerges == 0;

    //remove from the html selector
    sel.remove(sel.selectedIndex);
    if (MergeGroups[groupName].length == 0) {
        //the group became empty: drop its header row too
        sel.removeChild(optgroup);
        sel.size--;
        delete MergeGroups[groupName];
    }

    //add the removed file to the excluded node list
    if (!(ExcludedNodeName in MergeGroups)) {
        MergeGroups[ExcludedNodeName] = [];
        optgroup = objIE.document.createElement("OPTGROUP");
        optgroup.label = ExcludedNodeName;
        sel.insertBefore(optgroup, sel.children[0]);
        sel.size++;
    } else {
        //the excluded node, when present, is the first group of the selector
        optgroup = sel.options[0].parentNode;
    }
    MergeGroups[ExcludedNodeName].push(ExcludedFileIndex);

    //now in the html
    //title/text are DOM properties holding plain text, so the filename is
    //assigned as is; html-escaping it here would show the entities literally
    //(e.g. "R&amp;D.pdf") in the list.
    var option = objIE.document.createElement("OPTION");
    var filename = ListOfFiles(ExcludedFileIndex);
    option.title = filename;
    option.text = filename.substring(filename.lastIndexOf('\\') + 1);
    optgroup.appendChild(option);
}
//listen for key presses on the groups list, so that DEL can exclude the selected file
objIE.document.getElementById("MergeGroups").onkeydown = onMergeGroupKeyDown;

//Where the files merge happens
//Merge button handler: closes the GUI, then, for every group other than the
//excluded one, feeds the group's files to the document merger and saves the
//result under the group's name (the computed output filename), reporting the
//outcome of each step through pdfe.echo.
function Merge_onclick() {
    //stop the GUI wait loop and close the browser window
    BrowserRunning = false;
    objIE.Visible = false;
    objIE.Quit();

    var Merger = pdfe.CreateDocumentMerger();
    var ProgressBar = pdfe.ProgressBar;
    ProgressBar.max = nValidMerges; //one progress step per merged source file

    for (var groupFilename in MergeGroups) {
        if (groupFilename == ExcludedNodeName) {
            continue;
        }
        var members = MergeGroups[groupFilename];
        pdfe.echo('>' + groupFilename);
        for (var ii = 0; ii < members.length; ii++) {
            ProgressBar.position++;
            var srcFilename = ListOfFiles(members[ii]);
            pdfe.echo('   Merging: ' + srcFilename);
            var merged = Merger.MergeDocument(srcFilename);
            if (merged) {
                pdfe.echo(' [OK]', 0, true);
            } else {
                pdfe.echo(' [Failed]', 0xFF0000, true);
            }
        }
        //save the new file
        var saved = Merger.EndAndSaveTo(groupFilename);
        if (!saved) {
            pdfe.echo(' [Failed]', 0xFF0000);
        }
        pdfe.echo('');
    }
    pdfe.echo('Done');
}
//wire the handler to the Merge button
objIE.document.getElementById('MergeBtn').onclick = Merge_onclick;

//Returns the indexes of FilesList ordered by ascending filename.
//  FilesList - collection of filenames, read as FilesList(i) and FilesList.length.
//  returns   - array holding every index 0..length-1, sorted by the name it refers to.
function GetSortedFilesIndexArray(FilesList) {
    var count = FilesList.length;
    var names = new Array(count);
    var filesIndexArray = new Array(count);
    //read every name once up front: the sort comparator runs many times per
    //item, and each FilesList(i) lookup is a call into the host collection
    for (var i = 0; i < count; i++) {
        names[i] = FilesList(i);
        filesIndexArray[i] = i;
    }
    filesIndexArray.sort(function(a, b) {
        return names[a] > names[b] ? 1 : names[a] < names[b] ? -1 : 0;
    });
    return filesIndexArray;
}

//Compute filename by start number of characters
//  nChars   - how many leading characters of the name (without folder and
//             extension) identify the output document.
//  filename - full path of a source file.
//  returns  - full path of the output pdf, or '' when nChars is not a positive
//             count or the name is not longer than nChars characters.
function NameByFirstNCharacters(nChars, filename) {
    var nameStart = filename.lastIndexOf('\\') + 1;
    var baseName = filename.substring(nameStart, filename.lastIndexOf('.'));
    if (!nChars || !(baseName.length > nChars)) {
        return '';
    }
    //no output folder chosen: save next to the source file
    var folder = OutputFolder ? OutputFolder : filename.substring(0, nameStart);
    return folder + baseName.substr(0, nChars) + '.pdf';
}

//Compute filename by separator
//  separator - text that marks the end of the document name inside the filename.
//  filename  - full path of a source file.
//  returns   - full path of the output pdf, named after the text preceding the
//              first separator, or '' when there is no such (non-empty) text.
function NameBySeparator(separator, filename) {
    var nameStart = filename.lastIndexOf('\\') + 1;
    var baseName = filename.substring(nameStart, filename.lastIndexOf('.'));
    var prefix = baseName.substring(0, baseName.indexOf(separator));
    if (prefix.length == 0) {
        return '';
    }
    //no output folder chosen: save next to the source file
    var folder = OutputFolder || filename.substring(0, nameStart);
    return folder + prefix + '.pdf';
}

//Returns the list of files grouped by equal part of the filename, part computed by the passed GetNameFunction
//  GetNameFunction - receives a source filename and returns the output filename
//                    of its group, or '' when the file fits no group.
//  returns         - object mapping each output filename to the array of
//                    ListOfFiles indexes in that group; files without a group go
//                    under ExcludedNodeName (key present only when not empty).
//Also recomputes nValidMerges, the number of files that did get a group.
function GetMergeGroups(GetNameFunction) {
    var total = SortedFilesIndexArray.length;
    var buckets = {};
    buckets[ExcludedNodeName] = []; //created first, so that it is listed first
    nValidMerges = 0;

    for (var pos = 0; pos < total; pos++) {
        var fileIndex = SortedFilesIndexArray[pos];
        var key = GetNameFunction(ListOfFiles(fileIndex));
        if (key.length == 0) {
            key = ExcludedNodeName;
        } else {
            nValidMerges++;
        }
        if (!(key in buckets)) {
            buckets[key] = [];
        }
        buckets[key].push(fileIndex);
    }

    //nothing was excluded: drop the empty placeholder
    if (nValidMerges == total) {
        delete buckets[ExcludedNodeName];
    }
    return buckets;
}

//Compute the merge groups, and show it in the GUI
//  Namefunct - function that maps a source filename to the output filename of
//              its group (see GetMergeGroups).
//Rebuilds MergeGroups, fills the 'MergeGroups' selector with one optgroup per
//group and one option per file, and enables the Merge button only when there
//is at least one file to merge.
function ShowGroups(Namefunct) {
    MergeGroups = GetMergeGroups(Namefunct);
    var groupsHtml = '';
    for (var item in MergeGroups) {
        //the label is read back (optgroup.label) and used as the MergeGroups key
        //when a file is excluded, so it must be escaped here in order to survive
        //the html parsing unchanged
        groupsHtml += '<optgroup  label="' + htmlspecialchars(item) + '">\n';
        for (var ii = 0; ii < MergeGroups[item].length; ii++) {
            var fullname = ListOfFiles(MergeGroups[item][ii]);
            var filename = fullname.substring(fullname.lastIndexOf('\\') + 1);
            groupsHtml += '<option title="' + htmlspecialchars(fullname) + '">' + htmlspecialchars(filename) + '<img src="dummy.gif" width="16" height="16">';
        }
        groupsHtml += '\n</optgroup>\n';
    }
    var selector = objIE.document.getElementById('MergeGroups');
    selector.innerHTML = groupsHtml;
    //show everything without scrolling: one row per option plus one per group header
    selector.size = selector.length + selector.children.length;
    objIE.document.getElementById('MergeBtn').disabled = nValidMerges == 0;
}

//start with the default grouping method
//(8 is also the initial value of the 'nChars' input field in the GUI markup)
ShowGroups(partial(NameByFirstNCharacters, 8));

//pass control to the GUI
//Poll until the flag is cleared (by Merge_onclick or objIE_OnQuit) or the
//browser window is no longer visible; meanwhile the GUI event handlers do the work.
var BrowserRunning = true;
while (BrowserRunning && objIE.Visible) {
    pdfe.Sleep(500);
}

//Clears the flag that keeps the GUI wait loop running.
//NOTE(review): presumably called by the host when the browser raises its OnQuit
//event, through the "objIE_" prefix given to pdfe.CreateObject - confirm.
function objIE_OnQuit() {
    BrowserRunning = false;
}

/*************************************************************************/
/*************************************************************************/

//http://stackoverflow.com/questions/373157/how-can-i-pass-a-reference-to-a-function-with-parameters
//Partial application: returns a function that calls func with the arguments
//given here placed before the ones it receives itself.
//  func    - function to wrap; any further arguments are the ones to pre-fill.
//  returns - wrapper that forwards its own "this" and returns func's result.
function partial(func) {
    var presetArgs = Array.prototype.slice.call(arguments, 1);
    return function() {
        var callArgs = Array.prototype.slice.call(arguments);
        return func.apply(this, presetArgs.concat(callArgs));
    };
}

//Escapes the characters that have a special meaning in html (& " ' < >).
//  str     - text to escape.
//  returns - the escaped text; a value that is not a string is returned untouched.
function htmlspecialchars(str) {
    if (typeof(str) != "string") {
        return str;
    }
    return str
        .replace(/&/g, "&amp;") /* must do &amp; first */
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#039;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
}

//Wait until Internet Explorer document loading is complete.
//  pIE - the browser object whose Busy and Document.readyState are polled.
//Returns only after the browser has been seen idle (not Busy, readyState
//"complete") on two checks at least 50 ms apart; there is no timeout.
function IE_waitLoad(pIE) {
    var stat, dstart;
    //stat 0: waiting for the first "idle" reading; stat 1: confirming that it holds
    stat = 0;
    while (true) {
        if (stat == 0) {
            if (!pIE.Busy) {
                if (pIE.Document.readyState == "complete") {
                    //remember when the browser was first seen idle
                    dstart = new Date().getTime();
                    stat = 1;
                }
            }
        } else {
            if (!pIE.Busy && pIE.Document.readyState == "complete") {
                //still idle 50 ms (or more) later: consider the load finished
                if (new Date().getTime() >= dstart + 50) {
                    break;
                }
            } else {
                //became busy again: start over
                stat = 0;
            }
        }
        pdfe.sleep(50)
    }
}

//Finds the index of an html element in the parent children's list
//  node    - the element to locate.
//  returns - how many element nodes come before it under the same parent.
function findIndex(node) {
    var position = 0;
    //walk back through all the preceding siblings, counting only the
    //element nodes (nodeType 1); text and comment nodes are skipped
    for (var sibling = node.previousSibling; sibling; sibling = sibling.previousSibling) {
        if (sibling.nodeType === 1) {
            position++;
        }
    }
    return position;
}

//Shows the shell's folder selection dialog.
//  HWND       - handle of the window that owns the dialog.
//  sTitle     - text shown in the dialog.
//  rootFolder - optional root folder, handed to the shell as is.
//  returns    - the chosen folder path, always ending with a backslash, or
//               null when nothing was chosen.
function BrowseForFolder(HWND, sTitle, rootFolder) {
    var shell = new ActiveXObject("shell.application");
    //0x00000001 is the shell's BIF_RETURNONLYFSDIRS option
    var picked = shell.BrowseForFolder(HWND, sTitle, 0x00000001, rootFolder);
    if (picked == null) {
        return null;
    }
    var path = picked.Self.Path;
    var endsWithBackslash = path.charAt(path.length - 1) == '\\';
    return endsWithBackslash ? path : path + "\\";
}

--- End code ---

RW:
thank you very much.

nightslayer23:
I can't get this to work..
Isn't it supposed to merge files? Mine is a preview only, nothing happens when you hit "merge"

Navigation

[0] Message Index

[#] Next page

Go to full version