PDF-ShellTools > Ideas/Suggestions

Rasterizing PDFs?

(1/4) > >>

nightslayer23:
Got any thoughts on a way to rasterize PDF documents?
Would look to highlight a group of files and right click, select rasterize.
Would be for very heavily layered files that take too long to load / print.

RTT:
The following script rasterizes all the document pages into image files, using the extract images tool command line interface, and then converts all these image files into a PDF with the same name as the original plus a "_rasterized.pdf" suffix.


--- Code: ---var RenderDPIs = 120; // passed to the extract images tool as RenderDPIs=<value>

//RunAsync = true: Rendered pages image files are converted to PDF as created.
//RunAsync = false: Wait until all the PDF pages have been rendered, before converting all back to PDF
var RunAsync = true;

//================================================================================
// COM helpers: file system access and a shell used to launch the extract images tool.
var fso = new ActiveXObject("Scripting.FileSystemObject");
var objShell = new ActiveXObject("Wscript.Shell");

// pdfshelltools.exe is looked up in the same folder as pdfe.FullName.
var st_exe = fso.GetParentFolderName(pdfe.FullName) + '\\pdfshelltools.exe';
// Command-line prefix for the ExtractImages tool, writing into the temporary folder.
// The caller appends the NamePrefix value and the quoted PDF filename.
// NOTE(review): the doubled backslash before the closing quote presumably keeps the
// trailing backslash from escaping that quote; ExtractType=0 / ImageType=3 presumably
// select page rendering to PNG (the main loop expects '.png' files) - confirm against the tool docs.
var cmd = '"' + st_exe + '" ExtractImages -s "OutputPath=' + fso.GetSpecialFolder(2 /*TemporaryFolder*/ ) + '\\\\" ExtractType=0 ImageType=3 RenderDPIs=' + RenderDPIs + ' NamePrefix=';

/**
 * Launches the extract images tool for one PDF and tells the caller where the
 * rendered page images will appear.
 *
 * A fresh temporary name is used as the image name prefix, appended to the
 * shared `cmd` command line together with the quoted PDF filename.
 *
 * @param filename      Full path of the PDF to render.
 * @param WaitOnReturn  Passed through to objShell.Run as its third argument.
 * @returns Temporary folder path + '\' + the generated name prefix.
 */
function ExtractImages_GetImageFilePrefix(filename, WaitOnReturn) {
    var prefix = fso.GetTempName();
    var commandLine = cmd + prefix + ' "' + filename + '"';

    // Second argument 0 is the window style handed to objShell.Run.
    objShell.Run(commandLine, 0, WaitOnReturn);

    var tempFolder = fso.GetSpecialFolder(2 /*TemporaryFolder*/ );
    return tempFolder + '\\' + prefix;
}

// Main script: for every selected PDF, render its pages to PNG files with the
// extract images tool, merge those images into a new PDF, and save it next to
// the original with a "_rasterized.pdf" suffix.
var Merger = pdfe.CreateDocumentMerger();
var ProgressBar = pdfe.ProgressBar;
ProgressBar.max = pdfe.SelectedFiles.Count;

//Maximum time, in seconds, to wait for one rendered page image when RunAsync = true.
//Without a limit the script would hang forever if the extract images tool fails
//without producing its output files. Increase it for very heavy pages or high DPIs.
var PageRenderTimeoutSecs = 300;

for (var i = 0; i < pdfe.SelectedFiles.Count; i++) {
    ProgressBar.position = i + 1;
    try {
        var File = pdfe.SelectedFiles(i),
            Filename = File.Filename,
            Pages = File.Pages;
        // Split the full filename into its folder (with trailing backslash)
        // and its base name without extension.
        var Path = Filename.substr(0, Filename.lastIndexOf('\\') + 1),
            Name = Filename.substring(Path.length, Filename.lastIndexOf('.'));

        pdfe.echo(' > rasterizing ' + Filename);
        pdfe.echo(' ');
        // In async mode the extractor is started without waiting for it to end.
        var ImageFilesPrefix = ExtractImages_GetImageFilePrefix(Filename, !RunAsync);

        // Set once a page times out, so the remaining pages of this file are
        // only checked once each instead of waiting the full timeout again.
        var RenderStalled = false;

        for (var PageIndex = 0; PageIndex < Pages.Count; PageIndex++) {
            // Page images are expected as <prefix><4-digit zero-based page index>.png
            var imgfilename = ImageFilesPrefix + pad(PageIndex, 4) + '.png';
            var OKtoMerge = false;
            if (RunAsync) {
                pdfe.echo(' Page ' + (PageIndex + 1) + '/' + Pages.Count, 0, 2);

                //wait (bounded) until the page image file has been created and is not in use
                OKtoMerge = WaitForImageFile(imgfilename, RenderStalled ? 0 : PageRenderTimeoutSecs);
                if (!OKtoMerge) {
                    RenderStalled = true;
                }
            } else {
                OKtoMerge = fso.FileExists(imgfilename);
            }

            if (OKtoMerge) {
                Merger.MergeDocument(imgfilename);
                fso.DeleteFile(imgfilename);
            } else {
                pdfe.echo('     Page ' + (PageIndex + 1) + ' failed to render', 0xFF0000, 2);
                pdfe.echo(' ');
            }
        }

        var NewFilename = Path + Name + '_rasterized.pdf';
        if (Merger.EndAndSaveTo(NewFilename)) {
            pdfe.echo('     Saved to: ' + NewFilename + ' [OK]', 0, 2);
        } else {
            pdfe.echo('     Saving to: ' + NewFilename + ' [Failed]', 0xFF0000, 2);
        }
    } catch (e) {
        // Report the problem and continue with the next selected file.
        pdfe.echo(e.message, 0xFF0000);
    }
}
pdfe.echo('Done');

/**
 * Waits until the given image file exists and can be opened, i.e. it is no
 * longer being written, polling once per pdfe.sleep(1000) call.
 *
 * @param imgfilename  Full path of the rendered page image to wait for.
 * @param timeoutSecs  Maximum number of seconds to wait; 0 performs a single check.
 * @returns true when the file is ready to be merged, false on timeout.
 */
function WaitForImageFile(imgfilename, timeoutSecs) {
    var deadline = new Date().getTime() + timeoutSecs * 1000;
    while (true) {
        if (fso.FileExists(imgfilename)) {
            try {
                // Opening for appending (mode 8) fails while the file is still in use.
                var ots = fso.opentextfile(imgfilename, 8, false);
                ots.close();
                return true;
            } catch (e) {
                // File still in use: deliberately ignored, retry until the deadline.
            }
        }
        if (new Date().getTime() >= deadline) {
            return false;
        }
        pdfe.sleep(1000);
    }
}

/**
 * Left-pads a number with zeros up to a minimum width.
 *
 * Unlike a fixed-prefix/substr approach, digits are never dropped: a number
 * that is already wider than `size` is returned unchanged, and any `size`
 * is honored regardless of how large it is.
 *
 * @param num   Value to format (converted to its string form).
 * @param size  Minimum number of characters in the result.
 * @returns The zero-padded string.
 */
function pad(num, size) {
    var s = String(num);
    while (s.length < size) {
        s = "0" + s;
    }
    return s;
}

--- End code ---

Not entirely sure if this is what you are asking for. Let me know if not.
In the first line of the script you may change the RenderDPIs variable, if you need higher resolution.

nightslayer23:
This doesn't work for me..

So, the extract images tool doesn't work - sort of. It looks like it does something but no output file, no error..
The extract images part works, but not extract pages part.

Any thoughts?

RTT:
Check now. I've made a small modification to the extract images command-line parameters in the above script, so that paths with spaces are handled properly. I was testing with my PDF-ShellTools installed in a non-standard folder, so I missed that issue.

If the problem continues, delete the -s parameter from the var cmd = '"' + st_exe + '" ExtractImages -s "OutputPath=' + fso.GetSpecialFolder(2 /*TemporaryFolder*/ ) + '\\\\" ExtractType=0 ImageType=3 RenderDPIs=' + RenderDPIs + ' NamePrefix='; line, and run the script with a single PDF. This way, the extract images tool will run in GUI mode, so you can check if it is working or not. It should show the thumbnails of each of the PDF pages. If yes, just hit the "extract" button, and the script will do its job and create a PDF with these rasterized page images. If not, let me know the details.

nightslayer23:
I managed to get a different file to work with the image extractor, but it is only letting me do a PNG?

Navigation

[0] Message Index

[#] Next page

Go to full version