Replace Specific Document in PDF

Includes:

using Ghostscript.NET;
using Ghostscript.NET.Processor;
using Ghostscript.NET.Rasterizer;

Right now, I am using Ghostscript.Net to merge several single PDFs into a single document:

/// <summary>
/// Merges the given input files, in the order supplied, into one PDF using the Ghostscript pdfwrite device.
/// </summary>
/// <param name="fileNames">String[]. Array of Full Paths to the files to merge into a single PDF</param>
/// <param name="outputPath">String. Full Path to where Ghostscript will write the PDF</param>
/// <exception cref="ArgumentNullException">fileNames or outputPath is null</exception>
/// <exception cref="ArgumentException">fileNames is empty or contains a null or empty entry</exception>
public static void GhostscriptNetJoin(String[] fileNames, String outputPath)
{
    if (fileNames == null)
    {
        throw new ArgumentNullException("fileNames");
    }
    if (outputPath == null)
    {
        throw new ArgumentNullException("outputPath");
    }
    if (fileNames.Length == 0)
    {
        throw new ArgumentException("At least one input file is required.", "fileNames");
    }
    foreach (var fileName in fileNames)
    {
        if (String.IsNullOrEmpty(fileName))
        {
            throw new ArgumentException("Input file names must not be null or empty.", "fileNames");
        }
    }

    // A path containing a space is wrapped in double quotes.
    // NOTE(review): the arguments are handed over as an array, not through a shell - confirm the quotes are still wanted.
    var output_file = (outputPath.IndexOf(' ') == -1) ? outputPath : String.Format("\"{0}\"", outputPath);

    var gsArgs = new List<String>();
    gsArgs.Add("-empty"); // first argument is ignored. REF: http://stackoverflow.com/q/25202577/153923
    gsArgs.Add("-dBATCH");
    gsArgs.Add("-q");
    gsArgs.Add("-dNOPAUSE");
    gsArgs.Add("-dNOPROMPT");
    gsArgs.Add("-sDEVICE=pdfwrite");
    gsArgs.Add("-dPDFSETTINGS=/prepress");
    gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));

    // Each input file is its own argument; a single space-joined string would reach
    // Ghostscript as one argument rather than as a list of files.
    foreach (var fileName in fileNames)
    {
        var input_file = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
        gsArgs.Add(input_file);
    }

    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        processor.Process(gsArgs.ToArray());
    }
}

How could I come back later to REPLACE or UPDATE page N?

I have stubbed out a routine that has my plan, but at this time I do not know how to complete it. Can I supply arg values or is there a different tool I should be using?

/// <summary>
/// Replace Specific Document from source PDF file.
/// Unfinished stub: the method throws <see cref="NotImplementedException"/> inside the first
/// using block, so none of the statements that follow the throw are ever executed.
/// </summary>
/// <param name="source">String. Full path to the multi-page PDF</param>
/// <param name="documentN">String. Full path to the document to insert</param>
/// <param name="indexN">int. Page Index where the new document should be inserted</param>
public static void GhostscriptNetReplace(String source, String documentN, int indexN)
{
    // List of input files for the final merge below; nothing populates it yet.
    var list = new List<String>();
    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    // Step 1 (not implemented): presumably meant to split the source into per-page files and fill 'list'.
    using (var processor = new GhostscriptProcessor(version, false))
    {
        var gsArgs = new List<String>();
        // what arguments are needed?
        throw new NotImplementedException("I don't know how to code for this yet.");
        // Unreachable: the throw above always exits the method.
        processor.Process(gsArgs.ToArray());
    }
    // Step 2 (planned): swap the entry at indexN for the new document.
    list.RemoveAt(indexN);
    list.Insert(indexN, documentN);
    // Step 3 (planned): join the file names into one space-separated string,
    // wrapping any name that contains a space in double quotes.
    var sb = new StringBuilder();
    foreach (var fileName in list)
    {
        var fmtSource = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
        sb.Append(fmtSource + " ");
    }
    // The merged result is written back over the source file.
    var output_file = (source.IndexOf(' ') == -1) ? source : String.Format("\"{0}\"", source);
    // Step 4 (planned): merge the listed files with the pdfwrite device.
    using (var processor = new GhostscriptProcessor(version, false))
    {
        var gsArgs = new List<String>();
        gsArgs.Add("-empty"); // first argument is ignored. REF: http://stackoverflow.com/q/25202577/153923
        gsArgs.Add("-dBATCH");
        gsArgs.Add("-q");
        gsArgs.Add("-dNOPAUSE");
        gsArgs.Add("-dNOPROMPT");
        gsArgs.Add("-sDEVICE=pdfwrite");
        gsArgs.Add("-dPDFSETTINGS=/prepress");
        gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));
        gsArgs.Add(sb.ToString());
        processor.Process(gsArgs.ToArray());
    }
}

标签： c# ghostscript ghostscript.net

3条回答

Melony?

2楼-- · 2019-08-15 09:59

I'm going to add an answer based on what I read in baaron's post here:

Convert PDF to JPG / Images without using a specific C# Library

I modified his code, and I think it will satisfy my needs. Like KenS posted in a comment above, though, this will continue to lose quality each time it is run.

/// <summary>
/// Replaces document at provided index with new document, rewriting the source file in place.
/// Every page that is kept is rasterized to a 300 dpi JPEG next to the source file and the
/// PDF is then rebuilt from those images plus the new document.
/// Use with Caution! If you continuously cycle using the output as the input,
/// then you run repeated risks of information or quality loss.
/// </summary>
/// <param name="source">String. Full File Path to Source</param>
/// <param name="documentN">String. Full File Path to new document</param>
/// <param name="indexN">int. Zero-based index where file needs to go; a value equal to the page count appends</param>
/// <exception cref="ArgumentNullException">source or documentN is null</exception>
/// <exception cref="ArgumentOutOfRangeException">indexN is negative or greater than the page count</exception>
public static void GhostscriptNetReplace(String source, String documentN, int indexN)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }
    if (documentN == null)
    {
        throw new ArgumentNullException("documentN");
    }
    if (indexN < 0)
    {
        throw new ArgumentOutOfRangeException("indexN");
    }

    var inputs = new List<String>();    // files handed to pdfwrite, in page order
    var tempFiles = new List<String>(); // only the files created here, so cleanup never touches documentN
    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    // Temp images go into the directory that holds the source, not "under" the source file itself.
    var tempDirectory = Path.GetDirectoryName(Path.GetFullPath(source));
    try
    {
        using (var rasterizer = new Ghostscript.NET.Rasterizer.GhostscriptRasterizer())
        {
            rasterizer.Open(source, version, false);
            var pageCount = rasterizer.PageCount;
            if (indexN > pageCount)
            {
                throw new ArgumentOutOfRangeException("indexN");
            }
            for (var index = 0; index < pageCount; index++)
            {
                if (index == indexN)
                {
                    inputs.Add(documentN);
                    continue;
                }
                var extracted = Path.Combine(tempDirectory, String.Format("~1_{0}.jpg", index));
                File.Delete(extracted); // no-op when the file does not exist
                // NOTE(review): GetPage is understood to take a 1-based page number - confirm against the installed Ghostscript.NET version.
                using (var img = rasterizer.GetPage(300, 300, index + 1))
                {
                    img.Save(extracted, ImageFormat.Jpeg);
                }
                tempFiles.Add(extracted);
                inputs.Add(extracted);
            }
            if (indexN == pageCount) // occurs if adding a page to the end
            {
                inputs.Add(documentN);
            }
        }
        var output_file = (source.IndexOf(' ') == -1) ? source : String.Format("\"{0}\"", source);
        using (var processor = new GhostscriptProcessor(version, false))
        {
            var gsArgs = new List<String>();
            gsArgs.Add("-empty"); // first argument is ignored. REF: https://stackoverflow.com/q/25202577/153923
            gsArgs.Add("-dBATCH");
            gsArgs.Add("-q");
            gsArgs.Add("-dNOPAUSE");
            gsArgs.Add("-dNOPROMPT");
            gsArgs.Add("-sDEVICE=pdfwrite");
            gsArgs.Add("-dPDFSETTINGS=/prepress");
            gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));
            // NOTE(review): the kept pages are passed as .jpg files - confirm pdfwrite accepts JPEG input directly.
            foreach (var fileName in inputs)
            {
                // Named 'quoted' because a local called 'source' would clash with the parameter.
                var quoted = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
                gsArgs.Add(quoted);
            }
            processor.Process(gsArgs.ToArray());
        }
    }
    finally
    {
        foreach (var tempFile in tempFiles) // delete the temp files, even when processing failed
        {
            File.Delete(tempFile);
        }
    }
}

Work has decided to put this off for now because they are not ready to risk losing information quality.

This code, then, is put out there as untested.

In theory, it should work.

If it helps, please let me know. I hate following up with answers to my own questions if no one ever looks at them.

0人赞添加讨论(0) 举报

来，给爷笑一个

3楼-- · 2019-08-15 10:18

You might be able to do something like this (unable to test code right now, but the principle of it checks out based on the Ghostscript.NET repo):

// Usage sketch; 'source' and 'indexN' are not declared here.
// NOTE(review): presumably they are the parameters of the calling replace routine - confirm.
var prcPath = "PATH"; //a path to store the temporary files
var pageCount = GetPDFPageCount(source);
// Result layout: [0] slice before indexN, [1] null placeholder, [2] slice after indexN.
var list = SplitPDFatIndex(source, prcPath, indexN);

/// <summary>
/// Splits a PDF around one page: writes the pages before <paramref name="index"/> and the pages
/// after it to two temporary PDFs via SlicePDFatIndex.
/// </summary>
/// <param name="pathToFile">String. Full path to the PDF to split</param>
/// <param name="tempPath">String. Directory where the temporary slices are written</param>
/// <param name="index">int. Page that is left out of both slices</param>
/// <returns>
/// Three entries: path of the left slice, a null placeholder for the page being replaced,
/// and path of the right slice.
/// </returns>
private static List<String> SplitPDFatIndex(String pathToFile, String tempPath, int index)
{
    var outList = new List<String>();
    outList.Add(SlicePDFatIndex(pathToFile, tempPath, index, true));
    outList.Add(null); // Alternatively modify method below to permit pulling page N
    outList.Add(SlicePDFatIndex(pathToFile, tempPath, index, false));

    return outList;
}

/// <summary>
/// Writes one side of a PDF, split around a page, to a temporary PDF with the pdfwrite device.
/// </summary>
/// <param name="pathToFile">String. Full path to the PDF to slice</param>
/// <param name="tempPath">String. Directory where the slice is written</param>
/// <param name="index">int. 1-based page number to split at; that page is in neither slice</param>
/// <param name="lessThanIndex">
/// bool. True writes pages 1 to index - 1 to temp_left.pdf;
/// false writes pages index + 1 to the end of the document to temp_right.pdf
/// </param>
/// <returns>Full path of the slice that was written</returns>
/// <exception cref="ArgumentNullException">pathToFile or tempPath is null</exception>
private static String SlicePDFatIndex(String pathToFile, String tempPath, int index, bool lessThanIndex)
{
    if (pathToFile == null)
    {
        throw new ArgumentNullException("pathToFile");
    }
    if (tempPath == null)
    {
        throw new ArgumentNullException("tempPath");
    }

    // Path.Combine supplies the directory separator when tempPath does not end with one.
    var name = Path.Combine(tempPath, lessThanIndex ? "temp_left.pdf" : "temp_right.pdf");

    var gsArgs = new List<String>();
    gsArgs.Add("-empty");
    gsArgs.Add("-dBATCH");
    gsArgs.Add("-q");
    gsArgs.Add("-dNOPAUSE");
    gsArgs.Add("-dNOPROMPT");
    gsArgs.Add("-sDEVICE=pdfwrite");
    gsArgs.Add("-dPDFSETTINGS=/prepress");
    if (lessThanIndex)
    {
        // NOTE(review): index == 1 yields an empty range (LastPage=0) - confirm how callers want that handled.
        gsArgs.Add("-dFirstPage=1");
        gsArgs.Add("-dLastPage=" + (index - 1).ToString());
    }
    else
    {
        // No -dLastPage: Ghostscript then runs to the end of the document, so the page count is not needed.
        gsArgs.Add("-dFirstPage=" + (index + 1).ToString());
    }
    gsArgs.Add(String.Format(@"-sOutputFile={0}", name));
    // The input file goes last so every switch above applies to it.
    gsArgs.Add(String.Format(@"-f{0}", pathToFile));

    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        processor.Process(gsArgs.ToArray());
    }

    return name;
}

/// <summary>
/// Opens the PDF with a Ghostscript viewer, without showing a page, and reads its last page number.
/// </summary>
/// <param name="pathToFile">String. Full path to the PDF to inspect</param>
/// <returns>The viewer's LastPageNumber for the opened document</returns>
/// <exception cref="ArgumentNullException">pathToFile is null</exception>
private static int GetPDFPageCount(String pathToFile)
{
    if (pathToFile == null)
    {
        throw new ArgumentNullException("pathToFile");
    }

    // The using block releases the viewer even when Open throws.
    using (var viewer = new Ghostscript.NET.Viewer.GhostscriptViewer())
    {
        viewer.ShowPageAfterOpen = false;
        viewer.ProgressiveUpdate = false;
        viewer.Open(pathToFile); // try (pathToFile, version, false) or (pathToFile, version, true) if for some reason it hangs up here
        int count = viewer.LastPageNumber;
        viewer.Close();

        return count;
    }
}

0人赞添加讨论(0) 举报

你好瞎i

4楼-- · 2019-08-15 10:21

From my related post:

You could use the PDF Toolkit PDFtk:

Example:

pdftk A=inA.pdf B=inB.pdf cat A1-12 B3 A14-end output out1.pdf

The output consists of the first 12 pages of inA.pdf, followed by page 3 of inB.pdf and then pages 14 until end of inA.pdf.

Many Linux distributions provide a PDFtk package you can download and install using their package manager.

0人赞添加讨论(0) 举报

Replace Specific Document in PDF

采纳回答

编辑标签

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

付费偷看金额在0.1-10元之间