extract image from word file

2019-01-07 20:41发布

问题:

I have been trying the following C# code to extract image from the doc file but it is not working:

object missing = System.Reflection.Missing.Value;            
            Microsoft.Office.Interop.Word.Application oWord = new Microsoft.Office.Interop.Word.Application();
            Microsoft.Office.Interop.Word.Document oDoc = new Microsoft.Office.Interop.Word.Document();
            oWord.Visible = false;
            object str1 = "C:\\doc.doc";
            oDoc = oWord.Documents.Open(ref str1, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing);

            if (oDoc.InlineShapes.Count > 0)            {


                for (int j = 0; j < oDoc.InlineShapes.Count; j++)
                {  

                    oWord.ActiveDocument.Select();
                    oDoc.ActiveWindow.Selection.CopyAsPicture();

                    IDataObject data = Clipboard.GetDataObject();                    

                    if (data.GetDataPresent(typeof(System.Drawing.Bitmap)))
                    {
                        object bm = data.GetData(DataFormats.Bitmap);

                        Bitmap bmp;
                        bmp = (Bitmap)data.GetData(typeof(System.Drawing.Bitmap));

                        bmp.Save("C:\\test.bmp");
                    }



                }

Can anybody give the proper code for extracting the image from word file?

回答1:

using System;
using System.Drawing;
using System.IO;
using System.Threading;
using Page = System.Web.UI.Page;
using Microsoft.Office.Interop.Word;
using Microsoft.VisualBasic.Devices;
public partial class ReadIMG : System.Web.UI.Page
{   
    private Application m_word;
    private int m_i;
    protected void Page_Load(object sender, EventArgs e)
    {
        object missing = Type.Missing;
        object FileName = Server.MapPath("~/LectureOrig/Word.docx");
        object readOnly = true;
        m_word = new Application();
        m_word.Documents.Open(ref FileName,
                                ref missing, ref readOnly, ref missing, ref missing,
                                ref missing, ref missing, ref missing, ref missing,
                                ref missing, ref missing, ref missing, ref missing, ref missing,ref missing,ref missing);
        try
        {
            for (int i = 1; i <= m_word.ActiveDocument.InlineShapes.Count; i++)
            {
                m_i = i;
               // CopyFromClipboardShape();
                Thread thread = new Thread(CopyFromClipbordInlineShape);
                thread.SetApartmentState(ApartmentState.STA);
                thread.Start();
                thread.Join();
            }
        }
        finally
        {
            object save = false;
            m_word.Quit(ref save, ref missing, ref missing);
            m_word = null;
        }
    }
    protected void CopyFromClipbordInlineShape()
    {   
        InlineShape inlineShape = m_word.ActiveDocument.InlineShapes[m_i];
        inlineShape.Select();
        m_word.Selection.Copy();
        Computer computer = new Computer();
        //Image img = computer.Clipboard.GetImage();
        if (computer.Clipboard.GetDataObject() != null)
        {
            System.Windows.Forms.IDataObject data = computer.Clipboard.GetDataObject();
            if (data.GetDataPresent(System.Windows.Forms.DataFormats.Bitmap))
            {
                Image image = (Image)data.GetData(System.Windows.Forms.DataFormats.Bitmap, true);                
                image.Save(Server.MapPath("~/ImagesGet/image.gif"), System.Drawing.Imaging.ImageFormat.Gif);
                image.Save(Server.MapPath("~/ImagesGet/image.jpg"), System.Drawing.Imaging.ImageFormat.Jpeg);

            }
            else
            {
                LabelMessage.Text="The Data In Clipboard is not as image format";
            }
        }
        else
        {
            LabelMessage.Text="The Clipboard was empty";
        }
    }

Code copy from How To Exctract images from Doc (Word) file in C#?



回答2:

Another option if it's a .docx file:

  1. Rename the file to a .zip
  2. Extract the contents
  3. Look for the following directory in the extracted folder word/media

Yeah, it's not the C# way to do it as posted, but even writing the code to perform the 3 steps above would be a way of automating the process if that's what you are looking for.



回答3:

Here's a local/non-web-page version.

Most of this code is copied from: http://www.csharphelp.com/2007/05/save-picture-from-clipboard-to-file-using-c/ - plus a few lines from Ekk's answer.

InlineShape inlineShape = m_word.ActiveDocument.InlineShapes[m_i];
inlineShape.Select();
m_word.Selection.Copy();
if (Clipboard.GetDataObject() != null)
{
    IDataObject data = Clipboard.GetDataObject();

    if (data.GetDataPresent(DataFormats.Bitmap))
    {
        Image image = (Image)data.GetData(DataFormats.Bitmap,true);

        image.Save("image.bmp",System.Drawing.Imaging.ImageFormat.Bmp);
        image.Save("image.jpg",System.Drawing.Imaging.ImageFormat.Jpeg);
        image.Save("image.gif",System.Drawing.Imaging.ImageFormat.Gif);
    }
    else
    {
        MessageBox.Show("The Data In Clipboard is not as image format");
    }
}
else
{
    MessageBox.Show("The Clipboard was empty");
}


回答4:

I had the same problem I used spire library and i got the solution i am giving the link of that library use just add that dll files in your visual studio and copy the below code :

enter code here



        if (file.ShowDialog() == DialogResult.OK) //if there is a file choosen by the user  
        {
            object path = file.FileName; //get the path of the file  
            object readOnly = true;

            Spire.Doc.Document document = new Spire.Doc.Document(file.FileName);
            int index = 1;

            //Get Each Section of Document  
            foreach (Spire.Doc.Section section in document.Sections)
            {
                //Get Each Paragraph of Section  
                foreach (Spire.Doc.Documents.Paragraph paragraph in section.Paragraphs)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.AppendLine(paragraph.Text);//storing the text of word in string builder
                    Console.WriteLine(sb);
                    //Get Each Document Object of Paragraph Items  
                    foreach (DocumentObject docObject in paragraph.ChildObjects)
                    {
                        //If Type of Document Object is Picture, Extract.  
                        if (docObject.DocumentObjectType == DocumentObjectType.Picture)
                        {
                            DocPicture pic = docObject as DocPicture;

                            String imgName = String.Format(@"E:\C#\OnlineExam\Question\{0}.png", index);

                            //Save Image  
                            pic.Image.Save(imgName, System.Drawing.Imaging.ImageFormat.Png);
                            index++;
                        }
                    }
                }
            }}

You can find dll files from this link