What I planned was to append <image>@matchedFilePathToAnImageHere</image>
only to those <item></item>
nodes whose values between <name></name>
tags, after being converted to lower case, having spaces replaced with underscores, and so on, would match actual image file names in a separate folder.
The code correctly matches about 95% of the images to the items, but it ends up appending every matched image file name, wrapped in <image></image>,
to the very first <item></item>
.
How would I append each <image></image>
to its appropriate <item></item>
? Each item needs only one image.
Images folder:
name1.jpg
name_2.jpg
name3.jpg
...
name 998.jpg
XML(before parsing):
<items>
<item>
<name>Name1</name>
<price>Price1</price>
<description>Description1</description>
</item>
<item>
<name>Name2</name>
<price>Price2</price>
<description>Description2</description>
</item>
<item>
<name>Name3</name>
<price>Price3</price>
<description>Description3</description>
</item>
</items>
XML(desired result after parsing):
<items>
<item>
<name>Name1</name>
<price>Price1</price>
<description>Description1</description>
<image>C:\path\to\name1.jpg</image>
</item>
<item>
<name>Name2</name>
<price>Price2</price>
<description>Description2</description>
<!-- no image file name matched `name2`(command line notice), so skip appending image tags here BUT I add the image tag here later by hand, because I find out that there's an image `name_2.jpg` -->
</item>
<item>
<name>Name3</name>
<price>Price3</price>
<description>Description3</description>
<image>C:\path\to\name3.jpg</image>
</item>
</items>
Code:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.IO;
namespace myXmlParser
{
    /// <summary>
    /// Console tool that reads <c>C:\items.xml</c>, looks in <c>C:\images</c> for an image
    /// whose file name matches each item's normalized <c>&lt;name&gt;</c>, appends an
    /// <c>&lt;image&gt;</c> element holding the matched path to that same item, and saves
    /// the result as <c>items_with_images.xml</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point. Items without a matching image are reported on the console
        /// and left unchanged.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // load the xml
            XmlDocument doc = new XmlDocument();
            doc.Load(@"C:\items.xml");

            // Copy the item nodes into a list first, so the document is not being
            // modified while the XPath result is still being enumerated.
            List<XmlNode> items = doc.SelectNodes("//item").Cast<XmlNode>().ToList();

            foreach (XmlNode item in items)
            {
                // Work per <item> (rather than per name text node) so the image can be
                // attached to the item that owns the name.
                XmlNode nameNode = item.SelectSingleNode("name");
                if (nameNode == null)
                {
                    continue;
                }

                string searchKeyword = BuildSearchKeyword(nameNode.InnerText);
                if (searchKeyword.Length == 0)
                {
                    // An empty keyword would turn the search pattern into "*",
                    // which matches every file in the folder.
                    continue;
                }

                string imagePath = FindImage(@"C:\images", searchKeyword);
                if (imagePath == null)
                {
                    Console.WriteLine("NO IMAGE WAS FOUND!!! {0}", searchKeyword);
                    continue;
                }

                // Build the element through the DOM instead of concatenating markup:
                // InnerText escapes characters such as '&' that may occur in a path.
                XmlElement image = doc.CreateElement("image");
                image.InnerText = imagePath;

                // Append to the current item, not to the first child of the document
                // element (which is always the first item).
                item.AppendChild(image);
            }

            // now save the new parsed xml somewhere
            doc.Save("items_with_images.xml");
            Console.ReadKey();
        }// main

        /// <summary>
        /// Normalizes an item name into the form used for image file names: trimmed,
        /// lower-cased, spaces replaced with underscores, apostrophes removed, and a
        /// trailing ", the" / ", a" article dropped.
        /// </summary>
        /// <param name="name">Raw text of the item's name element.</param>
        /// <returns>The normalized keyword; empty when the name is blank.</returns>
        private static string BuildSearchKeyword(string name)
        {
            // Lower-case BEFORE stripping the article suffixes; otherwise ", The" becomes
            // ",_The" and never matches the lower-case ",_the" pattern.
            string keyword = name.Trim().ToLowerInvariant().Replace(" ", "_").Replace("'", "");

            // Only strip the article when it is a suffix, so that names such as
            // "salt, and pepper" are not mangled by a mid-string ",_a" match.
            foreach (string suffix in new[] { ",_the", ",_a" })
            {
                if (keyword.EndsWith(suffix, StringComparison.Ordinal))
                {
                    keyword = keyword.Substring(0, keyword.Length - suffix.Length);
                    break;
                }
            }

            return keyword;
        }

        /// <summary>
        /// Finds an image in <paramref name="folder"/> (top level only) for the keyword.
        /// A file whose name without extension equals the keyword is preferred; otherwise
        /// the first file whose name starts with the keyword is used.
        /// </summary>
        /// <param name="folder">Directory that contains the image files.</param>
        /// <param name="keyword">Normalized, non-empty item name.</param>
        /// <returns>Full path of the chosen file, or <c>null</c> when nothing matches.</returns>
        private static string FindImage(string folder, string keyword)
        {
            // Wildcards or invalid file-name characters in the keyword would either make
            // GetFiles throw or match unrelated files, so treat them as "no match".
            if (keyword.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0
                || keyword.IndexOfAny(new[] { '*', '?' }) >= 0)
            {
                return null;
            }

            string[] matches = Directory.GetFiles(folder, keyword + "*", SearchOption.TopDirectoryOnly);

            // "name1*" also matches "name10.jpg"; prefer the exact stem when it exists.
            string exact = matches.FirstOrDefault(path => string.Equals(
                Path.GetFileNameWithoutExtension(path), keyword, StringComparison.OrdinalIgnoreCase));

            return exact ?? matches.FirstOrDefault();
        }
    }// class
}// namespace