Parsovani HTML je easy, jen musis umet pouzit to, co uz je vymyslene.
Z webu si stahni knihovnu http://developer.mindtouch.com/en/docs/SgmlReader - hned na te uvodni strance mas i jednoduchy navod, jak ji pouzit. Ja ti dodam vytah z meho kodu; zjistis, ze se jedna o totez.
{
    // Usage example: download a page, convert its HTML to well-formed XML
    // with getXhtmlResponse, and query the result with XPath.
    Uri u = new Uri("http://....");

    String tx; // complete source of the downloaded page
    using (WebClient wc = new WebClient())
    {
        // WebClient is IDisposable; release it as soon as the download is done.
        tx = wc.DownloadString(u);
    }

    String xmlTx = getXhtmlResponse(tx);

    XmlDocument xDoc = new XmlDocument();
    // LoadXml throws XmlException when the text is not well-formed XML.
    xDoc.LoadXml(xmlTx);

    String xmlPath = "rootTag/nextTag"; // XPath syntax is a topic of its own

    // System.Xml exposes XmlNodeList / XmlNode (there are no NodeList / Node types).
    XmlNodeList nl = xDoc.SelectNodes(xmlPath);      // every node matching the path
    XmlNode n = xDoc.SelectSingleNode(xmlPath);      // first matching node, or null
}
/// <summary>
/// Converts an HTML document into XML text by reading it with SgmlReader
/// and writing every node out through an XmlTextWriter.
/// </summary>
/// <param name="html">Source of the HTML page to convert.</param>
/// <returns>
/// The converted markup prefixed with an XML declaration, or an empty string
/// when <paramref name="html"/> is null or the conversion fails.
/// </returns>
private string getXhtmlResponse(String html)
{
    // A null input used to end up in the swallowed ArgumentNullException of
    // StringReader; return the same empty result explicitly instead.
    if (html == null)
    {
        return "";
    }

    try
    {
        using (StringReader input = new System.IO.StringReader(html))
        using (StringWriter output = new StringWriter())
        {
            SgmlReader reader = new SgmlReader();
            reader.DocType = "HTML";
            reader.InputStream = input;

            using (XmlTextWriter writer = new XmlTextWriter(output))
            {
                // Position the reader on the first node, then copy node by node;
                // WriteNode advances the reader past everything it has written.
                reader.Read();
                while (!reader.EOF)
                {
                    writer.WriteNode(reader, true);
                }
                writer.Flush();
            }

            // The StringWriter buffer is still readable after the XML writer closed it.
            return @"<?xml version=""1.0"" encoding=""windows-1250""?>" + output.ToString();
        }
    }
    catch (Exception ex)
    {
        // Best-effort contract: callers get an empty string on failure,
        // but the reason is reported instead of being silently dropped.
        System.Diagnostics.Trace.TraceWarning("getXhtmlResponse: HTML to XML conversion failed: " + ex.Message);
        return "";
    }
}
Hodne stesti