Next, I will explain a solution that may come in handy. First of all, the problem. It is quite simple: you have some HTML pages that you would like to include in your own pages, but in a different format or layout. Unfortunately, you cannot modify those pages, as you are not their author. To make this concrete, let’s say that you want to include some Wikipedia articles in your pages, but they have to match your own layout and you want only some sections of them.
So you have to transform one HTML document into another. This sounds pretty much like a job for XSLT, and that is what we will use. It will be your task to write the XSLT; I’m only going to show you how to integrate it into your web application.
For the sake of simplicity and reuse, we will write an ASP.NET handler for XSLT files: it takes the requested XSLT file, receives the URL of the XML/HTML/XHTML document as a parameter, and outputs the transformed result.
Find below the code for it:
/// <summary>
/// HTTP handler for XSLT files. The requested path is the stylesheet; the
/// document to transform is named by the <c>u</c> query-string parameter.
/// The transformed result is written to the response as <c>text/html</c>.
/// </summary>
public class XSLTHandler : IHttpHandler
{
    // Timeout, in milliseconds, applied to outbound requests for remote documents.
    // 100000 ms is the documented HttpWebRequest default; adjust or move to configuration as needed.
    private const int RequestTimeout = 100000;

    /// <summary>
    /// Processes the request: loads the source document, loads (or fetches from
    /// cache) the compiled stylesheet, and writes the transformation result.
    /// </summary>
    /// <param name="context">the current HTTP context</param>
    public void ProcessRequest(HttpContext context)
    {
        // the XSLT argument list
        XsltArgumentList xslArg = new XsltArgumentList();
        String url = context.Request.QueryString.Get("u");
        // if no usable URL was supplied in the u parameter, send a NOT_FOUND error
        if (String.IsNullOrEmpty(url))
        {
            send(context.Response, HttpStatusCode.NotFound);
            return;
        }
        // NOTE(review): url is caller-supplied and is fetched, executed locally, or redirected to
        // as-is. Consider restricting it to an allow-list of hosts before exposing this handler.

        // adding the URL as an XSLT parameter
        xslArg.AddParam("url", "", url);
        try
        {
            // load the XML document at the specified URL, forwarding the form body on POST
            XmlDocument xmlDoc = loadXMLDocument(url,
                "POST".Equals(context.Request.HttpMethod)
                    ? context.Request.Form.ToString() : null);

            // the physical path of the requested stylesheet
            String xslPath = context.Request.MapPath(context.Request.Url.LocalPath);

            // try to get the compiled XSLT from cache
            XslCompiledTransform xslTransform = (XslCompiledTransform)context.Cache.Get(xslPath);
            if (xslTransform == null)
            {
                // if the XSLT file doesn't exist, send a 404
                if (!File.Exists(xslPath))
                {
                    send(context.Response, HttpStatusCode.NotFound);
                    return;
                }
                // the transform must be instantiated before it can be loaded
                xslTransform = new XslCompiledTransform();
                xslTransform.Load(xslPath);
                // cache the compiled XSLT; the file dependency evicts it when the stylesheet changes
                context.Cache.Insert(xslPath, xslTransform, new CacheDependency(xslPath));
            }

            // transform
            context.Response.ContentType = "text/html";
            context.Response.Clear();
            xslTransform.Transform(xmlDoc, xslArg, context.Response.Output);
        }
        catch (HttpException exc)
        {
            if (exc.GetHttpCode() == (int)HttpStatusCode.Redirect)
            {
                // the target is neither HTML nor XML: redirect the client to it
                context.Response.Redirect(url);
            }
            else
            {
                // any other HTTP error: send the same error code
                send(context.Response, (HttpStatusCode)exc.GetHttpCode());
            }
        }
        catch (System.Threading.ThreadAbortException)
        {
            // Response.End() inside send() ends the request by aborting the thread.
            // Let that propagate so the status already set is not replaced by a 500 below.
            throw;
        }
        catch (Exception)
        {
            // any other failure: send an internal server error response
            send(context.Response, HttpStatusCode.InternalServerError);
        }
    }

    /// <summary>
    /// IHttpHandler member; this handler reports itself as not reusable.
    /// </summary>
    public bool IsReusable
    {
        get
        {
            return false;
        }
    }

    /// <summary>
    /// Clears the response, sets the given status code and description, and ends the response.
    /// </summary>
    /// <param name="Response">the response to write to</param>
    /// <param name="StatusCode">the HTTP status to send</param>
    private void send(HttpResponse Response, HttpStatusCode StatusCode)
    {
        Response.Clear();
        Response.StatusCode = (int)StatusCode;
        Response.StatusDescription = StatusCode.ToString();
        Response.End();
    }

    /// <summary>
    /// Loads and returns an XML document from the given url.
    /// </summary>
    /// <param name="url">the url where the XML content is; a url starting with "/" or
    /// containing no scheme delimiter is executed locally instead of being requested over HTTP</param>
    /// <param name="postParams">if not null the request will be a POST request
    /// and the parameters are sent along with the request; if null then the request method is GET</param>
    /// <returns>the loaded XML document</returns>
    /// <exception cref="HttpException">the remote server answered with a non-OK status (that status
    /// is carried in the exception), or the content is neither HTML nor XML (status Redirect)</exception>
    public static XmlDocument loadXMLDocument(String url, String postParams)
    {
        XmlDocument xmlDoc = new XmlDocument();

        if (url.StartsWith("/", StringComparison.Ordinal) || !url.Contains(Uri.SchemeDelimiter))
        {
            // local request: use the internal execute instead of making a web request;
            // the form and query string parameters are preserved
            using (StringWriter writer = new StringWriter())
            {
                HttpContext.Current.Server.Execute(url, writer, true);
                using (StringReader localReader = new StringReader(writer.ToString()))
                {
                    xmlDoc.Load(localReader);
                }
            }
            return xmlDoc;
        }

        // create webrequest
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = RequestTimeout;
        // you can also add a proxy
        //request.Proxy = WebProxy;
        if (postParams != null)
        {
            // encode first so that ContentLength is the exact number of bytes written
            byte[] body = System.Text.Encoding.ASCII.GetBytes(postParams);
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";
            request.ContentLength = body.Length;
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }
        }

        // get the response of the request
        HttpWebResponse response;
        try
        {
            response = (HttpWebResponse)request.GetResponse();
        }
        catch (WebException exc)
        {
            // GetResponse throws for HTTP error statuses; surface the remote status
            // as an HttpException so the caller can forward the same code
            HttpWebResponse errorResponse = exc.Response as HttpWebResponse;
            if (errorResponse == null)
            {
                throw;
            }
            using (errorResponse)
            {
                throw new HttpException((int)errorResponse.StatusCode, errorResponse.StatusDescription, exc);
            }
        }

        // the response (and its connection) is released once the document is fully loaded
        using (response)
        {
            // if something went wrong don't try to parse the document
            if (response.StatusCode != HttpStatusCode.OK)
            {
                throw new HttpException((int)response.StatusCode, response.StatusDescription);
            }

            String contentType = response.ContentType ?? String.Empty;
            if (contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
            {
                // HTML: read it through SgmlReader so it can be loaded as XML
                using (StreamReader htmlText = new StreamReader(response.GetResponseStream(), Encoding.UTF8, true))
                {
                    SgmlReader sgmlReader = new SgmlReader();
                    sgmlReader.WebProxy = Configuration.Proxy;
                    sgmlReader.DocType = "HTML";
                    sgmlReader.CaseFolding = CaseFolding.ToLower;
                    sgmlReader.InputStream = htmlText;
                    xmlDoc.Load(sgmlReader);
                }
            }
            else if (contentType.StartsWith("text/xml", StringComparison.OrdinalIgnoreCase))
            {
                // XML: load it straight from the response
                using (StreamReader xmlText = new StreamReader(response.GetResponseStream(), Encoding.UTF8, true))
                {
                    xmlDoc.Load(xmlText);
                }
            }
            else
            {
                // neither HTML nor XML: signal the caller to redirect to the normal page
                throw new HttpException((int)HttpStatusCode.Redirect, url);
            }
        }

        return xmlDoc;
    }
}
To explain briefly what happens: when a file like t.xsl is requested, the XSLTHandler will process the request. It will load and compile the XSLT file using XslCompiledTransform, trying first to get the compiled XSLT object from the cache.
The XML/XHTML/HTML content is retrieved from the URL given in the u query-string parameter. If the content is HTML, it is first converted into XHTML using an SGML parser, so that it can be transformed with XSLT. Then the actual processing takes place and the result is written to the response output.
Of course, the above code can be extended. You can use the application configuration to control whether the cache is used, which proxy to use, whether only URLs from certain hosts may be parsed, and so on. Other parameters can also be added as XSLT arguments: the URL of the XSLT file, the HTTP headers, the request parameters, etc.
You saw in the code references to an SgmlReader and I also told you above about feeding the HTML content first into an SGML parser. But how to do it? The SgmlReader is already implemented and you can download it from here and use it like an XmlReader.
Last step is to configure the handler into the Web.config, by simply adding the handler to process *.xsl files:
<httpHandlers>
<add verb="*" path="*.xsl" type="XSLTHandler" />
</httpHandlers>
That’s it!