
When dealing with external text data such as RSS/Atom feeds, you often get articles that look ugly and are pretty hard to read. Text information in feeds is often stripped of any HTML/CSS formatting to keep the payload down, but if you want to display that text on your web site it often does not look good.
I wanted to create a function that at least splits up large text chunks into paragraphs so that they become more readable. That is, of course, only if there is no HTML formatting already.
/// <summary>
/// Breaks a long run of plain text into paragraphs for HTML display by inserting
/// <c>&lt;br /&gt;&lt;br /&gt;</c> after the sentence that brings the current
/// paragraph to 200 or more characters.
/// </summary>
/// <remarks>
/// A sentence boundary is a period followed by one whitespace character. The
/// whitespace character is normalized to a single space in the output. Text that
/// already contains a <c>&lt;br&gt;</c> or <c>&lt;p&gt;</c> tag (any casing, with or
/// without attributes) is treated as already formatted and returned unchanged.
/// </remarks>
/// <param name="text">The plain text to format. May be <c>null</c> or empty.</param>
/// <returns>
/// The text with paragraph breaks inserted, or the input itself when it is
/// <c>null</c>, empty, or already contains line-break/paragraph markup.
/// </returns>
public static string HtmliFyText(string text)
{
    // Nothing to format; also avoids a NullReferenceException on null input.
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    // Already formatted: leave it alone. Case-insensitive and attribute-tolerant so
    // that "<BR>" or "<p class='x'>" are recognized as well as the plain forms.
    if (Regex.IsMatch(text, @"<(br|p)(\s[^>]*)?/?>", RegexOptions.IgnoreCase))
    {
        return text;
    }

    const int threshold = 200;          // minimum paragraph length before a break
    const string sentenceEnd = ". ";    // delimiter consumed by the split below
    const string sep = "<br /><br />";  // the html tag that separates paragraphs

    string[] chunks = Regex.Split(text, @"\.\s"); // split on ". "
    StringBuilder outs = new StringBuilder(text.Length + sep.Length);
    int currentChunkLen = 0;
    bool breakPending = false;

    for (int i = 0; i < chunks.Length; i++)
    {
        string chunk = chunks[i];
        if (chunk.Length == 0)
        {
            continue;
        }

        // The separator is written lazily, only once more content is known to
        // follow, so the result never ends with a dangling paragraph break.
        if (breakPending)
        {
            outs.Append(sep);
            breakPending = false;
        }

        outs.Append(chunk);

        // Every chunk except the last was cut off at a ". " delimiter; put it back
        // so sentences inside a paragraph are not glued together. The last chunk
        // keeps whatever ending it had in the input.
        if (i < chunks.Length - 1)
        {
            outs.Append(sentenceEnd);
        }

        currentChunkLen += chunk.Length;
        if (currentChunkLen >= threshold) // have a big enough paragraph?
        {
            breakPending = true;
            currentChunkLen = 0;
        }
    }

    return outs.ToString();
}