HeavenCore | IT Solutions Built on Open Source Software
View Thread
 Print Thread
Function to strip HTML and leave content in plain text
admin
Function to strip HTML and leave content in plain text - handy for dropping html content into excel etc

Credit to: http://blog.sqlauthority.com/


SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO


CREATE FUNCTION [dbo].[udf_StripHTML]
(
@HTMLText varchar(8000)
)
RETURNS varchar(8000)
AS
BEGIN
DECLARE @Start  int
DECLARE @End    int
DECLARE @Length int

-- Replace the HTML entity & with the '&' character (this needs to be done first, as-- '&' might be double encoded as '&')
SET @Start = CHARINDEX('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '&')
   SET @Start = CHARINDEX('&', @HTMLText)
   SET @End = @Start + 4
   SET @Length = (@End - @Start) + 1
END

-- Replace the HTML entity &lt; with the '<' character
SET @Start = CHARINDEX('&lt;', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '<')
   SET @Start = CHARINDEX('&lt;', @HTMLText)
   SET @End = @Start + 3
   SET @Length = (@End - @Start) + 1
END

-- Replace the HTML entity &gt; with the '>' character
SET @Start = CHARINDEX('&gt;', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '>')
   SET @Start = CHARINDEX('&gt;', @HTMLText)
   SET @End = @Start + 3
   SET @Length = (@End - @Start) + 1
END

-- Replace the HTML entity &amp; with the '&' character
SET @Start = CHARINDEX('&amp;amp;', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '&')
   SET @Start = CHARINDEX('&amp;amp;', @HTMLText)
   SET @End = @Start + 4
   SET @Length = (@End - @Start) + 1
END

-- Replace the HTML entity &nbsp; with the ' ' character
SET @Start = CHARINDEX('&nbsp;', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, ' ')
   SET @Start = CHARINDEX('&nbsp;', @HTMLText)
   SET @End = @Start + 5
   SET @Length = (@End - @Start) + 1
END

-- Replace any <br> tags with a comma and a space (mainly for excel purposes)
SET @Start = CHARINDEX('<br>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, ', ')
   SET @Start = CHARINDEX('<br>', @HTMLText)
   SET @End = @Start + 3
   SET @Length = (@End - @Start) + 1
END

-- Replace any <br/> tags with a newline
SET @Start = CHARINDEX('<br/>', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, ', ')
   SET @Start = CHARINDEX('<br/>', @HTMLText)
   SET @End = @Start + 4
   SET @Length = (@End - @Start) + 1
END

-- Replace any <br /> tags with a newline
SET @Start = CHARINDEX('<br />', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, ', ')
   SET @Start = CHARINDEX('<br />', @HTMLText)
   SET @End = @Start + 5
   SET @Length = (@End - @Start) + 1
END

-- Replace any <br> tags with a newline
--SET @Start = CHARINDEX('<br>', @HTMLText)
--SET @End = @Start + 3
--SET @Length = (@End - @Start) + 1
--WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
--   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, CHAR(13) + CHAR(10))
--   SET @Start = CHARINDEX('<br>', @HTMLText)
--   SET @End = @Start + 3
--   SET @Length = (@End - @Start) + 1
--END

-- Replace any <br/> tags with a newline
--SET @Start = CHARINDEX('<br/>', @HTMLText)
--SET @End = @Start + 4
--SET @Length = (@End - @Start) + 1
--WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
--   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
--   SET @Start = CHARINDEX('<br/>', @HTMLText)
--   SET @End = @Start + 4
--   SET @Length = (@End - @Start) + 1
--END

-- Replace any <br /> tags with a newline
--SET @Start = CHARINDEX('<br />', @HTMLText)
--SET @End = @Start + 5
--SET @Length = (@End - @Start) + 1
--WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
--   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
--   SET @Start = CHARINDEX('<br />', @HTMLText)
--   SET @End = @Start + 5
--   SET @Length = (@End - @Start) + 1
--END

-- Remove anything between <whatever> tags
SET @Start = CHARINDEX('<', @HTMLText)
SET @End = CHARINDEX('>', @HTMLText, CHARINDEX('<', @HTMLText))
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
   SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '')
   SET @Start = CHARINDEX('<', @HTMLText)
   SET @End = CHARINDEX('>', @HTMLText, CHARINDEX('<', @HTMLText))
   SET @Length = (@End - @Start) + 1
END
RETURN LTRIM(RTRIM(@HTMLText))
END

GO

Edited by admin on 26-06-2010 19:00
Jordon Pilling | Heavencore Administrator
 
Jump to Forum