If we have uneven number of columns coming in the source file and if it varies time to time, then we can load the data into a single column and then use the below function to split it into multiple columns
SQL Server
When it comes to SQL Server we can find the position of the substring or in our case delimiter is done using built-in function CHARINDEX but unlike oracle charindex does not allow nth occurrence, so we need to use recursive common table expression (CTE) to achieve it as shown below.
Note: we can use PATINDEX if we need to use wild card in our search criteria
So to achieve our goal we need to create a function
Script:
Create function nth_stringvalue( @InputString varchar(max),@delimiter varchar(50),@occurrence int)
returns varchar(max)
as
begin
declare @StartPosition int,
@EndPosition int,
@Outputstring varchar(max)
--recursive CTE
;with T as (
select @InputString as col1,charindex(@delimiter,@InputString) as pos,1 as occurrence
union all
select @InputString,charindex(@delimiter,@InputString,pos+1) as pos,occurrence+1 as occurrence from T
where charindex(@delimiter,@InputString,pos+1)>0
)
--get start position and end position
select @StartPosition=(select pos from T where occurrence=@occurrence) ,
@EndPosition=(select pos from T where occurrence=@occurrence+1)
from T
where occurrence=1 --just to limit the number of iterations of assignment
-- data before the delimiter
IF @occurrence=0
begin
set @Outputstring= SUBSTRING(@InputString,0,@EndPosition)
end
-- data after the last delimiter
else if @EndPosition is null
begin
set @Outputstring= SUBSTRING(@InputString,@StartPosition+len(@delimiter),len(@InputString)-(@StartPosition))
end
--other data
else
begin
set @Outputstring= SUBSTRING(@InputString,@StartPosition+len(@delimiter),@EndPosition-(@StartPosition+len(@delimiter)))
end
return @Outputstring
end
Now that we have a scalar function all we have to do is to access the data from delimited file.
The syntax for the function will look like “select [dbo].[nth_occurrence_string](Mycolumn,Delmiter,occurrence) from Mytable”
Example:
select [dbo].[nth_occurrence_string]('SAM_ROB_PAT_BOB','_',1) -- will return “ROB”
select [dbo].[nth_occurrence_string]('SAM_ROB_PAT_BOB','_',0) -- will return “SAM”
select [dbo].[nth_occurrence_string]('SAM_ROB_PAT_BOB','_',3) -- will return “BOB”
Oracle
In Oracle we can find the position of the substring or in our case delimiter is done using built-in oracle function INSTR, unlike charindex function in SQL Server, instr function allows nth occurrence
So to achieve our goal we need to create a function
Script:
CREATE OR REPLACE
FUNCTION NTH_STRINGVALUE( INPUTSTRING VARCHAR2,DELIMITER VARCHAR2,OCCURRENCE NUMBER)
RETURN VARCHAR2 IS OUTPUTSTRING VARCHAR2(5000);STARTPOSITION NUMBER;
ENDPOSITION Number;
Begin
IF OCCURRENCE<>0
THEN
SELECT INSTR(INPUTSTRING,DELIMITER,1,OCCURRENCE) INTO STARTPOSITION FROM DUAL;
end if;
select instr(InputString,DELIMITER,1,occurrence+1) into ENDPOSITION from dual;
-- data before the delimiter
IF OCCURRENCE=0
THEN
select SUBSTR(InputString,0,ENDPOSITION-LENGTH(DELIMITER)) into Outputstring from dual;
-- data after the last delimiter
ELSIF (nvl(STARTPOSITION,0)>0 AND ENDPOSITION=0)
THEN
select SUBSTR(InputString,StartPosition+length(delimiter),length(InputString)) into Outputstring from dual;
--other data
else
SELECT SUBSTR(INPUTSTRING,STARTPOSITION+LENGTH(DELIMITER),ENDPOSITION-(STARTPOSITION+LENGTH(DELIMITER))) INTO OUTPUTSTRING FROM DUAL;
END IF;
RETURN OUTPUTSTRING;
END NTH_STRINGVALUE;
Now that we have a scalar function all we have to do is to access the data from delimited file.
The syntax for the function will look like “select [nth_occurrence_string](Mycolumn,Delmiter,occurrence) from Mytable”
Example:
select [nth_occurrence_string]('SAM_ROB_PAT_BOB','_',1) from dual -- will return “ROB”
select [nth_occurrence_string]('SAM_ROB_PAT_BOB','_',0) from dual -- will return “SAM”
select [nth_occurrence_string]('SAM_ROB_PAT_BOB','_',3) from dual -- will return “BOB”
Thank you for reading this article,
check out my other SQL Server vs. Oracle posts.