Technology Blogs by SAP
Learn how to extend and personalize SAP applications. Follow the SAP technology blog for insights into SAP BTP, ABAP, SAP Analytics Cloud, SAP HANA, and more.
cancel
Showing results for 
Search instead for 
Did you mean: 

    前一篇博客已经对常用的CE function的功能以及使用作了简要介绍。本篇博客将通过几个测试场景,来对几个常用的CE function与其相对应的SQL查询语句进行性能的对比。通过这些测试场景的测试,我们可以得出一些相应的tips,可以作为相关的调优点,在对SQL Procedure的性能调优时选择使用。


CE_UNION_ALL

      CE_UNION_ALL函数的功能是实现两个表变量的UNION ALL 操作,即合并操作,并将结果绑定到一个新的表变量上。在SPS6 版本之前,即使是列表UNION  ALL的操作还是转换到row  engine上执行。在SPS6 版本之后,对其进行了优化,使其可以在column engine上执行。另外需要注意的是,对于SQLScript的代码的执行,互相独立即没有依赖的语句会被并行的执行。因此,进行UNION ALL的两部分一般被认为是被并行执行的。

    例如以下这个例子,分别使用CE_UNION_ALL()UNION ALL对两张表进行合并操作,进行合并操作的两张表数据量分别为189W15W



drop procedure union_all_test_1;
create procedure union_all_test_1(out trans transport)
language sqlscript as
begin         
d5300_transport = CE_COLUMN_TABLE("D5300", ["ACT_GI_DATE", "TRANSPORT_ID", "MATERIAL", "BATCH", "SOURCE_PLANT", "SOURCE_STORE", "TARGET_PLANT", "TARGET_STORE", "TOTAL_COST", "WEIGHT"]);
                             
d5200_transport = CE_COLUMN_TABLE("D5200", ["ACT_GI_DATE", "TRANSPORT_ID", "MATERIAL", "BATCH", "SOURCE_PLANT", "SOURCE_STORE", "DISTR_CHAN", "SALES_OFF", "TOTAL_COST", "WEIGHT"]);
                             
d5200_transport2 = CE_PROJECTION(:d5200_transport, ["ACT_GI_DATE", "TRANSPORT_ID", "MATERIAL", "BATCH", "SOURCE_PLANT", "SOURCE_STORE","DISTR_CHAN" AS "TARGET_PLANT", "SALES_OFF" AS "TARGET_STORE", "TOTAL_COST", "WEIGHT"]);
                             
trans = CE_UNION_ALL(:d5300_transport,:d5200_transport2);
end;


dropprocedure union_all_test_2;
create procedure union_all_test_2(out trans transport)
language sqlscript as
begin
        
trans =
select "ACT_GI_DATE", "TRANSPORT_ID", "MATERIAL", "BATCH", "SOURCE_PLANT", "SOURCE_STORE",  "TARGET_PLANT", "TARGET_STORE", "TOTAL_COST", "WEIGHT" from "D5300"
union all
select "ACT_GI_DATE", "TRANSPORT_ID", "MATERIAL", "BATCH", "SOURCE_PLANT", "SOURCE_STORE", "DISTR_CHAN"  AS "TARGET_PLANT", "SALES_OFF" AS "TARGET_STORE", "TOTAL_COST", "WEIGHT" from "D5200";  
                      
end;

    在HANA Studio中调用两个存储过程所用时间分别如下图所示:

    可见,在这个测试场景中,两种方式所使用的时间几乎相差不多。


CE_AGGREGATION

      CE_AGGREGATION函数的功能,是通过给出列对数据分组,并对每个组进行统计运算。其功能相当于GROUP BY语句和聚合函数结合使用。

      例如,以下例子中,分别使用CE_AGGREGATION()GROUP BY对一个1184W条记录的表进行GROUP BY操作,并求每组的COUNT()值。



dropprocedure groupby_test_1;
create procedure groupby_test_1(out tmp2 groupby)
language sqlscript as
begin
tmp1 = CE_COLUMN_TABLE("RECORD",["USERID","CAMPID","MEDIAID","TYPE"]);
tmp2 = CE_AGGREGATION (:TMP1,[COUNT(*) AS CNT], ["USERID","CAMPID","MEDIAID","TYPE"]);
end;


dropprocedure groupby_test_2;
create procedure groupby_test_2(out tmp1 groupby)
language sqlscript as
begin
tmp1 = SELECT COUNT(*) AS CNT,USERID,CAMPID,MEDIAID,TYPE FROM "RECORD" GROUP BY USERID,CAMPID,MEDIAID,TYPE;   
end;

        HANA Studio中调用两个存储过程所用时间分别如下图所示:

        可见,对于GROUP BY操作,这两种方式所使用的时间几乎相差不多。

CE_JOIN    

         CE_JOIN函数的功能是根据给出的连接条件,实现两个表变量的内连接操作,即 INNER JOIN的操作。

     例如,如下的例子根据上一个例子中GROUP BY所得的结果,分别使用CE_JOIN()INNER JOIN进行内连接操作。进行内连接的两部分分别有1087W数据量。



dropprocedure join_test_1;
create procedure join_test_1(out join_result innerjoin)
language sqlscript as
begin
tmp1 = CE_COLUMN_TABLE("RECORD",["USERID","CAMPID","MEDIAID","TYPE"]);
tmp2 = CE_AGGREGATION (:TMP1,[COUNT(*) AS CNT], ["USERID","CAMPID","MEDIAID","TYPE"]);
tmp3 = CE_PROJECTION(:tmp2,["USERID","MEDIAID" as "MEDIAID1"]);
tmp4 = CE_PROJECTION(:tmp2,["USERID","MEDIAID" as "MEDIAID2"]);
join_result = CE_JOIN(:tmp3,:tmp4,["USERID"],["USERID","MEDIAID1","MEDIAID2"]);
                 
end;

            


dropprocedure join_test_2;
create procedure join_test_2(out join_result innerjoin)
language sqlscript as
begin
tmp1 = SELECT USERID,CAMPID,MEDIAID,TYPE FROM "RECORD" GROUP BY USERID,CAMPID,MEDIAID,TYPE;   
join_result = select f.userid,f.mediaid as mediaid1,l.mediaid as mediaid2 from :tmp1 as f join :tmp1 as l on f.userid=l.userid;
end;


      HANA Studio中调用两个存储过程所用时间分别如下图所示:

      可见,使用CE_JOIN比使用INNER  JOIN明显性能较高,使用INNER JOIN 所用时间几乎为使用CE_JOIN的两倍。

注意

      根据上边的例子,对于与CE function其实现相同功能的SQL语句,其性能相差不大。但在某些情况下CE function的性能优于其等价的SQL语句,甚至性能相差明显。

      但同时需注意,对于CE function的使用时,因为其使用的限制,每个CE function返回的结果必须绑定到一个表变量上,使得每次使用都要将结果数据具体化。因此对于较复杂的表操作,要使用多步的CE function才能实现,这些都可能带来性能上的损耗。所以,对于CE functionSQL语句的选择上,CE function的性能并不一定优于SQL语句。

想获取更多SAP HANA学习资料或有任何疑问,请关注新浪微博@HANAGeek!我们欢迎你的加入!