New subject: GPC preformance kludge with local

7 Jun 2003


      Note the following lsiting, and resulting assembly code (i686, Linux):
You might think that the function is very optimized, since it requires
only two comparisons and a lookup in table per character checked?
Alas, GPC does a proper call to the real memcpy() function of complete
``v'' array on each call of function DigitValue() !!!
This makes the run twice as slow as when ``v'' array is made global
(edited):
-----------------------------------------------------
make test arr a=8
116.290u 0.400s 1:59.62 97.5%   0+0k 0+0io 10385pf+0w
time a.out arr a=8
114.190u 0.280s 1:58.53 96.5%   0+0k 0+0io 120pf+0w
114.500u 0.210s 2:03.95 92.5%   0+0k 0+0io 120pf+0w
114.650u 0.040s 1:57.13 97.9%   0+0k 0+0io 121pf+0w
114.840u 0.020s 1:57.00 98.1%   0+0k 0+0io 120pf+0w
115.180u 0.040s 2:05.03 92.1%   0+0k 0+0io 120pf+0w
106.200u 1.390s 2:07.25 84.5%   0+0k 0+0io 120pf+0w
111.700u 0.080s 2:04.02 90.1%   0+0k 0+0io 120pf+0w
make test arr a=8 glob
65.840u 0.160s 1:07.62 97.6%    0+0k 0+0io 10385pf+0w
time a.out arr a=8 glob
63.610u 0.060s 1:06.63 95.5%    0+0k 0+0io 120pf+0w
63.710u 0.030s 1:05.21 97.7%    0+0k 0+0io 120pf+0w
63.620u 0.040s 1:06.02 96.4%    0+0k 0+0io 120pf+0w
63.610u 0.030s 1:05.41 97.2%    0+0k 0+0io 120pf+0w
63.840u 0.000s 1:05.15 97.9%    0+0k 0+0io 120pf+0w
63.980u 0.010s 1:05.29 98.0%    0+0k 0+0io 120pf+0w
64.000u 0.020s 1:05.19 98.2%    0+0k 0+0io 120pf+0w
-----------------------------------------------------
Just FYI, making ``v'' array [0..255] of Integer (for aligned access) made
it even 10s slower (probably problems with FSB and cache), instead of what
is commonly said, and complete code is not a bit faster from this variant:
function DigitValue (Dig: Char): Integer; attribute (inline, const);
  var d : Integer; attribute (register);
  begin
    if      (Dig >= '0') and (Dig <= '9' ) then
      DigitValue  := Ord (Dig) - Ord ('0')
    else if (Dig >= 'a') and (Dig <= 'z') then
      DigitValue  := Ord (Dig) - Ord ('a') + 10
    else if (Dig >= 'A') and (Dig <= 'Z') then
      DigitValue  := Ord (Dig) - Ord ('A') + 10
    else
      DigitValue := -1
  end;
... even though this code has six branches.
Is it the GNU Pascal problem or the back-end problem?
NOTE: even attribute (const) after initialization of the array didn't help
to evade memcpy().
Mirsad
isvalidnumberbase.pas:
----------------------------------------------------------------------------
program isvalidnum (output);
var i, Base: Integer;
    OK     : Boolean;
function IsValidNumberBase2 (s: String; Base: Integer): Boolean; attribute(const);
var i, dv  : Integer; attribute (register);
    b      : Byte;    attribute (register);
      function DigitValue (Dig: Char): Integer; {attribute (inline, const);}
        var d : Integer; attribute (register);
{$if 0}
{$if Low (Char) < 0}
{$error "this won't work: negative Char used as index}
{$endif}
{$endif}
      v : array [0..255] of ByteInt = (
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
               -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
               25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, -1, -1, -1, -1,
               -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
               25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 );
begin
    if (Dig >= '0') and (Dig <= 'z') then
      DigitValue := v[Ord (Dig)]
    else
      DigitValue := -1;
  end;
begin
  b := Base;
  Assert ((b >= 2) and (b <= 36), 'base out of range (2..36)');
  i := 1;
  if s[i] = '-' then
    Inc (i);
  for i := i to Length (s) do
{$define VERSION1}
{$ifdef  VERSION1}
  begin
    dv := DigitValue (s[i]);
    if (dv < 0) or (dv >= b) then
      Return False
  end;
 {$else}
  if not (DigitValue (s[i]) in [0 .. b-1]) then
    Return False;
   {$endif}
  Return True;
end;
begin { main }
  OK := IsValidNumberBase2 ('010101010101101010101', 2) and
        not IsValidNumberBase2 ('0101010210101101010101', 2) and
        IsValidNumberBase2 ('0121012210101101010101', 3);
if OK then
    WriteLn ('OK');
end.
-------------------------------------------------------------------------
isvalidnumberbase.s (edited):
-------------------------------------------------------------------------
    .file	"isvalidnumberbase.pas"
    .local	I
    .comm	I,4,4
    .local	Base
    .comm	Base,4,4
    .local	Ok
    .comm	Ok,1,1
    .section	.rodata
.LC0:
    .byte	-1
.
.
.
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	0
    .byte	1
    .byte	2
    .byte	3
    .byte	4
    .byte	5
    .byte	6
    .byte	7
    .byte	8
    .byte	9
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	10
    .byte	11
    .byte	12
    .byte	13
    .byte	14
    .byte	15
    .byte	16
    .byte	17
    .byte	18
    .byte	19
    .byte	20
    .byte	21
    .byte	22
    .byte	23
    .byte	24
    .byte	25
    .byte	26
    .byte	27
    .byte	28
    .byte	29
    .byte	30
    .byte	31
    .byte	32
    .byte	33
    .byte	34
    .byte	35
    .byte	36
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	10
    .byte	11
    .byte	12
    .byte	13
    .byte	14
    .byte	15
    .byte	16
    .byte	17
    .byte	18
    .byte	19
    .byte	20
    .byte	21
    .byte	22
    .byte	23
    .byte	24
    .byte	25
    .byte	26
    .byte	27
    .byte	28
    .byte	29
    .byte	30
    .byte	31
    .byte	32
    .byte	33
    .byte	34
    .byte	35
    .byte	36
    .byte	-1
    .byte	-1
.
.
.
    .byte	-1
    .byte	-1
    .byte	-1
    .byte	-1
    .text
    .p2align 4,,15
    .type	Digitvalue.0,@function
Digitvalue.0:
    pushl	%ebp
    movl	%esp, %ebp
    subl	$296, %esp
    movl	%ebx, -4(%ebp)
    leal	-280(%ebp), %eax
    movzbl	8(%ebp), %ebx
    movl	%ecx, -12(%ebp)
    movl	$256, 8(%esp)
    movl	$.LC0, 4(%esp)
    movl	%eax, (%esp)
    call	memcpy
    cmpb	$47, %bl
    jbe	.L4
    cmpb	$122, %bl
    ja	.L4
    movzbl	%bl, %eax
    movsbl	-280(%eax,%ebp),%eax
.L5:
    movl	-4(%ebp), %ebx
    movl	%ebp, %esp
    popl	%ebp
    ret
    .p2align 4,,7
.L4:
    movl	$-1, %eax
    jmp	.L5
.Lfe1:
    .size	Digitvalue.0,.Lfe1-Digitvalue.0
    .section	.rodata.str1.1,"aMS",@progbits,1
.LC1:
    .string	"base out of range (2..36)"
    .text
    .p2align 4,,15
.globl Isvalidnumberbase2
    .type	Isvalidnumberbase2,@function
Isvalidnumberbase2:
    pushl	%ebp
    movl	%esp, %ebp
    pushl	%edi
    pushl	%esi
    pushl	%ebx
    subl	$92, %esp
    movl	8(%ebp), %edi
    movzbl	12(%ebp), %eax
    cmpb	$1, %al
    seta	%bl
    movb	%al, -73(%ebp)
    testb	%bl, %bl
    je	.L6
    cmpb	$36, %al
    setbe	%bl
.L6:
    movl	$25, -72(%ebp)
    leal	-64(%ebp), %eax
    movl	$25, 8(%esp)
    movl	$.LC1, 4(%esp)
    movl	%eax, (%esp)
    call	memcpy
    leal	-72(%ebp), %eax
    movl	%eax, 4(%esp)
    movzbl	%bl, %eax
    movl	$1, %ebx
    movl	$25, -68(%ebp)
    movl	%eax, (%esp)
    call	_p_Assert
    cmpb	$45, 8(%edi)
    je	.L18
.L8:
    movl	4(%edi), %esi
    cmpl	%esi, %ebx
    jg	.L9
    movb	$0, -74(%ebp)
    .p2align 4,,15
.L10:
    cmpb	$0, -74(%ebp)
    je	.L12
    cmpl	%esi, %ebx
    je	.L9
    incl	%ebx
.L12:
    movb	$1, -74(%ebp)
    leal	-24(%ebp), %ecx
    movzbl	7(%edi,%ebx), %eax
    movl	%eax, (%esp)
    call	Digitvalue.0
    movl	%eax, %ecx
    shrl	$31, %ecx
    testb	%cl, %cl
    movl	%eax, %edx
    jne	.L17
    movzbl	-73(%ebp), %eax
    cmpl	%eax, %edx
    jl	.L14
    testl	%edx, %edx
    js	.L14
    movb	$1, %cl
.L14:
    testb	%cl, %cl
    je	.L10
.L17:
    xorl	%eax, %eax
.L1:
    addl	$92, %esp
    popl	%ebx
    popl	%esi
    popl	%edi
    popl	%ebp
    ret
    .p2align 4,,7
.L9:
    movb	$1, %al
    jmp	.L1
.L18:
    movl	$2, %ebx
    jmp	.L8
.Lfe2:
    .size	Isvalidnumberbase2,.Lfe2-Isvalidnumberbase2
    .section	.rodata.str1.1
.LC2:
    .string	"010101010101101010101"
.LC5:
    .string	"OK"
.LC4:
    .string	"0121012210101101010101"
.LC3:
    .string	"0101010210101101010101"
    .text
    .p2align 4,,15
.globl pascal_main_program
    .type	pascal_main_program,@function
pascal_main_program:
    pushl	%ebp
    movl	%esp, %ebp
    subl	$136, %esp
    movl	$21, 8(%esp)
    leal	-32(%ebp), %eax
    movl	$21, -40(%ebp)
    movl	$.LC2, 4(%esp)
    movl	%eax, (%esp)
    call	memcpy
    leal	-40(%ebp), %eax
    movl	$21, -36(%ebp)
    movl	$2, 4(%esp)
    movl	%eax, (%esp)
    call	Isvalidnumberbase2
    testb	%al, %al
    jne	.L28
.L21:
    testb	%al, %al
    jne	.L29
.L23:
    movb	%al, Ok
    testb	%al, %al
    jne	.L30
.L19:
    movl	%ebp, %esp
    popl	%ebp
    ret
    .p2align 4,,7
.L30:
    movl	$22, 24(%esp)
    movl	$2, 20(%esp)
    movl	$.LC5, 16(%esp)
    movl	$17, 12(%esp)
    movl	$2, 8(%esp)
    movl	$784, 4(%esp)
    movl	$_p_Output, (%esp)
    call	_p_Internal_Write
    movl	_p_InOutRes, %eax
    testl	%eax, %eax
    je	.L19
    jmp	.L31
    .p2align 4,,7
.L29:
    movl	$22, -104(%ebp)
    leal	-96(%ebp), %eax
    movl	$22, 8(%esp)
    movl	$.LC4, 4(%esp)
    movl	%eax, (%esp)
    call	memcpy
    leal	-104(%ebp), %eax
    movl	$22, -100(%ebp)
    movl	$3, 4(%esp)
    movl	%eax, (%esp)
    call	Isvalidnumberbase2
    jmp	.L23
    .p2align 4,,7
.L28:
    movl	$22, -72(%ebp)
    leal	-64(%ebp), %eax
    movl	$22, 8(%esp)
    movl	$.LC3, 4(%esp)
    movl	%eax, (%esp)
    call	memcpy
    leal	-72(%ebp), %eax
    movl	$22, -68(%ebp)
    movl	$2, 4(%esp)
    movl	%eax, (%esp)
    call	Isvalidnumberbase2
    testb	%al, %al
    sete	%al
    jmp	.L21
.L31:
    call	_p_CheckInOutRes
.Lfe3:
    .size	pascal_main_program,.Lfe3-pascal_main_program
    .data
    .type	ctor_run_condition_14.1,@object
    .size	ctor_run_condition_14.1,1
ctor_run_condition_14.1:
    .byte	0
    .text
    .p2align 4,,15
.globl init_pascal_main_program
    .type	init_pascal_main_program,@function
init_pascal_main_program:
    cmpb	$0, ctor_run_condition_14.1
    pushl	%ebp
    movl	%esp, %ebp
    je	.L34
    popl	%ebp
    ret
    .p2align 4,,7
.L34:
    popl	%ebp
    movb	$1, ctor_run_condition_14.1
    jmp	_p_DoInitProc
.Lfe4:
    .size	init_pascal_main_program,.Lfe4-init_pascal_main_program
    .p2align 4,,15
.globl main
    .type	main,@function
main:
    pushl	%ebp
    movl	%esp, %ebp
    subl	$24, %esp
    movl	__GPC_RTS_VERSION_20030507__, %eax
    movl	16(%ebp), %eax
    andl	$-16, %esp
    movl	%eax, 8(%esp)
    movl	12(%ebp), %eax
    movl	%eax, 4(%esp)
    movl	8(%ebp), %eax
    movl	%eax, (%esp)
    call	_p_initialize
    call	init_pascal_main_program
    call	pascal_main_program
    call	_p_finalize
    movl	%ebp, %esp
    xorl	%eax, %eax
    popl	%ebp
    ret
.Lfe5:
    .size	main,.Lfe5-main
    .ident	"GCC: (GNU) 3.2.1"
-------------------------------------------------------------------------------

GPC preformance kludge with local initialized arrays