-
Notifications
You must be signed in to change notification settings - Fork 4
/
semver_amd64.s
97 lines (84 loc) · 1.83 KB
/
semver_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
// Copyright 2014 The Semver Package Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !purego
// +build !go1.16
#include "go_asm.h"
#include "textflag.h"
// Compare(a, b) -> ret
//
// Three-way comparison of two sequences of signed 32-bit elements that are
// read through the pointers a and b.
// NOTE(review): presumably declared in Go as
// "func Compare(a, b *Version) int" — confirm against the Go stub.
//
// Value stored in ret:
//    0  elements 0..13 are pairwise equal
//    1  at the first differing element, a's value is greater than b's
//   -1  at the first differing element, a's value is less than b's
//
// 64 bytes are loaded from each operand as four unaligned 16-byte chunks.
// The last two 32-bit lanes (elements 14 and 15) are loaded but never
// influence the result.
//
// Register roles:
//   R8, R9  pointers a and b
//   DX      index of the first element of the chunk being examined
//   AX      per-lane equality mask, later the result
//   BX      scratch
//   X0, X1  chunk of a, chunk of b
TEXT ·Compare(SB),NOSPLIT,$0-24
	MOVQ	a+0(FP), R8
	MOVQ	b+8(FP), R9

	// Elements 0..3.
	XORQ	DX, DX
	MOVOU	(R8), X0
	MOVOU	(R9), X1
	PCMPEQL	X1, X0			// lane = all ones where a == b
	MOVMSKPS	X0, AX			// AX bit i = 1 if lane i is equal
	CMPL	AX, $0x0f
	JNE	compare_mismatch

	// Elements 4..7.
	MOVQ	$4, DX
	MOVOU	16(R8), X0
	MOVOU	16(R9), X1
	PCMPEQL	X1, X0
	MOVMSKPS	X0, AX
	CMPL	AX, $0x0f
	JNE	compare_mismatch

	// Elements 8..11.
	MOVQ	$8, DX
	MOVOU	32(R8), X0
	MOVOU	32(R9), X1
	PCMPEQL	X1, X0
	MOVMSKPS	X0, AX
	CMPL	AX, $0x0f
	JNE	compare_mismatch

	// Elements 12..13; lanes 2 and 3 of this chunk are forced to "equal".
	// Per the original note they cover 'build' followed by unused space.
	MOVQ	$12, DX
	MOVOU	48(R8), X0
	MOVOU	48(R9), X1
	PCMPEQL	X1, X0
	MOVMSKPS	X0, AX
	ORQ	$0xc, AX
	CMPL	AX, $0x0f
	JNE	compare_mismatch

	// Every compared element matched.
	MOVQ	$0, ret+16(FP)
	RET

compare_mismatch:
	// Turn the "equal" mask into a "differs" mask. Bits 4..15 become set as
	// well, but at least one of bits 0..3 is set here, so BSFQ finds that first.
	XORQ	$0xffff, AX
	BSFQ	AX, BX			// BX = lane number of the first difference
	ADDQ	BX, DX			// DX = element index of the first difference

	MOVL	(R9)(DX*4), BX		// BX = b[DX]
	XORQ	AX, AX			// clear before CMPL: XOR clobbers the flags
	CMPL	BX, (R8)(DX*4)		// flags for b[DX] - a[DX]
	SETLT	AX			// AX = 1 if b[DX] < a[DX] (signed), else 0
	LEAQ	-1(AX*2), AX		// map 1 -> 1, 0 -> -1
	MOVQ	AX, ret+16(FP)
	RET
// less(a, b) -> ret
//
// Reports whether the sequence of signed 32-bit elements at a orders before
// the one at b.
// NOTE(review): presumably declared in Go as
// "func less(a, b *Version) bool" — confirm against the Go stub.
//
// Value stored in ret (one byte):
//   1  at the first differing element, a's value is less than b's
//   0  all 16 elements are equal, or a's value is greater there
//
// 64 bytes are read from each operand in unaligned 16-byte chunks, and all
// 16 lanes take part in the comparison (no lane is masked out).
//
// Register roles:
//   SI, DI  pointers a and b
//   DX      byte offset of the current chunk (0, 16, 32, 48)
//   AX, CX  lane masks; AX also carries the result
//   X0      chunk of a, later the "a > b" lanes
//   X1, X3  chunk of b / equality lanes, later the "b > a" lanes
TEXT ·less(SB),NOSPLIT,$0-17
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	XORQ	DX, DX

less_loop:
	MOVOU	(DI)(DX*1), X1		// X1 = chunk of b
	MOVOU	(SI)(DX*1), X0		// X0 = chunk of a
	MOVAPS	X1, X3			// keep b: PCMPEQL overwrites X1
	PCMPEQL	X0, X1			// lane = all ones where a == b
	MOVMSKPS	X1, AX			// AX bit i = 1 if lane i is equal
	CMPL	AX, $0x0f
	JNE	less_determine
	// Full-width arithmetic: DX is used as a 64-bit index above.
	ADDQ	$16, DX
	CMPQ	DX, $64
	JNE	less_loop

	// All four chunks were equal, so a is not less than b.
	MOVB	$0, ret+16(FP)
	RET

less_determine:
	MOVAPS	X3, X1			// X1 = b again
	PCMPGTL	X0, X3			// X3 = b > a   e.g. 3.0.1.0 |>| 2.1.0.0 -> 1.0.1.0
	PCMPGTL	X1, X0			// X0 = a > b   e.g. 2.1.0.0 |>| 3.0.1.0 -> 0.1.0.0
	// $27 selects lanes [0, 1, 2, 3] in reverse, so the earliest element
	// lands in the most significant mask bit.
	PSHUFL	$27, X3, X3
	PSHUFL	$27, X0, X0
	MOVMSKPS	X3, CX			// e.g. 1010: lanes where b > a
	MOVMSKPS	X0, AX			// e.g. 0100: lanes where a > b
	// Whichever mask is numerically larger owns the earliest difference.
	CMPL	CX, AX
	SETGT	AX			// AX = 1 if the b > a mask wins, i.e. a < b
	MOVB	AX, ret+16(FP)
	RET