You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

445 lines
11 KiB

  1. // Copyright 2017, The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE.md file.
  4. package diff
  5. import (
  6. "fmt"
  7. "math/rand"
  8. "strings"
  9. "testing"
  10. "unicode"
  11. )
  12. func TestDifference(t *testing.T) {
  13. tests := []struct {
  14. // Before passing x and y to Difference, we strip all spaces so that
  15. // they can be used by the test author to indicate a missing symbol
  16. // in one of the lists.
  17. x, y string
  18. want string
  19. }{{
  20. x: "",
  21. y: "",
  22. want: "",
  23. }, {
  24. x: "#",
  25. y: "#",
  26. want: ".",
  27. }, {
  28. x: "##",
  29. y: "# ",
  30. want: ".X",
  31. }, {
  32. x: "a#",
  33. y: "A ",
  34. want: "MX",
  35. }, {
  36. x: "#a",
  37. y: " A",
  38. want: "XM",
  39. }, {
  40. x: "# ",
  41. y: "##",
  42. want: ".Y",
  43. }, {
  44. x: " #",
  45. y: "@#",
  46. want: "Y.",
  47. }, {
  48. x: "@#",
  49. y: " #",
  50. want: "X.",
  51. }, {
  52. x: "##########0123456789",
  53. y: " 0123456789",
  54. want: "XXXXXXXXXX..........",
  55. }, {
  56. x: " 0123456789",
  57. y: "##########0123456789",
  58. want: "YYYYYYYYYY..........",
  59. }, {
  60. x: "#####0123456789#####",
  61. y: " 0123456789 ",
  62. want: "XXXXX..........XXXXX",
  63. }, {
  64. x: " 0123456789 ",
  65. y: "#####0123456789#####",
  66. want: "YYYYY..........YYYYY",
  67. }, {
  68. x: "01234##########56789",
  69. y: "01234 56789",
  70. want: ".....XXXXXXXXXX.....",
  71. }, {
  72. x: "01234 56789",
  73. y: "01234##########56789",
  74. want: ".....YYYYYYYYYY.....",
  75. }, {
  76. x: "0123456789##########",
  77. y: "0123456789 ",
  78. want: "..........XXXXXXXXXX",
  79. }, {
  80. x: "0123456789 ",
  81. y: "0123456789##########",
  82. want: "..........YYYYYYYYYY",
  83. }, {
  84. x: "abcdefghij0123456789",
  85. y: "ABCDEFGHIJ0123456789",
  86. want: "MMMMMMMMMM..........",
  87. }, {
  88. x: "ABCDEFGHIJ0123456789",
  89. y: "abcdefghij0123456789",
  90. want: "MMMMMMMMMM..........",
  91. }, {
  92. x: "01234abcdefghij56789",
  93. y: "01234ABCDEFGHIJ56789",
  94. want: ".....MMMMMMMMMM.....",
  95. }, {
  96. x: "01234ABCDEFGHIJ56789",
  97. y: "01234abcdefghij56789",
  98. want: ".....MMMMMMMMMM.....",
  99. }, {
  100. x: "0123456789abcdefghij",
  101. y: "0123456789ABCDEFGHIJ",
  102. want: "..........MMMMMMMMMM",
  103. }, {
  104. x: "0123456789ABCDEFGHIJ",
  105. y: "0123456789abcdefghij",
  106. want: "..........MMMMMMMMMM",
  107. }, {
  108. x: "ABCDEFGHIJ0123456789 ",
  109. y: " 0123456789abcdefghij",
  110. want: "XXXXXXXXXX..........YYYYYYYYYY",
  111. }, {
  112. x: " 0123456789abcdefghij",
  113. y: "ABCDEFGHIJ0123456789 ",
  114. want: "YYYYYYYYYY..........XXXXXXXXXX",
  115. }, {
  116. x: "ABCDE0123456789 FGHIJ",
  117. y: " 0123456789abcdefghij",
  118. want: "XXXXX..........YYYYYMMMMM",
  119. }, {
  120. x: " 0123456789abcdefghij",
  121. y: "ABCDE0123456789 FGHIJ",
  122. want: "YYYYY..........XXXXXMMMMM",
  123. }, {
  124. x: "ABCDE01234F G H I J 56789 ",
  125. y: " 01234 a b c d e56789fghij",
  126. want: "XXXXX.....XYXYXYXYXY.....YYYYY",
  127. }, {
  128. x: " 01234a b c d e 56789fghij",
  129. y: "ABCDE01234 F G H I J56789 ",
  130. want: "YYYYY.....XYXYXYXYXY.....XXXXX",
  131. }, {
  132. x: "FGHIJ01234ABCDE56789 ",
  133. y: " 01234abcde56789fghij",
  134. want: "XXXXX.....MMMMM.....YYYYY",
  135. }, {
  136. x: " 01234abcde56789fghij",
  137. y: "FGHIJ01234ABCDE56789 ",
  138. want: "YYYYY.....MMMMM.....XXXXX",
  139. }, {
  140. x: "ABCAB BA ",
  141. y: " C BABAC",
  142. want: "XX.X.Y..Y",
  143. }, {
  144. x: "# #### ###",
  145. y: "#y####yy###",
  146. want: ".Y....YY...",
  147. }, {
  148. x: "# #### # ##x#x",
  149. y: "#y####y y## # ",
  150. want: ".Y....YXY..X.X",
  151. }, {
  152. x: "###z#z###### x #",
  153. y: "#y##Z#Z###### yy#",
  154. want: ".Y..M.M......XYY.",
  155. }, {
  156. x: "0 12z3x 456789 x x 0",
  157. y: "0y12Z3 y456789y y y0",
  158. want: ".Y..M.XY......YXYXY.",
  159. }, {
  160. x: "0 2 4 6 8 ..................abXXcdEXF.ghXi",
  161. y: " 1 3 5 7 9..................AB CDE F.GH I",
  162. want: "XYXYXYXYXY..................MMXXMM.X..MMXM",
  163. }, {
  164. x: "I HG.F EDC BA..................9 7 5 3 1 ",
  165. y: "iXhg.FXEdcXXba.................. 8 6 4 2 0",
  166. want: "MYMM..Y.MMYYMM..................XYXYXYXYXY",
  167. }, {
  168. x: "x1234",
  169. y: " 1234",
  170. want: "X....",
  171. }, {
  172. x: "x123x4",
  173. y: " 123 4",
  174. want: "X...X.",
  175. }, {
  176. x: "x1234x56",
  177. y: " 1234 ",
  178. want: "X....XXX",
  179. }, {
  180. x: "x1234xxx56",
  181. y: " 1234 56",
  182. want: "X....XXX..",
  183. }, {
  184. x: ".1234...ab",
  185. y: " 1234 AB",
  186. want: "X....XXXMM",
  187. }, {
  188. x: "x1234xxab.",
  189. y: " 1234 AB ",
  190. want: "X....XXMMX",
  191. }, {
  192. x: " 0123456789",
  193. y: "9012345678 ",
  194. want: "Y.........X",
  195. }, {
  196. x: " 0123456789",
  197. y: "8901234567 ",
  198. want: "YY........XX",
  199. }, {
  200. x: " 0123456789",
  201. y: "7890123456 ",
  202. want: "YYY.......XXX",
  203. }, {
  204. x: " 0123456789",
  205. y: "6789012345 ",
  206. want: "YYYY......XXXX",
  207. }, {
  208. x: "0123456789 ",
  209. y: " 5678901234",
  210. want: "XXXXX.....YYYYY",
  211. }, {
  212. x: "0123456789 ",
  213. y: " 4567890123",
  214. want: "XXXX......YYYY",
  215. }, {
  216. x: "0123456789 ",
  217. y: " 3456789012",
  218. want: "XXX.......YYY",
  219. }, {
  220. x: "0123456789 ",
  221. y: " 2345678901",
  222. want: "XX........YY",
  223. }, {
  224. x: "0123456789 ",
  225. y: " 1234567890",
  226. want: "X.........Y",
  227. }, {
  228. x: "0 1 2 3 45 6 7 8 9 ",
  229. y: " 9 8 7 6 54 3 2 1 0",
  230. want: "XYXYXYXYX.YXYXYXYXY",
  231. }, {
  232. x: "0 1 2345678 9 ",
  233. y: " 6 72 5 819034",
  234. want: "XYXY.XX.XX.Y.YYY",
  235. }, {
  236. x: "F B Q M O I G T L N72X90 E 4S P 651HKRJU DA 83CVZW",
  237. y: " 5 W H XO10R9IV K ZLCTAJ8P3N SEQM4 7 2G6 UBD F ",
  238. want: "XYXYXYXY.YYYY.YXYXY.YYYYYYY.XXXXXY.YY.XYXYY.XXXXXX.Y.XYXXXXXX",
  239. }}
  240. for _, tt := range tests {
  241. t.Run("", func(t *testing.T) {
  242. x := strings.Replace(tt.x, " ", "", -1)
  243. y := strings.Replace(tt.y, " ", "", -1)
  244. es := testStrings(t, x, y)
  245. if got := es.String(); got != tt.want {
  246. t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, tt.want)
  247. }
  248. })
  249. }
  250. }
  251. func TestDifferenceFuzz(t *testing.T) {
  252. tests := []struct{ px, py, pm float32 }{
  253. {px: 0.0, py: 0.0, pm: 0.1},
  254. {px: 0.0, py: 0.1, pm: 0.0},
  255. {px: 0.1, py: 0.0, pm: 0.0},
  256. {px: 0.0, py: 0.1, pm: 0.1},
  257. {px: 0.1, py: 0.0, pm: 0.1},
  258. {px: 0.2, py: 0.2, pm: 0.2},
  259. {px: 0.3, py: 0.1, pm: 0.2},
  260. {px: 0.1, py: 0.3, pm: 0.2},
  261. {px: 0.2, py: 0.2, pm: 0.2},
  262. {px: 0.3, py: 0.3, pm: 0.3},
  263. {px: 0.1, py: 0.1, pm: 0.5},
  264. {px: 0.4, py: 0.1, pm: 0.5},
  265. {px: 0.3, py: 0.2, pm: 0.5},
  266. {px: 0.2, py: 0.3, pm: 0.5},
  267. {px: 0.1, py: 0.4, pm: 0.5},
  268. }
  269. for i, tt := range tests {
  270. t.Run(fmt.Sprintf("P%d", i), func(t *testing.T) {
  271. // Sweep from 1B to 1KiB.
  272. for n := 1; n <= 1024; n <<= 1 {
  273. t.Run(fmt.Sprintf("N%d", n), func(t *testing.T) {
  274. for j := 0; j < 10; j++ {
  275. x, y := generateStrings(n, tt.px, tt.py, tt.pm, int64(j))
  276. testStrings(t, x, y)
  277. }
  278. })
  279. }
  280. })
  281. }
  282. }
  283. func BenchmarkDifference(b *testing.B) {
  284. for n := 1 << 10; n <= 1<<20; n <<= 2 {
  285. b.Run(fmt.Sprintf("N%d", n), func(b *testing.B) {
  286. x, y := generateStrings(n, 0.05, 0.05, 0.10, 0)
  287. b.ReportAllocs()
  288. b.SetBytes(int64(len(x) + len(y)))
  289. for i := 0; i < b.N; i++ {
  290. Difference(len(x), len(y), func(ix, iy int) Result {
  291. return compareByte(x[ix], y[iy])
  292. })
  293. }
  294. })
  295. }
  296. }
  // generateStrings deterministically builds a pair of related strings from
  // n pseudo-random bytes drawn from a generator seeded with seed.
  //
  // Each random byte is routed by an independent random draw:
  //   - with probability px it is appended to x only,
  //   - with probability py it is appended to y only,
  //   - with probability pm a letter derived from it is appended to both,
  //     upper-case in x and lower-case in y,
  //   - otherwise the byte is appended unchanged to both.
  //
  // It panics if px+py+pm exceeds 1.
  func generateStrings(n int, px, py, pm float32, seed int64) (string, string) {
  	if px+py+pm > 1.0 {
  		panic("invalid probabilities")
  	}

  	// Turn the three probabilities into cumulative thresholds so a single
  	// draw in [0, 1) selects exactly one outcome.
  	onlyX := px
  	onlyY := py + onlyX
  	modified := pm + onlyY

  	rng := rand.New(rand.NewSource(seed))
  	raw := make([]byte, n)
  	rng.Read(raw) // all payload bytes are drawn before any routing draw

  	var x, y []byte
  	for _, c := range raw {
  		p := rng.Float32()
  		if p < onlyX {
  			x = append(x, c)
  			continue
  		}
  		if p < onlyY {
  			y = append(y, c)
  			continue
  		}
  		if p < modified {
  			letter := c % 26
  			x = append(x, 'A'+letter)
  			y = append(y, 'a'+letter)
  			continue
  		}
  		x = append(x, c)
  		y = append(y, c)
  	}
  	return string(x), string(y)
  }
  324. func testStrings(t *testing.T, x, y string) EditScript {
  325. es := Difference(len(x), len(y), func(ix, iy int) Result {
  326. return compareByte(x[ix], y[iy])
  327. })
  328. if es.LenX() != len(x) {
  329. t.Errorf("es.LenX = %d, want %d", es.LenX(), len(x))
  330. }
  331. if es.LenY() != len(y) {
  332. t.Errorf("es.LenY = %d, want %d", es.LenY(), len(y))
  333. }
  334. if !validateScript(x, y, es) {
  335. t.Errorf("invalid edit script: %v", es)
  336. }
  337. return es
  338. }
  339. func validateScript(x, y string, es EditScript) bool {
  340. var bx, by []byte
  341. for _, e := range es {
  342. switch e {
  343. case Identity:
  344. if !compareByte(x[len(bx)], y[len(by)]).Equal() {
  345. return false
  346. }
  347. bx = append(bx, x[len(bx)])
  348. by = append(by, y[len(by)])
  349. case UniqueX:
  350. bx = append(bx, x[len(bx)])
  351. case UniqueY:
  352. by = append(by, y[len(by)])
  353. case Modified:
  354. if !compareByte(x[len(bx)], y[len(by)]).Similar() {
  355. return false
  356. }
  357. bx = append(bx, x[len(bx)])
  358. by = append(by, y[len(by)])
  359. }
  360. }
  361. return string(bx) == x && string(by) == y
  362. }
  363. // compareByte returns a Result where the result is Equal if x == y,
  364. // similar if x and y differ only in casing, and different otherwise.
  365. func compareByte(x, y byte) (r Result) {
  366. switch {
  367. case x == y:
  368. return equalResult // Identity
  369. case unicode.ToUpper(rune(x)) == unicode.ToUpper(rune(y)):
  370. return similarResult // Modified
  371. default:
  372. return differentResult // UniqueX or UniqueY
  373. }
  374. }
  375. var (
  376. equalResult = Result{NumDiff: 0}
  377. similarResult = Result{NumDiff: 1}
  378. differentResult = Result{NumDiff: 2}
  379. )
  380. func TestResult(t *testing.T) {
  381. tests := []struct {
  382. result Result
  383. wantEqual bool
  384. wantSimilar bool
  385. }{
  386. // equalResult is equal since NumDiff == 0, by definition of Equal method.
  387. {equalResult, true, true},
  388. // similarResult is similar since it is a binary result where only one
  389. // element was compared (i.e., Either NumSame==1 or NumDiff==1).
  390. {similarResult, false, true},
  391. // differentResult is different since there are enough differences that
  392. // it isn't even considered similar.
  393. {differentResult, false, false},
  394. // Zero value is always equal.
  395. {Result{NumSame: 0, NumDiff: 0}, true, true},
  396. // Binary comparisons (where NumSame+NumDiff == 1) are always similar.
  397. {Result{NumSame: 1, NumDiff: 0}, true, true},
  398. {Result{NumSame: 0, NumDiff: 1}, false, true},
  399. // More complex ratios. The exact ratio for similarity may change,
  400. // and may require updates to these test cases.
  401. {Result{NumSame: 1, NumDiff: 1}, false, true},
  402. {Result{NumSame: 1, NumDiff: 2}, false, true},
  403. {Result{NumSame: 1, NumDiff: 3}, false, false},
  404. {Result{NumSame: 2, NumDiff: 1}, false, true},
  405. {Result{NumSame: 2, NumDiff: 2}, false, true},
  406. {Result{NumSame: 2, NumDiff: 3}, false, true},
  407. {Result{NumSame: 3, NumDiff: 1}, false, true},
  408. {Result{NumSame: 3, NumDiff: 2}, false, true},
  409. {Result{NumSame: 3, NumDiff: 3}, false, true},
  410. {Result{NumSame: 1000, NumDiff: 0}, true, true},
  411. {Result{NumSame: 1000, NumDiff: 1}, false, true},
  412. {Result{NumSame: 1000, NumDiff: 2}, false, true},
  413. {Result{NumSame: 0, NumDiff: 1000}, false, false},
  414. {Result{NumSame: 1, NumDiff: 1000}, false, false},
  415. {Result{NumSame: 2, NumDiff: 1000}, false, false},
  416. }
  417. for _, tt := range tests {
  418. if got := tt.result.Equal(); got != tt.wantEqual {
  419. t.Errorf("%#v.Equal() = %v, want %v", tt.result, got, tt.wantEqual)
  420. }
  421. if got := tt.result.Similar(); got != tt.wantSimilar {
  422. t.Errorf("%#v.Similar() = %v, want %v", tt.result, got, tt.wantSimilar)
  423. }
  424. }
  425. }