Schema-agnostic Progressive Entity Resolution (extended version)

Schema-agnostic Progressive Entity Resolution (extended version)

$`B`$ $`\leftarrow`$ buildRedundancyPositiveBlocks($`P`$) $`ProfileIndex`$ $`\leftarrow`$ buildProfileIndex($`B`$) $`SortedProfileList`$ $`\leftarrow`$ $`\emptyset`$ $`topComparisonsSet`$ $`\leftarrow`$ $`\emptyset`$

$`ComparisonList`$.addAll($`topComparisonsSet`$) sortInDecreasingWeight($`ComparisonList`$) sortInDecreasingWeight($`SortedProfileList`$)

$`windowSize`$ = 1 $`ComparisonList`$ $`\leftarrow`$ $`\emptyset`$ $`NL`$[] $`\leftarrow`$ buildNeighborList($`P`$) $`PI`$[] $`\leftarrow`$ buildPositionIndex($`NL`$[])

sortInDecreasingWeight($`ComparisonList`$)

$`windowSize`$++ return null

$`checkedEntities`$ $`\leftarrow`$ $`\emptyset`$

$`p_i`$ = $`SortedProfileList.removeFirst()`$ $`checkedEntities`$.add($`i`$) $`weights`$[] $`\leftarrow`$ $`\emptyset`$ $`distinctNeighbors`$ $`\leftarrow`$ $`\emptyset`$ $`SortedStack`$ $`\leftarrow`$ $`\emptyset`$

$`SortedStack`$.push(getComparison($`i`$, $`j`$, $`weights`$[$`j`$])) $`SortedStack`$.pop() $`ComparisonList`$ $`\leftarrow`$ sortInDecreasingWeight($`SortedStack`$)

$`B`$ $`\leftarrow`$ buildRedundancyPositiveBlocks($`P`$) $`B'`$ $`\leftarrow`$ blockScheduling($`B`$) $`ProfileIndex`$ $`\leftarrow`$ buildProfileIndex($`B'`$) $`b_k`$ $`\leftarrow`$ $`B'`$.removeFirst() $`ComparisonList`$ $`\leftarrow`$ $`\emptyset`$ sortInDecreasingWeight($`ComparisonList`$)