Skip to content

Commit 1adfd71

Browse files
committed
Fix an error in some cases of the Union-Find algorithm
1 parent 3adb611 commit 1adfd71

File tree

4 files changed

+56
-28
lines changed

4 files changed

+56
-28
lines changed

README.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,21 @@ Example of input data (based on the csv file):
1414
```
1515
ID,PARENT_ID,EMAIL,CARD,PHONE,TMP
1616
1,NULL,email1,card1,phone1,
17-
2,NULL,email2,card2,phone2,
17+
2,NULL,email2,card1,phone2,
1818
3,NULL,email3,card3,phone3,
1919
4,NULL,email1,card2,phone4,
2020
5,NULL,email5,card5,phone2,
2121
6,NULL,email6,card6,phone6,
2222
7,NULL,email3,card9,phone7,
2323
8,NULL,email8,card10,phone8,
24-
9,NULL,email9,card9,phone3,
25-
10,NULL,email10,card10,phone10,
24+
9,NULL,email9,card9,phone3,
25+
10,NULL,email2,card10,phone10,
2626
```
2727

28+
For example, the element with **ID 10** is linked to the elements with IDs 2, 8, 4 and 1. The original record of this duplicate group is ID 1. Brief visualization of the dependencies:
29+
- **ID1 => ID2 => ID10 => ID8**
30+
31+
2832
Require
2933
--
3034
- php

README_RU.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,20 @@ Problem/Motivation
1414
```
1515
ID,PARENT_ID,EMAIL,CARD,PHONE,TMP
1616
1,NULL,email1,card1,phone1,
17-
2,NULL,email2,card2,phone2,
17+
2,NULL,email2,card1,phone2,
1818
3,NULL,email3,card3,phone3,
1919
4,NULL,email1,card2,phone4,
2020
5,NULL,email5,card5,phone2,
2121
6,NULL,email6,card6,phone6,
2222
7,NULL,email3,card9,phone7,
2323
8,NULL,email8,card10,phone8,
24-
9,NULL,email9,card9,phone3,
25-
10,NULL,email10,card10,phone10,
24+
9,NULL,email9,card9,phone3,
25+
10,NULL,email2,card10,phone10,
2626
```
2727

28+
На примере элемента с **ID 10** его связали с 2,8,4,1. Оригинальный дубликат 1. Краткая визуализация зависимостей:
29+
- **ID1 => ID2 => ID10 => ID8**
30+
2831
Require
2932
--
3033
- php

README_UA.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,23 @@ Problem/Motivation
1414
```
1515
ID,PARENT_ID,EMAIL,CARD,PHONE,TMP
1616
1,NULL,email1,card1,phone1,
17-
2,NULL,email2,card2,phone2,
17+
2,NULL,email2,card1,phone2,
1818
3,NULL,email3,card3,phone3,
1919
4,NULL,email1,card2,phone4,
2020
5,NULL,email5,card5,phone2,
2121
6,NULL,email6,card6,phone6,
2222
7,NULL,email3,card9,phone7,
2323
8,NULL,email8,card10,phone8,
24-
9,NULL,email9,card9,phone3,
25-
10,NULL,email10,card10,phone10,
24+
9,NULL,email9,card9,phone3,
25+
10,NULL,email2,card10,phone10,
2626
```
2727

28+
На прикладі елементу з **ID 10** його зв'язали з 2,8,4,1. Оригінальний дублікат 1. Коротка візуалізація залежностей:
29+
ID1 > ID2 > ID10 > ID8
30+
- **ID1 => ID2 => ID10 => ID8**
31+
32+
33+
2834
Require
2935
--
3036
- php

index.php

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,23 @@
77
*/
88

99
// Define constants.
10-
define('FIELDS', ['EMAIL', 'CARD', 'PHONE']);
10+
define('DUPLICATES_FIELDS', ['EMAIL', 'CARD', 'PHONE']);
1111

1212
// Default example data.
1313
$csv = 'ID,PARENT_ID,EMAIL,CARD,PHONE,TMP
1414
1,NULL,email1,card1,phone1,
1515
2,NULL,email2,card1,phone2,
1616
3,NULL,email3,card3,phone3,
17-
4,NULL,email1,card2,phone4,
17+
4,NULL,email1,card2,phone4,
1818
5,NULL,email5,card5,phone2,
1919
6,NULL,email6,card6,phone6,
2020
7,NULL,email3,card9,phone7,
2121
8,NULL,email8,card10,phone8,
22-
9,NULL,email9,card9,phone3,
23-
10,NULL,email10,card10,phone10,';
22+
9,NULL,email9,card9,phone3,
23+
10,NULL,email2,card10,phone10,';
2424

2525
$rows = explode(PHP_EOL, $csv);
26+
$fields_array = [];
2627

2728
// Prepare array data.
2829
foreach ($rows as $key => $row) {
@@ -41,6 +42,7 @@
4142
$csv_string = 'ID,PARENT_ID' . PHP_EOL;
4243

4344
$mapping_fields = [];
45+
$grouping_key = [];
4446

4547
// Find duplicates and save to mapping.
4648
foreach ($fields_array as $key => $array) {
@@ -51,32 +53,40 @@
5153
}
5254

5355
// Set default value for each iteration.
54-
$group = NULL;
56+
$group = $group_key = NULL;
5557
$group_to_merge = [];
5658

5759
// Grouping by fields.
58-
foreach (FIELDS as $field) {
60+
foreach (DUPLICATES_FIELDS as $field) {
5961
$field_value = $array[$field];
6062
if (array_key_exists($array[$field], $mapping_fields)) {
61-
$group = $mapping_fields[$field_value];
62-
$group_to_merge[] = $group;
63+
$group_key = $mapping_fields[$field_value];
64+
$group_to_merge[] = $group_key;
6365
}
6466
}
6567

66-
// Setting minimal group if have more one group ID.
67-
if (count($group_to_merge) > 1) {
68-
$group = min($group_to_merge);
68+
// Create a new group when the row has no duplicates.
69+
if ($group_key === NULL) {
70+
$grouping_key[] = $array['ID'];
71+
$group_key = array_search($array['ID'], $grouping_key);
6972
}
73+
$group = $grouping_key[$group_key];
7074

71-
// Setting group if do not have any duplicates.
72-
if ($group === NULL) {
73-
$group = $array['ID'];
75+
// Use the minimal group ID when more than one group matched.
76+
if (count($group_to_merge) > 1) {
77+
for ($i = 0; $i < count($group_to_merge); $i++) {
78+
$merging_array[] = $grouping_key[$group_to_merge[$i]];
79+
}
80+
if (!empty($merging_array)) {
81+
$group = min($merging_array);
82+
$group_key = array_search($group, $grouping_key);
83+
}
7484
}
7585

7686
// Save fields to mapping.
77-
$mapping_fields[$array['EMAIL']] = $group;
78-
$mapping_fields[$array['CARD']] = $group;
79-
$mapping_fields[$array['PHONE']] = $group;
87+
$mapping_fields[$array['EMAIL']] = $group_key;
88+
$mapping_fields[$array['CARD']] = $group_key;
89+
$mapping_fields[$array['PHONE']] = $group_key;
8090

8191
}
8292

@@ -85,8 +95,13 @@
8595
if ($key === 0) {
8696
continue;
8797
}
88-
// Searching PARENT_ID by email field. May be any field (like: CARD, PHONE).
89-
$fields_array[$key]['PARENT_ID'] = $mapping_fields[$array['EMAIL']];
98+
99+
$parent_ids = NULL;
100+
// Searching PARENT_ID by fields.
101+
foreach (DUPLICATES_FIELDS as $field) {
102+
$parent_ids[] = $grouping_key[$mapping_fields[$array[$field]]];
103+
}
104+
$fields_array[$key]['PARENT_ID'] = min($parent_ids);
90105

91106
// Prepare data from csv.
92107
if ($key !== 0) {

0 commit comments

Comments
 (0)