|
2 | 2 | /**
|
3 | 3 | * Find chains of duplicates in a CSV file.
|
4 | 4 | *
|
5 |
| - * @author Roman Zakharchuk <extatic.dancer@gmail.com> |
6 | 5 | * @author Volodymyr Melnychuk <540991@i.ua>
|
7 | 6 | *
|
8 | 7 | */
|
9 | 8 |
|
| 9 | +// Define constants. |
| 10 | +define('FIELDS', ['EMAIL', 'CARD', 'PHONE']); |
| 11 | + |
10 | 12 | // Default example data.
|
11 | 13 | $csv = 'ID,PARENT_ID,EMAIL,CARD,PHONE,TMP
|
12 | 14 | 1,NULL,email1,card1,phone1,
|
13 |
| -2,NULL,email2,card2,phone2, |
| 15 | +2,NULL,email2,card1,phone2, |
14 | 16 | 3,NULL,email3,card3,phone3,
|
15 | 17 | 4,NULL,email1,card2,phone4,
|
16 | 18 | 5,NULL,email5,card5,phone2,
|
|
36 | 38 | ];
|
37 | 39 | }
|
38 | 40 |
|
39 |
| -// Get all rows for fields. |
40 |
| -$ids = array_column($fields_array, 'ID'); |
41 |
| -$emails = array_column($fields_array, 'EMAIL'); |
42 |
| -$cars = array_column($fields_array, 'CARD'); |
43 |
| -$phones = array_column($fields_array, 'PHONE'); |
44 |
| - |
45 |
| -$results = []; |
46 | 41 | $csv_string = 'ID,PARENT_ID' . PHP_EOL;
|
47 | 42 |
|
48 |
| -// Prepare data for csv. |
| 43 | +$mapping_fields = []; |
| 44 | + |
| 45 | +// Find duplicates and save to mapping. |
49 | 46 | foreach ($fields_array as $key => $array) {
|
50 | 47 |
|
51 |
| - $ids_by_mail = get_duplicate_array($emails, 'EMAIL', $array['EMAIL']); |
52 |
| - $ids_by_card = get_duplicate_array($cars, 'CARD', $array['CARD']); |
53 |
| - $ids_by_phone = get_duplicate_array($phones, 'PHONE', $array['PHONE']); |
| 48 | + // Skip first element in array. |
| 49 | + if ($key === 0) { |
| 50 | + continue; |
| 51 | + } |
| 52 | + |
| 53 | + // Set default value for each iteration. |
| 54 | + $group = NULL; |
| 55 | + $group_to_merge = []; |
54 | 56 |
|
55 |
| - $min_ids = [ |
56 |
| - min($ids_by_mail), |
57 |
| - min($ids_by_card), |
58 |
| - min($ids_by_phone), |
59 |
| - ]; |
| 57 | + // Grouping by fields. |
| 58 | + foreach (FIELDS as $field) { |
| 59 | + $field_value = $array[$field]; |
| 60 | + if (array_key_exists($array[$field], $mapping_fields)) { |
| 61 | + $group = $mapping_fields[$field_value]; |
| 62 | + $group_to_merge[] = $group; |
| 63 | + } |
| 64 | + } |
60 | 65 |
|
61 |
| - $min_id = min($min_ids); |
| 66 | + // Use the minimal group ID when more than one group matched. |
| 67 | + if (count($group_to_merge) > 1) { |
| 68 | + $group = min($group_to_merge); |
| 69 | + } |
| 70 | + |
| 71 | + // Fall back to the row's own ID when no duplicates were found. |
| 72 | + if ($group === NULL) { |
| 73 | + $group = $array['ID']; |
| 74 | + } |
62 | 75 |
|
63 |
| - fill_results($ids_by_mail, $results, $min_id); |
64 |
| - fill_results($ids_by_card, $results, $min_id); |
65 |
| - fill_results($ids_by_phone, $results, $min_id); |
| 76 | + // Save fields to mapping. |
| 77 | + $mapping_fields[$array['EMAIL']] = $group; |
| 78 | + $mapping_fields[$array['CARD']] = $group; |
| 79 | + $mapping_fields[$array['PHONE']] = $group; |
66 | 80 |
|
67 | 81 | }
|
68 | 82 |
|
69 |
| -ksort($results); |
| 83 | +foreach ($fields_array as $key => $array) { |
| 84 | + // Skip first element in array. |
| 85 | + if ($key === 0) { |
| 86 | + continue; |
| 87 | + } |
| 88 | + // Look up PARENT_ID by the EMAIL field; another field (e.g. CARD or PHONE) could be used instead. |
| 89 | + $fields_array[$key]['PARENT_ID'] = $mapping_fields[$array['EMAIL']]; |
70 | 90 |
|
71 |
| -// Prepare string for csv. |
72 |
| -foreach ($results as $key => $result) { |
| 91 | + // Prepare data for csv. |
73 | 92 | if ($key !== 0) {
|
74 |
| - $csv_string .= implode(',', [$key, $results[$key]['PARENT_ID']]) . PHP_EOL; |
| 93 | + $csv_string .= implode(',', |
| 94 | + [$key, $fields_array[$key]['PARENT_ID']]) . PHP_EOL; |
75 | 95 | }
|
76 | 96 | }
|
77 | 97 |
|
78 |
| -//print_r($results); |
79 | 98 | // Show results as string.
|
80 | 99 | print_r($csv_string);
|
81 |
| - |
82 |
| -/** |
83 |
| - * Return founded duplicates key. |
84 |
| - * |
85 |
| - * @param array $array |
86 |
| - * Array with fields. |
87 |
| - * @param $column |
88 |
| - * Field name. |
89 |
| - * @param $string |
90 |
| - * Search string. |
91 |
| - * |
92 |
| - * @return array|bool |
93 |
| - */ |
94 |
| -function get_duplicate_array($array, $column, $string) { |
95 |
| - |
96 |
| - $results = array_filter($array, |
97 |
| - function ($value) use ($string) { |
98 |
| - if ($value === $string) { |
99 |
| - return TRUE; |
100 |
| - } |
101 |
| - return FALSE; |
102 |
| - }, |
103 |
| - ARRAY_FILTER_USE_BOTH); |
104 |
| - |
105 |
| - $results = array_fill_keys(array_keys($results), min(array_keys($results))); |
106 |
| - |
107 |
| - if (count($results) > 0) { |
108 |
| - return $results; |
109 |
| - } |
110 |
| - else { |
111 |
| - return []; |
112 |
| - } |
113 |
| - |
114 |
| -} |
115 |
| - |
116 |
| -/** |
117 |
| - * Get fill results. |
118 |
| - * |
119 |
| - * @param $array |
120 |
| - * Array. |
121 |
| - * @param $results |
122 |
| - * Row results. |
123 |
| - * @param $min_id |
124 |
| - * Minimal ID. |
125 |
| - */ |
126 |
| -function fill_results($array, &$results, $min_id) { |
127 |
| - foreach ($array as $id => $value) { |
128 |
| - if (empty($results[$id]) || $results[$id] > $min_id) { |
129 |
| - $results[$id]['PARENT_ID'] = $min_id; |
130 |
| - } |
131 |
| - } |
132 |
| -} |
0 commit comments