@@ -43,7 +43,7 @@ class Utility():
43
43
Get all assignees as one string.
44
44
extract_labels(github_labels)
45
45
Get all labels as one string.
46
- extract_user_data(user, users_ids, data_root_dir)
46
+ extract_user_data(user, users_ids, data_root_dir, node_id_to_anonym_uuid=False )
47
47
Extracting general user data.
48
48
extract_author_data_from_commit(repo, sha, users_ids, data_root_dir)
49
49
Extracting general author data from a commit.
@@ -55,8 +55,8 @@ class Utility():
55
55
Extracting general event data from an issue or pull request.
56
56
extract_comment_data(comment, parent_id, parent_name, users_ids, data_root_dir)
57
57
Extracting general comment data from a pull request or issue.
58
- define_unknown_user(user_dict, unknown_user , data_root_dir)
59
- Defines a unknown user. Add unknown user to alias.
58
+ define_unknown_user(unknown_user_name, uuid , data_root_dir, new_user=False )
59
+ Defines an unknown user. Adds the unknown user name as an alias of an existing user or creates a new user.
60
60
61
61
"""
62
62
USERS = "Users.p"
@@ -407,9 +407,9 @@ def extract_labels(github_labels):
407
407
return labels
408
408
409
409
@staticmethod
410
- def extract_user_data (user , users_ids , data_root_dir ):
410
+ def extract_user_data (user , users_ids , data_root_dir , node_id_to_anonym_uuid = False ):
411
411
"""
412
- extract_user_data(user, users_ids, data_root_dir)
412
+ extract_user_data(user, users_ids, data_root_dir, node_id_to_anonym_uuid=False )
413
413
414
414
Extracting general user data.
415
415
@@ -421,6 +421,8 @@ def extract_user_data(user, users_ids, data_root_dir):
421
421
Dict of User Ids as Keys and anonym Ids as Value.
422
422
data_root_dir : str
423
423
Repo dir of the project.
424
+ node_id_to_anonym_uuid : bool, default=False
425
+ If True, the user's node_id is stored as the anonym_uuid instead of an id generated from the node_id.
424
426
425
427
Returns
426
428
-------
@@ -441,7 +443,10 @@ def extract_user_data(user, users_ids, data_root_dir):
441
443
if users_file .is_file ():
442
444
users_df = pd .read_pickle (users_file )
443
445
user_data = {}
444
- user_data ["anonym_uuid" ] = generate_id (seed = user .node_id )
446
+ if node_id_to_anonym_uuid :
447
+ user_data ["anonym_uuid" ] = user .node_id
448
+ else :
449
+ user_data ["anonym_uuid" ] = generate_id (seed = user .node_id )
445
450
user_data ["id" ] = user .node_id
446
451
try :
447
452
user_data ["name" ] = user .name
@@ -671,59 +676,57 @@ def extract_comment_data(comment, parent_id, parent_name, users_ids, data_root_d
671
676
return comment_data
672
677
673
678
@staticmethod
674
- def define_unknown_user (user_dict , unknown_user , data_root_dir ):
679
+ def define_unknown_user (unknown_user_name , uuid , data_root_dir , new_user = False ):
675
680
"""
676
- define_unknown_user(user_dict, unknown_user , data_root_dir)
681
+ define_unknown_user(unknown_user_name, uuid , data_root_dir, new_user=False )
677
682
678
- Defines a unknown user. Add unknown user to alias.
683
+ Defines an unknown user. Adds the unknown user name as an alias of an existing user or creates a new user.
679
684
680
685
Parameters
681
686
----------
682
- user_dict: dict
683
- Dictionary which contains users .
684
- unknown_user : str
685
- Name of a unknown user.
687
+ unknown_user_name: str
688
+ Name of the unknown user.
689
+ uuid : str
690
+ Either the anonym uuid of an existing user or a random uuid for a new user.
686
691
data_root_dir : str
687
- Repo dir of the project.
692
+ Data root directory for the repository.
693
+ new_user : bool, default=False
694
+ Only used when no existing user matches uuid: if True, the new user gets a generated anonym_uuid; otherwise uuid itself is used as the anonym_uuid.
688
695
689
696
Returns
690
697
-------
691
698
str
692
699
Uuid of the user.
693
700
694
- Notes
695
- -----
696
- Example User Dict: {"unknown_user": "user uuid"}
697
- If the real user node id does not exist in the users table then a new user will be created and the user uuid will be the node Id
698
-
699
701
"""
700
702
users = Utility .get_users (data_root_dir )
701
- if unknown_user in user_dict :
702
- p_user = users .loc [users .anonym_uuid == user_dict [unknown_user ]]
703
- if not p_user .empty :
704
- alias = ""
705
- user = p_user .iloc [0 ]
706
- if "alias" in user :
707
- if pd .isnull (user ["alias" ]) or (user ["alias" ] is None ):
708
- alias = unknown_user
709
- else :
710
- all_alias = user ["alias" ].split (';' )
711
- if not unknown_user in all_alias :
712
- alias = user ["alias" ] + ";" + unknown_user
713
- else :
714
- alias = user ["alias" ]
703
+ p_user = users .loc [users .anonym_uuid == uuid ]
704
+ if not p_user .empty :
705
+ alias = ""
706
+ user = p_user .iloc [0 ]
707
+ if "alias" in user :
708
+ if pd .isnull (user ["alias" ]) or (user ["alias" ] is None ):
709
+ alias = unknown_user_name
715
710
else :
716
- alias = unknown_user
717
- users .loc [users .anonym_uuid == user_dict [unknown_user ], 'alias' ] = alias
718
- pd_file = Path (data_root_dir , Utility .USERS )
719
- with open (pd_file , "wb" ) as f :
720
- pickle .dump (users , f )
721
- return user ["anonym_uuid" ]
722
-
723
- class UserData :
724
- node_id = user_dict [unknown_user ]
725
- name = unknown_user
726
- email = numpy .NaN
727
- login = numpy .NaN
728
- users_ids = Utility .get_users_ids (data_root_dir )
729
- return Utility .extract_user_data (UserData (),users_ids ,data_root_dir )
711
+ all_alias = user ["alias" ].split (';' )
712
+ if not unknown_user_name in all_alias :
713
+ alias = user ["alias" ] + ";" + unknown_user_name
714
+ else :
715
+ alias = user ["alias" ]
716
+ else :
717
+ alias = unknown_user_name
718
+ users .loc [users .anonym_uuid == uuid , 'alias' ] = alias
719
+ pd_file = Path (data_root_dir , Utility .USERS )
720
+ with open (pd_file , "wb" ) as f :
721
+ pickle .dump (users , f )
722
+ return user ["anonym_uuid" ]
723
+
724
+ class UserData :
725
+ node_id = uuid
726
+ name = unknown_user_name
727
+ email = numpy .NaN
728
+ login = numpy .NaN
729
+ users_ids = Utility .get_users_ids (data_root_dir )
730
+ if new_user :
731
+ return Utility .extract_user_data (UserData (),users_ids ,data_root_dir )
732
+ return Utility .extract_user_data (UserData (),users_ids ,data_root_dir , node_id_to_anonym_uuid = True )
0 commit comments