Skip to content

Commit 437fecc

Browse files
author
Maximilian Karl
committed
define unknown user in Version works for one user
1 parent 4a071c3 commit 437fecc

File tree

5 files changed

+83
-76
lines changed

5 files changed

+83
-76
lines changed

docs/changes.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,14 @@ Version 1.1.13 (July 29, 2021)
105105

106106
* solved error: ignore Alias if already there in Utility(define_unknown_user)
107107

108-
Version 1.1.14 (July 29, 2021)
108+
Version 1.1.14 (July 30, 2021)
109109
-----------------------------------
110110
* version download will check whether users are already defined for unknown users
111111
* comment out some print
112-
* verion checks now if there are updates before downloading
112+
* version now checks whether there are updates before downloading
113+
114+
Version 1.1.15 (July 30, 2021)
115+
-----------------------------------
116+
* define unknown user in Version now works for only one user at a time
117+
* if an anonym_uuid is already known from a different repository for this unknown user, then this anonym_uuid will be used by extract_user_data
118+
* The same unknown Author name will be connected to the same anonym_uuid

github2pandas/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.1.14'
1+
__version__ = '1.1.15'

github2pandas/utility.py

Lines changed: 50 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class Utility():
4343
Get all assignees as one string.
4444
extract_labels(github_labels)
4545
Get all labels as one string.
46-
extract_user_data(user, users_ids, data_root_dir)
46+
extract_user_data(user, users_ids, data_root_dir, node_id_to_anonym_uuid=False)
4747
Extracting general user data.
4848
extract_author_data_from_commit(repo, sha, users_ids, data_root_dir)
4949
Extracting general author data from a commit.
@@ -55,8 +55,8 @@ class Utility():
5555
Extracting general event data from a issue or pull request.
5656
extract_comment_data(comment, parent_id, parent_name, users_ids, data_root_dir)
5757
Extracting general comment data from a pull request or issue.
58-
define_unknown_user(user_dict, unknown_user, data_root_dir)
59-
Defines a unknown user. Add unknown user to alias.
58+
define_unknown_user(unknown_user_name, uuid, data_root_dir, new_user=False)
59+
Defines an unknown user. Adds the unknown user to an alias or creates a new user.
6060
6161
"""
6262
USERS = "Users.p"
@@ -407,9 +407,9 @@ def extract_labels(github_labels):
407407
return labels
408408

409409
@staticmethod
410-
def extract_user_data(user, users_ids, data_root_dir):
410+
def extract_user_data(user, users_ids, data_root_dir, node_id_to_anonym_uuid=False):
411411
"""
412-
extract_user_data(user, users_ids, data_root_dir)
412+
extract_user_data(user, users_ids, data_root_dir, node_id_to_anonym_uuid=False)
413413
414414
Extracting general user data.
415415
@@ -421,6 +421,8 @@ def extract_user_data(user, users_ids, data_root_dir):
421421
Dict of User Ids as Keys and anonym Ids as Value.
422422
data_root_dir : str
423423
Repo dir of the project.
424+
node_id_to_anonym_uuid : bool, default=False
425+
If True, the user's node_id is used directly as the anonym_uuid instead of generating one from it.
424426
425427
Returns
426428
-------
@@ -441,7 +443,10 @@ def extract_user_data(user, users_ids, data_root_dir):
441443
if users_file.is_file():
442444
users_df = pd.read_pickle(users_file)
443445
user_data = {}
444-
user_data["anonym_uuid"] = generate_id(seed=user.node_id)
446+
if node_id_to_anonym_uuid:
447+
user_data["anonym_uuid"] = user.node_id
448+
else:
449+
user_data["anonym_uuid"] = generate_id(seed=user.node_id)
445450
user_data["id"] = user.node_id
446451
try:
447452
user_data["name"] = user.name
@@ -671,59 +676,57 @@ def extract_comment_data(comment, parent_id, parent_name, users_ids, data_root_d
671676
return comment_data
672677

673678
@staticmethod
674-
def define_unknown_user(user_dict, unknown_user, data_root_dir):
679+
def define_unknown_user(unknown_user_name, uuid, data_root_dir, new_user=False):
675680
"""
676-
define_unknown_user(user_dict, unknown_user, data_root_dir)
681+
define_unknown_user(unknown_user_name, uuid, data_root_dir, new_user=False)
677682
678-
Defines a unknown user. Add unknown user to alias.
683+
Defines an unknown user. Adds the unknown user to an alias or creates a new user.
679684
680685
Parameters
681686
----------
682-
user_dict: dict
683-
Dictionary which contains users.
684-
unknown_user : str
685-
Name of a unknown user.
687+
unknown_user_name: str
688+
Name of unknown user.
689+
uuid: str
690+
Uuid can be the anonym uuid of another user or random uuid for a new user.
686691
data_root_dir : str
687-
Repo dir of the project.
692+
Data root directory for the repository.
693+
new_user : bool, default=False
694+
A completely new user with a generated anonym_uuid will be created.
688695
689696
Returns
690697
-------
691698
str
692699
Uuid of the user.
693700
694-
Notes
695-
-----
696-
Example User Dict: {"unknown_user": "user uuid"}
697-
If the real user node id does not exist in the users table then a new user will be created and the user uuid will be the node Id
698-
699701
"""
700702
users = Utility.get_users(data_root_dir)
701-
if unknown_user in user_dict:
702-
p_user = users.loc[users.anonym_uuid == user_dict[unknown_user]]
703-
if not p_user.empty:
704-
alias = ""
705-
user = p_user.iloc[0]
706-
if "alias" in user:
707-
if pd.isnull(user["alias"]) or (user["alias"] is None):
708-
alias = unknown_user
709-
else:
710-
all_alias = user["alias"].split(';')
711-
if not unknown_user in all_alias:
712-
alias = user["alias"] + ";" + unknown_user
713-
else:
714-
alias = user["alias"]
703+
p_user = users.loc[users.anonym_uuid == uuid]
704+
if not p_user.empty:
705+
alias = ""
706+
user = p_user.iloc[0]
707+
if "alias" in user:
708+
if pd.isnull(user["alias"]) or (user["alias"] is None):
709+
alias = unknown_user_name
715710
else:
716-
alias = unknown_user
717-
users.loc[users.anonym_uuid == user_dict[unknown_user], 'alias'] = alias
718-
pd_file = Path(data_root_dir, Utility.USERS)
719-
with open(pd_file, "wb") as f:
720-
pickle.dump(users, f)
721-
return user["anonym_uuid"]
722-
723-
class UserData:
724-
node_id = user_dict[unknown_user]
725-
name = unknown_user
726-
email = numpy.NaN
727-
login = numpy.NaN
728-
users_ids = Utility.get_users_ids(data_root_dir)
729-
return Utility.extract_user_data(UserData(),users_ids,data_root_dir)
711+
all_alias = user["alias"].split(';')
712+
if not unknown_user_name in all_alias:
713+
alias = user["alias"] + ";" + unknown_user_name
714+
else:
715+
alias = user["alias"]
716+
else:
717+
alias = unknown_user_name
718+
users.loc[users.anonym_uuid == uuid, 'alias'] = alias
719+
pd_file = Path(data_root_dir, Utility.USERS)
720+
with open(pd_file, "wb") as f:
721+
pickle.dump(users, f)
722+
return user["anonym_uuid"]
723+
724+
class UserData:
725+
node_id = uuid
726+
name = unknown_user_name
727+
email = numpy.NaN
728+
login = numpy.NaN
729+
users_ids = Utility.get_users_ids(data_root_dir)
730+
if new_user:
731+
return Utility.extract_user_data(UserData(),users_ids,data_root_dir)
732+
return Utility.extract_user_data(UserData(),users_ids,data_root_dir, node_id_to_anonym_uuid=True)

github2pandas/version.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ class Version():
5050
Extracting version data from a local repository and storing them in a mysql data base.
5151
generate_version_pandas_tables(repo, data_root_dir, check_for_updates=True)
5252
Extracting edits and commits in a pandas table.
53-
define_unknown_users(user_list, data_root_dir)
54-
Define unknown users in commits pandas table.
53+
define_unknown_user(unknown_user_name, uuid, data_root_dir, new_user=False)
54+
Define unknown user in commits pandas table.
5555
get_unknown_users(data_root_dir)
5656
Get all unknown users from commits.
5757
get_version(data_root_dir, filename=VERSION_COMMITS)
@@ -352,35 +352,33 @@ def generate_version_pandas_tables(repo, data_root_dir, check_for_updates=True):
352352
pickle.dump(pd_Branches, f)
353353

354354
@staticmethod
355-
def define_unknown_users(user_dict, data_root_dir):
355+
def define_unknown_user(unknown_user_name, uuid, data_root_dir, new_user=False):
356356
"""
357-
define_unknown_users(user_dict, data_root_dir)
357+
define_unknown_user(unknown_user_name, uuid, data_root_dir, new_user=False)
358358
359-
Define unknown users in commits pandas table.
359+
Define unknown user in commits pandas table.
360360
361361
Parameters
362362
----------
363-
user_dict: dict
364-
Dictionary which contains users.
363+
unknown_user_name: str
364+
Name of unknown user.
365+
uuid: str
366+
Uuid can be the anonym uuid of another user or random uuid for a new user.
365367
data_root_dir : str
366368
Data root directory for the repository.
369+
new_user : bool, default=False
370+
A completely new user with a generated uuid will be created.
367371
368-
Notes
369-
-----
370-
Example User: {"unknown_user": "real user node id"}
371-
If the real user node id does not exist in the users table then a new user will be created
372-
373372
"""
374373
pd_commits = Version.get_version(data_root_dir)
375374
if "unknown_user" in pd_commits:
376-
unknown_user_commits = pd_commits.loc[pd_commits.unknown_user.notna()]
377-
unknown_users = unknown_user_commits.unknown_user.unique()
378-
for unknown_user in unknown_users:
379-
uuid = Utility.define_unknown_user(user_dict,unknown_user,data_root_dir)
380-
if uuid is not None:
381-
pd_commits.loc[pd_commits.unknown_user == unknown_user, 'author'] = uuid
382-
pd_commits.loc[pd_commits.unknown_user == unknown_user, 'committer'] = uuid
383-
pd_commits.loc[pd_commits.unknown_user == unknown_user, 'unknown_user'] = numpy.NaN
375+
unknown_users = pd_commits.unknown_user.unique()
376+
if unknown_user_name in unknown_users:
377+
new_uuid = Utility.define_unknown_user(unknown_user_name, uuid,data_root_dir, new_user=new_user)
378+
if new_uuid is not None:
379+
pd_commits.loc[pd_commits.unknown_user == unknown_user_name, 'author'] = new_uuid
380+
pd_commits.loc[pd_commits.unknown_user == unknown_user_name, 'committer'] = new_uuid
381+
pd_commits.loc[pd_commits.unknown_user == unknown_user_name, 'unknown_user'] = numpy.NaN
384382

385383
version_folder = Path(data_root_dir, Version.VERSION_DIR)
386384
pd_commits_file = Path(version_folder, Version.VERSION_COMMITS)

tests/test_utility.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -294,17 +294,17 @@ class User:
294294
email = "test_define_unknown_user@test.de"
295295
login = "test_define_unknown_user"
296296
user = Utility.extract_user_data(User(), self.users_ids, self.default_data_folder)
297-
new_user = Utility.define_unknown_user({"test":user},"test",self.default_data_folder)
297+
new_user = Utility.define_unknown_user("test",user,self.default_data_folder)
298298
class User2:
299299
node_id = "test_define_unknown_user2"
300-
name = "test_define_unknown_use2r"
300+
name = "test_define_unknown_user2"
301301
email = "test_define_unknown_user2@test.de"
302302
login = "test_define_unknown_user2"
303303
user2 = Utility.extract_user_data(User2(), self.users_ids, self.default_data_folder)
304-
new_user = Utility.define_unknown_user({"test2":user2},"test2",self.default_data_folder)
305-
new_user = Utility.define_unknown_user({"test3":user2},"test3",self.default_data_folder)
306-
new_user = Utility.define_unknown_user({"test3":user2},"test3",self.default_data_folder)
307-
print(Utility.get_users(self.default_data_folder))
304+
new_user = Utility.define_unknown_user("test2",user2,self.default_data_folder)
305+
new_user = Utility.define_unknown_user("test3",user2,self.default_data_folder)
306+
new_user = Utility.define_unknown_user("test3",user2,self.default_data_folder)
307+
#print(Utility.get_users(self.default_data_folder))
308308

309309
def setUp(self):
310310
self.default_data_folder.mkdir(parents=True, exist_ok=True)

0 commit comments

Comments
 (0)