initial commit

This commit is contained in:
Greg Gauthier 2020-10-21 21:01:26 +01:00
commit 73c243d83d
2 changed files with 65 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
.idea/
*.iml
*.pyc

62
email_pruner.py Normal file
View File

@ -0,0 +1,62 @@
from string import ascii_letters
from secrets import choice
from timeit import default_timer as timer
from datetime import timedelta
def reset_stopwatch():
    """Return the current reading of ``timer`` to use as a start mark.

    The value is only meaningful when handed back to ``get_elapsed``.
    """
    start_mark = timer()
    return start_mark
def get_elapsed(starttime):
    """Return the time elapsed since *starttime* as a ``timedelta``.

    *starttime* is a value previously obtained from ``timer()`` (for
    example via ``reset_stopwatch``); the difference to the current timer
    reading is interpreted as seconds.
    """
    return timedelta(seconds=timer() - starttime)
def randstring(strlen=64):
    """Return a string of *strlen* randomly chosen ASCII letters.

    Each character is drawn independently from ``ascii_letters`` using
    ``secrets.choice``.
    """
    letters = []
    for _ in range(strlen):
        letters.append(choice(ascii_letters))
    return ''.join(letters)
def spawn(listlen=100):
    """Build a list of random e-mail-like strings padded with repeats.

    First ``listlen`` addresses of the form ``<10>.<10>@<15>.com`` are
    generated from random letters. Then ``listlen`` further entries are
    picked at random from those addresses. The result interleaves the two
    (address, pick, address, pick, ...) and so has ``2 * listlen`` entries.
    """
    originals = [
        "".join((randstring(10), ".", randstring(10), "@", randstring(15), ".com"))
        for _ in range(listlen)
    ]
    # One random pick from the originals per original entry.
    repeats = [choice(originals) for _ in originals]

    interleaved = []
    for original, repeat in zip(originals, repeats):
        interleaved.extend((original, repeat))
    return interleaved
def dups(biglist):
    """Return the set of items that occur more than once in *biglist*.

    Args:
        biglist: Iterable of hashable items (e.g. e-mail address strings).

    Returns:
        A set containing each item that appears at least twice. Items that
        appear exactly once are not included; an input without repeats
        (including an empty one) yields an empty set.
    """
    seen = set()
    repeated = set()
    for item in biglist:
        if item in seen:
            # Second or later sighting: this is a genuine duplicate.
            repeated.add(item)
        else:
            seen.add(item)
    return repeated
if __name__ == "__main__":
    # Phase 1: generate the test data (timed, including the report line).
    phase_start = reset_stopwatch()
    addresses = spawn(50000)
    print(f"GENERATED COMPLETE LIST WITH DUPLICATES: (count = {len(addresses)})")
    generate_time = get_elapsed(phase_start)
    print("Elapsed Time: ", generate_time)

    # Phase 2: run dups() over the generated list and report its result size.
    phase_start = reset_stopwatch()
    dups_result = dups(addresses)
    print(f"IDENTIFIED DUPLICATES IN COMPLETE LIST: (count = {len(dups_result)})")
    scan_time = get_elapsed(phase_start)
    print("Elapsed time: ", scan_time)

    # Phase 3: prune repeats; dict.fromkeys keeps the first occurrence of
    # each entry in its original position.
    phase_start = reset_stopwatch()
    addresses = list(dict.fromkeys(addresses))
    print(f"GENERATED PRUNED LIST WITHOUT DUPLICATES: (count = {len(addresses)})")
    prune_time = get_elapsed(phase_start)
    print("Elapsed Time: ", prune_time)

    # Summary: overall time, and time excluding the data generation phase.
    print(f"TOTAL ELAPSED TIME: {generate_time + scan_time + prune_time}")
    print(f"ELAPSED TIME WITHOUT GENERATOR: {scan_time + prune_time}")