initial commit
This commit is contained in:
commit
73c243d83d
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
.idea/
|
||||
*.iml
|
||||
*.pyc
|
62
email_pruner.py
Normal file
62
email_pruner.py
Normal file
@ -0,0 +1,62 @@
|
||||
from string import ascii_letters
|
||||
from secrets import choice
|
||||
from timeit import default_timer as timer
|
||||
from datetime import timedelta
|
||||
|
||||
|
||||
def reset_stopwatch():
    """Return the current reading of the timing clock, to be used as a start mark."""
    start_mark = timer()
    return start_mark
|
||||
|
||||
|
||||
def get_elapsed(starttime):
    """Return the time elapsed since *starttime* as a ``timedelta``.

    *starttime* is an earlier clock reading in seconds, of the kind returned
    by ``timer()``; the current reading is taken when this function is called.
    """
    elapsed_seconds = timer() - starttime
    return timedelta(seconds=elapsed_seconds)
|
||||
|
||||
|
||||
def randstring(strlen=64):
    """Return a string of *strlen* randomly chosen ASCII letters.

    Each character is picked independently from ``ascii_letters`` (upper and
    lower case) using ``choice``.
    """
    letters = [choice(ascii_letters) for _ in range(strlen)]
    return ''.join(letters)
|
||||
|
||||
|
||||
def spawn(listlen=100):
    """Build a list of ``2 * listlen`` random e-mail-like strings with repeats.

    First, *listlen* addresses of the form ``<10 letters>.<10 letters>@<15
    letters>.com`` are generated. Then *listlen* random picks are drawn from
    those addresses. The result interleaves the two: even positions hold the
    generated addresses in order, odd positions hold the random picks, so
    every odd-position entry repeats some even-position entry.
    """
    base_list = [
        f"{randstring(10)}.{randstring(10)}@{randstring(15)}.com"
        for _ in range(listlen)
    ]
    # One random pick from the generated addresses per generated address.
    dup_list = [choice(base_list) for _ in base_list]

    final_list = []
    for generated, repeated in zip(base_list, dup_list):
        final_list.extend((generated, repeated))
    return final_list
|
||||
|
||||
|
||||
def dups(biglist):
    """Return the set of items that occur more than once in *biglist*.

    The previous implementation returned every distinct item (its ``seen``
    set), so callers reporting "duplicates" were really reporting the unique
    values. This version returns only the values that are actually repeated.

    Args:
        biglist: An iterable of hashable items.

    Returns:
        A set containing each item that appears at least twice; empty when
        there are no repeats (including for empty input).
    """
    seen = set()
    repeated = set()
    for item in biglist:
        if item in seen:
            # Second or later sighting: this value is a duplicate.
            repeated.add(item)
        else:
            seen.add(item)
    return repeated
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Phase 1: generate the sample list (contains repeated entries) and time it.
    start = reset_stopwatch()
    list_with_dups = spawn(50000)
    print(f"GENERATED COMPLETE LIST WITH DUPLICATES: (count = {len(list_with_dups)})")
    generation_time = get_elapsed(start)
    print("Elapsed Time: ", generation_time)

    # Phase 2: run dups() over the full list and time it.
    start = reset_stopwatch()
    dup_list = dups(list_with_dups)
    print(f"IDENTIFIED DUPLICATES IN COMPLETE LIST: (count = {len(dup_list)})")
    detection_time = get_elapsed(start)
    print("Elapsed time: ", detection_time)

    # Phase 3: prune repeats while keeping first-seen order (dict keys are
    # unique and preserve insertion order), and time it.
    start = reset_stopwatch()
    list_with_dups = list(dict.fromkeys(list_with_dups))
    print(f"GENERATED PRUNED LIST WITHOUT DUPLICATES: (count = {len(list_with_dups)})")
    pruning_time = get_elapsed(start)
    print("Elapsed Time: ", pruning_time)

    # Summary of the three timed phases.
    print(f"TOTAL ELAPSED TIME: {generation_time + detection_time + pruning_time}")
    print(f"ELAPSED TIME WITHOUT GENERATOR: {detection_time + pruning_time}")
|
Loading…
Reference in New Issue
Block a user