Commit 888f8323 authored by cb

Option to select names containing only letters and spaces for better rendering

parent 5c464257
NOM _1891_1900 _1901_1910 _1911_1920 _1921_1930 _1931_1940 _1941_1950 _1951_1960 _1961_1970 _1971_1980 _1981_1990 _1991_2000
AABI 0 0 0 0 0 0 0 0 4 21 7
AABID 0 0 0 0 0 0 0 0 7 13 17
AALBERG 1 5 2 5 4 6 4 2 1 0 0
......@@ -218981,4 +218980,3 @@ ZYSK 0 0 0 4 4 3 12 4 7 10 11
ZYSMAN 0 0 0 8 19 3 14 17 7 10 8
ZYTA 0 0 0 1 1 4 12 7 8 11 1
ZYTO 0 0 1 9 3 8 9 8 8 8 3
AUTRES NOMS 227267 319995 284557 409790 382905 441726 522040 604057 666868 757419 857392
import random
import string
# English names: open the local "words" file for reading in text mode.
# NOTE(review): no matching close() is visible for this handle — consider a
# `with` block around the open/read pair.
fo = open("words", "r")
......@@ -7,6 +8,7 @@ words = long_txt.splitlines()
# French first names, source: https://www.data.gouv.fr/fr/datasets/liste-de-prenoms/
# The file is decoded as Windows-1252.
# Alternative decoding kept for reference: encoding='latin-1'
# A context manager guarantees the handle is closed once the text is read.
with open("Prenoms.csv", "r", encoding='Windows 1252') as fo_firstname:
    long_firstname = fo_firstname.read()
firstnames = long_firstname.splitlines()
# Drop the first line (presumably the CSV header row — confirm against the file).
del firstnames[0]
......@@ -20,8 +22,32 @@ lastnames = long_lastname.splitlines()
# Discard the first line of the surname table before extracting names.
del lastnames[0]
# Keep only the text before the first tab of each row, in Title Case.
lastnames = [row.partition("\t")[0].title() for row in lastnames]
def rand_name():
    """Return a random full name of the form "<firstname> <lastname>".

    One entry is drawn from the module-level ``firstnames`` list and one
    from ``lastnames``; the two are joined with a single space.

    ``random.choice`` is used instead of indexing with
    ``random.randint(0, len(seq))``: ``randint`` includes its upper bound,
    so that form could select index ``len(seq)`` and raise ``IndexError``.

    Raises:
        IndexError: if either name list is empty.
    """
    firstname = random.choice(firstnames)
    lastname = random.choice(lastnames)
    return ' '.join([firstname, lastname])
def rand_name(letters=True):
    """Return a random "<firstname> <lastname>" that passes ``verify_name``.

    A first name and a last name are drawn from the module-level
    ``firstnames`` and ``lastnames`` lists, joined with a single space, and
    checked with ``verify_name(name, letters)``. Drawing repeats until a
    candidate is accepted.

    Args:
        letters: forwarded unchanged to ``verify_name`` to select which
            character set the name must be restricted to.

    Returns:
        The first generated name accepted by ``verify_name``.

    Raises:
        IndexError: if either name list is empty.

    Note:
        The loop has no retry limit; it never terminates if no
        first/last name combination can pass the check.
    """
    while True:
        # NOTE(review): this print and the one below look like debugging
        # output; they are kept so the function's stdout is unchanged.
        print(len(firstnames))
        # random.choice avoids the inclusive upper bound of
        # random.randint(0, len(seq)), which could index one past the end.
        firstname = random.choice(firstnames)
        lastname = random.choice(lastnames)
        name = ' '.join([firstname, lastname])
        # Keep drawing until the candidate contains only allowed characters.
        if verify_name(name, letters):
            print(name)
            return name
def verify_name(name, letters=True):
    '''
    Tell whether every character of ``name`` belongs to an allowed set.

    Two modes, selected by ``letters``:
    * truthy (default): ASCII letters (both cases) plus the characters
      in ``string.whitespace``
    * falsy: any character in ``string.printable``, which also admits
      digits and punctuation such as "(1)" or "-"

    Returns True when all characters are allowed (an empty name
    therefore passes), False as soon as one is not.
    '''
    allowed = (
        string.ascii_letters + string.whitespace
        if letters
        else string.printable
    )
    for char in name:
        # Stop at the first character outside the allowed set.
        if char not in allowed:
            return False
    return True
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment