Commit fff3fc0a authored by klorydryk's avatar klorydryk
Browse files

Créations des couples prénom-nom selon fréquence d'apparition

parent 773fc9d1
This diff is collapsed.
This diff is collapsed.
......@@ -7,20 +7,36 @@ long_txt = fo.read()
# NOTE(review): this span appears to come from a rendered diff, so statements
# from before and after the commit may be interleaved and loop bodies have
# lost their indentation -- confirm statement order against the real file.

# One entry per line of the text read earlier into long_txt.
words = long_txt.splitlines()
# French first names: https://www.data.gouv.fr/fr/datasets/liste-de-prenoms/
# Opens Prenoms.csv decoded as Windows-1252; the name is rebound below.
fo_firstname = open("Prenoms.csv", "r", encoding='Windows 1252')
#fo_firstname = open("Prenoms.csv", "r", encoding='Windows 1252')
#fo_firstname = open("Prenoms.csv", "r", encoding='latin-1')
# French last names: https://www.insee.fr/fr/statistiques/3536630
#fo_lastname = open("noms2008nat_txt.txt", "r")
# New database
# https://www.data.gouv.fr/fr/datasets/liste-de-prenoms-et-patronymes/
# Both files are opened without an explicit encoding, and no close() call is
# visible in this span.
fo_firstname = open("prenom.csv", "r")
fo_lastname = open("patronymes.csv", "r")
# Read the whole first-name file and split it into lines.
long_firstname = fo_firstname.read()
firstnames = long_firstname.splitlines()
# Drop the first line.
del firstnames[0]
# Keep only the first ';'-separated field of each line, title-cased.
firstnames = [items.split(";")[0].title() for items in firstnames]
# print(firstnames)
del firstnames[0] # Remove titles
# Split every remaining line on ','; the code below reads column 0 as the
# name and column 1 as an integer (presumably an occurrence count -- confirm
# against the CSV layout).
firstnames_and_freq = [items.split(",") for items in firstnames]
# Total of column 1 over all rows.
sum_firstnames = 0
for i in range(len(firstnames_and_freq)):
sum_firstnames += int(firstnames_and_freq[i][1])
print("Nombre de prénoms: " + str(sum_firstnames))
# French last names: https://www.insee.fr/fr/statistiques/3536630
# NOTE(review): this rebinds fo_lastname, replacing the handle opened on
# patronymes.csv above -- likely a pre-commit line; confirm.
fo_lastname = open("noms2008nat_txt.txt", "r")
# Read the whole surname file and split it into lines.
long_lastname = fo_lastname.read()
lastnames = long_lastname.splitlines()
# Drop the first line.
del lastnames[0]
# Keep only the first tab-separated field of each line, title-cased.
lastnames = [items.split("\t")[0].title() for items in lastnames]
del lastnames[0] # Remove titles
# Same layout as firstnames_and_freq: one list of ','-separated fields per line.
lastnames_and_freq = [items.split(",") for items in lastnames]
# Total of column 1 over all rows.
sum_lastnames = 0
for i in range(len(lastnames_and_freq)):
sum_lastnames += int(lastnames_and_freq[i][1])
print("Nombre de patronymes: " + str(sum_lastnames))
def rand_name(charset):
while True:
......@@ -31,9 +47,30 @@ def rand_name(charset):
if verify_name(name, charset):
return name
def verify_name(name, charset):
def _pick_weighted(entries, total):
    """Return column 0 of one row of *entries*, drawn proportionally to column 1.

    entries: sequence of rows whose item 0 is the value to return and whose
        item 1 is an integer weight (possibly stored as a string).
    total: sum of all the weights in *entries*.

    Raises IndexError if *entries* is empty.
    """
    if total < 1:
        # No positive weight to draw against: fall back to the first row.
        return entries[0][0]
    # Draw in [1, total] so that every row owns exactly `weight` outcomes.
    remaining = random.randint(1, total)
    for row in entries:
        remaining -= int(row[1])
        if remaining <= 0:
            # Return the row that exhausted the draw, not the one after it.
            return row[0]
    # Only reachable when *total* exceeds the real sum of the weights.
    return entries[-1][0]


def rand_name_statistic(charset):
    """Return a random "Firstname Lastname" weighted by occurrence frequency.

    The first name and the last name are drawn independently from the
    module-level tables firstnames_and_freq and lastnames_and_freq, using the
    totals sum_firstnames and sum_lastnames. The draw is repeated until
    verify_name() accepts the combined name.

    charset: accepted but not forwarded; verify_name() is called with its
        default charset, exactly as before.
        NOTE(review): confirm whether charset should be passed through.
    """
    while True:
        firstname = _pick_weighted(firstnames_and_freq, sum_firstnames)
        lastname = _pick_weighted(lastnames_and_freq, sum_lastnames)
        name = ' '.join([firstname, lastname])
        # Loop until the name contains only accepted characters.
        if verify_name(name):
            return name
def verify_name(name, charset = ""):
'''
Brute Version of cleaning names:
Brute Version of cleaning names:
Two options:
* Only printable characters (but we'll have some punctuations and (1) (2)...)
* Only letters -> charset "abcdefghijklmnopqrstuvwxyz" + Uppercase
......@@ -44,8 +81,7 @@ def verify_name(name, charset):
else:
# All printable including punctuation
charset = string.printable
test = all(c in (charset) for c in name)
return test
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment