Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
klorydryk
FacialRecoTrackingScore
Commits
fff3fc0a
Commit
fff3fc0a
authored
Apr 30, 2021
by
klorydryk
Browse files
Créations des couples prénom-nom selon fréquence d'apparition
parent
773fc9d1
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
patronymes.csv
0 → 100644
View file @
fff3fc0a
This diff is collapsed.
Click to expand it.
prenom.csv
0 → 100644
View file @
fff3fc0a
This diff is collapsed.
Click to expand it.
randomnames.py
View file @
fff3fc0a
...
...
@@ -7,20 +7,36 @@ long_txt = fo.read()
words
=
long_txt
.
splitlines
()
# French firstnames https://www.data.gouv.fr/fr/datasets/liste-de-prenoms/
fo_firstname
=
open
(
"Prenoms.csv"
,
"r"
,
encoding
=
'Windows 1252'
)
#
fo_firstname = open("Prenoms.csv", "r", encoding='Windows 1252')
#fo_firstname = open("Prenoms.csv", "r", encoding='latin-1')
# French lastnames https://www.insee.fr/fr/statistiques/3536630
#fo_lastname = open("noms2008nat_txt.txt", "r")
# Nouvelle base de données
# https://www.data.gouv.fr/fr/datasets/liste-de-prenoms-et-patronymes/
fo_firstname
=
open
(
"prenom.csv"
,
"r"
)
fo_lastname
=
open
(
"patronymes.csv"
,
"r"
)
long_firstname
=
fo_firstname
.
read
()
firstnames
=
long_firstname
.
splitlines
()
del
firstnames
[
0
]
firstnames
=
[
items
.
split
(
";"
)[
0
].
title
()
for
items
in
firstnames
]
# print(firstnames)
del
firstnames
[
0
]
# Remove titles
# Split every remaining CSV line on "," so each entry is a list of fields;
# field 0 is used as the first name and field 1 as its occurrence count.
firstnames_and_freq = [row.split(",") for row in firstnames]

# Total of the count column (still text at this point, hence the int()).
sum_firstnames = sum(int(fields[1]) for fields in firstnames_and_freq)
print("Nombre de prénoms: " + str(sum_firstnames))
# French lastnames https://www.insee.fr/fr/statistiques/3536630
fo_lastname
=
open
(
"noms2008nat_txt.txt"
,
"r"
)
long_lastname
=
fo_lastname
.
read
()
lastnames
=
long_lastname
.
splitlines
()
del
lastnames
[
0
]
lastnames
=
[
items
.
split
(
"
\t
"
)[
0
].
title
()
for
items
in
lastnames
]
del
lastnames
[
0
]
# Remove titles
# Split every remaining CSV line on "," so each entry is a list of fields;
# field 0 is used as the last name and field 1 as its occurrence count.
lastnames_and_freq = [row.split(",") for row in lastnames]

# Total of the count column (still text at this point, hence the int()).
sum_lastnames = sum(int(fields[1]) for fields in lastnames_and_freq)
print("Nombre de patronymes: " + str(sum_lastnames))
def
rand_name
(
charset
):
while
True
:
...
...
@@ -31,9 +47,30 @@ def rand_name(charset):
if
verify_name
(
name
,
charset
):
return
name
def
verify_name
(
name
,
charset
):
def rand_name_statistic(charset):
    """Return a random "firstname lastname" drawn according to frequency.

    The first name is picked from the module-level ``firstnames_and_freq``
    table and the last name from ``lastnames_and_freq``. Each row is
    ``[name, count, ...]`` and the probability of a row being picked is
    proportional to ``int(count)``. Candidates are redrawn until
    ``verify_name`` accepts the joined name.

    Args:
        charset: Accepted for signature compatibility with ``rand_name``.
            NOTE(review): it is not forwarded to ``verify_name``, exactly as
            in the previous implementation -- confirm whether it should be.

    Returns:
        The accepted name, first name and last name separated by one space.

    Raises:
        IndexError: if one of the tables is empty.
        ValueError: if all counts of a table sum to zero, or a count is not
            an integer literal.
    """

    def _weighted_pick(table):
        # The previous hand-rolled scan advanced the index once more after
        # the cumulative count crossed the drawn number, so it returned the
        # *next* row and ran past the end of the table when the last row was
        # hit. random.choices performs the weighted draw without that
        # off-by-one.
        weights = [int(row[1]) for row in table]
        return random.choices(table, weights=weights, k=1)[0][0]

    while True:
        firstname = _weighted_pick(firstnames_and_freq)
        lastname = _weighted_pick(lastnames_and_freq)
        name = ' '.join([firstname, lastname])
        # Loop until verify_name accepts the candidate.
        if verify_name(name):
            return name
def
verify_name
(
name
,
charset
=
""
):
'''
Brute Version of cleaning names:
Brute Version of cleaning names:
Two options:
* Only printable characters (but we'll have some punctuations and (1) (2)...)
* Only letters -> charset "abcdefghijklmnopqrstuvwxyz" + Uppercase
...
...
@@ -44,8 +81,7 @@ def verify_name(name, charset):
else
:
# All printable including punctuation
charset
=
string
.
printable
test
=
all
(
c
in
(
charset
)
for
c
in
name
)
return
test
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment