-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
96 lines (87 loc) · 4.21 KB
/
data.py
File metadata and controls
96 lines (87 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import pandas as pd # type: ignore
from faker import Faker # type: ignore
import random
from datetime import datetime, timedelta
# Faker instance configured with the French ('fr_FR') locale.
fake = Faker('fr_FR')

# Seed both random sources (Faker's generator and the stdlib `random` module)
# so that repeated runs draw the same pseudo-random sequence. Change the value
# to obtain a different dataset. Values bounded by 'now' still shift with the
# wall clock between runs.
RANDOM_SEED = 42
Faker.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
# Dataset sizes
n_users = 500  # number of users
n_jobs = 1000  # number of jobs
n_interactions = 2000  # number of interactions (applications, saved jobs)

# Value pools used to customise the generated records
skills_list = [
    'Python',
    'JavaScript',
    'UI/UX',
    'SQL',
    'Django',
    'React',
    'Figma',
    'Marketing Digital',
    'Gestion de Projet',
    'Cybersécurité',
    'Traduction',
    'Logistique',
]
locations = [
    'Tanger',
    'Casablanca',
    'Rabat',
    'Marrakech',
    'Fès',
    'Agadir',
]
languages_list = [
    'Français',
    'Anglais',
    'Arabe',
    'Espagnol',
]
categories = [
    'Développement Web',
    'Design',
    'Data Science',
    'Marketing',
    'Événementiel',
    'Logistique',
    'Cybersécurité',
]
professions = [
    'Développeur',
    'Designer',
    'Data Scientist',
    'Marketeur',
    'Manager de Projet',
    'Traducteur',
    'Logisticien',
]
# Generate the users.
# Each user is a dict; optional fields are randomly left as None so the
# dataset contains missing values. The savedJobs/appliedJobs/postedJobs/
# connections lists start empty.
users = []
for i in range(n_users):
    created_at = fake.date_time_between(start_date='-2y', end_date='now')

    # lastActive is present for roughly 60% of users. It is drawn between the
    # account creation date and now, so it can never lie in the future (adding
    # an arbitrary number of days to a recent creation date could overshoot
    # the present).
    if random.random() > 0.4:
        # max() guards against Faker returning a value marginally before the
        # lower bound (presumably possible if it truncates sub-second
        # precision - TODO confirm).
        last_active = max(
            created_at,
            fake.date_time_between(start_date=created_at, end_date='now'),
        )
    else:
        last_active = None

    user = {
        'id': f'user_{i+1}',
        'email': fake.email(),
        'fullName': fake.name(),
        'photoUrl': fake.image_url() if random.random() > 0.5 else None,
        'phoneNumber': fake.phone_number() if random.random() > 0.7 else None,
        'bio': fake.text(max_nb_chars=100) if random.random() > 0.6 else None,
        'location': random.choice(locations),
        'profession': random.choice(professions) if random.random() > 0.4 else None,
        # random.sample draws without replacement, so no skill/language repeats.
        'skills': random.sample(skills_list, random.randint(2, 6)),
        'languages': random.sample(languages_list, random.randint(1, 3)),
        'rating': round(random.uniform(3.0, 5.0), 1) if random.random() > 0.3 else None,
        'jobsCompleted': random.randint(0, 20) if random.random() > 0.2 else None,
        'jobsPosted': random.randint(0, 10) if random.random() > 0.5 else None,
        # The two role flags are drawn independently: a user may be both,
        # either, or neither.
        'isEmployer': random.random() > 0.7,
        'isWorker': random.random() > 0.3,
        'createdAt': created_at,
        'lastActive': last_active,
        'preferences': {
            'preferred_category': random.choice(categories),
            'min_budget': random.randint(500, 3000),
        } if random.random() > 0.5 else None,
        'settings': {
            'notifications': random.choice(['email', 'push', 'none']),
        } if random.random() > 0.5 else None,
        'savedJobs': [],
        'appliedJobs': [],
        'postedJobs': [],
        'connections': [],
    }
    users.append(user)
# Generate the jobs.
# The employer pool does not change while jobs are created, so it is built
# once instead of being re-filtered on every iteration.
employers = [u for u in users if u['isEmployer']]
if n_jobs and not employers:
    # Fail with an explicit message rather than random.choice's generic
    # "empty sequence" error.
    raise ValueError("Aucun utilisateur employeur disponible pour publier des jobs")

jobs = []
for i in range(n_jobs):
    job_id = f'job_{i+1}'
    employer = random.choice(employers)

    # A job is dated within the last year, but never before its poster's
    # account was created.
    earliest = max(employer['createdAt'], datetime.now() - timedelta(days=365))
    # max() guards against Faker returning a value marginally before the
    # lower bound (presumably possible if it truncates sub-second precision
    # - TODO confirm).
    created_at = max(
        earliest,
        fake.date_time_between(start_date=earliest, end_date='now'),
    )

    job = {
        'job_id': job_id,
        'title': fake.job(),
        'category': random.choice(categories),
        'required_skills': random.sample(skills_list, random.randint(2, 5)),
        'location': random.choice(locations),
        'budget': random.randint(500, 5000),
        'duration_days': random.randint(5, 90),
        'description': fake.text(max_nb_chars=200),
        'posted_by': employer['id'],
        'created_at': created_at,
    }
    jobs.append(job)
    # Record the job on the employer's profile, mirroring how applications
    # and saved jobs are recorded on workers' profiles.
    employer['postedJobs'].append(job_id)
# Generate interactions (job applications and saved jobs).
# The worker pool does not change during the loop, so it is built once
# instead of being re-filtered on every iteration.
workers = [u for u in users if u['isWorker']]
if n_interactions and not workers:
    # Fail with an explicit message rather than random.choice's generic
    # "empty sequence" error.
    raise ValueError("Aucun utilisateur travailleur disponible pour générer des interactions")

interactions = []
for _ in range(n_interactions):
    user = random.choice(workers)
    job = random.choice(jobs)
    interaction_type = random.choice(['applied', 'saved'])

    # Mirror the interaction on the user's profile.
    # NOTE: nothing prevents the same (user, job, type) triple from being
    # drawn more than once; such duplicates are kept.
    profile_key = 'appliedJobs' if interaction_type == 'applied' else 'savedJobs'
    user[profile_key].append(job['job_id'])

    # An interaction can only happen once both the user account and the job
    # exist, so the timestamp is drawn from the later of the two dates to now.
    earliest = max(user['createdAt'], job['created_at'])
    # max() guards against Faker returning a value marginally before the
    # lower bound (presumably possible if it truncates sub-second precision
    # - TODO confirm).
    timestamp = max(
        earliest,
        fake.date_time_between(start_date=earliest, end_date='now'),
    )

    interactions.append({
        'user_id': user['id'],
        'job_id': job['job_id'],
        'interaction_type': interaction_type,
        'timestamp': timestamp,
    })
# Turn each list of records into a DataFrame
users_df = pd.DataFrame(users)
jobs_df = pd.DataFrame(jobs)
interactions_df = pd.DataFrame(interactions)

# Write each DataFrame to its CSV file, without the index column
csv_targets = (
    (users_df, 'users_synthetic.csv'),
    (jobs_df, 'jobs_synthetic.csv'),
    (interactions_df, 'interactions_synthetic.csv'),
)
for frame, csv_path in csv_targets:
    frame.to_csv(csv_path, index=False)

print("Données synthétiques générées : users_synthetic.csv, jobs_synthetic.csv, interactions_synthetic.csv")