-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathschema_analyzer_mongo.js
More file actions
47 lines (42 loc) · 1.44 KB
/
schema_analyzer_mongo.js
File metadata and controls
47 lines (42 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// This is a schema analyzer for MongoDB. It samples documents from a specified collection to determine the field names and their corresponding data types.
// The output includes the total count of documents and a list of fields with their types, similar to the schema analysis feature in MongoDB Compass.
//
// Set your collection name
const collectionName = "collection"; // Replace with your collection name

// Maximum number of documents to sample. Lower this for faster runs on large
// collections; fields that only occur outside the sample will not be reported.
const sampleSize = 100000;

// Step 1: Count the total number of documents in the collection
const countOfDocuments = db.getCollection(collectionName).countDocuments();

// Step 2: Use aggregation to collect, for every top-level field name, the set of
// BSON types observed across the sampled documents. Nested fields are not
// expanded: an embedded document is simply reported with type "object".
const schema = db.getCollection(collectionName).aggregate(
  [
    // Randomly pick up to `sampleSize` documents
    { $sample: { size: sampleSize } },
    // Convert each document into an array of { k: <field name>, v: <value> } pairs
    { $project: { fields: { $objectToArray: "$$ROOT" } } },
    // Emit one document per field occurrence
    { $unwind: "$fields" },
    // Group by field name and gather the distinct types of its values
    {
      $group: {
        _id: "$fields.k",
        types: { $addToSet: { $type: "$fields.v" } },
      },
    },
    // Expose the group key under a descriptive name
    { $project: { field: "$_id", types: 1, _id: 0 } },
    // Sort fields alphabetically
    { $sort: { field: 1 } },
  ],
  // Sampling a large share of a collection can involve a random sort over whole
  // documents; allow the server to spill to disk rather than abort the pipeline
  // when a stage runs into its memory limit.
  { allowDiskUse: true }
).toArray();

// Step 3: Format the schema output as required
const formattedSchema = {
  count_of_documents: countOfDocuments,
  fields: schema.map(({ field, types }) => ({
    name: field,
    // $addToSet does not guarantee element order; sort a copy so that repeated
    // runs print the type names in a stable order.
    types: [...types].sort(),
  })),
};

printjson(formattedSchema);