-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataFrame
More file actions
84 lines (67 loc) · 1.6 KB
/
DataFrame
File metadata and controls
84 lines (67 loc) · 1.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// Spark 2.0 以上用SparkSession代替SQLContext & HQLContext
//spark-shell
scala> import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SparkSession
scala> val spark = SparkSession.builder().getOrCreate()
spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@6a2badb1
// 隐士转换的目的是让接下来读进来的RDD转换成DataFrame
scala> import spark.implicits._
import spark.implicits._
val df = spark.read.json("file:///var/groupon/spark-2.4.3/examples/src/main/resources/people.json")
scala> df.show()
+----+-------+
| age| name|
+----+-------+
|null|Michael|
| 30| Andy|
| 19| Justin|
scala> df.printSchema()
root
|-- age: long (nullable = true)
|-- name: string (nullable = true)
scala> df.select(df("name"),df("age")+1).show()
+-------+---------+
| name|(age + 1)|
+-------+---------+
|Michael| null|
| Andy| 31|
| Justin| 20|
+-------+---------+
scala> df.select(df("name"),(df("age")+1).as("age")).show()
+-------+----+
| name| age|
+-------+----+
|Michael|null|
| Andy| 31|
| Justin| 20|
+-------+----+
scala> df.filter(df("age")>20).show()
+---+----+
|age|name|
+---+----+
| 30|Andy|
+---+----+
scala> df.groupBy("age").count().show()
+----+-----+
| age|count|
+----+-----+
| 19| 1|
|null| 1|
| 30| 1|
+----+-----+
scala> df.sort(df("age").desc).show()
+----+-------+
| age| name|
+----+-------+
| 30| Andy|
| 19| Justin|
|null|Michael|
+----+-------+
scala> df.sort(df("age").desc, df("name").asc).show()
+----+-------+
| age| name|
+----+-------+
| 30| Andy|
| 19| Justin|
|null|Michael|
+----+-------+