Skip to content

Commit 38ab03b

Browse files
committed
Spark 01: Movie Rating Counter
1 parent 502f06f commit 38ab03b

3 files changed

Lines changed: 40 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// Build definition for the Movie Ratings Counter project.

// Scala compiler version; `%%` below selects the Spark artifact built for this Scala binary version.
scalaVersion := "2.12.8"

// Project coordinates.
name := "Movie Ratings Counter"
version := "0.1"

// Spark core is the only library this build depends on.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "2.4.0"
)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# sbt release used to build this project.
sbt.version = 1.2.8
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package com.javahelps.spark
2+
3+
import org.apache.log4j.{Level, Logger}
4+
import org.apache.spark.SparkContext
5+
6+
7+
/**
  * Counts how often each rating value occurs in a ratings CSV file and prints
  * the `(rating, count)` pairs in ascending order.
  *
  * The first line of the file is treated as a header and excluded; the rating is
  * read from the third comma-separated column of every remaining line.
  */
object MovieRatingsCounter {

  /** Input file read when no path is supplied on the command line. */
  private val DefaultRatingsPath = "/tmp/ml-latest-small/ratings.csv"

  /**
    * Runs the count on a local Spark context (`local[*]`).
    *
    * @param args optional command-line arguments; when present, `args(0)` is used as
    *             the path of the ratings file instead of the default location
    */
  def main(args: Array[String]): Unit = {
    // Only ERROR and above from loggers under "org", so the result is not buried in log output.
    Logger.getLogger("org").setLevel(Level.ERROR)

    val ratingsPath = args.headOption.getOrElse(DefaultRatingsPath)
    val sc = new SparkContext("local[*]", "MovieRatingsCounter")

    try {
      val lines = sc.textFile(ratingsPath)

      // take(1) instead of first(): first() throws on an empty file, whereas an
      // empty file should simply produce no counts.
      val header = lines.take(1).headOption

      // Drop every line equal to the header; with no header there is nothing to drop.
      val rows = header.fold(lines)(h => lines.filter(_ != h))

      val ratingCounts = rows
        .map(_.split(',')(2).toFloat) // rating is the third column
        .countByValue()               // occurrences of each distinct rating, collected on the driver

      // Sort by rating (then count) and print one pair per line.
      ratingCounts.toSeq.sorted.foreach(println)
    } finally {
      // Always release the context, even when reading or parsing fails.
      sc.stop()
    }
  }
}

0 commit comments

Comments
 (0)