File tree Expand file tree Collapse file tree
spark/Movie Ratings Counter
src/main/scala/com/javahelps/spark Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ name := " Movie Ratings Counter"
2+
// Project version.
version := "0.1"

// Scala release used to compile the project.
scalaVersion := "2.12.8"

// Spark core; `%%` appends the Scala binary version to the artifact name,
// so the artifact is resolved against the scalaVersion set above.
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.0"
Original file line number Diff line number Diff line change 1+ sbt.version = 1.2.8
Original file line number Diff line number Diff line change 1+ package com .javahelps .spark
2+
3+ import org .apache .log4j .{Level , Logger }
4+ import org .apache .spark .SparkContext
5+
6+
/**
 * Counts how many times each rating value occurs in a `ratings.csv` file
 * and prints the (rating, count) pairs in ascending order.
 */
object MovieRatingsCounter {

  /**
   * Runs the job on a local Spark context using all available cores.
   *
   * Reads `/tmp/ml-latest-small/ratings.csv`, drops the header row, parses the
   * third comma-separated field of every remaining line as a `Float`, counts
   * the occurrences of each distinct value and prints the sorted result.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {

    // Restrict logging from the "org" logger hierarchy to errors only.
    Logger.getLogger("org").setLevel(Level.ERROR)

    val sc = new SparkContext("local[*]", "MovieRatingsCounter")

    try {
      // Read a text file
      val lines = sc.textFile("/tmp/ml-latest-small/ratings.csv")

      // Extract the first row which is the header
      val header = lines.first()

      // Filter out the header from the dataset
      val rows = lines.filter(row => row != header)

      val result = rows
        .map(line => line.split(',')(2).toFloat) // Extract rating from line as float
        .countByValue() // Count number of occurrences of each number

      // Sort and print the result
      result.toSeq
        .sorted
        .foreach(println)
    } finally {
      // Always release the Spark context, even if the job fails part-way.
      sc.stop()
    }
  }
}
You can’t perform that action at this time.
0 commit comments