This repository was archived by the owner on Apr 2, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathReccomender.json
More file actions
1 lines (1 loc) · 23.4 KB
/
Reccomender.json
File metadata and controls
1 lines (1 loc) · 23.4 KB
1
{"paragraphs":[{"text":"// import dependencies \nimport java.io.File\nimport scala.io.Source\n\nimport org.apache.log4j.Logger\nimport org.apache.log4j.Level\n\nimport org.apache.spark.SparkConf\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd._\nimport org.apache.spark.mllib.recommendation.{ALS, Rating, MatrixFactorizationModel}","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"import java.io.File\nimport scala.io.Source\nimport org.apache.log4j.Logger\nimport org.apache.log4j.Level\nimport org.apache.spark.SparkConf\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd._\nimport org.apache.spark.mllib.recommendation.{ALS, Rating, MatrixFactorizationModel}\n"}]},"apps":[],"jobName":"paragraph_1582994984482_-1636521383","id":"20200229-164944_781186348","dateCreated":"2020-02-29T16:49:44+0000","dateStarted":"2020-03-01T08:06:10+0000","dateFinished":"2020-03-01T08:06:10+0000","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:2018"},{"text":"// read movie data from s3 bucket -hashan-assignment-zepplin\nval movieLensHomeDir = \"s3://hashan-assignment-zepplin/movieLens/\"\nval movies = sc.textFile(movieLensHomeDir + \"movies.dat\").map { line =>\n val fields = line.split(\"::\")\n // format: (movieId, movieName)\n (fields(0).toInt, fields(1))\n}.collect.toMap\n","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"movieLensHomeDir: String = s3://hashan-assignment-zepplin/movieLens/\nmovies: scala.collection.immutable.Map[Int,String] = Map(2163 -> Attack of the Killer Tomatoes! (1980), 8607 -> Tokyo Godfathers (2003), 645 -> Nelly & Monsieur Arnaud (1995), 42900 -> Cul-de-sac (1966), 892 -> Twelfth Night (1996), 69 -> Friday (1995), 53550 -> Rescue Dawn (2006), 37830 -> Final Fantasy VII: Advent Children (2004), 5385 -> Last Waltz, The (1978), 5810 -> 8 Mile (2002), 7375 -> Prince & Me, The (2004), 5659 -> Rocking Horse Winner, The (1950), 2199 -> Phoenix (1998), 8062 -> Dahmer (2002), 3021 -> Funhouse, The (1981), 8536 -> Intended, The (2002), 5437 -> Manhattan Project, The (1986), 1322 -> Amityville 1992: It's About Time (1992), 1665 -> Bean (1997), 5509 -> Biggie and Tupac (2002), 5686 -> Russia..."}]},"apps":[],"jobName":"paragraph_1582996217386_-1776340493","id":"20200229-171017_736798969","dateCreated":"2020-02-29T17:10:17+0000","dateStarted":"2020-03-01T08:06:27+0000","dateFinished":"2020-03-01T08:06:27+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2019"},{"text":"// read ratings data from s3 bucket -hashan-assignment-zepplin\nval ratings = sc.textFile(movieLensHomeDir + \"ratings.dat\").map { line =>\n val fields = line.split(\"::\")\n // format: (timestamp % 10, Rating(userId, movieId, rating))\n (fields(3).toLong % 10, Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble))\n}","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"ratings: org.apache.spark.rdd.RDD[(Long, org.apache.spark.mllib.recommendation.Rating)] = MapPartitionsRDD[10572] at map at <console>:93\n"}]},"apps":[],"jobName":"paragraph_1582996882995_-438065735","id":"20200229-172122_1957344519","dateCreated":"2020-02-29T17:21:22+0000","dateStarted":"2020-03-01T08:06:44+0000","dateFinished":"2020-03-01T08:06:44+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2020"},{"text":"// verifying read data \nval numRatings = ratings.count\nval numUsers = ratings.map(_._2.user).distinct.count\nval numMovies = ratings.map(_._2.product).distinct.count\n\nprintln(\"Got \" + numRatings + \" ratings from \"\n + numUsers + \" users on \" + numMovies + \" movies.\")","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"Got 10000054 ratings from 69878 users on 10677 movies.\nnumRatings: Long = 10000054\nnumUsers: Long = 69878\nnumMovies: Long = 10677\n"}]},"apps":[],"jobName":"paragraph_1583026896035_1393618151","id":"20200301-014136_1200722792","dateCreated":"2020-03-01T01:41:36+0000","dateStarted":"2020-03-01T08:06:50+0000","dateFinished":"2020-03-01T08:07:03+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2021"},{"text":"// split that dataset into a few parts, one for training (60%), one for validation (20%), and one for testing (20%)\nval training = ratings.filter(x => x._1 < 6)\n .values\n .cache()\nval validation = ratings.filter(x => x._1 >= 6 && x._1 < 8)\n .values\n .cache()\nval test = ratings.filter(x => x._1 >= 8).values.cache()\n\nval numTraining = training.count()\nval numValidation = validation.count()\nval numTest = test.count()\n\nprintln(\"Training: \" + numTraining + \", validation: \" + numValidation + \", test: \" + numTest)","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"Training: 6002473, validation: 1999675, test: 1997906\ntraining: org.apache.spark.rdd.RDD[org.apache.spark.mllib.recommendation.Rating] = MapPartitionsRDD[10582] at values at <console>:99\nvalidation: org.apache.spark.rdd.RDD[org.apache.spark.mllib.recommendation.Rating] = MapPartitionsRDD[10584] at values at <console>:102\ntest: org.apache.spark.rdd.RDD[org.apache.spark.mllib.recommendation.Rating] = MapPartitionsRDD[10586] at values at <console>:104\nnumTraining: Long = 6002473\nnumValidation: Long = 1999675\nnumTest: Long = 1997906\n"}]},"apps":[],"jobName":"paragraph_1583032462415_602199673","id":"20200301-031422_932821873","dateCreated":"2020-03-01T03:14:22+0000","dateStarted":"2020-03-01T08:07:14+0000","dateFinished":"2020-03-01T08:07:27+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2022"},{"text":"/** Compute RMSE (Root Mean Squared Error). */\ndef computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], n: Long): Double = {\n val predictions: RDD[Rating] = model.predict(data.map(x => (x.user, x.product)))\n val predictionsAndRatings = predictions.map(x => ((x.user, x.product), x.rating))\n .join(data.map(x => ((x.user, x.product), x.rating))).values\n math.sqrt(predictionsAndRatings.map(x => (x._1 - x._2) * (x._1 - x._2)).reduce(_ + _) / n)\n}","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"computeRmse: (model: org.apache.spark.mllib.recommendation.MatrixFactorizationModel, data: org.apache.spark.rdd.RDD[org.apache.spark.mllib.recommendation.Rating], n: Long)Double\n"}]},"apps":[],"jobName":"paragraph_1583033734196_-1640747471","id":"20200301-033534_792828560","dateCreated":"2020-03-01T03:35:34+0000","dateStarted":"2020-03-01T08:07:30+0000","dateFinished":"2020-03-01T08:07:30+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2023"},{"text":"// choosing the best parameters for the training algorithm.\nval ranks = List(8, 12)\nval lambdas = List(0.1, 10.0)\nval numIters = List(10, 20)\nvar bestModel: Option[MatrixFactorizationModel] = None\nvar bestValidationRmse = Double.MaxValue\nvar bestRank = 0\nvar bestLambda = -1.0\nvar bestNumIter = -1\nfor (rank <- ranks; lambda <- lambdas; numIter <- numIters) {\n val model = ALS.train(training, rank, numIter, lambda)\n val validationRmse = computeRmse(model, validation, numValidation)\n println(\"RMSE (validation) = \" + validationRmse + \" for the model trained with rank = \" \n + rank + \", lambda = \" + lambda + \", and numIter = \" + numIter + \".\")\n if (validationRmse < bestValidationRmse) {\n bestModel = Some(model)\n bestValidationRmse = validationRmse\n bestRank = rank\n bestLambda = lambda\n bestNumIter = numIter\n }\n}","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"RMSE (validation) = 0.8229959738402886 for the model trained with rank = 8, lambda = 0.1, and numIter = 10.\nRMSE (validation) = 0.8197124782979465 for the model trained with rank = 8, lambda = 0.1, and numIter = 20.\nRMSE (validation) = 3.667982949261605 for the model trained with rank = 8, lambda = 10.0, and numIter = 10.\nRMSE (validation) = 3.667982949261605 for the model trained with rank = 8, lambda = 10.0, and numIter = 20.\nRMSE (validation) = 0.8193237948586417 for the model trained with rank = 12, lambda = 0.1, and numIter = 10.\nRMSE (validation) = 0.8151540496611714 for the model trained with rank = 12, lambda = 0.1, and numIter = 20.\nRMSE (validation) = 3.667982949261605 for the model trained with rank = 12, lambda = 10.0, and numIter = 10.\nRMSE (validation) = 3.667982949261605 for the model trained with rank = 12, lambda = 10.0, and numIter = 20.\nranks: List[Int] = List(8, 12)\nlambdas: List[Double] = List(0.1, 10.0)\nnumIters: List[Int] = List(10, 20)\nbestModel: Option[org.apache.spark.mllib.recommendation.MatrixFactorizationModel] = Some(org.apache.spark.mllib.recommendation.MatrixFactorizationModel@60216681)\nbestValidationRmse: Double = 0.8151540496611714\nbestRank: Int = 12\nbestLambda: Double = 0.1\nbestNumIter: Int = 20\n"}]},"apps":[],"jobName":"paragraph_1583033809366_1675928767","id":"20200301-033649_2001402029","dateCreated":"2020-03-01T03:36:49+0000","dateStarted":"2020-03-01T08:07:52+0000","dateFinished":"2020-03-01T08:12:03+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2024"},{"text":"// evaluate the best model on the test set\nval testRmse = computeRmse(bestModel.get, test, numTest)\n\nprintln(\"The best model was trained with rank = \" + bestRank + \" and lambda = \" + bestLambda\n + \", and numIter = \" + bestNumIter + \", and its RMSE on the test set is\" + testRmse + \".\")","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"The best model was trained with rank = 12 and lambda = 0.1, and numIter = 20, and its RMSE on the test set is0.8152599320680729.\ntestRmse: Double = 0.8152599320680729\n"}]},"apps":[],"jobName":"paragraph_1583033871815_-322885433","id":"20200301-033751_875074053","dateCreated":"2020-03-01T03:37:51+0000","dateStarted":"2020-03-01T08:13:41+0000","dateFinished":"2020-03-01T08:13:46+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2025"},{"text":"// create a naive baseline and compare it with the best model\nval meanRating = training.union(validation).map(_.rating).mean\nval baselineRmse = \n math.sqrt(test.map(x => (meanRating - x.rating) * (meanRating - x.rating)).mean)\nval improvement = (baselineRmse - testRmse) / baselineRmse * 100\nprintln(\"The best model improves the baseline by \" + \"%1.2f\".format(improvement) + \"%.\")","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"The best model improves the baseline by 23.07%.\nmeanRating: Double = 3.512362305720929\nbaselineRmse: Double = 1.05978282646606\nimprovement: Double = 23.07292478152061\n"}]},"apps":[],"jobName":"paragraph_1583034499891_1460075980","id":"20200301-034819_1444295229","dateCreated":"2020-03-01T03:48:19+0000","dateStarted":"2020-03-01T08:14:00+0000","dateFinished":"2020-03-01T08:14:00+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2026"},{"text":"// top 10 movie recommendations for one of your users (userid 100)\nval candidates = sc.parallelize(movies.keys.toSeq)\nval recommendations = bestModel.get\n .predict(candidates.map((100, _)))\n .collect()\n .sortBy(- _.rating)\n .take(10)\n\nvar i = 1\nprintln(\"Movies recommended for you:\")\nrecommendations.foreach { r =>\n println(\"%2d\".format(i) + \": \" + movies(r.product))\n i += 1\n}","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"Movies recommended for you:\n 1: Eve and the Fire Horse (2005)\n 2: Maradona by Kusturica (2008)\n 3: Power of Nightmares: The Rise of the Politics of Fear, The (2004)\n 4: Shadows of Forgotten Ancestors (1964)\n 5: Tunnel, The (Der Tunnel) (2001)\n 6: Godfather, The (1972)\n 7: Low Life, The (1995)\n 8: Unreasonable Man, An (2006)\n 9: Godfather: Part II, The (1974)\n10: Gonzo: The Life and Work of Dr. Hunter S. Thompson (2008)\ncandidates: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[13121] at parallelize at <console>:99\nrecommendations: Array[org.apache.spark.mllib.recommendation.Rating] = Array(Rating(100,60983,4.317871504838677), Rating(100,61742,3.8252927838551116), Rating(100,53883,3.750461577499404), Rating(100,42783,3.721632107209558), Rating(100,27376,3.5193229821400434), Rating(100,858,3.5169105337093063), Rating(100,32090,3.511850878064036), Rating(100,55156,3.5118456435163785), Rating(100,1221,3.507541920872527), Rating(100,60291,3.4978607551791674))\ni: Int = 11\n"}]},"apps":[],"jobName":"paragraph_1583034625317_-1087194193","id":"20200301-035025_703188771","dateCreated":"2020-03-01T03:50:25+0000","dateStarted":"2020-03-01T08:14:14+0000","dateFinished":"2020-03-01T08:14:15+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2027"},{"text":"// get a movies details with movieId, movieName, genre\nval moviesWithGenres = sc.textFile(movieLensHomeDir + \"movies.dat\").map { line =>\n val fields = line.split(\"::\")\n // format: (movieId, movieName, genre information)\n (fields(0).toInt, fields(2))\n}.collect.toMap","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"moviesWithGenres: scala.collection.immutable.Map[Int,String] = Map(2163 -> Comedy|Horror, 8607 -> Adventure|Animation|Drama, 645 -> Drama, 42900 -> Comedy|Crime|Drama|Thriller, 892 -> Comedy|Drama|Romance, 69 -> Comedy, 53550 -> Action|Adventure|Drama|War, 37830 -> Action|Adventure|Animation|Fantasy|Sci-Fi, 5385 -> Documentary, 5810 -> Drama, 7375 -> Comedy|Romance, 5659 -> Drama|Horror, 2199 -> Crime|Drama, 8062 -> Drama|Horror|Thriller, 3021 -> Horror, 8536 -> Drama|Thriller, 5437 -> Comedy|Thriller, 1322 -> Horror, 1665 -> Comedy, 5509 -> Documentary, 5686 -> Drama|Fantasy|War, 1036 -> Action|Crime|Thriller, 2822 -> Adventure|Romance, 7304 -> Animation|Comedy|Fantasy|Musical, 54999 -> Action|Adventure|Thriller, 2630 -> Drama, 6085 -> Comedy|Drama, 3873 -> Comedy|Western, 4188 -> Chil..."}]},"apps":[],"jobName":"paragraph_1583034677503_593986364","id":"20200301-035117_1636215144","dateCreated":"2020-03-01T03:51:17+0000","dateStarted":"2020-03-01T08:14:38+0000","dateFinished":"2020-03-01T08:14:39+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2028"},{"text":"// filter the movies to include only the ones with “Comedy”\nval comedyMovies = moviesWithGenres.filter(_._2.matches(\".*Comedy.*\")).keys\nval candidates = sc.parallelize(comedyMovies.toSeq)\nval recommendations = bestModel.get\n .predict(candidates.map((100, _)))\n .collect()\n .sortBy(- _.rating)\n .take(5)\n\nvar i = 1\nprintln(\"Comedy Movies recommended for you:\")\nrecommendations.foreach { r =>\n println(\"%2d\".format(i) + \": \" + movies(r.product))\n i += 1\n}","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"Comedy Movies recommended for you:\n 1: Pulp Fiction (1994)\n 2: Yojimbo (1961)\n 3: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)\n 4: Mafioso (1962)\n 5: One Flew Over the Cuckoo's Nest (1975)\ncomedyMovies: Iterable[Int] = Set(2163, 42900, 892, 69, 7375, 5437, 1665, 7304, 6085, 3873, 26413, 4201, 4447, 33004, 3962, 5422, 5469, 3944, 6387, 3883, 62851, 5116, 4094, 6167, 5088, 2889, 59858, 2295, 2306, 4571, 5857, 4464, 101, 2109, 1454, 4909, 2031, 5896, 59625, 2072, 8663, 4062, 3399, 54256, 33675, 6544, 4169, 4899, 53578, 6712, 55020, 5950, 3167, 31160, 4183, 909, 4290, 3477, 333, 3979, 2463, 3397, 49110, 3581, 8784, 3830, 6317, 518, 7990, 2499, 8843, 1083, 468, 54193, 5205, 6172, 4015, 26842, 234, 6690, 2331, 3566, 4728, 6954, 4877, 6014, 5582, 4992, 5131, 6374, 88, 50354, 47047, 32289, 352, 53993, 33145, 1855, 45722, 5454, 56176, 1211, 3990, 7888, 4714, 1158, 582, 762, 3072, 8883, 1005, 5141, 115, 6944, 3317, 5168, 4500, 65027, 7409, 5718, 34018, 37384, 46976, 276, 2622, 4402..."}]},"apps":[],"jobName":"paragraph_1583034701414_1508784785","id":"20200301-035141_398965358","dateCreated":"2020-03-01T03:51:41+0000","dateStarted":"2020-03-01T08:14:53+0000","dateFinished":"2020-03-01T08:14:54+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2029"},{"text":"// Save and load model\n//trained model will be saved to s3://hashan-assignment-zepplin/movieLens/model/recommendation location \nbestModel.get.save(sc, \"s3://hashan-assignment-zepplin/movieLens/model/recommendation\")\n// loading saved model\nval sameModel = MatrixFactorizationModel.load(sc, \"s3://hashan-assignment-zepplin/movieLens/model/recommendation\")\n","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"sameModel: org.apache.spark.mllib.recommendation.MatrixFactorizationModel = org.apache.spark.mllib.recommendation.MatrixFactorizationModel@6646e7d2\n"}]},"apps":[],"jobName":"paragraph_1583034725983_998433158","id":"20200301-035205_192471476","dateCreated":"2020-03-01T03:52:05+0000","dateStarted":"2020-03-01T08:15:20+0000","dateFinished":"2020-03-01T08:15:23+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2030"},{"text":"// resusing saved model to filter the movies to include only the ones with “Comedy”\nval comedyMovies = moviesWithGenres.filter(_._2.matches(\".*Comedy.*\")).keys\nval candidates = sc.parallelize(comedyMovies.toSeq)\nval recommendations = sameModel\n .predict(candidates.map((100, _)))\n .collect()\n .sortBy(- _.rating)\n .take(5)\n\nvar i = 1\nprintln(\"Comedy Movies recommended for you:\")\nrecommendations.foreach { r =>\n println(\"%2d\".format(i) + \": \" + movies(r.product))\n i += 1\n}","user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"Comedy Movies recommended for you:\n 1: Pulp Fiction (1994)\n 2: Yojimbo (1961)\n 3: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)\n 4: Mafioso (1962)\n 5: One Flew Over the Cuckoo's Nest (1975)\ncomedyMovies: Iterable[Int] = Set(2163, 42900, 892, 69, 7375, 5437, 1665, 7304, 6085, 3873, 26413, 4201, 4447, 33004, 3962, 5422, 5469, 3944, 6387, 3883, 62851, 5116, 4094, 6167, 5088, 2889, 59858, 2295, 2306, 4571, 5857, 4464, 101, 2109, 1454, 4909, 2031, 5896, 59625, 2072, 8663, 4062, 3399, 54256, 33675, 6544, 4169, 4899, 53578, 6712, 55020, 5950, 3167, 31160, 4183, 909, 4290, 3477, 333, 3979, 2463, 3397, 49110, 3581, 8784, 3830, 6317, 518, 7990, 2499, 8843, 1083, 468, 54193, 5205, 6172, 4015, 26842, 234, 6690, 2331, 3566, 4728, 6954, 4877, 6014, 5582, 4992, 5131, 6374, 88, 50354, 47047, 32289, 352, 53993, 33145, 1855, 45722, 5454, 56176, 1211, 3990, 7888, 4714, 1158, 582, 762, 3072, 8883, 1005, 5141, 115, 6944, 3317, 5168, 4500, 65027, 7409, 5718, 34018, 37384, 46976, 276, 2622, 4402..."}]},"apps":[],"jobName":"paragraph_1583035029177_-834942329","id":"20200301-035709_2068297506","dateCreated":"2020-03-01T03:57:09+0000","dateStarted":"2020-03-01T08:15:45+0000","dateFinished":"2020-03-01T08:15:46+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2031"},{"user":"anonymous","dateUpdated":"2020-03-01T12:08:51+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/scala","tableHide":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1583043735755_523802710","id":"20200301-062215_1758850977","dateCreated":"2020-03-01T06:22:15+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:2032"}],"name":"Reccomender","id":"2F384BE16","noteParams":{},"noteForms":{},"angularObjects":{"spark:shared_process":[]},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"simple","personalizedMode":"false"},"info":{}}