首页 技术 正文
技术 2022年11月14日
0 收藏 759 点赞 3,866 浏览 3305 个字

RowMatrix行矩阵

import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix
val df1 = Seq(
| (1.0, 2.0, 3.0),
| (1.1, 2.1, 3.1),
| (1.2, 2.2, 3.2)).toDF("c1", "c2", "c3")
df1: org.apache.spark.sql.DataFrame = [c1: double, c2: double ... 1 more field]
df1.show
+---+---+---+
| c1| c2| c3|
+---+---+---+
|1.0|2.0|3.0|
|1.1|2.1|3.1|
|1.2|2.2|3.2|
+---+---+---+
// DataFrame转换成RDD[Vector]
val rowsVector= df1.rdd.map {
| x =>
| Vectors.dense(
| x(0).toString().toDouble,
| x(1).toString().toDouble,
| x(2).toString().toDouble)
| }
rowsVector: org.apache.spark.rdd.RDD[org.apache.spark.mllib.linalg.Vector] = MapPartitionsRDD[4] at map
// Create a RowMatrix from an RDD[Vector].
val mat1: RowMatrix = new RowMatrix(rowsVector)
mat1: org.apache.spark.mllib.linalg.distributed.RowMatrix = org.apache.spark.mllib.linalg.distributed.RowMatrix@7ba821ef
// Get its size.
val m = mat1.numRows()
m: Long = 3
val n = mat1.numCols()
n: Long = 3
// 将RowMatrix转换成DataFrame
val resDF = mat1.rows.map {
| x =>
| (x(0).toDouble,
| x(1).toDouble,
| x(2).toDouble)
| }.toDF("c1", "c2", "c3")
resDF: org.apache.spark.sql.DataFrame = [c1: double, c2: double ... 1 more field]
resDF.show
+---+---+---+
| c1| c2| c3|
+---+---+---+
|1.0|2.0|3.0|
|1.1|2.1|3.1|
|1.2|2.2|3.2|
+---+---+---+
mat1.rows.collect().take(10)
res3: Array[org.apache.spark.mllib.linalg.Vector] = Array([1.0,2.0,3.0], [1.1,2.1,3.1], [1.2,2.2,3.2])

CoordinateMatrix坐标矩阵

import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
// 第一列:行坐标;第二列:列坐标;第三列:矩阵元素
val df = Seq(
| (0, 0, 1.1), (0, 1, 1.2), (0, 2, 1.3),
| (1, 0, 2.1), (1, 1, 2.2), (1, 2, 2.3),
| (2, 0, 3.1), (2, 1, 3.2), (2, 2, 3.3),
| (3, 0, 4.1), (3, 1, 4.2), (3, 2, 4.3)).toDF("row", "col", "value")
df: org.apache.spark.sql.DataFrame = [row: int, col: int ... 1 more field]
df.show
+---+---+-----+
|row|col|value|
+---+---+-----+
|  0|  0|  1.1|
|  0|  1|  1.2|
|  0|  2|  1.3|
|  1|  0|  2.1|
|  1|  1|  2.2|
|  1|  2|  2.3|
|  2|  0|  3.1|
|  2|  1|  3.2|
|  2|  2|  3.3|
|  3|  0|  4.1|
|  3|  1|  4.2|
|  3|  2|  4.3|
+---+---+-----+
// 生成矩阵元素(MatrixEntry)的RDD:每个元素由行坐标、列坐标和元素值组成
val entr = df.rdd.map { x =>
| val a = x(0).toString().toLong
| val b = x(1).toString().toLong
| val c = x(2).toString().toDouble
| MatrixEntry(a, b, c)
| }
entr: org.apache.spark.rdd.RDD[org.apache.spark.mllib.linalg.distributed.MatrixEntry] = MapPartitionsRDD[20] at map
// 生成坐标矩阵
val mat: CoordinateMatrix = new CoordinateMatrix(entr)
mat: org.apache.spark.mllib.linalg.distributed.CoordinateMatrix = org.apache.spark.mllib.linalg.distributed.CoordinateMatrix@5381deec
mat.numRows()
res5: Long = 4
mat.numCols()
res6: Long = 3
mat.entries.collect().take(10)
res7: Array[org.apache.spark.mllib.linalg.distributed.MatrixEntry] = Array(MatrixEntry(0,0,1.1), MatrixEntry(0,1,1.2), MatrixEntry(0,2,1.3), MatrixEntry(1,0,2.1), MatrixEntry(1,1,2.2), MatrixEntry(1,2,2.3), MatrixEntry(2,0,3.1), MatrixEntry(2,1,3.2), MatrixEntry(2,2,3.3), MatrixEntry(3,0,4.1))
// 坐标矩阵转成带行索引的DataFrame,行索引即行坐标
val t = mat.toIndexedRowMatrix().rows.map { x =>
| val v=x.vector
| (x.index,v(0).toDouble, v(1).toDouble, v(2).toDouble)
| }
t: org.apache.spark.rdd.RDD[(Long, Double, Double, Double)] = MapPartitionsRDD[33] at map
t.toDF().show
+---+---+---+---+
| _1| _2| _3| _4|
+---+---+---+---+
|  0|1.1|1.2|1.3|
|  1|2.1|2.2|2.3|
|  2|3.1|3.2|3.3|
|  3|4.1|4.2|4.3|
+---+---+---+---+
// 坐标矩阵转成DataFrame(注意:toRowMatrix()会丢弃行索引,因此结果中行的顺序不保证与原行坐标一致;需要保留行号时请使用toIndexedRowMatrix())
val t1 = mat.toRowMatrix().rows.map { x =>
| (x(0).toDouble, x(1).toDouble, x(2).toDouble)
| }
t1: org.apache.spark.rdd.RDD[(Double, Double, Double)] = MapPartitionsRDD[26] at map
t1.toDF().show
+---+---+---+
| _1| _2| _3|
+---+---+---+
|1.1|1.2|1.3|
|3.1|3.2|3.3|
|2.1|2.2|2.3|
|4.1|4.2|4.3|
+---+---+---+
相关推荐
python开发_常用的python模块及安装方法
adodb:我们领导推荐的数据库连接组件bsddb3:BerkeleyDB的连接组件Cheetah-1.0:我比较喜欢这个版本的cheeta…
日期:2022-11-24 点赞:878 阅读:9,154
Educational Codeforces Round 11 C. Hard Process 二分
C. Hard Process题目连接:http://www.codeforces.com/contest/660/problem/CDes…
日期:2022-11-24 点赞:807 阅读:5,622
下载Ubuntn 17.04 内核源代码
zengkefu@server1:/usr/src$ uname -aLinux server1 4.10.0-19-generic #21…
日期:2022-11-24 点赞:569 阅读:6,465
可用Active Desktop Calendar V7.86 注册码序列号
可用Active Desktop Calendar V7.86 注册码序列号Name: www.greendown.cn Code: &nb…
日期:2022-11-24 点赞:733 阅读:6,239
Android调用系统相机、自定义相机、处理大图片
Android调用系统相机和自定义相机实例本博文主要是介绍了android上使用相机进行拍照并显示的两种方式,并且由于涉及到要把拍到的照片显…
日期:2022-11-24 点赞:512 阅读:7,874
Struts的使用
一、Struts2的获取  Struts的官方网站为:http://struts.apache.org/  下载完Struts2的jar包,…
日期:2022-11-24 点赞:671 阅读:5,042