C#学习(十二)——Linq
一、Linq
Language-Integrated Query 语言集成查询
对内存中数据、关系数据和XML数据执行的查询进行检查
例如,在不使用Linq语法时,想要实现查看C盘windows文件夹下最大的前五个文件
/// <summary>
/// Console demo that lists the largest files of a directory without using LINQ.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Show the biggest files that sit directly inside the Windows folder.
        string path = @"c:\windows";
        ShowLargestFiles(path);
        Console.Read();
    }

    /// <summary>
    /// Prints the name and size of up to five files found directly inside
    /// <paramref name="path"/>, in the order produced by <c>FileInfoCompare</c>.
    /// </summary>
    /// <param name="path">Directory whose files are listed.</param>
    private static void ShowLargestFiles(string path)
    {
        DirectoryInfo directory = new DirectoryInfo(path);
        FileInfo[] files = directory.GetFiles();
        Array.Sort(files, new FileInfoCompare());

        // Clamp to the number of files actually found: a fixed bound of 5
        // indexes past the end of the array for folders with fewer files.
        int count = Math.Min(5, files.Length);
        for (int i = 0; i < count; i++)
        {
            var f = files[i];
            Console.WriteLine($"{f.Name,-20} : {f.Length,10:N}");
        }
    }
}
/// <summary>
/// Comparer that orders <see cref="FileInfo"/> instances by length, largest first.
/// </summary>
class FileInfoCompare : IComparer<FileInfo>
{
    /// <summary>Compares two files so that the longer one sorts earlier.</summary>
    /// <returns>
    /// A negative value when <paramref name="x"/> is longer than <paramref name="y"/>,
    /// zero when both have the same length, a positive value otherwise.
    /// </returns>
    public int Compare(FileInfo x, FileInfo y)
    {
        long second = y.Length;
        long first = x.Length;

        // Operands are deliberately reversed to obtain descending order.
        return second.CompareTo(first);
    }
}
那么使用Linq语法后的代码实现
Linq就是将命令式语法转变成声明式语法
- [ 命令式是过程导向的,而声明式则是结果导向的 ]
声明式语言更加友好,可以帮助我们脱离细节,而更专注于业务操作和数据之间的逻辑关系
命令式更适合处理对象关系和逻辑过程,而声明式更适合处理数据关系
/// <summary>
/// Console demo that lists the five largest files of a directory using a LINQ query expression.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Show the biggest files that sit directly inside the Windows folder.
        string path = @"c:\windows";
        ShowLargestFilesWithLinq(path);
        Console.Read();
    }

    /// <summary>
    /// Prints the name and size of the (at most) five largest files found directly inside
    /// <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Directory whose files are listed.</param>
    private static void ShowLargestFilesWithLinq(string path)
    {
        FileInfo[] candidates = new DirectoryInfo(path).GetFiles();

        // Declarative form: describe the ordering, let LINQ perform it.
        var bySizeDescending = from candidate in candidates
                               orderby candidate.Length descending
                               select candidate;

        var topFive = bySizeDescending.Take(5);
        foreach (var entry in topFive)
        {
            Console.WriteLine($"{entry.Name,-20} : {entry.Length,10:N}");
        }
    }
}
关于query也可以通过对象的链式表达来写
/// <summary>
/// Prints the name and size of the (at most) five largest files found directly inside
/// <paramref name="path"/>, using LINQ method chaining.
/// </summary>
/// <param name="path">Directory whose files are listed.</param>
private static void ShowLargestFilesWithLinq(string path)
{
    // Method-chain equivalent of the query expression
    //     from file in new DirectoryInfo(path).GetFiles()
    //     orderby file.Length descending
    //     select file
    // with the Take(5) folded into the same chain.
    var candidates = new DirectoryInfo(path).GetFiles();
    var topFive = candidates
        .OrderByDescending(candidate => candidate.Length)
        .Take(5);

    foreach (var entry in topFive)
    {
        Console.WriteLine($"{entry.Name,-20} : {entry.Length,10:N}");
    }
}
二、Lambda表达式
比如要获取名称以a开头的所有文件
// Named-method form: the predicate is passed to Where as a method group.
IEnumerable<string> file = files.Where(StartWithA);

/// <summary>
/// Tells whether <paramref name="name"/> begins with a lowercase "a".
/// </summary>
/// <param name="name">Name to test.</param>
/// <returns><c>true</c> when the first character is 'a'; otherwise <c>false</c>.</returns>
public bool StartWithA(string name)
{
    // Ordinal comparison: the check must not depend on the current culture,
    // whose linguistic rules can ignore some leading characters.
    return name.StartsWith("a", StringComparison.Ordinal);
}
使用匿名方法来实现,以下通过delegate关键字以内联(inline)的方式定义匿名方法来实现
// Anonymous-method form: the predicate is written inline with the delegate keyword
// instead of referring to a separately declared method.
IEnumerable<string> file = files.Where(delegate(string s)
{
    return s.StartsWith("a");
});
使用Lambda表达式来实现
// Lambda form. The filter runs before the sort so that only matching files
// have to be ordered; the resulting five files are the same as when sorting first.
var query = new DirectoryInfo(path).GetFiles()
    .Where(f => f.Name.StartsWith("a"))
    .OrderByDescending(f => f.Length)
    .Take(5);
代码更加清晰简洁
三、Linq查询语法
Linq查询最终返回的数据类型,是由select子句决定的
/// <summary>
/// Console demo that filters and sorts an in-memory customer list with LINQ.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        var customers = new List<Customer>();
        customers.Add(new Customer(1, "Jason", "广州"));
        customers.Add(new Customer(2, "Janny", "北京"));
        customers.Add(new Customer(3, "Pral", "上海"));
        customers.Add(new Customer(4, "Anna", "广州"));
        customers.Add(new Customer(5, "Angie", "深圳"));
        customers.Add(new Customer(6, "Bob", "武汉"));
        customers.Add(new Customer(7, "Tony", "深圳"));

        // Method-syntax equivalent of the query expression
        //     from c in customers
        //     where c.Address == "广州"
        //     orderby c.Name
        //     select c
        var matching = customers.Where(customer => customer.Address == "广州");
        var query = matching.OrderBy(customer => customer.Name);

        foreach (var customer in query)
        {
            Console.WriteLine($"客户:{customer.Id}, {customer.Name}, {customer.Address}");
        }

        Console.Read();
    }
}
/// <summary>
/// Customer record made of an identifier, a name and an address.
/// </summary>
public class Customer
{
    /// <summary>Customer identifier.</summary>
    public int Id { get; set; }

    /// <summary>Customer name.</summary>
    public string Name { get; set; }

    /// <summary>Customer address.</summary>
    public string Address { get; set; }

    /// <summary>Creates a customer with all three properties populated.</summary>
    public Customer(int id, string name, string address)
    {
        this.Id = id;
        this.Name = name;
        this.Address = address;
    }
}
四、Linq原理剖析
我们通过手写MyWhere语句来实现一个与where语句相同的效果
/// <summary>
/// Console demo that filters customers with the hand-written Mywhere operator
/// and sorts the matches with LINQ's OrderBy.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        var customers = new List<Customer>();
        customers.Add(new Customer(1, "Jason", "广州"));
        customers.Add(new Customer(2, "Janny", "北京"));
        customers.Add(new Customer(3, "Pral", "上海"));
        customers.Add(new Customer(4, "Anna", "广州"));
        customers.Add(new Customer(5, "Angie", "深圳"));
        customers.Add(new Customer(6, "Bob", "武汉"));
        customers.Add(new Customer(7, "Tony", "深圳"));

        // Mywhere takes the place of the built-in Where operator here.
        var matching = customers.Mywhere(customer => customer.Address == "广州");
        var query = matching.OrderBy(customer => customer.Name);

        foreach (var customer in query)
        {
            Console.WriteLine($"客户:{customer.Id}, {customer.Name}, {customer.Address}");
        }

        Console.Read();
    }
}
/// <summary>
/// Customer record made of an identifier, a name and an address.
/// </summary>
public class Customer
{
    /// <summary>Customer identifier.</summary>
    public int Id { get; set; }

    /// <summary>Customer name.</summary>
    public string Name { get; set; }

    /// <summary>Customer address.</summary>
    public string Address { get; set; }

    /// <summary>Creates a customer with all three properties populated.</summary>
    public Customer(int id, string name, string address)
    {
        this.Id = id;
        this.Name = name;
        this.Address = address;
    }
}
MyList.cs
/// <summary>
/// Hand-written counterpart of LINQ's Where operator, used to show how such an operator can be built.
/// </summary>
public static class MyLink
{
    /// <summary>
    /// Returns the elements of <paramref name="source"/> for which <paramref name="predicate"/>
    /// returns <c>true</c>. The whole source is scanned and the matches are collected into a
    /// list before the method returns.
    /// </summary>
    /// <param name="source">Sequence to filter.</param>
    /// <param name="predicate">Test applied to every element.</param>
    /// <returns>A list holding the matching elements in their original order.</returns>
    public static IEnumerable<T> Mywhere<T>(this IEnumerable<T> source, Func<T, bool> predicate)
    {
        var matches = new List<T>();
        using (IEnumerator<T> cursor = source.GetEnumerator())
        {
            while (cursor.MoveNext())
            {
                T candidate = cursor.Current;
                if (!predicate(candidate))
                {
                    continue;
                }

                matches.Add(candidate);
            }
        }

        return matches;
    }
}
不过Where语句的内部要远比我们实现的这个MyWhere要更复杂,其可以实现一个延迟执行的效果。从底层逻辑来说,where语句正是利用了yield return来实现数据的延迟加载,运行效率是非常高的。那么下面在利用yield return,对MyWhere进行更新。
/// <summary>
/// Hand-written counterpart of LINQ's Where operator, implemented as an iterator.
/// </summary>
public static class MyLink
{
    /// <summary>
    /// Lazily yields the elements of <paramref name="source"/> for which
    /// <paramref name="predicate"/> returns <c>true</c>.
    /// </summary>
    /// <param name="source">Sequence to filter.</param>
    /// <param name="predicate">Test applied to each element as the result is enumerated.</param>
    /// <returns>
    /// A deferred sequence: nothing is read from <paramref name="source"/> until the result is
    /// enumerated, and each element is tested only when the consumer asks for the next match.
    /// </returns>
    public static IEnumerable<T> Mywhere<T>(this IEnumerable<T> source, Func<T, bool> predicate)
    {
        // No intermediate list: the iterator hands matches straight to the consumer.
        foreach (var item in source)
        {
            if (predicate(item))
            {
                yield return item;
            }
        }
    }
}
虽然代码逻辑看起来变化不大,但yield return使结果按需逐个生成(延迟执行):只有在遍历结果时才会真正执行过滤,也不需要先把所有结果存入一个中间列表;当调用方只取前几条数据(例如配合Take使用)时,运行效率的提升尤其明显!
五、使用Linq读取并过滤CSV数据
CSV数据已分享,放入项目中,文件属性调整为如果较新则复制
/// <summary>
/// Console demo that loads car records from a CSV file and queries them with LINQ.
/// </summary>
class Program
{
    static void Main(String[] args)
    {
        List<Car> cars = ProcessCars("fuel.csv");

        // Method-syntax equivalent of the query below:
        //     cars.OrderByDescending(c => c.Combined)
        //         .ThenByDescending(c => c.Model)
        // Secondary sort keys must be added with ThenBy/ThenByDescending;
        // chaining a second OrderBy would re-sort with the new key as the primary key.
        var query = from car in cars
                    orderby car.Combined descending, car.Model descending
                    select car;

        // Print the ten cars with the highest Combined value.
        foreach (var car in query.Take(10))
        {
            Console.WriteLine($"{car.Model} {car.Combined}");
        }

        // Fetch the single car with the highest Combined value.
        // First() throws on an empty list; FirstOrDefault() returns null instead,
        // so the result has to be checked before it is used.
        var query2 = (from car in cars
                      orderby car.Combined descending, car.Model descending
                      select car)
                     .FirstOrDefault();
        if (query2 != null)
        {
            Console.WriteLine($"{query2.Model} {query2.Combined}");
        }

        Console.Read();
    }

    /// <summary>
    /// Reads the CSV file at <paramref name="v"/> and converts every data line into a <c>Car</c>.
    /// </summary>
    /// <param name="v">Path of the CSV file.</param>
    /// <returns>One <c>Car</c> per line that is longer than one character, excluding the first line.</returns>
    private static List<Car> ProcessCars(string v)
    {
        // The file is machine-written data, so numbers are parsed with the invariant
        // culture instead of whatever culture the current machine is configured with.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var result = File.ReadAllLines(v)
            .Skip(1) // the first line is skipped (presumably a header row)
            .Where(l => l.Length > 1)
            .Select(line =>
            {
                var columns = line.Split(',');
                return new Car
                {
                    Year = columns[0],
                    Manufacturer = columns[1],
                    Model = columns[2],
                    Displacement = double.Parse(columns[3], invariant),
                    CylindersCount = int.Parse(columns[4], invariant),
                    City = int.Parse(columns[5], invariant),
                    Highway = int.Parse(columns[6], invariant),
                    Combined = int.Parse(columns[7], invariant)
                };
            });
        return result.ToList();
    }
}
Car.cs
/// <summary>
/// One car record; ProcessCars fills it from the comma-separated columns of a CSV line.
/// </summary>
public class Car
{
/// <summary>Text of CSV column 0, stored unparsed.</summary>
public string Year { get; set; }
/// <summary>Text of CSV column 1.</summary>
public string Manufacturer { get; set; }
/// <summary>Text of CSV column 2.</summary>
public string Model { get; set; }
/// <summary>CSV column 3 parsed as a double.</summary>
public double Displacement { get; set; }
/// <summary>CSV column 4 parsed as an int.</summary>
public int CylindersCount { get; set; }
/// <summary>CSV column 5 parsed as an int.</summary>
public int City { get; set; }
/// <summary>CSV column 6 parsed as an int.</summary>
public int Highway { get; set; }
/// <summary>CSV column 7 parsed as an int; the value the demo queries sort by.</summary>
public int Combined { get; set; }
}
在获取油耗最高的车时,之所以不使用Take(1),是因为Take(1)返回的仍然是一个集合(IEnumerable),而First或者FirstOrDefault方法会直接返回集合中的那个元素本身,相当于在提取数据的同时展开了集合,可以为接下来的开发带来极大便利。
六、数据量化Any、All、Contains
Any,判断是否符合某一条件,返回bool值
// Any with a predicate: asks whether at least one car is made by Volkswagen.
var query3 = cars.Any(car => car.Manufacturer == "Volkswagen");
Console.WriteLine(query3);

// Report the same answer as text.
Console.WriteLine(query3 ? "有大众" : "无大众");
// Check whether the collection is empty.
// Any() without a predicate is true when there is at least one element,
// so "is empty" is its negation.
var isCarsEmpty = !cars.Any();
Contains方法,参数接受的不是lambda方法,而是实例化对象。
// Contains: takes an element instance rather than a predicate.
// NOTE(review): Car as shown does not override Equals, so this presumably
// compares by reference — confirm.
var isReal = cars.Contains(query2);
All方法,与Any相对:Any只要有一个元素符合条件就返回true,而All要求所有元素都符合条件才返回true
// All: true only when every car satisfies the predicate.
var query4 = cars.All(car => car.Manufacturer == "Volkswagen");

// Print the result of the All query itself (query4), not the earlier Any result.
Console.WriteLine(query4);
if (query4)
{
    Console.WriteLine("全是大众");
}
else
{
    Console.WriteLine("并不是全是大众");
}
七、数据投影与SelectMany
在上文中,使用select语句可以对数据进行塑形,数据类型发生了变化,对数据塑形的过程叫做数据投影
下面我们对select语句自己进行重写
/// <summary>
/// Reads the CSV file at <paramref name="v"/> and converts its data lines into cars
/// through the custom <c>ToCar</c> extension method.
/// </summary>
/// <param name="v">Path of the CSV file.</param>
/// <returns>One <c>Car</c> per line that is longer than one character, excluding the first line.</returns>
private static List<Car> ProcessCars(string v)
{
    var result = File.ReadAllLines(v)
        .Skip(1) // the first line is skipped (presumably a header row)
        .Where(l => l.Length > 1)
        .ToCar(); // the statement terminator was missing here
    return result.ToList();
}
/// <summary>
/// Hand-written projection that plays the role of a Select call turning CSV lines into cars.
/// </summary>
public static class CarExtension
{
    /// <summary>
    /// Lazily converts each comma-separated line of <paramref name="source"/> into a <c>Car</c>.
    /// </summary>
    /// <param name="source">CSV data lines with at least eight columns each.</param>
    /// <returns>A deferred sequence with one <c>Car</c> per input line.</returns>
    public static IEnumerable<Car> ToCar(this IEnumerable<string> source)
    {
        // The lines are machine-written data, so numbers are parsed with the invariant
        // culture instead of whatever culture the current machine is configured with.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        foreach (var line in source)
        {
            var columns = line.Split(',');
            yield return new Car
            {
                Year = columns[0],
                Manufacturer = columns[1],
                Model = columns[2],
                Displacement = double.Parse(columns[3], invariant),
                CylindersCount = int.Parse(columns[4], invariant),
                City = int.Parse(columns[5], invariant),
                Highway = int.Parse(columns[6], invariant),
                Combined = int.Parse(columns[7], invariant)
            };
        }
    }
}
在上面想要得到耗油量最多的车时,我们只需要得到Model和Combined,任何其他的数据都是冗余数据,因此可以使用select来去除多余数据
// Fetch the car with the highest Combined value, keeping only the two
// properties that are needed. The anonymous type takes its property names
// (Model, Combined) from the projected members.
var query2 = (from car in cars
              orderby car.Combined descending, car.Model descending
              select new { car.Model, car.Combined })
             .FirstOrDefault();
SelectMany()–>展开嵌套集合,并且提取子集合中的所有数据
// SelectMany flattens nested sequences. A string is a sequence of characters,
// so flattening the Model strings yields every character of every model name,
// and the loop prints them one character per line.
var query5 = cars.SelectMany(c => c.Model);
foreach (var letter in query5)
{
    Console.WriteLine(letter);
}
八、数据连接join
Linq中的join对应sql中的inner join内连接:只要某条数据在任意一个数据源中找不到匹配项,这条数据就不会出现在输出结果中
以下对多数据源,两个CSV文件的数据进行处理
Program.cs
/// <summary>
/// Console demo that joins car records with manufacturer records loaded from two CSV files.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        List<Car> cars = ProcessCars("fuel.csv");
        List<Manufacturer> manufacturers = ProcessManufacturers("manufacturers.csv");

        // Query syntax: inner join on the manufacturer name, highest Combined first,
        // ties broken by Model in descending order.
        var query = (from car in cars
                     join manufacturer in manufacturers on car.Manufacturer equals manufacturer.Name
                     orderby car.Combined descending, car.Model descending
                     select new
                     {
                         Manufacturer = car.Manufacturer,
                         Model = car.Model,
                         Combined = car.Combined,
                         Headquarters = manufacturer.Headquarters,
                         Phone = manufacturer.Phone
                     })
                    .Take(10);
        foreach (var c in query)
        {
            Console.WriteLine($"{c.Manufacturer} {c.Model} {c.Combined} {c.Headquarters} {c.Phone}");
        }

        Console.WriteLine("\n");

        // Method syntax for the same query. The tie-breaker must be ThenByDescending
        // to match "car.Model descending" above; ThenBy would sort ties ascending
        // and could select different rows.
        var query2 = cars.Join(manufacturers, (c) => c.Manufacturer, (m) => m.Name, (c, m) => new
            {
                Car = c,
                Manufacturer = m
            })
            .OrderByDescending(joinData => joinData.Car.Combined)
            .ThenByDescending(joinData => joinData.Car.Model)
            .Select(joinData => new
            {
                Manufacturer = joinData.Car.Manufacturer,
                Model = joinData.Car.Model,
                Combined = joinData.Car.Combined,
                Headquarters = joinData.Manufacturer.Headquarters,
                Phone = joinData.Manufacturer.Phone
            })
            .Take(10);
        foreach (var c in query2)
        {
            Console.WriteLine($"{c.Manufacturer} {c.Model} {c.Combined} {c.Headquarters} {c.Phone}");
        }

        Console.Read();
    }

    /// <summary>
    /// Reads the CSV file at <paramref name="v"/> and converts its data lines into cars.
    /// </summary>
    /// <param name="v">Path of the CSV file.</param>
    /// <returns>One <c>Car</c> per line that is longer than one character, excluding the first line.</returns>
    private static List<Car> ProcessCars(string v)
    {
        // ToCar replaces an inline Select(line => new Car { ... }) projection.
        var result = File.ReadAllLines(v)
            .Skip(1) // the first line is skipped (presumably a header row)
            .Where(l => l.Length > 1)
            .ToCar();
        return result.ToList();
    }

    /// <summary>
    /// Reads the CSV file at <paramref name="path"/> and converts every line that is longer
    /// than one character into a <c>Manufacturer</c>.
    /// </summary>
    /// <param name="path">Path of the CSV file.</param>
    /// <returns>The manufacturers in file order.</returns>
    private static List<Manufacturer> ProcessManufacturers(string path)
    {
        // NOTE(review): unlike ProcessCars, no first line is skipped here —
        // confirm that manufacturers.csv has no header row.
        var query = File.ReadAllLines(path)
            .Where(l => l.Length > 1)
            .Select(l =>
            {
                var columns = l.Split(',');
                return new Manufacturer
                {
                    Name = columns[0],
                    Headquarters = columns[1],
                    Phone = columns[2]
                };
            });
        return query.ToList();
    }
}
/// <summary>
/// Extension methods that turn raw CSV lines into <c>Car</c> objects.
/// </summary>
public static class CarExtensions
{
    /// <summary>
    /// Lazily converts each comma-separated line of <paramref name="source"/> into a <c>Car</c>.
    /// </summary>
    /// <param name="source">CSV data lines with at least eight columns each.</param>
    /// <returns>A deferred sequence with one <c>Car</c> per input line.</returns>
    public static IEnumerable<Car> ToCar(this IEnumerable<string> source)
    {
        // The lines are machine-written data, so numbers are parsed with the invariant
        // culture instead of whatever culture the current machine is configured with.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        foreach (var line in source)
        {
            var columns = line.Split(',');
            yield return new Car
            {
                Year = columns[0],
                Manufacturer = columns[1],
                Model = columns[2],
                Displacement = double.Parse(columns[3], invariant),
                CylindersCount = int.Parse(columns[4], invariant),
                City = int.Parse(columns[5], invariant),
                Highway = int.Parse(columns[6], invariant),
                Combined = int.Parse(columns[7], invariant)
            };
        }
    }
}
Car.cs
/// <summary>
/// One car record; the ToCar extension method fills it from the comma-separated columns of a CSV line.
/// </summary>
public class Car
{
/// <summary>Text of CSV column 0, stored unparsed.</summary>
public string Year { get; set; }
/// <summary>Text of CSV column 1; the key the join demos match against Manufacturer.Name.</summary>
public string Manufacturer { get; set; }
/// <summary>Text of CSV column 2.</summary>
public string Model { get; set; }
/// <summary>CSV column 3 parsed as a double.</summary>
public double Displacement { get; set; }
/// <summary>CSV column 4 parsed as an int.</summary>
public int CylindersCount { get; set; }
/// <summary>CSV column 5 parsed as an int.</summary>
public int City { get; set; }
/// <summary>CSV column 6 parsed as an int.</summary>
public int Highway { get; set; }
/// <summary>CSV column 7 parsed as an int; the value the demo queries sort by.</summary>
public int Combined { get; set; }
}
Manufacturer.cs
/// <summary>
/// One manufacturer record; ProcessManufacturers fills it from the comma-separated columns of a CSV line.
/// </summary>
public class Manufacturer
{
/// <summary>Text of CSV column 0; the key the join demos match against Car.Manufacturer.</summary>
public string Name { get; set; }
/// <summary>Text of CSV column 1.</summary>
public string Headquarters { get; set; }
/// <summary>Text of CSV column 2.</summary>
public string Phone { get; set; }
}
九、数据分组group
例如想得到所有生产商排名油耗前两名的汽车型号
// For every manufacturer, list the two cars with the highest Combined value.
// Query syntax: group the cars by manufacturer, groups ordered by key, descending.
var query3 = from car in cars
             group car by car.Manufacturer into manufacturerGroup
             orderby manufacturerGroup.Key descending
             select manufacturerGroup;
foreach (var byMaker in query3)
{
    Console.WriteLine($"{byMaker.Key} 有 {byMaker.Count()} 辆汽车 \n 油耗前两名为:");
    var topTwo = byMaker.OrderByDescending(member => member.Combined).Take(2);
    foreach (var entry in topTwo)
    {
        Console.WriteLine($"\t{entry.Model} 油耗为:{entry.Combined}");
    }
}

// Method syntax for the same grouping.
var query4 = cars
    .GroupBy(member => member.Manufacturer)
    .OrderByDescending(grouping => grouping.Key);
foreach (var byMaker in query4)
{
    Console.WriteLine($"{byMaker.Key} 有 {byMaker.Count()} 辆汽车 \n 油耗前两名为:");
    var topTwo = byMaker.OrderByDescending(member => member.Combined).Take(2);
    foreach (var entry in topTwo)
    {
        Console.WriteLine($"\t{entry.Model} 油耗为:{entry.Combined}");
    }
}
十、数据分组连接 group join
可以对多个数据源同时进行分组
想得到所有生产商排名油耗前两名的汽车型号,同时想得到总部地址以及联系电话
// For every manufacturer, list the two cars with the highest Combined value
// together with the manufacturer's headquarters and phone number.
// Query syntax: "join ... into" produces one group of cars per manufacturer.
var query5 = from manufacturer in manufacturers
             join car in cars on manufacturer.Name equals car.Manufacturer into carGroup
             orderby manufacturer.Name descending
             select new
             {
                 Manufacturer = manufacturer,
                 Cars = carGroup
             };
foreach (var pairing in query5)
{
    var maker = pairing.Manufacturer;
    Console.WriteLine($"{maker.Name} {maker.Headquarters} {maker.Phone}");
    var topTwo = pairing.Cars.OrderByDescending(member => member.Combined).Take(2);
    foreach (var entry in topTwo)
    {
        Console.WriteLine($"\t{entry.Model} 油耗为:{entry.Combined}");
    }
}

// Method syntax for the same group join.
var query6 = manufacturers
    .GroupJoin(cars, m => m.Name, c => c.Manufacturer, (m, carGroup) => new
    {
        Manufacturer = m,
        Cars = carGroup
    })
    .OrderByDescending(joined => joined.Manufacturer.Name);
foreach (var pairing in query6)
{
    var maker = pairing.Manufacturer;
    Console.WriteLine($"{maker.Name} {maker.Headquarters} {maker.Phone}");
    var topTwo = pairing.Cars.OrderByDescending(member => member.Combined).Take(2);
    foreach (var entry in topTwo)
    {
        Console.WriteLine($"\t{entry.Model} 油耗为:{entry.Combined}");
    }
}
十一、数据聚合
Linq可以在处理分组数据的过程中进行聚合运算
例如实现找出每个品牌的平均油耗,最高油耗,最低油耗
// For every manufacturer, compute the average, highest and lowest Combined value.
// Query syntax: aggregate inside the projection, then continue the query with
// "into" so that the projected statistics can be ordered by their average.
var query7 = from car in cars
             group car by car.Manufacturer into carGroup
             select new
             {
                 Manufacture = carGroup.Key,
                 Avg = carGroup.Average(c => c.Combined),
                 Max = carGroup.Max(c => c.Combined),
                 Min = carGroup.Min(c => c.Combined)
             } into tempGroup
             orderby tempGroup.Avg descending
             select tempGroup;
foreach (var group in query7)
{
    Console.WriteLine($"{group.Manufacture} 油耗情况");
    Console.WriteLine($"\t 最高:{group.Max}");
    Console.WriteLine($"\t 最低:{group.Min}");
    Console.WriteLine($"\t 平均:{group.Avg}");
}

// Method syntax for the same aggregation, using the GroupBy overload with a result selector.
// The group key parameter is named "manufacturer" so that the inner lambdas'
// "c" parameter no longer shadows it (a compile error before C# 8).
var query8 = cars.GroupBy(c => c.Manufacturer, (manufacturer, carGroup) => new
    {
        Manufacture = manufacturer,
        Avg = carGroup.Average(c => c.Combined),
        Max = carGroup.Max(c => c.Combined),
        Min = carGroup.Min(c => c.Combined)
    })
    .OrderByDescending(stats => stats.Avg);
foreach (var group in query8)
{
    Console.WriteLine($"{group.Manufacture} 油耗情况");
    Console.WriteLine($"\t 最高:{group.Max}");
    Console.WriteLine($"\t 最低:{group.Min}");
    Console.WriteLine($"\t 平均:{group.Avg}");
}