【MIT-OS6.S081作业1.4】Lab1-utilities find
本文记录MIT-OS6.S081 Lab1 utilities 的find函数的实现过程
文章目录
- 1. 作业要求
- find (moderate)
- 2. 实现过程
- 2.1 代码实现
1. 作业要求
find (moderate)
Write a simple version of the UNIX find program: find all the files in a directory tree with a specific name. Your solution should be in the file user/find.c.
Some hints:
- Look at user/ls.c to see how to read directories.
- Use recursion to allow find to descend into sub-directories.
- Don’t recurse into “.” and “…”.
- Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu.
- You’ll need to use C strings. Have a look at K&R (the C book), for example Section 5.5.
- Note that == does not compare strings like in Python. Use strcmp() instead.
- Add the program to UPROGS in Makefile.
- Your solution is correct if produces the following output (when the file system contains the files b and a/b):
$ make qemu
…
init: starting sh
$ echo > b
$ mkdir a
$ echo > a/b
$ find . b
./b
./a/b
$
2. 实现过程
2.1 代码实现
提示中说到要重点关注user/ls.c
的实现,我们来看一下ls.c
:
#include "user/user.h"
#include "kernel/fs.h"
char*
fmtname(char *path)
{
static char buf[DIRSIZ+1];
char *p;
// Find first character after last slash.
for(p=path+strlen(path); p >= path && *p != '/'; p--)
;
p++;
// Return blank-padded name.
if(strlen(p) >= DIRSIZ)
return p;
memmove(buf, p, strlen(p));
memset(buf+strlen(p), ' ', DIRSIZ-strlen(p));
return buf;
}
void
ls(char *path)
{
char buf[512], *p;
int fd;
struct dirent de;
struct stat st;
if((fd = open(path, 0)) < 0){
fprintf(2, "ls: cannot open %s\n", path);
return;
}
if(fstat(fd, &st) < 0){
fprintf(2, "ls: cannot stat %s\n", path);
close(fd);
return;
}
switch(st.type){
case T_FILE:
printf("%s %d %d %l\n", fmtname(path), st.type, st.ino, st.size);
break;
case T_DIR:
if(strlen(path) + 1 + DIRSIZ + 1 > sizeof buf){
printf("ls: path too long\n");
break;
}
strcpy(buf, path);
p = buf+strlen(buf);
*p++ = '/';
while(read(fd, &de, sizeof(de)) == sizeof(de)){
if(de.inum == 0)
continue;
memmove(p, de.name, DIRSIZ);
p[DIRSIZ] = 0;
if(stat(buf, &st) < 0){
printf("ls: cannot stat %s\n", buf);
continue;
}
printf("%s %d %d %d\n", fmtname(buf), st.type, st.ino, st.size);
}
break;
}
close(fd);
}
int
main(int argc, char *argv[])
{
int i;
if(argc < 2){
ls(".");
exit(0);
}
for(i=1; i<argc; i++)
ls(argv[i]);
exit(0);
}
main函数很好懂,如果输入参数小于2(只有一个ls),那么对当前目录(.
)进行ls,否则对ls后面所有的参数视为目录进行ls。我们看下ls函数,下面进行了注释:
void
ls(char *path)
{
char buf[512], *p;
int fd;
struct dirent de;
struct stat st;
// 打开path路径
if((fd = open(path, 0)) < 0){
fprintf(2, "ls: cannot open %s\n", path);
return;
}
// 获取路径的信息
if(fstat(fd, &st) < 0){
fprintf(2, "ls: cannot stat %s\n", path);
close(fd);
return;
}
// 查看st是文件还是文件夹
switch(st.type){
case T_FILE: // 文件
// 打印文件名,文件引用号,文件大小
printf("%s %d %d %l\n", fmtname(path), st.type, st.ino, st.size);
break;
case T_DIR: // 文件夹
// 因为路径下还有文件要继续向下查找文件并在路径后面加上文件的名字,DIRSIZ表示当前文件夹下文件/文件夹名的最大长度,+1表示末尾的'\0',strlen不记录'\0',如果strlen(path) + 1 + DIRSIZ + 1 > sizeof buf那么路径太长了buf会越界,退出
if(strlen(path) + 1 + DIRSIZ + 1 > sizeof buf){
printf("ls: path too long\n");
break;
}
// 路径拷贝到buf
strcpy(buf, path);
// p指向'\0'并替换为'/',然后自增
p = buf+strlen(buf);
*p++ = '/';
// 读取dirent 类型的de结构体
while(read(fd, &de, sizeof(de)) == sizeof(de)){
if(de.inum == 0)
continue;
// de.name搬动到p,移动长度为DIRSIZ,末尾设置为0
memmove(p, de.name, DIRSIZ);
p[DIRSIZ] = 0;
// 读取buf文件的状态
if(stat(buf, &st) < 0){
printf("ls: cannot stat %s\n", buf);
continue;
}
// 打印信息
printf("%s %d %d %d\n", fmtname(buf), st.type, st.ino, st.size);
}
break;
}
close(fd);
}
这里使用了fmtname,我们在qemu模拟使用ls,我们会发现打印的就是文件的名字:
那么我们看一下fmtname的实现,其实就是找到倒数第一个’/‘,然后p移动到’/'后面表示文件名的开头,设置了一个buf来复制文件名:
char*
fmtname(char *path)
{
static char buf[DIRSIZ+1];
char *p;
// Find first character after last slash.
for(p=path+strlen(path); p >= path && *p != '/'; p--)
;
p++;
// Return blank-padded name.
if(strlen(p) >= DIRSIZ)
return p;
//移动长度是strlen,DIRSIZ没有填满在后面填充' ',这样的好处是shell打印出来是对齐的,比如打印为下面,文件名和后面的数字中间的间隔是对齐的
/*
$ ./a/b 1 1 2
$ ./b 1 1 5
*/
memmove(buf, p, strlen(p));
memset(buf+strlen(p), ' ', DIRSIZ-strlen(p));
return buf;
}
我们可以编写我们的find函数了,其实就是把上面的ls拿来改一改,要注意的是进行递归搜索,如果发现是文件夹,那么向下递归的path参数是文件+文件名,如果发现是文件,判断是否是要找的文件名,如果是那么打印,递归停止。
为了不匹配’.‘和’…',我们需要在遍历到文件夹时进行判断来跳过:
if (de.inum == 0 || strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0)
完整代码:
#include "kernel/types.h"
#include "user/user.h"
#include "kernel/stat.h"
#include "kernel/fs.h"
char* getFileName(char *path)
{
char *p;
// Find first character after last slash.
for(p=path+strlen(path); p >= path && *p != '/'; p--)
;
p++;
return p;
}
void find(char* path, char* name)
{
char buf[512], *p;
int fd;
struct dirent de;
struct stat st;
if ((fd = open(path, 0)) < 0)
{
fprintf(2, "find: cannot open %s\n", path);
return;
}
if (fstat(fd, &st) < 0)
{
fprintf(2, "find: cannot stat %s\n", path);
close(fd);
return;
}
if (st.type == T_FILE)
{
char* fileName = getFileName(path);
if (strcmp(fileName, name) == 0)
printf("%s\n", path);
close(fd);
}
else if (st.type == T_DIR)
{
if (strlen(path) + 1 + DIRSIZ + 1 > sizeof(buf))
{
printf("find: path too long\n");
}
else
{
strcpy(buf, path);
p = buf + strlen(buf);
*p++ = '/';
while (read(fd, &de, sizeof(de)) == sizeof(de))
{
if (de.inum == 0 || strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0)
continue;
memmove(p, de.name, DIRSIZ);
p[DIRSIZ] = 0;
if (stat(buf, &st) < 0)
{
printf("find: cannot stat %s\n", buf);
continue;
}
find(buf, name);
}
}
close(fd);
}
}
int main(int argc, char* argv[])
{
if (argc != 3)
{
printf("error: find <?> <?>\n");
exit(1);
}
find(argv[1], argv[2]);
exit(0);
}
UPROGS=\
$U/_cat\
$U/_echo\
$U/_forktest\
$U/_grep\
$U/_init\
$U/_kill\
$U/_ln\
$U/_ls\
$U/_mkdir\
$U/_rm\
$U/_sh\
$U/_stressfs\
$U/_usertests\
$U/_grind\
$U/_wc\
$U/_zombie\
$U/_sleep\
$U/_pingpong\
$U/_primes\
$U/_find\
重新编译通过,测试find
,如题所述正确打印:
确实过了一会然后可以继续输入命令,然后我们再使用作业所说的测试命令:
./grade-lab-util find
完活!
文件夹和文件的递归还是比较好想的。