Code & Func

最近在看6.S081的实验，希望这次别半途而废吧。

sleep

简单的解析命令行参数，然后调用user.h里面sleep函数即可。这道题应该只是让人熟悉一下环境。

记得要修改Markfile来编译 sleep 命令。
./grade-lab-util sleep可以快速的运行测试。

1
#include "kernel/types.h"
2
#include "kernel/stat.h"
3
#include "user/user.h"
4

5
int
6
main(int argc, char *argv[])
7
{
8
    if (argc == 1) {
9
        fprintf(2, "usage: sleep ticks\n");
10
        exit(1);
11
    }
12

13
    int ticks = atoi(argv[1]);
14
    sleep(ticks);
15

2 collapsed lines
16
    exit(0);
17
}

pingpong

这一题要实现通过pipe的进程间通信，虽然只是简单的pingpong。但也有需要注意的点：

因为两个进程需要相互发信息，所以需要两个pipe，如果只有一个pipe的话，第一个进程向pipe写完后，需要读取第二个进程发过来的pong，这时就会出现两个进程同时读管道的情况，进而导致

1
#include "kernel/types.h"
2
#include "kernel/stat.h"
3
#include "user/user.h"
4

5
int
6
main(int argc, char *argv[])
7
{
8
    int fds1[2], fds2[2];
9
    if (pipe(fds1) != 0) {
10
        fprintf(2, "make pipe fail.\n");
11
        exit(1);
12
    }
13

14
    if (pipe(fds2) != 0) {
15
        fprintf(2, "make pipe fail.\n");
31 collapsed lines
16
        exit(1);
17
    }
18

19
    char byte1 = '1';
20
    char byte2 = '2';
21
    char temp;
22

23
    int pid = fork();
24
    if (pid < 0) {
25
        fprintf(2, "fork fail\n");
26
        exit(2);
27
    } else if (pid > 0) {
28
        // parent
29
        write(fds1[1], &byte1, 1);
30
        read(fds2[0], &temp, 1);
31
        if (temp == '2') {
32
            printf("%d: received pong\n", getpid());
33
        }
34
    wait(0);
35
    } else {
36
        // child
37
        read(fds1[0], &temp, 1);
38
        if (temp == '1') {
39
            printf("%d: received ping\n", getpid());
40
        }
41
        write(fds2[1], &byte2, 1);
42
    }
43

44
    exit(0);
45

46
}

primes

这道题是想让我们用pipe和fork来实现一个CSP风格的程序，从给出的链接可以找到要是程序的核心伪代码和图例：

1
p = get a number from left neighbor
2
print p
3
loop:
4
    n = get a number from left neighbor
5
    if (p does not divide n)
6
        send n to right neighbor

简单来讲就是，第一个进程先创建一个子进程，并使用管道和子进程通信，然后父进程向管道写入2到35的数字。子进程执行下面操作：

从与父进程之间的管道读取数字
如果没读到（read返回0），则退出
如果读到了，则输出该数字p，并创建新的子进程，同时创建与新进程间的管道，子进程跳转到1。
从与父进程之间的管道读取数字n，并判断n是否能够被p整除，如果不能则向与子进程的管道中写入该数字。当读不到数字时就退出。

上面的流程其实没有提及到管道的关闭和父进程的wait操作，但是这些对于程序的正确性是非常重要的，如果没有及时关闭不必要的读端和写端，会导致操作系统文件描述符耗尽，从而创建管道失败。而如果父进程在结束前没有wait的话，会导致子进程的输出与shell的输出混在一起。

实现代码如下：

1
#include "kernel/types.h"
2
#include "kernel/stat.h"
3
#include "user.h"
4

5
int
6
main(int argc, char *argv[])
7
{
8
    int fds[2];
9
    int pid, i, res, readfd, writefd;
10
    int p, n;
11
    if (pipe(fds)) {
12
        fprintf(2, "make pipe fail\n");
13
        exit(1);
14
    }
15

52 collapsed lines
16
    pid = fork();
17
    if (pid < 0) {
18
        fprintf(2, "fork fail\n");
19
        exit(1);
20
    } else if (pid) {
21
        // parent
22
        close(fds[0]); // close read
23
        for(i = 2;i <= 35;i++) {
24
            write(fds[1], &i, 4);
25
        }
26
        close(fds[1]);
27
        wait(0);
28
        exit(0);
29
    }
30

31
    // child
32
    while(1) {
33
        close(fds[1]);
34
        readfd = fds[0];
35
        res = read(readfd, &p, 4);
36
        if (res == 0) {
37
            close(readfd);
38
            exit(0);
39
        }
40
        if (pipe(fds)) {
41
            fprintf(2, "make pipe fail");
42
            exit(1);
43
        }
44

45
        pid = fork();
46
        if (pid < 0) {
47
            fprintf(2, "fork fail\n");
48
            exit(2);
49
        } else if (pid) {
50
            // parent
51
            writefd = fds[1];
52
            close(fds[0]);
53

54
            printf("prime %d\n", p);
55
            while(read(readfd, &n, 4)) {
56
                if (n % p) {
57
                    write(writefd, &n, 4);
58
                }
59
            }
60
            close(readfd);
61
            close(writefd);
62
            wait(0);
63
            break;
64
        }
65
    }
66
    exit(0);
67
}

find

这道题要求实现一个简易的find命令，虽然看上去听复杂的，但是基本上将ls里面的代码修改一下即可搞定。

所以我们可以先分析一下user/ls.c的代码，这个程序的核心在于ls函数中的实现，所以我们只看这个即可。

ls 的实现

首先是用open打开给定路径，然后用fstat来获取文件的一下属性：

1
if((fd = open(path, 0)) < 0){
2
  fprintf(2, "ls: cannot open %s\n", path);
3
  return;
4
}
5

6
if(fstat(fd, &st) < 0){
7
  fprintf(2, "ls: cannot stat %s\n", path);
8
  close(fd);
9
  return;
10
}

其中st是一个struct stat类型的变量，它的定义可以在kernel/stat.h中找到：

1
#define T_DIR     1   // Directory
2
#define T_FILE    2   // File
3
#define T_DEVICE  3   // Device
4

5
struct stat {
6
  int dev;     // File system's disk device
7
  uint ino;    // Inode number
8
  short type;  // Type of file
9
  short nlink; // Number of links to file
10
  uint64 size; // Size of file in bytes
11
};

从stat中我们可以获取到文件类型，通过这个我们就可以来判断当前打开的是目录还是文件了，如果是文件则输出文件信息，如果是目录则解析目录中所包含的文件，并逐个输出文件信息，解析目录的代码如下：

1
if(strlen(path) + 1 + DIRSIZ + 1 > sizeof buf){
2
  printf("ls: path too long\n");
3
  break;
4
}
5
strcpy(buf, path);
6
p = buf+strlen(buf);
7
*p++ = '/';
8
while(read(fd, &de, sizeof(de)) == sizeof(de)){
9
  if(de.inum == 0)
10
  continue;
11
  memmove(p, de.name, DIRSIZ);
12
  p[DIRSIZ] = 0;
13
  if(stat(buf, &st) < 0){
14
  printf("ls: cannot stat %s\n", buf);
15
  continue;
3 collapsed lines
16
  }
17
  printf("%s %d %d %d\n", fmtname(buf), st.type, st.ino, st.size);
18
}

上面的代码的核心在于通过while循环来从目录的文件描述符中读取目录项，目录项的是一个struct dirent结构体，可以从kernel/fs.h找到相关的定义：

1
#define DIRSIZ 14
2
struct dirent {
3
   ushort inum;
4
   char name[DIRSIZ];
5
 };

其中inum是inode编号。

find 实现

find和ls的实现基本一致，区别点在于：

当打开的是文件时，需要与输入pattern进行比较，来决定是否输出
当打开的是目录时，需要递归调用find函数
由于与pattern进行比较是文件名，而不是路径，所以为了简便，find 的三个参数分别为path，filename，pattern。

1
#include "kernel/types.h"
2
#include "kernel/stat.h"
3
#include "user.h"
4
#include "kernel/fs.h"
5

6
void
7
find(char *path, char *filename, char *pattern)
8
{
9
    int fd;
10
    char buf[512], *p;
11
    struct stat st;
12
    struct dirent de;
13

14
    if ((fd = open(path, 0)) < 0) {
15
        fprintf(2, "find: cannot open %s\n", path);
49 collapsed lines
16
        exit(1);
17
    }
18

19
    if (fstat(fd, &st) < 0) {
20
        fprintf(2, "find: cannot stst %s\n", path);
21
        close(fd);
22
        exit(1);
23
    }
24

25
    switch(st.type) {
26
        case T_FILE:
27
            if (strcmp(filename, pattern) == 0) {
28
                printf("%s\n", path);
29
            }
30
            break;
31
        case T_DIR:
32
            if (strlen(path) + 1 + DIRSIZ + 1 > sizeof buf) {
33
                printf("find: path too long\n");
34
            }
35
            strcpy(buf, path);
36
            p = buf + strlen(buf);
37
            *p++ = '/';
38
            while(read(fd, &de, sizeof(de)) == sizeof(de)) {
39
                if (de.inum == 0) {
40
                    continue;
41
                }
42
                if (strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0) {
43
                    continue;
44
                }
45
                memmove(p, de.name, DIRSIZ);
46
                p[DIRSIZ] = 0;
47
                find(buf, de.name, pattern);
48
            }
49
            break;
50
    }
51
    close(fd);
52
}
53

54
int
55
main(int argc, char *argv[])
56
{
57
    if (argc < 3) {
58
        fprintf(2, "usage: find <path> <pattern>\n");
59
        exit(1);
60
    }
61

62
    find(argv[1], argv[1], argv[2]);
63
    exit(0);
64
}

xargs

在解决这道题前，我们先了解一下xargs的使用。

xargs 基本使用

我们知道在shell中可以使用|将两个命令的输出和输入连接在一起，比如说cat text.txt | more，这可以让我们将多个简单的程序连接在一起实现复杂的功能。而有时候，我们希望将第一个命令的输出作为第二个命令的命令行参数来调用。比如说我们有一个文件cmd.txt包含以下内容：

1
echo
2
ls

我们希望用which命令来查询cmd.txt中每行命令所在的路径。如果我们直接通过|来连接cat cmd.txt和which的话，会没有任何输出，这是因为which命令是从命令行参数中读取要查询的命令名，而不是从标准输入中读取。这种时候，我们就可以用xargs来将标准输入转化为命令行参数了：

1
# wuxiaobai24 @ wuxiaobai24-pc in ~/code/6.828/xv6-labs-2020 on git:util x [15:56:08]
2
$ cat cmd.txt
3
echo
4
ls
5

6
# wuxiaobai24 @ wuxiaobai24-pc in ~/code/6.828/xv6-labs-2020 on git:util x [15:56:13]
7
$ cat cmd.txt | which
8

9
# wuxiaobai24 @ wuxiaobai24-pc in ~/code/6.828/xv6-labs-2020 on git:util x [15:56:18] C:1
10
$ cat cmd.txt | xargs which
11
/usr/bin/echo
12
/usr/bin/ls

xargs 的使用格式xargs [options] [command [initial-arguments]]

echo "world\nos" | xargs echo hello 会被转化成echo hello world和echo hello os来执行。

xargs

因为题目给出了这里的xargs是类似于xargs -n 1的执行效果就好了，这其实就降低了难度了，因为它其实限制了xargs一次只会传入一个参数，我们逐字符读取输入，然后判断是否为\n，如果不是就将该字符暂存，如果是就将这之前保存字符串添加到原本命令行参数的后面，并调用fork和exec执行即可。

但是其实正常xargs好像也没太大问题，即支持一行中有多个参数，只是需要在解析时考虑多一个和\t即可。

1
#include "kernel/types.h"
2
#include "kernel/stat.h"
3
#include "kernel/param.h"
4
#include "user.h"
5

6
int
7
main(int argc, char *argv[])
8
{
9
    int i, cmdLen;
10
    char *cmd[MAXARG];
11
    char buf[512];
12
    char c;
13

14
    if (argc < 2) {
15
        fprintf(2, "usage: xargs <command>\n");
39 collapsed lines
16
        exit(1);
17
    }
18

19
    cmdLen = 0;
20
    for(i = 1;argv[i];i++) {
21
        cmd[cmdLen++] = argv[i];
22
    }
23

24
    i = 0;
25
    cmd[cmdLen] = buf;
26
    while(read(0, &c, 1)) {
27
        switch(c) {
28
            case ' ':
29
            case '\t':
30
                buf[i++] = '\0';
31
                cmd[++cmdLen] = buf + i;
32
                break;
33
            case '\n':
34
                cmd[++cmdLen] = 0;
35
                buf[i++] = '\0';
36
                int pid = fork();
37
                if (pid < 0) {
38
                    fprintf(2, "xargs: fork fail\n");
39
                    exit(1);
40
                } else if (pid == 0) {
41
                    // child
42
                    exec(cmd[0], cmd);
43
                }
44
                wait(0);
45
                cmdLen = argc - 1;
46
                cmd[cmdLen] = buf;
47
                i = 0;
48
                break;
49
            default:
50
                buf[i++] = c;
51
        }
52
    }
53
    exit(0);
54
}

uptime

貌似是只需要调用uptime的返回值即可？

1
#include "kernel/types.h"
2
#include "kernel/stat.h"
3
#include "user.h"
4

5
int
6
main(int argc, char *argv[]) {
7
    int ticks = uptime();
8
    printf("%d\n", ticks);
9
    exit(0);
10
}

grep with regular expressions

因为给出了提示，在user/grep.c里面有关于正则表达式的代码，所以可以直接把那部分代码拷贝过来，然后将strcmp修改成对应的match即可。

正则表达式match代码实现如下：

1
// Regexp matcher from Kernighan & Pike,
2
// The Practice of Programming, Chapter 9.
3

4
int matchhere(char*, char*);
5
int matchstar(int, char*, char*);
6

7
int
8
match(char *re, char *text)
9
{
10
  if(re[0] == '^')
11
    return matchhere(re+1, text);
12
  do{  // must look at empty string
13
    if(matchhere(re, text))
14
      return 1;
15
  }while(*text++ != '\0');
26 collapsed lines
16
  return 0;
17
}
18

19
// matchhere: search for re at beginning of text
20
int matchhere(char *re, char *text)
21
{
22
  if(re[0] == '\0')
23
    return 1;
24
  if(re[1] == '*')
25
    return matchstar(re[0], re+2, text);
26
  if(re[0] == '$' && re[1] == '\0')
27
    return *text == '\0';
28
  if(*text!='\0' && (re[0]=='.' || re[0]==*text))
29
    return matchhere(re+1, text+1);
30
  return 0;
31
}
32

33
// matchstar: search for c*re at beginning of text
34
int matchstar(int c, char *re, char *text)
35
{
36
  do{  // a * matches zero or more instances
37
    if(matchhere(re, text))
38
      return 1;
39
  }while(*text!='\0' && (*text++==c || c=='.'));
40
  return 0;
41
}

sh

not print $ when processing shell command from a file

一开始挺懵的，要判断sh的输入是否从文件中来，后来发现它提供了一个fstat的系统调用，函数头为int fstat(int fd, struct stat*)，而这里的struct stat可以从kernel/stat.h中找到：

1
#define T_DIR     1   // Directory
2
#define T_FILE    2   // File
3
#define T_DEVICE  3   // Device
4

5
struct stat {
6
  int dev;     // File system's disk device
7
  uint ino;    // Inode number
8
  short type;  // Type of file
9
  short nlink; // Number of links to file
10
  uint64 size; // Size of file in bytes
11
};

通过fstat，我们可以判断一个文件描述符是否为设备，而从user/init.c中可以看到sh的标准输入正式从设备console中来的，所以我们在sh程序的开始判断一次即可。如果是终端，则每次getcmd都输出$ ，如果不是，则每次都不输出。

1
int cmdFromConsole; // check whether cmd from console
2

3
int
4
getcmd(char *buf, int nbuf)
5
{
6
  if (cmdFromConsole)
7
    fprintf(2, "$ ");
8
  //....
9
}
10

11
//....
12
  fstat(0, &st);
13
  cmdFromConsole = (st.type == T_DEVICE);

6.S081 laba1 util

sleep

pingpong

primes

find

ls 的实现

find 实现

xargs

xargs 基本使用

xargs

uptime

grep with regular expressions

sh

not print $ when processing shell command from a file