6.S081 laba1 util

最近在看6.S081的实验，希望这次别半途而废吧。

sleep

简单的解析命令行参数，然后调用user.h里面sleep函数即可。这道题应该只是让人熟悉一下环境。

记得要修改Markfile来编译 sleep 命令。
./grade-lab-util sleep可以快速的运行测试。

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int
main(int argc, char *argv[])
{
    if (argc == 1) {
        fprintf(2, "usage: sleep ticks\n");
        exit(1);
    }
    
    int ticks = atoi(argv[1]);
    sleep(ticks);
    
    exit(0);
}

pingpong

这一题要实现通过pipe的进程间通信，虽然只是简单的pingpong。但也有需要注意的点：

因为两个进程需要相互发信息，所以需要两个pipe，如果只有一个pipe的话，第一个进程向pipe写完后，需要读取第二个进程发过来的pong，这时就会出现两个进程同时读管道的情况，进而导致

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int
main(int argc, char *argv[])
{
    int fds1[2], fds2[2];
    if (pipe(fds1) != 0) {
        fprintf(2, "make pipe fail.\n");
        exit(1);
    }
    
    if (pipe(fds2) != 0) {
        fprintf(2, "make pipe fail.\n");
        exit(1);
    }

    char byte1 = '1';
    char byte2 = '2';
    char temp;

    int pid = fork();
    if (pid < 0) {
        fprintf(2, "fork fail\n");
        exit(2);
    } else if (pid > 0) {
        // parent
        write(fds1[1], &byte1, 1);
        read(fds2[0], &temp, 1);
        if (temp == '2') {
            printf("%d: received pong\n", getpid());
        }
		wait(0);
    } else {
        // child
        read(fds1[0], &temp, 1);
        if (temp == '1') {
            printf("%d: received ping\n", getpid());
        }
        write(fds2[1], &byte2, 1);
    }
    
    exit(0);

}

primes

这道题是想让我们用pipe和fork来实现一个CSP风格的程序，从给出的链接可以找到要是程序的核心伪代码和图例：

p = get a number from left neighbor
print p
loop:
    n = get a number from left neighbor
    if (p does not divide n)
        send n to right neighbor

primes

简单来讲就是，第一个进程先创建一个子进程，并使用管道和子进程通信，然后父进程向管道写入2到35的数字。子进程执行下面操作：

从与父进程之间的管道读取数字
如果没读到（read返回0），则退出
如果读到了，则输出该数字p，并创建新的子进程，同时创建与新进程间的管道，子进程跳转到1。
从与父进程之间的管道读取数字n，并判断n是否能够被p整除，如果不能则向与子进程的管道中写入该数字。当读不到数字时就退出。

上面的流程其实没有提及到管道的关闭和父进程的wait操作，但是这些对于程序的正确性是非常重要的，如果没有及时关闭不必要的读端和写端，会导致操作系统文件描述符耗尽，从而创建管道失败。而如果父进程在结束前没有wait的话，会导致子进程的输出与shell的输出混在一起。

实现代码如下：

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user.h"

int
main(int argc, char *argv[]) 
{
    int fds[2];
    int pid, i, res, readfd, writefd;
    int p, n;
    if (pipe(fds)) {
        fprintf(2, "make pipe fail\n");
        exit(1);
    }

    pid = fork();
    if (pid < 0) {
        fprintf(2, "fork fail\n");
        exit(1);
    } else if (pid) {
        // parent
        close(fds[0]); // close read
        for(i = 2;i <= 35;i++) {
            write(fds[1], &i, 4);
        }
        close(fds[1]);
        wait(0);
        exit(0);
    }
    
    // child
    while(1) {
        close(fds[1]);
        readfd = fds[0];
        res = read(readfd, &p, 4);
        if (res == 0) {
            close(readfd);
            exit(0);
        }
        if (pipe(fds)) {
            fprintf(2, "make pipe fail");
            exit(1);
        }

        pid = fork();
        if (pid < 0) {
            fprintf(2, "fork fail\n");
            exit(2);
        } else if (pid) {
            // parent
            writefd = fds[1];
            close(fds[0]);
            
            printf("prime %d\n", p);
            while(read(readfd, &n, 4)) {
                if (n % p) {
                    write(writefd, &n, 4);
                }
            }
            close(readfd);
            close(writefd);
            wait(0);
            break;
        }
    }
    exit(0);
}

find

这道题要求实现一个简易的find命令，虽然看上去听复杂的，但是基本上将ls里面的代码修改一下即可搞定。

所以我们可以先分析一下user/ls.c的代码，这个程序的核心在于ls函数中的实现，所以我们只看这个即可。

ls 的实现

首先是用open打开给定路径，然后用fstat来获取文件的一下属性：

if((fd = open(path, 0)) < 0){
	fprintf(2, "ls: cannot open %s\n", path);
	return;
}

if(fstat(fd, &st) < 0){
	fprintf(2, "ls: cannot stat %s\n", path);
	close(fd);
	return;
}

其中st是一个struct stat类型的变量，它的定义可以在kernel/stat.h中找到：

#define T_DIR     1   // Directory
#define T_FILE    2   // File
#define T_DEVICE  3   // Device

struct stat {
  int dev;     // File system's disk device
  uint ino;    // Inode number
  short type;  // Type of file
  short nlink; // Number of links to file
  uint64 size; // Size of file in bytes
};

从stat中我们可以获取到文件类型，通过这个我们就可以来判断当前打开的是目录还是文件了，如果是文件则输出文件信息，如果是目录则解析目录中所包含的文件，并逐个输出文件信息，解析目录的代码如下：

if(strlen(path) + 1 + DIRSIZ + 1 > sizeof buf){
  printf("ls: path too long\n");
  break;
}
strcpy(buf, path);
p = buf+strlen(buf);
*p++ = '/';
while(read(fd, &de, sizeof(de)) == sizeof(de)){
  if(de.inum == 0)
	continue;
  memmove(p, de.name, DIRSIZ);
  p[DIRSIZ] = 0;
  if(stat(buf, &st) < 0){
	printf("ls: cannot stat %s\n", buf);
	continue;
  }
  printf("%s %d %d %d\n", fmtname(buf), st.type, st.ino, st.size);
}

上面的代码的核心在于通过while循环来从目录的文件描述符中读取目录项，目录项的是一个struct dirent结构体，可以从kernel/fs.h找到相关的定义：

#define DIRSIZ 14
struct dirent {
   ushort inum;
   char name[DIRSIZ];
 };

其中inum是inode编号。

find 实现

find和ls的实现基本一致，区别点在于：

当打开的是文件时，需要与输入pattern进行比较，来决定是否输出
当打开的是目录时，需要递归调用find函数
由于与pattern进行比较是文件名，而不是路径，所以为了简便，find 的三个参数分别为path，filename，pattern。

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user.h"
#include "kernel/fs.h"

void
find(char *path, char *filename, char *pattern) 
{
    int fd;
    char buf[512], *p;
    struct stat st;
    struct dirent de;

    if ((fd = open(path, 0)) < 0) {
        fprintf(2, "find: cannot open %s\n", path);
        exit(1);
    }

    if (fstat(fd, &st) < 0) {
        fprintf(2, "find: cannot stst %s\n", path);
        close(fd);
        exit(1);
    }

    switch(st.type) {
        case T_FILE:
            if (strcmp(filename, pattern) == 0) {
                printf("%s\n", path);
            }
            break;
        case T_DIR:
            if (strlen(path) + 1 + DIRSIZ + 1 > sizeof buf) {
                printf("find: path too long\n");
            }
            strcpy(buf, path);
            p = buf + strlen(buf);
            *p++ = '/';
            while(read(fd, &de, sizeof(de)) == sizeof(de)) {
                if (de.inum == 0) {
                    continue;
                }
                if (strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0) {
                    continue;
                }
                memmove(p, de.name, DIRSIZ);
                p[DIRSIZ] = 0;
                find(buf, de.name, pattern);
            }
            break;
    }
    close(fd);
}

int 
main(int argc, char *argv[]) 
{
    if (argc < 3) {
        fprintf(2, "usage: find <path> <pattern>\n");
        exit(1);
    }

    find(argv[1], argv[1], argv[2]);
    exit(0);
}

xargs

在解决这道题前，我们先了解一下xargs的使用。

xargs 基本使用

我们知道在shell中可以使用|将两个命令的输出和输入连接在一起，比如说cat text.txt | more，这可以让我们将多个简单的程序连接在一起实现复杂的功能。而有时候，我们希望将第一个命令的输出作为第二个命令的命令行参数来调用。比如说我们有一个文件cmd.txt包含以下内容：

echo
ls

我们希望用which命令来查询cmd.txt中每行命令所在的路径。如果我们直接通过|来连接cat cmd.txt和which的话，会没有任何输出，这是因为which命令是从命令行参数中读取要查询的命令名，而不是从标准输入中读取。这种时候，我们就可以用xargs来将标准输入转化为命令行参数了：

# wuxiaobai24 @ wuxiaobai24-pc in ~/code/6.828/xv6-labs-2020 on git:util x [15:56:08] 
$ cat cmd.txt
echo
ls

# wuxiaobai24 @ wuxiaobai24-pc in ~/code/6.828/xv6-labs-2020 on git:util x [15:56:13] 
$ cat cmd.txt | which

# wuxiaobai24 @ wuxiaobai24-pc in ~/code/6.828/xv6-labs-2020 on git:util x [15:56:18] C:1
$ cat cmd.txt | xargs which
/usr/bin/echo
/usr/bin/ls

xargs 的使用格式xargs [options] [command [initial-arguments]]

echo "world\nos" | xargs echo hello 会被转化成echo hello world和echo hello os来执行。

xargs

因为题目给出了这里的xargs是类似于xargs -n 1的执行效果就好了，这其实就降低了难度了，因为它其实限制了xargs一次只会传入一个参数，我们逐字符读取输入，然后判断是否为\n，如果不是就将该字符暂存，如果是就将这之前保存字符串添加到原本命令行参数的后面，并调用fork和exec执行即可。

但是其实正常xargs好像也没太大问题，即支持一行中有多个参数，只是需要在解析时考虑多一个和\t即可。

#include "kernel/types.h"
#include "kernel/stat.h"
#include "kernel/param.h"
#include "user.h"

int
main(int argc, char *argv[])
{
    int i, cmdLen;
    char *cmd[MAXARG];
    char buf[512];
    char c;

    if (argc < 2) {
        fprintf(2, "usage: xargs <command>\n");
        exit(1);
    }

    cmdLen = 0;
    for(i = 1;argv[i];i++) {
        cmd[cmdLen++] = argv[i];
    }

    i = 0;
    cmd[cmdLen] = buf;
    while(read(0, &c, 1)) {
        switch(c) {
            case ' ':
            case '\t':
                buf[i++] = '\0';
                cmd[++cmdLen] = buf + i;
                break;
            case '\n':
                cmd[++cmdLen] = 0;
                buf[i++] = '\0';
                int pid = fork();
                if (pid < 0) {
                    fprintf(2, "xargs: fork fail\n");
                    exit(1);
                } else if (pid == 0) {
                    // child
                    exec(cmd[0], cmd);
                }
                wait(0);
                cmdLen = argc - 1;
                cmd[cmdLen] = buf;
                i = 0;
                break;
            default:
                buf[i++] = c;
        }
    }
    exit(0);
}

uptime

貌似是只需要调用uptime的返回值即可？

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user.h"

int 
main(int argc, char *argv[]) {
    int ticks = uptime();
    printf("%d\n", ticks);
    exit(0);
}

grep with regular expressions

因为给出了提示，在user/grep.c里面有关于正则表达式的代码，所以可以直接把那部分代码拷贝过来，然后将strcmp修改成对应的match即可。

正则表达式match代码实现如下：

// Regexp matcher from Kernighan & Pike,
// The Practice of Programming, Chapter 9.

int matchhere(char*, char*);
int matchstar(int, char*, char*);

int
match(char *re, char *text)
{
  if(re[0] == '^')
    return matchhere(re+1, text);
  do{  // must look at empty string
    if(matchhere(re, text))
      return 1;
  }while(*text++ != '\0');
  return 0;
}

// matchhere: search for re at beginning of text
int matchhere(char *re, char *text)
{
  if(re[0] == '\0')
    return 1;
  if(re[1] == '*')
    return matchstar(re[0], re+2, text);
  if(re[0] == '$' && re[1] == '\0')
    return *text == '\0';
  if(*text!='\0' && (re[0]=='.' || re[0]==*text))
    return matchhere(re+1, text+1);
  return 0;
}

// matchstar: search for c*re at beginning of text
int matchstar(int c, char *re, char *text)
{
  do{  // a * matches zero or more instances
    if(matchhere(re, text))
      return 1;
  }while(*text!='\0' && (*text++==c || c=='.'));
  return 0;
}

sh

not print $ when processing shell command from a file

一开始挺懵的，要判断sh的输入是否从文件中来，后来发现它提供了一个fstat的系统调用，函数头为int fstat(int fd, struct stat*)，而这里的struct stat可以从kernel/stat.h中找到：

#define T_DIR     1   // Directory
#define T_FILE    2   // File
#define T_DEVICE  3   // Device

struct stat {
  int dev;     // File system's disk device
  uint ino;    // Inode number
  short type;  // Type of file
  short nlink; // Number of links to file
  uint64 size; // Size of file in bytes
};

通过fstat，我们可以判断一个文件描述符是否为设备，而从user/init.c中可以看到sh的标准输入正式从设备console中来的，所以我们在sh程序的开始判断一次即可。如果是终端，则每次getcmd都输出$ ，如果不是，则每次都不输出。

int cmdFromConsole; // check whether cmd from console

int
getcmd(char *buf, int nbuf)
{
  if (cmdFromConsole)
    fprintf(2, "$ ");
	//....
}

//....
  fstat(0, &st);
  cmdFromConsole = (st.type == T_DEVICE);

作者：wuxiaobai24
发表日期：11/20/2020
本文首发地址：6.S081 laba1 util
版权声明：CC BY NC SA 4.0