本文主要是介绍程序员的自我修养:MiniCRT自制C语言运行库,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
《程序员的自我修养》一书在最后一章专门介绍了一款小型的C语言运行库,并给出了详细的代码实现。阅读该代码实现,可以对C语言运行库提供的语言抽象层有更深的理解。Talk is cheap, show me the code!
minicrt.h: MiniCRT的文件头
/*
 * minicrt.h - public interface of MiniCRT.
 *
 * The include guard wraps the whole header, so including it more than
 * once in a translation unit is harmless.
 */
#ifndef __MINI_CRT_H__
#define __MINI_CRT_H__

/*
 * Export the functions with C linkage so that C++ callers see the same,
 * unmangled names.
 */
#ifdef __cplusplus
extern "C" {
#endif

/* ---- heap (malloc.c) ---- */
#ifndef NULL
#define NULL (0)
#endif

void free(void* ptr);
void* malloc(unsigned size);
/*
 * brk() is a file-local (static) helper of malloc.c, defined there before
 * its only use, so it is deliberately not declared in this public header.
 */
int mini_crt_init_heap(void);

/* ---- strings (string.c) ---- */
char* itoa(int n, char* str, int radix);
int strcmp(const char* src, const char* dst);
char* strcpy(char* dest, const char* src);
unsigned strlen(const char* str);

/* ---- files and I/O (stdio.c) ---- */
typedef int FILE;

#define EOF (-1)

/*
 * A FILE* is not a pointer to a FILE structure here.  On Windows it
 * carries the kernel handle obtained through GetStdHandle(); on Linux it
 * carries the file descriptor, the standard streams being 0, 1 and 2.
 */
#ifdef WIN32
#define stdin ( (FILE*) (GetStdHandle(STD_INPUT_HANDLE)) )
#define stdout ( (FILE*) (GetStdHandle(STD_OUTPUT_HANDLE)) )
#define stderr ( (FILE*) (GetStdHandle (STD_ERROR_HANDLE)) )
#else
#define stdin ((FILE*) 0)
#define stdout ((FILE*) 1)
#define stderr ((FILE*) 2)
#endif

/*
 * MiniCRT has no stream buffering and keeps no table of open files, so
 * there is nothing to set up; the function is effectively empty.
 */
int mini_crt_init_io(void);
FILE* fopen(const char* filename, const char* mode);
int fread(void* buffer, int size, int count, FILE* stream);
int fwrite(const void* buffer, int size, int count, FILE* stream);
int fclose(FILE* fp);
int fseek(FILE* fp, int offset, int set);

/* ---- formatted output (string.c / printf.c) ---- */
int fputc(int c, FILE* stream);
int fputs(const char* str, FILE* stream);
int printf(const char* format, ...);
int fprintf(FILE* stream, const char* format, ...);

/* ---- internal ---- */
void do_global_ctors(void);
void mini_crt_call_exit_routine(void);

/* ---- atexit ---- */
typedef void (*atexit_func_t)(void);
int atexit(atexit_func_t func);

#ifdef __cplusplus
}
#endif

#endif /* __MINI_CRT_H__ */
entry.c:运行库入口函数定义
#include "minicrt.h"

#ifdef WIN32
#include <Windows.h>
/* Capacity of the argv array built from the Windows command line. */
#define MINI_CRT_MAX_ARGS 16
#endif

/* entry.c - process entry point and exit() of MiniCRT. */

/* main() written by the user of the runtime. */
extern int main(int argc, char* argv[]);
void exit(int);

/*
 * Abort start-up: terminates the process with exit code 1.
 * msg is currently unused because the diagnostic printf is disabled.
 */
static void crt_fatal_error(const char* msg)
{
    (void)msg;
    /* printf("fatal error:%s", msg); */
    exit(1);
}

/*
 * Runtime entry point (selected with the linker's -e / /entry option).
 * Collects argc/argv, initialises the heap and the I/O layer, calls
 * main() and passes its result to exit().
 */
void mini_crt_entry(void)
{
    int ret;

#ifdef WIN32
    int in_quotes = 0;              /* non-zero while inside "..." */
    int argc = 0;
    char* argv[MINI_CRT_MAX_ARGS];
    /* Windows hands over the whole command line as a single string. */
    char* cl = GetCommandLineA();

    /*
     * Split the command line in place: unquoted runs of spaces separate
     * arguments.  Quote characters themselves are left in the arguments.
     */
    argv[0] = cl;
    argc++;
    while (*cl) {
        if (*cl == '\"') {
            /* Spaces inside a quoted string belong to the argument. */
            in_quotes = !in_quotes;
        } else if (*cl == ' ' && !in_quotes) {
            char* next = cl + 1;

            while (*next == ' ')
                next++;
            *cl = '\0';             /* terminate the previous argument */
            /* Arguments beyond the array capacity are dropped. */
            if (*next && argc < MINI_CRT_MAX_ARGS) {
                argv[argc] = next;
                argc++;
            }
            /*
             * Resume AT the first character of the next argument (not
             * after it) so that a leading quote is seen, and so that a
             * trailing run of spaces cannot step past the terminator.
             */
            cl = next;
            continue;
        }
        cl++;
    }
#else
    int argc;
    char** argv;
    char* ebp_reg;

    /*
     * i386 only: read the frame pointer and take argc from [ebp+4] and
     * the argv array from ebp+8.
     * NOTE(review): this assumes the compiler emits a standard
     * frame-pointer prologue for this function - confirm when changing
     * optimisation flags.
     */
    asm("movl %%ebp, %0 \n\t" : "=r"(ebp_reg));
    argc = *(int*)(ebp_reg + 4);
    argv = (char**)(ebp_reg + 8);
#endif

    if (!mini_crt_init_heap())
        crt_fatal_error("heap initialize failed");
    if (!mini_crt_init_io())
        crt_fatal_error("IO initialize failed");

    ret = main(argc, argv);
    exit(ret);
}

/*
 * Terminate the process with the given exit code.
 * Registered exit routines are not run: the call to
 * mini_crt_call_exit_routine() is disabled in this version.
 */
void exit(int exitCode)
{
    /* mini_crt_call_exit_routine(); */
#ifdef WIN32
    ExitProcess(exitCode);
#else
    /* i386 Linux: system call number 1 with the exit code in ebx. */
    asm("movl %0, %%ebx \n\t"
        "movl $1, %%eax \n\t"
        "int $0x80 \n\t"
        "hlt \n\t"
        :
        : "m"(exitCode));
#endif
}
malloc.c:堆的初始化和功能实现
//堆的实现
/*在遵循Mini CRT的原则下,我们将Mini CRT堆的实现归纳为以下几条
1.实现一个以空闲链表算法为基础的堆空间分配算法;
2.为了简单起见,堆空间大小固定为32MB,初始化后空间不再扩展或缩小;
3.在Windows平台下不使用HeapAlloc等堆分配算法,采用VirtualAlloc向系统直接申请32MB空间,由我们自己的堆分配算法实现malloc;
4.在Linux平台下,使用brk将数据段结束地址向后调整32MB,将这块空间作为堆空间
*/
/*brk系统调用可以设置进程的数据段(.data)边界,而sbrk可以移动进程的数据段边界,显然如果将数据段边界后移,就相当于分配了一定量的内存。但是这段内存初始只是分配了虚拟空间,这些空间的申请一开始是不会提交的(即不会分配物理页面),当进程试图访问一个地址的时候,操作系统会检测到缺页异常,从而会为被访问的地址所在的页分配物理页面。
故而这种被动的物理内存分配,又被称为按需分配(on-demand),即不访问就不分配。
*/
#include "minicrt.h"

/*
 * Header placed in front of every heap block.  All blocks, free or in
 * use, are linked into one doubly linked list.
 * NOTE(review): the magic values do not fit in a signed int; compilers
 * such as GCC accept such enumerators as an extension.
 */
typedef struct _heap_header
{
    enum {
        HEAP_BLOCK_FREE = 0xABABABAB,   /* magic number of a free block */
        HEAP_BLOCK_USED = 0xCDCDCDCD    /* magic number of a block in use */
    } type;
    unsigned size;                      /* block size, header included */
    struct _heap_header* next;
    struct _heap_header* prev;
} heap_header;

/* Byte-wise pointer addition; the offset is parenthesised for safety. */
#define ADDR_ADD(a,o) ( ((char*) (a)) + (o) )
#define HEADER_SIZE (sizeof(heap_header))

/* First block of the heap; NULL until the heap has been initialised. */
static heap_header* list_head = NULL;

/*
 * Release a block obtained from malloc().
 * A NULL pointer, or a pointer whose header does not carry the "used"
 * magic number, is ignored.  The freed block is merged with a free
 * predecessor and/or a free successor.
 */
void free(void* ptr)
{
    heap_header* header;

    if (ptr == NULL)
        return;

    header = (heap_header*)((char*)ptr - HEADER_SIZE);
    if (header->type != HEAP_BLOCK_USED)
        return;

    header->type = HEAP_BLOCK_FREE;

    if (header->prev != NULL && header->prev->type == HEAP_BLOCK_FREE) {
        /* The previous block is free as well: fold this block into it. */
        header->prev->next = header->next;
        if (header->next != NULL)
            header->next->prev = header->prev;
        header->prev->size += header->size;
        header = header->prev;
    }

    if (header->next != NULL && header->next->type == HEAP_BLOCK_FREE) {
        /* The following block is free as well: absorb it. */
        header->size += header->next->size;
        header->next = header->next->next;
        /* Keep the back link of the new successor consistent. */
        if (header->next != NULL)
            header->next->prev = header;
    }
}

/*
 * Allocate size bytes with a first-fit search over the block list.
 * Returns a pointer to the payload, or NULL when size is 0, when the
 * request is too large to represent, or when no free block is big enough.
 */
void* malloc( unsigned size )
{
    heap_header* header;
    unsigned needed;

    if (size == 0)
        return NULL;
    /* Reject requests for which the size arithmetic below would wrap. */
    if (size > (unsigned)-1 - 2 * (unsigned)HEADER_SIZE)
        return NULL;

    needed = size + (unsigned)HEADER_SIZE;   /* payload plus its header */

    for (header = list_head; header != NULL; header = header->next) {
        if (header->type == HEAP_BLOCK_USED)
            continue;
        if (header->size < needed)
            continue;

        if (header->size - needed > HEADER_SIZE) {
            /*
             * The remainder can hold a header plus at least one byte of
             * payload: split it off as a new free block.  Its size is
             * what is left of the old block, header included.
             */
            heap_header* rest = (heap_header*) ADDR_ADD(header, needed);

            rest->prev = header;
            rest->next = header->next;
            rest->type = HEAP_BLOCK_FREE;
            rest->size = header->size - needed;
            if (header->next != NULL)
                header->next->prev = rest;
            header->next = rest;
            header->size = needed;
        }
        /*
         * Otherwise the leftover is too small to be useful on its own
         * (this includes an exact fit): hand out the whole block.
         */
        header->type = HEAP_BLOCK_USED;
        return ADDR_ADD(header, HEADER_SIZE);
    }

    return NULL;
}

#ifndef WIN32
//Linux brk system call
static int brk(void* end_data_segment) {int ret = 0;//brk system call number:45//in /usr/include/asm-i386/unistd.h://#define __NR_brk 45asm("movl $45, %%eax \n\t""movl %1, %%ebx \n\t""int $0x80 \n\t""movl %%eax, %0 \n\t":"=r"(ret):"m"(end_data_segment) );
}
#endif#ifdef WIN32
#include <Windows.h>
#endifint mini_crt_init_heap()
{void* base = NULL;heap_header* header = NULL;//32MB heap sizeunsigned heap_size = 1024*1024*32;//以base为起点分配32MB的内存空间
#ifdef WIN32base = VirtualAlloc(0, heap_size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);if (base == NULL)return 0;
#elsebase = (void*)brk(0);void* end = ADDR_ADD(base, heap_size);end = (void*)brk(end);if(!end){printf("Linux create heap fail\n");return 0;}
#endifheader = (heap_header*) base;header->size = heap_size;header->type = HEAP_BLOCK_FREE;header->next = NULL;header->prev = NULL;list_head = header;if(header == 0){printf("Linux create heap fail\n");return 0;}return 1;
}
string.c:字符串相关函数的封装和实现
/*这部分实现的是字符串相关的操作,主要是包括计算字符串长度、比较两个字符串、整数与字符串之间的转换等,由于这部分无需涉及任何和内核的交互,是纯粹的用户态的计算。实现较为简单*/
#include "minicrt.h"

/*
 * Convert n to text in the given radix (2..36) and store it in str.
 * A minus sign is produced only for radix 10.
 * Returns str.  When str is NULL, the radix is out of range, or n is
 * negative with a radix other than 10, nothing is written and str is
 * returned unchanged.
 */
char* itoa(int n, char* str, int radix)
{
    char digit[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    char* p = str;
    char* head = str;
    unsigned magnitude;

    if (!p || radix < 2 || radix > 36)
        return p;
    if (radix != 10 && n < 0)
        return p;

    if (n == 0) {
        *p++ = '0';
        *p = 0;
        return str;
    }

    if (n < 0) {
        /* Only reachable for radix 10 because of the check above. */
        *p++ = '-';
        head = p;                       /* the sign is not reversed */
        /* Negate as unsigned so that the most negative int is safe. */
        magnitude = 0u - (unsigned)n;
    } else {
        magnitude = (unsigned)n;
    }

    /* Digits come out least significant first ... */
    while (magnitude) {
        *p++ = digit[magnitude % (unsigned)radix];
        magnitude /= (unsigned)radix;
    }
    *p = 0;

    /* ... so reverse the digit run in place. */
    for (--p; head < p; ++head, --p) {
        char temp = *head;
        *head = *p;
        *p = temp;
    }
    return str;
}

/*
 * Compare two strings byte by byte as unsigned characters.
 * Returns -1, 0 or 1.
 */
int strcmp (const char* src, const char* dst)
{
    int ret = 0;
    const unsigned char* p1 = (const unsigned char*)src;
    const unsigned char* p2 = (const unsigned char*)dst;

    while (!(ret = *p1 - *p2) && *p2) {
        ++p1;
        ++p2;
    }

    if (ret < 0)
        ret = -1;
    else if (ret > 0)
        ret = 1;
    return ret;
}

/* Copy src, including its terminator, to dest.  Returns dest. */
char* strcpy(char *dest, const char* src)
{
    char* ret = dest;

    while (*src)
        *dest++ = *src++;
    *dest = '\0';
    return ret;
}

/*
 * Length of str without the terminator; 0 for a NULL pointer.
 * The scan stops only at the terminating '\0'.
 */
unsigned strlen(const char* str)
{
    unsigned cnt = 0;

    if (!str)
        return 0;
    while (str[cnt] != '\0')
        ++cnt;
    return cnt;
}

/*
 * Write the character c (converted to unsigned char) to stream.
 * Returns the character written as a non-negative value, or EOF on
 * failure, so that callers can test the result with "< 0".
 */
int fputc(int c, FILE* stream)
{
    /* Write exactly the character byte, independent of byte order. */
    unsigned char ch = (unsigned char)c;

    if (fwrite(&ch, 1, 1, stream) != 1)
        return EOF;
    return (int)ch;
}

/*
 * Write the string str (without terminator) to stream.
 * Returns the number of bytes written, or EOF on a short write.
 */
int fputs(const char* str, FILE* stream)
{
    int len = strlen(str);

    if (fwrite(str, 1, len, stream) != len)
        return EOF;
    return len;
}
printf.c:printf输出函数的封装和实现
#include "minicrt.h"

/*
 * Hand-rolled variable-argument access for non-Windows builds: the
 * argument pointer is a plain char* that starts right behind the last
 * named parameter and is advanced by the size of each fetched type.
 * NOTE(review): presumably relies on the i386 stack-based calling
 * convention used by the -m32 build - confirm before porting.
 */
#ifndef WIN32
#define va_list char*
#define va_start(ap,arg) ( ap = (va_list)&arg + sizeof(arg))
#define va_arg(ap, t) ( *(t*) ( (ap+=sizeof(t)) - sizeof(t) ) )
#define va_end(ap) ( ap = (va_list) 0)
#else
#include <Windows.h>
#endif

/*
 * Formatted output to stream.  Mini CRT supports no special formatting,
 * only the two simple conversions %d and %s; "%%" prints one '%'.
 * A '%' followed by any other character prints just that character.
 * Returns the number of characters written, or EOF on a write error.
 */
int vfprintf(FILE* stream, const char* format, va_list arglist )
{
    int translating = 0;    /* 1 right after a '%' has been seen */
    int ret = 0;            /* number of characters written so far */
    const char* p;

    for (p = format; *p != '\0'; ++p) {
        switch (*p) {
        case '%':
            if (!translating) {
                /* The next character selects the conversion. */
                translating = 1;
            } else {
                if (fputc('%', stream) < 0)
                    return EOF;
                ++ret;
                translating = 0;
            }
            break;

        case 'd':
            if (translating) {      /* %d */
                /* Stack buffer: it must not be passed to free(). */
                char buf[16] = {0};

                translating = 0;
                itoa(va_arg(arglist, int), buf, 10);
                if (fputs(buf, stream) < 0)
                    return EOF;
                ret += strlen(buf);
            } else if (fputc('d', stream) < 0) {
                return EOF;
            } else {
                ++ret;
            }
            break;

        case 's':
            if (translating) {      /* %s */
                const char* str = va_arg(arglist, const char*);

                translating = 0;
                if (fputs(str, stream) < 0)
                    return EOF;
                ret += strlen(str);
            } else if (fputc('s', stream) < 0) {
                return EOF;
            } else {
                ++ret;
            }
            break;

        default:
            if (translating)
                translating = 0;
            if (fputc(*p, stream) < 0)
                return EOF;
            else
                ++ret;
            break;
        }
    }
    return ret;
}

/* Formatted output to stdout; see vfprintf for the supported format. */
int printf(const char* format, ...)
{
    int ret;
    va_list arglist;

    va_start(arglist, format);
    ret = vfprintf(stdout, format, arglist);
    va_end(arglist);
    return ret;
}

/* Formatted output to stream; see vfprintf for the supported format. */
int fprintf(FILE* stream, const char* format, ...)
{
    int ret;
    va_list arglist;

    va_start(arglist, format);
    ret = vfprintf(stream, format, arglist);
    va_end(arglist);
    return ret;
}
stdio.c:IO初始化和函数实现
//stdio.c
/*
1. For simplicity MiniCRT implements no buffering, and does not convert
   line endings on Windows, i.e. \r\n and \n are not translated.
2. On Windows the basic file operations use the Windows API
   (CreateFile, ReadFile, WriteFile, CloseHandle, SetFilePointer).
3. On Linux the system calls open/read/write/close/seek are used.
4. fopen only distinguishes the modes "r", "w", "+" and their
   combinations; text and binary mode are not distinguished and append
   mode ("a") is not supported.
*/
#include "minicrt.h"

/*
 * Initialise the I/O layer.  There is nothing to set up in this
 * implementation, so the function only reports success (1).
 */
int mini_crt_init_io()
{
    return 1;
}

#ifdef WIN32
#include <Windows.h>

/*
 * Open a file.  Recognised modes: "w", "w+" (create or truncate) and
 * "r", "r+" (open an existing file, contents kept).
 * Returns the file handle cast to FILE*, or 0 when CreateFileA fails.
 */
FILE* fopen(const char* filename, const char* mode)
{
    HANDLE hFile = 0;
    int access = 0;
    int creation = 0;

    if (strcmp(mode, "w") == 0) {
        access |= GENERIC_WRITE;
        creation |= CREATE_ALWAYS;
    }
    if (strcmp(mode, "w+") == 0) {
        access |= GENERIC_WRITE | GENERIC_READ;
        creation |= CREATE_ALWAYS;
    }
    if (strcmp(mode, "r") == 0) {
        access |= GENERIC_READ;
        creation |= OPEN_EXISTING;
    }
    if (strcmp(mode, "r+") == 0) {
        access |= GENERIC_WRITE | GENERIC_READ;
        /* Update mode must keep the existing contents of the file. */
        creation |= OPEN_EXISTING;
    }

    hFile = CreateFileA(filename, access, 0, 0, creation, 0, 0);
    if (hFile == INVALID_HANDLE_VALUE)
        return 0;
    return (FILE*)hFile;
}

/*
 * Read size*count bytes from stream into buffer.
 * Returns the number of BYTES read (not items), or 0 when ReadFile fails.
 */
int fread(void* buffer, int size, int count, FILE* stream)
{
    DWORD read = 0;

    if (!ReadFile((HANDLE)stream, buffer, size*count, &read, 0))
        return 0;
    return (int)read;
}

/*
Windows API ReadFile():
BOOL ReadFile(
    HANDLE hFile,                 // file handle to read from; the stream argument of fread
    LPVOID lpBuffer,              // start address of the destination buffer; the buffer argument of fread
    DWORD nNumberOfBytesToRead,   // total number of bytes to read; count * size in fread
    LPDWORD lpNumberOfBytesRead,  // pointer to a DWORD that receives the number of bytes read
    LPOVERLAPPED lpOverlapped     // not used here
);
*/

/*
 * Write size*count bytes from buffer to stream.
 * Returns the number of BYTES written (not items), or 0 when WriteFile
 * fails.
 */
int fwrite(const void* buffer, int size, int count, FILE* stream)
{
    DWORD written = 0;

    if (!WriteFile((HANDLE)stream, buffer, size*count, &written, 0))
        return 0;
    return (int)written;
}

/*
 * Close the file.  Returns 0 on success and EOF on failure, matching the
 * 0-on-success result of the non-Windows implementation.
 */
int fclose(FILE* fp)
{
    return CloseHandle((HANDLE)fp) ? 0 : EOF;
}

/*
 * Move the file position; set is passed to SetFilePointer as the move
 * method.  Returns whatever SetFilePointer returns.
 */
int fseek(FILE* fp, int offset, int set)
{
    return SetFilePointer((HANDLE)fp, offset, 0, set);
}

/*
Windows API SetFilePointer():
DWORD SetFilePointer(
    HANDLE hFile,                 // file handle
    LONG lDistanceToMove,         // offset, low 32 bits of the 64-bit offset
    PLONG lpDistanceToMoveHigh,   // pointer to the high 32 bits of the 64-bit offset
    DWORD dwMoveMethod            // base position: FILE_BEGIN / FILE_CURRENT / FILE_END
);
*/

#else //#ifdef WIN32
//movl这种AT&T汇编语言是UNIX下惯用的汇编语言Assembly Language
//mov这种intel汇编语言则是Windows下常用,
static int open(const char* pathname, int flags, int mode)
{int fd = 0;asm("movl $5, %%eax \n\t""movl %1, %%ebx \n\t""movl %2, %%ecx \n\t""movl %3, %%edx \n\t""int $0x80 \n\t""movl %%eax, %0 \n\t":"=m"(fd):"m"(pathname), "m"(flags), "m"(mode) );
}static int read( int fd, void* buffer, unsigned size)
{int ret = 0;asm("movl $3, %%eax \n\t""movl %1, %%ebx \n\t""movl %2, %%ecx \n\t""movl %3, %%edx \n\t""int $0x80 \n\t""movl %%eax, %0 \n\t":"=m"(ret):"m"(fd), "m"(buffer), "m"(size) );return ret;
}static int write( int fd, const void* buffer, unsigned size)
{int ret = 0;asm("movl $4, %%eax \n\t""movl %1, %%ebx \n\t""movl %2, %%ecx \n\t""movl %3, %%edx \n\t""int $0x80 \n\t""movl %%eax, %0 \n\t":"=m"(ret):"m"(fd), "m"(buffer), "m"(size) );return ret;
}static int close(int fd)
{int ret = 0;asm("movl $6, %%eax \n\t""movl %1, %%ebx \n\t""int $0x80 \n\t""movl %%eax, %0 \n\t":"=m"(ret):"m"(fd) );return ret;
}static int seek(int fd, int offset, int mode)
{int ret = 0;asm("movl $19, %%eax \n\t""movl %1, %%ebx \n\t""movl %2, %%ecx \n\t""movl %3, %%edx \n\t""int $0x80 \n\t""movl %%eax, %0 \n\t":"=m"(ret):"m"(fd), "m"(offset), "m"(mode) );return ret;
}FILE* fopen(const char* filename, const char* mode)
{int fd = -1;int flags = 0;int access = 00700; //创建文件的权限//来自于/usr/include/bits/fcntl.h//注意:以0开始的数字是八进制的
#define O_RDONLY 00
#define O_WRONLY 01
#define O_RDWR 02
#define O_CREAT 0100
#define O_TRUNC 01000
#define O_APPEND 02000if(strcmp(mode, "w") == 0)flags |= O_WRONLY | O_CREAT | O_TRUNC;if(strcmp(mode, "w+") == 0)flags |= O_RDWR | O_CREAT | O_TRUNC;if(strcmp(mode, "r") == 0)flags |= O_RDONLY;if(strcmp(mode, "r+") == 0)flags |= O_RDWR | O_CREAT;fd = open(filename, flags, access);return (FILE*)fd;
} int fread(void* buffer, int size, int count, FILE* stream)
{return read( (int)stream, buffer, size*count );
}int fwrite(const void* buffer, int size, int count, FILE* stream)
{return write( (int)stream, buffer, size*count );
}int fclose(FILE* fp)
{return close( (int)fp);
}int fseek(FILE* fp, int offset, int set)
{return seek( (int)fp, offset, set);
}#endif
test.c:程序员的测试代码
#include "minicrt.h"

/*
 * Test program for MiniCRT: copies the command-line arguments to the
 * heap, writes each one to test.txt as <int length><bytes>, reads the
 * file back and prints "<length> <argument>" per line.
 * Returns 0 on success and 1 when an allocation or fopen fails.
 */
int main(int argc, char* argv[])
{
    int i;
    FILE* fp;
    char** v;
    /*
     * NOTE(review): the original author observed that the parameters
     * could inexplicably be used only once and worked on local copies;
     * the cause is not established, so the copies are kept.
     */
    char** tempArgv = argv;
    int tempArgc = argc;

    v = (char**) malloc( tempArgc * sizeof(char*) );
    if (v == NULL)
        return 1;

    for (i = 0; i < tempArgc; ++i) {
        v[i] = (char*) malloc( strlen(tempArgv[i]) + 1 );
        if (v[i] == NULL)
            return 1;
        strcpy(v[i], tempArgv[i]);
    }

    /* Write every argument as a length-prefixed record. */
    fp = fopen("test.txt", "w");
    if (fp == NULL)
        return 1;
    for (i = 0; i < tempArgc; ++i) {
        int len = strlen(v[i]);
        fwrite(&len, 1, sizeof(int), fp);
        fwrite(v[i], 1, len, fp);
    }
    fclose(fp);

    /* Read the records back and print them. */
    fp = fopen("test.txt", "r");
    if (fp == NULL)
        return 1;
    for (i = 0; i < tempArgc; ++i) {
        int len = 0;
        char* buf;

        fread(&len, 1, sizeof(int), fp);
        buf = (char*) malloc(len + 1);
        if (buf == NULL) {
            fclose(fp);
            return 1;
        }
        fread(buf, 1, len, fp);
        buf[len] = '\0';
        printf("%d %s\n", len, buf);
        free(buf);
        free(v[i]);
    }
    fclose(fp);

    free(v);
    return 0;
}
MiniCRT运行库设计成可以兼容Linux和Windows,故而运行该程序需要分系统讨论
Linux下运行命令
$gcc -c -fno-builtin -nostdlib -fno-stack-protector entry.c malloc.c stdio.c string.c printf.c -m32 -g
$gcc -c -ggdb -fno-builtin -nostdlib -fno-stack-protector test.c -m32 -g
$ar -rs minicrt.a malloc.o printf.o stdio.o string.o
$ld -static -e mini_crt_entry entry.o test.o minicrt.a -o test -m elf_i386
-fno-builtin参数:关闭GCC的内置函数功能,默认情况下GCC会把strlen\strcmp等函数展开成它内部的实现;
-nostdlib:表示不使用任何来自Glibc、GCC的库文件和启动文件,它包含了-nostartfiles这个参数;
-fno-stack-protector:关闭堆栈保护功能,最新版本的GCC在处理变长参数函数的情况下会要求实现对堆栈的保护函数;
由于系统是64位ubuntu,故而需要在上面的命令中注明 -m32(gcc)或 -m elf_i386(ld)。
运行结果应该如下:
$ ./test arg1 arg2 124
6 ./test
4 arg1
4 arg2
3 124
Windows下运行命令
>cl /c /DWIN32 /GS- entry.c malloc.c printf.c stdio.c string.c
>lib entry.obj malloc.obj printf.obj stdio.obj string.obj /OUT:minicrt.lib
>cl /c /DWIN32 test.c
>link test.obj minicrt.lib kernel32.lib /NODEFAULTLIB /entry:mini_crt_entry
/DWIN32:启用cl的宏定义功能,即定义WIN32这个宏,这是代码中区分平台的关键宏;
/GS-:关闭堆栈保护功能,否则会在链接阶段发生"__security_cookie"和"__security_check_cookie"符号未定义错误。
这篇关于程序员的自我修养:MiniCRT自制C语言运行库的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!