开发者

How can I copy a file on Unix using C?

开发者 https://www.devze.com 2022-12-19 07:58 出处:网络
I\'m looking for the Unix equivalent of Win32\'s CopyFile, I don\'t want to reinvent the wheel by writing开发者_开发百科 my own version.There is no need to either call non-portable APIs like sendfile,

I'm looking for the Unix equivalent of Win32's CopyFile, I don't want to reinvent the wheel by writing开发者_开发百科 my own version.


There is no need to either call non-portable APIs like sendfile, or shell out to external utilities. The same method that worked back in the 70s still works now:

#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

int cp(const char *to, const char *from)
{
    int fd_to, fd_from;
    char buf[4096];
    ssize_t nread;
    int saved_errno;

    fd_from = open(from, O_RDONLY);
    if (fd_from < 0)
        return -1;

    fd_to = open(to, O_WRONLY | O_CREAT | O_EXCL, 0666);
    if (fd_to < 0)
        goto out_error;

    while (nread = read(fd_from, buf, sizeof buf), nread > 0)
    {
        char *out_ptr = buf;
        ssize_t nwritten;

        do {
            nwritten = write(fd_to, out_ptr, nread);

            if (nwritten >= 0)
            {
                nread -= nwritten;
                out_ptr += nwritten;
            }
            else if (errno != EINTR)
            {
                goto out_error;
            }
        } while (nread > 0);
    }

    if (nread == 0)
    {
        if (close(fd_to) < 0)
        {
            fd_to = -1;
            goto out_error;
        }
        close(fd_from);

        /* Success! */
        return 0;
    }

  out_error:
    saved_errno = errno;

    close(fd_from);
    if (fd_to >= 0)
        close(fd_to);

    errno = saved_errno;
    return -1;
}


There is no baked-in equivalent CopyFile function in the APIs. But sendfile can be used to copy a file in kernel mode which is a faster and better solution (for numerous reasons) than opening a file, looping over it to read into a buffer, and writing the output to another file.

Update:

As of Linux kernel version 2.6.33, the limitation requiring the output of sendfile to be a socket was lifted and the original code would work on both Linux and — however, as of OS X 10.9 Mavericks, sendfile on OS X now requires the output to be a socket and the code won't work!

The following code snippet should work on the most OS X (as of 10.5), (Free)BSD, and Linux (as of 2.6.33). The implementation is "zero-copy" for all platforms, meaning all of it is done in kernelspace and there is no copying of buffers or data in and out of userspace. Pretty much the best performance you can get.

#include <fcntl.h>
#include <unistd.h>
#if defined(__APPLE__) || defined(__FreeBSD__)
#include <copyfile.h>
#else
#include <sys/sendfile.h>
#endif

int OSCopyFile(const char* source, const char* destination)
{    
    int input, output;    
    if ((input = open(source, O_RDONLY)) == -1)
    {
        return -1;
    }    
    if ((output = creat(destination, 0660)) == -1)
    {
        close(input);
        return -1;
    }

    //Here we use kernel-space copying for performance reasons
#if defined(__APPLE__) || defined(__FreeBSD__)
    //fcopyfile works on FreeBSD and OS X 10.5+ 
    int result = fcopyfile(input, output, 0, COPYFILE_ALL);
#else
    //sendfile will work with non-socket output (i.e. regular file) on Linux 2.6.33+
    off_t bytesCopied = 0;
    struct stat fileinfo = {0};
    fstat(input, &fileinfo);
    int result = sendfile(output, input, &bytesCopied, fileinfo.st_size);
#endif

    close(input);
    close(output);

    return result;
}

EDIT: Replaced the opening of the destination with the call to creat() as we want the flag O_TRUNC to be specified. See comment below.


It's straight forward to use fork/execl to run cp to do the work for you. This has advantages over system in that it is not prone to a Bobby Tables attack and you don't need to sanitize the arguments to the same degree. Further, since system() requires you to cobble together the command argument, you are not likely to have a buffer overflow issue due to sloppy sprintf() checking.

The advantage to calling cp directly instead of writing it is not having to worry about elements of the target path existing in the destination. Doing that in roll-you-own code is error-prone and tedious.

I wrote this example in ANSI C and only stubbed out the barest error handling, other than that it's straight forward code.

void copy(char *source, char *dest)
{
    int childExitStatus;
    pid_t pid;
    int status;
    if (!source || !dest) {
        /* handle as you wish */
    }

    pid = fork();

    if (pid == 0) { /* child */
        execl("/bin/cp", "/bin/cp", source, dest, (char *)0);
    }
    else if (pid < 0) {
        /* error - couldn't start process - you decide how to handle */
    }
    else {
        /* parent - wait for child - this has all error handling, you
         * could just call wait() as long as you are only expecting to
         * have one child process at a time.
         */
        pid_t ws = waitpid( pid, &childExitStatus, WNOHANG);
        if (ws == -1)
        { /* error - handle as you wish */
        }

        if( WIFEXITED(childExitStatus)) /* exit code in childExitStatus */
        {
            status = WEXITSTATUS(childExitStatus); /* zero is normal exit */
            /* handle non-zero as you wish */
        }
        else if (WIFSIGNALED(childExitStatus)) /* killed */
        {
        }
        else if (WIFSTOPPED(childExitStatus)) /* stopped */
        {
        }
    }
}


Another variant of the copy function using normal POSIX calls and without any loop. Code inspired from the buffer copy variant of the answer of caf. Warning: Using mmap can easily fail on 32 bit systems, on 64 bit system the danger is less likely.

#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>

int cp(const char *to, const char *from)
{
  int fd_from = open(from, O_RDONLY);
  if(fd_from < 0)
    return -1;
  struct stat Stat;
  if(fstat(fd_from, &Stat)<0)
    goto out_error;

  void *mem = mmap(NULL, Stat.st_size, PROT_READ, MAP_SHARED, fd_from, 0);
  if(mem == MAP_FAILED)
    goto out_error;

  int fd_to = creat(to, 0666);
  if(fd_to < 0)
    goto out_error;

  ssize_t nwritten = write(fd_to, mem, Stat.st_size);
  if(nwritten < Stat.st_size)
    goto out_error;

  if(close(fd_to) < 0) {
    fd_to = -1;
    goto out_error;
  }
  close(fd_from);

  /* Success! */
  return 0;
}
out_error:;
  int saved_errno = errno;

  close(fd_from);
  if(fd_to >= 0)
    close(fd_to);

  errno = saved_errno;
  return -1;
}

EDIT: Corrected the file creation bug. See comment in http://stackoverflow.com/questions/2180079/how-can-i-copy-a-file-on-unix-using-c/2180157#2180157 answer.


There is a way to do this, without resorting to the system call, you need to incorporate a wrapper something like this:

#include <sys/sendfile.h>
#include <fcntl.h>
#include <unistd.h>

/* 
** http://www.unixguide.net/unix/programming/2.5.shtml 
** About locking mechanism...
*/

int copy_file(const char *source, const char *dest){
   int fdSource = open(source, O_RDWR);

   /* Caf's comment about race condition... */
   if (fdSource > 0){
     if (lockf(fdSource, F_LOCK, 0) == -1) return 0; /* FAILURE */
   }else return 0; /* FAILURE */

   /* Now the fdSource is locked */

   int fdDest = open(dest, O_CREAT);
   off_t lCount;
   struct stat sourceStat;
   if (fdSource > 0 && fdDest > 0){
      if (!stat(source, &sourceStat)){
          int len = sendfile(fdDest, fdSource, &lCount, sourceStat.st_size);
          if (len > 0 && len == sourceStat.st_size){
               close(fdDest);
               close(fdSource);

               /* Sanity Check for Lock, if this is locked -1 is returned! */
               if (lockf(fdSource, F_TEST, 0) == 0){
                   if (lockf(fdSource, F_ULOCK, 0) == -1){
                      /* WHOOPS! WTF! FAILURE TO UNLOCK! */
                   }else{
                      return 1; /* Success */
                   }
               }else{
                   /* WHOOPS! WTF! TEST LOCK IS -1 WTF! */
                   return 0; /* FAILURE */
               }
          }
      }
   }
   return 0; /* Failure */
}

The above sample (error checking is omitted!) employs open, close and sendfile.

Edit: As caf has pointed out a race condition can occur between the open and stat so I thought I'd make this a bit more robust...Keep in mind that the locking mechanism varies from platform to platform...under Linux, this locking mechanism with lockf would suffice. If you want to make this portable, use the #ifdef macros to distinguish between different platforms/compilers...Thanks caf for spotting this...There is a link to a site that yielded "universal locking routines" here.


Copying files byte-by-byte does work, but is slow and wasteful on modern UNIXes. Modern UNIXes have “copy-on-write” support built-in to the filesystem: a system call makes a new directory entry pointing at the existing bytes on disk, and no file content bytes on disk are touched until one of the copies is modified, at which point only the changed blocks are written to disk. This allows near-instant file copies that use no additional file blocks, regardless of file size. For example, here are some details about how this works in xfs.

On linux, use the FICLONE ioctl as coreutils cp now does by default.

 #ifdef FICLONE
   return ioctl (dest_fd, FICLONE, src_fd);
 #else
   errno = ENOTSUP;
   return -1;
 #endif

On macOS, use clonefile(2) for instant copies on APFS volumes. This is what Apple’s cp -c uses. The docs are not completely clear but it is likely that copyfile(3) with COPYFILE_CLONE also uses this. Leave a comment if you’d like me to test that.

In case these copy-on-write operations are not supported—whether the OS is too old, the underlying file system does not support it, or because you are copying files between different filesystems—you do need to fall back to trying sendfile, or as a last resort, copying byte-by-byte. But to save everyone a lot of time and disk space, please give FICLONE and clonefile(2) a try first.


I see nobody mentioned yet copy_file_range, supported at least on Linux and FreeBSD. The advantage of this one is that it explicitly documents ability of making use of CoW techniques like reflinks. Quoting:

copy_file_range() gives filesystems an opportunity to implement "copy acceleration" techniques, such as the use of reflinks (i.e., two or more inodes that share pointers to the same copy-on-write disk blocks) or server-side-copy (in the case of NFS).

FWIW, I am not sure if older sendfile is able to do that. The few mentions I found claim that it doesn't. In that sense, copy_file_range is superior to sendfile.

Below is an example of using the call (which is copied verbatim from the manual). I also checked that after using this code to copy a bash binary within BTRFS filesystem, the copy is reflinked to the original (I did that by calling duperemove on the files, and seeing Skipping - extents are already deduped. messages).

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
    int fd_in, fd_out;
    struct stat stat;
    off64_t len, ret;

    if (argc != 3) {
        fprintf(stderr, "Usage: %s <source> <destination>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    fd_in = open(argv[1], O_RDONLY);
    if (fd_in == -1) {
        perror("open (argv[1])");
        exit(EXIT_FAILURE);
    }

    if (fstat(fd_in, &stat) == -1) {
        perror("fstat");
        exit(EXIT_FAILURE);
    }

    len = stat.st_size;

    fd_out = open(argv[2], O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd_out == -1) {
        perror("open (argv[2])");
        exit(EXIT_FAILURE);
    }

    do {
        ret = copy_file_range(fd_in, NULL, fd_out, NULL, len, 0);
        if (ret == -1) {
            perror("copy_file_range");
            exit(EXIT_FAILURE);
        }

        len -= ret;
    } while (len > 0 && ret > 0);

    close(fd_in);
    close(fd_out);
    exit(EXIT_SUCCESS);
}


#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>

#define    print_err(format, args...)   printf("[%s:%d][error]" format "\n", __func__, __LINE__, ##args)
#define    DATA_BUF_SIZE                (64 * 1024)    //limit to read maximum 64 KB data per time

int32_t get_file_size(const char *fname){
    struct stat sbuf;

    if (NULL == fname || strlen(fname) < 1){
        return 0;
    }

    if (stat(fname, &sbuf) < 0){
        print_err("%s, %s", fname, strerror(errno));
        return 0;
    }

    return sbuf.st_size; /* off_t shall be signed interge types, used for file size */
}

bool copyFile(CHAR *pszPathIn, CHAR *pszPathOut)
{
    INT32 fdIn, fdOut;
    UINT32 ulFileSize_in = 0;
    UINT32 ulFileSize_out = 0;
    CHAR *szDataBuf;

    if (!pszPathIn || !pszPathOut)
    {
        print_err(" Invalid param!");
        return false;
    }

    if ((1 > strlen(pszPathIn)) || (1 > strlen(pszPathOut)))
    {
        print_err(" Invalid param!");
        return false;
    }

    if (0 != access(pszPathIn, F_OK))
    {
        print_err(" %s, %s!", pszPathIn, strerror(errno));
        return false;
    }

    if (0 > (fdIn = open(pszPathIn, O_RDONLY)))
    {
        print_err("open(%s, ) failed, %s", pszPathIn, strerror(errno));
        return false;
    }

    if (0 > (fdOut = open(pszPathOut, O_CREAT | O_WRONLY | O_TRUNC, 0777)))
    {
        print_err("open(%s, ) failed, %s", pszPathOut, strerror(errno));
        close(fdIn);
        return false;
    }

    szDataBuf = malloc(DATA_BUF_SIZE);
    if (NULL == szDataBuf)
    {
        print_err("malloc() failed!");
        return false;
    }

    while (1)
    {
        INT32 slSizeRead = read(fdIn, szDataBuf, sizeof(szDataBuf));
        INT32 slSizeWrite;
        if (slSizeRead <= 0)
        {
            break;
        }

        slSizeWrite = write(fdOut, szDataBuf, slSizeRead);
        if (slSizeWrite < 0)
        {
            print_err("write(, , slSizeRead) failed, %s", slSizeRead, strerror(errno));
            break;
        }

        if (slSizeWrite != slSizeRead) /* verify wheter write all byte data successfully */
        {
            print_err(" write(, , %d) failed!", slSizeRead);
            break;
        }
    }

    close(fdIn);
    fsync(fdOut); /* causes all modified data and attributes to be moved to a permanent storage device */
    close(fdOut);

    ulFileSize_in = get_file_size(pszPathIn);
    ulFileSize_out = get_file_size(pszPathOut);
    if (ulFileSize_in == ulFileSize_out) /* verify again wheter write all byte data successfully */
    {
        free(szDataBuf);
        return true;
    }
    free(szDataBuf);
    return false;
}


One option is that you could use system() to execute cp. This just re-uses the cp(1) command to do the work. If you only need to make another link to the file, this can be done with link() or symlink().


sprintf( cmd, "/bin/cp -p \'%s\' \'%s\'", old, new);

system( cmd);

Add some error checks...

Otherwise, open both and loop on read/write, but probably not what you want.

...

UPDATE to address valid security concerns:

Rather than using "system()", do a fork/wait, and call execv() or execl() in the child.

execl( "/bin/cp", "-p", old, new);


Very simple :

#define BUF_SIZE 65536

int cp(const char *from, const char*to){
FILE *src, *dst;
size_t in, out;
char *buf = (char*) malloc(BUF_SIZE* sizeof(char));
src = fopen(from, "rb");
if (NULL == src) exit(2);
dst = fopen(to, "wb");
if (dst < 0) exit(3);
while (1) {
    in = fread(buf, sizeof(char), BUF_SIZE, src);
    if (0 == in) break;
    out = fwrite(buf, sizeof(char), in, dst);
    if (0 == out) break;
}
fclose(src);
fclose(dst);
}

Works on windows and linux.


Good question. Related to another good question:

In C on linux how would you implement cp

There are two approaches to the "simplest" implementation of cp. One approach uses a file copying system call function of some kind - the closest thing we get to a C function version of the Unix cp command. The other approach uses a buffer and read/write system call functions, either directly, or using a FILE wrapper.

It's likely the file copying system calls that take place solely in kernel-owned memory are faster than the system calls that take place in both kernel- and user-owned memory, especially in a network filesystem setting (copying between machines). But that would require testing (e.g. with Unix command time) and will be dependent on the hardware where the code is compiled and executed.

It's also likely that someone with an OS that doesn't have the standard Unix library will want to use your code. Then you'd want to use the buffer read/write version, since it only depends on <stdlib.h> and <stdio.h> (and friends)

<unistd.h>

Here's an example that uses function copy_file_range from the unix standard library <unistd.h>, to copy a source file to a (possible non-existent) destination file. The copy takes place in kernel space.

/* copy.c
 *
 * Defines function copy:
 *
 * Copy source file to destination file on the same filesystem (possibly NFS).
 * If the destination file does not exist, it is created. If the destination
 * file does exist, the old data is truncated to zero and replaced by the 
 * source data. The copy takes place in the kernel space.
 *
 * Compile with:
 *
 * gcc copy.c -o copy -Wall -g
 */

#define _GNU_SOURCE 
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>

/* On versions of glibc < 2.27, need to use syscall.
 * 
 * To determine glibc version used by gcc, compute an integer representing the
 * version. The strides are chosen to allow enough space for two-digit 
 * minor version and patch level.
 *
 */
#define GCC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __gnuc_patchlevel__)
#if GCC_VERSION < 22700
static loff_t copy_file_range(int in, loff_t* off_in, int out, 
  loff_t* off_out, size_t s, unsigned int flags)
{
  return syscall(__NR_copy_file_range, in, off_in, out, off_out, s,
    flags);
}
#endif

/* The copy function.
 */
int copy(const char* src, const char* dst){
  int in, out;
  struct stat stat;
  loff_t s, n;
  if(0>(in = open(src, O_RDONLY))){
    perror("open(src, ...)");
    exit(EXIT_FAILURE);
  }
  if(fstat(in, &stat)){
    perror("fstat(in, ...)");
    exit(EXIT_FAILURE);
  }
  s = stat.st_size; 
  if(0>(out = open(dst, O_CREAT|O_WRONLY|O_TRUNC, 0644))){
    perror("open(dst, ...)");
    exit(EXIT_FAILURE);
  }
  do{
    if(1>(n = copy_file_range(in, NULL, out, NULL, s, 0))){
      perror("copy_file_range(...)");
      exit(EXIT_FAILURE);
    }
    s-=n;
  }while(0<s && 0<n);
  close(in);
  close(out);
  return EXIT_SUCCESS;
}

/* Test it out.
 *
 * BASH:
 *
 * gcc copy.c -o copy -Wall -g
 * echo 'Hello, world!' > src.txt
 * ./copy src.txt dst.txt
 * [ -z "$(diff src.txt dst.txt)" ]
 *
 */

int main(int argc, char* argv[argc]){
  if(argc!=3){
    printf("Usage: %s <SOURCE> <DESTINATION>", argv[0]);
    exit(EXIT_FAILURE);
  }
  copy(argv[1], argv[2]);
  return EXIT_SUCCESS;
}

It's based on the example in my Ubuntu 20.x Linux distribution's man page for copy_file_range. Check your man pages for it with:

> man copy_file_range

Then hit j or Enter until you get to the example section. Or search by typing /example.

<stdio.h>/<stdlib.h> only

Here's an example that only uses stdlib/stdio. The downside is it uses an intermediate buffer in user-space.

/* copy.c
 *
 * Compile with:
 * 
 * gcc copy.c -o copy -Wall -g
 *
 * Defines function copy:
 *
 * Copy a source file to a destination file. If the destination file already
 * exists, this clobbers it. If the destination file does not exist, it is
 * created. 
 *
 * Uses a buffer in user-space, so may not perform as well as 
 * copy_file_range, which copies in kernel-space.
 *
 */

#include <stdlib.h>
#include <stdio.h>

#define BUF_SIZE 65536 //2^16

int copy(const char* in_path, const char* out_path){
  size_t n;
  FILE* in=NULL, * out=NULL;
  char* buf = calloc(BUF_SIZE, 1);
  if((in = fopen(in_path, "rb")) && (out = fopen(out_path, "wb")))
    while((n = fread(buf, 1, BUF_SIZE, in)) && fwrite(buf, 1, n, out));
  free(buf);
  if(in) fclose(in);
  if(out) fclose(out);
  return EXIT_SUCCESS;
}

/* Test it out.
 *
 * BASH:
 *
 * gcc copy.c -o copy -Wall -g
 * echo 'Hello, world!' > src.txt
 * ./copy src.txt dst.txt
 * [ -z "$(diff src.txt dst.txt)" ]
 *
 */
int main(int argc, char* argv[argc]){
  if(argc!=3){
    printf("Usage: %s <SOURCE> <DESTINATION>\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  return copy(argv[1], argv[2]);
}


Just use it

#include <stdio.h>
#include <unistd.h> // For system calls write, read e close
#include <fcntl.h>

#define BUFFER_SIZE 1024

int main(int argc, char* argv[]) {
    if (argc != 3) {
        printf("Usage %s Src_file Dest_file\n", argv[0]);
        return -1;
    }
    
    unsigned char buffer[BUFFER_SIZE] = {0};
    ssize_t byte = 0;
    
    int rfd, wfd;
    
    // open file in read mode
    if ((rfd = open(argv[1], O_RDONLY)) == -1) {
        printf("Failed to open input file %s\n", argv[1]);
        return -1;
    }
    
    // open file in write mode and already exists to overwrite
    if ((wfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 644)) == -1) {
        printf("Failed to create output file %s\n", argv[2]);
    }
    
    // loop
    while (1) {
        // read buffer
        byte = read(rfd, buffer, sizeof(buffer));
        // error with reading
        if (byte == -1) {
            printf("Encountered an error\n");
            break;
        } else if (byte == 0) {
            // file end exit loop
            printf("File copying successful.\n");
            break;
        }
        
        // error with writing
       if (write(wfd, buffer, byte) == -1) {
        printf("Failed to copying file\n");
          break;
       }
    
    }
    
    // Close file
    close(rfd);
    close(wfd);
    
    return 0;
}

Run

./program Src_file Dest_file
0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号