module NameMap = Map.Make(String) type metadata = Unix.LargeFile.stats type header = { full_path : string; offset : int64; hard_link : bool; linked_path : string; (* Meaningless unless a hard or soft link *) md : metadata; } type entry = | File of int64 * metadata | Symlink of string * metadata | Hardlink of string | InferredDirectory of entry NameMap.t * metadata | Directory of entry NameMap.t * metadata let empty_dir_metadata : metadata = { st_dev = 0; st_ino = 1; st_kind = Unix.S_DIR; st_perm = 7 * 64 + 7 * 8 + 7; st_nlink = 0; st_uid = 0; st_gid = 0; st_rdev = 0; st_size = Int64.of_int 0; st_atime = 0.; st_mtime = 0.; st_ctime = 0.; } let default_file_metadata : metadata = { st_dev = 0; st_ino = 1; st_kind = Unix.S_REG; st_perm = 7 * 64 + 7 * 8 + 7; st_nlink = 1; st_uid = 0; st_gid = 0; st_rdev = 0; st_size = Int64.of_int 0; st_atime = 0.; st_mtime = 0.; st_ctime = 0.; } let clean_path path = let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in clean_split (String.split_on_char '/' (String.trim path)) let find path tree = let p = clean_path path in let rec aux = function | ([], tree) -> Some tree | (i :: rest, InferredDirectory (map, _)) | (i :: rest, Directory (map, _)) -> (match NameMap.find_opt i map with | None -> None | Some x -> aux (rest, x)) | _ -> failwith "cannot search non-directory" in aux (p, tree) let add_child name node map = match (NameMap.find_opt name map, node) with | Some (InferredDirectory (children, _)), Directory (_, md) -> NameMap.add name (Directory (children, md)) map | _ -> NameMap.add name node map (* TODO: add logic for links, device nodes, and replacing inferred directories *) let add_direct name node = function | InferredDirectory (map, md) -> InferredDirectory ((add_child name node map), md) | Directory (map, md) -> Directory ((add_child name node map), md) | _ -> Printf.printf "whatever the fuck %s" name; Directory (NameMap.empty, empty_dir_metadata) let find_or_infer_dir name = function | InferredDirectory (children, _) | Directory (children, _) -> (match NameMap.find_opt name children with | None -> InferredDirectory (NameMap.empty, empty_dir_metadata) | Some x -> x) | _ -> failwith "findorinfer" let rec add_deep node = function | (last :: [], dir) -> add_direct last node dir | (one :: rest, dir) -> add_direct one (add_deep node (rest, (find_or_infer_dir one dir))) dir | (_, dir) -> dir let rec inc_nlink path tree = add_deep (match find path tree with | Some (File (o, md)) -> File (o, {md with st_nlink = md.st_nlink + 1}) | Some (InferredDirectory (o, md)) -> InferredDirectory (o, {md with st_nlink = md.st_nlink + 1}) | Some (Symlink (o, md)) -> Symlink (o, {md with st_nlink = md.st_nlink + 1}) | Some (Hardlink p) -> inc_nlink p tree | Some (Directory (o, md)) -> Directory (o, {md with st_nlink = md.st_nlink + 1}) | _ -> failwith "can't increase hardlink count...") (clean_path path, tree) let read_path buf o c = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string buf o c)) (* Assume we have a block, extract data from it into a header object *) let read_header offset (block : bytes) : header = let name = read_path block 0 100 in let mode = Octal.to_int (Bytes.sub_string block 100 8) in let uid = Octal.to_int (Bytes.sub_string block 108 8) in let gid = Octal.to_int (Bytes.sub_string block 116 8) in let size = Octal.to_int64 (Bytes.sub_string block 124 12) in let time = Octal.to_int (Bytes.sub_string block 136 12) in (* tODO: checksum, link *) let type_flag = Bytes.get block 156 in let linked_path = read_path block 157 100 in let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in (*let () = assert ustar_used in*) let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in let path = if ustar_used then String.cat prefix name else name in let md = { default_file_metadata with st_ino = (Int64.to_int offset) + 1; st_kind = (match type_flag with | '2' -> Unix.S_LNK | '5' -> Unix.S_DIR | _ -> Unix.S_REG); st_perm = mode; st_uid = uid; st_gid = gid; st_size = size; st_mtime = float_of_int time; } in { full_path = path; md = md; hard_link = (type_flag == '1'); offset = offset; linked_path = linked_path; } let buffer = Bytes.create 512 let empty_p h = (String.equal "" h.full_path) && (h.md.st_size = 0L) let next_header_pos h = let (+) = Int64.add in let (/) = Int64.div in let (%) = Int64.rem in let ( * ) = Int64.mul in let o = (h.offset + 512L) + h.md.st_size in let b = 512L * (o / 512L) in if (o % 512L) > 0L then b + 512L else b let rec read_headers offset ch = let () = In_channel.seek ch offset in match In_channel.really_input ch buffer 0 512 with | None -> (* Print message *) Printf.printf "Tar file ended with no empty entry? Continuing anyway.\n"; [] | _ -> let h = read_header offset buffer in if empty_p h || ((In_channel.length ch) <= (next_header_pos h)) then [] else h :: (read_headers (next_header_pos h) ch) let make_node h = if h.hard_link then Hardlink (h.linked_path) else match h.md.st_kind with | Unix.S_DIR -> Directory (NameMap.empty, h.md) | Unix.S_REG -> File (h.offset, h.md) | Unix.S_LNK -> Symlink (h.linked_path, h.md) | _ -> failwith "other kinds of files not yet implemented" let add_node dir h = let node = make_node h in let root = (match node with | Hardlink (path) -> (inc_nlink path dir) | _ -> dir) in add_deep (make_node h) ((clean_path h.full_path), root) let build_tree headers = let root = Directory (NameMap.empty, empty_dir_metadata) in List.fold_left add_node root headers let parse_tar fname = let chan = In_channel.open_bin fname in let headers = read_headers 0L chan in (build_tree headers, chan) (* Helpful for debugging *) let rec print_tree path = function | File (off, _) -> Printf.printf "%d file: %s\n" (Int64.to_int off) path | InferredDirectory (children, _) | Directory (children, _) -> Printf.printf "dir: %s/\n" path; NameMap.iter (fun s d -> print_tree (Printf.sprintf "%s/%s" path s) d) children | Symlink (s, _) -> Printf.printf "link '%s' -> '%s'" path s | Hardlink (s) -> Printf.printf "hardlink '%s' -> '%s'" path s (* Now that we know everything about a file, we need to be able to actually read it *) let read_file f ch off count = match f with | File (file_off, md) -> let remaining = Int64.max 0L (Int64.sub md.st_size off) in let to_read = Int64.to_int (Int64.min (Int64.of_int count) remaining) in let buf = Bytes.create to_read in let () = In_channel.seek ch (Int64.add file_off (Int64.add 512L off)) in (match In_channel.really_input ch buf 0 to_read with | None -> failwith "can't input" | _ -> (buf, to_read) ) | _ -> failwith "can't read non-file"