Files
fuse-tar/bin/tar.ml
2025-11-26 21:12:08 +03:00

161 lines
5.5 KiB
OCaml

module NameMap = Map.Make(String)
type metadata = Unix.LargeFile.stats
type entry =
| File of int64 * metadata
| Directory of entry NameMap.t * metadata
let empty_dir_metadata : metadata = {
st_dev = 0;
st_ino = 1;
st_kind = Unix.S_DIR;
st_perm = 7 * 64 + 7 * 8 + 7;
st_nlink = 0;
st_uid = 0;
st_gid = 0;
st_rdev = 0;
st_size = Int64.of_int 0;
st_atime = 0.;
st_mtime = 0.;
st_ctime = 0.;
}
let default_file_metadata : metadata = {
st_dev = 0;
st_ino = 1;
st_kind = Unix.S_REG;
st_perm = 7 * 64 + 7 * 8 + 7;
st_nlink = 0;
st_uid = 0;
st_gid = 0;
st_rdev = 0;
st_size = Int64.of_int 0;
st_atime = 0.;
st_mtime = 0.;
st_ctime = 0.;
}
let clean_path path =
let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in
clean_split (String.split_on_char '/' (String.trim path))
let rec find_from_split path tree =
match (path, tree) with
| ([], _) -> Some tree
| (_, File _) -> failwith "cannot search file!"
| ([name], Directory (children, _)) ->
NameMap.find_opt name children
| (name :: rest, Directory (children, _)) ->
(match NameMap.find_opt name children with
| None -> None
| Some d -> find_from_split rest d)
let find_or_empty_dir name dir =
match dir with
| File _ -> failwith "cannot search file!"
| Directory (children, _) ->
(match NameMap.find_opt name children with
| None -> Directory (NameMap.empty, empty_dir_metadata)
| Some (Directory (x,y)) -> (Directory (x,y))
| _ -> failwith "whoops, can't access child of file!")
let add_to_dir name node dir =
match dir with
| Directory (children, md) ->
(* There is a specific case we may need to handle here -
Normally when a file a/b/c is found, we infer the existence
of a/ and a/b/, however if an entry for a/ is later found
we need to keep the children of a/, but need to merge the metadata
from the new entry.
There is also the more general case of the same path being
seen multiple times - the standard seems to say the last one wins.
*)
Directory (NameMap.add name node children, md)
| _ -> failwith "not a directory"
let add_node path node tree =
let split_path = clean_path path in
let rec aux = function
| ([], dir) -> dir
| ([name], dir) ->
add_to_dir name node dir
| (dir_name :: rest, dir) ->
add_to_dir dir_name (aux (rest, find_or_empty_dir dir_name dir)) dir
(*| _ -> failwith "whatever"*)
in aux (split_path, tree)
(* Assume we have a block, extract metadata from it. *)
let read_metadata offset (block : bytes) =
let name = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 0 100)) in
let mode = Octal.to_int (Bytes.sub_string block 100 8) in
let uid = Octal.to_int (Bytes.sub_string block 108 8) in
let gid = Octal.to_int (Bytes.sub_string block 116 8) in
let size = Octal.to_int64 (Bytes.sub_string block 124 12) in
let time = Octal.to_int (Bytes.sub_string block 136 12) in
(* tODO: checksum, link *)
let type_flag = Bytes.get block 156 in
let _ = Bytes.sub_string block 157 100 in
if (String.equal "" name) && (size == 0L) && time == 0 then
("", 0L, File (0L, default_file_metadata))
else
let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in
(*let () = assert ustar_used in*)
let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in
let path = String.cat prefix name in
let md = {
default_file_metadata with
st_ino = (Int64.to_int offset) + 1;
st_kind = (match type_flag with
| '5' -> Unix.S_DIR
| _ -> Unix.S_REG);
st_perm = mode;
st_uid = uid;
st_gid = gid;
st_size = size;
st_mtime = float_of_int time;
} in
if (String.ends_with ~suffix:"/" path) || (ustar_used && type_flag == '5') then
(path, 0L, Directory ((NameMap.empty), md))
else
(path, size, File (offset, md))
let buffer = Bytes.create 512
let next_offset o =
Int64.add (Int64.mul 512L (Int64.div o 512L)) (if (Int64.rem o 512L) > 0L then 512L else 0L)
let read_file f ch off amount =
match f with
| File (file_offset, md) ->
(let () = In_channel.seek ch (Int64.add off (Int64.add 512L file_offset)) in
let n = Int64.to_int (max (Int64.of_int amount) (md.st_size)) in
let buf = Bytes.create n in
match In_channel.really_input ch buf 0 n with
| None -> failwith "can't read file!!!!"
| _ -> (buf, n))
| _ -> failwith "can't read file"
let rec read_structure accum offset ch =
let () = In_channel.seek ch offset in
match In_channel.really_input ch buffer 0 512 with
| None -> Printf.printf "End of file\n"; accum
| _ ->
let (path, size, v) = read_metadata offset buffer in
let new_dir = add_node path v accum in
match (path, size) with
| ("", 0L) -> accum(*read_structure accum (next_offset (Int64.add 512L offset)) ch*)
| _ -> read_structure new_dir (next_offset (Int64.add 512L (Int64.add offset size))) ch
and print_tree path = function
| File (offset, _) ->
Printf.printf "%s: %d\n" path (Int64.to_int offset)
| Directory (children, _) ->
NameMap.iter (fun s c -> print_tree (Printf.sprintf "%s/%s" path s) c) children
let parse_tar fname =
let chan = In_channel.open_bin fname in
let root = Directory (NameMap.empty, empty_dir_metadata) in
let structure = read_structure root 0L chan in
(structure, chan)