Reworked tar processing

This commit is contained in:
2025-11-27 16:30:58 +03:00
parent 0d618be4b5
commit 1a917a1442

View File

@@ -5,6 +5,7 @@ type metadata = Unix.LargeFile.stats
type header = { type header = {
full_path : string; full_path : string;
offset : int64; offset : int64;
hard_link : bool;
linked_path : string; (* Meaningless unless a hard or soft link *) linked_path : string; (* Meaningless unless a hard or soft link *)
md : metadata; md : metadata;
} }
@@ -12,6 +13,8 @@ type header = {
type entry = type entry =
| File of int64 * metadata | File of int64 * metadata
| Symlink of string * metadata
| Hardlink of string
| InferredDirectory of entry NameMap.t * metadata | InferredDirectory of entry NameMap.t * metadata
| Directory of entry NameMap.t * metadata | Directory of entry NameMap.t * metadata
@@ -34,7 +37,7 @@ let default_file_metadata : metadata = {
st_ino = 1; st_ino = 1;
st_kind = Unix.S_REG; st_kind = Unix.S_REG;
st_perm = 7 * 64 + 7 * 8 + 7; st_perm = 7 * 64 + 7 * 8 + 7;
st_nlink = 0; st_nlink = 1;
st_uid = 0; st_uid = 0;
st_gid = 0; st_gid = 0;
st_rdev = 0; st_rdev = 0;
@@ -74,6 +77,7 @@ let add_direct name node = function
| _ -> | _ ->
Printf.printf "whatever the fuck %s" name; Directory (NameMap.empty, empty_dir_metadata) Printf.printf "whatever the fuck %s" name; Directory (NameMap.empty, empty_dir_metadata)
let find_or_infer_dir name = function let find_or_infer_dir name = function
| InferredDirectory (children, _) | InferredDirectory (children, _)
| Directory (children, _) -> | Directory (children, _) ->
@@ -82,7 +86,6 @@ let find_or_infer_dir name = function
| Some x -> x) | Some x -> x)
| _ -> failwith "findorinfer" | _ -> failwith "findorinfer"
(* entry -> string list -> entry -> entry*)
let rec add_deep node = function let rec add_deep node = function
| (last :: [], dir) -> | (last :: [], dir) ->
add_direct last node dir add_direct last node dir
@@ -90,10 +93,21 @@ let rec add_deep node = function
add_direct one (add_deep node (rest, (find_or_infer_dir one dir))) dir add_direct one (add_deep node (rest, (find_or_infer_dir one dir))) dir
| _ -> failwith "heh" | _ -> failwith "heh"
let inc_nlink path tree =
add_deep
(match find path tree with
| Some (File (o, md)) -> File (o, {md with st_nlink = md.st_nlink + 1})
| Some (Directory (o, md)) -> Directory (o, {md with st_nlink = md.st_nlink + 1})
| _ -> failwith "can't increase hardlink count...")
(clean_path path, tree)
let read_path buf o c =
List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string buf o c))
(* Assume we have a block, extract data from it into a header object *) (* Assume we have a block, extract data from it into a header object *)
let read_header offset (block : bytes) : header = let read_header offset (block : bytes) : header =
let name = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 0 100)) in let name = read_path block 0 100 in
let mode = Octal.to_int (Bytes.sub_string block 100 8) in let mode = Octal.to_int (Bytes.sub_string block 100 8) in
let uid = Octal.to_int (Bytes.sub_string block 108 8) in let uid = Octal.to_int (Bytes.sub_string block 108 8) in
let gid = Octal.to_int (Bytes.sub_string block 116 8) in let gid = Octal.to_int (Bytes.sub_string block 116 8) in
@@ -101,7 +115,7 @@ let read_header offset (block : bytes) : header =
let time = Octal.to_int (Bytes.sub_string block 136 12) in let time = Octal.to_int (Bytes.sub_string block 136 12) in
(* tODO: checksum, link *) (* tODO: checksum, link *)
let type_flag = Bytes.get block 156 in let type_flag = Bytes.get block 156 in
let linked_path = Bytes.sub_string block 157 100 in let linked_path = read_path block 157 100 in
let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in
(*let () = assert ustar_used in*) (*let () = assert ustar_used in*)
let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in
@@ -110,6 +124,7 @@ let read_header offset (block : bytes) : header =
default_file_metadata with default_file_metadata with
st_ino = (Int64.to_int offset) + 1; st_ino = (Int64.to_int offset) + 1;
st_kind = (match type_flag with st_kind = (match type_flag with
| '2' -> Unix.S_LNK
| '5' -> Unix.S_DIR | '5' -> Unix.S_DIR
| _ -> Unix.S_REG); | _ -> Unix.S_REG);
st_perm = mode; st_perm = mode;
@@ -121,6 +136,7 @@ let read_header offset (block : bytes) : header =
{ {
full_path = path; full_path = path;
md = md; md = md;
hard_link = (type_flag == '1');
offset = offset; offset = offset;
linked_path = linked_path; linked_path = linked_path;
} }
@@ -150,13 +166,23 @@ let rec read_headers offset ch =
else else
h :: (read_headers (next_header_pos h) ch) h :: (read_headers (next_header_pos h) ch)
let make_node h = let make_node h =
if h.hard_link then
Hardlink (h.linked_path)
else
match h.md.st_kind with match h.md.st_kind with
| Unix.S_DIR -> Directory (NameMap.empty, h.md) | Unix.S_DIR -> Directory (NameMap.empty, h.md)
| Unix.S_REG -> File (h.offset, h.md) | Unix.S_REG ->
File (h.offset, h.md)
| Unix.S_LNK -> Symlink (h.linked_path, h.md)
| _ -> failwith "other kinds of files not yet implemented" | _ -> failwith "other kinds of files not yet implemented"
let add_node dir h = let add_node dir h =
add_deep (make_node h) ((clean_path h.full_path), dir) let node = make_node h in
let root =
(match node with
| Hardlink (path) -> (inc_nlink path dir)
| _ -> dir) in
add_deep (make_node h) ((clean_path h.full_path), root)
let build_tree headers = let build_tree headers =
let root = Directory (NameMap.empty, empty_dir_metadata) in let root = Directory (NameMap.empty, empty_dir_metadata) in
@@ -175,6 +201,10 @@ let rec print_tree path = function
| Directory (children, _) -> | Directory (children, _) ->
Printf.printf "dir: %s/\n" path; Printf.printf "dir: %s/\n" path;
NameMap.iter (fun s d -> print_tree (Printf.sprintf "%s/%s" path s) d) children NameMap.iter (fun s d -> print_tree (Printf.sprintf "%s/%s" path s) d) children
| Symlink (s, _) ->
Printf.printf "link '%s' -> '%s'" path s
| Hardlink (s) ->
Printf.printf "hardlink '%s' -> '%s'" path s
(* Now that we know everything about a file, we need to be able to actually read it *) (* Now that we know everything about a file, we need to be able to actually read it *)
let read_file f ch off count = let read_file f ch off count =