You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
274 lines
13 KiB
274 lines
13 KiB
Index: markup.ml-0.7.2/test/test_encoding.ml
|
|
===================================================================
|
|
--- markup.ml-0.7.2.orig/test/test_encoding.ml
|
|
+++ markup.ml-0.7.2/test/test_encoding.ml
|
|
@@ -15,9 +15,9 @@ let test_ucs_4 (f : Encoding.t) name s1
|
|
expect_error (1, 2) (`Decoding_error (bad_bytes, name))
|
|
begin fun report ->
|
|
let chars = s1 |> string |> f ~report in
|
|
- next_option chars ok (assert_equal (Some (Char.code 'f')));
|
|
+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'f')))));
|
|
next_option chars ok (assert_equal (Some Uutf.u_rep));
|
|
- next_option chars ok (assert_equal (Some (Char.code 'o')));
|
|
+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'o')))));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None)
|
|
end;
|
|
@@ -25,9 +25,9 @@ let test_ucs_4 (f : Encoding.t) name s1
|
|
expect_error (2, 2) (`Decoding_error ("\x00\x00\x00", name))
|
|
begin fun report ->
|
|
let chars = s2 |> string |> f ~report in
|
|
- next_option chars ok (assert_equal (Some (Char.code 'f')));
|
|
- next_option chars ok (assert_equal (Some 0x000A));
|
|
- next_option chars ok (assert_equal (Some (Char.code 'o')));
|
|
+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'f'))));
|
|
+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x000A)));
|
|
+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'o'))));
|
|
next_option chars ok (assert_equal (Some Uutf.u_rep));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None)
|
|
@@ -38,12 +38,12 @@ let tests = [
|
|
let s = "\xef\xbb\xbffoo\xf0\x9f\x90\x99bar\xa0more" in
|
|
expect_error (1, 8) (`Decoding_error ("\xa0", "utf-8")) begin fun report ->
|
|
let chars = s |> string |> utf_8 ~report in
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
|
|
- next_option chars ok (assert_equal (Some 0x1F419));
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
|
|
+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
|
|
next_option chars ok (assert_equal (Some Uutf.u_rep));
|
|
next_n 4 chars ok
|
|
- (assert_equal (List.map Char.code ['m'; 'o'; 'r'; 'e']));
|
|
+ (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['m'; 'o'; 'r'; 'e']));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None)
|
|
end);
|
|
@@ -53,11 +53,11 @@ let tests = [
|
|
expect_error (1, 6) (`Decoding_error ("\xdc\x19", "utf-16be"))
|
|
begin fun report ->
|
|
let chars = s |> string |> utf_16be ~report in
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
|
|
- next_option chars ok (assert_equal (Some 0x1F419));
|
|
- next_option chars ok (assert_equal (Some (Char.code 'b')));
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
|
|
+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));
|
|
+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b'))));
|
|
next_option chars ok (assert_equal (Some Uutf.u_rep));
|
|
- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r']));
|
|
+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r']));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None)
|
|
end);
|
|
@@ -67,11 +67,11 @@ let tests = [
|
|
expect_error (1, 6) (`Decoding_error ("\x19\xdc", "utf-16le"))
|
|
begin fun report ->
|
|
let chars = s |> string |> utf_16le ~report in
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
|
|
- next_option chars ok (assert_equal (Some 0x1F419));
|
|
- next_option chars ok (assert_equal (Some (Char.code 'b')));
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
|
|
+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));
|
|
+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b'))));
|
|
next_option chars ok (assert_equal (Some Uutf.u_rep));
|
|
- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r']));
|
|
+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r']));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None)
|
|
end);
|
|
@@ -79,7 +79,7 @@ let tests = [
|
|
("encoding.iso_8859_1" >:: fun _ ->
|
|
let chars = string "foo\xa0" |> iso_8859_1 in
|
|
next_n 4 chars
|
|
- ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'; '\xa0']));
|
|
+ ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'; '\xa0']));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None));
|
|
|
|
@@ -88,26 +88,26 @@ let tests = [
|
|
expect_error (1, 4) (`Decoding_error ("\xa0", "us-ascii"))
|
|
begin fun report ->
|
|
let chars = s |> string |> us_ascii ~report in
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
|
|
next_option chars ok (assert_equal (Some Uutf.u_rep));
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None)
|
|
end);
|
|
|
|
("encoding.windows_1251" >:: fun _ ->
|
|
let chars = string "foo\xe0\xe1\xe2bar" |> windows_1251 in
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
|
|
- next_n 3 chars ok (assert_equal [0x0430; 0x0431; 0x0432]);
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
|
|
+ next_n 3 chars ok (assert_equal [Uchar.of_int 0x0430; Uchar.of_int 0x0431; Uchar.of_int 0x0432]);
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None));
|
|
|
|
("encoding.windows_1252" >:: fun _ ->
|
|
let chars = string "foo\x80\x83bar" |> windows_1252 in
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
|
|
- next_n 2 chars ok (assert_equal [0x20AC; 0x0192]);
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
|
|
+ next_n 2 chars ok (assert_equal [Uchar.of_int 0x20AC; Uchar.of_int 0x0192]);
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None));
|
|
|
|
@@ -137,7 +137,7 @@ let tests = [
|
|
|
|
("encoding.ebcdic" >:: fun _ ->
|
|
let chars = string "\x86\x96\x96" |> ebcdic in
|
|
- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
|
|
+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
|
|
next_option chars ok (assert_equal None);
|
|
next_option chars ok (assert_equal None));
|
|
]
|
|
Index: markup.ml-0.7.2/test/test_html_tokenizer.ml
|
|
===================================================================
|
|
--- markup.ml-0.7.2.orig/test/test_html_tokenizer.ml
|
|
+++ markup.ml-0.7.2/test/test_html_tokenizer.ml
|
|
@@ -134,7 +134,7 @@ let tests = [
|
|
expect "�"
|
|
[ 1, 1, E (`Bad_token ("�",
|
|
reference, "out of range"));
|
|
- 1, 1, S (`Char Uutf.u_rep);
|
|
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 35, S `EOF];
|
|
|
|
expect "�"
|
|
@@ -142,22 +142,22 @@ let tests = [
|
|
reference, "missing ';' at end"));
|
|
1, 1, E (`Bad_token ("�",
|
|
reference, "out of range"));
|
|
- 1, 1, S (`Char Uutf.u_rep);
|
|
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 34, S `EOF];
|
|
|
|
expect "�"
|
|
[ 1, 1, E (`Bad_token ("�", reference, "out of range"));
|
|
- 1, 1, S (`Char Uutf.u_rep);
|
|
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 9, S `EOF];
|
|
|
|
expect "�"
|
|
[ 1, 1, E (`Bad_token ("�", reference, "out of range"));
|
|
- 1, 1, S (`Char Uutf.u_rep);
|
|
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 11, S `EOF];
|
|
|
|
expect "&#0;"
|
|
[ 1, 1, E (`Bad_token ("&#0;", reference, "out of range"));
|
|
- 1, 1, S (`Char Uutf.u_rep);
|
|
+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 5, S `EOF];
|
|
|
|
expect ""
|
|
@@ -264,7 +264,7 @@ let tests = [
|
|
expect ~state:`RCDATA "f\x00</foo>"
|
|
([ 1, 1, S (`Char 0x66);
|
|
1, 2, E (`Bad_token ("U+0000", "content", "null"));
|
|
- 1, 2, S (`Char Uutf.u_rep)] @
|
|
+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
|
|
(char_sequence ~start:3 "</foo>"));
|
|
|
|
expect ~state:`RCDATA "<title>f</title >"
|
|
@@ -302,7 +302,7 @@ let tests = [
|
|
expect ~state:`RAWTEXT "f\x00</foo>"
|
|
([ 1, 1, S (`Char 0x66);
|
|
1, 2, E (`Bad_token ("U+0000", "content", "null"));
|
|
- 1, 2, S (`Char Uutf.u_rep)] @
|
|
+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
|
|
(char_sequence ~start:3 "</foo>")));
|
|
|
|
("html.tokenizer.script-data" >:: fun _ ->
|
|
@@ -330,7 +330,7 @@ let tests = [
|
|
expect ~state:`Script_data "f<!--o\x00o"
|
|
((char_sequence ~no_eof:true "f<!--o") @
|
|
[1, 7, E (`Bad_token ("U+0000", "script", "null"));
|
|
- 1, 7, S (`Char Uutf.u_rep);
|
|
+ 1, 7, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 8, S (`Char 0x6F);
|
|
1, 9, E (`Unexpected_eoi "script");
|
|
1, 9, S `EOF]);
|
|
@@ -363,7 +363,7 @@ let tests = [
|
|
expect ~state:`Script_data "f<!--a-\x00-"
|
|
((char_sequence ~no_eof:true "f<!--a-") @
|
|
[ 1, 8, E (`Bad_token ("U+0000", "script", "null"));
|
|
- 1, 8, S (`Char Uutf.u_rep);
|
|
+ 1, 8, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 9, S (`Char 0x02D);
|
|
1, 10, E (`Unexpected_eoi "script");
|
|
1, 10, S `EOF]);
|
|
@@ -371,7 +371,7 @@ let tests = [
|
|
expect ~state:`Script_data "f<!--a--\x00--"
|
|
((char_sequence ~no_eof:true "f<!--a--") @
|
|
[ 1, 9, E (`Bad_token ("U+0000", "script", "null"));
|
|
- 1, 9, S (`Char Uutf.u_rep);
|
|
+ 1, 9, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 10, S (`Char 0x02D);
|
|
1, 11, S (`Char 0x02D);
|
|
1, 12, E (`Unexpected_eoi "script");
|
|
@@ -380,14 +380,14 @@ let tests = [
|
|
expect ~state:`Script_data "f<!--<script>\x00"
|
|
((char_sequence ~no_eof:true "f<!--<script>") @
|
|
[ 1, 14, E (`Bad_token ("U+0000", "script", "null"));
|
|
- 1, 14, S (`Char Uutf.u_rep);
|
|
+ 1, 14, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 15, E (`Unexpected_eoi "script");
|
|
1, 15, S `EOF]);
|
|
|
|
expect ~state:`Script_data "f<!--<script>-\x00-"
|
|
((char_sequence ~no_eof:true "f<!--<script>-") @
|
|
[ 1, 15, E (`Bad_token ("U+0000", "script", "null"));
|
|
- 1, 15, S (`Char Uutf.u_rep);
|
|
+ 1, 15, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 16, S (`Char 0x2D);
|
|
1, 17, E (`Unexpected_eoi "script");
|
|
1, 17, S `EOF]);
|
|
@@ -395,7 +395,7 @@ let tests = [
|
|
expect ~state:`Script_data "f<!--<script>--\x00--"
|
|
((char_sequence ~no_eof:true "f<!--<script>--") @
|
|
[ 1, 16, E (`Bad_token ("U+0000", "script", "null"));
|
|
- 1, 16, S (`Char Uutf.u_rep);
|
|
+ 1, 16, S (`Char (Uchar.to_int Uutf.u_rep));
|
|
1, 17, S (`Char 0x2D);
|
|
1, 18, S (`Char 0x2D);
|
|
1, 19, E (`Unexpected_eoi "script");
|
|
@@ -413,7 +413,7 @@ let tests = [
|
|
expect ~state:`Script_data "f\x00</foo>"
|
|
([ 1, 1, S (`Char 0x66);
|
|
1, 2, E (`Bad_token ("U+0000", "content", "null"));
|
|
- 1, 2, S (`Char Uutf.u_rep)] @
|
|
+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
|
|
(char_sequence ~start:3 "</foo>")));
|
|
|
|
("html.tokenizer.plaintext" >:: fun _ ->
|
|
@@ -424,7 +424,7 @@ let tests = [
|
|
expect ~state:`PLAINTEXT "f\x00</foo>"
|
|
([ 1, 1, S (`Char 0x66);
|
|
1, 2, E (`Bad_token ("U+0000", "content", "null"));
|
|
- 1, 2, S (`Char Uutf.u_rep)] @
|
|
+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
|
|
(char_sequence ~start:3 "</foo>")));
|
|
|
|
("html.tokenizer.comment" >:: fun _ ->
|
|
Index: markup.ml-0.7.2/test/test_input.ml
|
|
===================================================================
|
|
--- markup.ml-0.7.2.orig/test/test_input.ml
|
|
+++ markup.ml-0.7.2/test/test_input.ml
|
|
@@ -71,7 +71,7 @@ let tests = [
|
|
end);
|
|
|
|
("input.bom" >:: fun _ ->
|
|
- [0xFEFF; 0x66]
|
|
+ [Uchar.of_int 0xFEFF; Uchar.of_int 0x66]
|
|
|> of_list
|
|
|> preprocess is_valid_xml_char Error.ignore_errors
|
|
|> fst
|