Skip to content

Commit 1cd5063

Browse files
committed
Bug in function check_char corrected: instead of only accepting \r\n as a new
line, it accepted any amount of \r followed by \n. Float scanning code revisited, commenting the code and avoiding side effect in function application. Type file_name now used to define in_channel_name, hence its definition goes before in_channel_name. open/close_in --> Pervasives.open/close_in when necessary. Indentation revisited. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@16421 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
1 parent 24b8bf5 commit 1cd5063

File tree

1 file changed

+75
-47
lines changed

1 file changed

+75
-47
lines changed

stdlib/scanf.ml

+75-47
Original file line numberDiff line numberDiff line change
@@ -38,40 +38,40 @@ module type SCANNING = sig
3838

3939
val stdin : in_channel;;
4040
(* The scanning buffer reading from [Pervasives.stdin].
41-
[stdib] is equivalent to [Scanning.from_channel Pervasives.stdin]. *)
41+
[stdib] is equivalent to [Scanning.from_channel Pervasives.stdin]. *)
4242

4343
val stdib : in_channel;;
4444
(* An alias for [Scanf.stdin], the scanning buffer reading from
4545
[Pervasives.stdin]. *)
4646

4747
val next_char : scanbuf -> char;;
4848
(* [Scanning.next_char ib] advance the scanning buffer for
49-
one character.
50-
If no more character can be read, sets a end of file condition and
51-
returns '\000'. *)
49+
one character.
50+
If no more character can be read, sets a end of file condition and
51+
returns '\000'. *)
5252

5353
val invalidate_current_char : scanbuf -> unit;;
5454
(* [Scanning.invalidate_current_char ib] mark the current_char as already
55-
scanned. *)
55+
scanned. *)
5656

5757
val peek_char : scanbuf -> char;;
5858
(* [Scanning.peek_char ib] returns the current char available in
59-
the buffer or reads one if necessary (when the current character is
60-
already scanned).
61-
If no character can be read, sets an end of file condition and
62-
returns '\000'. *)
59+
the buffer or reads one if necessary (when the current character is
60+
already scanned).
61+
If no character can be read, sets an end of file condition and
62+
returns '\000'. *)
6363

6464
val checked_peek_char : scanbuf -> char;;
65-
(* Same as above but always returns a valid char or fails:
66-
instead of returning a null char when the reading method of the
67-
input buffer has reached an end of file, the function raises exception
68-
[End_of_file]. *)
65+
(* Same as [Scanning.peek_char] above but always returns a valid char or
66+
fails: instead of returning a null char when the reading method of the
67+
input buffer has reached an end of file, the function raises exception
68+
[End_of_file]. *)
6969

7070
val store_char : int -> scanbuf -> char -> int;;
7171
(* [Scanning.store_char lim ib c] adds [c] to the token buffer
72-
of the scanning buffer. It also advances the scanning buffer for one
73-
character and returns [lim - 1], indicating the new limit
74-
for the length of the current token. *)
72+
of the scanning buffer [ib]. It also advances the scanning buffer for
73+
one character and returns [lim - 1], indicating the new limit for the
74+
length of the current token. *)
7575

7676
val skip_char : int -> scanbuf -> int;;
7777
(* [Scanning.skip_char lim ib] ignores the current character. *)
@@ -82,41 +82,41 @@ module type SCANNING = sig
8282

8383
val token : scanbuf -> string;;
8484
(* [Scanning.token ib] returns the string stored into the token
85-
buffer of the scanning buffer: it returns the token matched by the
86-
format. *)
85+
buffer of the scanning buffer: it returns the token matched by the
86+
format. *)
8787

8888
val reset_token : scanbuf -> unit;;
8989
(* [Scanning.reset_token ib] resets the token buffer of
90-
the given scanning buffer. *)
90+
the given scanning buffer. *)
9191

9292
val char_count : scanbuf -> int;;
9393
(* [Scanning.char_count ib] returns the number of characters
94-
read so far from the given buffer. *)
94+
read so far from the given buffer. *)
9595

9696
val line_count : scanbuf -> int;;
9797
(* [Scanning.line_count ib] returns the number of new line
98-
characters read so far from the given buffer. *)
98+
characters read so far from the given buffer. *)
9999

100100
val token_count : scanbuf -> int;;
101101
(* [Scanning.token_count ib] returns the number of tokens read
102-
so far from [ib]. *)
102+
so far from [ib]. *)
103103

104104
val eof : scanbuf -> bool;;
105105
(* [Scanning.eof ib] returns the end of input condition
106-
of the given buffer. *)
106+
of the given buffer. *)
107107

108108
val end_of_input : scanbuf -> bool;;
109109
(* [Scanning.end_of_input ib] tests the end of input condition
110-
of the given buffer (if no char has ever been read, an attempt to
111-
read one is performed). *)
110+
of the given buffer (if no char has ever been read, an attempt to
111+
read one is performed). *)
112112

113113
val beginning_of_input : scanbuf -> bool;;
114114
(* [Scanning.beginning_of_input ib] tests the beginning of input
115-
condition of the given buffer. *)
115+
condition of the given buffer. *)
116116

117117
val name_of_input : scanbuf -> string;;
118118
(* [Scanning.name_of_input ib] returns the name of the character
119-
source for input buffer [ib]. *)
119+
source for input buffer [ib]. *)
120120

121121
val open_in : file_name -> in_channel;;
122122
val open_in_bin : file_name -> in_channel;;
@@ -134,8 +134,11 @@ end
134134
module Scanning : SCANNING = struct
135135

136136
(* The run-time library for scanf. *)
137+
138+
type file_name = string;;
139+
137140
type in_channel_name =
138-
| From_file of string * Pervasives.in_channel
141+
| From_file of file_name * Pervasives.in_channel
139142
| From_string
140143
| From_function
141144
| From_channel of Pervasives.in_channel
@@ -156,13 +159,11 @@ module Scanning : SCANNING = struct
156159

157160
type scanbuf = in_channel;;
158161

159-
type file_name = string;;
160-
161162
let null_char = '\000';;
162163

163-
(* Reads a new character from input buffer. Next_char never fails,
164-
even in case of end of input: it then simply sets the end of file
165-
condition. *)
164+
(* Reads a new character from input buffer.
165+
Next_char never fails, even in case of end of input:
166+
it then simply sets the end of file condition. *)
166167
let next_char ib =
167168
try
168169
let c = ib.get_next_char () in
@@ -180,7 +181,8 @@ module Scanning : SCANNING = struct
180181
;;
181182

182183
let peek_char ib =
183-
if ib.current_char_is_valid then ib.current_char else next_char ib;;
184+
if ib.current_char_is_valid then ib.current_char else next_char ib
185+
;;
184186

185187
(* Returns a valid current char for the input buffer. In particular
186188
no irrelevant null character (as set by [next_char] in case of end
@@ -201,6 +203,7 @@ module Scanning : SCANNING = struct
201203
let eof ib = ib.eof;;
202204

203205
let beginning_of_input ib = ib.char_count = 0;;
206+
204207
let name_of_input ib =
205208
match ib.input_name with
206209
| From_file (fname, _ic) -> fname
@@ -212,8 +215,11 @@ module Scanning : SCANNING = struct
212215
let char_count ib =
213216
if ib.current_char_is_valid then ib.char_count - 1 else ib.char_count
214217
;;
218+
215219
let line_count ib = ib.line_count;;
220+
216221
let reset_token ib = Buffer.reset ib.tokbuf;;
222+
217223
let invalidate_current_char ib = ib.current_char_is_valid <- false;;
218224

219225
let token ib =
@@ -324,7 +330,7 @@ module Scanning : SCANNING = struct
324330
let file_buffer_size = ref 1024;;
325331

326332
(* The scanner closes the input channel at end of input. *)
327-
let scan_close_at_end ic = close_in ic; raise End_of_file;;
333+
let scan_close_at_end ic = Pervasives.close_in ic; raise End_of_file;;
328334

329335
(* The scanner does not close the input channel at end of input:
330336
it just raises [End_of_file]. *)
@@ -375,15 +381,15 @@ module Scanning : SCANNING = struct
375381
match fname with
376382
| "-" -> stdin
377383
| fname ->
378-
let ic = open_in fname in
384+
let ic = Pervasives.open_in fname in
379385
from_ic_close_at_end (From_file (fname, ic)) ic
380386
;;
381387

382388
let open_in_bin fname =
383389
match fname with
384390
| "-" -> stdin
385391
| fname ->
386-
let ic = open_in_bin fname in
392+
let ic = Pervasives.open_in_bin fname in
387393
from_ic_close_at_end (From_file (fname, ic)) ic
388394
;;
389395

@@ -431,14 +437,16 @@ let bad_token_length message =
431437
bad_input
432438
(Printf.sprintf
433439
"scanning of %s failed: \
434-
the specified length was too short for token" message)
440+
the specified length was too short for token"
441+
message)
435442
;;
436443

437444
let bad_end_of_input message =
438445
bad_input
439446
(Printf.sprintf
440447
"scanning of %s failed: \
441-
premature end of file occurred before end of token" message)
448+
premature end of file occurred before end of token"
449+
message)
442450
;;
443451

444452
let bad_float () =
@@ -478,13 +486,22 @@ let rec skip_whites ib =
478486
We are also careful to treat "\r\n" in the input as an end of line marker:
479487
it always matches a '\n' specification in the input format string. *)
480488
let rec check_char ib c =
481-
if c = ' ' then skip_whites ib else
482-
let ci = Scanning.checked_peek_char ib in
483-
if ci = c then Scanning.invalidate_current_char ib else
484-
match ci with
485-
| '\r' when c = '\n' ->
486-
Scanning.invalidate_current_char ib; check_char ib '\n'
487-
| _ -> character_mismatch c ci
489+
match c with
490+
| ' ' -> skip_whites ib
491+
| '\n' -> check_newline ib
492+
| c -> check_this_char ib c
493+
494+
and check_this_char ib c =
495+
let ci = Scanning.checked_peek_char ib in
496+
if ci = c then Scanning.invalidate_current_char ib else
497+
character_mismatch c ci
498+
499+
and check_newline ib =
500+
let ci = Scanning.checked_peek_char ib in
501+
match ci with
502+
| '\n' -> Scanning.invalidate_current_char ib
503+
| '\r' -> Scanning.invalidate_current_char ib; check_this_char ib '\n'
504+
| _ -> character_mismatch '\n' ci
488505
;;
489506

490507
(* Extracting tokens from the output token buffer. *)
@@ -682,6 +699,7 @@ let scan_int_conv conv width ib =
682699
;;
683700

684701
(* Scanning floating point numbers. *)
702+
685703
(* Fractional part is optional and can be reduced to 0 digits. *)
686704
let scan_frac_part width ib =
687705
if width = 0 then width else
@@ -768,12 +786,22 @@ let scan_caml_float width precision ib =
768786
match c with
769787
| '.' ->
770788
let width = Scanning.store_char width ib c in
789+
(* The effective width available for scanning the fractional part is
790+
the minimum of declared precision and width left. *)
771791
let precision = min width precision in
772-
let width = width - (precision - scan_frac_part precision ib) in
792+
(* After scanning the fractional part with [precision] provisional width,
793+
[width_precision] is left. *)
794+
let width_precision = scan_frac_part precision ib in
795+
(* Hence, scanning the fractional part took exactly
796+
[precision - width_precision] chars. *)
797+
let frac_width = precision - width_precision in
798+
(* And new provisional width is [width - width_precision. *)
799+
let width = width - frac_width in
773800
scan_exp_part width ib
774801
| 'e' | 'E' ->
775802
scan_exp_part width ib
776803
| _ -> bad_float ()
804+
;;
777805

778806
(* Scan a regular string:
779807
stops when encountering a space, if no scanning indication has been given;

0 commit comments

Comments
 (0)