r - Importing long character field with numbers with read.table() -
I am trying to import a large data set that has a column representing a document number. The field contains a 25-digit number with leading zeros. I tried to import the data using read.table(), but this specific field comes out as "1e+19", even when assigning the "character" class during import.
# Import elyte.
# Every column is requested as character via colClasses. R is case-sensitive:
# the argument must be spelled `colClasses` and the logical constant `FALSE`.
colnames <- c(
  "patnr", "name", "birthday", "sex", "casenr", "bew", "art", "docnr",
  "date", "time", "none", "na", "k", "cl", "ca", "corca"
)
classes <- rep("character", length(colnames))
elyte <- read.table(
  file = "elyte.txt",
  skip = 3,            # the first three lines of the file are not read as data
  comment.char = "",   # disable comment handling
  sep = "|",
  col.names = colnames,
  header = FALSE,
  colClasses = classes
)
The original data looks like this:
0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011524|20000127|084800||140|3.7|100|2.1|
0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011541|20000127|080200||||||
0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011562|20000127|101800||140|4.6|101|2.2|
0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011579|20000127|134500||138|4.0||2.2|
0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011591|20000128|084200||138|3.6|98|2.1|
0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011593|20000128|085900||||||
0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011653|20000129|093400||140|4.2|99|2.2|
0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011717|20000129|094100||||||
What I get is the following:
patnr name birthday sex casenr bew art docnr date time none na k cl ca corca
1 0010000005 weber 19091220 1 0000337340 00000 lab 1e+19 20000127 084800 140 3.7 100 2.1
2 0010000005 weber 19091220 1 0000337340 00000 lab 1e+19 20000127 080200
3 0010000005 weber 19091220 1 0000337340 00000 lab 1e+19 20000127 101800 140 4.6 101 2.2
4 0010000005 weber 19091220 1 0000337340 00000 lab 1e+19 20000127 134500 138 4.0 2.2
5 0010000005 weber 19091220 1 0000337340 00000 lab 1e+19 20000128 084200 138 3.6 98 2.1
6 0010000005 weber 19091220 1 0000337340 00000 lab 1e+19 20000128 085900
How can I prevent the transformation of "docnr" into "1e+19"?
Your example works when the column type is set to character.
Here is what I did:
# Reproducible example: the eight sample records, one per element.
txt <- c(
  "0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011524|20000127|084800||140|3.7|100|2.1|",
  "0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011541|20000127|080200||||||",
  "0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011562|20000127|101800||140|4.6|101|2.2|",
  "0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011579|20000127|134500||138|4.0||2.2|",
  "0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011591|20000128|084200||138|3.6|98|2.1|",
  "0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011593|20000128|085900||||||",
  "0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011653|20000129|093400||140|4.2|99|2.2|",
  "0010000005|weber|19091220|1|0000337340|00000|lab|0000010000000000000011717|20000129|094100||||||"
)
# Join the records into a single newline-separated string for `text =`.
txt <- paste(txt, collapse = "\n")

colnames <- c(
  "patnr", "name", "birthday", "sex", "casenr", "bew", "art", "docnr",
  "date", "time", "none", "na", "k", "cl", "ca", "corca"
)
classes <- rep("character", length(colnames))

# R is case-sensitive: the argument is `colClasses` and the constant is
# `FALSE`. With skip = 3 the first three records are dropped, so five of the
# eight records remain.
elyte <- read.table(
  text = txt,
  skip = 3,
  comment.char = "",
  sep = "|",
  col.names = colnames,
  header = FALSE,
  colClasses = classes
)
elyte
#        patnr  name birthday sex     casenr   bew art                     docnr     date   time none  na   k cl  ca corca
# 1 0010000005 weber 19091220   1 0000337340 00000 lab 0000010000000000000011579 20000127 134500      138 4.0    2.2
# 2 0010000005 weber 19091220   1 0000337340 00000 lab 0000010000000000000011591 20000128 084200      138 3.6 98 2.1
# 3 0010000005 weber 19091220   1 0000337340 00000 lab 0000010000000000000011593 20000128 085900
# 4 0010000005 weber 19091220   1 0000337340 00000 lab 0000010000000000000011653 20000129 093400      140 4.2 99 2.2
# 5 0010000005 weber 19091220   1 0000337340 00000 lab 0000010000000000000011717 20000129 094100
Comments
Post a Comment