Library : nimdataframe.nim
Status : development
License : MIT opensource
Version : 0.0.5
ProjectStart: 2016-09-16
Latest : 2019-07-25
Compiler : Nim >= 0.19.x devel branch
OS : Linux
Description :
simple dataframe
create a dataframe for display or processing
from online or local csv files
supports creating sub-dataframes from dataframes, sorting on columns, and column statistics
Usage : import nimdataframe
Project : https://github.com/qqtop/NimDataFrame
Docs : http://qqtop.github.io/nimdataframeindex.html
Tested : OpenSuse Tumbleweed , Debian
- Todo : additional calculations on dataframes
- allow right or left align for each column; fullRotate df; improve tests and example; dataframe names; instead of col number use col names .. trying to better handle json data, see new json lib by araq; future: filterDf(df:nimdf,cols:nimis,operator:nimss,vals:nimss), e.g. var ndf11 = filterDf(ndf9,@[3,5],@[">","=="],@["Borussia Dortmund","4"]); strings with accents may mess up the frame alignment --> needs to be taken care of in showdf, maybe with toRunes; directly use more datasources other than csv, e.g. select query outputs etc.
Notes :
Install : nimble install https://github.com/qqtop/nimdataframe.git
Types
dfcellobject {...}{.inheritable.} = object cellrow*: int cellcol*: int cellcolor*: string
nimss = seq[string]
nimis = seq[int]
nimfs = seq[float]
nimbs = seq[bool]
nimcells = seq[dfcellobject]
nimdf {...}{.inheritable.} = ref object df*: seq[nimss] hasHeader*: bool colcount*: int rowcount*: int colcolors*: nimss colwidths*: nimis colHeaders*: nimss rowHeaders*: nimss dfcells*: nimcells status*: bool frtexttop*: nimss frtextbot*: nimss
Rowrange = nimis
Lets
NIMDATAFRAMEVERSION = "0.0.5"
Procs
proc newdfcellobject(): dfcellobject {...}{.raises: [], tags: [].}
proc getRowrange(nrows: int; nrowe: int): Rowrange {...}{.raises: [], tags: [].}
proc newNimDf(): nimdf {...}{.raises: [], tags: [].}
proc newNimSs(): nimss {...}{.raises: [], tags: [].}
proc newNimIs(): nimis {...}{.raises: [], tags: [].}
proc newNimFs(): nimfs {...}{.raises: [], tags: [].}
proc newNimBs(): nimbs {...}{.raises: [], tags: [].}
proc getColorConst[T](sc: T): string
-
getColorConst
This function returns the color constant (the color escape sequence) for a given colorname, ready to be used in print routines; it is the reverse of the getColorName function. Useful if we have colorname strings read in from a file or a sequence.
import nimcx
var astringseq = split("lightgrey,pastelgreen,pastelpink,lightblue,goldenrod,truetomato,truetomato,white",sep=',')
for acolor in astringseq:
  printLn("good color " & acolor , getColorConst(acolor))
proc getData1(url: string; timeout: int = 12000): string {...}{.raises: [ValueError, HttpRequestError, SslError, OverflowError, IOError, TimeoutError, ProtocolError, KeyError, Defect, Exception, OSError], tags: [ReadIOEffect, WriteIOEffect, TimeEffect, ReadEnvEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
-
getData1
used for internet based data in csv format
proc makeDf1(ufo1: string; hasHeader: bool = false): nimdf {...}{.raises: [], tags: [].}
-
makeDf1
used to create a dataframe with data string received from getData1
proc getData2(filename: string; cols: int = 2; rows: int = -1; sep: char = ','): auto {...}{.raises: [ IOError, ValueError, Exception, OSError, Defect, CsvError, Defect, IOError, OSError], tags: [ TimeEffect, WriteIOEffect, ReadEnvEffect, ReadIOEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
-
getData2
used for csv files with a path and filename available
proc makeDf2(ufo1: nimdf; cols: int = 0; rows: int = -1; hasHeader: bool = false; feedback: bool = false): nimdf {...}{.raises: [IOError, ValueError, Exception, OSError, Defect, IOError, ValueError, IOError, ValueError], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect, ReadIOEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
-
makeDf2
used to create a dataframe from the nimdf object received from getData2, that is, from a local csv file. If we actually pass in a df and do not use getData2 as a source, the df will be rotated, that is, the header line will become col1, which may also come in handy. Note that overall it is better to preprocess data to check for row quality consistency, which is not done here yet, so errors may show.
proc rotateDf(ufo1: nimdf; cols: int = 0; hasHeader: bool = false; feedback: bool = false): nimdf {...}{. raises: [IOError, ValueError, Exception, OSError, Defect], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect, ReadIOEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
proc getTotalHeaderColsWitdh(df: nimdf): int {...}{.raises: [], tags: [].}
-
getTotalHeaderColsWitdh
sum of the widths of all header columns
proc showRaw[T](df: nimdf; rrows: openArray[T])
-
showRaw
needs a df object and a seq with two values, the first being the start row and the second being the end row to show. If you need to return certain rows, see getRowDataRange().
proc showFirstLast(df: nimdf; nrows: int = df.rowcount) {...}{. raises: [IOError, ValueError, Exception], tags: [WriteIOEffect, ReadEnvEffect].}
- shows the first and last n lines of the dataframe, incl. headers if any
proc showAnyRowRange(df: nimdf; rrows: seq[int]) {...}{. raises: [IOError, ValueError, Exception], tags: [WriteIOEffect, ReadEnvEffect].}
-
showAnyRowRange
shows the first and last n lines of the dataframe, incl. headers if any
proc showHeaderStatus(df: nimdf; xpos: int = 2) {...}{. raises: [IOError, ValueError, Exception], tags: [WriteIOEffect, ReadEnvEffect].}
- showHeaderStatus
proc showCounts(df: nimdf; xpos: int = 2) {...}{.raises: [IOError, ValueError, Exception, OSError, Defect], tags: [WriteIOEffect, ReadEnvEffect, ReadIOEffect, TimeEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
proc colFitMax(df: nimdf; cols: int = 0; adjustwd: int = 0): nimis {...}{.raises: [ValueError], tags: [ReadEnvEffect].}
-
colFitMax
TODO : provide a better fit, as tw (terminal width) as basis is too wide for a df with few cols
calculates best column width to fit into terminal width
all column widths will be same size
cols parameter must state number of cols to be shown default = all cols
if the cols parameter in showDf is different an error will be thrown
adjustwd allows to nudge the column width if a few column chars are not shown
which may happen if no frame is shown
proc showDf(df: nimdf; rows: int = 10; cols: nimis = @[]; colwd: nimis = @[]; colcolors: nimss = @[white, white]; showframe: bool = false; framecolor: string = palegreen; showHeader: bool = false; showRowHeader: bool = false; rowHeadertext: nimss = @[]; headertext: nimss = @[]; leftalignflag: bool = false; cellcolors: nimss = @[]; cellrows: nimis = @[]; cellcols: nimis = @[]; cellcalc: nimss = @[]; frtexttop: nimss = @[]; frtextbot: nimss = @[]; xpos: int = 1) {...}{.raises: [IOError, ValueError, IOError, ValueError, Exception, OSError, Defect], tags: [WriteIOEffect, ReadEnvEffect, ReadIOEffect, TimeEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
-
showDf
Displays a dataframe
allows selective display of columns , with column numbers passed in as a seq
Convention : the first column = 1
number of rows default = 10 ; number of columns default = all if none given ; columnwidth default = 8 if none given
an equal columnwidth can be achieved with colwd = colfitmax(df,0) ; the second param is to nudge the width a bit if required
showFrame default = off
showHeader indicates if an actual header is available
frame character can be shown in selectable color
headerless data can be shown with headertext supplied
the cols, colwd and colcolors parameter seqs must be of equal length and correspond to each other
Note : best to fill in desired values for all parameters , a quick showDf(mydf) will not always be satisfactory , colwd must be supplied
proc showDataframeInfo(df: nimdf; nrows: int = df.rowcount) {...}{. raises: [ValueError, IOError, Exception, OSError, Defect], tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, TimeEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
-
showDataframeInfo
some basic information about the dataframe, mainly useful during debugging.
proc showDfInfo(df: nimdf; nrows: int = df.rowcount) {...}{. raises: [ValueError, IOError, Exception, OSError, Defect], tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, TimeEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
proc getColData(df: nimdf; col: int): nimss {...}{.raises: [IOError, ValueError], tags: [ WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}
-
getColData
get one column from a nimdf dataframe
Note : col = 1 denotes first col of df , which is consistent with showDf
proc getRowDataRange(df: nimdf; rows: nimis = @[]; cols: nimis = @[]; rowheaders: nimss = @[]): nimdf {...}{.raises: [], tags: [].}
-
getRowDataRange
creates a new df with rows and cols as stipulated, extracted from an existing df
if rows or cols are not stipulated, all rows will be brought in
Following example uses rows 1,2,4,6 and cols 1,2,3 from df ndf5 to create a new df
var ndf6 = getRowDataRange(ndf5,rows = @[1,2,4,6],cols = @[1,2,3])
proc sortdf(df: nimdf; sortcol: int = 1; sortorder = asc): nimdf {...}{. raises: [DbError, IOError, ValueError, Defect, OSError, Exception], tags: [DbEffect, ReadDbEffect, WriteDbEffect, WriteIOEffect, ReadEnvEffect, RootEffect, WriteDirEffect].}
-
sortdf
sorts a dataframe asc or desc
supported sort types are integer ,float or string columns
other types maybe added later
the idea implemented here is to read the df into a temp sqlite table, sort it and return the sorted output as nimdf
var ndf2 = sortdf(ndf,5,"asc") # sort a dataframe on the fifth col ascending
- Note : data columns passed in must be correct for all rows , that is rows with different column count will result in errors
- this will be addressed in future versions
proc filterDf(df: nimdf; cols: nimis; operator: nimss; vals: nimss) {...}{.raises: [], tags: [].}
-
filterDf
TODO
show rows passing a condition
proc makeNimDf(dfcols: seq[nimss]; status: bool = true; hasHeader: bool = false; feedback: bool = false): nimdf {...}{. raises: [IOError, ValueError, Exception, OSError, Defect], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect, ReadIOEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
-
makeNimDf
creates a nimdf with passed in col data which should be of type nimss
proc dfDefaultSetup(df: nimdf; headertext: nimss = @[]): nimdf {...}{.raises: [], tags: [].}
-
dfDefaultSetup WIP , needs more testing
quick default setup , which can be adjusted later during showDf if needed
column colors : white ; column widths : 10 ; header text : pass in or auto column name will be generated
proc createDataFrame(filename: string; cols: int = 2; rows: int = -1; sep: char = ','; hasHeader: bool = false; feedback: bool = false): nimdf {...}{.raises: [ ValueError, HttpRequestError, SslError, OverflowError, IOError, TimeoutError, ProtocolError, KeyError, Defect, Exception, OSError, CsvError], tags: [ ReadIOEffect, WriteIOEffect, TimeEffect, ReadEnvEffect, ExecIOEffect, RootEffect, WriteDirEffect].}
-
createDataFrame
attempts to create a nimdf dataframe from url or local path
preferred are comma delimited csv or txt files
other formats should be clean , preprocess as needed
hasHeader refers to actual data having a header (true) or no header (false). If data has no header but a header will be added in showdf, set hasHeader to true so showdfinfo will calculate the correct row count; otherwise there may be an off-by-1 error.
proc createBinaryTestData(filename: string = "nimDfBinaryTestData.csv"; datarows: int = 2000; withHeaders: bool = false) {...}{. raises: [Defect, IOError, OSError, Exception, ValueError], tags: [WriteIOEffect, ReadEnvEffect].}
proc createRandomTestData(filename: string = "nimDfTestData.csv"; datarows: int = 2000; withHeaders: bool = false) {...}{. raises: [Defect, IOError, OSError, ValueError, Exception], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect].}
-
createRandomTestData
a file will be created in current working directory with mixed type cols
default name nimDfTestData.csv or as given
default columns : 8 ; default rows : 2000 ; default headers : none
proc createRandomTestDataInt(filename: string = "nimDfTestData.csv"; datarows: int = 2000; withHeaders: bool = false) {...}{. raises: [Defect, IOError, OSError, Exception, ValueError], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect].}
-
createRandomTestDataInt
a file will be created in current working directory with 8 int cols
default name nimDfTestData.csv or as given
default columns : 8 ; default rows : 2000 ; default headers : none
proc createRandomTestDataFloat(filename: string = "nimDfTestData.csv"; datarows: int = 2000; withHeaders: bool = false) {...}{. raises: [Defect, IOError, OSError, Exception, ValueError], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect].}
-
createRandomTestDataFloat
a file will be created in current working directory with 8 float cols
default name nimDfTestData.csv or as given
default columns : 8 ; default rows : 2000 ; default headers : none
proc dfRowStats(df: nimdf; row: int; exceptCols: seq[int] = @[]): RunningStat {...}{. raises: [], tags: [].}
proc dfColumnStats(df: nimdf; colseq: seq[int]): seq[RunningStat] {...}{. raises: [IOError, ValueError], tags: [WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}
-
dfColumnStats
returns a seq[RunningStat] for all columns specified in colseq for dataframe df
so if colSeq = @[1,3,6] , we would get stats for cols 1,3,6
see nimdfT11.nim for an example
proc dfShowColumnStats(df: nimdf; desiredcols: seq[int]; colspace: int = 25; xpos: int = 1) {...}{.raises: [IOError, ValueError], tags: [ WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}
-
dfShowColumnStats
shows output from dfColumnStats
- TODO: check for headers in first line to avoid crashes
- assert that column data is Somenumber type or have an automatic selector for anything numeric
xpos : the starting display position ; colspace : allows to nudge the distance between the displayed column statistics
proc sumStats(df: nimdf; numericCols: nimis): RunningStat {...}{. raises: [IOError, ValueError], tags: [WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}
proc dfShowSumStats(df: nimdf; numericCols: nimis; xpos = 2) {...}{. raises: [IOError, ValueError], tags: [WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}
-
dfShowSumStats
shows a statistic for all column sums
may be useful if a dataframe has many columns where there is a need to know the
total sum of all numeric columns and relevant statistics of the resulting sums row
proc dfLoad(filename: string): nimdf {...}{.raises: [IOError, ValueError, Exception, OSError], tags: [ReadIOEffect, WriteIOEffect].}
-
dfLoad
dfLoad creates a new df from a file created with dfSave
proc dfSave(df: nimdf; filename: string; quiet: bool = false) {...}{.raises: [Defect, IOError, OSError, Defect, IOError, OSError, Exception, ValueError], tags: [WriteIOEffect, ReadEnvEffect].}
-
dfSave
save a dataframe data to a csv file
quiet = true will show no feedback
Note : if data is not clean, crashes may occur if compiled with -d:release
Converters
converter toNimSs(aseq: seq[string]): nimss {...}{.raises: [], tags: [].}
converter toNimIs(aseq: seq[int]): nimis {...}{.raises: [], tags: [].}
converter toNimFs(aseq: seq[float]): nimfs {...}{.raises: [], tags: [].}
converter toNimBs(aseq: seq[bool]): nimbs {...}{.raises: [], tags: [].}
converter fsToNimSs(aseq: seq[float]): nimss {...}{.raises: [], tags: [].}
converter isToNimSs(aseq: seq[int]): nimss {...}{.raises: [], tags: [].}