Library : nimdataframe.nim

Status : development

License : MIT opensource

Version : 0.0.5

ProjectStart: 2016-09-16

Latest : 2019-07-25

Compiler : Nim >= 0.19.x devel branch

OS : Linux

Description :

simple dataframe

create a dataframe for display or processing

from online or local csv files

able to create subdataframes from dataframes, sort on columns and compute column statistics

Usage : import nimdataframe

Project : https://github.com/qqtop/NimDataFrame

Docs : http://qqtop.github.io/nimdataframeindex.html

http://qqtop.github.io/nimdataframe.html

Tested : OpenSuse Tumbleweed , Debian

Todo : additional calculations on dataframes

allow right or left align for each column; fullRotate df; improve tests and examples; dataframe names: use col names instead of col numbers; trying to better handle json data, see new json lib by araq; future: filterDf(df:nimdf,cols:nimis,operator:nimss,vals:nimss), e.g. var ndf11 = filterDf(ndf9,@[3,5],@[">","=="],@["Borussia Dortmund","4"]); strings with accents may mess up the frame alignment --> needs to be taken care of in showDf, maybe toRunes directly; use more datasources other than csv, e.g. select query outputs etc.

Notes :
Install : nimble install https://github.com/qqtop/nimdataframe.git

Types

dfcellobject {...}{.inheritable.} = object cellrow*: int cellcol*: int cellcolor*: string
nimss = seq[string]
nimis = seq[int]
nimfs = seq[float]
nimbs = seq[bool]
nimcells = seq[dfcellobject]
nimdf {...}{.inheritable.} = ref object df*: seq[nimss] hasHeader*: bool colcount*: int rowcount*: int colcolors*: nimss colwidths*: nimis colHeaders*: nimss rowHeaders*: nimss dfcells*: nimcells status*: bool frtexttop*: nimss frtextbot*: nimss
Rowrange = nimis

Lets

NIMDATAFRAMEVERSION = "0.0.5"

Consts

asc = "asc"
desc = "desc"

Procs

proc newdfcellobject(): dfcellobject {...}{.raises: [], tags: [].}

proc getRowrange(nrows: int; nrowe: int): Rowrange {...}{.raises: [], tags: [].}

proc newNimDf(): nimdf {...}{.raises: [], tags: [].}

proc newNimSs(): nimss {...}{.raises: [], tags: [].}

proc newNimIs(): nimis {...}{.raises: [], tags: [].}

proc newNimFs(): nimfs {...}{.raises: [], tags: [].}

proc newNimBs(): nimbs {...}{.raises: [], tags: [].}

proc getColorConst[T](sc: T): string

getColorConst

this function returns the color constant (the color escape sequence) for a given colorname, ready to be used in print routines; it is the reverse of the getColorName function. Useful if we have colorname strings read in from a file or a sequence

import nimcx
var astringseq = split("lightgrey,pastelgreen,pastelpink,lightblue,goldenrod,truetomato,truetomato,white",sep=',')
for acolor in astringseq:
    printLn("good color " & acolor , getColorConst(acolor))

proc getData1(url: string; timeout: int = 12000): string {...}{.raises: [ValueError, HttpRequestError, SslError, OverflowError, IOError, TimeoutError, ProtocolError, KeyError, Defect, Exception, OSError], tags: [ReadIOEffect, WriteIOEffect, TimeEffect, ReadEnvEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

getData1

used for internet based data in csv format

proc makeDf1(ufo1: string; hasHeader: bool = false): nimdf {...}{.raises: [], tags: [].}

makeDf1

used to create a dataframe with data string received from getData1

proc getData2(filename: string; cols: int = 2; rows: int = -1; sep: char = ','): auto {...}{.raises: [ IOError, ValueError, Exception, OSError, Defect, CsvError, Defect, IOError, OSError], tags: [ TimeEffect, WriteIOEffect, ReadEnvEffect, ReadIOEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

getData2

used for csv files with a path and filename available

proc makeDf2(ufo1: nimdf; cols: int = 0; rows: int = -1; hasHeader: bool = false; feedback: bool = false): nimdf {...}{.raises: [IOError, ValueError, Exception, OSError, Defect, IOError, ValueError, IOError, ValueError], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect, ReadIOEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

makeDf2

used to create a dataframe from the nimdf object received from getData2, that is from a local csv. If we actually pass in a df and do not use getData2 as a source, the df will be rotated, that is the header line will become col1, which may also come in handy. Note that overall it is better to preprocess data to check for row quality consistency, which is not done here yet, so errors may show

proc rotateDf(ufo1: nimdf; cols: int = 0; hasHeader: bool = false; feedback: bool = false): nimdf {...}{. raises: [IOError, ValueError, Exception, OSError, Defect], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect, ReadIOEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

proc getTotalHeaderColsWitdh(df: nimdf): int {...}{.raises: [], tags: [].}

getTotalHeaderColsWitdh

sum of all headers width

proc showRaw[T](df: nimdf; rrows: openArray[T])

showRaw

needs a df object and a seq with two values, the first being the start row and the second being the end row to show. If you need to return certain rows see getRowDataRange()

proc showFirstLast(df: nimdf; nrows: int = df.rowcount) {...}{. raises: [IOError, ValueError, Exception], tags: [WriteIOEffect, ReadEnvEffect].}

shows the first and last n lines of the dataframe, incl. headers if any

proc showAnyRowRange(df: nimdf; rrows: seq[int]) {...}{. raises: [IOError, ValueError, Exception], tags: [WriteIOEffect, ReadEnvEffect].}

showAnyRowRange

shows the first and last n lines of the dataframe, incl. headers if any

proc showHeaderStatus(df: nimdf; xpos: int = 2) {...}{. raises: [IOError, ValueError, Exception], tags: [WriteIOEffect, ReadEnvEffect].}

showHeaderStatus

proc showCounts(df: nimdf; xpos: int = 2) {...}{.raises: [IOError, ValueError, Exception, OSError, Defect], tags: [WriteIOEffect, ReadEnvEffect, ReadIOEffect, TimeEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

proc colFitMax(df: nimdf; cols: int = 0; adjustwd: int = 0): nimis {...}{.raises: [ValueError], tags: [ReadEnvEffect].}

colFitMax

TODO : provide a better fit, tw as basis is too wide for a df with few cols

calculates best column width to fit into terminal width

all column widths will be same size

cols parameter must state number of cols to be shown default = all cols

if the cols parameter in showDf is different an error will be thrown

adjustwd allows to nudge the column width if a few column chars are not shown

which may happen if no frame is shown

proc showDf(df: nimdf; rows: int = 10; cols: nimis = @[]; colwd: nimis = @[]; colcolors: nimss = @[white, white]; showframe: bool = false; framecolor: string = palegreen; showHeader: bool = false; showRowHeader: bool = false; rowHeadertext: nimss = @[]; headertext: nimss = @[]; leftalignflag: bool = false; cellcolors: nimss = @[]; cellrows: nimis = @[]; cellcols: nimis = @[]; cellcalc: nimss = @[]; frtexttop: nimss = @[]; frtextbot: nimss = @[]; xpos: int = 1) {...}{.raises: [IOError, ValueError, IOError, ValueError, Exception, OSError, Defect], tags: [WriteIOEffect, ReadEnvEffect, ReadIOEffect, TimeEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

showDf

Displays a dataframe

allows selective display of columns , with column numbers passed in as a seq

Convention : the first column = 1

number of rows default = 10 , number of columns default = all if none given , columnwidth default = 8 if none given

an equal columnwidth can be achieved with colwd = colfitmax(df,0) the second param is to nudge the width a bit if required

showFrame default = off

showHeader indicates if an actual header is available

frame character can be shown in selectable color

headerless data can be shown with headertext supplied

cols,colwd,colcolors parameters seqs must be of equal length and corresponding to each other

Note : best to fill in desired values for all parameters , a quick showDf(mydf) will not always be satisfactory , colwd must be supplied

proc showDataframeInfo(df: nimdf; nrows: int = df.rowcount) {...}{. raises: [ValueError, IOError, Exception, OSError, Defect], tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, TimeEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

showDataframeInfo

some basic information about the dataframe, mainly useful during debugging.

proc showDfInfo(df: nimdf; nrows: int = df.rowcount) {...}{. raises: [ValueError, IOError, Exception, OSError, Defect], tags: [ReadEnvEffect, WriteIOEffect, ReadIOEffect, TimeEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

proc getColData(df: nimdf; col: int): nimss {...}{.raises: [IOError, ValueError], tags: [ WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}

getColData

get one column from a nimdf dataframe

Note : col = 1 denotes first col of df , which is consistent with showDf

proc getRowDataRange(df: nimdf; rows: nimis = @[]; cols: nimis = @[]; rowheaders: nimss = @[]): nimdf {...}{.raises: [], tags: [].}

getRowDataRange

creates a new df with rows and cols as stipulated, extracted from an existing df

if rows or cols not stipulated all rows will be brought in

Following example uses rows 1,2,4,6 and cols 1,2,3 from df ndf5 to create a new df

var ndf6 = getRowDataRange(ndf5,rows = @[1,2,4,6],cols = @[1,2,3])

proc sortdf(df: nimdf; sortcol: int = 1; sortorder = asc): nimdf {...}{. raises: [DbError, IOError, ValueError, Defect, OSError, Exception], tags: [DbEffect, ReadDbEffect, WriteDbEffect, WriteIOEffect, ReadEnvEffect, RootEffect, WriteDirEffect].}

sortdf

sorts a dataframe asc or desc

supported sort types are integer ,float or string columns

other types may be added later

the idea implemented here is to read the df into a temporary SQLite table, sort it and return the sorted output as nimdf

var ndf2 = sortdf(ndf,5,"asc")  # sort a dataframe on the fifth col ascending

Note : data columns passed in must be correct for all rows , that is rows with different column count will result in errors: this will be addressed in future versions

proc filterDf(df: nimdf; cols: nimis; operator: nimss; vals: nimss) {...}{.raises: [], tags: [].}

filterDf

TODO

show rows passing a condition

proc makeNimDf(dfcols: seq[nimss]; status: bool = true; hasHeader: bool = false; feedback: bool = false): nimdf {...}{. raises: [IOError, ValueError, Exception, OSError, Defect], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect, ReadIOEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

makeNimDf

creates a nimdf with passed in col data which should be of type nimss

proc dfDefaultSetup(df: nimdf; headertext: nimss = @[]): nimdf {...}{.raises: [], tags: [].}

dfDefaultSetup WIP , needs more testing

quick default setup , which can be adjusted later during showDf if needed

column colors : white , column widths : 10 , header text : pass in or auto column name will be generated

proc createDataFrame(filename: string; cols: int = 2; rows: int = -1; sep: char = ','; hasHeader: bool = false; feedback: bool = false): nimdf {...}{.raises: [ ValueError, HttpRequestError, SslError, OverflowError, IOError, TimeoutError, ProtocolError, KeyError, Defect, Exception, OSError, CsvError], tags: [ ReadIOEffect, WriteIOEffect, TimeEffect, ReadEnvEffect, ExecIOEffect, RootEffect, WriteDirEffect].}

createDataFrame

attempts to create a nimdf dataframe from url or local path

preferred are comma delimited csv or txt files

other should be clean , preprocess as needed

hasHeader refers to the actual data having a header (true) or no header (false). If the data has no header but a header will be added in showDf, set hasHeader to true so showDfInfo will calculate the correct row count, otherwise there may be an off-by-one error

proc createBinaryTestData(filename: string = "nimDfBinaryTestData.csv"; datarows: int = 2000; withHeaders: bool = false) {...}{. raises: [Defect, IOError, OSError, Exception, ValueError], tags: [WriteIOEffect, ReadEnvEffect].}

proc createRandomTestData(filename: string = "nimDfTestData.csv"; datarows: int = 2000; withHeaders: bool = false) {...}{. raises: [Defect, IOError, OSError, ValueError, Exception], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect].}

createRandomTestData

a file will be created in current working directory with mixed type cols

default name nimDfTestData.csv or as given

default columns 8 , default rows 2000 , default headers none

proc createRandomTestDataInt(filename: string = "nimDfTestData.csv"; datarows: int = 2000; withHeaders: bool = false) {...}{. raises: [Defect, IOError, OSError, Exception, ValueError], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect].}

createRandomTestDataInt

a file will be created in current working directory with 8 int cols

default name nimDfTestData.csv or as given

default columns 8 , default rows 2000 , default headers none

proc createRandomTestDataFloat(filename: string = "nimDfTestData.csv"; datarows: int = 2000; withHeaders: bool = false) {...}{. raises: [Defect, IOError, OSError, Exception, ValueError], tags: [TimeEffect, WriteIOEffect, ReadEnvEffect].}

createRandomTestDataFloat

a file will be created in current working directory with 8 float cols

default name nimDfTestData.csv or as given

default columns 8 , default rows 2000 , default headers none

proc dfRowStats(df: nimdf; row: int; exceptCols: seq[int] = @[]): RunningStat {...}{. raises: [], tags: [].}

proc dfColumnStats(df: nimdf; colseq: seq[int]): seq[RunningStat] {...}{. raises: [IOError, ValueError], tags: [WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}

dfColumnStats

returns a seq[RunningStat] for all columns specified in colseq for dataframe df

so if colSeq = @[1,3,6] , we would get stats for cols 1,3,6

see nimdfT11.nim for an example

proc dfShowColumnStats(df: nimdf; desiredcols: seq[int]; colspace: int = 25; xpos: int = 1) {...}{.raises: [IOError, ValueError], tags: [ WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}

dfShowColumnStats

shows output from dfColumnStats

TODO: check for headers in first line to avoid crashes; assert that column data is of SomeNumber type or have an automatic selector for anything numeric

xpos is the starting display position ; colspace allows to nudge the distance between the displayed column statistics

proc sumStats(df: nimdf; numericCols: nimis): RunningStat {...}{. raises: [IOError, ValueError], tags: [WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}

proc dfShowSumStats(df: nimdf; numericCols: nimis; xpos = 2) {...}{. raises: [IOError, ValueError], tags: [WriteIOEffect, ReadEnvEffect, WriteDirEffect, ReadIOEffect].}

dfShowSumStats

shows a statistic for all column sums

may be useful if a dataframe has many columns where there is a need to know the

total sum of all numeric columns and relevant statistics of the resulting sums row

proc dfLoad(filename: string): nimdf {...}{.raises: [IOError, ValueError, Exception, OSError], tags: [ReadIOEffect, WriteIOEffect].}

dfLoad

dfLoad creates a new df from a file created with dfSave

proc dfSave(df: nimdf; filename: string; quiet: bool = false) {...}{.raises: [Defect, IOError, OSError, Defect, IOError, OSError, Exception, ValueError], tags: [WriteIOEffect, ReadEnvEffect].}

dfSave

save a dataframe data to a csv file

quiet = true will show no feedback

Note : if data is not clean, crashes may occur if compiled with -d:release

Converters

converter toNimSs(aseq: seq[string]): nimss {...}{.raises: [], tags: [].}
converter toNimIs(aseq: seq[int]): nimis {...}{.raises: [], tags: [].}
converter toNimFs(aseq: seq[float]): nimfs {...}{.raises: [], tags: [].}
converter toNimBs(aseq: seq[bool]): nimbs {...}{.raises: [], tags: [].}
converter fsToNimSs(aseq: seq[float]): nimss {...}{.raises: [], tags: [].}
converter isToNimSs(aseq: seq[int]): nimss {...}{.raises: [], tags: [].}