PyOrigin in Embedded Python > PyOrigin Classes > Worksheet

5.2 Worksheet

1 Import CSV with Auto Settings
2 Import CSV with Selected Rows
3 Import Excel Sheet
4 Import HTML Table from the Web
5 Import MATLAB
6 Import Multiple Files using Pandas into One Worksheet
7 Import Two Excel Sheets
8 Import Using a Filter
9 Import by Setting File Header Info
10 Manipulate Columns and Make Group Plot
11 Split Dataset to Training and Testing Data

This folder contains examples of data import, worksheet manipulation, calculation and graphing with originpro package.
For worksheet related functions in originpro, see worksheet.
For project related functions, see project.

Import CSV with Auto Settings

'''
This sample shows how to use from_file function to import text data to worksheet
'''
import originpro as op

f = op.path('e')+r'Samples\Curve Fitting\Enzyme.dat'

#assume active worksheet
wks = op.find_sheet()

#By default, CSV connector is used
wks.from_file(f)

print(wks.shape)

Import CSV with Selected Rows

'''
This example shows how to do partial import with CSV data connector
'''
import originpro as op

f = op.path('e')+r'Samples\Import and Export\S15-125-03.dat'
wks = op.new_sheet()

#Use CSV connector which is the default
#and connector will be removed after import
dc = op.Connector(wks)

#Specify the rows to import
ss = dc.settings()
partial = ss['partial']
partial[op.attrib_key('Use')]='1'
partial['row']='10:20'

#import from rwo 10 to row 20
dc.imp(f)

Import Excel Sheet

'''
This example shows how to Customize the Excel data connector.
1. Parse headerlines to column labels.
2. Import partial cols and rows.
3. Specify the spreadsheet to import.
'''
import originpro as op

f = op.path('e')+r'Samples\Import and Export\Partial Import.xlsx'
wks = op.new_sheet()

#Create data connector object
dc = op.Connector(wks, dctype='Excel', keep_DC=True)
ss = dc.settings()

#Headerlines to column label
labels = ss['labels']
labels[op.attrib_key('Use')] = '1'
labels['longname'] = 1
labels['unit'] = 2

#Setting for partial import
partial = ss['partial']
partial[op.attrib_key('Use')] = '1'
partial['col'] = '1:3'
partial['row'] = '1:99'

#Import the second spreadsheet 'expt2'
dc.imp(f, sel='expt2')

Import HTML Table from the Web

'''
Despite the name, from_file can actually import from the web
This example shows using the HTML connector to import a specific table 
from a wikipedia web page
'''
import originpro as op
fn = 'https://en.wikipedia.org/wiki/World_population'
wks=op.new_sheet()
wks.from_file(fn, True, 'HTML', 'Tables/_6')

Import MATLAB

'''
This sample shows how to import MATLAB file.
You can try different selection by using GUI
'''

import originpro as op
fn = op.path('e')+r'Samples\Import and Export\GaussianData.mat'
wks=op.new_sheet()

wks.from_file(fn, True, 'MATLAB', 'MATLAB/Data')

Import Multiple Files using Pandas into One Worksheet

"""
This shows import text files using DataFrame instead of Origin's internal import
"""
import originpro as op
import pandas as pd
import os
fd = op.path('e') + r'Samples\Batch2'
wks = op.new_sheet()
col = 0
for file in os.listdir(fd):
    df = pd.read_table(os.path.join(fd, file), header=[0,1,2])    
    wks.from_df(df, col)
    wks.cols_axis('xy', col)
    wks.set_labels(df.columns.get_level_values(0), 'L', col)
    wks.set_labels(df.columns.get_level_values(1), 'U', col)
    #the sample data file has no comment for X column, so we need to remove the 'Unnamed: 0_level_2'
    comments = df.columns.get_level_values(2).tolist()
    comments[0] = ""
    wks.set_labels(comments, 'C', col)
    col = wks.cols

Import Two Excel Sheets

'''
This example shows how to import a second sheet from an Exel file with multiple sheets.
'''
import originpro as op

f = op.path('e')+r'Samples\Import and Export\United States Energy (1980-2013).xls'
wks = op.new_sheet()

#Create data connector object
dc = op.Connector(wks, dctype='Excel', keep_DC=False)

ss = dc.settings()
#1st two rows are header, followed by column name and units
ss['mainheader']=2;
labels = ss['labels']
#labels branch use an attrtibute to turn on, so from a dict, 
#special node is needed to set internal tree attribute
labels[op.attrib_key('Use')] = '1'
labels['longname'] = 1
labels['unit'] = 2

#Import the first sheet, which is "oil"
dc.imp(f, sel='Oil')

#Import a new Excel sheet as a new Origin sheet
dc.new_sheet('Natural Gas')

Import Using a Filter

'''
This example shows how to use a filter created from Import Wizard
'''
import originpro as op

fn = op.path('e')+r'Samples\Import and Export\S15-125-03.dat'
wks=op.new_sheet()
#use a filter in the same folder as the data file
wks.from_file(fn, False, 'Import Filter','.\VarsFromFileNameAndHeader.oif')

Import by Setting File Header Info

'''
This example shows how to control file header. Typically the CSV connector can automatically
detect main header and column header info, but sometimes the automatic detection does not work
for certain file so you need to specify it yourself.
'''
import originpro as op

wks = op.new_sheet()
dc = op.Connector(wks)
dc.source = op.path('e')+r'Samples\Import and Export\F1.dat'
ss = dc.settings()
ss['mainheader']=5
ss['heading']=1
ss['unit']=2
#must not pass in filepath if you are controlling file header
#if file is passed in, the auto detection code will kick in to wipe out the settings above
dc.imp()

Manipulate Columns and Make Group Plot

'''
This sample shows how to prepare worksheet for plotting. 
'''
import originpro as op

#Import Data File
f = op.path('e')+r'Samples\Signal Processing\Sunspot.dat'
wks = op.new_sheet()
wks.from_file(f, keep_DC=False)

#Set worksheet property
wks.cols=5
wks.move_cols(1,2,1) #Insert a date column
wks.cols_axis('xyy',2,4) #Set column designation

#Set "Date" column property
ncol=2
wks.set_formula(ncol,'date(B$ + "/1/" + A$)')
wks.set_label(ncol, 'Date')
wks.as_date(ncol,"yyyy/MM")

#Set "Averaged Sunspot Number" column property
ncol=4
wks.set_formula(ncol,'movavg(D,20,20)')
wks.set_label(ncol, 'Averaged Sunspot Number')

#Plot "Sunspot Number" vs "Date"
gp = op.new_graph()
gp[0].add_plot(f'{wks.lt_range()}!(3,4:5)')  # Plot using data range
gp[0].group()
gp[0].rescale()

Split Dataset to Training and Testing Data

'''
This example splits a dataset with multiple columns to two datasets named Train and Test, 
using the package sklearn. To check for and install if needed, 
open the Script Window (Shift+Alt+3), type the following and press Enter.
 pip -chk pandas sklearn
'''
import numpy as np
import pandas as pd
import originpro as op
from sklearn.model_selection import train_test_split

# Import data and get the independent and dependent data to X and y respectively
ws=op.new_sheet()
ws.from_file(fname=op.path('e')+r"Samples\Statistics\Fisher's Iris Data.dat", keep_DC=False)

# Get first 4 columns as X
X = ws.to_df(c1=0, numcols=4)  

# Get the last column as y
y = ws.to_df(c1=4, numcols=1)  

# Split the dataset into train and test datasets
# train dataset contains 70% samples, and test dataset contains 30% samples
# shuffle the data before splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=1)

# Create worksheet for the splitted datasets
wks = op.new_sheet('w', 'Split')
Num = X_train.columns.shape[0]
wks.from_list(Num, y_train.iloc[:, 0].tolist(), comments='Train', lname=y_train.columns[0])
wks.from_list(Num*2+1, y_test.iloc[:, 0].tolist(), comments='Test', lname=y_train.columns[0])
for idx in range(Num):
    wks.from_list(idx, X_train.iloc[:, idx].tolist(), comments='Train', lname=X_train.columns[idx])
    wks.from_list(Num+1+idx, X_test.iloc[:, idx].tolist(), comments='Test', lname=X_train.columns[idx])