Display Chemical Structures in Worksheet cells using third party DLLs

 

Introduction

The Simplified Molecular-Input Line-Entry System (SMILES) is a specification in the form of a line notation for describing the structure of chemical molecules using short ASCII strings. This article describes how to use SMILES within Origin.

For more information about SMILES see the Wikipedia SMILES article.

Indigo Toolkit

The Indigo toolkit for organic chemistry from EPAM Life Sciences provides APIs for rendering SMILES strings to images. You can download both 64-bit and 32-bit DLLs from the Indigo Toolkit page of the EPAM website. We will use the DLLs from Origin C for processing SMILES strings within Origin.

The 32-bit and 64-bit DLLs have the same names and cannot be used by Origin at the same time. Using the download link above, download only the DLLs that match the bitness (32-bit or 64-bit) of the Origin installation you will be running. Origin needs only three DLLs from the Zip file. Open the Zip file, navigate to the dynamic folder, and copy the following DLLs to your User Files\OriginC\ folder:

  • indigo.dll
  • indigo-renderer.dll
  • zlib.dll

Origin C Code

Indigo.h

//--------------------------------------------------------------------------
// Indigo.h
//
// This header file and the required Indigo DLLs should be put into your 
// Origin 'User Files\OriginC' folder.
//
// The Required DLLs are:
// zlib.dll
// indigo.dll
// indigo-renderer.dll
//
// You can download both 64-bit and 32-bit DLLs from GGA Software's Indigo 
// Download Page ( http://ggasoftware.com/download/indigo ).  The 32-bit and
// 64-bit DLLs have the same names and can not be used by Origin at the same
// time. Download only the DLLs that match the Origin platform (bits) you
// will be running. Open the Zip file and navigate to the 'dynamic' folder.
//--------------------------------------------------------------------------
// Include guard: the declarations below are processed only once per
// compilation even if this header is included several times.
#ifndef __INDIGO_H__
#define __INDIGO_H__

//--------------------------------------------------------------------------
// indigo.dll
// A pragma is used to tell Origin C the following declared functions
// are in the named DLL which is located in the same folder as this header file.
//--------------------------------------------------------------------------
#pragma dll(indigo, header)

// Create a molecule object from a text description such as a SMILES string.
// The returned int is used by callers as a handle that is later passed to
// the render functions and released with indigoFree.
// NOTE(review): the Indigo C API is understood to signal failure with -1
// (not 0) -- confirm against the Indigo documentation.
int indigoLoadMoleculeFromString (const char *str);

// Set a named Indigo option (for example "render-output-format").
// Both the option name and its value are passed as strings.
// http://ggasoftware.com/opensource/indigo/api/options
int indigoSetOption (const char *name, const char *value);

// Create an in-memory output object and return its handle.
int indigoWriteBuffer (void);
// Retrieve the address and size of the data held by a buffer object.
// NOTE(review): presumably the memory stays owned by Indigo and is valid
// only until the handle is freed -- confirm.
int indigoToBuffer (int handle, char **buf, int *size);
// Release an object previously returned by one of the functions above.
int indigoFree (int handle);

//--------------------------------------------------------------------------
// indigo-renderer.dll
// A pragma is used to tell Origin C the following declared functions
// are in the named DLL which is located in the same folder as this header file.
//--------------------------------------------------------------------------
#pragma dll(indigo-renderer, header)

// Render an object into an output object (e.g. one from indigoWriteBuffer).
int indigoRender (int object, int output);
// Render an object directly into the named image file.
int indigoRenderToFile (int object, const char *filename);

#endif // __INDIGO_H__

Chemical Structure.c

#include <origin.h>
#include <..\OriginLab\theme_utils.h> // For setting worksheet row height
#include "indigo.h" // Indigo DLL functions

//---------------------------------------------------------------------------
// render_molecule_to_file
//
// Render a molecule description (e.g. a SMILES string) to an image file
// using the Indigo renderer.
//
// pcszFileName = file name of target image
// pcszMolecule = source molecule string
// pcszFileType = target image type
//
// Returns true for success or false for error.
//---------------------------------------------------------------------------
static bool render_molecule_to_file(LPCSTR pcszFileName, LPCSTR pcszMolecule, LPCSTR pcszFileType)
{
        if( NULL == pcszFileName || NULL == pcszMolecule || NULL == pcszFileType )
                return false;

        // Indigo reports failure with -1, which is non-zero.  The handle must
        // therefore be compared against the error value instead of being
        // tested for truth, otherwise an unparsable molecule looks valid.
        int nMolecule = indigoLoadMoleculeFromString(pcszMolecule);
        if( nMolecule < 0 )
                return false;

        // There are many options that can be set.  See more options at:
        // http://ggasoftware.com/opensource/indigo/api/options
        // Option failures are not treated as fatal; rendering is still attempted.
        indigoSetOption("render-output-format", pcszFileType);
        indigoSetOption("render-background-color", "255, 255, 255");

        // indigoRenderToFile writes straight to the file, so no intermediate
        // write-buffer object is required.  A negative result (-1) means the
        // render failed and must not be converted to 'true'.
        bool bRet = ( indigoRenderToFile(nMolecule, pcszFileName) > 0 );

        // Always release the molecule handle, whether or not rendering worked.
        indigoFree(nMolecule);
        return bRet;
}

// Error numbers returned by the molecule image functions in this file.
// The values are spelled out so that a code seen in a log or message can be
// mapped back to its name without counting enumerators.
enum {
        CHEM_SUCCESS            = 0,  // no error
        CHEM_USER_CANCEL        = 1,  // processing was canceled by the user
        CHEM_INVALID_WKS        = 2,  // worksheet is not valid
        CHEM_INVALID_SOURCE_COL = 3,  // source (molecule) column is not valid
        CHEM_INVALID_TARGET_COL = 4,  // target (image) column is not valid
        CHEM_CREATE_FOLDER      = 5,  // image subfolder could not be created
        CHEM_CREATE_FILE_NAME   = 6,  // image file name could not be created
        CHEM_RENDER_TO_FILE     = 7,  // image could not be rendered to file
        CHEM_GET_CELL           = 8,  // source cell could not be read
        CHEM_SET_CELL           = 9,  // target cell could not be written
        CHEM_INSERT_COL         = 10, // image column could not be inserted
};

//---------------------------------------------------------------------------
// convert_molecule_to_image_links
//
// For every non-empty cell in the molecule column, make sure an image file
// for that molecule exists in the "chemical\images" subfolder of the User
// Files folder (rendering it if necessary) and store a "file://" link to the
// image in the same row of the image column.
//
// wks = source worksheet
// nMoleculeCol = index of source molecule column
// nImgCol = index of target image column
//
// Returns 0 for success or a non-zero error number.
//---------------------------------------------------------------------------
static int convert_molecule_to_image_links(Worksheet& wks, int nMoleculeCol, int nImgCol)
{
        //--------------------------------------------------------
        // Check arguments.
        //--------------------------------------------------------
        if( !wks.IsValid() )
                return CHEM_INVALID_WKS;
        int nNumCols = wks.GetNumCols();
        if( nMoleculeCol < 0 || nMoleculeCol >= nNumCols )
                return CHEM_INVALID_SOURCE_COL;
        if( nImgCol < 0 || nImgCol >= nNumCols )
                return CHEM_INVALID_TARGET_COL;

        //--------------------------------------------------------
        // Make sure the image target subfolder exists.
        //--------------------------------------------------------
        char szFolder[] = "chemical\\images";
        if( !create_folder_in_uff(szFolder) )
                return CHEM_CREATE_FOLDER;
        string strPath;
        strPath.Format("%s%s\\", GetOriginPath(ORIGIN_PATH_USER), szFolder);

        //--------------------------------------------------------
        // Variables used within the loop.
        //--------------------------------------------------------
        char szFileType[] = "png";
        string str, strMolecule, strFileName;
        int nErr = CHEM_SUCCESS;

        //--------------------------------------------------------
        // Loop and process each molecule.
        //--------------------------------------------------------
        Dataset ds(wks, nMoleculeCol);
        int nNumRows = ds.GetSize(); // the loop never resizes the source column

        progressBox pb("Processing Molecules", PBOX_TOPMOST);
        pb.SetRange(0, nNumRows);

        for( int nRow = 0; nRow < nNumRows; nRow++ )
        {
                if( !pb.Set(nRow) )
                {
                        nErr = CHEM_USER_CANCEL;
                        break;
                }

                // Rows whose cell cannot be read are skipped, not treated as errors.
                if( !wks.GetCell(nRow, nMoleculeCol, strMolecule) )
                        continue;

                strMolecule.TrimLeft();
                strMolecule.TrimRight();
                if( strMolecule.GetLength() == 0 )
                        continue; // no data to process

                // Build the image file name for this molecule.  If no buffer can
                // be obtained the row must fail: carrying on would reuse the file
                // name left over from the previous row and link the wrong image.
                LPSTR pszFileName = strFileName.GetBuffer(MAX_PATH);
                if( NULL == pszFileName )
                {
                        nErr = CHEM_CREATE_FILE_NAME;
                        break;
                }
                if( !create_file_name_from_data(pszFileName, strPath, (LPBYTE)(LPCSTR)strMolecule, strMolecule.GetLength(), szFileType) )
                        nErr = CHEM_CREATE_FILE_NAME;
                strFileName.ReleaseBuffer(); // always release, even on failure
                if( nErr )
                        break;

                // Only render when the image is not already on disk; an existing
                // file with the same name is reused.
                if( !strFileName.IsFile() )
                {
                        if( !render_molecule_to_file(strFileName, strMolecule, szFileType) )
                        {
                                nErr = CHEM_RENDER_TO_FILE;
                                break;
                        }
                }

                str.Format("file://%s", strFileName);
                if( !wks.SetCell(nRow, nImgCol, str, false) )
                {
                        nErr = CHEM_SET_CELL;
                        break;
                }
        }
        return nErr;
}

//---------------------------------------------------------------------------
// create_molecule_images
//
// Create molecule images from chemical data.
// Chemical data is taken from the selected column in the active worksheet.
// Molecule images are put into the "Molecule Image" column.  If this column
// does not exist then a new column will be inserted after the data column.
// The new column will contain links to the image file.
//
// Exactly one column must be selected, and it must not be the
// "Molecule Image" column itself.
//
// Returns 0 for success or a non-zero error number.
//---------------------------------------------------------------------------
int create_molecule_images()
{
        char szImageColName[] = "Molecule Image";

        Worksheet wks = Project.ActiveLayer();
        if( !wks.IsValid() )
                return CHEM_INVALID_WKS;

        vector<int> vnSelCols;
        if( !wks.GetSelectedColumns(vnSelCols) || vnSelCols.GetSize() != 1 )
                return CHEM_INVALID_SOURCE_COL;

        int nMoleculeCol = vnSelCols[0];

        Column col = wks.FindCol(szImageColName);
        if( col.IsValid() )
        {
                // The selected column is the image column itself.  Converting it
                // would overwrite the source cells with file links, so refuse
                // before anything in the worksheet is modified.
                if( col.GetIndex() == nMoleculeCol )
                        return CHEM_INVALID_SOURCE_COL;
        }
        else
        {
                // No image column yet: insert one right after the data column.
                string strColNameCreated;
                if( !wks.InsertCol(nMoleculeCol + 1, NULL, strColNameCreated) )
                        return CHEM_INSERT_COL;
                col = wks.FindCol(strColNameCreated);
                if( !col.IsValid() )
                        return CHEM_INSERT_COL;
                col.SetLongName(szImageColName);
                col.SetWidth(col.GetWidth() * 4); // widen so the images are visible
        }

        // Apply the same cell height to every row so the images fit.
        wks_set_cell_heights(wks, 0, wks.GetNumRows() - 1, 400.0, false);

        int nImgCol = col.GetIndex();

        return convert_molecule_to_image_links(wks, nMoleculeCol, nImgCol);
}

string get_molecule_err_msg(int nErr)
{
        switch (nErr)
        {
        case CHEM_SUCCESS:
                return "Success";
        case CHEM_USER_CANCEL:
                return "User canceled.";
        case CHEM_INVALID_WKS:
                return "Invalid active worksheet.";
        case CHEM_INVALID_SOURCE_COL:
                return "Invalid source column. Select a single column.";
        case CHEM_INVALID_TARGET_COL:
                return "Invalid target column.";
        case CHEM_CREATE_FOLDER:
                return "Failed to create subfolder for images.";
        case CHEM_CREATE_FILE_NAME:
                return "Failed to create image file name.";
        case CHEM_RENDER_TO_FILE:
                return "Failed to create image.";
        case CHEM_GET_CELL:
                return "Failed to access source cell.";
        case CHEM_SET_CELL:
                return "Failed to access target cell.";
        case CHEM_INSERT_COL:
                return "Failed to insert images column.";
        }
        return "Unknown Error";
}

//---------------------------------------------------------------------------
// create_folder_in_uff
//
// Make sure a (possibly nested) subfolder exists below the path returned by
// GetOriginPath(ORIGIN_PATH_USER), creating each missing level in turn.
//
// pcszFolderName = subfolder path relative to that folder, with levels
//                  separated by backslashes (e.g. "chemical\\images")
//
// Returns true if every level exists or was created, false as soon as a
// level cannot be created.
//---------------------------------------------------------------------------
static bool create_folder_in_uff(LPCSTR pcszFolderName)
{
        string strSubFolders = pcszFolderName;
        // NOTE(review): the token is appended without a separator, so the
        // returned path presumably already ends with a backslash -- confirm.
        string strDir = GetOriginPath(ORIGIN_PATH_USER);

        int nLevels = strSubFolders.GetNumTokens('\\');
        int nLevel = 0;
        while( nLevel < nLevels )
        {
                // Extend the path by one level and create it if it is missing.
                strDir += strSubFolders.GetToken(nLevel, '\\');
                if( !strDir.IsPath() && !CreateDirectory(strDir, NULL) )
                        return false;

                strDir += "\\";
                nLevel++;
        }
        return true;
}

//---------------------------------------------------------------------------
// Utility functions for creating a unique file name from a chemical
// structure string.
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// checksum
//
// Compute a checksum over a byte buffer.  Each byte is combined with the
// high byte of a 16-bit running state, the results are summed, and the
// state is advanced after every byte.
//
// pBuf     = buffer to process
// nBufLen  = number of bytes in pBuf
// nChksum  = receives the checksum; left untouched when false is returned
// bReverse = true to process the buffer from its last byte to its first
//
// Returns true for success or false if the buffer is NULL or empty.
//
// NOTE(review): this is presumably used to derive image file names, so the
// constants and the arithmetic must keep producing the same values.
//---------------------------------------------------------------------------
static bool checksum(LPBYTE pBuf, UINT nBufLen, DWORD& nChksum, bool bReverse)
{
        if( NULL == pBuf || 0 == nBufLen )
                return false;

        // The multiplier and increment are held as DWORD so that the state
        // update is evaluated in unsigned arithmetic.  With 16-bit operands
        // promoted to signed int the product can exceed INT_MAX; unsigned
        // arithmetic wraps instead and the low 16 bits kept in 'r' are the same.
        WORD r = 55665;
        DWORD c1 = 52845, c2 = 22719;
        BYTE cipher, value;
        UINT i;

        nChksum = 0;
        for( i = 0; i < nBufLen; i++ )
        {
                // Index from the chosen end instead of stepping a pointer, so no
                // pointer is ever moved outside the buffer in reverse mode.
                value = bReverse ? pBuf[nBufLen - 1 - i] : pBuf[i];
                cipher = (BYTE)(value ^ (r >> 8));
                r = (WORD)(((DWORD)cipher + r) * c1 + c2);
                nChksum += cipher;
        }
        return true;
}