Skip to content

相对issue 207改动量较少的LogStore读性能优化方案 #225

@dyx2025

Description

@dyx2025

#207 的读优化思路提及到使用mmap,lru,引用计数器等技术,尽管减少了很多系统调用和省去内核数据到 m_oTmpBuffer 的拷贝,但改动量较大。
本issue讲述一种改动量较少但也能提高LogStore读性能的方案。
先看原代码,分析现有的读性能问题。
原代码路径:
src/logstorage/log_store.cpp

/**
 * Reads one log record addressed by sFileID.
 *
 * As read by this function, the on-disk record is a 4-byte int length
 * (iLen) followed by iLen bytes, of which the first sizeof(uint64_t)
 * bytes are the instance id and the remainder is the payload.
 *
 * @param sFileID       Encoded locator; ParseFileID extracts the file id,
 *                      the byte offset and the expected checksum from it.
 * @param llInstanceID  [out] Instance id stored in the record. Written only
 *                      after the checksum has been verified.
 * @param sBuffer       [out] Record payload (record body minus instance id).
 * @return 0 on success; the non-zero code of OpenFile if the file cannot be
 *         opened; -1 on seek failure, short read or an invalid record
 *         length; -2 when the checksum does not match.
 */
int LogStore :: Read(const std::string & sFileID, uint64_t & llInstanceID, std::string & sBuffer)
{
    int iFileID = -1;
    int iOffset = -1;
    uint32_t iCheckSum = 0;
    ParseFileID(sFileID, iFileID, iOffset, iCheckSum);
    
    int iFd = -1;
    int ret = OpenFile(iFileID, iFd);
    if (ret != 0)
    {
        return ret;
    }
    
    off_t iSeekPos = lseek(iFd, iOffset, SEEK_SET);
    if (iSeekPos == -1)
    {
        // The descriptor is already open here; close it so this error
        // path does not leak it.
        close(iFd);
        return -1;
    }
    
    int iLen = 0;
    ssize_t iReadLen = read(iFd, (char *)&iLen, sizeof(int));
    if (iReadLen != (ssize_t)sizeof(int))
    {
        close(iFd);
        PLG1Err("readlen %zd not qual to %zu", iReadLen, sizeof(int));
        return -1;
    }

    // The body must at least hold the instance id. A smaller (or negative)
    // length would make the memcpy below read past the valid data and make
    // "iLen - sizeof(uint64_t)" wrap around to a huge unsigned size.
    if (iLen < (int)sizeof(uint64_t))
    {
        close(iFd);
        PLG1Err("invalid record len %d, fileid %d offset %d", iLen, iFileID, iOffset);
        return -1;
    }
   
    // m_oTmpBuffer is shared between callers, so it is guarded by the mutex.
    std::lock_guard<std::mutex> oLock(m_oReadMutex);

    // Copy the record body from the file into m_oTmpBuffer.
    m_oTmpBuffer.Ready(iLen);
    iReadLen = read(iFd, m_oTmpBuffer.GetPtr(), iLen);
    if (iReadLen != iLen)
    {
        close(iFd);
        // iLen is an int, so it is printed with %d.
        PLG1Err("readlen %zd not qual to %d", iReadLen, iLen);
        return -1;
    }

    close(iFd);

    uint32_t iFileCheckSum = crc32(0, (const uint8_t *)m_oTmpBuffer.GetPtr(), iLen, CRC32SKIP);

    if (iFileCheckSum != iCheckSum)
    {
        BP->GetLogStorageBP()->GetFileChecksumNotEquel();
        PLG1Err("checksum not equal, filechecksum %u checksum %u", iFileCheckSum, iCheckSum);
        return -2;
    }

    memcpy(&llInstanceID, m_oTmpBuffer.GetPtr(), sizeof(uint64_t));
    // The payload is first copied from m_oTmpBuffer into a temporary string,
    // and that temporary is then assigned to sBuffer.
    sBuffer = string(m_oTmpBuffer.GetPtr() + sizeof(uint64_t), iLen - sizeof(uint64_t));

    PLG1Imp("ok, fileid %d offset %d instanceid %lu buffer size %zu", 
            iFileID, iOffset, llInstanceID, sBuffer.size());

    return 0;
}

原代码的一次LogStore读操作,有如下开销。

  1. 对oLock加锁,保护m_oTmpBuffer。
  2. 把文件数据拷贝到m_oTmpBuffer。
  3. 把m_oTmpBuffer相关的buffer数据先拷贝到临时string类型的对象。
  4. 再把临时string类型对象拷贝到sBuffer。这里隐含着一次sBuffer的空间分配,以便容纳从临时string类型对象拷贝过来的数据。

LogStore::Read的出参sBuffer对于每一个函数调用来说应该是相互独立的,直接把文件数据拷贝到sBuffer即可,不需要加锁保护共享的m_oTmpBuffer,也不需要通过临时string类型对象拷贝数据。
为了保持read系统调用次数不变,第一次要读的数据长度是sizeof(int) + sizeof(uint64_t)(iLen是int类型,llInstanceID是uint64_t类型),第二次要读的数据长度是iLen - sizeof(uint64_t)(llInstanceID是uint64_t类型),并且修改计算checksum的逻辑。

修改后代码:
src/logstorage/log_store.cpp

/**
 * Reads one log record addressed by sFileID, copying the payload straight
 * into sBuffer instead of staging it in the shared m_oTmpBuffer, so no
 * mutex is taken.
 *
 * As read by this function, the on-disk record is a 4-byte int length
 * (iLen) followed by iLen bytes, of which the first sizeof(uint64_t)
 * bytes are the instance id and the remainder is the payload.
 *
 * @param sFileID       Encoded locator; ParseFileID extracts the file id,
 *                      the byte offset and the expected checksum from it.
 * @param llInstanceID  [out] Instance id stored in the record. Written only
 *                      after the checksum has been verified.
 * @param sBuffer       [out] Record payload. Because the file is read
 *                      directly into it, its content is unspecified when
 *                      the function fails after the header has been read.
 * @return 0 on success; the non-zero code of OpenFile if the file cannot be
 *         opened; -1 on seek failure, short read or an invalid record
 *         length; -2 when the checksum does not match.
 */
int LogStore :: Read(const std::string & sFileID, uint64_t & llInstanceID, std::string & sBuffer)
{
    int iFileID = -1;
    int iOffset = -1;
    uint32_t iCheckSum = 0;
    ParseFileID(sFileID, iFileID, iOffset, iCheckSum);
    
    int iFd = -1;
    int ret = OpenFile(iFileID, iFd);
    if (ret != 0)
    {
        return ret;
    }
    
    off_t iSeekPos = lseek(iFd, iOffset, SEEK_SET);
    if (iSeekPos == -1)
    {
        // The descriptor is already open here; close it so this error
        // path does not leak it.
        close(iFd);
        return -1;
    }
 
    // First read: the length field and the instance id in one system call.
    // This replaces the former read of the 4-byte length alone.
    constexpr size_t buf_len = sizeof(int) + sizeof(uint64_t);
    char buf[buf_len];
    ssize_t iReadLen = read(iFd, buf, buf_len);
    if (iReadLen != (ssize_t)buf_len)
    {
        close(iFd);
        PLG1Err("readlen %zd not qual to %zu", iReadLen, buf_len);
        return -1;
    }

    int iLen = 0;
    memcpy(&iLen, buf, sizeof(int));

    // The body must at least hold the instance id. A smaller (or negative)
    // length would make content_len below wrap around to a huge value.
    if (iLen < (int)sizeof(uint64_t))
    {
        close(iFd);
        PLG1Err("invalid record len %d, fileid %d offset %d", iLen, iFileID, iOffset);
        return -1;
    }

    // Kept in a local until the checksum passes, so the caller's
    // llInstanceID is untouched on failure.
    uint64_t llFileInstanceID = 0;
    memcpy(&llFileInstanceID, buf + sizeof(int), sizeof(uint64_t));
    
    // Second read: the payload goes directly into sBuffer. This replaces the
    // locked read into m_oTmpBuffer followed by a copy through a temporary
    // string.
    size_t content_len = (size_t)iLen - sizeof(uint64_t);
    sBuffer.resize(content_len); // the previous code allocated implicitly as well
    // c_str() yields a const pointer that must not be written through;
    // &sBuffer[0] is the writable start of the contiguous storage.
    iReadLen = read(iFd, &sBuffer[0], content_len);
    if (iReadLen != (ssize_t)content_len)
    {
        close(iFd);
        PLG1Err("readlen %zd not qual to %zu", iReadLen, content_len);
        return -1;
    }

    close(iFd);

    // The checksum is now computed in two chained steps: the instance id
    // bytes held in buf, then the payload held in sBuffer.
    // NOTE(review): this assumes crc32() can be continued from a previous
    // result and that splitting the data after sizeof(uint64_t) bytes does
    // not change which bytes CRC32SKIP samples -- confirm against the crc32
    // implementation before relying on it.
    uint32_t iFileCheckSum = crc32(0, (const uint8_t *)(buf + sizeof(int)), sizeof(uint64_t), CRC32SKIP);
    iFileCheckSum = crc32(iFileCheckSum, (const uint8_t *)(sBuffer.c_str()), content_len, CRC32SKIP);

    if (iFileCheckSum != iCheckSum)
    {
        BP->GetLogStorageBP()->GetFileChecksumNotEquel();
        PLG1Err("checksum not equal, filechecksum %u checksum %u", iFileCheckSum, iCheckSum);
        return -2;
    }

    llInstanceID = llFileInstanceID;

    PLG1Imp("ok, fileid %d offset %d instanceid %lu buffer size %zu", 
            iFileID, iOffset, llInstanceID, sBuffer.size());

    return 0;
}

本方案比#207改动量少,但依旧涉及很多系统调用(如open,lseek, read, close 的系统调用),所以本方案的读性能提高不如#207

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions