Well, not a Mega16, but I've been using a T6963C display with a Mega32. It should be pretty much the same thing.
I'll paste some code below from a project of mine. It's written for GCC.
Code:
#include <avr/io.h>
// Controller memory layout and text geometry.
#define G_BASE 0x0200   // graphics memory base address
#define T_BASE 0x0000   // text memory base address
#define BYTES_PER_ROW 40 // characters per row; also used as the row stride of graphics memory
// Control-line levels. One value per line is OR'ed together and the result is
// written to CONTROLPORT. The _0 variants contribute no bit (line driven low),
// the _1 variants set the line's bit (line driven high).
// WriteCtrl() uses CD_1 and WriteData() uses CD_0; both pulse CE and WR low.
#define CD_0    0
#define CD_1    0x08
#define CE_0    0
#define CE_1    0x01
#define RD_0    0
#define RD_1    0x02
#define WR_0    0
#define WR_1    0x04
// Port registers used for the 8-bit data bus and for the control lines.
#define DATAPORT      PORTA
#define CONTROLPORT   PORTC
/*
 * Crude busy-wait: spin for `value` loop iterations, one `nop` each.
 *
 * The counter is volatile so the compiler cannot optimise the loop away.
 * wait(0) executes no `nop` at all; its only delay is the call overhead
 * and the counter set-up. The duration of one iteration depends on the
 * CPU clock and on the generated code, so this is not a calibrated delay.
 *
 * `__asm__ __volatile__` is used instead of plain `asm volatile` so the
 * function also builds in strict ISO modes (-std=c99 / -std=c11), where
 * `asm` is not a keyword.
 */
void wait(unsigned int value)
{
    /* Initialised here and again in the for statement, exactly as before,
     * so the number of volatile stores (and thus the timing) is unchanged. */
    volatile unsigned i = 0;

    for (i = 0; i < value; i++) {
        __asm__ __volatile__("nop");
    }
}
/*
 * Send one command byte to the LCD controller.
 *
 * Switches the data bus (port A) to output, places `cmd` on DATAPORT, then
 * pulses CE and WR low while CD stays high (CD_1 marks the byte as a
 * command; WriteData() uses CD_0), and finally drives all four control
 * lines high again.
 *
 * The registers are assigned directly: the old outb()/outp() helpers are
 * deprecated avr-libc macros that <avr/io.h> no longer provides, and they
 * took their arguments in opposite orders.
 *
 * Note: every write replaces the whole CONTROLPORT register, not just the
 * four control bits.
 * NOTE(review): the controller status is not read before writing; the only
 * delay is wait(0). Confirm this is sufficient at the target clock speed.
 */
static void WriteCtrl(unsigned char cmd)
{
    DDRA = 0xFF;                              /* data bus output */
    DATAPORT = cmd;
    CONTROLPORT = CD_1 | CE_0 | RD_1 | WR_0;  /* assert CE and WR */
    wait(0);                                  /* hold the strobe briefly */
    CONTROLPORT = CD_1 | CE_1 | RD_1 | WR_1;  /* release CE and WR */
}
/*
 * Send one data byte to the LCD controller.
 *
 * Switches the data bus (port A) to output, places `dat` on DATAPORT, then
 * pulses CE and WR low while CD is low (CD_0 marks the byte as data;
 * WriteCtrl() uses CD_1). Afterwards CE and WR are driven high again; CD
 * stays low and RD stays high.
 *
 * The registers are assigned directly: the old outb()/outp() helpers are
 * deprecated avr-libc macros that <avr/io.h> no longer provides, and they
 * took their arguments in opposite orders.
 *
 * Note: every write replaces the whole CONTROLPORT register, not just the
 * four control bits.
 * NOTE(review): the controller status is not read before writing; the only
 * delay is wait(0). Confirm this is sufficient at the target clock speed.
 */
void WriteData(unsigned char dat)
{
    DDRA = 0xFF;                              /* data bus output */
    DATAPORT = dat;
    CONTROLPORT = CD_0 | CE_0 | RD_1 | WR_0;  /* assert CE and WR */
    wait(0);                                  /* hold the strobe briefly */
    CONTROLPORT = CD_0 | CE_1 | RD_1 | WR_1;  /* release CE and WR */
}
/*
 * Configure the display controller.
 *
 * Every command is sent as its data bytes first (low byte, then high byte
 * where an address is involved) followed by the command opcode. In order:
 * text home address (T_BASE), text area width (BYTES_PER_ROW), graphics
 * home address (G_BASE), graphics area width (BYTES_PER_ROW), OR mode with
 * the internal character generator, an 8-line cursor, cursor position
 * (0, 0), and finally the display mode command.
 */
void InitLCD(void)
{
    /* Opcodes used during initialisation, in the order they are sent. */
    enum {
        OP_TEXT_HOME    = 0x40,
        OP_TEXT_AREA    = 0x41,
        OP_GRAPH_HOME   = 0x42,
        OP_GRAPH_AREA   = 0x43,
        OP_MODE_OR      = 0x80,  /* OR mode, internal CG ROM */
        OP_CURSOR_8LINE = 0xa7,  /* cursor is 8 lines high */
        OP_CURSOR_POS   = 0x21,  /* put cursor at (x, y) */
        OP_DISPLAY_MODE = 0x9D
    };

    /* Text home address: low byte, high byte. */
    WriteData(T_BASE & 0xFF);
    WriteData(T_BASE >> 8);
    WriteCtrl(OP_TEXT_HOME);

    /* Text area: number of columns, second byte is zero. */
    WriteData(BYTES_PER_ROW);
    WriteData(0);
    WriteCtrl(OP_TEXT_AREA);

    /* Graphics home address: low byte, high byte. */
    WriteData(G_BASE & 0xFF);
    WriteData(G_BASE >> 8);
    WriteCtrl(OP_GRAPH_HOME);

    /* Graphics area: number of columns, second byte is zero. */
    WriteData(BYTES_PER_ROW);
    WriteData(0);
    WriteCtrl(OP_GRAPH_AREA);

    WriteCtrl(OP_MODE_OR);
    WriteCtrl(OP_CURSOR_8LINE);

    /* Cursor to column 0, row 0. */
    WriteData(0x00);
    WriteData(0x00);
    WriteCtrl(OP_CURSOR_POS);

    WriteCtrl(OP_DISPLAY_MODE);
}
/*
 * Point the controller's address pointer at text cell (x, y).
 *
 * The cell address is T_BASE + y * BYTES_PER_ROW + x. It is sent low byte
 * first, then high byte, followed by command 0x24 (set address pointer).
 * x and y are not range-checked.
 */
void SetLCDXY(int x, int y)
{
    const unsigned short cell = T_BASE + (y * BYTES_PER_ROW) + x;
    const unsigned char low = cell & 0xFF;
    const unsigned char high = cell >> 8;

    WriteData(low);
    WriteData(high);
    WriteCtrl(0x24);
}
/*
 * Move the cursor to column x, row y.
 *
 * Sends x and y as the two data bytes of command 0x21 (put cursor at
 * (x, y)). Each coordinate is truncated to 8 bits when it is sent.
 */
void SetLCDCursor(int x, int y)
{
    const unsigned char column = (unsigned char)x;
    const unsigned char row = (unsigned char)y;

    WriteData(column);
    WriteData(row);
    WriteCtrl(0x21);
}
/*
 * Zero the text memory.
 *
 * Sets the address pointer to T_BASE, then writes 320 zero bytes using
 * command 0xc0 (write data and increment the pointer).
 * 320 is presumably 8 text rows of BYTES_PER_ROW characters - confirm
 * against the panel in use.
 */
void ClearLCDText(void)
{
    int remaining = 320;

    /* Address pointer to the start of text memory. */
    WriteData(T_BASE & 0xFF);
    WriteData(T_BASE >> 8);
    WriteCtrl(0x24);

    while (remaining-- > 0) {
        WriteData(0);
        WriteCtrl(0xc0);  /* write data and increment pointer */
    }
}
/*
 * Zero the graphics memory.
 *
 * Sets the address pointer to G_BASE, then writes 2560 zero bytes using
 * command 0xc0 (write data and increment the pointer).
 * 2560 is presumably 64 pixel rows of BYTES_PER_ROW bytes - confirm
 * against the panel in use.
 */
void ClearLCDGraph(void)
{
    int remaining = 2560;

    /* Address pointer to the start of graphics memory. */
    WriteData(G_BASE & 0xFF);
    WriteData(G_BASE >> 8);
    WriteCtrl(0x24);

    while (remaining-- > 0) {
        WriteData(0);
        WriteCtrl(0xc0);  /* write data and increment pointer */
    }
}
/*
 * Write one character at the current address pointer and advance it.
 *
 * The byte sent is `ch` minus the code of the space character, so ' '
 * becomes code 0. Command 0xc0 writes the byte and increments the pointer.
 * Characters below ' ' wrap around modulo 256.
 */
void PrintLCDChar(unsigned char ch)
{
    const unsigned char code = ch - ' ';

    WriteData(code);
    WriteCtrl(0xc0);
}
/*
 * Set the pixel at (x, y) in graphics memory.
 *
 * Each graphics byte is treated as covering 6 horizontal pixels: the byte
 * address is G_BASE + y * BYTES_PER_ROW + x / 6, and the bit number within
 * it is 5 - (x % 6). After the address pointer is set (command 0x24), the
 * command 0xf8 | bit is issued. Coordinates are not range-checked.
 */
void SetLCDPixel(int x, int y)
{
    const int column_byte = x / 6;
    const int bit = 5 - (x % 6);
    const int addr = G_BASE + (y * BYTES_PER_ROW) + column_byte;

    WriteData(addr & 0xFF);
    WriteData(addr >> 8);
    WriteCtrl(0x24);
    WriteCtrl(0xf8 | bit);
}
/*
 * Clear the pixel at (x, y) in graphics memory.
 *
 * Uses the same addressing as SetLCDPixel(): byte address
 * G_BASE + y * BYTES_PER_ROW + x / 6, bit number 5 - (x % 6). After the
 * address pointer is set (command 0x24), the command 0xf0 | bit is issued.
 * Coordinates are not range-checked.
 */
void ClearLCDPixel(int x, int y)
{
    const int column_byte = x / 6;
    const int bit = 5 - (x % 6);
    const int addr = G_BASE + (y * BYTES_PER_ROW) + column_byte;

    WriteData(addr & 0xFF);
    WriteData(addr >> 8);
    WriteCtrl(0x24);
    WriteCtrl(0xf0 | bit);
}
/*
 * Demo: initialise the display, clear both memories, print "Hello!" at the
 * top-left text cell, then sweep a 1-pixel-wide, 10-pixel-high bar from
 * x = 10 to x = 99 across rows 20..29.
 *
 * The port direction registers are assigned directly; outb() is a
 * deprecated avr-libc macro that <avr/io.h> no longer provides.
 */
int main (void)
{
    static const char greeting[] = "Hello!";
    const char *p;
    int i, j;

    DDRA = 0xFF; /* lcd, FTDI data bus output */
    DDRC = 0xCF; /* lcd control pins output (bits 0-3, 6 and 7) */

    InitLCD();
    ClearLCDText();
    ClearLCDGraph();
    SetLCDXY(0, 0);

    for (p = greeting; *p != '\0'; p++) {
        PrintLCDChar((unsigned char)*p);
    }

    for (i = 10; i < 100; i++) {
        /* Draw the bar at column i ... */
        for (j = 20; j < 30; j++) {
            SetLCDPixel(i, j);
        }
        /* ... and erase it from the previous column. */
        for (j = 20; j < 30; j++) {
            ClearLCDPixel(i - 1, j);
        }
    }

    return 0;
}
I stripped out a lot of the code, and the code above isn't tested, but it should cover the basics needed to get something onto a T6963C LCD.