-- Don Golding
-- Angelus Research Corp.
-- dgolding@angelusresearch.com
-- Version 3
-- Forth Processor Design
--
-- This code represents my current thoughts on designing a Forth processor in VHDL.
-- Please review it and email me with your input on either Forth design issues or
-- VHDL design issues.
--
-- The goal is to build a generic Forth processor that can be included in VHDL designs.
-- If it could fit into a Xilinx 4005 or 4010 it would be ideal!
-- Forth is really a virtual microprocessor implemented on various other processors,
-- from the 68HC11 to VAX machines and supercomputers. You will currently find Forth used
-- as the driver for PCI hardware in high-end Macintoshes and Sun workstations.
--
-- This is an attempt to create a real Forth processor on an FPGA or ASIC using VHDL.
-- Previous real Forth microprocessors include the Harris RTX2000, SHABOOM, F21, etc.
-- The current attempts (F21, etc.) are trying to make 500 MIPS screamers.
-- There are also people like Dr. Ting using the schematic editor to create Forth
-- processors. I wonder how a schematic-designed Forth processor will compare to a
-- VHDL-based design in speed and the number of gates used.
-- I think a straightforward, simple design will have considerable applications
-- when you need a processor included in your FPGA/ASIC design.
-- FPGAs operate at 200 MHz. I don't know how fast this design will be, but its speed
-- should be limited to the external RAM speed when memory access is required.
-- Internal register-to-register operations should be in the 50-200 MHz range.
--
-- The preliminary specifications are:
--
--   16-bit data bus (to save space it could be 8 bits, but it would take more statements)
--   16-bit address bus
--   By editing the code in the entity declarations, you can implement 32, 64, ? designs.
--
--   Return stack levels = 16
--   Data stack levels = 16 (could be smaller; 4 items could be OK)
--   Output port A is 8 lines
--   Output port B is 8 lines
--   Motorola SPI-compatible port (SPI_In, SPI_Out, SPI_Ck, SS/)
--
-- By editing the code in the entity declarations, you can add serial ports, parallel
-- ports, ADCs, or just about anything you
-- can imagine.
--
library IEEE;
use IEEE.std_logic_1164.all;

-- Proc: external pins of the Forth processor.
-- Of these ports, the architecture in this file uses only DataBus, AddressBus,
-- Reset, clock and rd; the output ports and the SPI pins are declared for
-- future use.
entity Proc is
  port (
    DataBus    : inout STD_LOGIC_VECTOR (15 downto 0);  -- 16-bit bidirectional data bus
    OutPortA   : out   STD_LOGIC_VECTOR (7 downto 0);   -- output port A, 8 lines
    OutPortB   : out   STD_LOGIC_VECTOR (7 downto 0);   -- output port B, 8 lines
    -- OutputA/OutputB were declared as STD_LOGIC_LOGIC, which is not a type
    -- provided by std_logic_1164; STD_LOGIC is the single-bit type used by
    -- every other scalar port here.
    OutputA    : out   STD_LOGIC;
    OutputB    : out   STD_LOGIC;
    AddressBus : out   STD_LOGIC_VECTOR (15 downto 0);  -- 16-bit address bus
    Reset      : in    STD_LOGIC;                       -- the architecture resets while this is '0'
    SPI_In     : in    STD_LOGIC;                       -- SPI-compatible port: data in
    SPI_Out    : out   STD_LOGIC;                       -- SPI-compatible port: data out
    SS         : in    STD_LOGIC;                       -- SPI-compatible port: select (SS/)
    SPI_Ck     : in    STD_LOGIC;                       -- SPI-compatible port: clock
    clock      : in    STD_LOGIC;                       -- processor clock, supplied externally
    rd         : out   STD_LOGIC                        -- read/write line; last port, so no ';'
  );
end Proc;

architecture Proc_arch of Proc
is
  -- Opcodes ("only 25 so far"). A comma was missing between times and divide.
  -- NOTE(review): >r, r>, r@ and +store are not legal VHDL identifiers. They are
  -- kept because the case arms in proc_code use the same spellings; rename both
  -- places together.
  type op_code is (abort, depth, dup, pick, over, swap, >r, r>, r@, drop,
                   rot, equal, zero_equal, greater, greater_than,
                   less_than, store, +store, fetch, plus, minus, times,
                   divide, branch, Obranch);

  -- check these for correct sizes
  type data_word is array (15 downto 0) of STD_ULOGIC;       -- 16 bits wide
  -- The stack array types carry a _t suffix so that the names return_stack and
  -- data_stack are free for the objects the rest of the architecture indexes.
  type return_stack_t is array (15 downto 0) of data_word;   -- 16 bits wide, 16 deep
  type data_stack_t   is array (15 downto 0) of data_word;   -- 16 bits wide, 16 deep
  -- Was "range (15 downto 0)", which is not a valid range; 0 to 65535 is the
  -- span of the 16-bit AddressBus.
  type memory_size is range 0 to 65535;                      -- 64K max?

  constant stack_depth : integer := 16;                      -- 16 items max

  constant dstack_start : integer := 0;
  -- Values driven onto rd. Typed STD_LOGIC to match the rd port, and written as
  -- quoted literals (the unquoted 0 and 1 were integers).
  constant write : STD_LOGIC := '0';
  constant read  : STD_LOGIC := '1';

  -- Error codes are defined here
  constant dstack_overflow     : integer := 1;
  constant dstack_underflow    : integer := 2;
  constant rstack_overflow     : integer := 3;
  constant rstack_underflow    : integer := 4;
  constant invalid_instruction : integer := 5;

  -- Processor state. Each declaration is on its own line; previously the
  -- declarations of rp and temp1 sat behind a "--" on the same line and were
  -- therefore commented out. They are declared as signals because every
  -- assignment to them in this file uses "<=". Types follow usage: the stack
  -- pointers are compared with integers and incremented, error receives the
  -- integer codes above, and sucessful is assigned true/false.
  -- NOTE(review): signal updates take effect only when the process suspends;
  -- code that changes a pointer and then indexes with it needs checking.
  -- NOTE(review): IEEE 1076 does not let a procedure declared here assign
  -- signals that are not its parameters; the procedures below will have to move
  -- into a process or take these signals as parameters.
  signal return_stack : return_stack_t;
  signal data_stack   : data_stack_t;
  signal rp        : integer range 0 to stack_depth;          -- return stack pointer
  signal dp        : integer range 0 to stack_depth;          -- data stack pointer
  signal mp        : memory_size;                             -- memory pointer
  signal temp1     : data_word;                               -- reg:Temp1 internal
  signal error     : integer range 0 to invalid_instruction;  -- reg:Error code
  signal sucessful : boolean;                                 -- Flag: operation successful

  -- Forth stack manipulation primitives
  -- I think we should implement a circular queue here.
  -- data_stack(dp) points to next available location, can use as temp variable
  -- before using push_dp_stack or pop_dp_stack procedures.
  -- Each stack is really 16 registers! Stack
-- operations should be real fast!

-- reset_proc: put the processor back in its start state by zeroing the data
-- stack pointer, the return stack pointer and the memory pointer.
-- Called when Reset is '0', by the abort opcode, and after a stack overflow,
-- stack underflow or invalid instruction. It does not touch error, so the code
-- recorded by the caller is still there afterwards.
-- The pointers are compared with integers and incremented elsewhere in this
-- file, so they are cleared with integer values rather than the character
-- literal '0'.
procedure reset_proc is
begin
  dp <= dstack_start;  -- the same value pop_dp_stack treats as "empty"
  rp <= 0;
  mp <= 0;
end reset_proc;

procedure push_dp_stack
is
  -- Advance the data stack pointer by one.
  -- dp points to the next stack element, not the current one, after the
  -- operation is completed.
  -- If dp already equals stack_depth, dstack_overflow is recorded in error and
  -- the processor is reset instead of moving the pointer.
begin
  if dp = stack_depth then
    error <= dstack_overflow;
    reset_proc;
  else
    dp <= dp + 1;
  end if;  -- the ';' was missing here
end push_dp_stack;

procedure pop_dp_stack
is
  -- Move the data stack pointer back by one.
  -- dp points to the next stack element, not the current one, after the
  -- operation is completed.
  -- If dp already equals dstack_start (nothing to pop), dstack_underflow is
  -- recorded in error and the processor is reset instead.
begin
  if dp = dstack_start then
    error <= dstack_underflow;
    reset_proc;
  else
    dp <= dp - 1;
  end if;  -- the ';' was missing here
end pop_dp_stack;
-- push_rp_stack: advance the return stack pointer by one.
-- rp points to the next stack element, not the current one, after the
-- operation is completed.
-- If rp already equals stack_depth, rstack_overflow is recorded in error and
-- the processor is reset instead of moving the pointer.
procedure push_rp_stack is
begin
  -- stack_depth (16) replaces the literal 16 so the limit is defined in one
  -- place, as push_dp_stack already does.
  if rp = stack_depth then
    error <= rstack_overflow;
    reset_proc;
  else
    rp <= rp + 1;
  end if;  -- the ';' was missing here
end push_rp_stack;
-- pop_rp_stack: move the return stack pointer back by one.
-- rp points to the next stack element, not the current one, after the
-- operation is completed.
-- If rp is already 0 (nothing to pop), rstack_underflow is recorded in error
-- and the processor is reset instead.
procedure pop_rp_stack is
begin
  if rp = 0 then
    error <= rstack_underflow;
    reset_proc;
  else
    rp <= rp - 1;
  end if;  -- the ';' was missing here
end
pop_rp_stack;

-- proc_code: execute the opcode currently on DataBus.
-- Sets sucessful to true on entry and to false only when the bus value matches
-- none of the arms below; the calling process uses the flag to decide between
-- advancing mp and reporting invalid_instruction.
-- The untyped "(sucessful)" parameter list was removed: the only call in this
-- file passes no argument and the flag is shared state.
--
-- Stack layout assumed by every arm: data_stack(dp) is the free slot, dp+1 is
-- the top of stack (TOS), dp+2 the second item, dp+3 the third.
-- return_stack(rp) doubles as a scratch register.
--
-- NOTE(review): push_dp_stack/pop_dp_stack move dp in the opposite direction
-- (push is dp+1), so either these offsets or those procedures must change --
-- confirm the intended growth direction.
-- NOTE(review): several arms change dp and then index with it, which only works
-- if the pointer update is visible immediately -- confirm against how dp is
-- declared.
-- NOTE(review): DataBus is a STD_LOGIC_VECTOR while the choices are op_code
-- literals, and conversions between integers, data_word and the buses (as well
-- as numeric "+", "-", "*", "/", "<", ">" on data_word) are not written out;
-- the decode and a numeric package still have to be chosen.
procedure proc_code is
  -- Forth flags as full data words (all bits set = true, all clear = false).
  constant true_flag  : data_word := (others => '1');
  constant false_flag : data_word := (others => '0');
begin
  sucessful <= true;

  case DataBus is
    when abort =>  -- reset processor
      reset_proc;

    when depth =>  -- put the depth of the stack on the top
      data_stack(dp) <= dp;
      push_dp_stack;

    when dup =>  -- duplicate the top item on data stack
      data_stack(dp) <= data_stack(dp+1);
      push_dp_stack;

    when pick =>  -- get the item on the data stack pointed to by TOS
      data_stack(dp) <= data_stack(data_stack(dp+1));  -- closing ')' was missing
      push_dp_stack;

    when over =>  -- duplicate the second number on data stack
      -- Was data_stack(data_stack(dp+2): unbalanced, and indexing through the
      -- second item is pick's behaviour, not over's.
      data_stack(dp) <= data_stack(dp+2);
      push_dp_stack;

    when swap =>  -- swap top two numbers on data stack
      return_stack(rp) <= data_stack(dp+1);
      data_stack(dp+1) <= data_stack(dp+2);
      data_stack(dp+2) <= return_stack(rp);

    when >r =>  -- move top of data stack to return stack
      return_stack(rp) <= data_stack(dp+1);
      pop_dp_stack;
      push_rp_stack;

    when r> =>  -- move top of return stack to data stack
      -- Written to the free slot like every other pushing opcode; the old
      -- target, dp+1, overwrote the current TOS.
      data_stack(dp) <= return_stack(rp+1);
      pop_rp_stack;
      push_dp_stack;

    when r@ =>  -- copy top of return stack to data stack
      data_stack(dp) <= return_stack(rp+1);
      push_dp_stack;

    when drop =>  -- drop top number from data stack
      pop_dp_stack;

    when rot =>  -- rotate 3rd number to 1st on data stack
      -- The original stopped after the first two moves and lost the old TOS.
      return_stack(rp) <= data_stack(dp+1);
      data_stack(dp+1) <= data_stack(dp+3);
      data_stack(dp+3) <= data_stack(dp+2);
      data_stack(dp+2) <= return_stack(rp);

    -- The flag opcodes below now write a full-word flag (a single '1' cannot be
    -- assigned to a 16-bit word) and also store false when the test fails.

    when equal =>  -- if tos and second are equal then true
      if data_stack(dp+1) = data_stack(dp+2) then
        pop_dp_stack;
        data_stack(dp+1) <= true_flag;
      else
        pop_dp_stack;
        data_stack(dp+1) <= false_flag;
      end if;

    when zero_equal =>  -- if tos=0 then tos=true
      if data_stack(dp+1) = false_flag then
        data_stack(dp+1) <= true_flag;
      else
        data_stack(dp+1) <= false_flag;
      end if;

    when greater_than =>  -- if tos is greater than the second then tos=true
      -- NOTE(review): standard Forth ">" tests second > tos; operand order is
      -- left as the author wrote it.
      if data_stack(dp+1) > data_stack(dp+2) then
        pop_dp_stack;
        data_stack(dp+1) <= true_flag;
      else
        pop_dp_stack;
        data_stack(dp+1) <= false_flag;
      end if;

    when less_than =>  -- if tos is less than the second item then tos=true
      if data_stack(dp+1) < data_stack(dp+2) then
        pop_dp_stack;
        data_stack(dp+1) <= true_flag;
      else
        pop_dp_stack;
        data_stack(dp+1) <= false_flag;
      end if;

    when store =>  -- store 16 bit value to memory
      rd <= write;
      AddressBus <= data_stack(dp+1);
      DataBus <= data_stack(dp+2);  -- the ';' was missing here
      rd <= read;  -- probably need a delay here
      pop_dp_stack;
      pop_dp_stack;

    when +store =>  -- increment 16 bit value in memory
      rd <= read;
      AddressBus <= data_stack(dp+1);
      -- The increment is the second item; the original added the address
      -- (dp+1) to the memory value.
      DataBus <= DataBus + data_stack(dp+2);
      rd <= write;
      pop_dp_stack;
      pop_dp_stack;
      rd <= read;  -- probably need a delay here

    when fetch =>  -- get 16 bit value from memory
      -- NOTE(review): no address is driven here; the value pushed is whatever
      -- DataBus holds.
      rd <= read;
      data_stack(dp) <= DataBus;
      push_dp_stack;

    when plus =>  -- add two 16 bit numbers
      -- Result goes to dp+2, like minus/times/divide; dp+1 is the slot the pop
      -- discards.
      data_stack(dp+2) <= data_stack(dp+2) + data_stack(dp+1);
      pop_dp_stack;

    when minus =>  -- subtract two 16 bit numbers
      -- NOTE(review): computes tos - second; standard Forth "-" is second - tos.
      data_stack(dp+2) <= data_stack(dp+1) - data_stack(dp+2);
      pop_dp_stack;

    when times =>  -- multiply two 16 bit numbers
      data_stack(dp+2) <= data_stack(dp+1) * data_stack(dp+2);
      pop_dp_stack;

    when divide =>  -- divide two 16 bit numbers
      -- NOTE(review): computes tos / second; standard Forth "/" is second / tos.
      data_stack(dp+2) <= data_stack(dp+1) / data_stack(dp+2);
      pop_dp_stack;

    when branch =>  -- branch unconditionally
      mp <= mp+1;      -- was "mp=mp+1", a comparison rather than an assignment
      rd <= read;
      mp <= DataBus;

    when Obranch =>  -- branch if tos = 0
      if data_stack(dp+1) = false_flag then
        mp <= mp+1;
        rd <= read;
        mp <= DataBus;
      end if;

    when others =>  -- not an opcode (the unused literal "greater" also lands here)
      sucessful <= false;
  end case;
end proc_code;

synch:
process(clock)
begin
  -- Placeholder for clock-synchronous housekeeping.
  -- The original body was "clock <= not clock;". clock is declared as an input
  -- port of Proc, so the architecture is not allowed to assign it, and the
  -- clock is generated outside this entity. The assignment was removed; the
  -- edge test is kept so synchronous logic can be added here.
  if rising_edge(clock) then
    null;
  end if;
end process;
code:
-- Instruction loop (Forth's inner interpreter, "next").
-- While reset is '0' the processor is held in its reset state. Otherwise, on
-- each rising clock edge: select read, put mp on the address bus, run
-- proc_code, then advance mp after a valid instruction or record
-- invalid_instruction and reset.
-- The process used to be sensitive to mp and rp with no clock edge test, so
-- assigning mp re-triggered it; it is now clocked with an asynchronous,
-- active-low reset. The "if" that tests sucessful had also been swallowed by
-- the end-of-line comment in front of it.
-- NOTE(review): AddressBus is a STD_LOGIC_VECTOR; mp may need a conversion
-- depending on how it is declared.
process(clock, reset)
begin
  if reset = '0' then
    reset_proc;
  elsif rising_edge(clock) then
    -- get and process instruction
    rd <= read;        -- set read/write line to read
    AddressBus <= mp;  -- output address
    proc_code;

    -- Forth's inner interpreter (next)
    if sucessful = true then
      -- it was a valid instruction
      mp <= mp + 1;
      sucessful <= false;
    else
      -- it wasn't a valid instruction
      error <= invalid_instruction;
      reset_proc;
    end if;
  end if;
end process;
end
Proc_arch; |