/* Retrieved from GitHub: turbothad/FloatingPointAddition, iFloat.h (master branch). */

#ifndef __IFLOAT_H__
#define __IFLOAT_H__

/*
 * iFloat.h - header for simple floating point functions.
 *
 * "Copyright (c) 2017 by Fritz Sieker."
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose, without fee, and without written
 * agreement is hereby granted, provided that the above copyright notice
 * and the following two paragraphs appear in all copies of this software.
 *
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE AUTHOR
 * HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
 * BASIS, AND THE AUTHOR NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS."
 */

/** @file iFloat.h
 *  @brief Defines interface of iFloat.c functions (do not modify)
 *  @details This file defines the interface to a C file iFloat.c that
 *  you will complete.  You will learn how to do floating point arithmetic
 *  <b>without using any <code>float</code> variables</b>. Rather you will
 *  perform the operations by using the sign, exponent, and digit fields
 *  as defined in
 *  <a href="http://en.wikipedia.org/wiki/bfloat16">Google Brain's floating point
 *  number specification</a>.
 *  <p>
 *  Everything in a computer is stored as a series of 0/1's. When you use an
 *  <code>int</code> to type a value, you are telling the compiler (and
 *  ultimately the CPU) to treat the 0/1's as a two's complement number. When
 *  you use <code>float</code>, the 0/1's represent a floating point number.
 *  When an addition is performed, the computer knows whether to use the
 *  integer or floating point add instruction. The two instructions do different
 *  things to the 0/1's. <p> In this assignment, you are doing floating point
 *  operations  <b>without</b> using floating point instructions. You are
 *  directly doing the bit manipulations necessary to complete the add. Since
 *  <code>iFloat_t</code> is an integral type, the compiler will generate integer
 *  instructions. The <code>iFloat_t</code> is to remind you (the programmer),
 *  that although the computer is going to treat all values as integers, you
 *  know it is really 3 values (sign, exponent, mantissa) packed into a single
 *  integer number. Your responsibility is to unpack the three pieces, do the
 *  operations necessary to complete the operation, then put the three pieces
 *  back together.
 */

/* Compile-time selection of the storage type and bit layout behind iFloat_t.
 *
 * In both configurations the widths satisfy
 *     1 (sign bit) + BITS_EXP + BITS_MANT == BITS
 * where
 *     BITS      is the total width of an encoded value,
 *     BITS_EXP  is the width of the exponent field,
 *     BITS_MANT is the width of the mantissa field,
 *     EXP_BIAS  is presumably the offset added to the true exponent to form
 *               the stored ("biased") exponent - confirm against iFloat.c.
 */
#ifdef HALF
/* HALF build: 1 + 8 + 7 = 16 bits, the bfloat16 layout referenced in the
 * file comment above.
 * NOTE(review): assumes short is exactly 16 bits wide on the target - confirm. */
typedef short iFloat_t;

#define BITS      16
#define BITS_EXP   8
#define BITS_MANT  7
#define EXP_BIAS  127

#else
/* Default build: 1 + 8 + 23 = 32 bits, the same field widths and bias as
 * IEEE 754 single precision.
 * NOTE(review): assumes int is exactly 32 bits wide on the target - confirm. */
typedef int iFloat_t;

#define BITS      32
#define BITS_EXP   8
#define BITS_MANT 23
#define EXP_BIAS  127

#endif

/** Extract the sign of the argument.
 *  @param x the iFloat_t holding the encoded floating point value (a bfloat16
 *  bit pattern when HALF is defined, a 32-bit pattern otherwise)
 *  @return 0 if the value is 0 or positive, 1 if it is negative
 */
iFloat_t floatGetSign (iFloat_t x);

/** Extract the exponent field of the argument.
 *  @param x the iFloat_t holding the encoded floating point value (a bfloat16
 *  bit pattern when HALF is defined, a 32-bit pattern otherwise)
 *  @return the biased exponent of the argument, i.e. the exponent as it is
 *  stored in the bit pattern; the bias is not removed
 */
iFloat_t floatGetExp (iFloat_t x);

/** Extract the value of the argument: the mantissa with the implicit 1 made
 *  explicit, adjusted for the sign of the argument. The floating point
 *  addition example (step 1) in the instructions shows how the value is
 *  extracted. Three steps are involved:
 *  <ol>
 *  <li>extract the mantissa;</li>
 *  <li>set the implicit 1 in the extracted mantissa;</li>
 *  <li>if the sign of the argument is negative, return the 2's complement of
 *  the result of step 2; otherwise return that result as-is.</li>
 *  </ol>
 *  You may want to use the <code>getField</code> function from R3.
 *  @param x the iFloat_t holding the encoded floating point value (a bfloat16
 *  bit pattern when HALF is defined, a 32-bit pattern otherwise)
 *  @return the bits representing the value. The implicit 1 is still set in
 *  the extracted mantissa when x represents 0.0.
 */
iFloat_t floatGetVal (iFloat_t x);

/** Get the sign, exponent, and value in a single call. The three results are
 *  written through the pointer arguments; nothing is returned.
 *  NOTE(review): presumably these are the same results that floatGetSign,
 *  floatGetExp and floatGetVal produce for x - confirm against iFloat.c.
 *  @param x the iFloat_t holding the encoded floating point value (a bfloat16
 *  bit pattern when HALF is defined, a 32-bit pattern otherwise)
 *  @param sign [out] pointer to location where the sign will be stored
 *  @param exp [out] pointer to location where the exponent will be stored
 *  @param val [out] pointer to location where the value will be stored
 */
void floatGetAll(iFloat_t x, iFloat_t* sign, iFloat_t*exp, iFloat_t* val);

/** Obtain the position of the leftmost 1 in the argument's bits. In a binary
 *  number, the positions are numbered from right to left with the rightmost
 *  position being 0.
 *  @param bits the integer to examine
 *  @return -1 if the value is 0, otherwise the position of the leftmost 1
 *  bit: 0 to 15 for the 16-bit iFloat_t selected by HALF.
 *  NOTE(review): for the 32-bit iFloat_t the range is presumably 0 to 31
 *  (that is, 0 to BITS - 1) - confirm.
 */
iFloat_t floatLeftMost1 (iFloat_t bits);

/** Absolute value of the argument. A single bit manipulation operation is
 *  enough; no conditionals are required.
 *  @param x the iFloat_t holding the encoded floating point value (a bfloat16
 *  bit pattern when HALF is defined, a 32-bit pattern otherwise)
 *  @return the absolute value of the parameter, in the same encoding
 */
iFloat_t floatAbs (iFloat_t x);

/** Negate the floating point value encoded in the argument. This is NOT the
 *  bitwise negation of the argument: if the argument represents 2.25, this
 *  function should return the bit pattern for -2.25 in the same encoding.
 *  It can be done with a simple bit manipulation function; no conditionals
 *  are required.
 *  NOTE(review): toggling only the sign bit of 0.0 would yield -0.0, which
 *  the return contract below rules out, so zero appears to need separate
 *  handling - confirm against the assignment instructions.
 *  @param x the iFloat_t holding the encoded floating point value (a bfloat16
 *  bit pattern when HALF is defined, a 32-bit pattern otherwise)
 *  @return the negation of the value. Note that the negation of 0.0 is 0.0 (not
 *  -0.0).
 */
iFloat_t floatNegate (iFloat_t x);

/** Add two floating point values using integer operations only.
 *  @param x an iFloat_t holding the first encoded operand (a bfloat16 bit
 *  pattern when HALF is defined, a 32-bit pattern otherwise)
 *  @param y an iFloat_t holding the second encoded operand, in the same
 *  encoding as x
 *  @return x + y, in the same encoding. Your code needs to account for a
 *  value of 0.0, but no other special cases (e.g. infinities).
 */
iFloat_t floatAdd (iFloat_t x, iFloat_t y);

/** Subtract two floating point values using integer operations only.
 *  @param x an iFloat_t holding the encoded minuend (a bfloat16 bit pattern
 *  when HALF is defined, a 32-bit pattern otherwise)
 *  @param y an iFloat_t holding the encoded subtrahend, in the same encoding
 *  as x
 *  @return x - y, in the same encoding. Your code needs to account for a
 *  value of 0.0, but no other special cases (e.g. infinities).
 */
iFloat_t floatSub (iFloat_t x, iFloat_t y);

#endif