Update modeling_phi3.py
Browse files — modeling_phi3.py (+2 −2)
modeling_phi3.py
CHANGED
|
@@ -262,8 +262,8 @@ class Phi3MLP(nn.Module):
|
|
| 262 |
self.config = config
|
| 263 |
|
| 264 |
self.gate = nn.Linear(config.hidden_size, self.config.num_experts, bias=False)
|
| 265 |
-
self.gate_up_proj = nn.ModuleList([nn.Linear(config.hidden_size, 2 * config.intermediate_size, bias=False) for i in range(
|
| 266 |
-
self.down_proj = nn.ModuleList([nn.Linear(config.intermediate_size, config.hidden_size, bias=False) for i in range(
|
| 267 |
self.activation_fn = ACT2FN[config.hidden_act]
|
| 268 |
|
| 269 |
def forward(self, hidden_states: torch.FloatTensor) -> torch.FloatTensor:
|
|
|
|
| 262 |
self.config = config
|
| 263 |
|
| 264 |
self.gate = nn.Linear(config.hidden_size, self.config.num_experts, bias=False)
|
| 265 |
+
self.gate_up_proj = nn.ModuleList([nn.Linear(config.hidden_size, 2 * config.intermediate_size, bias=False) for i in range(self.config.num_experts)])
|
| 266 |
+
self.down_proj = nn.ModuleList([nn.Linear(config.intermediate_size, config.hidden_size, bias=False) for i in range(self.config.num_experts)])
|
| 267 |
self.activation_fn = ACT2FN[config.hidden_act]
|
| 268 |
|
| 269 |
def forward(self, hidden_states: torch.FloatTensor) -> torch.FloatTensor:
|