diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py
index f95bb24a..ea4ff92d 100644
--- a/modeling/inference_models/generic_hf_torch/class.py
+++ b/modeling/inference_models/generic_hf_torch/class.py
@@ -107,6 +107,7 @@ class model_backend(HFTorchInferenceModel):
 
         tf_kwargs = {
             "low_cpu_mem_usage": True,
+            "use_cache": True  # Workaround for models that accidentally turn cache to false
         }
 
         if not hasattr(self.model_config, 'quantization_config'):
@@ -130,8 +131,8 @@ class model_backend(HFTorchInferenceModel):
                 })
 
         if self.model_type == "gpt2":
-            # We must disable low_cpu_mem_usage and if using a GPT-2 model
-            # because GPT-2 is not compatible with this feature yet.
+            # We must disable low_cpu_mem_usage and quantization if using a GPT-2 model
+            # because GPT-2 is not compatible with these features yet.
             tf_kwargs.pop("low_cpu_mem_usage", None)
             tf_kwargs.pop("quantization_config", None)
 
diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index e3e919b3..7e291b93 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -230,7 +230,6 @@ class HFInferenceModel(InferenceModel):
     def _post_load(self) -> None:
         self.badwordsids = koboldai_settings.badwordsids_default
         self.model_type = str(self.model_config.model_type)
-        self.model.use_cache = True  # Workaround for models that accidentally uploaded with False
 
         # These are model specific tokenizer overrides if a model has bad defaults
         if self.model_type == "llama":
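
For context, this moves the use_cache workaround from a post-load attribute assignment (`self.model.use_cache = True`) into `tf_kwargs`, which is presumably splatted into the `transformers` `from_pretrained()` call (not shown in the diff). Keyword arguments that match config attributes override whatever value was saved in the checkpoint's config, so the fix takes effect on `model.config` at load time. A minimal sketch of that behaviour, assuming the standard `transformers` API and a placeholder model path:

```python
# Minimal sketch, assuming tf_kwargs is forwarded to a standard
# transformers from_pretrained() call (not shown in the diff);
# "some/model-path" is a placeholder, not a real checkpoint.
from transformers import AutoModelForCausalLM

tf_kwargs = {
    "low_cpu_mem_usage": True,
    "use_cache": True,  # config-override kwarg: wins over a config uploaded with use_cache=False
}

model = AutoModelForCausalLM.from_pretrained("some/model-path", **tf_kwargs)
print(model.config.use_cache)  # True, regardless of what the saved config said
```

The old `self.model.use_cache = True` line only set an attribute on the model object, which is likely why it was an unreliable workaround; passing the override through `from_pretrained` updates the loaded config instead.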