Issue
Question #1:
Is there a way to use a Mac with an M1 CPU and llama_index together?
I cannot get past the assertion below:
AssertionError Traceback (most recent call last)
<ipython-input-1-f2d62b66882b> in <module>
6 from transformers import pipeline
7
----> 8 class customLLM(LLM):
9 model_name = "google/flan-t5-large"
10 pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})
<ipython-input-1-f2d62b66882b> in customLLM()
8 class customLLM(LLM):
9 model_name = "google/flan-t5-large"
---> 10 pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})
11
12 def _call(self, prompt, stop=None):
~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/__init__.py in pipeline(task, model, config, tokenizer, feature_extractor, framework, revision, use_fast, use_auth_token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)
868 kwargs["device"] = device
869
--> 870 return pipeline_class(model=model, framework=framework, task=task, **kwargs)
~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/text2text_generation.py in __init__(self, *args, **kwargs)
63
64 def __init__(self, *args, **kwargs):
---> 65 super().__init__(*args, **kwargs)
66
67 self.check_model_type(
~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/base.py in __init__(self, model, tokenizer, feature_extractor, modelcard, framework, task, args_parser, device, binary_output, **kwargs)
776 # Special handling
777 if self.framework == "pt" and self.device.type != "cpu":
--> 778 self.model = self.model.to(self.device)
779
780 # Update config with task specific parameters
~/Library/Python/3.9/lib/python/site-packages/transformers/modeling_utils.py in to(self, *args, **kwargs)
1680 )
1681 else:
-> 1682 return super().to(*args, **kwargs)
1683
1684 def half(self, *args):
~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in to(self, *args, **kwargs)
1143 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
1144
-> 1145 return self._apply(convert)
1146
1147 def register_full_backward_pre_hook(
~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in _apply(self, fn)
795 def _apply(self, fn):
796 for module in self.children():
--> 797 module._apply(fn)
798
799 def compute_should_use_set_data(tensor, tensor_applied):
~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in _apply(self, fn)
818 # `with torch.no_grad():`
819 with torch.no_grad():
--> 820 param_applied = fn(param)
821 should_use_set_data = compute_should_use_set_data(param, param_applied)
822 if should_use_set_data:
~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in convert(t)
1141 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
1142 non_blocking, memory_format=convert_to_format)
-> 1143 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
1144
1145 return self._apply(convert)
~/Library/Python/3.9/lib/python/site-packages/torch/cuda/__init__.py in _lazy_init()
237 "multiprocessing, you must use the 'spawn' start method")
238 if not hasattr(torch._C, '_cuda_getDeviceCount'):
--> 239 raise AssertionError("Torch not compiled with CUDA enabled")
240 if _cudart is None:
241 raise AssertionError(
AssertionError: Torch not compiled with CUDA enabled
Obviously I have no Nvidia card, but I've read that PyTorch now supports the Mac M1 as well.
I'm trying to run the example below:
from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LLMPredictor, ServiceContext
import torch
from langchain.llms.base import LLM
from transformers import pipeline

class customLLM(LLM):
    model_name = "google/flan-t5-large"
    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype": torch.bfloat16})

    def _call(self, prompt, stop=None):
        return self.pipeline(prompt, max_length=9999)[0]["generated_text"]

    def _identifying_params(self):
        return {"name_of_model": self.model_name}

    def _llm_type(self):
        return "custom"

llm_predictor = LLMPredictor(llm=customLLM())
Question #2:
Assuming the answer to the above is no: I don't mind using Google Colab with a GPU, but once the index has been built, will it be possible to download it and use it on my Mac?
i.e. something like:
on Google Colab:
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
index.save_to_disk('index.json')
... and later on my Mac use load_from_file
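Roughly, the load side on the Mac might look like the following sketch, assuming the installed llama_index release exposes a matching GPTSimpleVectorIndex.load_from_disk and that the service context is rebuilt locally from the same components (the exact method name and signature may differ between releases):

# on the Mac (sketch): rebuild the same service context, then load the index
# that was saved on Colab with index.save_to_disk('index.json')
from llama_index import GPTSimpleVectorIndex, ServiceContext

# llm_predictor and embed_model defined as in the snippets above
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTSimpleVectorIndex.load_from_disk('index.json', service_context=service_context)
print(index.query("What is this document about?"))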
Solution
Why are you passing device=0? If isinstance(device, int), PyTorch will assume device is the index of a CUDA device, hence the error. Try device="cpu" (or maybe simply removing the device kwarg), and this issue should disappear.
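For example, a minimal sketch of the corrected pipeline call, keeping the rest of the custom LLM class from the question unchanged:

# Sketch: build the pipeline on the CPU so that no CUDA device is required.
# Passing device="cpu" (or omitting the device kwarg) avoids the assertion on an M1 Mac.
from transformers import pipeline
import torch

model_name = "google/flan-t5-large"
pipe = pipeline(
    "text2text-generation",
    model=model_name,
    device="cpu",  # not device=0, which is treated as CUDA device index 0
    model_kwargs={"torch_dtype": torch.bfloat16},
)
print(pipe("Summarize: PyTorch can run on Apple Silicon without CUDA.", max_length=64)[0]["generated_text"])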
Answered By - Luke G