Spaces:

ariG23498
/

zero-shot-od

Running on Zero

App Files Files Community

ariG23498 HF Staff committed on Aug 11

Commit

ff97ba5

1 Parent(s): 8983f6a

same device

Browse files

Files changed (1) hide show

app.py +8 -6

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ class ZSDetBundle:
 # LLMDet
 model_llmdet_id = "iSEE-Laboratory/llmdet_tiny"
 processor_llmdet = AutoProcessor.from_pretrained(model_llmdet_id)
-model_llmdet = AutoModelForZeroShotObjectDetection.from_pretrained(model_llmdet_id).to(DEVICE).eval()
 bundle_llmdet = ZSDetBundle(
     model_id=model_llmdet_id,
     model_name=extract_model_short_name(model_llmdet_id),
@@ -47,7 +47,7 @@ bundle_llmdet = ZSDetBundle(
 # MM GroundingDINO
 model_mm_grounding_id = "rziga/mm_grounding_dino_tiny_o365v1_goldg"
 processor_mm_grounding = AutoProcessor.from_pretrained(model_mm_grounding_id)
-model_mm_grounding = AutoModelForZeroShotObjectDetection.from_pretrained(model_mm_grounding_id).to(DEVICE).eval()
 bundle_mm_grounding = ZSDetBundle(
     model_id=model_mm_grounding_id,
     model_name=extract_model_short_name(model_mm_grounding_id),
@@ -59,7 +59,7 @@ bundle_mm_grounding = ZSDetBundle(
 # OMDet Turbo
 model_omdet_id = "omlab/omdet-turbo-swin-tiny-hf"
 processor_omdet = AutoProcessor.from_pretrained(model_omdet_id)
-model_omdet = AutoModelForZeroShotObjectDetection.from_pretrained(model_omdet_id).to(DEVICE).eval()
 bundle_omdet = ZSDetBundle(
     model_id=model_omdet_id,
     model_name=extract_model_short_name(model_omdet_id),
@@ -71,7 +71,7 @@ bundle_omdet = ZSDetBundle(
 # OWLv2
 model_owlv2_id = "google/owlv2-large-patch14-ensemble"
 processor_owlv2 = AutoProcessor.from_pretrained(model_owlv2_id)
-model_owlv2 = AutoModelForZeroShotObjectDetection.from_pretrained(model_owlv2_id).to(DEVICE).eval()
 bundle_owlv2 = ZSDetBundle(
     model_id=model_owlv2_id,
     model_name=extract_model_short_name(model_owlv2_id),
@@ -95,13 +95,15 @@ def detect(
     Returns [(bbox, label_score_str), ...], time_str
     """
     t0 = time.perf_counter()
     # HF zero-shot OD expects list-of-list text
     texts = [prompts]
-    inputs = bundle.processor(images=image, text=texts, return_tensors="pt").to(DEVICE)
     with torch.inference_mode():
-        outputs = bundle.model(**inputs)
     results = bundle.processor.post_process_grounded_object_detection(
         outputs, threshold=threshold, target_sizes=[image.size[::-1]]

 # LLMDet
 model_llmdet_id = "iSEE-Laboratory/llmdet_tiny"
 processor_llmdet = AutoProcessor.from_pretrained(model_llmdet_id)
+model_llmdet = AutoModelForZeroShotObjectDetection.from_pretrained(model_llmdet_id)
 bundle_llmdet = ZSDetBundle(
     model_id=model_llmdet_id,
     model_name=extract_model_short_name(model_llmdet_id),
 # MM GroundingDINO
 model_mm_grounding_id = "rziga/mm_grounding_dino_tiny_o365v1_goldg"
 processor_mm_grounding = AutoProcessor.from_pretrained(model_mm_grounding_id)
+model_mm_grounding = AutoModelForZeroShotObjectDetection.from_pretrained(model_mm_grounding_id)
 bundle_mm_grounding = ZSDetBundle(
     model_id=model_mm_grounding_id,
     model_name=extract_model_short_name(model_mm_grounding_id),
 # OMDet Turbo
 model_omdet_id = "omlab/omdet-turbo-swin-tiny-hf"
 processor_omdet = AutoProcessor.from_pretrained(model_omdet_id)
+model_omdet = AutoModelForZeroShotObjectDetection.from_pretrained(model_omdet_id)
 bundle_omdet = ZSDetBundle(
     model_id=model_omdet_id,
     model_name=extract_model_short_name(model_omdet_id),
 # OWLv2
 model_owlv2_id = "google/owlv2-large-patch14-ensemble"
 processor_owlv2 = AutoProcessor.from_pretrained(model_owlv2_id)
+model_owlv2 = AutoModelForZeroShotObjectDetection.from_pretrained(model_owlv2_id)
 bundle_owlv2 = ZSDetBundle(
     model_id=model_owlv2_id,
     model_name=extract_model_short_name(model_owlv2_id),
     Returns [(bbox, label_score_str), ...], time_str
     """
     t0 = time.perf_counter()
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     # HF zero-shot OD expects list-of-list text
     texts = [prompts]
+    inputs = bundle.processor(images=image, text=texts, return_tensors="pt").to(device)
+    model = bundle.model.to(device).eval()
     with torch.inference_mode():
+        outputs = model(**inputs)
     results = bundle.processor.post_process_grounded_object_detection(
         outputs, threshold=threshold, target_sizes=[image.size[::-1]]