httpz

- Hyper-fast HTTP Scraping Tool
git clone git://git.acid.vegas/httpz.git
Log | Files | Refs | Archive | README | LICENSE

commit 41d7e53d30a07776137eb3a5846d210e8599170d
parent db9590f59d333fdaa1ccdd0b5e137437ddd908fd
Author: acidvegas <acid.vegas@acid.vegas>
Date: Wed, 12 Feb 2025 00:32:28 -0500

fixed chunk output

Diffstat:
M httpz_scanner/__init__.py | 4 ++--
M httpz_scanner/scanner.py | 40 ++++++++++++++++------------------------
M setup.py | 2 +-

3 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/httpz_scanner/__init__.py b/httpz_scanner/__init__.py
@@ -6,4 +6,4 @@ from .colors  import Colors
 from .scanner import HTTPZScanner
 
 
-__version__ = '2.1.1'
-\ No newline at end of file
+__version__ = '2.1.2'
+\ No newline at end of file
diff --git a/httpz_scanner/scanner.py b/httpz_scanner/scanner.py
@@ -197,7 +197,7 @@ class HTTPZScanner:
             self.resolvers = await load_resolvers(self.resolver_file)
 
         async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
-            tasks = set()
+            tasks = {}  # Change to dict to track domain for each task
             domain_queue = asyncio.Queue()
             queue_empty = False
             
@@ -249,52 +249,44 @@ class HTTPZScanner:
             
             try:
                 while not queue_empty or tasks:
-                    # Fill up tasks to concurrent_limit
+                    # Start new tasks if needed
                     while len(tasks) < self.concurrent_limit and not queue_empty:
                         try:
                             domain = await domain_queue.get()
-                            if domain is None:  # Queue is empty
+                            if domain is None:
                                 queue_empty = True
                                 break
                             task = asyncio.create_task(process_domain(domain))
-                            tasks.add(task)
-                        except asyncio.CancelledError:
-                            break
+                            tasks[task] = domain
                         except Exception as e:
                             debug(f'Error creating task: {str(e)}')
                     
                     if not tasks:
                         break
-                    
-                    # Wait for any task to complete with timeout
+
+                    # Wait for the FIRST task to complete
                     try:
-                        done, pending = await asyncio.wait(
-                            tasks,
+                        done, _ = await asyncio.wait(
+                            tasks.keys(),
                             timeout=self.timeout,
                             return_when=asyncio.FIRST_COMPLETED
                         )
                         
-                        # Handle completed tasks
+                        # Process completed task immediately
                         for task in done:
-                            tasks.remove(task)
+                            domain = tasks.pop(task)
                             try:
                                 if result := await task:
                                     yield result
                             except Exception as e:
-                                debug(f'Error processing task result: {str(e)}')
-                        
-                        # Handle timed out tasks
-                        if not done and pending:
-                            for task in pending:
-                                task.cancel()
-                                try:
-                                    await task
-                                except asyncio.CancelledError:
-                                    pass
-                                tasks.remove(task)
-                                
+                                debug(f'Error processing result for {domain}: {str(e)}')
+                            
                     except Exception as e:
                         debug(f'Error in task processing loop: {str(e)}')
+                        # Remove any failed tasks
+                        failed_tasks = [t for t in tasks if t.done() and t.exception()]
+                        for task in failed_tasks:
+                            tasks.pop(task)
                     
             finally:
                 # Clean up
diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@ with open('README.md', 'r', encoding='utf-8') as f:
 
 setup(
     name='httpz_scanner',
-    version='2.1.1',
+    version='2.1.2',
     author='acidvegas',
     author_email='acid.vegas@acid.vegas',
     description='Hyper-fast HTTP Scraping Tool',