block.rs

mod mbr;

use super::{
    constants::ENOENT,
    mem::{paging::Page, AsMemoryBlock as _},
    vfs::types::DeviceId,
};
use crate::kernel::constants::{EEXIST, EINVAL};
use crate::{
    io::{Buffer, FillResult},
    prelude::*,
};
use alloc::{
    collections::btree_map::{BTreeMap, Entry},
    sync::Arc,
};
use async_trait::async_trait;
use core::cmp::Ordering;
use mbr::MBRPartTable;

pub struct Partition {
    pub lba_offset: u64,
    pub sector_count: u64,
}

pub trait PartTable {
    fn partitions(&self) -> impl Iterator<Item = Partition> + use<'_, Self>;
}
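
// Illustrative sketch only (not part of this module): what a `PartTable`
// implementation might look like for a hypothetical fixed partition layout.
// The real implementation used below is `MBRPartTable` from the `mbr` module;
// this just shows the shape of the trait contract.
//
//     struct FixedPartTable {
//         entries: Vec<Partition>,
//     }
//
//     impl PartTable for FixedPartTable {
//         fn partitions(&self) -> impl Iterator<Item = Partition> + use<'_, Self> {
//             self.entries.iter().map(|part| Partition {
//                 lba_offset: part.lba_offset,
//                 sector_count: part.sector_count,
//             })
//         }
//     }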

#[async_trait]
pub trait BlockRequestQueue: Send + Sync {
    /// Maximum number of sectors that can be read or written in one request.
    fn max_request_pages(&self) -> u64;

    async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()>;
}
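
// Illustrative sketch only (hypothetical, not part of this module): a trivial
// RAM-backed `BlockRequestQueue`. A real driver (e.g. an AHCI or virtio-blk
// queue) would translate the request into hardware commands; this sketch just
// shows the contract: move `count` sectors starting at `sector` between the
// backing store and the request's pages.
//
//     struct RamDiskQueue {
//         sectors: Spin<Vec<u8>>, // flat array of 512-byte sectors
//     }
//
//     #[async_trait]
//     impl BlockRequestQueue for RamDiskQueue {
//         fn max_request_pages(&self) -> u64 {
//             // Arbitrary cap chosen for the sketch.
//             1024
//         }
//
//         async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()> {
//             match req {
//                 BlockDeviceRequest::Read { sector, count, buffer } => {
//                     let data = self.sectors.lock();
//                     let mut src = &data[sector as usize * 512..][..count as usize * 512];
//                     // Scatter the contiguous range into the caller's pages.
//                     for page in buffer {
//                         let memblk = page.as_memblk();
//                         // SAFETY: the pages belong to this request alone.
//                         let dst = unsafe {
//                             core::slice::from_raw_parts_mut(memblk.addr().get() as *mut u8, memblk.len())
//                         };
//                         let n = src.len().min(dst.len());
//                         dst[..n].copy_from_slice(&src[..n]);
//                         src = &src[n..];
//                     }
//                     Ok(())
//                 }
//                 BlockDeviceRequest::Write { sector, count, buffer } => {
//                     // Mirror image of the read path: gather from the pages
//                     // into the backing store.
//                     Ok(())
//                 }
//             }
//         }
//     }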

enum BlockDeviceType {
    Disk {
        queue: Arc<dyn BlockRequestQueue>,
    },
    Partition {
        disk_dev: DeviceId,
        lba_offset: u64,
        queue: Arc<dyn BlockRequestQueue>,
    },
}

pub struct BlockDevice {
    /// Unique device identifier, major and minor numbers.
    devid: DeviceId,
    /// Total size of the device in sectors (512 bytes each).
    sector_count: u64,
    dev_type: BlockDeviceType,
}

impl PartialEq for BlockDevice {
    fn eq(&self, other: &Self) -> bool {
        self.devid == other.devid
    }
}

impl PartialOrd for BlockDevice {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.devid.cmp(&other.devid))
    }
}

impl Eq for BlockDevice {}

impl Ord for BlockDevice {
    fn cmp(&self, other: &Self) -> Ordering {
        self.devid.cmp(&other.devid)
    }
}

static BLOCK_DEVICE_LIST: Spin<BTreeMap<DeviceId, Arc<BlockDevice>>> = Spin::new(BTreeMap::new());

impl BlockDevice {
    pub fn register_disk(
        devid: DeviceId,
        size: u64,
        queue: Arc<dyn BlockRequestQueue>,
    ) -> KResult<Arc<Self>> {
        let device = Arc::new(Self {
            devid,
            sector_count: size,
            dev_type: BlockDeviceType::Disk { queue },
        });

        match BLOCK_DEVICE_LIST.lock().entry(devid) {
            Entry::Vacant(entry) => Ok(entry.insert(device).clone()),
            Entry::Occupied(_) => Err(EEXIST),
        }
    }

    pub fn get(devid: DeviceId) -> KResult<Arc<Self>> {
        BLOCK_DEVICE_LIST.lock().get(&devid).cloned().ok_or(ENOENT)
    }
}

impl BlockDevice {
    pub fn devid(&self) -> DeviceId {
        self.devid
    }

    fn queue(&self) -> &Arc<dyn BlockRequestQueue> {
        match &self.dev_type {
            BlockDeviceType::Disk { queue } => queue,
            BlockDeviceType::Partition { queue, .. } => queue,
        }
    }

    pub fn register_partition(&self, idx: usize, offset: u64, size: u64) -> KResult<Arc<Self>> {
        let queue = match &self.dev_type {
            BlockDeviceType::Disk { queue } => queue.clone(),
            BlockDeviceType::Partition { .. } => return Err(EINVAL),
        };

        let device = Arc::new(BlockDevice {
            devid: DeviceId::new(self.devid.major, self.devid.minor + idx as u16 + 1),
            sector_count: size,
            dev_type: BlockDeviceType::Partition {
                disk_dev: self.devid,
                lba_offset: offset,
                queue,
            },
        });

        match BLOCK_DEVICE_LIST.lock().entry(device.devid()) {
            Entry::Vacant(entry) => Ok(entry.insert(device).clone()),
            Entry::Occupied(_) => Err(EEXIST),
        }
    }

    pub async fn partprobe(&self) -> KResult<()> {
        match self.dev_type {
            BlockDeviceType::Partition { .. } => Err(EINVAL),
            BlockDeviceType::Disk { .. } => {
                if let Ok(mbr_table) = MBRPartTable::from_disk(self).await {
                    for (idx, partition) in mbr_table.partitions().enumerate() {
                        self.register_partition(idx, partition.lba_offset, partition.sector_count)?;
                    }
                }
                Ok(())
            }
        }
    }
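
    // Typical registration flow (illustrative; `AHCI_QUEUE`, the device numbers
    // and `sector_count` are placeholders, not definitions from this crate, and
    // the calls assume an async context returning `KResult`):
    //
    //     // Register the whole disk as (8, 0) with its driver queue...
    //     let disk = BlockDevice::register_disk(DeviceId::new(8, 0), sector_count, AHCI_QUEUE.clone())?;
    //     // ...then scan the MBR and register partitions as (8, 1), (8, 2), ...
    //     disk.partprobe().await?;
    //     // Later, a filesystem can look a partition up by its device id.
    //     let part = BlockDevice::get(DeviceId::new(8, 1))?;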

    /// Send the request directly to the queue with no extra overhead.
    /// If any of the parameters does not meet the requirements, the operation fails.
    ///
    /// # Requirements
    /// - `req.count` must not exceed the disk size or the maximum request size
    /// - `req.sector` must be within the disk size
    /// - `req.buffer` must be large enough to hold the data
    pub async fn commit_request(&self, mut req: BlockDeviceRequest<'_>) -> KResult<()> {
        // Verify the request parameters.
        match &mut req {
            BlockDeviceRequest::Read { sector, count, .. } => {
                if *sector + *count > self.sector_count {
                    return Err(EINVAL);
                }
                if let BlockDeviceType::Partition { lba_offset, .. } = &self.dev_type {
                    // Adjust the sector for the partition offset.
                    *sector += lba_offset;
                }
            }
            BlockDeviceRequest::Write { sector, count, .. } => {
                if *sector + *count > self.sector_count {
                    return Err(EINVAL);
                }
                if let BlockDeviceType::Partition { lba_offset, .. } = &self.dev_type {
                    // Adjust the sector for the partition offset.
                    *sector += lba_offset;
                }
            }
        }

        self.queue().submit(req).await
    }

    /// Read some data from the block device; this may involve copying and fragmentation.
    ///
    /// Further optimization may be needed, including caching, read-ahead and reordering.
    ///
    /// # Arguments
    /// `offset` - offset in bytes
    pub async fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult<FillResult> {
        let mut sector_start = offset as u64 / 512;
        let mut first_sector_offset = offset as u64 % 512;
        let mut sector_count = (first_sector_offset + buffer.total() as u64 + 511) / 512;
        let mut nfilled = 0;

        'outer: while sector_count != 0 {
            let pages: &[Page];
            let page: Option<Page>;
            let page_vec: Option<Vec<Page>>;
            let nread;

            match sector_count {
                count if count <= 8 => {
                    // Up to 8 sectors fit in a single order-0 (4 KiB) page.
                    nread = count;
                    let _page = Page::alloc();
                    page = Some(_page);
                    pages = core::slice::from_ref(page.as_ref().unwrap());
                }
                count if count <= 16 => {
                    // Up to 16 sectors fit in a single order-1 (8 KiB) page.
                    nread = count;
                    let _pages = Page::alloc_order(1);
                    page = Some(_pages);
                    pages = core::slice::from_ref(page.as_ref().unwrap());
                }
                count => {
                    // Larger reads are capped by the queue limit and use a vector of order-1 pages.
                    nread = count.min(self.queue().max_request_pages());
                    let npages = (nread + 15) / 16;
                    let mut _page_vec = Vec::with_capacity(npages as usize);
                    for _ in 0..npages {
                        _page_vec.push(Page::alloc_order(1));
                    }
                    page_vec = Some(_page_vec);
                    pages = page_vec.as_ref().unwrap().as_slice();
                }
            }

            let req = BlockDeviceRequest::Read {
                sector: sector_start,
                count: nread,
                buffer: &pages,
            };
            self.commit_request(req).await?;

            for page in pages.iter() {
                // SAFETY: We are the only owner of the page so no one could be mutating it.
                let data = unsafe { &page.as_memblk().as_bytes()[first_sector_offset as usize..] };
                first_sector_offset = 0;

                match buffer.fill(data)? {
                    FillResult::Done(n) => nfilled += n,
                    FillResult::Partial(n) => {
                        nfilled += n;
                        break 'outer;
                    }
                    FillResult::Full => {
                        break 'outer;
                    }
                }
            }

            sector_start += nread;
            sector_count -= nread;
        }

        if nfilled == buffer.total() {
            Ok(FillResult::Done(nfilled))
        } else {
            Ok(FillResult::Partial(nfilled))
        }
    }
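
    // Illustrative read path (hypothetical caller): `read_some` takes a byte
    // offset and fills any `Buffer` implementation, handling the unaligned
    // head sector internally. The buffer construction is left abstract here
    // because the concrete `Buffer` types live in `crate::io`:
    //
    //     let mut raw = [0u8; 1024];
    //     let mut buffer = /* a `Buffer` backed by &mut raw[..] */;
    //     // Read 1024 bytes starting 3 sectors + 10 bytes into the device.
    //     match device.read_some(3 * 512 + 10, &mut buffer).await? {
    //         FillResult::Done(n) => { /* the buffer's full capacity `n` was filled */ }
    //         FillResult::Partial(n) => { /* only `n` bytes could be filled */ }
    //         FillResult::Full => { /* buffer reported itself full up front */ }
    //     }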

    /// Write some data to the block device; this may involve copying and fragmentation.
    ///
    /// # Arguments
    /// `offset` - offset in bytes
    /// `data` - data to write
    pub async fn write_some(&self, offset: usize, data: &[u8]) -> KResult<usize> {
        let mut sector_start = offset as u64 / 512;
        let mut first_sector_offset = offset as u64 % 512;
        let mut remaining_data = data;
        let mut nwritten = 0;

        while !remaining_data.is_empty() {
            let pages: &[Page];
            let page: Option<Page>;
            let page_vec: Option<Vec<Page>>;

            // Calculate the sectors needed for this write.
            let write_end = first_sector_offset + remaining_data.len() as u64;
            let sector_count = ((write_end + 511) / 512).min(self.queue().max_request_pages());

            match sector_count {
                count if count <= 8 => {
                    let _page = Page::alloc();
                    page = Some(_page);
                    pages = core::slice::from_ref(page.as_ref().unwrap());
                }
                count if count <= 16 => {
                    let _pages = Page::alloc_order(1);
                    page = Some(_pages);
                    pages = core::slice::from_ref(page.as_ref().unwrap());
                }
                count => {
                    let npages = (count + 15) / 16;
                    let mut _page_vec = Vec::with_capacity(npages as usize);
                    for _ in 0..npages {
                        _page_vec.push(Page::alloc_order(1));
                    }
                    page_vec = Some(_page_vec);
                    pages = page_vec.as_ref().unwrap().as_slice();
                }
            }

            if first_sector_offset != 0 || remaining_data.len() < (sector_count * 512) as usize {
                // Partial sectors are involved: read the affected sectors first so the
                // bytes we do not overwrite are preserved (read-modify-write).
                let read_req = BlockDeviceRequest::Read {
                    sector: sector_start,
                    count: sector_count,
                    buffer: pages,
                };
                self.commit_request(read_req).await?;
            }

            let mut data_offset = 0;
            let mut page_offset = first_sector_offset as usize;

            for page in pages.iter() {
                // SAFETY: We own the page and can modify it.
                let page_data = unsafe {
                    let memblk = page.as_memblk();
                    core::slice::from_raw_parts_mut(memblk.addr().get() as *mut u8, memblk.len())
                };

                let copy_len =
                    (remaining_data.len() - data_offset).min(page_data.len() - page_offset);
                if copy_len == 0 {
                    break;
                }

                page_data[page_offset..page_offset + copy_len]
                    .copy_from_slice(&remaining_data[data_offset..data_offset + copy_len]);

                data_offset += copy_len;
                page_offset = 0; // Only the first page has an offset.

                if data_offset >= remaining_data.len() {
                    break;
                }
            }

            let write_req = BlockDeviceRequest::Write {
                sector: sector_start,
                count: sector_count,
                buffer: pages,
            };
            self.commit_request(write_req).await?;

            let bytes_written = data_offset;
            nwritten += bytes_written;
            remaining_data = &remaining_data[bytes_written..];
            sector_start += sector_count;
            first_sector_offset = 0;
        }

        Ok(nwritten)
    }
}
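
// Illustrative write path (hypothetical caller). Because the device only
// transfers whole 512-byte sectors, an unaligned or short write goes through
// read-modify-write: the affected sectors are read in, patched in memory, then
// written back. Callers just pass a byte offset and a byte slice:
//
//     let payload = b"hello, block device";
//     // Starts 100 bytes into sector 2, so sector 2 is read, patched, rewritten.
//     let nwritten = device.write_some(2 * 512 + 100, payload).await?;
//     assert_eq!(nwritten, payload.len());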

pub enum BlockDeviceRequest<'lt> {
    Read {
        /// Sector to read from, in 512-byte blocks
        sector: u64,
        /// Number of sectors to read
        count: u64,
        /// Buffer pages to read into
        buffer: &'lt [Page],
    },
    Write {
        /// Sector to write to, in 512-byte blocks
        sector: u64,
        /// Number of sectors to write
        count: u64,
        /// Buffer pages to write from
        buffer: &'lt [Page],
    },
}